Implement FZ download
This commit is contained in:
parent
d6a9e7156f
commit
c8372b1fa5
23
app/Main.hs
23
app/Main.hs
@ -1,21 +1,12 @@
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Main (main) where
|
||||
|
||||
import Control.Exception (bracket)
|
||||
import qualified Data.Text.IO as TextIO
|
||||
|
||||
import qualified Database.PostgreSQL.Opium as Opium
|
||||
|
||||
import qualified Yore.DB
|
||||
import Yore.Repl (getToday)
|
||||
|
||||
main :: IO ()
|
||||
main = bracket unsafeConnect Opium.close $ \conn -> do
|
||||
result <- Yore.DB.getTables conn
|
||||
|
||||
case result of
|
||||
Left e ->
|
||||
putStrLn $ "Got error: " ++ show e
|
||||
Right rows ->
|
||||
mapM_ print rows
|
||||
where
|
||||
unsafeConnect = either (error . show) id <$> Opium.connect "host=localhost port=5432 user=yore-test dbname=yore-test"
|
||||
main = do
|
||||
putStrLn "* Getting URLs for today..."
|
||||
urls <- getToday
|
||||
putStrLn "* Done:"
|
||||
mapM_ TextIO.putStrLn urls
|
||||
|
||||
@ -1,10 +1,16 @@
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Yore.DB
|
||||
( Table (..)
|
||||
( DayIndex (..)
|
||||
, createDayFile
|
||||
, createDayIndex
|
||||
, readDayIndex
|
||||
, Table (..)
|
||||
, getTables
|
||||
) where
|
||||
|
||||
import Data.Functor.Identity (Identity (..))
|
||||
import Data.Time (Day)
|
||||
import GHC.Generics (Generic)
|
||||
|
||||
import qualified Database.PostgreSQL.Opium as Opium
|
||||
@ -18,3 +24,30 @@ instance Opium.FromRow Table
|
||||
|
||||
getTables :: Opium.Connection -> IO (Either Opium.Error [Table])
|
||||
getTables = Opium.fetch_ "SELECT table_schema AS schema, table_name AS name FROM information_schema.tables"
|
||||
|
||||
data DayIndex = DayIndex
|
||||
{ id :: Int
|
||||
, day :: Day
|
||||
} deriving (Show, Generic)
|
||||
|
||||
instance Opium.FromRow DayIndex
|
||||
|
||||
createDayIndex :: Day -> Opium.Connection -> IO (Either Opium.Error DayIndex)
|
||||
createDayIndex date = ex runIdentity .
|
||||
Opium.fetch
|
||||
"INSERT INTO yore.day_index (day) VALUES ($1) RETURNING *"
|
||||
(Identity date)
|
||||
|
||||
createDayFile :: Int -> FilePath -> Opium.Connection -> IO (Either Opium.Error ())
|
||||
createDayFile dayId path =
|
||||
Opium.execute
|
||||
"INSERT INTO yore.day_file (day_id, relative_path) VALUES ($1, $2)" (dayId, path)
|
||||
|
||||
readDayIndex :: Day -> Opium.Connection -> IO (Either Opium.Error (Maybe DayIndex))
|
||||
readDayIndex date =
|
||||
Opium.fetch
|
||||
"SELECT * FROM yore.day_index WHERE day = $1"
|
||||
(Identity date)
|
||||
|
||||
ex :: (a -> b) -> IO (Either e a) -> IO (Either e b)
|
||||
ex = fmap . fmap
|
||||
|
||||
@ -1,21 +1,28 @@
|
||||
{-# LANGUAGE DataKinds #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE QuasiQuotes #-}
|
||||
|
||||
module Yore.Download where
|
||||
module Yore.Download (downloadInto, getUrls) where
|
||||
|
||||
import Control.Lens hiding ((<.>))
|
||||
import Control.Monad (guard)
|
||||
import Data.Char
|
||||
import Data.List (find)
|
||||
import Data.Maybe (fromMaybe, listToMaybe, mapMaybe)
|
||||
import Data.Maybe (fromJust, fromMaybe, listToMaybe, mapMaybe, catMaybes)
|
||||
import Data.Text (Text)
|
||||
import Data.Time (Day, MonthOfYear, Year, fromGregorian, toGregorian)
|
||||
import Network.HTTP.Req
|
||||
import Network.HTTP.Req hiding (queryParam)
|
||||
import System.Directory (createDirectoryIfMissing)
|
||||
import System.FilePath (makeRelative, takeExtension, (</>), (<.>))
|
||||
import Text.HTML.Parser
|
||||
import Text.Printf (printf)
|
||||
import Text.URI (URI)
|
||||
import Text.URI.Lens
|
||||
import Text.URI.QQ hiding (uri)
|
||||
|
||||
import qualified Data.List.NonEmpty as NonEmpty
|
||||
import qualified Data.ByteString.Lazy as LBS
|
||||
import qualified Data.Text as Text
|
||||
import qualified Data.Text.Encoding as Encoding
|
||||
import qualified Data.UUID.V4 as UUID
|
||||
import qualified Text.URI as URI
|
||||
|
||||
data TargetSpec = TargetSpec
|
||||
@ -25,12 +32,36 @@ data TargetSpec = TargetSpec
|
||||
}
|
||||
deriving (Show)
|
||||
|
||||
getUrl :: Day -> IO Text
|
||||
getUrl date = do
|
||||
-- | Download a URL and save it to a directory.
|
||||
-- Returns the path of the downloaded file relative to the directory.
|
||||
downloadInto
|
||||
:: FilePath -- ^ Directory where to store the file.
|
||||
-> Text -- ^ The URL to download.
|
||||
-> IO FilePath
|
||||
downloadInto downloadDir textUrl = download >>= save
|
||||
where
|
||||
download :: IO LBS.ByteString
|
||||
download = do
|
||||
uri <- URI.mkURI textUrl
|
||||
let (url, opts) = fromJust $ useHttpsURI uri
|
||||
res <- runReq defaultHttpConfig $ req GET url NoReqBody lbsResponse opts
|
||||
guard $ responseStatusCode res == 200
|
||||
pure $ responseBody res
|
||||
|
||||
save :: LBS.ByteString -> IO FilePath
|
||||
save contents = do
|
||||
let suffix = takeExtension $ Text.unpack textUrl
|
||||
uuid <- UUID.nextRandom
|
||||
let fullPath = downloadDir </> show uuid <.> suffix
|
||||
createDirectoryIfMissing True downloadDir
|
||||
LBS.writeFile fullPath contents
|
||||
pure $ makeRelative downloadDir fullPath
|
||||
|
||||
getUrls :: Day -> IO [Text]
|
||||
getUrls date = do
|
||||
let (y, m, _) = toGregorian date
|
||||
candidates <- getMonthOverview y m
|
||||
let (_, ts) = fromMaybe (error "no candidates") $ find ((date ==) . fst) candidates
|
||||
fromMaybe (error "could not get showpdf url") <$> getShowpdf ts
|
||||
candidates <- filter ((date ==) . fst) <$> getMonthOverview y m
|
||||
catMaybes <$> mapM (getShowpdf . snd) candidates
|
||||
|
||||
getShowpdf :: TargetSpec -> IO (Maybe Text)
|
||||
getShowpdf (TargetSpec y m d) = do
|
||||
@ -59,12 +90,14 @@ getShowpdf (TargetSpec y m d) = do
|
||||
httpEquiv <- getAttr "http-equiv" attrs
|
||||
guard $ httpEquiv == "refresh"
|
||||
content <- getAttr "content" attrs
|
||||
Just $ snd $ Text.breakOnEnd "URL=" content
|
||||
Just $ escapeBrackets $ snd $ Text.breakOnEnd "URL=" content
|
||||
getRefreshTarget _ =
|
||||
Nothing
|
||||
|
||||
getAttr k = lookup k . map (\(Attr k' v) -> (k', v))
|
||||
|
||||
escapeBrackets = Text.replace "[" "%5B" . Text.replace "]" "%5D"
|
||||
|
||||
getMonthOverview :: Year -> MonthOfYear -> IO [(Day, TargetSpec)]
|
||||
getMonthOverview year month = do
|
||||
res <- runReq defaultHttpConfig $ req
|
||||
@ -93,16 +126,16 @@ getMonthOverview year month = do
|
||||
extractDay uri =
|
||||
fromGregorian y m d
|
||||
where
|
||||
y = read $ maybe "0" Text.unpack $ getParam "year" uri
|
||||
m = read $ maybe "0" Text.unpack $ getParam "month" uri
|
||||
d = read $ maybe "0" (takeWhile isDigit . Text.unpack) $ getParam "day" uri
|
||||
y = read $ Text.unpack $ getParam [queryKey|year|] "0" uri
|
||||
m = read $ Text.unpack $ getParam [queryKey|month|] "0" uri
|
||||
d = read $ Text.unpack $ Text.takeWhile isDigit $ getParam [queryKey|day|] "0" uri
|
||||
|
||||
extractTarget uri =
|
||||
TargetSpec y m d
|
||||
where
|
||||
y = fromMaybe "" $ getParam "year" uri
|
||||
m = fromMaybe "" $ getParam "month" uri
|
||||
d = fromMaybe "" $ getParam "day" uri
|
||||
y = fromMaybe "" (uri ^? uriQuery . queryParam [queryKey|year|] . unRText)
|
||||
m = fromMaybe "" (uri ^? uriQuery . queryParam [queryKey|month|] . unRText)
|
||||
d = fromMaybe "" (uri ^? uriQuery . queryParam [queryKey|day|] . unRText)
|
||||
|
||||
decodeHtmlEntities = Text.pack . q0 . Text.unpack
|
||||
where
|
||||
@ -118,16 +151,14 @@ getMonthOverview year month = do
|
||||
-- Or if they end unexpectedly.
|
||||
| otherwise = c : q0 r
|
||||
|
||||
isShowpdf uri =
|
||||
getPath uri == ["show", "fz.cgi"] && getParam "cmd" uri == Just "showpdf"
|
||||
|
||||
getPath :: URI -> [Text]
|
||||
getPath uri = case URI.uriPath uri of
|
||||
Nothing -> []
|
||||
Just (_, rts) -> map URI.unRText $ NonEmpty.toList rts
|
||||
|
||||
getParam :: Text -> URI -> Maybe Text
|
||||
getParam param uri = listToMaybe [value | (key, value) <- map go (URI.uriQuery uri), key == param]
|
||||
isShowpdf :: URI -> Bool
|
||||
isShowpdf uri =
|
||||
path == ["show", "fz.cgi"] && cmd == Just "showpdf"
|
||||
where
|
||||
go (URI.QueryFlag k) = (URI.unRText k, "")
|
||||
go (URI.QueryParam k v) = (URI.unRText k, URI.unRText v)
|
||||
path = uri ^.. uriPath . each . unRText
|
||||
cmd = uri ^? uriQuery . queryParam [queryKey|cmd|] . unRText
|
||||
|
||||
getParam :: URI.RText URI.QueryKey -> Text -> URI -> Text
|
||||
getParam param fallback uri =
|
||||
fromMaybe fallback (uri ^? uriQuery . queryParam param . unRText)
|
||||
|
||||
22
src/Yore/Index.hs
Normal file
22
src/Yore/Index.hs
Normal file
@ -0,0 +1,22 @@
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE OverloadedRecordDot #-}
|
||||
|
||||
module Yore.Index (createEntries) where
|
||||
|
||||
import Control.Exception (Exception, throwIO)
|
||||
import Control.Monad (forM_)
|
||||
import Data.Time (Day)
|
||||
|
||||
import qualified Database.PostgreSQL.Opium as Opium
|
||||
|
||||
import qualified Yore.DB as DB
|
||||
|
||||
createEntries :: Day -> [FilePath] -> Opium.Connection -> IO ()
|
||||
createEntries date urls conn = do
|
||||
confidently $ Opium.execute_ "BEGIN" conn
|
||||
dayIndex <- confidently $ DB.createDayIndex date conn
|
||||
forM_ urls $ \url -> confidently $ DB.createDayFile dayIndex.id url conn
|
||||
confidently $ Opium.execute_ "COMMIT" conn
|
||||
|
||||
confidently :: Exception e => IO (Either e a) -> IO a
|
||||
confidently action = action >>= either throwIO pure
|
||||
@ -1,15 +1,58 @@
|
||||
{-# LANGUAGE LambdaCase #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Yore.Repl (connect, exec) where
|
||||
module Yore.Repl (connect, exec, getToday, indexDay, addYears) where
|
||||
|
||||
import Control.Exception (bracket, throwIO)
|
||||
import Control.Monad (forM)
|
||||
import Data.Text (Text)
|
||||
import Data.Time (Day)
|
||||
import Data.Time.LocalTime (getZonedTime, localDay, zonedTimeToLocalTime)
|
||||
import Text.Printf (printf)
|
||||
|
||||
import qualified Database.PostgreSQL.Opium as Opium
|
||||
|
||||
import Yore.DB (DayIndex)
|
||||
import Yore.Download (downloadInto, getUrls)
|
||||
import Yore.Index (createEntries)
|
||||
import Yore.Time (addYears)
|
||||
|
||||
import qualified Yore.DB as DB
|
||||
|
||||
connect :: IO Opium.Connection
|
||||
connect =
|
||||
Opium.connect "host=localhost" >>= \case
|
||||
Opium.connect "host=localhost port=5433 user=yore-test" >>= \case
|
||||
Left e -> error $ show e
|
||||
Right c -> pure c
|
||||
|
||||
exec :: (Opium.Connection -> IO a) -> IO a
|
||||
exec f = f =<< connect
|
||||
exec = bracket connect Opium.close
|
||||
|
||||
getToday :: IO [Text]
|
||||
getToday = do
|
||||
now <- getZonedTime
|
||||
print now
|
||||
let currentDay = localDay $ zonedTimeToLocalTime now
|
||||
case addYears (-100) currentDay of
|
||||
Just then_ ->
|
||||
getUrls then_
|
||||
Nothing ->
|
||||
pure []
|
||||
|
||||
indexDay :: Day -> IO ()
|
||||
indexDay date = exec $ \conn -> do
|
||||
either throwIO pure =<< Opium.execute_ "SELECT pg_advisory_lock(42);" conn
|
||||
printf "Checking whether %s already exists...\n" (show date)
|
||||
mbDi <- either throwIO pure =<< DB.readDayIndex date conn
|
||||
case mbDi of
|
||||
Just (_ :: DayIndex) ->
|
||||
putStrLn "Nothing to do."
|
||||
Nothing -> do
|
||||
putStrLn "Retrieving URLs..."
|
||||
urls <- getUrls date
|
||||
paths <- forM urls $ \url -> do
|
||||
printf "Downloading %s...\n" url
|
||||
downloadInto "./download" url
|
||||
putStrLn "Creating DB entries..."
|
||||
createEntries date paths conn
|
||||
putStrLn "Done."
|
||||
|
||||
16
src/Yore/Time.hs
Normal file
16
src/Yore/Time.hs
Normal file
@ -0,0 +1,16 @@
|
||||
module Yore.Time (addYears) where
|
||||
|
||||
import Data.Time (Day, addGregorianYearsClip, isLeapYear, toGregorian)
|
||||
|
||||
addYears :: Integer -> Day -> Maybe Day
|
||||
addYears yearOffset date
|
||||
| isFebruary29th && not (isLeapYear year') =
|
||||
Nothing
|
||||
| otherwise =
|
||||
Just $ addGregorianYearsClip yearOffset date
|
||||
|
||||
where
|
||||
(year, month, day) = toGregorian date
|
||||
year' = year + yearOffset
|
||||
|
||||
isFebruary29th = month == 2 && day == 29
|
||||
@ -32,19 +32,25 @@ library
|
||||
import: shared-options
|
||||
exposed-modules:
|
||||
Yore.DB
|
||||
, Yore.Repl
|
||||
, Yore.Download
|
||||
, Yore.Index
|
||||
, Yore.Repl
|
||||
, Yore.Time
|
||||
hs-source-dirs:
|
||||
src
|
||||
build-depends:
|
||||
base >=4.18 && <5
|
||||
, bytestring
|
||||
, directory
|
||||
, filepath
|
||||
, html-parse
|
||||
, lens
|
||||
, modern-uri
|
||||
, opium
|
||||
, req
|
||||
, text
|
||||
, time
|
||||
, uuid
|
||||
|
||||
executable yore
|
||||
import: shared-options
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user