Implement FZ download
This commit is contained in:
parent
d6a9e7156f
commit
c8372b1fa5
23
app/Main.hs
23
app/Main.hs
@ -1,21 +1,12 @@
|
|||||||
{-# LANGUAGE OverloadedStrings #-}
|
|
||||||
|
|
||||||
module Main (main) where
|
module Main (main) where
|
||||||
|
|
||||||
import Control.Exception (bracket)
|
import qualified Data.Text.IO as TextIO
|
||||||
|
|
||||||
import qualified Database.PostgreSQL.Opium as Opium
|
import Yore.Repl (getToday)
|
||||||
|
|
||||||
import qualified Yore.DB
|
|
||||||
|
|
||||||
main :: IO ()
|
main :: IO ()
|
||||||
main = bracket unsafeConnect Opium.close $ \conn -> do
|
main = do
|
||||||
result <- Yore.DB.getTables conn
|
putStrLn "* Getting URLs for today..."
|
||||||
|
urls <- getToday
|
||||||
case result of
|
putStrLn "* Done:"
|
||||||
Left e ->
|
mapM_ TextIO.putStrLn urls
|
||||||
putStrLn $ "Got error: " ++ show e
|
|
||||||
Right rows ->
|
|
||||||
mapM_ print rows
|
|
||||||
where
|
|
||||||
unsafeConnect = either (error . show) id <$> Opium.connect "host=localhost port=5432 user=yore-test dbname=yore-test"
|
|
||||||
|
|||||||
@ -1,10 +1,16 @@
|
|||||||
{-# LANGUAGE OverloadedStrings #-}
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
|
||||||
module Yore.DB
|
module Yore.DB
|
||||||
( Table (..)
|
( DayIndex (..)
|
||||||
|
, createDayFile
|
||||||
|
, createDayIndex
|
||||||
|
, readDayIndex
|
||||||
|
, Table (..)
|
||||||
, getTables
|
, getTables
|
||||||
) where
|
) where
|
||||||
|
|
||||||
|
import Data.Functor.Identity (Identity (..))
|
||||||
|
import Data.Time (Day)
|
||||||
import GHC.Generics (Generic)
|
import GHC.Generics (Generic)
|
||||||
|
|
||||||
import qualified Database.PostgreSQL.Opium as Opium
|
import qualified Database.PostgreSQL.Opium as Opium
|
||||||
@ -18,3 +24,30 @@ instance Opium.FromRow Table
|
|||||||
|
|
||||||
getTables :: Opium.Connection -> IO (Either Opium.Error [Table])
|
getTables :: Opium.Connection -> IO (Either Opium.Error [Table])
|
||||||
getTables = Opium.fetch_ "SELECT table_schema AS schema, table_name AS name FROM information_schema.tables"
|
getTables = Opium.fetch_ "SELECT table_schema AS schema, table_name AS name FROM information_schema.tables"
|
||||||
|
|
||||||
|
data DayIndex = DayIndex
|
||||||
|
{ id :: Int
|
||||||
|
, day :: Day
|
||||||
|
} deriving (Show, Generic)
|
||||||
|
|
||||||
|
instance Opium.FromRow DayIndex
|
||||||
|
|
||||||
|
createDayIndex :: Day -> Opium.Connection -> IO (Either Opium.Error DayIndex)
|
||||||
|
createDayIndex date = ex runIdentity .
|
||||||
|
Opium.fetch
|
||||||
|
"INSERT INTO yore.day_index (day) VALUES ($1) RETURNING *"
|
||||||
|
(Identity date)
|
||||||
|
|
||||||
|
createDayFile :: Int -> FilePath -> Opium.Connection -> IO (Either Opium.Error ())
|
||||||
|
createDayFile dayId path =
|
||||||
|
Opium.execute
|
||||||
|
"INSERT INTO yore.day_file (day_id, relative_path) VALUES ($1, $2)" (dayId, path)
|
||||||
|
|
||||||
|
readDayIndex :: Day -> Opium.Connection -> IO (Either Opium.Error (Maybe DayIndex))
|
||||||
|
readDayIndex date =
|
||||||
|
Opium.fetch
|
||||||
|
"SELECT * FROM yore.day_index WHERE day = $1"
|
||||||
|
(Identity date)
|
||||||
|
|
||||||
|
ex :: (a -> b) -> IO (Either e a) -> IO (Either e b)
|
||||||
|
ex = fmap . fmap
|
||||||
|
|||||||
@ -1,21 +1,28 @@
|
|||||||
|
{-# LANGUAGE DataKinds #-}
|
||||||
{-# LANGUAGE OverloadedStrings #-}
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
{-# LANGUAGE QuasiQuotes #-}
|
||||||
|
|
||||||
module Yore.Download where
|
module Yore.Download (downloadInto, getUrls) where
|
||||||
|
|
||||||
|
import Control.Lens hiding ((<.>))
|
||||||
import Control.Monad (guard)
|
import Control.Monad (guard)
|
||||||
import Data.Char
|
import Data.Char
|
||||||
import Data.List (find)
|
import Data.Maybe (fromJust, fromMaybe, listToMaybe, mapMaybe, catMaybes)
|
||||||
import Data.Maybe (fromMaybe, listToMaybe, mapMaybe)
|
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
import Data.Time (Day, MonthOfYear, Year, fromGregorian, toGregorian)
|
import Data.Time (Day, MonthOfYear, Year, fromGregorian, toGregorian)
|
||||||
import Network.HTTP.Req
|
import Network.HTTP.Req hiding (queryParam)
|
||||||
|
import System.Directory (createDirectoryIfMissing)
|
||||||
|
import System.FilePath (makeRelative, takeExtension, (</>), (<.>))
|
||||||
import Text.HTML.Parser
|
import Text.HTML.Parser
|
||||||
import Text.Printf (printf)
|
import Text.Printf (printf)
|
||||||
import Text.URI (URI)
|
import Text.URI (URI)
|
||||||
|
import Text.URI.Lens
|
||||||
|
import Text.URI.QQ hiding (uri)
|
||||||
|
|
||||||
import qualified Data.List.NonEmpty as NonEmpty
|
import qualified Data.ByteString.Lazy as LBS
|
||||||
import qualified Data.Text as Text
|
import qualified Data.Text as Text
|
||||||
import qualified Data.Text.Encoding as Encoding
|
import qualified Data.Text.Encoding as Encoding
|
||||||
|
import qualified Data.UUID.V4 as UUID
|
||||||
import qualified Text.URI as URI
|
import qualified Text.URI as URI
|
||||||
|
|
||||||
data TargetSpec = TargetSpec
|
data TargetSpec = TargetSpec
|
||||||
@ -25,12 +32,36 @@ data TargetSpec = TargetSpec
|
|||||||
}
|
}
|
||||||
deriving (Show)
|
deriving (Show)
|
||||||
|
|
||||||
getUrl :: Day -> IO Text
|
-- | Download a URL and save it to a directory.
|
||||||
getUrl date = do
|
-- Returns the path of the downloaded file relative to the directory.
|
||||||
|
downloadInto
|
||||||
|
:: FilePath -- ^ Directory where to store the file.
|
||||||
|
-> Text -- ^ The URL to download.
|
||||||
|
-> IO FilePath
|
||||||
|
downloadInto downloadDir textUrl = download >>= save
|
||||||
|
where
|
||||||
|
download :: IO LBS.ByteString
|
||||||
|
download = do
|
||||||
|
uri <- URI.mkURI textUrl
|
||||||
|
let (url, opts) = fromJust $ useHttpsURI uri
|
||||||
|
res <- runReq defaultHttpConfig $ req GET url NoReqBody lbsResponse opts
|
||||||
|
guard $ responseStatusCode res == 200
|
||||||
|
pure $ responseBody res
|
||||||
|
|
||||||
|
save :: LBS.ByteString -> IO FilePath
|
||||||
|
save contents = do
|
||||||
|
let suffix = takeExtension $ Text.unpack textUrl
|
||||||
|
uuid <- UUID.nextRandom
|
||||||
|
let fullPath = downloadDir </> show uuid <.> suffix
|
||||||
|
createDirectoryIfMissing True downloadDir
|
||||||
|
LBS.writeFile fullPath contents
|
||||||
|
pure $ makeRelative downloadDir fullPath
|
||||||
|
|
||||||
|
getUrls :: Day -> IO [Text]
|
||||||
|
getUrls date = do
|
||||||
let (y, m, _) = toGregorian date
|
let (y, m, _) = toGregorian date
|
||||||
candidates <- getMonthOverview y m
|
candidates <- filter ((date ==) . fst) <$> getMonthOverview y m
|
||||||
let (_, ts) = fromMaybe (error "no candidates") $ find ((date ==) . fst) candidates
|
catMaybes <$> mapM (getShowpdf . snd) candidates
|
||||||
fromMaybe (error "could not get showpdf url") <$> getShowpdf ts
|
|
||||||
|
|
||||||
getShowpdf :: TargetSpec -> IO (Maybe Text)
|
getShowpdf :: TargetSpec -> IO (Maybe Text)
|
||||||
getShowpdf (TargetSpec y m d) = do
|
getShowpdf (TargetSpec y m d) = do
|
||||||
@ -59,12 +90,14 @@ getShowpdf (TargetSpec y m d) = do
|
|||||||
httpEquiv <- getAttr "http-equiv" attrs
|
httpEquiv <- getAttr "http-equiv" attrs
|
||||||
guard $ httpEquiv == "refresh"
|
guard $ httpEquiv == "refresh"
|
||||||
content <- getAttr "content" attrs
|
content <- getAttr "content" attrs
|
||||||
Just $ snd $ Text.breakOnEnd "URL=" content
|
Just $ escapeBrackets $ snd $ Text.breakOnEnd "URL=" content
|
||||||
getRefreshTarget _ =
|
getRefreshTarget _ =
|
||||||
Nothing
|
Nothing
|
||||||
|
|
||||||
getAttr k = lookup k . map (\(Attr k' v) -> (k', v))
|
getAttr k = lookup k . map (\(Attr k' v) -> (k', v))
|
||||||
|
|
||||||
|
escapeBrackets = Text.replace "[" "%5B" . Text.replace "]" "%5D"
|
||||||
|
|
||||||
getMonthOverview :: Year -> MonthOfYear -> IO [(Day, TargetSpec)]
|
getMonthOverview :: Year -> MonthOfYear -> IO [(Day, TargetSpec)]
|
||||||
getMonthOverview year month = do
|
getMonthOverview year month = do
|
||||||
res <- runReq defaultHttpConfig $ req
|
res <- runReq defaultHttpConfig $ req
|
||||||
@ -93,16 +126,16 @@ getMonthOverview year month = do
|
|||||||
extractDay uri =
|
extractDay uri =
|
||||||
fromGregorian y m d
|
fromGregorian y m d
|
||||||
where
|
where
|
||||||
y = read $ maybe "0" Text.unpack $ getParam "year" uri
|
y = read $ Text.unpack $ getParam [queryKey|year|] "0" uri
|
||||||
m = read $ maybe "0" Text.unpack $ getParam "month" uri
|
m = read $ Text.unpack $ getParam [queryKey|month|] "0" uri
|
||||||
d = read $ maybe "0" (takeWhile isDigit . Text.unpack) $ getParam "day" uri
|
d = read $ Text.unpack $ Text.takeWhile isDigit $ getParam [queryKey|day|] "0" uri
|
||||||
|
|
||||||
extractTarget uri =
|
extractTarget uri =
|
||||||
TargetSpec y m d
|
TargetSpec y m d
|
||||||
where
|
where
|
||||||
y = fromMaybe "" $ getParam "year" uri
|
y = fromMaybe "" (uri ^? uriQuery . queryParam [queryKey|year|] . unRText)
|
||||||
m = fromMaybe "" $ getParam "month" uri
|
m = fromMaybe "" (uri ^? uriQuery . queryParam [queryKey|month|] . unRText)
|
||||||
d = fromMaybe "" $ getParam "day" uri
|
d = fromMaybe "" (uri ^? uriQuery . queryParam [queryKey|day|] . unRText)
|
||||||
|
|
||||||
decodeHtmlEntities = Text.pack . q0 . Text.unpack
|
decodeHtmlEntities = Text.pack . q0 . Text.unpack
|
||||||
where
|
where
|
||||||
@ -118,16 +151,14 @@ getMonthOverview year month = do
|
|||||||
-- Or if they end unexpectedly.
|
-- Or if they end unexpectedly.
|
||||||
| otherwise = c : q0 r
|
| otherwise = c : q0 r
|
||||||
|
|
||||||
isShowpdf uri =
|
|
||||||
getPath uri == ["show", "fz.cgi"] && getParam "cmd" uri == Just "showpdf"
|
|
||||||
|
|
||||||
getPath :: URI -> [Text]
|
isShowpdf :: URI -> Bool
|
||||||
getPath uri = case URI.uriPath uri of
|
isShowpdf uri =
|
||||||
Nothing -> []
|
path == ["show", "fz.cgi"] && cmd == Just "showpdf"
|
||||||
Just (_, rts) -> map URI.unRText $ NonEmpty.toList rts
|
|
||||||
|
|
||||||
getParam :: Text -> URI -> Maybe Text
|
|
||||||
getParam param uri = listToMaybe [value | (key, value) <- map go (URI.uriQuery uri), key == param]
|
|
||||||
where
|
where
|
||||||
go (URI.QueryFlag k) = (URI.unRText k, "")
|
path = uri ^.. uriPath . each . unRText
|
||||||
go (URI.QueryParam k v) = (URI.unRText k, URI.unRText v)
|
cmd = uri ^? uriQuery . queryParam [queryKey|cmd|] . unRText
|
||||||
|
|
||||||
|
getParam :: URI.RText URI.QueryKey -> Text -> URI -> Text
|
||||||
|
getParam param fallback uri =
|
||||||
|
fromMaybe fallback (uri ^? uriQuery . queryParam param . unRText)
|
||||||
|
|||||||
22
src/Yore/Index.hs
Normal file
22
src/Yore/Index.hs
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
{-# LANGUAGE OverloadedRecordDot #-}
|
||||||
|
|
||||||
|
module Yore.Index (createEntries) where
|
||||||
|
|
||||||
|
import Control.Exception (Exception, throwIO)
|
||||||
|
import Control.Monad (forM_)
|
||||||
|
import Data.Time (Day)
|
||||||
|
|
||||||
|
import qualified Database.PostgreSQL.Opium as Opium
|
||||||
|
|
||||||
|
import qualified Yore.DB as DB
|
||||||
|
|
||||||
|
createEntries :: Day -> [FilePath] -> Opium.Connection -> IO ()
|
||||||
|
createEntries date urls conn = do
|
||||||
|
confidently $ Opium.execute_ "BEGIN" conn
|
||||||
|
dayIndex <- confidently $ DB.createDayIndex date conn
|
||||||
|
forM_ urls $ \url -> confidently $ DB.createDayFile dayIndex.id url conn
|
||||||
|
confidently $ Opium.execute_ "COMMIT" conn
|
||||||
|
|
||||||
|
confidently :: Exception e => IO (Either e a) -> IO a
|
||||||
|
confidently action = action >>= either throwIO pure
|
||||||
@ -1,15 +1,58 @@
|
|||||||
{-# LANGUAGE LambdaCase #-}
|
{-# LANGUAGE LambdaCase #-}
|
||||||
{-# LANGUAGE OverloadedStrings #-}
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
|
||||||
module Yore.Repl (connect, exec) where
|
module Yore.Repl (connect, exec, getToday, indexDay, addYears) where
|
||||||
|
|
||||||
|
import Control.Exception (bracket, throwIO)
|
||||||
|
import Control.Monad (forM)
|
||||||
|
import Data.Text (Text)
|
||||||
|
import Data.Time (Day)
|
||||||
|
import Data.Time.LocalTime (getZonedTime, localDay, zonedTimeToLocalTime)
|
||||||
|
import Text.Printf (printf)
|
||||||
|
|
||||||
import qualified Database.PostgreSQL.Opium as Opium
|
import qualified Database.PostgreSQL.Opium as Opium
|
||||||
|
|
||||||
|
import Yore.DB (DayIndex)
|
||||||
|
import Yore.Download (downloadInto, getUrls)
|
||||||
|
import Yore.Index (createEntries)
|
||||||
|
import Yore.Time (addYears)
|
||||||
|
|
||||||
|
import qualified Yore.DB as DB
|
||||||
|
|
||||||
connect :: IO Opium.Connection
|
connect :: IO Opium.Connection
|
||||||
connect =
|
connect =
|
||||||
Opium.connect "host=localhost" >>= \case
|
Opium.connect "host=localhost port=5433 user=yore-test" >>= \case
|
||||||
Left e -> error $ show e
|
Left e -> error $ show e
|
||||||
Right c -> pure c
|
Right c -> pure c
|
||||||
|
|
||||||
exec :: (Opium.Connection -> IO a) -> IO a
|
exec :: (Opium.Connection -> IO a) -> IO a
|
||||||
exec f = f =<< connect
|
exec = bracket connect Opium.close
|
||||||
|
|
||||||
|
getToday :: IO [Text]
|
||||||
|
getToday = do
|
||||||
|
now <- getZonedTime
|
||||||
|
print now
|
||||||
|
let currentDay = localDay $ zonedTimeToLocalTime now
|
||||||
|
case addYears (-100) currentDay of
|
||||||
|
Just then_ ->
|
||||||
|
getUrls then_
|
||||||
|
Nothing ->
|
||||||
|
pure []
|
||||||
|
|
||||||
|
indexDay :: Day -> IO ()
|
||||||
|
indexDay date = exec $ \conn -> do
|
||||||
|
either throwIO pure =<< Opium.execute_ "SELECT pg_advisory_lock(42);" conn
|
||||||
|
printf "Checking whether %s already exists...\n" (show date)
|
||||||
|
mbDi <- either throwIO pure =<< DB.readDayIndex date conn
|
||||||
|
case mbDi of
|
||||||
|
Just (_ :: DayIndex) ->
|
||||||
|
putStrLn "Nothing to do."
|
||||||
|
Nothing -> do
|
||||||
|
putStrLn "Retrieving URLs..."
|
||||||
|
urls <- getUrls date
|
||||||
|
paths <- forM urls $ \url -> do
|
||||||
|
printf "Downloading %s...\n" url
|
||||||
|
downloadInto "./download" url
|
||||||
|
putStrLn "Creating DB entries..."
|
||||||
|
createEntries date paths conn
|
||||||
|
putStrLn "Done."
|
||||||
|
|||||||
16
src/Yore/Time.hs
Normal file
16
src/Yore/Time.hs
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
module Yore.Time (addYears) where
|
||||||
|
|
||||||
|
import Data.Time (Day, addGregorianYearsClip, isLeapYear, toGregorian)
|
||||||
|
|
||||||
|
addYears :: Integer -> Day -> Maybe Day
|
||||||
|
addYears yearOffset date
|
||||||
|
| isFebruary29th && not (isLeapYear year') =
|
||||||
|
Nothing
|
||||||
|
| otherwise =
|
||||||
|
Just $ addGregorianYearsClip yearOffset date
|
||||||
|
|
||||||
|
where
|
||||||
|
(year, month, day) = toGregorian date
|
||||||
|
year' = year + yearOffset
|
||||||
|
|
||||||
|
isFebruary29th = month == 2 && day == 29
|
||||||
@ -32,19 +32,25 @@ library
|
|||||||
import: shared-options
|
import: shared-options
|
||||||
exposed-modules:
|
exposed-modules:
|
||||||
Yore.DB
|
Yore.DB
|
||||||
, Yore.Repl
|
|
||||||
, Yore.Download
|
, Yore.Download
|
||||||
|
, Yore.Index
|
||||||
|
, Yore.Repl
|
||||||
|
, Yore.Time
|
||||||
hs-source-dirs:
|
hs-source-dirs:
|
||||||
src
|
src
|
||||||
build-depends:
|
build-depends:
|
||||||
base >=4.18 && <5
|
base >=4.18 && <5
|
||||||
, bytestring
|
, bytestring
|
||||||
|
, directory
|
||||||
|
, filepath
|
||||||
, html-parse
|
, html-parse
|
||||||
|
, lens
|
||||||
, modern-uri
|
, modern-uri
|
||||||
, opium
|
, opium
|
||||||
, req
|
, req
|
||||||
, text
|
, text
|
||||||
, time
|
, time
|
||||||
|
, uuid
|
||||||
|
|
||||||
executable yore
|
executable yore
|
||||||
import: shared-options
|
import: shared-options
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user