Implement FZ download

This commit is contained in:
Paul Brinkmeier 2025-07-28 20:59:38 +02:00
parent d6a9e7156f
commit c8372b1fa5
7 changed files with 191 additions and 49 deletions

View File

@ -1,21 +1,12 @@
{-# LANGUAGE OverloadedStrings #-}
module Main (main) where module Main (main) where
import Control.Exception (bracket) import qualified Data.Text.IO as TextIO
import qualified Database.PostgreSQL.Opium as Opium import Yore.Repl (getToday)
import qualified Yore.DB
main :: IO () main :: IO ()
main = bracket unsafeConnect Opium.close $ \conn -> do main = do
result <- Yore.DB.getTables conn putStrLn "* Getting URLs for today..."
urls <- getToday
case result of putStrLn "* Done:"
Left e -> mapM_ TextIO.putStrLn urls
putStrLn $ "Got error: " ++ show e
Right rows ->
mapM_ print rows
where
unsafeConnect = either (error . show) id <$> Opium.connect "host=localhost port=5432 user=yore-test dbname=yore-test"

View File

@ -1,10 +1,16 @@
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Yore.DB module Yore.DB
( Table (..) ( DayIndex (..)
, createDayFile
, createDayIndex
, readDayIndex
, Table (..)
, getTables , getTables
) where ) where
import Data.Functor.Identity (Identity (..))
import Data.Time (Day)
import GHC.Generics (Generic) import GHC.Generics (Generic)
import qualified Database.PostgreSQL.Opium as Opium import qualified Database.PostgreSQL.Opium as Opium
@ -18,3 +24,30 @@ instance Opium.FromRow Table
getTables :: Opium.Connection -> IO (Either Opium.Error [Table]) getTables :: Opium.Connection -> IO (Either Opium.Error [Table])
getTables = Opium.fetch_ "SELECT table_schema AS schema, table_name AS name FROM information_schema.tables" getTables = Opium.fetch_ "SELECT table_schema AS schema, table_name AS name FROM information_schema.tables"
data DayIndex = DayIndex
{ id :: Int
, day :: Day
} deriving (Show, Generic)
instance Opium.FromRow DayIndex
createDayIndex :: Day -> Opium.Connection -> IO (Either Opium.Error DayIndex)
createDayIndex date = ex runIdentity .
Opium.fetch
"INSERT INTO yore.day_index (day) VALUES ($1) RETURNING *"
(Identity date)
createDayFile :: Int -> FilePath -> Opium.Connection -> IO (Either Opium.Error ())
createDayFile dayId path =
Opium.execute
"INSERT INTO yore.day_file (day_id, relative_path) VALUES ($1, $2)" (dayId, path)
readDayIndex :: Day -> Opium.Connection -> IO (Either Opium.Error (Maybe DayIndex))
readDayIndex date =
Opium.fetch
"SELECT * FROM yore.day_index WHERE day = $1"
(Identity date)
ex :: (a -> b) -> IO (Either e a) -> IO (Either e b)
ex = fmap . fmap

View File

@ -1,21 +1,28 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE QuasiQuotes #-}
module Yore.Download where module Yore.Download (downloadInto, getUrls) where
import Control.Lens hiding ((<.>))
import Control.Monad (guard) import Control.Monad (guard)
import Data.Char import Data.Char
import Data.List (find) import Data.Maybe (fromJust, fromMaybe, listToMaybe, mapMaybe, catMaybes)
import Data.Maybe (fromMaybe, listToMaybe, mapMaybe)
import Data.Text (Text) import Data.Text (Text)
import Data.Time (Day, MonthOfYear, Year, fromGregorian, toGregorian) import Data.Time (Day, MonthOfYear, Year, fromGregorian, toGregorian)
import Network.HTTP.Req import Network.HTTP.Req hiding (queryParam)
import System.Directory (createDirectoryIfMissing)
import System.FilePath (makeRelative, takeExtension, (</>), (<.>))
import Text.HTML.Parser import Text.HTML.Parser
import Text.Printf (printf) import Text.Printf (printf)
import Text.URI (URI) import Text.URI (URI)
import Text.URI.Lens
import Text.URI.QQ hiding (uri)
import qualified Data.List.NonEmpty as NonEmpty import qualified Data.ByteString.Lazy as LBS
import qualified Data.Text as Text import qualified Data.Text as Text
import qualified Data.Text.Encoding as Encoding import qualified Data.Text.Encoding as Encoding
import qualified Data.UUID.V4 as UUID
import qualified Text.URI as URI import qualified Text.URI as URI
data TargetSpec = TargetSpec data TargetSpec = TargetSpec
@ -25,12 +32,36 @@ data TargetSpec = TargetSpec
} }
deriving (Show) deriving (Show)
getUrl :: Day -> IO Text -- | Download a URL and save it to a directory.
getUrl date = do -- Returns the path of the downloaded file relative to the directory.
downloadInto
:: FilePath -- ^ Directory where to store the file.
-> Text -- ^ The URL to download.
-> IO FilePath
downloadInto downloadDir textUrl = download >>= save
where
download :: IO LBS.ByteString
download = do
uri <- URI.mkURI textUrl
let (url, opts) = fromJust $ useHttpsURI uri
res <- runReq defaultHttpConfig $ req GET url NoReqBody lbsResponse opts
guard $ responseStatusCode res == 200
pure $ responseBody res
save :: LBS.ByteString -> IO FilePath
save contents = do
let suffix = takeExtension $ Text.unpack textUrl
uuid <- UUID.nextRandom
let fullPath = downloadDir </> show uuid <.> suffix
createDirectoryIfMissing True downloadDir
LBS.writeFile fullPath contents
pure $ makeRelative downloadDir fullPath
getUrls :: Day -> IO [Text]
getUrls date = do
let (y, m, _) = toGregorian date let (y, m, _) = toGregorian date
candidates <- getMonthOverview y m candidates <- filter ((date ==) . fst) <$> getMonthOverview y m
let (_, ts) = fromMaybe (error "no candidates") $ find ((date ==) . fst) candidates catMaybes <$> mapM (getShowpdf . snd) candidates
fromMaybe (error "could not get showpdf url") <$> getShowpdf ts
getShowpdf :: TargetSpec -> IO (Maybe Text) getShowpdf :: TargetSpec -> IO (Maybe Text)
getShowpdf (TargetSpec y m d) = do getShowpdf (TargetSpec y m d) = do
@ -59,12 +90,14 @@ getShowpdf (TargetSpec y m d) = do
httpEquiv <- getAttr "http-equiv" attrs httpEquiv <- getAttr "http-equiv" attrs
guard $ httpEquiv == "refresh" guard $ httpEquiv == "refresh"
content <- getAttr "content" attrs content <- getAttr "content" attrs
Just $ snd $ Text.breakOnEnd "URL=" content Just $ escapeBrackets $ snd $ Text.breakOnEnd "URL=" content
getRefreshTarget _ = getRefreshTarget _ =
Nothing Nothing
getAttr k = lookup k . map (\(Attr k' v) -> (k', v)) getAttr k = lookup k . map (\(Attr k' v) -> (k', v))
escapeBrackets = Text.replace "[" "%5B" . Text.replace "]" "%5D"
getMonthOverview :: Year -> MonthOfYear -> IO [(Day, TargetSpec)] getMonthOverview :: Year -> MonthOfYear -> IO [(Day, TargetSpec)]
getMonthOverview year month = do getMonthOverview year month = do
res <- runReq defaultHttpConfig $ req res <- runReq defaultHttpConfig $ req
@ -93,16 +126,16 @@ getMonthOverview year month = do
extractDay uri = extractDay uri =
fromGregorian y m d fromGregorian y m d
where where
y = read $ maybe "0" Text.unpack $ getParam "year" uri y = read $ Text.unpack $ getParam [queryKey|year|] "0" uri
m = read $ maybe "0" Text.unpack $ getParam "month" uri m = read $ Text.unpack $ getParam [queryKey|month|] "0" uri
d = read $ maybe "0" (takeWhile isDigit . Text.unpack) $ getParam "day" uri d = read $ Text.unpack $ Text.takeWhile isDigit $ getParam [queryKey|day|] "0" uri
extractTarget uri = extractTarget uri =
TargetSpec y m d TargetSpec y m d
where where
y = fromMaybe "" $ getParam "year" uri y = fromMaybe "" (uri ^? uriQuery . queryParam [queryKey|year|] . unRText)
m = fromMaybe "" $ getParam "month" uri m = fromMaybe "" (uri ^? uriQuery . queryParam [queryKey|month|] . unRText)
d = fromMaybe "" $ getParam "day" uri d = fromMaybe "" (uri ^? uriQuery . queryParam [queryKey|day|] . unRText)
decodeHtmlEntities = Text.pack . q0 . Text.unpack decodeHtmlEntities = Text.pack . q0 . Text.unpack
where where
@ -118,16 +151,14 @@ getMonthOverview year month = do
-- Or if they end unexpectedly. -- Or if they end unexpectedly.
| otherwise = c : q0 r | otherwise = c : q0 r
isShowpdf uri =
getPath uri == ["show", "fz.cgi"] && getParam "cmd" uri == Just "showpdf"
getPath :: URI -> [Text] isShowpdf :: URI -> Bool
getPath uri = case URI.uriPath uri of isShowpdf uri =
Nothing -> [] path == ["show", "fz.cgi"] && cmd == Just "showpdf"
Just (_, rts) -> map URI.unRText $ NonEmpty.toList rts
getParam :: Text -> URI -> Maybe Text
getParam param uri = listToMaybe [value | (key, value) <- map go (URI.uriQuery uri), key == param]
where where
go (URI.QueryFlag k) = (URI.unRText k, "") path = uri ^.. uriPath . each . unRText
go (URI.QueryParam k v) = (URI.unRText k, URI.unRText v) cmd = uri ^? uriQuery . queryParam [queryKey|cmd|] . unRText
getParam :: URI.RText URI.QueryKey -> Text -> URI -> Text
getParam param fallback uri =
fromMaybe fallback (uri ^? uriQuery . queryParam param . unRText)

22
src/Yore/Index.hs Normal file
View File

@ -0,0 +1,22 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE OverloadedRecordDot #-}
module Yore.Index (createEntries) where
import Control.Exception (Exception, throwIO)
import Control.Monad (forM_)
import Data.Time (Day)
import qualified Database.PostgreSQL.Opium as Opium
import qualified Yore.DB as DB
createEntries :: Day -> [FilePath] -> Opium.Connection -> IO ()
createEntries date urls conn = do
confidently $ Opium.execute_ "BEGIN" conn
dayIndex <- confidently $ DB.createDayIndex date conn
forM_ urls $ \url -> confidently $ DB.createDayFile dayIndex.id url conn
confidently $ Opium.execute_ "COMMIT" conn
confidently :: Exception e => IO (Either e a) -> IO a
confidently action = action >>= either throwIO pure

View File

@ -1,15 +1,58 @@
{-# LANGUAGE LambdaCase #-} {-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Yore.Repl (connect, exec) where module Yore.Repl (connect, exec, getToday, indexDay, addYears) where
import Control.Exception (bracket, throwIO)
import Control.Monad (forM)
import Data.Text (Text)
import Data.Time (Day)
import Data.Time.LocalTime (getZonedTime, localDay, zonedTimeToLocalTime)
import Text.Printf (printf)
import qualified Database.PostgreSQL.Opium as Opium import qualified Database.PostgreSQL.Opium as Opium
import Yore.DB (DayIndex)
import Yore.Download (downloadInto, getUrls)
import Yore.Index (createEntries)
import Yore.Time (addYears)
import qualified Yore.DB as DB
connect :: IO Opium.Connection connect :: IO Opium.Connection
connect = connect =
Opium.connect "host=localhost" >>= \case Opium.connect "host=localhost port=5433 user=yore-test" >>= \case
Left e -> error $ show e Left e -> error $ show e
Right c -> pure c Right c -> pure c
exec :: (Opium.Connection -> IO a) -> IO a exec :: (Opium.Connection -> IO a) -> IO a
exec f = f =<< connect exec = bracket connect Opium.close
getToday :: IO [Text]
getToday = do
now <- getZonedTime
print now
let currentDay = localDay $ zonedTimeToLocalTime now
case addYears (-100) currentDay of
Just then_ ->
getUrls then_
Nothing ->
pure []
indexDay :: Day -> IO ()
indexDay date = exec $ \conn -> do
either throwIO pure =<< Opium.execute_ "SELECT pg_advisory_lock(42);" conn
printf "Checking whether %s already exists...\n" (show date)
mbDi <- either throwIO pure =<< DB.readDayIndex date conn
case mbDi of
Just (_ :: DayIndex) ->
putStrLn "Nothing to do."
Nothing -> do
putStrLn "Retrieving URLs..."
urls <- getUrls date
paths <- forM urls $ \url -> do
printf "Downloading %s...\n" url
downloadInto "./download" url
putStrLn "Creating DB entries..."
createEntries date paths conn
putStrLn "Done."

16
src/Yore/Time.hs Normal file
View File

@ -0,0 +1,16 @@
module Yore.Time (addYears) where
import Data.Time (Day, addGregorianYearsClip, isLeapYear, toGregorian)
addYears :: Integer -> Day -> Maybe Day
addYears yearOffset date
| isFebruary29th && not (isLeapYear year') =
Nothing
| otherwise =
Just $ addGregorianYearsClip yearOffset date
where
(year, month, day) = toGregorian date
year' = year + yearOffset
isFebruary29th = month == 2 && day == 29

View File

@ -32,19 +32,25 @@ library
import: shared-options import: shared-options
exposed-modules: exposed-modules:
Yore.DB Yore.DB
, Yore.Repl
, Yore.Download , Yore.Download
, Yore.Index
, Yore.Repl
, Yore.Time
hs-source-dirs: hs-source-dirs:
src src
build-depends: build-depends:
base >=4.18 && <5 base >=4.18 && <5
, bytestring , bytestring
, directory
, filepath
, html-parse , html-parse
, lens
, modern-uri , modern-uri
, opium , opium
, req , req
, text , text
, time , time
, uuid
executable yore executable yore
import: shared-options import: shared-options