Implement FZ download

This commit is contained in:
Paul Brinkmeier 2025-07-28 20:59:38 +02:00
parent d6a9e7156f
commit c8372b1fa5
7 changed files with 191 additions and 49 deletions

View File

@ -1,21 +1,12 @@
{-# LANGUAGE OverloadedStrings #-}
module Main (main) where
import Control.Exception (bracket)
import qualified Data.Text.IO as TextIO
import qualified Database.PostgreSQL.Opium as Opium
import qualified Yore.DB
import Yore.Repl (getToday)
main :: IO ()
main = bracket unsafeConnect Opium.close $ \conn -> do
result <- Yore.DB.getTables conn
case result of
Left e ->
putStrLn $ "Got error: " ++ show e
Right rows ->
mapM_ print rows
where
unsafeConnect = either (error . show) id <$> Opium.connect "host=localhost port=5432 user=yore-test dbname=yore-test"
main = do
putStrLn "* Getting URLs for today..."
urls <- getToday
putStrLn "* Done:"
mapM_ TextIO.putStrLn urls

View File

@ -1,10 +1,16 @@
{-# LANGUAGE OverloadedStrings #-}
module Yore.DB
( Table (..)
( DayIndex (..)
, createDayFile
, createDayIndex
, readDayIndex
, Table (..)
, getTables
) where
import Data.Functor.Identity (Identity (..))
import Data.Time (Day)
import GHC.Generics (Generic)
import qualified Database.PostgreSQL.Opium as Opium
@ -18,3 +24,30 @@ instance Opium.FromRow Table
getTables :: Opium.Connection -> IO (Either Opium.Error [Table])
getTables = Opium.fetch_ "SELECT table_schema AS schema, table_name AS name FROM information_schema.tables"
data DayIndex = DayIndex
{ id :: Int
, day :: Day
} deriving (Show, Generic)
instance Opium.FromRow DayIndex
createDayIndex :: Day -> Opium.Connection -> IO (Either Opium.Error DayIndex)
createDayIndex date = ex runIdentity .
Opium.fetch
"INSERT INTO yore.day_index (day) VALUES ($1) RETURNING *"
(Identity date)
createDayFile :: Int -> FilePath -> Opium.Connection -> IO (Either Opium.Error ())
createDayFile dayId path =
Opium.execute
"INSERT INTO yore.day_file (day_id, relative_path) VALUES ($1, $2)" (dayId, path)
readDayIndex :: Day -> Opium.Connection -> IO (Either Opium.Error (Maybe DayIndex))
readDayIndex date =
Opium.fetch
"SELECT * FROM yore.day_index WHERE day = $1"
(Identity date)
ex :: (a -> b) -> IO (Either e a) -> IO (Either e b)
ex = fmap . fmap

View File

@ -1,21 +1,28 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE QuasiQuotes #-}
module Yore.Download where
module Yore.Download (downloadInto, getUrls) where
import Control.Lens hiding ((<.>))
import Control.Monad (guard)
import Data.Char
import Data.List (find)
import Data.Maybe (fromMaybe, listToMaybe, mapMaybe)
import Data.Maybe (fromJust, fromMaybe, listToMaybe, mapMaybe, catMaybes)
import Data.Text (Text)
import Data.Time (Day, MonthOfYear, Year, fromGregorian, toGregorian)
import Network.HTTP.Req
import Network.HTTP.Req hiding (queryParam)
import System.Directory (createDirectoryIfMissing)
import System.FilePath (makeRelative, takeExtension, (</>), (<.>))
import Text.HTML.Parser
import Text.Printf (printf)
import Text.URI (URI)
import Text.URI.Lens
import Text.URI.QQ hiding (uri)
import qualified Data.List.NonEmpty as NonEmpty
import qualified Data.ByteString.Lazy as LBS
import qualified Data.Text as Text
import qualified Data.Text.Encoding as Encoding
import qualified Data.UUID.V4 as UUID
import qualified Text.URI as URI
data TargetSpec = TargetSpec
@ -25,12 +32,36 @@ data TargetSpec = TargetSpec
}
deriving (Show)
getUrl :: Day -> IO Text
getUrl date = do
-- | Download a URL and save it to a directory.
-- Returns the path of the downloaded file relative to the directory.
downloadInto
:: FilePath -- ^ Directory where to store the file.
-> Text -- ^ The URL to download.
-> IO FilePath
downloadInto downloadDir textUrl = download >>= save
where
download :: IO LBS.ByteString
download = do
uri <- URI.mkURI textUrl
let (url, opts) = fromJust $ useHttpsURI uri
res <- runReq defaultHttpConfig $ req GET url NoReqBody lbsResponse opts
guard $ responseStatusCode res == 200
pure $ responseBody res
save :: LBS.ByteString -> IO FilePath
save contents = do
let suffix = takeExtension $ Text.unpack textUrl
uuid <- UUID.nextRandom
let fullPath = downloadDir </> show uuid <.> suffix
createDirectoryIfMissing True downloadDir
LBS.writeFile fullPath contents
pure $ makeRelative downloadDir fullPath
getUrls :: Day -> IO [Text]
getUrls date = do
let (y, m, _) = toGregorian date
candidates <- getMonthOverview y m
let (_, ts) = fromMaybe (error "no candidates") $ find ((date ==) . fst) candidates
fromMaybe (error "could not get showpdf url") <$> getShowpdf ts
candidates <- filter ((date ==) . fst) <$> getMonthOverview y m
catMaybes <$> mapM (getShowpdf . snd) candidates
getShowpdf :: TargetSpec -> IO (Maybe Text)
getShowpdf (TargetSpec y m d) = do
@ -59,12 +90,14 @@ getShowpdf (TargetSpec y m d) = do
httpEquiv <- getAttr "http-equiv" attrs
guard $ httpEquiv == "refresh"
content <- getAttr "content" attrs
Just $ snd $ Text.breakOnEnd "URL=" content
Just $ escapeBrackets $ snd $ Text.breakOnEnd "URL=" content
getRefreshTarget _ =
Nothing
getAttr k = lookup k . map (\(Attr k' v) -> (k', v))
escapeBrackets = Text.replace "[" "%5B" . Text.replace "]" "%5D"
getMonthOverview :: Year -> MonthOfYear -> IO [(Day, TargetSpec)]
getMonthOverview year month = do
res <- runReq defaultHttpConfig $ req
@ -93,16 +126,16 @@ getMonthOverview year month = do
extractDay uri =
fromGregorian y m d
where
y = read $ maybe "0" Text.unpack $ getParam "year" uri
m = read $ maybe "0" Text.unpack $ getParam "month" uri
d = read $ maybe "0" (takeWhile isDigit . Text.unpack) $ getParam "day" uri
y = read $ Text.unpack $ getParam [queryKey|year|] "0" uri
m = read $ Text.unpack $ getParam [queryKey|month|] "0" uri
d = read $ Text.unpack $ Text.takeWhile isDigit $ getParam [queryKey|day|] "0" uri
extractTarget uri =
TargetSpec y m d
where
y = fromMaybe "" $ getParam "year" uri
m = fromMaybe "" $ getParam "month" uri
d = fromMaybe "" $ getParam "day" uri
y = fromMaybe "" (uri ^? uriQuery . queryParam [queryKey|year|] . unRText)
m = fromMaybe "" (uri ^? uriQuery . queryParam [queryKey|month|] . unRText)
d = fromMaybe "" (uri ^? uriQuery . queryParam [queryKey|day|] . unRText)
decodeHtmlEntities = Text.pack . q0 . Text.unpack
where
@ -118,16 +151,14 @@ getMonthOverview year month = do
-- Or if they end unexpectedly.
| otherwise = c : q0 r
isShowpdf :: URI -> Bool
isShowpdf uri =
getPath uri == ["show", "fz.cgi"] && getParam "cmd" uri == Just "showpdf"
getPath :: URI -> [Text]
getPath uri = case URI.uriPath uri of
Nothing -> []
Just (_, rts) -> map URI.unRText $ NonEmpty.toList rts
getParam :: Text -> URI -> Maybe Text
getParam param uri = listToMaybe [value | (key, value) <- map go (URI.uriQuery uri), key == param]
path == ["show", "fz.cgi"] && cmd == Just "showpdf"
where
go (URI.QueryFlag k) = (URI.unRText k, "")
go (URI.QueryParam k v) = (URI.unRText k, URI.unRText v)
path = uri ^.. uriPath . each . unRText
cmd = uri ^? uriQuery . queryParam [queryKey|cmd|] . unRText
getParam :: URI.RText URI.QueryKey -> Text -> URI -> Text
getParam param fallback uri =
fromMaybe fallback (uri ^? uriQuery . queryParam param . unRText)

22
src/Yore/Index.hs Normal file
View File

@ -0,0 +1,22 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE OverloadedRecordDot #-}
module Yore.Index (createEntries) where
import Control.Exception (Exception, throwIO)
import Control.Monad (forM_)
import Data.Time (Day)
import qualified Database.PostgreSQL.Opium as Opium
import qualified Yore.DB as DB
createEntries :: Day -> [FilePath] -> Opium.Connection -> IO ()
createEntries date urls conn = do
confidently $ Opium.execute_ "BEGIN" conn
dayIndex <- confidently $ DB.createDayIndex date conn
forM_ urls $ \url -> confidently $ DB.createDayFile dayIndex.id url conn
confidently $ Opium.execute_ "COMMIT" conn
confidently :: Exception e => IO (Either e a) -> IO a
confidently action = action >>= either throwIO pure

View File

@ -1,15 +1,58 @@
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
module Yore.Repl (connect, exec) where
module Yore.Repl (connect, exec, getToday, indexDay, addYears) where
import Control.Exception (bracket, throwIO)
import Control.Monad (forM)
import Data.Text (Text)
import Data.Time (Day)
import Data.Time.LocalTime (getZonedTime, localDay, zonedTimeToLocalTime)
import Text.Printf (printf)
import qualified Database.PostgreSQL.Opium as Opium
import Yore.DB (DayIndex)
import Yore.Download (downloadInto, getUrls)
import Yore.Index (createEntries)
import Yore.Time (addYears)
import qualified Yore.DB as DB
connect :: IO Opium.Connection
connect =
Opium.connect "host=localhost" >>= \case
Opium.connect "host=localhost port=5433 user=yore-test" >>= \case
Left e -> error $ show e
Right c -> pure c
exec :: (Opium.Connection -> IO a) -> IO a
exec f = f =<< connect
exec = bracket connect Opium.close
getToday :: IO [Text]
getToday = do
now <- getZonedTime
print now
let currentDay = localDay $ zonedTimeToLocalTime now
case addYears (-100) currentDay of
Just then_ ->
getUrls then_
Nothing ->
pure []
indexDay :: Day -> IO ()
indexDay date = exec $ \conn -> do
either throwIO pure =<< Opium.execute_ "SELECT pg_advisory_lock(42);" conn
printf "Checking whether %s already exists...\n" (show date)
mbDi <- either throwIO pure =<< DB.readDayIndex date conn
case mbDi of
Just (_ :: DayIndex) ->
putStrLn "Nothing to do."
Nothing -> do
putStrLn "Retrieving URLs..."
urls <- getUrls date
paths <- forM urls $ \url -> do
printf "Downloading %s...\n" url
downloadInto "./download" url
putStrLn "Creating DB entries..."
createEntries date paths conn
putStrLn "Done."

16
src/Yore/Time.hs Normal file
View File

@ -0,0 +1,16 @@
module Yore.Time (addYears) where
import Data.Time (Day, addGregorianYearsClip, isLeapYear, toGregorian)
addYears :: Integer -> Day -> Maybe Day
addYears yearOffset date
| isFebruary29th && not (isLeapYear year') =
Nothing
| otherwise =
Just $ addGregorianYearsClip yearOffset date
where
(year, month, day) = toGregorian date
year' = year + yearOffset
isFebruary29th = month == 2 && day == 29

View File

@ -32,19 +32,25 @@ library
import: shared-options
exposed-modules:
Yore.DB
, Yore.Repl
, Yore.Download
, Yore.Index
, Yore.Repl
, Yore.Time
hs-source-dirs:
src
build-depends:
base >=4.18 && <5
, bytestring
, directory
, filepath
, html-parse
, lens
, modern-uri
, opium
, req
, text
, time
, uuid
executable yore
import: shared-options