Add label to indexed files, implement rudimentary server

This commit is contained in:
Paul Brinkmeier 2025-08-02 17:28:00 +02:00
parent 70861b1339
commit 1b2445c28e
10 changed files with 211 additions and 76 deletions

View File

@ -1,12 +1,86 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE OverloadedRecordDot #-}
{-# LANGUAGE OverloadedStrings #-}
module Main (main) where module Main (main) where
import qualified Data.Text.IO as TextIO import Control.Concurrent (getNumCapabilities)
import Control.Exception (Exception, throwIO)
import Data.Pool (Pool, defaultPoolConfig, newPool, withResource)
import Data.Proxy (Proxy (..))
import Data.Text (Text)
import Network.HTTP.Media ((//), (/:))
import Network.HTTP.Types (status200)
import Servant (Accept (..), Get, Server, serve, MimeRender (..), (:<|>) (..), (:>), Raw, Capture, Tagged (..))
import System.FilePath ((</>))
import Text.Blaze.Html5 ((!))
import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
import Text.Printf (printf)
import Yore.Repl (getToday) import qualified Data.Text as Text
import qualified Database.PostgreSQL.Opium as Opium
import qualified Network.Wai.Handler.Warp as Warp
import qualified Text.Blaze.Html5 as H
import qualified Text.Blaze.Html5.Attributes as A
import Network.Wai (responseFile)
import Yore.DB (DayFile (..))
import Yore.Time (addYears, getCurrentDay)
import qualified Yore.DB as DB
instance Exception Opium.ConnectionError
main :: IO () main :: IO ()
main = do main = do
putStrLn "* Getting URLs for today..." capabilities <- getNumCapabilities
urls <- getToday let maxResources = capabilities
putStrLn "* Done:" connPool <- newPool $ defaultPoolConfig
mapM_ TextIO.putStrLn urls (unsafeConnect "host=localhost port=5433 user=yore-test dbname=yore-test")
Opium.close
10
maxResources
Warp.run 3000 $ serve (Proxy :: Proxy API) $ server connPool
where
unsafeConnect s = either throwIO pure =<< Opium.connect s
type API
= Get '[HTML] RootModel
:<|> "today" :> "issue" :> Capture "issue" Int :> Get '[HTML] RootModel
:<|> "api" :> "today" :> "issue" :> Capture "issue" Int :> "fz.pdf" :> Raw
server :: Pool Opium.Connection -> Server API
server connPool = rootR :<|> todayR :<|> apiTodayR
where
rootR =
pure $ RootModel "/api/today/issue/0/fz.pdf"
todayR issue =
pure $ RootModel $ Text.pack $ printf "/api/today/issue/%d/fz.pdf" issue
apiTodayR issue =
Tagged $ \_ respond -> do
dateNow <- getCurrentDay
let Just dateThen = addYears (-100) dateNow
print dateThen
res <- withResource connPool $ DB.getDayFileByIssue dateThen issue
dayFile <- either throwIO pure res
let fullPath = "download" </> dayFile.relative_path
respond $ responseFile status200 [("content-type", "application/pdf")] fullPath Nothing
newtype RootModel = RootModel Text
instance MimeRender HTML RootModel where
mimeRender _ (RootModel url) = renderHtml $ do
H.docTypeHtml $ do
H.body ! A.style "margin: 0" $ do
H.iframe ! A.src (H.toValue url) ! A.style "width: 100vw; height: 100vh; border: 0;" $ mempty
-- Utils
data HTML
instance Accept HTML where
contentType _ = "text" // "html" /: ("charset", "utf-8")

View File

@ -0,0 +1,5 @@
-- migrate:up
ALTER TABLE yore.day_file ADD COLUMN label text NOT NULL DEFAULT 'missing label';
-- migrate:down
ALTER TABLE yore.day_file DROP COLUMN label;

View File

@ -37,7 +37,8 @@ CREATE TABLE public.schema_migrations (
CREATE TABLE yore.day_file ( CREATE TABLE yore.day_file (
day_file_id bigint NOT NULL, day_file_id bigint NOT NULL,
day_index_id bigint, day_index_id bigint,
relative_path text NOT NULL relative_path text NOT NULL,
label text DEFAULT 'missing label'::text NOT NULL
); );
@ -131,4 +132,5 @@ ALTER TABLE ONLY yore.day_file
INSERT INTO public.schema_migrations (version) VALUES INSERT INTO public.schema_migrations (version) VALUES
('20250728054639'), ('20250728054639'),
('20250728060412'), ('20250728060412'),
('20250729075246'); ('20250729075246'),
('20250802151406');

View File

@ -65,18 +65,26 @@
opium_ = opium.packages.${system}.opium; opium_ = opium.packages.${system}.opium;
}; };
devShells.default = devShells = {
(pkgs.haskellPackages.developPackage { default =
root = ./.; (pkgs.haskellPackages.developPackage {
modifier = drv: root = ./.;
pkgs.haskell.lib.addBuildTools drv [ modifier = drv:
pkgs.cabal-install pkgs.haskell.lib.addBuildTools drv [
pkgs.haskellPackages.implicit-hie pkgs.cabal-install
pkgs.haskell-language-server pkgs.haskellPackages.implicit-hie
pkgs.postgresql pkgs.haskell-language-server
pkgs.dbmate pkgs.postgresql
]; pkgs.dbmate
overrides = addOpium; ];
}).env; overrides = addOpium;
}); }).env;
just-hls = pkgs.mkShell {
packages = [
pkgs.haskell-language-server
pkgs.gcc
];
};
};
});
} }

View File

@ -4,15 +4,18 @@
module Yore.DB module Yore.DB
( DayIndex (..) ( DayIndex (..)
, DayFile (..)
, createDayFile , createDayFile
, createDayIndex , createDayIndex
, readDayIndex , readDayIndex
, readDayPaths , readDayPaths
, getDayFileByIssue
, Table (..) , Table (..)
, getTables , getTables
) where ) where
import Data.Functor.Identity (Identity (..)) import Data.Functor.Identity (Identity (..))
import Data.Text (Text)
import Data.Time (Day) import Data.Time (Day)
import GHC.Generics (Generic) import GHC.Generics (Generic)
@ -34,7 +37,8 @@ data DayIndex = DayIndex
data DayFile = DayFile data DayFile = DayFile
{ day_file_id :: Int { day_file_id :: Int
, day_index_id :: Int , day_index_id :: Int
, relative_path :: String , label :: Text
, relative_path :: FilePath
} deriving (Show, Generic, Opium.FromRow) } deriving (Show, Generic, Opium.FromRow)
createDayIndex :: Day -> Opium.Connection -> IO (Either Opium.Error DayIndex) createDayIndex :: Day -> Opium.Connection -> IO (Either Opium.Error DayIndex)
@ -43,10 +47,10 @@ createDayIndex date = ex runIdentity .
"INSERT INTO yore.day_index (day) VALUES ($1) RETURNING *" "INSERT INTO yore.day_index (day) VALUES ($1) RETURNING *"
(Identity date) (Identity date)
createDayFile :: Int -> FilePath -> Opium.Connection -> IO (Either Opium.Error ()) createDayFile :: Int -> Text -> FilePath -> Opium.Connection -> IO (Either Opium.Error ())
createDayFile dayId path = createDayFile dayId label path =
Opium.execute Opium.execute
"INSERT INTO yore.day_file (day_index_id, relative_path) VALUES ($1, $2)" (dayId, path) "INSERT INTO yore.day_file (day_index_id, label, relative_path) VALUES ($1, $2, $3)" (dayId, label, path)
readDayIndex :: Day -> Opium.Connection -> IO (Either Opium.Error (Maybe DayIndex)) readDayIndex :: Day -> Opium.Connection -> IO (Either Opium.Error (Maybe DayIndex))
readDayIndex date = readDayIndex date =
@ -60,5 +64,11 @@ readDayPaths date =
"SELECT * FROM yore.day_index LEFT JOIN yore.day_file USING (day_index_id) WHERE day = $1" "SELECT * FROM yore.day_index LEFT JOIN yore.day_file USING (day_index_id) WHERE day = $1"
(Identity date) (Identity date)
getDayFileByIssue :: Day -> Int -> Opium.Connection -> IO (Either Opium.Error DayFile)
getDayFileByIssue date issue = ex runIdentity .
Opium.fetch
"SELECT yore.day_file.* FROM yore.day_index LEFT JOIN yore.day_file USING (day_index_id) WHERE day = $1 ORDER BY day_file_id LIMIT 1 OFFSET $2"
(date, issue)
ex :: (a -> b) -> IO (Either e a) -> IO (Either e b) ex :: (a -> b) -> IO (Either e a) -> IO (Either e b)
ex = fmap . fmap ex = fmap . fmap

View File

@ -1,13 +1,14 @@
{-# LANGUAGE DataKinds #-} {-# LANGUAGE DataKinds #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE QuasiQuotes #-}
module Yore.Download (downloadInto, getUrls) where module Yore.Download (downloadInto, getUrls) where
import Control.Lens hiding ((<.>)) import Control.Lens hiding ((<.>))
import Control.Monad (guard) import Control.Monad (forM, guard)
import Data.Char import Data.Char
import Data.Maybe (fromJust, fromMaybe, listToMaybe, mapMaybe, catMaybes) import Data.Maybe (fromJust, listToMaybe, mapMaybe, catMaybes)
import Data.Text (Text) import Data.Text (Text)
import Data.Time (Day, MonthOfYear, Year, fromGregorian, toGregorian) import Data.Time (Day, MonthOfYear, Year, fromGregorian, toGregorian)
import Network.HTTP.Req hiding (queryParam) import Network.HTTP.Req hiding (queryParam)
@ -57,11 +58,19 @@ downloadInto downloadDir textUrl = download >>= save
LBS.writeFile fullPath contents LBS.writeFile fullPath contents
pure $ makeRelative downloadDir fullPath pure $ makeRelative downloadDir fullPath
getUrls :: Day -> IO [Text] getUrls :: Day -> IO [(Text, Text)]
getUrls date = do getUrls date = do
let (y, m, _) = toGregorian date let (y, m, _) = toGregorian date
candidates <- filter ((date ==) . fst) <$> getMonthOverview y m allTargets <- getMonthOverview y m
catMaybes <$> mapM (getShowpdf . snd) candidates
let candidates = [t | t@(tDate, _, _) <- allTargets, tDate == date]
mb <- forM candidates $ \(_, target, text) -> do
mbUrl <- getShowpdf target
case mbUrl of
Nothing -> pure Nothing
Just url -> pure $ Just (text, url)
pure $ catMaybes mb
getShowpdf :: TargetSpec -> IO (Maybe Text) getShowpdf :: TargetSpec -> IO (Maybe Text)
getShowpdf (TargetSpec y m d) = do getShowpdf (TargetSpec y m d) = do
@ -98,7 +107,7 @@ getShowpdf (TargetSpec y m d) = do
escapeBrackets = Text.replace "[" "%5B" . Text.replace "]" "%5D" escapeBrackets = Text.replace "[" "%5B" . Text.replace "]" "%5D"
getMonthOverview :: Year -> MonthOfYear -> IO [(Day, TargetSpec)] getMonthOverview :: Year -> MonthOfYear -> IO [(Day, TargetSpec, Text)]
getMonthOverview year month = do getMonthOverview year month = do
res <- runReq defaultHttpConfig $ req res <- runReq defaultHttpConfig $ req
GET GET
@ -108,57 +117,72 @@ getMonthOverview year month = do
("cmd" =: ("showmonth" :: Text) ("cmd" =: ("showmonth" :: Text)
<> "year" =: (printf "%04d" year :: String) <> "year" =: (printf "%04d" year :: String)
<> "month" =: (printf "%02d" month :: String) <> "month" =: (printf "%02d" month :: String)
-- TODO: What does this actually do?
<> "project" =: ("3" :: Text)) <> "project" =: ("3" :: Text))
guard $ responseStatusCode res == 200 guard $ responseStatusCode res == 200
let contents = Encoding.decodeUtf8Lenient $ responseBody res let contents = Encoding.decodeUtf8Lenient $ responseBody res
let tokens = parseTokens contents let tokens = parseTokens contents
let links = mapMaybe (fmap decodeHtmlEntities . getHref) tokens pure $ parseTargetSpecs tokens
uris <- mapM URI.mkURI links
pure [(extractDay uri, extractTarget uri) | uri <- uris, isShowpdf uri]
parseTargetSpecs :: [Token] -> [(Day, TargetSpec, Text)]
parseTargetSpecs = q0 []
where where
getHref (TagOpen "a" attrs) = listToMaybe [href | Attr "href" href <- attrs] q0 tss ((getShowDay -> Just uri) : ts) = q1 tss "" uri ts
getHref _ = Nothing q0 tss (_ : ts) = q0 tss ts
q0 tss [] = reverse tss
extractDay uri = q1 tss text t (ContentText content : ts) = q1 tss (text <> content) t ts
fromGregorian y m d q1 tss text t (TagClose "a" : ts) = q0 ((extractDay t, t, text) : tss) ts
where q1 tss _ _ ts = q0 tss ts
y = read $ Text.unpack $ getParam [queryKey|year|] "0" uri
m = read $ Text.unpack $ getParam [queryKey|month|] "0" uri
d = read $ Text.unpack $ Text.takeWhile isDigit $ getParam [queryKey|day|] "0" uri
extractTarget uri = getShowDay :: Token -> Maybe TargetSpec
TargetSpec y m d getShowDay t = do
where href <- getAHref t
y = fromMaybe "" (uri ^? uriQuery . queryParam [queryKey|year|] . unRText) uri <- URI.mkURI $ decodeHexEntities href
m = fromMaybe "" (uri ^? uriQuery . queryParam [queryKey|month|] . unRText) extractShowday uri
d = fromMaybe "" (uri ^? uriQuery . queryParam [queryKey|day|] . unRText)
decodeHtmlEntities = Text.pack . q0 . Text.unpack getAHref :: Token -> Maybe Text
where getAHref (TagOpen "a" attrs) = listToMaybe [href | Attr "href" href <- attrs]
q0 "" = "" getAHref _ = Nothing
q0 ('&' : '#' : r) = q1 0 r
q0 (c : r) = c : q0 r
-- Silently swallows broken entities at the end of the string... extractDay :: TargetSpec -> Day
q1 _ "" = "" extractDay ts =
q1 acc (';' : r) = chr acc : q0 r fromGregorian y m d
q1 acc (c : r) where
| isDigit c = q1 (10 * acc + ord c - ord '0') r y = read $ Text.unpack $ tsYear ts
-- Or if they end unexpectedly. m = read $ Text.unpack $ tsMonth ts
| otherwise = c : q0 r d = read $ Text.unpack $ Text.takeWhile isDigit $ tsDay ts
extractShowday :: URI -> Maybe TargetSpec
isShowpdf :: URI -> Bool extractShowday uri
isShowpdf uri = | path == ["show", "fz.cgi"] && cmd == Just "showday" = do
path == ["show", "fz.cgi"] && cmd == Just "showpdf" y <- getParam [queryKey|year|] uri
m <- getParam [queryKey|month|] uri
d <- getParam [queryKey|day|] uri
pure $ TargetSpec y m d
| otherwise =
Nothing
where where
path = uri ^.. uriPath . each . unRText path = uri ^.. uriPath . each . unRText
cmd = uri ^? uriQuery . queryParam [queryKey|cmd|] . unRText cmd = uri ^? uriQuery . queryParam [queryKey|cmd|] . unRText
getParam :: URI.RText URI.QueryKey -> Text -> URI -> Text getParam :: URI.RText URI.QueryKey -> URI -> Maybe Text
getParam param fallback uri = getParam param uri =
fromMaybe fallback (uri ^? uriQuery . queryParam param . unRText) uri ^? uriQuery . queryParam param . unRText
decodeHexEntities :: Text -> Text
decodeHexEntities = Text.pack . q0 . Text.unpack
where
q0 "" = ""
q0 ('&' : '#' : r) = q1 0 r
q0 (c : r) = c : q0 r
-- Silently swallows broken entities at the end of the string...
q1 _ "" = ""
q1 acc (';' : r) = chr acc : q0 r
q1 acc (c : r)
| isDigit c = q1 (10 * acc + ord c - ord '0') r
-- Or if they end unexpectedly.
| otherwise = c : q0 r

View File

@ -5,17 +5,19 @@ module Yore.Index (createEntries) where
import Control.Exception (Exception, throwIO) import Control.Exception (Exception, throwIO)
import Control.Monad (forM_) import Control.Monad (forM_)
import Data.Text (Text)
import Data.Time (Day) import Data.Time (Day)
import qualified Database.PostgreSQL.Opium as Opium import qualified Database.PostgreSQL.Opium as Opium
import qualified Yore.DB as DB import qualified Yore.DB as DB
createEntries :: Day -> [FilePath] -> Opium.Connection -> IO () createEntries :: Day -> [(Text, FilePath)] -> Opium.Connection -> IO ()
createEntries date urls conn = do createEntries date urls conn = do
confidently $ Opium.execute_ "BEGIN" conn confidently $ Opium.execute_ "BEGIN" conn
dayIndex <- confidently $ DB.createDayIndex date conn dayIndex <- confidently $ DB.createDayIndex date conn
forM_ urls $ \url -> confidently $ DB.createDayFile dayIndex.day_index_id url conn forM_ urls $ \(text, url) ->
confidently $ DB.createDayFile dayIndex.day_index_id text url conn
confidently $ Opium.execute_ "COMMIT" conn confidently $ Opium.execute_ "COMMIT" conn
confidently :: Exception e => IO (Either e a) -> IO a confidently :: Exception e => IO (Either e a) -> IO a

View File

@ -28,7 +28,7 @@ connect =
exec :: (Opium.Connection -> IO a) -> IO a exec :: (Opium.Connection -> IO a) -> IO a
exec = bracket connect Opium.close exec = bracket connect Opium.close
getToday :: IO [Text] getToday :: IO [(Text, Text)]
getToday = do getToday = do
now <- getZonedTime now <- getZonedTime
print now print now
@ -50,9 +50,9 @@ indexDay date = exec $ \conn -> do
Nothing -> do Nothing -> do
putStrLn "Retrieving URLs..." putStrLn "Retrieving URLs..."
urls <- getUrls date urls <- getUrls date
paths <- forM urls $ \url -> do paths <- forM urls $ \(text, url) -> do
printf "Downloading %s...\n" url printf "Downloading %s...\n" url
downloadInto "./download" url (text,) <$> downloadInto "./download" url
putStrLn "Creating DB entries..." putStrLn "Creating DB entries..."
createEntries date paths conn createEntries date paths conn
putStrLn "Done." putStrLn "Done."

View File

@ -1,6 +1,7 @@
module Yore.Time (addYears) where module Yore.Time (addYears, getCurrentDay) where
import Data.Time (Day, addGregorianYearsClip, isLeapYear, toGregorian) import Data.Time (Day, addGregorianYearsClip, isLeapYear, toGregorian)
import Data.Time.LocalTime (localDay, zonedTimeToLocalTime, getZonedTime)
addYears :: Integer -> Day -> Maybe Day addYears :: Integer -> Day -> Maybe Day
addYears yearOffset date addYears yearOffset date
@ -14,3 +15,8 @@ addYears yearOffset date
year' = year + yearOffset year' = year + yearOffset
isFebruary29th = month == 2 && day == 29 isFebruary29th = month == 2 && day == 29
getCurrentDay :: IO Day
getCurrentDay = do
print =<< getZonedTime
localDay . zonedTimeToLocalTime <$> getZonedTime

View File

@ -67,9 +67,13 @@ executable yore
, base >=4.18 && <5 , base >=4.18 && <5
, blaze-html , blaze-html
, bytestring , bytestring
, http-media , filepath
, opium , opium
, http-media
, http-types
, resource-pool
, servant-server , servant-server
, text , text
, time
, wai , wai
, warp , warp