Add label to indexed files, implement rudimentary server

This commit is contained in:
Paul Brinkmeier 2025-08-02 17:28:00 +02:00
parent 70861b1339
commit 1b2445c28e
10 changed files with 211 additions and 76 deletions

View File

@ -1,12 +1,86 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE OverloadedRecordDot #-}
{-# LANGUAGE OverloadedStrings #-}
module Main (main) where
import qualified Data.Text.IO as TextIO
import Control.Concurrent (getNumCapabilities)
import Control.Exception (Exception, throwIO)
import Data.Pool (Pool, defaultPoolConfig, newPool, withResource)
import Data.Proxy (Proxy (..))
import Data.Text (Text)
import Network.HTTP.Media ((//), (/:))
import Network.HTTP.Types (status200)
import Servant (Accept (..), Get, Server, serve, MimeRender (..), (:<|>) (..), (:>), Raw, Capture, Tagged (..))
import System.FilePath ((</>))
import Text.Blaze.Html5 ((!))
import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
import Text.Printf (printf)
import Yore.Repl (getToday)
import qualified Data.Text as Text
import qualified Database.PostgreSQL.Opium as Opium
import qualified Network.Wai.Handler.Warp as Warp
import qualified Text.Blaze.Html5 as H
import qualified Text.Blaze.Html5.Attributes as A
import Network.Wai (responseFile)
import Yore.DB (DayFile (..))
import Yore.Time (addYears, getCurrentDay)
import qualified Yore.DB as DB
instance Exception Opium.ConnectionError
main :: IO ()
main = do
putStrLn "* Getting URLs for today..."
urls <- getToday
putStrLn "* Done:"
mapM_ TextIO.putStrLn urls
capabilities <- getNumCapabilities
let maxResources = capabilities
connPool <- newPool $ defaultPoolConfig
(unsafeConnect "host=localhost port=5433 user=yore-test dbname=yore-test")
Opium.close
10
maxResources
Warp.run 3000 $ serve (Proxy :: Proxy API) $ server connPool
where
unsafeConnect s = either throwIO pure =<< Opium.connect s
type API
= Get '[HTML] RootModel
:<|> "today" :> "issue" :> Capture "issue" Int :> Get '[HTML] RootModel
:<|> "api" :> "today" :> "issue" :> Capture "issue" Int :> "fz.pdf" :> Raw
server :: Pool Opium.Connection -> Server API
server connPool = rootR :<|> todayR :<|> apiTodayR
where
rootR =
pure $ RootModel "/api/today/issue/0/fz.pdf"
todayR issue =
pure $ RootModel $ Text.pack $ printf "/api/today/issue/%d/fz.pdf" issue
apiTodayR issue =
Tagged $ \_ respond -> do
dateNow <- getCurrentDay
let Just dateThen = addYears (-100) dateNow
print dateThen
res <- withResource connPool $ DB.getDayFileByIssue dateThen issue
dayFile <- either throwIO pure res
let fullPath = "download" </> dayFile.relative_path
respond $ responseFile status200 [("content-type", "application/pdf")] fullPath Nothing
newtype RootModel = RootModel Text
instance MimeRender HTML RootModel where
mimeRender _ (RootModel url) = renderHtml $ do
H.docTypeHtml $ do
H.body ! A.style "margin: 0" $ do
H.iframe ! A.src (H.toValue url) ! A.style "width: 100vw; height: 100vh; border: 0;" $ mempty
-- Utils
data HTML
instance Accept HTML where
contentType _ = "text" // "html" /: ("charset", "utf-8")

View File

@ -0,0 +1,5 @@
-- migrate:up
ALTER TABLE yore.day_file ADD COLUMN label text NOT NULL DEFAULT 'missing label';
-- migrate:down
ALTER TABLE yore.day_file DROP COLUMN label;

View File

@ -37,7 +37,8 @@ CREATE TABLE public.schema_migrations (
CREATE TABLE yore.day_file (
day_file_id bigint NOT NULL,
day_index_id bigint,
relative_path text NOT NULL
relative_path text NOT NULL,
label text DEFAULT 'missing label'::text NOT NULL
);
@ -131,4 +132,5 @@ ALTER TABLE ONLY yore.day_file
INSERT INTO public.schema_migrations (version) VALUES
('20250728054639'),
('20250728060412'),
('20250729075246');
('20250729075246'),
('20250802151406');

View File

@ -65,18 +65,26 @@
opium_ = opium.packages.${system}.opium;
};
devShells.default =
(pkgs.haskellPackages.developPackage {
root = ./.;
modifier = drv:
pkgs.haskell.lib.addBuildTools drv [
pkgs.cabal-install
pkgs.haskellPackages.implicit-hie
pkgs.haskell-language-server
pkgs.postgresql
pkgs.dbmate
];
overrides = addOpium;
}).env;
});
devShells = {
default =
(pkgs.haskellPackages.developPackage {
root = ./.;
modifier = drv:
pkgs.haskell.lib.addBuildTools drv [
pkgs.cabal-install
pkgs.haskellPackages.implicit-hie
pkgs.haskell-language-server
pkgs.postgresql
pkgs.dbmate
];
overrides = addOpium;
}).env;
just-hls = pkgs.mkShell {
packages = [
pkgs.haskell-language-server
pkgs.gcc
];
};
};
});
}

View File

@ -4,15 +4,18 @@
module Yore.DB
( DayIndex (..)
, DayFile (..)
, createDayFile
, createDayIndex
, readDayIndex
, readDayPaths
, getDayFileByIssue
, Table (..)
, getTables
) where
import Data.Functor.Identity (Identity (..))
import Data.Text (Text)
import Data.Time (Day)
import GHC.Generics (Generic)
@ -34,7 +37,8 @@ data DayIndex = DayIndex
data DayFile = DayFile
{ day_file_id :: Int
, day_index_id :: Int
, relative_path :: String
, label :: Text
, relative_path :: FilePath
} deriving (Show, Generic, Opium.FromRow)
createDayIndex :: Day -> Opium.Connection -> IO (Either Opium.Error DayIndex)
@ -43,10 +47,10 @@ createDayIndex date = ex runIdentity .
"INSERT INTO yore.day_index (day) VALUES ($1) RETURNING *"
(Identity date)
createDayFile :: Int -> FilePath -> Opium.Connection -> IO (Either Opium.Error ())
createDayFile dayId path =
createDayFile :: Int -> Text -> FilePath -> Opium.Connection -> IO (Either Opium.Error ())
createDayFile dayId label path =
Opium.execute
"INSERT INTO yore.day_file (day_index_id, relative_path) VALUES ($1, $2)" (dayId, path)
"INSERT INTO yore.day_file (day_index_id, label, relative_path) VALUES ($1, $2, $3)" (dayId, label, path)
readDayIndex :: Day -> Opium.Connection -> IO (Either Opium.Error (Maybe DayIndex))
readDayIndex date =
@ -60,5 +64,11 @@ readDayPaths date =
"SELECT * FROM yore.day_index LEFT JOIN yore.day_file USING (day_index_id) WHERE day = $1"
(Identity date)
getDayFileByIssue :: Day -> Int -> Opium.Connection -> IO (Either Opium.Error DayFile)
getDayFileByIssue date issue = ex runIdentity .
Opium.fetch
"SELECT yore.day_file.* FROM yore.day_index LEFT JOIN yore.day_file USING (day_index_id) WHERE day = $1 ORDER BY day_file_id LIMIT 1 OFFSET $2"
(date, issue)
ex :: (a -> b) -> IO (Either e a) -> IO (Either e b)
ex = fmap . fmap

View File

@ -1,13 +1,14 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE QuasiQuotes #-}
module Yore.Download (downloadInto, getUrls) where
import Control.Lens hiding ((<.>))
import Control.Monad (guard)
import Control.Monad (forM, guard)
import Data.Char
import Data.Maybe (fromJust, fromMaybe, listToMaybe, mapMaybe, catMaybes)
import Data.Maybe (fromJust, listToMaybe, mapMaybe, catMaybes)
import Data.Text (Text)
import Data.Time (Day, MonthOfYear, Year, fromGregorian, toGregorian)
import Network.HTTP.Req hiding (queryParam)
@ -57,11 +58,19 @@ downloadInto downloadDir textUrl = download >>= save
LBS.writeFile fullPath contents
pure $ makeRelative downloadDir fullPath
getUrls :: Day -> IO [Text]
getUrls :: Day -> IO [(Text, Text)]
getUrls date = do
let (y, m, _) = toGregorian date
candidates <- filter ((date ==) . fst) <$> getMonthOverview y m
catMaybes <$> mapM (getShowpdf . snd) candidates
allTargets <- getMonthOverview y m
let candidates = [t | t@(tDate, _, _) <- allTargets, tDate == date]
mb <- forM candidates $ \(_, target, text) -> do
mbUrl <- getShowpdf target
case mbUrl of
Nothing -> pure Nothing
Just url -> pure $ Just (text, url)
pure $ catMaybes mb
getShowpdf :: TargetSpec -> IO (Maybe Text)
getShowpdf (TargetSpec y m d) = do
@ -98,7 +107,7 @@ getShowpdf (TargetSpec y m d) = do
escapeBrackets = Text.replace "[" "%5B" . Text.replace "]" "%5D"
getMonthOverview :: Year -> MonthOfYear -> IO [(Day, TargetSpec)]
getMonthOverview :: Year -> MonthOfYear -> IO [(Day, TargetSpec, Text)]
getMonthOverview year month = do
res <- runReq defaultHttpConfig $ req
GET
@ -108,57 +117,72 @@ getMonthOverview year month = do
("cmd" =: ("showmonth" :: Text)
<> "year" =: (printf "%04d" year :: String)
<> "month" =: (printf "%02d" month :: String)
-- TODO: What does this actually do?
<> "project" =: ("3" :: Text))
guard $ responseStatusCode res == 200
let contents = Encoding.decodeUtf8Lenient $ responseBody res
let tokens = parseTokens contents
let links = mapMaybe (fmap decodeHtmlEntities . getHref) tokens
uris <- mapM URI.mkURI links
pure [(extractDay uri, extractTarget uri) | uri <- uris, isShowpdf uri]
pure $ parseTargetSpecs tokens
parseTargetSpecs :: [Token] -> [(Day, TargetSpec, Text)]
parseTargetSpecs = q0 []
where
getHref (TagOpen "a" attrs) = listToMaybe [href | Attr "href" href <- attrs]
getHref _ = Nothing
q0 tss ((getShowDay -> Just uri) : ts) = q1 tss "" uri ts
q0 tss (_ : ts) = q0 tss ts
q0 tss [] = reverse tss
extractDay uri =
fromGregorian y m d
where
y = read $ Text.unpack $ getParam [queryKey|year|] "0" uri
m = read $ Text.unpack $ getParam [queryKey|month|] "0" uri
d = read $ Text.unpack $ Text.takeWhile isDigit $ getParam [queryKey|day|] "0" uri
q1 tss text t (ContentText content : ts) = q1 tss (text <> content) t ts
q1 tss text t (TagClose "a" : ts) = q0 ((extractDay t, t, text) : tss) ts
q1 tss _ _ ts = q0 tss ts
extractTarget uri =
TargetSpec y m d
where
y = fromMaybe "" (uri ^? uriQuery . queryParam [queryKey|year|] . unRText)
m = fromMaybe "" (uri ^? uriQuery . queryParam [queryKey|month|] . unRText)
d = fromMaybe "" (uri ^? uriQuery . queryParam [queryKey|day|] . unRText)
getShowDay :: Token -> Maybe TargetSpec
getShowDay t = do
href <- getAHref t
uri <- URI.mkURI $ decodeHexEntities href
extractShowday uri
decodeHtmlEntities = Text.pack . q0 . Text.unpack
where
q0 "" = ""
q0 ('&' : '#' : r) = q1 0 r
q0 (c : r) = c : q0 r
getAHref :: Token -> Maybe Text
getAHref (TagOpen "a" attrs) = listToMaybe [href | Attr "href" href <- attrs]
getAHref _ = Nothing
-- Silently swallows broken entities at the end of the string...
q1 _ "" = ""
q1 acc (';' : r) = chr acc : q0 r
q1 acc (c : r)
| isDigit c = q1 (10 * acc + ord c - ord '0') r
-- Or if they end unexpectedly.
| otherwise = c : q0 r
extractDay :: TargetSpec -> Day
extractDay ts =
fromGregorian y m d
where
y = read $ Text.unpack $ tsYear ts
m = read $ Text.unpack $ tsMonth ts
d = read $ Text.unpack $ Text.takeWhile isDigit $ tsDay ts
isShowpdf :: URI -> Bool
isShowpdf uri =
path == ["show", "fz.cgi"] && cmd == Just "showpdf"
extractShowday :: URI -> Maybe TargetSpec
extractShowday uri
| path == ["show", "fz.cgi"] && cmd == Just "showday" = do
y <- getParam [queryKey|year|] uri
m <- getParam [queryKey|month|] uri
d <- getParam [queryKey|day|] uri
pure $ TargetSpec y m d
| otherwise =
Nothing
where
path = uri ^.. uriPath . each . unRText
cmd = uri ^? uriQuery . queryParam [queryKey|cmd|] . unRText
getParam :: URI.RText URI.QueryKey -> Text -> URI -> Text
getParam param fallback uri =
fromMaybe fallback (uri ^? uriQuery . queryParam param . unRText)
getParam :: URI.RText URI.QueryKey -> URI -> Maybe Text
getParam param uri =
uri ^? uriQuery . queryParam param . unRText
decodeHexEntities :: Text -> Text
decodeHexEntities = Text.pack . q0 . Text.unpack
where
q0 "" = ""
q0 ('&' : '#' : r) = q1 0 r
q0 (c : r) = c : q0 r
-- Silently swallows broken entities at the end of the string...
q1 _ "" = ""
q1 acc (';' : r) = chr acc : q0 r
q1 acc (c : r)
| isDigit c = q1 (10 * acc + ord c - ord '0') r
-- Or if they end unexpectedly.
| otherwise = c : q0 r

View File

@ -5,17 +5,19 @@ module Yore.Index (createEntries) where
import Control.Exception (Exception, throwIO)
import Control.Monad (forM_)
import Data.Text (Text)
import Data.Time (Day)
import qualified Database.PostgreSQL.Opium as Opium
import qualified Yore.DB as DB
createEntries :: Day -> [FilePath] -> Opium.Connection -> IO ()
createEntries :: Day -> [(Text, FilePath)] -> Opium.Connection -> IO ()
createEntries date urls conn = do
confidently $ Opium.execute_ "BEGIN" conn
dayIndex <- confidently $ DB.createDayIndex date conn
forM_ urls $ \url -> confidently $ DB.createDayFile dayIndex.day_index_id url conn
forM_ urls $ \(text, url) ->
confidently $ DB.createDayFile dayIndex.day_index_id text url conn
confidently $ Opium.execute_ "COMMIT" conn
confidently :: Exception e => IO (Either e a) -> IO a

View File

@ -28,7 +28,7 @@ connect =
exec :: (Opium.Connection -> IO a) -> IO a
exec = bracket connect Opium.close
getToday :: IO [Text]
getToday :: IO [(Text, Text)]
getToday = do
now <- getZonedTime
print now
@ -50,9 +50,9 @@ indexDay date = exec $ \conn -> do
Nothing -> do
putStrLn "Retrieving URLs..."
urls <- getUrls date
paths <- forM urls $ \url -> do
paths <- forM urls $ \(text, url) -> do
printf "Downloading %s...\n" url
downloadInto "./download" url
(text,) <$> downloadInto "./download" url
putStrLn "Creating DB entries..."
createEntries date paths conn
putStrLn "Done."

View File

@ -1,6 +1,7 @@
module Yore.Time (addYears) where
module Yore.Time (addYears, getCurrentDay) where
import Data.Time (Day, addGregorianYearsClip, isLeapYear, toGregorian)
import Data.Time.LocalTime (localDay, zonedTimeToLocalTime, getZonedTime)
addYears :: Integer -> Day -> Maybe Day
addYears yearOffset date
@ -14,3 +15,8 @@ addYears yearOffset date
year' = year + yearOffset
isFebruary29th = month == 2 && day == 29
getCurrentDay :: IO Day
getCurrentDay = do
print =<< getZonedTime
localDay . zonedTimeToLocalTime <$> getZonedTime

View File

@ -67,9 +67,13 @@ executable yore
, base >=4.18 && <5
, blaze-html
, bytestring
, http-media
, filepath
, opium
, http-media
, http-types
, resource-pool
, servant-server
, text
, time
, wai
, warp