yore/app/Main.hs
2025-09-22 16:07:42 +02:00

287 lines
9.0 KiB
Haskell

{-# LANGUAGE DataKinds #-}
{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedRecordDot #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE QuantifiedConstraints #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeSynonymInstances #-}
module Main (main) where
import Control.Concurrent (forkIO)
import Control.Monad (forM, forM_)
import Control.Monad.IO.Class (liftIO)
import Control.Monad.Trans.Except (ExceptT (..), catchE, runExceptT)
import Data.Bifunctor (Bifunctor (..))
import Data.Proxy (Proxy (..))
import Data.Text (Text)
import Data.Time (Day, LocalTime (..), TimeOfDay (..), ZonedTime (..), addDays, toGregorian)
import GHC.Generics (Generic)
import Network.HTTP.Media ((//), (/:))
import Network.HTTP.Types (Status (..), mkStatus, status200)
import Network.Wai (Application, Request (..), Response, responseFile, responseLBS)
import Servant
( Accept (..)
, Capture
, Get
, Handler
, MimeRender (..)
, Raw
, ServerError (..)
, ServerT
, Tagged (..)
, err404
, err500
, hoistServer
, runHandler
, serve
, (:<|>) (..)
, (:>)
, pattern MkHandler
)
import System.Exit (exitFailure)
import System.FilePath ((</>))
import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
import Text.Blaze.Html5 ((!))
import Text.Printf (printf)
import qualified Data.ByteString.Char8 as BS8
import qualified Data.ByteString.Lazy as LBS
import qualified Data.Text as Text
import qualified Data.Text.Encoding as Encoding
import qualified Database.PostgreSQL.Opium as Opium
import qualified Network.Wai.Handler.Warp as Warp
import qualified Text.Blaze.Html5 as H
import qualified Text.Blaze.Html5.Attributes as A
import Envy (type (=@!), type (=@@), type (?))
import Yore.DB (DB, DayFile (..))
import Yore.Download (downloadInto)
import Yore.Error (Error (..))
import Yore.Schedule (schedule)
import Yore.Scrape (Issue (..), getIssuesByDay)
import Yore.Time (addYears, getCurrentDay, getSecondsUntilMidnight)
import qualified Envy
import qualified Yore.DB as DB
import qualified Yore.Log as Log
-- | Wrapper around a connection string read from the environment.
--
-- NOTE(review): nothing else in this module refers to this type — the
-- database setting in the configuration record is a 'Text'. Confirm whether
-- it is still needed.
newtype ConnectionString = ConnectionString String
  deriving (Show)

-- | Reads the variable through the 'String' instance and wraps the result.
instance Envy.ReadEnvVar ConnectionString where
  readEnvVar raw = ConnectionString <$> Envy.readEnvVar raw
-- | Application configuration, parameterised over the Envy carrier @f@.
--
-- NOTE(review): the field types are built from Envy's type-level operators;
-- the literals after @?@ look like default values — confirm against Envy.
data ConfigT f = Config
  { -- | Port handed to Warp when the server starts.
    yorePort :: f =@@ Int ? 3000
  , -- | Directory that downloads are written to and PDFs are served from.
    yoreDownloadDir :: f =@@ FilePath ? "./download"
  , -- | Database setting passed to 'DB.initDB'.
    yoreDb :: f =@! Text
  }
  deriving (Generic)

-- | The configuration in the form the rest of this module works with.
type Config = ConfigT Envy.Value

deriving instance Show Config
-- | Program entry point.
--
-- Loads the configuration (logging the problem and exiting with a failure
-- code when that fails), initialises the database handle, runs one indexing
-- pass synchronously, forks the recurring indexer and finally runs the web
-- server on the main thread.
main :: IO ()
main = do
  loaded <- Envy.load @ConfigT
  cfg <- case loaded of
    Right c ->
      pure c
    Left err -> do
      Log.error $ printf "failed to read config: %s" err
      exitFailure
  db <- DB.initDB cfg.yoreDb
  -- The result is discarded on purpose: 'doIndex' logs its own failures.
  _ <- runExceptT (doIndex cfg db)
  _ <- forkIO (runIndexer cfg db)
  runServer cfg db
-- | Hands 'doIndex' to 'schedule' together with a predicate that accepts a
-- 'ZonedTime' exactly when its local time of day is 03:00 (any second).
--
-- NOTE(review): how often 'schedule' evaluates the predicate is defined in
-- "Yore.Schedule" — confirm there.
runIndexer :: Config -> DB -> IO ()
runIndexer cfg db =
  schedule isIndexingTime (runExceptT (doIndex cfg db))
  where
    isIndexingTime zonedNow =
      let TimeOfDay hour minute _ = localTimeOfDay (zonedTimeToLocalTime zonedNow)
       in hour == 3 && minute == 0
-- | Indexes the days that lie 100 years before today, tomorrow and the day
-- after tomorrow, all through one 'DB.withTransaction' call.
--
-- Any 'Error' raised along the way is logged and then swallowed, so the
-- resulting action itself never fails with an 'Error'.
doIndex :: Config -> DB -> ExceptT Error IO ()
doIndex cfg db =
  indexAllDays `catchE` \err -> Log.error (show err)
  where
    indexAllDays =
      DB.withTransaction db $ \conn ->
        mapM_ (indexOffset conn) [0, 1, 2]

    -- Resolve the historic day for one day offset and index it.
    indexOffset conn dayOffset =
      getTodayWithOffset (-100) dayOffset >>= \dayThen ->
        indexDay cfg dayThen conn
-- | Runs the Warp server for 'API' on the configured port.
--
-- Handlers written in @ExceptT Error IO@ are converted to Servant handlers
-- with 'nt'. Every served request is logged with its status code and request
-- line, and exceptions that reach Warp are logged through 'Log.error'.
runServer :: Config -> DB -> IO ()
runServer cfg db =
  Warp.runSettings settings $ serve (Proxy @API) $ hoistServer (Proxy @API) nt $ server cfg db
  where
    settings =
      foldr
        ($)
        Warp.defaultSettings
        [ Warp.setLogger logger
        , Warp.setPort cfg.yorePort
        , Warp.setBeforeMainLoop $ Log.info $ printf "listening on port %d" cfg.yorePort
        , Warp.setOnException onException
        ]

    -- One log line per request: "<status code> <method> <path and query>".
    logger req status _ = do
      Log.info $
        printf
          "%d %s"
          (statusCode status)
          (requestLine req)

    -- The format string must contain exactly as many directives as there are
    -- arguments: a surplus "%s" makes 'printf' fail at runtime, which would
    -- throw from inside the exception handler itself.
    onException mbReq ex = do
      Log.error $ printf "unhandled exception%s: %s" (maybe "" ((" in " ++) . requestLine) mbReq) (show ex)
-- | Formats a request as its method, a space, and the raw path immediately
-- followed by the raw query string.
requestLine :: Request -> String
requestLine req = printf "%s %s" method target
  where
    method = BS8.unpack (requestMethod req)
    target = BS8.unpack (rawPathInfo req <> rawQueryString req)
-- | Turns an application action into a Servant 'Handler'.
--
-- A failing action is logged and its 'Error' is translated with
-- 'toServerError'; a successful result is passed through unchanged.
nt :: ExceptT Error IO a -> Handler a
nt action = MkHandler $ runExceptT action >>= either onFailure (pure . Right)
  where
    onFailure err = do
      Log.error $ show err
      pure $ Left $ toServerError err
-- | The HTTP interface, in route order:
--
--   * @GET /@ — HTML page rendered from a 'RootModel'.
--   * @GET /today/issue/:issue@ — HTML page for the captured issue number.
--   * @/api/today/issue/:issue/fz.pdf@ — 'Raw' endpoint for the captured
--     issue number.
type API =
  Get '[HTML] RootModel
    :<|> "today" :> "issue" :> Capture "issue" Int :> Get '[HTML] RootModel
    :<|> "api" :> "today" :> "issue" :> Capture "issue" Int :> "fz.pdf" :> Raw
-- | Maps an application 'Error' to the Servant error sent to the client.
--
-- A database lookup that reports exactly zero rows becomes a 404; every other
-- error becomes a 500 whose body carries a short prefix and the shown error
-- value.
toServerError :: Error -> ServerError
toServerError failure = case failure of
  ConnectionError msg ->
    internalError ("database connection failed:\n" ++ show msg)
  DBError (Opium.ErrorNotExactlyOneRow 0) ->
    err404 {errBody = "db error: could not find record"}
  DBError msg ->
    internalError ("db error: " ++ show msg)
  GenericError msg ->
    internalError ("generic error:\n" ++ show msg)
  where
    -- A 500 response with the given text as its UTF-8 encoded body.
    internalError body = err500 {errBody = encodeUtf8LBS body}
-- | Encodes a 'String' as UTF-8 and returns it as a lazy 'LBS.ByteString'.
encodeUtf8LBS :: String -> LBS.ByteString
encodeUtf8LBS str = LBS.fromStrict (Encoding.encodeUtf8 (Text.pack str))
-- | Wraps an application action that produces a WAI 'Response' as a raw
-- application.
--
-- The incoming request is ignored. When the action fails, the resulting
-- 'ServerError' is converted into a plain response carrying the same status
-- code, reason phrase, headers and body.
handlerToRaw :: ExceptT Error IO Response -> Tagged t Application
handlerToRaw handler = Tagged app
  where
    app _request respond =
      runHandler (nt handler) >>= respond . either errorResponse id

    errorResponse :: ServerError -> Response
    errorResponse e =
      responseLBS (mkStatus e.errHTTPCode $ BS8.pack e.errReasonPhrase) e.errHeaders e.errBody
-- | Handlers for 'API', in route order: the root page (issue 0), the page for
-- a given issue number, and the PDF of a given issue.
--
-- All three resolve the date via 'get100YearsAgo' and look the issue up in
-- the database for that date.
server :: Config -> DB -> ServerT API (ExceptT Error IO)
server cfg db = showIssue 0 :<|> showIssue :<|> servePdf
  where
    -- Collects everything the HTML page needs for one issue.
    showIssue issueNo = do
      day <- ExceptT get100YearsAgo
      total <- DB.withConn db (DB.getNumberOfIssues day)
      file <- DB.withConn db (DB.getDayFileByIssue day issueNo)
      pure (RootModel day file issueNo total)

    -- Serves the stored PDF with a cache lifetime equal to the value returned
    -- by 'getSecondsUntilMidnight'.
    servePdf issueNo = handlerToRaw $ do
      day <- ExceptT get100YearsAgo
      file <- DB.withConn db (DB.getDayFileByIssue day issueNo)
      secondsLeft <- liftIO getSecondsUntilMidnight
      let pdfPath = cfg.yoreDownloadDir </> file.relative_path
      pure $
        responseFile
          status200
          [ ("content-type", "application/pdf")
          , ("cache-control", BS8.pack $ printf "public, max-age=%d" secondsLeft)
          ]
          pdfPath
          Nothing
-- | View model for the HTML page. The fields are, in order: the date being
-- shown, the day file of the selected issue, the number of that issue, and
-- the number of issues available for the date.
data RootModel = RootModel Day DB.DayFile Int Int
-- | Renders the page for one issue: a top bar with the date, the issue label
-- and previous/next navigation, followed by an iframe that embeds the PDF of
-- the issue.
instance MimeRender HTML RootModel where
  mimeRender _ (RootModel dateThen dayFile issue count) = renderHtml $ do
    H.docTypeHtml $ do
      H.head $ do
        H.title $ H.text $ Text.pack $ printf "FZ ⊛ %02d.%02d.%04d ⊛ %s" d m y dayFile.label
        H.style
          "body { margin: 0; font-family: Helvetica, sans-serif; } .layout { display: flex; flex-direction: column; width: 100vw; height: 100vh; } .topbar { user-select: none; text-align: center; padding: .5em; } .content { flex: 1; } iframe { border: 0; }"
      H.body $ do
        H.div ! A.class_ "layout" $ do
          H.div ! A.class_ "topbar" $ do
            H.text $ Text.pack $ printf "Freiburger Zeitung ⊛ %02d.%02d.%04d" d m y
            H.br
            -- The navigation elements need visible text: an anchor with an
            -- empty body renders as nothing and cannot be clicked.
            buildLink "←" (issue - 1)
            H.text $ " " <> dayFile.label <> " "
            buildLink "→" (issue + 1)
          H.iframe ! A.src (H.toValue url) ! A.class_ "content" $ mempty
    where
      -- Location of the PDF shown inside the iframe.
      url :: String
      url = printf "/api/today/issue/%d/fz.pdf" issue

      (y, m, d) = toGregorian dateThen

      -- Issue 0 links to the root page and any other issue number below
      -- @count@ links to its own page; a number outside that range is
      -- rendered as greyed-out, non-clickable text.
      buildLink label issue'
        | issue' == 0 =
            H.a ! A.href "/" $ label
        | issue' > 0 && issue' < count =
            H.a ! A.href (H.toValue (printf "/today/issue/%d" issue' :: String)) $ label
        | otherwise =
            H.span ! A.style "color: grey;" $ label
-- Utils
-- | Content-type tag used in the 'API' type for HTML responses.
data HTML

-- | Advertises HTML responses as @text/html@ with a UTF-8 charset parameter.
instance Accept HTML where
  contentType _ = "text" // "html" /: ("charset", "utf-8")
-- | Today's date moved back by 100 years, or the 'Error' reported by
-- 'getTodayWithOffset'.
get100YearsAgo :: IO (Either Error Day)
get100YearsAgo = runExceptT (getTodayWithOffset yearOffset dayOffset)
  where
    yearOffset = -100
    dayOffset = 0
-- | Takes the current day, adds @dayOffset@ days and then applies
-- @yearOffset@ years through 'addYears'.
--
-- When 'addYears' fails, its message is prefixed with a description of the
-- requested shift and returned as a 'GenericError'.
getTodayWithOffset :: Integer -> Integer -> ExceptT Error IO Day
getTodayWithOffset yearOffset dayOffset = ExceptT $ do
  today <- getCurrentDay
  pure $ first describeFailure $ addYears yearOffset $ addDays dayOffset today
  where
    describeFailure reason = GenericError (prefix <> reason)
    prefix =
      Text.pack (printf "can't go back %d years and go forward %d days: " (-yearOffset) dayOffset)
-- | Makes sure an index exists for @dayThen@.
--
-- When the database already holds an index for the day, only a log line is
-- written. Otherwise the issues of that day are scraped, each one is
-- downloaded into the configured download directory, and a day index with one
-- day file per download is created.
indexDay :: Config -> Day -> Opium.Connection -> ExceptT Error IO ()
indexDay cfg dayThen conn = do
  -- Transaction-level advisory lock; it is released automatically when the
  -- transaction ends.
  DB.lift $ Opium.execute_ "SELECT pg_advisory_xact_lock(42);" conn
  DB.readDayIndex dayThen conn >>= \case
    Just _ ->
      Log.info $ printf "index for %s already exists." (show dayThen)
    Nothing -> do
      Log.info $ printf "scraping issues for %s" (show dayThen)
      scraped <- liftIO $ getIssuesByDay dayThen
      downloads <- forM scraped $ \issue -> do
        Log.info $ printf "downloading %s" issue.url
        localPath <- liftIO $ downloadInto cfg.yoreDownloadDir issue.url
        pure (issue.label, localPath)
      Log.info "creating DB entries"
      dayIndex <- DB.createDayIndex dayThen conn
      forM_ downloads $ \(issueLabel, localPath) ->
        DB.createDayFile dayIndex.day_index_id issueLabel localPath conn
      Log.info "done."