yore/app/Main.hs
Paul Brinkmeier c5c11ed181
All checks were successful
deliver / deliver (push) Successful in 1m36s
Don't buffer stdout
2025-10-07 12:53:14 +02:00

283 lines
9.1 KiB
Haskell

{-# LANGUAGE DataKinds #-}
{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedRecordDot #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE QuantifiedConstraints #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeSynonymInstances #-}
module Main (main) where
import Control.Concurrent (forkIO)
import Control.Monad (forM, forM_)
import Control.Monad.IO.Class (liftIO)
import Control.Monad.Trans.Except (ExceptT (..), catchE, runExceptT)
import Data.Bifunctor (Bifunctor (..))
import Data.Proxy (Proxy (..))
import Data.Text (Text)
import Data.Time (Day, LocalTime (..), TimeOfDay (..), ZonedTime (..), addDays, nominalDiffTimeToSeconds, toGregorian)
import GHC.Generics (Generic)
import Network.HTTP.Types (Status (..))
import Network.Wai (Middleware, Request (..), responseStatus)
import Servant
( Capture
, Get
, Handler
, MimeRender (..)
, ServerError (..)
, ServerT
, err404
, err500
, hoistServer
, serve
, (:<|>) (..)
, (:>)
, pattern MkHandler
)
import System.Exit (exitFailure)
import System.FilePath ((</>))
import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
import Text.Blaze.Html5 ((!))
import Text.Printf (printf)
import qualified Data.ByteString.Char8 as BS8
import qualified Data.ByteString.Lazy as LBS
import qualified Data.Text as Text
import qualified Data.Text.Encoding as Encoding
import qualified Database.PostgreSQL.Opium as Opium
import qualified Network.Wai.Handler.Warp as Warp
import qualified Text.Blaze.Html5 as H
import qualified Text.Blaze.Html5.Attributes as A
import Envy (type (=@!), type (=@@), type (?))
import Yore.DB (DB, DayFile (..))
import Yore.Download (downloadInto)
import Yore.Error (Error (..))
import Yore.Schedule (schedule)
import Yore.Scrape (Issue (..), getIssuesByDay)
import Yore.Servant (GetSendfile, HTML, Sendfile (..))
import Yore.Time (addYears, getCurrentDay, getSecondsUntilMidnight)
import Data.Fixed (Pico)
import Data.Time.Clock.POSIX (getPOSIXTime)
import qualified Envy
import qualified Yore.DB as DB
import qualified Yore.Log as Log
import System.IO (hSetBuffering, BufferMode (..), stdout)
data ConfigT f = Config
{ yorePort :: f =@@ Int ? 3000
, yoreDownloadDir :: f =@@ FilePath ? "./download"
, yoreDb :: f =@! Text
}
deriving (Generic)
type Config = ConfigT Envy.Value
deriving instance Show Config
main :: IO ()
main = do
hSetBuffering stdout NoBuffering
cfg <-
Envy.load @ConfigT >>= \case
Left errs -> do
forM_ errs $ Log.error . printf "failed to read config: %s"
exitFailure
Right c ->
pure c
db <- DB.initDB cfg.yoreDb
_ <- runExceptT $ doIndex cfg db
_ <- forkIO $ runIndexer cfg db
runServer cfg db
runIndexer :: Config -> DB -> IO ()
runIndexer cfg db =
schedule shouldRunAt $ runExceptT $ doIndex cfg db
where
shouldRunAt zt = case localTimeOfDay $ zonedTimeToLocalTime zt of
TimeOfDay 3 0 _ -> True
_ -> False
doIndex :: Config -> DB -> ExceptT Error IO ()
doIndex cfg db =
catchE
( DB.withTransaction db $ \conn -> do
forM_ [0, 1, 2] $ \i -> do
dayThen <- getTodayWithOffset (-100) i
indexDay cfg dayThen conn
)
(Log.error . show)
runServer :: Config -> DB -> IO ()
runServer cfg db =
Warp.runSettings settings $ loggerMiddleware $ serve (Proxy @API) $ hoistServer (Proxy @API) nt $ server cfg db
where
settings =
foldr
($)
Warp.defaultSettings
[ Warp.setPort cfg.yorePort
, Warp.setBeforeMainLoop $ Log.info $ printf "listening on port %d" cfg.yorePort
, Warp.setOnException onException
]
logger :: Request -> Status -> Pico -> IO ()
logger req status s = do
Log.info $ printf "%d %s %ss" (statusCode status) (requestLine req) (formatMetric s)
loggerMiddleware :: Middleware
loggerMiddleware app req respond = do
begin <- getPOSIXTime
app req $ \res -> do
rr <- respond res
end <- getPOSIXTime
logger req (responseStatus res) (nominalDiffTimeToSeconds (end - begin))
pure rr
onException mbReq ex = do
Log.error $ printf "unhandled exception%s: %s%s" (maybe "" ((" in " ++) . requestLine) mbReq) (show ex)
requestLine :: Request -> String
requestLine req =
printf
"%s %s"
(BS8.unpack $ requestMethod req)
(BS8.unpack $ rawPathInfo req <> rawQueryString req)
formatMetric :: Pico -> String
formatMetric x = go (-9) prefixes
where
prefixes = ["p", "n", "u", "m", "", "k", "M", "G"]
go :: Int -> [String] -> String
go _ [] = show x
go e (p : ps)
| x < 10 ^^ e =
show @Int (floor $ x * 10 ^^ (-e + 3)) ++ p
| otherwise = go (e + 3) ps
nt :: ExceptT Error IO a -> Handler a
nt action = MkHandler $ do
res <- runExceptT action
case res of
Left err -> do
Log.error $ show err
pure $ Left $ toServerError err
Right x ->
pure $ Right x
toServerError :: Error -> ServerError
toServerError = \case
ConnectionError msg ->
err500 {errBody = encodeUtf8LBS $ "database connection failed:\n" ++ show msg}
DBError (Opium.ErrorNotExactlyOneRow 0) ->
err404 {errBody = "db error: could not find record"}
DBError msg ->
err500 {errBody = encodeUtf8LBS $ "db error: " ++ show msg}
GenericError msg ->
err500 {errBody = encodeUtf8LBS $ "generic error:\n" ++ show msg}
encodeUtf8LBS :: String -> LBS.ByteString
encodeUtf8LBS = LBS.fromStrict . Encoding.encodeUtf8 . Text.pack
type API =
Get '[HTML] RootModel
:<|> "today" :> "issue" :> Capture "issue" Int :> Get '[HTML] RootModel
:<|> "api" :> "today" :> "issue" :> Capture "issue" Int :> "fz.pdf" :> GetSendfile
server :: Config -> DB -> ServerT API (ExceptT Error IO)
server cfg db = rootR :<|> todayR :<|> apiTodayR
where
rootR = todayR 0
todayR issue = do
dateThen <- getTodayWithOffset (-100) 0
count <- DB.withConn db $ DB.getNumberOfIssues dateThen
dayFile <- DB.withConn db $ DB.getDayFileByIssue dateThen issue
pure $ RootModel dateThen dayFile issue count
apiTodayR issue = do
dateThen <- getTodayWithOffset (-100) 0
dayFile <- DB.withConn db $ DB.getDayFileByIssue dateThen issue
let fullPath = cfg.yoreDownloadDir </> dayFile.relative_path
secondsUntilMidnight <- liftIO getSecondsUntilMidnight
pure
Sendfile
{ headers =
[ ("content-type", "application/pdf")
, ("cache-control", BS8.pack $ printf "public, max-age=%d" secondsUntilMidnight)
]
, path =
fullPath
}
data RootModel = RootModel Day DB.DayFile Int Int
instance MimeRender HTML RootModel where
mimeRender _ (RootModel dateThen dayFile issue count) = renderHtml $ do
H.docTypeHtml $ do
H.head $ do
H.title $ H.text $ Text.pack $ printf "FZ ⊛ %02d.%02d.%04d ⊛ %s" d m y dayFile.label
H.style
"body { margin: 0; font-family: Helvetica, sans-serif; } .layout { display: flex; flex-direction: column; width: 100vw; height: 100vh; } .topbar { user-select: none; text-align: center; padding: .5em; } .content { flex: 1; } iframe { border: 0; }"
H.body $ do
H.div ! A.class_ "layout" $ do
H.div ! A.class_ "topbar" $ do
H.text $ Text.pack $ printf "Freiburger Zeitung ⊛ %02d.%02d.%04d" d m y
H.br
buildLink "" (issue - 1)
H.text $ " " <> dayFile.label <> " "
buildLink "" (issue + 1)
H.iframe ! A.src (H.toValue url) ! A.class_ "content" $ mempty
where
url :: String
url = printf "/api/today/issue/%d/fz.pdf" issue
(y, m, d) = toGregorian dateThen
buildLink label issue'
| issue' == 0 =
H.a ! A.href "/" $ label
| issue' > 0 && issue' < count =
H.a ! A.href (H.toValue (printf "/today/issue/%d" issue' :: String)) $ label
| otherwise =
H.span ! A.style "color: grey;" $ label
-- Utils
getTodayWithOffset :: Integer -> Integer -> ExceptT Error IO Day
getTodayWithOffset yearOffset dayOffset =
ExceptT $
first (GenericError . (Text.pack (printf "can't go back %d years and go forward %d days: " (-yearOffset) dayOffset) <>))
. addYears yearOffset
. addDays dayOffset
<$> getCurrentDay
indexDay :: Config -> Day -> Opium.Connection -> ExceptT Error IO ()
indexDay cfg dayThen conn = do
-- Transaction-level lock released automatically after transaction
DB.lift $ Opium.execute_ "SELECT pg_advisory_xact_lock(42);" conn
mbDi <- DB.readDayIndex dayThen conn
case mbDi of
Just _ ->
Log.info $ printf "index for %s already exists." (show dayThen)
Nothing -> do
Log.info $ printf "scraping issues for %s" (show dayThen)
issues <- liftIO $ getIssuesByDay dayThen
paths <- forM issues $ \issue -> do
Log.info $ printf "downloading %s" issue.url
path <- downloadInto cfg.yoreDownloadDir issue.url
pure (issue.label, path)
Log.info "creating DB entries"
dayIndex <- DB.createDayIndex dayThen conn
forM_ paths $ \(text, url) ->
DB.createDayFile dayIndex.day_index_id text url conn
Log.info "done."