290 lines
9.2 KiB
Haskell
290 lines
9.2 KiB
Haskell
{-# LANGUAGE DataKinds #-}
|
|
{-# LANGUAGE ExplicitNamespaces #-}
|
|
{-# LANGUAGE LambdaCase #-}
|
|
{-# LANGUAGE OverloadedRecordDot #-}
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
{-# LANGUAGE PatternSynonyms #-}
|
|
{-# LANGUAGE QuantifiedConstraints #-}
|
|
{-# LANGUAGE StandaloneDeriving #-}
|
|
{-# LANGUAGE TypeSynonymInstances #-}
|
|
|
|
module Main (main) where
|
|
|
|
import Control.Concurrent (forkIO)
|
|
import Control.Monad (forM, forM_)
|
|
import Control.Monad.IO.Class (liftIO)
|
|
import Control.Monad.Trans.Except (ExceptT (..), catchE, runExceptT)
|
|
import Data.Bifunctor (Bifunctor (..))
|
|
import Data.Proxy (Proxy (..))
|
|
import Data.Text (Text)
|
|
import Data.Time (Day, LocalTime (..), TimeOfDay (..), ZonedTime (..), addDays, toGregorian)
|
|
import GHC.Generics (Generic)
|
|
import Network.HTTP.Media ((//), (/:))
|
|
import Network.HTTP.Types (Status (..), mkStatus, status200)
|
|
import Network.Wai (Application, Request (..), Response, responseFile, responseLBS)
|
|
import Servant
|
|
( Accept (..)
|
|
, Capture
|
|
, Get
|
|
, Handler
|
|
, MimeRender (..)
|
|
, Raw
|
|
, ServerError (..)
|
|
, ServerT
|
|
, Tagged (..)
|
|
, err404
|
|
, err500
|
|
, hoistServer
|
|
, runHandler
|
|
, serve
|
|
, (:<|>) (..)
|
|
, (:>)
|
|
, pattern MkHandler
|
|
)
|
|
import System.Exit (exitFailure)
|
|
import System.FilePath ((</>))
|
|
import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
|
|
import Text.Blaze.Html5 ((!))
|
|
import Text.Printf (printf)
|
|
|
|
import qualified Data.ByteString.Char8 as BS8
|
|
import qualified Data.ByteString.Lazy as LBS
|
|
import qualified Data.Text as Text
|
|
import qualified Data.Text.Encoding as Encoding
|
|
import qualified Database.PostgreSQL.Opium as Opium
|
|
import qualified Network.Wai.Handler.Warp as Warp
|
|
import qualified Text.Blaze.Html5 as H
|
|
import qualified Text.Blaze.Html5.Attributes as A
|
|
|
|
import Envy (type (=@!), type (=@@), type (?))
|
|
import Yore.DB (DB, DayFile (..))
|
|
import Yore.Download (downloadInto)
|
|
import Yore.Error (Error (..))
|
|
import Yore.Schedule (schedule)
|
|
import Yore.Scrape (Issue (..), getIssuesByDay)
|
|
import Yore.Time (addYears, getCurrentDay, getSecondsUntilMidnight)
|
|
|
|
import qualified Envy
|
|
import qualified Yore.DB as DB
|
|
import qualified Yore.Log as Log
|
|
|
|
newtype ConnectionString = ConnectionString String
|
|
deriving (Show)
|
|
|
|
instance Envy.ReadEnvVar ConnectionString where
|
|
readEnvVar = fmap ConnectionString . Envy.readEnvVar
|
|
|
|
data ConfigT f = Config
|
|
{ yorePort :: f =@@ Int ? 3000
|
|
, yoreDownloadDir :: f =@@ FilePath ? "./download"
|
|
, yoreDb :: f =@! Text
|
|
}
|
|
deriving (Generic)
|
|
|
|
type Config = ConfigT Envy.Value
|
|
|
|
deriving instance Show Config
|
|
|
|
main :: IO ()
|
|
main = do
|
|
cfg <-
|
|
Envy.load @ConfigT >>= \case
|
|
Left err -> do
|
|
Log.error $ printf "failed to read config: %s" err
|
|
exitFailure
|
|
Right c ->
|
|
pure c
|
|
|
|
db <- DB.initDB cfg.yoreDb
|
|
|
|
_ <- runExceptT $ doIndex cfg db
|
|
|
|
_ <- forkIO $ runIndexer cfg db
|
|
runServer cfg db
|
|
|
|
runIndexer :: Config -> DB -> IO ()
|
|
runIndexer cfg db =
|
|
schedule shouldRunAt $ runExceptT $ doIndex cfg db
|
|
where
|
|
shouldRunAt zt = case localTimeOfDay $ zonedTimeToLocalTime zt of
|
|
TimeOfDay 3 0 _ -> True
|
|
_ -> False
|
|
|
|
doIndex :: Config -> DB -> ExceptT Error IO ()
|
|
doIndex cfg db =
|
|
catchE
|
|
( DB.withTransaction db $ \conn -> do
|
|
dayThen <- getTodayWithOffset (-100) 0
|
|
indexDay cfg dayThen conn
|
|
dayThen2 <- getTodayWithOffset (-100) 1
|
|
indexDay cfg dayThen2 conn
|
|
dayThen3 <- getTodayWithOffset (-100) 2
|
|
indexDay cfg dayThen3 conn
|
|
)
|
|
(Log.error . show)
|
|
|
|
runServer :: Config -> DB -> IO ()
|
|
runServer cfg db =
|
|
Warp.runSettings settings $ serve (Proxy @API) $ hoistServer (Proxy @API) nt $ server cfg db
|
|
where
|
|
settings =
|
|
foldr
|
|
($)
|
|
Warp.defaultSettings
|
|
[ Warp.setLogger logger
|
|
, Warp.setPort cfg.yorePort
|
|
, Warp.setBeforeMainLoop $ Log.info $ printf "listening on port %d" cfg.yorePort
|
|
, Warp.setOnException onException
|
|
]
|
|
|
|
logger req status _ = do
|
|
Log.info $
|
|
printf
|
|
"%d %s"
|
|
(statusCode status)
|
|
(requestLine req)
|
|
|
|
requestLine :: Request -> String
|
|
requestLine req =
|
|
printf
|
|
"%s %s"
|
|
(BS8.unpack $ requestMethod req)
|
|
(BS8.unpack $ rawPathInfo req <> rawQueryString req)
|
|
|
|
onException mbReq ex = do
|
|
Log.error $ printf "unhandled exception%s: %s%s" (maybe "" ((" in " ++) . requestLine) mbReq) (show ex)
|
|
|
|
nt :: ExceptT Error IO a -> Handler a
|
|
nt action = MkHandler $ do
|
|
res <- runExceptT action
|
|
case res of
|
|
Left err -> do
|
|
Log.error $ show err
|
|
pure $ Left $ toServerError err
|
|
Right x ->
|
|
pure $ Right x
|
|
|
|
type API =
|
|
Get '[HTML] RootModel
|
|
:<|> "today" :> "issue" :> Capture "issue" Int :> Get '[HTML] RootModel
|
|
:<|> "api" :> "today" :> "issue" :> Capture "issue" Int :> "fz.pdf" :> Raw
|
|
|
|
toServerError :: Error -> ServerError
|
|
toServerError = \case
|
|
ConnectionError msg ->
|
|
err500 {errBody = encodeUtf8LBS $ "database connection failed:\n" ++ show msg}
|
|
DBError (Opium.ErrorNotExactlyOneRow 0) ->
|
|
err404 {errBody = "db error: could not find record"}
|
|
DBError msg ->
|
|
err500 {errBody = encodeUtf8LBS $ "db error: " ++ show msg}
|
|
GenericError msg ->
|
|
err500 {errBody = encodeUtf8LBS $ "generic error:\n" ++ show msg}
|
|
|
|
encodeUtf8LBS :: String -> LBS.ByteString
|
|
encodeUtf8LBS = LBS.fromStrict . Encoding.encodeUtf8 . Text.pack
|
|
|
|
handlerToRaw :: ExceptT Error IO Response -> Tagged t Application
|
|
handlerToRaw handler = Tagged $ \_ respond -> do
|
|
r <- runHandler $ nt handler
|
|
case r of
|
|
Left e ->
|
|
respond $ responseLBS (mkStatus e.errHTTPCode $ BS8.pack e.errReasonPhrase) e.errHeaders e.errBody
|
|
Right response ->
|
|
respond response
|
|
|
|
server :: Config -> DB -> ServerT API (ExceptT Error IO)
|
|
server cfg db = rootR :<|> todayR :<|> apiTodayR
|
|
where
|
|
rootR = todayR 0
|
|
|
|
todayR issue = do
|
|
dateThen <- ExceptT get100YearsAgo
|
|
count <- DB.withConn db $ DB.getNumberOfIssues dateThen
|
|
dayFile <- DB.withConn db $ DB.getDayFileByIssue dateThen issue
|
|
pure $ RootModel dateThen dayFile issue count
|
|
|
|
apiTodayR issue = handlerToRaw $ do
|
|
dateThen <- ExceptT get100YearsAgo
|
|
dayFile <- DB.withConn db $ DB.getDayFileByIssue dateThen issue
|
|
let fullPath = cfg.yoreDownloadDir </> dayFile.relative_path
|
|
secondsUntilMidnight <- liftIO getSecondsUntilMidnight
|
|
pure $
|
|
responseFile
|
|
status200
|
|
[ ("content-type", "application/pdf")
|
|
, ("cache-control", BS8.pack $ printf "public, max-age=%d" secondsUntilMidnight)
|
|
]
|
|
fullPath
|
|
Nothing
|
|
|
|
data RootModel = RootModel Day DB.DayFile Int Int
|
|
|
|
instance MimeRender HTML RootModel where
|
|
mimeRender _ (RootModel dateThen dayFile issue count) = renderHtml $ do
|
|
H.docTypeHtml $ do
|
|
H.head $ do
|
|
H.title $ H.text $ Text.pack $ printf "FZ ⊛ %02d.%02d.%04d ⊛ %s" d m y dayFile.label
|
|
H.style
|
|
"body { margin: 0; font-family: Helvetica, sans-serif; } .layout { display: flex; flex-direction: column; width: 100vw; height: 100vh; } .topbar { user-select: none; text-align: center; padding: .5em; } .content { flex: 1; } iframe { border: 0; }"
|
|
H.body $ do
|
|
H.div ! A.class_ "layout" $ do
|
|
H.div ! A.class_ "topbar" $ do
|
|
H.text $ Text.pack $ printf "Freiburger Zeitung ⊛ %02d.%02d.%04d" d m y
|
|
H.br
|
|
buildLink "⟽" (issue - 1)
|
|
H.text $ " " <> dayFile.label <> " "
|
|
buildLink "⟾" (issue + 1)
|
|
H.iframe ! A.src (H.toValue url) ! A.class_ "content" $ mempty
|
|
where
|
|
url :: String
|
|
url = printf "/api/today/issue/%d/fz.pdf" issue
|
|
|
|
(y, m, d) = toGregorian dateThen
|
|
|
|
buildLink label issue'
|
|
| issue' == 0 =
|
|
H.a ! A.href "/" $ label
|
|
| issue' > 0 && issue' < count =
|
|
H.a ! A.href (H.toValue (printf "/today/issue/%d" issue' :: String)) $ label
|
|
| otherwise =
|
|
H.span ! A.style "color: grey;" $ label
|
|
|
|
-- Utils
|
|
|
|
data HTML
|
|
|
|
instance Accept HTML where
|
|
contentType _ = "text" // "html" /: ("charset", "utf-8")
|
|
|
|
get100YearsAgo :: IO (Either Error Day)
|
|
get100YearsAgo = runExceptT $ getTodayWithOffset (-100) 0
|
|
|
|
getTodayWithOffset :: Integer -> Integer -> ExceptT Error IO Day
|
|
getTodayWithOffset yearOffset dayOffset =
|
|
ExceptT $
|
|
first (GenericError . (Text.pack (printf "can't go back %d years and go forward %d days: " (-yearOffset) dayOffset) <>))
|
|
. addYears yearOffset
|
|
. addDays dayOffset
|
|
<$> getCurrentDay
|
|
|
|
indexDay :: Config -> Day -> Opium.Connection -> ExceptT Error IO ()
|
|
indexDay cfg dayThen conn = do
|
|
-- Transaction-level lock released automatically after transaction
|
|
DB.lift $ Opium.execute_ "SELECT pg_advisory_xact_lock(42);" conn
|
|
mbDi <- DB.readDayIndex dayThen conn
|
|
case mbDi of
|
|
Just _ ->
|
|
Log.info $ printf "index for %s already exists." (show dayThen)
|
|
Nothing -> do
|
|
Log.info $ printf "scraping issues for %s" (show dayThen)
|
|
issues <- liftIO $ getIssuesByDay dayThen
|
|
paths <- forM issues $ \issue -> do
|
|
Log.info $ printf "downloading %s" issue.url
|
|
path <- liftIO $ downloadInto cfg.yoreDownloadDir issue.url
|
|
pure (issue.label, path)
|
|
Log.info "creating DB entries"
|
|
dayIndex <- DB.createDayIndex dayThen conn
|
|
forM_ paths $ \(text, url) ->
|
|
DB.createDayFile dayIndex.day_index_id text url conn
|
|
Log.info "done."
|