{-# LANGUAGE DataKinds #-} {-# LANGUAGE ExplicitNamespaces #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedRecordDot #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE QuantifiedConstraints #-} {-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE TypeSynonymInstances #-} module Main (main) where import Control.Concurrent (forkIO) import Control.Monad (forM, forM_) import Control.Monad.IO.Class (liftIO) import Control.Monad.Trans.Except (ExceptT (..), catchE, runExceptT) import Data.Bifunctor (Bifunctor (..)) import Data.Proxy (Proxy (..)) import Data.Text (Text) import Data.Time (Day, LocalTime (..), TimeOfDay (..), ZonedTime (..), addDays, nominalDiffTimeToSeconds, toGregorian) import GHC.Generics (Generic) import Network.HTTP.Types (Status (..)) import Network.Wai (Middleware, Request (..), responseStatus) import Servant ( Capture , Get , Handler , MimeRender (..) , ServerError (..) , ServerT , err404 , err500 , hoistServer , serve , (:<|>) (..) , (:>) , pattern MkHandler ) import System.Exit (exitFailure) import System.FilePath (()) import Text.Blaze.Html.Renderer.Utf8 (renderHtml) import Text.Blaze.Html5 ((!)) import Text.Printf (printf) import qualified Data.ByteString.Char8 as BS8 import qualified Data.ByteString.Lazy as LBS import qualified Data.Text as Text import qualified Data.Text.Encoding as Encoding import qualified Database.PostgreSQL.Opium as Opium import qualified Network.Wai.Handler.Warp as Warp import qualified Text.Blaze.Html5 as H import qualified Text.Blaze.Html5.Attributes as A import Envy (type (=@!), type (=@@), type (?)) import Yore.DB (DB, DayFile (..)) import Yore.Download (downloadInto) import Yore.Error (Error (..)) import Yore.Schedule (schedule) import Yore.Scrape (Issue (..), getIssuesByDay) import Yore.Servant (GetSendfile, HTML, Sendfile (..)) import Yore.Time (addYears, getCurrentDay, getSecondsUntilMidnight) import Data.Fixed (Pico) import Data.Time.Clock.POSIX (getPOSIXTime) import qualified Envy import qualified Yore.DB as DB import qualified Yore.Log as Log import System.IO (hSetBuffering, BufferMode (..), stdout) data ConfigT f = Config { yorePort :: f =@@ Int ? 3000 , yoreDownloadDir :: f =@@ FilePath ? "./download" , yoreDb :: f =@! Text } deriving (Generic) type Config = ConfigT Envy.Value deriving instance Show Config main :: IO () main = do hSetBuffering stdout NoBuffering cfg <- Envy.load @ConfigT >>= \case Left errs -> do forM_ errs $ Log.error . printf "failed to read config: %s" exitFailure Right c -> pure c db <- DB.initDB cfg.yoreDb _ <- runExceptT $ doIndex cfg db _ <- forkIO $ runIndexer cfg db runServer cfg db runIndexer :: Config -> DB -> IO () runIndexer cfg db = schedule shouldRunAt $ runExceptT $ doIndex cfg db where shouldRunAt zt = case localTimeOfDay $ zonedTimeToLocalTime zt of TimeOfDay 3 0 _ -> True _ -> False doIndex :: Config -> DB -> ExceptT Error IO () doIndex cfg db = catchE ( DB.withTransaction db $ \conn -> do forM_ [0, 1, 2] $ \i -> do dayThen <- getTodayWithOffset (-100) i indexDay cfg dayThen conn ) (Log.error . show) runServer :: Config -> DB -> IO () runServer cfg db = Warp.runSettings settings $ loggerMiddleware $ serve (Proxy @API) $ hoistServer (Proxy @API) nt $ server cfg db where settings = foldr ($) Warp.defaultSettings [ Warp.setPort cfg.yorePort , Warp.setBeforeMainLoop $ Log.info $ printf "listening on port %d" cfg.yorePort , Warp.setOnException onException ] logger :: Request -> Status -> Pico -> IO () logger req status s = do Log.info $ printf "%d %s %ss" (statusCode status) (requestLine req) (formatMetric s) loggerMiddleware :: Middleware loggerMiddleware app req respond = do begin <- getPOSIXTime app req $ \res -> do rr <- respond res end <- getPOSIXTime logger req (responseStatus res) (nominalDiffTimeToSeconds (end - begin)) pure rr onException mbReq ex = do Log.error $ printf "unhandled exception%s: %s%s" (maybe "" ((" in " ++) . requestLine) mbReq) (show ex) requestLine :: Request -> String requestLine req = printf "%s %s" (BS8.unpack $ requestMethod req) (BS8.unpack $ rawPathInfo req <> rawQueryString req) formatMetric :: Pico -> String formatMetric x = go (-9) prefixes where prefixes = ["p", "n", "u", "m", "", "k", "M", "G"] go :: Int -> [String] -> String go _ [] = show x go e (p : ps) | x < 10 ^^ e = show @Int (floor $ x * 10 ^^ (-e + 3)) ++ p | otherwise = go (e + 3) ps nt :: ExceptT Error IO a -> Handler a nt action = MkHandler $ do res <- runExceptT action case res of Left err -> do Log.error $ show err pure $ Left $ toServerError err Right x -> pure $ Right x toServerError :: Error -> ServerError toServerError = \case ConnectionError msg -> err500 {errBody = encodeUtf8LBS $ "database connection failed:\n" ++ show msg} DBError (Opium.ErrorNotExactlyOneRow 0) -> err404 {errBody = "db error: could not find record"} DBError msg -> err500 {errBody = encodeUtf8LBS $ "db error: " ++ show msg} GenericError msg -> err500 {errBody = encodeUtf8LBS $ "generic error:\n" ++ show msg} encodeUtf8LBS :: String -> LBS.ByteString encodeUtf8LBS = LBS.fromStrict . Encoding.encodeUtf8 . Text.pack type API = Get '[HTML] RootModel :<|> "today" :> "issue" :> Capture "issue" Int :> Get '[HTML] RootModel :<|> "api" :> "today" :> "issue" :> Capture "issue" Int :> "fz.pdf" :> GetSendfile server :: Config -> DB -> ServerT API (ExceptT Error IO) server cfg db = rootR :<|> todayR :<|> apiTodayR where rootR = todayR 0 todayR issue = do dateThen <- getTodayWithOffset (-100) 0 count <- DB.withConn db $ DB.getNumberOfIssues dateThen dayFile <- DB.withConn db $ DB.getDayFileByIssue dateThen issue pure $ RootModel dateThen dayFile issue count apiTodayR issue = do dateThen <- getTodayWithOffset (-100) 0 dayFile <- DB.withConn db $ DB.getDayFileByIssue dateThen issue let fullPath = cfg.yoreDownloadDir dayFile.relative_path secondsUntilMidnight <- liftIO getSecondsUntilMidnight pure Sendfile { headers = [ ("content-type", "application/pdf") , ("cache-control", BS8.pack $ printf "public, max-age=%d" secondsUntilMidnight) ] , path = fullPath } data RootModel = RootModel Day DB.DayFile Int Int instance MimeRender HTML RootModel where mimeRender _ (RootModel dateThen dayFile issue count) = renderHtml $ do H.docTypeHtml $ do H.head $ do H.title $ H.text $ Text.pack $ printf "FZ ⊛ %02d.%02d.%04d ⊛ %s" d m y dayFile.label H.style "body { margin: 0; font-family: Helvetica, sans-serif; } .layout { display: flex; flex-direction: column; width: 100vw; height: 100vh; } .topbar { user-select: none; text-align: center; padding: .5em; } .content { flex: 1; } iframe { border: 0; }" H.body $ do H.div ! A.class_ "layout" $ do H.div ! A.class_ "topbar" $ do H.text $ Text.pack $ printf "Freiburger Zeitung ⊛ %02d.%02d.%04d" d m y H.br buildLink "⟽" (issue - 1) H.text $ " " <> dayFile.label <> " " buildLink "⟾" (issue + 1) H.iframe ! A.src (H.toValue url) ! A.class_ "content" $ mempty where url :: String url = printf "/api/today/issue/%d/fz.pdf" issue (y, m, d) = toGregorian dateThen buildLink label issue' | issue' == 0 = H.a ! A.href "/" $ label | issue' > 0 && issue' < count = H.a ! A.href (H.toValue (printf "/today/issue/%d" issue' :: String)) $ label | otherwise = H.span ! A.style "color: grey;" $ label -- Utils getTodayWithOffset :: Integer -> Integer -> ExceptT Error IO Day getTodayWithOffset yearOffset dayOffset = ExceptT $ first (GenericError . (Text.pack (printf "can't go back %d years and go forward %d days: " (-yearOffset) dayOffset) <>)) . addYears yearOffset . addDays dayOffset <$> getCurrentDay indexDay :: Config -> Day -> Opium.Connection -> ExceptT Error IO () indexDay cfg dayThen conn = do -- Transaction-level lock released automatically after transaction DB.lift $ Opium.execute_ "SELECT pg_advisory_xact_lock(42);" conn mbDi <- DB.readDayIndex dayThen conn case mbDi of Just _ -> Log.info $ printf "index for %s already exists." (show dayThen) Nothing -> do Log.info $ printf "scraping issues for %s" (show dayThen) issues <- liftIO $ getIssuesByDay dayThen paths <- forM issues $ \issue -> do Log.info $ printf "downloading %s" issue.url path <- downloadInto cfg.yoreDownloadDir issue.url pure (issue.label, path) Log.info "creating DB entries" dayIndex <- DB.createDayIndex dayThen conn forM_ paths $ \(text, url) -> DB.createDayFile dayIndex.day_index_id text url conn Log.info "done."