{-# LANGUAGE DataKinds #-} {-# LANGUAGE ExplicitNamespaces #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedRecordDot #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE QuantifiedConstraints #-} {-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE TypeSynonymInstances #-} module Main (main) where import Control.Concurrent (forkIO) import Control.Monad (forM, forM_) import Control.Monad.IO.Class (liftIO) import Control.Monad.Trans.Except (ExceptT (..), catchE, runExceptT) import Data.Bifunctor (Bifunctor (..)) import Data.Proxy (Proxy (..)) import Data.Text (Text) import Data.Time (Day, LocalTime (..), TimeOfDay (..), ZonedTime (..), addDays, toGregorian) import GHC.Generics (Generic) import Network.HTTP.Media ((//), (/:)) import Network.HTTP.Types (Status (..), mkStatus, status200) import Network.Wai (Application, Request (..), Response, responseFile, responseLBS) import Servant ( Accept (..) , Capture , Get , Handler , MimeRender (..) , Raw , ServerError (..) , ServerT , Tagged (..) , err404 , err500 , hoistServer , runHandler , serve , (:<|>) (..) , (:>) , pattern MkHandler ) import System.Exit (exitFailure) import System.FilePath (()) import Text.Blaze.Html.Renderer.Utf8 (renderHtml) import Text.Blaze.Html5 ((!)) import Text.Printf (printf) import qualified Data.ByteString.Char8 as BS8 import qualified Data.ByteString.Lazy as LBS import qualified Data.Text as Text import qualified Data.Text.Encoding as Encoding import qualified Database.PostgreSQL.Opium as Opium import qualified Network.Wai.Handler.Warp as Warp import qualified Text.Blaze.Html5 as H import qualified Text.Blaze.Html5.Attributes as A import Envy (type (=@!), type (=@@), type (?)) import Yore.DB (DB, DayFile (..)) import Yore.Download (downloadInto) import Yore.Error (Error (..)) import Yore.Schedule (schedule) import Yore.Scrape (Issue (..), getIssuesByDay) import Yore.Time (addYears, getCurrentDay, getSecondsUntilMidnight) import qualified Envy import qualified Yore.DB as DB import qualified Yore.Log as Log newtype ConnectionString = ConnectionString String deriving (Show) instance Envy.ReadEnvVar ConnectionString where readEnvVar = fmap ConnectionString . Envy.readEnvVar data ConfigT f = Config { yorePort :: f =@@ Int ? 3000 , yoreDownloadDir :: f =@@ FilePath ? "./download" , yoreDb :: f =@! Text } deriving (Generic) type Config = ConfigT Envy.Value deriving instance Show Config main :: IO () main = do cfg <- Envy.load @ConfigT >>= \case Left err -> do Log.error $ printf "failed to read config: %s" err exitFailure Right c -> pure c db <- DB.initDB cfg.yoreDb _ <- runExceptT $ doIndex cfg db _ <- forkIO $ runIndexer cfg db runServer cfg db runIndexer :: Config -> DB -> IO () runIndexer cfg db = schedule shouldRunAt $ runExceptT $ doIndex cfg db where shouldRunAt zt = case localTimeOfDay $ zonedTimeToLocalTime zt of TimeOfDay 3 0 _ -> True _ -> False doIndex :: Config -> DB -> ExceptT Error IO () doIndex cfg db = catchE ( DB.withTransaction db $ \conn -> do forM_ [0, 1, 2] $ \i -> do dayThen <- getTodayWithOffset (-100) i indexDay cfg dayThen conn ) (Log.error . show) runServer :: Config -> DB -> IO () runServer cfg db = Warp.runSettings settings $ serve (Proxy @API) $ hoistServer (Proxy @API) nt $ server cfg db where settings = foldr ($) Warp.defaultSettings [ Warp.setLogger logger , Warp.setPort cfg.yorePort , Warp.setBeforeMainLoop $ Log.info $ printf "listening on port %d" cfg.yorePort , Warp.setOnException onException ] logger req status _ = do Log.info $ printf "%d %s" (statusCode status) (requestLine req) onException mbReq ex = do Log.error $ printf "unhandled exception%s: %s%s" (maybe "" ((" in " ++) . requestLine) mbReq) (show ex) requestLine :: Request -> String requestLine req = printf "%s %s" (BS8.unpack $ requestMethod req) (BS8.unpack $ rawPathInfo req <> rawQueryString req) nt :: ExceptT Error IO a -> Handler a nt action = MkHandler $ do res <- runExceptT action case res of Left err -> do Log.error $ show err pure $ Left $ toServerError err Right x -> pure $ Right x type API = Get '[HTML] RootModel :<|> "today" :> "issue" :> Capture "issue" Int :> Get '[HTML] RootModel :<|> "api" :> "today" :> "issue" :> Capture "issue" Int :> "fz.pdf" :> Raw toServerError :: Error -> ServerError toServerError = \case ConnectionError msg -> err500 {errBody = encodeUtf8LBS $ "database connection failed:\n" ++ show msg} DBError (Opium.ErrorNotExactlyOneRow 0) -> err404 {errBody = "db error: could not find record"} DBError msg -> err500 {errBody = encodeUtf8LBS $ "db error: " ++ show msg} GenericError msg -> err500 {errBody = encodeUtf8LBS $ "generic error:\n" ++ show msg} encodeUtf8LBS :: String -> LBS.ByteString encodeUtf8LBS = LBS.fromStrict . Encoding.encodeUtf8 . Text.pack handlerToRaw :: ExceptT Error IO Response -> Tagged t Application handlerToRaw handler = Tagged $ \_ respond -> do r <- runHandler $ nt handler case r of Left e -> respond $ responseLBS (mkStatus e.errHTTPCode $ BS8.pack e.errReasonPhrase) e.errHeaders e.errBody Right response -> respond response server :: Config -> DB -> ServerT API (ExceptT Error IO) server cfg db = rootR :<|> todayR :<|> apiTodayR where rootR = todayR 0 todayR issue = do dateThen <- ExceptT get100YearsAgo count <- DB.withConn db $ DB.getNumberOfIssues dateThen dayFile <- DB.withConn db $ DB.getDayFileByIssue dateThen issue pure $ RootModel dateThen dayFile issue count apiTodayR issue = handlerToRaw $ do dateThen <- ExceptT get100YearsAgo dayFile <- DB.withConn db $ DB.getDayFileByIssue dateThen issue let fullPath = cfg.yoreDownloadDir dayFile.relative_path secondsUntilMidnight <- liftIO getSecondsUntilMidnight pure $ responseFile status200 [ ("content-type", "application/pdf") , ("cache-control", BS8.pack $ printf "public, max-age=%d" secondsUntilMidnight) ] fullPath Nothing data RootModel = RootModel Day DB.DayFile Int Int instance MimeRender HTML RootModel where mimeRender _ (RootModel dateThen dayFile issue count) = renderHtml $ do H.docTypeHtml $ do H.head $ do H.title $ H.text $ Text.pack $ printf "FZ ⊛ %02d.%02d.%04d ⊛ %s" d m y dayFile.label H.style "body { margin: 0; font-family: Helvetica, sans-serif; } .layout { display: flex; flex-direction: column; width: 100vw; height: 100vh; } .topbar { user-select: none; text-align: center; padding: .5em; } .content { flex: 1; } iframe { border: 0; }" H.body $ do H.div ! A.class_ "layout" $ do H.div ! A.class_ "topbar" $ do H.text $ Text.pack $ printf "Freiburger Zeitung ⊛ %02d.%02d.%04d" d m y H.br buildLink "⟽" (issue - 1) H.text $ " " <> dayFile.label <> " " buildLink "⟾" (issue + 1) H.iframe ! A.src (H.toValue url) ! A.class_ "content" $ mempty where url :: String url = printf "/api/today/issue/%d/fz.pdf" issue (y, m, d) = toGregorian dateThen buildLink label issue' | issue' == 0 = H.a ! A.href "/" $ label | issue' > 0 && issue' < count = H.a ! A.href (H.toValue (printf "/today/issue/%d" issue' :: String)) $ label | otherwise = H.span ! A.style "color: grey;" $ label -- Utils data HTML instance Accept HTML where contentType _ = "text" // "html" /: ("charset", "utf-8") get100YearsAgo :: IO (Either Error Day) get100YearsAgo = runExceptT $ getTodayWithOffset (-100) 0 getTodayWithOffset :: Integer -> Integer -> ExceptT Error IO Day getTodayWithOffset yearOffset dayOffset = ExceptT $ first (GenericError . (Text.pack (printf "can't go back %d years and go forward %d days: " (-yearOffset) dayOffset) <>)) . addYears yearOffset . addDays dayOffset <$> getCurrentDay indexDay :: Config -> Day -> Opium.Connection -> ExceptT Error IO () indexDay cfg dayThen conn = do -- Transaction-level lock released automatically after transaction DB.lift $ Opium.execute_ "SELECT pg_advisory_xact_lock(42);" conn mbDi <- DB.readDayIndex dayThen conn case mbDi of Just _ -> Log.info $ printf "index for %s already exists." (show dayThen) Nothing -> do Log.info $ printf "scraping issues for %s" (show dayThen) issues <- liftIO $ getIssuesByDay dayThen paths <- forM issues $ \issue -> do Log.info $ printf "downloading %s" issue.url path <- liftIO $ downloadInto cfg.yoreDownloadDir issue.url pure (issue.label, path) Log.info "creating DB entries" dayIndex <- DB.createDayIndex dayThen conn forM_ paths $ \(text, url) -> DB.createDayFile dayIndex.day_index_id text url conn Log.info "done."