Remove Yore.Repl and Yore.Index, only run indexer at 3 AM

This commit is contained in:
Paul Brinkmeier 2025-09-20 15:33:54 +02:00
parent 67127fd28d
commit 82d8ce63d8
6 changed files with 44 additions and 118 deletions

View File

@ -10,14 +10,18 @@
module Main (main) where module Main (main) where
import Control.Concurrent (forkIO)
import Control.Monad (forM, forM_)
import Control.Monad.IO.Class (liftIO) import Control.Monad.IO.Class (liftIO)
import Control.Monad.Trans.Except (ExceptT (..), catchE, runExceptT) import Control.Monad.Trans.Except (ExceptT (..), catchE, runExceptT)
import Data.Bifunctor (Bifunctor (..))
import Data.Proxy (Proxy (..)) import Data.Proxy (Proxy (..))
import Data.Text (Text) import Data.Text (Text)
import Data.Time (Day, addDays, toGregorian) import Data.Time (Day, LocalTime (..), TimeOfDay (..), ZonedTime (..), addDays, toGregorian)
import GHC.Generics (Generic) import GHC.Generics (Generic)
import Network.HTTP.Media ((//), (/:)) import Network.HTTP.Media ((//), (/:))
import Network.HTTP.Types (Status (..), mkStatus, status200) import Network.HTTP.Types (Status (..), mkStatus, status200)
import Network.Wai (Application, Request (..), Response, responseFile, responseLBS)
import Servant import Servant
( Accept (..) ( Accept (..)
, Capture , Capture
@ -48,25 +52,21 @@ import qualified Data.ByteString.Lazy as LBS
import qualified Data.Text as Text import qualified Data.Text as Text
import qualified Data.Text.Encoding as Encoding import qualified Data.Text.Encoding as Encoding
import qualified Database.PostgreSQL.Opium as Opium import qualified Database.PostgreSQL.Opium as Opium
import Network.Wai (Application, Request (..), Response, responseFile, responseLBS)
import qualified Network.Wai.Handler.Warp as Warp import qualified Network.Wai.Handler.Warp as Warp
import qualified Text.Blaze.Html5 as H import qualified Text.Blaze.Html5 as H
import qualified Text.Blaze.Html5.Attributes as A import qualified Text.Blaze.Html5.Attributes as A
import Envy (type (=@!), type (=@@), type (?)) import Envy (type (=@!), type (=@@), type (?))
import Yore.DB (DB, DayFile (..)) import Yore.DB (DB, DayFile (..))
import Yore.Download (downloadInto)
import Yore.Error (Error (..)) import Yore.Error (Error (..))
import Yore.Schedule (schedule) import Yore.Schedule (schedule)
import Yore.Scrape (Issue (..), getIssuesByDay)
import Yore.Time (addYears, getCurrentDay, getSecondsUntilMidnight) import Yore.Time (addYears, getCurrentDay, getSecondsUntilMidnight)
import Control.Concurrent (forkIO)
import Control.Monad (forM, forM_)
import Data.Bifunctor (Bifunctor (..))
import qualified Envy import qualified Envy
import qualified Yore.DB as DB import qualified Yore.DB as DB
import Yore.Download (downloadInto)
import qualified Yore.Log as Log import qualified Yore.Log as Log
import Yore.Scrape (Issue (..), getIssuesByDay)
newtype ConnectionString = ConnectionString String newtype ConnectionString = ConnectionString String
deriving (Show) deriving (Show)
@ -109,7 +109,7 @@ main = do
runServer cfg db runServer cfg db
runIndexer :: Config -> DB -> IO () runIndexer :: Config -> DB -> IO ()
runIndexer cfg db = schedule (const True) $ do runIndexer cfg db = schedule shouldRunAt $ do
_ <- _ <-
runExceptT $ runExceptT $
catchE catchE
@ -121,6 +121,10 @@ runIndexer cfg db = schedule (const True) $ do
) )
(Log.error . show) (Log.error . show)
pure () pure ()
where
shouldRunAt zt = case localTimeOfDay $ zonedTimeToLocalTime zt of
TimeOfDay 3 0 _ -> True
_ -> False
runServer :: Config -> DB -> IO () runServer :: Config -> DB -> IO ()
runServer cfg db = runServer cfg db =
@ -198,13 +202,13 @@ server cfg db = rootR :<|> todayR :<|> apiTodayR
todayR issue = do todayR issue = do
dateThen <- ExceptT get100YearsAgo dateThen <- ExceptT get100YearsAgo
count <- DB.withConn db $ DB.lift . DB.getNumberOfIssues dateThen count <- DB.withConn db $ DB.getNumberOfIssues dateThen
dayFile <- DB.withConn db $ DB.lift . DB.getDayFileByIssue dateThen issue dayFile <- DB.withConn db $ DB.getDayFileByIssue dateThen issue
pure $ RootModel dateThen dayFile issue count pure $ RootModel dateThen dayFile issue count
apiTodayR issue = handlerToRaw $ do apiTodayR issue = handlerToRaw $ do
dateThen <- ExceptT get100YearsAgo dateThen <- ExceptT get100YearsAgo
dayFile <- DB.withConn db $ DB.lift . DB.getDayFileByIssue dateThen issue dayFile <- DB.withConn db $ DB.getDayFileByIssue dateThen issue
let fullPath = cfg.yoreDownloadDir </> dayFile.relative_path let fullPath = cfg.yoreDownloadDir </> dayFile.relative_path
secondsUntilMidnight <- liftIO getSecondsUntilMidnight secondsUntilMidnight <- liftIO getSecondsUntilMidnight
pure $ pure $
@ -267,7 +271,7 @@ indexDay :: Config -> Day -> Opium.Connection -> ExceptT Error IO ()
indexDay cfg dayThen conn = do indexDay cfg dayThen conn = do
-- Transaction-level lock released automatically after transaction -- Transaction-level lock released automatically after transaction
DB.lift $ Opium.execute_ "SELECT pg_advisory_xact_lock(42);" conn DB.lift $ Opium.execute_ "SELECT pg_advisory_xact_lock(42);" conn
mbDi <- DB.lift $ DB.readDayIndex dayThen conn mbDi <- DB.readDayIndex dayThen conn
case mbDi of case mbDi of
Just _ -> Just _ ->
Log.info $ printf "index for %s already exists." (show dayThen) Log.info $ printf "index for %s already exists." (show dayThen)
@ -279,7 +283,7 @@ indexDay cfg dayThen conn = do
path <- liftIO $ downloadInto cfg.yoreDownloadDir issue.url path <- liftIO $ downloadInto cfg.yoreDownloadDir issue.url
pure (issue.label, path) pure (issue.label, path)
Log.info "creating DB entries" Log.info "creating DB entries"
dayIndex <- DB.lift $ DB.createDayIndex dayThen conn dayIndex <- DB.createDayIndex dayThen conn
forM_ paths $ \(text, url) -> forM_ paths $ \(text, url) ->
DB.lift $ DB.createDayFile dayIndex.day_index_id text url conn DB.createDayFile dayIndex.day_index_id text url conn
Log.info "done." Log.info "done."

View File

@ -98,44 +98,47 @@ newtype CountResult = CountResult
} }
deriving (Show, Generic, Opium.FromRow) deriving (Show, Generic, Opium.FromRow)
createDayIndex :: Day -> Opium.Connection -> IO (Either Opium.Error DayIndex) createDayIndex :: Day -> Opium.Connection -> ExceptT Error IO DayIndex
createDayIndex date = createDayIndex date =
ex runIdentity ex runIdentity
. Opium.fetch . Opium.fetch
"INSERT INTO yore.day_index (day) VALUES ($1) RETURNING *" "INSERT INTO yore.day_index (day) VALUES ($1) RETURNING *"
(Identity date) (Identity date)
createDayFile :: Int -> Text -> FilePath -> Opium.Connection -> IO (Either Opium.Error ()) createDayFile :: Int -> Text -> FilePath -> Opium.Connection -> ExceptT Error IO ()
createDayFile dayId label path = createDayFile dayId label path =
Opium.execute lift
"INSERT INTO yore.day_file (day_index_id, label, relative_path) VALUES ($1, $2, $3)" . Opium.execute
(dayId, label, path) "INSERT INTO yore.day_file (day_index_id, label, relative_path) VALUES ($1, $2, $3)"
(dayId, label, path)
readDayIndex :: Day -> Opium.Connection -> IO (Either Opium.Error (Maybe DayIndex)) readDayIndex :: Day -> Opium.Connection -> ExceptT Error IO (Maybe DayIndex)
readDayIndex date = readDayIndex date =
Opium.fetch lift
"SELECT * FROM yore.day_index WHERE day = $1" . Opium.fetch
(Identity date) "SELECT * FROM yore.day_index WHERE day = $1"
(Identity date)
readDayPaths :: Day -> Opium.Connection -> IO (Either Opium.Error [(DayIndex, DayFile)]) readDayPaths :: Day -> Opium.Connection -> ExceptT Error IO [(DayIndex, DayFile)]
readDayPaths date = readDayPaths date =
Opium.fetch lift
"SELECT * FROM yore.day_index LEFT JOIN yore.day_file USING (day_index_id) WHERE day = $1" . Opium.fetch
(Identity date) "SELECT * FROM yore.day_index LEFT JOIN yore.day_file USING (day_index_id) WHERE day = $1"
(Identity date)
getDayFileByIssue :: Day -> Int -> Opium.Connection -> IO (Either Opium.Error DayFile) getDayFileByIssue :: Day -> Int -> Opium.Connection -> ExceptT Error IO DayFile
getDayFileByIssue date issue = getDayFileByIssue date issue =
ex runIdentity ex runIdentity
. Opium.fetch . Opium.fetch
"SELECT yore.day_file.* FROM yore.day_index LEFT JOIN yore.day_file USING (day_index_id) WHERE day = $1 ORDER BY day_file_id LIMIT 1 OFFSET $2" "SELECT yore.day_file.* FROM yore.day_index LEFT JOIN yore.day_file USING (day_index_id) WHERE day = $1 ORDER BY day_file_id LIMIT 1 OFFSET $2"
(date, issue) (date, issue)
getNumberOfIssues :: Day -> Opium.Connection -> IO (Either Opium.Error Int) getNumberOfIssues :: Day -> Opium.Connection -> ExceptT Error IO Int
getNumberOfIssues date = getNumberOfIssues date =
ex (count . runIdentity) ex (count . runIdentity)
. Opium.fetch . Opium.fetch
"SELECT count(*) FROM yore.day_index LEFT JOIN yore.day_file USING (day_index_id) WHERE day = $1" "SELECT count(*) FROM yore.day_index LEFT JOIN yore.day_file USING (day_index_id) WHERE day = $1"
(Identity date) (Identity date)
ex :: (a -> b) -> IO (Either e a) -> IO (Either e b) ex :: (a -> b) -> IO (Either Opium.Error a) -> ExceptT Error IO b
ex = fmap . fmap ex f = fmap f . lift

View File

@ -1,21 +0,0 @@
{-# LANGUAGE OverloadedRecordDot #-}
module Yore.Index (createEntries) where
import Control.Exception (Exception, throwIO)
import Control.Monad (forM_)
import Data.Text (Text)
import Data.Time (Day)
import qualified Database.PostgreSQL.Opium as Opium
import qualified Yore.DB as DB
createEntries :: Day -> [(Text, FilePath)] -> Opium.Connection -> IO ()
createEntries date urls conn = do
dayIndex <- confidently $ DB.createDayIndex date conn
forM_ urls $ \(text, url) ->
confidently $ DB.createDayFile dayIndex.day_index_id text url conn
confidently :: (Exception e) => IO (Either e a) -> IO a
confidently action = action >>= either throwIO pure

View File

@ -1,59 +0,0 @@
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedRecordDot #-}
{-# LANGUAGE OverloadedStrings #-}
module Yore.Repl (connect, exec, getToday, indexDay, addYears) where
import Control.Exception (bracket, throwIO)
import Control.Monad (forM)
import Data.Time (Day, getZonedTime, localDay, zonedTimeToLocalTime)
import Text.Printf (printf)
import qualified Database.PostgreSQL.Opium as Opium
import Yore.DB (DayIndex)
import Yore.Download (downloadInto)
import Yore.Index (createEntries)
import Yore.Scrape (Issue (..), getIssuesByDay)
import Yore.Time (addYears)
import qualified Yore.DB as DB
connect :: IO Opium.Connection
connect =
Opium.connect "host=localhost port=5433 user=yore-test" >>= \case
Left e -> error $ show e
Right c -> pure c
exec :: (Opium.Connection -> IO a) -> IO a
exec = bracket connect Opium.close
getToday :: IO [Issue]
getToday = do
now <- getZonedTime
print now
let currentDay = localDay $ zonedTimeToLocalTime now
case addYears (-100) currentDay of
Right then_ ->
getIssuesByDay then_
Left _ ->
pure []
indexDay :: Day -> IO ()
indexDay date = exec $ \conn -> do
either throwIO pure =<< Opium.execute_ "SELECT pg_advisory_lock(42);" conn
printf "Checking whether %s already exists...\n" (show date)
mbDi <- either throwIO pure =<< DB.readDayIndex date conn
case mbDi of
Just (_ :: DayIndex) ->
putStrLn "Nothing to do."
Nothing -> do
putStrLn "Retrieving URLs..."
issues <- getIssuesByDay date
paths <- forM issues $ \issue -> do
printf "Downloading %s...\n" issue.url
path <- downloadInto "./download" issue.url
pure (issue.label, path)
putStrLn "Creating DB entries..."
createEntries date paths conn
putStrLn "Done."

View File

@ -1,12 +1,13 @@
module Yore.Schedule (schedule) where module Yore.Schedule (schedule) where
import Control.Concurrent (forkIO, threadDelay)
import Control.Monad (when) import Control.Monad (when)
import Data.Time (ZonedTime, getZonedTime, secondsToNominalDiffTime, nominalDiffTimeToSeconds) import Data.Functor (($>))
import Control.Concurrent (threadDelay, forkIO) import Data.Time (ZonedTime, getZonedTime, nominalDiffTimeToSeconds, secondsToNominalDiffTime)
import Data.Time.Clock.POSIX (getPOSIXTime) import Data.Time.Clock.POSIX (getPOSIXTime)
schedule :: (ZonedTime -> Bool) -> IO () -> IO () schedule :: (ZonedTime -> Bool) -> IO a -> IO ()
schedule shouldRunAt f = everyMinute $ \now -> when (shouldRunAt now) f schedule shouldRunAt f = everyMinute $ \now -> when (shouldRunAt now) (f $> ())
everyMinute :: (ZonedTime -> IO ()) -> IO () everyMinute :: (ZonedTime -> IO ()) -> IO ()
everyMinute f = do everyMinute f = do

View File

@ -37,9 +37,7 @@ library
, Yore.DB , Yore.DB
, Yore.Download , Yore.Download
, Yore.Error , Yore.Error
, Yore.Index
, Yore.Log , Yore.Log
, Yore.Repl
, Yore.Schedule , Yore.Schedule
, Yore.Scrape , Yore.Scrape
, Yore.Time , Yore.Time