Remove Yore.Repl and Yore.Index, only run indexer at 3 AM
This commit is contained in:
parent
67127fd28d
commit
82d8ce63d8
32
app/Main.hs
32
app/Main.hs
@ -10,14 +10,18 @@
|
|||||||
|
|
||||||
module Main (main) where
|
module Main (main) where
|
||||||
|
|
||||||
|
import Control.Concurrent (forkIO)
|
||||||
|
import Control.Monad (forM, forM_)
|
||||||
import Control.Monad.IO.Class (liftIO)
|
import Control.Monad.IO.Class (liftIO)
|
||||||
import Control.Monad.Trans.Except (ExceptT (..), catchE, runExceptT)
|
import Control.Monad.Trans.Except (ExceptT (..), catchE, runExceptT)
|
||||||
|
import Data.Bifunctor (Bifunctor (..))
|
||||||
import Data.Proxy (Proxy (..))
|
import Data.Proxy (Proxy (..))
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
import Data.Time (Day, addDays, toGregorian)
|
import Data.Time (Day, LocalTime (..), TimeOfDay (..), ZonedTime (..), addDays, toGregorian)
|
||||||
import GHC.Generics (Generic)
|
import GHC.Generics (Generic)
|
||||||
import Network.HTTP.Media ((//), (/:))
|
import Network.HTTP.Media ((//), (/:))
|
||||||
import Network.HTTP.Types (Status (..), mkStatus, status200)
|
import Network.HTTP.Types (Status (..), mkStatus, status200)
|
||||||
|
import Network.Wai (Application, Request (..), Response, responseFile, responseLBS)
|
||||||
import Servant
|
import Servant
|
||||||
( Accept (..)
|
( Accept (..)
|
||||||
, Capture
|
, Capture
|
||||||
@ -48,25 +52,21 @@ import qualified Data.ByteString.Lazy as LBS
|
|||||||
import qualified Data.Text as Text
|
import qualified Data.Text as Text
|
||||||
import qualified Data.Text.Encoding as Encoding
|
import qualified Data.Text.Encoding as Encoding
|
||||||
import qualified Database.PostgreSQL.Opium as Opium
|
import qualified Database.PostgreSQL.Opium as Opium
|
||||||
import Network.Wai (Application, Request (..), Response, responseFile, responseLBS)
|
|
||||||
import qualified Network.Wai.Handler.Warp as Warp
|
import qualified Network.Wai.Handler.Warp as Warp
|
||||||
import qualified Text.Blaze.Html5 as H
|
import qualified Text.Blaze.Html5 as H
|
||||||
import qualified Text.Blaze.Html5.Attributes as A
|
import qualified Text.Blaze.Html5.Attributes as A
|
||||||
|
|
||||||
import Envy (type (=@!), type (=@@), type (?))
|
import Envy (type (=@!), type (=@@), type (?))
|
||||||
import Yore.DB (DB, DayFile (..))
|
import Yore.DB (DB, DayFile (..))
|
||||||
|
import Yore.Download (downloadInto)
|
||||||
import Yore.Error (Error (..))
|
import Yore.Error (Error (..))
|
||||||
import Yore.Schedule (schedule)
|
import Yore.Schedule (schedule)
|
||||||
|
import Yore.Scrape (Issue (..), getIssuesByDay)
|
||||||
import Yore.Time (addYears, getCurrentDay, getSecondsUntilMidnight)
|
import Yore.Time (addYears, getCurrentDay, getSecondsUntilMidnight)
|
||||||
|
|
||||||
import Control.Concurrent (forkIO)
|
|
||||||
import Control.Monad (forM, forM_)
|
|
||||||
import Data.Bifunctor (Bifunctor (..))
|
|
||||||
import qualified Envy
|
import qualified Envy
|
||||||
import qualified Yore.DB as DB
|
import qualified Yore.DB as DB
|
||||||
import Yore.Download (downloadInto)
|
|
||||||
import qualified Yore.Log as Log
|
import qualified Yore.Log as Log
|
||||||
import Yore.Scrape (Issue (..), getIssuesByDay)
|
|
||||||
|
|
||||||
newtype ConnectionString = ConnectionString String
|
newtype ConnectionString = ConnectionString String
|
||||||
deriving (Show)
|
deriving (Show)
|
||||||
@ -109,7 +109,7 @@ main = do
|
|||||||
runServer cfg db
|
runServer cfg db
|
||||||
|
|
||||||
runIndexer :: Config -> DB -> IO ()
|
runIndexer :: Config -> DB -> IO ()
|
||||||
runIndexer cfg db = schedule (const True) $ do
|
runIndexer cfg db = schedule shouldRunAt $ do
|
||||||
_ <-
|
_ <-
|
||||||
runExceptT $
|
runExceptT $
|
||||||
catchE
|
catchE
|
||||||
@ -121,6 +121,10 @@ runIndexer cfg db = schedule (const True) $ do
|
|||||||
)
|
)
|
||||||
(Log.error . show)
|
(Log.error . show)
|
||||||
pure ()
|
pure ()
|
||||||
|
where
|
||||||
|
shouldRunAt zt = case localTimeOfDay $ zonedTimeToLocalTime zt of
|
||||||
|
TimeOfDay 3 0 _ -> True
|
||||||
|
_ -> False
|
||||||
|
|
||||||
runServer :: Config -> DB -> IO ()
|
runServer :: Config -> DB -> IO ()
|
||||||
runServer cfg db =
|
runServer cfg db =
|
||||||
@ -198,13 +202,13 @@ server cfg db = rootR :<|> todayR :<|> apiTodayR
|
|||||||
|
|
||||||
todayR issue = do
|
todayR issue = do
|
||||||
dateThen <- ExceptT get100YearsAgo
|
dateThen <- ExceptT get100YearsAgo
|
||||||
count <- DB.withConn db $ DB.lift . DB.getNumberOfIssues dateThen
|
count <- DB.withConn db $ DB.getNumberOfIssues dateThen
|
||||||
dayFile <- DB.withConn db $ DB.lift . DB.getDayFileByIssue dateThen issue
|
dayFile <- DB.withConn db $ DB.getDayFileByIssue dateThen issue
|
||||||
pure $ RootModel dateThen dayFile issue count
|
pure $ RootModel dateThen dayFile issue count
|
||||||
|
|
||||||
apiTodayR issue = handlerToRaw $ do
|
apiTodayR issue = handlerToRaw $ do
|
||||||
dateThen <- ExceptT get100YearsAgo
|
dateThen <- ExceptT get100YearsAgo
|
||||||
dayFile <- DB.withConn db $ DB.lift . DB.getDayFileByIssue dateThen issue
|
dayFile <- DB.withConn db $ DB.getDayFileByIssue dateThen issue
|
||||||
let fullPath = cfg.yoreDownloadDir </> dayFile.relative_path
|
let fullPath = cfg.yoreDownloadDir </> dayFile.relative_path
|
||||||
secondsUntilMidnight <- liftIO getSecondsUntilMidnight
|
secondsUntilMidnight <- liftIO getSecondsUntilMidnight
|
||||||
pure $
|
pure $
|
||||||
@ -267,7 +271,7 @@ indexDay :: Config -> Day -> Opium.Connection -> ExceptT Error IO ()
|
|||||||
indexDay cfg dayThen conn = do
|
indexDay cfg dayThen conn = do
|
||||||
-- Transaction-level lock released automatically after transaction
|
-- Transaction-level lock released automatically after transaction
|
||||||
DB.lift $ Opium.execute_ "SELECT pg_advisory_xact_lock(42);" conn
|
DB.lift $ Opium.execute_ "SELECT pg_advisory_xact_lock(42);" conn
|
||||||
mbDi <- DB.lift $ DB.readDayIndex dayThen conn
|
mbDi <- DB.readDayIndex dayThen conn
|
||||||
case mbDi of
|
case mbDi of
|
||||||
Just _ ->
|
Just _ ->
|
||||||
Log.info $ printf "index for %s already exists." (show dayThen)
|
Log.info $ printf "index for %s already exists." (show dayThen)
|
||||||
@ -279,7 +283,7 @@ indexDay cfg dayThen conn = do
|
|||||||
path <- liftIO $ downloadInto cfg.yoreDownloadDir issue.url
|
path <- liftIO $ downloadInto cfg.yoreDownloadDir issue.url
|
||||||
pure (issue.label, path)
|
pure (issue.label, path)
|
||||||
Log.info "creating DB entries"
|
Log.info "creating DB entries"
|
||||||
dayIndex <- DB.lift $ DB.createDayIndex dayThen conn
|
dayIndex <- DB.createDayIndex dayThen conn
|
||||||
forM_ paths $ \(text, url) ->
|
forM_ paths $ \(text, url) ->
|
||||||
DB.lift $ DB.createDayFile dayIndex.day_index_id text url conn
|
DB.createDayFile dayIndex.day_index_id text url conn
|
||||||
Log.info "done."
|
Log.info "done."
|
||||||
|
|||||||
@ -98,44 +98,47 @@ newtype CountResult = CountResult
|
|||||||
}
|
}
|
||||||
deriving (Show, Generic, Opium.FromRow)
|
deriving (Show, Generic, Opium.FromRow)
|
||||||
|
|
||||||
createDayIndex :: Day -> Opium.Connection -> IO (Either Opium.Error DayIndex)
|
createDayIndex :: Day -> Opium.Connection -> ExceptT Error IO DayIndex
|
||||||
createDayIndex date =
|
createDayIndex date =
|
||||||
ex runIdentity
|
ex runIdentity
|
||||||
. Opium.fetch
|
. Opium.fetch
|
||||||
"INSERT INTO yore.day_index (day) VALUES ($1) RETURNING *"
|
"INSERT INTO yore.day_index (day) VALUES ($1) RETURNING *"
|
||||||
(Identity date)
|
(Identity date)
|
||||||
|
|
||||||
createDayFile :: Int -> Text -> FilePath -> Opium.Connection -> IO (Either Opium.Error ())
|
createDayFile :: Int -> Text -> FilePath -> Opium.Connection -> ExceptT Error IO ()
|
||||||
createDayFile dayId label path =
|
createDayFile dayId label path =
|
||||||
Opium.execute
|
lift
|
||||||
|
. Opium.execute
|
||||||
"INSERT INTO yore.day_file (day_index_id, label, relative_path) VALUES ($1, $2, $3)"
|
"INSERT INTO yore.day_file (day_index_id, label, relative_path) VALUES ($1, $2, $3)"
|
||||||
(dayId, label, path)
|
(dayId, label, path)
|
||||||
|
|
||||||
readDayIndex :: Day -> Opium.Connection -> IO (Either Opium.Error (Maybe DayIndex))
|
readDayIndex :: Day -> Opium.Connection -> ExceptT Error IO (Maybe DayIndex)
|
||||||
readDayIndex date =
|
readDayIndex date =
|
||||||
Opium.fetch
|
lift
|
||||||
|
. Opium.fetch
|
||||||
"SELECT * FROM yore.day_index WHERE day = $1"
|
"SELECT * FROM yore.day_index WHERE day = $1"
|
||||||
(Identity date)
|
(Identity date)
|
||||||
|
|
||||||
readDayPaths :: Day -> Opium.Connection -> IO (Either Opium.Error [(DayIndex, DayFile)])
|
readDayPaths :: Day -> Opium.Connection -> ExceptT Error IO [(DayIndex, DayFile)]
|
||||||
readDayPaths date =
|
readDayPaths date =
|
||||||
Opium.fetch
|
lift
|
||||||
|
. Opium.fetch
|
||||||
"SELECT * FROM yore.day_index LEFT JOIN yore.day_file USING (day_index_id) WHERE day = $1"
|
"SELECT * FROM yore.day_index LEFT JOIN yore.day_file USING (day_index_id) WHERE day = $1"
|
||||||
(Identity date)
|
(Identity date)
|
||||||
|
|
||||||
getDayFileByIssue :: Day -> Int -> Opium.Connection -> IO (Either Opium.Error DayFile)
|
getDayFileByIssue :: Day -> Int -> Opium.Connection -> ExceptT Error IO DayFile
|
||||||
getDayFileByIssue date issue =
|
getDayFileByIssue date issue =
|
||||||
ex runIdentity
|
ex runIdentity
|
||||||
. Opium.fetch
|
. Opium.fetch
|
||||||
"SELECT yore.day_file.* FROM yore.day_index LEFT JOIN yore.day_file USING (day_index_id) WHERE day = $1 ORDER BY day_file_id LIMIT 1 OFFSET $2"
|
"SELECT yore.day_file.* FROM yore.day_index LEFT JOIN yore.day_file USING (day_index_id) WHERE day = $1 ORDER BY day_file_id LIMIT 1 OFFSET $2"
|
||||||
(date, issue)
|
(date, issue)
|
||||||
|
|
||||||
getNumberOfIssues :: Day -> Opium.Connection -> IO (Either Opium.Error Int)
|
getNumberOfIssues :: Day -> Opium.Connection -> ExceptT Error IO Int
|
||||||
getNumberOfIssues date =
|
getNumberOfIssues date =
|
||||||
ex (count . runIdentity)
|
ex (count . runIdentity)
|
||||||
. Opium.fetch
|
. Opium.fetch
|
||||||
"SELECT count(*) FROM yore.day_index LEFT JOIN yore.day_file USING (day_index_id) WHERE day = $1"
|
"SELECT count(*) FROM yore.day_index LEFT JOIN yore.day_file USING (day_index_id) WHERE day = $1"
|
||||||
(Identity date)
|
(Identity date)
|
||||||
|
|
||||||
ex :: (a -> b) -> IO (Either e a) -> IO (Either e b)
|
ex :: (a -> b) -> IO (Either Opium.Error a) -> ExceptT Error IO b
|
||||||
ex = fmap . fmap
|
ex f = fmap f . lift
|
||||||
|
|||||||
@ -1,21 +0,0 @@
|
|||||||
{-# LANGUAGE OverloadedRecordDot #-}
|
|
||||||
|
|
||||||
module Yore.Index (createEntries) where
|
|
||||||
|
|
||||||
import Control.Exception (Exception, throwIO)
|
|
||||||
import Control.Monad (forM_)
|
|
||||||
import Data.Text (Text)
|
|
||||||
import Data.Time (Day)
|
|
||||||
|
|
||||||
import qualified Database.PostgreSQL.Opium as Opium
|
|
||||||
|
|
||||||
import qualified Yore.DB as DB
|
|
||||||
|
|
||||||
createEntries :: Day -> [(Text, FilePath)] -> Opium.Connection -> IO ()
|
|
||||||
createEntries date urls conn = do
|
|
||||||
dayIndex <- confidently $ DB.createDayIndex date conn
|
|
||||||
forM_ urls $ \(text, url) ->
|
|
||||||
confidently $ DB.createDayFile dayIndex.day_index_id text url conn
|
|
||||||
|
|
||||||
confidently :: (Exception e) => IO (Either e a) -> IO a
|
|
||||||
confidently action = action >>= either throwIO pure
|
|
||||||
@ -1,59 +0,0 @@
|
|||||||
{-# LANGUAGE LambdaCase #-}
|
|
||||||
{-# LANGUAGE OverloadedRecordDot #-}
|
|
||||||
{-# LANGUAGE OverloadedStrings #-}
|
|
||||||
|
|
||||||
module Yore.Repl (connect, exec, getToday, indexDay, addYears) where
|
|
||||||
|
|
||||||
import Control.Exception (bracket, throwIO)
|
|
||||||
import Control.Monad (forM)
|
|
||||||
import Data.Time (Day, getZonedTime, localDay, zonedTimeToLocalTime)
|
|
||||||
import Text.Printf (printf)
|
|
||||||
|
|
||||||
import qualified Database.PostgreSQL.Opium as Opium
|
|
||||||
|
|
||||||
import Yore.DB (DayIndex)
|
|
||||||
import Yore.Download (downloadInto)
|
|
||||||
import Yore.Index (createEntries)
|
|
||||||
import Yore.Scrape (Issue (..), getIssuesByDay)
|
|
||||||
import Yore.Time (addYears)
|
|
||||||
|
|
||||||
import qualified Yore.DB as DB
|
|
||||||
|
|
||||||
connect :: IO Opium.Connection
|
|
||||||
connect =
|
|
||||||
Opium.connect "host=localhost port=5433 user=yore-test" >>= \case
|
|
||||||
Left e -> error $ show e
|
|
||||||
Right c -> pure c
|
|
||||||
|
|
||||||
exec :: (Opium.Connection -> IO a) -> IO a
|
|
||||||
exec = bracket connect Opium.close
|
|
||||||
|
|
||||||
getToday :: IO [Issue]
|
|
||||||
getToday = do
|
|
||||||
now <- getZonedTime
|
|
||||||
print now
|
|
||||||
let currentDay = localDay $ zonedTimeToLocalTime now
|
|
||||||
case addYears (-100) currentDay of
|
|
||||||
Right then_ ->
|
|
||||||
getIssuesByDay then_
|
|
||||||
Left _ ->
|
|
||||||
pure []
|
|
||||||
|
|
||||||
indexDay :: Day -> IO ()
|
|
||||||
indexDay date = exec $ \conn -> do
|
|
||||||
either throwIO pure =<< Opium.execute_ "SELECT pg_advisory_lock(42);" conn
|
|
||||||
printf "Checking whether %s already exists...\n" (show date)
|
|
||||||
mbDi <- either throwIO pure =<< DB.readDayIndex date conn
|
|
||||||
case mbDi of
|
|
||||||
Just (_ :: DayIndex) ->
|
|
||||||
putStrLn "Nothing to do."
|
|
||||||
Nothing -> do
|
|
||||||
putStrLn "Retrieving URLs..."
|
|
||||||
issues <- getIssuesByDay date
|
|
||||||
paths <- forM issues $ \issue -> do
|
|
||||||
printf "Downloading %s...\n" issue.url
|
|
||||||
path <- downloadInto "./download" issue.url
|
|
||||||
pure (issue.label, path)
|
|
||||||
putStrLn "Creating DB entries..."
|
|
||||||
createEntries date paths conn
|
|
||||||
putStrLn "Done."
|
|
||||||
@ -1,12 +1,13 @@
|
|||||||
module Yore.Schedule (schedule) where
|
module Yore.Schedule (schedule) where
|
||||||
|
|
||||||
|
import Control.Concurrent (forkIO, threadDelay)
|
||||||
import Control.Monad (when)
|
import Control.Monad (when)
|
||||||
import Data.Time (ZonedTime, getZonedTime, secondsToNominalDiffTime, nominalDiffTimeToSeconds)
|
import Data.Functor (($>))
|
||||||
import Control.Concurrent (threadDelay, forkIO)
|
import Data.Time (ZonedTime, getZonedTime, nominalDiffTimeToSeconds, secondsToNominalDiffTime)
|
||||||
import Data.Time.Clock.POSIX (getPOSIXTime)
|
import Data.Time.Clock.POSIX (getPOSIXTime)
|
||||||
|
|
||||||
schedule :: (ZonedTime -> Bool) -> IO () -> IO ()
|
schedule :: (ZonedTime -> Bool) -> IO a -> IO ()
|
||||||
schedule shouldRunAt f = everyMinute $ \now -> when (shouldRunAt now) f
|
schedule shouldRunAt f = everyMinute $ \now -> when (shouldRunAt now) (f $> ())
|
||||||
|
|
||||||
everyMinute :: (ZonedTime -> IO ()) -> IO ()
|
everyMinute :: (ZonedTime -> IO ()) -> IO ()
|
||||||
everyMinute f = do
|
everyMinute f = do
|
||||||
|
|||||||
@ -37,9 +37,7 @@ library
|
|||||||
, Yore.DB
|
, Yore.DB
|
||||||
, Yore.Download
|
, Yore.Download
|
||||||
, Yore.Error
|
, Yore.Error
|
||||||
, Yore.Index
|
|
||||||
, Yore.Log
|
, Yore.Log
|
||||||
, Yore.Repl
|
|
||||||
, Yore.Schedule
|
, Yore.Schedule
|
||||||
, Yore.Scrape
|
, Yore.Scrape
|
||||||
, Yore.Time
|
, Yore.Time
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user