Compare commits
4 Commits
22867bc229
...
dbea1c549f
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dbea1c549f | ||
|
|
82d8ce63d8 | ||
|
|
67127fd28d | ||
|
|
368bbb0124 |
@ -44,3 +44,5 @@ By setting `$BASE_DIR` you can persist the database for later runs.
|
||||
- CI
|
||||
- Docker container (in flake)
|
||||
- try fourmolu
|
||||
- Test leap second handling in the `time` package
|
||||
- use queue to sync logging of indexer and main thread
|
||||
|
||||
90
app/Main.hs
90
app/Main.hs
@ -10,14 +10,18 @@
|
||||
|
||||
module Main (main) where
|
||||
|
||||
import Control.Concurrent (forkIO)
|
||||
import Control.Monad (forM, forM_)
|
||||
import Control.Monad.IO.Class (liftIO)
|
||||
import Control.Monad.Trans.Except (ExceptT (..), runExceptT)
|
||||
import Control.Monad.Trans.Except (ExceptT (..), catchE, runExceptT)
|
||||
import Data.Bifunctor (Bifunctor (..))
|
||||
import Data.Proxy (Proxy (..))
|
||||
import Data.Text (Text)
|
||||
import Data.Time (Day, toGregorian)
|
||||
import Data.Time (Day, LocalTime (..), TimeOfDay (..), ZonedTime (..), addDays, toGregorian)
|
||||
import GHC.Generics (Generic)
|
||||
import Network.HTTP.Media ((//), (/:))
|
||||
import Network.HTTP.Types (Status (..), mkStatus, status200)
|
||||
import Network.Wai (Application, Request (..), Response, responseFile, responseLBS)
|
||||
import Servant
|
||||
( Accept (..)
|
||||
, Capture
|
||||
@ -48,14 +52,16 @@ import qualified Data.ByteString.Lazy as LBS
|
||||
import qualified Data.Text as Text
|
||||
import qualified Data.Text.Encoding as Encoding
|
||||
import qualified Database.PostgreSQL.Opium as Opium
|
||||
import Network.Wai (Application, Request (..), Response, responseFile, responseLBS)
|
||||
import qualified Network.Wai.Handler.Warp as Warp
|
||||
import qualified Text.Blaze.Html5 as H
|
||||
import qualified Text.Blaze.Html5.Attributes as A
|
||||
|
||||
import Envy (type (=@!), type (=@@), type (?))
|
||||
import Yore.DB (DayFile (..))
|
||||
import Yore.DB (DB, DayFile (..))
|
||||
import Yore.Download (downloadInto)
|
||||
import Yore.Error (Error (..))
|
||||
import Yore.Schedule (schedule)
|
||||
import Yore.Scrape (Issue (..), getIssuesByDay)
|
||||
import Yore.Time (addYears, getCurrentDay, getSecondsUntilMidnight)
|
||||
|
||||
import qualified Envy
|
||||
@ -89,7 +95,38 @@ main = do
|
||||
Right c ->
|
||||
pure c
|
||||
|
||||
let
|
||||
db <- DB.initDB cfg.yoreDb
|
||||
|
||||
_ <- runExceptT $ doIndex cfg db
|
||||
|
||||
_ <- forkIO $ runIndexer cfg db
|
||||
runServer cfg db
|
||||
|
||||
runIndexer :: Config -> DB -> IO ()
|
||||
runIndexer cfg db =
|
||||
schedule shouldRunAt $ runExceptT $ doIndex cfg db
|
||||
where
|
||||
shouldRunAt zt = case localTimeOfDay $ zonedTimeToLocalTime zt of
|
||||
TimeOfDay 3 0 _ -> True
|
||||
_ -> False
|
||||
|
||||
doIndex :: Config -> DB -> ExceptT Error IO ()
|
||||
doIndex cfg db =
|
||||
catchE
|
||||
( DB.withTransaction db $ \conn -> do
|
||||
dayThen <- getTodayWithOffset (-100) 0
|
||||
indexDay cfg dayThen conn
|
||||
dayThen2 <- getTodayWithOffset (-100) 1
|
||||
indexDay cfg dayThen2 conn
|
||||
dayThen3 <- getTodayWithOffset (-100) 2
|
||||
indexDay cfg dayThen3 conn
|
||||
)
|
||||
(Log.error . show)
|
||||
|
||||
runServer :: Config -> DB -> IO ()
|
||||
runServer cfg db =
|
||||
Warp.runSettings settings $ serve (Proxy @API) $ hoistServer (Proxy @API) nt $ server cfg db
|
||||
where
|
||||
settings =
|
||||
foldr
|
||||
($)
|
||||
@ -100,9 +137,6 @@ main = do
|
||||
, Warp.setOnException onException
|
||||
]
|
||||
|
||||
db <- DB.initDB cfg.yoreDb
|
||||
Warp.runSettings settings $ serve (Proxy @API) $ hoistServer (Proxy @API) nt $ server cfg db
|
||||
where
|
||||
logger req status _ = do
|
||||
Log.info $
|
||||
printf
|
||||
@ -158,20 +192,20 @@ handlerToRaw handler = Tagged $ \_ respond -> do
|
||||
Right response ->
|
||||
respond response
|
||||
|
||||
server :: Config -> DB.DB -> ServerT API (ExceptT Error IO)
|
||||
server :: Config -> DB -> ServerT API (ExceptT Error IO)
|
||||
server cfg db = rootR :<|> todayR :<|> apiTodayR
|
||||
where
|
||||
rootR = todayR 0
|
||||
|
||||
todayR issue = do
|
||||
dateThen <- ExceptT get100YearsAgo
|
||||
count <- ExceptT $ DB.withConn db $ DB.getNumberOfIssues dateThen
|
||||
dayFile <- ExceptT $ DB.withConn db $ DB.getDayFileByIssue dateThen issue
|
||||
count <- DB.withConn db $ DB.getNumberOfIssues dateThen
|
||||
dayFile <- DB.withConn db $ DB.getDayFileByIssue dateThen issue
|
||||
pure $ RootModel dateThen dayFile issue count
|
||||
|
||||
apiTodayR issue = handlerToRaw $ do
|
||||
dateThen <- ExceptT get100YearsAgo
|
||||
dayFile <- ExceptT $ DB.withConn db $ DB.getDayFileByIssue dateThen issue
|
||||
dayFile <- DB.withConn db $ DB.getDayFileByIssue dateThen issue
|
||||
let fullPath = cfg.yoreDownloadDir </> dayFile.relative_path
|
||||
secondsUntilMidnight <- liftIO getSecondsUntilMidnight
|
||||
pure $
|
||||
@ -223,5 +257,33 @@ instance Accept HTML where
|
||||
contentType _ = "text" // "html" /: ("charset", "utf-8")
|
||||
|
||||
get100YearsAgo :: IO (Either Error Day)
|
||||
get100YearsAgo =
|
||||
either (Left . GenericError . ("can't go back 100 years: " <>)) Right . addYears (-100) <$> getCurrentDay
|
||||
get100YearsAgo = runExceptT $ getTodayWithOffset (-100) 0
|
||||
|
||||
getTodayWithOffset :: Integer -> Integer -> ExceptT Error IO Day
|
||||
getTodayWithOffset yearOffset dayOffset =
|
||||
ExceptT $
|
||||
first (GenericError . (Text.pack (printf "can't go back %d years and go forward %d days: " (-yearOffset) dayOffset) <>))
|
||||
. addYears yearOffset
|
||||
. addDays dayOffset
|
||||
<$> getCurrentDay
|
||||
|
||||
indexDay :: Config -> Day -> Opium.Connection -> ExceptT Error IO ()
|
||||
indexDay cfg dayThen conn = do
|
||||
-- Transaction-level lock released automatically after transaction
|
||||
DB.lift $ Opium.execute_ "SELECT pg_advisory_xact_lock(42);" conn
|
||||
mbDi <- DB.readDayIndex dayThen conn
|
||||
case mbDi of
|
||||
Just _ ->
|
||||
Log.info $ printf "index for %s already exists." (show dayThen)
|
||||
Nothing -> do
|
||||
Log.info $ printf "scraping issues for %s" (show dayThen)
|
||||
issues <- liftIO $ getIssuesByDay dayThen
|
||||
paths <- forM issues $ \issue -> do
|
||||
Log.info $ printf "downloading %s" issue.url
|
||||
path <- liftIO $ downloadInto cfg.yoreDownloadDir issue.url
|
||||
pure (issue.label, path)
|
||||
Log.info "creating DB entries"
|
||||
dayIndex <- DB.createDayIndex dayThen conn
|
||||
forM_ paths $ \(text, url) ->
|
||||
DB.createDayFile dayIndex.day_index_id text url conn
|
||||
Log.info "done."
|
||||
|
||||
@ -1,3 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
YORE_DB="host=localhost port=${DB_PORT} user=${DB_USER} dbname=${DB_DBNAME}" cabal repl exe:yore
|
||||
@ -26,4 +26,6 @@ fi
|
||||
|
||||
DATABASE_URL="postgres://${DB_USER}@localhost:${DB_PORT}/${DB_DBNAME}?sslmode=disable" dbmate up
|
||||
|
||||
export YORE_DB="host=localhost port=${DB_PORT} user=${DB_USER} dbname=${DB_DBNAME}"
|
||||
|
||||
"$@"
|
||||
|
||||
@ -9,6 +9,7 @@ module Yore.DB
|
||||
, Error (..)
|
||||
, initDB
|
||||
, withConn
|
||||
, withTransaction
|
||||
, DayIndex (..)
|
||||
, DayFile (..)
|
||||
, createDayFile
|
||||
@ -17,9 +18,11 @@ module Yore.DB
|
||||
, readDayPaths
|
||||
, getDayFileByIssue
|
||||
, getNumberOfIssues
|
||||
, lift
|
||||
) where
|
||||
|
||||
import Control.Concurrent (getNumCapabilities)
|
||||
import Control.Monad.Trans.Except (ExceptT (..), catchE, runExceptT, throwE)
|
||||
import Data.Functor.Identity (Identity (..))
|
||||
import Data.Pool (Pool, defaultPoolConfig, newPool, withResource)
|
||||
import Data.Text (Text)
|
||||
@ -28,6 +31,7 @@ import GHC.Generics (Generic)
|
||||
|
||||
import qualified Database.PostgreSQL.Opium as Opium
|
||||
|
||||
import Data.Bifunctor (Bifunctor (..))
|
||||
import Yore.Error (Error (..))
|
||||
|
||||
newtype DB = DB (Pool (Either Opium.ConnectionError Opium.Connection))
|
||||
@ -45,13 +49,35 @@ initDB connString = do
|
||||
maxResources
|
||||
)
|
||||
|
||||
withConn :: DB -> (Opium.Connection -> IO (Either Opium.Error a)) -> IO (Either Error a)
|
||||
-- TODO: This should probably also do something like bracket...
|
||||
-- For now let's assume no exceptions are thrown in f.
|
||||
withConn :: DB -> (Opium.Connection -> ExceptT Error IO a) -> ExceptT Error IO a
|
||||
withConn (DB connPool) f =
|
||||
withResource connPool $ \case
|
||||
ExceptT $ withResource connPool $ \case
|
||||
Left connectionError ->
|
||||
pure $ Left $ ConnectionError connectionError
|
||||
Right conn ->
|
||||
either (Left . DBError) Right <$> f conn
|
||||
runExceptT $ f conn
|
||||
|
||||
withTransaction :: DB -> (Opium.Connection -> ExceptT Error IO a) -> ExceptT Error IO a
|
||||
withTransaction db f =
|
||||
withConn db $ \conn ->
|
||||
( do
|
||||
lift $ Opium.execute_ "BEGIN" conn
|
||||
x <- f conn
|
||||
lift $ Opium.execute_ "COMMIT" conn
|
||||
pure x
|
||||
)
|
||||
`catchE` ( \err -> do
|
||||
-- rollback open transactions and release transaction level locks...
|
||||
lift $ Opium.execute_ "ROLLBACK" conn
|
||||
-- ...then rethrow the error so the app can handle it too.
|
||||
throwE err
|
||||
)
|
||||
|
||||
lift :: IO (Either Opium.Error a) -> ExceptT Error IO a
|
||||
lift f =
|
||||
ExceptT $ first DBError <$> f
|
||||
|
||||
data DayIndex = DayIndex
|
||||
{ day_index_id :: Int
|
||||
@ -72,44 +98,47 @@ newtype CountResult = CountResult
|
||||
}
|
||||
deriving (Show, Generic, Opium.FromRow)
|
||||
|
||||
createDayIndex :: Day -> Opium.Connection -> IO (Either Opium.Error DayIndex)
|
||||
createDayIndex :: Day -> Opium.Connection -> ExceptT Error IO DayIndex
|
||||
createDayIndex date =
|
||||
ex runIdentity
|
||||
. Opium.fetch
|
||||
"INSERT INTO yore.day_index (day) VALUES ($1) RETURNING *"
|
||||
(Identity date)
|
||||
|
||||
createDayFile :: Int -> Text -> FilePath -> Opium.Connection -> IO (Either Opium.Error ())
|
||||
createDayFile :: Int -> Text -> FilePath -> Opium.Connection -> ExceptT Error IO ()
|
||||
createDayFile dayId label path =
|
||||
Opium.execute
|
||||
"INSERT INTO yore.day_file (day_index_id, label, relative_path) VALUES ($1, $2, $3)"
|
||||
(dayId, label, path)
|
||||
lift
|
||||
. Opium.execute
|
||||
"INSERT INTO yore.day_file (day_index_id, label, relative_path) VALUES ($1, $2, $3)"
|
||||
(dayId, label, path)
|
||||
|
||||
readDayIndex :: Day -> Opium.Connection -> IO (Either Opium.Error (Maybe DayIndex))
|
||||
readDayIndex :: Day -> Opium.Connection -> ExceptT Error IO (Maybe DayIndex)
|
||||
readDayIndex date =
|
||||
Opium.fetch
|
||||
"SELECT * FROM yore.day_index WHERE day = $1"
|
||||
(Identity date)
|
||||
lift
|
||||
. Opium.fetch
|
||||
"SELECT * FROM yore.day_index WHERE day = $1"
|
||||
(Identity date)
|
||||
|
||||
readDayPaths :: Day -> Opium.Connection -> IO (Either Opium.Error [(DayIndex, DayFile)])
|
||||
readDayPaths :: Day -> Opium.Connection -> ExceptT Error IO [(DayIndex, DayFile)]
|
||||
readDayPaths date =
|
||||
Opium.fetch
|
||||
"SELECT * FROM yore.day_index LEFT JOIN yore.day_file USING (day_index_id) WHERE day = $1"
|
||||
(Identity date)
|
||||
lift
|
||||
. Opium.fetch
|
||||
"SELECT * FROM yore.day_index LEFT JOIN yore.day_file USING (day_index_id) WHERE day = $1"
|
||||
(Identity date)
|
||||
|
||||
getDayFileByIssue :: Day -> Int -> Opium.Connection -> IO (Either Opium.Error DayFile)
|
||||
getDayFileByIssue :: Day -> Int -> Opium.Connection -> ExceptT Error IO DayFile
|
||||
getDayFileByIssue date issue =
|
||||
ex runIdentity
|
||||
. Opium.fetch
|
||||
"SELECT yore.day_file.* FROM yore.day_index LEFT JOIN yore.day_file USING (day_index_id) WHERE day = $1 ORDER BY day_file_id LIMIT 1 OFFSET $2"
|
||||
(date, issue)
|
||||
|
||||
getNumberOfIssues :: Day -> Opium.Connection -> IO (Either Opium.Error Int)
|
||||
getNumberOfIssues :: Day -> Opium.Connection -> ExceptT Error IO Int
|
||||
getNumberOfIssues date =
|
||||
ex (count . runIdentity)
|
||||
. Opium.fetch
|
||||
"SELECT count(*) FROM yore.day_index LEFT JOIN yore.day_file USING (day_index_id) WHERE day = $1"
|
||||
(Identity date)
|
||||
|
||||
ex :: (a -> b) -> IO (Either e a) -> IO (Either e b)
|
||||
ex = fmap . fmap
|
||||
ex :: (a -> b) -> IO (Either Opium.Error a) -> ExceptT Error IO b
|
||||
ex f = fmap f . lift
|
||||
|
||||
@ -1,24 +0,0 @@
|
||||
{-# LANGUAGE OverloadedRecordDot #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Yore.Index (createEntries) where
|
||||
|
||||
import Control.Exception (Exception, throwIO)
|
||||
import Control.Monad (forM_)
|
||||
import Data.Text (Text)
|
||||
import Data.Time (Day)
|
||||
|
||||
import qualified Database.PostgreSQL.Opium as Opium
|
||||
|
||||
import qualified Yore.DB as DB
|
||||
|
||||
createEntries :: Day -> [(Text, FilePath)] -> Opium.Connection -> IO ()
|
||||
createEntries date urls conn = do
|
||||
confidently $ Opium.execute_ "BEGIN" conn
|
||||
dayIndex <- confidently $ DB.createDayIndex date conn
|
||||
forM_ urls $ \(text, url) ->
|
||||
confidently $ DB.createDayFile dayIndex.day_index_id text url conn
|
||||
confidently $ Opium.execute_ "COMMIT" conn
|
||||
|
||||
confidently :: (Exception e) => IO (Either e a) -> IO a
|
||||
confidently action = action >>= either throwIO pure
|
||||
@ -1,18 +1,19 @@
|
||||
module Yore.Log (Yore.Log.error, info) where
|
||||
|
||||
import Control.Monad.IO.Class (MonadIO (..))
|
||||
import Data.Time (ZonedTime, getZonedTime)
|
||||
import Data.Time.Format (defaultTimeLocale, formatTime)
|
||||
import GHC.Stack (HasCallStack, SrcLoc (..), callStack, getCallStack)
|
||||
import Text.Printf (printf)
|
||||
|
||||
info :: (HasCallStack) => String -> IO ()
|
||||
info :: (HasCallStack, MonadIO m) => String -> m ()
|
||||
info = doLog "INF"
|
||||
|
||||
error :: (HasCallStack) => String -> IO ()
|
||||
error :: (HasCallStack, MonadIO m) => String -> m ()
|
||||
error = doLog "ERR"
|
||||
|
||||
doLog :: (HasCallStack) => String -> String -> IO ()
|
||||
doLog level msg = do
|
||||
doLog :: (HasCallStack, MonadIO m) => String -> String -> m ()
|
||||
doLog level msg = liftIO $ do
|
||||
now <- getZonedTime
|
||||
let location = getLocation $ getCallStack callStack
|
||||
printf "(%s) (%s) (%s) %s\n" (iso8601Show now) level location msg
|
||||
|
||||
@ -1,59 +0,0 @@
|
||||
{-# LANGUAGE LambdaCase #-}
|
||||
{-# LANGUAGE OverloadedRecordDot #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Yore.Repl (connect, exec, getToday, indexDay, addYears) where
|
||||
|
||||
import Control.Exception (bracket, throwIO)
|
||||
import Control.Monad (forM)
|
||||
import Data.Time (Day, getZonedTime, localDay, zonedTimeToLocalTime)
|
||||
import Text.Printf (printf)
|
||||
|
||||
import qualified Database.PostgreSQL.Opium as Opium
|
||||
|
||||
import Yore.DB (DayIndex)
|
||||
import Yore.Download (downloadInto)
|
||||
import Yore.Index (createEntries)
|
||||
import Yore.Scrape (Issue (..), getIssuesByDay)
|
||||
import Yore.Time (addYears)
|
||||
|
||||
import qualified Yore.DB as DB
|
||||
|
||||
connect :: IO Opium.Connection
|
||||
connect =
|
||||
Opium.connect "host=localhost port=5433 user=yore-test" >>= \case
|
||||
Left e -> error $ show e
|
||||
Right c -> pure c
|
||||
|
||||
exec :: (Opium.Connection -> IO a) -> IO a
|
||||
exec = bracket connect Opium.close
|
||||
|
||||
getToday :: IO [Issue]
|
||||
getToday = do
|
||||
now <- getZonedTime
|
||||
print now
|
||||
let currentDay = localDay $ zonedTimeToLocalTime now
|
||||
case addYears (-100) currentDay of
|
||||
Right then_ ->
|
||||
getIssuesByDay then_
|
||||
Left _ ->
|
||||
pure []
|
||||
|
||||
indexDay :: Day -> IO ()
|
||||
indexDay date = exec $ \conn -> do
|
||||
either throwIO pure =<< Opium.execute_ "SELECT pg_advisory_lock(42);" conn
|
||||
printf "Checking whether %s already exists...\n" (show date)
|
||||
mbDi <- either throwIO pure =<< DB.readDayIndex date conn
|
||||
case mbDi of
|
||||
Just (_ :: DayIndex) ->
|
||||
putStrLn "Nothing to do."
|
||||
Nothing -> do
|
||||
putStrLn "Retrieving URLs..."
|
||||
issues <- getIssuesByDay date
|
||||
paths <- forM issues $ \issue -> do
|
||||
printf "Downloading %s...\n" issue.url
|
||||
path <- downloadInto "./download" issue.url
|
||||
pure (issue.label, path)
|
||||
putStrLn "Creating DB entries..."
|
||||
createEntries date paths conn
|
||||
putStrLn "Done."
|
||||
22
src/Yore/Schedule.hs
Normal file
22
src/Yore/Schedule.hs
Normal file
@ -0,0 +1,22 @@
|
||||
module Yore.Schedule (schedule) where
|
||||
|
||||
import Control.Concurrent (forkIO, threadDelay)
|
||||
import Control.Monad (when)
|
||||
import Data.Functor (($>))
|
||||
import Data.Time (ZonedTime, getZonedTime, nominalDiffTimeToSeconds, secondsToNominalDiffTime)
|
||||
import Data.Time.Clock.POSIX (getPOSIXTime)
|
||||
|
||||
schedule :: (ZonedTime -> Bool) -> IO a -> IO ()
|
||||
schedule shouldRunAt f = everyMinute $ \now -> when (shouldRunAt now) (f $> ())
|
||||
|
||||
everyMinute :: (ZonedTime -> IO ()) -> IO ()
|
||||
everyMinute f = do
|
||||
-- Use POSIX time to avoid having to handle leap seconds
|
||||
now <- getPOSIXTime
|
||||
let posixSeconds :: Int
|
||||
posixSeconds = floor $ nominalDiffTimeToSeconds now
|
||||
lastFullMinute = secondsToNominalDiffTime $ fromIntegral $ posixSeconds - posixSeconds `mod` 60
|
||||
secondsSinceLastFullMinute = now - lastFullMinute
|
||||
threadDelay $ ceiling $ (60 - secondsSinceLastFullMinute) * 1_000_000
|
||||
_ <- forkIO $ f =<< getZonedTime
|
||||
everyMinute f
|
||||
@ -37,9 +37,8 @@ library
|
||||
, Yore.DB
|
||||
, Yore.Download
|
||||
, Yore.Error
|
||||
, Yore.Index
|
||||
, Yore.Log
|
||||
, Yore.Repl
|
||||
, Yore.Schedule
|
||||
, Yore.Scrape
|
||||
, Yore.Time
|
||||
hs-source-dirs:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user