Compare commits

..

No commits in common. "dbea1c549f6e2d2f5b7f35ad1640222f7b7928c9" and "22867bc22907f705e52bf92ef6a9af8c2c9e3d80" have entirely different histories.

10 changed files with 126 additions and 157 deletions

View File

@ -44,5 +44,3 @@ By setting `$BASE_DIR` you can persist the database for later runs.
- CI - CI
- Docker container (in flake) - Docker container (in flake)
- try fourmolu - try fourmolu
- Test leap second handling in the `time` package
- use queue to sync logging of indexer and main thread

View File

@ -10,18 +10,14 @@
module Main (main) where module Main (main) where
import Control.Concurrent (forkIO)
import Control.Monad (forM, forM_)
import Control.Monad.IO.Class (liftIO) import Control.Monad.IO.Class (liftIO)
import Control.Monad.Trans.Except (ExceptT (..), catchE, runExceptT) import Control.Monad.Trans.Except (ExceptT (..), runExceptT)
import Data.Bifunctor (Bifunctor (..))
import Data.Proxy (Proxy (..)) import Data.Proxy (Proxy (..))
import Data.Text (Text) import Data.Text (Text)
import Data.Time (Day, LocalTime (..), TimeOfDay (..), ZonedTime (..), addDays, toGregorian) import Data.Time (Day, toGregorian)
import GHC.Generics (Generic) import GHC.Generics (Generic)
import Network.HTTP.Media ((//), (/:)) import Network.HTTP.Media ((//), (/:))
import Network.HTTP.Types (Status (..), mkStatus, status200) import Network.HTTP.Types (Status (..), mkStatus, status200)
import Network.Wai (Application, Request (..), Response, responseFile, responseLBS)
import Servant import Servant
( Accept (..) ( Accept (..)
, Capture , Capture
@ -52,16 +48,14 @@ import qualified Data.ByteString.Lazy as LBS
import qualified Data.Text as Text import qualified Data.Text as Text
import qualified Data.Text.Encoding as Encoding import qualified Data.Text.Encoding as Encoding
import qualified Database.PostgreSQL.Opium as Opium import qualified Database.PostgreSQL.Opium as Opium
import Network.Wai (Application, Request (..), Response, responseFile, responseLBS)
import qualified Network.Wai.Handler.Warp as Warp import qualified Network.Wai.Handler.Warp as Warp
import qualified Text.Blaze.Html5 as H import qualified Text.Blaze.Html5 as H
import qualified Text.Blaze.Html5.Attributes as A import qualified Text.Blaze.Html5.Attributes as A
import Envy (type (=@!), type (=@@), type (?)) import Envy (type (=@!), type (=@@), type (?))
import Yore.DB (DB, DayFile (..)) import Yore.DB (DayFile (..))
import Yore.Download (downloadInto)
import Yore.Error (Error (..)) import Yore.Error (Error (..))
import Yore.Schedule (schedule)
import Yore.Scrape (Issue (..), getIssuesByDay)
import Yore.Time (addYears, getCurrentDay, getSecondsUntilMidnight) import Yore.Time (addYears, getCurrentDay, getSecondsUntilMidnight)
import qualified Envy import qualified Envy
@ -95,38 +89,7 @@ main = do
Right c -> Right c ->
pure c pure c
db <- DB.initDB cfg.yoreDb let
_ <- runExceptT $ doIndex cfg db
_ <- forkIO $ runIndexer cfg db
runServer cfg db
runIndexer :: Config -> DB -> IO ()
runIndexer cfg db =
schedule shouldRunAt $ runExceptT $ doIndex cfg db
where
shouldRunAt zt = case localTimeOfDay $ zonedTimeToLocalTime zt of
TimeOfDay 3 0 _ -> True
_ -> False
doIndex :: Config -> DB -> ExceptT Error IO ()
doIndex cfg db =
catchE
( DB.withTransaction db $ \conn -> do
dayThen <- getTodayWithOffset (-100) 0
indexDay cfg dayThen conn
dayThen2 <- getTodayWithOffset (-100) 1
indexDay cfg dayThen2 conn
dayThen3 <- getTodayWithOffset (-100) 2
indexDay cfg dayThen3 conn
)
(Log.error . show)
runServer :: Config -> DB -> IO ()
runServer cfg db =
Warp.runSettings settings $ serve (Proxy @API) $ hoistServer (Proxy @API) nt $ server cfg db
where
settings = settings =
foldr foldr
($) ($)
@ -137,6 +100,9 @@ runServer cfg db =
, Warp.setOnException onException , Warp.setOnException onException
] ]
db <- DB.initDB cfg.yoreDb
Warp.runSettings settings $ serve (Proxy @API) $ hoistServer (Proxy @API) nt $ server cfg db
where
logger req status _ = do logger req status _ = do
Log.info $ Log.info $
printf printf
@ -192,20 +158,20 @@ handlerToRaw handler = Tagged $ \_ respond -> do
Right response -> Right response ->
respond response respond response
server :: Config -> DB -> ServerT API (ExceptT Error IO) server :: Config -> DB.DB -> ServerT API (ExceptT Error IO)
server cfg db = rootR :<|> todayR :<|> apiTodayR server cfg db = rootR :<|> todayR :<|> apiTodayR
where where
rootR = todayR 0 rootR = todayR 0
todayR issue = do todayR issue = do
dateThen <- ExceptT get100YearsAgo dateThen <- ExceptT get100YearsAgo
count <- DB.withConn db $ DB.getNumberOfIssues dateThen count <- ExceptT $ DB.withConn db $ DB.getNumberOfIssues dateThen
dayFile <- DB.withConn db $ DB.getDayFileByIssue dateThen issue dayFile <- ExceptT $ DB.withConn db $ DB.getDayFileByIssue dateThen issue
pure $ RootModel dateThen dayFile issue count pure $ RootModel dateThen dayFile issue count
apiTodayR issue = handlerToRaw $ do apiTodayR issue = handlerToRaw $ do
dateThen <- ExceptT get100YearsAgo dateThen <- ExceptT get100YearsAgo
dayFile <- DB.withConn db $ DB.getDayFileByIssue dateThen issue dayFile <- ExceptT $ DB.withConn db $ DB.getDayFileByIssue dateThen issue
let fullPath = cfg.yoreDownloadDir </> dayFile.relative_path let fullPath = cfg.yoreDownloadDir </> dayFile.relative_path
secondsUntilMidnight <- liftIO getSecondsUntilMidnight secondsUntilMidnight <- liftIO getSecondsUntilMidnight
pure $ pure $
@ -257,33 +223,5 @@ instance Accept HTML where
contentType _ = "text" // "html" /: ("charset", "utf-8") contentType _ = "text" // "html" /: ("charset", "utf-8")
get100YearsAgo :: IO (Either Error Day) get100YearsAgo :: IO (Either Error Day)
get100YearsAgo = runExceptT $ getTodayWithOffset (-100) 0 get100YearsAgo =
either (Left . GenericError . ("can't go back 100 years: " <>)) Right . addYears (-100) <$> getCurrentDay
getTodayWithOffset :: Integer -> Integer -> ExceptT Error IO Day
getTodayWithOffset yearOffset dayOffset =
ExceptT $
first (GenericError . (Text.pack (printf "can't go back %d years and go forward %d days: " (-yearOffset) dayOffset) <>))
. addYears yearOffset
. addDays dayOffset
<$> getCurrentDay
indexDay :: Config -> Day -> Opium.Connection -> ExceptT Error IO ()
indexDay cfg dayThen conn = do
-- Transaction-level lock released automatically after transaction
DB.lift $ Opium.execute_ "SELECT pg_advisory_xact_lock(42);" conn
mbDi <- DB.readDayIndex dayThen conn
case mbDi of
Just _ ->
Log.info $ printf "index for %s already exists." (show dayThen)
Nothing -> do
Log.info $ printf "scraping issues for %s" (show dayThen)
issues <- liftIO $ getIssuesByDay dayThen
paths <- forM issues $ \issue -> do
Log.info $ printf "downloading %s" issue.url
path <- liftIO $ downloadInto cfg.yoreDownloadDir issue.url
pure (issue.label, path)
Log.info "creating DB entries"
dayIndex <- DB.createDayIndex dayThen conn
forM_ paths $ \(text, url) ->
DB.createDayFile dayIndex.day_index_id text url conn
Log.info "done."

3
scripts/dev Executable file
View File

@ -0,0 +1,3 @@
#!/usr/bin/env bash
YORE_DB="host=localhost port=${DB_PORT} user=${DB_USER} dbname=${DB_DBNAME}" cabal repl exe:yore

View File

@ -26,6 +26,4 @@ fi
DATABASE_URL="postgres://${DB_USER}@localhost:${DB_PORT}/${DB_DBNAME}?sslmode=disable" dbmate up DATABASE_URL="postgres://${DB_USER}@localhost:${DB_PORT}/${DB_DBNAME}?sslmode=disable" dbmate up
export YORE_DB="host=localhost port=${DB_PORT} user=${DB_USER} dbname=${DB_DBNAME}"
"$@" "$@"

View File

@ -9,7 +9,6 @@ module Yore.DB
, Error (..) , Error (..)
, initDB , initDB
, withConn , withConn
, withTransaction
, DayIndex (..) , DayIndex (..)
, DayFile (..) , DayFile (..)
, createDayFile , createDayFile
@ -18,11 +17,9 @@ module Yore.DB
, readDayPaths , readDayPaths
, getDayFileByIssue , getDayFileByIssue
, getNumberOfIssues , getNumberOfIssues
, lift
) where ) where
import Control.Concurrent (getNumCapabilities) import Control.Concurrent (getNumCapabilities)
import Control.Monad.Trans.Except (ExceptT (..), catchE, runExceptT, throwE)
import Data.Functor.Identity (Identity (..)) import Data.Functor.Identity (Identity (..))
import Data.Pool (Pool, defaultPoolConfig, newPool, withResource) import Data.Pool (Pool, defaultPoolConfig, newPool, withResource)
import Data.Text (Text) import Data.Text (Text)
@ -31,7 +28,6 @@ import GHC.Generics (Generic)
import qualified Database.PostgreSQL.Opium as Opium import qualified Database.PostgreSQL.Opium as Opium
import Data.Bifunctor (Bifunctor (..))
import Yore.Error (Error (..)) import Yore.Error (Error (..))
newtype DB = DB (Pool (Either Opium.ConnectionError Opium.Connection)) newtype DB = DB (Pool (Either Opium.ConnectionError Opium.Connection))
@ -49,35 +45,13 @@ initDB connString = do
maxResources maxResources
) )
-- TODO: This should probably also do something like bracket... withConn :: DB -> (Opium.Connection -> IO (Either Opium.Error a)) -> IO (Either Error a)
-- For now let's assume no exceptions are thrown in f.
withConn :: DB -> (Opium.Connection -> ExceptT Error IO a) -> ExceptT Error IO a
withConn (DB connPool) f = withConn (DB connPool) f =
ExceptT $ withResource connPool $ \case withResource connPool $ \case
Left connectionError -> Left connectionError ->
pure $ Left $ ConnectionError connectionError pure $ Left $ ConnectionError connectionError
Right conn -> Right conn ->
runExceptT $ f conn either (Left . DBError) Right <$> f conn
withTransaction :: DB -> (Opium.Connection -> ExceptT Error IO a) -> ExceptT Error IO a
withTransaction db f =
withConn db $ \conn ->
( do
lift $ Opium.execute_ "BEGIN" conn
x <- f conn
lift $ Opium.execute_ "COMMIT" conn
pure x
)
`catchE` ( \err -> do
-- rollback open transactions and release transaction level locks...
lift $ Opium.execute_ "ROLLBACK" conn
-- ...then rethrow the error so the app can handle it too.
throwE err
)
lift :: IO (Either Opium.Error a) -> ExceptT Error IO a
lift f =
ExceptT $ first DBError <$> f
data DayIndex = DayIndex data DayIndex = DayIndex
{ day_index_id :: Int { day_index_id :: Int
@ -98,47 +72,44 @@ newtype CountResult = CountResult
} }
deriving (Show, Generic, Opium.FromRow) deriving (Show, Generic, Opium.FromRow)
createDayIndex :: Day -> Opium.Connection -> ExceptT Error IO DayIndex createDayIndex :: Day -> Opium.Connection -> IO (Either Opium.Error DayIndex)
createDayIndex date = createDayIndex date =
ex runIdentity ex runIdentity
. Opium.fetch . Opium.fetch
"INSERT INTO yore.day_index (day) VALUES ($1) RETURNING *" "INSERT INTO yore.day_index (day) VALUES ($1) RETURNING *"
(Identity date) (Identity date)
createDayFile :: Int -> Text -> FilePath -> Opium.Connection -> ExceptT Error IO () createDayFile :: Int -> Text -> FilePath -> Opium.Connection -> IO (Either Opium.Error ())
createDayFile dayId label path = createDayFile dayId label path =
lift Opium.execute
. Opium.execute "INSERT INTO yore.day_file (day_index_id, label, relative_path) VALUES ($1, $2, $3)"
"INSERT INTO yore.day_file (day_index_id, label, relative_path) VALUES ($1, $2, $3)" (dayId, label, path)
(dayId, label, path)
readDayIndex :: Day -> Opium.Connection -> ExceptT Error IO (Maybe DayIndex) readDayIndex :: Day -> Opium.Connection -> IO (Either Opium.Error (Maybe DayIndex))
readDayIndex date = readDayIndex date =
lift Opium.fetch
. Opium.fetch "SELECT * FROM yore.day_index WHERE day = $1"
"SELECT * FROM yore.day_index WHERE day = $1" (Identity date)
(Identity date)
readDayPaths :: Day -> Opium.Connection -> ExceptT Error IO [(DayIndex, DayFile)] readDayPaths :: Day -> Opium.Connection -> IO (Either Opium.Error [(DayIndex, DayFile)])
readDayPaths date = readDayPaths date =
lift Opium.fetch
. Opium.fetch "SELECT * FROM yore.day_index LEFT JOIN yore.day_file USING (day_index_id) WHERE day = $1"
"SELECT * FROM yore.day_index LEFT JOIN yore.day_file USING (day_index_id) WHERE day = $1" (Identity date)
(Identity date)
getDayFileByIssue :: Day -> Int -> Opium.Connection -> ExceptT Error IO DayFile getDayFileByIssue :: Day -> Int -> Opium.Connection -> IO (Either Opium.Error DayFile)
getDayFileByIssue date issue = getDayFileByIssue date issue =
ex runIdentity ex runIdentity
. Opium.fetch . Opium.fetch
"SELECT yore.day_file.* FROM yore.day_index LEFT JOIN yore.day_file USING (day_index_id) WHERE day = $1 ORDER BY day_file_id LIMIT 1 OFFSET $2" "SELECT yore.day_file.* FROM yore.day_index LEFT JOIN yore.day_file USING (day_index_id) WHERE day = $1 ORDER BY day_file_id LIMIT 1 OFFSET $2"
(date, issue) (date, issue)
getNumberOfIssues :: Day -> Opium.Connection -> ExceptT Error IO Int getNumberOfIssues :: Day -> Opium.Connection -> IO (Either Opium.Error Int)
getNumberOfIssues date = getNumberOfIssues date =
ex (count . runIdentity) ex (count . runIdentity)
. Opium.fetch . Opium.fetch
"SELECT count(*) FROM yore.day_index LEFT JOIN yore.day_file USING (day_index_id) WHERE day = $1" "SELECT count(*) FROM yore.day_index LEFT JOIN yore.day_file USING (day_index_id) WHERE day = $1"
(Identity date) (Identity date)
ex :: (a -> b) -> IO (Either Opium.Error a) -> ExceptT Error IO b ex :: (a -> b) -> IO (Either e a) -> IO (Either e b)
ex f = fmap f . lift ex = fmap . fmap

24
src/Yore/Index.hs Normal file
View File

@ -0,0 +1,24 @@
{-# LANGUAGE OverloadedRecordDot #-}
{-# LANGUAGE OverloadedStrings #-}
module Yore.Index (createEntries) where
import Control.Exception (Exception, throwIO)
import Control.Monad (forM_)
import Data.Text (Text)
import Data.Time (Day)
import qualified Database.PostgreSQL.Opium as Opium
import qualified Yore.DB as DB
createEntries :: Day -> [(Text, FilePath)] -> Opium.Connection -> IO ()
createEntries date urls conn = do
confidently $ Opium.execute_ "BEGIN" conn
dayIndex <- confidently $ DB.createDayIndex date conn
forM_ urls $ \(text, url) ->
confidently $ DB.createDayFile dayIndex.day_index_id text url conn
confidently $ Opium.execute_ "COMMIT" conn
confidently :: (Exception e) => IO (Either e a) -> IO a
confidently action = action >>= either throwIO pure

View File

@ -1,19 +1,18 @@
module Yore.Log (Yore.Log.error, info) where module Yore.Log (Yore.Log.error, info) where
import Control.Monad.IO.Class (MonadIO (..))
import Data.Time (ZonedTime, getZonedTime) import Data.Time (ZonedTime, getZonedTime)
import Data.Time.Format (defaultTimeLocale, formatTime) import Data.Time.Format (defaultTimeLocale, formatTime)
import GHC.Stack (HasCallStack, SrcLoc (..), callStack, getCallStack) import GHC.Stack (HasCallStack, SrcLoc (..), callStack, getCallStack)
import Text.Printf (printf) import Text.Printf (printf)
info :: (HasCallStack, MonadIO m) => String -> m () info :: (HasCallStack) => String -> IO ()
info = doLog "INF" info = doLog "INF"
error :: (HasCallStack, MonadIO m) => String -> m () error :: (HasCallStack) => String -> IO ()
error = doLog "ERR" error = doLog "ERR"
doLog :: (HasCallStack, MonadIO m) => String -> String -> m () doLog :: (HasCallStack) => String -> String -> IO ()
doLog level msg = liftIO $ do doLog level msg = do
now <- getZonedTime now <- getZonedTime
let location = getLocation $ getCallStack callStack let location = getLocation $ getCallStack callStack
printf "(%s) (%s) (%s) %s\n" (iso8601Show now) level location msg printf "(%s) (%s) (%s) %s\n" (iso8601Show now) level location msg

59
src/Yore/Repl.hs Normal file
View File

@ -0,0 +1,59 @@
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedRecordDot #-}
{-# LANGUAGE OverloadedStrings #-}
module Yore.Repl (connect, exec, getToday, indexDay, addYears) where
import Control.Exception (bracket, throwIO)
import Control.Monad (forM)
import Data.Time (Day, getZonedTime, localDay, zonedTimeToLocalTime)
import Text.Printf (printf)
import qualified Database.PostgreSQL.Opium as Opium
import Yore.DB (DayIndex)
import Yore.Download (downloadInto)
import Yore.Index (createEntries)
import Yore.Scrape (Issue (..), getIssuesByDay)
import Yore.Time (addYears)
import qualified Yore.DB as DB
connect :: IO Opium.Connection
connect =
Opium.connect "host=localhost port=5433 user=yore-test" >>= \case
Left e -> error $ show e
Right c -> pure c
exec :: (Opium.Connection -> IO a) -> IO a
exec = bracket connect Opium.close
getToday :: IO [Issue]
getToday = do
now <- getZonedTime
print now
let currentDay = localDay $ zonedTimeToLocalTime now
case addYears (-100) currentDay of
Right then_ ->
getIssuesByDay then_
Left _ ->
pure []
indexDay :: Day -> IO ()
indexDay date = exec $ \conn -> do
either throwIO pure =<< Opium.execute_ "SELECT pg_advisory_lock(42);" conn
printf "Checking whether %s already exists...\n" (show date)
mbDi <- either throwIO pure =<< DB.readDayIndex date conn
case mbDi of
Just (_ :: DayIndex) ->
putStrLn "Nothing to do."
Nothing -> do
putStrLn "Retrieving URLs..."
issues <- getIssuesByDay date
paths <- forM issues $ \issue -> do
printf "Downloading %s...\n" issue.url
path <- downloadInto "./download" issue.url
pure (issue.label, path)
putStrLn "Creating DB entries..."
createEntries date paths conn
putStrLn "Done."

View File

@ -1,22 +0,0 @@
module Yore.Schedule (schedule) where
import Control.Concurrent (forkIO, threadDelay)
import Control.Monad (when)
import Data.Functor (($>))
import Data.Time (ZonedTime, getZonedTime, nominalDiffTimeToSeconds, secondsToNominalDiffTime)
import Data.Time.Clock.POSIX (getPOSIXTime)
schedule :: (ZonedTime -> Bool) -> IO a -> IO ()
schedule shouldRunAt f = everyMinute $ \now -> when (shouldRunAt now) (f $> ())
everyMinute :: (ZonedTime -> IO ()) -> IO ()
everyMinute f = do
-- Use POSIX time to avoid having to handle leap seconds
now <- getPOSIXTime
let posixSeconds :: Int
posixSeconds = floor $ nominalDiffTimeToSeconds now
lastFullMinute = secondsToNominalDiffTime $ fromIntegral $ posixSeconds - posixSeconds `mod` 60
secondsSinceLastFullMinute = now - lastFullMinute
threadDelay $ ceiling $ (60 - secondsSinceLastFullMinute) * 1_000_000
_ <- forkIO $ f =<< getZonedTime
everyMinute f

View File

@ -37,8 +37,9 @@ library
, Yore.DB , Yore.DB
, Yore.Download , Yore.Download
, Yore.Error , Yore.Error
, Yore.Index
, Yore.Log , Yore.Log
, Yore.Schedule , Yore.Repl
, Yore.Scrape , Yore.Scrape
, Yore.Time , Yore.Time
hs-source-dirs: hs-source-dirs: