Add getUrl
This commit is contained in:
parent
6d49da993c
commit
ff3b47baa9
@ -1,10 +1,11 @@
|
|||||||
{-# LANGUAGE OverloadedStrings #-}
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
|
||||||
module Yore.Download (downloadToFile, getJpgURIs) where
|
module Yore.Download where
|
||||||
|
|
||||||
import Control.Monad (guard)
|
import Control.Monad (guard)
|
||||||
import Data.Char
|
import Data.Char
|
||||||
import Data.Maybe (fromMaybe, listToMaybe, mapMaybe, fromJust)
|
import Data.List (find)
|
||||||
|
import Data.Maybe (fromMaybe, listToMaybe, mapMaybe)
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
import Data.Time (Day, MonthOfYear, Year, fromGregorian, toGregorian)
|
import Data.Time (Day, MonthOfYear, Year, fromGregorian, toGregorian)
|
||||||
import Network.HTTP.Req
|
import Network.HTTP.Req
|
||||||
@ -12,7 +13,6 @@ import Text.HTML.Parser
|
|||||||
import Text.Printf (printf)
|
import Text.Printf (printf)
|
||||||
import Text.URI (URI)
|
import Text.URI (URI)
|
||||||
|
|
||||||
import qualified Data.ByteString.Lazy as LBS
|
|
||||||
import qualified Data.List.NonEmpty as NonEmpty
|
import qualified Data.List.NonEmpty as NonEmpty
|
||||||
import qualified Data.Text as Text
|
import qualified Data.Text as Text
|
||||||
import qualified Data.Text.Encoding as Encoding
|
import qualified Data.Text.Encoding as Encoding
|
||||||
@ -25,27 +25,45 @@ data TargetSpec = TargetSpec
|
|||||||
}
|
}
|
||||||
deriving (Show)
|
deriving (Show)
|
||||||
|
|
||||||
downloadToFile :: URI -> FilePath -> IO ()
|
getUrl :: Day -> IO Text
|
||||||
downloadToFile uri path = do
|
getUrl date = do
|
||||||
let (url, q) = fromJust $ useHttpsURI uri
|
let (y, m, _) = toGregorian date
|
||||||
res <- runReq defaultHttpConfig $ req GET url NoReqBody lbsResponse q
|
candidates <- getMonthOverview y m
|
||||||
|
let (_, ts) = fromMaybe (error "no candidates") $ find ((date ==) . fst) candidates
|
||||||
|
fromMaybe (error "could not get showpdf url") <$> getShowpdf ts
|
||||||
|
|
||||||
|
getShowpdf :: TargetSpec -> IO (Maybe Text)
|
||||||
|
getShowpdf (TargetSpec y m d) = do
|
||||||
|
res <- runReq defaultHttpConfig $ req
|
||||||
|
GET
|
||||||
|
-- E.g. https://fz.ub.uni-freiburg.de/show/fz.cgi?cmd=showpdf&day=01r1&year=1925&month=07&project=3
|
||||||
|
(https "fz.ub.uni-freiburg.de" /: "show" /: "fz.cgi")
|
||||||
|
NoReqBody
|
||||||
|
bsResponse
|
||||||
|
(formToQuery
|
||||||
|
[ ("cmd" :: Text, "showpdf")
|
||||||
|
, ("year", y)
|
||||||
|
, ("month", m)
|
||||||
|
, ("day", d)
|
||||||
|
])
|
||||||
|
|
||||||
guard $ responseStatusCode res == 200
|
guard $ responseStatusCode res == 200
|
||||||
|
|
||||||
LBS.writeFile path $ responseBody res
|
let tokens = parseTokens $ Encoding.decodeUtf8Lenient $ responseBody res
|
||||||
|
let targets = mapMaybe getRefreshTarget tokens
|
||||||
|
|
||||||
getJpgURIs :: Day -> IO [URI]
|
pure $ listToMaybe targets
|
||||||
getJpgURIs date = do
|
|
||||||
ov <- getMonthOverview year month
|
|
||||||
pure [ts2uri ts | (date', ts) <- ov, date' == date]
|
|
||||||
|
|
||||||
where
|
where
|
||||||
(year, month, _) = toGregorian date
|
getRefreshTarget (TagOpen "meta" attrs) = do
|
||||||
|
httpEquiv <- getAttr "http-equiv" attrs
|
||||||
|
guard $ httpEquiv == "refresh"
|
||||||
|
content <- getAttr "content" attrs
|
||||||
|
Just $ snd $ Text.breakOnEnd "URL=" content
|
||||||
|
getRefreshTarget _ =
|
||||||
|
Nothing
|
||||||
|
|
||||||
ts2uri (TargetSpec y m d) = URI.emptyURI
|
getAttr k = lookup k . map (\(Attr k' v) -> (k', v))
|
||||||
{ URI.uriScheme = URI.mkScheme "https"
|
|
||||||
, URI.uriAuthority = Right $ URI.Authority Nothing (fromJust $ URI.mkHost "freiburger-zeitung.ub.uni-freiburg.de") Nothing
|
|
||||||
, URI.uriPath = Just (False, fromJust . URI.mkPathPiece <$> NonEmpty.fromList ["show", "pics", Text.take 2 y <> "ff", y, m, Text.pack $ printf "frz.%s-%s-%s.%s.jpg" y m d ("01" :: Text)])
|
|
||||||
}
|
|
||||||
|
|
||||||
getMonthOverview :: Year -> MonthOfYear -> IO [(Day, TargetSpec)]
|
getMonthOverview :: Year -> MonthOfYear -> IO [(Day, TargetSpec)]
|
||||||
getMonthOverview year month = do
|
getMonthOverview year month = do
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user