Add getUrl
This commit is contained in:
parent
6d49da993c
commit
ff3b47baa9
@ -1,10 +1,11 @@
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Yore.Download (downloadToFile, getJpgURIs) where
|
||||
module Yore.Download where
|
||||
|
||||
import Control.Monad (guard)
|
||||
import Data.Char
|
||||
import Data.Maybe (fromMaybe, listToMaybe, mapMaybe, fromJust)
|
||||
import Data.List (find)
|
||||
import Data.Maybe (fromMaybe, listToMaybe, mapMaybe)
|
||||
import Data.Text (Text)
|
||||
import Data.Time (Day, MonthOfYear, Year, fromGregorian, toGregorian)
|
||||
import Network.HTTP.Req
|
||||
@ -12,7 +13,6 @@ import Text.HTML.Parser
|
||||
import Text.Printf (printf)
|
||||
import Text.URI (URI)
|
||||
|
||||
import qualified Data.ByteString.Lazy as LBS
|
||||
import qualified Data.List.NonEmpty as NonEmpty
|
||||
import qualified Data.Text as Text
|
||||
import qualified Data.Text.Encoding as Encoding
|
||||
@ -25,27 +25,45 @@ data TargetSpec = TargetSpec
|
||||
}
|
||||
deriving (Show)
|
||||
|
||||
downloadToFile :: URI -> FilePath -> IO ()
|
||||
downloadToFile uri path = do
|
||||
let (url, q) = fromJust $ useHttpsURI uri
|
||||
res <- runReq defaultHttpConfig $ req GET url NoReqBody lbsResponse q
|
||||
getUrl :: Day -> IO Text
|
||||
getUrl date = do
|
||||
let (y, m, _) = toGregorian date
|
||||
candidates <- getMonthOverview y m
|
||||
let (_, ts) = fromMaybe (error "no candidates") $ find ((date ==) . fst) candidates
|
||||
fromMaybe (error "could not get showpdf url") <$> getShowpdf ts
|
||||
|
||||
getShowpdf :: TargetSpec -> IO (Maybe Text)
|
||||
getShowpdf (TargetSpec y m d) = do
|
||||
res <- runReq defaultHttpConfig $ req
|
||||
GET
|
||||
-- E.g. https://fz.ub.uni-freiburg.de/show/fz.cgi?cmd=showpdf&day=01r1&year=1925&month=07&project=3
|
||||
(https "fz.ub.uni-freiburg.de" /: "show" /: "fz.cgi")
|
||||
NoReqBody
|
||||
bsResponse
|
||||
(formToQuery
|
||||
[ ("cmd" :: Text, "showpdf")
|
||||
, ("year", y)
|
||||
, ("month", m)
|
||||
, ("day", d)
|
||||
])
|
||||
|
||||
guard $ responseStatusCode res == 200
|
||||
|
||||
LBS.writeFile path $ responseBody res
|
||||
let tokens = parseTokens $ Encoding.decodeUtf8Lenient $ responseBody res
|
||||
let targets = mapMaybe getRefreshTarget tokens
|
||||
|
||||
getJpgURIs :: Day -> IO [URI]
|
||||
getJpgURIs date = do
|
||||
ov <- getMonthOverview year month
|
||||
pure [ts2uri ts | (date', ts) <- ov, date' == date]
|
||||
pure $ listToMaybe targets
|
||||
|
||||
where
|
||||
(year, month, _) = toGregorian date
|
||||
getRefreshTarget (TagOpen "meta" attrs) = do
|
||||
httpEquiv <- getAttr "http-equiv" attrs
|
||||
guard $ httpEquiv == "refresh"
|
||||
content <- getAttr "content" attrs
|
||||
Just $ snd $ Text.breakOnEnd "URL=" content
|
||||
getRefreshTarget _ =
|
||||
Nothing
|
||||
|
||||
ts2uri (TargetSpec y m d) = URI.emptyURI
|
||||
{ URI.uriScheme = URI.mkScheme "https"
|
||||
, URI.uriAuthority = Right $ URI.Authority Nothing (fromJust $ URI.mkHost "freiburger-zeitung.ub.uni-freiburg.de") Nothing
|
||||
, URI.uriPath = Just (False, fromJust . URI.mkPathPiece <$> NonEmpty.fromList ["show", "pics", Text.take 2 y <> "ff", y, m, Text.pack $ printf "frz.%s-%s-%s.%s.jpg" y m d ("01" :: Text)])
|
||||
}
|
||||
getAttr k = lookup k . map (\(Attr k' v) -> (k', v))
|
||||
|
||||
getMonthOverview :: Year -> MonthOfYear -> IO [(Day, TargetSpec)]
|
||||
getMonthOverview year month = do
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user