{-# LANGUAGE BinaryLiterals #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE DuplicateRecordFields #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE OverloadedRecordDot #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TypeOperators #-} module Main (main) where import Data.Char (chr) import Data.FileEmbed (embedFile) import Data.Foldable (for_) import Data.List (intercalate) import Data.Maybe (fromMaybe, maybeToList) import Data.Text (Text) import Data.Word (Word8) import Network.HTTP.Media ((//), (/:)) import Network.Wai (Application) import Servant ( Accept (..) , Handler , MimeRender (..) , Server , ServerError (..) , PlainText , Proxy (..) , Capture , Get , err400 , serve , throwError , (:>) , (:<|>) (..) ) import Text.Blaze.Html5 ((!)) import Text.Blaze.Html.Renderer.Utf8 (renderHtml) import Text.Printf (PrintfArg, printf) import qualified Data.ByteString.Lazy as BL import qualified Data.Text as Text import qualified Data.Text.Encoding as Encoding import qualified Network.Wai.Handler.Warp as Warp import qualified Text.Blaze.Html5 as H import qualified Text.Blaze.Html5.Attributes as A import qualified Unicode.Char.General.Blocks as UnicodeBlocks import qualified Unicode.Char.General.Names as UnicodeNames import qualified UToy.Decode as Decode import qualified UToy.Table as Table import qualified UToy.Parsers as Parsers main :: IO () main = Warp.run 3000 app app :: Application app = serve (Proxy :: Proxy API) server type API = "utf8" :> Capture "bytes" Text :> Get '[PlainText, HTML] Utf8Model :<|> "codepoints" :> Capture "codepoints" Text :> Get '[PlainText, HTML] CodepointsModel :<|> "text" :> Capture "text" Text :> Get '[PlainText, HTML] TextModel server :: Server API server = utf8R :<|> codepointsR :<|> textR where utf8R bytesP = do bytes <- Parsers.parseHexBytes bytesP `orThrow` const err400 pure $ mkUtf8Model bytes codepointsR codepointsP = do codepoints' <- Parsers.parseCodepoints codepointsP `orThrow` const err400 pure $ mkCodepointsModel codepoints' textR textP = do pure $ TextModel textP -- /bytes/ newtype Utf8Model = Utf8Model { codepoints :: [([Word8], Either String Char)] } mkUtf8Model :: [Word8] -> Utf8Model mkUtf8Model = Utf8Model . Decode.decodeUtf8 instance MimeRender PlainText Utf8Model where mimeRender _ model = renderText $ Table.render " " $ concat [ [ [ Table.cl $ Text.pack $ unwords $ map showByteHex bytes ] , map Table.cl (Text.pack (unwords $ map showByteBin bytes) : case eiC of Left err -> [ "Decoding error: " <> Text.pack err ] Right c -> [ Text.pack [c] , Text.pack $ printf "U+%04X" c , Text.pack $ intercalate ", " $ allNames c , Text.pack $ fromMaybe "" $ blockName c ] ) ] | (bytes, eiC) <- model.codepoints ] instance MimeRender HTML Utf8Model where mimeRender _ model = renderHtml $ documentWithBody $ do H.table $ for_ model.codepoints $ \(bytes, eiC) -> do H.tr $ do H.td $ H.pre $ H.toHtml $ unlines $ map unwords [map showByteHex bytes, map showByteBin bytes] case eiC of Left err -> H.td ! A.colspan "4" $ H.toHtml $ "Decoding error: " ++ err Right c -> do H.td $ H.input ! A.class_ "charbox" ! A.value (H.toValue [c]) H.td $ H.code $ printfHtml "U+%04X" c H.td $ H.code $ H.toHtml $ intercalate ", " $ allNames c H.td $ H.code $ H.toHtml $ fromMaybe "" $ blockName c -- /codepoints/ newtype CodepointsModel = CodepointsModel { codepoints :: [(Word, Either String Char)] } mkCodepointsModel :: [(Word, Word)] -> CodepointsModel mkCodepointsModel = CodepointsModel -- Limit number of returned codepoints. Otherwise it's -- too easy to provoke massive response bodies with requests like -- /codepoints/0-99999999 . take 100000 . map go . concatMap (uncurry enumFromTo) where go codepoint = (codepoint, toChar codepoint) toChar codepoint | codepoint > 0x10FFFF = Left "Would be too big (maximum: U+10FFFF)" | isSurrogate codepoint = Left "Is a surrogate" | otherwise = Right $ chr $ fromIntegral codepoint isSurrogate codepoint = 0xD800 <= codepoint && codepoint <= 0xDFFF instance MimeRender PlainText CodepointsModel where mimeRender _ model = renderText $ Table.render " " [ map Table.cl (Text.pack (printf "0x%X" codepoint) : case eiC of Left err -> [ "Decoding error: " <> Text.pack err ] Right c -> [ Text.pack [c] , Text.pack $ printf "U+%04X" c , Text.pack $ intercalate ", " $ allNames c , Text.pack $ fromMaybe "" $ blockName c ] ) | (codepoint, eiC) <- model.codepoints ] instance MimeRender HTML CodepointsModel where mimeRender _ model = renderHtml $ documentWithBody $ do H.table $ for_ model.codepoints $ \(codepoint, eiC) -> H.tr $ do H.td $ H.code $ H.toHtml $ Text.pack $ printf "0x%X" codepoint case eiC of Left err -> do H.td ! A.colspan "4" $ H.code $ H.toHtml $ "Decoding error: " <> Text.pack err Right c -> do H.td $ H.input ! A.class_ "charbox" ! A.value (H.toValue [c]) H.td $ H.code $ printfHtml "U+%04X" c H.td $ H.code $ H.toHtml $ intercalate ", " $ allNames c H.td $ H.code $ H.toHtml $ fromMaybe "" $ blockName c -- /text/ newtype TextModel = TextModel { text :: Text } instance MimeRender HTML TextModel where mimeRender _ model = renderHtml $ documentWithBody $ do H.table $ for_ (Text.unpack model.text) $ \c -> do H.tr $ do H.td $ H.input ! A.class_ "charbox" ! A.value (H.toValue [c]) H.td $ H.code $ printfHtml "U+%04X" c H.td $ H.code $ H.toHtml $ intercalate ", " $ allNames c H.td $ H.code $ H.toHtml $ fromMaybe "" $ blockName c instance MimeRender PlainText TextModel where mimeRender _ model = renderText $ Table.render " " [ map (Table.cl) [ Text.pack [c] , Text.pack $ printf "U+%04X" c , Text.pack $ intercalate ", " $ allNames c , Text.pack $ fromMaybe "" $ blockName c ] | c <- Text.unpack model.text ] -- Utilities renderText :: Text -> BL.ByteString renderText = BL.fromStrict . Encoding.encodeUtf8 showByteHex :: Word8 -> String showByteHex = printf " %02X" showByteBin :: Word8 -> String showByteBin = printf "%08b" -- | Retrieve name and aliases (suffixed with @*@) of a 'Char'. allNames :: Char -> [String] allNames c = maybeToList (UnicodeNames.name c) ++ map (++ "*") (UnicodeNames.nameAliases c) blockName :: Char -> Maybe String blockName c = UnicodeBlocks.blockName . UnicodeBlocks.blockDefinition <$> UnicodeBlocks.block c orThrow :: Either a b -> (a -> ServerError) -> Handler b orThrow (Left err) f = throwError $ f err orThrow (Right val) _ = pure val printfHtml :: PrintfArg a => String -> a -> H.Html printfHtml fmt = (H.toHtml :: String -> H.Html) . printf fmt documentWithBody :: H.Html -> H.Html documentWithBody body = H.docTypeHtml $ do H.head $ do H.meta ! A.charset "utf-8" H.title "utoy" H.style $ H.toHtml $ Encoding.decodeUtf8 $(embedFile "static/utoy.css") H.body body -- HTML routes data HTML instance Accept HTML where contentType _ = "text" // "html" /: ("charset", "utf-8")