Add /codepoints

This commit is contained in:
Paul Brinkmeier 2023-03-01 03:41:39 +01:00
parent 3a817a8e7a
commit 944edaf445
2 changed files with 82 additions and 14 deletions

View File

@ -1,5 +1,6 @@
{-# LANGUAGE BinaryLiterals #-} {-# LANGUAGE BinaryLiterals #-}
{-# LANGUAGE DataKinds #-} {-# LANGUAGE DataKinds #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedRecordDot #-} {-# LANGUAGE OverloadedRecordDot #-}
@ -10,6 +11,7 @@
module Main (main) where module Main (main) where
import Data.Char (chr)
import Data.Foldable (for_) import Data.Foldable (for_)
import Data.List (intercalate) import Data.List (intercalate)
import Data.Maybe (fromMaybe, maybeToList) import Data.Maybe (fromMaybe, maybeToList)
@ -31,6 +33,7 @@ import Servant
, serve , serve
, throwError , throwError
, (:>) , (:>)
, (:<|>) (..)
) )
import Text.Blaze.Html5 ((!)) import Text.Blaze.Html5 ((!))
import Text.Blaze.Html.Renderer.Utf8 (renderHtml) import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
@ -56,32 +59,40 @@ app :: Application
app = serve (Proxy :: Proxy API) server app = serve (Proxy :: Proxy API) server
type API = type API =
"hex" :> Capture "bytes" Text :> Get '[PlainText, HTML] HexModel "bytes" :> Capture "bytes" Text :> Get '[PlainText, HTML] BytesModel
:<|> "codepoints" :> Capture "codepoints" Text :> Get '[PlainText] CodepointsModel
server :: Server API server :: Server API
server = server =
hex bytesR :<|> codepointsR
where where
hex bytesP = do bytesR bytesP = do
bytes <- Parsers.parseHexBytes bytesP `orThrow` const err400 bytes <- Parsers.parseHexBytes bytesP `orThrow` const err400
pure $ HexModel $ Decode.decodeUtf8 bytes pure $ BytesModel $ Decode.decodeUtf8 bytes
newtype HexModel = HexModel codepointsR codepointsP = do
codepoints' <- Parsers.parseCodepoints codepointsP `orThrow` const err400
pure $ mkCodepointsModel codepoints'
-- /bytes/<bytes>
newtype BytesModel = BytesModel
{ codepoints :: [([Word8], Either String Char)] { codepoints :: [([Word8], Either String Char)]
} }
instance MimeRender PlainText HexModel where instance MimeRender PlainText BytesModel where
mimeRender _ model = BL.fromStrict $ Encoding.encodeUtf8 $ mimeRender _ model = renderText $
Table.render " " $ concat Table.render " " $ concat
[ [ [ Table.cl $ Text.pack $ unwords $ map showByteHex bytes [ [ [ Table.cl $ Text.pack $ unwords $ map showByteHex bytes
] ]
, map Table.cl (Text.pack (unwords $ map showByteBin bytes) , map Table.cl (Text.pack (unwords $ map showByteBin bytes)
: case eiC of : case eiC of
Left err -> Left err ->
[ Text.pack err [ "Decoding error: " <> Text.pack err
] ]
Right c -> Right c ->
[ Text.pack $ printf "U+%04X" c [ Text.pack [c]
, Text.pack $ printf "U+%04X" c
, Text.pack $ intercalate ", " $ maybeToList (UnicodeNames.name c) ++ map (++ "*") (UnicodeNames.nameAliases c) , Text.pack $ intercalate ", " $ maybeToList (UnicodeNames.name c) ++ map (++ "*") (UnicodeNames.nameAliases c)
, Text.pack $ fromMaybe "" $ blockName c , Text.pack $ fromMaybe "" $ blockName c
] ]
@ -90,7 +101,7 @@ instance MimeRender PlainText HexModel where
| (bytes, eiC) <- model.codepoints | (bytes, eiC) <- model.codepoints
] ]
instance MimeRender HTML HexModel where instance MimeRender HTML BytesModel where
mimeRender _ model = renderHtml $ H.docTypeHtml $ do mimeRender _ model = renderHtml $ H.docTypeHtml $ do
H.head $ do H.head $ do
H.meta ! A.charset "utf-8" H.meta ! A.charset "utf-8"
@ -110,8 +121,46 @@ instance MimeRender HTML HexModel where
H.td $ H.code $ H.toHtml $ intercalate ", " $ maybeToList (UnicodeNames.name c) ++ map (++ "*") (UnicodeNames.nameAliases c) H.td $ H.code $ H.toHtml $ intercalate ", " $ maybeToList (UnicodeNames.name c) ++ map (++ "*") (UnicodeNames.nameAliases c)
H.td $ H.toHtml $ fromMaybe "" $ blockName c H.td $ H.toHtml $ fromMaybe "" $ blockName c
-- /codepoints/<codepoints>
newtype CodepointsModel = CodepointsModel
{ codepoints :: [(Word, Either String Char)]
}
mkCodepointsModel :: [Word] -> CodepointsModel
mkCodepointsModel = CodepointsModel . map go
where
go codepoint = (codepoint, toChar codepoint)
toChar codepoint
| codepoint > 0x10FFFF = Left "Would be too big (maximum: U+10FFFF)"
| isSurrogate codepoint = Left "Is a surrogate"
| otherwise = Right $ chr $ fromIntegral codepoint
isSurrogate codepoint = 0xD800 <= codepoint && codepoint <= 0xDFFF
instance MimeRender PlainText CodepointsModel where
mimeRender _ model = renderText $ Table.render " "
[ map Table.cl (Text.pack (printf "0x%X" codepoint)
: case eiC of
Left err ->
[ "Decoding error: " <> Text.pack err
]
Right c ->
[ Text.pack [c]
, Text.pack $ printf "U+%04X" c
, Text.pack $ intercalate ", " $ maybeToList (UnicodeNames.name c) ++ map (++ "*") (UnicodeNames.nameAliases c)
, Text.pack $ fromMaybe "" $ blockName c
]
)
| (codepoint, eiC) <- model.codepoints
]
-- Utilities -- Utilities
renderText :: Text -> BL.ByteString
renderText = BL.fromStrict . Encoding.encodeUtf8
showByteHex :: Word8 -> String showByteHex :: Word8 -> String
showByteHex = printf " %02X" showByteHex = printf " %02X"

View File

@ -1,8 +1,9 @@
module UToy.Parsers module UToy.Parsers
( parseHexBytes ( parseHexBytes
, parseCodepoints
) where ) where
import Data.Char (isHexDigit, ord) import Data.Char (isDigit, isHexDigit, ord)
import Data.Text (Text) import Data.Text (Text)
import Data.Word (Word8) import Data.Word (Word8)
import Text.Printf (printf) import Text.Printf (printf)
@ -23,9 +24,27 @@ hexBytes = hexByte `Atto.sepBy` separators
hexDigit = hexDigitToInt <$> Atto.satisfy isHexDigit hexDigit = hexDigitToInt <$> Atto.satisfy isHexDigit
hexDigitToInt c hexDigitToInt c
| '0' <= c && c <= '9' = ord c - ord '0' | isDigit c = ord c - ord '0'
| 'A' <= c && c <= 'F' = ord c - ord 'A' + 10 | 'A' <= c && c <= 'F' = ord c - ord 'A' + 10
| 'a' <= c && c <= 'f' = ord c - ord 'a' + 10 | 'a' <= c && c <= 'f' = ord c - ord 'a' + 10
| otherwise = error $ printf "not a hex digit: %c" c | otherwise = error $ printf "not a hex digit: %c" c
separators = Atto.skipMany $ Atto.satisfy $ Atto.inClass " +." parseCodepoints :: Text -> Either String [Word]
parseCodepoints = Atto.parseOnly $ codepoints <* Atto.endOfInput
codepoints :: Atto.Parser [Word]
codepoints = codepoint `Atto.sepBy` separators
where
codepoint = Atto.choice [literal, decLiteral, hexLiteral]
literal = Atto.decimal
decLiteral = Atto.char 'd' *> Atto.decimal
hexLiteral = Atto.char 'x' *> Atto.hexadecimal
-- Common
separators :: Atto.Parser ()
separators = Atto.skipMany $ Atto.satisfy $ Atto.inClass " +."