Add /codepoints

This commit is contained in:
Paul Brinkmeier 2023-03-01 03:41:39 +01:00
parent 3a817a8e7a
commit 944edaf445
2 changed files with 82 additions and 14 deletions

View File

@ -1,5 +1,6 @@
{-# LANGUAGE BinaryLiterals #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedRecordDot #-}
@ -10,6 +11,7 @@
module Main (main) where
import Data.Char (chr)
import Data.Foldable (for_)
import Data.List (intercalate)
import Data.Maybe (fromMaybe, maybeToList)
@ -31,6 +33,7 @@ import Servant
, serve
, throwError
, (:>)
, (:<|>) (..)
)
import Text.Blaze.Html5 ((!))
import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
@ -56,32 +59,40 @@ app :: Application
app = serve (Proxy :: Proxy API) server
type API =
"hex" :> Capture "bytes" Text :> Get '[PlainText, HTML] HexModel
"bytes" :> Capture "bytes" Text :> Get '[PlainText, HTML] BytesModel
:<|> "codepoints" :> Capture "codepoints" Text :> Get '[PlainText] CodepointsModel
server :: Server API
server =
hex
bytesR :<|> codepointsR
where
hex bytesP = do
bytesR bytesP = do
bytes <- Parsers.parseHexBytes bytesP `orThrow` const err400
pure $ HexModel $ Decode.decodeUtf8 bytes
pure $ BytesModel $ Decode.decodeUtf8 bytes
newtype HexModel = HexModel
codepointsR codepointsP = do
codepoints' <- Parsers.parseCodepoints codepointsP `orThrow` const err400
pure $ mkCodepointsModel codepoints'
-- /bytes/<bytes>
newtype BytesModel = BytesModel
{ codepoints :: [([Word8], Either String Char)]
}
instance MimeRender PlainText HexModel where
mimeRender _ model = BL.fromStrict $ Encoding.encodeUtf8 $
instance MimeRender PlainText BytesModel where
mimeRender _ model = renderText $
Table.render " " $ concat
[ [ [ Table.cl $ Text.pack $ unwords $ map showByteHex bytes
]
, map Table.cl (Text.pack (unwords $ map showByteBin bytes)
: case eiC of
Left err ->
[ Text.pack err
[ "Decoding error: " <> Text.pack err
]
Right c ->
[ Text.pack $ printf "U+%04X" c
[ Text.pack [c]
, Text.pack $ printf "U+%04X" c
, Text.pack $ intercalate ", " $ maybeToList (UnicodeNames.name c) ++ map (++ "*") (UnicodeNames.nameAliases c)
, Text.pack $ fromMaybe "" $ blockName c
]
@ -90,7 +101,7 @@ instance MimeRender PlainText HexModel where
| (bytes, eiC) <- model.codepoints
]
instance MimeRender HTML HexModel where
instance MimeRender HTML BytesModel where
mimeRender _ model = renderHtml $ H.docTypeHtml $ do
H.head $ do
H.meta ! A.charset "utf-8"
@ -110,8 +121,46 @@ instance MimeRender HTML HexModel where
H.td $ H.code $ H.toHtml $ intercalate ", " $ maybeToList (UnicodeNames.name c) ++ map (++ "*") (UnicodeNames.nameAliases c)
H.td $ H.toHtml $ fromMaybe "" $ blockName c
-- /codepoints/<codepoints>
newtype CodepointsModel = CodepointsModel
{ codepoints :: [(Word, Either String Char)]
}
mkCodepointsModel :: [Word] -> CodepointsModel
mkCodepointsModel = CodepointsModel . map go
where
go codepoint = (codepoint, toChar codepoint)
toChar codepoint
| codepoint > 0x10FFFF = Left "Would be too big (maximum: U+10FFFF)"
| isSurrogate codepoint = Left "Is a surrogate"
| otherwise = Right $ chr $ fromIntegral codepoint
isSurrogate codepoint = 0xD800 <= codepoint && codepoint <= 0xDFFF
instance MimeRender PlainText CodepointsModel where
mimeRender _ model = renderText $ Table.render " "
[ map Table.cl (Text.pack (printf "0x%X" codepoint)
: case eiC of
Left err ->
[ "Decoding error: " <> Text.pack err
]
Right c ->
[ Text.pack [c]
, Text.pack $ printf "U+%04X" c
, Text.pack $ intercalate ", " $ maybeToList (UnicodeNames.name c) ++ map (++ "*") (UnicodeNames.nameAliases c)
, Text.pack $ fromMaybe "" $ blockName c
]
)
| (codepoint, eiC) <- model.codepoints
]
-- Utilities
renderText :: Text -> BL.ByteString
renderText = BL.fromStrict . Encoding.encodeUtf8
showByteHex :: Word8 -> String
showByteHex = printf " %02X"

View File

@ -1,8 +1,9 @@
module UToy.Parsers
( parseHexBytes
, parseCodepoints
) where
import Data.Char (isHexDigit, ord)
import Data.Char (isDigit, isHexDigit, ord)
import Data.Text (Text)
import Data.Word (Word8)
import Text.Printf (printf)
@ -23,9 +24,27 @@ hexBytes = hexByte `Atto.sepBy` separators
hexDigit = hexDigitToInt <$> Atto.satisfy isHexDigit
hexDigitToInt c
| '0' <= c && c <= '9' = ord c - ord '0'
| isDigit c = ord c - ord '0'
| 'A' <= c && c <= 'F' = ord c - ord 'A' + 10
| 'a' <= c && c <= 'f' = ord c - ord 'a' + 10
| otherwise = error $ printf "not a hex digit: %c" c
separators = Atto.skipMany $ Atto.satisfy $ Atto.inClass " +."
parseCodepoints :: Text -> Either String [Word]
parseCodepoints = Atto.parseOnly $ codepoints <* Atto.endOfInput
codepoints :: Atto.Parser [Word]
codepoints = codepoint `Atto.sepBy` separators
where
codepoint = Atto.choice [literal, decLiteral, hexLiteral]
literal = Atto.decimal
decLiteral = Atto.char 'd' *> Atto.decimal
hexLiteral = Atto.char 'x' *> Atto.hexadecimal
-- Common
separators :: Atto.Parser ()
separators = Atto.skipMany $ Atto.satisfy $ Atto.inClass " +."