diff --git a/app/Main.hs b/app/Main.hs index 0386eff..580ede2 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -1,5 +1,6 @@ {-# LANGUAGE BinaryLiterals #-} {-# LANGUAGE DataKinds #-} +{-# LANGUAGE DuplicateRecordFields #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE OverloadedRecordDot #-} @@ -10,6 +11,7 @@ module Main (main) where +import Data.Char (chr) import Data.Foldable (for_) import Data.List (intercalate) import Data.Maybe (fromMaybe, maybeToList) @@ -31,6 +33,7 @@ import Servant , serve , throwError , (:>) + , (:<|>) (..) ) import Text.Blaze.Html5 ((!)) import Text.Blaze.Html.Renderer.Utf8 (renderHtml) @@ -56,32 +59,40 @@ app :: Application app = serve (Proxy :: Proxy API) server type API = - "hex" :> Capture "bytes" Text :> Get '[PlainText, HTML] HexModel + "bytes" :> Capture "bytes" Text :> Get '[PlainText, HTML] BytesModel + :<|> "codepoints" :> Capture "codepoints" Text :> Get '[PlainText] CodepointsModel server :: Server API server = - hex + bytesR :<|> codepointsR where - hex bytesP = do + bytesR bytesP = do bytes <- Parsers.parseHexBytes bytesP `orThrow` const err400 - pure $ HexModel $ Decode.decodeUtf8 bytes + pure $ BytesModel $ Decode.decodeUtf8 bytes -newtype HexModel = HexModel + codepointsR codepointsP = do + codepoints' <- Parsers.parseCodepoints codepointsP `orThrow` const err400 + pure $ mkCodepointsModel codepoints' + +-- /bytes/ + +newtype BytesModel = BytesModel { codepoints :: [([Word8], Either String Char)] } -instance MimeRender PlainText HexModel where - mimeRender _ model = BL.fromStrict $ Encoding.encodeUtf8 $ +instance MimeRender PlainText BytesModel where + mimeRender _ model = renderText $ Table.render " " $ concat [ [ [ Table.cl $ Text.pack $ unwords $ map showByteHex bytes ] , map Table.cl (Text.pack (unwords $ map showByteBin bytes) : case eiC of Left err -> - [ Text.pack err + [ "Decoding error: " <> Text.pack err ] Right c -> - [ Text.pack $ printf "U+%04X" c + [ Text.pack [c] + , Text.pack $ printf "U+%04X" c , Text.pack $ intercalate ", " $ maybeToList (UnicodeNames.name c) ++ map (++ "*") (UnicodeNames.nameAliases c) , Text.pack $ fromMaybe "" $ blockName c ] @@ -90,7 +101,7 @@ instance MimeRender PlainText HexModel where | (bytes, eiC) <- model.codepoints ] -instance MimeRender HTML HexModel where +instance MimeRender HTML BytesModel where mimeRender _ model = renderHtml $ H.docTypeHtml $ do H.head $ do H.meta ! A.charset "utf-8" @@ -110,8 +121,46 @@ instance MimeRender HTML HexModel where H.td $ H.code $ H.toHtml $ intercalate ", " $ maybeToList (UnicodeNames.name c) ++ map (++ "*") (UnicodeNames.nameAliases c) H.td $ H.toHtml $ fromMaybe "" $ blockName c +-- /codepoints/ + +newtype CodepointsModel = CodepointsModel + { codepoints :: [(Word, Either String Char)] + } + +mkCodepointsModel :: [Word] -> CodepointsModel +mkCodepointsModel = CodepointsModel . map go + where + go codepoint = (codepoint, toChar codepoint) + + toChar codepoint + | codepoint > 0x10FFFF = Left "Would be too big (maximum: U+10FFFF)" + | isSurrogate codepoint = Left "Is a surrogate" + | otherwise = Right $ chr $ fromIntegral codepoint + + isSurrogate codepoint = 0xD800 <= codepoint && codepoint <= 0xDFFF + +instance MimeRender PlainText CodepointsModel where + mimeRender _ model = renderText $ Table.render " " + [ map Table.cl (Text.pack (printf "0x%X" codepoint) + : case eiC of + Left err -> + [ "Decoding error: " <> Text.pack err + ] + Right c -> + [ Text.pack [c] + , Text.pack $ printf "U+%04X" c + , Text.pack $ intercalate ", " $ maybeToList (UnicodeNames.name c) ++ map (++ "*") (UnicodeNames.nameAliases c) + , Text.pack $ fromMaybe "" $ blockName c + ] + ) + | (codepoint, eiC) <- model.codepoints + ] + -- Utilities +renderText :: Text -> BL.ByteString +renderText = BL.fromStrict . Encoding.encodeUtf8 + showByteHex :: Word8 -> String showByteHex = printf " %02X" diff --git a/src/UToy/Parsers.hs b/src/UToy/Parsers.hs index 9b65b4a..b956fb6 100644 --- a/src/UToy/Parsers.hs +++ b/src/UToy/Parsers.hs @@ -1,8 +1,9 @@ module UToy.Parsers ( parseHexBytes + , parseCodepoints ) where -import Data.Char (isHexDigit, ord) +import Data.Char (isDigit, isHexDigit, ord) import Data.Text (Text) import Data.Word (Word8) import Text.Printf (printf) @@ -23,9 +24,27 @@ hexBytes = hexByte `Atto.sepBy` separators hexDigit = hexDigitToInt <$> Atto.satisfy isHexDigit hexDigitToInt c - | '0' <= c && c <= '9' = ord c - ord '0' + | isDigit c = ord c - ord '0' | 'A' <= c && c <= 'F' = ord c - ord 'A' + 10 | 'a' <= c && c <= 'f' = ord c - ord 'a' + 10 | otherwise = error $ printf "not a hex digit: %c" c - - separators = Atto.skipMany $ Atto.satisfy $ Atto.inClass " +." + +parseCodepoints :: Text -> Either String [Word] +parseCodepoints = Atto.parseOnly $ codepoints <* Atto.endOfInput + +codepoints :: Atto.Parser [Word] +codepoints = codepoint `Atto.sepBy` separators + where + codepoint = Atto.choice [literal, decLiteral, hexLiteral] + + literal = Atto.decimal + + decLiteral = Atto.char 'd' *> Atto.decimal + + hexLiteral = Atto.char 'x' *> Atto.hexadecimal + + +-- Common + +separators :: Atto.Parser () +separators = Atto.skipMany $ Atto.satisfy $ Atto.inClass " +."