Add /codepoints

2023-03-01 03:41:39 +01:00 · 2023-03-01 03:41:39 +01:00 · 944edaf445
commit 944edaf445
parent 3a817a8e7a
2 changed files with 82 additions and 14 deletions
--- a/app/Main.hs
+++ b/app/Main.hs
@ -1,5 +1,6 @@
 {-# LANGUAGE BinaryLiterals #-}
 {-# LANGUAGE DataKinds #-}
+{-# LANGUAGE DuplicateRecordFields #-}
 {-# LANGUAGE FlexibleInstances #-}
 {-# LANGUAGE MultiParamTypeClasses #-}
 {-# LANGUAGE OverloadedRecordDot #-}
@ -10,6 +11,7 @@

 module Main (main) where

+import Data.Char (chr)
 import Data.Foldable (for_)
 import Data.List (intercalate)
 import Data.Maybe (fromMaybe, maybeToList)
@ -31,6 +33,7 @@ import Servant
  , serve
  , throwError
  , (:>)
+  , (:<|>) (..)
  )
 import Text.Blaze.Html5 ((!))
 import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
@ -56,32 +59,40 @@ app :: Application
 app = serve (Proxy :: Proxy API) server

 type API =
-  "hex" :> Capture "bytes" Text :> Get '[PlainText, HTML] HexModel
+  "bytes" :> Capture "bytes" Text :> Get '[PlainText, HTML] BytesModel
+  :<|> "codepoints" :> Capture "codepoints" Text :> Get '[PlainText] CodepointsModel

 server :: Server API
 server =
-  hex
+  bytesR :<|> codepointsR
  where
-    hex bytesP = do
+    bytesR bytesP = do
      bytes <- Parsers.parseHexBytes bytesP `orThrow` const err400
-      pure $ HexModel $ Decode.decodeUtf8 bytes
+      pure $ BytesModel $ Decode.decodeUtf8 bytes

-newtype HexModel = HexModel
+    codepointsR codepointsP = do
+      codepoints' <- Parsers.parseCodepoints codepointsP `orThrow` const err400
+      pure $ mkCodepointsModel codepoints'
+
+-- /bytes/<bytes>
+
+newtype BytesModel = BytesModel
  { codepoints :: [([Word8], Either String Char)]
  }

-instance MimeRender PlainText HexModel where
-  mimeRender _ model = BL.fromStrict $ Encoding.encodeUtf8 $
+instance MimeRender PlainText BytesModel where
+  mimeRender _ model = renderText $
    Table.render "  " $ concat
      [ [ [ Table.cl $ Text.pack $ unwords $ map showByteHex bytes
          ]
        , map Table.cl (Text.pack (unwords $ map showByteBin bytes)
          : case eiC of
            Left err ->
-              [ Text.pack err
+              [ "Decoding error: " <> Text.pack err
              ]
            Right c ->
-              [ Text.pack $ printf "U+%04X" c
+              [ Text.pack [c]
+              , Text.pack $ printf "U+%04X" c
              , Text.pack $ intercalate ", " $ maybeToList (UnicodeNames.name c) ++ map (++ "*") (UnicodeNames.nameAliases c)
              , Text.pack $ fromMaybe "" $ blockName c
              ]
@ -90,7 +101,7 @@ instance MimeRender PlainText HexModel where
      | (bytes, eiC) <- model.codepoints
      ]

-instance MimeRender HTML HexModel where
+instance MimeRender HTML BytesModel where
  mimeRender _ model = renderHtml $ H.docTypeHtml $ do
    H.head $ do
      H.meta ! A.charset "utf-8"
@ -110,8 +121,46 @@ instance MimeRender HTML HexModel where
              H.td $ H.code $ H.toHtml $ intercalate ", " $ maybeToList (UnicodeNames.name c) ++ map (++ "*") (UnicodeNames.nameAliases c)
              H.td $ H.toHtml $ fromMaybe "" $ blockName c

+-- /codepoints/<codepoints>
+
+newtype CodepointsModel = CodepointsModel
+  { codepoints :: [(Word, Either String Char)]
+  }
+
+mkCodepointsModel :: [Word] -> CodepointsModel
+mkCodepointsModel = CodepointsModel . map go
+  where
+    go codepoint = (codepoint, toChar codepoint)
+
+    toChar codepoint
+      | codepoint > 0x10FFFF = Left "Would be too big (maximum: U+10FFFF)"
+      | isSurrogate codepoint = Left "Is a surrogate"
+      | otherwise = Right $ chr $ fromIntegral codepoint
+
+    isSurrogate codepoint = 0xD800 <= codepoint && codepoint <= 0xDFFF
+
+instance MimeRender PlainText CodepointsModel where
+  mimeRender _ model = renderText $ Table.render "  "
+    [ map Table.cl (Text.pack (printf "0x%X" codepoint)
+      : case eiC of
+        Left err ->
+          [ "Decoding error: " <> Text.pack err
+          ]
+        Right c ->
+          [ Text.pack [c]
+          , Text.pack $ printf "U+%04X" c
+          , Text.pack $ intercalate ", " $ maybeToList (UnicodeNames.name c) ++ map (++ "*") (UnicodeNames.nameAliases c)
+          , Text.pack $ fromMaybe "" $ blockName c
+          ]
+      )
+    | (codepoint, eiC) <- model.codepoints
+    ]
+
 -- Utilities

+renderText :: Text -> BL.ByteString
+renderText = BL.fromStrict . Encoding.encodeUtf8
+
 showByteHex :: Word8 -> String
 showByteHex = printf "      %02X"

--- a/src/UToy/Parsers.hs
+++ b/src/UToy/Parsers.hs
@ -1,8 +1,9 @@
 module UToy.Parsers
  ( parseHexBytes
+  , parseCodepoints
  ) where

-import Data.Char (isHexDigit, ord)
+import Data.Char (isDigit, isHexDigit, ord)
 import Data.Text (Text)
 import Data.Word (Word8)
 import Text.Printf (printf)
@ -23,9 +24,27 @@ hexBytes = hexByte `Atto.sepBy` separators
    hexDigit = hexDigitToInt <$> Atto.satisfy isHexDigit

    hexDigitToInt c
-      | '0' <= c && c <= '9' = ord c - ord '0'
+      | isDigit c = ord c - ord '0'
      | 'A' <= c && c <= 'F' = ord c - ord 'A' + 10
      | 'a' <= c && c <= 'f' = ord c - ord 'a' + 10
      | otherwise = error $ printf "not a hex digit: %c" c
-    
-    separators = Atto.skipMany $ Atto.satisfy $ Atto.inClass " +."
+
+parseCodepoints :: Text -> Either String [Word]
+parseCodepoints = Atto.parseOnly $ codepoints <* Atto.endOfInput
+
+codepoints :: Atto.Parser [Word]
+codepoints = codepoint `Atto.sepBy` separators
+  where
+    codepoint = Atto.choice [literal, decLiteral, hexLiteral]
+
+    literal = Atto.decimal
+
+    decLiteral = Atto.char 'd' *> Atto.decimal
+
+    hexLiteral = Atto.char 'x' *> Atto.hexadecimal
+
+
+-- Common
+
+separators :: Atto.Parser ()
+separators = Atto.skipMany $ Atto.satisfy $ Atto.inClass " +."