Implement /hex

This commit is contained in:
Paul Brinkmeier 2023-03-01 03:41:39 +01:00
parent b462796dbd
commit 69d9e21c80
11 changed files with 257 additions and 14 deletions

View File

@ -1,4 +1,108 @@
{-# LANGUAGE BinaryLiterals #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedRecordDot #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeOperators #-}
module Main (main) where module Main (main) where
import Data.Foldable (for_)
import Data.List (intercalate)
import Data.Maybe (fromMaybe, maybeToList)
import Data.Text (Text)
import Data.Word (Word8)
import Network.HTTP.Media ((//), (/:))
import Network.Wai (Application)
import Servant
( Accept (..)
, Handler
, MimeRender (..)
, Server
, ServerError (..)
, Proxy (..)
, Capture
, Get
, err400
, serve
, throwError
, (:>)
)
import Text.Blaze.Html5 ((!))
import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
import Text.Printf (PrintfArg, printf)
import qualified Network.Wai.Handler.Warp as Warp
import qualified Text.Blaze.Html5 as H
import qualified Text.Blaze.Html5.Attributes as A
import qualified Unicode.Char.General.Blocks as UnicodeBlocks
import qualified Unicode.Char.General.Names as UnicodeNames
import qualified UToy.Decode as Decode
import qualified UToy.Parsers as Parsers
main :: IO () main :: IO ()
main = putStrLn "Hello, World!" main = Warp.run 3000 app
app :: Application
app = serve (Proxy :: Proxy API) server
type API =
"hex" :> Capture "bytes" Text :> Get '[HTML] HexModel
server :: Server API
server =
hex
where
hex bytesP = do
bytes <- Parsers.parseHexBytes bytesP `orThrow` const err400
pure $ HexModel $ Decode.decodeUtf8 bytes
newtype HexModel = HexModel
{ codepoints :: [([Word8], Either String Char)]
}
instance MimeRender HTML HexModel where
mimeRender _ model = renderHtml $ H.docTypeHtml $ do
H.head $ do
H.meta ! A.charset "utf-8"
H.title "utoy"
H.style "html { font-size: 32px; font-family: 'Noto Sans', sans-serif; } td { padding: 0.5em 1em; } pre { margin: 0; font-size: 0.5em; } body { display: flex; justify-content: center; }"
H.body $ do
H.table $ for_ model.codepoints $ \(bytes, eiC) -> do
H.tr $ do
H.td $ H.pre $ H.toHtml $ unlines $ map unwords [map showByteHex bytes, map showByteBin bytes]
case eiC of
Left err ->
H.td ! A.colspan "4" $ H.toHtml $ "Decoding error: " ++ err
Right c -> do
H.td $ do
H.input ! A.value (H.toValue [c]) ! A.style "text-align: center; width: 2em; font-size: 1em;"
H.td $ H.code $ printfHtml "U+%04X" c
H.td $ H.code $ H.toHtml $ intercalate ", " $ maybeToList (UnicodeNames.name c) ++ map (++ "*") (UnicodeNames.nameAliases c)
H.td $ H.toHtml $ fromMaybe "" $ blockName c
where
showByteHex = printf " %02X"
showByteBin = printf "%8b"
blockName :: Char -> Maybe String
blockName c = UnicodeBlocks.blockName . UnicodeBlocks.blockDefinition <$> UnicodeBlocks.block c
-- Utilities
orThrow :: Either a b -> (a -> ServerError) -> Handler b
orThrow (Left err) f = throwError $ f err
orThrow (Right val) _ = pure val
printfHtml :: PrintfArg a => String -> a -> H.Html
printfHtml fmt = (H.toHtml :: String -> H.Html) . printf fmt
-- HTML routes
data HTML
instance Accept HTML where
contentType _ = "text" // "html" /: ("charset", "utf-8")

View File

@ -1,7 +1,10 @@
cradle: cradle:
stack: stack:
- path: "./test/Spec.hs" - path: "./src"
component: "utoy:test:utoy-test" component: "utoy:lib"
- path: "./app/Main.hs" - path: "./app/Main.hs"
component: "utoy:exe:utoy-exe" component: "utoy:exe:utoy-exe"
- path: "./test"
component: "utoy:test:utoy-test"

View File

@ -1,4 +1,12 @@
haskellPackages: with haskellPackages; [ haskellPackages: with haskellPackages; [
attoparsec
blaze-html
bytestring
http-media
servant-server
text
unicode-data unicode-data
unicode-data-names unicode-data-names
wai
warp
] ]

View File

@ -14,7 +14,11 @@ let
}; };
haskellOverlay = pkgs: final: prev: { haskellOverlay = pkgs: final: prev: {
unicode-data = prev.unicode-data_0_4_0; attoparsec-iso8601 = prev.attoparsec-iso8601_1_1_0_0;
http-api-data = prev.http-api-data_0_5;
servant = pkgs.haskell.lib.doJailbreak prev.servant;
servant-server = pkgs.haskell.lib.doJailbreak prev.servant-server;
unicode-data = prev.unicode-data_0_4_0_1;
unicode-data-names = pkgs.haskell.lib.markUnbroken prev.unicode-data-names; unicode-data-names = pkgs.haskell.lib.markUnbroken prev.unicode-data-names;
}; };
in in

View File

@ -12,15 +12,15 @@
"url_template": "https://github.com/<owner>/<repo>/archive/<rev>.tar.gz" "url_template": "https://github.com/<owner>/<repo>/archive/<rev>.tar.gz"
}, },
"nixpkgs": { "nixpkgs": {
"branch": "nixos-22.11", "branch": "nixpkgs-unstable",
"description": "Nix Packages collection", "description": "Nix Packages collection",
"homepage": "", "homepage": "",
"owner": "NixOS", "owner": "NixOS",
"repo": "nixpkgs", "repo": "nixpkgs",
"rev": "7076110064c09f0b3942f609f2134c1358ef2e50", "rev": "9952d6bc395f5841262b006fbace8dd7e143b634",
"sha256": "0j7idx8vyb0spwxqb7rr8pk15wi7yfyf5hp608wkhaz7wjw8k9nf", "sha256": "0kvpf63dda6nzbqd2kyr99qh1av89mva26xykp3zb4diyicp7yji",
"type": "tarball", "type": "tarball",
"url": "https://github.com/NixOS/nixpkgs/archive/7076110064c09f0b3942f609f2134c1358ef2e50.tar.gz", "url": "https://github.com/NixOS/nixpkgs/archive/9952d6bc395f5841262b006fbace8dd7e143b634.tar.gz",
"url_template": "https://github.com/<owner>/<repo>/archive/<rev>.tar.gz" "url_template": "https://github.com/<owner>/<repo>/archive/<rev>.tar.gz"
} }
} }

View File

@ -13,6 +13,8 @@ extra-source-files:
dependencies: dependencies:
- base >= 4.7 && < 5 - base >= 4.7 && < 5
- attoparsec
- text
ghc-options: ghc-options:
- -Wall - -Wall
@ -38,6 +40,15 @@ executables:
- -with-rtsopts=-N - -with-rtsopts=-N
dependencies: dependencies:
- utoy - utoy
- blaze-html
- bytestring
- http-media
- servant-server
- text
- unicode-data
- unicode-data-names
- wai
- warp
# Fix "Multiple files use the same module name", see # Fix "Multiple files use the same module name", see
# https://stackoverflow.com/questions/67519851/multiple-files-use-the-same-module-name # https://stackoverflow.com/questions/67519851/multiple-files-use-the-same-module-name
when: when:

View File

@ -11,8 +11,9 @@ let
# haskell tools # haskell tools
stack stack
(pkgs.haskell.packages."${settings.ghc}".ghcWithPackages haskellDeps) (haskellPackages.ghcWithPackages haskellDeps)
pkgs.haskell.packages."${settings.ghc}".haskell-language-server haskellPackages.haskell-language-server
haskellPackages.implicit-hie
]; ];
in in
pkgs.mkShellNoCC { pkgs.mkShellNoCC {

BIN
src/UToy/.Decode.hs.swp Normal file

Binary file not shown.

64
src/UToy/Decode.hs Normal file
View File

@ -0,0 +1,64 @@
{-# LANGUAGE BinaryLiterals #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE NumericUnderscores #-}
module UToy.Decode
( decodeUtf8
) where
import Data.Bits (shiftL, (.&.), (.|.))
import Data.Char (chr)
import Data.Word (Word8)
import Text.Printf (printf)
decodeUtf8 :: [Word8] -> [([Word8], Either String Char)]
decodeUtf8 = \case
(cu0:rest)
| isAscii cu0 ->
([cu0], Right $ chr $ fromIntegral cu0) : decodeUtf8 rest
(cu0:cu1:rest)
| isTwoByteStarter cu0 && isContinuation cu1 ->
( [cu0, cu1]
, Right $ chr $
fromIntegral (cu0 .&. 0b0001_1111) `shiftL` 6
.|. fromIntegral (cu1 .&. 0b0011_1111)
) : decodeUtf8 rest
(cu0:cu1:cu2:rest)
| isThreeByteStarter cu0 && isContinuation cu1 && isContinuation cu2 ->
( [cu0, cu1, cu2]
, Right $ chr $
fromIntegral (cu0 .&. 0b0000_1111) `shiftL` 12
.|. fromIntegral (cu1 .&. 0b0011_1111) `shiftL` 6
.|. fromIntegral (cu2 .&. 0b0011_1111)
) : decodeUtf8 rest
(cu0:cu1:cu2:cu3:rest)
| isFourByteStarter cu0 && isContinuation cu1 && isContinuation cu2 && isContinuation cu3 ->
( [cu0, cu1, cu2, cu3]
, let
codepoint =
fromIntegral (cu0 .&. 0b0000_0111) `shiftL` 18
.|. fromIntegral (cu1 .&. 0b0011_1111) `shiftL` 12
.|. fromIntegral (cu2 .&. 0b0011_1111) `shiftL` 6
.|. fromIntegral (cu3 .&. 0b0011_1111)
in
if codepoint > 0x10_ffff then
Left $ printf "Code point U+%X would be too big (maximum: U+10FFFF)" codepoint
else
Right $ chr codepoint
) : decodeUtf8 rest
(cu0:rest) ->
( [cu0]
, Left "Invalid start of code point"
) : decodeUtf8 rest
[] ->
[]
where
isAscii cu = cu .&. 0b1000_0000 == 0b0000_0000
isTwoByteStarter cu = cu .&. 0b1110_0000 == 0b1100_0000
isThreeByteStarter cu = cu .&. 0b1111_0000 == 0b1110_0000
isFourByteStarter cu = cu .&. 0b1111_1000 == 0b1111_0000
isContinuation cu = cu .&. 0b1100_0000 == 0b1000_0000

31
src/UToy/Parsers.hs Normal file
View File

@ -0,0 +1,31 @@
module UToy.Parsers
( parseHexBytes
) where
import Data.Char (isHexDigit, ord)
import Data.Text (Text)
import Data.Word (Word8)
import Text.Printf (printf)
import qualified Data.Attoparsec.Text as Atto
parseHexBytes :: Text -> Either String [Word8]
parseHexBytes = Atto.parseOnly $ hexBytes <* Atto.endOfInput
hexBytes :: Atto.Parser [Word8]
hexBytes = hexByte `Atto.sepBy` separators
where
hexByte = do
high <- hexDigit
low <- hexDigit
pure $ fromIntegral $ high * 16 + low
hexDigit = hexDigitToInt <$> Atto.satisfy isHexDigit
hexDigitToInt c
| '0' <= c && c <= '9' = ord c - ord '0'
| 'A' <= c && c <= 'F' = ord c - ord 'A' + 10
| 'a' <= c && c <= 'f' = ord c - ord 'a' + 10
| otherwise = error $ printf "not a hex digit: %c" c
separators = Atto.skipMany $ Atto.satisfy $ Atto.inClass " +."

View File

@ -1,6 +1,6 @@
cabal-version: 1.12 cabal-version: 1.12
-- This file has been generated from package.yaml by hpack version 0.35.0. -- This file has been generated from package.yaml by hpack version 0.35.2.
-- --
-- see: https://github.com/sol/hpack -- see: https://github.com/sol/hpack
@ -20,13 +20,18 @@ source-repository head
location: https://git.pbrinkmeier.de/paul/utoy location: https://git.pbrinkmeier.de/paul/utoy
library library
exposed-modules:
UToy.Decode
UToy.Parsers
other-modules: other-modules:
Paths_utoy Paths_utoy
hs-source-dirs: hs-source-dirs:
src src
ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints
build-depends: build-depends:
base >=4.7 && <5 attoparsec
, base >=4.7 && <5
, text
default-language: Haskell2010 default-language: Haskell2010
executable utoy-exe executable utoy-exe
@ -35,8 +40,18 @@ executable utoy-exe
app app
ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints -threaded -rtsopts -with-rtsopts=-N ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints -threaded -rtsopts -with-rtsopts=-N
build-depends: build-depends:
base >=4.7 && <5 attoparsec
, base >=4.7 && <5
, blaze-html
, bytestring
, http-media
, servant-server
, text
, unicode-data
, unicode-data-names
, utoy , utoy
, wai
, warp
default-language: Haskell2010 default-language: Haskell2010
test-suite utoy-test test-suite utoy-test
@ -48,6 +63,8 @@ test-suite utoy-test
test test
ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints -threaded -rtsopts -with-rtsopts=-N ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints -threaded -rtsopts -with-rtsopts=-N
build-depends: build-depends:
base >=4.7 && <5 attoparsec
, base >=4.7 && <5
, text
, utoy , utoy
default-language: Haskell2010 default-language: Haskell2010