Implement /hex
This commit is contained in:
parent
b462796dbd
commit
69d9e21c80
106
app/Main.hs
106
app/Main.hs
@ -1,4 +1,108 @@
|
||||
{-# LANGUAGE BinaryLiterals #-}
|
||||
{-# LANGUAGE DataKinds #-}
|
||||
{-# LANGUAGE FlexibleInstances #-}
|
||||
{-# LANGUAGE MultiParamTypeClasses #-}
|
||||
{-# LANGUAGE OverloadedRecordDot #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE RankNTypes #-}
|
||||
{-# LANGUAGE ScopedTypeVariables #-}
|
||||
{-# LANGUAGE TypeOperators #-}
|
||||
|
||||
module Main (main) where
|
||||
|
||||
import Data.Foldable (for_)
|
||||
import Data.List (intercalate)
|
||||
import Data.Maybe (fromMaybe, maybeToList)
|
||||
import Data.Text (Text)
|
||||
import Data.Word (Word8)
|
||||
import Network.HTTP.Media ((//), (/:))
|
||||
import Network.Wai (Application)
|
||||
import Servant
|
||||
( Accept (..)
|
||||
, Handler
|
||||
, MimeRender (..)
|
||||
, Server
|
||||
, ServerError (..)
|
||||
, Proxy (..)
|
||||
, Capture
|
||||
, Get
|
||||
, err400
|
||||
, serve
|
||||
, throwError
|
||||
, (:>)
|
||||
)
|
||||
import Text.Blaze.Html5 ((!))
|
||||
import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
|
||||
import Text.Printf (PrintfArg, printf)
|
||||
|
||||
import qualified Network.Wai.Handler.Warp as Warp
|
||||
import qualified Text.Blaze.Html5 as H
|
||||
import qualified Text.Blaze.Html5.Attributes as A
|
||||
import qualified Unicode.Char.General.Blocks as UnicodeBlocks
|
||||
import qualified Unicode.Char.General.Names as UnicodeNames
|
||||
|
||||
import qualified UToy.Decode as Decode
|
||||
import qualified UToy.Parsers as Parsers
|
||||
|
||||
main :: IO ()
|
||||
main = putStrLn "Hello, World!"
|
||||
main = Warp.run 3000 app
|
||||
|
||||
app :: Application
|
||||
app = serve (Proxy :: Proxy API) server
|
||||
|
||||
type API =
|
||||
"hex" :> Capture "bytes" Text :> Get '[HTML] HexModel
|
||||
|
||||
server :: Server API
|
||||
server =
|
||||
hex
|
||||
where
|
||||
hex bytesP = do
|
||||
bytes <- Parsers.parseHexBytes bytesP `orThrow` const err400
|
||||
pure $ HexModel $ Decode.decodeUtf8 bytes
|
||||
|
||||
newtype HexModel = HexModel
|
||||
{ codepoints :: [([Word8], Either String Char)]
|
||||
}
|
||||
|
||||
instance MimeRender HTML HexModel where
|
||||
mimeRender _ model = renderHtml $ H.docTypeHtml $ do
|
||||
H.head $ do
|
||||
H.meta ! A.charset "utf-8"
|
||||
H.title "utoy"
|
||||
H.style "html { font-size: 32px; font-family: 'Noto Sans', sans-serif; } td { padding: 0.5em 1em; } pre { margin: 0; font-size: 0.5em; } body { display: flex; justify-content: center; }"
|
||||
H.body $ do
|
||||
H.table $ for_ model.codepoints $ \(bytes, eiC) -> do
|
||||
H.tr $ do
|
||||
H.td $ H.pre $ H.toHtml $ unlines $ map unwords [map showByteHex bytes, map showByteBin bytes]
|
||||
case eiC of
|
||||
Left err ->
|
||||
H.td ! A.colspan "4" $ H.toHtml $ "Decoding error: " ++ err
|
||||
Right c -> do
|
||||
H.td $ do
|
||||
H.input ! A.value (H.toValue [c]) ! A.style "text-align: center; width: 2em; font-size: 1em;"
|
||||
H.td $ H.code $ printfHtml "U+%04X" c
|
||||
H.td $ H.code $ H.toHtml $ intercalate ", " $ maybeToList (UnicodeNames.name c) ++ map (++ "*") (UnicodeNames.nameAliases c)
|
||||
H.td $ H.toHtml $ fromMaybe "" $ blockName c
|
||||
where
|
||||
showByteHex = printf " %02X"
|
||||
showByteBin = printf "%8b"
|
||||
|
||||
blockName :: Char -> Maybe String
|
||||
blockName c = UnicodeBlocks.blockName . UnicodeBlocks.blockDefinition <$> UnicodeBlocks.block c
|
||||
|
||||
-- Utilities
|
||||
|
||||
orThrow :: Either a b -> (a -> ServerError) -> Handler b
|
||||
orThrow (Left err) f = throwError $ f err
|
||||
orThrow (Right val) _ = pure val
|
||||
|
||||
printfHtml :: PrintfArg a => String -> a -> H.Html
|
||||
printfHtml fmt = (H.toHtml :: String -> H.Html) . printf fmt
|
||||
|
||||
-- HTML routes
|
||||
|
||||
data HTML
|
||||
|
||||
instance Accept HTML where
|
||||
contentType _ = "text" // "html" /: ("charset", "utf-8")
|
||||
|
7
hie.yaml
7
hie.yaml
@ -1,7 +1,10 @@
|
||||
cradle:
|
||||
stack:
|
||||
- path: "./test/Spec.hs"
|
||||
component: "utoy:test:utoy-test"
|
||||
- path: "./src"
|
||||
component: "utoy:lib"
|
||||
|
||||
- path: "./app/Main.hs"
|
||||
component: "utoy:exe:utoy-exe"
|
||||
|
||||
- path: "./test"
|
||||
component: "utoy:test:utoy-test"
|
||||
|
@ -1,4 +1,12 @@
|
||||
haskellPackages: with haskellPackages; [
|
||||
attoparsec
|
||||
blaze-html
|
||||
bytestring
|
||||
http-media
|
||||
servant-server
|
||||
text
|
||||
unicode-data
|
||||
unicode-data-names
|
||||
wai
|
||||
warp
|
||||
]
|
||||
|
@ -14,7 +14,11 @@ let
|
||||
};
|
||||
|
||||
haskellOverlay = pkgs: final: prev: {
|
||||
unicode-data = prev.unicode-data_0_4_0;
|
||||
attoparsec-iso8601 = prev.attoparsec-iso8601_1_1_0_0;
|
||||
http-api-data = prev.http-api-data_0_5;
|
||||
servant = pkgs.haskell.lib.doJailbreak prev.servant;
|
||||
servant-server = pkgs.haskell.lib.doJailbreak prev.servant-server;
|
||||
unicode-data = prev.unicode-data_0_4_0_1;
|
||||
unicode-data-names = pkgs.haskell.lib.markUnbroken prev.unicode-data-names;
|
||||
};
|
||||
in
|
||||
|
@ -12,15 +12,15 @@
|
||||
"url_template": "https://github.com/<owner>/<repo>/archive/<rev>.tar.gz"
|
||||
},
|
||||
"nixpkgs": {
|
||||
"branch": "nixos-22.11",
|
||||
"branch": "nixpkgs-unstable",
|
||||
"description": "Nix Packages collection",
|
||||
"homepage": "",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "7076110064c09f0b3942f609f2134c1358ef2e50",
|
||||
"sha256": "0j7idx8vyb0spwxqb7rr8pk15wi7yfyf5hp608wkhaz7wjw8k9nf",
|
||||
"rev": "9952d6bc395f5841262b006fbace8dd7e143b634",
|
||||
"sha256": "0kvpf63dda6nzbqd2kyr99qh1av89mva26xykp3zb4diyicp7yji",
|
||||
"type": "tarball",
|
||||
"url": "https://github.com/NixOS/nixpkgs/archive/7076110064c09f0b3942f609f2134c1358ef2e50.tar.gz",
|
||||
"url": "https://github.com/NixOS/nixpkgs/archive/9952d6bc395f5841262b006fbace8dd7e143b634.tar.gz",
|
||||
"url_template": "https://github.com/<owner>/<repo>/archive/<rev>.tar.gz"
|
||||
}
|
||||
}
|
||||
|
11
package.yaml
11
package.yaml
@ -13,6 +13,8 @@ extra-source-files:
|
||||
|
||||
dependencies:
|
||||
- base >= 4.7 && < 5
|
||||
- attoparsec
|
||||
- text
|
||||
|
||||
ghc-options:
|
||||
- -Wall
|
||||
@ -38,6 +40,15 @@ executables:
|
||||
- -with-rtsopts=-N
|
||||
dependencies:
|
||||
- utoy
|
||||
- blaze-html
|
||||
- bytestring
|
||||
- http-media
|
||||
- servant-server
|
||||
- text
|
||||
- unicode-data
|
||||
- unicode-data-names
|
||||
- wai
|
||||
- warp
|
||||
# Fix "Multiple files use the same module name", see
|
||||
# https://stackoverflow.com/questions/67519851/multiple-files-use-the-same-module-name
|
||||
when:
|
||||
|
@ -11,8 +11,9 @@ let
|
||||
|
||||
# haskell tools
|
||||
stack
|
||||
(pkgs.haskell.packages."${settings.ghc}".ghcWithPackages haskellDeps)
|
||||
pkgs.haskell.packages."${settings.ghc}".haskell-language-server
|
||||
(haskellPackages.ghcWithPackages haskellDeps)
|
||||
haskellPackages.haskell-language-server
|
||||
haskellPackages.implicit-hie
|
||||
];
|
||||
in
|
||||
pkgs.mkShellNoCC {
|
||||
|
BIN
src/UToy/.Decode.hs.swp
Normal file
BIN
src/UToy/.Decode.hs.swp
Normal file
Binary file not shown.
64
src/UToy/Decode.hs
Normal file
64
src/UToy/Decode.hs
Normal file
@ -0,0 +1,64 @@
|
||||
{-# LANGUAGE BinaryLiterals #-}
|
||||
{-# LANGUAGE LambdaCase #-}
|
||||
{-# LANGUAGE NumericUnderscores #-}
|
||||
|
||||
module UToy.Decode
|
||||
( decodeUtf8
|
||||
) where
|
||||
|
||||
import Data.Bits (shiftL, (.&.), (.|.))
|
||||
import Data.Char (chr)
|
||||
import Data.Word (Word8)
|
||||
import Text.Printf (printf)
|
||||
|
||||
decodeUtf8 :: [Word8] -> [([Word8], Either String Char)]
|
||||
decodeUtf8 = \case
|
||||
(cu0:rest)
|
||||
| isAscii cu0 ->
|
||||
([cu0], Right $ chr $ fromIntegral cu0) : decodeUtf8 rest
|
||||
(cu0:cu1:rest)
|
||||
| isTwoByteStarter cu0 && isContinuation cu1 ->
|
||||
( [cu0, cu1]
|
||||
, Right $ chr $
|
||||
fromIntegral (cu0 .&. 0b0001_1111) `shiftL` 6
|
||||
.|. fromIntegral (cu1 .&. 0b0011_1111)
|
||||
) : decodeUtf8 rest
|
||||
(cu0:cu1:cu2:rest)
|
||||
| isThreeByteStarter cu0 && isContinuation cu1 && isContinuation cu2 ->
|
||||
( [cu0, cu1, cu2]
|
||||
, Right $ chr $
|
||||
fromIntegral (cu0 .&. 0b0000_1111) `shiftL` 12
|
||||
.|. fromIntegral (cu1 .&. 0b0011_1111) `shiftL` 6
|
||||
.|. fromIntegral (cu2 .&. 0b0011_1111)
|
||||
) : decodeUtf8 rest
|
||||
(cu0:cu1:cu2:cu3:rest)
|
||||
| isFourByteStarter cu0 && isContinuation cu1 && isContinuation cu2 && isContinuation cu3 ->
|
||||
( [cu0, cu1, cu2, cu3]
|
||||
, let
|
||||
codepoint =
|
||||
fromIntegral (cu0 .&. 0b0000_0111) `shiftL` 18
|
||||
.|. fromIntegral (cu1 .&. 0b0011_1111) `shiftL` 12
|
||||
.|. fromIntegral (cu2 .&. 0b0011_1111) `shiftL` 6
|
||||
.|. fromIntegral (cu3 .&. 0b0011_1111)
|
||||
in
|
||||
if codepoint > 0x10_ffff then
|
||||
Left $ printf "Code point U+%X would be too big (maximum: U+10FFFF)" codepoint
|
||||
else
|
||||
Right $ chr codepoint
|
||||
|
||||
) : decodeUtf8 rest
|
||||
(cu0:rest) ->
|
||||
( [cu0]
|
||||
, Left "Invalid start of code point"
|
||||
) : decodeUtf8 rest
|
||||
[] ->
|
||||
[]
|
||||
|
||||
where
|
||||
isAscii cu = cu .&. 0b1000_0000 == 0b0000_0000
|
||||
|
||||
isTwoByteStarter cu = cu .&. 0b1110_0000 == 0b1100_0000
|
||||
isThreeByteStarter cu = cu .&. 0b1111_0000 == 0b1110_0000
|
||||
isFourByteStarter cu = cu .&. 0b1111_1000 == 0b1111_0000
|
||||
|
||||
isContinuation cu = cu .&. 0b1100_0000 == 0b1000_0000
|
31
src/UToy/Parsers.hs
Normal file
31
src/UToy/Parsers.hs
Normal file
@ -0,0 +1,31 @@
|
||||
module UToy.Parsers
|
||||
( parseHexBytes
|
||||
) where
|
||||
|
||||
import Data.Char (isHexDigit, ord)
|
||||
import Data.Text (Text)
|
||||
import Data.Word (Word8)
|
||||
import Text.Printf (printf)
|
||||
|
||||
import qualified Data.Attoparsec.Text as Atto
|
||||
|
||||
parseHexBytes :: Text -> Either String [Word8]
|
||||
parseHexBytes = Atto.parseOnly $ hexBytes <* Atto.endOfInput
|
||||
|
||||
hexBytes :: Atto.Parser [Word8]
|
||||
hexBytes = hexByte `Atto.sepBy` separators
|
||||
where
|
||||
hexByte = do
|
||||
high <- hexDigit
|
||||
low <- hexDigit
|
||||
pure $ fromIntegral $ high * 16 + low
|
||||
|
||||
hexDigit = hexDigitToInt <$> Atto.satisfy isHexDigit
|
||||
|
||||
hexDigitToInt c
|
||||
| '0' <= c && c <= '9' = ord c - ord '0'
|
||||
| 'A' <= c && c <= 'F' = ord c - ord 'A' + 10
|
||||
| 'a' <= c && c <= 'f' = ord c - ord 'a' + 10
|
||||
| otherwise = error $ printf "not a hex digit: %c" c
|
||||
|
||||
separators = Atto.skipMany $ Atto.satisfy $ Atto.inClass " +."
|
25
utoy.cabal
25
utoy.cabal
@ -1,6 +1,6 @@
|
||||
cabal-version: 1.12
|
||||
|
||||
-- This file has been generated from package.yaml by hpack version 0.35.0.
|
||||
-- This file has been generated from package.yaml by hpack version 0.35.2.
|
||||
--
|
||||
-- see: https://github.com/sol/hpack
|
||||
|
||||
@ -20,13 +20,18 @@ source-repository head
|
||||
location: https://git.pbrinkmeier.de/paul/utoy
|
||||
|
||||
library
|
||||
exposed-modules:
|
||||
UToy.Decode
|
||||
UToy.Parsers
|
||||
other-modules:
|
||||
Paths_utoy
|
||||
hs-source-dirs:
|
||||
src
|
||||
ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints
|
||||
build-depends:
|
||||
base >=4.7 && <5
|
||||
attoparsec
|
||||
, base >=4.7 && <5
|
||||
, text
|
||||
default-language: Haskell2010
|
||||
|
||||
executable utoy-exe
|
||||
@ -35,8 +40,18 @@ executable utoy-exe
|
||||
app
|
||||
ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints -threaded -rtsopts -with-rtsopts=-N
|
||||
build-depends:
|
||||
base >=4.7 && <5
|
||||
attoparsec
|
||||
, base >=4.7 && <5
|
||||
, blaze-html
|
||||
, bytestring
|
||||
, http-media
|
||||
, servant-server
|
||||
, text
|
||||
, unicode-data
|
||||
, unicode-data-names
|
||||
, utoy
|
||||
, wai
|
||||
, warp
|
||||
default-language: Haskell2010
|
||||
|
||||
test-suite utoy-test
|
||||
@ -48,6 +63,8 @@ test-suite utoy-test
|
||||
test
|
||||
ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints -threaded -rtsopts -with-rtsopts=-N
|
||||
build-depends:
|
||||
base >=4.7 && <5
|
||||
attoparsec
|
||||
, base >=4.7 && <5
|
||||
, text
|
||||
, utoy
|
||||
default-language: Haskell2010
|
||||
|
Loading…
x
Reference in New Issue
Block a user