diff --git a/app/Main.hs b/app/Main.hs index 8e366f2..c4ce311 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -1,4 +1,108 @@ +{-# LANGUAGE BinaryLiterals #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE OverloadedRecordDot #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeOperators #-} + module Main (main) where +import Data.Foldable (for_) +import Data.List (intercalate) +import Data.Maybe (fromMaybe, maybeToList) +import Data.Text (Text) +import Data.Word (Word8) +import Network.HTTP.Media ((//), (/:)) +import Network.Wai (Application) +import Servant + ( Accept (..) + , Handler + , MimeRender (..) + , Server + , ServerError (..) + , Proxy (..) + , Capture + , Get + , err400 + , serve + , throwError + , (:>) + ) +import Text.Blaze.Html5 ((!)) +import Text.Blaze.Html.Renderer.Utf8 (renderHtml) +import Text.Printf (PrintfArg, printf) + +import qualified Network.Wai.Handler.Warp as Warp +import qualified Text.Blaze.Html5 as H +import qualified Text.Blaze.Html5.Attributes as A +import qualified Unicode.Char.General.Blocks as UnicodeBlocks +import qualified Unicode.Char.General.Names as UnicodeNames + +import qualified UToy.Decode as Decode +import qualified UToy.Parsers as Parsers + main :: IO () -main = putStrLn "Hello, World!" +main = Warp.run 3000 app + +app :: Application +app = serve (Proxy :: Proxy API) server + +type API = + "hex" :> Capture "bytes" Text :> Get '[HTML] HexModel + +server :: Server API +server = + hex + where + hex bytesP = do + bytes <- Parsers.parseHexBytes bytesP `orThrow` const err400 + pure $ HexModel $ Decode.decodeUtf8 bytes + +newtype HexModel = HexModel + { codepoints :: [([Word8], Either String Char)] + } + +instance MimeRender HTML HexModel where + mimeRender _ model = renderHtml $ H.docTypeHtml $ do + H.head $ do + H.meta ! A.charset "utf-8" + H.title "utoy" + H.style "html { font-size: 32px; font-family: 'Noto Sans', sans-serif; } td { padding: 0.5em 1em; } pre { margin: 0; font-size: 0.5em; } body { display: flex; justify-content: center; }" + H.body $ do + H.table $ for_ model.codepoints $ \(bytes, eiC) -> do + H.tr $ do + H.td $ H.pre $ H.toHtml $ unlines $ map unwords [map showByteHex bytes, map showByteBin bytes] + case eiC of + Left err -> + H.td ! A.colspan "4" $ H.toHtml $ "Decoding error: " ++ err + Right c -> do + H.td $ do + H.input ! A.value (H.toValue [c]) ! A.style "text-align: center; width: 2em; font-size: 1em;" + H.td $ H.code $ printfHtml "U+%04X" c + H.td $ H.code $ H.toHtml $ intercalate ", " $ maybeToList (UnicodeNames.name c) ++ map (++ "*") (UnicodeNames.nameAliases c) + H.td $ H.toHtml $ fromMaybe "" $ blockName c + where + showByteHex = printf " %02X" + showByteBin = printf "%8b" + + blockName :: Char -> Maybe String + blockName c = UnicodeBlocks.blockName . UnicodeBlocks.blockDefinition <$> UnicodeBlocks.block c + +-- Utilities + +orThrow :: Either a b -> (a -> ServerError) -> Handler b +orThrow (Left err) f = throwError $ f err +orThrow (Right val) _ = pure val + +printfHtml :: PrintfArg a => String -> a -> H.Html +printfHtml fmt = (H.toHtml :: String -> H.Html) . printf fmt + +-- HTML routes + +data HTML + +instance Accept HTML where + contentType _ = "text" // "html" /: ("charset", "utf-8") diff --git a/hie.yaml b/hie.yaml index af9c93d..f68809f 100644 --- a/hie.yaml +++ b/hie.yaml @@ -1,7 +1,10 @@ cradle: stack: - - path: "./test/Spec.hs" - component: "utoy:test:utoy-test" + - path: "./src" + component: "utoy:lib" - path: "./app/Main.hs" component: "utoy:exe:utoy-exe" + + - path: "./test" + component: "utoy:test:utoy-test" diff --git a/nix/haskell-deps.nix b/nix/haskell-deps.nix index 7edd5e4..f9524a1 100644 --- a/nix/haskell-deps.nix +++ b/nix/haskell-deps.nix @@ -1,4 +1,12 @@ haskellPackages: with haskellPackages; [ + attoparsec + blaze-html + bytestring + http-media + servant-server + text unicode-data unicode-data-names + wai + warp ] diff --git a/nix/pkgs.nix b/nix/pkgs.nix index de9cae8..48dbd09 100644 --- a/nix/pkgs.nix +++ b/nix/pkgs.nix @@ -14,7 +14,11 @@ let }; haskellOverlay = pkgs: final: prev: { - unicode-data = prev.unicode-data_0_4_0; + attoparsec-iso8601 = prev.attoparsec-iso8601_1_1_0_0; + http-api-data = prev.http-api-data_0_5; + servant = pkgs.haskell.lib.doJailbreak prev.servant; + servant-server = pkgs.haskell.lib.doJailbreak prev.servant-server; + unicode-data = prev.unicode-data_0_4_0_1; unicode-data-names = pkgs.haskell.lib.markUnbroken prev.unicode-data-names; }; in diff --git a/nix/sources.json b/nix/sources.json index d486d47..f7baa89 100644 --- a/nix/sources.json +++ b/nix/sources.json @@ -12,15 +12,15 @@ "url_template": "https://github.com///archive/.tar.gz" }, "nixpkgs": { - "branch": "nixos-22.11", + "branch": "nixpkgs-unstable", "description": "Nix Packages collection", "homepage": "", "owner": "NixOS", "repo": "nixpkgs", - "rev": "7076110064c09f0b3942f609f2134c1358ef2e50", - "sha256": "0j7idx8vyb0spwxqb7rr8pk15wi7yfyf5hp608wkhaz7wjw8k9nf", + "rev": "9952d6bc395f5841262b006fbace8dd7e143b634", + "sha256": "0kvpf63dda6nzbqd2kyr99qh1av89mva26xykp3zb4diyicp7yji", "type": "tarball", - "url": "https://github.com/NixOS/nixpkgs/archive/7076110064c09f0b3942f609f2134c1358ef2e50.tar.gz", + "url": "https://github.com/NixOS/nixpkgs/archive/9952d6bc395f5841262b006fbace8dd7e143b634.tar.gz", "url_template": "https://github.com///archive/.tar.gz" } } diff --git a/package.yaml b/package.yaml index a798d03..5c081c5 100644 --- a/package.yaml +++ b/package.yaml @@ -13,6 +13,8 @@ extra-source-files: dependencies: - base >= 4.7 && < 5 +- attoparsec +- text ghc-options: - -Wall @@ -38,6 +40,15 @@ executables: - -with-rtsopts=-N dependencies: - utoy + - blaze-html + - bytestring + - http-media + - servant-server + - text + - unicode-data + - unicode-data-names + - wai + - warp # Fix "Multiple files use the same module name", see # https://stackoverflow.com/questions/67519851/multiple-files-use-the-same-module-name when: diff --git a/shell.nix b/shell.nix index 33ce29d..e95d7f9 100644 --- a/shell.nix +++ b/shell.nix @@ -11,8 +11,9 @@ let # haskell tools stack - (pkgs.haskell.packages."${settings.ghc}".ghcWithPackages haskellDeps) - pkgs.haskell.packages."${settings.ghc}".haskell-language-server + (haskellPackages.ghcWithPackages haskellDeps) + haskellPackages.haskell-language-server + haskellPackages.implicit-hie ]; in pkgs.mkShellNoCC { diff --git a/src/UToy/.Decode.hs.swp b/src/UToy/.Decode.hs.swp new file mode 100644 index 0000000..fb7123f Binary files /dev/null and b/src/UToy/.Decode.hs.swp differ diff --git a/src/UToy/Decode.hs b/src/UToy/Decode.hs new file mode 100644 index 0000000..7fbd7d5 --- /dev/null +++ b/src/UToy/Decode.hs @@ -0,0 +1,64 @@ +{-# LANGUAGE BinaryLiterals #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE NumericUnderscores #-} + +module UToy.Decode + ( decodeUtf8 + ) where + +import Data.Bits (shiftL, (.&.), (.|.)) +import Data.Char (chr) +import Data.Word (Word8) +import Text.Printf (printf) + +decodeUtf8 :: [Word8] -> [([Word8], Either String Char)] +decodeUtf8 = \case + (cu0:rest) + | isAscii cu0 -> + ([cu0], Right $ chr $ fromIntegral cu0) : decodeUtf8 rest + (cu0:cu1:rest) + | isTwoByteStarter cu0 && isContinuation cu1 -> + ( [cu0, cu1] + , Right $ chr $ + fromIntegral (cu0 .&. 0b0001_1111) `shiftL` 6 + .|. fromIntegral (cu1 .&. 0b0011_1111) + ) : decodeUtf8 rest + (cu0:cu1:cu2:rest) + | isThreeByteStarter cu0 && isContinuation cu1 && isContinuation cu2 -> + ( [cu0, cu1, cu2] + , Right $ chr $ + fromIntegral (cu0 .&. 0b0000_1111) `shiftL` 12 + .|. fromIntegral (cu1 .&. 0b0011_1111) `shiftL` 6 + .|. fromIntegral (cu2 .&. 0b0011_1111) + ) : decodeUtf8 rest + (cu0:cu1:cu2:cu3:rest) + | isFourByteStarter cu0 && isContinuation cu1 && isContinuation cu2 && isContinuation cu3 -> + ( [cu0, cu1, cu2, cu3] + , let + codepoint = + fromIntegral (cu0 .&. 0b0000_0111) `shiftL` 18 + .|. fromIntegral (cu1 .&. 0b0011_1111) `shiftL` 12 + .|. fromIntegral (cu2 .&. 0b0011_1111) `shiftL` 6 + .|. fromIntegral (cu3 .&. 0b0011_1111) + in + if codepoint > 0x10_ffff then + Left $ printf "Code point U+%X would be too big (maximum: U+10FFFF)" codepoint + else + Right $ chr codepoint + + ) : decodeUtf8 rest + (cu0:rest) -> + ( [cu0] + , Left "Invalid start of code point" + ) : decodeUtf8 rest + [] -> + [] + + where + isAscii cu = cu .&. 0b1000_0000 == 0b0000_0000 + + isTwoByteStarter cu = cu .&. 0b1110_0000 == 0b1100_0000 + isThreeByteStarter cu = cu .&. 0b1111_0000 == 0b1110_0000 + isFourByteStarter cu = cu .&. 0b1111_1000 == 0b1111_0000 + + isContinuation cu = cu .&. 0b1100_0000 == 0b1000_0000 diff --git a/src/UToy/Parsers.hs b/src/UToy/Parsers.hs new file mode 100644 index 0000000..9b65b4a --- /dev/null +++ b/src/UToy/Parsers.hs @@ -0,0 +1,31 @@ +module UToy.Parsers + ( parseHexBytes + ) where + +import Data.Char (isHexDigit, ord) +import Data.Text (Text) +import Data.Word (Word8) +import Text.Printf (printf) + +import qualified Data.Attoparsec.Text as Atto + +parseHexBytes :: Text -> Either String [Word8] +parseHexBytes = Atto.parseOnly $ hexBytes <* Atto.endOfInput + +hexBytes :: Atto.Parser [Word8] +hexBytes = hexByte `Atto.sepBy` separators + where + hexByte = do + high <- hexDigit + low <- hexDigit + pure $ fromIntegral $ high * 16 + low + + hexDigit = hexDigitToInt <$> Atto.satisfy isHexDigit + + hexDigitToInt c + | '0' <= c && c <= '9' = ord c - ord '0' + | 'A' <= c && c <= 'F' = ord c - ord 'A' + 10 + | 'a' <= c && c <= 'f' = ord c - ord 'a' + 10 + | otherwise = error $ printf "not a hex digit: %c" c + + separators = Atto.skipMany $ Atto.satisfy $ Atto.inClass " +." diff --git a/utoy.cabal b/utoy.cabal index 9712630..4af2fa6 100644 --- a/utoy.cabal +++ b/utoy.cabal @@ -1,6 +1,6 @@ cabal-version: 1.12 --- This file has been generated from package.yaml by hpack version 0.35.0. +-- This file has been generated from package.yaml by hpack version 0.35.2. -- -- see: https://github.com/sol/hpack @@ -20,13 +20,18 @@ source-repository head location: https://git.pbrinkmeier.de/paul/utoy library + exposed-modules: + UToy.Decode + UToy.Parsers other-modules: Paths_utoy hs-source-dirs: src ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints build-depends: - base >=4.7 && <5 + attoparsec + , base >=4.7 && <5 + , text default-language: Haskell2010 executable utoy-exe @@ -35,8 +40,18 @@ executable utoy-exe app ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints -threaded -rtsopts -with-rtsopts=-N build-depends: - base >=4.7 && <5 + attoparsec + , base >=4.7 && <5 + , blaze-html + , bytestring + , http-media + , servant-server + , text + , unicode-data + , unicode-data-names , utoy + , wai + , warp default-language: Haskell2010 test-suite utoy-test @@ -48,6 +63,8 @@ test-suite utoy-test test ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints -threaded -rtsopts -with-rtsopts=-N build-depends: - base >=4.7 && <5 + attoparsec + , base >=4.7 && <5 + , text , utoy default-language: Haskell2010