Compare commits

..

No commits in common. "main" and "0.6" have entirely different histories.
main ... 0.6

15 changed files with 264 additions and 208 deletions

2
.gitignore vendored
View File

@ -1,6 +1,4 @@
.stack-work/
dist-newstyle
.vscode/
*.swp
result
bench.html

View File

@ -6,6 +6,12 @@
$ nix build
```
## Running
```
$ nix run
```
## Building the Docker image
```
@ -20,7 +26,9 @@ Includes Stack, GHC, `haskell-language-server`, `gen-hie` etc.
$ nix develop
```
## TODO
## Running Stack and GHC
- [x] Benchmark, profile and optimize search
- [ ] Trim down the docker image
```
$ nix run .#stack
$ nix run .#ghc
```

View File

@ -3,7 +3,6 @@
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NumericUnderscores #-}
{-# LANGUAGE OverloadedRecordDot #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
@ -16,7 +15,8 @@ module Main (main) where
import Data.Char (chr)
import Data.FileEmbed (embedFile)
import Data.Foldable (for_)
import Data.Maybe (fromMaybe)
import Data.List (intercalate)
import Data.Maybe (fromMaybe, maybeToList)
import Data.Text (Text)
import Data.Word (Word8)
import Network.HTTP.Media ((//), (/:))
@ -48,10 +48,11 @@ import qualified Data.Text.Encoding as Encoding
import qualified Network.Wai.Handler.Warp as Warp
import qualified Text.Blaze.Html5 as H
import qualified Text.Blaze.Html5.Attributes as A
import qualified Unicode.Char.General.Blocks as UnicodeBlocks
import qualified Unicode.Char.General.Names as UnicodeNames
import qualified UToy.Decode as Decode
import qualified UToy.Table as Table
import qualified UToy.Names as Names
import qualified UToy.Parsers as Parsers
main :: IO ()
@ -65,14 +66,13 @@ type API =
:<|> "utf8" :> Capture "bytes" Text :> Get '[PlainText, HTML] Utf8Model
:<|> "codepoints" :> Capture "codepoints" Text :> Get '[PlainText, HTML] CodepointsModel
:<|> "text" :> Capture "text" Text :> Get '[PlainText, HTML] TextModel
:<|> "search" :> Capture "search" Text :> Get '[PlainText, HTML] SearchModel
server :: Server API
server =
rootR :<|> utf8R :<|> codepointsR :<|> textR :<|> searchR
rootR :<|> utf8R :<|> codepointsR :<|> textR
where
rootR host' = do
pure $ RootModel $ fromMaybe "" host'
rootR host = do
pure $ RootModel $ fromMaybe "" host
utf8R bytesP = do
bytes <- Parsers.parseHexBytes bytesP `orThrow` const err400
@ -85,9 +85,6 @@ server =
textR textP = do
pure $ TextModel textP
searchR searchP = do
pure $ mkSearchModel searchP
-- /
newtype RootModel = RootModel
@ -99,7 +96,6 @@ examples =
[ "/text/✅🤔"
, "/codepoints/x2705+x1F914"
, "/utf8/e2.9c.85.f0.9f.a4.94"
, "/search/asterisk"
]
instance MimeRender PlainText RootModel where
@ -143,8 +139,8 @@ instance MimeRender PlainText Utf8Model where
Right c ->
[ Text.pack [c]
, Text.pack $ printf "U+%04X" c
, Text.intercalate ", " $ Names.allNames c
, fromMaybe "" $ Names.blockName c
, Text.pack $ intercalate ", " $ allNames c
, Text.pack $ fromMaybe "" $ blockName c
]
)
]
@ -162,8 +158,8 @@ instance MimeRender HTML Utf8Model where
Right c -> do
H.td $ H.input ! A.class_ "charbox" ! A.value (H.toValue [c])
H.td $ H.code $ printfHtml "U+%04X" c
H.td $ H.code $ H.toHtml $ Text.intercalate ", " $ Names.allNames c
H.td $ H.code $ H.toHtml $ fromMaybe "" $ Names.blockName c
H.td $ H.code $ H.toHtml $ intercalate ", " $ allNames c
H.td $ H.code $ H.toHtml $ fromMaybe "" $ blockName c
-- /codepoints/<codepoints>
@ -177,14 +173,14 @@ mkCodepointsModel =
-- Limit number of returned codepoints. Otherwise it's
-- too easy to provoke massive response bodies with requests like
-- /codepoints/0-99999999
. take 100_000
. take 100000
. map go
. concatMap (uncurry enumFromTo)
where
go codepoint = (codepoint, toChar codepoint)
toChar codepoint
| codepoint > 0x10_FFFF = Left "Would be too big (maximum: U+10FFFF)"
| codepoint > 0x10FFFF = Left "Would be too big (maximum: U+10FFFF)"
| isSurrogate codepoint = Left "Is a surrogate"
| otherwise = Right $ chr $ fromIntegral codepoint
@ -200,8 +196,8 @@ instance MimeRender PlainText CodepointsModel where
Right c ->
[ Text.pack [c]
, Text.pack $ printf "U+%04X" c
, Text.intercalate ", " $ Names.allNames c
, fromMaybe "" $ Names.blockName c
, Text.pack $ intercalate ", " $ allNames c
, Text.pack $ fromMaybe "" $ blockName c
]
)
| (codepoint, eiC) <- model.codepoints
@ -218,8 +214,8 @@ instance MimeRender HTML CodepointsModel where
Right c -> do
H.td $ H.input ! A.class_ "charbox" ! A.value (H.toValue [c])
H.td $ H.code $ printfHtml "U+%04X" c
H.td $ H.code $ H.toHtml $ Text.intercalate ", " $ Names.allNames c
H.td $ H.code $ H.toHtml $ fromMaybe "" $ Names.blockName c
H.td $ H.code $ H.toHtml $ intercalate ", " $ allNames c
H.td $ H.code $ H.toHtml $ fromMaybe "" $ blockName c
-- /text/<text>
@ -228,50 +224,28 @@ newtype TextModel = TextModel
}
instance MimeRender HTML TextModel where
mimeRender _ model = charTableHtml $ Text.unpack model.text
instance MimeRender PlainText TextModel where
mimeRender _ model = charTableText $ Text.unpack model.text
-- /search/<search>
newtype SearchModel = SearchModel
{ results :: [Char]
}
mkSearchModel :: Text -> SearchModel
mkSearchModel search = SearchModel $ take 100_000 $ Names.searchCaseInsensitive search
instance MimeRender HTML SearchModel where
mimeRender _ model = charTableHtml model.results
instance MimeRender PlainText SearchModel where
mimeRender _ model = charTableText model.results
-- Utilities
charTableHtml :: [Char] -> BL.ByteString
charTableHtml chars =
renderHtml $ documentWithBody $ do
H.table $ for_ chars $ \c -> do
mimeRender _ model = renderHtml $ documentWithBody $ do
H.table $ for_ (Text.unpack model.text) $ \c -> do
H.tr $ do
H.td $ H.input ! A.class_ "charbox" ! A.value (H.toValue [c])
H.td $ H.code $ printfHtml "U+%04X" c
H.td $ H.code $ H.toHtml $ Text.intercalate ", " $ Names.allNames c
H.td $ H.code $ H.toHtml $ fromMaybe "" $ Names.blockName c
H.td $ H.code $ H.toHtml $ intercalate ", " $ allNames c
H.td $ H.code $ H.toHtml $ fromMaybe "" $ blockName c
charTableText :: [Char] -> BL.ByteString
charTableText chars =
renderText $ Table.render " "
instance MimeRender PlainText TextModel where
mimeRender _ model = renderText $ Table.render " "
[ map Table.cl
[ Text.pack [c]
, Text.pack $ printf "U+%04X" c
, Text.intercalate ", " $ Names.allNames c
, fromMaybe "" $ Names.blockName c
, Text.pack $ intercalate ", " $ allNames c
, Text.pack $ fromMaybe "" $ blockName c
]
| c <- chars
| c <- Text.unpack model.text
]
-- Utilities
renderText :: Text -> BL.ByteString
renderText = BL.fromStrict . Encoding.encodeUtf8
@ -281,6 +255,15 @@ showByteHex = printf " %02X"
showByteBin :: Word8 -> String
showByteBin = printf "%08b"
-- | Retrieve name and aliases (suffixed with @*@) of a 'Char'.
allNames :: Char -> [String]
allNames c =
maybeToList (UnicodeNames.name c)
++ map (++ "*") (UnicodeNames.nameAliases c)
blockName :: Char -> Maybe String
blockName c = UnicodeBlocks.blockName . UnicodeBlocks.blockDefinition <$> UnicodeBlocks.block c
orThrow :: Either a b -> (a -> ServerError) -> Handler b
orThrow (Left err) f = throwError $ f err
orThrow (Right val) _ = pure val

View File

@ -1,69 +0,0 @@
{-# LANGUAGE OverloadedStrings #-}
module Main where
import Criterion.Main (bench, bgroup, defaultMain, whnf)
import Data.Char (ord)
import Data.Maybe (maybeToList)
import Data.Text (Text)
import qualified Data.Text as Text
import qualified Data.Vector as Vector
import qualified Unicode.Char.General.Names as UnicodeNames
main = defaultMain
[ bench "naiveSearchCI" $ whnf naiveSearchCI "latin"
, bench "naiveSearchCS" $ whnf naiveSearchCS "LATIN"
, bench "memoSearchCI" $ whnf memoSearchCI "latin"
, bench "memoSearchCS" $ whnf memoSearchCS "LATIN"
]
naiveSearchCI :: Text -> [Char]
naiveSearchCI search =
filter go [minBound..maxBound]
where
go c = any matches $ allNames c
matches t = Text.toLower search `Text.isInfixOf` Text.toLower (Text.pack t)
naiveSearchCS :: Text -> [Char]
naiveSearchCS search =
filter go [minBound..maxBound]
where
go c = any matches $ allNames c
matches t = search `Text.isInfixOf` Text.pack t
memoSearchCS :: Text -> [Char]
memoSearchCS search =
filter go [minBound..maxBound]
where
go c = any matches $ allNamesText c
matches t = search `Text.isInfixOf` t
memoSearchCI :: Text -> [Char]
memoSearchCI search =
filter go [minBound..maxBound]
where
go c = any matches $ allNamesTextLower c
matches t = Text.toLower search `Text.isInfixOf` t
-- | Retrieve name and aliases (suffixed with @*@) of a 'Char'.
allNames :: Char -> [String]
allNames c =
maybeToList (UnicodeNames.name c)
++ map (++ "*") (UnicodeNames.nameAliases c)
allNamesText :: Char -> [Text]
allNamesText c = Vector.unsafeIndex textNames $ ord c
textNames :: Vector.Vector [Text]
textNames = Vector.fromList $ map go [minBound..maxBound]
where
go c = map Text.pack $ allNames c
allNamesTextLower :: Char -> [Text]
allNamesTextLower c = Vector.unsafeIndex textNamesLower $ ord c
textNamesLower :: Vector.Vector [Text]
textNamesLower = Vector.fromList $ map go [minBound..maxBound]
where
go c = map (Text.toLower . Text.pack) $ allNames c

8
flake.lock generated
View File

@ -2,16 +2,16 @@
"nodes": {
"nixpkgs": {
"locked": {
"lastModified": 1740243248,
"narHash": "sha256-GJ/fV5AE+4i38/eXmAgBZs4lKYXfScpxPYviUV8ujlk=",
"lastModified": 1680273054,
"narHash": "sha256-Bs6/5LpvYp379qVqGt9mXxxx9GSE789k3oFc+OAL07M=",
"owner": "nixos",
"repo": "nixpkgs",
"rev": "a6c4bfe37c51686c042b4bdb4e650a9414e96680",
"rev": "3364b5b117f65fe1ce65a3cdd5612a078a3b31e3",
"type": "github"
},
"original": {
"owner": "nixos",
"ref": "release-24.11",
"ref": "nixpkgs-unstable",
"repo": "nixpkgs",
"type": "github"
}

View File

@ -1,45 +1,90 @@
{
description = "Unicode toy";
inputs.nixpkgs.url = "github:nixos/nixpkgs/release-24.11";
inputs.nixpkgs.url = "github:nixos/nixpkgs/nixpkgs-unstable";
outputs = { self, nixpkgs }:
let
pkgs = nixpkgs.legacyPackages.x86_64-linux;
overlay = import ./nix/overlay.nix;
settings = import ./nix/settings.nix;
haskellDeps = import ./nix/haskell-deps.nix;
utoy = pkgs.haskellPackages.developPackage {
root = ./.;
overrides = self: super: {
unicode-data = super.unicode-data_0_6_0;
unicode-data-names = pkgs.haskell.lib.markUnbroken super.unicode-data-names;
};
pkgs = nixpkgs.legacyPackages.x86_64-linux.extend overlay;
haskellPackages = pkgs.haskell.packages."${settings.ghc}";
ghc = haskellPackages.ghcWithPackages haskellDeps;
# Wrap stack to disable its slow Nix integration.
# Instead, make it use the GHC defined above.
stack = pkgs.stdenv.mkDerivation {
name = "stack";
# The build is simply a call to makeWrapper, so we don't have to
# do any of the typical build steps.
phases = [ "installPhase" ];
nativeBuildInputs = [ pkgs.makeWrapper ];
# makeBinaryWrapper creates a stack executable for us that uses
# the GHC defined in this file.
installPhase = ''
makeWrapper ${pkgs.stack}/bin/stack $out/bin/stack \
--prefix PATH : ${ghc}/bin \
--add-flags '--no-nix --system-ghc --no-install-ghc'
'';
};
utoy = pkgs.haskell.lib.justStaticExecutables (haskellPackages.callPackage
({ mkDerivation }:
mkDerivation {
# Keep this in sync with package.yaml
version = "0.6";
pname = "utoy";
license = pkgs.lib.licenses.mit;
src =
# We only need these files for building:
let
whitelist = [
./LICENSE
./utoy.cabal
./Setup.hs
./app
./src
./static
./test
];
in
pkgs.lib.sources.cleanSourceWith {
src = ./.;
filter = path: _type: pkgs.lib.any (prefix: pkgs.lib.hasPrefix (toString prefix) path) whitelist;
};
libraryHaskellDepends = haskellDeps haskellPackages;
}) {});
in {
packages.x86_64-linux = rec {
docker =
packages.x86_64-linux = {
inherit ghc;
inherit stack;
docker =
pkgs.dockerTools.buildImage {
name = "git.pbrinkmeier.de/paul/utoy";
tag = utoy.version;
config.Cmd = [ "${pkgs.haskell.lib.justStaticExecutables utoy}/bin/utoy" ];
config.Cmd = [ "${utoy}/bin/utoy" ];
};
default = utoy;
};
devShells.x86_64-linux.default =
(pkgs.haskellPackages.developPackage {
root = ./.;
overrides = self: super: {
unicode-data = super.unicode-data_0_6_0;
unicode-data-names = pkgs.haskell.lib.markUnbroken super.unicode-data-names;
};
modifier = drv:
pkgs.haskell.lib.addBuildTools drv [
pkgs.cabal-install
pkgs.haskellPackages.implicit-hie
pkgs.haskell-language-server
];
cabal2nixOptions = "--benchmark";
}).env;
devShells.x86_64-linux.default = pkgs.mkShell {
packages = [
stack
ghc
haskellPackages.haskell-language-server
haskellPackages.implicit-hie
];
shellHook = ''
PS1+="(utoy) ";
'';
};
};
}

View File

@ -1,10 +1,10 @@
cradle:
cabal:
- path: "src"
component: "lib:utoy"
stack:
- path: "./src"
component: "utoy:lib"
- path: "app/Main.hs"
- path: "./app/Main.hs"
component: "utoy:exe:utoy"
- path: "bench/Main.hs"
component: "utoy:bench:utoy-bench"
- path: "./test"
component: "utoy:test:utoy-test"

13
nix/haskell-deps.nix Normal file
View File

@ -0,0 +1,13 @@
haskellPackages: with haskellPackages; [
attoparsec
blaze-html
bytestring
file-embed
http-media
servant-server
text
unicode-data
unicode-data-names
wai
warp
]

23
nix/overlay.nix Normal file
View File

@ -0,0 +1,23 @@
let
settings = import ./settings.nix;
overlay = final: prev: {
haskell = prev.haskell // {
packages = prev.haskell.packages // {
"${settings.ghc}" = prev.haskell.packages."${settings.ghc}".override {
overrides = haskellOverlay prev;
};
};
};
};
haskellOverlay = pkgs: final: prev: {
attoparsec-iso8601 = prev.attoparsec-iso8601_1_1_0_0;
http-api-data = prev.http-api-data_0_5;
servant = pkgs.haskell.lib.doJailbreak prev.servant;
servant-server = pkgs.haskell.lib.doJailbreak prev.servant-server;
unicode-data = prev.unicode-data_0_4_0_1;
unicode-data-names = pkgs.haskell.lib.markUnbroken prev.unicode-data-names;
};
in
overlay

5
nix/settings.nix Normal file
View File

@ -0,0 +1,5 @@
{
# GHC version to use with Nix.
# Should match the one in stack.yaml.
ghc = "ghc944";
}

70
package.yaml Normal file
View File

@ -0,0 +1,70 @@
# Adapted from new-template.hsfiles
name: utoy
# Keep this in sync with the version in flake.nix.
version: 0.6
git: "https://git.pbrinkmeier.de/paul/utoy"
license: MIT
author: "Paul Brinkmeier"
maintainer: "hallo@pbrinkmeier.de"
copyright: "2023 Paul Brinkmeier"
extra-source-files:
- README.md
- static/utoy.css
dependencies:
- base >= 4.7 && < 5
- attoparsec
- text
ghc-options:
- -Wall
- -Wcompat
- -Widentities
- -Wincomplete-record-updates
- -Wincomplete-uni-patterns
- -Wmissing-export-lists
- -Wmissing-home-modules
- -Wpartial-fields
- -Wredundant-constraints
library:
source-dirs: src
executables:
utoy:
main: Main.hs
source-dirs: app
ghc-options:
- -threaded
- -rtsopts
- -with-rtsopts=-N
dependencies:
- utoy
- blaze-html
- bytestring
- file-embed
- http-media
- servant-server
- text
- unicode-data
- unicode-data-names
- wai
- warp
# Fix "Multiple files use the same module name", see
# https://stackoverflow.com/questions/67519851/multiple-files-use-the-same-module-name
when:
- condition: false
other-modules: Paths_utoy
tests:
utoy-test:
main: Spec.hs
source-dirs: test
ghc-options:
- -threaded
- -rtsopts
- -with-rtsopts=-N
dependencies:
- utoy

View File

@ -1,31 +0,0 @@
module UToy.Names (allNames, blockName, searchCaseInsensitive) where
import Data.Char (ord)
import Data.Maybe (maybeToList)
import Data.Text (Text)
import Data.Vector (Vector)
import qualified Data.Text as Text
import qualified Data.Vector as Vector
import qualified Unicode.Char.General.Blocks as UnicodeBlocks
import qualified Unicode.Char.General.Names as UnicodeNames
-- | Retrieve name and aliases (suffixed with @*@) of a 'Char'.
allNames :: Char -> [Text]
allNames c = map Text.pack $
maybeToList (UnicodeNames.name c)
++ map (++ "*") (UnicodeNames.nameAliases c)
blockName :: Char -> Maybe Text
blockName c = Text.pack . UnicodeBlocks.blockName . UnicodeBlocks.blockDefinition <$> UnicodeBlocks.block c
searchCaseInsensitive :: Text -> [Char]
searchCaseInsensitive search = filter go [minBound..maxBound]
where
go c = any matches $ Vector.unsafeIndex lowerNames (ord c - ord minBound)
matches t = Text.toLower search `Text.isInfixOf` t
lowerNames :: Vector [Text]
lowerNames = Vector.fromList $ map go [minBound..maxBound]
where
go = map Text.toLower . allNames

5
stack.yaml Normal file
View File

@ -0,0 +1,5 @@
# You can get a working environment using nix develop.
# Keep this in sync with nix/settings.nix
resolver: ghc-9.4.4
packages:
- .

2
test/Spec.hs Normal file
View File

@ -0,0 +1,2 @@
main :: IO ()
main = putStrLn "Test suite not yet implemented"

View File

@ -1,7 +1,11 @@
cabal-version: 1.12
-- This file has been generated from package.yaml by hpack version 0.35.2.
--
-- see: https://github.com/sol/hpack
name: utoy
version: 0.6.2
version: 0.6
author: Paul Brinkmeier
maintainer: hallo@pbrinkmeier.de
copyright: 2023 Paul Brinkmeier
@ -20,7 +24,6 @@ library
exposed-modules:
UToy.Decode
UToy.Parsers
UToy.Names
UToy.Table
other-modules:
Paths_utoy
@ -31,9 +34,6 @@ library
attoparsec
, base >=4.7 && <5
, text
, unicode-data
, unicode-data-names
, vector
default-language: Haskell2010
executable utoy
@ -50,20 +50,24 @@ executable utoy
, http-media
, servant-server
, text
, unicode-data
, unicode-data-names
, utoy
, wai
, warp
default-language: Haskell2010
benchmark utoy-bench
test-suite utoy-test
type: exitcode-stdio-1.0
main-is: Main.hs
main-is: Spec.hs
other-modules:
Paths_utoy
hs-source-dirs:
bench
test
ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints -threaded -rtsopts -with-rtsopts=-N
build-depends:
base >=4.7 && <5
, criterion
attoparsec
, base >=4.7 && <5
, text
, unicode-data-names
, vector
, utoy
default-language: Haskell2010