Compare commits
3 Commits
a37643b5cd
...
4ffefe1f9e
Author | SHA1 | Date | |
---|---|---|---|
4ffefe1f9e | |||
34c28813fe | |||
8b31df9193 |
2
.gitignore
vendored
2
.gitignore
vendored
@ -1,4 +1,6 @@
|
|||||||
.stack-work/
|
.stack-work/
|
||||||
|
dist-newstyle
|
||||||
.vscode/
|
.vscode/
|
||||||
*.swp
|
*.swp
|
||||||
result
|
result
|
||||||
|
bench.html
|
||||||
|
16
README.md
16
README.md
@ -6,12 +6,6 @@
|
|||||||
$ nix build
|
$ nix build
|
||||||
```
|
```
|
||||||
|
|
||||||
## Running
|
|
||||||
|
|
||||||
```
|
|
||||||
$ nix run
|
|
||||||
```
|
|
||||||
|
|
||||||
## Building the Docker image
|
## Building the Docker image
|
||||||
|
|
||||||
```
|
```
|
||||||
@ -26,13 +20,7 @@ Includes Stack, GHC, `haskell-language-server`, `gen-hie` etc.
|
|||||||
$ nix develop
|
$ nix develop
|
||||||
```
|
```
|
||||||
|
|
||||||
## Running Stack and GHC
|
|
||||||
|
|
||||||
```
|
|
||||||
$ nix run .#stack
|
|
||||||
$ nix run .#ghc
|
|
||||||
```
|
|
||||||
|
|
||||||
## TODO
|
## TODO
|
||||||
|
|
||||||
- [ ] Benchmark, profile and optimize search
|
- [x] Benchmark, profile and optimize search
|
||||||
|
- [ ] Trim down the docker image
|
||||||
|
50
app/Main.hs
50
app/Main.hs
@ -3,6 +3,7 @@
|
|||||||
{-# LANGUAGE DuplicateRecordFields #-}
|
{-# LANGUAGE DuplicateRecordFields #-}
|
||||||
{-# LANGUAGE FlexibleInstances #-}
|
{-# LANGUAGE FlexibleInstances #-}
|
||||||
{-# LANGUAGE MultiParamTypeClasses #-}
|
{-# LANGUAGE MultiParamTypeClasses #-}
|
||||||
|
{-# LANGUAGE NumericUnderscores #-}
|
||||||
{-# LANGUAGE OverloadedRecordDot #-}
|
{-# LANGUAGE OverloadedRecordDot #-}
|
||||||
{-# LANGUAGE OverloadedStrings #-}
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
{-# LANGUAGE RankNTypes #-}
|
{-# LANGUAGE RankNTypes #-}
|
||||||
@ -15,8 +16,7 @@ module Main (main) where
|
|||||||
import Data.Char (chr)
|
import Data.Char (chr)
|
||||||
import Data.FileEmbed (embedFile)
|
import Data.FileEmbed (embedFile)
|
||||||
import Data.Foldable (for_)
|
import Data.Foldable (for_)
|
||||||
import Data.List (intercalate)
|
import Data.Maybe (fromMaybe)
|
||||||
import Data.Maybe (fromMaybe, maybeToList)
|
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
import Data.Word (Word8)
|
import Data.Word (Word8)
|
||||||
import Network.HTTP.Media ((//), (/:))
|
import Network.HTTP.Media ((//), (/:))
|
||||||
@ -48,11 +48,10 @@ import qualified Data.Text.Encoding as Encoding
|
|||||||
import qualified Network.Wai.Handler.Warp as Warp
|
import qualified Network.Wai.Handler.Warp as Warp
|
||||||
import qualified Text.Blaze.Html5 as H
|
import qualified Text.Blaze.Html5 as H
|
||||||
import qualified Text.Blaze.Html5.Attributes as A
|
import qualified Text.Blaze.Html5.Attributes as A
|
||||||
import qualified Unicode.Char.General.Blocks as UnicodeBlocks
|
|
||||||
import qualified Unicode.Char.General.Names as UnicodeNames
|
|
||||||
|
|
||||||
import qualified UToy.Decode as Decode
|
import qualified UToy.Decode as Decode
|
||||||
import qualified UToy.Table as Table
|
import qualified UToy.Table as Table
|
||||||
|
import qualified UToy.Names as Names
|
||||||
import qualified UToy.Parsers as Parsers
|
import qualified UToy.Parsers as Parsers
|
||||||
|
|
||||||
main :: IO ()
|
main :: IO ()
|
||||||
@ -100,6 +99,7 @@ examples =
|
|||||||
[ "/text/✅🤔"
|
[ "/text/✅🤔"
|
||||||
, "/codepoints/x2705+x1F914"
|
, "/codepoints/x2705+x1F914"
|
||||||
, "/utf8/e2.9c.85.f0.9f.a4.94"
|
, "/utf8/e2.9c.85.f0.9f.a4.94"
|
||||||
|
, "/search/asterisk"
|
||||||
]
|
]
|
||||||
|
|
||||||
instance MimeRender PlainText RootModel where
|
instance MimeRender PlainText RootModel where
|
||||||
@ -143,8 +143,8 @@ instance MimeRender PlainText Utf8Model where
|
|||||||
Right c ->
|
Right c ->
|
||||||
[ Text.pack [c]
|
[ Text.pack [c]
|
||||||
, Text.pack $ printf "U+%04X" c
|
, Text.pack $ printf "U+%04X" c
|
||||||
, Text.pack $ intercalate ", " $ allNames c
|
, Text.intercalate ", " $ Names.allNames c
|
||||||
, Text.pack $ fromMaybe "" $ blockName c
|
, fromMaybe "" $ Names.blockName c
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
@ -162,8 +162,8 @@ instance MimeRender HTML Utf8Model where
|
|||||||
Right c -> do
|
Right c -> do
|
||||||
H.td $ H.input ! A.class_ "charbox" ! A.value (H.toValue [c])
|
H.td $ H.input ! A.class_ "charbox" ! A.value (H.toValue [c])
|
||||||
H.td $ H.code $ printfHtml "U+%04X" c
|
H.td $ H.code $ printfHtml "U+%04X" c
|
||||||
H.td $ H.code $ H.toHtml $ intercalate ", " $ allNames c
|
H.td $ H.code $ H.toHtml $ Text.intercalate ", " $ Names.allNames c
|
||||||
H.td $ H.code $ H.toHtml $ fromMaybe "" $ blockName c
|
H.td $ H.code $ H.toHtml $ fromMaybe "" $ Names.blockName c
|
||||||
|
|
||||||
-- /codepoints/<codepoints>
|
-- /codepoints/<codepoints>
|
||||||
|
|
||||||
@ -177,14 +177,14 @@ mkCodepointsModel =
|
|||||||
-- Limit number of returned codepoints. Otherwise it's
|
-- Limit number of returned codepoints. Otherwise it's
|
||||||
-- too easy to provoke massive response bodies with requests like
|
-- too easy to provoke massive response bodies with requests like
|
||||||
-- /codepoints/0-99999999
|
-- /codepoints/0-99999999
|
||||||
. take 100000
|
. take 100_000
|
||||||
. map go
|
. map go
|
||||||
. concatMap (uncurry enumFromTo)
|
. concatMap (uncurry enumFromTo)
|
||||||
where
|
where
|
||||||
go codepoint = (codepoint, toChar codepoint)
|
go codepoint = (codepoint, toChar codepoint)
|
||||||
|
|
||||||
toChar codepoint
|
toChar codepoint
|
||||||
| codepoint > 0x10FFFF = Left "Would be too big (maximum: U+10FFFF)"
|
| codepoint > 0x10_FFFF = Left "Would be too big (maximum: U+10FFFF)"
|
||||||
| isSurrogate codepoint = Left "Is a surrogate"
|
| isSurrogate codepoint = Left "Is a surrogate"
|
||||||
| otherwise = Right $ chr $ fromIntegral codepoint
|
| otherwise = Right $ chr $ fromIntegral codepoint
|
||||||
|
|
||||||
@ -200,8 +200,8 @@ instance MimeRender PlainText CodepointsModel where
|
|||||||
Right c ->
|
Right c ->
|
||||||
[ Text.pack [c]
|
[ Text.pack [c]
|
||||||
, Text.pack $ printf "U+%04X" c
|
, Text.pack $ printf "U+%04X" c
|
||||||
, Text.pack $ intercalate ", " $ allNames c
|
, Text.intercalate ", " $ Names.allNames c
|
||||||
, Text.pack $ fromMaybe "" $ blockName c
|
, fromMaybe "" $ Names.blockName c
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
| (codepoint, eiC) <- model.codepoints
|
| (codepoint, eiC) <- model.codepoints
|
||||||
@ -218,8 +218,8 @@ instance MimeRender HTML CodepointsModel where
|
|||||||
Right c -> do
|
Right c -> do
|
||||||
H.td $ H.input ! A.class_ "charbox" ! A.value (H.toValue [c])
|
H.td $ H.input ! A.class_ "charbox" ! A.value (H.toValue [c])
|
||||||
H.td $ H.code $ printfHtml "U+%04X" c
|
H.td $ H.code $ printfHtml "U+%04X" c
|
||||||
H.td $ H.code $ H.toHtml $ intercalate ", " $ allNames c
|
H.td $ H.code $ H.toHtml $ Text.intercalate ", " $ Names.allNames c
|
||||||
H.td $ H.code $ H.toHtml $ fromMaybe "" $ blockName c
|
H.td $ H.code $ H.toHtml $ fromMaybe "" $ Names.blockName c
|
||||||
|
|
||||||
-- /text/<text>
|
-- /text/<text>
|
||||||
|
|
||||||
@ -239,10 +239,7 @@ newtype SearchModel = SearchModel
|
|||||||
}
|
}
|
||||||
|
|
||||||
mkSearchModel :: Text -> SearchModel
|
mkSearchModel :: Text -> SearchModel
|
||||||
mkSearchModel search = SearchModel $ searchAllChars search
|
mkSearchModel search = SearchModel $ take 100_000 $ Names.searchCaseInsensitive search
|
||||||
|
|
||||||
searchAllChars :: Text -> [Char]
|
|
||||||
searchAllChars search = [c | c <- [minBound..maxBound], any (\name -> Text.toLower search `Text.isInfixOf` Text.toLower (Text.pack name)) (allNames c)]
|
|
||||||
|
|
||||||
instance MimeRender HTML SearchModel where
|
instance MimeRender HTML SearchModel where
|
||||||
mimeRender _ model = charTableHtml model.results
|
mimeRender _ model = charTableHtml model.results
|
||||||
@ -259,8 +256,8 @@ charTableHtml chars =
|
|||||||
H.tr $ do
|
H.tr $ do
|
||||||
H.td $ H.input ! A.class_ "charbox" ! A.value (H.toValue [c])
|
H.td $ H.input ! A.class_ "charbox" ! A.value (H.toValue [c])
|
||||||
H.td $ H.code $ printfHtml "U+%04X" c
|
H.td $ H.code $ printfHtml "U+%04X" c
|
||||||
H.td $ H.code $ H.toHtml $ intercalate ", " $ allNames c
|
H.td $ H.code $ H.toHtml $ Text.intercalate ", " $ Names.allNames c
|
||||||
H.td $ H.code $ H.toHtml $ fromMaybe "" $ blockName c
|
H.td $ H.code $ H.toHtml $ fromMaybe "" $ Names.blockName c
|
||||||
|
|
||||||
|
|
||||||
charTableText :: [Char] -> BL.ByteString
|
charTableText :: [Char] -> BL.ByteString
|
||||||
@ -269,8 +266,8 @@ charTableText chars =
|
|||||||
[ map Table.cl
|
[ map Table.cl
|
||||||
[ Text.pack [c]
|
[ Text.pack [c]
|
||||||
, Text.pack $ printf "U+%04X" c
|
, Text.pack $ printf "U+%04X" c
|
||||||
, Text.pack $ intercalate ", " $ allNames c
|
, Text.intercalate ", " $ Names.allNames c
|
||||||
, Text.pack $ fromMaybe "" $ blockName c
|
, fromMaybe "" $ Names.blockName c
|
||||||
]
|
]
|
||||||
| c <- chars
|
| c <- chars
|
||||||
]
|
]
|
||||||
@ -284,15 +281,6 @@ showByteHex = printf " %02X"
|
|||||||
showByteBin :: Word8 -> String
|
showByteBin :: Word8 -> String
|
||||||
showByteBin = printf "%08b"
|
showByteBin = printf "%08b"
|
||||||
|
|
||||||
-- | Retrieve name and aliases (suffixed with @*@) of a 'Char'.
|
|
||||||
allNames :: Char -> [String]
|
|
||||||
allNames c =
|
|
||||||
maybeToList (UnicodeNames.name c)
|
|
||||||
++ map (++ "*") (UnicodeNames.nameAliases c)
|
|
||||||
|
|
||||||
blockName :: Char -> Maybe String
|
|
||||||
blockName c = UnicodeBlocks.blockName . UnicodeBlocks.blockDefinition <$> UnicodeBlocks.block c
|
|
||||||
|
|
||||||
orThrow :: Either a b -> (a -> ServerError) -> Handler b
|
orThrow :: Either a b -> (a -> ServerError) -> Handler b
|
||||||
orThrow (Left err) f = throwError $ f err
|
orThrow (Left err) f = throwError $ f err
|
||||||
orThrow (Right val) _ = pure val
|
orThrow (Right val) _ = pure val
|
||||||
|
69
bench/Main.hs
Normal file
69
bench/Main.hs
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
|
||||||
|
module Main where
|
||||||
|
|
||||||
|
import Criterion.Main (bench, bgroup, defaultMain, whnf)
|
||||||
|
import Data.Char (ord)
|
||||||
|
import Data.Maybe (maybeToList)
|
||||||
|
import Data.Text (Text)
|
||||||
|
|
||||||
|
import qualified Data.Text as Text
|
||||||
|
import qualified Data.Vector as Vector
|
||||||
|
import qualified Unicode.Char.General.Names as UnicodeNames
|
||||||
|
|
||||||
|
main = defaultMain
|
||||||
|
[ bench "naiveSearchCI" $ whnf naiveSearchCI "latin"
|
||||||
|
, bench "naiveSearchCS" $ whnf naiveSearchCS "LATIN"
|
||||||
|
, bench "memoSearchCI" $ whnf memoSearchCI "latin"
|
||||||
|
, bench "memoSearchCS" $ whnf memoSearchCS "LATIN"
|
||||||
|
]
|
||||||
|
|
||||||
|
naiveSearchCI :: Text -> [Char]
|
||||||
|
naiveSearchCI search =
|
||||||
|
filter go [minBound..maxBound]
|
||||||
|
where
|
||||||
|
go c = any matches $ allNames c
|
||||||
|
matches t = Text.toLower search `Text.isInfixOf` Text.toLower (Text.pack t)
|
||||||
|
|
||||||
|
naiveSearchCS :: Text -> [Char]
|
||||||
|
naiveSearchCS search =
|
||||||
|
filter go [minBound..maxBound]
|
||||||
|
where
|
||||||
|
go c = any matches $ allNames c
|
||||||
|
matches t = search `Text.isInfixOf` Text.pack t
|
||||||
|
|
||||||
|
memoSearchCS :: Text -> [Char]
|
||||||
|
memoSearchCS search =
|
||||||
|
filter go [minBound..maxBound]
|
||||||
|
where
|
||||||
|
go c = any matches $ allNamesText c
|
||||||
|
matches t = search `Text.isInfixOf` t
|
||||||
|
|
||||||
|
memoSearchCI :: Text -> [Char]
|
||||||
|
memoSearchCI search =
|
||||||
|
filter go [minBound..maxBound]
|
||||||
|
where
|
||||||
|
go c = any matches $ allNamesTextLower c
|
||||||
|
matches t = Text.toLower search `Text.isInfixOf` t
|
||||||
|
|
||||||
|
-- | Retrieve name and aliases (suffixed with @*@) of a 'Char'.
|
||||||
|
allNames :: Char -> [String]
|
||||||
|
allNames c =
|
||||||
|
maybeToList (UnicodeNames.name c)
|
||||||
|
++ map (++ "*") (UnicodeNames.nameAliases c)
|
||||||
|
|
||||||
|
allNamesText :: Char -> [Text]
|
||||||
|
allNamesText c = Vector.unsafeIndex textNames $ ord c
|
||||||
|
|
||||||
|
textNames :: Vector.Vector [Text]
|
||||||
|
textNames = Vector.fromList $ map go [minBound..maxBound]
|
||||||
|
where
|
||||||
|
go c = map Text.pack $ allNames c
|
||||||
|
|
||||||
|
allNamesTextLower :: Char -> [Text]
|
||||||
|
allNamesTextLower c = Vector.unsafeIndex textNamesLower $ ord c
|
||||||
|
|
||||||
|
textNamesLower :: Vector.Vector [Text]
|
||||||
|
textNamesLower = Vector.fromList $ map go [minBound..maxBound]
|
||||||
|
where
|
||||||
|
go c = map (Text.toLower . Text.pack) $ allNames c
|
@ -31,6 +31,7 @@
|
|||||||
pkgs.haskellPackages.implicit-hie
|
pkgs.haskellPackages.implicit-hie
|
||||||
pkgs.haskell-language-server
|
pkgs.haskell-language-server
|
||||||
];
|
];
|
||||||
|
cabal2nixOptions = "--benchmark";
|
||||||
}).env;
|
}).env;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
12
hie.yaml
12
hie.yaml
@ -1,10 +1,10 @@
|
|||||||
cradle:
|
cradle:
|
||||||
stack:
|
cabal:
|
||||||
- path: "./src"
|
- path: "src"
|
||||||
component: "utoy:lib"
|
component: "lib:utoy"
|
||||||
|
|
||||||
- path: "./app/Main.hs"
|
- path: "app/Main.hs"
|
||||||
component: "utoy:exe:utoy"
|
component: "utoy:exe:utoy"
|
||||||
|
|
||||||
- path: "./test"
|
- path: "bench/Main.hs"
|
||||||
component: "utoy:test:utoy-test"
|
component: "utoy:bench:utoy-bench"
|
||||||
|
31
src/UToy/Names.hs
Normal file
31
src/UToy/Names.hs
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
module UToy.Names (allNames, blockName, searchCaseInsensitive) where
|
||||||
|
|
||||||
|
import Data.Char (ord)
|
||||||
|
import Data.Maybe (maybeToList)
|
||||||
|
import Data.Text (Text)
|
||||||
|
import Data.Vector (Vector)
|
||||||
|
|
||||||
|
import qualified Data.Text as Text
|
||||||
|
import qualified Data.Vector as Vector
|
||||||
|
import qualified Unicode.Char.General.Blocks as UnicodeBlocks
|
||||||
|
import qualified Unicode.Char.General.Names as UnicodeNames
|
||||||
|
|
||||||
|
-- | Retrieve name and aliases (suffixed with @*@) of a 'Char'.
|
||||||
|
allNames :: Char -> [Text]
|
||||||
|
allNames c = map Text.pack $
|
||||||
|
maybeToList (UnicodeNames.name c)
|
||||||
|
++ map (++ "*") (UnicodeNames.nameAliases c)
|
||||||
|
|
||||||
|
blockName :: Char -> Maybe Text
|
||||||
|
blockName c = Text.pack . UnicodeBlocks.blockName . UnicodeBlocks.blockDefinition <$> UnicodeBlocks.block c
|
||||||
|
|
||||||
|
searchCaseInsensitive :: Text -> [Char]
|
||||||
|
searchCaseInsensitive search = filter go [minBound..maxBound]
|
||||||
|
where
|
||||||
|
go c = any matches $ Vector.unsafeIndex lowerNames (ord c - ord minBound)
|
||||||
|
matches t = Text.toLower search `Text.isInfixOf` t
|
||||||
|
|
||||||
|
lowerNames :: Vector [Text]
|
||||||
|
lowerNames = Vector.fromList $ map go [minBound..maxBound]
|
||||||
|
where
|
||||||
|
go = map Text.toLower . allNames
|
@ -1,2 +0,0 @@
|
|||||||
main :: IO ()
|
|
||||||
main = putStrLn "Test suite not yet implemented"
|
|
28
utoy.cabal
28
utoy.cabal
@ -1,11 +1,7 @@
|
|||||||
cabal-version: 1.12
|
cabal-version: 1.12
|
||||||
|
|
||||||
-- This file has been generated from package.yaml by hpack version 0.35.2.
|
|
||||||
--
|
|
||||||
-- see: https://github.com/sol/hpack
|
|
||||||
|
|
||||||
name: utoy
|
name: utoy
|
||||||
version: 0.6.1
|
version: 0.6.2
|
||||||
author: Paul Brinkmeier
|
author: Paul Brinkmeier
|
||||||
maintainer: hallo@pbrinkmeier.de
|
maintainer: hallo@pbrinkmeier.de
|
||||||
copyright: 2023 Paul Brinkmeier
|
copyright: 2023 Paul Brinkmeier
|
||||||
@ -24,6 +20,7 @@ library
|
|||||||
exposed-modules:
|
exposed-modules:
|
||||||
UToy.Decode
|
UToy.Decode
|
||||||
UToy.Parsers
|
UToy.Parsers
|
||||||
|
UToy.Names
|
||||||
UToy.Table
|
UToy.Table
|
||||||
other-modules:
|
other-modules:
|
||||||
Paths_utoy
|
Paths_utoy
|
||||||
@ -34,6 +31,9 @@ library
|
|||||||
attoparsec
|
attoparsec
|
||||||
, base >=4.7 && <5
|
, base >=4.7 && <5
|
||||||
, text
|
, text
|
||||||
|
, unicode-data
|
||||||
|
, unicode-data-names
|
||||||
|
, vector
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
|
|
||||||
executable utoy
|
executable utoy
|
||||||
@ -50,24 +50,20 @@ executable utoy
|
|||||||
, http-media
|
, http-media
|
||||||
, servant-server
|
, servant-server
|
||||||
, text
|
, text
|
||||||
, unicode-data
|
|
||||||
, unicode-data-names
|
|
||||||
, utoy
|
, utoy
|
||||||
, wai
|
, wai
|
||||||
, warp
|
, warp
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
|
|
||||||
test-suite utoy-test
|
benchmark utoy-bench
|
||||||
type: exitcode-stdio-1.0
|
type: exitcode-stdio-1.0
|
||||||
main-is: Spec.hs
|
main-is: Main.hs
|
||||||
other-modules:
|
|
||||||
Paths_utoy
|
|
||||||
hs-source-dirs:
|
hs-source-dirs:
|
||||||
test
|
bench
|
||||||
ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints -threaded -rtsopts -with-rtsopts=-N
|
|
||||||
build-depends:
|
build-depends:
|
||||||
attoparsec
|
base >=4.7 && <5
|
||||||
, base >=4.7 && <5
|
, criterion
|
||||||
, text
|
, text
|
||||||
, utoy
|
, unicode-data-names
|
||||||
|
, vector
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
|
Loading…
x
Reference in New Issue
Block a user