diff --git a/.gitignore b/.gitignore index d219106..8ae1d06 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ .stack-work/ +dist-newstyle .vscode/ *.swp result +bench.html diff --git a/README.md b/README.md index ca87bfb..867421d 100644 --- a/README.md +++ b/README.md @@ -35,4 +35,5 @@ $ nix run .#ghc ## TODO -- [ ] Benchmark, profile and optimize search +- [x] Benchmark, profile and optimize search +- [ ] Trim down the docker image diff --git a/bench/Main.hs b/bench/Main.hs new file mode 100644 index 0000000..52b5326 --- /dev/null +++ b/bench/Main.hs @@ -0,0 +1,69 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Main where + +import Criterion.Main (bench, bgroup, defaultMain, whnf) +import Data.Char (ord) +import Data.Maybe (maybeToList) +import Data.Text (Text) + +import qualified Data.Text as Text +import qualified Data.Vector as Vector +import qualified Unicode.Char.General.Names as UnicodeNames + +main = defaultMain + [ bench "naiveSearchCI" $ whnf naiveSearchCI "latin" + , bench "naiveSearchCS" $ whnf naiveSearchCS "LATIN" + , bench "memoSearchCI" $ whnf memoSearchCI "latin" + , bench "memoSearchCS" $ whnf memoSearchCS "LATIN" + ] + +naiveSearchCI :: Text -> [Char] +naiveSearchCI search = + filter go [minBound..maxBound] + where + go c = any matches $ allNames c + matches t = Text.toLower search `Text.isInfixOf` Text.toLower (Text.pack t) + +naiveSearchCS :: Text -> [Char] +naiveSearchCS search = + filter go [minBound..maxBound] + where + go c = any matches $ allNames c + matches t = search `Text.isInfixOf` Text.pack t + +memoSearchCS :: Text -> [Char] +memoSearchCS search = + filter go [minBound..maxBound] + where + go c = any matches $ allNamesText c + matches t = search `Text.isInfixOf` t + +memoSearchCI :: Text -> [Char] +memoSearchCI search = + filter go [minBound..maxBound] + where + go c = any matches $ allNamesTextLower c + matches t = Text.toLower search `Text.isInfixOf` t + +-- | Retrieve name and aliases (suffixed with @*@) of a 'Char'. +allNames :: Char -> [String] +allNames c = + maybeToList (UnicodeNames.name c) + ++ map (++ "*") (UnicodeNames.nameAliases c) + +allNamesText :: Char -> [Text] +allNamesText c = Vector.unsafeIndex textNames $ ord c + +textNames :: Vector.Vector [Text] +textNames = Vector.fromList $ map go [minBound..maxBound] + where + go c = map Text.pack $ allNames c + +allNamesTextLower :: Char -> [Text] +allNamesTextLower c = Vector.unsafeIndex textNamesLower $ ord c + +textNamesLower :: Vector.Vector [Text] +textNamesLower = Vector.fromList $ map go [minBound..maxBound] + where + go c = map (Text.toLower . Text.pack) $ allNames c diff --git a/flake.nix b/flake.nix index 7f685cd..d8b8512 100644 --- a/flake.nix +++ b/flake.nix @@ -31,6 +31,7 @@ pkgs.haskellPackages.implicit-hie pkgs.haskell-language-server ]; + cabal2nixOptions = "--benchmark"; }).env; }; } diff --git a/hie.yaml b/hie.yaml index 3c7a3d1..b9de8a6 100644 --- a/hie.yaml +++ b/hie.yaml @@ -1,10 +1,10 @@ cradle: - stack: - - path: "./src" - component: "utoy:lib" + cabal: + - path: "src" + component: "lib:utoy" - - path: "./app/Main.hs" + - path: "app/Main.hs" component: "utoy:exe:utoy" - - path: "./test" - component: "utoy:test:utoy-test" + - path: "bench/Main.hs" + component: "utoy:bench:utoy-bench" diff --git a/test/Spec.hs b/test/Spec.hs deleted file mode 100644 index cd4753f..0000000 --- a/test/Spec.hs +++ /dev/null @@ -1,2 +0,0 @@ -main :: IO () -main = putStrLn "Test suite not yet implemented" diff --git a/utoy.cabal b/utoy.cabal index acd84c3..6cf480b 100644 --- a/utoy.cabal +++ b/utoy.cabal @@ -1,9 +1,5 @@ cabal-version: 1.12 --- This file has been generated from package.yaml by hpack version 0.35.2. --- --- see: https://github.com/sol/hpack - name: utoy version: 0.6.1 author: Paul Brinkmeier @@ -57,17 +53,15 @@ executable utoy , warp default-language: Haskell2010 -test-suite utoy-test +benchmark utoy-bench type: exitcode-stdio-1.0 - main-is: Spec.hs - other-modules: - Paths_utoy + main-is: Main.hs hs-source-dirs: - test - ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints -threaded -rtsopts -with-rtsopts=-N + bench build-depends: - attoparsec - , base >=4.7 && <5 + base >=4.7 && <5 + , criterion , text - , utoy + , unicode-data-names + , vector default-language: Haskell2010