Implement slightly faster search

This commit is contained in:
Paul Brinkmeier 2025-02-22 18:58:12 +01:00
parent 8b31df9193
commit 34c28813fe
3 changed files with 54 additions and 33 deletions

View File

@ -3,6 +3,7 @@
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NumericUnderscores #-}
{-# LANGUAGE OverloadedRecordDot #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
@ -15,8 +16,7 @@ module Main (main) where
import Data.Char (chr)
import Data.FileEmbed (embedFile)
import Data.Foldable (for_)
import Data.List (intercalate)
import Data.Maybe (fromMaybe, maybeToList)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import Data.Word (Word8)
import Network.HTTP.Media ((//), (/:))
@ -48,11 +48,10 @@ import qualified Data.Text.Encoding as Encoding
import qualified Network.Wai.Handler.Warp as Warp
import qualified Text.Blaze.Html5 as H
import qualified Text.Blaze.Html5.Attributes as A
import qualified Unicode.Char.General.Blocks as UnicodeBlocks
import qualified Unicode.Char.General.Names as UnicodeNames
import qualified UToy.Decode as Decode
import qualified UToy.Table as Table
import qualified UToy.Names as Names
import qualified UToy.Parsers as Parsers
main :: IO ()
@ -100,6 +99,7 @@ examples =
[ "/text/✅🤔"
, "/codepoints/x2705+x1F914"
, "/utf8/e2.9c.85.f0.9f.a4.94"
, "/search/asterisk"
]
instance MimeRender PlainText RootModel where
@ -143,8 +143,8 @@ instance MimeRender PlainText Utf8Model where
Right c ->
[ Text.pack [c]
, Text.pack $ printf "U+%04X" c
, Text.pack $ intercalate ", " $ allNames c
, Text.pack $ fromMaybe "" $ blockName c
, Text.intercalate ", " $ Names.allNames c
, fromMaybe "" $ Names.blockName c
]
)
]
@ -162,8 +162,8 @@ instance MimeRender HTML Utf8Model where
Right c -> do
H.td $ H.input ! A.class_ "charbox" ! A.value (H.toValue [c])
H.td $ H.code $ printfHtml "U+%04X" c
H.td $ H.code $ H.toHtml $ intercalate ", " $ allNames c
H.td $ H.code $ H.toHtml $ fromMaybe "" $ blockName c
H.td $ H.code $ H.toHtml $ Text.intercalate ", " $ Names.allNames c
H.td $ H.code $ H.toHtml $ fromMaybe "" $ Names.blockName c
-- /codepoints/<codepoints>
@ -177,14 +177,14 @@ mkCodepointsModel =
-- Limit number of returned codepoints. Otherwise it's
-- too easy to provoke massive response bodies with requests like
-- /codepoints/0-99999999
. take 100000
. take 100_000
. map go
. concatMap (uncurry enumFromTo)
where
go codepoint = (codepoint, toChar codepoint)
toChar codepoint
| codepoint > 0x10FFFF = Left "Would be too big (maximum: U+10FFFF)"
| codepoint > 0x10_FFFF = Left "Would be too big (maximum: U+10FFFF)"
| isSurrogate codepoint = Left "Is a surrogate"
| otherwise = Right $ chr $ fromIntegral codepoint
@ -200,8 +200,8 @@ instance MimeRender PlainText CodepointsModel where
Right c ->
[ Text.pack [c]
, Text.pack $ printf "U+%04X" c
, Text.pack $ intercalate ", " $ allNames c
, Text.pack $ fromMaybe "" $ blockName c
, Text.intercalate ", " $ Names.allNames c
, fromMaybe "" $ Names.blockName c
]
)
| (codepoint, eiC) <- model.codepoints
@ -218,8 +218,8 @@ instance MimeRender HTML CodepointsModel where
Right c -> do
H.td $ H.input ! A.class_ "charbox" ! A.value (H.toValue [c])
H.td $ H.code $ printfHtml "U+%04X" c
H.td $ H.code $ H.toHtml $ intercalate ", " $ allNames c
H.td $ H.code $ H.toHtml $ fromMaybe "" $ blockName c
H.td $ H.code $ H.toHtml $ Text.intercalate ", " $ Names.allNames c
H.td $ H.code $ H.toHtml $ fromMaybe "" $ Names.blockName c
-- /text/<text>
@ -239,10 +239,7 @@ newtype SearchModel = SearchModel
}
mkSearchModel :: Text -> SearchModel
mkSearchModel search = SearchModel $ searchAllChars search
searchAllChars :: Text -> [Char]
searchAllChars search = [c | c <- [minBound..maxBound], any (\name -> Text.toLower search `Text.isInfixOf` Text.toLower (Text.pack name)) (allNames c)]
mkSearchModel search = SearchModel $ take 100_000 $ Names.searchCaseInsensitive search
instance MimeRender HTML SearchModel where
mimeRender _ model = charTableHtml model.results
@ -259,8 +256,8 @@ charTableHtml chars =
H.tr $ do
H.td $ H.input ! A.class_ "charbox" ! A.value (H.toValue [c])
H.td $ H.code $ printfHtml "U+%04X" c
H.td $ H.code $ H.toHtml $ intercalate ", " $ allNames c
H.td $ H.code $ H.toHtml $ fromMaybe "" $ blockName c
H.td $ H.code $ H.toHtml $ Text.intercalate ", " $ Names.allNames c
H.td $ H.code $ H.toHtml $ fromMaybe "" $ Names.blockName c
charTableText :: [Char] -> BL.ByteString
@ -269,8 +266,8 @@ charTableText chars =
[ map Table.cl
[ Text.pack [c]
, Text.pack $ printf "U+%04X" c
, Text.pack $ intercalate ", " $ allNames c
, Text.pack $ fromMaybe "" $ blockName c
, Text.intercalate ", " $ Names.allNames c
, fromMaybe "" $ Names.blockName c
]
| c <- chars
]
@ -284,15 +281,6 @@ showByteHex = printf " %02X"
showByteBin :: Word8 -> String
showByteBin = printf "%08b"
-- | Retrieve name and aliases (suffixed with @*@) of a 'Char'.
allNames :: Char -> [String]
allNames c =
maybeToList (UnicodeNames.name c)
++ map (++ "*") (UnicodeNames.nameAliases c)
blockName :: Char -> Maybe String
blockName c = UnicodeBlocks.blockName . UnicodeBlocks.blockDefinition <$> UnicodeBlocks.block c
orThrow :: Either a b -> (a -> ServerError) -> Handler b
orThrow (Left err) f = throwError $ f err
orThrow (Right val) _ = pure val

31
src/UToy/Names.hs Normal file
View File

@ -0,0 +1,31 @@
module UToy.Names (allNames, blockName, searchCaseInsensitive) where
import Data.Char (ord)
import Data.Maybe (maybeToList)
import Data.Text (Text)
import Data.Vector (Vector)
import qualified Data.Text as Text
import qualified Data.Vector as Vector
import qualified Unicode.Char.General.Blocks as UnicodeBlocks
import qualified Unicode.Char.General.Names as UnicodeNames
-- | Retrieve name and aliases (suffixed with @*@) of a 'Char'.
allNames :: Char -> [Text]
allNames c = map Text.pack $
maybeToList (UnicodeNames.name c)
++ map (++ "*") (UnicodeNames.nameAliases c)
blockName :: Char -> Maybe Text
blockName c = Text.pack . UnicodeBlocks.blockName . UnicodeBlocks.blockDefinition <$> UnicodeBlocks.block c
searchCaseInsensitive :: Text -> [Char]
searchCaseInsensitive search = filter go [minBound..maxBound]
where
go c = any matches $ Vector.unsafeIndex lowerNames (ord c - ord minBound)
matches t = Text.toLower search `Text.isInfixOf` t
lowerNames :: Vector [Text]
lowerNames = Vector.fromList $ map go [minBound..maxBound]
where
go = map Text.toLower . allNames

View File

@ -20,6 +20,7 @@ library
exposed-modules:
UToy.Decode
UToy.Parsers
UToy.Names
UToy.Table
other-modules:
Paths_utoy
@ -30,6 +31,9 @@ library
attoparsec
, base >=4.7 && <5
, text
, unicode-data
, unicode-data-names
, vector
default-language: Haskell2010
executable utoy
@ -46,8 +50,6 @@ executable utoy
, http-media
, servant-server
, text
, unicode-data
, unicode-data-names
, utoy
, wai
, warp