Implement slightly faster search
This commit is contained in:
parent
8b31df9193
commit
34c28813fe
50
app/Main.hs
50
app/Main.hs
@ -3,6 +3,7 @@
|
||||
{-# LANGUAGE DuplicateRecordFields #-}
|
||||
{-# LANGUAGE FlexibleInstances #-}
|
||||
{-# LANGUAGE MultiParamTypeClasses #-}
|
||||
{-# LANGUAGE NumericUnderscores #-}
|
||||
{-# LANGUAGE OverloadedRecordDot #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE RankNTypes #-}
|
||||
@ -15,8 +16,7 @@ module Main (main) where
|
||||
import Data.Char (chr)
|
||||
import Data.FileEmbed (embedFile)
|
||||
import Data.Foldable (for_)
|
||||
import Data.List (intercalate)
|
||||
import Data.Maybe (fromMaybe, maybeToList)
|
||||
import Data.Maybe (fromMaybe)
|
||||
import Data.Text (Text)
|
||||
import Data.Word (Word8)
|
||||
import Network.HTTP.Media ((//), (/:))
|
||||
@ -48,11 +48,10 @@ import qualified Data.Text.Encoding as Encoding
|
||||
import qualified Network.Wai.Handler.Warp as Warp
|
||||
import qualified Text.Blaze.Html5 as H
|
||||
import qualified Text.Blaze.Html5.Attributes as A
|
||||
import qualified Unicode.Char.General.Blocks as UnicodeBlocks
|
||||
import qualified Unicode.Char.General.Names as UnicodeNames
|
||||
|
||||
import qualified UToy.Decode as Decode
|
||||
import qualified UToy.Table as Table
|
||||
import qualified UToy.Names as Names
|
||||
import qualified UToy.Parsers as Parsers
|
||||
|
||||
main :: IO ()
|
||||
@ -100,6 +99,7 @@ examples =
|
||||
[ "/text/✅🤔"
|
||||
, "/codepoints/x2705+x1F914"
|
||||
, "/utf8/e2.9c.85.f0.9f.a4.94"
|
||||
, "/search/asterisk"
|
||||
]
|
||||
|
||||
instance MimeRender PlainText RootModel where
|
||||
@ -143,8 +143,8 @@ instance MimeRender PlainText Utf8Model where
|
||||
Right c ->
|
||||
[ Text.pack [c]
|
||||
, Text.pack $ printf "U+%04X" c
|
||||
, Text.pack $ intercalate ", " $ allNames c
|
||||
, Text.pack $ fromMaybe "" $ blockName c
|
||||
, Text.intercalate ", " $ Names.allNames c
|
||||
, fromMaybe "" $ Names.blockName c
|
||||
]
|
||||
)
|
||||
]
|
||||
@ -162,8 +162,8 @@ instance MimeRender HTML Utf8Model where
|
||||
Right c -> do
|
||||
H.td $ H.input ! A.class_ "charbox" ! A.value (H.toValue [c])
|
||||
H.td $ H.code $ printfHtml "U+%04X" c
|
||||
H.td $ H.code $ H.toHtml $ intercalate ", " $ allNames c
|
||||
H.td $ H.code $ H.toHtml $ fromMaybe "" $ blockName c
|
||||
H.td $ H.code $ H.toHtml $ Text.intercalate ", " $ Names.allNames c
|
||||
H.td $ H.code $ H.toHtml $ fromMaybe "" $ Names.blockName c
|
||||
|
||||
-- /codepoints/<codepoints>
|
||||
|
||||
@ -177,14 +177,14 @@ mkCodepointsModel =
|
||||
-- Limit number of returned codepoints. Otherwise it's
|
||||
-- too easy to provoke massive response bodies with requests like
|
||||
-- /codepoints/0-99999999
|
||||
. take 100000
|
||||
. take 100_000
|
||||
. map go
|
||||
. concatMap (uncurry enumFromTo)
|
||||
where
|
||||
go codepoint = (codepoint, toChar codepoint)
|
||||
|
||||
toChar codepoint
|
||||
| codepoint > 0x10FFFF = Left "Would be too big (maximum: U+10FFFF)"
|
||||
| codepoint > 0x10_FFFF = Left "Would be too big (maximum: U+10FFFF)"
|
||||
| isSurrogate codepoint = Left "Is a surrogate"
|
||||
| otherwise = Right $ chr $ fromIntegral codepoint
|
||||
|
||||
@ -200,8 +200,8 @@ instance MimeRender PlainText CodepointsModel where
|
||||
Right c ->
|
||||
[ Text.pack [c]
|
||||
, Text.pack $ printf "U+%04X" c
|
||||
, Text.pack $ intercalate ", " $ allNames c
|
||||
, Text.pack $ fromMaybe "" $ blockName c
|
||||
, Text.intercalate ", " $ Names.allNames c
|
||||
, fromMaybe "" $ Names.blockName c
|
||||
]
|
||||
)
|
||||
| (codepoint, eiC) <- model.codepoints
|
||||
@ -218,8 +218,8 @@ instance MimeRender HTML CodepointsModel where
|
||||
Right c -> do
|
||||
H.td $ H.input ! A.class_ "charbox" ! A.value (H.toValue [c])
|
||||
H.td $ H.code $ printfHtml "U+%04X" c
|
||||
H.td $ H.code $ H.toHtml $ intercalate ", " $ allNames c
|
||||
H.td $ H.code $ H.toHtml $ fromMaybe "" $ blockName c
|
||||
H.td $ H.code $ H.toHtml $ Text.intercalate ", " $ Names.allNames c
|
||||
H.td $ H.code $ H.toHtml $ fromMaybe "" $ Names.blockName c
|
||||
|
||||
-- /text/<text>
|
||||
|
||||
@ -239,10 +239,7 @@ newtype SearchModel = SearchModel
|
||||
}
|
||||
|
||||
mkSearchModel :: Text -> SearchModel
|
||||
mkSearchModel search = SearchModel $ searchAllChars search
|
||||
|
||||
searchAllChars :: Text -> [Char]
|
||||
searchAllChars search = [c | c <- [minBound..maxBound], any (\name -> Text.toLower search `Text.isInfixOf` Text.toLower (Text.pack name)) (allNames c)]
|
||||
mkSearchModel search = SearchModel $ take 100_000 $ Names.searchCaseInsensitive search
|
||||
|
||||
instance MimeRender HTML SearchModel where
|
||||
mimeRender _ model = charTableHtml model.results
|
||||
@ -259,8 +256,8 @@ charTableHtml chars =
|
||||
H.tr $ do
|
||||
H.td $ H.input ! A.class_ "charbox" ! A.value (H.toValue [c])
|
||||
H.td $ H.code $ printfHtml "U+%04X" c
|
||||
H.td $ H.code $ H.toHtml $ intercalate ", " $ allNames c
|
||||
H.td $ H.code $ H.toHtml $ fromMaybe "" $ blockName c
|
||||
H.td $ H.code $ H.toHtml $ Text.intercalate ", " $ Names.allNames c
|
||||
H.td $ H.code $ H.toHtml $ fromMaybe "" $ Names.blockName c
|
||||
|
||||
|
||||
charTableText :: [Char] -> BL.ByteString
|
||||
@ -269,8 +266,8 @@ charTableText chars =
|
||||
[ map Table.cl
|
||||
[ Text.pack [c]
|
||||
, Text.pack $ printf "U+%04X" c
|
||||
, Text.pack $ intercalate ", " $ allNames c
|
||||
, Text.pack $ fromMaybe "" $ blockName c
|
||||
, Text.intercalate ", " $ Names.allNames c
|
||||
, fromMaybe "" $ Names.blockName c
|
||||
]
|
||||
| c <- chars
|
||||
]
|
||||
@ -284,15 +281,6 @@ showByteHex = printf " %02X"
|
||||
showByteBin :: Word8 -> String
|
||||
showByteBin = printf "%08b"
|
||||
|
||||
-- | Retrieve name and aliases (suffixed with @*@) of a 'Char'.
|
||||
allNames :: Char -> [String]
|
||||
allNames c =
|
||||
maybeToList (UnicodeNames.name c)
|
||||
++ map (++ "*") (UnicodeNames.nameAliases c)
|
||||
|
||||
blockName :: Char -> Maybe String
|
||||
blockName c = UnicodeBlocks.blockName . UnicodeBlocks.blockDefinition <$> UnicodeBlocks.block c
|
||||
|
||||
orThrow :: Either a b -> (a -> ServerError) -> Handler b
|
||||
orThrow (Left err) f = throwError $ f err
|
||||
orThrow (Right val) _ = pure val
|
||||
|
31
src/UToy/Names.hs
Normal file
31
src/UToy/Names.hs
Normal file
@ -0,0 +1,31 @@
|
||||
module UToy.Names (allNames, blockName, searchCaseInsensitive) where
|
||||
|
||||
import Data.Char (ord)
|
||||
import Data.Maybe (maybeToList)
|
||||
import Data.Text (Text)
|
||||
import Data.Vector (Vector)
|
||||
|
||||
import qualified Data.Text as Text
|
||||
import qualified Data.Vector as Vector
|
||||
import qualified Unicode.Char.General.Blocks as UnicodeBlocks
|
||||
import qualified Unicode.Char.General.Names as UnicodeNames
|
||||
|
||||
-- | Retrieve name and aliases (suffixed with @*@) of a 'Char'.
|
||||
allNames :: Char -> [Text]
|
||||
allNames c = map Text.pack $
|
||||
maybeToList (UnicodeNames.name c)
|
||||
++ map (++ "*") (UnicodeNames.nameAliases c)
|
||||
|
||||
blockName :: Char -> Maybe Text
|
||||
blockName c = Text.pack . UnicodeBlocks.blockName . UnicodeBlocks.blockDefinition <$> UnicodeBlocks.block c
|
||||
|
||||
searchCaseInsensitive :: Text -> [Char]
|
||||
searchCaseInsensitive search = filter go [minBound..maxBound]
|
||||
where
|
||||
go c = any matches $ Vector.unsafeIndex lowerNames (ord c - ord minBound)
|
||||
matches t = Text.toLower search `Text.isInfixOf` t
|
||||
|
||||
lowerNames :: Vector [Text]
|
||||
lowerNames = Vector.fromList $ map go [minBound..maxBound]
|
||||
where
|
||||
go = map Text.toLower . allNames
|
@ -20,6 +20,7 @@ library
|
||||
exposed-modules:
|
||||
UToy.Decode
|
||||
UToy.Parsers
|
||||
UToy.Names
|
||||
UToy.Table
|
||||
other-modules:
|
||||
Paths_utoy
|
||||
@ -30,6 +31,9 @@ library
|
||||
attoparsec
|
||||
, base >=4.7 && <5
|
||||
, text
|
||||
, unicode-data
|
||||
, unicode-data-names
|
||||
, vector
|
||||
default-language: Haskell2010
|
||||
|
||||
executable utoy
|
||||
@ -46,8 +50,6 @@ executable utoy
|
||||
, http-media
|
||||
, servant-server
|
||||
, text
|
||||
, unicode-data
|
||||
, unicode-data-names
|
||||
, utoy
|
||||
, wai
|
||||
, warp
|
||||
|
Loading…
x
Reference in New Issue
Block a user