Add ranges to /codepoints

This commit is contained in:
Paul Brinkmeier 2023-03-01 03:41:39 +01:00
parent 944edaf445
commit c7e378122d
2 changed files with 20 additions and 7 deletions

View File

@ -127,8 +127,8 @@ newtype CodepointsModel = CodepointsModel
{ codepoints :: [(Word, Either String Char)] { codepoints :: [(Word, Either String Char)]
} }
mkCodepointsModel :: [Word] -> CodepointsModel mkCodepointsModel :: [(Word, Word)] -> CodepointsModel
mkCodepointsModel = CodepointsModel . map go mkCodepointsModel = CodepointsModel . map go . concatMap (uncurry enumFromTo)
where where
go codepoint = (codepoint, toChar codepoint) go codepoint = (codepoint, toChar codepoint)

View File

@ -1,3 +1,5 @@
{-# LANGUAGE OverloadedStrings #-}
module UToy.Parsers module UToy.Parsers
( parseHexBytes ( parseHexBytes
, parseCodepoints , parseCodepoints
@ -29,20 +31,31 @@ hexBytes = hexByte `Atto.sepBy` separators
| 'a' <= c && c <= 'f' = ord c - ord 'a' + 10 | 'a' <= c && c <= 'f' = ord c - ord 'a' + 10
| otherwise = error $ printf "not a hex digit: %c" c | otherwise = error $ printf "not a hex digit: %c" c
parseCodepoints :: Text -> Either String [Word] parseCodepoints :: Text -> Either String [(Word, Word)]
parseCodepoints = Atto.parseOnly $ codepoints <* Atto.endOfInput parseCodepoints = Atto.parseOnly $ codepoints <* Atto.endOfInput
codepoints :: Atto.Parser [Word] codepoints :: Atto.Parser [(Word, Word)]
codepoints = codepoint `Atto.sepBy` separators codepoints = codepointRange `Atto.sepBy` separators
where
codepoint = Atto.choice [literal, decLiteral, hexLiteral]
-- | Parse either a single codepoint or an inclusive range written as
-- @first-last@ (for example @x41-x5A@).
--
-- A lone codepoint @c@ is returned as the degenerate range @(c, c)@. The
-- bounds are always returned in ascending order, so a reversed range such as
-- @5-3@ denotes the same codepoints as @3-5@ instead of collapsing to an
-- empty list once the pair is expanded with 'enumFromTo'.
codepointRange :: Atto.Parser (Word, Word)
codepointRange = do
  start <- codepoint
  -- The "-stop" suffix is optional; without it the range is a single codepoint.
  stop <- Atto.option start (Atto.char '-' *> codepoint)
  pure (min start stop, max start stop)
codepoint :: Atto.Parser Word
codepoint = Atto.choice [literal, decLiteral, hexLiteral, uCodepoint]
where
literal = Atto.decimal literal = Atto.decimal
decLiteral = Atto.char 'd' *> Atto.decimal decLiteral = Atto.char 'd' *> Atto.decimal
hexLiteral = Atto.char 'x' *> Atto.hexadecimal hexLiteral = Atto.char 'x' *> Atto.hexadecimal
uCodepoint = Atto.string "U+" *> Atto.hexadecimal
-- Common -- Common