From c7e378122d41ace44704cb07362f7399e5f3c495 Mon Sep 17 00:00:00 2001 From: Paul Brinkmeier Date: Wed, 1 Mar 2023 03:41:39 +0100 Subject: [PATCH] Add ranges to /codepoints --- app/Main.hs | 4 ++-- src/UToy/Parsers.hs | 23 ++++++++++++++++++----- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/app/Main.hs b/app/Main.hs index 580ede2..c68a92b 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -127,8 +127,8 @@ newtype CodepointsModel = CodepointsModel { codepoints :: [(Word, Either String Char)] } -mkCodepointsModel :: [Word] -> CodepointsModel -mkCodepointsModel = CodepointsModel . map go +mkCodepointsModel :: [(Word, Word)] -> CodepointsModel +mkCodepointsModel = CodepointsModel . map go . concatMap (uncurry enumFromTo) where go codepoint = (codepoint, toChar codepoint) diff --git a/src/UToy/Parsers.hs b/src/UToy/Parsers.hs index b956fb6..74c6f13 100644 --- a/src/UToy/Parsers.hs +++ b/src/UToy/Parsers.hs @@ -1,3 +1,5 @@ +{-# LANGUAGE OverloadedStrings #-} + module UToy.Parsers ( parseHexBytes , parseCodepoints @@ -29,20 +31,31 @@ hexBytes = hexByte `Atto.sepBy` separators | 'a' <= c && c <= 'f' = ord c - ord 'a' + 10 | otherwise = error $ printf "not a hex digit: %c" c -parseCodepoints :: Text -> Either String [Word] +parseCodepoints :: Text -> Either String [(Word, Word)] parseCodepoints = Atto.parseOnly $ codepoints <* Atto.endOfInput -codepoints :: Atto.Parser [Word] -codepoints = codepoint `Atto.sepBy` separators - where - codepoint = Atto.choice [literal, decLiteral, hexLiteral] +codepoints :: Atto.Parser [(Word, Word)] +codepoints = codepointRange `Atto.sepBy` separators +codepointRange :: Atto.Parser (Word, Word) +codepointRange = do + codepoint1 <- codepoint + codepoint2 <- Atto.choice [Atto.skip (== '-') *> codepoint, pure codepoint1] + + pure (codepoint1, codepoint2) + + + +codepoint :: Atto.Parser Word +codepoint = Atto.choice [literal, decLiteral, hexLiteral, uCodepoint] + where literal = Atto.decimal decLiteral = Atto.char 'd' *> Atto.decimal hexLiteral = Atto.char 'x' *> Atto.hexadecimal + uCodepoint = Atto.string "U+" *> Atto.hexadecimal -- Common