utoy/src/UToy/Parsers.hs

64 lines
1.6 KiB
Haskell

{-# LANGUAGE OverloadedStrings #-}
module UToy.Parsers
( parseHexBytes
, parseCodepoints
) where
import Data.Char (isDigit, isHexDigit, ord)
import Data.Text (Text)
import Data.Word (Word8)
import Text.Printf (printf)
import qualified Data.Attoparsec.Text as Atto
parseHexBytes :: Text -> Either String [Word8]
parseHexBytes = Atto.parseOnly $ hexBytes <* Atto.endOfInput
hexBytes :: Atto.Parser [Word8]
hexBytes = hexByte `Atto.sepBy` separators
where
hexByte = do
high <- hexDigit
low <- hexDigit
pure $ fromIntegral $ high * 16 + low
hexDigit = hexDigitToInt <$> Atto.satisfy isHexDigit
hexDigitToInt c
| isDigit c = ord c - ord '0'
| 'A' <= c && c <= 'F' = ord c - ord 'A' + 10
| 'a' <= c && c <= 'f' = ord c - ord 'a' + 10
| otherwise = error $ printf "not a hex digit: %c" c
parseCodepoints :: Text -> Either String [(Word, Word)]
parseCodepoints = Atto.parseOnly $ codepoints <* Atto.endOfInput
codepoints :: Atto.Parser [(Word, Word)]
codepoints = codepointRange `Atto.sepBy` separators
codepointRange :: Atto.Parser (Word, Word)
codepointRange = do
codepoint1 <- codepoint
codepoint2 <- Atto.choice [Atto.skip (== '-') *> codepoint, pure codepoint1]
pure (codepoint1, codepoint2)
codepoint :: Atto.Parser Word
codepoint = Atto.choice [literal, decLiteral, hexLiteral, uCodepoint]
where
literal = Atto.decimal
decLiteral = Atto.char 'd' *> Atto.decimal
hexLiteral = Atto.char 'x' *> Atto.hexadecimal
uCodepoint = Atto.string "U+" *> Atto.hexadecimal
-- Common
separators :: Atto.Parser ()
separators = Atto.skipMany $ Atto.satisfy $ Atto.inClass " +."