64 lines
1.6 KiB
Haskell
64 lines
1.6 KiB
Haskell
{-# LANGUAGE OverloadedStrings #-}
|
|
|
|
module UToy.Parsers
|
|
( parseHexBytes
|
|
, parseCodepoints
|
|
) where
|
|
|
|
import Data.Char (isDigit, isHexDigit, ord)
|
|
import Data.Text (Text)
|
|
import Data.Word (Word8)
|
|
import Text.Printf (printf)
|
|
|
|
import qualified Data.Attoparsec.Text as Atto
|
|
|
|
parseHexBytes :: Text -> Either String [Word8]
|
|
parseHexBytes = Atto.parseOnly $ hexBytes <* Atto.endOfInput
|
|
|
|
hexBytes :: Atto.Parser [Word8]
|
|
hexBytes = hexByte `Atto.sepBy` separators
|
|
where
|
|
hexByte = do
|
|
high <- hexDigit
|
|
low <- hexDigit
|
|
pure $ fromIntegral $ high * 16 + low
|
|
|
|
hexDigit = hexDigitToInt <$> Atto.satisfy isHexDigit
|
|
|
|
hexDigitToInt c
|
|
| isDigit c = ord c - ord '0'
|
|
| 'A' <= c && c <= 'F' = ord c - ord 'A' + 10
|
|
| 'a' <= c && c <= 'f' = ord c - ord 'a' + 10
|
|
| otherwise = error $ printf "not a hex digit: %c" c
|
|
|
|
parseCodepoints :: Text -> Either String [(Word, Word)]
|
|
parseCodepoints = Atto.parseOnly $ codepoints <* Atto.endOfInput
|
|
|
|
codepoints :: Atto.Parser [(Word, Word)]
|
|
codepoints = codepointRange `Atto.sepBy` separators
|
|
|
|
codepointRange :: Atto.Parser (Word, Word)
|
|
codepointRange = do
|
|
codepoint1 <- codepoint
|
|
codepoint2 <- Atto.choice [Atto.skip (== '-') *> codepoint, pure codepoint1]
|
|
|
|
pure (codepoint1, codepoint2)
|
|
|
|
|
|
|
|
codepoint :: Atto.Parser Word
|
|
codepoint = Atto.choice [literal, decLiteral, hexLiteral, uCodepoint]
|
|
where
|
|
literal = Atto.decimal
|
|
|
|
decLiteral = Atto.char 'd' *> Atto.decimal
|
|
|
|
hexLiteral = Atto.char 'x' *> Atto.hexadecimal
|
|
|
|
uCodepoint = Atto.string "U+" *> Atto.hexadecimal
|
|
|
|
-- Common
|
|
|
|
separators :: Atto.Parser ()
|
|
separators = Atto.skipMany $ Atto.satisfy $ Atto.inClass " +."
|