1 module Databrary.EZID.ANVL 2 ( ANVL 3 , encode 4 , parse 5 ) where 6 7 import Control.Applicative ((<|>)) 8 import qualified Data.Attoparsec.ByteString.Char8 as P 9 import Data.Bits (shiftL, (.|.)) 10 import qualified Data.ByteString.Builder as B 11 import qualified Data.ByteString.Builder.Prim as BP 12 import Data.ByteString.Internal (c2w) 13 import Data.Char (isHexDigit, digitToInt) 14 import Data.Monoid ((<>)) 15 import qualified Data.Text as T 16 import qualified Data.Text.Encoding as TE 17 import Data.Word (Word8) 18 19 type ANVL = [(T.Text, T.Text)] 20 21 charEscaped :: Bool -> BP.BoundedPrim Word8 22 charEscaped colon = 23 BP.condB (\c -> c == c2w '%' || (colon && c == c2w ':') || c < c2w ' ') 24 (BP.liftFixedToBounded $ (,) '%' BP.>$< BP.char8 BP.>*< BP.word8HexFixed) 25 (BP.liftFixedToBounded BP.word8) 26 27 encode :: ANVL -> B.Builder 28 encode = foldMap $ \(n,v) -> 29 TE.encodeUtf8BuilderEscaped (charEscaped True) n <> B.char8 ':' <> B.char8 ' ' <> TE.encodeUtf8BuilderEscaped (charEscaped False) v <> B.char8 '\n' 30 31 parse :: P.Parser ANVL 32 parse = P.sepBy nv P.endOfLine <* P.skipSpace 33 where 34 hd = digitToInt <$> P.satisfy isHexDigit 35 pe = P.char '%' >> (.|.) . (`shiftL` 4) <$> hd <*> hd 36 textWhile1 p = either (fail . show) return . TE.decodeUtf8' =<< P.takeWhile1 p 37 tx d = mconcat <$> P.many' (textWhile1 (`notElem` '%':d) <|> (T.singleton . toEnum <$> pe)) 38 nv = do 39 n <- tx ":\n" P.<?> "name" 40 _ <- P.char ':' 41 P.skipMany (P.char ' ') 42 v <- tx "\n" P.<?> "value" 43 return (n, v)