1 module Databrary.EZID.ANVL
    2   ( ANVL
    3   , encode
    4   , parse
    5   ) where
    6 
    7 import Control.Applicative ((<|>))
    8 import qualified Data.Attoparsec.ByteString.Char8 as P
    9 import Data.Bits (shiftL, (.|.))
   10 import qualified Data.ByteString.Builder as B
   11 import qualified Data.ByteString.Builder.Prim as BP
   12 import Data.ByteString.Internal (c2w)
   13 import Data.Char (isHexDigit, digitToInt)
   14 import Data.Monoid ((<>))
   15 import qualified Data.Text as T
   16 import qualified Data.Text.Encoding as TE
   17 import Data.Word (Word8)
   18 
   19 type ANVL = [(T.Text, T.Text)]
   20 
   21 charEscaped :: Bool -> BP.BoundedPrim Word8
   22 charEscaped colon =
   23   BP.condB (\c -> c == c2w '%' || (colon && c == c2w ':') || c < c2w ' ')
   24     (BP.liftFixedToBounded $ (,) '%' BP.>$< BP.char8 BP.>*< BP.word8HexFixed)
   25     (BP.liftFixedToBounded BP.word8)
   26 
   27 encode :: ANVL -> B.Builder
   28 encode = foldMap $ \(n,v) ->
   29   TE.encodeUtf8BuilderEscaped (charEscaped True) n <> B.char8 ':' <> B.char8 ' ' <> TE.encodeUtf8BuilderEscaped (charEscaped False) v <> B.char8 '\n'
   30 
   31 parse :: P.Parser ANVL
   32 parse = P.sepBy nv P.endOfLine <* P.skipSpace
   33   where
   34   hd = digitToInt <$> P.satisfy isHexDigit
   35   pe = P.char '%' >> (.|.) . (`shiftL` 4) <$> hd <*> hd
   36   textWhile1 p = either (fail . show) return . TE.decodeUtf8' =<< P.takeWhile1 p
   37   tx d = mconcat <$> P.many' (textWhile1 (`notElem` '%':d) <|> (T.singleton . toEnum <$> pe))
   38   nv = do
   39     n <- tx ":\n" P.<?> "name"
   40     _ <- P.char ':'
   41     P.skipMany (P.char ' ')
   42     v <- tx "\n" P.<?> "value"
   43     return (n, v)