1 {-# LANGUAGE OverloadedStrings #-}
    2 module Databrary.HTTP.Parse
    3   ( Content(..)
    4   , FileContent
    5   , parseRequestContent
    6   ) where
    7 
    8 import Control.Monad (when, unless)
    9 import Control.Monad.IO.Class (liftIO)
   10 import Data.IORef (newIORef, readIORef, writeIORef)
   11 import Data.Word (Word64)
   12 import Network.HTTP.Types (requestEntityTooLarge413, unsupportedMediaType415, hContentType)
   13 import Network.Wai
   14 import Network.Wai.Parse
   15 import System.IO (Handle)
   16 import qualified Data.Aeson as JSON
   17 import qualified Data.Attoparsec.ByteString as AP
   18 import qualified Data.ByteString as BS
   19 import qualified Data.Text as T
   20 import qualified Data.Text.Encoding as TE
   21 import qualified Data.Text.Encoding.Error as TE
   22 import qualified Data.Text.Internal.Lazy as TL (chunk)
   23 import qualified Data.Text.Lazy as TL
   24 
   25 import Databrary.Has (peek, peeks)
   26 import Databrary.Action.Types
   27 import Databrary.Store.Temp
   28 import Databrary.HTTP.Request (lookupRequestHeader)
   29 import Databrary.Action.Response (response, emptyResponse, result, unsafeResult)
   30 
   31 requestTooLarge :: Response
   32 requestTooLarge = emptyResponse requestEntityTooLarge413 []
   33 
   34 type ChunkParser a = IO BS.ByteString -> IO a
   35 
   36 _mapChunks :: (a -> b) -> ChunkParser a -> ChunkParser b
   37 _mapChunks f parse next = f <$> parse next
   38 
   39 _nullChunks :: ChunkParser Word64
   40 _nullChunks next = go 0 where
   41   go n = do
   42     b <- next
   43     if BS.null b
   44       then return n
   45       else go (n + fromIntegral (BS.length b))
   46 
   47 limitChunks :: Word64 -> ChunkParser a -> ChunkParser a
   48 limitChunks lim parse next = do
   49   len <- liftIO $ newIORef 0
   50   parse $ do
   51     n <- readIORef len
   52     b <- next
   53     let n' = n + fromIntegral (BS.length b)
   54     when (n' > lim) $ result requestTooLarge
   55     writeIORef len n'
   56     return b
   57 
   58 writeChunks :: Handle -> ChunkParser ()
   59 writeChunks h next = run where
   60   run = do
   61     b <- next
   62     unless (BS.null b) $
   63       BS.hPut h b >> run
   64 
   65 parserChunks :: AP.Parser a -> ChunkParser (AP.Result a)
   66 parserChunks parser next = run (AP.parse parser) where
   67   run p = do
   68     b <- next
   69     let r = p b
   70     if BS.null b
   71       then return r
   72       else run $ AP.feed r
   73 
   74 textChunks :: TE.OnDecodeError -> ChunkParser TL.Text
   75 textChunks err next = run (TE.streamDecodeUtf8With err) where
   76   run f = do
   77     b <- next
   78     let TE.Some t r f' = f b
   79     if BS.null b
   80       then return $ TL.fromStrict $ maybe t (T.snoc t) $ err "textChunks: invalid UTF-8" . Just . fst =<< BS.uncons r
   81       else TL.chunk t <$> run f'
   82 
   83 textChunks' :: ChunkParser TL.Text
   84 textChunks' = textChunks (\e _ -> unsafeResult $ response unsupportedMediaType415 [] e)
   85 
   86 
   87 _mapBackEnd :: (a -> b) -> BackEnd a -> BackEnd b
   88 _mapBackEnd f back param info next = f <$> back param info next
   89 
   90 rejectBackEnd :: BackEnd a
   91 rejectBackEnd _ _ _ = result requestTooLarge
   92 
   93 
   94 _parseRequestChunks :: ChunkParser a -> Handler a
   95 _parseRequestChunks p = liftIO . p =<< peeks requestBody
   96 
   97 limitRequestChunks :: Word64 -> ChunkParser a -> Handler a
   98 limitRequestChunks lim p = do
   99   rq <- peek
  100   case requestBodyLength rq of
  101     KnownLength l | l > lim -> result requestTooLarge
  102     _ -> liftIO $ limitChunks lim p $ requestBody rq
  103 
  104 data Content a
  105   = ContentForm
  106     { contentFormParams :: [Param]
  107     , contentFormFiles :: [File a]
  108     }
  109   | ContentJSON JSON.Value
  110   | ContentText TL.Text
  111   | ContentUnknown
  112 
  113 maxTextSize :: Word64
  114 maxTextSize = 1024*1024
  115 
  116 class FileContent a where
  117   parseFileContent :: IO BS.ByteString -> Handler a
  118 
  119 instance FileContent () where
  120   parseFileContent _ = result requestTooLarge
  121 
  122 instance FileContent TempFile where
  123   parseFileContent = makeTempFile . flip writeChunks
  124 
  125 instance FileContent JSON.Value where
  126   parseFileContent b = liftIO $ either (result . response unsupportedMediaType415 []) return . AP.eitherResult =<< parserChunks JSON.json b
  127 
  128 instance FileContent TL.Text where
  129   parseFileContent = liftIO . textChunks'
  130 
  131 parseFormContent :: RequestBodyType -> Handler (Content a)
  132 parseFormContent t = uncurry ContentForm
  133   <$> limitRequestChunks maxTextSize (liftIO . sinkRequestBody rejectBackEnd t)
  134 
  135 parseFormFileContent :: FileContent a => (FileInfo BS.ByteString -> Word64) -> RequestBodyType -> Handler (Content a)
  136 parseFormFileContent ff rt = do
  137   app <- peek
  138   (p, f) <- liftIO $ do
  139     let be fn fi fb = case ff fi{ fileContent = fn } of
  140           0 -> result requestTooLarge
  141           m -> limitChunks m (\b -> runHandler (parseFileContent b) app) fb
  142     sinkRequestBody be rt (requestBody $ contextRequest app)
  143   return $ ContentForm p f
  144 
  145 parseJSONContent :: Handler (Content a)
  146 parseJSONContent = maybe ContentUnknown ContentJSON . AP.maybeResult
  147   <$> limitRequestChunks maxTextSize (parserChunks JSON.json)
  148 
  149 parseTextContent :: Handler (Content a)
  150 parseTextContent = ContentText <$> limitRequestChunks maxTextSize textChunks'
  151   -- really would be better to catch the error and return ContentUnknown
  152 
  153 parseRequestContent :: FileContent a => (BS.ByteString -> Word64) -> Handler (Content a)
  154 parseRequestContent fileLimits = do
  155   ct <- peeks $ lookupRequestHeader hContentType
  156   case fmap parseContentType ct of
  157     Just ("application/x-www-form-urlencoded", _) ->
  158       parseFormContent UrlEncoded
  159     Just ("multipart/form-data", attrs) | Just bound <- lookup "boundary" attrs ->
  160       parseFormFileContent (fileLimits . fileContent) $ Multipart bound
  161     Just ("text/json", _) ->
  162       parseJSONContent
  163     Just ("application/json", _) ->
  164       parseJSONContent
  165     Just ("text/plain", _) ->
  166       parseTextContent
  167     _ -> return ContentUnknown