1 {-# LANGUAGE OverloadedStrings, ViewPatterns #-}
    2 module Databrary.Controller.Zip
    3   ( zipContainer
    4   , zipVolume
    5   , viewVolumeDescription
    6   ) where
    7 
    8 import qualified Data.ByteString as BS
    9 import qualified Data.ByteString.Builder as BSB
   10 import qualified Data.ByteString.Char8 as BSC
   11 import qualified Data.ByteString.Lazy as BSL
   12 import Data.Function (on)
   13 import Data.List (groupBy, partition)
   14 import Data.Maybe (fromJust, maybeToList)
   15 import Control.Monad.IO.Class (liftIO)
   16 import Data.Monoid ((<>))
   17 import qualified Data.RangeSet.List as RS
   18 import qualified Data.Text.Encoding as TE
   19 import Network.HTTP.Types (hContentType, hCacheControl, hContentLength)
   20 import System.Posix.FilePath ((<.>))
   21 import qualified Text.Blaze.Html5 as Html
   22 import qualified Text.Blaze.Html.Renderer.Utf8 as Html
   23 import qualified Codec.Archive.Zip as ZIP
   24 import qualified System.IO as IO
   25 import qualified System.Directory as DIR
   26 import qualified Conduit as CND
   27 import Path (parseRelFile)
   28 -- import Path.IO (resolveFile')
   29 
   30 import Databrary.Ops
   31 import Databrary.Has (view, peek, peeks)
   32 import Databrary.Store.Asset
   33 import Databrary.Store.Filename
   34 import Databrary.Store.CSV (buildCSV)
   35 import Databrary.Store.Types
   36 import Databrary.Model.Id
   37 import Databrary.Model.Permission
   38 import Databrary.Model.Volume
   39 import Databrary.Model.Container
   40 import Databrary.Model.Slot
   41 import Databrary.Model.RecordSlot
   42 import Databrary.Model.Asset
   43 import Databrary.Model.AssetSlot
   44 import Databrary.Model.Format
   45 import Databrary.Model.Party
   46 import Databrary.Model.Citation
   47 import Databrary.Model.Funding
   48 import Databrary.HTTP
   49 import Databrary.HTTP.Path.Parser
   50 import Databrary.Action
   51 import Databrary.Controller.Paths
   52 import Databrary.Controller.Asset
   53 import Databrary.Controller.Container
   54 import Databrary.Controller.Volume
   55 import Databrary.Controller.Party
   56 import Databrary.Controller.CSV
   57 import Databrary.Controller.Angular
   58 import Databrary.Controller.IdSet
   59 import Databrary.View.Zip
   60 
   61 -- isOrig flags have been added to toggle the ability to access the pre-transcoded asset
   62 assetZipEntry2 :: Bool -> BS.ByteString -> AssetSlot -> Handler (ZIP.ZipArchive ())
   63 assetZipEntry2 isOrig containerDir AssetSlot{ slotAsset = a@Asset{ assetRow = ar@AssetRow{ assetId = aid}}} = do
   64   origAsset <- lookupOrigAsset aid
   65   Just f <- case isOrig of
   66                  True -> getAssetFile $ fromJust origAsset
   67                  False -> getAssetFile a
   68   req <- peek
   69   -- (t, _) <- assetCreation a
   70   -- Just (t, s) <- fileInfo f
   71   -- liftIO (print ("downloadname", assetDownloadName False True ar))
   72   -- liftIO (print ("format", assetFormat ar))
   73   let entryName =
   74         containerDir `BS.append` (case isOrig of
   75           False -> makeFilename (assetDownloadName True False ar) `addFormatExtension` assetFormat ar
   76           True -> makeFilename (assetDownloadName False True ar) `addFormatExtension` assetFormat ar)
   77   entrySelector <- liftIO $ (parseRelFile (BSC.unpack entryName) >>= ZIP.mkEntrySelector)
   78   return
   79     (do
   80        ZIP.sinkEntry ZIP.Store (CND.sourceFileBS (BSC.unpack f)) entrySelector
   81        ZIP.setEntryComment (TE.decodeUtf8 $ BSL.toStrict $ BSB.toLazyByteString $ actionURL (Just req) viewAsset (HTML, assetId ar) []) entrySelector)
   82 
   83 containerZipEntry2 :: Bool -> BS.ByteString -> Container -> [AssetSlot] -> Handler (ZIP.ZipArchive ())
   84 containerZipEntry2 isOrig prefix c l = do
   85   let containerDir = prefix <> makeFilename (containerDownloadName c) <> "/"
   86   zipActs <- mapM (assetZipEntry2 isOrig containerDir) l
   87   return (sequence_ zipActs)
   88 
   89 volumeDescription :: Bool -> Volume -> (Container, [RecordSlot]) -> IdSet Container -> [AssetSlot] -> Handler (Html.Html, [[AssetSlot]], [[AssetSlot]])
   90 volumeDescription inzip v (_, glob) cs al = do
   91   cite <- lookupVolumeCitation v
   92   links <- lookupVolumeLinks v
   93   fund <- lookupVolumeFunding v
   94   desc <- peeks $ htmlVolumeDescription inzip v (maybeToList cite ++ links) fund glob cs at ab
   95   return (desc, at, ab)
   96   where
   97   (at, ab) = partition (any (containerTop . containerRow . slotContainer) . assetSlot . head) $ groupBy (me `on` fmap (containerId . containerRow . slotContainer) . assetSlot) al
   98   me (Just x) (Just y) = x == y
   99   me _ _ = False
  100 
  101 volumeZipEntry2 :: Bool -> Volume -> (Container, [RecordSlot]) -> IdSet Container -> Maybe BSB.Builder -> [AssetSlot] -> Handler (ZIP.ZipArchive ())
  102 volumeZipEntry2 isOrig v top cs csv al = do
  103   (desc, at, ab) <- volumeDescription True v top cs al -- the actual asset slot's assets arent' used any more for containers, now container zip entry does that
  104   let zipDir = (makeFilename $ volumeDownloadName v ++ if idSetIsFull cs then [] else ["PARTIAL"]) <> "/"
  105   zt <- mapM (ent zipDir) at
  106   zb <- mapM (ent (zipDir <> "sessions/")) ab
  107   descEntrySelector <- liftIO $ (parseRelFile (BSC.unpack zipDir <> "description.html") >>= ZIP.mkEntrySelector)
  108   spreadEntrySelector <- liftIO $ (parseRelFile (BSC.unpack zipDir <> "spreadsheet.csv") >>= ZIP.mkEntrySelector)
  109   return
  110     (do
  111        sequence_ zt
  112        sequence_ zb
  113        ZIP.addEntry ZIP.Store (BSL.toStrict (Html.renderHtml desc)) descEntrySelector
  114        maybe (pure ()) (\c -> ZIP.addEntry ZIP.Store (BSL.toStrict (BSB.toLazyByteString c)) spreadEntrySelector) csv)
  115   where
  116   ent prefix [a@AssetSlot{ assetSlot = Nothing }] = assetZipEntry2 isOrig prefix a -- orig asset doesn't matter here as top level assets aren't transcoded, I believe
  117   ent prefix (AssetSlot{ assetSlot = Just s } : _) = do
  118     (acts, _) <- containerZipEntryCorrectAssetSlots2 isOrig prefix (slotContainer s)
  119     pure acts
  120   ent _ _ = fail "volumeZipEntry"
  121 
  122 zipResponse2 :: BS.ByteString -> ZIP.ZipArchive () -> Handler Response
  123 zipResponse2 n zipAddActions = do
  124   req <- peek
  125   u <- peek
  126   store <- peek
  127   let comment = BSL.toStrict $ BSB.toLazyByteString
  128         $ BSB.string8 "Downloaded by " <> TE.encodeUtf8Builder (partyName $ partyRow u) <> BSB.string8 " <" <> actionURL (Just req) viewParty (HTML, TargetParty $ partyId $ partyRow u) [] <> BSB.char8 '>'
  129   let temporaryZipName = (BSC.unpack . storageTemp) store <> "placeholder.zip" -- TODO: generate temporary name for extra caution?
  130   h <- liftIO $ IO.openFile temporaryZipName IO.ReadWriteMode
  131   liftIO $ DIR.removeFile temporaryZipName
  132   liftIO $ IO.hSetBinaryMode h True
  133   liftIO $ ZIP.createBlindArchive h $ do
  134     ZIP.setArchiveComment (TE.decodeUtf8 comment)
  135     zipAddActions
  136   sz <- liftIO $ (IO.hSeek h IO.SeekFromEnd 0 >> IO.hTell h)
  137   liftIO $ IO.hSeek h IO.AbsoluteSeek 0
  138   return $ okResponse
  139     [ (hContentType, "application/zip")
  140     , ("content-disposition", "attachment; filename=" <> quoteHTTP (n <.> "zip"))
  141     , (hCacheControl, "max-age=31556926, private")
  142     , (hContentLength, BSC.pack $ show $ sz)
  143     ] (CND.bracketP (return h) IO.hClose CND.sourceHandle :: CND.Source (CND.ResourceT IO) BS.ByteString)
  144 
  145 checkAsset :: AssetSlot -> Bool
  146 checkAsset a =
  147   canReadData2 getAssetSlotRelease2 getAssetSlotVolumePermission2 a && assetBacked (view a)
  148 
  149 containerZipEntryCorrectAssetSlots2 :: Bool -> BS.ByteString -> Container -> Handler (ZIP.ZipArchive (), Bool)
  150 containerZipEntryCorrectAssetSlots2 isOrig prefix c = do
  151   c'<- lookupContainerAssets c
  152   assetSlots <- case isOrig of
  153                      True -> do
  154                       origs <- lookupOrigContainerAssets c
  155                       let pdfs = filterFormat c' formatNotAV
  156                       return $ pdfs ++ origs
  157                      False -> return c'
  158   let checkedAssetSlots = filter checkAsset assetSlots
  159   zipActs <- containerZipEntry2 isOrig prefix c $ checkedAssetSlots
  160   pure (zipActs, null checkedAssetSlots)
  161 
  162 zipContainer :: Bool -> ActionRoute (Maybe (Id Volume), Id Slot)
  163 zipContainer isOrig =
  164   let zipPath = case isOrig of
  165                      True -> pathMaybe pathId </> pathSlotId </< "zip" </< "true"
  166                      False -> pathMaybe pathId </> pathSlotId </< "zip" </< "false"
  167   in action GET zipPath $ \(vi, ci) -> withAuth $ do
  168     c <- getContainer PermissionPUBLIC vi ci True
  169     let v = containerVolume c
  170     _ <- maybeAction (if volumeIsPublicRestricted v then Nothing else Just ()) -- block if restricted
  171     (zipActs, isEmpty) <- containerZipEntryCorrectAssetSlots2 isOrig "" c
  172     auditSlotDownload (not $ isEmpty) (containerSlot c)
  173     zipResponse2 ("databrary-" <> BSC.pack (show $ volumeId $ volumeRow $ containerVolume c) <> "-" <> BSC.pack (show $ containerId $ containerRow c)) zipActs
  174 
  175 getVolumeInfo :: Id Volume -> Handler (Volume, IdSet Container, [AssetSlot])
  176 getVolumeInfo vi = do
  177   v <- getVolume PermissionPUBLIC vi
  178   _ <- maybeAction (if volumeIsPublicRestricted v then Nothing else Just ()) -- block if restricted
  179   s <- peeks requestIdSet
  180   -- let isMember = maybe (const False) (\c -> RS.member (containerId $ containerRow $ slotContainer $ c))
  181   -- non-exhaustive pattern found here ...v , implment in case of Nothing (Keep in mind originalAssets will not have containers, or Volumes)
  182   a <- filter (\a@AssetSlot{ assetSlot = Just c } -> checkAsset a && RS.member (containerId $ containerRow $ slotContainer $ c) s) <$> lookupVolumeAssetSlots v False
  183   return (v, s, a)
  184 
  185 filterFormat :: [AssetSlot] -> (Format -> Bool)-> [AssetSlot]
  186 filterFormat as f = filter (f . assetFormat . assetRow . slotAsset ) as
  187 
  188 zipVolume :: Bool -> ActionRoute (Id Volume)
  189 zipVolume isOrig =
  190   let zipPath = case isOrig of
  191                      True -> pathId </< "zip" </< "true"
  192                      False -> pathId </< "zip" </< "false"
  193   in action GET zipPath $ \vi -> withAuth $ do
  194   (v, s, a) <- getVolumeInfo vi
  195   top:cr <- lookupVolumeContainersRecords v
  196   let cr' = filter ((`RS.member` s) . containerId . containerRow . fst) cr
  197   csv <- (null cr') `unlessReturn` (volumeCSV v cr')
  198   zipActs <- volumeZipEntry2 isOrig v top s (buildCSV <$> csv) a
  199   auditVolumeDownload (not $ null a) v
  200   zipResponse2 (BSC.pack $ "databrary-" ++ show (volumeId $ volumeRow v) ++ if idSetIsFull s then "" else "-partial") zipActs
  201 
  202 viewVolumeDescription :: ActionRoute (Id Volume)
  203 viewVolumeDescription = action GET (pathId </< "description") $ \_ -> withAuth $ do
  204   angular
  205   {-
  206   (v, s, a) <- getVolumeInfo vi
  207   top <- lookupVolumeTopContainer v
  208   glob <- lookupSlotRecords $ containerSlot top
  209   (desc, _, _) <- volumeDescription False v (top, glob) s a
  210   -}
  211   return $ okResponse [] (""::String)