1 {-# LANGUAGE OverloadedStrings #-}
    2 module Databrary.Controller.Ingest
    3   ( viewIngest
    4   , postIngest
    5   , detectParticipantCSV
    6   , runParticipantUpload
    7   -- for tests
    8   , mappingParser
    9   , buildParticipantRecordAction
   10   , ParticipantStatus(..)
   11   , MeasureUpdateAction(..)
   12   , ParticipantRecordAction(..)
   13   ) where
   14 
   15 import Control.Arrow (right)
   16 import Control.Monad (unless)
   17 import Control.Monad.IO.Class (liftIO)
   18 import qualified Data.ByteString as BS
   19 import qualified Data.ByteString.Char8 as BSC
   20 import qualified Data.ByteString.Lazy as BSL
   21 import Data.Maybe (catMaybes)
   22 import Data.Monoid ((<>))
   23 import Data.Text (Text)
   24 import qualified Data.Text.Encoding as TE
   25 import qualified Data.Text.Lazy as TL
   26 import qualified Data.Text.Lazy.Encoding as TLE
   27 import Data.Vector (Vector)
   28 import Data.Word (Word64)
   29 import Network.HTTP.Types (badRequest400)
   30 import Network.Wai.Parse (FileInfo(..))
   31 import System.Posix.FilePath (takeExtension)
   32 
   33 import Data.Csv.Contrib (parseCsvWithHeader, getHeaders, removeBomPrefixText)
   34 import qualified Databrary.JSON as JSON
   35 import Databrary.Ops
   36 import Databrary.Has
   37 import Databrary.Model.Category
   38 import Databrary.Model.Id
   39 import Databrary.Model.Metric (Metric)
   40 import Databrary.Model.Ingest
   41 import Databrary.Model.Permission
   42 import Databrary.Model.Measure
   43 import Databrary.Model.Metric
   44 import Databrary.Model.Party
   45 import Databrary.Model.Record
   46 import Databrary.Model.Volume
   47 import Databrary.Model.VolumeMetric
   48 import Databrary.Model.Container
   49 import Databrary.Ingest.Action
   50 import Databrary.Ingest.JSON
   51 import Databrary.HTTP.Path.Parser
   52 import Databrary.HTTP.Form.Deform
   53 import Databrary.Action.Route
   54 import Databrary.Action
   55 import Databrary.Controller.Paths
   56 import Databrary.Controller.Permission
   57 import Databrary.Controller.Form
   58 import Databrary.Controller.Volume
   59 import Databrary.Store.Types
   60 import Databrary.View.Form (FormHtml)
   61 import Databrary.View.Ingest
   62 
   63 viewIngest :: ActionRoute (Id Volume)
   64 viewIngest = action GET (pathId </< "ingest") $ \vi -> withAuth $ do
   65   checkMemberADMIN
   66   s <- focusIO getIngestStatus
   67   v <- getVolume PermissionEDIT vi
   68   peeks $ blankForm . htmlIngestForm v s
   69 
   70 postIngest :: ActionRoute (Id Volume)
   71 postIngest = multipartAction $ action POST (pathId </< "ingest") $ \vi -> withAuth $ do
   72   checkMemberADMIN
   73   s <- focusIO getIngestStatus
   74   v <- getVolume PermissionEDIT vi
   75   a <- runFormFiles [("json", 16*1024*1024)] (Just $ htmlIngestForm v s) $ do
   76     csrfForm
   77     abort <- "abort" .:> deform
   78     abort `unlessReturn` ((,,)
   79       <$> ("run" .:> deform)
   80       <*> ("overwrite" .:> deform)
   81       <*> ("json" .:> do
   82               (fileInfo :: FileInfo JSON.Value) <- deform
   83               (deformCheck
   84                    "Must be JSON."
   85                    (\f -> fileContentType f `elem` ["text/json", "application/json"] || takeExtension (fileName f) == ".json")
   86                    fileInfo)))
   87   r <- maybe
   88     (True <$ focusIO abortIngest)
   89     (\(r,o,j) -> runIngest $ right (map (unId . containerId . containerRow)) <$> ingestJSON v (fileContent j) r o)
   90     a
   91   unless r $ result $ response badRequest400 [] ("failed" :: String)
   92   peeks $ otherRouteResponse [] viewIngest (volumeId $ volumeRow v)
   93 
   94 maxWidelyAcceptableHttpBodyFileSize :: Word64
   95 maxWidelyAcceptableHttpBodyFileSize = 16*1024*1024
   96 
   97 -- TODO: maybe put csv file save/retrieve in Databrary.Store module
   98 detectParticipantCSV :: ActionRoute (Id Volume)
   99 detectParticipantCSV = action POST (pathJSON >/> pathId </< "detectParticipantCSV") $ \vi -> withAuth $ do
  100     v <- getVolume PermissionEDIT vi
  101     (auth :: SiteAuth) <- peek
  102     (store :: Storage) <- peek
  103     csvFileInfo <-
  104       -- TODO: is Nothing okay here?
  105       runFormFiles [("file", maxWidelyAcceptableHttpBodyFileSize)] (Nothing :: Maybe (RequestContext -> FormHtml TL.Text)) $ do
  106           csrfForm
  107           fileInfo :: (FileInfo TL.Text) <- "file" .:> deform
  108           return fileInfo
  109     -- liftIO (print ("after extract form"))
  110     let uploadFileContents' = (BSL.toStrict . TLE.encodeUtf8 . removeBomPrefixText . fileContent) csvFileInfo
  111     -- liftIO (print "uploaded contents below")
  112     -- liftIO (print uploadFileContents')
  113     case parseCsvWithHeader uploadFileContents' of
  114         Left err -> do
  115             -- liftIO (print ("csv parse error", err))
  116             pure (response badRequest400 [] err)
  117         Right (hdrs, records) -> do
  118             metrics <- lookupVolumeParticipantMetrics v
  119             -- liftIO (print ("before check determine", show hdrs))
  120             case checkDetermineMapping metrics ((fmap TE.decodeUtf8 . getHeaders) hdrs) uploadFileContents' of
  121                 Left err -> do
  122                     -- liftIO (print ("failed to determine mapping", err))
  123                     -- if column check failed, then don't save csv file and response is error
  124                     pure (response badRequest400 [] err)
  125                 Right participantFieldMapping -> do
  126                     let uploadFileName =
  127                             uniqueUploadName auth v ((BSC.unpack . fileName) csvFileInfo)
  128                     liftIO
  129                         (BS.writeFile
  130                              ((BSC.unpack . getStorageTempParticipantUpload uploadFileName) store)
  131                              uploadFileContents')
  132                     pure
  133                         $ okResponse []
  134                             $ JSON.recordEncoding -- TODO: not record encoding
  135                                 $ JSON.Record vi
  136                                     $      "csv_upload_id" JSON..= uploadFileName
  137                                         -- TODO: samples for mapped columns only
  138                                         <> "column_samples" JSON..= extractColumnsDistinctSampleJson 5 hdrs records
  139                                         <> "suggested_mapping" JSON..= participantFieldMappingToJSON participantFieldMapping
  140                                         <> "columns_firstvals" JSON..= extractColumnsInitialJson 5 hdrs records
  141 
  142 -- TODO: move this to Databrary.Store.ParticipantUploadTemp
  143 uniqueUploadName :: SiteAuth -> Volume -> String -> String
  144 uniqueUploadName siteAuth vol uploadName =
  145     uniqueUploadName'
  146         ((partyId . partyRow . accountParty . siteAccount) siteAuth)
  147         ((volumeId . volumeRow) vol)
  148         uploadName
  149 
  150 uniqueUploadName' :: Id Party -> Id Volume -> String -> String
  151 uniqueUploadName' uid vid uploadName =
  152     show uid <> "-" <> show vid <> "-" <> uploadName
  153 ----- end
  154 
  155 runParticipantUpload :: ActionRoute (Id Volume)
  156 runParticipantUpload = action POST (pathJSON >/> pathId </< "runParticipantUpload") $ \vi -> withAuth $ do
  157     v <- getVolume PermissionEDIT vi
  158     (store :: Storage) <- peek
  159     -- reqCtxt <- peek
  160     (csvUploadId :: String, selectedMapping :: JSON.Value) <- runForm (Nothing) $ do
  161         csrfForm
  162         (uploadId :: String) <- "csv_upload_id" .:> deform
  163         mapping <- "selected_mapping" .:> deform
  164         pure (uploadId, mapping)
  165     -- TODO: resolve csv id to absolute path; http error if unknown
  166     uploadFileContents <-
  167         (liftIO . BS.readFile) ((BSC.unpack . getStorageTempParticipantUpload csvUploadId) store)
  168     case JSON.parseEither mappingParser selectedMapping of
  169         Left err ->
  170             pure (response badRequest400 [] err) -- bad json shape or keys
  171         Right mpngVal -> do
  172             participantActiveMetrics <- lookupVolumeParticipantMetrics v
  173             case parseParticipantFieldMapping participantActiveMetrics mpngVal of
  174                 Left err ->
  175                     pure (response badRequest400 [] err) -- mapping of inactive metrics or missing metric
  176                 Right mpngs -> do
  177                     -- liftIO $ print ("upload id", csvUploadId, "mapping", mpngs)
  178                     case attemptParseRows mpngs uploadFileContents of
  179                         Left err ->   -- invalid value in row
  180                             pure (response badRequest400 [] err)
  181                         Right (_, records) ->
  182                             let response' =
  183                                     okResponse []
  184                                         $ JSON.recordEncoding -- TODO: not record encoding
  185                                         $       JSON.Record vi
  186                                         $       "succeeded"
  187                                         JSON..= True
  188                             in  response' <$ runImport v records
  189 
  190 mappingParser :: JSON.Value -> JSON.Parser [(Metric, Text)]
  191 mappingParser val = do
  192     (entries :: [HeaderMappingEntry]) <- JSON.parseJSON val
  193     pure (fmap (\e -> (hmeMetric e, hmeCsvField e)) entries)
  194 
  195 -- TODO: move all below to Model.Ingest
  196  -- TODO: error or count
  197 runImport :: Volume -> Vector ParticipantRecord -> Handler (Vector ())
  198 runImport vol records =
  199     mapM (createOrUpdateRecord vol) records
  200 
  201 data ParticipantStatus = Create | Found Record
  202 
  203 data MeasureUpdateAction = Upsert Metric MeasureDatum | Delete Metric | Unchanged Metric | NoAction Metric
  204     deriving (Show, Eq)
  205 
  206 data ParticipantRecordAction = ParticipantRecordAction ParticipantStatus [MeasureUpdateAction]
  207 
  208 buildParticipantRecordAction :: ParticipantRecord -> ParticipantStatus -> ParticipantRecordAction
  209 buildParticipantRecordAction participantRecord updatingRecord =
  210     let
  211         mId = getFieldVal' prdId participantMetricId
  212         mInfo = getFieldVal' prdInfo participantMetricInfo
  213         mDescription = getFieldVal' prdDescription participantMetricDescription
  214         mBirthdate = getFieldVal' prdBirthdate participantMetricBirthdate
  215         mGender = getFieldVal' prdGender participantMetricGender
  216         mRace = getFieldVal' prdRace participantMetricRace
  217         mEthnicity = getFieldVal' prdEthnicity participantMetricEthnicity
  218         mGestationalAge = getFieldVal' prdGestationalAge participantMetricGestationalAge
  219         mPregnancyTerm = getFieldVal' prdPregnancyTerm participantMetricPregnancyTerm
  220         mBirthWeight = getFieldVal' prdBirthWeight participantMetricBirthWeight
  221         mDisability = getFieldVal' prdDisability participantMetricDisability
  222         mLanguage = getFieldVal' prdLanguage participantMetricLanguage
  223         mCountry = getFieldVal' prdCountry participantMetricCountry
  224         mState = getFieldVal' prdState participantMetricState
  225         mSetting = getFieldVal' prdSetting participantMetricSetting
  226         -- print ("save measure id:", mId)
  227         measureActions =
  228             [ changeRecordMeasureIfUsed mId
  229             , changeRecordMeasureIfUsed mInfo
  230             , changeRecordMeasureIfUsed mDescription
  231             , changeRecordMeasureIfUsed mBirthdate
  232             , changeRecordMeasureIfUsed mGender
  233             , changeRecordMeasureIfUsed mRace
  234             , changeRecordMeasureIfUsed mEthnicity
  235             , changeRecordMeasureIfUsed mGestationalAge
  236             , changeRecordMeasureIfUsed mPregnancyTerm
  237             , changeRecordMeasureIfUsed mBirthWeight
  238             , changeRecordMeasureIfUsed mDisability
  239             , changeRecordMeasureIfUsed mLanguage
  240             , changeRecordMeasureIfUsed mCountry
  241             , changeRecordMeasureIfUsed mState
  242             , changeRecordMeasureIfUsed mSetting
  243             ]
  244     in
  245         ParticipantRecordAction updatingRecord (catMaybes measureActions)
  246   where
  247     changeRecordMeasureIfUsed :: Maybe (Maybe MeasureDatum, Metric) -> Maybe MeasureUpdateAction
  248     changeRecordMeasureIfUsed mValueMetric = do
  249         (mVal, met) <- mValueMetric
  250         pure (determineUpdatedMeasure mVal met)
  251     determineUpdatedMeasure :: Maybe MeasureDatum -> Metric -> MeasureUpdateAction
  252     determineUpdatedMeasure mVal met =
  253         case updatingRecord of
  254             Create ->
  255                 maybe (NoAction met) (Upsert met) mVal
  256             Found _ -> do
  257                  -- TODO:
  258                  -- mOldVal <- getOldVal metric record
  259                  -- action = maybe (Upsert val) (\o -> if o == val then Unchanged else Upsert val)
  260                  let measureAction = maybe (Delete met) (Upsert met) mVal
  261                  measureAction
  262     getFieldVal' :: (ParticipantRecord -> FieldUse a) -> Metric -> Maybe (Maybe MeasureDatum, Metric)
  263     getFieldVal' = getFieldVal participantRecord
  264 
  265 getFieldVal
  266     :: ParticipantRecord
  267     -> (ParticipantRecord -> FieldUse a)
  268     -> Metric
  269     -> Maybe (Maybe MeasureDatum, Metric)
  270 getFieldVal participantRecord extractFieldVal metric =
  271     case extractFieldVal participantRecord of
  272         Field fieldVal _ -> pure (Just fieldVal, metric)
  273         FieldEmpty -> pure (Nothing, metric)
  274         FieldUnused -> Nothing
  275             -- field isn't used by this volume, so don't need to save the measure
  276 
  277 createOrUpdateRecord :: Volume -> ParticipantRecord -> Handler () -- TODO: error or record
  278 createOrUpdateRecord vol participantRecord = do
  279     let category = getCategory' (Id 1) -- TODO: use global variable
  280         mIdVal = fst $ maybe (error "id missing") id (getFieldVal' prdId participantMetricId)
  281         idVal = maybe (error "id empty") id mIdVal
  282     mOldParticipant <- lookupVolumeParticipant vol idVal
  283     let recordStatus =
  284             case mOldParticipant of
  285                 Nothing -> Create
  286                 Just oldParticipant -> Found oldParticipant
  287     -- print ("save measure id:", mId)
  288     case buildParticipantRecordAction participantRecord recordStatus of
  289         ParticipantRecordAction Create measureActs -> do
  290             newParticipantShell <- addRecord (blankRecord category vol) -- blankParticipantRecord
  291             _ <- mapM (runMeasureUpdate newParticipantShell) measureActs
  292             pure () -- TODO: reload participant
  293         ParticipantRecordAction (Found oldRecord) measureActs -> do
  294             _ <- mapM (runMeasureUpdate oldRecord) measureActs
  295             pure ()
  296   where
  297     runMeasureUpdate :: Record -> MeasureUpdateAction -> Handler (Maybe Record)
  298     runMeasureUpdate record act =
  299         case act of
  300             Upsert met val -> changeRecordMeasure (Measure record met val)
  301             Delete met -> fmap Just (removeRecordMeasure (Measure record met ""))
  302             Unchanged _ -> pure Nothing
  303             NoAction _ -> pure Nothing
  304     getFieldVal' :: (ParticipantRecord -> FieldUse a) -> Metric -> Maybe (Maybe MeasureDatum, Metric)
  305     getFieldVal' = getFieldVal participantRecord