1 {-# LANGUAGE OverloadedStrings #-} 2 module Databrary.Controller.Ingest 3 ( viewIngest 4 , postIngest 5 , detectParticipantCSV 6 , runParticipantUpload 7 -- for tests 8 , mappingParser 9 , buildParticipantRecordAction 10 , ParticipantStatus(..) 11 , MeasureUpdateAction(..) 12 , ParticipantRecordAction(..) 13 ) where 14 15 import Control.Arrow (right) 16 import Control.Monad (unless) 17 import Control.Monad.IO.Class (liftIO) 18 import qualified Data.ByteString as BS 19 import qualified Data.ByteString.Char8 as BSC 20 import qualified Data.ByteString.Lazy as BSL 21 import Data.Maybe (catMaybes) 22 import Data.Monoid ((<>)) 23 import Data.Text (Text) 24 import qualified Data.Text.Encoding as TE 25 import qualified Data.Text.Lazy as TL 26 import qualified Data.Text.Lazy.Encoding as TLE 27 import Data.Vector (Vector) 28 import Data.Word (Word64) 29 import Network.HTTP.Types (badRequest400) 30 import Network.Wai.Parse (FileInfo(..)) 31 import System.Posix.FilePath (takeExtension) 32 33 import Data.Csv.Contrib (parseCsvWithHeader, getHeaders, removeBomPrefixText) 34 import qualified Databrary.JSON as JSON 35 import Databrary.Ops 36 import Databrary.Has 37 import Databrary.Model.Category 38 import Databrary.Model.Id 39 import Databrary.Model.Metric (Metric) 40 import Databrary.Model.Ingest 41 import Databrary.Model.Permission 42 import Databrary.Model.Measure 43 import Databrary.Model.Metric 44 import Databrary.Model.Party 45 import Databrary.Model.Record 46 import Databrary.Model.Volume 47 import Databrary.Model.VolumeMetric 48 import Databrary.Model.Container 49 import Databrary.Ingest.Action 50 import Databrary.Ingest.JSON 51 import Databrary.HTTP.Path.Parser 52 import Databrary.HTTP.Form.Deform 53 import Databrary.Action.Route 54 import Databrary.Action 55 import Databrary.Controller.Paths 56 import Databrary.Controller.Permission 57 import Databrary.Controller.Form 58 import Databrary.Controller.Volume 59 import Databrary.Store.Types 60 import Databrary.View.Form (FormHtml) 61 import Databrary.View.Ingest 62 63 viewIngest :: ActionRoute (Id Volume) 64 viewIngest = action GET (pathId </< "ingest") $ \vi -> withAuth $ do 65 checkMemberADMIN 66 s <- focusIO getIngestStatus 67 v <- getVolume PermissionEDIT vi 68 peeks $ blankForm . htmlIngestForm v s 69 70 postIngest :: ActionRoute (Id Volume) 71 postIngest = multipartAction $ action POST (pathId </< "ingest") $ \vi -> withAuth $ do 72 checkMemberADMIN 73 s <- focusIO getIngestStatus 74 v <- getVolume PermissionEDIT vi 75 a <- runFormFiles [("json", 16*1024*1024)] (Just $ htmlIngestForm v s) $ do 76 csrfForm 77 abort <- "abort" .:> deform 78 abort `unlessReturn` ((,,) 79 <$> ("run" .:> deform) 80 <*> ("overwrite" .:> deform) 81 <*> ("json" .:> do 82 (fileInfo :: FileInfo JSON.Value) <- deform 83 (deformCheck 84 "Must be JSON." 85 (\f -> fileContentType f `elem` ["text/json", "application/json"] || takeExtension (fileName f) == ".json") 86 fileInfo))) 87 r <- maybe 88 (True <$ focusIO abortIngest) 89 (\(r,o,j) -> runIngest $ right (map (unId . containerId . containerRow)) <$> ingestJSON v (fileContent j) r o) 90 a 91 unless r $ result $ response badRequest400 [] ("failed" :: String) 92 peeks $ otherRouteResponse [] viewIngest (volumeId $ volumeRow v) 93 94 maxWidelyAcceptableHttpBodyFileSize :: Word64 95 maxWidelyAcceptableHttpBodyFileSize = 16*1024*1024 96 97 -- TODO: maybe put csv file save/retrieve in Databrary.Store module 98 detectParticipantCSV :: ActionRoute (Id Volume) 99 detectParticipantCSV = action POST (pathJSON >/> pathId </< "detectParticipantCSV") $ \vi -> withAuth $ do 100 v <- getVolume PermissionEDIT vi 101 (auth :: SiteAuth) <- peek 102 (store :: Storage) <- peek 103 csvFileInfo <- 104 -- TODO: is Nothing okay here? 105 runFormFiles [("file", maxWidelyAcceptableHttpBodyFileSize)] (Nothing :: Maybe (RequestContext -> FormHtml TL.Text)) $ do 106 csrfForm 107 fileInfo :: (FileInfo TL.Text) <- "file" .:> deform 108 return fileInfo 109 -- liftIO (print ("after extract form")) 110 let uploadFileContents' = (BSL.toStrict . TLE.encodeUtf8 . removeBomPrefixText . fileContent) csvFileInfo 111 -- liftIO (print "uploaded contents below") 112 -- liftIO (print uploadFileContents') 113 case parseCsvWithHeader uploadFileContents' of 114 Left err -> do 115 -- liftIO (print ("csv parse error", err)) 116 pure (response badRequest400 [] err) 117 Right (hdrs, records) -> do 118 metrics <- lookupVolumeParticipantMetrics v 119 -- liftIO (print ("before check determine", show hdrs)) 120 case checkDetermineMapping metrics ((fmap TE.decodeUtf8 . getHeaders) hdrs) uploadFileContents' of 121 Left err -> do 122 -- liftIO (print ("failed to determine mapping", err)) 123 -- if column check failed, then don't save csv file and response is error 124 pure (response badRequest400 [] err) 125 Right participantFieldMapping -> do 126 let uploadFileName = 127 uniqueUploadName auth v ((BSC.unpack . fileName) csvFileInfo) 128 liftIO 129 (BS.writeFile 130 ((BSC.unpack . getStorageTempParticipantUpload uploadFileName) store) 131 uploadFileContents') 132 pure 133 $ okResponse [] 134 $ JSON.recordEncoding -- TODO: not record encoding 135 $ JSON.Record vi 136 $ "csv_upload_id" JSON..= uploadFileName 137 -- TODO: samples for mapped columns only 138 <> "column_samples" JSON..= extractColumnsDistinctSampleJson 5 hdrs records 139 <> "suggested_mapping" JSON..= participantFieldMappingToJSON participantFieldMapping 140 <> "columns_firstvals" JSON..= extractColumnsInitialJson 5 hdrs records 141 142 -- TODO: move this to Databrary.Store.ParticipantUploadTemp 143 uniqueUploadName :: SiteAuth -> Volume -> String -> String 144 uniqueUploadName siteAuth vol uploadName = 145 uniqueUploadName' 146 ((partyId . partyRow . accountParty . siteAccount) siteAuth) 147 ((volumeId . volumeRow) vol) 148 uploadName 149 150 uniqueUploadName' :: Id Party -> Id Volume -> String -> String 151 uniqueUploadName' uid vid uploadName = 152 show uid <> "-" <> show vid <> "-" <> uploadName 153 ----- end 154 155 runParticipantUpload :: ActionRoute (Id Volume) 156 runParticipantUpload = action POST (pathJSON >/> pathId </< "runParticipantUpload") $ \vi -> withAuth $ do 157 v <- getVolume PermissionEDIT vi 158 (store :: Storage) <- peek 159 -- reqCtxt <- peek 160 (csvUploadId :: String, selectedMapping :: JSON.Value) <- runForm (Nothing) $ do 161 csrfForm 162 (uploadId :: String) <- "csv_upload_id" .:> deform 163 mapping <- "selected_mapping" .:> deform 164 pure (uploadId, mapping) 165 -- TODO: resolve csv id to absolute path; http error if unknown 166 uploadFileContents <- 167 (liftIO . BS.readFile) ((BSC.unpack . getStorageTempParticipantUpload csvUploadId) store) 168 case JSON.parseEither mappingParser selectedMapping of 169 Left err -> 170 pure (response badRequest400 [] err) -- bad json shape or keys 171 Right mpngVal -> do 172 participantActiveMetrics <- lookupVolumeParticipantMetrics v 173 case parseParticipantFieldMapping participantActiveMetrics mpngVal of 174 Left err -> 175 pure (response badRequest400 [] err) -- mapping of inactive metrics or missing metric 176 Right mpngs -> do 177 -- liftIO $ print ("upload id", csvUploadId, "mapping", mpngs) 178 case attemptParseRows mpngs uploadFileContents of 179 Left err -> -- invalid value in row 180 pure (response badRequest400 [] err) 181 Right (_, records) -> 182 let response' = 183 okResponse [] 184 $ JSON.recordEncoding -- TODO: not record encoding 185 $ JSON.Record vi 186 $ "succeeded" 187 JSON..= True 188 in response' <$ runImport v records 189 190 mappingParser :: JSON.Value -> JSON.Parser [(Metric, Text)] 191 mappingParser val = do 192 (entries :: [HeaderMappingEntry]) <- JSON.parseJSON val 193 pure (fmap (\e -> (hmeMetric e, hmeCsvField e)) entries) 194 195 -- TODO: move all below to Model.Ingest 196 -- TODO: error or count 197 runImport :: Volume -> Vector ParticipantRecord -> Handler (Vector ()) 198 runImport vol records = 199 mapM (createOrUpdateRecord vol) records 200 201 data ParticipantStatus = Create | Found Record 202 203 data MeasureUpdateAction = Upsert Metric MeasureDatum | Delete Metric | Unchanged Metric | NoAction Metric 204 deriving (Show, Eq) 205 206 data ParticipantRecordAction = ParticipantRecordAction ParticipantStatus [MeasureUpdateAction] 207 208 buildParticipantRecordAction :: ParticipantRecord -> ParticipantStatus -> ParticipantRecordAction 209 buildParticipantRecordAction participantRecord updatingRecord = 210 let 211 mId = getFieldVal' prdId participantMetricId 212 mInfo = getFieldVal' prdInfo participantMetricInfo 213 mDescription = getFieldVal' prdDescription participantMetricDescription 214 mBirthdate = getFieldVal' prdBirthdate participantMetricBirthdate 215 mGender = getFieldVal' prdGender participantMetricGender 216 mRace = getFieldVal' prdRace participantMetricRace 217 mEthnicity = getFieldVal' prdEthnicity participantMetricEthnicity 218 mGestationalAge = getFieldVal' prdGestationalAge participantMetricGestationalAge 219 mPregnancyTerm = getFieldVal' prdPregnancyTerm participantMetricPregnancyTerm 220 mBirthWeight = getFieldVal' prdBirthWeight participantMetricBirthWeight 221 mDisability = getFieldVal' prdDisability participantMetricDisability 222 mLanguage = getFieldVal' prdLanguage participantMetricLanguage 223 mCountry = getFieldVal' prdCountry participantMetricCountry 224 mState = getFieldVal' prdState participantMetricState 225 mSetting = getFieldVal' prdSetting participantMetricSetting 226 -- print ("save measure id:", mId) 227 measureActions = 228 [ changeRecordMeasureIfUsed mId 229 , changeRecordMeasureIfUsed mInfo 230 , changeRecordMeasureIfUsed mDescription 231 , changeRecordMeasureIfUsed mBirthdate 232 , changeRecordMeasureIfUsed mGender 233 , changeRecordMeasureIfUsed mRace 234 , changeRecordMeasureIfUsed mEthnicity 235 , changeRecordMeasureIfUsed mGestationalAge 236 , changeRecordMeasureIfUsed mPregnancyTerm 237 , changeRecordMeasureIfUsed mBirthWeight 238 , changeRecordMeasureIfUsed mDisability 239 , changeRecordMeasureIfUsed mLanguage 240 , changeRecordMeasureIfUsed mCountry 241 , changeRecordMeasureIfUsed mState 242 , changeRecordMeasureIfUsed mSetting 243 ] 244 in 245 ParticipantRecordAction updatingRecord (catMaybes measureActions) 246 where 247 changeRecordMeasureIfUsed :: Maybe (Maybe MeasureDatum, Metric) -> Maybe MeasureUpdateAction 248 changeRecordMeasureIfUsed mValueMetric = do 249 (mVal, met) <- mValueMetric 250 pure (determineUpdatedMeasure mVal met) 251 determineUpdatedMeasure :: Maybe MeasureDatum -> Metric -> MeasureUpdateAction 252 determineUpdatedMeasure mVal met = 253 case updatingRecord of 254 Create -> 255 maybe (NoAction met) (Upsert met) mVal 256 Found _ -> do 257 -- TODO: 258 -- mOldVal <- getOldVal metric record 259 -- action = maybe (Upsert val) (\o -> if o == val then Unchanged else Upsert val) 260 let measureAction = maybe (Delete met) (Upsert met) mVal 261 measureAction 262 getFieldVal' :: (ParticipantRecord -> FieldUse a) -> Metric -> Maybe (Maybe MeasureDatum, Metric) 263 getFieldVal' = getFieldVal participantRecord 264 265 getFieldVal 266 :: ParticipantRecord 267 -> (ParticipantRecord -> FieldUse a) 268 -> Metric 269 -> Maybe (Maybe MeasureDatum, Metric) 270 getFieldVal participantRecord extractFieldVal metric = 271 case extractFieldVal participantRecord of 272 Field fieldVal _ -> pure (Just fieldVal, metric) 273 FieldEmpty -> pure (Nothing, metric) 274 FieldUnused -> Nothing 275 -- field isn't used by this volume, so don't need to save the measure 276 277 createOrUpdateRecord :: Volume -> ParticipantRecord -> Handler () -- TODO: error or record 278 createOrUpdateRecord vol participantRecord = do 279 let category = getCategory' (Id 1) -- TODO: use global variable 280 mIdVal = fst $ maybe (error "id missing") id (getFieldVal' prdId participantMetricId) 281 idVal = maybe (error "id empty") id mIdVal 282 mOldParticipant <- lookupVolumeParticipant vol idVal 283 let recordStatus = 284 case mOldParticipant of 285 Nothing -> Create 286 Just oldParticipant -> Found oldParticipant 287 -- print ("save measure id:", mId) 288 case buildParticipantRecordAction participantRecord recordStatus of 289 ParticipantRecordAction Create measureActs -> do 290 newParticipantShell <- addRecord (blankRecord category vol) -- blankParticipantRecord 291 _ <- mapM (runMeasureUpdate newParticipantShell) measureActs 292 pure () -- TODO: reload participant 293 ParticipantRecordAction (Found oldRecord) measureActs -> do 294 _ <- mapM (runMeasureUpdate oldRecord) measureActs 295 pure () 296 where 297 runMeasureUpdate :: Record -> MeasureUpdateAction -> Handler (Maybe Record) 298 runMeasureUpdate record act = 299 case act of 300 Upsert met val -> changeRecordMeasure (Measure record met val) 301 Delete met -> fmap Just (removeRecordMeasure (Measure record met "")) 302 Unchanged _ -> pure Nothing 303 NoAction _ -> pure Nothing 304 getFieldVal' :: (ParticipantRecord -> FieldUse a) -> Metric -> Maybe (Maybe MeasureDatum, Metric) 305 getFieldVal' = getFieldVal participantRecord