1 {-# LANGUAGE TemplateHaskell, QuasiQuotes, DataKinds, OverloadedStrings #-}
    2 module Databrary.Model.Ingest
    3   ( IngestKey
    4   , lookupIngestContainer
    5   , addIngestContainer
    6   , lookupIngestRecord
    7   , addIngestRecord
    8   , lookupIngestAsset
    9   , addIngestAsset
   10   , replaceSlotAsset
   11   , checkDetermineMapping
   12   , attemptParseRows
   13   , extractColumnsDistinctSampleJson
   14   , extractColumnsInitialJson
   15   , HeaderMappingEntry(..)
   16   , participantFieldMappingToJSON
   17   , parseParticipantFieldMapping
   18   -- for testing:
   19   , determineMapping
   20   ) where
   21 
   22 import Control.Monad (when)
   23 import qualified Data.ByteString as BS
   24 import qualified Data.Csv as Csv
   25 import Data.Csv hiding (Record)
   26 import qualified Data.List as L
   27 import qualified Data.Map as Map
   28 import qualified Data.Text as T
   29 import qualified Data.Text.Encoding as TE
   30 import Data.Text (Text)
   31 import Database.PostgreSQL.Typed.Query
   32 import Database.PostgreSQL.Typed.Types
   33 import qualified Data.ByteString
   34 import Data.ByteString (ByteString)
   35 import qualified Data.String
   36 import Data.Vector (Vector)
   37 
   38 import Data.Csv.Contrib (extractColumnsDistinctSample, decodeCsvByNameWith, extractColumnsInitialRows)
   39 import Databrary.Service.DB
   40 import qualified Databrary.JSON as JSON
   41 import Databrary.JSON (FromJSON(..), ToJSON(..))
   42 import Databrary.Model.Volume.Types
   43 import Databrary.Model.Container.Types
   44 import Databrary.Model.Metric.Types
   45 import Databrary.Model.Metric
   46 import qualified Databrary.Model.Record.SQL
   47 import Databrary.Model.Record.Types
   48 import Databrary.Model.Record (columnSampleJson)
   49 import Databrary.Model.Asset.Types
   50 import Databrary.Model.Asset.SQL
   51 
   52 type IngestKey = T.Text
   53 
   54 mapQuery :: ByteString -> ([PGValue] -> a) -> PGSimpleQuery a
   55 mapQuery qry mkResult =
   56   fmap mkResult (rawPGSimpleQuery qry)
   57 
   58 lookupIngestContainer :: MonadDB c m => Volume -> IngestKey -> m (Maybe Container)
   59 lookupIngestContainer vol k = do
   60   let _tenv_a6Dpp = unknownPGTypeEnv
   61   dbQuery1 $ fmap ($ vol) -- .(selectQuery selectVolumeContainer "JOIN ingest.container AS ingest USING (id, volume) WHERE ingest.key = ${k} AND container.volume = ${volumeId $ volumeRow vol}")
   62     (fmap
   63       (\ (vid_a6Dph, vtop_a6Dpi, vname_a6Dpj, vdate_a6Dpk,
   64           vrelease_a6Dpl)
   65          -> Container
   66               (ContainerRow vid_a6Dph vtop_a6Dpi vname_a6Dpj vdate_a6Dpk)
   67               vrelease_a6Dpl)
   68       (mapQuery
   69         ((\ _p_a6Dpq _p_a6Dpr -> 
   70                        (Data.ByteString.concat
   71                           [Data.String.fromString
   72                              "SELECT container.id,container.top,container.name,container.date,slot_release.release FROM container LEFT JOIN slot_release ON container.id = slot_release.container AND slot_release.segment = '(,)' JOIN ingest.container AS ingest USING (id, volume) WHERE ingest.key = ",
   73                            Database.PostgreSQL.Typed.Types.pgEscapeParameter
   74                              _tenv_a6Dpp
   75                              (Database.PostgreSQL.Typed.Types.PGTypeProxy ::
   76                                 Database.PostgreSQL.Typed.Types.PGTypeName "text")
   77                              _p_a6Dpq,
   78                            Data.String.fromString " AND container.volume = ",
   79                            Database.PostgreSQL.Typed.Types.pgEscapeParameter
   80                              _tenv_a6Dpp
   81                              (Database.PostgreSQL.Typed.Types.PGTypeProxy ::
   82                                 Database.PostgreSQL.Typed.Types.PGTypeName "integer")
   83                              _p_a6Dpr]))
   84          k (volumeId $ volumeRow vol))
   85                (\ [_cid_a6Dps,
   86                    _ctop_a6Dpt,
   87                    _cname_a6Dpu,
   88                    _cdate_a6Dpv,
   89                    _crelease_a6Dpw]
   90                   -> (Database.PostgreSQL.Typed.Types.pgDecodeColumnNotNull
   91                         _tenv_a6Dpp
   92                         (Database.PostgreSQL.Typed.Types.PGTypeProxy ::
   93                            Database.PostgreSQL.Typed.Types.PGTypeName "integer")
   94                         _cid_a6Dps, 
   95                       Database.PostgreSQL.Typed.Types.pgDecodeColumnNotNull
   96                         _tenv_a6Dpp
   97                         (Database.PostgreSQL.Typed.Types.PGTypeProxy ::
   98                            Database.PostgreSQL.Typed.Types.PGTypeName "boolean")
   99                         _ctop_a6Dpt, 
  100                       Database.PostgreSQL.Typed.Types.pgDecodeColumn
  101                         _tenv_a6Dpp
  102                         (Database.PostgreSQL.Typed.Types.PGTypeProxy ::
  103                            Database.PostgreSQL.Typed.Types.PGTypeName "text")
  104                         _cname_a6Dpu, 
  105                       Database.PostgreSQL.Typed.Types.pgDecodeColumn
  106                         _tenv_a6Dpp
  107                         (Database.PostgreSQL.Typed.Types.PGTypeProxy ::
  108                            Database.PostgreSQL.Typed.Types.PGTypeName "date")
  109                         _cdate_a6Dpv, 
  110                       Database.PostgreSQL.Typed.Types.pgDecodeColumnNotNull
  111                         _tenv_a6Dpp
  112                         (Database.PostgreSQL.Typed.Types.PGTypeProxy ::
  113                            Database.PostgreSQL.Typed.Types.PGTypeName "release")
  114                         _crelease_a6Dpw))))
  115 
  116 addIngestContainer :: MonadDB c m => Container -> IngestKey -> m ()
  117 addIngestContainer c k = do
  118   let _tenv_a6Dvh = unknownPGTypeEnv
  119   dbExecute1' -- [pgSQL|INSERT INTO ingest.container (id, volume, key) VALUES (${containerId $ containerRow c}, ${volumeId $ volumeRow $ containerVolume c}, ${k})|]
  120    (mapQuery
  121     ((\ _p_a6Dvi _p_a6Dvj _p_a6Dvk ->
  122                     (Data.ByteString.concat
  123                        [Data.String.fromString
  124                           "INSERT INTO ingest.container (id, volume, key) VALUES (",
  125                         Database.PostgreSQL.Typed.Types.pgEscapeParameter
  126                           _tenv_a6Dvh
  127                           (Database.PostgreSQL.Typed.Types.PGTypeProxy ::
  128                              Database.PostgreSQL.Typed.Types.PGTypeName "integer")
  129                           _p_a6Dvi,
  130                         Data.String.fromString ", ",
  131                         Database.PostgreSQL.Typed.Types.pgEscapeParameter
  132                           _tenv_a6Dvh
  133                           (Database.PostgreSQL.Typed.Types.PGTypeProxy ::
  134                              Database.PostgreSQL.Typed.Types.PGTypeName "integer")
  135                           _p_a6Dvj,
  136                         Data.String.fromString ", ",
  137                         Database.PostgreSQL.Typed.Types.pgEscapeParameter
  138                           _tenv_a6Dvh
  139                           (Database.PostgreSQL.Typed.Types.PGTypeProxy ::
  140                              Database.PostgreSQL.Typed.Types.PGTypeName "text")
  141                           _p_a6Dvk,
  142                         Data.String.fromString ")"]))
  143       (containerId $ containerRow c)
  144       (volumeId $ volumeRow $ containerVolume c)
  145       k)
  146             (\ [] -> ()))
  147 
  148 lookupIngestRecord :: MonadDB c m => Volume -> IngestKey -> m (Maybe Record)
  149 lookupIngestRecord vol k = do
  150   let _tenv_a6GtF = unknownPGTypeEnv
  151   dbQuery1 $ fmap ($ vol) -- .(selectQuery selectVolumeRecord "JOIN ingest.record AS ingest USING (id, volume) WHERE ingest.key = ${k} AND record.volume = ${volumeId $ volumeRow vol}")
  152     (fmap
  153       (\ (vid_a6GtB, vcategory_a6GtC, vmeasures_a6GtD, vc_a6GtE)
  154          -> ($)
  155               (Databrary.Model.Record.SQL.makeRecord
  156                  vid_a6GtB vcategory_a6GtC vmeasures_a6GtD)
  157               vc_a6GtE)
  158      (mapQuery
  159       ((\ _p_a6GtG _p_a6GtH ->
  160                        (Data.ByteString.concat
  161                           [Data.String.fromString
  162                              "SELECT record.id,record.category,record.measures,record_release(record.id) FROM record_measures AS record JOIN ingest.record AS ingest USING (id, volume) WHERE ingest.key = ",
  163                            Database.PostgreSQL.Typed.Types.pgEscapeParameter
  164                              _tenv_a6GtF
  165                              (Database.PostgreSQL.Typed.Types.PGTypeProxy ::
  166                                 Database.PostgreSQL.Typed.Types.PGTypeName "text")
  167                              _p_a6GtG,
  168                            Data.String.fromString " AND record.volume = ",
  169                            Database.PostgreSQL.Typed.Types.pgEscapeParameter
  170                              _tenv_a6GtF
  171                              (Database.PostgreSQL.Typed.Types.PGTypeProxy ::
  172                                 Database.PostgreSQL.Typed.Types.PGTypeName "integer")
  173                              _p_a6GtH]))
  174          k (volumeId $ volumeRow vol))
  175                (\ [_cid_a6GtI,
  176                    _ccategory_a6GtJ,
  177                    _cmeasures_a6GtK,
  178                    _crecord_release_a6GtL]
  179                   -> (Database.PostgreSQL.Typed.Types.pgDecodeColumnNotNull
  180                         _tenv_a6GtF
  181                         (Database.PostgreSQL.Typed.Types.PGTypeProxy ::
  182                            Database.PostgreSQL.Typed.Types.PGTypeName "integer")
  183                         _cid_a6GtI, 
  184                       Database.PostgreSQL.Typed.Types.pgDecodeColumnNotNull
  185                         _tenv_a6GtF
  186                         (Database.PostgreSQL.Typed.Types.PGTypeProxy ::
  187                            Database.PostgreSQL.Typed.Types.PGTypeName "smallint")
  188                         _ccategory_a6GtJ, 
  189                       Database.PostgreSQL.Typed.Types.pgDecodeColumnNotNull
  190                         _tenv_a6GtF
  191                         (Database.PostgreSQL.Typed.Types.PGTypeProxy ::
  192                            Database.PostgreSQL.Typed.Types.PGTypeName "text[]")
  193                         _cmeasures_a6GtK, 
  194                       Database.PostgreSQL.Typed.Types.pgDecodeColumn
  195                         _tenv_a6GtF
  196                         (Database.PostgreSQL.Typed.Types.PGTypeProxy ::
  197                            Database.PostgreSQL.Typed.Types.PGTypeName "release")
  198                         _crecord_release_a6GtL))))
  199 
  200 addIngestRecord :: MonadDB c m => Record -> IngestKey -> m ()
  201 addIngestRecord r k = do
  202   let _tenv_a6PCz = unknownPGTypeEnv
  203   dbExecute1' -- [pgSQL|INSERT INTO ingest.record (id, volume, key) VALUES (${recordId $ recordRow r}, ${volumeId $ volumeRow $ recordVolume r}, ${k})|]
  204    (mapQuery
  205     ((\ _p_a6PCA _p_a6PCB _p_a6PCC ->
  206                     (Data.ByteString.concat
  207                        [Data.String.fromString
  208                           "INSERT INTO ingest.record (id, volume, key) VALUES (",
  209                         Database.PostgreSQL.Typed.Types.pgEscapeParameter
  210                           _tenv_a6PCz
  211                           (Database.PostgreSQL.Typed.Types.PGTypeProxy ::
  212                              Database.PostgreSQL.Typed.Types.PGTypeName "integer")
  213                           _p_a6PCA,
  214                         Data.String.fromString ", ",
  215                         Database.PostgreSQL.Typed.Types.pgEscapeParameter
  216                           _tenv_a6PCz
  217                           (Database.PostgreSQL.Typed.Types.PGTypeProxy ::
  218                              Database.PostgreSQL.Typed.Types.PGTypeName "integer")
  219                           _p_a6PCB,
  220                         Data.String.fromString ", ",
  221                         Database.PostgreSQL.Typed.Types.pgEscapeParameter
  222                           _tenv_a6PCz
  223                           (Database.PostgreSQL.Typed.Types.PGTypeProxy ::
  224                              Database.PostgreSQL.Typed.Types.PGTypeName "text")
  225                           _p_a6PCC,
  226                         Data.String.fromString ")"]))
  227       (recordId $ recordRow r) (volumeId $ volumeRow $ recordVolume r) k)
  228             (\ [] -> ()))
  229 
  230 lookupIngestAsset :: MonadDB c m => Volume -> FilePath -> m (Maybe Asset)
  231 lookupIngestAsset vol k = do
  232   let _tenv_a6PDv = unknownPGTypeEnv
  233   dbQuery1 $ fmap (`Asset` vol) -- .(selectQuery selectAssetRow "JOIN ingest.asset AS ingest USING (id) WHERE ingest.file = ${k} AND asset.volume = ${volumeId $ volumeRow vol}")
  234     (fmap
  235       (\ (vid_a6PDo, vformat_a6PDp, vrelease_a6PDq, vduration_a6PDr,
  236           vname_a6PDs, vc_a6PDt, vsize_a6PDu)
  237          -> Databrary.Model.Asset.SQL.makeAssetRow
  238               vid_a6PDo
  239               vformat_a6PDp
  240               vrelease_a6PDq
  241               vduration_a6PDr
  242               vname_a6PDs
  243               vc_a6PDt
  244               vsize_a6PDu)
  245      (mapQuery
  246       ((\ _p_a6PDw _p_a6PDx ->
  247                        (Data.ByteString.concat
  248                           [Data.String.fromString
  249                              "SELECT asset.id,asset.format,asset.release,asset.duration,asset.name,asset.sha1,asset.size FROM asset JOIN ingest.asset AS ingest USING (id) WHERE ingest.file = ",
  250                            Database.PostgreSQL.Typed.Types.pgEscapeParameter
  251                              _tenv_a6PDv
  252                              (Database.PostgreSQL.Typed.Types.PGTypeProxy ::
  253                                 Database.PostgreSQL.Typed.Types.PGTypeName "text")
  254                              _p_a6PDw,
  255                            Data.String.fromString " AND asset.volume = ",
  256                            Database.PostgreSQL.Typed.Types.pgEscapeParameter
  257                              _tenv_a6PDv
  258                              (Database.PostgreSQL.Typed.Types.PGTypeProxy ::
  259                                 Database.PostgreSQL.Typed.Types.PGTypeName "integer")
  260                              _p_a6PDx]))
  261          k (volumeId $ volumeRow vol))
  262                (\ [_cid_a6PDy,
  263                    _cformat_a6PDz,
  264                    _crelease_a6PDA,
  265                    _cduration_a6PDB,
  266                    _cname_a6PDC,
  267                    _csha1_a6PDD,
  268                    _csize_a6PDE]
  269                   -> (Database.PostgreSQL.Typed.Types.pgDecodeColumnNotNull
  270                         _tenv_a6PDv
  271                         (Database.PostgreSQL.Typed.Types.PGTypeProxy ::
  272                            Database.PostgreSQL.Typed.Types.PGTypeName "integer")
  273                         _cid_a6PDy, 
  274                       Database.PostgreSQL.Typed.Types.pgDecodeColumnNotNull
  275                         _tenv_a6PDv
  276                         (Database.PostgreSQL.Typed.Types.PGTypeProxy ::
  277                            Database.PostgreSQL.Typed.Types.PGTypeName "smallint")
  278                         _cformat_a6PDz, 
  279                       Database.PostgreSQL.Typed.Types.pgDecodeColumn
  280                         _tenv_a6PDv
  281                         (Database.PostgreSQL.Typed.Types.PGTypeProxy ::
  282                            Database.PostgreSQL.Typed.Types.PGTypeName "release")
  283                         _crelease_a6PDA, 
  284                       Database.PostgreSQL.Typed.Types.pgDecodeColumn
  285                         _tenv_a6PDv
  286                         (Database.PostgreSQL.Typed.Types.PGTypeProxy ::
  287                            Database.PostgreSQL.Typed.Types.PGTypeName "interval")
  288                         _cduration_a6PDB, 
  289                       Database.PostgreSQL.Typed.Types.pgDecodeColumn
  290                         _tenv_a6PDv
  291                         (Database.PostgreSQL.Typed.Types.PGTypeProxy ::
  292                            Database.PostgreSQL.Typed.Types.PGTypeName "text")
  293                         _cname_a6PDC, 
  294                       Database.PostgreSQL.Typed.Types.pgDecodeColumn
  295                         _tenv_a6PDv
  296                         (Database.PostgreSQL.Typed.Types.PGTypeProxy ::
  297                            Database.PostgreSQL.Typed.Types.PGTypeName "bytea")
  298                         _csha1_a6PDD, 
  299                       Database.PostgreSQL.Typed.Types.pgDecodeColumn
  300                         _tenv_a6PDv
  301                         (Database.PostgreSQL.Typed.Types.PGTypeProxy ::
  302                            Database.PostgreSQL.Typed.Types.PGTypeName "bigint")
  303                         _csize_a6PDE))))
  304 
  305 addIngestAsset :: MonadDB c m => Asset -> FilePath -> m ()
  306 addIngestAsset r k = do
  307   let _tenv_a6PFc = unknownPGTypeEnv
  308   dbExecute1' -- [pgSQL|INSERT INTO ingest.asset (id, file) VALUES (${assetId $ assetRow r}, ${k})|]
  309    (mapQuery
  310     ((\ _p_a6PFd _p_a6PFe ->
  311                     (Data.ByteString.concat
  312                        [Data.String.fromString
  313                           "INSERT INTO ingest.asset (id, file) VALUES (",
  314                         Database.PostgreSQL.Typed.Types.pgEscapeParameter
  315                           _tenv_a6PFc
  316                           (Database.PostgreSQL.Typed.Types.PGTypeProxy ::
  317                              Database.PostgreSQL.Typed.Types.PGTypeName "integer")
  318                           _p_a6PFd,
  319                         Data.String.fromString ", ",
  320                         Database.PostgreSQL.Typed.Types.pgEscapeParameter
  321                           _tenv_a6PFc
  322                           (Database.PostgreSQL.Typed.Types.PGTypeProxy ::
  323                              Database.PostgreSQL.Typed.Types.PGTypeName "text")
  324                           _p_a6PFe,
  325                         Data.String.fromString ")"]))
  326       (assetId $ assetRow r) k)
  327             (\ [] -> ()))
  328 
  329 replaceSlotAsset :: MonadDB c m => Asset -> Asset -> m Bool
  330 replaceSlotAsset o n = do
  331   let _tenv_a6PFB = unknownPGTypeEnv
  332   dbExecute1 -- [pgSQL|UPDATE slot_asset SET asset = ${assetId $ assetRow n} WHERE asset = ${assetId $ assetRow o}|]
  333    (mapQuery
  334     ((\ _p_a6PFC _p_a6PFD ->
  335                     (Data.ByteString.concat
  336                        [Data.String.fromString "UPDATE slot_asset SET asset = ",
  337                         Database.PostgreSQL.Typed.Types.pgEscapeParameter
  338                           _tenv_a6PFB
  339                           (Database.PostgreSQL.Typed.Types.PGTypeProxy ::
  340                              Database.PostgreSQL.Typed.Types.PGTypeName "integer")
  341                           _p_a6PFC,
  342                         Data.String.fromString " WHERE asset = ",
  343                         Database.PostgreSQL.Typed.Types.pgEscapeParameter
  344                           _tenv_a6PFB
  345                           (Database.PostgreSQL.Typed.Types.PGTypeProxy ::
  346                              Database.PostgreSQL.Typed.Types.PGTypeName "integer")
  347                           _p_a6PFD]))
  348       (assetId $ assetRow n) (assetId $ assetRow o))
  349             (\ [] -> ()))
  350 
  351 checkDetermineMapping :: [Metric] -> [Text] -> BS.ByteString -> Either String ParticipantFieldMapping2
  352 checkDetermineMapping participantActiveMetrics csvHeaders csvContents = do
  353     -- return skipped columns or not?
  354     mpng <- determineMapping participantActiveMetrics csvHeaders
  355     _ <- attemptParseRows mpng csvContents
  356     pure mpng
  357 
  358 attemptParseRows
  359     :: ParticipantFieldMapping2 -> BS.ByteString -> Either String (Csv.Header, Vector ParticipantRecord)
  360 attemptParseRows participantFieldMapping contents =
  361     decodeCsvByNameWith (participantRecordParseNamedRecord participantFieldMapping) contents
  362 
  363 participantRecordParseNamedRecord :: ParticipantFieldMapping2 -> Csv.NamedRecord -> Parser ParticipantRecord
  364 participantRecordParseNamedRecord fieldMap m = do
  365     mId <- extractIfUsed2 (lookupField participantMetricId) validateParticipantId
  366     mInfo <- extractIfUsed2 (lookupField participantMetricInfo) validateParticipantInfo
  367     mDescription <- extractIfUsed2 (lookupField participantMetricDescription) validateParticipantDescription
  368     mBirthdate <- extractIfUsed2 (lookupField participantMetricBirthdate) validateParticipantBirthdate
  369     mGender <- extractIfUsed2 (lookupField participantMetricGender) validateParticipantGender
  370     mRace <- extractIfUsed2 (lookupField participantMetricRace) validateParticipantRace
  371     mEthnicity <- extractIfUsed2 (lookupField participantMetricEthnicity) validateParticipantEthnicity
  372     mGestationalAge <- extractIfUsed2 (lookupField participantMetricGestationalAge) validateParticipantGestationalAge
  373     mPregnancyTerm <- extractIfUsed2 (lookupField participantMetricPregnancyTerm) validateParticipantPregnancyTerm
  374     mBirthWeight <- extractIfUsed2 (lookupField participantMetricBirthWeight) validateParticipantBirthWeight
  375     mDisability <- extractIfUsed2 (lookupField participantMetricDisability) validateParticipantDisability
  376     mLanguage <- extractIfUsed2 (lookupField participantMetricLanguage) validateParticipantLanguage
  377     mCountry <- extractIfUsed2 (lookupField participantMetricCountry) validateParticipantCountry
  378     mState <- extractIfUsed2 (lookupField participantMetricState) validateParticipantState
  379     mSetting <- extractIfUsed2 (lookupField participantMetricSetting) validateParticipantSetting
  380     pure
  381         (ParticipantRecord
  382             { prdId = mId
  383             , prdInfo = mInfo
  384             , prdDescription = mDescription
  385             , prdBirthdate = mBirthdate
  386             , prdGender = mGender
  387             , prdRace = mRace
  388             , prdEthnicity = mEthnicity
  389             , prdGestationalAge = mGestationalAge
  390             , prdPregnancyTerm = mPregnancyTerm
  391             , prdBirthWeight = mBirthWeight
  392             , prdDisability = mDisability
  393             , prdLanguage = mLanguage
  394             , prdCountry = mCountry
  395             , prdState = mState
  396             , prdSetting = mSetting
  397             } )
  398   where
  399     extractIfUsed2
  400       :: (ParticipantFieldMapping2 -> Maybe Text)
  401       -> (BS.ByteString -> Maybe (Maybe a))
  402       -> Parser (FieldUse a)
  403     extractIfUsed2 maybeGetField validateValue = do
  404         case maybeGetField fieldMap of
  405             Just colName -> do
  406                 contents <- m .: (TE.encodeUtf8 colName)
  407                 maybe
  408                     (fail ("invalid value for " ++ show colName ++ ", found " ++ show contents))
  409                     (\mV -> pure (maybe FieldEmpty (Field contents) mV))
  410                     (validateValue contents)
  411             Nothing -> pure FieldUnused
  412     
  413 
  414 -- verify that all expected columns are present, with some leniency in matching
  415 -- left if no match possible
  416 determineMapping :: [Metric] -> [Text] -> Either String ParticipantFieldMapping2
  417 determineMapping participantActiveMetrics csvHeaders = do
  418     (columnMatches :: [Text]) <- traverse (detectMetricMatch csvHeaders) participantActiveMetrics
  419     mkParticipantFieldMapping2 (zip participantActiveMetrics columnMatches)
  420   where
  421     detectMetricMatch :: [Text] -> Metric -> Either String Text
  422     detectMetricMatch hdrs metric =
  423         case L.find (\h -> columnMetricCompatible h metric) hdrs of
  424             Just hdr -> Right hdr
  425             Nothing -> Left ("no compatible header found for metric: " ++ (show . metricName) metric)
  426 
  427 columnMetricCompatible :: Text -> Metric -> Bool
  428 columnMetricCompatible hdr metric =
  429     (T.filter (/= ' ') . T.toLower . metricName) metric == T.toLower hdr
  430 
  431 extractColumnsDistinctSampleJson :: Int -> Csv.Header -> Vector Csv.NamedRecord -> [JSON.Value]
  432 extractColumnsDistinctSampleJson maxSamples hdrs records =
  433     ( fmap (\(colHdr, vals) -> columnSampleJson colHdr vals)
  434     . extractColumnsDistinctSample maxSamples hdrs)
  435     records
  436 
  437 extractColumnsInitialJson :: Int -> Csv.Header -> Vector Csv.NamedRecord -> [JSON.Value]
  438 extractColumnsInitialJson maxRows hdrs records =
  439     ( fmap (\(colHdr, vals) -> columnSampleJson colHdr vals)
  440     . extractColumnsInitialRows maxRows hdrs )
  441     records
  442 
  443 data HeaderMappingEntry =
  444     HeaderMappingEntry {
  445           hmeCsvField :: Text
  446         , hmeMetric :: Metric -- only participant metrics
  447     } deriving ({- Show, -} Eq) -- , Ord)
  448 
  449 instance FromJSON HeaderMappingEntry where
  450     parseJSON =
  451         JSON.withObject "HeaderMappingEntry"
  452             (\o -> do
  453                  metricCanonicalName <- o JSON..: "metric"
  454                  case lookupParticipantMetricBySymbolicName metricCanonicalName of
  455                      Just metric ->
  456                          HeaderMappingEntry
  457                              <$> o JSON..: "csv_field"
  458                              <*> pure metric
  459                      Nothing ->
  460                          fail ("metric name does not match any participant metric: " ++ show metricCanonicalName))
  461 
  462 participantFieldMappingToJSON :: ParticipantFieldMapping2 -> JSON.Value
  463 participantFieldMappingToJSON fldMap =
  464     -- didn't use tojson to avoid orphan warning. didn't move tojson to metric.types because of circular ref to metric instances
  465     (toJSON . fmap fieldToEntry . Map.toList . pfmGetMapping) fldMap
  466   where
  467     fieldToEntry :: (Metric, Text) -> JSON.Value
  468     fieldToEntry (metric, colName) =
  469         (JSON.object
  470             [ "metric" JSON..= (T.filter (/= ' ') . T.toLower . metricName) metric -- TODO: use shared function
  471             , "compatible_csv_fields" JSON..= [colName] -- change to single value soon
  472             ])
  473 
  474 parseParticipantFieldMapping :: [Metric] -> [(Metric, Text)] -> Either String ParticipantFieldMapping2
  475 parseParticipantFieldMapping volParticipantActiveMetrics requestedMapping = do
  476     when (   length volParticipantActiveMetrics /= length requestedMapping
  477           || L.sort volParticipantActiveMetrics /= (L.sort . fmap fst) requestedMapping)
  478         (Left "The requested metric mapping does not completely match the required volume metrics")
  479     mkParticipantFieldMapping2 requestedMapping