1 {-# LANGUAGE GeneralizedNewtypeDeriving, TemplateHaskell, OverloadedStrings #-}
    2 module Databrary.Solr.Document
    3   ( SolrDocument(..)
    4   , SolrRecordMeasures(..)
    5   , SolrSegment(..)
    6   , metricField
    7   ) where
    8 
    9 import qualified Data.Aeson as JSON
   10 import qualified Data.Aeson.TH as JTH
   11 import qualified Data.ByteString as BS
   12 import Data.Char (isAlphaNum)
   13 import Data.Int (Int16)
   14 import qualified Data.HashMap.Strict as HM
   15 import Data.Monoid ((<>))
   16 import qualified Data.Text as T
   17 
   18 import Databrary.Model.Id.Types
   19 import Databrary.Model.Permission.Types
   20 import Databrary.Model.Release.Types
   21 import Databrary.Model.Party.Types
   22 import Databrary.Model.Volume.Types
   23 import Databrary.Model.Container.Types
   24 import Databrary.Model.Offset
   25 import Databrary.Model.Segment
   26 import Databrary.Model.Format.Types
   27 import Databrary.Model.Asset.Types
   28 import Databrary.Model.Time
   29 import Databrary.Model.Age
   30 import Databrary.Model.Record.Types
   31 import Databrary.Model.Category.Types
   32 import Databrary.Model.Metric
   33 import Databrary.Model.Tag.Types
   34 import Databrary.Model.Comment.Types
   35 import Databrary.StringUtil
   36 
   37 safeField :: T.Text -> T.Text
   38 safeField = T.map safeChar where
   39   safeChar c
   40     | isAlphaNum c = c
   41     | otherwise = '_'
   42 
   43 newtype SolrRecordMeasures = SolrRecordMeasures [(Metric, MeasureDatum)]
   44 
   45 metricLabel :: Metric -> T.Text
   46 metricLabel Metric{ metricType = MeasureTypeText, metricOptions = _:_ } = "enum"
   47 metricLabel m@Metric{ metricType = MeasureTypeText }
   48   | metricLong m = "long"
   49   | otherwise = "text"
   50 metricLabel Metric{ metricType = MeasureTypeNumeric } = "numeric"
   51 metricLabel Metric{ metricType = MeasureTypeDate } = "date"
   52 metricLabel Metric{ metricType = MeasureTypeVoid } = "void"
   53 
   54 metricField :: Metric -> T.Text
   55 metricField m = "record_" <> metricLabel m <> ('_' `T.cons` safeField (metricName m))
   56 
   57 -- slight hack because we actually index dates as datetimes
   58 metricDatum :: Metric -> MeasureDatum -> JSON.Value
   59 metricDatum Metric{ metricType = MeasureTypeDate } d = JSON.toJSON $ d <> "T12:00:00Z"
   60 metricDatum Metric{ metricType = MeasureTypeVoid } _ = JSON.toJSON True
   61 metricDatum _ d = JSON.toJSON d
   62 
   63 measureKeyValue :: JSON.KeyValue kv => (Metric, MeasureDatum) -> kv
   64 measureKeyValue (m, d) = metricField m JSON..= metricDatum m d
   65 
   66 instance JSON.ToJSON SolrRecordMeasures where
   67   toJSON (SolrRecordMeasures ms) =
   68     JSON.object $ map measureKeyValue ms
   69   toEncoding (SolrRecordMeasures ms) =
   70     JSON.pairs $ foldr ((<>) . measureKeyValue) mempty ms
   71 
   72 newtype SolrSegment = SolrSegment Segment deriving (JSON.FromJSON)
   73 
   74 instance Show SolrSegment where
   75   showsPrec _ (SolrSegment s) = showSegmentWith (shows . offsetMillis) s
   76 
   77 instance JSON.ToJSON SolrSegment where
   78   toJSON s = JSON.String $ T.pack $ show s
   79   toEncoding s = JSON.toEncoding $ show s
   80 
   81 data SolrDocument
   82   = SolrParty
   83     { solrId :: !BS.ByteString
   84     , solrPartyId :: Id Party
   85     , solrPartySortName :: T.Text
   86     , solrPartyPreName :: Maybe T.Text
   87     , solrPartyAffiliation :: Maybe T.Text
   88     , solrPartyIsInstitution :: Bool
   89     , solrPartyAuthorization :: Maybe Permission
   90     }
   91   | SolrVolume
   92     { solrId :: !BS.ByteString
   93     , solrVolumeId :: Id Volume
   94     , solrName :: Maybe T.Text
   95     , solrBody :: Maybe T.Text -- body
   96     , solrVolumeOwnerIds :: [Id Party]
   97     , solrVolumeOwnerNames :: [T.Text]
   98     , solrCitation :: Maybe T.Text
   99     , solrCitationYear :: Maybe Int16
  100     }
  101   | SolrContainer
  102     { solrId :: !BS.ByteString
  103     , solrVolumeId :: Id Volume
  104     , solrContainerId :: Id Container
  105     , solrName :: Maybe T.Text
  106     , solrContainerTop :: Bool
  107     , solrContainerDate :: Maybe MaskedDate
  108     , solrRelease :: Maybe Release
  109     }
  110   | SolrAsset -- Slot
  111     { solrId :: !BS.ByteString
  112     , solrVolumeId :: Id Volume
  113     , solrContainerId :: Id Container
  114     , solrSegment :: SolrSegment
  115     , solrSegmentDuration :: Maybe Offset
  116     , solrAssetId :: Id Asset
  117     , solrName :: Maybe T.Text
  118     , solrFormatId :: Id Format
  119     , solrRelease :: Maybe Release
  120     }
  121   | SolrExcerpt
  122     { solrId :: !BS.ByteString
  123     , solrVolumeId :: Id Volume
  124     , solrContainerId :: Id Container
  125     , solrSegment :: SolrSegment
  126     , solrSegmentDuration :: Maybe Offset
  127     , solrAssetId :: Id Asset
  128     , solrRelease :: Maybe Release
  129     }
  130   | SolrRecord -- Slot
  131     { solrId :: !BS.ByteString
  132     , solrVolumeId :: Id Volume
  133     , solrContainerId :: Id Container
  134     , solrSegment :: SolrSegment
  135     , solrSegmentDuration :: Maybe Offset
  136     , solrRecordId :: Id Record
  137     , solrRecordCategoryId :: Id Category
  138     , solrRecordMeasures :: SolrRecordMeasures
  139     , solrRecordAge :: Maybe Age
  140     }
  141   | SolrTagId
  142     { solrId :: !BS.ByteString
  143     , solrTagId :: Id Tag
  144     , solrTagName :: TagName
  145     }
  146   | SolrTag -- Use
  147     { solrId :: !BS.ByteString
  148     , solrVolumeId :: Id Volume
  149     , solrContainerId :: Id Container
  150     , solrSegment :: SolrSegment
  151     , solrSegmentDuration :: Maybe Offset
  152     , solrTagId :: Id Tag
  153     , solrTagName :: TagName
  154     , solrKeyword :: Maybe TagName
  155     , solrPartyId :: Id Party
  156     }
  157   | SolrComment
  158     { solrId :: !BS.ByteString
  159     , solrVolumeId :: Id Volume
  160     , solrContainerId :: Id Container
  161     , solrSegment :: SolrSegment
  162     , solrSegmentDuration :: Maybe Offset
  163     , solrCommentId :: Id Comment
  164     , solrPartyId :: Id Party
  165     , solrBody :: Maybe T.Text
  166     }
  167 
  168 $(return []) -- force new decl group for splice:
  169 
  170 solrToJSON :: SolrDocument -> JSON.Value
  171 solrToJSON =
  172   $(JTH.mkToJSON JTH.defaultOptions
  173     { JTH.fieldLabelModifier = \('s':'o':'l':'r':s) -> fromCamel s
  174     , JTH.constructorTagModifier = \('S':'o':'l':'r':s) -> fromCamel s
  175     , JTH.omitNothingFields = True
  176     , JTH.sumEncoding = JTH.TaggedObject
  177       { JTH.tagFieldName = "content_type"
  178       , JTH.contentsFieldName = error "solrToJSON: contentsFieldName"
  179       }
  180     } ''SolrDocument)
  181 
  182 fixToJSON :: SolrDocument -> JSON.Value -> JSON.Value
  183 fixToJSON SolrRecord{} (JSON.Object o) = JSON.Object $
  184   maybe o (HM.union $ HM.delete k o) $ do
  185     JSON.Object m <- HM.lookup k o
  186     return m
  187   where k = "record_measures"
  188 fixToJSON _ j = j
  189 
  190 instance JSON.ToJSON SolrDocument where
  191   toJSON s = fixToJSON s $ solrToJSON s
  192   -- TODO: toEncoding (and cleanup fixToJSON)