1 {-# LANGUAGE OverloadedStrings, RecordWildCards, GeneralizedNewtypeDeriving, ScopedTypeVariables, TemplateHaskell #-}
    2 module Databrary.Solr.Index
    3   ( updateIndex
    4   ) where
    5 
    6 import Control.Exception.Lifted (handle)
    7 import Control.Monad.IO.Class (liftIO)
    8 import Control.Monad.Reader (ReaderT(..), ask)
    9 import Control.Monad.Trans.Class (lift)
   10 import qualified Data.Aeson as JSON
   11 import qualified Data.Aeson.Encoding as JSON
   12 import qualified Data.ByteString as BS
   13 import qualified Data.ByteString.Builder as BSB
   14 import qualified Data.ByteString.Char8 as BSC
   15 import qualified Data.ByteString.Lazy as BSL
   16 import Data.Foldable (fold)
   17 import Data.Maybe (isNothing)
   18 import Data.Monoid ((<>))
   19 import Data.Time.Clock (getCurrentTime, diffUTCTime)
   20 import qualified Network.HTTP.Client as HC
   21 import Network.HTTP.Types.Method (methodPost)
   22 import Network.HTTP.Types.Header (hContentType)
   23 
   24 import Control.Invert
   25 import Databrary.Ops
   26 import Databrary.Has
   27 import Databrary.Service.Log
   28 import Databrary.Model.Segment
   29 import Databrary.Model.Kind
   30 import Databrary.Model.Id.Types
   31 import Databrary.Model.Permission.Types
   32 import Databrary.Model.Party
   33 import Databrary.Model.Volume.Types
   34 import Databrary.Model.Citation
   35 import Databrary.Model.Container
   36 import Databrary.Model.Slot.Types
   37 import Databrary.Model.Format.Types
   38 import Databrary.Model.Asset.Types
   39 import Databrary.Model.AssetSlot
   40 import Databrary.Model.AssetSegment.Types
   41 import Databrary.Model.Excerpt
   42 import Databrary.Model.Category.Types
   43 import Databrary.Model.Record.Types
   44 import Databrary.Model.RecordSlot
   45 import Databrary.Model.Measure
   46 import Databrary.Model.Tag
   47 import Databrary.Model.Comment
   48 import Databrary.Context
   49 import Databrary.Solr.Service
   50 import Databrary.Solr.Document
   51 
   52 solrDocId :: forall a . (Kinded a, Show (Id a)) => Id a -> BS.ByteString
   53 solrDocId i = kindOf (undefined :: a) <> BSC.pack ('_' : show i)
   54 
   55 solrParty :: Party -> Maybe Permission -> SolrDocument
   56 solrParty Party{ partyRow = PartyRow{..}, ..} auth = SolrParty
   57   { solrId = solrDocId partyId
   58   , solrPartyId = partyId
   59   , solrPartySortName = partySortName
   60   , solrPartyPreName = partyPreName
   61   , solrPartyAffiliation = partyAffiliation
   62   , solrPartyIsInstitution = isNothing partyAccount
   63   , solrPartyAuthorization = auth
   64   }
   65 
   66 solrVolume :: Volume -> Maybe Citation -> SolrDocument
   67 solrVolume Volume{ volumeRow = VolumeRow{..}, ..} cite = SolrVolume
   68   { solrId = solrDocId volumeId
   69   , solrVolumeId = volumeId
   70   , solrName = Just volumeName
   71   , solrBody = volumeBody
   72   , solrVolumeOwnerIds = ownerIds
   73   , solrVolumeOwnerNames = ownerNames
   74   , solrCitation = citationHead <$> cite
   75   , solrCitationYear = citationYear =<< cite
   76   } where
   77   (ownerIds, ownerNames) = unzip volumeOwners
   78 
   79 solrContainer :: Container -> SolrDocument
   80 solrContainer c@Container{ containerRow = ContainerRow{..}, ..} = SolrContainer
   81   { solrId = solrDocId containerId
   82   , solrContainerId = containerId
   83   , solrVolumeId = volumeId $ volumeRow containerVolume
   84   , solrName = containerName
   85   , solrContainerTop = containerTop
   86   , solrContainerDate = getContainerDate c
   87   , solrRelease = containerRelease
   88   }
   89 
   90 solrAsset :: AssetSlot -> SolrDocument
   91 solrAsset as@AssetSlot{ slotAsset = Asset{ assetRow = AssetRow{..}, ..}, assetSlot = ~(Just Slot{..}) } = SolrAsset
   92   { solrId = solrDocId assetId
   93   , solrAssetId = assetId
   94   , solrVolumeId = volumeId $ volumeRow assetVolume
   95   , solrContainerId = containerId $ containerRow slotContainer
   96   , solrSegment = SolrSegment slotSegment
   97   , solrSegmentDuration = segmentLength slotSegment
   98   , solrName = assetSlotName as
   99   , solrRelease = assetRelease
  100   , solrFormatId = formatId assetFormat
  101   }
  102 
  103 solrExcerpt :: Excerpt -> SolrDocument
  104 solrExcerpt Excerpt{ excerptAsset = AssetSegment{ segmentAsset = AssetSlot{ slotAsset = Asset{ assetRow = AssetRow{..}, ..}, assetSlot = ~(Just Slot{ slotContainer = container }) }, assetSegment = seg }, ..} = SolrExcerpt
  105   { solrId = BSC.pack $ "excerpt_" <> show assetId
  106     <> maybe "" (('_':) . show) (lowerBound $ segmentRange seg)
  107   , solrAssetId = assetId
  108   , solrVolumeId = volumeId $ volumeRow assetVolume
  109   , solrContainerId = containerId $ containerRow container
  110   , solrSegment = SolrSegment seg
  111   , solrSegmentDuration = segmentLength seg
  112   , solrRelease = assetRelease
  113   }
  114 
  115 solrRecord :: RecordSlot -> SolrDocument
  116 solrRecord rs@RecordSlot{ slotRecord = r@Record{ recordRow = RecordRow{..}, ..}, recordSlot = Slot{..} } = SolrRecord
  117   { solrId = solrDocId recordId
  118     <> BSC.pack ('_' : show (containerId $ containerRow slotContainer))
  119   , solrRecordId = recordId
  120   , solrVolumeId = volumeId $ volumeRow recordVolume
  121   , solrContainerId = containerId $ containerRow slotContainer
  122   , solrSegment = SolrSegment slotSegment
  123   , solrSegmentDuration = segmentLength slotSegment
  124   , solrRecordCategoryId = categoryId recordCategory
  125   , solrRecordMeasures = SolrRecordMeasures $ map (\m -> (measureMetric m, measureDatum m)) $ getRecordMeasures r
  126   , solrRecordAge = recordSlotAge rs
  127   }
  128 
  129 solrTag :: Tag -> SolrDocument
  130 solrTag Tag{..} = SolrTagId
  131   { solrId = BSC.pack $ "tag_" <> show tagId
  132   , solrTagId = tagId
  133   , solrTagName = tagName
  134   }
  135 
  136 solrTagUse :: Id Volume -> TagUseRow -> SolrDocument
  137 solrTagUse vi TagUseRow{ useTagRow = Tag{..}, tagRowSlotId = SlotId{..}, ..} = SolrTag
  138   { solrId = BSC.pack $ "tag_" <> show tagId
  139     <> ('_' : show slotContainerId)
  140     <> (if tagRowKeyword then "" else '_' : show tagRowWhoId)
  141     <> maybe "" (('_':) . show) (lowerBound $ segmentRange slotSegmentId)
  142   , solrVolumeId = vi
  143   , solrContainerId = slotContainerId
  144   , solrSegment = SolrSegment slotSegmentId
  145   , solrSegmentDuration = segmentLength slotSegmentId
  146   , solrTagId = tagId
  147   , solrTagName = tagName
  148   , solrKeyword = tagRowKeyword `thenUse` tagName
  149   , solrPartyId = tagRowWhoId
  150   }
  151 
  152 solrComment :: Id Volume -> CommentRow -> SolrDocument
  153 solrComment vi CommentRow{ commentRowSlotId = SlotId{..}, ..} = SolrComment
  154   { solrId = BSC.pack $ "comment_" <> show commentRowId
  155   , solrVolumeId = vi
  156   , solrContainerId = slotContainerId
  157   , solrSegment = SolrSegment slotSegmentId
  158   , solrSegmentDuration = segmentLength slotSegmentId
  159   , solrCommentId = commentRowId
  160   , solrPartyId = commentRowWhoId
  161   , solrBody = Just commentRowText
  162   }
  163 
  164 type SolrM a = ReaderT BackgroundContext (InvertM BS.ByteString) a
  165 
  166 writeBlock :: BS.ByteString -> SolrM ()
  167 writeBlock = lift . give
  168 
  169 writeDocuments :: [SolrDocument] -> SolrM ()
  170 writeDocuments [] = return ()
  171 writeDocuments d =
  172   writeBlock $ BSL.toStrict $ BSB.toLazyByteString $ foldMap (("},\"add\":{\"doc\":" <>) . JSON.fromEncoding . JSON.value . JSON.toJSON) d
  173 
  174 writeUpdate :: SolrM () -> SolrM ()
  175 writeUpdate f = do
  176   writeBlock "{\"delete\":{\"query\":\"*:*\""
  177   f
  178   writeBlock "},\"commit\":{\"waitSearcher\":true,\"expungeDeletes\":true},\"optimize\":{\"waitSearcher\":true}}"
  179 
  180 joinContainers :: (a -> Slot -> b) -> [Container] -> [(a, SlotId)] -> [b]
  181 joinContainers _ _ [] = []
  182 joinContainers _ [] _ = error "joinContainers"
  183 joinContainers f cl@(c:cr) al@((a, SlotId ci s):ar)
  184   | containerId (containerRow c) == ci = f a (Slot c s) : joinContainers f cl ar
  185   | otherwise = joinContainers f cr al
  186 
  187 writeVolume :: (Volume, Maybe Citation) -> SolrM ()
  188 writeVolume (v, vc) = do
  189   writeDocuments [solrVolume v vc]
  190   cl <- lookupVolumeContainers v
  191   writeDocuments $ map solrContainer cl
  192   writeDocuments . map solrAsset . joinContainers ((. Just) . AssetSlot) cl =<< lookupVolumeAssetSlotIds v
  193   -- this could be more efficient, but there usually aren't many:
  194   writeDocuments . map solrExcerpt =<< lookupVolumeExcerpts v
  195   writeDocuments . map solrRecord . joinContainers RecordSlot cl =<< lookupVolumeRecordSlotIds v
  196   writeDocuments . map (solrTagUse (volumeId $ volumeRow v)) =<< lookupVolumeTagUseRows v
  197   writeDocuments . map (solrComment (volumeId $ volumeRow v)) =<< lookupVolumeCommentRows v
  198 
  199 writeAllDocuments :: SolrM ()
  200 writeAllDocuments = do
  201   mapM_ writeVolume =<< lookupVolumesCitations
  202   writeDocuments . map (uncurry solrParty) =<< lookupPartyAuthorizations
  203   writeDocuments . map solrTag =<< lookupTags
  204 
  205 updateIndex :: BackgroundContextM ()
  206 updateIndex = do
  207   ctx <- ask
  208   req <- peeks solrRequest
  209   t <- liftIO getCurrentTime
  210   handle
  211     (\(e :: HC.HttpException) -> focusIO $ logMsg t ("solr update failed: " ++ show e))
  212     $ do
  213       _ <- focusIO $ HC.httpNoBody req
  214         { HC.path = HC.path req <> "update/json"
  215         , HC.method = methodPost
  216         , HC.requestBody = HC.RequestBodyStreamChunked $ \wf -> do
  217           w <- runInvert $ runReaderT (writeUpdate writeAllDocuments) ctx
  218           wf $ fold <$> w
  219         , HC.requestHeaders = (hContentType, "application/json") : HC.requestHeaders req
  220         , HC.responseTimeout = HC.responseTimeoutMicro 100000000
  221         }
  222       t' <- liftIO getCurrentTime
  223       focusIO $ logMsg t' ("solr update complete " ++ show (diffUTCTime t' t))