1 {-# LANGUAGE OverloadedStrings, RecordWildCards,
    2   ScopedTypeVariables #-}module Solr.Index
    3   ( updateIndex
    4   ) where
    5 
    6 import Control.Exception.Lifted (handle)
    7 import Control.Monad.IO.Class (liftIO)
    8 import Control.Monad.Reader (ReaderT(..), ask)
    9 import Control.Monad.Trans.Class (lift)
   10 import qualified Data.Aeson as JSON
   11 import qualified Data.Aeson.Encoding as JSON
   12 import qualified Data.ByteString as BS
   13 import qualified Data.ByteString.Builder as BSB
   14 import qualified Data.ByteString.Char8 as BSC
   15 import qualified Data.ByteString.Lazy as BSL
   16 import Data.Foldable (fold)
   17 import Data.Maybe (isNothing)
   18 import Data.Monoid ((<>))
   19 import Data.Time.Clock (getCurrentTime, diffUTCTime)
   20 import qualified Network.HTTP.Client as HC
   21 import Network.HTTP.Types.Method (methodPost)
   22 import Network.HTTP.Types.Header (hContentType)
   23 
   24 import Control.Invert
   25 import Ops
   26 import Has
   27 import HTTP.Client (HTTPClient)
   28 import Service.Log
   29 import Model.Segment
   30 import Model.Kind
   31 import Model.Id.Types
   32 import Model.Permission.Types
   33 import Model.Party
   34 import Model.Volume.Types
   35 import Model.Citation
   36 import Model.Container
   37 import Model.Slot.Types
   38 import Model.Format.Types
   39 import Model.Asset.Types
   40 import Model.AssetSlot
   41 import Model.AssetSegment.Types
   42 import Model.Excerpt
   43 import Model.Category.Types
   44 import Model.Record.Types
   45 import Model.RecordSlot
   46 import Model.Measure
   47 import Model.Tag
   48 import Model.Comment
   49 import Context
   50 import Solr.Service
   51 import Solr.Document
   52 
   53 solrDocId :: forall a . (Kinded a, Show (Id a)) => Id a -> BS.ByteString
   54 solrDocId i = kindOf (undefined :: a) <> BSC.pack ('_' : show i)
   55 
   56 solrParty :: Party -> Maybe Permission -> SolrDocument
   57 solrParty Party{ partyRow = PartyRow{..}, ..} auth = SolrParty
   58   { solrId = solrDocId partyId
   59   , solrPartyId = partyId
   60   , solrPartySortName = partySortName
   61   , solrPartyPreName = partyPreName
   62   , solrPartyAffiliation = partyAffiliation
   63   , solrPartyIsInstitution = isNothing partyAccount
   64   , solrPartyAuthorization = auth
   65   }
   66 
   67 solrVolume :: Volume -> Maybe Citation -> SolrDocument
   68 solrVolume Volume{ volumeRow = VolumeRow{..}, ..} cite = SolrVolume
   69   { solrId = solrDocId volumeId
   70   , solrVolumeId = volumeId
   71   , solrName = Just volumeName
   72   , solrBody = volumeBody
   73   , solrVolumeOwnerIds = ownerIds
   74   , solrVolumeOwnerNames = ownerNames
   75   , solrCitation = citationHead <$> cite
   76   , solrCitationYear = citationYear =<< cite
   77   } where
   78   (ownerIds, ownerNames) = unzip volumeOwners
   79 
   80 solrContainer :: Container -> SolrDocument
   81 solrContainer c@Container{ containerRow = ContainerRow{..}, ..} = SolrContainer
   82   { solrId = solrDocId containerId
   83   , solrContainerId = containerId
   84   , solrVolumeId = volumeId $ volumeRow containerVolume
   85   , solrName = containerName
   86   , solrContainerTop = containerTop
   87   , solrContainerDate = getContainerDate c
   88   , solrRelease = containerRelease
   89   }
   90 
   91 solrAsset :: AssetSlot -> SolrDocument
   92 solrAsset as@AssetSlot{ slotAsset = Asset{ assetRow = AssetRow{..}, ..}, assetSlot = ~(Just Slot{..}) } = SolrAsset
   93   { solrId = solrDocId assetId
   94   , solrAssetId = assetId
   95   , solrVolumeId = volumeId $ volumeRow assetVolume
   96   , solrContainerId = containerId $ containerRow slotContainer
   97   , solrSegment = SolrSegment slotSegment
   98   , solrSegmentDuration = segmentLength slotSegment
   99   , solrName = assetSlotName as
  100   , solrRelease = assetRelease
  101   , solrFormatId = formatId assetFormat
  102   }
  103 
  104 solrExcerpt :: Excerpt -> SolrDocument
  105 solrExcerpt Excerpt{ excerptAsset = AssetSegment{ segmentAsset = AssetSlot{ slotAsset = Asset{ assetRow = AssetRow{..}, ..}, assetSlot = ~(Just Slot{ slotContainer = container }) }, assetSegment = seg }, ..} = SolrExcerpt
  106   { solrId = BSC.pack $ "excerpt_" <> show assetId
  107     <> maybe "" (('_':) . show) (lowerBound $ segmentRange seg)
  108   , solrAssetId = assetId
  109   , solrVolumeId = volumeId $ volumeRow assetVolume
  110   , solrContainerId = containerId $ containerRow container
  111   , solrSegment = SolrSegment seg
  112   , solrSegmentDuration = segmentLength seg
  113   , solrRelease = assetRelease
  114   }
  115 
  116 solrRecord :: RecordSlot -> SolrDocument
  117 solrRecord rs@RecordSlot{ slotRecord = r@Record{ recordRow = RecordRow{..}, ..}, recordSlot = Slot{..} } = SolrRecord
  118   { solrId = solrDocId recordId
  119     <> BSC.pack ('_' : show (containerId $ containerRow slotContainer))
  120   , solrRecordId = recordId
  121   , solrVolumeId = volumeId $ volumeRow recordVolume
  122   , solrContainerId = containerId $ containerRow slotContainer
  123   , solrSegment = SolrSegment slotSegment
  124   , solrSegmentDuration = segmentLength slotSegment
  125   , solrRecordCategoryId = categoryId recordCategory
  126   , solrRecordMeasures = SolrRecordMeasures $ map (\m -> (measureMetric m, measureDatum m)) $ getRecordMeasures r
  127   , solrRecordAge = recordSlotAge rs
  128   }
  129 
  130 solrTag :: Tag -> SolrDocument
  131 solrTag Tag{..} = SolrTagId
  132   { solrId = BSC.pack $ "tag_" <> show tagId
  133   , solrTagId = tagId
  134   , solrTagName = tagName
  135   }
  136 
  137 solrTagUse :: Id Volume -> TagUseRow -> SolrDocument
  138 solrTagUse vi TagUseRow{ useTagRow = Tag{..}, tagRowSlotId = SlotId{..}, ..} = SolrTag
  139   { solrId = BSC.pack $ "tag_" <> show tagId
  140     <> ('_' : show slotContainerId)
  141     <> (if tagRowKeyword then "" else '_' : show tagRowWhoId)
  142     <> maybe "" (('_':) . show) (lowerBound $ segmentRange slotSegmentId)
  143   , solrVolumeId = vi
  144   , solrContainerId = slotContainerId
  145   , solrSegment = SolrSegment slotSegmentId
  146   , solrSegmentDuration = segmentLength slotSegmentId
  147   , solrTagId = tagId
  148   , solrTagName = tagName
  149   , solrKeyword = tagRowKeyword `thenUse` tagName
  150   , solrPartyId = tagRowWhoId
  151   }
  152 
  153 solrComment :: Id Volume -> CommentRow -> SolrDocument
  154 solrComment vi CommentRow{ commentRowSlotId = SlotId{..}, ..} = SolrComment
  155   { solrId = BSC.pack $ "comment_" <> show commentRowId
  156   , solrVolumeId = vi
  157   , solrContainerId = slotContainerId
  158   , solrSegment = SolrSegment slotSegmentId
  159   , solrSegmentDuration = segmentLength slotSegmentId
  160   , solrCommentId = commentRowId
  161   , solrPartyId = commentRowWhoId
  162   , solrBody = Just commentRowText
  163   }
  164 
  165 -- type SolrM a = ReaderT BackgroundContext (InvertM BS.ByteString) a
  166 
  167 writeBlock :: BS.ByteString -> ReaderT a (InvertM BS.ByteString) ()
  168 writeBlock = lift . give
  169 
  170 writeDocuments :: [SolrDocument] -> ReaderT a (InvertM BS.ByteString) ()
  171 writeDocuments [] = return ()
  172 writeDocuments d =
  173   writeBlock $ BSL.toStrict $ BSB.toLazyByteString $ foldMap (("},\"add\":{\"doc\":" <>) . JSON.fromEncoding . JSON.value . JSON.toJSON) d
  174 
  175 writeUpdate :: ReaderT SolrIndexingContext (InvertM BS.ByteString) () -> ReaderT SolrIndexingContext (InvertM BS.ByteString) ()
  176 writeUpdate f = do
  177   writeBlock "{\"delete\":{\"query\":\"*:*\""
  178   f
  179   writeBlock "},\"commit\":{\"waitSearcher\":true,\"expungeDeletes\":true},\"optimize\":{\"waitSearcher\":true}}"
  180 
  181 joinContainers :: (a -> Slot -> b) -> [Container] -> [(a, SlotId)] -> [b]
  182 joinContainers _ _ [] = []
  183 joinContainers _ [] _ = error "joinContainers"
  184 joinContainers f cl@(c:cr) al@((a, SlotId ci s):ar)
  185   | containerId (containerRow c) == ci = f a (Slot c s) : joinContainers f cl ar
  186   | otherwise = joinContainers f cr al
  187 
  188 writeVolume :: (Volume, Maybe Citation) -> ReaderT SolrIndexingContext (InvertM BS.ByteString) ()
  189 writeVolume (v, vc) = do
  190   writeDocuments [solrVolume v vc]
  191   cl <- lookupVolumeContainers v
  192   writeDocuments $ map solrContainer cl
  193   writeDocuments . map solrAsset . joinContainers ((. Just) . AssetSlot) cl =<< lookupVolumeAssetSlotIds v
  194   -- this could be more efficient, but there usually aren't many:
  195   writeDocuments . map solrExcerpt =<< lookupVolumeExcerpts v
  196   writeDocuments . map solrRecord . joinContainers RecordSlot cl =<< lookupVolumeRecordSlotIds v
  197   writeDocuments . map (solrTagUse (volumeId $ volumeRow v)) =<< lookupVolumeTagUseRows v
  198   writeDocuments . map (solrComment (volumeId $ volumeRow v)) =<< lookupVolumeCommentRows v
  199 
  200 writeAllDocuments :: ReaderT SolrIndexingContext (InvertM BS.ByteString) ()
  201 writeAllDocuments = do
  202   mapM_ writeVolume =<< lookupVolumesCitations
  203   writeDocuments . map (uncurry solrParty) =<< lookupPartyAuthorizations
  204   writeDocuments . map solrTag =<< lookupTags
  205 
  206 updateIndex :: SolrIndexingContextM ()
  207 updateIndex = do
  208   (ctx :: SolrIndexingContext) <- ask
  209   (solr :: Solr) <- peek
  210   let (req :: HC.Request) = solrRequest solr
  211   t <- liftIO getCurrentTime
  212   handle
  213     (\(e :: HC.HttpException) -> do
  214        (logs :: Logs) <- peek
  215        liftIO $ logMsg t ("solr update failed: " ++ show e) logs)
  216     $ do
  217       (hc :: HTTPClient) <- peek
  218       let idxReq = req
  219             { HC.path = HC.path req <> "update/json"
  220             , HC.method = methodPost
  221             , HC.requestBody = HC.RequestBodyStreamChunked $ \wf -> do
  222               (w :: IO (Maybe BSC.ByteString)) <-
  223                   runInvert
  224                       (runReaderT (writeUpdate writeAllDocuments) ctx :: InvertM BS.ByteString ())
  225               wf (fold <$> w :: IO BSC.ByteString) :: IO ()
  226             , HC.requestHeaders = (hContentType, "application/json") : HC.requestHeaders req
  227             , HC.responseTimeout = HC.responseTimeoutMicro 100000000
  228             }
  229       _ <- liftIO (HC.httpNoBody idxReq hc)
  230       t' <- liftIO getCurrentTime
  231       (logs :: Logs) <- peek
  232       liftIO $ logMsg t' ("solr update complete " ++ show (diffUTCTime t' t)) logs