1 {-# LANGUAGE OverloadedStrings, RecordWildCards, GeneralizedNewtypeDeriving, ScopedTypeVariables, TemplateHaskell #-} 2 module Databrary.Solr.Index 3 ( updateIndex 4 ) where 5 6 import Control.Exception.Lifted (handle) 7 import Control.Monad.IO.Class (liftIO) 8 import Control.Monad.Reader (ReaderT(..), ask) 9 import Control.Monad.Trans.Class (lift) 10 import qualified Data.Aeson as JSON 11 import qualified Data.Aeson.Encoding as JSON 12 import qualified Data.ByteString as BS 13 import qualified Data.ByteString.Builder as BSB 14 import qualified Data.ByteString.Char8 as BSC 15 import qualified Data.ByteString.Lazy as BSL 16 import Data.Foldable (fold) 17 import Data.Maybe (isNothing) 18 import Data.Monoid ((<>)) 19 import Data.Time.Clock (getCurrentTime, diffUTCTime) 20 import qualified Network.HTTP.Client as HC 21 import Network.HTTP.Types.Method (methodPost) 22 import Network.HTTP.Types.Header (hContentType) 23 24 import Control.Invert 25 import Databrary.Ops 26 import Databrary.Has 27 import Databrary.Service.Log 28 import Databrary.Model.Segment 29 import Databrary.Model.Kind 30 import Databrary.Model.Id.Types 31 import Databrary.Model.Permission.Types 32 import Databrary.Model.Party 33 import Databrary.Model.Volume.Types 34 import Databrary.Model.Citation 35 import Databrary.Model.Container 36 import Databrary.Model.Slot.Types 37 import Databrary.Model.Format.Types 38 import Databrary.Model.Asset.Types 39 import Databrary.Model.AssetSlot 40 import Databrary.Model.AssetSegment.Types 41 import Databrary.Model.Excerpt 42 import Databrary.Model.Category.Types 43 import Databrary.Model.Record.Types 44 import Databrary.Model.RecordSlot 45 import Databrary.Model.Measure 46 import Databrary.Model.Tag 47 import Databrary.Model.Comment 48 import Databrary.Context 49 import Databrary.Solr.Service 50 import Databrary.Solr.Document 51 52 solrDocId :: forall a . (Kinded a, Show (Id a)) => Id a -> BS.ByteString 53 solrDocId i = kindOf (undefined :: a) <> BSC.pack ('_' : show i) 54 55 solrParty :: Party -> Maybe Permission -> SolrDocument 56 solrParty Party{ partyRow = PartyRow{..}, ..} auth = SolrParty 57 { solrId = solrDocId partyId 58 , solrPartyId = partyId 59 , solrPartySortName = partySortName 60 , solrPartyPreName = partyPreName 61 , solrPartyAffiliation = partyAffiliation 62 , solrPartyIsInstitution = isNothing partyAccount 63 , solrPartyAuthorization = auth 64 } 65 66 solrVolume :: Volume -> Maybe Citation -> SolrDocument 67 solrVolume Volume{ volumeRow = VolumeRow{..}, ..} cite = SolrVolume 68 { solrId = solrDocId volumeId 69 , solrVolumeId = volumeId 70 , solrName = Just volumeName 71 , solrBody = volumeBody 72 , solrVolumeOwnerIds = ownerIds 73 , solrVolumeOwnerNames = ownerNames 74 , solrCitation = citationHead <$> cite 75 , solrCitationYear = citationYear =<< cite 76 } where 77 (ownerIds, ownerNames) = unzip volumeOwners 78 79 solrContainer :: Container -> SolrDocument 80 solrContainer c@Container{ containerRow = ContainerRow{..}, ..} = SolrContainer 81 { solrId = solrDocId containerId 82 , solrContainerId = containerId 83 , solrVolumeId = volumeId $ volumeRow containerVolume 84 , solrName = containerName 85 , solrContainerTop = containerTop 86 , solrContainerDate = getContainerDate c 87 , solrRelease = containerRelease 88 } 89 90 solrAsset :: AssetSlot -> SolrDocument 91 solrAsset as@AssetSlot{ slotAsset = Asset{ assetRow = AssetRow{..}, ..}, assetSlot = ~(Just Slot{..}) } = SolrAsset 92 { solrId = solrDocId assetId 93 , solrAssetId = assetId 94 , solrVolumeId = volumeId $ volumeRow assetVolume 95 , solrContainerId = containerId $ containerRow slotContainer 96 , solrSegment = SolrSegment slotSegment 97 , solrSegmentDuration = segmentLength slotSegment 98 , solrName = assetSlotName as 99 , solrRelease = assetRelease 100 , solrFormatId = formatId assetFormat 101 } 102 103 solrExcerpt :: Excerpt -> SolrDocument 104 solrExcerpt Excerpt{ excerptAsset = AssetSegment{ segmentAsset = AssetSlot{ slotAsset = Asset{ assetRow = AssetRow{..}, ..}, assetSlot = ~(Just Slot{ slotContainer = container }) }, assetSegment = seg }, ..} = SolrExcerpt 105 { solrId = BSC.pack $ "excerpt_" <> show assetId 106 <> maybe "" (('_':) . show) (lowerBound $ segmentRange seg) 107 , solrAssetId = assetId 108 , solrVolumeId = volumeId $ volumeRow assetVolume 109 , solrContainerId = containerId $ containerRow container 110 , solrSegment = SolrSegment seg 111 , solrSegmentDuration = segmentLength seg 112 , solrRelease = assetRelease 113 } 114 115 solrRecord :: RecordSlot -> SolrDocument 116 solrRecord rs@RecordSlot{ slotRecord = r@Record{ recordRow = RecordRow{..}, ..}, recordSlot = Slot{..} } = SolrRecord 117 { solrId = solrDocId recordId 118 <> BSC.pack ('_' : show (containerId $ containerRow slotContainer)) 119 , solrRecordId = recordId 120 , solrVolumeId = volumeId $ volumeRow recordVolume 121 , solrContainerId = containerId $ containerRow slotContainer 122 , solrSegment = SolrSegment slotSegment 123 , solrSegmentDuration = segmentLength slotSegment 124 , solrRecordCategoryId = categoryId recordCategory 125 , solrRecordMeasures = SolrRecordMeasures $ map (\m -> (measureMetric m, measureDatum m)) $ getRecordMeasures r 126 , solrRecordAge = recordSlotAge rs 127 } 128 129 solrTag :: Tag -> SolrDocument 130 solrTag Tag{..} = SolrTagId 131 { solrId = BSC.pack $ "tag_" <> show tagId 132 , solrTagId = tagId 133 , solrTagName = tagName 134 } 135 136 solrTagUse :: Id Volume -> TagUseRow -> SolrDocument 137 solrTagUse vi TagUseRow{ useTagRow = Tag{..}, tagRowSlotId = SlotId{..}, ..} = SolrTag 138 { solrId = BSC.pack $ "tag_" <> show tagId 139 <> ('_' : show slotContainerId) 140 <> (if tagRowKeyword then "" else '_' : show tagRowWhoId) 141 <> maybe "" (('_':) . show) (lowerBound $ segmentRange slotSegmentId) 142 , solrVolumeId = vi 143 , solrContainerId = slotContainerId 144 , solrSegment = SolrSegment slotSegmentId 145 , solrSegmentDuration = segmentLength slotSegmentId 146 , solrTagId = tagId 147 , solrTagName = tagName 148 , solrKeyword = tagRowKeyword `thenUse` tagName 149 , solrPartyId = tagRowWhoId 150 } 151 152 solrComment :: Id Volume -> CommentRow -> SolrDocument 153 solrComment vi CommentRow{ commentRowSlotId = SlotId{..}, ..} = SolrComment 154 { solrId = BSC.pack $ "comment_" <> show commentRowId 155 , solrVolumeId = vi 156 , solrContainerId = slotContainerId 157 , solrSegment = SolrSegment slotSegmentId 158 , solrSegmentDuration = segmentLength slotSegmentId 159 , solrCommentId = commentRowId 160 , solrPartyId = commentRowWhoId 161 , solrBody = Just commentRowText 162 } 163 164 type SolrM a = ReaderT BackgroundContext (InvertM BS.ByteString) a 165 166 writeBlock :: BS.ByteString -> SolrM () 167 writeBlock = lift . give 168 169 writeDocuments :: [SolrDocument] -> SolrM () 170 writeDocuments [] = return () 171 writeDocuments d = 172 writeBlock $ BSL.toStrict $ BSB.toLazyByteString $ foldMap (("},\"add\":{\"doc\":" <>) . JSON.fromEncoding . JSON.value . JSON.toJSON) d 173 174 writeUpdate :: SolrM () -> SolrM () 175 writeUpdate f = do 176 writeBlock "{\"delete\":{\"query\":\"*:*\"" 177 f 178 writeBlock "},\"commit\":{\"waitSearcher\":true,\"expungeDeletes\":true},\"optimize\":{\"waitSearcher\":true}}" 179 180 joinContainers :: (a -> Slot -> b) -> [Container] -> [(a, SlotId)] -> [b] 181 joinContainers _ _ [] = [] 182 joinContainers _ [] _ = error "joinContainers" 183 joinContainers f cl@(c:cr) al@((a, SlotId ci s):ar) 184 | containerId (containerRow c) == ci = f a (Slot c s) : joinContainers f cl ar 185 | otherwise = joinContainers f cr al 186 187 writeVolume :: (Volume, Maybe Citation) -> SolrM () 188 writeVolume (v, vc) = do 189 writeDocuments [solrVolume v vc] 190 cl <- lookupVolumeContainers v 191 writeDocuments $ map solrContainer cl 192 writeDocuments . map solrAsset . joinContainers ((. Just) . AssetSlot) cl =<< lookupVolumeAssetSlotIds v 193 -- this could be more efficient, but there usually aren't many: 194 writeDocuments . map solrExcerpt =<< lookupVolumeExcerpts v 195 writeDocuments . map solrRecord . joinContainers RecordSlot cl =<< lookupVolumeRecordSlotIds v 196 writeDocuments . map (solrTagUse (volumeId $ volumeRow v)) =<< lookupVolumeTagUseRows v 197 writeDocuments . map (solrComment (volumeId $ volumeRow v)) =<< lookupVolumeCommentRows v 198 199 writeAllDocuments :: SolrM () 200 writeAllDocuments = do 201 mapM_ writeVolume =<< lookupVolumesCitations 202 writeDocuments . map (uncurry solrParty) =<< lookupPartyAuthorizations 203 writeDocuments . map solrTag =<< lookupTags 204 205 updateIndex :: BackgroundContextM () 206 updateIndex = do 207 ctx <- ask 208 req <- peeks solrRequest 209 t <- liftIO getCurrentTime 210 handle 211 (\(e :: HC.HttpException) -> focusIO $ logMsg t ("solr update failed: " ++ show e)) 212 $ do 213 _ <- focusIO $ HC.httpNoBody req 214 { HC.path = HC.path req <> "update/json" 215 , HC.method = methodPost 216 , HC.requestBody = HC.RequestBodyStreamChunked $ \wf -> do 217 w <- runInvert $ runReaderT (writeUpdate writeAllDocuments) ctx 218 wf $ fold <$> w 219 , HC.requestHeaders = (hContentType, "application/json") : HC.requestHeaders req 220 , HC.responseTimeout = HC.responseTimeoutMicro 100000000 221 } 222 t' <- liftIO getCurrentTime 223 focusIO $ logMsg t' ("solr update complete " ++ show (diffUTCTime t' t))