1 {-# LANGUAGE OverloadedStrings, RecordWildCards, 2 ScopedTypeVariables #-}module Solr.Index 3 ( updateIndex 4 ) where 5 6 import Control.Exception.Lifted (handle) 7 import Control.Monad.IO.Class (liftIO) 8 import Control.Monad.Reader (ReaderT(..), ask) 9 import Control.Monad.Trans.Class (lift) 10 import qualified Data.Aeson as JSON 11 import qualified Data.Aeson.Encoding as JSON 12 import qualified Data.ByteString as BS 13 import qualified Data.ByteString.Builder as BSB 14 import qualified Data.ByteString.Char8 as BSC 15 import qualified Data.ByteString.Lazy as BSL 16 import Data.Foldable (fold) 17 import Data.Maybe (isNothing) 18 import Data.Monoid ((<>)) 19 import Data.Time.Clock (getCurrentTime, diffUTCTime) 20 import qualified Network.HTTP.Client as HC 21 import Network.HTTP.Types.Method (methodPost) 22 import Network.HTTP.Types.Header (hContentType) 23 24 import Control.Invert 25 import Ops 26 import Has 27 import HTTP.Client (HTTPClient) 28 import Service.Log 29 import Model.Segment 30 import Model.Kind 31 import Model.Id.Types 32 import Model.Permission.Types 33 import Model.Party 34 import Model.Volume.Types 35 import Model.Citation 36 import Model.Container 37 import Model.Slot.Types 38 import Model.Format.Types 39 import Model.Asset.Types 40 import Model.AssetSlot 41 import Model.AssetSegment.Types 42 import Model.Excerpt 43 import Model.Category.Types 44 import Model.Record.Types 45 import Model.RecordSlot 46 import Model.Measure 47 import Model.Tag 48 import Model.Comment 49 import Context 50 import Solr.Service 51 import Solr.Document 52 53 solrDocId :: forall a . (Kinded a, Show (Id a)) => Id a -> BS.ByteString 54 solrDocId i = kindOf (undefined :: a) <> BSC.pack ('_' : show i) 55 56 solrParty :: Party -> Maybe Permission -> SolrDocument 57 solrParty Party{ partyRow = PartyRow{..}, ..} auth = SolrParty 58 { solrId = solrDocId partyId 59 , solrPartyId = partyId 60 , solrPartySortName = partySortName 61 , solrPartyPreName = partyPreName 62 , solrPartyAffiliation = partyAffiliation 63 , solrPartyIsInstitution = isNothing partyAccount 64 , solrPartyAuthorization = auth 65 } 66 67 solrVolume :: Volume -> Maybe Citation -> SolrDocument 68 solrVolume Volume{ volumeRow = VolumeRow{..}, ..} cite = SolrVolume 69 { solrId = solrDocId volumeId 70 , solrVolumeId = volumeId 71 , solrName = Just volumeName 72 , solrBody = volumeBody 73 , solrVolumeOwnerIds = ownerIds 74 , solrVolumeOwnerNames = ownerNames 75 , solrCitation = citationHead <$> cite 76 , solrCitationYear = citationYear =<< cite 77 } where 78 (ownerIds, ownerNames) = unzip volumeOwners 79 80 solrContainer :: Container -> SolrDocument 81 solrContainer c@Container{ containerRow = ContainerRow{..}, ..} = SolrContainer 82 { solrId = solrDocId containerId 83 , solrContainerId = containerId 84 , solrVolumeId = volumeId $ volumeRow containerVolume 85 , solrName = containerName 86 , solrContainerTop = containerTop 87 , solrContainerDate = getContainerDate c 88 , solrRelease = containerRelease 89 } 90 91 solrAsset :: AssetSlot -> SolrDocument 92 solrAsset as@AssetSlot{ slotAsset = Asset{ assetRow = AssetRow{..}, ..}, assetSlot = ~(Just Slot{..}) } = SolrAsset 93 { solrId = solrDocId assetId 94 , solrAssetId = assetId 95 , solrVolumeId = volumeId $ volumeRow assetVolume 96 , solrContainerId = containerId $ containerRow slotContainer 97 , solrSegment = SolrSegment slotSegment 98 , solrSegmentDuration = segmentLength slotSegment 99 , solrName = assetSlotName as 100 , solrRelease = assetRelease 101 , solrFormatId = formatId assetFormat 102 } 103 104 solrExcerpt :: Excerpt -> SolrDocument 105 solrExcerpt Excerpt{ excerptAsset = AssetSegment{ segmentAsset = AssetSlot{ slotAsset = Asset{ assetRow = AssetRow{..}, ..}, assetSlot = ~(Just Slot{ slotContainer = container }) }, assetSegment = seg }, ..} = SolrExcerpt 106 { solrId = BSC.pack $ "excerpt_" <> show assetId 107 <> maybe "" (('_':) . show) (lowerBound $ segmentRange seg) 108 , solrAssetId = assetId 109 , solrVolumeId = volumeId $ volumeRow assetVolume 110 , solrContainerId = containerId $ containerRow container 111 , solrSegment = SolrSegment seg 112 , solrSegmentDuration = segmentLength seg 113 , solrRelease = assetRelease 114 } 115 116 solrRecord :: RecordSlot -> SolrDocument 117 solrRecord rs@RecordSlot{ slotRecord = r@Record{ recordRow = RecordRow{..}, ..}, recordSlot = Slot{..} } = SolrRecord 118 { solrId = solrDocId recordId 119 <> BSC.pack ('_' : show (containerId $ containerRow slotContainer)) 120 , solrRecordId = recordId 121 , solrVolumeId = volumeId $ volumeRow recordVolume 122 , solrContainerId = containerId $ containerRow slotContainer 123 , solrSegment = SolrSegment slotSegment 124 , solrSegmentDuration = segmentLength slotSegment 125 , solrRecordCategoryId = categoryId recordCategory 126 , solrRecordMeasures = SolrRecordMeasures $ map (\m -> (measureMetric m, measureDatum m)) $ getRecordMeasures r 127 , solrRecordAge = recordSlotAge rs 128 } 129 130 solrTag :: Tag -> SolrDocument 131 solrTag Tag{..} = SolrTagId 132 { solrId = BSC.pack $ "tag_" <> show tagId 133 , solrTagId = tagId 134 , solrTagName = tagName 135 } 136 137 solrTagUse :: Id Volume -> TagUseRow -> SolrDocument 138 solrTagUse vi TagUseRow{ useTagRow = Tag{..}, tagRowSlotId = SlotId{..}, ..} = SolrTag 139 { solrId = BSC.pack $ "tag_" <> show tagId 140 <> ('_' : show slotContainerId) 141 <> (if tagRowKeyword then "" else '_' : show tagRowWhoId) 142 <> maybe "" (('_':) . show) (lowerBound $ segmentRange slotSegmentId) 143 , solrVolumeId = vi 144 , solrContainerId = slotContainerId 145 , solrSegment = SolrSegment slotSegmentId 146 , solrSegmentDuration = segmentLength slotSegmentId 147 , solrTagId = tagId 148 , solrTagName = tagName 149 , solrKeyword = tagRowKeyword `thenUse` tagName 150 , solrPartyId = tagRowWhoId 151 } 152 153 solrComment :: Id Volume -> CommentRow -> SolrDocument 154 solrComment vi CommentRow{ commentRowSlotId = SlotId{..}, ..} = SolrComment 155 { solrId = BSC.pack $ "comment_" <> show commentRowId 156 , solrVolumeId = vi 157 , solrContainerId = slotContainerId 158 , solrSegment = SolrSegment slotSegmentId 159 , solrSegmentDuration = segmentLength slotSegmentId 160 , solrCommentId = commentRowId 161 , solrPartyId = commentRowWhoId 162 , solrBody = Just commentRowText 163 } 164 165 -- type SolrM a = ReaderT BackgroundContext (InvertM BS.ByteString) a 166 167 writeBlock :: BS.ByteString -> ReaderT a (InvertM BS.ByteString) () 168 writeBlock = lift . give 169 170 writeDocuments :: [SolrDocument] -> ReaderT a (InvertM BS.ByteString) () 171 writeDocuments [] = return () 172 writeDocuments d = 173 writeBlock $ BSL.toStrict $ BSB.toLazyByteString $ foldMap (("},\"add\":{\"doc\":" <>) . JSON.fromEncoding . JSON.value . JSON.toJSON) d 174 175 writeUpdate :: ReaderT SolrIndexingContext (InvertM BS.ByteString) () -> ReaderT SolrIndexingContext (InvertM BS.ByteString) () 176 writeUpdate f = do 177 writeBlock "{\"delete\":{\"query\":\"*:*\"" 178 f 179 writeBlock "},\"commit\":{\"waitSearcher\":true,\"expungeDeletes\":true},\"optimize\":{\"waitSearcher\":true}}" 180 181 joinContainers :: (a -> Slot -> b) -> [Container] -> [(a, SlotId)] -> [b] 182 joinContainers _ _ [] = [] 183 joinContainers _ [] _ = error "joinContainers" 184 joinContainers f cl@(c:cr) al@((a, SlotId ci s):ar) 185 | containerId (containerRow c) == ci = f a (Slot c s) : joinContainers f cl ar 186 | otherwise = joinContainers f cr al 187 188 writeVolume :: (Volume, Maybe Citation) -> ReaderT SolrIndexingContext (InvertM BS.ByteString) () 189 writeVolume (v, vc) = do 190 writeDocuments [solrVolume v vc] 191 cl <- lookupVolumeContainers v 192 writeDocuments $ map solrContainer cl 193 writeDocuments . map solrAsset . joinContainers ((. Just) . AssetSlot) cl =<< lookupVolumeAssetSlotIds v 194 -- this could be more efficient, but there usually aren't many: 195 writeDocuments . map solrExcerpt =<< lookupVolumeExcerpts v 196 writeDocuments . map solrRecord . joinContainers RecordSlot cl =<< lookupVolumeRecordSlotIds v 197 writeDocuments . map (solrTagUse (volumeId $ volumeRow v)) =<< lookupVolumeTagUseRows v 198 writeDocuments . map (solrComment (volumeId $ volumeRow v)) =<< lookupVolumeCommentRows v 199 200 writeAllDocuments :: ReaderT SolrIndexingContext (InvertM BS.ByteString) () 201 writeAllDocuments = do 202 mapM_ writeVolume =<< lookupVolumesCitations 203 writeDocuments . map (uncurry solrParty) =<< lookupPartyAuthorizations 204 writeDocuments . map solrTag =<< lookupTags 205 206 updateIndex :: SolrIndexingContextM () 207 updateIndex = do 208 (ctx :: SolrIndexingContext) <- ask 209 (solr :: Solr) <- peek 210 let (req :: HC.Request) = solrRequest solr 211 t <- liftIO getCurrentTime 212 handle 213 (\(e :: HC.HttpException) -> do 214 (logs :: Logs) <- peek 215 liftIO $ logMsg t ("solr update failed: " ++ show e) logs) 216 $ do 217 (hc :: HTTPClient) <- peek 218 let idxReq = req 219 { HC.path = HC.path req <> "update/json" 220 , HC.method = methodPost 221 , HC.requestBody = HC.RequestBodyStreamChunked $ \wf -> do 222 (w :: IO (Maybe BSC.ByteString)) <- 223 runInvert 224 (runReaderT (writeUpdate writeAllDocuments) ctx :: InvertM BS.ByteString ()) 225 wf (fold <$> w :: IO BSC.ByteString) :: IO () 226 , HC.requestHeaders = (hContentType, "application/json") : HC.requestHeaders req 227 , HC.responseTimeout = HC.responseTimeoutMicro 100000000 228 } 229 _ <- liftIO (HC.httpNoBody idxReq hc) 230 t' <- liftIO getCurrentTime 231 (logs :: Logs) <- peek 232 liftIO $ logMsg t' ("solr update complete " ++ show (diffUTCTime t' t)) logs