1 {-# LANGUAGE OverloadedStrings #-} 2 module Databrary.Model.Citation.CrossRef 3 ( lookupCitation 4 -- for testing 5 , uriHDL 6 ) where 7 8 import Control.Applicative (optional) 9 import Control.Exception (handle) 10 import Control.Monad.Trans.Class (lift) 11 import Control.Monad.Trans.Maybe (MaybeT(..), runMaybeT) 12 import qualified Data.Attoparsec.ByteString as P 13 import qualified Data.ByteString.Char8 as BSC 14 import Data.Maybe (fromJust) 15 import qualified Data.Text.Lazy as TL 16 import qualified Data.Text.Lazy.Encoding as TLE 17 import qualified Network.HTTP.Client as HC 18 import qualified Network.URI as URI 19 20 import Databrary.Ops 21 import qualified Databrary.JSON as JSON 22 import Databrary.HTTP.Client 23 import Databrary.Model.URL 24 import Databrary.Model.Citation.Types 25 26 crossRefUrl :: HC.Request 27 crossRefUrl = (fromJust $ HC.parseRequest "http://data.crossref.org/") 28 { HC.cookieJar = Nothing 29 } 30 31 crossRefReq :: String -> HC.Request 32 crossRefReq h = crossRefUrl { HC.path = BSC.pack $ '/' : URI.escapeURIString URI.isUnescapedInURIComponent h } 33 34 uriHDL :: URI.URI -> Maybe String 35 uriHDL u 36 | URI.uriScheme u == "hdl:" = Just $ URI.uriPath u ++ URI.uriQuery u 37 | otherwise = Nothing 38 39 parseCitation :: JSON.Value -> JSON.Parser Citation 40 parseCitation = JSON.withObject "citation" $ \o -> 41 Citation 42 <$> o JSON..:? "head" JSON..!= "" 43 <*> (Just <$> (o JSON..: "DOI" >>= parseDOI)) 44 <*> optional (o JSON..: "issued" >>= (JSON..: "date-parts") >>= (`JSON.lookupAtParse` 0) >>= (`JSON.lookupAtParse` 0)) 45 <*> o JSON..:? "title" 46 where 47 parseDOI d = (hdlURL d) `useWhen` (validHDL d) 48 49 lookupCitation :: URI.URI -> HTTPClient -> IO (Maybe Citation) 50 lookupCitation uri hcm = runMaybeT $ do 51 req <- may $ crossRefReq <$> uriHDL uri 52 j <- MaybeT $ httpMaybe $ 53 HC.withResponse (requestAcceptContent "application/vnd.citationstyles.csl+json" req) hcm 54 (fmap P.maybeResult . httpParse JSON.json) 55 cite <- may $ JSON.parseMaybe parseCitation j 56 -- empirically this is UTF-8, but does not say so: 57 lift $ handle 58 (\(_ :: HC.HttpException) -> return cite) -- this actually happens fairly often 59 $ (\h -> cite{ citationHead = TL.toStrict $ TLE.decodeUtf8 $ HC.responseBody h }) <$> 60 HC.httpLbs (requestAcceptContent "text/x-bibliography;style=apa" req) hcm 61 where 62 may = MaybeT . return