1 {-# LANGUAGE OverloadedStrings #-}
    2 module Databrary.Model.Funding.FundRef
    3   ( fundRefDOI
    4   , lookupFunderRef
    5   , searchFundRef
    6   -- for testing
    7   , annotateFunder
    8   ) where
    9 
   10 import Control.Monad ((<=<), (>=>), guard, mfilter)
   11 import Control.Monad.IO.Class (MonadIO)
   12 import Control.Monad.Trans.Class (lift)
   13 import Control.Monad.Trans.Maybe (MaybeT(..), runMaybeT)
   14 import Data.Foldable (fold)
   15 import Data.Function (on)
   16 import qualified Data.HashMap.Strict as HM
   17 import Data.List (stripPrefix, sortBy, nubBy)
   18 import Data.Maybe (fromJust, mapMaybe)
   19 import Data.Monoid ((<>))
   20 import Data.Ord (comparing, Down(..))
   21 import Data.String (IsString(..))
   22 import qualified Data.Text as T
   23 import qualified Data.Text.Encoding as TE
   24 import qualified Data.Vector as V
   25 import qualified Network.HTTP.Client as HC
   26 import Text.Read (readMaybe)
   27 
   28 import Databrary.Ops
   29 import Databrary.Has (MonadHas, focusIO)
   30 import qualified Databrary.JSON as JSON
   31 import Databrary.HTTP.Client
   32 import Databrary.Service.DB
   33 import Databrary.Model.Id.Types
   34 import Databrary.Model.GeoNames
   35 import Databrary.Model.Funding
   36 
   37 fundRefDOI :: String
   38 fundRefDOI = "10.13039/"
   39 
   40 -- Not quite like Data.CaseInsensitive (non-strict 'ciFolded')
   41 data CI = CI
   42   { unCI :: !T.Text
   43   , ciFolded :: T.Text
   44   }
   45 
   46 {-# NOINLINE toCI #-} -- toCaseFold's entire switch gets inlined!
   47 toCI :: T.Text -> CI
   48 toCI t = CI t (T.toCaseFold t)
   49 
   50 instance IsString CI where
   51   fromString = toCI . fromString
   52 
   53 onCI :: (T.Text -> T.Text -> a) -> CI -> CI -> a
   54 onCI f = f `on` ciFolded
   55 
   56 instance Eq CI where
   57   (==) = onCI (==)
   58   (/=) = onCI (/=)
   59 
   60 annotateFunder :: Funder -> [T.Text] -> Maybe T.Text -> Funder
   61 annotateFunder f [] Nothing = f
   62 annotateFunder f@Funder{ funderName = n } a c = f{ funderName =
   63   maybe id (\cc -> (<> (", " <> cc))) (mfilter (not . noc . toCI) c)
   64   $ case unCI <$> nai' of
   65       [] -> "" -- impossible
   66       [nn] -> nn
   67       (nn:aa) -> nn <> " (" <> T.intercalate ", " aa <> ")"
   68   }
   69   where
   70   ni = toCI n
   71   ai = toCI <$> sortBy (comparing $ Down . T.length) a
   72   nai' = nubBy (onCI T.isInfixOf) $
   73     (case filter (T.isInfixOf `onCI` ni) ai of
   74       [] -> ni
   75       (lni:_) -> lni) : ai
   76   noc ci = toCI (geoName geoNameUS) == ci || any (T.isInfixOf `onCI` ci) nai'
   77 
   78 parseFundRef :: JSON.Value -> JSON.Parser (Funder, Maybe (Id GeoName))
   79 parseFundRef = JSON.withObject "fundref" $ \j -> do
   80   doi <- j JSON..: "id"
   81   fid <- maybe (fail $ "doi: " ++ doi) (return . Id) $ readMaybe =<< stripPrefix ("http://dx.doi.org/" ++ fundRefDOI) doi
   82   name <- label =<< j JSON..: "prefLabel"
   83   let alts = mapMaybe (JSON.parseMaybe (label <=< JSON.parseJSON)) $ case HM.lookup "altLabel" j of
   84         Just (JSON.Array v) -> V.toList v
   85         Just o -> [o]
   86         Nothing -> []
   87       geo = do
   88         r <- JSON.parseMaybe ((JSON..: "country") >=> (JSON..: "resource")) j
   89         g <- parseGeoNameRef r
   90         guard (g /= geoNameId geoNameUS)
   91         return g
   92   return (annotateFunder (Funder fid name) alts Nothing, geo)
   93   where
   94   label j = j JSON..: "Label" >>= (JSON..: "literalForm") >>= (JSON..: "content")
   95 
   96 lookupFundRef :: Id Funder -> HTTPClient -> IO (Maybe Funder)
   97 lookupFundRef fi hcm = runMaybeT $ do
   98   req <- HC.parseRequest $ "http://data.fundref.org/fundref/funder/" ++ fundRefDOI ++ show fi
   99   j <- MaybeT $ httpRequestJSON req hcm
  100   (f, gi) <- MaybeT $ return $ JSON.parseMaybe parseFundRef j
  101   g <- lift $ flatMapM (\i -> lookupGeoName i hcm) gi
  102   return $ annotateFunder f [] (geoName <$> g)
  103 
  104 lookupFunderRef :: (MonadIO m, MonadDB c m, MonadHas HTTPClient c m) => Id Funder -> m (Maybe Funder)
  105 lookupFunderRef fi = do
  106   f <- lookupFunder fi
  107   f `orElseM` do
  108     r <- focusIO $ lookupFundRef fi
  109     mapM_ addFunder r
  110     return r
  111 
  112 parseFundRefs :: JSON.Value -> JSON.Parser [Funder]
  113 parseFundRefs = JSON.withArray "fundrefs" $
  114   return . mapMaybe (JSON.parseMaybe pfr) . V.toList
  115   where
  116   pfr = JSON.withObject "fundref" $ \j -> do
  117     is <- j JSON..: "id"
  118     i <- maybe (fail "invalid id") (return . Id) $ readMaybe is
  119     name <- j JSON..: "value"
  120     alts <- j JSON..:? "other_names"
  121     country <- j JSON..:? "country"
  122     return $ annotateFunder (Funder i name) (fold alts) country
  123 
  124 fundRefReq :: HC.Request
  125 fundRefReq = (fromJust $ HC.parseRequest "http://search.crossref.org/funders")
  126   { HC.cookieJar = Nothing }
  127 
  128 searchFundRef :: T.Text -> HTTPClient -> IO [Funder]
  129 searchFundRef q hcm = do
  130   j <- httpRequestJSON req hcm
  131   return $ fold $ JSON.parseMaybe parseFundRefs =<< j
  132   where req = HC.setQueryString [("q", Just $ TE.encodeUtf8 q)] fundRefReq