1 {-# LANGUAGE OverloadedStrings, RecordWildCards, ViewPatterns #-}
    2 module Databrary.Model.Format
    3   ( module Databrary.Model.Format.Types
    4   , mimeTypeTop
    5   , mimeTypeSub
    6   , mimeTypeTopCompare
    7   -- , unknownFormat
    8   , allFormats
    9   , getFormat
   10   , getFormat'
   11   , getFormatByExtension
   12   , addFormatExtension
   13   , getFormatByFilename
   14   , dropFormatExtension
   15   , videoFormat
   16   , imageFormat
   17   , audioFormat
   18   , formatIsImage
   19   , formatTranscodable
   20   , formatSample
   21   , formatJSON
   22   , formatIsAV
   23   , formatNotAV
   24   ) where
   25 
   26 import qualified Data.ByteString as BS
   27 import qualified Data.ByteString.Char8 as BSC
   28 import Data.Char (toLower)
   29 import qualified Data.IntMap.Strict as IntMap
   30 import qualified Data.Map.Strict as Map
   31 import Data.Maybe (listToMaybe)
   32 import Data.Monoid ((<>))
   33 import System.Posix.FilePath (RawFilePath, splitExtension, takeExtension, addExtension)
   34 
   35 import qualified Databrary.JSON as JSON
   36 import Databrary.Model.Id
   37 import Databrary.Model.Format.Types
   38 
   39 -- | Parse a mimetype string into its type (category) and subtype (specific type)
   40 mimeTypes :: BS.ByteString -> (BS.ByteString, BS.ByteString)
   41 mimeTypes s = maybe (s, "") (\i -> (BS.take i s, BS.drop (succ i) s)) $ BSC.elemIndex '/' s
   42 
   43 -- | Parse a full mimetype value into just its category or type
   44 mimeTypeTop :: BS.ByteString -> BS.ByteString
   45 mimeTypeTop = fst . mimeTypes
   46 
   47 -- | Parse a full mimetype value into just its subtype
   48 mimeTypeSub :: BS.ByteString -> BS.ByteString
   49 mimeTypeSub = snd . mimeTypes
   50 
   51 -- | Establish a relative order between to specific mimetype values ...
   52 mimeTypeTopCompare :: BS.ByteString -> BS.ByteString -> Ordering
   53 mimeTypeTopCompare a b = mttc (BSC.unpack a) (BSC.unpack b) where
   54   mttc []      []      = EQ
   55   mttc ('/':_) []      = EQ
   56   mttc []      ('/':_) = EQ
   57   mttc ('/':_) ('/':_) = EQ
   58   mttc ('/':_) _       = LT
   59   mttc []      _       = LT
   60   mttc _       ('/':_) = GT
   61   mttc _       []      = GT
   62   mttc (ac:as) (bc:bs) = compare ac bc <> mttc as bs
   63 
   64 {- formerly used when reading format from db
   65 makeFormat :: Id Format -> BS.ByteString -> [Maybe BS.ByteString] -> T.Text -> Format
   66 makeFormat i m e n = Format i m (map (fromMaybe (error "NULL format.extension")) e) n
   67 
   68 formatRow :: Selector -- Format
   69 formatRow = selectColumns 'makeFormat "format" ["id", "mimetype", "extension", "name"]
   70 -}
   71 
   72 -- | Harcoded list of all formats recognized by Databrary for uploading
   73 -- TODO: db coherence
   74 allFormats :: [Format]
   75 allFormats
   76     = [ Format (Id (-800)) "video/mp4"  ["mp4"]         "MPEG-4 video"
   77       , Format (Id (-700)) "image/jpeg" ["jpg", "jpeg"] "JPEG image"
   78       , Format (Id (-600))
   79                "audio/mpeg"
   80                ["mp3"]
   81                "MPEG-1 or MPEG-2 audio layer III"
   82       , Format (Id 1) "text/plain"         ["txt"] "Plain text"
   83       , Format (Id 2) "text/csv"           ["csv"] "Comma-separated values"
   84       , Format (Id 4) "text/rtf"           ["rtf"] "Rich text format"
   85       , Format (Id 5) "image/png"          ["png"] "Portable network graphics"
   86       , Format (Id 6) "application/pdf"    ["pdf"] "Portable document"
   87       , Format (Id 7) "application/msword" ["doc"] "Microsoft Word document"
   88       , Format (Id 8)
   89                "application/vnd.oasis.opendocument.text"
   90                ["odf"]
   91                "OpenDocument text"
   92       , Format
   93           (Id 9)
   94           "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
   95           ["docx"]
   96           "Microsoft Word (Office Open XML) document"
   97       , Format (Id 10)
   98                "application/vnd.ms-excel"
   99                ["xls"]
  100                "Microsoft Excel spreadsheet"
  101       , Format (Id 11)
  102                "application/vnd.oasis.opendocument.spreadsheet"
  103                ["ods"]
  104                "OpenDocument spreadsheet"
  105       , Format
  106           (Id 12)
  107           "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
  108           ["xlsx"]
  109           "Microsoft Excel (Office Open XML) workbook"
  110       , Format (Id 13)
  111                "application/vnd.ms-powerpoint"
  112                ["ppt"]
  113                "Microsoft PowerPoint presentation"
  114       , Format (Id 14)
  115                "application/vnd.oasis.opendocument.presentation"
  116                ["odp"]
  117                "OpenDocument presentation"
  118       , Format
  119           (Id 15)
  120           "application/vnd.openxmlformats-officedocument.presentationml.presentation"
  121           ["pptx"]
  122           "Microsoft PowerPoint (Office Open XML) presentation"
  123       , Format (Id 16) "application/vnd.datavyu" ["opf"]  "Datavyu"
  124       , Format (Id 18) "video/webm"              ["webm"] "WebM video"
  125       , Format (Id 19)
  126                "video/mpeg"
  127                ["mpg", "mpeg"]
  128                "MPEG program stream (MPEG-1/MPEG-2 video)"
  129       , Format (Id 20) "video/quicktime" ["mov"] "QuickTime video"
  130       , Format (Id 21) "video/mp2t" ["mts", "m2ts"] "MPEG transport stream"
  131       , Format (Id 22) "video/avi" ["avi"] "Audio Video Interleave"
  132       , Format (Id 23) "application/x-spss-sav" ["sav"] "SPSS System File"
  133       , Format (Id 24) "audio/wav" ["wav"] "Waveform audio"
  134       , Format (Id 25) "video/x-ms-wmv" ["wmv"] "Windows Media video"
  135       , Format (Id 26)
  136                "text/x-chat"
  137                ["cha", "chat"]
  138                "Codes for the Human Analysis of Transcripts"
  139       , Format (Id 27) "audio/aac"      ["aac"] "Advanced Audio Coding"
  140       , Format (Id 28) "audio/x-ms-wma" ["wma"] "Windows Media audio"
  141       , Format (Id 29)
  142                "application/vnd.lena.interpreted-time-segments"
  143                ["its"]
  144                "LENA Interpreted Time Segments"
  145       , Format (Id 30)
  146                "video/x-dv"
  147                ["dv", "dif"]
  148                "Digital Interface Format video"
  149       , Format (Id 31)
  150                "text/elan"
  151                ["eaf", "pfsx", "etf"]
  152                "ELAN - Linguistic Annotator"
  153       ]
  154 
  155 formatsById :: IntMap.IntMap Format
  156 formatsById = IntMap.fromList $ map (\a -> (fromIntegral $ unId $ formatId a, a)) allFormats
  157 
  158 getFormat :: Id Format -> Maybe Format
  159 getFormat (Id i) = IntMap.lookup (fromIntegral i) formatsById
  160 
  161 getFormat' :: Id Format -> Format
  162 getFormat' (Id i) = formatsById IntMap.! fromIntegral i
  163 
  164 formatsByExtension :: Map.Map BS.ByteString Format
  165 formatsByExtension = Map.fromList [ (e, a) | a <- allFormats, e <- formatExtension a ]
  166 
  167 getFormatByExtension :: BS.ByteString -> Maybe Format
  168 getFormatByExtension e = Map.lookup (BSC.map toLower e) formatsByExtension
  169 
  170 addFormatExtension :: RawFilePath -> Format -> RawFilePath
  171 addFormatExtension p (formatExtension -> (e:_)) = addExtension p e
  172 addFormatExtension p _ = p
  173 
  174 getFormatByFilename :: RawFilePath -> Maybe Format
  175 getFormatByFilename n = do
  176   ('.',e) <- BSC.uncons $ takeExtension n
  177   getFormatByExtension e
  178 
  179 dropFormatExtension :: Format -> RawFilePath -> RawFilePath
  180 dropFormatExtension fmt n
  181   | (f,BSC.uncons -> Just ('.',e)) <- splitExtension n
  182   , BSC.map toLower e `elem` formatExtension fmt = f
  183   | otherwise = n
  184 
  185 -- | Blessed video format, used as output from transcoding, as well as understood by front end for playback
  186 videoFormat :: Format
  187 videoFormat = getFormat' (Id (-800))
  188 
  189 imageFormat :: Format
  190 imageFormat = getFormat' (Id (-700))
  191 
  192 -- | Blessed audio format, used as output from transcoding, as well as understood by front end for playback
  193 audioFormat :: Format
  194 audioFormat = getFormat' (Id (-600))
  195 
  196 -- | Is this a video format
  197 formatIsVideo :: Format -> Bool
  198 formatIsVideo Format{ formatMimeType = t } = "video/" `BS.isPrefixOf` t
  199 
  200 -- | Is this an image format
  201 formatIsImage :: Format -> Bool
  202 formatIsImage Format{ formatMimeType = t } = "image/" `BS.isPrefixOf` t
  203 
  204 -- | Is this an audio format
  205 formatIsAudio :: Format -> Bool
  206 formatIsAudio Format{ formatMimeType = t } = "audio/" `BS.isPrefixOf` t
  207 
  208 -- | Is this an Audio or Video format
  209 formatIsAV :: Format -> Bool
  210 formatIsAV fmat = formatIsVideo fmat || formatIsAudio fmat
  211 
  212 formatNotAV :: Format -> Bool
  213 formatNotAV fmat = not (formatIsVideo fmat || formatIsAudio fmat)
  214 
  215 -- | If the format can (or should be) transcoded into the blessed internal format,
  216 -- then provide the format it will be transcoded into
  217 formatTranscodable :: Format -> Maybe Format
  218 formatTranscodable f
  219   | formatIsVideo f = Just videoFormat
  220   | formatIsAudio f = Just audioFormat
  221   | otherwise = Nothing
  222 
  223 -- | For formats where we can produce samples, determine the type for sample output
  224 formatSample :: Format -> Maybe Format
  225 formatSample f
  226   | f == videoFormat = Just imageFormat
  227   | otherwise = Nothing
  228 
  229 -- | Convert a Format value into the shape of JSON to be produced.
  230 -- Arbitrarily only show the first extension if multiple extensions are present
  231 formatJSON :: JSON.ToObject o => Format -> JSON.Record (Id Format) o
  232 formatJSON f = JSON.Record (formatId f) $
  233      "mimetype" JSON..= formatMimeType f
  234   <> "extension" `JSON.kvObjectOrEmpty` listToMaybe (formatExtension f)
  235   <> "name" JSON..= formatName f
  236   <> "transcodable" `JSON.kvObjectOrEmpty` (formatId <$> formatTranscodable f)
  237   -- TODO: description