1{-# LANGUAGE PatternGuards #-}
2{-# LANGUAGE OverloadedStrings #-}
3
4{-
5Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu>
6
7This program is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2 of the License, or
10(at your option) any later version.
11
12This program is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with this program; if not, write to the Free Software
19Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20-}
21
22{- |
23 Module : Text.TeXMath.Readers.OMML
24 Copyright : Copyright (C) 2014 Jesse Rosenthal
25 License : GNU GPL, version 2 or above
26
27 Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
28 Stability : alpha
29 Portability : portable
30
31Types and functions for conversion of OMML into TeXMath 'Exp's.
32-}
33
34module Text.TeXMath.Readers.OMML (readOMML) where
35
36import Text.XML.Light
37import Data.Maybe (isJust, mapMaybe, fromMaybe)
38import Data.List (intercalate)
39import Data.Char (isDigit, readLitChar)
40import qualified Data.Text as T
41import Text.TeXMath.Types
42import Text.TeXMath.Shared (fixTree, getSpaceWidth, getOperator)
43import Text.TeXMath.Unicode.ToTeX (getSymbolType)
44import Control.Applicative ((<$>))
45import Text.TeXMath.Unicode.Fonts (getUnicode, textToFont)
46
47readOMML :: T.Text -> Either T.Text [Exp]
48readOMML s | Just e <- parseXMLDoc s =
49  case elemToOMML e of
50    Just exs -> Right $ map fixTree $ unGroup exs
51    Nothing   -> Left "xml file was not an <m:oMathPara> or <m:oMath> element."
52readOMML _ = Left "Couldn't parse OMML file"
53
54unGroup :: [Exp] -> [Exp]
55unGroup [EGrouped exps] = exps
56unGroup exps = exps
57
58elemToOMML :: Element -> Maybe [Exp]
59elemToOMML element  | isElem "m" "oMathPara" element = do
60  let expList = mapMaybe elemToOMML (elChildren element)
61  return $ map (\l -> if length l == 1 then (head l) else EGrouped l) expList
62elemToOMML element  | isElem "m" "oMath" element =
63  Just $ concat $ mapMaybe elemToExps $ unwrapWTags $ elChildren element
64elemToOMML _ = Nothing
65
66-- oMath can contain w:hyperlink, w:sdt, etc. I can't find a complete
67-- documentation of this, so we just unwrap any w:tag immediately
68-- beneath oMath. Note that this shouldn't affect the "w" tags in
69-- elemToOMathRunElem(s) because, those are underneath an "m:r" tag.
70unwrapWTags :: [Element] -> [Element]
71unwrapWTags elements = concatMap unwrapChild elements
72  where unwrapChild element = case qPrefix $ elName element of
73                                Just "w" -> elChildren element
74                                _        -> [element]
75
76-- Kept as String because of Text.XML.Light
77isElem :: String -> String -> Element -> Bool
78isElem prefix name element =
79  let qp = fromMaybe "" (qPrefix (elName element))
80  in
81   qName (elName element) == name &&
82   qp == prefix
83
84-- Kept as String because of Text.XML.Light
85hasElemName :: String -> String -> QName -> Bool
86hasElemName prefix name qn =
87  let qp = fromMaybe "" (qPrefix qn)
88  in
89   qName qn == name &&
90   qp       == prefix
91
92data OMathRunElem = TextRun T.Text
93                  | LnBrk
94                  | Tab
95                    deriving Show
96
97data OMathRunTextStyle = NoStyle
98                       | Normal
99                       | Styled { oMathScript :: Maybe OMathTextScript
100                                , oMathStyle  :: Maybe OMathTextStyle }
101                       deriving Show
102
103data OMathTextScript = ORoman
104                     | OScript
105                     | OFraktur
106                     | ODoubleStruck
107                     | OSansSerif
108                     | OMonospace
109                     deriving (Show, Eq)
110
111data OMathTextStyle = OPlain
112                    | OBold
113                    | OItalic
114                    | OBoldItalic
115                    deriving (Show, Eq)
116
117elemToBase :: Element -> Maybe Exp
118elemToBase element | isElem "m" "e" element = do
119  bs <- elemToBases element
120  return $ case bs of
121    (e : []) -> e
122    exps     -> EGrouped exps
123elemToBase _ = Nothing
124
125elemToBases :: Element -> Maybe [Exp]
126elemToBases element | isElem "m" "e" element =
127  return $ concat $ mapMaybe elemToExps' (elChildren element)
128elemToBases _ = Nothing
129
130
131-- TODO: The right way to do this is to use the ampersand to break the
132-- text lines into multiple columns. That's tricky, though, and this
133-- will get us most of the way for the time being.
134filterAmpersand :: Exp -> Exp
135filterAmpersand (EIdentifier s)   = EIdentifier (T.filter ('&' /=) s)
136filterAmpersand (EText tt s)      = EText tt (T.filter ('&' /=) s)
137filterAmpersand (EStyled tt exps) = EStyled tt (map filterAmpersand exps)
138filterAmpersand (EGrouped exps)   = EGrouped (map filterAmpersand exps)
139filterAmpersand e                    = e
140
141elemToOMathRunTextStyle :: Element -> OMathRunTextStyle
142elemToOMathRunTextStyle element
143  | Just mrPr <- filterChildName (hasElemName"m" "rPr") element
144  , Just _    <- filterChildName (hasElemName"m" "nor") mrPr =
145    Normal
146  | Just mrPr <- filterChildName (hasElemName"m" "rPr") element =
147    let scr =
148          case
149            filterChildName (hasElemName"m" "scr") mrPr >>=
150            findAttrBy (hasElemName"m" "val")
151          of
152            Just "roman"         -> Just ORoman
153            Just "script"        -> Just OScript
154            Just "fraktur"       -> Just OFraktur
155            Just "double-struck" -> Just ODoubleStruck
156            Just "sans-serif"    -> Just OSansSerif
157            Just "monospace"     -> Just OMonospace
158            _                    -> Nothing
159
160        sty =
161          case
162            filterChildName (hasElemName"m" "sty") mrPr >>=
163            findAttrBy (hasElemName"m" "val")
164          of
165            Just "p"             -> Just OPlain
166            Just "b"             -> Just OBold
167            Just "i"             -> Just OItalic
168            Just "bi"            -> Just OBoldItalic
169            _                    -> Nothing
170    in
171     Styled { oMathScript = scr, oMathStyle = sty }
172  | otherwise = NoStyle
173
174elemToOMathRunElem :: Element -> Maybe OMathRunElem
175elemToOMathRunElem element
176  | isElem "w" "t" element
177    || isElem "m" "t" element
178    || isElem "w" "delText" element = Just $ TextRun $ T.pack $ strContent element
179  | isElem "w" "br" element = Just LnBrk
180  | isElem "w" "tab" element = Just Tab
181  | isElem "w" "sym" element = Just $ TextRun $ getSymChar element
182  | otherwise = Nothing
183
184elemToOMathRunElems :: Element -> Maybe [OMathRunElem]
185elemToOMathRunElems element
186  | isElem "w" "r" element
187    || isElem "m" "r" element =
188      Just $ mapMaybe (elemToOMathRunElem) (elChildren element)
189elemToOMathRunElems _ = Nothing
190
191----- And now the TeXMath Creation
192
193oMathRunElemToText :: OMathRunElem -> T.Text
194oMathRunElemToText (TextRun s) = s
195oMathRunElemToText (LnBrk) = "\n"
196oMathRunElemToText (Tab) = "\t"
197
198oMathRunElemsToText :: [OMathRunElem] -> T.Text
199oMathRunElemsToText = T.concat . map oMathRunElemToText
200
201oMathRunTextStyleToTextType :: OMathRunTextStyle -> Maybe TextType
202oMathRunTextStyleToTextType (Normal) = Just $ TextNormal
203oMathRunTextStyleToTextType (NoStyle) = Nothing
204oMathRunTextStyleToTextType (Styled scr sty)
205  | Just OBold <- sty
206  , Just OSansSerif <- scr =
207    Just $ TextSansSerifBold
208  | Just OBoldItalic <- sty
209  , Just OSansSerif <- scr =
210    Just $ TextSansSerifBoldItalic
211  | Just OBold <- sty
212  , Just OScript <- scr =
213    Just $ TextBoldScript
214  | Just OBold <- sty
215  , Just OFraktur <- scr =
216    Just $ TextBoldFraktur
217  | Just OItalic <- sty
218  , Just OSansSerif <- scr =
219    Just $ TextSansSerifItalic
220  | Just OBold <- sty =
221    Just $ TextBold
222  | Just OItalic <- sty =
223    Just $ TextItalic
224  | Just OMonospace <- scr =
225    Just $ TextMonospace
226  | Just OSansSerif <- scr =
227    Just $ TextSansSerif
228  | Just ODoubleStruck <- scr =
229    Just $ TextDoubleStruck
230  | Just OScript <- scr =
231    Just $ TextScript
232  | Just OFraktur <- scr =
233    Just $ TextFraktur
234  | Just OBoldItalic <- sty =
235    Just $ TextBoldItalic
236  | otherwise = Nothing
237
238elemToExps :: Element -> Maybe [Exp]
239elemToExps element = unGroup <$> elemToExps' element
240
241elemToExps' :: Element -> Maybe [Exp]
242elemToExps' element | isElem "m" "acc" element = do
243  let chr = filterChildName (hasElemName "m" "accPr") element >>=
244            filterChildName (hasElemName "m" "chr") >>=
245            findAttrBy (hasElemName "m" "val") >>=
246            Just . head
247      chr' = case chr of
248        Just c -> T.singleton c
249        Nothing -> "\x302"       -- default to wide hat.
250  baseExp <- filterChildName (hasElemName "m" "e") element >>=
251             elemToBase
252  return $ [EOver False baseExp (ESymbol Accent chr')]
253elemToExps' element | isElem "m" "bar" element = do
254  pos <- filterChildName (hasElemName "m" "barPr") element >>=
255            filterChildName (hasElemName "m" "pos") >>=
256            findAttrBy (hasElemName "m" "val")
257  baseExp <- filterChildName (hasElemName "m" "e") element >>=
258             elemToBase
259  case pos of
260    "top" -> Just [EOver False baseExp (ESymbol TOver "\773")]
261    "bot" -> Just [EUnder False baseExp (ESymbol TUnder "\818")]
262    _     -> Nothing
263elemToExps' element | isElem "m" "box" element = do
264  baseExp <- filterChildName (hasElemName "m" "e") element >>=
265             elemToBase
266  return [baseExp]
267elemToExps' element | isElem "m" "borderBox" element = do
268  baseExp <- filterChildName (hasElemName "m" "e") element >>=
269             elemToBase
270  return [EBoxed baseExp]
271elemToExps' element | isElem "m" "d" element =
272  let baseExps  = mapMaybe
273                  elemToBases
274                  (elChildren element)
275      inDelimExps = map (map Right) baseExps
276      dPr = filterChildName (hasElemName "m" "dPr") element
277      begChr = dPr >>=
278               filterChildName (hasElemName "m" "begChr") >>=
279               findAttrBy (hasElemName "m" "val") >>=
280               (\c -> if null c then (Just ' ') else (Just $ head c))
281      sepChr = dPr >>=
282               filterChildName (hasElemName "m" "sepChr") >>=
283               findAttrBy (hasElemName "m" "val") >>=
284               (\c -> if null c then (Just ' ') else (Just $ head c))
285      endChr = dPr >>=
286               filterChildName (hasElemName "m" "endChr") >>=
287               findAttrBy (hasElemName "m" "val") >>=
288               (\c -> if null c then (Just ' ') else (Just $ head c))
289      beg = maybe "(" T.singleton begChr
290      end = maybe ")" T.singleton endChr
291      sep = maybe "|" T.singleton sepChr
292      exps = intercalate [Left sep] inDelimExps
293  in
294   Just [EDelimited beg end exps]
295elemToExps' element | isElem "m" "eqArr" element =
296  let expLst = mapMaybe elemToBases (elChildren element)
297      expLst' = map (\es -> [map filterAmpersand es]) expLst
298  in
299   return [EArray [] expLst']
300elemToExps' element | isElem "m" "f" element = do
301  num <- filterChildName (hasElemName "m" "num") element
302  den <- filterChildName (hasElemName "m" "den") element
303  let numExp = EGrouped $ concat $ mapMaybe (elemToExps) (elChildren num)
304      denExp = EGrouped $ concat $ mapMaybe (elemToExps) (elChildren den)
305  return $ [EFraction NormalFrac numExp denExp]
306elemToExps' element | isElem "m" "func" element = do
307  fName <- filterChildName (hasElemName "m" "fName") element
308  baseExp <- filterChildName (hasElemName "m" "e") element >>=
309          elemToBase
310  let fnameExp = case mconcat $ mapMaybe (elemToExps') (elChildren fName) of
311                   [x] -> x
312                   xs  -> EGrouped xs
313  return [fnameExp, baseExp]
314elemToExps' element | isElem "m" "groupChr" element = do
315  let gPr = filterChildName (hasElemName "m" "groupChrPr") element
316      chr = gPr >>=
317            filterChildName (hasElemName "m" "chr") >>=
318            findAttrBy (hasElemName "m" "val")
319      pos = gPr >>=
320            filterChildName (hasElemName "m" "pos") >>=
321            findAttrBy (hasElemName "m" "val")
322      justif = gPr >>=
323               filterChildName (hasElemName "m" "vertJC") >>=
324               findAttrBy (hasElemName "m" "val")
325  baseExp <- filterChildName (hasElemName "m" "e") element >>=
326             elemToBase
327  case pos of
328    Just "top" ->
329      let chr' = case chr of
330            Just (c:_) -> T.singleton c
331            _           -> "\65079"   -- default to overbrace
332      in
333       return $
334         case justif of
335           Just "top" -> [EUnder False (ESymbol TOver chr') baseExp]
336           _ -> [EOver False baseExp (ESymbol TOver chr')]
337    _ -> -- bot is default
338      let chr' = case chr of
339            Just (c:_) -> T.singleton c
340            _           -> "\65080"   -- default to underbrace
341      in
342       return $
343         case justif of
344           Just "top" -> [EUnder False baseExp (ESymbol TUnder chr')]
345           _ -> [EOver False (ESymbol TUnder chr') baseExp]
346elemToExps' element | isElem "m" "limLow" element = do
347  baseExp <- filterChildName (hasElemName "m" "e") element
348          >>= elemToBase
349  limExp <- filterChildName (hasElemName "m" "lim") element
350            >>= (\e -> Just $ concat $ mapMaybe (elemToExps) (elChildren e))
351            >>= (return . EGrouped)
352  return [EUnder True baseExp limExp]
353elemToExps' element | isElem "m" "limUpp" element = do
354  baseExp <- filterChildName (hasElemName "m" "e") element
355          >>= elemToBase
356  limExp <- filterChildName (hasElemName "m" "lim") element
357            >>= (\e -> Just $ concat $ mapMaybe (elemToExps) (elChildren e))
358            >>= (return . EGrouped)
359  return [EOver True baseExp limExp]
360elemToExps' element | isElem "m" "m" element =
361  let rows = filterChildrenName (hasElemName "m" "mr") element
362      rowExps = map
363                (\mr -> mapMaybe
364                        elemToBases
365                        (elChildren mr))
366                rows
367  in
368   return [EArray [AlignCenter] rowExps]
369elemToExps' element | isElem "m" "nary" element = do
370  let naryPr = filterChildName (hasElemName "m" "naryPr") element
371      naryChr = naryPr >>=
372                filterChildName (hasElemName "m" "chr") >>=
373                findAttrBy (hasElemName "m" "val")
374      opChr = case naryChr of
375        Just (c:_) -> T.singleton c
376        _          -> "\8747"   -- default to integral
377      limLoc = naryPr >>=
378               filterChildName (hasElemName "m" "limLoc") >>=
379               findAttrBy (hasElemName "m" "val")
380  subExps <- filterChildName (hasElemName "m" "sub") element >>=
381         (\e -> return $ concat $ mapMaybe (elemToExps) (elChildren e))
382  supExps <- filterChildName (hasElemName "m" "sup") element >>=
383         (\e -> return $ concat $ mapMaybe (elemToExps) (elChildren e))
384  baseExp <- filterChildName (hasElemName "m" "e") element >>=
385             elemToBase
386  case limLoc of
387    Just "undOvr" -> return [EUnderover True
388                              (ESymbol Op opChr)
389                              (EGrouped subExps)
390                              (EGrouped supExps)
391                            , baseExp]
392    _             -> return [ESubsup
393                              (ESymbol Op opChr)
394                              (EGrouped subExps)
395                              (EGrouped supExps)
396                            , baseExp]
397
398elemToExps' element | isElem "m" "phant" element = do
399  baseExp <- filterChildName (hasElemName "m" "e") element >>=
400             elemToBase
401  return [EPhantom baseExp]
402elemToExps' element | isElem "m" "rad" element = do
403  degExps <- filterChildName (hasElemName "m" "deg") element >>=
404              (\e -> return $ concat $ mapMaybe (elemToExps) (elChildren e))
405  baseExp <- filterChildName (hasElemName "m" "e") element >>=
406             elemToBase
407  return $ case degExps of
408    [] -> [ESqrt baseExp]
409    ds -> [ERoot (EGrouped ds) baseExp]
410elemToExps' element | isElem "m" "sPre" element = do
411  subExps <- filterChildName (hasElemName "m" "sub") element >>=
412            (\e -> return $ concat $ mapMaybe (elemToExps) (elChildren e))
413  supExps <- filterChildName (hasElemName "m" "sup") element >>=
414            (\e -> return $ concat $ mapMaybe (elemToExps) (elChildren e))
415  baseExp <- filterChildName (hasElemName "m" "e") element >>=
416             elemToBase
417  return [ESubsup
418          (EIdentifier "")
419          (EGrouped subExps)
420          (EGrouped supExps)
421         , baseExp]
422elemToExps' element | isElem "m" "sSub" element = do
423  baseExp <- filterChildName (hasElemName "m" "e") element >>=
424             elemToBase
425  subExps <- filterChildName (hasElemName "m" "sub") element >>=
426            (\e -> return $ concat $ mapMaybe (elemToExps) (elChildren e))
427  return [ESub baseExp (EGrouped subExps)]
428elemToExps' element | isElem "m" "sSubSup" element = do
429  baseExp <- filterChildName (hasElemName "m" "e") element >>=
430             elemToBase
431  subExps <- filterChildName (hasElemName "m" "sub") element >>=
432             (\e -> return $ concat $ mapMaybe (elemToExps) (elChildren e))
433  supExps <- filterChildName (hasElemName "m" "sup") element >>=
434             (\e -> return $ concat $ mapMaybe (elemToExps) (elChildren e))
435  return [ESubsup baseExp (EGrouped subExps) (EGrouped supExps)]
436elemToExps' element | isElem "m" "sSup" element = do
437  baseExp <- filterChildName (hasElemName "m" "e") element >>=
438             elemToBase
439  supExps <- filterChildName (hasElemName "m" "sup") element >>=
440            (\e -> return $ concat $ mapMaybe (elemToExps) (elChildren e))
441  return [ESuper baseExp (EGrouped supExps)]
442elemToExps' element | isElem "m" "r" element = do
443  let mrPr = filterChildName (hasElemName "m" "rPr") element
444      lit = mrPr >>= filterChildName (hasElemName "m" "lit")
445      nor = mrPr >>= filterChildName (hasElemName "m" "nor")
446      txtSty = oMathRunTextStyleToTextType $ elemToOMathRunTextStyle element
447  mrElems <- elemToOMathRunElems element
448  return $
449    if null lit && null nor
450       then case txtSty of
451              Nothing ->
452                interpretText $ oMathRunElemsToText mrElems
453              Just textSty ->
454                [EStyled textSty $ interpretText $ oMathRunElemsToText mrElems]
455       else [EText (fromMaybe TextNormal txtSty) $ oMathRunElemsToText mrElems]
456elemToExps' _ = Nothing
457
458interpretChar :: Char -> Exp
459interpretChar c | isDigit c = ENumber $ T.singleton c
460interpretChar c = case getSymbolType c of
461  Alpha           -> EIdentifier c'
462  Ord | isDigit c -> ENumber c'
463      | otherwise -> case getSpaceWidth c of
464                           Just x  -> ESpace x
465                           Nothing -> ESymbol Ord c'
466  symType         -> ESymbol symType c'
467  where
468    c' = T.singleton c
469
470interpretText :: T.Text -> [Exp]
471interpretText s
472  | Just (c, xs) <- T.uncons s
473  , T.null xs = [interpretChar c]
474  | T.all isDigit s         = [ENumber s]
475  | isJust (getOperator (EMathOperator s))
476                          = [EMathOperator s]
477  | otherwise             =
478      case map interpretChar (T.unpack s) of
479            xs | all isIdentifierOrSpace xs -> [EText TextNormal s]
480               | otherwise                  -> xs
481  where isIdentifierOrSpace (EIdentifier _) = True
482        isIdentifierOrSpace (ESpace _)      = True
483        isIdentifierOrSpace _               = False
484
485-- The char attribute is a hex string
486getSymChar :: Element -> T.Text
487getSymChar element
488  | Just s <- lowerFromPrivate <$> getCodepoint
489  , Just font <- getFont =
490  let [(char, _)] = readLitChar ("\\x" ++ s) in
491    maybe "" T.singleton $ getUnicode font char
492  where
493    getCodepoint = findAttrBy (hasElemName "w" "char") element
494    getFont = (textToFont . T.pack) =<< findAttrBy (hasElemName "w" "font") element
495    lowerFromPrivate ('F':xs) = '0':xs
496    lowerFromPrivate xs = xs
497getSymChar _ = ""
498