1{-# LANGUAGE PatternGuards #-} 2{-# LANGUAGE OverloadedStrings #-} 3 4{- 5Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu> 6 7This program is free software; you can redistribute it and/or modify 8it under the terms of the GNU General Public License as published by 9the Free Software Foundation; either version 2 of the License, or 10(at your option) any later version. 11 12This program is distributed in the hope that it will be useful, 13but WITHOUT ANY WARRANTY; without even the implied warranty of 14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15GNU General Public License for more details. 16 17You should have received a copy of the GNU General Public License 18along with this program; if not, write to the Free Software 19Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20-} 21 22{- | 23 Module : Text.TeXMath.Readers.OMML 24 Copyright : Copyright (C) 2014 Jesse Rosenthal 25 License : GNU GPL, version 2 or above 26 27 Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> 28 Stability : alpha 29 Portability : portable 30 31Types and functions for conversion of OMML into TeXMath 'Exp's. 32-} 33 34module Text.TeXMath.Readers.OMML (readOMML) where 35 36import Text.XML.Light 37import Data.Maybe (isJust, mapMaybe, fromMaybe) 38import Data.List (intercalate) 39import Data.Char (isDigit, readLitChar) 40import qualified Data.Text as T 41import Text.TeXMath.Types 42import Text.TeXMath.Shared (fixTree, getSpaceWidth, getOperator) 43import Text.TeXMath.Unicode.ToTeX (getSymbolType) 44import Control.Applicative ((<$>)) 45import Text.TeXMath.Unicode.Fonts (getUnicode, textToFont) 46 47readOMML :: T.Text -> Either T.Text [Exp] 48readOMML s | Just e <- parseXMLDoc s = 49 case elemToOMML e of 50 Just exs -> Right $ map fixTree $ unGroup exs 51 Nothing -> Left "xml file was not an <m:oMathPara> or <m:oMath> element." 52readOMML _ = Left "Couldn't parse OMML file" 53 54unGroup :: [Exp] -> [Exp] 55unGroup [EGrouped exps] = exps 56unGroup exps = exps 57 58elemToOMML :: Element -> Maybe [Exp] 59elemToOMML element | isElem "m" "oMathPara" element = do 60 let expList = mapMaybe elemToOMML (elChildren element) 61 return $ map (\l -> if length l == 1 then (head l) else EGrouped l) expList 62elemToOMML element | isElem "m" "oMath" element = 63 Just $ concat $ mapMaybe elemToExps $ unwrapWTags $ elChildren element 64elemToOMML _ = Nothing 65 66-- oMath can contain w:hyperlink, w:sdt, etc. I can't find a complete 67-- documentation of this, so we just unwrap any w:tag immediately 68-- beneath oMath. Note that this shouldn't affect the "w" tags in 69-- elemToOMathRunElem(s) because, those are underneath an "m:r" tag. 70unwrapWTags :: [Element] -> [Element] 71unwrapWTags elements = concatMap unwrapChild elements 72 where unwrapChild element = case qPrefix $ elName element of 73 Just "w" -> elChildren element 74 _ -> [element] 75 76-- Kept as String because of Text.XML.Light 77isElem :: String -> String -> Element -> Bool 78isElem prefix name element = 79 let qp = fromMaybe "" (qPrefix (elName element)) 80 in 81 qName (elName element) == name && 82 qp == prefix 83 84-- Kept as String because of Text.XML.Light 85hasElemName :: String -> String -> QName -> Bool 86hasElemName prefix name qn = 87 let qp = fromMaybe "" (qPrefix qn) 88 in 89 qName qn == name && 90 qp == prefix 91 92data OMathRunElem = TextRun T.Text 93 | LnBrk 94 | Tab 95 deriving Show 96 97data OMathRunTextStyle = NoStyle 98 | Normal 99 | Styled { oMathScript :: Maybe OMathTextScript 100 , oMathStyle :: Maybe OMathTextStyle } 101 deriving Show 102 103data OMathTextScript = ORoman 104 | OScript 105 | OFraktur 106 | ODoubleStruck 107 | OSansSerif 108 | OMonospace 109 deriving (Show, Eq) 110 111data OMathTextStyle = OPlain 112 | OBold 113 | OItalic 114 | OBoldItalic 115 deriving (Show, Eq) 116 117elemToBase :: Element -> Maybe Exp 118elemToBase element | isElem "m" "e" element = do 119 bs <- elemToBases element 120 return $ case bs of 121 (e : []) -> e 122 exps -> EGrouped exps 123elemToBase _ = Nothing 124 125elemToBases :: Element -> Maybe [Exp] 126elemToBases element | isElem "m" "e" element = 127 return $ concat $ mapMaybe elemToExps' (elChildren element) 128elemToBases _ = Nothing 129 130 131-- TODO: The right way to do this is to use the ampersand to break the 132-- text lines into multiple columns. That's tricky, though, and this 133-- will get us most of the way for the time being. 134filterAmpersand :: Exp -> Exp 135filterAmpersand (EIdentifier s) = EIdentifier (T.filter ('&' /=) s) 136filterAmpersand (EText tt s) = EText tt (T.filter ('&' /=) s) 137filterAmpersand (EStyled tt exps) = EStyled tt (map filterAmpersand exps) 138filterAmpersand (EGrouped exps) = EGrouped (map filterAmpersand exps) 139filterAmpersand e = e 140 141elemToOMathRunTextStyle :: Element -> OMathRunTextStyle 142elemToOMathRunTextStyle element 143 | Just mrPr <- filterChildName (hasElemName"m" "rPr") element 144 , Just _ <- filterChildName (hasElemName"m" "nor") mrPr = 145 Normal 146 | Just mrPr <- filterChildName (hasElemName"m" "rPr") element = 147 let scr = 148 case 149 filterChildName (hasElemName"m" "scr") mrPr >>= 150 findAttrBy (hasElemName"m" "val") 151 of 152 Just "roman" -> Just ORoman 153 Just "script" -> Just OScript 154 Just "fraktur" -> Just OFraktur 155 Just "double-struck" -> Just ODoubleStruck 156 Just "sans-serif" -> Just OSansSerif 157 Just "monospace" -> Just OMonospace 158 _ -> Nothing 159 160 sty = 161 case 162 filterChildName (hasElemName"m" "sty") mrPr >>= 163 findAttrBy (hasElemName"m" "val") 164 of 165 Just "p" -> Just OPlain 166 Just "b" -> Just OBold 167 Just "i" -> Just OItalic 168 Just "bi" -> Just OBoldItalic 169 _ -> Nothing 170 in 171 Styled { oMathScript = scr, oMathStyle = sty } 172 | otherwise = NoStyle 173 174elemToOMathRunElem :: Element -> Maybe OMathRunElem 175elemToOMathRunElem element 176 | isElem "w" "t" element 177 || isElem "m" "t" element 178 || isElem "w" "delText" element = Just $ TextRun $ T.pack $ strContent element 179 | isElem "w" "br" element = Just LnBrk 180 | isElem "w" "tab" element = Just Tab 181 | isElem "w" "sym" element = Just $ TextRun $ getSymChar element 182 | otherwise = Nothing 183 184elemToOMathRunElems :: Element -> Maybe [OMathRunElem] 185elemToOMathRunElems element 186 | isElem "w" "r" element 187 || isElem "m" "r" element = 188 Just $ mapMaybe (elemToOMathRunElem) (elChildren element) 189elemToOMathRunElems _ = Nothing 190 191----- And now the TeXMath Creation 192 193oMathRunElemToText :: OMathRunElem -> T.Text 194oMathRunElemToText (TextRun s) = s 195oMathRunElemToText (LnBrk) = "\n" 196oMathRunElemToText (Tab) = "\t" 197 198oMathRunElemsToText :: [OMathRunElem] -> T.Text 199oMathRunElemsToText = T.concat . map oMathRunElemToText 200 201oMathRunTextStyleToTextType :: OMathRunTextStyle -> Maybe TextType 202oMathRunTextStyleToTextType (Normal) = Just $ TextNormal 203oMathRunTextStyleToTextType (NoStyle) = Nothing 204oMathRunTextStyleToTextType (Styled scr sty) 205 | Just OBold <- sty 206 , Just OSansSerif <- scr = 207 Just $ TextSansSerifBold 208 | Just OBoldItalic <- sty 209 , Just OSansSerif <- scr = 210 Just $ TextSansSerifBoldItalic 211 | Just OBold <- sty 212 , Just OScript <- scr = 213 Just $ TextBoldScript 214 | Just OBold <- sty 215 , Just OFraktur <- scr = 216 Just $ TextBoldFraktur 217 | Just OItalic <- sty 218 , Just OSansSerif <- scr = 219 Just $ TextSansSerifItalic 220 | Just OBold <- sty = 221 Just $ TextBold 222 | Just OItalic <- sty = 223 Just $ TextItalic 224 | Just OMonospace <- scr = 225 Just $ TextMonospace 226 | Just OSansSerif <- scr = 227 Just $ TextSansSerif 228 | Just ODoubleStruck <- scr = 229 Just $ TextDoubleStruck 230 | Just OScript <- scr = 231 Just $ TextScript 232 | Just OFraktur <- scr = 233 Just $ TextFraktur 234 | Just OBoldItalic <- sty = 235 Just $ TextBoldItalic 236 | otherwise = Nothing 237 238elemToExps :: Element -> Maybe [Exp] 239elemToExps element = unGroup <$> elemToExps' element 240 241elemToExps' :: Element -> Maybe [Exp] 242elemToExps' element | isElem "m" "acc" element = do 243 let chr = filterChildName (hasElemName "m" "accPr") element >>= 244 filterChildName (hasElemName "m" "chr") >>= 245 findAttrBy (hasElemName "m" "val") >>= 246 Just . head 247 chr' = case chr of 248 Just c -> T.singleton c 249 Nothing -> "\x302" -- default to wide hat. 250 baseExp <- filterChildName (hasElemName "m" "e") element >>= 251 elemToBase 252 return $ [EOver False baseExp (ESymbol Accent chr')] 253elemToExps' element | isElem "m" "bar" element = do 254 pos <- filterChildName (hasElemName "m" "barPr") element >>= 255 filterChildName (hasElemName "m" "pos") >>= 256 findAttrBy (hasElemName "m" "val") 257 baseExp <- filterChildName (hasElemName "m" "e") element >>= 258 elemToBase 259 case pos of 260 "top" -> Just [EOver False baseExp (ESymbol TOver "\773")] 261 "bot" -> Just [EUnder False baseExp (ESymbol TUnder "\818")] 262 _ -> Nothing 263elemToExps' element | isElem "m" "box" element = do 264 baseExp <- filterChildName (hasElemName "m" "e") element >>= 265 elemToBase 266 return [baseExp] 267elemToExps' element | isElem "m" "borderBox" element = do 268 baseExp <- filterChildName (hasElemName "m" "e") element >>= 269 elemToBase 270 return [EBoxed baseExp] 271elemToExps' element | isElem "m" "d" element = 272 let baseExps = mapMaybe 273 elemToBases 274 (elChildren element) 275 inDelimExps = map (map Right) baseExps 276 dPr = filterChildName (hasElemName "m" "dPr") element 277 begChr = dPr >>= 278 filterChildName (hasElemName "m" "begChr") >>= 279 findAttrBy (hasElemName "m" "val") >>= 280 (\c -> if null c then (Just ' ') else (Just $ head c)) 281 sepChr = dPr >>= 282 filterChildName (hasElemName "m" "sepChr") >>= 283 findAttrBy (hasElemName "m" "val") >>= 284 (\c -> if null c then (Just ' ') else (Just $ head c)) 285 endChr = dPr >>= 286 filterChildName (hasElemName "m" "endChr") >>= 287 findAttrBy (hasElemName "m" "val") >>= 288 (\c -> if null c then (Just ' ') else (Just $ head c)) 289 beg = maybe "(" T.singleton begChr 290 end = maybe ")" T.singleton endChr 291 sep = maybe "|" T.singleton sepChr 292 exps = intercalate [Left sep] inDelimExps 293 in 294 Just [EDelimited beg end exps] 295elemToExps' element | isElem "m" "eqArr" element = 296 let expLst = mapMaybe elemToBases (elChildren element) 297 expLst' = map (\es -> [map filterAmpersand es]) expLst 298 in 299 return [EArray [] expLst'] 300elemToExps' element | isElem "m" "f" element = do 301 num <- filterChildName (hasElemName "m" "num") element 302 den <- filterChildName (hasElemName "m" "den") element 303 let numExp = EGrouped $ concat $ mapMaybe (elemToExps) (elChildren num) 304 denExp = EGrouped $ concat $ mapMaybe (elemToExps) (elChildren den) 305 return $ [EFraction NormalFrac numExp denExp] 306elemToExps' element | isElem "m" "func" element = do 307 fName <- filterChildName (hasElemName "m" "fName") element 308 baseExp <- filterChildName (hasElemName "m" "e") element >>= 309 elemToBase 310 let fnameExp = case mconcat $ mapMaybe (elemToExps') (elChildren fName) of 311 [x] -> x 312 xs -> EGrouped xs 313 return [fnameExp, baseExp] 314elemToExps' element | isElem "m" "groupChr" element = do 315 let gPr = filterChildName (hasElemName "m" "groupChrPr") element 316 chr = gPr >>= 317 filterChildName (hasElemName "m" "chr") >>= 318 findAttrBy (hasElemName "m" "val") 319 pos = gPr >>= 320 filterChildName (hasElemName "m" "pos") >>= 321 findAttrBy (hasElemName "m" "val") 322 justif = gPr >>= 323 filterChildName (hasElemName "m" "vertJC") >>= 324 findAttrBy (hasElemName "m" "val") 325 baseExp <- filterChildName (hasElemName "m" "e") element >>= 326 elemToBase 327 case pos of 328 Just "top" -> 329 let chr' = case chr of 330 Just (c:_) -> T.singleton c 331 _ -> "\65079" -- default to overbrace 332 in 333 return $ 334 case justif of 335 Just "top" -> [EUnder False (ESymbol TOver chr') baseExp] 336 _ -> [EOver False baseExp (ESymbol TOver chr')] 337 _ -> -- bot is default 338 let chr' = case chr of 339 Just (c:_) -> T.singleton c 340 _ -> "\65080" -- default to underbrace 341 in 342 return $ 343 case justif of 344 Just "top" -> [EUnder False baseExp (ESymbol TUnder chr')] 345 _ -> [EOver False (ESymbol TUnder chr') baseExp] 346elemToExps' element | isElem "m" "limLow" element = do 347 baseExp <- filterChildName (hasElemName "m" "e") element 348 >>= elemToBase 349 limExp <- filterChildName (hasElemName "m" "lim") element 350 >>= (\e -> Just $ concat $ mapMaybe (elemToExps) (elChildren e)) 351 >>= (return . EGrouped) 352 return [EUnder True baseExp limExp] 353elemToExps' element | isElem "m" "limUpp" element = do 354 baseExp <- filterChildName (hasElemName "m" "e") element 355 >>= elemToBase 356 limExp <- filterChildName (hasElemName "m" "lim") element 357 >>= (\e -> Just $ concat $ mapMaybe (elemToExps) (elChildren e)) 358 >>= (return . EGrouped) 359 return [EOver True baseExp limExp] 360elemToExps' element | isElem "m" "m" element = 361 let rows = filterChildrenName (hasElemName "m" "mr") element 362 rowExps = map 363 (\mr -> mapMaybe 364 elemToBases 365 (elChildren mr)) 366 rows 367 in 368 return [EArray [AlignCenter] rowExps] 369elemToExps' element | isElem "m" "nary" element = do 370 let naryPr = filterChildName (hasElemName "m" "naryPr") element 371 naryChr = naryPr >>= 372 filterChildName (hasElemName "m" "chr") >>= 373 findAttrBy (hasElemName "m" "val") 374 opChr = case naryChr of 375 Just (c:_) -> T.singleton c 376 _ -> "\8747" -- default to integral 377 limLoc = naryPr >>= 378 filterChildName (hasElemName "m" "limLoc") >>= 379 findAttrBy (hasElemName "m" "val") 380 subExps <- filterChildName (hasElemName "m" "sub") element >>= 381 (\e -> return $ concat $ mapMaybe (elemToExps) (elChildren e)) 382 supExps <- filterChildName (hasElemName "m" "sup") element >>= 383 (\e -> return $ concat $ mapMaybe (elemToExps) (elChildren e)) 384 baseExp <- filterChildName (hasElemName "m" "e") element >>= 385 elemToBase 386 case limLoc of 387 Just "undOvr" -> return [EUnderover True 388 (ESymbol Op opChr) 389 (EGrouped subExps) 390 (EGrouped supExps) 391 , baseExp] 392 _ -> return [ESubsup 393 (ESymbol Op opChr) 394 (EGrouped subExps) 395 (EGrouped supExps) 396 , baseExp] 397 398elemToExps' element | isElem "m" "phant" element = do 399 baseExp <- filterChildName (hasElemName "m" "e") element >>= 400 elemToBase 401 return [EPhantom baseExp] 402elemToExps' element | isElem "m" "rad" element = do 403 degExps <- filterChildName (hasElemName "m" "deg") element >>= 404 (\e -> return $ concat $ mapMaybe (elemToExps) (elChildren e)) 405 baseExp <- filterChildName (hasElemName "m" "e") element >>= 406 elemToBase 407 return $ case degExps of 408 [] -> [ESqrt baseExp] 409 ds -> [ERoot (EGrouped ds) baseExp] 410elemToExps' element | isElem "m" "sPre" element = do 411 subExps <- filterChildName (hasElemName "m" "sub") element >>= 412 (\e -> return $ concat $ mapMaybe (elemToExps) (elChildren e)) 413 supExps <- filterChildName (hasElemName "m" "sup") element >>= 414 (\e -> return $ concat $ mapMaybe (elemToExps) (elChildren e)) 415 baseExp <- filterChildName (hasElemName "m" "e") element >>= 416 elemToBase 417 return [ESubsup 418 (EIdentifier "") 419 (EGrouped subExps) 420 (EGrouped supExps) 421 , baseExp] 422elemToExps' element | isElem "m" "sSub" element = do 423 baseExp <- filterChildName (hasElemName "m" "e") element >>= 424 elemToBase 425 subExps <- filterChildName (hasElemName "m" "sub") element >>= 426 (\e -> return $ concat $ mapMaybe (elemToExps) (elChildren e)) 427 return [ESub baseExp (EGrouped subExps)] 428elemToExps' element | isElem "m" "sSubSup" element = do 429 baseExp <- filterChildName (hasElemName "m" "e") element >>= 430 elemToBase 431 subExps <- filterChildName (hasElemName "m" "sub") element >>= 432 (\e -> return $ concat $ mapMaybe (elemToExps) (elChildren e)) 433 supExps <- filterChildName (hasElemName "m" "sup") element >>= 434 (\e -> return $ concat $ mapMaybe (elemToExps) (elChildren e)) 435 return [ESubsup baseExp (EGrouped subExps) (EGrouped supExps)] 436elemToExps' element | isElem "m" "sSup" element = do 437 baseExp <- filterChildName (hasElemName "m" "e") element >>= 438 elemToBase 439 supExps <- filterChildName (hasElemName "m" "sup") element >>= 440 (\e -> return $ concat $ mapMaybe (elemToExps) (elChildren e)) 441 return [ESuper baseExp (EGrouped supExps)] 442elemToExps' element | isElem "m" "r" element = do 443 let mrPr = filterChildName (hasElemName "m" "rPr") element 444 lit = mrPr >>= filterChildName (hasElemName "m" "lit") 445 nor = mrPr >>= filterChildName (hasElemName "m" "nor") 446 txtSty = oMathRunTextStyleToTextType $ elemToOMathRunTextStyle element 447 mrElems <- elemToOMathRunElems element 448 return $ 449 if null lit && null nor 450 then case txtSty of 451 Nothing -> 452 interpretText $ oMathRunElemsToText mrElems 453 Just textSty -> 454 [EStyled textSty $ interpretText $ oMathRunElemsToText mrElems] 455 else [EText (fromMaybe TextNormal txtSty) $ oMathRunElemsToText mrElems] 456elemToExps' _ = Nothing 457 458interpretChar :: Char -> Exp 459interpretChar c | isDigit c = ENumber $ T.singleton c 460interpretChar c = case getSymbolType c of 461 Alpha -> EIdentifier c' 462 Ord | isDigit c -> ENumber c' 463 | otherwise -> case getSpaceWidth c of 464 Just x -> ESpace x 465 Nothing -> ESymbol Ord c' 466 symType -> ESymbol symType c' 467 where 468 c' = T.singleton c 469 470interpretText :: T.Text -> [Exp] 471interpretText s 472 | Just (c, xs) <- T.uncons s 473 , T.null xs = [interpretChar c] 474 | T.all isDigit s = [ENumber s] 475 | isJust (getOperator (EMathOperator s)) 476 = [EMathOperator s] 477 | otherwise = 478 case map interpretChar (T.unpack s) of 479 xs | all isIdentifierOrSpace xs -> [EText TextNormal s] 480 | otherwise -> xs 481 where isIdentifierOrSpace (EIdentifier _) = True 482 isIdentifierOrSpace (ESpace _) = True 483 isIdentifierOrSpace _ = False 484 485-- The char attribute is a hex string 486getSymChar :: Element -> T.Text 487getSymChar element 488 | Just s <- lowerFromPrivate <$> getCodepoint 489 , Just font <- getFont = 490 let [(char, _)] = readLitChar ("\\x" ++ s) in 491 maybe "" T.singleton $ getUnicode font char 492 where 493 getCodepoint = findAttrBy (hasElemName "w" "char") element 494 getFont = (textToFont . T.pack) =<< findAttrBy (hasElemName "w" "font") element 495 lowerFromPrivate ('F':xs) = '0':xs 496 lowerFromPrivate xs = xs 497getSymChar _ = "" 498