1{-# LANGUAGE OverloadedStrings #-} 2{- | 3 Module : Text.Pandoc.Readers.Docx.Combine 4 Copyright : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>, 5 2014-2021 John MacFarlane <jgm@berkeley.edu>, 6 2020 Nikolay Yakimov <root@livid.pp.ru> 7 License : GNU GPL, version 2 or above 8 9 Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> 10 Stability : alpha 11 Portability : portable 12 13Flatten sequences of elements. 14-} 15 16{- 17The purpose of this module is to combine the formatting of separate 18runs, which have *non-nesting* formatting. Because the formatting 19doesn't nest, you can't actually tell the nesting order until you 20combine with the runs that follow. 21 22For example, say you have a something like `<em><strong>foo</strong> 23bar</em>`. Then in ooxml, you'll get these two runs: 24 25~~~ 26<w:r> 27 <w:rPr> 28 <w:b /> 29 <w:i /> 30 </w:rPr> 31 <w:t>Foo</w:t> 32</w:r> 33<w:r> 34 <w:rPr> 35 <w:i /> 36 </w:rPr> 37 <w:t> Bar</w:t> 38</w:r> 39~~~ 40 41Note that this is an ideal situation. In practice, it will probably be 42more---if, for example, the user turned italics 43off and then on. 44 45So, when you get the first run, which is marked as both bold and italic, 46you have no idea whether it's `Strong [Emph [Str "Foo"]]` or `Emph 47[Strong [Str "Foo"]]`. 48 49We combine two runs, then, by taking off the formatting that modifies an 50inline, seeing what is shared between them, and rebuilding an inline. We 51fold this to combine the inlines. 52 53-} 54 55module Text.Pandoc.Readers.Docx.Combine ( smushInlines 56 , smushBlocks 57 ) 58 where 59 60import Data.List 61import Data.Bifunctor 62import Data.Sequence ( ViewL (..), ViewR (..), viewl, viewr, spanr, spanl 63 , (><), (|>) ) 64import Text.Pandoc.Builder 65 66data Modifier a = Modifier (a -> a) 67 | AttrModifier (Attr -> a -> a) Attr 68 69spaceOutInlinesL :: Inlines -> (Inlines, Inlines) 70spaceOutInlinesL ms = (l, stackInlines fs (m' <> r)) 71 where (l, (fs, m'), r) = spaceOutInlines ms 72 73spaceOutInlinesR :: Inlines -> (Inlines, Inlines) 74spaceOutInlinesR ms = (stackInlines fs (l <> m'), r) 75 where (l, (fs, m'), r) = spaceOutInlines ms 76 77spaceOutInlines :: Inlines -> (Inlines, ([Modifier Inlines], Inlines), Inlines) 78spaceOutInlines ils = 79 let (fs, ils') = unstackInlines ils 80 (left, (right, contents')) = second (spanr isSpace) $ spanl isSpace $ unMany ils' 81 -- NOTE: spanr counterintuitively returns suffix as the FIRST tuple element 82 in (Many left, (fs, Many contents'), Many right) 83 84isSpace :: Inline -> Bool 85isSpace Space = True 86isSpace SoftBreak = True 87isSpace _ = False 88 89stackInlines :: [Modifier Inlines] -> Inlines -> Inlines 90stackInlines [] ms = ms 91stackInlines (Modifier f : fs) ms = 92 if null ms 93 then stackInlines fs ms 94 else f $ stackInlines fs ms 95stackInlines (AttrModifier f attr : fs) ms = f attr $ stackInlines fs ms 96 97unstackInlines :: Inlines -> ([Modifier Inlines], Inlines) 98unstackInlines ms = case ilModifierAndInnards ms of 99 Nothing -> ([], ms) 100 Just (f, inner) -> first (f :) $ unstackInlines inner 101 102ilModifierAndInnards :: Inlines -> Maybe (Modifier Inlines, Inlines) 103ilModifierAndInnards ils = case viewl $ unMany ils of 104 x :< xs | null xs -> second fromList <$> case x of 105 Emph lst -> Just (Modifier emph, lst) 106 Strong lst -> Just (Modifier strong, lst) 107 SmallCaps lst -> Just (Modifier smallcaps, lst) 108 Strikeout lst -> Just (Modifier strikeout, lst) 109 Underline lst -> Just (Modifier underline, lst) 110 Superscript lst -> Just (Modifier superscript, lst) 111 Subscript lst -> Just (Modifier subscript, lst) 112 Link attr lst tgt -> Just (Modifier $ uncurry (linkWith attr) tgt, lst) 113 Span attr lst -> Just (AttrModifier spanWith attr, lst) 114 _ -> Nothing 115 _ -> Nothing 116 117inlinesL :: Inlines -> (Inlines, Inlines) 118inlinesL ils = case viewl $ unMany ils of 119 (s :< sq) -> (singleton s, Many sq) 120 _ -> (mempty, ils) 121 122inlinesR :: Inlines -> (Inlines, Inlines) 123inlinesR ils = case viewr $ unMany ils of 124 (sq :> s) -> (Many sq, singleton s) 125 _ -> (ils, mempty) 126 127combineInlines :: Inlines -> Inlines -> Inlines 128combineInlines x y = 129 let (xs', x') = inlinesR x 130 (y', ys') = inlinesL y 131 in 132 xs' <> combineSingletonInlines x' y' <> ys' 133 134combineSingletonInlines :: Inlines -> Inlines -> Inlines 135combineSingletonInlines x y = 136 let (xfs, xs) = unstackInlines x 137 (yfs, ys) = unstackInlines y 138 shared = xfs `intersect` yfs 139 x_remaining = xfs \\ shared 140 y_remaining = yfs \\ shared 141 x_rem_attr = filter isAttrModifier x_remaining 142 y_rem_attr = filter isAttrModifier y_remaining 143 in 144 case null shared of 145 True | null xs && null ys -> 146 stackInlines (x_rem_attr <> y_rem_attr) mempty 147 | null xs -> 148 let (sp, y') = spaceOutInlinesL y in 149 stackInlines x_rem_attr mempty <> sp <> y' 150 | null ys -> 151 let (x', sp) = spaceOutInlinesR x in 152 x' <> sp <> stackInlines y_rem_attr mempty 153 | otherwise -> 154 let (x', xsp) = spaceOutInlinesR x 155 (ysp, y') = spaceOutInlinesL y 156 in 157 x' <> xsp <> ysp <> y' 158 False -> stackInlines shared $ 159 combineInlines 160 (stackInlines x_remaining xs) 161 (stackInlines y_remaining ys) 162 163combineBlocks :: Blocks -> Blocks -> Blocks 164combineBlocks bs cs 165 | bs' :> BlockQuote bs'' <- viewr (unMany bs) 166 , BlockQuote cs'' :< cs' <- viewl (unMany cs) = 167 Many $ (bs' |> BlockQuote (bs'' <> cs'')) >< cs' 168 | bs' :> CodeBlock attr codeStr <- viewr (unMany bs) 169 , CodeBlock attr' codeStr' :< cs' <- viewl (unMany cs) 170 , attr == attr' = 171 Many $ (bs' |> CodeBlock attr (codeStr <> "\n" <> codeStr')) >< cs' 172combineBlocks bs cs = bs <> cs 173 174instance (Monoid a, Eq a) => Eq (Modifier a) where 175 (Modifier f) == (Modifier g) = f mempty == g mempty 176 (AttrModifier f attr) == (AttrModifier g attr') = f attr mempty == g attr' mempty 177 _ == _ = False 178 179isAttrModifier :: Modifier a -> Bool 180isAttrModifier (AttrModifier _ _) = True 181isAttrModifier _ = False 182 183smushInlines :: [Inlines] -> Inlines 184smushInlines xs = combineInlines xs' mempty 185 where xs' = foldl combineInlines mempty xs 186 187smushBlocks :: [Blocks] -> Blocks 188smushBlocks xs = foldl combineBlocks mempty xs 189