1{-# LANGUAGE OverloadedStrings    #-}
2{- |
3   Module      : Text.Pandoc.Readers.Docx.Combine
4   Copyright   : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>,
5                   2014-2021 John MacFarlane <jgm@berkeley.edu>,
6                   2020 Nikolay Yakimov <root@livid.pp.ru>
7   License     : GNU GPL, version 2 or above
8
9   Maintainer  : Jesse Rosenthal <jrosenthal@jhu.edu>
10   Stability   : alpha
11   Portability : portable
12
13Flatten sequences of elements.
14-}
15
16{-
17The purpose of this module is to combine the formatting of separate
18runs, which have *non-nesting* formatting. Because the formatting
19doesn't nest, you can't actually tell the nesting order until you
20combine with the runs that follow.
21
22For example, say you have a something like `<em><strong>foo</strong>
23bar</em>`. Then in ooxml, you'll get these two runs:
24
25~~~
26<w:r>
27 <w:rPr>
28  <w:b />
29  <w:i />
30 </w:rPr>
31 <w:t>Foo</w:t>
32</w:r>
33<w:r>
34 <w:rPr>
35  <w:i />
36 </w:rPr>
37 <w:t> Bar</w:t>
38</w:r>
39~~~
40
41Note that this is an ideal situation. In practice, it will probably be
42more---if, for example, the user turned italics
43off and then on.
44
45So, when you get the first run, which is marked as both bold and italic,
46you have no idea whether it's `Strong [Emph [Str "Foo"]]` or `Emph
47[Strong [Str "Foo"]]`.
48
49We combine two runs, then, by taking off the formatting that modifies an
50inline, seeing what is shared between them, and rebuilding an inline. We
51fold this to combine the inlines.
52
53-}
54
55module Text.Pandoc.Readers.Docx.Combine ( smushInlines
56                                        , smushBlocks
57                                        )
58       where
59
60import Data.List
61import Data.Bifunctor
62import Data.Sequence ( ViewL (..), ViewR (..), viewl, viewr, spanr, spanl
63                     , (><), (|>) )
64import Text.Pandoc.Builder
65
66data Modifier a = Modifier (a -> a)
67                | AttrModifier (Attr -> a -> a) Attr
68
69spaceOutInlinesL :: Inlines -> (Inlines, Inlines)
70spaceOutInlinesL ms = (l, stackInlines fs (m' <> r))
71  where (l, (fs, m'), r) = spaceOutInlines ms
72
73spaceOutInlinesR :: Inlines -> (Inlines, Inlines)
74spaceOutInlinesR ms = (stackInlines fs (l <> m'), r)
75  where (l, (fs, m'), r) = spaceOutInlines ms
76
77spaceOutInlines :: Inlines -> (Inlines, ([Modifier Inlines], Inlines), Inlines)
78spaceOutInlines ils =
79  let (fs, ils') = unstackInlines ils
80      (left, (right, contents')) = second (spanr isSpace) $ spanl isSpace $ unMany ils'
81      -- NOTE: spanr counterintuitively returns suffix as the FIRST tuple element
82  in (Many left, (fs, Many contents'), Many right)
83
84isSpace :: Inline -> Bool
85isSpace Space = True
86isSpace SoftBreak = True
87isSpace _ = False
88
89stackInlines :: [Modifier Inlines] -> Inlines -> Inlines
90stackInlines [] ms = ms
91stackInlines (Modifier f : fs) ms =
92  if null ms
93  then stackInlines fs ms
94  else f $ stackInlines fs ms
95stackInlines (AttrModifier f attr : fs) ms = f attr $ stackInlines fs ms
96
97unstackInlines :: Inlines -> ([Modifier Inlines], Inlines)
98unstackInlines ms = case ilModifierAndInnards ms of
99  Nothing         -> ([], ms)
100  Just (f, inner) -> first (f :) $ unstackInlines inner
101
102ilModifierAndInnards :: Inlines -> Maybe (Modifier Inlines, Inlines)
103ilModifierAndInnards ils = case viewl $ unMany ils of
104  x :< xs | null xs -> second fromList <$> case x of
105    Emph lst          -> Just (Modifier emph, lst)
106    Strong lst        -> Just (Modifier strong, lst)
107    SmallCaps lst     -> Just (Modifier smallcaps, lst)
108    Strikeout lst     -> Just (Modifier strikeout, lst)
109    Underline lst     -> Just (Modifier underline, lst)
110    Superscript lst   -> Just (Modifier superscript, lst)
111    Subscript lst     -> Just (Modifier subscript, lst)
112    Link attr lst tgt -> Just (Modifier $ uncurry (linkWith attr) tgt, lst)
113    Span attr lst     -> Just (AttrModifier spanWith attr, lst)
114    _                 -> Nothing
115  _ -> Nothing
116
117inlinesL :: Inlines -> (Inlines, Inlines)
118inlinesL ils = case viewl $ unMany ils of
119  (s :< sq) -> (singleton s, Many sq)
120  _         -> (mempty, ils)
121
122inlinesR :: Inlines -> (Inlines, Inlines)
123inlinesR ils = case viewr $ unMany ils of
124  (sq :> s) -> (Many sq, singleton s)
125  _         -> (ils, mempty)
126
127combineInlines :: Inlines -> Inlines -> Inlines
128combineInlines x y =
129  let (xs', x') = inlinesR x
130      (y', ys') = inlinesL y
131  in
132   xs' <> combineSingletonInlines x' y' <> ys'
133
134combineSingletonInlines :: Inlines -> Inlines -> Inlines
135combineSingletonInlines x y =
136  let (xfs, xs) = unstackInlines x
137      (yfs, ys) = unstackInlines y
138      shared = xfs `intersect` yfs
139      x_remaining = xfs \\ shared
140      y_remaining = yfs \\ shared
141      x_rem_attr = filter isAttrModifier x_remaining
142      y_rem_attr = filter isAttrModifier y_remaining
143  in
144   case null shared of
145     True | null xs && null ys ->
146            stackInlines (x_rem_attr <> y_rem_attr) mempty
147          | null xs ->
148            let (sp, y') = spaceOutInlinesL y in
149            stackInlines x_rem_attr mempty <> sp <> y'
150          | null ys ->
151            let (x', sp) = spaceOutInlinesR x in
152            x' <> sp <> stackInlines y_rem_attr mempty
153          | otherwise ->
154              let (x', xsp) = spaceOutInlinesR x
155                  (ysp, y') = spaceOutInlinesL y
156              in
157               x' <> xsp <> ysp <> y'
158     False -> stackInlines shared $
159              combineInlines
160              (stackInlines x_remaining xs)
161              (stackInlines y_remaining ys)
162
163combineBlocks :: Blocks -> Blocks -> Blocks
164combineBlocks bs cs
165  | bs' :> BlockQuote bs'' <- viewr (unMany bs)
166  , BlockQuote cs'' :< cs' <- viewl (unMany cs) =
167      Many $ (bs' |> BlockQuote (bs'' <> cs'')) >< cs'
168  | bs' :> CodeBlock attr codeStr <- viewr (unMany bs)
169  , CodeBlock attr' codeStr' :< cs' <- viewl (unMany cs)
170  , attr == attr' =
171      Many $ (bs' |> CodeBlock attr (codeStr <> "\n" <> codeStr')) >< cs'
172combineBlocks bs cs = bs <> cs
173
174instance (Monoid a, Eq a) => Eq (Modifier a) where
175  (Modifier f) == (Modifier g) = f mempty == g mempty
176  (AttrModifier f attr) == (AttrModifier g attr') = f attr mempty == g attr' mempty
177  _ == _ = False
178
179isAttrModifier :: Modifier a -> Bool
180isAttrModifier (AttrModifier _ _) = True
181isAttrModifier _                  = False
182
183smushInlines :: [Inlines] -> Inlines
184smushInlines xs = combineInlines xs' mempty
185  where xs' = foldl combineInlines mempty xs
186
187smushBlocks :: [Blocks] -> Blocks
188smushBlocks xs = foldl combineBlocks mempty xs
189