1{-# LANGUAGE StrictData #-}
2{-# LANGUAGE MultiParamTypeClasses #-}
3{-# LANGUAGE FlexibleInstances #-}
4{-# LANGUAGE OverloadedStrings #-}
5{-# LANGUAGE DeriveTraversable #-}
6-- | CSL JSON is the structured text format defined in
7-- <https://citeproc-js.readthedocs.io/en/latest/csl-json/markup.html>.
8-- It is used to represent formatted text inside CSL JSON bibliographies.
9-- For the most part it is a subset of HTML, with some special
10-- features like smart quote parsing.  This module defines a parser
11-- and a renderer for this format, as well as 'CiteprocOutput' and
12-- other typeclass instances.
13module Citeproc.CslJson
14  ( CslJson(..)
15  , cslJsonToJson
16  , renderCslJson
17  , parseCslJson
18  )
19where
20
21--  represent and parse CSL JSON pseudo-html
22--  https://citeproc-js.readthedocs.io/en/latest/csl-json/markup.html
23--  Supported:
24--  <i>italics</i>  -- will flip-flop
25--  <b>bold</b>     -- will flip-flop
26--  <span style="font-variant:small-caps;">...</span> -- ill flip-flop
27--  <sup>..</sup>
28--  <sub>..</sub>
29--  <span class="nocase">..</span>  -- suppress case transformations
30
31
32import Citeproc.Types
33import Citeproc.CaseTransform
34import Data.Ord ()
35import Data.Char (isAlphaNum, isSpace, isAscii)
36import Data.Text (Text)
37import Data.Maybe (fromMaybe)
38import qualified Data.Text as T
39import qualified Data.Map as M
40import Data.Foldable (fold)
41import Data.Functor.Identity
42import Data.Attoparsec.Text as P
43import Data.Aeson (FromJSON(..), ToJSON(..), Value(..), object)
44import Control.Monad.Trans.State
45import Control.Monad (guard, when)
46import Control.Applicative ((<|>))
47import Data.Generics.Uniplate.Direct
48
49data CslJson a =
50     CslText a
51   | CslEmpty
52   | CslConcat (CslJson a) (CslJson a)
53   | CslQuoted (CslJson a)
54   | CslItalic (CslJson a)
55   | CslNormal (CslJson a)
56   | CslBold   (CslJson a)
57   | CslUnderline (CslJson a)
58   | CslNoDecoration (CslJson a)
59   | CslSmallCaps (CslJson a)
60   | CslBaseline  (CslJson a)
61   | CslSup       (CslJson a)
62   | CslSub       (CslJson a)
63   | CslNoCase    (CslJson a)
64   | CslDiv Text  (CslJson a)
65  deriving (Show, Eq, Ord, Functor, Foldable, Traversable)
66
67instance Semigroup (CslJson a) where
68  (CslConcat x y) <> z = x <> (y <> z)
69  CslEmpty <> x = x
70  x <> CslEmpty = x
71  x <> y = CslConcat x y
72
73instance Monoid (CslJson a) where
74  mempty = CslEmpty
75  mappend = (<>)
76
77instance FromJSON (CslJson Text) where
78  parseJSON = fmap (parseCslJson mempty) . parseJSON
79
80instance ToJSON (CslJson Text) where
81  toJSON = toJSON . renderCslJson False mempty
82
83instance Uniplate (CslJson a) where
84  uniplate (CslText x)         = plate CslText |- x
85  uniplate (CslEmpty)          = plate CslEmpty
86  uniplate (CslConcat x y)     = plate CslConcat |* x |* y
87  uniplate (CslQuoted x)       = plate CslQuoted |* x
88  uniplate (CslItalic x)       = plate CslItalic |* x
89  uniplate (CslNormal x)       = plate CslNormal |* x
90  uniplate (CslBold x)         = plate CslBold |* x
91  uniplate (CslUnderline x)    = plate CslUnderline |* x
92  uniplate (CslNoDecoration x) = plate CslNoDecoration |* x
93  uniplate (CslSmallCaps x)    = plate CslSmallCaps |* x
94  uniplate (CslBaseline x)     = plate CslBaseline |* x
95  uniplate (CslSup x)          = plate CslSup |* x
96  uniplate (CslSub x)          = plate CslSub |* x
97  uniplate (CslNoCase x)       = plate CslNoCase |* x
98  uniplate (CslDiv t x)        = plate CslDiv |- t |* x
99
100instance Biplate (CslJson a) (CslJson a) where
101  biplate = plateSelf
102
103instance CiteprocOutput (CslJson Text) where
104  toText                = fold
105  fromText              = parseCslJson mempty
106  dropTextWhile         = dropTextWhile'
107  dropTextWhileEnd      = dropTextWhileEnd'
108  addFontVariant x      =
109    case x of
110      NormalVariant    -> id
111      SmallCapsVariant -> CslSmallCaps
112  addFontStyle x        =
113    case x of
114      NormalFont       -> CslNormal
115      ItalicFont       -> CslItalic
116      ObliqueFont      -> CslItalic
117  addFontWeight x       =
118    case x of
119      NormalWeight     -> id
120      LightWeight      -> id
121      BoldWeight       -> CslBold
122  addTextDecoration x   =
123    case x of
124      NoDecoration        -> CslNoDecoration
125      UnderlineDecoration -> CslUnderline
126  addVerticalAlign x    =
127    case x of
128      BaselineAlign    -> CslBaseline
129      SubAlign         -> CslSub
130      SupAlign         -> CslSup
131  addTextCase mblang x =
132    case x of
133      Lowercase        -> caseTransform mblang withLowercaseAll
134      Uppercase        -> caseTransform mblang withUppercaseAll
135      CapitalizeFirst  -> caseTransform mblang withCapitalizeFirst
136      CapitalizeAll    -> caseTransform mblang withCapitalizeWords
137      SentenceCase     -> caseTransform mblang withSentenceCase
138      TitleCase        -> caseTransform mblang withTitleCase
139  addDisplay x          =
140    case x of
141      DisplayBlock       -> CslDiv "block"
142      DisplayLeftMargin  -> CslDiv "left-margin"
143      DisplayRightInline -> CslDiv "right-inline"
144      DisplayIndent      -> CslDiv "indent"
145  addQuotes             = CslQuoted
146  inNote                = id -- no-op
147  movePunctuationInsideQuotes
148                        = punctuationInsideQuotes
149  mapText f             = runIdentity . traverse (return . f)
150  addHyperlink _        = id -- CSL JSON doesn't support links
151
152dropTextWhile' :: (Char -> Bool) -> CslJson Text -> CslJson Text
153dropTextWhile' f x = evalState (traverse g x) False
154  where
155   g t = do
156     pastFirst <- get
157     if pastFirst
158        then return t
159        else do
160          put True
161          return (T.dropWhile f t)
162
163dropTextWhileEnd' :: (Char -> Bool) -> CslJson Text -> CslJson Text
164dropTextWhileEnd' f el =
165  case el of
166     CslEmpty -> CslEmpty
167     CslText t -> CslText (T.dropWhileEnd f t)
168     CslConcat x y -> CslConcat x (dropTextWhileEnd' f y)
169     CslQuoted x -> CslQuoted (dropTextWhileEnd' f x)
170     CslItalic x -> CslItalic (dropTextWhileEnd' f x)
171     CslNormal x -> CslNormal (dropTextWhileEnd' f x)
172     CslBold x -> CslBold (dropTextWhileEnd' f x)
173     CslUnderline x -> CslUnderline (dropTextWhileEnd' f x)
174     CslNoDecoration x -> CslNoDecoration (dropTextWhileEnd' f x)
175     CslSmallCaps x -> CslSmallCaps (dropTextWhileEnd' f x)
176     CslBaseline x -> CslBaseline (dropTextWhileEnd' f x)
177     CslSub x -> CslSub (dropTextWhileEnd' f x)
178     CslSup x -> CslSup (dropTextWhileEnd' f x)
179     CslNoCase x -> CslNoCase (dropTextWhileEnd' f x)
180     CslDiv t x -> CslDiv t (dropTextWhileEnd' f x)
181
182parseCslJson :: Locale -> Text -> CslJson Text
183parseCslJson locale t =
184  case P.parseOnly
185         (P.many' (pCslJson locale) <* P.endOfInput) t of
186    Left _   -> CslText t
187    Right xs -> mconcat xs
188
189pCslJson :: Locale -> P.Parser (CslJson Text)
190pCslJson locale = P.choice
191  [ pCslText
192  , pCslQuoted
193  , pCslItalic
194  , pCslBold
195  , pCslUnderline
196  , pCslNoDecoration
197  , pCslSmallCaps
198  , pCslSup
199  , pCslSub
200  , pCslBaseline
201  , pCslNoCase
202  , pCslSymbol
203  ]
204 where
205  ((outerOpenQuote, outerCloseQuote), (innerOpenQuote, innerCloseQuote)) =
206     fromMaybe (("\x201C","\x201D"),("\x2018","\x2019")) $ lookupQuotes locale
207  isSpecialChar c = c == '<' || c == '>' || c == '\'' || c == '"' ||
208       c == '’' || (not (isAscii c) && (isSuperscriptChar c || isQuoteChar c))
209  isQuoteChar = P.inClass
210       (T.unpack (outerOpenQuote <> outerCloseQuote <>
211                 innerOpenQuote <> innerCloseQuote))
212  isSuperscriptChar = P.inClass superscriptChars
213  isApostrophe '\'' = True
214  isApostrophe '’'  = True
215  isApostrophe _    = False
216  pCsl = pCslJson locale
217  notFollowedBySpace =
218    P.peekChar' >>= guard . not . isSpaceChar
219  isSpaceChar = P.inClass [' ','\t','\n','\r']
220  pOpenQuote = (("\"" <$ P.char '"')
221                <|> ("'" <$ P.char '\'')
222                <|> (outerCloseQuote <$ P.string outerOpenQuote)
223                <|> (innerCloseQuote <$ P.string innerOpenQuote))
224                 <* notFollowedBySpace
225  pSpace = P.skipWhile isSpaceChar
226  pCslText = CslText . addNarrowSpace <$>
227    (  do t <- P.takeWhile1 (\c -> isAlphaNum c && not (isSpecialChar c))
228          -- apostrophe
229          P.option t $ do _ <- P.satisfy isApostrophe
230                          t' <- P.takeWhile1 isAlphaNum
231                          return (t <> "’" <> t')
232    <|>
233      (P.takeWhile1 (\c -> not (isAlphaNum c || isSpecialChar c))) )
234  pCslQuoted = CslQuoted <$>
235    do cl <- pOpenQuote
236       mbc <- peekChar
237       case mbc of
238         Just c  | T.singleton c == cl -> fail "unexpected close quote"
239         _ -> return ()
240       mconcat <$> P.manyTill' pCsl (P.string cl)
241  pCslSymbol = do
242    c <- P.satisfy isSpecialChar
243    return $
244       if isApostrophe c
245          then CslText "’"
246          else charToSup c
247  pCslItalic = CslItalic . mconcat <$>
248    (P.string "<i>" *> P.manyTill' pCsl (P.string "</i>"))
249  pCslBold = CslBold . mconcat <$>
250    (P.string "<b>" *> P.manyTill' pCsl (P.string "</b>"))
251  pCslUnderline = CslUnderline . mconcat <$>
252    (P.string "<u>" *> P.manyTill' pCsl (P.string "</u>"))
253  pCslNoDecoration = CslNoDecoration . mconcat <$>
254    (P.string "<span" *> pSpace *>
255     P.string "class=\"nodecor\"" *> pSpace *> P.char '>' *>
256     P.manyTill' pCsl (P.string "</span>"))
257  pCslSup = CslSup . mconcat <$>
258    (P.string "<sup>" *> P.manyTill' pCsl (P.string "</sup>"))
259  pCslSub = CslSub . mconcat <$>
260    (P.string "<sub>" *> P.manyTill' pCsl (P.string "</sub>"))
261  pCslBaseline = CslBaseline . mconcat <$>
262    (P.string "<span" *> pSpace *> P.string "style=\"baseline\">" *>
263      P.manyTill' pCsl (P.string "</span>"))
264  pCslSmallCaps = CslSmallCaps . mconcat <$>
265    ((P.string "<span" *> pSpace *>
266      P.string "style=\"font-variant:" *> pSpace *>
267      P.string "small-caps;" *> pSpace *> P.char '"' *>
268      pSpace *> P.char '>' *> P.manyTill' pCsl (P.string "</span>"))
269    <|>
270     (P.string "<sc>" *> P.manyTill' pCsl (P.string "</sc>")))
271  pCslNoCase = CslNoCase . mconcat <$>
272    (P.string "<span" *> pSpace *>
273     P.string "class=\"nocase\"" *> pSpace *> P.char '>' *>
274     P.manyTill' pCsl (P.string "</span>"))
275  addNarrowSpace =
276    T.replace " ;" "\x202F;" .
277    T.replace " ?" "\x202F?" .
278    T.replace " !" "\x202F!" .
279    T.replace " »" "\x202F»" .
280    T.replace "« " "«\x202F"
281
282data RenderContext =
283  RenderContext
284  { useOuterQuotes  :: Bool
285  , useItalics      :: Bool
286  , useBold         :: Bool
287  , useSmallCaps    :: Bool
288  } deriving (Show, Eq)
289
290lookupTerm :: Locale -> Text -> Maybe Text
291lookupTerm locale termname = do
292  let terms = localeTerms locale
293  case M.lookup termname terms of
294     Just ((_,t):_) -> Just t
295     _              -> Nothing
296
297lookupQuotes :: Locale -> Maybe ((Text, Text), (Text, Text))
298lookupQuotes locale = do
299  outerQuotes <- (,) <$> lookupTerm locale "open-quote"
300                     <*> lookupTerm locale "close-quote"
301  innerQuotes <- (,) <$> lookupTerm locale "open-inner-quote"
302                     <*> lookupTerm locale "close-inner-quote"
303  return (outerQuotes, innerQuotes)
304
305-- | Render 'CslJson' as 'Text'.  Set the first parameter to True
306-- when rendering HTML output (so that entities are escaped).
307-- Set it to False when rendering for CSL JSON bibliographies.
308renderCslJson :: Bool          -- ^ Escape < > & using entities
309              -> Locale        -- ^ Locale (used for quote styles)
310              -> CslJson Text  -- ^ CslJson to render
311              -> Text
312renderCslJson useEntities locale =
313  go (RenderContext True True True True)
314 where
315  (outerQuotes, innerQuotes) = fromMaybe (("\"","\""),("'","'"))
316                                   $ lookupQuotes locale
317  go :: RenderContext -> CslJson Text -> Text
318  go ctx el =
319    case el of
320      CslText t -> escape t
321      CslEmpty -> mempty
322      CslConcat x y -> go ctx x <> go ctx y
323      CslQuoted x
324        | useOuterQuotes ctx
325          -> fst outerQuotes <>
326             go ctx{ useOuterQuotes = False } x <>
327             snd outerQuotes
328        | otherwise
329          -> fst innerQuotes <>
330             go ctx{ useOuterQuotes = True } x <>
331             snd innerQuotes
332      CslNormal x
333        | useItalics ctx -> go ctx x
334        | otherwise      -> "<span style=\"font-style:normal;\">" <>
335                              go ctx x <> "</span>"
336      CslItalic x
337        | useItalics ctx -> "<i>" <> go ctx{ useItalics = False } x <> "</i>"
338        | otherwise -> "<span style=\"font-style:normal;\">" <>
339                          go ctx{ useItalics = True } x <> "</span>"
340      CslBold x
341        | useBold ctx -> "<b>" <> go ctx{ useBold = False } x <> "</b>"
342        | otherwise -> "<span style=\"font-weight:normal;\">" <>
343                          go ctx{ useBold = True } x <> "</span>"
344      CslUnderline x -> "<u>" <> go ctx x <> "</u>"
345      CslNoDecoration x -> "<span style=\"" <>
346                           (if useSmallCaps ctx
347                               then ""
348                               else "font-variant:normal;") <>
349                           (if useBold ctx
350                               then ""
351                               else "font-weight:normal;") <>
352                           (if useItalics ctx
353                               then ""
354                               else "font-style:normal;") <>
355                           "\">" <> go ctx x <> "</span>"
356      CslSmallCaps x
357        | useSmallCaps ctx -> "<span style=\"font-variant:small-caps;\">"
358                                <> go ctx{ useSmallCaps = False } x <>
359                                "</span>"
360        | otherwise -> "<span style=\"font-variant:normal;\">" <>
361                          go ctx{ useSmallCaps = True } x <> "</span>"
362      CslSup x -> "<sup>" <> go ctx x <> "</sup>"
363      CslSub x -> "<sub>" <> go ctx x <> "</sub>"
364      CslBaseline x -> "<span style=\"baseline\">" <> go ctx x <> "</span>"
365      CslDiv t x -> "<div class=\"csl-" <> t <> "\">" <> go ctx x <> "</div>"
366      CslNoCase x -> go ctx x -- nocase is just for internal purposes
367  escape t
368    | useEntities
369      = case T.findIndex (\c -> c == '<' || c == '>' || c == '&') t of
370               Just _ -> T.replace "<" "&#60;" .
371                         T.replace ">" "&#62;" .
372                         T.replace "&" "&#38;" $ t
373               Nothing -> t
374    | otherwise = t
375
376cslJsonToJson :: Locale -> CslJson Text -> [Value]
377cslJsonToJson locale = go (RenderContext True True True True)
378 where
379  (outerQuotes, innerQuotes) = fromMaybe
380       (("\"","\""),("'","'")) $ lookupQuotes locale
381  isString (String _) = True
382  isString _ = False
383  consolidateStrings :: [Value] -> [Value]
384  consolidateStrings [] = []
385  consolidateStrings (String t : rest) =
386    let (xs,ys) = span isString rest
387     in String (t <> mconcat [t' |  String t' <- xs]) :
388        consolidateStrings ys
389  consolidateStrings (x : rest) =
390    x : consolidateStrings rest
391  go :: RenderContext -> CslJson Text -> [Value]
392  go ctx el = consolidateStrings $
393    case el of
394      CslText t -> [String t]
395      CslEmpty -> []
396      CslConcat x CslEmpty -> go ctx x
397      CslConcat (CslConcat x y) z -> go ctx (CslConcat x (CslConcat y z))
398      CslConcat x y -> go ctx x <> go ctx y
399      CslQuoted x
400        | useOuterQuotes ctx
401          -> [String (fst outerQuotes)] <>
402             go ctx{ useOuterQuotes = False } x <>
403             [String (snd outerQuotes)]
404        | otherwise
405          -> [String (fst innerQuotes)] <>
406             go ctx{ useOuterQuotes = True } x <>
407             [String (snd innerQuotes)]
408      CslNormal x
409        | useItalics ctx -> go ctx x
410        | otherwise      -> [ object
411                               [ ("format", "no-italics")
412                               , ("contents", toJSON $ go ctx x)
413                               ]
414                            ]
415      CslItalic x
416        | useItalics ctx -> [ object
417                               [ ("format", "italics")
418                               , ("contents", toJSON $
419                                    go ctx{ useItalics = False } x)
420                               ]
421                            ]
422        | otherwise      -> [ object
423                               [ ("format", "no-italics")
424                               , ("contents", toJSON $
425                                    go ctx{ useItalics = False } x)
426                               ]
427                            ]
428      CslBold x
429        | useItalics ctx -> [ object
430                               [ ("format", "bold")
431                               , ("contents", toJSON $
432                                    go ctx{ useBold = False } x)
433                               ]
434                            ]
435        | otherwise      -> [ object
436                               [ ("format", "no-bold")
437                               , ("contents", toJSON $
438                                    go ctx{ useBold = False } x)
439                               ]
440                            ]
441      CslUnderline x     -> [ object
442                               [ ("format", "underline")
443                               , ("contents", toJSON $ go ctx x)
444                               ]
445                            ]
446      CslNoDecoration x -> [ object
447                               [ ("format", "no-decoration")
448                               , ("contents", toJSON $ go ctx x)
449                               ]
450                           ]
451      CslSmallCaps x
452        | useSmallCaps ctx -> [ object
453                               [ ("format", "small-caps")
454                               , ("contents", toJSON $
455                                    go ctx{ useSmallCaps = False } x)
456                               ]
457                            ]
458        | otherwise      -> [ object
459                               [ ("format", "no-small-caps")
460                               , ("contents", toJSON $
461                                    go ctx{ useSmallCaps = False } x)
462                               ]
463                            ]
464      CslSup x           -> [ object
465                               [ ("format", "superscript")
466                               , ("contents", toJSON $ go ctx x)
467                               ]
468                            ]
469      CslSub x           -> [ object
470                               [ ("format", "subscript")
471                               , ("contents", toJSON $ go ctx x)
472                               ]
473                            ]
474      CslBaseline x      -> [ object
475                               [ ("format", "baseline")
476                               , ("contents", toJSON $ go ctx x)
477                               ]
478                            ]
479      CslDiv t x         -> [ object
480                               [ ("format", "div")
481                               , ("class", toJSON $ "csl-" <> t)
482                               , ("contents", toJSON $ go ctx x)
483                               ]
484                            ]
485      CslNoCase x -> go ctx x -- nocase is just for internal purposes
486
487
488-- custom traversal which does not descend into
489-- CslSmallCaps, Baseline, SUp, Sub, or NoCase (implicit nocase)
490caseTransform' :: (CaseTransformState -> Text -> Text)
491               -> Int -- level in hierarchy
492               -> CslJson Text
493               -> State CaseTransformState (CslJson Text)
494caseTransform' f lev el =
495  case el of
496     CslText x         -> CslText . mconcat <$> mapM g (splitUp x)
497     CslConcat x y     -> do
498       x' <- caseTransform' f lev x
499       let lastWord = lev == 0 && not (hasWordBreak y)
500       st <- get
501       when (lastWord &&
502             (st == AfterWordEnd || st == StartSentence || st == Start)) $
503        put BeforeLastWord
504       y' <- caseTransform' f lev y
505       return $ CslConcat x' y'
506     CslQuoted x       -> CslQuoted <$> caseTransform' f (lev + 1) x
507     CslItalic x       -> CslItalic <$> caseTransform' f (lev + 1) x
508     CslNormal x       -> CslNormal <$> caseTransform' f (lev + 1) x
509     CslBold   x       -> CslBold   <$> caseTransform' f (lev + 1) x
510     CslUnderline x    -> CslUnderline <$> caseTransform' f (lev + 1) x
511     CslNoDecoration _ -> return' el
512     CslSmallCaps _    -> return' el
513     CslBaseline _     -> return' el
514     CslSub _          -> return' el
515     CslSup _          -> return' el
516     CslNoCase _       -> return' el
517     CslDiv _ _        -> return' el
518     CslEmpty          -> return' el
519 where
520  -- we need to apply g to update the state:
521  return' x = x <$ g (toText x)
522
523  g :: Text -> State CaseTransformState Text
524  g t = do
525    st <- get
526    put $ case T.unsnoc t of
527            Nothing -> st
528            Just (_,c)
529              | c == '.' || c == '?' || c == '!' || c == ':' ->
530                AfterSentenceEndingPunctuation
531              | isAlphaNum c -> AfterWordChar
532              | isSpace c
533              , st == AfterSentenceEndingPunctuation -> StartSentence
534              | isWordBreak c -> AfterWordEnd
535              | otherwise -> st
536    return $
537      if T.all isAlphaNum t
538         then f st t
539         else t
540  isWordBreak '-' = True
541  isWordBreak '/' = True
542  isWordBreak '\x2013' = True
543  isWordBreak '\x2014' = True
544  isWordBreak c = isSpace c
545  hasWordBreak = any (T.any isWordBreak)
546  splitUp = T.groupBy sameType
547  sameType c d =
548    (isAlphaNum c && isAlphaNum d) || (isSpace c && isSpace d)
549
550caseTransform :: Maybe Lang
551              -> CaseTransformer
552              -> CslJson Text
553              -> CslJson Text
554caseTransform mblang f x =
555  evalState (caseTransform' (unCaseTransformer f mblang) 0 x) Start
556
557punctuationInsideQuotes :: CslJson Text -> CslJson Text
558punctuationInsideQuotes = go
559 where
560  startsWithMovable t =
561    case T.uncons t of
562      Just (c,_) -> c == '.' || c == ',' || c == '!' || c == '?'
563      Nothing    -> False
564  go el =
565    case el of
566      CslConcat CslEmpty x -> go x
567      CslConcat x CslEmpty -> go x
568      CslConcat (CslQuoted x) y ->
569         case go y of
570           (CslText t) | startsWithMovable t
571             -> CslQuoted (go (x <> CslText (T.take 1 t)))
572               <> CslText (T.drop 1 t)
573           (CslConcat (CslText t) z) | startsWithMovable t
574             -> CslQuoted (go (x <> CslText (T.take 1 t))) <>
575                 CslText (T.drop 1 t) <> z
576           z                      -> CslQuoted x <> z
577      CslConcat (CslConcat x y) z -> go (CslConcat x (CslConcat y z))
578      CslConcat x y               -> go x <> go y
579      CslQuoted x                 -> CslQuoted (go x)
580      CslItalic x                 -> CslItalic (go x)
581      CslNormal x                 -> CslNormal (go x)
582      CslBold x                   -> CslBold (go x)
583      CslUnderline x              -> CslUnderline (go x)
584      CslNoDecoration x           -> CslNoDecoration (go x)
585      CslSmallCaps x              -> CslSmallCaps (go x)
586      CslSup x                    -> CslSup (go x)
587      CslSub x                    -> CslSub (go x)
588      CslBaseline x               -> CslBaseline (go x)
589      CslNoCase x                 -> CslNoCase (go x)
590      CslDiv t x                  -> CslDiv t (go x)
591      CslText t                   -> CslText t
592      CslEmpty                    -> CslEmpty
593
594superscriptChars :: [Char]
595superscriptChars =
596  [ '\x00AA'
597  , '\x00B2'
598  , '\x00B3'
599  , '\x00B9'
600  , '\x00BA'
601  , '\x02B0'
602  , '\x02B1'
603  , '\x02B2'
604  , '\x02B3'
605  , '\x02B4'
606  , '\x02B5'
607  , '\x02B6'
608  , '\x02B7'
609  , '\x02B8'
610  , '\x02E0'
611  , '\x02E1'
612  , '\x02E2'
613  , '\x02E3'
614  , '\x02E4'
615  , '\x1D2C'
616  , '\x1D2D'
617  , '\x1D2E'
618  , '\x1D30'
619  , '\x1D31'
620  , '\x1D32'
621  , '\x1D33'
622  , '\x1D34'
623  , '\x1D35'
624  , '\x1D36'
625  , '\x1D37'
626  , '\x1D38'
627  , '\x1D39'
628  , '\x1D3A'
629  , '\x1D3C'
630  , '\x1D3D'
631  , '\x1D3E'
632  , '\x1D3F'
633  , '\x1D40'
634  , '\x1D41'
635  , '\x1D42'
636  , '\x1D43'
637  , '\x1D44'
638  , '\x1D45'
639  , '\x1D46'
640  , '\x1D47'
641  , '\x1D48'
642  , '\x1D49'
643  , '\x1D4A'
644  , '\x1D4B'
645  , '\x1D4C'
646  , '\x1D4D'
647  , '\x1D4F'
648  , '\x1D50'
649  , '\x1D51'
650  , '\x1D52'
651  , '\x1D53'
652  , '\x1D54'
653  , '\x1D55'
654  , '\x1D56'
655  , '\x1D57'
656  , '\x1D58'
657  , '\x1D59'
658  , '\x1D5A'
659  , '\x1D5B'
660  , '\x1D5C'
661  , '\x1D5D'
662  , '\x1D5E'
663  , '\x1D5F'
664  , '\x1D60'
665  , '\x1D61'
666  , '\x2070'
667  , '\x2071'
668  , '\x2074'
669  , '\x2075'
670  , '\x2076'
671  , '\x2077'
672  , '\x2078'
673  , '\x2079'
674  , '\x207A'
675  , '\x207B'
676  , '\x207C'
677  , '\x207D'
678  , '\x207E'
679  , '\x207F'
680  , '\x2120'
681  , '\x2122'
682  , '\x3192'
683  , '\x3193'
684  , '\x3194'
685  , '\x3195'
686  , '\x3196'
687  , '\x3197'
688  , '\x3198'
689  , '\x3199'
690  , '\x319A'
691  , '\x319B'
692  , '\x319C'
693  , '\x319D'
694  , '\x319E'
695  , '\x319F'
696  , '\x02C0'
697  , '\x02C1'
698  , '\x06E5'
699  , '\x06E6'
700  ]
701
702charToSup :: Char -> CslJson Text
703charToSup c =
704  case c of
705    '\x00AA' -> CslSup (CslText "\x0061")
706    '\x00B2' -> CslSup (CslText "\x0032")
707    '\x00B3' -> CslSup (CslText "\x0033")
708    '\x00B9' -> CslSup (CslText "\x0031")
709    '\x00BA' -> CslSup (CslText "\x006F")
710    '\x02B0' -> CslSup (CslText "\x0068")
711    '\x02B1' -> CslSup (CslText "\x0266")
712    '\x02B2' -> CslSup (CslText "\x006A")
713    '\x02B3' -> CslSup (CslText "\x0072")
714    '\x02B4' -> CslSup (CslText "\x0279")
715    '\x02B5' -> CslSup (CslText "\x027B")
716    '\x02B6' -> CslSup (CslText "\x0281")
717    '\x02B7' -> CslSup (CslText "\x0077")
718    '\x02B8' -> CslSup (CslText "\x0079")
719    '\x02E0' -> CslSup (CslText "\x0263")
720    '\x02E1' -> CslSup (CslText "\x006C")
721    '\x02E2' -> CslSup (CslText "\x0073")
722    '\x02E3' -> CslSup (CslText "\x0078")
723    '\x02E4' -> CslSup (CslText "\x0295")
724    '\x1D2C' -> CslSup (CslText "\x0041")
725    '\x1D2D' -> CslSup (CslText "\x00C6")
726    '\x1D2E' -> CslSup (CslText "\x0042")
727    '\x1D30' -> CslSup (CslText "\x0044")
728    '\x1D31' -> CslSup (CslText "\x0045")
729    '\x1D32' -> CslSup (CslText "\x018E")
730    '\x1D33' -> CslSup (CslText "\x0047")
731    '\x1D34' -> CslSup (CslText "\x0048")
732    '\x1D35' -> CslSup (CslText "\x0049")
733    '\x1D36' -> CslSup (CslText "\x004A")
734    '\x1D37' -> CslSup (CslText "\x004B")
735    '\x1D38' -> CslSup (CslText "\x004C")
736    '\x1D39' -> CslSup (CslText "\x004D")
737    '\x1D3A' -> CslSup (CslText "\x004E")
738    '\x1D3C' -> CslSup (CslText "\x004F")
739    '\x1D3D' -> CslSup (CslText "\x0222")
740    '\x1D3E' -> CslSup (CslText "\x0050")
741    '\x1D3F' -> CslSup (CslText "\x0052")
742    '\x1D40' -> CslSup (CslText "\x0054")
743    '\x1D41' -> CslSup (CslText "\x0055")
744    '\x1D42' -> CslSup (CslText "\x0057")
745    '\x1D43' -> CslSup (CslText "\x0061")
746    '\x1D44' -> CslSup (CslText "\x0250")
747    '\x1D45' -> CslSup (CslText "\x0251")
748    '\x1D46' -> CslSup (CslText "\x1D02")
749    '\x1D47' -> CslSup (CslText "\x0062")
750    '\x1D48' -> CslSup (CslText "\x0064")
751    '\x1D49' -> CslSup (CslText "\x0065")
752    '\x1D4A' -> CslSup (CslText "\x0259")
753    '\x1D4B' -> CslSup (CslText "\x025B")
754    '\x1D4C' -> CslSup (CslText "\x025C")
755    '\x1D4D' -> CslSup (CslText "\x0067")
756    '\x1D4F' -> CslSup (CslText "\x006B")
757    '\x1D50' -> CslSup (CslText "\x006D")
758    '\x1D51' -> CslSup (CslText "\x014B")
759    '\x1D52' -> CslSup (CslText "\x006F")
760    '\x1D53' -> CslSup (CslText "\x0254")
761    '\x1D54' -> CslSup (CslText "\x1D16")
762    '\x1D55' -> CslSup (CslText "\x1D17")
763    '\x1D56' -> CslSup (CslText "\x0070")
764    '\x1D57' -> CslSup (CslText "\x0074")
765    '\x1D58' -> CslSup (CslText "\x0075")
766    '\x1D59' -> CslSup (CslText "\x1D1D")
767    '\x1D5A' -> CslSup (CslText "\x026F")
768    '\x1D5B' -> CslSup (CslText "\x0076")
769    '\x1D5C' -> CslSup (CslText "\x1D25")
770    '\x1D5D' -> CslSup (CslText "\x03B2")
771    '\x1D5E' -> CslSup (CslText "\x03B3")
772    '\x1D5F' -> CslSup (CslText "\x03B4")
773    '\x1D60' -> CslSup (CslText "\x03C6")
774    '\x1D61' -> CslSup (CslText "\x03C7")
775    '\x2070' -> CslSup (CslText "\x0030")
776    '\x2071' -> CslSup (CslText "\x0069")
777    '\x2074' -> CslSup (CslText "\x0034")
778    '\x2075' -> CslSup (CslText "\x0035")
779    '\x2076' -> CslSup (CslText "\x0036")
780    '\x2077' -> CslSup (CslText "\x0037")
781    '\x2078' -> CslSup (CslText "\x0038")
782    '\x2079' -> CslSup (CslText "\x0039")
783    '\x207A' -> CslSup (CslText "\x002B")
784    '\x207B' -> CslSup (CslText "\x2212")
785    '\x207C' -> CslSup (CslText "\x003D")
786    '\x207D' -> CslSup (CslText "\x0028")
787    '\x207E' -> CslSup (CslText "\x0029")
788    '\x207F' -> CslSup (CslText "\x006E")
789    '\x2120' -> CslSup (CslText "\x0053\x004D")
790    '\x2122' -> CslSup (CslText "\x0054\x004D")
791    '\x3192' -> CslSup (CslText "\x4E00")
792    '\x3193' -> CslSup (CslText "\x4E8C")
793    '\x3194' -> CslSup (CslText "\x4E09")
794    '\x3195' -> CslSup (CslText "\x56DB")
795    '\x3196' -> CslSup (CslText "\x4E0A")
796    '\x3197' -> CslSup (CslText "\x4E2D")
797    '\x3198' -> CslSup (CslText "\x4E0B")
798    '\x3199' -> CslSup (CslText "\x7532")
799    '\x319A' -> CslSup (CslText "\x4E59")
800    '\x319B' -> CslSup (CslText "\x4E19")
801    '\x319C' -> CslSup (CslText "\x4E01")
802    '\x319D' -> CslSup (CslText "\x5929")
803    '\x319E' -> CslSup (CslText "\x5730")
804    '\x319F' -> CslSup (CslText "\x4EBA")
805    '\x02C0' -> CslSup (CslText "\x0294")
806    '\x02C1' -> CslSup (CslText "\x0295")
807    '\x06E5' -> CslSup (CslText "\x0648")
808    '\x06E6' -> CslSup (CslText "\x064A")
809    _        -> CslText $ T.singleton c
810