1{-# LANGUAGE OverloadedStrings #-}
2{- |
3   Module      : Text.Pandoc.BCP47
4   Copyright   : Copyright (C) 2017-2021 John MacFarlane
5   License     : GNU GPL, version 2 or above
6
7   Maintainer  : John MacFarlane <jgm@berkeley.edu>
8   Stability   : alpha
9   Portability : portable
10
11Functions for parsing and rendering BCP47 language identifiers.
12-}
13module Text.Pandoc.BCP47 (
14                       getLang
15                     , parseBCP47
16                     , Lang(..)
17                     , renderLang
18                     )
19where
20import Control.Monad (guard)
21import Data.Char (isAlphaNum, isAscii, isLetter, isLower, isUpper)
22import Text.Pandoc.Definition
23import Text.Pandoc.Options
24import Text.DocTemplates (FromContext(..))
25import qualified Data.Text as T
26import qualified Text.Parsec as P
27
28-- | Represents BCP 47 language/country code.
29data Lang = Lang{ langLanguage :: T.Text
30                , langScript   :: T.Text
31                , langRegion   :: T.Text
32                , langVariants :: [T.Text] }
33                deriving (Eq, Ord, Show)
34
35-- | Render a Lang as BCP 47.
36renderLang :: Lang -> T.Text
37renderLang lang = T.intercalate "-" (langLanguage lang : filter (not . T.null)
38                    ([langScript lang, langRegion lang] ++ langVariants lang))
39
40-- | Get the contents of the `lang` metadata field or variable.
41getLang :: WriterOptions -> Meta -> Maybe T.Text
42getLang opts meta =
43  case lookupContext "lang" (writerVariables opts) of
44        Just s -> Just s
45        _      ->
46          case lookupMeta "lang" meta of
47               Just (MetaBlocks [Para [Str s]])  -> Just s
48               Just (MetaBlocks [Plain [Str s]]) -> Just s
49               Just (MetaInlines [Str s])        -> Just s
50               Just (MetaString s)               -> Just s
51               _                                 -> Nothing
52
53-- | Parse a BCP 47 string as a Lang.  Currently we parse
54-- extensions and private-use fields as "variants," even
55-- though officially they aren't.
56parseBCP47 :: T.Text -> Either T.Text Lang
57parseBCP47 lang =
58  case P.parse bcp47 "lang" lang of
59       Right r -> Right r
60       Left e  -> Left $ T.pack $ show e
61  where bcp47 = do
62          language <- pLanguage
63          script <- P.option "" pScript
64          region <- P.option "" pRegion
65          variants <- P.many (pVariant P.<|> pExtension P.<|> pPrivateUse)
66          P.eof
67          return Lang{   langLanguage = language
68                       , langScript = script
69                       , langRegion = region
70                       , langVariants = variants }
71        asciiLetter = P.satisfy (\c -> isAscii c && isLetter c)
72        pLanguage = do
73          cs <- P.many1 asciiLetter
74          let lcs = length cs
75          guard $ lcs == 2 || lcs == 3
76          return $ T.toLower $ T.pack cs
77        pScript = P.try $ do
78          P.char '-'
79          x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c)
80          xs <- P.count 3
81                 (P.satisfy (\c -> isAscii c && isLetter c && isLower c))
82          return $ T.toLower $ T.pack (x:xs)
83        pRegion = P.try $ do
84          P.char '-'
85          cs <- P.many1 asciiLetter
86          let lcs = length cs
87          guard $ lcs == 2 || lcs == 3
88          return $ T.toUpper $ T.pack cs
89        pVariant = P.try $ do
90          P.char '-'
91          ds <- P.option "" (P.count 1 P.digit)
92          cs <- P.many1 asciiLetter
93          let var = ds ++ cs
94              lv = length var
95          guard $ if null ds
96                     then lv >= 5 && lv <= 8
97                     else lv == 4
98          return $ T.toLower $ T.pack var
99        pExtension = P.try $ do
100          P.char '-'
101          cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c)
102          let lcs = length cs
103          guard $ lcs >= 2 && lcs <= 8
104          return $ T.toLower $ T.pack cs
105        pPrivateUse = P.try $ do
106          P.char '-'
107          P.char 'x'
108          P.char '-'
109          cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c)
110          guard $ not (null cs) && length cs <= 8
111          let var = "x-" ++ cs
112          return $ T.toLower $ T.pack var
113