1{-# LANGUAGE OverloadedStrings #-} 2{- | 3 Module : Text.Pandoc.BCP47 4 Copyright : Copyright (C) 2017-2021 John MacFarlane 5 License : GNU GPL, version 2 or above 6 7 Maintainer : John MacFarlane <jgm@berkeley.edu> 8 Stability : alpha 9 Portability : portable 10 11Functions for parsing and rendering BCP47 language identifiers. 12-} 13module Text.Pandoc.BCP47 ( 14 getLang 15 , parseBCP47 16 , Lang(..) 17 , renderLang 18 ) 19where 20import Control.Monad (guard) 21import Data.Char (isAlphaNum, isAscii, isLetter, isLower, isUpper) 22import Text.Pandoc.Definition 23import Text.Pandoc.Options 24import Text.DocTemplates (FromContext(..)) 25import qualified Data.Text as T 26import qualified Text.Parsec as P 27 28-- | Represents BCP 47 language/country code. 29data Lang = Lang{ langLanguage :: T.Text 30 , langScript :: T.Text 31 , langRegion :: T.Text 32 , langVariants :: [T.Text] } 33 deriving (Eq, Ord, Show) 34 35-- | Render a Lang as BCP 47. 36renderLang :: Lang -> T.Text 37renderLang lang = T.intercalate "-" (langLanguage lang : filter (not . T.null) 38 ([langScript lang, langRegion lang] ++ langVariants lang)) 39 40-- | Get the contents of the `lang` metadata field or variable. 41getLang :: WriterOptions -> Meta -> Maybe T.Text 42getLang opts meta = 43 case lookupContext "lang" (writerVariables opts) of 44 Just s -> Just s 45 _ -> 46 case lookupMeta "lang" meta of 47 Just (MetaBlocks [Para [Str s]]) -> Just s 48 Just (MetaBlocks [Plain [Str s]]) -> Just s 49 Just (MetaInlines [Str s]) -> Just s 50 Just (MetaString s) -> Just s 51 _ -> Nothing 52 53-- | Parse a BCP 47 string as a Lang. Currently we parse 54-- extensions and private-use fields as "variants," even 55-- though officially they aren't. 56parseBCP47 :: T.Text -> Either T.Text Lang 57parseBCP47 lang = 58 case P.parse bcp47 "lang" lang of 59 Right r -> Right r 60 Left e -> Left $ T.pack $ show e 61 where bcp47 = do 62 language <- pLanguage 63 script <- P.option "" pScript 64 region <- P.option "" pRegion 65 variants <- P.many (pVariant P.<|> pExtension P.<|> pPrivateUse) 66 P.eof 67 return Lang{ langLanguage = language 68 , langScript = script 69 , langRegion = region 70 , langVariants = variants } 71 asciiLetter = P.satisfy (\c -> isAscii c && isLetter c) 72 pLanguage = do 73 cs <- P.many1 asciiLetter 74 let lcs = length cs 75 guard $ lcs == 2 || lcs == 3 76 return $ T.toLower $ T.pack cs 77 pScript = P.try $ do 78 P.char '-' 79 x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c) 80 xs <- P.count 3 81 (P.satisfy (\c -> isAscii c && isLetter c && isLower c)) 82 return $ T.toLower $ T.pack (x:xs) 83 pRegion = P.try $ do 84 P.char '-' 85 cs <- P.many1 asciiLetter 86 let lcs = length cs 87 guard $ lcs == 2 || lcs == 3 88 return $ T.toUpper $ T.pack cs 89 pVariant = P.try $ do 90 P.char '-' 91 ds <- P.option "" (P.count 1 P.digit) 92 cs <- P.many1 asciiLetter 93 let var = ds ++ cs 94 lv = length var 95 guard $ if null ds 96 then lv >= 5 && lv <= 8 97 else lv == 4 98 return $ T.toLower $ T.pack var 99 pExtension = P.try $ do 100 P.char '-' 101 cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c) 102 let lcs = length cs 103 guard $ lcs >= 2 && lcs <= 8 104 return $ T.toLower $ T.pack cs 105 pPrivateUse = P.try $ do 106 P.char '-' 107 P.char 'x' 108 P.char '-' 109 cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c) 110 guard $ not (null cs) && length cs <= 8 111 let var = "x-" ++ cs 112 return $ T.toLower $ T.pack var 113