1{-# LANGUAGE OverloadedStrings #-} 2module Cheapskate.Util ( 3 joinLines 4 , tabFilter 5 , isWhitespace 6 , isEscapable 7 , normalizeReference 8 , Scanner 9 , scanIndentSpace 10 , scanNonindentSpace 11 , scanSpacesToColumn 12 , scanChar 13 , scanBlankline 14 , scanSpaces 15 , scanSpnl 16 , nfb 17 , nfbChar 18 , upToCountChars 19 ) where 20 21import Data.Text (Text) 22import qualified Data.Text as T 23import Data.Char 24import Control.Applicative () 25import Cheapskate.ParserCombinators 26 27-- Utility functions. 28 29-- Like T.unlines but does not add a final newline. 30-- Concatenates lines with newlines between. 31joinLines :: [Text] -> Text 32joinLines = T.intercalate "\n" 33 34-- Convert tabs to spaces using a 4-space tab stop. 35tabFilter :: Text -> Text 36tabFilter = T.concat . pad . T.split (== '\t') 37 where pad [] = [] 38 pad [t] = [t] 39 pad (t:ts) = let tl = T.length t 40 n = tl + 4 - (tl `mod` 4) 41 in T.justifyLeft n ' ' t : pad ts 42 43-- These are the whitespace characters that are significant in 44-- parsing markdown. We can treat \160 (nonbreaking space) etc. 45-- as regular characters. This function should be considerably 46-- faster than the unicode-aware isSpace from Data.Char. 47isWhitespace :: Char -> Bool 48isWhitespace ' ' = True 49isWhitespace '\t' = True 50isWhitespace '\n' = True 51isWhitespace '\r' = True 52isWhitespace _ = False 53 54-- The original Markdown only allowed certain symbols 55-- to be backslash-escaped. It was hard to remember 56-- which ones could be, so we now allow any ascii punctuation mark or 57-- symbol to be escaped, whether or not it has a use in Markdown. 58isEscapable :: Char -> Bool 59isEscapable c = isAscii c && (isSymbol c || isPunctuation c) 60 61-- Link references are case sensitive and ignore line breaks 62-- and repeated spaces. 63-- So, [APPLES are good] == [Apples are good] == 64-- [Apples 65-- are good]. 66normalizeReference :: Text -> Text 67normalizeReference = T.toCaseFold . T.concat . T.split isWhitespace 68 69-- Scanners are implemented here as attoparsec parsers, 70-- which consume input and capture nothing. They could easily 71-- be implemented as regexes in other languages, or hand-coded. 72-- With the exception of scanSpnl, they are all intended to 73-- operate on a single line of input (so endOfInput = endOfLine). 74type Scanner = Parser () 75 76-- Scan four spaces. 77scanIndentSpace :: Scanner 78scanIndentSpace = () <$ count 4 (skip (==' ')) 79 80scanSpacesToColumn :: Int -> Scanner 81scanSpacesToColumn col = do 82 currentCol <- column <$> getPosition 83 case col - currentCol of 84 n | n >= 1 -> () <$ (count n (skip (==' '))) 85 | otherwise -> return () 86 87-- Scan 0-3 spaces. 88scanNonindentSpace :: Scanner 89scanNonindentSpace = () <$ upToCountChars 3 (==' ') 90 91-- Scan a specified character. 92scanChar :: Char -> Scanner 93scanChar c = skip (== c) >> return () 94 95-- Scan a blankline. 96scanBlankline :: Scanner 97scanBlankline = scanSpaces *> endOfInput 98 99-- Scan 0 or more spaces 100scanSpaces :: Scanner 101scanSpaces = skipWhile (==' ') 102 103-- Scan 0 or more spaces, and optionally a newline 104-- and more spaces. 105scanSpnl :: Scanner 106scanSpnl = scanSpaces *> option () (char '\n' *> scanSpaces) 107 108-- Not followed by: Succeed without consuming input if the specified 109-- scanner would not succeed. 110nfb :: Parser a -> Scanner 111nfb = notFollowedBy 112 113-- Succeed if not followed by a character. Consumes no input. 114nfbChar :: Char -> Scanner 115nfbChar c = nfb (skip (==c)) 116 117upToCountChars :: Int -> (Char -> Bool) -> Parser Text 118upToCountChars cnt f = 119 scan 0 (\n c -> if n < cnt && f c then Just (n+1) else Nothing) 120