1{-# LANGUAGE OverloadedStrings #-}
2module Cheapskate.Util (
3    joinLines
4  , tabFilter
5  , isWhitespace
6  , isEscapable
7  , normalizeReference
8  , Scanner
9  , scanIndentSpace
10  , scanNonindentSpace
11  , scanSpacesToColumn
12  , scanChar
13  , scanBlankline
14  , scanSpaces
15  , scanSpnl
16  , nfb
17  , nfbChar
18  , upToCountChars
19  ) where
20
21import Data.Text (Text)
22import qualified Data.Text as T
23import Data.Char
24import Control.Applicative ()
25import Cheapskate.ParserCombinators
26
27-- Utility functions.
28
29-- Like T.unlines but does not add a final newline.
30-- Concatenates lines with newlines between.
31joinLines :: [Text] -> Text
32joinLines = T.intercalate "\n"
33
34-- Convert tabs to spaces using a 4-space tab stop.
35tabFilter :: Text -> Text
36tabFilter = T.concat . pad . T.split (== '\t')
37  where pad []  = []
38        pad [t] = [t]
39        pad (t:ts) = let tl = T.length t
40                         n  = tl + 4 - (tl `mod` 4)
41                         in  T.justifyLeft n ' ' t : pad ts
42
43-- These are the whitespace characters that are significant in
44-- parsing markdown. We can treat \160 (nonbreaking space) etc.
45-- as regular characters.  This function should be considerably
46-- faster than the unicode-aware isSpace from Data.Char.
47isWhitespace :: Char -> Bool
48isWhitespace ' '  = True
49isWhitespace '\t' = True
50isWhitespace '\n' = True
51isWhitespace '\r' = True
52isWhitespace _    = False
53
54-- The original Markdown only allowed certain symbols
55-- to be backslash-escaped.  It was hard to remember
56-- which ones could be, so we now allow any ascii punctuation mark or
57-- symbol to be escaped, whether or not it has a use in Markdown.
58isEscapable :: Char -> Bool
59isEscapable c = isAscii c && (isSymbol c || isPunctuation c)
60
61-- Link references are case sensitive and ignore line breaks
62-- and repeated spaces.
63-- So, [APPLES are good] == [Apples are good] ==
64-- [Apples
65-- are     good].
66normalizeReference :: Text -> Text
67normalizeReference = T.toCaseFold . T.concat . T.split isWhitespace
68
69-- Scanners are implemented here as attoparsec parsers,
70-- which consume input and capture nothing.  They could easily
71-- be implemented as regexes in other languages, or hand-coded.
72-- With the exception of scanSpnl, they are all intended to
73-- operate on a single line of input (so endOfInput = endOfLine).
74type Scanner = Parser ()
75
76-- Scan four spaces.
77scanIndentSpace :: Scanner
78scanIndentSpace = () <$ count 4 (skip (==' '))
79
80scanSpacesToColumn :: Int -> Scanner
81scanSpacesToColumn col = do
82  currentCol <- column <$> getPosition
83  case col - currentCol of
84       n | n >= 1 -> () <$ (count n (skip (==' ')))
85         | otherwise -> return ()
86
87-- Scan 0-3 spaces.
88scanNonindentSpace :: Scanner
89scanNonindentSpace = () <$ upToCountChars 3 (==' ')
90
91-- Scan a specified character.
92scanChar :: Char -> Scanner
93scanChar c = skip (== c) >> return ()
94
95-- Scan a blankline.
96scanBlankline :: Scanner
97scanBlankline = scanSpaces *> endOfInput
98
99-- Scan 0 or more spaces
100scanSpaces :: Scanner
101scanSpaces = skipWhile (==' ')
102
103-- Scan 0 or more spaces, and optionally a newline
104-- and more spaces.
105scanSpnl :: Scanner
106scanSpnl = scanSpaces *> option () (char '\n' *> scanSpaces)
107
108-- Not followed by: Succeed without consuming input if the specified
109-- scanner would not succeed.
110nfb :: Parser a -> Scanner
111nfb = notFollowedBy
112
113-- Succeed if not followed by a character. Consumes no input.
114nfbChar :: Char -> Scanner
115nfbChar c = nfb (skip (==c))
116
117upToCountChars :: Int -> (Char -> Bool) -> Parser Text
118upToCountChars cnt f =
119  scan 0 (\n c -> if n < cnt && f c then Just (n+1) else Nothing)
120