1{-# LANGUAGE OverloadedStrings #-}
2{-# LANGUAGE ViewPatterns #-}
3
4-- Module      : Data.Text.Lazy.Manipulate
5-- Copyright   : (c) 2014-2020 Brendan Hay <brendan.g.hay@gmail.com>
6-- License     : This Source Code Form is subject to the terms of
7--               the Mozilla Public License, v. 2.0.
8--               A copy of the MPL can be found in the LICENSE file or
9--               you can obtain it at http://mozilla.org/MPL/2.0/.
10-- Maintainer  : Brendan Hay <brendan.g.hay@gmail.com>
11-- Stability   : experimental
12-- Portability : non-portable (GHC extensions)
13
14-- | Manipulate identifiers and structurally non-complex pieces
15-- of text by delimiting word boundaries via a combination of whitespace,
16-- control-characters, and case-sensitivity.
17--
18-- Assumptions have been made about word boundary characteristics inherint
19-- in predominantely English text, please see individual function documentation
20-- for further details and behaviour.
21module Data.Text.Lazy.Manipulate
22  ( -- * Strict vs lazy types
23    -- $strict
24
25    -- * Unicode
26    -- $unicode
27
28    -- * Fusion
29    -- $fusion
30
31    -- * Subwords
32
33    -- ** Removing words
34    takeWord,
35    dropWord,
36    stripWord,
37
38    -- ** Breaking on words
39    breakWord,
40    splitWords,
41
42    -- * Character manipulation
43    lowerHead,
44    upperHead,
45    mapHead,
46
47    -- * Line manipulation
48    indentLines,
49    prependLines,
50
51    -- * Ellipsis
52    toEllipsis,
53    toEllipsisWith,
54
55    -- * Acronyms
56    toAcronym,
57
58    -- * Ordinals
59    toOrdinal,
60
61    -- * Casing
62    toTitle,
63    toCamel,
64    toPascal,
65    toSnake,
66    toSpinal,
67    toTrain,
68
69    -- * Boundary predicates
70    isBoundary,
71    isWordBoundary,
72  )
73where
74
75import qualified Data.Char as Char
76import Data.Int
77import Data.List (intersperse)
78import Data.Text.Lazy (Text)
79import qualified Data.Text.Lazy as LText
80import Data.Text.Lazy.Builder (toLazyText)
81import Data.Text.Manipulate.Internal.Fusion (lazy)
82import qualified Data.Text.Manipulate.Internal.Fusion as Fusion
83import Data.Text.Manipulate.Internal.Types
84
85-- $strict
86-- This library provides functions for manipulating both strict and lazy Text types.
87-- The strict functions are provided by the "Data.Text.Manipulate" module, while the lazy
88-- functions are provided by the "Data.Text.Lazy.Manipulate" module.
89
90-- $unicode
91-- While this library supports Unicode in a similar fashion to the
92-- underlying <http://hackage.haskell.org/package/text text> library,
93-- more explicit Unicode specific handling of word boundaries can be found in the
94-- <http://hackage.haskell.org/package/text-icu text-icu> library.
95
96-- $fusion
97-- Many functions in this module are subject to fusion, meaning that
98-- a pipeline of such functions will usually allocate at most one Text value.
99--
100-- Functions that can be fused by the compiler are documented with the
101-- phrase /Subject to fusion/.
102
103-- | Lowercase the first character of a piece of text.
104--
105-- >>> lowerHead "Title Cased"
106-- "title Cased"
107lowerHead :: Text -> Text
108lowerHead = mapHead Char.toLower
109
110-- | Uppercase the first character of a piece of text.
111--
112-- >>> upperHead "snake_cased"
113-- "Snake_cased"
114upperHead :: Text -> Text
115upperHead = mapHead Char.toUpper
116
117-- | Apply a function to the first character of a piece of text.
118mapHead :: (Char -> Char) -> Text -> Text
119mapHead f x =
120  case LText.uncons x of
121    Just (c, cs) -> LText.singleton (f c) <> cs
122    Nothing -> x
123
124-- | Indent newlines by the given number of spaces.
125indentLines :: Int -> Text -> Text
126indentLines n = prependLines (LText.replicate (fromIntegral n) " ")
127
128-- | Prepend newlines with the given separator
129prependLines :: Text -> Text -> Text
130prependLines sep = mappend sep . LText.unlines . intersperse sep . LText.lines
131
132-- | O(n) Truncate text to a specific length.
133-- If the text was truncated the ellipsis sign "..." will be appended.
134--
135-- /See:/ 'toEllipsisWith'
136toEllipsis :: Int64 -> Text -> Text
137toEllipsis n = toEllipsisWith n "..."
138
139-- | O(n) Truncate text to a specific length.
140-- If the text was truncated the given ellipsis sign will be appended.
141toEllipsisWith ::
142  -- | Length.
143  Int64 ->
144  -- | Ellipsis.
145  Text ->
146  Text ->
147  Text
148toEllipsisWith n suf x
149  | LText.length x > n = LText.take n x <> suf
150  | otherwise = x
151
152-- | O(n) Returns the first word, or the original text if no word
153-- boundary is encountered. /Subject to fusion./
154takeWord :: Text -> Text
155takeWord = lazy Fusion.takeWord
156
157-- | O(n) Return the suffix after dropping the first word. If no word
158-- boundary is encountered, the result will be empty. /Subject to fusion./
159dropWord :: Text -> Text
160dropWord = lazy Fusion.dropWord
161
162-- | Break a piece of text after the first word boundary is encountered.
163--
164-- >>> breakWord "PascalCasedVariable"
165-- ("Pacal", "CasedVariable")
166--
167-- >>> breakWord "spinal-cased-variable"
168-- ("spinal", "cased-variable")
169breakWord :: Text -> (Text, Text)
170breakWord x = (takeWord x, dropWord x)
171
172-- | O(n) Return the suffix after removing the first word, or 'Nothing'
173-- if no word boundary is encountered.
174--
175-- >>> stripWord "HTML5Spaghetti"
176-- Just "Spaghetti"
177--
178-- >>> stripWord "noboundaries"
179-- Nothing
180stripWord :: Text -> Maybe Text
181stripWord x
182  | LText.length y < LText.length x = Just y
183  | otherwise = Nothing
184  where
185    y = dropWord x
186
187-- | O(n) Split into a list of words delimited by boundaries.
188--
189-- >>> splitWords "SupercaliFrag_ilistic"
190-- ["Supercali","Frag","ilistic"]
191splitWords :: Text -> [Text]
192splitWords = go
193  where
194    go x = case breakWord x of
195      (h, t)
196        | LText.null h -> go t
197        | LText.null t -> [h]
198        | otherwise -> h : go t
199
200-- | O(n) Create an adhoc acronym from a piece of cased text.
201--
202-- >>> toAcronym "AmazonWebServices"
203-- Just "AWS"
204--
205-- >>> toAcronym "Learn-You Some_Haskell"
206-- Just "LYSH"
207--
208-- >>> toAcronym "this_is_all_lowercase"
209-- Nothing
210toAcronym :: Text -> Maybe Text
211toAcronym (LText.filter Char.isUpper -> x)
212  | LText.length x > 1 = Just x
213  | otherwise = Nothing
214
215-- | Render an ordinal used to denote the position in an ordered sequence.
216--
217-- >>> toOrdinal (101 :: Int)
218-- "101st"
219--
220-- >>> toOrdinal (12 :: Int)
221-- "12th"
222toOrdinal :: Integral a => a -> Text
223toOrdinal = toLazyText . ordinal
224
225-- | O(n) Convert casing to @Title Cased Phrase@. /Subject to fusion./
226toTitle :: Text -> Text
227toTitle = lazy Fusion.toTitle
228
229-- | O(n) Convert casing to @camelCasedPhrase@. /Subject to fusion./
230toCamel :: Text -> Text
231toCamel = lazy Fusion.toCamel
232
233-- | O(n) Convert casing to @PascalCasePhrase@. /Subject to fusion./
234toPascal :: Text -> Text
235toPascal = lazy Fusion.toPascal
236
237-- | O(n) Convert casing to @snake_cased_phrase@. /Subject to fusion./
238toSnake :: Text -> Text
239toSnake = lazy Fusion.toSnake
240
241-- | O(n) Convert casing to @spinal-cased-phrase@. /Subject to fusion./
242toSpinal :: Text -> Text
243toSpinal = lazy Fusion.toSpinal
244
245-- | O(n) Convert casing to @Train-Cased-Phrase@. /Subject to fusion./
246toTrain :: Text -> Text
247toTrain = lazy Fusion.toTrain
248