1{-# LANGUAGE OverloadedStrings #-} 2{-# LANGUAGE ViewPatterns #-} 3 4-- Module : Data.Text.Lazy.Manipulate 5-- Copyright : (c) 2014-2020 Brendan Hay <brendan.g.hay@gmail.com> 6-- License : This Source Code Form is subject to the terms of 7-- the Mozilla Public License, v. 2.0. 8-- A copy of the MPL can be found in the LICENSE file or 9-- you can obtain it at http://mozilla.org/MPL/2.0/. 10-- Maintainer : Brendan Hay <brendan.g.hay@gmail.com> 11-- Stability : experimental 12-- Portability : non-portable (GHC extensions) 13 14-- | Manipulate identifiers and structurally non-complex pieces 15-- of text by delimiting word boundaries via a combination of whitespace, 16-- control-characters, and case-sensitivity. 17-- 18-- Assumptions have been made about word boundary characteristics inherint 19-- in predominantely English text, please see individual function documentation 20-- for further details and behaviour. 21module Data.Text.Lazy.Manipulate 22 ( -- * Strict vs lazy types 23 -- $strict 24 25 -- * Unicode 26 -- $unicode 27 28 -- * Fusion 29 -- $fusion 30 31 -- * Subwords 32 33 -- ** Removing words 34 takeWord, 35 dropWord, 36 stripWord, 37 38 -- ** Breaking on words 39 breakWord, 40 splitWords, 41 42 -- * Character manipulation 43 lowerHead, 44 upperHead, 45 mapHead, 46 47 -- * Line manipulation 48 indentLines, 49 prependLines, 50 51 -- * Ellipsis 52 toEllipsis, 53 toEllipsisWith, 54 55 -- * Acronyms 56 toAcronym, 57 58 -- * Ordinals 59 toOrdinal, 60 61 -- * Casing 62 toTitle, 63 toCamel, 64 toPascal, 65 toSnake, 66 toSpinal, 67 toTrain, 68 69 -- * Boundary predicates 70 isBoundary, 71 isWordBoundary, 72 ) 73where 74 75import qualified Data.Char as Char 76import Data.Int 77import Data.List (intersperse) 78import Data.Text.Lazy (Text) 79import qualified Data.Text.Lazy as LText 80import Data.Text.Lazy.Builder (toLazyText) 81import Data.Text.Manipulate.Internal.Fusion (lazy) 82import qualified Data.Text.Manipulate.Internal.Fusion as Fusion 83import Data.Text.Manipulate.Internal.Types 84 85-- $strict 86-- This library provides functions for manipulating both strict and lazy Text types. 87-- The strict functions are provided by the "Data.Text.Manipulate" module, while the lazy 88-- functions are provided by the "Data.Text.Lazy.Manipulate" module. 89 90-- $unicode 91-- While this library supports Unicode in a similar fashion to the 92-- underlying <http://hackage.haskell.org/package/text text> library, 93-- more explicit Unicode specific handling of word boundaries can be found in the 94-- <http://hackage.haskell.org/package/text-icu text-icu> library. 95 96-- $fusion 97-- Many functions in this module are subject to fusion, meaning that 98-- a pipeline of such functions will usually allocate at most one Text value. 99-- 100-- Functions that can be fused by the compiler are documented with the 101-- phrase /Subject to fusion/. 102 103-- | Lowercase the first character of a piece of text. 104-- 105-- >>> lowerHead "Title Cased" 106-- "title Cased" 107lowerHead :: Text -> Text 108lowerHead = mapHead Char.toLower 109 110-- | Uppercase the first character of a piece of text. 111-- 112-- >>> upperHead "snake_cased" 113-- "Snake_cased" 114upperHead :: Text -> Text 115upperHead = mapHead Char.toUpper 116 117-- | Apply a function to the first character of a piece of text. 118mapHead :: (Char -> Char) -> Text -> Text 119mapHead f x = 120 case LText.uncons x of 121 Just (c, cs) -> LText.singleton (f c) <> cs 122 Nothing -> x 123 124-- | Indent newlines by the given number of spaces. 125indentLines :: Int -> Text -> Text 126indentLines n = prependLines (LText.replicate (fromIntegral n) " ") 127 128-- | Prepend newlines with the given separator 129prependLines :: Text -> Text -> Text 130prependLines sep = mappend sep . LText.unlines . intersperse sep . LText.lines 131 132-- | O(n) Truncate text to a specific length. 133-- If the text was truncated the ellipsis sign "..." will be appended. 134-- 135-- /See:/ 'toEllipsisWith' 136toEllipsis :: Int64 -> Text -> Text 137toEllipsis n = toEllipsisWith n "..." 138 139-- | O(n) Truncate text to a specific length. 140-- If the text was truncated the given ellipsis sign will be appended. 141toEllipsisWith :: 142 -- | Length. 143 Int64 -> 144 -- | Ellipsis. 145 Text -> 146 Text -> 147 Text 148toEllipsisWith n suf x 149 | LText.length x > n = LText.take n x <> suf 150 | otherwise = x 151 152-- | O(n) Returns the first word, or the original text if no word 153-- boundary is encountered. /Subject to fusion./ 154takeWord :: Text -> Text 155takeWord = lazy Fusion.takeWord 156 157-- | O(n) Return the suffix after dropping the first word. If no word 158-- boundary is encountered, the result will be empty. /Subject to fusion./ 159dropWord :: Text -> Text 160dropWord = lazy Fusion.dropWord 161 162-- | Break a piece of text after the first word boundary is encountered. 163-- 164-- >>> breakWord "PascalCasedVariable" 165-- ("Pacal", "CasedVariable") 166-- 167-- >>> breakWord "spinal-cased-variable" 168-- ("spinal", "cased-variable") 169breakWord :: Text -> (Text, Text) 170breakWord x = (takeWord x, dropWord x) 171 172-- | O(n) Return the suffix after removing the first word, or 'Nothing' 173-- if no word boundary is encountered. 174-- 175-- >>> stripWord "HTML5Spaghetti" 176-- Just "Spaghetti" 177-- 178-- >>> stripWord "noboundaries" 179-- Nothing 180stripWord :: Text -> Maybe Text 181stripWord x 182 | LText.length y < LText.length x = Just y 183 | otherwise = Nothing 184 where 185 y = dropWord x 186 187-- | O(n) Split into a list of words delimited by boundaries. 188-- 189-- >>> splitWords "SupercaliFrag_ilistic" 190-- ["Supercali","Frag","ilistic"] 191splitWords :: Text -> [Text] 192splitWords = go 193 where 194 go x = case breakWord x of 195 (h, t) 196 | LText.null h -> go t 197 | LText.null t -> [h] 198 | otherwise -> h : go t 199 200-- | O(n) Create an adhoc acronym from a piece of cased text. 201-- 202-- >>> toAcronym "AmazonWebServices" 203-- Just "AWS" 204-- 205-- >>> toAcronym "Learn-You Some_Haskell" 206-- Just "LYSH" 207-- 208-- >>> toAcronym "this_is_all_lowercase" 209-- Nothing 210toAcronym :: Text -> Maybe Text 211toAcronym (LText.filter Char.isUpper -> x) 212 | LText.length x > 1 = Just x 213 | otherwise = Nothing 214 215-- | Render an ordinal used to denote the position in an ordered sequence. 216-- 217-- >>> toOrdinal (101 :: Int) 218-- "101st" 219-- 220-- >>> toOrdinal (12 :: Int) 221-- "12th" 222toOrdinal :: Integral a => a -> Text 223toOrdinal = toLazyText . ordinal 224 225-- | O(n) Convert casing to @Title Cased Phrase@. /Subject to fusion./ 226toTitle :: Text -> Text 227toTitle = lazy Fusion.toTitle 228 229-- | O(n) Convert casing to @camelCasedPhrase@. /Subject to fusion./ 230toCamel :: Text -> Text 231toCamel = lazy Fusion.toCamel 232 233-- | O(n) Convert casing to @PascalCasePhrase@. /Subject to fusion./ 234toPascal :: Text -> Text 235toPascal = lazy Fusion.toPascal 236 237-- | O(n) Convert casing to @snake_cased_phrase@. /Subject to fusion./ 238toSnake :: Text -> Text 239toSnake = lazy Fusion.toSnake 240 241-- | O(n) Convert casing to @spinal-cased-phrase@. /Subject to fusion./ 242toSpinal :: Text -> Text 243toSpinal = lazy Fusion.toSpinal 244 245-- | O(n) Convert casing to @Train-Cased-Phrase@. /Subject to fusion./ 246toTrain :: Text -> Text 247toTrain = lazy Fusion.toTrain 248