1use strict; use warnings; 2 3package Text::Wrap; 4 5use warnings::register; 6 7BEGIN { require Exporter; *import = \&Exporter::import } 8 9our @EXPORT = qw( wrap fill ); 10our @EXPORT_OK = qw( $columns $break $huge ); 11 12our $VERSION = '2024.001'; 13our $SUBVERSION = 'modern'; # back-compat vestige 14 15BEGIN { eval sprintf 'sub REGEXPS_USE_BYTES () { %d }', scalar( pack('U*', 0x80) =~ /\xc2/ ) } 16 17my $brkspc = "\x{a0}\x{202f}" =~ /\s/ ? '[^\x{a0}\x{202f}\S]' : '\s'; 18 19our $columns = 76; # <= screen width 20our $break = '(?>\n|\r\n|'.$brkspc.'\pM*)'; 21our $huge = 'wrap'; # alternatively: 'die' or 'overflow' 22our $unexpand = 1; 23our $tabstop = 8; 24our $separator = "\n"; 25our $separator2 = undef; 26 27sub _xlen { $_[0] =~ /^\pM/ + ( () = $_[0] =~ /\PM/g ) } 28 29use Text::Tabs qw(expand unexpand); 30 31sub wrap 32{ 33 my ($ip, $xp, @t) = map +( defined $_ ? $_ : '' ), @_; 34 35 local($Text::Tabs::tabstop) = $tabstop; 36 my $r = ""; 37 my $tail = pop(@t); 38 my $t = expand(join("", (map { /\s+\z/ ? ( $_ ) : ($_, ' ') } @t), $tail)); 39 my $lead = $ip; 40 my $nll = $columns - _xlen(expand($xp)) - 1; 41 if ($nll <= 0 && $xp ne '') { 42 my $nc = _xlen(expand($xp)) + 2; 43 warnings::warnif "Increasing \$Text::Wrap::columns from $columns to $nc to accommodate length of subsequent tab"; 44 $columns = $nc; 45 $nll = 1; 46 } 47 my $ll = $columns - _xlen(expand($ip)) - 1; 48 $ll = 0 if $ll < 0; 49 my $nl = ""; 50 my $remainder = ""; 51 52 use re 'taint'; 53 54 pos($t) = 0; 55 while ($t !~ /\G(?:$break)*\Z/gc) { 56 if ($t =~ /\G((?>(?!\n)\PM\pM*|(?<![^\n])\pM+){0,$ll})($break|\n+|\z)/xmgc) { 57 $r .= $unexpand 58 ? unexpand($nl . $lead . $1) 59 : $nl . $lead . $1; 60 $remainder = $2; 61 } elsif ($huge eq 'wrap' && $t =~ /\G((?>(?!\n)\PM\pM*|(?<![^\n])\pM+){$ll})/gc) { 62 $r .= $unexpand 63 ? unexpand($nl . $lead . $1) 64 : $nl . $lead . $1; 65 $remainder = defined($separator2) ? $separator2 : $separator; 66 } elsif ($huge eq 'overflow' && $t =~ /\G([^\n]*?)(?!(?<![^\n])\pM)($break|\n+|\z)/xmgc) { 67 $r .= $unexpand 68 ? unexpand($nl . $lead . $1) 69 : $nl . $lead . $1; 70 $remainder = $2; 71 } elsif ($huge eq 'die') { 72 die "couldn't wrap '$t'"; 73 } elsif ($columns < 2) { 74 warnings::warnif "Increasing \$Text::Wrap::columns from $columns to 2"; 75 $columns = 2; 76 return @_; 77 } else { 78 die "This shouldn't happen"; 79 } 80 81 $lead = $xp; 82 $ll = $nll; 83 $nl = defined($separator2) 84 ? ($remainder eq "\n" 85 ? "\n" 86 : $separator2) 87 : $separator; 88 } 89 $r .= $remainder; 90 91 $r .= $lead . substr($t, pos($t), length($t) - pos($t)) 92 if pos($t) ne length($t); 93 94 # the 5.6 regexp engine ignores the UTF8 flag, so using capture buffers acts as an implicit _utf8_off 95 # that means on 5.6 we now have to manually set UTF8=on on the output if the input had it, for which 96 # we extract just the UTF8 flag from the input and check if it forces chr(0x80) to become multibyte 97 return REGEXPS_USE_BYTES && (substr($t,0,0)."\x80") =~ /\xc2/ ? pack('U0a*', $r) : $r; 98} 99 100sub fill 101{ 102 my ($ip, $xp, @raw) = map +( defined $_ ? $_ : '' ), @_; 103 my @para; 104 my $pp; 105 106 for $pp (split(/\n\s+/, join("\n",@raw))) { 107 $pp =~ s/\s+/ /g; 108 my $x = wrap($ip, $xp, $pp); 109 push(@para, $x); 110 } 111 112 # if paragraph_indent is the same as line_indent, 113 # separate paragraphs with blank lines 114 115 my $ps = ($ip eq $xp) ? "\n\n" : "\n"; 116 return join ($ps, @para); 117} 118 1191; 120 121__END__ 122 123=head1 NAME 124 125Text::Wrap - line wrapping to form simple paragraphs 126 127=head1 SYNOPSIS 128 129B<Example 1> 130 131 use Text::Wrap; 132 133 $initial_tab = "\t"; # Tab before first line 134 $subsequent_tab = ""; # All other lines flush left 135 136 print wrap($initial_tab, $subsequent_tab, @text); 137 print fill($initial_tab, $subsequent_tab, @text); 138 139 $lines = wrap($initial_tab, $subsequent_tab, @text); 140 141 @paragraphs = fill($initial_tab, $subsequent_tab, @text); 142 143B<Example 2> 144 145 use Text::Wrap qw(wrap $columns $huge); 146 147 $columns = 132; # Wrap at 132 characters 148 $huge = 'die'; 149 $huge = 'wrap'; 150 $huge = 'overflow'; 151 152B<Example 3> 153 154 use Text::Wrap; 155 156 $Text::Wrap::columns = 72; 157 print wrap('', '', @text); 158 159=head1 DESCRIPTION 160 161C<Text::Wrap::wrap()> is a very simple paragraph formatter. It formats a 162single paragraph at a time by breaking lines at word boundaries. 163Indentation is controlled for the first line (C<$initial_tab>) and 164all subsequent lines (C<$subsequent_tab>) independently. Please note: 165C<$initial_tab> and C<$subsequent_tab> are the literal strings that will 166be used: it is unlikely you would want to pass in a number. 167 168C<Text::Wrap::fill()> is a simple multi-paragraph formatter. It formats 169each paragraph separately and then joins them together when it's done. It 170will destroy any whitespace in the original text. It breaks text into 171paragraphs by looking for whitespace after a newline. In other respects, 172it acts like wrap(). 173 174C<wrap()> compresses trailing whitespace into one newline, and C<fill()> 175deletes all trailing whitespace. 176 177Both C<wrap()> and C<fill()> return a single string. 178 179Unlike the old Unix fmt(1) utility, this module correctly accounts for 180any Unicode combining characters (such as diacriticals) that may occur 181in each line for both expansion and unexpansion. These are overstrike 182characters that do not increment the logical position. Make sure 183you have the appropriate Unicode settings enabled. 184 185=head1 OVERRIDES 186 187C<Text::Wrap::wrap()> has a number of variables that control its behavior. 188Because other modules might be using C<Text::Wrap::wrap()> it is suggested 189that you leave these variables alone! If you can't do that, then 190use C<local($Text::Wrap::VARIABLE) = YOURVALUE> when you change the 191values so that the original value is restored. This C<local()> trick 192will not work if you import the variable into your own namespace. 193 194Lines are wrapped at C<$Text::Wrap::columns> columns (default value: 76). 195C<$Text::Wrap::columns> should be set to the full width of your output 196device. In fact, every resulting line will have length of no more than 197C<$columns - 1>. 198 199It is possible to control which characters terminate words by 200modifying C<$Text::Wrap::break>. Set this to a string such as 201C<'[\s:]'> (to break before spaces or colons) or a pre-compiled regexp 202such as C<qr/[\s']/> (to break before spaces or apostrophes). The 203default is simply C<'\s'>; that is, words are terminated by spaces. 204(This means, among other things, that trailing punctuation such as 205full stops or commas stay with the word they are "attached" to.) 206Setting C<$Text::Wrap::break> to a regular expression that doesn't 207eat any characters (perhaps just a forward look-ahead assertion) will 208cause warnings. 209 210Beginner note: In example 2, above C<$columns> is imported into 211the local namespace, and set locally. In example 3, 212C<$Text::Wrap::columns> is set in its own namespace without importing it. 213 214C<Text::Wrap::wrap()> starts its work by expanding all the tabs in its 215input into spaces. The last thing it does it to turn spaces back 216into tabs. If you do not want tabs in your results, set 217C<$Text::Wrap::unexpand> to a false value. Likewise if you do not 218want to use 8-character tabstops, set C<$Text::Wrap::tabstop> to 219the number of characters you do want for your tabstops. 220 221If you want to separate your lines with something other than C<\n> 222then set C<$Text::Wrap::separator> to your preference. This replaces 223all newlines with C<$Text::Wrap::separator>. If you just want to 224preserve existing newlines but add new breaks with something else, set 225C<$Text::Wrap::separator2> instead. 226 227When words that are longer than C<$columns> are encountered, they 228are broken up. C<wrap()> adds a C<"\n"> at column C<$columns>. 229This behavior can be overridden by setting C<$huge> to 230'die' or to 'overflow'. When set to 'die', large words will cause 231C<die()> to be called. When set to 'overflow', large words will be 232left intact. 233 234Historical notes: 'die' used to be the default value of 235C<$huge>. Now, 'wrap' is the default value. 236 237=head1 EXAMPLES 238 239Code: 240 241 print wrap("\t","",<<END); 242 This is a bit of text that forms 243 a normal book-style indented paragraph 244 END 245 246Result: 247 248 " This is a bit of text that forms 249 a normal book-style indented paragraph 250 " 251 252Code: 253 254 $Text::Wrap::columns=20; 255 $Text::Wrap::separator="|"; 256 print wrap("","","This is a bit of text that forms a normal book-style paragraph"); 257 258Result: 259 260 "This is a bit of|text that forms a|normal book-style|paragraph" 261 262=head1 SEE ALSO 263 264For correct handling of East Asian half- and full-width characters, 265see L<Text::WrapI18N>. For more detailed controls: L<Text::Format>. 266 267=head1 AUTHOR 268 269David Muir Sharnoff <cpan@dave.sharnoff.org> with help from Tim Pierce and 270many many others. 271 272=head1 LICENSE 273 274Copyright (C) 1996-2009 David Muir Sharnoff. 275Copyright (C) 2012-2013 Google, Inc. 276This module may be modified, used, copied, and redistributed at your own risk. 277Although allowed by the preceding license, please do not publicly 278redistribute modified versions of this code with the name "Text::Wrap" 279unless it passes the unmodified Text::Wrap test suite. 280