1use strict; use warnings;
2
3package Text::Wrap;
4
5use warnings::register;
6
7BEGIN { require Exporter; *import = \&Exporter::import }
8
9our @EXPORT = qw( wrap fill );
10our @EXPORT_OK = qw( $columns $break $huge );
11
12our $VERSION = '2024.001';
13our $SUBVERSION = 'modern'; # back-compat vestige
14
15BEGIN { eval sprintf 'sub REGEXPS_USE_BYTES () { %d }', scalar( pack('U*', 0x80) =~ /\xc2/ ) }
16
17my $brkspc = "\x{a0}\x{202f}" =~ /\s/ ? '[^\x{a0}\x{202f}\S]' : '\s';
18
19our $columns = 76;  # <= screen width
20our $break = '(?>\n|\r\n|'.$brkspc.'\pM*)';
21our $huge = 'wrap'; # alternatively: 'die' or 'overflow'
22our $unexpand = 1;
23our $tabstop = 8;
24our $separator = "\n";
25our $separator2 = undef;
26
27sub _xlen { $_[0] =~ /^\pM/ + ( () = $_[0] =~ /\PM/g ) }
28
29use Text::Tabs qw(expand unexpand);
30
31sub wrap
32{
33	my ($ip, $xp, @t) = map +( defined $_ ? $_ : '' ), @_;
34
35	local($Text::Tabs::tabstop) = $tabstop;
36	my $r = "";
37	my $tail = pop(@t);
38	my $t = expand(join("", (map { /\s+\z/ ? ( $_ ) : ($_, ' ') } @t), $tail));
39	my $lead = $ip;
40	my $nll = $columns - _xlen(expand($xp)) - 1;
41	if ($nll <= 0 && $xp ne '') {
42		my $nc = _xlen(expand($xp)) + 2;
43		warnings::warnif "Increasing \$Text::Wrap::columns from $columns to $nc to accommodate length of subsequent tab";
44		$columns = $nc;
45		$nll = 1;
46	}
47	my $ll = $columns - _xlen(expand($ip)) - 1;
48	$ll = 0 if $ll < 0;
49	my $nl = "";
50	my $remainder = "";
51
52	use re 'taint';
53
54	pos($t) = 0;
55	while ($t !~ /\G(?:$break)*\Z/gc) {
56		if ($t =~ /\G((?>(?!\n)\PM\pM*|(?<![^\n])\pM+){0,$ll})($break|\n+|\z)/xmgc) {
57			$r .= $unexpand
58				? unexpand($nl . $lead . $1)
59				: $nl . $lead . $1;
60			$remainder = $2;
61		} elsif ($huge eq 'wrap' && $t =~ /\G((?>(?!\n)\PM\pM*|(?<![^\n])\pM+){$ll})/gc) {
62			$r .= $unexpand
63				? unexpand($nl . $lead . $1)
64				: $nl . $lead . $1;
65			$remainder = defined($separator2) ? $separator2 : $separator;
66		} elsif ($huge eq 'overflow' && $t =~ /\G([^\n]*?)(?!(?<![^\n])\pM)($break|\n+|\z)/xmgc) {
67			$r .= $unexpand
68				? unexpand($nl . $lead . $1)
69				: $nl . $lead . $1;
70			$remainder = $2;
71		} elsif ($huge eq 'die') {
72			die "couldn't wrap '$t'";
73		} elsif ($columns < 2) {
74			warnings::warnif "Increasing \$Text::Wrap::columns from $columns to 2";
75			$columns = 2;
76			return @_;
77		} else {
78			die "This shouldn't happen";
79		}
80
81		$lead = $xp;
82		$ll = $nll;
83		$nl = defined($separator2)
84			? ($remainder eq "\n"
85				? "\n"
86				: $separator2)
87			: $separator;
88	}
89	$r .= $remainder;
90
91	$r .= $lead . substr($t, pos($t), length($t) - pos($t))
92		if pos($t) ne length($t);
93
94	# the 5.6 regexp engine ignores the UTF8 flag, so using capture buffers acts as an implicit _utf8_off
95	# that means on 5.6 we now have to manually set UTF8=on on the output if the input had it, for which
96	# we extract just the UTF8 flag from the input and check if it forces chr(0x80) to become multibyte
97	return REGEXPS_USE_BYTES && (substr($t,0,0)."\x80") =~ /\xc2/ ? pack('U0a*', $r) : $r;
98}
99
100sub fill
101{
102	my ($ip, $xp, @raw) = map +( defined $_ ? $_ : '' ), @_;
103	my @para;
104	my $pp;
105
106	for $pp (split(/\n\s+/, join("\n",@raw))) {
107		$pp =~ s/\s+/ /g;
108		my $x = wrap($ip, $xp, $pp);
109		push(@para, $x);
110	}
111
112	# if paragraph_indent is the same as line_indent,
113	# separate paragraphs with blank lines
114
115	my $ps = ($ip eq $xp) ? "\n\n" : "\n";
116	return join ($ps, @para);
117}
118
1191;
120
121__END__
122
123=head1 NAME
124
125Text::Wrap - line wrapping to form simple paragraphs
126
127=head1 SYNOPSIS
128
129B<Example 1>
130
131	use Text::Wrap;
132
133	$initial_tab = "\t";	# Tab before first line
134	$subsequent_tab = "";	# All other lines flush left
135
136	print wrap($initial_tab, $subsequent_tab, @text);
137	print fill($initial_tab, $subsequent_tab, @text);
138
139	$lines = wrap($initial_tab, $subsequent_tab, @text);
140
141	@paragraphs = fill($initial_tab, $subsequent_tab, @text);
142
143B<Example 2>
144
145	use Text::Wrap qw(wrap $columns $huge);
146
147	$columns = 132;		# Wrap at 132 characters
148	$huge = 'die';
149	$huge = 'wrap';
150	$huge = 'overflow';
151
152B<Example 3>
153
154	use Text::Wrap;
155
156	$Text::Wrap::columns = 72;
157	print wrap('', '', @text);
158
159=head1 DESCRIPTION
160
161C<Text::Wrap::wrap()> is a very simple paragraph formatter.  It formats a
162single paragraph at a time by breaking lines at word boundaries.
163Indentation is controlled for the first line (C<$initial_tab>) and
164all subsequent lines (C<$subsequent_tab>) independently.  Please note:
165C<$initial_tab> and C<$subsequent_tab> are the literal strings that will
166be used: it is unlikely you would want to pass in a number.
167
168C<Text::Wrap::fill()> is a simple multi-paragraph formatter.  It formats
169each paragraph separately and then joins them together when it's done.  It
170will destroy any whitespace in the original text.  It breaks text into
171paragraphs by looking for whitespace after a newline.  In other respects,
172it acts like wrap().
173
174C<wrap()> compresses trailing whitespace into one newline, and C<fill()>
175deletes all trailing whitespace.
176
177Both C<wrap()> and C<fill()> return a single string.
178
179Unlike the old Unix fmt(1) utility, this module correctly accounts for
180any Unicode combining characters (such as diacriticals) that may occur
181in each line for both expansion and unexpansion.  These are overstrike
182characters that do not increment the logical position.  Make sure
183you have the appropriate Unicode settings enabled.
184
185=head1 OVERRIDES
186
187C<Text::Wrap::wrap()> has a number of variables that control its behavior.
188Because other modules might be using C<Text::Wrap::wrap()> it is suggested
189that you leave these variables alone!  If you can't do that, then
190use C<local($Text::Wrap::VARIABLE) = YOURVALUE> when you change the
191values so that the original value is restored.  This C<local()> trick
192will not work if you import the variable into your own namespace.
193
194Lines are wrapped at C<$Text::Wrap::columns> columns (default value: 76).
195C<$Text::Wrap::columns> should be set to the full width of your output
196device.  In fact, every resulting line will have length of no more than
197C<$columns - 1>.
198
199It is possible to control which characters terminate words by
200modifying C<$Text::Wrap::break>. Set this to a string such as
201C<'[\s:]'> (to break before spaces or colons) or a pre-compiled regexp
202such as C<qr/[\s']/> (to break before spaces or apostrophes). The
203default is simply C<'\s'>; that is, words are terminated by spaces.
204(This means, among other things, that trailing punctuation  such as
205full stops or commas stay with the word they are "attached" to.)
206Setting C<$Text::Wrap::break> to a regular expression that doesn't
207eat any characters (perhaps just a forward look-ahead assertion) will
208cause warnings.
209
210Beginner note: In example 2, above C<$columns> is imported into
211the local namespace, and set locally.  In example 3,
212C<$Text::Wrap::columns> is set in its own namespace without importing it.
213
214C<Text::Wrap::wrap()> starts its work by expanding all the tabs in its
215input into spaces.  The last thing it does it to turn spaces back
216into tabs.  If you do not want tabs in your results, set
217C<$Text::Wrap::unexpand> to a false value.  Likewise if you do not
218want to use 8-character tabstops, set C<$Text::Wrap::tabstop> to
219the number of characters you do want for your tabstops.
220
221If you want to separate your lines with something other than C<\n>
222then set C<$Text::Wrap::separator> to your preference.  This replaces
223all newlines with C<$Text::Wrap::separator>.  If you just want to
224preserve existing newlines but add new breaks with something else, set
225C<$Text::Wrap::separator2> instead.
226
227When words that are longer than C<$columns> are encountered, they
228are broken up.  C<wrap()> adds a C<"\n"> at column C<$columns>.
229This behavior can be overridden by setting C<$huge> to
230'die' or to 'overflow'.  When set to 'die', large words will cause
231C<die()> to be called.  When set to 'overflow', large words will be
232left intact.
233
234Historical notes: 'die' used to be the default value of
235C<$huge>.  Now, 'wrap' is the default value.
236
237=head1 EXAMPLES
238
239Code:
240
241  print wrap("\t","",<<END);
242  This is a bit of text that forms
243  a normal book-style indented paragraph
244  END
245
246Result:
247
248  "	This is a bit of text that forms
249  a normal book-style indented paragraph
250  "
251
252Code:
253
254  $Text::Wrap::columns=20;
255  $Text::Wrap::separator="|";
256  print wrap("","","This is a bit of text that forms a normal book-style paragraph");
257
258Result:
259
260  "This is a bit of|text that forms a|normal book-style|paragraph"
261
262=head1 SEE ALSO
263
264For correct handling of East Asian half- and full-width characters,
265see L<Text::WrapI18N>.  For more detailed controls: L<Text::Format>.
266
267=head1 AUTHOR
268
269David Muir Sharnoff <cpan@dave.sharnoff.org> with help from Tim Pierce and
270many many others.
271
272=head1 LICENSE
273
274Copyright (C) 1996-2009 David Muir Sharnoff.
275Copyright (C) 2012-2013 Google, Inc.
276This module may be modified, used, copied, and redistributed at your own risk.
277Although allowed by the preceding license, please do not publicly
278redistribute modified versions of this code with the name "Text::Wrap"
279unless it passes the unmodified Text::Wrap test suite.
280