1 ////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (C) 1994-2021 The Octave Project Developers
4 //
5 // See the file COPYRIGHT.md in the top-level directory of this
6 // distribution or <https://octave.org/copyright/>.
7 //
8 // This file is part of Octave.
9 //
10 // Octave is free software: you can redistribute it and/or modify it
11 // under the terms of the GNU General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // Octave is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU General Public License for more details.
19 //
20 // You should have received a copy of the GNU General Public License
21 // along with Octave; see the file COPYING. If not, see
22 // <https://www.gnu.org/licenses/>.
23 //
24 ////////////////////////////////////////////////////////////////////////
25
26 #if defined (HAVE_CONFIG_H)
27 # include "config.h"
28 #endif
29
30 #include <cctype>
31
32 #include <queue>
33 #include <sstream>
34
35 #include "dMatrix.h"
36 #include "localcharset-wrapper.h"
37 #include "uniconv-wrappers.h"
38 #include "unistr-wrappers.h"
39
40 #include "Cell.h"
41 #include "defun.h"
42 #include "error.h"
43 #include "errwarn.h"
44 #include "ov.h"
45 #include "ovl.h"
46 #include "unwind-prot.h"
47 #include "utils.h"
48
49 #include "oct-string.h"
50
51 DEFUN (char, args, ,
52 doc: /* -*- texinfo -*-
53 @deftypefn {} {} char (@var{x})
54 @deftypefnx {} {} char (@var{x}, @dots{})
55 @deftypefnx {} {} char (@var{s1}, @var{s2}, @dots{})
56 @deftypefnx {} {} char (@var{cell_array})
57 Create a string array from one or more numeric matrices, character
58 matrices, or cell arrays.
59
60 Arguments are concatenated vertically. The returned values are padded with
61 blanks as needed to make each row of the string array have the same length.
62 Empty input strings are significant and will concatenated in the output.
63
64 For numerical input, each element is converted to the corresponding ASCII
65 character. A range error results if an input is outside the ASCII range
66 (0-255).
67
68 For cell arrays, each element is concatenated separately. Cell arrays
69 converted through @code{char} can mostly be converted back with
70 @code{cellstr}. For example:
71
72 @example
73 @group
74 char ([97, 98, 99], "", @{"98", "99", 100@}, "str1", ["ha", "lf"])
75 @result{} ["abc "
76 " "
77 "98 "
78 "99 "
79 "d "
80 "str1"
81 "half"]
82 @end group
83 @end example
84 @seealso{strvcat, cellstr}
85 @end deftypefn */)
86 {
87 octave_value retval;
88
89 int nargin = args.length ();
90
91 if (nargin == 0)
92 retval = "";
93 else if (nargin == 1)
94 retval = args(0).convert_to_str (true, true,
95 args(0).is_dq_string () ? '"' : '\'');
96 else
97 {
98 int n_elts = 0;
99
100 int max_len = 0;
101
102 std::queue<string_vector> args_as_strings;
103
104 for (int i = 0; i < nargin; i++)
105 {
106 string_vector s = args(i).xstring_vector_value ("char: unable to convert some args to strings");
107
108 if (s.numel () > 0)
109 n_elts += s.numel ();
110 else
111 n_elts += 1;
112
113 int s_max_len = s.max_length ();
114
115 if (s_max_len > max_len)
116 max_len = s_max_len;
117
118 args_as_strings.push (s);
119 }
120
121 string_vector result (n_elts);
122
123 int k = 0;
124
125 for (int i = 0; i < nargin; i++)
126 {
127 string_vector s = args_as_strings.front ();
128 args_as_strings.pop ();
129
130 int n = s.numel ();
131
132 if (n > 0)
133 {
134 for (int j = 0; j < n; j++)
135 {
136 std::string t = s[j];
137 int t_len = t.length ();
138
139 if (max_len > t_len)
140 t += std::string (max_len - t_len, ' ');
141
142 result[k++] = t;
143 }
144 }
145 else
146 result[k++] = std::string (max_len, ' ');
147 }
148
149 retval = octave_value (result, '\'');
150 }
151
152 return retval;
153 }
154
155 /*
156 %!assert (char (), '')
157 %!assert (char (100), "d")
158 %!assert (char (100,100), ["d";"d"])
159 %!assert (char ({100,100}), ["d";"d"])
160 %!assert (char ([100,100]), ["dd"])
161 %!assert (char ({100,{100}}), ["d";"d"])
162 %!assert (char (100, [], 100), ["d";" ";"d"])
163 %!assert (char ({100, [], 100}), ["d";" ";"d"])
164 %!assert (char ({100,{100, {""}}}), ["d";"d";" "])
165 %!assert (char (["a ";"be"], {"c", 100}), ["a ";"be";"c ";"d "])
%!assert (char ("a", "bb", "ccc"), ["a  "; "bb "; "ccc"])
167 %!assert (char ([65, 83, 67, 73, 73]), "ASCII")
168
169 %!test
170 %! x = char ("foo", "bar", "foobar");
%! assert (x(1,:), "foo   ");
%! assert (x(2,:), "bar   ");
173 %! assert (x(3,:), "foobar");
174 */
175
176 DEFUN (strvcat, args, ,
177 doc: /* -*- texinfo -*-
178 @deftypefn {} {} strvcat (@var{x})
179 @deftypefnx {} {} strvcat (@var{x}, @dots{})
180 @deftypefnx {} {} strvcat (@var{s1}, @var{s2}, @dots{})
181 @deftypefnx {} {} strvcat (@var{cell_array})
182 Create a character array from one or more numeric matrices, character
183 matrices, or cell arrays.
184
185 Arguments are concatenated vertically. The returned values are padded with
186 blanks as needed to make each row of the string array have the same length.
187 Unlike @code{char}, empty strings are removed and will not appear in the
188 output.
189
190 For numerical input, each element is converted to the corresponding ASCII
191 character. A range error results if an input is outside the ASCII range
192 (0-255).
193
194 For cell arrays, each element is concatenated separately. Cell arrays
195 converted through @code{strvcat} can mostly be converted back with
196 @code{cellstr}. For example:
197
198 @example
199 @group
200 strvcat ([97, 98, 99], "", @{"98", "99", 100@}, "str1", ["ha", "lf"])
201 @result{} ["abc "
202 "98 "
203 "99 "
204 "d "
205 "str1"
206 "half"]
207 @end group
208 @end example
209 @seealso{char, strcat, cstrcat}
210 @end deftypefn */)
211 {
212 int nargin = args.length ();
213 int n_elts = 0;
214 std::size_t max_len = 0;
215 std::queue<string_vector> args_as_strings;
216
217 for (int i = 0; i < nargin; i++)
218 {
219 string_vector s = args(i).xstring_vector_value ("strvcat: unable to convert some args to strings");
220
221 std::size_t n = s.numel ();
222
223 // do not count empty strings in calculation of number of elements
224 if (n > 0)
225 {
226 for (std::size_t j = 0; j < n; j++)
227 {
228 if (! s[j].empty ())
229 n_elts++;
230 }
231 }
232
233 std::size_t s_max_len = s.max_length ();
234
235 if (s_max_len > max_len)
236 max_len = s_max_len;
237
238 args_as_strings.push (s);
239 }
240
241 string_vector result (n_elts);
242
243 octave_idx_type k = 0;
244
245 for (int i = 0; i < nargin; i++)
246 {
247 string_vector s = args_as_strings.front ();
248 args_as_strings.pop ();
249
250 std::size_t n = s.numel ();
251
252 if (n > 0)
253 {
254 for (std::size_t j = 0; j < n; j++)
255 {
256 std::string t = s[j];
257 if (t.length () > 0)
258 {
259 std::size_t t_len = t.length ();
260
261 if (max_len > t_len)
262 t += std::string (max_len - t_len, ' ');
263
264 result[k++] = t;
265 }
266 }
267 }
268 }
269
270 // Cannot use ovl. Relies on overloaded octave_value call.
271 return octave_value (result, '\'');
272 }
273
274 /*
275 %!assert (strvcat (""), "")
276 %!assert (strvcat (100) == "d")
277 %!assert (strvcat (100,100), ["d";"d"])
278 %!assert (strvcat ({100,100}), ["d";"d"])
279 %!assert (strvcat ([100,100]), ["dd"])
280 %!assert (strvcat ({100,{100}}), ["d";"d"])
281 %!assert (strvcat (100, [], 100), ["d";"d"])
282 %!assert (strvcat ({100, [], 100}), ["d";"d"])
283 %!assert (strvcat ({100,{100, {""}}}), ["d";"d"])
284 %!assert (strvcat (["a ";"be"], {"c", 100}), ["a ";"be";"c ";"d "])
%!assert (strvcat ("a", "bb", "ccc"), ["a  "; "bb "; "ccc"])
286 %!assert (strvcat (), "")
287 */
288
289 DEFUN (ischar, args, ,
290 doc: /* -*- texinfo -*-
291 @deftypefn {} {} ischar (@var{x})
292 Return true if @var{x} is a character array.
293 @seealso{isfloat, isinteger, islogical, isnumeric, isstring, iscellstr, isa}
294 @end deftypefn */)
295 {
296 if (args.length () != 1)
297 print_usage ();
298
299 return ovl (args(0).is_string ());
300 }
301
302 /*
303 %!assert (ischar ("a"), true)
304 %!assert (ischar (["ab";"cd"]), true)
305 %!assert (ischar ({"ab"}), false)
306 %!assert (ischar (1), false)
307 %!assert (ischar ([1, 2]), false)
308 %!assert (ischar ([]), false)
309 %!assert (ischar ([1, 2; 3, 4]), false)
310 %!assert (ischar (""), true)
311 %!assert (ischar ("test"), true)
312 %!assert (ischar (["test"; "ing"]), true)
313 %!assert (ischar (struct ("foo", "bar")), false)
314
315 %!error ischar ()
316 %!error ischar ("test", 1)
317 */
318
319 static octave_value
do_strcmp_fun(const octave_value & arg0,const octave_value & arg1,octave_idx_type n,const char * fcn_name,bool (* array_op)(const Array<char> &,const Array<char> &,octave_idx_type),bool (* str_op)(const std::string &,const std::string &,std::string::size_type))320 do_strcmp_fun (const octave_value& arg0, const octave_value& arg1,
321 octave_idx_type n, const char *fcn_name,
322 bool (*array_op) (const Array<char>&, const Array<char>&,
323 octave_idx_type),
324 bool (*str_op) (const std::string&, const std::string&,
325 std::string::size_type))
326
327 {
328 octave_value retval;
329
330 bool s1_string = arg0.is_string ();
331 bool s1_cell = arg0.iscell ();
332 bool s2_string = arg1.is_string ();
333 bool s2_cell = arg1.iscell ();
334
335 if (s1_string && s2_string)
336 retval = array_op (arg0.char_array_value (), arg1.char_array_value (), n);
337 else if ((s1_string && s2_cell) || (s1_cell && s2_string))
338 {
339 octave_value str_val, cell_val;
340
341 if (s1_string)
342 {
343 str_val = arg0;
344 cell_val = arg1;
345 }
346 else
347 {
348 str_val = arg1;
349 cell_val = arg0;
350 }
351
352 const Cell cell = cell_val.cell_value ();
353 const string_vector str = str_val.string_vector_value ();
354 octave_idx_type r = str.numel ();
355
356 if (r == 0 || r == 1)
357 {
358 // Broadcast the string.
359
360 boolNDArray output (cell_val.dims (), false);
361
362 std::string s = (r == 0 ? "" : str[0]);
363
364 if (cell_val.iscellstr ())
365 {
366 const Array<std::string> cellstr = cell_val.cellstr_value ();
367 for (octave_idx_type i = 0; i < cellstr.numel (); i++)
368 output(i) = str_op (cellstr(i), s, n);
369 }
370 else
371 {
372 // FIXME: should we warn here?
373 for (octave_idx_type i = 0; i < cell.numel (); i++)
374 {
375 if (cell(i).is_string ())
376 output(i) = str_op (cell(i).string_value (), s, n);
377 }
378 }
379
380 retval = output;
381 }
382 else if (r > 1)
383 {
384 if (cell.numel () == 1)
385 {
386 // Broadcast the cell.
387
388 const dim_vector dv (r, 1);
389 boolNDArray output (dv, false);
390
391 if (cell(0).is_string ())
392 {
393 const std::string str2 = cell(0).string_value ();
394
395 for (octave_idx_type i = 0; i < r; i++)
396 output(i) = str_op (str[i], str2, n);
397 }
398
399 retval = output;
400 }
401 else
402 {
403 // Must match in all dimensions.
404
405 boolNDArray output (cell.dims (), false);
406
407 if (cell.numel () == r)
408 {
409 if (cell_val.iscellstr ())
410 {
411 const Array<std::string> cellstr
412 = cell_val.cellstr_value ();
413 for (octave_idx_type i = 0; i < cellstr.numel (); i++)
414 output(i) = str_op (str[i], cellstr(i), n);
415 }
416 else
417 {
418 // FIXME: should we warn here?
419 for (octave_idx_type i = 0; i < r; i++)
420 {
421 if (cell(i).is_string ())
422 output(i) = str_op (str[i],
423 cell(i).string_value (), n);
424 }
425 }
426
427 retval = output;
428 }
429 else
430 retval = false;
431 }
432 }
433 }
434 else if (s1_cell && s2_cell)
435 {
436 octave_value cell1_val, cell2_val;
437 octave_idx_type r1 = arg0.numel (), r2;
438
439 if (r1 == 1)
440 {
441 // Make the singleton cell2.
442
443 cell1_val = arg1;
444 cell2_val = arg0;
445 }
446 else
447 {
448 cell1_val = arg0;
449 cell2_val = arg1;
450 }
451
452 const Cell cell1 = cell1_val.cell_value ();
453 const Cell cell2 = cell2_val.cell_value ();
454 r1 = cell1.numel ();
455 r2 = cell2.numel ();
456
457 const dim_vector size1 = cell1.dims ();
458 const dim_vector size2 = cell2.dims ();
459
460 boolNDArray output (size1, false);
461
462 if (r2 == 1)
463 {
464 // Broadcast cell2.
465
466 if (cell2(0).is_string ())
467 {
468 const std::string str2 = cell2(0).string_value ();
469
470 if (cell1_val.iscellstr ())
471 {
472 const Array<std::string> cellstr = cell1_val.cellstr_value ();
473 for (octave_idx_type i = 0; i < cellstr.numel (); i++)
474 output(i) = str_op (cellstr(i), str2, n);
475 }
476 else
477 {
478 // FIXME: should we warn here?
479 for (octave_idx_type i = 0; i < r1; i++)
480 {
481 if (cell1(i).is_string ())
482 {
483 const std::string str1 = cell1(i).string_value ();
484 output(i) = str_op (str1, str2, n);
485 }
486 }
487 }
488 }
489 }
490 else
491 {
492 if (size1 != size2)
493 error ("%s: nonconformant cell arrays", fcn_name);
494
495 if (cell1.iscellstr () && cell2.iscellstr ())
496 {
497 const Array<std::string> cellstr1 = cell1_val.cellstr_value ();
498 const Array<std::string> cellstr2 = cell2_val.cellstr_value ();
499 for (octave_idx_type i = 0; i < r1; i++)
500 output (i) = str_op (cellstr1(i), cellstr2(i), n);
501 }
502 else
503 {
504 // FIXME: should we warn here?
505 for (octave_idx_type i = 0; i < r1; i++)
506 {
507 if (cell1(i).is_string () && cell2(i).is_string ())
508 {
509 const std::string str1 = cell1(i).string_value ();
510 const std::string str2 = cell2(i).string_value ();
511 output(i) = str_op (str1, str2, n);
512 }
513 }
514 }
515 }
516
517 retval = output;
518 }
519 else
520 retval = false;
521
522 return retval;
523 }
524
525
526 // These are required so that they match the same signature as strncmp
527 // and strncmpi and can therefore be used in do_strcmp_fun.
528
529 template <typename T, typename T_size_type>
530 static bool
strcmp_ignore_n(const T & s1,const T & s2,T_size_type)531 strcmp_ignore_n (const T& s1, const T& s2, T_size_type)
532 { return octave::string::strcmp (s1, s2); }
533
534 template <typename T, typename T_size_type>
535 static bool
strcmpi_ignore_n(const T & s1,const T & s2,T_size_type)536 strcmpi_ignore_n (const T& s1, const T& s2, T_size_type)
537 { return octave::string::strcmpi (s1, s2); }
538
539
540 DEFUN (strcmp, args, ,
541 doc: /* -*- texinfo -*-
542 @deftypefn {} {} strcmp (@var{s1}, @var{s2})
543 Return 1 if the character strings @var{s1} and @var{s2} are the same,
544 and 0 otherwise.
545
546 If either @var{s1} or @var{s2} is a cell array of strings, then an array
547 of the same size is returned, containing the values described above for
548 every member of the cell array. The other argument may also be a cell
549 array of strings (of the same size or with only one element), char matrix
550 or character string.
551
552 @strong{Caution:} For compatibility with @sc{matlab}, Octave's strcmp
553 function returns 1 if the character strings are equal, and 0 otherwise.
554 This is just the opposite of the corresponding C library function.
555 @seealso{strcmpi, strncmp, strncmpi}
556 @end deftypefn */)
557 {
558 if (args.length () != 2)
559 print_usage ();
560
561 return ovl (do_strcmp_fun (args(0), args(1), 0, "strcmp",
562 strcmp_ignore_n, strcmp_ignore_n));
563 }
564
565 /*
566 %!shared x
567 %! x = char (zeros (0, 2));
568 %!assert (strcmp ("", x), false)
569 %!assert (strcmp (x, ""), false)
570 %!assert (strcmp (x, x), true)
571 ## %!assert (strcmp ({""}, x), true)
572 ## %!assert (strcmp ({x}, ""), false)
573 ## %!assert (strcmp ({x}, x), true)
574 ## %!assert (strcmp ("", {x}), false)
575 ## %!assert (strcmp (x, {""}), false)
576 ## %!assert (strcmp (x, {x}), true)
577 ## %!assert (strcmp ({x; x}, ""), [false; false])
578 ## %!assert (strcmp ({x; x}, {""}), [false; false])
579 ## %!assert (strcmp ("", {x; x}), [false; false])
580 ## %!assert (strcmp ({""}, {x; x}), [false; false])
581 %!assert (strcmp ({"foo"}, x), false)
582 %!assert (strcmp ({"foo"}, "foo"), true)
583 %!assert (strcmp ({"foo"}, x), false)
584 %!assert (strcmp (x, {"foo"}), false)
585 %!assert (strcmp ("foo", {"foo"}), true)
586 %!assert (strcmp (x, {"foo"}), false)
587 %!shared y
588 %! y = char (zeros (2, 0));
589 %!assert (strcmp ("", y), false)
590 %!assert (strcmp (y, ""), false)
591 %!assert (strcmp (y, y), true)
592 %!assert (strcmp ({""}, y), [true; true])
593 %!assert (strcmp ({y}, ""), true)
594 %!assert (strcmp ({y}, y), [true; true])
595 %!assert (strcmp ("", {y}), true)
596 %!assert (strcmp (y, {""}), [true; true])
597 %!assert (strcmp (y, {y}), [true; true])
598 %!assert (strcmp ({y; y}, ""), [true; true])
599 %!assert (strcmp ({y; y}, {""}), [true; true])
600 %!assert (strcmp ("", {y; y}), [true; true])
601 %!assert (strcmp ({""}, {y; y}), [true; true])
602 %!assert (strcmp ({"foo"}, y), [false; false])
603 %!assert (strcmp ({"foo"}, y), [false; false])
604 %!assert (strcmp (y, {"foo"}), [false; false])
605 %!assert (strcmp (y, {"foo"}), [false; false])
606 %!assert (strcmp ("foobar", "foobar"), true)
607 %!assert (strcmp ("foobar", "fooBar"), false)
608 %!assert (strcmp ("fooba", "foobar"), false)
609
610 %!error strcmp ()
611 %!error strcmp ("foo", "bar", 3)
612 */
613
614 DEFUN (strncmp, args, ,
615 doc: /* -*- texinfo -*-
616 @deftypefn {} {} strncmp (@var{s1}, @var{s2}, @var{n})
617 Return 1 if the first @var{n} characters of strings @var{s1} and @var{s2}
618 are the same, and 0 otherwise.
619
620 @example
621 @group
622 strncmp ("abce", "abcd", 3)
623 @result{} 1
624 @end group
625 @end example
626
627 If either @var{s1} or @var{s2} is a cell array of strings, then an array
628 of the same size is returned, containing the values described above for
629 every member of the cell array. The other argument may also be a cell
630 array of strings (of the same size or with only one element), char matrix
631 or character string.
632
633 @example
634 @group
635 strncmp ("abce", @{"abcd", "bca", "abc"@}, 3)
636 @result{} [1, 0, 1]
637 @end group
638 @end example
639
640 @strong{Caution:} For compatibility with @sc{matlab}, Octave's strncmp
641 function returns 1 if the character strings are equal, and 0 otherwise.
642 This is just the opposite of the corresponding C library function.
643 @seealso{strncmpi, strcmp, strcmpi}
644 @end deftypefn */)
645 {
646 if (args.length () != 3)
647 print_usage ();
648
649 octave_idx_type n = args(2).idx_type_value ();
650
651 if (n > 0)
652 return ovl (do_strcmp_fun (args(0), args(1), n, "strncmp",
653 octave::string::strncmp,
654 octave::string::strncmp));
655 else
656 error ("strncmp: N must be greater than 0");
657 }
658
659 /*
660 %!assert (strncmp ("abce", "abc", 3), true)
661 %!assert (strncmp ("abce", "aBc", 3), false)
662 %!assert (strncmp (100, 100, 1), false)
663 %!assert (strncmp ("abce", {"abcd", "bca", "abc"}, 3), logical ([1, 0, 1]))
664 %!assert (strncmp ("abc", {"abcd", "bca", "abc"}, 4), logical ([0, 0, 1]))
665 %!assert (strncmp ({"abcd", "bca", "abc"},"abce", 3), logical ([1, 0, 1]))
666 %!assert (strncmp ({"abcd", "bca", "abc"},{"abcd", "bca", "abe"}, 3), logical ([1, 1, 0]))
667 %!assert (strncmp ("abc", {"abcd", 10}, 2), logical ([1, 0]))
668
669 %!assert <*54373> (strncmp ("abc", "abc", 100))
670
671 %!error strncmp ()
672 %!error strncmp ("abc", "def")
673 */
674
675 DEFUNX ("strcmpi", Fstrcmpi, args, ,
676 doc: /* -*- texinfo -*-
677 @deftypefn {} {} strcmpi (@var{s1}, @var{s2})
678 Return 1 if the character strings @var{s1} and @var{s2} are the same,
679 disregarding case of alphabetic characters, and 0 otherwise.
680
681 If either @var{s1} or @var{s2} is a cell array of strings, then an array
682 of the same size is returned, containing the values described above for
683 every member of the cell array. The other argument may also be a cell
684 array of strings (of the same size or with only one element), char matrix
685 or character string.
686
687 @strong{Caution:} For compatibility with @sc{matlab}, Octave's strcmp
688 function returns 1 if the character strings are equal, and 0 otherwise.
689 This is just the opposite of the corresponding C library function.
690
691 @strong{Caution:} National alphabets are not supported.
692 @seealso{strcmp, strncmp, strncmpi}
693 @end deftypefn */)
694 {
695 if (args.length () != 2)
696 print_usage ();
697
698 return ovl (do_strcmp_fun (args(0), args(1), 0, "strcmpi",
699 strcmpi_ignore_n, strcmpi_ignore_n));
700 }
701
702 /*
703 %!assert (strcmpi ("abc123", "ABC123"), true)
704 */
705
706 DEFUNX ("strncmpi", Fstrncmpi, args, ,
707 doc: /* -*- texinfo -*-
708 @deftypefn {} {} strncmpi (@var{s1}, @var{s2}, @var{n})
709 Return 1 if the first @var{n} character of @var{s1} and @var{s2} are the
710 same, disregarding case of alphabetic characters, and 0 otherwise.
711
712 If either @var{s1} or @var{s2} is a cell array of strings, then an array
713 of the same size is returned, containing the values described above for
714 every member of the cell array. The other argument may also be a cell
715 array of strings (of the same size or with only one element), char matrix
716 or character string.
717
718 @strong{Caution:} For compatibility with @sc{matlab}, Octave's strncmpi
719 function returns 1 if the character strings are equal, and 0 otherwise.
720 This is just the opposite of the corresponding C library function.
721
722 @strong{Caution:} National alphabets are not supported.
723 @seealso{strncmp, strcmp, strcmpi}
724 @end deftypefn */)
725 {
726 if (args.length () != 3)
727 print_usage ();
728
729 octave_idx_type n = args(2).idx_type_value ();
730
731 if (n > 0)
732 return ovl (do_strcmp_fun (args(0), args(1), n, "strncmpi",
733 octave::string::strncmpi,
734 octave::string::strncmpi));
735 else
736 error ("strncmpi: N must be greater than 0");
737 }
738
739 /*
740 %!assert (strncmpi ("abc123", "ABC456", 3), true)
741
742 %!assert <*54373> (strncmpi ("abc", "abC", 100))
743 */
744
745 DEFUN (str2double, args, ,
746 doc: /* -*- texinfo -*-
747 @deftypefn {} {} str2double (@var{s})
748 Convert a string to a real or complex number.
749
750 The string must be in one of the following formats where a and b are real
751 numbers and the complex unit is @qcode{'i'} or @qcode{'j'}:
752
753 @itemize
754 @item a + bi
755
756 @item a + b*i
757
758 @item a + i*b
759
760 @item bi + a
761
762 @item b*i + a
763
764 @item i*b + a
765 @end itemize
766
767 If present, a and/or b are of the form @nospell{[+-]d[,.]d[[eE][+-]d]} where
768 the brackets indicate optional arguments and @qcode{'d'} indicates zero or
769 more digits. The special input values @code{Inf}, @code{NaN}, and @code{NA}
770 are also accepted.
771
772 @var{s} may be a character string, character matrix, or cell array. For
773 character arrays the conversion is repeated for every row, and a double or
774 complex array is returned. Empty rows in @var{s} are deleted and not
775 returned in the numeric array. For cell arrays each character string
776 element is processed and a double or complex array of the same dimensions as
777 @var{s} is returned.
778
779 For unconvertible scalar or character string input @code{str2double} returns
780 a NaN@. Similarly, for character array input @code{str2double} returns a
781 NaN for any row of @var{s} that could not be converted. For a cell array,
782 @code{str2double} returns a NaN for any element of @var{s} for which
783 conversion fails. Note that numeric elements in a mixed string/numeric
784 cell array are not strings and the conversion will fail for these elements
785 and return NaN.
786
787 @code{str2double} can replace @code{str2num}, and it avoids the security
788 risk of using @code{eval} on unknown data.
789 @seealso{str2num}
790 @end deftypefn */)
791 {
792 if (args.length () != 1)
793 print_usage ();
794
795 octave_value retval;
796
797 if (args(0).is_string ())
798 {
799 if (args(0).rows () == 0 || args(0).columns () == 0)
800 retval = Matrix (1, 1, octave::numeric_limits<double>::NaN ());
801 else if (args(0).rows () == 1 && args(0).ndims () == 2)
802 retval = octave::string::str2double (args(0).string_value ());
803 else
804 {
805 const string_vector sv = args(0).string_vector_value ();
806
807 retval = sv.map<Complex> (octave::string::str2double);
808 }
809 }
810 else if (args(0).iscell ())
811 {
812 const Cell cell = args(0).cell_value ();
813
814 ComplexNDArray output (cell.dims (), octave::numeric_limits<double>::NaN ());
815
816 for (octave_idx_type i = 0; i < cell.numel (); i++)
817 {
818 if (cell(i).is_string ())
819 output(i) = octave::string::str2double (cell(i).string_value ());
820 }
821 retval = output;
822 }
823 else
824 retval = Matrix (1, 1, octave::numeric_limits<double>::NaN ());
825
826 return retval;
827 }
828
829 /*
830 %!assert (str2double ("1"), 1)
831 %!assert (str2double ("-.1e-5"), -1e-6)
832 %!testif ; ! ismac ()
833 %! assert (str2double (char ("1", "2 3", "4i")), [1; NaN; 4i]);
834 %!xtest <47413>
835 %! ## Same test code as above, but intended only for test statistics on Mac.
836 %! if (! ismac ()), return; endif
837 %! assert (str2double (char ("1", "2 3", "4i")), [1; NaN; 4i]);
838 %!assert (str2double ("1,222.5"), 1222.5)
839 %!assert (str2double ("i"), i)
840 %!assert (str2double ("2j"), 2i)
841 %!assert (str2double ("2 + j"), 2+j)
842 %!assert (str2double ("i*2 + 3"), 3+2i)
843 %!assert (str2double (".5*i + 3.5"), 3.5+0.5i)
844 %!assert (str2double ("1e-3 + i*.25"), 1e-3 + 0.25i)
845 %!assert (str2double (char ("2 + j","1.25e-3","-05")), [2+i; 1.25e-3; -5])
846 %!assert (str2double ({"2 + j","1.25e-3","-05"}), [2+i, 1.25e-3, -5])
847 %!assert (str2double (1), NaN)
848 %!assert (str2double ("1 2 3 4"), NaN)
849 %!assert (str2double ("Hello World"), NaN)
850 %!assert (str2double ("NaN"), NaN)
851 %!assert (str2double ("NA"), NA)
852 %!assert (str2double ("Inf"), Inf)
853 %!assert (str2double ("iNF"), Inf)
854 %!assert (str2double ("-Inf"), -Inf)
855 %!assert (str2double ("Inf*i"), complex (0, Inf))
856 %!assert (str2double ("iNF*i"), complex (0, Inf))
857 %!assert (str2double ("NaN + Inf*i"), complex (NaN, Inf))
858 %!assert (str2double ("Inf - Inf*i"), complex (Inf, -Inf))
859 %!assert (str2double ("-i*NaN - Inf"), complex (-Inf, -NaN))
860 %!testif ; ! ismac ()
861 %! assert (str2double ({"abc", "4i"}), [NaN + 0i, 4i]);
862 %!xtest <47413>
863 %! if (! ismac ()), return; endif
864 %! assert (str2double ({"abc", "4i"}), [NaN + 0i, 4i]);
865 %!testif ; ! ismac ()
866 %! assert (str2double ({2, "4i"}), [NaN + 0i, 4i])
867 %!xtest <47413>
868 %! if (! ismac ()), return; endif
869 %! assert (str2double ({2, "4i"}), [NaN + 0i, 4i])
870 %!assert (str2double (zeros (3,1,2)), NaN)
871 %!assert (str2double (''), NaN)
872 %!assert (str2double ([]), NaN)
873 %!assert (str2double (char(zeros(3,0))), NaN)
874 */
875
876 DEFUN (__native2unicode__, args, ,
877 doc: /* -*- texinfo -*-
878 @deftypefn {} {@var{utf8_str} =} __native2unicode__ (@var{native_bytes}, @var{codepage})
879 Convert byte stream @var{native_bytes} to UTF-8 using @var{codepage}.
880
881 @seealso{native2unicode, __unicode2native__}
882 @end deftypefn */)
883 {
884 int nargin = args.length ();
885
886 if (nargin != 2)
887 print_usage ();
888
889 if (args(0).is_string ())
890 return ovl (args(0));
891
892 std::string tmp = args(1).string_value ();
893 const char *codepage
894 = (tmp.empty () ? octave_locale_charset_wrapper () : tmp.c_str ());
895
896 charNDArray native_bytes = args(0).char_array_value ();
897
898 const char *src = native_bytes.data ();
899 std::size_t srclen = native_bytes.numel ();
900
901 std::size_t length;
902 uint8_t *utf8_str = nullptr;
903
904 octave::unwind_protect frame;
905
906 utf8_str = octave_u8_conv_from_encoding (codepage, src, srclen, &length);
907
908 if (! utf8_str)
909 {
910 if (errno == ENOSYS)
911 error ("native2unicode: iconv() is not supported. Installing GNU "
912 "libiconv and then re-compiling Octave could fix this.");
913 else
914 error ("native2unicode: converting from codepage '%s' to UTF-8: %s",
915 codepage, std::strerror (errno));
916 }
917
918 frame.add_fcn (::free, static_cast<void *> (utf8_str));
919
920 octave_idx_type len = length;
921
922 charNDArray retval (dim_vector (1, len));
923
924 for (octave_idx_type i = 0; i < len; i++)
925 retval.xelem(i) = utf8_str[i];
926
927 return ovl (retval);
928 }
929
930 DEFUN (__unicode2native__, args, ,
931 doc: /* -*- texinfo -*-
932 @deftypefn {} {@var{native_bytes} =} __unicode2native__ (@var{utf8_str}, @var{codepage})
933 Convert UTF-8 string @var{utf8_str} to byte stream @var{native_bytes} using
934 @var{codepage}.
935
936 @seealso{unicode2native, __native2unicode__}
937 @end deftypefn */)
938 {
939 int nargin = args.length ();
940
941 if (nargin != 2)
942 print_usage ();
943
944 std::string tmp = args(1).string_value ();
945 const char *codepage
946 = (tmp.empty () ? octave_locale_charset_wrapper () : tmp.c_str ());
947
948 charNDArray utf8_str = args(0).char_array_value ();
949
950 const uint8_t *src = reinterpret_cast<const uint8_t *> (utf8_str.data ());
951 std::size_t srclen = utf8_str.numel ();
952
953 std::size_t length;
954 char *native_bytes = nullptr;
955
956 octave::unwind_protect frame;
957
958 native_bytes = octave_u8_conv_to_encoding (codepage, src, srclen, &length);
959
960 if (! native_bytes)
961 {
962 if (errno == ENOSYS)
963 error ("unicode2native: iconv() is not supported. Installing GNU "
964 "libiconv and then re-compiling Octave could fix this.");
965 else
966 error ("unicode2native: converting from UTF-8 to codepage '%s': %s",
967 codepage, std::strerror (errno));
968 }
969
970 frame.add_fcn (::free, static_cast<void *> (native_bytes));
971
972 octave_idx_type len = length;
973
974 uint8NDArray retval (dim_vector (1, len));
975
976 for (octave_idx_type i = 0; i < len; i++)
977 retval.xelem(i) = native_bytes[i];
978
979 return ovl (retval);
980 }
981
982 DEFUN (__locale_charset__, , ,
983 doc: /* -*- texinfo -*-
984 @deftypefn {} {@var{charset} =} __locale_charset__ ()
985 Return the identifier for the charset used if the encoding is set to
986 @qcode{"locale"}.
987 @end deftypefn */)
988 {
989 const char *charset = octave_locale_charset_wrapper ();
990 std::string charset_str (charset);
991 return ovl (charset_str);
992 }
993
994 DEFUN (unicode_idx, args, ,
995 doc: /* -*- texinfo -*-
996 @deftypefn {} {@var{idx} =} unicode_idx (@var{str})
997 Return an array with the indices for each UTF-8 encoded character in @var{str}.
998
999 @example
1000 @group
1001 unicode_idx ("aäbc")
1002 @result{} [1, 2, 2, 3, 4]
1003 @end group
1004 @end example
1005
1006 @end deftypefn */)
1007 {
1008 int nargin = args.length ();
1009
1010 if (nargin != 1)
1011 print_usage ();
1012
1013 charNDArray str = args(0).xchar_array_value ("STR must be a string");
1014 Array<octave_idx_type> p (dim_vector (str.ndims (), 1));
1015 charNDArray str_p;
1016 if (str.ndims () > 1)
1017 {
1018 for (octave_idx_type i=0; i < str.ndims (); i++)
1019 p(i) = i;
1020 p(0) = 1;
1021 p(1) = 0;
1022 str_p = str.permute (p);
1023 }
1024
1025 const uint8_t *src = reinterpret_cast<const uint8_t *> (str_p.data ());
1026 octave_idx_type srclen = str.numel ();
1027
1028 NDArray idx (str_p.dims ());
1029
1030 octave_idx_type u8_char_num = 1;
1031 for (octave_idx_type i = 0; i < srclen; u8_char_num++)
1032 {
1033 int mblen = octave_u8_strmblen_wrapper (src + i);
1034 if (mblen < 1)
1035 mblen = 1;
1036 for (octave_idx_type j = 0; j < mblen; j++)
1037 idx (i+j) = u8_char_num;
1038 i += mblen;
1039 }
1040
1041 return ovl(str.ndims () > 1 ? idx.permute (p, true) : idx);
1042 }
1043
1044 /*
1045 %!assert (unicode_idx (["aäou"; "Ä∞"]), [1 2 2 3 4; 5 5 6 6 6]);
1046 */
1047
1048 DEFUN (__u8_validate__, args, ,
1049 doc: /* -*- texinfo -*-
1050 @deftypefn {} {@var{out_str} =} __u8_validate__ (in_str, mode)
1051 Return string with valid UTF-8.
1052
1053 On encountering invalid UTF-8 in @var{in_str}, the bytes are either replaced by
1054 the replacement character "�" (if @var{mode} is omitted or the string
1055 "replace") or interpreted as the Unicode code points U+0080–U+00FF with the
1056 same value as the byte (if @var{mode} is the string "unicode"), thus
1057 interpreting the bytes according to ISO-8859-1.
1058
1059 @end deftypefn */)
1060 {
1061 if (args.length () < 1 || args.length () > 2)
1062 print_usage ();
1063
1064 // Input check
1065 std::string in_str =
1066 args(0).xstring_value ("__u8_validate__: IN_STR must be a string.");
1067
1068 std::string mode = "replace";
1069 if (args.length () > 1)
1070 mode = args(1).xstring_value ("__u8_validate__: MODE must be a string.");
1071
1072 octave::string::u8_fallback_type fb_type;
1073 if (mode == "replace")
1074 fb_type = octave::string::U8_REPLACEMENT_CHAR;
1075 else if (mode == "unicode")
1076 fb_type = octave::string::U8_ISO_8859_1;
1077 else
1078 error ("__u8_validate__: MODE must either be \"replace\" or \"unicode\".");
1079
1080 octave::string::u8_validate ("__u8_validate__", in_str, fb_type);
1081
1082 return ovl (in_str);
1083 }
1084
1085 DEFUN (newline, args, ,
1086 doc: /* -*- texinfo -*-
1087 @deftypefn {} {} newline
1088 Return the character corresponding to a newline.
1089
1090 This is equivalent to @qcode{"@xbackslashchar{}n"}.
1091
1092 Example Code
1093
1094 @example
1095 @group
1096 joined_string = [newline "line1" newline "line2"]
1097 @result{}
1098 line1
1099 line2
1100 @end group
1101 @end example
1102
1103 @seealso{strcat, strjoin, strsplit}
1104 @end deftypefn */)
1105 {
1106 if (args.length () != 0)
1107 print_usage ();
1108
1109 static octave_value_list retval = ovl ("\n");
1110
1111 return retval;
1112 }
1113
1114 /*
1115 %!assert (newline (), "\n")
1116
1117 %!error newline (1)
1118 %!error [a, b] = newline ();
1119 */
1120
1121 DEFUN (list_in_columns, args, ,
1122 doc: /* -*- texinfo -*-
1123 @deftypefn {} {} list_in_columns (@var{arg}, @var{width}, @var{prefix})
1124 Return a string containing the elements of @var{arg} listed in columns with
1125 an overall maximum width of @var{width} and optional prefix @var{prefix}.
1126
1127 The argument @var{arg} must be a cell array of character strings or a
1128 character array.
1129
1130 If @var{width} is not specified or is an empty matrix, or less than or equal
1131 to zero, the width of the terminal screen is used. Newline characters are
1132 used to break the lines in the output string. For example:
1133 @c Set example in small font to prevent overfull line
1134
1135 @smallexample
1136 @group
1137 list_in_columns (@{"abc", "def", "ghijkl", "mnop", "qrs", "tuv"@}, 20)
1138 @result{} abc mnop
1139 def qrs
1140 ghijkl tuv
1141
1142 whos ans
1143 @result{}
1144 Variables in the current scope:
1145
1146 Attr Name Size Bytes Class
1147 ==== ==== ==== ===== =====
1148 ans 1x37 37 char
1149
1150 Total is 37 elements using 37 bytes
1151 @end group
1152 @end smallexample
1153
1154 @seealso{terminal_size}
1155 @end deftypefn */)
1156 {
1157 int nargin = args.length ();
1158
1159 if (nargin < 1 || nargin > 3)
1160 print_usage ();
1161
1162 string_vector s = args(0).xstring_vector_value ("list_in_columns: ARG must be a cellstr or char array");
1163
1164 int width = -1;
1165
1166 if (nargin > 1 && ! args(1).isempty ())
1167 width = args(1).xint_value ("list_in_columns: WIDTH must be an integer");
1168
1169 std::string prefix;
1170
1171 if (nargin > 2)
1172 prefix = args(2).xstring_value ("list_in_columns: PREFIX must be a string");
1173
1174 std::ostringstream buf;
1175
1176 s.list_in_columns (buf, width, prefix);
1177
1178 return ovl (buf.str ());
1179 }
1180
1181 /*
1182 %!test
1183 %! input = {"abc", "def", "ghijkl", "mnop", "qrs", "tuv"};
1184 %! result = "abc mnop\ndef qrs\nghijkl tuv\n";
1185 %! assert (list_in_columns (input, 20), result);
1186 %!test
1187 %! input = char ("abc", "def", "ghijkl", "mnop", "qrs", "tuv");
1188 %! result = "abc mnop \ndef qrs \nghijkl tuv \n";
1189 %! assert (list_in_columns (input, 20), result);
1190 %!test
1191 %! input = char ("abc", "def", "ghijkl", "mnop", "qrs", "tuv");
1192 %! result = " abc mnop \n def qrs \n ghijkl tuv \n";
1193 %! assert (list_in_columns (input, 20, " "), result);
1194
1195 %!error list_in_columns ()
1196 %!error list_in_columns (["abc", "def"], 20, 2)
1197 %!error list_in_columns (["abc", "def"], 20, " ", 3)
1198 %!error <list_in_columns: WIDTH must be an integer> list_in_columns (["abc", "def"], "a")
1199 */
1200