1 /*
2 * Copyright (C) 2021 Brodie Gaslam
3 *
4 * This file is part of "fansi - ANSI Control Sequence Aware String Functions"
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * Go to <https://www.r-project.org/Licenses/GPL-2> for a copy of the license.
17 */
18
19 #include "fansi.h"
20
/*
 * This is only needed because the existing unique algorithm performs so
 * poorly on long strings that are identical, which is likely a common use
 * case for `substr`.
 */
26
/*
 * Return the distinct elements of a character vector.
 *
 * `x` is first sorted with FANSI_sort_chr so that equal elements end up
 * adjacent; duplicates are then dropped by comparing each element with its
 * predecessor.  The comparison is on the CHARSXP pointers themselves, not on
 * the string bytes.
 *
 * @param x a STRSXP; any other type raises an R error.
 * @return when `x` has two or more elements, a newly allocated STRSXP holding
 *   one copy of each distinct element, in the order produced by
 *   FANSI_sort_chr; otherwise `x` itself.
 */
SEXP FANSI_unique_chr(SEXP x) {
  if(TYPEOF(x) != STRSXP) error("Internal Error: type mismatch");

  SEXP x_srt = PROTECT(FANSI_sort_chr(x));
  R_xlen_t x_len = XLENGTH(x_srt);
  SEXP res;

  // The threshold used to be `x_len > 2`, which returned two-element inputs
  // untouched even when both elements were the same string.
  if(x_len > 1) {
    // Two passes, not ideal but simple: first count the distinct values so
    // the result can be allocated at its final size, then copy them over.
    R_xlen_t u_count = 1;   // element 0 always starts a new run
    SEXP x_prev = STRING_ELT(x_srt, 0);

    for(R_xlen_t i = 1; i < x_len; ++i) {
      SEXP x_cur = STRING_ELT(x_srt, i);
      // NOTE(review): element 0 is never passed to FANSI_check_chrsxp, and
      // `i` indexes the sorted vector rather than `x` -- confirm both are
      // intended.
      FANSI_check_chrsxp(x_cur, i);
      if(x_prev != x_cur) {
        ++u_count;
        x_prev = x_cur;
      }
    }
    res = PROTECT(allocVector(STRSXP, u_count));

    // Second pass: keep the first element of every run of identical values.
    x_prev = STRING_ELT(x_srt, 0);
    SET_STRING_ELT(res, 0, x_prev);
    u_count = 1;            // reused as the next write position in `res`
    for(R_xlen_t i = 1; i < x_len; ++i) {
      SEXP x_cur = STRING_ELT(x_srt, i);
      if(x_prev != x_cur) {
        SET_STRING_ELT(res, u_count++, x_cur);
        x_prev = x_cur;
      }
    }
  } else {
    // Zero or one element: nothing to deduplicate.  PROTECT only keeps the
    // protection count equal to the other branch for the UNPROTECT below.
    res = PROTECT(x);
  }
  UNPROTECT(2);
  return res;
}
66
67