1 /*
2  * Copyright (C) 2021  Brodie Gaslam
3  *
4  * This file is part of "fansi - ANSI Control Sequence Aware String Functions"
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * Go to <https://www.r-project.org/Licenses/GPL-2> for a copy of the license.
17  */
18 
19 #include "fansi.h"
20 
/*
 * This is only needed because the existing unique algo is so bad when
 * dealing with long strings that are the same, which is likely a common use
 * case for `substr`.
 */
26 
/*
 * Return the distinct elements of a character vector.
 *
 * The input is first sorted with FANSI_sort_chr, after which duplicates are
 * adjacent and can be dropped by comparing each element to its predecessor.
 * Elements are compared by SEXP identity (`!=` on the CHARSXP pointers), not
 * by content; presumably this relies on equal strings sharing one CHARSXP --
 * confirm against FANSI_sort_chr / R's string cache.
 *
 * @param x a character vector (STRSXP); any other type raises an R error.
 * @return a STRSXP holding each distinct element once, in the order produced
 *   by FANSI_sort_chr.  Vectors of length zero or one cannot contain
 *   duplicates and are returned as is.
 */
SEXP FANSI_unique_chr(SEXP x) {
  if(TYPEOF(x) != STRSXP) error("Internal Error: type mismatch");

  SEXP x_srt = PROTECT(FANSI_sort_chr(x));
  R_xlen_t x_len = XLENGTH(x_srt);
  SEXP res;

  // Two elements are already enough to hold a duplicate, so only lengths
  // zero and one may skip the de-duplication.
  if(x_len > 1) {
    // Do a two pass version, not ideal but easier: count the distinct
    // elements first so the result can be allocated at its final size.
    SEXP x_prev = STRING_ELT(x_srt, 0);
    R_xlen_t u_count = 1;

    // NOTE(review): element 0 of the sorted vector is never passed to
    // FANSI_check_chrsxp, and the short-vector branch below checks nothing;
    // confirm whether that is intentional.
    for(R_xlen_t i = 1; i < x_len; ++i) {
      SEXP x_cur = STRING_ELT(x_srt, i);
      FANSI_check_chrsxp(x_cur, i);
      if(x_prev != x_cur) {
        ++u_count;
        x_prev = x_cur;
      }
    }
    res = PROTECT(allocVector(STRSXP, u_count));

    // Second pass: copy the first element of every run of identical values.
    x_prev = STRING_ELT(x_srt, 0);
    SET_STRING_ELT(res, 0, x_prev);
    u_count = 1;
    for(R_xlen_t i = 1; i < x_len; ++i) {
      SEXP x_cur = STRING_ELT(x_srt, i);
      if(x_prev != x_cur) {
        SET_STRING_ELT(res, u_count++, x_cur);
        x_prev = x_cur;
      }
    }
  } else {
    // Protected only so that both branches leave the same number of
    // entries on the protection stack.
    res = PROTECT(x);
  }
  UNPROTECT(2);  // x_srt and res
  return res;
}
66 
67