1 /* -*- c-basic-offset: 2 -*- */
2 /*
3   Copyright(C) 2016 Brazil
4 
5   This library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License version 2.1 as published by the Free Software Foundation.
8 
9   This library is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12   Lesser General Public License for more details.
13 
14   You should have received a copy of the GNU Lesser General Public
15   License along with this library; if not, write to the Free Software
16   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1335  USA
17 */
18 
19 #ifdef GRN_EMBEDDED
20 #  define GRN_PLUGIN_FUNCTION_TAG functions_string
21 #endif
22 
23 #include <groonga/plugin.h>
24 
25 /*
26  * func_string_length() returns the number of characters in a string.
27  * If the string contains an invalid byte sequence, this function returns the
28  * number of characters before the invalid byte sequence.
29  */
30 static grn_obj *
func_string_length(grn_ctx * ctx,int n_args,grn_obj ** args,grn_user_data * user_data)31 func_string_length(grn_ctx *ctx, int n_args, grn_obj **args,
32                    grn_user_data *user_data)
33 {
34   grn_obj *target;
35   unsigned int length = 0;
36   grn_obj *grn_length;
37 
38   if (n_args != 1) {
39     GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
40                      "string_length(): wrong number of arguments (%d for 1)",
41                      n_args);
42     return NULL;
43   }
44 
45   target = args[0];
46   if (!(target->header.type == GRN_BULK &&
47         ((target->header.domain == GRN_DB_SHORT_TEXT) ||
48          (target->header.domain == GRN_DB_TEXT) ||
49          (target->header.domain == GRN_DB_LONG_TEXT)))) {
50     grn_obj inspected;
51 
52     GRN_TEXT_INIT(&inspected, 0);
53     grn_inspect(ctx, &inspected, target);
54     GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
55                      "string_length(): target object must be a text bulk: "
56                      "<%.*s>",
57                      (int)GRN_TEXT_LEN(&inspected),
58                      GRN_TEXT_VALUE(&inspected));
59     GRN_OBJ_FIN(ctx, &inspected);
60     return NULL;
61   }
62 
63   {
64     const char *s = GRN_TEXT_VALUE(target);
65     const char *e = GRN_TEXT_VALUE(target) + GRN_TEXT_LEN(target);
66     const char *p;
67     unsigned int cl = 0;
68     for (p = s; p < e && (cl = grn_charlen(ctx, p, e)); p += cl) {
69       length++;
70     }
71   }
72 
73   grn_length = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_UINT32, 0);
74   if (!grn_length) {
75     return NULL;
76   }
77 
78   GRN_UINT32_SET(ctx, grn_length, length);
79 
80   return grn_length;
81 }
82 
83 static grn_obj *
func_string_substring(grn_ctx * ctx,int n_args,grn_obj ** args,grn_user_data * user_data)84 func_string_substring(grn_ctx *ctx, int n_args, grn_obj **args,
85                       grn_user_data *user_data)
86 {
87   grn_obj *target;
88   grn_obj *from_raw;
89   grn_obj *length_raw = NULL;
90   int64_t from = 0;
91   int64_t length = -1;
92   const char *start = NULL;
93   const char *end = NULL;
94   grn_obj *substring;
95 
96   if (n_args < 2) {
97     GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
98                      "string_substring(): wrong number of arguments (%d for 2..3)",
99                      n_args);
100     return NULL;
101   }
102 
103   target = args[0];
104   from_raw = args[1];
105   if (n_args == 3) {
106     length_raw = args[2];
107   }
108 
109   if (!(target->header.type == GRN_BULK &&
110         grn_type_id_is_text_family(ctx, target->header.domain))) {
111     grn_obj inspected;
112 
113     GRN_TEXT_INIT(&inspected, 0);
114     grn_inspect(ctx, &inspected, target);
115     GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
116                      "string_substring(): target object must be a text bulk: "
117                      "<%.*s>",
118                      (int)GRN_TEXT_LEN(&inspected),
119                      GRN_TEXT_VALUE(&inspected));
120     GRN_OBJ_FIN(ctx, &inspected);
121     return NULL;
122   }
123 
124   /* TODO: extract as grn_func_arg_int64() */
125   if (!grn_type_id_is_number_family(ctx, from_raw->header.domain)) {
126     grn_obj inspected;
127 
128     GRN_TEXT_INIT(&inspected, 0);
129     grn_inspect(ctx, &inspected, from_raw);
130     GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
131                      "string_substring(): from must be a number: <%.*s>",
132                      (int)GRN_TEXT_LEN(&inspected),
133                      GRN_TEXT_VALUE(&inspected));
134     GRN_OBJ_FIN(ctx, &inspected);
135     return NULL;
136   }
137   if (from_raw->header.domain == GRN_DB_INT32) {
138     from = GRN_INT32_VALUE(from_raw);
139   } else if (from_raw->header.domain == GRN_DB_INT64) {
140     from = GRN_INT64_VALUE(from_raw);
141   } else {
142     grn_obj buffer;
143     grn_rc rc;
144 
145     GRN_INT64_INIT(&buffer, 0);
146     rc = grn_obj_cast(ctx, from_raw, &buffer, GRN_FALSE);
147     if (rc == GRN_SUCCESS) {
148       from = GRN_INT64_VALUE(&buffer);
149     }
150     GRN_OBJ_FIN(ctx, &buffer);
151 
152     if (rc != GRN_SUCCESS) {
153       grn_obj inspected;
154 
155       GRN_TEXT_INIT(&inspected, 0);
156       grn_inspect(ctx, &inspected, from_raw);
157       GRN_PLUGIN_ERROR(ctx, rc,
158                        "string_substring(): "
159                        "failed to cast from value to number: <%.*s>",
160                        (int)GRN_TEXT_LEN(&inspected),
161                        GRN_TEXT_VALUE(&inspected));
162       GRN_OBJ_FIN(ctx, &inspected);
163       return NULL;
164     }
165   }
166 
167   if (length_raw) {
168     /* TODO: extract as grn_func_arg_int64() */
169     if (!grn_type_id_is_number_family(ctx, length_raw->header.domain)) {
170       grn_obj inspected;
171 
172       GRN_TEXT_INIT(&inspected, 0);
173       grn_inspect(ctx, &inspected, length_raw);
174       GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
175                        "string_substring(): length must be a number: <%.*s>",
176                        (int)GRN_TEXT_LEN(&inspected),
177                        GRN_TEXT_VALUE(&inspected));
178       GRN_OBJ_FIN(ctx, &inspected);
179       return NULL;
180     }
181     if (length_raw->header.domain == GRN_DB_INT32) {
182       length = GRN_INT32_VALUE(length_raw);
183     } else if (length_raw->header.domain == GRN_DB_INT64) {
184       length = GRN_INT64_VALUE(length_raw);
185     } else {
186       grn_obj buffer;
187       grn_rc rc;
188 
189       GRN_INT64_INIT(&buffer, 0);
190       rc = grn_obj_cast(ctx, length_raw, &buffer, GRN_FALSE);
191       if (rc == GRN_SUCCESS) {
192         length = GRN_INT64_VALUE(&buffer);
193       }
194       GRN_OBJ_FIN(ctx, &buffer);
195 
196       if (rc != GRN_SUCCESS) {
197         grn_obj inspected;
198 
199         GRN_TEXT_INIT(&inspected, 0);
200         grn_inspect(ctx, &inspected, length_raw);
201         GRN_PLUGIN_ERROR(ctx, rc,
202                          "string_substring(): "
203                          "failed to cast length value to number: <%.*s>",
204                          (int)GRN_TEXT_LEN(&inspected),
205                          GRN_TEXT_VALUE(&inspected));
206         GRN_OBJ_FIN(ctx, &inspected);
207         return NULL;
208       }
209     }
210   }
211 
212   substring = grn_plugin_proc_alloc(ctx, user_data, target->header.domain, 0);
213   if (!substring) {
214     return NULL;
215   }
216 
217   GRN_BULK_REWIND(substring);
218 
219   if (GRN_TEXT_LEN(target) == 0) {
220     return substring;
221   }
222   if (length == 0) {
223     return substring;
224   }
225 
226   while (from < 0) {
227     from += GRN_TEXT_LEN(target);
228   }
229 
230   {
231     const char *p;
232 
233     start = NULL;
234     p = GRN_TEXT_VALUE(target);
235     end = p + GRN_TEXT_LEN(target);
236 
237     if (from == 0) {
238       start = p;
239     } else {
240       unsigned int char_length = 0;
241       int64_t n_chars = 0;
242 
243       for (;
244            p < end && (char_length = grn_charlen(ctx, p, end));
245            p += char_length, n_chars++) {
246         if (n_chars == from) {
247           start = p;
248           break;
249         }
250       }
251     }
252 
253     if (start && length > 0) {
254       unsigned int char_length = 0;
255       int64_t n_chars = 0;
256 
257       for (;
258            p < end && (char_length = grn_charlen(ctx, p, end));
259            p += char_length, n_chars++) {
260         if (n_chars == length) {
261           end = p;
262           break;
263         }
264       }
265     }
266   }
267 
268   if (start) {
269     GRN_TEXT_SET(ctx, substring, start, end - start);
270   }
271 
272   return substring;
273 }
274 
275 grn_rc
GRN_PLUGIN_INIT(grn_ctx * ctx)276 GRN_PLUGIN_INIT(grn_ctx *ctx)
277 {
278   return ctx->rc;
279 }
280 
281 grn_rc
GRN_PLUGIN_REGISTER(grn_ctx * ctx)282 GRN_PLUGIN_REGISTER(grn_ctx *ctx)
283 {
284   grn_rc rc = GRN_SUCCESS;
285 
286   grn_proc_create(ctx, "string_length", -1, GRN_PROC_FUNCTION, func_string_length,
287                   NULL, NULL, 0, NULL);
288 
289   grn_proc_create(ctx, "string_substring", -1, GRN_PROC_FUNCTION, func_string_substring,
290                   NULL, NULL, 0, NULL);
291 
292   return rc;
293 }
294 
295 grn_rc
GRN_PLUGIN_FIN(grn_ctx * ctx)296 GRN_PLUGIN_FIN(grn_ctx *ctx)
297 {
298   return GRN_SUCCESS;
299 }
300