1 /* -*- c-basic-offset: 2 -*- */
2 /*
3   Copyright(C) 2015 Brazil
4 
5   This library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License version 2.1 as published by the Free Software Foundation.
8 
9   This library is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12   Lesser General Public License for more details.
13 
14   You should have received a copy of the GNU Lesser General Public
15   License along with this library; if not, write to the Free Software
16   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1335  USA
17 */
18 
19 /* TS is an acronym for "Turbo Selector". */
20 
21 #include "grn_ts.h"
22 
23 #include "grn_output.h"
24 #include "grn_str.h"
25 
26 #include "ts/ts_buf.h"
27 #include "ts/ts_cursor.h"
28 #include "ts/ts_expr.h"
29 #include "ts/ts_expr_parser.h"
30 #include "ts/ts_log.h"
31 #include "ts/ts_sorter.h"
32 #include "ts/ts_str.h"
33 #include "ts/ts_types.h"
34 #include "ts/ts_util.h"
35 
36 #include <string.h>
37 
38 /*-------------------------------------------------------------
39  * Miscellaneous.
40  */
41 
/*
 * GRN_TS_BATCH_SIZE is the maximum number of records that this file reads,
 * filters, evaluates and outputs in one step.
 */
enum {
  GRN_TS_BATCH_SIZE = 1024
};
43 
44 /* grn_ts_bool_output() outputs a value. */
45 static grn_rc
grn_ts_bool_output(grn_ctx * ctx,grn_ts_bool value)46 grn_ts_bool_output(grn_ctx *ctx, grn_ts_bool value)
47 {
48   if (value) {
49     return grn_bulk_write(ctx, ctx->impl->output.buf, "true", 4);
50   } else {
51     return grn_bulk_write(ctx, ctx->impl->output.buf, "false", 5);
52   }
53 }
54 
55 /* grn_ts_int_output() outputs a value. */
56 static grn_rc
grn_ts_int_output(grn_ctx * ctx,grn_ts_int value)57 grn_ts_int_output(grn_ctx *ctx, grn_ts_int value)
58 {
59   return grn_text_lltoa(ctx, ctx->impl->output.buf, value);
60 }
61 
62 /* grn_ts_float_output() outputs a value. */
63 static grn_rc
grn_ts_float_output(grn_ctx * ctx,grn_ts_float value)64 grn_ts_float_output(grn_ctx *ctx, grn_ts_float value)
65 {
66   return grn_text_ftoa(ctx, ctx->impl->output.buf, value);
67 }
68 
69 /* grn_ts_time_output() outputs a value. */
70 static grn_rc
grn_ts_time_output(grn_ctx * ctx,grn_ts_time value)71 grn_ts_time_output(grn_ctx *ctx, grn_ts_time value)
72 {
73   return grn_text_ftoa(ctx, ctx->impl->output.buf, value * 0.000001);
74 }
75 
76 /* grn_ts_text_output() outputs a value. */
77 static grn_rc
grn_ts_text_output(grn_ctx * ctx,grn_ts_text value)78 grn_ts_text_output(grn_ctx *ctx, grn_ts_text value)
79 {
80   return grn_text_esc(ctx, ctx->impl->output.buf, value.ptr, value.size);
81 }
82 
83 /* grn_ts_geo_output() outputs a value. */
84 static grn_rc
grn_ts_geo_output(grn_ctx * ctx,grn_ts_geo value)85 grn_ts_geo_output(grn_ctx *ctx, grn_ts_geo value)
86 {
87   grn_rc rc = grn_bulk_write(ctx, ctx->impl->output.buf, "\"", 1);
88   if (rc != GRN_SUCCESS) {
89     return rc;
90   }
91   rc = grn_text_itoa(ctx, ctx->impl->output.buf, value.latitude);
92   if (rc != GRN_SUCCESS) {
93     return rc;
94   }
95   rc = grn_bulk_write(ctx, ctx->impl->output.buf, "x", 1);
96   if (rc != GRN_SUCCESS) {
97     return rc;
98   }
99   rc = grn_text_itoa(ctx, ctx->impl->output.buf, value.longitude);
100   if (rc != GRN_SUCCESS) {
101     return rc;
102   }
103   return grn_bulk_write(ctx, ctx->impl->output.buf, "\"", 1);
104 }
105 
106 #define GRN_TS_VECTOR_OUTPUT(kind)\
107   size_t i;\
108   grn_rc rc = grn_bulk_write(ctx, ctx->impl->output.buf, "[", 1);\
109   if (rc != GRN_SUCCESS) {\
110     return rc;\
111   }\
112   for (i = 0; i < value.size; ++i) {\
113     if (i) {\
114       rc = grn_bulk_write(ctx, ctx->impl->output.buf, ",", 1);\
115       if (rc != GRN_SUCCESS) {\
116         return rc;\
117       }\
118     }\
119     rc = grn_ts_ ## kind ## _output(ctx, value.ptr[i]);\
120     if (rc != GRN_SUCCESS) {\
121       return rc;\
122     }\
123   }\
124   return grn_bulk_write(ctx, ctx->impl->output.buf, "]", 1);
125 /* grn_ts_bool_vector_output() outputs a value. */
126 static grn_rc
grn_ts_bool_vector_output(grn_ctx * ctx,grn_ts_bool_vector value)127 grn_ts_bool_vector_output(grn_ctx *ctx, grn_ts_bool_vector value)
128 {
129   GRN_TS_VECTOR_OUTPUT(bool)
130 }
131 
132 /* grn_ts_int_vector_output() outputs a value. */
133 static grn_rc
grn_ts_int_vector_output(grn_ctx * ctx,grn_ts_int_vector value)134 grn_ts_int_vector_output(grn_ctx *ctx, grn_ts_int_vector value)
135 {
136   GRN_TS_VECTOR_OUTPUT(int)
137 }
138 
139 /* grn_ts_float_vector_output() outputs a value. */
140 static grn_rc
grn_ts_float_vector_output(grn_ctx * ctx,grn_ts_float_vector value)141 grn_ts_float_vector_output(grn_ctx *ctx, grn_ts_float_vector value)
142 {
143   GRN_TS_VECTOR_OUTPUT(float)
144 }
145 
146 /* grn_ts_time_vector_output() outputs a value. */
147 static grn_rc
grn_ts_time_vector_output(grn_ctx * ctx,grn_ts_time_vector value)148 grn_ts_time_vector_output(grn_ctx *ctx, grn_ts_time_vector value)
149 {
150   GRN_TS_VECTOR_OUTPUT(time)
151 }
152 
153 /* grn_ts_text_vector_output() outputs a value. */
154 static grn_rc
grn_ts_text_vector_output(grn_ctx * ctx,grn_ts_text_vector value)155 grn_ts_text_vector_output(grn_ctx *ctx, grn_ts_text_vector value)
156 {
157   GRN_TS_VECTOR_OUTPUT(text)
158 }
159 
160 /* grn_ts_geo_vector_output() outputs a value. */
161 static grn_rc
grn_ts_geo_vector_output(grn_ctx * ctx,grn_ts_geo_vector value)162 grn_ts_geo_vector_output(grn_ctx *ctx, grn_ts_geo_vector value)
163 {
164   GRN_TS_VECTOR_OUTPUT(geo)
165 }
166 #undef GRN_TS_VECTOR_OUTPUT
167 
168 /*-------------------------------------------------------------
169  * grn_ts_writer.
170  */
171 
172 typedef struct {
173   grn_ts_expr_parser *parser;
174   grn_ts_expr **exprs;
175   size_t n_exprs;
176   size_t max_n_exprs;
177   grn_obj name_buf;
178   grn_ts_str *names;
179   grn_ts_buf *bufs;
180 } grn_ts_writer;
181 
182 /* grn_ts_writer_init() initializes a writer. */
183 static void
grn_ts_writer_init(grn_ctx * ctx,grn_ts_writer * writer)184 grn_ts_writer_init(grn_ctx *ctx, grn_ts_writer *writer)
185 {
186   memset(writer, 0, sizeof(*writer));
187   writer->parser = NULL;
188   writer->exprs = NULL;
189   GRN_TEXT_INIT(&writer->name_buf, GRN_OBJ_VECTOR);
190   writer->names = NULL;
191   writer->bufs = NULL;
192 }
193 
194 /* grn_ts_writer_fin() finalizes a writer. */
195 static void
grn_ts_writer_fin(grn_ctx * ctx,grn_ts_writer * writer)196 grn_ts_writer_fin(grn_ctx *ctx, grn_ts_writer *writer)
197 {
198   size_t i;
199   if (writer->bufs) {
200     for (i = 0; i < writer->n_exprs; i++) {
201       grn_ts_buf_fin(ctx, &writer->bufs[i]);
202     }
203     GRN_FREE(writer->bufs);
204   }
205   if (writer->names) {
206     GRN_FREE(writer->names);
207   }
208   GRN_OBJ_FIN(ctx, &writer->name_buf);
209   if (writer->exprs) {
210     for (i = 0; i < writer->n_exprs; i++) {
211       grn_ts_expr_close(ctx, writer->exprs[i]);
212     }
213     GRN_FREE(writer->exprs);
214   }
215   if (writer->parser) {
216     grn_ts_expr_parser_close(ctx, writer->parser);
217   }
218 }
219 
220 /* grn_ts_writer_expand() expands a wildcard. */
221 static grn_rc
grn_ts_writer_expand(grn_ctx * ctx,grn_ts_writer * writer,grn_obj * table,grn_ts_str str)222 grn_ts_writer_expand(grn_ctx *ctx, grn_ts_writer *writer,
223                      grn_obj *table, grn_ts_str str)
224 {
225   grn_rc rc = GRN_SUCCESS;
226   grn_hash_cursor *cursor;
227   grn_hash *hash = grn_hash_create(ctx, NULL, sizeof(grn_ts_id), 0,
228                                    GRN_OBJ_TABLE_HASH_KEY | GRN_HASH_TINY);
229   if (!hash) {
230     return GRN_INVALID_ARGUMENT;
231   }
232   grn_table_columns(ctx, table, str.ptr, str.size - 1, (grn_obj *)hash);
233   if (ctx->rc != GRN_SUCCESS) {
234     return ctx->rc;
235   }
236   cursor = grn_hash_cursor_open(ctx, hash, NULL, 0, NULL, 0, 0, -1, 0);
237   if (!cursor) {
238     rc = GRN_INVALID_ARGUMENT;
239   } else {
240     while (grn_hash_cursor_next(ctx, cursor) != GRN_ID_NIL) {
241       char name_buf[GRN_TABLE_MAX_KEY_SIZE];
242       size_t name_size;
243       grn_obj *column;
244       grn_ts_id *column_id;
245       if (!grn_hash_cursor_get_key(ctx, cursor, (void **)&column_id)) {
246         rc = GRN_INVALID_ARGUMENT;
247         break;
248       }
249       column = grn_ctx_at(ctx, *column_id);
250       if (!column) {
251         rc = GRN_INVALID_ARGUMENT;
252         break;
253       }
254       name_size = grn_column_name(ctx, column, name_buf, sizeof(name_buf));
255       grn_obj_unlink(ctx, column);
256       rc = grn_vector_add_element(ctx, &writer->name_buf,
257                                   name_buf, name_size, 0, GRN_DB_TEXT);
258       if (rc != GRN_SUCCESS) {
259         break;
260       }
261     }
262     grn_hash_cursor_close(ctx, cursor);
263   }
264   grn_hash_close(ctx, hash);
265   return rc;
266 }
267 
268 /* grn_ts_writer_parse() parses output expressions. */
269 static grn_rc
grn_ts_writer_parse(grn_ctx * ctx,grn_ts_writer * writer,grn_obj * table,grn_ts_str str)270 grn_ts_writer_parse(grn_ctx *ctx, grn_ts_writer *writer,
271                     grn_obj *table, grn_ts_str str)
272 {
273   grn_rc rc;
274   grn_ts_str rest = str;
275   rc = grn_ts_expr_parser_open(ctx, table, &writer->parser);
276   for ( ; ; ) {
277     grn_ts_str first = { NULL, 0 };
278     rc = grn_ts_expr_parser_split(ctx, writer->parser, rest, &first, &rest);
279     if (rc != GRN_SUCCESS) {
280       return (rc == GRN_END_OF_DATA) ? GRN_SUCCESS : rc;
281     }
282     if ((first.ptr[first.size - 1] == '*') &&
283         grn_ts_str_is_name_prefix((grn_ts_str){ first.ptr, first.size - 1 })) {
284       rc = grn_ts_writer_expand(ctx, writer, table, first);
285       if (rc != GRN_SUCCESS) {
286         return rc;
287       }
288     } else if (grn_ts_str_is_key_name(first) &&
289                !grn_ts_table_has_key(ctx, table)) {
290       /*
291        * Skip _key if the table has no _key, because the default output_columns
292        * option contains _key.
293        */
294       GRN_TS_DEBUG("skip \"_key\" because the table has no _key");
295     } else {
296       rc = grn_vector_add_element(ctx, &writer->name_buf,
297                                   first.ptr, first.size, 0, GRN_DB_TEXT);
298       if (rc != GRN_SUCCESS) {
299         return rc;
300       }
301     }
302   }
303   return GRN_SUCCESS;
304 }
305 
306 /* grn_ts_writer_build() builds output expresions. */
307 static grn_rc
grn_ts_writer_build(grn_ctx * ctx,grn_ts_writer * writer,grn_obj * table)308 grn_ts_writer_build(grn_ctx *ctx, grn_ts_writer *writer, grn_obj *table)
309 {
310   size_t i, n_names = grn_vector_size(ctx, &writer->name_buf);
311   if (!n_names) {
312     return GRN_SUCCESS;
313   }
314   writer->names = GRN_MALLOCN(grn_ts_str, n_names);
315   if (!writer->names) {
316     GRN_TS_ERR_RETURN(GRN_NO_MEMORY_AVAILABLE,
317                       "GRN_MALLOCN failed: %" GRN_FMT_SIZE " x %" GRN_FMT_SIZE,
318                       sizeof(grn_ts_str), n_names);
319   }
320   writer->exprs = GRN_MALLOCN(grn_ts_expr *, n_names);
321   if (!writer->exprs) {
322     GRN_TS_ERR_RETURN(GRN_NO_MEMORY_AVAILABLE,
323                       "GRN_MALLOCN failed: %" GRN_FMT_SIZE " x %" GRN_FMT_SIZE,
324                       sizeof(grn_ts_expr *), n_names);
325   }
326   for (i = 0; i < n_names; i++) {
327     grn_rc rc;
328     grn_ts_expr *new_expr;
329     const char *name_ptr;
330     size_t name_size = grn_vector_get_element(ctx, &writer->name_buf, i,
331                                               &name_ptr, NULL, NULL);
332     rc = grn_ts_expr_parser_parse(ctx, writer->parser,
333                                   (grn_ts_str){ name_ptr, name_size },
334                                   &new_expr);
335     if (rc != GRN_SUCCESS) {
336       return rc;
337     }
338     writer->names[i].ptr = name_ptr;
339     writer->names[i].size = name_size;
340     writer->exprs[i] = new_expr;
341     writer->n_exprs++;
342   }
343   return GRN_SUCCESS;
344 }
345 
346 /* grn_ts_writer_open() creates a writer. */
347 static grn_rc
grn_ts_writer_open(grn_ctx * ctx,grn_obj * table,grn_ts_str str,grn_ts_writer ** writer)348 grn_ts_writer_open(grn_ctx *ctx, grn_obj *table, grn_ts_str str,
349                    grn_ts_writer **writer)
350 {
351   grn_rc rc;
352   grn_ts_writer *new_writer = GRN_MALLOCN(grn_ts_writer, 1);
353   if (!new_writer) {
354     GRN_TS_ERR_RETURN(GRN_NO_MEMORY_AVAILABLE,
355                       "GRN_MALLOCN failed: %" GRN_FMT_SIZE " x 1",
356                       sizeof(grn_ts_writer));
357   }
358   grn_ts_writer_init(ctx, new_writer);
359   rc = grn_ts_writer_parse(ctx, new_writer, table, str);
360   if (rc == GRN_SUCCESS) {
361     rc = grn_ts_writer_build(ctx, new_writer, table);
362   }
363   if (rc != GRN_SUCCESS) {
364     grn_ts_writer_fin(ctx, new_writer);
365     GRN_FREE(new_writer);
366     return rc;
367   }
368   *writer = new_writer;
369   return GRN_SUCCESS;
370 }
371 
372 /* grn_ts_writer_close() destroys a writer. */
373 static void
grn_ts_writer_close(grn_ctx * ctx,grn_ts_writer * writer)374 grn_ts_writer_close(grn_ctx *ctx, grn_ts_writer *writer)
375 {
376   grn_ts_writer_fin(ctx, writer);
377   GRN_FREE(writer);
378 }
379 
380 /* TODO: Errors of output macros, such as GRN_TEXT_*(), are ignored. */
381 
382 #define GRN_TS_WRITER_OUTPUT_HEADER_CASE(TYPE, name)\
383   case GRN_DB_ ## TYPE: {\
384     GRN_TEXT_PUTS(ctx, ctx->impl->output.buf, name);\
385     break;\
386   }
387 /* grn_ts_writer_output_header() outputs names and data types. */
388 static grn_rc
grn_ts_writer_output_header(grn_ctx * ctx,grn_ts_writer * writer)389 grn_ts_writer_output_header(grn_ctx *ctx, grn_ts_writer *writer)
390 {
391   grn_rc rc;
392   GRN_OUTPUT_ARRAY_OPEN("COLUMNS", writer->n_exprs);
393   for (size_t i = 0; i < writer->n_exprs; ++i) {
394     GRN_OUTPUT_ARRAY_OPEN("COLUMN", 2);
395     rc = grn_text_esc(ctx, ctx->impl->output.buf,
396                       writer->names[i].ptr, writer->names[i].size);
397     if (rc != GRN_SUCCESS) {
398       return rc;
399     }
400     GRN_TEXT_PUT(ctx, ctx->impl->output.buf, ",\"", 2);
401     switch (writer->exprs[i]->data_type) {
402       case GRN_DB_VOID: {
403         if (writer->exprs[i]->data_kind == GRN_TS_GEO) {
404           GRN_TEXT_PUTS(ctx, ctx->impl->output.buf, "GeoPoint");
405         } else {
406           GRN_TEXT_PUTS(ctx, ctx->impl->output.buf, "Void");
407         }
408         break;
409       }
410       GRN_TS_WRITER_OUTPUT_HEADER_CASE(BOOL, "Bool")
411       GRN_TS_WRITER_OUTPUT_HEADER_CASE(INT8, "Int8")
412       GRN_TS_WRITER_OUTPUT_HEADER_CASE(INT16, "Int16")
413       GRN_TS_WRITER_OUTPUT_HEADER_CASE(INT32, "Int32")
414       GRN_TS_WRITER_OUTPUT_HEADER_CASE(INT64, "Int64")
415       GRN_TS_WRITER_OUTPUT_HEADER_CASE(UINT8, "UInt8")
416       GRN_TS_WRITER_OUTPUT_HEADER_CASE(UINT16, "UInt16")
417       GRN_TS_WRITER_OUTPUT_HEADER_CASE(UINT32, "UInt32")
418       GRN_TS_WRITER_OUTPUT_HEADER_CASE(UINT64, "UInt64")
419       GRN_TS_WRITER_OUTPUT_HEADER_CASE(FLOAT, "Float")
420       GRN_TS_WRITER_OUTPUT_HEADER_CASE(TIME, "Time")
421       GRN_TS_WRITER_OUTPUT_HEADER_CASE(SHORT_TEXT, "ShortText")
422       GRN_TS_WRITER_OUTPUT_HEADER_CASE(TEXT, "Text")
423       GRN_TS_WRITER_OUTPUT_HEADER_CASE(LONG_TEXT, "LongText")
424       GRN_TS_WRITER_OUTPUT_HEADER_CASE(TOKYO_GEO_POINT, "TokyoGeoPoint")
425       GRN_TS_WRITER_OUTPUT_HEADER_CASE(WGS84_GEO_POINT, "WGS84GeoPoint")
426       default: {
427         char name_buf[GRN_TABLE_MAX_KEY_SIZE];
428         size_t name_size;
429         grn_obj *obj = grn_ctx_at(ctx, writer->exprs[i]->data_type);
430         if (!obj) {
431           GRN_TS_ERR_RETURN(GRN_UNKNOWN_ERROR, "grn_ctx_at failed: %d",
432                             writer->exprs[i]->data_type);
433         }
434         if (!grn_ts_obj_is_table(ctx, obj)) {
435           grn_obj_unlink(ctx, obj);
436           GRN_TS_ERR_RETURN(GRN_UNKNOWN_ERROR, "not table: %d",
437                             writer->exprs[i]->data_type);
438         }
439         name_size = grn_obj_name(ctx, obj, name_buf, sizeof(name_buf));
440         GRN_TEXT_PUT(ctx, ctx->impl->output.buf, name_buf, name_size);
441         grn_obj_unlink(ctx, obj);
442         break;
443       }
444     }
445     GRN_TEXT_PUTC(ctx, ctx->impl->output.buf, '"');
446     GRN_OUTPUT_ARRAY_CLOSE();
447   }
448   GRN_OUTPUT_ARRAY_CLOSE(); /* COLUMNS. */
449   return GRN_SUCCESS;
450 }
451 #undef GRN_TS_WRITER_OUTPUT_HEADER_CASE
452 
453 #define GRN_TS_WRITER_OUTPUT_BODY_CASE(KIND, kind)\
454   case GRN_TS_ ## KIND: {\
455     grn_ts_ ## kind *value = (grn_ts_ ## kind *)writer->bufs[j].ptr;\
456     grn_ts_ ## kind ## _output(ctx, value[i]);\
457     break;\
458   }
459 #define GRN_TS_WRITER_OUTPUT_BODY_VECTOR_CASE(KIND, kind)\
460   GRN_TS_WRITER_OUTPUT_BODY_CASE(KIND ## _VECTOR, kind ## _vector)
461 /*
462  * grn_ts_writer_output_body() evaluates expressions and outputs the results.
463  */
464 static grn_rc
grn_ts_writer_output_body(grn_ctx * ctx,grn_ts_writer * writer,const grn_ts_record * in,size_t n_in)465 grn_ts_writer_output_body(grn_ctx *ctx, grn_ts_writer *writer,
466                           const grn_ts_record *in, size_t n_in)
467 {
468   size_t i, j, count = 0;
469   writer->bufs = GRN_MALLOCN(grn_ts_buf, writer->n_exprs);
470   if (!writer->bufs) {
471     GRN_TS_ERR_RETURN(GRN_NO_MEMORY_AVAILABLE,
472                       "GRN_MALLOCN failed: %" GRN_FMT_SIZE " x %" GRN_FMT_SIZE,
473                       sizeof(grn_ts_buf), writer->n_exprs);
474   }
475   for (i = 0; i < writer->n_exprs; i++) {
476     grn_ts_buf_init(ctx, &writer->bufs[i]);
477   }
478   while (count < n_in) {
479     size_t batch_size = GRN_TS_BATCH_SIZE;
480     if (batch_size > (n_in - count)) {
481       batch_size = n_in - count;
482     }
483     for (i = 0; i < writer->n_exprs; ++i) {
484       grn_rc rc = grn_ts_expr_evaluate_to_buf(ctx, writer->exprs[i], in + count,
485                                               batch_size, &writer->bufs[i]);
486       if (rc != GRN_SUCCESS) {
487         return rc;
488       }
489     }
490     for (i = 0; i < batch_size; ++i) {
491       GRN_OUTPUT_ARRAY_OPEN("HIT", writer->n_exprs);
492       for (j = 0; j < writer->n_exprs; ++j) {
493         if (j) {
494           GRN_TEXT_PUTC(ctx, ctx->impl->output.buf, ',');
495         }
496         switch (writer->exprs[j]->data_kind) {
497           GRN_TS_WRITER_OUTPUT_BODY_CASE(BOOL, bool);
498           GRN_TS_WRITER_OUTPUT_BODY_CASE(INT, int);
499           GRN_TS_WRITER_OUTPUT_BODY_CASE(FLOAT, float);
500           GRN_TS_WRITER_OUTPUT_BODY_CASE(TIME, time);
501           GRN_TS_WRITER_OUTPUT_BODY_CASE(TEXT, text);
502           GRN_TS_WRITER_OUTPUT_BODY_CASE(GEO, geo);
503           GRN_TS_WRITER_OUTPUT_BODY_VECTOR_CASE(BOOL, bool);
504           GRN_TS_WRITER_OUTPUT_BODY_VECTOR_CASE(INT, int);
505           GRN_TS_WRITER_OUTPUT_BODY_VECTOR_CASE(FLOAT, float);
506           GRN_TS_WRITER_OUTPUT_BODY_VECTOR_CASE(TIME, time);
507           GRN_TS_WRITER_OUTPUT_BODY_VECTOR_CASE(TEXT, text);
508           GRN_TS_WRITER_OUTPUT_BODY_VECTOR_CASE(GEO, geo);
509           default: {
510             break;
511           }
512         }
513       }
514       GRN_OUTPUT_ARRAY_CLOSE(); /* HITS. */
515     }
516     count += batch_size;
517   }
518   return GRN_SUCCESS;
519 }
520 #undef GRN_TS_WRITER_OUTPUT_BODY_VECTOR_CASE
521 #undef GRN_TS_WRITER_OUTPUT_BODY_CASE
522 
523 /* grn_ts_writer_output() outputs search results into the output buffer. */
524 static grn_rc
grn_ts_writer_output(grn_ctx * ctx,grn_ts_writer * writer,const grn_ts_record * in,size_t n_in,size_t n_hits)525 grn_ts_writer_output(grn_ctx *ctx, grn_ts_writer *writer,
526                      const grn_ts_record *in, size_t n_in, size_t n_hits)
527 {
528   grn_rc rc;
529   GRN_OUTPUT_ARRAY_OPEN("RESULT", 1);
530   GRN_OUTPUT_ARRAY_OPEN("RESULTSET", 2 + n_in);
531   GRN_OUTPUT_ARRAY_OPEN("NHITS", 1);
532   rc = grn_text_ulltoa(ctx, ctx->impl->output.buf, n_hits);
533   if (rc != GRN_SUCCESS) {
534     return rc;
535   }
536   GRN_OUTPUT_ARRAY_CLOSE(); /* NHITS. */
537   rc = grn_ts_writer_output_header(ctx, writer);
538   if (rc != GRN_SUCCESS) {
539     return rc;
540   }
541   rc = grn_ts_writer_output_body(ctx, writer, in, n_in);
542   if (rc != GRN_SUCCESS) {
543     return rc;
544   }
545   GRN_OUTPUT_ARRAY_CLOSE(); /* RESULTSET. */
546   GRN_OUTPUT_ARRAY_CLOSE(); /* RESET. */
547   return GRN_SUCCESS;
548 }
549 
550 /* grn_ts_select_filter() applies a filter to all the records of a table. */
551 static grn_rc
grn_ts_select_filter(grn_ctx * ctx,grn_obj * table,grn_ts_str str,size_t offset,size_t limit,grn_ts_record ** out,size_t * n_out,size_t * n_hits)552 grn_ts_select_filter(grn_ctx *ctx, grn_obj *table, grn_ts_str str,
553                      size_t offset, size_t limit,
554                      grn_ts_record **out, size_t *n_out, size_t *n_hits)
555 {
556   grn_rc rc;
557   grn_table_cursor *cursor_obj;
558   grn_ts_cursor *cursor;
559   grn_ts_expr *expr = NULL;
560   grn_ts_record *buf = NULL;
561   size_t buf_size = 0;
562 
563   *out = NULL;
564   *n_out = 0;
565   *n_hits = 0;
566 
567   cursor_obj = grn_table_cursor_open(ctx, table, NULL, 0, NULL, 0, 0, -1,
568                                      GRN_CURSOR_ASCENDING | GRN_CURSOR_BY_ID);
569   if (!cursor_obj) {
570     return (ctx->rc != GRN_SUCCESS) ? ctx->rc : GRN_UNKNOWN_ERROR;
571   }
572   rc = grn_ts_obj_cursor_open(ctx, cursor_obj, &cursor);
573   if (rc != GRN_SUCCESS) {
574     grn_obj_close(ctx, cursor_obj);
575     return rc;
576   }
577 
578   if (str.size) {
579     rc = grn_ts_expr_parse(ctx, table, str, &expr);
580   }
581   if (rc == GRN_SUCCESS) {
582     for ( ; ; ) {
583       size_t batch_size;
584       grn_ts_record *batch;
585 
586       /* Extend the record buffer. */
587       if (buf_size < (*n_out + GRN_TS_BATCH_SIZE)) {
588         size_t new_size = buf_size ? (buf_size * 2) : GRN_TS_BATCH_SIZE;
589         size_t n_bytes = sizeof(grn_ts_record) * new_size;
590         grn_ts_record *new_buf = (grn_ts_record *)GRN_REALLOC(buf, n_bytes);
591         if (!new_buf) {
592           GRN_TS_ERR(GRN_NO_MEMORY_AVAILABLE,
593                      "GRN_REALLOC failed: %" GRN_FMT_SIZE,
594                      n_bytes);
595           rc = ctx->rc;
596           break;
597         }
598         buf = new_buf;
599         buf_size = new_size;
600       }
601 
602       /* Read records from the cursor. */
603       batch = buf + *n_out;
604       rc = grn_ts_cursor_read(ctx, cursor, batch, GRN_TS_BATCH_SIZE,
605                               &batch_size);
606       if ((rc != GRN_SUCCESS) || !batch_size) {
607         break;
608       }
609 
610       /* Apply the filter. */
611       if (expr) {
612         rc = grn_ts_expr_filter(ctx, expr, batch, batch_size,
613                                 batch, &batch_size);
614         if (rc != GRN_SUCCESS) {
615           break;
616         }
617       }
618       *n_hits += batch_size;
619 
620       /* Apply the offset and the limit. */
621       if (offset) {
622         if (batch_size <= offset) {
623           offset -= batch_size;
624           batch_size = 0;
625         } else {
626           size_t n_bytes = sizeof(grn_ts_record) * (batch_size - offset);
627           grn_memmove(batch, batch + offset, n_bytes);
628           batch_size -= offset;
629           offset = 0;
630         }
631       }
632       if (batch_size <= limit) {
633         limit -= batch_size;
634       } else {
635         batch_size = limit;
636         limit = 0;
637       }
638       *n_out += batch_size;
639     }
640     /* Ignore a failure of destruction. */
641     if (expr) {
642       grn_ts_expr_close(ctx, expr);
643     }
644   }
645   /* Ignore a failure of  destruction. */
646   grn_ts_cursor_close(ctx, cursor);
647 
648   if (rc != GRN_SUCCESS) {
649     if (buf) {
650       GRN_FREE(buf);
651     }
652     *n_out = 0;
653     *n_hits = 0;
654     return rc;
655   }
656   *out = buf;
657   return GRN_SUCCESS;
658 }
659 
660 /* grn_ts_select_scorer() adjust scores. */
661 static grn_rc
grn_ts_select_scorer(grn_ctx * ctx,grn_obj * table,grn_ts_str str,grn_ts_record * records,size_t n_records)662 grn_ts_select_scorer(grn_ctx *ctx, grn_obj *table, grn_ts_str str,
663                      grn_ts_record *records, size_t n_records)
664 {
665   grn_rc rc;
666   grn_ts_str rest;
667   grn_ts_expr *expr;
668   rest = grn_ts_str_trim_score_assignment(str);
669   if (!rest.size) {
670     return GRN_SUCCESS;
671   }
672   rc = grn_ts_expr_parse(ctx, table, rest, &expr);
673   if (rc != GRN_SUCCESS) {
674     return rc;
675   }
676   rc = grn_ts_expr_adjust(ctx, expr, records, n_records);
677   grn_ts_expr_close(ctx, expr);
678   return rc;
679 }
680 
681 /* grn_ts_select_output() outputs the results. */
682 static grn_rc
grn_ts_select_output(grn_ctx * ctx,grn_obj * table,grn_ts_str str,const grn_ts_record * in,size_t n_in,size_t n_hits)683 grn_ts_select_output(grn_ctx *ctx, grn_obj *table, grn_ts_str str,
684                      const grn_ts_record *in, size_t n_in, size_t n_hits)
685 {
686   grn_ts_writer *writer= 0;
687   grn_rc rc = grn_ts_writer_open(ctx, table, str, &writer);
688   if (rc != GRN_SUCCESS) {
689     return rc;
690   }
691   rc = grn_ts_writer_output(ctx, writer, in, n_in, n_hits);
692   grn_ts_writer_close(ctx, writer);
693   return rc;
694 }
695 
696 /* grn_ts_select_with_sortby() executes a select command with --sortby. */
697 static grn_rc
grn_ts_select_with_sortby(grn_ctx * ctx,grn_obj * table,grn_ts_str filter,grn_ts_str scorer,grn_ts_str sortby,grn_ts_str output_columns,size_t offset,size_t limit)698 grn_ts_select_with_sortby(grn_ctx *ctx, grn_obj *table,
699                           grn_ts_str filter, grn_ts_str scorer,
700                           grn_ts_str sortby, grn_ts_str output_columns,
701                           size_t offset, size_t limit)
702 {
703   grn_rc rc;
704   grn_ts_record *recs = NULL;
705   size_t n_recs = 0, max_n_recs = 0, n_hits = 0;
706   grn_table_cursor *cursor_obj;
707   grn_ts_cursor *cursor = NULL;
708   grn_ts_expr *filter_expr = NULL;
709   grn_ts_expr *scorer_expr = NULL;
710   grn_ts_sorter *sorter = NULL;
711   cursor_obj = grn_table_cursor_open(ctx, table, NULL, 0, NULL, 0, 0, -1,
712                                      GRN_CURSOR_ASCENDING | GRN_CURSOR_BY_ID);
713   if (!cursor_obj) {
714     GRN_TS_ERR_RETURN(GRN_UNKNOWN_ERROR, "grn_table_cursor_open failed");
715   }
716   rc = grn_ts_obj_cursor_open(ctx, cursor_obj, &cursor);
717   if (rc != GRN_SUCCESS) {
718     grn_obj_close(ctx, cursor_obj);
719     return rc;
720   }
721   if (filter.size) {
722     rc = grn_ts_expr_parse(ctx, table, filter, &filter_expr);
723   }
724   if (rc == GRN_SUCCESS) {
725     scorer = grn_ts_str_trim_score_assignment(scorer);
726     if (scorer.size) {
727       rc = grn_ts_expr_parse(ctx, table, scorer, &scorer_expr);
728     }
729     if (rc == GRN_SUCCESS) {
730       rc = grn_ts_sorter_parse(ctx, table, sortby, offset, limit, &sorter);
731     }
732   }
733   if (rc == GRN_SUCCESS) {
734     size_t n_pending_recs = 0;
735     for ( ; ; ) {
736       size_t batch_size;
737       grn_ts_record *batch;
738       /* Extend a buffer for records. */
739       if (max_n_recs < (n_recs + GRN_TS_BATCH_SIZE)) {
740         size_t n_bytes, new_max_n_recs = max_n_recs * 2;
741         grn_ts_record *new_recs;
742         if (!new_max_n_recs) {
743           new_max_n_recs = GRN_TS_BATCH_SIZE;
744         }
745         n_bytes = sizeof(grn_ts_record) * new_max_n_recs;
746         new_recs = (grn_ts_record *)GRN_REALLOC(recs, n_bytes);
747         if (!new_recs) {
748           GRN_TS_ERR(GRN_NO_MEMORY_AVAILABLE,
749                      "GRN_REALLOC failed: %" GRN_FMT_SIZE,
750                      n_bytes);
751           rc = ctx->rc;
752           break;
753         }
754         recs = new_recs;
755         max_n_recs = new_max_n_recs;
756       }
757       /* Read records from a cursor. */
758       batch = recs + n_recs;
759       rc = grn_ts_cursor_read(ctx, cursor, batch, GRN_TS_BATCH_SIZE,
760                               &batch_size);
761       if (rc != GRN_SUCCESS) {
762         break;
763       } else if (!batch_size) {
764         /* Apply a scorer and complete sorting. */
765         if (scorer_expr) {
766           rc = grn_ts_expr_adjust(ctx, scorer_expr,
767                                   recs + n_recs - n_pending_recs,
768                                   n_pending_recs);
769           if (rc != GRN_SUCCESS) {
770             break;
771           }
772         }
773         if (n_pending_recs) {
774           rc = grn_ts_sorter_progress(ctx, sorter, recs, n_recs, &n_recs);
775           if (rc != GRN_SUCCESS) {
776             break;
777           }
778         }
779         rc = grn_ts_sorter_complete(ctx, sorter, recs, n_recs, &n_recs);
780         break;
781       }
782       /* Apply a filter. */
783       if (filter_expr) {
784         rc = grn_ts_expr_filter(ctx, filter_expr, batch, batch_size,
785                                 batch, &batch_size);
786         if (rc != GRN_SUCCESS) {
787           break;
788         }
789       }
790       n_hits += batch_size;
791       n_recs += batch_size;
792       n_pending_recs += batch_size;
793       /*
794        * Apply a scorer and progress sorting if there are enough pending
795        * records.
796        */
797       if (n_pending_recs >= GRN_TS_BATCH_SIZE) {
798         if (scorer_expr) {
799           rc = grn_ts_expr_adjust(ctx, scorer_expr,
800                                   recs + n_recs - n_pending_recs,
801                                   n_pending_recs);
802           if (rc != GRN_SUCCESS) {
803             break;
804           }
805         }
806         rc = grn_ts_sorter_progress(ctx, sorter, recs, n_recs, &n_recs);
807         if (rc != GRN_SUCCESS) {
808           break;
809         }
810         n_pending_recs = 0;
811       }
812     }
813   }
814   if (rc == GRN_SUCCESS) {
815     rc = grn_ts_select_output(ctx, table, output_columns,
816                               recs, n_recs, n_hits);
817   }
818   if (cursor) {
819     grn_ts_cursor_close(ctx, cursor);
820   }
821   if (recs) {
822     GRN_FREE(recs);
823   }
824   if (sorter) {
825     grn_ts_sorter_close(ctx, sorter);
826   }
827   if (scorer_expr) {
828     grn_ts_expr_close(ctx, scorer_expr);
829   }
830   if (filter_expr) {
831     grn_ts_expr_close(ctx, filter_expr);
832   }
833   return rc;
834 }
835 
836 /*
837  * grn_ts_select_without_sortby() executes a select command without --sortby.
838  */
839 static grn_rc
grn_ts_select_without_sortby(grn_ctx * ctx,grn_obj * table,grn_ts_str filter,grn_ts_str scorer,grn_ts_str output_columns,size_t offset,size_t limit)840 grn_ts_select_without_sortby(grn_ctx *ctx, grn_obj *table,
841                              grn_ts_str filter, grn_ts_str scorer,
842                              grn_ts_str output_columns,
843                              size_t offset, size_t limit)
844 {
845   grn_rc rc;
846   grn_ts_record *records = NULL;
847   size_t n_records, n_hits;
848   rc = grn_ts_select_filter(ctx, table, filter, offset, limit,
849                             &records, &n_records, &n_hits);
850   if (rc == GRN_SUCCESS) {
851     rc = grn_ts_select_scorer(ctx, table, scorer, records, n_records);
852     if (rc == GRN_SUCCESS) {
853       rc = grn_ts_select_output(ctx, table, output_columns,
854                                 records, n_records, n_hits);
855     }
856   }
857   if (records) {
858     GRN_FREE(records);
859   }
860   return rc;
861 }
862 
863 /*-------------------------------------------------------------
864  * API.
865  */
866 
867 grn_rc
grn_ts_select(grn_ctx * ctx,grn_obj * table,const char * filter_ptr,size_t filter_len,const char * scorer_ptr,size_t scorer_len,const char * sortby_ptr,size_t sortby_len,const char * output_columns_ptr,size_t output_columns_len,size_t offset,size_t limit)868 grn_ts_select(grn_ctx *ctx, grn_obj *table,
869               const char *filter_ptr, size_t filter_len,
870               const char *scorer_ptr, size_t scorer_len,
871               const char *sortby_ptr, size_t sortby_len,
872               const char *output_columns_ptr, size_t output_columns_len,
873               size_t offset, size_t limit)
874 {
875   grn_rc rc;
876   grn_ts_str filter = { filter_ptr, filter_len };
877   grn_ts_str scorer = { scorer_ptr, scorer_len };
878   grn_ts_str sortby = { sortby_ptr, sortby_len };
879   grn_ts_str output_columns = { output_columns_ptr, output_columns_len };
880   if (!ctx) {
881     return GRN_INVALID_ARGUMENT;
882   }
883   if (!table || !grn_ts_obj_is_table(ctx, table) ||
884       (!filter_ptr && filter_len) || (!scorer_ptr && scorer_len) ||
885       (!sortby_ptr && sortby_len) ||
886       (!output_columns_ptr && output_columns_len)) {
887     GRN_TS_ERR_RETURN(GRN_INVALID_ARGUMENT, "invalid argument");
888   }
889   filter = grn_ts_str_trim_left(filter);
890   if (sortby_len) {
891     rc = grn_ts_select_with_sortby(ctx, table, filter, scorer, sortby,
892                                    output_columns, offset, limit);
893   } else {
894     rc = grn_ts_select_without_sortby(ctx, table, filter, scorer,
895                                       output_columns, offset, limit);
896   }
897   if (rc != GRN_SUCCESS) {
898     GRN_BULK_REWIND(ctx->impl->output.buf);
899     if ((ctx->rc == GRN_SUCCESS) || !ctx->errbuf[0]) {
900       ERR(rc, "error message is missing");
901     } else if (ctx->errlvl < GRN_LOG_ERROR) {
902       ctx->errlvl = GRN_LOG_ERROR;
903     }
904   }
905   return rc;
906 }
907