1 /* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /*
3  *     Copyright 2013-2015 Couchbase, Inc.
4  *
5  *   Licensed under the Apache License, Version 2.0 (the "License");
6  *   you may not use this file except in compliance with the License.
7  *   You may obtain a copy of the License at
8  *
9  *       http://www.apache.org/licenses/LICENSE-2.0
10  *
11  *   Unless required by applicable law or agreed to in writing, software
12  *   distributed under the License is distributed on an "AS IS" BASIS,
13  *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *   See the License for the specific language governing permissions and
15  *   limitations under the License.
16  */
/*
 * Pick the decoration used for JSONSL_API before the bundled jsonsl.c is
 * included below. Every variant is `static`, so whatever jsonsl.c declares
 * with JSONSL_API gets internal linkage in this translation unit
 * (presumably its public functions -- confirm against jsonsl.h).
 * The GCC variant also marks them unused so that functions this file never
 * calls do not produce unused-function warnings.
 */
#if defined(__GNUC__)
#define JSONSL_API static __attribute__((unused))
#elif defined(_MSC_VER)
#define JSONSL_API static __inline
#else
#define JSONSL_API static
#endif
24 #include "contrib/jsonsl/jsonsl.c"
25 #include "contrib/lcb-jsoncpp/lcb-jsoncpp.h"
26 #include "parser.h"
27 
/*
 * Forward-declares a file-local jsonsl action callback called `name`.
 * The last parameter is spelled `const char*` here while the definitions
 * below use `const jsonsl_char_t *`; this assumes jsonsl_char_t is char.
 */
#define DECLARE_JSONSL_CALLBACK(name) \
static void name(jsonsl_t,jsonsl_action_t,struct jsonsl_state_st*,const char*)


/* The callbacks install one another on the lexer as parsing progresses,
 * so some are referenced before their definitions appear. */
DECLARE_JSONSL_CALLBACK(row_pop_callback);
DECLARE_JSONSL_CALLBACK(initial_push_callback);
DECLARE_JSONSL_CALLBACK(initial_pop_callback);
DECLARE_JSONSL_CALLBACK(meta_header_complete_callback);
DECLARE_JSONSL_CALLBACK(trailer_pop_callback);
37 
38 using namespace lcb::jsparse;
39 
/*
 * Marker values stored in a jsonsl state's `data` field (hence the void*
 * casts) so that later POP callbacks can recognise two special containers:
 * the top-level response object and the array that holds the rows.
 */
#define JOBJ_RESPONSE_ROOT (void*)1
#define JOBJ_ROWSET (void*)2
43 
/**
 * Skips the first character of a region: the start pointer moves forward by
 * one and the length shrinks by one. Used in this file to step over the
 * opening quote of a hash key.
 *
 * @param buf start of the region; advanced in place
 * @param len length of the region; decremented in place
 */
template <typename T>
void NORMALIZE_OFFSETS(const char *& buf, T& len) {
    ++buf;
    --len;
}
49 
50 /**
51  * Gets a buffer, given an (absolute) position offset.
52  * It will try to get a buffer of size desired. The actual size is
53  * returned in 'actual' (and may be less than desired, maybe even 0)
54  */
55 const char * Parser::get_buffer_region(size_t pos, size_t desired, size_t *actual)
56 {
57     const char *ret = current_buf.c_str() + pos - min_pos;
58     const char *end = current_buf.c_str() + current_buf.size();
59     *actual = end - ret;
60 
61     if (min_pos > pos) {
62         /* swallowed */
63         *actual = 0;
64         return NULL;
65     }
66 
67     lcb_assert(ret < end);
68     if (desired < *actual) {
69         *actual = desired;
70     }
71     return ret;
72 }
73 
74 /**
75  * Consolidate the meta data into a single parsable string..
76  */
77 void Parser::combine_meta() {
78     const char *meta_trailer;
79     size_t ntrailer;
80 
81     if (meta_complete) {
82         return;
83     }
84 
85     lcb_assert(header_len <= meta_buf.size());
86 
87     /* Adjust the length for the first portion */
88     meta_buf.resize(header_len);
89 
90     /* Append any trailing data */
91     meta_trailer = get_buffer_region(last_row_endpos, -1, &ntrailer);
92     meta_buf.append(meta_trailer, ntrailer);
93     meta_complete = 1;
94 }
95 
96 static Parser * get_ctx(jsonsl_t jsn) {
97     return reinterpret_cast<Parser*>(jsn->data);
98 }
99 
100 static void
101 meta_header_complete_callback(jsonsl_t jsn, jsonsl_action_t,
102     struct jsonsl_state_st *state, const jsonsl_char_t *)
103 {
104     Parser *ctx = get_ctx(jsn);
105     ctx->meta_buf.append(ctx->current_buf.c_str(), state->pos_begin);
106 
107     ctx->header_len = state->pos_begin;
108     jsn->action_callback_PUSH = NULL;
109 }
110 
111 
112 static void
113 row_pop_callback(jsonsl_t jsn, jsonsl_action_t,
114     struct jsonsl_state_st *state, const jsonsl_char_t *)
115 {
116     Parser *ctx = get_ctx(jsn);
117     const char *rowbuf;
118     size_t szdummy;
119 
120     if (ctx->have_error) {
121         return;
122     }
123 
124     ctx->keep_pos = jsn->pos;
125     ctx->last_row_endpos = jsn->pos;
126 
127     if (state->data == JOBJ_ROWSET) {
128         /** The closing ] of "rows" : [ ... ] */
129         if (ctx->mode == Parser::MODE_ANALYTICS_DEFERRED) {
130             if (ctx->keep_pos > ctx->min_pos) {
131                 ctx->current_buf.erase(0, ctx->keep_pos - ctx->min_pos);
132                 ctx->min_pos = ctx->keep_pos;
133             }
134             ctx->meta_buf.append(ctx->current_buf);
135             ctx->header_len = jsn->pos;
136             ctx->meta_complete = 1;
137             if (ctx->actions) {
138                 ctx->actions->JSPARSE_on_complete(ctx->meta_buf);
139                 ctx->actions = NULL;
140             }
141             return;
142         }
143 
144         jsn->action_callback_POP = trailer_pop_callback;
145         jsn->action_callback_PUSH = NULL;
146         if (ctx->rowcount == 0) {
147             /* Emulate what meta_header_complete callback does. */
148 
149             /* While the entire meta is available to us, the _closing_ part
150              * of the meta is handled in a different callback. */
151             ctx->meta_buf.append(ctx->current_buf.c_str(), jsn->pos);
152             ctx->header_len = jsn->pos;
153         }
154         return;
155     }
156 
157     ctx->rowcount++;
158     if (!ctx->actions) {
159         return;
160     }
161 
162     rowbuf = ctx->get_buffer_region(state->pos_begin, -1, &szdummy);
163     Row dt = {{0}};
164     dt.row.iov_base = (void *)rowbuf;
165     dt.row.iov_len = jsn->pos - state->pos_begin + 1;
166     ctx->actions->JSPARSE_on_row(dt);
167 }
168 
169 static int
170 parse_error_callback(jsonsl_t jsn, jsonsl_error_t,
171     struct jsonsl_state_st *, jsonsl_char_t *)
172 {
173     Parser *ctx = get_ctx(jsn);
174     ctx->have_error = 1;
175 
176     /* invoke the callback */
177     if (ctx->actions) {
178         ctx->actions->JSPARSE_on_error(ctx->current_buf);
179         ctx->actions = NULL;
180     }
181     return 0;
182 }
183 
184 static void
185 trailer_pop_callback(jsonsl_t jsn, jsonsl_action_t,
186     struct jsonsl_state_st *state, const jsonsl_char_t *)
187 {
188     Parser *ctx = get_ctx(jsn);
189 
190     if (state->data != JOBJ_RESPONSE_ROOT) {
191         return;
192     }
193     ctx->combine_meta();
194     if (ctx->actions) {
195         ctx->actions->JSPARSE_on_complete(ctx->meta_buf);
196         ctx->actions = NULL;
197     }
198 }
199 
200 static void
201 initial_pop_callback(jsonsl_t jsn, jsonsl_action_t,
202     struct jsonsl_state_st *state, const jsonsl_char_t *)
203 {
204     Parser *ctx = get_ctx(jsn);
205     unsigned long len;
206 
207     if (ctx->have_error) {
208         return;
209     }
210     if (JSONSL_STATE_IS_CONTAINER(state)) {
211         return;
212     }
213     if (state->type != JSONSL_T_HKEY) {
214         return;
215     }
216 
217     const char *key = ctx->current_buf.c_str() + state->pos_begin;
218     len = jsn->pos - state->pos_begin;
219     NORMALIZE_OFFSETS(key, len);
220     ctx->last_hk.assign(key, len);
221 }
222 
223 /**
224  * This is called for the first few tokens, where we are still searching
225  * for the row set.
226  */
227 static void
228 initial_push_callback(jsonsl_t jsn, jsonsl_action_t,
229     struct jsonsl_state_st *state, const jsonsl_char_t *)
230 {
231     Parser *ctx = (Parser*)jsn->data;
232     jsonsl_jpr_match_t match = JSONSL_MATCH_UNKNOWN;
233 
234     if (ctx->have_error) {
235         return;
236     }
237 
238     if (JSONSL_STATE_IS_CONTAINER(state)) {
239         jsonsl_jpr_match_state(jsn, state, ctx->last_hk.c_str(), ctx->last_hk.size(),
240             &match);
241     }
242     ctx->last_hk.clear();
243 
244     if (ctx->mode == Parser::MODE_ANALYTICS_DEFERRED) {
245         ctx->initialized = 1;
246     }
247 
248     if (ctx->initialized == 0) {
249         if (state->type != JSONSL_T_OBJECT) {
250             ctx->have_error = 1;
251             return;
252         }
253 
254         if (match != JSONSL_MATCH_POSSIBLE) {
255             ctx->have_error = 1;
256             return;
257         }
258         /* tag the state */
259         state->data = JOBJ_RESPONSE_ROOT;
260         ctx->initialized = 1;
261         return;
262     }
263 
264     if (state->type == JSONSL_T_LIST && match == JSONSL_MATCH_POSSIBLE) {
265         /* we have a match, e.g. "rows:[]" */
266         jsn->action_callback_POP = row_pop_callback;
267         jsn->action_callback_PUSH = meta_header_complete_callback;
268         state->data = JOBJ_ROWSET;
269     }
270 }
271 
272 void Parser::feed(const char *data_, size_t ndata)
273 {
274     size_t old_len = current_buf.size();
275     current_buf.append(data_, ndata);
276     jsonsl_feed(jsn, current_buf.c_str() + old_len, ndata);
277 
278     /* Do we need to cut off some bytes? */
279     if (keep_pos > min_pos) {
280         current_buf.erase(0, keep_pos - min_pos);
281     }
282 
283     min_pos = keep_pos;
284 }
285 
286 const char* Parser::jprstr_for_mode(Mode mode) {
287     switch (mode) {
288     case MODE_VIEWS:
289         return "/rows/^";
290     case MODE_N1QL:
291     case MODE_ANALYTICS:
292         return "/results/^";
293     case MODE_ANALYTICS_DEFERRED:
294         return "/^";
295     case MODE_FTS:
296         return "/hits/^";
297     default:
298         lcb_assert(0 && "Invalid mode passed!");
299         return "/";
300     }
301 }
302 
303 Parser::Parser(Mode mode_, Parser::Actions* actions_) :
304     jsn(jsonsl_new(512)),
305     jsn_rdetails(jsonsl_new(32)),
306     jpr(jsonsl_jpr_new(jprstr_for_mode(mode_), NULL)),
307     mode(mode_),
308     have_error(0),
309     initialized(0),
310     meta_complete(0),
311     rowcount(0),
312     min_pos(0),
313     keep_pos(0),
314     header_len(0),
315     last_row_endpos(0),
316     cxx_data(),
317     actions(actions_) {
318 
319     jsonsl_jpr_match_state_init(jsn, &jpr, 1);
320     jsonsl_reset(jsn);
321     jsonsl_reset(jsn_rdetails);
322     current_buf.clear();
323     meta_buf.clear();
324     last_hk.clear();
325 
326     /* Initially all callbacks are enabled so that we can search for the
327      * rows array. */
328     jsn->action_callback_POP = initial_pop_callback;
329     jsn->action_callback_PUSH = initial_push_callback;
330     jsn->error_callback = parse_error_callback;
331     if (mode == MODE_ANALYTICS_DEFERRED) {
332         jsn->max_callback_level = 3;
333     } else {
334         jsn->max_callback_level = 4;
335     }
336     jsn->data = this;
337     jsonsl_enable_all_callbacks(jsn);
338 }
339 
340 void Parser::get_postmortem(lcb_IOV &out) const {
341     if (meta_complete) {
342         out.iov_base = const_cast<char*>(meta_buf.c_str());
343         out.iov_len = meta_buf.size();
344     } else {
345         out.iov_base = const_cast<char*>(current_buf.c_str());
346         out.iov_len = current_buf.size();
347     }
348 }
349 
350 Parser::~Parser() {
351     jsonsl_jpr_match_state_cleanup(jsn);
352     jsonsl_destroy(jsn);
353     jsonsl_destroy(jsn_rdetails);
354     jsonsl_jpr_destroy(jpr);
355 }
356 
357 typedef struct {
358     const char *root;
359     lcb_IOV *next_iov;
360     Row *datum;
361     Parser *parent;
362 } miniparse_ctx;
363 
364 static void
365 parse_json_docid(lcb_IOV* iov, Parser *parent)
366 {
367     Json::Reader r;
368     const char *s = static_cast<char*>(iov->iov_base);
369     const char *s_end = s + iov->iov_len;
370     Json::Value& jvp = parent->cxx_data;
371     bool rv = r.parse(s, s_end, jvp);
372     if (!rv) {
373         // fprintf(stderr, "libcouchbase: Failed to parse document ID as JSON!\n");
374         return;
375     }
376 
377     s = NULL;
378     s_end = NULL;
379 
380     lcb_assert(jvp.isString());
381 
382     // Re-use s and s_end values for the string value itself
383     if (!jvp.getString(&s, &s_end)) {
384         // fprintf(stderr, "libcouchbase: couldn't get string value!\n");
385         iov->iov_base = NULL;
386         iov->iov_len = 0;
387     }
388     iov->iov_base = const_cast<char*>(s);
389     iov->iov_len = s_end - s;
390 }
391 
392 static void
393 miniparse_callback(jsonsl_t jsn, jsonsl_action_t,
394     struct jsonsl_state_st *state, const jsonsl_char_t *at)
395 {
396     miniparse_ctx *ctx = reinterpret_cast<miniparse_ctx*>(jsn->data);
397     lcb_IOV *iov;
398 
399     if (state->level == 1) {
400         return;
401     }
402 
403     /* Is a hashkey? */
404     if (state->type == JSONSL_T_HKEY) {
405         size_t nhk = state->pos_cur - state->pos_begin;
406 
407         nhk--;
408 
409         #define IS_ROWFIELD(s) \
410             (nhk == sizeof(s)-1 && !strncmp(s, at- (sizeof(s)-1) , sizeof(s)-1) )
411 
412         if (IS_ROWFIELD("id")) {
413             /* "id" */
414             ctx->next_iov = &ctx->datum->docid;
415         } else if (IS_ROWFIELD("key")) {
416             /* "key" */
417             ctx->next_iov = &ctx->datum->key;
418         } else if (IS_ROWFIELD("value")) {
419             /* "value" */
420             ctx->next_iov = &ctx->datum->value;
421         } else if (IS_ROWFIELD("geometry")) {
422             ctx->next_iov = &ctx->datum->geo;
423         } else {
424             ctx->next_iov = NULL;
425         }
426         #undef IS_ROWFIELD
427         return;
428     }
429 
430     if (ctx->next_iov == NULL) {
431         return;
432     }
433 
434     iov = ctx->next_iov;
435 
436     if (JSONSL_STATE_IS_CONTAINER(state)) {
437         iov->iov_base = (void *) (ctx->root + state->pos_begin);
438         iov->iov_len = (jsn->pos - state->pos_begin) + 1;
439     } else if (iov == &ctx->datum->docid) {
440         if (state->nescapes) {
441             iov->iov_base = (void *) (ctx->root + state->pos_begin);
442             iov->iov_len = (state->pos_cur - state->pos_begin) + 1;
443             parse_json_docid(iov, ctx->parent);
444         } else {
445             iov->iov_base = (void *) (ctx->root + state->pos_begin + 1);
446             iov->iov_len = (state->pos_cur - state->pos_begin) - 1;
447         }
448     } else {
449         iov->iov_base = (void *) (ctx->root + state->pos_begin);
450         iov->iov_len = state->pos_cur - state->pos_begin;
451         if (state->type == JSONSL_T_STRING) {
452             iov->iov_len++;
453         }
454     }
455 }
456 
457 void Parser::parse_viewrow(Row &vr) {
458     miniparse_ctx ctx = { NULL };
459     ctx.datum = &vr;
460     ctx.root = static_cast<const char*>(vr.row.iov_base);
461     ctx.parent = this;
462 
463     jsonsl_reset(jsn_rdetails);
464 
465     jsonsl_enable_all_callbacks(jsn_rdetails);
466     jsn_rdetails->max_callback_level = 3;
467     jsn_rdetails->action_callback_POP = miniparse_callback;
468     jsn_rdetails->data = &ctx;
469 
470     jsonsl_feed(jsn_rdetails,
471         static_cast<const char*>(vr.row.iov_base), vr.row.iov_len);
472 }
473