1 /* Must be defined before including Perl header files or we slow down by 2x! */
2 #define PERL_NO_GET_CONTEXT
3
4 #define NEED_newSV_type
5 #include "EXTERN.h"
6 #include "perl.h"
7 #include "XSUB.h"
8
9 #include "ppport.h"
10
11 #include "srl_common.h"
12 #include "srl_decoder.h"
13 #include "srl_protocol.h"
14
15 #ifndef GvCV_set
16 # define GvCV_set(gv, cv) (GvCV(gv) = (cv))
17 #endif
18
19 #ifndef PERL_ARGS_ASSERT_CROAK_XS_USAGE
20 #define PERL_ARGS_ASSERT_CROAK_XS_USAGE assert(cv); assert(params)
21
22 /* prototype to pass -Wmissing-prototypes */
23 STATIC void
24 S_croak_xs_usage(pTHX_ const CV *const cv, const char *const params);
25
26 STATIC void
S_croak_xs_usage(pTHX_ const CV * const cv,const char * const params)27 S_croak_xs_usage(pTHX_ const CV *const cv, const char *const params)
28 {
29 const GV *const gv = CvGV(cv);
30
31 PERL_ARGS_ASSERT_CROAK_XS_USAGE;
32
33 if (gv) {
34 const char *const gvname = GvNAME(gv);
35 const HV *const stash = GvSTASH(gv);
36 const char *const hvname = stash ? HvNAME(stash) : NULL;
37
38 if (hvname)
39 Perl_croak_nocontext("Usage: %s::%s(%s)", hvname, gvname, params);
40 else
41 Perl_croak_nocontext("Usage: %s(%s)", gvname, params);
42 } else {
43 /* Pants. I don't think that it should be possible to get here. */
44 Perl_croak_nocontext("Usage: CODE(0x%"UVxf")(%s)", PTR2UV(cv), params);
45 }
46 }
47
48 #ifdef PERL_IMPLICIT_CONTEXT
49 #define croak_xs_usage(a,b) S_croak_xs_usage(aTHX_ a,b)
50 #else
51 #define croak_xs_usage S_croak_xs_usage
52 #endif
53
54 #endif
55
56
57 #if defined(cv_set_call_checker) && defined(XopENTRY_set)
58 # define USE_CUSTOM_OPS 1
59 #else
60 # define USE_CUSTOM_OPS 0
61 #endif
62
63 #define OPOPT_DO_BODY (1<<0)
64 #define OPOPT_DO_HEADER (1<<1)
65 #define OPOPT_OFFSET (1<<2)
66 #define OPOPT_OUTARG_BODY (1<<3)
67 #define OPOPT_OUTARG_HEADER (1<<4)
68 #define OPOPT_LOOKS_LIKE (1<<5)
69
70 #define pp1_sereal_decode(opopt) THX_pp1_sereal_decode(aTHX_ opopt)
71 static void
THX_pp1_sereal_decode(pTHX_ U8 opopt)72 THX_pp1_sereal_decode(pTHX_ U8 opopt)
73 {
74 bool need_retvalue = GIMME_V != G_VOID;
75 SV *decoder_ref_sv, *decoder_sv, *src_sv;
76 UV offset;
77 SV *body_into, *header_into;
78 srl_decoder_t *decoder;
79 char *stash_name;
80 dSP;
81
82 header_into = expect_false(opopt & OPOPT_OUTARG_HEADER)
83 ? POPs
84 : expect_false(opopt & OPOPT_DO_HEADER) ? sv_newmortal() : NULL;
85 body_into = expect_false(opopt & OPOPT_OUTARG_BODY)
86 ? POPs
87 : expect_true(opopt & OPOPT_DO_BODY) ? sv_newmortal() : NULL;
88
89 offset = expect_false(opopt & OPOPT_OFFSET) ? SvUVx(POPs) : 0;
90 src_sv = POPs;
91 decoder_ref_sv = POPs;
92 PUTBACK;
93
94 if (!expect_true(
95 decoder_ref_sv &&
96 SvROK(decoder_ref_sv) &&
97 (decoder_sv = SvRV(decoder_ref_sv)) &&
98 SvOBJECT(decoder_sv) &&
99 (stash_name = HvNAME(SvSTASH(decoder_sv))) &&
100 !strcmp(stash_name, "Sereal::Decoder")
101 ))
102 {
103 croak("handle is not a Sereal::Decoder handle");
104 }
105
106 decoder = (srl_decoder_t *)SvIV(decoder_sv);
107 if (expect_true(opopt & OPOPT_DO_BODY)) {
108 if (opopt & OPOPT_DO_HEADER) {
109 srl_decode_all_into(aTHX_ decoder, src_sv, header_into,
110 body_into, offset);
111 } else {
112 srl_decode_into(aTHX_ decoder, src_sv, body_into, offset);
113 }
114 } else {
115 srl_decode_header_into(aTHX_ decoder, src_sv, header_into, offset);
116 }
117
118 if (expect_true(need_retvalue)) {
119 SV *retvalue;
120 if (expect_true(opopt & OPOPT_DO_BODY)) {
121 if (opopt & OPOPT_DO_HEADER) {
122 AV *retav = newAV();
123 retvalue = newRV_noinc((SV*)retav);
124 sv_2mortal(retvalue);
125 av_extend(retav, 1);
126 av_store(retav, 0, SvREFCNT_inc(header_into));
127 av_store(retav, 1, SvREFCNT_inc(body_into));
128 } else {
129 retvalue = body_into;
130 }
131 } else {
132 retvalue = header_into;
133 }
134 SPAGAIN;
135 XPUSHs(retvalue);
136 PUTBACK;
137 }
138 }
139
140 #define pp1_looks_like_sereal() THX_pp1_looks_like_sereal(aTHX)
141 static void
THX_pp1_looks_like_sereal(pTHX)142 THX_pp1_looks_like_sereal(pTHX)
143 {
144 dSP;
145 SV *data= TOPs;
146 /* Should this be SvPOK()? Or better yet, check if it's *really* a string pointer: SvPOKp(data). After all
147 the serialization format is a string and anything otherwise would not look sereal. */
148 if ( SvOK(data) ) {
149 STRLEN len;
150 char *strdata= SvPV(data, len);
151 IV ret= srl_validate_header_version_pv_len(aTHX_ strdata, len);
152 if ( ret < 0 ) {
153 SETs(&PL_sv_no);
154 } else {
155 SETs(sv_2mortal(newSViv(ret & SRL_PROTOCOL_VERSION_MASK)));
156 }
157 } else {
158 SETs(&PL_sv_no);
159 }
160 }
161
162 #if USE_CUSTOM_OPS
163
164 static OP *
THX_pp_sereal_decode(pTHX)165 THX_pp_sereal_decode(pTHX)
166 {
167 pp1_sereal_decode(PL_op->op_private);
168 return NORMAL;
169 }
170
171 static OP *
THX_pp_looks_like_sereal(pTHX)172 THX_pp_looks_like_sereal(pTHX)
173 {
174 pp1_looks_like_sereal();
175 return NORMAL;
176 }
177
178 static OP *
THX_ck_entersub_args_sereal_decoder(pTHX_ OP * entersubop,GV * namegv,SV * ckobj)179 THX_ck_entersub_args_sereal_decoder(pTHX_ OP *entersubop, GV *namegv, SV *ckobj)
180 {
181
182 /* pull apart a standard entersub op tree */
183
184 CV *cv = (CV*)ckobj;
185 I32 cv_private = CvXSUBANY(cv).any_i32;
186 U8 opopt = cv_private & 0xff;
187 U8 min_arity = (cv_private >> 8) & 0xff;
188 U8 max_arity = (cv_private >> 16) & 0xff;
189 OP *pushop, *firstargop, *cvop, *lastargop, *argop, *newop;
190 int arity;
191
192 /* Walk the OP structure under the "entersub" to validate that we
193 * can use the custom OP implementation. */
194
195 entersubop = ck_entersub_args_proto(entersubop, namegv, (SV*)cv);
196 pushop = cUNOPx(entersubop)->op_first;
197 if ( ! OpHAS_SIBLING(pushop) )
198 pushop = cUNOPx(pushop)->op_first;
199 firstargop = OpSIBLING(pushop);
200
201 for (cvop = firstargop; OpHAS_SIBLING(cvop); cvop = OpSIBLING(cvop)) ;
202
203 lastargop = pushop;
204 for (
205 arity = 0, lastargop = pushop, argop = firstargop;
206 argop != cvop;
207 lastargop = argop, argop = OpSIBLING(argop)
208 ){
209 arity++;
210 }
211
212 if (expect_false(arity < min_arity || arity > max_arity))
213 return entersubop;
214
215 /* If we get here, we can replace the entersub with a suitable
216 * custom OP. */
217
218 if (arity > min_arity && (opopt & OPOPT_DO_BODY)) {
219 opopt |= OPOPT_OUTARG_BODY;
220 min_arity++;
221 }
222
223 if (arity > min_arity)
224 opopt |= OPOPT_OUTARG_HEADER;
225
226 #ifdef op_sibling_splice
227 /* op_sibling_splice is new in 5.31 and we have to do things differenly */
228
229 /* cut out all ops between the pushmark and the RV2CV */
230 op_sibling_splice(NULL, pushop, arity, NULL);
231 /* then throw everything else out */
232 op_free(entersubop);
233 newop = newUNOP(OP_NULL, 0, NULL);
234
235 #else
236
237 OpMORESIB_set(pushop, cvop);
238 OpLASTSIB_set(lastargop, op_parent(lastargop));
239 op_free(entersubop);
240 newop = newUNOP(OP_NULL, 0, firstargop);
241
242 #endif
243
244 newop->op_type = OP_CUSTOM;
245 newop->op_private = opopt;
246 newop->op_ppaddr = opopt & OPOPT_LOOKS_LIKE ? THX_pp_looks_like_sereal : THX_pp_sereal_decode;
247
248 #ifdef op_sibling_splice
249
250 /* attach the spliced-out args as children of the custom op, while
251 * deleting the stub op created by newUNOP() */
252 op_sibling_splice(newop, NULL, 1, firstargop);
253
254 #endif
255
256 return newop;
257 }
258
259 #endif /* USE_CUSTOM_OPS */
260
261 static void
THX_xsfunc_sereal_decode(pTHX_ CV * cv)262 THX_xsfunc_sereal_decode(pTHX_ CV *cv)
263 {
264 dMARK;
265 dSP;
266 SSize_t arity = SP - MARK;
267 I32 cv_private = CvXSUBANY(cv).any_i32;
268 U8 opopt = cv_private & 0xff;
269 U8 min_arity = (cv_private >> 8) & 0xff;
270 U8 max_arity = (cv_private >> 16) & 0xff;
271
272 if (arity < min_arity || arity > max_arity)
273 croak("bad Sereal decoder usage");
274 if (arity > min_arity && (opopt & OPOPT_DO_BODY)) {
275 opopt |= OPOPT_OUTARG_BODY;
276 min_arity++;
277 }
278 if (arity > min_arity)
279 opopt |= OPOPT_OUTARG_HEADER;
280
281 pp1_sereal_decode(opopt);
282 }
283
284 static void
THX_xsfunc_looks_like_sereal(pTHX_ CV * cv)285 THX_xsfunc_looks_like_sereal(pTHX_ CV *cv)
286 {
287 dMARK;
288 dSP;
289 SSize_t arity = SP - MARK;
290 I32 cv_private = CvXSUBANY(cv).any_i32;
291 U8 max_arity = (cv_private >> 16) & 0xff;
292
293 if (arity < 1 || arity > max_arity)
294 croak_xs_usage(cv, max_arity == 1 ? "data" : "[invocant,] data");
295 if(arity == 2) {
296 SV *data = POPs;
297 SETs(data);
298 PUTBACK;
299 }
300 pp1_looks_like_sereal();
301 }
302
303 #define MY_CXT_KEY "Sereal::Decoder::_stash" XS_VERSION
304
305 typedef struct {
306 sv_with_hash options[SRL_DEC_OPT_COUNT];
307 } my_cxt_t;
308
309 START_MY_CXT
310
311
312 MODULE = Sereal::Decoder PACKAGE = Sereal::Decoder
313 PROTOTYPES: DISABLE
314
315 BOOT:
316 {
317 struct {
318 char const *name_suffix;
319 U8 opopt;
320 } const funcs_to_install[] = {
321 { "", OPOPT_DO_BODY },
322 { "_only_header", OPOPT_DO_HEADER },
323 { "_with_header", (OPOPT_DO_BODY|OPOPT_DO_HEADER) },
324 { "_with_offset", (OPOPT_DO_BODY|OPOPT_OFFSET) },
325 { "_only_header_with_offset", (OPOPT_DO_HEADER|OPOPT_OFFSET) },
326 { "_with_header_and_offset", (OPOPT_DO_BODY|OPOPT_DO_HEADER|OPOPT_OFFSET) },
327 /*012345678901234567890123*/
328 }, *fti;
329 int i;
330 {
331 MY_CXT_INIT;
332 SRL_INIT_OPTION( SRL_DEC_OPT_IDX_ALIAS_SMALLINT, SRL_DEC_OPT_STR_ALIAS_SMALLINT );
333 SRL_INIT_OPTION( SRL_DEC_OPT_IDX_ALIAS_VARINT_UNDER, SRL_DEC_OPT_STR_ALIAS_VARINT_UNDER );
334 SRL_INIT_OPTION( SRL_DEC_OPT_IDX_DESTRUCTIVE_INCREMENTAL, SRL_DEC_OPT_STR_DESTRUCTIVE_INCREMENTAL );
335 SRL_INIT_OPTION( SRL_DEC_OPT_IDX_MAX_NUM_HASH_ENTRIES, SRL_DEC_OPT_STR_MAX_NUM_HASH_ENTRIES );
336 SRL_INIT_OPTION( SRL_DEC_OPT_IDX_MAX_RECURSION_DEPTH, SRL_DEC_OPT_STR_MAX_RECURSION_DEPTH );
337 SRL_INIT_OPTION( SRL_DEC_OPT_IDX_NO_BLESS_OBJECTS, SRL_DEC_OPT_STR_NO_BLESS_OBJECTS );
338 SRL_INIT_OPTION( SRL_DEC_OPT_IDX_REFUSE_OBJECTS, SRL_DEC_OPT_STR_REFUSE_OBJECTS );
339 SRL_INIT_OPTION( SRL_DEC_OPT_IDX_REFUSE_SNAPPY, SRL_DEC_OPT_STR_REFUSE_SNAPPY );
340 SRL_INIT_OPTION( SRL_DEC_OPT_IDX_REFUSE_ZLIB, SRL_DEC_OPT_STR_REFUSE_ZLIB );
341 SRL_INIT_OPTION( SRL_DEC_OPT_IDX_SET_READONLY, SRL_DEC_OPT_STR_SET_READONLY );
342 SRL_INIT_OPTION( SRL_DEC_OPT_IDX_SET_READONLY_SCALARS, SRL_DEC_OPT_STR_SET_READONLY_SCALARS );
343 SRL_INIT_OPTION( SRL_DEC_OPT_IDX_USE_UNDEF, SRL_DEC_OPT_STR_USE_UNDEF );
344 SRL_INIT_OPTION( SRL_DEC_OPT_IDX_VALIDATE_UTF8, SRL_DEC_OPT_STR_VALIDATE_UTF8 );
345 SRL_INIT_OPTION( SRL_DEC_OPT_IDX_REFUSE_ZSTD, SRL_DEC_OPT_STR_REFUSE_ZSTD );
346 SRL_INIT_OPTION( SRL_DEC_OPT_IDX_MAX_NUM_ARRAY_ENTRIES, SRL_DEC_OPT_STR_MAX_NUM_ARRAY_ENTRIES );
347 SRL_INIT_OPTION( SRL_DEC_OPT_IDX_MAX_STRING_LENGTH, SRL_DEC_OPT_STR_MAX_STRING_LENGTH );
348 SRL_INIT_OPTION( SRL_DEC_OPT_IDX_MAX_UNCOMPRESSED_SIZE, SRL_DEC_OPT_STR_MAX_UNCOMPRESSED_SIZE );
349 }
350 #if USE_CUSTOM_OPS
351 {
352 XOP *xop;
353 Newxz(xop, 1, XOP);
354 XopENTRY_set(xop, xop_name, "sereal_decode_with_object");
355 XopENTRY_set(xop, xop_desc, "sereal_decode_with_object");
356 XopENTRY_set(xop, xop_class, OA_UNOP);
357 Perl_custom_op_register(aTHX_ THX_pp_sereal_decode, xop);
358 }
359 #endif /* USE_CUSTOM_OPS */
360 for (i = sizeof(funcs_to_install)/sizeof(*fti); i--; ) {
361 # define LONG_CLASS_FMT "Sereal::Decoder::sereal_decode%s_with_object"
362 char name[sizeof(LONG_CLASS_FMT)+24];
363 char proto[7], *p = proto;
364 U8 opopt;
365 I32 cv_private;
366 GV *gv;
367 CV *cv;
368
369 fti = &funcs_to_install[i];
370 opopt = fti->opopt;
371 /*
372 * The cv_private value incorporates flags describing the operation to be
373 * performed by the sub and precomputed arity limits. 0x020200 corresponds
374 * to min_arity=2 and max_arity=2. The various additions to cv_private
375 * increment one or both of these sub-values.
376
377 * The six subs created there share a single C body function, and are
378 * differentiated only by the option flags in cv_private. The custom ops
379 * likewise share one op_ppaddr function, and the operations they perform
380 * are differentiated by the same flags, stored in op_private.
381 */
382 cv_private = opopt | 0x020200;
383
384 /* Yes, the subs have prototypes. The protoypes have no effect when the
385 * subs are used as methods, so there's no break of compatibility for those
386 * using the documented API. There is a change that could be detected by
387 * code such as "Sereal::Decoder::decode($dec, @v)", that uses the methods
388 * directly in an undocumented way.
389 *
390 * The prototype, specifically the putting of argument expressions into
391 * scalar context, is required in order to be able to resolve arity at
392 * compile time. If this wasn't done, there would have to be a pushmark
393 * op preceding the argument ops, and pp_sereal_decode() would need the
394 * same code as xsfunc_sereal_decode() to check arity and resolve the
395 * optional-parameter flags.
396 */
397 *p++ = '$';
398 *p++ = '$';
399
400 if (opopt & OPOPT_OFFSET) {
401 *p++ = '$';
402 cv_private += 0x010100;
403 }
404 *p++ = ';';
405 if (opopt & OPOPT_DO_BODY) {
406 *p++ = '$';
407 cv_private += 0x010000;
408 }
409 if (opopt & OPOPT_DO_HEADER) {
410 *p++ = '$';
411 cv_private += 0x010000;
412 }
413 *p = 0;
414 /* setup the name of the sub */
415 sprintf(name, LONG_CLASS_FMT, fti->name_suffix);
416 cv = newXSproto_portable(name, THX_xsfunc_sereal_decode, __FILE__,
417 proto);
418 CvXSUBANY(cv).any_i32 = cv_private;
419 #if USE_CUSTOM_OPS
420 cv_set_call_checker(cv, THX_ck_entersub_args_sereal_decoder, (SV*)cv);
421 #endif /* USE_CUSTOM_OPS */
422 sprintf(name, "Sereal::Decoder::decode%s", fti->name_suffix);
423 gv = gv_fetchpv(name, GV_ADDMULTI, SVt_PVCV);
424 GvCV_set(gv, cv);
425 }
426 }
427
428 BOOT:
429 {
430 #if USE_CUSTOM_OPS
431 {
432 XOP *xop;
433 Newxz(xop, 1, XOP);
434 XopENTRY_set(xop, xop_name, "scalar_looks_like_sereal");
435 XopENTRY_set(xop, xop_desc, "scalar_looks_like_sereal");
436 XopENTRY_set(xop, xop_class, OA_UNOP);
437 Perl_custom_op_register(aTHX_ THX_pp_looks_like_sereal, xop);
438 }
439 #endif /* USE_CUSTOM_OPS */
440 {
441 CV *cv;
442 cv = newXSproto_portable("Sereal::Decoder::scalar_looks_like_sereal", THX_xsfunc_looks_like_sereal, __FILE__, "$");
443 CvXSUBANY(cv).any_i32 = 0x010100 | OPOPT_LOOKS_LIKE;
444 #if USE_CUSTOM_OPS
445 cv_set_call_checker(cv, THX_ck_entersub_args_sereal_decoder, (SV*)cv);
446 #endif /* USE_CUSTOM_OPS */
447 cv = newXS("Sereal::Decoder::looks_like_sereal", THX_xsfunc_looks_like_sereal, __FILE__);
448 CvXSUBANY(cv).any_i32 = 0x020100 | OPOPT_LOOKS_LIKE;
449 }
450 }
451
452 srl_decoder_t *
453 new(CLASS, opt = NULL)
454 char *CLASS;
455 HV *opt;
456 PREINIT:
457 dMY_CXT;
458 CODE:
459 RETVAL = srl_build_decoder_struct(aTHX_ opt, MY_CXT.options);
460 RETVAL->flags |= SRL_F_DECODER_REUSE;
461 OUTPUT: RETVAL
462
463 void
464 DESTROY(dec)
465 srl_decoder_t *dec;
466 CODE:
467 srl_destroy_decoder(aTHX_ dec);
468
469 void
470 decode_sereal(src, opt = NULL, into = NULL)
471 SV *src;
472 SV *opt;
473 SV *into;
474 PREINIT:
475 dMY_CXT;
476 srl_decoder_t *dec= NULL;
477 PPCODE:
478 if (SvROK(src))
479 croak("We can't decode a reference as Sereal!");
480 /* Support no opt at all, undef, hashref */
481 if (opt != NULL) {
482 SvGETMAGIC(opt);
483 if (!SvOK(opt))
484 opt = NULL;
485 else if (SvROK(opt) && SvTYPE(SvRV(opt)) == SVt_PVHV)
486 opt = (SV *)SvRV(opt);
487 else
488 croak("Options are neither undef nor hash reference");
489 }
490 dec = srl_build_decoder_struct(aTHX_ (HV *)opt, MY_CXT.options);
491 ST(0)= srl_decode_into(aTHX_ dec, src, into, 0);
492 XSRETURN(1);
493
494 AV *
495 decode_sereal_with_header_data(src, opt = NULL, body_into = NULL, header_into = NULL)
496 SV *src;
497 SV *opt;
498 SV *body_into;
499 SV *header_into;
500 PREINIT:
501 dMY_CXT;
502 srl_decoder_t *dec= NULL;
503 CODE:
504 /* Support no opt at all, undef, hashref */
505 if (opt != NULL) {
506 SvGETMAGIC(opt);
507 if (!SvOK(opt))
508 opt = NULL;
509 else if (SvROK(opt) && SvTYPE(SvRV(opt)) == SVt_PVHV)
510 opt = (SV *)SvRV(opt);
511 else
512 croak("Options are neither undef nor hash reference");
513 }
514 dec = srl_build_decoder_struct(aTHX_ (HV *)opt, MY_CXT.options);
515 if (body_into == NULL)
516 body_into = sv_newmortal();
517 if (header_into == NULL)
518 header_into = sv_newmortal();
519 srl_decode_all_into(aTHX_ dec, src, header_into, body_into, 0);
520 RETVAL = newAV();
521 sv_2mortal((SV *)RETVAL);
522 av_extend(RETVAL, 1);
523 av_store(RETVAL, 0, SvREFCNT_inc(header_into));
524 av_store(RETVAL, 1, SvREFCNT_inc(body_into));
525 OUTPUT: RETVAL
526
527 UV
528 bytes_consumed(dec)
529 srl_decoder_t *dec;
530 CODE:
531 RETVAL = dec->bytes_consumed;
532 OUTPUT: RETVAL
533
534 U32
535 flags(dec)
536 srl_decoder_t *dec;
537 CODE:
538 RETVAL = dec->flags;
539 OUTPUT: RETVAL
540
541 SV*
542 regexp_internals_type()
543 CODE:
544 RETVAL = newSVpvs(REGEXP_TYPE);
545 OUTPUT: RETVAL
546