1 /* Must be defined before including Perl header files or we slow down by 2x! */
2 #define PERL_NO_GET_CONTEXT
3 
4 #define NEED_newSV_type
5 #include "EXTERN.h"
6 #include "perl.h"
7 #include "XSUB.h"
8 
9 #include "ppport.h"
10 
11 #include "srl_common.h"
12 #include "srl_decoder.h"
13 #include "srl_protocol.h"
14 
15 #ifndef GvCV_set
16 # define GvCV_set(gv, cv) (GvCV(gv) = (cv))
17 #endif
18 
19 #ifndef PERL_ARGS_ASSERT_CROAK_XS_USAGE
20 #define PERL_ARGS_ASSERT_CROAK_XS_USAGE assert(cv); assert(params)
21 
22 /* prototype to pass -Wmissing-prototypes */
23 STATIC void
24 S_croak_xs_usage(pTHX_ const CV *const cv, const char *const params);
25 
26 STATIC void
S_croak_xs_usage(pTHX_ const CV * const cv,const char * const params)27 S_croak_xs_usage(pTHX_ const CV *const cv, const char *const params)
28 {
29     const GV *const gv = CvGV(cv);
30 
31     PERL_ARGS_ASSERT_CROAK_XS_USAGE;
32 
33     if (gv) {
34         const char *const gvname = GvNAME(gv);
35         const HV *const stash = GvSTASH(gv);
36         const char *const hvname = stash ? HvNAME(stash) : NULL;
37 
38         if (hvname)
39             Perl_croak_nocontext("Usage: %s::%s(%s)", hvname, gvname, params);
40         else
41             Perl_croak_nocontext("Usage: %s(%s)", gvname, params);
42     } else {
43         /* Pants. I don't think that it should be possible to get here. */
44         Perl_croak_nocontext("Usage: CODE(0x%"UVxf")(%s)", PTR2UV(cv), params);
45     }
46 }
47 
48 #ifdef PERL_IMPLICIT_CONTEXT
49 #define croak_xs_usage(a,b)     S_croak_xs_usage(aTHX_ a,b)
50 #else
51 #define croak_xs_usage          S_croak_xs_usage
52 #endif
53 
54 #endif
55 
56 
57 #if defined(cv_set_call_checker) && defined(XopENTRY_set)
58 # define USE_CUSTOM_OPS 1
59 #else
60 # define USE_CUSTOM_OPS 0
61 #endif
62 
63 #define OPOPT_DO_BODY       (1<<0)
64 #define OPOPT_DO_HEADER     (1<<1)
65 #define OPOPT_OFFSET        (1<<2)
66 #define OPOPT_OUTARG_BODY   (1<<3)
67 #define OPOPT_OUTARG_HEADER (1<<4)
68 #define OPOPT_LOOKS_LIKE    (1<<5)
69 
70 #define pp1_sereal_decode(opopt) THX_pp1_sereal_decode(aTHX_ opopt)
71 static void
THX_pp1_sereal_decode(pTHX_ U8 opopt)72 THX_pp1_sereal_decode(pTHX_ U8 opopt)
73 {
74     bool need_retvalue = GIMME_V != G_VOID;
75     SV *decoder_ref_sv, *decoder_sv, *src_sv;
76     UV offset;
77     SV *body_into, *header_into;
78     srl_decoder_t *decoder;
79     char *stash_name;
80     dSP;
81 
82     header_into = expect_false(opopt & OPOPT_OUTARG_HEADER)
83                   ? POPs
84                   : expect_false(opopt & OPOPT_DO_HEADER) ? sv_newmortal() : NULL;
85     body_into = expect_false(opopt & OPOPT_OUTARG_BODY)
86                 ? POPs
87                 : expect_true(opopt & OPOPT_DO_BODY) ? sv_newmortal() : NULL;
88 
89     offset = expect_false(opopt & OPOPT_OFFSET) ? SvUVx(POPs) : 0;
90     src_sv = POPs;
91     decoder_ref_sv = POPs;
92     PUTBACK;
93 
94     if (!expect_true(
95           decoder_ref_sv &&
96           SvROK(decoder_ref_sv) &&
97           (decoder_sv = SvRV(decoder_ref_sv)) &&
98           SvOBJECT(decoder_sv) &&
99           (stash_name = HvNAME(SvSTASH(decoder_sv))) &&
100           !strcmp(stash_name, "Sereal::Decoder")
101        ))
102     {
103         croak("handle is not a Sereal::Decoder handle");
104     }
105 
106     decoder = (srl_decoder_t *)SvIV(decoder_sv);
107     if (expect_true(opopt & OPOPT_DO_BODY)) {
108         if (opopt & OPOPT_DO_HEADER) {
109              srl_decode_all_into(aTHX_ decoder, src_sv, header_into,
110                                  body_into, offset);
111         } else {
112             srl_decode_into(aTHX_ decoder, src_sv, body_into, offset);
113         }
114     } else {
115         srl_decode_header_into(aTHX_ decoder, src_sv, header_into, offset);
116     }
117 
118     if (expect_true(need_retvalue)) {
119         SV *retvalue;
120         if (expect_true(opopt & OPOPT_DO_BODY)) {
121             if (opopt & OPOPT_DO_HEADER) {
122                 AV *retav = newAV();
123                 retvalue = newRV_noinc((SV*)retav);
124                 sv_2mortal(retvalue);
125                 av_extend(retav, 1);
126                 av_store(retav, 0, SvREFCNT_inc(header_into));
127                 av_store(retav, 1, SvREFCNT_inc(body_into));
128             } else {
129                 retvalue = body_into;
130             }
131         } else {
132             retvalue = header_into;
133         }
134         SPAGAIN;
135         XPUSHs(retvalue);
136         PUTBACK;
137     }
138 }
139 
140 #define pp1_looks_like_sereal() THX_pp1_looks_like_sereal(aTHX)
141 static void
THX_pp1_looks_like_sereal(pTHX)142 THX_pp1_looks_like_sereal(pTHX)
143 {
144     dSP;
145     SV *data= TOPs;
146     /* Should this be SvPOK()? Or better yet, check if it's *really* a string pointer: SvPOKp(data). After all
147        the serialization format is a string and anything otherwise would not look sereal. */
148     if ( SvOK(data) ) {
149         STRLEN len;
150         char *strdata= SvPV(data, len);
151         IV ret= srl_validate_header_version_pv_len(aTHX_ strdata, len);
152         if ( ret < 0 ) {
153             SETs(&PL_sv_no);
154         } else {
155             SETs(sv_2mortal(newSViv(ret & SRL_PROTOCOL_VERSION_MASK)));
156         }
157     } else {
158         SETs(&PL_sv_no);
159     }
160 }
161 
162 #if USE_CUSTOM_OPS
163 
164 static OP *
THX_pp_sereal_decode(pTHX)165 THX_pp_sereal_decode(pTHX)
166 {
167     pp1_sereal_decode(PL_op->op_private);
168     return NORMAL;
169 }
170 
171 static OP *
THX_pp_looks_like_sereal(pTHX)172 THX_pp_looks_like_sereal(pTHX)
173 {
174     pp1_looks_like_sereal();
175     return NORMAL;
176 }
177 
178 static OP *
THX_ck_entersub_args_sereal_decoder(pTHX_ OP * entersubop,GV * namegv,SV * ckobj)179 THX_ck_entersub_args_sereal_decoder(pTHX_ OP *entersubop, GV *namegv, SV *ckobj)
180 {
181 
182    /* pull apart a standard entersub op tree */
183 
184     CV *cv = (CV*)ckobj;
185     I32 cv_private = CvXSUBANY(cv).any_i32;
186     U8 opopt = cv_private & 0xff;
187     U8 min_arity = (cv_private >> 8) & 0xff;
188     U8 max_arity = (cv_private >> 16) & 0xff;
189     OP *pushop, *firstargop, *cvop, *lastargop, *argop, *newop;
190     int arity;
191 
192     /* Walk the OP structure under the "entersub" to validate that we
193      * can use the custom OP implementation. */
194 
195     entersubop = ck_entersub_args_proto(entersubop, namegv, (SV*)cv);
196     pushop = cUNOPx(entersubop)->op_first;
197     if ( ! OpHAS_SIBLING(pushop) )
198         pushop = cUNOPx(pushop)->op_first;
199     firstargop = OpSIBLING(pushop);
200 
201     for (cvop = firstargop; OpHAS_SIBLING(cvop); cvop = OpSIBLING(cvop)) ;
202 
203     lastargop = pushop;
204     for (
205         arity = 0, lastargop = pushop, argop = firstargop;
206         argop != cvop;
207         lastargop = argop, argop = OpSIBLING(argop)
208     ){
209         arity++;
210     }
211 
212     if (expect_false(arity < min_arity || arity > max_arity))
213         return entersubop;
214 
215     /* If we get here, we can replace the entersub with a suitable
216      * custom OP. */
217 
218     if (arity > min_arity && (opopt & OPOPT_DO_BODY)) {
219         opopt |= OPOPT_OUTARG_BODY;
220         min_arity++;
221     }
222 
223     if (arity > min_arity)
224         opopt |= OPOPT_OUTARG_HEADER;
225 
226 #ifdef op_sibling_splice
227     /* op_sibling_splice is new in 5.31 and we have to do things differenly */
228 
229     /* cut out all ops between the pushmark and the RV2CV */
230     op_sibling_splice(NULL, pushop, arity, NULL);
231     /* then throw everything else out */
232     op_free(entersubop);
233     newop = newUNOP(OP_NULL, 0, NULL);
234 
235 #else
236 
237     OpMORESIB_set(pushop, cvop);
238     OpLASTSIB_set(lastargop, op_parent(lastargop));
239     op_free(entersubop);
240     newop = newUNOP(OP_NULL, 0, firstargop);
241 
242 #endif
243 
244     newop->op_type    = OP_CUSTOM;
245     newop->op_private = opopt;
246     newop->op_ppaddr = opopt & OPOPT_LOOKS_LIKE ? THX_pp_looks_like_sereal : THX_pp_sereal_decode;
247 
248 #ifdef op_sibling_splice
249 
250     /* attach the spliced-out args as children of the custom op, while
251      * deleting the stub op created by newUNOP() */
252     op_sibling_splice(newop, NULL, 1, firstargop);
253 
254 #endif
255 
256     return newop;
257 }
258 
259 #endif /* USE_CUSTOM_OPS */
260 
261 static void
THX_xsfunc_sereal_decode(pTHX_ CV * cv)262 THX_xsfunc_sereal_decode(pTHX_ CV *cv)
263 {
264     dMARK;
265     dSP;
266     SSize_t arity = SP - MARK;
267     I32 cv_private = CvXSUBANY(cv).any_i32;
268     U8 opopt = cv_private & 0xff;
269     U8 min_arity = (cv_private >> 8) & 0xff;
270     U8 max_arity = (cv_private >> 16) & 0xff;
271 
272     if (arity < min_arity || arity > max_arity)
273         croak("bad Sereal decoder usage");
274     if (arity > min_arity && (opopt & OPOPT_DO_BODY)) {
275         opopt |= OPOPT_OUTARG_BODY;
276         min_arity++;
277     }
278     if (arity > min_arity)
279         opopt |= OPOPT_OUTARG_HEADER;
280 
281     pp1_sereal_decode(opopt);
282 }
283 
284 static void
THX_xsfunc_looks_like_sereal(pTHX_ CV * cv)285 THX_xsfunc_looks_like_sereal(pTHX_ CV *cv)
286 {
287     dMARK;
288     dSP;
289     SSize_t arity = SP - MARK;
290     I32 cv_private = CvXSUBANY(cv).any_i32;
291     U8 max_arity = (cv_private >> 16) & 0xff;
292 
293     if (arity < 1 || arity > max_arity)
294         croak_xs_usage(cv, max_arity == 1 ? "data" : "[invocant,] data");
295     if(arity == 2) {
296         SV *data = POPs;
297         SETs(data);
298         PUTBACK;
299     }
300     pp1_looks_like_sereal();
301 }
302 
303 #define MY_CXT_KEY "Sereal::Decoder::_stash" XS_VERSION
304 
305 typedef struct {
306     sv_with_hash options[SRL_DEC_OPT_COUNT];
307 } my_cxt_t;
308 
309 START_MY_CXT
310 
311 
312 MODULE = Sereal::Decoder        PACKAGE = Sereal::Decoder
313 PROTOTYPES: DISABLE
314 
315 BOOT:
316 {
317     struct {
318         char const *name_suffix;
319         U8 opopt;
320     } const funcs_to_install[] = {
321         { "",                           OPOPT_DO_BODY },
322         { "_only_header",               OPOPT_DO_HEADER },
323         { "_with_header",               (OPOPT_DO_BODY|OPOPT_DO_HEADER) },
324         { "_with_offset",               (OPOPT_DO_BODY|OPOPT_OFFSET) },
325         { "_only_header_with_offset",   (OPOPT_DO_HEADER|OPOPT_OFFSET) },
326         { "_with_header_and_offset",    (OPOPT_DO_BODY|OPOPT_DO_HEADER|OPOPT_OFFSET) },
327          /*012345678901234567890123*/
328     }, *fti;
329     int i;
330     {
331         MY_CXT_INIT;
332         SRL_INIT_OPTION( SRL_DEC_OPT_IDX_ALIAS_SMALLINT,             SRL_DEC_OPT_STR_ALIAS_SMALLINT             );
333         SRL_INIT_OPTION( SRL_DEC_OPT_IDX_ALIAS_VARINT_UNDER,         SRL_DEC_OPT_STR_ALIAS_VARINT_UNDER         );
334         SRL_INIT_OPTION( SRL_DEC_OPT_IDX_DESTRUCTIVE_INCREMENTAL,    SRL_DEC_OPT_STR_DESTRUCTIVE_INCREMENTAL    );
335         SRL_INIT_OPTION( SRL_DEC_OPT_IDX_MAX_NUM_HASH_ENTRIES,       SRL_DEC_OPT_STR_MAX_NUM_HASH_ENTRIES       );
336         SRL_INIT_OPTION( SRL_DEC_OPT_IDX_MAX_RECURSION_DEPTH,        SRL_DEC_OPT_STR_MAX_RECURSION_DEPTH        );
337         SRL_INIT_OPTION( SRL_DEC_OPT_IDX_NO_BLESS_OBJECTS,           SRL_DEC_OPT_STR_NO_BLESS_OBJECTS           );
338         SRL_INIT_OPTION( SRL_DEC_OPT_IDX_REFUSE_OBJECTS,             SRL_DEC_OPT_STR_REFUSE_OBJECTS             );
339         SRL_INIT_OPTION( SRL_DEC_OPT_IDX_REFUSE_SNAPPY,              SRL_DEC_OPT_STR_REFUSE_SNAPPY              );
340         SRL_INIT_OPTION( SRL_DEC_OPT_IDX_REFUSE_ZLIB,                SRL_DEC_OPT_STR_REFUSE_ZLIB                );
341         SRL_INIT_OPTION( SRL_DEC_OPT_IDX_SET_READONLY,               SRL_DEC_OPT_STR_SET_READONLY               );
342         SRL_INIT_OPTION( SRL_DEC_OPT_IDX_SET_READONLY_SCALARS,       SRL_DEC_OPT_STR_SET_READONLY_SCALARS       );
343         SRL_INIT_OPTION( SRL_DEC_OPT_IDX_USE_UNDEF,                  SRL_DEC_OPT_STR_USE_UNDEF                  );
344         SRL_INIT_OPTION( SRL_DEC_OPT_IDX_VALIDATE_UTF8,              SRL_DEC_OPT_STR_VALIDATE_UTF8              );
345         SRL_INIT_OPTION( SRL_DEC_OPT_IDX_REFUSE_ZSTD,                SRL_DEC_OPT_STR_REFUSE_ZSTD                );
346         SRL_INIT_OPTION( SRL_DEC_OPT_IDX_MAX_NUM_ARRAY_ENTRIES,      SRL_DEC_OPT_STR_MAX_NUM_ARRAY_ENTRIES      );
347         SRL_INIT_OPTION( SRL_DEC_OPT_IDX_MAX_STRING_LENGTH,          SRL_DEC_OPT_STR_MAX_STRING_LENGTH          );
348         SRL_INIT_OPTION( SRL_DEC_OPT_IDX_MAX_UNCOMPRESSED_SIZE,      SRL_DEC_OPT_STR_MAX_UNCOMPRESSED_SIZE      );
349     }
350 #if USE_CUSTOM_OPS
351     {
352         XOP *xop;
353         Newxz(xop, 1, XOP);
354         XopENTRY_set(xop, xop_name, "sereal_decode_with_object");
355         XopENTRY_set(xop, xop_desc, "sereal_decode_with_object");
356         XopENTRY_set(xop, xop_class, OA_UNOP);
357         Perl_custom_op_register(aTHX_ THX_pp_sereal_decode, xop);
358     }
359 #endif /* USE_CUSTOM_OPS */
360     for (i = sizeof(funcs_to_install)/sizeof(*fti); i--; ) {
361 #       define LONG_CLASS_FMT "Sereal::Decoder::sereal_decode%s_with_object"
362         char name[sizeof(LONG_CLASS_FMT)+24];
363         char proto[7], *p = proto;
364         U8 opopt;
365         I32 cv_private;
366         GV *gv;
367         CV *cv;
368 
369         fti = &funcs_to_install[i];
370         opopt = fti->opopt;
371         /*
372          * The cv_private value incorporates flags describing the operation to be
373          * performed by the sub and precomputed arity limits.  0x020200 corresponds
374          * to min_arity=2 and max_arity=2.  The various additions to cv_private
375          * increment one or both of these sub-values.
376 
377          * The six subs created there share a single C body function, and are
378          * differentiated only by the option flags in cv_private.  The custom ops
379          * likewise share one op_ppaddr function, and the operations they perform
380          * are differentiated by the same flags, stored in op_private.
381          */
382         cv_private = opopt | 0x020200;
383 
384         /* Yes, the subs have prototypes.  The protoypes have no effect when the
385          * subs are used as methods, so there's no break of compatibility for those
386          * using the documented API.  There is a change that could be detected by
387          * code such as "Sereal::Decoder::decode($dec, @v)", that uses the methods
388          * directly in an undocumented way.
389          *
390          * The prototype, specifically the putting of argument expressions into
391          * scalar context, is required in order to be able to resolve arity at
392          * compile time.  If this wasn't done, there would have to be a pushmark
393          * op preceding the argument ops, and pp_sereal_decode() would need the
394          * same code as xsfunc_sereal_decode() to check arity and resolve the
395          * optional-parameter flags.
396          */
397         *p++ = '$';
398         *p++ = '$';
399 
400         if (opopt & OPOPT_OFFSET) {
401             *p++ = '$';
402             cv_private += 0x010100;
403         }
404         *p++ = ';';
405         if (opopt & OPOPT_DO_BODY) {
406             *p++ = '$';
407             cv_private += 0x010000;
408         }
409         if (opopt & OPOPT_DO_HEADER) {
410             *p++ = '$';
411             cv_private += 0x010000;
412         }
413         *p = 0;
414         /* setup the name of the sub */
415         sprintf(name, LONG_CLASS_FMT, fti->name_suffix);
416         cv = newXSproto_portable(name, THX_xsfunc_sereal_decode, __FILE__,
417                 proto);
418         CvXSUBANY(cv).any_i32 = cv_private;
419 #if USE_CUSTOM_OPS
420         cv_set_call_checker(cv, THX_ck_entersub_args_sereal_decoder, (SV*)cv);
421 #endif /* USE_CUSTOM_OPS */
422         sprintf(name, "Sereal::Decoder::decode%s", fti->name_suffix);
423         gv = gv_fetchpv(name, GV_ADDMULTI, SVt_PVCV);
424         GvCV_set(gv, cv);
425     }
426 }
427 
428 BOOT:
429 {
430 #if USE_CUSTOM_OPS
431     {
432         XOP *xop;
433         Newxz(xop, 1, XOP);
434         XopENTRY_set(xop, xop_name, "scalar_looks_like_sereal");
435         XopENTRY_set(xop, xop_desc, "scalar_looks_like_sereal");
436         XopENTRY_set(xop, xop_class, OA_UNOP);
437         Perl_custom_op_register(aTHX_ THX_pp_looks_like_sereal, xop);
438     }
439 #endif /* USE_CUSTOM_OPS */
440     {
441         CV *cv;
442         cv = newXSproto_portable("Sereal::Decoder::scalar_looks_like_sereal", THX_xsfunc_looks_like_sereal, __FILE__, "$");
443         CvXSUBANY(cv).any_i32 = 0x010100 | OPOPT_LOOKS_LIKE;
444 #if USE_CUSTOM_OPS
445         cv_set_call_checker(cv, THX_ck_entersub_args_sereal_decoder, (SV*)cv);
446 #endif /* USE_CUSTOM_OPS */
447         cv = newXS("Sereal::Decoder::looks_like_sereal", THX_xsfunc_looks_like_sereal, __FILE__);
448         CvXSUBANY(cv).any_i32 = 0x020100 | OPOPT_LOOKS_LIKE;
449     }
450 }
451 
452 srl_decoder_t *
453 new(CLASS, opt = NULL)
454     char *CLASS;
455     HV *opt;
456   PREINIT:
457     dMY_CXT;
458   CODE:
459     RETVAL = srl_build_decoder_struct(aTHX_ opt, MY_CXT.options);
460     RETVAL->flags |= SRL_F_DECODER_REUSE;
461   OUTPUT: RETVAL
462 
463 void
464 DESTROY(dec)
465     srl_decoder_t *dec;
466   CODE:
467     srl_destroy_decoder(aTHX_ dec);
468 
469 void
470 decode_sereal(src, opt = NULL, into = NULL)
471     SV *src;
472     SV *opt;
473     SV *into;
474   PREINIT:
475     dMY_CXT;
476     srl_decoder_t *dec= NULL;
477   PPCODE:
478     if (SvROK(src))
479         croak("We can't decode a reference as Sereal!");
480     /* Support no opt at all, undef, hashref */
481     if (opt != NULL) {
482         SvGETMAGIC(opt);
483         if (!SvOK(opt))
484             opt = NULL;
485         else if (SvROK(opt) && SvTYPE(SvRV(opt)) == SVt_PVHV)
486             opt = (SV *)SvRV(opt);
487         else
488             croak("Options are neither undef nor hash reference");
489     }
490     dec = srl_build_decoder_struct(aTHX_ (HV *)opt, MY_CXT.options);
491     ST(0)= srl_decode_into(aTHX_ dec, src, into, 0);
492     XSRETURN(1);
493 
494 AV *
495 decode_sereal_with_header_data(src, opt = NULL, body_into = NULL, header_into = NULL)
496     SV *src;
497     SV *opt;
498     SV *body_into;
499     SV *header_into;
500   PREINIT:
501     dMY_CXT;
502     srl_decoder_t *dec= NULL;
503   CODE:
504     /* Support no opt at all, undef, hashref */
505     if (opt != NULL) {
506         SvGETMAGIC(opt);
507         if (!SvOK(opt))
508             opt = NULL;
509         else if (SvROK(opt) && SvTYPE(SvRV(opt)) == SVt_PVHV)
510             opt = (SV *)SvRV(opt);
511         else
512             croak("Options are neither undef nor hash reference");
513     }
514     dec = srl_build_decoder_struct(aTHX_ (HV *)opt, MY_CXT.options);
515     if (body_into == NULL)
516       body_into = sv_newmortal();
517     if (header_into == NULL)
518       header_into = sv_newmortal();
519     srl_decode_all_into(aTHX_ dec, src, header_into, body_into, 0);
520     RETVAL = newAV();
521     sv_2mortal((SV *)RETVAL);
522     av_extend(RETVAL, 1);
523     av_store(RETVAL, 0, SvREFCNT_inc(header_into));
524     av_store(RETVAL, 1, SvREFCNT_inc(body_into));
525   OUTPUT: RETVAL
526 
527 UV
528 bytes_consumed(dec)
529     srl_decoder_t *dec;
530   CODE:
531     RETVAL = dec->bytes_consumed;
532   OUTPUT: RETVAL
533 
534 U32
535 flags(dec)
536     srl_decoder_t *dec;
537   CODE:
538     RETVAL = dec->flags;
539   OUTPUT: RETVAL
540 
541 SV*
542 regexp_internals_type()
543   CODE:
544     RETVAL = newSVpvs(REGEXP_TYPE);
545   OUTPUT: RETVAL
546