Ruby  2.5.0dev(2017-10-22revision60238)
marshal.c
Go to the documentation of this file.
1 /**********************************************************************
2 
3  marshal.c -
4 
5  $Author$
6  created at: Thu Apr 27 16:30:01 JST 1995
7 
8  Copyright (C) 1993-2007 Yukihiro Matsumoto
9 
10 **********************************************************************/
11 
12 #if defined __GNUC__ && __GNUC__ < 3
13 # error too old GCC
14 #endif
15 
16 #include "internal.h"
17 #include "ruby/io.h"
18 #include "ruby/st.h"
19 #include "ruby/util.h"
20 #include "encindex.h"
21 #include "id_table.h"
22 
23 #include <math.h>
24 #ifdef HAVE_FLOAT_H
25 #include <float.h>
26 #endif
27 #ifdef HAVE_IEEEFP_H
28 #include <ieeefp.h>
29 #endif
30 
31 #define BITSPERSHORT (2*CHAR_BIT)
32 #define SHORTMASK ((1<<BITSPERSHORT)-1)
33 #define SHORTDN(x) RSHIFT((x),BITSPERSHORT)
34 
35 #if SIZEOF_SHORT == SIZEOF_BDIGIT
36 #define SHORTLEN(x) (x)
37 #else
38 static size_t
39 shortlen(size_t len, BDIGIT *ds)
40 {
41  BDIGIT num;
42  int offset = 0;
43 
44  num = ds[len-1];
45  while (num) {
46  num = SHORTDN(num);
47  offset++;
48  }
49  return (len - 1)*SIZEOF_BDIGIT/2 + offset;
50 }
51 #define SHORTLEN(x) shortlen((x),d)
52 #endif
53 
54 #define MARSHAL_MAJOR 4
55 #define MARSHAL_MINOR 8
56 
57 #define TYPE_NIL '0'
58 #define TYPE_TRUE 'T'
59 #define TYPE_FALSE 'F'
60 #define TYPE_FIXNUM 'i'
61 
62 #define TYPE_EXTENDED 'e'
63 #define TYPE_UCLASS 'C'
64 #define TYPE_OBJECT 'o'
65 #define TYPE_DATA 'd'
66 #define TYPE_USERDEF 'u'
67 #define TYPE_USRMARSHAL 'U'
68 #define TYPE_FLOAT 'f'
69 #define TYPE_BIGNUM 'l'
70 #define TYPE_STRING '"'
71 #define TYPE_REGEXP '/'
72 #define TYPE_ARRAY '['
73 #define TYPE_HASH '{'
74 #define TYPE_HASH_DEF '}'
75 #define TYPE_STRUCT 'S'
76 #define TYPE_MODULE_OLD 'M'
77 #define TYPE_CLASS 'c'
78 #define TYPE_MODULE 'm'
79 
80 #define TYPE_SYMBOL ':'
81 #define TYPE_SYMLINK ';'
82 
83 #define TYPE_IVAR 'I'
84 #define TYPE_LINK '@'
85 
86 static ID s_dump, s_load, s_mdump, s_mload;
87 static ID s_dump_data, s_load_data, s_alloc, s_call;
88 static ID s_getbyte, s_read, s_write, s_binmode;
89 
90 #define name_s_dump "_dump"
91 #define name_s_load "_load"
92 #define name_s_mdump "marshal_dump"
93 #define name_s_mload "marshal_load"
94 #define name_s_dump_data "_dump_data"
95 #define name_s_load_data "_load_data"
96 #define name_s_alloc "_alloc"
97 #define name_s_call "call"
98 #define name_s_getbyte "getbyte"
99 #define name_s_read "read"
100 #define name_s_write "write"
101 #define name_s_binmode "binmode"
102 
103 typedef struct {
106  VALUE (*dumper)(VALUE);
107  VALUE (*loader)(VALUE, VALUE);
109 
110 static st_table *compat_allocator_tbl;
111 static VALUE compat_allocator_tbl_wrapper;
112 static VALUE rb_marshal_dump_limited(VALUE obj, VALUE port, int limit);
113 static VALUE rb_marshal_load_with_proc(VALUE port, VALUE proc);
114 
115 static int
116 mark_marshal_compat_i(st_data_t key, st_data_t value)
117 {
118  marshal_compat_t *p = (marshal_compat_t *)value;
119  rb_gc_mark(p->newclass);
120  rb_gc_mark(p->oldclass);
121  return ST_CONTINUE;
122 }
123 
124 static void
125 mark_marshal_compat_t(void *tbl)
126 {
127  if (!tbl) return;
128  st_foreach(tbl, mark_marshal_compat_i, 0);
129 }
130 
131 static st_table *compat_allocator_table(void);
132 
133 void
134 rb_marshal_define_compat(VALUE newclass, VALUE oldclass, VALUE (*dumper)(VALUE), VALUE (*loader)(VALUE, VALUE))
135 {
136  marshal_compat_t *compat;
137  rb_alloc_func_t allocator = rb_get_alloc_func(newclass);
138 
139  if (!allocator) {
140  rb_raise(rb_eTypeError, "no allocator");
141  }
142 
143  compat = ALLOC(marshal_compat_t);
144  compat->newclass = Qnil;
145  compat->oldclass = Qnil;
146  compat->newclass = newclass;
147  compat->oldclass = oldclass;
148  compat->dumper = dumper;
149  compat->loader = loader;
150 
151  st_insert(compat_allocator_table(), (st_data_t)allocator, (st_data_t)compat);
152 }
153 
154 #define MARSHAL_INFECTION FL_TAINT
155 STATIC_ASSERT(marshal_infection_is_int, MARSHAL_INFECTION == (int)MARSHAL_INFECTION);
156 
157 struct dump_arg {
158  VALUE str, dest;
164 };
165 
168  struct dump_arg *arg;
169  int limit;
170 };
171 
172 static VALUE
173 check_dump_arg(VALUE ret, struct dump_arg *arg, const char *name)
174 {
175  if (!arg->symbols) {
176  rb_raise(rb_eRuntimeError, "Marshal.dump reentered at %s",
177  name);
178  }
179  return ret;
180 }
181 
182 static VALUE
183 check_userdump_arg(VALUE obj, ID sym, int argc, const VALUE *argv,
184  struct dump_arg *arg, const char *name)
185 {
186  VALUE ret = rb_funcallv(obj, sym, argc, argv);
187  VALUE klass = CLASS_OF(obj);
188  if (CLASS_OF(ret) == klass) {
189  rb_raise(rb_eRuntimeError, "%"PRIsVALUE"#%s returned same class instance",
190  klass, name);
191  }
192  return check_dump_arg(ret, arg, name);
193 }
194 
195 #define dump_funcall(arg, obj, sym, argc, argv) \
196  check_userdump_arg(obj, sym, argc, argv, arg, name_##sym)
197 #define dump_check_funcall(arg, obj, sym, argc, argv) \
198  check_dump_arg(rb_check_funcall(obj, sym, argc, argv), arg, name_##sym)
199 
200 static void clear_dump_arg(struct dump_arg *arg);
201 
202 static void
203 mark_dump_arg(void *ptr)
204 {
205  struct dump_arg *p = ptr;
206  if (!p->symbols)
207  return;
208  rb_mark_set(p->symbols);
209  rb_mark_set(p->data);
211  rb_gc_mark(p->str);
212 }
213 
/* GC free function: release the tables owned by the state, then the state itself. */
static void
free_dump_arg(void *ptr)
{
    struct dump_arg *state = ptr;

    clear_dump_arg(state);
    xfree(state);
}
220 
221 static size_t
222 memsize_dump_arg(const void *ptr)
223 {
224  return sizeof(struct dump_arg);
225 }
226 
227 static const rb_data_type_t dump_arg_data = {
228  "dump_arg",
229  {mark_dump_arg, free_dump_arg, memsize_dump_arg,},
231 };
232 
233 static VALUE
234 must_not_be_anonymous(const char *type, VALUE path)
235 {
236  char *n = RSTRING_PTR(path);
237 
238  if (!rb_enc_asciicompat(rb_enc_get(path))) {
239  /* cannot occur? */
240  rb_raise(rb_eTypeError, "can't dump non-ascii %s name % "PRIsVALUE,
241  type, path);
242  }
243  if (n[0] == '#') {
244  rb_raise(rb_eTypeError, "can't dump anonymous %s % "PRIsVALUE,
245  type, path);
246  }
247  return path;
248 }
249 
250 static VALUE
251 class2path(VALUE klass)
252 {
253  VALUE path = rb_class_path(klass);
254 
255  must_not_be_anonymous((RB_TYPE_P(klass, T_CLASS) ? "class" : "module"), path);
256  if (rb_path_to_class(path) != rb_class_real(klass)) {
257  rb_raise(rb_eTypeError, "% "PRIsVALUE" can't be referred to", path);
258  }
259  return path;
260 }
261 
262 static void w_long(long, struct dump_arg*);
263 static void w_encoding(VALUE encname, struct dump_call_arg *arg);
264 static VALUE encoding_name(VALUE obj, struct dump_arg *arg);
265 
266 static void
267 w_nbyte(const char *s, long n, struct dump_arg *arg)
268 {
269  VALUE buf = arg->str;
270  rb_str_buf_cat(buf, s, n);
271  RBASIC(buf)->flags |= arg->infection;
272  if (arg->dest && RSTRING_LEN(buf) >= BUFSIZ) {
273  rb_io_write(arg->dest, buf);
274  rb_str_resize(buf, 0);
275  }
276 }
277 
/* Write a single byte. */
static void
w_byte(char c, struct dump_arg *arg)
{
    const char one[1] = { c };

    w_nbyte(one, 1, arg);
}
283 
/* Write a length-prefixed byte sequence: the length via w_long(), then the bytes. */
static void
w_bytes(const char *s, long n, struct dump_arg *arg)
{
    const long count = n;

    w_long(count, arg);
    w_nbyte(s, count, arg);
}
290 
291 #define w_cstr(s, arg) w_bytes((s), strlen(s), (arg))
292 
/* Write the low 16 bits of `x`, least significant byte first. */
static void
w_short(int x, struct dump_arg *arg)
{
    const char low = (char)(x & 0xff);
    const char high = (char)((x >> 8) & 0xff);

    w_byte(low, arg);
    w_byte(high, arg);
}
299 
/*
 * Write an integer in the variable-length marshal form:
 *   0            -> single byte 0
 *   1..122       -> single byte x+5
 *   -123..-1     -> single byte (x-5)&0xff
 *   otherwise    -> a signed count byte (+n / -n) followed by the n low
 *                   bytes, least significant first
 * On platforms where long is wider than 4 bytes, values that do not fit
 * in 32 bits raise TypeError.
 */
static void
w_long(long x, struct dump_arg *arg)
{
    char buf[sizeof(long)+1];
    int i;

#if SIZEOF_LONG > 4
    {
        const long upper = RSHIFT(x, 31);

        if (upper != 0 && upper != -1) {
            /* big long does not fit in 4 bytes */
            rb_raise(rb_eTypeError, "long too big to dump");
        }
    }
#endif

    if (x == 0) {
        w_byte(0, arg);
        return;
    }
    if (0 < x && x < 123) {
        w_byte((char)(x + 5), arg);
        return;
    }
    if (-124 < x && x < 0) {
        w_byte((char)((x - 5)&0xff), arg);
        return;
    }

    /* Peel off one byte at a time until only sign extension is left. */
    i = 1;
    while (i < (int)sizeof(long)+1) {
        buf[i] = (char)(x & 0xff);
        x = RSHIFT(x,8);
        if (x == 0 || x == -1) {
            buf[0] = (char)(x == 0 ? i : -i);
            break;
        }
        i++;
    }
    w_nbyte(buf, i+1, arg);
}
339 
#ifdef DBL_MANT_DIG
#define DECIMAL_MANT (53-16)	/* from IEEE754 double precision */

#if DBL_MANT_DIG > 32
#define MANT_BITS 32
#elif DBL_MANT_DIG > 24
#define MANT_BITS 24
#elif DBL_MANT_DIG > 16
#define MANT_BITS 16
#else
#define MANT_BITS 8
#endif

/*
 * Refine `d` with extra mantissa bytes found in `buf` (`len` bytes).
 *
 * The extension is only honoured when there are at least two bytes and
 * the first one is 0 (the binary mantissa mark); in every other case `d`
 * is returned untouched.  Otherwise |d| is truncated to DECIMAL_MANT
 * mantissa bits, the remaining bytes are consumed big-endian in groups of
 * MANT_BITS/8 and added below those bits, and the sign of the original
 * `d` is re-applied.
 */
static double
load_mantissa(double d, const char *buf, long len)
{
    int exp2, negative, scale, take, k;
    unsigned long word;

    if (!len) return d;
    len--;			/* account for the mark byte */
    if (len <= 0) return d;
    if (*buf++) return d;	/* not a binary mantissa mark */

    negative = d < 0;
    scale = 0;
    modf(ldexp(frexp(fabs(d), &exp2), DECIMAL_MANT), &d);
    do {
	/* at most MANT_BITS/8 bytes per round, fewer on the last one */
	take = len < MANT_BITS / 8 ? (int)len : MANT_BITS / 8;
	word = 0;
	for (k = 0; k < take; k++) {
	    word = (word << 8) | (*buf++ & 0xff);
	}
	scale -= 8 * take;
	d += ldexp((double)word, scale);
	len -= MANT_BITS / 8;
    } while (len > 0);
    d = ldexp(d, exp2 - DECIMAL_MANT);
    return negative ? -d : d;
}
#else
#define load_mantissa(d, buf, len) (d)
#endif
387 
388 #ifdef DBL_DIG
389 #define FLOAT_DIG (DBL_DIG+2)
390 #else
391 #define FLOAT_DIG 17
392 #endif
393 
394 static void
395 w_float(double d, struct dump_arg *arg)
396 {
397  char buf[FLOAT_DIG + (DECIMAL_MANT + 7) / 8 + 10];
398 
399  if (isinf(d)) {
400  if (d < 0) w_cstr("-inf", arg);
401  else w_cstr("inf", arg);
402  }
403  else if (isnan(d)) {
404  w_cstr("nan", arg);
405  }
406  else if (d == 0.0) {
407  if (1.0/d < 0) w_cstr("-0", arg);
408  else w_cstr("0", arg);
409  }
410  else {
411  int decpt, sign, digs, len = 0;
412  char *e, *p = ruby_dtoa(d, 0, 0, &decpt, &sign, &e);
413  if (sign) buf[len++] = '-';
414  digs = (int)(e - p);
415  if (decpt < -3 || decpt > digs) {
416  buf[len++] = p[0];
417  if (--digs > 0) buf[len++] = '.';
418  memcpy(buf + len, p + 1, digs);
419  len += digs;
420  len += snprintf(buf + len, sizeof(buf) - len, "e%d", decpt - 1);
421  }
422  else if (decpt > 0) {
423  memcpy(buf + len, p, decpt);
424  len += decpt;
425  if ((digs -= decpt) > 0) {
426  buf[len++] = '.';
427  memcpy(buf + len, p + decpt, digs);
428  len += digs;
429  }
430  }
431  else {
432  buf[len++] = '0';
433  buf[len++] = '.';
434  if (decpt) {
435  memset(buf + len, '0', -decpt);
436  len -= decpt;
437  }
438  memcpy(buf + len, p, digs);
439  len += digs;
440  }
441  xfree(p);
442  w_bytes(buf, len, arg);
443  }
444 }
445 
446 static void
447 w_symbol(VALUE sym, struct dump_arg *arg)
448 {
449  st_data_t num;
450  VALUE encname;
451 
452  if (st_lookup(arg->symbols, sym, &num)) {
453  w_byte(TYPE_SYMLINK, arg);
454  w_long((long)num, arg);
455  }
456  else {
457  const VALUE orig_sym = sym;
458  sym = rb_sym2str(sym);
459  if (!sym) {
460  rb_raise(rb_eTypeError, "can't dump anonymous ID %"PRIdVALUE, sym);
461  }
462  encname = encoding_name(sym, arg);
463  if (NIL_P(encname) ||
465  encname = Qnil;
466  }
467  else {
468  w_byte(TYPE_IVAR, arg);
469  }
470  w_byte(TYPE_SYMBOL, arg);
471  w_bytes(RSTRING_PTR(sym), RSTRING_LEN(sym), arg);
472  st_add_direct(arg->symbols, orig_sym, arg->symbols->num_entries);
473  if (!NIL_P(encname)) {
474  struct dump_call_arg c_arg;
475  c_arg.limit = 1;
476  c_arg.arg = arg;
477  w_long(1L, arg);
478  w_encoding(encname, &c_arg);
479  }
480  }
481 }
482 
483 static void
484 w_unique(VALUE s, struct dump_arg *arg)
485 {
486  must_not_be_anonymous("class", s);
487  w_symbol(rb_str_intern(s), arg);
488 }
489 
490 static void w_object(VALUE,struct dump_arg*,int);
491 
492 static int
493 hash_each(VALUE key, VALUE value, struct dump_call_arg *arg)
494 {
495  w_object(key, arg->arg, arg->limit);
496  w_object(value, arg->arg, arg->limit);
497  return ST_CONTINUE;
498 }
499 
500 #define SINGLETON_DUMP_UNABLE_P(klass) \
501  (rb_id_table_size(RCLASS_M_TBL(klass)) > 0 || \
502  (RCLASS_IV_TBL(klass) && RCLASS_IV_TBL(klass)->num_entries > 1))
503 
504 static void
505 w_extended(VALUE klass, struct dump_arg *arg, int check)
506 {
507  if (check && FL_TEST(klass, FL_SINGLETON)) {
508  VALUE origin = RCLASS_ORIGIN(klass);
509  if (SINGLETON_DUMP_UNABLE_P(klass) ||
510  (origin != klass && SINGLETON_DUMP_UNABLE_P(origin))) {
511  rb_raise(rb_eTypeError, "singleton can't be dumped");
512  }
513  klass = RCLASS_SUPER(klass);
514  }
515  while (BUILTIN_TYPE(klass) == T_ICLASS) {
516  VALUE path = rb_class_name(RBASIC(klass)->klass);
517  w_byte(TYPE_EXTENDED, arg);
518  w_unique(path, arg);
519  klass = RCLASS_SUPER(klass);
520  }
521 }
522 
523 static void
524 w_class(char type, VALUE obj, struct dump_arg *arg, int check)
525 {
526  VALUE path;
527  st_data_t real_obj;
528  VALUE klass;
529 
530  if (arg->compat_tbl &&
531  st_lookup(arg->compat_tbl, (st_data_t)obj, &real_obj)) {
532  obj = (VALUE)real_obj;
533  }
534  klass = CLASS_OF(obj);
535  w_extended(klass, arg, check);
536  w_byte(type, arg);
537  path = class2path(rb_class_real(klass));
538  w_unique(path, arg);
539 }
540 
541 static void
542 w_uclass(VALUE obj, VALUE super, struct dump_arg *arg)
543 {
544  VALUE klass = CLASS_OF(obj);
545 
546  w_extended(klass, arg, TRUE);
547  klass = rb_class_real(klass);
548  if (klass != super) {
549  w_byte(TYPE_UCLASS, arg);
550  w_unique(class2path(klass), arg);
551  }
552 }
553 
554 #define to_be_skipped_id(id) (id == rb_id_encoding() || id == rb_intern("E") || !rb_id2str(id))
555 
556 static int
557 w_obj_each(st_data_t key, st_data_t val, st_data_t a)
558 {
559  ID id = (ID)key;
560  VALUE value = (VALUE)val;
561  struct dump_call_arg *arg = (struct dump_call_arg *)a;
562 
563  if (to_be_skipped_id(id)) return ST_CONTINUE;
564  w_symbol(ID2SYM(id), arg->arg);
565  w_object(value, arg->arg, arg->limit);
566  return ST_CONTINUE;
567 }
568 
569 static int
570 obj_count_ivars(st_data_t key, st_data_t val, st_data_t a)
571 {
572  ID id = (ID)key;
573  if (!to_be_skipped_id(id)) ++*(st_index_t *)a;
574  return ST_CONTINUE;
575 }
576 
577 static VALUE
578 encoding_name(VALUE obj, struct dump_arg *arg)
579 {
580  int encidx = rb_enc_get_index(obj);
581  rb_encoding *enc = 0;
582  st_data_t name;
583 
584  if (encidx <= 0 || !(enc = rb_enc_from_index(encidx))) {
585  return Qnil;
586  }
587 
588  /* special treatment for US-ASCII and UTF-8 */
589  if (encidx == rb_usascii_encindex()) {
590  return Qfalse;
591  }
592  else if (encidx == rb_utf8_encindex()) {
593  return Qtrue;
594  }
595 
596  if (arg->encodings ?
597  !st_lookup(arg->encodings, (st_data_t)rb_enc_name(enc), &name) :
598  (arg->encodings = st_init_strcasetable(), 1)) {
599  name = (st_data_t)rb_str_new_cstr(rb_enc_name(enc));
600  st_insert(arg->encodings, (st_data_t)rb_enc_name(enc), name);
601  }
602  return (VALUE)name;
603 }
604 
605 static void
606 w_encoding(VALUE encname, struct dump_call_arg *arg)
607 {
608  int limit = arg->limit;
609  if (limit >= 0) ++limit;
610  switch (encname) {
611  case Qfalse:
612  case Qtrue:
613  w_symbol(ID2SYM(rb_intern("E")), arg->arg);
614  w_object(encname, arg->arg, limit);
615  case Qnil:
616  return;
617  }
618  w_symbol(ID2SYM(rb_id_encoding()), arg->arg);
619  w_object(encname, arg->arg, limit);
620 }
621 
622 static st_index_t
623 has_ivars(VALUE obj, VALUE encname, VALUE *ivobj)
624 {
625  st_index_t enc = !NIL_P(encname);
626  st_index_t num = 0;
627 
628  if (SPECIAL_CONST_P(obj)) goto generic;
629  switch (BUILTIN_TYPE(obj)) {
630  case T_OBJECT:
631  case T_CLASS:
632  case T_MODULE:
633  break; /* counted elsewhere */
634  default:
635  generic:
636  rb_ivar_foreach(obj, obj_count_ivars, (st_data_t)&num);
637  if (num) *ivobj = obj;
638  }
639 
640  return num + enc;
641 }
642 
643 static void
644 w_ivar(st_index_t num, VALUE ivobj, VALUE encname, struct dump_call_arg *arg)
645 {
646  w_long(num, arg->arg);
647  w_encoding(encname, arg);
648  if (ivobj != Qundef) {
649  rb_ivar_foreach(ivobj, w_obj_each, (st_data_t)arg);
650  }
651 }
652 
653 static void
654 w_objivar(VALUE obj, struct dump_call_arg *arg)
655 {
656  st_data_t num = 0;
657 
658  rb_ivar_foreach(obj, obj_count_ivars, (st_data_t)&num);
659  w_long(num, arg->arg);
660  if (num != 0) {
661  rb_ivar_foreach(obj, w_obj_each, (st_data_t)arg);
662  }
663 }
664 
/*
 * Serialize one object (recursively) into the dump state `arg`.
 *
 * `limit` is the remaining nesting depth: 0 raises ArgumentError, a
 * positive value is decremented for the children, a negative value
 * disables the check.
 *
 * Order of decisions:
 *   1. an object already present in arg->data is written as TYPE_LINK;
 *   2. nil/true/false, fixnums, symbols and flonums are written inline;
 *   3. objects responding to marshal_dump, then _dump, use those hooks;
 *   4. everything else is dispatched on its built-in type, after an
 *      optional substitution through the compat allocator table.
 * Trailing instance variables / encoding are appended when `hasiv` is set.
 */
665 static void
666 w_object(VALUE obj, struct dump_arg *arg, int limit)
667 {
668  struct dump_call_arg c_arg;
669  VALUE ivobj = Qundef;
670  st_data_t num;
671  st_index_t hasiv = 0;
672  VALUE encname = Qnil;
673 
674  if (limit == 0) {
675  rb_raise(rb_eArgError, "exceed depth limit");
676  }
677 
678  if (limit > 0) limit--;
679  c_arg.limit = limit;
680  c_arg.arg = arg;
681 
 /* already dumped: emit a back-reference instead of the object */
682  if (st_lookup(arg->data, obj, &num)) {
683  w_byte(TYPE_LINK, arg);
684  w_long((long)num, arg);
685  return;
686  }
687 
688  if (obj == Qnil) {
689  w_byte(TYPE_NIL, arg);
690  }
691  else if (obj == Qtrue) {
692  w_byte(TYPE_TRUE, arg);
693  }
694  else if (obj == Qfalse) {
695  w_byte(TYPE_FALSE, arg);
696  }
697  else if (FIXNUM_P(obj)) {
698 #if SIZEOF_LONG <= 4
699  w_byte(TYPE_FIXNUM, arg);
700  w_long(FIX2INT(obj), arg);
701 #else
 /* fixnums that do not fit the 32-bit wire form go out as bignums */
702  if (RSHIFT((long)obj, 31) == 0 || RSHIFT((long)obj, 31) == -1) {
703  w_byte(TYPE_FIXNUM, arg);
704  w_long(FIX2LONG(obj), arg);
705  }
706  else {
707  w_object(rb_int2big(FIX2LONG(obj)), arg, limit);
708  }
709 #endif
710  }
711  else if (SYMBOL_P(obj)) {
712  w_symbol(obj, arg);
713  }
714  else if (FLONUM_P(obj)) {
715  st_add_direct(arg->data, obj, arg->data->num_entries);
716  w_byte(TYPE_FLOAT, arg);
717  w_float(RFLOAT_VALUE(obj), arg);
718  }
719  else {
720  VALUE v;
721 
722  if (!RBASIC_CLASS(obj)) {
723  rb_raise(rb_eTypeError, "can't dump internal %s",
 /* NOTE(review): the listing lost the line that supplies the %s argument
  * and closes this call - restore it from the repository. */
725  }
726 
727  arg->infection |= (int)FL_TEST(obj, MARSHAL_INFECTION);
728 
 /* user hook 1: marshal_dump; registered before the call so that
  * references from the returned value back to obj become links */
729  if (rb_obj_respond_to(obj, s_mdump, TRUE)) {
730  st_add_direct(arg->data, obj, arg->data->num_entries);
731 
732  v = dump_funcall(arg, obj, s_mdump, 0, 0);
733  w_class(TYPE_USRMARSHAL, obj, arg, FALSE);
734  w_object(v, arg, limit);
735  return;
736  }
 /* user hook 2: _dump(limit) must return a String; ivars/encoding of
  * the returned string take precedence over those of obj */
737  if (rb_obj_respond_to(obj, s_dump, TRUE)) {
738  VALUE ivobj2 = Qundef;
739  st_index_t hasiv2;
740  VALUE encname2;
741 
742  v = INT2NUM(limit);
743  v = dump_funcall(arg, obj, s_dump, 1, &v);
744  if (!RB_TYPE_P(v, T_STRING)) {
745  rb_raise(rb_eTypeError, "_dump() must return string");
746  }
747  hasiv = has_ivars(obj, (encname = encoding_name(obj, arg)), &ivobj);
748  hasiv2 = has_ivars(v, (encname2 = encoding_name(v, arg)), &ivobj2);
749  if (hasiv2) {
750  hasiv = hasiv2;
751  ivobj = ivobj2;
752  encname = encname2;
753  }
754  if (hasiv) w_byte(TYPE_IVAR, arg);
755  w_class(TYPE_USERDEF, obj, arg, FALSE);
756  w_bytes(RSTRING_PTR(v), RSTRING_LEN(v), arg);
757  if (hasiv) {
758  w_ivar(hasiv, ivobj, encname, &c_arg);
759  }
760  st_add_direct(arg->data, obj, arg->data->num_entries);
761  return;
762  }
763 
764  st_add_direct(arg->data, obj, arg->data->num_entries);
765 
766  hasiv = has_ivars(obj, (encname = encoding_name(obj, arg)), &ivobj);
 /* compat substitution: if the class allocator has a registered entry,
  * dump the object returned by its dumper and remember the original */
767  {
768  st_data_t compat_data;
769  rb_alloc_func_t allocator = rb_get_alloc_func(RBASIC(obj)->klass);
770  if (st_lookup(compat_allocator_tbl,
771  (st_data_t)allocator,
772  &compat_data)) {
773  marshal_compat_t *compat = (marshal_compat_t*)compat_data;
774  VALUE real_obj = obj;
775  obj = compat->dumper(real_obj);
776  if (!arg->compat_tbl) {
 /* NOTE(review): the statement that creates arg->compat_tbl is missing
  * from the listing; as shown, the st_insert below would receive NULL. */
778  }
779  st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj);
780  if (obj != real_obj && ivobj == Qundef) hasiv = 0;
781  }
782  }
783  if (hasiv) w_byte(TYPE_IVAR, arg);
784 
785  switch (BUILTIN_TYPE(obj)) {
786  case T_CLASS:
787  if (FL_TEST(obj, FL_SINGLETON)) {
788  rb_raise(rb_eTypeError, "singleton class can't be dumped");
789  }
790  w_byte(TYPE_CLASS, arg);
791  {
792  VALUE path = class2path(obj);
793  w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
794  RB_GC_GUARD(path);
795  }
796  break;
797 
798  case T_MODULE:
799  w_byte(TYPE_MODULE, arg);
800  {
801  VALUE path = class2path(obj);
802  w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
803  RB_GC_GUARD(path);
804  }
805  break;
806 
807  case T_FLOAT:
808  w_byte(TYPE_FLOAT, arg);
809  w_float(RFLOAT_VALUE(obj), arg);
810  break;
811 
 /* bignum: sign byte, length in shorts, then the magnitude as
  * little-endian shorts */
812  case T_BIGNUM:
813  w_byte(TYPE_BIGNUM, arg);
814  {
815  char sign = BIGNUM_SIGN(obj) ? '+' : '-';
816  size_t len = BIGNUM_LEN(obj);
817  size_t slen;
818  BDIGIT *d = BIGNUM_DIGITS(obj);
819 
820  slen = SHORTLEN(len);
821  if (LONG_MAX < slen) {
822  rb_raise(rb_eTypeError, "too big Bignum can't be dumped");
823  }
824 
825  w_byte(sign, arg);
826  w_long((long)slen, arg);
827  while (len--) {
828 #if SIZEOF_BDIGIT > SIZEOF_SHORT
829  BDIGIT num = *d;
830  int i;
831 
832  for (i=0; i<SIZEOF_BDIGIT; i+=SIZEOF_SHORT) {
833  w_short(num & SHORTMASK, arg);
834  num = SHORTDN(num);
 /* stop early on the top digit once only zero shorts remain */
835  if (len == 0 && num == 0) break;
836  }
837 #else
838  w_short(*d, arg);
839 #endif
840  d++;
841  }
842  }
843  break;
844 
845  case T_STRING:
846  w_uclass(obj, rb_cString, arg);
847  w_byte(TYPE_STRING, arg);
848  w_bytes(RSTRING_PTR(obj), RSTRING_LEN(obj), arg);
849  break;
850 
851  case T_REGEXP:
852  w_uclass(obj, rb_cRegexp, arg);
853  w_byte(TYPE_REGEXP, arg);
854  {
855  int opts = rb_reg_options(obj);
856  w_bytes(RREGEXP_SRC_PTR(obj), RREGEXP_SRC_LEN(obj), arg);
857  w_byte((char)opts, arg);
858  }
859  break;
860 
861  case T_ARRAY:
862  w_uclass(obj, rb_cArray, arg);
863  w_byte(TYPE_ARRAY, arg);
864  {
865  long i, len = RARRAY_LEN(obj);
866 
867  w_long(len, arg);
 /* the length was already written, so any resize by a dump hook
  * would corrupt the stream - detect it and abort */
868  for (i=0; i<RARRAY_LEN(obj); i++) {
869  w_object(RARRAY_AREF(obj, i), arg, limit);
870  if (len != RARRAY_LEN(obj)) {
871  rb_raise(rb_eRuntimeError, "array modified during dump");
872  }
873  }
874  }
875  break;
876 
877  case T_HASH:
878  w_uclass(obj, rb_cHash, arg);
879  if (NIL_P(RHASH_IFNONE(obj))) {
880  w_byte(TYPE_HASH, arg);
881  }
882  else if (FL_TEST(obj, HASH_PROC_DEFAULT)) {
883  rb_raise(rb_eTypeError, "can't dump hash with default proc");
884  }
885  else {
886  w_byte(TYPE_HASH_DEF, arg);
887  }
888  w_long(RHASH_SIZE(obj), arg);
889  rb_hash_foreach(obj, hash_each, (st_data_t)&c_arg);
 /* a non-nil default value follows the entries */
890  if (!NIL_P(RHASH_IFNONE(obj))) {
891  w_object(RHASH_IFNONE(obj), arg, limit);
892  }
893  break;
894 
895  case T_STRUCT:
896  w_class(TYPE_STRUCT, obj, arg, TRUE);
897  {
898  long len = RSTRUCT_LEN(obj);
899  VALUE mem;
900  long i;
901 
902  w_long(len, arg);
903  mem = rb_struct_members(obj);
904  for (i=0; i<len; i++) {
905  w_symbol(RARRAY_AREF(mem, i), arg);
906  w_object(RSTRUCT_GET(obj, i), arg, limit);
907  }
908  }
909  break;
910 
911  case T_OBJECT:
912  w_class(TYPE_OBJECT, obj, arg, TRUE);
913  w_objivar(obj, &c_arg);
914  break;
915 
916  case T_DATA:
917  {
918  VALUE v;
919 
920  if (!rb_obj_respond_to(obj, s_dump_data, TRUE)) {
 /* NOTE(review): the line opening the raise call for the message
  * below is missing from the listing - restore it from the repository. */
922  "no _dump_data is defined for class %"PRIsVALUE,
923  rb_obj_class(obj));
924  }
925  v = dump_funcall(arg, obj, s_dump_data, 0, 0);
926  w_class(TYPE_DATA, obj, arg, TRUE);
927  w_object(v, arg, limit);
928  }
929  break;
930 
931  default:
932  rb_raise(rb_eTypeError, "can't dump %"PRIsVALUE,
933  rb_obj_class(obj));
934  break;
935  }
936  RB_GC_GUARD(obj);
937  }
938  if (hasiv) {
939  w_ivar(hasiv, ivobj, encname, &c_arg);
940  }
941 }
942 
943 static void
944 clear_dump_arg(struct dump_arg *arg)
945 {
946  if (!arg->symbols) return;
947  st_free_table(arg->symbols);
948  arg->symbols = 0;
949  st_free_table(arg->data);
950  arg->data = 0;
951  if (arg->compat_tbl) {
953  arg->compat_tbl = 0;
954  }
955  if (arg->encodings) {
956  st_free_table(arg->encodings);
957  arg->encodings = 0;
958  }
959 }
960 
961 NORETURN(static inline void io_needed(void));
962 static inline void
963 io_needed(void)
964 {
965  rb_raise(rb_eTypeError, "instance of IO needed");
966 }
967 
968 /*
969  * call-seq:
970  * dump( obj [, anIO] , limit=-1 ) -> anIO
971  *
972  * Serializes obj and all descendant objects. If anIO is
973  * specified, the serialized data will be written to it, otherwise the
974  * data will be returned as a String. If limit is specified, the
975  * traversal of subobjects will be limited to that depth. If limit is
976  * negative, no checking of depth will be performed.
977  *
978  * class Klass
979  * def initialize(str)
980  * @str = str
981  * end
982  * def say_hello
983  * @str
984  * end
985  * end
986  *
987  * (produces no output)
988  *
989  * o = Klass.new("hello\n")
990  * data = Marshal.dump(o)
991  * obj = Marshal.load(data)
992  * obj.say_hello #=> "hello\n"
993  *
994  * Marshal can't dump following objects:
995  * * anonymous Class/Module.
996  * * objects which are related to system (ex: Dir, File::Stat, IO, File, Socket
997  * and so on)
998  * * an instance of MatchData, Data, Method, UnboundMethod, Proc, Thread,
999  * ThreadGroup, Continuation
1000  * * objects which define singleton methods
1001  */
1002 static VALUE
1003 marshal_dump(int argc, VALUE *argv)
1004 {
1005  VALUE obj, port, a1, a2;
1006  int limit = -1;
1007 
1008  port = Qnil;
1009  rb_scan_args(argc, argv, "12", &obj, &a1, &a2);
1010  if (argc == 3) {
1011  if (!NIL_P(a2)) limit = NUM2INT(a2);
1012  if (NIL_P(a1)) io_needed();
1013  port = a1;
1014  }
1015  else if (argc == 2) {
1016  if (FIXNUM_P(a1)) limit = FIX2INT(a1);
1017  else if (NIL_P(a1)) io_needed();
1018  else port = a1;
1019  }
1020  return rb_marshal_dump_limited(obj, port, limit);
1021 }
1022 
1023 VALUE
1024 rb_marshal_dump_limited(VALUE obj, VALUE port, int limit)
1025 {
1026  struct dump_arg *arg;
1027  VALUE wrapper; /* used to avoid memory leak in case of exception */
1028 
1029  wrapper = TypedData_Make_Struct(0, struct dump_arg, &dump_arg_data, arg);
1030  arg->dest = 0;
1031  arg->symbols = st_init_numtable();
1032  arg->data = rb_init_identtable();
1033  arg->infection = 0;
1034  arg->compat_tbl = 0;
1035  arg->encodings = 0;
1036  arg->str = rb_str_buf_new(0);
1037  if (!NIL_P(port)) {
1038  if (!rb_respond_to(port, s_write)) {
1039  io_needed();
1040  }
1041  arg->dest = port;
1042  dump_check_funcall(arg, port, s_binmode, 0, 0);
1043  }
1044  else {
1045  port = arg->str;
1046  }
1047 
1048  w_byte(MARSHAL_MAJOR, arg);
1049  w_byte(MARSHAL_MINOR, arg);
1050 
1051  w_object(obj, arg, limit);
1052  if (arg->dest) {
1053  rb_io_write(arg->dest, arg->str);
1054  rb_str_resize(arg->str, 0);
1055  }
1056  clear_dump_arg(arg);
1057  RB_GC_GUARD(wrapper);
1058 
1059  return port;
1060 }
1061 
/*
 * State of one Marshal.load run.
 *
 * NOTE(review): this listing has dropped several member lines.  The
 * visible code also uses arg->src, arg->symbols, arg->data and
 * arg->infection, none of which are declared below - restore the full
 * definition from the repository.
 */
1062 struct load_arg {
 /* read-ahead buffer filled by r_byte1_buffered()/r_bytes1_buffered() */
1064  char *buf;
 /* number of unread bytes currently held in buf */
1065  long buflen;
 /* caps the size of read requests; set outside the visible code */
1066  long readable;
 /* next read position: index into src when it is a String, else into buf */
1067  long offset;
1073 };
1074 
1075 static VALUE
1076 check_load_arg(VALUE ret, struct load_arg *arg, const char *name)
1077 {
1078  if (!arg->symbols) {
1079  rb_raise(rb_eRuntimeError, "Marshal.load reentered at %s",
1080  name);
1081  }
1082  return ret;
1083 }
1084 #define load_funcall(arg, obj, sym, argc, argv) \
1085  check_load_arg(rb_funcallv(obj, sym, argc, argv), arg, name_##sym)
1086 
1087 static void clear_load_arg(struct load_arg *arg);
1088 
/*
 * GC mark function for struct load_arg.  Nothing is marked when the
 * symbols table is NULL.
 */
1089 static void
1090 mark_load_arg(void *ptr)
1091 {
1092  struct load_arg *p = ptr;
1093  if (!p->symbols)
1094  return;
1095  rb_mark_tbl(p->symbols);
1096  rb_mark_tbl(p->data);
 /* NOTE(review): one statement is missing from the listing here;
  * check the repository for what else is marked. */
1098 }
1099 
/* GC free function: release what the load state owns, then the state itself. */
static void
free_load_arg(void *ptr)
{
    struct load_arg *state = ptr;

    clear_load_arg(state);
    xfree(state);
}
1106 
1107 static size_t
1108 memsize_load_arg(const void *ptr)
1109 {
1110  return sizeof(struct load_arg);
1111 }
1112 
/*
 * Typed-data descriptor for struct load_arg: type name, then the
 * mark / free / memsize callbacks, then the trailing fields ending in the
 * RUBY_TYPED_FREE_IMMEDIATELY flag.
 */
1113 static const rb_data_type_t load_arg_data = {
1114  "load_arg",
1115  {mark_load_arg, free_load_arg, memsize_load_arg,},
1116  0, 0, RUBY_TYPED_FREE_IMMEDIATELY
1117 };
1118 
1119 #define r_entry(v, arg) r_entry0((v), (arg)->data->num_entries, (arg))
1120 static VALUE r_entry0(VALUE v, st_index_t num, struct load_arg *arg);
1121 static VALUE r_object(struct load_arg *arg);
1122 static VALUE r_symbol(struct load_arg *arg);
1123 static VALUE path2class(VALUE path);
1124 
1125 NORETURN(static void too_short(void));
1126 static void
1127 too_short(void)
1128 {
1129  rb_raise(rb_eArgError, "marshal data too short");
1130 }
1131 
1132 static st_index_t
1133 r_prepare(struct load_arg *arg)
1134 {
1135  st_index_t idx = arg->data->num_entries;
1136 
1137  st_insert(arg->data, (st_data_t)idx, (st_data_t)Qundef);
1138  return idx;
1139 }
1140 
1141 static unsigned char
1142 r_byte1_buffered(struct load_arg *arg)
1143 {
1144  if (arg->buflen == 0) {
1145  long readable = arg->readable < BUFSIZ ? arg->readable : BUFSIZ;
1146  VALUE str, n = LONG2NUM(readable);
1147 
1148  str = load_funcall(arg, arg->src, s_read, 1, &n);
1149  if (NIL_P(str)) too_short();
1150  StringValue(str);
1151  arg->infection |= (int)FL_TEST(str, MARSHAL_INFECTION);
1152  memcpy(arg->buf, RSTRING_PTR(str), RSTRING_LEN(str));
1153  arg->offset = 0;
1154  arg->buflen = RSTRING_LEN(str);
1155  }
1156  arg->buflen--;
1157  return arg->buf[arg->offset++];
1158 }
1159 
1160 static int
1161 r_byte(struct load_arg *arg)
1162 {
1163  int c;
1164 
1165  if (RB_TYPE_P(arg->src, T_STRING)) {
1166  if (RSTRING_LEN(arg->src) > arg->offset) {
1167  c = (unsigned char)RSTRING_PTR(arg->src)[arg->offset++];
1168  }
1169  else {
1170  too_short();
1171  }
1172  }
1173  else {
1174  if (arg->readable >0 || arg->buflen > 0) {
1175  c = r_byte1_buffered(arg);
1176  }
1177  else {
1178  VALUE v = load_funcall(arg, arg->src, s_getbyte, 0, 0);
1179  if (NIL_P(v)) rb_eof_error();
1180  c = (unsigned char)NUM2CHR(v);
1181  }
1182  }
1183  return c;
1184 }
1185 
1186 static void
1187 long_toobig(int size)
1188 {
1189  rb_raise(rb_eTypeError, "long too big for this architecture (size "
1190  STRINGIZE(SIZEOF_LONG)", given %d)", size);
1191 }
1192 
/*
 * Read an integer written by w_long().
 *
 * The first byte c (signed) selects the form:
 *   c == 0          -> 0
 *   5 <= c <= 127   -> c - 5
 *   -128 <= c <= -5 -> c + 5
 *   1 <= c <= 4 ... -> c payload bytes follow, little-endian, zero-extended
 *   -4 <= c <= -1...-> |c| payload bytes follow, little-endian, one-extended
 * A payload longer than sizeof(long) raises TypeError.
 *
 * The value is assembled in an unsigned long so that shifting a byte into
 * the top bits never overflows a signed type.
 */
static long
r_long(struct load_arg *arg)
{
    unsigned long bits;
    int c = (signed char)r_byte(arg);
    int i;

    if (c == 0) return 0;
    if (c > 0) {
	if (4 < c && c < 128) {
	    return c - 5;
	}
	if (c > (int)sizeof(long)) long_toobig(c);
	bits = 0;
	for (i=0;i<c;i++) {
	    bits |= (unsigned long)r_byte(arg) << (8*i);
	}
    }
    else {
	if (-129 < c && c < -4) {
	    return c + 5;
	}
	c = -c;
	if (c > (int)sizeof(long)) long_toobig(c);
	bits = ~0UL;
	for (i=0;i<c;i++) {
	    bits &= ~(0xffUL << (8*i));
	    bits |= (unsigned long)r_byte(arg) << (8*i);
	}
    }
    return (long)bits;
}
1225 
1226 static VALUE
1227 r_bytes1(long len, struct load_arg *arg)
1228 {
1229  VALUE str, n = LONG2NUM(len);
1230 
1231  str = load_funcall(arg, arg->src, s_read, 1, &n);
1232  if (NIL_P(str)) too_short();
1233  StringValue(str);
1234  if (RSTRING_LEN(str) != len) too_short();
1235  arg->infection |= (int)FL_TEST(str, MARSHAL_INFECTION);
1236 
1237  return str;
1238 }
1239 
1240 static VALUE
1241 r_bytes1_buffered(long len, struct load_arg *arg)
1242 {
1243  VALUE str;
1244 
1245  if (len <= arg->buflen) {
1246  str = rb_str_new(arg->buf+arg->offset, len);
1247  arg->offset += len;
1248  arg->buflen -= len;
1249  }
1250  else {
1251  long buflen = arg->buflen;
1252  long readable = arg->readable + 1;
1253  long tmp_len, read_len, need_len = len - buflen;
1254  VALUE tmp, n;
1255 
1256  readable = readable < BUFSIZ ? readable : BUFSIZ;
1257  read_len = need_len > readable ? need_len : readable;
1258  n = LONG2NUM(read_len);
1259  tmp = load_funcall(arg, arg->src, s_read, 1, &n);
1260  if (NIL_P(tmp)) too_short();
1261  StringValue(tmp);
1262 
1263  tmp_len = RSTRING_LEN(tmp);
1264 
1265  if (tmp_len < need_len) too_short();
1266  arg->infection |= (int)FL_TEST(tmp, MARSHAL_INFECTION);
1267 
1268  str = rb_str_new(arg->buf+arg->offset, buflen);
1269  rb_str_cat(str, RSTRING_PTR(tmp), need_len);
1270 
1271  if (tmp_len > need_len) {
1272  buflen = tmp_len - need_len;
1273  memcpy(arg->buf, RSTRING_PTR(tmp)+need_len, buflen);
1274  arg->buflen = buflen;
1275  }
1276  else {
1277  arg->buflen = 0;
1278  }
1279  arg->offset = 0;
1280  }
1281 
1282  return str;
1283 }
1284 
1285 #define r_bytes(arg) r_bytes0(r_long(arg), (arg))
1286 
1287 static VALUE
1288 r_bytes0(long len, struct load_arg *arg)
1289 {
1290  VALUE str;
1291 
1292  if (len == 0) return rb_str_new(0, 0);
1293  if (RB_TYPE_P(arg->src, T_STRING)) {
1294  if (RSTRING_LEN(arg->src) - arg->offset >= len) {
1295  str = rb_str_new(RSTRING_PTR(arg->src)+arg->offset, len);
1296  arg->offset += len;
1297  }
1298  else {
1299  too_short();
1300  }
1301  }
1302  else {
1303  if (arg->readable > 0 || arg->buflen > 0) {
1304  str = r_bytes1_buffered(len, arg);
1305  }
1306  else {
1307  str = r_bytes1(len, arg);
1308  }
1309  }
1310  return str;
1311 }
1312 
1313 static int
1314 sym2encidx(VALUE sym, VALUE val)
1315 {
1316  static const char name_encoding[8] = "encoding";
1317  const char *p;
1318  long l;
1319  if (rb_enc_get_index(sym) != ENCINDEX_US_ASCII) return -1;
1320  RSTRING_GETMEM(sym, p, l);
1321  if (l <= 0) return -1;
1322  if (l == sizeof(name_encoding) &&
1323  memcmp(p, name_encoding, sizeof(name_encoding)) == 0) {
1324  int idx = rb_enc_find_index(StringValueCStr(val));
1325  return idx;
1326  }
1327  else if (l == 1 && *p == 'E') {
1328  if (val == Qfalse) return rb_usascii_encindex();
1329  else if (val == Qtrue) return rb_utf8_encindex();
1330  /* bogus ignore */
1331  }
1332  return -1;
1333 }
1334 
1335 static VALUE
1336 r_symlink(struct load_arg *arg)
1337 {
1338  st_data_t sym;
1339  long num = r_long(arg);
1340 
1341  if (!st_lookup(arg->symbols, num, &sym)) {
1342  rb_raise(rb_eArgError, "bad symbol");
1343  }
1344  return (VALUE)sym;
1345 }
1346 
/*
 * Reads the body of a symbol record and returns it as a String.
 *
 * The bytes are registered in arg->symbols under the next free index (the
 * table's entry count before insertion) so later symlinks can refer to
 * them.  When +ivar+ is nonzero, the instance-variable list that follows
 * is consumed; each pair is only examined for encoding information, and
 * the index obtained from the last pair is applied if it is greater than 0.
 */
1347 static VALUE
1348 r_symreal(struct load_arg *arg, int ivar)
1349 {
1350  VALUE s = r_bytes(arg);
1351  VALUE sym;
1352  int idx = -1;
1353  st_index_t n = arg->symbols->num_entries;
1354 
/* NOTE(review): original line 1355 is absent from this listing; restore it
 * from the upstream file before building. */
1356  st_insert(arg->symbols, (st_data_t)n, (st_data_t)s);
1357  if (ivar) {
1358  long num = r_long(arg);
1359  while (num-- > 0) {
1360  sym = r_symbol(arg);
1361  idx = sym2encidx(sym, r_object(arg));
1362  }
1363  }
1364  if (idx > 0) rb_enc_associate_index(s, idx);
1365 
1366  return s;
1367 }
1368 
1369 static VALUE
1370 r_symbol(struct load_arg *arg)
1371 {
1372  int type, ivar = 0;
1373 
1374  again:
1375  switch ((type = r_byte(arg))) {
1376  default:
1377  rb_raise(rb_eArgError, "dump format error for symbol(0x%x)", type);
1378  case TYPE_IVAR:
1379  ivar = 1;
1380  goto again;
1381  case TYPE_SYMBOL:
1382  return r_symreal(arg, ivar);
1383  case TYPE_SYMLINK:
1384  if (ivar) {
1385  rb_raise(rb_eArgError, "dump format error (symlink with encoding)");
1386  }
1387  return r_symlink(arg);
1388  }
1389 }
1390 
1391 static VALUE
1392 r_unique(struct load_arg *arg)
1393 {
1394  return r_symbol(arg);
1395 }
1396 
1397 static VALUE
1398 r_string(struct load_arg *arg)
1399 {
1400  return r_bytes(arg);
1401 }
1402 
1403 static VALUE
1404 r_entry0(VALUE v, st_index_t num, struct load_arg *arg)
1405 {
1406  st_data_t real_obj = (VALUE)Qundef;
1407  if (arg->compat_tbl && st_lookup(arg->compat_tbl, v, &real_obj)) {
1408  st_insert(arg->data, num, (st_data_t)real_obj);
1409  }
1410  else {
1411  st_insert(arg->data, num, (st_data_t)v);
1412  }
1413  if (arg->infection &&
1414  !RB_TYPE_P(v, T_CLASS) && !RB_TYPE_P(v, T_MODULE)) {
1415  OBJ_TAINT(v);
1416  if ((VALUE)real_obj != Qundef)
1417  OBJ_TAINT((VALUE)real_obj);
1418  }
1419  return v;
1420 }
1421 
1422 static VALUE
1423 r_fixup_compat(VALUE v, struct load_arg *arg)
1424 {
1425  st_data_t data;
1426  st_data_t key = (st_data_t)v;
1427  if (arg->compat_tbl && st_delete(arg->compat_tbl, &key, &data)) {
1428  VALUE real_obj = (VALUE)data;
1429  rb_alloc_func_t allocator = rb_get_alloc_func(CLASS_OF(real_obj));
1430  if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) {
1431  marshal_compat_t *compat = (marshal_compat_t*)data;
1432  compat->loader(real_obj, v);
1433  }
1434  v = real_obj;
1435  }
1436  return v;
1437 }
1438 
1439 static VALUE
1440 r_post_proc(VALUE v, struct load_arg *arg)
1441 {
1442  if (arg->proc) {
1443  v = load_funcall(arg, arg->proc, s_call, 1, &v);
1444  }
1445  return v;
1446 }
1447 
1448 static VALUE
1449 r_leave(VALUE v, struct load_arg *arg)
1450 {
1451  v = r_fixup_compat(v, arg);
1452  v = r_post_proc(v, arg);
1453  return v;
1454 }
1455 
1456 static int
1457 copy_ivar_i(st_data_t key, st_data_t val, st_data_t arg)
1458 {
1459  VALUE obj = (VALUE)arg, value = (VALUE)val;
1460  ID vid = (ID)key;
1461 
1462  if (!rb_ivar_defined(obj, vid))
1463  rb_ivar_set(obj, vid, value);
1464  return ST_CONTINUE;
1465 }
1466 
1467 static VALUE
1468 r_copy_ivar(VALUE v, VALUE data)
1469 {
1470  rb_ivar_foreach(data, copy_ivar_i, (st_data_t)v);
1471  return v;
1472 }
1473 
1474 static void
1475 r_ivar(VALUE obj, int *has_encoding, struct load_arg *arg)
1476 {
1477  long len;
1478 
1479  len = r_long(arg);
1480  if (len > 0) {
1481  do {
1482  VALUE sym = r_symbol(arg);
1483  VALUE val = r_object(arg);
1484  int idx = sym2encidx(sym, val);
1485  if (idx >= 0) {
1486  rb_enc_associate_index(obj, idx);
1487  if (has_encoding) *has_encoding = TRUE;
1488  }
1489  else {
1490  rb_ivar_set(obj, rb_intern_str(sym), val);
1491  }
1492  } while (--len > 0);
1493  }
1494 }
1495 
1496 static VALUE
1497 path2class(VALUE path)
1498 {
1499  VALUE v = rb_path_to_class(path);
1500 
1501  if (!RB_TYPE_P(v, T_CLASS)) {
1502  rb_raise(rb_eArgError, "%"PRIsVALUE" does not refer to class", path);
1503  }
1504  return v;
1505 }
1506 
1507 #define path2module(path) must_be_module(rb_path_to_class(path), path)
1508 
1509 static VALUE
1510 must_be_module(VALUE v, VALUE path)
1511 {
1512  if (!RB_TYPE_P(v, T_MODULE)) {
1513  rb_raise(rb_eArgError, "%"PRIsVALUE" does not refer to module", path);
1514  }
1515  return v;
1516 }
1517 
1518 static VALUE
1519 obj_alloc_by_klass(VALUE klass, struct load_arg *arg, VALUE *oldclass)
1520 {
1521  st_data_t data;
1522  rb_alloc_func_t allocator;
1523 
1524  allocator = rb_get_alloc_func(klass);
1525  if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) {
1526  marshal_compat_t *compat = (marshal_compat_t*)data;
1527  VALUE real_obj = rb_obj_alloc(klass);
1528  VALUE obj = rb_obj_alloc(compat->oldclass);
1529  if (oldclass) *oldclass = compat->oldclass;
1530 
1531  if (!arg->compat_tbl) {
1532  arg->compat_tbl = rb_init_identtable();
1533  }
1534  st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj);
1535  return obj;
1536  }
1537 
1538  return rb_obj_alloc(klass);
1539 }
1540 
1541 static VALUE
1542 obj_alloc_by_path(VALUE path, struct load_arg *arg)
1543 {
1544  return obj_alloc_by_klass(path2class(path), arg, 0);
1545 }
1546 
1547 static VALUE
1548 append_extmod(VALUE obj, VALUE extmod)
1549 {
1550  long i = RARRAY_LEN(extmod);
1551  while (i > 0) {
1552  VALUE m = RARRAY_AREF(extmod, --i);
1553  rb_extend_object(obj, m);
1554  }
1555  return obj;
1556 }
1557 
/*
 * Raises TypeError when instance variables are about to be applied to a
 * class or module loaded by path.  Relies on a variable named ivp being in
 * scope at the point of use; does nothing when ivp is NULL or *ivp is
 * zero.  +type+ must be a string literal (it is pasted into the message),
 * +str+ is the path shown in the message.
 */
1558 #define prohibit_ivar(type, str) do { \
1559  if (!ivp || !*ivp) break; \
1560  rb_raise(rb_eTypeError, \
1561  "can't override instance variable of "type" `%"PRIsVALUE"'", \
1562  (str)); \
1563  } while (0)
1564 
/*
 * Reads one marshaled value from +arg+ and returns it.
 *
 * The leading type byte selects one of the cases below.  +ivp+, when
 * non-NULL, points at the "instance variables follow" flag of an enclosing
 * TYPE_IVAR record; cases that consume those variables themselves (regexp,
 * _load data, symbol) reset it to FALSE so that the TYPE_IVAR case does
 * not read them a second time.  +extmod+ is nil or an array of modules
 * collected from enclosing TYPE_EXTENDED records.
 *
 * Raises ArgumentError for an unknown type byte, an unresolved link, or
 * when the resulting value is Qundef; individual cases raise ArgumentError
 * or TypeError for malformed or incompatible data.
 */
1565 static VALUE
1566 r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
1567 {
1568  VALUE v = Qnil;
1569  int type = r_byte(arg);
1570  long id;
1571  st_data_t link;
1572 
1573  switch (type) {
/* back-reference to an object already recorded in arg->data */
1574  case TYPE_LINK:
1575  id = r_long(arg);
1576  if (!st_lookup(arg->data, (st_data_t)id, &link)) {
1577  rb_raise(rb_eArgError, "dump format error (unlinked)");
1578  }
1579  v = (VALUE)link;
1580  v = r_post_proc(v, arg);
1581  break;
1582 
/* wrapped object followed by an instance-variable list */
1583  case TYPE_IVAR:
1584  {
1585  int ivar = TRUE;
1586 
1587  v = r_object0(arg, &ivar, extmod);
/* the inner case clears ivar if it already consumed the list */
1588  if (ivar) r_ivar(v, NULL, arg);
1589  }
1590  break;
1591 
1592  case TYPE_EXTENDED:
1593  {
1594  VALUE path = r_unique(arg);
1595  VALUE m = rb_path_to_class(path);
1596  if (NIL_P(extmod)) extmod = rb_ary_tmp_new(0);
1597 
1598  if (RB_TYPE_P(m, T_CLASS)) { /* prepended */
1599  VALUE c;
1600 
1601  v = r_object0(arg, 0, Qnil);
1602  c = CLASS_OF(v);
1603  if (c != m || FL_TEST(c, FL_SINGLETON)) {
/* NOTE(review): original line 1604 is absent from this listing; presumably
 * the opening of the rb_raise call whose arguments follow -- restore it
 * from the upstream file. */
1605  "prepended class %"PRIsVALUE" differs from class %"PRIsVALUE,
1606  path, rb_class_name(c));
1607  }
1608  c = rb_singleton_class(v);
1609  while (RARRAY_LEN(extmod) > 0) {
1610  m = rb_ary_pop(extmod);
1611  rb_prepend_module(c, m);
1612  }
1613  }
1614  else {
1615  must_be_module(m, path);
1616  rb_ary_push(extmod, m);
1617 
1618  v = r_object0(arg, 0, extmod);
1619  while (RARRAY_LEN(extmod) > 0) {
1620  m = rb_ary_pop(extmod);
1621  rb_extend_object(v, m);
1622  }
1623  }
1624  }
1625  break;
1626 
/* object whose class is replaced by the named class after loading */
1627  case TYPE_UCLASS:
1628  {
1629  VALUE c = path2class(r_unique(arg));
1630 
1631  if (FL_TEST(c, FL_SINGLETON)) {
1632  rb_raise(rb_eTypeError, "singleton can't be loaded");
1633  }
1634  v = r_object0(arg, 0, extmod);
1635  if (rb_special_const_p(v) || RB_TYPE_P(v, T_OBJECT) || RB_TYPE_P(v, T_CLASS)) {
1636  format_error:
1637  rb_raise(rb_eArgError, "dump format error (user class)");
1638  }
1639  if (RB_TYPE_P(v, T_MODULE) || !RTEST(rb_class_inherited_p(c, RBASIC(v)->klass))) {
1640  VALUE tmp = rb_obj_alloc(c);
1641 
/* the named class must allocate objects of the same built-in type */
1642  if (TYPE(v) != TYPE(tmp)) goto format_error;
1643  }
1644  RBASIC_SET_CLASS(v, c);
1645  }
1646  break;
1647 
1648  case TYPE_NIL:
1649  v = Qnil;
1650  v = r_leave(v, arg);
1651  break;
1652 
1653  case TYPE_TRUE:
1654  v = Qtrue;
1655  v = r_leave(v, arg);
1656  break;
1657 
1658  case TYPE_FALSE:
1659  v = Qfalse;
1660  v = r_leave(v, arg);
1661  break;
1662 
1663  case TYPE_FIXNUM:
1664  {
1665  long i = r_long(arg);
1666  v = LONG2FIX(i);
1667  }
1668  v = r_leave(v, arg);
1669  break;
1670 
/* float stored as text: "nan", "inf", "-inf" or a decimal literal */
1671  case TYPE_FLOAT:
1672  {
1673  double d;
1674  VALUE str = r_bytes(arg);
1675  const char *ptr = RSTRING_PTR(str);
1676 
1677  if (strcmp(ptr, "nan") == 0) {
1678  d = NAN;
1679  }
1680  else if (strcmp(ptr, "inf") == 0) {
1681  d = INFINITY;
1682  }
1683  else if (strcmp(ptr, "-inf") == 0) {
1684  d = -INFINITY;
1685  }
1686  else {
1687  char *e;
1688  d = strtod(ptr, &e);
/* bytes after the parsed number are handed to load_mantissa */
1689  d = load_mantissa(d, e, RSTRING_LEN(str) - (e - ptr));
1690  }
1691  v = DBL2NUM(d);
1692  v = r_entry(v, arg);
1693  v = r_leave(v, arg);
1694  }
1695  break;
1696 
/* sign byte, count of 2-byte words, then the little-endian words */
1697  case TYPE_BIGNUM:
1698  {
1699  long len;
1700  VALUE data;
1701  int sign;
1702 
1703  sign = r_byte(arg);
1704  len = r_long(arg);
1705  data = r_bytes0(len * 2, arg);
1706  v = rb_integer_unpack(RSTRING_PTR(data), len, 2, 0,
1707  INTEGER_PACK_LITTLE_ENDIAN | (sign == '-' ? INTEGER_PACK_NEGATIVE : 0));
1708  rb_str_resize(data, 0L);
1709  v = r_entry(v, arg);
1710  v = r_leave(v, arg);
1711  }
1712  break;
1713 
1714  case TYPE_STRING:
1715  v = r_entry(r_string(arg), arg);
1716  v = r_leave(v, arg);
1717  break;
1718 
1719  case TYPE_REGEXP:
1720  {
1721  VALUE str = r_bytes(arg);
1722  int options = r_byte(arg);
1723  int has_encoding = FALSE;
1724  st_index_t idx = r_prepare(arg);
1725 
1726  if (ivp) {
/* instance variables (e.g. the encoding) apply to the source string */
1727  r_ivar(str, &has_encoding, arg);
1728  *ivp = FALSE;
1729  }
1730  if (!has_encoding) {
1731  /* 1.8 compatibility; remove escapes undefined in 1.8 */
1732  char *ptr = RSTRING_PTR(str), *dst = ptr, *src = ptr;
1733  long len = RSTRING_LEN(str);
1734  long bs = 0;
/* bs counts consecutive backslashes; an odd count before one of the
 * listed letters means the letter is escaped, so the backslash just
 * copied is overwritten by stepping dst back */
1735  for (; len-- > 0; *dst++ = *src++) {
1736  switch (*src) {
1737  case '\\': bs++; break;
1738  case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1739  case 'm': case 'o': case 'p': case 'q': case 'u': case 'y':
1740  case 'E': case 'F': case 'H': case 'I': case 'J': case 'K':
1741  case 'L': case 'N': case 'O': case 'P': case 'Q': case 'R':
1742  case 'S': case 'T': case 'U': case 'V': case 'X': case 'Y':
1743  if (bs & 1) --dst;
/* falls through to reset the backslash count */
1744  default: bs = 0; break;
1745  }
1746  }
1747  rb_str_set_len(str, dst - ptr);
1748  }
1749  v = r_entry0(rb_reg_new_str(str, options), idx, arg);
1750  v = r_leave(v, arg);
1751  }
1752  break;
1753 
1754  case TYPE_ARRAY:
1755  {
1756  long len = r_long(arg);
1757 
1758  v = rb_ary_new2(len);
/* registered before the elements are read so they can link back to it */
1759  v = r_entry(v, arg);
1760  arg->readable += len - 1;
1761  while (len--) {
1762  rb_ary_push(v, r_object(arg));
1763  arg->readable--;
1764  }
1765  v = r_leave(v, arg);
1766  arg->readable++;
1767  }
1768  break;
1769 
1770  case TYPE_HASH:
1771  case TYPE_HASH_DEF:
1772  {
1773  long len = r_long(arg);
1774 
1775  v = rb_hash_new_with_size(len);
1776  v = r_entry(v, arg);
1777  arg->readable += (len - 1) * 2;
1778  while (len--) {
1779  VALUE key = r_object(arg);
1780  VALUE value = r_object(arg);
1781  rb_hash_aset(v, key, value);
1782  arg->readable -= 2;
1783  }
1784  arg->readable += 2;
1785  if (type == TYPE_HASH_DEF) {
/* one extra object follows the pairs: the hash's default value */
1786  RHASH_SET_IFNONE(v, r_object(arg));
1787  }
1788  v = r_leave(v, arg);
1789  }
1790  break;
1791 
1792  case TYPE_STRUCT:
1793  {
1794  VALUE mem, values;
1795  long i;
1796  VALUE slot;
1797  st_index_t idx = r_prepare(arg);
1798  VALUE klass = path2class(r_unique(arg));
1799  long len = r_long(arg);
1800 
1801  v = rb_obj_alloc(klass);
1802  if (!RB_TYPE_P(v, T_STRUCT)) {
1803  rb_raise(rb_eTypeError, "class %"PRIsVALUE" not a struct", rb_class_name(klass));
1804  }
1805  mem = rb_struct_s_members(klass);
1806  if (RARRAY_LEN(mem) != len) {
1807  rb_raise(rb_eTypeError, "struct %"PRIsVALUE" not compatible (struct size differs)",
1808  rb_class_name(klass));
1809  }
1810 
1811  arg->readable += (len - 1) * 2;
1812  v = r_entry0(v, idx, arg);
1813  values = rb_ary_new2(len);
/* each stored member name must match the class's member at that position */
1814  for (i=0; i<len; i++) {
1815  VALUE n = rb_sym2str(RARRAY_AREF(mem, i));
1816  slot = r_symbol(arg);
1817 
1818  if (!rb_str_equal(n, slot)) {
1819  rb_raise(rb_eTypeError, "struct %"PRIsVALUE" not compatible (:%"PRIsVALUE" for :%"PRIsVALUE")",
1820  rb_class_name(klass),
1821  slot, n);
1822  }
1823  rb_ary_push(values, r_object(arg));
1824  arg->readable -= 2;
1825  }
1826  rb_struct_initialize(v, values);
1827  v = r_leave(v, arg);
1828  arg->readable += 2;
1829  }
1830  break;
1831 
/* class-level _load: the class rebuilds the object from a string */
1832  case TYPE_USERDEF:
1833  {
1834  VALUE name = r_unique(arg);
1835  VALUE klass = path2class(name);
1836  VALUE data;
1837  st_data_t d;
1838 
1839  if (!rb_obj_respond_to(klass, s_load, TRUE)) {
1840  rb_raise(rb_eTypeError, "class %"PRIsVALUE" needs to have method `_load'",
1841  name);
1842  }
1843  data = r_string(arg);
1844  if (ivp) {
/* instance variables belong to the data string, not the result */
1845  r_ivar(data, NULL, arg);
1846  *ivp = FALSE;
1847  }
1848  v = load_funcall(arg, klass, s_load, 1, &data);
1849  v = r_entry(v, arg);
1850  if (st_lookup(compat_allocator_tbl, (st_data_t)rb_get_alloc_func(klass), &d)) {
1851  marshal_compat_t *compat = (marshal_compat_t*)d;
1852  v = compat->loader(klass, v);
1853  }
1854  v = r_post_proc(v, arg);
1855  }
1856  break;
1857 
/* instance-level marshal_load: allocate, then feed it the dumped object */
1858  case TYPE_USRMARSHAL:
1859  {
1860  VALUE name = r_unique(arg);
1861  VALUE klass = path2class(name);
1862  VALUE oldclass = 0;
1863  VALUE data;
1864 
1865  v = obj_alloc_by_klass(klass, arg, &oldclass);
1866  if (!NIL_P(extmod)) {
1867  /* for the case marshal_load is overridden */
1868  append_extmod(v, extmod);
1869  }
1870  if (!rb_obj_respond_to(v, s_mload, TRUE)) {
1871  rb_raise(rb_eTypeError, "instance of %"PRIsVALUE" needs to have method `marshal_load'",
1872  name);
1873  }
1874  v = r_entry(v, arg);
1875  data = r_object(arg);
1876  load_funcall(arg, v, s_mload, 1, &data);
1877  v = r_fixup_compat(v, arg);
1878  v = r_copy_ivar(v, data);
1879  v = r_post_proc(v, arg);
1880  if (!NIL_P(extmod)) {
/* with a compat class, v is now a different object, so extend it again */
1881  if (oldclass) append_extmod(v, extmod);
1882  rb_ary_clear(extmod);
1883  }
1884  }
1885  break;
1886 
1887  case TYPE_OBJECT:
1888  {
1889  st_index_t idx = r_prepare(arg);
1890  v = obj_alloc_by_path(r_unique(arg), arg);
1891  if (!RB_TYPE_P(v, T_OBJECT)) {
1892  rb_raise(rb_eArgError, "dump format error");
1893  }
1894  v = r_entry0(v, idx, arg);
1895  r_ivar(v, NULL, arg);
1896  v = r_leave(v, arg);
1897  }
1898  break;
1899 
1900  case TYPE_DATA:
1901  {
1902  VALUE name = r_unique(arg);
1903  VALUE klass = path2class(name);
1904  VALUE oldclass = 0;
1905  VALUE r;
1906 
1907  v = obj_alloc_by_klass(klass, arg, &oldclass);
1908  if (!RB_TYPE_P(v, T_DATA)) {
1909  rb_raise(rb_eArgError, "dump format error");
1910  }
1911  v = r_entry(v, arg);
1912  if (!rb_obj_respond_to(v, s_load_data, TRUE)) {
/* NOTE(review): original line 1913 is absent from this listing; presumably
 * the opening of the rb_raise call whose arguments follow -- restore it
 * from the upstream file. */
1914  "class %"PRIsVALUE" needs to have instance method `_load_data'",
1915  name);
1916  }
1917  r = r_object0(arg, 0, extmod);
1918  load_funcall(arg, v, s_load_data, 1, &r);
1919  v = r_leave(v, arg);
1920  }
1921  break;
1922 
/* old-format record: a path that may name either a class or a module */
1923  case TYPE_MODULE_OLD:
1924  {
1925  VALUE str = r_bytes(arg);
1926 
1927  v = rb_path_to_class(str);
1928  prohibit_ivar("class/module", str);
1929  v = r_entry(v, arg);
1930  v = r_leave(v, arg);
1931  }
1932  break;
1933 
1934  case TYPE_CLASS:
1935  {
1936  VALUE str = r_bytes(arg);
1937 
1938  v = path2class(str);
1939  prohibit_ivar("class", str);
1940  v = r_entry(v, arg);
1941  v = r_leave(v, arg);
1942  }
1943  break;
1944 
1945  case TYPE_MODULE:
1946  {
1947  VALUE str = r_bytes(arg);
1948 
1949  v = path2module(str);
1950  prohibit_ivar("module", str);
1951  v = r_entry(v, arg);
1952  v = r_leave(v, arg);
1953  }
1954  break;
1955 
1956  case TYPE_SYMBOL:
1957  if (ivp) {
/* r_symreal consumes the instance variables when *ivp is set */
1958  v = r_symreal(arg, *ivp);
1959  *ivp = FALSE;
1960  }
1961  else {
1962  v = r_symreal(arg, 0);
1963  }
1964  v = rb_str_intern(v);
1965  v = r_leave(v, arg);
1966  break;
1967 
1968  case TYPE_SYMLINK:
1969  v = rb_str_intern(r_symlink(arg));
1970  break;
1971 
1972  default:
1973  rb_raise(rb_eArgError, "dump format error(0x%x)", type);
1974  break;
1975  }
1976 
1977  if (v == Qundef) {
1978  rb_raise(rb_eArgError, "dump format error (bad link)");
1979  }
1980 
1981  return v;
1982 }
1983 
1984 static VALUE
1985 r_object(struct load_arg *arg)
1986 {
1987  return r_object0(arg, 0, Qnil);
1988 }
1989 
1990 static void
1991 clear_load_arg(struct load_arg *arg)
1992 {
1993  if (arg->buf) {
1994  xfree(arg->buf);
1995  arg->buf = 0;
1996  }
1997  arg->buflen = 0;
1998  arg->offset = 0;
1999  arg->readable = 0;
2000  if (!arg->symbols) return;
2001  st_free_table(arg->symbols);
2002  arg->symbols = 0;
2003  st_free_table(arg->data);
2004  arg->data = 0;
2005  if (arg->compat_tbl) {
2006  st_free_table(arg->compat_tbl);
2007  arg->compat_tbl = 0;
2008  }
2009 }
2010 
2011 /*
2012  * call-seq:
2013  * load( source [, proc] ) -> obj
2014  * restore( source [, proc] ) -> obj
2015  *
2016  * Returns the result of converting the serialized data in source into a
2017  * Ruby object (possibly with associated subordinate objects). source
2018  * may be either an instance of IO or an object that responds to
2019  * to_str. If proc is specified, each object will be passed to the proc, as the object
2020  * is being deserialized.
2021  *
2022  * Never pass untrusted data (including user supplied input) to this method.
2023  * Please see the overview for further details.
2024  */
2025 static VALUE
2026 marshal_load(int argc, VALUE *argv)
2027 {
2028  VALUE port, proc;
2029 
2030  rb_check_arity(argc, 1, 2);
2031  port = argv[0];
2032  proc = argc > 1 ? argv[1] : Qnil;
2033  return rb_marshal_load_with_proc(port, proc);
2034 }
2035 
2036 VALUE
2037 rb_marshal_load_with_proc(VALUE port, VALUE proc)
2038 {
2039  int major, minor, infection = 0;
2040  VALUE v;
2041  VALUE wrapper; /* used to avoid memory leak in case of exception */
2042  struct load_arg *arg;
2043 
2044  v = rb_check_string_type(port);
2045  if (!NIL_P(v)) {
2046  infection = (int)FL_TEST(port, MARSHAL_INFECTION); /* original taintedness */
2047  port = v;
2048  }
2049  else if (rb_respond_to(port, s_getbyte) && rb_respond_to(port, s_read)) {
2050  rb_check_funcall(port, s_binmode, 0, 0);
2051  infection = (int)FL_TAINT;
2052  }
2053  else {
2054  io_needed();
2055  }
2056  wrapper = TypedData_Make_Struct(0, struct load_arg, &load_arg_data, arg);
2057  arg->infection = infection;
2058  arg->src = port;
2059  arg->offset = 0;
2060  arg->symbols = st_init_numtable();
2061  arg->data = rb_init_identtable();
2062  arg->compat_tbl = 0;
2063  arg->proc = 0;
2064  arg->readable = 0;
2065 
2066  if (NIL_P(v))
2067  arg->buf = xmalloc(BUFSIZ);
2068  else
2069  arg->buf = 0;
2070 
2071  major = r_byte(arg);
2072  minor = r_byte(arg);
2073  if (major != MARSHAL_MAJOR || minor > MARSHAL_MINOR) {
2074  clear_load_arg(arg);
2075  rb_raise(rb_eTypeError, "incompatible marshal file format (can't be read)\n\
2076 \tformat version %d.%d required; %d.%d given",
2077  MARSHAL_MAJOR, MARSHAL_MINOR, major, minor);
2078  }
2079  if (RTEST(ruby_verbose) && minor != MARSHAL_MINOR) {
2080  rb_warn("incompatible marshal file format (can be read)\n\
2081 \tformat version %d.%d required; %d.%d given",
2082  MARSHAL_MAJOR, MARSHAL_MINOR, major, minor);
2083  }
2084 
2085  if (!NIL_P(proc)) arg->proc = proc;
2086  v = r_object(arg);
2087  clear_load_arg(arg);
2088  RB_GC_GUARD(wrapper);
2089 
2090  return v;
2091 }
2092 
2093 /*
2094  * The marshaling library converts collections of Ruby objects into a
2095  * byte stream, allowing them to be stored outside the currently
2096  * active script. This data may subsequently be read and the original
2097  * objects reconstituted.
2098  *
2099  * Marshaled data has major and minor version numbers stored along
2100  * with the object information. In normal use, marshaling can only
2101  * load data written with the same major version number and an equal
2102  * or lower minor version number. If Ruby's ``verbose'' flag is set
2103  * (normally using -d, -v, -w, or --verbose), loading data whose minor
2104  * number is lower also emits a warning. Marshal versioning is independent of
2105  * Ruby's version numbers. You can extract the version by reading the
2106  * first two bytes of marshaled data.
2107  *
2108  * str = Marshal.dump("thing")
2109  * RUBY_VERSION #=> "1.9.0"
2110  * str[0].ord #=> 4
2111  * str[1].ord #=> 8
2112  *
2113  * Some objects cannot be dumped: if the objects to be dumped include
2114  * bindings, procedure or method objects, instances of class IO, or
2115  * singleton objects, a TypeError will be raised.
2116  *
2117  * If your class has special serialization needs (for example, if you
2118  * want to serialize in some specific format), or if it contains
2119  * objects that would otherwise not be serializable, you can implement
2120  * your own serialization strategy.
2121  *
2122  * There are two methods of doing this, your object can define either
2123  * marshal_dump and marshal_load or _dump and _load. marshal_dump will take
2124  * precedence over _dump if both are defined. marshal_dump may result in
2125  * smaller Marshal strings.
2126  *
2127  * == Security considerations
2128  *
2129  * By design, Marshal.load can deserialize almost any class loaded into the
2130  * Ruby process. In many cases this can lead to remote code execution if the
2131  * Marshal data is loaded from an untrusted source.
2132  *
2133  * As a result, Marshal.load is not suitable as a general purpose serialization
2134  * format and you should never unmarshal user supplied input or other untrusted
2135  * data.
2136  *
2137  * If you need to deserialize untrusted data, use JSON or another serialization
2138  * format that is only able to load simple, 'primitive' types such as String,
2139  * Array, Hash, etc. Never allow user input to specify arbitrary types to
2140  * deserialize into.
2141  *
2142  * == marshal_dump and marshal_load
2143  *
2144  * When dumping an object the method marshal_dump will be called.
2145  * marshal_dump must return a result containing the information necessary for
2146  * marshal_load to reconstitute the object. The result can be any object.
2147  *
2148  * When loading an object dumped using marshal_dump the object is first
2149  * allocated then marshal_load is called with the result from marshal_dump.
2150  * marshal_load must recreate the object from the information in the result.
2151  *
2152  * Example:
2153  *
2154  * class MyObj
2155  * def initialize name, version, data
2156  * @name = name
2157  * @version = version
2158  * @data = data
2159  * end
2160  *
2161  * def marshal_dump
2162  * [@name, @version]
2163  * end
2164  *
2165  * def marshal_load array
2166  * @name, @version = array
2167  * end
2168  * end
2169  *
2170  * == _dump and _load
2171  *
2172  * Use _dump and _load when you need to allocate the object you're restoring
2173  * yourself.
2174  *
2175  * When dumping an object the instance method _dump is called with an Integer
2176  * which indicates the maximum depth of objects to dump (a value of -1 implies
2177  * that you should disable depth checking). _dump must return a String
2178  * containing the information necessary to reconstitute the object.
2179  *
2180  * The class method _load should take a String and use it to return an object
2181  * of the same class.
2182  *
2183  * Example:
2184  *
2185  * class MyObj
2186  * def initialize name, version, data
2187  * @name = name
2188  * @version = version
2189  * @data = data
2190  * end
2191  *
2192  * def _dump level
2193  * [@name, @version].join ':'
2194  * end
2195  *
2196  * def self._load args
2197  * new(*args.split(':'))
2198  * end
2199  * end
2200  *
2201  * Since Marshal.dump outputs a string you can have _dump return a Marshal
2202  * string which is Marshal.loaded in _load for complex objects.
2203  */
2204 void
/* NOTE(review): the declarator line (original line 2205) is absent from this
 * listing; presumably Init_marshal(void) -- confirm against the upstream file.
 *
 * Defines the Marshal module: interns the method-name IDs used by this
 * file, registers the module functions dump, load and restore (restore
 * shares load's implementation), and defines the MAJOR_VERSION and
 * MINOR_VERSION constants. */
2206 {
/* from here on rb_intern expands to rb_intern_const */
2207 #undef rb_intern
2208 #define rb_intern(str) rb_intern_const(str)
2209 
2210  VALUE rb_mMarshal = rb_define_module("Marshal");
/* set_id(s_x) assigns s_x the ID interned from name_s_x */
2211 #define set_id(sym) sym = rb_intern_const(name_##sym)
2212  set_id(s_dump);
2213  set_id(s_load);
2214  set_id(s_mdump);
2215  set_id(s_mload);
2216  set_id(s_dump_data);
2217  set_id(s_load_data);
2218  set_id(s_alloc);
2219  set_id(s_call);
2220  set_id(s_getbyte);
2221  set_id(s_read);
2222  set_id(s_write);
2223  set_id(s_binmode);
2224 
/* -1: the functions receive their arguments as (argc, argv) */
2225  rb_define_module_function(rb_mMarshal, "dump", marshal_dump, -1);
2226  rb_define_module_function(rb_mMarshal, "load", marshal_load, -1);
2227  rb_define_module_function(rb_mMarshal, "restore", marshal_load, -1);
2228 
2229  /* major version */
2230  rb_define_const(rb_mMarshal, "MAJOR_VERSION", INT2FIX(MARSHAL_MAJOR));
2231  /* minor version */
2232  rb_define_const(rb_mMarshal, "MINOR_VERSION", INT2FIX(MARSHAL_MINOR));
2233 }
2234 
2235 static st_table *
2236 compat_allocator_table(void)
2237 {
2238  if (compat_allocator_tbl) return compat_allocator_tbl;
2239  compat_allocator_tbl = st_init_numtable();
2240 #undef RUBY_UNTYPED_DATA_WARNING
2241 #define RUBY_UNTYPED_DATA_WARNING 0
2242  compat_allocator_tbl_wrapper =
2243  Data_Wrap_Struct(rb_cData, mark_marshal_compat_t, 0, compat_allocator_tbl);
2244  rb_gc_register_mark_object(compat_allocator_tbl_wrapper);
2245  return compat_allocator_tbl;
2246 }
2247 
2248 VALUE
/* NOTE(review): the declarator line (original line 2249) is absent from this
 * listing; the cross-reference index further down gives it as
 * rb_marshal_dump(VALUE obj, VALUE port).
 *
 * Dumps obj to port by delegating to rb_marshal_dump_limited with a limit
 * of -1. */
2250 {
2251  return rb_marshal_dump_limited(obj, port, -1);
2252 }
2253 
2254 VALUE
/* NOTE(review): the declarator line (original line 2255) is absent from this
 * listing; presumably rb_marshal_load(VALUE port) -- confirm against the
 * upstream file.
 *
 * Loads one object from port without a post-processing proc. */
2256 {
2257  return rb_marshal_load_with_proc(port, Qnil);
2258 }
RUBY_EXTERN VALUE rb_cString
Definition: ruby.h:1927
#define T_OBJECT
Definition: ruby.h:491
#define ENCINDEX_US_ASCII
Definition: encindex.h:44
int rb_enc_get_index(VALUE obj)
Definition: encoding.c:773
#define TYPE_OBJECT
Definition: marshal.c:64
RUBY_EXTERN VALUE rb_cData
Definition: ruby.h:1902
VALUE proc
Definition: marshal.c:1070
#define path2module(path)
Definition: marshal.c:1507
void rb_warn(const char *fmt,...)
Definition: error.c:246
char * buf
Definition: marshal.c:1064
VALUE rb_ary_pop(VALUE ary)
Definition: array.c:968
int infection
Definition: marshal.c:163
#define RARRAY_LEN(a)
Definition: ruby.h:1019
#define FALSE
Definition: nkf.h:174
#define RUBY_TYPED_FREE_IMMEDIATELY
Definition: ruby.h:1138
#define load_mantissa(d, buf, len)
Definition: marshal.c:385
VALUE rb_str_equal(VALUE str1, VALUE str2)
Definition: string.c:3214
#define INT2NUM(x)
Definition: ruby.h:1538
Definition: st.h:79
#define BIGNUM_DIGITS(b)
Definition: internal.h:616
#define NUM2INT(x)
Definition: ruby.h:684
struct dump_arg * arg
Definition: marshal.c:168
#define FL_TAINT
Definition: ruby.h:1213
#define CLASS_OF(v)
Definition: ruby.h:453
void rb_raise(VALUE exc, const char *fmt,...)
Definition: error.c:2284
#define T_MODULE
Definition: ruby.h:494
VALUE rb_str_cat(VALUE, const char *, long)
Definition: string.c:2746
VALUE rb_marshal_dump(VALUE obj, VALUE port)
Definition: marshal.c:2249
#define st_foreach
Definition: regint.h:186
#define Qtrue
Definition: ruby.h:437
#define SHORTMASK
Definition: marshal.c:32
VALUE rb_cHash
Definition: hash.c:82
#define BIGNUM_LEN(b)
Definition: internal.h:610
Definition: st.h:99
const int id
Definition: nkf.c:209
#define r_bytes(arg)
Definition: marshal.c:1285
#define TYPE_LINK
Definition: marshal.c:84
#define NAN
Definition: missing.h:155
#define FLOAT_DIG
Definition: marshal.c:391
#define rb_check_arity
Definition: intern.h:298
#define SHORTLEN(x)
Definition: marshal.c:36
VALUE rb_ary_push(VALUE ary, VALUE item)
Definition: array.c:924
#define TYPE_SYMLINK
Definition: marshal.c:81
int rb_usascii_encindex(void)
Definition: encoding.c:1344
VALUE rb_ary_tmp_new(long capa)
Definition: array.c:544
VALUE rb_path_to_class(VALUE)
Definition: variable.c:390
void rb_str_set_len(VALUE, long)
Definition: string.c:2627
VALUE dest
Definition: marshal.c:158
#define RBASIC_SET_CLASS(obj, cls)
Definition: internal.h:1471
int rb_reg_options(VALUE)
Definition: re.c:3543
int rb_enc_str_coderange(VALUE)
Definition: string.c:621
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Definition: ruby.h:984
VALUE rb_ary_clear(VALUE ary)
Definition: array.c:3501
#define RB_GC_GUARD(v)
Definition: ruby.h:552
#define T_HASH
Definition: ruby.h:499
VALUE rb_obj_alloc(VALUE)
Allocates an instance of klass.
Definition: object.c:2121
void rb_gc_mark(VALUE ptr)
Definition: gc.c:4464
#define T_ARRAY
Definition: ruby.h:498
st_data_t st_index_t
Definition: st.h:50
#define TYPE_UCLASS
Definition: marshal.c:63
#define SHORTDN(x)
Definition: marshal.c:33
#define st_delete
Definition: regint.h:182
#define st_lookup
Definition: regint.h:185
VALUE rb_io_write(VALUE, VALUE)
Definition: io.c:1510
ID rb_id_encoding(void)
Definition: encoding.c:753
#define TYPE_USERDEF
Definition: marshal.c:66
STATIC_ASSERT(marshal_infection_is_int, MARSHAL_INFECTION==(int) MARSHAL_INFECTION)
#define FIXNUM_P(f)
Definition: ruby.h:365
VALUE rb_hash_new_with_size(st_index_t size)
Definition: hash.c:430
#define FL_TEST(x, f)
Definition: ruby.h:1282
VALUE rb_ivar_defined(VALUE, ID)
Definition: variable.c:1374
#define RHASH_IFNONE(h)
Definition: ruby.h:1058
void rb_ivar_foreach(VALUE, int(*)(ANYARGS), st_data_t)
Definition: variable.c:1544
#define ENC_CODERANGE_7BIT
Definition: encoding.h:100
#define rb_ary_new2
Definition: intern.h:90
#define TYPE_IVAR
Definition: marshal.c:83
st_table * symbols
Definition: marshal.c:1068
#define RHASH_SET_IFNONE(h, ifnone)
Definition: ruby.h:1061
VALUE rb_eArgError
Definition: error.c:802
#define sym(x)
Definition: date_core.c:3721
#define TYPE_FALSE
Definition: marshal.c:59
VALUE rb_str_buf_cat(VALUE, const char *, long)
RUBY_SYMBOL_EXPORT_BEGIN typedef unsigned long st_data_t
Definition: st.h:22
#define SIZEOF_BDIGIT
Definition: internal.h:527
#define TYPE_HASH_DEF
Definition: marshal.c:74
#define Data_Wrap_Struct(klass, mark, free, sval)
Definition: ruby.h:1142
st_table * compat_tbl
Definition: marshal.c:1071
void rb_hash_foreach(VALUE hash, int(*func)(ANYARGS), VALUE farg)
Definition: hash.c:385
VALUE rb_struct_members(VALUE)
Definition: struct.c:65
#define FL_SINGLETON
Definition: ruby.h:1208
void rb_prepend_module(VALUE klass, VALUE module)
Definition: class.c:973
st_table * st_init_strcasetable(void)
Definition: st.c:640
#define strtod(s, e)
Definition: util.h:77
VALUE rb_singleton_class(VALUE obj)
Returns the singleton class of obj.
Definition: class.c:1689
VALUE rb_obj_class(VALUE)
call-seq: obj.class -> class
Definition: object.c:277
#define RB_TYPE_P(obj, type)
Definition: ruby.h:527
VALUE rb_struct_s_members(VALUE)
Definition: struct.c:51
#define prohibit_ivar(type, str)
Definition: marshal.c:1558
#define TYPE_FLOAT
Definition: marshal.c:68
#define TYPE_CLASS
Definition: marshal.c:77
st_table * data
Definition: marshal.c:1069
#define rb_intern_str(string)
Definition: generator.h:16
VALUE rb_class_name(VALUE)
Definition: variable.c:444
#define TYPE_STRING
Definition: marshal.c:70
VALUE rb_hash_aset(VALUE hash, VALUE key, VALUE val)
Definition: hash.c:1616
long buflen
Definition: marshal.c:1065
#define val
#define RREGEXP_SRC_PTR(r)
Definition: ruby.h:1051
#define TYPE_REGEXP
Definition: marshal.c:71
#define PRIdVALUE
Definition: ruby.h:130
#define SINGLETON_DUMP_UNABLE_P(klass)
Definition: marshal.c:500
#define TYPE_BIGNUM
Definition: marshal.c:69
#define TYPE_MODULE_OLD
Definition: marshal.c:76
VALUE oldclass
Definition: marshal.c:105
#define RSTRUCT_LEN(st)
Definition: ruby.h:1186
#define snprintf
Definition: subst.h:6
#define RCLASS_ORIGIN(c)
Definition: internal.h:794
#define NIL_P(v)
Definition: ruby.h:451
void rb_define_const(VALUE, const char *, VALUE)
Definition: variable.c:2691
#define FLONUM_P(x)
Definition: ruby.h:399
#define T_FLOAT
Definition: ruby.h:495
#define TYPE(x)
Definition: ruby.h:521
int argc
Definition: ruby.c:187
#define TYPE_SYMBOL
Definition: marshal.c:80
#define Qfalse
Definition: ruby.h:436
const char * rb_builtin_type_name(int t)
Definition: error.c:648
#define T_BIGNUM
Definition: ruby.h:501
#define LONG_MAX
Definition: ruby.h:189
void rb_gc_register_mark_object(VALUE obj)
Definition: gc.c:6227
rb_alloc_func_t rb_get_alloc_func(VALUE)
Definition: vm_method.c:681
RUBY_EXTERN int isinf(double)
Definition: isinf.c:56
VALUE rb_enc_associate_index(VALUE obj, int idx)
Definition: encoding.c:826
VALUE str
Definition: marshal.c:158
#define TYPE_NIL
Definition: marshal.c:57
int link(const char *, const char *)
Definition: win32.c:4925
char * ruby_dtoa(double d_, int mode, int ndigits, int *decpt, int *sign, char **rve)
Definition: util.c:3144
#define HASH_PROC_DEFAULT
Definition: internal.h:1273
#define ALLOC(type)
Definition: ruby.h:1588
VALUE rb_integer_unpack(const void *words, size_t numwords, size_t wordsize, size_t nails, int flags)
Definition: bignum.c:3615
#define to_be_skipped_id(id)
Definition: marshal.c:554
VALUE rb_str_resize(VALUE, long)
Definition: string.c:2644
#define TYPE_USRMARSHAL
Definition: marshal.c:67
RUBY_EXTERN VALUE rb_cRegexp
Definition: ruby.h:1925
#define RSTRING_LEN(str)
Definition: ruby.h:971
void rb_define_module_function(VALUE module, const char *name, VALUE(*func)(ANYARGS), int argc)
Defines a module function for module.
Definition: class.c:1731
#define TYPE_FIXNUM
Definition: marshal.c:60
#define TRUE
Definition: nkf.h:175
#define T_DATA
Definition: ruby.h:506
st_table * rb_init_identtable(void)
Definition: hash.c:2932
#define TYPE_STRUCT
Definition: marshal.c:75
int rb_obj_respond_to(VALUE, ID, int)
Definition: vm_method.c:1984
#define MARSHAL_MAJOR
Definition: marshal.c:54
#define rb_enc_name(enc)
Definition: encoding.h:171
VALUE rb_class_path(VALUE)
Definition: variable.c:295
#define RHASH_SIZE(hsh)
Definition: fbuffer.h:8
#define TYPE_TRUE
Definition: marshal.c:58
#define NUM2CHR(x)
Definition: ruby.h:1575
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Definition: class.c:1908
st_table * compat_tbl
Definition: marshal.c:161
VALUE rb_class_inherited_p(VALUE mod, VALUE arg)
call-seq: mod <= other -> true, false, or nil
Definition: object.c:1827
#define TYPE_DATA
Definition: marshal.c:65
VALUE rb_ivar_set(VALUE, ID, VALUE)
Definition: variable.c:1315
unsigned char buf[MIME_BUF_SIZE]
Definition: nkf.c:4309
#define PRIsVALUE
Definition: ruby.h:135
unsigned long ID
Definition: ruby.h:86
#define T_STRUCT
Definition: ruby.h:500
#define Qnil
Definition: ruby.h:438
#define rb_intern(str)
#define BUILTIN_TYPE(x)
Definition: ruby.h:518
#define TYPE_ARRAY
Definition: marshal.c:72
unsigned long VALUE
Definition: ruby.h:85
#define RBASIC(obj)
Definition: ruby.h:1197
VALUE rb_eTypeError
Definition: error.c:801
int rb_utf8_encindex(void)
Definition: encoding.c:1329
#define FIX2INT(x)
Definition: ruby.h:686
void rb_mark_tbl(st_table *tbl)
Definition: gc.c:4302
VALUE rb_check_funcall(VALUE, ID, int, const VALUE *)
Definition: vm_eval.c:389
#define TYPE_MODULE
Definition: marshal.c:78
#define INFINITY
Definition: missing.h:149
#define rb_enc_asciicompat(enc)
Definition: encoding.h:239
VALUE rb_str_new_cstr(const char *)
Definition: string.c:771
int memcmp(const void *s1, const void *s2, size_t len)
Definition: memcmp.c:7
#define INTEGER_PACK_LITTLE_ENDIAN
Definition: intern.h:149
#define isnan(x)
Definition: win32.h:346
st_table * symbols
Definition: marshal.c:159
#define w_cstr(s, arg)
Definition: marshal.c:291
#define LONG2NUM(x)
Definition: ruby.h:1573
#define rb_funcallv
Definition: console.c:21
int rb_respond_to(VALUE, ID)
Definition: vm_method.c:1994
#define MARSHAL_INFECTION
Definition: marshal.c:154
register unsigned int len
Definition: zonetab.h:51
#define load_funcall(arg, obj, sym, argc, argv)
Definition: marshal.c:1084
#define StringValueCStr(v)
Definition: ruby.h:571
st_table * encodings
Definition: marshal.c:162
NORETURN(static inline void io_needed(void))
#define RSTRING_PTR(str)
Definition: ruby.h:975
#define INTEGER_PACK_NEGATIVE
Definition: intern.h:147
rb_encoding * rb_enc_get(VALUE obj)
Definition: encoding.c:860
#define MARSHAL_MINOR
Definition: marshal.c:55
#define BIGNUM_SIGN(b)
Definition: internal.h:599
#define RFLOAT_VALUE(v)
Definition: ruby.h:933
int size
Definition: encoding.c:57
#define RSTRUCT_GET(st, idx)
Definition: ruby.h:1189
#define INT2FIX(i)
Definition: ruby.h:232
#define RCLASS_SUPER(c)
Definition: classext.h:16
#define RARRAY_AREF(a, i)
Definition: ruby.h:1033
#define xmalloc
Definition: defines.h:183
#define st_init_numtable
Definition: regint.h:178
#define RBASIC_CLASS(obj)
Definition: ruby.h:878
#define TYPE_HASH
Definition: marshal.c:73
VALUE rb_eRuntimeError
Definition: error.c:800
void Init_marshal(void)
Definition: marshal.c:2205
void rb_mark_set(st_table *tbl)
Definition: gc.c:4134
void rb_extend_object(VALUE obj, VALUE module)
Extend the object with the module.
Definition: eval.c:1596
VALUE rb_check_string_type(VALUE)
Definition: string.c:2246
int infection
Definition: marshal.c:1072
#define dump_check_funcall(arg, obj, sym, argc, argv)
Definition: marshal.c:197
#define LONG2FIX(i)
Definition: ruby.h:234
#define dump_funcall(arg, obj, sym, argc, argv)
Definition: marshal.c:195
#define RTEST(v)
Definition: ruby.h:450
#define T_STRING
Definition: ruby.h:496
#define st_add_direct
Definition: regint.h:187
VALUE src
Definition: marshal.c:1063
long offset
Definition: marshal.c:1067
#define TypedData_Make_Struct(klass, type, data_type, sval)
Definition: ruby.h:1175
VALUE rb_cArray
Definition: array.c:26
VALUE rb_int2big(SIGNED_VALUE n)
Definition: bignum.c:3162
#define st_insert
Definition: regint.h:184
#define T_CLASS
Definition: ruby.h:492
long readable
Definition: marshal.c:1066
void rb_marshal_define_compat(VALUE newclass, VALUE oldclass, VALUE(*dumper)(VALUE), VALUE(*loader)(VALUE, VALUE))
Definition: marshal.c:134
st_table * data
Definition: marshal.c:160
#define RREGEXP_SRC_LEN(r)
Definition: ruby.h:1052
const char * name
Definition: nkf.c:208
#define ID2SYM(x)
Definition: ruby.h:383
VALUE rb_reg_new_str(VALUE, int)
Definition: re.c:2861
#define st_free_table
Definition: regint.h:188
int rb_enc_find_index(const char *name)
Definition: encoding.c:704
long len
Definition: ruby.h:958
VALUE rb_str_intern(VALUE)
Definition: symbol.c:661
#define STRINGIZE(expr)
Definition: defines.h:203
#define SPECIAL_CONST_P(x)
Definition: ruby.h:1242
void void xfree(void *)
VALUE rb_define_module(const char *name)
Definition: class.c:768
void rb_mark_hash(st_table *tbl)
Definition: gc.c:4157
int rb_enc_str_asciionly_p(VALUE)
Definition: string.c:641
VALUE rb_str_buf_new(long)
Definition: string.c:1282
#define SYMBOL_P(x)
Definition: ruby.h:382
VALUE obj
Definition: marshal.c:167
#define set_id(sym)
VALUE(* rb_alloc_func_t)(VALUE)
Definition: intern.h:370
#define NULL
Definition: _sdbm.c:102
#define FIX2LONG(x)
Definition: ruby.h:363
#define Qundef
Definition: ruby.h:439
#define T_ICLASS
Definition: ruby.h:493
VALUE rb_marshal_load(VALUE port)
Definition: marshal.c:2255
VALUE rb_class_real(VALUE cl)
Looks up the nearest ancestor of cl, skipping singleton classes or module inclusions.
Definition: object.c:251
VALUE rb_struct_initialize(VALUE, VALUE)
Definition: struct.c:571
#define OBJ_TAINT(x)
Definition: ruby.h:1298
#define r_entry(v, arg)
Definition: marshal.c:1119
st_index_t num_entries
Definition: st.h:86
VALUE(* loader)(VALUE, VALUE)
Definition: marshal.c:107
#define ruby_verbose
Definition: ruby.h:1813
VALUE(* dumper)(VALUE)
Definition: marshal.c:106
VALUE newclass
Definition: marshal.c:104
#define T_REGEXP
Definition: ruby.h:497
#define BDIGIT
Definition: bigdecimal.h:48
char ** argv
Definition: ruby.c:188
char * ptr
Definition: ruby.h:959
#define DBL2NUM(dbl)
Definition: ruby.h:934
#define StringValue(v)
Definition: ruby.h:569
#define TYPE_EXTENDED
Definition: marshal.c:62
rb_encoding * rb_enc_from_index(int index)
Definition: encoding.c:616
#define rb_sym2str(sym)
Definition: console.c:107
VALUE rb_str_new(const char *, long)
Definition: string.c:737
void rb_eof_error(void)
Definition: io.c:620