Ruby 2.5.0dev (2017-10-22 revision 60238)
encoding.c
Go to the documentation of this file.
1 /**********************************************************************
2 
3  encoding.c -
4 
5  $Author$
6  created at: Thu May 24 17:23:27 JST 2007
7 
8  Copyright (C) 2007 Yukihiro Matsumoto
9 
10 **********************************************************************/
11 
12 #include "internal.h"
13 #include "encindex.h"
14 #include "regenc.h"
15 #include <ctype.h>
16 #include "ruby/util.h"
17 
18 #include "ruby_assert.h"
19 #ifndef ENC_DEBUG
20 #define ENC_DEBUG 0
21 #endif
22 #define ENC_ASSERT (!ENC_DEBUG)?(void)0:assert
23 #define MUST_STRING(str) (ENC_ASSERT(RB_TYPE_P(str, T_STRING)), str)
24 
25 #undef rb_ascii8bit_encindex
26 #undef rb_utf8_encindex
27 #undef rb_usascii_encindex
28 
30 
31 #if defined __GNUC__ && __GNUC__ >= 4
32 #pragma GCC visibility push(default)
33 int rb_enc_register(const char *name, rb_encoding *encoding);
34 void rb_enc_set_base(const char *name, const char *orig);
35 int rb_enc_set_dummy(int index);
36 void rb_encdb_declare(const char *name);
37 int rb_encdb_replicate(const char *name, const char *orig);
38 int rb_encdb_dummy(const char *name);
39 int rb_encdb_alias(const char *alias, const char *orig);
40 void rb_encdb_set_unicode(int index);
41 #pragma GCC visibility pop
42 #endif
43 
44 static ID id_encoding;
46 static VALUE rb_encoding_list;
47 
49  const char *name;
52 };
53 
54 static struct {
56  int count;
57  int size;
59 } enc_table;
60 
61 #define ENC_DUMMY_FLAG (1<<24)
62 #define ENC_INDEX_MASK (~(~0U<<24))
63 
64 #define ENC_TO_ENCINDEX(enc) (int)((enc)->ruby_encoding_index & ENC_INDEX_MASK)
65 #define ENC_DUMMY_P(enc) ((enc)->ruby_encoding_index & ENC_DUMMY_FLAG)
66 #define ENC_SET_DUMMY(enc) ((enc)->ruby_encoding_index |= ENC_DUMMY_FLAG)
67 
68 void rb_enc_init(void);
69 
70 #define ENCODING_COUNT ENCINDEX_BUILTIN_MAX
71 #define UNSPECIFIED_ENCODING INT_MAX
72 
73 #define ENCODING_NAMELEN_MAX 63
74 #define valid_encoding_name_p(name) ((name) && strlen(name) <= ENCODING_NAMELEN_MAX)
75 
76 #define enc_autoload_p(enc) (!rb_enc_mbmaxlen(enc))
77 
78 static int load_encoding(const char *name);
79 
80 static const rb_data_type_t encoding_data_type = {
81  "encoding",
82  {0, 0, 0,},
84 };
85 
86 #define is_data_encoding(obj) (RTYPEDDATA_P(obj) && RTYPEDDATA_TYPE(obj) == &encoding_data_type)
87 #define is_obj_encoding(obj) (RB_TYPE_P((obj), T_DATA) && is_data_encoding(obj))
88 
89 int
91 {
92  return is_data_encoding(obj);
93 }
94 
95 static VALUE
96 enc_new(rb_encoding *encoding)
97 {
98  return TypedData_Wrap_Struct(rb_cEncoding, &encoding_data_type, (void *)encoding);
99 }
100 
101 static VALUE
102 rb_enc_from_encoding_index(int idx)
103 {
104  VALUE list, enc;
105 
106  if (!(list = rb_encoding_list)) {
107  rb_bug("rb_enc_from_encoding_index(%d): no rb_encoding_list", idx);
108  }
109  enc = rb_ary_entry(list, idx);
110  if (NIL_P(enc)) {
111  rb_bug("rb_enc_from_encoding_index(%d): not created yet", idx);
112  }
113  return enc;
114 }
115 
116 VALUE
118 {
119  int idx;
120  if (!encoding) return Qnil;
121  idx = ENC_TO_ENCINDEX(encoding);
122  return rb_enc_from_encoding_index(idx);
123 }
124 
125 int
127 {
128  return enc ? ENC_TO_ENCINDEX(enc) : 0;
129 }
130 
131 int
133 {
134  return ENC_DUMMY_P(enc) != 0;
135 }
136 
137 static int enc_autoload(rb_encoding *);
138 
139 static int
140 check_encoding(rb_encoding *enc)
141 {
142  int index = rb_enc_to_index(enc);
143  if (rb_enc_from_index(index) != enc)
144  return -1;
145  if (enc_autoload_p(enc)) {
146  index = enc_autoload(enc);
147  }
148  return index;
149 }
150 
151 static int
152 enc_check_encoding(VALUE obj)
153 {
154  if (!is_obj_encoding(obj)) {
155  return -1;
156  }
157  return check_encoding(RDATA(obj)->data);
158 }
159 
160 NORETURN(static void not_encoding(VALUE enc));
161 static void
162 not_encoding(VALUE enc)
163 {
164  rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Encoding)",
165  rb_obj_class(enc));
166 }
167 
168 static rb_encoding *
169 must_encoding(VALUE enc)
170 {
171  int index = enc_check_encoding(enc);
172  if (index < 0) {
173  not_encoding(enc);
174  }
175  return DATA_PTR(enc);
176 }
177 
178 static rb_encoding *
179 must_encindex(int index)
180 {
181  rb_encoding *enc = rb_enc_from_index(index);
182  if (!enc) {
183  rb_raise(rb_eEncodingError, "encoding index out of bound: %d",
184  index);
185  }
186  if (ENC_TO_ENCINDEX(enc) != (int)(index & ENC_INDEX_MASK)) {
187  rb_raise(rb_eEncodingError, "wrong encoding index %d for %s (expected %d)",
188  index, rb_enc_name(enc), ENC_TO_ENCINDEX(enc));
189  }
190  if (enc_autoload_p(enc) && enc_autoload(enc) == -1) {
191  rb_loaderror("failed to load encoding (%s)",
192  rb_enc_name(enc));
193  }
194  return enc;
195 }
196 
197 int
199 {
200  int idx;
201 
202  idx = enc_check_encoding(enc);
203  if (idx >= 0) {
204  return idx;
205  }
206  else if (NIL_P(enc = rb_check_string_type(enc))) {
207  return -1;
208  }
209  if (!rb_enc_asciicompat(rb_enc_get(enc))) {
210  return -1;
211  }
212  return rb_enc_find_index(StringValueCStr(enc));
213 }
214 
215 /* Returns encoding index or UNSPECIFIED_ENCODING */
216 static int
217 str_find_encindex(VALUE enc)
218 {
219  int idx;
220 
221  StringValue(enc);
222  if (!rb_enc_asciicompat(rb_enc_get(enc))) {
223  rb_raise(rb_eArgError, "invalid name encoding (non ASCII)");
224  }
226  return idx;
227 }
228 
229 static int
230 str_to_encindex(VALUE enc)
231 {
232  int idx = str_find_encindex(enc);
233  if (idx < 0) {
234  rb_raise(rb_eArgError, "unknown encoding name - %"PRIsVALUE, enc);
235  }
236  return idx;
237 }
238 
239 static rb_encoding *
240 str_to_encoding(VALUE enc)
241 {
242  return rb_enc_from_index(str_to_encindex(enc));
243 }
244 
245 rb_encoding *
247 {
248  if (enc_check_encoding(enc) >= 0) return RDATA(enc)->data;
249  return str_to_encoding(enc);
250 }
251 
252 rb_encoding *
254 {
255  int idx;
256  if (enc_check_encoding(enc) >= 0) return RDATA(enc)->data;
257  idx = str_find_encindex(enc);
258  if (idx < 0) return NULL;
259  return rb_enc_from_index(idx);
260 }
261 
262 void
264 {
265 }
266 
267 static int
268 enc_table_expand(int newsize)
269 {
270  struct rb_encoding_entry *ent;
271  int count = newsize;
272 
273  if (enc_table.size >= newsize) return newsize;
274  newsize = (newsize + 7) / 8 * 8;
275  ent = realloc(enc_table.list, sizeof(*enc_table.list) * newsize);
276  if (!ent) return -1;
277  memset(ent + enc_table.size, 0, sizeof(*ent)*(newsize - enc_table.size));
278  enc_table.list = ent;
279  enc_table.size = newsize;
280  return count;
281 }
282 
283 static int
284 enc_register_at(int index, const char *name, rb_encoding *base_encoding)
285 {
286  struct rb_encoding_entry *ent = &enc_table.list[index];
287  rb_raw_encoding *encoding;
288  VALUE list;
289 
290  if (!valid_encoding_name_p(name)) return -1;
291  if (!ent->name) {
292  ent->name = name = strdup(name);
293  }
294  else if (STRCASECMP(name, ent->name)) {
295  return -1;
296  }
297  encoding = (rb_raw_encoding *)ent->enc;
298  if (!encoding) {
299  encoding = xmalloc(sizeof(rb_encoding));
300  }
301  if (base_encoding) {
302  *encoding = *base_encoding;
303  }
304  else {
305  memset(encoding, 0, sizeof(*ent->enc));
306  }
307  encoding->name = name;
308  encoding->ruby_encoding_index = index;
309  ent->enc = encoding;
310  st_insert(enc_table.names, (st_data_t)name, (st_data_t)index);
311  list = rb_encoding_list;
312  if (list && NIL_P(rb_ary_entry(list, index))) {
313  /* initialize encoding data */
314  rb_ary_store(list, index, enc_new(encoding));
315  }
316  return index;
317 }
318 
319 static int
320 enc_register(const char *name, rb_encoding *encoding)
321 {
322  int index = enc_table.count;
323 
324  if ((index = enc_table_expand(index + 1)) < 0) return -1;
325  enc_table.count = index;
326  return enc_register_at(index - 1, name, encoding);
327 }
328 
329 static void set_encoding_const(const char *, rb_encoding *);
330 int rb_enc_registered(const char *name);
331 
332 int
333 rb_enc_register(const char *name, rb_encoding *encoding)
334 {
335  int index = rb_enc_registered(name);
336 
337  if (index >= 0) {
338  rb_encoding *oldenc = rb_enc_from_index(index);
339  if (STRCASECMP(name, rb_enc_name(oldenc))) {
340  index = enc_register(name, encoding);
341  }
342  else if (enc_autoload_p(oldenc) || !ENC_DUMMY_P(oldenc)) {
343  enc_register_at(index, name, encoding);
344  }
345  else {
346  rb_raise(rb_eArgError, "encoding %s is already registered", name);
347  }
348  }
349  else {
350  index = enc_register(name, encoding);
351  set_encoding_const(name, rb_enc_from_index(index));
352  }
353  return index;
354 }
355 
356 void
357 rb_encdb_declare(const char *name)
358 {
359  int idx = rb_enc_registered(name);
360  if (idx < 0) {
361  idx = enc_register(name, 0);
362  }
363  set_encoding_const(name, rb_enc_from_index(idx));
364 }
365 
366 static void
367 enc_check_duplication(const char *name)
368 {
369  if (rb_enc_registered(name) >= 0) {
370  rb_raise(rb_eArgError, "encoding %s is already registered", name);
371  }
372 }
373 
374 static rb_encoding*
375 set_base_encoding(int index, rb_encoding *base)
376 {
377  rb_encoding *enc = enc_table.list[index].enc;
378 
379  enc_table.list[index].base = base;
380  if (ENC_DUMMY_P(base)) ENC_SET_DUMMY((rb_raw_encoding *)enc);
381  return enc;
382 }
383 
384 /* for encdb.h
385  * Set base encoding for encodings which are not replicas
386  * but not in their own files.
387  */
388 void
389 rb_enc_set_base(const char *name, const char *orig)
390 {
391  int idx = rb_enc_registered(name);
392  int origidx = rb_enc_registered(orig);
393  set_base_encoding(idx, rb_enc_from_index(origidx));
394 }
395 
396 /* for encdb.h
397  * Set encoding dummy.
398  */
399 int
401 {
402  rb_encoding *enc = enc_table.list[index].enc;
403 
405  return index;
406 }
407 
408 int
409 rb_enc_replicate(const char *name, rb_encoding *encoding)
410 {
411  int idx;
412 
413  enc_check_duplication(name);
414  idx = enc_register(name, encoding);
415  set_base_encoding(idx, encoding);
416  set_encoding_const(name, rb_enc_from_index(idx));
417  return idx;
418 }
419 
420 /*
421  * call-seq:
422  * enc.replicate(name) -> encoding
423  *
424  * Returns a replicated encoding of _enc_ whose name is _name_.
425  * The new encoding should have the same byte structure of _enc_.
426  * If _name_ is used by another encoding, raise ArgumentError.
427  *
428  */
429 static VALUE
430 enc_replicate(VALUE encoding, VALUE name)
431 {
432  return rb_enc_from_encoding_index(
434  rb_to_encoding(encoding)));
435 }
436 
437 static int
438 enc_replicate_with_index(const char *name, rb_encoding *origenc, int idx)
439 {
440  if (idx < 0) {
441  idx = enc_register(name, origenc);
442  }
443  else {
444  idx = enc_register_at(idx, name, origenc);
445  }
446  if (idx >= 0) {
447  set_base_encoding(idx, origenc);
448  set_encoding_const(name, rb_enc_from_index(idx));
449  }
450  return idx;
451 }
452 
453 int
454 rb_encdb_replicate(const char *name, const char *orig)
455 {
456  int origidx = rb_enc_registered(orig);
457  int idx = rb_enc_registered(name);
458 
459  if (origidx < 0) {
460  origidx = enc_register(orig, 0);
461  }
462  return enc_replicate_with_index(name, rb_enc_from_index(origidx), idx);
463 }
464 
465 int
467 {
468  int index = rb_enc_replicate(name, rb_ascii8bit_encoding());
469  rb_encoding *enc = enc_table.list[index].enc;
470 
472  return index;
473 }
474 
475 int
476 rb_encdb_dummy(const char *name)
477 {
478  int index = enc_replicate_with_index(name, rb_ascii8bit_encoding(),
479  rb_enc_registered(name));
480  rb_encoding *enc = enc_table.list[index].enc;
481 
483  return index;
484 }
485 
486 /*
487  * call-seq:
488  * enc.dummy? -> true or false
489  *
490  * Returns true for dummy encodings.
491  * A dummy encoding is an encoding for which character handling is not properly
492  * implemented.
493  * It is used for stateful encodings.
494  *
495  * Encoding::ISO_2022_JP.dummy? #=> true
496  * Encoding::UTF_8.dummy? #=> false
497  *
498  */
499 static VALUE
500 enc_dummy_p(VALUE enc)
501 {
502  return ENC_DUMMY_P(must_encoding(enc)) ? Qtrue : Qfalse;
503 }
504 
505 /*
506  * call-seq:
507  * enc.ascii_compatible? -> true or false
508  *
509  * Returns whether ASCII-compatible or not.
510  *
511  * Encoding::UTF_8.ascii_compatible? #=> true
512  * Encoding::UTF_16BE.ascii_compatible? #=> false
513  *
514  */
515 static VALUE
516 enc_ascii_compatible_p(VALUE enc)
517 {
518  return rb_enc_asciicompat(must_encoding(enc)) ? Qtrue : Qfalse;
519 }
520 
521 /*
522  * Returns non-zero when the encoding is Unicode series other than UTF-7 else 0.
523  */
524 int
526 {
527  return ONIGENC_IS_UNICODE(enc);
528 }
529 
530 static st_data_t
531 enc_dup_name(st_data_t name)
532 {
533  return (st_data_t)strdup((const char *)name);
534 }
535 
536 /*
537  * Returns copied alias name when the key is added for st_table,
538  * else returns NULL.
539  */
540 static int
541 enc_alias_internal(const char *alias, int idx)
542 {
543  return st_insert2(enc_table.names, (st_data_t)alias, (st_data_t)idx,
544  enc_dup_name);
545 }
546 
/*
 * Register +alias+ for the encoding at +idx+.  The Encoding constant for the
 * alias is defined only when enc_alias_internal() returns zero.
 * Returns +idx+, or -1 when the alias is not a valid encoding name.
 */
static int
enc_alias(const char *alias, int idx)
{
    int insert_result;

    if (!valid_encoding_name_p(alias)) {
        return -1;
    }

    insert_result = enc_alias_internal(alias, idx);
    if (insert_result == 0) {
        set_encoding_const(alias, rb_enc_from_index(idx));
    }
    return idx;
}
555 
556 int
557 rb_enc_alias(const char *alias, const char *orig)
558 {
559  int idx;
560 
561  enc_check_duplication(alias);
562  if (!enc_table.list) {
563  rb_enc_init();
564  }
565  if ((idx = rb_enc_find_index(orig)) < 0) {
566  return -1;
567  }
568  return enc_alias(alias, idx);
569 }
570 
/*
 * Declare +alias+ as an alias of +orig+, registering an empty entry for
 * +orig+ first when it is not known yet.
 * Returns what enc_alias() returns.
 */
int
rb_encdb_alias(const char *alias, const char *orig)
{
    int target = rb_enc_registered(orig);

    if (target < 0) {
        target = enc_register(orig, 0);
    }
    return enc_alias(alias, target);
}
581 
582 void
584 {
586 }
587 
588 void
590 {
591  enc_table_expand(ENCODING_COUNT + 1);
592  if (!enc_table.names) {
593  enc_table.names = st_init_strcasetable();
594  }
595 #define ENC_REGISTER(enc) enc_register_at(ENCINDEX_##enc, rb_enc_name(&OnigEncoding##enc), &OnigEncoding##enc)
598  ENC_REGISTER(US_ASCII);
599 #undef ENC_REGISTER
600 #define ENCDB_REGISTER(name, enc) enc_register_at(ENCINDEX_##enc, name, NULL)
601  ENCDB_REGISTER("UTF-16BE", UTF_16BE);
602  ENCDB_REGISTER("UTF-16LE", UTF_16LE);
603  ENCDB_REGISTER("UTF-32BE", UTF_32BE);
604  ENCDB_REGISTER("UTF-32LE", UTF_32LE);
605  ENCDB_REGISTER("UTF-16", UTF_16);
606  ENCDB_REGISTER("UTF-32", UTF_32);
607  ENCDB_REGISTER("UTF8-MAC", UTF8_MAC);
608 
609  ENCDB_REGISTER("EUC-JP", EUC_JP);
610  ENCDB_REGISTER("Windows-31J", Windows_31J);
611 #undef ENCDB_REGISTER
612  enc_table.count = ENCINDEX_BUILTIN_MAX;
613 }
614 
615 rb_encoding *
617 {
618  if (!enc_table.list) {
619  rb_enc_init();
620  }
621  if (index < 0 || enc_table.count <= (index &= ENC_INDEX_MASK)) {
622  return 0;
623  }
624  return enc_table.list[index].enc;
625 }
626 
627 rb_encoding *
629 {
630  return must_encindex(index);
631 }
632 
633 int
635 {
636  st_data_t idx = 0;
637 
638  if (!name) return -1;
639  if (!enc_table.list) return -1;
640  if (st_lookup(enc_table.names, (st_data_t)name, &idx)) {
641  return (int)idx;
642  }
643  return -1;
644 }
645 
646 static int
647 load_encoding(const char *name)
648 {
649  VALUE enclib = rb_sprintf("enc/%s.so", name);
650  VALUE verbose = ruby_verbose;
652  VALUE errinfo;
653  char *s = RSTRING_PTR(enclib) + 4, *e = RSTRING_END(enclib) - 3;
654  int loaded;
655  int idx;
656 
657  while (s < e) {
658  if (!ISALNUM(*s)) *s = '_';
659  else if (ISUPPER(*s)) *s = (char)TOLOWER(*s);
660  ++s;
661  }
662  FL_UNSET(enclib, FL_TAINT);
663  OBJ_FREEZE(enclib);
665  ruby_debug = Qfalse;
666  errinfo = rb_errinfo();
667  loaded = rb_require_internal(enclib, rb_safe_level());
668  ruby_verbose = verbose;
669  ruby_debug = debug;
670  rb_set_errinfo(errinfo);
671  if (loaded < 0 || 1 < loaded) return -1;
672  if ((idx = rb_enc_registered(name)) < 0) return -1;
673  if (enc_autoload_p(enc_table.list[idx].enc)) return -1;
674  return idx;
675 }
676 
677 static int
678 enc_autoload(rb_encoding *enc)
679 {
680  int i;
681  rb_encoding *base = enc_table.list[ENC_TO_ENCINDEX(enc)].base;
682 
683  if (base) {
684  i = 0;
685  do {
686  if (i >= enc_table.count) return -1;
687  } while (enc_table.list[i].enc != base && (++i, 1));
688  if (enc_autoload_p(base)) {
689  if (enc_autoload(base) < 0) return -1;
690  }
691  i = enc->ruby_encoding_index;
692  enc_register_at(i & ENC_INDEX_MASK, rb_enc_name(enc), base);
693  ((rb_raw_encoding *)enc)->ruby_encoding_index = i;
694  i &= ENC_INDEX_MASK;
695  }
696  else {
697  i = load_encoding(rb_enc_name(enc));
698  }
699  return i;
700 }
701 
702 /* Return encoding index or UNSPECIFIED_ENCODING from encoding name */
703 int
705 {
706  int i = rb_enc_registered(name);
707  rb_encoding *enc;
708 
709  if (i < 0) {
710  i = load_encoding(name);
711  }
712  else if (!(enc = rb_enc_from_index(i))) {
713  if (i != UNSPECIFIED_ENCODING) {
714  rb_raise(rb_eArgError, "encoding %s is not registered", name);
715  }
716  }
717  else if (enc_autoload_p(enc)) {
718  if (enc_autoload(enc) < 0) {
719  rb_warn("failed to load encoding (%s); use ASCII-8BIT instead",
720  name);
721  return 0;
722  }
723  }
724  return i;
725 }
726 
727 rb_encoding *
728 rb_enc_find(const char *name)
729 {
730  int idx = rb_enc_find_index(name);
731  if (idx < 0) idx = 0;
732  return rb_enc_from_index(idx);
733 }
734 
735 static inline int
736 enc_capable(VALUE obj)
737 {
738  if (SPECIAL_CONST_P(obj)) return SYMBOL_P(obj);
739  switch (BUILTIN_TYPE(obj)) {
740  case T_STRING:
741  case T_REGEXP:
742  case T_FILE:
743  case T_SYMBOL:
744  return TRUE;
745  case T_DATA:
746  if (is_data_encoding(obj)) return TRUE;
747  default:
748  return FALSE;
749  }
750 }
751 
752 ID
754 {
755  CONST_ID(id_encoding, "encoding");
756  return id_encoding;
757 }
758 
759 static int
760 enc_get_index_str(VALUE str)
761 {
762  int i = ENCODING_GET_INLINED(str);
763  if (i == ENCODING_INLINE_MAX) {
764  VALUE iv;
765 
766  iv = rb_ivar_get(str, rb_id_encoding());
767  i = NUM2INT(iv);
768  }
769  return i;
770 }
771 
772 int
774 {
775  int i = -1;
776  VALUE tmp;
777 
778  if (SPECIAL_CONST_P(obj)) {
779  if (!SYMBOL_P(obj)) return -1;
780  obj = rb_sym2str(obj);
781  }
782  switch (BUILTIN_TYPE(obj)) {
783  as_default:
784  default:
785  case T_STRING:
786  case T_REGEXP:
787  i = enc_get_index_str(obj);
788  break;
789  case T_FILE:
790  tmp = rb_funcallv(obj, rb_intern("internal_encoding"), 0, 0);
791  if (NIL_P(tmp)) obj = rb_funcallv(obj, rb_intern("external_encoding"), 0, 0);
792  else obj = tmp;
793  if (NIL_P(obj)) break;
794  case T_DATA:
795  if (is_data_encoding(obj)) {
796  i = enc_check_encoding(obj);
797  }
798  else {
799  goto as_default;
800  }
801  break;
802  }
803  return i;
804 }
805 
806 static void
807 enc_set_index(VALUE obj, int idx)
808 {
809  if (idx < ENCODING_INLINE_MAX) {
810  ENCODING_SET_INLINED(obj, idx);
811  return;
812  }
814  rb_ivar_set(obj, rb_id_encoding(), INT2NUM(idx));
815 }
816 
817 void
818 rb_enc_set_index(VALUE obj, int idx)
819 {
820  rb_check_frozen(obj);
821  must_encindex(idx);
822  enc_set_index(obj, idx);
823 }
824 
825 VALUE
827 {
828  rb_encoding *enc;
829  int oldidx, oldtermlen, termlen;
830 
831 /* enc_check_capable(obj);*/
832  rb_check_frozen(obj);
833  oldidx = rb_enc_get_index(obj);
834  if (oldidx == idx)
835  return obj;
836  if (SPECIAL_CONST_P(obj)) {
837  rb_raise(rb_eArgError, "cannot set encoding");
838  }
839  enc = must_encindex(idx);
840  if (!ENC_CODERANGE_ASCIIONLY(obj) ||
841  !rb_enc_asciicompat(enc)) {
842  ENC_CODERANGE_CLEAR(obj);
843  }
844  termlen = rb_enc_mbminlen(enc);
845  oldtermlen = rb_enc_mbminlen(rb_enc_from_index(oldidx));
846  if (oldtermlen != termlen && RB_TYPE_P(obj, T_STRING)) {
847  rb_str_change_terminator_length(obj, oldtermlen, termlen);
848  }
849  enc_set_index(obj, idx);
850  return obj;
851 }
852 
853 VALUE
855 {
856  return rb_enc_associate_index(obj, rb_enc_to_index(enc));
857 }
858 
861 {
862  return rb_enc_from_index(rb_enc_get_index(obj));
863 }
864 
865 static rb_encoding* enc_compatible_str(VALUE str1, VALUE str2);
866 
869 {
870  rb_encoding *enc = enc_compatible_str(MUST_STRING(str1), MUST_STRING(str2));
871  if (!enc)
872  rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
873  rb_enc_name(rb_enc_get(str1)),
874  rb_enc_name(rb_enc_get(str2)));
875  return enc;
876 }
877 
880 {
881  rb_encoding *enc = rb_enc_compatible(str1, str2);
882  if (!enc)
883  rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
884  rb_enc_name(rb_enc_get(str1)),
885  rb_enc_name(rb_enc_get(str2)));
886  return enc;
887 }
888 
/*
 * Decide which encoding two objects with different encoding indexes have in
 * common.
 *
 * str1, str2 - the objects being compared (Strings or other objects)
 * idx1, idx2 - their encoding indexes
 *
 * Returns the compatible encoding, or 0 when there is none.
 */
889 static rb_encoding*
890 enc_compatible_latter(VALUE str1, VALUE str2, int idx1, int idx2)
891 {
892  int isstr1, isstr2;
893  rb_encoding *enc1 = rb_enc_from_index(idx1);
894  rb_encoding *enc2 = rb_enc_from_index(idx2);
895 
/* an empty String on the right never changes the left side's encoding */
896  isstr2 = RB_TYPE_P(str2, T_STRING);
897  if (isstr2 && RSTRING_LEN(str2) == 0)
898  return enc1;
/* an empty String on the left keeps enc1 only when enc1 is ASCII-compatible
 * and str2 is ASCII-only; otherwise str2's encoding is used */
899  isstr1 = RB_TYPE_P(str1, T_STRING);
900  if (isstr1 && RSTRING_LEN(str1) == 0)
901  return (rb_enc_asciicompat(enc1) && rb_enc_str_asciionly_p(str2)) ? enc1 : enc2;
/* from here on both encodings must be ASCII-compatible */
902  if (!rb_enc_asciicompat(enc1) || !rb_enc_asciicompat(enc2)) {
903  return 0;
904  }
905 
906  /* objects whose encoding is the same of contents */
907  if (!isstr2 && idx2 == ENCINDEX_US_ASCII)
908  return enc1;
909  if (!isstr1 && idx1 == ENCINDEX_US_ASCII)
910  return enc2;
911 
/* swap the operands so that, if str1 is not a String, str2 takes its place.
 * NOTE(review): enc1/enc2 are NOT swapped along with str/idx/isstr, so after
 * a swap enc2 is the encoding of the object now called str1 -- confirm this
 * is intended before changing anything below. */
912  if (!isstr1) {
913  VALUE tmp = str1;
914  int idx0 = idx1;
915  str1 = str2;
916  str2 = tmp;
917  idx1 = idx2;
918  idx2 = idx0;
919  idx0 = isstr1;
920  isstr1 = isstr2;
921  isstr2 = idx0;
922  }
923  if (isstr1) {
924  int cr1, cr2;
925 
926  cr1 = rb_enc_str_coderange(str1);
927  if (isstr2) {
928  cr2 = rb_enc_str_coderange(str2);
929  if (cr1 != cr2) {
930  /* may need to handle ENC_CODERANGE_BROKEN */
931  if (cr1 == ENC_CODERANGE_7BIT) return enc2;
932  if (cr2 == ENC_CODERANGE_7BIT) return enc1;
933  }
/* reached with cr1 == cr2 as well: both 7-bit resolves to enc1 */
934  if (cr2 == ENC_CODERANGE_7BIT) {
935  return enc1;
936  }
937  }
938  if (cr1 == ENC_CODERANGE_7BIT)
939  return enc2;
940  }
941  return 0;
942 }
943 
944 static rb_encoding*
945 enc_compatible_str(VALUE str1, VALUE str2)
946 {
947  int idx1 = enc_get_index_str(str1);
948  int idx2 = enc_get_index_str(str2);
949 
950  if (idx1 < 0 || idx2 < 0)
951  return 0;
952 
953  if (idx1 == idx2) {
954  return rb_enc_from_index(idx1);
955  }
956  else {
957  return enc_compatible_latter(str1, str2, idx1, idx2);
958  }
959 }
960 
963 {
964  int idx1 = rb_enc_get_index(str1);
965  int idx2 = rb_enc_get_index(str2);
966 
967  if (idx1 < 0 || idx2 < 0)
968  return 0;
969 
970  if (idx1 == idx2) {
971  return rb_enc_from_index(idx1);
972  }
973 
974  return enc_compatible_latter(str1, str2, idx1, idx2);
975 }
976 
977 void
979 {
981 }
982 
983 
984 /*
985  * call-seq:
986  * obj.encoding -> encoding
987  *
988  * Returns the Encoding object that represents the encoding of obj.
989  */
990 
991 VALUE
993 {
994  int idx = rb_enc_get_index(obj);
995  if (idx < 0) {
996  rb_raise(rb_eTypeError, "unknown encoding");
997  }
998  return rb_enc_from_encoding_index(idx & ENC_INDEX_MASK);
999 }
1000 
1001 int
1002 rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
1003 {
1004  return ONIGENC_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
1005 }
1006 
1007 int
1008 rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
1009 {
1010  int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
1011  if (MBCLEN_CHARFOUND_P(n) && MBCLEN_CHARFOUND_LEN(n) <= e-p)
1012  return MBCLEN_CHARFOUND_LEN(n);
1013  else {
1014  int min = rb_enc_mbminlen(enc);
1015  return min <= e-p ? min : (int)(e-p);
1016  }
1017 }
1018 
1019 int
1020 rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
1021 {
1022  int n;
1023  if (e <= p)
1025  n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
1026  if (e-p < n)
1027  return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n-(int)(e-p));
1028  return n;
1029 }
1030 
1031 int
1032 rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
1033 {
1034  unsigned int c;
1035  int l;
1036  if (e <= p)
1037  return -1;
1038  if (rb_enc_asciicompat(enc)) {
1039  c = (unsigned char)*p;
1040  if (!ISASCII(c))
1041  return -1;
1042  if (len) *len = 1;
1043  return c;
1044  }
1045  l = rb_enc_precise_mbclen(p, e, enc);
1046  if (!MBCLEN_CHARFOUND_P(l))
1047  return -1;
1048  c = rb_enc_mbc_to_codepoint(p, e, enc);
1049  if (!rb_enc_isascii(c, enc))
1050  return -1;
1051  if (len) *len = l;
1052  return c;
1053 }
1054 
1055 unsigned int
1056 rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc)
1057 {
1058  int r;
1059  if (e <= p)
1060  rb_raise(rb_eArgError, "empty string");
1061  r = rb_enc_precise_mbclen(p, e, enc);
1062  if (!MBCLEN_CHARFOUND_P(r)) {
1063  rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(enc));
1064  }
1065  if (len_p) *len_p = MBCLEN_CHARFOUND_LEN(r);
1066  return rb_enc_mbc_to_codepoint(p, e, enc);
1067 }
1068 
1069 #undef rb_enc_codepoint
1070 unsigned int
1071 rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
1072 {
1073  return rb_enc_codepoint_len(p, e, 0, enc);
1074 }
1075 
1076 int
1078 {
1079  int n = ONIGENC_CODE_TO_MBCLEN(enc,c);
1080  if (n == 0) {
1081  rb_raise(rb_eArgError, "invalid codepoint 0x%x in %s", c, rb_enc_name(enc));
1082  }
1083  return n;
1084 }
1085 
1086 #undef rb_enc_code_to_mbclen
1087 int
1089 {
1090  return ONIGENC_CODE_TO_MBCLEN(enc, code);
1091 }
1092 
1093 int
1095 {
1097 }
1098 
1099 int
1101 {
1103 }
1104 
1105 /*
1106  * call-seq:
1107  * enc.inspect -> string
1108  *
1109  * Returns a string which represents the encoding for programmers.
1110  *
1111  * Encoding::UTF_8.inspect #=> "#<Encoding:UTF-8>"
1112  * Encoding::ISO_2022_JP.inspect #=> "#<Encoding:ISO-2022-JP (dummy)>"
1113  */
1114 static VALUE
1115 enc_inspect(VALUE self)
1116 {
1117  rb_encoding *enc;
1118 
1119  if (!is_data_encoding(self)) {
1120  not_encoding(self);
1121  }
1122  if (!(enc = DATA_PTR(self)) || rb_enc_from_index(rb_enc_to_index(enc)) != enc) {
1123  rb_raise(rb_eTypeError, "broken Encoding");
1124  }
1126  "#<%"PRIsVALUE":%s%s%s>", rb_obj_class(self),
1127  rb_enc_name(enc),
1128  (ENC_DUMMY_P(enc) ? " (dummy)" : ""),
1129  enc_autoload_p(enc) ? " (autoload)" : "");
1130 }
1131 
1132 /*
1133  * call-seq:
1134  * enc.name -> string
1135  * enc.to_s -> string
1136  *
1137  * Returns the name of the encoding.
1138  *
1139  * Encoding::UTF_8.name #=> "UTF-8"
1140  */
1141 static VALUE
1142 enc_name(VALUE self)
1143 {
1145 }
1146 
1147 static int
1148 enc_names_i(st_data_t name, st_data_t idx, st_data_t args)
1149 {
1150  VALUE *arg = (VALUE *)args;
1151 
1152  if ((int)idx == (int)arg[0]) {
1153  VALUE str = rb_usascii_str_new2((char *)name);
1154  OBJ_FREEZE(str);
1155  rb_ary_push(arg[1], str);
1156  }
1157  return ST_CONTINUE;
1158 }
1159 
1160 /*
1161  * call-seq:
1162  * enc.names -> array
1163  *
1164  * Returns the list of name and aliases of the encoding.
1165  *
1166  * Encoding::WINDOWS_31J.names #=> ["Windows-31J", "CP932", "csWindows31J"]
1167  */
1168 static VALUE
1169 enc_names(VALUE self)
1170 {
1171  VALUE args[2];
1172 
1173  args[0] = (VALUE)rb_to_encoding_index(self);
1174  args[1] = rb_ary_new2(0);
1175  st_foreach(enc_table.names, enc_names_i, (st_data_t)args);
1176  return args[1];
1177 }
1178 
1179 /*
1180  * call-seq:
1181  * Encoding.list -> [enc1, enc2, ...]
1182  *
1183  * Returns the list of loaded encodings.
1184  *
1185  * Encoding.list
1186  * #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
1187  * #<Encoding:ISO-2022-JP (dummy)>]
1188  *
1189  * Encoding.find("US-ASCII")
1190  * #=> #<Encoding:US-ASCII>
1191  *
1192  * Encoding.list
1193  * #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
1194  * #<Encoding:US-ASCII>, #<Encoding:ISO-2022-JP (dummy)>]
1195  *
1196  */
1197 static VALUE
1198 enc_list(VALUE klass)
1199 {
1200  VALUE ary = rb_ary_new2(0);
1201  rb_ary_replace(ary, rb_encoding_list);
1202  return ary;
1203 }
1204 
1205 /*
1206  * call-seq:
1207  * Encoding.find(string) -> enc
1208  *
1209  * Search the encoding with specified <i>name</i>.
1210  * <i>name</i> should be a string.
1211  *
1212  * Encoding.find("US-ASCII") #=> #<Encoding:US-ASCII>
1213  *
1214  * Names which this method accept are encoding names and aliases
1215  * including following special aliases
1216  *
1217  * "external":: default external encoding
1218  * "internal":: default internal encoding
1219  * "locale":: locale encoding
1220  * "filesystem":: filesystem encoding
1221  *
1222  * An ArgumentError is raised when no encoding with <i>name</i>.
1223  * Only <code>Encoding.find("internal")</code> however returns nil
1224  * when no encoding named "internal", in other words, when Ruby has no
1225  * default internal encoding.
1226  */
1227 static VALUE
1228 enc_find(VALUE klass, VALUE enc)
1229 {
1230  int idx;
1231  if (is_obj_encoding(enc))
1232  return enc;
1233  idx = str_to_encindex(enc);
1234  if (idx == UNSPECIFIED_ENCODING) return Qnil;
1235  return rb_enc_from_encoding_index(idx);
1236 }
1237 
1238 /*
1239  * call-seq:
1240  * Encoding.compatible?(obj1, obj2) -> enc or nil
1241  *
1242  * Checks the compatibility of two objects.
1243  *
1244  * If the objects are both strings they are compatible when they are
1245  * concatenatable. The encoding of the concatenated string will be returned
1246  * if they are compatible, nil if they are not.
1247  *
1248  * Encoding.compatible?("\xa1".force_encoding("iso-8859-1"), "b")
1249  * #=> #<Encoding:ISO-8859-1>
1250  *
1251  * Encoding.compatible?(
1252  * "\xa1".force_encoding("iso-8859-1"),
1253  * "\xa1\xa1".force_encoding("euc-jp"))
1254  * #=> nil
1255  *
1256  * If the objects are non-strings their encodings are compatible when they
1257  * have an encoding and:
1258  * * Either encoding is US-ASCII compatible
1259  * * One of the encodings is a 7-bit encoding
1260  *
1261  */
1262 static VALUE
1263 enc_compatible_p(VALUE klass, VALUE str1, VALUE str2)
1264 {
1265  rb_encoding *enc;
1266 
1267  if (!enc_capable(str1)) return Qnil;
1268  if (!enc_capable(str2)) return Qnil;
1269  enc = rb_enc_compatible(str1, str2);
1270  if (!enc) return Qnil;
1271  return rb_enc_from_encoding(enc);
1272 }
1273 
1274 /* :nodoc: */
1275 static VALUE
1276 enc_s_alloc(VALUE klass)
1277 {
1278  rb_undefined_alloc(klass);
1279  return Qnil;
1280 }
1281 
1282 /* :nodoc: */
1283 static VALUE
1284 enc_dump(int argc, VALUE *argv, VALUE self)
1285 {
1286  rb_check_arity(argc, 0, 1);
1287  return enc_name(self);
1288 }
1289 
1290 /* :nodoc: */
1291 static VALUE
1292 enc_load(VALUE klass, VALUE str)
1293 {
1294  return str;
1295 }
1296 
1297 /* :nodoc: */
1298 static VALUE
1299 enc_m_loader(VALUE klass, VALUE str)
1300 {
1301  return enc_find(klass, str);
1302 }
1303 
1304 rb_encoding *
1306 {
1307  if (!enc_table.list) {
1308  rb_enc_init();
1309  }
1310  return enc_table.list[ENCINDEX_ASCII].enc;
1311 }
1312 
1313 int
1315 {
1316  return ENCINDEX_ASCII;
1317 }
1318 
1319 rb_encoding *
1321 {
1322  if (!enc_table.list) {
1323  rb_enc_init();
1324  }
1325  return enc_table.list[ENCINDEX_UTF_8].enc;
1326 }
1327 
1328 int
1330 {
1331  return ENCINDEX_UTF_8;
1332 }
1333 
1334 rb_encoding *
1336 {
1337  if (!enc_table.list) {
1338  rb_enc_init();
1339  }
1340  return enc_table.list[ENCINDEX_US_ASCII].enc;
1341 }
1342 
1343 int
1345 {
1346  return ENCINDEX_US_ASCII;
1347 }
1348 
1349 int rb_locale_charmap_index(void);
1350 
/*
 * Returns the encoding index reported by rb_locale_charmap_index(), falling
 * back to ENCINDEX_ASCII when that index is negative. As a side effect, when
 * no "locale" name is registered yet, the name "locale" is aliased to the
 * resulting index.
 *
 * NOTE(review): two lines are absent from this listing: the declarator
 * (listing line 1352; the index places rb_locale_encindex(void) there) and
 * listing line 1361 inside the _WIN32 section (presumably the call to the
 * Init_w32_codepage() function declared just above it) -- confirm against the
 * real file.
 */
1351 int
1353 {
1354  int idx = rb_locale_charmap_index();
1355 
1356  if (idx < 0) idx = ENCINDEX_ASCII;
1357 
1358  if (rb_enc_registered("locale") < 0) {
1359 # if defined _WIN32
1360  void Init_w32_codepage(void);
1362 # endif
1363  enc_alias_internal("locale", idx);
1364  }
1365 
1366  return idx;
1367 }
1368 
1369 rb_encoding *
1371 {
1373 }
1374 
/*
 * Returns the index registered under the name "filesystem", or
 * ENCINDEX_ASCII when rb_enc_registered() reports a negative index.
 *
 * NOTE(review): the declarator line (listing line 1376) is absent from this
 * listing; the cross-reference index places rb_filesystem_encindex(void) at
 * encoding.c:1376 -- confirm against the real file.
 */
1375 int
1377 {
1378  int idx = rb_enc_registered("filesystem");
1379  if (idx < 0)
1380  idx = ENCINDEX_ASCII;
1381  return idx;
1382 }
1383 
1384 rb_encoding *
1386 {
1388 }
1389 
1391  int index; /* -2 => not yet set, -1 => nil */
1393 };
1394 
/*
 * State for the default external encoding.
 * NOTE(review): {0} initializes the first member to 0 rather than to the -2
 * "not yet set" sentinel that enc_set_default_encoding() tests for; if that
 * first member is `index`, the first assignment is already reported as an
 * override -- confirm this is intended.
 */
1395 static struct default_encoding default_external = {0};
1396 
/*
 * Stores a new default encoding in *def and registers it under `name` in the
 * encoding name table.
 *
 *   def      - default_external or default_internal state to update
 *   encoding - nil, or a value accepted by rb_to_encoding()
 *   name     - table name to register ("external" / "internal" in the visible
 *              callers)
 *
 * Returns TRUE when def->index was something other than -2 on entry (that is,
 * a value had already been set), FALSE otherwise.
 *
 * NOTE(review): listing line 1410, the tail of the st_insert() call in the
 * nil branch, is absent from this listing, so the value inserted for `name`
 * cannot be read here -- confirm against the real file.
 */
1397 static int
1398 enc_set_default_encoding(struct default_encoding *def, VALUE encoding, const char *name)
1399 {
1400  int overridden = FALSE;
1401 
1402  if (def->index != -2)
1403  /* Already set */
1404  overridden = TRUE;
1405 
1406  if (NIL_P(encoding)) {
      /* nil: index becomes -1, the cached pointer is dropped, and a duplicated
       * copy of name is inserted into enc_table.names. */
1407  def->index = -1;
1408  def->enc = 0;
1409  st_insert(enc_table.names, (st_data_t)strdup(name),
1411  }
1412  else {
      /* Non-nil: record the encoding's index, drop the cached pointer so it is
       * looked up again later, and alias name to that index. */
1413  def->index = rb_enc_to_index(rb_to_encoding(encoding));
1414  def->enc = 0;
1415  enc_alias_internal(name, def->index);
1416  }
1417 
      /* Only for the external default: re-alias "filesystem" to whatever
       * Init_enc_set_filesystem_encoding() returns. */
1418  if (def == &default_external)
1419  enc_alias_internal("filesystem", Init_enc_set_filesystem_encoding());
1420 
1421  return overridden;
1422 }
1423 
/*
 * Returns the default external encoding. A cached pointer is returned when
 * present; otherwise a non-negative stored index is resolved through
 * rb_enc_from_index() and cached, and a negative index falls back to
 * rb_locale_encoding().
 *
 * NOTE(review): the declarator line (listing line 1425) is absent from this
 * listing; the cross-reference index places
 * rb_default_external_encoding(void) at encoding.c:1425 -- confirm against
 * the real file.
 */
1424 rb_encoding *
1426 {
1427  if (default_external.enc) return default_external.enc;
1428 
1429  if (default_external.index >= 0) {
1430  default_external.enc = rb_enc_from_index(default_external.index);
1431  return default_external.enc;
1432  }
1433  else {
1434  return rb_locale_encoding();
1435  }
1436 }
1437 
1438 VALUE
1440 {
1442 }
1443 
1444 /*
1445  * call-seq:
1446  * Encoding.default_external -> enc
1447  *
1448  * Returns default external encoding.
1449  *
1450  * The default external encoding is used by default for strings created from
1451  * the following locations:
1452  *
1453  * * CSV
1454  * * File data read from disk
1455  * * SDBM
1456  * * StringIO
1457  * * Zlib::GzipReader
1458  * * Zlib::GzipWriter
1459  * * String#inspect
1460  * * Regexp#inspect
1461  *
1462  * While strings created from these locations will have this encoding, the
1463  * encoding may not be valid. Be sure to check String#valid_encoding?.
1464  *
1465  * File data written to disk will be transcoded to the default external
1466  * encoding when written.
1467  *
1468  * The default external encoding is initialized by the locale or -E option.
1469  */
1470 static VALUE
1471 get_default_external(VALUE klass)
1472 {
      /* Registered as Encoding.default_external; klass is not used and the
       * result of rb_enc_default_external() is returned as is. */
1473  return rb_enc_default_external();
1474 }
1475 
/*
 * Sets the default external encoding. Raises ArgumentError when `encoding`
 * is nil; otherwise updates default_external through
 * enc_set_default_encoding() under the table name "external". The
 * "overridden" flag returned by that helper is discarded.
 *
 * NOTE(review): the declarator line (listing line 1477) is absent from this
 * listing; the cross-reference index places
 * rb_enc_set_default_external(VALUE encoding) at encoding.c:1477 -- confirm
 * against the real file.
 */
1476 void
1478 {
1479  if (NIL_P(encoding)) {
1480  rb_raise(rb_eArgError, "default external can not be nil");
1481  }
1482  enc_set_default_encoding(&default_external, encoding,
1483  "external");
1484 }
1485 
1486 /*
1487  * call-seq:
1488  * Encoding.default_external = enc
1489  *
1490  * Sets default external encoding. You should not set
1491  * Encoding::default_external in ruby code as strings created before changing
1492  * the value may have a different encoding from strings created after the value
1493  * was changed. Instead, you should use <tt>ruby -E</tt> to invoke ruby with
1494  * the correct default_external.
1495  *
1496  * See Encoding::default_external for information on how the default external
1497  * encoding is used.
1498  */
1499 static VALUE
1500 set_default_external(VALUE klass, VALUE encoding)
1501 {
      /* Registered as Encoding.default_external=: issues a warning through
       * rb_warning(), applies the change, and returns the argument. */
1502  rb_warning("setting Encoding.default_external");
1503  rb_enc_set_default_external(encoding);
1504  return encoding;
1505 }
1506 
/*
 * State for the default internal encoding.
 * NOTE(review): {-2} initializes the first member; if that member is `index`,
 * this is the "not yet set" sentinel tested by enc_set_default_encoding() --
 * confirm against the struct definition.
 */
1507 static struct default_encoding default_internal = {-2};
1508 
/*
 * Returns the default internal encoding, or NULL when none is available.
 * When no pointer is cached and the stored index is non-negative, the index
 * is resolved through rb_enc_from_index() and the result is cached.
 *
 * NOTE(review): the declarator line (listing line 1510) is absent from this
 * listing; the cross-reference index places
 * rb_default_internal_encoding(void) at encoding.c:1510 -- confirm against
 * the real file.
 */
1509 rb_encoding *
1511 {
1512  if (!default_internal.enc && default_internal.index >= 0) {
1513  default_internal.enc = rb_enc_from_index(default_internal.index);
1514  }
1515  return default_internal.enc; /* can be NULL */
1516 }
1517 
1518 VALUE
1520 {
1521  /* Note: These functions cope with default_internal not being set */
1523 }
1524 
1525 /*
1526  * call-seq:
1527  * Encoding.default_internal -> enc
1528  *
1529  * Returns default internal encoding. Strings will be transcoded to the
1530  * default internal encoding in the following places if the default internal
1531  * encoding is not nil:
1532  *
1533  * * CSV
1534  * * Etc.sysconfdir and Etc.systmpdir
1535  * * File data read from disk
1536  * * File names from Dir
1537  * * Integer#chr
1538  * * String#inspect and Regexp#inspect
1539  * * Strings returned from Readline
1540  * * Strings returned from SDBM
1541  * * Time#zone
1542  * * Values from ENV
1543  * * Values in ARGV including $PROGRAM_NAME
1544  *
1545  * Additionally String#encode and String#encode! use the default internal
1546  * encoding if no encoding is given.
1547  *
1548  * The script encoding (__ENCODING__), not default_internal, is used as the
1549  * encoding of created strings.
1550  *
1551  * Encoding::default_internal is initialized by the source file's
1552  * internal_encoding or -E option.
1553  */
1554 static VALUE
1555 get_default_internal(VALUE klass)
1556 {
      /* Registered as Encoding.default_internal; klass is not used and the
       * result of rb_enc_default_internal() is returned as is. */
1557  return rb_enc_default_internal();
1558 }
1559 
/*
 * Sets the default internal encoding by updating default_internal through
 * enc_set_default_encoding() under the table name "internal". Unlike the
 * external setter, no nil check is made here, so nil is passed through. The
 * "overridden" flag returned by the helper is discarded.
 *
 * NOTE(review): the declarator line (listing line 1561) is absent from this
 * listing; the cross-reference index places
 * rb_enc_set_default_internal(VALUE encoding) at encoding.c:1561 -- confirm
 * against the real file.
 */
1560 void
1562 {
1563  enc_set_default_encoding(&default_internal, encoding,
1564  "internal");
1565 }
1566 
1567 /*
1568  * call-seq:
1569  * Encoding.default_internal = enc or nil
1570  *
1571  * Sets default internal encoding or removes default internal encoding when
1572  * passed nil. You should not set Encoding::default_internal in ruby code as
1573  * strings created before changing the value may have a different encoding
1574  * from strings created after the change. Instead you should use
1575  * <tt>ruby -E</tt> to invoke ruby with the correct default_internal.
1576  *
1577  * See Encoding::default_internal for information on how the default internal
1578  * encoding is used.
1579  */
1580 static VALUE
1581 set_default_internal(VALUE klass, VALUE encoding)
1582 {
      /* Registered as Encoding.default_internal=: issues a warning through
       * rb_warning(), applies the change, and returns the argument. */
1583  rb_warning("setting Encoding.default_internal");
1584  rb_enc_set_default_internal(encoding);
1585  return encoding;
1586 }
1587 
/*
 * Defines constant(s) under rb_cEncoding that refer to the Encoding object
 * for `enc`, deriving the constant name(s) from the encoding name `name`.
 *
 *  - A name starting with a digit defines nothing.
 *  - A name that is an uppercase letter followed only by alphanumerics and
 *    underscores is defined verbatim.
 *  - When the name is not usable verbatim, a stack copy is made in which a
 *    leading lowercase letter is upcased and every non-alphanumeric character
 *    becomes '_'; that copy is defined only when an uppercase letter was seen
 *    in the original name.
 *  - When the name contains a lowercase letter, an all-uppercase variant of
 *    the copy is defined as well.
 *
 * Names longer than ENCODING_NAMELEN_MAX are skipped.
 */
1588 static void
1589 set_encoding_const(const char *name, rb_encoding *enc)
1590 {
1591  VALUE encoding = rb_enc_from_encoding(enc);
1592  char *s = (char *)name;
1593  int haslower = 0, hasupper = 0, valid = 0;
1594 
1595  if (ISDIGIT(*s)) return;
1596  if (ISUPPER(*s)) {
1597  hasupper = 1;
      /* Advance s over the run of alphanumerics/underscores, noting any
       * lowercase letter along the way. */
1598  while (*++s && (ISALNUM(*s) || *s == '_')) {
1599  if (ISLOWER(*s)) haslower = 1;
1600  }
1601  }
1602  if (!*s) { /* the scan above consumed the whole name */
1603  if (s - name > ENCODING_NAMELEN_MAX) return;
1604  valid = 1;
1605  rb_define_const(rb_cEncoding, name, encoding);
1606  }
1607  if (!valid || haslower) { /* a derived spelling is needed */
1608  size_t len = s - name;
1609  if (len > ENCODING_NAMELEN_MAX) return;
1610  if (!haslower || !hasupper) {
      /* Keep scanning from where the first pass stopped until both a
       * lowercase and an uppercase letter have been seen, or the name ends. */
1611  do {
1612  if (ISLOWER(*s)) haslower = 1;
1613  if (ISUPPER(*s)) hasupper = 1;
1614  } while (*++s && (!haslower || !hasupper));
1615  len = s - name;
1616  }
1617  len += strlen(s); /* len is now the full length of name */
1618  if (len++ > ENCODING_NAMELEN_MAX) return; /* after the check, len also counts the NUL */
      /* Work on a writable stack copy; s and name both point at it from here. */
1619  MEMCPY(s = ALLOCA_N(char, len), name, char, len);
1620  name = s;
1621  if (!valid) {
1622  if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s);
1623  for (; *s; ++s) {
1624  if (!ISALNUM(*s)) *s = '_';
1625  }
1626  if (hasupper) {
1627  rb_define_const(rb_cEncoding, name, encoding);
1628  }
1629  }
1630  if (haslower) {
      /* Upcase the copy in place (including any substitutions made above) and
       * define the all-uppercase spelling. */
1631  for (s = (char *)name; *s; ++s) {
1632  if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s);
1633  }
1634  rb_define_const(rb_cEncoding, name, encoding);
1635  }
1636  }
1637 }
1638 
1639 static int
1640 rb_enc_name_list_i(st_data_t name, st_data_t idx, st_data_t arg)
1641 {
1642  VALUE ary = (VALUE)arg;
1643  VALUE str = rb_fstring_cstr((char *)name);
1644  rb_ary_push(ary, str);
1645  return ST_CONTINUE;
1646 }
1647 
1648 /*
1649  * call-seq:
1650  * Encoding.name_list -> ["enc1", "enc2", ...]
1651  *
1652  * Returns the list of available encoding names.
1653  *
1654  * Encoding.name_list
1655  * #=> ["US-ASCII", "ASCII-8BIT", "UTF-8",
1656  * "ISO-8859-1", "Shift_JIS", "EUC-JP",
1657  * "Windows-31J",
1658  * "BINARY", "CP932", "eucJP"]
1659  *
1660  */
1661 
1662 static VALUE
1663 rb_enc_name_list(VALUE klass)
1664 {
1665  VALUE ary = rb_ary_new2(enc_table.names->num_entries);
1666  st_foreach(enc_table.names, rb_enc_name_list_i, (st_data_t)ary);
1667  return ary;
1668 }
1669 
1670 static int
1671 rb_enc_aliases_enc_i(st_data_t name, st_data_t orig, st_data_t arg)
1672 {
1673  VALUE *p = (VALUE *)arg;
1674  VALUE aliases = p[0], ary = p[1];
1675  int idx = (int)orig;
1676  VALUE key, str = rb_ary_entry(ary, idx);
1677 
1678  if (NIL_P(str)) {
1679  rb_encoding *enc = rb_enc_from_index(idx);
1680 
1681  if (!enc) return ST_CONTINUE;
1682  if (STRCASECMP((char*)name, rb_enc_name(enc)) == 0) {
1683  return ST_CONTINUE;
1684  }
1685  str = rb_fstring_cstr(rb_enc_name(enc));
1686  rb_ary_store(ary, idx, str);
1687  }
1688  key = rb_usascii_str_new2((char *)name);
1689  OBJ_FREEZE(key);
1690  rb_hash_aset(aliases, key, str);
1691  return ST_CONTINUE;
1692 }
1693 
1694 /*
1695  * call-seq:
1696  * Encoding.aliases -> {"alias1" => "orig1", "alias2" => "orig2", ...}
1697  *
1698  * Returns the hash of available encoding alias and original encoding name.
1699  *
1700  * Encoding.aliases
1701  * #=> {"BINARY"=>"ASCII-8BIT", "ASCII"=>"US-ASCII", "ANSI_X3.4-1986"=>"US-ASCII",
1702  * "SJIS"=>"Shift_JIS", "eucJP"=>"EUC-JP", "CP932"=>"Windows-31J"}
1703  *
1704  */
1705 
1706 static VALUE
1707 rb_enc_aliases(VALUE klass)
1708 {
1709  VALUE aliases[2];
1710  aliases[0] = rb_hash_new();
1711  aliases[1] = rb_ary_new();
1712  st_foreach(enc_table.names, rb_enc_aliases_enc_i, (st_data_t)aliases);
1713  return aliases[0];
1714 }
1715 
1716 /*
1717  * An Encoding instance represents a character encoding usable in Ruby. It is
1718  * defined as a constant under the Encoding namespace. It has a name and
1719  * optionally, aliases:
1720  *
1721  * Encoding::ISO_8859_1.name
1722  * #=> "ISO-8859-1"
1723  *
1724  * Encoding::ISO_8859_1.names
1725  * #=> ["ISO-8859-1", "ISO8859-1"]
1726  *
1727  * Ruby methods dealing with encodings return or accept Encoding instances as
1728  * arguments (when a method accepts an Encoding instance as an argument, it
1729  * can be passed an Encoding name or alias instead).
1730  *
1731  * "some string".encoding
1732  * #=> #<Encoding:UTF-8>
1733  *
1734  * string = "some string".encode(Encoding::ISO_8859_1)
1735  * #=> "some string"
1736  * string.encoding
1737  * #=> #<Encoding:ISO-8859-1>
1738  *
1739  * "some string".encode "ISO-8859-1"
1740  * #=> "some string"
1741  *
1742  * <code>Encoding::ASCII_8BIT</code> is a special encoding that is usually
1743  * used for a byte string, not a character string. But as the name suggests,
1744  * its characters in the range of ASCII are considered as ASCII characters.
1745  * This is useful when you use ASCII-8BIT characters with other ASCII
1746  * compatible characters.
1747  *
1748  * == Changing an encoding
1749  *
1750  * The associated Encoding of a String can be changed in two different ways.
1751  *
1752  * First, it is possible to set the Encoding of a string to a new Encoding
1753  * without changing the internal byte representation of the string, with
1754  * String#force_encoding. This is how you can tell Ruby the correct encoding
1755  * of a string.
1756  *
1757  * string
1758  * #=> "R\xC3\xA9sum\xC3\xA9"
1759  * string.encoding
1760  * #=> #<Encoding:ISO-8859-1>
1761  * string.force_encoding(Encoding::UTF_8)
1762  * #=> "R\u00E9sum\u00E9"
1763  *
1764  * Second, it is possible to transcode a string, i.e. translate its internal
1765  * byte representation to another encoding. Its associated encoding is also
1766  * set to the other encoding. See String#encode for the various forms of
1767  * transcoding, and the Encoding::Converter class for additional control over
1768  * the transcoding process.
1769  *
1770  * string
1771  * #=> "R\u00E9sum\u00E9"
1772  * string.encoding
1773  * #=> #<Encoding:UTF-8>
1774  * string = string.encode!(Encoding::ISO_8859_1)
1775  * #=> "R\xE9sum\xE9"
1776  * string.encoding
1777  * #=> #<Encoding:ISO-8859-1>
1778  *
1779  * == Script encoding
1780  *
1781  * All Ruby script code has an associated Encoding which any String literal
1782  * created in the source code will be associated to.
1783  *
1784  * The default script encoding is <code>Encoding::UTF_8</code> after v2.0, but it can
1785  * be changed by a magic comment on the first line of the source code file (or
1786  * second line, if there is a shebang line on the first). The comment must
1787  * contain the word <code>coding</code> or <code>encoding</code>, followed
1788  * by a colon, space and the Encoding name or alias:
1789  *
1790  * # encoding: UTF-8
1791  *
1792  * "some string".encoding
1793  * #=> #<Encoding:UTF-8>
1794  *
1795  * The <code>__ENCODING__</code> keyword returns the script encoding of the file
1796  * in which the keyword is written:
1797  *
1798  * # encoding: ISO-8859-1
1799  *
1800  * __ENCODING__
1801  * #=> #<Encoding:ISO-8859-1>
1802  *
1803  * <code>ruby -K</code> will change the default locale encoding, but this is
1804  * not recommended. Ruby source files should declare their script encoding by a
1805  * magic comment even when they only depend on US-ASCII strings or regular
1806  * expressions.
1807  *
1808  * == Locale encoding
1809  *
1810  * The default encoding of the environment. Usually derived from locale.
1811  *
1812  * see Encoding.locale_charmap, Encoding.find('locale')
1813  *
1814  * == Filesystem encoding
1815  *
1816  * The default encoding of strings from the filesystem of the environment.
1817  * This is used for strings of file names or paths.
1818  *
1819  * see Encoding.find('filesystem')
1820  *
1821  * == External encoding
1822  *
1823  * Each IO object has an external encoding which indicates the encoding that
1824  * Ruby will use to read its data. By default Ruby sets the external encoding
1825  * of an IO object to the default external encoding. The default external
1826  * encoding is set by locale encoding or the interpreter <code>-E</code> option.
1827  * Encoding.default_external returns the current value of the external
1828  * encoding.
1829  *
1830  * ENV["LANG"]
1831  * #=> "UTF-8"
1832  * Encoding.default_external
1833  * #=> #<Encoding:UTF-8>
1834  *
1835  * $ ruby -E ISO-8859-1 -e "p Encoding.default_external"
1836  * #<Encoding:ISO-8859-1>
1837  *
1838  * $ LANG=C ruby -e 'p Encoding.default_external'
1839  * #<Encoding:US-ASCII>
1840  *
1841  * The default external encoding may also be set through
1842  * Encoding.default_external=, but you should not do this as strings created
1843  * before and after the change will have inconsistent encodings. Instead use
1844  * <code>ruby -E</code> to invoke ruby with the correct external encoding.
1845  *
1846  * When you know that the actual encoding of the data of an IO object is not
1847  * the default external encoding, you can reset its external encoding with
1848  * IO#set_encoding or set it at IO object creation (see IO.new options).
1849  *
1850  * == Internal encoding
1851  *
1852  * To process the data of an IO object which has an encoding different
1853  * from its external encoding, you can set its internal encoding. Ruby will use
1854  * this internal encoding to transcode the data when it is read from the IO
1855  * object.
1856  *
1857  * Conversely, when data is written to the IO object it is transcoded from the
1858  * internal encoding to the external encoding of the IO object.
1859  *
1860  * The internal encoding of an IO object can be set with
1861  * IO#set_encoding or at IO object creation (see IO.new options).
1862  *
1863  * The internal encoding is optional and when not set, the Ruby default
1864  * internal encoding is used. If not explicitly set this default internal
1865  * encoding is +nil+ meaning that by default, no transcoding occurs.
1866  *
1867  * The default internal encoding can be set with the interpreter option
1868  * <code>-E</code>. Encoding.default_internal returns the current internal
1869  * encoding.
1870  *
1871  * $ ruby -e 'p Encoding.default_internal'
1872  * nil
1873  *
1874  * $ ruby -E ISO-8859-1:UTF-8 -e "p [Encoding.default_external, \
1875  * Encoding.default_internal]"
1876  * [#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>]
1877  *
1878  * The default internal encoding may also be set through
1879  * Encoding.default_internal=, but you should not do this as strings created
1880  * before and after the change will have inconsistent encodings. Instead use
1881  * <code>ruby -E</code> to invoke ruby with the correct internal encoding.
1882  *
1883  * == IO encoding example
1884  *
1885  * In the following example a UTF-8 encoded string "R\u00E9sum\u00E9" is transcoded for
1886  * output to ISO-8859-1 encoding, then read back in and transcoded to UTF-8:
1887  *
1888  * string = "R\u00E9sum\u00E9"
1889  *
1890  * open("transcoded.txt", "w:ISO-8859-1") do |io|
1891  * io.write(string)
1892  * end
1893  *
1894  * puts "raw text:"
1895  * p File.binread("transcoded.txt")
1896  * puts
1897  *
1898  * open("transcoded.txt", "r:ISO-8859-1:UTF-8") do |io|
1899  * puts "transcoded text:"
1900  * p io.read
1901  * end
1902  *
1903  * While writing the file, the internal encoding is not specified as it is
1904  * only necessary for reading. While reading the file both the internal and
1905  * external encoding must be specified to obtain the correct result.
1906  *
1907  * $ ruby t.rb
1908  * raw text:
1909  * "R\xE9sum\xE9"
1910  *
1911  * transcoded text:
1912  * "R\u00E9sum\u00E9"
1913  *
1914  */
1915 
1916 void
1918 {
      /*
       * Creates the Encoding class, installs its allocator, registers its
       * instance and singleton methods, and fills rb_encoding_list with one
       * object per entry currently in enc_table.
       *
       * NOTE(review): several lines are absent from this listing: the
       * declarator (listing line 1917; the cross-reference index places
       * Init_Encoding(void) there) and listing lines 1926, 1952 and 1958 --
       * confirm against the real file.
       */
      /* Within this function rb_intern() is redirected to rb_intern_const(). */
1919 #undef rb_intern
1920 #define rb_intern(str) rb_intern_const(str)
1921  VALUE list;
1922  int i;
1923 
1924  rb_cEncoding = rb_define_class("Encoding", rb_cObject);
1925  rb_define_alloc_func(rb_cEncoding, enc_s_alloc);
1927  rb_define_method(rb_cEncoding, "to_s", enc_name, 0);
1928  rb_define_method(rb_cEncoding, "inspect", enc_inspect, 0);
1929  rb_define_method(rb_cEncoding, "name", enc_name, 0);
1930  rb_define_method(rb_cEncoding, "names", enc_names, 0);
1931  rb_define_method(rb_cEncoding, "dummy?", enc_dummy_p, 0);
1932  rb_define_method(rb_cEncoding, "ascii_compatible?", enc_ascii_compatible_p, 0);
1933  rb_define_method(rb_cEncoding, "replicate", enc_replicate, 1);
1934  rb_define_singleton_method(rb_cEncoding, "list", enc_list, 0);
1935  rb_define_singleton_method(rb_cEncoding, "name_list", rb_enc_name_list, 0);
1936  rb_define_singleton_method(rb_cEncoding, "aliases", rb_enc_aliases, 0);
1937  rb_define_singleton_method(rb_cEncoding, "find", enc_find, 1);
1938  rb_define_singleton_method(rb_cEncoding, "compatible?", enc_compatible_p, 2);
1939 
      /* Marshal hooks: _dump takes a variable argument list (arity -1). */
1940  rb_define_method(rb_cEncoding, "_dump", enc_dump, -1);
1941  rb_define_singleton_method(rb_cEncoding, "_load", enc_load, 1);
1942 
1943  rb_define_singleton_method(rb_cEncoding, "default_external", get_default_external, 0);
1944  rb_define_singleton_method(rb_cEncoding, "default_external=", set_default_external, 1);
1945  rb_define_singleton_method(rb_cEncoding, "default_internal", get_default_internal, 0);
1946  rb_define_singleton_method(rb_cEncoding, "default_internal=", set_default_internal, 1);
1947  rb_define_singleton_method(rb_cEncoding, "locale_charmap", rb_locale_charmap, 0); /* in localeinit.c */
1948 
      /* Build the array kept in rb_encoding_list. RBASIC_CLEAR_CLASS() is
       * applied to it first (NOTE(review): presumably to keep the array out of
       * reach of Ruby code -- confirm). */
1949  list = rb_ary_new2(enc_table.count);
1950  RBASIC_CLEAR_CLASS(list);
1951  rb_encoding_list = list;
1953 
      /* One enc_new() object per registered table entry, in table order. */
1954  for (i = 0; i < enc_table.count; ++i) {
1955  rb_ary_push(list, enc_new(enc_table.list[i].enc));
1956  }
1957 
1959 }
1960 
1961 /* locale insensitive ctype functions */
1962 
/*
 * Calls func for every entry of the encoding name table by forwarding func
 * and arg to st_foreach() on enc_table.names.
 *
 * NOTE(review): the declarator line (listing line 1964) is absent from this
 * listing; the cross-reference index places
 * rb_enc_foreach_name(int (*func)(st_data_t name, st_data_t idx,
 * st_data_t arg), st_data_t arg) at encoding.c:1964 -- confirm against the
 * real file.
 */
1963 void
1965 {
1966  st_foreach(enc_table.names, func, arg);
1967 }
#define RBASIC_CLEAR_CLASS(obj)
Definition: internal.h:1469
#define T_SYMBOL
Definition: ruby.h:508
#define ENCINDEX_US_ASCII
Definition: encindex.h:44
#define ISDIGIT(c)
Definition: ruby.h:2150
int rb_enc_codelen(int c, rb_encoding *enc)
Definition: encoding.c:1077
int rb_enc_get_index(VALUE obj)
Definition: encoding.c:773
#define ONIGENC_CODE_TO_MBCLEN(enc, code)
Definition: onigmo.h:367
#define MBCLEN_CHARFOUND_P(ret)
Definition: encoding.h:185
#define is_obj_encoding(obj)
Definition: encoding.c:87
void rb_warn(const char *fmt,...)
Definition: error.c:246
rb_encoding * rb_enc_check(VALUE str1, VALUE str2)
Definition: encoding.c:879
void rb_bug(const char *fmt,...)
Definition: error.c:521
VALUE rb_ary_entry(VALUE ary, long offset)
Definition: array.c:1215
#define MBCLEN_CHARFOUND_LEN(ret)
Definition: encoding.h:186
#define rb_enc_mbc_to_codepoint(p, e, enc)
Definition: encoding.h:202
void rb_enc_copy(VALUE obj1, VALUE obj2)
Definition: encoding.c:978
#define FALSE
Definition: nkf.h:174
#define RUBY_TYPED_FREE_IMMEDIATELY
Definition: ruby.h:1138
void rb_enc_set_base(const char *name, const char *orig)
Definition: encoding.c:389
size_t strlen(const char *)
#define INT2NUM(x)
Definition: ruby.h:1538
Definition: st.h:79
#define ONIGENC_IS_UNICODE(enc)
Definition: onigmo.h:327
VALUE rb_cEncoding
Definition: encoding.c:45
#define NUM2INT(x)
Definition: ruby.h:684
int count
Definition: encoding.c:56
int ruby_encoding_index
Definition: onigmo.h:178
void rb_define_singleton_method(VALUE obj, const char *name, VALUE(*func)(ANYARGS), int argc)
Defines a singleton method for obj.
Definition: class.c:1716
#define rb_usascii_str_new2
Definition: intern.h:841
#define FL_TAINT
Definition: ruby.h:1213
#define CLASS_OF(v)
Definition: ruby.h:453
VALUE rb_fstring_cstr(const char *str)
Definition: string.c:388
void rb_raise(VALUE exc, const char *fmt,...)
Definition: error.c:2284
#define st_foreach
Definition: regint.h:186
#define Qtrue
Definition: ruby.h:437
void Init_Encoding(void)
Definition: encoding.c:1917
#define TypedData_Wrap_Struct(klass, data_type, sval)
Definition: ruby.h:1162
Definition: st.h:99
#define OBJ_FREEZE(x)
Definition: ruby.h:1306
rb_encoding * rb_to_encoding(VALUE enc)
Definition: encoding.c:246
int rb_enc_dummy_p(rb_encoding *enc)
Definition: encoding.c:132
#define ENC_CODERANGE_CLEAR(obj)
Definition: encoding.h:107
VALUE rb_eEncCompatError
Definition: error.c:808
VALUE rb_enc_from_encoding(rb_encoding *encoding)
Definition: encoding.c:117
int rb_enc_tolower(int c, rb_encoding *enc)
Definition: encoding.c:1100
#define rb_check_arity
Definition: intern.h:298
st_table * names
Definition: encoding.c:58
rb_encoding * rb_default_internal_encoding(void)
Definition: encoding.c:1510
VALUE rb_ary_push(VALUE ary, VALUE item)
Definition: array.c:924
#define ENCINDEX_ASCII
Definition: encindex.h:42
if(len<=MAX_WORD_LENGTH &&len >=MIN_WORD_LENGTH)
Definition: zonetab.h:883
int rb_usascii_encindex(void)
Definition: encoding.c:1344
rb_encoding * rb_enc_compatible(VALUE str1, VALUE str2)
Definition: encoding.c:962
VALUE rb_locale_charmap(VALUE klass)
Definition: localeinit.c:90
Definition: nkf.c:115
#define ENC_SET_DUMMY(enc)
Definition: encoding.c:66
#define ENCODING_GET_INLINED(obj)
Definition: encoding.h:57
int rb_enc_str_coderange(VALUE)
Definition: string.c:621
unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc)
Definition: encoding.c:1056
VALUE rb_ivar_get(VALUE, ID)
Definition: variable.c:1210
void rb_enc_set_default_external(VALUE encoding)
Definition: encoding.c:1477
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
Definition: encoding.c:854
void rb_define_alloc_func(VALUE, rb_alloc_func_t)
int rb_enc_set_dummy(int index)
Definition: encoding.c:400
int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:1008
#define ONIGENC_MBC_ENC_LEN(enc, p, e)
Definition: onigmo.h:361
#define DATA_PTR(dta)
Definition: ruby.h:1106
const char * alias
Definition: nkf.c:1151
#define FL_UNSET(x, f)
Definition: ruby.h:1290
#define ENC_REGISTER(enc)
#define st_lookup
Definition: regint.h:185
int rb_enc_registered(const char *name)
Definition: encoding.c:634
ID rb_id_encoding(void)
Definition: encoding.c:753
void Init_w32_codepage(void)
Definition: file.c:703
#define MUST_STRING(str)
Definition: encoding.c:23
int rb_filesystem_encindex(void)
Definition: encoding.c:1376
void rb_enc_init(void)
Definition: encoding.c:589
rb_encoding * rb_utf8_encoding(void)
Definition: encoding.c:1320
VALUE rb_eEncodingError
Definition: error.c:807
#define RDATA(obj)
Definition: ruby.h:1204
void rb_undef_method(VALUE klass, const char *name)
Definition: class.c:1533
Definition: nkf.c:111
#define ENCINDEX_UTF_8
Definition: encindex.h:43
#define ENC_CODERANGE_7BIT
Definition: encoding.h:100
VALUE rb_enc_sprintf(rb_encoding *enc, const char *format,...)
Definition: sprintf.c:1433
#define rb_ary_new2
Definition: intern.h:90
int rb_enc_toupper(int c, rb_encoding *enc)
Definition: encoding.c:1094
VALUE rb_eArgError
Definition: error.c:802
RUBY_SYMBOL_EXPORT_BEGIN typedef unsigned long st_data_t
Definition: st.h:22
Definition: nkf.c:87
int st_insert2(st_table *, st_data_t, st_data_t, st_data_t(*)(st_data_t))
Definition: st.c:1185
st_table * st_init_strcasetable(void)
Definition: st.c:640
VALUE rb_obj_class(VALUE)
call-seq: obj.class -> class
Definition: object.c:277
#define RB_TYPE_P(obj, type)
Definition: ruby.h:527
void rb_encdb_set_unicode(int index)
Definition: encoding.c:583
#define ENCINDEX_BUILTIN_MAX
Definition: encindex.h:54
int rb_to_encoding_index(VALUE enc)
Definition: encoding.c:198
rb_encoding * rb_default_external_encoding(void)
Definition: encoding.c:1425
int rb_enc_to_index(rb_encoding *enc)
Definition: encoding.c:126
int rb_data_is_encoding(VALUE obj)
Definition: encoding.c:90
VALUE rb_hash_aset(VALUE hash, VALUE key, VALUE val)
Definition: hash.c:1616
int rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:1002
RUBY_EXTERN VALUE rb_cObject
Definition: ruby.h:1893
Definition: encoding.c:48
#define rb_enc_isascii(c, enc)
Definition: encoding.h:224
Definition: nkf.c:99
#define RSTRING_END(str)
Definition: ruby.h:979
VALUE rb_ary_replace(VALUE copy, VALUE orig)
Definition: array.c:3451
void rb_set_errinfo(VALUE err)
Sets the current exception ($!) to the given value.
Definition: eval.c:1792
VALUE rb_ary_new(void)
Definition: array.c:499
int rb_ascii8bit_encindex(void)
Definition: encoding.c:1314
VALUE rb_enc_default_external(void)
Definition: encoding.c:1439
int rb_require_internal(VALUE fname, int safe)
Definition: load.c:962
#define NIL_P(v)
Definition: ruby.h:451
#define ISASCII(c)
Definition: ruby.h:2142
#define ENC_CODERANGE_ASCIIONLY(obj)
Definition: encoding.h:105
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
Definition: class.c:646
void rb_enc_set_index(VALUE obj, int idx)
Definition: encoding.c:818
int rb_enc_replicate(const char *name, rb_encoding *encoding)
Definition: encoding.c:409
void rb_define_const(VALUE, const char *, VALUE)
Definition: variable.c:2691
void rb_ary_store(VALUE ary, long idx, VALUE val)
Definition: array.c:815
#define ENCODING_COUNT
Definition: encoding.c:70
#define ISALNUM(c)
Definition: ruby.h:2148
int rb_encdb_alias(const char *alias, const char *orig)
Definition: encoding.c:572
int argc
Definition: ruby.c:187
#define Qfalse
Definition: ruby.h:436
int rb_locale_encindex(void)
Definition: encoding.c:1352
#define realloc
Definition: ripper.c:359
#define ALLOCA_N(type, n)
Definition: ruby.h:1593
NORETURN(static void not_encoding(VALUE enc))
int rb_locale_charmap_index(void)
Definition: localeinit.c:108
void rb_gc_register_mark_object(VALUE obj)
Definition: gc.c:6227
#define ISUPPER(c)
Definition: ruby.h:2146
#define MEMCPY(p1, p2, type, n)
Definition: ruby.h:1661
#define ENCDB_REGISTER(name, enc)
VALUE rb_enc_associate_index(VALUE obj, int idx)
Definition: encoding.c:826
#define rb_enc_mbminlen(enc)
Definition: encoding.h:174
#define ISLOWER(c)
Definition: ruby.h:2147
const char * name
Definition: encoding.c:49
#define UChar
Definition: onigmo.h:76
rb_encoding * rb_find_encoding(VALUE enc)
Definition: encoding.c:253
int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
Definition: encoding.c:1032
#define RSTRING_LEN(str)
Definition: ruby.h:971
int rb_encdb_dummy(const char *name)
Definition: encoding.c:476
#define TRUE
Definition: nkf.h:175
#define T_DATA
Definition: ruby.h:506
VALUE rb_sprintf(const char *format,...)
Definition: sprintf.c:1452
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:1020
int rb_enc_unicode_p(rb_encoding *enc)
Definition: encoding.c:525
#define rb_enc_name(enc)
Definition: encoding.h:171
VALUE rb_hash_new(void)
Definition: hash.c:424
#define strdup(s)
Definition: util.h:70
#define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c)
Definition: regenc.h:218
VALUE rb_ivar_set(VALUE, ID, VALUE)
Definition: variable.c:1315
#define ONIGENC_FLAG_UNICODE
Definition: onigmo.h:313
#define PRIsVALUE
Definition: ruby.h:135
unsigned long ID
Definition: ruby.h:86
rb_encoding * rb_usascii_encoding(void)
Definition: encoding.c:1335
int rb_encdb_replicate(const char *name, const char *orig)
Definition: encoding.c:454
#define ENCODING_NAMELEN_MAX
Definition: encoding.c:73
#define Qnil
Definition: ruby.h:438
int rb_define_dummy_encoding(const char *name)
Definition: encoding.c:466
const char * name
Definition: onigmo.h:162
#define BUILTIN_TYPE(x)
Definition: ruby.h:518
#define debug(x)
Definition: _sdbm.c:51
unsigned long VALUE
Definition: ruby.h:85
rb_encoding * rb_locale_encoding(void)
Definition: encoding.c:1370
VALUE rb_eTypeError
Definition: error.c:801
#define ONIGENC_IS_ASCII_CODE(code)
Definition: regenc.h:216
int rb_utf8_encindex(void)
Definition: encoding.c:1329
#define ENCODING_SET_INLINED(obj, i)
Definition: encoding.h:55
int rb_enc_code_to_mbclen(int code, rb_encoding *enc)
Definition: encoding.c:1088
VALUE rb_obj_encoding(VALUE obj)
Definition: encoding.c:992
int Init_enc_set_filesystem_encoding(void)
Definition: localeinit.c:118
#define rb_enc_asciicompat(enc)
Definition: encoding.h:239
void rb_enc_foreach_name(int(*func)(st_data_t name, st_data_t idx, st_data_t arg), st_data_t arg)
Definition: encoding.c:1964
#define enc_autoload_p(enc)
Definition: encoding.c:76
#define rb_funcallv
Definition: console.c:21
register unsigned int len
Definition: zonetab.h:51
#define StringValueCStr(v)
Definition: ruby.h:571
#define RSTRING_PTR(str)
Definition: ruby.h:975
rb_encoding * rb_enc_get(VALUE obj)
Definition: encoding.c:860
int size
Definition: encoding.c:57
int rb_safe_level(void)
Definition: safe.c:35
#define ENCODING_INLINE_MAX
Definition: encoding.h:36
#define xmalloc
Definition: defines.h:183
rb_encoding * enc
Definition: encoding.c:1392
VALUE rb_check_string_type(VALUE)
Definition: string.c:2246
void rb_enc_set_default_internal(VALUE encoding)
Definition: encoding.c:1561
#define valid_encoding_name_p(name)
Definition: encoding.c:74
OnigEncodingType rb_raw_encoding
Definition: encoding.c:29
void rb_marshal_define_compat(VALUE newclass, VALUE oldclass, VALUE(*dumper)(VALUE), VALUE(*loader)(VALUE, VALUE))
Definition: marshal.c:134
void rb_warning(const char *fmt,...)
Definition: error.c:267
#define T_STRING
Definition: ruby.h:496
int rb_enc_alias(const char *alias, const char *orig)
Definition: encoding.c:557
#define is_data_encoding(obj)
Definition: encoding.c:86
struct rb_encoding_entry * list
Definition: encoding.c:55
#define ENC_TO_ENCINDEX(enc)
Definition: encoding.c:64
rb_encoding * rb_filesystem_encoding(void)
Definition: encoding.c:1385
rb_encoding * rb_enc_get_from_index(int index)
Definition: encoding.c:628
#define T_FILE
Definition: ruby.h:502
VALUE rb_errinfo(void)
The current exception in the current thread.
Definition: eval.c:1777
Definition: nkf.c:112
#define TOLOWER(c)
Definition: ruby.h:2154
#define st_insert
Definition: regint.h:184
VALUE rb_enc_default_internal(void)
Definition: encoding.c:1519
Definition: nkf.c:113
#define ruby_debug
Definition: ruby.h:1814
const char * name
Definition: nkf.c:208
unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:1071
#define ENC_INDEX_MASK
Definition: encoding.c:62
#define STRCASECMP(s1, s2)
Definition: ruby.h:2158
#define rb_intern(str)
rb_encoding * rb_ascii8bit_encoding(void)
Definition: encoding.c:1305
int rb_enc_find_index(const char *name)
Definition: encoding.c:704
rb_encoding * rb_enc_check_str(VALUE str1, VALUE str2)
Definition: encoding.c:868
#define rb_check_frozen(obj)
Definition: intern.h:271
#define CONST_ID(var, str)
Definition: ruby.h:1743
int rb_enc_register(const char *name, rb_encoding *encoding)
Definition: encoding.c:333
#define SPECIAL_CONST_P(x)
Definition: ruby.h:1242
Definition: nkf.c:108
void rb_encdb_declare(const char *name)
Definition: encoding.c:357
int rb_enc_str_asciionly_p(VALUE)
Definition: string.c:641
void rb_gc_mark_encodings(void)
Definition: encoding.c:263
#define SYMBOL_P(x)
Definition: ruby.h:382
#define NULL
Definition: _sdbm.c:102
#define UNSPECIFIED_ENCODING
Definition: encoding.c:71
#define ENC_DUMMY_P(enc)
Definition: encoding.c:65
void rb_define_method(VALUE klass, const char *name, VALUE(*func)(ANYARGS), int argc)
Definition: class.c:1515
#define ruby_verbose
Definition: ruby.h:1813
Definition: nkf.c:118
#define ONIGENC_PRECISE_MBC_ENC_LEN(enc, p, e)
Definition: onigmo.h:356
void rb_str_change_terminator_length(VALUE str, const int oldtermlen, const int termlen)
Definition: string.c:2162
void rb_loaderror(const char *fmt,...)
Definition: error.c:2306
rb_encoding * enc
Definition: encoding.c:50
rb_encoding * rb_enc_find(const char *name)
Definition: encoding.c:728
#define T_REGEXP
Definition: ruby.h:497
#define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n)
Definition: onigmo.h:352
char ** argv
Definition: ruby.c:188
#define StringValue(v)
Definition: ruby.h:569
#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c)
Definition: regenc.h:217
rb_encoding * rb_enc_from_index(int index)
Definition: encoding.c:616
#define rb_sym2str(sym)
Definition: console.c:107
Definition: nkf.c:117
Definition: nkf.c:120
rb_encoding * base
Definition: encoding.c:51