Ruby  2.1.10p492(2016-04-01revision54464)
encoding.c
Go to the documentation of this file.
1 /**********************************************************************
2 
3  encoding.c -
4 
5  $Author: usa $
6  created at: Thu May 24 17:23:27 JST 2007
7 
8  Copyright (C) 2007 Yukihiro Matsumoto
9 
10 **********************************************************************/
11 
12 #include "ruby/ruby.h"
13 #include "ruby/encoding.h"
14 #include "internal.h"
15 #include "regenc.h"
16 #include <ctype.h>
17 #include "ruby/util.h"
18 
19 #undef rb_ascii8bit_encindex
20 #undef rb_utf8_encindex
21 #undef rb_usascii_encindex
22 
23 #if defined __GNUC__ && __GNUC__ >= 4
24 #pragma GCC visibility push(default)
25 int rb_enc_register(const char *name, rb_encoding *encoding);
26 void rb_enc_set_base(const char *name, const char *orig);
27 int rb_enc_set_dummy(int index);
28 void rb_encdb_declare(const char *name);
29 int rb_encdb_replicate(const char *name, const char *orig);
30 int rb_encdb_dummy(const char *name);
31 int rb_encdb_alias(const char *alias, const char *orig);
32 void rb_encdb_set_unicode(int index);
33 #pragma GCC visibility pop
34 #endif
35 
36 static ID id_encoding;
39 
41  const char *name;
44 };
45 
46 static struct {
48  int count;
49  int size;
51 } enc_table;
52 
53 void rb_enc_init(void);
54 
55 #define ENCODING_COUNT ENCINDEX_BUILTIN_MAX
56 #define UNSPECIFIED_ENCODING INT_MAX
57 
58 #define ENCODING_NAMELEN_MAX 63
59 #define valid_encoding_name_p(name) ((name) && strlen(name) <= ENCODING_NAMELEN_MAX)
60 
61 #define enc_autoload_p(enc) (!rb_enc_mbmaxlen(enc))
62 
63 static int load_encoding(const char *name);
64 
/*
 * Size callback installed in the "encoding" typed-data descriptor.
 * The argument is ignored and the reported size is always zero.
 */
static size_t
enc_memsize(const void *p)
{
    (void)p;
    return (size_t)0;
}
70 
72  "encoding",
73  {0, 0, enc_memsize,},
75 };
76 
77 #define is_data_encoding(obj) (RTYPEDDATA_P(obj) && RTYPEDDATA_TYPE(obj) == &encoding_data_type)
78 #define is_obj_encoding(obj) (RB_TYPE_P((obj), T_DATA) && is_data_encoding(obj))
79 
80 static VALUE
81 enc_new(rb_encoding *encoding)
82 {
83  return TypedData_Wrap_Struct(rb_cEncoding, &encoding_data_type, encoding);
84 }
85 
86 static VALUE
88 {
89  VALUE list, enc;
90 
91  if (!(list = rb_encoding_list)) {
92  rb_bug("rb_enc_from_encoding_index(%d): no rb_encoding_list", idx);
93  }
94  enc = rb_ary_entry(list, idx);
95  if (NIL_P(enc)) {
96  rb_bug("rb_enc_from_encoding_index(%d): not created yet", idx);
97  }
98  return enc;
99 }
100 
101 VALUE
103 {
104  int idx;
105  if (!encoding) return Qnil;
106  idx = ENC_TO_ENCINDEX(encoding);
107  return rb_enc_from_encoding_index(idx);
108 }
109 
110 static int enc_autoload(rb_encoding *);
111 
112 static int
114 {
115  int index = rb_enc_to_index(enc);
116  if (rb_enc_from_index(index) != enc)
117  return -1;
118  if (enc_autoload_p(enc)) {
119  index = enc_autoload(enc);
120  }
121  return index;
122 }
123 
124 static int
126 {
127  if (!is_obj_encoding(obj)) {
128  return -1;
129  }
130  return check_encoding(RDATA(obj)->data);
131 }
132 
133 NORETURN(static void not_encoding(VALUE enc));
134 static void
136 {
137  rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Encoding)",
138  rb_obj_class(enc));
139 }
140 
141 static rb_encoding *
143 {
144  int index = enc_check_encoding(enc);
145  if (index < 0) {
146  not_encoding(enc);
147  }
148  return DATA_PTR(enc);
149 }
150 
151 static rb_encoding *
153 {
155  if (!enc) {
156  rb_raise(rb_eEncodingError, "encoding index out of bound: %d",
157  index);
158  }
159  if (ENC_TO_ENCINDEX(enc) != (int)(index & ENC_INDEX_MASK)) {
160  rb_raise(rb_eEncodingError, "wrong encoding index %d for %s (expected %d)",
161  index, rb_enc_name(enc), ENC_TO_ENCINDEX(enc));
162  }
163  if (enc_autoload_p(enc) && enc_autoload(enc) == -1) {
164  rb_loaderror("failed to load encoding (%s)",
165  rb_enc_name(enc));
166  }
167  return enc;
168 }
169 
170 int
172 {
173  int idx;
174 
175  idx = enc_check_encoding(enc);
176  if (idx >= 0) {
177  return idx;
178  }
179  else if (NIL_P(enc = rb_check_string_type(enc))) {
180  return -1;
181  }
182  if (!rb_enc_asciicompat(rb_enc_get(enc))) {
183  return -1;
184  }
185  return rb_enc_find_index(StringValueCStr(enc));
186 }
187 
188 /* Returns encoding index or UNSPECIFIED_ENCODING */
189 static int
191 {
192  int idx;
193 
194  StringValue(enc);
195  if (!rb_enc_asciicompat(rb_enc_get(enc))) {
196  rb_raise(rb_eArgError, "invalid name encoding (non ASCII)");
197  }
199  return idx;
200 }
201 
202 static int
204 {
205  int idx = str_find_encindex(enc);
206  if (idx < 0) {
207  rb_raise(rb_eArgError, "unknown encoding name - %"PRIsVALUE, enc);
208  }
209  return idx;
210 }
211 
212 static rb_encoding *
214 {
215  return rb_enc_from_index(str_to_encindex(enc));
216 }
217 
218 rb_encoding *
220 {
221  if (enc_check_encoding(enc) >= 0) return RDATA(enc)->data;
222  return str_to_encoding(enc);
223 }
224 
225 rb_encoding *
227 {
228  int idx;
229  if (enc_check_encoding(enc) >= 0) return RDATA(enc)->data;
230  idx = str_find_encindex(enc);
231  if (idx < 0) return NULL;
232  return rb_enc_from_index(idx);
233 }
234 
235 void
237 {
238 }
239 
240 static int
241 enc_table_expand(int newsize)
242 {
243  struct rb_encoding_entry *ent;
244  int count = newsize;
245 
246  if (enc_table.size >= newsize) return newsize;
247  newsize = (newsize + 7) / 8 * 8;
248  ent = realloc(enc_table.list, sizeof(*enc_table.list) * newsize);
249  if (!ent) return -1;
250  memset(ent + enc_table.size, 0, sizeof(*ent)*(newsize - enc_table.size));
251  enc_table.list = ent;
252  enc_table.size = newsize;
253  return count;
254 }
255 
256 static int
257 enc_register_at(int index, const char *name, rb_encoding *encoding)
258 {
259  struct rb_encoding_entry *ent = &enc_table.list[index];
260  VALUE list;
261 
262  if (!valid_encoding_name_p(name)) return -1;
263  if (!ent->name) {
264  ent->name = name = strdup(name);
265  }
266  else if (STRCASECMP(name, ent->name)) {
267  return -1;
268  }
269  if (!ent->enc) {
270  ent->enc = xmalloc(sizeof(rb_encoding));
271  }
272  if (encoding) {
273  *ent->enc = *encoding;
274  }
275  else {
276  memset(ent->enc, 0, sizeof(*ent->enc));
277  }
278  encoding = ent->enc;
279  encoding->name = name;
280  encoding->ruby_encoding_index = index;
281  st_insert(enc_table.names, (st_data_t)name, (st_data_t)index);
282  list = rb_encoding_list;
283  if (list && NIL_P(rb_ary_entry(list, index))) {
284  /* initialize encoding data */
285  rb_ary_store(list, index, enc_new(encoding));
286  }
287  return index;
288 }
289 
290 static int
291 enc_register(const char *name, rb_encoding *encoding)
292 {
293  int index = enc_table.count;
294 
295  if ((index = enc_table_expand(index + 1)) < 0) return -1;
296  enc_table.count = index;
297  return enc_register_at(index - 1, name, encoding);
298 }
299 
300 static void set_encoding_const(const char *, rb_encoding *);
301 int rb_enc_registered(const char *name);
302 
303 int
304 rb_enc_register(const char *name, rb_encoding *encoding)
305 {
306  int index = rb_enc_registered(name);
307 
308  if (index >= 0) {
309  rb_encoding *oldenc = rb_enc_from_index(index);
310  if (STRCASECMP(name, rb_enc_name(oldenc))) {
311  index = enc_register(name, encoding);
312  }
313  else if (enc_autoload_p(oldenc) || !ENC_DUMMY_P(oldenc)) {
314  enc_register_at(index, name, encoding);
315  }
316  else {
317  rb_raise(rb_eArgError, "encoding %s is already registered", name);
318  }
319  }
320  else {
321  index = enc_register(name, encoding);
323  }
324  return index;
325 }
326 
327 void
328 rb_encdb_declare(const char *name)
329 {
330  int idx = rb_enc_registered(name);
331  if (idx < 0) {
332  idx = enc_register(name, 0);
333  }
335 }
336 
337 static void
339 {
340  if (rb_enc_registered(name) >= 0) {
341  rb_raise(rb_eArgError, "encoding %s is already registered", name);
342  }
343 }
344 
345 static rb_encoding*
347 {
348  rb_encoding *enc = enc_table.list[index].enc;
349 
350  enc_table.list[index].base = base;
351  if (rb_enc_dummy_p(base)) ENC_SET_DUMMY(enc);
352  return enc;
353 }
354 
355 /* for encdb.h
356  * Set base encoding for encodings which are not replicas
357  * but not in their own files.
358  */
359 void
360 rb_enc_set_base(const char *name, const char *orig)
361 {
362  int idx = rb_enc_registered(name);
363  int origidx = rb_enc_registered(orig);
364  set_base_encoding(idx, rb_enc_from_index(origidx));
365 }
366 
367 /* for encdb.h
368  * Set encoding dummy.
369  */
370 int
372 {
373  rb_encoding *enc = enc_table.list[index].enc;
374 
375  ENC_SET_DUMMY(enc);
376  return index;
377 }
378 
379 int
380 rb_enc_replicate(const char *name, rb_encoding *encoding)
381 {
382  int idx;
383 
384  enc_check_duplication(name);
385  idx = enc_register(name, encoding);
386  set_base_encoding(idx, encoding);
388  return idx;
389 }
390 
391 /*
392  * call-seq:
393  * enc.replicate(name) -> encoding
394  *
395  * Returns a replicated encoding of _enc_ whose name is _name_.
396  * The new encoding should have the same byte structure of _enc_.
397  * If _name_ is used by another encoding, raise ArgumentError.
398  *
399  */
400 static VALUE
402 {
405  rb_to_encoding(encoding)));
406 }
407 
408 static int
409 enc_replicate_with_index(const char *name, rb_encoding *origenc, int idx)
410 {
411  if (idx < 0) {
412  idx = enc_register(name, origenc);
413  }
414  else {
415  idx = enc_register_at(idx, name, origenc);
416  }
417  if (idx >= 0) {
418  set_base_encoding(idx, origenc);
420  }
421  return idx;
422 }
423 
424 int
425 rb_encdb_replicate(const char *name, const char *orig)
426 {
427  int origidx = rb_enc_registered(orig);
428  int idx = rb_enc_registered(name);
429 
430  if (origidx < 0) {
431  origidx = enc_register(orig, 0);
432  }
433  return enc_replicate_with_index(name, rb_enc_from_index(origidx), idx);
434 }
435 
436 int
438 {
440  rb_encoding *enc = enc_table.list[index].enc;
441 
442  ENC_SET_DUMMY(enc);
443  return index;
444 }
445 
446 int
447 rb_encdb_dummy(const char *name)
448 {
450  rb_enc_registered(name));
451  rb_encoding *enc = enc_table.list[index].enc;
452 
453  ENC_SET_DUMMY(enc);
454  return index;
455 }
456 
457 /*
458  * call-seq:
459  * enc.dummy? -> true or false
460  *
461  * Returns true for dummy encodings.
462  * A dummy encoding is an encoding for which character handling is not properly
463  * implemented.
464  * It is used for stateful encodings.
465  *
466  * Encoding::ISO_2022_JP.dummy? #=> true
467  * Encoding::UTF_8.dummy? #=> false
468  *
469  */
470 static VALUE
472 {
473  return ENC_DUMMY_P(must_encoding(enc)) ? Qtrue : Qfalse;
474 }
475 
476 /*
477  * call-seq:
478  * enc.ascii_compatible? -> true or false
479  *
480  * Returns whether ASCII-compatible or not.
481  *
482  * Encoding::UTF_8.ascii_compatible? #=> true
483  * Encoding::UTF_16BE.ascii_compatible? #=> false
484  *
485  */
486 static VALUE
488 {
489  return rb_enc_asciicompat(must_encoding(enc)) ? Qtrue : Qfalse;
490 }
491 
492 /*
493  * Returns 1 when the encoding is Unicode series other than UTF-7 else 0.
494  */
495 int
497 {
498  return ONIGENC_IS_UNICODE(enc);
499 }
500 
501 static st_data_t
503 {
504  return (st_data_t)strdup((const char *)name);
505 }
506 
507 /*
508  * Returns copied alias name when the key is added for st_table,
509  * else returns NULL.
510  */
511 static int
512 enc_alias_internal(const char *alias, int idx)
513 {
514  return st_insert2(enc_table.names, (st_data_t)alias, (st_data_t)idx,
515  enc_dup_name);
516 }
517 
518 static int
519 enc_alias(const char *alias, int idx)
520 {
521  if (!valid_encoding_name_p(alias)) return -1;
522  if (!enc_alias_internal(alias, idx))
524  return idx;
525 }
526 
527 int
528 rb_enc_alias(const char *alias, const char *orig)
529 {
530  int idx;
531 
532  enc_check_duplication(alias);
533  if (!enc_table.list) {
534  rb_enc_init();
535  }
536  if ((idx = rb_enc_find_index(orig)) < 0) {
537  return -1;
538  }
539  return enc_alias(alias, idx);
540 }
541 
/*
 * Alias _alias_ to _orig_ for the encoding database.  If _orig_ is not
 * registered yet, it is registered with no definition first.
 * Returns the result of enc_alias.
 */
int
rb_encdb_alias(const char *alias, const char *orig)
{
    const int found = rb_enc_registered(orig);
    const int target = (found >= 0) ? found : enc_register(orig, 0);

    return enc_alias(alias, target);
}
552 
553 void
555 {
557 }
558 
561 
562 void
564 {
566  if (!enc_table.names) {
568  }
569 #define ENC_REGISTER(enc) enc_register_at(ENCINDEX_##enc, rb_enc_name(&OnigEncoding##enc), &OnigEncoding##enc)
572  ENC_REGISTER(US_ASCII);
573 #undef ENC_REGISTER
574 #define ENCDB_REGISTER(name, enc) enc_register_at(ENCINDEX_##enc, name, NULL)
575  ENCDB_REGISTER("UTF-16BE", UTF_16BE);
576  ENCDB_REGISTER("UTF-16LE", UTF_16LE);
577  ENCDB_REGISTER("UTF-32BE", UTF_32BE);
578  ENCDB_REGISTER("UTF-32LE", UTF_32LE);
579  ENCDB_REGISTER("UTF-16", UTF_16);
580  ENCDB_REGISTER("UTF-32", UTF_32);
581  ENCDB_REGISTER("UTF8-MAC", UTF8_MAC);
582 
583  ENCDB_REGISTER("EUC-JP", EUC_JP);
584  ENCDB_REGISTER("Windows-31J", Windows_31J);
585 #undef ENCDB_REGISTER
587 }
588 
589 rb_encoding *
591 {
592  if (!enc_table.list) {
593  rb_enc_init();
594  }
595  if (index < 0 || enc_table.count <= (index &= ENC_INDEX_MASK)) {
596  return 0;
597  }
598  return enc_table.list[index].enc;
599 }
600 
601 rb_encoding *
603 {
604  return must_encindex(index);
605 }
606 
607 int
609 {
610  st_data_t idx = 0;
611 
612  if (!name) return -1;
613  if (!enc_table.list) return -1;
614  if (st_lookup(enc_table.names, (st_data_t)name, &idx)) {
615  return (int)idx;
616  }
617  return -1;
618 }
619 
620 static VALUE
622 {
623  int safe = rb_safe_level();
624  return rb_require_safe(enclib, safe > 3 ? 3 : safe);
625 }
626 
627 static int
628 load_encoding(const char *name)
629 {
630  VALUE enclib = rb_sprintf("enc/%s.so", name);
631  VALUE verbose = ruby_verbose;
633  VALUE errinfo;
634  VALUE loaded;
635  char *s = RSTRING_PTR(enclib) + 4, *e = RSTRING_END(enclib) - 3;
636  int idx;
637 
638  while (s < e) {
639  if (!ISALNUM(*s)) *s = '_';
640  else if (ISUPPER(*s)) *s = (char)TOLOWER(*s);
641  ++s;
642  }
643  FL_UNSET(enclib, FL_TAINT);
644  OBJ_FREEZE(enclib);
646  ruby_debug = Qfalse;
647  errinfo = rb_errinfo();
648  loaded = rb_protect(require_enc, enclib, 0);
649  ruby_verbose = verbose;
650  ruby_debug = debug;
651  rb_set_errinfo(errinfo);
652  if (NIL_P(loaded)) return -1;
653  if ((idx = rb_enc_registered(name)) < 0) return -1;
654  if (enc_autoload_p(enc_table.list[idx].enc)) return -1;
655  return idx;
656 }
657 
658 static int
660 {
661  int i;
662  rb_encoding *base = enc_table.list[ENC_TO_ENCINDEX(enc)].base;
663 
664  if (base) {
665  i = 0;
666  do {
667  if (i >= enc_table.count) return -1;
668  } while (enc_table.list[i].enc != base && (++i, 1));
669  if (enc_autoload_p(base)) {
670  if (enc_autoload(base) < 0) return -1;
671  }
672  i = enc->ruby_encoding_index;
673  enc_register_at(i & ENC_INDEX_MASK, rb_enc_name(enc), base);
674  enc->ruby_encoding_index = i;
675  }
676  else {
677  i = load_encoding(rb_enc_name(enc));
678  }
679  return i;
680 }
681 
682 /* Return encoding index or UNSPECIFIED_ENCODING from encoding name */
683 int
685 {
686  int i = rb_enc_registered(name);
687  rb_encoding *enc;
688 
689  if (i < 0) {
690  i = load_encoding(name);
691  }
692  else if (!(enc = rb_enc_from_index(i))) {
693  if (i != UNSPECIFIED_ENCODING) {
694  rb_raise(rb_eArgError, "encoding %s is not registered", name);
695  }
696  }
697  else if (enc_autoload_p(enc)) {
698  if (enc_autoload(enc) < 0) {
699  rb_warn("failed to load encoding (%s); use ASCII-8BIT instead",
700  name);
701  return 0;
702  }
703  }
704  return i;
705 }
706 
707 rb_encoding *
708 rb_enc_find(const char *name)
709 {
710  int idx = rb_enc_find_index(name);
711  if (idx < 0) idx = 0;
712  return rb_enc_from_index(idx);
713 }
714 
715 static inline int
717 {
718  if (SPECIAL_CONST_P(obj)) return SYMBOL_P(obj);
719  switch (BUILTIN_TYPE(obj)) {
720  case T_STRING:
721  case T_REGEXP:
722  case T_FILE:
723  return TRUE;
724  case T_DATA:
725  if (is_data_encoding(obj)) return TRUE;
726  default:
727  return FALSE;
728  }
729 }
730 
731 ID
733 {
734  CONST_ID(id_encoding, "encoding");
735  return id_encoding;
736 }
737 
738 int
740 {
741  int i = -1;
742  VALUE tmp;
743 
744  if (SPECIAL_CONST_P(obj)) {
745  if (!SYMBOL_P(obj)) return -1;
746  obj = rb_id2str(SYM2ID(obj));
747  }
748  switch (BUILTIN_TYPE(obj)) {
749  as_default:
750  default:
751  case T_STRING:
752  case T_REGEXP:
753  i = ENCODING_GET_INLINED(obj);
754  if (i == ENCODING_INLINE_MAX) {
755  VALUE iv;
756 
757  iv = rb_ivar_get(obj, rb_id_encoding());
758  i = NUM2INT(iv);
759  }
760  break;
761  case T_FILE:
762  tmp = rb_funcall(obj, rb_intern("internal_encoding"), 0, 0);
763  if (NIL_P(tmp)) obj = rb_funcall(obj, rb_intern("external_encoding"), 0, 0);
764  else obj = tmp;
765  if (NIL_P(obj)) break;
766  case T_DATA:
767  if (is_data_encoding(obj)) {
768  i = enc_check_encoding(obj);
769  }
770  else {
771  goto as_default;
772  }
773  break;
774  }
775  return i;
776 }
777 
778 static void
780 {
781  if (idx < ENCODING_INLINE_MAX) {
782  ENCODING_SET_INLINED(obj, idx);
783  return;
784  }
786  rb_ivar_set(obj, rb_id_encoding(), INT2NUM(idx));
787 }
788 
789 void
791 {
792  rb_check_frozen(obj);
793  must_encindex(idx);
794  enc_set_index(obj, idx);
795 }
796 
797 VALUE
799 {
800  rb_encoding *enc;
801  int oldidx, oldtermlen, termlen;
802 
803 /* enc_check_capable(obj);*/
804  rb_check_frozen(obj);
805  oldidx = rb_enc_get_index(obj);
806  if (oldidx == idx)
807  return obj;
808  if (SPECIAL_CONST_P(obj)) {
809  rb_raise(rb_eArgError, "cannot set encoding");
810  }
811  enc = must_encindex(idx);
812  if (!ENC_CODERANGE_ASCIIONLY(obj) ||
813  !rb_enc_asciicompat(enc)) {
814  ENC_CODERANGE_CLEAR(obj);
815  }
816  termlen = rb_enc_mbminlen(enc);
817  oldtermlen = rb_enc_mbminlen(rb_enc_from_index(oldidx));
818  if (oldtermlen < termlen && RB_TYPE_P(obj, T_STRING)) {
819  rb_str_fill_terminator(obj, termlen);
820  }
821  enc_set_index(obj, idx);
822  return obj;
823 }
824 
825 VALUE
827 {
828  return rb_enc_associate_index(obj, rb_enc_to_index(enc));
829 }
830 
833 {
834  return rb_enc_from_index(rb_enc_get_index(obj));
835 }
836 
839 {
840  rb_encoding *enc = rb_enc_compatible(str1, str2);
841  if (!enc)
842  rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
843  rb_enc_name(rb_enc_get(str1)),
844  rb_enc_name(rb_enc_get(str2)));
845  return enc;
846 }
847 
850 {
851  int idx1, idx2;
852  rb_encoding *enc1, *enc2;
853  int isstr1, isstr2;
854 
855  idx1 = rb_enc_get_index(str1);
856  idx2 = rb_enc_get_index(str2);
857 
858  if (idx1 < 0 || idx2 < 0)
859  return 0;
860 
861  if (idx1 == idx2) {
862  return rb_enc_from_index(idx1);
863  }
864  enc1 = rb_enc_from_index(idx1);
865  enc2 = rb_enc_from_index(idx2);
866 
867  isstr2 = RB_TYPE_P(str2, T_STRING);
868  if (isstr2 && RSTRING_LEN(str2) == 0)
869  return enc1;
870  isstr1 = RB_TYPE_P(str1, T_STRING);
871  if (isstr1 && RSTRING_LEN(str1) == 0)
872  return (rb_enc_asciicompat(enc1) && rb_enc_str_asciionly_p(str2)) ? enc1 : enc2;
873  if (!rb_enc_asciicompat(enc1) || !rb_enc_asciicompat(enc2)) {
874  return 0;
875  }
876 
877  /* objects whose encoding is the same of contents */
878  if (!isstr2 && idx2 == ENCINDEX_US_ASCII)
879  return enc1;
880  if (!isstr1 && idx1 == ENCINDEX_US_ASCII)
881  return enc2;
882 
883  if (!isstr1) {
884  VALUE tmp = str1;
885  int idx0 = idx1;
886  str1 = str2;
887  str2 = tmp;
888  idx1 = idx2;
889  idx2 = idx0;
890  idx0 = isstr1;
891  isstr1 = isstr2;
892  isstr2 = idx0;
893  }
894  if (isstr1) {
895  int cr1, cr2;
896 
897  cr1 = rb_enc_str_coderange(str1);
898  if (isstr2) {
899  cr2 = rb_enc_str_coderange(str2);
900  if (cr1 != cr2) {
901  /* may need to handle ENC_CODERANGE_BROKEN */
902  if (cr1 == ENC_CODERANGE_7BIT) return enc2;
903  if (cr2 == ENC_CODERANGE_7BIT) return enc1;
904  }
905  if (cr2 == ENC_CODERANGE_7BIT) {
906  return enc1;
907  }
908  }
909  if (cr1 == ENC_CODERANGE_7BIT)
910  return enc2;
911  }
912  return 0;
913 }
914 
915 void
917 {
919 }
920 
921 
922 /*
923  * call-seq:
924  * obj.encoding -> encoding
925  *
926  * Returns the Encoding object that represents the encoding of obj.
927  */
928 
929 VALUE
931 {
932  int idx = rb_enc_get_index(obj);
933  if (idx < 0) {
934  rb_raise(rb_eTypeError, "unknown encoding");
935  }
937 }
938 
939 int
940 rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
941 {
942  return ONIGENC_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
943 }
944 
945 int
946 rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
947 {
948  int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
949  if (MBCLEN_CHARFOUND_P(n) && MBCLEN_CHARFOUND_LEN(n) <= e-p)
950  return MBCLEN_CHARFOUND_LEN(n);
951  else {
952  int min = rb_enc_mbminlen(enc);
953  return min <= e-p ? min : (int)(e-p);
954  }
955 }
956 
957 int
958 rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
959 {
960  int n;
961  if (e <= p)
963  n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
964  if (e-p < n)
965  return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n-(int)(e-p));
966  return n;
967 }
968 
969 int
970 rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
971 {
972  unsigned int c, l;
973  if (e <= p)
974  return -1;
975  if (rb_enc_asciicompat(enc)) {
976  c = (unsigned char)*p;
977  if (!ISASCII(c))
978  return -1;
979  if (len) *len = 1;
980  return c;
981  }
982  l = rb_enc_precise_mbclen(p, e, enc);
983  if (!MBCLEN_CHARFOUND_P(l))
984  return -1;
985  c = rb_enc_mbc_to_codepoint(p, e, enc);
986  if (!rb_enc_isascii(c, enc))
987  return -1;
988  if (len) *len = l;
989  return c;
990 }
991 
992 unsigned int
993 rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc)
994 {
995  int r;
996  if (e <= p)
997  rb_raise(rb_eArgError, "empty string");
998  r = rb_enc_precise_mbclen(p, e, enc);
999  if (!MBCLEN_CHARFOUND_P(r)) {
1000  rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(enc));
1001  }
1002  if (len_p) *len_p = MBCLEN_CHARFOUND_LEN(r);
1003  return rb_enc_mbc_to_codepoint(p, e, enc);
1004 }
1005 
1006 #undef rb_enc_codepoint
1007 unsigned int
1008 rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
1009 {
1010  return rb_enc_codepoint_len(p, e, 0, enc);
1011 }
1012 
1013 int
1015 {
1016  int n = ONIGENC_CODE_TO_MBCLEN(enc,c);
1017  if (n == 0) {
1018  rb_raise(rb_eArgError, "invalid codepoint 0x%x in %s", c, rb_enc_name(enc));
1019  }
1020  return n;
1021 }
1022 
1023 #undef rb_enc_code_to_mbclen
1024 int
1026 {
1027  return ONIGENC_CODE_TO_MBCLEN(enc, code);
1028 }
1029 
1030 int
1032 {
1034 }
1035 
1036 int
1038 {
1040 }
1041 
1042 /*
1043  * call-seq:
1044  * enc.inspect -> string
1045  *
1046  * Returns a string which represents the encoding for programmers.
1047  *
1048  * Encoding::UTF_8.inspect #=> "#<Encoding:UTF-8>"
1049  * Encoding::ISO_2022_JP.inspect #=> "#<Encoding:ISO-2022-JP (dummy)>"
1050  */
1051 static VALUE
1053 {
1054  rb_encoding *enc;
1055 
1056  if (!is_data_encoding(self)) {
1057  not_encoding(self);
1058  }
1059  if (!(enc = DATA_PTR(self)) || rb_enc_from_index(rb_enc_to_index(enc)) != enc) {
1060  rb_raise(rb_eTypeError, "broken Encoding");
1061  }
1063  "#<%"PRIsVALUE":%s%s%s>", rb_obj_class(self),
1064  rb_enc_name(enc),
1065  (ENC_DUMMY_P(enc) ? " (dummy)" : ""),
1066  enc_autoload_p(enc) ? " (autoload)" : "");
1067 }
1068 
1069 /*
1070  * call-seq:
1071  * enc.name -> string
1072  * enc.to_s -> string
1073  *
1074  * Returns the name of the encoding.
1075  *
1076  * Encoding::UTF_8.name #=> "UTF-8"
1077  */
1078 static VALUE
1080 {
1082 }
1083 
1084 static int
1086 {
1087  VALUE *arg = (VALUE *)args;
1088 
1089  if ((int)idx == (int)arg[0]) {
1090  VALUE str = rb_usascii_str_new2((char *)name);
1091  OBJ_FREEZE(str);
1092  rb_ary_push(arg[1], str);
1093  }
1094  return ST_CONTINUE;
1095 }
1096 
1097 /*
1098  * call-seq:
1099  * enc.names -> array
1100  *
1101  * Returns the list of name and aliases of the encoding.
1102  *
1103  * Encoding::WINDOWS_31J.names #=> ["Windows-31J", "CP932", "csWindows31J"]
1104  */
1105 static VALUE
1107 {
1108  VALUE args[2];
1109 
1110  args[0] = (VALUE)rb_to_encoding_index(self);
1111  args[1] = rb_ary_new2(0);
1112  st_foreach(enc_table.names, enc_names_i, (st_data_t)args);
1113  return args[1];
1114 }
1115 
1116 /*
1117  * call-seq:
1118  * Encoding.list -> [enc1, enc2, ...]
1119  *
1120  * Returns the list of loaded encodings.
1121  *
1122  * Encoding.list
1123  * #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
1124  * #<Encoding:ISO-2022-JP (dummy)>]
1125  *
1126  * Encoding.find("US-ASCII")
1127  * #=> #<Encoding:US-ASCII>
1128  *
1129  * Encoding.list
1130  * #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
1131  * #<Encoding:US-ASCII>, #<Encoding:ISO-2022-JP (dummy)>]
1132  *
1133  */
1134 static VALUE
1136 {
1137  VALUE ary = rb_ary_new2(0);
1139  return ary;
1140 }
1141 
1142 /*
1143  * call-seq:
1144  * Encoding.find(string) -> enc
1145  *
1146  * Search the encoding with specified <i>name</i>.
1147  * <i>name</i> should be a string.
1148  *
1149  * Encoding.find("US-ASCII") #=> #<Encoding:US-ASCII>
1150  *
1151  * Names which this method accept are encoding names and aliases
1152  * including following special aliases
1153  *
1154  * "external":: default external encoding
1155  * "internal":: default internal encoding
1156  * "locale":: locale encoding
1157  * "filesystem":: filesystem encoding
1158  *
1159  * An ArgumentError is raised when no encoding with <i>name</i>.
1160  * Only <code>Encoding.find("internal")</code> however returns nil
1161  * when no encoding named "internal", in other words, when Ruby has no
1162  * default internal encoding.
1163  */
1164 static VALUE
1166 {
1167  int idx;
1168  if (is_obj_encoding(enc))
1169  return enc;
1170  idx = str_to_encindex(enc);
1171  if (idx == UNSPECIFIED_ENCODING) return Qnil;
1172  return rb_enc_from_encoding_index(idx);
1173 }
1174 
1175 /*
1176  * call-seq:
1177  * Encoding.compatible?(obj1, obj2) -> enc or nil
1178  *
1179  * Checks the compatibility of two objects.
1180  *
1181  * If the objects are both strings they are compatible when they are
1182  * concatenatable. The encoding of the concatenated string will be returned
1183  * if they are compatible, nil if they are not.
1184  *
1185  * Encoding.compatible?("\xa1".force_encoding("iso-8859-1"), "b")
1186  * #=> #<Encoding:ISO-8859-1>
1187  *
1188  * Encoding.compatible?(
1189  * "\xa1".force_encoding("iso-8859-1"),
1190  * "\xa1\xa1".force_encoding("euc-jp"))
1191  * #=> nil
1192  *
1193  * If the objects are non-strings their encodings are compatible when they
1194  * have an encoding and:
1195  * * Either encoding is US-ASCII compatible
1196  * * One of the encodings is a 7-bit encoding
1197  *
1198  */
1199 static VALUE
1201 {
1202  rb_encoding *enc;
1203 
1204  if (!enc_capable(str1)) return Qnil;
1205  if (!enc_capable(str2)) return Qnil;
1206  enc = rb_enc_compatible(str1, str2);
1207  if (!enc) return Qnil;
1208  return rb_enc_from_encoding(enc);
1209 }
1210 
1211 /* :nodoc: */
1212 static VALUE
1214 {
1215  rb_undefined_alloc(klass);
1216  return Qnil;
1217 }
1218 
1219 /* :nodoc: */
1220 static VALUE
1222 {
1223  rb_scan_args(argc, argv, "01", 0);
1224  return enc_name(self);
1225 }
1226 
1227 /* :nodoc: */
1228 static VALUE
1230 {
1231  return str;
1232 }
1233 
1234 /* :nodoc: */
1235 static VALUE
1237 {
1238  return enc_find(klass, str);
1239 }
1240 
1241 rb_encoding *
1243 {
1244  if (!enc_table.list) {
1245  rb_enc_init();
1246  }
1247  return enc_table.list[ENCINDEX_ASCII].enc;
1248 }
1249 
1250 int
1252 {
1253  return ENCINDEX_ASCII;
1254 }
1255 
1256 rb_encoding *
1258 {
1259  if (!enc_table.list) {
1260  rb_enc_init();
1261  }
1262  return enc_table.list[ENCINDEX_UTF_8].enc;
1263 }
1264 
1265 int
1267 {
1268  return ENCINDEX_UTF_8;
1269 }
1270 
1271 rb_encoding *
1273 {
1274  if (!enc_table.list) {
1275  rb_enc_init();
1276  }
1277  return enc_table.list[ENCINDEX_US_ASCII].enc;
1278 }
1279 
1280 int
1282 {
1283  return ENCINDEX_US_ASCII;
1284 }
1285 
1286 int
1288 {
1290  int idx;
1291 
1292  if (NIL_P(charmap))
1293  idx = ENCINDEX_US_ASCII;
1294  else if ((idx = rb_enc_find_index(StringValueCStr(charmap))) < 0)
1295  idx = ENCINDEX_ASCII;
1296 
1297  if (rb_enc_registered("locale") < 0) {
1298 # if defined _WIN32
1299  void Init_w32_codepage(void);
1301 # endif
1302  enc_alias_internal("locale", idx);
1303  }
1304 
1305  return idx;
1306 }
1307 
1308 rb_encoding *
1310 {
1312 }
1313 
1314 int
1316 {
1317  int idx = rb_enc_registered("filesystem");
1318  if (idx < 0)
1319  idx = ENCINDEX_ASCII;
1320  return idx;
1321 }
1322 
1323 rb_encoding *
1325 {
1327 }
1328 
1330  int index; /* -2 => not yet set, -1 => nil */
1332 };
1333 
1335 
1336 extern int Init_enc_set_filesystem_encoding(void);
1337 
1338 static int
1339 enc_set_default_encoding(struct default_encoding *def, VALUE encoding, const char *name)
1340 {
1341  int overridden = FALSE;
1342 
1343  if (def->index != -2)
1344  /* Already set */
1345  overridden = TRUE;
1346 
1347  if (NIL_P(encoding)) {
1348  def->index = -1;
1349  def->enc = 0;
1350  st_insert(enc_table.names, (st_data_t)strdup(name),
1352  }
1353  else {
1354  def->index = rb_enc_to_index(rb_to_encoding(encoding));
1355  def->enc = 0;
1356  enc_alias_internal(name, def->index);
1357  }
1358 
1359  if (def == &default_external)
1361 
1362  return overridden;
1363 }
1364 
1365 rb_encoding *
1367 {
1368  if (default_external.enc) return default_external.enc;
1369 
1370  if (default_external.index >= 0) {
1371  default_external.enc = rb_enc_from_index(default_external.index);
1372  return default_external.enc;
1373  }
1374  else {
1375  return rb_locale_encoding();
1376  }
1377 }
1378 
1379 VALUE
1381 {
1383 }
1384 
1385 /*
1386  * call-seq:
1387  * Encoding.default_external -> enc
1388  *
1389  * Returns default external encoding.
1390  *
1391  * The default external encoding is used by default for strings created from
1392  * the following locations:
1393  *
1394  * * CSV
1395  * * File data read from disk
1396  * * SDBM
1397  * * StringIO
1398  * * Zlib::GzipReader
1399  * * Zlib::GzipWriter
1400  * * String#inspect
1401  * * Regexp#inspect
1402  *
1403  * While strings created from these locations will have this encoding, the
1404  * encoding may not be valid. Be sure to check String#valid_encoding?.
1405  *
1406  * File data written to disk will be transcoded to the default external
1407  * encoding when written.
1408  *
1409  * The default external encoding is initialized by the locale or -E option.
1410  */
1411 static VALUE
1413 {
1414  return rb_enc_default_external();
1415 }
1416 
1417 void
1419 {
1420  if (NIL_P(encoding)) {
1421  rb_raise(rb_eArgError, "default external can not be nil");
1422  }
1423  enc_set_default_encoding(&default_external, encoding,
1424  "external");
1425 }
1426 
1427 /*
1428  * call-seq:
1429  * Encoding.default_external = enc
1430  *
1431  * Sets default external encoding. You should not set
1432  * Encoding::default_external in ruby code as strings created before changing
1433  * the value may have a different encoding from strings created after the value
1434  * was changed. Instead, you should use <tt>ruby -E</tt> to invoke ruby with
1435  * the correct default_external.
1436  *
1437  * See Encoding::default_external for information on how the default external
1438  * encoding is used.
1439  */
1440 static VALUE
1442 {
1443  rb_warning("setting Encoding.default_external");
1444  rb_enc_set_default_external(encoding);
1445  return encoding;
1446 }
1447 
1448 static struct default_encoding default_internal = {-2};
1449 
1450 rb_encoding *
1452 {
1453  if (!default_internal.enc && default_internal.index >= 0) {
1454  default_internal.enc = rb_enc_from_index(default_internal.index);
1455  }
1456  return default_internal.enc; /* can be NULL */
1457 }
1458 
1459 VALUE
1461 {
1462  /* Note: These functions cope with default_internal not being set */
1464 }
1465 
1466 /*
1467  * call-seq:
1468  * Encoding.default_internal -> enc
1469  *
1470  * Returns default internal encoding. Strings will be transcoded to the
1471  * default internal encoding in the following places if the default internal
1472  * encoding is not nil:
1473  *
1474  * * CSV
1475  * * Etc.sysconfdir and Etc.systmpdir
1476  * * File data read from disk
1477  * * File names from Dir
1478  * * Integer#chr
1479  * * String#inspect and Regexp#inspect
1480  * * Strings returned from Readline
1481  * * Strings returned from SDBM
1482  * * Time#zone
1483  * * Values from ENV
1484  * * Values in ARGV including $PROGRAM_NAME
1485  * * __FILE__
1486  *
1487  * Additionally String#encode and String#encode! use the default internal
1488  * encoding if no encoding is given.
1489  *
1490  * The script encoding (__ENCODING__), not default_internal, is used as the
1491  * encoding of created strings.
1492  *
1493  * Encoding::default_internal is initialized by the source file's
1494  * internal_encoding or -E option.
1495  */
1496 static VALUE
/* NOTE(review): listing line 1497 (the declarator) is absent from this page;
 * the cross-reference index gives it as
 * get_default_internal(VALUE klass). */
1498 {
 /* Forwards the result of rb_enc_default_internal() unchanged. */
1499  return rb_enc_default_internal();
1500 }
1501 
/*
 * Sets the default internal encoding.
 *
 * The argument is passed straight to enc_set_default_encoding() together
 * with the address of default_internal and the label string for this slot.
 * No nil check is performed here, in contrast to
 * rb_enc_set_default_external().
 *
 * NOTE(review): listing line 1503 (the declarator) is absent from this page;
 * the cross-reference index gives it as
 * rb_enc_set_default_internal(VALUE encoding).
 */
1502 void
1504 {
1505  enc_set_default_encoding(&default_internal, encoding,
1506  "internal");
1507 }
1508 
1509 /*
1510  * call-seq:
1511  * Encoding.default_internal = enc or nil
1512  *
1513  * Sets default internal encoding or removes default internal encoding when
1514  * passed nil. You should not set Encoding::default_internal in ruby code as
1515  * strings created before changing the value may have a different encoding
1516  * from strings created after the change. Instead you should use
1517  * <tt>ruby -E</tt> to invoke ruby with the correct default_internal.
1518  *
1519  * See Encoding::default_internal for information on how the default internal
1520  * encoding is used.
1521  */
1522 static VALUE
/* NOTE(review): listing line 1523 (the declarator) is absent from this page;
 * the cross-reference index gives it as
 * set_default_internal(VALUE klass, VALUE encoding). */
1524 {
 /* Emit a warning through rb_warning() before changing the setting. */
1525  rb_warning("setting Encoding.default_internal");
 /* The C-level setter performs the actual update. */
1526  rb_enc_set_default_internal(encoding);
 /* The argument itself is the return value. */
1527  return encoding;
1528 }
1529 
1530 /*
1531  * call-seq:
1532  * Encoding.locale_charmap -> string
1533  *
1534  * Returns the locale charmap name.
1535  * It returns nil if no appropriate information is available.
1536  *
1537  * Debian GNU/Linux
1538  * LANG=C
1539  * Encoding.locale_charmap #=> "ANSI_X3.4-1968"
1540  * LANG=ja_JP.EUC-JP
1541  * Encoding.locale_charmap #=> "EUC-JP"
1542  *
1543  * SunOS 5
1544  * LANG=C
1545  * Encoding.locale_charmap #=> "646"
1546  * LANG=ja
1547  * Encoding.locale_charmap #=> "eucJP"
1548  *
1549  * The result is highly platform dependent.
1550  * So Encoding.find(Encoding.locale_charmap) may cause an error.
1551  * If you need some encoding object even for unknown locale,
1552  * Encoding.find("locale") can be used.
1553  *
1554  */
1555 VALUE
/* NOTE(review): listing line 1556, the rest of this prototype, is absent from
 * this page; the cross-reference index lists rb_locale_charmap(VALUE klass)
 * with its definition in localeinit.c. */
1557 
/*
 * Defines one or more constants under rb_cEncoding for an encoding name.
 *
 * The name is defined verbatim when it already starts with an uppercase
 * letter and consists only of alphanumerics and underscores.  In addition,
 * a sanitised variant and/or an all-uppercase variant may be defined from a
 * modified copy of the name.  Names beginning with a digit, and names longer
 * than ENCODING_NAMELEN_MAX, produce no constant at the point where the
 * check fails.
 *
 * NOTE(review): listing line 1559 (the declarator) is absent from this page;
 * the cross-reference index gives it as
 * set_encoding_const(const char *, rb_encoding *), and the body refers to the
 * parameters as name and enc.
 */
1558 static void
1560 {
1561  VALUE encoding = rb_enc_from_encoding(enc);
1562  char *s = (char *)name;
1563  int haslower = 0, hasupper = 0, valid = 0;
1564 
 /* A name that begins with a digit gets no constant at all. */
1565  if (ISDIGIT(*s)) return;
 /* With a leading uppercase letter, scan over the run of alphanumerics and
  * underscores, remembering whether a lowercase letter occurs. */
1566  if (ISUPPER(*s)) {
1567  hasupper = 1;
1568  while (*++s && (ISALNUM(*s) || *s == '_')) {
1569  if (ISLOWER(*s)) haslower = 1;
1570  }
1571  }
 /* s at the terminator means nothing stopped the scan above: the name is
  * used unchanged as a constant name, subject to the length cap. */
1572  if (!*s) {
1573  if (s - name > ENCODING_NAMELEN_MAX) return;
1574  valid = 1;
1575  rb_define_const(rb_cEncoding, name, encoding);
1576  }
 /* Derived names are needed when the verbatim name was not usable, or when
  * it contains lowercase letters (an all-uppercase variant is added). */
1577  if (!valid || haslower) {
1578  size_t len = s - name;
1579  if (len > ENCODING_NAMELEN_MAX) return;
 /* Keep scanning from where the first pass stopped until both a
  * lowercase and an uppercase letter have been seen or the string ends. */
1580  if (!haslower || !hasupper) {
1581  do {
1582  if (ISLOWER(*s)) haslower = 1;
1583  if (ISUPPER(*s)) hasupper = 1;
1584  } while (*++s && (!haslower || !hasupper));
1585  len = s - name;
1586  }
 /* len becomes the full string length; the post-increment after the
  * length check makes the copy below include the terminating NUL. */
1587  len += strlen(s);
1588  if (len++ > ENCODING_NAMELEN_MAX) return;
 /* Work on a copy obtained from ALLOCA_N so the edits below do not touch
  * the string that was passed in; name and s now point at the copy. */
1589  MEMCPY(s = ALLOCA_N(char, len), name, char, len);
1590  name = s;
 /* Sanitised variant: uppercase a leading lowercase letter and turn every
  * non-alphanumeric character into an underscore.  It is defined only
  * when an uppercase letter was seen during scanning. */
1591  if (!valid) {
1592  if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s);
1593  for (; *s; ++s) {
1594  if (!ISALNUM(*s)) *s = '_';
1595  }
1596  if (hasupper) {
1597  rb_define_const(rb_cEncoding, name, encoding);
1598  }
1599  }
 /* All-uppercase variant: uppercase every remaining lowercase letter in
  * the copy and define the result. */
1600  if (haslower) {
1601  for (s = (char *)name; *s; ++s) {
1602  if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s);
1603  }
1604  rb_define_const(rb_cEncoding, name, encoding);
1605  }
1606  }
1607 }
1608 
/*
 * Per-entry callback that appends one encoding name to an array.
 *
 * arg is cast to the VALUE of the target array; name is cast to a C string,
 * turned into a US-ASCII Ruby string, frozen and pushed.  Always returns
 * ST_CONTINUE.
 *
 * NOTE(review): listing line 1610 (the declarator) is absent from this page;
 * the cross-reference index gives it as
 * rb_enc_name_list_i(st_data_t name, st_data_t idx, st_data_t arg).
 */
1609 static int
1611 {
1612  VALUE ary = (VALUE)arg;
1613  VALUE str = rb_usascii_str_new2((char *)name);
1614  OBJ_FREEZE(str);
1615  rb_ary_push(ary, str);
1616  return ST_CONTINUE;
1617 }
1618 
1619 /*
1620  * call-seq:
1621  * Encoding.name_list -> ["enc1", "enc2", ...]
1622  *
1623  * Returns the list of available encoding names.
1624  *
1625  * Encoding.name_list
1626  * #=> ["US-ASCII", "ASCII-8BIT", "UTF-8",
1627  * "ISO-8859-1", "Shift_JIS", "EUC-JP",
1628  * "Windows-31J",
1629  * "BINARY", "CP932", "eucJP"]
1630  *
1631  */
1632 
1633 static VALUE
/* NOTE(review): listing line 1634 (the declarator) is absent from this page;
 * the cross-reference index gives it as rb_enc_name_list(VALUE klass). */
1635 {
 /* Result array, created with capacity for every entry of enc_table.names. */
1636  VALUE ary = rb_ary_new2(enc_table.names->num_entries);
 /* NOTE(review): listing line 1637 is absent from this page; presumably it
  * is the statement that fills ary from enc_table.names -- confirm against
  * the original encoding.c. */
1638  return ary;
1639 }
1640 
/*
 * Per-entry callback that records one alias => encoding-name pair.
 *
 * arg points at two VALUEs: [0] is the hash receiving the pairs and [1] is an
 * array used as a cache of frozen encoding-name strings, indexed by the
 * integer taken from orig.  Always returns ST_CONTINUE; entries are skipped
 * without being stored when no encoding is available or when the entry name
 * equals the encoding name (compared with STRCASECMP).
 *
 * NOTE(review): listing line 1642 (the declarator) is absent from this page;
 * the cross-reference index gives it as
 * rb_enc_aliases_enc_i(st_data_t name, st_data_t orig, st_data_t arg).
 */
1641 static int
1643 {
1644  VALUE *p = (VALUE *)arg;
1645  VALUE aliases = p[0], ary = p[1];
1646  int idx = (int)orig;
1647  VALUE key, str = rb_ary_entry(ary, idx);
1648 
 /* Cache miss: the name string for this index has not been built yet. */
1649  if (NIL_P(str)) {
 /* NOTE(review): listing line 1650, which declares and initialises enc, is
  * absent from this page; confirm against the original encoding.c. */
1651 
1652  if (!enc) return ST_CONTINUE;
 /* An entry whose name matches the encoding name is not an alias. */
1653  if (STRCASECMP((char*)name, rb_enc_name(enc)) == 0) {
1654  return ST_CONTINUE;
1655  }
 /* Build the frozen name string once and remember it for later entries
  * that carry the same index. */
1656  str = rb_usascii_str_new2(rb_enc_name(enc));
1657  OBJ_FREEZE(str);
1658  rb_ary_store(ary, idx, str);
1659  }
 /* Store the pair: frozen entry name => frozen encoding name. */
1660  key = rb_usascii_str_new2((char *)name);
1661  OBJ_FREEZE(key);
1662  rb_hash_aset(aliases, key, str);
1663  return ST_CONTINUE;
1664 }
1665 
1666 /*
1667  * call-seq:
1668  * Encoding.aliases -> {"alias1" => "orig1", "alias2" => "orig2", ...}
1669  *
1670  * Returns the hash of available encoding alias and original encoding name.
1671  *
1672  * Encoding.aliases
1673  * #=> {"BINARY"=>"ASCII-8BIT", "ASCII"=>"US-ASCII", "ANSI_X3.4-1968"=>"US-ASCII",
1674  * "SJIS"=>"Shift_JIS", "eucJP"=>"EUC-JP", "CP932"=>"Windows-31J"}
1675  *
1676  */
1677 
1678 static VALUE
/* NOTE(review): listing line 1679 (the declarator) is absent from this page;
 * the cross-reference index gives it as rb_enc_aliases(VALUE klass). */
1680 {
 /* Two-slot work area: [0] is the hash that is returned, [1] is a scratch
  * array. */
1681  VALUE aliases[2];
1682  aliases[0] = rb_hash_new();
1683  aliases[1] = rb_ary_new();
 /* NOTE(review): listing line 1684 is absent from this page; presumably it
  * is the statement that fills the hash from the name table -- confirm
  * against the original encoding.c. */
1685  return aliases[0];
1686 }
1687 
1688 /*
1689  * An Encoding instance represents a character encoding usable in Ruby. It is
1690  * defined as a constant under the Encoding namespace. It has a name and
1691  * optionally, aliases:
1692  *
1693  * Encoding::ISO_8859_1.name
1694  * #=> "ISO-8859-1"
1695  *
1696  * Encoding::ISO_8859_1.names
1697  * #=> ["ISO-8859-1", "ISO8859-1"]
1698  *
1699  * Ruby methods dealing with encodings return or accept Encoding instances as
1700  * arguments (when a method accepts an Encoding instance as an argument, it
1701  * can be passed an Encoding name or alias instead).
1702  *
1703  * "some string".encoding
1704  * #=> #<Encoding:UTF-8>
1705  *
1706  * string = "some string".encode(Encoding::ISO_8859_1)
1707  * #=> "some string"
1708  * string.encoding
1709  * #=> #<Encoding:ISO-8859-1>
1710  *
1711  * "some string".encode "ISO-8859-1"
1712  * #=> "some string"
1713  *
1714  * <code>Encoding::ASCII_8BIT</code> is a special encoding that is usually
1715  * used for a byte string, not a character string. But as the name suggests,
1716  * its characters in the range of ASCII are considered as ASCII characters.
1717  * This is useful when you use ASCII-8BIT characters with other ASCII
1718  * compatible characters.
1719  *
1720  * == Changing an encoding
1721  *
1722  * The associated Encoding of a String can be changed in two different ways.
1723  *
1724  * First, it is possible to set the Encoding of a string to a new Encoding
1725  * without changing the internal byte representation of the string, with
1726  * String#force_encoding. This is how you can tell Ruby the correct encoding
1727  * of a string.
1728  *
1729  * string
1730  * #=> "R\xC3\xA9sum\xC3\xA9"
1731  * string.encoding
1732  * #=> #<Encoding:ISO-8859-1>
1733  * string.force_encoding(Encoding::UTF_8)
1734  * #=> "R\u00E9sum\u00E9"
1735  *
1736  * Second, it is possible to transcode a string, i.e. translate its internal
1737  * byte representation to another encoding. Its associated encoding is also
1738  * set to the other encoding. See String#encode for the various forms of
1739  * transcoding, and the Encoding::Converter class for additional control over
1740  * the transcoding process.
1741  *
1742  * string
1743  * #=> "R\u00E9sum\u00E9"
1744  * string.encoding
1745  * #=> #<Encoding:UTF-8>
1746  * string = string.encode!(Encoding::ISO_8859_1)
1747  * #=> "R\xE9sum\xE9"
1748  * string.encoding
1749  * #=> #<Encoding:ISO-8859-1>
1750  *
1751  * == Script encoding
1752  *
1753  * All Ruby script code has an associated Encoding which any String literal
1754  * created in the source code will be associated to.
1755  *
1756  * The default script encoding is <code>Encoding::UTF_8</code> after v2.0, but it can
1757  * be changed by a magic comment on the first line of the source code file (or
1758  * second line, if there is a shebang line on the first). The comment must
1759  * contain the word <code>coding</code> or <code>encoding</code>, followed
1760  * by a colon, space and the Encoding name or alias:
1761  *
1762  * # encoding: UTF-8
1763  *
1764  * "some string".encoding
1765  * #=> #<Encoding:UTF-8>
1766  *
1767  * The <code>__ENCODING__</code> keyword returns the script encoding of the file
1768  * in which the keyword is written:
1769  *
1770  * # encoding: ISO-8859-1
1771  *
1772  * __ENCODING__
1773  * #=> #<Encoding:ISO-8859-1>
1774  *
1775  * <code>ruby -K</code> will change the default locale encoding, but this is
1776  * not recommended. Ruby source files should declare their script encoding by a
1777  * magic comment even when they only depend on US-ASCII strings or regular
1778  * expressions.
1779  *
1780  * == Locale encoding
1781  *
1782  * The default encoding of the environment. Usually derived from locale.
1783  *
1784  * see Encoding.locale_charmap, Encoding.find('locale')
1785  *
1786  * == Filesystem encoding
1787  *
1788  * The default encoding of strings from the filesystem of the environment.
1789  * This is used for strings of file names or paths.
1790  *
1791  * see Encoding.find('filesystem')
1792  *
1793  * == External encoding
1794  *
1795  * Each IO object has an external encoding which indicates the encoding that
1796  * Ruby will use to read its data. By default Ruby sets the external encoding
1797  * of an IO object to the default external encoding. The default external
1798  * encoding is set by locale encoding or the interpreter <code>-E</code> option.
1799  * Encoding.default_external returns the current value of the external
1800  * encoding.
1801  *
1802  * ENV["LANG"]
1803  * #=> "UTF-8"
1804  * Encoding.default_external
1805  * #=> #<Encoding:UTF-8>
1806  *
1807  * $ ruby -E ISO-8859-1 -e "p Encoding.default_external"
1808  * #<Encoding:ISO-8859-1>
1809  *
1810  * $ LANG=C ruby -e 'p Encoding.default_external'
1811  * #<Encoding:US-ASCII>
1812  *
1813  * The default external encoding may also be set through
1814  * Encoding.default_external=, but you should not do this as strings created
1815  * before and after the change will have inconsistent encodings. Instead use
1816  * <code>ruby -E</code> to invoke ruby with the correct external encoding.
1817  *
1818  * When you know that the actual encoding of the data of an IO object is not
1819  * the default external encoding, you can reset its external encoding with
1820  * IO#set_encoding or set it at IO object creation (see IO.new options).
1821  *
1822  * == Internal encoding
1823  *
1824  * To process the data of an IO object which has an encoding different
1825  * from its external encoding, you can set its internal encoding. Ruby will use
1826  * this internal encoding to transcode the data when it is read from the IO
1827  * object.
1828  *
1829  * Conversely, when data is written to the IO object it is transcoded from the
1830  * internal encoding to the external encoding of the IO object.
1831  *
1832  * The internal encoding of an IO object can be set with
1833  * IO#set_encoding or at IO object creation (see IO.new options).
1834  *
1835  * The internal encoding is optional and when not set, the Ruby default
1836  * internal encoding is used. If not explicitly set this default internal
1837  * encoding is +nil+ meaning that by default, no transcoding occurs.
1838  *
1839  * The default internal encoding can be set with the interpreter option
1840  * <code>-E</code>. Encoding.default_internal returns the current internal
1841  * encoding.
1842  *
1843  * $ ruby -e 'p Encoding.default_internal'
1844  * nil
1845  *
1846  * $ ruby -E ISO-8859-1:UTF-8 -e "p [Encoding.default_external, \
1847  * Encoding.default_internal]"
1848  * [#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>]
1849  *
1850  * The default internal encoding may also be set through
1851  * Encoding.default_internal=, but you should not do this as strings created
1852  * before and after the change will have inconsistent encodings. Instead use
1853  * <code>ruby -E</code> to invoke ruby with the correct internal encoding.
1854  *
1855  * == IO encoding example
1856  *
1857  * In the following example a UTF-8 encoded string "R\u00E9sum\u00E9" is transcoded for
1858  * output to ISO-8859-1 encoding, then read back in and transcoded to UTF-8:
1859  *
1860  * string = "R\u00E9sum\u00E9"
1861  *
1862  * open("transcoded.txt", "w:ISO-8859-1") do |io|
1863  * io.write(string)
1864  * end
1865  *
1866  * puts "raw text:"
1867  * p File.binread("transcoded.txt")
1868  * puts
1869  *
1870  * open("transcoded.txt", "r:ISO-8859-1:UTF-8") do |io|
1871  * puts "transcoded text:"
1872  * p io.read
1873  * end
1874  *
1875  * While writing the file, the internal encoding is not specified as it is
1876  * only necessary for reading. While reading the file both the internal and
1877  * external encoding must be specified to obtain the correct result.
1878  *
1879  * $ ruby t.rb
1880  * raw text:
1881  * "R\xE9sum\xE9"
1882  *
1883  * transcoded text:
1884  * "R\u00E9sum\u00E9"
1885  *
1886  */
1887 
/*
 * Creates the Encoding class and registers its methods.
 *
 * NOTE(review): this page drops many lines of the function: the declarator
 * (line 1889; the cross-reference index gives Init_Encoding(void)) and lines
 * 1897-1898, 1906-1910, 1913, 1915-1919, 1923-1924 and 1930.  Only the
 * statements that survive are annotated below; consult the original
 * encoding.c for the complete initialisation sequence.
 */
1888 void
1890 {
 /* Within this function rb_intern is redirected to rb_intern_const. */
1891 #undef rb_intern
1892 #define rb_intern(str) rb_intern_const(str)
1893  VALUE list;
1894  int i;
1895 
 /* The class itself, a direct subclass of Object. */
1896  rb_cEncoding = rb_define_class("Encoding", rb_cObject);
 /* Instance methods; to_s and name share the enc_name implementation. */
1899  rb_define_method(rb_cEncoding, "to_s", enc_name, 0);
1900  rb_define_method(rb_cEncoding, "inspect", enc_inspect, 0);
1901  rb_define_method(rb_cEncoding, "name", enc_name, 0);
1902  rb_define_method(rb_cEncoding, "names", enc_names, 0);
1903  rb_define_method(rb_cEncoding, "dummy?", enc_dummy_p, 0);
1904  rb_define_method(rb_cEncoding, "ascii_compatible?", enc_ascii_compatible_p, 0);
1905  rb_define_method(rb_cEncoding, "replicate", enc_replicate, 1);
1911 
 /* _dump is registered with a variable argument count (-1). */
1912  rb_define_method(rb_cEncoding, "_dump", enc_dump, -1);
1914 
1920 
 /* Build an array holding one object from enc_new() per entry currently in
  * enc_table; RBASIC_CLEAR_CLASS is applied to the array first. */
1921  list = rb_ary_new2(enc_table.count);
1922  RBASIC_CLEAR_CLASS(list);
1925 
1926  for (i = 0; i < enc_table.count; ++i) {
1927  rb_ary_push(list, enc_new(enc_table.list[i].enc));
1928  }
1929 
1931 }
1932 
1933 /* locale insensitive ctype functions */
1934 
/* ctype_test(c, ctype): non-zero only when c passes rb_isascii() and the
 * ONIGENC_IS_ASCII_CODE_CTYPE test for the given class.  The && means the
 * class test is not evaluated for values that fail rb_isascii(). */
1935 #define ctype_test(c, ctype) \
1936  (rb_isascii(c) && ONIGENC_IS_ASCII_CODE_CTYPE((c), (ctype)))
1937 
/* One wrapper per character class; each returns the result of ctype_test for
 * its ONIGENC_CTYPE_* constant.
 * NOTE(review): listing line 1949 is absent from this page right after
 * rb_isupper; the cross-reference index lists rb_isxdigit(int c) at that
 * line. */
1938 int rb_isalnum(int c) { return ctype_test(c, ONIGENC_CTYPE_ALNUM); }
1939 int rb_isalpha(int c) { return ctype_test(c, ONIGENC_CTYPE_ALPHA); }
1940 int rb_isblank(int c) { return ctype_test(c, ONIGENC_CTYPE_BLANK); }
1941 int rb_iscntrl(int c) { return ctype_test(c, ONIGENC_CTYPE_CNTRL); }
1942 int rb_isdigit(int c) { return ctype_test(c, ONIGENC_CTYPE_DIGIT); }
1943 int rb_isgraph(int c) { return ctype_test(c, ONIGENC_CTYPE_GRAPH); }
1944 int rb_islower(int c) { return ctype_test(c, ONIGENC_CTYPE_LOWER); }
1945 int rb_isprint(int c) { return ctype_test(c, ONIGENC_CTYPE_PRINT); }
1946 int rb_ispunct(int c) { return ctype_test(c, ONIGENC_CTYPE_PUNCT); }
1947 int rb_isspace(int c) { return ctype_test(c, ONIGENC_CTYPE_SPACE); }
1948 int rb_isupper(int c) { return ctype_test(c, ONIGENC_CTYPE_UPPER); }
1950 
1951 int
/* NOTE(review): listing line 1952 (the declarator; the cross-reference index
 * gives rb_tolower(int c)) is absent from this page. */
1953 {
 /* NOTE(review): listing line 1954, the only statement of the body, is absent
  * from this page; consult the original encoding.c. */
1955 }
1956 
1957 int
/* NOTE(review): listing line 1958 (the declarator; the cross-reference index
 * gives rb_toupper(int c)) is absent from this page. */
1959 {
 /* NOTE(review): listing line 1960, the only statement of the body, is absent
  * from this page; consult the original encoding.c. */
1961 }
1962 
/*
 * Invokes func for every entry of enc_table.names, passing arg through, by
 * delegating to st_foreach().
 *
 * NOTE(review): listing line 1964 (the declarator) is absent from this page;
 * the cross-reference index gives it as
 * rb_enc_foreach_name(int (*func)(st_data_t name, st_data_t idx,
 * st_data_t arg), st_data_t arg).
 */
1963 void
1965 {
1966  st_foreach(enc_table.names, func, arg);
1967 }
VALUE data
Definition: tcltklib.c:3360
static void enc_set_index(VALUE obj, int idx)
Definition: encoding.c:779
static int rb_enc_name_list_i(st_data_t name, st_data_t idx, st_data_t arg)
Definition: encoding.c:1610
#define RB_TYPE_P(obj, type)
rb_encoding OnigEncodingUS_ASCII
RUBY_SYMBOL_EXPORT_BEGIN typedef unsigned long st_data_t
Definition: ripper.y:20
#define ISDIGIT(c)
Definition: ruby.h:1783
int rb_enc_codelen(int c, rb_encoding *enc)
Definition: encoding.c:1014
int rb_enc_get_index(VALUE obj)
Definition: encoding.c:739
#define is_obj_encoding(obj)
Definition: encoding.c:78
#define ONIGENC_CTYPE_GRAPH
rb_encoding * rb_enc_check(VALUE str1, VALUE str2)
Definition: encoding.c:838
VP_EXPORT int
Definition: bigdecimal.c:5172
VALUE rb_ary_entry(VALUE ary, long offset)
Definition: array.c:1179
void rb_bug(const char *fmt,...)
Definition: error.c:327
VALUE rb_require_safe(VALUE, int)
Definition: load.c:945
void rb_enc_copy(VALUE obj1, VALUE obj2)
Definition: encoding.c:916
#define FALSE
Definition: nkf.h:174
void rb_enc_set_base(const char *name, const char *orig)
Definition: encoding.c:360
static VALUE enc_m_loader(VALUE klass, VALUE str)
Definition: encoding.c:1236
code
Definition: tcltklib.c:3373
size_t strlen(const char *)
gz enc2
Definition: zlib.c:2274
VALUE rb_id2str(ID id)
Definition: ripper.c:17201
#define RSTRING_END(str)
VALUE rb_cEncoding
Definition: encoding.c:37
static VALUE enc_load(VALUE klass, VALUE str)
Definition: encoding.c:1229
int count
Definition: encoding.c:48
#define ONIGENC_CTYPE_PUNCT
int st_lookup(st_table *, st_data_t, st_data_t *)
int ruby_encoding_index
Definition: ripper.y:176
void rb_define_singleton_method(VALUE obj, const char *name, VALUE(*func)(ANYARGS), int argc)
Defines a singleton method for obj.
Definition: class.c:1646
static int rb_enc_aliases_enc_i(st_data_t name, st_data_t orig, st_data_t arg)
Definition: encoding.c:1642
static VALUE enc_inspect(VALUE self)
Definition: encoding.c:1052
static rb_encoding * set_base_encoding(int index, rb_encoding *base)
Definition: encoding.c:346
#define ENC_SET_DUMMY(enc)
#define ONIGENC_CTYPE_XDIGIT
static VALUE rb_enc_name_list(VALUE klass)
Definition: encoding.c:1634
static int enc_register_at(int index, const char *name, rb_encoding *encoding)
Definition: encoding.c:257
rb_funcall(memo->yielder, id_lshift, 1, rb_assoc_new(memo->prev_value, memo->prev_elts))
SSL_METHOD *(* func)(void)
Definition: ossl_ssl.c:113
#define rb_usascii_str_new2
#define rb_enc_codepoint(p, e, enc)
int rb_toupper(int c)
Definition: encoding.c:1958
void Init_Encoding(void)
Definition: encoding.c:1889
#define UChar
#define rb_check_frozen(obj)
static int str_to_encindex(VALUE enc)
Definition: encoding.c:203
#define rb_enc_name(enc)
#define ONIGENC_CTYPE_ALNUM
rb_encoding * rb_to_encoding(VALUE enc)
Definition: encoding.c:219
VALUE rb_enc_from_encoding(rb_encoding *encoding)
Definition: encoding.c:102
int rb_enc_tolower(int c, rb_encoding *enc)
Definition: encoding.c:1037
VALUE rb_eTypeError
Definition: error.c:548
#define OBJ_FREEZE(x)
void rb_define_alloc_func(VALUE, rb_alloc_func_t)
st_table * names
Definition: encoding.c:50
rb_encoding * rb_default_internal_encoding(void)
Definition: encoding.c:1451
VALUE enc
Definition: tcltklib.c:10318
VALUE rb_ary_push(VALUE ary, VALUE item)
Definition: array.c:900
#define ONIGENC_CTYPE_LOWER
st_table * st_init_strcasetable(void)
Definition: st.c:296
VALUE rb_eEncodingError
Definition: error.c:554
int st_insert2(st_table *, st_data_t, st_data_t, st_data_t(*)(st_data_t))
rb_encoding * rb_enc_compatible(VALUE str1, VALUE str2)
Definition: encoding.c:849
static VALUE enc_names(VALUE self)
Definition: encoding.c:1106
static struct @5 enc_table
#define RSTRING_PTR(str)
#define CLASS_OF(v)
static rb_encoding * must_encindex(int index)
Definition: encoding.c:152
NIL_P(eventloop_thread)
Definition: tcltklib.c:4056
int rb_isblank(int c)
Definition: encoding.c:1940
int safe
Definition: tcltklib.c:6418
static int enc_table_expand(int newsize)
Definition: encoding.c:241
Definition: nkf.c:115
VALUE rb_protect(VALUE(*proc)(VALUE), VALUE data, int *state)
Definition: eval.c:807
unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc)
Definition: encoding.c:993
static VALUE enc_new(rb_encoding *encoding)
Definition: encoding.c:81
void * realloc()
void rb_raise(VALUE exc, const char *fmt,...)
Definition: error.c:1857
#define ONIGENC_CTYPE_SPACE
unsigned int flags
Definition: ripper.y:177
return Qtrue
Definition: tcltklib.c:9618
void rb_enc_set_default_external(VALUE encoding)
Definition: encoding.c:1418
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
Definition: encoding.c:826
VALUE rb_obj_class(VALUE)
Definition: object.c:226
int rb_isupper(int c)
Definition: encoding.c:1948
#define T_FILE
int rb_enc_set_dummy(int index)
Definition: encoding.c:371
static VALUE rb_enc_aliases(VALUE klass)
Definition: encoding.c:1679
static VALUE set_default_external(VALUE klass, VALUE encoding)
Definition: encoding.c:1441
int index
Definition: tcltklib.c:4468
int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:946
int rb_isprint(int c)
Definition: encoding.c:1945
VALUE rb_locale_charmap(VALUE klass)
Definition: localeinit.c:23
VALUE enc_name
Definition: tcltklib.c:8441
const char * alias
Definition: nkf.c:1151
#define rb_enc_to_index(enc)
r
Definition: bigdecimal.c:1212
#define ENC_REGISTER(enc)
tmp
Definition: enum.c:447
int rb_enc_registered(const char *name)
Definition: encoding.c:608
#define ONIGENC_PRECISE_MBC_ENC_LEN(enc, p, e)
ID rb_id_encoding(void)
Definition: encoding.c:732
void Init_w32_codepage(void)
Definition: file.c:712
int rb_isdigit(int c)
Definition: encoding.c:1942
int size
Definition: encoding.c:49
static int enc_alias_internal(const char *alias, int idx)
Definition: encoding.c:512
#define ONIGENC_IS_ASCII_CODE(code)
int rb_filesystem_encindex(void)
Definition: encoding.c:1315
void rb_enc_init(void)
Definition: encoding.c:563
rb_encoding * rb_utf8_encoding(void)
Definition: encoding.c:1257
#define ONIGENC_CTYPE_CNTRL
void rb_loaderror(const char *fmt,...)
Definition: error.c:1879
void rb_undef_method(VALUE klass, const char *name)
Definition: class.c:1497
Definition: nkf.c:111
#define rb_ary_new2
VALUE rb_ivar_get(VALUE, ID)
Definition: variable.c:1115
#define ENC_CODERANGE_ASCIIONLY(obj)
VALUE rb_enc_sprintf(rb_encoding *enc, const char *format,...)
Definition: sprintf.c:1231
static VALUE enc_dummy_p(VALUE enc)
Definition: encoding.c:471
i
Definition: enum.c:446
VALUE ary
Definition: enum.c:674
const char * name
Definition: ripper.y:161
static VALUE rb_enc_from_encoding_index(int idx)
Definition: encoding.c:87
int rb_enc_toupper(int c, rb_encoding *enc)
Definition: encoding.c:1031
#define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c)
#define NORETURN(x)
Definition: ruby.h:33
Definition: nkf.c:87
#define ctype_test(c, ctype)
Definition: encoding.c:1935
void rb_encdb_set_unicode(int index)
Definition: encoding.c:554
static int str_find_encindex(VALUE enc)
Definition: encoding.c:190
int rb_to_encoding_index(VALUE enc)
Definition: encoding.c:171
#define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n)
rb_encoding * rb_default_external_encoding(void)
Definition: encoding.c:1366
memset(y->frac+ix+1, 0,(y->Prec-(ix+1))*sizeof(BDIGIT))
#define ENCODING_INLINE_MAX
static void not_encoding(VALUE enc)
Definition: encoding.c:135
return Qfalse
Definition: tcltklib.c:6790
int rb_isxdigit(int c)
Definition: encoding.c:1949
#define Qnil
Definition: enum.c:67
#define STRCASECMP(s1, s2)
int rb_ispunct(int c)
Definition: encoding.c:1946
int rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:940
Definition: encoding.c:40
Definition: nkf.c:99
static VALUE char * str
Definition: tcltklib.c:3539
int rb_isspace(int c)
Definition: encoding.c:1947
VALUE rb_ary_replace(VALUE copy, VALUE orig)
Definition: array.c:3342
VALUE rb_ary_new(void)
Definition: array.c:499
#define StringValueCStr(v)
unsigned long ID
Definition: ripper.y:89
VALUE rb_enc_default_external(void)
Definition: encoding.c:1380
#define ONIGENC_CTYPE_UPPER
#define rb_usascii_encindex()
void rb_define_const(VALUE, const char *, VALUE)
Definition: variable.c:2228
#define ISASCII(c)
Definition: ruby.h:1774
#define ONIGENC_CTYPE_ALPHA
#define ENC_CODERANGE_CLEAR(obj)
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
Definition: class.c:611
static VALUE VALUE obj
Definition: tcltklib.c:3150
#define RSTRING_LEN(str)
void rb_enc_set_index(VALUE obj, int idx)
Definition: encoding.c:790
int rb_enc_replicate(const char *name, rb_encoding *encoding)
Definition: encoding.c:380
void rb_ary_store(VALUE ary, long idx, VALUE val)
Definition: array.c:794
#define ENCODING_COUNT
Definition: encoding.c:55
#define ISALNUM(c)
Definition: ruby.h:1781
static void set_encoding_const(const char *, rb_encoding *)
Definition: encoding.c:1559
static int rb_enc_dummy_p(rb_encoding *enc)
Definition: ripper.y:245
#define T_STRING
#define MBCLEN_CHARFOUND_P(ret)
static VALUE enc_dump(int argc, VALUE *argv, VALUE self)
Definition: encoding.c:1221
int rb_encdb_alias(const char *alias, const char *orig)
Definition: encoding.c:543
#define xmalloc
int rb_locale_encindex(void)
Definition: encoding.c:1287
static rb_encoding * str_to_encoding(VALUE enc)
Definition: encoding.c:213
#define TypedData_Wrap_Struct(klass, data_type, sval)
Tcl_Obj * enc_list
Definition: tcltklib.c:10161
#define ISUPPER(c)
Definition: ruby.h:1779
void rb_undefined_alloc(VALUE klass)
Definition: object.c:1775
#define ENCDB_REGISTER(name, enc)
VALUE rb_enc_associate_index(VALUE obj, int idx)
Definition: encoding.c:798
VALUE rb_eEncCompatError
Definition: error.c:555
static rb_encoding * must_encoding(VALUE enc)
Definition: encoding.c:142
#define ISLOWER(c)
Definition: ruby.h:1780
#define ALLOCA_N(type, n)
const char * name
Definition: encoding.c:41
static int VALUE key
Definition: tkutil.c:265
#define rb_enc_mbc_to_codepoint(p, e, enc)
int len
Definition: enumerator.c:1332
VALUE arg
Definition: enum.c:2427
#define ONIGENC_CTYPE_BLANK
int rb_isgraph(int c)
Definition: encoding.c:1943
#define rb_utf8_encindex()
static int enc_set_default_encoding(struct default_encoding *def, VALUE encoding, const char *name)
Definition: encoding.c:1339
rb_encoding * rb_find_encoding(VALUE enc)
Definition: encoding.c:226
VALUE * argv
Definition: tcltklib.c:1969
int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
Definition: encoding.c:970
int st_foreach(st_table *, int(*)(ANYARGS), st_data_t)
Definition: st.c:1034
int rb_encdb_dummy(const char *name)
Definition: encoding.c:447
#define rb_enc_mbminlen(enc)
static int enc_register(const char *name, rb_encoding *encoding)
Definition: encoding.c:291
#define TRUE
Definition: nkf.h:175
#define ENC_DUMMY_P(enc)
static int enc_check_encoding(VALUE obj)
Definition: encoding.c:125
#define rb_ascii8bit_encindex()
VALUE rb_sprintf(const char *format,...)
Definition: sprintf.c:1250
#define StringValue(v)
#define RDATA(obj)
#define MBCLEN_CHARFOUND_LEN(ret)
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:958
int rb_enc_unicode_p(rb_encoding *enc)
Definition: encoding.c:496
#define rb_isascii(c)
#define RUBY_TYPED_FREE_IMMEDIATELY
#define T_REGEXP
#define TOLOWER(c)
register char * s
Definition: os2.c:56
#define CONST_ID(var, str)
void rb_gc_register_mark_object(VALUE)
Definition: gc.c:4923
#define strdup(s)
Definition: util.h:67
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Definition: class.c:1719
static int min(int a, int b)
Definition: strftime.c:131
rb_encoding * rb_usascii_encoding(void)
Definition: encoding.c:1272
int rb_encdb_replicate(const char *name, const char *orig)
Definition: encoding.c:425
#define ENCODING_NAMELEN_MAX
Definition: encoding.c:58
int rb_define_dummy_encoding(const char *name)
Definition: encoding.c:437
#define FL_TAINT
static struct default_encoding default_internal
Definition: encoding.c:1448
#define debug(x)
Definition: _sdbm.c:51
int argc
Definition: tcltklib.c:1968
static VALUE enc_compatible_p(VALUE klass, VALUE str1, VALUE str2)
Definition: encoding.c:1200
rb_encoding * rb_locale_encoding(void)
Definition: encoding.c:1309
rb_hash_aset(hash, RARRAY_AREF(key_value_pair, 0), RARRAY_AREF(key_value_pair, 1))
#define ENCODING_SET_INLINED(obj, i)
#define ONIGENC_IS_UNICODE(enc)
VALUE rb_obj_encoding(VALUE obj)
Definition: encoding.c:930
static int enc_autoload(rb_encoding *)
Definition: encoding.c:659
int rb_islower(int c)
Definition: encoding.c:1944
VALUE idx
Definition: enumerator.c:499
ruby_verbose
Definition: tcltklib.c:5796
void rb_enc_foreach_name(int(*func)(st_data_t name, st_data_t idx, st_data_t arg), st_data_t arg)
Definition: encoding.c:1964
static VALUE set_default_internal(VALUE klass, VALUE encoding)
Definition: encoding.c:1523
VpDivd * c
Definition: bigdecimal.c:1223
#define enc_autoload_p(enc)
Definition: encoding.c:61
#define ONIGENC_FLAG_UNICODE
void rb_marshal_define_compat(VALUE newclass, VALUE oldclass, VALUE(*dumper)(VALUE), VALUE(*loader)(VALUE, VALUE))
Definition: marshal.c:115
#define MEMCPY(p1, p2, type, n)
static void enc_check_duplication(const char *name)
Definition: encoding.c:338
static size_t enc_memsize(const void *p)
Definition: encoding.c:66
static ID id_encoding
Definition: encoding.c:36
static VALUE enc_s_alloc(VALUE klass)
Definition: encoding.c:1213
static int enc_names_i(st_data_t name, st_data_t idx, st_data_t args)
Definition: encoding.c:1085
#define ENC_CODERANGE_7BIT
rb_encoding * rb_enc_get(VALUE obj)
Definition: encoding.c:832
static VALUE get_default_external(VALUE klass)
Definition: encoding.c:1412
static struct default_encoding default_external
Definition: encoding.c:1334
#define ONIGENC_CODE_TO_MBCLEN(enc, code)
#define SYMBOL_P(x)
int rb_enc_str_asciionly_p(VALUE)
Definition: string.c:448
VALUE name
Definition: enum.c:572
void rb_set_errinfo(VALUE err)
Definition: eval.c:1517
#define ONIGENC_MBC_ENC_LEN(enc, p, e)
static VALUE enc_replicate(VALUE encoding, VALUE name)
Definition: encoding.c:401
DATA_PTR(self)
rb_encoding * enc
Definition: encoding.c:1331
void rb_str_fill_terminator(VALUE str, const int termlen)
Definition: string.c:1669
args[0]
Definition: enum.c:585
ruby_debug
Definition: tcltklib.c:5795
RUBY_EXTERN VALUE rb_cObject
Definition: ripper.y:1561
void rb_enc_set_default_internal(VALUE encoding)
Definition: encoding.c:1503
static VALUE enc_ascii_compatible_p(VALUE enc)
Definition: encoding.c:487
#define rb_enc_code_to_mbclen(c, enc)
#define valid_encoding_name_p(name)
Definition: encoding.c:59
#define ONIGENC_CTYPE_DIGIT
int rb_enc_alias(const char *alias, const char *orig)
Definition: encoding.c:528
klass
Definition: tcltklib.c:3496
#define INT2NUM(x)
static VALUE require_enc(VALUE enclib)
Definition: encoding.c:621
#define is_data_encoding(obj)
Definition: encoding.c:77
struct rb_encoding_entry * list
Definition: encoding.c:47
rb_encoding * rb_filesystem_encoding(void)
Definition: encoding.c:1324
static int enc_capable(VALUE obj)
Definition: encoding.c:716
rb_encoding * rb_enc_get_from_index(int index)
Definition: encoding.c:602
Definition: nkf.c:112
static const rb_data_type_t encoding_data_type
Definition: encoding.c:71
int st_insert(st_table *, st_data_t, st_data_t)
#define ENC_INDEX_MASK
static st_data_t enc_dup_name(st_data_t name)
Definition: encoding.c:502
register C_block * p
Definition: crypt.c:309
int rb_isalnum(int c)
Definition: encoding.c:1938
#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c)
rb_ivar_set(yielder, id_memo, LONG2NUM(++count))
VALUE rb_enc_default_internal(void)
Definition: encoding.c:1460
static int check_encoding(rb_encoding *enc)
Definition: encoding.c:113
static VALUE get_default_internal(VALUE klass)
Definition: encoding.c:1497
#define rb_safe_level()
Definition: tcltklib.c:95
int rb_tolower(int c)
Definition: encoding.c:1952
Definition: nkf.c:113
data n
Definition: enum.c:860
#define rb_enc_asciicompat(enc)
#define NUM2INT(x)
static int enc_replicate_with_index(const char *name, rb_encoding *origenc, int idx)
Definition: encoding.c:409
VALUE rb_hash_new(void)
Definition: hash.c:307
#define rb_errinfo()
Definition: tcltklib.c:90
#define BUILTIN_TYPE(x)
#define PRIsVALUE
#define RBASIC_CLEAR_CLASS(obj)
BDIGIT e
Definition: bigdecimal.c:5209
#define rb_enc_isascii(c, enc)
unsigned long VALUE
Definition: ripper.y:88
static int enc_alias(const char *alias, int idx)
Definition: encoding.c:519
rb_encoding * rb_ascii8bit_encoding(void)
Definition: encoding.c:1242
void rb_warning(const char *fmt,...)
Definition: error.c:236
int rb_enc_find_index(const char *name)
Definition: encoding.c:684
int rb_iscntrl(int c)
Definition: encoding.c:1941
int rb_enc_register(const char *name, rb_encoding *encoding)
Definition: encoding.c:304
Definition: nkf.c:108
static VALUE rb_encoding_list
Definition: encoding.c:38
#define SPECIAL_CONST_P(x)
void rb_encdb_declare(const char *name)
Definition: encoding.c:328
#define ONIGENC_CTYPE_PRINT
#define rb_intern(str)
void rb_gc_mark_encodings(void)
Definition: encoding.c:236
#define NULL
Definition: _sdbm.c:102
#define T_DATA
#define UNSPECIFIED_ENCODING
Definition: encoding.c:56
VALUE rb_check_string_type(VALUE)
Definition: string.c:1678
rb_encoding OnigEncodingUTF_8
#define ENC_TO_ENCINDEX(enc)
int rb_enc_str_coderange(VALUE)
Definition: string.c:435
void rb_define_method(VALUE klass, const char *name, VALUE(*func)(ANYARGS), int argc)
Definition: class.c:1479
Definition: nkf.c:118
void rb_warn(const char *fmt,...)
Definition: error.c:223
#define SYM2ID(x)
rb_encoding * enc
Definition: encoding.c:42
VALUE rb_eArgError
Definition: error.c:549
static int load_encoding(const char *name)
Definition: encoding.c:628
rb_encoding * rb_enc_find(const char *name)
Definition: encoding.c:708
int Init_enc_set_filesystem_encoding(void)
Definition: localeinit.c:51
#define FL_UNSET(x, f)
static VALUE enc_find(VALUE klass, VALUE enc)
Definition: encoding.c:1165
#define ENCODING_GET_INLINED(obj)
int rb_isalpha(int c)
Definition: encoding.c:1939
rb_encoding * rb_enc_from_index(int index)
Definition: encoding.c:590
Definition: nkf.c:117
Definition: nkf.c:120
rb_encoding * base
Definition: encoding.c:43