Ruby 1.9.3p551 (2014-11-13 revision 48407)
euc_kr.c
Go to the documentation of this file.
/**********************************************************************
  euc_kr.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
 * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
29 
30 #include "regenc.h"
31 
/*
 * Length table indexed by the first byte of a character.
 * Entries 0x00-0xa0 and 0xff hold 1; entries 0xa1-0xfe hold 2.
 * euckr_mbc_enc_len() reads it to compute how many more bytes are
 * needed when the input stops right after the first byte.
 */
static const int EncLen_EUCKR[] = {
  /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xa0 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xb0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xd0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xe0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xf0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
50 
/*
 * States of the byte-sequence validator.  Negative values are terminal
 * (FAILURE, ACCEPT); non-negative values are row indexes into trans[].
 */
typedef enum { FAILURE = -2, ACCEPT = -1, S0 = 0, S1 } state_t;
#define A ACCEPT
#define F FAILURE
/*
 * Transition table: trans[state][byte] gives the next state.
 *   S0 (first byte):  0x00-0x7f accept, 0xa1-0xfe move to S1,
 *                     0x80-0xa0 and 0xff fail.
 *   S1 (second byte): 0xa1-0xfe accept, every other byte fails.
 */
static const signed char trans[][0x100] = {
  { /* S0   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
    /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* a */ F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F
  },
  { /* S1   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
    /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* a */ F, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* e */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* f */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F
  }
};
#undef A
#undef F
94 
95 static int
97 {
98  int firstbyte = *p++;
99  state_t s = trans[0][firstbyte];
100 #define RETURN(n) \
101  return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) : \
102  ONIGENC_CONSTRUCT_MBCLEN_INVALID()
103  if (s < 0) RETURN(1);
104  if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_EUCKR[firstbyte]-1);
105  s = trans[s][*p++];
106  RETURN(2);
107 #undef RETURN
108 }
109 
110 static OnigCodePoint
112 {
113  return onigenc_mbn_mbc_to_code(enc, p, end);
114 }
115 
116 static int
118 {
119  return onigenc_mb2_code_to_mbc(enc, code, buf);
120 }
121 
122 static int
124  UChar* lower, OnigEncoding enc)
125 {
126  return onigenc_mbn_mbc_case_fold(enc, flag,
127  pp, end, lower);
128 }
129 
/* Disabled: compiled out by the surrounding #if 0. */
#if 0
static int
euckr_is_mbc_ambiguous(OnigCaseFoldType flag,
                       const UChar** pp, const UChar* end, OnigEncoding enc)
{
  return onigenc_mbn_is_mbc_ambiguous(enc, flag, pp, end);
}
#endif
138 
139 static int
141 {
142  return onigenc_mb2_is_code_ctype(enc, code, ctype);
143 }
144 
145 #define euckr_islead(c) ((c) < 0xa1 || (c) == 0xff)
146 
147 static UChar*
149 {
150  /* Assumed in this encoding,
151  mb-trail bytes don't mix with single bytes.
152  */
153  const UChar *p;
154  int len;
155 
156  if (s <= start) return (UChar* )s;
157  p = s;
158 
159  while (!euckr_islead(*p) && p > start) p--;
160  len = enclen(enc, p, end);
161  if (p + len > s) return (UChar* )p;
162  p += len;
163  return (UChar* )(p + ((s - p) & ~1));
164 }
165 
166 static int
168 {
169  const UChar c = *s;
170  if (c <= 0x7e) return TRUE;
171  else return FALSE;
172 }
173 
174 OnigEncodingDefine(euc_kr, EUC_KR) = {
176  "EUC-KR", /* name */
177  2, /* max enc length */
178  1, /* min enc length */
191 };
192 ENC_ALIAS("eucKR", "EUC-KR")
unsigned int OnigCodePoint
Definition: oniguruma.h:111
#define OnigEncodingDefine(f, n)
Definition: big5.c:90
#define FALSE
Definition: nkf.h:185
code
Definition: tcltklib.c:3375
unsigned int OnigCaseFoldType
Definition: oniguruma.h:117
static int euckr_mbc_enc_len(const UChar *p, const UChar *e, OnigEncoding enc ARG_UNUSED)
Definition: euc_kr.c:96
int onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED, const UChar **pp, const UChar *end ARG_UNUSED, UChar *lower)
Definition: regenc.c:691
int onigenc_mb2_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
Definition: regenc.c:733
SYMID SyckParser * p
Definition: yaml2byte.c:119
VALUE enc
Definition: tcltklib.c:10402
static int euckr_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc)
Definition: euc_kr.c:140
static const signed char trans[][0x100]
Definition: euc_kr.c:54
int onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
Definition: regenc.c:750
state_t
Definition: big5.c:90
int onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, OnigApplyAllCaseFoldFunc f, void *arg, OnigEncoding enc ARG_UNUSED)
Definition: regenc.c:409
#define ARG_UNUSED
#define ENC_ALIAS(name, orig)
Definition: encdb.c:18
#define euckr_islead(c)
Definition: euc_kr.c:145
flag
Definition: tcltklib.c:2039
int onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code, unsigned int ctype)
Definition: regenc.c:823
Definition: big5.c:90
static int euckr_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc)
Definition: euc_kr.c:117
static OnigCodePoint euckr_mbc_to_code(const UChar *p, const UChar *end, OnigEncoding enc)
Definition: euc_kr.c:111
#define enclen(enc, p, e)
Definition: big5.c:90
#define A
Definition: euc_kr.c:52
unsigned char buf[MIME_BUF_SIZE]
Definition: nkf.c:3913
Definition: big5.c:90
#define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n)
Definition: oniguruma.h:250
static int euckr_mbc_case_fold(OnigCaseFoldType flag, const UChar **pp, const UChar *end, UChar *lower, OnigEncoding enc)
Definition: euc_kr.c:123
int onigenc_is_mbc_newline_0x0a(const UChar *p, const UChar *end, OnigEncoding enc ARG_UNUSED)
Definition: regenc.c:580
#define RETURN(n)
Definition: euc_kr.c:100
#define F
Definition: euc_kr.c:53
#define TRUE
Definition: nkf.h:186
register char * s
Definition: os2.c:56
static int euckr_is_allowed_reverse_match(const UChar *s, const UChar *end ARG_UNUSED, OnigEncoding enc ARG_UNUSED)
Definition: euc_kr.c:167
register unsigned int len
Definition: name2ctype.h:22210
int onigenc_not_support_get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint *ranges[], OnigEncoding enc)
Definition: regenc.c:572
int onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED, const OnigUChar *p, const OnigUChar *end ARG_UNUSED, OnigCaseFoldCodeItem items[], OnigEncoding enc ARG_UNUSED)
Definition: regenc.c:432
VpDivd * c
Definition: bigdecimal.c:1163
#define UChar
Definition: oniguruma.h:107
gz end
Definition: zlib.c:2033
static UChar * euckr_left_adjust_char_head(const UChar *start, const UChar *s, const UChar *end, OnigEncoding enc)
Definition: euc_kr.c:148
char * start
Definition: yaml2byte.c:126
BDIGIT e
Definition: bigdecimal.c:4946
static const int EncLen_EUCKR[]
Definition: euc_kr.c:32
OnigCodePoint onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar *p, const UChar *end)
Definition: regenc.c:673
int onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar *p, UChar *end)
Definition: regenc.c:790