Prism Ruby parser
Loading...
Searching...
No Matches
encoding.h
Go to the documentation of this file.
1
6#ifndef PRISM_ENCODING_H
7#define PRISM_ENCODING_H
8
9#include "prism/defines.h"
11
12#include <assert.h>
13#include <stdbool.h>
14#include <stddef.h>
15#include <stdint.h>
16
23typedef struct {
29 size_t (*char_width)(const uint8_t *b, ptrdiff_t n);
30
36 size_t (*alpha_char)(const uint8_t *b, ptrdiff_t n);
37
43 size_t (*alnum_char)(const uint8_t *b, ptrdiff_t n);
44
50 bool (*isupper_char)(const uint8_t *b, ptrdiff_t n);
51
56 const char *name;
57
63
/**
 * Flag bit 0 (value 1), named for the alphabetic character class.
 *
 * The replacement list is parenthesized so the macro always behaves as a
 * single operand, regardless of the operators that surround it at the point
 * of use (e.g. `X + 1`, `X * 2`, `!X`).
 *
 * NOTE(review): presumably tested against entries of the encoding lookup
 * tables such as pm_encoding_unicode_table - confirm against encoding.c.
 */
#define PRISM_ENCODING_ALPHABETIC_BIT (1 << 0)

/**
 * Flag bit 1 (value 2), named for the alphanumeric character class.
 * Parenthesized for the same operator-precedence reason as the bit above.
 */
#define PRISM_ENCODING_ALPHANUMERIC_BIT (1 << 1)

/**
 * Flag bit 2 (value 4), named for the uppercase character class.
 * Parenthesized for the same operator-precedence reason as the bits above.
 */
#define PRISM_ENCODING_UPPERCASE_BIT (1 << 2)
81
/**
 * Return the size of the next character in the UTF-8 encoding.
 *
 * NOTE(review): presumably `b` points at the bytes to examine and `n` is the
 * number of bytes available from `b` - confirm against encoding.c.
 */
90size_t pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n);
91
/**
 * Return the size of the next character in the UTF-8 encoding if it is an
 * alphabetical character.
 *
 * NOTE(review): the value returned for a non-alphabetical character is not
 * visible in this header (presumably 0), and `b`/`n` presumably describe the
 * input bytes and how many are available - confirm against encoding.c.
 */
101size_t pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n);
102
/**
 * Return the size of the next character in the UTF-8 encoding if it is an
 * alphanumeric character.
 *
 * NOTE(review): the value returned for a non-alphanumeric character is not
 * visible in this header (presumably 0), and `b`/`n` presumably describe the
 * input bytes and how many are available - confirm against encoding.c.
 */
112size_t pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n);
113
/**
 * Return true if the next character in the UTF-8 encoding is an uppercase
 * character.
 *
 * NOTE(review): `b`/`n` presumably describe the input bytes and how many are
 * available - confirm against encoding.c.
 */
123bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n);
124
/**
 * 256-entry lookup table of bytes, defined in encoding.c. It is referenced
 * both by the UTF-8 encoding implementation and by the parser directly.
 *
 * NOTE(review): presumably indexed by byte value, with each entry holding a
 * combination of the PRISM_ENCODING_*_BIT flags - confirm against encoding.c.
 */
131extern const uint8_t pm_encoding_unicode_table[256];
132
136typedef enum {
137 PM_ENCODING_UTF_8 = 0,
138 PM_ENCODING_US_ASCII,
139 PM_ENCODING_ASCII_8BIT,
140 PM_ENCODING_EUC_JP,
141 PM_ENCODING_WINDOWS_31J,
142
143// We optionally support excluding the full set of encodings to only support the
144// minimum necessary to process Ruby code without encoding comments.
145#ifndef PRISM_ENCODING_EXCLUDE_FULL
146 PM_ENCODING_BIG5,
147 PM_ENCODING_BIG5_HKSCS,
148 PM_ENCODING_BIG5_UAO,
149 PM_ENCODING_CESU_8,
150 PM_ENCODING_CP51932,
151 PM_ENCODING_CP850,
152 PM_ENCODING_CP852,
153 PM_ENCODING_CP855,
154 PM_ENCODING_CP949,
155 PM_ENCODING_CP950,
156 PM_ENCODING_CP951,
157 PM_ENCODING_EMACS_MULE,
158 PM_ENCODING_EUC_JP_MS,
159 PM_ENCODING_EUC_JIS_2004,
160 PM_ENCODING_EUC_KR,
161 PM_ENCODING_EUC_TW,
162 PM_ENCODING_GB12345,
163 PM_ENCODING_GB18030,
164 PM_ENCODING_GB1988,
165 PM_ENCODING_GB2312,
166 PM_ENCODING_GBK,
167 PM_ENCODING_IBM437,
168 PM_ENCODING_IBM720,
169 PM_ENCODING_IBM737,
170 PM_ENCODING_IBM775,
171 PM_ENCODING_IBM852,
172 PM_ENCODING_IBM855,
173 PM_ENCODING_IBM857,
174 PM_ENCODING_IBM860,
175 PM_ENCODING_IBM861,
176 PM_ENCODING_IBM862,
177 PM_ENCODING_IBM863,
178 PM_ENCODING_IBM864,
179 PM_ENCODING_IBM865,
180 PM_ENCODING_IBM866,
181 PM_ENCODING_IBM869,
182 PM_ENCODING_ISO_8859_1,
183 PM_ENCODING_ISO_8859_2,
184 PM_ENCODING_ISO_8859_3,
185 PM_ENCODING_ISO_8859_4,
186 PM_ENCODING_ISO_8859_5,
187 PM_ENCODING_ISO_8859_6,
188 PM_ENCODING_ISO_8859_7,
189 PM_ENCODING_ISO_8859_8,
190 PM_ENCODING_ISO_8859_9,
191 PM_ENCODING_ISO_8859_10,
192 PM_ENCODING_ISO_8859_11,
193 PM_ENCODING_ISO_8859_13,
194 PM_ENCODING_ISO_8859_14,
195 PM_ENCODING_ISO_8859_15,
196 PM_ENCODING_ISO_8859_16,
197 PM_ENCODING_KOI8_R,
198 PM_ENCODING_KOI8_U,
199 PM_ENCODING_MAC_CENT_EURO,
200 PM_ENCODING_MAC_CROATIAN,
201 PM_ENCODING_MAC_CYRILLIC,
202 PM_ENCODING_MAC_GREEK,
203 PM_ENCODING_MAC_ICELAND,
204 PM_ENCODING_MAC_JAPANESE,
205 PM_ENCODING_MAC_ROMAN,
206 PM_ENCODING_MAC_ROMANIA,
207 PM_ENCODING_MAC_THAI,
208 PM_ENCODING_MAC_TURKISH,
209 PM_ENCODING_MAC_UKRAINE,
210 PM_ENCODING_SHIFT_JIS,
211 PM_ENCODING_SJIS_DOCOMO,
212 PM_ENCODING_SJIS_KDDI,
213 PM_ENCODING_SJIS_SOFTBANK,
214 PM_ENCODING_STATELESS_ISO_2022_JP,
215 PM_ENCODING_STATELESS_ISO_2022_JP_KDDI,
216 PM_ENCODING_TIS_620,
217 PM_ENCODING_UTF8_MAC,
218 PM_ENCODING_UTF8_DOCOMO,
219 PM_ENCODING_UTF8_KDDI,
220 PM_ENCODING_UTF8_SOFTBANK,
221 PM_ENCODING_WINDOWS_1250,
222 PM_ENCODING_WINDOWS_1251,
223 PM_ENCODING_WINDOWS_1252,
224 PM_ENCODING_WINDOWS_1253,
225 PM_ENCODING_WINDOWS_1254,
226 PM_ENCODING_WINDOWS_1255,
227 PM_ENCODING_WINDOWS_1256,
228 PM_ENCODING_WINDOWS_1257,
229 PM_ENCODING_WINDOWS_1258,
230 PM_ENCODING_WINDOWS_874,
231#endif
232
233 PM_ENCODING_MAXIMUM
235
/**
 * This is the table of all of the encodings that prism supports. It has
 * PM_ENCODING_MAXIMUM entries and is defined in encoding.c; the *_ENTRY
 * macros in this header index it with pm_encoding_type_t constants.
 */
239extern const pm_encoding_t pm_encodings[PM_ENCODING_MAXIMUM];
240
/* Pointer to the UTF-8 entry of the pm_encodings table. */
245#define PM_ENCODING_UTF_8_ENTRY (&pm_encodings[PM_ENCODING_UTF_8])
246
/* Pointer to the US-ASCII entry of the pm_encodings table. */
252#define PM_ENCODING_US_ASCII_ENTRY (&pm_encodings[PM_ENCODING_US_ASCII])
253
/* Pointer to the ASCII-8BIT entry of the pm_encodings table. */
259#define PM_ENCODING_ASCII_8BIT_ENTRY (&pm_encodings[PM_ENCODING_ASCII_8BIT])
260
/* Pointer to the EUC-JP entry of the pm_encodings table. */
265#define PM_ENCODING_EUC_JP_ENTRY (&pm_encodings[PM_ENCODING_EUC_JP])
266
/* Pointer to the Windows-31J entry of the pm_encodings table. */
271#define PM_ENCODING_WINDOWS_31J_ENTRY (&pm_encodings[PM_ENCODING_WINDOWS_31J])
272
/**
 * Parse the given name of an encoding and return a pointer to the
 * corresponding encoding struct if one is found.
 *
 * NOTE(review): `start` and `end` presumably delimit the bytes of the
 * encoding name, and the result when no encoding matches is not visible in
 * this header (presumably NULL) - confirm against encoding.c.
 */
281const pm_encoding_t * pm_encoding_find(const uint8_t *start, const uint8_t *end);
282
283#endif
Macro definitions used throughout the prism library.
bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n)
Return true if the next character in the UTF-8 encoding is an uppercase character.
Definition encoding.c:2346
const pm_encoding_t pm_encodings[PM_ENCODING_MAXIMUM]
This is the table of all of the encodings that prism supports.
Definition encoding.c:4295
pm_encoding_type_t
These are all of the encodings that prism supports.
Definition encoding.h:136
const pm_encoding_t * pm_encoding_find(const uint8_t *start, const uint8_t *end)
Parse the given name of an encoding and return a pointer to the corresponding encoding struct if one ...
Definition encoding.c:5026
const uint8_t pm_encoding_unicode_table[256]
This lookup table is referenced in both the UTF-8 encoding file and the parser directly in order to s...
Definition encoding.c:2164
size_t pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n)
Return the size of the next character in the UTF-8 encoding if it is an alphabetical character.
Definition encoding.c:2306
size_t pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n)
Return the size of the next character in the UTF-8 encoding.
Definition encoding.c:2287
size_t pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n)
Return the size of the next character in the UTF-8 encoding if it is an alphanumeric character.
Definition encoding.c:2326
A custom strncasecmp implementation.
This struct defines the functions necessary to implement the encoding interface so we can determine h...
Definition encoding.h:23
bool multibyte
Return true if the encoding is a multibyte encoding.
Definition encoding.h:61
const char * name
The name of the encoding.
Definition encoding.h:56