This source file includes the following definitions.
- SINGLE_BYTE_CHAR_P
- CHAR_BYTE8_P
- BYTE8_TO_CHAR
- UNIBYTE_TO_CHAR
- CHAR_TO_BYTE8
- CHAR_TO_BYTE_SAFE
- CHAR_BYTE8_HEAD_P
- make_char_multibyte
- CHAR_VALID_P
- CHARACTERP
- CHECK_CHARACTER
- CHECK_CHARACTER_CAR
- CHECK_CHARACTER_CDR
- CHAR_PRINTABLE_P
- CHAR_BYTES
- CHAR_LEADING_CODE
- CHAR_STRING
- BYTE8_STRING
- LEADING_CODE_P
- TRAILING_CODE_P
- CHAR_HEAD_P
- BYTES_BY_CHAR_HEAD
- multibyte_length
- raw_prev_char_len
- string_char_and_length
- STRING_CHAR
- string_char_advance
- fetch_string_char_advance
- fetch_string_char_as_multibyte_advance
- fetch_string_char_advance_no_check
- CHAR_VARIATION_SELECTOR_P
- char_surrogate_p
- char_resolve_modifier_mask
- char_hexdigit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 #ifndef EMACS_CHARACTER_H
24 #define EMACS_CHARACTER_H
25
26 #include <verify.h>
27 #include "lisp.h"
28
29 INLINE_HEADER_BEGIN
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
/* Largest character code accepted by CHAR_VALID_P.  */
enum { MAX_CHAR = 0x3FFFFF };

/* Largest Unicode code point.  */
enum { MAX_UNICODE_CHAR = 0x10FFFF };

/* Largest character code whose multibyte form occupies at most N
   bytes (see CHAR_BYTES and CHAR_STRING).  Codes above
   MAX_5_BYTE_CHAR are the raw-byte characters tested by
   CHAR_BYTE8_P.  */
enum
{
  MAX_1_BYTE_CHAR = 0x7F,
  MAX_2_BYTE_CHAR = 0x7FF,
  MAX_3_BYTE_CHAR = 0xFFFF,
  MAX_4_BYTE_CHAR = 0x1FFFFF,
  MAX_5_BYTE_CHAR = 0x3FFF7F
};

/* Smallest and largest leading byte that CHAR_LEADING_CODE produces
   for a character needing more than one byte.  */
enum
{
  MIN_MULTIBYTE_LEADING_CODE = 0xC0,
  MAX_MULTIBYTE_LEADING_CODE = 0xF8
};
67
68
/* Code points of individual Unicode characters that C code needs to
   mention by name.  Each constant is spelled after the character's
   Unicode name.  */
enum
{
  NO_BREAK_SPACE               = 0x00A0,   /* U+00A0 */
  SOFT_HYPHEN                  = 0x00AD,   /* U+00AD */
  ZERO_WIDTH_NON_JOINER        = 0x200C,   /* U+200C */
  ZERO_WIDTH_JOINER            = 0x200D,   /* U+200D */
  HYPHEN                       = 0x2010,   /* U+2010 */
  NON_BREAKING_HYPHEN          = 0x2011,   /* U+2011 */
  LEFT_SINGLE_QUOTATION_MARK   = 0x2018,   /* U+2018 */
  RIGHT_SINGLE_QUOTATION_MARK  = 0x2019,   /* U+2019 */
  PARAGRAPH_SEPARATOR          = 0x2029,   /* U+2029 */
  LEFT_POINTING_ANGLE_BRACKET  = 0x2329,   /* U+2329 */
  RIGHT_POINTING_ANGLE_BRACKET = 0x232A,   /* U+232A */
  LEFT_ANGLE_BRACKET           = 0x3008,   /* U+3008 */
  RIGHT_ANGLE_BRACKET          = 0x3009,   /* U+3009 */
  OBJECT_REPLACEMENT_CHARACTER = 0xFFFC,   /* U+FFFC */
  TAG_SPACE                    = 0xE0020,  /* U+E0020 */
  CANCEL_TAG                   = 0xE007F,  /* U+E007F */
};
88
/* Out-of-line encoder, defined outside this header.  CHAR_STRING calls
   it for character codes above MAX_3_BYTE_CHAR and assumes the value
   it returns lies in 1..MAX_MULTIBYTE_LENGTH.
   NOTE(review): presumably it stores the multibyte form of its first
   argument at the second and returns the byte count -- confirm
   against the definition.  */
89 extern int char_string (unsigned, unsigned char *);
90
91
92
/* String literals holding the byte sequences E2 80 98 and E2 80 99,
   i.e. the UTF-8 encodings of U+2018 and U+2019 (the values of
   LEFT_SINGLE_QUOTATION_MARK and RIGHT_SINGLE_QUOTATION_MARK).  */
93 #define uLSQM "\xE2\x80\x98"
94 #define uRSQM "\xE2\x80\x99"
95
96
/* Return true if C fits in a single unsigned byte, i.e. lies in the
   range 0..0xFF.  Negative and larger values yield false.  */
INLINE bool
SINGLE_BYTE_CHAR_P (intmax_t c)
{
  return c >= 0 && c <= 0xFF;
}
102
103
104
105 INLINE bool
106 CHAR_BYTE8_P (int c)
107 {
108 return MAX_5_BYTE_CHAR < c;
109 }
110
111
/* Return the character code that represents the raw byte BYTE:
   BYTE offset by 0x3FFF00.  No range check is made on BYTE.  */
INLINE int
BYTE8_TO_CHAR (int byte)
{
  return 0x3FFF00 + byte;
}
117
118 INLINE int
119 UNIBYTE_TO_CHAR (int byte)
120 {
121 return ASCII_CHAR_P (byte) ? byte : BYTE8_TO_CHAR (byte);
122 }
123
124
125 INLINE int
126 CHAR_TO_BYTE8 (int c)
127 {
128 return CHAR_BYTE8_P (c) ? c - 0x3FFF00 : c & 0xFF;
129 }
130
131
132
133 INLINE int
134 CHAR_TO_BYTE_SAFE (int c)
135 {
136 return ASCII_CHAR_P (c) ? c : CHAR_BYTE8_P (c) ? c - 0x3FFF00 : -1;
137 }
138
139
140
/* Return true if BYTE is 0xC0 or 0xC1, the two leading bytes that
   BYTE8_STRING emits for raw-byte characters.  */
INLINE bool
CHAR_BYTE8_HEAD_P (int byte)
{
  return 0xC0 <= byte && byte <= 0xC1;
}
146
147
148 INLINE int
149 make_char_multibyte (int c)
150 {
151 eassert (SINGLE_BYTE_CHAR_P (c));
152 return UNIBYTE_TO_CHAR (c);
153 }
154
155
/* Upper bound on the number of bytes in one character's multibyte
   form: CHAR_BYTES never exceeds it and CHAR_STRING assumes
   char_string never returns more.  */
156 enum { MAX_MULTIBYTE_LENGTH = 5 };
157
158
159 INLINE bool
160 CHAR_VALID_P (intmax_t c)
161 {
162 return 0 <= c && c <= MAX_CHAR;
163 }
164
165
166 INLINE bool
167 CHARACTERP (Lisp_Object x)
168 {
169 return FIXNUMP (x) && CHAR_VALID_P (XFIXNUM (x));
170 }
171
172
173 INLINE void
174 CHECK_CHARACTER (Lisp_Object x)
175 {
176 CHECK_TYPE (CHARACTERP (x), Qcharacterp, x);
177 }
178
179 INLINE void
180 CHECK_CHARACTER_CAR (Lisp_Object x)
181 {
182 CHECK_CHARACTER (XCAR (x));
183 }
184
185 INLINE void
186 CHECK_CHARACTER_CDR (Lisp_Object x)
187 {
188 CHECK_CHARACTER (XCDR (x));
189 }
190
191
192 INLINE bool
193 CHAR_PRINTABLE_P (int c)
194 {
195 return ((32 <= c && c < 127)
196 || ! NILP (CHAR_TABLE_REF (Vprintable_chars, c)));
197 }
198
199
200 INLINE int
201 CHAR_BYTES (int c)
202 {
203 return ((MAX_5_BYTE_CHAR < c ? -2 : 1)
204 + (MAX_1_BYTE_CHAR < c)
205 + (MAX_2_BYTE_CHAR < c)
206 + (MAX_3_BYTE_CHAR < c)
207 + (MAX_4_BYTE_CHAR < c));
208 }
209
210
211 INLINE int
212 CHAR_LEADING_CODE (int c)
213 {
214 return (c <= MAX_1_BYTE_CHAR ? c
215 : c <= MAX_2_BYTE_CHAR ? 0xC0 | (c >> 6)
216 : c <= MAX_3_BYTE_CHAR ? 0xE0 | (c >> 12)
217 : c <= MAX_4_BYTE_CHAR ? 0xF0 | (c >> 18)
218 : c <= MAX_5_BYTE_CHAR ? 0xF8
219 : 0xC0 | ((c >> 6) & 0x01));
220 }
221
222
223
224
225
226
227 INLINE int
228 CHAR_STRING (int c, unsigned char *p)
229 {
230 eassume (0 <= c);
231 if (c <= MAX_1_BYTE_CHAR)
232 {
233 p[0] = c;
234 return 1;
235 }
236 if (c <= MAX_2_BYTE_CHAR)
237 {
238 p[0] = 0xC0 | (c >> 6);
239 p[1] = 0x80 | (c & 0x3F);
240 return 2;
241 }
242 if (c <= MAX_3_BYTE_CHAR)
243 {
244 p[0] = 0xE0 | (c >> 12);
245 p[1] = 0x80 | ((c >> 6) & 0x3F);
246 p[2] = 0x80 | (c & 0x3F);
247 return 3;
248 }
249 int len = char_string (c, p);
250 eassume (0 < len && len <= MAX_MULTIBYTE_LENGTH);
251 return len;
252 }
253
254
255
256
257
/* Store at P the two-byte form of the raw byte B and return 2.  The
   first byte is 0xC0 or 0xC1, chosen by bit 6 of B; the second is
   0x80 plus the low six bits of B.  */
INLINE int
BYTE8_STRING (int b, unsigned char *p)
{
  int lead = 0xC0 | ((b >> 6) & 0x01);
  int trail = 0x80 | (b & 0x3F);
  p[0] = lead;
  p[1] = trail;
  return 2;
}
265
266
267
/* Return true if BYTE can start a multi-byte sequence, i.e. its two
   top bits (of the low eight) are both set.  */
INLINE bool
LEADING_CODE_P (int byte)
{
  int top_two = byte & 0xC0;
  return top_two == 0xC0;
}
273
274
275
/* Return true if BYTE is a continuation byte of a multi-byte
   sequence, i.e. has the bit pattern 10xxxxxx.  */
INLINE bool
TRAILING_CODE_P (int byte)
{
  int top_two = byte & 0xC0;
  return top_two == 0x80;
}
281
282
283
284
/* Return true if BYTE starts a character, i.e. is anything other
   than a continuation byte (10xxxxxx).  This is the exact negation
   of TRAILING_CODE_P.  */
INLINE bool
CHAR_HEAD_P (int byte)
{
  int top_two = byte & 0xC0;
  return top_two != 0x80;
}
290
291
292
293
/* Return the length in bytes of the sequence that the leading byte
   BYTE announces.  Only bits 0x80, 0x20, 0x10 and 0x08 are examined,
   in that order; BYTE is not checked for being a valid leading
   byte.  */
INLINE int
BYTES_BY_CHAR_HEAD (int byte)
{
  if ((byte & 0x80) == 0)
    return 1;
  if ((byte & 0x20) == 0)
    return 2;
  if ((byte & 0x10) == 0)
    return 3;
  if ((byte & 0x08) == 0)
    return 4;
  return 5;
}
303
304
305
306
307
308
309
310
/* Return the length in bytes (1..5) of the multibyte character
   starting at P, or 0 if the bytes there do not form a valid
   sequence.

   If CHECK, never read at or beyond PEND, and return 0 when the
   sequence would run past it.  If !CHECK, PEND is ignored and up to
   five bytes at P may be read.

   If ALLOW_8BIT, also accept two-byte sequences whose leading byte
   is 0xC0 or 0xC1; otherwise the smallest accepted two-byte leading
   byte is 0xC2.

   Each stage folds the top two bits of the next byte into W above
   the bits already collected, so one range comparison verifies both
   the leading byte and the 10xxxxxx pattern of every trailing byte
   seen so far.  */
INLINE int
multibyte_length (unsigned char const *p, unsigned char const *pend,
                  bool check, bool allow_8bit)
{
  /* One byte.  */
  if (check && !(p < pend))
    return 0;
  unsigned char b0 = p[0];
  if (b0 < 0x80)
    return 1;

  /* Two bytes.  */
  if (check && !(p + 1 < pend))
    return 0;
  unsigned char b1 = p[1];
  int w = ((b1 & 0xC0) << 2) + b0;
  int min_two_byte = allow_8bit ? 0x2C0 : 0x2C2;
  if (min_two_byte <= w && w <= 0x2DF)
    return 2;

  /* Three bytes.  Bit 0x20 of the second byte is merged in so that
     a leading 0xE0 passes only when that bit is set.  */
  if (check && !(p + 2 < pend))
    return 0;
  unsigned char b2 = p[2];
  w += (b2 & 0xC0) << 4;
  int w_three = w | ((b1 & 0x20) >> 2);
  if (0xAE1 <= w_three && w_three <= 0xAEF)
    return 3;

  /* Four bytes.  Bits 0x30 of the second byte are merged in so that
     a leading 0xF0 passes only when one of them is set.  */
  if (check && !(p + 3 < pend))
    return 0;
  unsigned char b3 = p[3];
  w += (b3 & 0xC0) << 6;
  int w_four = w | ((b1 & 0x30) >> 3);
  if (0x2AF1 <= w_four && w_four <= 0x2AF7)
    return 4;

  /* Five bytes.  The full values of bytes two to four take part in
     the comparison; of the fifth byte only the top two bits do.  */
  if (check && !(p + 4 < pend))
    return 0;
  int_fast64_t lw = w + ((p[4] & 0xC0) << 8);
  int_fast64_t w_five = (lw << 24) + (b1 << 16) + (b2 << 8) + b3;
  if (0xAAF8888080 <= w_five && w_five <= 0xAAF88FBFBD)
    return 5;

  return 0;
}
354
355
356
357
358
359
360 INLINE int
361 raw_prev_char_len (unsigned char const *p)
362 {
363 for (int len = 1; ; len++)
364 if (CHAR_HEAD_P (p[-len]))
365 return len;
366 }
367
368
369
370
371
372 INLINE int
373 string_char_and_length (unsigned char const *p, int *length)
374 {
375 int c = p[0];
376 if (! (c & 0x80))
377 {
378 *length = 1;
379 return c;
380 }
381 eassume (0xC0 <= c);
382
383 int d = (c << 6) + p[1] - ((0xC0 << 6) + 0x80);
384 if (! (c & 0x20))
385 {
386 *length = 2;
387 return d + (c < 0xC2 ? 0x3FFF80 : 0);
388 }
389
390 d = (d << 6) + p[2] - ((0x20 << 12) + 0x80);
391 if (! (c & 0x10))
392 {
393 *length = 3;
394 eassume (MAX_2_BYTE_CHAR < d && d <= MAX_3_BYTE_CHAR);
395 return d;
396 }
397
398 d = (d << 6) + p[3] - ((0x10 << 18) + 0x80);
399 if (! (c & 0x08))
400 {
401 *length = 4;
402 eassume (MAX_3_BYTE_CHAR < d && d <= MAX_4_BYTE_CHAR);
403 return d;
404 }
405
406 d = (d << 6) + p[4] - ((0x08 << 24) + 0x80);
407 *length = 5;
408 eassume (MAX_4_BYTE_CHAR < d && d <= MAX_5_BYTE_CHAR);
409 return d;
410 }
411
412
413
414 INLINE int
415 STRING_CHAR (unsigned char const *p)
416 {
417 int len;
418 return string_char_and_length (p, &len);
419 }
420
421
422
423
424 INLINE int
425 string_char_advance (unsigned char const **pp)
426 {
427 unsigned char const *p = *pp;
428 int len, c = string_char_and_length (p, &len);
429 *pp = p + len;
430 return c;
431 }
432
433
434
435
436
437
438 INLINE int
439 fetch_string_char_advance (Lisp_Object string,
440 ptrdiff_t *charidx, ptrdiff_t *byteidx)
441 {
442 int output;
443 ptrdiff_t b = *byteidx;
444 unsigned char *chp = SDATA (string) + b;
445 if (STRING_MULTIBYTE (string))
446 {
447 int chlen;
448 output = string_char_and_length (chp, &chlen);
449 b += chlen;
450 }
451 else
452 {
453 output = *chp;
454 b++;
455 }
456 (*charidx)++;
457 *byteidx = b;
458 return output;
459 }
460
461
462
463
464 INLINE int
465 fetch_string_char_as_multibyte_advance (Lisp_Object string,
466 ptrdiff_t *charidx, ptrdiff_t *byteidx)
467 {
468 int output;
469 ptrdiff_t b = *byteidx;
470 unsigned char *chp = SDATA (string) + b;
471 if (STRING_MULTIBYTE (string))
472 {
473 int chlen;
474 output = string_char_and_length (chp, &chlen);
475 b += chlen;
476 }
477 else
478 {
479 output = make_char_multibyte (*chp);
480 b++;
481 }
482 (*charidx)++;
483 *byteidx = b;
484 return output;
485 }
486
487
488
489
490 INLINE int
491 fetch_string_char_advance_no_check (Lisp_Object string,
492 ptrdiff_t *charidx, ptrdiff_t *byteidx)
493 {
494 ptrdiff_t b = *byteidx;
495 unsigned char *chp = SDATA (string) + b;
496 int chlen, output = string_char_and_length (chp, &chlen);
497 (*charidx)++;
498 *byteidx = b + chlen;
499 return output;
500 }
501
502
503
504
505
/* If C is a variation selector, return its 1-based index; otherwise
   return 0.  U+FE00..U+FE0F map to 1..16 and U+E0100..U+E01EF map to
   17..256.  */
INLINE int
CHAR_VARIATION_SELECTOR_P (int c)
{
  if (0xFE00 <= c && c <= 0xFE0F)
    return c - 0xFE00 + 1;
  if (0xE0100 <= c && c <= 0xE01EF)
    return c - 0xE0100 + 17;
  return 0;
}
515
516
517
/* Return true if C lies in the UTF-16 surrogate range
   U+D800..U+DFFF.  */
INLINE bool
char_surrogate_p (int c)
{
  return c >= 0xD800 && c <= 0xDFFF;
}
523
524
525
526
527
528
529
/* Unicode general categories, one constant per two-letter category
   abbreviation.  The values are consecutive, starting at 0 for
   "unknown"; the explicit initializers below only mark where each
   major class begins and do not alter the numbering.  */
typedef enum
{
  UNICODE_CATEGORY_UNKNOWN = 0,

  /* Letters.  */
  UNICODE_CATEGORY_Lu = 1,
  UNICODE_CATEGORY_Ll,
  UNICODE_CATEGORY_Lt,
  UNICODE_CATEGORY_Lm,
  UNICODE_CATEGORY_Lo,

  /* Marks.  */
  UNICODE_CATEGORY_Mn = 6,
  UNICODE_CATEGORY_Mc,
  UNICODE_CATEGORY_Me,

  /* Numbers.  */
  UNICODE_CATEGORY_Nd = 9,
  UNICODE_CATEGORY_Nl,
  UNICODE_CATEGORY_No,

  /* Punctuation.  */
  UNICODE_CATEGORY_Pc = 12,
  UNICODE_CATEGORY_Pd,
  UNICODE_CATEGORY_Ps,
  UNICODE_CATEGORY_Pe,
  UNICODE_CATEGORY_Pi,
  UNICODE_CATEGORY_Pf,
  UNICODE_CATEGORY_Po,

  /* Symbols.  */
  UNICODE_CATEGORY_Sm = 19,
  UNICODE_CATEGORY_Sc,
  UNICODE_CATEGORY_Sk,
  UNICODE_CATEGORY_So,

  /* Separators.  */
  UNICODE_CATEGORY_Zs = 23,
  UNICODE_CATEGORY_Zl,
  UNICODE_CATEGORY_Zp,

  /* Other.  */
  UNICODE_CATEGORY_Cc = 26,
  UNICODE_CATEGORY_Cf,
  UNICODE_CATEGORY_Cs,
  UNICODE_CATEGORY_Co,
  UNICODE_CATEGORY_Cn
} unicode_category_t;
563
/* Everything below is only declared here; the definitions live
   outside this header, so the notes on purpose are inferred from the
   names and signatures alone.
   NOTE(review): confirm each against its definition.  */

/* Maps one EMACS_INT to another.  ATTRIBUTE_CONST is a project macro;
   presumably it marks the result as depending only on the
   argument.  */
564 extern EMACS_INT char_resolve_modifier_mask (EMACS_INT) ATTRIBUTE_CONST;
565
/* Character translation, conversion of byte strings between unibyte
   and multibyte form, and string width computation (judging by the
   names).  All sizes, counts and results are ptrdiff_t.  */
566 extern int translate_char (Lisp_Object, int c);
567 extern ptrdiff_t count_size_as_multibyte (const unsigned char *, ptrdiff_t);
568 extern ptrdiff_t str_as_multibyte (unsigned char *, ptrdiff_t, ptrdiff_t,
569 ptrdiff_t *);
570 extern ptrdiff_t str_to_multibyte (unsigned char *dst, const unsigned char *src,
571 ptrdiff_t nchars);
572 extern ptrdiff_t str_as_unibyte (unsigned char *, ptrdiff_t);
573 extern ptrdiff_t strwidth (const char *, ptrdiff_t);
574 extern ptrdiff_t c_string_width (const unsigned char *, ptrdiff_t, int,
575 ptrdiff_t *, ptrdiff_t *);
576 extern ptrdiff_t lisp_string_width (Lisp_Object, ptrdiff_t, ptrdiff_t,
577 ptrdiff_t, ptrdiff_t *, ptrdiff_t *, bool);
578
/* A Lisp-level variable and a Lisp string transformation.  */
579 extern Lisp_Object Vchar_unify_table;
580 extern Lisp_Object string_escape_byte8 (Lisp_Object);
581
/* Character-class predicates; each takes a character code as an int
   and returns a bool.  */
582 extern bool alphabeticp (int);
583 extern bool alphanumericp (int);
584 extern bool graphicp (int);
585 extern bool printablep (int);
586 extern bool blankp (int);
587 extern bool graphic_base_p (int);
588
589
590
591
592 INLINE int
593 char_table_translate (Lisp_Object obj, int ch)
594 {
595
596
597 eassert (CHAR_VALID_P (ch));
598 eassert (CHAR_TABLE_P (obj));
599 obj = CHAR_TABLE_REF (obj, ch);
600 return CHARACTERP (obj) ? XFIXNUM (obj) : ch;
601 }
602
603 extern signed char const hexdigit[];
604
605
606
607
608 INLINE int
609 char_hexdigit (int c)
610 {
611 return 0 <= c && c <= UCHAR_MAX ? hexdigit[c] - 1 : -1;
612 }
613
614 INLINE_HEADER_END
615
616 #endif