This source file includes the following definitions.
- surrogates_to_codepoint
- build_string_from_utf8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 #ifndef EMACS_CODING_H
27 #define EMACS_CODING_H
28
29 #include "lisp.h"
30
31 INLINE_HEADER_BEGIN
32
33
34
/* Positional indices named coding_arg_*.  coding_arg_max is the number
   of common entries, and it is also the first index used by the
   type-specific coding_arg_* enums declared later in this header.
   NOTE(review): presumably these index the argument vector supplied
   when a coding system is defined -- confirm against the caller.  */
enum define_coding_system_arg_index
  {
    coding_arg_name,
    coding_arg_mnemonic,
    coding_arg_coding_type,
    coding_arg_charset_list,
    coding_arg_ascii_compatible_p,
    coding_arg_decode_translation_table,
    coding_arg_encode_translation_table,
    coding_arg_post_read_conversion,
    coding_arg_pre_write_conversion,
    coding_arg_default_char,
    coding_arg_for_unibyte,
    coding_arg_plist,
    coding_arg_eol_type,
    coding_arg_max
  };
52
53 enum define_coding_iso2022_arg_index
54 {
55 coding_arg_iso2022_initial = coding_arg_max,
56 coding_arg_iso2022_reg_usage,
57 coding_arg_iso2022_request,
58 coding_arg_iso2022_flags,
59 coding_arg_iso2022_max
60 };
61
62 enum define_coding_utf8_arg_index
63 {
64 coding_arg_utf8_bom = coding_arg_max,
65 coding_arg_utf8_max
66 };
67
68 enum define_coding_utf16_arg_index
69 {
70 coding_arg_utf16_bom = coding_arg_max,
71 coding_arg_utf16_endian,
72 coding_arg_utf16_max
73 };
74
75 enum define_coding_ccl_arg_index
76 {
77 coding_arg_ccl_decoder = coding_arg_max,
78 coding_arg_ccl_encoder,
79 coding_arg_ccl_valids,
80 coding_arg_ccl_max
81 };
82
83 enum define_coding_undecided_arg_index
84 {
85 coding_arg_undecided_inhibit_null_byte_detection = coding_arg_max,
86 coding_arg_undecided_inhibit_iso_escape_detection,
87 coding_arg_undecided_prefer_utf_8,
88 coding_arg_undecided_max
89 };
90
91
92
93
94
95
96
97
98 extern Lisp_Object Vcoding_system_hash_table;
99
100
101
/* Slot indices of a coding-system attribute vector, as used by the
   CODING_ATTR_* accessor macros in this header.
   coding_attr_last_index is the number of slots.  The blank-line
   grouping mirrors the name prefixes (common, charset, ccl, iso, utf,
   emacs_mule, undecided).  */
enum coding_attr_index
  {
    coding_attr_base_name,
    coding_attr_docstring,
    coding_attr_mnemonic,
    coding_attr_type,
    coding_attr_charset_list,
    coding_attr_ascii_compat,
    coding_attr_decode_tbl,
    coding_attr_encode_tbl,
    coding_attr_trans_tbl,
    coding_attr_post_read,
    coding_attr_pre_write,
    coding_attr_default_char,
    coding_attr_for_unibyte,
    coding_attr_plist,

    coding_attr_category,
    coding_attr_safe_charsets,

    coding_attr_charset_valids,

    coding_attr_ccl_decoder,
    coding_attr_ccl_encoder,
    coding_attr_ccl_valids,

    coding_attr_iso_initial,
    coding_attr_iso_usage,
    coding_attr_iso_request,
    coding_attr_iso_flags,

    coding_attr_utf_bom,
    coding_attr_utf_16_endian,

    coding_attr_emacs_mule_full,

    coding_attr_undecided_inhibit_null_byte_detection,
    coding_attr_undecided_inhibit_iso_escape_detection,
    coding_attr_undecided_prefer_utf_8,

    coding_attr_last_index
  };
145
146
147
148
/* Accessors for individual slots of a coding-system attribute vector
   ATTRS.  Each expands to AREF on the matching enum coding_attr_index
   value.  */
#define CODING_ATTR_BASE_NAME(attrs)	AREF (attrs, coding_attr_base_name)
#define CODING_ATTR_TYPE(attrs)		AREF (attrs, coding_attr_type)
#define CODING_ATTR_CHARSET_LIST(attrs)	AREF (attrs, coding_attr_charset_list)
#define CODING_ATTR_MNEMONIC(attrs)	AREF (attrs, coding_attr_mnemonic)
#define CODING_ATTR_DOCSTRING(attrs)	AREF (attrs, coding_attr_docstring)
#define CODING_ATTR_ASCII_COMPAT(attrs)	AREF (attrs, coding_attr_ascii_compat)
#define CODING_ATTR_DECODE_TBL(attrs)	AREF (attrs, coding_attr_decode_tbl)
#define CODING_ATTR_ENCODE_TBL(attrs)	AREF (attrs, coding_attr_encode_tbl)
#define CODING_ATTR_TRANS_TBL(attrs)	AREF (attrs, coding_attr_trans_tbl)
#define CODING_ATTR_POST_READ(attrs)	AREF (attrs, coding_attr_post_read)
#define CODING_ATTR_PRE_WRITE(attrs)	AREF (attrs, coding_attr_pre_write)
#define CODING_ATTR_DEFAULT_CHAR(attrs)	AREF (attrs, coding_attr_default_char)
#define CODING_ATTR_FOR_UNIBYTE(attrs)	AREF (attrs, coding_attr_for_unibyte)
#define CODING_ATTR_PLIST(attrs)	AREF (attrs, coding_attr_plist)
#define CODING_ATTR_CATEGORY(attrs)	AREF (attrs, coding_attr_category)
#define CODING_ATTR_SAFE_CHARSETS(attrs) AREF (attrs, coding_attr_safe_charsets)
165
166
167
/* Return the key stored at entry ID of Vcoding_system_hash_table.  */
#define CODING_ID_NAME(id) \
  (HASH_KEY (XHASH_TABLE (Vcoding_system_hash_table), (id)))

/* Return slot 0 of the value vector at entry ID of
   Vcoding_system_hash_table (the attribute vector, per the name).  */
#define CODING_ID_ATTRS(id) \
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), (id)), 0))

/* Return slot 1 of the value vector at entry ID (the aliases, per the
   name).  */
#define CODING_ID_ALIASES(id) \
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), (id)), 1))

/* Return slot 2 of the value vector at entry ID (the EOL type, per the
   name).  */
#define CODING_ID_EOL_TYPE(id) \
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), (id)), 2))
185
186
187
188
/* Look up CODING_SYSTEM_SYMBOL in Vcoding_system_hash_table with
   Fgethash, passing Qnil as the default value.  */
#define CODING_SYSTEM_SPEC(coding_system_symbol) \
  (Fgethash (coding_system_symbol, Vcoding_system_hash_table, Qnil))

/* Look up CODING_SYSTEM_SYMBOL with hash_lookup and yield its result.
   The CHECK_CODING_SYSTEM* macros in this header treat a negative
   result as "not a known coding system".  */
#define CODING_SYSTEM_ID(coding_system_symbol) \
  (hash_lookup (XHASH_TABLE (Vcoding_system_hash_table), \
                coding_system_symbol, NULL))
198
199
200
/* True if CODING_SYSTEM_SYMBOL has a non-negative ID in the hash
   table, or if it is non-nil and Fcoding_system_p returns non-nil for
   it.  The argument is evaluated more than once.  */
#define CODING_SYSTEM_P(coding_system_symbol) \
  (CODING_SYSTEM_ID (coding_system_symbol) >= 0 \
   || (! NILP (coding_system_symbol) \
       && ! NILP (Fcoding_system_p (coding_system_symbol))))
205
206
207
/* Call wrong_type_argument with Qcoding_system_p when X has a negative
   CODING_SYSTEM_ID and Fcheck_coding_system (X) returns nil.  X is
   evaluated more than once.  */
#define CHECK_CODING_SYSTEM(x) \
  do { \
    if (CODING_SYSTEM_ID (x) < 0 \
        && NILP (Fcheck_coding_system (x))) \
      wrong_type_argument (Qcoding_system_p, (x)); \
  } while (false)
214
215
216
217
218
/* Store the spec of coding system X in SPEC (an lvalue).  If the first
   lookup yields nil, call Fcheck_coding_system (X) and look it up
   again; if it is still nil, call wrong_type_argument with
   Qcoding_system_p.  X and SPEC are evaluated more than once.  */
#define CHECK_CODING_SYSTEM_GET_SPEC(x, spec) \
  do { \
    (spec) = CODING_SYSTEM_SPEC (x); \
    if (NILP (spec)) \
      { \
        Fcheck_coding_system (x); \
        (spec) = CODING_SYSTEM_SPEC (x); \
      } \
    if (NILP (spec)) \
      wrong_type_argument (Qcoding_system_p, (x)); \
  } while (false)
230
231
232
233
234
/* Store the ID of coding system X in ID (an lvalue).  If the first
   lookup is negative, call Fcheck_coding_system (X) and look it up
   again; if it is still negative, call wrong_type_argument with
   Qcoding_system_p.  X and ID are evaluated more than once.  */
#define CHECK_CODING_SYSTEM_GET_ID(x, id) \
  do \
    { \
      (id) = CODING_SYSTEM_ID (x); \
      if ((id) < 0) \
        { \
          Fcheck_coding_system (x); \
          (id) = CODING_SYSTEM_ID (x); \
        } \
      if ((id) < 0) \
        wrong_type_argument (Qcoding_system_p, (x)); \
    } while (false)
247
248
249
250
251
/* Result codes of a conversion.  The `result' bit-field of struct
   coding_system holds one of these values in 3 bits.  */
enum coding_result_code
  {
    CODING_RESULT_SUCCESS,
    CODING_RESULT_INSUFFICIENT_SRC,
    CODING_RESULT_INSUFFICIENT_DST,
    CODING_RESULT_INVALID_SRC,
    CODING_RESULT_INTERRUPT
  };
260
261
262
263
264
265
266
/* Single-bit flags named CODING_MODE_*.  All five fit in the 5-bit
   `mode' field of struct coding_system.
   CODING_MODE_SELECTIVE_DISPLAY is tested by CODING_REQUIRE_ENCODING.
   NOTE(review): the meaning of each flag is inferred only from its
   name here -- confirm against the conversion code before relying on
   it.  */
#define CODING_MODE_LAST_BLOCK			0x01

#define CODING_MODE_SELECTIVE_DISPLAY		0x02

#define CODING_MODE_DIRECTION			0x04

#define CODING_MODE_FIXED_DESTINATION		0x08

#define CODING_MODE_SAFE_ENCODING		0x10
282
283
284 #include "composite.h"
285
286 enum composition_state
287 {
288 COMPOSING_NO,
289 COMPOSING_CHAR,
290 COMPOSING_RULE,
291 COMPOSING_COMPONENT_CHAR,
292 COMPOSING_COMPONENT_RULE
293 };
294
295
296 struct composition_status
297 {
298 enum composition_state state;
299 enum composition_method method;
300 bool old_form;
301 int length;
302 int nchars;
303 int ncomps;
304
305
306 int carryover[4
307 + MAX_COMPOSITION_COMPONENTS * 3 - 2
308 + 2
309 + MAX_COMPOSITION_COMPONENTS
310 ];
311 };
312
313
314
315
316 struct iso_2022_spec
317 {
318
319 unsigned flags;
320
321
322 int current_invocation[2];
323
324
325
326
327 int current_designation[4];
328
329
330 int ctext_extended_segment_len;
331
332
333
334 bool_bf single_shifting : 1;
335
336
337 bool_bf bol : 1;
338
339
340 bool_bf embedded_utf_8 : 1;
341
342
343 struct composition_status cmp_status;
344 };
345
346 struct emacs_mule_spec
347 {
348 struct composition_status cmp_status;
349 };
350
/* `undecided' member of the `spec' union in struct coding_system.
   NOTE(review): the fields presumably mirror the
   coding_attr_undecided_* attributes (inhibit null-byte detection,
   inhibit ISO-escape detection, prefer UTF-8) -- confirm.  */
struct undecided_spec
{
  /* An int rather than a bool, so it can hold more than two states.
     NOTE(review): the set of valid values is not visible here.  */
  int inhibit_nbd;

  /* Likewise an int.  */
  int inhibit_ied;

  bool prefer_utf_8;
};
363
/* BOM handling modes; used by struct utf_16_spec and by the utf_8_bom
   member of the `spec' union in struct coding_system.  */
enum utf_bom_type
  {
    utf_detect_bom,
    utf_without_bom,
    utf_with_bom
  };
370
/* Byte order of a UTF-16 coding system; used by struct utf_16_spec.  */
enum utf_16_endian_type
  {
    utf_16_big_endian,
    utf_16_little_endian
  };
376
377 struct utf_16_spec
378 {
379 enum utf_bom_type bom;
380 enum utf_16_endian_type endian;
381 int surrogate;
382 };
383
/* Second argument of the `detector' callback in struct coding_system.
   NOTE(review): the three ints are presumably bit masks of coding
   categories already checked, found, and rejected -- confirm.  */
struct coding_detection_info
{
  int checked;

  int found;

  int rejected;
};
394
395
396 struct coding_system
397 {
398
399
400
401
402
403 ptrdiff_t id;
404
405
406
407 unsigned common_flags : 14;
408
409
410
411 unsigned mode : 5;
412
413
414
415
416
417 bool_bf src_multibyte : 1;
418 bool_bf dst_multibyte : 1;
419
420
421
422 bool_bf chars_at_source : 1;
423
424
425
426 bool_bf raw_destination : 1;
427
428
429 bool_bf annotated : 1;
430
431
432 unsigned eol_seen : 3;
433
434
435 ENUM_BF (coding_result_code) result : 3;
436
437 int max_charset_id;
438
439
440 union
441 {
442 struct iso_2022_spec iso_2022;
443 struct ccl_spec *ccl;
444 struct utf_16_spec utf_16;
445 enum utf_bom_type utf_8_bom;
446 struct emacs_mule_spec emacs_mule;
447 struct undecided_spec undecided;
448 } spec;
449
450 unsigned char *safe_charsets;
451
452
453
454
455
456
457 ptrdiff_t head_ascii;
458
459
460
461 ptrdiff_t detected_utf8_bytes, detected_utf8_chars;
462
463
464 ptrdiff_t produced, produced_char, consumed, consumed_char;
465
466 ptrdiff_t src_pos, src_pos_byte, src_chars, src_bytes;
467 Lisp_Object src_object;
468 const unsigned char *source;
469
470 ptrdiff_t dst_pos, dst_pos_byte, dst_bytes;
471 Lisp_Object dst_object;
472 unsigned char *destination;
473
474
475
476
477
478
479
480
481
482
483
484
485
486 int *charbuf;
487 int charbuf_size, charbuf_used;
488
489 unsigned char carryover[64];
490 int carryover_bytes;
491
492 int default_char;
493
494 bool (*detector) (struct coding_system *, struct coding_detection_info *);
495 void (*decoder) (struct coding_system *);
496 bool (*encoder) (struct coding_system *);
497 };
498
499
500
501
/* Bit masks for the `common_flags' field of struct coding_system.  The
   low byte (CODING_ANNOTATION_MASK) holds the annotation bits; the
   higher bits are independent single-bit flags.
   NOTE(review): CODING_ANNOTATE_CHARSET_MASK is 0x0003, which overlaps
   both the COMPOSITION (0x0001) and DIRECTION (0x0002) bits instead of
   being a separate bit such as 0x0004.  The value is kept as is because
   callers may depend on it -- confirm whether the overlap is
   intended.  */
#define CODING_ANNOTATION_MASK			0x00FF
#define CODING_ANNOTATE_COMPOSITION_MASK	0x0001
#define CODING_ANNOTATE_DIRECTION_MASK		0x0002
#define CODING_ANNOTATE_CHARSET_MASK		0x0003
#define CODING_FOR_UNIBYTE_MASK			0x0100
#define CODING_REQUIRE_FLUSHING_MASK		0x0200
#define CODING_REQUIRE_DECODING_MASK		0x0400
#define CODING_REQUIRE_ENCODING_MASK		0x0800
#define CODING_REQUIRE_DETECTION_MASK		0x1000
#define CODING_RESET_AT_BOL_MASK		0x2000
512
513
514
/* Nonzero if any annotation bit is set in CODING->common_flags.  */
#define CODING_REQUIRE_ANNOTATION(coding) \
  ((coding)->common_flags & CODING_ANNOTATION_MASK)

/* Nonzero if CODING_FOR_UNIBYTE_MASK is set in CODING->common_flags.  */
#define CODING_FOR_UNIBYTE(coding) \
  ((coding)->common_flags & CODING_FOR_UNIBYTE_MASK)

/* Nonzero if CODING_REQUIRE_FLUSHING_MASK is set in
   CODING->common_flags.  */
#define CODING_REQUIRE_FLUSHING(coding) \
  ((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK)

/* Nonzero if the destination of CODING is multibyte or
   CODING_REQUIRE_DECODING_MASK is set.  */
#define CODING_REQUIRE_DECODING(coding) \
  ((coding)->dst_multibyte \
   || ((coding)->common_flags & CODING_REQUIRE_DECODING_MASK))

/* Nonzero if the source of CODING is multibyte, or
   CODING_REQUIRE_ENCODING_MASK is set, or the mode has
   CODING_MODE_SELECTIVE_DISPLAY.  */
#define CODING_REQUIRE_ENCODING(coding) \
  ((coding)->src_multibyte \
   || ((coding)->common_flags & CODING_REQUIRE_ENCODING_MASK) \
   || ((coding)->mode & CODING_MODE_SELECTIVE_DISPLAY))

/* Nonzero if CODING_REQUIRE_DETECTION_MASK is set in
   CODING->common_flags.  */
#define CODING_REQUIRE_DETECTION(coding) \
  ((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK)

/* Nonzero if CODING requires decoding now or still requires
   detection.  */
#define CODING_MAY_REQUIRE_DECODING(coding) \
  (CODING_REQUIRE_DECODING (coding) \
   || CODING_REQUIRE_DETECTION (coding))
556
557
558
559
560
561
/* Convert CODE in place from a two-byte Shift_JIS code (first byte in
   bits 8 and up, second byte in the low 8 bits) to the corresponding
   two-byte JIS code.  CODE must be a modifiable integer lvalue; it is
   evaluated several times and must not be named s1, s2, j1 or j2.  No
   range checking is done.  */
#define SJIS_TO_JIS(code) \
  do { \
    int s1, s2, j1, j2; \
    \
    s1 = (code) >> 8, s2 = (code) & 0xFF; \
    \
    if (s2 >= 0x9F) \
      (j1 = s1 * 2 - (s1 >= 0xE0 ? 0x160 : 0xE0), \
       j2 = s2 - 0x7E); \
    else \
      (j1 = s1 * 2 - ((s1 >= 0xE0) ? 0x161 : 0xE1), \
       j2 = s2 - ((s2 >= 0x7F) ? 0x20 : 0x1F)); \
    (code) = (j1 << 8) | j2; \
  } while (false)
576
/* Convert CODE in place from a two-byte Shift_JIS code to a two-byte
   JIS code, using the mapping for lead bytes 0xF0 and above.
   NOTE(review): presumably the JIS X 0213 plane-2 mapping -- confirm.
   CODE must be a modifiable integer lvalue; it is evaluated several
   times and must not be named s1, s2, j1 or j2.  No range checking is
   done.  */
#define SJIS_TO_JIS2(code) \
  do { \
    int s1, s2, j1, j2; \
    \
    s1 = (code) >> 8, s2 = (code) & 0xFF; \
    \
    if (s2 >= 0x9F) \
      { \
        j1 = (s1 == 0xF0 ? 0x28 \
              : s1 == 0xF1 ? 0x24 \
              : s1 == 0xF2 ? 0x2C \
              : s1 == 0xF3 ? 0x2E \
              : 0x6E + (s1 - 0xF4) * 2); \
        j2 = s2 - 0x7E; \
      } \
    else \
      { \
        j1 = (s1 <= 0xF2 ? 0x21 + (s1 - 0xF0) * 2 \
              : s1 <= 0xF4 ? 0x2D + (s1 - 0xF3) * 2 \
              : 0x6F + (s1 - 0xF5) * 2); \
        j2 = s2 - ((s2 >= 0x7F ? 0x20 : 0x1F)); \
      } \
    (code) = (j1 << 8) | j2; \
  } while (false)
601
602
/* Convert CODE in place from a two-byte JIS code (first byte in bits 8
   and up, second byte in the low 8 bits) to the corresponding two-byte
   Shift_JIS code.  CODE must be a modifiable integer lvalue; it is
   evaluated several times and must not be named s1, s2, j1 or j2.  No
   range checking is done.  */
#define JIS_TO_SJIS(code) \
  do { \
    int s1, s2, j1, j2; \
    \
    j1 = (code) >> 8, j2 = (code) & 0xFF; \
    if (j1 & 1) \
      (s1 = j1 / 2 + ((j1 < 0x5F) ? 0x71 : 0xB1), \
       s2 = j2 + ((j2 >= 0x60) ? 0x20 : 0x1F)); \
    else \
      (s1 = j1 / 2 + ((j1 < 0x5F) ? 0x70 : 0xB0), \
       s2 = j2 + 0x7E); \
    (code) = (s1 << 8) | s2; \
  } while (false)
616
/* Convert CODE in place from a two-byte JIS code to a two-byte
   Shift_JIS code with a lead byte of 0xF0 or above; the inverse of
   SJIS_TO_JIS2.
   NOTE(review): presumably the JIS X 0213 plane-2 mapping -- confirm.
   CODE must be a modifiable integer lvalue; it is evaluated several
   times and must not be named s1, s2, j1 or j2.  No range checking is
   done.  */
#define JIS_TO_SJIS2(code) \
  do { \
    int s1, s2, j1, j2; \
    \
    j1 = (code) >> 8, j2 = (code) & 0xFF; \
    if (j1 & 1) \
      { \
        s1 = (j1 <= 0x25 ? 0xF0 + (j1 - 0x21) / 2 \
              : j1 <= 0x2F ? 0xF3 + (j1 - 0x2D) / 2 \
              : 0xF5 + (j1 - 0x6F) / 2); \
        s2 = j2 + ((j2 >= 0x60) ? 0x20 : 0x1F); \
      } \
    else \
      { \
        s1 = (j1 == 0x28 ? 0xF0 \
              : j1 == 0x24 ? 0xF1 \
              : j1 == 0x2C ? 0xF2 \
              : j1 == 0x2E ? 0xF3 \
              : 0xF4 + (j1 - 0x6E) / 2); \
        s2 = j2 + 0x7E; \
      } \
    (code) = (s1 << 8) | s2; \
  } while (false)
640
641
642
/* Encode the file name NAME; thin wrapper around encode_file_name.  */
#define ENCODE_FILE(NAME) encode_file_name (NAME)

/* Decode the file name NAME; thin wrapper around decode_file_name.  */
#define DECODE_FILE(NAME) decode_file_name (NAME)

/* Encode the string STR with Vlocale_coding_system when that variable
   is non-nil; otherwise yield STR unchanged.  The third argument passed
   to code_convert_string_norecord is true here and false in
   DECODE_SYSTEM.  */
#define ENCODE_SYSTEM(str) \
  (! NILP (Vlocale_coding_system) \
   ? code_convert_string_norecord (str, Vlocale_coding_system, true) \
   : str)

/* Decode the string STR with Vlocale_coding_system when that variable
   is non-nil; otherwise yield STR unchanged.  */
#define DECODE_SYSTEM(str) \
  (! NILP (Vlocale_coding_system) \
   ? code_convert_string_norecord (str, Vlocale_coding_system, false) \
   : str)

/* Encode the string STR with the Qutf_8 coding system.  */
#define ENCODE_UTF_8(str) code_convert_string_norecord (str, Qutf_8, true)
665
666
667
668
/* True if VAL, masked with 0xFC00, equals 0xD800, i.e. its low 16 bits
   lie in the high-surrogate range 0xD800..0xDBFF.  Bits above bit 15
   are ignored by the mask.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* True if VAL, masked with 0xFC00, equals 0xDC00, i.e. its low 16 bits
   lie in the low-surrogate range 0xDC00..0xDFFF.  Bits above bit 15
   are ignored by the mask.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)
677
678
679 extern Lisp_Object code_conversion_save (bool, bool);
680 extern bool encode_coding_utf_8 (struct coding_system *);
681 extern bool utf8_string_p (Lisp_Object);
682 extern void setup_coding_system (Lisp_Object, struct coding_system *);
683 extern Lisp_Object coding_charset_list (struct coding_system *);
684 extern Lisp_Object coding_system_charset_list (Lisp_Object);
685 extern Lisp_Object code_convert_string (Lisp_Object, Lisp_Object,
686 Lisp_Object, bool, bool, bool);
687 extern Lisp_Object code_convert_string_norecord (Lisp_Object, Lisp_Object,
688 bool);
689 extern Lisp_Object encode_string_utf_8 (Lisp_Object, Lisp_Object, bool,
690 Lisp_Object, Lisp_Object);
691 extern Lisp_Object decode_string_utf_8 (Lisp_Object, const char *, ptrdiff_t,
692 Lisp_Object, bool,
693 Lisp_Object, Lisp_Object);
694 extern Lisp_Object encode_file_name (Lisp_Object);
695 extern Lisp_Object decode_file_name (Lisp_Object);
696 extern Lisp_Object raw_text_coding_system (Lisp_Object);
697 extern bool raw_text_coding_system_p (struct coding_system *);
698 extern Lisp_Object coding_inherit_eol_type (Lisp_Object, Lisp_Object);
699 extern Lisp_Object complement_process_encoding_system (Lisp_Object);
700 extern Lisp_Object make_string_from_utf8 (const char *, ptrdiff_t);
701
702 extern void decode_coding_gap (struct coding_system *, ptrdiff_t);
703 extern void decode_coding_object (struct coding_system *,
704 Lisp_Object, ptrdiff_t, ptrdiff_t,
705 ptrdiff_t, ptrdiff_t, Lisp_Object);
706 extern void encode_coding_object (struct coding_system *,
707 Lisp_Object, ptrdiff_t, ptrdiff_t,
708 ptrdiff_t, ptrdiff_t, Lisp_Object);
709
710 INLINE int surrogates_to_codepoint (int, int);
711
#if defined (WINDOWSNT) || defined (CYGWIN)

/* View the data of Lisp string X as a wchar_t array.  This is a plain
   pointer cast of SDATA (X); no conversion is performed.  */
#define WCSDATA(x) ((wchar_t *) SDATA (x))

/* NOTE(review): presumably converts STR to a wide string, storing the
   Lisp object that owns the returned data in *BUF -- confirm against
   the definition.  */
extern wchar_t *to_unicode (Lisp_Object str, Lisp_Object *buf);

/* NOTE(review): presumably the inverse of to_unicode, taking wide
   string data held in the Lisp string STR -- confirm.  */
extern Lisp_Object from_unicode (Lisp_Object str);

/* Like from_unicode, but takes a C wide-string pointer.
   NOTE(review): termination requirements for WSTR are not visible
   here.  */
extern Lisp_Object from_unicode_buffer (const wchar_t *wstr);

#endif /* WINDOWSNT || CYGWIN */
736
737
738
/* If STRING is multibyte, encode all of it through CODING by calling
   encode_coding_object with Qt as the destination argument, and yield
   CODING->dst_object; otherwise yield STRING itself.  NOCOPY is
   accepted but not used by the expansion.  STRING is evaluated more
   than once.  */
#define encode_coding_string(coding, string, nocopy) \
  (STRING_MULTIBYTE (string) ? \
    (encode_coding_object (coding, string, 0, 0, SCHARS (string), \
                           SBYTES (string), Qt), \
     (coding)->dst_object) : (string))
744
745
/* Decode BYTES bytes at the C pointer SRC through CODING.  Sets
   CODING->source to SRC and both src_chars and src_bytes to BYTES, then
   calls decode_coding_object with Qnil as the source object and
   DST_OBJECT as the destination.  CODING and BYTES are evaluated more
   than once.  */
#define decode_coding_c_string(coding, src, bytes, dst_object) \
  do { \
    (coding)->source = (src); \
    (coding)->src_chars = (coding)->src_bytes = (bytes); \
    decode_coding_object ((coding), Qnil, 0, 0, (bytes), (bytes), \
                          (dst_object)); \
  } while (false)
753
754
755
756
757 INLINE int
758 surrogates_to_codepoint (int low, int high)
759 {
760 eassert (0 <= low && low <= 0xFFFF);
761 eassert (0 <= high && high <= 0xFFFF);
762 eassert (UTF_16_LOW_SURROGATE_P (low));
763 eassert (UTF_16_HIGH_SURROGATE_P (high));
764 return 0x10000 + (low - 0xDC00) + ((high - 0xD800) * 0x400);
765 }
766
767
768
769
770 INLINE Lisp_Object
771 build_string_from_utf8 (const char *str)
772 {
773 return make_string_from_utf8 (str, strlen (str));
774 }
775
776
777 extern Lisp_Object preferred_coding_system (void);
778
779
780
781 extern struct coding_system safe_terminal_coding;
782
783 extern char emacs_mule_bytes[256];
784
785 INLINE_HEADER_END
786
787 #endif