This source file includes following definitions.
- detect_coding_XXX
- decode_coding_XXXX
- encode_coding_XXX
- encode_inhibit_flag
- inhibit_flag
- growable_destination
- record_conversion_result
- coding_set_source
- coding_change_source
- coding_set_destination
- coding_change_destination
- coding_alloc_by_realloc
- coding_alloc_by_making_gap
- alloc_destination
- detect_coding_utf_8
- decode_coding_utf_8
- encode_coding_utf_8
- detect_coding_utf_16
- decode_coding_utf_16
- encode_coding_utf_16
- detect_coding_emacs_mule
- emacs_mule_char
- emacs_mule_finish_composition
- decode_coding_emacs_mule
- encode_coding_emacs_mule
- setup_iso_safe_charsets
- detect_coding_iso_2022
- finish_composition
- decode_coding_iso_2022
- encode_invocation_designation
- encode_designation_at_bol
- encode_coding_iso_2022
- detect_coding_sjis
- detect_coding_big5
- decode_coding_sjis
- decode_coding_big5
- encode_coding_sjis
- encode_coding_big5
- detect_coding_ccl
- decode_coding_ccl
- encode_coding_ccl
- decode_coding_raw_text
- encode_coding_raw_text
- detect_coding_charset
- decode_coding_charset
- encode_coding_charset
- setup_coding_system
- coding_charset_list
- coding_system_charset_list
- raw_text_coding_system
- raw_text_coding_system_p
- coding_inherit_eol_type
- complement_process_encoding_system
- check_ascii
- check_utf_8
- utf8_string_p
- make_string_from_utf8
- detect_eol
- adjust_coding_eol_type
- detect_coding
- decode_eol
- get_translation_table
- get_translation
- produce_chars
- produce_composition
- produce_charset
- produce_annotation
- decode_coding
- handle_composition_annotation
- handle_charset_annotation
- consume_chars
- encode_coding
- code_conversion_restore
- code_conversion_save
- coding_restore_undo_list
- decode_coding_gap
- decode_coding_object
- encode_coding_object
- preferred_coding_system
- from_unicode
- from_unicode_buffer
- to_unicode
- DEFUN
- DEFUN
- DEFUN
- detect_coding_system
- char_encodable_p
- code_convert_region
- string_ascii_p
- code_convert_string
- code_convert_string_norecord
- get_buffer_gap_address
- get_char_bytes
- encode_string_utf_8
- decode_string_utf_8
- convert_string_nocopy
- decode_file_name
- encode_file_name_1
- encode_file_name
- DEFUN
- DEFUN
- DEFUN
- DEFUN
- DEFUN
- DEFUN
- DEFUN
- DEFUN
- make_subsidiaries
- DEFUN
- DEFUN
- DEFUN
- DEFUN
- init_coding_once
- syms_of_coding
- reset_coding_after_pdumper_load
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#if 0
/* Template (never compiled) showing the common shape of the
   detect_coding_* functions in this file.

   Scan the source bytes of CODING one at a time.  As soon as a byte
   does not conform to XXX, mark CATEGORY_MASK_XXX as rejected in
   DETECT_INFO and return false.  If the source runs out first, add
   to DETECT_INFO->found the mask collected from bytes that strongly
   suggest XXX and return true.  */
static bool
detect_coding_XXX (struct coding_system *coding,
		   struct coding_detection_info *detect_info)
{
  const unsigned char *src = coding->source;
  const unsigned char *src_base;	/* Read by ONE_MORE_BYTE.  */
  const unsigned char *src_end = coding->source + coding->src_bytes;
  bool multibytep = coding->src_multibyte;
  ptrdiff_t consumed_chars = 0;
  int found = 0;
  int c;
  ...;

  while (1)
    {
      src_base = src;
      /* Jumps to no_more_source once the input is exhausted.  */
      ONE_MORE_BYTE (c);

      if (! __C_conforms_to_XXX___ (c))
	break;
      /* Only bytes that positively suggest XXX count as evidence.  */
      if (__C_strongly_suggests_XXX__ (c))
	found = CATEGORY_MASK_XXX;
    }

  /* A non-conforming byte was seen.  */
  detect_info->rejected |= CATEGORY_MASK_XXX;
  return 0;

 no_more_source:
  /* Every byte conformed.  */
  detect_info->found |= found;
  return 1;
}
#endif
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
#if 0
/* Template (never compiled) showing the common shape of the
   decode_coding_* functions in this file.

   Read bytes starting at CODING->source + CODING->consumed and store
   decoded values into CODING->charbuf after the CODING->charbuf_used
   elements already there.  On return, CODING->consumed,
   CODING->consumed_char and CODING->charbuf_used describe how far the
   conversion got.  */
static void
decode_coding_XXXX (struct coding_system *coding)
{
  const unsigned char *src = coding->source + coding->consumed;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  /* Start of the byte sequence currently being decoded; everything
     before it has been fully converted.  */
  const unsigned char *src_base;
  int *charbuf = coding->charbuf + coding->charbuf_used;
  int *charbuf_end = coding->charbuf + coding->charbuf_size;
  bool multibytep = coding->src_multibyte;
  ptrdiff_t consumed_chars = 0;	/* Updated by ONE_MORE_BYTE.  */
  int c;

  while (1)
    {
      src_base = src;
      /* Stop when there is no more room in charbuf.  */
      if (charbuf >= charbuf_end)
	break;
      /* Jumps to no_more_source once the input is exhausted.  */
      ONE_MORE_BYTE (c);
      /* ... decode C (and following bytes) into *charbuf++ ...  */
    }

 no_more_source:
  if (src_base < src_end
      && coding->mode & CODING_MODE_LAST_BLOCK)
    {
      /* This is the last block, so a trailing incomplete sequence can
	 never be completed; pass its bytes through unchanged.  */
      while (src_base < src_end && charbuf < charbuf_end)
	*charbuf++ = *src_base++;
    }

  coding->consumed = coding->consumed_char = src_base - coding->source;
  coding->charbuf_used = charbuf - coding->charbuf;
}
#endif
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
#if 0
/* Template (never compiled) showing the common shape of the
   encode_coding_* functions in this file.

   Encode the CODING->charbuf_used values in CODING->charbuf and write
   the bytes at CODING->destination + CODING->produced.  The loop stops
   early when fewer than _MAX_BYTES_PRODUCED_IN_LOOP_ bytes remain in
   the destination.  On return CODING->produced and
   CODING->produced_char are updated.  */
static void
encode_coding_XXX (struct coding_system *coding)
{
  bool multibytep = coding->dst_multibyte;
  int *charbuf = coding->charbuf;
  int *charbuf_end = coding->charbuf + coding->charbuf_used;
  unsigned char *dst = coding->destination + coding->produced;
  unsigned char *dst_end = coding->destination + coding->dst_bytes;
  /* Leave room for the largest output one iteration can emit.  */
  unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
  ptrdiff_t produced_chars = 0;

  for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
    {
      int c = *charbuf;
      /* ... encode C and emit bytes through DST ...  */
    }
 label_no_more_destination:
  /* Publish what was produced.  */
  coding->produced_char += produced_chars;
  coding->produced = dst - coding->destination;
}
#endif
282
283
284
285
286 #include <config.h>
287
288 #ifdef HAVE_WCHAR_H
289 #include <wchar.h>
290 #endif
291
292 #include "lisp.h"
293 #include "character.h"
294 #include "buffer.h"
295 #include "charset.h"
296 #include "ccl.h"
297 #include "composite.h"
298 #include "coding.h"
299 #include "termhooks.h"
300 #include "pdumper.h"
301
/* Lisp object with external linkage.  NOTE(review): the name suggests
   a hash table of the defined coding systems -- confirm against the
   code that initializes it.  */
302 Lisp_Object Vcoding_system_hash_table;
303
304
305
306
307
/* Coding system object with external linkage.  NOTE(review): the name
   suggests a fallback coding system for terminal output -- confirm.  */
308 struct coding_system safe_terminal_coding;
309
310
/* File-local Lisp objects.  NOTE(review): presumably the coding
   systems used for SJIS and Big5 conversion -- confirm where they are
   assigned.  */
311 static Lisp_Object Vsjis_coding_system;
312 static Lisp_Object Vbig5_coding_system;
313
314
315
/* Accessors for the ISO-2022 specific state of a coding system.
   CODING is always a pointer to struct coding_system.  */

/* Element REG of the coding_attr_iso_initial vector found in the
   attributes of CODING, as a C integer.  */
#define CODING_ISO_INITIAL(coding, reg)					\
  (XFIXNUM (AREF (AREF (CODING_ID_ATTRS ((coding)->id),			\
			coding_attr_iso_initial),			\
		  reg)))

/* The safe_charsets entry for CHARSET_ID, or -1 when CHARSET_ID is
   above max_charset_id or the entry holds 255.  CHARSET_ID may be
   evaluated more than once.  */
#define CODING_ISO_REQUEST(coding, charset_id)				\
  ((charset_id) <= (coding)->max_charset_id				\
   && (coding)->safe_charsets[charset_id] != 255			\
   ? (coding)->safe_charsets[charset_id]				\
   : -1)

/* Plain field accessors; each one expands to an lvalue.  */
#define CODING_ISO_FLAGS(coding) ((coding)->spec.iso_2022.flags)
#define CODING_ISO_DESIGNATION(coding, reg)				\
  ((coding)->spec.iso_2022.current_designation[reg])
#define CODING_ISO_INVOCATION(coding, plane)				\
  ((coding)->spec.iso_2022.current_invocation[plane])
#define CODING_ISO_SINGLE_SHIFTING(coding)				\
  ((coding)->spec.iso_2022.single_shifting)
#define CODING_ISO_BOL(coding) ((coding)->spec.iso_2022.bol)

/* The designation of the register currently invoked to PLANE, or -1
   when the invocation recorded for PLANE is negative.  */
#define CODING_ISO_INVOKED_CHARSET(coding, plane)			\
  (CODING_ISO_INVOCATION (coding, plane) < 0				\
   ? -1									\
   : CODING_ISO_DESIGNATION (coding, CODING_ISO_INVOCATION (coding, plane)))

/* Address of the composition status; the remaining two are lvalues.  */
#define CODING_ISO_CMP_STATUS(coding) (&(coding)->spec.iso_2022.cmp_status)
#define CODING_ISO_EXTSEGMENT_LEN(coding)				\
  ((coding)->spec.iso_2022.ctext_extended_segment_len)
#define CODING_ISO_EMBEDDED_UTF_8(coding)				\
  ((coding)->spec.iso_2022.embedded_utf_8)
349
350
351
/* Byte values of the control codes that the ISO-2022 code in this
   file tests for by name.  */
#define ISO_CODE_SO	0x0E	/* shift-out */
#define ISO_CODE_SI	0x0F	/* shift-in */
#define ISO_CODE_SS2_7	0x19	/* single-shift-2, 7-bit form */
#define ISO_CODE_ESC	0x1B	/* escape */
#define ISO_CODE_SS2	0x8E	/* single-shift-2 */
#define ISO_CODE_SS3	0x8F	/* single-shift-3 */
#define ISO_CODE_CSI	0x9B	/* control-sequence-introducer */
359
360
361
/* Classification of a single byte for ISO-2022 processing.  The
   enumerators keep their implicit values 0..12; NOTE(review): the
   per-class byte ranges are inferred from the names and from the
   ISO_CODE_* constants -- confirm against the table that is filled
   with these values.  */
enum iso_code_class_type
  {
    ISO_control_0,			/* control code in the low range */
    ISO_shift_out,			/* cf. ISO_CODE_SO */
    ISO_shift_in,			/* cf. ISO_CODE_SI */
    ISO_single_shift_2_7,		/* cf. ISO_CODE_SS2_7 */
    ISO_escape,				/* cf. ISO_CODE_ESC */
    ISO_control_1,			/* control code in the high range */
    ISO_single_shift_2,			/* cf. ISO_CODE_SS2 */
    ISO_single_shift_3,			/* cf. ISO_CODE_SS3 */
    ISO_control_sequence_introducer,	/* cf. ISO_CODE_CSI */
    ISO_0x20_or_0x7F,
    ISO_graphic_plane_0,
    ISO_0xA0_or_0xFF,
    ISO_graphic_plane_1
  };
382
383
384
385
386
387
/* Single-bit option flags for ISO-2022 coding systems (presumably the
   bits stored in CODING_ISO_FLAGS -- confirm at the use sites).  Each
   flag is written as a shift to make its bit position explicit; bits
   14, 18 and 19 are not assigned here.  */
#define CODING_ISO_FLAG_LONG_FORM		(1 << 0)
#define CODING_ISO_FLAG_RESET_AT_EOL		(1 << 1)
#define CODING_ISO_FLAG_RESET_AT_CNTL		(1 << 2)
#define CODING_ISO_FLAG_SEVEN_BITS		(1 << 3)
#define CODING_ISO_FLAG_LOCKING_SHIFT		(1 << 4)
#define CODING_ISO_FLAG_SINGLE_SHIFT		(1 << 5)
#define CODING_ISO_FLAG_DESIGNATION		(1 << 6)
#define CODING_ISO_FLAG_REVISION		(1 << 7)
#define CODING_ISO_FLAG_DIRECTION		(1 << 8)
#define CODING_ISO_FLAG_INIT_AT_BOL		(1 << 9)
#define CODING_ISO_FLAG_DESIGNATE_AT_BOL	(1 << 10)
#define CODING_ISO_FLAG_SAFE			(1 << 11)
#define CODING_ISO_FLAG_LATIN_EXTRA		(1 << 12)
#define CODING_ISO_FLAG_COMPOSITION		(1 << 13)
#define CODING_ISO_FLAG_USE_ROMAN		(1 << 15)
#define CODING_ISO_FLAG_USE_OLDJIS		(1 << 16)
#define CODING_ISO_FLAG_LEVEL_4			(1 << 17)
#define CODING_ISO_FLAG_FULL_SUPPORT		(1 << 20)
443
444
445
/* The character '?'.  NOTE(review): the name suggests the character
   emitted in place of one that cannot be encoded -- confirm at the
   use sites.  */
#define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'

/* Accessors for the UTF-8 and UTF-16 specific fields of CODING (a
   pointer to struct coding_system).  Each expands to an lvalue.  */
#define CODING_UTF_8_BOM(coding) ((coding)->spec.utf_8_bom)
#define CODING_UTF_16_BOM(coding) ((coding)->spec.utf_16.bom)
#define CODING_UTF_16_ENDIAN(coding) ((coding)->spec.utf_16.endian)
#define CODING_UTF_16_SURROGATE(coding) ((coding)->spec.utf_16.surrogate)

/* CCL related entries of the attribute vector of CODING.  The first
   two yield the Lisp objects themselves; the last yields the data
   pointer of the coding_attr_ccl_valids entry.  */
#define CODING_CCL_DECODER(coding)					\
  AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
#define CODING_CCL_ENCODER(coding)					\
  AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
#define CODING_CCL_VALIDS(coding)					\
  (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
470
471
472
/* Categories used when detecting the coding system of a text.  The
   enumerators keep their implicit values 0..21; coding_category_max
   is the number of categories and sizes the per-category tables.  */
enum coding_category
  {
    /* ISO-2022 variants.  */
    coding_category_iso_7,
    coding_category_iso_7_tight,
    coding_category_iso_8_1,
    coding_category_iso_8_2,
    coding_category_iso_7_else,
    coding_category_iso_8_else,
    /* UTF-8 variants.  */
    coding_category_utf_8_auto,
    coding_category_utf_8_nosig,
    coding_category_utf_8_sig,
    /* UTF-16 variants.  */
    coding_category_utf_16_auto,
    coding_category_utf_16_be,
    coding_category_utf_16_le,
    coding_category_utf_16_be_nosig,
    coding_category_utf_16_le_nosig,
    /* Everything else.  */
    coding_category_charset,
    coding_category_sjis,
    coding_category_big5,
    coding_category_ccl,
    coding_category_emacs_mule,
    /* The two below have no bit in CATEGORY_MASK_ANY.  */
    coding_category_raw_text,
    coding_category_undecided,
    coding_category_max
  };

/* One bit per category, at the position given by its enumerator.  */
#define CATEGORY_MASK_ISO_7		(1 << coding_category_iso_7)
#define CATEGORY_MASK_ISO_7_TIGHT	(1 << coding_category_iso_7_tight)
#define CATEGORY_MASK_ISO_8_1		(1 << coding_category_iso_8_1)
#define CATEGORY_MASK_ISO_8_2		(1 << coding_category_iso_8_2)
#define CATEGORY_MASK_ISO_7_ELSE	(1 << coding_category_iso_7_else)
#define CATEGORY_MASK_ISO_8_ELSE	(1 << coding_category_iso_8_else)
#define CATEGORY_MASK_UTF_8_AUTO	(1 << coding_category_utf_8_auto)
#define CATEGORY_MASK_UTF_8_NOSIG	(1 << coding_category_utf_8_nosig)
#define CATEGORY_MASK_UTF_8_SIG		(1 << coding_category_utf_8_sig)
#define CATEGORY_MASK_UTF_16_AUTO	(1 << coding_category_utf_16_auto)
#define CATEGORY_MASK_UTF_16_BE		(1 << coding_category_utf_16_be)
#define CATEGORY_MASK_UTF_16_LE		(1 << coding_category_utf_16_le)
#define CATEGORY_MASK_UTF_16_BE_NOSIG	(1 << coding_category_utf_16_be_nosig)
#define CATEGORY_MASK_UTF_16_LE_NOSIG	(1 << coding_category_utf_16_le_nosig)
#define CATEGORY_MASK_CHARSET		(1 << coding_category_charset)
#define CATEGORY_MASK_SJIS		(1 << coding_category_sjis)
#define CATEGORY_MASK_BIG5		(1 << coding_category_big5)
#define CATEGORY_MASK_CCL		(1 << coding_category_ccl)
#define CATEGORY_MASK_EMACS_MULE	(1 << coding_category_emacs_mule)
#define CATEGORY_MASK_RAW_TEXT		(1 << coding_category_raw_text)

/* Groups of related categories.  */
#define CATEGORY_MASK_ISO_7BIT \
  (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)

#define CATEGORY_MASK_ISO_8BIT \
  (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)

#define CATEGORY_MASK_ISO_ELSE \
  (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)

/* The 7-bit categories plus the two "else" categories.  */
#define CATEGORY_MASK_ISO_ESCAPE \
  (CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_ELSE)

#define CATEGORY_MASK_ISO \
  (CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT | CATEGORY_MASK_ISO_ELSE)

#define CATEGORY_MASK_UTF_16			\
  (CATEGORY_MASK_UTF_16_AUTO			\
   | CATEGORY_MASK_UTF_16_BE			\
   | CATEGORY_MASK_UTF_16_LE			\
   | CATEGORY_MASK_UTF_16_BE_NOSIG		\
   | CATEGORY_MASK_UTF_16_LE_NOSIG)

#define CATEGORY_MASK_UTF_8			\
  (CATEGORY_MASK_UTF_8_AUTO			\
   | CATEGORY_MASK_UTF_8_NOSIG			\
   | CATEGORY_MASK_UTF_8_SIG)

/* Every category from iso_7 through emacs_mule; raw_text and
   undecided are not included.  */
#define CATEGORY_MASK_ANY			\
  (CATEGORY_MASK_ISO				\
   | CATEGORY_MASK_UTF_8			\
   | CATEGORY_MASK_UTF_16			\
   | CATEGORY_MASK_CHARSET			\
   | CATEGORY_MASK_SJIS				\
   | CATEGORY_MASK_BIG5				\
   | CATEGORY_MASK_CCL				\
   | CATEGORY_MASK_EMACS_MULE)
577
578
579
/* File-local Lisp object.  NOTE(review): presumably a table with one
   entry per coding category -- confirm where it is built.  */
580 static Lisp_Object Vcoding_category_table;
581
582
/* An array of coding_category_max categories.  NOTE(review): the name
   suggests the categories sorted by detection priority -- confirm.  */
583 static enum coding_category coding_priorities[coding_category_max];
584
585
586
/* One struct coding_system per category; the array length
   coding_category_max implies indexing by enum coding_category.  */
587 static struct coding_system coding_categories[coding_category_max];
588
589
590
591 static int
592 encode_inhibit_flag (Lisp_Object flag)
593 {
594 return NILP (flag) ? -1 : EQ (flag, Qt);
595 }
596
597
598
599
/* Combine ENCODED_FLAG (-1, 0 or 1, as produced by
   encode_inhibit_flag) with the boolean VAR.  The result is true when
   ENCODED_FLAG is 1, false when it is -1, and VAR itself when it is
   0.  */
static bool
inhibit_flag (int encoded_flag, bool var)
{
  int sum = encoded_flag + var;
  return sum > 0;
}
605
/* Store the attribute vector of CODING in ATTRS and the charset list
   taken from that vector in CHARSET_LIST.  Both outputs must be
   lvalues.  */
#define CODING_GET_INFO(coding, attrs, charset_list)		\
  do {								\
    (attrs) = CODING_ID_ATTRS ((coding)->id);			\
    (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);		\
  } while (0)
611
612
613
614 static bool
615 growable_destination (struct coding_system *coding)
616 {
617 return STRINGP (coding->dst_object) || BUFFERP (coding->dst_object);
618 }
619
620
621
622
623
624
625
626
627
628
/* Fetch the next source byte into C and count it in consumed_chars.

   The macro relies on these names in the expanding function: the
   pointers src, src_end and src_base, the flag multibytep, the counter
   consumed_chars, the variable coding, and a label no_more_source.

   If src has reached src_end, jump to no_more_source; when src_base is
   before src, CODING_RESULT_INSUFFICIENT_SRC is recorded first.

   When multibytep is set and the byte has its high bit set:
   - if (byte & 0xFE) is 0xC0, the byte and the one after it are
     combined into ((byte & 1) << 6) | next; the second byte is read
     without a bounds check;
   - otherwise src is moved back, a whole character is read with
     string_char_advance, C receives the NEGATED character code, and
     CODING_RESULT_INVALID_SRC is recorded.  Callers therefore test
     C < 0 to recognize this case.  */
629 #define ONE_MORE_BYTE(c) \
630 do { \
631 if (src == src_end) \
632 { \
633 if (src_base < src) \
634 record_conversion_result \
635 (coding, CODING_RESULT_INSUFFICIENT_SRC); \
636 goto no_more_source; \
637 } \
638 c = *src++; \
639 if (multibytep && (c & 0x80)) \
640 { \
641 if ((c & 0xFE) == 0xC0) \
642 c = ((c & 1) << 6) | *src++; \
643 else \
644 { \
645 src--; \
646 c = - string_char_advance (&src); \
647 record_conversion_result \
648 (coding, CODING_RESULT_INVALID_SRC); \
649 } \
650 } \
651 consumed_chars++; \
652 } while (0)
653
654
655
656 #if 13 <= __clang_major__ - defined __apple_build_version__
657 # pragma clang diagnostic ignored "-Wunused-but-set-variable"
658 #endif
659
660
661
662
663
664
665
666
667
668
669
/* Fetch two source bytes into C1 and C2.

   The macro relies on src, src_end, multibytep and a label
   no_more_source in the expanding function.  Unlike ONE_MORE_BYTE it
   neither updates consumed_chars nor records a conversion result.

   For either byte, running out of source jumps to no_more_source, and
   with multibytep set a byte whose (byte & 0xFE) is 0xC0 is combined
   with its successor into ((byte & 1) << 6) | next.

   Any other high-bit byte is handled differently for the two results:
   for C1 the rest of that multibyte character is skipped (using
   BYTES_BY_CHAR_HEAD) and the fetch is retried; for C2 the result is
   simply -1 and src is not advanced any further.  */
670 #define TWO_MORE_BYTES(c1, c2) \
671 do { \
672 do { \
673 if (src == src_end) \
674 goto no_more_source; \
675 c1 = *src++; \
676 if (multibytep && (c1 & 0x80)) \
677 { \
678 if ((c1 & 0xFE) == 0xC0) \
679 c1 = ((c1 & 1) << 6) | *src++; \
680 else \
681 { \
682 src += BYTES_BY_CHAR_HEAD (c1) - 1; \
683 c1 = -1; \
684 } \
685 } \
686 } while (c1 < 0); \
687 if (src == src_end) \
688 goto no_more_source; \
689 c2 = *src++; \
690 if (multibytep && (c2 & 0x80)) \
691 { \
692 if ((c2 & 0xFE) == 0xC0) \
693 c2 = ((c2 & 1) << 6) | *src++; \
694 else \
695 c2 = -1; \
696 } \
697 } while (0)
698
699
700
701
702
703
704
705
706
/* Store the byte C at DST, advance DST, and count one produced
   character.  Relies on the caller's locals dst and produced_chars;
   no multibyte conversion is applied.  */
#define EMIT_ONE_ASCII_BYTE(c)		\
  do {					\
    ++produced_chars;			\
    *dst++ = (c);			\
  } while (0)

/* Likewise for the two bytes C1 and C2, stored in that order.  */
#define EMIT_TWO_ASCII_BYTES(c1, c2)	\
  do {					\
    produced_chars += 2;		\
    *dst++ = (c1);			\
    *dst++ = (c2);			\
  } while (0)
721
722
723
724
725
726
727
728
/* Emit the byte C through DST and count one produced character.
   Relies on the caller's locals dst, produced_chars and multibytep.
   When multibytep is false the byte is stored as is.  Otherwise a
   value of 0x80 or more is first converted with BYTE8_TO_CHAR, and
   the result is written in multibyte form with CHAR_STRING.  */
#define EMIT_ONE_BYTE(c)				\
  do {							\
    produced_chars++;					\
    if (! multibytep)					\
      *dst++ = (c);					\
    else						\
      {							\
	unsigned ch = (c);				\
	if (ch >= 0x80)					\
	  ch = BYTE8_TO_CHAR (ch);			\
	dst += CHAR_STRING (ch, dst);			\
      }							\
  } while (0)

/* Emit C1 then C2, each exactly as EMIT_ONE_BYTE does.  */
#define EMIT_TWO_BYTES(c1, c2)				\
  do {							\
    EMIT_ONE_BYTE (c1);					\
    EMIT_ONE_BYTE (c2);					\
  } while (0)

/* Emit C1, C2 and C3 in that order.  */
#define EMIT_THREE_BYTES(c1, c2, c3)			\
  do {							\
    EMIT_ONE_BYTE (c1);					\
    EMIT_TWO_BYTES (c2, c3);				\
  } while (0)

/* Emit C1, C2, C3 and C4 in that order.  */
#define EMIT_FOUR_BYTES(c1, c2, c3, c4)			\
  do {							\
    EMIT_TWO_BYTES (c1, c2);				\
    EMIT_TWO_BYTES (c3, c4);				\
  } while (0)
782
783
784 static void
785 record_conversion_result (struct coding_system *coding,
786 enum coding_result_code result)
787 {
788 coding->result = result;
789 switch (result)
790 {
791 case CODING_RESULT_INSUFFICIENT_SRC:
792 Vlast_code_conversion_error = Qinsufficient_source;
793 break;
794 case CODING_RESULT_INVALID_SRC:
795 Vlast_code_conversion_error = Qinvalid_source;
796 break;
797 case CODING_RESULT_INTERRUPT:
798 Vlast_code_conversion_error = Qinterrupted;
799 break;
800 case CODING_RESULT_INSUFFICIENT_DST:
801
802
803
804 break;
805 case CODING_RESULT_SUCCESS:
806 break;
807 default:
808 Vlast_code_conversion_error = intern ("Unknown error");
809 }
810 }
811
812
813
814
815
816
/* The four macros below wrap a charset operation.  Each one clears
   charset_map_loaded before the operation; if the flag is set
   afterwards, the source (or destination) address of CODING is
   recomputed and the caller's cached pointers are shifted by the
   distance it moved.  */

/* Decode CODE in CHARSET into C, then fix up SRC, SRC_BASE and SRC_END
   if the source of CODING moved.  */
#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
  do {									\
    ptrdiff_t offset;							\
									\
    charset_map_loaded = 0;						\
    c = DECODE_CHAR (charset, code);					\
    if (charset_map_loaded)						\
      {									\
	offset = coding_change_source (coding);				\
	if (offset != 0)						\
	  {								\
	    src += offset;						\
	    src_base += offset;						\
	    src_end += offset;						\
	  }								\
      }									\
  } while (0)

/* Encode C in CHARSET into CODE, then fix up DST and DST_END if the
   destination of CODING moved.  */
#define CODING_ENCODE_CHAR(coding, dst, dst_end, charset, c, code)	\
  do {									\
    ptrdiff_t offset;							\
									\
    charset_map_loaded = 0;						\
    code = ENCODE_CHAR (charset, c);					\
    if (charset_map_loaded)						\
      {									\
	offset = coding_change_destination (coding);			\
	if (offset != 0)						\
	  {								\
	    dst += offset;						\
	    dst_end += offset;						\
	  }								\
      }									\
  } while (0)

/* Set CHARSET to the result of char_charset (C, CHARSET_LIST,
   CODE_RETURN), then fix up DST and DST_END as above.  */
#define CODING_CHAR_CHARSET(coding, dst, dst_end, c, charset_list, code_return, charset) \
  do {									\
    ptrdiff_t offset;							\
									\
    charset_map_loaded = 0;						\
    charset = char_charset (c, charset_list, code_return);		\
    if (charset_map_loaded)						\
      {									\
	offset = coding_change_destination (coding);			\
	if (offset != 0)						\
	  {								\
	    dst += offset;						\
	    dst_end += offset;						\
	  }								\
      }									\
  } while (0)

/* Set RESULT to CHAR_CHARSET_P (C, CHARSET), then fix up DST and
   DST_END as above.  */
#define CODING_CHAR_CHARSET_P(coding, dst, dst_end, c, charset, result)	\
  do {									\
    ptrdiff_t offset;							\
									\
    charset_map_loaded = 0;						\
    result = CHAR_CHARSET_P (c, charset);				\
    if (charset_map_loaded)						\
      {									\
	offset = coding_change_destination (coding);			\
	if (offset != 0)						\
	  {								\
	    dst += offset;						\
	    dst_end += offset;						\
	  }								\
      }									\
  } while (0)
873
874
875
876
877
878
879
/* Make sure more than BYTES bytes can be written at DST.  Relies on
   the caller's locals dst, dst_end, charbuf, charbuf_end and coding.
   When DST + BYTES reaches DST_END, the destination is grown by BYTES
   plus one byte for every element still pending in charbuf, and DST
   and DST_END are reloaded because the destination may have moved.  */
#define ASSURE_DESTINATION(bytes)				\
  do {								\
    if (dst_end <= dst + (bytes))				\
      {								\
	ptrdiff_t more_bytes = charbuf_end - charbuf + (bytes);	\
								\
	dst = alloc_destination (coding, more_bytes, dst);	\
	dst_end = coding->destination + coding->dst_bytes;	\
      }								\
  } while (0)
890
891
892
893
894
895
896
/* Write character C at P with CHAR_STRING and advance P by the number
   of bytes CHAR_STRING reports.  P must be an lvalue; it is evaluated
   twice.  */
897 #define CHAR_STRING_ADVANCE_NO_UNIFY(c, p) ((p) += CHAR_STRING (c, p))
898
899
900
901
902
903
/* Read one character at P with string_char_advance, which receives
   the address of P so that it can advance it.  The value is whatever
   string_char_advance returns.  */
904 #define STRING_CHAR_ADVANCE_NO_UNIFY(p) string_char_advance (&(p))
905
906
907
908 static void
909 coding_set_source (struct coding_system *coding)
910 {
911 if (BUFFERP (coding->src_object))
912 {
913 struct buffer *buf = XBUFFER (coding->src_object);
914
915 if (coding->src_pos < 0)
916 coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
917 else
918 coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
919 }
920 else if (STRINGP (coding->src_object))
921 {
922 coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
923 }
924 else
925 {
926
927
928 }
929 }
930
931
932
933
934
935 static ptrdiff_t
936 coding_change_source (struct coding_system *coding)
937 {
938 const unsigned char *orig = coding->source;
939 coding_set_source (coding);
940 return coding->source - orig;
941 }
942
943
944
945
946 static void
947 coding_set_destination (struct coding_system *coding)
948 {
949 if (BUFFERP (coding->dst_object))
950 {
951 if (BUFFERP (coding->src_object) && coding->src_pos < 0)
952 {
953 coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
954 coding->dst_bytes = (GAP_END_ADDR
955 - (coding->src_bytes - coding->consumed)
956 - coding->destination);
957 }
958 else
959 {
960
961
962 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
963 + coding->dst_pos_byte - BEG_BYTE);
964 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
965 - coding->destination);
966 }
967 }
968 else
969 {
970
971
972 }
973 }
974
975
976
977
978
979 static ptrdiff_t
980 coding_change_destination (struct coding_system *coding)
981 {
982 const unsigned char *orig = coding->destination;
983 coding_set_destination (coding);
984 return coding->destination - orig;
985 }
986
987
988 static void
989 coding_alloc_by_realloc (struct coding_system *coding, ptrdiff_t bytes)
990 {
991 ptrdiff_t newbytes;
992 if (ckd_add (&newbytes, coding->dst_bytes, bytes)
993 || SIZE_MAX < newbytes)
994 string_overflow ();
995 coding->destination = xrealloc (coding->destination, newbytes);
996 coding->dst_bytes = newbytes;
997 }
998
999 static void
1000 coding_alloc_by_making_gap (struct coding_system *coding,
1001 ptrdiff_t gap_head_used, ptrdiff_t bytes)
1002 {
1003 if (EQ (coding->src_object, coding->dst_object))
1004 {
1005
1006
1007
1008
1009 ptrdiff_t add = GAP_SIZE;
1010
1011 GPT += gap_head_used, GPT_BYTE += gap_head_used;
1012 GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1013 make_gap (bytes);
1014 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1015 GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
1016 }
1017 else
1018 make_gap_1 (XBUFFER (coding->dst_object), bytes);
1019 }
1020
1021
1022 static unsigned char *
1023 alloc_destination (struct coding_system *coding, ptrdiff_t nbytes,
1024 unsigned char *dst)
1025 {
1026 ptrdiff_t offset = dst - coding->destination;
1027
1028 if (BUFFERP (coding->dst_object))
1029 {
1030 struct buffer *buf = XBUFFER (coding->dst_object);
1031
1032 coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1033 }
1034 else
1035 coding_alloc_by_realloc (coding, nbytes);
1036 coding_set_destination (coding);
1037 dst = coding->destination + offset;
1038 return dst;
1039 }
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
/* Largest number of charbuf elements written for one annotation; it
   matches the five elements of a composition annotation below.  */
#define MAX_ANNOTATION_LENGTH 5

/* Append the three-element annotation header to BUF and advance BUF:
   the negated record length LEN, the annotation MASK and NCHARS.
   Also sets the `annotated' flag of the caller's variable `coding'.
   The macro expands to one statement without a trailing semicolon,
   so it can be used in an unbraced `if ... else'.  */
#define ADD_ANNOTATION_DATA(buf, len, mask, nchars)	\
  do {							\
    *(buf)++ = -(len);					\
    *(buf)++ = (mask);					\
    *(buf)++ = (nchars);				\
    coding->annotated = 1;				\
  } while (0)

/* Append a five-element composition annotation to BUF: the common
   header followed by NBYTES and METHOD.  */
#define ADD_COMPOSITION_DATA(buf, nchars, nbytes, method)		    \
  do {									    \
    ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
    *(buf)++ = (nbytes);						    \
    *(buf)++ = (method);						    \
  } while (0)

/* Append a four-element charset annotation to BUF: the common header
   followed by the charset ID.  */
#define ADD_CHARSET_DATA(buf, nchars, id)				\
  do {									\
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars);	\
    *(buf)++ = (id);							\
  } while (0)
1096
1097
1098
1099
/* Bit flags naming the end-of-line formats seen in a text: none, or
   any combination of LF, CR and CRLF.  */
#define EOL_SEEN_NONE	0
#define EOL_SEEN_LF	(1 << 0)
#define EOL_SEEN_CR	(1 << 1)
#define EOL_SEEN_CRLF	(1 << 2)
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
/* Classification of a byte C of a UTF-8 sequence.  UTF_8_1_OCTET_P is
   a plain comparison, so it is also true for negative C; the other
   predicates look only at the bits selected by their mask.  */
#define UTF_8_1_OCTET_P(c)		((c) < 0x80)		/* 0xxxxxxx */
#define UTF_8_EXTRA_OCTET_P(c)		(((c) & 0xC0) == 0x80)	/* 10xxxxxx */
#define UTF_8_2_OCTET_LEADING_P(c)	(((c) & 0xE0) == 0xC0)	/* 110xxxxx */
#define UTF_8_3_OCTET_LEADING_P(c)	(((c) & 0xF0) == 0xE0)	/* 1110xxxx */
#define UTF_8_4_OCTET_LEADING_P(c)	(((c) & 0xF8) == 0xF0)	/* 11110xxx */
#define UTF_8_5_OCTET_LEADING_P(c)	(((c) & 0xFC) == 0xF8)	/* 111110xx */

/* The three bytes of the UTF-8 signature (byte order mark).  */
#define UTF_8_BOM_1 0xEF
#define UTF_8_BOM_2 0xBB
#define UTF_8_BOM_3 0xBF
1126
1127
1128
1129
/* Check whether the source of CODING is well-formed UTF-8 and record
   the verdict in DETECT_INFO.

   Return false, with CATEGORY_MASK_UTF_8 added to
   DETECT_INFO->rejected, when a malformed sequence is found or when
   the last block ends in the middle of a sequence.  Otherwise return
   true after updating DETECT_INFO->found / ->rejected for the
   individual UTF-8 categories and storing the number of scanned bytes
   and characters in CODING->detected_utf8_bytes and
   CODING->detected_utf8_chars.  */
1130 static bool
1131 detect_coding_utf_8 (struct coding_system *coding,
1132 struct coding_detection_info *detect_info)
1133 {
1134 const unsigned char *src = coding->source, *src_base;
1135 const unsigned char *src_end = coding->source + coding->src_bytes;
1136 bool multibytep = coding->src_multibyte;
1137 ptrdiff_t consumed_chars = 0;
1138 bool bom_found = 0;
/* The first head_ascii bytes are skipped without being examined and
   are counted as one character each.  */
1139 ptrdiff_t nchars = coding->head_ascii;
1140
1141 detect_info->checked |= CATEGORY_MASK_UTF_8;
1142
1143 src += nchars;
1144
/* A signature is looked for only at the very start of the source.
   NOTE(review): the strict `<' requires at least one byte after the
   three signature bytes -- confirm that this is intended.  */
1145 if (src == coding->source
1146 && src + 3 < src_end
1147 && src[0] == UTF_8_BOM_1
1148 && src[1] == UTF_8_BOM_2
1149 && src[2] == UTF_8_BOM_3)
1150 {
1151 bom_found = 1;
1152 src += 3;
1153 nchars++;
1154 }
1155
1156 while (1)
1157 {
1158 int c, c1, c2, c3, c4;
1159
/* src_base marks the start of the sequence being examined;
   ONE_MORE_BYTE jumps to no_more_source when the input runs out.  */
1160 src_base = src;
1161 ONE_MORE_BYTE (c);
/* Single-octet byte, or a negative value delivered by ONE_MORE_BYTE
   for multibyte sources.  */
1162 if (c < 0 || UTF_8_1_OCTET_P (c))
1163 {
1164 nchars++;
1165 if (c == '\r')
1166 {
/* A newline directly after the CR is consumed here and counted as a
   further character.  */
1167 if (src < src_end && *src == '\n')
1168 {
1169 src++;
1170 nchars++;
1171 }
1172 }
1173 continue;
1174 }
/* From here on C is treated as a leading byte; each further byte must
   be a continuation octet, and the sequence is accepted at the length
   that matches the form of C.  */
1175 ONE_MORE_BYTE (c1);
1176 if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
1177 break;
1178 if (UTF_8_2_OCTET_LEADING_P (c))
1179 {
1180 nchars++;
1181 continue;
1182 }
1183 ONE_MORE_BYTE (c2);
1184 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1185 break;
1186 if (UTF_8_3_OCTET_LEADING_P (c))
1187 {
1188 nchars++;
1189 continue;
1190 }
1191 ONE_MORE_BYTE (c3);
1192 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1193 break;
1194 if (UTF_8_4_OCTET_LEADING_P (c))
1195 {
1196 nchars++;
1197 continue;
1198 }
1199 ONE_MORE_BYTE (c4);
1200 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1201 break;
/* Five-octet sequences are accepted only for leading bytes below
   MAX_MULTIBYTE_LEADING_CODE.  */
1202 if (UTF_8_5_OCTET_LEADING_P (c)
1203
1204
1205 && c < MAX_MULTIBYTE_LEADING_CODE)
1206 {
1207 nchars++;
1208 continue;
1209 }
1210 break;
1211 }
/* Reached through `break': a malformed sequence was found.  */
1212 detect_info->rejected |= CATEGORY_MASK_UTF_8;
1213 return 0;
1214
1215 no_more_source:
/* A sequence cut off at the end of the last block is also a
   rejection.  */
1216 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1217 {
1218 detect_info->rejected |= CATEGORY_MASK_UTF_8;
1219 return 0;
1220 }
1221 if (bom_found)
1222 {
1223
1224 detect_info->found |= CATEGORY_MASK_UTF_8_AUTO | CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
1225 }
1226 else
1227 {
1228 detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
/* Fewer characters than bytes means that at least one multi-octet
   sequence was seen.  */
1229 if (nchars < src_end - coding->source)
1230
1231
1232 detect_info->found |= CATEGORY_MASK_UTF_8_AUTO | CATEGORY_MASK_UTF_8_NOSIG;
1233 }
1234 coding->detected_utf8_bytes = src_base - coding->source;
1235 coding->detected_utf8_chars = nchars;
1236 return 1;
1237 }
1238
1239
/* Decode UTF-8 bytes of CODING, starting at CODING->source +
   CODING->consumed, into CODING->charbuf after the
   CODING->charbuf_used elements already there.

   Well-formed sequences are stored as character codes.  A byte that
   starts a malformed, overlong or surrogate sequence is stored by
   itself (as is when ASCII, otherwise through BYTE8_TO_CHAR) and
   decoding resumes with the byte after it.  On return
   CODING->consumed, CODING->consumed_char and CODING->charbuf_used
   describe how far decoding got; an incomplete sequence at the end of
   the source is left unconsumed.  */
1240 static void
1241 decode_coding_utf_8 (struct coding_system *coding)
1242 {
1243 const unsigned char *src = coding->source + coding->consumed;
1244 const unsigned char *src_end = coding->source + coding->src_bytes;
1245 const unsigned char *src_base;
1246 int *charbuf = coding->charbuf + coding->charbuf_used;
1247 int *charbuf_end = coding->charbuf + coding->charbuf_size;
1248 ptrdiff_t consumed_chars = 0, consumed_chars_base = 0;
1249 bool multibytep = coding->src_multibyte;
1250 enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
1251 bool eol_dos
1252 = !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
/* With eol_dos, the byte following a CR is read ahead into this
   variable; -1 means that no byte is pending.  */
1253 int byte_after_cr = -1;
1254
/* Unless the coding system is known to have no signature, look for
   the three signature bytes.  If they are present src stays past
   them; in every other case src is moved back to src_base.
   NOTE(review): consumed_chars is not reset when src is moved back --
   confirm that the resulting consumed_char count is intended.  */
1255 if (bom != utf_without_bom)
1256 {
1257 int c1, c2, c3;
1258
1259 src_base = src;
1260 ONE_MORE_BYTE (c1);
1261 if (! UTF_8_3_OCTET_LEADING_P (c1))
1262 src = src_base;
1263 else
1264 {
1265 ONE_MORE_BYTE (c2);
1266 if (! UTF_8_EXTRA_OCTET_P (c2))
1267 src = src_base;
1268 else
1269 {
1270 ONE_MORE_BYTE (c3);
1271 if (! UTF_8_EXTRA_OCTET_P (c3))
1272 src = src_base;
1273 else
1274 {
1275 if ((c1 != UTF_8_BOM_1)
1276 || (c2 != UTF_8_BOM_2) || (c3 != UTF_8_BOM_3))
1277 src = src_base;
1278 else
1279 CODING_UTF_8_BOM (coding) = utf_without_bom;
1280 }
1281 }
1282 }
1283 }
/* The signature check is done at most once per coding system.  */
1284 CODING_UTF_8_BOM (coding) = utf_without_bom;
1285
1286 while (1)
1287 {
1288 int c, c1, c2, c3, c4, c5;
1289
/* Everything before src_base / consumed_chars_base has been fully
   decoded; these are the values committed at no_more_source.  */
1290 src_base = src;
1291 consumed_chars_base = consumed_chars;
1292
1293 if (charbuf >= charbuf_end)
1294 {
/* The output buffer is full.  A byte read ahead after a CR has not
   been processed yet, so it is handed back.  */
1295 if (byte_after_cr >= 0)
1296 src_base--;
1297 break;
1298 }
1299
1300
/* Fast path: copy bytes without the high bit straight to charbuf,
   four per iteration, while more than six elements of output room and
   more than six source bytes remain.  */
1301 if (multibytep && ! eol_dos
1302 && charbuf < charbuf_end - 6 && src < src_end - 6)
1303 {
1304 while (charbuf < charbuf_end - 6 && src < src_end - 6)
1305 {
1306 c1 = *src;
1307 if (c1 & 0x80)
1308 break;
1309 src++;
1310 consumed_chars++;
1311 *charbuf++ = c1;
1312
1313 c1 = *src;
1314 if (c1 & 0x80)
1315 break;
1316 src++;
1317 consumed_chars++;
1318 *charbuf++ = c1;
1319
1320 c1 = *src;
1321 if (c1 & 0x80)
1322 break;
1323 src++;
1324 consumed_chars++;
1325 *charbuf++ = c1;
1326
1327 c1 = *src;
1328 if (c1 & 0x80)
1329 break;
1330 src++;
1331 consumed_chars++;
1332 *charbuf++ = c1;
1333 }
1334
/* If the fast path made progress, restart so that src_base and
   consumed_chars_base are brought up to date.  */
1335 if (src != src_base)
1336 continue;
1337 }
1338
/* Use the byte read ahead after a CR, if there is one; otherwise
   fetch a new byte.  */
1339 if (byte_after_cr >= 0)
1340 c1 = byte_after_cr, byte_after_cr = -1;
1341 else
1342 ONE_MORE_BYTE (c1);
1343 if (c1 < 0)
1344 {
/* ONE_MORE_BYTE delivered a negated character code; undo the
   negation.  */
1345 c = - c1;
1346 }
1347 else if (UTF_8_1_OCTET_P (c1))
1348 {
1349 if (eol_dos && c1 == '\r')
1350 ONE_MORE_BYTE (byte_after_cr);
1351 c = c1;
1352 }
1353 else
1354 {
1355 ONE_MORE_BYTE (c2);
1356 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1357 goto invalid_code;
1358 if (UTF_8_2_OCTET_LEADING_P (c1))
1359 {
1360 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1361
1362
1363
/* A two-octet sequence must not encode a value below 128 (overlong
   form).  */
1364 if (c < 128)
1365 goto invalid_code;
1366 }
1367 else
1368 {
1369 ONE_MORE_BYTE (c3);
1370 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1371 goto invalid_code;
1372 if (UTF_8_3_OCTET_LEADING_P (c1))
1373 {
1374 c = (((c1 & 0xF) << 12)
1375 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
/* Reject overlong forms and the range 0xD800..0xDFFF.  */
1376 if (c < 0x800
1377 || (c >= 0xd800 && c < 0xe000))
1378 goto invalid_code;
1379 }
1380 else
1381 {
1382 ONE_MORE_BYTE (c4);
1383 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1384 goto invalid_code;
1385 if (UTF_8_4_OCTET_LEADING_P (c1))
1386 {
1387 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1388 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
/* Reject overlong forms.  */
1389 if (c < 0x10000)
1390 goto invalid_code;
1391 }
1392 else
1393 {
1394 ONE_MORE_BYTE (c5);
1395 if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
1396 goto invalid_code;
1397 if (UTF_8_5_OCTET_LEADING_P (c1))
1398 {
1399 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1400 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1401 | (c5 & 0x3F));
/* Five-octet values must lie in 0x200000..MAX_CHAR.  */
1402 if ((c > MAX_CHAR) || (c < 0x200000))
1403 goto invalid_code;
1404 }
1405 else
1406 goto invalid_code;
1407 }
1408 }
1409 }
1410 }
1411
1412 *charbuf++ = c;
1413 continue;
1414
/* Go back to the start of the offending sequence and emit its first
   byte alone; the bytes after it are decoded afresh on the next
   iterations.  */
1415 invalid_code:
1416 src = src_base;
1417 consumed_chars = consumed_chars_base;
1418 ONE_MORE_BYTE (c);
1419 *charbuf++ = ASCII_CHAR_P (c) ? c : BYTE8_TO_CHAR (c);
1420 }
1421
/* Commit progress up to the start of the last, possibly incomplete,
   sequence.  */
1422 no_more_source:
1423 coding->consumed_char += consumed_chars_base;
1424 coding->consumed = src_base - coding->source;
1425 coding->charbuf_used = charbuf - coding->charbuf;
1426 }
1427
1428
/* Encode the characters held in CODING->charbuf as UTF-8, appending
   the bytes to CODING->destination starting at offset
   CODING->produced.

   A BOM is written first unless CODING_UTF_8_BOM (CODING) is
   utf_without_bom; the BOM state is then switched to utf_without_bom
   so that a later call on the same CODING does not emit it again.

   On exit the conversion result is recorded as
   CODING_RESULT_SUCCESS and CODING->produced / CODING->produced_char
   are updated.  Always returns 0.

   NOTE(review): ASSURE_DESTINATION and the EMIT_* macros are defined
   elsewhere and presumably refer to the locals `coding', `dst',
   `dst_end', `charbuf', `charbuf_end', `multibytep' and
   `produced_chars' by name -- do not rename them without checking.  */
bool
encode_coding_utf_8 (struct coding_system *coding)
{
  bool multibytep = coding->dst_multibyte;
  int *charbuf = coding->charbuf;
  int *charbuf_end = charbuf + coding->charbuf_used;
  unsigned char *dst = coding->destination + coding->produced;
  unsigned char *dst_end = coding->destination + coding->dst_bytes;
  ptrdiff_t produced_chars = 0;
  int c;

  /* Emit the three BOM bytes once, at the start of the output.  */
  if (CODING_UTF_8_BOM (coding) != utf_without_bom)
    {
      ASSURE_DESTINATION (3);
      EMIT_THREE_BYTES (UTF_8_BOM_1, UTF_8_BOM_2, UTF_8_BOM_3);
      CODING_UTF_8_BOM (coding) = utf_without_bom;
    }

  if (multibytep)
    {
      /* Multibyte destination: every output byte goes through
         EMIT_ONE_BYTE, so twice the maximum character length is
         reserved per source character.  */
      int safe_room = MAX_MULTIBYTE_LENGTH * 2;

      while (charbuf < charbuf_end)
        {
          unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;

          ASSURE_DESTINATION (safe_room);
          c = *charbuf++;
          if (CHAR_BYTE8_P (c))
            {
              /* A raw-byte character is written back as its byte.  */
              c = CHAR_TO_BYTE8 (c);
              EMIT_ONE_BYTE (c);
            }
          else
            {
              /* Build the byte sequence in STR first, then emit it
                 byte by byte.  */
              CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
              for (p = str; p < pend; p++)
                EMIT_ONE_BYTE (*p);
            }
        }
    }
  else
    {
      /* Unibyte destination: bytes are stored directly at DST.  */
      int safe_room = MAX_MULTIBYTE_LENGTH;

      while (charbuf < charbuf_end)
        {
          ASSURE_DESTINATION (safe_room);
          c = *charbuf++;
          if (CHAR_BYTE8_P (c))
            *dst++ = CHAR_TO_BYTE8 (c);
          else
            CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
        }
      /* Here one produced "character" is counted per byte written
         since the starting offset.  */
      produced_chars = dst - (coding->destination + coding->produced);
    }
  record_conversion_result (coding, CODING_RESULT_SUCCESS);
  coding->produced_char += produced_chars;
  coding->produced = dst - coding->destination;
  return 0;
}
1490
1491
1492
1493
1494
/* Examine the source of CODING for the UTF-16 categories and record
   the outcome in DETECT_INFO (the `checked', `found' and `rejected'
   masks).

   Returns 0 after an outright rejection and also after the no-BOM
   heuristic scan below; returns 1 when control reaches the
   `no_more_source' label (after a BOM was recognized, or --
   presumably -- when TWO_MORE_BYTES runs out of input and jumps
   there; that macro is defined elsewhere).  */
static bool
detect_coding_utf_16 (struct coding_system *coding,
                      struct coding_detection_info *detect_info)
{
  const unsigned char *src = coding->source;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  bool multibytep = coding->src_multibyte;
  int c1, c2;

  detect_info->checked |= CATEGORY_MASK_UTF_16;
  /* A final block with an odd src_chars count is rejected for every
     UTF-16 category.  */
  if (coding->mode & CODING_MODE_LAST_BLOCK
      && (coding->src_chars & 1))
    {
      detect_info->rejected |= CATEGORY_MASK_UTF_16;
      return 0;
    }

  TWO_MORE_BYTES (c1, c2);
  if ((c1 == 0xFF) && (c2 == 0xFE))
    {
      /* FF FE: little-endian BOM.  */
      detect_info->found |= (CATEGORY_MASK_UTF_16_LE
                             | CATEGORY_MASK_UTF_16_AUTO);
      detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
                                | CATEGORY_MASK_UTF_16_BE_NOSIG
                                | CATEGORY_MASK_UTF_16_LE_NOSIG);
    }
  else if ((c1 == 0xFE) && (c2 == 0xFF))
    {
      /* FE FF: big-endian BOM.  */
      detect_info->found |= (CATEGORY_MASK_UTF_16_BE
                             | CATEGORY_MASK_UTF_16_AUTO);
      detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
                                | CATEGORY_MASK_UTF_16_BE_NOSIG
                                | CATEGORY_MASK_UTF_16_LE_NOSIG);
    }
  else if (c2 < 0)
    {
      /* TWO_MORE_BYTES reported a negative second value; reject.  */
      detect_info->rejected |= CATEGORY_MASK_UTF_16;
      return 0;
    }
  else
    {
      /* No BOM.  Track which byte values have been seen at even
         positions (E) and at odd positions (O) of each byte pair.
         Once 128 or more distinct values have shown up in one of
         the two positions, the corresponding no-signature category
         is rejected.  */
      unsigned char e[256], o[256];
      unsigned e_num = 1, o_num = 1;

      memset (e, 0, 256);
      memset (o, 0, 256);
      e[c1] = 1;
      o[c2] = 1;

      /* Without a BOM the signature-based categories cannot match.  */
      detect_info->rejected |= (CATEGORY_MASK_UTF_16_AUTO
                                |CATEGORY_MASK_UTF_16_BE
                                | CATEGORY_MASK_UTF_16_LE);

      /* Keep scanning until every UTF-16 category is rejected.  */
      while ((detect_info->rejected & CATEGORY_MASK_UTF_16)
             != CATEGORY_MASK_UTF_16)
        {
          TWO_MORE_BYTES (c1, c2);
          if (c2 < 0)
            break;
          if (! e[c1])
            {
              e[c1] = 1;
              e_num++;
              if (e_num >= 128)
                detect_info->rejected |= CATEGORY_MASK_UTF_16_BE_NOSIG;
            }
          if (! o[c2])
            {
              o[c2] = 1;
              o_num++;
              if (o_num >= 128)
                detect_info->rejected |= CATEGORY_MASK_UTF_16_LE_NOSIG;
            }
        }
      return 0;
    }

 no_more_source:
  return 1;
}
1577
/* Decode UTF-16 bytes from CODING->source (starting at
   CODING->consumed) into CODING->charbuf (starting at
   CODING->charbuf_used).

   Byte order comes from CODING_UTF_16_ENDIAN.  A pending high
   surrogate is kept in CODING_UTF_16_SURROGATE so that a pair split
   across two calls can still be combined.  On exit
   CODING->consumed, CODING->consumed_char and CODING->charbuf_used
   are updated; only input up to `src_base' (the start of the last
   unit that was not completely handled) counts as consumed.

   NOTE(review): ONE_MORE_BYTE is defined elsewhere; it presumably
   uses `src', `src_end', `multibytep' and `consumed_chars' by name,
   jumps to `no_more_source' at end of input, and yields a negative
   value for a non-byte character of a multibyte source.  */
static void
decode_coding_utf_16 (struct coding_system *coding)
{
  const unsigned char *src = coding->source + coding->consumed;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  const unsigned char *src_base;
  int *charbuf = coding->charbuf + coding->charbuf_used;
  /* One iteration may store up to three entries, so stop two slots
     before the real end of the buffer.  */
  int *charbuf_end = coding->charbuf + coding->charbuf_size - 2;
  ptrdiff_t consumed_chars = 0, consumed_chars_base = 0;
  bool multibytep = coding->src_multibyte;
  enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
  enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
  int surrogate = CODING_UTF_16_SURROGATE (coding);
  bool eol_dos
    = !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
  /* The two bytes read ahead after a CR when EOL_DOS; -1 if none.  */
  int byte_after_cr1 = -1, byte_after_cr2 = -1;

  if (bom == utf_with_bom)
    {
      int c, c1, c2;

      src_base = src;
      ONE_MORE_BYTE (c1);
      ONE_MORE_BYTE (c2);
      c = (c1 << 8) | c2;

      if (endian == utf_16_big_endian
          ? c != 0xFEFF : c != 0xFFFE)
        {
          /* The first two bytes are not the expected BOM: rewind so
             that they are decoded as ordinary text.  */
          src = src_base;
        }
      CODING_UTF_16_BOM (coding) = utf_without_bom;
    }
  else if (bom == utf_detect_bom)
    {
      /* Nothing is skipped here; just stop looking for a BOM on
         later calls.  */
      CODING_UTF_16_BOM (coding) = utf_without_bom;
    }

  while (1)
    {
      int c, c1, c2;

      src_base = src;
      consumed_chars_base = consumed_chars;

      if (charbuf >= charbuf_end)
        {
          /* Output is full.  Two look-ahead bytes that were already
             read must not be reported as consumed.  */
          if (byte_after_cr1 >= 0)
            src_base -= 2;
          break;
        }

      /* Use the look-ahead bytes first, if any.  */
      if (byte_after_cr1 >= 0)
        c1 = byte_after_cr1, byte_after_cr1 = -1;
      else
        ONE_MORE_BYTE (c1);
      if (c1 < 0)
        {
          /* Negative value from ONE_MORE_BYTE: store its negation
             as a character by itself.  */
          *charbuf++ = -c1;
          continue;
        }
      if (byte_after_cr2 >= 0)
        c2 = byte_after_cr2, byte_after_cr2 = -1;
      else
        ONE_MORE_BYTE (c2);
      if (c2 < 0)
        {
          /* The first byte has no partner byte: store it alone
             (as a raw-byte char when not ASCII), then the negated
             second value.  */
          *charbuf++ = ASCII_CHAR_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
          *charbuf++ = -c2;
          continue;
        }
      c = (endian == utf_16_big_endian
           ? ((c1 << 8) | c2) : ((c2 << 8) | c1));

      if (surrogate)
        {
          /* A high surrogate is pending.  */
          if (! UTF_16_LOW_SURROGATE_P (c))
            {
              /* It is not followed by a low surrogate: store the
                 two bytes of the pending surrogate, in source byte
                 order, as two separate entries.  */
              if (endian == utf_16_big_endian)
                c1 = surrogate >> 8, c2 = surrogate & 0xFF;
              else
                c1 = surrogate & 0xFF, c2 = surrogate >> 8;
              *charbuf++ = c1;
              *charbuf++ = c2;
              /* The current unit either becomes the new pending
                 high surrogate or is stored as is.  */
              if (UTF_16_HIGH_SURROGATE_P (c))
                CODING_UTF_16_SURROGATE (coding) = surrogate = c;
              else
                *charbuf++ = c;
            }
          else
            {
              /* Combine the surrogate pair into one code point.  */
              c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
              CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
              *charbuf++ = 0x10000 + c;
            }
        }
      else
        {
          if (UTF_16_HIGH_SURROGATE_P (c))
            CODING_UTF_16_SURROGATE (coding) = surrogate = c;
          else
            {
              if (eol_dos && c == '\r')
                {
                  /* Read the next unit ahead before storing the CR.
                     If the input ends here, the CR is not stored
                     and, since `src_base' still points at it, not
                     consumed either.  */
                  ONE_MORE_BYTE (byte_after_cr1);
                  ONE_MORE_BYTE (byte_after_cr2);
                }
              *charbuf++ = c;
            }
        }
    }

 no_more_source:
  coding->consumed_char += consumed_chars_base;
  coding->consumed = src_base - coding->source;
  coding->charbuf_used = charbuf - coding->charbuf;
}
1700
/* Encode the characters held in CODING->charbuf as UTF-16,
   appending the bytes to CODING->destination starting at offset
   CODING->produced.

   Byte order follows CODING_UTF_16_ENDIAN.  A BOM is written first
   unless CODING_UTF_16_BOM (CODING) is utf_without_bom, after which
   the BOM state is switched to utf_without_bom.  Characters above
   MAX_UNICODE_CHAR are replaced by CODING->default_char; characters
   at or above 0x10000 are written as a surrogate pair.

   Records CODING_RESULT_SUCCESS, updates CODING->produced and
   CODING->produced_char, and always returns 0.

   NOTE(review): ASSURE_DESTINATION and the EMIT_* macros are defined
   elsewhere and presumably use `dst', `dst_end', `charbuf',
   `charbuf_end', `multibytep' and `produced_chars' by name.  */
static bool
encode_coding_utf_16 (struct coding_system *coding)
{
  bool multibytep = coding->dst_multibyte;
  int *charbuf = coding->charbuf;
  int *charbuf_end = charbuf + coding->charbuf_used;
  unsigned char *dst = coding->destination + coding->produced;
  unsigned char *dst_end = coding->destination + coding->dst_bytes;
  /* Room requested before each emission.  */
  int safe_room = 8;
  enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
  bool big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
  ptrdiff_t produced_chars = 0;
  int c;

  if (bom != utf_without_bom)
    {
      /* Write U+FEFF in the selected byte order, once.  */
      ASSURE_DESTINATION (safe_room);
      if (big_endian)
        EMIT_TWO_BYTES (0xFE, 0xFF);
      else
        EMIT_TWO_BYTES (0xFF, 0xFE);
      CODING_UTF_16_BOM (coding) = utf_without_bom;
    }

  while (charbuf < charbuf_end)
    {
      ASSURE_DESTINATION (safe_room);
      c = *charbuf++;
      /* Not representable in UTF-16: substitute the default.  */
      if (c > MAX_UNICODE_CHAR)
        c = coding->default_char;

      if (c < 0x10000)
        {
          /* One 16-bit unit.  */
          if (big_endian)
            EMIT_TWO_BYTES (c >> 8, c & 0xFF);
          else
            EMIT_TWO_BYTES (c & 0xFF, c >> 8);
        }
      else
        {
          /* Two 16-bit units: high surrogate C1 carries the upper
             ten bits, low surrogate C2 the lower ten.  */
          int c1, c2;

          c -= 0x10000;
          c1 = (c >> 10) + 0xD800;
          c2 = (c & 0x3FF) + 0xDC00;
          if (big_endian)
            EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
          else
            EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
        }
    }
  record_conversion_result (coding, CODING_RESULT_SUCCESS);
  coding->produced = dst - coding->destination;
  coding->produced_char += produced_chars;
  return 0;
}
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
/* Table indexed by a leading byte: the total length in bytes (the
   decoder in this file handles values 1 to 4) of the emacs-mule
   sequence that the byte introduces.  It is only read in this part
   of the file; it is presumably filled in during initialization
   elsewhere.  */
char emacs_mule_bytes[256];
1833
1834
1835
1836
1837
/* Examine the source of CODING for the emacs-mule category and
   record the outcome in DETECT_INFO.

   Returns 0 with the category rejected when a byte sequence that
   does not fit the format is seen, or when the last block ends in
   the middle of a sequence.  Returns 1 when the input is exhausted
   without such a problem; in that case CATEGORY_MASK_EMACS_MULE is
   added to `found' only if at least one multibyte sequence or
   composition was actually seen.

   NOTE(review): ONE_MORE_BYTE is defined elsewhere; it presumably
   jumps to `no_more_source' at end of input and yields a negative
   value for a non-byte character of a multibyte source.  */
static bool
detect_coding_emacs_mule (struct coding_system *coding,
                          struct coding_detection_info *detect_info)
{
  const unsigned char *src = coding->source, *src_base;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  bool multibytep = coding->src_multibyte;
  ptrdiff_t consumed_chars = 0;
  int c;
  int found = 0;

  detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
  /* Skip the leading bytes counted by head_ascii.  */
  src += coding->head_ascii;

  while (1)
    {
      src_base = src;
      ONE_MORE_BYTE (c);
      if (c < 0)
        continue;
      if (c == 0x80)
        {
          /* 0x80 introduces a composition.  It is not analyzed in
             detail: the run of bytes >= 0xA0 that follows is
             skipped, and only its length is checked -- the run plus
             its terminating byte must span more than 4 bytes.  */
          const unsigned char *src_start;

        repeat:
          src_start = src;
          do
            {
              ONE_MORE_BYTE (c);
            }
          while (c >= 0xA0);

          if (src - src_start <= 4)
            break;
          found = CATEGORY_MASK_EMACS_MULE;
          /* Another composition follows immediately.  */
          if (c == 0x80)
            goto repeat;
        }

      if (c < 0x80)
        {
          /* ESC, SI and SO make the text invalid for this
             category.  */
          if (c < 0x20
              && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
            break;
        }
      else
        {
          /* Leading byte: the expected number of following bytes
             comes from emacs_mule_bytes, and each of them must be
             >= 0xA0.  */
          int more_bytes = emacs_mule_bytes[c] - 1;

          while (more_bytes > 0)
            {
              ONE_MORE_BYTE (c);
              if (c < 0xA0)
                {
                  /* Unread the byte that ended the sequence.  */
                  src--;
                  break;
                }
              more_bytes--;
            }
          /* The sequence was too short.  */
          if (more_bytes != 0)
            break;
          found = CATEGORY_MASK_EMACS_MULE;
        }
    }
  /* Reached only through `break': invalid input.  */
  detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
  return 0;

 no_more_source:
  /* Input ended inside a sequence although this is the last
     block.  */
  if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
    {
      detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
      return 0;
    }
  detect_info->found |= found;
  return 1;
}
1919
1920
1921
1922
1923
1924
1925
1926
/* Parse one emacs-mule encoded character starting at SRC, which
   points into the source of CODING.

   On success store the number of source bytes used in *NBYTES, the
   number of source characters used in *NCHARS and, if ID is
   non-null, the charset id in *ID, and return the decoded
   character.  The character is returned negated when it was found
   as a component of an old-form composition (state COMPOSING_CHAR).

   While an old-form composition is in progress in any other state,
   a first byte >= 0xA0 is not decoded: it is returned negated with
   *NBYTES and *NCHARS set, and *ID is left untouched.

   Return -1 for an invalid byte sequence and -2 when the source
   ends before the character is complete; the output parameters are
   not set in these two cases.

   NOTE(review): ONE_MORE_BYTE and CODING_DECODE_CHAR are defined
   elsewhere; they presumably use `src', `src_end', `src_base',
   `multibytep' and `consumed_chars' by name, and ONE_MORE_BYTE
   presumably jumps to `no_more_source' at end of input.  */
static int
emacs_mule_char (struct coding_system *coding, const unsigned char *src,
                 int *nbytes, int *nchars, int *id,
                 struct composition_status *cmp_status)
{
  const unsigned char *src_end = coding->source + coding->src_bytes;
  const unsigned char *src_base = src;
  bool multibytep = coding->src_multibyte;
  int charset_ID;
  unsigned code;
  int c;
  ptrdiff_t consumed_chars = 0;
  bool mseq_found = 0;

  ONE_MORE_BYTE (c);
  if (c < 0)
    {
      /* Negative value from ONE_MORE_BYTE: its negation is the
         character itself.  */
      c = -c;
      charset_ID = emacs_mule_charset[0];
    }
  else
    {
      if (c >= 0xA0)
        {
          /* A byte >= 0xA0 in first position is only acceptable
             inside an old-form composition.  */
          if (cmp_status->state != COMPOSING_NO
              && cmp_status->old_form)
            {
              if (cmp_status->state == COMPOSING_CHAR)
                {
                  /* Recover the real leading byte of the component:
                     0xA0 prefixes a byte that was stored with 0x80
                     added, any other value was stored with 0x20
                     added.  */
                  if (c == 0xA0)
                    {
                      ONE_MORE_BYTE (c);
                      c -= 0x80;
                      if (c < 0)
                        goto invalid_code;
                    }
                  else
                    c -= 0x20;
                  mseq_found = 1;
                }
              else
                {
                  /* Hand the raw byte back (negated) to the
                     caller.  */
                  *nbytes = src - src_base;
                  *nchars = consumed_chars;
                  return -c;
                }
            }
          else
            goto invalid_code;
        }

      /* Dispatch on the total sequence length that the leading
         byte announces.  Every trailing byte must be >= 0xA0 and
         contributes its low 7 bits to CODE.  */
      switch (emacs_mule_bytes[c])
        {
        case 2:
          /* Leading byte + one code byte.  */
          if ((charset_ID = emacs_mule_charset[c]) < 0)
            goto invalid_code;
          ONE_MORE_BYTE (c);
          if (c < 0xA0)
            goto invalid_code;
          code = c & 0x7F;
          break;

        case 3:
          if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
              || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
            {
              /* Private leading code + charset byte + one code
                 byte.  */
              ONE_MORE_BYTE (c);
              if (c < 0xA0 || (charset_ID = emacs_mule_charset[c]) < 0)
                goto invalid_code;
              ONE_MORE_BYTE (c);
              if (c < 0xA0)
                goto invalid_code;
              code = c & 0x7F;
            }
          else
            {
              /* Leading byte + two code bytes.  */
              if ((charset_ID = emacs_mule_charset[c]) < 0)
                goto invalid_code;
              ONE_MORE_BYTE (c);
              if (c < 0xA0)
                goto invalid_code;
              code = (c & 0x7F) << 8;
              ONE_MORE_BYTE (c);
              if (c < 0xA0)
                goto invalid_code;
              code |= c & 0x7F;
            }
          break;

        case 4:
          /* Leading byte + charset byte + two code bytes.  */
          ONE_MORE_BYTE (c);
          if (c < 0 || (charset_ID = emacs_mule_charset[c]) < 0)
            goto invalid_code;
          ONE_MORE_BYTE (c);
          if (c < 0xA0)
            goto invalid_code;
          code = (c & 0x7F) << 8;
          ONE_MORE_BYTE (c);
          if (c < 0xA0)
            goto invalid_code;
          code |= c & 0x7F;
          break;

        case 1:
          /* A single byte: ASCII or a raw eight-bit byte.  */
          code = c;
          charset_ID = ASCII_CHAR_P (code) ? charset_ascii : charset_eight_bit;
          break;

        default:
          emacs_abort ();
        }
      CODING_DECODE_CHAR (coding, src, src_base, src_end,
                          CHARSET_FROM_ID (charset_ID), code, c);
      if (c < 0)
        goto invalid_code;
    }
  *nbytes = src - src_base;
  *nchars = consumed_chars;
  if (id)
    *id = charset_ID;
  return (mseq_found ? -c : c);

 no_more_source:
  return -2;

 invalid_code:
  return -1;
}
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
/* Decode the single-byte composition rule C (used by the old-form
   rule-based composition path) and store the result in RULE.

   C is modified in place: 0xA0 is subtracted and the result must
   lie in 0..80, otherwise control jumps to the `invalid_code' label
   of the enclosing function.  The value is split into GREF = C / 9
   and NREF = C % 9, a value of 4 in either is remapped to 10, and
   the pair is combined with COMPOSITION_ENCODE_RULE.  */
#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(c, rule)  \
  do {                                                  \
    int gref, nref;                                     \
                                                        \
    c -= 0xA0;                                          \
    if (c < 0 || c >= 81)                               \
      goto invalid_code;                                \
    gref = c / 9, nref = c % 9;                         \
    if (gref == 4) gref = 10;                           \
    if (nref == 4) nref = 10;                           \
    rule = COMPOSITION_ENCODE_RULE (gref, nref);        \
  } while (0)
2130
2131
2132
2133
2134
2135
/* Decode a two-byte composition rule and store the result in RULE.

   C holds the first byte on entry; GREF is C - 0x20.  The second
   byte is then read into C with ONE_MORE_BYTE and gives
   NREF = C - 0x20.  Each value must lie in 0..80, otherwise control
   jumps to the `invalid_code' label of the enclosing function.  The
   pair is combined with COMPOSITION_ENCODE_RULE.  Because it uses
   ONE_MORE_BYTE, this macro needs the same local context as that
   macro.  */
#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(c, rule)  \
  do {                                                  \
    int gref, nref;                                     \
                                                        \
    gref = c - 0x20;                                    \
    if (gref < 0 || gref >= 81)                         \
      goto invalid_code;                                \
    ONE_MORE_BYTE (c);                                  \
    nref = c - 0x20;                                    \
    if (nref < 0 || nref >= 81)                         \
      goto invalid_code;                                \
    rule = COMPOSITION_ENCODE_RULE (gref, nref);        \
  } while (0)
2149
2150
2151
2152
2153
2154
2155
/* Start decoding a new-form composition header.  Expects the locals
   `c' (the method byte, already read), `cmp_status' and `charbuf'
   of the enclosing function.

   The method is C - 0xF2.  The next byte minus 0xA0 is NBYTES (at
   least 3, and exactly 4 for COMPOSITION_RELATIVE); the byte after
   that minus 0xA0 is NCHARS (1 .. MAX_COMPOSITION_COMPONENTS - 1).
   Anything else jumps to `invalid_code'.  On success the
   composition status is initialized (state COMPOSING_CHAR for a
   relative composition, COMPOSING_COMPONENT_CHAR otherwise; NCOMPS
   is NBYTES - 4) and an annotation header is appended to CHARBUF
   with ADD_COMPOSITION_DATA.  */
#define DECODE_EMACS_MULE_21_COMPOSITION()                              \
  do {                                                                  \
    enum composition_method method = c - 0xF2;                          \
    int nbytes, nchars;                                                 \
                                                                        \
    ONE_MORE_BYTE (c);                                                  \
    if (c < 0)                                                          \
      goto invalid_code;                                                \
    nbytes = c - 0xA0;                                                  \
    if (nbytes < 3 || (method == COMPOSITION_RELATIVE && nbytes != 4))  \
      goto invalid_code;                                                \
    ONE_MORE_BYTE (c);                                                  \
    nchars = c - 0xA0;                                                  \
    if (nchars <= 0 || nchars >= MAX_COMPOSITION_COMPONENTS)            \
      goto invalid_code;                                                \
    cmp_status->old_form = 0;                                           \
    cmp_status->method = method;                                        \
    if (method == COMPOSITION_RELATIVE)                                 \
      cmp_status->state = COMPOSING_CHAR;                               \
    else                                                                \
      cmp_status->state = COMPOSING_COMPONENT_CHAR;                     \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                         \
    cmp_status->nchars = nchars;                                        \
    cmp_status->ncomps = nbytes - 4;                                    \
    ADD_COMPOSITION_DATA (charbuf, nchars, nbytes, method);             \
  } while (0)
2182
2183
2184
2185
/* Start an old-form relative composition: mark `cmp_status' as
   old-form with method COMPOSITION_RELATIVE in state
   COMPOSING_CHAR, reset the character and component counts to 0,
   and append an annotation header (with zero counts for now) to
   `charbuf'.  */
#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION()             \
  do {                                                          \
    cmp_status->old_form = 1;                                   \
    cmp_status->method = COMPOSITION_RELATIVE;                  \
    cmp_status->state = COMPOSING_CHAR;                         \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
    cmp_status->nchars = cmp_status->ncomps = 0;                \
    ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
  } while (0)
2195
2196
2197
2198
/* Start an old-form rule-based composition: same as the relative
   variant except that the method is COMPOSITION_WITH_RULE.  */
#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION()             \
  do {                                                          \
    cmp_status->old_form = 1;                                   \
    cmp_status->method = COMPOSITION_WITH_RULE;                 \
    cmp_status->state = COMPOSING_CHAR;                         \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
    cmp_status->nchars = cmp_status->ncomps = 0;                \
    ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
  } while (0)
2208
2209
/* Handle the byte that follows the composition introducer 0x80.
   Expects the locals `c', `src', `cmp_status' and `charbuf' of the
   enclosing function and its `invalid_code' label.

   - A byte whose offset from 0xF2 is a composition method between
     COMPOSITION_RELATIVE and COMPOSITION_WITH_RULE_ALTCHARS starts a
     new-form composition.
   - A byte in 0xA0..0xBF starts an old-form relative composition;
     SRC is then moved back so that the same byte is read again as
     the first component.
   - 0xFF starts an old-form rule-based composition.
   - Everything else (including a negative value) is invalid.  */
#define DECODE_EMACS_MULE_COMPOSITION_START()           \
  do {                                                  \
    const unsigned char *current_src = src;             \
                                                        \
    ONE_MORE_BYTE (c);                                  \
    if (c < 0)                                          \
      goto invalid_code;                                \
    if (c - 0xF2 >= COMPOSITION_RELATIVE                \
        && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)  \
      DECODE_EMACS_MULE_21_COMPOSITION ();              \
    else if (c < 0xA0)                                  \
      goto invalid_code;                                \
    else if (c < 0xC0)                                  \
      {                                                 \
        DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION ();   \
                                                        \
        src = current_src;                              \
      }                                                 \
    else if (c == 0xFF)                                 \
      DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION ();     \
    else                                                \
      goto invalid_code;                                \
  } while (0)
2233
/* Close the composition that is being collected, assuming that
   `charbuf' points just past its last entry and that
   `cmp_status->length' entries belong to it.

   For an old-form composition the character count is patched into
   the third entry of the annotation.  For a new-form composition
   whose method is beyond COMPOSITION_RELATIVE, the first entry is
   set to the third entry minus the length.  The state is reset to
   COMPOSING_NO in every case.  */
#define EMACS_MULE_COMPOSITION_END()                            \
  do {                                                          \
    int idx = - cmp_status->length;                             \
                                                                \
    if (cmp_status->old_form)                                   \
      charbuf[idx + 2] = cmp_status->nchars;                    \
    else if (cmp_status->method > COMPOSITION_RELATIVE)         \
      charbuf[idx] = charbuf[idx + 2] - cmp_status->length;     \
    cmp_status->state = COMPOSING_NO;                           \
  } while (0)
2244
2245
2246 static int
2247 emacs_mule_finish_composition (int *charbuf,
2248 struct composition_status *cmp_status)
2249 {
2250 int idx = - cmp_status->length;
2251 int new_chars;
2252
2253 if (cmp_status->old_form && cmp_status->nchars > 0)
2254 {
2255 charbuf[idx + 2] = cmp_status->nchars;
2256 new_chars = 0;
2257 if (cmp_status->method == COMPOSITION_WITH_RULE
2258 && cmp_status->state == COMPOSING_CHAR)
2259 {
2260
2261 int rule = charbuf[-1] + 0xA0;
2262
2263 charbuf[-2] = BYTE8_TO_CHAR (rule);
2264 charbuf[-1] = -1;
2265 new_chars = 1;
2266 }
2267 }
2268 else
2269 {
2270 charbuf[idx++] = BYTE8_TO_CHAR (0x80);
2271
2272 if (cmp_status->method == COMPOSITION_WITH_RULE)
2273 {
2274 charbuf[idx++] = BYTE8_TO_CHAR (0xFF);
2275 charbuf[idx++] = -3;
2276 charbuf[idx++] = 0;
2277 new_chars = 1;
2278 }
2279 else
2280 {
2281 int nchars = charbuf[idx + 1] + 0xA0;
2282 int nbytes = charbuf[idx + 2] + 0xA0;
2283
2284 charbuf[idx++] = BYTE8_TO_CHAR (0xF2 + cmp_status->method);
2285 charbuf[idx++] = BYTE8_TO_CHAR (nbytes);
2286 charbuf[idx++] = BYTE8_TO_CHAR (nchars);
2287 charbuf[idx++] = -1;
2288 new_chars = 4;
2289 }
2290 }
2291 cmp_status->state = COMPOSING_NO;
2292 return new_chars;
2293 }
2294
/* If a composition is in progress, finish it with
   emacs_mule_finish_composition and add the number of characters
   that call reports to `char_offset'.  Expects the locals
   `cmp_status', `charbuf' and `char_offset' of the enclosing
   function.  */
#define EMACS_MULE_MAYBE_FINISH_COMPOSITION()                           \
  do {                                                                  \
    if (cmp_status->state != COMPOSING_NO)                              \
      char_offset += emacs_mule_finish_composition (charbuf, cmp_status); \
  } while (0)
2300
2301
/* Decode emacs-mule bytes from CODING->source (starting at
   CODING->consumed) into CODING->charbuf (starting at
   CODING->charbuf_used), producing charset and composition
   annotations along the way.

   The composition being collected is tracked in
   CODING->spec.emacs_mule.cmp_status.  If the input ends in the
   middle of a composition and this is not the last block, the
   partial annotation is moved into `carryover' and restored at the
   start of the next call.

   On exit CODING->consumed, CODING->consumed_char and
   CODING->charbuf_used are updated; only input up to `src_base'
   counts as consumed.

   NOTE(review): ONE_MORE_BYTE, ADD_CHARSET_DATA and
   ADD_COMPOSITION_DATA are defined elsewhere and presumably rely on
   the local names used here; ONE_MORE_BYTE presumably jumps to
   `no_more_source' at end of input.  */
static void
decode_coding_emacs_mule (struct coding_system *coding)
{
  const unsigned char *src = coding->source + coding->consumed;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  const unsigned char *src_base;
  int *charbuf = coding->charbuf + coding->charbuf_used;
  /* Stop early enough to leave room for three annotations plus one
     extra entry.  */
  int *charbuf_end
    = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3)
      - 1;
  ptrdiff_t consumed_chars = 0, consumed_chars_base;
  bool multibytep = coding->src_multibyte;
  ptrdiff_t char_offset = coding->produced_char;
  /* Offset and charset id of the current run of characters that
     share one charset.  */
  ptrdiff_t last_offset = char_offset;
  int last_id = charset_ascii;
  bool eol_dos
    = !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
  /* The byte read ahead after a CR when EOL_DOS; -1 if none.  */
  int byte_after_cr = -1;
  struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status;

  if (cmp_status->state != COMPOSING_NO)
    {
      /* Restore the partial composition saved by the previous
         call.  */
      int i;

      if (charbuf_end - charbuf < cmp_status->length)
        emacs_abort ();
      for (i = 0; i < cmp_status->length; i++)
        *charbuf++ = cmp_status->carryover[i];
      coding->annotated = 1;
    }

  while (1)
    {
      int c;
      int id UNINIT;

      src_base = src;
      consumed_chars_base = consumed_chars;

      if (charbuf >= charbuf_end)
        {
          /* Output is full.  A look-ahead byte that was already
             read must not be reported as consumed.  */
          if (byte_after_cr >= 0)
            src_base--;
          break;
        }

      if (byte_after_cr >= 0)
        c = byte_after_cr, byte_after_cr = -1;
      else
        ONE_MORE_BYTE (c);

      if (c < 0 || c == 0x80)
        {
          /* Either a negative value from ONE_MORE_BYTE or the
             composition introducer: both end any composition in
             progress.  */
          EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
          if (c < 0)
            {
              *charbuf++ = -c;
              char_offset++;
            }
          else
            DECODE_EMACS_MULE_COMPOSITION_START ();
          continue;
        }

      if (c < 0x80)
        {
          /* ASCII.  For DOS line ends read one byte ahead after a
             CR.  */
          if (eol_dos && c == '\r')
            ONE_MORE_BYTE (byte_after_cr);
          id = charset_ascii;
          if (cmp_status->state != COMPOSING_NO)
            {
              if (cmp_status->old_form)
                EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
              else if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
                cmp_status->ncomps--;
            }
        }
      else
        {
          int nchars UNINIT, nbytes UNINIT;
          /* Remember CODING->source so that a change of it during
             the call below can be detected and the local source
             pointers shifted by the same amount.  */
          const unsigned char *orig = coding->source;
          ptrdiff_t offset;

          c = emacs_mule_char (coding, src_base, &nbytes, &nchars, &id,
                               cmp_status);
          offset = coding->source - orig;
          if (offset)
            {
              src += offset;
              src_base += offset;
              src_end += offset;
            }
          if (c < 0)
            {
              /* -1: invalid sequence; -2: input exhausted.  Other
                 negative values are composition components and are
                 handled below.  */
              if (c == -1)
                goto invalid_code;
              if (c == -2)
                break;
            }
          src = src_base + nbytes;
          consumed_chars = consumed_chars_base + nchars;
          if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
            cmp_status->ncomps -= nchars;
        }

      /* C is now a decoded character, or the negation of a
         composition component or rule byte.  Act according to the
         composition state.  */
      if (cmp_status->state == COMPOSING_NO)
        {
          if (last_id != id)
            {
              /* The charset changes: close the previous run with a
                 charset annotation unless it was ASCII.  */
              if (last_id != charset_ascii)
                ADD_CHARSET_DATA (charbuf, char_offset - last_offset,
                                  last_id);
              last_id = id;
              last_offset = char_offset;
            }
          *charbuf++ = c;
          char_offset++;
        }
      else if (cmp_status->state == COMPOSING_CHAR)
        {
          if (cmp_status->old_form)
            {
              if (c >= 0)
                {
                  /* An ordinary character ends an old-form
                     composition.  */
                  EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
                  *charbuf++ = c;
                  char_offset++;
                }
              else
                {
                  /* One more component of the composition.  */
                  *charbuf++ = -c;
                  cmp_status->nchars++;
                  cmp_status->length++;
                  if (cmp_status->nchars == MAX_COMPOSITION_COMPONENTS)
                    EMACS_MULE_COMPOSITION_END ();
                  else if (cmp_status->method == COMPOSITION_WITH_RULE)
                    cmp_status->state = COMPOSING_RULE;
                }
            }
          else
            {
              /* New form: the number of characters is known in
                 advance and counted down.  */
              *charbuf++ = c;
              cmp_status->length++;
              cmp_status->nchars--;
              if (cmp_status->nchars == 0)
                EMACS_MULE_COMPOSITION_END ();
            }
        }
      else if (cmp_status->state == COMPOSING_RULE)
        {
          int rule;

          if (c >= 0)
            {
              /* A character where a rule was expected: the
                 composition ends here.  */
              EMACS_MULE_COMPOSITION_END ();
              *charbuf++ = c;
              char_offset++;
            }
          else
            {
              c = -c;
              DECODE_EMACS_MULE_COMPOSITION_RULE_20 (c, rule);
              if (rule < 0)
                goto invalid_code;
              /* A rule is stored as the marker -2 followed by the
                 rule value.  */
              *charbuf++ = -2;
              *charbuf++ = rule;
              cmp_status->length += 2;
              cmp_status->state = COMPOSING_CHAR;
            }
        }
      else if (cmp_status->state == COMPOSING_COMPONENT_CHAR)
        {
          *charbuf++ = c;
          cmp_status->length++;
          if (cmp_status->ncomps == 0)
            cmp_status->state = COMPOSING_CHAR;
          else if (cmp_status->ncomps > 0)
            {
              if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)
                cmp_status->state = COMPOSING_COMPONENT_RULE;
            }
          else
            /* More component bytes were used than announced.  */
            EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
        }
      else
        {
          /* Remaining state: a rule between component characters.  */
          int rule;

          DECODE_EMACS_MULE_COMPOSITION_RULE_21 (c, rule);
          if (rule < 0)
            goto invalid_code;
          *charbuf++ = -2;
          *charbuf++ = rule;
          cmp_status->length += 2;
          cmp_status->ncomps--;
          if (cmp_status->ncomps > 0)
            cmp_status->state = COMPOSING_COMPONENT_CHAR;
          else
            EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
        }
      continue;

    invalid_code:
      /* Give up any composition, go back to the start of the
         offending sequence and pass its first byte through (as a
         raw-byte character when not ASCII).  */
      EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
      src = src_base;
      consumed_chars = consumed_chars_base;
      ONE_MORE_BYTE (c);
      *charbuf++ = ASCII_CHAR_P (c) ? c : BYTE8_TO_CHAR (c);
      char_offset++;
    }

 no_more_source:
  if (cmp_status->state != COMPOSING_NO)
    {
      if (coding->mode & CODING_MODE_LAST_BLOCK)
        EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
      else
        {
          /* More input will follow: take the partial annotation out
             of CHARBUF and save it for the next call.  */
          int i;

          charbuf -= cmp_status->length;
          for (i = 0; i < cmp_status->length; i++)
            cmp_status->carryover[i] = charbuf[i];
        }
    }
  /* Close the last non-ASCII charset run.  */
  if (last_id != charset_ascii)
    ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
  coding->consumed_char += consumed_chars_base;
  coding->consumed = src_base - coding->source;
  coding->charbuf_used = charbuf - coding->charbuf;
}
2545
2546
/* Store in CODES (an array of at least two bytes) the leading
   byte(s) of the emacs-mule sequence for the charset whose
   emacs-mule id is ID.

   An id below 0xA0 is its own single leading byte, and CODES[1] is
   set to 0 to say that there is no second one.  Larger ids are
   preceded by one of the leading codes 0x9A..0x9D, selected by the
   range the id falls in, with the id itself as the second byte.

   ID is evaluated more than once, so it must not have side effects.
   The macro expands to a single statement without a trailing
   semicolon, so it can be used safely as the body of an `if' that
   has an `else'.  */
#define EMACS_MULE_LEADING_CODES(id, codes)             \
  do {                                                  \
    if ((id) < 0xA0)                                    \
      (codes)[0] = (id), (codes)[1] = 0;                \
    else if ((id) < 0xE0)                               \
      (codes)[0] = 0x9A, (codes)[1] = (id);             \
    else if ((id) < 0xF0)                               \
      (codes)[0] = 0x9B, (codes)[1] = (id);             \
    else if ((id) < 0xF5)                               \
      (codes)[0] = 0x9C, (codes)[1] = (id);             \
    else                                                \
      (codes)[0] = 0x9D, (codes)[1] = (id);             \
  } while (0)
2560
2561
/* Encode the characters held in CODING->charbuf in the emacs-mule
   format, appending the bytes to CODING->destination starting at
   offset CODING->produced.

   The charset list of the coding system is forced to
   Vemacs_mule_charset_list.  Negative entries in the buffer start
   annotations: a charset annotation selects the charset to try
   first for the following characters, a composition annotation is
   skipped.  A character that no charset of the list can encode is
   replaced by CODING->default_char.

   Records CODING_RESULT_SUCCESS, updates CODING->produced and
   CODING->produced_char, and always returns 0.

   NOTE(review): ASSURE_DESTINATION, the EMIT_* macros and the
   CODING_CHAR_CHARSET* macros are defined elsewhere and presumably
   use `dst', `dst_end', `charbuf', `charbuf_end', `multibytep' and
   `produced_chars' by name.  */
static bool
encode_coding_emacs_mule (struct coding_system *coding)
{
  bool multibytep = coding->dst_multibyte;
  int *charbuf = coding->charbuf;
  int *charbuf_end = charbuf + coding->charbuf_used;
  unsigned char *dst = coding->destination + coding->produced;
  unsigned char *dst_end = coding->destination + coding->dst_bytes;
  /* Room requested before each character is emitted.  */
  int safe_room = 8;
  ptrdiff_t produced_chars = 0;
  Lisp_Object attrs, charset_list;
  int c;
  /* Charset requested by the latest charset annotation; -1 if
     none.  */
  int preferred_charset_id = -1;

  CODING_GET_INFO (coding, attrs, charset_list);
  if (! EQ (charset_list, Vemacs_mule_charset_list))
    {
      charset_list = Vemacs_mule_charset_list;
      ASET (attrs, coding_attr_charset_list, charset_list);
    }

  while (charbuf < charbuf_end)
    {
      ASSURE_DESTINATION (safe_room);
      c = *charbuf++;

      if (c < 0)
        {
          /* C is the negated length of an annotation and CHARBUF
             now points at its type field.  */
          switch (*charbuf)
            {
            case CODING_ANNOTATE_COMPOSITION_MASK:
              /* Compositions are not encoded; the annotation is
                 just skipped.  */
              break;
            case CODING_ANNOTATE_CHARSET_MASK:
              /* NOTE(review): CHARBUF was already advanced past the
                 length entry, and the skip below moves it by
                 -C - 1 entries.  Confirm against the layout written
                 by ADD_CHARSET_DATA that index 3 (rather than 2)
                 really holds the charset id here.  */
              preferred_charset_id = charbuf[3];
              /* Ignore a charset that this coding system does not
                 support.  */
              if (preferred_charset_id >= 0
                  && NILP (Fmemq (make_fixnum (preferred_charset_id),
                                  charset_list)))
                preferred_charset_id = -1;
              break;
            default:
              emacs_abort ();
            }
          /* Skip the rest of the annotation.  */
          charbuf += -c - 1;
          continue;
        }

      if (ASCII_CHAR_P (c))
        EMIT_ONE_ASCII_BYTE (c);
      else if (CHAR_BYTE8_P (c))
        {
          /* A raw-byte character is written back as its byte.  */
          c = CHAR_TO_BYTE8 (c);
          EMIT_ONE_BYTE (c);
        }
      else
        {
          struct charset *charset;
          unsigned code;
          int dimension;
          int emacs_mule_id;
          unsigned char leading_codes[2];

          if (preferred_charset_id >= 0)
            {
              bool result;

              /* Try the preferred charset first and fall back to a
                 search through the whole list.  */
              charset = CHARSET_FROM_ID (preferred_charset_id);
              CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result);
              if (result)
                code = ENCODE_CHAR (charset, c);
              else
                CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
                                     &code, charset);
            }
          else
            CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
                                 &code, charset);
          if (! charset)
            {
              /* Not encodable: use the default character instead.  */
              c = coding->default_char;
              if (ASCII_CHAR_P (c))
                {
                  EMIT_ONE_ASCII_BYTE (c);
                  continue;
                }
              CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
                                   &code, charset);
            }
          dimension = CHARSET_DIMENSION (charset);
          emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
          /* One or two leading bytes, then one or two code bytes
             with the high bit set.  */
          EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
          EMIT_ONE_BYTE (leading_codes[0]);
          if (leading_codes[1])
            EMIT_ONE_BYTE (leading_codes[1]);
          if (dimension == 1)
            EMIT_ONE_BYTE (code | 0x80);
          else
            {
              code |= 0x8080;
              EMIT_ONE_BYTE (code >> 8);
              EMIT_ONE_BYTE (code & 0xFF);
            }
        }
    }
  record_conversion_result (coding, CODING_RESULT_SUCCESS);
  coding->produced_char += produced_chars;
  coding->produced = dst - coding->destination;
  return 0;
}
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
/* Table with one iso_code_class_type entry per byte value.
   NOTE(review): presumably this classifies each byte for ISO 2022
   parsing and is filled in during initialization elsewhere --
   neither is visible in this part of the file.  */
static enum iso_code_class_type iso_code_class[256];
2852
/* True if the charset with id ID is within the range covered by
   CODING->safe_charsets and its entry there is not 255.  (255 is
   the value setup_iso_safe_charsets fills the table with before it
   records the usable charsets.)  ID is evaluated twice.  */
#define SAFE_CHARSET_P(coding, id)      \
  ((id) <= (coding)->max_charset_id     \
   && (coding)->safe_charsets[id] != 255)
2856
2857 static void
2858 setup_iso_safe_charsets (Lisp_Object attrs)
2859 {
2860 Lisp_Object charset_list, safe_charsets;
2861 Lisp_Object request;
2862 Lisp_Object reg_usage;
2863 Lisp_Object tail;
2864 EMACS_INT reg94, reg96;
2865 int flags = XFIXNUM (AREF (attrs, coding_attr_iso_flags));
2866 int max_charset_id;
2867
2868 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2869 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2870 && ! EQ (charset_list, Viso_2022_charset_list))
2871 {
2872 charset_list = Viso_2022_charset_list;
2873 ASET (attrs, coding_attr_charset_list, charset_list);
2874 ASET (attrs, coding_attr_safe_charsets, Qnil);
2875 }
2876
2877 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2878 return;
2879
2880 max_charset_id = 0;
2881 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2882 {
2883 int id = XFIXNUM (XCAR (tail));
2884 if (max_charset_id < id)
2885 max_charset_id = id;
2886 }
2887
2888 safe_charsets = make_uninit_string (max_charset_id + 1);
2889 memset (SDATA (safe_charsets), 255, max_charset_id + 1);
2890 request = AREF (attrs, coding_attr_iso_request);
2891 reg_usage = AREF (attrs, coding_attr_iso_usage);
2892 reg94 = XFIXNUM (XCAR (reg_usage));
2893 reg96 = XFIXNUM (XCDR (reg_usage));
2894
2895 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2896 {
2897 Lisp_Object id;
2898 Lisp_Object reg;
2899 struct charset *charset;
2900
2901 id = XCAR (tail);
2902 charset = CHARSET_FROM_ID (XFIXNUM (id));
2903 reg = Fcdr (Fassq (id, request));
2904 if (! NILP (reg))
2905 SSET (safe_charsets, XFIXNUM (id), XFIXNUM (reg));
2906 else if (charset->iso_chars_96)
2907 {
2908 if (reg96 < 4)
2909 SSET (safe_charsets, XFIXNUM (id), reg96);
2910 }
2911 else
2912 {
2913 if (reg94 < 4)
2914 SSET (safe_charsets, XFIXNUM (id), reg94);
2915 }
2916 }
2917 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2918 }
2919
2920
2921
2922
2923
2924
2925 static bool
2926 detect_coding_iso_2022 (struct coding_system *coding,
2927 struct coding_detection_info *detect_info)
2928 {
2929 const unsigned char *src = coding->source, *src_base = src;
2930 const unsigned char *src_end = coding->source + coding->src_bytes;
2931 bool multibytep = coding->src_multibyte;
2932 bool single_shifting = 0;
2933 int id;
2934 int c, c1;
2935 ptrdiff_t consumed_chars = 0;
2936 int i;
2937 int rejected = 0;
2938 int found = 0;
2939 int composition_count = -1;
2940
2941 detect_info->checked |= CATEGORY_MASK_ISO;
2942
2943 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2944 {
2945 struct coding_system *this = &(coding_categories[i]);
2946 Lisp_Object attrs, val;
2947
2948 if (this->id < 0)
2949 continue;
2950 attrs = CODING_ID_ATTRS (this->id);
2951 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2952 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Viso_2022_charset_list))
2953 setup_iso_safe_charsets (attrs);
2954 val = CODING_ATTR_SAFE_CHARSETS (attrs);
2955 this->max_charset_id = SCHARS (val) - 1;
2956 this->safe_charsets = SDATA (val);
2957 }
2958
2959
2960 src += coding->head_ascii;
2961
2962 while (rejected != CATEGORY_MASK_ISO)
2963 {
2964 src_base = src;
2965 ONE_MORE_BYTE (c);
2966 switch (c)
2967 {
2968 case ISO_CODE_ESC:
2969 if (inhibit_iso_escape_detection)
2970 break;
2971 single_shifting = 0;
2972 ONE_MORE_BYTE (c);
2973 if (c == 'N' || c == 'O')
2974 {
2975
2976 single_shifting = 1;
2977 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2978 }
2979 else if (c == '1')
2980 {
2981
2982 if (composition_count < 0
2983 || composition_count > MAX_COMPOSITION_COMPONENTS)
2984
2985 break;
2986 composition_count = -1;
2987 found |= CATEGORY_MASK_ISO;
2988 }
2989 else if (c >= '0' && c <= '4')
2990 {
2991
2992 composition_count = 0;
2993 }
2994 else
2995 {
2996 if (c >= '(' && c <= '/')
2997 {
2998
2999 ONE_MORE_BYTE (c1);
3000 if (c1 < ' ' || c1 >= 0x80
3001 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
3002 {
3003
3004 if (c1 >= 0x80)
3005 rejected |= (CATEGORY_MASK_ISO_7BIT
3006 | CATEGORY_MASK_ISO_7_ELSE);
3007 break;
3008 }
3009 }
3010 else if (c == '$')
3011 {
3012
3013 ONE_MORE_BYTE (c);
3014 if (c >= '@' && c <= 'B')
3015
3016 id = iso_charset_table[1][0][c];
3017 else if (c >= '(' && c <= '/')
3018 {
3019 ONE_MORE_BYTE (c1);
3020 if (c1 < ' ' || c1 >= 0x80
3021 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
3022 {
3023
3024 if (c1 >= 0x80)
3025 rejected |= (CATEGORY_MASK_ISO_7BIT
3026 | CATEGORY_MASK_ISO_7_ELSE);
3027 break;
3028 }
3029 }
3030 else
3031 {
3032
3033 if (c >= 0x80)
3034 rejected |= (CATEGORY_MASK_ISO_7BIT
3035 | CATEGORY_MASK_ISO_7_ELSE);
3036 break;
3037 }
3038 }
3039 else
3040 {
3041
3042 if (c >= 0x80)
3043 rejected |= (CATEGORY_MASK_ISO_7BIT
3044 | CATEGORY_MASK_ISO_7_ELSE);
3045 break;
3046 }
3047
3048
3049 rejected |= CATEGORY_MASK_ISO_8BIT;
3050 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
3051 id))
3052 found |= CATEGORY_MASK_ISO_7;
3053 else
3054 rejected |= CATEGORY_MASK_ISO_7;
3055 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
3056 id))
3057 found |= CATEGORY_MASK_ISO_7_TIGHT;
3058 else
3059 rejected |= CATEGORY_MASK_ISO_7_TIGHT;
3060 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
3061 id))
3062 found |= CATEGORY_MASK_ISO_7_ELSE;
3063 else
3064 rejected |= CATEGORY_MASK_ISO_7_ELSE;
3065 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
3066 id))
3067 found |= CATEGORY_MASK_ISO_8_ELSE;
3068 else
3069 rejected |= CATEGORY_MASK_ISO_8_ELSE;
3070 }
3071 break;
3072
3073 case ISO_CODE_SO:
3074 case ISO_CODE_SI:
3075
3076 if (inhibit_iso_escape_detection)
3077 break;
3078 single_shifting = 0;
3079 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3080 break;
3081
3082 case ISO_CODE_CSI:
3083
3084 single_shifting = 0;
3085 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3086 found |= CATEGORY_MASK_ISO_8_ELSE;
3087 goto check_extra_latin;
3088
3089 case ISO_CODE_SS2:
3090 case ISO_CODE_SS3:
3091
3092 if (inhibit_iso_escape_detection)
3093 break;
3094 single_shifting = 0;
3095 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3096 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3097 & CODING_ISO_FLAG_SINGLE_SHIFT)
3098 {
3099 found |= CATEGORY_MASK_ISO_8_1;
3100 single_shifting = 1;
3101 }
3102 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
3103 & CODING_ISO_FLAG_SINGLE_SHIFT)
3104 {
3105 found |= CATEGORY_MASK_ISO_8_2;
3106 single_shifting = 1;
3107 }
3108 if (single_shifting)
3109 break;
3110 goto check_extra_latin;
3111
3112 default:
3113 if (c < 0)
3114 continue;
3115 if (c < 0x80)
3116 {
3117 if (composition_count >= 0)
3118 composition_count++;
3119 single_shifting = 0;
3120 break;
3121 }
3122 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3123 if (c >= 0xA0)
3124 {
3125 found |= CATEGORY_MASK_ISO_8_1;
3126
3127
3128
3129
3130 if (! single_shifting
3131 && ! (rejected & CATEGORY_MASK_ISO_8_2))
3132 {
3133 ptrdiff_t len = 1;
3134 while (src < src_end)
3135 {
3136 src_base = src;
3137 ONE_MORE_BYTE (c);
3138 if (c < 0xA0)
3139 {
3140 src = src_base;
3141 break;
3142 }
3143 len++;
3144 }
3145
3146 if (len & 1 && src < src_end)
3147 {
3148 rejected |= CATEGORY_MASK_ISO_8_2;
3149 if (composition_count >= 0)
3150 composition_count += len;
3151 }
3152 else
3153 {
3154 found |= CATEGORY_MASK_ISO_8_2;
3155 if (composition_count >= 0)
3156 composition_count += len / 2;
3157 }
3158 }
3159 break;
3160 }
3161 check_extra_latin:
3162 if (! VECTORP (Vlatin_extra_code_table)
3163 || NILP (AREF (Vlatin_extra_code_table, c)))
3164 {
3165 rejected = CATEGORY_MASK_ISO;
3166 break;
3167 }
3168 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3169 & CODING_ISO_FLAG_LATIN_EXTRA)
3170 found |= CATEGORY_MASK_ISO_8_1;
3171 else
3172 rejected |= CATEGORY_MASK_ISO_8_1;
3173 rejected |= CATEGORY_MASK_ISO_8_2;
3174 break;
3175 }
3176 }
3177 detect_info->rejected |= CATEGORY_MASK_ISO;
3178 return 0;
3179
3180 no_more_source:
3181 detect_info->rejected |= rejected;
3182 detect_info->found |= (found & ~rejected);
3183 return 1;
3184 }
3185
3186
3187
3188
/* Handle a designation sequence met while decoding.  Look up the
   charset selected by DIM, CHARS_96 and FINAL and designate it to the
   graphic register REG of the local `coding'.

   If FINAL is outside ['0', 128), if the lookup yields a negative id,
   or if the charset is not safe for `coding', the register is marked
   with -2 and CHARS_96 is set to -1.  CHARS_96 is also set to -1 when
   ASCII gets designated to a register that carried the -2 marker.
   CHARS_96 must therefore be an lvalue; callers test it afterwards.  */
#define DECODE_DESIGNATION(reg, dim, chars_96, final)			\
  do {									\
    int new_id;								\
									\
    if (final >= '0' && final < 128					\
	&& (new_id = ISO_CHARSET_TABLE (dim, chars_96, final)) >= 0	\
	&& SAFE_CHARSET_P (coding, new_id))				\
      {									\
	/* Remember what REG held before we overwrite it.  */		\
	int old_id = CODING_ISO_DESIGNATION (coding, reg);		\
									\
	if (new_id == charset_jisx0201_roman)				\
	  {								\
	    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)	\
	      new_id = charset_ascii;					\
	  }								\
	else if (new_id == charset_jisx0208_1978)			\
	  {								\
	    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)	\
	      new_id = charset_jisx0208;				\
	  }								\
	CODING_ISO_DESIGNATION (coding, reg) = new_id;			\
	/* ASCII designated over a previously rejected designation:	\
	   report it to the caller through CHARS_96 as well.  */	\
	if (old_id == -2 && new_id == charset_ascii)			\
	  chars_96 = -1;						\
      }									\
    else								\
      {									\
	CODING_ISO_DESIGNATION (coding, reg) = -2;			\
	chars_96 = -1;							\
      }									\
  } while (0)
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
/* Decode a composition rule whose first byte is the local C1 and store
   the result in RULE (an lvalue).  Values below 81 after subtracting 32
   are a single-byte rule; anything else consumes one more byte and the
   result is offset by 0x100.  Jumps to `invalid_code' on a bad rule.  */
#define DECODE_COMPOSITION_RULE(rule)					\
  do {									\
    rule = c1 - 32;							\
    if (rule < 0)							\
      goto invalid_code;						\
    if (rule >= 81)							\
      {									\
	/* Two-byte form: the second byte carries the other half.  */	\
	int second;							\
									\
	ONE_MORE_BYTE (second);						\
	if (! COMPOSITION_ENCODE_RULE_VALID (rule - 81, second - 32))	\
	  goto invalid_code;						\
	rule = COMPOSITION_ENCODE_RULE (rule - 81, second - 32);	\
	rule += 0x100;							\
      }									\
    else								\
      {									\
	/* One-byte form: two base-9 digits, where digit 4 stands	\
	   for reference point 10.  */					\
	int global_ref = (rule) / 9;					\
	int new_ref = (rule) % 9;					\
									\
	if (global_ref == 4)						\
	  global_ref = 10;						\
	if (new_ref == 4)						\
	  new_ref = 10;							\
	rule = COMPOSITION_ENCODE_RULE (global_ref, new_ref);		\
      }									\
  } while (0)
3291
/* Turn the decoded composition rule RULE back into its byte form and
   store it in the local charbuf[idx] and charbuf[idx + 1], adding the
   number of characters this represents to the local `new_chars'.
   Rules below 0x100 use the one-byte form (second slot set to -1),
   others the two-byte form.  RULE is read before either slot is
   written, so it may itself be charbuf[idx + 1].  */
#define ENCODE_COMPOSITION_RULE(rule)					\
  do {									\
    int global_ref = (rule % 0x100) / 12;				\
    int new_ref = (rule % 0x100) % 12;					\
									\
    if (rule >= 0x100)							\
      {									\
	charbuf[idx] = 32 + 81 + global_ref;				\
	charbuf[idx + 1] = 32 + new_ref;				\
	new_chars += 2;							\
      }									\
    else								\
      {									\
	/* Reference point 10 is written as digit 4 in this form.  */	\
	if (global_ref == 10)						\
	  global_ref = 4;						\
	if (new_ref == 10)						\
	  new_ref = 4;							\
	charbuf[idx] = 32 + global_ref * 9 + new_ref;			\
	charbuf[idx + 1] = -1;						\
	new_chars++;							\
      }									\
  } while (0)
3311
3312
3313
3314 static int
3315 finish_composition (int *charbuf, struct composition_status *cmp_status)
3316 {
3317 int idx = - cmp_status->length;
3318 int new_chars;
3319
3320
3321 charbuf[idx++] = ISO_CODE_ESC;
3322 charbuf[idx++] = (cmp_status->method == COMPOSITION_RELATIVE ? '0'
3323 : cmp_status->method == COMPOSITION_WITH_RULE ? '2'
3324 : cmp_status->method == COMPOSITION_WITH_ALTCHARS ? '3'
3325
3326 : '4');
3327 charbuf[idx++] = -2;
3328 charbuf[idx++] = 0;
3329 charbuf[idx++] = -1;
3330 new_chars = cmp_status->nchars;
3331 if (cmp_status->method >= COMPOSITION_WITH_RULE)
3332 for (; idx < 0; idx++)
3333 {
3334 int elt = charbuf[idx];
3335
3336 if (elt == -2)
3337 {
3338 ENCODE_COMPOSITION_RULE (charbuf[idx + 1]);
3339 idx++;
3340 }
3341 else if (elt == -1)
3342 {
3343 charbuf[idx++] = ISO_CODE_ESC;
3344 charbuf[idx] = '0';
3345 new_chars += 2;
3346 }
3347 }
3348 cmp_status->state = COMPOSING_NO;
3349 return new_chars;
3350 }
3351
3352
/* If a composition is in progress (per the local `cmp_status'), undo it
   with finish_composition and advance the local `char_offset' by the
   count that function returns.  Does nothing otherwise.  */
#define MAYBE_FINISH_COMPOSITION()					\
  do {									\
    if (cmp_status->state == COMPOSING_NO)				\
      break;								\
    char_offset += finish_composition (charbuf, cmp_status);		\
  } while (0)
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
/* Handle a composition start sequence whose final byte is C1 ('0',
   '2', '3' or '4').

   When C1 is '0' and we are in the component part of an ALTCHARS style
   composition, the sequence only switches from components to the
   composed characters: two -1 values are stored and the state becomes
   COMPOSING_CHAR.

   Otherwise any composition in progress is abandoned and a new one is
   begun: method and state are derived from C1, the annotation header is
   stored with ADD_COMPOSITION_DATA, and the counters are reset.  */
#define DECODE_COMPOSITION_START(c1)					\
  do {									\
    int back_to_chars							\
      = (c1 == '0'							\
	 && ((cmp_status->state == COMPOSING_COMPONENT_CHAR		\
	      && cmp_status->method == COMPOSITION_WITH_ALTCHARS)	\
	     || (cmp_status->state == COMPOSING_COMPONENT_RULE		\
		 && (cmp_status->method					\
		     == COMPOSITION_WITH_RULE_ALTCHARS))));		\
									\
    if (! back_to_chars)						\
      {									\
	MAYBE_FINISH_COMPOSITION ();					\
	if (c1 == '0')							\
	  cmp_status->method = COMPOSITION_RELATIVE;			\
	else if (c1 == '2')						\
	  cmp_status->method = COMPOSITION_WITH_RULE;			\
	else if (c1 == '3')						\
	  cmp_status->method = COMPOSITION_WITH_ALTCHARS;		\
	else								\
	  cmp_status->method = COMPOSITION_WITH_RULE_ALTCHARS;		\
	if (c1 <= '2')							\
	  cmp_status->state = COMPOSING_CHAR;				\
	else								\
	  cmp_status->state = COMPOSING_COMPONENT_CHAR;			\
	ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);	\
	cmp_status->length = MAX_ANNOTATION_LENGTH;			\
	cmp_status->nchars = cmp_status->ncomps = 0;			\
	coding->annotated = 1;						\
      }									\
    else								\
      {									\
	*charbuf++ = -1;						\
	*charbuf++ = -1;						\
	cmp_status->state = COMPOSING_CHAR;				\
	cmp_status->length += 2;					\
      }									\
  } while (0)
3399
3400
3401
3402
/* Handle a composition end sequence.  The composition is rejected
   (abandoned, then `goto invalid_code') when it has no characters or
   when the current state does not fit the method.  Otherwise the
   annotation header stored cmp_status->length elements before the
   local `charbuf' is patched: its first element is reduced according
   to the number of components for the two ALTCHARS methods, and its
   third element receives the character count.  `char_offset' advances
   by that count and the state returns to COMPOSING_NO.  */
#define DECODE_COMPOSITION_END()					\
  do {									\
    int *cmp_head;							\
									\
    if (cmp_status->nchars == 0						\
	|| ((cmp_status->state == COMPOSING_CHAR)			\
	    == (cmp_status->method == COMPOSITION_WITH_RULE)))		\
      {									\
	MAYBE_FINISH_COMPOSITION ();					\
	goto invalid_code;						\
      }									\
    cmp_head = charbuf - cmp_status->length;				\
    if (cmp_status->method == COMPOSITION_WITH_ALTCHARS)		\
      cmp_head[0] -= cmp_status->ncomps + 2;				\
    else if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)	\
      cmp_head[0] -= cmp_status->ncomps * 3;				\
    cmp_head[2] = cmp_status->nchars;					\
    char_offset += cmp_status->nchars;					\
    cmp_status->state = COMPOSING_NO;					\
  } while (0)
3420
3421
3422
/* Append the composition rule RULE to the local `charbuf' as the pair
   (-2, RULE), account for the two elements in cmp_status->length, and
   step cmp_status->state back by one.  */
#define STORE_COMPOSITION_RULE(rule)					\
  do {									\
    charbuf[0] = -2;							\
    charbuf[1] = rule;							\
    charbuf += 2;							\
    cmp_status->length += 2;						\
    cmp_status->state--;						\
  } while (0)
3430
3431
3432
3433
/* Append character C to the local `charbuf' as part of the composition
   in progress.  It counts as a composed character in state
   COMPOSING_CHAR and as a component otherwise.  When a rule is expected
   next (method COMPOSITION_WITH_RULE, or COMPOSITION_WITH_RULE_ALTCHARS
   while reading components), the state is advanced by one.  */
#define STORE_COMPOSITION_CHAR(c)					\
  do {									\
    *charbuf++ = (c);							\
    cmp_status->length++;						\
    if (cmp_status->state != COMPOSING_CHAR)				\
      cmp_status->ncomps++;						\
    else								\
      cmp_status->nchars++;						\
    if (cmp_status->method == COMPOSITION_WITH_RULE			\
	|| (cmp_status->state == COMPOSING_COMPONENT_CHAR		\
	    && cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS))	\
      cmp_status->state++;						\
  } while (0)
3447
3448
3449
3450
3451 static void
3452 decode_coding_iso_2022 (struct coding_system *coding)
3453 {
3454 const unsigned char *src = coding->source + coding->consumed;
3455 const unsigned char *src_end = coding->source + coding->src_bytes;
3456 const unsigned char *src_base;
3457 int *charbuf = coding->charbuf + coding->charbuf_used;
3458
3459
3460 int *charbuf_end
3461 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
3462 ptrdiff_t consumed_chars = 0, consumed_chars_base;
3463 bool multibytep = coding->src_multibyte;
3464
3465 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3466 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3467 int charset_id_2, charset_id_3;
3468 struct charset *charset;
3469 int c;
3470 struct composition_status *cmp_status = CODING_ISO_CMP_STATUS (coding);
3471 Lisp_Object attrs = CODING_ID_ATTRS (coding->id);
3472 ptrdiff_t char_offset = coding->produced_char;
3473 ptrdiff_t last_offset = char_offset;
3474 int last_id = charset_ascii;
3475 bool eol_dos
3476 = !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3477 int byte_after_cr = -1;
3478 int i;
3479
3480 setup_iso_safe_charsets (attrs);
3481 coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
3482
3483 if (cmp_status->state != COMPOSING_NO)
3484 {
3485 if (charbuf_end - charbuf < cmp_status->length)
3486 emacs_abort ();
3487 for (i = 0; i < cmp_status->length; i++)
3488 *charbuf++ = cmp_status->carryover[i];
3489 coding->annotated = 1;
3490 }
3491
3492 while (1)
3493 {
3494 int c1, c2, c3;
3495
3496 src_base = src;
3497 consumed_chars_base = consumed_chars;
3498
3499 if (charbuf >= charbuf_end)
3500 {
3501 if (byte_after_cr >= 0)
3502 src_base--;
3503 break;
3504 }
3505
3506 if (byte_after_cr >= 0)
3507 c1 = byte_after_cr, byte_after_cr = -1;
3508 else
3509 ONE_MORE_BYTE (c1);
3510 if (c1 < 0)
3511 goto invalid_code;
3512
3513 if (CODING_ISO_EXTSEGMENT_LEN (coding) > 0)
3514 {
3515 *charbuf++ = ASCII_CHAR_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3516 char_offset++;
3517 CODING_ISO_EXTSEGMENT_LEN (coding)--;
3518 continue;
3519 }
3520
3521 if (CODING_ISO_EMBEDDED_UTF_8 (coding))
3522 {
3523 if (c1 == ISO_CODE_ESC)
3524 {
3525 if (src + 1 >= src_end)
3526 goto no_more_source;
3527 *charbuf++ = ISO_CODE_ESC;
3528 char_offset++;
3529 if (src[0] == '%' && src[1] == '@')
3530 {
3531 src += 2;
3532 consumed_chars += 2;
3533 char_offset += 2;
3534
3535 *charbuf++ = '%';
3536 *charbuf++ = '@';
3537 CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
3538 }
3539 }
3540 else
3541 {
3542 *charbuf++ = ASCII_CHAR_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3543 char_offset++;
3544 }
3545 continue;
3546 }
3547
3548 if ((cmp_status->state == COMPOSING_RULE
3549 || cmp_status->state == COMPOSING_COMPONENT_RULE)
3550 && c1 != ISO_CODE_ESC)
3551 {
3552 int rule;
3553
3554 DECODE_COMPOSITION_RULE (rule);
3555 STORE_COMPOSITION_RULE (rule);
3556 continue;
3557 }
3558
3559
3560 switch (iso_code_class [c1])
3561 {
3562 case ISO_0x20_or_0x7F:
3563 if (charset_id_0 < 0
3564 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
3565
3566 charset = CHARSET_FROM_ID (charset_ascii);
3567 else
3568 charset = CHARSET_FROM_ID (charset_id_0);
3569 break;
3570
3571 case ISO_graphic_plane_0:
3572 if (charset_id_0 < 0)
3573 charset = CHARSET_FROM_ID (charset_ascii);
3574 else
3575 charset = CHARSET_FROM_ID (charset_id_0);
3576 break;
3577
3578 case ISO_0xA0_or_0xFF:
3579 if (charset_id_1 < 0
3580 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3581 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3582 goto invalid_code;
3583
3584 FALLTHROUGH;
3585 case ISO_graphic_plane_1:
3586 if (charset_id_1 < 0)
3587 goto invalid_code;
3588 charset = CHARSET_FROM_ID (charset_id_1);
3589 break;
3590
3591 case ISO_control_0:
3592 if (eol_dos && c1 == '\r')
3593 ONE_MORE_BYTE (byte_after_cr);
3594 MAYBE_FINISH_COMPOSITION ();
3595 charset = CHARSET_FROM_ID (charset_ascii);
3596 break;
3597
3598 case ISO_control_1:
3599 goto invalid_code;
3600
3601 case ISO_shift_out:
3602 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3603 || CODING_ISO_DESIGNATION (coding, 1) < 0)
3604 goto invalid_code;
3605 CODING_ISO_INVOCATION (coding, 0) = 1;
3606 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3607 continue;
3608
3609 case ISO_shift_in:
3610 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3611 goto invalid_code;
3612 CODING_ISO_INVOCATION (coding, 0) = 0;
3613 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3614 continue;
3615
3616 case ISO_single_shift_2_7:
3617 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))
3618 goto invalid_code;
3619 FALLTHROUGH;
3620 case ISO_single_shift_2:
3621 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3622 goto invalid_code;
3623
3624 c1 = 'N';
3625 goto label_escape_sequence;
3626
3627 case ISO_single_shift_3:
3628 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3629 goto invalid_code;
3630
3631 c1 = 'O';
3632 goto label_escape_sequence;
3633
3634 case ISO_control_sequence_introducer:
3635
3636 c1 = '[';
3637 goto label_escape_sequence;
3638
3639 case ISO_escape:
3640 ONE_MORE_BYTE (c1);
3641 label_escape_sequence:
3642
3643
3644
3645 switch (c1)
3646 {
3647 case '&':
3648 ONE_MORE_BYTE (c1);
3649 if (!(c1 >= '@' && c1 <= '~'))
3650 goto invalid_code;
3651 ONE_MORE_BYTE (c1);
3652 if (c1 != ISO_CODE_ESC)
3653 goto invalid_code;
3654 ONE_MORE_BYTE (c1);
3655 goto label_escape_sequence;
3656
3657 case '$':
3658 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3659 goto invalid_code;
3660 {
3661 int reg, chars96;
3662
3663 ONE_MORE_BYTE (c1);
3664 if (c1 >= '@' && c1 <= 'B')
3665 {
3666
3667 reg = 0, chars96 = 0;
3668 }
3669 else if (c1 >= 0x28 && c1 <= 0x2B)
3670 {
3671 reg = c1 - 0x28, chars96 = 0;
3672 ONE_MORE_BYTE (c1);
3673 }
3674 else if (c1 >= 0x2C && c1 <= 0x2F)
3675 {
3676 reg = c1 - 0x2C, chars96 = 1;
3677 ONE_MORE_BYTE (c1);
3678 }
3679 else
3680 goto invalid_code;
3681 DECODE_DESIGNATION (reg, 2, chars96, c1);
3682
3683 if (reg == 0)
3684 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3685 else if (reg == 1)
3686 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3687 if (chars96 < 0)
3688 goto invalid_code;
3689 }
3690 continue;
3691
3692 case 'n':
3693 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3694 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3695 goto invalid_code;
3696 CODING_ISO_INVOCATION (coding, 0) = 2;
3697 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3698 continue;
3699
3700 case 'o':
3701 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3702 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3703 goto invalid_code;
3704 CODING_ISO_INVOCATION (coding, 0) = 3;
3705 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3706 continue;
3707
3708 case 'N':
3709 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3710 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3711 goto invalid_code;
3712 charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3713 if (charset_id_2 < 0)
3714 charset = CHARSET_FROM_ID (charset_ascii);
3715 else
3716 charset = CHARSET_FROM_ID (charset_id_2);
3717 ONE_MORE_BYTE (c1);
3718 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0)
3719 || (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3720 && ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LEVEL_4)
3721 ? c1 >= 0x80 : c1 < 0x80)))
3722 goto invalid_code;
3723 break;
3724
3725 case 'O':
3726 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3727 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3728 goto invalid_code;
3729 charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3730 if (charset_id_3 < 0)
3731 charset = CHARSET_FROM_ID (charset_ascii);
3732 else
3733 charset = CHARSET_FROM_ID (charset_id_3);
3734 ONE_MORE_BYTE (c1);
3735 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0)
3736 || (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3737 && ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LEVEL_4)
3738 ? c1 >= 0x80 : c1 < 0x80)))
3739 goto invalid_code;
3740 break;
3741
3742 case '0': case '2': case '3': case '4':
3743 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3744 goto invalid_code;
3745 if (last_id != charset_ascii)
3746 {
3747 ADD_CHARSET_DATA (charbuf, char_offset- last_offset, last_id);
3748 last_id = charset_ascii;
3749 last_offset = char_offset;
3750 }
3751 DECODE_COMPOSITION_START (c1);
3752 continue;
3753
3754 case '1':
3755 if (cmp_status->state == COMPOSING_NO)
3756 goto invalid_code;
3757 DECODE_COMPOSITION_END ();
3758 continue;
3759
3760 case '[':
3761 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION))
3762 goto invalid_code;
3763
3764
3765
3766 ONE_MORE_BYTE (c1);
3767 switch (c1)
3768 {
3769 case ']':
3770 coding->mode &= ~CODING_MODE_DIRECTION;
3771 break;
3772
3773 case '0':
3774 case '1':
3775 ONE_MORE_BYTE (c1);
3776 if (c1 == ']')
3777 coding->mode &= ~CODING_MODE_DIRECTION;
3778 else
3779 goto invalid_code;
3780 break;
3781
3782 case '2':
3783 ONE_MORE_BYTE (c1);
3784 if (c1 == ']')
3785 coding->mode |= CODING_MODE_DIRECTION;
3786 else
3787 goto invalid_code;
3788 break;
3789
3790 default:
3791 goto invalid_code;
3792 }
3793 continue;
3794
3795 case '%':
3796 ONE_MORE_BYTE (c1);
3797 if (c1 == '/')
3798 {
3799
3800
3801
3802
3803 int dim, M, L;
3804 int size;
3805
3806 ONE_MORE_BYTE (dim);
3807 if (dim < '0' || dim > '4')
3808 goto invalid_code;
3809 ONE_MORE_BYTE (M);
3810 if (M < 128)
3811 goto invalid_code;
3812 ONE_MORE_BYTE (L);
3813 if (L < 128)
3814 goto invalid_code;
3815 size = ((M - 128) * 128) + (L - 128);
3816 if (charbuf + 6 > charbuf_end)
3817 goto break_loop;
3818 *charbuf++ = ISO_CODE_ESC;
3819 *charbuf++ = '%';
3820 *charbuf++ = '/';
3821 *charbuf++ = dim;
3822 *charbuf++ = BYTE8_TO_CHAR (M);
3823 *charbuf++ = BYTE8_TO_CHAR (L);
3824 CODING_ISO_EXTSEGMENT_LEN (coding) = size;
3825 }
3826 else if (c1 == 'G')
3827 {
3828
3829
3830
3831
3832 if (charbuf + 3 > charbuf_end)
3833 goto break_loop;
3834 *charbuf++ = ISO_CODE_ESC;
3835 *charbuf++ = '%';
3836 *charbuf++ = 'G';
3837 CODING_ISO_EMBEDDED_UTF_8 (coding) = 1;
3838 }
3839 else
3840 goto invalid_code;
3841 continue;
3842 break;
3843
3844 default:
3845 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3846 goto invalid_code;
3847 {
3848 int reg, chars96;
3849
3850 if (c1 >= 0x28 && c1 <= 0x2B)
3851 {
3852 reg = c1 - 0x28, chars96 = 0;
3853 ONE_MORE_BYTE (c1);
3854 }
3855 else if (c1 >= 0x2C && c1 <= 0x2F)
3856 {
3857 reg = c1 - 0x2C, chars96 = 1;
3858 ONE_MORE_BYTE (c1);
3859 }
3860 else
3861 goto invalid_code;
3862 DECODE_DESIGNATION (reg, 1, chars96, c1);
3863
3864 if (reg == 0)
3865 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3866 else if (reg == 1)
3867 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3868 if (chars96 < 0)
3869 goto invalid_code;
3870 }
3871 continue;
3872 }
3873 break;
3874
3875 default:
3876 emacs_abort ();
3877 }
3878
3879 if (cmp_status->state == COMPOSING_NO
3880 && charset->id != charset_ascii
3881 && last_id != charset->id)
3882 {
3883 if (last_id != charset_ascii)
3884 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3885 last_id = charset->id;
3886 last_offset = char_offset;
3887 }
3888
3889
3890
3891
3892 if (CHARSET_DIMENSION (charset) > 1)
3893 {
3894 ONE_MORE_BYTE (c2);
3895 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0)
3896 || ((c1 & 0x80) != (c2 & 0x80)))
3897
3898 goto invalid_code;
3899 if (CHARSET_DIMENSION (charset) == 2)
3900 c1 = (c1 << 8) | c2;
3901 else
3902 {
3903 ONE_MORE_BYTE (c3);
3904 if (c3 < 0x20 || (c3 >= 0x80 && c3 < 0xA0)
3905 || ((c1 & 0x80) != (c3 & 0x80)))
3906
3907 goto invalid_code;
3908 c1 = (c1 << 16) | (c2 << 8) | c2;
3909 }
3910 }
3911 c1 &= 0x7F7F7F;
3912 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3913 if (c < 0)
3914 {
3915 MAYBE_FINISH_COMPOSITION ();
3916 for (; src_base < src; src_base++, char_offset++)
3917 {
3918 if (ASCII_CHAR_P (*src_base))
3919 *charbuf++ = *src_base;
3920 else
3921 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3922 }
3923 }
3924 else if (cmp_status->state == COMPOSING_NO)
3925 {
3926 *charbuf++ = c;
3927 char_offset++;
3928 }
3929 else if ((cmp_status->state == COMPOSING_CHAR
3930 ? cmp_status->nchars
3931 : cmp_status->ncomps)
3932 >= MAX_COMPOSITION_COMPONENTS)
3933 {
3934
3935 MAYBE_FINISH_COMPOSITION ();
3936 *charbuf++ = c;
3937 char_offset++;
3938 }
3939 else
3940 STORE_COMPOSITION_CHAR (c);
3941 continue;
3942
3943 invalid_code:
3944 MAYBE_FINISH_COMPOSITION ();
3945 src = src_base;
3946 consumed_chars = consumed_chars_base;
3947 ONE_MORE_BYTE (c);
3948 *charbuf++ = c < 0 ? -c : ASCII_CHAR_P (c) ? c : BYTE8_TO_CHAR (c);
3949 char_offset++;
3950
3951
3952
3953
3954
3955 CODING_ISO_INVOCATION (coding, 0) = 0;
3956 CODING_ISO_DESIGNATION (coding, 0) = charset_ascii;
3957 charset_id_0 = charset_ascii;
3958 continue;
3959
3960 break_loop:
3961 break;
3962 }
3963
3964 no_more_source:
3965 if (cmp_status->state != COMPOSING_NO)
3966 {
3967 if (coding->mode & CODING_MODE_LAST_BLOCK)
3968 MAYBE_FINISH_COMPOSITION ();
3969 else
3970 {
3971 charbuf -= cmp_status->length;
3972 for (i = 0; i < cmp_status->length; i++)
3973 cmp_status->carryover[i] = charbuf[i];
3974 }
3975 }
3976 else if (last_id != charset_ascii)
3977 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3978 coding->consumed_char += consumed_chars_base;
3979 coding->consumed = src_base - coding->source;
3980 coding->charbuf_used = charbuf - coding->charbuf;
3981 }
3982
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
/* Emit the escape sequence that designates CHARSET to graphic register
   REG (0..3) and record the designation in CODING.

   If CODING has CODING_ISO_FLAG_REVISION and CHARSET has a non-negative
   revision number, "ESC & <'@' + revision>" is emitted first.  Then
   comes ESC, for multi-dimensional charsets '$', an intermediate byte
   selected by REG ("()*+" for 94-character sets, ",-./" for
   96-character sets), and CHARSET's final byte.  For a
   multi-dimensional 94-character set the intermediate byte is left out
   when REG is 0, the final byte is within '@'..'B', and
   CODING_ISO_FLAG_LONG_FORM is not set.  */
#define ENCODE_DESIGNATION(charset, reg, coding)			\
  do {									\
    unsigned char final_byte = CHARSET_ISO_FINAL (charset);		\
    const char *inter_94 = "()*+";					\
    const char *inter_96 = ",-./";					\
    int rev = ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)	\
	       ? CHARSET_ISO_REVISION (charset)				\
	       : -1);							\
									\
    if (rev >= 0)							\
      {									\
	EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');			\
	EMIT_ONE_BYTE ('@' + rev);					\
      }									\
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);					\
    if (CHARSET_DIMENSION (charset) != 1)				\
      {									\
	EMIT_ONE_ASCII_BYTE ('$');					\
	if (CHARSET_ISO_CHARS_96 (charset))				\
	  EMIT_ONE_ASCII_BYTE (inter_96[reg]);				\
	else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM	\
		 || reg != 0						\
		 || final_byte < '@' || final_byte > 'B')		\
	  EMIT_ONE_ASCII_BYTE (inter_94[reg]);				\
      }									\
    else								\
      {									\
	int inter_byte;							\
	if (CHARSET_ISO_CHARS_96 (charset))				\
	  inter_byte = inter_96[reg];					\
	else								\
	  inter_byte = inter_94[reg];					\
	EMIT_ONE_ASCII_BYTE (inter_byte);				\
      }									\
    EMIT_ONE_ASCII_BYTE (final_byte);					\
									\
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);	\
  } while (0)
4052
4053
4054
4055
4056
4057
/* Emit a single shift 2 for the local `coding': the two bytes ESC 'N'
   when CODING_ISO_FLAG_SEVEN_BITS is set, the single byte ISO_CODE_SS2
   otherwise.  Then mark `coding' as being in single-shift state.  */
#define ENCODE_SINGLE_SHIFT_2						\
  do {									\
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))	\
      EMIT_ONE_BYTE (ISO_CODE_SS2);					\
    else								\
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');				\
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;				\
  } while (0)
4066
4067
/* Emit a single shift 3 for the local `coding': the two bytes ESC 'O'
   when CODING_ISO_FLAG_SEVEN_BITS is set, the single byte ISO_CODE_SS3
   otherwise.  Then mark `coding' as being in single-shift state.  */
#define ENCODE_SINGLE_SHIFT_3						\
  do {									\
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))	\
      EMIT_ONE_BYTE (ISO_CODE_SS3);					\
    else								\
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');				\
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;				\
  } while (0)
4076
4077
4078
4079
4080
4081
/* Shift in: record that graphic register 0 is invoked to graphic
   plane 0 of the local `coding' and emit the byte ISO_CODE_SI.  */
#define ENCODE_SHIFT_IN							\
  do {									\
    CODING_ISO_INVOCATION (coding, 0) = 0;				\
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);					\
  } while (0)
4087
4088
/* Shift out: record that graphic register 1 is invoked to graphic
   plane 0 of the local `coding' and emit the byte ISO_CODE_SO.  */
#define ENCODE_SHIFT_OUT						\
  do {									\
    CODING_ISO_INVOCATION (coding, 0) = 1;				\
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);					\
  } while (0)
4094
4095
/* Locking shift 2: record that graphic register 2 is invoked to
   graphic plane 0 of the local `coding' and emit ESC 'n'.  */
#define ENCODE_LOCKING_SHIFT_2						\
  do {									\
    CODING_ISO_INVOCATION (coding, 0) = 2;				\
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');				\
  } while (0)
4101
4102
/* Locking shift 3: emit ESC 'o' and record that graphic register 3 is
   invoked to graphic plane 0 of the local `coding'.

   The final byte must be 'o'.  ESC 'n' is locking shift 2, and the
   decoder in this file treats ESC 'n' as "invoke register 2" and
   ESC 'o' as "invoke register 3", so emitting 'n' here would make the
   output decode with the wrong charset.  */
#define ENCODE_LOCKING_SHIFT_3						\
  do {									\
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');				\
    CODING_ISO_INVOCATION (coding, 0) = 3;				\
  } while (0)
4108
4109
4110
4111
4112
4113
/* Emit the one-byte code C1 of a character of CHARSET (an lvalue; it is
   replaced by JISX0201-Roman when it is ASCII and `coding' has
   CODING_ISO_FLAG_USE_ROMAN).

   The body loops until the byte could be emitted: right after a single
   shift, or when CHARSET is invoked to graphic plane 0 (emitted with
   the high bit cleared) or plane 1 (emitted with the high bit set).
   Otherwise encode_invocation_designation is called to make CHARSET
   available and the checks are repeated.  */
#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)			\
  do {									\
    int cs_id = CHARSET_ID (charset);					\
									\
    if (cs_id == charset_ascii						\
	&& (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN))	\
      {									\
	cs_id = charset_jisx0201_roman;					\
	charset = CHARSET_FROM_ID (cs_id);				\
      }									\
									\
    if (CODING_ISO_SINGLE_SHIFTING (coding))				\
      {									\
	if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))	\
	  EMIT_ONE_BYTE (c1 | 0x80);					\
	else								\
	  EMIT_ONE_ASCII_BYTE (c1 & 0x7F);				\
	CODING_ISO_SINGLE_SHIFTING (coding) = 0;			\
	break;								\
      }									\
    if (cs_id == CODING_ISO_INVOKED_CHARSET (coding, 0))		\
      {									\
	EMIT_ONE_ASCII_BYTE (c1 & 0x7F);				\
	break;								\
      }									\
    if (cs_id == CODING_ISO_INVOKED_CHARSET (coding, 1))		\
      {									\
	EMIT_ONE_BYTE (c1 | 0x80);					\
	break;								\
      }									\
    /* CHARSET is not invoked to either plane yet: emit whatever	\
       designation/invocation is needed, then try again.  */		\
    dst = encode_invocation_designation (charset, coding, dst,		\
					 &produced_chars);		\
  } while (1)
4152
4153
4154
4155
4156
4157
/* Emit the two-byte code C1 C2 of a character of CHARSET (an lvalue; it
   is replaced by JISX0208.1978 when it is JISX0208 and `coding' has
   CODING_ISO_FLAG_USE_OLDJIS).

   The body loops until the bytes could be emitted: right after a
   single shift, or when CHARSET is invoked to graphic plane 0 (emitted
   with the high bits cleared) or plane 1 (emitted with the high bits
   set).  Otherwise encode_invocation_designation is called to make
   CHARSET available and the checks are repeated.  */
#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)		\
  do {									\
    int cs_id = CHARSET_ID (charset);					\
									\
    if (cs_id == charset_jisx0208					\
	&& (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS))	\
      {									\
	cs_id = charset_jisx0208_1978;					\
	charset = CHARSET_FROM_ID (cs_id);				\
      }									\
									\
    if (CODING_ISO_SINGLE_SHIFTING (coding))				\
      {									\
	if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))	\
	  EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);			\
	else								\
	  EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);		\
	CODING_ISO_SINGLE_SHIFTING (coding) = 0;			\
	break;								\
      }									\
    if (cs_id == CODING_ISO_INVOKED_CHARSET (coding, 0))		\
      {									\
	EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);		\
	break;								\
      }									\
    if (cs_id == CODING_ISO_INVOKED_CHARSET (coding, 1))		\
      {									\
	EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);			\
	break;								\
      }									\
    /* CHARSET is not invoked to either plane yet: emit whatever	\
       designation/invocation is needed, then try again.  */		\
    dst = encode_invocation_designation (charset, coding, dst,		\
					 &produced_chars);		\
  } while (1)
4196
4197
/* Encode character C with CHARSET and emit the resulting code, using
   the one-byte emitter for charsets of dimension 1 and the two-byte
   emitter (high byte, low byte) for all others.  */
#define ENCODE_ISO_CHARACTER(charset, c)				\
  do {									\
    unsigned enc_code;							\
    CODING_ENCODE_CHAR (coding, dst, dst_end, (charset), (c), enc_code); \
									\
    if (CHARSET_DIMENSION (charset) != 1)				\
      ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), enc_code >> 8,	\
				       enc_code & 0xFF);		\
    else								\
      ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), enc_code);		\
  } while (0)
4208
4209
4210
4211
4212
4213
4214 static unsigned char *
4215 encode_invocation_designation (struct charset *charset,
4216 struct coding_system *coding,
4217 unsigned char *dst, ptrdiff_t *p_nchars)
4218 {
4219 bool multibytep = coding->dst_multibyte;
4220 ptrdiff_t produced_chars = *p_nchars;
4221 int reg;
4222 int id = CHARSET_ID (charset);
4223
4224
4225 for (reg = 0; reg < 4; reg++)
4226 if (id == CODING_ISO_DESIGNATION (coding, reg))
4227 break;
4228
4229 if (reg >= 4)
4230 {
4231
4232
4233 reg = CODING_ISO_REQUEST (coding, id);
4234 if (reg < 0)
4235
4236
4237 reg = 0;
4238
4239 ENCODE_DESIGNATION (charset, reg, coding);
4240 }
4241
4242 if (CODING_ISO_INVOCATION (coding, 0) != reg
4243 && CODING_ISO_INVOCATION (coding, 1) != reg)
4244 {
4245
4246
4247 switch (reg)
4248 {
4249 case 0:
4250 ENCODE_SHIFT_IN;
4251 break;
4252
4253 case 1:
4254 ENCODE_SHIFT_OUT;
4255 break;
4256
4257 case 2:
4258 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4259 ENCODE_SINGLE_SHIFT_2;
4260 else
4261 ENCODE_LOCKING_SHIFT_2;
4262 break;
4263
4264 case 3:
4265 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4266 ENCODE_SINGLE_SHIFT_3;
4267 else
4268 ENCODE_LOCKING_SHIFT_3;
4269 break;
4270
4271 default:
4272 break;
4273 }
4274 }
4275
4276 *p_nchars = produced_chars;
4277 return dst;
4278 }
4279
4280
4281
4282
/* Bring the local `coding' back to its initial state: shift in if
   graphic plane 0 does not have register 0 invoked, then re-designate
   every register whose initial charset is non-negative and differs
   from its current designation.  */
#define ENCODE_RESET_PLANE_AND_REGISTER()				\
  do {									\
    int reg;								\
    struct charset *charset;						\
									\
    if (CODING_ISO_INVOCATION (coding, 0) != 0)				\
      ENCODE_SHIFT_IN;							\
    for (reg = 0; reg < 4; reg++)					\
      {									\
	if (CODING_ISO_INITIAL (coding, reg) < 0			\
	    || (CODING_ISO_DESIGNATION (coding, reg)			\
		== CODING_ISO_INITIAL (coding, reg)))			\
	  continue;							\
	charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg));	\
	ENCODE_DESIGNATION (charset, reg, coding);			\
      }									\
  } while (0)
4299
4300
4301
4302
4303
4304
4305
4306
4307
4308
4309 static ptrdiff_t
4310 encode_designation_at_bol (struct coding_system *coding,
4311 int *charbuf, int *charbuf_end,
4312 unsigned char *dst)
4313 {
4314 unsigned char *orig = dst;
4315 struct charset *charset;
4316
4317 int r[4];
4318 int c, found = 0, reg;
4319 ptrdiff_t produced_chars = 0;
4320 bool multibytep = coding->dst_multibyte;
4321 Lisp_Object attrs;
4322 Lisp_Object charset_list;
4323
4324 attrs = CODING_ID_ATTRS (coding->id);
4325 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4326 if (EQ (charset_list, Qiso_2022))
4327 charset_list = Viso_2022_charset_list;
4328
4329 for (reg = 0; reg < 4; reg++)
4330 r[reg] = -1;
4331
4332 while (charbuf < charbuf_end && found < 4)
4333 {
4334 int id;
4335
4336 c = *charbuf++;
4337 if (c == '\n')
4338 break;
4339 charset = char_charset (c, charset_list, NULL);
4340 id = CHARSET_ID (charset);
4341 reg = CODING_ISO_REQUEST (coding, id);
4342 if (reg >= 0 && r[reg] < 0)
4343 {
4344 found++;
4345 r[reg] = id;
4346 }
4347 }
4348
4349 if (found)
4350 {
4351 for (reg = 0; reg < 4; reg++)
4352 if (r[reg] >= 0
4353 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
4354 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
4355 }
4356
4357 return dst - orig;
4358 }
4359
4360
4361
4362 static bool
4363 encode_coding_iso_2022 (struct coding_system *coding)
4364 {
4365 bool multibytep = coding->dst_multibyte;
4366 int *charbuf = coding->charbuf;
4367 int *charbuf_end = charbuf + coding->charbuf_used;
4368 unsigned char *dst = coding->destination + coding->produced;
4369 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4370 int safe_room = 16;
4371 bool bol_designation
4372 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
4373 && CODING_ISO_BOL (coding));
4374 ptrdiff_t produced_chars = 0;
4375 Lisp_Object attrs, eol_type, charset_list;
4376 bool ascii_compatible;
4377 int c;
4378 int preferred_charset_id = -1;
4379
4380 CODING_GET_INFO (coding, attrs, charset_list);
4381 eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
4382 if (VECTORP (eol_type))
4383 eol_type = Qunix;
4384
4385 setup_iso_safe_charsets (attrs);
4386
4387 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4388 coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
4389
4390 ascii_compatible
4391 = (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
4392 && ! (CODING_ISO_FLAGS (coding) & (CODING_ISO_FLAG_DESIGNATION
4393 | CODING_ISO_FLAG_LOCKING_SHIFT)));
4394
4395 while (charbuf < charbuf_end)
4396 {
4397 ASSURE_DESTINATION (safe_room);
4398
4399 if (bol_designation)
4400 {
4401
4402 unsigned char desig_buf[16];
4403 ptrdiff_t nbytes;
4404 ptrdiff_t offset;
4405
4406 charset_map_loaded = 0;
4407 nbytes = encode_designation_at_bol (coding, charbuf, charbuf_end,
4408 desig_buf);
4409 if (charset_map_loaded
4410 && (offset = coding_change_destination (coding)))
4411 {
4412 dst += offset;
4413 dst_end += offset;
4414 }
4415 memcpy (dst, desig_buf, nbytes);
4416 dst += nbytes;
4417
4418 produced_chars += nbytes;
4419 bol_designation = 0;
4420 ASSURE_DESTINATION (safe_room);
4421 }
4422
4423 c = *charbuf++;
4424
4425 if (c < 0)
4426 {
4427
4428 switch (*charbuf)
4429 {
4430 case CODING_ANNOTATE_COMPOSITION_MASK:
4431
4432 break;
4433 case CODING_ANNOTATE_CHARSET_MASK:
4434 preferred_charset_id = charbuf[2];
4435 if (preferred_charset_id >= 0
4436 && NILP (Fmemq (make_fixnum (preferred_charset_id),
4437 charset_list)))
4438 preferred_charset_id = -1;
4439 break;
4440 default:
4441 emacs_abort ();
4442 }
4443 charbuf += -c - 1;
4444 continue;
4445 }
4446
4447
4448 if (c < 0x20 || c == 0x7F)
4449 {
4450 if (c == '\n'
4451 || (c == '\r' && EQ (eol_type, Qmac)))
4452 {
4453 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4454 ENCODE_RESET_PLANE_AND_REGISTER ();
4455 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
4456 {
4457 int i;
4458
4459 for (i = 0; i < 4; i++)
4460 CODING_ISO_DESIGNATION (coding, i)
4461 = CODING_ISO_INITIAL (coding, i);
4462 }
4463 bol_designation = ((CODING_ISO_FLAGS (coding)
4464 & CODING_ISO_FLAG_DESIGNATE_AT_BOL)
4465 != 0);
4466 }
4467 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
4468 ENCODE_RESET_PLANE_AND_REGISTER ();
4469 EMIT_ONE_ASCII_BYTE (c);
4470 }
4471 else if (ASCII_CHAR_P (c))
4472 {
4473 if (ascii_compatible)
4474 EMIT_ONE_ASCII_BYTE (c);
4475 else
4476 {
4477 struct charset *charset = CHARSET_FROM_ID (charset_ascii);
4478 ENCODE_ISO_CHARACTER (charset, c);
4479 }
4480 }
4481 else if (CHAR_BYTE8_P (c))
4482 {
4483 c = CHAR_TO_BYTE8 (c);
4484 EMIT_ONE_BYTE (c);
4485 }
4486 else
4487 {
4488 struct charset *charset;
4489
4490 if (preferred_charset_id >= 0)
4491 {
4492 bool result;
4493
4494 charset = CHARSET_FROM_ID (preferred_charset_id);
4495 CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result);
4496 if (! result)
4497 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
4498 NULL, charset);
4499 }
4500 else
4501 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
4502 NULL, charset);
4503 if (!charset)
4504 {
4505 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4506 {
4507 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4508 charset = CHARSET_FROM_ID (charset_ascii);
4509 }
4510 else
4511 {
4512 c = coding->default_char;
4513 CODING_CHAR_CHARSET (coding, dst, dst_end, c,
4514 charset_list, NULL, charset);
4515 }
4516 }
4517 ENCODE_ISO_CHARACTER (charset, c);
4518 }
4519 }
4520
4521 if (coding->mode & CODING_MODE_LAST_BLOCK
4522 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4523 {
4524 ASSURE_DESTINATION (safe_room);
4525 ENCODE_RESET_PLANE_AND_REGISTER ();
4526 }
4527 record_conversion_result (coding, CODING_RESULT_SUCCESS);
4528 CODING_ISO_BOL (coding) = bol_designation;
4529 coding->produced_char += produced_chars;
4530 coding->produced = dst - coding->destination;
4531 return 0;
4532 }
4533
4534
4535
4536
4537
4538
4539
4540
4541
4542
4543
4544
4545
4546
4547
4548
4549
4550
4551
4552
4553
4554
4555
4556
4557
4558
4559
4560
4561
4562
4563
4564
4565
4566
4567
4568
4569
4570
4571
4572
4573
4574 static bool
4575 detect_coding_sjis (struct coding_system *coding,
4576 struct coding_detection_info *detect_info)
4577 {
4578 const unsigned char *src = coding->source, *src_base;
4579 const unsigned char *src_end = coding->source + coding->src_bytes;
4580 bool multibytep = coding->src_multibyte;
4581 ptrdiff_t consumed_chars = 0;
4582 int found = 0;
4583 int c;
4584 Lisp_Object attrs, charset_list;
4585 int max_first_byte_of_2_byte_code;
4586
4587 CODING_GET_INFO (coding, attrs, charset_list);
4588 max_first_byte_of_2_byte_code = list_length (charset_list) <= 3 ? 0xEF : 0xFC;
4589
4590 detect_info->checked |= CATEGORY_MASK_SJIS;
4591
4592 src += coding->head_ascii;
4593
4594 while (1)
4595 {
4596 src_base = src;
4597 ONE_MORE_BYTE (c);
4598 if (c < 0x80)
4599 continue;
4600 if ((c >= 0x81 && c <= 0x9F)
4601 || (c >= 0xE0 && c <= max_first_byte_of_2_byte_code))
4602 {
4603 ONE_MORE_BYTE (c);
4604 if (c < 0x40 || c == 0x7F || c > 0xFC)
4605 break;
4606 found = CATEGORY_MASK_SJIS;
4607 }
4608 else if (c >= 0xA0 && c < 0xE0)
4609 found = CATEGORY_MASK_SJIS;
4610 else
4611 break;
4612 }
4613 detect_info->rejected |= CATEGORY_MASK_SJIS;
4614 return 0;
4615
4616 no_more_source:
4617 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4618 {
4619 detect_info->rejected |= CATEGORY_MASK_SJIS;
4620 return 0;
4621 }
4622 detect_info->found |= found;
4623 return 1;
4624 }
4625
4626
4627
4628
4629 static bool
4630 detect_coding_big5 (struct coding_system *coding,
4631 struct coding_detection_info *detect_info)
4632 {
4633 const unsigned char *src = coding->source, *src_base;
4634 const unsigned char *src_end = coding->source + coding->src_bytes;
4635 bool multibytep = coding->src_multibyte;
4636 ptrdiff_t consumed_chars = 0;
4637 int found = 0;
4638 int c;
4639
4640 detect_info->checked |= CATEGORY_MASK_BIG5;
4641
4642 src += coding->head_ascii;
4643
4644 while (1)
4645 {
4646 src_base = src;
4647 ONE_MORE_BYTE (c);
4648 if (c < 0x80)
4649 continue;
4650 if (c >= 0xA1)
4651 {
4652 ONE_MORE_BYTE (c);
4653 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
4654 return 0;
4655 found = CATEGORY_MASK_BIG5;
4656 }
4657 else
4658 break;
4659 }
4660 detect_info->rejected |= CATEGORY_MASK_BIG5;
4661 return 0;
4662
4663 no_more_source:
4664 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4665 {
4666 detect_info->rejected |= CATEGORY_MASK_BIG5;
4667 return 0;
4668 }
4669 detect_info->found |= found;
4670 return 1;
4671 }
4672
4673
4674
4675 static void
4676 decode_coding_sjis (struct coding_system *coding)
4677 {
4678 const unsigned char *src = coding->source + coding->consumed;
4679 const unsigned char *src_end = coding->source + coding->src_bytes;
4680 const unsigned char *src_base;
4681 int *charbuf = coding->charbuf + coding->charbuf_used;
4682
4683
4684 int *charbuf_end
4685 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4686 ptrdiff_t consumed_chars = 0, consumed_chars_base;
4687 bool multibytep = coding->src_multibyte;
4688 struct charset *charset_roman, *charset_kanji, *charset_kana;
4689 struct charset *charset_kanji2;
4690 Lisp_Object attrs, charset_list, val;
4691 ptrdiff_t char_offset = coding->produced_char;
4692 ptrdiff_t last_offset = char_offset;
4693 int last_id = charset_ascii;
4694 bool eol_dos
4695 = !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4696 int byte_after_cr = -1;
4697
4698 CODING_GET_INFO (coding, attrs, charset_list);
4699
4700 val = charset_list;
4701 charset_roman = CHARSET_FROM_ID (XFIXNUM (XCAR (val))), val = XCDR (val);
4702 charset_kana = CHARSET_FROM_ID (XFIXNUM (XCAR (val))), val = XCDR (val);
4703 charset_kanji = CHARSET_FROM_ID (XFIXNUM (XCAR (val))), val = XCDR (val);
4704 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XFIXNUM (XCAR (val)));
4705
4706 while (1)
4707 {
4708 int c, c1;
4709 struct charset *charset;
4710
4711 src_base = src;
4712 consumed_chars_base = consumed_chars;
4713
4714 if (charbuf >= charbuf_end)
4715 {
4716 if (byte_after_cr >= 0)
4717 src_base--;
4718 break;
4719 }
4720
4721 if (byte_after_cr >= 0)
4722 c = byte_after_cr, byte_after_cr = -1;
4723 else
4724 ONE_MORE_BYTE (c);
4725 if (c < 0)
4726 goto invalid_code;
4727 if (c < 0x80)
4728 {
4729 if (eol_dos && c == '\r')
4730 ONE_MORE_BYTE (byte_after_cr);
4731 charset = charset_roman;
4732 }
4733 else if (c == 0x80 || c == 0xA0)
4734 goto invalid_code;
4735 else if (c >= 0xA1 && c <= 0xDF)
4736 {
4737
4738 c &= 0x7F;
4739 charset = charset_kana;
4740 }
4741 else if (c <= 0xEF)
4742 {
4743
4744 ONE_MORE_BYTE (c1);
4745 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4746 goto invalid_code;
4747 c = (c << 8) | c1;
4748 SJIS_TO_JIS (c);
4749 charset = charset_kanji;
4750 }
4751 else if (c <= 0xFC && charset_kanji2)
4752 {
4753
4754 ONE_MORE_BYTE (c1);
4755 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4756 goto invalid_code;
4757 c = (c << 8) | c1;
4758 SJIS_TO_JIS2 (c);
4759 charset = charset_kanji2;
4760 }
4761 else
4762 goto invalid_code;
4763 if (charset->id != charset_ascii
4764 && last_id != charset->id)
4765 {
4766 if (last_id != charset_ascii)
4767 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4768 last_id = charset->id;
4769 last_offset = char_offset;
4770 }
4771 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4772 *charbuf++ = c;
4773 char_offset++;
4774 continue;
4775
4776 invalid_code:
4777 src = src_base;
4778 consumed_chars = consumed_chars_base;
4779 ONE_MORE_BYTE (c);
4780 *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4781 char_offset++;
4782 }
4783
4784 no_more_source:
4785 if (last_id != charset_ascii)
4786 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4787 coding->consumed_char += consumed_chars_base;
4788 coding->consumed = src_base - coding->source;
4789 coding->charbuf_used = charbuf - coding->charbuf;
4790 }
4791
4792 static void
4793 decode_coding_big5 (struct coding_system *coding)
4794 {
4795 const unsigned char *src = coding->source + coding->consumed;
4796 const unsigned char *src_end = coding->source + coding->src_bytes;
4797 const unsigned char *src_base;
4798 int *charbuf = coding->charbuf + coding->charbuf_used;
4799
4800
4801 int *charbuf_end
4802 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4803 ptrdiff_t consumed_chars = 0, consumed_chars_base;
4804 bool multibytep = coding->src_multibyte;
4805 struct charset *charset_roman, *charset_big5;
4806 Lisp_Object attrs, charset_list, val;
4807 ptrdiff_t char_offset = coding->produced_char;
4808 ptrdiff_t last_offset = char_offset;
4809 int last_id = charset_ascii;
4810 bool eol_dos
4811 = !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4812 int byte_after_cr = -1;
4813
4814 CODING_GET_INFO (coding, attrs, charset_list);
4815 val = charset_list;
4816 charset_roman = CHARSET_FROM_ID (XFIXNUM (XCAR (val))), val = XCDR (val);
4817 charset_big5 = CHARSET_FROM_ID (XFIXNUM (XCAR (val)));
4818
4819 while (1)
4820 {
4821 int c, c1;
4822 struct charset *charset;
4823
4824 src_base = src;
4825 consumed_chars_base = consumed_chars;
4826
4827 if (charbuf >= charbuf_end)
4828 {
4829 if (byte_after_cr >= 0)
4830 src_base--;
4831 break;
4832 }
4833
4834 if (byte_after_cr >= 0)
4835 c = byte_after_cr, byte_after_cr = -1;
4836 else
4837 ONE_MORE_BYTE (c);
4838
4839 if (c < 0)
4840 goto invalid_code;
4841 if (c < 0x80)
4842 {
4843 if (eol_dos && c == '\r')
4844 ONE_MORE_BYTE (byte_after_cr);
4845 charset = charset_roman;
4846 }
4847 else
4848 {
4849
4850 if (c < 0xA1 || c > 0xFE)
4851 goto invalid_code;
4852 ONE_MORE_BYTE (c1);
4853 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4854 goto invalid_code;
4855 c = c << 8 | c1;
4856 charset = charset_big5;
4857 }
4858 if (charset->id != charset_ascii
4859 && last_id != charset->id)
4860 {
4861 if (last_id != charset_ascii)
4862 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4863 last_id = charset->id;
4864 last_offset = char_offset;
4865 }
4866 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4867 *charbuf++ = c;
4868 char_offset++;
4869 continue;
4870
4871 invalid_code:
4872 src = src_base;
4873 consumed_chars = consumed_chars_base;
4874 ONE_MORE_BYTE (c);
4875 *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4876 char_offset++;
4877 }
4878
4879 no_more_source:
4880 if (last_id != charset_ascii)
4881 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4882 coding->consumed_char += consumed_chars_base;
4883 coding->consumed = src_base - coding->source;
4884 coding->charbuf_used = charbuf - coding->charbuf;
4885 }
4886
4887
4888
4889
4890
4891
4892
4893
4894 static bool
4895 encode_coding_sjis (struct coding_system *coding)
4896 {
4897 bool multibytep = coding->dst_multibyte;
4898 int *charbuf = coding->charbuf;
4899 int *charbuf_end = charbuf + coding->charbuf_used;
4900 unsigned char *dst = coding->destination + coding->produced;
4901 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4902 int safe_room = 4;
4903 ptrdiff_t produced_chars = 0;
4904 Lisp_Object attrs, charset_list, val;
4905 bool ascii_compatible;
4906 struct charset *charset_kanji, *charset_kana;
4907 struct charset *charset_kanji2;
4908 int c;
4909
4910 CODING_GET_INFO (coding, attrs, charset_list);
4911 val = XCDR (charset_list);
4912 charset_kana = CHARSET_FROM_ID (XFIXNUM (XCAR (val))), val = XCDR (val);
4913 charset_kanji = CHARSET_FROM_ID (XFIXNUM (XCAR (val))), val = XCDR (val);
4914 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XFIXNUM (XCAR (val)));
4915
4916 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4917
4918 while (charbuf < charbuf_end)
4919 {
4920 ASSURE_DESTINATION (safe_room);
4921 c = *charbuf++;
4922
4923 if (ASCII_CHAR_P (c) && ascii_compatible)
4924 EMIT_ONE_ASCII_BYTE (c);
4925 else if (CHAR_BYTE8_P (c))
4926 {
4927 c = CHAR_TO_BYTE8 (c);
4928 EMIT_ONE_BYTE (c);
4929 }
4930 else
4931 {
4932 unsigned code;
4933 struct charset *charset;
4934 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
4935 &code, charset);
4936
4937 if (!charset)
4938 {
4939 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4940 {
4941 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4942 charset = CHARSET_FROM_ID (charset_ascii);
4943 }
4944 else
4945 {
4946 c = coding->default_char;
4947 CODING_CHAR_CHARSET (coding, dst, dst_end, c,
4948 charset_list, &code, charset);
4949 }
4950 }
4951 if (code == CHARSET_INVALID_CODE (charset))
4952 emacs_abort ();
4953 if (charset == charset_kanji)
4954 {
4955 int c1, c2;
4956 JIS_TO_SJIS (code);
4957 c1 = code >> 8, c2 = code & 0xFF;
4958 EMIT_TWO_BYTES (c1, c2);
4959 }
4960 else if (charset == charset_kana)
4961 EMIT_ONE_BYTE (code | 0x80);
4962 else if (charset_kanji2 && charset == charset_kanji2)
4963 {
4964 int c1, c2;
4965
4966 c1 = code >> 8;
4967 if (c1 == 0x21 || (c1 >= 0x23 && c1 <= 0x25)
4968 || c1 == 0x28
4969 || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
4970 {
4971 JIS_TO_SJIS2 (code);
4972 c1 = code >> 8, c2 = code & 0xFF;
4973 EMIT_TWO_BYTES (c1, c2);
4974 }
4975 else
4976 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4977 }
4978 else
4979 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4980 }
4981 }
4982 record_conversion_result (coding, CODING_RESULT_SUCCESS);
4983 coding->produced_char += produced_chars;
4984 coding->produced = dst - coding->destination;
4985 return 0;
4986 }
4987
4988 static bool
4989 encode_coding_big5 (struct coding_system *coding)
4990 {
4991 bool multibytep = coding->dst_multibyte;
4992 int *charbuf = coding->charbuf;
4993 int *charbuf_end = charbuf + coding->charbuf_used;
4994 unsigned char *dst = coding->destination + coding->produced;
4995 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4996 int safe_room = 4;
4997 ptrdiff_t produced_chars = 0;
4998 Lisp_Object attrs, charset_list, val;
4999 bool ascii_compatible;
5000 struct charset *charset_big5;
5001 int c;
5002
5003 CODING_GET_INFO (coding, attrs, charset_list);
5004 val = XCDR (charset_list);
5005 charset_big5 = CHARSET_FROM_ID (XFIXNUM (XCAR (val)));
5006 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5007
5008 while (charbuf < charbuf_end)
5009 {
5010 ASSURE_DESTINATION (safe_room);
5011 c = *charbuf++;
5012
5013 if (ASCII_CHAR_P (c) && ascii_compatible)
5014 EMIT_ONE_ASCII_BYTE (c);
5015 else if (CHAR_BYTE8_P (c))
5016 {
5017 c = CHAR_TO_BYTE8 (c);
5018 EMIT_ONE_BYTE (c);
5019 }
5020 else
5021 {
5022 unsigned code;
5023 struct charset *charset;
5024 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
5025 &code, charset);
5026
5027 if (! charset)
5028 {
5029 if (coding->mode & CODING_MODE_SAFE_ENCODING)
5030 {
5031 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5032 charset = CHARSET_FROM_ID (charset_ascii);
5033 }
5034 else
5035 {
5036 c = coding->default_char;
5037 CODING_CHAR_CHARSET (coding, dst, dst_end, c,
5038 charset_list, &code, charset);
5039 }
5040 }
5041 if (code == CHARSET_INVALID_CODE (charset))
5042 emacs_abort ();
5043 if (charset == charset_big5)
5044 {
5045 int c1, c2;
5046
5047 c1 = code >> 8, c2 = code & 0xFF;
5048 EMIT_TWO_BYTES (c1, c2);
5049 }
5050 else
5051 EMIT_ONE_ASCII_BYTE (code & 0x7F);
5052 }
5053 }
5054 record_conversion_result (coding, CODING_RESULT_SUCCESS);
5055 coding->produced_char += produced_chars;
5056 coding->produced = dst - coding->destination;
5057 return 0;
5058 }
5059
5060
5061
5062
5063
5064
5065
5066
5067 static bool
5068 detect_coding_ccl (struct coding_system *coding,
5069 struct coding_detection_info *detect_info)
5070 {
5071 const unsigned char *src = coding->source, *src_base;
5072 const unsigned char *src_end = coding->source + coding->src_bytes;
5073 bool multibytep = coding->src_multibyte;
5074 ptrdiff_t consumed_chars = 0;
5075 int found = 0;
5076 unsigned char *valids;
5077 ptrdiff_t head_ascii = coding->head_ascii;
5078 Lisp_Object attrs;
5079
5080 detect_info->checked |= CATEGORY_MASK_CCL;
5081
5082 coding = &coding_categories[coding_category_ccl];
5083 valids = CODING_CCL_VALIDS (coding);
5084 attrs = CODING_ID_ATTRS (coding->id);
5085 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5086 src += head_ascii;
5087
5088 while (1)
5089 {
5090 int c;
5091
5092 src_base = src;
5093 ONE_MORE_BYTE (c);
5094 if (c < 0 || ! valids[c])
5095 break;
5096 if ((valids[c] > 1))
5097 found = CATEGORY_MASK_CCL;
5098 }
5099 detect_info->rejected |= CATEGORY_MASK_CCL;
5100 return 0;
5101
5102 no_more_source:
5103 detect_info->found |= found;
5104 return 1;
5105 }
5106
5107 static void
5108 decode_coding_ccl (struct coding_system *coding)
5109 {
5110 const unsigned char *src = coding->source + coding->consumed;
5111 const unsigned char *src_end = coding->source + coding->src_bytes;
5112 int *charbuf = coding->charbuf + coding->charbuf_used;
5113 int *charbuf_end = coding->charbuf + coding->charbuf_size;
5114 ptrdiff_t consumed_chars = 0;
5115 bool multibytep = coding->src_multibyte;
5116 struct ccl_program *ccl = &coding->spec.ccl->ccl;
5117 int source_charbuf[1024];
5118 int source_byteidx[1025];
5119 Lisp_Object attrs, charset_list;
5120
5121 CODING_GET_INFO (coding, attrs, charset_list);
5122
5123 while (1)
5124 {
5125 const unsigned char *p = src;
5126 ptrdiff_t offset;
5127 int i = 0;
5128
5129 if (multibytep)
5130 {
5131 while (i < 1024 && p < src_end)
5132 {
5133 source_byteidx[i] = p - src;
5134 source_charbuf[i++] = string_char_advance (&p);
5135 }
5136 source_byteidx[i] = p - src;
5137 }
5138 else
5139 while (i < 1024 && p < src_end)
5140 source_charbuf[i++] = *p++;
5141
5142 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
5143 ccl->last_block = true;
5144
5145 charset_map_loaded = 0;
5146 ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf,
5147 charset_list);
5148 if (charset_map_loaded
5149 && (offset = coding_change_source (coding)))
5150 {
5151 p += offset;
5152 src += offset;
5153 src_end += offset;
5154 }
5155 charbuf += ccl->produced;
5156 if (multibytep)
5157 src += source_byteidx[ccl->consumed];
5158 else
5159 src += ccl->consumed;
5160 consumed_chars += ccl->consumed;
5161 if (p == src_end || ccl->status != CCL_STAT_SUSPEND_BY_SRC)
5162 break;
5163 }
5164
5165 switch (ccl->status)
5166 {
5167 case CCL_STAT_SUSPEND_BY_SRC:
5168 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5169 break;
5170 case CCL_STAT_SUSPEND_BY_DST:
5171 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
5172 break;
5173 case CCL_STAT_QUIT:
5174 case CCL_STAT_INVALID_CMD:
5175 record_conversion_result (coding, CODING_RESULT_INTERRUPT);
5176 break;
5177 default:
5178 record_conversion_result (coding, CODING_RESULT_SUCCESS);
5179 break;
5180 }
5181 coding->consumed_char += consumed_chars;
5182 coding->consumed = src - coding->source;
5183 coding->charbuf_used = charbuf - coding->charbuf;
5184 }
5185
5186 static bool
5187 encode_coding_ccl (struct coding_system *coding)
5188 {
5189 struct ccl_program *ccl = &coding->spec.ccl->ccl;
5190 bool multibytep = coding->dst_multibyte;
5191 int *charbuf = coding->charbuf;
5192 int *charbuf_end = charbuf + coding->charbuf_used;
5193 unsigned char *dst = coding->destination + coding->produced;
5194 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5195 int destination_charbuf[1024];
5196 ptrdiff_t produced_chars = 0;
5197 int i;
5198 Lisp_Object attrs, charset_list;
5199
5200 CODING_GET_INFO (coding, attrs, charset_list);
5201 if (coding->consumed_char == coding->src_chars
5202 && coding->mode & CODING_MODE_LAST_BLOCK)
5203 ccl->last_block = true;
5204
5205 do
5206 {
5207 ptrdiff_t offset;
5208
5209
5210 charset_map_loaded = 0;
5211 ccl_driver (ccl, charbuf, destination_charbuf,
5212 charbuf_end - charbuf, 1024, charset_list);
5213 if (charset_map_loaded
5214 && (offset = coding_change_destination (coding)))
5215 dst += offset;
5216 if (multibytep)
5217 {
5218 ASSURE_DESTINATION (ccl->produced * 2);
5219 for (i = 0; i < ccl->produced; i++)
5220 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
5221 }
5222 else
5223 {
5224 ASSURE_DESTINATION (ccl->produced);
5225 for (i = 0; i < ccl->produced; i++)
5226 *dst++ = destination_charbuf[i] & 0xFF;
5227 produced_chars += ccl->produced;
5228 }
5229 charbuf += ccl->consumed;
5230 if (ccl->status == CCL_STAT_QUIT
5231 || ccl->status == CCL_STAT_INVALID_CMD)
5232 break;
5233 }
5234 while (charbuf < charbuf_end);
5235
5236 switch (ccl->status)
5237 {
5238 case CCL_STAT_SUSPEND_BY_SRC:
5239 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5240 break;
5241 case CCL_STAT_SUSPEND_BY_DST:
5242 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
5243 break;
5244 case CCL_STAT_QUIT:
5245 case CCL_STAT_INVALID_CMD:
5246 record_conversion_result (coding, CODING_RESULT_INTERRUPT);
5247 break;
5248 default:
5249 record_conversion_result (coding, CODING_RESULT_SUCCESS);
5250 break;
5251 }
5252
5253 coding->produced_char += produced_chars;
5254 coding->produced = dst - coding->destination;
5255 return 0;
5256 }
5257
5258
5259
5260
5261
5262
5263 static void
5264 decode_coding_raw_text (struct coding_system *coding)
5265 {
5266 bool eol_dos
5267 = !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5268
5269 coding->chars_at_source = 1;
5270 coding->consumed_char = coding->src_chars;
5271 coding->consumed = coding->src_bytes;
5272 if (eol_dos && coding->source[coding->src_bytes - 1] == '\r')
5273 {
5274 coding->consumed_char--;
5275 coding->consumed--;
5276 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5277 }
5278 else
5279 record_conversion_result (coding, CODING_RESULT_SUCCESS);
5280 }
5281
5282 static bool
5283 encode_coding_raw_text (struct coding_system *coding)
5284 {
5285 bool multibytep = coding->dst_multibyte;
5286 int *charbuf = coding->charbuf;
5287 int *charbuf_end = coding->charbuf + coding->charbuf_used;
5288 unsigned char *dst = coding->destination + coding->produced;
5289 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5290 ptrdiff_t produced_chars = 0;
5291 int c;
5292
5293 if (multibytep)
5294 {
5295 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
5296
5297 if (coding->src_multibyte)
5298 while (charbuf < charbuf_end)
5299 {
5300 ASSURE_DESTINATION (safe_room);
5301 c = *charbuf++;
5302 if (ASCII_CHAR_P (c))
5303 EMIT_ONE_ASCII_BYTE (c);
5304 else if (CHAR_BYTE8_P (c))
5305 {
5306 c = CHAR_TO_BYTE8 (c);
5307 EMIT_ONE_BYTE (c);
5308 }
5309 else
5310 {
5311 unsigned char str[MAX_MULTIBYTE_LENGTH];
5312 int len = CHAR_STRING (c, str);
5313 for (int i = 0; i < len; i++)
5314 EMIT_ONE_BYTE (str[i]);
5315 }
5316 }
5317 else
5318 while (charbuf < charbuf_end)
5319 {
5320 ASSURE_DESTINATION (safe_room);
5321 c = *charbuf++;
5322 EMIT_ONE_BYTE (c);
5323 }
5324 }
5325 else
5326 {
5327 if (coding->src_multibyte)
5328 {
5329 int safe_room = MAX_MULTIBYTE_LENGTH;
5330
5331 while (charbuf < charbuf_end)
5332 {
5333 ASSURE_DESTINATION (safe_room);
5334 c = *charbuf++;
5335 if (ASCII_CHAR_P (c))
5336 *dst++ = c;
5337 else if (CHAR_BYTE8_P (c))
5338 *dst++ = CHAR_TO_BYTE8 (c);
5339 else
5340 dst += CHAR_STRING (c, dst);
5341 }
5342 }
5343 else
5344 {
5345 ASSURE_DESTINATION (charbuf_end - charbuf);
5346 while (charbuf < charbuf_end && dst < dst_end)
5347 *dst++ = *charbuf++;
5348 }
5349 produced_chars = dst - (coding->destination + coding->produced);
5350 }
5351 record_conversion_result (coding, CODING_RESULT_SUCCESS);
5352 coding->produced_char += produced_chars;
5353 coding->produced = dst - coding->destination;
5354 return 0;
5355 }
5356
5357
5358
5359
5360 static bool
5361 detect_coding_charset (struct coding_system *coding,
5362 struct coding_detection_info *detect_info)
5363 {
5364 const unsigned char *src = coding->source, *src_base;
5365 const unsigned char *src_end = coding->source + coding->src_bytes;
5366 bool multibytep = coding->src_multibyte;
5367 ptrdiff_t consumed_chars = 0;
5368 Lisp_Object attrs, valids, name;
5369 int found = 0;
5370 ptrdiff_t head_ascii = coding->head_ascii;
5371 bool check_latin_extra = 0;
5372
5373 detect_info->checked |= CATEGORY_MASK_CHARSET;
5374
5375 coding = &coding_categories[coding_category_charset];
5376 attrs = CODING_ID_ATTRS (coding->id);
5377 valids = AREF (attrs, coding_attr_charset_valids);
5378 name = CODING_ID_NAME (coding->id);
5379 if (strncmp (SSDATA (SYMBOL_NAME (name)),
5380 "iso-8859-", sizeof ("iso-8859-") - 1) == 0
5381 || strncmp (SSDATA (SYMBOL_NAME (name)),
5382 "iso-latin-", sizeof ("iso-latin-") - 1) == 0)
5383 check_latin_extra = 1;
5384
5385 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5386 src += head_ascii;
5387
5388 while (1)
5389 {
5390 int c;
5391 Lisp_Object val;
5392 struct charset *charset;
5393 int dim, idx;
5394
5395 src_base = src;
5396 ONE_MORE_BYTE (c);
5397 if (c < 0)
5398 continue;
5399 val = AREF (valids, c);
5400 if (NILP (val))
5401 break;
5402 if (c >= 0x80)
5403 {
5404 if (c < 0xA0
5405 && check_latin_extra
5406 && (!VECTORP (Vlatin_extra_code_table)
5407 || NILP (AREF (Vlatin_extra_code_table, c))))
5408 break;
5409 found = CATEGORY_MASK_CHARSET;
5410 }
5411 if (FIXNUMP (val))
5412 {
5413 charset = CHARSET_FROM_ID (XFIXNAT (val));
5414 dim = CHARSET_DIMENSION (charset);
5415 for (idx = 1; idx < dim; idx++)
5416 {
5417 if (src == src_end)
5418 goto too_short;
5419 ONE_MORE_BYTE (c);
5420 if (c < charset->code_space[(dim - 1 - idx) * 4]
5421 || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5422 break;
5423 }
5424 if (idx < dim)
5425 break;
5426 }
5427 else
5428 {
5429 idx = 1;
5430 for (; CONSP (val); val = XCDR (val))
5431 {
5432 charset = CHARSET_FROM_ID (XFIXNAT (XCAR (val)));
5433 dim = CHARSET_DIMENSION (charset);
5434 while (idx < dim)
5435 {
5436 if (src == src_end)
5437 goto too_short;
5438 ONE_MORE_BYTE (c);
5439 if (c < charset->code_space[(dim - 1 - idx) * 4]
5440 || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5441 break;
5442 idx++;
5443 }
5444 if (idx == dim)
5445 {
5446 val = Qnil;
5447 break;
5448 }
5449 }
5450 if (CONSP (val))
5451 break;
5452 }
5453 }
5454 too_short:
5455 detect_info->rejected |= CATEGORY_MASK_CHARSET;
5456 return 0;
5457
5458 no_more_source:
5459 detect_info->found |= found;
5460 return 1;
5461 }
5462
5463 static void
5464 decode_coding_charset (struct coding_system *coding)
5465 {
5466 const unsigned char *src = coding->source + coding->consumed;
5467 const unsigned char *src_end = coding->source + coding->src_bytes;
5468 const unsigned char *src_base;
5469 int *charbuf = coding->charbuf + coding->charbuf_used;
5470
5471
5472 int *charbuf_end
5473 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
5474 ptrdiff_t consumed_chars = 0, consumed_chars_base;
5475 bool multibytep = coding->src_multibyte;
5476 Lisp_Object attrs = CODING_ID_ATTRS (coding->id);
5477 Lisp_Object valids;
5478 ptrdiff_t char_offset = coding->produced_char;
5479 ptrdiff_t last_offset = char_offset;
5480 int last_id = charset_ascii;
5481 bool eol_dos
5482 = !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5483 int byte_after_cr = -1;
5484
5485 valids = AREF (attrs, coding_attr_charset_valids);
5486
5487 while (1)
5488 {
5489 int c;
5490 Lisp_Object val;
5491 struct charset *charset;
5492 int dim;
5493 int len = 1;
5494 unsigned code;
5495
5496 src_base = src;
5497 consumed_chars_base = consumed_chars;
5498
5499 if (charbuf >= charbuf_end)
5500 {
5501 if (byte_after_cr >= 0)
5502 src_base--;
5503 break;
5504 }
5505
5506 if (byte_after_cr >= 0)
5507 {
5508 c = byte_after_cr;
5509 byte_after_cr = -1;
5510 }
5511 else
5512 {
5513 ONE_MORE_BYTE (c);
5514 if (eol_dos && c == '\r')
5515 ONE_MORE_BYTE (byte_after_cr);
5516 }
5517 if (c < 0)
5518 goto invalid_code;
5519 code = c;
5520
5521 val = AREF (valids, c);
5522 if (! FIXNUMP (val) && ! CONSP (val))
5523 goto invalid_code;
5524 if (FIXNUMP (val))
5525 {
5526 charset = CHARSET_FROM_ID (XFIXNAT (val));
5527 dim = CHARSET_DIMENSION (charset);
5528 while (len < dim)
5529 {
5530 ONE_MORE_BYTE (c);
5531 code = (code << 8) | c;
5532 len++;
5533 }
5534 CODING_DECODE_CHAR (coding, src, src_base, src_end,
5535 charset, code, c);
5536 }
5537 else
5538 {
5539
5540
5541
5542 while (CONSP (val))
5543 {
5544 charset = CHARSET_FROM_ID (XFIXNAT (XCAR (val)));
5545 dim = CHARSET_DIMENSION (charset);
5546 while (len < dim)
5547 {
5548 ONE_MORE_BYTE (c);
5549 code = (code << 8) | c;
5550 len++;
5551 }
5552 CODING_DECODE_CHAR (coding, src, src_base,
5553 src_end, charset, code, c);
5554 if (c >= 0)
5555 break;
5556 val = XCDR (val);
5557 }
5558 }
5559 if (c < 0)
5560 goto invalid_code;
5561 if (charset->id != charset_ascii
5562 && last_id != charset->id)
5563 {
5564 if (last_id != charset_ascii)
5565 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5566 last_id = charset->id;
5567 last_offset = char_offset;
5568 }
5569
5570 *charbuf++ = c;
5571 char_offset++;
5572 continue;
5573
5574 invalid_code:
5575 src = src_base;
5576 consumed_chars = consumed_chars_base;
5577 ONE_MORE_BYTE (c);
5578 *charbuf++ = c < 0 ? -c : ASCII_CHAR_P (c) ? c : BYTE8_TO_CHAR (c);
5579 char_offset++;
5580 }
5581
5582 no_more_source:
5583 if (last_id != charset_ascii)
5584 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5585 coding->consumed_char += consumed_chars_base;
5586 coding->consumed = src_base - coding->source;
5587 coding->charbuf_used = charbuf - coding->charbuf;
5588 }
5589
5590 static bool
5591 encode_coding_charset (struct coding_system *coding)
5592 {
5593 bool multibytep = coding->dst_multibyte;
5594 int *charbuf = coding->charbuf;
5595 int *charbuf_end = charbuf + coding->charbuf_used;
5596 unsigned char *dst = coding->destination + coding->produced;
5597 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5598 int safe_room = MAX_MULTIBYTE_LENGTH;
5599 ptrdiff_t produced_chars = 0;
5600 Lisp_Object attrs, charset_list;
5601 bool ascii_compatible;
5602 int c;
5603
5604 CODING_GET_INFO (coding, attrs, charset_list);
5605 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5606
5607 while (charbuf < charbuf_end)
5608 {
5609 struct charset *charset;
5610 unsigned code;
5611
5612 ASSURE_DESTINATION (safe_room);
5613 c = *charbuf++;
5614 if (ascii_compatible && ASCII_CHAR_P (c))
5615 EMIT_ONE_ASCII_BYTE (c);
5616 else if (CHAR_BYTE8_P (c))
5617 {
5618 c = CHAR_TO_BYTE8 (c);
5619 EMIT_ONE_BYTE (c);
5620 }
5621 else
5622 {
5623 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
5624 &code, charset);
5625
5626 if (charset)
5627 {
5628 if (CHARSET_DIMENSION (charset) == 1)
5629 EMIT_ONE_BYTE (code);
5630 else if (CHARSET_DIMENSION (charset) == 2)
5631 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5632 else if (CHARSET_DIMENSION (charset) == 3)
5633 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5634 else
5635 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5636 (code >> 8) & 0xFF, code & 0xFF);
5637 }
5638 else
5639 {
5640 if (coding->mode & CODING_MODE_SAFE_ENCODING)
5641 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5642 else
5643 c = coding->default_char;
5644 EMIT_ONE_BYTE (c);
5645 }
5646 }
5647 }
5648
5649 record_conversion_result (coding, CODING_RESULT_SUCCESS);
5650 coding->produced_char += produced_chars;
5651 coding->produced = dst - coding->destination;
5652 return 0;
5653 }
5654
5655
5656
5657
5658
5659
5660
5661
5662 void
5663 setup_coding_system (Lisp_Object coding_system, struct coding_system *coding)
5664 {
5665 Lisp_Object attrs;
5666 Lisp_Object eol_type;
5667 Lisp_Object coding_type;
5668 Lisp_Object val;
5669
5670 if (NILP (coding_system))
5671 coding_system = Qundecided;
5672
5673 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
5674
5675 attrs = CODING_ID_ATTRS (coding->id);
5676 eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
5677
5678 coding->mode = 0;
5679 if (VECTORP (eol_type))
5680 coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5681 | CODING_REQUIRE_DETECTION_MASK);
5682 else if (! EQ (eol_type, Qunix))
5683 coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5684 | CODING_REQUIRE_ENCODING_MASK);
5685 else
5686 coding->common_flags = 0;
5687 if (! NILP (CODING_ATTR_POST_READ (attrs)))
5688 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5689 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5690 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5691 if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5692 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
5693
5694 val = CODING_ATTR_SAFE_CHARSETS (attrs);
5695 coding->max_charset_id = SCHARS (val) - 1;
5696 coding->safe_charsets = SDATA (val);
5697 coding->default_char = XFIXNUM (CODING_ATTR_DEFAULT_CHAR (attrs));
5698 coding->carryover_bytes = 0;
5699 coding->raw_destination = 0;
5700
5701 coding_type = CODING_ATTR_TYPE (attrs);
5702 if (EQ (coding_type, Qundecided))
5703 {
5704 coding->detector = NULL;
5705 coding->decoder = decode_coding_raw_text;
5706 coding->encoder = encode_coding_raw_text;
5707 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5708 coding->spec.undecided.inhibit_nbd
5709 = (encode_inhibit_flag
5710 (AREF (attrs, coding_attr_undecided_inhibit_null_byte_detection)));
5711 coding->spec.undecided.inhibit_ied
5712 = (encode_inhibit_flag
5713 (AREF (attrs, coding_attr_undecided_inhibit_iso_escape_detection)));
5714 coding->spec.undecided.prefer_utf_8
5715 = ! NILP (AREF (attrs, coding_attr_undecided_prefer_utf_8));
5716 }
5717 else if (EQ (coding_type, Qiso_2022))
5718 {
5719 int i;
5720 int flags = XFIXNUM (AREF (attrs, coding_attr_iso_flags));
5721
5722
5723 CODING_ISO_INVOCATION (coding, 0) = 0;
5724
5725 CODING_ISO_INVOCATION (coding, 1)
5726 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5727
5728 for (i = 0; i < 4; i++)
5729 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5730
5731 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5732
5733 CODING_ISO_BOL (coding) = 1;
5734 coding->detector = detect_coding_iso_2022;
5735 coding->decoder = decode_coding_iso_2022;
5736 coding->encoder = encode_coding_iso_2022;
5737 if (flags & CODING_ISO_FLAG_SAFE)
5738 coding->mode |= CODING_MODE_SAFE_ENCODING;
5739 coding->common_flags
5740 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5741 | CODING_REQUIRE_FLUSHING_MASK);
5742 if (flags & CODING_ISO_FLAG_COMPOSITION)
5743 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
5744 if (flags & CODING_ISO_FLAG_DESIGNATION)
5745 coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
5746 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5747 {
5748 setup_iso_safe_charsets (attrs);
5749 val = CODING_ATTR_SAFE_CHARSETS (attrs);
5750 coding->max_charset_id = SCHARS (val) - 1;
5751 coding->safe_charsets = SDATA (val);
5752 }
5753 CODING_ISO_FLAGS (coding) = flags;
5754 CODING_ISO_CMP_STATUS (coding)->state = COMPOSING_NO;
5755 CODING_ISO_CMP_STATUS (coding)->method = COMPOSITION_NO;
5756 CODING_ISO_EXTSEGMENT_LEN (coding) = 0;
5757 CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
5758 }
5759 else if (EQ (coding_type, Qcharset))
5760 {
5761 coding->detector = detect_coding_charset;
5762 coding->decoder = decode_coding_charset;
5763 coding->encoder = encode_coding_charset;
5764 coding->common_flags
5765 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5766 }
5767 else if (EQ (coding_type, Qutf_8))
5768 {
5769 val = AREF (attrs, coding_attr_utf_bom);
5770 CODING_UTF_8_BOM (coding) = (CONSP (val) ? utf_detect_bom
5771 : EQ (val, Qt) ? utf_with_bom
5772 : utf_without_bom);
5773 coding->detector = detect_coding_utf_8;
5774 coding->decoder = decode_coding_utf_8;
5775 coding->encoder = encode_coding_utf_8;
5776 coding->common_flags
5777 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5778 if (CODING_UTF_8_BOM (coding) == utf_detect_bom)
5779 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5780 }
5781 else if (EQ (coding_type, Qutf_16))
5782 {
5783 val = AREF (attrs, coding_attr_utf_bom);
5784 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_detect_bom
5785 : EQ (val, Qt) ? utf_with_bom
5786 : utf_without_bom);
5787 val = AREF (attrs, coding_attr_utf_16_endian);
5788 CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
5789 : utf_16_little_endian);
5790 CODING_UTF_16_SURROGATE (coding) = 0;
5791 coding->detector = detect_coding_utf_16;
5792 coding->decoder = decode_coding_utf_16;
5793 coding->encoder = encode_coding_utf_16;
5794 coding->common_flags
5795 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5796 if (CODING_UTF_16_BOM (coding) == utf_detect_bom)
5797 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5798 }
5799 else if (EQ (coding_type, Qccl))
5800 {
5801 coding->detector = detect_coding_ccl;
5802 coding->decoder = decode_coding_ccl;
5803 coding->encoder = encode_coding_ccl;
5804 coding->common_flags
5805 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5806 | CODING_REQUIRE_FLUSHING_MASK);
5807 }
5808 else if (EQ (coding_type, Qemacs_mule))
5809 {
5810 coding->detector = detect_coding_emacs_mule;
5811 coding->decoder = decode_coding_emacs_mule;
5812 coding->encoder = encode_coding_emacs_mule;
5813 coding->common_flags
5814 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5815 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5816 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5817 {
5818 Lisp_Object tail, safe_charsets;
5819 int max_charset_id = 0;
5820
5821 for (tail = Vemacs_mule_charset_list; CONSP (tail);
5822 tail = XCDR (tail))
5823 if (max_charset_id < XFIXNAT (XCAR (tail)))
5824 max_charset_id = XFIXNAT (XCAR (tail));
5825 safe_charsets = make_uninit_string (max_charset_id + 1);
5826 memset (SDATA (safe_charsets), 255, max_charset_id + 1);
5827 for (tail = Vemacs_mule_charset_list; CONSP (tail);
5828 tail = XCDR (tail))
5829 SSET (safe_charsets, XFIXNAT (XCAR (tail)), 0);
5830 coding->max_charset_id = max_charset_id;
5831 coding->safe_charsets = SDATA (safe_charsets);
5832 }
5833 coding->spec.emacs_mule.cmp_status.state = COMPOSING_NO;
5834 coding->spec.emacs_mule.cmp_status.method = COMPOSITION_NO;
5835 }
5836 else if (EQ (coding_type, Qshift_jis))
5837 {
5838 coding->detector = detect_coding_sjis;
5839 coding->decoder = decode_coding_sjis;
5840 coding->encoder = encode_coding_sjis;
5841 coding->common_flags
5842 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5843 }
5844 else if (EQ (coding_type, Qbig5))
5845 {
5846 coding->detector = detect_coding_big5;
5847 coding->decoder = decode_coding_big5;
5848 coding->encoder = encode_coding_big5;
5849 coding->common_flags
5850 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5851 }
5852 else
5853 {
5854 coding->detector = NULL;
5855 coding->decoder = decode_coding_raw_text;
5856 coding->encoder = encode_coding_raw_text;
5857 if (! EQ (eol_type, Qunix))
5858 {
5859 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5860 if (! VECTORP (eol_type))
5861 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5862 }
5863
5864 }
5865
5866 return;
5867 }
5868
5869
5870
5871 Lisp_Object
5872 coding_charset_list (struct coding_system *coding)
5873 {
5874 Lisp_Object attrs, charset_list;
5875
5876 CODING_GET_INFO (coding, attrs, charset_list);
5877 if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5878 {
5879 int flags = XFIXNUM (AREF (attrs, coding_attr_iso_flags));
5880
5881 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5882 charset_list = Viso_2022_charset_list;
5883 }
5884 else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5885 {
5886 charset_list = Vemacs_mule_charset_list;
5887 }
5888 return charset_list;
5889 }
5890
5891
5892
5893
5894 Lisp_Object
5895 coding_system_charset_list (Lisp_Object coding_system)
5896 {
5897 ptrdiff_t id;
5898 Lisp_Object attrs, charset_list;
5899
5900 CHECK_CODING_SYSTEM_GET_ID (coding_system, id);
5901 attrs = CODING_ID_ATTRS (id);
5902
5903 if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5904 {
5905 int flags = XFIXNUM (AREF (attrs, coding_attr_iso_flags));
5906
5907 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5908 charset_list = Viso_2022_charset_list;
5909 else
5910 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
5911 }
5912 else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5913 {
5914 charset_list = Vemacs_mule_charset_list;
5915 }
5916 else
5917 {
5918 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
5919 }
5920 return charset_list;
5921 }
5922
5923
5924
5925
5926
5927 Lisp_Object
5928 raw_text_coding_system (Lisp_Object coding_system)
5929 {
5930 Lisp_Object spec, attrs;
5931 Lisp_Object eol_type, raw_text_eol_type;
5932
5933 if (NILP (coding_system))
5934 return Qraw_text;
5935 spec = CODING_SYSTEM_SPEC (coding_system);
5936 attrs = AREF (spec, 0);
5937
5938 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5939 return coding_system;
5940
5941 eol_type = AREF (spec, 2);
5942 if (VECTORP (eol_type))
5943 return Qraw_text;
5944 spec = CODING_SYSTEM_SPEC (Qraw_text);
5945 raw_text_eol_type = AREF (spec, 2);
5946 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5947 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5948 : AREF (raw_text_eol_type, 2));
5949 }
5950
5951
5952
5953 bool
5954 raw_text_coding_system_p (struct coding_system *coding)
5955 {
5956 return (coding->decoder == decode_coding_raw_text
5957 && coding->encoder == encode_coding_raw_text) ? true : false;
5958 }
5959
5960
5961
5962
5963
5964
5965 Lisp_Object
5966 coding_inherit_eol_type (Lisp_Object coding_system, Lisp_Object parent)
5967 {
5968 Lisp_Object spec, eol_type;
5969
5970 if (NILP (coding_system))
5971 coding_system = Qraw_text;
5972 else
5973 CHECK_CODING_SYSTEM (coding_system);
5974 spec = CODING_SYSTEM_SPEC (coding_system);
5975 eol_type = AREF (spec, 2);
5976 if (VECTORP (eol_type))
5977 {
5978
5979
5980
5981
5982 Lisp_Object system_eol_type = Qunix;
5983 #ifdef DOS_NT
5984 system_eol_type = Qdos;
5985 #endif
5986
5987 Lisp_Object parent_eol_type = system_eol_type;
5988 if (! NILP (parent))
5989 {
5990 CHECK_CODING_SYSTEM (parent);
5991 Lisp_Object parent_spec = CODING_SYSTEM_SPEC (parent);
5992 Lisp_Object pspec_type = AREF (parent_spec, 2);
5993 if (!VECTORP (pspec_type))
5994 parent_eol_type = pspec_type;
5995 }
5996 if (EQ (parent_eol_type, Qunix))
5997 coding_system = AREF (eol_type, 0);
5998 else if (EQ (parent_eol_type, Qdos))
5999 coding_system = AREF (eol_type, 1);
6000 else if (EQ (parent_eol_type, Qmac))
6001 coding_system = AREF (eol_type, 2);
6002 }
6003 return coding_system;
6004 }
6005
6006
6007
6008
6009
6010
6011 Lisp_Object
6012 complement_process_encoding_system (Lisp_Object coding_system)
6013 {
6014 Lisp_Object coding_base = Qnil, eol_base = Qnil;
6015 Lisp_Object spec, attrs;
6016 int i;
6017
6018 for (i = 0; i < 3; i++)
6019 {
6020 if (i == 1)
6021 coding_system = CDR_SAFE (Vdefault_process_coding_system);
6022 else if (i == 2)
6023 coding_system = preferred_coding_system ();
6024 spec = CODING_SYSTEM_SPEC (coding_system);
6025 if (NILP (spec))
6026 continue;
6027 attrs = AREF (spec, 0);
6028 if (NILP (coding_base) && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
6029 coding_base = CODING_ATTR_BASE_NAME (attrs);
6030 if (NILP (eol_base) && ! VECTORP (AREF (spec, 2)))
6031 eol_base = coding_system;
6032 if (! NILP (coding_base) && ! NILP (eol_base))
6033 break;
6034 }
6035
6036 if (i > 0)
6037
6038
6039
6040 coding_system = coding_inherit_eol_type (coding_base, eol_base);
6041 return coding_system;
6042 }
6043
6044
6045
6046
6047
6048
6049
6050
6051
6052
6053
6054
6055
6056
6057
6058
6059
6060
6061
6062
6063
6064
6065
6066
6067
6068
6069
6070
6071
6072
6073
6074
6075
6076
6077
6078
6079
6080
6081
6082
6083
6084
6085
6086
6087
6088
6089
6090
6091
6092
6093
6094
6095
6096
6097
6098
6099
6100
6101
6102
6103
6104
6105
6106
6107
6108
6109
6110
6111
6112
6113
6114
6115
6116
6117
6118
6119
6120
6121
6122
6123
6124
6125
6126
6127
6128
6129
6130
6131
6132
6133
6134
6135
6136
6137
6138
6139
6140
6141
6142
6143
6144
6145
6146
6147
6148
6149
6150
6151
6152
6153
6154 static Lisp_Object adjust_coding_eol_type (struct coding_system *coding,
6155 int eol_seen);
6156
6157
6158
6159
6160
6161
6162
6163
6164 static ptrdiff_t
6165 check_ascii (struct coding_system *coding)
6166 {
6167 const unsigned char *src, *end;
6168 Lisp_Object eol_type = CODING_ID_EOL_TYPE (coding->id);
6169 int eol_seen = coding->eol_seen;
6170
6171 coding_set_source (coding);
6172 src = coding->source;
6173 end = src + coding->src_bytes;
6174
6175 if (inhibit_eol_conversion
6176 || SYMBOLP (eol_type))
6177 {
6178
6179 while (src < end && !( *src & 0x80))
6180 {
6181 if (*src++ == '\n')
6182 eol_seen |= EOL_SEEN_LF;
6183 }
6184 }
6185 else
6186 {
6187 end--;
6188 while (src < end)
6189 {
6190 int c = *src;
6191
6192 if (c & 0x80)
6193 break;
6194 src++;
6195 if (c == '\r')
6196 {
6197 if (*src == '\n')
6198 {
6199 eol_seen |= EOL_SEEN_CRLF;
6200 src++;
6201 }
6202 else
6203 eol_seen |= EOL_SEEN_CR;
6204 }
6205 else if (c == '\n')
6206 eol_seen |= EOL_SEEN_LF;
6207 }
6208 if (src == end)
6209 {
6210 int c = *src;
6211
6212
6213 if (! (c & 0x80))
6214 {
6215 if (c == '\r')
6216 eol_seen |= EOL_SEEN_CR;
6217 else if (c == '\n')
6218 eol_seen |= EOL_SEEN_LF;
6219 src++;
6220 }
6221 }
6222 }
6223 coding->head_ascii = src - coding->source;
6224 coding->eol_seen = eol_seen;
6225 return (coding->head_ascii);
6226 }
6227
6228
6229
6230
6231
6232
6233
6234
6235
6236 static ptrdiff_t
6237 check_utf_8 (struct coding_system *coding)
6238 {
6239 const unsigned char *src, *end;
6240 int eol_seen;
6241 ptrdiff_t nchars = coding->head_ascii;
6242
6243 if (coding->head_ascii < 0)
6244 check_ascii (coding);
6245 else
6246 coding_set_source (coding);
6247 src = coding->source + coding->head_ascii;
6248
6249 end = coding->source + coding->src_bytes - 1;
6250 eol_seen = coding->eol_seen;
6251 while (src < end)
6252 {
6253 int c = *src;
6254
6255 if (UTF_8_1_OCTET_P (*src))
6256 {
6257 src++;
6258 if (c < 0x20)
6259 {
6260 if (c == '\r')
6261 {
6262 if (*src == '\n')
6263 {
6264 eol_seen |= EOL_SEEN_CRLF;
6265 src++;
6266 nchars++;
6267 }
6268 else
6269 eol_seen |= EOL_SEEN_CR;
6270 }
6271 else if (c == '\n')
6272 eol_seen |= EOL_SEEN_LF;
6273 }
6274 }
6275 else if (UTF_8_2_OCTET_LEADING_P (c))
6276 {
6277 if (c < 0xC2
6278 || src + 1 >= end
6279 || ! UTF_8_EXTRA_OCTET_P (src[1]))
6280 return -1;
6281 src += 2;
6282 }
6283 else if (UTF_8_3_OCTET_LEADING_P (c))
6284 {
6285 if (src + 2 >= end
6286 || ! (UTF_8_EXTRA_OCTET_P (src[1])
6287 && UTF_8_EXTRA_OCTET_P (src[2])))
6288 return -1;
6289 c = (((c & 0xF) << 12)
6290 | ((src[1] & 0x3F) << 6) | (src[2] & 0x3F));
6291 if (c < 0x800
6292 || (c >= 0xd800 && c < 0xe000))
6293 return -1;
6294 src += 3;
6295 }
6296 else if (UTF_8_4_OCTET_LEADING_P (c))
6297 {
6298 if (src + 3 >= end
6299 || ! (UTF_8_EXTRA_OCTET_P (src[1])
6300 && UTF_8_EXTRA_OCTET_P (src[2])
6301 && UTF_8_EXTRA_OCTET_P (src[3])))
6302 return -1;
6303 c = (((c & 0x7) << 18) | ((src[1] & 0x3F) << 12)
6304 | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F));
6305 if (c < 0x10000
6306 || c >= 0x110000)
6307 return -1;
6308 src += 4;
6309 }
6310 else
6311 return -1;
6312 nchars++;
6313 }
6314
6315 if (src == end)
6316 {
6317 if (! UTF_8_1_OCTET_P (*src))
6318 return -1;
6319 nchars++;
6320 if (*src == '\r')
6321 eol_seen |= EOL_SEEN_CR;
6322 else if (*src == '\n')
6323 eol_seen |= EOL_SEEN_LF;
6324 }
6325 coding->eol_seen = eol_seen;
6326 return nchars;
6327 }
6328
6329
6330
6331
6332
6333 bool
6334 utf8_string_p (Lisp_Object string)
6335 {
6336 eassert (!STRING_MULTIBYTE (string));
6337 struct coding_system coding;
6338 setup_coding_system (Qutf_8_unix, &coding);
6339
6340 coding.head_ascii = -1;
6341 coding.src_pos = 0;
6342 coding.src_pos_byte = 0;
6343 coding.src_chars = SCHARS (string);
6344 coding.src_bytes = SBYTES (string);
6345 coding.src_object = string;
6346 coding.eol_seen = EOL_SEEN_NONE;
6347 return check_utf_8 (&coding) != -1;
6348 }
6349
6350
6351
/* Return a Lisp string made from the NBYTES bytes at TEXT by passing
   them to decode_string_utf_8.  */

Lisp_Object
make_string_from_utf8 (const char *text, ptrdiff_t nbytes)
{
#if 0
  /* Disabled alternative implementation, kept for reference; it is
     never compiled.  It parses TEXT with parse_str_as_multibyte and
     builds the string directly when the resulting counts equal
     NBYTES, falling back to decode_coding_object with utf-8-unix
     otherwise.  */
  ptrdiff_t chars, bytes;
  parse_str_as_multibyte ((const unsigned char *) text, nbytes,
                          &chars, &bytes);

  if (chars == nbytes || bytes == nbytes)
    return make_specified_string (text, chars, nbytes, true);
  else
    {
      struct coding_system coding;
      setup_coding_system (Qutf_8_unix, &coding);
      coding.mode |= CODING_MODE_LAST_BLOCK;
      coding.source = (const unsigned char *) text;
      decode_coding_object (&coding, Qnil, 0, 0, nbytes, nbytes, Qt);
      return coding.dst_object;
    }
#else
  /* The implementation actually in use.  */
  return decode_string_utf_8 (Qnil, text, nbytes, Qnil, false, Qt, Qt);
#endif
}
6380
6381
6382
6383
6384
6385
6386
6387
6388 #define MAX_EOL_CHECK_COUNT 3
6389
6390 static int
6391 detect_eol (const unsigned char *source, ptrdiff_t src_bytes,
6392 enum coding_category category)
6393 {
6394 const unsigned char *src = source, *src_end = src + src_bytes;
6395 unsigned char c;
6396 int total = 0;
6397 int eol_seen = EOL_SEEN_NONE;
6398
6399 if ((1 << category) & CATEGORY_MASK_UTF_16)
6400 {
6401 bool msb = category == (coding_category_utf_16_le
6402 | coding_category_utf_16_le_nosig);
6403 bool lsb = !msb;
6404
6405 while (src + 1 < src_end)
6406 {
6407 c = src[lsb];
6408 if (src[msb] == 0 && (c == '\n' || c == '\r'))
6409 {
6410 int this_eol;
6411
6412 if (c == '\n')
6413 this_eol = EOL_SEEN_LF;
6414 else if (src + 3 >= src_end
6415 || src[msb + 2] != 0
6416 || src[lsb + 2] != '\n')
6417 this_eol = EOL_SEEN_CR;
6418 else
6419 {
6420 this_eol = EOL_SEEN_CRLF;
6421 src += 2;
6422 }
6423
6424 if (eol_seen == EOL_SEEN_NONE)
6425
6426 eol_seen = this_eol;
6427 else if (eol_seen != this_eol)
6428 {
6429
6430
6431 if ((eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF)
6432 || (eol_seen == EOL_SEEN_CRLF
6433 && this_eol == EOL_SEEN_CR))
6434 eol_seen = EOL_SEEN_CRLF;
6435 else
6436 {
6437 eol_seen = EOL_SEEN_LF;
6438 break;
6439 }
6440 }
6441 if (++total == MAX_EOL_CHECK_COUNT)
6442 break;
6443 }
6444 src += 2;
6445 }
6446 }
6447 else
6448 while (src < src_end)
6449 {
6450 c = *src++;
6451 if (c == '\n' || c == '\r')
6452 {
6453 int this_eol;
6454
6455 if (c == '\n')
6456 this_eol = EOL_SEEN_LF;
6457 else if (src >= src_end || *src != '\n')
6458 this_eol = EOL_SEEN_CR;
6459 else
6460 this_eol = EOL_SEEN_CRLF, src++;
6461
6462 if (eol_seen == EOL_SEEN_NONE)
6463
6464 eol_seen = this_eol;
6465 else if (eol_seen != this_eol)
6466 {
6467
6468
6469 if ((eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF)
6470 || (eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR))
6471 eol_seen = EOL_SEEN_CRLF;
6472 else
6473 {
6474 eol_seen = EOL_SEEN_LF;
6475 break;
6476 }
6477 }
6478 if (++total == MAX_EOL_CHECK_COUNT)
6479 break;
6480 }
6481 }
6482 return eol_seen;
6483 }
6484
6485
6486 static Lisp_Object
6487 adjust_coding_eol_type (struct coding_system *coding, int eol_seen)
6488 {
6489 Lisp_Object eol_type;
6490
6491 eol_type = CODING_ID_EOL_TYPE (coding->id);
6492 if (! VECTORP (eol_type))
6493
6494 return eol_type;
6495 if (eol_seen & EOL_SEEN_LF)
6496 {
6497 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
6498 eol_type = Qunix;
6499 }
6500 else if (eol_seen & EOL_SEEN_CRLF)
6501 {
6502 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
6503 eol_type = Qdos;
6504 }
6505 else if (eol_seen & EOL_SEEN_CR)
6506 {
6507 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
6508 eol_type = Qmac;
6509 }
6510 return eol_type;
6511 }
6512
6513
6514
6515
6516
6517 static void
6518 detect_coding (struct coding_system *coding)
6519 {
6520 const unsigned char *src, *src_end;
6521 unsigned int saved_mode = coding->mode;
6522 Lisp_Object found = Qnil;
6523 Lisp_Object eol_type = CODING_ID_EOL_TYPE (coding->id);
6524
6525 coding->consumed = coding->consumed_char = 0;
6526 coding->produced = coding->produced_char = 0;
6527 coding_set_source (coding);
6528
6529 src_end = coding->source + coding->src_bytes;
6530
6531 coding->eol_seen = EOL_SEEN_NONE;
6532
6533
6534 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
6535 {
6536 int c, i;
6537 struct coding_detection_info detect_info = {0};
6538 bool null_byte_found = 0, eight_bit_found = 0;
6539 bool inhibit_nbd = inhibit_flag (coding->spec.undecided.inhibit_nbd,
6540 inhibit_null_byte_detection);
6541 bool inhibit_ied = inhibit_flag (coding->spec.undecided.inhibit_ied,
6542 inhibit_iso_escape_detection);
6543 bool prefer_utf_8 = coding->spec.undecided.prefer_utf_8;
6544
6545 coding->head_ascii = 0;
6546 for (src = coding->source; src < src_end; src++)
6547 {
6548 c = *src;
6549 if (c & 0x80)
6550 {
6551 eight_bit_found = 1;
6552 if (null_byte_found)
6553 break;
6554 }
6555 else if (c < 0x20)
6556 {
6557 if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
6558 && ! inhibit_ied
6559 && ! detect_info.checked)
6560 {
6561 if (detect_coding_iso_2022 (coding, &detect_info))
6562 {
6563
6564 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
6565 {
6566
6567
6568
6569
6570 src = src_end;
6571 coding->head_ascii = src - coding->source;
6572 }
6573 detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
6574 break;
6575 }
6576 }
6577 else if (! c && !inhibit_nbd)
6578 {
6579 null_byte_found = 1;
6580 if (eight_bit_found)
6581 break;
6582 }
6583 else if (! disable_ascii_optimization
6584 && ! inhibit_eol_conversion)
6585 {
6586 if (c == '\r')
6587 {
6588 if (src < src_end && src[1] == '\n')
6589 {
6590 coding->eol_seen |= EOL_SEEN_CRLF;
6591 src++;
6592 if (! eight_bit_found)
6593 coding->head_ascii++;
6594 }
6595 else
6596 coding->eol_seen |= EOL_SEEN_CR;
6597 }
6598 else if (c == '\n')
6599 {
6600 coding->eol_seen |= EOL_SEEN_LF;
6601 }
6602 }
6603
6604 if (! eight_bit_found)
6605 coding->head_ascii++;
6606 }
6607 else if (! eight_bit_found)
6608 coding->head_ascii++;
6609 }
6610
6611 if (null_byte_found || eight_bit_found
6612 || coding->head_ascii < coding->src_bytes
6613 || detect_info.found)
6614 {
6615 enum coding_category category;
6616 struct coding_system *this;
6617
6618 if (coding->head_ascii == coding->src_bytes)
6619
6620 for (i = 0; i < coding_category_raw_text; i++)
6621 {
6622 category = coding_priorities[i];
6623 this = coding_categories + category;
6624 if (detect_info.found & (1 << category))
6625 break;
6626 }
6627 else
6628 {
6629 if (null_byte_found)
6630 {
6631 detect_info.checked |= ~CATEGORY_MASK_UTF_16;
6632 detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
6633 }
6634 else if (prefer_utf_8
6635 && detect_coding_utf_8 (coding, &detect_info))
6636 {
6637 detect_info.checked |= ~CATEGORY_MASK_UTF_8;
6638 detect_info.rejected |= ~CATEGORY_MASK_UTF_8;
6639 }
6640 for (i = 0; i < coding_category_raw_text; i++)
6641 {
6642 category = coding_priorities[i];
6643 this = coding_categories + category;
6644
6645
6646 coding->id = this->id;
6647 if (this->id < 0)
6648 {
6649
6650 detect_info.rejected |= (1 << category);
6651 }
6652 else if (category >= coding_category_raw_text)
6653 continue;
6654 else if (detect_info.checked & (1 << category))
6655 {
6656 if (detect_info.found & (1 << category))
6657 break;
6658 }
6659 else if ((*(this->detector)) (coding, &detect_info)
6660 && detect_info.found & (1 << category))
6661 break;
6662 }
6663 }
6664
6665 if (i < coding_category_raw_text)
6666 {
6667 if (category == coding_category_utf_8_auto)
6668 {
6669 Lisp_Object coding_systems;
6670
6671 coding_systems = AREF (CODING_ID_ATTRS (this->id),
6672 coding_attr_utf_bom);
6673 if (CONSP (coding_systems))
6674 {
6675 if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
6676 found = XCAR (coding_systems);
6677 else
6678 found = XCDR (coding_systems);
6679 }
6680 else
6681 found = CODING_ID_NAME (this->id);
6682 }
6683 else if (category == coding_category_utf_16_auto)
6684 {
6685 Lisp_Object coding_systems;
6686
6687 coding_systems = AREF (CODING_ID_ATTRS (this->id),
6688 coding_attr_utf_bom);
6689 if (CONSP (coding_systems))
6690 {
6691 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6692 found = XCAR (coding_systems);
6693 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
6694 found = XCDR (coding_systems);
6695 }
6696 else
6697 found = CODING_ID_NAME (this->id);
6698 }
6699 else
6700 found = CODING_ID_NAME (this->id);
6701 }
6702 else if (null_byte_found)
6703 found = Qno_conversion;
6704 else if ((detect_info.rejected & CATEGORY_MASK_ANY)
6705 == CATEGORY_MASK_ANY)
6706 found = Qraw_text;
6707 else if (detect_info.rejected)
6708 for (i = 0; i < coding_category_raw_text; i++)
6709 if (! (detect_info.rejected & (1 << coding_priorities[i])))
6710 {
6711 this = coding_categories + coding_priorities[i];
6712 found = CODING_ID_NAME (this->id);
6713 break;
6714 }
6715 }
6716 }
6717 else if (XFIXNUM (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6718 == coding_category_utf_8_auto)
6719 {
6720 Lisp_Object coding_systems
6721 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6722 if (check_ascii (coding) == coding->src_bytes)
6723 {
6724 if (CONSP (coding_systems))
6725 found = XCDR (coding_systems);
6726 }
6727 else
6728 {
6729 struct coding_detection_info detect_info = {0};
6730 if (CONSP (coding_systems)
6731 && detect_coding_utf_8 (coding, &detect_info))
6732 {
6733 if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
6734 found = XCAR (coding_systems);
6735 else
6736 found = XCDR (coding_systems);
6737 }
6738 }
6739 }
6740 else if (XFIXNUM (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6741 == coding_category_utf_16_auto)
6742 {
6743 Lisp_Object coding_systems
6744 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6745 coding->head_ascii = 0;
6746 if (CONSP (coding_systems))
6747 {
6748 struct coding_detection_info detect_info = {0};
6749 if (detect_coding_utf_16 (coding, &detect_info))
6750 {
6751 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6752 found = XCAR (coding_systems);
6753 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
6754 found = XCDR (coding_systems);
6755 }
6756 }
6757 }
6758
6759 if (! NILP (found))
6760 {
6761 int specified_eol = (VECTORP (eol_type) ? EOL_SEEN_NONE
6762 : EQ (eol_type, Qdos) ? EOL_SEEN_CRLF
6763 : EQ (eol_type, Qmac) ? EOL_SEEN_CR
6764 : EOL_SEEN_LF);
6765
6766 setup_coding_system (found, coding);
6767 if (specified_eol != EOL_SEEN_NONE)
6768 adjust_coding_eol_type (coding, specified_eol);
6769 }
6770
6771 coding->mode = saved_mode;
6772 }
6773
6774
6775 static void
6776 decode_eol (struct coding_system *coding)
6777 {
6778 Lisp_Object eol_type;
6779 unsigned char *p, *pbeg, *pend;
6780
6781 eol_type = CODING_ID_EOL_TYPE (coding->id);
6782 if (EQ (eol_type, Qunix) || inhibit_eol_conversion)
6783 return;
6784
6785 if (NILP (coding->dst_object))
6786 pbeg = coding->destination;
6787 else
6788 pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
6789 pend = pbeg + coding->produced;
6790
6791 if (VECTORP (eol_type))
6792 {
6793 int eol_seen = EOL_SEEN_NONE;
6794
6795 for (p = pbeg; p < pend; p++)
6796 {
6797 if (*p == '\n')
6798 eol_seen |= EOL_SEEN_LF;
6799 else if (*p == '\r')
6800 {
6801 if (p + 1 < pend && *(p + 1) == '\n')
6802 {
6803 eol_seen |= EOL_SEEN_CRLF;
6804 p++;
6805 }
6806 else
6807 eol_seen |= EOL_SEEN_CR;
6808 }
6809 }
6810
6811 if ((eol_seen & EOL_SEEN_CRLF) != 0
6812 && (eol_seen & EOL_SEEN_CR) != 0
6813 && (eol_seen & EOL_SEEN_LF) == 0)
6814 eol_seen = EOL_SEEN_CRLF;
6815 else if (eol_seen != EOL_SEEN_NONE
6816 && eol_seen != EOL_SEEN_LF
6817 && eol_seen != EOL_SEEN_CRLF
6818 && eol_seen != EOL_SEEN_CR)
6819 eol_seen = EOL_SEEN_LF;
6820 if (eol_seen != EOL_SEEN_NONE)
6821 eol_type = adjust_coding_eol_type (coding, eol_seen);
6822 }
6823
6824 if (EQ (eol_type, Qmac))
6825 {
6826 for (p = pbeg; p < pend; p++)
6827 if (*p == '\r')
6828 *p = '\n';
6829 }
6830 else if (EQ (eol_type, Qdos))
6831 {
6832 ptrdiff_t n = 0;
6833 ptrdiff_t pos = coding->dst_pos;
6834 ptrdiff_t pos_byte = coding->dst_pos_byte;
6835 ptrdiff_t pos_end = pos_byte + coding->produced - 1;
6836
6837
6838
6839
6840 eassert (!NILP (coding->dst_object));
6841
6842 while (pos_byte < pos_end)
6843 {
6844 int incr;
6845
6846 p = BYTE_POS_ADDR (pos_byte);
6847 if (coding->dst_multibyte)
6848 incr = BYTES_BY_CHAR_HEAD (*p);
6849 else
6850 incr = 1;
6851
6852 if (*p == '\r' && p[1] == '\n')
6853 {
6854 del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
6855 n++;
6856 pos_end--;
6857 }
6858 pos++;
6859 pos_byte += incr;
6860 }
6861 coding->produced -= n;
6862 coding->produced_char -= n;
6863 }
6864 }
6865
6866
6867
6868
6869
6870 enum { MAX_LOOKUP_MAX = min (INT_MAX, MAX_ALLOCA / sizeof (int)) };
6871
6872
6873
6874
6875
6876 static Lisp_Object
6877 get_translation_table (Lisp_Object attrs, bool encodep, int *max_lookup)
6878 {
6879 Lisp_Object standard, translation_table;
6880 Lisp_Object val;
6881
6882 if (NILP (Venable_character_translation))
6883 {
6884 if (max_lookup)
6885 *max_lookup = 0;
6886 return Qnil;
6887 }
6888 if (encodep)
6889 translation_table = CODING_ATTR_ENCODE_TBL (attrs),
6890 standard = Vstandard_translation_table_for_encode;
6891 else
6892 translation_table = CODING_ATTR_DECODE_TBL (attrs),
6893 standard = Vstandard_translation_table_for_decode;
6894 if (NILP (translation_table))
6895 translation_table = standard;
6896 else
6897 {
6898 if (SYMBOLP (translation_table))
6899 translation_table = Fget (translation_table, Qtranslation_table);
6900 else if (CONSP (translation_table))
6901 {
6902 translation_table = Fcopy_sequence (translation_table);
6903 for (val = translation_table; CONSP (val); val = XCDR (val))
6904 if (SYMBOLP (XCAR (val)))
6905 XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
6906 }
6907 if (CHAR_TABLE_P (standard))
6908 {
6909 if (CONSP (translation_table))
6910 translation_table = nconc2 (translation_table, list1 (standard));
6911 else
6912 translation_table = list2 (translation_table, standard);
6913 }
6914 }
6915
6916 if (max_lookup)
6917 {
6918 *max_lookup = 1;
6919 if (CHAR_TABLE_P (translation_table)
6920 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
6921 {
6922 val = XCHAR_TABLE (translation_table)->extras[1];
6923 if (FIXNATP (val) && *max_lookup < XFIXNAT (val))
6924 *max_lookup = min (XFIXNAT (val), MAX_LOOKUP_MAX);
6925 }
6926 else if (CONSP (translation_table))
6927 {
6928 Lisp_Object tail;
6929
6930 for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6931 if (CHAR_TABLE_P (XCAR (tail))
6932 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6933 {
6934 Lisp_Object tailval = XCHAR_TABLE (XCAR (tail))->extras[1];
6935 if (FIXNATP (tailval) && *max_lookup < XFIXNAT (tailval))
6936 *max_lookup = min (XFIXNAT (tailval), MAX_LOOKUP_MAX);
6937 }
6938 }
6939 }
6940 return translation_table;
6941 }
6942
/* Look up character C in TABLE and set TRANS to the result.

   TABLE is a char-table or a list; list elements that are not
   char-tables are ignored.  Whenever a lookup yields a character, C
   is replaced by that character and TRANS is reset to nil; for a
   list, the lookup then continues in the next table with the new C.
   For a list, a non-nil, non-character result stops the search and
   is left in TRANS.  For a single char-table, any non-character
   result (nil included) is left in TRANS.  If TABLE is neither a
   char-table nor a cons, TRANS is nil and C is unchanged.

   C and TRANS must be lvalues, and all arguments are evaluated more
   than once.  */
#define LOOKUP_TRANSLATION_TABLE(table, c, trans)               \
  do {                                                          \
    trans = Qnil;                                               \
    if (CHAR_TABLE_P (table))                                   \
      {                                                         \
        trans = CHAR_TABLE_REF (table, c);                      \
        if (CHARACTERP (trans))                                 \
          c = XFIXNAT (trans), trans = Qnil;                    \
      }                                                         \
    else if (CONSP (table))                                     \
      {                                                         \
        Lisp_Object tail;                                       \
                                                                \
        for (tail = table; CONSP (tail); tail = XCDR (tail))    \
          if (CHAR_TABLE_P (XCAR (tail)))                       \
            {                                                   \
              trans = CHAR_TABLE_REF (XCAR (tail), c);          \
              if (CHARACTERP (trans))                           \
                c = XFIXNAT (trans), trans = Qnil;              \
              else if (! NILP (trans))                          \
                break;                                          \
            }                                                   \
      }                                                         \
  } while (0)
6967
6968
6969
6970
6971
6972
6973
6974
6975
6976
6977 static Lisp_Object
6978 get_translation (Lisp_Object trans, int *buf, int *buf_end, ptrdiff_t *nchars)
6979 {
6980 if (FIXNUMP (trans) || VECTORP (trans))
6981 {
6982 *nchars = 1;
6983 return trans;
6984 }
6985 for (; CONSP (trans); trans = XCDR (trans))
6986 {
6987 Lisp_Object val = XCAR (trans);
6988 Lisp_Object from = XCAR (val);
6989 ptrdiff_t len = ASIZE (from);
6990 ptrdiff_t i;
6991
6992 for (i = 0; i < len; i++)
6993 {
6994 if (buf + i == buf_end)
6995 return Qt;
6996 if (XFIXNUM (AREF (from, i)) != buf[i])
6997 break;
6998 }
6999 if (i == len)
7000 {
7001 *nchars = len;
7002 return XCDR (val);
7003 }
7004 }
7005 return Qnil;
7006 }
7007
7008
/* Write the characters made available by the latest decoder run to
   the destination of CODING, starting at offset coding->produced.

   If coding->chars_at_source is false, the characters are the
   non-negative elements of coding->charbuf; each is translated
   through TRANSLATION_TABLE before being stored.  Negative elements
   are annotation data and are skipped.  If a multi-character
   translation cannot be decided because the buffered characters run
   out and LAST_BLOCK is false, processing stops there.

   If coding->chars_at_source is true, the bytes in
   coding->source[0 .. coding->consumed) are copied, converting
   between the unibyte and multibyte representations when source and
   destination differ.

   When the destination is a buffer, the produced text is made part
   of it with insert_from_gap.  coding->produced and
   coding->produced_char are advanced.

   Return the number of trailing elements of coding->charbuf that
   were left unprocessed.  */
static int
produce_chars (struct coding_system *coding, Lisp_Object translation_table,
               bool last_block)
{
  unsigned char *dst = coding->destination + coding->produced;
  unsigned char *dst_end = coding->destination + coding->dst_bytes;
  ptrdiff_t produced;
  ptrdiff_t produced_chars = 0;
  int carryover = 0;

  if (! coding->chars_at_source)
    {
      /* Source characters are in coding->charbuf.  */
      int *buf = coding->charbuf;
      int *buf_end = buf + coding->charbuf_used;

      if (EQ (coding->src_object, coding->dst_object)
          && ! NILP (coding->dst_object))
        {
          /* In-place conversion: the output may only grow up to the
             part of the source that has already been consumed.  */
          eassert (growable_destination (coding));
          coding_set_source (coding);
          dst_end = ((unsigned char *) coding->source) + coding->consumed;
        }

      while (buf < buf_end)
        {
          int c = *buf;
          ptrdiff_t i;

          if (c >= 0)
            {
              ptrdiff_t from_nchars = 1, to_nchars = 1;
              Lisp_Object trans = Qnil;

              LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
              if (! NILP (trans))
                {
                  trans = get_translation (trans, buf, buf_end, &from_nchars);
                  if (FIXNUMP (trans))
                    c = XFIXNUM (trans);
                  else if (VECTORP (trans))
                    {
                      to_nchars = ASIZE (trans);
                      c = XFIXNUM (AREF (trans, 0));
                    }
                  else if (EQ (trans, Qt) && ! last_block)
                    /* More characters are needed to decide; leave the
                       rest of the buffer as carryover.  */
                    break;
                }

              /* Make sure there is room for TO_NCHARS characters of
                 maximal multibyte length.  */
              if ((dst_end - dst) / MAX_MULTIBYTE_LENGTH < to_nchars)
                {
                  eassert (growable_destination (coding));
                  ptrdiff_t dst_size;
                  if (ckd_mul (&dst_size, to_nchars, MAX_MULTIBYTE_LENGTH)
                      || ckd_add (&dst_size, dst_size, buf_end - buf))
                    memory_full (SIZE_MAX);
                  dst = alloc_destination (coding, dst_size, dst);
                  /* The call above may have relocated the text, so
                     recompute the end of the destination.  */
                  if (EQ (coding->src_object, coding->dst_object))
                    {
                      coding_set_source (coding);
                      dst_end = (((unsigned char *) coding->source)
                                 + coding->consumed);
                    }
                  else
                    dst_end = coding->destination + coding->dst_bytes;
                }

              for (i = 0; i < to_nchars; i++)
                {
                  if (i > 0)
                    c = XFIXNUM (AREF (trans, i));
                  if (coding->dst_multibyte
                      || ! CHAR_BYTE8_P (c))
                    CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
                  else
                    *dst++ = CHAR_TO_BYTE8 (c);
                }
              produced_chars += to_nchars;
              buf += from_nchars;
            }
          else
            /* This is annotation data; -C is its length.  */
            buf += -c;
        }
      carryover = buf_end - buf;
    }
  else
    {
      /* Source characters are at coding->source.  */
      const unsigned char *src = coding->source;
      const unsigned char *src_end = src + coding->consumed;

      if (EQ (coding->dst_object, coding->src_object))
        {
          eassert (growable_destination (coding));
          dst_end = (unsigned char *) src;
        }
      if (coding->src_multibyte != coding->dst_multibyte)
        {
          if (coding->src_multibyte)
            {
              /* NOTE(review): multibytep, consumed_chars and src_base
                 are not referenced directly below; presumably they,
                 and the no_more_source label, are used by the
                 ONE_MORE_BYTE macro -- confirm against its
                 definition.  */
              bool multibytep = 1;
              ptrdiff_t consumed_chars = 0;

              while (1)
                {
                  const unsigned char *src_base = src;
                  int c;

                  ONE_MORE_BYTE (c);
                  if (dst == dst_end)
                    {
                      eassert (growable_destination (coding));
                      /* In-place conversion: reading more source may
                         have freed room, so retry before growing.  */
                      if (EQ (coding->src_object, coding->dst_object))
                        dst_end = (unsigned char *) src;
                      if (dst == dst_end)
                        {
                          ptrdiff_t offset = src - coding->source;

                          dst = alloc_destination (coding, src_end - src + 1,
                                                   dst);
                          dst_end = coding->destination + coding->dst_bytes;
                          /* Re-base the source pointers from the saved
                             offset after the reallocation.  */
                          coding_set_source (coding);
                          src = coding->source + offset;
                          src_end = coding->source + coding->consumed;
                          if (EQ (coding->src_object, coding->dst_object))
                            dst_end = (unsigned char *) src;
                        }
                    }
                  *dst++ = c;
                  produced_chars++;
                }
            no_more_source:
              ;
            }
          else
            while (src < src_end)
              {
                /* NOTE(review): multibytep is presumably consumed by
                   EMIT_ONE_BYTE -- confirm.  */
                bool multibytep = 1;
                int c = *src++;

                /* Up to two bytes of room are required per source
                   byte.  */
                if (dst >= dst_end - 1)
                  {
                    eassert (growable_destination (coding));
                    if (EQ (coding->src_object, coding->dst_object))
                      dst_end = (unsigned char *) src;
                    if (dst >= dst_end - 1)
                      {
                        ptrdiff_t offset = src - coding->source;
                        ptrdiff_t more_bytes;

                        if (EQ (coding->src_object, coding->dst_object))
                          more_bytes = ((src_end - src) / 2) + 2;
                        else
                          more_bytes = src_end - src + 2;
                        dst = alloc_destination (coding, more_bytes, dst);
                        dst_end = coding->destination + coding->dst_bytes;
                        coding_set_source (coding);
                        src = coding->source + offset;
                        src_end = coding->source + coding->consumed;
                        if (EQ (coding->src_object, coding->dst_object))
                          dst_end = (unsigned char *) src;
                      }
                  }
                EMIT_ONE_BYTE (c);
              }
        }
      else
        {
          /* Same representation on both sides: plain byte copy.  */
          if (!EQ (coding->src_object, coding->dst_object))
            {
              ptrdiff_t require = coding->src_bytes - coding->dst_bytes;

              if (require > 0)
                {
                  ptrdiff_t offset = src - coding->source;

                  dst = alloc_destination (coding, require, dst);
                  coding_set_source (coding);
                  src = coding->source + offset;
                  src_end = coding->source + coding->consumed;
                }
            }
          produced_chars = coding->consumed_char;
          while (src < src_end)
            *dst++ = *src++;
        }
    }

  produced = dst - (coding->destination + coding->produced);
  if (BUFFERP (coding->dst_object) && produced_chars > 0)
    insert_from_gap (produced_chars, produced, 0);
  coding->produced += produced;
  coding->produced_char += produced_chars;
  return carryover;
}
7205
7206
7207
7208
7209
7210
7211 static void
7212 produce_composition (struct coding_system *coding, int *charbuf, ptrdiff_t pos)
7213 {
7214 int len;
7215 ptrdiff_t to;
7216 enum composition_method method;
7217 Lisp_Object components;
7218
7219 len = -charbuf[0] - MAX_ANNOTATION_LENGTH;
7220 to = pos + charbuf[2];
7221 method = (enum composition_method) (charbuf[4]);
7222
7223 if (method == COMPOSITION_RELATIVE)
7224 components = Qnil;
7225 else
7226 {
7227 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
7228 int i, j;
7229
7230 if (method == COMPOSITION_WITH_RULE)
7231 len = charbuf[2] * 3 - 2;
7232 charbuf += MAX_ANNOTATION_LENGTH;
7233
7234 for (i = j = 0; i < len && charbuf[i] != -1; i++, j++)
7235 {
7236 if (charbuf[i] >= 0)
7237 args[j] = make_fixnum (charbuf[i]);
7238 else
7239 {
7240 i++;
7241 args[j] = make_fixnum (charbuf[i] % 0x100);
7242 }
7243 }
7244 components = (i == j ? Fstring (j, args) : Fvector (j, args));
7245 }
7246 compose_text (pos, to, components, Qnil, coding->dst_object);
7247 }
7248
7249
7250
7251
7252
7253
7254
7255 static void
7256 produce_charset (struct coding_system *coding, int *charbuf, ptrdiff_t pos)
7257 {
7258 ptrdiff_t from = pos - charbuf[2];
7259 struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
7260
7261 Fput_text_property (make_fixnum (from), make_fixnum (pos),
7262 Qcharset, CHARSET_NAME (charset),
7263 coding->dst_object);
7264 }
7265
/* Maximum number of int units allocated for coding->charbuf.  */
#define MAX_CHARBUF_SIZE 0x4000

/* Number of units added to the requested size when allocating
   coding->charbuf.
   NOTE(review): presumably this is the headroom that the decoders
   need for annotation data -- confirm against their definitions.  */
#define MAX_CHARBUF_EXTRA_SIZE ((MAX_ANNOTATION_LENGTH * 3) + 1)

/* Allocate the work area coding->charbuf of CODING with room for
   SIZE units plus MAX_CHARBUF_EXTRA_SIZE, capped at
   MAX_CHARBUF_SIZE, and record its size in coding->charbuf_size.
   The memory comes from SAFE_ALLOCA; the callers in this file
   declare USE_SAFE_ALLOCA beforehand and call SAFE_FREE when done.  */
#define ALLOC_CONVERSION_WORK_AREA(coding, size)                \
  do {                                                          \
    ptrdiff_t units = min ((size) + MAX_CHARBUF_EXTRA_SIZE,     \
                           MAX_CHARBUF_SIZE);                   \
    coding->charbuf = SAFE_ALLOCA (units * sizeof (int));       \
    coding->charbuf_size = units;                               \
  } while (0)
7279
7280 static void
7281 produce_annotation (struct coding_system *coding, ptrdiff_t pos)
7282 {
7283 int *charbuf = coding->charbuf;
7284 int *charbuf_end = charbuf + coding->charbuf_used;
7285
7286 if (NILP (coding->dst_object))
7287 return;
7288
7289 while (charbuf < charbuf_end)
7290 {
7291 if (*charbuf >= 0)
7292 pos++, charbuf++;
7293 else
7294 {
7295 int len = -*charbuf;
7296
7297 if (len > 2)
7298 switch (charbuf[1])
7299 {
7300 case CODING_ANNOTATE_COMPOSITION_MASK:
7301 produce_composition (coding, charbuf, pos);
7302 break;
7303 case CODING_ANNOTATE_CHARSET_MASK:
7304 produce_charset (coding, charbuf, pos);
7305 break;
7306 default:
7307 break;
7308 }
7309 charbuf += len;
7310 }
7311 }
7312 }
7313
7314
7315
7316
7317
7318
7319
7320
7321
7322
7323
7324
7325
7326
7327
7328
7329
7330
7331
7332
7333
7334
7335
7336
7337
7338
7339
7340
/* Decode the source text described by CODING and write the result to
   its destination.

   The decoder of CODING is run repeatedly; after each run the
   characters it left in coding->charbuf are stored by produce_chars
   and any annotations are applied by produce_annotation.  Source
   bytes that remain undecoded at the end are either emitted as raw
   bytes (when CODING_MODE_LAST_BLOCK is set) or saved in
   coding->carryover.  Finally, end-of-line conversion is applied
   unless the eol type is `unix' or conversion is inhibited.

   When the destination is a buffer, it is made current, the gap is
   moved to point, and the whole insertion is recorded as a single
   undo entry.  */
static void
decode_coding (struct coding_system *coding)
{
  Lisp_Object attrs;
  Lisp_Object undo_list;
  Lisp_Object translation_table;
  struct ccl_spec cclspec;
  int carryover;
  int i;

  USE_SAFE_ALLOCA;

  /* If the gap lies inside the source region of a buffer, move it to
     the start of the region so the source bytes are contiguous.  */
  if (BUFFERP (coding->src_object)
      && coding->src_pos > 0
      && coding->src_pos < GPT
      && coding->src_pos + coding->src_chars > GPT)
    move_gap_both (coding->src_pos, coding->src_pos_byte);

  undo_list = Qt;
  if (BUFFERP (coding->dst_object))
    {
      set_buffer_internal (XBUFFER (coding->dst_object));
      if (GPT != PT)
        move_gap_both (PT, PT_BYTE);

      /* Undo recording is switched off below so that the whole
         insertion can be recorded at once by record_insert at the
         end.  As nothing is recorded while the undo list is t, the
         first change of an unmodified buffer is recorded explicitly
         here.  */
      if (MODIFF <= SAVE_MODIFF)
        record_first_change ();

      undo_list = BVAR (current_buffer, undo_list);
      bset_undo_list (current_buffer, Qt);
    }

  coding->consumed = coding->consumed_char = 0;
  coding->produced = coding->produced_char = 0;
  coding->chars_at_source = 0;
  record_conversion_result (coding, CODING_RESULT_SUCCESS);

  ALLOC_CONVERSION_WORK_AREA (coding, coding->src_bytes);

  attrs = CODING_ID_ATTRS (coding->id);
  translation_table = get_translation_table (attrs, 0, NULL);

  carryover = 0;
  if (coding->decoder == decode_coding_ccl)
    {
      coding->spec.ccl = &cclspec;
      setup_ccl_program (&cclspec.ccl, CODING_CCL_DECODER (coding));
    }
  do
    {
      /* Destination position of the first character this run will
         produce; needed for the annotations.  */
      ptrdiff_t pos = coding->dst_pos + coding->produced_char;

      coding_set_source (coding);
      coding->annotated = 0;
      coding->charbuf_used = carryover;
      (*(coding->decoder)) (coding);
      coding_set_destination (coding);
      carryover = produce_chars (coding, translation_table, 0);
      if (coding->annotated)
        produce_annotation (coding, pos);
      /* Move the unprocessed tail of charbuf to its front so that the
         next decoder run appends after it.  */
      for (i = 0; i < carryover; i++)
        coding->charbuf[i]
          = coding->charbuf[coding->charbuf_used - carryover + i];
    }
  while (coding->result == CODING_RESULT_INSUFFICIENT_DST
         || (coding->consumed < coding->src_bytes
             && (coding->result == CODING_RESULT_SUCCESS
                 || coding->result == CODING_RESULT_INVALID_SRC)));

  if (carryover > 0)
    {
      /* No more input will arrive for the pending characters; emit
         them as the last block.  */
      coding_set_destination (coding);
      coding->charbuf_used = carryover;
      produce_chars (coding, translation_table, 1);
    }

  coding->carryover_bytes = 0;
  if (coding->consumed < coding->src_bytes)
    {
      ptrdiff_t nbytes = coding->src_bytes - coding->consumed;
      const unsigned char *src;

      coding_set_source (coding);
      coding_set_destination (coding);
      src = coding->source + coding->consumed;

      if (coding->mode & CODING_MODE_LAST_BLOCK)
        {
          /* Flush the unprocessed bytes as raw-byte characters.
             NOTE(review): nothing here checks that they fit in
             coding->charbuf; presumably the decoders guarantee it --
             confirm.  */
          coding->charbuf_used = 0;
          coding->chars_at_source = 0;

          while (nbytes-- > 0)
            {
              int c;

              /* In multibyte text a raw byte occupies two bytes;
                 read it as one character.  */
              if (coding->src_multibyte
                  && CHAR_BYTE8_HEAD_P (*src) && nbytes > 0)
                {
                  c = string_char_advance (&src);
                  nbytes--;
                }
              else
                {
                  c = *src++;

                  if (c & 0x80)
                    c = BYTE8_TO_CHAR (c);
                }
              coding->charbuf[coding->charbuf_used++] = c;
            }
          produce_chars (coding, Qnil, 1);
        }
      else
        {
          /* Save the unprocessed bytes in coding->carryover, dropping
             whatever does not fit.  */
          unsigned char *p = coding->carryover;

          if (nbytes > sizeof coding->carryover)
            nbytes = sizeof coding->carryover;
          coding->carryover_bytes = nbytes;
          while (nbytes-- > 0)
            *p++ = *src++;
        }
      coding->consumed = coding->src_bytes;
    }

  if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix)
      && !inhibit_eol_conversion)
    decode_eol (coding);
  if (BUFFERP (coding->dst_object))
    {
      /* Restore the undo list and record the insertion as a whole.  */
      bset_undo_list (current_buffer, undo_list);
      record_insert (coding->dst_pos, coding->produced_char);
    }

  SAFE_FREE ();
}
7490
7491
7492
7493
7494
7495
7496
7497
7498
7499
7500
7501
/* Handle the composition of coding->src_object at or after position
   POS, looking no further than LIMIT.

   If a composition starts exactly at POS and ends at or before LIMIT,
   store its annotation data at BUF: a header written by
   ADD_COMPOSITION_DATA, followed, for methods other than
   COMPOSITION_RELATIVE, by the components (taken from a vector, a
   string, a single fixnum or a list).  The first element of the
   header is then decreased by the number of components stored.

   Set *STOP to the next position at which this function has to be
   called again, or to LIMIT if there is none.  Return the address
   just past the data stored at BUF (BUF itself if nothing was
   stored).  */
static int *
handle_composition_annotation (ptrdiff_t pos, ptrdiff_t limit,
                               struct coding_system *coding, int *buf,
                               ptrdiff_t *stop)
{
  ptrdiff_t start, end;
  Lisp_Object prop;

  if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
      || end > limit)
    *stop = limit;
  else if (start > pos)
    *stop = start;
  else
    {
      if (start == pos)
        {
          /* A composition starts here; store the corresponding
             annotation data in BUF.  HEAD remembers the first element
             so that its length field can be adjusted below.  */
          int *head = buf;
          enum composition_method method = composition_method (prop);
          int nchars = COMPOSITION_LENGTH (prop);

          ADD_COMPOSITION_DATA (buf, nchars, 0, method);
          if (method != COMPOSITION_RELATIVE)
            {
              Lisp_Object components;
              ptrdiff_t i, len, i_byte;

              components = COMPOSITION_COMPONENTS (prop);
              if (VECTORP (components))
                {
                  len = ASIZE (components);
                  for (i = 0; i < len; i++)
                    *buf++ = XFIXNUM (AREF (components, i));
                }
              else if (STRINGP (components))
                {
                  len = SCHARS (components);
                  i = i_byte = 0;
                  while (i < len)
                    *buf++ = fetch_string_char_advance (components,
                                                        &i, &i_byte);
                }
              else if (FIXNUMP (components))
                {
                  len = 1;
                  *buf++ = XFIXNUM (components);
                }
              else if (CONSP (components))
                {
                  for (len = 0; CONSP (components);
                       len++, components = XCDR (components))
                    *buf++ = XFIXNUM (XCAR (components));
                }
              else
                emacs_abort ();
              /* The length is stored negated, so subtracting extends
                 the annotation by LEN elements.  */
              *head -= len;
            }
        }

      /* Look for the next composition after the end of this one.  */
      if (find_composition (end, limit, &start, &end, &prop,
                            coding->src_object)
          && end <= limit)
        *stop = start;
      else
        *stop = limit;
    }
  return buf;
}
7572
7573
7574
7575
7576
7577
7578
7579
7580
7581
7582
7583 static int *
7584 handle_charset_annotation (ptrdiff_t pos, ptrdiff_t limit,
7585 struct coding_system *coding, int *buf,
7586 ptrdiff_t *stop)
7587 {
7588 Lisp_Object val, next;
7589 int id;
7590
7591 val = Fget_text_property (make_fixnum (pos), Qcharset, coding->src_object);
7592 if (! NILP (val) && CHARSETP (val))
7593 id = XFIXNUM (CHARSET_SYMBOL_ID (val));
7594 else
7595 id = -1;
7596 ADD_CHARSET_DATA (buf, 0, id);
7597 next = Fnext_single_property_change (make_fixnum (pos), Qcharset,
7598 coding->src_object,
7599 make_fixnum (limit));
7600 *stop = XFIXNUM (next);
7601 return buf;
7602 }
7603
7604
/* Read characters from the source of CODING, starting at byte offset
   coding->consumed, and store them in coding->charbuf for the
   encoder.

   Each character goes through end-of-line conversion (unless the eol
   type is `unix', undecided, or conversion is inhibited) and then
   through TRANSLATION_TABLE.  MAX_LOOKUP is the maximum number of
   characters examined for a multi-character translation.  Charset
   annotations are inserted where requested.

   On return, coding->consumed, coding->consumed_char and
   coding->charbuf_used are updated and coding->chars_at_source is
   cleared.  */
static void
consume_chars (struct coding_system *coding, Lisp_Object translation_table,
               int max_lookup)
{
  int *buf = coding->charbuf;
  int *buf_end = coding->charbuf + coding->charbuf_size;
  const unsigned char *src = coding->source + coding->consumed;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  ptrdiff_t pos = coding->src_pos + coding->consumed_char;
  ptrdiff_t end_pos = coding->src_pos + coding->src_chars;
  bool multibytep = coding->src_multibyte;
  Lisp_Object eol_type;
  int c;
  ptrdiff_t stop, stop_composition, stop_charset;
  int *lookup_buf = NULL;

  /* Scratch area for the characters handed to get_translation.  */
  if (! NILP (translation_table))
    lookup_buf = alloca (sizeof (int) * max_lookup);

  eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
  if (VECTORP (eol_type))
    eol_type = Qunix;

  /* The composition flag is cleared unconditionally, so the
     composition branch below is never taken.  */
  coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;

  if (NILP (coding->src_object))
    stop = stop_composition = stop_charset = end_pos;
  else
    {
      if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
        stop = stop_composition = pos;
      else
        stop = stop_composition = end_pos;
      if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
        stop = stop_charset = pos;
      else
        stop_charset = end_pos;
    }

  /* Keep headroom at the end of the buffer.
     NOTE(review): presumably for the extra '\r' of a DOS line ending
     plus one annotation -- confirm.  */
  buf_end -= 1 + MAX_ANNOTATION_LENGTH;
  while (buf < buf_end)
    {
      Lisp_Object trans;

      if (pos == stop)
        {
          if (pos == end_pos)
            break;
          if (pos == stop_composition)
            buf = handle_composition_annotation (pos, end_pos, coding,
                                                 buf, &stop_composition);
          if (pos == stop_charset)
            buf = handle_charset_annotation (pos, end_pos, coding,
                                             buf, &stop_charset);
          stop = (stop_composition < stop_charset
                  ? stop_composition : stop_charset);
        }

      if (! multibytep)
        {
          if (coding->encoder == encode_coding_raw_text
              || coding->encoder == encode_coding_ccl)
            c = *src++, pos++;
          else
            {
              /* A byte sequence in unibyte text that forms a valid
                 multibyte character is read as that character; POS
                 advances by its length in bytes.  */
              int bytes = multibyte_length (src, src_end, true, true);
              if (0 < bytes)
                c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
              else
                c = BYTE8_TO_CHAR (*src), src++, pos++;
            }
        }
      else
        c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
      if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
        c = '\n';
      if (! EQ (eol_type, Qunix))
        {
          if (c == '\n')
            {
              /* DOS: emit CR before the LF; otherwise replace the LF
                 with a CR.  */
              if (EQ (eol_type, Qdos))
                *buf++ = '\r';
              else
                c = '\r';
            }
        }

      trans = Qnil;
      LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
      if (NILP (trans))
        *buf++ = c;
      else
        {
          ptrdiff_t from_nchars = 1, to_nchars = 1;
          int *lookup_buf_end;
          const unsigned char *p = src;
          int i;

          /* Collect C and up to MAX_LOOKUP - 1 following characters
             without consuming them.  */
          lookup_buf[0] = c;
          for (i = 1; i < max_lookup && p < src_end; i++)
            lookup_buf[i] = string_char_advance (&p);
          lookup_buf_end = lookup_buf + i;
          trans = get_translation (trans, lookup_buf, lookup_buf_end,
                                   &from_nchars);
          if (FIXNUMP (trans))
            c = XFIXNUM (trans);
          else if (VECTORP (trans))
            {
              to_nchars = ASIZE (trans);
              if (buf_end - buf < to_nchars)
                break;
              c = XFIXNUM (AREF (trans, 0));
            }
          else
            break;
          *buf++ = c;
          for (i = 1; i < to_nchars; i++)
            *buf++ = XFIXNUM (AREF (trans, i));
          /* Consume the remaining source characters that the
             translation matched.  */
          for (i = 1; i < from_nchars; i++, pos++)
            src += multibyte_length (src, NULL, false, true);
        }
    }

  coding->consumed = src - coding->source;
  coding->consumed_char = pos - coding->src_pos;
  coding->charbuf_used = buf - coding->charbuf;
  coding->chars_at_source = 0;
}
7735
7736
7737
7738
7739
7740
7741
7742
7743
7744
7745
7746
7747
7748
7749
7750
7751
7752
7753
7754
7755
7756
7757
7758 static void
7759 encode_coding (struct coding_system *coding)
7760 {
7761 Lisp_Object attrs;
7762 Lisp_Object translation_table;
7763 int max_lookup;
7764 struct ccl_spec cclspec;
7765
7766 USE_SAFE_ALLOCA;
7767
7768 attrs = CODING_ID_ATTRS (coding->id);
7769 if (coding->encoder == encode_coding_raw_text)
7770 translation_table = Qnil, max_lookup = 0;
7771 else
7772 translation_table = get_translation_table (attrs, 1, &max_lookup);
7773
7774 if (BUFFERP (coding->dst_object))
7775 {
7776 set_buffer_internal (XBUFFER (coding->dst_object));
7777 coding->dst_multibyte
7778 = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
7779 }
7780
7781 coding->consumed = coding->consumed_char = 0;
7782 coding->produced = coding->produced_char = 0;
7783 record_conversion_result (coding, CODING_RESULT_SUCCESS);
7784
7785 ALLOC_CONVERSION_WORK_AREA (coding, coding->src_chars);
7786
7787 if (coding->encoder == encode_coding_ccl)
7788 {
7789 coding->spec.ccl = &cclspec;
7790 setup_ccl_program (&cclspec.ccl, CODING_CCL_ENCODER (coding));
7791 }
7792 do {
7793 coding_set_source (coding);
7794 consume_chars (coding, translation_table, max_lookup);
7795 coding_set_destination (coding);
7796
7797
7798 unsigned saved_mode = coding->mode;
7799 if (coding->consumed_char < coding->src_chars)
7800 coding->mode &= ~CODING_MODE_LAST_BLOCK;
7801 (*(coding->encoder)) (coding);
7802 coding->mode = saved_mode;
7803 } while (coding->consumed_char < coding->src_chars);
7804
7805 if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
7806 insert_from_gap (coding->produced_char, coding->produced, 0);
7807
7808 SAFE_FREE ();
7809 }
7810
7811
7812
7813
7814
7815
7816
7817
7818
7819
7820
7821
7822
7823
/* Name of the work buffer used for code conversion; also the base
   from which names of additional work buffers are generated.  */
static Lisp_Object Vcode_conversion_workbuf_name;

/* The work buffer that is kept and reused between conversions.  It is
   created again by code_conversion_save when it is no longer live.  */
static Lisp_Object Vcode_conversion_reused_workbuf;

/* True while Vcode_conversion_reused_workbuf is in use; a conversion
   that starts in the meantime gets a freshly created work buffer.  */
static bool reused_workbuf_in_use;
7831
7832 static void
7833 code_conversion_restore (Lisp_Object arg)
7834 {
7835 Lisp_Object current, workbuf;
7836
7837 current = XCAR (arg);
7838 workbuf = XCDR (arg);
7839 if (! NILP (workbuf))
7840 {
7841 if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7842 reused_workbuf_in_use = false;
7843 else
7844 Fkill_buffer (workbuf);
7845 }
7846 set_buffer_internal (XBUFFER (current));
7847 }
7848
7849 Lisp_Object
7850 code_conversion_save (bool with_work_buf, bool multibyte)
7851 {
7852 Lisp_Object workbuf = Qnil;
7853
7854 if (with_work_buf)
7855 {
7856 if (reused_workbuf_in_use)
7857 {
7858 Lisp_Object name
7859 = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
7860 workbuf = Fget_buffer_create (name, Qt);
7861 }
7862 else
7863 {
7864 if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
7865 Vcode_conversion_reused_workbuf
7866 = Fget_buffer_create (Vcode_conversion_workbuf_name, Qt);
7867 workbuf = Vcode_conversion_reused_workbuf;
7868 }
7869 }
7870 record_unwind_protect (code_conversion_restore,
7871 Fcons (Fcurrent_buffer (), workbuf));
7872 if (!NILP (workbuf))
7873 {
7874 struct buffer *current = current_buffer;
7875 set_buffer_internal (XBUFFER (workbuf));
7876
7877
7878
7879 Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
7880 Ferase_buffer ();
7881 bset_undo_list (current_buffer, Qt);
7882 bset_enable_multibyte_characters (current_buffer, multibyte ? Qt : Qnil);
7883 if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7884 reused_workbuf_in_use = true;
7885 set_buffer_internal (current);
7886 }
7887
7888 return workbuf;
7889 }
7890
7891 static void
7892 coding_restore_undo_list (Lisp_Object arg)
7893 {
7894 Lisp_Object undo_list = XCAR (arg);
7895 struct buffer *buf = XBUFFER (XCDR (arg));
7896
7897 bset_undo_list (buf, undo_list);
7898 }
7899
7900
/* Decode, according to CODING, the BYTES bytes of unibyte text that
   occupy the end of the gap of the current buffer, and insert the
   result at point.  The gap must be at point.

   Fast path: if the coding system is ASCII compatible, has no
   post-read-conversion function and no translation table applies,
   and the text is pure ASCII (or, for a UTF-8 coding system, already
   known to be valid UTF-8), the bytes are inserted as they are after
   converting end-of-line in place; an initial UTF-8 BOM is dropped
   unless the coding system is specified as being without BOM.

   Otherwise the text goes through decode_coding and then through the
   post-read-conversion function, if any.

   On return, coding->produced and coding->produced_char describe the
   inserted text.  */
void
decode_coding_gap (struct coding_system *coding, ptrdiff_t bytes)
{
  specpdl_ref count = SPECPDL_INDEX ();
  Lisp_Object attrs;

  eassert (GPT_BYTE == PT_BYTE);

  /* Source and destination are both the current buffer.  The source
     positions are negative.
     NOTE(review): presumably coding_set_source takes a negative
     position to mean text at the end of the gap -- confirm.  */
  coding->src_object = Fcurrent_buffer ();
  coding->src_chars = bytes;
  coding->src_bytes = bytes;
  coding->src_pos = -bytes;
  coding->src_pos_byte = -bytes;
  coding->src_multibyte = false;
  coding->dst_object = coding->src_object;
  coding->dst_pos = PT;
  coding->dst_pos_byte = PT_BYTE;
  eassert (coding->dst_multibyte
           == !NILP (BVAR (current_buffer, enable_multibyte_characters)));

  coding->head_ascii = -1;
  coding->detected_utf8_bytes = coding->detected_utf8_chars = -1;
  coding->eol_seen = EOL_SEEN_NONE;
  if (CODING_REQUIRE_DETECTION (coding))
    detect_coding (coding);
  attrs = CODING_ID_ATTRS (coding->id);
  if (! disable_ascii_optimization
      && ! coding->src_multibyte
      && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
      && NILP (CODING_ATTR_POST_READ (attrs))
      && NILP (get_translation_table (attrs, 0, NULL)))
    {
      ptrdiff_t chars = coding->head_ascii;
      if (chars < 0)
        chars = check_ascii (coding);
      if (chars != bytes)
        {
          /* Not all of the text is ASCII.  The fast path is still
             usable for UTF-8 text known to be valid as a whole.  */
          if (EQ (CODING_ATTR_TYPE (attrs), Qutf_8)
              && coding->detected_utf8_bytes == coding->src_bytes)
            {
              if (coding->detected_utf8_chars >= 0)
                chars = coding->detected_utf8_chars;
              else
                chars = check_utf_8 (coding);
              /* Drop a leading BOM from the text to insert.  */
              if (CODING_UTF_8_BOM (coding) != utf_without_bom
                  && coding->head_ascii == 0
                  && coding->source[0] == UTF_8_BOM_1
                  && coding->source[1] == UTF_8_BOM_2
                  && coding->source[2] == UTF_8_BOM_3)
                {
                  chars--;
                  bytes -= 3;
                  coding->src_bytes -= 3;
                }
            }
          else
            chars = -1;
        }
      if (chars >= 0)
        {
          Lisp_Object eol_type;

          eol_type = CODING_ID_EOL_TYPE (coding->id);
          if (VECTORP (eol_type))
            {
              if (coding->eol_seen != EOL_SEEN_NONE)
                eol_type = adjust_coding_eol_type (coding, coding->eol_seen);
            }
          if (EQ (eol_type, Qmac))
            {
              /* Replace every CR with LF in place.  */
              unsigned char *src_end = GAP_END_ADDR;
              unsigned char *src = src_end - coding->src_bytes;

              while (src < src_end)
                {
                  if (*src++ == '\r')
                    src[-1] = '\n';
                }
            }
          else if (EQ (eol_type, Qdos))
            {
              /* Remove the CR of every CRLF, copying backward from
                 the end so that the text stays adjacent to the end
                 of the gap.  */
              unsigned char *src = GAP_END_ADDR;
              unsigned char *src_beg = src - coding->src_bytes;
              unsigned char *dst = src;
              ptrdiff_t diff;

              while (src_beg < src)
                {
                  *--dst = *--src;
                  if (*src == '\n' && src > src_beg && src[-1] == '\r')
                    src--;
                }
              /* DIFF is the number of CRs removed.  */
              diff = dst - src;
              bytes -= diff;
              chars -= diff;
            }
          coding->produced = bytes;
          coding->produced_char = chars;
          insert_from_gap (chars, bytes, 1);
          return;
        }
    }
  code_conversion_save (0, 0);

  coding->mode |= CODING_MODE_LAST_BLOCK;
  current_buffer->text->inhibit_shrinking = 1;
  decode_coding (coding);
  current_buffer->text->inhibit_shrinking = 0;

  if (! NILP (CODING_ATTR_POST_READ (attrs)))
    {
      ptrdiff_t prev_Z = Z, prev_Z_BYTE = Z_BYTE;
      Lisp_Object val;
      Lisp_Object undo_list = BVAR (current_buffer, undo_list);

      /* Run the post-read-conversion function at the start of the
         decoded text with undo recording disabled, then account for
         any change it made to the size of the buffer.  */
      record_unwind_protect (coding_restore_undo_list,
                             Fcons (undo_list, Fcurrent_buffer ()));
      bset_undo_list (current_buffer, Qt);
      TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
      val = call1 (CODING_ATTR_POST_READ (attrs),
                   make_fixnum (coding->produced_char));
      CHECK_FIXNAT (val);
      coding->produced_char += Z - prev_Z;
      coding->produced += Z_BYTE - prev_Z_BYTE;
    }

  unbind_to (count, Qnil);
}
8030
8031
8032
8033
8034
8035
8036
8037
8038
8039
8040
8041
8042
8043
8044
8045
8046
8047
8048
8049
8050
8051
8052
8053
8054
8055
8056
8057
8058
8059
8060
/* Decode, according to CODING, the text of SRC_OBJECT between
   character positions FROM and TO (byte positions FROM_BYTE and
   TO_BYTE).

   SRC_OBJECT is a string or a buffer.  For any other value no source
   position is set here.
   NOTE(review): presumably the caller has set coding->source in that
   case -- confirm.

   DST_OBJECT selects the destination:
   - t: the text is decoded into a work buffer and coding->dst_object
     is set to a string holding the result;
   - a buffer: the decoded text is inserted at point of that buffer,
     which may be SRC_OBJECT itself, in which case the source region
     is replaced;
   - otherwise (nil): the result is meant for coding->destination.
     If the coding system has a post-read-conversion function, the
     decoding is done in a work buffer whose contents are copied to
     coding->destination afterwards.

   The source is treated as multibyte if it has more bytes than
   characters.  The value of `deactivate-mark' is preserved.  */
void
decode_coding_object (struct coding_system *coding,
                      Lisp_Object src_object,
                      ptrdiff_t from, ptrdiff_t from_byte,
                      ptrdiff_t to, ptrdiff_t to_byte,
                      Lisp_Object dst_object)
{
  specpdl_ref count = SPECPDL_INDEX ();
  unsigned char *destination UNINIT;
  ptrdiff_t dst_bytes UNINIT;
  ptrdiff_t chars = to - from;
  ptrdiff_t bytes = to_byte - from_byte;
  Lisp_Object attrs;
  ptrdiff_t saved_pt = -1, saved_pt_byte UNINIT;
  bool need_marker_adjustment = 0;
  Lisp_Object old_deactivate_mark;

  old_deactivate_mark = Vdeactivate_mark;

  if (NILP (dst_object))
    {
      /* Remember the caller's destination area; it is used again
         after decoding.  */
      destination = coding->destination;
      dst_bytes = coding->dst_bytes;
    }

  coding->src_object = src_object;
  coding->src_chars = chars;
  coding->src_bytes = bytes;
  coding->src_multibyte = chars < bytes;

  if (STRINGP (src_object))
    {
      coding->src_pos = from;
      coding->src_pos_byte = from_byte;
    }
  else if (BUFFERP (src_object))
    {
      set_buffer_internal (XBUFFER (src_object));
      if (from != GPT)
        move_gap_both (from, from_byte);
      if (EQ (src_object, dst_object))
        {
          struct Lisp_Marker *tail;

          /* Flag the markers sitting at the edge of the region (FROM
             for insertion-type markers, TO for the others); they are
             repositioned explicitly after decoding.  */
          for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
            {
              tail->need_adjustment
                = tail->charpos == (tail->insertion_type ? from : to);
              need_marker_adjustment |= tail->need_adjustment;
            }
          saved_pt = PT, saved_pt_byte = PT_BYTE;
          TEMP_SET_PT_BOTH (from, from_byte);
          current_buffer->text->inhibit_shrinking = 1;
          /* Delete the source region and refer to it with negative
             positions.
             NOTE(review): presumably the deleted bytes stay readable
             at the end of the gap, as in decode_coding_gap --
             confirm.  */
          del_range_both (from, from_byte, to, to_byte, 1);
          coding->src_pos = -chars;
          coding->src_pos_byte = -bytes;
        }
      else
        {
          coding->src_pos = from;
          coding->src_pos_byte = from_byte;
        }
    }

  if (CODING_REQUIRE_DETECTION (coding))
    detect_coding (coding);
  attrs = CODING_ID_ATTRS (coding->id);

  if (EQ (dst_object, Qt)
      || (! NILP (CODING_ATTR_POST_READ (attrs))
          && NILP (dst_object)))
    {
      /* Decode into a work buffer.  */
      coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
      coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
      coding->dst_pos = BEG;
      coding->dst_pos_byte = BEG_BYTE;
    }
  else if (BUFFERP (dst_object))
    {
      code_conversion_save (0, 0);
      coding->dst_object = dst_object;
      coding->dst_pos = BUF_PT (XBUFFER (dst_object));
      coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
      coding->dst_multibyte
        = ! NILP (BVAR (XBUFFER (dst_object), enable_multibyte_characters));
    }
  else
    {
      code_conversion_save (0, 0);
      coding->dst_object = Qnil;
      /* The result is always produced as multibyte here, whatever
         CODING_FOR_UNIBYTE says.  */
      coding->dst_multibyte = 1;
    }

  decode_coding (coding);

  if (BUFFERP (coding->dst_object))
    set_buffer_internal (XBUFFER (coding->dst_object));

  if (! NILP (CODING_ATTR_POST_READ (attrs)))
    {
      ptrdiff_t prev_Z = Z, prev_Z_BYTE = Z_BYTE;
      Lisp_Object val;
      Lisp_Object undo_list = BVAR (current_buffer, undo_list);
      specpdl_ref count1 = SPECPDL_INDEX ();

      /* Run the post-read-conversion function at the start of the
         decoded text with undo recording disabled, then account for
         any change it made to the size of the buffer.  */
      record_unwind_protect (coding_restore_undo_list,
                             Fcons (undo_list, Fcurrent_buffer ()));
      bset_undo_list (current_buffer, Qt);
      TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
      val = safe_call1 (CODING_ATTR_POST_READ (attrs),
                        make_fixnum (coding->produced_char));
      CHECK_FIXNAT (val);
      coding->produced_char += Z - prev_Z;
      coding->produced += Z_BYTE - prev_Z_BYTE;
      unbind_to (count1, Qnil);
    }

  if (EQ (dst_object, Qt))
    {
      coding->dst_object = Fbuffer_string ();
    }
  else if (NILP (dst_object) && BUFFERP (coding->dst_object))
    {
      /* The text was decoded in a work buffer; hand it over through
         coding->destination.
         NOTE(review): the copy is made only when the caller's area
         was smaller than the result -- confirm that nothing needs to
         be copied otherwise.  */
      set_buffer_internal (XBUFFER (coding->dst_object));
      if (dst_bytes < coding->produced)
        {
          eassert (coding->produced > 0);
          destination = xrealloc (destination, coding->produced);
          /* Make the text to copy contiguous.  */
          if (BEGV < GPT && GPT < BEGV + coding->produced_char)
            move_gap_both (BEGV, BEGV_BYTE);
          memcpy (destination, BEGV_ADDR, coding->produced);
          coding->destination = destination;
        }
    }

  if (saved_pt >= 0)
    {
      /* Source and destination were the same buffer.  Point was moved
         while the text was being replaced, so put it back: unchanged
         if it was before the region, at FROM if it was inside, and
         shifted by the change in size if it was after.  */
      set_buffer_internal (XBUFFER (src_object));
      current_buffer->text->inhibit_shrinking = 0;
      if (saved_pt < from)
        TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
      else if (saved_pt < from + chars)
        TEMP_SET_PT_BOTH (from, from_byte);
      else if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
        TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
                          saved_pt_byte + (coding->produced - bytes));
      else
        TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
                          saved_pt_byte + (coding->produced - bytes));

      if (need_marker_adjustment)
        {
          struct Lisp_Marker *tail;

          /* Insertion-type markers go to the start of the new text,
             the other flagged markers to its end.  */
          for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
            if (tail->need_adjustment)
              {
                tail->need_adjustment = 0;
                if (tail->insertion_type)
                  {
                    tail->bytepos = from_byte;
                    tail->charpos = from;
                  }
                else
                  {
                    tail->bytepos = from_byte + coding->produced;
                    tail->charpos
                      = (NILP (BVAR (current_buffer, enable_multibyte_characters))
                         ? tail->bytepos : from + coding->produced_char);
                  }
              }
        }
    }

  Vdeactivate_mark = old_deactivate_mark;
  unbind_to (count, coding->dst_object);
}
8245
8246
8247
8248
8249
8250
8251
8252
8253
8254
8255
8256
8257
8258
8259
8260
8261
8262
8263
8264
8265
8266
8267
8268
8269
8270
8271
8272
8273
8274
8275
8276
8277
8278
8279
/* Encode the text of SRC_OBJECT between character positions FROM and
   TO (byte positions FROM_BYTE and TO_BYTE) according to CODING.

   SRC_OBJECT is treated as a string, as a buffer, or -- for any other
   value -- as raw memory addressed through CODING->source.

   DST_OBJECT selects the destination:
     - a buffer: CODING->dst_pos is set to that buffer's point, or to
       FROM when DST_OBJECT is the same object as SRC_OBJECT (in which
       case the source region is deleted before encoding);
     - Qt: on return CODING->dst_object holds the result as a unibyte
       string (or as the string of the current buffer when
       encode_coding ended up using a buffer, or nil when
       CODING->raw_destination is set);
     - anything else: CODING->dst_object is set to nil and the
       destination is left to encode_coding.

   If the coding system has a pre-write function, the source text is
   first copied into a work buffer, the function is called there, and
   the resulting buffer text is what gets encoded.

   When encoding in place, point and the markers that sat on the edges
   of the region are repositioned around the encoded text.  */
void
encode_coding_object (struct coding_system *coding,
                      Lisp_Object src_object,
                      ptrdiff_t from, ptrdiff_t from_byte,
                      ptrdiff_t to, ptrdiff_t to_byte,
                      Lisp_Object dst_object)
{
  specpdl_ref count = SPECPDL_INDEX ();
  ptrdiff_t chars = to - from;
  ptrdiff_t bytes = to_byte - from_byte;
  Lisp_Object attrs;
  /* saved_pt stays negative unless we encode in place; it doubles as
     the "point must be restored" flag below.  */
  ptrdiff_t saved_pt = -1, saved_pt_byte UNINIT;
  bool need_marker_adjustment = 0;
  bool kill_src_buffer = 0;
  Lisp_Object old_deactivate_mark;

  /* Vdeactivate_mark is put back to this value just before returning.  */
  old_deactivate_mark = Vdeactivate_mark;

  coding->src_object = src_object;
  coding->src_chars = chars;
  coding->src_bytes = bytes;
  /* More bytes than characters means the source is multibyte.  */
  coding->src_multibyte = chars < bytes;

  attrs = CODING_ID_ATTRS (coding->id);

  bool same_buffer = false;
  if (BASE_EQ (src_object, dst_object) && BUFFERP (src_object))
    {
      struct Lisp_Marker *tail;

      same_buffer = true;

      /* Flag the markers sitting exactly on the region boundary
         (FROM for insertion-type markers, TO for the others) so they
         can be put back around the encoded text afterwards.  */
      for (tail = BUF_MARKERS (XBUFFER (src_object)); tail; tail = tail->next)
        {
          tail->need_adjustment
            = tail->charpos == (tail->insertion_type ? from : to);
          need_marker_adjustment |= tail->need_adjustment;
        }
    }

  if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
    {
      /* A pre-write function exists: copy the source text into the
         buffer returned by code_conversion_save and make that buffer
         current for the call.  */
      coding->src_object = code_conversion_save (1, coding->src_multibyte);
      set_buffer_internal (XBUFFER (coding->src_object));
      if (STRINGP (src_object))
        insert_from_string (src_object, from, from_byte, chars, bytes, 0);
      else if (BUFFERP (src_object))
        insert_from_buffer (XBUFFER (src_object), from, chars, 0);
      else
        insert_1_both ((char *) coding->source + from, chars, bytes, 0, 0, 0);

      if (same_buffer)
        {
          /* In-place encoding: remember point and remove the original
             text from the source buffer, then return to the work
             buffer.  */
          set_buffer_internal (XBUFFER (src_object));
          saved_pt = PT, saved_pt_byte = PT_BYTE;
          del_range_both (from, from_byte, to, to_byte, 1);
          set_buffer_internal (XBUFFER (coding->src_object));
        }

      safe_call2 (CODING_ATTR_PRE_WRITE (attrs),
                  make_fixnum (BEG), make_fixnum (Z));
      /* If the function left another buffer current, encode from that
         buffer instead and kill it when we are done.  */
      if (XBUFFER (coding->src_object) != current_buffer)
        kill_src_buffer = 1;
      coding->src_object = Fcurrent_buffer ();
      /* Put the gap at the very beginning so the whole text follows
         it.  */
      if (BEG != GPT)
        move_gap_both (BEG, BEG_BYTE);
      coding->src_chars = Z - BEG;
      coding->src_bytes = Z_BYTE - BEG_BYTE;
      coding->src_pos = BEG;
      coding->src_pos_byte = BEG_BYTE;
      coding->src_multibyte = Z < Z_BYTE;
    }
  else if (STRINGP (src_object))
    {
      code_conversion_save (0, 0);
      coding->src_pos = from;
      coding->src_pos_byte = from_byte;
    }
  else if (BUFFERP (src_object))
    {
      code_conversion_save (0, 0);
      set_buffer_internal (XBUFFER (src_object));
      if (same_buffer)
        {
          /* In-place encoding without a pre-write function: the
             region is deleted and the value returned by del_range_1
             becomes the source, read from position 0.  */
          saved_pt = PT, saved_pt_byte = PT_BYTE;
          coding->src_object = del_range_1 (from, to, 1, 1);
          coding->src_pos = 0;
          coding->src_pos_byte = 0;
        }
      else
        {
          /* Move the gap out of the region when it lies inside.  */
          if (from < GPT && to >= GPT)
            move_gap_both (from, from_byte);
          coding->src_pos = from;
          coding->src_pos_byte = from_byte;
        }
    }
  else
    {
      /* Neither string nor buffer: the source is CODING->source.  */
      code_conversion_save (0, 0);
      coding->src_pos = from;
      coding->src_pos_byte = from_byte;
    }

  if (BUFFERP (dst_object))
    {
      coding->dst_object = dst_object;
      if (BASE_EQ (src_object, dst_object))
        {
          /* In place: the encoded text goes where the region began.  */
          coding->dst_pos = from;
          coding->dst_pos_byte = from_byte;
        }
      else
        {
          struct buffer *current = current_buffer;

          /* Temporarily visit the destination buffer to read its
             point and to move its gap there.  */
          set_buffer_temp (XBUFFER (dst_object));
          coding->dst_pos = PT;
          coding->dst_pos_byte = PT_BYTE;
          move_gap_both (coding->dst_pos, coding->dst_pos_byte);
          set_buffer_temp (current);
        }
      coding->dst_multibyte
        = ! NILP (BVAR (XBUFFER (dst_object), enable_multibyte_characters));
    }
  else if (EQ (dst_object, Qt))
    {
      /* Start with one destination byte per source character, and at
         least one byte.  */
      ptrdiff_t dst_bytes = max (1, coding->src_chars);
      coding->dst_object = Qnil;
      coding->destination = xmalloc (dst_bytes);
      coding->dst_bytes = dst_bytes;
      coding->dst_multibyte = 0;
    }
  else
    {
      coding->dst_object = Qnil;
      coding->dst_multibyte = 0;
    }

  encode_coding (coding);

  if (EQ (dst_object, Qt))
    {
      if (BUFFERP (coding->dst_object))
        coding->dst_object = Fbuffer_string ();
      else if (coding->raw_destination)
        /* Do not build a Lisp string; the bytes stay in
           CODING->destination, which is not freed here.  */
        coding->dst_object = Qnil;
      else
        {
          coding->dst_object
            = make_unibyte_string ((char *) coding->destination,
                                   coding->produced);
          xfree (coding->destination);
        }
    }

  if (saved_pt >= 0)
    {
      /* We encoded in place.  Go back to the source buffer and put
         point where it belongs relative to the replaced region:
         unchanged when it was before the region, at FROM when it was
         inside, shifted by the size difference when it was after.  */
      set_buffer_internal (XBUFFER (src_object));
      if (saved_pt < from)
        TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
      else if (saved_pt < from + chars)
        TEMP_SET_PT_BOTH (from, from_byte);
      else if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
        TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
                          saved_pt_byte + (coding->produced - bytes));
      else
        /* Unibyte buffer: character and byte offsets coincide.  */
        TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
                          saved_pt_byte + (coding->produced - bytes));

      if (need_marker_adjustment)
        {
          struct Lisp_Marker *tail;

          /* Reposition the markers flagged at the start: insertion-type
             markers go to FROM, the others to the end of the encoded
             text.  */
          for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
            if (tail->need_adjustment)
              {
                tail->need_adjustment = 0;
                if (tail->insertion_type)
                  {
                    tail->bytepos = from_byte;
                    tail->charpos = from;
                  }
                else
                  {
                    tail->bytepos = from_byte + coding->produced;
                    tail->charpos
                      = (NILP (BVAR (current_buffer, enable_multibyte_characters))
                         ? tail->bytepos : from + coding->produced_char);
                  }
              }
        }
    }

  if (kill_src_buffer)
    Fkill_buffer (coding->src_object);

  Vdeactivate_mark = old_deactivate_mark;
  unbind_to (count, Qnil);
}
8487
8488
8489 Lisp_Object
8490 preferred_coding_system (void)
8491 {
8492 int id = coding_categories[coding_priorities[0]].id;
8493
8494 return CODING_ID_NAME (id);
8495 }
8496
8497 #if defined (WINDOWSNT) || defined (CYGWIN) || defined HAVE_ANDROID
8498
8499 Lisp_Object
8500 from_unicode (Lisp_Object str)
8501 {
8502 CHECK_STRING (str);
8503 if (!STRING_MULTIBYTE (str) &&
8504 SBYTES (str) & 1)
8505 {
8506 str = Fsubstring (str, make_fixnum (0), make_fixnum (-1));
8507 }
8508
8509 return code_convert_string_norecord (str, Qutf_16le, 0);
8510 }
8511
8512 Lisp_Object
8513 from_unicode_buffer (const wchar_t *wstr)
8514 {
8515 #if defined WINDOWSNT || defined CYGWIN
8516
8517 ptrdiff_t len = 1 + sizeof (wchar_t) * wcslen (wstr);
8518 AUTO_STRING_WITH_LEN (str, (char *) wstr, len);
8519 return from_unicode (str);
8520 #else
8521
8522
8523
8524 uint16_t *words;
8525 size_t length, i;
8526
8527 length = wcslen (wstr) + 1;
8528
8529 USE_SAFE_ALLOCA;
8530 SAFE_NALLOCA (words, sizeof *words, length);
8531
8532 for (i = 0; i < length - 1; ++i)
8533 words[i] = wstr[i];
8534
8535 words[i] = '\0';
8536 AUTO_STRING_WITH_LEN (str, (char *) words,
8537 (length - 1) * sizeof *words);
8538 return unbind_to (sa_count, from_unicode (str));
8539 #endif
8540 }
8541
8542 wchar_t *
8543 to_unicode (Lisp_Object str, Lisp_Object *buf)
8544 {
8545 *buf = code_convert_string_norecord (str, Qutf_16le, 1);
8546
8547
8548
8549
8550
8551
8552 str = make_uninit_string (SBYTES (*buf) + 1);
8553 memcpy (SDATA (str), SDATA (*buf), SBYTES (*buf));
8554 SDATA (str) [SBYTES (*buf)] = '\0';
8555 *buf = str;
8556 return WCSDATA (*buf);
8557 }
8558
8559 #endif
8560
8561
8562
8563
8564 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
8565 doc:
8566
8567 )
8568 (Lisp_Object object)
8569 {
8570 if (NILP (object)
8571 || CODING_SYSTEM_ID (object) >= 0)
8572 return Qt;
8573 if (! SYMBOLP (object)
8574 || NILP (Fget (object, Qcoding_system_define_form)))
8575 return Qnil;
8576 return Qt;
8577 }
8578
8579 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
8580 Sread_non_nil_coding_system, 1, 1, 0,
8581 doc: )
8582 (Lisp_Object prompt)
8583 {
8584 Lisp_Object val;
8585 do
8586 {
8587 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8588 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
8589 }
8590 while (SCHARS (val) == 0);
8591 return (Fintern (val, Qnil));
8592 }
8593
8594 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
8595 doc:
8596
8597
8598 )
8599 (Lisp_Object prompt, Lisp_Object default_coding_system)
8600 {
8601 Lisp_Object val;
8602 specpdl_ref count = SPECPDL_INDEX ();
8603
8604 if (SYMBOLP (default_coding_system))
8605 default_coding_system = SYMBOL_NAME (default_coding_system);
8606 specbind (Qcompletion_ignore_case, Qt);
8607 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8608 Qt, Qnil, Qcoding_system_history,
8609 default_coding_system, Qnil);
8610 val = unbind_to (count, val);
8611 return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
8612 }
8613
8614 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
8615 1, 1, 0,
8616 doc:
8617
8618
8619 )
8620 (Lisp_Object coding_system)
8621 {
8622 Lisp_Object define_form;
8623
8624 define_form = Fget (coding_system, Qcoding_system_define_form);
8625 if (! NILP (define_form))
8626 {
8627 Fput (coding_system, Qcoding_system_define_form, Qnil);
8628 safe_eval (define_form);
8629 }
8630 if (!NILP (Fcoding_system_p (coding_system)))
8631 return coding_system;
8632 xsignal1 (Qcoding_system_error, coding_system);
8633 }
8634
8635
8636
8637
8638
8639
8640
8641
8642
8643
8644
8645
8646
8647
8648
8649
8650
8651
/* Detect how the SRC_BYTES bytes (SRC_CHARS characters) starting at
   SRC are encoded.

   CODING_SYSTEM is the starting point; nil stands for Qundecided.
   Real detection of the text format happens only when its category is
   undecided, utf-8-auto or utf-16-auto; for any other category the
   coding system itself is the single candidate.

   MULTIBYTEP is recorded as the multibyteness of the source.

   If HIGHEST, return one coding system name, or nil when there is no
   candidate.  Otherwise return the list of candidate names.  Where a
   candidate's end-of-line type is still a vector of variants, the
   variant matching the end-of-line format seen in the data is
   returned in its place.  */
Lisp_Object
detect_coding_system (const unsigned char *src,
                      ptrdiff_t src_chars, ptrdiff_t src_bytes,
                      bool highest, bool multibytep,
                      Lisp_Object coding_system)
{
  const unsigned char *src_end = src + src_bytes;
  Lisp_Object attrs, eol_type;
  /* Candidates, first as a list of coding system ids; the ids are
     replaced by names in the last part of the function.  */
  Lisp_Object val = Qnil;
  struct coding_system coding;
  ptrdiff_t id;
  struct coding_detection_info detect_info = {0};
  enum coding_category base_category;
  bool null_byte_found = 0, eight_bit_found = 0;

  if (NILP (coding_system))
    coding_system = Qundecided;
  setup_coding_system (coding_system, &coding);
  attrs = CODING_ID_ATTRS (coding.id);
  eol_type = CODING_ID_EOL_TYPE (coding.id);
  coding_system = CODING_ATTR_BASE_NAME (attrs);

  coding.source = src;
  coding.src_chars = src_chars;
  coding.src_bytes = src_bytes;
  coding.src_multibyte = multibytep;
  coding.consumed = 0;
  coding.mode |= CODING_MODE_LAST_BLOCK;
  coding.head_ascii = 0;

  /* First part: work out the candidate text formats.  */
  base_category = XFIXNUM (CODING_ATTR_CATEGORY (attrs));
  if (base_category == coding_category_undecided)
    {
      enum coding_category category UNINIT;
      struct coding_system *this UNINIT;
      int c, i;
      bool inhibit_nbd = inhibit_flag (coding.spec.undecided.inhibit_nbd,
                                       inhibit_null_byte_detection);
      bool inhibit_ied = inhibit_flag (coding.spec.undecided.inhibit_ied,
                                       inhibit_iso_escape_detection);
      bool prefer_utf_8 = coding.spec.undecided.prefer_utf_8;

      /* Scan the leading bytes, counting in coding.head_ascii those
         seen before the first 8-bit byte.  The scan ends early once
         both a null byte and an 8-bit byte were seen, or once the
         ISO-2022 detector has accepted the data.  */
      for (; src < src_end; src++)
        {
          c = *src;
          if (c & 0x80)
            {
              eight_bit_found = 1;
              if (null_byte_found)
                break;
            }
          else if (c < 0x20)
            {
              if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
                  && ! inhibit_ied
                  && ! detect_info.checked)
                {
                  if (detect_coding_iso_2022 (&coding, &detect_info))
                    {
                      /* The ISO-2022 detector accepted the data.  */
                      if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
                        {
                          /* The 7-bit ISO category was not rejected:
                             treat the whole input as scanned, so that
                             head_ascii equals the input length and
                             only the found categories are consulted
                             below.  */
                          src = src_end;
                          coding.head_ascii = src - coding.source;
                        }
                      detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
                      break;
                    }
                }
              else if (! c && !inhibit_nbd)
                {
                  null_byte_found = 1;
                  if (eight_bit_found)
                    break;
                }
              if (! eight_bit_found)
                coding.head_ascii++;
            }
          else if (! eight_bit_found)
            coding.head_ascii++;
        }

      if (null_byte_found || eight_bit_found
          || coding.head_ascii < coding.src_bytes
          || detect_info.found)
        {
          if (coding.head_ascii == coding.src_bytes)
            /* Every byte counted as ASCII: no further detector is
               run, just pick the first category, in priority order,
               that is already marked as found.  */
            for (i = 0; i < coding_category_raw_text; i++)
              {
                category = coding_priorities[i];
                this = coding_categories + category;
                if (detect_info.found & (1 << category))
                  break;
              }
          else
            {
              if (null_byte_found)
                {
                  /* A null byte leaves only the UTF-16 categories
                     open.  */
                  detect_info.checked |= ~CATEGORY_MASK_UTF_16;
                  detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
                }
              else if (prefer_utf_8
                       && detect_coding_utf_8 (&coding, &detect_info))
                {
                  /* UTF-8 is preferred and the data passed the UTF-8
                     detector: close every other category.  */
                  detect_info.checked |= ~CATEGORY_MASK_UTF_8;
                  detect_info.rejected |= ~CATEGORY_MASK_UTF_8;
                }
              /* Run the remaining detectors in priority order.  */
              for (i = 0; i < coding_category_raw_text; i++)
                {
                  category = coding_priorities[i];
                  this = coding_categories + category;

                  if (this->id < 0)
                    {
                      /* This category has no coding system id (it is
                         negative), so reject it.  */
                      detect_info.rejected |= (1 << category);
                    }
                  else if (category >= coding_category_raw_text)
                    continue;
                  else if (detect_info.checked & (1 << category))
                    {
                      if (highest
                          && (detect_info.found & (1 << category)))
                        break;
                    }
                  else if ((*(this->detector)) (&coding, &detect_info)
                           && highest
                           && (detect_info.found & (1 << category)))
                    {
                      /* For utf-16-auto, report the concrete byte
                         order that was found.  */
                      if (category == coding_category_utf_16_auto)
                        {
                          if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
                            category = coding_category_utf_16_le;
                          else
                            category = coding_category_utf_16_be;
                        }
                      break;
                    }
                }
            }
        }

      if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY
          || null_byte_found)
        {
          /* Everything was rejected, or a null byte was seen: answer
             no-conversion.  */
          detect_info.found = CATEGORY_MASK_RAW_TEXT;
          id = CODING_SYSTEM_ID (Qno_conversion);
          val = list1i (id);
        }
      else if (! detect_info.rejected && ! detect_info.found)
        {
          /* Nothing rejected and nothing found: stay undecided.  */
          detect_info.found = CATEGORY_MASK_ANY;
          id = coding_categories[coding_category_undecided].id;
          val = list1i (id);
        }
      else if (highest)
        {
          if (detect_info.found)
            {
              /* CATEGORY and THIS come from the loops above.  */
              detect_info.found = 1 << category;
              val = list1i (this->id);
            }
          else
            /* Nothing positively found: take the first category, in
               priority order, that was not rejected.  */
            for (i = 0; i < coding_category_raw_text; i++)
              if (! (detect_info.rejected & (1 << coding_priorities[i])))
                {
                  detect_info.found = 1 << coding_priorities[i];
                  id = coding_categories[coding_priorities[i]].id;
                  val = list1i (id);
                  break;
                }
        }
      else
        {
          int mask = detect_info.rejected | detect_info.found;
          int found = 0;

          /* Categories neither rejected nor found.  Note that each
             hit with a valid id replaces VAL with a one-element list,
             so only the last hit of this descending walk is kept.  */
          for (i = coding_category_raw_text - 1; i >= 0; i--)
            {
              category = coding_priorities[i];
              if (! (mask & (1 << category)))
                {
                  found |= 1 << category;
                  id = coding_categories[category].id;
                  if (id >= 0)
                    val = list1i (id);
                }
            }
          /* Prepend the found categories.  Walking from the lowest
             priority up leaves the highest priority at the head.  */
          for (i = coding_category_raw_text - 1; i >= 0; i--)
            {
              category = coding_priorities[i];
              if (detect_info.found & (1 << category))
                {
                  id = coding_categories[category].id;
                  val = Fcons (make_fixnum (id), val);
                }
            }
          detect_info.found |= found;
        }
    }
  else if (base_category == coding_category_utf_8_auto)
    {
      if (detect_coding_utf_8 (&coding, &detect_info))
        {
          struct coding_system *this;

          /* Choose between the signature and no-signature variant.  */
          if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
            this = coding_categories + coding_category_utf_8_sig;
          else
            this = coding_categories + coding_category_utf_8_nosig;
          val = list1i (this->id);
        }
    }
  else if (base_category == coding_category_utf_16_auto)
    {
      if (detect_coding_utf_16 (&coding, &detect_info))
        {
          struct coding_system *this;

          /* Choose the byte order, with or without signature.  */
          if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
            this = coding_categories + coding_category_utf_16_le;
          else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
            this = coding_categories + coding_category_utf_16_be;
          else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
            this = coding_categories + coding_category_utf_16_be_nosig;
          else
            this = coding_categories + coding_category_utf_16_le_nosig;
          val = list1i (this->id);
        }
    }
  else
    {
      /* The text format is already decided by CODING_SYSTEM.  */
      detect_info.found = 1 << XFIXNUM (CODING_ATTR_CATEGORY (attrs));
      val = list1i (coding.id);
    }

  /* Second part: settle the end-of-line format and turn the ids in
     VAL into coding system names.  */
  {
    int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol = -1;
    Lisp_Object tail;

    if (VECTORP (eol_type))
      {
        /* The end-of-line type is not fixed: detect it from the data,
           separately for each byte layout among the candidates.  */
        if (detect_info.found & ~CATEGORY_MASK_UTF_16)
          {
            if (null_byte_found)
              normal_eol = EOL_SEEN_LF;
            else
              normal_eol = detect_eol (coding.source, src_bytes,
                                       coding_category_raw_text);
          }
        if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
                                 | CATEGORY_MASK_UTF_16_BE_NOSIG))
          utf_16_be_eol = detect_eol (coding.source, src_bytes,
                                      coding_category_utf_16_be);
        if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
                                 | CATEGORY_MASK_UTF_16_LE_NOSIG))
          utf_16_le_eol = detect_eol (coding.source, src_bytes,
                                      coding_category_utf_16_le);
      }
    else
      {
        /* The end-of-line type is fixed by the coding system.  */
        if (EQ (eol_type, Qunix))
          normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
        else if (EQ (eol_type, Qdos))
          normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
        else
          normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
      }

    for (tail = val; CONSP (tail); tail = XCDR (tail))
      {
        enum coding_category category;
        int this_eol;

        id = XFIXNUM (XCAR (tail));
        attrs = CODING_ID_ATTRS (id);
        category = XFIXNUM (CODING_ATTR_CATEGORY (attrs));
        eol_type = CODING_ID_EOL_TYPE (id);
        if (VECTORP (eol_type))
          {
            if (category == coding_category_utf_16_be
                || category == coding_category_utf_16_be_nosig)
              this_eol = utf_16_be_eol;
            else if (category == coding_category_utf_16_le
                     || category == coding_category_utf_16_le_nosig)
              this_eol = utf_16_le_eol;
            else
              this_eol = normal_eol;

            /* Slots 0, 1 and 2 of the vector are used for LF, CRLF
               and CR respectively; with any other result the plain
               name is kept.  */
            if (this_eol == EOL_SEEN_LF)
              XSETCAR (tail, AREF (eol_type, 0));
            else if (this_eol == EOL_SEEN_CRLF)
              XSETCAR (tail, AREF (eol_type, 1));
            else if (this_eol == EOL_SEEN_CR)
              XSETCAR (tail, AREF (eol_type, 2));
            else
              XSETCAR (tail, CODING_ID_NAME (id));
          }
        else
          XSETCAR (tail, CODING_ID_NAME (id));
      }
  }

  return (highest ? (CONSP (val) ? XCAR (val) : Qnil) : val);
}
8965
8966
8967 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
8968 2, 3, 0,
8969 doc:
8970
8971
8972
8973
8974
8975
8976
8977
8978
8979
8980 )
8981 (Lisp_Object start, Lisp_Object end, Lisp_Object highest)
8982 {
8983 ptrdiff_t from, to;
8984 ptrdiff_t from_byte, to_byte;
8985
8986 validate_region (&start, &end);
8987 from = XFIXNUM (start), to = XFIXNUM (end);
8988 from_byte = CHAR_TO_BYTE (from);
8989 to_byte = CHAR_TO_BYTE (to);
8990
8991 if (from < GPT && to >= GPT)
8992 move_gap_both (to, to_byte);
8993
8994 return detect_coding_system (BYTE_POS_ADDR (from_byte),
8995 to - from, to_byte - from_byte,
8996 !NILP (highest),
8997 !NILP (BVAR (current_buffer
8998 , enable_multibyte_characters)),
8999 Qnil);
9000 }
9001
9002 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
9003 1, 2, 0,
9004 doc:
9005
9006
9007
9008
9009
9010
9011
9012
9013
9014
9015 )
9016 (Lisp_Object string, Lisp_Object highest)
9017 {
9018 CHECK_STRING (string);
9019
9020 return detect_coding_system (SDATA (string),
9021 SCHARS (string), SBYTES (string),
9022 !NILP (highest), STRING_MULTIBYTE (string),
9023 Qnil);
9024 }
9025
9026
9027 static bool
9028 char_encodable_p (int c, Lisp_Object attrs)
9029 {
9030 Lisp_Object tail;
9031 struct charset *charset;
9032 Lisp_Object translation_table;
9033
9034 translation_table = CODING_ATTR_TRANS_TBL (attrs);
9035 if (! NILP (translation_table))
9036 c = translate_char (translation_table, c);
9037 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
9038 CONSP (tail); tail = XCDR (tail))
9039 {
9040 charset = CHARSET_FROM_ID (XFIXNUM (XCAR (tail)));
9041 if (CHAR_CHARSET_P (c, charset))
9042 break;
9043 }
9044 return (! NILP (tail));
9045 }
9046
9047
9048
9049
9050
9051
9052
9053
DEFUN ("find-coding-systems-region-internal",
       Ffind_coding_systems_region_internal,
       Sfind_coding_systems_region_internal, 2, 3, 0,
       doc: /* Return the base coding systems able to encode a text.
The text is the string START (END is then ignored), or the part of the
current buffer between positions START and END.
Return t if the text is unibyte or contains only ASCII characters.
Otherwise return a list of base coding system names, which always ends
with `raw-text' and `no-conversion'.
Coding systems listed in EXCLUDE are not considered.  */)
  (Lisp_Object start, Lisp_Object end, Lisp_Object exclude)
{
  Lisp_Object coding_attrs_list, safe_codings;
  ptrdiff_t start_byte, end_byte;
  const unsigned char *p, *pbeg, *pend;
  int c;
  Lisp_Object tail, elt, work_table;

  if (STRINGP (start))
    {
      /* Unibyte, or as many bytes as characters: nothing but ASCII.  */
      if (!STRING_MULTIBYTE (start)
          || SCHARS (start) == SBYTES (start))
        return Qt;
      start_byte = 0;
      end_byte = SBYTES (start);
    }
  else
    {
      EMACS_INT s = fix_position (start);
      EMACS_INT e = fix_position (end);
      if (! (BEG <= s && s <= e && e <= Z))
        args_out_of_range (start, end);
      if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
        return Qt;
      start_byte = CHAR_TO_BYTE (s);
      end_byte = CHAR_TO_BYTE (e);
      if (e - s == end_byte - start_byte)
        return Qt;

      /* Make the region contiguous in memory by moving the gap to
         whichever end of the region is nearer.  */
      if (s < GPT && GPT < e)
        {
          if (GPT - s < e - GPT)
            move_gap_both (s, start_byte);
          else
            move_gap_both (e, end_byte);
        }
    }

  /* Collect the attribute vectors of all base coding systems that
     are not excluded, refreshing the translation table stored in
     each of them.  */
  coding_attrs_list = Qnil;
  for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
    if (NILP (exclude)
        || NILP (Fmemq (XCAR (tail), exclude)))
      {
        Lisp_Object attrs;

        attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
        /* Keep base coding systems only, i.e. those whose name is
           their own base name.  */
        if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs)))
          {
            ASET (attrs, coding_attr_trans_tbl,
                  get_translation_table (attrs, 1, NULL));
            coding_attrs_list = Fcons (attrs, coding_attrs_list);
          }
      }

  if (STRINGP (start))
    p = pbeg = SDATA (start);
  else
    p = pbeg = BYTE_POS_ADDR (start_byte);
  pend = p + (end_byte - start_byte);

  /* Trim the ASCII runs at both ends of the text.  */
  while (p < pend && ASCII_CHAR_P (*p)) p++;
  while (p < pend && ASCII_CHAR_P (*(pend - 1))) pend--;

  /* WORK_TABLE remembers which characters were already examined.  */
  work_table = Fmake_char_table (Qnil, Qnil);
  while (p < pend)
    {
      if (ASCII_CHAR_P (*p))
        p++;
      else
        {
          c = string_char_advance (&p);
          if (!NILP (char_table_ref (work_table, c)))
            /* This character has been checked before.  */
            continue;

          charset_map_loaded = 0;
          /* Drop from the candidate list every coding system that
             cannot encode C.  */
          for (tail = coding_attrs_list; CONSP (tail);)
            {
              elt = XCAR (tail);
              if (NILP (elt))
                tail = XCDR (tail);
              else if (char_encodable_p (c, elt))
                tail = XCDR (tail);
              else if (CONSP (XCDR (tail)))
                {
                  /* Delete this element in place by copying the next
                     cell over it; TAIL is not advanced, so the copied
                     element is examined on the next iteration.  */
                  XSETCAR (tail, XCAR (XCDR (tail)));
                  XSETCDR (tail, XCDR (XCDR (tail)));
                }
              else
                {
                  /* Last cell: it cannot be unlinked from here, so
                     blank it instead.  */
                  XSETCAR (tail, Qnil);
                  tail = XCDR (tail);
                }
            }
          if (charset_map_loaded)
            {
              /* charset_map_loaded became nonzero during the checks
                 above.  Recompute the text pointers from their
                 offsets -- presumably because the text may have been
                 relocated in the meantime; confirm.  */
              ptrdiff_t p_offset = p - pbeg, pend_offset = pend - pbeg;

              if (STRINGP (start))
                pbeg = SDATA (start);
              else
                pbeg = BYTE_POS_ADDR (start_byte);
              p = pbeg + p_offset;
              pend = pbeg + pend_offset;
            }
          char_table_set (work_table, c, Qt);
        }
    }

  /* raw-text and no-conversion are always part of the answer; the
     surviving candidates are pushed in front of them.  */
  safe_codings = list2 (Qraw_text, Qno_conversion);
  for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
    if (! NILP (XCAR (tail)))
      safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);

  return safe_codings;
}
9174
9175
DEFUN ("unencodable-char-position", Funencodable_char_position,
       Sunencodable_char_position, 3, 5, 0,
       doc: /* Return position of first character CODING-SYSTEM cannot encode.
START and END delimit the text to check, in the current buffer or,
when optional argument STRING is non-nil, in that string.
If optional argument COUNT is nil, return the first such position, or
nil if there is none.  If COUNT is non-nil, it must be a non-negative
integer; return a list of the positions found, in increasing order,
stopping after COUNT of them.
Return nil without scanning if CODING-SYSTEM is of type `raw-text', if
the text is unibyte, or if CODING-SYSTEM is ASCII compatible and the
text contains only ASCII characters.  */)
  (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system,
   Lisp_Object count, Lisp_Object string)
{
  EMACS_INT n;
  struct coding_system coding;
  Lisp_Object attrs, charset_list, translation_table;
  Lisp_Object positions;
  ptrdiff_t from, to;
  /* P walks the text up to PEND.  STOP is where the current
     contiguous stretch ends: the gap for buffer text spanning it,
     PEND otherwise.  */
  const unsigned char *p, *stop, *pend;
  bool ascii_compatible;

  setup_coding_system (Fcheck_coding_system (coding_system), &coding);
  attrs = CODING_ID_ATTRS (coding.id);
  if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
    return Qnil;
  ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
  charset_list = CODING_ATTR_CHARSET_LIST (attrs);
  translation_table = get_translation_table (attrs, 1, NULL);

  if (NILP (string))
    {
      validate_region (&start, &end);
      from = XFIXNUM (start);
      to = XFIXNUM (end);
      /* Nothing to report for a unibyte buffer, nor for an all-ASCII
         region when the coding system is ASCII compatible.  */
      if (NILP (BVAR (current_buffer, enable_multibyte_characters))
          || (ascii_compatible
              && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
        return Qnil;
      p = CHAR_POS_ADDR (from);
      pend = CHAR_POS_ADDR (to);
      if (from < GPT && to >= GPT)
        stop = GPT_ADDR;
      else
        stop = pend;
    }
  else
    {
      CHECK_STRING (string);
      validate_subarray (string, start, end, SCHARS (string), &from, &to);
      if (! STRING_MULTIBYTE (string))
        return Qnil;
      p = SDATA (string) + string_char_to_byte (string, from);
      stop = pend = SDATA (string) + string_char_to_byte (string, to);
      if (ascii_compatible && (to - from) == (pend - p))
        return Qnil;
    }

  if (NILP (count))
    n = 1;
  else
    {
      CHECK_FIXNAT (count);
      n = XFIXNUM (count);
      /* NOTE(review): with COUNT = 0, N goes negative on the first
         hit and never equals 0 below, so every position is collected.
         Confirm whether that is intended.  */
    }

  positions = Qnil;
  charset_map_loaded = 0;
  while (1)
    {
      int c;

      /* ASCII needs no check with an ASCII-compatible coding system;
         skip over it while keeping FROM in step with P.  */
      if (ascii_compatible)
        while (p < stop && ASCII_CHAR_P (*p))
          p++, from++;
      if (p >= stop)
        {
          if (p >= pend)
            break;
          /* Reached the gap: continue with the text after it.  */
          stop = pend;
          p = GAP_END_ADDR;
        }

      c = string_char_advance (&p);
      if (! (ASCII_CHAR_P (c) && ascii_compatible)
          && ! char_charset (translate_char (translation_table, c),
                             charset_list, NULL))
        {
          /* No charset of the coding system contains the translated
             character: record its position.  */
          positions = Fcons (make_fixnum (from), positions);
          n--;
          if (n == 0)
            break;
        }

      from++;
      if (charset_map_loaded && NILP (string))
        {
          /* charset_map_loaded became nonzero: recompute the buffer
             pointers from the character positions -- presumably
             because the buffer text may have been relocated;
             confirm.  */
          p = CHAR_POS_ADDR (from);
          pend = CHAR_POS_ADDR (to);
          if (from < GPT && to >= GPT)
            stop = GPT_ADDR;
          else
            stop = pend;
          charset_map_loaded = 0;
        }
    }

  /* POSITIONS was built newest first.  */
  return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
}
9287
9288
DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
       Scheck_coding_systems_region, 3, 3, 0,
       doc: /* Check if the text is encodable by coding systems.
The text is the string START (END is then ignored), or the part of the
current buffer between positions START and END.
CODING-SYSTEM-LIST is a list of coding systems to check; an element
that is not a defined coding system signals `coding-system-error'.

Return an alist with one element per coding system that cannot encode
the whole text:
  (CODING-SYSTEM POS0 POS1 ...)
where the POSn are the positions, in increasing order, of the
characters CODING-SYSTEM cannot encode.  Positions are counted from 0
for a string.
Return nil if every coding system can encode the text, or if the text
is unibyte or contains only ASCII characters.  */)
  (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system_list)
{
  Lisp_Object list;
  ptrdiff_t start_byte, end_byte;
  /* Character position matching P.  */
  ptrdiff_t pos;
  const unsigned char *p, *pbeg, *pend;
  int c;
  Lisp_Object tail, elt, attrs;

  if (STRINGP (start))
    {
      /* Unibyte, or as many bytes as characters: nothing but ASCII.  */
      if (!STRING_MULTIBYTE (start)
          || SCHARS (start) == SBYTES (start))
        return Qnil;
      start_byte = 0;
      end_byte = SBYTES (start);
      pos = 0;
    }
  else
    {
      EMACS_INT s = fix_position (start);
      EMACS_INT e = fix_position (end);
      if (! (BEG <= s && s <= e && e <= Z))
        args_out_of_range (start, end);
      if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
        return Qnil;
      start_byte = CHAR_TO_BYTE (s);
      end_byte = CHAR_TO_BYTE (e);
      if (e - s == end_byte - start_byte)
        return Qnil;

      /* Make the region contiguous in memory by moving the gap to
         whichever end of the region is nearer.  */
      if (s < GPT && GPT < e)
        {
          if (GPT - s < e - GPT)
            move_gap_both (s, start_byte);
          else
            move_gap_both (e, end_byte);
        }
      pos = s;
    }

  /* Build a working list of (CODING-SYSTEM ATTRS) entries.  The
     positions of unencodable characters are pushed right after ATTRS
     as they are found.  */
  list = Qnil;
  for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
    {
      elt = XCAR (tail);
      Lisp_Object spec = CODING_SYSTEM_SPEC (elt);
      if (!VECTORP (spec))
        xsignal1 (Qcoding_system_error, elt);
      attrs = AREF (spec, 0);
      ASET (attrs, coding_attr_trans_tbl,
            get_translation_table (attrs, 1, NULL));
      list = Fcons (list2 (elt, attrs), list);
    }

  if (STRINGP (start))
    p = pbeg = SDATA (start);
  else
    p = pbeg = BYTE_POS_ADDR (start_byte);
  pend = p + (end_byte - start_byte);

  /* Trim the ASCII runs at both ends; POS follows P at the front.  */
  while (p < pend && ASCII_CHAR_P (*p)) p++, pos++;
  while (p < pend && ASCII_CHAR_P (*(pend - 1))) pend--;

  /* Check every non-ASCII character of what remains.  */
  while (p < pend)
    {
      if (ASCII_CHAR_P (*p))
        p++;
      else
        {
          c = string_char_advance (&p);

          charset_map_loaded = 0;
          for (tail = list; CONSP (tail); tail = XCDR (tail))
            {
              /* ELT is the (ATTRS POS...) part of the entry.  */
              elt = XCDR (XCAR (tail));
              if (! char_encodable_p (c, XCAR (elt)))
                XSETCDR (elt, Fcons (make_fixnum (pos), XCDR (elt)));
            }
          if (charset_map_loaded)
            {
              /* charset_map_loaded became nonzero during the checks
                 above.  Recompute the text pointers from their
                 offsets -- presumably because the text may have been
                 relocated in the meantime; confirm.  */
              ptrdiff_t p_offset = p - pbeg, pend_offset = pend - pbeg;

              if (STRINGP (start))
                pbeg = SDATA (start);
              else
                pbeg = BYTE_POS_ADDR (start_byte);
              p = pbeg + p_offset;
              pend = pbeg + pend_offset;
            }
        }
      pos++;
    }

  /* Keep only the entries that recorded at least one position, drop
     ATTRS from them, and put the positions in increasing order.  */
  tail = list;
  list = Qnil;
  for (; CONSP (tail); tail = XCDR (tail))
    {
      elt = XCAR (tail);
      if (CONSP (XCDR (XCDR (elt))))
        list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
                      list);
    }

  return list;
}
9415
9416
9417 static Lisp_Object
9418 code_convert_region (Lisp_Object start, Lisp_Object end,
9419 Lisp_Object coding_system, Lisp_Object dst_object,
9420 bool encodep, bool norecord)
9421 {
9422 struct coding_system coding;
9423 ptrdiff_t from, from_byte, to, to_byte;
9424 Lisp_Object src_object;
9425
9426 if (NILP (coding_system))
9427 coding_system = Qno_conversion;
9428 else
9429 CHECK_CODING_SYSTEM (coding_system);
9430 src_object = Fcurrent_buffer ();
9431 if (NILP (dst_object))
9432 dst_object = src_object;
9433 else if (! EQ (dst_object, Qt))
9434 CHECK_BUFFER (dst_object);
9435
9436 validate_region (&start, &end);
9437 from = XFIXNAT (start);
9438 from_byte = CHAR_TO_BYTE (from);
9439 to = XFIXNAT (end);
9440 to_byte = CHAR_TO_BYTE (to);
9441
9442 setup_coding_system (coding_system, &coding);
9443 coding.mode |= CODING_MODE_LAST_BLOCK;
9444
9445 if (BUFFERP (dst_object) && !BASE_EQ (dst_object, src_object))
9446 {
9447 struct buffer *buf = XBUFFER (dst_object);
9448 ptrdiff_t buf_pt = BUF_PT (buf);
9449
9450 invalidate_buffer_caches (buf, buf_pt, buf_pt);
9451 }
9452
9453 if (encodep)
9454 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
9455 dst_object);
9456 else
9457 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
9458 dst_object);
9459 if (! norecord)
9460 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
9461
9462 return (BUFFERP (dst_object)
9463 ? make_fixnum (coding.produced_char)
9464 : coding.dst_object);
9465 }
9466
9467
9468 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
9469 3, 4, "r\nzCoding system: ",
9470 doc:
9471
9472
9473
9474
9475
9476
9477
9478
9479
9480
9481
9482
9483
9484
9485
9486
9487
9488
9489
9490
9491
9492
9493
9494 )
9495 (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object destination)
9496 {
9497 return code_convert_region (start, end, coding_system, destination, 0, 0);
9498 }
9499
9500 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
9501 3, 4, "r\nzCoding system: ",
9502 doc:
9503
9504
9505
9506
9507
9508
9509
9510
9511
9512
9513
9514
9515
9516
9517
9518
9519
9520
9521
9522
9523
9524
9525 )
9526 (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object destination)
9527 {
9528 return code_convert_region (start, end, coding_system, destination, 1, 0);
9529 }
9530
9531
9532 bool
9533 string_ascii_p (Lisp_Object string)
9534 {
9535 ptrdiff_t nbytes = SBYTES (string);
9536 for (ptrdiff_t i = 0; i < nbytes; i++)
9537 if (SREF (string, i) > 127)
9538 return false;
9539 return true;
9540 }
9541
9542 Lisp_Object
9543 code_convert_string (Lisp_Object string, Lisp_Object coding_system,
9544 Lisp_Object dst_object, bool encodep, bool nocopy,
9545 bool norecord)
9546 {
9547 struct coding_system coding;
9548 ptrdiff_t chars, bytes;
9549
9550 CHECK_STRING (string);
9551 if (NILP (coding_system))
9552 {
9553 if (! norecord)
9554 Vlast_coding_system_used = Qno_conversion;
9555 if (NILP (dst_object))
9556 return nocopy ? string : Fcopy_sequence (string);
9557 }
9558
9559 if (NILP (coding_system))
9560 coding_system = Qno_conversion;
9561 else
9562 CHECK_CODING_SYSTEM (coding_system);
9563 if (NILP (dst_object))
9564 dst_object = Qt;
9565 else if (! EQ (dst_object, Qt))
9566 CHECK_BUFFER (dst_object);
9567
9568 setup_coding_system (coding_system, &coding);
9569 coding.mode |= CODING_MODE_LAST_BLOCK;
9570 chars = SCHARS (string);
9571 bytes = SBYTES (string);
9572
9573 if (EQ (dst_object, Qt))
9574 {
9575
9576
9577 Lisp_Object attrs = CODING_ID_ATTRS (coding.id);
9578 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
9579 && (STRING_MULTIBYTE (string)
9580 ? (chars == bytes) : string_ascii_p (string))
9581 && (EQ (CODING_ID_EOL_TYPE (coding.id), Qunix)
9582 || inhibit_eol_conversion
9583 || ! memchr (SDATA (string), encodep ? '\n' : '\r', bytes)))
9584 {
9585 if (! norecord)
9586 Vlast_coding_system_used = coding_system;
9587 return (nocopy
9588 ? string
9589 : (encodep
9590 ? make_unibyte_string (SSDATA (string), bytes)
9591 : make_multibyte_string (SSDATA (string), bytes, bytes)));
9592 }
9593 }
9594 else if (BUFFERP (dst_object))
9595 {
9596 struct buffer *buf = XBUFFER (dst_object);
9597 ptrdiff_t buf_pt = BUF_PT (buf);
9598
9599 invalidate_buffer_caches (buf, buf_pt, buf_pt);
9600 }
9601
9602 if (encodep)
9603 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9604 else
9605 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9606 if (! norecord)
9607 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
9608
9609 return (BUFFERP (dst_object)
9610 ? make_fixnum (coding.produced_char)
9611 : coding.dst_object);
9612 }
9613
9614
9615
9616
9617
9618 Lisp_Object
9619 code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system,
9620 bool encodep)
9621 {
9622 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
9623 }
9624
9625
9626
9627
9628
9629 static unsigned char *
9630 get_buffer_gap_address (Lisp_Object buffer, ptrdiff_t nbytes)
9631 {
9632 struct buffer *buf = XBUFFER (buffer);
9633
9634 if (BUF_GPT (buf) != BUF_PT (buf))
9635 {
9636 struct buffer *oldb = current_buffer;
9637
9638 current_buffer = buf;
9639 move_gap_both (PT, PT_BYTE);
9640 current_buffer = oldb;
9641 }
9642 if (BUF_GAP_SIZE (buf) < nbytes)
9643 make_gap_1 (buf, nbytes);
9644 return BUF_GPT_ADDR (buf);
9645 }
9646
9647
9648
9649
9650
9651
9652
/* Return a pointer to the byte sequence that CHAR_STRING produces for
   character C, and store the length of that sequence in *LEN.

   The result points into a static two-entry cache, so it stays valid
   only until the cache entry is reused by a later call.  */
static unsigned char *
get_char_bytes (int c, int *len)
{
  /* Two entries are kept so that two alternating characters can both
     be served from the cache.  */
  static int chars[2];
  static unsigned char bytes[2][6];
  static int nbytes[2];
  static int last_index;

  /* Probe the most recently filled slot first, then the other one.
     A slot whose length is still 0 has never been filled; its
     zero-initialized character must not be mistaken for a cached
     C == 0, which would report an empty byte sequence.  */
  for (int probe = 0; probe < 2; probe++)
    {
      int slot = probe == 0 ? last_index : 1 - last_index;

      if (nbytes[slot] > 0 && chars[slot] == c)
        {
          *len = nbytes[slot];
          return bytes[slot];
        }
    }

  /* Cache miss: overwrite the slot that was not filled last.  */
  last_index = 1 - last_index;
  chars[last_index] = c;
  *len = nbytes[last_index] = CHAR_STRING (c, bytes[last_index]);
  return bytes[last_index];
}
9679
9680
9681
9682
9683
9684
9685
9686
9687
9688
9689
9690
9691
9692
9693
9694
9695
9696
9697
9698
9699
9700
9701
9702
9703
9704
9705
9706
9707
9708
9709
9710
9711
9712
9713
9714
9715
9716
9717
9718
9719
9720
9721
9722
9723
9724
9725
9726
9727
9728
9729
9730
9731
9732
9733
9734
9735
9736
9737
9738
9739
9740
9741
9742 Lisp_Object
9743 encode_string_utf_8 (Lisp_Object string, Lisp_Object buffer,
9744 bool nocopy, Lisp_Object handle_8_bit,
9745 Lisp_Object handle_over_uni)
9746 {
9747 ptrdiff_t nchars = SCHARS (string), nbytes = SBYTES (string);
9748 if (NILP (buffer) && nchars == nbytes && nocopy)
9749
9750 return string;
9751
9752 ptrdiff_t num_8_bit = 0;
9753
9754 ptrdiff_t num_over_4 = 0;
9755 ptrdiff_t num_over_5 = 0;
9756 ptrdiff_t outbytes;
9757 unsigned char *p = SDATA (string);
9758 unsigned char *pend = p + nbytes;
9759 unsigned char *src = NULL, *dst = NULL;
9760 unsigned char *replace_8_bit = NULL, *replace_over_uni = NULL;
9761 int replace_8_bit_len = 0, replace_over_uni_len = 0;
9762 Lisp_Object val;
9763
9764
9765
9766
9767
9768 for (int scan_count = 0; scan_count < 2; scan_count++)
9769 {
9770 while (p < pend)
9771 {
9772 if (nchars == pend - p)
9773
9774 break;
9775
9776 int c = *p;
9777 int len = BYTES_BY_CHAR_HEAD (c);
9778
9779 nchars--;
9780 if (len == 1
9781 || len == 3
9782 || (len == 2 ? ! CHAR_BYTE8_HEAD_P (c)
9783 : (EQ (handle_over_uni, Qt)
9784 || (len == 4
9785 && STRING_CHAR (p) <= MAX_UNICODE_CHAR))))
9786 {
9787 p += len;
9788 continue;
9789 }
9790
9791
9792
9793 if (len == 2)
9794 {
9795
9796 if (scan_count == 0)
9797 {
9798 if (NILP (handle_8_bit))
9799 return Qnil;
9800 num_8_bit++;
9801 }
9802 else
9803 {
9804 if (src < p)
9805 {
9806 memcpy (dst, src, p - src);
9807 dst += p - src;
9808 }
9809 if (replace_8_bit_len > 0)
9810 {
9811 memcpy (dst, replace_8_bit, replace_8_bit_len);
9812 dst += replace_8_bit_len;
9813 }
9814 else if (EQ (handle_8_bit, Qt))
9815 {
9816 int char8 = STRING_CHAR (p);
9817 *dst++ = CHAR_TO_BYTE8 (char8);
9818 }
9819 }
9820 }
9821 else
9822 {
9823
9824 if (scan_count == 0)
9825 {
9826 if (NILP (handle_over_uni))
9827 return Qnil;
9828 if (len == 4)
9829 num_over_4++;
9830 else
9831 num_over_5++;
9832 }
9833 else
9834 {
9835 if (src < p)
9836 {
9837 memcpy (dst, src, p - src);
9838 dst += p - src;
9839 }
9840 if (replace_over_uni_len > 0)
9841 {
9842 memcpy (dst, replace_over_uni, replace_over_uni_len);
9843 dst += replace_over_uni_len;
9844 }
9845 }
9846 }
9847 p += len;
9848 src = p;
9849 }
9850
9851 if (scan_count == 0)
9852 {
9853
9854 outbytes = nbytes;
9855 if (num_8_bit == 0
9856 && (num_over_4 + num_over_5 == 0 || EQ (handle_over_uni, Qt)))
9857 {
9858
9859
9860
9861 scan_count = 1;
9862 }
9863 else
9864 {
9865
9866
9867 if (num_8_bit > 0)
9868 {
9869 if (CHARACTERP (handle_8_bit))
9870 replace_8_bit = get_char_bytes (XFIXNUM (handle_8_bit),
9871 &replace_8_bit_len);
9872 else if (STRINGP (handle_8_bit))
9873 {
9874 replace_8_bit = SDATA (handle_8_bit);
9875 replace_8_bit_len = SBYTES (handle_8_bit);
9876 }
9877 if (replace_8_bit)
9878 outbytes += (replace_8_bit_len - 2) * num_8_bit;
9879 else if (EQ (handle_8_bit, Qignored))
9880 outbytes -= 2 * num_8_bit;
9881 else if (EQ (handle_8_bit, Qt))
9882 outbytes -= num_8_bit;
9883 else
9884 return Qnil;
9885 }
9886 if (num_over_4 + num_over_5 > 0)
9887 {
9888 if (CHARACTERP (handle_over_uni))
9889 replace_over_uni = get_char_bytes (XFIXNUM (handle_over_uni),
9890 &replace_over_uni_len);
9891 else if (STRINGP (handle_over_uni))
9892 {
9893 replace_over_uni = SDATA (handle_over_uni);
9894 replace_over_uni_len = SBYTES (handle_over_uni);
9895 }
9896 if (num_over_4 > 0)
9897 {
9898 if (replace_over_uni)
9899 outbytes += (replace_over_uni_len - 4) * num_over_4;
9900 else if (EQ (handle_over_uni, Qignored))
9901 outbytes -= 4 * num_over_4;
9902 else if (! EQ (handle_over_uni, Qt))
9903 return Qnil;
9904 }
9905 if (num_over_5 > 0)
9906 {
9907 if (replace_over_uni)
9908 outbytes += (replace_over_uni_len - 5) * num_over_5;
9909 else if (EQ (handle_over_uni, Qignored))
9910 outbytes -= 5 * num_over_5;
9911 else if (! EQ (handle_over_uni, Qt))
9912 return Qnil;
9913 }
9914 }
9915 }
9916
9917
9918 if (BUFFERP (buffer))
9919 {
9920 val = make_fixnum (outbytes);
9921 dst = get_buffer_gap_address (buffer, nbytes);
9922 }
9923 else
9924 {
9925 if (nocopy && (num_8_bit + num_over_4 + num_over_5) == 0)
9926 return string;
9927 val = make_uninit_string (outbytes);
9928 dst = SDATA (val);
9929 }
9930 p = src = SDATA (string);
9931 }
9932 }
9933
9934 if (src < pend)
9935 memcpy (dst, src, pend - src);
9936 if (BUFFERP (buffer))
9937 {
9938 struct buffer *oldb = current_buffer;
9939
9940 current_buffer = XBUFFER (buffer);
9941 insert_from_gap (outbytes, outbytes, false);
9942 current_buffer = oldb;
9943 }
9944 return val;
9945 }
9946
9947
9948
9949
9950
9951
9952
9953
9954
9955
9956
9957
9958
9959
9960
9961
9962
9963
9964
9965
9966
9967
9968
9969
9970
9971
9972
9973
9974
9975
9976
9977
9978
9979
9980
9981
9982
9983
9984
9985
9986
9987
9988
9989
9990
9991
9992
9993
9994
9995
9996
9997
9998
9999
10000
10001
10002
10003
10004
10005
10006
10007
10008
10009
10010
10011
10012
10013
10014
10015
10016
10017
10018 Lisp_Object
10019 decode_string_utf_8 (Lisp_Object string, const char *str, ptrdiff_t str_len,
10020 Lisp_Object buffer, bool nocopy,
10021 Lisp_Object handle_8_bit, Lisp_Object handle_over_uni)
10022 {
10023
10024
10025 #define UTF_8_SEQUENCE_LENGTH(c) \
10026 ((c) < 0xC2 ? 0 \
10027 : (c) < 0xE0 ? 2 \
10028 : (c) < 0xF0 ? 3 \
10029 : (c) < 0xF8 ? 4 \
10030 : (c) == 0xF8 ? 5 \
10031 : 0)
10032
10033 ptrdiff_t nbytes = STRINGP (string) ? SBYTES (string) : str_len;
10034 unsigned char *p = STRINGP (string) ? SDATA (string) : (unsigned char *) str;
10035 unsigned char *str_orig = p;
10036 unsigned char *pend = p + nbytes;
10037 ptrdiff_t num_8_bit = 0;
10038 ptrdiff_t num_over_4 = 0;
10039 ptrdiff_t num_over_5 = 0;
10040 ptrdiff_t outbytes = nbytes;
10041 ptrdiff_t outchars = 0;
10042 unsigned char *src = NULL, *dst = NULL;
10043 bool change_byte_sequence = false;
10044
10045
10046
10047
10048
10049 while (p < pend)
10050 {
10051 src = p;
10052
10053 while (p < pend && *p < 0x80) p++;
10054 outchars += (p - src);
10055 if (p == pend)
10056 break;
10057 int c = *p;
10058 outchars++;
10059 int len = UTF_8_SEQUENCE_LENGTH (c);
10060
10061 if (UTF_8_EXTRA_OCTET_P (p[1])
10062 && (len == 2
10063 || (UTF_8_EXTRA_OCTET_P (p[2])
10064 && (len == 3
10065 || (UTF_8_EXTRA_OCTET_P (p[3])
10066 && len == 4
10067 && STRING_CHAR (p) <= MAX_UNICODE_CHAR)))))
10068 {
10069 p += len;
10070 continue;
10071 }
10072
10073
10074 if (len == 0)
10075 {
10076 if (NILP (handle_8_bit))
10077 return Qnil;
10078 num_8_bit++;
10079 len = 1;
10080 }
10081 else
10082 {
10083 if (NILP (handle_over_uni))
10084 return Qnil;
10085 if (len == 4)
10086 num_over_4++;
10087 else
10088 num_over_5++;
10089 }
10090 change_byte_sequence = true;
10091 p += len;
10092 }
10093
10094 Lisp_Object val;
10095
10096 if (! change_byte_sequence
10097 && NILP (buffer))
10098 {
10099 if (nocopy && STRINGP (string))
10100 return string;
10101 val = make_uninit_multibyte_string (outchars, outbytes);
10102 memcpy (SDATA (val), str_orig, pend - str_orig);
10103 return val;
10104 }
10105
10106
10107 unsigned char *replace_8_bit = NULL, *replace_over_uni = NULL;
10108 int replace_8_bit_len = 0, replace_over_uni_len = 0;
10109
10110 if (change_byte_sequence)
10111 {
10112 if (num_8_bit > 0)
10113 {
10114 if (CHARACTERP (handle_8_bit))
10115 replace_8_bit = get_char_bytes (XFIXNUM (handle_8_bit),
10116 &replace_8_bit_len);
10117 else if (STRINGP (handle_8_bit))
10118 {
10119 replace_8_bit = SDATA (handle_8_bit);
10120 replace_8_bit_len = SBYTES (handle_8_bit);
10121 }
10122 if (replace_8_bit)
10123 outbytes += (replace_8_bit_len - 1) * num_8_bit;
10124 else if (EQ (handle_8_bit, Qignored))
10125 {
10126 outbytes -= num_8_bit;
10127 outchars -= num_8_bit;
10128 }
10129 else
10130 outbytes += num_8_bit;
10131 }
10132 else if (num_over_4 + num_over_5 > 0)
10133 {
10134 if (CHARACTERP (handle_over_uni))
10135 replace_over_uni = get_char_bytes (XFIXNUM (handle_over_uni),
10136 &replace_over_uni_len);
10137 else if (STRINGP (handle_over_uni))
10138 {
10139 replace_over_uni = SDATA (handle_over_uni);
10140 replace_over_uni_len = SBYTES (handle_over_uni);
10141 }
10142 if (num_over_4 > 0)
10143 {
10144 if (replace_over_uni)
10145 outbytes += (replace_over_uni_len - 4) * num_over_4;
10146 else if (EQ (handle_over_uni, Qignored))
10147 {
10148 outbytes -= 4 * num_over_4;
10149 outchars -= num_over_4;
10150 }
10151 }
10152 if (num_over_5 > 0)
10153 {
10154 if (replace_over_uni)
10155 outbytes += (replace_over_uni_len - 5) * num_over_5;
10156 else if (EQ (handle_over_uni, Qignored))
10157 {
10158 outbytes -= 5 * num_over_5;
10159 outchars -= num_over_5;
10160 }
10161 }
10162 }
10163 }
10164
10165
10166 if (BUFFERP (buffer))
10167 {
10168 val = make_fixnum (outchars);
10169 dst = get_buffer_gap_address (buffer, outbytes);
10170 }
10171 else
10172 {
10173 if (nocopy && (num_8_bit + num_over_4 + num_over_5) == 0
10174 && STRINGP (string))
10175 return string;
10176 val = make_uninit_multibyte_string (outchars, outbytes);
10177 dst = SDATA (val);
10178 }
10179
10180 src = str_orig;
10181 if (change_byte_sequence)
10182 {
10183 p = src;
10184 while (p < pend)
10185 {
10186
10187
10188
10189
10190 int c = *p;
10191 if (c < 0x80)
10192 {
10193 p++;
10194 continue;
10195 }
10196 int len = UTF_8_SEQUENCE_LENGTH (c);
10197 if (len > 1)
10198 {
10199 int mlen;
10200 for (mlen = 1; mlen < len && UTF_8_EXTRA_OCTET_P (p[mlen]);
10201 mlen++);
10202 if (mlen == len
10203 && (len <= 3
10204 || (len == 4 && STRING_CHAR (p) <= MAX_UNICODE_CHAR)
10205 || EQ (handle_over_uni, Qt)))
10206 {
10207 p += len;
10208 continue;
10209 }
10210 }
10211
10212 if (src < p)
10213 {
10214 memcpy (dst, src, p - src);
10215 dst += p - src;
10216 }
10217 if (len == 0)
10218 {
10219 if (replace_8_bit)
10220 {
10221 memcpy (dst, replace_8_bit, replace_8_bit_len);
10222 dst += replace_8_bit_len;
10223 }
10224 else if (EQ (handle_8_bit, Qt))
10225 {
10226 dst += BYTE8_STRING (c, dst);
10227 }
10228 len = 1;
10229 }
10230 else
10231 {
10232
10233 if (replace_over_uni)
10234 {
10235 memcpy (dst, replace_over_uni, replace_over_uni_len);
10236 dst += replace_over_uni_len;
10237 }
10238 }
10239 p += len;
10240 src = p;
10241 }
10242 }
10243
10244 if (src < pend)
10245 memcpy (dst, src, pend - src);
10246 if (BUFFERP (buffer))
10247 {
10248 struct buffer *oldb = current_buffer;
10249
10250 current_buffer = XBUFFER (buffer);
10251 insert_from_gap (outchars, outbytes, false);
10252 current_buffer = oldb;
10253 }
10254 return val;
10255 }
10256
10257
10258
10259 #ifdef ENABLE_UTF_8_CONVERTER_TEST
10260
10261
10262
10263
10264
10265
10266
10267
10268
10269
10270
10271 DEFUN ("internal-encode-string-utf-8", Finternal_encode_string_utf_8,
10272 Sinternal_encode_string_utf_8, 7, 7, 0,
10273 doc: )
10274 (Lisp_Object string, Lisp_Object buffer, Lisp_Object nocopy,
10275 Lisp_Object handle_8_bit, Lisp_Object handle_over_uni,
10276 Lisp_Object encode_method, Lisp_Object count)
10277 {
10278 int repeat_count;
10279 Lisp_Object val;
10280
10281
10282 if (! STRINGP (string))
10283 return Qnil;
10284 if (! NILP (buffer)
10285 && (! BUFFERP (buffer)
10286 || ! NILP (BVAR (XBUFFER (buffer), enable_multibyte_characters))))
10287 return Qnil;
10288 if (! NILP (handle_8_bit) && ! EQ (handle_8_bit, Qt)
10289 && ! EQ (handle_8_bit, Qignored)
10290 && ! CHARACTERP (handle_8_bit)
10291 && (! STRINGP (handle_8_bit) || STRING_MULTIBYTE (handle_8_bit)))
10292 return Qnil;
10293 if (! NILP (handle_over_uni) && ! EQ (handle_over_uni, Qt)
10294 && ! EQ (handle_over_uni, Qignored)
10295 && ! CHARACTERP (handle_over_uni)
10296 && (! STRINGP (handle_over_uni) || STRING_MULTIBYTE (handle_over_uni)))
10297 return Qnil;
10298
10299 CHECK_FIXNUM (count);
10300 repeat_count = XFIXNUM (count);
10301
10302 val = Qnil;
10303
10304 if (NILP (encode_method))
10305 {
10306 for (int i = 0; i < repeat_count; i++)
10307 val = encode_string_utf_8 (string, buffer, ! NILP (nocopy),
10308 handle_8_bit, handle_over_uni);
10309 }
10310 else
10311 {
10312 for (int i = 0; i < repeat_count; i++)
10313 val = code_convert_string (string, Qutf_8_unix, Qnil, true,
10314 ! NILP (nocopy), true);
10315 }
10316 return val;
10317 }
10318
10319
10320
10321
10322
10323
10324
10325
10326
10327 DEFUN ("internal-decode-string-utf-8", Finternal_decode_string_utf_8,
10328 Sinternal_decode_string_utf_8, 7, 7, 0,
10329 doc: )
10330 (Lisp_Object string, Lisp_Object buffer, Lisp_Object nocopy,
10331 Lisp_Object handle_8_bit, Lisp_Object handle_over_uni,
10332 Lisp_Object decode_method, Lisp_Object count)
10333 {
10334 int repeat_count;
10335 Lisp_Object val;
10336
10337
10338 if (! STRINGP (string))
10339 return Qnil;
10340 if (! NILP (buffer)
10341 && (! BUFFERP (buffer)
10342 || NILP (BVAR (XBUFFER (buffer), enable_multibyte_characters))))
10343 return Qnil;
10344 if (! NILP (handle_8_bit) && ! EQ (handle_8_bit, Qt)
10345 && ! EQ (handle_8_bit, Qignored)
10346 && ! CHARACTERP (handle_8_bit)
10347 && (! STRINGP (handle_8_bit) || ! STRING_MULTIBYTE (handle_8_bit)))
10348 return Qnil;
10349 if (! NILP (handle_over_uni) && ! EQ (handle_over_uni, Qt)
10350 && ! EQ (handle_over_uni, Qignored)
10351 && ! CHARACTERP (handle_over_uni)
10352 && (! STRINGP (handle_over_uni) || ! STRING_MULTIBYTE (handle_over_uni)))
10353 return Qnil;
10354
10355 CHECK_FIXNUM (count);
10356 repeat_count = XFIXNUM (count);
10357
10358 val = Qnil;
10359
10360 if (NILP (decode_method))
10361 {
10362 for (int i = 0; i < repeat_count; i++)
10363 val = decode_string_utf_8 (string, buffer, ! NILP (nocopy),
10364 handle_8_bit, handle_over_uni);
10365 }
10366 else if (EQ (decode_method, Qt))
10367 {
10368 if (! BUFFERP (buffer))
10369 buffer = Qt;
10370 for (int i = 0; i < repeat_count; i++)
10371 val = code_convert_string (string, Qutf_8_unix, buffer, false,
10372 ! NILP (nocopy), true);
10373 }
10374 else if (! NILP (decode_method))
10375 {
10376 for (int i = 0; i < repeat_count; i++)
10377 val = make_string_from_utf8 ((char *) SDATA (string), SBYTES (string));
10378 }
10379 return val;
10380 }
10381
10382 #endif
10383
10384
10385
10386
10387 static Lisp_Object
10388 convert_string_nocopy (Lisp_Object string, Lisp_Object coding_system,
10389 bool encodep)
10390 {
10391 return code_convert_string (string, coding_system, Qt, encodep, 1, 1);
10392 }
10393
10394
10395
10396 Lisp_Object
10397 decode_file_name (Lisp_Object fname)
10398 {
10399 #ifdef WINDOWSNT
10400
10401
10402
10403 if (! NILP (Fcoding_system_p (Qutf_8)))
10404 return convert_string_nocopy (fname, Qutf_8, 0);
10405 return fname;
10406 #else
10407 if (! NILP (Vfile_name_coding_system))
10408 return convert_string_nocopy (fname, Vfile_name_coding_system, 0);
10409 else if (! NILP (Vdefault_file_name_coding_system))
10410 return convert_string_nocopy (fname, Vdefault_file_name_coding_system, 0);
10411 else
10412 return fname;
10413 #endif
10414 }
10415
10416 static Lisp_Object
10417 encode_file_name_1 (Lisp_Object fname)
10418 {
10419
10420
10421
10422
10423 if (!STRING_MULTIBYTE (fname))
10424 return fname;
10425 #ifdef WINDOWSNT
10426
10427
10428
10429 if (! NILP (Fcoding_system_p (Qutf_8)))
10430 return convert_string_nocopy (fname, Qutf_8, 1);
10431 return fname;
10432 #else
10433 if (! NILP (Vfile_name_coding_system))
10434 return convert_string_nocopy (fname, Vfile_name_coding_system, 1);
10435 else if (! NILP (Vdefault_file_name_coding_system))
10436 return convert_string_nocopy (fname, Vdefault_file_name_coding_system, 1);
10437 else
10438 return fname;
10439 #endif
10440 }
10441
10442 Lisp_Object
10443 encode_file_name (Lisp_Object fname)
10444 {
10445 Lisp_Object encoded = encode_file_name_1 (fname);
10446
10447
10448
10449
10450 CHECK_STRING_NULL_BYTES (encoded);
10451 return encoded;
10452 }
10453
10454 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
10455 2, 4, 0,
10456 doc:
10457
10458
10459
10460
10461
10462
10463
10464
10465
10466
10467
10468
10469 )
10470 (Lisp_Object string, Lisp_Object coding_system, Lisp_Object nocopy, Lisp_Object buffer)
10471 {
10472 return code_convert_string (string, coding_system, buffer,
10473 0, ! NILP (nocopy), 0);
10474 }
10475
10476 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
10477 2, 4, 0,
10478 doc:
10479
10480
10481
10482
10483
10484
10485
10486
10487
10488
10489 )
10490 (Lisp_Object string, Lisp_Object coding_system, Lisp_Object nocopy, Lisp_Object buffer)
10491 {
10492 return code_convert_string (string, coding_system, buffer,
10493 1, ! NILP (nocopy), 0);
10494 }
10495
10496
10497 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
10498 doc:
10499 )
10500 (Lisp_Object code)
10501 {
10502 Lisp_Object spec, attrs, val;
10503 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
10504 EMACS_INT ch;
10505 int c;
10506
10507 CHECK_FIXNAT (code);
10508 ch = XFIXNAT (code);
10509 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
10510 attrs = AREF (spec, 0);
10511
10512 if (ASCII_CHAR_P (ch)
10513 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
10514 return code;
10515
10516 val = CODING_ATTR_CHARSET_LIST (attrs);
10517 charset_roman = CHARSET_FROM_ID (XFIXNUM (XCAR (val))), val = XCDR (val);
10518 charset_kana = CHARSET_FROM_ID (XFIXNUM (XCAR (val))), val = XCDR (val);
10519 charset_kanji = CHARSET_FROM_ID (XFIXNUM (XCAR (val)));
10520
10521 if (ch <= 0x7F)
10522 {
10523 c = ch;
10524 charset = charset_roman;
10525 }
10526 else if (ch >= 0xA0 && ch < 0xDF)
10527 {
10528 c = ch - 0x80;
10529 charset = charset_kana;
10530 }
10531 else
10532 {
10533 EMACS_INT c1 = ch >> 8;
10534 int c2 = ch & 0xFF;
10535
10536 if (c1 < 0x81 || (c1 > 0x9F && c1 < 0xE0) || c1 > 0xEF
10537 || c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
10538 error ("Invalid code: %"pI"d", ch);
10539 c = ch;
10540 SJIS_TO_JIS (c);
10541 charset = charset_kanji;
10542 }
10543 c = DECODE_CHAR (charset, c);
10544 if (c < 0)
10545 error ("Invalid code: %"pI"d", ch);
10546 return make_fixnum (c);
10547 }
10548
10549
10550 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
10551 doc:
10552 )
10553 (Lisp_Object ch)
10554 {
10555 Lisp_Object spec, attrs, charset_list;
10556 int c;
10557 struct charset *charset;
10558 unsigned code;
10559
10560 CHECK_CHARACTER (ch);
10561 c = XFIXNAT (ch);
10562 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
10563 attrs = AREF (spec, 0);
10564
10565 if (ASCII_CHAR_P (c)
10566 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
10567 return ch;
10568
10569 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
10570 charset = char_charset (c, charset_list, &code);
10571 if (code == CHARSET_INVALID_CODE (charset))
10572 error ("Can't encode by shift_jis encoding: %c", c);
10573 JIS_TO_SJIS (code);
10574
10575 return make_fixnum (code);
10576 }
10577
10578 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
10579 doc:
10580 )
10581 (Lisp_Object code)
10582 {
10583 Lisp_Object spec, attrs, val;
10584 struct charset *charset_roman, *charset_big5, *charset;
10585 EMACS_INT ch;
10586 int c;
10587
10588 CHECK_FIXNAT (code);
10589 ch = XFIXNAT (code);
10590 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
10591 attrs = AREF (spec, 0);
10592
10593 if (ASCII_CHAR_P (ch)
10594 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
10595 return code;
10596
10597 val = CODING_ATTR_CHARSET_LIST (attrs);
10598 charset_roman = CHARSET_FROM_ID (XFIXNUM (XCAR (val))), val = XCDR (val);
10599 charset_big5 = CHARSET_FROM_ID (XFIXNUM (XCAR (val)));
10600
10601 if (ch <= 0x7F)
10602 {
10603 c = ch;
10604 charset = charset_roman;
10605 }
10606 else
10607 {
10608 EMACS_INT b1 = ch >> 8;
10609 int b2 = ch & 0x7F;
10610 if (b1 < 0xA1 || b1 > 0xFE
10611 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
10612 error ("Invalid code: %"pI"d", ch);
10613 c = ch;
10614 charset = charset_big5;
10615 }
10616 c = DECODE_CHAR (charset, c);
10617 if (c < 0)
10618 error ("Invalid code: %"pI"d", ch);
10619 return make_fixnum (c);
10620 }
10621
10622 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
10623 doc:
10624 )
10625 (Lisp_Object ch)
10626 {
10627 Lisp_Object spec, attrs, charset_list;
10628 struct charset *charset;
10629 int c;
10630 unsigned code;
10631
10632 CHECK_CHARACTER (ch);
10633 c = XFIXNAT (ch);
10634 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
10635 attrs = AREF (spec, 0);
10636 if (ASCII_CHAR_P (c)
10637 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
10638 return ch;
10639
10640 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
10641 charset = char_charset (c, charset_list, &code);
10642 if (code == CHARSET_INVALID_CODE (charset))
10643 error ("Can't encode by Big5 encoding: %c", c);
10644
10645 return make_fixnum (code);
10646 }
10647
10648
10649 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
10650 Sset_terminal_coding_system_internal, 1, 2, 0,
10651 doc: )
10652 (Lisp_Object coding_system, Lisp_Object terminal)
10653 {
10654 struct terminal *term = decode_live_terminal (terminal);
10655 struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (term);
10656 CHECK_SYMBOL (coding_system);
10657 setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
10658
10659 terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
10660
10661 terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
10662 terminal_coding->src_multibyte = 1;
10663 terminal_coding->dst_multibyte = 0;
10664 tset_charset_list
10665 (term, (terminal_coding->common_flags & CODING_REQUIRE_ENCODING_MASK
10666 ? coding_charset_list (terminal_coding)
10667 : list1i (charset_ascii)));
10668 return Qnil;
10669 }
10670
10671 DEFUN ("set-safe-terminal-coding-system-internal",
10672 Fset_safe_terminal_coding_system_internal,
10673 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
10674 doc: )
10675 (Lisp_Object coding_system)
10676 {
10677 CHECK_SYMBOL (coding_system);
10678 setup_coding_system (Fcheck_coding_system (coding_system),
10679 &safe_terminal_coding);
10680
10681 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
10682 safe_terminal_coding.src_multibyte = 1;
10683 safe_terminal_coding.dst_multibyte = 0;
10684 return Qnil;
10685 }
10686
10687 DEFUN ("terminal-coding-system", Fterminal_coding_system,
10688 Sterminal_coding_system, 0, 1, 0,
10689 doc:
10690
10691 )
10692 (Lisp_Object terminal)
10693 {
10694 struct coding_system *terminal_coding
10695 = TERMINAL_TERMINAL_CODING (decode_live_terminal (terminal));
10696 Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
10697
10698
10699 return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
10700 }
10701
10702 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
10703 Sset_keyboard_coding_system_internal, 1, 2, 0,
10704 doc: )
10705 (Lisp_Object coding_system, Lisp_Object terminal)
10706 {
10707 struct terminal *t = decode_live_terminal (terminal);
10708 CHECK_SYMBOL (coding_system);
10709 if (NILP (coding_system))
10710 coding_system = Qno_conversion;
10711 else
10712 Fcheck_coding_system (coding_system);
10713 setup_coding_system (coding_system, TERMINAL_KEYBOARD_CODING (t));
10714
10715 TERMINAL_KEYBOARD_CODING (t)->common_flags
10716 &= ~CODING_ANNOTATE_COMPOSITION_MASK;
10717 return Qnil;
10718 }
10719
10720 DEFUN ("keyboard-coding-system",
10721 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
10722 doc: )
10723 (Lisp_Object terminal)
10724 {
10725 return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
10726 (decode_live_terminal (terminal))->id);
10727 }
10728
10729
10730 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
10731 Sfind_operation_coding_system, 1, MANY, 0,
10732 doc:
10733
10734
10735
10736
10737
10738
10739
10740
10741
10742
10743
10744
10745
10746
10747
10748
10749
10750
10751
10752
10753
10754
10755
10756
10757
10758
10759
10760
10761
10762
10763
10764
10765
10766
10767
10768
10769
10770 )
10771 (ptrdiff_t nargs, Lisp_Object *args)
10772 {
10773 Lisp_Object operation, target_idx, target, val;
10774 register Lisp_Object chain;
10775
10776 if (nargs < 2)
10777 error ("Too few arguments");
10778 operation = args[0];
10779 if (!SYMBOLP (operation)
10780 || (target_idx = Fget (operation, Qtarget_idx), !FIXNATP (target_idx)))
10781 error ("Invalid first argument");
10782 if (nargs <= 1 + XFIXNAT (target_idx))
10783 error ("Too few arguments for operation `%s'",
10784 SDATA (SYMBOL_NAME (operation)));
10785 target = args[XFIXNAT (target_idx) + 1];
10786 if (!(STRINGP (target)
10787 || (EQ (operation, Qinsert_file_contents) && CONSP (target)
10788 && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
10789 || (EQ (operation, Qopen_network_stream)
10790 && (FIXNUMP (target) || EQ (target, Qt)))))
10791 error ("Invalid argument %"pI"d of operation `%s'",
10792 XFIXNAT (target_idx) + 1, SDATA (SYMBOL_NAME (operation)));
10793 if (CONSP (target))
10794 target = XCAR (target);
10795
10796 chain = ((EQ (operation, Qinsert_file_contents)
10797 || EQ (operation, Qwrite_region))
10798 ? Vfile_coding_system_alist
10799 : (EQ (operation, Qopen_network_stream)
10800 ? Vnetwork_coding_system_alist
10801 : Vprocess_coding_system_alist));
10802 if (NILP (chain))
10803 return Qnil;
10804
10805 for (; CONSP (chain); chain = XCDR (chain))
10806 {
10807 Lisp_Object elt;
10808
10809 elt = XCAR (chain);
10810 if (CONSP (elt)
10811 && ((STRINGP (target)
10812 && STRINGP (XCAR (elt))
10813 && fast_string_match (XCAR (elt), target) >= 0)
10814 || (FIXNUMP (target) && BASE_EQ (target, XCAR (elt)))))
10815 {
10816 val = XCDR (elt);
10817
10818
10819 if (CONSP (val))
10820 return val;
10821 if (! SYMBOLP (val))
10822 return Qnil;
10823 if (! NILP (Fcoding_system_p (val)))
10824 return Fcons (val, val);
10825 if (! NILP (Ffboundp (val)))
10826 {
10827
10828
10829
10830 val = call1 (val, Flist (nargs, args));
10831 if (CONSP (val))
10832 return val;
10833 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
10834 return Fcons (val, val);
10835 }
10836 return Qnil;
10837 }
10838 }
10839 return Qnil;
10840 }
10841
10842 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
10843 Sset_coding_system_priority, 0, MANY, 0,
10844 doc:
10845
10846
10847
10848 )
10849 (ptrdiff_t nargs, Lisp_Object *args)
10850 {
10851 ptrdiff_t i, j;
10852 bool changed[coding_category_max];
10853 enum coding_category priorities[coding_category_max];
10854
10855 memset (changed, 0, sizeof changed);
10856
10857 for (i = j = 0; i < nargs; i++)
10858 {
10859 enum coding_category category;
10860 Lisp_Object spec, attrs;
10861
10862 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
10863 attrs = AREF (spec, 0);
10864 category = XFIXNUM (CODING_ATTR_CATEGORY (attrs));
10865 if (changed[category])
10866
10867
10868 continue;
10869 changed[category] = 1;
10870 priorities[j++] = category;
10871 if (coding_categories[category].id >= 0
10872 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
10873 setup_coding_system (args[i], &coding_categories[category]);
10874 Fset (AREF (Vcoding_category_table, category), args[i]);
10875 }
10876
10877
10878
10879
10880 for (i = j, j = 0; i < coding_category_max; i++, j++)
10881 {
10882 while (j < coding_category_max
10883 && changed[coding_priorities[j]])
10884 j++;
10885 if (j == coding_category_max)
10886 emacs_abort ();
10887 priorities[i] = coding_priorities[j];
10888 }
10889
10890 memcpy (coding_priorities, priorities, sizeof priorities);
10891
10892
10893 Vcoding_category_list = Qnil;
10894 for (i = coding_category_max; i-- > 0; )
10895 Vcoding_category_list
10896 = Fcons (AREF (Vcoding_category_table, priorities[i]),
10897 Vcoding_category_list);
10898
10899 return Qnil;
10900 }
10901
10902 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
10903 Scoding_system_priority_list, 0, 1, 0,
10904 doc:
10905
10906
10907
10908 )
10909 (Lisp_Object highestp)
10910 {
10911 int i;
10912 Lisp_Object val;
10913
10914 for (i = 0, val = Qnil; i < coding_category_max; i++)
10915 {
10916 enum coding_category category = coding_priorities[i];
10917 int id = coding_categories[category].id;
10918 Lisp_Object attrs;
10919
10920 if (id < 0)
10921 continue;
10922 attrs = CODING_ID_ATTRS (id);
10923 if (! NILP (highestp))
10924 return CODING_ATTR_BASE_NAME (attrs);
10925 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
10926 }
10927 return Fnreverse (val);
10928 }
10929
10930 static Lisp_Object
10931 make_subsidiaries (Lisp_Object base)
10932 {
10933 static char const suffixes[][8] = { "-unix", "-dos", "-mac" };
10934 ptrdiff_t base_name_len = SBYTES (SYMBOL_NAME (base));
10935 USE_SAFE_ALLOCA;
10936 char *buf = SAFE_ALLOCA (base_name_len + 6);
10937
10938 memcpy (buf, SDATA (SYMBOL_NAME (base)), base_name_len);
10939 Lisp_Object subsidiaries = make_nil_vector (3);
10940 for (int i = 0; i < 3; i++)
10941 {
10942 strcpy (buf + base_name_len, suffixes[i]);
10943 ASET (subsidiaries, i, intern (buf));
10944 }
10945 SAFE_FREE ();
10946 return subsidiaries;
10947 }
10948
10949
10950 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
10951 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
10952 doc:
10953 )
10954 (ptrdiff_t nargs, Lisp_Object *args)
10955 {
10956 enum coding_category category;
10957 int max_charset_id = 0;
10958
10959 if (nargs < coding_arg_max)
10960 goto short_args;
10961
10962 Lisp_Object attrs = make_nil_vector (coding_attr_last_index);
10963
10964 Lisp_Object name = args[coding_arg_name];
10965 CHECK_SYMBOL (name);
10966 ASET (attrs, coding_attr_base_name, name);
10967
10968 Lisp_Object val = args[coding_arg_mnemonic];
10969
10970 if (STRINGP (val))
10971 val = make_fixnum (STRING_CHAR (SDATA (val)));
10972 else
10973 CHECK_CHARACTER (val);
10974 ASET (attrs, coding_attr_mnemonic, val);
10975
10976 Lisp_Object coding_type = args[coding_arg_coding_type];
10977 CHECK_SYMBOL (coding_type);
10978 ASET (attrs, coding_attr_type, coding_type);
10979
10980 Lisp_Object charset_list = args[coding_arg_charset_list];
10981 if (SYMBOLP (charset_list))
10982 {
10983 if (EQ (charset_list, Qiso_2022))
10984 {
10985 if (! EQ (coding_type, Qiso_2022))
10986 error ("Invalid charset-list");
10987 charset_list = Viso_2022_charset_list;
10988 }
10989 else if (EQ (charset_list, Qemacs_mule))
10990 {
10991 if (! EQ (coding_type, Qemacs_mule))
10992 error ("Invalid charset-list");
10993 charset_list = Vemacs_mule_charset_list;
10994 }
10995 for (Lisp_Object tail = charset_list; CONSP (tail); tail = XCDR (tail))
10996 {
10997 if (! RANGED_FIXNUMP (0, XCAR (tail), INT_MAX - 1))
10998 error ("Invalid charset-list");
10999 if (max_charset_id < XFIXNAT (XCAR (tail)))
11000 max_charset_id = XFIXNAT (XCAR (tail));
11001 }
11002 }
11003 else
11004 {
11005 charset_list = Fcopy_sequence (charset_list);
11006 for (Lisp_Object tail = charset_list; CONSP (tail); tail = XCDR (tail))
11007 {
11008 struct charset *charset;
11009
11010 val = XCAR (tail);
11011 CHECK_CHARSET_GET_CHARSET (val, charset);
11012 if (EQ (coding_type, Qiso_2022)
11013 ? CHARSET_ISO_FINAL (charset) < 0
11014 : EQ (coding_type, Qemacs_mule)
11015 ? CHARSET_EMACS_MULE_ID (charset) < 0
11016 : 0)
11017 error ("Can't handle charset `%s'",
11018 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
11019
11020 XSETCAR (tail, make_fixnum (charset->id));
11021 if (max_charset_id < charset->id)
11022 max_charset_id = charset->id;
11023 }
11024 }
11025 ASET (attrs, coding_attr_charset_list, charset_list);
11026
11027 Lisp_Object safe_charsets = make_uninit_string (max_charset_id + 1);
11028 memset (SDATA (safe_charsets), 255, max_charset_id + 1);
11029 for (Lisp_Object tail = charset_list; CONSP (tail); tail = XCDR (tail))
11030 SSET (safe_charsets, XFIXNAT (XCAR (tail)), 0);
11031 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
11032
11033 ASET (attrs, coding_attr_ascii_compat, args[coding_arg_ascii_compatible_p]);
11034
11035 val = args[coding_arg_decode_translation_table];
11036 if (! CHAR_TABLE_P (val) && ! CONSP (val))
11037 CHECK_SYMBOL (val);
11038 ASET (attrs, coding_attr_decode_tbl, val);
11039
11040 val = args[coding_arg_encode_translation_table];
11041 if (! CHAR_TABLE_P (val) && ! CONSP (val))
11042 CHECK_SYMBOL (val);
11043 ASET (attrs, coding_attr_encode_tbl, val);
11044
11045 val = args[coding_arg_post_read_conversion];
11046 CHECK_SYMBOL (val);
11047 ASET (attrs, coding_attr_post_read, val);
11048
11049 val = args[coding_arg_pre_write_conversion];
11050 CHECK_SYMBOL (val);
11051 ASET (attrs, coding_attr_pre_write, val);
11052
11053 val = args[coding_arg_default_char];
11054 if (NILP (val))
11055 ASET (attrs, coding_attr_default_char, make_fixnum (' '));
11056 else
11057 {
11058 CHECK_CHARACTER (val);
11059 ASET (attrs, coding_attr_default_char, val);
11060 }
11061
11062 val = args[coding_arg_for_unibyte];
11063 ASET (attrs, coding_attr_for_unibyte, NILP (val) ? Qnil : Qt);
11064
11065 val = args[coding_arg_plist];
11066 CHECK_LIST (val);
11067 ASET (attrs, coding_attr_plist, val);
11068
11069 if (EQ (coding_type, Qcharset))
11070 {
11071
11072
11073
11074
11075
11076
11077
11078
11079
11080
11081
11082
11083 val = make_nil_vector (256);
11084
11085 for (Lisp_Object tail = charset_list; CONSP (tail); tail = XCDR (tail))
11086 {
11087 struct charset *charset = CHARSET_FROM_ID (XFIXNAT (XCAR (tail)));
11088 int dim = CHARSET_DIMENSION (charset);
11089 int idx = (dim - 1) * 4;
11090
11091 if (CHARSET_ASCII_COMPATIBLE_P (charset))
11092 ASET (attrs, coding_attr_ascii_compat, Qt);
11093
11094 for (int i = charset->code_space[idx];
11095 i <= charset->code_space[idx + 1]; i++)
11096 {
11097 Lisp_Object tmp, tmp2;
11098 int dim2;
11099
11100 tmp = AREF (val, i);
11101 if (NILP (tmp))
11102 tmp = XCAR (tail);
11103 else if (FIXNATP (tmp))
11104 {
11105 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFIXNAT (tmp)));
11106 if (dim < dim2)
11107 tmp = list2 (XCAR (tail), tmp);
11108 else
11109 tmp = list2 (tmp, XCAR (tail));
11110 }
11111 else
11112 {
11113 for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
11114 {
11115 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFIXNAT (XCAR (tmp2))));
11116 if (dim < dim2)
11117 break;
11118 }
11119 if (NILP (tmp2))
11120 tmp = nconc2 (tmp, list1 (XCAR (tail)));
11121 else
11122 {
11123 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
11124 XSETCAR (tmp2, XCAR (tail));
11125 }
11126 }
11127 ASET (val, i, tmp);
11128 }
11129 }
11130 ASET (attrs, coding_attr_charset_valids, val);
11131 category = coding_category_charset;
11132 }
11133 else if (EQ (coding_type, Qccl))
11134 {
11135 Lisp_Object valids;
11136
11137 if (nargs < coding_arg_ccl_max)
11138 goto short_args;
11139
11140 val = args[coding_arg_ccl_decoder];
11141 CHECK_CCL_PROGRAM (val);
11142 if (VECTORP (val))
11143 val = Fcopy_sequence (val);
11144 ASET (attrs, coding_attr_ccl_decoder, val);
11145
11146 val = args[coding_arg_ccl_encoder];
11147 CHECK_CCL_PROGRAM (val);
11148 if (VECTORP (val))
11149 val = Fcopy_sequence (val);
11150 ASET (attrs, coding_attr_ccl_encoder, val);
11151
11152 val = args[coding_arg_ccl_valids];
11153 valids = Fmake_string (make_fixnum (256), make_fixnum (0), Qnil);
11154 for (Lisp_Object tail = val; CONSP (tail); tail = XCDR (tail))
11155 {
11156 int from, to;
11157
11158 val = XCAR (tail);
11159 if (FIXNUMP (val))
11160 {
11161 if (! (0 <= XFIXNUM (val) && XFIXNUM (val) <= 255))
11162 args_out_of_range_3 (val, make_fixnum (0), make_fixnum (255));
11163 from = to = XFIXNUM (val);
11164 }
11165 else
11166 {
11167 CHECK_CONS (val);
11168 from = check_integer_range (XCAR (val), 0, 255);
11169 to = check_integer_range (XCDR (val), from, 255);
11170 }
11171 for (int i = from; i <= to; i++)
11172 SSET (valids, i, 1);
11173 }
11174 ASET (attrs, coding_attr_ccl_valids, valids);
11175
11176 category = coding_category_ccl;
11177 }
11178 else if (EQ (coding_type, Qutf_16))
11179 {
11180 Lisp_Object bom, endian;
11181
11182 ASET (attrs, coding_attr_ascii_compat, Qnil);
11183
11184 if (nargs < coding_arg_utf16_max)
11185 goto short_args;
11186
11187 bom = args[coding_arg_utf16_bom];
11188 if (! NILP (bom) && ! EQ (bom, Qt))
11189 {
11190 CHECK_CONS (bom);
11191 val = XCAR (bom);
11192 CHECK_CODING_SYSTEM (val);
11193 val = XCDR (bom);
11194 CHECK_CODING_SYSTEM (val);
11195 }
11196 ASET (attrs, coding_attr_utf_bom, bom);
11197
11198 endian = args[coding_arg_utf16_endian];
11199 CHECK_SYMBOL (endian);
11200 if (NILP (endian))
11201 endian = Qbig;
11202 else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
11203 error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
11204 ASET (attrs, coding_attr_utf_16_endian, endian);
11205
11206 category = (CONSP (bom)
11207 ? coding_category_utf_16_auto
11208 : NILP (bom)
11209 ? (EQ (endian, Qbig)
11210 ? coding_category_utf_16_be_nosig
11211 : coding_category_utf_16_le_nosig)
11212 : (EQ (endian, Qbig)
11213 ? coding_category_utf_16_be
11214 : coding_category_utf_16_le));
11215 }
11216 else if (EQ (coding_type, Qiso_2022))
11217 {
11218 Lisp_Object initial, reg_usage, request, flags;
11219
11220 if (nargs < coding_arg_iso2022_max)
11221 goto short_args;
11222
11223 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
11224 CHECK_VECTOR (initial);
11225 for (int i = 0; i < 4; i++)
11226 {
11227 val = AREF (initial, i);
11228 if (! NILP (val))
11229 {
11230 struct charset *charset;
11231
11232 CHECK_CHARSET_GET_CHARSET (val, charset);
11233 ASET (initial, i, make_fixnum (CHARSET_ID (charset)));
11234 if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
11235 ASET (attrs, coding_attr_ascii_compat, Qt);
11236 }
11237 else
11238 ASET (initial, i, make_fixnum (-1));
11239 }
11240
11241 reg_usage = args[coding_arg_iso2022_reg_usage];
11242 CHECK_CONS (reg_usage);
11243 CHECK_FIXNUM (XCAR (reg_usage));
11244 CHECK_FIXNUM (XCDR (reg_usage));
11245
11246 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
11247 for (Lisp_Object tail = request; CONSP (tail); tail = XCDR (tail))
11248 {
11249 int id;
11250
11251 val = XCAR (tail);
11252 CHECK_CONS (val);
11253 CHECK_CHARSET_GET_ID (XCAR (val), id);
11254 check_integer_range (XCDR (val), 0, 3);
11255 XSETCAR (val, make_fixnum (id));
11256 }
11257
11258 flags = args[coding_arg_iso2022_flags];
11259 CHECK_FIXNAT (flags);
11260 int i = XFIXNUM (flags) & INT_MAX;
11261 if (EQ (args[coding_arg_charset_list], Qiso_2022))
11262 i |= CODING_ISO_FLAG_FULL_SUPPORT;
11263 flags = make_fixnum (i);
11264
11265 ASET (attrs, coding_attr_iso_initial, initial);
11266 ASET (attrs, coding_attr_iso_usage, reg_usage);
11267 ASET (attrs, coding_attr_iso_request, request);
11268 ASET (attrs, coding_attr_iso_flags, flags);
11269 setup_iso_safe_charsets (attrs);
11270
11271 if (i & CODING_ISO_FLAG_SEVEN_BITS)
11272 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
11273 | CODING_ISO_FLAG_SINGLE_SHIFT))
11274 ? coding_category_iso_7_else
11275 : EQ (args[coding_arg_charset_list], Qiso_2022)
11276 ? coding_category_iso_7
11277 : coding_category_iso_7_tight);
11278 else
11279 {
11280 int id = XFIXNUM (AREF (initial, 1));
11281
11282 category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
11283 || EQ (args[coding_arg_charset_list], Qiso_2022)
11284 || id < 0)
11285 ? coding_category_iso_8_else
11286 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
11287 ? coding_category_iso_8_1
11288 : coding_category_iso_8_2);
11289 }
11290 if (category != coding_category_iso_8_1
11291 && category != coding_category_iso_8_2)
11292 ASET (attrs, coding_attr_ascii_compat, Qnil);
11293 }
11294 else if (EQ (coding_type, Qemacs_mule))
11295 {
11296 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
11297 ASET (attrs, coding_attr_emacs_mule_full, Qt);
11298 ASET (attrs, coding_attr_ascii_compat, Qt);
11299 category = coding_category_emacs_mule;
11300 }
11301 else if (EQ (coding_type, Qshift_jis))
11302 {
11303 ptrdiff_t charset_list_len = list_length (charset_list);
11304 if (charset_list_len != 3 && charset_list_len != 4)
11305 error ("There should be three or four charsets");
11306
11307 struct charset *charset = CHARSET_FROM_ID (XFIXNUM (XCAR (charset_list)));
11308 if (CHARSET_DIMENSION (charset) != 1)
11309 error ("Dimension of charset %s is not one",
11310 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
11311 if (CHARSET_ASCII_COMPATIBLE_P (charset))
11312 ASET (attrs, coding_attr_ascii_compat, Qt);
11313
11314 charset_list = XCDR (charset_list);
11315 charset = CHARSET_FROM_ID (XFIXNUM (XCAR (charset_list)));
11316 if (CHARSET_DIMENSION (charset) != 1)
11317 error ("Dimension of charset %s is not one",
11318 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
11319
11320 charset_list = XCDR (charset_list);
11321 charset = CHARSET_FROM_ID (XFIXNUM (XCAR (charset_list)));
11322 if (CHARSET_DIMENSION (charset) != 2)
11323 error ("Dimension of charset %s is not two",
11324 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
11325
11326 charset_list = XCDR (charset_list);
11327 if (! NILP (charset_list))
11328 {
11329 charset = CHARSET_FROM_ID (XFIXNUM (XCAR (charset_list)));
11330 if (CHARSET_DIMENSION (charset) != 2)
11331 error ("Dimension of charset %s is not two",
11332 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
11333 }
11334
11335 category = coding_category_sjis;
11336 Vsjis_coding_system = name;
11337 }
11338 else if (EQ (coding_type, Qbig5))
11339 {
11340 struct charset *charset;
11341
11342 if (list_length (charset_list) != 2)
11343 error ("There should be just two charsets");
11344
11345 charset = CHARSET_FROM_ID (XFIXNUM (XCAR (charset_list)));
11346 if (CHARSET_DIMENSION (charset) != 1)
11347 error ("Dimension of charset %s is not one",
11348 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
11349 if (CHARSET_ASCII_COMPATIBLE_P (charset))
11350 ASET (attrs, coding_attr_ascii_compat, Qt);
11351
11352 charset_list = XCDR (charset_list);
11353 charset = CHARSET_FROM_ID (XFIXNUM (XCAR (charset_list)));
11354 if (CHARSET_DIMENSION (charset) != 2)
11355 error ("Dimension of charset %s is not two",
11356 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
11357
11358 category = coding_category_big5;
11359 Vbig5_coding_system = name;
11360 }
11361 else if (EQ (coding_type, Qraw_text))
11362 {
11363 category = coding_category_raw_text;
11364 ASET (attrs, coding_attr_ascii_compat, Qt);
11365 }
11366 else if (EQ (coding_type, Qutf_8))
11367 {
11368 Lisp_Object bom;
11369
11370 if (nargs < coding_arg_utf8_max)
11371 goto short_args;
11372
11373 bom = args[coding_arg_utf8_bom];
11374 if (! NILP (bom) && ! EQ (bom, Qt))
11375 {
11376 CHECK_CONS (bom);
11377 val = XCAR (bom);
11378 CHECK_CODING_SYSTEM (val);
11379 val = XCDR (bom);
11380 CHECK_CODING_SYSTEM (val);
11381 }
11382 ASET (attrs, coding_attr_utf_bom, bom);
11383 if (NILP (bom))
11384 ASET (attrs, coding_attr_ascii_compat, Qt);
11385
11386 category = (CONSP (bom) ? coding_category_utf_8_auto
11387 : NILP (bom) ? coding_category_utf_8_nosig
11388 : coding_category_utf_8_sig);
11389 }
11390 else if (EQ (coding_type, Qundecided))
11391 {
11392 if (nargs < coding_arg_undecided_max)
11393 goto short_args;
11394 ASET (attrs, coding_attr_undecided_inhibit_null_byte_detection,
11395 args[coding_arg_undecided_inhibit_null_byte_detection]);
11396 ASET (attrs, coding_attr_undecided_inhibit_iso_escape_detection,
11397 args[coding_arg_undecided_inhibit_iso_escape_detection]);
11398 ASET (attrs, coding_attr_undecided_prefer_utf_8,
11399 args[coding_arg_undecided_prefer_utf_8]);
11400 category = coding_category_undecided;
11401 }
11402 else
11403 error ("Invalid coding system type: %s",
11404 SDATA (SYMBOL_NAME (coding_type)));
11405
11406 ASET (attrs, coding_attr_category, make_fixnum (category));
11407 ASET (attrs, coding_attr_plist,
11408 Fcons (QCcategory,
11409 Fcons (AREF (Vcoding_category_table, category),
11410 CODING_ATTR_PLIST (attrs))));
11411 ASET (attrs, coding_attr_plist,
11412 Fcons (QCascii_compatible_p,
11413 Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
11414 CODING_ATTR_PLIST (attrs))));
11415
11416 Lisp_Object eol_type = args[coding_arg_eol_type];
11417 if (! NILP (eol_type)
11418 && ! EQ (eol_type, Qunix)
11419 && ! EQ (eol_type, Qdos)
11420 && ! EQ (eol_type, Qmac))
11421 error ("Invalid eol-type");
11422
11423 Lisp_Object aliases = list1 (name);
11424
11425 if (NILP (eol_type))
11426 {
11427 eol_type = make_subsidiaries (name);
11428 for (int i = 0; i < 3; i++)
11429 {
11430 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
11431
11432 this_name = AREF (eol_type, i);
11433 this_aliases = list1 (this_name);
11434 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
11435 this_spec = make_uninit_vector (3);
11436 ASET (this_spec, 0, attrs);
11437 ASET (this_spec, 1, this_aliases);
11438 ASET (this_spec, 2, this_eol_type);
11439 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
11440 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
11441 val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist, Qnil);
11442 if (NILP (val))
11443 Vcoding_system_alist
11444 = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
11445 Vcoding_system_alist);
11446 }
11447 }
11448
11449 Lisp_Object spec_vec = make_uninit_vector (3);
11450 ASET (spec_vec, 0, attrs);
11451 ASET (spec_vec, 1, aliases);
11452 ASET (spec_vec, 2, eol_type);
11453
11454 Fputhash (name, spec_vec, Vcoding_system_hash_table);
11455 Vcoding_system_list = Fcons (name, Vcoding_system_list);
11456 val = Fassoc (Fsymbol_name (name), Vcoding_system_alist, Qnil);
11457 if (NILP (val))
11458 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
11459 Vcoding_system_alist);
11460
11461 int id = coding_categories[category].id;
11462 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
11463 setup_coding_system (name, &coding_categories[category]);
11464
11465 return Qnil;
11466
11467 short_args:
11468 Fsignal (Qwrong_number_of_arguments,
11469 Fcons (intern ("define-coding-system-internal"),
11470 make_fixnum (nargs)));
11471 }
11472
11473
11474 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
11475 3, 3, 0,
11476 doc:
11477
11478
11479
11480
11481
11482
11483
11484
11485
11486
11487 )
11488 (Lisp_Object coding_system, Lisp_Object prop, Lisp_Object val)
11489 {
11490 Lisp_Object spec, attrs;
11491
11492 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
11493 attrs = AREF (spec, 0);
11494 if (EQ (prop, QCmnemonic))
11495 {
11496
11497 if (STRINGP (val))
11498 val = make_fixnum (STRING_CHAR (SDATA (val)));
11499 else
11500 CHECK_CHARACTER (val);
11501 ASET (attrs, coding_attr_mnemonic, val);
11502 }
11503 else if (EQ (prop, QCdefault_char))
11504 {
11505 if (NILP (val))
11506 val = make_fixnum (' ');
11507 else
11508 CHECK_CHARACTER (val);
11509 ASET (attrs, coding_attr_default_char, val);
11510 }
11511 else if (EQ (prop, QCdecode_translation_table))
11512 {
11513 if (! CHAR_TABLE_P (val) && ! CONSP (val))
11514 CHECK_SYMBOL (val);
11515 ASET (attrs, coding_attr_decode_tbl, val);
11516 }
11517 else if (EQ (prop, QCencode_translation_table))
11518 {
11519 if (! CHAR_TABLE_P (val) && ! CONSP (val))
11520 CHECK_SYMBOL (val);
11521 ASET (attrs, coding_attr_encode_tbl, val);
11522 }
11523 else if (EQ (prop, QCpost_read_conversion))
11524 {
11525 CHECK_SYMBOL (val);
11526 ASET (attrs, coding_attr_post_read, val);
11527 }
11528 else if (EQ (prop, QCpre_write_conversion))
11529 {
11530 CHECK_SYMBOL (val);
11531 ASET (attrs, coding_attr_pre_write, val);
11532 }
11533 else if (EQ (prop, QCascii_compatible_p))
11534 {
11535 ASET (attrs, coding_attr_ascii_compat, val);
11536 }
11537
11538 ASET (attrs, coding_attr_plist,
11539 plist_put (CODING_ATTR_PLIST (attrs), prop, val));
11540 return val;
11541 }
11542
11543
11544 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
11545 Sdefine_coding_system_alias, 2, 2, 0,
11546 doc: )
11547 (Lisp_Object alias, Lisp_Object coding_system)
11548 {
11549 Lisp_Object spec, aliases, eol_type, val;
11550
11551 CHECK_SYMBOL (alias);
11552 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
11553 aliases = AREF (spec, 1);
11554
11555
11556
11557 while (!NILP (XCDR (aliases)))
11558 aliases = XCDR (aliases);
11559 XSETCDR (aliases, list1 (alias));
11560
11561 eol_type = AREF (spec, 2);
11562 if (VECTORP (eol_type))
11563 {
11564 Lisp_Object subsidiaries;
11565 int i;
11566
11567 subsidiaries = make_subsidiaries (alias);
11568 for (i = 0; i < 3; i++)
11569 Fdefine_coding_system_alias (AREF (subsidiaries, i),
11570 AREF (eol_type, i));
11571 }
11572
11573 Fputhash (alias, spec, Vcoding_system_hash_table);
11574 Vcoding_system_list = Fcons (alias, Vcoding_system_list);
11575 val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist, Qnil);
11576 if (NILP (val))
11577 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
11578 Vcoding_system_alist);
11579
11580 return Qnil;
11581 }
11582
11583 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
11584 1, 1, 0,
11585 doc:
11586 )
11587 (Lisp_Object coding_system)
11588 {
11589 Lisp_Object spec, attrs;
11590
11591 if (NILP (coding_system))
11592 return (Qno_conversion);
11593 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
11594 attrs = AREF (spec, 0);
11595 return CODING_ATTR_BASE_NAME (attrs);
11596 }
11597
11598 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
11599 1, 1, 0,
11600 doc: )
11601 (Lisp_Object coding_system)
11602 {
11603 Lisp_Object spec, attrs;
11604
11605 if (NILP (coding_system))
11606 coding_system = Qno_conversion;
11607 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
11608 attrs = AREF (spec, 0);
11609 return CODING_ATTR_PLIST (attrs);
11610 }
11611
11612
11613 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
11614 1, 1, 0,
11615 doc: )
11616 (Lisp_Object coding_system)
11617 {
11618 Lisp_Object spec;
11619
11620 if (NILP (coding_system))
11621 coding_system = Qno_conversion;
11622 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
11623 return AREF (spec, 1);
11624 }
11625
11626 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
11627 Scoding_system_eol_type, 1, 1, 0,
11628 doc:
11629
11630
11631
11632
11633
11634
11635
11636 )
11637 (Lisp_Object coding_system)
11638 {
11639 Lisp_Object spec, eol_type;
11640 int n;
11641
11642 if (NILP (coding_system))
11643 coding_system = Qno_conversion;
11644 if (! CODING_SYSTEM_P (coding_system))
11645 return Qnil;
11646 spec = CODING_SYSTEM_SPEC (coding_system);
11647 eol_type = AREF (spec, 2);
11648 if (VECTORP (eol_type))
11649 return Fcopy_sequence (eol_type);
11650 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
11651 return make_fixnum (n);
11652 }
11653
11654
11655
11656
11657 void
11658 init_coding_once (void)
11659 {
11660 int i;
11661
11662 for (i = 0; i < coding_category_max; i++)
11663 {
11664 coding_categories[i].id = -1;
11665 coding_priorities[i] = i;
11666 }
11667
11668 PDUMPER_REMEMBER_SCALAR (coding_categories);
11669 PDUMPER_REMEMBER_SCALAR (coding_priorities);
11670
11671
11672 for (i = 0; i < 0x20; i++)
11673 iso_code_class[i] = ISO_control_0;
11674 for (i = 0x21; i < 0x7F; i++)
11675 iso_code_class[i] = ISO_graphic_plane_0;
11676 for (i = 0x80; i < 0xA0; i++)
11677 iso_code_class[i] = ISO_control_1;
11678 for (i = 0xA1; i < 0xFF; i++)
11679 iso_code_class[i] = ISO_graphic_plane_1;
11680 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
11681 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
11682 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
11683 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
11684 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
11685 iso_code_class[ISO_CODE_ESC] = ISO_escape;
11686 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
11687 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
11688 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
11689
11690 PDUMPER_REMEMBER_SCALAR (iso_code_class);
11691
11692 for (i = 0; i < 256; i++)
11693 {
11694 emacs_mule_bytes[i] = 1;
11695 }
11696 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
11697 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
11698 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
11699 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
11700
11701 PDUMPER_REMEMBER_SCALAR (emacs_mule_bytes);
11702 }
11703
11704 static void reset_coding_after_pdumper_load (void);
11705
11706 void
11707 syms_of_coding (void)
11708 {
11709 staticpro (&Vcoding_system_hash_table);
11710 Vcoding_system_hash_table = CALLN (Fmake_hash_table, QCtest, Qeq);
11711
11712 staticpro (&Vsjis_coding_system);
11713 Vsjis_coding_system = Qnil;
11714
11715 staticpro (&Vbig5_coding_system);
11716 Vbig5_coding_system = Qnil;
11717
11718 staticpro (&Vcode_conversion_reused_workbuf);
11719 Vcode_conversion_reused_workbuf = Qnil;
11720
11721 staticpro (&Vcode_conversion_workbuf_name);
11722 Vcode_conversion_workbuf_name = build_pure_c_string (" *code-conversion-work*");
11723
11724 reused_workbuf_in_use = false;
11725 PDUMPER_REMEMBER_SCALAR (reused_workbuf_in_use);
11726
11727 DEFSYM (Qcharset, "charset");
11728 DEFSYM (Qtarget_idx, "target-idx");
11729 DEFSYM (Qcoding_system_history, "coding-system-history");
11730 Fset (Qcoding_system_history, Qnil);
11731
11732
11733 Fput (Qinsert_file_contents, Qtarget_idx, make_fixnum (0));
11734
11735 Fput (Qwrite_region, Qtarget_idx, make_fixnum (2));
11736
11737 DEFSYM (Qcall_process, "call-process");
11738
11739 Fput (Qcall_process, Qtarget_idx, make_fixnum (0));
11740
11741 DEFSYM (Qcall_process_region, "call-process-region");
11742
11743 Fput (Qcall_process_region, Qtarget_idx, make_fixnum (2));
11744
11745 DEFSYM (Qstart_process, "start-process");
11746
11747 Fput (Qstart_process, Qtarget_idx, make_fixnum (2));
11748
11749 DEFSYM (Qopen_network_stream, "open-network-stream");
11750
11751 Fput (Qopen_network_stream, Qtarget_idx, make_fixnum (3));
11752
11753 DEFSYM (Qunix, "unix");
11754 DEFSYM (Qdos, "dos");
11755 DEFSYM (Qmac, "mac");
11756
11757 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
11758 DEFSYM (Qundecided, "undecided");
11759 DEFSYM (Qno_conversion, "no-conversion");
11760 DEFSYM (Qraw_text, "raw-text");
11761 DEFSYM (Qus_ascii, "us-ascii");
11762
11763 DEFSYM (Qiso_2022, "iso-2022");
11764
11765 DEFSYM (Qutf_8, "utf-8");
11766 DEFSYM (Qutf_8_unix, "utf-8-unix");
11767 DEFSYM (Qutf_8_emacs, "utf-8-emacs");
11768
11769 #if defined (WINDOWSNT) || defined (CYGWIN) || defined HAVE_ANDROID
11770
11771 DEFSYM (Qutf_16le, "utf-16le");
11772 #endif
11773
11774 DEFSYM (Qutf_16, "utf-16");
11775 DEFSYM (Qbig, "big");
11776 DEFSYM (Qlittle, "little");
11777
11778 DEFSYM (Qshift_jis, "shift-jis");
11779 DEFSYM (Qbig5, "big5");
11780
11781 DEFSYM (Qcoding_system_p, "coding-system-p");
11782
11783
11784 DEFSYM (Qcoding_system_error, "coding-system-error");
11785 Fput (Qcoding_system_error, Qerror_conditions,
11786 pure_list (Qcoding_system_error, Qerror));
11787 Fput (Qcoding_system_error, Qerror_message,
11788 build_pure_c_string ("Invalid coding system"));
11789
11790 DEFSYM (Qtranslation_table, "translation-table");
11791 Fput (Qtranslation_table, Qchar_table_extra_slots, make_fixnum (2));
11792 DEFSYM (Qtranslation_table_id, "translation-table-id");
11793
11794
11795
11796 DEFSYM (Qemacs_mule, "emacs-mule");
11797
11798 DEFSYM (QCcategory, ":category");
11799 DEFSYM (QCmnemonic, ":mnemonic");
11800 DEFSYM (QCdefault_char, ":default-char");
11801 DEFSYM (QCdecode_translation_table, ":decode-translation-table");
11802 DEFSYM (QCencode_translation_table, ":encode-translation-table");
11803 DEFSYM (QCpost_read_conversion, ":post-read-conversion");
11804 DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
11805 DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
11806
11807 Vcoding_category_table = make_nil_vector (coding_category_max);
11808 staticpro (&Vcoding_category_table);
11809
11810 ASET (Vcoding_category_table, coding_category_iso_7,
11811 intern_c_string ("coding-category-iso-7"));
11812 ASET (Vcoding_category_table, coding_category_iso_7_tight,
11813 intern_c_string ("coding-category-iso-7-tight"));
11814 ASET (Vcoding_category_table, coding_category_iso_8_1,
11815 intern_c_string ("coding-category-iso-8-1"));
11816 ASET (Vcoding_category_table, coding_category_iso_8_2,
11817 intern_c_string ("coding-category-iso-8-2"));
11818 ASET (Vcoding_category_table, coding_category_iso_7_else,
11819 intern_c_string ("coding-category-iso-7-else"));
11820 ASET (Vcoding_category_table, coding_category_iso_8_else,
11821 intern_c_string ("coding-category-iso-8-else"));
11822 ASET (Vcoding_category_table, coding_category_utf_8_auto,
11823 intern_c_string ("coding-category-utf-8-auto"));
11824 ASET (Vcoding_category_table, coding_category_utf_8_nosig,
11825 intern_c_string ("coding-category-utf-8"));
11826 ASET (Vcoding_category_table, coding_category_utf_8_sig,
11827 intern_c_string ("coding-category-utf-8-sig"));
11828 ASET (Vcoding_category_table, coding_category_utf_16_be,
11829 intern_c_string ("coding-category-utf-16-be"));
11830 ASET (Vcoding_category_table, coding_category_utf_16_auto,
11831 intern_c_string ("coding-category-utf-16-auto"));
11832 ASET (Vcoding_category_table, coding_category_utf_16_le,
11833 intern_c_string ("coding-category-utf-16-le"));
11834 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
11835 intern_c_string ("coding-category-utf-16-be-nosig"));
11836 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
11837 intern_c_string ("coding-category-utf-16-le-nosig"));
11838 ASET (Vcoding_category_table, coding_category_charset,
11839 intern_c_string ("coding-category-charset"));
11840 ASET (Vcoding_category_table, coding_category_sjis,
11841 intern_c_string ("coding-category-sjis"));
11842 ASET (Vcoding_category_table, coding_category_big5,
11843 intern_c_string ("coding-category-big5"));
11844 ASET (Vcoding_category_table, coding_category_ccl,
11845 intern_c_string ("coding-category-ccl"));
11846 ASET (Vcoding_category_table, coding_category_emacs_mule,
11847 intern_c_string ("coding-category-emacs-mule"));
11848
11849 ASET (Vcoding_category_table, coding_category_raw_text,
11850 intern_c_string ("coding-category-raw-text"));
11851 ASET (Vcoding_category_table, coding_category_undecided,
11852 intern_c_string ("coding-category-undecided"));
11853
11854 DEFSYM (Qinsufficient_source, "insufficient-source");
11855 DEFSYM (Qinvalid_source, "invalid-source");
11856 DEFSYM (Qinterrupted, "interrupted");
11857
11858
11859
11860 DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
11861
11862 DEFSYM (Qignored, "ignored");
11863
11864 DEFSYM (Qutf_8_string_p, "utf-8-string-p");
11865 DEFSYM (Qfilenamep, "filenamep");
11866
11867 defsubr (&Scoding_system_p);
11868 defsubr (&Sread_coding_system);
11869 defsubr (&Sread_non_nil_coding_system);
11870 defsubr (&Scheck_coding_system);
11871 defsubr (&Sdetect_coding_region);
11872 defsubr (&Sdetect_coding_string);
11873 defsubr (&Sfind_coding_systems_region_internal);
11874 defsubr (&Sunencodable_char_position);
11875 defsubr (&Scheck_coding_systems_region);
11876 defsubr (&Sdecode_coding_region);
11877 defsubr (&Sencode_coding_region);
11878 defsubr (&Sdecode_coding_string);
11879 defsubr (&Sencode_coding_string);
11880 #ifdef ENABLE_UTF_8_CONVERTER_TEST
11881 defsubr (&Sinternal_encode_string_utf_8);
11882 defsubr (&Sinternal_decode_string_utf_8);
11883 #endif
11884 defsubr (&Sdecode_sjis_char);
11885 defsubr (&Sencode_sjis_char);
11886 defsubr (&Sdecode_big5_char);
11887 defsubr (&Sencode_big5_char);
11888 defsubr (&Sset_terminal_coding_system_internal);
11889 defsubr (&Sset_safe_terminal_coding_system_internal);
11890 defsubr (&Sterminal_coding_system);
11891 defsubr (&Sset_keyboard_coding_system_internal);
11892 defsubr (&Skeyboard_coding_system);
11893 defsubr (&Sfind_operation_coding_system);
11894 defsubr (&Sset_coding_system_priority);
11895 defsubr (&Sdefine_coding_system_internal);
11896 defsubr (&Sdefine_coding_system_alias);
11897 defsubr (&Scoding_system_put);
11898 defsubr (&Scoding_system_base);
11899 defsubr (&Scoding_system_plist);
11900 defsubr (&Scoding_system_aliases);
11901 defsubr (&Scoding_system_eol_type);
11902 defsubr (&Scoding_system_priority_list);
11903
11904 DEFVAR_LISP ("coding-system-list", Vcoding_system_list,
11905 doc:
11906
11907
11908
11909 );
11910 Vcoding_system_list = Qnil;
11911
11912 DEFVAR_LISP ("coding-system-alist", Vcoding_system_alist,
11913 doc:
11914
11915
11916
11917
11918 );
11919 Vcoding_system_alist = Qnil;
11920
11921 DEFVAR_LISP ("coding-category-list", Vcoding_category_list,
11922 doc:
11923
11924
11925
11926
11927
11928
11929 );
11930 {
11931 int i;
11932
11933 Vcoding_category_list = Qnil;
11934 for (i = coding_category_max - 1; i >= 0; i--)
11935 Vcoding_category_list
11936 = Fcons (AREF (Vcoding_category_table, i),
11937 Vcoding_category_list);
11938 }
11939
11940 DEFVAR_LISP ("coding-system-for-read", Vcoding_system_for_read,
11941 doc:
11942
11943
11944
11945
11946 );
11947 Vcoding_system_for_read = Qnil;
11948
11949 DEFVAR_LISP ("coding-system-for-write", Vcoding_system_for_write,
11950 doc:
11951
11952
11953
11954
11955
11956
11957
11958
11959
11960 );
11961 Vcoding_system_for_write = Qnil;
11962
11963 DEFVAR_LISP ("last-coding-system-used", Vlast_coding_system_used,
11964 doc:
11965 );
11966 Vlast_coding_system_used = Qnil;
11967
11968 DEFVAR_LISP ("last-code-conversion-error", Vlast_code_conversion_error,
11969 doc:
11970
11971
11972
11973
11974
11975
11976
11977
11978
11979
11980
11981
11982 );
11983 Vlast_code_conversion_error = Qnil;
11984
11985 DEFVAR_BOOL ("inhibit-eol-conversion", inhibit_eol_conversion,
11986 doc:
11987
11988
11989 );
11990 inhibit_eol_conversion = 0;
11991
11992 DEFVAR_BOOL ("inherit-process-coding-system", inherit_process_coding_system,
11993 doc:
11994
11995
11996 );
11997 inherit_process_coding_system = 0;
11998
11999 DEFVAR_LISP ("file-coding-system-alist", Vfile_coding_system_alist,
12000 doc:
12001
12002
12003
12004
12005
12006
12007
12008
12009
12010
12011
12012
12013
12014
12015
12016
12017 );
12018 Vfile_coding_system_alist = Qnil;
12019
12020 DEFVAR_LISP ("process-coding-system-alist", Vprocess_coding_system_alist,
12021 doc:
12022
12023
12024
12025
12026
12027
12028
12029
12030
12031
12032
12033 );
12034 Vprocess_coding_system_alist = Qnil;
12035
12036 DEFVAR_LISP ("network-coding-system-alist", Vnetwork_coding_system_alist,
12037 doc:
12038
12039
12040
12041
12042
12043
12044
12045
12046
12047
12048
12049
12050 );
12051 Vnetwork_coding_system_alist = Qnil;
12052
12053 DEFVAR_LISP ("locale-coding-system", Vlocale_coding_system,
12054 doc:
12055
12056 );
12057 Vlocale_coding_system = Qnil;
12058
12059
12060 DEFVAR_LISP ("eol-mnemonic-unix", eol_mnemonic_unix,
12061 doc:
12062 );
12063 eol_mnemonic_unix = build_pure_c_string (":");
12064
12065 DEFVAR_LISP ("eol-mnemonic-dos", eol_mnemonic_dos,
12066 doc:
12067 );
12068 eol_mnemonic_dos = build_pure_c_string ("\\");
12069
12070 DEFVAR_LISP ("eol-mnemonic-mac", eol_mnemonic_mac,
12071 doc:
12072 );
12073 eol_mnemonic_mac = build_pure_c_string ("/");
12074
12075 DEFVAR_LISP ("eol-mnemonic-undecided", eol_mnemonic_undecided,
12076 doc:
12077 );
12078 eol_mnemonic_undecided = build_pure_c_string (":");
12079
12080 DEFVAR_LISP ("enable-character-translation", Venable_character_translation,
12081 doc:
12082 );
12083 Venable_character_translation = Qt;
12084
12085 DEFVAR_LISP ("standard-translation-table-for-decode",
12086 Vstandard_translation_table_for_decode,
12087 doc: );
12088 Vstandard_translation_table_for_decode = Qnil;
12089
12090 DEFVAR_LISP ("standard-translation-table-for-encode",
12091 Vstandard_translation_table_for_encode,
12092 doc: );
12093 Vstandard_translation_table_for_encode = Qnil;
12094
12095 DEFVAR_LISP ("charset-revision-table", Vcharset_revision_table,
12096 doc:
12097
12098
12099 );
12100 Vcharset_revision_table = Qnil;
12101
12102 DEFVAR_LISP ("default-process-coding-system",
12103 Vdefault_process_coding_system,
12104 doc:
12105
12106 );
12107 Vdefault_process_coding_system = Qnil;
12108
12109 DEFVAR_LISP ("latin-extra-code-table", Vlatin_extra_code_table,
12110 doc:
12111
12112
12113
12114
12115
12116
12117
12118 );
12119 Vlatin_extra_code_table = make_nil_vector (256);
12120
12121 DEFVAR_LISP ("select-safe-coding-system-function",
12122 Vselect_safe_coding_system_function,
12123 doc:
12124
12125
12126
12127
12128
12129
12130
12131
12132 );
12133 Vselect_safe_coding_system_function = Qnil;
12134
12135 DEFVAR_BOOL ("coding-system-require-warning",
12136 coding_system_require_warning,
12137 doc:
12138
12139
12140 );
12141 coding_system_require_warning = 0;
12142
12143
12144 DEFVAR_BOOL ("inhibit-iso-escape-detection",
12145 inhibit_iso_escape_detection,
12146 doc:
12147
12148
12149
12150
12151
12152
12153
12154
12155
12156
12157
12158
12159
12160
12161
12162
12163
12164
12165
12166
12167
12168
12169 );
12170 inhibit_iso_escape_detection = 0;
12171
12172 DEFVAR_BOOL ("inhibit-null-byte-detection",
12173 inhibit_null_byte_detection,
12174 doc:
12175
12176
12177
12178
12179
12180
12181
12182 );
12183 inhibit_null_byte_detection = 0;
12184
12185 DEFVAR_BOOL ("disable-ascii-optimization", disable_ascii_optimization,
12186 doc:
12187 );
12188 disable_ascii_optimization = 0;
12189
12190 DEFVAR_LISP ("translation-table-for-input", Vtranslation_table_for_input,
12191 doc:
12192
12193
12194
12195
12196
12197 );
12198 Vtranslation_table_for_input = Qnil;
12199
12200 Lisp_Object args[coding_arg_undecided_max];
12201 memclear (args, sizeof args);
12202
12203 Lisp_Object plist[] =
12204 {
12205 QCname,
12206 args[coding_arg_name] = Qno_conversion,
12207 QCmnemonic,
12208 args[coding_arg_mnemonic] = make_fixnum ('='),
12209 intern_c_string (":coding-type"),
12210 args[coding_arg_coding_type] = Qraw_text,
12211 QCascii_compatible_p,
12212 args[coding_arg_ascii_compatible_p] = Qt,
12213 QCdefault_char,
12214 args[coding_arg_default_char] = make_fixnum (0),
12215 intern_c_string (":for-unibyte"),
12216 args[coding_arg_for_unibyte] = Qt,
12217 intern_c_string (":docstring"),
12218 (build_pure_c_string
12219 ("Do no conversion.\n"
12220 "\n"
12221 "When you visit a file with this coding, the file is read into a\n"
12222 "unibyte buffer as is, thus each byte of a file is treated as a\n"
12223 "character.")),
12224 intern_c_string (":eol-type"),
12225 args[coding_arg_eol_type] = Qunix,
12226 };
12227 args[coding_arg_plist] = CALLMANY (Flist, plist);
12228 Fdefine_coding_system_internal (coding_arg_max, args);
12229
12230 plist[1] = args[coding_arg_name] = Qundecided;
12231 plist[3] = args[coding_arg_mnemonic] = make_fixnum ('-');
12232 plist[5] = args[coding_arg_coding_type] = Qundecided;
12233
12234
12235 plist[8] = intern_c_string (":charset-list");
12236 plist[9] = args[coding_arg_charset_list] = list1 (Qascii);
12237 plist[11] = args[coding_arg_for_unibyte] = Qnil;
12238 plist[13] = build_pure_c_string ("No conversion on encoding, "
12239 "automatic conversion on decoding.");
12240 plist[15] = args[coding_arg_eol_type] = Qnil;
12241 args[coding_arg_plist] = CALLMANY (Flist, plist);
12242 args[coding_arg_undecided_inhibit_null_byte_detection] = make_fixnum (0);
12243 args[coding_arg_undecided_inhibit_iso_escape_detection] = make_fixnum (0);
12244 Fdefine_coding_system_internal (coding_arg_undecided_max, args);
12245
12246 setup_coding_system (Qno_conversion, &safe_terminal_coding);
12247
12248 for (int i = 0; i < coding_category_max; i++)
12249 Fset (AREF (Vcoding_category_table, i), Qno_conversion);
12250
12251 pdumper_do_now_and_after_load (reset_coding_after_pdumper_load);
12252 }
12253
12254 static void
12255 reset_coding_after_pdumper_load (void)
12256 {
12257 if (!dumped_with_pdumper_p ())
12258 return;
12259 for (struct coding_system *this = &coding_categories[0];
12260 this < &coding_categories[coding_category_max];
12261 ++this)
12262 {
12263 int id = this->id;
12264 if (id >= 0)
12265 {
12266
12267
12268
12269 memset (this, 0, sizeof (*this));
12270 setup_coding_system (CODING_ID_NAME (id), this);
12271 }
12272 }
12273
12274
12275
12276
12277 Fset_safe_terminal_coding_system_internal (Qus_ascii);
12278 }