This source file includes the following definitions.
- detect_coding_XXX
- decode_coding_XXXX
- encode_coding_XXX
- encode_inhibit_flag
- inhibit_flag
- growable_destination
- record_conversion_result
- coding_set_source
- coding_change_source
- coding_set_destination
- coding_change_destination
- coding_alloc_by_realloc
- coding_alloc_by_making_gap
- alloc_destination
- detect_coding_utf_8
- decode_coding_utf_8
- encode_coding_utf_8
- detect_coding_utf_16
- decode_coding_utf_16
- encode_coding_utf_16
- detect_coding_emacs_mule
- emacs_mule_char
- emacs_mule_finish_composition
- decode_coding_emacs_mule
- encode_coding_emacs_mule
- setup_iso_safe_charsets
- detect_coding_iso_2022
- finish_composition
- decode_coding_iso_2022
- encode_invocation_designation
- encode_designation_at_bol
- encode_coding_iso_2022
- detect_coding_sjis
- detect_coding_big5
- decode_coding_sjis
- decode_coding_big5
- encode_coding_sjis
- encode_coding_big5
- detect_coding_ccl
- decode_coding_ccl
- encode_coding_ccl
- decode_coding_raw_text
- encode_coding_raw_text
- detect_coding_charset
- decode_coding_charset
- encode_coding_charset
- setup_coding_system
- coding_charset_list
- coding_system_charset_list
- raw_text_coding_system
- raw_text_coding_system_p
- coding_inherit_eol_type
- complement_process_encoding_system
- check_ascii
- check_utf_8
- utf8_string_p
- make_string_from_utf8
- detect_eol
- adjust_coding_eol_type
- detect_coding
- decode_eol
- get_translation_table
- get_translation
- produce_chars
- produce_composition
- produce_charset
- produce_annotation
- decode_coding
- handle_composition_annotation
- handle_charset_annotation
- consume_chars
- encode_coding
- code_conversion_restore
- code_conversion_save
- coding_restore_undo_list
- decode_coding_gap
- decode_coding_object
- encode_coding_object
- preferred_coding_system
- from_unicode
- from_unicode_buffer
- to_unicode
- DEFUN
- DEFUN
- DEFUN
- detect_coding_system
- char_encodable_p
- code_convert_region
- string_ascii_p
- code_convert_string
- code_convert_string_norecord
- get_buffer_gap_address
- get_char_bytes
- encode_string_utf_8
- decode_string_utf_8
- convert_string_nocopy
- decode_file_name
- encode_file_name_1
- encode_file_name
- DEFUN
- DEFUN
- DEFUN
- DEFUN
- DEFUN
- DEFUN
- DEFUN
- DEFUN
- make_subsidiaries
- DEFUN
- DEFUN
- DEFUN
- DEFUN
- init_coding_once
- syms_of_coding
- reset_coding_after_pdumper_load
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#if 0
/* Template for a coding-system detector (never compiled).

   Scan the bytes of CODING->source.  Return false and mark
   CATEGORY_MASK_XXX as rejected in DETECT_INFO as soon as a byte does
   not conform to XXX.  Return true when the whole source was scanned
   without a violation, OR-ing into DETECT_INFO->found the category
   mask if at least one byte strongly suggested XXX.

   The locals src, src_end, src_base, multibytep and consumed_chars,
   the parameter CODING and the label no_more_source are all required
   by ONE_MORE_BYTE.  */
static bool
detect_coding_XXX (struct coding_system *coding,
		   struct coding_detection_info *detect_info)
{
  const unsigned char *src = coding->source;
  const unsigned char *src_base = src;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  bool multibytep = coding->src_multibyte;
  ptrdiff_t consumed_chars = 0;
  int found = 0;
  int c;
  ...;

  while (1)
    {
      /* Fetch one byte.  ONE_MORE_BYTE jumps to no_more_source when
	 the source is exhausted.  */
      src_base = src;
      ONE_MORE_BYTE (c);

      if (! __C_conforms_to_XXX___ (c))
	break;
      /* Only positive evidence may set FOUND.  */
      if (__C_strongly_suggests_XXX__ (c))
	found = CATEGORY_MASK_XXX;
    }

  /* A non-conforming byte was seen.  */
  detect_info->rejected |= CATEGORY_MASK_XXX;
  return 0;

 no_more_source:
  /* Every byte conformed.  */
  detect_info->found |= found;
  return 1;
}
#endif
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
#if 0
/* Template for a decoder (never compiled).

   Decode bytes from CODING->source, starting at CODING->consumed,
   into the int buffer CODING->charbuf, starting at
   CODING->charbuf_used.  On return CODING->consumed,
   CODING->consumed_char and CODING->charbuf_used are updated.

   The locals src, src_end, src_base, multibytep and consumed_chars,
   the parameter CODING and the label no_more_source are all required
   by ONE_MORE_BYTE.  */
static void
decode_coding_XXXX (struct coding_system *coding)
{
  const unsigned char *src = coding->source + coding->consumed;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  /* Start of the byte sequence currently being decoded; everything
     before it has been fully consumed.  */
  const unsigned char *src_base;
  int *charbuf = coding->charbuf + coding->charbuf_used;
  int *charbuf_end = coding->charbuf + coding->charbuf_size;
  bool multibytep = coding->src_multibyte;
  ptrdiff_t consumed_chars = 0;
  int c;

  while (1)
    {
      src_base = src;
      if (charbuf >= charbuf_end)
	/* No room left to store another decoded character.  */
	break;
      ONE_MORE_BYTE (c);
      /* Decode C (reading more bytes as needed) and store the result
	 with *charbuf++ = ...  */
    }

 no_more_source:
  if (src_base < src_end
      && coding->mode & CODING_MODE_LAST_BLOCK)
    /* This is the last block and the source ends in the middle of a
       sequence: pass the leftover bytes through unchanged.  */
    while (src_base < src_end && charbuf < charbuf_end)
      *charbuf++ = *src_base++;

  /* Record how far we got.  */
  coding->consumed = coding->consumed_char = src_base - coding->source;
  coding->charbuf_used = charbuf - coding->charbuf;
}
#endif
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
#if 0
/* Template for an encoder (never compiled).

   Encode the CODING->charbuf_used characters held in CODING->charbuf
   and write the bytes to CODING->destination, starting at offset
   CODING->produced.  The loop stops early when fewer than
   _MAX_BYTES_PRODUCED_IN_LOOP_ bytes remain in the destination.  On
   return CODING->produced_char and CODING->produced are updated.  */
static void
encode_coding_XXX (struct coding_system *coding)
{
  bool multibytep = coding->dst_multibyte;
  int *charbuf = coding->charbuf;
  /* CHARBUF is a plain int pointer, so the end is computed from it
     directly, as encode_coding_utf_8 does.  */
  int *charbuf_end = charbuf + coding->charbuf_used;
  unsigned char *dst = coding->destination + coding->produced;
  unsigned char *dst_end = coding->destination + coding->dst_bytes;
  unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
  ptrdiff_t produced_chars = 0;

  for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
    {
      int c = *charbuf;
      /* Encode C into DST and update PRODUCED_CHARS.  */
    }
 label_no_more_destination:
  /* Record how much was produced.  */
  coding->produced_char += produced_chars;
  coding->produced = dst - coding->destination;
}
#endif
282
283
284
285
286 #include <config.h>
287
288 #ifdef HAVE_WCHAR_H
289 #include <wchar.h>
290 #endif
291
292 #include "lisp.h"
293 #include "character.h"
294 #include "buffer.h"
295 #include "charset.h"
296 #include "ccl.h"
297 #include "composite.h"
298 #include "coding.h"
299 #include "termhooks.h"
300 #include "pdumper.h"
301
/* Lisp object with external linkage.  Presumably the table of defined
   coding systems -- confirm against its users.  */
302 Lisp_Object Vcoding_system_hash_table;
303
304
305
306
307
/* Coding system object with external linkage.  NOTE(review): judging
   by the name it is used for terminal output; confirm.  */
308 struct coding_system safe_terminal_coding;
309
310
/* File-local Lisp objects; by name, the coding systems used for the
   sjis and big5 handling in this file (confirm against users).  */
311 static Lisp_Object Vsjis_coding_system;
312 static Lisp_Object Vbig5_coding_system;
313
314
315
/* The fixnum stored at index REG of the vector found in slot
   coding_attr_iso_initial of the attributes of CODING's id.  */
316 #define CODING_ISO_INITIAL(coding, reg) \
317 (XFIXNUM (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
318 coding_attr_iso_initial), \
319 reg)))
320
321
/* CODING->safe_charsets[CHARSET_ID], or -1 when CHARSET_ID is greater
   than CODING->max_charset_id or the stored entry is 255.  */
322 #define CODING_ISO_REQUEST(coding, charset_id) \
323 (((charset_id) <= (coding)->max_charset_id \
324 ? ((coding)->safe_charsets[charset_id] != 255 \
325 ? (coding)->safe_charsets[charset_id] \
326 : -1) \
327 : -1))
328
329
/* Lvalue accessors for the members of CODING->spec.iso_2022.  */
330 #define CODING_ISO_FLAGS(coding) \
331 ((coding)->spec.iso_2022.flags)
332 #define CODING_ISO_DESIGNATION(coding, reg) \
333 ((coding)->spec.iso_2022.current_designation[reg])
334 #define CODING_ISO_INVOCATION(coding, plane) \
335 ((coding)->spec.iso_2022.current_invocation[plane])
336 #define CODING_ISO_SINGLE_SHIFTING(coding) \
337 ((coding)->spec.iso_2022.single_shifting)
338 #define CODING_ISO_BOL(coding) \
339 ((coding)->spec.iso_2022.bol)
/* The designation of the register currently invoked to PLANE, or -1
   when the invocation recorded for PLANE is negative.  The expansion
   is not wrapped in parentheses as a whole.  */
340 #define CODING_ISO_INVOKED_CHARSET(coding, plane) \
341 (CODING_ISO_INVOCATION (coding, plane) < 0 ? -1 \
342 : CODING_ISO_DESIGNATION (coding, CODING_ISO_INVOCATION (coding, plane)))
/* Address of the cmp_status member (a pointer, not an lvalue).  */
343 #define CODING_ISO_CMP_STATUS(coding) \
344 (&(coding)->spec.iso_2022.cmp_status)
345 #define CODING_ISO_EXTSEGMENT_LEN(coding) \
346 ((coding)->spec.iso_2022.ctext_extended_segment_len)
347 #define CODING_ISO_EMBEDDED_UTF_8(coding) \
348 ((coding)->spec.iso_2022.embedded_utf_8)
349
350
351
/* Byte values of the control codes used by the ISO 2022 code below:
   shift-out, shift-in, escape, the 8-bit single shifts SS2/SS3 and
   the control sequence introducer.  ISO_CODE_SS2_7 (0x19) is by name
   a 7-bit form of SS2 -- TODO confirm against its users.  */
352 #define ISO_CODE_SO 0x0E
353 #define ISO_CODE_SI 0x0F
354 #define ISO_CODE_SS2_7 0x19
355 #define ISO_CODE_ESC 0x1B
356 #define ISO_CODE_SS2 0x8E
357 #define ISO_CODE_SS3 0x8F
358 #define ISO_CODE_CSI 0x9B
359
360
361
/* Classes of byte values for ISO 2022 processing.  No explicit values
   are given, so the enumerators number 0, 1, 2, ... in the order
   written.  Several names mirror the ISO_CODE_* constants (shift
   out/in, escape, single shifts, CSI); the others name ranges of
   control and graphic bytes (presumably C0, C1, GL and GR -- confirm
   against the table that maps bytes to classes).  */
362 enum iso_code_class_type
363 {
364 ISO_control_0,
365
366
367 ISO_shift_out,
368 ISO_shift_in,
369 ISO_single_shift_2_7,
370 ISO_escape,
371 ISO_control_1,
372
373
374 ISO_single_shift_2,
375 ISO_single_shift_3,
376 ISO_control_sequence_introducer,
377 ISO_0x20_or_0x7F,
378 ISO_graphic_plane_0,
379 ISO_0xA0_or_0xFF,
380 ISO_graphic_plane_1
381 };
382
383
384
385
386
387
/* Single-bit masks; each constant below has exactly one bit set and
   no two share a bit.  Presumably they are tested against
   CODING_ISO_FLAGS (coding) -- confirm against the users.  Note that
   bit 0x4000 is not assigned, and that the values jump from 0x20000
   to 0x100000.  The meaning of each flag is only hinted at by its
   name here; see the ISO 2022 encoder/decoder for the semantics.  */
388 #define CODING_ISO_FLAG_LONG_FORM 0x0001
389
390
391
392 #define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
393
394
395
396 #define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
397
398
399 #define CODING_ISO_FLAG_SEVEN_BITS 0x0008
400
401
402 #define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
403
404
405
406 #define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
407
408
409 #define CODING_ISO_FLAG_DESIGNATION 0x0040
410
411
412 #define CODING_ISO_FLAG_REVISION 0x0080
413
414
415 #define CODING_ISO_FLAG_DIRECTION 0x0100
416
417
418
419 #define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
420
421
422
423 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
424
425
426 #define CODING_ISO_FLAG_SAFE 0x0800
427
428
429
430 #define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
431
432 #define CODING_ISO_FLAG_COMPOSITION 0x2000
433
434
435
436 #define CODING_ISO_FLAG_USE_ROMAN 0x8000
437
438 #define CODING_ISO_FLAG_USE_OLDJIS 0x10000
439
440 #define CODING_ISO_FLAG_LEVEL_4 0x20000
441
442 #define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
443
444
445
/* The character '?'.  NOTE(review): by name, a marker related to
   inhibiting character substitution -- confirm against users.  */
446 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
447
448
/* Lvalue accessor for CODING->spec.utf_8_bom; decode_coding_utf_8 and
   encode_coding_utf_8 compare it with and set it to utf_without_bom.  */
449 #define CODING_UTF_8_BOM(coding) \
450 ((coding)->spec.utf_8_bom)
451
452
/* Lvalue accessors for the members of CODING->spec.utf_16.  */
453 #define CODING_UTF_16_BOM(coding) \
454 ((coding)->spec.utf_16.bom)
455
456 #define CODING_UTF_16_ENDIAN(coding) \
457 ((coding)->spec.utf_16.endian)
458
459 #define CODING_UTF_16_SURROGATE(coding) \
460 ((coding)->spec.utf_16.surrogate)
461
462
463
/* The objects stored in slots coding_attr_ccl_decoder and
   coding_attr_ccl_encoder of the attributes of CODING's id, and the
   string data (SDATA) of the object in slot coding_attr_ccl_valids.
   The first two expansions are not parenthesized as a whole.  */
464 #define CODING_CCL_DECODER(coding) \
465 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
466 #define CODING_CCL_ENCODER(coding) \
467 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
468 #define CODING_CCL_VALIDS(coding) \
469 (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
470
471
472
/* Categories of coding systems distinguished by the detectors.  The
   enumerators number 0, 1, 2, ... in the order written, and each
   value is used as a bit position by the CATEGORY_MASK_* macros
   (1 << category).  coding_category_max comes last, so it equals the
   number of categories and serves as the length of the
   coding_priorities and coding_categories arrays.  */
473 enum coding_category
474 {
475 coding_category_iso_7,
476 coding_category_iso_7_tight,
477 coding_category_iso_8_1,
478 coding_category_iso_8_2,
479 coding_category_iso_7_else,
480 coding_category_iso_8_else,
481 coding_category_utf_8_auto,
482 coding_category_utf_8_nosig,
483 coding_category_utf_8_sig,
484 coding_category_utf_16_auto,
485 coding_category_utf_16_be,
486 coding_category_utf_16_le,
487 coding_category_utf_16_be_nosig,
488 coding_category_utf_16_le_nosig,
489 coding_category_charset,
490 coding_category_sjis,
491 coding_category_big5,
492 coding_category_ccl,
493 coding_category_emacs_mule,
494
495 coding_category_raw_text,
496 coding_category_undecided,
497 coding_category_max
498 };
499
500
/* One bit per coding category: bit N stands for the category whose
   enum value is N.  There is no mask for coding_category_undecided.  */
501 #define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
502 #define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
503 #define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
504 #define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
505 #define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
506 #define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
507 #define CATEGORY_MASK_UTF_8_AUTO (1 << coding_category_utf_8_auto)
508 #define CATEGORY_MASK_UTF_8_NOSIG (1 << coding_category_utf_8_nosig)
509 #define CATEGORY_MASK_UTF_8_SIG (1 << coding_category_utf_8_sig)
510 #define CATEGORY_MASK_UTF_16_AUTO (1 << coding_category_utf_16_auto)
511 #define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
512 #define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
513 #define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
514 #define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
515 #define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
516 #define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
517 #define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
518 #define CATEGORY_MASK_CCL (1 << coding_category_ccl)
519 #define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
520 #define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text)
521
522
523
/* Union of every mask defined above except CATEGORY_MASK_RAW_TEXT.  */
524 #define CATEGORY_MASK_ANY \
525 (CATEGORY_MASK_ISO_7 \
526 | CATEGORY_MASK_ISO_7_TIGHT \
527 | CATEGORY_MASK_ISO_8_1 \
528 | CATEGORY_MASK_ISO_8_2 \
529 | CATEGORY_MASK_ISO_7_ELSE \
530 | CATEGORY_MASK_ISO_8_ELSE \
531 | CATEGORY_MASK_UTF_8_AUTO \
532 | CATEGORY_MASK_UTF_8_NOSIG \
533 | CATEGORY_MASK_UTF_8_SIG \
534 | CATEGORY_MASK_UTF_16_AUTO \
535 | CATEGORY_MASK_UTF_16_BE \
536 | CATEGORY_MASK_UTF_16_LE \
537 | CATEGORY_MASK_UTF_16_BE_NOSIG \
538 | CATEGORY_MASK_UTF_16_LE_NOSIG \
539 | CATEGORY_MASK_CHARSET \
540 | CATEGORY_MASK_SJIS \
541 | CATEGORY_MASK_BIG5 \
542 | CATEGORY_MASK_CCL \
543 | CATEGORY_MASK_EMACS_MULE)
544
545
/* Groups of the ISO masks.  Note that CATEGORY_MASK_ISO_ESCAPE is the
   7-bit group plus both "else" categories, i.e. every ISO category
   except iso_8_1 and iso_8_2.  */
546 #define CATEGORY_MASK_ISO_7BIT \
547 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
548
549 #define CATEGORY_MASK_ISO_8BIT \
550 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
551
552 #define CATEGORY_MASK_ISO_ELSE \
553 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
554
555 #define CATEGORY_MASK_ISO_ESCAPE \
556 (CATEGORY_MASK_ISO_7 \
557 | CATEGORY_MASK_ISO_7_TIGHT \
558 | CATEGORY_MASK_ISO_7_ELSE \
559 | CATEGORY_MASK_ISO_8_ELSE)
560
/* All six ISO categories.  */
561 #define CATEGORY_MASK_ISO \
562 ( CATEGORY_MASK_ISO_7BIT \
563 | CATEGORY_MASK_ISO_8BIT \
564 | CATEGORY_MASK_ISO_ELSE)
565
/* All five UTF-16 categories.  */
566 #define CATEGORY_MASK_UTF_16 \
567 (CATEGORY_MASK_UTF_16_AUTO \
568 | CATEGORY_MASK_UTF_16_BE \
569 | CATEGORY_MASK_UTF_16_LE \
570 | CATEGORY_MASK_UTF_16_BE_NOSIG \
571 | CATEGORY_MASK_UTF_16_LE_NOSIG)
572
/* All three UTF-8 categories.  */
573 #define CATEGORY_MASK_UTF_8 \
574 (CATEGORY_MASK_UTF_8_AUTO \
575 | CATEGORY_MASK_UTF_8_NOSIG \
576 | CATEGORY_MASK_UTF_8_SIG)
577
578
579
/* File-local Lisp object.  NOTE(review): by name, a table indexed by
   coding category -- confirm where it is filled in.  */
580 static Lisp_Object Vcoding_category_table;
581
582
/* Array of coding_category_max category values.  Presumably the
   categories in detection priority order -- confirm against users.  */
583 static enum coding_category coding_priorities[coding_category_max];
584
585
586
/* One coding system object per category, coding_category_max in all.  */
587 static struct coding_system coding_categories[coding_category_max];
588
589
590
591 static int
592 encode_inhibit_flag (Lisp_Object flag)
593 {
594 return NILP (flag) ? -1 : EQ (flag, Qt);
595 }
596
597
598
599
/* Combine ENCODED_FLAG (-1, 0 or 1, as produced by
   encode_inhibit_flag) with the boolean VAR.  The result is true
   exactly when their sum is positive: 1 always yields true, -1 always
   yields false, and 0 yields VAR.  */
static bool
inhibit_flag (int encoded_flag, bool var)
{
  return encoded_flag + var > 0;
}
605
/* Store into ATTRS the attributes of CODING's id, then store into
   CHARSET_LIST the charset list taken from those attributes.  Both
   ATTRS and CHARSET_LIST must be lvalues; ATTRS is assigned first and
   then read again.  */
606 #define CODING_GET_INFO(coding, attrs, charset_list) \
607 do { \
608 (attrs) = CODING_ID_ATTRS ((coding)->id); \
609 (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \
610 } while (false)
611
612
613
614 static bool
615 growable_destination (struct coding_system *coding)
616 {
617 return STRINGP (coding->dst_object) || BUFFERP (coding->dst_object);
618 }
619
620
621
622
623
624
625
626
627
628
/* Fetch the next source byte into C and count it in consumed_chars.

   The expansion uses these names from the calling function: the
   pointers src, src_end and src_base, the flag multibytep, the counter
   consumed_chars, the variable coding, and the label no_more_source.

   If the source is exhausted, control jumps to no_more_source; when
   some bytes were already read since src_base, the result
   CODING_RESULT_INSUFFICIENT_SRC is recorded first.

   When multibytep is set and the byte has its high bit set:
   - a lead byte 0xC0 or 0xC1 is combined with the following byte into
     one value (0x80..0xFF if that byte is a 10xxxxxx trailing byte);
   - otherwise the whole multibyte character at that position is read
     with string_char_advance, C is set to its NEGATED code, and
     CODING_RESULT_INVALID_SRC is recorded.  */
629 #define ONE_MORE_BYTE(c) \
630 do { \
631 if (src == src_end) \
632 { \
633 if (src_base < src) \
634 record_conversion_result \
635 (coding, CODING_RESULT_INSUFFICIENT_SRC); \
636 goto no_more_source; \
637 } \
638 c = *src++; \
639 if (multibytep && (c & 0x80)) \
640 { \
641 if ((c & 0xFE) == 0xC0) \
642 c = ((c & 1) << 6) | *src++; /* NOTE(review): no src_end check here */ \
643 else \
644 { \
645 src--; /* back to the lead byte */ \
646 c = - string_char_advance (&src); \
647 record_conversion_result \
648 (coding, CODING_RESULT_INVALID_SRC); \
649 } \
650 } \
651 consumed_chars++; \
652 } while (0)
653
654
655
656
657
658
659
660
661
662
663
/* Fetch the next two source bytes into C1 and C2.

   The expansion uses the pointers src and src_end, the flag
   multibytep and the label no_more_source from the calling function.
   Unlike ONE_MORE_BYTE it neither updates consumed_chars nor records
   a conversion result.  Control jumps to no_more_source if the source
   runs out before either byte.

   With multibytep set, a 0xC0/0xC1 lead byte is combined with the
   byte that follows it.  For C1, any other multibyte sequence is
   skipped (BYTES_BY_CHAR_HEAD bytes in total) and the fetch is
   retried; for C2 such a byte yields -1, and only that single byte is
   consumed.  */
664 #define TWO_MORE_BYTES(c1, c2) \
665 do { \
666 do { \
667 if (src == src_end) \
668 goto no_more_source; \
669 c1 = *src++; \
670 if (multibytep && (c1 & 0x80)) \
671 { \
672 if ((c1 & 0xFE) == 0xC0) \
673 c1 = ((c1 & 1) << 6) | *src++; \
674 else \
675 { \
676 src += BYTES_BY_CHAR_HEAD (c1) - 1; \
677 c1 = -1; /* makes the inner loop try again */ \
678 } \
679 } \
680 } while (c1 < 0); \
681 if (src == src_end) \
682 goto no_more_source; \
683 c2 = *src++; \
684 if (multibytep && (c2 & 0x80)) \
685 { \
686 if ((c2 & 0xFE) == 0xC0) \
687 c2 = ((c2 & 1) << 6) | *src++; \
688 else \
689 c2 = -1; \
690 } \
691 } while (0)
692
693
694
695
696
697
698
699
700
/* The EMIT_* macros store bytes at the pointer dst, advance it, and
   add the number of bytes given as arguments to produced_chars.  They
   use dst and produced_chars (and, except for the ASCII variants,
   multibytep) from the calling function.  None of them checks for
   room in the destination.  */

/* Store the single byte C without looking at multibytep.  */
701 #define EMIT_ONE_ASCII_BYTE(c) \
702 do { \
703 produced_chars++; \
704 *dst++ = (c); \
705 } while (0)
706
707
708
709
/* Store the two bytes C1 and C2 without looking at multibytep.  */
710 #define EMIT_TWO_ASCII_BYTES(c1, c2) \
711 do { \
712 produced_chars += 2; \
713 *dst++ = (c1), *dst++ = (c2); \
714 } while (0)
715
716
717
718
719
720
721
722
/* Store the byte C.  When multibytep is set, a value of 0x80 or above
   is first mapped through BYTE8_TO_CHAR and the result is written
   with CHAR_STRING, so more than one byte may be stored; in any case
   produced_chars grows by one.  */
723 #define EMIT_ONE_BYTE(c) \
724 do { \
725 produced_chars++; \
726 if (multibytep) \
727 { \
728 unsigned ch = (c); \
729 if (ch >= 0x80) \
730 ch = BYTE8_TO_CHAR (ch); \
731 dst += CHAR_STRING (ch, dst); \
732 } \
733 else \
734 *dst++ = (c); \
735 } while (0)
736
737
738
739
/* Like EMIT_ONE_BYTE, for the two bytes C1 and C2 in that order.  */
740 #define EMIT_TWO_BYTES(c1, c2) \
741 do { \
742 produced_chars += 2; \
743 if (multibytep) \
744 { \
745 unsigned ch; \
746 \
747 ch = (c1); \
748 if (ch >= 0x80) \
749 ch = BYTE8_TO_CHAR (ch); \
750 dst += CHAR_STRING (ch, dst); \
751 ch = (c2); \
752 if (ch >= 0x80) \
753 ch = BYTE8_TO_CHAR (ch); \
754 dst += CHAR_STRING (ch, dst); \
755 } \
756 else \
757 { \
758 *dst++ = (c1); \
759 *dst++ = (c2); \
760 } \
761 } while (0)
762
763
/* Three bytes: EMIT_ONE_BYTE followed by EMIT_TWO_BYTES.  */
764 #define EMIT_THREE_BYTES(c1, c2, c3) \
765 do { \
766 EMIT_ONE_BYTE (c1); \
767 EMIT_TWO_BYTES (c2, c3); \
768 } while (0)
769
770
/* Four bytes: two uses of EMIT_TWO_BYTES.  */
771 #define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
772 do { \
773 EMIT_TWO_BYTES (c1, c2); \
774 EMIT_TWO_BYTES (c3, c4); \
775 } while (0)
776
777
778 static void
779 record_conversion_result (struct coding_system *coding,
780 enum coding_result_code result)
781 {
782 coding->result = result;
783 switch (result)
784 {
785 case CODING_RESULT_INSUFFICIENT_SRC:
786 Vlast_code_conversion_error = Qinsufficient_source;
787 break;
788 case CODING_RESULT_INVALID_SRC:
789 Vlast_code_conversion_error = Qinvalid_source;
790 break;
791 case CODING_RESULT_INTERRUPT:
792 Vlast_code_conversion_error = Qinterrupted;
793 break;
794 case CODING_RESULT_INSUFFICIENT_DST:
795
796
797
798 break;
799 case CODING_RESULT_SUCCESS:
800 break;
801 default:
802 Vlast_code_conversion_error = intern ("Unknown error");
803 }
804 }
805
806
807
808
809
810
/* The four macros below wrap a charset operation that may set the
   variable charset_map_loaded.  Each one clears that variable,
   performs the operation, and, if the variable is then nonzero,
   recomputes the source (or destination) address of CODING with
   coding_change_source (or coding_change_destination).  When the
   address moved, the caller's pointers given as arguments are shifted
   by the same offset.  */

/* C = DECODE_CHAR (CHARSET, CODE); may adjust SRC, SRC_BASE and
   SRC_END.  */
811 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
812 do { \
813 ptrdiff_t offset; \
814 \
815 charset_map_loaded = 0; \
816 c = DECODE_CHAR (charset, code); \
817 if (charset_map_loaded \
818 && (offset = coding_change_source (coding))) \
819 { \
820 src += offset; \
821 src_base += offset; \
822 src_end += offset; \
823 } \
824 } while (0)
825
/* CODE = ENCODE_CHAR (CHARSET, C); may adjust DST and DST_END.  */
826 #define CODING_ENCODE_CHAR(coding, dst, dst_end, charset, c, code) \
827 do { \
828 ptrdiff_t offset; \
829 \
830 charset_map_loaded = 0; \
831 code = ENCODE_CHAR (charset, c); \
832 if (charset_map_loaded \
833 && (offset = coding_change_destination (coding))) \
834 { \
835 dst += offset; \
836 dst_end += offset; \
837 } \
838 } while (0)
839
/* CHARSET = char_charset (C, CHARSET_LIST, CODE_RETURN); may adjust
   DST and DST_END.  */
840 #define CODING_CHAR_CHARSET(coding, dst, dst_end, c, charset_list, code_return, charset) \
841 do { \
842 ptrdiff_t offset; \
843 \
844 charset_map_loaded = 0; \
845 charset = char_charset (c, charset_list, code_return); \
846 if (charset_map_loaded \
847 && (offset = coding_change_destination (coding))) \
848 { \
849 dst += offset; \
850 dst_end += offset; \
851 } \
852 } while (0)
853
/* RESULT = CHAR_CHARSET_P (C, CHARSET); may adjust DST and DST_END.  */
854 #define CODING_CHAR_CHARSET_P(coding, dst, dst_end, c, charset, result) \
855 do { \
856 ptrdiff_t offset; \
857 \
858 charset_map_loaded = 0; \
859 result = CHAR_CHARSET_P (c, charset); \
860 if (charset_map_loaded \
861 && (offset = coding_change_destination (coding))) \
862 { \
863 dst += offset; \
864 dst_end += offset; \
865 } \
866 } while (0)
867
868
869
870
871
872
873
/* Make sure more than BYTES bytes are free between dst and dst_end.
   If dst + BYTES reaches or passes dst_end, the destination is
   enlarged with alloc_destination by BYTES plus the number of
   elements still pending in the character buffer
   (charbuf_end - charbuf), and dst and dst_end are updated.

   The expansion uses dst, dst_end, charbuf, charbuf_end and coding
   from the calling function.  */
874 #define ASSURE_DESTINATION(bytes) \
875 do { \
876 if (dst + (bytes) >= dst_end) \
877 { \
878 ptrdiff_t more_bytes = charbuf_end - charbuf + (bytes); \
879 \
880 dst = alloc_destination (coding, more_bytes, dst); \
881 dst_end = coding->destination + coding->dst_bytes; \
882 } \
883 } while (0)
884
885
886
887
888
889
890
/* Write character C at P with CHAR_STRING and advance P by the value
   CHAR_STRING returns.  P must be an lvalue and is evaluated twice.  */
891 #define CHAR_STRING_ADVANCE_NO_UNIFY(c, p) ((p) += CHAR_STRING (c, p))
892
893
894
895
896
897
/* Return the result of string_char_advance applied to the address of
   P, which must therefore be an lvalue.  ONE_MORE_BYTE relies on the
   same function to move its pointer past the character read.  */
898 #define STRING_CHAR_ADVANCE_NO_UNIFY(p) string_char_advance (&(p))
899
900
901
902 static void
903 coding_set_source (struct coding_system *coding)
904 {
905 if (BUFFERP (coding->src_object))
906 {
907 struct buffer *buf = XBUFFER (coding->src_object);
908
909 if (coding->src_pos < 0)
910 coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
911 else
912 coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
913 }
914 else if (STRINGP (coding->src_object))
915 {
916 coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
917 }
918 else
919 {
920
921
922 }
923 }
924
925
926
927
928
929 static ptrdiff_t
930 coding_change_source (struct coding_system *coding)
931 {
932 const unsigned char *orig = coding->source;
933 coding_set_source (coding);
934 return coding->source - orig;
935 }
936
937
938
939
940 static void
941 coding_set_destination (struct coding_system *coding)
942 {
943 if (BUFFERP (coding->dst_object))
944 {
945 if (BUFFERP (coding->src_object) && coding->src_pos < 0)
946 {
947 coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
948 coding->dst_bytes = (GAP_END_ADDR
949 - (coding->src_bytes - coding->consumed)
950 - coding->destination);
951 }
952 else
953 {
954
955
956 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
957 + coding->dst_pos_byte - BEG_BYTE);
958 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
959 - coding->destination);
960 }
961 }
962 else
963 {
964
965
966 }
967 }
968
969
970
971
972
973 static ptrdiff_t
974 coding_change_destination (struct coding_system *coding)
975 {
976 const unsigned char *orig = coding->destination;
977 coding_set_destination (coding);
978 return coding->destination - orig;
979 }
980
981
982 static void
983 coding_alloc_by_realloc (struct coding_system *coding, ptrdiff_t bytes)
984 {
985 ptrdiff_t newbytes;
986 if (INT_ADD_WRAPV (coding->dst_bytes, bytes, &newbytes)
987 || SIZE_MAX < newbytes)
988 string_overflow ();
989 coding->destination = xrealloc (coding->destination, newbytes);
990 coding->dst_bytes = newbytes;
991 }
992
993 static void
994 coding_alloc_by_making_gap (struct coding_system *coding,
995 ptrdiff_t gap_head_used, ptrdiff_t bytes)
996 {
997 if (EQ (coding->src_object, coding->dst_object))
998 {
999
1000
1001
1002
1003 ptrdiff_t add = GAP_SIZE;
1004
1005 GPT += gap_head_used, GPT_BYTE += gap_head_used;
1006 GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1007 make_gap (bytes);
1008 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1009 GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
1010 }
1011 else
1012 make_gap_1 (XBUFFER (coding->dst_object), bytes);
1013 }
1014
1015
1016 static unsigned char *
1017 alloc_destination (struct coding_system *coding, ptrdiff_t nbytes,
1018 unsigned char *dst)
1019 {
1020 ptrdiff_t offset = dst - coding->destination;
1021
1022 if (BUFFERP (coding->dst_object))
1023 {
1024 struct buffer *buf = XBUFFER (coding->dst_object);
1025
1026 coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1027 }
1028 else
1029 coding_alloc_by_realloc (coding, nbytes);
1030 coding_set_destination (coding);
1031 dst = coding->destination + offset;
1032 return dst;
1033 }
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
/* Length, in ints, of the longest annotation record written by the
   macros below (the composition record).  */
#define MAX_ANNOTATION_LENGTH 5

/* Append a three-int annotation header at BUF and advance BUF:
   the negated record length LEN, the annotation MASK, and NCHARS.
   Also set coding->annotated; a variable named coding must be in
   scope.  BUF must be a modifiable pointer lvalue.

   The expansion deliberately has no trailing semicolon, so the macro
   can be used as a single statement, e.g. in an if/else.  */
#define ADD_ANNOTATION_DATA(buf, len, mask, nchars)	\
  do {							\
    *(buf)++ = -(len);					\
    *(buf)++ = (mask);					\
    *(buf)++ = (nchars);				\
    coding->annotated = 1;				\
  } while (0)

/* Append a five-int composition record at BUF: the header followed
   by NBYTES and METHOD.  */
#define ADD_COMPOSITION_DATA(buf, nchars, nbytes, method)			\
  do {										\
    ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, nchars);	\
    *(buf)++ = (nbytes);							\
    *(buf)++ = (method);							\
  } while (0)


/* Append a four-int charset record at BUF: the header followed by
   the charset ID.  */
#define ADD_CHARSET_DATA(buf, nchars, id)					\
  do {										\
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars);	\
    *(buf)++ = (id);								\
  } while (0)
1090
1091
1092
1093
/* Values recording which end-of-line forms were seen.  The three
   non-zero values are distinct bits, so they can be OR-ed together
   (presumably by the EOL detector -- confirm against detect_eol).  */
1094 #define EOL_SEEN_NONE 0
1095 #define EOL_SEEN_LF 1
1096 #define EOL_SEEN_CR 2
1097 #define EOL_SEEN_CRLF 4
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
/* Classify the byte C by its high bits:
   below 0x80      a complete one-byte character,
   10xxxxxx        a trailing ("extra") byte,
   110xxxxx        lead byte of a 2-byte sequence,
   1110xxxx        lead byte of a 3-byte sequence,
   11110xxx        lead byte of a 4-byte sequence,
   111110xx        lead byte of a 5-byte sequence.
   Note that UTF_8_1_OCTET_P is also true for any negative C.  */
1110 #define UTF_8_1_OCTET_P(c) ((c) < 0x80)
1111 #define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
1112 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1113 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1114 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1115 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1116
/* The three bytes EF BB BF, i.e. U+FEFF encoded in UTF-8, which the
   code below treats as the byte order mark.  */
1117 #define UTF_8_BOM_1 0xEF
1118 #define UTF_8_BOM_2 0xBB
1119 #define UTF_8_BOM_3 0xBF
1120
1121
1122
1123
1124 static bool
1125 detect_coding_utf_8 (struct coding_system *coding,
1126 struct coding_detection_info *detect_info)
1127 {
1128 const unsigned char *src = coding->source, *src_base;
1129 const unsigned char *src_end = coding->source + coding->src_bytes;
1130 bool multibytep = coding->src_multibyte;
1131 ptrdiff_t consumed_chars = 0;
1132 bool bom_found = 0;
1133 ptrdiff_t nchars = coding->head_ascii;
1134
1135 detect_info->checked |= CATEGORY_MASK_UTF_8;
1136
1137 src += nchars;
1138
1139 if (src == coding->source
1140 && src + 3 < src_end
1141 && src[0] == UTF_8_BOM_1
1142 && src[1] == UTF_8_BOM_2
1143 && src[2] == UTF_8_BOM_3)
1144 {
1145 bom_found = 1;
1146 src += 3;
1147 nchars++;
1148 }
1149
1150 while (1)
1151 {
1152 int c, c1, c2, c3, c4;
1153
1154 src_base = src;
1155 ONE_MORE_BYTE (c);
1156 if (c < 0 || UTF_8_1_OCTET_P (c))
1157 {
1158 nchars++;
1159 if (c == '\r')
1160 {
1161 if (src < src_end && *src == '\n')
1162 {
1163 src++;
1164 nchars++;
1165 }
1166 }
1167 continue;
1168 }
1169 ONE_MORE_BYTE (c1);
1170 if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
1171 break;
1172 if (UTF_8_2_OCTET_LEADING_P (c))
1173 {
1174 nchars++;
1175 continue;
1176 }
1177 ONE_MORE_BYTE (c2);
1178 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1179 break;
1180 if (UTF_8_3_OCTET_LEADING_P (c))
1181 {
1182 nchars++;
1183 continue;
1184 }
1185 ONE_MORE_BYTE (c3);
1186 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1187 break;
1188 if (UTF_8_4_OCTET_LEADING_P (c))
1189 {
1190 nchars++;
1191 continue;
1192 }
1193 ONE_MORE_BYTE (c4);
1194 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1195 break;
1196 if (UTF_8_5_OCTET_LEADING_P (c)
1197
1198
1199 && c < MAX_MULTIBYTE_LEADING_CODE)
1200 {
1201 nchars++;
1202 continue;
1203 }
1204 break;
1205 }
1206 detect_info->rejected |= CATEGORY_MASK_UTF_8;
1207 return 0;
1208
1209 no_more_source:
1210 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1211 {
1212 detect_info->rejected |= CATEGORY_MASK_UTF_8;
1213 return 0;
1214 }
1215 if (bom_found)
1216 {
1217
1218 detect_info->found |= CATEGORY_MASK_UTF_8_AUTO | CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
1219 }
1220 else
1221 {
1222 detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
1223 if (nchars < src_end - coding->source)
1224
1225
1226 detect_info->found |= CATEGORY_MASK_UTF_8_AUTO | CATEGORY_MASK_UTF_8_NOSIG;
1227 }
1228 coding->detected_utf8_bytes = src_base - coding->source;
1229 coding->detected_utf8_chars = nchars;
1230 return 1;
1231 }
1232
1233
/* Decode UTF-8 bytes from CODING->source, starting at offset
   CODING->consumed, and append the resulting character codes to
   CODING->charbuf, starting at CODING->charbuf_used.

   If CODING_UTF_8_BOM (coding) is not utf_without_bom, a leading BOM
   (EF BB BF) is skipped when present; once the check has run,
   CODING_UTF_8_BOM (coding) is set to utf_without_bom, so the check is
   not repeated on a later call.

   Malformed sequences, overlong forms, surrogate code points
   (0xD800..0xDFFF) and values above MAX_CHAR are not decoded as a
   unit: the first byte of the offending sequence is stored by itself,
   unchanged if ASCII and via BYTE8_TO_CHAR otherwise, and decoding
   resumes at the following byte.

   On return CODING->consumed_char, CODING->consumed and
   CODING->charbuf_used are updated; only complete characters are
   counted as consumed.

   The names src, src_end, src_base, multibytep, consumed_chars and
   the label no_more_source are required by ONE_MORE_BYTE.  */
1234 static void
1235 decode_coding_utf_8 (struct coding_system *coding)
1236 {
1237 const unsigned char *src = coding->source + coding->consumed;
1238 const unsigned char *src_end = coding->source + coding->src_bytes;
1239 const unsigned char *src_base;
1240 int *charbuf = coding->charbuf + coding->charbuf_used;
1241 int *charbuf_end = coding->charbuf + coding->charbuf_size;
1242 ptrdiff_t consumed_chars = 0, consumed_chars_base = 0;
1243 bool multibytep = coding->src_multibyte;
1244 enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
/* True when CR is followed by a look-ahead read (see below).  */
1245 bool eol_dos
1246 = !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
/* Byte read ahead after a CR in DOS mode, or -1 if there is none.  */
1247 int byte_after_cr = -1;
1248
1249 if (bom != utf_without_bom)
1250 {
1251 int c1, c2, c3;
1252
/* Peek at up to three bytes; rewind src whenever they are not a BOM.
   NOTE(review): consumed_chars is not rewound together with src here,
   so peeked bytes are counted again by the main loop -- confirm
   whether this is intended.  */
1253 src_base = src;
1254 ONE_MORE_BYTE (c1);
1255 if (! UTF_8_3_OCTET_LEADING_P (c1))
1256 src = src_base;
1257 else
1258 {
1259 ONE_MORE_BYTE (c2);
1260 if (! UTF_8_EXTRA_OCTET_P (c2))
1261 src = src_base;
1262 else
1263 {
1264 ONE_MORE_BYTE (c3);
1265 if (! UTF_8_EXTRA_OCTET_P (c3))
1266 src = src_base;
1267 else
1268 {
1269 if ((c1 != UTF_8_BOM_1)
1270 || (c2 != UTF_8_BOM_2) || (c3 != UTF_8_BOM_3))
1271 src = src_base;
1272 else
1273 CODING_UTF_8_BOM (coding) = utf_without_bom;
1274 }
1275 }
1276 }
1277 }
/* Reached whether or not a BOM was found (but not when the source ran
   out during the check above).  */
1278 CODING_UTF_8_BOM (coding) = utf_without_bom;
1279
1280 while (1)
1281 {
1282 int c, c1, c2, c3, c4, c5;
1283
/* Remember where this character starts, so that an incomplete or
   invalid sequence can be rolled back.  */
1284 src_base = src;
1285 consumed_chars_base = consumed_chars;
1286
1287 if (charbuf >= charbuf_end)
1288 {
/* The buffer is full: give back the byte read ahead after a CR.  */
1289 if (byte_after_cr >= 0)
1290 src_base--;
1291 break;
1292 }
1293
1294 /* Fast path: copy a run of plain ASCII bytes, four per iteration.  */
1295 if (multibytep && ! eol_dos
1296 && charbuf < charbuf_end - 6 && src < src_end - 6)
1297 {
1298 while (charbuf < charbuf_end - 6 && src < src_end - 6)
1299 {
1300 c1 = *src;
1301 if (c1 & 0x80)
1302 break;
1303 src++;
1304 consumed_chars++;
1305 *charbuf++ = c1;
1306
1307 c1 = *src;
1308 if (c1 & 0x80)
1309 break;
1310 src++;
1311 consumed_chars++;
1312 *charbuf++ = c1;
1313
1314 c1 = *src;
1315 if (c1 & 0x80)
1316 break;
1317 src++;
1318 consumed_chars++;
1319 *charbuf++ = c1;
1320
1321 c1 = *src;
1322 if (c1 & 0x80)
1323 break;
1324 src++;
1325 consumed_chars++;
1326 *charbuf++ = c1;
1327 }
1328 /* Something was copied: restart the loop to refresh src_base.  */
1329 if (src != src_base)
1330 continue;
1331 }
1332
/* Use the byte read ahead after a CR, if any, before reading more.  */
1333 if (byte_after_cr >= 0)
1334 c1 = byte_after_cr, byte_after_cr = -1;
1335 else
1336 ONE_MORE_BYTE (c1);
1337 if (c1 < 0)
1338 {
/* ONE_MORE_BYTE returned a negated multibyte character.  */
1339 c = - c1;
1340 }
1341 else if (UTF_8_1_OCTET_P (c1))
1342 {
/* In DOS mode read the byte after a CR right away; if the source
   ends here, the CR itself is left unconsumed for the next call.  */
1343 if (eol_dos && c1 == '\r')
1344 ONE_MORE_BYTE (byte_after_cr);
1345 c = c1;
1346 }
1347 else
1348 {
1349 ONE_MORE_BYTE (c2);
1350 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1351 goto invalid_code;
1352 if (UTF_8_2_OCTET_LEADING_P (c1))
1353 {
1354 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1355 /* Reject an overlong 2-byte form of an ASCII character.  */
1356
1357
1358 if (c < 128)
1359 goto invalid_code;
1360 }
1361 else
1362 {
1363 ONE_MORE_BYTE (c3);
1364 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1365 goto invalid_code;
1366 if (UTF_8_3_OCTET_LEADING_P (c1))
1367 {
1368 c = (((c1 & 0xF) << 12)
1369 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
/* Reject overlong forms and the surrogate range.  */
1370 if (c < 0x800
1371 || (c >= 0xd800 && c < 0xe000))
1372 goto invalid_code;
1373 }
1374 else
1375 {
1376 ONE_MORE_BYTE (c4);
1377 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1378 goto invalid_code;
1379 if (UTF_8_4_OCTET_LEADING_P (c1))
1380 {
1381 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1382 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
/* Reject overlong forms.  */
1383 if (c < 0x10000)
1384 goto invalid_code;
1385 }
1386 else
1387 {
1388 ONE_MORE_BYTE (c5);
1389 if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
1390 goto invalid_code;
1391 if (UTF_8_5_OCTET_LEADING_P (c1))
1392 {
1393 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1394 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1395 | (c5 & 0x3F));
/* Reject values above MAX_CHAR and overlong forms.  */
1396 if ((c > MAX_CHAR) || (c < 0x200000))
1397 goto invalid_code;
1398 }
1399 else
1400 goto invalid_code;
1401 }
1402 }
1403 }
1404 }
1405
1406 *charbuf++ = c;
1407 continue;
1408
1409 invalid_code:
/* Go back to the start of the sequence and emit its first byte only.  */
1410 src = src_base;
1411 consumed_chars = consumed_chars_base;
1412 ONE_MORE_BYTE (c);
1413 *charbuf++ = ASCII_CHAR_P (c) ? c : BYTE8_TO_CHAR (c);
1414 }
1415
1416 no_more_source:
/* Report progress up to the start of the last incomplete character.  */
1417 coding->consumed_char += consumed_chars_base;
1418 coding->consumed = src_base - coding->source;
1419 coding->charbuf_used = charbuf - coding->charbuf;
1420 }
1421
1422
1423 bool
1424 encode_coding_utf_8 (struct coding_system *coding)
1425 {
1426 bool multibytep = coding->dst_multibyte;
1427 int *charbuf = coding->charbuf;
1428 int *charbuf_end = charbuf + coding->charbuf_used;
1429 unsigned char *dst = coding->destination + coding->produced;
1430 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1431 ptrdiff_t produced_chars = 0;
1432 int c;
1433
1434 if (CODING_UTF_8_BOM (coding) != utf_without_bom)
1435 {
1436 ASSURE_DESTINATION (3);
1437 EMIT_THREE_BYTES (UTF_8_BOM_1, UTF_8_BOM_2, UTF_8_BOM_3);
1438 CODING_UTF_8_BOM (coding) = utf_without_bom;
1439 }
1440
1441 if (multibytep)
1442 {
1443 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1444
1445 while (charbuf < charbuf_end)
1446 {
1447 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1448
1449 ASSURE_DESTINATION (safe_room);
1450 c = *charbuf++;
1451 if (CHAR_BYTE8_P (c))
1452 {
1453 c = CHAR_TO_BYTE8 (c);
1454 EMIT_ONE_BYTE (c);
1455 }
1456 else
1457 {
1458 CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
1459 for (p = str; p < pend; p++)
1460 EMIT_ONE_BYTE (*p);
1461 }
1462 }
1463 }
1464 else
1465 {
1466 int safe_room = MAX_MULTIBYTE_LENGTH;
1467
1468 while (charbuf < charbuf_end)
1469 {
1470 ASSURE_DESTINATION (safe_room);
1471 c = *charbuf++;
1472 if (CHAR_BYTE8_P (c))
1473 *dst++ = CHAR_TO_BYTE8 (c);
1474 else
1475 CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
1476 }
1477 produced_chars = dst - (coding->destination + coding->produced);
1478 }
1479 record_conversion_result (coding, CODING_RESULT_SUCCESS);
1480 coding->produced_char += produced_chars;
1481 coding->produced = dst - coding->destination;
1482 return 0;
1483 }
1484
1485
1486
1487
1488
1489 static bool
1490 detect_coding_utf_16 (struct coding_system *coding,
1491 struct coding_detection_info *detect_info)
1492 {
1493 const unsigned char *src = coding->source;
1494 const unsigned char *src_end = coding->source + coding->src_bytes;
1495 bool multibytep = coding->src_multibyte;
1496 int c1, c2;
1497
1498 detect_info->checked |= CATEGORY_MASK_UTF_16;
1499 if (coding->mode & CODING_MODE_LAST_BLOCK
1500 && (coding->src_chars & 1))
1501 {
1502 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1503 return 0;
1504 }
1505
1506 TWO_MORE_BYTES (c1, c2);
1507 if ((c1 == 0xFF) && (c2 == 0xFE))
1508 {
1509 detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1510 | CATEGORY_MASK_UTF_16_AUTO);
1511 detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1512 | CATEGORY_MASK_UTF_16_BE_NOSIG
1513 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1514 }
1515 else if ((c1 == 0xFE) && (c2 == 0xFF))
1516 {
1517 detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1518 | CATEGORY_MASK_UTF_16_AUTO);
1519 detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1520 | CATEGORY_MASK_UTF_16_BE_NOSIG
1521 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1522 }
1523 else if (c2 < 0)
1524 {
1525 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1526 return 0;
1527 }
1528 else
1529 {
1530
1531
1532 unsigned char e[256], o[256];
1533 unsigned e_num = 1, o_num = 1;
1534
1535 memset (e, 0, 256);
1536 memset (o, 0, 256);
1537 e[c1] = 1;
1538 o[c2] = 1;
1539
1540 detect_info->rejected |= (CATEGORY_MASK_UTF_16_AUTO
1541 |CATEGORY_MASK_UTF_16_BE
1542 | CATEGORY_MASK_UTF_16_LE);
1543
1544 while ((detect_info->rejected & CATEGORY_MASK_UTF_16)
1545 != CATEGORY_MASK_UTF_16)
1546 {
1547 TWO_MORE_BYTES (c1, c2);
1548 if (c2 < 0)
1549 break;
1550 if (! e[c1])
1551 {
1552 e[c1] = 1;
1553 e_num++;
1554 if (e_num >= 128)
1555 detect_info->rejected |= CATEGORY_MASK_UTF_16_BE_NOSIG;
1556 }
1557 if (! o[c2])
1558 {
1559 o[c2] = 1;
1560 o_num++;
1561 if (o_num >= 128)
1562 detect_info->rejected |= CATEGORY_MASK_UTF_16_LE_NOSIG;
1563 }
1564 }
1565 return 0;
1566 }
1567
1568 no_more_source:
1569 return 1;
1570 }
1571
1572 static void
1573 decode_coding_utf_16 (struct coding_system *coding)
1574 {
1575 const unsigned char *src = coding->source + coding->consumed;
1576 const unsigned char *src_end = coding->source + coding->src_bytes;
1577 const unsigned char *src_base;
1578 int *charbuf = coding->charbuf + coding->charbuf_used;
1579
1580 int *charbuf_end = coding->charbuf + coding->charbuf_size - 2;
1581 ptrdiff_t consumed_chars = 0, consumed_chars_base = 0;
1582 bool multibytep = coding->src_multibyte;
1583 enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1584 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1585 int surrogate = CODING_UTF_16_SURROGATE (coding);
1586 bool eol_dos
1587 = !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1588 int byte_after_cr1 = -1, byte_after_cr2 = -1;
1589
1590 if (bom == utf_with_bom)
1591 {
1592 int c, c1, c2;
1593
1594 src_base = src;
1595 ONE_MORE_BYTE (c1);
1596 ONE_MORE_BYTE (c2);
1597 c = (c1 << 8) | c2;
1598
1599 if (endian == utf_16_big_endian
1600 ? c != 0xFEFF : c != 0xFFFE)
1601 {
1602
1603
1604 src = src_base;
1605 }
1606 CODING_UTF_16_BOM (coding) = utf_without_bom;
1607 }
1608 else if (bom == utf_detect_bom)
1609 {
1610
1611
1612 CODING_UTF_16_BOM (coding) = utf_without_bom;
1613 }
1614
1615 while (1)
1616 {
1617 int c, c1, c2;
1618
1619 src_base = src;
1620 consumed_chars_base = consumed_chars;
1621
1622 if (charbuf >= charbuf_end)
1623 {
1624 if (byte_after_cr1 >= 0)
1625 src_base -= 2;
1626 break;
1627 }
1628
1629 if (byte_after_cr1 >= 0)
1630 c1 = byte_after_cr1, byte_after_cr1 = -1;
1631 else
1632 ONE_MORE_BYTE (c1);
1633 if (c1 < 0)
1634 {
1635 *charbuf++ = -c1;
1636 continue;
1637 }
1638 if (byte_after_cr2 >= 0)
1639 c2 = byte_after_cr2, byte_after_cr2 = -1;
1640 else
1641 ONE_MORE_BYTE (c2);
1642 if (c2 < 0)
1643 {
1644 *charbuf++ = ASCII_CHAR_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1645 *charbuf++ = -c2;
1646 continue;
1647 }
1648 c = (endian == utf_16_big_endian
1649 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1650
1651 if (surrogate)
1652 {
1653 if (! UTF_16_LOW_SURROGATE_P (c))
1654 {
1655 if (endian == utf_16_big_endian)
1656 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1657 else
1658 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1659 *charbuf++ = c1;
1660 *charbuf++ = c2;
1661 if (UTF_16_HIGH_SURROGATE_P (c))
1662 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1663 else
1664 *charbuf++ = c;
1665 }
1666 else
1667 {
1668 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1669 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1670 *charbuf++ = 0x10000 + c;
1671 }
1672 }
1673 else
1674 {
1675 if (UTF_16_HIGH_SURROGATE_P (c))
1676 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1677 else
1678 {
1679 if (eol_dos && c == '\r')
1680 {
1681 ONE_MORE_BYTE (byte_after_cr1);
1682 ONE_MORE_BYTE (byte_after_cr2);
1683 }
1684 *charbuf++ = c;
1685 }
1686 }
1687 }
1688
1689 no_more_source:
1690 coding->consumed_char += consumed_chars_base;
1691 coding->consumed = src_base - coding->source;
1692 coding->charbuf_used = charbuf - coding->charbuf;
1693 }
1694
1695 static bool
1696 encode_coding_utf_16 (struct coding_system *coding)
1697 {
1698 bool multibytep = coding->dst_multibyte;
1699 int *charbuf = coding->charbuf;
1700 int *charbuf_end = charbuf + coding->charbuf_used;
1701 unsigned char *dst = coding->destination + coding->produced;
1702 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1703 int safe_room = 8;
1704 enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1705 bool big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1706 ptrdiff_t produced_chars = 0;
1707 int c;
1708
1709 if (bom != utf_without_bom)
1710 {
1711 ASSURE_DESTINATION (safe_room);
1712 if (big_endian)
1713 EMIT_TWO_BYTES (0xFE, 0xFF);
1714 else
1715 EMIT_TWO_BYTES (0xFF, 0xFE);
1716 CODING_UTF_16_BOM (coding) = utf_without_bom;
1717 }
1718
1719 while (charbuf < charbuf_end)
1720 {
1721 ASSURE_DESTINATION (safe_room);
1722 c = *charbuf++;
1723 if (c > MAX_UNICODE_CHAR)
1724 c = coding->default_char;
1725
1726 if (c < 0x10000)
1727 {
1728 if (big_endian)
1729 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1730 else
1731 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1732 }
1733 else
1734 {
1735 int c1, c2;
1736
1737 c -= 0x10000;
1738 c1 = (c >> 10) + 0xD800;
1739 c2 = (c & 0x3FF) + 0xDC00;
1740 if (big_endian)
1741 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1742 else
1743 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1744 }
1745 }
1746 record_conversion_result (coding, CODING_RESULT_SUCCESS);
1747 coding->produced = dst - coding->destination;
1748 coding->produced_char += produced_chars;
1749 return 0;
1750 }
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826 char emacs_mule_bytes[256];
1827
1828
1829
1830
1831
1832 static bool
1833 detect_coding_emacs_mule (struct coding_system *coding,
1834 struct coding_detection_info *detect_info)
1835 {
1836 const unsigned char *src = coding->source, *src_base;
1837 const unsigned char *src_end = coding->source + coding->src_bytes;
1838 bool multibytep = coding->src_multibyte;
1839 ptrdiff_t consumed_chars = 0;
1840 int c;
1841 int found = 0;
1842
1843 detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1844
1845 src += coding->head_ascii;
1846
1847 while (1)
1848 {
1849 src_base = src;
1850 ONE_MORE_BYTE (c);
1851 if (c < 0)
1852 continue;
1853 if (c == 0x80)
1854 {
1855
1856
1857
1858
1859 const unsigned char *src_start;
1860
1861 repeat:
1862 src_start = src;
1863 do
1864 {
1865 ONE_MORE_BYTE (c);
1866 }
1867 while (c >= 0xA0);
1868
1869 if (src - src_start <= 4)
1870 break;
1871 found = CATEGORY_MASK_EMACS_MULE;
1872 if (c == 0x80)
1873 goto repeat;
1874 }
1875
1876 if (c < 0x80)
1877 {
1878 if (c < 0x20
1879 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1880 break;
1881 }
1882 else
1883 {
1884 int more_bytes = emacs_mule_bytes[c] - 1;
1885
1886 while (more_bytes > 0)
1887 {
1888 ONE_MORE_BYTE (c);
1889 if (c < 0xA0)
1890 {
1891 src--;
1892 break;
1893 }
1894 more_bytes--;
1895 }
1896 if (more_bytes != 0)
1897 break;
1898 found = CATEGORY_MASK_EMACS_MULE;
1899 }
1900 }
1901 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1902 return 0;
1903
1904 no_more_source:
1905 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1906 {
1907 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1908 return 0;
1909 }
1910 detect_info->found |= found;
1911 return 1;
1912 }
1913
1914
1915
1916
1917
1918
1919
1920
1921 static int
1922 emacs_mule_char (struct coding_system *coding, const unsigned char *src,
1923 int *nbytes, int *nchars, int *id,
1924 struct composition_status *cmp_status)
1925 {
1926 const unsigned char *src_end = coding->source + coding->src_bytes;
1927 const unsigned char *src_base = src;
1928 bool multibytep = coding->src_multibyte;
1929 int charset_ID;
1930 unsigned code;
1931 int c;
1932 ptrdiff_t consumed_chars = 0;
1933 bool mseq_found = 0;
1934
1935 ONE_MORE_BYTE (c);
1936 if (c < 0)
1937 {
1938 c = -c;
1939 charset_ID = emacs_mule_charset[0];
1940 }
1941 else
1942 {
1943 if (c >= 0xA0)
1944 {
1945 if (cmp_status->state != COMPOSING_NO
1946 && cmp_status->old_form)
1947 {
1948 if (cmp_status->state == COMPOSING_CHAR)
1949 {
1950 if (c == 0xA0)
1951 {
1952 ONE_MORE_BYTE (c);
1953 c -= 0x80;
1954 if (c < 0)
1955 goto invalid_code;
1956 }
1957 else
1958 c -= 0x20;
1959 mseq_found = 1;
1960 }
1961 else
1962 {
1963 *nbytes = src - src_base;
1964 *nchars = consumed_chars;
1965 return -c;
1966 }
1967 }
1968 else
1969 goto invalid_code;
1970 }
1971
1972 switch (emacs_mule_bytes[c])
1973 {
1974 case 2:
1975 if ((charset_ID = emacs_mule_charset[c]) < 0)
1976 goto invalid_code;
1977 ONE_MORE_BYTE (c);
1978 if (c < 0xA0)
1979 goto invalid_code;
1980 code = c & 0x7F;
1981 break;
1982
1983 case 3:
1984 if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1985 || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1986 {
1987 ONE_MORE_BYTE (c);
1988 if (c < 0xA0 || (charset_ID = emacs_mule_charset[c]) < 0)
1989 goto invalid_code;
1990 ONE_MORE_BYTE (c);
1991 if (c < 0xA0)
1992 goto invalid_code;
1993 code = c & 0x7F;
1994 }
1995 else
1996 {
1997 if ((charset_ID = emacs_mule_charset[c]) < 0)
1998 goto invalid_code;
1999 ONE_MORE_BYTE (c);
2000 if (c < 0xA0)
2001 goto invalid_code;
2002 code = (c & 0x7F) << 8;
2003 ONE_MORE_BYTE (c);
2004 if (c < 0xA0)
2005 goto invalid_code;
2006 code |= c & 0x7F;
2007 }
2008 break;
2009
2010 case 4:
2011 ONE_MORE_BYTE (c);
2012 if (c < 0 || (charset_ID = emacs_mule_charset[c]) < 0)
2013 goto invalid_code;
2014 ONE_MORE_BYTE (c);
2015 if (c < 0xA0)
2016 goto invalid_code;
2017 code = (c & 0x7F) << 8;
2018 ONE_MORE_BYTE (c);
2019 if (c < 0xA0)
2020 goto invalid_code;
2021 code |= c & 0x7F;
2022 break;
2023
2024 case 1:
2025 code = c;
2026 charset_ID = ASCII_CHAR_P (code) ? charset_ascii : charset_eight_bit;
2027 break;
2028
2029 default:
2030 emacs_abort ();
2031 }
2032 CODING_DECODE_CHAR (coding, src, src_base, src_end,
2033 CHARSET_FROM_ID (charset_ID), code, c);
2034 if (c < 0)
2035 goto invalid_code;
2036 }
2037 *nbytes = src - src_base;
2038 *nchars = consumed_chars;
2039 if (id)
2040 *id = charset_ID;
2041 return (mseq_found ? -c : c);
2042
2043 no_more_source:
2044 return -2;
2045
2046 invalid_code:
2047 return -1;
2048 }
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
/* Decode an old-form composition rule byte.

   C is reduced by 0xA0 in place (the caller's variable is modified).
   Unless the result lies in 0..80, control goes to the caller's
   `invalid_code' label.  Otherwise RULE is set to
   COMPOSITION_ENCODE_RULE (GREF, NREF), where GREF is C / 9 and NREF
   is C % 9, each with the value 4 replaced by 10.  */
#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(c, rule)          \
  do {                                                          \
    int gref, nref;                                             \
                                                                \
    c -= 0xA0;                                                  \
    if (! (0 <= c && c < 81))                                   \
      goto invalid_code;                                        \
    gref = c / 9;                                               \
    nref = c % 9;                                               \
    if (gref == 4)                                              \
      gref = 10;                                                \
    if (nref == 4)                                              \
      nref = 10;                                                \
    rule = COMPOSITION_ENCODE_RULE (gref, nref);                \
  } while (0)
2124
2125
2126
2127
2128
2129
/* Decode a two-byte composition rule.

   The value already in C, minus 0x20, is the first reference; the
   next byte (fetched into C with ONE_MORE_BYTE), minus 0x20, is the
   second.  Unless both lie in 0..80, control goes to the caller's
   `invalid_code' label.  Otherwise RULE is set to
   COMPOSITION_ENCODE_RULE of the two.  */
#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(c, rule)          \
  do {                                                          \
    int gref, nref;                                             \
                                                                \
    gref = c - 0x20;                                            \
    if (! (0 <= gref && gref < 81))                             \
      goto invalid_code;                                        \
    ONE_MORE_BYTE (c);                                          \
    nref = c - 0x20;                                            \
    if (! (0 <= nref && nref < 81))                             \
      goto invalid_code;                                        \
    rule = COMPOSITION_ENCODE_RULE (gref, nref);                \
  } while (0)
2143
2144
2145
2146
2147
2148
2149
/* Start a new-form composition.  Uses the caller's C, CMP_STATUS and
   CHARBUF.

   On entry C is the method byte (method = C - 0xF2).  Two more bytes
   are read: the first, minus 0xA0, is the byte count and the second,
   minus 0xA0, is the character count.  Control goes to the caller's
   `invalid_code' label when the first byte is negative, the byte
   count is below 3 (or differs from 4 for COMPOSITION_RELATIVE), or
   the character count is not in 1..MAX_COMPOSITION_COMPONENTS - 1.
   Otherwise CMP_STATUS is initialised and the composition annotation
   is appended with ADD_COMPOSITION_DATA.  */
#define DECODE_EMACS_MULE_21_COMPOSITION()                              \
  do {                                                                  \
    enum composition_method method = c - 0xF2;                          \
    int nbytes, nchars;                                                 \
                                                                        \
    ONE_MORE_BYTE (c);                                                  \
    if (c < 0)                                                          \
      goto invalid_code;                                                \
    nbytes = c - 0xA0;                                                  \
    if (nbytes < 3)                                                     \
      goto invalid_code;                                                \
    if (method == COMPOSITION_RELATIVE && nbytes != 4)                  \
      goto invalid_code;                                                \
    ONE_MORE_BYTE (c);                                                  \
    nchars = c - 0xA0;                                                  \
    if (nchars <= 0 || nchars >= MAX_COMPOSITION_COMPONENTS)            \
      goto invalid_code;                                                \
    cmp_status->old_form = 0;                                           \
    cmp_status->method = method;                                        \
    cmp_status->state = (method == COMPOSITION_RELATIVE                 \
                         ? COMPOSING_CHAR                               \
                         : COMPOSING_COMPONENT_CHAR);                   \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                         \
    cmp_status->nchars = nchars;                                        \
    cmp_status->ncomps = nbytes - 4;                                    \
    ADD_COMPOSITION_DATA (charbuf, nchars, nbytes, method);             \
  } while (0)
2176
2177
2178
2179
/* Start an old-form relative composition: initialise the caller's
   CMP_STATUS (old form, COMPOSITION_RELATIVE, state COMPOSING_CHAR,
   no characters or components yet) and append a composition
   annotation with zero counts to the caller's CHARBUF.  */
#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION()                     \
  do {                                                                  \
    cmp_status->old_form = 1;                                           \
    cmp_status->method = COMPOSITION_RELATIVE;                          \
    cmp_status->state = COMPOSING_CHAR;                                 \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                         \
    cmp_status->ncomps = 0;                                             \
    cmp_status->nchars = 0;                                             \
    ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);           \
  } while (0)
2189
2190
2191
2192
/* Start an old-form rule-based composition: initialise the caller's
   CMP_STATUS (old form, COMPOSITION_WITH_RULE, state COMPOSING_CHAR,
   no characters or components yet) and append a composition
   annotation with zero counts to the caller's CHARBUF.  */
#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION()                     \
  do {                                                                  \
    cmp_status->old_form = 1;                                           \
    cmp_status->method = COMPOSITION_WITH_RULE;                         \
    cmp_status->state = COMPOSING_CHAR;                                 \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                         \
    cmp_status->ncomps = 0;                                             \
    cmp_status->nchars = 0;                                             \
    ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);           \
  } while (0)
2202
2203
/* Read the byte that follows a composition-introducing 0x80 into the
   caller's C and start the matching kind of composition:

     0xF2 + COMPOSITION_RELATIVE .. 0xF2 + COMPOSITION_WITH_RULE_ALTCHARS
                    new-form composition;
     0xA0 .. 0xBF   old-form relative composition; SRC is moved back
                    so that this byte is read again by the caller;
     0xFF           old-form rule-based composition.

   Any other value, or a negative one, sends control to the caller's
   `invalid_code' label.  */
#define DECODE_EMACS_MULE_COMPOSITION_START()                           \
  do {                                                                  \
    const unsigned char *current_src = src;                             \
                                                                        \
    ONE_MORE_BYTE (c);                                                  \
    if (c < 0)                                                          \
      goto invalid_code;                                                \
    if (c >= 0xF2 + COMPOSITION_RELATIVE                                \
        && c <= 0xF2 + COMPOSITION_WITH_RULE_ALTCHARS)                  \
      {                                                                 \
        DECODE_EMACS_MULE_21_COMPOSITION ();                            \
      }                                                                 \
    else if (c < 0xA0)                                                  \
      {                                                                 \
        goto invalid_code;                                              \
      }                                                                 \
    else if (c < 0xC0)                                                  \
      {                                                                 \
        DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION ();                   \
        /* Un-read the byte just fetched.  */                           \
        src = current_src;                                              \
      }                                                                 \
    else if (c == 0xFF)                                                 \
      {                                                                 \
        DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION ();                   \
      }                                                                 \
    else                                                                \
      {                                                                 \
        goto invalid_code;                                              \
      }                                                                 \
  } while (0)
2227
/* Close the composition in progress.  Uses the caller's CHARBUF and
   CMP_STATUS.

   The annotation begins CMP_STATUS->length entries before CHARBUF.
   For an old-form composition its entry 2 is set to the number of
   characters collected; for a new-form composition with a method
   above COMPOSITION_RELATIVE its entry 0 is set to entry 2 minus the
   length.  The state is then reset to COMPOSING_NO.  */
#define EMACS_MULE_COMPOSITION_END()                                    \
  do {                                                                  \
    int *annotation = charbuf - cmp_status->length;                     \
                                                                        \
    if (cmp_status->old_form)                                           \
      annotation[2] = cmp_status->nchars;                               \
    else if (cmp_status->method > COMPOSITION_RELATIVE)                 \
      annotation[0] = annotation[2] - cmp_status->length;               \
    cmp_status->state = COMPOSING_NO;                                   \
  } while (0)
2238
2239
2240 static int
2241 emacs_mule_finish_composition (int *charbuf,
2242 struct composition_status *cmp_status)
2243 {
2244 int idx = - cmp_status->length;
2245 int new_chars;
2246
2247 if (cmp_status->old_form && cmp_status->nchars > 0)
2248 {
2249 charbuf[idx + 2] = cmp_status->nchars;
2250 new_chars = 0;
2251 if (cmp_status->method == COMPOSITION_WITH_RULE
2252 && cmp_status->state == COMPOSING_CHAR)
2253 {
2254
2255 int rule = charbuf[-1] + 0xA0;
2256
2257 charbuf[-2] = BYTE8_TO_CHAR (rule);
2258 charbuf[-1] = -1;
2259 new_chars = 1;
2260 }
2261 }
2262 else
2263 {
2264 charbuf[idx++] = BYTE8_TO_CHAR (0x80);
2265
2266 if (cmp_status->method == COMPOSITION_WITH_RULE)
2267 {
2268 charbuf[idx++] = BYTE8_TO_CHAR (0xFF);
2269 charbuf[idx++] = -3;
2270 charbuf[idx++] = 0;
2271 new_chars = 1;
2272 }
2273 else
2274 {
2275 int nchars = charbuf[idx + 1] + 0xA0;
2276 int nbytes = charbuf[idx + 2] + 0xA0;
2277
2278 charbuf[idx++] = BYTE8_TO_CHAR (0xF2 + cmp_status->method);
2279 charbuf[idx++] = BYTE8_TO_CHAR (nbytes);
2280 charbuf[idx++] = BYTE8_TO_CHAR (nchars);
2281 charbuf[idx++] = -1;
2282 new_chars = 4;
2283 }
2284 }
2285 cmp_status->state = COMPOSING_NO;
2286 return new_chars;
2287 }
2288
/* If a composition is in progress, finish it with
   emacs_mule_finish_composition and add the value it returns to the
   caller's CHAR_OFFSET.  Uses the caller's CHARBUF and CMP_STATUS.  */
#define EMACS_MULE_MAYBE_FINISH_COMPOSITION()                           \
  do {                                                                  \
    if (! (cmp_status->state == COMPOSING_NO))                          \
      {                                                                 \
        char_offset                                                     \
          += emacs_mule_finish_composition (charbuf, cmp_status);       \
      }                                                                 \
  } while (0)
2294
2295
2296 static void
2297 decode_coding_emacs_mule (struct coding_system *coding)
2298 {
2299 const unsigned char *src = coding->source + coding->consumed;
2300 const unsigned char *src_end = coding->source + coding->src_bytes;
2301 const unsigned char *src_base;
2302 int *charbuf = coding->charbuf + coding->charbuf_used;
2303
2304
2305 int *charbuf_end
2306 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3)
2307
2308 - 1;
2309 ptrdiff_t consumed_chars = 0, consumed_chars_base;
2310 bool multibytep = coding->src_multibyte;
2311 ptrdiff_t char_offset = coding->produced_char;
2312 ptrdiff_t last_offset = char_offset;
2313 int last_id = charset_ascii;
2314 bool eol_dos
2315 = !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
2316 int byte_after_cr = -1;
2317 struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status;
2318
2319 if (cmp_status->state != COMPOSING_NO)
2320 {
2321 int i;
2322
2323 if (charbuf_end - charbuf < cmp_status->length)
2324 emacs_abort ();
2325 for (i = 0; i < cmp_status->length; i++)
2326 *charbuf++ = cmp_status->carryover[i];
2327 coding->annotated = 1;
2328 }
2329
2330 while (1)
2331 {
2332 int c;
2333 int id UNINIT;
2334
2335 src_base = src;
2336 consumed_chars_base = consumed_chars;
2337
2338 if (charbuf >= charbuf_end)
2339 {
2340 if (byte_after_cr >= 0)
2341 src_base--;
2342 break;
2343 }
2344
2345 if (byte_after_cr >= 0)
2346 c = byte_after_cr, byte_after_cr = -1;
2347 else
2348 ONE_MORE_BYTE (c);
2349
2350 if (c < 0 || c == 0x80)
2351 {
2352 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2353 if (c < 0)
2354 {
2355 *charbuf++ = -c;
2356 char_offset++;
2357 }
2358 else
2359 DECODE_EMACS_MULE_COMPOSITION_START ();
2360 continue;
2361 }
2362
2363 if (c < 0x80)
2364 {
2365 if (eol_dos && c == '\r')
2366 ONE_MORE_BYTE (byte_after_cr);
2367 id = charset_ascii;
2368 if (cmp_status->state != COMPOSING_NO)
2369 {
2370 if (cmp_status->old_form)
2371 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2372 else if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2373 cmp_status->ncomps--;
2374 }
2375 }
2376 else
2377 {
2378 int nchars UNINIT, nbytes UNINIT;
2379
2380
2381
2382
2383
2384 const unsigned char *orig = coding->source;
2385 ptrdiff_t offset;
2386
2387 c = emacs_mule_char (coding, src_base, &nbytes, &nchars, &id,
2388 cmp_status);
2389 offset = coding->source - orig;
2390 if (offset)
2391 {
2392 src += offset;
2393 src_base += offset;
2394 src_end += offset;
2395 }
2396 if (c < 0)
2397 {
2398 if (c == -1)
2399 goto invalid_code;
2400 if (c == -2)
2401 break;
2402 }
2403 src = src_base + nbytes;
2404 consumed_chars = consumed_chars_base + nchars;
2405 if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2406 cmp_status->ncomps -= nchars;
2407 }
2408
2409
2410
2411
2412
2413 if (cmp_status->state == COMPOSING_NO)
2414 {
2415 if (last_id != id)
2416 {
2417 if (last_id != charset_ascii)
2418 ADD_CHARSET_DATA (charbuf, char_offset - last_offset,
2419 last_id);
2420 last_id = id;
2421 last_offset = char_offset;
2422 }
2423 *charbuf++ = c;
2424 char_offset++;
2425 }
2426 else if (cmp_status->state == COMPOSING_CHAR)
2427 {
2428 if (cmp_status->old_form)
2429 {
2430 if (c >= 0)
2431 {
2432 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2433 *charbuf++ = c;
2434 char_offset++;
2435 }
2436 else
2437 {
2438 *charbuf++ = -c;
2439 cmp_status->nchars++;
2440 cmp_status->length++;
2441 if (cmp_status->nchars == MAX_COMPOSITION_COMPONENTS)
2442 EMACS_MULE_COMPOSITION_END ();
2443 else if (cmp_status->method == COMPOSITION_WITH_RULE)
2444 cmp_status->state = COMPOSING_RULE;
2445 }
2446 }
2447 else
2448 {
2449 *charbuf++ = c;
2450 cmp_status->length++;
2451 cmp_status->nchars--;
2452 if (cmp_status->nchars == 0)
2453 EMACS_MULE_COMPOSITION_END ();
2454 }
2455 }
2456 else if (cmp_status->state == COMPOSING_RULE)
2457 {
2458 int rule;
2459
2460 if (c >= 0)
2461 {
2462 EMACS_MULE_COMPOSITION_END ();
2463 *charbuf++ = c;
2464 char_offset++;
2465 }
2466 else
2467 {
2468 c = -c;
2469 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (c, rule);
2470 if (rule < 0)
2471 goto invalid_code;
2472 *charbuf++ = -2;
2473 *charbuf++ = rule;
2474 cmp_status->length += 2;
2475 cmp_status->state = COMPOSING_CHAR;
2476 }
2477 }
2478 else if (cmp_status->state == COMPOSING_COMPONENT_CHAR)
2479 {
2480 *charbuf++ = c;
2481 cmp_status->length++;
2482 if (cmp_status->ncomps == 0)
2483 cmp_status->state = COMPOSING_CHAR;
2484 else if (cmp_status->ncomps > 0)
2485 {
2486 if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)
2487 cmp_status->state = COMPOSING_COMPONENT_RULE;
2488 }
2489 else
2490 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2491 }
2492 else
2493 {
2494 int rule;
2495
2496 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (c, rule);
2497 if (rule < 0)
2498 goto invalid_code;
2499 *charbuf++ = -2;
2500 *charbuf++ = rule;
2501 cmp_status->length += 2;
2502 cmp_status->ncomps--;
2503 if (cmp_status->ncomps > 0)
2504 cmp_status->state = COMPOSING_COMPONENT_CHAR;
2505 else
2506 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2507 }
2508 continue;
2509
2510 invalid_code:
2511 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2512 src = src_base;
2513 consumed_chars = consumed_chars_base;
2514 ONE_MORE_BYTE (c);
2515 *charbuf++ = ASCII_CHAR_P (c) ? c : BYTE8_TO_CHAR (c);
2516 char_offset++;
2517 }
2518
2519 no_more_source:
2520 if (cmp_status->state != COMPOSING_NO)
2521 {
2522 if (coding->mode & CODING_MODE_LAST_BLOCK)
2523 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2524 else
2525 {
2526 int i;
2527
2528 charbuf -= cmp_status->length;
2529 for (i = 0; i < cmp_status->length; i++)
2530 cmp_status->carryover[i] = charbuf[i];
2531 }
2532 }
2533 if (last_id != charset_ascii)
2534 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2535 coding->consumed_char += consumed_chars_base;
2536 coding->consumed = src_base - coding->source;
2537 coding->charbuf_used = charbuf - coding->charbuf;
2538 }
2539
2540
/* Store in CODES[0] and CODES[1] the leading byte(s) for the
   emacs-mule charset id ID:

     ID < 0xA0          CODES = { ID, 0 }      (single leading byte)
     0xA0 <= ID < 0xE0  CODES = { 0x9A, ID }
     0xE0 <= ID < 0xF0  CODES = { 0x9B, ID }
     0xF0 <= ID < 0xF5  CODES = { 0x9C, ID }
     otherwise          CODES = { 0x9D, ID }

   ID is evaluated exactly once.  The expansion deliberately has no
   trailing semicolon (the caller supplies it), so the macro can be
   used as the body of an `if' that has an `else'.  */
#define EMACS_MULE_LEADING_CODES(id, codes)                     \
  do {                                                          \
    const int leading_codes_id_ = (id);                         \
                                                                \
    if (leading_codes_id_ < 0xA0)                               \
      {                                                         \
        (codes)[0] = leading_codes_id_;                         \
        (codes)[1] = 0;                                         \
      }                                                         \
    else                                                        \
      {                                                         \
        (codes)[0] = (leading_codes_id_ < 0xE0 ? 0x9A           \
                      : leading_codes_id_ < 0xF0 ? 0x9B         \
                      : leading_codes_id_ < 0xF5 ? 0x9C         \
                      : 0x9D);                                  \
        (codes)[1] = leading_codes_id_;                         \
      }                                                         \
  } while (0)
2554
2555
2556 static bool
2557 encode_coding_emacs_mule (struct coding_system *coding)
2558 {
2559 bool multibytep = coding->dst_multibyte;
2560 int *charbuf = coding->charbuf;
2561 int *charbuf_end = charbuf + coding->charbuf_used;
2562 unsigned char *dst = coding->destination + coding->produced;
2563 unsigned char *dst_end = coding->destination + coding->dst_bytes;
2564 int safe_room = 8;
2565 ptrdiff_t produced_chars = 0;
2566 Lisp_Object attrs, charset_list;
2567 int c;
2568 int preferred_charset_id = -1;
2569
2570 CODING_GET_INFO (coding, attrs, charset_list);
2571 if (! EQ (charset_list, Vemacs_mule_charset_list))
2572 {
2573 charset_list = Vemacs_mule_charset_list;
2574 ASET (attrs, coding_attr_charset_list, charset_list);
2575 }
2576
2577 while (charbuf < charbuf_end)
2578 {
2579 ASSURE_DESTINATION (safe_room);
2580 c = *charbuf++;
2581
2582 if (c < 0)
2583 {
2584
2585 switch (*charbuf)
2586 {
2587 case CODING_ANNOTATE_COMPOSITION_MASK:
2588
2589 break;
2590 case CODING_ANNOTATE_CHARSET_MASK:
2591 preferred_charset_id = charbuf[3];
2592 if (preferred_charset_id >= 0
2593 && NILP (Fmemq (make_fixnum (preferred_charset_id),
2594 charset_list)))
2595 preferred_charset_id = -1;
2596 break;
2597 default:
2598 emacs_abort ();
2599 }
2600 charbuf += -c - 1;
2601 continue;
2602 }
2603
2604 if (ASCII_CHAR_P (c))
2605 EMIT_ONE_ASCII_BYTE (c);
2606 else if (CHAR_BYTE8_P (c))
2607 {
2608 c = CHAR_TO_BYTE8 (c);
2609 EMIT_ONE_BYTE (c);
2610 }
2611 else
2612 {
2613 struct charset *charset;
2614 unsigned code;
2615 int dimension;
2616 int emacs_mule_id;
2617 unsigned char leading_codes[2];
2618
2619 if (preferred_charset_id >= 0)
2620 {
2621 bool result;
2622
2623 charset = CHARSET_FROM_ID (preferred_charset_id);
2624 CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result);
2625 if (result)
2626 code = ENCODE_CHAR (charset, c);
2627 else
2628 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
2629 &code, charset);
2630 }
2631 else
2632 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
2633 &code, charset);
2634 if (! charset)
2635 {
2636 c = coding->default_char;
2637 if (ASCII_CHAR_P (c))
2638 {
2639 EMIT_ONE_ASCII_BYTE (c);
2640 continue;
2641 }
2642 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
2643 &code, charset);
2644 }
2645 dimension = CHARSET_DIMENSION (charset);
2646 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2647 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2648 EMIT_ONE_BYTE (leading_codes[0]);
2649 if (leading_codes[1])
2650 EMIT_ONE_BYTE (leading_codes[1]);
2651 if (dimension == 1)
2652 EMIT_ONE_BYTE (code | 0x80);
2653 else
2654 {
2655 code |= 0x8080;
2656 EMIT_ONE_BYTE (code >> 8);
2657 EMIT_ONE_BYTE (code & 0xFF);
2658 }
2659 }
2660 }
2661 record_conversion_result (coding, CODING_RESULT_SUCCESS);
2662 coding->produced_char += produced_chars;
2663 coding->produced = dst - coding->destination;
2664 return 0;
2665 }
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845 static enum iso_code_class_type iso_code_class[256];
2846
2847 #define SAFE_CHARSET_P(coding, id) \
2848 ((id) <= (coding)->max_charset_id \
2849 && (coding)->safe_charsets[id] != 255)
2850
2851 static void
2852 setup_iso_safe_charsets (Lisp_Object attrs)
2853 {
2854 Lisp_Object charset_list, safe_charsets;
2855 Lisp_Object request;
2856 Lisp_Object reg_usage;
2857 Lisp_Object tail;
2858 EMACS_INT reg94, reg96;
2859 int flags = XFIXNUM (AREF (attrs, coding_attr_iso_flags));
2860 int max_charset_id;
2861
2862 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2863 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2864 && ! EQ (charset_list, Viso_2022_charset_list))
2865 {
2866 charset_list = Viso_2022_charset_list;
2867 ASET (attrs, coding_attr_charset_list, charset_list);
2868 ASET (attrs, coding_attr_safe_charsets, Qnil);
2869 }
2870
2871 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2872 return;
2873
2874 max_charset_id = 0;
2875 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2876 {
2877 int id = XFIXNUM (XCAR (tail));
2878 if (max_charset_id < id)
2879 max_charset_id = id;
2880 }
2881
2882 safe_charsets = make_uninit_string (max_charset_id + 1);
2883 memset (SDATA (safe_charsets), 255, max_charset_id + 1);
2884 request = AREF (attrs, coding_attr_iso_request);
2885 reg_usage = AREF (attrs, coding_attr_iso_usage);
2886 reg94 = XFIXNUM (XCAR (reg_usage));
2887 reg96 = XFIXNUM (XCDR (reg_usage));
2888
2889 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2890 {
2891 Lisp_Object id;
2892 Lisp_Object reg;
2893 struct charset *charset;
2894
2895 id = XCAR (tail);
2896 charset = CHARSET_FROM_ID (XFIXNUM (id));
2897 reg = Fcdr (Fassq (id, request));
2898 if (! NILP (reg))
2899 SSET (safe_charsets, XFIXNUM (id), XFIXNUM (reg));
2900 else if (charset->iso_chars_96)
2901 {
2902 if (reg96 < 4)
2903 SSET (safe_charsets, XFIXNUM (id), reg96);
2904 }
2905 else
2906 {
2907 if (reg94 < 4)
2908 SSET (safe_charsets, XFIXNUM (id), reg94);
2909 }
2910 }
2911 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2912 }
2913
2914
2915
2916
2917
2918
2919 static bool
2920 detect_coding_iso_2022 (struct coding_system *coding,
2921 struct coding_detection_info *detect_info)
2922 {
2923 const unsigned char *src = coding->source, *src_base = src;
2924 const unsigned char *src_end = coding->source + coding->src_bytes;
2925 bool multibytep = coding->src_multibyte;
2926 bool single_shifting = 0;
2927 int id;
2928 int c, c1;
2929 ptrdiff_t consumed_chars = 0;
2930 int i;
2931 int rejected = 0;
2932 int found = 0;
2933 int composition_count = -1;
2934
2935 detect_info->checked |= CATEGORY_MASK_ISO;
2936
2937 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2938 {
2939 struct coding_system *this = &(coding_categories[i]);
2940 Lisp_Object attrs, val;
2941
2942 if (this->id < 0)
2943 continue;
2944 attrs = CODING_ID_ATTRS (this->id);
2945 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2946 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Viso_2022_charset_list))
2947 setup_iso_safe_charsets (attrs);
2948 val = CODING_ATTR_SAFE_CHARSETS (attrs);
2949 this->max_charset_id = SCHARS (val) - 1;
2950 this->safe_charsets = SDATA (val);
2951 }
2952
2953
2954 src += coding->head_ascii;
2955
2956 while (rejected != CATEGORY_MASK_ISO)
2957 {
2958 src_base = src;
2959 ONE_MORE_BYTE (c);
2960 switch (c)
2961 {
2962 case ISO_CODE_ESC:
2963 if (inhibit_iso_escape_detection)
2964 break;
2965 single_shifting = 0;
2966 ONE_MORE_BYTE (c);
2967 if (c == 'N' || c == 'O')
2968 {
2969
2970 single_shifting = 1;
2971 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2972 }
2973 else if (c == '1')
2974 {
2975
2976 if (composition_count < 0
2977 || composition_count > MAX_COMPOSITION_COMPONENTS)
2978
2979 break;
2980 composition_count = -1;
2981 found |= CATEGORY_MASK_ISO;
2982 }
2983 else if (c >= '0' && c <= '4')
2984 {
2985
2986 composition_count = 0;
2987 }
2988 else
2989 {
2990 if (c >= '(' && c <= '/')
2991 {
2992
2993 ONE_MORE_BYTE (c1);
2994 if (c1 < ' ' || c1 >= 0x80
2995 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
2996 {
2997
2998 if (c1 >= 0x80)
2999 rejected |= (CATEGORY_MASK_ISO_7BIT
3000 | CATEGORY_MASK_ISO_7_ELSE);
3001 break;
3002 }
3003 }
3004 else if (c == '$')
3005 {
3006
3007 ONE_MORE_BYTE (c);
3008 if (c >= '@' && c <= 'B')
3009
3010 id = iso_charset_table[1][0][c];
3011 else if (c >= '(' && c <= '/')
3012 {
3013 ONE_MORE_BYTE (c1);
3014 if (c1 < ' ' || c1 >= 0x80
3015 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
3016 {
3017
3018 if (c1 >= 0x80)
3019 rejected |= (CATEGORY_MASK_ISO_7BIT
3020 | CATEGORY_MASK_ISO_7_ELSE);
3021 break;
3022 }
3023 }
3024 else
3025 {
3026
3027 if (c >= 0x80)
3028 rejected |= (CATEGORY_MASK_ISO_7BIT
3029 | CATEGORY_MASK_ISO_7_ELSE);
3030 break;
3031 }
3032 }
3033 else
3034 {
3035
3036 if (c >= 0x80)
3037 rejected |= (CATEGORY_MASK_ISO_7BIT
3038 | CATEGORY_MASK_ISO_7_ELSE);
3039 break;
3040 }
3041
3042
3043 rejected |= CATEGORY_MASK_ISO_8BIT;
3044 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
3045 id))
3046 found |= CATEGORY_MASK_ISO_7;
3047 else
3048 rejected |= CATEGORY_MASK_ISO_7;
3049 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
3050 id))
3051 found |= CATEGORY_MASK_ISO_7_TIGHT;
3052 else
3053 rejected |= CATEGORY_MASK_ISO_7_TIGHT;
3054 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
3055 id))
3056 found |= CATEGORY_MASK_ISO_7_ELSE;
3057 else
3058 rejected |= CATEGORY_MASK_ISO_7_ELSE;
3059 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
3060 id))
3061 found |= CATEGORY_MASK_ISO_8_ELSE;
3062 else
3063 rejected |= CATEGORY_MASK_ISO_8_ELSE;
3064 }
3065 break;
3066
3067 case ISO_CODE_SO:
3068 case ISO_CODE_SI:
3069
3070 if (inhibit_iso_escape_detection)
3071 break;
3072 single_shifting = 0;
3073 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3074 break;
3075
3076 case ISO_CODE_CSI:
3077
3078 single_shifting = 0;
3079 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3080 found |= CATEGORY_MASK_ISO_8_ELSE;
3081 goto check_extra_latin;
3082
3083 case ISO_CODE_SS2:
3084 case ISO_CODE_SS3:
3085
3086 if (inhibit_iso_escape_detection)
3087 break;
3088 single_shifting = 0;
3089 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3090 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3091 & CODING_ISO_FLAG_SINGLE_SHIFT)
3092 {
3093 found |= CATEGORY_MASK_ISO_8_1;
3094 single_shifting = 1;
3095 }
3096 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
3097 & CODING_ISO_FLAG_SINGLE_SHIFT)
3098 {
3099 found |= CATEGORY_MASK_ISO_8_2;
3100 single_shifting = 1;
3101 }
3102 if (single_shifting)
3103 break;
3104 goto check_extra_latin;
3105
3106 default:
3107 if (c < 0)
3108 continue;
3109 if (c < 0x80)
3110 {
3111 if (composition_count >= 0)
3112 composition_count++;
3113 single_shifting = 0;
3114 break;
3115 }
3116 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3117 if (c >= 0xA0)
3118 {
3119 found |= CATEGORY_MASK_ISO_8_1;
3120
3121
3122
3123
3124 if (! single_shifting
3125 && ! (rejected & CATEGORY_MASK_ISO_8_2))
3126 {
3127 ptrdiff_t len = 1;
3128 while (src < src_end)
3129 {
3130 src_base = src;
3131 ONE_MORE_BYTE (c);
3132 if (c < 0xA0)
3133 {
3134 src = src_base;
3135 break;
3136 }
3137 len++;
3138 }
3139
3140 if (len & 1 && src < src_end)
3141 {
3142 rejected |= CATEGORY_MASK_ISO_8_2;
3143 if (composition_count >= 0)
3144 composition_count += len;
3145 }
3146 else
3147 {
3148 found |= CATEGORY_MASK_ISO_8_2;
3149 if (composition_count >= 0)
3150 composition_count += len / 2;
3151 }
3152 }
3153 break;
3154 }
3155 check_extra_latin:
3156 if (! VECTORP (Vlatin_extra_code_table)
3157 || NILP (AREF (Vlatin_extra_code_table, c)))
3158 {
3159 rejected = CATEGORY_MASK_ISO;
3160 break;
3161 }
3162 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3163 & CODING_ISO_FLAG_LATIN_EXTRA)
3164 found |= CATEGORY_MASK_ISO_8_1;
3165 else
3166 rejected |= CATEGORY_MASK_ISO_8_1;
3167 rejected |= CATEGORY_MASK_ISO_8_2;
3168 break;
3169 }
3170 }
3171 detect_info->rejected |= CATEGORY_MASK_ISO;
3172 return 0;
3173
3174 no_more_source:
3175 detect_info->rejected |= rejected;
3176 detect_info->found |= (found & ~rejected);
3177 return 1;
3178 }
3179
3180
3181
3182
/* Decode a designation of a charset to graphic register REG of CODING
   (a variable captured from the expansion site).

   The charset is looked up with ISO_CHARSET_TABLE (DIM, CHARS_96, FINAL).
   If FINAL is outside ['0', 127], the lookup yields a negative id, or
   the charset is not safe for CODING, the designation of REG is set to
   the marker -2, CHARS_96 is set to -1 and the macro stops.

   Otherwise the charset id is stored as the designation of REG, after
   mapping JISX0201-Roman to ASCII when CODING_ISO_FLAG_USE_ROMAN is set
   and JISX0208-1978 to JISX0208 when CODING_ISO_FLAG_USE_OLDJIS is set.
   If REG previously held the -2 marker and the new charset is ASCII,
   CHARS_96 is also set to -1.

   CHARS_96 is assigned to, so it must be an lvalue; callers in this
   file test it for a negative value after the expansion.

   Every line of the body must end in a backslash: a bare blank line
   terminates the macro and leaves the rest at file scope.  */
#define DECODE_DESIGNATION(reg, dim, chars_96, final) \
  do { \
    int id, prev; \
 \
    if (final < '0' || final >= 128 \
        || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0) \
        || !SAFE_CHARSET_P (coding, id)) \
      { \
        CODING_ISO_DESIGNATION (coding, reg) = -2; \
        chars_96 = -1; \
        break; \
      } \
    prev = CODING_ISO_DESIGNATION (coding, reg); \
    if (id == charset_jisx0201_roman) \
      { \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
          id = charset_ascii; \
      } \
    else if (id == charset_jisx0208_1978) \
      { \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
          id = charset_jisx0208; \
      } \
    CODING_ISO_DESIGNATION (coding, reg) = id; \
    /* An ASCII designation following an invalid one (-2) is also */ \
    /* reported to the caller through CHARS_96.  */ \
    if (prev == -2 && id == charset_ascii) \
      chars_96 = -1; \
  } while (0)
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
/* Decode a composition rule starting at the byte held in C1 (captured
   from the expansion site) and store the result in RULE.

   C1 - 32 below zero jumps to invalid_code.  A value below 81 is split
   into two base-9 digits (with digit 4 remapped to 10) and packed with
   COMPOSITION_ENCODE_RULE.  Otherwise one more byte is read; the pair
   (C1 - 32 - 81, byte - 32) is validated, packed the same way and
   offset by 0x100.  */
#define DECODE_COMPOSITION_RULE(rule) \
  do { \
    rule = c1 - 32; \
    if (rule < 0) \
      goto invalid_code; \
    if (rule >= 81) \
      { \
        int second_byte; \
 \
        ONE_MORE_BYTE (second_byte); \
        if (! COMPOSITION_ENCODE_RULE_VALID (rule - 81, second_byte - 32)) \
          goto invalid_code; \
        rule = COMPOSITION_ENCODE_RULE (rule - 81, second_byte - 32); \
        rule += 0x100; \
      } \
    else \
      { \
        int gref = (rule) / 9; \
        int nref = (rule) % 9; \
 \
        if (gref == 4) \
          gref = 10; \
        if (nref == 4) \
          nref = 10; \
        rule = COMPOSITION_ENCODE_RULE (gref, nref); \
      } \
  } while (0)
3285
/* Turn the packed composition rule RULE back into source bytes stored
   at charbuf[idx] and charbuf[idx + 1], and add the number of bytes to
   new_chars (charbuf, idx and new_chars are captured from the expansion
   site).

   The low byte of RULE is split into RULE / 12 and RULE % 12.  A RULE
   below 0x100 yields one byte (digits of value 10 are mapped back to 4)
   followed by the filler -1; otherwise two bytes are produced.  */
#define ENCODE_COMPOSITION_RULE(rule) \
  do { \
    int packed = rule % 0x100; \
    int gref = packed / 12; \
    int nref = packed % 12; \
 \
    if (rule >= 0x100) \
      { \
        charbuf[idx] = 32 + 81 + gref; \
        charbuf[idx + 1] = 32 + nref; \
        new_chars += 2; \
      } \
    else \
      { \
        if (gref == 10) \
          gref = 4; \
        if (nref == 10) \
          nref = 4; \
        charbuf[idx] = 32 + gref * 9 + nref; \
        charbuf[idx + 1] = -1; \
        new_chars++; \
      } \
  } while (0)
3305
3306
3307
3308 static int
3309 finish_composition (int *charbuf, struct composition_status *cmp_status)
3310 {
3311 int idx = - cmp_status->length;
3312 int new_chars;
3313
3314
3315 charbuf[idx++] = ISO_CODE_ESC;
3316 charbuf[idx++] = (cmp_status->method == COMPOSITION_RELATIVE ? '0'
3317 : cmp_status->method == COMPOSITION_WITH_RULE ? '2'
3318 : cmp_status->method == COMPOSITION_WITH_ALTCHARS ? '3'
3319
3320 : '4');
3321 charbuf[idx++] = -2;
3322 charbuf[idx++] = 0;
3323 charbuf[idx++] = -1;
3324 new_chars = cmp_status->nchars;
3325 if (cmp_status->method >= COMPOSITION_WITH_RULE)
3326 for (; idx < 0; idx++)
3327 {
3328 int elt = charbuf[idx];
3329
3330 if (elt == -2)
3331 {
3332 ENCODE_COMPOSITION_RULE (charbuf[idx + 1]);
3333 idx++;
3334 }
3335 else if (elt == -1)
3336 {
3337 charbuf[idx++] = ISO_CODE_ESC;
3338 charbuf[idx] = '0';
3339 new_chars += 2;
3340 }
3341 }
3342 cmp_status->state = COMPOSING_NO;
3343 return new_chars;
3344 }
3345
3346
/* If a composition is in progress (cmp_status->state is not
   COMPOSING_NO), abandon it with finish_composition and add its return
   value to char_offset.  Uses cmp_status, charbuf and char_offset from
   the expansion site.  */
#define MAYBE_FINISH_COMPOSITION() \
  do { \
    if (cmp_status->state == COMPOSING_NO) \
      break; \
    char_offset += finish_composition (charbuf, cmp_status); \
  } while (0)
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
/* Handle a composition start sequence whose final byte is C1.

   When C1 is '0' and the current composition is collecting components
   (COMPOSING_COMPONENT_CHAR with COMPOSITION_WITH_ALTCHARS, or
   COMPOSING_COMPONENT_RULE with COMPOSITION_WITH_RULE_ALTCHARS), two -1
   markers are stored and the state switches to COMPOSING_CHAR.

   Otherwise any composition in progress is abandoned and a new one is
   started: the method is chosen from C1 ('0', '2', '3', anything else),
   the state is COMPOSING_CHAR for C1 <= '2' and
   COMPOSING_COMPONENT_CHAR otherwise, and an annotation header is added
   with ADD_COMPOSITION_DATA.

   Uses cmp_status, charbuf, char_offset and coding from the expansion
   site.  */
#define DECODE_COMPOSITION_START(c1) \
  do { \
    bool components_done \
      = (c1 == '0' \
         && ((cmp_status->state == COMPOSING_COMPONENT_CHAR \
              && cmp_status->method == COMPOSITION_WITH_ALTCHARS) \
             || (cmp_status->state == COMPOSING_COMPONENT_RULE \
                 && cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS))); \
 \
    if (! components_done) \
      { \
        MAYBE_FINISH_COMPOSITION (); \
        if (c1 == '0') \
          cmp_status->method = COMPOSITION_RELATIVE; \
        else if (c1 == '2') \
          cmp_status->method = COMPOSITION_WITH_RULE; \
        else if (c1 == '3') \
          cmp_status->method = COMPOSITION_WITH_ALTCHARS; \
        else \
          cmp_status->method = COMPOSITION_WITH_RULE_ALTCHARS; \
        if (c1 <= '2') \
          cmp_status->state = COMPOSING_CHAR; \
        else \
          cmp_status->state = COMPOSING_COMPONENT_CHAR; \
        ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method); \
        cmp_status->length = MAX_ANNOTATION_LENGTH; \
        cmp_status->nchars = cmp_status->ncomps = 0; \
        coding->annotated = 1; \
      } \
    else \
      { \
        *charbuf++ = -1; \
        *charbuf++ = -1; \
        cmp_status->state = COMPOSING_CHAR; \
        cmp_status->length += 2; \
      } \
  } while (0)
3393
3394
3395
3396
/* Handle a composition end sequence.

   The composition is rejected (abandoned, then goto invalid_code) when
   no character has been collected, or when "state is COMPOSING_CHAR"
   and "method is COMPOSITION_WITH_RULE" are both true or both false.

   Otherwise the first element of the composition data is reduced by
   ncomps + 2 (COMPOSITION_WITH_ALTCHARS) or ncomps * 3
   (COMPOSITION_WITH_RULE_ALTCHARS), the third element receives nchars,
   char_offset advances by nchars, and the state returns to
   COMPOSING_NO.  */
#define DECODE_COMPOSITION_END() \
  do { \
    int *cmp_head; \
 \
    if (cmp_status->nchars == 0 \
        || ((cmp_status->state == COMPOSING_CHAR) \
            == (cmp_status->method == COMPOSITION_WITH_RULE))) \
      { \
        MAYBE_FINISH_COMPOSITION (); \
        goto invalid_code; \
      } \
    cmp_head = charbuf - cmp_status->length; \
    if (cmp_status->method == COMPOSITION_WITH_ALTCHARS) \
      cmp_head[0] -= cmp_status->ncomps + 2; \
    else if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS) \
      cmp_head[0] -= cmp_status->ncomps * 3; \
    cmp_head[2] = cmp_status->nchars; \
    char_offset += cmp_status->nchars; \
    cmp_status->state = COMPOSING_NO; \
  } while (0)
3414
3415
3416
/* Append the marker -2 and then RULE to charbuf, extend the recorded
   composition length by two, and step cmp_status->state back by one.  */
#define STORE_COMPOSITION_RULE(rule) \
  do { \
    charbuf[0] = -2; \
    charbuf[1] = rule; \
    charbuf += 2; \
    cmp_status->length += 2; \
    cmp_status->state--; \
  } while (0)
3424
3425
3426
3427
/* Append character C to charbuf as part of the current composition.

   The recorded length grows by one.  C counts as a composed character
   (nchars) while the state is COMPOSING_CHAR and as a component
   (ncomps) otherwise.  The state then advances by one when the method
   is COMPOSITION_WITH_RULE, or when it is
   COMPOSITION_WITH_RULE_ALTCHARS and the state is
   COMPOSING_COMPONENT_CHAR.  */
#define STORE_COMPOSITION_CHAR(c) \
  do { \
    charbuf[0] = (c); \
    charbuf++; \
    cmp_status->length += 1; \
    if (cmp_status->state != COMPOSING_CHAR) \
      cmp_status->ncomps++; \
    else \
      cmp_status->nchars++; \
    if ((cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS \
         && cmp_status->state == COMPOSING_COMPONENT_CHAR) \
        || cmp_status->method == COMPOSITION_WITH_RULE) \
      cmp_status->state++; \
  } while (0)
3441
3442
3443
3444
3445 static void
3446 decode_coding_iso_2022 (struct coding_system *coding)
3447 {
3448 const unsigned char *src = coding->source + coding->consumed;
3449 const unsigned char *src_end = coding->source + coding->src_bytes;
3450 const unsigned char *src_base;
3451 int *charbuf = coding->charbuf + coding->charbuf_used;
3452
3453
3454 int *charbuf_end
3455 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
3456 ptrdiff_t consumed_chars = 0, consumed_chars_base;
3457 bool multibytep = coding->src_multibyte;
3458
3459 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3460 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3461 int charset_id_2, charset_id_3;
3462 struct charset *charset;
3463 int c;
3464 struct composition_status *cmp_status = CODING_ISO_CMP_STATUS (coding);
3465 Lisp_Object attrs = CODING_ID_ATTRS (coding->id);
3466 ptrdiff_t char_offset = coding->produced_char;
3467 ptrdiff_t last_offset = char_offset;
3468 int last_id = charset_ascii;
3469 bool eol_dos
3470 = !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3471 int byte_after_cr = -1;
3472 int i;
3473
3474 setup_iso_safe_charsets (attrs);
3475 coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
3476
3477 if (cmp_status->state != COMPOSING_NO)
3478 {
3479 if (charbuf_end - charbuf < cmp_status->length)
3480 emacs_abort ();
3481 for (i = 0; i < cmp_status->length; i++)
3482 *charbuf++ = cmp_status->carryover[i];
3483 coding->annotated = 1;
3484 }
3485
3486 while (1)
3487 {
3488 int c1, c2, c3;
3489
3490 src_base = src;
3491 consumed_chars_base = consumed_chars;
3492
3493 if (charbuf >= charbuf_end)
3494 {
3495 if (byte_after_cr >= 0)
3496 src_base--;
3497 break;
3498 }
3499
3500 if (byte_after_cr >= 0)
3501 c1 = byte_after_cr, byte_after_cr = -1;
3502 else
3503 ONE_MORE_BYTE (c1);
3504 if (c1 < 0)
3505 goto invalid_code;
3506
3507 if (CODING_ISO_EXTSEGMENT_LEN (coding) > 0)
3508 {
3509 *charbuf++ = ASCII_CHAR_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3510 char_offset++;
3511 CODING_ISO_EXTSEGMENT_LEN (coding)--;
3512 continue;
3513 }
3514
3515 if (CODING_ISO_EMBEDDED_UTF_8 (coding))
3516 {
3517 if (c1 == ISO_CODE_ESC)
3518 {
3519 if (src + 1 >= src_end)
3520 goto no_more_source;
3521 *charbuf++ = ISO_CODE_ESC;
3522 char_offset++;
3523 if (src[0] == '%' && src[1] == '@')
3524 {
3525 src += 2;
3526 consumed_chars += 2;
3527 char_offset += 2;
3528
3529 *charbuf++ = '%';
3530 *charbuf++ = '@';
3531 CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
3532 }
3533 }
3534 else
3535 {
3536 *charbuf++ = ASCII_CHAR_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3537 char_offset++;
3538 }
3539 continue;
3540 }
3541
3542 if ((cmp_status->state == COMPOSING_RULE
3543 || cmp_status->state == COMPOSING_COMPONENT_RULE)
3544 && c1 != ISO_CODE_ESC)
3545 {
3546 int rule;
3547
3548 DECODE_COMPOSITION_RULE (rule);
3549 STORE_COMPOSITION_RULE (rule);
3550 continue;
3551 }
3552
3553
3554 switch (iso_code_class [c1])
3555 {
3556 case ISO_0x20_or_0x7F:
3557 if (charset_id_0 < 0
3558 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
3559
3560 charset = CHARSET_FROM_ID (charset_ascii);
3561 else
3562 charset = CHARSET_FROM_ID (charset_id_0);
3563 break;
3564
3565 case ISO_graphic_plane_0:
3566 if (charset_id_0 < 0)
3567 charset = CHARSET_FROM_ID (charset_ascii);
3568 else
3569 charset = CHARSET_FROM_ID (charset_id_0);
3570 break;
3571
3572 case ISO_0xA0_or_0xFF:
3573 if (charset_id_1 < 0
3574 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3575 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3576 goto invalid_code;
3577
3578 FALLTHROUGH;
3579 case ISO_graphic_plane_1:
3580 if (charset_id_1 < 0)
3581 goto invalid_code;
3582 charset = CHARSET_FROM_ID (charset_id_1);
3583 break;
3584
3585 case ISO_control_0:
3586 if (eol_dos && c1 == '\r')
3587 ONE_MORE_BYTE (byte_after_cr);
3588 MAYBE_FINISH_COMPOSITION ();
3589 charset = CHARSET_FROM_ID (charset_ascii);
3590 break;
3591
3592 case ISO_control_1:
3593 goto invalid_code;
3594
3595 case ISO_shift_out:
3596 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3597 || CODING_ISO_DESIGNATION (coding, 1) < 0)
3598 goto invalid_code;
3599 CODING_ISO_INVOCATION (coding, 0) = 1;
3600 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3601 continue;
3602
3603 case ISO_shift_in:
3604 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3605 goto invalid_code;
3606 CODING_ISO_INVOCATION (coding, 0) = 0;
3607 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3608 continue;
3609
3610 case ISO_single_shift_2_7:
3611 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))
3612 goto invalid_code;
3613 FALLTHROUGH;
3614 case ISO_single_shift_2:
3615 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3616 goto invalid_code;
3617
3618 c1 = 'N';
3619 goto label_escape_sequence;
3620
3621 case ISO_single_shift_3:
3622 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3623 goto invalid_code;
3624
3625 c1 = 'O';
3626 goto label_escape_sequence;
3627
3628 case ISO_control_sequence_introducer:
3629
3630 c1 = '[';
3631 goto label_escape_sequence;
3632
3633 case ISO_escape:
3634 ONE_MORE_BYTE (c1);
3635 label_escape_sequence:
3636
3637
3638
3639 switch (c1)
3640 {
3641 case '&':
3642 ONE_MORE_BYTE (c1);
3643 if (!(c1 >= '@' && c1 <= '~'))
3644 goto invalid_code;
3645 ONE_MORE_BYTE (c1);
3646 if (c1 != ISO_CODE_ESC)
3647 goto invalid_code;
3648 ONE_MORE_BYTE (c1);
3649 goto label_escape_sequence;
3650
3651 case '$':
3652 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3653 goto invalid_code;
3654 {
3655 int reg, chars96;
3656
3657 ONE_MORE_BYTE (c1);
3658 if (c1 >= '@' && c1 <= 'B')
3659 {
3660
3661 reg = 0, chars96 = 0;
3662 }
3663 else if (c1 >= 0x28 && c1 <= 0x2B)
3664 {
3665 reg = c1 - 0x28, chars96 = 0;
3666 ONE_MORE_BYTE (c1);
3667 }
3668 else if (c1 >= 0x2C && c1 <= 0x2F)
3669 {
3670 reg = c1 - 0x2C, chars96 = 1;
3671 ONE_MORE_BYTE (c1);
3672 }
3673 else
3674 goto invalid_code;
3675 DECODE_DESIGNATION (reg, 2, chars96, c1);
3676
3677 if (reg == 0)
3678 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3679 else if (reg == 1)
3680 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3681 if (chars96 < 0)
3682 goto invalid_code;
3683 }
3684 continue;
3685
3686 case 'n':
3687 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3688 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3689 goto invalid_code;
3690 CODING_ISO_INVOCATION (coding, 0) = 2;
3691 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3692 continue;
3693
3694 case 'o':
3695 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3696 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3697 goto invalid_code;
3698 CODING_ISO_INVOCATION (coding, 0) = 3;
3699 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3700 continue;
3701
3702 case 'N':
3703 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3704 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3705 goto invalid_code;
3706 charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3707 if (charset_id_2 < 0)
3708 charset = CHARSET_FROM_ID (charset_ascii);
3709 else
3710 charset = CHARSET_FROM_ID (charset_id_2);
3711 ONE_MORE_BYTE (c1);
3712 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0)
3713 || (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3714 && ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LEVEL_4)
3715 ? c1 >= 0x80 : c1 < 0x80)))
3716 goto invalid_code;
3717 break;
3718
3719 case 'O':
3720 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3721 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3722 goto invalid_code;
3723 charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3724 if (charset_id_3 < 0)
3725 charset = CHARSET_FROM_ID (charset_ascii);
3726 else
3727 charset = CHARSET_FROM_ID (charset_id_3);
3728 ONE_MORE_BYTE (c1);
3729 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0)
3730 || (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3731 && ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LEVEL_4)
3732 ? c1 >= 0x80 : c1 < 0x80)))
3733 goto invalid_code;
3734 break;
3735
3736 case '0': case '2': case '3': case '4':
3737 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3738 goto invalid_code;
3739 if (last_id != charset_ascii)
3740 {
3741 ADD_CHARSET_DATA (charbuf, char_offset- last_offset, last_id);
3742 last_id = charset_ascii;
3743 last_offset = char_offset;
3744 }
3745 DECODE_COMPOSITION_START (c1);
3746 continue;
3747
3748 case '1':
3749 if (cmp_status->state == COMPOSING_NO)
3750 goto invalid_code;
3751 DECODE_COMPOSITION_END ();
3752 continue;
3753
3754 case '[':
3755 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION))
3756 goto invalid_code;
3757
3758
3759
3760 ONE_MORE_BYTE (c1);
3761 switch (c1)
3762 {
3763 case ']':
3764 coding->mode &= ~CODING_MODE_DIRECTION;
3765 break;
3766
3767 case '0':
3768 case '1':
3769 ONE_MORE_BYTE (c1);
3770 if (c1 == ']')
3771 coding->mode &= ~CODING_MODE_DIRECTION;
3772 else
3773 goto invalid_code;
3774 break;
3775
3776 case '2':
3777 ONE_MORE_BYTE (c1);
3778 if (c1 == ']')
3779 coding->mode |= CODING_MODE_DIRECTION;
3780 else
3781 goto invalid_code;
3782 break;
3783
3784 default:
3785 goto invalid_code;
3786 }
3787 continue;
3788
3789 case '%':
3790 ONE_MORE_BYTE (c1);
3791 if (c1 == '/')
3792 {
3793
3794
3795
3796
3797 int dim, M, L;
3798 int size;
3799
3800 ONE_MORE_BYTE (dim);
3801 if (dim < '0' || dim > '4')
3802 goto invalid_code;
3803 ONE_MORE_BYTE (M);
3804 if (M < 128)
3805 goto invalid_code;
3806 ONE_MORE_BYTE (L);
3807 if (L < 128)
3808 goto invalid_code;
3809 size = ((M - 128) * 128) + (L - 128);
3810 if (charbuf + 6 > charbuf_end)
3811 goto break_loop;
3812 *charbuf++ = ISO_CODE_ESC;
3813 *charbuf++ = '%';
3814 *charbuf++ = '/';
3815 *charbuf++ = dim;
3816 *charbuf++ = BYTE8_TO_CHAR (M);
3817 *charbuf++ = BYTE8_TO_CHAR (L);
3818 CODING_ISO_EXTSEGMENT_LEN (coding) = size;
3819 }
3820 else if (c1 == 'G')
3821 {
3822
3823
3824
3825
3826 if (charbuf + 3 > charbuf_end)
3827 goto break_loop;
3828 *charbuf++ = ISO_CODE_ESC;
3829 *charbuf++ = '%';
3830 *charbuf++ = 'G';
3831 CODING_ISO_EMBEDDED_UTF_8 (coding) = 1;
3832 }
3833 else
3834 goto invalid_code;
3835 continue;
3836 break;
3837
3838 default:
3839 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3840 goto invalid_code;
3841 {
3842 int reg, chars96;
3843
3844 if (c1 >= 0x28 && c1 <= 0x2B)
3845 {
3846 reg = c1 - 0x28, chars96 = 0;
3847 ONE_MORE_BYTE (c1);
3848 }
3849 else if (c1 >= 0x2C && c1 <= 0x2F)
3850 {
3851 reg = c1 - 0x2C, chars96 = 1;
3852 ONE_MORE_BYTE (c1);
3853 }
3854 else
3855 goto invalid_code;
3856 DECODE_DESIGNATION (reg, 1, chars96, c1);
3857
3858 if (reg == 0)
3859 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3860 else if (reg == 1)
3861 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3862 if (chars96 < 0)
3863 goto invalid_code;
3864 }
3865 continue;
3866 }
3867 break;
3868
3869 default:
3870 emacs_abort ();
3871 }
3872
3873 if (cmp_status->state == COMPOSING_NO
3874 && charset->id != charset_ascii
3875 && last_id != charset->id)
3876 {
3877 if (last_id != charset_ascii)
3878 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3879 last_id = charset->id;
3880 last_offset = char_offset;
3881 }
3882
3883
3884
3885
3886 if (CHARSET_DIMENSION (charset) > 1)
3887 {
3888 ONE_MORE_BYTE (c2);
3889 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0)
3890 || ((c1 & 0x80) != (c2 & 0x80)))
3891
3892 goto invalid_code;
3893 if (CHARSET_DIMENSION (charset) == 2)
3894 c1 = (c1 << 8) | c2;
3895 else
3896 {
3897 ONE_MORE_BYTE (c3);
3898 if (c3 < 0x20 || (c3 >= 0x80 && c3 < 0xA0)
3899 || ((c1 & 0x80) != (c3 & 0x80)))
3900
3901 goto invalid_code;
3902 c1 = (c1 << 16) | (c2 << 8) | c2;
3903 }
3904 }
3905 c1 &= 0x7F7F7F;
3906 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3907 if (c < 0)
3908 {
3909 MAYBE_FINISH_COMPOSITION ();
3910 for (; src_base < src; src_base++, char_offset++)
3911 {
3912 if (ASCII_CHAR_P (*src_base))
3913 *charbuf++ = *src_base;
3914 else
3915 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3916 }
3917 }
3918 else if (cmp_status->state == COMPOSING_NO)
3919 {
3920 *charbuf++ = c;
3921 char_offset++;
3922 }
3923 else if ((cmp_status->state == COMPOSING_CHAR
3924 ? cmp_status->nchars
3925 : cmp_status->ncomps)
3926 >= MAX_COMPOSITION_COMPONENTS)
3927 {
3928
3929 MAYBE_FINISH_COMPOSITION ();
3930 *charbuf++ = c;
3931 char_offset++;
3932 }
3933 else
3934 STORE_COMPOSITION_CHAR (c);
3935 continue;
3936
3937 invalid_code:
3938 MAYBE_FINISH_COMPOSITION ();
3939 src = src_base;
3940 consumed_chars = consumed_chars_base;
3941 ONE_MORE_BYTE (c);
3942 *charbuf++ = c < 0 ? -c : ASCII_CHAR_P (c) ? c : BYTE8_TO_CHAR (c);
3943 char_offset++;
3944
3945
3946
3947
3948
3949 CODING_ISO_INVOCATION (coding, 0) = 0;
3950 CODING_ISO_DESIGNATION (coding, 0) = charset_ascii;
3951 charset_id_0 = charset_ascii;
3952 continue;
3953
3954 break_loop:
3955 break;
3956 }
3957
3958 no_more_source:
3959 if (cmp_status->state != COMPOSING_NO)
3960 {
3961 if (coding->mode & CODING_MODE_LAST_BLOCK)
3962 MAYBE_FINISH_COMPOSITION ();
3963 else
3964 {
3965 charbuf -= cmp_status->length;
3966 for (i = 0; i < cmp_status->length; i++)
3967 cmp_status->carryover[i] = charbuf[i];
3968 }
3969 }
3970 else if (last_id != charset_ascii)
3971 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3972 coding->consumed_char += consumed_chars_base;
3973 coding->consumed = src_base - coding->source;
3974 coding->charbuf_used = charbuf - coding->charbuf;
3975 }
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
/* Emit the escape sequence that designates CHARSET to graphic register
   REG of CODING, and record the designation in CODING.

   When CODING_ISO_FLAG_REVISION is set and CHARSET has a non-negative
   revision, ESC '&' and '@' + revision are emitted first.  Then ESC is
   emitted, followed by '$' for charsets whose dimension is not 1, an
   intermediate byte taken from "()*+" (94-character sets) or ",-./"
   (96-character sets) indexed by REG, and CHARSET's final byte.  The
   intermediate byte is omitted for a multi-dimension 94-character set
   designated to register 0 with a final byte in '@'..'B', unless
   CODING_ISO_FLAG_LONG_FORM is set.

   Emission goes through the EMIT_* macros, which use variables of the
   expansion site.  */
#define ENCODE_DESIGNATION(charset, reg, coding) \
  do { \
    unsigned char final_char = CHARSET_ISO_FINAL (charset); \
    const char *intermediates \
      = CHARSET_ISO_CHARS_96 (charset) ? ",-./" : "()*+"; \
    int revision = -1; \
 \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \
      revision = CHARSET_ISO_REVISION (charset); \
    if (revision >= 0) \
      { \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&'); \
        EMIT_ONE_BYTE ('@' + revision); \
      } \
 \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \
    if (CHARSET_DIMENSION (charset) != 1) \
      { \
        EMIT_ONE_ASCII_BYTE ('$'); \
        if (CHARSET_ISO_CHARS_96 (charset) \
            || (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM) \
            || reg != 0 \
            || final_char < '@' || final_char > 'B') \
          EMIT_ONE_ASCII_BYTE (intermediates[reg]); \
      } \
    else \
      EMIT_ONE_ASCII_BYTE (intermediates[reg]); \
    EMIT_ONE_ASCII_BYTE (final_char); \
 \
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset); \
  } while (0)
4046
4047
4048
4049
4050
4051
/* Emit single-shift-2: the byte ISO_CODE_SS2, or ESC 'N' when
   CODING_ISO_FLAG_SEVEN_BITS is set.  Then mark `coding' as being in
   single-shift state.  */
#define ENCODE_SINGLE_SHIFT_2 \
  do { \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)) \
      EMIT_ONE_BYTE (ISO_CODE_SS2); \
    else \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N'); \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
  } while (0)
4060
4061
/* Emit single-shift-3: the byte ISO_CODE_SS3, or ESC 'O' when
   CODING_ISO_FLAG_SEVEN_BITS is set.  Then mark `coding' as being in
   single-shift state.  */
#define ENCODE_SINGLE_SHIFT_3 \
  do { \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)) \
      EMIT_ONE_BYTE (ISO_CODE_SS3); \
    else \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O'); \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
  } while (0)
4070
4071
4072
4073
4074
4075
/* Record that graphic register 0 is invoked to plane 0 of `coding' and
   emit the shift-in control byte ISO_CODE_SI.  */
#define ENCODE_SHIFT_IN \
  do { \
    CODING_ISO_INVOCATION (coding, 0) = 0; \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI); \
  } while (0)
4081
4082
/* Record that graphic register 1 is invoked to plane 0 of `coding' and
   emit the shift-out control byte ISO_CODE_SO.  */
#define ENCODE_SHIFT_OUT \
  do { \
    CODING_ISO_INVOCATION (coding, 0) = 1; \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO); \
  } while (0)
4088
4089
/* Emit the locking-shift-2 sequence ESC 'n' and record that graphic
   register 2 is invoked to plane 0 of `coding'.  */
#define ENCODE_LOCKING_SHIFT_2 \
  do { \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \
    EMIT_ONE_ASCII_BYTE ('n'); \
    CODING_ISO_INVOCATION (coding, 0) = 2; \
  } while (0)
4095
4096
/* Emit the locking-shift-3 sequence ESC 'o' and record that graphic
   register 3 is invoked to plane 0 of `coding'.

   The final byte must be 'o': ESC 'n' is locking-shift-2, and the
   decoder in this file maps ESC 'n' to register 2 and ESC 'o' to
   register 3.  Emitting 'n' here would make the byte stream and the
   recorded invocation disagree.  */
#define ENCODE_LOCKING_SHIFT_3 \
  do { \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o'); \
    CODING_ISO_INVOCATION (coding, 0) = 3; \
  } while (0)
4102
4103
4104
4105
4106
4107
/* Emit the one-byte code C1 of a character of the dimension-1 charset
   CHARSET.  CHARSET must be an lvalue: it is replaced by
   JISX0201-Roman when it is ASCII and CODING_ISO_FLAG_USE_ROMAN is set.

   The body is a loop.  It emits the byte and leaves when `coding' is
   in single-shift state (clearing that state), or when CHARSET is
   invoked to plane 0 (byte with the high bit cleared) or plane 1 (byte
   with the high bit set).  Otherwise it calls
   encode_invocation_designation to designate and/or invoke CHARSET and
   tries again.

   Uses coding, dst and produced_chars from the expansion site.  Every
   line of the body must end in a backslash; a bare blank line would
   cut the macro off after the final `else'.  */
#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
  do { \
    int id = CHARSET_ID (charset); \
 \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
        && id == charset_ascii) \
      { \
        id = charset_jisx0201_roman; \
        charset = CHARSET_FROM_ID (id); \
      } \
 \
    if (CODING_ISO_SINGLE_SHIFTING (coding)) \
      { \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
          EMIT_ONE_ASCII_BYTE ((c1) & 0x7F); \
        else \
          EMIT_ONE_BYTE ((c1) | 0x80); \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
        break; \
      } \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
      { \
        EMIT_ONE_ASCII_BYTE ((c1) & 0x7F); \
        break; \
      } \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
      { \
        EMIT_ONE_BYTE ((c1) | 0x80); \
        break; \
      } \
    else \
      /* CHARSET is not invoked to either plane yet: designate */ \
      /* and/or invoke it, then loop to emit the byte.  */ \
      dst = encode_invocation_designation (charset, coding, dst, \
                                           &produced_chars); \
  } while (1)
4146
4147
4148
4149
4150
4151
/* Emit the two-byte code C1, C2 of a character of the dimension-2
   charset CHARSET.  CHARSET must be an lvalue: it is replaced by
   JISX0208-1978 when it is JISX0208 and CODING_ISO_FLAG_USE_OLDJIS is
   set.

   The body is a loop.  It emits the bytes and leaves when `coding' is
   in single-shift state (clearing that state), or when CHARSET is
   invoked to plane 0 (bytes with the high bit cleared) or plane 1
   (bytes with the high bit set).  Otherwise it calls
   encode_invocation_designation to designate and/or invoke CHARSET and
   tries again.

   Uses coding, dst and produced_chars from the expansion site.  Every
   line of the body must end in a backslash; a bare blank line would
   cut the macro off after the final `else'.  */
#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
  do { \
    int id = CHARSET_ID (charset); \
 \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
        && id == charset_jisx0208) \
      { \
        id = charset_jisx0208_1978; \
        charset = CHARSET_FROM_ID (id); \
      } \
 \
    if (CODING_ISO_SINGLE_SHIFTING (coding)) \
      { \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
          EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
        else \
          EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
        break; \
      } \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
      { \
        EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
        break; \
      } \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
      { \
        EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
        break; \
      } \
    else \
      /* CHARSET is not invoked to either plane yet: designate */ \
      /* and/or invoke it, then loop to emit the bytes.  */ \
      dst = encode_invocation_designation (charset, coding, dst, \
                                           &produced_chars); \
  } while (1)
4190
4191
/* Encode character C of CHARSET: obtain its code with
   CODING_ENCODE_CHAR, then emit it with the dimension-1 macro when
   CHARSET has dimension 1, or with the dimension-2 macro (high byte
   CODE >> 8, low byte CODE & 0xFF) otherwise.  */
#define ENCODE_ISO_CHARACTER(charset, c) \
  do { \
    unsigned code; \
 \
    CODING_ENCODE_CHAR (coding, dst, dst_end, (charset), (c), code); \
    if (CHARSET_DIMENSION (charset) != 1) \
      ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
    else \
      ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \
  } while (0)
4202
4203
4204
4205
4206
4207
4208 static unsigned char *
4209 encode_invocation_designation (struct charset *charset,
4210 struct coding_system *coding,
4211 unsigned char *dst, ptrdiff_t *p_nchars)
4212 {
4213 bool multibytep = coding->dst_multibyte;
4214 ptrdiff_t produced_chars = *p_nchars;
4215 int reg;
4216 int id = CHARSET_ID (charset);
4217
4218
4219 for (reg = 0; reg < 4; reg++)
4220 if (id == CODING_ISO_DESIGNATION (coding, reg))
4221 break;
4222
4223 if (reg >= 4)
4224 {
4225
4226
4227 reg = CODING_ISO_REQUEST (coding, id);
4228 if (reg < 0)
4229
4230
4231 reg = 0;
4232
4233 ENCODE_DESIGNATION (charset, reg, coding);
4234 }
4235
4236 if (CODING_ISO_INVOCATION (coding, 0) != reg
4237 && CODING_ISO_INVOCATION (coding, 1) != reg)
4238 {
4239
4240
4241 switch (reg)
4242 {
4243 case 0:
4244 ENCODE_SHIFT_IN;
4245 break;
4246
4247 case 1:
4248 ENCODE_SHIFT_OUT;
4249 break;
4250
4251 case 2:
4252 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4253 ENCODE_SINGLE_SHIFT_2;
4254 else
4255 ENCODE_LOCKING_SHIFT_2;
4256 break;
4257
4258 case 3:
4259 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4260 ENCODE_SINGLE_SHIFT_3;
4261 else
4262 ENCODE_LOCKING_SHIFT_3;
4263 break;
4264
4265 default:
4266 break;
4267 }
4268 }
4269
4270 *p_nchars = produced_chars;
4271 return dst;
4272 }
4273
4274
4275
4276
/* Bring `coding' back to its initial invocation and designations:
   emit shift-in if plane 0 does not hold register 0, then re-designate
   every register whose non-negative initial charset differs from its
   current designation.  */
#define ENCODE_RESET_PLANE_AND_REGISTER() \
  do { \
    int reg; \
 \
    if (CODING_ISO_INVOCATION (coding, 0) != 0) \
      ENCODE_SHIFT_IN; \
    for (reg = 0; reg < 4; reg++) \
      { \
        int initial_id = CODING_ISO_INITIAL (coding, reg); \
        struct charset *initial_charset; \
 \
        if (initial_id < 0 \
            || CODING_ISO_DESIGNATION (coding, reg) == initial_id) \
          continue; \
        initial_charset = CHARSET_FROM_ID (initial_id); \
        ENCODE_DESIGNATION (initial_charset, reg, coding); \
      } \
  } while (0)
4293
4294
4295
4296
4297
4298
4299
4300
4301
4302
4303 static ptrdiff_t
4304 encode_designation_at_bol (struct coding_system *coding,
4305 int *charbuf, int *charbuf_end,
4306 unsigned char *dst)
4307 {
4308 unsigned char *orig = dst;
4309 struct charset *charset;
4310
4311 int r[4];
4312 int c, found = 0, reg;
4313 ptrdiff_t produced_chars = 0;
4314 bool multibytep = coding->dst_multibyte;
4315 Lisp_Object attrs;
4316 Lisp_Object charset_list;
4317
4318 attrs = CODING_ID_ATTRS (coding->id);
4319 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4320 if (EQ (charset_list, Qiso_2022))
4321 charset_list = Viso_2022_charset_list;
4322
4323 for (reg = 0; reg < 4; reg++)
4324 r[reg] = -1;
4325
4326 while (charbuf < charbuf_end && found < 4)
4327 {
4328 int id;
4329
4330 c = *charbuf++;
4331 if (c == '\n')
4332 break;
4333 charset = char_charset (c, charset_list, NULL);
4334 id = CHARSET_ID (charset);
4335 reg = CODING_ISO_REQUEST (coding, id);
4336 if (reg >= 0 && r[reg] < 0)
4337 {
4338 found++;
4339 r[reg] = id;
4340 }
4341 }
4342
4343 if (found)
4344 {
4345 for (reg = 0; reg < 4; reg++)
4346 if (r[reg] >= 0
4347 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
4348 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
4349 }
4350
4351 return dst - orig;
4352 }
4353
4354
4355
4356 static bool
4357 encode_coding_iso_2022 (struct coding_system *coding)
4358 {
4359 bool multibytep = coding->dst_multibyte;
4360 int *charbuf = coding->charbuf;
4361 int *charbuf_end = charbuf + coding->charbuf_used;
4362 unsigned char *dst = coding->destination + coding->produced;
4363 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4364 int safe_room = 16;
4365 bool bol_designation
4366 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
4367 && CODING_ISO_BOL (coding));
4368 ptrdiff_t produced_chars = 0;
4369 Lisp_Object attrs, eol_type, charset_list;
4370 bool ascii_compatible;
4371 int c;
4372 int preferred_charset_id = -1;
4373
4374 CODING_GET_INFO (coding, attrs, charset_list);
4375 eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
4376 if (VECTORP (eol_type))
4377 eol_type = Qunix;
4378
4379 setup_iso_safe_charsets (attrs);
4380
4381 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4382 coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
4383
4384 ascii_compatible
4385 = (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
4386 && ! (CODING_ISO_FLAGS (coding) & (CODING_ISO_FLAG_DESIGNATION
4387 | CODING_ISO_FLAG_LOCKING_SHIFT)));
4388
4389 while (charbuf < charbuf_end)
4390 {
4391 ASSURE_DESTINATION (safe_room);
4392
4393 if (bol_designation)
4394 {
4395
4396 unsigned char desig_buf[16];
4397 ptrdiff_t nbytes;
4398 ptrdiff_t offset;
4399
4400 charset_map_loaded = 0;
4401 nbytes = encode_designation_at_bol (coding, charbuf, charbuf_end,
4402 desig_buf);
4403 if (charset_map_loaded
4404 && (offset = coding_change_destination (coding)))
4405 {
4406 dst += offset;
4407 dst_end += offset;
4408 }
4409 memcpy (dst, desig_buf, nbytes);
4410 dst += nbytes;
4411
4412 produced_chars += nbytes;
4413 bol_designation = 0;
4414 ASSURE_DESTINATION (safe_room);
4415 }
4416
4417 c = *charbuf++;
4418
4419 if (c < 0)
4420 {
4421
4422 switch (*charbuf)
4423 {
4424 case CODING_ANNOTATE_COMPOSITION_MASK:
4425
4426 break;
4427 case CODING_ANNOTATE_CHARSET_MASK:
4428 preferred_charset_id = charbuf[2];
4429 if (preferred_charset_id >= 0
4430 && NILP (Fmemq (make_fixnum (preferred_charset_id),
4431 charset_list)))
4432 preferred_charset_id = -1;
4433 break;
4434 default:
4435 emacs_abort ();
4436 }
4437 charbuf += -c - 1;
4438 continue;
4439 }
4440
4441
4442 if (c < 0x20 || c == 0x7F)
4443 {
4444 if (c == '\n'
4445 || (c == '\r' && EQ (eol_type, Qmac)))
4446 {
4447 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4448 ENCODE_RESET_PLANE_AND_REGISTER ();
4449 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
4450 {
4451 int i;
4452
4453 for (i = 0; i < 4; i++)
4454 CODING_ISO_DESIGNATION (coding, i)
4455 = CODING_ISO_INITIAL (coding, i);
4456 }
4457 bol_designation = ((CODING_ISO_FLAGS (coding)
4458 & CODING_ISO_FLAG_DESIGNATE_AT_BOL)
4459 != 0);
4460 }
4461 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
4462 ENCODE_RESET_PLANE_AND_REGISTER ();
4463 EMIT_ONE_ASCII_BYTE (c);
4464 }
4465 else if (ASCII_CHAR_P (c))
4466 {
4467 if (ascii_compatible)
4468 EMIT_ONE_ASCII_BYTE (c);
4469 else
4470 {
4471 struct charset *charset = CHARSET_FROM_ID (charset_ascii);
4472 ENCODE_ISO_CHARACTER (charset, c);
4473 }
4474 }
4475 else if (CHAR_BYTE8_P (c))
4476 {
4477 c = CHAR_TO_BYTE8 (c);
4478 EMIT_ONE_BYTE (c);
4479 }
4480 else
4481 {
4482 struct charset *charset;
4483
4484 if (preferred_charset_id >= 0)
4485 {
4486 bool result;
4487
4488 charset = CHARSET_FROM_ID (preferred_charset_id);
4489 CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result);
4490 if (! result)
4491 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
4492 NULL, charset);
4493 }
4494 else
4495 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
4496 NULL, charset);
4497 if (!charset)
4498 {
4499 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4500 {
4501 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4502 charset = CHARSET_FROM_ID (charset_ascii);
4503 }
4504 else
4505 {
4506 c = coding->default_char;
4507 CODING_CHAR_CHARSET (coding, dst, dst_end, c,
4508 charset_list, NULL, charset);
4509 }
4510 }
4511 ENCODE_ISO_CHARACTER (charset, c);
4512 }
4513 }
4514
4515 if (coding->mode & CODING_MODE_LAST_BLOCK
4516 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4517 {
4518 ASSURE_DESTINATION (safe_room);
4519 ENCODE_RESET_PLANE_AND_REGISTER ();
4520 }
4521 record_conversion_result (coding, CODING_RESULT_SUCCESS);
4522 CODING_ISO_BOL (coding) = bol_designation;
4523 coding->produced_char += produced_chars;
4524 coding->produced = dst - coding->destination;
4525 return 0;
4526 }
4527
4528
4529
4530
4531
4532
4533
4534
4535
4536
4537
4538
4539
4540
4541
4542
4543
4544
4545
4546
4547
4548
4549
4550
4551
4552
4553
4554
4555
4556
4557
4558
4559
4560
4561
4562
4563
4564
4565
4566
4567
4568 static bool
4569 detect_coding_sjis (struct coding_system *coding,
4570 struct coding_detection_info *detect_info)
4571 {
4572 const unsigned char *src = coding->source, *src_base;
4573 const unsigned char *src_end = coding->source + coding->src_bytes;
4574 bool multibytep = coding->src_multibyte;
4575 ptrdiff_t consumed_chars = 0;
4576 int found = 0;
4577 int c;
4578 Lisp_Object attrs, charset_list;
4579 int max_first_byte_of_2_byte_code;
4580
4581 CODING_GET_INFO (coding, attrs, charset_list);
4582 max_first_byte_of_2_byte_code = list_length (charset_list) <= 3 ? 0xEF : 0xFC;
4583
4584 detect_info->checked |= CATEGORY_MASK_SJIS;
4585
4586 src += coding->head_ascii;
4587
4588 while (1)
4589 {
4590 src_base = src;
4591 ONE_MORE_BYTE (c);
4592 if (c < 0x80)
4593 continue;
4594 if ((c >= 0x81 && c <= 0x9F)
4595 || (c >= 0xE0 && c <= max_first_byte_of_2_byte_code))
4596 {
4597 ONE_MORE_BYTE (c);
4598 if (c < 0x40 || c == 0x7F || c > 0xFC)
4599 break;
4600 found = CATEGORY_MASK_SJIS;
4601 }
4602 else if (c >= 0xA0 && c < 0xE0)
4603 found = CATEGORY_MASK_SJIS;
4604 else
4605 break;
4606 }
4607 detect_info->rejected |= CATEGORY_MASK_SJIS;
4608 return 0;
4609
4610 no_more_source:
4611 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4612 {
4613 detect_info->rejected |= CATEGORY_MASK_SJIS;
4614 return 0;
4615 }
4616 detect_info->found |= found;
4617 return 1;
4618 }
4619
4620
4621
4622
4623 static bool
4624 detect_coding_big5 (struct coding_system *coding,
4625 struct coding_detection_info *detect_info)
4626 {
4627 const unsigned char *src = coding->source, *src_base;
4628 const unsigned char *src_end = coding->source + coding->src_bytes;
4629 bool multibytep = coding->src_multibyte;
4630 ptrdiff_t consumed_chars = 0;
4631 int found = 0;
4632 int c;
4633
4634 detect_info->checked |= CATEGORY_MASK_BIG5;
4635
4636 src += coding->head_ascii;
4637
4638 while (1)
4639 {
4640 src_base = src;
4641 ONE_MORE_BYTE (c);
4642 if (c < 0x80)
4643 continue;
4644 if (c >= 0xA1)
4645 {
4646 ONE_MORE_BYTE (c);
4647 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
4648 return 0;
4649 found = CATEGORY_MASK_BIG5;
4650 }
4651 else
4652 break;
4653 }
4654 detect_info->rejected |= CATEGORY_MASK_BIG5;
4655 return 0;
4656
4657 no_more_source:
4658 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4659 {
4660 detect_info->rejected |= CATEGORY_MASK_BIG5;
4661 return 0;
4662 }
4663 detect_info->found |= found;
4664 return 1;
4665 }
4666
4667
4668
4669 static void
4670 decode_coding_sjis (struct coding_system *coding)
4671 {
4672 const unsigned char *src = coding->source + coding->consumed;
4673 const unsigned char *src_end = coding->source + coding->src_bytes;
4674 const unsigned char *src_base;
4675 int *charbuf = coding->charbuf + coding->charbuf_used;
4676
4677
4678 int *charbuf_end
4679 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4680 ptrdiff_t consumed_chars = 0, consumed_chars_base;
4681 bool multibytep = coding->src_multibyte;
4682 struct charset *charset_roman, *charset_kanji, *charset_kana;
4683 struct charset *charset_kanji2;
4684 Lisp_Object attrs, charset_list, val;
4685 ptrdiff_t char_offset = coding->produced_char;
4686 ptrdiff_t last_offset = char_offset;
4687 int last_id = charset_ascii;
4688 bool eol_dos
4689 = !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4690 int byte_after_cr = -1;
4691
4692 CODING_GET_INFO (coding, attrs, charset_list);
4693
4694 val = charset_list;
4695 charset_roman = CHARSET_FROM_ID (XFIXNUM (XCAR (val))), val = XCDR (val);
4696 charset_kana = CHARSET_FROM_ID (XFIXNUM (XCAR (val))), val = XCDR (val);
4697 charset_kanji = CHARSET_FROM_ID (XFIXNUM (XCAR (val))), val = XCDR (val);
4698 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XFIXNUM (XCAR (val)));
4699
4700 while (1)
4701 {
4702 int c, c1;
4703 struct charset *charset;
4704
4705 src_base = src;
4706 consumed_chars_base = consumed_chars;
4707
4708 if (charbuf >= charbuf_end)
4709 {
4710 if (byte_after_cr >= 0)
4711 src_base--;
4712 break;
4713 }
4714
4715 if (byte_after_cr >= 0)
4716 c = byte_after_cr, byte_after_cr = -1;
4717 else
4718 ONE_MORE_BYTE (c);
4719 if (c < 0)
4720 goto invalid_code;
4721 if (c < 0x80)
4722 {
4723 if (eol_dos && c == '\r')
4724 ONE_MORE_BYTE (byte_after_cr);
4725 charset = charset_roman;
4726 }
4727 else if (c == 0x80 || c == 0xA0)
4728 goto invalid_code;
4729 else if (c >= 0xA1 && c <= 0xDF)
4730 {
4731
4732 c &= 0x7F;
4733 charset = charset_kana;
4734 }
4735 else if (c <= 0xEF)
4736 {
4737
4738 ONE_MORE_BYTE (c1);
4739 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4740 goto invalid_code;
4741 c = (c << 8) | c1;
4742 SJIS_TO_JIS (c);
4743 charset = charset_kanji;
4744 }
4745 else if (c <= 0xFC && charset_kanji2)
4746 {
4747
4748 ONE_MORE_BYTE (c1);
4749 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4750 goto invalid_code;
4751 c = (c << 8) | c1;
4752 SJIS_TO_JIS2 (c);
4753 charset = charset_kanji2;
4754 }
4755 else
4756 goto invalid_code;
4757 if (charset->id != charset_ascii
4758 && last_id != charset->id)
4759 {
4760 if (last_id != charset_ascii)
4761 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4762 last_id = charset->id;
4763 last_offset = char_offset;
4764 }
4765 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4766 *charbuf++ = c;
4767 char_offset++;
4768 continue;
4769
4770 invalid_code:
4771 src = src_base;
4772 consumed_chars = consumed_chars_base;
4773 ONE_MORE_BYTE (c);
4774 *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4775 char_offset++;
4776 }
4777
4778 no_more_source:
4779 if (last_id != charset_ascii)
4780 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4781 coding->consumed_char += consumed_chars_base;
4782 coding->consumed = src_base - coding->source;
4783 coding->charbuf_used = charbuf - coding->charbuf;
4784 }
4785
4786 static void
4787 decode_coding_big5 (struct coding_system *coding)
4788 {
4789 const unsigned char *src = coding->source + coding->consumed;
4790 const unsigned char *src_end = coding->source + coding->src_bytes;
4791 const unsigned char *src_base;
4792 int *charbuf = coding->charbuf + coding->charbuf_used;
4793
4794
4795 int *charbuf_end
4796 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4797 ptrdiff_t consumed_chars = 0, consumed_chars_base;
4798 bool multibytep = coding->src_multibyte;
4799 struct charset *charset_roman, *charset_big5;
4800 Lisp_Object attrs, charset_list, val;
4801 ptrdiff_t char_offset = coding->produced_char;
4802 ptrdiff_t last_offset = char_offset;
4803 int last_id = charset_ascii;
4804 bool eol_dos
4805 = !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4806 int byte_after_cr = -1;
4807
4808 CODING_GET_INFO (coding, attrs, charset_list);
4809 val = charset_list;
4810 charset_roman = CHARSET_FROM_ID (XFIXNUM (XCAR (val))), val = XCDR (val);
4811 charset_big5 = CHARSET_FROM_ID (XFIXNUM (XCAR (val)));
4812
4813 while (1)
4814 {
4815 int c, c1;
4816 struct charset *charset;
4817
4818 src_base = src;
4819 consumed_chars_base = consumed_chars;
4820
4821 if (charbuf >= charbuf_end)
4822 {
4823 if (byte_after_cr >= 0)
4824 src_base--;
4825 break;
4826 }
4827
4828 if (byte_after_cr >= 0)
4829 c = byte_after_cr, byte_after_cr = -1;
4830 else
4831 ONE_MORE_BYTE (c);
4832
4833 if (c < 0)
4834 goto invalid_code;
4835 if (c < 0x80)
4836 {
4837 if (eol_dos && c == '\r')
4838 ONE_MORE_BYTE (byte_after_cr);
4839 charset = charset_roman;
4840 }
4841 else
4842 {
4843
4844 if (c < 0xA1 || c > 0xFE)
4845 goto invalid_code;
4846 ONE_MORE_BYTE (c1);
4847 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4848 goto invalid_code;
4849 c = c << 8 | c1;
4850 charset = charset_big5;
4851 }
4852 if (charset->id != charset_ascii
4853 && last_id != charset->id)
4854 {
4855 if (last_id != charset_ascii)
4856 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4857 last_id = charset->id;
4858 last_offset = char_offset;
4859 }
4860 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4861 *charbuf++ = c;
4862 char_offset++;
4863 continue;
4864
4865 invalid_code:
4866 src = src_base;
4867 consumed_chars = consumed_chars_base;
4868 ONE_MORE_BYTE (c);
4869 *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4870 char_offset++;
4871 }
4872
4873 no_more_source:
4874 if (last_id != charset_ascii)
4875 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4876 coding->consumed_char += consumed_chars_base;
4877 coding->consumed = src_base - coding->source;
4878 coding->charbuf_used = charbuf - coding->charbuf;
4879 }
4880
4881
4882
4883
4884
4885
4886
4887
4888 static bool
4889 encode_coding_sjis (struct coding_system *coding)
4890 {
4891 bool multibytep = coding->dst_multibyte;
4892 int *charbuf = coding->charbuf;
4893 int *charbuf_end = charbuf + coding->charbuf_used;
4894 unsigned char *dst = coding->destination + coding->produced;
4895 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4896 int safe_room = 4;
4897 ptrdiff_t produced_chars = 0;
4898 Lisp_Object attrs, charset_list, val;
4899 bool ascii_compatible;
4900 struct charset *charset_kanji, *charset_kana;
4901 struct charset *charset_kanji2;
4902 int c;
4903
4904 CODING_GET_INFO (coding, attrs, charset_list);
4905 val = XCDR (charset_list);
4906 charset_kana = CHARSET_FROM_ID (XFIXNUM (XCAR (val))), val = XCDR (val);
4907 charset_kanji = CHARSET_FROM_ID (XFIXNUM (XCAR (val))), val = XCDR (val);
4908 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XFIXNUM (XCAR (val)));
4909
4910 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4911
4912 while (charbuf < charbuf_end)
4913 {
4914 ASSURE_DESTINATION (safe_room);
4915 c = *charbuf++;
4916
4917 if (ASCII_CHAR_P (c) && ascii_compatible)
4918 EMIT_ONE_ASCII_BYTE (c);
4919 else if (CHAR_BYTE8_P (c))
4920 {
4921 c = CHAR_TO_BYTE8 (c);
4922 EMIT_ONE_BYTE (c);
4923 }
4924 else
4925 {
4926 unsigned code;
4927 struct charset *charset;
4928 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
4929 &code, charset);
4930
4931 if (!charset)
4932 {
4933 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4934 {
4935 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4936 charset = CHARSET_FROM_ID (charset_ascii);
4937 }
4938 else
4939 {
4940 c = coding->default_char;
4941 CODING_CHAR_CHARSET (coding, dst, dst_end, c,
4942 charset_list, &code, charset);
4943 }
4944 }
4945 if (code == CHARSET_INVALID_CODE (charset))
4946 emacs_abort ();
4947 if (charset == charset_kanji)
4948 {
4949 int c1, c2;
4950 JIS_TO_SJIS (code);
4951 c1 = code >> 8, c2 = code & 0xFF;
4952 EMIT_TWO_BYTES (c1, c2);
4953 }
4954 else if (charset == charset_kana)
4955 EMIT_ONE_BYTE (code | 0x80);
4956 else if (charset_kanji2 && charset == charset_kanji2)
4957 {
4958 int c1, c2;
4959
4960 c1 = code >> 8;
4961 if (c1 == 0x21 || (c1 >= 0x23 && c1 <= 0x25)
4962 || c1 == 0x28
4963 || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
4964 {
4965 JIS_TO_SJIS2 (code);
4966 c1 = code >> 8, c2 = code & 0xFF;
4967 EMIT_TWO_BYTES (c1, c2);
4968 }
4969 else
4970 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4971 }
4972 else
4973 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4974 }
4975 }
4976 record_conversion_result (coding, CODING_RESULT_SUCCESS);
4977 coding->produced_char += produced_chars;
4978 coding->produced = dst - coding->destination;
4979 return 0;
4980 }
4981
4982 static bool
4983 encode_coding_big5 (struct coding_system *coding)
4984 {
4985 bool multibytep = coding->dst_multibyte;
4986 int *charbuf = coding->charbuf;
4987 int *charbuf_end = charbuf + coding->charbuf_used;
4988 unsigned char *dst = coding->destination + coding->produced;
4989 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4990 int safe_room = 4;
4991 ptrdiff_t produced_chars = 0;
4992 Lisp_Object attrs, charset_list, val;
4993 bool ascii_compatible;
4994 struct charset *charset_big5;
4995 int c;
4996
4997 CODING_GET_INFO (coding, attrs, charset_list);
4998 val = XCDR (charset_list);
4999 charset_big5 = CHARSET_FROM_ID (XFIXNUM (XCAR (val)));
5000 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5001
5002 while (charbuf < charbuf_end)
5003 {
5004 ASSURE_DESTINATION (safe_room);
5005 c = *charbuf++;
5006
5007 if (ASCII_CHAR_P (c) && ascii_compatible)
5008 EMIT_ONE_ASCII_BYTE (c);
5009 else if (CHAR_BYTE8_P (c))
5010 {
5011 c = CHAR_TO_BYTE8 (c);
5012 EMIT_ONE_BYTE (c);
5013 }
5014 else
5015 {
5016 unsigned code;
5017 struct charset *charset;
5018 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
5019 &code, charset);
5020
5021 if (! charset)
5022 {
5023 if (coding->mode & CODING_MODE_SAFE_ENCODING)
5024 {
5025 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5026 charset = CHARSET_FROM_ID (charset_ascii);
5027 }
5028 else
5029 {
5030 c = coding->default_char;
5031 CODING_CHAR_CHARSET (coding, dst, dst_end, c,
5032 charset_list, &code, charset);
5033 }
5034 }
5035 if (code == CHARSET_INVALID_CODE (charset))
5036 emacs_abort ();
5037 if (charset == charset_big5)
5038 {
5039 int c1, c2;
5040
5041 c1 = code >> 8, c2 = code & 0xFF;
5042 EMIT_TWO_BYTES (c1, c2);
5043 }
5044 else
5045 EMIT_ONE_ASCII_BYTE (code & 0x7F);
5046 }
5047 }
5048 record_conversion_result (coding, CODING_RESULT_SUCCESS);
5049 coding->produced_char += produced_chars;
5050 coding->produced = dst - coding->destination;
5051 return 0;
5052 }
5053
5054
5055
5056
5057
5058
5059
5060
5061 static bool
5062 detect_coding_ccl (struct coding_system *coding,
5063 struct coding_detection_info *detect_info)
5064 {
5065 const unsigned char *src = coding->source, *src_base;
5066 const unsigned char *src_end = coding->source + coding->src_bytes;
5067 bool multibytep = coding->src_multibyte;
5068 ptrdiff_t consumed_chars = 0;
5069 int found = 0;
5070 unsigned char *valids;
5071 ptrdiff_t head_ascii = coding->head_ascii;
5072 Lisp_Object attrs;
5073
5074 detect_info->checked |= CATEGORY_MASK_CCL;
5075
5076 coding = &coding_categories[coding_category_ccl];
5077 valids = CODING_CCL_VALIDS (coding);
5078 attrs = CODING_ID_ATTRS (coding->id);
5079 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5080 src += head_ascii;
5081
5082 while (1)
5083 {
5084 int c;
5085
5086 src_base = src;
5087 ONE_MORE_BYTE (c);
5088 if (c < 0 || ! valids[c])
5089 break;
5090 if ((valids[c] > 1))
5091 found = CATEGORY_MASK_CCL;
5092 }
5093 detect_info->rejected |= CATEGORY_MASK_CCL;
5094 return 0;
5095
5096 no_more_source:
5097 detect_info->found |= found;
5098 return 1;
5099 }
5100
5101 static void
5102 decode_coding_ccl (struct coding_system *coding)
5103 {
5104 const unsigned char *src = coding->source + coding->consumed;
5105 const unsigned char *src_end = coding->source + coding->src_bytes;
5106 int *charbuf = coding->charbuf + coding->charbuf_used;
5107 int *charbuf_end = coding->charbuf + coding->charbuf_size;
5108 ptrdiff_t consumed_chars = 0;
5109 bool multibytep = coding->src_multibyte;
5110 struct ccl_program *ccl = &coding->spec.ccl->ccl;
5111 int source_charbuf[1024];
5112 int source_byteidx[1025];
5113 Lisp_Object attrs, charset_list;
5114
5115 CODING_GET_INFO (coding, attrs, charset_list);
5116
5117 while (1)
5118 {
5119 const unsigned char *p = src;
5120 ptrdiff_t offset;
5121 int i = 0;
5122
5123 if (multibytep)
5124 {
5125 while (i < 1024 && p < src_end)
5126 {
5127 source_byteidx[i] = p - src;
5128 source_charbuf[i++] = string_char_advance (&p);
5129 }
5130 source_byteidx[i] = p - src;
5131 }
5132 else
5133 while (i < 1024 && p < src_end)
5134 source_charbuf[i++] = *p++;
5135
5136 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
5137 ccl->last_block = true;
5138
5139 charset_map_loaded = 0;
5140 ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf,
5141 charset_list);
5142 if (charset_map_loaded
5143 && (offset = coding_change_source (coding)))
5144 {
5145 p += offset;
5146 src += offset;
5147 src_end += offset;
5148 }
5149 charbuf += ccl->produced;
5150 if (multibytep)
5151 src += source_byteidx[ccl->consumed];
5152 else
5153 src += ccl->consumed;
5154 consumed_chars += ccl->consumed;
5155 if (p == src_end || ccl->status != CCL_STAT_SUSPEND_BY_SRC)
5156 break;
5157 }
5158
5159 switch (ccl->status)
5160 {
5161 case CCL_STAT_SUSPEND_BY_SRC:
5162 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5163 break;
5164 case CCL_STAT_SUSPEND_BY_DST:
5165 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
5166 break;
5167 case CCL_STAT_QUIT:
5168 case CCL_STAT_INVALID_CMD:
5169 record_conversion_result (coding, CODING_RESULT_INTERRUPT);
5170 break;
5171 default:
5172 record_conversion_result (coding, CODING_RESULT_SUCCESS);
5173 break;
5174 }
5175 coding->consumed_char += consumed_chars;
5176 coding->consumed = src - coding->source;
5177 coding->charbuf_used = charbuf - coding->charbuf;
5178 }
5179
5180 static bool
5181 encode_coding_ccl (struct coding_system *coding)
5182 {
5183 struct ccl_program *ccl = &coding->spec.ccl->ccl;
5184 bool multibytep = coding->dst_multibyte;
5185 int *charbuf = coding->charbuf;
5186 int *charbuf_end = charbuf + coding->charbuf_used;
5187 unsigned char *dst = coding->destination + coding->produced;
5188 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5189 int destination_charbuf[1024];
5190 ptrdiff_t produced_chars = 0;
5191 int i;
5192 Lisp_Object attrs, charset_list;
5193
5194 CODING_GET_INFO (coding, attrs, charset_list);
5195 if (coding->consumed_char == coding->src_chars
5196 && coding->mode & CODING_MODE_LAST_BLOCK)
5197 ccl->last_block = true;
5198
5199 do
5200 {
5201 ptrdiff_t offset;
5202
5203
5204 charset_map_loaded = 0;
5205 ccl_driver (ccl, charbuf, destination_charbuf,
5206 charbuf_end - charbuf, 1024, charset_list);
5207 if (charset_map_loaded
5208 && (offset = coding_change_destination (coding)))
5209 dst += offset;
5210 if (multibytep)
5211 {
5212 ASSURE_DESTINATION (ccl->produced * 2);
5213 for (i = 0; i < ccl->produced; i++)
5214 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
5215 }
5216 else
5217 {
5218 ASSURE_DESTINATION (ccl->produced);
5219 for (i = 0; i < ccl->produced; i++)
5220 *dst++ = destination_charbuf[i] & 0xFF;
5221 produced_chars += ccl->produced;
5222 }
5223 charbuf += ccl->consumed;
5224 if (ccl->status == CCL_STAT_QUIT
5225 || ccl->status == CCL_STAT_INVALID_CMD)
5226 break;
5227 }
5228 while (charbuf < charbuf_end);
5229
5230 switch (ccl->status)
5231 {
5232 case CCL_STAT_SUSPEND_BY_SRC:
5233 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5234 break;
5235 case CCL_STAT_SUSPEND_BY_DST:
5236 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
5237 break;
5238 case CCL_STAT_QUIT:
5239 case CCL_STAT_INVALID_CMD:
5240 record_conversion_result (coding, CODING_RESULT_INTERRUPT);
5241 break;
5242 default:
5243 record_conversion_result (coding, CODING_RESULT_SUCCESS);
5244 break;
5245 }
5246
5247 coding->produced_char += produced_chars;
5248 coding->produced = dst - coding->destination;
5249 return 0;
5250 }
5251
5252
5253
5254
5255
5256
5257 static void
5258 decode_coding_raw_text (struct coding_system *coding)
5259 {
5260 bool eol_dos
5261 = !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5262
5263 coding->chars_at_source = 1;
5264 coding->consumed_char = coding->src_chars;
5265 coding->consumed = coding->src_bytes;
5266 if (eol_dos && coding->source[coding->src_bytes - 1] == '\r')
5267 {
5268 coding->consumed_char--;
5269 coding->consumed--;
5270 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5271 }
5272 else
5273 record_conversion_result (coding, CODING_RESULT_SUCCESS);
5274 }
5275
5276 static bool
5277 encode_coding_raw_text (struct coding_system *coding)
5278 {
5279 bool multibytep = coding->dst_multibyte;
5280 int *charbuf = coding->charbuf;
5281 int *charbuf_end = coding->charbuf + coding->charbuf_used;
5282 unsigned char *dst = coding->destination + coding->produced;
5283 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5284 ptrdiff_t produced_chars = 0;
5285 int c;
5286
5287 if (multibytep)
5288 {
5289 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
5290
5291 if (coding->src_multibyte)
5292 while (charbuf < charbuf_end)
5293 {
5294 ASSURE_DESTINATION (safe_room);
5295 c = *charbuf++;
5296 if (ASCII_CHAR_P (c))
5297 EMIT_ONE_ASCII_BYTE (c);
5298 else if (CHAR_BYTE8_P (c))
5299 {
5300 c = CHAR_TO_BYTE8 (c);
5301 EMIT_ONE_BYTE (c);
5302 }
5303 else
5304 {
5305 unsigned char str[MAX_MULTIBYTE_LENGTH];
5306 int len = CHAR_STRING (c, str);
5307 for (int i = 0; i < len; i++)
5308 EMIT_ONE_BYTE (str[i]);
5309 }
5310 }
5311 else
5312 while (charbuf < charbuf_end)
5313 {
5314 ASSURE_DESTINATION (safe_room);
5315 c = *charbuf++;
5316 EMIT_ONE_BYTE (c);
5317 }
5318 }
5319 else
5320 {
5321 if (coding->src_multibyte)
5322 {
5323 int safe_room = MAX_MULTIBYTE_LENGTH;
5324
5325 while (charbuf < charbuf_end)
5326 {
5327 ASSURE_DESTINATION (safe_room);
5328 c = *charbuf++;
5329 if (ASCII_CHAR_P (c))
5330 *dst++ = c;
5331 else if (CHAR_BYTE8_P (c))
5332 *dst++ = CHAR_TO_BYTE8 (c);
5333 else
5334 dst += CHAR_STRING (c, dst);
5335 }
5336 }
5337 else
5338 {
5339 ASSURE_DESTINATION (charbuf_end - charbuf);
5340 while (charbuf < charbuf_end && dst < dst_end)
5341 *dst++ = *charbuf++;
5342 }
5343 produced_chars = dst - (coding->destination + coding->produced);
5344 }
5345 record_conversion_result (coding, CODING_RESULT_SUCCESS);
5346 coding->produced_char += produced_chars;
5347 coding->produced = dst - coding->destination;
5348 return 0;
5349 }
5350
5351
5352
5353
5354 static bool
5355 detect_coding_charset (struct coding_system *coding,
5356 struct coding_detection_info *detect_info)
5357 {
5358 const unsigned char *src = coding->source, *src_base;
5359 const unsigned char *src_end = coding->source + coding->src_bytes;
5360 bool multibytep = coding->src_multibyte;
5361 ptrdiff_t consumed_chars = 0;
5362 Lisp_Object attrs, valids, name;
5363 int found = 0;
5364 ptrdiff_t head_ascii = coding->head_ascii;
5365 bool check_latin_extra = 0;
5366
5367 detect_info->checked |= CATEGORY_MASK_CHARSET;
5368
5369 coding = &coding_categories[coding_category_charset];
5370 attrs = CODING_ID_ATTRS (coding->id);
5371 valids = AREF (attrs, coding_attr_charset_valids);
5372 name = CODING_ID_NAME (coding->id);
5373 if (strncmp (SSDATA (SYMBOL_NAME (name)),
5374 "iso-8859-", sizeof ("iso-8859-") - 1) == 0
5375 || strncmp (SSDATA (SYMBOL_NAME (name)),
5376 "iso-latin-", sizeof ("iso-latin-") - 1) == 0)
5377 check_latin_extra = 1;
5378
5379 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5380 src += head_ascii;
5381
5382 while (1)
5383 {
5384 int c;
5385 Lisp_Object val;
5386 struct charset *charset;
5387 int dim, idx;
5388
5389 src_base = src;
5390 ONE_MORE_BYTE (c);
5391 if (c < 0)
5392 continue;
5393 val = AREF (valids, c);
5394 if (NILP (val))
5395 break;
5396 if (c >= 0x80)
5397 {
5398 if (c < 0xA0
5399 && check_latin_extra
5400 && (!VECTORP (Vlatin_extra_code_table)
5401 || NILP (AREF (Vlatin_extra_code_table, c))))
5402 break;
5403 found = CATEGORY_MASK_CHARSET;
5404 }
5405 if (FIXNUMP (val))
5406 {
5407 charset = CHARSET_FROM_ID (XFIXNAT (val));
5408 dim = CHARSET_DIMENSION (charset);
5409 for (idx = 1; idx < dim; idx++)
5410 {
5411 if (src == src_end)
5412 goto too_short;
5413 ONE_MORE_BYTE (c);
5414 if (c < charset->code_space[(dim - 1 - idx) * 4]
5415 || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5416 break;
5417 }
5418 if (idx < dim)
5419 break;
5420 }
5421 else
5422 {
5423 idx = 1;
5424 for (; CONSP (val); val = XCDR (val))
5425 {
5426 charset = CHARSET_FROM_ID (XFIXNAT (XCAR (val)));
5427 dim = CHARSET_DIMENSION (charset);
5428 while (idx < dim)
5429 {
5430 if (src == src_end)
5431 goto too_short;
5432 ONE_MORE_BYTE (c);
5433 if (c < charset->code_space[(dim - 1 - idx) * 4]
5434 || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5435 break;
5436 idx++;
5437 }
5438 if (idx == dim)
5439 {
5440 val = Qnil;
5441 break;
5442 }
5443 }
5444 if (CONSP (val))
5445 break;
5446 }
5447 }
5448 too_short:
5449 detect_info->rejected |= CATEGORY_MASK_CHARSET;
5450 return 0;
5451
5452 no_more_source:
5453 detect_info->found |= found;
5454 return 1;
5455 }
5456
5457 static void
5458 decode_coding_charset (struct coding_system *coding)
5459 {
5460 const unsigned char *src = coding->source + coding->consumed;
5461 const unsigned char *src_end = coding->source + coding->src_bytes;
5462 const unsigned char *src_base;
5463 int *charbuf = coding->charbuf + coding->charbuf_used;
5464
5465
5466 int *charbuf_end
5467 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
5468 ptrdiff_t consumed_chars = 0, consumed_chars_base;
5469 bool multibytep = coding->src_multibyte;
5470 Lisp_Object attrs = CODING_ID_ATTRS (coding->id);
5471 Lisp_Object valids;
5472 ptrdiff_t char_offset = coding->produced_char;
5473 ptrdiff_t last_offset = char_offset;
5474 int last_id = charset_ascii;
5475 bool eol_dos
5476 = !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5477 int byte_after_cr = -1;
5478
5479 valids = AREF (attrs, coding_attr_charset_valids);
5480
5481 while (1)
5482 {
5483 int c;
5484 Lisp_Object val;
5485 struct charset *charset;
5486 int dim;
5487 int len = 1;
5488 unsigned code;
5489
5490 src_base = src;
5491 consumed_chars_base = consumed_chars;
5492
5493 if (charbuf >= charbuf_end)
5494 {
5495 if (byte_after_cr >= 0)
5496 src_base--;
5497 break;
5498 }
5499
5500 if (byte_after_cr >= 0)
5501 {
5502 c = byte_after_cr;
5503 byte_after_cr = -1;
5504 }
5505 else
5506 {
5507 ONE_MORE_BYTE (c);
5508 if (eol_dos && c == '\r')
5509 ONE_MORE_BYTE (byte_after_cr);
5510 }
5511 if (c < 0)
5512 goto invalid_code;
5513 code = c;
5514
5515 val = AREF (valids, c);
5516 if (! FIXNUMP (val) && ! CONSP (val))
5517 goto invalid_code;
5518 if (FIXNUMP (val))
5519 {
5520 charset = CHARSET_FROM_ID (XFIXNAT (val));
5521 dim = CHARSET_DIMENSION (charset);
5522 while (len < dim)
5523 {
5524 ONE_MORE_BYTE (c);
5525 code = (code << 8) | c;
5526 len++;
5527 }
5528 CODING_DECODE_CHAR (coding, src, src_base, src_end,
5529 charset, code, c);
5530 }
5531 else
5532 {
5533
5534
5535
5536 while (CONSP (val))
5537 {
5538 charset = CHARSET_FROM_ID (XFIXNAT (XCAR (val)));
5539 dim = CHARSET_DIMENSION (charset);
5540 while (len < dim)
5541 {
5542 ONE_MORE_BYTE (c);
5543 code = (code << 8) | c;
5544 len++;
5545 }
5546 CODING_DECODE_CHAR (coding, src, src_base,
5547 src_end, charset, code, c);
5548 if (c >= 0)
5549 break;
5550 val = XCDR (val);
5551 }
5552 }
5553 if (c < 0)
5554 goto invalid_code;
5555 if (charset->id != charset_ascii
5556 && last_id != charset->id)
5557 {
5558 if (last_id != charset_ascii)
5559 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5560 last_id = charset->id;
5561 last_offset = char_offset;
5562 }
5563
5564 *charbuf++ = c;
5565 char_offset++;
5566 continue;
5567
5568 invalid_code:
5569 src = src_base;
5570 consumed_chars = consumed_chars_base;
5571 ONE_MORE_BYTE (c);
5572 *charbuf++ = c < 0 ? -c : ASCII_CHAR_P (c) ? c : BYTE8_TO_CHAR (c);
5573 char_offset++;
5574 }
5575
5576 no_more_source:
5577 if (last_id != charset_ascii)
5578 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5579 coding->consumed_char += consumed_chars_base;
5580 coding->consumed = src_base - coding->source;
5581 coding->charbuf_used = charbuf - coding->charbuf;
5582 }
5583
5584 static bool
5585 encode_coding_charset (struct coding_system *coding)
5586 {
5587 bool multibytep = coding->dst_multibyte;
5588 int *charbuf = coding->charbuf;
5589 int *charbuf_end = charbuf + coding->charbuf_used;
5590 unsigned char *dst = coding->destination + coding->produced;
5591 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5592 int safe_room = MAX_MULTIBYTE_LENGTH;
5593 ptrdiff_t produced_chars = 0;
5594 Lisp_Object attrs, charset_list;
5595 bool ascii_compatible;
5596 int c;
5597
5598 CODING_GET_INFO (coding, attrs, charset_list);
5599 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5600
5601 while (charbuf < charbuf_end)
5602 {
5603 struct charset *charset;
5604 unsigned code;
5605
5606 ASSURE_DESTINATION (safe_room);
5607 c = *charbuf++;
5608 if (ascii_compatible && ASCII_CHAR_P (c))
5609 EMIT_ONE_ASCII_BYTE (c);
5610 else if (CHAR_BYTE8_P (c))
5611 {
5612 c = CHAR_TO_BYTE8 (c);
5613 EMIT_ONE_BYTE (c);
5614 }
5615 else
5616 {
5617 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
5618 &code, charset);
5619
5620 if (charset)
5621 {
5622 if (CHARSET_DIMENSION (charset) == 1)
5623 EMIT_ONE_BYTE (code);
5624 else if (CHARSET_DIMENSION (charset) == 2)
5625 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5626 else if (CHARSET_DIMENSION (charset) == 3)
5627 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5628 else
5629 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5630 (code >> 8) & 0xFF, code & 0xFF);
5631 }
5632 else
5633 {
5634 if (coding->mode & CODING_MODE_SAFE_ENCODING)
5635 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5636 else
5637 c = coding->default_char;
5638 EMIT_ONE_BYTE (c);
5639 }
5640 }
5641 }
5642
5643 record_conversion_result (coding, CODING_RESULT_SUCCESS);
5644 coding->produced_char += produced_chars;
5645 coding->produced = dst - coding->destination;
5646 return 0;
5647 }
5648
5649
5650
5651
5652
5653
5654
5655
5656 void
5657 setup_coding_system (Lisp_Object coding_system, struct coding_system *coding)
5658 {
5659 Lisp_Object attrs;
5660 Lisp_Object eol_type;
5661 Lisp_Object coding_type;
5662 Lisp_Object val;
5663
5664 if (NILP (coding_system))
5665 coding_system = Qundecided;
5666
5667 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
5668
5669 attrs = CODING_ID_ATTRS (coding->id);
5670 eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
5671
5672 coding->mode = 0;
5673 if (VECTORP (eol_type))
5674 coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5675 | CODING_REQUIRE_DETECTION_MASK);
5676 else if (! EQ (eol_type, Qunix))
5677 coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5678 | CODING_REQUIRE_ENCODING_MASK);
5679 else
5680 coding->common_flags = 0;
5681 if (! NILP (CODING_ATTR_POST_READ (attrs)))
5682 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5683 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5684 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5685 if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5686 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
5687
5688 val = CODING_ATTR_SAFE_CHARSETS (attrs);
5689 coding->max_charset_id = SCHARS (val) - 1;
5690 coding->safe_charsets = SDATA (val);
5691 coding->default_char = XFIXNUM (CODING_ATTR_DEFAULT_CHAR (attrs));
5692 coding->carryover_bytes = 0;
5693 coding->raw_destination = 0;
5694
5695 coding_type = CODING_ATTR_TYPE (attrs);
5696 if (EQ (coding_type, Qundecided))
5697 {
5698 coding->detector = NULL;
5699 coding->decoder = decode_coding_raw_text;
5700 coding->encoder = encode_coding_raw_text;
5701 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5702 coding->spec.undecided.inhibit_nbd
5703 = (encode_inhibit_flag
5704 (AREF (attrs, coding_attr_undecided_inhibit_null_byte_detection)));
5705 coding->spec.undecided.inhibit_ied
5706 = (encode_inhibit_flag
5707 (AREF (attrs, coding_attr_undecided_inhibit_iso_escape_detection)));
5708 coding->spec.undecided.prefer_utf_8
5709 = ! NILP (AREF (attrs, coding_attr_undecided_prefer_utf_8));
5710 }
5711 else if (EQ (coding_type, Qiso_2022))
5712 {
5713 int i;
5714 int flags = XFIXNUM (AREF (attrs, coding_attr_iso_flags));
5715
5716
5717 CODING_ISO_INVOCATION (coding, 0) = 0;
5718
5719 CODING_ISO_INVOCATION (coding, 1)
5720 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5721
5722 for (i = 0; i < 4; i++)
5723 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5724
5725 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5726
5727 CODING_ISO_BOL (coding) = 1;
5728 coding->detector = detect_coding_iso_2022;
5729 coding->decoder = decode_coding_iso_2022;
5730 coding->encoder = encode_coding_iso_2022;
5731 if (flags & CODING_ISO_FLAG_SAFE)
5732 coding->mode |= CODING_MODE_SAFE_ENCODING;
5733 coding->common_flags
5734 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5735 | CODING_REQUIRE_FLUSHING_MASK);
5736 if (flags & CODING_ISO_FLAG_COMPOSITION)
5737 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
5738 if (flags & CODING_ISO_FLAG_DESIGNATION)
5739 coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
5740 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5741 {
5742 setup_iso_safe_charsets (attrs);
5743 val = CODING_ATTR_SAFE_CHARSETS (attrs);
5744 coding->max_charset_id = SCHARS (val) - 1;
5745 coding->safe_charsets = SDATA (val);
5746 }
5747 CODING_ISO_FLAGS (coding) = flags;
5748 CODING_ISO_CMP_STATUS (coding)->state = COMPOSING_NO;
5749 CODING_ISO_CMP_STATUS (coding)->method = COMPOSITION_NO;
5750 CODING_ISO_EXTSEGMENT_LEN (coding) = 0;
5751 CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
5752 }
5753 else if (EQ (coding_type, Qcharset))
5754 {
5755 coding->detector = detect_coding_charset;
5756 coding->decoder = decode_coding_charset;
5757 coding->encoder = encode_coding_charset;
5758 coding->common_flags
5759 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5760 }
5761 else if (EQ (coding_type, Qutf_8))
5762 {
5763 val = AREF (attrs, coding_attr_utf_bom);
5764 CODING_UTF_8_BOM (coding) = (CONSP (val) ? utf_detect_bom
5765 : EQ (val, Qt) ? utf_with_bom
5766 : utf_without_bom);
5767 coding->detector = detect_coding_utf_8;
5768 coding->decoder = decode_coding_utf_8;
5769 coding->encoder = encode_coding_utf_8;
5770 coding->common_flags
5771 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5772 if (CODING_UTF_8_BOM (coding) == utf_detect_bom)
5773 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5774 }
5775 else if (EQ (coding_type, Qutf_16))
5776 {
5777 val = AREF (attrs, coding_attr_utf_bom);
5778 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_detect_bom
5779 : EQ (val, Qt) ? utf_with_bom
5780 : utf_without_bom);
5781 val = AREF (attrs, coding_attr_utf_16_endian);
5782 CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
5783 : utf_16_little_endian);
5784 CODING_UTF_16_SURROGATE (coding) = 0;
5785 coding->detector = detect_coding_utf_16;
5786 coding->decoder = decode_coding_utf_16;
5787 coding->encoder = encode_coding_utf_16;
5788 coding->common_flags
5789 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5790 if (CODING_UTF_16_BOM (coding) == utf_detect_bom)
5791 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5792 }
5793 else if (EQ (coding_type, Qccl))
5794 {
5795 coding->detector = detect_coding_ccl;
5796 coding->decoder = decode_coding_ccl;
5797 coding->encoder = encode_coding_ccl;
5798 coding->common_flags
5799 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5800 | CODING_REQUIRE_FLUSHING_MASK);
5801 }
5802 else if (EQ (coding_type, Qemacs_mule))
5803 {
5804 coding->detector = detect_coding_emacs_mule;
5805 coding->decoder = decode_coding_emacs_mule;
5806 coding->encoder = encode_coding_emacs_mule;
5807 coding->common_flags
5808 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5809 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5810 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5811 {
5812 Lisp_Object tail, safe_charsets;
5813 int max_charset_id = 0;
5814
5815 for (tail = Vemacs_mule_charset_list; CONSP (tail);
5816 tail = XCDR (tail))
5817 if (max_charset_id < XFIXNAT (XCAR (tail)))
5818 max_charset_id = XFIXNAT (XCAR (tail));
5819 safe_charsets = make_uninit_string (max_charset_id + 1);
5820 memset (SDATA (safe_charsets), 255, max_charset_id + 1);
5821 for (tail = Vemacs_mule_charset_list; CONSP (tail);
5822 tail = XCDR (tail))
5823 SSET (safe_charsets, XFIXNAT (XCAR (tail)), 0);
5824 coding->max_charset_id = max_charset_id;
5825 coding->safe_charsets = SDATA (safe_charsets);
5826 }
5827 coding->spec.emacs_mule.cmp_status.state = COMPOSING_NO;
5828 coding->spec.emacs_mule.cmp_status.method = COMPOSITION_NO;
5829 }
5830 else if (EQ (coding_type, Qshift_jis))
5831 {
5832 coding->detector = detect_coding_sjis;
5833 coding->decoder = decode_coding_sjis;
5834 coding->encoder = encode_coding_sjis;
5835 coding->common_flags
5836 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5837 }
5838 else if (EQ (coding_type, Qbig5))
5839 {
5840 coding->detector = detect_coding_big5;
5841 coding->decoder = decode_coding_big5;
5842 coding->encoder = encode_coding_big5;
5843 coding->common_flags
5844 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5845 }
5846 else
5847 {
5848 coding->detector = NULL;
5849 coding->decoder = decode_coding_raw_text;
5850 coding->encoder = encode_coding_raw_text;
5851 if (! EQ (eol_type, Qunix))
5852 {
5853 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5854 if (! VECTORP (eol_type))
5855 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5856 }
5857
5858 }
5859
5860 return;
5861 }
5862
5863
5864
5865 Lisp_Object
5866 coding_charset_list (struct coding_system *coding)
5867 {
5868 Lisp_Object attrs, charset_list;
5869
5870 CODING_GET_INFO (coding, attrs, charset_list);
5871 if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5872 {
5873 int flags = XFIXNUM (AREF (attrs, coding_attr_iso_flags));
5874
5875 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5876 charset_list = Viso_2022_charset_list;
5877 }
5878 else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5879 {
5880 charset_list = Vemacs_mule_charset_list;
5881 }
5882 return charset_list;
5883 }
5884
5885
5886
5887
5888 Lisp_Object
5889 coding_system_charset_list (Lisp_Object coding_system)
5890 {
5891 ptrdiff_t id;
5892 Lisp_Object attrs, charset_list;
5893
5894 CHECK_CODING_SYSTEM_GET_ID (coding_system, id);
5895 attrs = CODING_ID_ATTRS (id);
5896
5897 if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5898 {
5899 int flags = XFIXNUM (AREF (attrs, coding_attr_iso_flags));
5900
5901 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5902 charset_list = Viso_2022_charset_list;
5903 else
5904 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
5905 }
5906 else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5907 {
5908 charset_list = Vemacs_mule_charset_list;
5909 }
5910 else
5911 {
5912 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
5913 }
5914 return charset_list;
5915 }
5916
5917
5918
5919
5920
5921 Lisp_Object
5922 raw_text_coding_system (Lisp_Object coding_system)
5923 {
5924 Lisp_Object spec, attrs;
5925 Lisp_Object eol_type, raw_text_eol_type;
5926
5927 if (NILP (coding_system))
5928 return Qraw_text;
5929 spec = CODING_SYSTEM_SPEC (coding_system);
5930 attrs = AREF (spec, 0);
5931
5932 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5933 return coding_system;
5934
5935 eol_type = AREF (spec, 2);
5936 if (VECTORP (eol_type))
5937 return Qraw_text;
5938 spec = CODING_SYSTEM_SPEC (Qraw_text);
5939 raw_text_eol_type = AREF (spec, 2);
5940 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5941 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5942 : AREF (raw_text_eol_type, 2));
5943 }
5944
5945
5946
5947 bool
5948 raw_text_coding_system_p (struct coding_system *coding)
5949 {
5950 return (coding->decoder == decode_coding_raw_text
5951 && coding->encoder == encode_coding_raw_text) ? true : false;
5952 }
5953
5954
5955
5956
5957
5958
5959 Lisp_Object
5960 coding_inherit_eol_type (Lisp_Object coding_system, Lisp_Object parent)
5961 {
5962 Lisp_Object spec, eol_type;
5963
5964 if (NILP (coding_system))
5965 coding_system = Qraw_text;
5966 else
5967 CHECK_CODING_SYSTEM (coding_system);
5968 spec = CODING_SYSTEM_SPEC (coding_system);
5969 eol_type = AREF (spec, 2);
5970 if (VECTORP (eol_type))
5971 {
5972
5973
5974
5975
5976 Lisp_Object system_eol_type = Qunix;
5977 #ifdef DOS_NT
5978 system_eol_type = Qdos;
5979 #endif
5980
5981 Lisp_Object parent_eol_type = system_eol_type;
5982 if (! NILP (parent))
5983 {
5984 CHECK_CODING_SYSTEM (parent);
5985 Lisp_Object parent_spec = CODING_SYSTEM_SPEC (parent);
5986 Lisp_Object pspec_type = AREF (parent_spec, 2);
5987 if (!VECTORP (pspec_type))
5988 parent_eol_type = pspec_type;
5989 }
5990 if (EQ (parent_eol_type, Qunix))
5991 coding_system = AREF (eol_type, 0);
5992 else if (EQ (parent_eol_type, Qdos))
5993 coding_system = AREF (eol_type, 1);
5994 else if (EQ (parent_eol_type, Qmac))
5995 coding_system = AREF (eol_type, 2);
5996 }
5997 return coding_system;
5998 }
5999
6000
6001
6002
6003
6004
6005 Lisp_Object
6006 complement_process_encoding_system (Lisp_Object coding_system)
6007 {
6008 Lisp_Object coding_base = Qnil, eol_base = Qnil;
6009 Lisp_Object spec, attrs;
6010 int i;
6011
6012 for (i = 0; i < 3; i++)
6013 {
6014 if (i == 1)
6015 coding_system = CDR_SAFE (Vdefault_process_coding_system);
6016 else if (i == 2)
6017 coding_system = preferred_coding_system ();
6018 spec = CODING_SYSTEM_SPEC (coding_system);
6019 if (NILP (spec))
6020 continue;
6021 attrs = AREF (spec, 0);
6022 if (NILP (coding_base) && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
6023 coding_base = CODING_ATTR_BASE_NAME (attrs);
6024 if (NILP (eol_base) && ! VECTORP (AREF (spec, 2)))
6025 eol_base = coding_system;
6026 if (! NILP (coding_base) && ! NILP (eol_base))
6027 break;
6028 }
6029
6030 if (i > 0)
6031
6032
6033
6034 coding_system = coding_inherit_eol_type (coding_base, eol_base);
6035 return coding_system;
6036 }
6037
6038
6039
6040
6041
6042
6043
6044
6045
6046
6047
6048
6049
6050
6051
6052
6053
6054
6055
6056
6057
6058
6059
6060
6061
6062
6063
6064
6065
6066
6067
6068
6069
6070
6071
6072
6073
6074
6075
6076
6077
6078
6079
6080
6081
6082
6083
6084
6085
6086
6087
6088
6089
6090
6091
6092
6093
6094
6095
6096
6097
6098
6099
6100
6101
6102
6103
6104
6105
6106
6107
6108
6109
6110
6111
6112
6113
6114
6115
6116
6117
6118
6119
6120
6121
6122
6123
6124
6125
6126
6127
6128
6129
6130
6131
6132
6133
6134
6135
6136
6137
6138
6139
6140
6141
6142
6143
6144
6145
6146
6147
6148 static Lisp_Object adjust_coding_eol_type (struct coding_system *coding,
6149 int eol_seen);
6150
6151
6152
6153
6154
6155
6156
6157
6158 static ptrdiff_t
6159 check_ascii (struct coding_system *coding)
6160 {
6161 const unsigned char *src, *end;
6162 Lisp_Object eol_type = CODING_ID_EOL_TYPE (coding->id);
6163 int eol_seen = coding->eol_seen;
6164
6165 coding_set_source (coding);
6166 src = coding->source;
6167 end = src + coding->src_bytes;
6168
6169 if (inhibit_eol_conversion
6170 || SYMBOLP (eol_type))
6171 {
6172
6173 while (src < end && !( *src & 0x80))
6174 {
6175 if (*src++ == '\n')
6176 eol_seen |= EOL_SEEN_LF;
6177 }
6178 }
6179 else
6180 {
6181 end--;
6182 while (src < end)
6183 {
6184 int c = *src;
6185
6186 if (c & 0x80)
6187 break;
6188 src++;
6189 if (c == '\r')
6190 {
6191 if (*src == '\n')
6192 {
6193 eol_seen |= EOL_SEEN_CRLF;
6194 src++;
6195 }
6196 else
6197 eol_seen |= EOL_SEEN_CR;
6198 }
6199 else if (c == '\n')
6200 eol_seen |= EOL_SEEN_LF;
6201 }
6202 if (src == end)
6203 {
6204 int c = *src;
6205
6206
6207 if (! (c & 0x80))
6208 {
6209 if (c == '\r')
6210 eol_seen |= EOL_SEEN_CR;
6211 else if (c == '\n')
6212 eol_seen |= EOL_SEEN_LF;
6213 src++;
6214 }
6215 }
6216 }
6217 coding->head_ascii = src - coding->source;
6218 coding->eol_seen = eol_seen;
6219 return (coding->head_ascii);
6220 }
6221
6222
6223
6224
6225
6226
6227
6228
6229
6230 static ptrdiff_t
6231 check_utf_8 (struct coding_system *coding)
6232 {
6233 const unsigned char *src, *end;
6234 int eol_seen;
6235 ptrdiff_t nchars = coding->head_ascii;
6236
6237 if (coding->head_ascii < 0)
6238 check_ascii (coding);
6239 else
6240 coding_set_source (coding);
6241 src = coding->source + coding->head_ascii;
6242
6243 end = coding->source + coding->src_bytes - 1;
6244 eol_seen = coding->eol_seen;
6245 while (src < end)
6246 {
6247 int c = *src;
6248
6249 if (UTF_8_1_OCTET_P (*src))
6250 {
6251 src++;
6252 if (c < 0x20)
6253 {
6254 if (c == '\r')
6255 {
6256 if (*src == '\n')
6257 {
6258 eol_seen |= EOL_SEEN_CRLF;
6259 src++;
6260 nchars++;
6261 }
6262 else
6263 eol_seen |= EOL_SEEN_CR;
6264 }
6265 else if (c == '\n')
6266 eol_seen |= EOL_SEEN_LF;
6267 }
6268 }
6269 else if (UTF_8_2_OCTET_LEADING_P (c))
6270 {
6271 if (c < 0xC2
6272 || src + 1 >= end
6273 || ! UTF_8_EXTRA_OCTET_P (src[1]))
6274 return -1;
6275 src += 2;
6276 }
6277 else if (UTF_8_3_OCTET_LEADING_P (c))
6278 {
6279 if (src + 2 >= end
6280 || ! (UTF_8_EXTRA_OCTET_P (src[1])
6281 && UTF_8_EXTRA_OCTET_P (src[2])))
6282 return -1;
6283 c = (((c & 0xF) << 12)
6284 | ((src[1] & 0x3F) << 6) | (src[2] & 0x3F));
6285 if (c < 0x800
6286 || (c >= 0xd800 && c < 0xe000))
6287 return -1;
6288 src += 3;
6289 }
6290 else if (UTF_8_4_OCTET_LEADING_P (c))
6291 {
6292 if (src + 3 >= end
6293 || ! (UTF_8_EXTRA_OCTET_P (src[1])
6294 && UTF_8_EXTRA_OCTET_P (src[2])
6295 && UTF_8_EXTRA_OCTET_P (src[3])))
6296 return -1;
6297 c = (((c & 0x7) << 18) | ((src[1] & 0x3F) << 12)
6298 | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F));
6299 if (c < 0x10000
6300 || c >= 0x110000)
6301 return -1;
6302 src += 4;
6303 }
6304 else
6305 return -1;
6306 nchars++;
6307 }
6308
6309 if (src == end)
6310 {
6311 if (! UTF_8_1_OCTET_P (*src))
6312 return -1;
6313 nchars++;
6314 if (*src == '\r')
6315 eol_seen |= EOL_SEEN_CR;
6316 else if (*src == '\n')
6317 eol_seen |= EOL_SEEN_LF;
6318 }
6319 coding->eol_seen = eol_seen;
6320 return nchars;
6321 }
6322
6323
6324
6325
6326
6327 bool
6328 utf8_string_p (Lisp_Object string)
6329 {
6330 eassert (!STRING_MULTIBYTE (string));
6331 struct coding_system coding;
6332 setup_coding_system (Qutf_8_unix, &coding);
6333
6334 coding.head_ascii = -1;
6335 coding.src_pos = 0;
6336 coding.src_pos_byte = 0;
6337 coding.src_chars = SCHARS (string);
6338 coding.src_bytes = SBYTES (string);
6339 coding.src_object = string;
6340 coding.eol_seen = EOL_SEEN_NONE;
6341 return check_utf_8 (&coding) != -1;
6342 }
6343
6344
6345
6346 Lisp_Object
6347 make_string_from_utf8 (const char *text, ptrdiff_t nbytes)
6348 {
6349 #if 0
6350
6351
6352
6353
6354 ptrdiff_t chars, bytes;
6355 parse_str_as_multibyte ((const unsigned char *) text, nbytes,
6356 &chars, &bytes);
6357
6358
6359 if (chars == nbytes || bytes == nbytes)
6360 return make_specified_string (text, chars, nbytes, true);
6361 else
6362 {
6363 struct coding_system coding;
6364 setup_coding_system (Qutf_8_unix, &coding);
6365 coding.mode |= CODING_MODE_LAST_BLOCK;
6366 coding.source = (const unsigned char *) text;
6367 decode_coding_object (&coding, Qnil, 0, 0, nbytes, nbytes, Qt);
6368 return coding.dst_object;
6369 }
6370 #else
6371 return decode_string_utf_8 (Qnil, text, nbytes, Qnil, false, Qt, Qt);
6372 #endif
6373 }
6374
6375
6376
6377
6378
6379
6380
6381
6382 #define MAX_EOL_CHECK_COUNT 3
6383
6384 static int
6385 detect_eol (const unsigned char *source, ptrdiff_t src_bytes,
6386 enum coding_category category)
6387 {
6388 const unsigned char *src = source, *src_end = src + src_bytes;
6389 unsigned char c;
6390 int total = 0;
6391 int eol_seen = EOL_SEEN_NONE;
6392
6393 if ((1 << category) & CATEGORY_MASK_UTF_16)
6394 {
6395 bool msb = category == (coding_category_utf_16_le
6396 | coding_category_utf_16_le_nosig);
6397 bool lsb = !msb;
6398
6399 while (src + 1 < src_end)
6400 {
6401 c = src[lsb];
6402 if (src[msb] == 0 && (c == '\n' || c == '\r'))
6403 {
6404 int this_eol;
6405
6406 if (c == '\n')
6407 this_eol = EOL_SEEN_LF;
6408 else if (src + 3 >= src_end
6409 || src[msb + 2] != 0
6410 || src[lsb + 2] != '\n')
6411 this_eol = EOL_SEEN_CR;
6412 else
6413 {
6414 this_eol = EOL_SEEN_CRLF;
6415 src += 2;
6416 }
6417
6418 if (eol_seen == EOL_SEEN_NONE)
6419
6420 eol_seen = this_eol;
6421 else if (eol_seen != this_eol)
6422 {
6423
6424
6425 if ((eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF)
6426 || (eol_seen == EOL_SEEN_CRLF
6427 && this_eol == EOL_SEEN_CR))
6428 eol_seen = EOL_SEEN_CRLF;
6429 else
6430 {
6431 eol_seen = EOL_SEEN_LF;
6432 break;
6433 }
6434 }
6435 if (++total == MAX_EOL_CHECK_COUNT)
6436 break;
6437 }
6438 src += 2;
6439 }
6440 }
6441 else
6442 while (src < src_end)
6443 {
6444 c = *src++;
6445 if (c == '\n' || c == '\r')
6446 {
6447 int this_eol;
6448
6449 if (c == '\n')
6450 this_eol = EOL_SEEN_LF;
6451 else if (src >= src_end || *src != '\n')
6452 this_eol = EOL_SEEN_CR;
6453 else
6454 this_eol = EOL_SEEN_CRLF, src++;
6455
6456 if (eol_seen == EOL_SEEN_NONE)
6457
6458 eol_seen = this_eol;
6459 else if (eol_seen != this_eol)
6460 {
6461
6462
6463 if ((eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF)
6464 || (eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR))
6465 eol_seen = EOL_SEEN_CRLF;
6466 else
6467 {
6468 eol_seen = EOL_SEEN_LF;
6469 break;
6470 }
6471 }
6472 if (++total == MAX_EOL_CHECK_COUNT)
6473 break;
6474 }
6475 }
6476 return eol_seen;
6477 }
6478
6479
6480 static Lisp_Object
6481 adjust_coding_eol_type (struct coding_system *coding, int eol_seen)
6482 {
6483 Lisp_Object eol_type;
6484
6485 eol_type = CODING_ID_EOL_TYPE (coding->id);
6486 if (! VECTORP (eol_type))
6487
6488 return eol_type;
6489 if (eol_seen & EOL_SEEN_LF)
6490 {
6491 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
6492 eol_type = Qunix;
6493 }
6494 else if (eol_seen & EOL_SEEN_CRLF)
6495 {
6496 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
6497 eol_type = Qdos;
6498 }
6499 else if (eol_seen & EOL_SEEN_CR)
6500 {
6501 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
6502 eol_type = Qmac;
6503 }
6504 return eol_type;
6505 }
6506
6507
6508
6509
6510
6511 static void
6512 detect_coding (struct coding_system *coding)
6513 {
6514 const unsigned char *src, *src_end;
6515 unsigned int saved_mode = coding->mode;
6516 Lisp_Object found = Qnil;
6517 Lisp_Object eol_type = CODING_ID_EOL_TYPE (coding->id);
6518
6519 coding->consumed = coding->consumed_char = 0;
6520 coding->produced = coding->produced_char = 0;
6521 coding_set_source (coding);
6522
6523 src_end = coding->source + coding->src_bytes;
6524
6525 coding->eol_seen = EOL_SEEN_NONE;
6526
6527
6528 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
6529 {
6530 int c, i;
6531 struct coding_detection_info detect_info = {0};
6532 bool null_byte_found = 0, eight_bit_found = 0;
6533 bool inhibit_nbd = inhibit_flag (coding->spec.undecided.inhibit_nbd,
6534 inhibit_null_byte_detection);
6535 bool inhibit_ied = inhibit_flag (coding->spec.undecided.inhibit_ied,
6536 inhibit_iso_escape_detection);
6537 bool prefer_utf_8 = coding->spec.undecided.prefer_utf_8;
6538
6539 coding->head_ascii = 0;
6540 for (src = coding->source; src < src_end; src++)
6541 {
6542 c = *src;
6543 if (c & 0x80)
6544 {
6545 eight_bit_found = 1;
6546 if (null_byte_found)
6547 break;
6548 }
6549 else if (c < 0x20)
6550 {
6551 if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
6552 && ! inhibit_ied
6553 && ! detect_info.checked)
6554 {
6555 if (detect_coding_iso_2022 (coding, &detect_info))
6556 {
6557
6558 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
6559 {
6560
6561
6562
6563
6564 src = src_end;
6565 coding->head_ascii = src - coding->source;
6566 }
6567 detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
6568 break;
6569 }
6570 }
6571 else if (! c && !inhibit_nbd)
6572 {
6573 null_byte_found = 1;
6574 if (eight_bit_found)
6575 break;
6576 }
6577 else if (! disable_ascii_optimization
6578 && ! inhibit_eol_conversion)
6579 {
6580 if (c == '\r')
6581 {
6582 if (src < src_end && src[1] == '\n')
6583 {
6584 coding->eol_seen |= EOL_SEEN_CRLF;
6585 src++;
6586 if (! eight_bit_found)
6587 coding->head_ascii++;
6588 }
6589 else
6590 coding->eol_seen |= EOL_SEEN_CR;
6591 }
6592 else if (c == '\n')
6593 {
6594 coding->eol_seen |= EOL_SEEN_LF;
6595 }
6596 }
6597
6598 if (! eight_bit_found)
6599 coding->head_ascii++;
6600 }
6601 else if (! eight_bit_found)
6602 coding->head_ascii++;
6603 }
6604
6605 if (null_byte_found || eight_bit_found
6606 || coding->head_ascii < coding->src_bytes
6607 || detect_info.found)
6608 {
6609 enum coding_category category;
6610 struct coding_system *this;
6611
6612 if (coding->head_ascii == coding->src_bytes)
6613
6614 for (i = 0; i < coding_category_raw_text; i++)
6615 {
6616 category = coding_priorities[i];
6617 this = coding_categories + category;
6618 if (detect_info.found & (1 << category))
6619 break;
6620 }
6621 else
6622 {
6623 if (null_byte_found)
6624 {
6625 detect_info.checked |= ~CATEGORY_MASK_UTF_16;
6626 detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
6627 }
6628 else if (prefer_utf_8
6629 && detect_coding_utf_8 (coding, &detect_info))
6630 {
6631 detect_info.checked |= ~CATEGORY_MASK_UTF_8;
6632 detect_info.rejected |= ~CATEGORY_MASK_UTF_8;
6633 }
6634 for (i = 0; i < coding_category_raw_text; i++)
6635 {
6636 category = coding_priorities[i];
6637 this = coding_categories + category;
6638
6639
6640 coding->id = this->id;
6641 if (this->id < 0)
6642 {
6643
6644 detect_info.rejected |= (1 << category);
6645 }
6646 else if (category >= coding_category_raw_text)
6647 continue;
6648 else if (detect_info.checked & (1 << category))
6649 {
6650 if (detect_info.found & (1 << category))
6651 break;
6652 }
6653 else if ((*(this->detector)) (coding, &detect_info)
6654 && detect_info.found & (1 << category))
6655 break;
6656 }
6657 }
6658
6659 if (i < coding_category_raw_text)
6660 {
6661 if (category == coding_category_utf_8_auto)
6662 {
6663 Lisp_Object coding_systems;
6664
6665 coding_systems = AREF (CODING_ID_ATTRS (this->id),
6666 coding_attr_utf_bom);
6667 if (CONSP (coding_systems))
6668 {
6669 if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
6670 found = XCAR (coding_systems);
6671 else
6672 found = XCDR (coding_systems);
6673 }
6674 else
6675 found = CODING_ID_NAME (this->id);
6676 }
6677 else if (category == coding_category_utf_16_auto)
6678 {
6679 Lisp_Object coding_systems;
6680
6681 coding_systems = AREF (CODING_ID_ATTRS (this->id),
6682 coding_attr_utf_bom);
6683 if (CONSP (coding_systems))
6684 {
6685 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6686 found = XCAR (coding_systems);
6687 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
6688 found = XCDR (coding_systems);
6689 }
6690 else
6691 found = CODING_ID_NAME (this->id);
6692 }
6693 else
6694 found = CODING_ID_NAME (this->id);
6695 }
6696 else if (null_byte_found)
6697 found = Qno_conversion;
6698 else if ((detect_info.rejected & CATEGORY_MASK_ANY)
6699 == CATEGORY_MASK_ANY)
6700 found = Qraw_text;
6701 else if (detect_info.rejected)
6702 for (i = 0; i < coding_category_raw_text; i++)
6703 if (! (detect_info.rejected & (1 << coding_priorities[i])))
6704 {
6705 this = coding_categories + coding_priorities[i];
6706 found = CODING_ID_NAME (this->id);
6707 break;
6708 }
6709 }
6710 }
6711 else if (XFIXNUM (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6712 == coding_category_utf_8_auto)
6713 {
6714 Lisp_Object coding_systems
6715 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6716 if (check_ascii (coding) == coding->src_bytes)
6717 {
6718 if (CONSP (coding_systems))
6719 found = XCDR (coding_systems);
6720 }
6721 else
6722 {
6723 struct coding_detection_info detect_info = {0};
6724 if (CONSP (coding_systems)
6725 && detect_coding_utf_8 (coding, &detect_info))
6726 {
6727 if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
6728 found = XCAR (coding_systems);
6729 else
6730 found = XCDR (coding_systems);
6731 }
6732 }
6733 }
6734 else if (XFIXNUM (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6735 == coding_category_utf_16_auto)
6736 {
6737 Lisp_Object coding_systems
6738 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6739 coding->head_ascii = 0;
6740 if (CONSP (coding_systems))
6741 {
6742 struct coding_detection_info detect_info = {0};
6743 if (detect_coding_utf_16 (coding, &detect_info))
6744 {
6745 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6746 found = XCAR (coding_systems);
6747 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
6748 found = XCDR (coding_systems);
6749 }
6750 }
6751 }
6752
6753 if (! NILP (found))
6754 {
6755 int specified_eol = (VECTORP (eol_type) ? EOL_SEEN_NONE
6756 : EQ (eol_type, Qdos) ? EOL_SEEN_CRLF
6757 : EQ (eol_type, Qmac) ? EOL_SEEN_CR
6758 : EOL_SEEN_LF);
6759
6760 setup_coding_system (found, coding);
6761 if (specified_eol != EOL_SEEN_NONE)
6762 adjust_coding_eol_type (coding, specified_eol);
6763 }
6764
6765 coding->mode = saved_mode;
6766 }
6767
6768
6769 static void
6770 decode_eol (struct coding_system *coding)
6771 {
6772 Lisp_Object eol_type;
6773 unsigned char *p, *pbeg, *pend;
6774
6775 eol_type = CODING_ID_EOL_TYPE (coding->id);
6776 if (EQ (eol_type, Qunix) || inhibit_eol_conversion)
6777 return;
6778
6779 if (NILP (coding->dst_object))
6780 pbeg = coding->destination;
6781 else
6782 pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
6783 pend = pbeg + coding->produced;
6784
6785 if (VECTORP (eol_type))
6786 {
6787 int eol_seen = EOL_SEEN_NONE;
6788
6789 for (p = pbeg; p < pend; p++)
6790 {
6791 if (*p == '\n')
6792 eol_seen |= EOL_SEEN_LF;
6793 else if (*p == '\r')
6794 {
6795 if (p + 1 < pend && *(p + 1) == '\n')
6796 {
6797 eol_seen |= EOL_SEEN_CRLF;
6798 p++;
6799 }
6800 else
6801 eol_seen |= EOL_SEEN_CR;
6802 }
6803 }
6804
6805 if ((eol_seen & EOL_SEEN_CRLF) != 0
6806 && (eol_seen & EOL_SEEN_CR) != 0
6807 && (eol_seen & EOL_SEEN_LF) == 0)
6808 eol_seen = EOL_SEEN_CRLF;
6809 else if (eol_seen != EOL_SEEN_NONE
6810 && eol_seen != EOL_SEEN_LF
6811 && eol_seen != EOL_SEEN_CRLF
6812 && eol_seen != EOL_SEEN_CR)
6813 eol_seen = EOL_SEEN_LF;
6814 if (eol_seen != EOL_SEEN_NONE)
6815 eol_type = adjust_coding_eol_type (coding, eol_seen);
6816 }
6817
6818 if (EQ (eol_type, Qmac))
6819 {
6820 for (p = pbeg; p < pend; p++)
6821 if (*p == '\r')
6822 *p = '\n';
6823 }
6824 else if (EQ (eol_type, Qdos))
6825 {
6826 ptrdiff_t n = 0;
6827 ptrdiff_t pos = coding->dst_pos;
6828 ptrdiff_t pos_byte = coding->dst_pos_byte;
6829 ptrdiff_t pos_end = pos_byte + coding->produced - 1;
6830
6831
6832
6833
6834 eassert (!NILP (coding->dst_object));
6835
6836 while (pos_byte < pos_end)
6837 {
6838 int incr;
6839
6840 p = BYTE_POS_ADDR (pos_byte);
6841 if (coding->dst_multibyte)
6842 incr = BYTES_BY_CHAR_HEAD (*p);
6843 else
6844 incr = 1;
6845
6846 if (*p == '\r' && p[1] == '\n')
6847 {
6848 del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
6849 n++;
6850 pos_end--;
6851 }
6852 pos++;
6853 pos_byte += incr;
6854 }
6855 coding->produced -= n;
6856 coding->produced_char -= n;
6857 }
6858 }
6859
6860
6861
6862
6863
/* Upper bound for the `max_lookup' value reported by
   get_translation_table: the smaller of INT_MAX and the number of
   ints that fit in MAX_ALLOCA bytes.
   NOTE(review): presumably chosen so that a buffer of that many ints
   can be allocated with alloca -- confirm against the users of
   max_lookup.  */
enum { MAX_LOOKUP_MAX = min (INT_MAX, MAX_ALLOCA / sizeof (int)) };
6865
6866
6867
6868
6869
6870 static Lisp_Object
6871 get_translation_table (Lisp_Object attrs, bool encodep, int *max_lookup)
6872 {
6873 Lisp_Object standard, translation_table;
6874 Lisp_Object val;
6875
6876 if (NILP (Venable_character_translation))
6877 {
6878 if (max_lookup)
6879 *max_lookup = 0;
6880 return Qnil;
6881 }
6882 if (encodep)
6883 translation_table = CODING_ATTR_ENCODE_TBL (attrs),
6884 standard = Vstandard_translation_table_for_encode;
6885 else
6886 translation_table = CODING_ATTR_DECODE_TBL (attrs),
6887 standard = Vstandard_translation_table_for_decode;
6888 if (NILP (translation_table))
6889 translation_table = standard;
6890 else
6891 {
6892 if (SYMBOLP (translation_table))
6893 translation_table = Fget (translation_table, Qtranslation_table);
6894 else if (CONSP (translation_table))
6895 {
6896 translation_table = Fcopy_sequence (translation_table);
6897 for (val = translation_table; CONSP (val); val = XCDR (val))
6898 if (SYMBOLP (XCAR (val)))
6899 XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
6900 }
6901 if (CHAR_TABLE_P (standard))
6902 {
6903 if (CONSP (translation_table))
6904 translation_table = nconc2 (translation_table, list1 (standard));
6905 else
6906 translation_table = list2 (translation_table, standard);
6907 }
6908 }
6909
6910 if (max_lookup)
6911 {
6912 *max_lookup = 1;
6913 if (CHAR_TABLE_P (translation_table)
6914 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
6915 {
6916 val = XCHAR_TABLE (translation_table)->extras[1];
6917 if (FIXNATP (val) && *max_lookup < XFIXNAT (val))
6918 *max_lookup = min (XFIXNAT (val), MAX_LOOKUP_MAX);
6919 }
6920 else if (CONSP (translation_table))
6921 {
6922 Lisp_Object tail;
6923
6924 for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6925 if (CHAR_TABLE_P (XCAR (tail))
6926 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6927 {
6928 Lisp_Object tailval = XCHAR_TABLE (XCAR (tail))->extras[1];
6929 if (FIXNATP (tailval) && *max_lookup < XFIXNAT (tailval))
6930 *max_lookup = min (XFIXNAT (tailval), MAX_LOOKUP_MAX);
6931 }
6932 }
6933 }
6934 return translation_table;
6935 }
6936
/* Look up character C in TABLE, which is a char-table or a list
   whose char-table elements are consulted in order, and leave the
   result in TRANS.  C and TRANS must be lvalues; TABLE is evaluated
   more than once.

   When an entry is itself a character, C is replaced by it and TRANS
   is reset to nil; with a list, the search then continues in the
   following tables using the new C.  The search through a list stops
   at the first entry that is neither nil nor a character, which is
   left in TRANS.  */
#define LOOKUP_TRANSLATION_TABLE(table, c, trans)			\
  do {									\
    trans = Qnil;							\
    if (CHAR_TABLE_P (table))						\
      {									\
	trans = CHAR_TABLE_REF (table, c);				\
	if (CHARACTERP (trans))						\
	  {								\
	    c = XFIXNAT (trans);					\
	    trans = Qnil;						\
	  }								\
      }									\
    else if (CONSP (table))						\
      {									\
	Lisp_Object tail;						\
									\
	for (tail = table; CONSP (tail); tail = XCDR (tail))		\
	  {								\
	    if (! CHAR_TABLE_P (XCAR (tail)))				\
	      continue;							\
	    trans = CHAR_TABLE_REF (XCAR (tail), c);			\
	    if (CHARACTERP (trans))					\
	      {								\
		c = XFIXNAT (trans);					\
		trans = Qnil;						\
	      }								\
	    else if (! NILP (trans))					\
	      break;							\
	  }								\
      }									\
  } while (0)
6961
6962
6963
6964
6965
6966
6967
6968
6969
6970
6971 static Lisp_Object
6972 get_translation (Lisp_Object trans, int *buf, int *buf_end, ptrdiff_t *nchars)
6973 {
6974 if (FIXNUMP (trans) || VECTORP (trans))
6975 {
6976 *nchars = 1;
6977 return trans;
6978 }
6979 for (; CONSP (trans); trans = XCDR (trans))
6980 {
6981 Lisp_Object val = XCAR (trans);
6982 Lisp_Object from = XCAR (val);
6983 ptrdiff_t len = ASIZE (from);
6984 ptrdiff_t i;
6985
6986 for (i = 0; i < len; i++)
6987 {
6988 if (buf + i == buf_end)
6989 return Qt;
6990 if (XFIXNUM (AREF (from, i)) != buf[i])
6991 break;
6992 }
6993 if (i == len)
6994 {
6995 *nchars = len;
6996 return XCDR (val);
6997 }
6998 }
6999 return Qnil;
7000 }
7001
7002
/* Write the result of one conversion step to the destination of
   CODING, starting at coding->destination + coding->produced.

   When coding->chars_at_source is zero, the input is the first
   coding->charbuf_used elements of coding->charbuf.  Each
   non-negative element is a character; it is looked up in
   TRANSLATION_TABLE and the result is stored at the destination.
   A negative element -N is not output; the scan skips N elements.

   Otherwise the input is the coding->consumed bytes starting at
   coding->source.  They are copied verbatim when
   coding->src_multibyte equals coding->dst_multibyte, and otherwise
   passed one unit at a time through ONE_MORE_BYTE or EMIT_ONE_BYTE
   (macros defined outside this part of the file).

   The destination is enlarged with alloc_destination when it is too
   small.  On return coding->produced and coding->produced_char have
   been advanced, and if coding->dst_object is a buffer and at least
   one character was produced, insert_from_gap has been called.

   Return the number of trailing charbuf elements left unconverted.
   The scan stops early, leaving a nonzero count, when
   get_translation returns Qt (more input needed) and LAST_BLOCK is
   false.  */
7003 static int
7004 produce_chars (struct coding_system *coding, Lisp_Object translation_table,
7005 bool last_block)
7006 {
7007 unsigned char *dst = coding->destination + coding->produced;
7008 unsigned char *dst_end = coding->destination + coding->dst_bytes;
7009 ptrdiff_t produced;
7010 ptrdiff_t produced_chars = 0;
7011 int carryover = 0;
7012
7013 if (! coding->chars_at_source)
7014 {
7015
/* Source characters are in coding->charbuf.  */
7016 int *buf = coding->charbuf;
7017 int *buf_end = buf + coding->charbuf_used;
7018
/* Source and destination are the same non-nil object: the output
   may extend only up to the end of the already consumed source.  */
7019 if (EQ (coding->src_object, coding->dst_object)
7020 && ! NILP (coding->dst_object))
7021 {
7022 eassert (growable_destination (coding));
7023 coding_set_source (coding);
7024 dst_end = ((unsigned char *) coding->source) + coding->consumed;
7025 }
7026
7027 while (buf < buf_end)
7028 {
7029 int c = *buf;
7030 ptrdiff_t i;
7031
7032 if (c >= 0)
7033 {
7034 ptrdiff_t from_nchars = 1, to_nchars = 1;
7035 Lisp_Object trans = Qnil;
7036
/* The lookup may replace C directly; a non-nil TRANS needs
   get_translation to decide how many source characters it covers
   (FROM_NCHARS) and what they become.  */
7037 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
7038 if (! NILP (trans))
7039 {
7040 trans = get_translation (trans, buf, buf_end, &from_nchars);
7041 if (FIXNUMP (trans))
7042 c = XFIXNUM (trans);
7043 else if (VECTORP (trans))
7044 {
7045 to_nchars = ASIZE (trans);
7046 c = XFIXNUM (AREF (trans, 0));
7047 }
/* Qt: the rule could not be decided with the characters at hand.
   Unless this is the last block, stop and report the rest as
   carryover.  */
7048 else if (EQ (trans, Qt) && ! last_block)
7049 break;
7050 }
7051
/* Not enough room for TO_NCHARS characters of up to
   MAX_MULTIBYTE_LENGTH bytes each: ask for that much plus the number
   of charbuf elements still to process, calling memory_full if the
   size computation overflows.  */
7052 if ((dst_end - dst) / MAX_MULTIBYTE_LENGTH < to_nchars)
7053 {
7054 eassert (growable_destination (coding));
7055 ptrdiff_t dst_size;
7056 if (INT_MULTIPLY_WRAPV (to_nchars, MAX_MULTIBYTE_LENGTH,
7057 &dst_size)
7058 || INT_ADD_WRAPV (buf_end - buf, dst_size, &dst_size))
7059 memory_full (SIZE_MAX);
7060 dst = alloc_destination (coding, dst_size, dst);
7061 if (EQ (coding->src_object, coding->dst_object))
7062 {
7063 coding_set_source (coding);
7064 dst_end = (((unsigned char *) coding->source)
7065 + coding->consumed);
7066 }
7067 else
7068 dst_end = coding->destination + coding->dst_bytes;
7069 }
7070
/* Store C, then the remaining elements of the vector TRANS when the
   translation yields several characters.  A character satisfying
   CHAR_BYTE8_P is stored as the single byte CHAR_TO_BYTE8 (c) when
   the destination is not multibyte.  */
7071 for (i = 0; i < to_nchars; i++)
7072 {
7073 if (i > 0)
7074 c = XFIXNUM (AREF (trans, i));
7075 if (coding->dst_multibyte
7076 || ! CHAR_BYTE8_P (c))
7077 CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
7078 else
7079 *dst++ = CHAR_TO_BYTE8 (c);
7080 }
7081 produced_chars += to_nchars;
7082 buf += from_nchars;
7083 }
7084 else
7085
/* This is an annotation datum.  (-C) is the length.  */
7086 buf += -c;
7087 }
7088 carryover = buf_end - buf;
7089 }
7090 else
7091 {
7092
/* Source characters are at coding->source.  */
7093 const unsigned char *src = coding->source;
7094 const unsigned char *src_end = src + coding->consumed;
7095
7096 if (EQ (coding->dst_object, coding->src_object))
7097 {
7098 eassert (growable_destination (coding));
7099 dst_end = (unsigned char *) src;
7100 }
7101 if (coding->src_multibyte != coding->dst_multibyte)
7102 {
7103 if (coding->src_multibyte)
7104 {
/* NOTE(review): multibytep, consumed_chars, src_base and the label
   no_more_source are not referenced by name in the visible code;
   presumably ONE_MORE_BYTE uses them -- confirm against its
   definition before renaming anything here.  */
7105 bool multibytep = 1;
7106 ptrdiff_t consumed_chars = 0;
7107
7108 while (1)
7109 {
7110 const unsigned char *src_base = src;
7111 int c;
7112
7113 ONE_MORE_BYTE (c);
7114 if (dst == dst_end)
7115 {
7116 eassert (growable_destination (coding));
/* When converting in place, consuming source has freed room up to
   SRC; only allocate if that is still not enough.  */
7117 if (EQ (coding->src_object, coding->dst_object))
7118 dst_end = (unsigned char *) src;
7119 if (dst == dst_end)
7120 {
7121 ptrdiff_t offset = src - coding->source;
7122
/* SRC, SRC_END and DST_END are recomputed after the allocation,
   presumably because alloc_destination can relocate the text.  */
7123 dst = alloc_destination (coding, src_end - src + 1,
7124 dst);
7125 dst_end = coding->destination + coding->dst_bytes;
7126 coding_set_source (coding);
7127 src = coding->source + offset;
7128 src_end = coding->source + coding->consumed;
7129 if (EQ (coding->src_object, coding->dst_object))
7130 dst_end = (unsigned char *) src;
7131 }
7132 }
7133 *dst++ = c;
7134 produced_chars++;
7135 }
7136 no_more_source:
7137 ;
7138 }
7139 else
7140 while (src < src_end)
7141 {
/* NOTE(review): multibytep is presumably read by EMIT_ONE_BYTE,
   which also appears to be what advances DST and counts the
   character -- confirm against its definition.  */
7142 bool multibytep = 1;
7143 int c = *src++;
7144
/* Keep at least two bytes of room before emitting.  */
7145 if (dst >= dst_end - 1)
7146 {
7147 eassert (growable_destination (coding));
7148 if (EQ (coding->src_object, coding->dst_object))
7149 dst_end = (unsigned char *) src;
7150 if (dst >= dst_end - 1)
7151 {
7152 ptrdiff_t offset = src - coding->source;
7153 ptrdiff_t more_bytes;
7154
7155 if (EQ (coding->src_object, coding->dst_object))
7156 more_bytes = ((src_end - src) / 2) + 2;
7157 else
7158 more_bytes = src_end - src + 2;
7159 dst = alloc_destination (coding, more_bytes, dst);
7160 dst_end = coding->destination + coding->dst_bytes;
7161 coding_set_source (coding);
7162 src = coding->source + offset;
7163 src_end = coding->source + coding->consumed;
7164 if (EQ (coding->src_object, coding->dst_object))
7165 dst_end = (unsigned char *) src;
7166 }
7167 }
7168 EMIT_ONE_BYTE (c);
7169 }
7170 }
7171 else
7172 {
/* Same multibyteness on both sides: a plain byte copy.  With
   distinct objects, first make sure the destination can hold the
   whole source.  */
7173 if (!EQ (coding->src_object, coding->dst_object))
7174 {
7175 ptrdiff_t require = coding->src_bytes - coding->dst_bytes;
7176
7177 if (require > 0)
7178 {
7179 ptrdiff_t offset = src - coding->source;
7180
7181 dst = alloc_destination (coding, require, dst);
7182 coding_set_source (coding);
7183 src = coding->source + offset;
7184 src_end = coding->source + coding->consumed;
7185 }
7186 }
7187 produced_chars = coding->consumed_char;
7188 while (src < src_end)
7189 *dst++ = *src++;
7190 }
7191 }
7192
/* Number of bytes written by this call.  */
7193 produced = dst - (coding->destination + coding->produced);
7194 if (BUFFERP (coding->dst_object) && produced_chars > 0)
7195 insert_from_gap (produced_chars, produced, 0);
7196 coding->produced += produced;
7197 coding->produced_char += produced_chars;
7198 return carryover;
7199 }
7200
7201
7202
7203
7204
7205
7206 static void
7207 produce_composition (struct coding_system *coding, int *charbuf, ptrdiff_t pos)
7208 {
7209 int len;
7210 ptrdiff_t to;
7211 enum composition_method method;
7212 Lisp_Object components;
7213
7214 len = -charbuf[0] - MAX_ANNOTATION_LENGTH;
7215 to = pos + charbuf[2];
7216 method = (enum composition_method) (charbuf[4]);
7217
7218 if (method == COMPOSITION_RELATIVE)
7219 components = Qnil;
7220 else
7221 {
7222 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
7223 int i, j;
7224
7225 if (method == COMPOSITION_WITH_RULE)
7226 len = charbuf[2] * 3 - 2;
7227 charbuf += MAX_ANNOTATION_LENGTH;
7228
7229 for (i = j = 0; i < len && charbuf[i] != -1; i++, j++)
7230 {
7231 if (charbuf[i] >= 0)
7232 args[j] = make_fixnum (charbuf[i]);
7233 else
7234 {
7235 i++;
7236 args[j] = make_fixnum (charbuf[i] % 0x100);
7237 }
7238 }
7239 components = (i == j ? Fstring (j, args) : Fvector (j, args));
7240 }
7241 compose_text (pos, to, components, Qnil, coding->dst_object);
7242 }
7243
7244
7245
7246
7247
7248
7249
7250 static void
7251 produce_charset (struct coding_system *coding, int *charbuf, ptrdiff_t pos)
7252 {
7253 ptrdiff_t from = pos - charbuf[2];
7254 struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
7255
7256 Fput_text_property (make_fixnum (from), make_fixnum (pos),
7257 Qcharset, CHARSET_NAME (charset),
7258 coding->dst_object);
7259 }
7260
/* Upper bound, in ints, on the size of the coding->charbuf work area
   allocated by ALLOC_CONVERSION_WORK_AREA.  */
#define MAX_CHARBUF_SIZE 0x4000

/* Extra ints added to the requested work-area size: room for three
   annotations of MAX_ANNOTATION_LENGTH ints each, plus one.  */
#define MAX_CHARBUF_EXTRA_SIZE ((MAX_ANNOTATION_LENGTH * 3) + 1)

/* Allocate the work area of CODING, a pointer to a struct
   coding_system: set its charbuf to a SAFE_ALLOCA'd array of
   min (SIZE + MAX_CHARBUF_EXTRA_SIZE, MAX_CHARBUF_SIZE) ints and
   record that count in its charbuf_size.

   CODING is parenthesized in the expansion so that any pointer
   expression (e.g. `p + 1'), not just a plain identifier, works.
   CODING is evaluated twice.  The users in this file declare
   USE_SAFE_ALLOCA before expanding this macro and call SAFE_FREE
   when done with the work area.  */
#define ALLOC_CONVERSION_WORK_AREA(coding, size)		\
  do {								\
    ptrdiff_t units = min ((size) + MAX_CHARBUF_EXTRA_SIZE,	\
			   MAX_CHARBUF_SIZE);			\
    (coding)->charbuf = SAFE_ALLOCA (units * sizeof (int));	\
    (coding)->charbuf_size = units;				\
  } while (0)
7274
7275 static void
7276 produce_annotation (struct coding_system *coding, ptrdiff_t pos)
7277 {
7278 int *charbuf = coding->charbuf;
7279 int *charbuf_end = charbuf + coding->charbuf_used;
7280
7281 if (NILP (coding->dst_object))
7282 return;
7283
7284 while (charbuf < charbuf_end)
7285 {
7286 if (*charbuf >= 0)
7287 pos++, charbuf++;
7288 else
7289 {
7290 int len = -*charbuf;
7291
7292 if (len > 2)
7293 switch (charbuf[1])
7294 {
7295 case CODING_ANNOTATE_COMPOSITION_MASK:
7296 produce_composition (coding, charbuf, pos);
7297 break;
7298 case CODING_ANNOTATE_CHARSET_MASK:
7299 produce_charset (coding, charbuf, pos);
7300 break;
7301 default:
7302 break;
7303 }
7304 charbuf += len;
7305 }
7306 }
7307 }
7308
7309
7310
7311
7312
7313
7314
7315
7316
7317
7318
7319
7320
7321
7322
7323
7324
7325
7326
7327
7328
7329
7330
7331
7332
7333
7334
7335
/* Decode the coding->src_bytes bytes of source described by CODING
   and deliver the result to the destination described by CODING.

   The work is done in rounds: coding->decoder fills coding->charbuf,
   produce_chars writes those characters to the destination, and
   produce_annotation is run when the decoder set coding->annotated.
   Charbuf elements that produce_chars could not convert yet are
   moved to the front of charbuf and given to the next round.

   Source bytes the decoder left unconsumed are, when
   CODING_MODE_LAST_BLOCK is set, output as they are (bytes with the
   0x80 bit set go through BYTE8_TO_CHAR); otherwise they are saved
   in coding->carryover.  Finally decode_eol is run unless the EOL
   type is Qunix or inhibit_eol_conversion is set.

   When the destination is a buffer, it is made current, the gap is
   moved to point, and undo recording is suspended for the duration
   so that a single record_insert covers the whole insertion.  */
7336 static void
7337 decode_coding (struct coding_system *coding)
7338 {
7339 Lisp_Object attrs;
7340 Lisp_Object undo_list;
7341 Lisp_Object translation_table;
7342 struct ccl_spec cclspec;
7343 int carryover;
7344 int i;
7345
7346 USE_SAFE_ALLOCA;
7347
/* If the gap falls inside the source region of a buffer, move it to
   the start of that region.  */
7348 if (BUFFERP (coding->src_object)
7349 && coding->src_pos > 0
7350 && coding->src_pos < GPT
7351 && coding->src_pos + coding->src_chars > GPT)
7352 move_gap_both (coding->src_pos, coding->src_pos_byte);
7353
7354 undo_list = Qt;
7355 if (BUFFERP (coding->dst_object))
7356 {
7357 set_buffer_internal (XBUFFER (coding->dst_object));
7358 if (GPT != PT)
7359 move_gap_both (PT, PT_BYTE);
7360
7361
7362
7363
7364
7365
/* Undo recording is disabled below so that the whole insertion can
   be recorded by one record_insert at the end.  That would also
   suppress the record of the buffer's first change, so make that
   record here if the buffer is currently unmodified.  */
7366 if (MODIFF <= SAVE_MODIFF)
7367 record_first_change ();
7368
7369 undo_list = BVAR (current_buffer, undo_list);
7370 bset_undo_list (current_buffer, Qt);
7371 }
7372
7373 coding->consumed = coding->consumed_char = 0;
7374 coding->produced = coding->produced_char = 0;
7375 coding->chars_at_source = 0;
7376 record_conversion_result (coding, CODING_RESULT_SUCCESS);
7377
7378 ALLOC_CONVERSION_WORK_AREA (coding, coding->src_bytes);
7379
7380 attrs = CODING_ID_ATTRS (coding->id);
7381 translation_table = get_translation_table (attrs, 0, NULL);
7382
7383 carryover = 0;
/* The CCL decoder keeps its state in CCLSPEC, which lives on this
   function's stack.  */
7384 if (coding->decoder == decode_coding_ccl)
7385 {
7386 coding->spec.ccl = &cclspec;
7387 setup_ccl_program (&cclspec.ccl, CODING_CCL_DECODER (coding));
7388 }
7389 do
7390 {
/* Destination position of the first character of this round; used
   to place annotations.  */
7391 ptrdiff_t pos = coding->dst_pos + coding->produced_char;
7392
7393 coding_set_source (coding);
7394 coding->annotated = 0;
/* Elements carried over from the previous round are already at the
   front of charbuf.  */
7395 coding->charbuf_used = carryover;
7396 (*(coding->decoder)) (coding);
7397 coding_set_destination (coding);
7398 carryover = produce_chars (coding, translation_table, 0);
7399 if (coding->annotated)
7400 produce_annotation (coding, pos);
/* Move the unconverted tail of charbuf to its front.  */
7401 for (i = 0; i < carryover; i++)
7402 coding->charbuf[i]
7403 = coding->charbuf[coding->charbuf_used - carryover + i];
7404 }
/* Go on while the decoder ran out of destination space, or while
   source remains and the last round ended with success or with an
   invalid-source result.  */
7405 while (coding->result == CODING_RESULT_INSUFFICIENT_DST
7406 || (coding->consumed < coding->src_bytes
7407 && (coding->result == CODING_RESULT_SUCCESS
7408 || coding->result == CODING_RESULT_INVALID_SRC)));
7409
/* Flush what is still carried over, this time as the last block so
   that produce_chars does not wait for more input.  */
7410 if (carryover > 0)
7411 {
7412 coding_set_destination (coding);
7413 coding->charbuf_used = carryover;
7414 produce_chars (coding, translation_table, 1);
7415 }
7416
7417 coding->carryover_bytes = 0;
7418 if (coding->consumed < coding->src_bytes)
7419 {
7420 ptrdiff_t nbytes = coding->src_bytes - coding->consumed;
7421 const unsigned char *src;
7422
7423 coding_set_source (coding);
7424 coding_set_destination (coding);
7425 src = coding->source + coding->consumed;
7426
7427 if (coding->mode & CODING_MODE_LAST_BLOCK)
7428 {
7429
7430
7431
/* Flush out the unprocessed data as binary characters.
   NOTE(review): nothing here checks that NBYTES fits in
   coding->charbuf; presumably the decoders guarantee it --
   confirm.  */
7432 coding->charbuf_used = 0;
7433 coding->chars_at_source = 0;
7434
7435 while (nbytes-- > 0)
7436 {
7437 int c;
7438
7439
7440
/* In multibyte source, a sequence whose head satisfies
   CHAR_BYTE8_HEAD_P is read as one character via
   string_char_advance; the extra NBYTES-- accounts for its second
   byte.  */
7441 if (coding->src_multibyte
7442 && CHAR_BYTE8_HEAD_P (*src) && nbytes > 0)
7443 {
7444 c = string_char_advance (&src);
7445 nbytes--;
7446 }
7447 else
7448 {
7449 c = *src++;
7450
7451 if (c & 0x80)
7452 c = BYTE8_TO_CHAR (c);
7453 }
7454 coding->charbuf[coding->charbuf_used++] = c;
7455 }
/* No translation is applied to these leftover bytes.  */
7456 produce_chars (coding, Qnil, 1);
7457 }
7458 else
7459 {
7460
7461
7462
/* Not the last block: save the unprocessed bytes in
   coding->carryover, truncating to the size of that array.  */
7463 unsigned char *p = coding->carryover;
7464
7465 if (nbytes > sizeof coding->carryover)
7466 nbytes = sizeof coding->carryover;
7467 coding->carryover_bytes = nbytes;
7468 while (nbytes-- > 0)
7469 *p++ = *src++;
7470 }
/* Either way the whole source now counts as consumed.  */
7471 coding->consumed = coding->src_bytes;
7472 }
7473
7474 if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix)
7475 && !inhibit_eol_conversion)
7476 decode_eol (coding);
/* Restore undo recording and record the insertion as one unit.  */
7477 if (BUFFERP (coding->dst_object))
7478 {
7479 bset_undo_list (current_buffer, undo_list);
7480 record_insert (coding->dst_pos, coding->produced_char);
7481 }
7482
7483 SAFE_FREE ();
7484 }
7485
7486
7487
7488
7489
7490
7491
7492
7493
7494
7495
7496
7497 static int *
7498 handle_composition_annotation (ptrdiff_t pos, ptrdiff_t limit,
7499 struct coding_system *coding, int *buf,
7500 ptrdiff_t *stop)
7501 {
7502 ptrdiff_t start, end;
7503 Lisp_Object prop;
7504
7505 if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
7506 || end > limit)
7507 *stop = limit;
7508 else if (start > pos)
7509 *stop = start;
7510 else
7511 {
7512 if (start == pos)
7513 {
7514
7515
7516 int *head = buf;
7517 enum composition_method method = composition_method (prop);
7518 int nchars = COMPOSITION_LENGTH (prop);
7519
7520 ADD_COMPOSITION_DATA (buf, nchars, 0, method);
7521 if (method != COMPOSITION_RELATIVE)
7522 {
7523 Lisp_Object components;
7524 ptrdiff_t i, len, i_byte;
7525
7526 components = COMPOSITION_COMPONENTS (prop);
7527 if (VECTORP (components))
7528 {
7529 len = ASIZE (components);
7530 for (i = 0; i < len; i++)
7531 *buf++ = XFIXNUM (AREF (components, i));
7532 }
7533 else if (STRINGP (components))
7534 {
7535 len = SCHARS (components);
7536 i = i_byte = 0;
7537 while (i < len)
7538 *buf++ = fetch_string_char_advance (components,
7539 &i, &i_byte);
7540 }
7541 else if (FIXNUMP (components))
7542 {
7543 len = 1;
7544 *buf++ = XFIXNUM (components);
7545 }
7546 else if (CONSP (components))
7547 {
7548 for (len = 0; CONSP (components);
7549 len++, components = XCDR (components))
7550 *buf++ = XFIXNUM (XCAR (components));
7551 }
7552 else
7553 emacs_abort ();
7554 *head -= len;
7555 }
7556 }
7557
7558 if (find_composition (end, limit, &start, &end, &prop,
7559 coding->src_object)
7560 && end <= limit)
7561 *stop = start;
7562 else
7563 *stop = limit;
7564 }
7565 return buf;
7566 }
7567
7568
7569
7570
7571
7572
7573
7574
7575
7576
7577
7578 static int *
7579 handle_charset_annotation (ptrdiff_t pos, ptrdiff_t limit,
7580 struct coding_system *coding, int *buf,
7581 ptrdiff_t *stop)
7582 {
7583 Lisp_Object val, next;
7584 int id;
7585
7586 val = Fget_text_property (make_fixnum (pos), Qcharset, coding->src_object);
7587 if (! NILP (val) && CHARSETP (val))
7588 id = XFIXNUM (CHARSET_SYMBOL_ID (val));
7589 else
7590 id = -1;
7591 ADD_CHARSET_DATA (buf, 0, id);
7592 next = Fnext_single_property_change (make_fixnum (pos), Qcharset,
7593 coding->src_object,
7594 make_fixnum (limit));
7595 *stop = XFIXNUM (next);
7596 return buf;
7597 }
7598
7599
/* Read characters from the source of CODING, starting at byte offset
   coding->consumed, into coding->charbuf in preparation for
   encoding.  Reading stops when the source is exhausted, when
   charbuf (less a reserve, see below) is full, or when a translation
   cannot be resolved.

   Each character has end-of-line conversion applied ('\n' becomes
   "\r\n" for Qdos and '\r' for any other non-Qunix type; a vector
   EOL type and inhibit_eol_conversion both mean Qunix) and is then
   translated through TRANSLATION_TABLE.  MAX_LOOKUP is the number of
   characters that may be examined for a multi-character translation;
   a buffer of that many ints is taken from the stack.

   Charset annotations are inserted in front of characters where
   coding->common_flags requests them.

   On return coding->consumed, coding->consumed_char and
   coding->charbuf_used describe what was read, and
   coding->chars_at_source is 0.  */
7600 static void
7601 consume_chars (struct coding_system *coding, Lisp_Object translation_table,
7602 int max_lookup)
7603 {
7604 int *buf = coding->charbuf;
7605 int *buf_end = coding->charbuf + coding->charbuf_size;
7606 const unsigned char *src = coding->source + coding->consumed;
7607 const unsigned char *src_end = coding->source + coding->src_bytes;
7608 ptrdiff_t pos = coding->src_pos + coding->consumed_char;
7609 ptrdiff_t end_pos = coding->src_pos + coding->src_chars;
7610 bool multibytep = coding->src_multibyte;
7611 Lisp_Object eol_type;
7612 int c;
7613 ptrdiff_t stop, stop_composition, stop_charset;
7614 int *lookup_buf = NULL;
7615
/* LOOKUP_BUF stays NULL without a translation table; it is only
   dereferenced after a lookup returns non-nil, which cannot happen
   with a nil table.  */
7616 if (! NILP (translation_table))
7617 lookup_buf = alloca (sizeof (int) * max_lookup);
7618
7619 eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
7620 if (VECTORP (eol_type))
7621 eol_type = Qunix;
7622
7623
/* Clear the composition-annotation flag.  As a result the test of
   that flag just below never succeeds and STOP_COMPOSITION always
   starts at END_POS.  */
7624 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7625
/* STOP is the next position at which annotations must be checked.  */
7626 if (NILP (coding->src_object))
7627 stop = stop_composition = stop_charset = end_pos;
7628 else
7629 {
7630 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
7631 stop = stop_composition = pos;
7632 else
7633 stop = stop_composition = end_pos;
7634 if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
7635 stop = stop_charset = pos;
7636 else
7637 stop_charset = end_pos;
7638 }
7639
7640
/* Keep 1 + MAX_ANNOTATION_LENGTH elements in reserve: one iteration
   may store an extra '\r' for a dos EOL, and an annotation, before
   the character itself.  */
7641 buf_end -= 1 + MAX_ANNOTATION_LENGTH;
7642 while (buf < buf_end)
7643 {
7644 Lisp_Object trans;
7645
7646 if (pos == stop)
7647 {
7648 if (pos == end_pos)
7649 break;
7650 if (pos == stop_composition)
7651 buf = handle_composition_annotation (pos, end_pos, coding,
7652 buf, &stop_composition);
7653 if (pos == stop_charset)
7654 buf = handle_charset_annotation (pos, end_pos, coding,
7655 buf, &stop_charset);
7656 stop = (stop_composition < stop_charset
7657 ? stop_composition : stop_charset);
7658 }
7659
7660 if (! multibytep)
7661 {
/* Unibyte source.  The raw-text and CCL encoders take each byte as
   is.  For the others, a byte sequence accepted by multibyte_length
   is read as one character and POS advances by its length in bytes;
   any other byte becomes BYTE8_TO_CHAR of itself.  */
7662 if (coding->encoder == encode_coding_raw_text
7663 || coding->encoder == encode_coding_ccl)
7664 c = *src++, pos++;
7665 else
7666 {
7667 int bytes = multibyte_length (src, src_end, true, true);
7668 if (0 < bytes)
7669 c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
7670 else
7671 c = BYTE8_TO_CHAR (*src), src++, pos++;
7672 }
7673 }
7674 else
7675 c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
/* With CODING_MODE_SELECTIVE_DISPLAY, '\r' in the source is treated
   as a newline.  */
7676 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
7677 c = '\n';
7678 if (! EQ (eol_type, Qunix))
7679 {
7680 if (c == '\n')
7681 {
/* dos: emit '\r' and then the '\n' itself; otherwise replace the
   '\n' by '\r'.  */
7682 if (EQ (eol_type, Qdos))
7683 *buf++ = '\r';
7684 else
7685 c = '\r';
7686 }
7687 }
7688
7689 trans = Qnil;
7690 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
7691 if (NILP (trans))
7692 *buf++ = c;
7693 else
7694 {
7695 ptrdiff_t from_nchars = 1, to_nchars = 1;
7696 int *lookup_buf_end;
7697 const unsigned char *p = src;
7698 int i;
7699
/* Collect C and up to MAX_LOOKUP - 1 following source characters
   (read ahead through P, without consuming them) for matching
   against multi-character rules.  */
7700 lookup_buf[0] = c;
7701 for (i = 1; i < max_lookup && p < src_end; i++)
7702 lookup_buf[i] = string_char_advance (&p);
7703 lookup_buf_end = lookup_buf + i;
7704 trans = get_translation (trans, lookup_buf, lookup_buf_end,
7705 &from_nchars);
7706 if (FIXNUMP (trans))
7707 c = XFIXNUM (trans);
7708 else if (VECTORP (trans))
7709 {
7710 to_nchars = ASIZE (trans);
/* Stop if the translated characters would not fit in charbuf.  */
7711 if (buf_end - buf < to_nchars)
7712 break;
7713 c = XFIXNUM (AREF (trans, 0));
7714 }
7715 else
7716 break;
7717 *buf++ = c;
7718 for (i = 1; i < to_nchars; i++)
7719 *buf++ = XFIXNUM (AREF (trans, i));
/* Consume the additional source characters that the matched rule
   covered.  */
7720 for (i = 1; i < from_nchars; i++, pos++)
7721 src += multibyte_length (src, NULL, false, true);
7722 }
7723 }
7724
7725 coding->consumed = src - coding->source;
7726 coding->consumed_char = pos - coding->src_pos;
7727 coding->charbuf_used = buf - coding->charbuf;
7728 coding->chars_at_source = 0;
7729 }
7730
7731
7732
7733
7734
7735
7736
7737
7738
7739
7740
7741
7742
7743
7744
7745
7746
7747
7748
7749
7750
7751
7752
7753 static void
7754 encode_coding (struct coding_system *coding)
7755 {
7756 Lisp_Object attrs;
7757 Lisp_Object translation_table;
7758 int max_lookup;
7759 struct ccl_spec cclspec;
7760
7761 USE_SAFE_ALLOCA;
7762
7763 attrs = CODING_ID_ATTRS (coding->id);
7764 if (coding->encoder == encode_coding_raw_text)
7765 translation_table = Qnil, max_lookup = 0;
7766 else
7767 translation_table = get_translation_table (attrs, 1, &max_lookup);
7768
7769 if (BUFFERP (coding->dst_object))
7770 {
7771 set_buffer_internal (XBUFFER (coding->dst_object));
7772 coding->dst_multibyte
7773 = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
7774 }
7775
7776 coding->consumed = coding->consumed_char = 0;
7777 coding->produced = coding->produced_char = 0;
7778 record_conversion_result (coding, CODING_RESULT_SUCCESS);
7779
7780 ALLOC_CONVERSION_WORK_AREA (coding, coding->src_chars);
7781
7782 if (coding->encoder == encode_coding_ccl)
7783 {
7784 coding->spec.ccl = &cclspec;
7785 setup_ccl_program (&cclspec.ccl, CODING_CCL_ENCODER (coding));
7786 }
7787 do {
7788 coding_set_source (coding);
7789 consume_chars (coding, translation_table, max_lookup);
7790 coding_set_destination (coding);
7791
7792
7793 unsigned saved_mode = coding->mode;
7794 if (coding->consumed_char < coding->src_chars)
7795 coding->mode &= ~CODING_MODE_LAST_BLOCK;
7796 (*(coding->encoder)) (coding);
7797 coding->mode = saved_mode;
7798 } while (coding->consumed_char < coding->src_chars);
7799
7800 if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
7801 insert_from_gap (coding->produced_char, coding->produced, 0);
7802
7803 SAFE_FREE ();
7804 }
7805
7806
7807
7808
7809
7810
7811
7812
7813
7814
7815
7816
7817
7818
/* Name given to the working buffer created by code_conversion_save;
   also the base from which names of additional working buffers are
   generated.  */
7819 static Lisp_Object Vcode_conversion_workbuf_name;
7820
7821
/* Working buffer that code_conversion_save hands out again whenever
   it is not already in use; it is re-created if no longer live.  */
7822 static Lisp_Object Vcode_conversion_reused_workbuf;
7823
7824
/* True while Vcode_conversion_reused_workbuf is handed out.  Set by
   code_conversion_save and cleared by code_conversion_restore.  */
7825 static bool reused_workbuf_in_use;
7826
7827 static void
7828 code_conversion_restore (Lisp_Object arg)
7829 {
7830 Lisp_Object current, workbuf;
7831
7832 current = XCAR (arg);
7833 workbuf = XCDR (arg);
7834 if (! NILP (workbuf))
7835 {
7836 if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7837 reused_workbuf_in_use = false;
7838 else
7839 Fkill_buffer (workbuf);
7840 }
7841 set_buffer_internal (XBUFFER (current));
7842 }
7843
7844 Lisp_Object
7845 code_conversion_save (bool with_work_buf, bool multibyte)
7846 {
7847 Lisp_Object workbuf = Qnil;
7848
7849 if (with_work_buf)
7850 {
7851 if (reused_workbuf_in_use)
7852 {
7853 Lisp_Object name
7854 = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
7855 workbuf = Fget_buffer_create (name, Qt);
7856 }
7857 else
7858 {
7859 if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
7860 Vcode_conversion_reused_workbuf
7861 = Fget_buffer_create (Vcode_conversion_workbuf_name, Qt);
7862 workbuf = Vcode_conversion_reused_workbuf;
7863 }
7864 }
7865 record_unwind_protect (code_conversion_restore,
7866 Fcons (Fcurrent_buffer (), workbuf));
7867 if (!NILP (workbuf))
7868 {
7869 struct buffer *current = current_buffer;
7870 set_buffer_internal (XBUFFER (workbuf));
7871
7872
7873
7874 Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
7875 Ferase_buffer ();
7876 bset_undo_list (current_buffer, Qt);
7877 bset_enable_multibyte_characters (current_buffer, multibyte ? Qt : Qnil);
7878 if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7879 reused_workbuf_in_use = true;
7880 set_buffer_internal (current);
7881 }
7882
7883 return workbuf;
7884 }
7885
7886 static void
7887 coding_restore_undo_list (Lisp_Object arg)
7888 {
7889 Lisp_Object undo_list = XCAR (arg);
7890 struct buffer *buf = XBUFFER (XCDR (arg));
7891
7892 bset_undo_list (buf, undo_list);
7893 }
7894
7895
/* Decode BYTES bytes of undecoded text associated with the gap of
   the current buffer, using CODING, and insert the result at point.
   The gap must be at point (asserted).  Source and destination are
   both set to the current buffer, with the source position recorded
   as -BYTES; in the fast path below the bytes are accessed in place
   as the BYTES bytes ending at GAP_END_ADDR.

   On return coding->produced and coding->produced_char describe the
   inserted text.

   Fast path: when ASCII optimization is not disabled, the coding
   system is ASCII compatible, has no post-read-conversion function
   and no decoding translation table, and the text is either pure
   ASCII or known to be valid UTF-8 for a utf-8 coding system, only
   end-of-line conversion is done (in place) and the bytes are
   inserted directly with insert_from_gap.

   Otherwise decode_coding does the work, followed by the coding
   system's post-read-conversion function if there is one.  */
7896 void
7897 decode_coding_gap (struct coding_system *coding, ptrdiff_t bytes)
7898 {
7899 specpdl_ref count = SPECPDL_INDEX ();
7900 Lisp_Object attrs;
7901
7902 eassert (GPT_BYTE == PT_BYTE);
7903
7904 coding->src_object = Fcurrent_buffer ();
7905 coding->src_chars = bytes;
7906 coding->src_bytes = bytes;
7907 coding->src_pos = -bytes;
7908 coding->src_pos_byte = -bytes;
7909 coding->src_multibyte = false;
7910 coding->dst_object = coding->src_object;
7911 coding->dst_pos = PT;
7912 coding->dst_pos_byte = PT_BYTE;
7913 eassert (coding->dst_multibyte
7914 == !NILP (BVAR (current_buffer, enable_multibyte_characters)));
7915
/* Reset the detection results; -1 means "not determined yet".  */
7916 coding->head_ascii = -1;
7917 coding->detected_utf8_bytes = coding->detected_utf8_chars = -1;
7918 coding->eol_seen = EOL_SEEN_NONE;
7919 if (CODING_REQUIRE_DETECTION (coding))
7920 detect_coding (coding);
7921 attrs = CODING_ID_ATTRS (coding->id);
7922 if (! disable_ascii_optimization
7923 && ! coding->src_multibyte
7924 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
7925 && NILP (CODING_ATTR_POST_READ (attrs))
7926 && NILP (get_translation_table (attrs, 0, NULL)))
7927 {
/* CHARS becomes the number of characters in the text if the fast
   path applies, and -1 otherwise.  */
7928 ptrdiff_t chars = coding->head_ascii;
7929 if (chars < 0)
7930 chars = check_ascii (coding);
7931 if (chars != bytes)
7932 {
7933
/* There exists a non-ASCII byte.  The fast path still applies to a
   utf-8 coding system when all source bytes were detected as valid
   UTF-8.  */
7934 if (EQ (CODING_ATTR_TYPE (attrs), Qutf_8)
7935 && coding->detected_utf8_bytes == coding->src_bytes)
7936 {
7937 if (coding->detected_utf8_chars >= 0)
7938 chars = coding->detected_utf8_chars;
7939 else
7940 chars = check_utf_8 (coding);
/* Unless the coding system is the without-BOM variant, a leading
   UTF-8 byte order mark is excluded from the text: one character
   and three bytes less.  */
7941 if (CODING_UTF_8_BOM (coding) != utf_without_bom
7942 && coding->head_ascii == 0
7943 && coding->source[0] == UTF_8_BOM_1
7944 && coding->source[1] == UTF_8_BOM_2
7945 && coding->source[2] == UTF_8_BOM_3)
7946 {
7947 chars--;
7948 bytes -= 3;
7949 coding->src_bytes -= 3;
7950 }
7951 }
7952 else
7953 chars = -1;
7954 }
7955 if (chars >= 0)
7956 {
7957 Lisp_Object eol_type;
7958
7959 eol_type = CODING_ID_EOL_TYPE (coding->id);
/* A vector EOL type is not decided yet; settle it from what
   detection saw, if anything.  */
7960 if (VECTORP (eol_type))
7961 {
7962 if (coding->eol_seen != EOL_SEEN_NONE)
7963 eol_type = adjust_coding_eol_type (coding, coding->eol_seen);
7964 }
7965 if (EQ (eol_type, Qmac))
7966 {
/* mac: replace every '\r' by '\n' in place.  */
7967 unsigned char *src_end = GAP_END_ADDR;
7968 unsigned char *src = src_end - coding->src_bytes;
7969
7970 while (src < src_end)
7971 {
7972 if (*src++ == '\r')
7973 src[-1] = '\n';
7974 }
7975 }
7976 else if (EQ (eol_type, Qdos))
7977 {
/* dos: copy the text backwards onto itself, dropping the '\r' of
   every "\r\n" pair, so that the shortened text ends at
   GAP_END_ADDR.  DIFF is the number of bytes dropped.  */
7978 unsigned char *src = GAP_END_ADDR;
7979 unsigned char *src_beg = src - coding->src_bytes;
7980 unsigned char *dst = src;
7981 ptrdiff_t diff;
7982
7983 while (src_beg < src)
7984 {
7985 *--dst = *--src;
7986 if (*src == '\n' && src > src_beg && src[-1] == '\r')
7987 src--;
7988 }
7989 diff = dst - src;
7990 bytes -= diff;
7991 chars -= diff;
7992 }
7993 coding->produced = bytes;
7994 coding->produced_char = chars;
7995 insert_from_gap (chars, bytes, 1);
/* Nothing has been pushed on the specpdl since COUNT was taken, so
   returning without unbind_to is fine here.  */
7996 return;
7997 }
7998 }
/* General path.  No working buffer is needed; this call only
   arranges for the current buffer to be restored on unwind.  */
7999 code_conversion_save (0, 0);
8000
8001 coding->mode |= CODING_MODE_LAST_BLOCK;
/* Set inhibit_shrinking around the decoding; source and destination
   share this buffer's text.  */
8002 current_buffer->text->inhibit_shrinking = 1;
8003 decode_coding (coding);
8004 current_buffer->text->inhibit_shrinking = 0;
8005
8006 if (! NILP (CODING_ATTR_POST_READ (attrs)))
8007 {
/* Call the post-read-conversion function at the start of the
   decoded text with the number of decoded characters, undo
   recording off, and account for any change it makes to the buffer
   size.  Its value must be a natural number.  */
8008 ptrdiff_t prev_Z = Z, prev_Z_BYTE = Z_BYTE;
8009 Lisp_Object val;
8010 Lisp_Object undo_list = BVAR (current_buffer, undo_list);
8011
8012 record_unwind_protect (coding_restore_undo_list,
8013 Fcons (undo_list, Fcurrent_buffer ()));
8014 bset_undo_list (current_buffer, Qt);
8015 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
8016 val = call1 (CODING_ATTR_POST_READ (attrs),
8017 make_fixnum (coding->produced_char));
8018 CHECK_FIXNAT (val);
8019 coding->produced_char += Z - prev_Z;
8020 coding->produced += Z_BYTE - prev_Z_BYTE;
8021 }
8022
8023 unbind_to (count, Qnil);
8024 }
8025
8026
8027
8028
8029
8030
8031
8032
8033
8034
8035
8036
8037
8038
8039
8040
8041
8042
8043
8044
8045
8046
8047
8048
8049
8050
8051
8052
8053
8054
8055
/* Decode, using CODING, the text of SRC_OBJECT between character
   positions FROM and TO (byte positions FROM_BYTE and TO_BYTE).
   SRC_OBJECT is handled specially when it is a string or a buffer.

   DST_OBJECT selects where the result goes:
   - Qt: the text is decoded into a working buffer and
     coding->dst_object is set to a string holding the result.
   - a buffer: the text is inserted at that buffer's point.  If it
     is the same buffer as SRC_OBJECT, the source text is deleted
     first, so the decoded text replaces it, and point and the
     markers at the edges of the region are readjusted afterwards.
   - anything else: coding->dst_object is set to nil and the result
     is left in coding->destination.  If the coding system has a
     post-read-conversion function, a working buffer is used
     internally.

   After decoding, the coding system's post-read-conversion function,
   if any, is called on the decoded text.  Vdeactivate_mark is
   preserved across the call.  */
8056 void
8057 decode_coding_object (struct coding_system *coding,
8058 Lisp_Object src_object,
8059 ptrdiff_t from, ptrdiff_t from_byte,
8060 ptrdiff_t to, ptrdiff_t to_byte,
8061 Lisp_Object dst_object)
8062 {
8063 specpdl_ref count = SPECPDL_INDEX ();
8064 unsigned char *destination UNINIT;
8065 ptrdiff_t dst_bytes UNINIT;
8066 ptrdiff_t chars = to - from;
8067 ptrdiff_t bytes = to_byte - from_byte;
8068 Lisp_Object attrs;
8069 ptrdiff_t saved_pt = -1, saved_pt_byte UNINIT;
8070 bool need_marker_adjustment = 0;
8071 Lisp_Object old_deactivate_mark;
8072
8073 old_deactivate_mark = Vdeactivate_mark;
8074
/* Remember the caller's destination area; it is only consulted
   again in the nil-DST_OBJECT case near the end.  */
8075 if (NILP (dst_object))
8076 {
8077 destination = coding->destination;
8078 dst_bytes = coding->dst_bytes;
8079 }
8080
8081 coding->src_object = src_object;
8082 coding->src_chars = chars;
8083 coding->src_bytes = bytes;
/* More bytes than characters means the source is multibyte.  */
8084 coding->src_multibyte = chars < bytes;
8085
8086 if (STRINGP (src_object))
8087 {
8088 coding->src_pos = from;
8089 coding->src_pos_byte = from_byte;
8090 }
8091 else if (BUFFERP (src_object))
8092 {
8093 set_buffer_internal (XBUFFER (src_object));
8094 if (from != GPT)
8095 move_gap_both (from, from_byte);
8096 if (EQ (src_object, dst_object))
8097 {
8098 struct Lisp_Marker *tail;
8099
/* Flag the markers sitting exactly at the edge of the region that
   matters for their insertion type (FROM for insertion-type markers,
   TO for the others); they are repositioned after the
   replacement.  */
8100 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
8101 {
8102 tail->need_adjustment
8103 = tail->charpos == (tail->insertion_type ? from : to);
8104 need_marker_adjustment |= tail->need_adjustment;
8105 }
/* Delete the source text from the buffer; from here on the source
   position is recorded as negative counts.  Point is saved and
   restored at the end.  */
8106 saved_pt = PT, saved_pt_byte = PT_BYTE;
8107 TEMP_SET_PT_BOTH (from, from_byte);
8108 current_buffer->text->inhibit_shrinking = 1;
8109 del_range_both (from, from_byte, to, to_byte, 1);
8110 coding->src_pos = -chars;
8111 coding->src_pos_byte = -bytes;
8112 }
8113 else
8114 {
8115 coding->src_pos = from;
8116 coding->src_pos_byte = from_byte;
8117 }
8118 }
8119
8120 if (CODING_REQUIRE_DETECTION (coding))
8121 detect_coding (coding);
8122 attrs = CODING_ID_ATTRS (coding->id);
8123
/* A working buffer is needed when a string is requested, or when a
   post-read-conversion function must run but the caller gave no
   buffer for it to run in.  */
8124 if (EQ (dst_object, Qt)
8125 || (! NILP (CODING_ATTR_POST_READ (attrs))
8126 && NILP (dst_object)))
8127 {
8128 coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
8129 coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
8130 coding->dst_pos = BEG;
8131 coding->dst_pos_byte = BEG_BYTE;
8132 }
8133 else if (BUFFERP (dst_object))
8134 {
8135 code_conversion_save (0, 0);
8136 coding->dst_object = dst_object;
8137 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
8138 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
8139 coding->dst_multibyte
8140 = ! NILP (BVAR (XBUFFER (dst_object), enable_multibyte_characters));
8141 }
8142 else
8143 {
8144 code_conversion_save (0, 0);
8145 coding->dst_object = Qnil;
8146
8147
8148
/* The result is always produced as multibyte here, without
   consulting CODING_FOR_UNIBYTE.  */
8149 coding->dst_multibyte = 1;
8150 }
8151
8152 decode_coding (coding);
8153
8154 if (BUFFERP (coding->dst_object))
8155 set_buffer_internal (XBUFFER (coding->dst_object));
8156
8157 if (! NILP (CODING_ATTR_POST_READ (attrs)))
8158 {
/* Call the post-read-conversion function at the start of the
   decoded text with the number of decoded characters, undo
   recording off, and account for any change it makes to the buffer
   size.  Unlike in decode_coding_gap the function is called through
   safe_call1.  Its value must be a natural number.  */
8159 ptrdiff_t prev_Z = Z, prev_Z_BYTE = Z_BYTE;
8160 Lisp_Object val;
8161 Lisp_Object undo_list = BVAR (current_buffer, undo_list);
8162 specpdl_ref count1 = SPECPDL_INDEX ();
8163
8164 record_unwind_protect (coding_restore_undo_list,
8165 Fcons (undo_list, Fcurrent_buffer ()));
8166 bset_undo_list (current_buffer, Qt);
8167 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
8168 val = safe_call1 (CODING_ATTR_POST_READ (attrs),
8169 make_fixnum (coding->produced_char));
8170 CHECK_FIXNAT (val);
8171 coding->produced_char += Z - prev_Z;
8172 coding->produced += Z_BYTE - prev_Z_BYTE;
8173 unbind_to (count1, Qnil);
8174 }
8175
8176 if (EQ (dst_object, Qt))
8177 {
8178 coding->dst_object = Fbuffer_string ();
8179 }
8180 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
8181 {
/* The result sits in the working buffer but the caller wants it at
   coding->destination: enlarge the caller's area and copy the text
   out of the buffer, moving the gap out of the way first.
   NOTE(review): nothing is copied when the caller's area already
   has at least coding->produced bytes -- confirm that callers never
   rely on the copy in that case.  */
8182 set_buffer_internal (XBUFFER (coding->dst_object));
8183 if (dst_bytes < coding->produced)
8184 {
8185 eassert (coding->produced > 0);
8186 destination = xrealloc (destination, coding->produced);
8187 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
8188 move_gap_both (BEGV, BEGV_BYTE);
8189 memcpy (destination, BEGV_ADDR, coding->produced);
8190 coding->destination = destination;
8191 }
8192 }
8193
8194 if (saved_pt >= 0)
8195 {
8196
8197
8198
8199
/* This is the case of
   (BUFFERP (src_object) && EQ (src_object, dst_object)).
   Point was moved while the original text was being replaced, so
   restore it: unchanged if it was before the region, at FROM if it
   was inside, and shifted by the change in size if it was after.  */
8200 set_buffer_internal (XBUFFER (src_object));
8201 current_buffer->text->inhibit_shrinking = 0;
8202 if (saved_pt < from)
8203 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
8204 else if (saved_pt < from + chars)
8205 TEMP_SET_PT_BOTH (from, from_byte);
8206 else if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
8207 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
8208 saved_pt_byte + (coding->produced - bytes));
8209 else
/* In a unibyte buffer the character shift equals the byte shift.  */
8210 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
8211 saved_pt_byte + (coding->produced - bytes));
8212
8213 if (need_marker_adjustment)
8214 {
8215 struct Lisp_Marker *tail;
8216
/* Put the flagged markers back at the edges of the new text:
   insertion-type markers at its start, the others at its end.  */
8217 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
8218 if (tail->need_adjustment)
8219 {
8220 tail->need_adjustment = 0;
8221 if (tail->insertion_type)
8222 {
8223 tail->bytepos = from_byte;
8224 tail->charpos = from;
8225 }
8226 else
8227 {
8228 tail->bytepos = from_byte + coding->produced;
8229 tail->charpos
8230 = (NILP (BVAR (current_buffer, enable_multibyte_characters))
8231 ? tail->bytepos : from + coding->produced_char);
8232 }
8233 }
8234 }
8235 }
8236
8237 Vdeactivate_mark = old_deactivate_mark;
/* Run the unwind forms recorded since COUNT (restoring the current
   buffer and releasing the working buffer).  */
8238 unbind_to (count, coding->dst_object);
8239 }
8240
8241
8242
8243
8244
8245
8246
8247
8248
8249
8250
8251
8252
8253
8254
8255
8256
8257
8258
8259
8260
8261
8262
8263
8264
8265
8266
8267
8268
8269
8270
8271
8272
8273
8274
/* Encode the text of SRC_OBJECT lying between character positions
   FROM and TO (byte positions FROM_BYTE and TO_BYTE) according to
   CODING, and deliver the result as directed by DST_OBJECT.

   SRC_OBJECT is handled as a string, as a buffer, or (any other
   value) as text addressed through CODING itself.

   DST_OBJECT selects the destination:
     - a buffer: the result goes to that buffer; when it is the very
       buffer given as SRC_OBJECT the source region is deleted first
       and the destination position is FROM, otherwise the destination
       position is that buffer's point;
     - t: a destination area is allocated here and, after encoding,
       CODING->dst_object is set to a unibyte string made from it
       (or to nil when CODING->raw_destination is set);
     - anything else: CODING->dst_object is set to nil and the
       destination is marked unibyte.

   If CODING has a pre-write function, the source text is first copied
   into a work buffer and that function is called on it before the
   actual encoding.

   Vdeactivate_mark is saved on entry and restored on exit.  */
8275 void
8276 encode_coding_object (struct coding_system *coding,
8277 Lisp_Object src_object,
8278 ptrdiff_t from, ptrdiff_t from_byte,
8279 ptrdiff_t to, ptrdiff_t to_byte,
8280 Lisp_Object dst_object)
8281 {
8282 specpdl_ref count = SPECPDL_INDEX ();
8283 ptrdiff_t chars = to - from;
8284 ptrdiff_t bytes = to_byte - from_byte;
8285 Lisp_Object attrs;
/* saved_pt stays negative unless point of the source buffer was
   recorded (same-buffer case); saved_pt_byte is only read then.  */
8286 ptrdiff_t saved_pt = -1, saved_pt_byte UNINIT;
8287 bool need_marker_adjustment = 0;
8288 bool kill_src_buffer = 0;
8289 Lisp_Object old_deactivate_mark;
8290
8291 old_deactivate_mark = Vdeactivate_mark;
8292
8293 coding->src_object = src_object;
8294 coding->src_chars = chars;
8295 coding->src_bytes = bytes;
/* Fewer characters than bytes means the source is treated as
   multibyte.  */
8296 coding->src_multibyte = chars < bytes;
8297
8298 attrs = CODING_ID_ATTRS (coding->id);
8299
8300 bool same_buffer = false;
8301 if (BASE_EQ (src_object, dst_object) && BUFFERP (src_object))
8302 {
8303 struct Lisp_Marker *tail;
8304
8305 same_buffer = true;
8306
/* Flag every marker sitting exactly on the region edge that matters
   for its insertion type (FROM when insertion_type is set, TO
   otherwise); flagged markers are repositioned after encoding.  */
8307 for (tail = BUF_MARKERS (XBUFFER (src_object)); tail; tail = tail->next)
8308 {
8309 tail->need_adjustment
8310 = tail->charpos == (tail->insertion_type ? from : to);
8311 need_marker_adjustment |= tail->need_adjustment;
8312 }
8313 }
8314
8315 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
8316 {
/* The value of code_conversion_save is used as a buffer below; the
   source text is copied into it so the pre-write function can operate
   on a copy.  */
8317 coding->src_object = code_conversion_save (1, coding->src_multibyte);
8318 set_buffer_internal (XBUFFER (coding->src_object));
8319 if (STRINGP (src_object))
8320 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
8321 else if (BUFFERP (src_object))
8322 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
8323 else
8324 insert_1_both ((char *) coding->source + from, chars, bytes, 0, 0, 0);
8325
8326 if (same_buffer)
8327 {
/* Source and destination are one buffer: remember its point, delete
   the region being replaced, then return to the work buffer.  */
8328 set_buffer_internal (XBUFFER (src_object));
8329 saved_pt = PT, saved_pt_byte = PT_BYTE;
8330 del_range_both (from, from_byte, to, to_byte, 1);
8331 set_buffer_internal (XBUFFER (coding->src_object));
8332 }
8333
8334 safe_call2 (CODING_ATTR_PRE_WRITE (attrs),
8335 make_fixnum (BEG), make_fixnum (Z));
/* If the pre-write function left a different buffer current, encode
   from that buffer instead and kill it at the end.  */
8336 if (XBUFFER (coding->src_object) != current_buffer)
8337 kill_src_buffer = 1;
8338 coding->src_object = Fcurrent_buffer ();
8339 if (BEG != GPT)
8340 move_gap_both (BEG, BEG_BYTE);
/* The whole of the now-current buffer is the source text.  */
8341 coding->src_chars = Z - BEG;
8342 coding->src_bytes = Z_BYTE - BEG_BYTE;
8343 coding->src_pos = BEG;
8344 coding->src_pos_byte = BEG_BYTE;
8345 coding->src_multibyte = Z < Z_BYTE;
8346 }
8347 else if (STRINGP (src_object))
8348 {
8349 code_conversion_save (0, 0);
8350 coding->src_pos = from;
8351 coding->src_pos_byte = from_byte;
8352 }
8353 else if (BUFFERP (src_object))
8354 {
8355 code_conversion_save (0, 0);
8356 set_buffer_internal (XBUFFER (src_object));
8357 if (same_buffer)
8358 {
/* Remove the region from the buffer and encode from whatever
   del_range_1 returned, starting at position 0.
   NOTE(review): presumably del_range_1 returns the deleted text as a
   string here -- confirm against its definition.  */
8359 saved_pt = PT, saved_pt_byte = PT_BYTE;
8360 coding->src_object = del_range_1 (from, to, 1, 1);
8361 coding->src_pos = 0;
8362 coding->src_pos_byte = 0;
8363 }
8364 else
8365 {
/* Move the gap to FROM when it lies inside [FROM, TO].  */
8366 if (from < GPT && to >= GPT)
8367 move_gap_both (from, from_byte);
8368 coding->src_pos = from;
8369 coding->src_pos_byte = from_byte;
8370 }
8371 }
8372 else
8373 {
8374 code_conversion_save (0, 0);
8375 coding->src_pos = from;
8376 coding->src_pos_byte = from_byte;
8377 }
8378
/* Set up the destination.  */
8379 if (BUFFERP (dst_object))
8380 {
8381 coding->dst_object = dst_object;
8382 if (BASE_EQ (src_object, dst_object))
8383 {
8384 coding->dst_pos = from;
8385 coding->dst_pos_byte = from_byte;
8386 }
8387 else
8388 {
8389 struct buffer *current = current_buffer;
8390
/* Temporarily visit the destination buffer to read its point and put
   the gap there.  */
8391 set_buffer_temp (XBUFFER (dst_object));
8392 coding->dst_pos = PT;
8393 coding->dst_pos_byte = PT_BYTE;
8394 move_gap_both (coding->dst_pos, coding->dst_pos_byte);
8395 set_buffer_temp (current);
8396 }
8397 coding->dst_multibyte
8398 = ! NILP (BVAR (XBUFFER (dst_object), enable_multibyte_characters));
8399 }
8400 else if (EQ (dst_object, Qt))
8401 {
/* Initial destination area: one byte per source character, but never
   an empty allocation.  */
8402 ptrdiff_t dst_bytes = max (1, coding->src_chars);
8403 coding->dst_object = Qnil;
8404 coding->destination = xmalloc (dst_bytes);
8405 coding->dst_bytes = dst_bytes;
8406 coding->dst_multibyte = 0;
8407 }
8408 else
8409 {
8410 coding->dst_object = Qnil;
8411 coding->dst_multibyte = 0;
8412 }
8413
8414 encode_coding (coding);
8415
8416 if (EQ (dst_object, Qt))
8417 {
/* encode_coding may have left a buffer in dst_object; in that case
   the result is the text of the current buffer.  */
8418 if (BUFFERP (coding->dst_object))
8419 coding->dst_object = Fbuffer_string ();
8420 else if (coding->raw_destination)
8421 /* No Lisp string is built and the destination area is not freed
8422    here.  NOTE(review): presumably the caller that set
8423    raw_destination owns and frees coding->destination -- confirm.  */
8424 coding->dst_object = Qnil;
8425 else
8426 {
8427 coding->dst_object
8428 = make_unibyte_string ((char *) coding->destination,
8429 coding->produced);
8430 xfree (coding->destination);
8431 }
8432 }
8433
8434 if (saved_pt >= 0)
8435 {
8436 /* saved_pt is only set in the same-buffer branches above, where
8437    the source region was deleted from the buffer.  Restore point
8438    relative to the replaced region: unchanged before it, FROM
8439    inside it, shifted by the size change after it.  */
8440 set_buffer_internal (XBUFFER (src_object));
8441 if (saved_pt < from)
8442 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
8443 else if (saved_pt < from + chars)
8444 TEMP_SET_PT_BOTH (from, from_byte);
8445 else if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
8446 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
8447 saved_pt_byte + (coding->produced - bytes));
8448 else
8449 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
8450 saved_pt_byte + (coding->produced - bytes));
8451
8452 if (need_marker_adjustment)
8453 {
8454 struct Lisp_Marker *tail;
8455
/* Reposition the markers flagged earlier: those with insertion_type
   set go to FROM, the others go to the end of the produced text.  */
8456 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
8457 if (tail->need_adjustment)
8458 {
8459 tail->need_adjustment = 0;
8460 if (tail->insertion_type)
8461 {
8462 tail->bytepos = from_byte;
8463 tail->charpos = from;
8464 }
8465 else
8466 {
8467 tail->bytepos = from_byte + coding->produced;
8468 tail->charpos
8469 = (NILP (BVAR (current_buffer, enable_multibyte_characters))
8470 ? tail->bytepos : from + coding->produced_char);
8471 }
8472 }
8473 }
8474 }
8475
8476 if (kill_src_buffer)
8477 Fkill_buffer (coding->src_object);
8478
8479 Vdeactivate_mark = old_deactivate_mark;
8480 unbind_to (count, Qnil);
8481 }
8482
8483
8484 Lisp_Object
8485 preferred_coding_system (void)
8486 {
8487 int id = coding_categories[coding_priorities[0]].id;
8488
8489 return CODING_ID_NAME (id);
8490 }
8491
8492 #if defined (WINDOWSNT) || defined (CYGWIN)
8493
8494 Lisp_Object
8495 from_unicode (Lisp_Object str)
8496 {
8497 CHECK_STRING (str);
8498 if (!STRING_MULTIBYTE (str) &&
8499 SBYTES (str) & 1)
8500 {
8501 str = Fsubstring (str, make_fixnum (0), make_fixnum (-1));
8502 }
8503
8504 return code_convert_string_norecord (str, Qutf_16le, 0);
8505 }
8506
8507 Lisp_Object
8508 from_unicode_buffer (const wchar_t *wstr)
8509 {
8510
8511 ptrdiff_t len = 1 + sizeof (wchar_t) * wcslen (wstr);
8512 AUTO_STRING_WITH_LEN (str, (char *) wstr, len);
8513 return from_unicode (str);
8514 }
8515
8516 wchar_t *
8517 to_unicode (Lisp_Object str, Lisp_Object *buf)
8518 {
8519 *buf = code_convert_string_norecord (str, Qutf_16le, 1);
8520
8521
8522
8523
8524
8525
8526 str = make_uninit_string (SBYTES (*buf) + 1);
8527 memcpy (SDATA (str), SDATA (*buf), SBYTES (*buf));
8528 SDATA (str) [SBYTES (*buf)] = '\0';
8529 *buf = str;
8530 return WCSDATA (*buf);
8531 }
8532
8533 #endif
8534
8535
8536
8537
8538 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
8539 doc:
8540
8541 )
8542 (Lisp_Object object)
8543 {
8544 if (NILP (object)
8545 || CODING_SYSTEM_ID (object) >= 0)
8546 return Qt;
8547 if (! SYMBOLP (object)
8548 || NILP (Fget (object, Qcoding_system_define_form)))
8549 return Qnil;
8550 return Qt;
8551 }
8552
8553 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
8554 Sread_non_nil_coding_system, 1, 1, 0,
8555 doc: )
8556 (Lisp_Object prompt)
8557 {
8558 Lisp_Object val;
8559 do
8560 {
8561 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8562 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
8563 }
8564 while (SCHARS (val) == 0);
8565 return (Fintern (val, Qnil));
8566 }
8567
8568 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
8569 doc:
8570
8571
8572 )
8573 (Lisp_Object prompt, Lisp_Object default_coding_system)
8574 {
8575 Lisp_Object val;
8576 specpdl_ref count = SPECPDL_INDEX ();
8577
8578 if (SYMBOLP (default_coding_system))
8579 default_coding_system = SYMBOL_NAME (default_coding_system);
8580 specbind (Qcompletion_ignore_case, Qt);
8581 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8582 Qt, Qnil, Qcoding_system_history,
8583 default_coding_system, Qnil);
8584 val = unbind_to (count, val);
8585 return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
8586 }
8587
8588 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
8589 1, 1, 0,
8590 doc:
8591
8592
8593 )
8594 (Lisp_Object coding_system)
8595 {
8596 Lisp_Object define_form;
8597
8598 define_form = Fget (coding_system, Qcoding_system_define_form);
8599 if (! NILP (define_form))
8600 {
8601 Fput (coding_system, Qcoding_system_define_form, Qnil);
8602 safe_eval (define_form);
8603 }
8604 if (!NILP (Fcoding_system_p (coding_system)))
8605 return coding_system;
8606 xsignal1 (Qcoding_system_error, coding_system);
8607 }
8608
8609
8610
8611
8612
8613
8614
8615
8616
8617
8618
8619
8620
8621
8622
8623
8624
8625
/* Detect how the SRC_BYTES bytes (SRC_CHARS characters) at SRC are
   encoded.  MULTIBYTEP is recorded as the multibyteness of the source.

   CODING_SYSTEM restricts the detection: nil stands for `undecided'.
   When its category is `undecided' every category is examined in
   priority order; when it is one of the UTF-8/UTF-16 "auto"
   categories only that family is examined; otherwise the coding
   system itself is the answer.

   If HIGHEST, return the first element of the candidate list (or nil
   if the list is empty); otherwise return the whole list.  Each
   element is a coding system name, with the end-of-line variant
   chosen when the EOL format could be determined.  */
8626 Lisp_Object
8627 detect_coding_system (const unsigned char *src,
8628 ptrdiff_t src_chars, ptrdiff_t src_bytes,
8629 bool highest, bool multibytep,
8630 Lisp_Object coding_system)
8631 {
8632 const unsigned char *src_end = src + src_bytes;
8633 Lisp_Object attrs, eol_type;
/* VAL first collects coding system IDs; they are replaced by names in
   the final loop.  */
8634 Lisp_Object val = Qnil;
8635 struct coding_system coding;
8636 ptrdiff_t id;
8637 struct coding_detection_info detect_info = {0};
8638 enum coding_category base_category;
8639 bool null_byte_found = 0, eight_bit_found = 0;
8640
8641 if (NILP (coding_system))
8642 coding_system = Qundecided;
8643 setup_coding_system (coding_system, &coding);
8644 attrs = CODING_ID_ATTRS (coding.id);
8645 eol_type = CODING_ID_EOL_TYPE (coding.id);
8646 coding_system = CODING_ATTR_BASE_NAME (attrs);
8647
8648 coding.source = src;
8649 coding.src_chars = src_chars;
8650 coding.src_bytes = src_bytes;
8651 coding.src_multibyte = multibytep;
8652 coding.consumed = 0;
8653 coding.mode |= CODING_MODE_LAST_BLOCK;
8654 coding.head_ascii = 0;
8655
8656 /* Step 1: decide the text encoding.  */
8657 base_category = XFIXNUM (CODING_ATTR_CATEGORY (attrs));
8658 if (base_category == coding_category_undecided)
8659 {
/* CATEGORY and THIS are only read on paths where one of the loops
   below has assigned them.  */
8660 enum coding_category category UNINIT;
8661 struct coding_system *this UNINIT;
8662 int c, i;
8663 bool inhibit_nbd = inhibit_flag (coding.spec.undecided.inhibit_nbd,
8664 inhibit_null_byte_detection);
8665 bool inhibit_ied = inhibit_flag (coding.spec.undecided.inhibit_ied,
8666 inhibit_iso_escape_detection);
8667 bool prefer_utf_8 = coding.spec.undecided.prefer_utf_8;
8668
8669 /* Scan the bytes, counting in head_ascii those seen before the
   first byte with the high bit set.  */
8670 for (; src < src_end; src++)
8671 {
8672 c = *src;
8673 if (c & 0x80)
8674 {
8675 eight_bit_found = 1;
/* Once both a null byte and an 8-bit byte were seen, stop.  */
8676 if (null_byte_found)
8677 break;
8678 }
8679 else if (c < 0x20)
8680 {
/* ESC, SI or SO: try ISO-2022 detection, unless it is inhibited or
   some detector has already run.  */
8681 if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
8682 && ! inhibit_ied
8683 && ! detect_info.checked)
8684 {
8685 if (detect_coding_iso_2022 (&coding, &detect_info))
8686 {
8687 /* The ISO-2022 detector reported a match.  */
8688 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
8689 {
8690 /* The 7-bit-else category was not rejected:
8691    skip the rest of the scan and count all
8692    bytes as head ASCII.  (SRC has just been set
8693    to SRC_END when head_ascii is computed.)  */
8694 src = src_end;
8695 coding.head_ascii = src - coding.source;
8696 }
/* Reject every category outside the ISO-escape group.  */
8697 detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
8698 break;
8699 }
8700 }
8701 else if (! c && !inhibit_nbd)
8702 {
8703 null_byte_found = 1;
8704 if (eight_bit_found)
8705 break;
8706 }
8707 if (! eight_bit_found)
8708 coding.head_ascii++;
8709 }
8710 else if (! eight_bit_found)
8711 coding.head_ascii++;
8712 }
8713
8714 if (null_byte_found || eight_bit_found
8715 || coding.head_ascii < coding.src_bytes
8716 || detect_info.found)
8717 {
8718 if (coding.head_ascii == coding.src_bytes)
8719 /* Every byte was counted as head ASCII: just pick the first
   category, in priority order, already marked as found.  */
8720 for (i = 0; i < coding_category_raw_text; i++)
8721 {
8722 category = coding_priorities[i];
8723 this = coding_categories + category;
8724 if (detect_info.found & (1 << category))
8725 break;
8726 }
8727 else
8728 {
8729 if (null_byte_found)
8730 {
/* With a null byte present only the UTF-16 categories remain
   candidates.  */
8731 detect_info.checked |= ~CATEGORY_MASK_UTF_16;
8732 detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
8733 }
8734 else if (prefer_utf_8
8735 && detect_coding_utf_8 (&coding, &detect_info))
8736 {
/* UTF-8 is preferred and was detected: rule out everything else.  */
8737 detect_info.checked |= ~CATEGORY_MASK_UTF_8;
8738 detect_info.rejected |= ~CATEGORY_MASK_UTF_8;
8739 }
/* Run the detector of each category not yet checked, in priority
   order.  */
8740 for (i = 0; i < coding_category_raw_text; i++)
8741 {
8742 category = coding_priorities[i];
8743 this = coding_categories + category;
8744
8745 if (this->id < 0)
8746 {
8747 /* Negative ID: presumably no coding system is defined for this category.  */
8748 detect_info.rejected |= (1 << category);
8749 }
8750 else if (category >= coding_category_raw_text)
8751 continue;
8752 else if (detect_info.checked & (1 << category))
8753 {
8754 if (highest
8755 && (detect_info.found & (1 << category)))
8756 break;
8757 }
8758 else if ((*(this->detector)) (&coding, &detect_info)
8759 && highest
8760 && (detect_info.found & (1 << category)))
8761 {
/* For the UTF-16 auto category, narrow CATEGORY to the endianness
   that was found.  */
8762 if (category == coding_category_utf_16_auto)
8763 {
8764 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8765 category = coding_category_utf_16_le;
8766 else
8767 category = coding_category_utf_16_be;
8768 }
8769 break;
8770 }
8771 }
8772 }
8773 }
8774
/* Turn the found/rejected masks into a list of coding system IDs.  */
8775 if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY
8776 || null_byte_found)
8777 {
/* Everything was rejected, or a null byte was seen: answer
   `no-conversion'.  */
8778 detect_info.found = CATEGORY_MASK_RAW_TEXT;
8779 id = CODING_SYSTEM_ID (Qno_conversion);
8780 val = list1i (id);
8781 }
8782 else if (! detect_info.rejected && ! detect_info.found)
8783 {
/* Nothing found and nothing rejected: answer with the coding system
   of the undecided category.  */
8784 detect_info.found = CATEGORY_MASK_ANY;
8785 id = coding_categories[coding_category_undecided].id;
8786 val = list1i (id);
8787 }
8788 else if (highest)
8789 {
8790 if (detect_info.found)
8791 {
8792 detect_info.found = 1 << category;
8793 val = list1i (this->id);
8794 }
8795 else
/* Nothing positively found: take the first category, in priority
   order, that was not rejected.  */
8796 for (i = 0; i < coding_category_raw_text; i++)
8797 if (! (detect_info.rejected & (1 << coding_priorities[i])))
8798 {
8799 detect_info.found = 1 << coding_priorities[i];
8800 id = coding_categories[coding_priorities[i]].id;
8801 val = list1i (id);
8802 break;
8803 }
8804 }
8805 else
8806 {
8807 int mask = detect_info.rejected | detect_info.found;
8808 int found = 0;
8809
/* First pass, lowest priority first: categories neither found nor
   rejected.  VAL is overwritten each time, so at most the last such
   category with a non-negative ID remains in the list.  */
8810 for (i = coding_category_raw_text - 1; i >= 0; i--)
8811 {
8812 category = coding_priorities[i];
8813 if (! (mask & (1 << category)))
8814 {
8815 found |= 1 << category;
8816 id = coding_categories[category].id;
8817 if (id >= 0)
8818 val = list1i (id);
8819 }
8820 }
/* Second pass, lowest priority first: push each found category onto
   the front, so the highest-priority one ends up first.  */
8821 for (i = coding_category_raw_text - 1; i >= 0; i--)
8822 {
8823 category = coding_priorities[i];
8824 if (detect_info.found & (1 << category))
8825 {
8826 id = coding_categories[category].id;
8827 val = Fcons (make_fixnum (id), val);
8828 }
8829 }
8830 detect_info.found |= found;
8831 }
8832 }
8833 else if (base_category == coding_category_utf_8_auto)
8834 {
/* Only decide between UTF-8 with and without signature.  */
8835 if (detect_coding_utf_8 (&coding, &detect_info))
8836 {
8837 struct coding_system *this;
8838
8839 if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
8840 this = coding_categories + coding_category_utf_8_sig;
8841 else
8842 this = coding_categories + coding_category_utf_8_nosig;
8843 val = list1i (this->id);
8844 }
8845 }
8846 else if (base_category == coding_category_utf_16_auto)
8847 {
/* Only decide among the UTF-16 variants.  */
8848 if (detect_coding_utf_16 (&coding, &detect_info))
8849 {
8850 struct coding_system *this;
8851
8852 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8853 this = coding_categories + coding_category_utf_16_le;
8854 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
8855 this = coding_categories + coding_category_utf_16_be;
8856 else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
8857 this = coding_categories + coding_category_utf_16_be_nosig;
8858 else
8859 this = coding_categories + coding_category_utf_16_le_nosig;
8860 val = list1i (this->id);
8861 }
8862 }
8863 else
8864 {
/* A definite coding system was requested; report it as is.  */
8865 detect_info.found = 1 << XFIXNUM (CODING_ATTR_CATEGORY (attrs));
8866 val = list1i (coding.id);
8867 }
8868
8869 /* Step 2: decide the end-of-line format and turn IDs into names.  */
8870 {
/* -1 means "not determined" for the respective family.  */
8871 int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol = -1;
8872 Lisp_Object tail;
8873
8874 if (VECTORP (eol_type))
8875 {
/* The requested coding system leaves the EOL format open: detect it
   separately for each family that is among the found categories.  */
8876 if (detect_info.found & ~CATEGORY_MASK_UTF_16)
8877 {
8878 if (null_byte_found)
8879 normal_eol = EOL_SEEN_LF;
8880 else
8881 normal_eol = detect_eol (coding.source, src_bytes,
8882 coding_category_raw_text);
8883 }
8884 if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
8885 | CATEGORY_MASK_UTF_16_BE_NOSIG))
8886 utf_16_be_eol = detect_eol (coding.source, src_bytes,
8887 coding_category_utf_16_be);
8888 if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
8889 | CATEGORY_MASK_UTF_16_LE_NOSIG))
8890 utf_16_le_eol = detect_eol (coding.source, src_bytes,
8891 coding_category_utf_16_le);
8892 }
8893 else
8894 {
/* The requested coding system fixes the EOL format.  */
8895 if (EQ (eol_type, Qunix))
8896 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
8897 else if (EQ (eol_type, Qdos))
8898 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
8899 else
8900 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
8901 }
8902
/* Replace each ID in VAL by a coding system name, using the EOL
   variant (elements 0, 1, 2 of the EOL vector for LF, CRLF, CR) when
   the format is known.  */
8903 for (tail = val; CONSP (tail); tail = XCDR (tail))
8904 {
8905 enum coding_category category;
8906 int this_eol;
8907
8908 id = XFIXNUM (XCAR (tail));
8909 attrs = CODING_ID_ATTRS (id);
8910 category = XFIXNUM (CODING_ATTR_CATEGORY (attrs));
8911 eol_type = CODING_ID_EOL_TYPE (id);
8912 if (VECTORP (eol_type))
8913 {
8914 if (category == coding_category_utf_16_be
8915 || category == coding_category_utf_16_be_nosig)
8916 this_eol = utf_16_be_eol;
8917 else if (category == coding_category_utf_16_le
8918 || category == coding_category_utf_16_le_nosig)
8919 this_eol = utf_16_le_eol;
8920 else
8921 this_eol = normal_eol;
8922
8923 if (this_eol == EOL_SEEN_LF)
8924 XSETCAR (tail, AREF (eol_type, 0));
8925 else if (this_eol == EOL_SEEN_CRLF)
8926 XSETCAR (tail, AREF (eol_type, 1));
8927 else if (this_eol == EOL_SEEN_CR)
8928 XSETCAR (tail, AREF (eol_type, 2));
8929 else
8930 XSETCAR (tail, CODING_ID_NAME (id));
8931 }
8932 else
8933 XSETCAR (tail, CODING_ID_NAME (id));
8934 }
8935 }
8936
8937 return (highest ? (CONSP (val) ? XCAR (val) : Qnil) : val);
8938 }
8939
8940
8941 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
8942 2, 3, 0,
8943 doc:
8944
8945
8946
8947
8948
8949
8950
8951
8952
8953
8954 )
8955 (Lisp_Object start, Lisp_Object end, Lisp_Object highest)
8956 {
8957 ptrdiff_t from, to;
8958 ptrdiff_t from_byte, to_byte;
8959
8960 validate_region (&start, &end);
8961 from = XFIXNUM (start), to = XFIXNUM (end);
8962 from_byte = CHAR_TO_BYTE (from);
8963 to_byte = CHAR_TO_BYTE (to);
8964
8965 if (from < GPT && to >= GPT)
8966 move_gap_both (to, to_byte);
8967
8968 return detect_coding_system (BYTE_POS_ADDR (from_byte),
8969 to - from, to_byte - from_byte,
8970 !NILP (highest),
8971 !NILP (BVAR (current_buffer
8972 , enable_multibyte_characters)),
8973 Qnil);
8974 }
8975
8976 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
8977 1, 2, 0,
8978 doc:
8979
8980
8981
8982
8983
8984
8985
8986
8987
8988
8989 )
8990 (Lisp_Object string, Lisp_Object highest)
8991 {
8992 CHECK_STRING (string);
8993
8994 return detect_coding_system (SDATA (string),
8995 SCHARS (string), SBYTES (string),
8996 !NILP (highest), STRING_MULTIBYTE (string),
8997 Qnil);
8998 }
8999
9000
9001 static bool
9002 char_encodable_p (int c, Lisp_Object attrs)
9003 {
9004 Lisp_Object tail;
9005 struct charset *charset;
9006 Lisp_Object translation_table;
9007
9008 translation_table = CODING_ATTR_TRANS_TBL (attrs);
9009 if (! NILP (translation_table))
9010 c = translate_char (translation_table, c);
9011 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
9012 CONSP (tail); tail = XCDR (tail))
9013 {
9014 charset = CHARSET_FROM_ID (XFIXNUM (XCAR (tail)));
9015 if (CHAR_CHARSET_P (c, charset))
9016 break;
9017 }
9018 return (! NILP (tail));
9019 }
9020
9021
9022
9023
9024
9025
9026
9027
9028 DEFUN ("find-coding-systems-region-internal",
9029 Ffind_coding_systems_region_internal,
9030 Sfind_coding_systems_region_internal, 2, 3, 0,
9031 doc: )
9032 (Lisp_Object start, Lisp_Object end, Lisp_Object exclude)
9033 {
9034 Lisp_Object coding_attrs_list, safe_codings;
9035 ptrdiff_t start_byte, end_byte;
9036 const unsigned char *p, *pbeg, *pend;
9037 int c;
9038 Lisp_Object tail, elt, work_table;
9039
9040 if (STRINGP (start))
9041 {
9042 if (!STRING_MULTIBYTE (start)
9043 || SCHARS (start) == SBYTES (start))
9044 return Qt;
9045 start_byte = 0;
9046 end_byte = SBYTES (start);
9047 }
9048 else
9049 {
9050 EMACS_INT s = fix_position (start);
9051 EMACS_INT e = fix_position (end);
9052 if (! (BEG <= s && s <= e && e <= Z))
9053 args_out_of_range (start, end);
9054 if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
9055 return Qt;
9056 start_byte = CHAR_TO_BYTE (s);
9057 end_byte = CHAR_TO_BYTE (e);
9058 if (e - s == end_byte - start_byte)
9059 return Qt;
9060
9061 if (s < GPT && GPT < e)
9062 {
9063 if (GPT - s < e - GPT)
9064 move_gap_both (s, start_byte);
9065 else
9066 move_gap_both (e, end_byte);
9067 }
9068 }
9069
9070 coding_attrs_list = Qnil;
9071 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
9072 if (NILP (exclude)
9073 || NILP (Fmemq (XCAR (tail), exclude)))
9074 {
9075 Lisp_Object attrs;
9076
9077 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
9078 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs)))
9079 {
9080 ASET (attrs, coding_attr_trans_tbl,
9081 get_translation_table (attrs, 1, NULL));
9082 coding_attrs_list = Fcons (attrs, coding_attrs_list);
9083 }
9084 }
9085
9086 if (STRINGP (start))
9087 p = pbeg = SDATA (start);
9088 else
9089 p = pbeg = BYTE_POS_ADDR (start_byte);
9090 pend = p + (end_byte - start_byte);
9091
9092 while (p < pend && ASCII_CHAR_P (*p)) p++;
9093 while (p < pend && ASCII_CHAR_P (*(pend - 1))) pend--;
9094
9095 work_table = Fmake_char_table (Qnil, Qnil);
9096 while (p < pend)
9097 {
9098 if (ASCII_CHAR_P (*p))
9099 p++;
9100 else
9101 {
9102 c = string_char_advance (&p);
9103 if (!NILP (char_table_ref (work_table, c)))
9104
9105 continue;
9106
9107 charset_map_loaded = 0;
9108 for (tail = coding_attrs_list; CONSP (tail);)
9109 {
9110 elt = XCAR (tail);
9111 if (NILP (elt))
9112 tail = XCDR (tail);
9113 else if (char_encodable_p (c, elt))
9114 tail = XCDR (tail);
9115 else if (CONSP (XCDR (tail)))
9116 {
9117 XSETCAR (tail, XCAR (XCDR (tail)));
9118 XSETCDR (tail, XCDR (XCDR (tail)));
9119 }
9120 else
9121 {
9122 XSETCAR (tail, Qnil);
9123 tail = XCDR (tail);
9124 }
9125 }
9126 if (charset_map_loaded)
9127 {
9128 ptrdiff_t p_offset = p - pbeg, pend_offset = pend - pbeg;
9129
9130 if (STRINGP (start))
9131 pbeg = SDATA (start);
9132 else
9133 pbeg = BYTE_POS_ADDR (start_byte);
9134 p = pbeg + p_offset;
9135 pend = pbeg + pend_offset;
9136 }
9137 char_table_set (work_table, c, Qt);
9138 }
9139 }
9140
9141 safe_codings = list2 (Qraw_text, Qno_conversion);
9142 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
9143 if (! NILP (XCAR (tail)))
9144 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
9145
9146 return safe_codings;
9147 }
9148
9149
9150 DEFUN ("unencodable-char-position", Funencodable_char_position,
9151 Sunencodable_char_position, 3, 5, 0,
9152 doc:
9153
9154
9155
9156
9157
9158
9159
9160
9161
9162 )
9163 (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system,
9164 Lisp_Object count, Lisp_Object string)
9165 {
9166 EMACS_INT n;
9167 struct coding_system coding;
9168 Lisp_Object attrs, charset_list, translation_table;
9169 Lisp_Object positions;
9170 ptrdiff_t from, to;
9171 const unsigned char *p, *stop, *pend;
9172 bool ascii_compatible;
9173
9174 setup_coding_system (Fcheck_coding_system (coding_system), &coding);
9175 attrs = CODING_ID_ATTRS (coding.id);
9176 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
9177 return Qnil;
9178 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
9179 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9180 translation_table = get_translation_table (attrs, 1, NULL);
9181
9182 if (NILP (string))
9183 {
9184 validate_region (&start, &end);
9185 from = XFIXNUM (start);
9186 to = XFIXNUM (end);
9187 if (NILP (BVAR (current_buffer, enable_multibyte_characters))
9188 || (ascii_compatible
9189 && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
9190 return Qnil;
9191 p = CHAR_POS_ADDR (from);
9192 pend = CHAR_POS_ADDR (to);
9193 if (from < GPT && to >= GPT)
9194 stop = GPT_ADDR;
9195 else
9196 stop = pend;
9197 }
9198 else
9199 {
9200 CHECK_STRING (string);
9201 validate_subarray (string, start, end, SCHARS (string), &from, &to);
9202 if (! STRING_MULTIBYTE (string))
9203 return Qnil;
9204 p = SDATA (string) + string_char_to_byte (string, from);
9205 stop = pend = SDATA (string) + string_char_to_byte (string, to);
9206 if (ascii_compatible && (to - from) == (pend - p))
9207 return Qnil;
9208 }
9209
9210 if (NILP (count))
9211 n = 1;
9212 else
9213 {
9214 CHECK_FIXNAT (count);
9215 n = XFIXNUM (count);
9216 }
9217
9218 positions = Qnil;
9219 charset_map_loaded = 0;
9220 while (1)
9221 {
9222 int c;
9223
9224 if (ascii_compatible)
9225 while (p < stop && ASCII_CHAR_P (*p))
9226 p++, from++;
9227 if (p >= stop)
9228 {
9229 if (p >= pend)
9230 break;
9231 stop = pend;
9232 p = GAP_END_ADDR;
9233 }
9234
9235 c = string_char_advance (&p);
9236 if (! (ASCII_CHAR_P (c) && ascii_compatible)
9237 && ! char_charset (translate_char (translation_table, c),
9238 charset_list, NULL))
9239 {
9240 positions = Fcons (make_fixnum (from), positions);
9241 n--;
9242 if (n == 0)
9243 break;
9244 }
9245
9246 from++;
9247 if (charset_map_loaded && NILP (string))
9248 {
9249 p = CHAR_POS_ADDR (from);
9250 pend = CHAR_POS_ADDR (to);
9251 if (from < GPT && to >= GPT)
9252 stop = GPT_ADDR;
9253 else
9254 stop = pend;
9255 charset_map_loaded = 0;
9256 }
9257 }
9258
9259 return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
9260 }
9261
9262
9263 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
9264 Scheck_coding_systems_region, 3, 3, 0,
9265 doc:
9266
9267
9268
9269
9270
9271
9272
9273
9274
9275
9276
9277
9278
9279
9280
9281
9282
9283 )
9284 (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system_list)
9285 {
9286 Lisp_Object list;
9287 ptrdiff_t start_byte, end_byte;
9288 ptrdiff_t pos;
9289 const unsigned char *p, *pbeg, *pend;
9290 int c;
9291 Lisp_Object tail, elt, attrs;
9292
9293 if (STRINGP (start))
9294 {
9295 if (!STRING_MULTIBYTE (start)
9296 || SCHARS (start) == SBYTES (start))
9297 return Qnil;
9298 start_byte = 0;
9299 end_byte = SBYTES (start);
9300 pos = 0;
9301 }
9302 else
9303 {
9304 EMACS_INT s = fix_position (start);
9305 EMACS_INT e = fix_position (end);
9306 if (! (BEG <= s && s <= e && e <= Z))
9307 args_out_of_range (start, end);
9308 if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
9309 return Qnil;
9310 start_byte = CHAR_TO_BYTE (s);
9311 end_byte = CHAR_TO_BYTE (e);
9312 if (e - s == end_byte - start_byte)
9313 return Qnil;
9314
9315 if (s < GPT && GPT < e)
9316 {
9317 if (GPT - s < e - GPT)
9318 move_gap_both (s, start_byte);
9319 else
9320 move_gap_both (e, end_byte);
9321 }
9322 pos = s;
9323 }
9324
9325 list = Qnil;
9326 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
9327 {
9328 elt = XCAR (tail);
9329 Lisp_Object spec = CODING_SYSTEM_SPEC (elt);
9330 if (!VECTORP (spec))
9331 xsignal1 (Qcoding_system_error, elt);
9332 attrs = AREF (spec, 0);
9333 ASET (attrs, coding_attr_trans_tbl,
9334 get_translation_table (attrs, 1, NULL));
9335 list = Fcons (list2 (elt, attrs), list);
9336 }
9337
9338 if (STRINGP (start))
9339 p = pbeg = SDATA (start);
9340 else
9341 p = pbeg = BYTE_POS_ADDR (start_byte);
9342 pend = p + (end_byte - start_byte);
9343
9344 while (p < pend && ASCII_CHAR_P (*p)) p++, pos++;
9345 while (p < pend && ASCII_CHAR_P (*(pend - 1))) pend--;
9346
9347 while (p < pend)
9348 {
9349 if (ASCII_CHAR_P (*p))
9350 p++;
9351 else
9352 {
9353 c = string_char_advance (&p);
9354
9355 charset_map_loaded = 0;
9356 for (tail = list; CONSP (tail); tail = XCDR (tail))
9357 {
9358 elt = XCDR (XCAR (tail));
9359 if (! char_encodable_p (c, XCAR (elt)))
9360 XSETCDR (elt, Fcons (make_fixnum (pos), XCDR (elt)));
9361 }
9362 if (charset_map_loaded)
9363 {
9364 ptrdiff_t p_offset = p - pbeg, pend_offset = pend - pbeg;
9365
9366 if (STRINGP (start))
9367 pbeg = SDATA (start);
9368 else
9369 pbeg = BYTE_POS_ADDR (start_byte);
9370 p = pbeg + p_offset;
9371 pend = pbeg + pend_offset;
9372 }
9373 }
9374 pos++;
9375 }
9376
9377 tail = list;
9378 list = Qnil;
9379 for (; CONSP (tail); tail = XCDR (tail))
9380 {
9381 elt = XCAR (tail);
9382 if (CONSP (XCDR (XCDR (elt))))
9383 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
9384 list);
9385 }
9386
9387 return list;
9388 }
9389
9390
9391 static Lisp_Object
9392 code_convert_region (Lisp_Object start, Lisp_Object end,
9393 Lisp_Object coding_system, Lisp_Object dst_object,
9394 bool encodep, bool norecord)
9395 {
9396 struct coding_system coding;
9397 ptrdiff_t from, from_byte, to, to_byte;
9398 Lisp_Object src_object;
9399
9400 if (NILP (coding_system))
9401 coding_system = Qno_conversion;
9402 else
9403 CHECK_CODING_SYSTEM (coding_system);
9404 src_object = Fcurrent_buffer ();
9405 if (NILP (dst_object))
9406 dst_object = src_object;
9407 else if (! EQ (dst_object, Qt))
9408 CHECK_BUFFER (dst_object);
9409
9410 validate_region (&start, &end);
9411 from = XFIXNAT (start);
9412 from_byte = CHAR_TO_BYTE (from);
9413 to = XFIXNAT (end);
9414 to_byte = CHAR_TO_BYTE (to);
9415
9416 setup_coding_system (coding_system, &coding);
9417 coding.mode |= CODING_MODE_LAST_BLOCK;
9418
9419 if (BUFFERP (dst_object) && !BASE_EQ (dst_object, src_object))
9420 {
9421 struct buffer *buf = XBUFFER (dst_object);
9422 ptrdiff_t buf_pt = BUF_PT (buf);
9423
9424 invalidate_buffer_caches (buf, buf_pt, buf_pt);
9425 }
9426
9427 if (encodep)
9428 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
9429 dst_object);
9430 else
9431 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
9432 dst_object);
9433 if (! norecord)
9434 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
9435
9436 return (BUFFERP (dst_object)
9437 ? make_fixnum (coding.produced_char)
9438 : coding.dst_object);
9439 }
9440
9441
9442 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
9443 3, 4, "r\nzCoding system: ",
9444 doc:
9445
9446
9447
9448
9449
9450
9451
9452
9453
9454
9455
9456
9457
9458
9459
9460
9461
9462
9463
9464
9465
9466
9467
9468 )
9469 (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object destination)
9470 {
9471 return code_convert_region (start, end, coding_system, destination, 0, 0);
9472 }
9473
9474 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
9475 3, 4, "r\nzCoding system: ",
9476 doc:
9477
9478
9479
9480
9481
9482
9483
9484
9485
9486
9487
9488
9489
9490
9491
9492
9493
9494
9495
9496
9497
9498
9499 )
9500 (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object destination)
9501 {
9502 return code_convert_region (start, end, coding_system, destination, 1, 0);
9503 }
9504
9505
9506 bool
9507 string_ascii_p (Lisp_Object string)
9508 {
9509 ptrdiff_t nbytes = SBYTES (string);
9510 for (ptrdiff_t i = 0; i < nbytes; i++)
9511 if (SREF (string, i) > 127)
9512 return false;
9513 return true;
9514 }
9515
9516 Lisp_Object
9517 code_convert_string (Lisp_Object string, Lisp_Object coding_system,
9518 Lisp_Object dst_object, bool encodep, bool nocopy,
9519 bool norecord)
9520 {
9521 struct coding_system coding;
9522 ptrdiff_t chars, bytes;
9523
9524 CHECK_STRING (string);
9525 if (NILP (coding_system))
9526 {
9527 if (! norecord)
9528 Vlast_coding_system_used = Qno_conversion;
9529 if (NILP (dst_object))
9530 return nocopy ? string : Fcopy_sequence (string);
9531 }
9532
9533 if (NILP (coding_system))
9534 coding_system = Qno_conversion;
9535 else
9536 CHECK_CODING_SYSTEM (coding_system);
9537 if (NILP (dst_object))
9538 dst_object = Qt;
9539 else if (! EQ (dst_object, Qt))
9540 CHECK_BUFFER (dst_object);
9541
9542 setup_coding_system (coding_system, &coding);
9543 coding.mode |= CODING_MODE_LAST_BLOCK;
9544 chars = SCHARS (string);
9545 bytes = SBYTES (string);
9546
9547 if (EQ (dst_object, Qt))
9548 {
9549
9550
9551 Lisp_Object attrs = CODING_ID_ATTRS (coding.id);
9552 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
9553 && (STRING_MULTIBYTE (string)
9554 ? (chars == bytes) : string_ascii_p (string))
9555 && (EQ (CODING_ID_EOL_TYPE (coding.id), Qunix)
9556 || inhibit_eol_conversion
9557 || ! memchr (SDATA (string), encodep ? '\n' : '\r', bytes)))
9558 {
9559 if (! norecord)
9560 Vlast_coding_system_used = coding_system;
9561 return (nocopy
9562 ? string
9563 : (encodep
9564 ? make_unibyte_string (SSDATA (string), bytes)
9565 : make_multibyte_string (SSDATA (string), bytes, bytes)));
9566 }
9567 }
9568 else if (BUFFERP (dst_object))
9569 {
9570 struct buffer *buf = XBUFFER (dst_object);
9571 ptrdiff_t buf_pt = BUF_PT (buf);
9572
9573 invalidate_buffer_caches (buf, buf_pt, buf_pt);
9574 }
9575
9576 if (encodep)
9577 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9578 else
9579 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9580 if (! norecord)
9581 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
9582
9583 return (BUFFERP (dst_object)
9584 ? make_fixnum (coding.produced_char)
9585 : coding.dst_object);
9586 }
9587
9588
9589
9590
9591
9592 Lisp_Object
9593 code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system,
9594 bool encodep)
9595 {
9596 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
9597 }
9598
9599
9600
9601
9602
9603 static unsigned char *
9604 get_buffer_gap_address (Lisp_Object buffer, ptrdiff_t nbytes)
9605 {
9606 struct buffer *buf = XBUFFER (buffer);
9607
9608 if (BUF_GPT (buf) != BUF_PT (buf))
9609 {
9610 struct buffer *oldb = current_buffer;
9611
9612 current_buffer = buf;
9613 move_gap_both (PT, PT_BYTE);
9614 current_buffer = oldb;
9615 }
9616 if (BUF_GAP_SIZE (buf) < nbytes)
9617 make_gap_1 (buf, nbytes);
9618 return BUF_GPT_ADDR (buf);
9619 }
9620
9621
9622
9623
9624
9625
9626
/* Return a pointer to the byte sequence CHAR_STRING produces for
   character C, and store the length of that sequence in *LEN.

   The two most recently requested characters are remembered in
   static storage, so the returned pointer refers to a static buffer
   that a later call with a different character may overwrite.

   A cache slot is considered filled only once a length has been
   recorded for it.  Without that test the zero-initialized slots
   would match C == 0 on a cold cache and report a bogus length of 0
   for it.  NOTE(review): this relies on CHAR_STRING never returning
   0 for a valid character -- confirm.  */
static unsigned char *
get_char_bytes (int c, int *len)
{
  static int chars[2];
  static unsigned char bytes[2][6];
  static int nbytes[2];
  static int last_index;

  /* Probe the most recently filled slot first, then the other one.
     A hit in the other slot deliberately leaves last_index alone.  */
  for (int probe = 0; probe < 2; probe++)
    {
      int slot = probe == 0 ? last_index : 1 - last_index;

      if (nbytes[slot] > 0 && chars[slot] == c)
        {
          *len = nbytes[slot];
          return bytes[slot];
        }
    }

  /* Miss: overwrite the slot that was not filled most recently.  */
  last_index = 1 - last_index;
  chars[last_index] = c;
  *len = nbytes[last_index] = CHAR_STRING (c, bytes[last_index]);
  return bytes[last_index];
}
9653
9654
9655
9656
9657
9658
9659
9660
9661
9662
9663
9664
9665
9666
9667
9668
9669
9670
9671
9672
9673
9674
9675
9676
9677
9678
9679
9680
9681
9682
9683
9684
9685
9686
9687
9688
9689
9690
9691
9692
9693
9694
9695
9696
9697
9698
9699
9700
9701
9702
9703
9704
9705
9706
9707
9708
9709
9710
9711
9712
9713
9714
9715
9716 Lisp_Object
9717 encode_string_utf_8 (Lisp_Object string, Lisp_Object buffer,
9718 bool nocopy, Lisp_Object handle_8_bit,
9719 Lisp_Object handle_over_uni)
9720 {
9721 ptrdiff_t nchars = SCHARS (string), nbytes = SBYTES (string);
9722 if (NILP (buffer) && nchars == nbytes && nocopy)
9723
9724 return string;
9725
9726 ptrdiff_t num_8_bit = 0;
9727
9728 ptrdiff_t num_over_4 = 0;
9729 ptrdiff_t num_over_5 = 0;
9730 ptrdiff_t outbytes;
9731 unsigned char *p = SDATA (string);
9732 unsigned char *pend = p + nbytes;
9733 unsigned char *src = NULL, *dst = NULL;
9734 unsigned char *replace_8_bit = NULL, *replace_over_uni = NULL;
9735 int replace_8_bit_len = 0, replace_over_uni_len = 0;
9736 Lisp_Object val;
9737
9738
9739
9740
9741
9742 for (int scan_count = 0; scan_count < 2; scan_count++)
9743 {
9744 while (p < pend)
9745 {
9746 if (nchars == pend - p)
9747
9748 break;
9749
9750 int c = *p;
9751 int len = BYTES_BY_CHAR_HEAD (c);
9752
9753 nchars--;
9754 if (len == 1
9755 || len == 3
9756 || (len == 2 ? ! CHAR_BYTE8_HEAD_P (c)
9757 : (EQ (handle_over_uni, Qt)
9758 || (len == 4
9759 && STRING_CHAR (p) <= MAX_UNICODE_CHAR))))
9760 {
9761 p += len;
9762 continue;
9763 }
9764
9765
9766
9767 if (len == 2)
9768 {
9769
9770 if (scan_count == 0)
9771 {
9772 if (NILP (handle_8_bit))
9773 return Qnil;
9774 num_8_bit++;
9775 }
9776 else
9777 {
9778 if (src < p)
9779 {
9780 memcpy (dst, src, p - src);
9781 dst += p - src;
9782 }
9783 if (replace_8_bit_len > 0)
9784 {
9785 memcpy (dst, replace_8_bit, replace_8_bit_len);
9786 dst += replace_8_bit_len;
9787 }
9788 else if (EQ (handle_8_bit, Qt))
9789 {
9790 int char8 = STRING_CHAR (p);
9791 *dst++ = CHAR_TO_BYTE8 (char8);
9792 }
9793 }
9794 }
9795 else
9796 {
9797
9798 if (scan_count == 0)
9799 {
9800 if (NILP (handle_over_uni))
9801 return Qnil;
9802 if (len == 4)
9803 num_over_4++;
9804 else
9805 num_over_5++;
9806 }
9807 else
9808 {
9809 if (src < p)
9810 {
9811 memcpy (dst, src, p - src);
9812 dst += p - src;
9813 }
9814 if (replace_over_uni_len > 0)
9815 {
9816 memcpy (dst, replace_over_uni, replace_over_uni_len);
9817 dst += replace_over_uni_len;
9818 }
9819 }
9820 }
9821 p += len;
9822 src = p;
9823 }
9824
9825 if (scan_count == 0)
9826 {
9827
9828 outbytes = nbytes;
9829 if (num_8_bit == 0
9830 && (num_over_4 + num_over_5 == 0 || EQ (handle_over_uni, Qt)))
9831 {
9832
9833
9834
9835 scan_count = 1;
9836 }
9837 else
9838 {
9839
9840
9841 if (num_8_bit > 0)
9842 {
9843 if (CHARACTERP (handle_8_bit))
9844 replace_8_bit = get_char_bytes (XFIXNUM (handle_8_bit),
9845 &replace_8_bit_len);
9846 else if (STRINGP (handle_8_bit))
9847 {
9848 replace_8_bit = SDATA (handle_8_bit);
9849 replace_8_bit_len = SBYTES (handle_8_bit);
9850 }
9851 if (replace_8_bit)
9852 outbytes += (replace_8_bit_len - 2) * num_8_bit;
9853 else if (EQ (handle_8_bit, Qignored))
9854 outbytes -= 2 * num_8_bit;
9855 else if (EQ (handle_8_bit, Qt))
9856 outbytes -= num_8_bit;
9857 else
9858 return Qnil;
9859 }
9860 if (num_over_4 + num_over_5 > 0)
9861 {
9862 if (CHARACTERP (handle_over_uni))
9863 replace_over_uni = get_char_bytes (XFIXNUM (handle_over_uni),
9864 &replace_over_uni_len);
9865 else if (STRINGP (handle_over_uni))
9866 {
9867 replace_over_uni = SDATA (handle_over_uni);
9868 replace_over_uni_len = SBYTES (handle_over_uni);
9869 }
9870 if (num_over_4 > 0)
9871 {
9872 if (replace_over_uni)
9873 outbytes += (replace_over_uni_len - 4) * num_over_4;
9874 else if (EQ (handle_over_uni, Qignored))
9875 outbytes -= 4 * num_over_4;
9876 else if (! EQ (handle_over_uni, Qt))
9877 return Qnil;
9878 }
9879 if (num_over_5 > 0)
9880 {
9881 if (replace_over_uni)
9882 outbytes += (replace_over_uni_len - 5) * num_over_5;
9883 else if (EQ (handle_over_uni, Qignored))
9884 outbytes -= 5 * num_over_5;
9885 else if (! EQ (handle_over_uni, Qt))
9886 return Qnil;
9887 }
9888 }
9889 }
9890
9891
9892 if (BUFFERP (buffer))
9893 {
9894 val = make_fixnum (outbytes);
9895 dst = get_buffer_gap_address (buffer, nbytes);
9896 }
9897 else
9898 {
9899 if (nocopy && (num_8_bit + num_over_4 + num_over_5) == 0)
9900 return string;
9901 val = make_uninit_string (outbytes);
9902 dst = SDATA (val);
9903 }
9904 p = src = SDATA (string);
9905 }
9906 }
9907
9908 if (src < pend)
9909 memcpy (dst, src, pend - src);
9910 if (BUFFERP (buffer))
9911 {
9912 struct buffer *oldb = current_buffer;
9913
9914 current_buffer = XBUFFER (buffer);
9915 insert_from_gap (outbytes, outbytes, false);
9916 current_buffer = oldb;
9917 }
9918 return val;
9919 }
9920
9921
9922
9923
9924
9925
9926
9927
9928
9929
9930
9931
9932
9933
9934
9935
9936
9937
9938
9939
9940
9941
9942
9943
9944
9945
9946
9947
9948
9949
9950
9951
9952
9953
9954
9955
9956
9957
9958
9959
9960
9961
9962
9963
9964
9965
9966
9967
9968
9969
9970
9971
9972
9973
9974
9975
9976
9977
9978
9979
9980
9981
9982
9983
9984
9985
9986
9987
9988
9989
9990
9991
9992 Lisp_Object
9993 decode_string_utf_8 (Lisp_Object string, const char *str, ptrdiff_t str_len,
9994 Lisp_Object buffer, bool nocopy,
9995 Lisp_Object handle_8_bit, Lisp_Object handle_over_uni)
9996 {
9997
9998
9999 #define UTF_8_SEQUENCE_LENGTH(c) \
10000 ((c) < 0xC2 ? 0 \
10001 : (c) < 0xE0 ? 2 \
10002 : (c) < 0xF0 ? 3 \
10003 : (c) < 0xF8 ? 4 \
10004 : (c) == 0xF8 ? 5 \
10005 : 0)
10006
10007 ptrdiff_t nbytes = STRINGP (string) ? SBYTES (string) : str_len;
10008 unsigned char *p = STRINGP (string) ? SDATA (string) : (unsigned char *) str;
10009 unsigned char *str_orig = p;
10010 unsigned char *pend = p + nbytes;
10011 ptrdiff_t num_8_bit = 0;
10012 ptrdiff_t num_over_4 = 0;
10013 ptrdiff_t num_over_5 = 0;
10014 ptrdiff_t outbytes = nbytes;
10015 ptrdiff_t outchars = 0;
10016 unsigned char *src = NULL, *dst = NULL;
10017 bool change_byte_sequence = false;
10018
10019
10020
10021
10022
10023 while (p < pend)
10024 {
10025 src = p;
10026
10027 while (p < pend && *p < 0x80) p++;
10028 outchars += (p - src);
10029 if (p == pend)
10030 break;
10031 int c = *p;
10032 outchars++;
10033 int len = UTF_8_SEQUENCE_LENGTH (c);
10034
10035 if (UTF_8_EXTRA_OCTET_P (p[1])
10036 && (len == 2
10037 || (UTF_8_EXTRA_OCTET_P (p[2])
10038 && (len == 3
10039 || (UTF_8_EXTRA_OCTET_P (p[3])
10040 && len == 4
10041 && STRING_CHAR (p) <= MAX_UNICODE_CHAR)))))
10042 {
10043 p += len;
10044 continue;
10045 }
10046
10047
10048 if (len == 0)
10049 {
10050 if (NILP (handle_8_bit))
10051 return Qnil;
10052 num_8_bit++;
10053 len = 1;
10054 }
10055 else
10056 {
10057 if (NILP (handle_over_uni))
10058 return Qnil;
10059 if (len == 4)
10060 num_over_4++;
10061 else
10062 num_over_5++;
10063 }
10064 change_byte_sequence = true;
10065 p += len;
10066 }
10067
10068 Lisp_Object val;
10069
10070 if (! change_byte_sequence
10071 && NILP (buffer))
10072 {
10073 if (nocopy && STRINGP (string))
10074 return string;
10075 val = make_uninit_multibyte_string (outchars, outbytes);
10076 memcpy (SDATA (val), str_orig, pend - str_orig);
10077 return val;
10078 }
10079
10080
10081 unsigned char *replace_8_bit = NULL, *replace_over_uni = NULL;
10082 int replace_8_bit_len = 0, replace_over_uni_len = 0;
10083
10084 if (change_byte_sequence)
10085 {
10086 if (num_8_bit > 0)
10087 {
10088 if (CHARACTERP (handle_8_bit))
10089 replace_8_bit = get_char_bytes (XFIXNUM (handle_8_bit),
10090 &replace_8_bit_len);
10091 else if (STRINGP (handle_8_bit))
10092 {
10093 replace_8_bit = SDATA (handle_8_bit);
10094 replace_8_bit_len = SBYTES (handle_8_bit);
10095 }
10096 if (replace_8_bit)
10097 outbytes += (replace_8_bit_len - 1) * num_8_bit;
10098 else if (EQ (handle_8_bit, Qignored))
10099 {
10100 outbytes -= num_8_bit;
10101 outchars -= num_8_bit;
10102 }
10103 else
10104 outbytes += num_8_bit;
10105 }
10106 else if (num_over_4 + num_over_5 > 0)
10107 {
10108 if (CHARACTERP (handle_over_uni))
10109 replace_over_uni = get_char_bytes (XFIXNUM (handle_over_uni),
10110 &replace_over_uni_len);
10111 else if (STRINGP (handle_over_uni))
10112 {
10113 replace_over_uni = SDATA (handle_over_uni);
10114 replace_over_uni_len = SBYTES (handle_over_uni);
10115 }
10116 if (num_over_4 > 0)
10117 {
10118 if (replace_over_uni)
10119 outbytes += (replace_over_uni_len - 4) * num_over_4;
10120 else if (EQ (handle_over_uni, Qignored))
10121 {
10122 outbytes -= 4 * num_over_4;
10123 outchars -= num_over_4;
10124 }
10125 }
10126 if (num_over_5 > 0)
10127 {
10128 if (replace_over_uni)
10129 outbytes += (replace_over_uni_len - 5) * num_over_5;
10130 else if (EQ (handle_over_uni, Qignored))
10131 {
10132 outbytes -= 5 * num_over_5;
10133 outchars -= num_over_5;
10134 }
10135 }
10136 }
10137 }
10138
10139
10140 if (BUFFERP (buffer))
10141 {
10142 val = make_fixnum (outchars);
10143 dst = get_buffer_gap_address (buffer, outbytes);
10144 }
10145 else
10146 {
10147 if (nocopy && (num_8_bit + num_over_4 + num_over_5) == 0
10148 && STRINGP (string))
10149 return string;
10150 val = make_uninit_multibyte_string (outchars, outbytes);
10151 dst = SDATA (val);
10152 }
10153
10154 src = str_orig;
10155 if (change_byte_sequence)
10156 {
10157 p = src;
10158 while (p < pend)
10159 {
10160
10161
10162
10163
10164 int c = *p;
10165 if (c < 0x80)
10166 {
10167 p++;
10168 continue;
10169 }
10170 int len = UTF_8_SEQUENCE_LENGTH (c);
10171 if (len > 1)
10172 {
10173 int mlen;
10174 for (mlen = 1; mlen < len && UTF_8_EXTRA_OCTET_P (p[mlen]);
10175 mlen++);
10176 if (mlen == len
10177 && (len <= 3
10178 || (len == 4 && STRING_CHAR (p) <= MAX_UNICODE_CHAR)
10179 || EQ (handle_over_uni, Qt)))
10180 {
10181 p += len;
10182 continue;
10183 }
10184 }
10185
10186 if (src < p)
10187 {
10188 memcpy (dst, src, p - src);
10189 dst += p - src;
10190 }
10191 if (len == 0)
10192 {
10193 if (replace_8_bit)
10194 {
10195 memcpy (dst, replace_8_bit, replace_8_bit_len);
10196 dst += replace_8_bit_len;
10197 }
10198 else if (EQ (handle_8_bit, Qt))
10199 {
10200 dst += BYTE8_STRING (c, dst);
10201 }
10202 len = 1;
10203 }
10204 else
10205 {
10206
10207 if (replace_over_uni)
10208 {
10209 memcpy (dst, replace_over_uni, replace_over_uni_len);
10210 dst += replace_over_uni_len;
10211 }
10212 }
10213 p += len;
10214 src = p;
10215 }
10216 }
10217
10218 if (src < pend)
10219 memcpy (dst, src, pend - src);
10220 if (BUFFERP (buffer))
10221 {
10222 struct buffer *oldb = current_buffer;
10223
10224 current_buffer = XBUFFER (buffer);
10225 insert_from_gap (outchars, outbytes, false);
10226 current_buffer = oldb;
10227 }
10228 return val;
10229 }
10230
10231
10232
10233 #ifdef ENABLE_UTF_8_CONVERTER_TEST
10234
10235
10236
10237
10238
10239
10240
10241
10242
10243
10244
10245 DEFUN ("internal-encode-string-utf-8", Finternal_encode_string_utf_8,
10246 Sinternal_encode_string_utf_8, 7, 7, 0,
10247 doc: )
10248 (Lisp_Object string, Lisp_Object buffer, Lisp_Object nocopy,
10249 Lisp_Object handle_8_bit, Lisp_Object handle_over_uni,
10250 Lisp_Object encode_method, Lisp_Object count)
10251 {
10252 int repeat_count;
10253 Lisp_Object val;
10254
10255
10256 if (! STRINGP (string))
10257 return Qnil;
10258 if (! NILP (buffer)
10259 && (! BUFFERP (buffer)
10260 || ! NILP (BVAR (XBUFFER (buffer), enable_multibyte_characters))))
10261 return Qnil;
10262 if (! NILP (handle_8_bit) && ! EQ (handle_8_bit, Qt)
10263 && ! EQ (handle_8_bit, Qignored)
10264 && ! CHARACTERP (handle_8_bit)
10265 && (! STRINGP (handle_8_bit) || STRING_MULTIBYTE (handle_8_bit)))
10266 return Qnil;
10267 if (! NILP (handle_over_uni) && ! EQ (handle_over_uni, Qt)
10268 && ! EQ (handle_over_uni, Qignored)
10269 && ! CHARACTERP (handle_over_uni)
10270 && (! STRINGP (handle_over_uni) || STRING_MULTIBYTE (handle_over_uni)))
10271 return Qnil;
10272
10273 CHECK_FIXNUM (count);
10274 repeat_count = XFIXNUM (count);
10275
10276 val = Qnil;
10277
10278 if (NILP (encode_method))
10279 {
10280 for (int i = 0; i < repeat_count; i++)
10281 val = encode_string_utf_8 (string, buffer, ! NILP (nocopy),
10282 handle_8_bit, handle_over_uni);
10283 }
10284 else
10285 {
10286 for (int i = 0; i < repeat_count; i++)
10287 val = code_convert_string (string, Qutf_8_unix, Qnil, true,
10288 ! NILP (nocopy), true);
10289 }
10290 return val;
10291 }
10292
10293
10294
10295
10296
10297
10298
10299
10300
10301 DEFUN ("internal-decode-string-utf-8", Finternal_decode_string_utf_8,
10302 Sinternal_decode_string_utf_8, 7, 7, 0,
10303 doc: )
10304 (Lisp_Object string, Lisp_Object buffer, Lisp_Object nocopy,
10305 Lisp_Object handle_8_bit, Lisp_Object handle_over_uni,
10306 Lisp_Object decode_method, Lisp_Object count)
10307 {
10308 int repeat_count;
10309 Lisp_Object val;
10310
10311
10312 if (! STRINGP (string))
10313 return Qnil;
10314 if (! NILP (buffer)
10315 && (! BUFFERP (buffer)
10316 || NILP (BVAR (XBUFFER (buffer), enable_multibyte_characters))))
10317 return Qnil;
10318 if (! NILP (handle_8_bit) && ! EQ (handle_8_bit, Qt)
10319 && ! EQ (handle_8_bit, Qignored)
10320 && ! CHARACTERP (handle_8_bit)
10321 && (! STRINGP (handle_8_bit) || ! STRING_MULTIBYTE (handle_8_bit)))
10322 return Qnil;
10323 if (! NILP (handle_over_uni) && ! EQ (handle_over_uni, Qt)
10324 && ! EQ (handle_over_uni, Qignored)
10325 && ! CHARACTERP (handle_over_uni)
10326 && (! STRINGP (handle_over_uni) || ! STRING_MULTIBYTE (handle_over_uni)))
10327 return Qnil;
10328
10329 CHECK_FIXNUM (count);
10330 repeat_count = XFIXNUM (count);
10331
10332 val = Qnil;
10333
10334 if (NILP (decode_method))
10335 {
10336 for (int i = 0; i < repeat_count; i++)
10337 val = decode_string_utf_8 (string, buffer, ! NILP (nocopy),
10338 handle_8_bit, handle_over_uni);
10339 }
10340 else if (EQ (decode_method, Qt))
10341 {
10342 if (! BUFFERP (buffer))
10343 buffer = Qt;
10344 for (int i = 0; i < repeat_count; i++)
10345 val = code_convert_string (string, Qutf_8_unix, buffer, false,
10346 ! NILP (nocopy), true);
10347 }
10348 else if (! NILP (decode_method))
10349 {
10350 for (int i = 0; i < repeat_count; i++)
10351 val = make_string_from_utf8 ((char *) SDATA (string), SBYTES (string));
10352 }
10353 return val;
10354 }
10355
10356 #endif
10357
10358
10359
10360
10361 static Lisp_Object
10362 convert_string_nocopy (Lisp_Object string, Lisp_Object coding_system,
10363 bool encodep)
10364 {
10365 return code_convert_string (string, coding_system, Qt, encodep, 1, 1);
10366 }
10367
10368
10369
10370 Lisp_Object
10371 decode_file_name (Lisp_Object fname)
10372 {
10373 #ifdef WINDOWSNT
10374
10375
10376
10377 if (! NILP (Fcoding_system_p (Qutf_8)))
10378 return convert_string_nocopy (fname, Qutf_8, 0);
10379 return fname;
10380 #else
10381 if (! NILP (Vfile_name_coding_system))
10382 return convert_string_nocopy (fname, Vfile_name_coding_system, 0);
10383 else if (! NILP (Vdefault_file_name_coding_system))
10384 return convert_string_nocopy (fname, Vdefault_file_name_coding_system, 0);
10385 else
10386 return fname;
10387 #endif
10388 }
10389
10390 static Lisp_Object
10391 encode_file_name_1 (Lisp_Object fname)
10392 {
10393
10394
10395
10396
10397 if (!STRING_MULTIBYTE (fname))
10398 return fname;
10399 #ifdef WINDOWSNT
10400
10401
10402
10403 if (! NILP (Fcoding_system_p (Qutf_8)))
10404 return convert_string_nocopy (fname, Qutf_8, 1);
10405 return fname;
10406 #else
10407 if (! NILP (Vfile_name_coding_system))
10408 return convert_string_nocopy (fname, Vfile_name_coding_system, 1);
10409 else if (! NILP (Vdefault_file_name_coding_system))
10410 return convert_string_nocopy (fname, Vdefault_file_name_coding_system, 1);
10411 else
10412 return fname;
10413 #endif
10414 }
10415
10416 Lisp_Object
10417 encode_file_name (Lisp_Object fname)
10418 {
10419 Lisp_Object encoded = encode_file_name_1 (fname);
10420
10421
10422
10423
10424 CHECK_STRING_NULL_BYTES (encoded);
10425 return encoded;
10426 }
10427
10428 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
10429 2, 4, 0,
10430 doc:
10431
10432
10433
10434
10435
10436
10437
10438
10439
10440
10441
10442
10443 )
10444 (Lisp_Object string, Lisp_Object coding_system, Lisp_Object nocopy, Lisp_Object buffer)
10445 {
10446 return code_convert_string (string, coding_system, buffer,
10447 0, ! NILP (nocopy), 0);
10448 }
10449
10450 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
10451 2, 4, 0,
10452 doc:
10453
10454
10455
10456
10457
10458
10459
10460
10461
10462
10463 )
10464 (Lisp_Object string, Lisp_Object coding_system, Lisp_Object nocopy, Lisp_Object buffer)
10465 {
10466 return code_convert_string (string, coding_system, buffer,
10467 1, ! NILP (nocopy), 0);
10468 }
10469
10470
10471 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
10472 doc:
10473 )
10474 (Lisp_Object code)
10475 {
10476 Lisp_Object spec, attrs, val;
10477 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
10478 EMACS_INT ch;
10479 int c;
10480
10481 CHECK_FIXNAT (code);
10482 ch = XFIXNAT (code);
10483 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
10484 attrs = AREF (spec, 0);
10485
10486 if (ASCII_CHAR_P (ch)
10487 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
10488 return code;
10489
10490 val = CODING_ATTR_CHARSET_LIST (attrs);
10491 charset_roman = CHARSET_FROM_ID (XFIXNUM (XCAR (val))), val = XCDR (val);
10492 charset_kana = CHARSET_FROM_ID (XFIXNUM (XCAR (val))), val = XCDR (val);
10493 charset_kanji = CHARSET_FROM_ID (XFIXNUM (XCAR (val)));
10494
10495 if (ch <= 0x7F)
10496 {
10497 c = ch;
10498 charset = charset_roman;
10499 }
10500 else if (ch >= 0xA0 && ch < 0xDF)
10501 {
10502 c = ch - 0x80;
10503 charset = charset_kana;
10504 }
10505 else
10506 {
10507 EMACS_INT c1 = ch >> 8;
10508 int c2 = ch & 0xFF;
10509
10510 if (c1 < 0x81 || (c1 > 0x9F && c1 < 0xE0) || c1 > 0xEF
10511 || c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
10512 error ("Invalid code: %"pI"d", ch);
10513 c = ch;
10514 SJIS_TO_JIS (c);
10515 charset = charset_kanji;
10516 }
10517 c = DECODE_CHAR (charset, c);
10518 if (c < 0)
10519 error ("Invalid code: %"pI"d", ch);
10520 return make_fixnum (c);
10521 }
10522
10523
10524 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
10525 doc:
10526 )
10527 (Lisp_Object ch)
10528 {
10529 Lisp_Object spec, attrs, charset_list;
10530 int c;
10531 struct charset *charset;
10532 unsigned code;
10533
10534 CHECK_CHARACTER (ch);
10535 c = XFIXNAT (ch);
10536 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
10537 attrs = AREF (spec, 0);
10538
10539 if (ASCII_CHAR_P (c)
10540 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
10541 return ch;
10542
10543 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
10544 charset = char_charset (c, charset_list, &code);
10545 if (code == CHARSET_INVALID_CODE (charset))
10546 error ("Can't encode by shift_jis encoding: %c", c);
10547 JIS_TO_SJIS (code);
10548
10549 return make_fixnum (code);
10550 }
10551
10552 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
10553 doc:
10554 )
10555 (Lisp_Object code)
10556 {
10557 Lisp_Object spec, attrs, val;
10558 struct charset *charset_roman, *charset_big5, *charset;
10559 EMACS_INT ch;
10560 int c;
10561
10562 CHECK_FIXNAT (code);
10563 ch = XFIXNAT (code);
10564 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
10565 attrs = AREF (spec, 0);
10566
10567 if (ASCII_CHAR_P (ch)
10568 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
10569 return code;
10570
10571 val = CODING_ATTR_CHARSET_LIST (attrs);
10572 charset_roman = CHARSET_FROM_ID (XFIXNUM (XCAR (val))), val = XCDR (val);
10573 charset_big5 = CHARSET_FROM_ID (XFIXNUM (XCAR (val)));
10574
10575 if (ch <= 0x7F)
10576 {
10577 c = ch;
10578 charset = charset_roman;
10579 }
10580 else
10581 {
10582 EMACS_INT b1 = ch >> 8;
10583 int b2 = ch & 0x7F;
10584 if (b1 < 0xA1 || b1 > 0xFE
10585 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
10586 error ("Invalid code: %"pI"d", ch);
10587 c = ch;
10588 charset = charset_big5;
10589 }
10590 c = DECODE_CHAR (charset, c);
10591 if (c < 0)
10592 error ("Invalid code: %"pI"d", ch);
10593 return make_fixnum (c);
10594 }
10595
10596 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
10597 doc:
10598 )
10599 (Lisp_Object ch)
10600 {
10601 Lisp_Object spec, attrs, charset_list;
10602 struct charset *charset;
10603 int c;
10604 unsigned code;
10605
10606 CHECK_CHARACTER (ch);
10607 c = XFIXNAT (ch);
10608 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
10609 attrs = AREF (spec, 0);
10610 if (ASCII_CHAR_P (c)
10611 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
10612 return ch;
10613
10614 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
10615 charset = char_charset (c, charset_list, &code);
10616 if (code == CHARSET_INVALID_CODE (charset))
10617 error ("Can't encode by Big5 encoding: %c", c);
10618
10619 return make_fixnum (code);
10620 }
10621
10622
10623 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
10624 Sset_terminal_coding_system_internal, 1, 2, 0,
10625 doc: )
10626 (Lisp_Object coding_system, Lisp_Object terminal)
10627 {
10628 struct terminal *term = decode_live_terminal (terminal);
10629 struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (term);
10630 CHECK_SYMBOL (coding_system);
10631 setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
10632
10633 terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
10634
10635 terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
10636 terminal_coding->src_multibyte = 1;
10637 terminal_coding->dst_multibyte = 0;
10638 tset_charset_list
10639 (term, (terminal_coding->common_flags & CODING_REQUIRE_ENCODING_MASK
10640 ? coding_charset_list (terminal_coding)
10641 : list1i (charset_ascii)));
10642 return Qnil;
10643 }
10644
10645 DEFUN ("set-safe-terminal-coding-system-internal",
10646 Fset_safe_terminal_coding_system_internal,
10647 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
10648 doc: )
10649 (Lisp_Object coding_system)
10650 {
10651 CHECK_SYMBOL (coding_system);
10652 setup_coding_system (Fcheck_coding_system (coding_system),
10653 &safe_terminal_coding);
10654
10655 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
10656 safe_terminal_coding.src_multibyte = 1;
10657 safe_terminal_coding.dst_multibyte = 0;
10658 return Qnil;
10659 }
10660
10661 DEFUN ("terminal-coding-system", Fterminal_coding_system,
10662 Sterminal_coding_system, 0, 1, 0,
10663 doc:
10664
10665 )
10666 (Lisp_Object terminal)
10667 {
10668 struct coding_system *terminal_coding
10669 = TERMINAL_TERMINAL_CODING (decode_live_terminal (terminal));
10670 Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
10671
10672
10673 return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
10674 }
10675
10676 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
10677 Sset_keyboard_coding_system_internal, 1, 2, 0,
10678 doc: )
10679 (Lisp_Object coding_system, Lisp_Object terminal)
10680 {
10681 struct terminal *t = decode_live_terminal (terminal);
10682 CHECK_SYMBOL (coding_system);
10683 if (NILP (coding_system))
10684 coding_system = Qno_conversion;
10685 else
10686 Fcheck_coding_system (coding_system);
10687 setup_coding_system (coding_system, TERMINAL_KEYBOARD_CODING (t));
10688
10689 TERMINAL_KEYBOARD_CODING (t)->common_flags
10690 &= ~CODING_ANNOTATE_COMPOSITION_MASK;
10691 return Qnil;
10692 }
10693
10694 DEFUN ("keyboard-coding-system",
10695 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
10696 doc: )
10697 (Lisp_Object terminal)
10698 {
10699 return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
10700 (decode_live_terminal (terminal))->id);
10701 }
10702
10703
10704 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
10705 Sfind_operation_coding_system, 1, MANY, 0,
10706 doc:
10707
10708
10709
10710
10711
10712
10713
10714
10715
10716
10717
10718
10719
10720
10721
10722
10723
10724
10725
10726
10727
10728
10729
10730
10731
10732
10733
10734
10735
10736
10737
10738
10739
10740
10741
10742
10743
10744 )
10745 (ptrdiff_t nargs, Lisp_Object *args)
10746 {
10747 Lisp_Object operation, target_idx, target, val;
10748 register Lisp_Object chain;
10749
10750 if (nargs < 2)
10751 error ("Too few arguments");
10752 operation = args[0];
10753 if (!SYMBOLP (operation)
10754 || (target_idx = Fget (operation, Qtarget_idx), !FIXNATP (target_idx)))
10755 error ("Invalid first argument");
10756 if (nargs <= 1 + XFIXNAT (target_idx))
10757 error ("Too few arguments for operation `%s'",
10758 SDATA (SYMBOL_NAME (operation)));
10759 target = args[XFIXNAT (target_idx) + 1];
10760 if (!(STRINGP (target)
10761 || (EQ (operation, Qinsert_file_contents) && CONSP (target)
10762 && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
10763 || (EQ (operation, Qopen_network_stream)
10764 && (FIXNUMP (target) || EQ (target, Qt)))))
10765 error ("Invalid argument %"pI"d of operation `%s'",
10766 XFIXNAT (target_idx) + 1, SDATA (SYMBOL_NAME (operation)));
10767 if (CONSP (target))
10768 target = XCAR (target);
10769
10770 chain = ((EQ (operation, Qinsert_file_contents)
10771 || EQ (operation, Qwrite_region))
10772 ? Vfile_coding_system_alist
10773 : (EQ (operation, Qopen_network_stream)
10774 ? Vnetwork_coding_system_alist
10775 : Vprocess_coding_system_alist));
10776 if (NILP (chain))
10777 return Qnil;
10778
10779 for (; CONSP (chain); chain = XCDR (chain))
10780 {
10781 Lisp_Object elt;
10782
10783 elt = XCAR (chain);
10784 if (CONSP (elt)
10785 && ((STRINGP (target)
10786 && STRINGP (XCAR (elt))
10787 && fast_string_match (XCAR (elt), target) >= 0)
10788 || (FIXNUMP (target) && BASE_EQ (target, XCAR (elt)))))
10789 {
10790 val = XCDR (elt);
10791
10792
10793 if (CONSP (val))
10794 return val;
10795 if (! SYMBOLP (val))
10796 return Qnil;
10797 if (! NILP (Fcoding_system_p (val)))
10798 return Fcons (val, val);
10799 if (! NILP (Ffboundp (val)))
10800 {
10801
10802
10803
10804 val = call1 (val, Flist (nargs, args));
10805 if (CONSP (val))
10806 return val;
10807 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
10808 return Fcons (val, val);
10809 }
10810 return Qnil;
10811 }
10812 }
10813 return Qnil;
10814 }
10815
10816 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
10817 Sset_coding_system_priority, 0, MANY, 0,
10818 doc:
10819
10820
10821
10822 )
10823 (ptrdiff_t nargs, Lisp_Object *args)
10824 {
10825 ptrdiff_t i, j;
10826 bool changed[coding_category_max];
10827 enum coding_category priorities[coding_category_max];
10828
10829 memset (changed, 0, sizeof changed);
10830
10831 for (i = j = 0; i < nargs; i++)
10832 {
10833 enum coding_category category;
10834 Lisp_Object spec, attrs;
10835
10836 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
10837 attrs = AREF (spec, 0);
10838 category = XFIXNUM (CODING_ATTR_CATEGORY (attrs));
10839 if (changed[category])
10840
10841
10842 continue;
10843 changed[category] = 1;
10844 priorities[j++] = category;
10845 if (coding_categories[category].id >= 0
10846 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
10847 setup_coding_system (args[i], &coding_categories[category]);
10848 Fset (AREF (Vcoding_category_table, category), args[i]);
10849 }
10850
10851
10852
10853
10854 for (i = j, j = 0; i < coding_category_max; i++, j++)
10855 {
10856 while (j < coding_category_max
10857 && changed[coding_priorities[j]])
10858 j++;
10859 if (j == coding_category_max)
10860 emacs_abort ();
10861 priorities[i] = coding_priorities[j];
10862 }
10863
10864 memcpy (coding_priorities, priorities, sizeof priorities);
10865
10866
10867 Vcoding_category_list = Qnil;
10868 for (i = coding_category_max; i-- > 0; )
10869 Vcoding_category_list
10870 = Fcons (AREF (Vcoding_category_table, priorities[i]),
10871 Vcoding_category_list);
10872
10873 return Qnil;
10874 }
10875
10876 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
10877 Scoding_system_priority_list, 0, 1, 0,
10878 doc:
10879
10880
10881
10882 )
10883 (Lisp_Object highestp)
10884 {
10885 int i;
10886 Lisp_Object val;
10887
10888 for (i = 0, val = Qnil; i < coding_category_max; i++)
10889 {
10890 enum coding_category category = coding_priorities[i];
10891 int id = coding_categories[category].id;
10892 Lisp_Object attrs;
10893
10894 if (id < 0)
10895 continue;
10896 attrs = CODING_ID_ATTRS (id);
10897 if (! NILP (highestp))
10898 return CODING_ATTR_BASE_NAME (attrs);
10899 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
10900 }
10901 return Fnreverse (val);
10902 }
10903
10904 static Lisp_Object
10905 make_subsidiaries (Lisp_Object base)
10906 {
10907 static char const suffixes[][8] = { "-unix", "-dos", "-mac" };
10908 ptrdiff_t base_name_len = SBYTES (SYMBOL_NAME (base));
10909 USE_SAFE_ALLOCA;
10910 char *buf = SAFE_ALLOCA (base_name_len + 6);
10911
10912 memcpy (buf, SDATA (SYMBOL_NAME (base)), base_name_len);
10913 Lisp_Object subsidiaries = make_nil_vector (3);
10914 for (int i = 0; i < 3; i++)
10915 {
10916 strcpy (buf + base_name_len, suffixes[i]);
10917 ASET (subsidiaries, i, intern (buf));
10918 }
10919 SAFE_FREE ();
10920 return subsidiaries;
10921 }
10922
10923
10924 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
10925 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
10926 doc:
10927 )
10928 (ptrdiff_t nargs, Lisp_Object *args)
10929 {
10930 enum coding_category category;
10931 int max_charset_id = 0;
10932
10933 if (nargs < coding_arg_max)
10934 goto short_args;
10935
10936 Lisp_Object attrs = make_nil_vector (coding_attr_last_index);
10937
10938 Lisp_Object name = args[coding_arg_name];
10939 CHECK_SYMBOL (name);
10940 ASET (attrs, coding_attr_base_name, name);
10941
10942 Lisp_Object val = args[coding_arg_mnemonic];
10943
10944 if (STRINGP (val))
10945 val = make_fixnum (STRING_CHAR (SDATA (val)));
10946 else
10947 CHECK_CHARACTER (val);
10948 ASET (attrs, coding_attr_mnemonic, val);
10949
10950 Lisp_Object coding_type = args[coding_arg_coding_type];
10951 CHECK_SYMBOL (coding_type);
10952 ASET (attrs, coding_attr_type, coding_type);
10953
10954 Lisp_Object charset_list = args[coding_arg_charset_list];
10955 if (SYMBOLP (charset_list))
10956 {
10957 if (EQ (charset_list, Qiso_2022))
10958 {
10959 if (! EQ (coding_type, Qiso_2022))
10960 error ("Invalid charset-list");
10961 charset_list = Viso_2022_charset_list;
10962 }
10963 else if (EQ (charset_list, Qemacs_mule))
10964 {
10965 if (! EQ (coding_type, Qemacs_mule))
10966 error ("Invalid charset-list");
10967 charset_list = Vemacs_mule_charset_list;
10968 }
10969 for (Lisp_Object tail = charset_list; CONSP (tail); tail = XCDR (tail))
10970 {
10971 if (! RANGED_FIXNUMP (0, XCAR (tail), INT_MAX - 1))
10972 error ("Invalid charset-list");
10973 if (max_charset_id < XFIXNAT (XCAR (tail)))
10974 max_charset_id = XFIXNAT (XCAR (tail));
10975 }
10976 }
10977 else
10978 {
10979 charset_list = Fcopy_sequence (charset_list);
10980 for (Lisp_Object tail = charset_list; CONSP (tail); tail = XCDR (tail))
10981 {
10982 struct charset *charset;
10983
10984 val = XCAR (tail);
10985 CHECK_CHARSET_GET_CHARSET (val, charset);
10986 if (EQ (coding_type, Qiso_2022)
10987 ? CHARSET_ISO_FINAL (charset) < 0
10988 : EQ (coding_type, Qemacs_mule)
10989 ? CHARSET_EMACS_MULE_ID (charset) < 0
10990 : 0)
10991 error ("Can't handle charset `%s'",
10992 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10993
10994 XSETCAR (tail, make_fixnum (charset->id));
10995 if (max_charset_id < charset->id)
10996 max_charset_id = charset->id;
10997 }
10998 }
10999 ASET (attrs, coding_attr_charset_list, charset_list);
11000
11001 Lisp_Object safe_charsets = make_uninit_string (max_charset_id + 1);
11002 memset (SDATA (safe_charsets), 255, max_charset_id + 1);
11003 for (Lisp_Object tail = charset_list; CONSP (tail); tail = XCDR (tail))
11004 SSET (safe_charsets, XFIXNAT (XCAR (tail)), 0);
11005 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
11006
11007 ASET (attrs, coding_attr_ascii_compat, args[coding_arg_ascii_compatible_p]);
11008
11009 val = args[coding_arg_decode_translation_table];
11010 if (! CHAR_TABLE_P (val) && ! CONSP (val))
11011 CHECK_SYMBOL (val);
11012 ASET (attrs, coding_attr_decode_tbl, val);
11013
11014 val = args[coding_arg_encode_translation_table];
11015 if (! CHAR_TABLE_P (val) && ! CONSP (val))
11016 CHECK_SYMBOL (val);
11017 ASET (attrs, coding_attr_encode_tbl, val);
11018
11019 val = args[coding_arg_post_read_conversion];
11020 CHECK_SYMBOL (val);
11021 ASET (attrs, coding_attr_post_read, val);
11022
11023 val = args[coding_arg_pre_write_conversion];
11024 CHECK_SYMBOL (val);
11025 ASET (attrs, coding_attr_pre_write, val);
11026
11027 val = args[coding_arg_default_char];
11028 if (NILP (val))
11029 ASET (attrs, coding_attr_default_char, make_fixnum (' '));
11030 else
11031 {
11032 CHECK_CHARACTER (val);
11033 ASET (attrs, coding_attr_default_char, val);
11034 }
11035
11036 val = args[coding_arg_for_unibyte];
11037 ASET (attrs, coding_attr_for_unibyte, NILP (val) ? Qnil : Qt);
11038
11039 val = args[coding_arg_plist];
11040 CHECK_LIST (val);
11041 ASET (attrs, coding_attr_plist, val);
11042
11043 if (EQ (coding_type, Qcharset))
11044 {
11045
11046
11047
11048
11049
11050
11051
11052
11053
11054
11055
11056
11057 val = make_nil_vector (256);
11058
11059 for (Lisp_Object tail = charset_list; CONSP (tail); tail = XCDR (tail))
11060 {
11061 struct charset *charset = CHARSET_FROM_ID (XFIXNAT (XCAR (tail)));
11062 int dim = CHARSET_DIMENSION (charset);
11063 int idx = (dim - 1) * 4;
11064
11065 if (CHARSET_ASCII_COMPATIBLE_P (charset))
11066 ASET (attrs, coding_attr_ascii_compat, Qt);
11067
11068 for (int i = charset->code_space[idx];
11069 i <= charset->code_space[idx + 1]; i++)
11070 {
11071 Lisp_Object tmp, tmp2;
11072 int dim2;
11073
11074 tmp = AREF (val, i);
11075 if (NILP (tmp))
11076 tmp = XCAR (tail);
11077 else if (FIXNATP (tmp))
11078 {
11079 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFIXNAT (tmp)));
11080 if (dim < dim2)
11081 tmp = list2 (XCAR (tail), tmp);
11082 else
11083 tmp = list2 (tmp, XCAR (tail));
11084 }
11085 else
11086 {
11087 for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
11088 {
11089 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFIXNAT (XCAR (tmp2))));
11090 if (dim < dim2)
11091 break;
11092 }
11093 if (NILP (tmp2))
11094 tmp = nconc2 (tmp, list1 (XCAR (tail)));
11095 else
11096 {
11097 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
11098 XSETCAR (tmp2, XCAR (tail));
11099 }
11100 }
11101 ASET (val, i, tmp);
11102 }
11103 }
11104 ASET (attrs, coding_attr_charset_valids, val);
11105 category = coding_category_charset;
11106 }
11107 else if (EQ (coding_type, Qccl))
11108 {
11109 Lisp_Object valids;
11110
11111 if (nargs < coding_arg_ccl_max)
11112 goto short_args;
11113
11114 val = args[coding_arg_ccl_decoder];
11115 CHECK_CCL_PROGRAM (val);
11116 if (VECTORP (val))
11117 val = Fcopy_sequence (val);
11118 ASET (attrs, coding_attr_ccl_decoder, val);
11119
11120 val = args[coding_arg_ccl_encoder];
11121 CHECK_CCL_PROGRAM (val);
11122 if (VECTORP (val))
11123 val = Fcopy_sequence (val);
11124 ASET (attrs, coding_attr_ccl_encoder, val);
11125
11126 val = args[coding_arg_ccl_valids];
11127 valids = Fmake_string (make_fixnum (256), make_fixnum (0), Qnil);
11128 for (Lisp_Object tail = val; CONSP (tail); tail = XCDR (tail))
11129 {
11130 int from, to;
11131
11132 val = XCAR (tail);
11133 if (FIXNUMP (val))
11134 {
11135 if (! (0 <= XFIXNUM (val) && XFIXNUM (val) <= 255))
11136 args_out_of_range_3 (val, make_fixnum (0), make_fixnum (255));
11137 from = to = XFIXNUM (val);
11138 }
11139 else
11140 {
11141 CHECK_CONS (val);
11142 from = check_integer_range (XCAR (val), 0, 255);
11143 to = check_integer_range (XCDR (val), from, 255);
11144 }
11145 for (int i = from; i <= to; i++)
11146 SSET (valids, i, 1);
11147 }
11148 ASET (attrs, coding_attr_ccl_valids, valids);
11149
11150 category = coding_category_ccl;
11151 }
11152 else if (EQ (coding_type, Qutf_16))
11153 {
11154 Lisp_Object bom, endian;
11155
11156 ASET (attrs, coding_attr_ascii_compat, Qnil);
11157
11158 if (nargs < coding_arg_utf16_max)
11159 goto short_args;
11160
11161 bom = args[coding_arg_utf16_bom];
11162 if (! NILP (bom) && ! EQ (bom, Qt))
11163 {
11164 CHECK_CONS (bom);
11165 val = XCAR (bom);
11166 CHECK_CODING_SYSTEM (val);
11167 val = XCDR (bom);
11168 CHECK_CODING_SYSTEM (val);
11169 }
11170 ASET (attrs, coding_attr_utf_bom, bom);
11171
11172 endian = args[coding_arg_utf16_endian];
11173 CHECK_SYMBOL (endian);
11174 if (NILP (endian))
11175 endian = Qbig;
11176 else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
11177 error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
11178 ASET (attrs, coding_attr_utf_16_endian, endian);
11179
11180 category = (CONSP (bom)
11181 ? coding_category_utf_16_auto
11182 : NILP (bom)
11183 ? (EQ (endian, Qbig)
11184 ? coding_category_utf_16_be_nosig
11185 : coding_category_utf_16_le_nosig)
11186 : (EQ (endian, Qbig)
11187 ? coding_category_utf_16_be
11188 : coding_category_utf_16_le));
11189 }
11190 else if (EQ (coding_type, Qiso_2022))
11191 {
11192 Lisp_Object initial, reg_usage, request, flags;
11193
11194 if (nargs < coding_arg_iso2022_max)
11195 goto short_args;
11196
11197 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
11198 CHECK_VECTOR (initial);
11199 for (int i = 0; i < 4; i++)
11200 {
11201 val = AREF (initial, i);
11202 if (! NILP (val))
11203 {
11204 struct charset *charset;
11205
11206 CHECK_CHARSET_GET_CHARSET (val, charset);
11207 ASET (initial, i, make_fixnum (CHARSET_ID (charset)));
11208 if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
11209 ASET (attrs, coding_attr_ascii_compat, Qt);
11210 }
11211 else
11212 ASET (initial, i, make_fixnum (-1));
11213 }
11214
11215 reg_usage = args[coding_arg_iso2022_reg_usage];
11216 CHECK_CONS (reg_usage);
11217 CHECK_FIXNUM (XCAR (reg_usage));
11218 CHECK_FIXNUM (XCDR (reg_usage));
11219
11220 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
11221 for (Lisp_Object tail = request; CONSP (tail); tail = XCDR (tail))
11222 {
11223 int id;
11224
11225 val = XCAR (tail);
11226 CHECK_CONS (val);
11227 CHECK_CHARSET_GET_ID (XCAR (val), id);
11228 check_integer_range (XCDR (val), 0, 3);
11229 XSETCAR (val, make_fixnum (id));
11230 }
11231
11232 flags = args[coding_arg_iso2022_flags];
11233 CHECK_FIXNAT (flags);
11234 int i = XFIXNUM (flags) & INT_MAX;
11235 if (EQ (args[coding_arg_charset_list], Qiso_2022))
11236 i |= CODING_ISO_FLAG_FULL_SUPPORT;
11237 flags = make_fixnum (i);
11238
11239 ASET (attrs, coding_attr_iso_initial, initial);
11240 ASET (attrs, coding_attr_iso_usage, reg_usage);
11241 ASET (attrs, coding_attr_iso_request, request);
11242 ASET (attrs, coding_attr_iso_flags, flags);
11243 setup_iso_safe_charsets (attrs);
11244
11245 if (i & CODING_ISO_FLAG_SEVEN_BITS)
11246 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
11247 | CODING_ISO_FLAG_SINGLE_SHIFT))
11248 ? coding_category_iso_7_else
11249 : EQ (args[coding_arg_charset_list], Qiso_2022)
11250 ? coding_category_iso_7
11251 : coding_category_iso_7_tight);
11252 else
11253 {
11254 int id = XFIXNUM (AREF (initial, 1));
11255
11256 category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
11257 || EQ (args[coding_arg_charset_list], Qiso_2022)
11258 || id < 0)
11259 ? coding_category_iso_8_else
11260 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
11261 ? coding_category_iso_8_1
11262 : coding_category_iso_8_2);
11263 }
11264 if (category != coding_category_iso_8_1
11265 && category != coding_category_iso_8_2)
11266 ASET (attrs, coding_attr_ascii_compat, Qnil);
11267 }
11268 else if (EQ (coding_type, Qemacs_mule))
11269 {
11270 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
11271 ASET (attrs, coding_attr_emacs_mule_full, Qt);
11272 ASET (attrs, coding_attr_ascii_compat, Qt);
11273 category = coding_category_emacs_mule;
11274 }
11275 else if (EQ (coding_type, Qshift_jis))
11276 {
11277 ptrdiff_t charset_list_len = list_length (charset_list);
11278 if (charset_list_len != 3 && charset_list_len != 4)
11279 error ("There should be three or four charsets");
11280
11281 struct charset *charset = CHARSET_FROM_ID (XFIXNUM (XCAR (charset_list)));
11282 if (CHARSET_DIMENSION (charset) != 1)
11283 error ("Dimension of charset %s is not one",
11284 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
11285 if (CHARSET_ASCII_COMPATIBLE_P (charset))
11286 ASET (attrs, coding_attr_ascii_compat, Qt);
11287
11288 charset_list = XCDR (charset_list);
11289 charset = CHARSET_FROM_ID (XFIXNUM (XCAR (charset_list)));
11290 if (CHARSET_DIMENSION (charset) != 1)
11291 error ("Dimension of charset %s is not one",
11292 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
11293
11294 charset_list = XCDR (charset_list);
11295 charset = CHARSET_FROM_ID (XFIXNUM (XCAR (charset_list)));
11296 if (CHARSET_DIMENSION (charset) != 2)
11297 error ("Dimension of charset %s is not two",
11298 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
11299
11300 charset_list = XCDR (charset_list);
11301 if (! NILP (charset_list))
11302 {
11303 charset = CHARSET_FROM_ID (XFIXNUM (XCAR (charset_list)));
11304 if (CHARSET_DIMENSION (charset) != 2)
11305 error ("Dimension of charset %s is not two",
11306 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
11307 }
11308
11309 category = coding_category_sjis;
11310 Vsjis_coding_system = name;
11311 }
11312 else if (EQ (coding_type, Qbig5))
11313 {
11314 struct charset *charset;
11315
11316 if (list_length (charset_list) != 2)
11317 error ("There should be just two charsets");
11318
11319 charset = CHARSET_FROM_ID (XFIXNUM (XCAR (charset_list)));
11320 if (CHARSET_DIMENSION (charset) != 1)
11321 error ("Dimension of charset %s is not one",
11322 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
11323 if (CHARSET_ASCII_COMPATIBLE_P (charset))
11324 ASET (attrs, coding_attr_ascii_compat, Qt);
11325
11326 charset_list = XCDR (charset_list);
11327 charset = CHARSET_FROM_ID (XFIXNUM (XCAR (charset_list)));
11328 if (CHARSET_DIMENSION (charset) != 2)
11329 error ("Dimension of charset %s is not two",
11330 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
11331
11332 category = coding_category_big5;
11333 Vbig5_coding_system = name;
11334 }
11335 else if (EQ (coding_type, Qraw_text))
11336 {
11337 category = coding_category_raw_text;
11338 ASET (attrs, coding_attr_ascii_compat, Qt);
11339 }
11340 else if (EQ (coding_type, Qutf_8))
11341 {
11342 Lisp_Object bom;
11343
11344 if (nargs < coding_arg_utf8_max)
11345 goto short_args;
11346
11347 bom = args[coding_arg_utf8_bom];
11348 if (! NILP (bom) && ! EQ (bom, Qt))
11349 {
11350 CHECK_CONS (bom);
11351 val = XCAR (bom);
11352 CHECK_CODING_SYSTEM (val);
11353 val = XCDR (bom);
11354 CHECK_CODING_SYSTEM (val);
11355 }
11356 ASET (attrs, coding_attr_utf_bom, bom);
11357 if (NILP (bom))
11358 ASET (attrs, coding_attr_ascii_compat, Qt);
11359
11360 category = (CONSP (bom) ? coding_category_utf_8_auto
11361 : NILP (bom) ? coding_category_utf_8_nosig
11362 : coding_category_utf_8_sig);
11363 }
11364 else if (EQ (coding_type, Qundecided))
11365 {
11366 if (nargs < coding_arg_undecided_max)
11367 goto short_args;
11368 ASET (attrs, coding_attr_undecided_inhibit_null_byte_detection,
11369 args[coding_arg_undecided_inhibit_null_byte_detection]);
11370 ASET (attrs, coding_attr_undecided_inhibit_iso_escape_detection,
11371 args[coding_arg_undecided_inhibit_iso_escape_detection]);
11372 ASET (attrs, coding_attr_undecided_prefer_utf_8,
11373 args[coding_arg_undecided_prefer_utf_8]);
11374 category = coding_category_undecided;
11375 }
11376 else
11377 error ("Invalid coding system type: %s",
11378 SDATA (SYMBOL_NAME (coding_type)));
11379
11380 ASET (attrs, coding_attr_category, make_fixnum (category));
11381 ASET (attrs, coding_attr_plist,
11382 Fcons (QCcategory,
11383 Fcons (AREF (Vcoding_category_table, category),
11384 CODING_ATTR_PLIST (attrs))));
11385 ASET (attrs, coding_attr_plist,
11386 Fcons (QCascii_compatible_p,
11387 Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
11388 CODING_ATTR_PLIST (attrs))));
11389
11390 Lisp_Object eol_type = args[coding_arg_eol_type];
11391 if (! NILP (eol_type)
11392 && ! EQ (eol_type, Qunix)
11393 && ! EQ (eol_type, Qdos)
11394 && ! EQ (eol_type, Qmac))
11395 error ("Invalid eol-type");
11396
11397 Lisp_Object aliases = list1 (name);
11398
11399 if (NILP (eol_type))
11400 {
11401 eol_type = make_subsidiaries (name);
11402 for (int i = 0; i < 3; i++)
11403 {
11404 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
11405
11406 this_name = AREF (eol_type, i);
11407 this_aliases = list1 (this_name);
11408 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
11409 this_spec = make_uninit_vector (3);
11410 ASET (this_spec, 0, attrs);
11411 ASET (this_spec, 1, this_aliases);
11412 ASET (this_spec, 2, this_eol_type);
11413 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
11414 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
11415 val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist, Qnil);
11416 if (NILP (val))
11417 Vcoding_system_alist
11418 = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
11419 Vcoding_system_alist);
11420 }
11421 }
11422
11423 Lisp_Object spec_vec = make_uninit_vector (3);
11424 ASET (spec_vec, 0, attrs);
11425 ASET (spec_vec, 1, aliases);
11426 ASET (spec_vec, 2, eol_type);
11427
11428 Fputhash (name, spec_vec, Vcoding_system_hash_table);
11429 Vcoding_system_list = Fcons (name, Vcoding_system_list);
11430 val = Fassoc (Fsymbol_name (name), Vcoding_system_alist, Qnil);
11431 if (NILP (val))
11432 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
11433 Vcoding_system_alist);
11434
11435 int id = coding_categories[category].id;
11436 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
11437 setup_coding_system (name, &coding_categories[category]);
11438
11439 return Qnil;
11440
11441 short_args:
11442 Fsignal (Qwrong_number_of_arguments,
11443 Fcons (intern ("define-coding-system-internal"),
11444 make_fixnum (nargs)));
11445 }
11446
11447
11448 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
11449 3, 3, 0,
11450 doc:
11451
11452
11453
11454
11455
11456
11457
11458
11459
11460
11461 )
11462 (Lisp_Object coding_system, Lisp_Object prop, Lisp_Object val)
11463 {
11464 Lisp_Object spec, attrs;
11465
11466 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
11467 attrs = AREF (spec, 0);
11468 if (EQ (prop, QCmnemonic))
11469 {
11470
11471 if (STRINGP (val))
11472 val = make_fixnum (STRING_CHAR (SDATA (val)));
11473 else
11474 CHECK_CHARACTER (val);
11475 ASET (attrs, coding_attr_mnemonic, val);
11476 }
11477 else if (EQ (prop, QCdefault_char))
11478 {
11479 if (NILP (val))
11480 val = make_fixnum (' ');
11481 else
11482 CHECK_CHARACTER (val);
11483 ASET (attrs, coding_attr_default_char, val);
11484 }
11485 else if (EQ (prop, QCdecode_translation_table))
11486 {
11487 if (! CHAR_TABLE_P (val) && ! CONSP (val))
11488 CHECK_SYMBOL (val);
11489 ASET (attrs, coding_attr_decode_tbl, val);
11490 }
11491 else if (EQ (prop, QCencode_translation_table))
11492 {
11493 if (! CHAR_TABLE_P (val) && ! CONSP (val))
11494 CHECK_SYMBOL (val);
11495 ASET (attrs, coding_attr_encode_tbl, val);
11496 }
11497 else if (EQ (prop, QCpost_read_conversion))
11498 {
11499 CHECK_SYMBOL (val);
11500 ASET (attrs, coding_attr_post_read, val);
11501 }
11502 else if (EQ (prop, QCpre_write_conversion))
11503 {
11504 CHECK_SYMBOL (val);
11505 ASET (attrs, coding_attr_pre_write, val);
11506 }
11507 else if (EQ (prop, QCascii_compatible_p))
11508 {
11509 ASET (attrs, coding_attr_ascii_compat, val);
11510 }
11511
11512 ASET (attrs, coding_attr_plist,
11513 plist_put (CODING_ATTR_PLIST (attrs), prop, val));
11514 return val;
11515 }
11516
11517
11518 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
11519 Sdefine_coding_system_alias, 2, 2, 0,
11520 doc: )
11521 (Lisp_Object alias, Lisp_Object coding_system)
11522 {
11523 Lisp_Object spec, aliases, eol_type, val;
11524
11525 CHECK_SYMBOL (alias);
11526 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
11527 aliases = AREF (spec, 1);
11528
11529
11530
11531 while (!NILP (XCDR (aliases)))
11532 aliases = XCDR (aliases);
11533 XSETCDR (aliases, list1 (alias));
11534
11535 eol_type = AREF (spec, 2);
11536 if (VECTORP (eol_type))
11537 {
11538 Lisp_Object subsidiaries;
11539 int i;
11540
11541 subsidiaries = make_subsidiaries (alias);
11542 for (i = 0; i < 3; i++)
11543 Fdefine_coding_system_alias (AREF (subsidiaries, i),
11544 AREF (eol_type, i));
11545 }
11546
11547 Fputhash (alias, spec, Vcoding_system_hash_table);
11548 Vcoding_system_list = Fcons (alias, Vcoding_system_list);
11549 val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist, Qnil);
11550 if (NILP (val))
11551 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
11552 Vcoding_system_alist);
11553
11554 return Qnil;
11555 }
11556
11557 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
11558 1, 1, 0,
11559 doc:
11560 )
11561 (Lisp_Object coding_system)
11562 {
11563 Lisp_Object spec, attrs;
11564
11565 if (NILP (coding_system))
11566 return (Qno_conversion);
11567 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
11568 attrs = AREF (spec, 0);
11569 return CODING_ATTR_BASE_NAME (attrs);
11570 }
11571
11572 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
11573 1, 1, 0,
11574 doc: )
11575 (Lisp_Object coding_system)
11576 {
11577 Lisp_Object spec, attrs;
11578
11579 if (NILP (coding_system))
11580 coding_system = Qno_conversion;
11581 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
11582 attrs = AREF (spec, 0);
11583 return CODING_ATTR_PLIST (attrs);
11584 }
11585
11586
11587 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
11588 1, 1, 0,
11589 doc: )
11590 (Lisp_Object coding_system)
11591 {
11592 Lisp_Object spec;
11593
11594 if (NILP (coding_system))
11595 coding_system = Qno_conversion;
11596 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
11597 return AREF (spec, 1);
11598 }
11599
11600 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
11601 Scoding_system_eol_type, 1, 1, 0,
11602 doc:
11603
11604
11605
11606
11607
11608
11609
11610 )
11611 (Lisp_Object coding_system)
11612 {
11613 Lisp_Object spec, eol_type;
11614 int n;
11615
11616 if (NILP (coding_system))
11617 coding_system = Qno_conversion;
11618 if (! CODING_SYSTEM_P (coding_system))
11619 return Qnil;
11620 spec = CODING_SYSTEM_SPEC (coding_system);
11621 eol_type = AREF (spec, 2);
11622 if (VECTORP (eol_type))
11623 return Fcopy_sequence (eol_type);
11624 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
11625 return make_fixnum (n);
11626 }
11627
11628
11629
11630
11631 void
11632 init_coding_once (void)
11633 {
11634 int i;
11635
11636 for (i = 0; i < coding_category_max; i++)
11637 {
11638 coding_categories[i].id = -1;
11639 coding_priorities[i] = i;
11640 }
11641
11642 PDUMPER_REMEMBER_SCALAR (coding_categories);
11643 PDUMPER_REMEMBER_SCALAR (coding_priorities);
11644
11645
11646 for (i = 0; i < 0x20; i++)
11647 iso_code_class[i] = ISO_control_0;
11648 for (i = 0x21; i < 0x7F; i++)
11649 iso_code_class[i] = ISO_graphic_plane_0;
11650 for (i = 0x80; i < 0xA0; i++)
11651 iso_code_class[i] = ISO_control_1;
11652 for (i = 0xA1; i < 0xFF; i++)
11653 iso_code_class[i] = ISO_graphic_plane_1;
11654 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
11655 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
11656 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
11657 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
11658 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
11659 iso_code_class[ISO_CODE_ESC] = ISO_escape;
11660 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
11661 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
11662 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
11663
11664 PDUMPER_REMEMBER_SCALAR (iso_code_class);
11665
11666 for (i = 0; i < 256; i++)
11667 {
11668 emacs_mule_bytes[i] = 1;
11669 }
11670 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
11671 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
11672 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
11673 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
11674
11675 PDUMPER_REMEMBER_SCALAR (emacs_mule_bytes);
11676 }
11677
11678 static void reset_coding_after_pdumper_load (void);
11679
11680 void
11681 syms_of_coding (void)
11682 {
11683 staticpro (&Vcoding_system_hash_table);
11684 Vcoding_system_hash_table = CALLN (Fmake_hash_table, QCtest, Qeq);
11685
11686 staticpro (&Vsjis_coding_system);
11687 Vsjis_coding_system = Qnil;
11688
11689 staticpro (&Vbig5_coding_system);
11690 Vbig5_coding_system = Qnil;
11691
11692 staticpro (&Vcode_conversion_reused_workbuf);
11693 Vcode_conversion_reused_workbuf = Qnil;
11694
11695 staticpro (&Vcode_conversion_workbuf_name);
11696 Vcode_conversion_workbuf_name = build_pure_c_string (" *code-conversion-work*");
11697
11698 reused_workbuf_in_use = false;
11699 PDUMPER_REMEMBER_SCALAR (reused_workbuf_in_use);
11700
11701 DEFSYM (Qcharset, "charset");
11702 DEFSYM (Qtarget_idx, "target-idx");
11703 DEFSYM (Qcoding_system_history, "coding-system-history");
11704 Fset (Qcoding_system_history, Qnil);
11705
11706
11707 Fput (Qinsert_file_contents, Qtarget_idx, make_fixnum (0));
11708
11709 Fput (Qwrite_region, Qtarget_idx, make_fixnum (2));
11710
11711 DEFSYM (Qcall_process, "call-process");
11712
11713 Fput (Qcall_process, Qtarget_idx, make_fixnum (0));
11714
11715 DEFSYM (Qcall_process_region, "call-process-region");
11716
11717 Fput (Qcall_process_region, Qtarget_idx, make_fixnum (2));
11718
11719 DEFSYM (Qstart_process, "start-process");
11720
11721 Fput (Qstart_process, Qtarget_idx, make_fixnum (2));
11722
11723 DEFSYM (Qopen_network_stream, "open-network-stream");
11724
11725 Fput (Qopen_network_stream, Qtarget_idx, make_fixnum (3));
11726
11727 DEFSYM (Qunix, "unix");
11728 DEFSYM (Qdos, "dos");
11729 DEFSYM (Qmac, "mac");
11730
11731 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
11732 DEFSYM (Qundecided, "undecided");
11733 DEFSYM (Qno_conversion, "no-conversion");
11734 DEFSYM (Qraw_text, "raw-text");
11735 DEFSYM (Qus_ascii, "us-ascii");
11736
11737 DEFSYM (Qiso_2022, "iso-2022");
11738
11739 DEFSYM (Qutf_8, "utf-8");
11740 DEFSYM (Qutf_8_unix, "utf-8-unix");
11741 DEFSYM (Qutf_8_emacs, "utf-8-emacs");
11742
11743 #if defined (WINDOWSNT) || defined (CYGWIN)
11744
11745 DEFSYM (Qutf_16le, "utf-16le");
11746 #endif
11747
11748 DEFSYM (Qutf_16, "utf-16");
11749 DEFSYM (Qbig, "big");
11750 DEFSYM (Qlittle, "little");
11751
11752 DEFSYM (Qshift_jis, "shift-jis");
11753 DEFSYM (Qbig5, "big5");
11754
11755 DEFSYM (Qcoding_system_p, "coding-system-p");
11756
11757
11758 DEFSYM (Qcoding_system_error, "coding-system-error");
11759 Fput (Qcoding_system_error, Qerror_conditions,
11760 pure_list (Qcoding_system_error, Qerror));
11761 Fput (Qcoding_system_error, Qerror_message,
11762 build_pure_c_string ("Invalid coding system"));
11763
11764 DEFSYM (Qtranslation_table, "translation-table");
11765 Fput (Qtranslation_table, Qchar_table_extra_slots, make_fixnum (2));
11766 DEFSYM (Qtranslation_table_id, "translation-table-id");
11767
11768
11769
11770 DEFSYM (Qemacs_mule, "emacs-mule");
11771
11772 DEFSYM (QCcategory, ":category");
11773 DEFSYM (QCmnemonic, ":mnemonic");
11774 DEFSYM (QCdefault_char, ":default-char");
11775 DEFSYM (QCdecode_translation_table, ":decode-translation-table");
11776 DEFSYM (QCencode_translation_table, ":encode-translation-table");
11777 DEFSYM (QCpost_read_conversion, ":post-read-conversion");
11778 DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
11779 DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
11780
11781 Vcoding_category_table = make_nil_vector (coding_category_max);
11782 staticpro (&Vcoding_category_table);
11783
11784 ASET (Vcoding_category_table, coding_category_iso_7,
11785 intern_c_string ("coding-category-iso-7"));
11786 ASET (Vcoding_category_table, coding_category_iso_7_tight,
11787 intern_c_string ("coding-category-iso-7-tight"));
11788 ASET (Vcoding_category_table, coding_category_iso_8_1,
11789 intern_c_string ("coding-category-iso-8-1"));
11790 ASET (Vcoding_category_table, coding_category_iso_8_2,
11791 intern_c_string ("coding-category-iso-8-2"));
11792 ASET (Vcoding_category_table, coding_category_iso_7_else,
11793 intern_c_string ("coding-category-iso-7-else"));
11794 ASET (Vcoding_category_table, coding_category_iso_8_else,
11795 intern_c_string ("coding-category-iso-8-else"));
11796 ASET (Vcoding_category_table, coding_category_utf_8_auto,
11797 intern_c_string ("coding-category-utf-8-auto"));
11798 ASET (Vcoding_category_table, coding_category_utf_8_nosig,
11799 intern_c_string ("coding-category-utf-8"));
11800 ASET (Vcoding_category_table, coding_category_utf_8_sig,
11801 intern_c_string ("coding-category-utf-8-sig"));
11802 ASET (Vcoding_category_table, coding_category_utf_16_be,
11803 intern_c_string ("coding-category-utf-16-be"));
11804 ASET (Vcoding_category_table, coding_category_utf_16_auto,
11805 intern_c_string ("coding-category-utf-16-auto"));
11806 ASET (Vcoding_category_table, coding_category_utf_16_le,
11807 intern_c_string ("coding-category-utf-16-le"));
11808 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
11809 intern_c_string ("coding-category-utf-16-be-nosig"));
11810 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
11811 intern_c_string ("coding-category-utf-16-le-nosig"));
11812 ASET (Vcoding_category_table, coding_category_charset,
11813 intern_c_string ("coding-category-charset"));
11814 ASET (Vcoding_category_table, coding_category_sjis,
11815 intern_c_string ("coding-category-sjis"));
11816 ASET (Vcoding_category_table, coding_category_big5,
11817 intern_c_string ("coding-category-big5"));
11818 ASET (Vcoding_category_table, coding_category_ccl,
11819 intern_c_string ("coding-category-ccl"));
11820 ASET (Vcoding_category_table, coding_category_emacs_mule,
11821 intern_c_string ("coding-category-emacs-mule"));
11822
11823 ASET (Vcoding_category_table, coding_category_raw_text,
11824 intern_c_string ("coding-category-raw-text"));
11825 ASET (Vcoding_category_table, coding_category_undecided,
11826 intern_c_string ("coding-category-undecided"));
11827
11828 DEFSYM (Qinsufficient_source, "insufficient-source");
11829 DEFSYM (Qinvalid_source, "invalid-source");
11830 DEFSYM (Qinterrupted, "interrupted");
11831
11832
11833
11834 DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
11835
11836 DEFSYM (Qignored, "ignored");
11837
11838 DEFSYM (Qutf_8_string_p, "utf-8-string-p");
11839 DEFSYM (Qfilenamep, "filenamep");
11840
11841 defsubr (&Scoding_system_p);
11842 defsubr (&Sread_coding_system);
11843 defsubr (&Sread_non_nil_coding_system);
11844 defsubr (&Scheck_coding_system);
11845 defsubr (&Sdetect_coding_region);
11846 defsubr (&Sdetect_coding_string);
11847 defsubr (&Sfind_coding_systems_region_internal);
11848 defsubr (&Sunencodable_char_position);
11849 defsubr (&Scheck_coding_systems_region);
11850 defsubr (&Sdecode_coding_region);
11851 defsubr (&Sencode_coding_region);
11852 defsubr (&Sdecode_coding_string);
11853 defsubr (&Sencode_coding_string);
11854 #ifdef ENABLE_UTF_8_CONVERTER_TEST
11855 defsubr (&Sinternal_encode_string_utf_8);
11856 defsubr (&Sinternal_decode_string_utf_8);
11857 #endif
11858 defsubr (&Sdecode_sjis_char);
11859 defsubr (&Sencode_sjis_char);
11860 defsubr (&Sdecode_big5_char);
11861 defsubr (&Sencode_big5_char);
11862 defsubr (&Sset_terminal_coding_system_internal);
11863 defsubr (&Sset_safe_terminal_coding_system_internal);
11864 defsubr (&Sterminal_coding_system);
11865 defsubr (&Sset_keyboard_coding_system_internal);
11866 defsubr (&Skeyboard_coding_system);
11867 defsubr (&Sfind_operation_coding_system);
11868 defsubr (&Sset_coding_system_priority);
11869 defsubr (&Sdefine_coding_system_internal);
11870 defsubr (&Sdefine_coding_system_alias);
11871 defsubr (&Scoding_system_put);
11872 defsubr (&Scoding_system_base);
11873 defsubr (&Scoding_system_plist);
11874 defsubr (&Scoding_system_aliases);
11875 defsubr (&Scoding_system_eol_type);
11876 defsubr (&Scoding_system_priority_list);
11877
11878 DEFVAR_LISP ("coding-system-list", Vcoding_system_list,
11879 doc:
11880
11881
11882
11883 );
11884 Vcoding_system_list = Qnil;
11885
11886 DEFVAR_LISP ("coding-system-alist", Vcoding_system_alist,
11887 doc:
11888
11889
11890
11891
11892 );
11893 Vcoding_system_alist = Qnil;
11894
11895 DEFVAR_LISP ("coding-category-list", Vcoding_category_list,
11896 doc:
11897
11898
11899
11900
11901
11902
11903 );
11904 {
11905 int i;
11906
11907 Vcoding_category_list = Qnil;
11908 for (i = coding_category_max - 1; i >= 0; i--)
11909 Vcoding_category_list
11910 = Fcons (AREF (Vcoding_category_table, i),
11911 Vcoding_category_list);
11912 }
11913
11914 DEFVAR_LISP ("coding-system-for-read", Vcoding_system_for_read,
11915 doc:
11916
11917
11918
11919
11920 );
11921 Vcoding_system_for_read = Qnil;
11922
11923 DEFVAR_LISP ("coding-system-for-write", Vcoding_system_for_write,
11924 doc:
11925
11926
11927
11928
11929
11930
11931
11932
11933
11934 );
11935 Vcoding_system_for_write = Qnil;
11936
11937 DEFVAR_LISP ("last-coding-system-used", Vlast_coding_system_used,
11938 doc:
11939 );
11940 Vlast_coding_system_used = Qnil;
11941
11942 DEFVAR_LISP ("last-code-conversion-error", Vlast_code_conversion_error,
11943 doc:
11944
11945
11946
11947
11948
11949
11950
11951
11952
11953
11954
11955
11956 );
11957 Vlast_code_conversion_error = Qnil;
11958
11959 DEFVAR_BOOL ("inhibit-eol-conversion", inhibit_eol_conversion,
11960 doc:
11961
11962
11963 );
11964 inhibit_eol_conversion = 0;
11965
11966 DEFVAR_BOOL ("inherit-process-coding-system", inherit_process_coding_system,
11967 doc:
11968
11969
11970 );
11971 inherit_process_coding_system = 0;
11972
11973 DEFVAR_LISP ("file-coding-system-alist", Vfile_coding_system_alist,
11974 doc:
11975
11976
11977
11978
11979
11980
11981
11982
11983
11984
11985
11986
11987
11988
11989
11990
11991 );
11992 Vfile_coding_system_alist = Qnil;
11993
11994 DEFVAR_LISP ("process-coding-system-alist", Vprocess_coding_system_alist,
11995 doc:
11996
11997
11998
11999
12000
12001
12002
12003
12004
12005
12006
12007 );
12008 Vprocess_coding_system_alist = Qnil;
12009
12010 DEFVAR_LISP ("network-coding-system-alist", Vnetwork_coding_system_alist,
12011 doc:
12012
12013
12014
12015
12016
12017
12018
12019
12020
12021
12022
12023
12024 );
12025 Vnetwork_coding_system_alist = Qnil;
12026
12027 DEFVAR_LISP ("locale-coding-system", Vlocale_coding_system,
12028 doc:
12029
12030 );
12031 Vlocale_coding_system = Qnil;
12032
12033
12034 DEFVAR_LISP ("eol-mnemonic-unix", eol_mnemonic_unix,
12035 doc:
12036 );
12037 eol_mnemonic_unix = build_pure_c_string (":");
12038
12039 DEFVAR_LISP ("eol-mnemonic-dos", eol_mnemonic_dos,
12040 doc:
12041 );
12042 eol_mnemonic_dos = build_pure_c_string ("\\");
12043
12044 DEFVAR_LISP ("eol-mnemonic-mac", eol_mnemonic_mac,
12045 doc:
12046 );
12047 eol_mnemonic_mac = build_pure_c_string ("/");
12048
12049 DEFVAR_LISP ("eol-mnemonic-undecided", eol_mnemonic_undecided,
12050 doc:
12051 );
12052 eol_mnemonic_undecided = build_pure_c_string (":");
12053
12054 DEFVAR_LISP ("enable-character-translation", Venable_character_translation,
12055 doc:
12056 );
12057 Venable_character_translation = Qt;
12058
12059 DEFVAR_LISP ("standard-translation-table-for-decode",
12060 Vstandard_translation_table_for_decode,
12061 doc: );
12062 Vstandard_translation_table_for_decode = Qnil;
12063
12064 DEFVAR_LISP ("standard-translation-table-for-encode",
12065 Vstandard_translation_table_for_encode,
12066 doc: );
12067 Vstandard_translation_table_for_encode = Qnil;
12068
12069 DEFVAR_LISP ("charset-revision-table", Vcharset_revision_table,
12070 doc:
12071
12072
12073 );
12074 Vcharset_revision_table = Qnil;
12075
12076 DEFVAR_LISP ("default-process-coding-system",
12077 Vdefault_process_coding_system,
12078 doc:
12079
12080 );
12081 Vdefault_process_coding_system = Qnil;
12082
12083 DEFVAR_LISP ("latin-extra-code-table", Vlatin_extra_code_table,
12084 doc:
12085
12086
12087
12088
12089
12090
12091
12092 );
12093 Vlatin_extra_code_table = make_nil_vector (256);
12094
12095 DEFVAR_LISP ("select-safe-coding-system-function",
12096 Vselect_safe_coding_system_function,
12097 doc:
12098
12099
12100
12101
12102
12103
12104
12105
12106 );
12107 Vselect_safe_coding_system_function = Qnil;
12108
12109 DEFVAR_BOOL ("coding-system-require-warning",
12110 coding_system_require_warning,
12111 doc:
12112
12113
12114 );
12115 coding_system_require_warning = 0;
12116
12117
12118 DEFVAR_BOOL ("inhibit-iso-escape-detection",
12119 inhibit_iso_escape_detection,
12120 doc:
12121
12122
12123
12124
12125
12126
12127
12128
12129
12130
12131
12132
12133
12134
12135
12136
12137
12138
12139
12140
12141
12142
12143 );
12144 inhibit_iso_escape_detection = 0;
12145
12146 DEFVAR_BOOL ("inhibit-null-byte-detection",
12147 inhibit_null_byte_detection,
12148 doc:
12149
12150
12151
12152
12153
12154
12155
12156 );
12157 inhibit_null_byte_detection = 0;
12158
12159 DEFVAR_BOOL ("disable-ascii-optimization", disable_ascii_optimization,
12160 doc:
12161 );
12162 disable_ascii_optimization = 0;
12163
12164 DEFVAR_LISP ("translation-table-for-input", Vtranslation_table_for_input,
12165 doc:
12166
12167
12168
12169
12170
12171 );
12172 Vtranslation_table_for_input = Qnil;
12173
12174 Lisp_Object args[coding_arg_undecided_max];
12175 memclear (args, sizeof args);
12176
12177 Lisp_Object plist[] =
12178 {
12179 QCname,
12180 args[coding_arg_name] = Qno_conversion,
12181 QCmnemonic,
12182 args[coding_arg_mnemonic] = make_fixnum ('='),
12183 intern_c_string (":coding-type"),
12184 args[coding_arg_coding_type] = Qraw_text,
12185 QCascii_compatible_p,
12186 args[coding_arg_ascii_compatible_p] = Qt,
12187 QCdefault_char,
12188 args[coding_arg_default_char] = make_fixnum (0),
12189 intern_c_string (":for-unibyte"),
12190 args[coding_arg_for_unibyte] = Qt,
12191 intern_c_string (":docstring"),
12192 (build_pure_c_string
12193 ("Do no conversion.\n"
12194 "\n"
12195 "When you visit a file with this coding, the file is read into a\n"
12196 "unibyte buffer as is, thus each byte of a file is treated as a\n"
12197 "character.")),
12198 intern_c_string (":eol-type"),
12199 args[coding_arg_eol_type] = Qunix,
12200 };
12201 args[coding_arg_plist] = CALLMANY (Flist, plist);
12202 Fdefine_coding_system_internal (coding_arg_max, args);
12203
12204 plist[1] = args[coding_arg_name] = Qundecided;
12205 plist[3] = args[coding_arg_mnemonic] = make_fixnum ('-');
12206 plist[5] = args[coding_arg_coding_type] = Qundecided;
12207
12208
12209 plist[8] = intern_c_string (":charset-list");
12210 plist[9] = args[coding_arg_charset_list] = list1 (Qascii);
12211 plist[11] = args[coding_arg_for_unibyte] = Qnil;
12212 plist[13] = build_pure_c_string ("No conversion on encoding, "
12213 "automatic conversion on decoding.");
12214 plist[15] = args[coding_arg_eol_type] = Qnil;
12215 args[coding_arg_plist] = CALLMANY (Flist, plist);
12216 args[coding_arg_undecided_inhibit_null_byte_detection] = make_fixnum (0);
12217 args[coding_arg_undecided_inhibit_iso_escape_detection] = make_fixnum (0);
12218 Fdefine_coding_system_internal (coding_arg_undecided_max, args);
12219
12220 setup_coding_system (Qno_conversion, &safe_terminal_coding);
12221
12222 for (int i = 0; i < coding_category_max; i++)
12223 Fset (AREF (Vcoding_category_table, i), Qno_conversion);
12224
12225 pdumper_do_now_and_after_load (reset_coding_after_pdumper_load);
12226 }
12227
12228 static void
12229 reset_coding_after_pdumper_load (void)
12230 {
12231 if (!dumped_with_pdumper_p ())
12232 return;
12233 for (struct coding_system *this = &coding_categories[0];
12234 this < &coding_categories[coding_category_max];
12235 ++this)
12236 {
12237 int id = this->id;
12238 if (id >= 0)
12239 {
12240
12241
12242
12243 memset (this, 0, sizeof (*this));
12244 setup_coding_system (CODING_ID_NAME (id), this);
12245 }
12246 }
12247
12248
12249
12250
12251 Fset_safe_terminal_coding_system_internal (Qus_ascii);
12252 }