This source file includes following definitions.
- load_charset_map
- read_hex
- load_charset_map_from_file
- load_charset_map_from_vector
- load_charset
- DEFUN
- map_charset_for_dump
- map_charset_chars
- define_charset_internal
- DEFUN
- check_iso_charset_parameter
- string_xstring_p
- find_charsets_in_text
- maybe_unify_char
- decode_char
- encode_char
- char_charset
- DEFUN
- DEFUN
- DEFUN
- DEFUN
- DEFUN
- charset_compare
- DEFUN
- init_charset
- init_charset_once
- syms_of_charset
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29 #include <config.h>
30
31 #include <errno.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <limits.h>
35 #include <sys/types.h>
36 #include "lisp.h"
37 #include "character.h"
38 #include "charset.h"
39 #include "coding.h"
40 #include "buffer.h"
41 #include "sysstdio.h"
42 #include "pdumper.h"
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61 Lisp_Object Vcharset_hash_table;
62
63
64 struct charset *charset_table;
65 int charset_table_size;
66 int charset_table_used;
67
68
69 int charset_ascii;
70 int charset_eight_bit;
71 static int charset_iso_8859_1;
72 int charset_unicode;
73 static int charset_emacs;
74
75
76 int charset_jisx0201_roman;
77 int charset_jisx0208_1978;
78 int charset_jisx0208;
79 int charset_ksc5601;
80
81
82 int charset_unibyte;
83
84
85 Lisp_Object Vcharset_ordered_list;
86
87
88
89 Lisp_Object Vcharset_non_preferred_head;
90
91
92
93 EMACS_UINT charset_ordered_list_tick;
94
95
96 Lisp_Object Viso_2022_charset_list;
97
98
99 Lisp_Object Vemacs_mule_charset_list;
100
101 int emacs_mule_charset[256];
102
103
104
105 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
106
107 #define CODE_POINT_TO_INDEX(charset, code) \
108 ((charset)->code_linear_p \
109 ? (int) ((code) - (charset)->min_code) \
110 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
111 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
112 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
113 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
114 ? (int) (((((code) >> 24) - (charset)->code_space[12]) \
115 * (charset)->code_space[11]) \
116 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
117 * (charset)->code_space[7]) \
118 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
119 * (charset)->code_space[3]) \
120 + (((code) & 0xFF) - (charset)->code_space[0]) \
121 - ((charset)->char_index_offset)) \
122 : -1)
123
124
125
126
127
128
129 #define INDEX_TO_CODE_POINT(charset, idx) \
130 ((charset)->code_linear_p \
131 ? (idx) + (charset)->min_code \
132 : (idx += (charset)->char_index_offset, \
133 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
134 | (((charset)->code_space[4] \
135 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
136 << 8) \
137 | (((charset)->code_space[8] \
138 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
139 << 16) \
140 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
141 << 24))))
142
143
144
145
146 static struct
147 {
148
149 struct charset *current;
150
151
152 short for_encoder;
153
154
155
156 int min_char, max_char;
157
158
159
160
161
162 int zero_index_char;
163
164 union {
165
166
167
168 int decoder[0x10000];
169
170
171
172
173
174 unsigned short encoder[0x20000];
175 } table;
176 } *temp_charset_work;
177
178 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
179 do { \
180 if ((CODE) == 0) \
181 temp_charset_work->zero_index_char = (C); \
182 else if ((C) < 0x20000) \
183 temp_charset_work->table.encoder[(C)] = (CODE); \
184 else \
185 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
186 } while (0)
187
188 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
189 ((C) == temp_charset_work->zero_index_char ? 0 \
190 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
191 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
192 : temp_charset_work->table.encoder[(C) - 0x10000] \
193 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
194
195 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
196 (temp_charset_work->table.decoder[(CODE)] = (C))
197
198 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
199 (temp_charset_work->table.decoder[(CODE)])
200
201
202
203
204 bool charset_map_loaded;
205
206 struct charset_map_entries
207 {
208 struct {
209 unsigned from, to;
210 int c;
211 } entry[0x10000];
212 struct charset_map_entries *next;
213 };
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240 static void
241 load_charset_map (struct charset *charset, struct charset_map_entries *entries, int n_entries, int control_flag)
242 {
243 Lisp_Object vec UNINIT;
244 Lisp_Object table UNINIT;
245 unsigned max_code = CHARSET_MAX_CODE (charset);
246 bool ascii_compatible_p = charset->ascii_compatible_p;
247 int min_char, max_char, nonascii_min_char;
248 int i;
249 unsigned char *fast_map = charset->fast_map;
250
251 if (n_entries <= 0)
252 return;
253
254 if (control_flag)
255 {
256 if (! inhibit_load_charset_map)
257 {
258 if (control_flag == 1)
259 {
260 if (charset->method == CHARSET_METHOD_MAP)
261 {
262 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
263
264 vec = make_vector (n, make_fixnum (-1));
265 set_charset_attr (charset, charset_decoder, vec);
266 }
267 else
268 {
269 char_table_set_range (Vchar_unify_table,
270 charset->min_char, charset->max_char,
271 Qnil);
272 }
273 }
274 else
275 {
276 table = Fmake_char_table (Qnil, Qnil);
277 set_charset_attr (charset,
278 (charset->method == CHARSET_METHOD_MAP
279 ? charset_encoder : charset_deunifier),
280 table);
281 }
282 }
283 else
284 {
285 if (! temp_charset_work)
286 temp_charset_work = xmalloc (sizeof *temp_charset_work);
287 if (control_flag == 1)
288 {
289 memset (temp_charset_work->table.decoder, -1,
290 sizeof (int) * 0x10000);
291 }
292 else
293 {
294 memset (temp_charset_work->table.encoder, 0,
295 sizeof (unsigned short) * 0x20000);
296 temp_charset_work->zero_index_char = -1;
297 }
298 temp_charset_work->current = charset;
299 temp_charset_work->for_encoder = (control_flag == 2);
300 control_flag += 2;
301 }
302 charset_map_loaded = 1;
303 }
304
305 min_char = max_char = entries->entry[0].c;
306 nonascii_min_char = MAX_CHAR;
307 for (i = 0; i < n_entries; i++)
308 {
309 unsigned from, to;
310 int from_index, to_index, lim_index;
311 int from_c, to_c;
312 int idx = i % 0x10000;
313
314 if (i > 0 && idx == 0)
315 entries = entries->next;
316 from = entries->entry[idx].from;
317 to = entries->entry[idx].to;
318 from_c = entries->entry[idx].c;
319 from_index = CODE_POINT_TO_INDEX (charset, from);
320 if (from == to)
321 {
322 to_index = from_index;
323 to_c = from_c;
324 }
325 else
326 {
327 to_index = CODE_POINT_TO_INDEX (charset, to);
328 to_c = from_c + (to_index - from_index);
329 }
330 if (from_index < 0 || to_index < 0)
331 continue;
332 lim_index = to_index + 1;
333
334 if (to_c > max_char)
335 max_char = to_c;
336 else if (from_c < min_char)
337 min_char = from_c;
338
339 if (control_flag == 1)
340 {
341 if (charset->method == CHARSET_METHOD_MAP)
342 for (; from_index < lim_index; from_index++, from_c++)
343 ASET (vec, from_index, make_fixnum (from_c));
344 else
345 for (; from_index < lim_index; from_index++, from_c++)
346 CHAR_TABLE_SET (Vchar_unify_table,
347 CHARSET_CODE_OFFSET (charset) + from_index,
348 make_fixnum (from_c));
349 }
350 else if (control_flag == 2)
351 {
352 if (charset->method == CHARSET_METHOD_MAP
353 && CHARSET_COMPACT_CODES_P (charset))
354 for (; from_index < lim_index; from_index++, from_c++)
355 {
356 unsigned code = from_index;
357 code = INDEX_TO_CODE_POINT (charset, code);
358
359 if (NILP (CHAR_TABLE_REF (table, from_c)))
360 CHAR_TABLE_SET (table, from_c, make_fixnum (code));
361 }
362 else
363 for (; from_index < lim_index; from_index++, from_c++)
364 {
365 if (NILP (CHAR_TABLE_REF (table, from_c)))
366 CHAR_TABLE_SET (table, from_c, make_fixnum (from_index));
367 }
368 }
369 else if (control_flag == 3)
370 for (; from_index < lim_index; from_index++, from_c++)
371 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
372 else if (control_flag == 4)
373 for (; from_index < lim_index; from_index++, from_c++)
374 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
375 else
376 {
377 if (ascii_compatible_p)
378 {
379 if (! ASCII_CHAR_P (from_c))
380 {
381 if (from_c < nonascii_min_char)
382 nonascii_min_char = from_c;
383 }
384 else if (! ASCII_CHAR_P (to_c))
385 {
386 nonascii_min_char = 0x80;
387 }
388 }
389
390 for (; from_c <= to_c; from_c++)
391 CHARSET_FAST_MAP_SET (from_c, fast_map);
392 }
393 }
394
395 if (control_flag == 0)
396 {
397 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
398 ? nonascii_min_char : min_char);
399 CHARSET_MAX_CHAR (charset) = max_char;
400 }
401 else if (control_flag == 4)
402 {
403 temp_charset_work->min_char = min_char;
404 temp_charset_work->max_char = max_char;
405 }
406 }
407
408
409
410
411
412
413
414
415 static unsigned
416 read_hex (FILE *fp, int lookahead, int *terminator, bool *overflow)
417 {
418 int c = lookahead < 0 ? getc (fp) : lookahead;
419
420 while (true)
421 {
422 if (c == '#')
423 do
424 c = getc (fp);
425 while (0 <= c && c != '\n');
426 else if (c == '0')
427 {
428 c = getc (fp);
429 if (c < 0 || c == 'x')
430 break;
431 }
432 if (c < 0)
433 break;
434 c = getc (fp);
435 }
436
437 unsigned n = 0;
438 bool v = false;
439
440 if (0 <= c)
441 while (true)
442 {
443 c = getc (fp);
444 int digit = char_hexdigit (c);
445 if (digit < 0)
446 break;
447 v |= INT_LEFT_SHIFT_OVERFLOW (n, 4);
448 n = (n << 4) + digit;
449 }
450
451 *terminator = c;
452 *overflow |= v;
453 return n;
454 }
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473 static void
474 load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile,
475 int control_flag)
476 {
477 unsigned min_code = CHARSET_MIN_CODE (charset);
478 unsigned max_code = CHARSET_MAX_CODE (charset);
479 int fd;
480 FILE *fp;
481 struct charset_map_entries *head, *entries;
482 int n_entries;
483 AUTO_STRING (map, ".map");
484 AUTO_STRING (txt, ".txt");
485 AUTO_LIST2 (suffixes, map, txt);
486 specpdl_ref count = SPECPDL_INDEX ();
487 record_unwind_protect_nothing ();
488 specbind (Qfile_name_handler_alist, Qnil);
489 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil, false, false,
490 NULL);
491 fp = fd < 0 ? 0 : emacs_fdopen (fd, "r");
492 if (!fp)
493 {
494 int open_errno = errno;
495 emacs_close (fd);
496 report_file_errno ("Loading charset map", mapfile, open_errno);
497 }
498 set_unwind_protect_ptr (count, fclose_unwind, fp);
499 unbind_to (specpdl_ref_add (count, 1), Qnil);
500
501
502
503 head = record_xmalloc (sizeof *head);
504 entries = head;
505 memset (entries, 0, sizeof (struct charset_map_entries));
506
507 n_entries = 0;
508 int ch = -1;
509 while (true)
510 {
511 bool overflow = false;
512 unsigned from = read_hex (fp, ch, &ch, &overflow), to;
513 if (ch < 0)
514 break;
515 if (ch == '-')
516 {
517 to = read_hex (fp, -1, &ch, &overflow);
518 if (ch < 0)
519 break;
520 }
521 else
522 {
523 to = from;
524 ch = -1;
525 }
526 unsigned c = read_hex (fp, ch, &ch, &overflow);
527 if (ch < 0)
528 break;
529
530 if (overflow)
531 continue;
532 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
533 continue;
534
535 if (n_entries == 0x10000)
536 {
537 entries->next = record_xmalloc (sizeof *entries->next);
538 entries = entries->next;
539 memset (entries, 0, sizeof (struct charset_map_entries));
540 n_entries = 0;
541 }
542 int idx = n_entries;
543 entries->entry[idx].from = from;
544 entries->entry[idx].to = to;
545 entries->entry[idx].c = c;
546 n_entries++;
547 }
548 emacs_fclose (fp);
549 clear_unwind_protect (count);
550
551 load_charset_map (charset, head, n_entries, control_flag);
552 unbind_to (count, Qnil);
553 }
554
555 static void
556 load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int control_flag)
557 {
558 unsigned min_code = CHARSET_MIN_CODE (charset);
559 unsigned max_code = CHARSET_MAX_CODE (charset);
560 struct charset_map_entries *head, *entries;
561 int n_entries;
562 int len = ASIZE (vec);
563 int i;
564 USE_SAFE_ALLOCA;
565
566 if (len % 2 == 1)
567 {
568 add_to_log ("Failure in loading charset map: %V", vec);
569 return;
570 }
571
572
573
574 head = SAFE_ALLOCA (sizeof *head);
575 entries = head;
576 memset (entries, 0, sizeof (struct charset_map_entries));
577
578 n_entries = 0;
579 for (i = 0; i < len; i += 2)
580 {
581 Lisp_Object val, val2;
582 unsigned from, to;
583 EMACS_INT c;
584 int idx;
585
586 val = AREF (vec, i);
587 if (CONSP (val))
588 {
589 val2 = XCDR (val);
590 val = XCAR (val);
591 from = XFIXNAT (val);
592 to = XFIXNAT (val2);
593 }
594 else
595 from = to = XFIXNAT (val);
596 val = AREF (vec, i + 1);
597 CHECK_FIXNAT (val);
598 c = XFIXNAT (val);
599
600 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
601 continue;
602
603 if (n_entries > 0 && (n_entries % 0x10000) == 0)
604 {
605 entries->next = SAFE_ALLOCA (sizeof *entries->next);
606 entries = entries->next;
607 memset (entries, 0, sizeof (struct charset_map_entries));
608 }
609 idx = n_entries % 0x10000;
610 entries->entry[idx].from = from;
611 entries->entry[idx].to = to;
612 entries->entry[idx].c = c;
613 n_entries++;
614 }
615
616 load_charset_map (charset, head, n_entries, control_flag);
617 SAFE_FREE ();
618 }
619
620
621
622
623
624 static void
625 load_charset (struct charset *charset, int control_flag)
626 {
627 Lisp_Object map;
628
629 if (inhibit_load_charset_map
630 && temp_charset_work
631 && charset == temp_charset_work->current
632 && ((control_flag == 2) == temp_charset_work->for_encoder))
633 return;
634
635 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
636 map = CHARSET_MAP (charset);
637 else
638 {
639 if (! CHARSET_UNIFIED_P (charset))
640 emacs_abort ();
641 map = CHARSET_UNIFY_MAP (charset);
642 }
643 if (STRINGP (map))
644 load_charset_map_from_file (charset, map, control_flag);
645 else
646 load_charset_map_from_vector (charset, map, control_flag);
647 }
648
649
650 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
651 doc: )
652 (Lisp_Object object)
653 {
654 return (CHARSETP (object) ? Qt : Qnil);
655 }
656
657
658 static void
659 map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object),
660 Lisp_Object function, Lisp_Object arg,
661 unsigned int from, unsigned int to)
662 {
663 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
664 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
665 Lisp_Object range = Fcons (Qnil, Qnil);
666 int c, stop;
667
668 c = temp_charset_work->min_char;
669 stop = (temp_charset_work->max_char < 0x20000
670 ? temp_charset_work->max_char : 0xFFFF);
671
672 while (1)
673 {
674 int idx = GET_TEMP_CHARSET_WORK_ENCODER (c);
675
676 if (idx >= from_idx && idx <= to_idx)
677 {
678 if (NILP (XCAR (range)))
679 XSETCAR (range, make_fixnum (c));
680 }
681 else if (! NILP (XCAR (range)))
682 {
683 XSETCDR (range, make_fixnum (c - 1));
684 if (c_function)
685 (*c_function) (arg, range);
686 else
687 call2 (function, range, arg);
688 XSETCAR (range, Qnil);
689 }
690 if (c == stop)
691 {
692 if (c == temp_charset_work->max_char)
693 {
694 if (! NILP (XCAR (range)))
695 {
696 XSETCDR (range, make_fixnum (c));
697 if (c_function)
698 (*c_function) (arg, range);
699 else
700 call2 (function, range, arg);
701 }
702 break;
703 }
704 c = 0x1FFFF;
705 stop = temp_charset_work->max_char;
706 }
707 c++;
708 }
709 }
710
711 void
712 map_charset_chars (void (*c_function)(Lisp_Object, Lisp_Object), Lisp_Object function,
713 Lisp_Object arg, struct charset *charset, unsigned from, unsigned to)
714 {
715 Lisp_Object range;
716 bool partial = (from > CHARSET_MIN_CODE (charset)
717 || to < CHARSET_MAX_CODE (charset));
718
719 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
720 {
721 int from_idx = CODE_POINT_TO_INDEX (charset, from);
722 int to_idx = CODE_POINT_TO_INDEX (charset, to);
723 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
724 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
725
726 if (CHARSET_UNIFIED_P (charset))
727 {
728 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
729 load_charset (charset, 2);
730 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
731 map_char_table_for_charset (c_function, function,
732 CHARSET_DEUNIFIER (charset), arg,
733 partial ? charset : NULL, from, to);
734 else
735 map_charset_for_dump (c_function, function, arg, from, to);
736 }
737
738 range = Fcons (make_fixnum (from_c), make_fixnum (to_c));
739 if (NILP (function))
740 (*c_function) (arg, range);
741 else
742 call2 (function, range, arg);
743 }
744 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
745 {
746 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
747 load_charset (charset, 2);
748 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
749 map_char_table_for_charset (c_function, function,
750 CHARSET_ENCODER (charset), arg,
751 partial ? charset : NULL, from, to);
752 else
753 map_charset_for_dump (c_function, function, arg, from, to);
754 }
755 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
756 {
757 Lisp_Object subset_info;
758 int offset;
759
760 subset_info = CHARSET_SUBSET (charset);
761 charset = CHARSET_FROM_ID (XFIXNAT (AREF (subset_info, 0)));
762 offset = XFIXNUM (AREF (subset_info, 3));
763 from -= offset;
764 if (from < XFIXNAT (AREF (subset_info, 1)))
765 from = XFIXNAT (AREF (subset_info, 1));
766 to -= offset;
767 if (to > XFIXNAT (AREF (subset_info, 2)))
768 to = XFIXNAT (AREF (subset_info, 2));
769 map_charset_chars (c_function, function, arg, charset, from, to);
770 }
771 else
772 {
773 Lisp_Object parents;
774
775 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
776 parents = XCDR (parents))
777 {
778 int offset;
779 unsigned this_from, this_to;
780
781 charset = CHARSET_FROM_ID (XFIXNAT (XCAR (XCAR (parents))));
782 offset = XFIXNUM (XCDR (XCAR (parents)));
783 this_from = from > offset ? from - offset : 0;
784 this_to = to > offset ? to - offset : 0;
785 if (this_from < CHARSET_MIN_CODE (charset))
786 this_from = CHARSET_MIN_CODE (charset);
787 if (this_to > CHARSET_MAX_CODE (charset))
788 this_to = CHARSET_MAX_CODE (charset);
789 map_charset_chars (c_function, function, arg, charset,
790 this_from, this_to);
791 }
792 }
793 }
794
795 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
796 doc:
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811 )
812 (Lisp_Object function, Lisp_Object charset, Lisp_Object arg, Lisp_Object from_code, Lisp_Object to_code)
813 {
814 struct charset *cs;
815 unsigned from, to;
816
817 CHECK_CHARSET_GET_CHARSET (charset, cs);
818 if (NILP (from_code))
819 from = CHARSET_MIN_CODE (cs);
820 else
821 {
822 from = XFIXNUM (from_code);
823 if (from < CHARSET_MIN_CODE (cs))
824 from = CHARSET_MIN_CODE (cs);
825 }
826 if (NILP (to_code))
827 to = CHARSET_MAX_CODE (cs);
828 else
829 {
830 to = XFIXNUM (to_code);
831 if (to > CHARSET_MAX_CODE (cs))
832 to = CHARSET_MAX_CODE (cs);
833 }
834 map_charset_chars (NULL, function, arg, cs, from, to);
835 return Qnil;
836 }
837
838
839
840
841
842
843
844 DEFUN ("define-charset-internal", Fdefine_charset_internal,
845 Sdefine_charset_internal, charset_arg_max, MANY, 0,
846 doc:
847 )
848 (ptrdiff_t nargs, Lisp_Object *args)
849 {
850
851 Lisp_Object attrs;
852 Lisp_Object val;
853 Lisp_Object hash_code;
854 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
855 int i, j;
856 struct charset charset;
857 int id;
858 int dimension;
859 bool new_definition_p;
860 int nchars;
861
862 memset (&charset, 0, sizeof (charset));
863
864 if (nargs != charset_arg_max)
865 Fsignal (Qwrong_number_of_arguments,
866 Fcons (intern ("define-charset-internal"),
867 make_fixnum (nargs)));
868
869 attrs = make_nil_vector (charset_attr_max);
870
871 CHECK_SYMBOL (args[charset_arg_name]);
872 ASET (attrs, charset_name, args[charset_arg_name]);
873
874 val = args[charset_arg_code_space];
875 for (i = 0, dimension = 0, nchars = 1; ; i++)
876 {
877 Lisp_Object min_byte_obj = Faref (val, make_fixnum (i * 2));
878 Lisp_Object max_byte_obj = Faref (val, make_fixnum (i * 2 + 1));
879 int min_byte = check_integer_range (min_byte_obj, 0, 255);
880 int max_byte = check_integer_range (max_byte_obj, min_byte, 255);
881 charset.code_space[i * 4] = min_byte;
882 charset.code_space[i * 4 + 1] = max_byte;
883 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
884 if (max_byte > 0)
885 dimension = i + 1;
886 if (i == 3)
887 break;
888 nchars *= charset.code_space[i * 4 + 2];
889 charset.code_space[i * 4 + 3] = nchars;
890 }
891
892 val = args[charset_arg_dimension];
893 charset.dimension
894 = !NILP (val) ? check_integer_range (val, 1, 4) : dimension;
895
896 charset.code_linear_p
897 = (charset.dimension == 1
898 || (charset.code_space[2] == 256
899 && (charset.dimension == 2
900 || (charset.code_space[6] == 256
901 && (charset.dimension == 3
902 || charset.code_space[10] == 256)))));
903
904 if (! charset.code_linear_p)
905 {
906 charset.code_space_mask = xzalloc (256);
907 for (i = 0; i < 4; i++)
908 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
909 j++)
910 charset.code_space_mask[j] |= (1 << i);
911 }
912
913 charset.iso_chars_96 = charset.code_space[2] == 96;
914
915 charset.min_code = (charset.code_space[0]
916 | (charset.code_space[4] << 8)
917 | (charset.code_space[8] << 16)
918 | ((unsigned) charset.code_space[12] << 24));
919 charset.max_code = (charset.code_space[1]
920 | (charset.code_space[5] << 8)
921 | (charset.code_space[9] << 16)
922 | ((unsigned) charset.code_space[13] << 24));
923 charset.char_index_offset = 0;
924
925 val = args[charset_arg_min_code];
926 if (! NILP (val))
927 {
928 unsigned code = cons_to_unsigned (val, UINT_MAX);
929
930 if (code < charset.min_code
931 || code > charset.max_code)
932 args_out_of_range_3 (INT_TO_INTEGER (charset.min_code),
933 INT_TO_INTEGER (charset.max_code), val);
934 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
935 charset.min_code = code;
936 }
937
938 val = args[charset_arg_max_code];
939 if (! NILP (val))
940 {
941 unsigned code = cons_to_unsigned (val, UINT_MAX);
942
943 if (code < charset.min_code
944 || code > charset.max_code)
945 args_out_of_range_3 (INT_TO_INTEGER (charset.min_code),
946 INT_TO_INTEGER (charset.max_code), val);
947 charset.max_code = code;
948 }
949
950 charset.compact_codes_p = charset.max_code < 0x10000;
951
952 val = args[charset_arg_invalid_code];
953 if (NILP (val))
954 {
955 if (charset.min_code > 0)
956 charset.invalid_code = 0;
957 else
958 {
959 if (charset.max_code < UINT_MAX)
960 charset.invalid_code = charset.max_code + 1;
961 else
962 error ("Attribute :invalid-code must be specified");
963 }
964 }
965 else
966 charset.invalid_code = cons_to_unsigned (val, UINT_MAX);
967
968 val = args[charset_arg_iso_final];
969 if (NILP (val))
970 charset.iso_final = -1;
971 else
972 {
973 CHECK_FIXNUM (val);
974 if (XFIXNUM (val) < '0' || XFIXNUM (val) > 127)
975 error ("Invalid iso-final-char: %"pI"d", XFIXNUM (val));
976 charset.iso_final = XFIXNUM (val);
977 }
978
979 val = args[charset_arg_iso_revision];
980 charset.iso_revision = !NILP (val) ? check_integer_range (val, -1, 63) : -1;
981
982 val = args[charset_arg_emacs_mule_id];
983 if (NILP (val))
984 charset.emacs_mule_id = -1;
985 else
986 {
987 CHECK_FIXNAT (val);
988 if ((XFIXNUM (val) > 0 && XFIXNUM (val) <= 128) || XFIXNUM (val) >= 256)
989 error ("Invalid emacs-mule-id: %"pI"d", XFIXNUM (val));
990 charset.emacs_mule_id = XFIXNUM (val);
991 }
992
993 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
994
995 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
996
997 charset.unified_p = 0;
998
999 memset (charset.fast_map, 0, sizeof (charset.fast_map));
1000
1001 if (! NILP (args[charset_arg_code_offset]))
1002 {
1003 val = args[charset_arg_code_offset];
1004 CHECK_CHARACTER (val);
1005
1006 charset.method = CHARSET_METHOD_OFFSET;
1007 charset.code_offset = XFIXNUM (val);
1008
1009 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1010 if (MAX_CHAR - charset.code_offset < i)
1011 error ("Unsupported max char: %d", charset.max_char);
1012 charset.max_char = i + charset.code_offset;
1013 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1014 charset.min_char = i + charset.code_offset;
1015
1016 i = (charset.min_char >> 7) << 7;
1017 for (; i < 0x10000 && i <= charset.max_char; i += 128)
1018 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1019 i = (i >> 12) << 12;
1020 for (; i <= charset.max_char; i += 0x1000)
1021 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1022 if (charset.code_offset == 0 && charset.max_char >= 0x80)
1023 charset.ascii_compatible_p = 1;
1024 }
1025 else if (! NILP (args[charset_arg_map]))
1026 {
1027 val = args[charset_arg_map];
1028 ASET (attrs, charset_map, val);
1029 charset.method = CHARSET_METHOD_MAP;
1030 }
1031 else if (! NILP (args[charset_arg_subset]))
1032 {
1033 Lisp_Object parent;
1034 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1035 struct charset *parent_charset;
1036
1037 val = args[charset_arg_subset];
1038 parent = Fcar (val);
1039 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1040 parent_min_code = Fnth (make_fixnum (1), val);
1041 CHECK_FIXNAT (parent_min_code);
1042 parent_max_code = Fnth (make_fixnum (2), val);
1043 CHECK_FIXNAT (parent_max_code);
1044 parent_code_offset = Fnth (make_fixnum (3), val);
1045 CHECK_FIXNUM (parent_code_offset);
1046 ASET (attrs, charset_subset,
1047 CALLN (Fvector, make_fixnum (parent_charset->id),
1048 parent_min_code, parent_max_code, parent_code_offset));
1049
1050 charset.method = CHARSET_METHOD_SUBSET;
1051
1052
1053
1054 memcpy (charset.fast_map, parent_charset->fast_map,
1055 sizeof charset.fast_map);
1056
1057
1058 charset.min_char = parent_charset->min_char;
1059 charset.max_char = parent_charset->max_char;
1060 }
1061 else if (! NILP (args[charset_arg_superset]))
1062 {
1063 val = args[charset_arg_superset];
1064 charset.method = CHARSET_METHOD_SUPERSET;
1065 val = Fcopy_sequence (val);
1066 ASET (attrs, charset_superset, val);
1067
1068 charset.min_char = MAX_CHAR;
1069 charset.max_char = 0;
1070 for (; ! NILP (val); val = Fcdr (val))
1071 {
1072 Lisp_Object elt, car_part, cdr_part;
1073 int this_id, offset;
1074 struct charset *this_charset;
1075
1076 elt = Fcar (val);
1077 if (CONSP (elt))
1078 {
1079 car_part = XCAR (elt);
1080 cdr_part = XCDR (elt);
1081 CHECK_CHARSET_GET_ID (car_part, this_id);
1082 offset = check_integer_range (cdr_part, INT_MIN, INT_MAX);
1083 }
1084 else
1085 {
1086 CHECK_CHARSET_GET_ID (elt, this_id);
1087 offset = 0;
1088 }
1089 XSETCAR (val, Fcons (make_fixnum (this_id), make_fixnum (offset)));
1090
1091 this_charset = CHARSET_FROM_ID (this_id);
1092 if (charset.min_char > this_charset->min_char)
1093 charset.min_char = this_charset->min_char;
1094 if (charset.max_char < this_charset->max_char)
1095 charset.max_char = this_charset->max_char;
1096 for (i = 0; i < 190; i++)
1097 charset.fast_map[i] |= this_charset->fast_map[i];
1098 }
1099 }
1100 else
1101 error ("None of :code-offset, :map, :parents are specified");
1102
1103 val = args[charset_arg_unify_map];
1104 if (! NILP (val) && !STRINGP (val))
1105 CHECK_VECTOR (val);
1106 ASET (attrs, charset_unify_map, val);
1107
1108 CHECK_LIST (args[charset_arg_plist]);
1109 ASET (attrs, charset_plist, args[charset_arg_plist]);
1110
1111 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1112 &hash_code);
1113 if (charset.hash_index >= 0)
1114 {
1115 new_definition_p = 0;
1116 id = XFIXNAT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1117 set_hash_value_slot (hash_table, charset.hash_index, attrs);
1118 }
1119 else
1120 {
1121 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1122 hash_code);
1123 if (charset_table_used == charset_table_size)
1124 {
1125
1126
1127
1128
1129
1130 int old_size = charset_table_size;
1131 ptrdiff_t new_size = old_size;
1132 struct charset *new_table =
1133 xpalloc (0, &new_size, 1,
1134 min (INT_MAX, MOST_POSITIVE_FIXNUM),
1135 sizeof *charset_table);
1136 memcpy (new_table, charset_table, old_size * sizeof *new_table);
1137 charset_table = new_table;
1138 charset_table_size = new_size;
1139
1140
1141
1142
1143
1144
1145
1146 }
1147 id = charset_table_used++;
1148 new_definition_p = 1;
1149 }
1150
1151 ASET (attrs, charset_id, make_fixnum (id));
1152 charset.id = id;
1153 charset_table[id] = charset;
1154
1155 if (charset.method == CHARSET_METHOD_MAP)
1156 {
1157 load_charset (&charset, 0);
1158 charset_table[id] = charset;
1159 }
1160
1161 if (charset.iso_final >= 0)
1162 {
1163 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1164 charset.iso_final) = id;
1165 if (new_definition_p)
1166 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list, list1i (id));
1167 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1168 charset_jisx0201_roman = id;
1169 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1170 charset_jisx0208_1978 = id;
1171 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1172 charset_jisx0208 = id;
1173 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
1174 charset_ksc5601 = id;
1175 }
1176
1177 if (charset.emacs_mule_id >= 0)
1178 {
1179 emacs_mule_charset[charset.emacs_mule_id] = id;
1180 if (charset.emacs_mule_id < 0xA0)
1181 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1182 else
1183 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
1184 if (new_definition_p)
1185 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1186 list1i (id));
1187 }
1188
1189 if (new_definition_p)
1190 {
1191 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1192 if (charset.supplementary_p)
1193 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list, list1i (id));
1194 else
1195 {
1196 Lisp_Object tail;
1197
1198 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1199 {
1200 struct charset *cs = CHARSET_FROM_ID (XFIXNUM (XCAR (tail)));
1201
1202 if (cs->supplementary_p)
1203 break;
1204 }
1205 if (EQ (tail, Vcharset_ordered_list))
1206 Vcharset_ordered_list = Fcons (make_fixnum (id),
1207 Vcharset_ordered_list);
1208 else if (NILP (tail))
1209 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1210 list1i (id));
1211 else
1212 {
1213 val = Fcons (XCAR (tail), XCDR (tail));
1214 XSETCDR (tail, val);
1215 XSETCAR (tail, make_fixnum (id));
1216 }
1217 }
1218 charset_ordered_list_tick++;
1219 }
1220
1221 return Qnil;
1222 }
1223
1224
1225
1226
1227
1228
1229
1230 static int
1231 define_charset_internal (Lisp_Object name,
1232 int dimension,
1233 const char *code_space_chars,
1234 unsigned min_code, unsigned max_code,
1235 int iso_final, int iso_revision, int emacs_mule_id,
1236 bool ascii_compatible, bool supplementary,
1237 int code_offset)
1238 {
1239 const unsigned char *code_space = (const unsigned char *) code_space_chars;
1240 Lisp_Object args[charset_arg_max];
1241 Lisp_Object val;
1242 int i;
1243
1244 args[charset_arg_name] = name;
1245 args[charset_arg_dimension] = make_fixnum (dimension);
1246 val = make_uninit_vector (8);
1247 for (i = 0; i < 8; i++)
1248 ASET (val, i, make_fixnum (code_space[i]));
1249 args[charset_arg_code_space] = val;
1250 args[charset_arg_min_code] = make_fixnum (min_code);
1251 args[charset_arg_max_code] = make_fixnum (max_code);
1252 args[charset_arg_iso_final]
1253 = (iso_final < 0 ? Qnil : make_fixnum (iso_final));
1254 args[charset_arg_iso_revision] = make_fixnum (iso_revision);
1255 args[charset_arg_emacs_mule_id]
1256 = (emacs_mule_id < 0 ? Qnil : make_fixnum (emacs_mule_id));
1257 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1258 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1259 args[charset_arg_invalid_code] = Qnil;
1260 args[charset_arg_code_offset] = make_fixnum (code_offset);
1261 args[charset_arg_map] = Qnil;
1262 args[charset_arg_subset] = Qnil;
1263 args[charset_arg_superset] = Qnil;
1264 args[charset_arg_unify_map] = Qnil;
1265
1266 args[charset_arg_plist] =
1267 list (QCname,
1268 args[charset_arg_name],
1269 intern_c_string (":dimension"),
1270 args[charset_arg_dimension],
1271 intern_c_string (":code-space"),
1272 args[charset_arg_code_space],
1273 intern_c_string (":iso-final-char"),
1274 args[charset_arg_iso_final],
1275 intern_c_string (":emacs-mule-id"),
1276 args[charset_arg_emacs_mule_id],
1277 QCascii_compatible_p,
1278 args[charset_arg_ascii_compatible_p],
1279 intern_c_string (":code-offset"),
1280 args[charset_arg_code_offset]);
1281 Fdefine_charset_internal (charset_arg_max, args);
1282
1283 return XFIXNUM (CHARSET_SYMBOL_ID (name));
1284 }
1285
1286
1287 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1288 Sdefine_charset_alias, 2, 2, 0,
1289 doc: )
1290 (Lisp_Object alias, Lisp_Object charset)
1291 {
1292 Lisp_Object attr;
1293
1294 CHECK_CHARSET_GET_ATTR (charset, attr);
1295 Fputhash (alias, attr, Vcharset_hash_table);
1296 Vcharset_list = Fcons (alias, Vcharset_list);
1297 return Qnil;
1298 }
1299
1300
1301 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1302 doc: )
1303 (Lisp_Object charset)
1304 {
1305 Lisp_Object attrs;
1306
1307 CHECK_CHARSET_GET_ATTR (charset, attrs);
1308 return CHARSET_ATTR_PLIST (attrs);
1309 }
1310
1311
1312 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1313 doc: )
1314 (Lisp_Object charset, Lisp_Object plist)
1315 {
1316 Lisp_Object attrs;
1317
1318 CHECK_CHARSET_GET_ATTR (charset, attrs);
1319 ASET (attrs, charset_plist, plist);
1320 return plist;
1321 }
1322
1323
1324 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1325 doc:
1326
1327
1328
1329
1330
1331
1332
1333 )
1334 (Lisp_Object charset, Lisp_Object unify_map, Lisp_Object deunify)
1335 {
1336 int id;
1337 struct charset *cs;
1338
1339 CHECK_CHARSET_GET_ID (charset, id);
1340 cs = CHARSET_FROM_ID (id);
1341 if (NILP (deunify)
1342 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1343 : ! CHARSET_UNIFIED_P (cs))
1344 return Qnil;
1345
1346 CHARSET_UNIFIED_P (cs) = 0;
1347 if (NILP (deunify))
1348 {
1349 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1350 || CHARSET_CODE_OFFSET (cs) < 0x110000)
1351 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1352 if (NILP (unify_map))
1353 unify_map = CHARSET_UNIFY_MAP (cs);
1354 else
1355 {
1356 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1357 signal_error ("Bad unify-map", unify_map);
1358 set_charset_attr (cs, charset_unify_map, unify_map);
1359 }
1360 if (NILP (Vchar_unify_table))
1361 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1362 char_table_set_range (Vchar_unify_table,
1363 cs->min_char, cs->max_char, charset);
1364 CHARSET_UNIFIED_P (cs) = 1;
1365 }
1366 else if (CHAR_TABLE_P (Vchar_unify_table))
1367 {
1368 unsigned min_code = CHARSET_MIN_CODE (cs);
1369 unsigned max_code = CHARSET_MAX_CODE (cs);
1370 int min_char = DECODE_CHAR (cs, min_code);
1371 int max_char = DECODE_CHAR (cs, max_code);
1372
1373 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1374 }
1375
1376 return Qnil;
1377 }
1378
1379
1380
1381
1382 static bool
1383 check_iso_charset_parameter (Lisp_Object dimension, Lisp_Object chars,
1384 Lisp_Object final_char)
1385 {
1386 CHECK_FIXNUM (dimension);
1387 CHECK_FIXNUM (chars);
1388 CHECK_CHARACTER (final_char);
1389
1390 if (! (1 <= XFIXNUM (dimension) && XFIXNUM (dimension) <= 3))
1391 error ("Invalid DIMENSION %"pI"d, it should be 1, 2, or 3",
1392 XFIXNUM (dimension));
1393
1394 bool chars_flag = XFIXNUM (chars) == 96;
1395 if (! (chars_flag || XFIXNUM (chars) == 94))
1396 error ("Invalid CHARS %"pI"d, it should be 94 or 96", XFIXNUM (chars));
1397
1398 int final_ch = XFIXNAT (final_char);
1399 if (! ('0' <= final_ch && final_ch <= '~'))
1400 error ("Invalid FINAL-CHAR `%c', it should be `0'..`~'", final_ch);
1401
1402 return chars_flag;
1403 }
1404
1405 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1406 Sget_unused_iso_final_char, 2, 2, 0,
1407 doc:
1408
1409
1410
1411
1412
1413
1414 )
1415 (Lisp_Object dimension, Lisp_Object chars)
1416 {
1417 bool chars_flag = check_iso_charset_parameter (dimension, chars,
1418 make_fixnum ('0'));
1419 for (int final_char = '0'; final_char <= '?'; final_char++)
1420 if (ISO_CHARSET_TABLE (XFIXNUM (dimension), chars_flag, final_char) < 0)
1421 return make_fixnum (final_char);
1422 return Qnil;
1423 }
1424
1425
1426 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1427 4, 4, 0,
1428 doc:
1429
1430
1431
1432 )
1433 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char, Lisp_Object charset)
1434 {
1435 int id;
1436
1437 CHECK_CHARSET_GET_ID (charset, id);
1438 bool chars_flag = check_iso_charset_parameter (dimension, chars, final_char);
1439 ISO_CHARSET_TABLE (XFIXNUM (dimension), chars_flag, XFIXNAT (final_char)) = id;
1440 return Qnil;
1441 }
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457 int
1458 string_xstring_p (Lisp_Object string)
1459 {
1460 const unsigned char *p = SDATA (string);
1461 const unsigned char *endp = p + SBYTES (string);
1462
1463 if (SCHARS (string) == SBYTES (string))
1464 return 0;
1465
1466 while (p < endp)
1467 {
1468 int c = string_char_advance (&p);
1469
1470 if (c >= 0x100)
1471 return 2;
1472 }
1473 return 1;
1474 }
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484 static void
1485 find_charsets_in_text (const unsigned char *ptr, ptrdiff_t nchars,
1486 ptrdiff_t nbytes, Lisp_Object charsets,
1487 Lisp_Object table, bool multibyte)
1488 {
1489 const unsigned char *pend = ptr + nbytes;
1490
1491 if (nchars == nbytes)
1492 {
1493 if (multibyte)
1494 ASET (charsets, charset_ascii, Qt);
1495 else
1496 while (ptr < pend)
1497 {
1498 int c = *ptr++;
1499
1500 if (!NILP (table))
1501 c = translate_char (table, c);
1502 if (ASCII_CHAR_P (c))
1503 ASET (charsets, charset_ascii, Qt);
1504 else
1505 ASET (charsets, charset_eight_bit, Qt);
1506 }
1507 }
1508 else
1509 {
1510 while (ptr < pend)
1511 {
1512 int c = string_char_advance (&ptr);
1513 struct charset *charset;
1514
1515 if (!NILP (table))
1516 c = translate_char (table, c);
1517 charset = CHAR_CHARSET (c);
1518 ASET (charsets, CHARSET_ID (charset), Qt);
1519 }
1520 }
1521 }
1522
1523 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1524 2, 3, 0,
1525 doc:
1526
1527
1528
1529
1530 )
1531 (Lisp_Object beg, Lisp_Object end, Lisp_Object table)
1532 {
1533 Lisp_Object charsets;
1534 ptrdiff_t from, from_byte, to, stop, stop_byte;
1535 int i;
1536 Lisp_Object val;
1537 bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1538
1539 validate_region (&beg, &end);
1540 from = XFIXNAT (beg);
1541 stop = to = XFIXNAT (end);
1542
1543 if (from < GPT && GPT < to)
1544 {
1545 stop = GPT;
1546 stop_byte = GPT_BYTE;
1547 }
1548 else
1549 stop_byte = CHAR_TO_BYTE (stop);
1550
1551 from_byte = CHAR_TO_BYTE (from);
1552
1553 charsets = make_nil_vector (charset_table_used);
1554 while (1)
1555 {
1556 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1557 stop_byte - from_byte, charsets, table,
1558 multibyte);
1559 if (stop < to)
1560 {
1561 from = stop, from_byte = stop_byte;
1562 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1563 }
1564 else
1565 break;
1566 }
1567
1568 val = Qnil;
1569 for (i = charset_table_used - 1; i >= 0; i--)
1570 if (!NILP (AREF (charsets, i)))
1571 val = Fcons (CHARSET_NAME (charset_table + i), val);
1572 return val;
1573 }
1574
1575 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1576 1, 2, 0,
1577 doc:
1578
1579
1580
1581 )
1582 (Lisp_Object str, Lisp_Object table)
1583 {
1584 CHECK_STRING (str);
1585
1586 Lisp_Object charsets = make_nil_vector (charset_table_used);
1587 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1588 charsets, table,
1589 STRING_MULTIBYTE (str));
1590 Lisp_Object val = Qnil;
1591 for (int i = charset_table_used - 1; i >= 0; i--)
1592 if (!NILP (AREF (charsets, i)))
1593 val = Fcons (CHARSET_NAME (charset_table + i), val);
1594 return val;
1595 }
1596
1597
1598
1599
1600
1601
1602 static int
1603 maybe_unify_char (int c, Lisp_Object val)
1604 {
1605 struct charset *charset;
1606
1607 if (FIXNUMP (val))
1608 return XFIXNAT (val);
1609 if (NILP (val))
1610 return c;
1611
1612 CHECK_CHARSET_GET_CHARSET (val, charset);
1613 #ifdef REL_ALLOC
1614
1615
1616
1617 r_alloc_inhibit_buffer_relocation (1);
1618 #endif
1619 load_charset (charset, 1);
1620 if (! inhibit_load_charset_map)
1621 {
1622 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1623 if (! NILP (val))
1624 c = XFIXNAT (val);
1625 }
1626 else
1627 {
1628 int code_index = c - CHARSET_CODE_OFFSET (charset);
1629 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1630
1631 if (unified > 0)
1632 c = unified;
1633 }
1634 #ifdef REL_ALLOC
1635 r_alloc_inhibit_buffer_relocation (0);
1636 #endif
1637 return c;
1638 }
1639
1640
1641
1642
1643
1644 int
1645 decode_char (struct charset *charset, unsigned int code)
1646 {
1647 int c, char_index;
1648 enum charset_method method = CHARSET_METHOD (charset);
1649
1650 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1651 return -1;
1652
1653 if (method == CHARSET_METHOD_SUBSET)
1654 {
1655 Lisp_Object subset_info;
1656
1657 subset_info = CHARSET_SUBSET (charset);
1658 charset = CHARSET_FROM_ID (XFIXNAT (AREF (subset_info, 0)));
1659 code -= XFIXNUM (AREF (subset_info, 3));
1660 if (code < XFIXNAT (AREF (subset_info, 1))
1661 || code > XFIXNAT (AREF (subset_info, 2)))
1662 c = -1;
1663 else
1664 c = DECODE_CHAR (charset, code);
1665 }
1666 else if (method == CHARSET_METHOD_SUPERSET)
1667 {
1668 Lisp_Object parents;
1669
1670 parents = CHARSET_SUPERSET (charset);
1671 c = -1;
1672 for (; CONSP (parents); parents = XCDR (parents))
1673 {
1674 int id = XFIXNUM (XCAR (XCAR (parents)));
1675 int code_offset = XFIXNUM (XCDR (XCAR (parents)));
1676 unsigned this_code = code - code_offset;
1677
1678 charset = CHARSET_FROM_ID (id);
1679 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1680 break;
1681 }
1682 }
1683 else
1684 {
1685 char_index = CODE_POINT_TO_INDEX (charset, code);
1686 if (char_index < 0)
1687 return -1;
1688
1689 if (method == CHARSET_METHOD_MAP)
1690 {
1691 Lisp_Object decoder;
1692
1693 decoder = CHARSET_DECODER (charset);
1694 if (! VECTORP (decoder))
1695 {
1696 load_charset (charset, 1);
1697 decoder = CHARSET_DECODER (charset);
1698 }
1699 if (VECTORP (decoder))
1700 c = XFIXNUM (AREF (decoder, char_index));
1701 else
1702 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
1703 }
1704 else
1705 {
1706 c = char_index + CHARSET_CODE_OFFSET (charset);
1707 if (CHARSET_UNIFIED_P (charset)
1708 && MAX_UNICODE_CHAR < c && c <= MAX_5_BYTE_CHAR)
1709 {
1710
1711 Lisp_Object val = CHAR_TABLE_REF (Vchar_unify_table, c);
1712 c = maybe_unify_char (c, val);
1713 }
1714 }
1715 }
1716
1717 return c;
1718 }
1719
1720
1721 Lisp_Object charset_work;
1722
1723
1724
1725
1726
1727 unsigned
1728 encode_char (struct charset *charset, int c)
1729 {
1730 unsigned code;
1731 enum charset_method method = CHARSET_METHOD (charset);
1732
1733 if (CHARSET_UNIFIED_P (charset))
1734 {
1735 Lisp_Object deunifier;
1736 int code_index = -1;
1737
1738 deunifier = CHARSET_DEUNIFIER (charset);
1739 if (! CHAR_TABLE_P (deunifier))
1740 {
1741 load_charset (charset, 2);
1742 deunifier = CHARSET_DEUNIFIER (charset);
1743 }
1744 if (CHAR_TABLE_P (deunifier))
1745 {
1746 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1747
1748 if (FIXNUMP (deunified))
1749 code_index = XFIXNUM (deunified);
1750 }
1751 else
1752 {
1753 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1754 }
1755 if (code_index >= 0)
1756 c = CHARSET_CODE_OFFSET (charset) + code_index;
1757 }
1758
1759 if (method == CHARSET_METHOD_SUBSET)
1760 {
1761 Lisp_Object subset_info;
1762 struct charset *this_charset;
1763
1764 subset_info = CHARSET_SUBSET (charset);
1765 this_charset = CHARSET_FROM_ID (XFIXNAT (AREF (subset_info, 0)));
1766 code = ENCODE_CHAR (this_charset, c);
1767 if (code == CHARSET_INVALID_CODE (this_charset)
1768 || code < XFIXNAT (AREF (subset_info, 1))
1769 || code > XFIXNAT (AREF (subset_info, 2)))
1770 return CHARSET_INVALID_CODE (charset);
1771 code += XFIXNUM (AREF (subset_info, 3));
1772 return code;
1773 }
1774
1775 if (method == CHARSET_METHOD_SUPERSET)
1776 {
1777 Lisp_Object parents;
1778
1779 parents = CHARSET_SUPERSET (charset);
1780 for (; CONSP (parents); parents = XCDR (parents))
1781 {
1782 int id = XFIXNUM (XCAR (XCAR (parents)));
1783 int code_offset = XFIXNUM (XCDR (XCAR (parents)));
1784 struct charset *this_charset = CHARSET_FROM_ID (id);
1785
1786 code = ENCODE_CHAR (this_charset, c);
1787 if (code != CHARSET_INVALID_CODE (this_charset))
1788 return code + code_offset;
1789 }
1790 return CHARSET_INVALID_CODE (charset);
1791 }
1792
1793 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1794 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1795 return CHARSET_INVALID_CODE (charset);
1796
1797 if (method == CHARSET_METHOD_MAP)
1798 {
1799 Lisp_Object encoder;
1800 Lisp_Object val;
1801
1802 encoder = CHARSET_ENCODER (charset);
1803 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1804 {
1805 load_charset (charset, 2);
1806 encoder = CHARSET_ENCODER (charset);
1807 }
1808 if (CHAR_TABLE_P (encoder))
1809 {
1810 val = CHAR_TABLE_REF (encoder, c);
1811 if (NILP (val))
1812 return CHARSET_INVALID_CODE (charset);
1813 code = XFIXNUM (val);
1814 if (! CHARSET_COMPACT_CODES_P (charset))
1815 code = INDEX_TO_CODE_POINT (charset, code);
1816 }
1817 else
1818 {
1819 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1820 code = INDEX_TO_CODE_POINT (charset, code);
1821 }
1822 }
1823 else
1824 {
1825 unsigned code_index = c - CHARSET_CODE_OFFSET (charset);
1826
1827 code = INDEX_TO_CODE_POINT (charset, code_index);
1828 }
1829
1830 return code;
1831 }
1832
1833
1834 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 2, 0,
1835 doc:
1836
1837
1838
1839 )
1840 (Lisp_Object charset, Lisp_Object code_point)
1841 {
1842 int c, id;
1843 unsigned code;
1844 struct charset *charsetp;
1845
1846 CHECK_CHARSET_GET_ID (charset, id);
1847 code = cons_to_unsigned (code_point, UINT_MAX);
1848 charsetp = CHARSET_FROM_ID (id);
1849 c = DECODE_CHAR (charsetp, code);
1850 return (c >= 0 ? make_fixnum (c) : Qnil);
1851 }
1852
1853
1854 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 2, 0,
1855 doc:
1856
1857 )
1858 (Lisp_Object ch, Lisp_Object charset)
1859 {
1860 int c, id;
1861 unsigned code;
1862 struct charset *charsetp;
1863
1864 CHECK_CHARSET_GET_ID (charset, id);
1865 CHECK_CHARACTER (ch);
1866 c = XFIXNAT (ch);
1867 charsetp = CHARSET_FROM_ID (id);
1868 code = ENCODE_CHAR (charsetp, c);
1869 if (code == CHARSET_INVALID_CODE (charsetp))
1870 return Qnil;
1871
1872
1873
1874
1875
1876
1877
1878 return INT_TO_INTEGER (code);
1879 }
1880
1881
1882 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1883 doc:
1884
1885
1886
1887
1888 )
1889 (Lisp_Object charset, Lisp_Object code1, Lisp_Object code2, Lisp_Object code3, Lisp_Object code4)
1890 {
1891 int id, dimension;
1892 struct charset *charsetp;
1893 unsigned code;
1894 int c;
1895
1896 CHECK_CHARSET_GET_ID (charset, id);
1897 charsetp = CHARSET_FROM_ID (id);
1898
1899 dimension = CHARSET_DIMENSION (charsetp);
1900 if (NILP (code1))
1901 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1902 ? 0 : CHARSET_MIN_CODE (charsetp));
1903 else
1904 {
1905 CHECK_FIXNAT (code1);
1906 if (XFIXNAT (code1) >= 0x100)
1907 args_out_of_range (make_fixnum (0xFF), code1);
1908 code = XFIXNAT (code1);
1909
1910 if (dimension > 1)
1911 {
1912 code <<= 8;
1913 if (NILP (code2))
1914 code |= charsetp->code_space[(dimension - 2) * 4];
1915 else
1916 {
1917 CHECK_FIXNAT (code2);
1918 if (XFIXNAT (code2) >= 0x100)
1919 args_out_of_range (make_fixnum (0xFF), code2);
1920 code |= XFIXNAT (code2);
1921 }
1922
1923 if (dimension > 2)
1924 {
1925 code <<= 8;
1926 if (NILP (code3))
1927 code |= charsetp->code_space[(dimension - 3) * 4];
1928 else
1929 {
1930 CHECK_FIXNAT (code3);
1931 if (XFIXNAT (code3) >= 0x100)
1932 args_out_of_range (make_fixnum (0xFF), code3);
1933 code |= XFIXNAT (code3);
1934 }
1935
1936 if (dimension > 3)
1937 {
1938 code <<= 8;
1939 if (NILP (code4))
1940 code |= charsetp->code_space[0];
1941 else
1942 {
1943 CHECK_FIXNAT (code4);
1944 if (XFIXNAT (code4) >= 0x100)
1945 args_out_of_range (make_fixnum (0xFF), code4);
1946 code |= XFIXNAT (code4);
1947 }
1948 }
1949 }
1950 }
1951 }
1952
1953 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1954 code &= 0x7F7F7F7F;
1955 c = DECODE_CHAR (charsetp, code);
1956 if (c < 0)
1957 error ("Invalid code(s)");
1958 return make_fixnum (c);
1959 }
1960
1961
1962
1963
1964
1965
1966 struct charset *
1967 char_charset (int c, Lisp_Object charset_list, unsigned int *code_return)
1968 {
1969 bool maybe_null = 0;
1970
1971 if (NILP (charset_list))
1972 charset_list = Vcharset_ordered_list;
1973 else
1974 maybe_null = 1;
1975
1976 while (CONSP (charset_list))
1977 {
1978 struct charset *charset = CHARSET_FROM_ID (XFIXNUM (XCAR (charset_list)));
1979 unsigned code = ENCODE_CHAR (charset, c);
1980
1981 if (code != CHARSET_INVALID_CODE (charset))
1982 {
1983 if (code_return)
1984 *code_return = code;
1985 return charset;
1986 }
1987 charset_list = XCDR (charset_list);
1988 if (! maybe_null
1989 && c <= MAX_UNICODE_CHAR
1990 && EQ (charset_list, Vcharset_non_preferred_head))
1991 return CHARSET_FROM_ID (charset_unicode);
1992 }
1993 return (maybe_null ? NULL
1994 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
1995 : CHARSET_FROM_ID (charset_eight_bit));
1996 }
1997
1998
1999 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
2000 doc:
2001
2002
2003
2004 )
2005 (Lisp_Object ch)
2006 {
2007 struct charset *charset;
2008 int c, dimension;
2009 unsigned code;
2010 Lisp_Object val;
2011
2012 CHECK_CHARACTER (ch);
2013 c = XFIXNAT (ch);
2014 charset = CHAR_CHARSET (c);
2015 if (! charset)
2016 emacs_abort ();
2017 code = ENCODE_CHAR (charset, c);
2018 if (code == CHARSET_INVALID_CODE (charset))
2019 emacs_abort ();
2020 dimension = CHARSET_DIMENSION (charset);
2021 for (val = Qnil; dimension > 0; dimension--)
2022 {
2023 val = Fcons (make_fixnum (code & 0xFF), val);
2024 code >>= 8;
2025 }
2026 return Fcons (CHARSET_NAME (charset), val);
2027 }
2028
2029
2030 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2031 doc:
2032
2033
2034
2035
2036 )
2037 (Lisp_Object ch, Lisp_Object restriction)
2038 {
2039 struct charset *charset;
2040
2041 CHECK_CHARACTER (ch);
2042 if (NILP (restriction))
2043 charset = CHAR_CHARSET (XFIXNUM (ch));
2044 else
2045 {
2046 if (CONSP (restriction))
2047 {
2048 int c = XFIXNAT (ch);
2049
2050 for (; CONSP (restriction); restriction = XCDR (restriction))
2051 {
2052 struct charset *rcharset;
2053
2054 CHECK_CHARSET_GET_CHARSET (XCAR (restriction), rcharset);
2055 if (ENCODE_CHAR (rcharset, c) != CHARSET_INVALID_CODE (rcharset))
2056 return XCAR (restriction);
2057 }
2058 return Qnil;
2059 }
2060 restriction = coding_system_charset_list (restriction);
2061 charset = char_charset (XFIXNUM (ch), restriction, NULL);
2062 if (! charset)
2063 return Qnil;
2064 }
2065 return (CHARSET_NAME (charset));
2066 }
2067
2068
2069 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2070 doc:
2071
2072
2073 )
2074 (Lisp_Object pos)
2075 {
2076 Lisp_Object ch;
2077 struct charset *charset;
2078
2079 ch = Fchar_after (pos);
2080 if (! FIXNUMP (ch))
2081 return ch;
2082 charset = CHAR_CHARSET (XFIXNUM (ch));
2083 return (CHARSET_NAME (charset));
2084 }
2085
2086
2087 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2088 doc:
2089
2090
2091
2092
2093
2094
2095 )
2096 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
2097 {
2098 bool chars_flag = check_iso_charset_parameter (dimension, chars, final_char);
2099 int id = ISO_CHARSET_TABLE (XFIXNUM (dimension), chars_flag,
2100 XFIXNAT (final_char));
2101 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2102 }
2103
2104
2105 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2106 0, 0, 0,
2107 doc:
2108
2109
2110 )
2111 (void)
2112 {
2113 if (temp_charset_work)
2114 {
2115 xfree (temp_charset_work);
2116 temp_charset_work = NULL;
2117 }
2118
2119 if (CHAR_TABLE_P (Vchar_unify_table))
2120 Foptimize_char_table (Vchar_unify_table, Qnil);
2121
2122 return Qnil;
2123 }
2124
2125 DEFUN ("charset-priority-list", Fcharset_priority_list,
2126 Scharset_priority_list, 0, 1, 0,
2127 doc:
2128 )
2129 (Lisp_Object highestp)
2130 {
2131 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2132
2133 if (!NILP (highestp))
2134 return CHARSET_NAME (CHARSET_FROM_ID (XFIXNUM (Fcar (list))));
2135
2136 while (!NILP (list))
2137 {
2138 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XFIXNUM (XCAR (list)))), val);
2139 list = XCDR (list);
2140 }
2141 return Fnreverse (val);
2142 }
2143
2144 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2145 1, MANY, 0,
2146 doc:
2147 )
2148 (ptrdiff_t nargs, Lisp_Object *args)
2149 {
2150 Lisp_Object new_head, old_list;
2151 Lisp_Object list_2022, list_emacs_mule;
2152 ptrdiff_t i;
2153 int id;
2154
2155 old_list = Fcopy_sequence (Vcharset_ordered_list);
2156 new_head = Qnil;
2157 for (i = 0; i < nargs; i++)
2158 {
2159 CHECK_CHARSET_GET_ID (args[i], id);
2160 if (! NILP (Fmemq (make_fixnum (id), old_list)))
2161 {
2162 old_list = Fdelq (make_fixnum (id), old_list);
2163 new_head = Fcons (make_fixnum (id), new_head);
2164 }
2165 }
2166 Vcharset_non_preferred_head = old_list;
2167 Vcharset_ordered_list = nconc2 (Fnreverse (new_head), old_list);
2168
2169 charset_ordered_list_tick++;
2170
2171 charset_unibyte = -1;
2172 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2173 CONSP (old_list); old_list = XCDR (old_list))
2174 {
2175 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2176 list_2022 = Fcons (XCAR (old_list), list_2022);
2177 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2178 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2179 if (charset_unibyte < 0)
2180 {
2181 struct charset *charset = CHARSET_FROM_ID (XFIXNUM (XCAR (old_list)));
2182
2183 if (CHARSET_DIMENSION (charset) == 1
2184 && CHARSET_ASCII_COMPATIBLE_P (charset)
2185 && CHARSET_MAX_CHAR (charset) >= 0x80)
2186 charset_unibyte = CHARSET_ID (charset);
2187 }
2188 }
2189 Viso_2022_charset_list = Fnreverse (list_2022);
2190 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2191 if (charset_unibyte < 0)
2192 charset_unibyte = charset_iso_8859_1;
2193
2194 return Qnil;
2195 }
2196
2197 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2198 0, 1, 0,
2199 doc:
2200 )
2201 (Lisp_Object charset)
2202 {
2203 int id;
2204
2205 CHECK_CHARSET_GET_ID (charset, id);
2206 return make_fixnum (id);
2207 }
2208
2209 struct charset_sort_data
2210 {
2211 Lisp_Object charset;
2212 int id;
2213 ptrdiff_t priority;
2214 };
2215
2216 static int
2217 charset_compare (const void *d1, const void *d2)
2218 {
2219 const struct charset_sort_data *data1 = d1, *data2 = d2;
2220 if (data1->priority != data2->priority)
2221 return data1->priority < data2->priority ? -1 : 1;
2222 return 0;
2223 }
2224
2225 DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0,
2226 doc:
2227
2228 )
2229 (Lisp_Object charsets)
2230 {
2231 ptrdiff_t n = list_length (charsets), i, j;
2232 int done;
2233 Lisp_Object tail, elt, attrs;
2234 struct charset_sort_data *sort_data;
2235 int id, min_id = INT_MAX, max_id = INT_MIN;
2236 USE_SAFE_ALLOCA;
2237
2238 if (n == 0)
2239 return Qnil;
2240 SAFE_NALLOCA (sort_data, 1, n);
2241 for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++)
2242 {
2243 elt = XCAR (tail);
2244 CHECK_CHARSET_GET_ATTR (elt, attrs);
2245 sort_data[i].charset = elt;
2246 sort_data[i].id = id = XFIXNUM (CHARSET_ATTR_ID (attrs));
2247 if (id < min_id)
2248 min_id = id;
2249 if (id > max_id)
2250 max_id = id;
2251 }
2252 for (done = 0, tail = Vcharset_ordered_list, i = 0;
2253 done < n && CONSP (tail); tail = XCDR (tail), i++)
2254 {
2255 elt = XCAR (tail);
2256 id = XFIXNAT (elt);
2257 if (id >= min_id && id <= max_id)
2258 for (j = 0; j < n; j++)
2259 if (sort_data[j].id == id)
2260 {
2261 sort_data[j].priority = i;
2262 done++;
2263 }
2264 }
2265 qsort (sort_data, n, sizeof *sort_data, charset_compare);
2266 for (i = 0, tail = charsets; CONSP (tail); tail = XCDR (tail), i++)
2267 XSETCAR (tail, sort_data[i].charset);
2268 SAFE_FREE ();
2269 return charsets;
2270 }
2271
2272
2273 void
2274 init_charset (void)
2275 {
2276 Lisp_Object tempdir;
2277 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
2278 if (! file_accessible_directory_p (tempdir))
2279 {
2280
2281
2282
2283 fprintf (stderr,
2284 ("Error: %s: %s\n"
2285 "Emacs will not function correctly "
2286 "without the character map files.\n"
2287 "%s"
2288 "Please check your installation!\n"),
2289 SDATA (tempdir), strerror (errno),
2290 (egetenv ("EMACSDATA")
2291 ? ("The EMACSDATA environment variable is set. "
2292 "Maybe it has the wrong value?\n")
2293 : ""));
2294 exit (1);
2295 }
2296
2297 Vcharset_map_path = list1 (tempdir);
2298 }
2299
2300
2301 void
2302 init_charset_once (void)
2303 {
2304 int i, j, k;
2305
2306 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2307 for (j = 0; j < ISO_MAX_CHARS; j++)
2308 for (k = 0; k < ISO_MAX_FINAL; k++)
2309 iso_charset_table[i][j][k] = -1;
2310
2311 PDUMPER_REMEMBER_SCALAR (iso_charset_table);
2312
2313 for (i = 0; i < 256; i++)
2314 emacs_mule_charset[i] = -1;
2315
2316 PDUMPER_REMEMBER_SCALAR (emacs_mule_charset);
2317
2318 charset_jisx0201_roman = -1;
2319 PDUMPER_REMEMBER_SCALAR (charset_jisx0201_roman);
2320
2321 charset_jisx0208_1978 = -1;
2322 PDUMPER_REMEMBER_SCALAR (charset_jisx0208_1978);
2323
2324 charset_jisx0208 = -1;
2325 PDUMPER_REMEMBER_SCALAR (charset_jisx0208);
2326
2327 charset_ksc5601 = -1;
2328 PDUMPER_REMEMBER_SCALAR (charset_ksc5601);
2329 }
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341 static struct charset charset_table_init[180];
2342
2343 void
2344 syms_of_charset (void)
2345 {
2346 DEFSYM (Qcharsetp, "charsetp");
2347
2348
2349 DEFSYM (Qascii, "ascii");
2350 DEFSYM (Qunicode, "unicode");
2351 DEFSYM (Qemacs, "emacs");
2352 DEFSYM (Qeight_bit, "eight-bit");
2353 DEFSYM (Qiso_8859_1, "iso-8859-1");
2354
2355 staticpro (&Vcharset_ordered_list);
2356 Vcharset_ordered_list = Qnil;
2357
2358 staticpro (&Viso_2022_charset_list);
2359 Viso_2022_charset_list = Qnil;
2360
2361 staticpro (&Vemacs_mule_charset_list);
2362 Vemacs_mule_charset_list = Qnil;
2363
2364 staticpro (&Vcharset_hash_table);
2365 Vcharset_hash_table = CALLN (Fmake_hash_table, QCtest, Qeq);
2366
2367 charset_table = charset_table_init;
2368 charset_table_size = ARRAYELTS (charset_table_init);
2369 PDUMPER_REMEMBER_SCALAR (charset_table_size);
2370 charset_table_used = 0;
2371 PDUMPER_REMEMBER_SCALAR (charset_table_used);
2372
2373 defsubr (&Scharsetp);
2374 defsubr (&Smap_charset_chars);
2375 defsubr (&Sdefine_charset_internal);
2376 defsubr (&Sdefine_charset_alias);
2377 defsubr (&Scharset_plist);
2378 defsubr (&Sset_charset_plist);
2379 defsubr (&Sunify_charset);
2380 defsubr (&Sget_unused_iso_final_char);
2381 defsubr (&Sdeclare_equiv_charset);
2382 defsubr (&Sfind_charset_region);
2383 defsubr (&Sfind_charset_string);
2384 defsubr (&Sdecode_char);
2385 defsubr (&Sencode_char);
2386 defsubr (&Ssplit_char);
2387 defsubr (&Smake_char);
2388 defsubr (&Schar_charset);
2389 defsubr (&Scharset_after);
2390 defsubr (&Siso_charset);
2391 defsubr (&Sclear_charset_maps);
2392 defsubr (&Scharset_priority_list);
2393 defsubr (&Sset_charset_priority);
2394 defsubr (&Scharset_id_internal);
2395 defsubr (&Ssort_charsets);
2396
2397 DEFVAR_LISP ("charset-map-path", Vcharset_map_path,
2398 doc: );
2399 Vcharset_map_path = Qnil;
2400
2401 DEFVAR_BOOL ("inhibit-load-charset-map", inhibit_load_charset_map,
2402 doc: );
2403 inhibit_load_charset_map = 0;
2404
2405 DEFVAR_LISP ("charset-list", Vcharset_list,
2406 doc: );
2407 Vcharset_list = Qnil;
2408
2409 DEFVAR_LISP ("current-iso639-language", Vcurrent_iso639_language,
2410 doc:
2411
2412 );
2413 Vcurrent_iso639_language = Qnil;
2414
2415 charset_ascii
2416 = define_charset_internal (Qascii, 1, "\x00\x7F\0\0\0\0\0",
2417 0, 127, 'B', -1, 0, 1, 0, 0);
2418 PDUMPER_REMEMBER_SCALAR (charset_ascii);
2419
2420 charset_iso_8859_1
2421 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\0\0\0\0\0",
2422 0, 255, -1, -1, -1, 1, 0, 0);
2423 PDUMPER_REMEMBER_SCALAR (charset_iso_8859_1);
2424
2425 charset_unicode
2426 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10\0",
2427 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2428 PDUMPER_REMEMBER_SCALAR (charset_unicode);
2429
2430 charset_emacs
2431 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F\0",
2432 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2433 PDUMPER_REMEMBER_SCALAR (charset_emacs);
2434
2435 charset_eight_bit
2436 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\0\0\0\0\0",
2437 128, 255, -1, 0, -1, 0, 1,
2438 MAX_5_BYTE_CHAR + 1);
2439 PDUMPER_REMEMBER_SCALAR (charset_eight_bit);
2440
2441 charset_unibyte = charset_iso_8859_1;
2442 PDUMPER_REMEMBER_SCALAR (charset_unibyte);
2443 }