This source file includes following definitions.
- load_charset_map
- read_hex
- load_charset_map_from_file
- load_charset_map_from_vector
- load_charset
- DEFUN
- map_charset_for_dump
- map_charset_chars
- define_charset_internal
- DEFUN
- check_iso_charset_parameter
- string_xstring_p
- find_charsets_in_text
- maybe_unify_char
- decode_char
- encode_char
- char_charset
- DEFUN
- DEFUN
- DEFUN
- DEFUN
- DEFUN
- charset_compare
- DEFUN
- init_charset
- init_charset_once
- syms_of_charset
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29 #include <config.h>
30
31 #include <errno.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <limits.h>
35 #include <sys/types.h>
36 #include "lisp.h"
37 #include "character.h"
38 #include "charset.h"
39 #include "coding.h"
40 #include "buffer.h"
41 #include "sysstdio.h"
42 #include "pdumper.h"
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61 Lisp_Object Vcharset_hash_table;
62
63
64 struct charset *charset_table;
65 int charset_table_size;
66 int charset_table_used;
67
68
69 int charset_ascii;
70 int charset_eight_bit;
71 static int charset_iso_8859_1;
72 int charset_unicode;
73 static int charset_emacs;
74
75
76 int charset_jisx0201_roman;
77 int charset_jisx0208_1978;
78 int charset_jisx0208;
79 int charset_ksc5601;
80
81
82 int charset_unibyte;
83
84
85 Lisp_Object Vcharset_ordered_list;
86
87
88
89 Lisp_Object Vcharset_non_preferred_head;
90
91
92
93 EMACS_UINT charset_ordered_list_tick;
94
95
96 Lisp_Object Viso_2022_charset_list;
97
98
99 Lisp_Object Vemacs_mule_charset_list;
100
101 int emacs_mule_charset[256];
102
103
104
105 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
106
107 #define CODE_POINT_TO_INDEX(charset, code) \
108 ((charset)->code_linear_p \
109 ? (int) ((code) - (charset)->min_code) \
110 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
111 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
112 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
113 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
114 ? (int) (((((code) >> 24) - (charset)->code_space[12]) \
115 * (charset)->code_space[11]) \
116 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
117 * (charset)->code_space[7]) \
118 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
119 * (charset)->code_space[3]) \
120 + (((code) & 0xFF) - (charset)->code_space[0]) \
121 - ((charset)->char_index_offset)) \
122 : -1)
123
124
125
126
127
128
129 #define INDEX_TO_CODE_POINT(charset, idx) \
130 ((charset)->code_linear_p \
131 ? (idx) + (charset)->min_code \
132 : (idx += (charset)->char_index_offset, \
133 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
134 | (((charset)->code_space[4] \
135 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
136 << 8) \
137 | (((charset)->code_space[8] \
138 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
139 << 16) \
140 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
141 << 24))))
142
143
144
145
146 static struct
147 {
148
149 struct charset *current;
150
151
152 short for_encoder;
153
154
155
156 int min_char, max_char;
157
158
159
160
161
162 int zero_index_char;
163
164 union {
165
166
167
168 int decoder[0x10000];
169
170
171
172
173
174 unsigned short encoder[0x20000];
175 } table;
176 } *temp_charset_work;
177
178 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
179 do { \
180 if ((CODE) == 0) \
181 temp_charset_work->zero_index_char = (C); \
182 else if ((C) < 0x20000) \
183 temp_charset_work->table.encoder[(C)] = (CODE); \
184 else \
185 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
186 } while (0)
187
188 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
189 ((C) == temp_charset_work->zero_index_char ? 0 \
190 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
191 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
192 : temp_charset_work->table.encoder[(C) - 0x10000] \
193 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
194
195 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
196 (temp_charset_work->table.decoder[(CODE)] = (C))
197
198 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
199 (temp_charset_work->table.decoder[(CODE)])
200
201
202
203
204 bool charset_map_loaded;
205
206 struct charset_map_entries
207 {
208 struct {
209 unsigned from, to;
210 int c;
211 } entry[0x10000];
212 struct charset_map_entries *next;
213 };
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240 static void
241 load_charset_map (struct charset *charset, struct charset_map_entries *entries, int n_entries, int control_flag)
242 {
243 Lisp_Object vec UNINIT;
244 Lisp_Object table UNINIT;
245 unsigned max_code = CHARSET_MAX_CODE (charset);
246 bool ascii_compatible_p = charset->ascii_compatible_p;
247 int min_char, max_char, nonascii_min_char;
248 int i;
249 unsigned char *fast_map = charset->fast_map;
250
251 if (n_entries <= 0)
252 return;
253
254 if (control_flag)
255 {
256 if (! inhibit_load_charset_map)
257 {
258 if (control_flag == 1)
259 {
260 if (charset->method == CHARSET_METHOD_MAP)
261 {
262 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
263
264 vec = make_vector (n, make_fixnum (-1));
265 set_charset_attr (charset, charset_decoder, vec);
266 }
267 else
268 {
269 char_table_set_range (Vchar_unify_table,
270 charset->min_char, charset->max_char,
271 Qnil);
272 }
273 }
274 else
275 {
276 table = Fmake_char_table (Qnil, Qnil);
277 set_charset_attr (charset,
278 (charset->method == CHARSET_METHOD_MAP
279 ? charset_encoder : charset_deunifier),
280 table);
281 }
282 }
283 else
284 {
285 if (! temp_charset_work)
286 temp_charset_work = xmalloc (sizeof *temp_charset_work);
287 if (control_flag == 1)
288 {
289 memset (temp_charset_work->table.decoder, -1,
290 sizeof (int) * 0x10000);
291 }
292 else
293 {
294 memset (temp_charset_work->table.encoder, 0,
295 sizeof (unsigned short) * 0x20000);
296 temp_charset_work->zero_index_char = -1;
297 }
298 temp_charset_work->current = charset;
299 temp_charset_work->for_encoder = (control_flag == 2);
300 control_flag += 2;
301 }
302 charset_map_loaded = 1;
303 }
304
305 min_char = max_char = entries->entry[0].c;
306 nonascii_min_char = MAX_CHAR;
307 for (i = 0; i < n_entries; i++)
308 {
309 unsigned from, to;
310 int from_index, to_index, lim_index;
311 int from_c, to_c;
312 int idx = i % 0x10000;
313
314 if (i > 0 && idx == 0)
315 entries = entries->next;
316 from = entries->entry[idx].from;
317 to = entries->entry[idx].to;
318 from_c = entries->entry[idx].c;
319 from_index = CODE_POINT_TO_INDEX (charset, from);
320 if (from == to)
321 {
322 to_index = from_index;
323 to_c = from_c;
324 }
325 else
326 {
327 to_index = CODE_POINT_TO_INDEX (charset, to);
328 to_c = from_c + (to_index - from_index);
329 }
330 if (from_index < 0 || to_index < 0)
331 continue;
332 lim_index = to_index + 1;
333
334 if (to_c > max_char)
335 max_char = to_c;
336 else if (from_c < min_char)
337 min_char = from_c;
338
339 if (control_flag == 1)
340 {
341 if (charset->method == CHARSET_METHOD_MAP)
342 for (; from_index < lim_index; from_index++, from_c++)
343 ASET (vec, from_index, make_fixnum (from_c));
344 else
345 for (; from_index < lim_index; from_index++, from_c++)
346 CHAR_TABLE_SET (Vchar_unify_table,
347 CHARSET_CODE_OFFSET (charset) + from_index,
348 make_fixnum (from_c));
349 }
350 else if (control_flag == 2)
351 {
352 if (charset->method == CHARSET_METHOD_MAP
353 && CHARSET_COMPACT_CODES_P (charset))
354 for (; from_index < lim_index; from_index++, from_c++)
355 {
356 unsigned code = from_index;
357 code = INDEX_TO_CODE_POINT (charset, code);
358
359 if (NILP (CHAR_TABLE_REF (table, from_c)))
360 CHAR_TABLE_SET (table, from_c, make_fixnum (code));
361 }
362 else
363 for (; from_index < lim_index; from_index++, from_c++)
364 {
365 if (NILP (CHAR_TABLE_REF (table, from_c)))
366 CHAR_TABLE_SET (table, from_c, make_fixnum (from_index));
367 }
368 }
369 else if (control_flag == 3)
370 for (; from_index < lim_index; from_index++, from_c++)
371 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
372 else if (control_flag == 4)
373 for (; from_index < lim_index; from_index++, from_c++)
374 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
375 else
376 {
377 if (ascii_compatible_p)
378 {
379 if (! ASCII_CHAR_P (from_c))
380 {
381 if (from_c < nonascii_min_char)
382 nonascii_min_char = from_c;
383 }
384 else if (! ASCII_CHAR_P (to_c))
385 {
386 nonascii_min_char = 0x80;
387 }
388 }
389
390 for (; from_c <= to_c; from_c++)
391 CHARSET_FAST_MAP_SET (from_c, fast_map);
392 }
393 }
394
395 if (control_flag == 0)
396 {
397 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
398 ? nonascii_min_char : min_char);
399 CHARSET_MAX_CHAR (charset) = max_char;
400 }
401 else if (control_flag == 4)
402 {
403 temp_charset_work->min_char = min_char;
404 temp_charset_work->max_char = max_char;
405 }
406 }
407
408
409
410
411
412
413
414
415 static unsigned
416 read_hex (FILE *fp, int lookahead, int *terminator, bool *overflow)
417 {
418 int c = lookahead < 0 ? getc (fp) : lookahead;
419
420 while (true)
421 {
422 if (c == '#')
423 do
424 c = getc (fp);
425 while (0 <= c && c != '\n');
426 else if (c == '0')
427 {
428 c = getc (fp);
429 if (c < 0 || c == 'x')
430 break;
431 }
432 if (c < 0)
433 break;
434 c = getc (fp);
435 }
436
437 unsigned n = 0;
438 bool v = false;
439
440 if (0 <= c)
441 while (true)
442 {
443 c = getc (fp);
444 int digit = char_hexdigit (c);
445 if (digit < 0)
446 break;
447 v |= INT_LEFT_SHIFT_OVERFLOW (n, 4);
448 n = (n << 4) + digit;
449 }
450
451 *terminator = c;
452 *overflow |= v;
453 return n;
454 }
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473 static void
474 load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile,
475 int control_flag)
476 {
477 unsigned min_code = CHARSET_MIN_CODE (charset);
478 unsigned max_code = CHARSET_MAX_CODE (charset);
479 int fd;
480 FILE *fp;
481 struct charset_map_entries *head, *entries;
482 int n_entries;
483 AUTO_STRING (map, ".map");
484 AUTO_STRING (txt, ".txt");
485 AUTO_LIST2 (suffixes, map, txt);
486 specpdl_ref count = SPECPDL_INDEX ();
487 record_unwind_protect_nothing ();
488 specbind (Qfile_name_handler_alist, Qnil);
489 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil, false, false);
490 fp = fd < 0 ? 0 : fdopen (fd, "r");
491 if (!fp)
492 {
493 int open_errno = errno;
494 emacs_close (fd);
495 report_file_errno ("Loading charset map", mapfile, open_errno);
496 }
497 set_unwind_protect_ptr (count, fclose_unwind, fp);
498 unbind_to (specpdl_ref_add (count, 1), Qnil);
499
500
501
502 head = record_xmalloc (sizeof *head);
503 entries = head;
504 memset (entries, 0, sizeof (struct charset_map_entries));
505
506 n_entries = 0;
507 int ch = -1;
508 while (true)
509 {
510 bool overflow = false;
511 unsigned from = read_hex (fp, ch, &ch, &overflow), to;
512 if (ch < 0)
513 break;
514 if (ch == '-')
515 {
516 to = read_hex (fp, -1, &ch, &overflow);
517 if (ch < 0)
518 break;
519 }
520 else
521 {
522 to = from;
523 ch = -1;
524 }
525 unsigned c = read_hex (fp, ch, &ch, &overflow);
526 if (ch < 0)
527 break;
528
529 if (overflow)
530 continue;
531 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
532 continue;
533
534 if (n_entries == 0x10000)
535 {
536 entries->next = record_xmalloc (sizeof *entries->next);
537 entries = entries->next;
538 memset (entries, 0, sizeof (struct charset_map_entries));
539 n_entries = 0;
540 }
541 int idx = n_entries;
542 entries->entry[idx].from = from;
543 entries->entry[idx].to = to;
544 entries->entry[idx].c = c;
545 n_entries++;
546 }
547 fclose (fp);
548 clear_unwind_protect (count);
549
550 load_charset_map (charset, head, n_entries, control_flag);
551 unbind_to (count, Qnil);
552 }
553
554 static void
555 load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int control_flag)
556 {
557 unsigned min_code = CHARSET_MIN_CODE (charset);
558 unsigned max_code = CHARSET_MAX_CODE (charset);
559 struct charset_map_entries *head, *entries;
560 int n_entries;
561 int len = ASIZE (vec);
562 int i;
563 USE_SAFE_ALLOCA;
564
565 if (len % 2 == 1)
566 {
567 add_to_log ("Failure in loading charset map: %V", vec);
568 return;
569 }
570
571
572
573 head = SAFE_ALLOCA (sizeof *head);
574 entries = head;
575 memset (entries, 0, sizeof (struct charset_map_entries));
576
577 n_entries = 0;
578 for (i = 0; i < len; i += 2)
579 {
580 Lisp_Object val, val2;
581 unsigned from, to;
582 EMACS_INT c;
583 int idx;
584
585 val = AREF (vec, i);
586 if (CONSP (val))
587 {
588 val2 = XCDR (val);
589 val = XCAR (val);
590 from = XFIXNAT (val);
591 to = XFIXNAT (val2);
592 }
593 else
594 from = to = XFIXNAT (val);
595 val = AREF (vec, i + 1);
596 CHECK_FIXNAT (val);
597 c = XFIXNAT (val);
598
599 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
600 continue;
601
602 if (n_entries > 0 && (n_entries % 0x10000) == 0)
603 {
604 entries->next = SAFE_ALLOCA (sizeof *entries->next);
605 entries = entries->next;
606 memset (entries, 0, sizeof (struct charset_map_entries));
607 }
608 idx = n_entries % 0x10000;
609 entries->entry[idx].from = from;
610 entries->entry[idx].to = to;
611 entries->entry[idx].c = c;
612 n_entries++;
613 }
614
615 load_charset_map (charset, head, n_entries, control_flag);
616 SAFE_FREE ();
617 }
618
619
620
621
622
623 static void
624 load_charset (struct charset *charset, int control_flag)
625 {
626 Lisp_Object map;
627
628 if (inhibit_load_charset_map
629 && temp_charset_work
630 && charset == temp_charset_work->current
631 && ((control_flag == 2) == temp_charset_work->for_encoder))
632 return;
633
634 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
635 map = CHARSET_MAP (charset);
636 else
637 {
638 if (! CHARSET_UNIFIED_P (charset))
639 emacs_abort ();
640 map = CHARSET_UNIFY_MAP (charset);
641 }
642 if (STRINGP (map))
643 load_charset_map_from_file (charset, map, control_flag);
644 else
645 load_charset_map_from_vector (charset, map, control_flag);
646 }
647
648
649 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
650 doc: )
651 (Lisp_Object object)
652 {
653 return (CHARSETP (object) ? Qt : Qnil);
654 }
655
656
657 static void
658 map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object),
659 Lisp_Object function, Lisp_Object arg,
660 unsigned int from, unsigned int to)
661 {
662 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
663 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
664 Lisp_Object range = Fcons (Qnil, Qnil);
665 int c, stop;
666
667 c = temp_charset_work->min_char;
668 stop = (temp_charset_work->max_char < 0x20000
669 ? temp_charset_work->max_char : 0xFFFF);
670
671 while (1)
672 {
673 int idx = GET_TEMP_CHARSET_WORK_ENCODER (c);
674
675 if (idx >= from_idx && idx <= to_idx)
676 {
677 if (NILP (XCAR (range)))
678 XSETCAR (range, make_fixnum (c));
679 }
680 else if (! NILP (XCAR (range)))
681 {
682 XSETCDR (range, make_fixnum (c - 1));
683 if (c_function)
684 (*c_function) (arg, range);
685 else
686 call2 (function, range, arg);
687 XSETCAR (range, Qnil);
688 }
689 if (c == stop)
690 {
691 if (c == temp_charset_work->max_char)
692 {
693 if (! NILP (XCAR (range)))
694 {
695 XSETCDR (range, make_fixnum (c));
696 if (c_function)
697 (*c_function) (arg, range);
698 else
699 call2 (function, range, arg);
700 }
701 break;
702 }
703 c = 0x1FFFF;
704 stop = temp_charset_work->max_char;
705 }
706 c++;
707 }
708 }
709
710 void
711 map_charset_chars (void (*c_function)(Lisp_Object, Lisp_Object), Lisp_Object function,
712 Lisp_Object arg, struct charset *charset, unsigned from, unsigned to)
713 {
714 Lisp_Object range;
715 bool partial = (from > CHARSET_MIN_CODE (charset)
716 || to < CHARSET_MAX_CODE (charset));
717
718 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
719 {
720 int from_idx = CODE_POINT_TO_INDEX (charset, from);
721 int to_idx = CODE_POINT_TO_INDEX (charset, to);
722 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
723 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
724
725 if (CHARSET_UNIFIED_P (charset))
726 {
727 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
728 load_charset (charset, 2);
729 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
730 map_char_table_for_charset (c_function, function,
731 CHARSET_DEUNIFIER (charset), arg,
732 partial ? charset : NULL, from, to);
733 else
734 map_charset_for_dump (c_function, function, arg, from, to);
735 }
736
737 range = Fcons (make_fixnum (from_c), make_fixnum (to_c));
738 if (NILP (function))
739 (*c_function) (arg, range);
740 else
741 call2 (function, range, arg);
742 }
743 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
744 {
745 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
746 load_charset (charset, 2);
747 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
748 map_char_table_for_charset (c_function, function,
749 CHARSET_ENCODER (charset), arg,
750 partial ? charset : NULL, from, to);
751 else
752 map_charset_for_dump (c_function, function, arg, from, to);
753 }
754 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
755 {
756 Lisp_Object subset_info;
757 int offset;
758
759 subset_info = CHARSET_SUBSET (charset);
760 charset = CHARSET_FROM_ID (XFIXNAT (AREF (subset_info, 0)));
761 offset = XFIXNUM (AREF (subset_info, 3));
762 from -= offset;
763 if (from < XFIXNAT (AREF (subset_info, 1)))
764 from = XFIXNAT (AREF (subset_info, 1));
765 to -= offset;
766 if (to > XFIXNAT (AREF (subset_info, 2)))
767 to = XFIXNAT (AREF (subset_info, 2));
768 map_charset_chars (c_function, function, arg, charset, from, to);
769 }
770 else
771 {
772 Lisp_Object parents;
773
774 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
775 parents = XCDR (parents))
776 {
777 int offset;
778 unsigned this_from, this_to;
779
780 charset = CHARSET_FROM_ID (XFIXNAT (XCAR (XCAR (parents))));
781 offset = XFIXNUM (XCDR (XCAR (parents)));
782 this_from = from > offset ? from - offset : 0;
783 this_to = to > offset ? to - offset : 0;
784 if (this_from < CHARSET_MIN_CODE (charset))
785 this_from = CHARSET_MIN_CODE (charset);
786 if (this_to > CHARSET_MAX_CODE (charset))
787 this_to = CHARSET_MAX_CODE (charset);
788 map_charset_chars (c_function, function, arg, charset,
789 this_from, this_to);
790 }
791 }
792 }
793
794 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
795 doc:
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810 )
811 (Lisp_Object function, Lisp_Object charset, Lisp_Object arg, Lisp_Object from_code, Lisp_Object to_code)
812 {
813 struct charset *cs;
814 unsigned from, to;
815
816 CHECK_CHARSET_GET_CHARSET (charset, cs);
817 if (NILP (from_code))
818 from = CHARSET_MIN_CODE (cs);
819 else
820 {
821 from = XFIXNUM (from_code);
822 if (from < CHARSET_MIN_CODE (cs))
823 from = CHARSET_MIN_CODE (cs);
824 }
825 if (NILP (to_code))
826 to = CHARSET_MAX_CODE (cs);
827 else
828 {
829 to = XFIXNUM (to_code);
830 if (to > CHARSET_MAX_CODE (cs))
831 to = CHARSET_MAX_CODE (cs);
832 }
833 map_charset_chars (NULL, function, arg, cs, from, to);
834 return Qnil;
835 }
836
837
838
839
840
841
842
843 DEFUN ("define-charset-internal", Fdefine_charset_internal,
844 Sdefine_charset_internal, charset_arg_max, MANY, 0,
845 doc:
846 )
847 (ptrdiff_t nargs, Lisp_Object *args)
848 {
849
850 Lisp_Object attrs;
851 Lisp_Object val;
852 Lisp_Object hash_code;
853 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
854 int i, j;
855 struct charset charset;
856 int id;
857 int dimension;
858 bool new_definition_p;
859 int nchars;
860
861 memset (&charset, 0, sizeof (charset));
862
863 if (nargs != charset_arg_max)
864 Fsignal (Qwrong_number_of_arguments,
865 Fcons (intern ("define-charset-internal"),
866 make_fixnum (nargs)));
867
868 attrs = make_nil_vector (charset_attr_max);
869
870 CHECK_SYMBOL (args[charset_arg_name]);
871 ASET (attrs, charset_name, args[charset_arg_name]);
872
873 val = args[charset_arg_code_space];
874 for (i = 0, dimension = 0, nchars = 1; ; i++)
875 {
876 Lisp_Object min_byte_obj = Faref (val, make_fixnum (i * 2));
877 Lisp_Object max_byte_obj = Faref (val, make_fixnum (i * 2 + 1));
878 int min_byte = check_integer_range (min_byte_obj, 0, 255);
879 int max_byte = check_integer_range (max_byte_obj, min_byte, 255);
880 charset.code_space[i * 4] = min_byte;
881 charset.code_space[i * 4 + 1] = max_byte;
882 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
883 if (max_byte > 0)
884 dimension = i + 1;
885 if (i == 3)
886 break;
887 nchars *= charset.code_space[i * 4 + 2];
888 charset.code_space[i * 4 + 3] = nchars;
889 }
890
891 val = args[charset_arg_dimension];
892 charset.dimension
893 = !NILP (val) ? check_integer_range (val, 1, 4) : dimension;
894
895 charset.code_linear_p
896 = (charset.dimension == 1
897 || (charset.code_space[2] == 256
898 && (charset.dimension == 2
899 || (charset.code_space[6] == 256
900 && (charset.dimension == 3
901 || charset.code_space[10] == 256)))));
902
903 if (! charset.code_linear_p)
904 {
905 charset.code_space_mask = xzalloc (256);
906 for (i = 0; i < 4; i++)
907 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
908 j++)
909 charset.code_space_mask[j] |= (1 << i);
910 }
911
912 charset.iso_chars_96 = charset.code_space[2] == 96;
913
914 charset.min_code = (charset.code_space[0]
915 | (charset.code_space[4] << 8)
916 | (charset.code_space[8] << 16)
917 | ((unsigned) charset.code_space[12] << 24));
918 charset.max_code = (charset.code_space[1]
919 | (charset.code_space[5] << 8)
920 | (charset.code_space[9] << 16)
921 | ((unsigned) charset.code_space[13] << 24));
922 charset.char_index_offset = 0;
923
924 val = args[charset_arg_min_code];
925 if (! NILP (val))
926 {
927 unsigned code = cons_to_unsigned (val, UINT_MAX);
928
929 if (code < charset.min_code
930 || code > charset.max_code)
931 args_out_of_range_3 (INT_TO_INTEGER (charset.min_code),
932 INT_TO_INTEGER (charset.max_code), val);
933 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
934 charset.min_code = code;
935 }
936
937 val = args[charset_arg_max_code];
938 if (! NILP (val))
939 {
940 unsigned code = cons_to_unsigned (val, UINT_MAX);
941
942 if (code < charset.min_code
943 || code > charset.max_code)
944 args_out_of_range_3 (INT_TO_INTEGER (charset.min_code),
945 INT_TO_INTEGER (charset.max_code), val);
946 charset.max_code = code;
947 }
948
949 charset.compact_codes_p = charset.max_code < 0x10000;
950
951 val = args[charset_arg_invalid_code];
952 if (NILP (val))
953 {
954 if (charset.min_code > 0)
955 charset.invalid_code = 0;
956 else
957 {
958 if (charset.max_code < UINT_MAX)
959 charset.invalid_code = charset.max_code + 1;
960 else
961 error ("Attribute :invalid-code must be specified");
962 }
963 }
964 else
965 charset.invalid_code = cons_to_unsigned (val, UINT_MAX);
966
967 val = args[charset_arg_iso_final];
968 if (NILP (val))
969 charset.iso_final = -1;
970 else
971 {
972 CHECK_FIXNUM (val);
973 if (XFIXNUM (val) < '0' || XFIXNUM (val) > 127)
974 error ("Invalid iso-final-char: %"pI"d", XFIXNUM (val));
975 charset.iso_final = XFIXNUM (val);
976 }
977
978 val = args[charset_arg_iso_revision];
979 charset.iso_revision = !NILP (val) ? check_integer_range (val, -1, 63) : -1;
980
981 val = args[charset_arg_emacs_mule_id];
982 if (NILP (val))
983 charset.emacs_mule_id = -1;
984 else
985 {
986 CHECK_FIXNAT (val);
987 if ((XFIXNUM (val) > 0 && XFIXNUM (val) <= 128) || XFIXNUM (val) >= 256)
988 error ("Invalid emacs-mule-id: %"pI"d", XFIXNUM (val));
989 charset.emacs_mule_id = XFIXNUM (val);
990 }
991
992 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
993
994 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
995
996 charset.unified_p = 0;
997
998 memset (charset.fast_map, 0, sizeof (charset.fast_map));
999
1000 if (! NILP (args[charset_arg_code_offset]))
1001 {
1002 val = args[charset_arg_code_offset];
1003 CHECK_CHARACTER (val);
1004
1005 charset.method = CHARSET_METHOD_OFFSET;
1006 charset.code_offset = XFIXNUM (val);
1007
1008 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1009 if (MAX_CHAR - charset.code_offset < i)
1010 error ("Unsupported max char: %d", charset.max_char);
1011 charset.max_char = i + charset.code_offset;
1012 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1013 charset.min_char = i + charset.code_offset;
1014
1015 i = (charset.min_char >> 7) << 7;
1016 for (; i < 0x10000 && i <= charset.max_char; i += 128)
1017 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1018 i = (i >> 12) << 12;
1019 for (; i <= charset.max_char; i += 0x1000)
1020 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1021 if (charset.code_offset == 0 && charset.max_char >= 0x80)
1022 charset.ascii_compatible_p = 1;
1023 }
1024 else if (! NILP (args[charset_arg_map]))
1025 {
1026 val = args[charset_arg_map];
1027 ASET (attrs, charset_map, val);
1028 charset.method = CHARSET_METHOD_MAP;
1029 }
1030 else if (! NILP (args[charset_arg_subset]))
1031 {
1032 Lisp_Object parent;
1033 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1034 struct charset *parent_charset;
1035
1036 val = args[charset_arg_subset];
1037 parent = Fcar (val);
1038 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1039 parent_min_code = Fnth (make_fixnum (1), val);
1040 CHECK_FIXNAT (parent_min_code);
1041 parent_max_code = Fnth (make_fixnum (2), val);
1042 CHECK_FIXNAT (parent_max_code);
1043 parent_code_offset = Fnth (make_fixnum (3), val);
1044 CHECK_FIXNUM (parent_code_offset);
1045 ASET (attrs, charset_subset,
1046 CALLN (Fvector, make_fixnum (parent_charset->id),
1047 parent_min_code, parent_max_code, parent_code_offset));
1048
1049 charset.method = CHARSET_METHOD_SUBSET;
1050
1051
1052
1053 memcpy (charset.fast_map, parent_charset->fast_map,
1054 sizeof charset.fast_map);
1055
1056
1057 charset.min_char = parent_charset->min_char;
1058 charset.max_char = parent_charset->max_char;
1059 }
1060 else if (! NILP (args[charset_arg_superset]))
1061 {
1062 val = args[charset_arg_superset];
1063 charset.method = CHARSET_METHOD_SUPERSET;
1064 val = Fcopy_sequence (val);
1065 ASET (attrs, charset_superset, val);
1066
1067 charset.min_char = MAX_CHAR;
1068 charset.max_char = 0;
1069 for (; ! NILP (val); val = Fcdr (val))
1070 {
1071 Lisp_Object elt, car_part, cdr_part;
1072 int this_id, offset;
1073 struct charset *this_charset;
1074
1075 elt = Fcar (val);
1076 if (CONSP (elt))
1077 {
1078 car_part = XCAR (elt);
1079 cdr_part = XCDR (elt);
1080 CHECK_CHARSET_GET_ID (car_part, this_id);
1081 offset = check_integer_range (cdr_part, INT_MIN, INT_MAX);
1082 }
1083 else
1084 {
1085 CHECK_CHARSET_GET_ID (elt, this_id);
1086 offset = 0;
1087 }
1088 XSETCAR (val, Fcons (make_fixnum (this_id), make_fixnum (offset)));
1089
1090 this_charset = CHARSET_FROM_ID (this_id);
1091 if (charset.min_char > this_charset->min_char)
1092 charset.min_char = this_charset->min_char;
1093 if (charset.max_char < this_charset->max_char)
1094 charset.max_char = this_charset->max_char;
1095 for (i = 0; i < 190; i++)
1096 charset.fast_map[i] |= this_charset->fast_map[i];
1097 }
1098 }
1099 else
1100 error ("None of :code-offset, :map, :parents are specified");
1101
1102 val = args[charset_arg_unify_map];
1103 if (! NILP (val) && !STRINGP (val))
1104 CHECK_VECTOR (val);
1105 ASET (attrs, charset_unify_map, val);
1106
1107 CHECK_LIST (args[charset_arg_plist]);
1108 ASET (attrs, charset_plist, args[charset_arg_plist]);
1109
1110 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1111 &hash_code);
1112 if (charset.hash_index >= 0)
1113 {
1114 new_definition_p = 0;
1115 id = XFIXNAT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1116 set_hash_value_slot (hash_table, charset.hash_index, attrs);
1117 }
1118 else
1119 {
1120 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1121 hash_code);
1122 if (charset_table_used == charset_table_size)
1123 {
1124
1125
1126
1127
1128
1129 int old_size = charset_table_size;
1130 ptrdiff_t new_size = old_size;
1131 struct charset *new_table =
1132 xpalloc (0, &new_size, 1,
1133 min (INT_MAX, MOST_POSITIVE_FIXNUM),
1134 sizeof *charset_table);
1135 memcpy (new_table, charset_table, old_size * sizeof *new_table);
1136 charset_table = new_table;
1137 charset_table_size = new_size;
1138
1139
1140
1141
1142
1143
1144
1145 }
1146 id = charset_table_used++;
1147 new_definition_p = 1;
1148 }
1149
1150 ASET (attrs, charset_id, make_fixnum (id));
1151 charset.id = id;
1152 charset_table[id] = charset;
1153
1154 if (charset.method == CHARSET_METHOD_MAP)
1155 {
1156 load_charset (&charset, 0);
1157 charset_table[id] = charset;
1158 }
1159
1160 if (charset.iso_final >= 0)
1161 {
1162 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1163 charset.iso_final) = id;
1164 if (new_definition_p)
1165 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list, list1i (id));
1166 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1167 charset_jisx0201_roman = id;
1168 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1169 charset_jisx0208_1978 = id;
1170 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1171 charset_jisx0208 = id;
1172 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
1173 charset_ksc5601 = id;
1174 }
1175
1176 if (charset.emacs_mule_id >= 0)
1177 {
1178 emacs_mule_charset[charset.emacs_mule_id] = id;
1179 if (charset.emacs_mule_id < 0xA0)
1180 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1181 else
1182 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
1183 if (new_definition_p)
1184 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1185 list1i (id));
1186 }
1187
1188 if (new_definition_p)
1189 {
1190 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1191 if (charset.supplementary_p)
1192 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list, list1i (id));
1193 else
1194 {
1195 Lisp_Object tail;
1196
1197 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1198 {
1199 struct charset *cs = CHARSET_FROM_ID (XFIXNUM (XCAR (tail)));
1200
1201 if (cs->supplementary_p)
1202 break;
1203 }
1204 if (EQ (tail, Vcharset_ordered_list))
1205 Vcharset_ordered_list = Fcons (make_fixnum (id),
1206 Vcharset_ordered_list);
1207 else if (NILP (tail))
1208 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1209 list1i (id));
1210 else
1211 {
1212 val = Fcons (XCAR (tail), XCDR (tail));
1213 XSETCDR (tail, val);
1214 XSETCAR (tail, make_fixnum (id));
1215 }
1216 }
1217 charset_ordered_list_tick++;
1218 }
1219
1220 return Qnil;
1221 }
1222
1223
1224
1225
1226
1227
1228
1229 static int
1230 define_charset_internal (Lisp_Object name,
1231 int dimension,
1232 const char *code_space_chars,
1233 unsigned min_code, unsigned max_code,
1234 int iso_final, int iso_revision, int emacs_mule_id,
1235 bool ascii_compatible, bool supplementary,
1236 int code_offset)
1237 {
1238 const unsigned char *code_space = (const unsigned char *) code_space_chars;
1239 Lisp_Object args[charset_arg_max];
1240 Lisp_Object val;
1241 int i;
1242
1243 args[charset_arg_name] = name;
1244 args[charset_arg_dimension] = make_fixnum (dimension);
1245 val = make_uninit_vector (8);
1246 for (i = 0; i < 8; i++)
1247 ASET (val, i, make_fixnum (code_space[i]));
1248 args[charset_arg_code_space] = val;
1249 args[charset_arg_min_code] = make_fixnum (min_code);
1250 args[charset_arg_max_code] = make_fixnum (max_code);
1251 args[charset_arg_iso_final]
1252 = (iso_final < 0 ? Qnil : make_fixnum (iso_final));
1253 args[charset_arg_iso_revision] = make_fixnum (iso_revision);
1254 args[charset_arg_emacs_mule_id]
1255 = (emacs_mule_id < 0 ? Qnil : make_fixnum (emacs_mule_id));
1256 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1257 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1258 args[charset_arg_invalid_code] = Qnil;
1259 args[charset_arg_code_offset] = make_fixnum (code_offset);
1260 args[charset_arg_map] = Qnil;
1261 args[charset_arg_subset] = Qnil;
1262 args[charset_arg_superset] = Qnil;
1263 args[charset_arg_unify_map] = Qnil;
1264
1265 args[charset_arg_plist] =
1266 list (QCname,
1267 args[charset_arg_name],
1268 intern_c_string (":dimension"),
1269 args[charset_arg_dimension],
1270 intern_c_string (":code-space"),
1271 args[charset_arg_code_space],
1272 intern_c_string (":iso-final-char"),
1273 args[charset_arg_iso_final],
1274 intern_c_string (":emacs-mule-id"),
1275 args[charset_arg_emacs_mule_id],
1276 QCascii_compatible_p,
1277 args[charset_arg_ascii_compatible_p],
1278 intern_c_string (":code-offset"),
1279 args[charset_arg_code_offset]);
1280 Fdefine_charset_internal (charset_arg_max, args);
1281
1282 return XFIXNUM (CHARSET_SYMBOL_ID (name));
1283 }
1284
1285
1286 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1287 Sdefine_charset_alias, 2, 2, 0,
1288 doc: )
1289 (Lisp_Object alias, Lisp_Object charset)
1290 {
1291 Lisp_Object attr;
1292
1293 CHECK_CHARSET_GET_ATTR (charset, attr);
1294 Fputhash (alias, attr, Vcharset_hash_table);
1295 Vcharset_list = Fcons (alias, Vcharset_list);
1296 return Qnil;
1297 }
1298
1299
1300 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1301 doc: )
1302 (Lisp_Object charset)
1303 {
1304 Lisp_Object attrs;
1305
1306 CHECK_CHARSET_GET_ATTR (charset, attrs);
1307 return CHARSET_ATTR_PLIST (attrs);
1308 }
1309
1310
1311 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1312 doc: )
1313 (Lisp_Object charset, Lisp_Object plist)
1314 {
1315 Lisp_Object attrs;
1316
1317 CHECK_CHARSET_GET_ATTR (charset, attrs);
1318 ASET (attrs, charset_plist, plist);
1319 return plist;
1320 }
1321
1322
1323 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1324 doc:
1325
1326
1327
1328
1329
1330
1331
1332 )
1333 (Lisp_Object charset, Lisp_Object unify_map, Lisp_Object deunify)
1334 {
1335 int id;
1336 struct charset *cs;
1337
1338 CHECK_CHARSET_GET_ID (charset, id);
1339 cs = CHARSET_FROM_ID (id);
1340 if (NILP (deunify)
1341 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1342 : ! CHARSET_UNIFIED_P (cs))
1343 return Qnil;
1344
1345 CHARSET_UNIFIED_P (cs) = 0;
1346 if (NILP (deunify))
1347 {
1348 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1349 || CHARSET_CODE_OFFSET (cs) < 0x110000)
1350 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1351 if (NILP (unify_map))
1352 unify_map = CHARSET_UNIFY_MAP (cs);
1353 else
1354 {
1355 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1356 signal_error ("Bad unify-map", unify_map);
1357 set_charset_attr (cs, charset_unify_map, unify_map);
1358 }
1359 if (NILP (Vchar_unify_table))
1360 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1361 char_table_set_range (Vchar_unify_table,
1362 cs->min_char, cs->max_char, charset);
1363 CHARSET_UNIFIED_P (cs) = 1;
1364 }
1365 else if (CHAR_TABLE_P (Vchar_unify_table))
1366 {
1367 unsigned min_code = CHARSET_MIN_CODE (cs);
1368 unsigned max_code = CHARSET_MAX_CODE (cs);
1369 int min_char = DECODE_CHAR (cs, min_code);
1370 int max_char = DECODE_CHAR (cs, max_code);
1371
1372 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1373 }
1374
1375 return Qnil;
1376 }
1377
1378
1379
1380
1381 static bool
1382 check_iso_charset_parameter (Lisp_Object dimension, Lisp_Object chars,
1383 Lisp_Object final_char)
1384 {
1385 CHECK_FIXNUM (dimension);
1386 CHECK_FIXNUM (chars);
1387 CHECK_CHARACTER (final_char);
1388
1389 if (! (1 <= XFIXNUM (dimension) && XFIXNUM (dimension) <= 3))
1390 error ("Invalid DIMENSION %"pI"d, it should be 1, 2, or 3",
1391 XFIXNUM (dimension));
1392
1393 bool chars_flag = XFIXNUM (chars) == 96;
1394 if (! (chars_flag || XFIXNUM (chars) == 94))
1395 error ("Invalid CHARS %"pI"d, it should be 94 or 96", XFIXNUM (chars));
1396
1397 int final_ch = XFIXNAT (final_char);
1398 if (! ('0' <= final_ch && final_ch <= '~'))
1399 error ("Invalid FINAL-CHAR `%c', it should be `0'..`~'", final_ch);
1400
1401 return chars_flag;
1402 }
1403
1404 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1405 Sget_unused_iso_final_char, 2, 2, 0,
1406 doc:
1407
1408
1409
1410
1411
1412
1413 )
1414 (Lisp_Object dimension, Lisp_Object chars)
1415 {
1416 bool chars_flag = check_iso_charset_parameter (dimension, chars,
1417 make_fixnum ('0'));
1418 for (int final_char = '0'; final_char <= '?'; final_char++)
1419 if (ISO_CHARSET_TABLE (XFIXNUM (dimension), chars_flag, final_char) < 0)
1420 return make_fixnum (final_char);
1421 return Qnil;
1422 }
1423
1424
1425 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1426 4, 4, 0,
1427 doc:
1428
1429
1430
1431 )
1432 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char, Lisp_Object charset)
1433 {
1434 int id;
1435
1436 CHECK_CHARSET_GET_ID (charset, id);
1437 bool chars_flag = check_iso_charset_parameter (dimension, chars, final_char);
1438 ISO_CHARSET_TABLE (XFIXNUM (dimension), chars_flag, XFIXNAT (final_char)) = id;
1439 return Qnil;
1440 }
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456 int
1457 string_xstring_p (Lisp_Object string)
1458 {
1459 const unsigned char *p = SDATA (string);
1460 const unsigned char *endp = p + SBYTES (string);
1461
1462 if (SCHARS (string) == SBYTES (string))
1463 return 0;
1464
1465 while (p < endp)
1466 {
1467 int c = string_char_advance (&p);
1468
1469 if (c >= 0x100)
1470 return 2;
1471 }
1472 return 1;
1473 }
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483 static void
1484 find_charsets_in_text (const unsigned char *ptr, ptrdiff_t nchars,
1485 ptrdiff_t nbytes, Lisp_Object charsets,
1486 Lisp_Object table, bool multibyte)
1487 {
1488 const unsigned char *pend = ptr + nbytes;
1489
1490 if (nchars == nbytes)
1491 {
1492 if (multibyte)
1493 ASET (charsets, charset_ascii, Qt);
1494 else
1495 while (ptr < pend)
1496 {
1497 int c = *ptr++;
1498
1499 if (!NILP (table))
1500 c = translate_char (table, c);
1501 if (ASCII_CHAR_P (c))
1502 ASET (charsets, charset_ascii, Qt);
1503 else
1504 ASET (charsets, charset_eight_bit, Qt);
1505 }
1506 }
1507 else
1508 {
1509 while (ptr < pend)
1510 {
1511 int c = string_char_advance (&ptr);
1512 struct charset *charset;
1513
1514 if (!NILP (table))
1515 c = translate_char (table, c);
1516 charset = CHAR_CHARSET (c);
1517 ASET (charsets, CHARSET_ID (charset), Qt);
1518 }
1519 }
1520 }
1521
1522 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1523 2, 3, 0,
1524 doc:
1525
1526
1527
1528
1529 )
1530 (Lisp_Object beg, Lisp_Object end, Lisp_Object table)
1531 {
1532 Lisp_Object charsets;
1533 ptrdiff_t from, from_byte, to, stop, stop_byte;
1534 int i;
1535 Lisp_Object val;
1536 bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1537
1538 validate_region (&beg, &end);
1539 from = XFIXNAT (beg);
1540 stop = to = XFIXNAT (end);
1541
1542 if (from < GPT && GPT < to)
1543 {
1544 stop = GPT;
1545 stop_byte = GPT_BYTE;
1546 }
1547 else
1548 stop_byte = CHAR_TO_BYTE (stop);
1549
1550 from_byte = CHAR_TO_BYTE (from);
1551
1552 charsets = make_nil_vector (charset_table_used);
1553 while (1)
1554 {
1555 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1556 stop_byte - from_byte, charsets, table,
1557 multibyte);
1558 if (stop < to)
1559 {
1560 from = stop, from_byte = stop_byte;
1561 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1562 }
1563 else
1564 break;
1565 }
1566
1567 val = Qnil;
1568 for (i = charset_table_used - 1; i >= 0; i--)
1569 if (!NILP (AREF (charsets, i)))
1570 val = Fcons (CHARSET_NAME (charset_table + i), val);
1571 return val;
1572 }
1573
1574 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1575 1, 2, 0,
1576 doc:
1577
1578
1579
1580 )
1581 (Lisp_Object str, Lisp_Object table)
1582 {
1583 CHECK_STRING (str);
1584
1585 Lisp_Object charsets = make_nil_vector (charset_table_used);
1586 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1587 charsets, table,
1588 STRING_MULTIBYTE (str));
1589 Lisp_Object val = Qnil;
1590 for (int i = charset_table_used - 1; i >= 0; i--)
1591 if (!NILP (AREF (charsets, i)))
1592 val = Fcons (CHARSET_NAME (charset_table + i), val);
1593 return val;
1594 }
1595
1596
1597
1598
1599
1600
1601 static int
1602 maybe_unify_char (int c, Lisp_Object val)
1603 {
1604 struct charset *charset;
1605
1606 if (FIXNUMP (val))
1607 return XFIXNAT (val);
1608 if (NILP (val))
1609 return c;
1610
1611 CHECK_CHARSET_GET_CHARSET (val, charset);
1612 #ifdef REL_ALLOC
1613
1614
1615
1616 r_alloc_inhibit_buffer_relocation (1);
1617 #endif
1618 load_charset (charset, 1);
1619 if (! inhibit_load_charset_map)
1620 {
1621 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1622 if (! NILP (val))
1623 c = XFIXNAT (val);
1624 }
1625 else
1626 {
1627 int code_index = c - CHARSET_CODE_OFFSET (charset);
1628 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1629
1630 if (unified > 0)
1631 c = unified;
1632 }
1633 #ifdef REL_ALLOC
1634 r_alloc_inhibit_buffer_relocation (0);
1635 #endif
1636 return c;
1637 }
1638
1639
1640
1641
1642
1643 int
1644 decode_char (struct charset *charset, unsigned int code)
1645 {
1646 int c, char_index;
1647 enum charset_method method = CHARSET_METHOD (charset);
1648
1649 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1650 return -1;
1651
1652 if (method == CHARSET_METHOD_SUBSET)
1653 {
1654 Lisp_Object subset_info;
1655
1656 subset_info = CHARSET_SUBSET (charset);
1657 charset = CHARSET_FROM_ID (XFIXNAT (AREF (subset_info, 0)));
1658 code -= XFIXNUM (AREF (subset_info, 3));
1659 if (code < XFIXNAT (AREF (subset_info, 1))
1660 || code > XFIXNAT (AREF (subset_info, 2)))
1661 c = -1;
1662 else
1663 c = DECODE_CHAR (charset, code);
1664 }
1665 else if (method == CHARSET_METHOD_SUPERSET)
1666 {
1667 Lisp_Object parents;
1668
1669 parents = CHARSET_SUPERSET (charset);
1670 c = -1;
1671 for (; CONSP (parents); parents = XCDR (parents))
1672 {
1673 int id = XFIXNUM (XCAR (XCAR (parents)));
1674 int code_offset = XFIXNUM (XCDR (XCAR (parents)));
1675 unsigned this_code = code - code_offset;
1676
1677 charset = CHARSET_FROM_ID (id);
1678 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1679 break;
1680 }
1681 }
1682 else
1683 {
1684 char_index = CODE_POINT_TO_INDEX (charset, code);
1685 if (char_index < 0)
1686 return -1;
1687
1688 if (method == CHARSET_METHOD_MAP)
1689 {
1690 Lisp_Object decoder;
1691
1692 decoder = CHARSET_DECODER (charset);
1693 if (! VECTORP (decoder))
1694 {
1695 load_charset (charset, 1);
1696 decoder = CHARSET_DECODER (charset);
1697 }
1698 if (VECTORP (decoder))
1699 c = XFIXNUM (AREF (decoder, char_index));
1700 else
1701 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
1702 }
1703 else
1704 {
1705 c = char_index + CHARSET_CODE_OFFSET (charset);
1706 if (CHARSET_UNIFIED_P (charset)
1707 && MAX_UNICODE_CHAR < c && c <= MAX_5_BYTE_CHAR)
1708 {
1709
1710 Lisp_Object val = CHAR_TABLE_REF (Vchar_unify_table, c);
1711 c = maybe_unify_char (c, val);
1712 }
1713 }
1714 }
1715
1716 return c;
1717 }
1718
1719
1720 Lisp_Object charset_work;
1721
1722
1723
1724
1725
1726 unsigned
1727 encode_char (struct charset *charset, int c)
1728 {
1729 unsigned code;
1730 enum charset_method method = CHARSET_METHOD (charset);
1731
1732 if (CHARSET_UNIFIED_P (charset))
1733 {
1734 Lisp_Object deunifier;
1735 int code_index = -1;
1736
1737 deunifier = CHARSET_DEUNIFIER (charset);
1738 if (! CHAR_TABLE_P (deunifier))
1739 {
1740 load_charset (charset, 2);
1741 deunifier = CHARSET_DEUNIFIER (charset);
1742 }
1743 if (CHAR_TABLE_P (deunifier))
1744 {
1745 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1746
1747 if (FIXNUMP (deunified))
1748 code_index = XFIXNUM (deunified);
1749 }
1750 else
1751 {
1752 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1753 }
1754 if (code_index >= 0)
1755 c = CHARSET_CODE_OFFSET (charset) + code_index;
1756 }
1757
1758 if (method == CHARSET_METHOD_SUBSET)
1759 {
1760 Lisp_Object subset_info;
1761 struct charset *this_charset;
1762
1763 subset_info = CHARSET_SUBSET (charset);
1764 this_charset = CHARSET_FROM_ID (XFIXNAT (AREF (subset_info, 0)));
1765 code = ENCODE_CHAR (this_charset, c);
1766 if (code == CHARSET_INVALID_CODE (this_charset)
1767 || code < XFIXNAT (AREF (subset_info, 1))
1768 || code > XFIXNAT (AREF (subset_info, 2)))
1769 return CHARSET_INVALID_CODE (charset);
1770 code += XFIXNUM (AREF (subset_info, 3));
1771 return code;
1772 }
1773
1774 if (method == CHARSET_METHOD_SUPERSET)
1775 {
1776 Lisp_Object parents;
1777
1778 parents = CHARSET_SUPERSET (charset);
1779 for (; CONSP (parents); parents = XCDR (parents))
1780 {
1781 int id = XFIXNUM (XCAR (XCAR (parents)));
1782 int code_offset = XFIXNUM (XCDR (XCAR (parents)));
1783 struct charset *this_charset = CHARSET_FROM_ID (id);
1784
1785 code = ENCODE_CHAR (this_charset, c);
1786 if (code != CHARSET_INVALID_CODE (this_charset))
1787 return code + code_offset;
1788 }
1789 return CHARSET_INVALID_CODE (charset);
1790 }
1791
1792 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1793 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1794 return CHARSET_INVALID_CODE (charset);
1795
1796 if (method == CHARSET_METHOD_MAP)
1797 {
1798 Lisp_Object encoder;
1799 Lisp_Object val;
1800
1801 encoder = CHARSET_ENCODER (charset);
1802 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1803 {
1804 load_charset (charset, 2);
1805 encoder = CHARSET_ENCODER (charset);
1806 }
1807 if (CHAR_TABLE_P (encoder))
1808 {
1809 val = CHAR_TABLE_REF (encoder, c);
1810 if (NILP (val))
1811 return CHARSET_INVALID_CODE (charset);
1812 code = XFIXNUM (val);
1813 if (! CHARSET_COMPACT_CODES_P (charset))
1814 code = INDEX_TO_CODE_POINT (charset, code);
1815 }
1816 else
1817 {
1818 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1819 code = INDEX_TO_CODE_POINT (charset, code);
1820 }
1821 }
1822 else
1823 {
1824 unsigned code_index = c - CHARSET_CODE_OFFSET (charset);
1825
1826 code = INDEX_TO_CODE_POINT (charset, code_index);
1827 }
1828
1829 return code;
1830 }
1831
1832
1833 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 2, 0,
1834 doc:
1835
1836
1837
1838 )
1839 (Lisp_Object charset, Lisp_Object code_point)
1840 {
1841 int c, id;
1842 unsigned code;
1843 struct charset *charsetp;
1844
1845 CHECK_CHARSET_GET_ID (charset, id);
1846 code = cons_to_unsigned (code_point, UINT_MAX);
1847 charsetp = CHARSET_FROM_ID (id);
1848 c = DECODE_CHAR (charsetp, code);
1849 return (c >= 0 ? make_fixnum (c) : Qnil);
1850 }
1851
1852
1853 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 2, 0,
1854 doc:
1855
1856 )
1857 (Lisp_Object ch, Lisp_Object charset)
1858 {
1859 int c, id;
1860 unsigned code;
1861 struct charset *charsetp;
1862
1863 CHECK_CHARSET_GET_ID (charset, id);
1864 CHECK_CHARACTER (ch);
1865 c = XFIXNAT (ch);
1866 charsetp = CHARSET_FROM_ID (id);
1867 code = ENCODE_CHAR (charsetp, c);
1868 if (code == CHARSET_INVALID_CODE (charsetp))
1869 return Qnil;
1870
1871
1872
1873
1874
1875
1876
1877 return INT_TO_INTEGER (code);
1878 }
1879
1880
1881 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1882 doc:
1883
1884
1885
1886
1887 )
1888 (Lisp_Object charset, Lisp_Object code1, Lisp_Object code2, Lisp_Object code3, Lisp_Object code4)
1889 {
1890 int id, dimension;
1891 struct charset *charsetp;
1892 unsigned code;
1893 int c;
1894
1895 CHECK_CHARSET_GET_ID (charset, id);
1896 charsetp = CHARSET_FROM_ID (id);
1897
1898 dimension = CHARSET_DIMENSION (charsetp);
1899 if (NILP (code1))
1900 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1901 ? 0 : CHARSET_MIN_CODE (charsetp));
1902 else
1903 {
1904 CHECK_FIXNAT (code1);
1905 if (XFIXNAT (code1) >= 0x100)
1906 args_out_of_range (make_fixnum (0xFF), code1);
1907 code = XFIXNAT (code1);
1908
1909 if (dimension > 1)
1910 {
1911 code <<= 8;
1912 if (NILP (code2))
1913 code |= charsetp->code_space[(dimension - 2) * 4];
1914 else
1915 {
1916 CHECK_FIXNAT (code2);
1917 if (XFIXNAT (code2) >= 0x100)
1918 args_out_of_range (make_fixnum (0xFF), code2);
1919 code |= XFIXNAT (code2);
1920 }
1921
1922 if (dimension > 2)
1923 {
1924 code <<= 8;
1925 if (NILP (code3))
1926 code |= charsetp->code_space[(dimension - 3) * 4];
1927 else
1928 {
1929 CHECK_FIXNAT (code3);
1930 if (XFIXNAT (code3) >= 0x100)
1931 args_out_of_range (make_fixnum (0xFF), code3);
1932 code |= XFIXNAT (code3);
1933 }
1934
1935 if (dimension > 3)
1936 {
1937 code <<= 8;
1938 if (NILP (code4))
1939 code |= charsetp->code_space[0];
1940 else
1941 {
1942 CHECK_FIXNAT (code4);
1943 if (XFIXNAT (code4) >= 0x100)
1944 args_out_of_range (make_fixnum (0xFF), code4);
1945 code |= XFIXNAT (code4);
1946 }
1947 }
1948 }
1949 }
1950 }
1951
1952 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1953 code &= 0x7F7F7F7F;
1954 c = DECODE_CHAR (charsetp, code);
1955 if (c < 0)
1956 error ("Invalid code(s)");
1957 return make_fixnum (c);
1958 }
1959
1960
1961
1962
1963
1964
1965 struct charset *
1966 char_charset (int c, Lisp_Object charset_list, unsigned int *code_return)
1967 {
1968 bool maybe_null = 0;
1969
1970 if (NILP (charset_list))
1971 charset_list = Vcharset_ordered_list;
1972 else
1973 maybe_null = 1;
1974
1975 while (CONSP (charset_list))
1976 {
1977 struct charset *charset = CHARSET_FROM_ID (XFIXNUM (XCAR (charset_list)));
1978 unsigned code = ENCODE_CHAR (charset, c);
1979
1980 if (code != CHARSET_INVALID_CODE (charset))
1981 {
1982 if (code_return)
1983 *code_return = code;
1984 return charset;
1985 }
1986 charset_list = XCDR (charset_list);
1987 if (! maybe_null
1988 && c <= MAX_UNICODE_CHAR
1989 && EQ (charset_list, Vcharset_non_preferred_head))
1990 return CHARSET_FROM_ID (charset_unicode);
1991 }
1992 return (maybe_null ? NULL
1993 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
1994 : CHARSET_FROM_ID (charset_eight_bit));
1995 }
1996
1997
1998 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
1999 doc:
2000
2001
2002
2003 )
2004 (Lisp_Object ch)
2005 {
2006 struct charset *charset;
2007 int c, dimension;
2008 unsigned code;
2009 Lisp_Object val;
2010
2011 CHECK_CHARACTER (ch);
2012 c = XFIXNAT (ch);
2013 charset = CHAR_CHARSET (c);
2014 if (! charset)
2015 emacs_abort ();
2016 code = ENCODE_CHAR (charset, c);
2017 if (code == CHARSET_INVALID_CODE (charset))
2018 emacs_abort ();
2019 dimension = CHARSET_DIMENSION (charset);
2020 for (val = Qnil; dimension > 0; dimension--)
2021 {
2022 val = Fcons (make_fixnum (code & 0xFF), val);
2023 code >>= 8;
2024 }
2025 return Fcons (CHARSET_NAME (charset), val);
2026 }
2027
2028
2029 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2030 doc:
2031
2032
2033
2034
2035 )
2036 (Lisp_Object ch, Lisp_Object restriction)
2037 {
2038 struct charset *charset;
2039
2040 CHECK_CHARACTER (ch);
2041 if (NILP (restriction))
2042 charset = CHAR_CHARSET (XFIXNUM (ch));
2043 else
2044 {
2045 if (CONSP (restriction))
2046 {
2047 int c = XFIXNAT (ch);
2048
2049 for (; CONSP (restriction); restriction = XCDR (restriction))
2050 {
2051 struct charset *rcharset;
2052
2053 CHECK_CHARSET_GET_CHARSET (XCAR (restriction), rcharset);
2054 if (ENCODE_CHAR (rcharset, c) != CHARSET_INVALID_CODE (rcharset))
2055 return XCAR (restriction);
2056 }
2057 return Qnil;
2058 }
2059 restriction = coding_system_charset_list (restriction);
2060 charset = char_charset (XFIXNUM (ch), restriction, NULL);
2061 if (! charset)
2062 return Qnil;
2063 }
2064 return (CHARSET_NAME (charset));
2065 }
2066
2067
2068 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2069 doc:
2070
2071
2072 )
2073 (Lisp_Object pos)
2074 {
2075 Lisp_Object ch;
2076 struct charset *charset;
2077
2078 ch = Fchar_after (pos);
2079 if (! FIXNUMP (ch))
2080 return ch;
2081 charset = CHAR_CHARSET (XFIXNUM (ch));
2082 return (CHARSET_NAME (charset));
2083 }
2084
2085
2086 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2087 doc:
2088
2089
2090
2091
2092
2093
2094 )
2095 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
2096 {
2097 bool chars_flag = check_iso_charset_parameter (dimension, chars, final_char);
2098 int id = ISO_CHARSET_TABLE (XFIXNUM (dimension), chars_flag,
2099 XFIXNAT (final_char));
2100 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2101 }
2102
2103
2104 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2105 0, 0, 0,
2106 doc:
2107
2108
2109 )
2110 (void)
2111 {
2112 if (temp_charset_work)
2113 {
2114 xfree (temp_charset_work);
2115 temp_charset_work = NULL;
2116 }
2117
2118 if (CHAR_TABLE_P (Vchar_unify_table))
2119 Foptimize_char_table (Vchar_unify_table, Qnil);
2120
2121 return Qnil;
2122 }
2123
2124 DEFUN ("charset-priority-list", Fcharset_priority_list,
2125 Scharset_priority_list, 0, 1, 0,
2126 doc:
2127 )
2128 (Lisp_Object highestp)
2129 {
2130 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2131
2132 if (!NILP (highestp))
2133 return CHARSET_NAME (CHARSET_FROM_ID (XFIXNUM (Fcar (list))));
2134
2135 while (!NILP (list))
2136 {
2137 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XFIXNUM (XCAR (list)))), val);
2138 list = XCDR (list);
2139 }
2140 return Fnreverse (val);
2141 }
2142
2143 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2144 1, MANY, 0,
2145 doc:
2146 )
2147 (ptrdiff_t nargs, Lisp_Object *args)
2148 {
2149 Lisp_Object new_head, old_list;
2150 Lisp_Object list_2022, list_emacs_mule;
2151 ptrdiff_t i;
2152 int id;
2153
2154 old_list = Fcopy_sequence (Vcharset_ordered_list);
2155 new_head = Qnil;
2156 for (i = 0; i < nargs; i++)
2157 {
2158 CHECK_CHARSET_GET_ID (args[i], id);
2159 if (! NILP (Fmemq (make_fixnum (id), old_list)))
2160 {
2161 old_list = Fdelq (make_fixnum (id), old_list);
2162 new_head = Fcons (make_fixnum (id), new_head);
2163 }
2164 }
2165 Vcharset_non_preferred_head = old_list;
2166 Vcharset_ordered_list = nconc2 (Fnreverse (new_head), old_list);
2167
2168 charset_ordered_list_tick++;
2169
2170 charset_unibyte = -1;
2171 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2172 CONSP (old_list); old_list = XCDR (old_list))
2173 {
2174 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2175 list_2022 = Fcons (XCAR (old_list), list_2022);
2176 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2177 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2178 if (charset_unibyte < 0)
2179 {
2180 struct charset *charset = CHARSET_FROM_ID (XFIXNUM (XCAR (old_list)));
2181
2182 if (CHARSET_DIMENSION (charset) == 1
2183 && CHARSET_ASCII_COMPATIBLE_P (charset)
2184 && CHARSET_MAX_CHAR (charset) >= 0x80)
2185 charset_unibyte = CHARSET_ID (charset);
2186 }
2187 }
2188 Viso_2022_charset_list = Fnreverse (list_2022);
2189 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2190 if (charset_unibyte < 0)
2191 charset_unibyte = charset_iso_8859_1;
2192
2193 return Qnil;
2194 }
2195
2196 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2197 0, 1, 0,
2198 doc:
2199 )
2200 (Lisp_Object charset)
2201 {
2202 int id;
2203
2204 CHECK_CHARSET_GET_ID (charset, id);
2205 return make_fixnum (id);
2206 }
2207
2208 struct charset_sort_data
2209 {
2210 Lisp_Object charset;
2211 int id;
2212 ptrdiff_t priority;
2213 };
2214
2215 static int
2216 charset_compare (const void *d1, const void *d2)
2217 {
2218 const struct charset_sort_data *data1 = d1, *data2 = d2;
2219 if (data1->priority != data2->priority)
2220 return data1->priority < data2->priority ? -1 : 1;
2221 return 0;
2222 }
2223
2224 DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0,
2225 doc:
2226
2227 )
2228 (Lisp_Object charsets)
2229 {
2230 ptrdiff_t n = list_length (charsets), i, j;
2231 int done;
2232 Lisp_Object tail, elt, attrs;
2233 struct charset_sort_data *sort_data;
2234 int id, min_id = INT_MAX, max_id = INT_MIN;
2235 USE_SAFE_ALLOCA;
2236
2237 if (n == 0)
2238 return Qnil;
2239 SAFE_NALLOCA (sort_data, 1, n);
2240 for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++)
2241 {
2242 elt = XCAR (tail);
2243 CHECK_CHARSET_GET_ATTR (elt, attrs);
2244 sort_data[i].charset = elt;
2245 sort_data[i].id = id = XFIXNUM (CHARSET_ATTR_ID (attrs));
2246 if (id < min_id)
2247 min_id = id;
2248 if (id > max_id)
2249 max_id = id;
2250 }
2251 for (done = 0, tail = Vcharset_ordered_list, i = 0;
2252 done < n && CONSP (tail); tail = XCDR (tail), i++)
2253 {
2254 elt = XCAR (tail);
2255 id = XFIXNAT (elt);
2256 if (id >= min_id && id <= max_id)
2257 for (j = 0; j < n; j++)
2258 if (sort_data[j].id == id)
2259 {
2260 sort_data[j].priority = i;
2261 done++;
2262 }
2263 }
2264 qsort (sort_data, n, sizeof *sort_data, charset_compare);
2265 for (i = 0, tail = charsets; CONSP (tail); tail = XCDR (tail), i++)
2266 XSETCAR (tail, sort_data[i].charset);
2267 SAFE_FREE ();
2268 return charsets;
2269 }
2270
2271
2272 void
2273 init_charset (void)
2274 {
2275 Lisp_Object tempdir;
2276 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
2277 if (! file_accessible_directory_p (tempdir))
2278 {
2279
2280
2281
2282 fprintf (stderr,
2283 ("Error: %s: %s\n"
2284 "Emacs will not function correctly "
2285 "without the character map files.\n"
2286 "%s"
2287 "Please check your installation!\n"),
2288 SDATA (tempdir), strerror (errno),
2289 (egetenv ("EMACSDATA")
2290 ? ("The EMACSDATA environment variable is set. "
2291 "Maybe it has the wrong value?\n")
2292 : ""));
2293 exit (1);
2294 }
2295
2296 Vcharset_map_path = list1 (tempdir);
2297 }
2298
2299
2300 void
2301 init_charset_once (void)
2302 {
2303 int i, j, k;
2304
2305 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2306 for (j = 0; j < ISO_MAX_CHARS; j++)
2307 for (k = 0; k < ISO_MAX_FINAL; k++)
2308 iso_charset_table[i][j][k] = -1;
2309
2310 PDUMPER_REMEMBER_SCALAR (iso_charset_table);
2311
2312 for (i = 0; i < 256; i++)
2313 emacs_mule_charset[i] = -1;
2314
2315 PDUMPER_REMEMBER_SCALAR (emacs_mule_charset);
2316
2317 charset_jisx0201_roman = -1;
2318 PDUMPER_REMEMBER_SCALAR (charset_jisx0201_roman);
2319
2320 charset_jisx0208_1978 = -1;
2321 PDUMPER_REMEMBER_SCALAR (charset_jisx0208_1978);
2322
2323 charset_jisx0208 = -1;
2324 PDUMPER_REMEMBER_SCALAR (charset_jisx0208);
2325
2326 charset_ksc5601 = -1;
2327 PDUMPER_REMEMBER_SCALAR (charset_ksc5601);
2328 }
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340 static struct charset charset_table_init[180];
2341
2342 void
2343 syms_of_charset (void)
2344 {
2345 DEFSYM (Qcharsetp, "charsetp");
2346
2347
2348 DEFSYM (Qascii, "ascii");
2349 DEFSYM (Qunicode, "unicode");
2350 DEFSYM (Qemacs, "emacs");
2351 DEFSYM (Qeight_bit, "eight-bit");
2352 DEFSYM (Qiso_8859_1, "iso-8859-1");
2353
2354 staticpro (&Vcharset_ordered_list);
2355 Vcharset_ordered_list = Qnil;
2356
2357 staticpro (&Viso_2022_charset_list);
2358 Viso_2022_charset_list = Qnil;
2359
2360 staticpro (&Vemacs_mule_charset_list);
2361 Vemacs_mule_charset_list = Qnil;
2362
2363 staticpro (&Vcharset_hash_table);
2364 Vcharset_hash_table = CALLN (Fmake_hash_table, QCtest, Qeq);
2365
2366 charset_table = charset_table_init;
2367 charset_table_size = ARRAYELTS (charset_table_init);
2368 PDUMPER_REMEMBER_SCALAR (charset_table_size);
2369 charset_table_used = 0;
2370 PDUMPER_REMEMBER_SCALAR (charset_table_used);
2371
2372 defsubr (&Scharsetp);
2373 defsubr (&Smap_charset_chars);
2374 defsubr (&Sdefine_charset_internal);
2375 defsubr (&Sdefine_charset_alias);
2376 defsubr (&Scharset_plist);
2377 defsubr (&Sset_charset_plist);
2378 defsubr (&Sunify_charset);
2379 defsubr (&Sget_unused_iso_final_char);
2380 defsubr (&Sdeclare_equiv_charset);
2381 defsubr (&Sfind_charset_region);
2382 defsubr (&Sfind_charset_string);
2383 defsubr (&Sdecode_char);
2384 defsubr (&Sencode_char);
2385 defsubr (&Ssplit_char);
2386 defsubr (&Smake_char);
2387 defsubr (&Schar_charset);
2388 defsubr (&Scharset_after);
2389 defsubr (&Siso_charset);
2390 defsubr (&Sclear_charset_maps);
2391 defsubr (&Scharset_priority_list);
2392 defsubr (&Sset_charset_priority);
2393 defsubr (&Scharset_id_internal);
2394 defsubr (&Ssort_charsets);
2395
2396 DEFVAR_LISP ("charset-map-path", Vcharset_map_path,
2397 doc: );
2398 Vcharset_map_path = Qnil;
2399
2400 DEFVAR_BOOL ("inhibit-load-charset-map", inhibit_load_charset_map,
2401 doc: );
2402 inhibit_load_charset_map = 0;
2403
2404 DEFVAR_LISP ("charset-list", Vcharset_list,
2405 doc: );
2406 Vcharset_list = Qnil;
2407
2408 DEFVAR_LISP ("current-iso639-language", Vcurrent_iso639_language,
2409 doc:
2410
2411 );
2412 Vcurrent_iso639_language = Qnil;
2413
2414 charset_ascii
2415 = define_charset_internal (Qascii, 1, "\x00\x7F\0\0\0\0\0",
2416 0, 127, 'B', -1, 0, 1, 0, 0);
2417 PDUMPER_REMEMBER_SCALAR (charset_ascii);
2418
2419 charset_iso_8859_1
2420 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\0\0\0\0\0",
2421 0, 255, -1, -1, -1, 1, 0, 0);
2422 PDUMPER_REMEMBER_SCALAR (charset_iso_8859_1);
2423
2424 charset_unicode
2425 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10\0",
2426 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2427 PDUMPER_REMEMBER_SCALAR (charset_unicode);
2428
2429 charset_emacs
2430 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F\0",
2431 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2432 PDUMPER_REMEMBER_SCALAR (charset_emacs);
2433
2434 charset_eight_bit
2435 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\0\0\0\0\0",
2436 128, 255, -1, 0, -1, 0, 1,
2437 MAX_5_BYTE_CHAR + 1);
2438 PDUMPER_REMEMBER_SCALAR (charset_eight_bit);
2439
2440 charset_unibyte = charset_iso_8859_1;
2441 PDUMPER_REMEMBER_SCALAR (charset_unibyte);
2442 }