This source file includes following definitions.
- char_resolve_modifier_mask
- char_string
- translate_char
- DEFUN
- DEFUN
- DEFUN
- char_width
- DEFUN
- c_string_width
- strwidth
- lisp_string_width
- chars_in_text
- multibyte_chars_in_text
- parse_str_as_multibyte
- str_as_multibyte
- count_size_as_multibyte
- str_to_multibyte
- str_as_unibyte
- string_count_byte8
- string_escape_byte8
- DEFUN
- alphabeticp
- alphanumericp
- graphicp
- printablep
- graphic_base_p
- blankp
- syms_of_character
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28 #include <config.h>
29
30 #include <stdio.h>
31
32 #include <sys/types.h>
33 #include <intprops.h>
34 #include "lisp.h"
35 #include "character.h"
36 #include "buffer.h"
37 #include "frame.h"
38 #include "dispextern.h"
39 #include "composite.h"
40 #include "disptab.h"
41
42
43
44 Lisp_Object Vchar_unify_table;
45
46
47
48
49
50
51 EMACS_INT
52 char_resolve_modifier_mask (EMACS_INT c)
53 {
54
55 if (! ASCII_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
56 return c;
57
58
59 if (c & CHAR_SHIFT)
60 {
61
62 if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
63 c &= ~CHAR_SHIFT;
64 else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
65 c = (c & ~CHAR_SHIFT) - ('a' - 'A');
66
67 else if ((c & ~CHAR_MODIFIER_MASK) <= 0x20)
68 c &= ~CHAR_SHIFT;
69 }
70 if (c & CHAR_CTL)
71 {
72
73
74 if ((c & 0377) == ' ')
75 c &= ~0177 & ~ CHAR_CTL;
76 else if ((c & 0377) == '?')
77 c = 0177 | (c & ~0177 & ~CHAR_CTL);
78
79
80 else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
81 c &= (037 | (~0177 & ~CHAR_CTL));
82 else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
83 c &= (037 | (~0177 & ~CHAR_CTL));
84 }
85 #if 0
86 if (c & CHAR_META)
87 {
88
89 c = (c & ~CHAR_META) | 0x80;
90 }
91 #endif
92
93 return c;
94 }
95
96
97
98
99
100 int
101 char_string (unsigned int c, unsigned char *p)
102 {
103 int bytes;
104
105 if (c & CHAR_MODIFIER_MASK)
106 {
107 c = char_resolve_modifier_mask (c);
108
109 c &= ~CHAR_MODIFIER_MASK;
110 }
111
112 if (c <= MAX_3_BYTE_CHAR)
113 {
114 bytes = CHAR_STRING (c, p);
115 }
116 else if (c <= MAX_4_BYTE_CHAR)
117 {
118 p[0] = (0xF0 | (c >> 18));
119 p[1] = (0x80 | ((c >> 12) & 0x3F));
120 p[2] = (0x80 | ((c >> 6) & 0x3F));
121 p[3] = (0x80 | (c & 0x3F));
122 bytes = 4;
123 }
124 else if (c <= MAX_5_BYTE_CHAR)
125 {
126 p[0] = 0xF8;
127 p[1] = (0x80 | ((c >> 18) & 0x0F));
128 p[2] = (0x80 | ((c >> 12) & 0x3F));
129 p[3] = (0x80 | ((c >> 6) & 0x3F));
130 p[4] = (0x80 | (c & 0x3F));
131 bytes = 5;
132 }
133 else if (c <= MAX_CHAR)
134 {
135 c = CHAR_TO_BYTE8 (c);
136 bytes = BYTE8_STRING (c, p);
137 }
138 else
139 error ("Invalid character: %x", c);
140
141 return bytes;
142 }
143
144
145
146
147
148
149
150 int
151 translate_char (Lisp_Object table, int c)
152 {
153 if (CHAR_TABLE_P (table))
154 {
155 Lisp_Object ch;
156
157 ch = CHAR_TABLE_REF (table, c);
158 if (CHARACTERP (ch))
159 c = XFIXNUM (ch);
160 }
161 else
162 {
163 for (; CONSP (table); table = XCDR (table))
164 c = translate_char (XCAR (table), c);
165 }
166 return c;
167 }
168
169 DEFUN ("characterp", Fcharacterp, Scharacterp, 1, 2, 0,
170 doc:
171
172
173
174
175 attributes: const)
176 (Lisp_Object object, Lisp_Object ignore)
177 {
178 return (CHARACTERP (object) ? Qt : Qnil);
179 }
180
181 DEFUN ("max-char", Fmax_char, Smax_char, 0, 1, 0,
182 doc:
183
184
185 attributes: const)
186 (Lisp_Object unicode)
187 {
188 return (!NILP (unicode)
189 ? make_fixnum (MAX_UNICODE_CHAR)
190 : make_fixnum (MAX_CHAR));
191 }
192
193 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
194 Sunibyte_char_to_multibyte, 1, 1, 0,
195 doc: )
196 (Lisp_Object ch)
197 {
198 int c;
199
200 CHECK_CHARACTER (ch);
201 c = XFIXNAT (ch);
202 if (c >= 0x100)
203 error ("Not a unibyte character: %d", c);
204 return make_fixnum (make_char_multibyte (c));
205 }
206
207 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
208 Smultibyte_char_to_unibyte, 1, 1, 0,
209 doc:
210 )
211 (Lisp_Object ch)
212 {
213 int cm;
214
215 CHECK_CHARACTER (ch);
216 cm = XFIXNAT (ch);
217 if (cm < 256)
218
219
220 return ch;
221 else
222 {
223 int cu = CHAR_TO_BYTE_SAFE (cm);
224 return make_fixnum (cu);
225 }
226 }
227
228
229
230
231 static ptrdiff_t
232 char_width (int c, struct Lisp_Char_Table *dp)
233 {
234 ptrdiff_t width = CHARACTER_WIDTH (c);
235
236 if (dp)
237 {
238 Lisp_Object disp = DISP_CHAR_VECTOR (dp, c), ch;
239 int i;
240
241 if (VECTORP (disp))
242 for (i = 0, width = 0; i < ASIZE (disp); i++)
243 {
244 int c = -1;
245 ch = AREF (disp, i);
246 if (GLYPH_CODE_P (ch))
247 c = GLYPH_CODE_CHAR (ch);
248 else if (CHARACTERP (ch))
249 c = XFIXNUM (ch);
250 if (c >= 0)
251 {
252 int w = CHARACTER_WIDTH (c);
253 if (ckd_add (&width, width, w))
254 string_overflow ();
255 }
256 }
257 }
258 return width;
259 }
260
261
262 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
263 doc:
264
265
266
267
268
269
270 )
271 (Lisp_Object ch)
272 {
273 int c;
274 ptrdiff_t width;
275
276 CHECK_CHARACTER (ch);
277 c = XFIXNUM (ch);
278 width = char_width (c, buffer_display_table ());
279 return make_fixnum (width);
280 }
281
282
283
284
285
286
287
288
289 ptrdiff_t
290 c_string_width (const unsigned char *str, ptrdiff_t len, int precision,
291 ptrdiff_t *nchars, ptrdiff_t *nbytes)
292 {
293 ptrdiff_t i = 0, i_byte = 0;
294 ptrdiff_t width = 0;
295 struct Lisp_Char_Table *dp = buffer_display_table ();
296
297 while (i_byte < len)
298 {
299 int bytes, c = string_char_and_length (str + i_byte, &bytes);
300 ptrdiff_t thiswidth = char_width (c, dp);
301
302 if (0 < precision && precision - width < thiswidth)
303 {
304 *nchars = i;
305 *nbytes = i_byte;
306 return width;
307 }
308 if (ckd_add (&width, width, thiswidth))
309 string_overflow ();
310 i++;
311 i_byte += bytes;
312 }
313
314 if (precision > 0)
315 {
316 *nchars = i;
317 *nbytes = i_byte;
318 }
319
320 return width;
321 }
322
323
324
325
326
327 ptrdiff_t
328 strwidth (const char *str, ptrdiff_t len)
329 {
330 return c_string_width ((const unsigned char *) str, len, -1, NULL, NULL);
331 }
332
333
334
335
336
337
338
339
340
341
342
343 ptrdiff_t
344 lisp_string_width (Lisp_Object string, ptrdiff_t from, ptrdiff_t to,
345 ptrdiff_t precision, ptrdiff_t *nchars, ptrdiff_t *nbytes,
346 bool auto_comp)
347 {
348
349
350
351 bool multibyte = SCHARS (string) < SBYTES (string);
352 ptrdiff_t i = from, i_byte = from ? string_char_to_byte (string, from) : 0;
353 ptrdiff_t from_byte = i_byte;
354 ptrdiff_t width = 0;
355 struct Lisp_Char_Table *dp = buffer_display_table ();
356 #ifdef HAVE_WINDOW_SYSTEM
357 struct frame *f =
358 (FRAMEP (selected_frame) && FRAME_LIVE_P (XFRAME (selected_frame)))
359 ? XFRAME (selected_frame)
360 : NULL;
361 int font_width = -1;
362 Lisp_Object default_font, frame_font;
363 #endif
364
365 eassert (precision <= 0 || (nchars && nbytes));
366
367 while (i < to)
368 {
369 ptrdiff_t chars, bytes, thiswidth;
370 Lisp_Object val;
371 ptrdiff_t cmp_id;
372 ptrdiff_t ignore, end;
373
374 if (find_composition (i, -1, &ignore, &end, &val, string)
375 && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
376 >= 0))
377 {
378 thiswidth = composition_table[cmp_id]->width;
379 chars = end - i;
380 bytes = string_char_to_byte (string, end) - i_byte;
381 }
382 #ifdef HAVE_WINDOW_SYSTEM
383 else if (auto_comp
384 && f && FRAME_WINDOW_P (f)
385 && multibyte
386 && find_automatic_composition (i, -1, i, &ignore,
387 &end, &val, string)
388 && end > i)
389 {
390 int j;
391 for (j = 0; j < LGSTRING_GLYPH_LEN (val); j++)
392 if (NILP (LGSTRING_GLYPH (val, j)))
393 break;
394
395 int pixelwidth = composition_gstring_width (val, 0, j, NULL);
396
397
398
399 if (font_width < 0)
400 {
401 font_width = FRAME_COLUMN_WIDTH (f);
402 default_font = Fface_font (Qdefault, Qnil, Qnil);
403 frame_font = Fframe_parameter (Qnil, Qfont);
404
405 if (STRINGP (default_font) && STRINGP (frame_font)
406 && (SCHARS (default_font) != SCHARS (frame_font)
407 || SBYTES (default_font) != SBYTES (frame_font)
408 || memcmp (SDATA (default_font), SDATA (frame_font),
409 SBYTES (default_font))))
410 {
411 Lisp_Object font_info = Ffont_info (default_font, Qnil);
412 if (VECTORP (font_info))
413 {
414 font_width = XFIXNUM (AREF (font_info, 11));
415 if (font_width <= 0)
416 font_width = XFIXNUM (AREF (font_info, 10));
417 }
418 }
419 }
420 thiswidth = (double) pixelwidth / font_width + 0.5;
421 chars = end - i;
422 bytes = string_char_to_byte (string, end) - i_byte;
423 }
424 #endif
425 else
426 {
427 int c;
428 unsigned char *str = SDATA (string);
429
430 if (multibyte)
431 {
432 int cbytes;
433 c = string_char_and_length (str + i_byte, &cbytes);
434 bytes = cbytes;
435 }
436 else
437 c = str[i_byte], bytes = 1;
438 chars = 1;
439 thiswidth = char_width (c, dp);
440 }
441
442 if (0 < precision && precision - width < thiswidth)
443 {
444 *nchars = i - from;
445 *nbytes = i_byte - from_byte;
446 return width;
447 }
448 if (ckd_add (&width, width, thiswidth))
449 string_overflow ();
450 i += chars;
451 i_byte += bytes;
452 }
453
454 if (precision > 0)
455 {
456 *nchars = i - from;
457 *nbytes = i_byte - from_byte;
458 }
459
460 return width;
461 }
462
463 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 3, 0,
464 doc:
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484 )
485 (Lisp_Object str, Lisp_Object from, Lisp_Object to)
486 {
487 Lisp_Object val;
488 ptrdiff_t ifrom, ito;
489
490 CHECK_STRING (str);
491 validate_subarray (str, from, to, SCHARS (str), &ifrom, &ito);
492 XSETFASTINT (val, lisp_string_width (str, ifrom, ito, -1, NULL, NULL, true));
493 return val;
494 }
495
496
497
498
499
500
501
502 ptrdiff_t
503 chars_in_text (const unsigned char *ptr, ptrdiff_t nbytes)
504 {
505
506 if (current_buffer == 0
507 || NILP (BVAR (current_buffer, enable_multibyte_characters)))
508 return nbytes;
509
510 return multibyte_chars_in_text (ptr, nbytes);
511 }
512
513
514
515
516
517
518 ptrdiff_t
519 multibyte_chars_in_text (const unsigned char *ptr, ptrdiff_t nbytes)
520 {
521 const unsigned char *endp = ptr + nbytes;
522 ptrdiff_t chars = 0;
523
524 while (ptr < endp)
525 {
526 int len = multibyte_length (ptr, endp, true, true);
527
528 if (len == 0)
529 emacs_abort ();
530 ptr += len;
531 chars++;
532 }
533
534 return chars;
535 }
536
537
538
539
540
541
542
543 void
544 parse_str_as_multibyte (const unsigned char *str, ptrdiff_t len,
545 ptrdiff_t *nchars, ptrdiff_t *nbytes)
546 {
547 const unsigned char *endp = str + len;
548 ptrdiff_t chars = 0, bytes = 0;
549
550 if (len >= MAX_MULTIBYTE_LENGTH)
551 {
552 const unsigned char *adjusted_endp = endp - (MAX_MULTIBYTE_LENGTH - 1);
553 while (str < adjusted_endp)
554 {
555 int n = multibyte_length (str, NULL, false, false);
556 if (0 < n)
557 str += n, bytes += n;
558 else
559 str++, bytes += 2;
560 chars++;
561 }
562 }
563 while (str < endp)
564 {
565 int n = multibyte_length (str, endp, true, false);
566 if (0 < n)
567 str += n, bytes += n;
568 else
569 str++, bytes += 2;
570 chars++;
571 }
572
573 *nchars = chars;
574 *nbytes = bytes;
575 return;
576 }
577
578
579
580
581
582
583
584
585
586 ptrdiff_t
587 str_as_multibyte (unsigned char *str, ptrdiff_t len, ptrdiff_t nbytes,
588 ptrdiff_t *nchars)
589 {
590 unsigned char *p = str, *endp = str + nbytes;
591 unsigned char *to;
592 ptrdiff_t chars = 0;
593
594 if (nbytes >= MAX_MULTIBYTE_LENGTH)
595 {
596 unsigned char *adjusted_endp = endp - (MAX_MULTIBYTE_LENGTH - 1);
597 while (p < adjusted_endp)
598 {
599 int n = multibyte_length (p, NULL, false, false);
600 if (n <= 0)
601 break;
602 p += n, chars++;
603 }
604 }
605 while (true)
606 {
607 int n = multibyte_length (p, endp, true, false);
608 if (n <= 0)
609 break;
610 p += n, chars++;
611 }
612 if (nchars)
613 *nchars = chars;
614 if (p == endp)
615 return nbytes;
616
617 to = p;
618 nbytes = endp - p;
619 endp = str + len;
620 memmove (endp - nbytes, p, nbytes);
621 p = endp - nbytes;
622
623 if (nbytes >= MAX_MULTIBYTE_LENGTH)
624 {
625 unsigned char *adjusted_endp = endp - (MAX_MULTIBYTE_LENGTH - 1);
626 while (p < adjusted_endp)
627 {
628 int n = multibyte_length (p, NULL, false, false);
629 if (0 < n)
630 {
631 while (n--)
632 *to++ = *p++;
633 }
634 else
635 {
636 int c = *p++;
637 c = BYTE8_TO_CHAR (c);
638 to += CHAR_STRING (c, to);
639 }
640 }
641 chars++;
642 }
643 while (p < endp)
644 {
645 int n = multibyte_length (p, endp, true, false);
646 if (0 < n)
647 {
648 while (n--)
649 *to++ = *p++;
650 }
651 else
652 {
653 int c = *p++;
654 c = BYTE8_TO_CHAR (c);
655 to += CHAR_STRING (c, to);
656 }
657 chars++;
658 }
659 if (nchars)
660 *nchars = chars;
661 return (to - str);
662 }
663
664
665
666
667
668 ptrdiff_t
669 count_size_as_multibyte (const unsigned char *str, ptrdiff_t len)
670 {
671
672
673 ptrdiff_t nonascii = 0;
674 for (ptrdiff_t i = 0; i < len; i++)
675 nonascii += str[i] >> 7;
676 ptrdiff_t bytes;
677 if (ckd_add (&bytes, len, nonascii))
678 string_overflow ();
679 return bytes;
680 }
681
682
683
684
685
686 ptrdiff_t
687 str_to_multibyte (unsigned char *dst, const unsigned char *src,
688 ptrdiff_t nchars)
689 {
690 unsigned char *d = dst;
691 for (ptrdiff_t i = 0; i < nchars; i++)
692 {
693 unsigned char c = src[i];
694 if (c <= 0x7f)
695 *d++ = c;
696 else
697 {
698 *d++ = 0xc0 + ((c >> 6) & 1);
699 *d++ = 0x80 + (c & 0x3f);
700 }
701 }
702 return d - dst;
703 }
704
705
706
707
708
709 ptrdiff_t
710 str_as_unibyte (unsigned char *str, ptrdiff_t bytes)
711 {
712 const unsigned char *p = str, *endp = str + bytes;
713 unsigned char *to;
714 int c, len;
715
716 while (p < endp)
717 {
718 c = *p;
719 len = BYTES_BY_CHAR_HEAD (c);
720 if (CHAR_BYTE8_HEAD_P (c))
721 break;
722 p += len;
723 }
724 to = str + (p - str);
725 while (p < endp)
726 {
727 c = *p;
728 len = BYTES_BY_CHAR_HEAD (c);
729 if (CHAR_BYTE8_HEAD_P (c))
730 {
731 c = string_char_advance (&p);
732 *to++ = CHAR_TO_BYTE8 (c);
733 }
734 else
735 {
736 while (len--) *to++ = *p++;
737 }
738 }
739 return (to - str);
740 }
741
742 static ptrdiff_t
743 string_count_byte8 (Lisp_Object string)
744 {
745 bool multibyte = STRING_MULTIBYTE (string);
746 ptrdiff_t nbytes = SBYTES (string);
747 unsigned char *p = SDATA (string);
748 unsigned char *pend = p + nbytes;
749 ptrdiff_t count = 0;
750 int c, len;
751
752 if (multibyte)
753 while (p < pend)
754 {
755 c = *p;
756 len = BYTES_BY_CHAR_HEAD (c);
757
758 if (CHAR_BYTE8_HEAD_P (c))
759 count++;
760 p += len;
761 }
762 else
763 while (p < pend)
764 {
765 if (*p++ >= 0x80)
766 count++;
767 }
768 return count;
769 }
770
771
772 Lisp_Object
773 string_escape_byte8 (Lisp_Object string)
774 {
775 ptrdiff_t nchars = SCHARS (string);
776 ptrdiff_t nbytes = SBYTES (string);
777 bool multibyte = STRING_MULTIBYTE (string);
778 ptrdiff_t byte8_count;
779 ptrdiff_t thrice_byte8_count, uninit_nchars, uninit_nbytes;
780 const unsigned char *src, *src_end;
781 unsigned char *dst;
782 Lisp_Object val;
783 int c, len;
784
785 if (multibyte && nchars == nbytes)
786 return string;
787
788 byte8_count = string_count_byte8 (string);
789
790 if (byte8_count == 0)
791 return string;
792
793 if (ckd_mul (&thrice_byte8_count, byte8_count, 3))
794 string_overflow ();
795
796 if (multibyte)
797 {
798
799 if (ckd_add (&uninit_nchars, nchars, thrice_byte8_count)
800 || ckd_add (&uninit_nbytes, nbytes, 2 * byte8_count))
801 string_overflow ();
802 val = make_uninit_multibyte_string (uninit_nchars, uninit_nbytes);
803 }
804 else
805 {
806
807 if (ckd_add (&uninit_nbytes, thrice_byte8_count, nbytes))
808 string_overflow ();
809 val = make_uninit_string (uninit_nbytes);
810 }
811
812 src = SDATA (string);
813 src_end = src + nbytes;
814 dst = SDATA (val);
815 if (multibyte)
816 while (src < src_end)
817 {
818 c = *src;
819 len = BYTES_BY_CHAR_HEAD (c);
820
821 if (CHAR_BYTE8_HEAD_P (c))
822 {
823 c = string_char_advance (&src);
824 c = CHAR_TO_BYTE8 (c);
825 dst += sprintf ((char *) dst, "\\%03o", c + 0u);
826 }
827 else
828 while (len--) *dst++ = *src++;
829 }
830 else
831 while (src < src_end)
832 {
833 c = *src++;
834 if (c >= 0x80)
835 dst += sprintf ((char *) dst, "\\%03o", c + 0u);
836 else
837 *dst++ = c;
838 }
839 return val;
840 }
841
842
843 DEFUN ("string", Fstring, Sstring, 0, MANY, 0,
844 doc:
845
846 )
847 (ptrdiff_t n, Lisp_Object *args)
848 {
849 ptrdiff_t nbytes = 0;
850 for (ptrdiff_t i = 0; i < n; i++)
851 {
852 CHECK_CHARACTER (args[i]);
853 nbytes += CHAR_BYTES (XFIXNUM (args[i]));
854 }
855 if (nbytes == n)
856 return Funibyte_string (n, args);
857 Lisp_Object str = make_uninit_multibyte_string (n, nbytes);
858 unsigned char *p = SDATA (str);
859 for (ptrdiff_t i = 0; i < n; i++)
860 {
861 eassume (CHARACTERP (args[i]));
862 int c = XFIXNUM (args[i]);
863 p += CHAR_STRING (c, p);
864 }
865 return str;
866 }
867
868 DEFUN ("unibyte-string", Funibyte_string, Sunibyte_string, 0, MANY, 0,
869 doc:
870 )
871 (ptrdiff_t n, Lisp_Object *args)
872 {
873 Lisp_Object str = make_uninit_string (n);
874 unsigned char *p = SDATA (str);
875 for (ptrdiff_t i = 0; i < n; i++)
876 *p++ = check_integer_range (args[i], 0, 255);
877 return str;
878 }
879
880 DEFUN ("char-resolve-modifiers", Fchar_resolve_modifiers,
881 Schar_resolve_modifiers, 1, 1, 0,
882 doc:
883
884
885 )
886 (Lisp_Object character)
887 {
888 EMACS_INT c;
889
890 CHECK_FIXNUM (character);
891 c = XFIXNUM (character);
892 return make_fixnum (char_resolve_modifier_mask (c));
893 }
894
895 DEFUN ("get-byte", Fget_byte, Sget_byte, 0, 2, 0,
896 doc:
897
898
899
900
901
902
903
904 )
905 (Lisp_Object position, Lisp_Object string)
906 {
907 int c;
908 ptrdiff_t pos;
909 unsigned char *p;
910
911 if (NILP (string))
912 {
913 if (NILP (position))
914 {
915 p = PT_ADDR;
916 }
917 else
918 {
919 EMACS_INT fixed_pos = fix_position (position);
920 if (! (BEGV <= fixed_pos && fixed_pos < ZV))
921 args_out_of_range_3 (position, make_fixnum (BEGV), make_fixnum (ZV));
922 pos = fixed_pos;
923 p = CHAR_POS_ADDR (pos);
924 }
925 if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
926 return make_fixnum (*p);
927 }
928 else
929 {
930 CHECK_STRING (string);
931 if (NILP (position))
932 {
933 p = SDATA (string);
934 }
935 else
936 {
937 CHECK_FIXNAT (position);
938 if (XFIXNUM (position) >= SCHARS (string))
939 args_out_of_range (string, position);
940 pos = XFIXNAT (position);
941 p = SDATA (string) + string_char_to_byte (string, pos);
942 }
943 if (! STRING_MULTIBYTE (string))
944 return make_fixnum (*p);
945 }
946 c = STRING_CHAR (p);
947 if (CHAR_BYTE8_P (c))
948 c = CHAR_TO_BYTE8 (c);
949 else if (! ASCII_CHAR_P (c))
950 error ("Not an ASCII nor an 8-bit character: %d", c);
951 return make_fixnum (c);
952 }
953
954
955 bool
956 alphabeticp (int c)
957 {
958 Lisp_Object category = CHAR_TABLE_REF (Vunicode_category_table, c);
959 if (! FIXNUMP (category))
960 return false;
961 EMACS_INT gen_cat = XFIXNUM (category);
962
963
964
965
966 return (gen_cat == UNICODE_CATEGORY_Lu
967 || gen_cat == UNICODE_CATEGORY_Ll
968 || gen_cat == UNICODE_CATEGORY_Lt
969 || gen_cat == UNICODE_CATEGORY_Lm
970 || gen_cat == UNICODE_CATEGORY_Lo
971 || gen_cat == UNICODE_CATEGORY_Mn
972 || gen_cat == UNICODE_CATEGORY_Mc
973 || gen_cat == UNICODE_CATEGORY_Me
974 || gen_cat == UNICODE_CATEGORY_Nl);
975 }
976
977
978 bool
979 alphanumericp (int c)
980 {
981 Lisp_Object category = CHAR_TABLE_REF (Vunicode_category_table, c);
982 if (! FIXNUMP (category))
983 return false;
984 EMACS_INT gen_cat = XFIXNUM (category);
985
986
987 return (gen_cat == UNICODE_CATEGORY_Lu
988 || gen_cat == UNICODE_CATEGORY_Ll
989 || gen_cat == UNICODE_CATEGORY_Lt
990 || gen_cat == UNICODE_CATEGORY_Lm
991 || gen_cat == UNICODE_CATEGORY_Lo
992 || gen_cat == UNICODE_CATEGORY_Mn
993 || gen_cat == UNICODE_CATEGORY_Mc
994 || gen_cat == UNICODE_CATEGORY_Me
995 || gen_cat == UNICODE_CATEGORY_Nl
996 || gen_cat == UNICODE_CATEGORY_Nd);
997 }
998
999
1000 bool
1001 graphicp (int c)
1002 {
1003 Lisp_Object category = CHAR_TABLE_REF (Vunicode_category_table, c);
1004 if (! FIXNUMP (category))
1005 return false;
1006 EMACS_INT gen_cat = XFIXNUM (category);
1007
1008
1009 return (!(gen_cat == UNICODE_CATEGORY_Zs
1010 || gen_cat == UNICODE_CATEGORY_Zl
1011 || gen_cat == UNICODE_CATEGORY_Zp
1012 || gen_cat == UNICODE_CATEGORY_Cc
1013 || gen_cat == UNICODE_CATEGORY_Cs
1014 || gen_cat == UNICODE_CATEGORY_Cn));
1015 }
1016
1017
1018 bool
1019 printablep (int c)
1020 {
1021 Lisp_Object category = CHAR_TABLE_REF (Vunicode_category_table, c);
1022 if (! FIXNUMP (category))
1023 return false;
1024 EMACS_INT gen_cat = XFIXNUM (category);
1025
1026
1027 return (!(gen_cat == UNICODE_CATEGORY_Cc
1028 || gen_cat == UNICODE_CATEGORY_Cs
1029 || gen_cat == UNICODE_CATEGORY_Cn));
1030 }
1031
1032
1033 bool
1034 graphic_base_p (int c)
1035 {
1036 Lisp_Object category = CHAR_TABLE_REF (Vunicode_category_table, c);
1037 if (! FIXNUMP (category))
1038 return false;
1039 EMACS_INT gen_cat = XFIXNUM (category);
1040
1041 return (!(gen_cat == UNICODE_CATEGORY_Mn
1042 || gen_cat == UNICODE_CATEGORY_Mc
1043 || gen_cat == UNICODE_CATEGORY_Me
1044 || gen_cat == UNICODE_CATEGORY_Zs
1045 || gen_cat == UNICODE_CATEGORY_Zl
1046 || gen_cat == UNICODE_CATEGORY_Zp
1047 || gen_cat == UNICODE_CATEGORY_Cc
1048 || gen_cat == UNICODE_CATEGORY_Cs
1049 || gen_cat == UNICODE_CATEGORY_Cf
1050 || gen_cat == UNICODE_CATEGORY_Cn));
1051 }
1052
1053
1054
1055 bool
1056 blankp (int c)
1057 {
1058 Lisp_Object category = CHAR_TABLE_REF (Vunicode_category_table, c);
1059 if (! FIXNUMP (category))
1060 return false;
1061
1062 return XFIXNUM (category) == UNICODE_CATEGORY_Zs;
1063 }
1064
1065
1066
1067 signed char const hexdigit[UCHAR_MAX + 1] =
1068 {
1069 ['0'] = 1 + 0, ['1'] = 1 + 1, ['2'] = 1 + 2, ['3'] = 1 + 3, ['4'] = 1 + 4,
1070 ['5'] = 1 + 5, ['6'] = 1 + 6, ['7'] = 1 + 7, ['8'] = 1 + 8, ['9'] = 1 + 9,
1071 ['A'] = 1 + 10, ['B'] = 1 + 11, ['C'] = 1 + 12,
1072 ['D'] = 1 + 13, ['E'] = 1 + 14, ['F'] = 1 + 15,
1073 ['a'] = 1 + 10, ['b'] = 1 + 11, ['c'] = 1 + 12,
1074 ['d'] = 1 + 13, ['e'] = 1 + 14, ['f'] = 1 + 15
1075 };
1076
1077 void
1078 syms_of_character (void)
1079 {
1080 DEFSYM (Qcharacterp, "characterp");
1081 DEFSYM (Qauto_fill_chars, "auto-fill-chars");
1082
1083 staticpro (&Vchar_unify_table);
1084 Vchar_unify_table = Qnil;
1085
1086 defsubr (&Smax_char);
1087 defsubr (&Scharacterp);
1088 defsubr (&Sunibyte_char_to_multibyte);
1089 defsubr (&Smultibyte_char_to_unibyte);
1090 defsubr (&Schar_width);
1091 defsubr (&Sstring_width);
1092 defsubr (&Sstring);
1093 defsubr (&Sunibyte_string);
1094 defsubr (&Schar_resolve_modifiers);
1095 defsubr (&Sget_byte);
1096
1097 DEFVAR_LISP ("translation-table-vector", Vtranslation_table_vector,
1098 doc:
1099
1100
1101 );
1102 Vtranslation_table_vector = make_nil_vector (16);
1103
1104 DEFVAR_LISP ("auto-fill-chars", Vauto_fill_chars,
1105 doc:
1106
1107 );
1108 Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
1109 CHAR_TABLE_SET (Vauto_fill_chars, ' ', Qt);
1110 CHAR_TABLE_SET (Vauto_fill_chars, '\n', Qt);
1111
1112 DEFVAR_LISP ("char-width-table", Vchar_width_table,
1113 doc:
1114 );
1115 Vchar_width_table = Fmake_char_table (Qnil, make_fixnum (1));
1116 char_table_set_range (Vchar_width_table, 0x80, 0x9F, make_fixnum (4));
1117 char_table_set_range (Vchar_width_table, MAX_5_BYTE_CHAR + 1, MAX_CHAR,
1118 make_fixnum (4));
1119
1120 DEFVAR_LISP ("ambiguous-width-chars", Vambiguous_width_chars,
1121 doc:
1122
1123
1124
1125 );
1126 Vambiguous_width_chars = Fmake_char_table (Qnil, Qnil);
1127
1128 DEFVAR_LISP ("printable-chars", Vprintable_chars,
1129 doc: );
1130 Vprintable_chars = Fmake_char_table (Qnil, Qnil);
1131 Fset_char_table_range (Vprintable_chars,
1132 Fcons (make_fixnum (32), make_fixnum (126)), Qt);
1133 Fset_char_table_range (Vprintable_chars,
1134 Fcons (make_fixnum (160),
1135 make_fixnum (MAX_5_BYTE_CHAR)), Qt);
1136
1137 DEFVAR_LISP ("char-script-table", Vchar_script_table,
1138 doc:
1139 );
1140
1141 DEFSYM (Qchar_script_table, "char-script-table");
1142 Fput (Qchar_script_table, Qchar_table_extra_slots, make_fixnum (1));
1143 Vchar_script_table = Fmake_char_table (Qchar_script_table, Qnil);
1144
1145 DEFVAR_LISP ("script-representative-chars", Vscript_representative_chars,
1146 doc:
1147
1148
1149
1150
1151
1152 );
1153 Vscript_representative_chars = Qnil;
1154
1155 DEFVAR_LISP ("unicode-category-table", Vunicode_category_table,
1156 doc:
1157
1158
1159
1160 );
1161
1162 Vunicode_category_table = Qnil;
1163 }