1 /* Composite sequence support.
2 Copyright (C) 2001-2023 Free Software Foundation, Inc.
3 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 National Institute of Advanced Industrial Science and Technology (AIST)
5 Registration Number H14PRO021
6 Copyright (C) 2003, 2006
7 National Institute of Advanced Industrial Science and Technology (AIST)
8 Registration Number H13PRO009
9
10 This file is part of GNU Emacs.
11
12 GNU Emacs is free software: you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation, either version 3 of the License, or (at
15 your option) any later version.
16
17 GNU Emacs is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
21
22 You should have received a copy of the GNU General Public License
23 along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
24
25 #include <config.h>
26
27 #include <stdlib.h> /* for qsort */
28
29 #include "lisp.h"
30 #include "character.h"
31 #include "composite.h"
32 #include "buffer.h"
33 #include "coding.h"
34 #include "intervals.h"
35 #include "frame.h"
36 #include "dispextern.h"
37 #include "termhooks.h"
38
39
40 /* Emacs uses special text property `composition' to support character
41 composition. A sequence of characters that have the same (i.e. eq)
42 `composition' property value is treated as a single composite
43 sequence (we call it just `composition' here after). Characters in
44 a composition are all composed somehow on the screen.
45
46 The property value has this form when the composition is made:
47 ((LENGTH . COMPONENTS) . MODIFICATION-FUNC)
48 then turns to this form:
49 (COMPOSITION-ID . (LENGTH COMPONENTS-VEC . MODIFICATION-FUNC))
50 when the composition is registered in composition_hash_table and
51 composition_table. These rather peculiar structures were designed
52 to make it easy to distinguish them quickly (we can do that by
53 checking only the first element) and to extract LENGTH (from the
54 former form) and COMPOSITION-ID (from the latter form).
55
56 We register a composition when it is displayed, or when the width
57 is required (for instance, to calculate columns).
58
59 LENGTH -- Length of the composition. This information is used to
60 check the validity of the composition.
61
62 COMPONENTS -- Character, string, vector, list, or nil.
63
64 If it is nil, characters in the text are composed relatively
65 according to their metrics in font glyphs.
66
67 If it is a character or a string, the character or characters
68 in the string are composed relatively.
69
70 If it is a vector or list of integers, the element is a
71 character or an encoded composition rule. The characters are
72 composed according to the rules. (2N)th elements are
73 characters to be composed and (2N+1)th elements are
74 composition rules to tell how to compose (2N+2)th element with
75 the previously composed 2N glyphs.
76
77 COMPONENTS-VEC -- Vector of integers. In a relative composition,
78 the elements are the characters to be composed. In a rule-base
79 composition, the elements are characters or encoded
80 composition rules.
81
82 MODIFICATION-FUNC -- If non-nil, it is a function to call when the
83 composition gets invalid after a modification in a buffer. If
84 it is nil, a function in `composition-function-table' of the
85 first character in the sequence is called.
86
87 COMPOSITION-ID --Identification number of the composition. It is
88 used as an index to composition_table for the composition.
89
90 When Emacs has to display a composition or has to know its
91 displaying width, the function get_composition_id is called. It
92 returns COMPOSITION-ID so that the caller can access the
93 information about the composition through composition_table. If a
94 COMPOSITION-ID has not yet been assigned to the composition,
95 get_composition_id checks the validity of `composition' property,
96 and, if valid, assigns a new ID, registers the information in
97 composition_hash_table and composition_table, and changes the form
98 of the property value. If the property is invalid,
99 get_composition_id returns -1 without changing the property value.
100
101 We use two tables to keep the information about composition;
102 composition_hash_table and composition_table.
103
104 The former is a hash table whose keys are COMPONENTS-VECs and
105 values are the corresponding COMPOSITION-IDs. This hash table is
106 weak, but as each key (COMPONENTS-VEC) is also kept as a value of the
107 `composition' property, it won't be collected as garbage until all
108 bits of text that have the same COMPONENTS-VEC are deleted.
109
110 The latter is a table of pointers to `struct composition' indexed
111 by COMPOSITION-ID. This structure keeps the other information (see
112 composite.h).
113
114 In general, a text property holds information about individual
115 characters. But, a `composition' property holds information about
116 a sequence of characters (in this sense, it is like the `intangible'
117 property). That means that we should not share the property value
118 in adjacent compositions -- we can't distinguish them if they have the
119 same property. So, after any changes, we call
120 `update_compositions' and change a property of one of adjacent
121 compositions to a copy of it. This function also runs a proper
122 composition modification function to make a composition that gets
123 invalid by the change valid again.
124
125 As the value of the `composition' property holds information about a
126 specific range of text, the value gets invalid if we change the
127 text in the range. We treat the `composition' property as always
128 rear-nonsticky (currently by setting default-text-properties to
129 (rear-nonsticky (composition))) and we never make properties of
130 adjacent compositions identical. Thus, any such changes make the
131 range just shorter. So, we can check the validity of the `composition'
132 property by comparing LENGTH information with the actual length of
133 the composition.
134
135 */
136
137
138 /* Table of pointers to the structure `composition' indexed by
139 COMPOSITION-ID. This structure is for storing information about
140 each composition except for COMPONENTS-VEC. */
141 struct composition **composition_table;
142
143 /* The current size of `composition_table'. */
144 static ptrdiff_t composition_table_size;
145
146 /* Number of compositions currently made. */
147 ptrdiff_t n_compositions;
148
149 /* Hash table for compositions. The key is COMPONENTS-VEC of
150 `composition' property. The value is the corresponding
151 COMPOSITION-ID. */
152 Lisp_Object composition_hash_table;
153
154 /* Maximum number of characters to look back for
155 auto-compositions. */
156 #define MAX_AUTO_COMPOSITION_LOOKBACK 3
157
158 /* Return COMPOSITION-ID of a composition at buffer position
159 CHARPOS/BYTEPOS and length NCHARS. The `composition' property of
160 the sequence is PROP. STRING, if non-nil, is a string that
161 contains the composition instead of the current buffer.
162
163 If the composition is invalid, return -1. */
164
165 ptrdiff_t
166 get_composition_id (ptrdiff_t charpos, ptrdiff_t bytepos, ptrdiff_t nchars,
167 Lisp_Object prop, Lisp_Object string)
168 {
169 Lisp_Object id, length, components, key, *key_contents, hash_code;
170 ptrdiff_t glyph_len;
171 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (composition_hash_table);
172 ptrdiff_t hash_index;
173 enum composition_method method;
174 struct composition *cmp;
175 int ch;
176
177 /* Maximum length of a string of glyphs. XftGlyphExtents limits
178 this to INT_MAX, and Emacs limits it further. Divide INT_MAX - 1
179 by 2 because gui_produce_glyphs computes glyph_len * 2 + 1. Divide
180 the size by MAX_MULTIBYTE_LENGTH because encode_terminal_code
181 multiplies glyph_len by MAX_MULTIBYTE_LENGTH. */
182 enum {
183 GLYPH_LEN_MAX = min ((INT_MAX - 1) / 2,
184 min (PTRDIFF_MAX, SIZE_MAX) / MAX_MULTIBYTE_LENGTH)
185 };
186
187 /* PROP should be
188 Form-A: ((LENGTH . COMPONENTS) . MODIFICATION-FUNC)
189 or
190 Form-B: (COMPOSITION-ID . (LENGTH COMPONENTS-VEC . MODIFICATION-FUNC))
191 */
192 if (nchars == 0 || !CONSP (prop))
193 goto invalid_composition;
194
195 id = XCAR (prop);
196 if (FIXNUMP (id))
197 {
198 /* PROP should be Form-B. */
199 if (XFIXNUM (id) < 0 || XFIXNUM (id) >= n_compositions)
200 goto invalid_composition;
201 return XFIXNUM (id);
202 }
203
204 /* PROP should be Form-A.
205 Thus, ID should be (LENGTH . COMPONENTS). */
206 if (!CONSP (id))
207 goto invalid_composition;
208 length = XCAR (id);
209 if (!FIXNUMP (length) || XFIXNUM (length) != nchars)
210 goto invalid_composition;
211
212 components = XCDR (id);
213
214 /* Check if the same composition has already been registered or not
215 by consulting composition_hash_table. The key for this table is
216 COMPONENTS (converted to a vector COMPONENTS-VEC) or, if it is
217 nil, vector of characters in the composition range. */
218 if (FIXNUMP (components))
219 key = make_vector (1, components);
220 else if (STRINGP (components) || CONSP (components))
221 key = Fvconcat (1, &components);
222 else if (VECTORP (components))
223 key = components;
224 else if (NILP (components))
225 {
226 key = make_uninit_vector (nchars);
227 if (STRINGP (string))
228 for (ptrdiff_t i = 0; i < nchars; i++)
229 {
230 ch = fetch_string_char_advance (string, &charpos, &bytepos);
231 ASET (key, i, make_fixnum (ch));
232 }
233 else
234 for (ptrdiff_t i = 0; i < nchars; i++)
235 {
236 ch = fetch_char_advance (&charpos, &bytepos);
237 ASET (key, i, make_fixnum (ch));
238 }
239 }
240 else
241 goto invalid_composition;
242
243 hash_index = hash_lookup (hash_table, key, &hash_code);
244 if (hash_index >= 0)
245 {
246 /* We have already registered the same composition. Change PROP
247 from Form-A above to Form-B while replacing COMPONENTS with
248 COMPONENTS-VEC stored in the hash table. We can directly
249 modify the cons cell of PROP because it is not shared. */
250 key = HASH_KEY (hash_table, hash_index);
251 id = HASH_VALUE (hash_table, hash_index);
252 XSETCAR (prop, id);
253 XSETCDR (prop, Fcons (make_fixnum (nchars), Fcons (key, XCDR (prop))));
254 return XFIXNUM (id);
255 }
256
257 /* This composition is a new one. We must register it. */
258
259 /* Check if we have sufficient memory to store this information. */
260 if (composition_table_size <= n_compositions)
261 composition_table = xpalloc (composition_table, &composition_table_size,
262 1, -1, sizeof *composition_table);
263
264 key_contents = XVECTOR (key)->contents;
265
266 /* Check if the contents of COMPONENTS are valid if COMPONENTS is a
267 vector or a list. It should be a sequence of:
268 char1 rule1 char2 rule2 char3 ... ruleN charN+1 */
269
270 if (VECTORP (components)
271 && ASIZE (components) >= 2
272 && VECTORP (AREF (components, 0)))
273 {
274 /* COMPONENTS is a glyph-string. */
275 ptrdiff_t len = ASIZE (key);
276
277 for (ptrdiff_t i = 1; i < len; i++)
278 if (! VECTORP (AREF (key, i)))
279 goto invalid_composition;
280 }
281 else if (VECTORP (components) || CONSP (components))
282 {
283 ptrdiff_t len = ASIZE (key);
284
285 /* The number of elements should be odd. */
286 if ((len % 2) == 0)
287 goto invalid_composition;
288 /* All elements should be integers (character or encoded
289 composition rule). */
290 for (ptrdiff_t i = 0; i < len; i++)
291 {
292 if (!FIXNUMP (key_contents[i]))
293 goto invalid_composition;
294 }
295 }
296
297 /* Change PROP from Form-A above to Form-B. We can directly modify
298 the cons cell of PROP because it is not shared. */
299 XSETFASTINT (id, n_compositions);
300 XSETCAR (prop, id);
301 XSETCDR (prop, Fcons (make_fixnum (nchars), Fcons (key, XCDR (prop))));
302
303 /* Register the composition in composition_hash_table. */
304 hash_index = hash_put (hash_table, key, id, hash_code);
305
306 method = (NILP (components)
307 ? COMPOSITION_RELATIVE
308 : ((FIXNUMP (components) || STRINGP (components))
309 ? COMPOSITION_WITH_ALTCHARS
310 : COMPOSITION_WITH_RULE_ALTCHARS));
311
312 glyph_len = (method == COMPOSITION_WITH_RULE_ALTCHARS
313 ? (ASIZE (key) + 1) / 2
314 : ASIZE (key));
315
316 if (GLYPH_LEN_MAX < glyph_len)
317 memory_full (SIZE_MAX);
318
319 /* Register the composition in composition_table. */
320 cmp = xmalloc (sizeof *cmp);
321
322 cmp->method = method;
323 cmp->hash_index = hash_index;
324 cmp->glyph_len = glyph_len;
325 cmp->offsets = xnmalloc (glyph_len, 2 * sizeof *cmp->offsets);
326 cmp->font = NULL;
327
328 if (cmp->method != COMPOSITION_WITH_RULE_ALTCHARS)
329 {
330 /* Relative composition. */
331 cmp->width = 0;
332 for (ptrdiff_t i = 0; i < glyph_len; i++)
333 {
334 int this_width;
335 ch = XFIXNUM (key_contents[i]);
336 /* TAB in a composition means display glyphs with padding
337 space on the left or right. */
338 this_width = (ch == '\t' ? 1 : CHARACTER_WIDTH (ch));
339 if (cmp->width < this_width)
340 cmp->width = this_width;
341 }
342 }
343 else
344 {
345 /* Rule-base composition. */
346 double leftmost = 0.0, rightmost;
347
348 ch = XFIXNUM (key_contents[0]);
349 rightmost = ch != '\t' ? CHARACTER_WIDTH (ch) : 1;
350
351 for (ptrdiff_t i = 1; i < glyph_len; i += 2)
352 {
353 int rule, gref, nref;
354 int this_width;
355 double this_left;
356
357 rule = XFIXNUM (key_contents[i]);
358 ch = XFIXNUM (key_contents[i + 1]);
359 this_width = ch != '\t' ? CHARACTER_WIDTH (ch) : 1;
360
361 /* A composition rule is specified by an integer value
362 that encodes global and new reference points (GREF and
363 NREF). GREF and NREF are specified by numbers as
364 below:
365 0---1---2 -- ascent
366 | |
367 | |
368 | |
369 9--10--11 -- center
370 | |
371 ---3---4---5--- baseline
372 | |
373 6---7---8 -- descent
374 */
375 COMPOSITION_DECODE_REFS (rule, gref, nref);
376 this_left = (leftmost
377 + (gref % 3) * (rightmost - leftmost) / 2.0
378 - (nref % 3) * this_width / 2.0);
379
380 if (this_left < leftmost)
381 leftmost = this_left;
382 if (this_left + this_width > rightmost)
383 rightmost = this_left + this_width;
384 }
385
386 cmp->width = rightmost - leftmost;
387 if (cmp->width < (rightmost - leftmost))
388 /* To get a ceiling integer value. */
389 cmp->width++;
390 }
391
392 composition_table[n_compositions] = cmp;
393
394 return n_compositions++;
395
396 invalid_composition:
397 /* Would it be better to remove this `composition' property? */
398 return -1;
399 }
400
401
402 /* Find a static composition at or nearest to position POS of OBJECT
403 (buffer or string).
404
405 OBJECT defaults to the current buffer. If there's a composition at
406 POS, set *START and *END to the start and end of the sequence,
407 *PROP to the `composition' property, and return 1.
408
409 If there's no composition at POS and LIMIT is negative, return 0.
410
411 Otherwise, search for a composition forward (LIMIT > POS) or
412 backward (LIMIT < POS). In this case, LIMIT bounds the search.
413
414 If a composition is found, set *START, *END, and *PROP as above,
415 and return 1, else return 0.
416
417 This doesn't check the validity of composition. */
418
419 bool
420 find_composition (ptrdiff_t pos, ptrdiff_t limit,
421 ptrdiff_t *start, ptrdiff_t *end,
422 Lisp_Object *prop, Lisp_Object object)
423 {
424 Lisp_Object val;
425
426 if (get_property_and_range (pos, Qcomposition, prop, start, end, object))
427 return 1;
428
429 if (limit < 0 || limit == pos)
430 return 0;
431
432 if (limit > pos) /* search forward */
433 {
434 val = Fnext_single_property_change (make_fixnum (pos), Qcomposition,
435 object, make_fixnum (limit));
436 pos = XFIXNUM (val);
437 if (pos == limit)
438 return 0;
439 }
440 else /* search backward */
441 {
442 if (get_property_and_range (pos - 1, Qcomposition, prop, start, end,
443 object))
444 return 1;
445 val = Fprevious_single_property_change (make_fixnum (pos), Qcomposition,
446 object, make_fixnum (limit));
447 pos = XFIXNUM (val);
448 if (pos == limit)
449 return 0;
450 pos--;
451 }
452 get_property_and_range (pos, Qcomposition, prop, start, end, object);
453 return 1;
454 }
455
456 /* Run a proper function to adjust the composition sitting between
457 FROM and TO with property PROP. */
458
459 static void
460 run_composition_function (ptrdiff_t from, ptrdiff_t to, Lisp_Object prop)
461 {
462 Lisp_Object func;
463 ptrdiff_t start, end;
464
465 func = COMPOSITION_MODIFICATION_FUNC (prop);
466 /* If an invalid composition precedes or follows, try to make them
467 valid too. */
468 if (from > BEGV
469 && find_composition (from - 1, -1, &start, &end, &prop, Qnil)
470 && !composition_valid_p (start, end, prop))
471 from = start;
472 if (to < ZV
473 && find_composition (to, -1, &start, &end, &prop, Qnil)
474 && !composition_valid_p (start, end, prop))
475 to = end;
476 if (!NILP (Ffboundp (func)))
477 call2 (func, make_fixnum (from), make_fixnum (to));
478 }
479
480 /* Make invalid compositions adjacent to or inside FROM and TO valid.
481 CHECK_MASK is bitwise `or' of mask bits defined by macros
482 CHECK_XXX (see the comment in composite.h).
483
484 It also resets the text-property `auto-composed' to a proper region
485 so that automatic character composition works correctly later while
486 displaying the region.
487
488 This function is called when a buffer text is changed. If the
489 change is deletion, FROM == TO. Otherwise, FROM < TO. */
490
491 void
492 update_compositions (ptrdiff_t from, ptrdiff_t to, int check_mask)
493 {
494 Lisp_Object prop;
495 ptrdiff_t start, end;
496 /* The beginning and end of the region to set the property
497 `auto-composed' to nil. */
498 ptrdiff_t min_pos = from, max_pos = to;
499
500 if (inhibit_modification_hooks)
501 return;
502
503 /* If FROM and TO are not in a valid range, do nothing. */
504 if (! (BEGV <= from && from <= to && to <= ZV))
505 return;
506
507 if (check_mask & CHECK_HEAD)
508 {
509 /* FROM should be at composition boundary. But, insertion or
510 deletion will make two compositions adjacent and
511 indistinguishable when they have same (eq) property. To
512 avoid it, in such a case, we change the property of the
513 latter to the copy of it. */
514 if (from > BEGV
515 && find_composition (from - 1, -1, &start, &end, &prop, Qnil)
516 && composition_valid_p (start, end, prop))
517 {
518 min_pos = start;
519 if (end > to)
520 max_pos = end;
521 if (from < end)
522 Fput_text_property (make_fixnum (from), make_fixnum (end),
523 Qcomposition,
524 Fcons (XCAR (prop), XCDR (prop)), Qnil);
525 run_composition_function (start, end, prop);
526 from = end;
527 }
528 else if (from < ZV
529 && find_composition (from, -1, &start, &from, &prop, Qnil)
530 && composition_valid_p (start, from, prop))
531 {
532 if (from > to)
533 max_pos = from;
534 run_composition_function (start, from, prop);
535 }
536 }
537
538 if (check_mask & CHECK_INSIDE)
539 {
540 /* In this case, we are sure that (check & CHECK_TAIL) is also
541 nonzero. Thus, here we should check only compositions before
542 (to - 1). */
543 while (from < to - 1
544 && find_composition (from, to, &start, &from, &prop, Qnil)
545 && composition_valid_p (start, from, prop)
546 && from < to - 1)
547 run_composition_function (start, from, prop);
548 }
549
550 if (check_mask & CHECK_TAIL)
551 {
552 if (from < to
553 && find_composition (to - 1, -1, &start, &end, &prop, Qnil)
554 && composition_valid_p (start, end, prop))
555 {
556 /* TO should be also at composition boundary. But,
557 insertion or deletion will make two compositions adjacent
558 and indistinguishable when they have same (eq) property.
559 To avoid it, in such a case, we change the property of
560 the former to the copy of it. */
561 if (to < end)
562 {
563 Fput_text_property (make_fixnum (start), make_fixnum (to),
564 Qcomposition,
565 Fcons (XCAR (prop), XCDR (prop)), Qnil);
566 max_pos = end;
567 }
568 run_composition_function (start, end, prop);
569 }
570 else if (to < ZV
571 && find_composition (to, -1, &start, &end, &prop, Qnil)
572 && composition_valid_p (start, end, prop))
573 {
574 run_composition_function (start, end, prop);
575 max_pos = end;
576 }
577 }
578 if (min_pos < max_pos)
579 {
580 specpdl_ref count = SPECPDL_INDEX ();
581
582 specbind (Qinhibit_read_only, Qt);
583 specbind (Qinhibit_modification_hooks, Qt);
584 specbind (Qinhibit_point_motion_hooks, Qt);
585 Fremove_list_of_text_properties (make_fixnum (min_pos),
586 make_fixnum (max_pos),
587 list1 (Qauto_composed), Qnil);
588 unbind_to (count, Qnil);
589 }
590 }
591
592
593 /* Modify composition property values in LIST destructively. LIST is
594 a list as returned from text_property_list. Change values to the
595 top-level copies of them so that none of them are `eq'. */
596
597 void
598 make_composition_value_copy (Lisp_Object list)
599 {
600 Lisp_Object plist, val;
601
602 for (; CONSP (list); list = XCDR (list))
603 {
604 plist = XCAR (XCDR (XCDR (XCAR (list))));
605 while (CONSP (plist) && CONSP (XCDR (plist)))
606 {
607 if (EQ (XCAR (plist), Qcomposition)
608 && (val = XCAR (XCDR (plist)), CONSP (val)))
609 XSETCAR (XCDR (plist), Fcons (XCAR (val), XCDR (val)));
610 plist = XCDR (XCDR (plist));
611 }
612 }
613 }
614
615
616 /* Make text in the region between START and END a composition that
617 has COMPONENTS and MODIFICATION-FUNC.
618
619 If STRING is non-nil, then operate on characters contained between
620 indices START and END in STRING. */
621
622 void
623 compose_text (ptrdiff_t start, ptrdiff_t end, Lisp_Object components,
624 Lisp_Object modification_func, Lisp_Object string)
625 {
626 Lisp_Object prop;
627
628 prop = Fcons (Fcons (make_fixnum (end - start), components),
629 modification_func);
630 Fput_text_property (make_fixnum (start), make_fixnum (end),
631 Qcomposition, prop, string);
632 }
633
634 /* Lisp glyph-string handlers. */
635
636 /* Hash table for automatic composition. The key is a header of a
637 lgstring (Lispy glyph-string), and the value is a body of a
638 lgstring. */
639
640 static Lisp_Object gstring_hash_table;
641
642 Lisp_Object
643 composition_gstring_lookup_cache (Lisp_Object header)
644 {
645 struct Lisp_Hash_Table *h = XHASH_TABLE (gstring_hash_table);
646 ptrdiff_t i = hash_lookup (h, header, NULL);
647
648 return (i >= 0 ? HASH_VALUE (h, i) : Qnil);
649 }
650
651 Lisp_Object
652 composition_gstring_put_cache (Lisp_Object gstring, ptrdiff_t len)
653 {
654 struct Lisp_Hash_Table *h = XHASH_TABLE (gstring_hash_table);
655 Lisp_Object header = LGSTRING_HEADER (gstring);
656 Lisp_Object hash = h->test.hashfn (header, h);
657 if (len < 0)
658 {
659 ptrdiff_t glyph_len = LGSTRING_GLYPH_LEN (gstring);
660 for (len = 0; len < glyph_len; len++)
661 if (NILP (LGSTRING_GLYPH (gstring, len)))
662 break;
663 }
664
665 Lisp_Object copy = make_nil_vector (len + 2);
666 LGSTRING_SET_HEADER (copy, Fcopy_sequence (header));
667 for (ptrdiff_t i = 0; i < len; i++)
668 LGSTRING_SET_GLYPH (copy, i, Fcopy_sequence (LGSTRING_GLYPH (gstring, i)));
669 ptrdiff_t id = hash_put (h, LGSTRING_HEADER (copy), copy, hash);
670 LGSTRING_SET_ID (copy, make_fixnum (id));
671 return copy;
672 }
673
674 Lisp_Object
675 composition_gstring_from_id (ptrdiff_t id)
676 {
677 struct Lisp_Hash_Table *h = XHASH_TABLE (gstring_hash_table);
678
679 return HASH_VALUE (h, id);
680 }
681
682 /* Remove from the composition hash table every lgstring that
683 references the given FONT_OBJECT. */
684 void
685 composition_gstring_cache_clear_font (Lisp_Object font_object)
686 {
687 struct Lisp_Hash_Table *h = XHASH_TABLE (gstring_hash_table);
688
689 for (ptrdiff_t i = 0; i < HASH_TABLE_SIZE (h); ++i)
690 {
691 Lisp_Object k = HASH_KEY (h, i);
692
693 if (!BASE_EQ (k, Qunbound))
694 {
695 Lisp_Object gstring = HASH_VALUE (h, i);
696
697 if (EQ (LGSTRING_FONT (gstring), font_object))
698 hash_remove_from_table (h, k);
699 }
700 }
701 }
702
703 DEFUN ("clear-composition-cache", Fclear_composition_cache,
704 Sclear_composition_cache, 0, 0, 0,
705 doc: /* Internal use only.
706 Clear composition cache. */)
707 (void)
708 {
709 gstring_hash_table = CALLN (Fmake_hash_table, QCtest, Qequal,
710 QCsize, make_fixnum (311));
711 /* Fixme: We call Fclear_face_cache to force complete re-building of
712 display glyphs. But, it may be better to call this function from
713 Fclear_face_cache instead. */
714 return Fclear_face_cache (Qt);
715 }
716
717 bool
718 composition_gstring_p (Lisp_Object gstring)
719 {
720 Lisp_Object header;
721 ptrdiff_t i;
722
723 if (! VECTORP (gstring) || ASIZE (gstring) < 2)
724 return 0;
725 header = LGSTRING_HEADER (gstring);
726 if (! VECTORP (header) || ASIZE (header) < 2)
727 return 0;
728 if (! NILP (LGSTRING_FONT (gstring))
729 && (! FONT_OBJECT_P (LGSTRING_FONT (gstring))
730 && ! CODING_SYSTEM_P (LGSTRING_FONT (gstring))))
731 return 0;
732 for (i = 1; i < ASIZE (LGSTRING_HEADER (gstring)); i++)
733 if (! FIXNATP (AREF (LGSTRING_HEADER (gstring), i)))
734 return 0;
735 if (! NILP (LGSTRING_ID (gstring)) && ! FIXNATP (LGSTRING_ID (gstring)))
736 return 0;
737 for (i = 0; i < LGSTRING_GLYPH_LEN (gstring); i++)
738 {
739 Lisp_Object glyph = LGSTRING_GLYPH (gstring, i);
740 if (NILP (glyph))
741 break;
742 if (! VECTORP (glyph) || ASIZE (glyph) != LGLYPH_SIZE)
743 return 0;
744 }
745 return 1;
746 }
747
748 int
749 composition_gstring_width (Lisp_Object gstring, ptrdiff_t from, ptrdiff_t to,
750 struct font_metrics *metrics)
751 {
752 Lisp_Object *glyph;
753 int width = 0;
754
755 if (metrics)
756 {
757 Lisp_Object font_object = LGSTRING_FONT (gstring);
758
759 if (FONT_OBJECT_P (font_object))
760 {
761 struct font *font = XFONT_OBJECT (font_object);
762 int font_ascent, font_descent;
763
764 get_font_ascent_descent (font, &font_ascent, &font_descent);
765 metrics->ascent = font_ascent;
766 metrics->descent = font_descent;
767 }
768 else
769 {
770 metrics->ascent = 1;
771 metrics->descent = 0;
772 }
773 metrics->width = metrics->lbearing = metrics->rbearing = 0;
774 }
775 for (glyph = lgstring_glyph_addr (gstring, from); from < to; from++, glyph++)
776 {
777 int x;
778
779 if (NILP (LGLYPH_ADJUSTMENT (*glyph)))
780 width += LGLYPH_WIDTH (*glyph);
781 else
782 width += LGLYPH_WADJUST (*glyph);
783 if (metrics)
784 {
785 x = metrics->width + LGLYPH_LBEARING (*glyph) + LGLYPH_XOFF (*glyph);
786 if (metrics->lbearing > x)
787 metrics->lbearing = x;
788 x = metrics->width + LGLYPH_RBEARING (*glyph) + LGLYPH_XOFF (*glyph);
789 if (metrics->rbearing < x)
790 metrics->rbearing = x;
791 metrics->width = width;
792 x = LGLYPH_ASCENT (*glyph) - LGLYPH_YOFF (*glyph);
793 if (metrics->ascent < x)
794 metrics->ascent = x;
795 x = LGLYPH_DESCENT (*glyph) + LGLYPH_YOFF (*glyph);
796 if (metrics->descent < x)
797 metrics->descent = x;
798 }
799 }
800 return width;
801 }
802
803 /* Adjust the width of each grapheme cluster of GSTRING because
804 zero-width grapheme clusters are not displayed. If the width is
805 zero, then the width of the last glyph in the cluster is
806 incremented. */
807
808 void
809 composition_gstring_adjust_zero_width (Lisp_Object gstring)
810 {
811 ptrdiff_t from = 0;
812 int width = 0;
813
814 for (ptrdiff_t i = 0; ; i++)
815 {
816 Lisp_Object glyph;
817
818 if (i < LGSTRING_GLYPH_LEN (gstring))
819 glyph = LGSTRING_GLYPH (gstring, i);
820 else
821 glyph = Qnil;
822
823 if (NILP (glyph) || from != LGLYPH_FROM (glyph))
824 {
825 eassert (i > 0);
826 Lisp_Object last = LGSTRING_GLYPH (gstring, i - 1);
827
828 if (width == 0)
829 {
830 if (NILP (LGLYPH_ADJUSTMENT (last)))
831 LGLYPH_SET_ADJUSTMENT (last,
832 CALLN (Fvector,
833 make_fixnum (0), make_fixnum (0),
834 make_fixnum (LGLYPH_WIDTH (last)
835 + 1)));
836 else
837 ASET (LGLYPH_ADJUSTMENT (last), 2,
838 make_fixnum (LGLYPH_WADJUST (last) + 1));
839 }
840 if (NILP (glyph))
841 break;
842 from = LGLYPH_FROM (glyph);
843 width = 0;
844 }
845 width += (NILP (LGLYPH_ADJUSTMENT (glyph))
846 ? LGLYPH_WIDTH (glyph) : LGLYPH_WADJUST (glyph));
847 }
848 }
849
850
851 static Lisp_Object gstring_work;
852 static Lisp_Object gstring_work_headers;
853
854 static Lisp_Object
855 fill_gstring_header (ptrdiff_t from, ptrdiff_t from_byte,
856 ptrdiff_t to, Lisp_Object font_object, Lisp_Object string)
857 {
858 ptrdiff_t len = to - from;
859 if (len == 0)
860 error ("Attempt to shape zero-length text");
861 eassume (0 < len);
862 Lisp_Object header = (len <= 8
863 ? AREF (gstring_work_headers, len - 1)
864 : make_uninit_vector (len + 1));
865
866 ASET (header, 0, font_object);
867 for (ptrdiff_t i = 0; i < len; i++)
868 {
869 int c
870 = (NILP (string)
871 ? fetch_char_advance_no_check (&from, &from_byte)
872 : fetch_string_char_advance_no_check (string, &from, &from_byte));
873 ASET (header, i + 1, make_fixnum (c));
874 }
875 return header;
876 }
877
878 static void
879 fill_gstring_body (Lisp_Object gstring)
880 {
881 Lisp_Object font_object = LGSTRING_FONT (gstring);
882 Lisp_Object header = AREF (gstring, 0);
883 ptrdiff_t len = LGSTRING_CHAR_LEN (gstring);
884 ptrdiff_t i;
885 struct font *font = NULL;
886 unsigned int code;
887
888 if (FONT_OBJECT_P (font_object))
889 font = XFONT_OBJECT (font_object);
890
891 for (i = 0; i < len; i++)
892 {
893 Lisp_Object g = LGSTRING_GLYPH (gstring, i);
894 int c = XFIXNAT (AREF (header, i + 1));
895
896 if (NILP (g))
897 {
898 g = LGLYPH_NEW ();
899 LGSTRING_SET_GLYPH (gstring, i, g);
900 }
901 LGLYPH_SET_FROM (g, i);
902 LGLYPH_SET_TO (g, i);
903 LGLYPH_SET_CHAR (g, c);
904
905 if (font != NULL)
906 code = font->driver->encode_char (font, LGLYPH_CHAR (g));
907 else
908 code = FONT_INVALID_CODE;
909 if (code != FONT_INVALID_CODE)
910 {
911 font_fill_lglyph_metrics (g, font, code);
912 }
913 else
914 {
915 int width = XFIXNAT (CHAR_TABLE_REF (Vchar_width_table, c));
916
917 LGLYPH_SET_CODE (g, c);
918 LGLYPH_SET_LBEARING (g, 0);
919 LGLYPH_SET_RBEARING (g, width);
920 LGLYPH_SET_WIDTH (g, width);
921 LGLYPH_SET_ASCENT (g, 1);
922 LGLYPH_SET_DESCENT (g, 0);
923 }
924 LGLYPH_SET_ADJUSTMENT (g, Qnil);
925 }
926 len = LGSTRING_GLYPH_LEN (gstring);
927 for (; i < len; i++)
928 LGSTRING_SET_GLYPH (gstring, i, Qnil);
929 }
930
931
932 /* Try to compose the characters at CHARPOS according to composition
933 rule RULE ([PATTERN PREV-CHARS FUNC]). LIMIT limits the characters
934 to compose. STRING, if not nil, is a target string. WIN is a
935 window where the characters are being displayed. CH is the
936 character that triggered the composition check. If characters are
937 successfully composed, return the composition as a glyph-string
938 object. Otherwise return nil. */
939
940 static Lisp_Object
941 autocmp_chars (Lisp_Object rule, ptrdiff_t charpos, ptrdiff_t bytepos,
942 ptrdiff_t limit, struct window *win, struct face *face,
943 Lisp_Object string, Lisp_Object direction, int ch)
944 {
945 specpdl_ref count = SPECPDL_INDEX ();
946 Lisp_Object pos = make_fixnum (charpos);
947 ptrdiff_t to;
948 ptrdiff_t pt = PT, pt_byte = PT_BYTE;
949 Lisp_Object re, font_object, lgstring;
950 ptrdiff_t len;
951
952 record_unwind_save_match_data ();
953 re = AREF (rule, 0);
954 if (NILP (re))
955 len = 1;
956 else if (! STRINGP (re))
957 return unbind_to (count, Qnil);
958 else if ((len = fast_looking_at (re, charpos, bytepos, limit, -1, string))
959 > 0)
960 {
961 if (NILP (string))
962 len = BYTE_TO_CHAR (bytepos + len) - charpos;
963 else
964 len = string_byte_to_char (string, bytepos + len) - charpos;
965 }
966 if (len <= 0)
967 return unbind_to (count, Qnil);
968 to = limit = charpos + len;
969 font_object = win->frame;
970 #ifdef HAVE_WINDOW_SYSTEM
971 struct frame *f = XFRAME (font_object);
972 if (FRAME_WINDOW_P (f))
973 {
974 font_object = font_range (charpos, bytepos, &to, win, face, string, ch);
975 if (! FONT_OBJECT_P (font_object)
976 || (! NILP (re)
977 && to < limit
978 && (fast_looking_at (re, charpos, bytepos, to, -1, string) <= 0)))
979 return unbind_to (count, Qnil);
980 }
981 #endif
982 lgstring = Fcomposition_get_gstring (pos, make_fixnum (to), font_object,
983 string);
984 if (NILP (LGSTRING_ID (lgstring)))
985 {
986 /* Save point as marker before calling out to lisp. */
987 if (NILP (string))
988 record_unwind_protect (restore_point_unwind,
989 build_marker (current_buffer, pt, pt_byte));
990 lgstring = safe_call (7, Vauto_composition_function, AREF (rule, 2),
991 pos, make_fixnum (to), font_object, string,
992 direction);
993 }
994 return unbind_to (count, lgstring);
995 }
996
997 /* 1 iff the character C is composable. Characters of general
998 category Z? or C? are not composable except for ZWNJ and ZWJ,
999 and characters of category Zs. */
1000
1001 static bool
1002 char_composable_p (int c)
1003 {
1004 Lisp_Object val;
1005 return (c >= ' '
1006 && (c == ZERO_WIDTH_NON_JOINER || c == ZERO_WIDTH_JOINER
1007 /* Per Unicode TR51, these tag characters can be part of
1008 Emoji sequences. */
1009 || (TAG_SPACE <= c && c <= CANCEL_TAG)
1010 /* unicode-category-table may not be available during
1011 dumping. */
1012 || (CHAR_TABLE_P (Vunicode_category_table)
1013 && (val = CHAR_TABLE_REF (Vunicode_category_table, c),
1014 (FIXNUMP (val)
1015 && (XFIXNUM (val) <= UNICODE_CATEGORY_Zs))))));
1016 }
1017
1018 static inline bool
1019 inhibit_auto_composition (void)
1020 {
1021 if (NILP (Vauto_composition_mode))
1022 return true;
1023
1024 if (STRINGP (Vauto_composition_mode))
1025 {
1026 char *name = tty_type_name (Qnil);
1027
1028 if (name && ! strcmp (SSDATA (Vauto_composition_mode), name))
1029 return true;
1030 }
1031
1032 return false;
1033 }
1034
1035 /* Update cmp_it->stop_pos to the next position after CHARPOS (and
1036 BYTEPOS) where character composition may happen. If BYTEPOS is
1037 negative, compute it. ENDPOS is a limit of searching. If it is
1038 less than CHARPOS, search backward to ENDPOS+1 assuming that
1039 set_iterator_to_next works in reverse order. In this case, if a
1040 composition closest to CHARPOS is found, set cmp_it->stop_pos to
1041 the last character of the composition. STRING, if non-nil, is
1042 the string (as opposed to a buffer) whose characters should be
1043 tested for being composable.
1044
1045 If no composition is found, set cmp_it->ch to -2. If a static
1046 composition is found, set cmp_it->ch to -1. Otherwise, set
1047 cmp_it->ch to the character that triggers the automatic
1048 composition. */
1049
1050 void
1051 composition_compute_stop_pos (struct composition_it *cmp_it, ptrdiff_t charpos,
1052 ptrdiff_t bytepos, ptrdiff_t endpos,
1053 Lisp_Object string)
1054 {
1055 ptrdiff_t start, end;
1056 int c;
1057 Lisp_Object prop, val;
1058 /* This is from forward_to_next_line_start in xdisp.c. */
1059 const int MAX_NEWLINE_DISTANCE = 500;
1060
1061 if (charpos < endpos)
1062 {
1063 if (endpos > charpos + MAX_NEWLINE_DISTANCE)
1064 endpos = charpos + MAX_NEWLINE_DISTANCE;
1065 }
1066 else if (endpos < charpos)
1067 {
1068 /* We search backward for a position to check composition. */
1069 if (endpos < 0)
1070 {
1071 /* But we don't know where to stop the searching. */
1072 endpos = NILP (string) ? BEGV - 1 : -1;
1073 /* Usually we don't reach ENDPOS because we stop searching
1074 at an uncomposable character (NL, LRE, etc). In buffers
1075 with long lines, however, NL might be far away, so
1076 pretend that the buffer is smaller. */
1077 if (current_buffer->long_line_optimizations_p)
1078 endpos = get_small_narrowing_begv (cmp_it->parent_it->w, charpos);
1079 }
1080 }
1081 cmp_it->id = -1;
1082 cmp_it->ch = -2;
1083 cmp_it->reversed_p = 0;
1084 cmp_it->stop_pos = endpos;
1085 if (charpos == endpos)
1086 return;
1087 /* FIXME: Bidi is not yet handled well in static composition. */
1088 if (charpos < endpos
1089 && find_composition (charpos, endpos, &start, &end, &prop, string)
1090 && start >= charpos
1091 && composition_valid_p (start, end, prop))
1092 {
1093 cmp_it->stop_pos = endpos = start;
1094 cmp_it->ch = -1;
1095 }
1096 if ((NILP (string)
1097 && NILP (BVAR (current_buffer, enable_multibyte_characters)))
1098 || (STRINGP (string) && !STRING_MULTIBYTE (string))
1099 || inhibit_auto_composition ())
1100 return;
1101 if (bytepos < 0)
1102 {
1103 if (NILP (string))
1104 bytepos = CHAR_TO_BYTE (charpos);
1105 else
1106 bytepos = string_char_to_byte (string, charpos);
1107 }
1108
1109 start = charpos;
1110 if (charpos < endpos)
1111 {
1112 /* Forward search. */
1113 while (charpos < endpos)
1114 {
1115 c = (STRINGP (string)
1116 ? fetch_string_char_advance (string, &charpos, &bytepos)
1117 : fetch_char_advance (&charpos, &bytepos));
1118 if (c == '\n')
1119 {
1120 cmp_it->ch = -2;
1121 break;
1122 }
1123 val = CHAR_TABLE_REF (Vcomposition_function_table, c);
1124 if (! NILP (val))
1125 {
1126 for (EMACS_INT ridx = 0; CONSP (val); val = XCDR (val), ridx++)
1127 {
1128 Lisp_Object elt = XCAR (val);
1129 if (VECTORP (elt) && ASIZE (elt) == 3
1130 && FIXNATP (AREF (elt, 1))
1131 && charpos - 1 - XFIXNAT (AREF (elt, 1)) >= start)
1132 {
1133 cmp_it->rule_idx = ridx;
1134 cmp_it->lookback = XFIXNAT (AREF (elt, 1));
1135 cmp_it->stop_pos = charpos - 1 - cmp_it->lookback;
1136 cmp_it->ch = c;
1137 return;
1138 }
1139 }
1140 }
1141 }
1142 if (charpos == endpos
1143 && !(STRINGP (string) && endpos == SCHARS (string)))
1144 {
1145 /* We couldn't find a composition point before ENDPOS. But,
1146 some character after ENDPOS may be composed with
1147 characters before ENDPOS. So, we should stop at the safe
1148 point. */
1149 charpos = endpos - MAX_AUTO_COMPOSITION_LOOKBACK;
1150 if (charpos < start)
1151 charpos = start;
1152 }
1153 }
1154 else if (charpos > endpos)
1155 {
1156 /* Search backward for a pattern that may be composed and the
1157 position of (possibly) the last character of the match is
1158 closest to (but not after) START. The reason for the last
1159 character is that set_iterator_to_next works in reverse order,
1160 and thus we must stop at the last character for composition
1161 check. */
1162 unsigned char *p;
1163 int len;
1164 /* Limit byte position used in fast_looking_at. This is the
1165 byte position of the character after START. */
1166 ptrdiff_t limit;
1167
1168 if (NILP (string))
1169 p = BYTE_POS_ADDR (bytepos);
1170 else
1171 p = SDATA (string) + bytepos;
1172 c = string_char_and_length (p, &len);
1173 limit = bytepos + len;
1174 while (char_composable_p (c))
1175 {
1176 val = CHAR_TABLE_REF (Vcomposition_function_table, c);
1177 for (EMACS_INT ridx = 0; CONSP (val); val = XCDR (val), ridx++)
1178 {
1179 Lisp_Object elt = XCAR (val);
1180 if (VECTORP (elt) && ASIZE (elt) == 3
1181 && FIXNATP (AREF (elt, 1))
1182 && charpos - XFIXNAT (AREF (elt, 1)) > endpos)
1183 {
1184 ptrdiff_t back = XFIXNAT (AREF (elt, 1));
1185 ptrdiff_t cpos = charpos - back, bpos;
1186
1187 if (back == 0)
1188 bpos = bytepos;
1189 else
1190 bpos = (NILP (string) ? CHAR_TO_BYTE (cpos)
1191 : string_char_to_byte (string, cpos));
1192 ptrdiff_t blen
1193 = (STRINGP (AREF (elt, 0))
1194 ? fast_looking_at (AREF (elt, 0), cpos, bpos,
1195 start + 1, limit, string)
1196 : 1);
1197 if (blen > 0)
1198 {
1199 /* Make CPOS point to the last character of
1200 match. Note that BLEN is byte-length. */
1201 if (blen > 1)
1202 {
1203 bpos += blen;
1204 if (NILP (string))
1205 cpos = BYTE_TO_CHAR (bpos) - 1;
1206 else
1207 cpos = string_byte_to_char (string, bpos) - 1;
1208 }
1209 back = cpos - (charpos - back);
1210 if (cmp_it->stop_pos < cpos
1211 || (cmp_it->stop_pos == cpos
1212 && cmp_it->lookback < back))
1213 {
1214 cmp_it->rule_idx = ridx;
1215 cmp_it->stop_pos = cpos;
1216 cmp_it->ch = c;
1217 cmp_it->lookback = back;
1218 cmp_it->nchars = back + 1;
1219 }
1220 }
1221 }
1222 }
1223 if (charpos - 1 == endpos)
1224 break;
1225 if (STRINGP (string))
1226 {
1227 p--, bytepos--;
1228 while (! CHAR_HEAD_P (*p))
1229 p--, bytepos--;
1230 charpos--;
1231 }
1232 else
1233 {
1234 dec_both (&charpos, &bytepos);
1235 p = BYTE_POS_ADDR (bytepos);
1236 }
1237 c = STRING_CHAR (p);
1238 }
1239 if (cmp_it->ch >= 0)
1240 /* We found a position to check. */
1241 return;
1242 /* Skip all uncomposable characters. */
1243 if (NILP (string))
1244 {
1245 while (charpos - 1 > endpos && ! char_composable_p (c))
1246 {
1247 dec_both (&charpos, &bytepos);
1248 c = FETCH_MULTIBYTE_CHAR (bytepos);
1249 }
1250 }
1251 else
1252 {
1253 while (charpos - 1 > endpos && ! char_composable_p (c))
1254 {
1255 p--;
1256 while (! CHAR_HEAD_P (*p))
1257 p--;
1258 charpos--;
1259 c = STRING_CHAR (p);
1260 }
1261 }
1262 }
1263 cmp_it->stop_pos = charpos;
1264 }
1265
1266 /* Check if the character at CHARPOS (and BYTEPOS) is composed
1267 (possibly with the following characters) on window W. ENDPOS limits
1268 characters to be composed. FACE, if non-NULL, is a base face of
1269 the character. If STRING is not nil, it is a string containing the
1270 character to check, and CHARPOS and BYTEPOS are indices in the
1271 string. In that case, FACE must not be NULL. BIDI_LEVEL is the bidi
1272 embedding level of the current paragraph, and is used to calculate the
1273 direction argument to pass to the font shaper; value of -1 means the
1274 caller doesn't know the embedding level (used by callers which didn't
1275 invoke the display routines that perform bidi-display-reordering).
1276
1277 If the character is composed, setup members of CMP_IT (id, nglyphs,
1278 from, to, reversed_p), and return true. Otherwise, update
1279 CMP_IT->stop_pos, and return false. */
1280
1281 bool
1282 composition_reseat_it (struct composition_it *cmp_it, ptrdiff_t charpos,
1283 ptrdiff_t bytepos, ptrdiff_t endpos, struct window *w,
1284 signed char bidi_level, struct face *face, Lisp_Object string)
1285 {
1286 if (cmp_it->ch == -2)
1287 {
1288 composition_compute_stop_pos (cmp_it, charpos, bytepos, endpos, string);
1289 if (cmp_it->ch == -2 || cmp_it->stop_pos != charpos)
1290 /* The current position is not composed. */
1291 return 0;
1292 }
1293
1294 if (endpos < 0)
1295 endpos = NILP (string) ? BEGV : 0;
1296
1297 if (cmp_it->ch < 0)
1298 {
1299 /* We are looking at a static composition. */
1300 ptrdiff_t start, end;
1301 Lisp_Object prop;
1302
1303 find_composition (charpos, -1, &start, &end, &prop, string);
1304 cmp_it->id = get_composition_id (charpos, bytepos, end - start,
1305 prop, string);
1306 if (cmp_it->id < 0)
1307 goto no_composition;
1308 cmp_it->nchars = end - start;
1309 cmp_it->nglyphs = composition_table[cmp_it->id]->glyph_len;
1310 }
1311 else if (w)
1312 {
1313 Lisp_Object lgstring = Qnil;
1314 Lisp_Object val, elt, direction = Qnil;
1315
1316 val = CHAR_TABLE_REF (Vcomposition_function_table, cmp_it->ch);
1317 for (EMACS_INT i = 0; i < cmp_it->rule_idx; i++, val = XCDR (val))
1318 continue;
1319 if (charpos < endpos)
1320 {
1321 if (bidi_level < 0)
1322 direction = Qnil;
1323 else if ((bidi_level & 1) == 0)
1324 direction = QL2R;
1325 else
1326 direction = QR2L;
1327 for (; CONSP (val); val = XCDR (val))
1328 {
1329 elt = XCAR (val);
1330 if (! VECTORP (elt) || ASIZE (elt) != 3
1331 || ! FIXNUMP (AREF (elt, 1)))
1332 continue;
1333 if (XFIXNAT (AREF (elt, 1)) != cmp_it->lookback)
1334 goto no_composition;
1335 lgstring = autocmp_chars (elt, charpos, bytepos, endpos,
1336 w, face, string, direction, cmp_it->ch);
1337 if (composition_gstring_p (lgstring))
1338 break;
1339 lgstring = Qnil;
1340 /* Composition failed perhaps because the font doesn't
1341 support sufficient range of characters. Try the
1342 other composition rules if any. */
1343 }
1344 cmp_it->reversed_p = 0;
1345 }
1346 else
1347 {
1348 ptrdiff_t cpos = charpos, bpos = bytepos;
1349
1350 cmp_it->reversed_p = 1;
1351 elt = XCAR (val);
1352 if (cmp_it->lookback > 0)
1353 {
1354 cpos = charpos - cmp_it->lookback;
1355 /* Reject the composition if it starts before ENDPOS,
1356 which here can only happen if
1357 composition-break-at-point is non-nil and point is
1358 inside the composition. */
1359 if (cpos < endpos)
1360 {
1361 eassert (composition_break_at_point);
1362 eassert (endpos == PT);
1363 goto no_composition;
1364 }
1365 if (STRINGP (string))
1366 bpos = string_char_to_byte (string, cpos);
1367 else
1368 bpos = CHAR_TO_BYTE (cpos);
1369 }
1370 /* The bidi_level < 0 case below strictly speaking should
1371 never happen, since we get here when bidi scan direction
1372 is backward in the buffer, which can only happen if the
1373 display routines were called to perform the bidi
1374 reordering. But it doesn't harm to test for that, and
1375 avoid someone raising their brows and thinking it's a
1376 subtle bug... */
1377 if (bidi_level < 0)
1378 direction = Qnil;
1379 else if ((bidi_level & 1) == 0)
1380 direction = QL2R;
1381 else
1382 direction = QR2L;
1383 lgstring = autocmp_chars (elt, cpos, bpos, charpos + 1, w, face,
1384 string, direction, cmp_it->ch);
1385 if (! composition_gstring_p (lgstring)
1386 || cpos + LGSTRING_CHAR_LEN (lgstring) - 1 != charpos)
1387 /* Composition failed or didn't cover the current
1388 character. */
1389 goto no_composition;
1390 }
1391 if (NILP (lgstring))
1392 goto no_composition;
1393 if (NILP (LGSTRING_ID (lgstring)))
1394 lgstring = composition_gstring_put_cache (lgstring, -1);
1395 cmp_it->id = XFIXNUM (LGSTRING_ID (lgstring));
1396 int i;
1397 for (i = 0; i < LGSTRING_GLYPH_LEN (lgstring); i++)
1398 if (NILP (LGSTRING_GLYPH (lgstring, i)))
1399 break;
1400 cmp_it->nglyphs = i;
1401 cmp_it->from = 0;
1402 cmp_it->to = i;
1403 }
1404 else
1405 goto no_composition;
1406 return 1;
1407
1408 no_composition:
1409 if (charpos == endpos)
1410 return 0;
1411 if (charpos < endpos)
1412 {
1413 charpos++;
1414 if (NILP (string))
1415 bytepos += next_char_len (bytepos);
1416 else
1417 bytepos += BYTES_BY_CHAR_HEAD (*(SDATA (string) + bytepos));
1418 }
1419 else
1420 {
1421 charpos--;
1422 /* BYTEPOS is calculated in composition_compute_stop_pos */
1423 bytepos = -1;
1424 }
1425 if (cmp_it->reversed_p)
1426 endpos = -1;
1427 composition_compute_stop_pos (cmp_it, charpos, bytepos, endpos, string);
1428 return 0;
1429 }
1430
1431 /* Update charpos, nchars, nbytes, and width of the current grapheme
1432 cluster.
1433
1434 If the composition is static or automatic in L2R context, the
1435 cluster is identified by CMP_IT->from, and CHARPOS is the position
1436 of the first character of the cluster. In this case, update
1437 CMP_IT->to too.
1438
1439 If the composition is automatic in R2L context, the cluster is
1440 identified by CMP_IT->to, and CHARPOS is the position of the last
1441 character of the cluster. In this case, update CMP_IT->from too.
1442
1443 The return value is the character code of the first character of
1444 the cluster, or -1 if the composition is somehow broken. */
1445
1446 int
1447 composition_update_it (struct composition_it *cmp_it, ptrdiff_t charpos, ptrdiff_t bytepos, Lisp_Object string)
1448 {
1449 int i;
1450 int c UNINIT;
1451
1452 if (cmp_it->ch < 0)
1453 {
1454 /* static composition */
1455 struct composition *cmp = composition_table[cmp_it->id];
1456
1457 cmp_it->charpos = charpos;
1458 cmp_it->to = cmp_it->nglyphs;
1459 if (cmp_it->nglyphs == 0)
1460 c = -1;
1461 else
1462 {
1463 for (i = 0; i < cmp->glyph_len; i++)
1464 /* TAB in a composition means display glyphs with padding
1465 space on the left or right. */
1466 if ((c = COMPOSITION_GLYPH (cmp, i)) != '\t')
1467 break;
1468 if (c == '\t')
1469 c = ' ';
1470 }
1471 cmp_it->width = cmp->width;
1472 charpos += cmp_it->nchars;
1473 if (STRINGP (string))
1474 cmp_it->nbytes = string_char_to_byte (string, charpos) - bytepos;
1475 else
1476 cmp_it->nbytes = CHAR_TO_BYTE (charpos) - bytepos;
1477 }
1478 else
1479 {
1480 /* Automatic composition. */
1481 Lisp_Object gstring = composition_gstring_from_id (cmp_it->id);
1482 Lisp_Object glyph;
1483 ptrdiff_t from;
1484
1485 if (cmp_it->nglyphs == 0)
1486 {
1487 cmp_it->nchars = LGSTRING_CHAR_LEN (gstring);
1488 cmp_it->width = 0;
1489 cmp_it->from = cmp_it->to = 0;
1490 return -1;
1491 }
1492 if (! cmp_it->reversed_p)
1493 {
1494 glyph = LGSTRING_GLYPH (gstring, cmp_it->from);
1495 from = LGLYPH_FROM (glyph);
1496 for (cmp_it->to = cmp_it->from + 1; cmp_it->to < cmp_it->nglyphs;
1497 cmp_it->to++)
1498 {
1499 glyph = LGSTRING_GLYPH (gstring, cmp_it->to);
1500 if (LGLYPH_FROM (glyph) != from)
1501 break;
1502 }
1503 cmp_it->charpos = charpos;
1504 }
1505 else
1506 {
1507 glyph = LGSTRING_GLYPH (gstring, cmp_it->to - 1);
1508 from = LGLYPH_FROM (glyph);
1509 cmp_it->charpos = charpos - (LGLYPH_TO (glyph) - from);
1510 for (cmp_it->from = cmp_it->to - 1; cmp_it->from > 0;
1511 cmp_it->from--)
1512 {
1513 glyph = LGSTRING_GLYPH (gstring, cmp_it->from - 1);
1514 if (LGLYPH_FROM (glyph) != from)
1515 break;
1516 }
1517 }
1518 glyph = LGSTRING_GLYPH (gstring, cmp_it->from);
1519 cmp_it->nchars = LGLYPH_TO (glyph) + 1 - from;
1520 cmp_it->nbytes = 0;
1521 cmp_it->width = 0;
1522 for (i = cmp_it->nchars - 1; i >= 0; i--)
1523 {
1524 c = XFIXNUM (LGSTRING_CHAR (gstring, from + i));
1525 cmp_it->nbytes += CHAR_BYTES (c);
1526 cmp_it->width += CHARACTER_WIDTH (c);
1527 }
1528 }
1529 return c;
1530 }
1531
1532
1533 struct position_record
1534 {
1535 ptrdiff_t pos, pos_byte;
1536 unsigned char *p;
1537 };
1538
1539 /* Update the members of POSITION to the next character boundary. */
1540 #define FORWARD_CHAR(POSITION, STOP) \
1541 do { \
1542 (POSITION).pos++; \
1543 if ((POSITION).pos == (STOP)) \
1544 { \
1545 (POSITION).p = GAP_END_ADDR; \
1546 (POSITION).pos_byte = GPT_BYTE; \
1547 } \
1548 else \
1549 { \
1550 (POSITION).pos_byte += BYTES_BY_CHAR_HEAD (*((POSITION).p)); \
1551 (POSITION).p += BYTES_BY_CHAR_HEAD (*((POSITION).p)); \
1552 } \
1553 } while (0)
1554
1555 /* Update the members of POSITION to the previous character boundary. */
1556 #define BACKWARD_CHAR(POSITION, STOP) \
1557 do { \
1558 if ((POSITION).pos == (STOP)) \
1559 (POSITION).p = GPT_ADDR; \
1560 do { \
1561 (POSITION).pos_byte--; \
1562 (POSITION).p--; \
1563 } while (! CHAR_HEAD_P (*((POSITION).p))); \
1564 (POSITION).pos--; \
1565 } while (0)
1566
1567 /* Similar to find_composition, but find an automatic composition instead.
1568
1569 This function looks for automatic composition at or near position
1570 POS of STRING object, either a buffer or a Lisp string. If STRING
1571 is nil, it defaults to the current buffer. It must be assured that
1572 POS is not within a static composition. Also, the current buffer
1573 must be displayed in some window, otherwise the function will
1574 return FALSE.
1575
1576 If LIMIT is negative, and there's no composition that includes POS
1577 (i.e. starts at or before POS and ends at or after POS), return
1578 FALSE. In this case, the function is allowed to look from POS as
1579 far back as BACKLIM, and as far forward as POS+1 plus
1580 MAX_AUTO_COMPOSITION_LOOKBACK, the maximum number of look-back for
1581 automatic compositions (3) -- this is a limitation imposed by
1582 composition rules in composition-function-table, which see. If
1583 BACKLIM is negative, it stands for the beginning of STRING object:
1584 BEGV for a buffer or position zero for a string.
1585
1586 If LIMIT is positive, search for a composition forward (LIMIT >
1587 POS) or backward (LIMIT < POS). In this case, LIMIT bounds the
1588 search for the first character of a composed sequence.
1589 (LIMIT == POS is the same as LIMIT < 0.) If LIMIT > POS, the
1590 function can find a composition that starts after POS.
1591
1592 BACKLIM limits how far back is the function allowed to look in
1593 STRING object while trying to find a position where it is safe to
1594 start searching forward for compositions. Such a safe place is
1595 generally the position after a character that can never be
1596 composed.
1597
1598 If BACKLIM is negative, that means the first character position of
1599 STRING object; this is useful when calling the function for the
1600 first time for a given buffer or string, since it is possible that
1601 a composition begins before POS. However, if POS is very far from
1602 the beginning of STRING object, a negative value of BACKLIM could
1603 make the function slow. For that reason, when STRING is a buffer
1604 or nil, we restrict the search back to the first newline before
1605 POS. Also, in this case the function may return START and END that
1606 do not include POS, something that is not necessarily wanted, and
1607 needs to be explicitly checked by the caller.
1608
1609 When calling the function in a loop for the same buffer/string, the
1610 caller should generally set BACKLIM equal to POS, to avoid costly
1611 repeated searches backward. This is because if the previous
1612 positions were already checked for compositions, there should be no
1613 reason to re-check them.
1614
1615 If BACKLIM is positive, it must be less or equal to LIMIT.
1616
1617 If an automatic composition satisfying the above conditions is
1618 found, set *GSTRING to the Lispy glyph-string representing the
1619 composition, set *START and *END to the start and end of the
1620 composed sequence, and return TRUE. Otherwise, set *GSTRING to
1621 nil, and return FALSE. */
1622
1623 bool
1624 find_automatic_composition (ptrdiff_t pos, ptrdiff_t limit, ptrdiff_t backlim,
1625 ptrdiff_t *start, ptrdiff_t *end,
1626 Lisp_Object *gstring, Lisp_Object string)
1627 {
1628 ptrdiff_t head, tail, stop;
1629 /* Forward limit position of checking a composition taking a
1630 looking-back count into account. */
1631 ptrdiff_t fore_check_limit;
1632 struct position_record cur, prev;
1633 int c;
1634 Lisp_Object window;
1635 struct window *w;
1636 bool need_adjustment = 0;
1637
1638 window = Fget_buffer_window (Fcurrent_buffer (), Qnil);
1639 if (NILP (window))
1640 return 0;
1641 w = XWINDOW (window);
1642
1643 cur.pos = pos;
1644 if (NILP (string))
1645 {
1646 if (backlim < 0)
1647 {
1648 /* This assumes a newline can never be composed. */
1649 head = find_newline (pos, -1, 0, -1, -1, NULL, NULL, false);
1650 }
1651 else
1652 head = backlim;
1653 if (current_buffer->long_line_optimizations_p)
1654 {
1655 /* In buffers with very long lines, this function becomes very
1656 slow. Pretend that the buffer is narrowed to make it fast. */
1657 ptrdiff_t begv = get_small_narrowing_begv (w, window_point (w));
1658 if (pos > begv)
1659 head = begv;
1660 }
1661 tail = ZV;
1662 stop = GPT;
1663 cur.pos_byte = CHAR_TO_BYTE (cur.pos);
1664 cur.p = BYTE_POS_ADDR (cur.pos_byte);
1665 }
1666 else
1667 {
1668 head = backlim < 0 ? 0 : backlim, tail = SCHARS (string), stop = -1;
1669 cur.pos_byte = string_char_to_byte (string, cur.pos);
1670 cur.p = SDATA (string) + cur.pos_byte;
1671 }
1672 if (limit < 0)
1673 /* Finding a composition covering the character after POS is the
1674 same as setting LIMIT to POS. */
1675 limit = pos;
1676
1677 eassert (backlim < 0 || backlim <= limit);
1678
1679 if (limit <= pos)
1680 fore_check_limit = min (tail, pos + 1 + MAX_AUTO_COMPOSITION_LOOKBACK);
1681 else
1682 fore_check_limit = min (tail, limit + MAX_AUTO_COMPOSITION_LOOKBACK);
1683
1684 /* Provided that we have these possible compositions now:
1685
1686 POS: 1 2 3 4 5 6 7 8 9
1687 |-A-|
1688 |-B-|-C-|--D--|
1689
1690 Here, it is known that characters after positions 1 and 9 can
1691 never be composed (i.e. ! char_composable_p (CH)), and
1692 composition A is an invalid one because it's partially covered by
1693 the valid composition C. And to know whether a composition is
1694 valid or not, the only way is to start searching forward from a
1695 position that can not be a tail part of composition (it's 2 in
1696 the above case).
1697
1698 Now we have these cases (1 through 4):
1699
1700 -- character after POS is ... --
1701 not composable composable
1702 LIMIT <= POS (1) (3)
1703 POS < LIMIT (2) (4)
1704
1705 Among them, in case (2), we simply search forward from POS.
1706
1707 In the other cases, we at first rewind back to the position where
1708 the previous character is not composable or the beginning of
1709 buffer (string), then search compositions forward. In case (1)
1710 and (3) we repeat this process until a composition is found. */
1711
1712 while (1)
1713 {
1714 c = STRING_CHAR (cur.p);
1715 if (! char_composable_p (c))
1716 {
1717 if (limit <= pos) /* case (1) */
1718 {
1719 do {
1720 if (cur.pos <= limit)
1721 return 0;
1722 BACKWARD_CHAR (cur, stop);
1723 c = STRING_CHAR (cur.p);
1724 } while (! char_composable_p (c));
1725 fore_check_limit = cur.pos + 1;
1726 }
1727 else /* case (2) */
1728 /* No need of rewinding back. */
1729 goto search_forward;
1730 }
1731
1732 /* Rewind back to the position where we can safely search
1733 forward for compositions. It is assured that the character
1734 at cur.pos is composable. */
1735 while (head < cur.pos)
1736 {
1737 prev = cur;
1738 BACKWARD_CHAR (cur, stop);
1739 c = STRING_CHAR (cur.p);
1740 if (! char_composable_p (c))
1741 {
1742 cur = prev;
1743 break;
1744 }
1745 }
1746
1747 search_forward:
1748 /* Now search forward. */
1749 *gstring = Qnil;
1750 prev = cur; /* remember the start of searching position. */
1751 while (cur.pos < fore_check_limit)
1752 {
1753 Lisp_Object val;
1754
1755 c = STRING_CHAR (cur.p);
1756 for (val = CHAR_TABLE_REF (Vcomposition_function_table, c);
1757 CONSP (val); val = XCDR (val))
1758 {
1759 Lisp_Object elt = XCAR (val);
1760
1761 if (VECTORP (elt) && ASIZE (elt) == 3 && FIXNATP (AREF (elt, 1)))
1762 {
1763 EMACS_INT check_pos = cur.pos - XFIXNAT (AREF (elt, 1));
1764 struct position_record check;
1765
1766 if (check_pos < head
1767 || (limit <= pos ? pos < check_pos
1768 : limit <= check_pos))
1769 continue;
1770 for (check = cur; check_pos < check.pos; )
1771 BACKWARD_CHAR (check, stop);
1772 *gstring = autocmp_chars (elt, check.pos, check.pos_byte,
1773 tail, w, NULL, string, Qnil, c);
1774 need_adjustment = 1;
1775 if (NILP (*gstring))
1776 {
1777 /* As we have called Lisp, there's a possibility
1778 that buffer/string is relocated. */
1779 if (NILP (string))
1780 cur.p = BYTE_POS_ADDR (cur.pos_byte);
1781 else
1782 cur.p = SDATA (string) + cur.pos_byte;
1783 }
1784 else
1785 {
1786 /* We found a candidate of a target composition. */
1787 *start = check.pos;
1788 *end = check.pos + LGSTRING_CHAR_LEN (*gstring);
1789 if (pos < limit
1790 ? pos < *end
1791 : *start <= pos && pos < *end)
1792 /* This is the target composition. */
1793 return 1;
1794 cur.pos = *end;
1795 if (NILP (string))
1796 {
1797 cur.pos_byte = CHAR_TO_BYTE (cur.pos);
1798 cur.p = BYTE_POS_ADDR (cur.pos_byte);
1799 }
1800 else
1801 {
1802 cur.pos_byte = string_char_to_byte (string, cur.pos);
1803 cur.p = SDATA (string) + cur.pos_byte;
1804 }
1805 break;
1806 }
1807 }
1808 }
1809 if (! CONSP (val))
1810 /* We found no composition here. */
1811 FORWARD_CHAR (cur, stop);
1812 }
1813
1814 if (pos < limit) /* case (2) and (4)*/
1815 return 0;
1816 if (! NILP (*gstring))
1817 return 1;
1818 if (prev.pos == head)
1819 return 0;
1820 cur = prev;
1821 if (need_adjustment)
1822 {
1823 if (NILP (string))
1824 cur.p = BYTE_POS_ADDR (cur.pos_byte);
1825 else
1826 cur.p = SDATA (string) + cur.pos_byte;
1827 }
1828 BACKWARD_CHAR (cur, stop);
1829 }
1830 }
1831
1832 /* Return the adjusted point provided that point is moved from LAST_PT
1833 to NEW_PT. */
1834
1835 ptrdiff_t
1836 composition_adjust_point (ptrdiff_t last_pt, ptrdiff_t new_pt)
1837 {
1838 ptrdiff_t i, beg, end;
1839 Lisp_Object val;
1840
1841 if (new_pt == BEGV || new_pt == ZV)
1842 return new_pt;
1843
1844 /* At first check the static composition. */
1845 if (get_property_and_range (new_pt, Qcomposition, &val, &beg, &end, Qnil)
1846 && composition_valid_p (beg, end, val))
1847 {
1848 if (beg < new_pt /* && end > new_pt <- It's always the case. */
1849 && (last_pt <= beg || last_pt >= end))
1850 return (new_pt < last_pt ? beg : end);
1851 return new_pt;
1852 }
1853
1854 if (NILP (BVAR (current_buffer, enable_multibyte_characters))
1855 || inhibit_auto_composition ())
1856 return new_pt;
1857
1858 /* Next check the automatic composition. */
1859 if (! find_automatic_composition (new_pt, (ptrdiff_t) -1, (ptrdiff_t) -1,
1860 &beg, &end, &val, Qnil)
1861 || beg == new_pt)
1862 return new_pt;
1863 for (i = 0; i < LGSTRING_GLYPH_LEN (val); i++)
1864 {
1865 Lisp_Object glyph = LGSTRING_GLYPH (val, i);
1866
1867 if (NILP (glyph))
1868 break;
1869 if (beg + LGLYPH_FROM (glyph) == new_pt)
1870 return new_pt;
1871 if (beg + LGLYPH_TO (glyph) >= new_pt)
1872 return (new_pt < last_pt
1873 ? beg + LGLYPH_FROM (glyph)
1874 : beg + LGLYPH_TO (glyph) + 1);
1875 }
1876 return new_pt;
1877 }
1878
1879 DEFUN ("composition-get-gstring", Fcomposition_get_gstring,
1880 Scomposition_get_gstring, 4, 4, 0,
1881 doc: /* Return a glyph-string for characters between FROM and TO.
1882 If the glyph string is for graphic display, FONT-OBJECT must be
1883 a font-object to use for those characters.
1884 Otherwise (for terminal display), FONT-OBJECT must be a terminal ID, a
1885 frame, or nil for the selected frame's terminal device.
1886
1887 If the optional 4th argument STRING is not nil, it is a string
1888 containing the target characters between indices FROM and TO,
1889 which are treated as in `substring'. Otherwise FROM and TO are
1890 character positions in current buffer; they can be in either order,
1891 and can be integers or markers.
1892
1893 A glyph-string is a vector containing information about how to display
1894 a specific character sequence. The format is:
1895 [HEADER ID GLYPH ...]
1896
1897 HEADER is a vector of this form:
1898 [FONT-OBJECT CHAR ...]
1899 where
1900 FONT-OBJECT is a font-object for all glyphs in the glyph-string,
1901 or the terminal coding system of the specified terminal.
1902 CHARs are characters to be composed by GLYPHs.
1903
1904 ID is an identification number of the glyph-string. It may be nil if
1905 not yet shaped.
1906
1907 GLYPH is a vector whose elements have this form:
1908 [ FROM-IDX TO-IDX C CODE WIDTH LBEARING RBEARING ASCENT DESCENT
1909 [ [X-OFF Y-OFF WADJUST] | nil] ]
1910 where
1911 FROM-IDX and TO-IDX are used internally and should not be touched.
1912 C is the character of the glyph.
1913 CODE is the glyph-code of C in FONT-OBJECT.
1914 WIDTH thru DESCENT are the metrics (in pixels) of the glyph.
1915 X-OFF and Y-OFF are offsets to the base position for the glyph.
1916 WADJUST is the adjustment to the normal width of the glyph.
1917
1918 If GLYPH is nil, the remaining elements of the glyph-string vector
1919 should be ignored. */)
1920 (Lisp_Object from, Lisp_Object to, Lisp_Object font_object, Lisp_Object string)
1921 {
1922 Lisp_Object gstring, header;
1923 ptrdiff_t frompos, frombyte, topos;
1924
1925 if (! FONT_OBJECT_P (font_object))
1926 {
1927 struct coding_system *coding;
1928 struct terminal *terminal = decode_live_terminal (font_object);
1929
1930 coding = ((TERMINAL_TERMINAL_CODING (terminal)->common_flags
1931 & CODING_REQUIRE_ENCODING_MASK)
1932 ? TERMINAL_TERMINAL_CODING (terminal) : &safe_terminal_coding);
1933 font_object = CODING_ID_NAME (coding->id);
1934 }
1935
1936 if (NILP (string))
1937 {
1938 if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
1939 error ("Attempt to shape unibyte text");
1940 validate_region (&from, &to);
1941 frompos = XFIXNAT (from);
1942 topos = XFIXNAT (to);
1943 frombyte = CHAR_TO_BYTE (frompos);
1944 }
1945 else
1946 {
1947 CHECK_STRING (string);
1948 ptrdiff_t chars = SCHARS (string);
1949 validate_subarray (string, from, to, chars, &frompos, &topos);
1950 if (! STRING_MULTIBYTE (string))
1951 {
1952 ptrdiff_t i;
1953
1954 for (i = SBYTES (string) - 1; i >= 0; i--)
1955 if (!ASCII_CHAR_P (SREF (string, i)))
1956 error ("Attempt to shape unibyte text");
1957 /* STRING is a pure-ASCII string, so we can convert it (or,
1958 rather, its copy) to multibyte and use that thereafter. */
1959 /* FIXME: Not clear why we need to do that: AFAICT the rest of
1960 the code should work on an ASCII-only unibyte string just
1961 as well (bug#56347). */
1962 string = make_multibyte_string (SSDATA (string), chars, chars);
1963 }
1964 frombyte = string_char_to_byte (string, frompos);
1965 }
1966
1967 header = fill_gstring_header (frompos, frombyte,
1968 topos, font_object, string);
1969 gstring = composition_gstring_lookup_cache (header);
1970 if (! NILP (gstring))
1971 return gstring;
1972
1973 if (LGSTRING_GLYPH_LEN (gstring_work) < topos - frompos)
1974 gstring_work = make_nil_vector (topos - frompos + 2);
1975 LGSTRING_SET_HEADER (gstring_work, header);
1976 LGSTRING_SET_ID (gstring_work, Qnil);
1977 fill_gstring_body (gstring_work);
1978 return gstring_work;
1979 }
1980
1981
1982 /* Emacs Lisp APIs. */
1983
1984 DEFUN ("compose-region-internal", Fcompose_region_internal,
1985 Scompose_region_internal, 2, 4, 0,
1986 doc: /* Internal use only.
1987
1988 Compose text in the region between START and END.
1989 Optional 3rd and 4th arguments are COMPONENTS and MODIFICATION-FUNC
1990 for the composition. See `compose-region' for more details. */)
1991 (Lisp_Object start, Lisp_Object end, Lisp_Object components, Lisp_Object modification_func)
1992 {
1993 validate_region (&start, &end);
1994 if (!NILP (components)
1995 && !FIXNUMP (components)
1996 && !CONSP (components)
1997 && !STRINGP (components))
1998 CHECK_VECTOR (components);
1999
2000 compose_text (XFIXNUM (start), XFIXNUM (end), components, modification_func, Qnil);
2001 return Qnil;
2002 }
2003
2004 DEFUN ("compose-string-internal", Fcompose_string_internal,
2005 Scompose_string_internal, 3, 5, 0,
2006 doc: /* Internal use only.
2007
2008 Compose text between indices START and END of STRING, where
2009 START and END are treated as in `substring'. Optional 4th
2010 and 5th arguments are COMPONENTS and MODIFICATION-FUNC
2011 for the composition. See `compose-string' for more details. */)
2012 (Lisp_Object string, Lisp_Object start, Lisp_Object end,
2013 Lisp_Object components, Lisp_Object modification_func)
2014 {
2015 ptrdiff_t from, to;
2016
2017 CHECK_STRING (string);
2018 validate_subarray (string, start, end, SCHARS (string), &from, &to);
2019 compose_text (from, to, components, modification_func, string);
2020 return string;
2021 }
2022
2023 DEFUN ("find-composition-internal", Ffind_composition_internal,
2024 Sfind_composition_internal, 4, 4, 0,
2025 doc: /* Internal use only.
2026
2027 Return information about composition at or nearest to position POS.
2028 See `find-composition' for more details. */)
2029 (Lisp_Object pos, Lisp_Object limit, Lisp_Object string, Lisp_Object detail_p)
2030 {
2031 Lisp_Object prop, tail, gstring;
2032 ptrdiff_t start, end, from, to;
2033 int id;
2034
2035 EMACS_INT fixed_pos = fix_position (pos);
2036 if (!NILP (limit))
2037 to = clip_to_bounds (PTRDIFF_MIN, fix_position (limit), ZV);
2038 else
2039 to = -1;
2040
2041 if (!NILP (string))
2042 {
2043 CHECK_STRING (string);
2044 if (! (0 <= fixed_pos && fixed_pos <= SCHARS (string)))
2045 args_out_of_range (string, pos);
2046 }
2047 else
2048 {
2049 if (! (BEGV <= fixed_pos && fixed_pos <= ZV))
2050 args_out_of_range (Fcurrent_buffer (), pos);
2051 }
2052 from = fixed_pos;
2053
2054 if (!find_composition (from, to, &start, &end, &prop, string))
2055 {
2056 if (((NILP (string)
2057 && !NILP (BVAR (current_buffer, enable_multibyte_characters)))
2058 || (!NILP (string) && STRING_MULTIBYTE (string)))
2059 && ! inhibit_auto_composition ()
2060 && find_automatic_composition (from, to, (ptrdiff_t) -1,
2061 &start, &end, &gstring, string))
2062 return list3 (make_fixnum (start), make_fixnum (end), gstring);
2063 return Qnil;
2064 }
2065 if (! (start <= fixed_pos && fixed_pos < end))
2066 {
2067 ptrdiff_t s, e;
2068
2069 if (find_automatic_composition (from, to, (ptrdiff_t) -1,
2070 &s, &e, &gstring, string)
2071 && (e <= fixed_pos ? e > end : s < start))
2072 return list3 (make_fixnum (s), make_fixnum (e), gstring);
2073 }
2074 if (!composition_valid_p (start, end, prop))
2075 return list3 (make_fixnum (start), make_fixnum (end), Qnil);
2076 if (NILP (detail_p))
2077 return list3 (make_fixnum (start), make_fixnum (end), Qt);
2078
2079 if (composition_registered_p (prop))
2080 id = COMPOSITION_ID (prop);
2081 else
2082 {
2083 ptrdiff_t start_byte = (NILP (string)
2084 ? CHAR_TO_BYTE (start)
2085 : string_char_to_byte (string, start));
2086 id = get_composition_id (start, start_byte, end - start, prop, string);
2087 }
2088
2089 if (id >= 0)
2090 {
2091 Lisp_Object components, relative_p, mod_func;
2092 enum composition_method method = composition_method (prop);
2093 int width = composition_table[id]->width;
2094
2095 components = Fcopy_sequence (COMPOSITION_COMPONENTS (prop));
2096 relative_p = (method == COMPOSITION_WITH_RULE_ALTCHARS
2097 ? Qnil : Qt);
2098 mod_func = COMPOSITION_MODIFICATION_FUNC (prop);
2099 tail = list4 (components, relative_p, mod_func, make_fixnum (width));
2100 }
2101 else
2102 tail = Qnil;
2103
2104 return Fcons (make_fixnum (start), Fcons (make_fixnum (end), tail));
2105 }
2106
2107 static int
2108 compare_composition_rules (const void *r1, const void *r2)
2109 {
2110 Lisp_Object vec1 = *(Lisp_Object *)r1, vec2 = *(Lisp_Object *)r2;
2111
2112 return XFIXNAT (AREF (vec2, 1)) - XFIXNAT (AREF (vec1, 1));
2113 }
2114
2115 DEFUN ("composition-sort-rules", Fcomposition_sort_rules,
2116 Scomposition_sort_rules, 1, 1, 0,
2117 doc: /* Sort composition RULES by their LOOKBACK parameter.
2118
2119 If RULES include just one rule, return RULES.
2120 Otherwise, return a new list of rules where all the rules are
2121 arranged in decreasing order of the LOOKBACK parameter of the
2122 rules (the second element of the rule's vector). This is required
2123 when combining composition rules from different sources, because
2124 of the way buffer text is examined for matching one of the rules. */)
2125 (Lisp_Object rules)
2126 {
2127 ptrdiff_t nrules;
2128 USE_SAFE_ALLOCA;
2129
2130 CHECK_LIST (rules);
2131 nrules = list_length (rules);
2132 if (nrules > 1)
2133 {
2134 ptrdiff_t i;
2135 Lisp_Object *sortvec;
2136
2137 SAFE_NALLOCA (sortvec, 1, nrules);
2138 for (i = 0; i < nrules; i++)
2139 {
2140 Lisp_Object elt = XCAR (rules);
2141 if (VECTORP (elt) && ASIZE (elt) == 3 && FIXNATP (AREF (elt, 1)))
2142 sortvec[i] = elt;
2143 else
2144 error ("Invalid composition rule in RULES argument");
2145 rules = XCDR (rules);
2146 }
2147 qsort (sortvec, nrules, sizeof (Lisp_Object), compare_composition_rules);
2148 rules = Flist (nrules, sortvec);
2149 SAFE_FREE ();
2150 }
2151
2152 return rules;
2153 }
2154
2155
2156 void
2157 syms_of_composite (void)
2158 {
2159 int i;
2160
2161 DEFSYM (Qcomposition, "composition");
2162
2163 /* Make a hash table for static composition. */
2164 /* We used to make the hash table weak so that unreferenced
2165 compositions can be garbage-collected. But, usually once
2166 created compositions are repeatedly used in an Emacs session,
2167 and thus it's not worth to save memory in such a way. So, we
2168 make the table not weak. */
2169 Lisp_Object args[] = {QCtest, Qequal, QCsize, make_fixnum (311)};
2170 composition_hash_table = CALLMANY (Fmake_hash_table, args);
2171 staticpro (&composition_hash_table);
2172
2173 /* Make a hash table for glyph-string. */
2174 gstring_hash_table = CALLMANY (Fmake_hash_table, args);
2175 staticpro (&gstring_hash_table);
2176
2177 staticpro (&gstring_work_headers);
2178 gstring_work_headers = make_nil_vector (8);
2179 for (i = 0; i < 8; i++)
2180 ASET (gstring_work_headers, i, make_nil_vector (i + 2));
2181 staticpro (&gstring_work);
2182 gstring_work = make_nil_vector (10);
2183
2184 /* Text property `composition' should be nonsticky by default. */
2185 Vtext_property_default_nonsticky
2186 = Fcons (Fcons (Qcomposition, Qt), Vtext_property_default_nonsticky);
2187
2188 DEFVAR_LISP ("compose-chars-after-function", Vcompose_chars_after_function,
2189 doc: /* Function to adjust composition of buffer text.
2190
2191 This function is called with three arguments: FROM, TO, and OBJECT.
2192 FROM and TO specify the range of text whose composition should be
2193 adjusted. OBJECT, if non-nil, is a string that contains the text.
2194
2195 This function is called after a text with `composition' property is
2196 inserted or deleted to keep `composition' property of buffer text
2197 valid.
2198
2199 The default value is the function `compose-chars-after'. */);
2200 Vcompose_chars_after_function = intern_c_string ("compose-chars-after");
2201
2202 DEFSYM (Qauto_composed, "auto-composed");
2203
2204 DEFVAR_LISP ("auto-composition-mode", Vauto_composition_mode,
2205 doc: /* Non-nil if Auto-Composition mode is enabled.
2206 Use the command `auto-composition-mode' to change this variable.
2207
2208 If this variable is a string, `auto-composition-mode' will be disabled in
2209 buffers displayed on a terminal whose type, as reported by `tty-type',
2210 compares equal to that string. */);
2211 Vauto_composition_mode = Qt;
2212
2213 DEFVAR_LISP ("auto-composition-function", Vauto_composition_function,
2214 doc: /* Function to call to compose characters automatically.
2215 This function is called from the display engine with 6 arguments:
2216 FUNC, FROM, TO, FONT-OBJECT, STRING, and DIRECTION.
2217
2218 FUNC is the function to compose characters. On text-mode display,
2219 FUNC is ignored and `compose-gstring-for-terminal' is used instead.
2220
2221 If STRING is nil, the function must compose characters in the region
2222 between FROM and TO in the current buffer.
2223
2224 Otherwise, STRING is a string, and FROM and TO are indices into the
2225 string. In this case, the function must compose characters in the
2226 string.
2227
2228 FONT-OBJECT is the font to use, or nil if characters are to be
2229 composed on a text-mode display.
2230
2231 DIRECTION is the bidi directionality of the text to shape. It could
2232 be L2R or R2L, or nil if unknown. */);
2233 Vauto_composition_function = Qnil;
2234
2235 DEFVAR_LISP ("composition-function-table", Vcomposition_function_table,
2236 doc: /* Char-table of functions for automatic character composition.
2237 For each character that has to be composed automatically with
2238 preceding and/or following characters, this char-table contains
2239 a function to call to compose that character.
2240
2241 The element at index C in the table, if non-nil, is a list of
2242 composition rules of the form ([PATTERN PREV-CHARS FUNC] ...);
2243 the rules must be specified in the descending order of PREV-CHARS
2244 values.
2245
2246 PATTERN is a regular expression which C and the surrounding
2247 characters must match.
2248
2249 PREV-CHARS is a non-negative integer (less than 4) specifying how many
2250 characters before C to check the matching with PATTERN. If it is 0,
2251 PATTERN must match C and the following characters. If it is 1,
2252 PATTERN must match a character before C and the following characters.
2253
2254 If PREV-CHARS is 0, PATTERN can be nil, which means that the
2255 single character C should be composed.
2256
2257 FUNC is a function to return a glyph-string representing a
2258 composition of the characters that match PATTERN. It is
2259 called with one argument GSTRING.
2260
2261 GSTRING is a template of a glyph-string to return. It is already
2262 filled with a proper header for the characters to compose, and
2263 glyphs corresponding to those characters one by one. The
2264 function must return a new glyph-string with the same header as
2265 GSTRING, or modify GSTRING itself and return it.
2266
2267 See also the documentation of `auto-composition-mode'. */);
2268 Vcomposition_function_table = Fmake_char_table (Qnil, Qnil);
2269
2270 DEFVAR_LISP ("auto-composition-emoji-eligible-codepoints", Vauto_composition_emoji_eligible_codepoints,
2271 doc: /* List of codepoints for which auto-composition will check for an emoji font.
2272
2273 These are codepoints which have Emoji_Presentation = No, and thus by
2274 default are not displayed as emoji. In certain circumstances, such as
2275 when followed by U+FE0F (VS-16) the emoji font should be used for
2276 them anyway.
2277
2278 This list is auto-generated, you should not need to modify it. */);
2279 Vauto_composition_emoji_eligible_codepoints = Qnil;
2280
2281 defsubr (&Scompose_region_internal);
2282 defsubr (&Scompose_string_internal);
2283 defsubr (&Sfind_composition_internal);
2284 defsubr (&Scomposition_get_gstring);
2285 defsubr (&Sclear_composition_cache);
2286 defsubr (&Scomposition_sort_rules);
2287 }