1 /* Composite sequence support.
2 Copyright (C) 2001-2023 Free Software Foundation, Inc.
3 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 National Institute of Advanced Industrial Science and Technology (AIST)
5 Registration Number H14PRO021
6 Copyright (C) 2003, 2006
7 National Institute of Advanced Industrial Science and Technology (AIST)
8 Registration Number H13PRO009
9
10 This file is part of GNU Emacs.
11
12 GNU Emacs is free software: you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation, either version 3 of the License, or (at
15 your option) any later version.
16
17 GNU Emacs is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
21
22 You should have received a copy of the GNU General Public License
23 along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
24
25 #include <config.h>
26
27 #include <stdlib.h> /* for qsort */
28
29 #include "lisp.h"
30 #include "character.h"
31 #include "composite.h"
32 #include "buffer.h"
33 #include "coding.h"
34 #include "intervals.h"
35 #include "frame.h"
36 #include "dispextern.h"
37 #include "termhooks.h"
38
39
40 /* Emacs uses special text property `composition' to support character
41 composition. A sequence of characters that have the same (i.e. eq)
42 `composition' property value is treated as a single composite
43 sequence (we call it just `composition' here after). Characters in
44 a composition are all composed somehow on the screen.
45
46 The property value has this form when the composition is made:
47 ((LENGTH . COMPONENTS) . MODIFICATION-FUNC)
48 then turns to this form:
49 (COMPOSITION-ID . (LENGTH COMPONENTS-VEC . MODIFICATION-FUNC))
50 when the composition is registered in composition_hash_table and
51 composition_table. These rather peculiar structures were designed
52 to make it easy to distinguish them quickly (we can do that by
53 checking only the first element) and to extract LENGTH (from the
54 former form) and COMPOSITION-ID (from the latter form).
55
56 We register a composition when it is displayed, or when the width
57 is required (for instance, to calculate columns).
58
59 LENGTH -- Length of the composition. This information is used to
60 check the validity of the composition.
61
62 COMPONENTS -- Character, string, vector, list, or nil.
63
64 If it is nil, characters in the text are composed relatively
65 according to their metrics in font glyphs.
66
67 If it is a character or a string, the character or characters
68 in the string are composed relatively.
69
70 If it is a vector or list of integers, the element is a
71 character or an encoded composition rule. The characters are
72 composed according to the rules. (2N)th elements are
73 characters to be composed and (2N+1)th elements are
74 composition rules to tell how to compose (2N+2)th element with
75 the previously composed 2N glyphs.
76
77 COMPONENTS-VEC -- Vector of integers. In a relative composition,
78 the elements are the characters to be composed. In a rule-base
79 composition, the elements are characters or encoded
80 composition rules.
81
82 MODIFICATION-FUNC -- If non-nil, it is a function to call when the
83 composition gets invalid after a modification in a buffer. If
84 it is nil, a function in `composition-function-table' of the
85 first character in the sequence is called.
86
87 COMPOSITION-ID --Identification number of the composition. It is
88 used as an index to composition_table for the composition.
89
90 When Emacs has to display a composition or has to know its
91 displaying width, the function get_composition_id is called. It
92 returns COMPOSITION-ID so that the caller can access the
93 information about the composition through composition_table. If a
94 COMPOSITION-ID has not yet been assigned to the composition,
95 get_composition_id checks the validity of `composition' property,
96 and, if valid, assigns a new ID, registers the information in
97 composition_hash_table and composition_table, and changes the form
98 of the property value. If the property is invalid,
99 get_composition_id returns -1 without changing the property value.
100
101 We use two tables to keep the information about composition;
102 composition_hash_table and composition_table.
103
104 The former is a hash table whose keys are COMPONENTS-VECs and
105 values are the corresponding COMPOSITION-IDs. This hash table is
106 weak, but as each key (COMPONENTS-VEC) is also kept as a value of the
107 `composition' property, it won't be collected as garbage until all
108 bits of text that have the same COMPONENTS-VEC are deleted.
109
110 The latter is a table of pointers to `struct composition' indexed
111 by COMPOSITION-ID. This structure keeps the other information (see
112 composite.h).
113
114 In general, a text property holds information about individual
115 characters. But, a `composition' property holds information about
116 a sequence of characters (in this sense, it is like the `intangible'
117 property). That means that we should not share the property value
118 in adjacent compositions -- we can't distinguish them if they have the
119 same property. So, after any changes, we call
120 `update_compositions' and change a property of one of adjacent
121 compositions to a copy of it. This function also runs a proper
122 composition modification function to make a composition that gets
123 invalid by the change valid again.
124
125 As the value of the `composition' property holds information about a
126 specific range of text, the value gets invalid if we change the
127 text in the range. We treat the `composition' property as always
128 rear-nonsticky (currently by setting default-text-properties to
129 (rear-nonsticky (composition))) and we never make properties of
130 adjacent compositions identical. Thus, any such changes make the
131 range just shorter. So, we can check the validity of the `composition'
132 property by comparing LENGTH information with the actual length of
133 the composition.
134
135 */
136
137
138 /* Table of pointers to the structure `composition' indexed by
139 COMPOSITION-ID. This structure is for storing information about
140 each composition except for COMPONENTS-VEC. */
141 struct composition **composition_table;
142
143 /* The current size of `composition_table'. */
144 static ptrdiff_t composition_table_size;
145
146 /* Number of compositions currently made. */
147 ptrdiff_t n_compositions;
148
149 /* Hash table for compositions. The key is COMPONENTS-VEC of
150 `composition' property. The value is the corresponding
151 COMPOSITION-ID. */
152 Lisp_Object composition_hash_table;
153
154 /* Maximum number of characters to look back for
155 auto-compositions. */
156 #define MAX_AUTO_COMPOSITION_LOOKBACK 3
157
158 /* Return COMPOSITION-ID of a composition at buffer position
159 CHARPOS/BYTEPOS and length NCHARS. The `composition' property of
160 the sequence is PROP. STRING, if non-nil, is a string that
161 contains the composition instead of the current buffer.
162
163 If the composition is invalid, return -1. */
164
165 ptrdiff_t
166 get_composition_id (ptrdiff_t charpos, ptrdiff_t bytepos, ptrdiff_t nchars,
167 Lisp_Object prop, Lisp_Object string)
168 {
169 Lisp_Object id, length, components, key, *key_contents, hash_code;
170 ptrdiff_t glyph_len;
171 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (composition_hash_table);
172 ptrdiff_t hash_index;
173 enum composition_method method;
174 struct composition *cmp;
175 int ch;
176
177 /* Maximum length of a string of glyphs. XftGlyphExtents limits
178 this to INT_MAX, and Emacs limits it further. Divide INT_MAX - 1
179 by 2 because gui_produce_glyphs computes glyph_len * 2 + 1. Divide
180 the size by MAX_MULTIBYTE_LENGTH because encode_terminal_code
181 multiplies glyph_len by MAX_MULTIBYTE_LENGTH. */
182 enum {
183 GLYPH_LEN_MAX = min ((INT_MAX - 1) / 2,
184 min (PTRDIFF_MAX, SIZE_MAX) / MAX_MULTIBYTE_LENGTH)
185 };
186
187 /* PROP should be
188 Form-A: ((LENGTH . COMPONENTS) . MODIFICATION-FUNC)
189 or
190 Form-B: (COMPOSITION-ID . (LENGTH COMPONENTS-VEC . MODIFICATION-FUNC))
191 */
192 if (nchars == 0 || !CONSP (prop))
193 goto invalid_composition;
194
195 id = XCAR (prop);
196 if (FIXNUMP (id))
197 {
198 /* PROP should be Form-B. */
199 if (XFIXNUM (id) < 0 || XFIXNUM (id) >= n_compositions)
200 goto invalid_composition;
201 return XFIXNUM (id);
202 }
203
204 /* PROP should be Form-A.
205 Thus, ID should be (LENGTH . COMPONENTS). */
206 if (!CONSP (id))
207 goto invalid_composition;
208 length = XCAR (id);
209 if (!FIXNUMP (length) || XFIXNUM (length) != nchars)
210 goto invalid_composition;
211
212 components = XCDR (id);
213
214 /* Check if the same composition has already been registered or not
215 by consulting composition_hash_table. The key for this table is
216 COMPONENTS (converted to a vector COMPONENTS-VEC) or, if it is
217 nil, vector of characters in the composition range. */
218 if (FIXNUMP (components))
219 key = make_vector (1, components);
220 else if (STRINGP (components) || CONSP (components))
221 key = Fvconcat (1, &components);
222 else if (VECTORP (components))
223 key = components;
224 else if (NILP (components))
225 {
226 key = make_uninit_vector (nchars);
227 if (STRINGP (string))
228 for (ptrdiff_t i = 0; i < nchars; i++)
229 {
230 ch = fetch_string_char_advance (string, &charpos, &bytepos);
231 ASET (key, i, make_fixnum (ch));
232 }
233 else
234 for (ptrdiff_t i = 0; i < nchars; i++)
235 {
236 ch = fetch_char_advance (&charpos, &bytepos);
237 ASET (key, i, make_fixnum (ch));
238 }
239 }
240 else
241 goto invalid_composition;
242
243 hash_index = hash_lookup (hash_table, key, &hash_code);
244 if (hash_index >= 0)
245 {
246 /* We have already registered the same composition. Change PROP
247 from Form-A above to Form-B while replacing COMPONENTS with
248 COMPONENTS-VEC stored in the hash table. We can directly
249 modify the cons cell of PROP because it is not shared. */
250 key = HASH_KEY (hash_table, hash_index);
251 id = HASH_VALUE (hash_table, hash_index);
252 XSETCAR (prop, id);
253 XSETCDR (prop, Fcons (make_fixnum (nchars), Fcons (key, XCDR (prop))));
254 return XFIXNUM (id);
255 }
256
257 /* This composition is a new one. We must register it. */
258
259 /* Check if we have sufficient memory to store this information. */
260 if (composition_table_size <= n_compositions)
261 composition_table = xpalloc (composition_table, &composition_table_size,
262 1, -1, sizeof *composition_table);
263
264 key_contents = XVECTOR (key)->contents;
265
266 /* Check if the contents of COMPONENTS are valid if COMPONENTS is a
267 vector or a list. It should be a sequence of:
268 char1 rule1 char2 rule2 char3 ... ruleN charN+1 */
269
270 if (VECTORP (components)
271 && ASIZE (components) >= 2
272 && VECTORP (AREF (components, 0)))
273 {
274 /* COMPONENTS is a glyph-string. */
275 ptrdiff_t len = ASIZE (key);
276
277 for (ptrdiff_t i = 1; i < len; i++)
278 if (! VECTORP (AREF (key, i)))
279 goto invalid_composition;
280 }
281 else if (VECTORP (components) || CONSP (components))
282 {
283 ptrdiff_t len = ASIZE (key);
284
285 /* The number of elements should be odd. */
286 if ((len % 2) == 0)
287 goto invalid_composition;
288 /* All elements should be integers (character or encoded
289 composition rule). */
290 for (ptrdiff_t i = 0; i < len; i++)
291 {
292 if (!FIXNUMP (key_contents[i]))
293 goto invalid_composition;
294 }
295 }
296
297 /* Change PROP from Form-A above to Form-B. We can directly modify
298 the cons cell of PROP because it is not shared. */
299 XSETFASTINT (id, n_compositions);
300 XSETCAR (prop, id);
301 XSETCDR (prop, Fcons (make_fixnum (nchars), Fcons (key, XCDR (prop))));
302
303 /* Register the composition in composition_hash_table. */
304 hash_index = hash_put (hash_table, key, id, hash_code);
305
306 method = (NILP (components)
307 ? COMPOSITION_RELATIVE
308 : ((FIXNUMP (components) || STRINGP (components))
309 ? COMPOSITION_WITH_ALTCHARS
310 : COMPOSITION_WITH_RULE_ALTCHARS));
311
312 glyph_len = (method == COMPOSITION_WITH_RULE_ALTCHARS
313 ? (ASIZE (key) + 1) / 2
314 : ASIZE (key));
315
316 if (GLYPH_LEN_MAX < glyph_len)
317 memory_full (SIZE_MAX);
318
319 /* Register the composition in composition_table. */
320 cmp = xmalloc (sizeof *cmp);
321
322 cmp->method = method;
323 cmp->hash_index = hash_index;
324 cmp->glyph_len = glyph_len;
325 cmp->offsets = xnmalloc (glyph_len, 2 * sizeof *cmp->offsets);
326 cmp->font = NULL;
327
328 if (cmp->method != COMPOSITION_WITH_RULE_ALTCHARS)
329 {
330 /* Relative composition. */
331 cmp->width = 0;
332 for (ptrdiff_t i = 0; i < glyph_len; i++)
333 {
334 int this_width;
335 ch = XFIXNUM (key_contents[i]);
336 /* TAB in a composition means display glyphs with padding
337 space on the left or right. */
338 this_width = (ch == '\t' ? 1 : CHARACTER_WIDTH (ch));
339 if (cmp->width < this_width)
340 cmp->width = this_width;
341 }
342 }
343 else
344 {
345 /* Rule-base composition. */
346 double leftmost = 0.0, rightmost;
347
348 ch = XFIXNUM (key_contents[0]);
349 rightmost = ch != '\t' ? CHARACTER_WIDTH (ch) : 1;
350
351 for (ptrdiff_t i = 1; i < glyph_len; i += 2)
352 {
353 int rule, gref, nref;
354 int this_width;
355 double this_left;
356
357 rule = XFIXNUM (key_contents[i]);
358 ch = XFIXNUM (key_contents[i + 1]);
359 this_width = ch != '\t' ? CHARACTER_WIDTH (ch) : 1;
360
361 /* A composition rule is specified by an integer value
362 that encodes global and new reference points (GREF and
363 NREF). GREF and NREF are specified by numbers as
364 below:
365 0---1---2 -- ascent
366 | |
367 | |
368 | |
369 9--10--11 -- center
370 | |
371 ---3---4---5--- baseline
372 | |
373 6---7---8 -- descent
374 */
375 COMPOSITION_DECODE_REFS (rule, gref, nref);
376 this_left = (leftmost
377 + (gref % 3) * (rightmost - leftmost) / 2.0
378 - (nref % 3) * this_width / 2.0);
379
380 if (this_left < leftmost)
381 leftmost = this_left;
382 if (this_left + this_width > rightmost)
383 rightmost = this_left + this_width;
384 }
385
386 cmp->width = rightmost - leftmost;
387 if (cmp->width < (rightmost - leftmost))
388 /* To get a ceiling integer value. */
389 cmp->width++;
390 }
391
392 composition_table[n_compositions] = cmp;
393
394 return n_compositions++;
395
396 invalid_composition:
397 /* Would it be better to remove this `composition' property? */
398 return -1;
399 }
400
401
402 /* Find a static composition at or nearest to position POS of OBJECT
403 (buffer or string).
404
405 OBJECT defaults to the current buffer. If there's a composition at
406 POS, set *START and *END to the start and end of the sequence,
407 *PROP to the `composition' property, and return 1.
408
409 If there's no composition at POS and LIMIT is negative, return 0.
410
411 Otherwise, search for a composition forward (LIMIT > POS) or
412 backward (LIMIT < POS). In this case, LIMIT bounds the search.
413
414 If a composition is found, set *START, *END, and *PROP as above,
415 and return 1, else return 0.
416
417 This doesn't check the validity of composition. */
418
419 bool
420 find_composition (ptrdiff_t pos, ptrdiff_t limit,
421 ptrdiff_t *start, ptrdiff_t *end,
422 Lisp_Object *prop, Lisp_Object object)
423 {
424 Lisp_Object val;
425
426 if (get_property_and_range (pos, Qcomposition, prop, start, end, object))
427 return 1;
428
429 if (limit < 0 || limit == pos)
430 return 0;
431
432 if (limit > pos) /* search forward */
433 {
434 val = Fnext_single_property_change (make_fixnum (pos), Qcomposition,
435 object, make_fixnum (limit));
436 pos = XFIXNUM (val);
437 if (pos == limit)
438 return 0;
439 }
440 else /* search backward */
441 {
442 if (get_property_and_range (pos - 1, Qcomposition, prop, start, end,
443 object))
444 return 1;
445 val = Fprevious_single_property_change (make_fixnum (pos), Qcomposition,
446 object, make_fixnum (limit));
447 pos = XFIXNUM (val);
448 if (pos == limit)
449 return 0;
450 pos--;
451 }
452 get_property_and_range (pos, Qcomposition, prop, start, end, object);
453 return 1;
454 }
455
456 /* Run a proper function to adjust the composition sitting between
457 FROM and TO with property PROP. */
458
459 static void
460 run_composition_function (ptrdiff_t from, ptrdiff_t to, Lisp_Object prop)
461 {
462 Lisp_Object func;
463 ptrdiff_t start, end;
464
465 func = COMPOSITION_MODIFICATION_FUNC (prop);
466 /* If an invalid composition precedes or follows, try to make them
467 valid too. */
468 if (from > BEGV
469 && find_composition (from - 1, -1, &start, &end, &prop, Qnil)
470 && !composition_valid_p (start, end, prop))
471 from = start;
472 if (to < ZV
473 && find_composition (to, -1, &start, &end, &prop, Qnil)
474 && !composition_valid_p (start, end, prop))
475 to = end;
476 if (!NILP (Ffboundp (func)))
477 call2 (func, make_fixnum (from), make_fixnum (to));
478 }
479
480 /* Make invalid compositions adjacent to or inside FROM and TO valid.
481 CHECK_MASK is bitwise `or' of mask bits defined by macros
482 CHECK_XXX (see the comment in composite.h).
483
484 It also resets the text-property `auto-composed' to a proper region
485 so that automatic character composition works correctly later while
486 displaying the region.
487
488 This function is called when a buffer text is changed. If the
489 change is deletion, FROM == TO. Otherwise, FROM < TO. */
490
491 void
492 update_compositions (ptrdiff_t from, ptrdiff_t to, int check_mask)
493 {
494 Lisp_Object prop;
495 ptrdiff_t start, end;
496 /* The beginning and end of the region to set the property
497 `auto-composed' to nil. */
498 ptrdiff_t min_pos = from, max_pos = to;
499
500 if (inhibit_modification_hooks)
501 return;
502
503 /* If FROM and TO are not in a valid range, do nothing. */
504 if (! (BEGV <= from && from <= to && to <= ZV))
505 return;
506
507 if (check_mask & CHECK_HEAD)
508 {
509 /* FROM should be at composition boundary. But, insertion or
510 deletion will make two compositions adjacent and
511 indistinguishable when they have same (eq) property. To
512 avoid it, in such a case, we change the property of the
513 latter to the copy of it. */
514 if (from > BEGV
515 && find_composition (from - 1, -1, &start, &end, &prop, Qnil)
516 && composition_valid_p (start, end, prop))
517 {
518 min_pos = start;
519 if (end > to)
520 max_pos = end;
521 if (from < end)
522 Fput_text_property (make_fixnum (from), make_fixnum (end),
523 Qcomposition,
524 Fcons (XCAR (prop), XCDR (prop)), Qnil);
525 run_composition_function (start, end, prop);
526 from = end;
527 }
528 else if (from < ZV
529 && find_composition (from, -1, &start, &from, &prop, Qnil)
530 && composition_valid_p (start, from, prop))
531 {
532 if (from > to)
533 max_pos = from;
534 run_composition_function (start, from, prop);
535 }
536 }
537
538 if (check_mask & CHECK_INSIDE)
539 {
540 /* In this case, we are sure that (check & CHECK_TAIL) is also
541 nonzero. Thus, here we should check only compositions before
542 (to - 1). */
543 while (from < to - 1
544 && find_composition (from, to, &start, &from, &prop, Qnil)
545 && composition_valid_p (start, from, prop)
546 && from < to - 1)
547 run_composition_function (start, from, prop);
548 }
549
550 if (check_mask & CHECK_TAIL)
551 {
552 if (from < to
553 && find_composition (to - 1, -1, &start, &end, &prop, Qnil)
554 && composition_valid_p (start, end, prop))
555 {
556 /* TO should be also at composition boundary. But,
557 insertion or deletion will make two compositions adjacent
558 and indistinguishable when they have same (eq) property.
559 To avoid it, in such a case, we change the property of
560 the former to the copy of it. */
561 if (to < end)
562 {
563 Fput_text_property (make_fixnum (start), make_fixnum (to),
564 Qcomposition,
565 Fcons (XCAR (prop), XCDR (prop)), Qnil);
566 max_pos = end;
567 }
568 run_composition_function (start, end, prop);
569 }
570 else if (to < ZV
571 && find_composition (to, -1, &start, &end, &prop, Qnil)
572 && composition_valid_p (start, end, prop))
573 {
574 run_composition_function (start, end, prop);
575 max_pos = end;
576 }
577 }
578 if (min_pos < max_pos)
579 {
580 specpdl_ref count = SPECPDL_INDEX ();
581
582 specbind (Qinhibit_read_only, Qt);
583 specbind (Qinhibit_modification_hooks, Qt);
584 specbind (Qinhibit_point_motion_hooks, Qt);
585 Fremove_list_of_text_properties (make_fixnum (min_pos),
586 make_fixnum (max_pos),
587 list1 (Qauto_composed), Qnil);
588 unbind_to (count, Qnil);
589 }
590 }
591
592
593 /* Modify composition property values in LIST destructively. LIST is
594 a list as returned from text_property_list. Change values to the
595 top-level copies of them so that none of them are `eq'. */
596
597 void
598 make_composition_value_copy (Lisp_Object list)
599 {
600 Lisp_Object plist, val;
601
602 for (; CONSP (list); list = XCDR (list))
603 {
604 plist = XCAR (XCDR (XCDR (XCAR (list))));
605 while (CONSP (plist) && CONSP (XCDR (plist)))
606 {
607 if (EQ (XCAR (plist), Qcomposition)
608 && (val = XCAR (XCDR (plist)), CONSP (val)))
609 XSETCAR (XCDR (plist), Fcons (XCAR (val), XCDR (val)));
610 plist = XCDR (XCDR (plist));
611 }
612 }
613 }
614
615
616 /* Make text in the region between START and END a composition that
617 has COMPONENTS and MODIFICATION-FUNC.
618
619 If STRING is non-nil, then operate on characters contained between
620 indices START and END in STRING. */
621
622 void
623 compose_text (ptrdiff_t start, ptrdiff_t end, Lisp_Object components,
624 Lisp_Object modification_func, Lisp_Object string)
625 {
626 Lisp_Object prop;
627
628 prop = Fcons (Fcons (make_fixnum (end - start), components),
629 modification_func);
630 Fput_text_property (make_fixnum (start), make_fixnum (end),
631 Qcomposition, prop, string);
632 }
633
634 /* Lisp glyph-string handlers. */
635
636 /* Hash table for automatic composition. The key is a header of a
637 lgstring (Lispy glyph-string), and the value is a body of a
638 lgstring. */
639
640 static Lisp_Object gstring_hash_table;
641
642 Lisp_Object
643 composition_gstring_lookup_cache (Lisp_Object header)
644 {
645 struct Lisp_Hash_Table *h = XHASH_TABLE (gstring_hash_table);
646 ptrdiff_t i = hash_lookup (h, header, NULL);
647
648 return (i >= 0 ? HASH_VALUE (h, i) : Qnil);
649 }
650
651 Lisp_Object
652 composition_gstring_put_cache (Lisp_Object gstring, ptrdiff_t len)
653 {
654 struct Lisp_Hash_Table *h = XHASH_TABLE (gstring_hash_table);
655 Lisp_Object header = LGSTRING_HEADER (gstring);
656 Lisp_Object hash = h->test.hashfn (header, h);
657 if (len < 0)
658 {
659 ptrdiff_t glyph_len = LGSTRING_GLYPH_LEN (gstring);
660 for (len = 0; len < glyph_len; len++)
661 if (NILP (LGSTRING_GLYPH (gstring, len)))
662 break;
663 }
664
665 Lisp_Object copy = make_nil_vector (len + 2);
666 LGSTRING_SET_HEADER (copy, Fcopy_sequence (header));
667 for (ptrdiff_t i = 0; i < len; i++)
668 LGSTRING_SET_GLYPH (copy, i, Fcopy_sequence (LGSTRING_GLYPH (gstring, i)));
669 ptrdiff_t id = hash_put (h, LGSTRING_HEADER (copy), copy, hash);
670 LGSTRING_SET_ID (copy, make_fixnum (id));
671 return copy;
672 }
673
674 Lisp_Object
675 composition_gstring_from_id (ptrdiff_t id)
676 {
677 struct Lisp_Hash_Table *h = XHASH_TABLE (gstring_hash_table);
678
679 return HASH_VALUE (h, id);
680 }
681
682 /* Remove from the composition hash table every lgstring that
683 references the given FONT_OBJECT. */
684 void
685 composition_gstring_cache_clear_font (Lisp_Object font_object)
686 {
687 struct Lisp_Hash_Table *h = XHASH_TABLE (gstring_hash_table);
688
689 for (ptrdiff_t i = 0; i < HASH_TABLE_SIZE (h); ++i)
690 {
691 Lisp_Object k = HASH_KEY (h, i);
692
693 if (!BASE_EQ (k, Qunbound))
694 {
695 Lisp_Object gstring = HASH_VALUE (h, i);
696
697 if (EQ (LGSTRING_FONT (gstring), font_object))
698 hash_remove_from_table (h, k);
699 }
700 }
701 }
702
703 DEFUN ("clear-composition-cache", Fclear_composition_cache,
704 Sclear_composition_cache, 0, 0, 0,
705 doc: /* Internal use only.
706 Clear composition cache. */)
707 (void)
708 {
709 gstring_hash_table = CALLN (Fmake_hash_table, QCtest, Qequal,
710 QCsize, make_fixnum (311));
711 /* Fixme: We call Fclear_face_cache to force complete re-building of
712 display glyphs. But, it may be better to call this function from
713 Fclear_face_cache instead. */
714 return Fclear_face_cache (Qt);
715 }
716
717 bool
718 composition_gstring_p (Lisp_Object gstring)
719 {
720 Lisp_Object header;
721 ptrdiff_t i;
722
723 if (! VECTORP (gstring) || ASIZE (gstring) < 2)
724 return 0;
725 header = LGSTRING_HEADER (gstring);
726 if (! VECTORP (header) || ASIZE (header) < 2)
727 return 0;
728 if (! NILP (LGSTRING_FONT (gstring))
729 && (! FONT_OBJECT_P (LGSTRING_FONT (gstring))
730 && ! CODING_SYSTEM_P (LGSTRING_FONT (gstring))))
731 return 0;
732 for (i = 1; i < ASIZE (LGSTRING_HEADER (gstring)); i++)
733 if (! FIXNATP (AREF (LGSTRING_HEADER (gstring), i)))
734 return 0;
735 if (! NILP (LGSTRING_ID (gstring)) && ! FIXNATP (LGSTRING_ID (gstring)))
736 return 0;
737 for (i = 0; i < LGSTRING_GLYPH_LEN (gstring); i++)
738 {
739 Lisp_Object glyph = LGSTRING_GLYPH (gstring, i);
740 if (NILP (glyph))
741 break;
742 if (! VECTORP (glyph) || ASIZE (glyph) != LGLYPH_SIZE)
743 return 0;
744 }
745 return 1;
746 }
747
748 int
749 composition_gstring_width (Lisp_Object gstring, ptrdiff_t from, ptrdiff_t to,
750 struct font_metrics *metrics)
751 {
752 Lisp_Object *glyph;
753 int width = 0;
754
755 if (metrics)
756 {
757 Lisp_Object font_object = LGSTRING_FONT (gstring);
758
759 if (FONT_OBJECT_P (font_object))
760 {
761 struct font *font = XFONT_OBJECT (font_object);
762 int font_ascent, font_descent;
763
764 get_font_ascent_descent (font, &font_ascent, &font_descent);
765 metrics->ascent = font_ascent;
766 metrics->descent = font_descent;
767 }
768 else
769 {
770 metrics->ascent = 1;
771 metrics->descent = 0;
772 }
773 metrics->width = metrics->lbearing = metrics->rbearing = 0;
774 }
775 for (glyph = lgstring_glyph_addr (gstring, from); from < to; from++, glyph++)
776 {
777 int x;
778
779 if (NILP (LGLYPH_ADJUSTMENT (*glyph)))
780 width += LGLYPH_WIDTH (*glyph);
781 else
782 width += LGLYPH_WADJUST (*glyph);
783 if (metrics)
784 {
785 x = metrics->width + LGLYPH_LBEARING (*glyph) + LGLYPH_XOFF (*glyph);
786 if (metrics->lbearing > x)
787 metrics->lbearing = x;
788 x = metrics->width + LGLYPH_RBEARING (*glyph) + LGLYPH_XOFF (*glyph);
789 if (metrics->rbearing < x)
790 metrics->rbearing = x;
791 metrics->width = width;
792 x = LGLYPH_ASCENT (*glyph) - LGLYPH_YOFF (*glyph);
793 if (metrics->ascent < x)
794 metrics->ascent = x;
795 x = LGLYPH_DESCENT (*glyph) + LGLYPH_YOFF (*glyph);
796 if (metrics->descent < x)
797 metrics->descent = x;
798 }
799 }
800 return width;
801 }
802
803 /* Adjust the width of each grapheme cluster of GSTRING because
804 zero-width grapheme clusters are not displayed. If the width is
805 zero, then the width of the last glyph in the cluster is
806 incremented. */
807
808 void
809 composition_gstring_adjust_zero_width (Lisp_Object gstring)
810 {
811 ptrdiff_t from = 0;
812 int width = 0;
813
814 for (ptrdiff_t i = 0; ; i++)
815 {
816 Lisp_Object glyph;
817
818 if (i < LGSTRING_GLYPH_LEN (gstring))
819 glyph = LGSTRING_GLYPH (gstring, i);
820 else
821 glyph = Qnil;
822
823 if (NILP (glyph) || from != LGLYPH_FROM (glyph))
824 {
825 eassert (i > 0);
826 Lisp_Object last = LGSTRING_GLYPH (gstring, i - 1);
827
828 if (width == 0)
829 {
830 if (NILP (LGLYPH_ADJUSTMENT (last)))
831 LGLYPH_SET_ADJUSTMENT (last,
832 CALLN (Fvector,
833 make_fixnum (0), make_fixnum (0),
834 make_fixnum (LGLYPH_WIDTH (last)
835 + 1)));
836 else
837 ASET (LGLYPH_ADJUSTMENT (last), 2,
838 make_fixnum (LGLYPH_WADJUST (last) + 1));
839 }
840 if (NILP (glyph))
841 break;
842 from = LGLYPH_FROM (glyph);
843 width = 0;
844 }
845 width += (NILP (LGLYPH_ADJUSTMENT (glyph))
846 ? LGLYPH_WIDTH (glyph) : LGLYPH_WADJUST (glyph));
847 }
848 }
849
850
851 static Lisp_Object gstring_work;
852 static Lisp_Object gstring_work_headers;
853
854 static Lisp_Object
855 fill_gstring_header (ptrdiff_t from, ptrdiff_t from_byte,
856 ptrdiff_t to, Lisp_Object font_object, Lisp_Object string)
857 {
858 ptrdiff_t len = to - from;
859 if (len == 0)
860 error ("Attempt to shape zero-length text");
861 eassume (0 < len);
862 Lisp_Object header = (len <= 8
863 ? AREF (gstring_work_headers, len - 1)
864 : make_uninit_vector (len + 1));
865
866 ASET (header, 0, font_object);
867 for (ptrdiff_t i = 0; i < len; i++)
868 {
869 int c
870 = (NILP (string)
871 ? fetch_char_advance_no_check (&from, &from_byte)
872 : fetch_string_char_advance_no_check (string, &from, &from_byte));
873 ASET (header, i + 1, make_fixnum (c));
874 }
875 return header;
876 }
877
878 static void
879 fill_gstring_body (Lisp_Object gstring)
880 {
881 Lisp_Object font_object = LGSTRING_FONT (gstring);
882 Lisp_Object header = AREF (gstring, 0);
883 ptrdiff_t len = LGSTRING_CHAR_LEN (gstring);
884 ptrdiff_t i;
885 struct font *font = NULL;
886 unsigned int code;
887
888 if (FONT_OBJECT_P (font_object))
889 font = XFONT_OBJECT (font_object);
890
891 for (i = 0; i < len; i++)
892 {
893 Lisp_Object g = LGSTRING_GLYPH (gstring, i);
894 int c = XFIXNAT (AREF (header, i + 1));
895
896 if (NILP (g))
897 {
898 g = LGLYPH_NEW ();
899 LGSTRING_SET_GLYPH (gstring, i, g);
900 }
901 LGLYPH_SET_FROM (g, i);
902 LGLYPH_SET_TO (g, i);
903 LGLYPH_SET_CHAR (g, c);
904
905 if (font != NULL)
906 code = font->driver->encode_char (font, LGLYPH_CHAR (g));
907 else
908 code = FONT_INVALID_CODE;
909 if (code != FONT_INVALID_CODE)
910 {
911 font_fill_lglyph_metrics (g, font, code);
912 }
913 else
914 {
915 int width = XFIXNAT (CHAR_TABLE_REF (Vchar_width_table, c));
916
917 LGLYPH_SET_CODE (g, c);
918 LGLYPH_SET_LBEARING (g, 0);
919 LGLYPH_SET_RBEARING (g, width);
920 LGLYPH_SET_WIDTH (g, width);
921 LGLYPH_SET_ASCENT (g, 1);
922 LGLYPH_SET_DESCENT (g, 0);
923 }
924 LGLYPH_SET_ADJUSTMENT (g, Qnil);
925 }
926 len = LGSTRING_GLYPH_LEN (gstring);
927 for (; i < len; i++)
928 LGSTRING_SET_GLYPH (gstring, i, Qnil);
929 }
930
931
932 /* Try to compose the characters at CHARPOS according to composition
933 rule RULE ([PATTERN PREV-CHARS FUNC]). LIMIT limits the characters
934 to compose. STRING, if not nil, is a target string. WIN is a
935 window where the characters are being displayed. CH is the
936 character that triggered the composition check. If characters are
937 successfully composed, return the composition as a glyph-string
938 object. Otherwise return nil. */
939
940 static Lisp_Object
941 autocmp_chars (Lisp_Object rule, ptrdiff_t charpos, ptrdiff_t bytepos,
942 ptrdiff_t limit, struct window *win, struct face *face,
943 Lisp_Object string, Lisp_Object direction, int ch)
944 {
945 specpdl_ref count = SPECPDL_INDEX ();
946 Lisp_Object pos = make_fixnum (charpos);
947 ptrdiff_t to;
948 ptrdiff_t pt = PT, pt_byte = PT_BYTE;
949 Lisp_Object re, font_object, lgstring;
950 ptrdiff_t len;
951
952 record_unwind_save_match_data ();
953 re = AREF (rule, 0);
954 if (NILP (re))
955 len = 1;
956 else if (! STRINGP (re))
957 return unbind_to (count, Qnil);
958 else if ((len = fast_looking_at (re, charpos, bytepos, limit, -1, string))
959 > 0)
960 {
961 if (NILP (string))
962 len = BYTE_TO_CHAR (bytepos + len) - charpos;
963 else
964 len = string_byte_to_char (string, bytepos + len) - charpos;
965 }
966 if (len <= 0)
967 return unbind_to (count, Qnil);
968 to = limit = charpos + len;
969 font_object = win->frame;
970 #ifdef HAVE_WINDOW_SYSTEM
971 struct frame *f = XFRAME (font_object);
972 if (FRAME_WINDOW_P (f))
973 {
974 font_object = font_range (charpos, bytepos, &to, win, face, string, ch);
975 if (! FONT_OBJECT_P (font_object)
976 || (! NILP (re)
977 && to < limit
978 && (fast_looking_at (re, charpos, bytepos, to, -1, string) <= 0)))
979 return unbind_to (count, Qnil);
980 }
981 #endif
982 lgstring = Fcomposition_get_gstring (pos, make_fixnum (to), font_object,
983 string);
984 if (NILP (LGSTRING_ID (lgstring)))
985 {
986 /* Save point as marker before calling out to lisp. */
987 if (NILP (string))
988 record_unwind_protect (restore_point_unwind,
989 build_marker (current_buffer, pt, pt_byte));
990 lgstring = safe_call (7, Vauto_composition_function, AREF (rule, 2),
991 pos, make_fixnum (to), font_object, string,
992 direction);
993 }
994 return unbind_to (count, lgstring);
995 }
996
997 /* 1 iff the character C is composable. Characters of general
998 category Z? or C? are not composable except for ZWNJ and ZWJ,
999 and characters of category Zs. */
1000
1001 static bool
1002 char_composable_p (int c)
1003 {
1004 Lisp_Object val;
1005 return (c >= ' '
1006 && (c == ZERO_WIDTH_NON_JOINER || c == ZERO_WIDTH_JOINER
1007 /* Per Unicode TR51, these tag characters can be part of
1008 Emoji sequences. */
1009 || (TAG_SPACE <= c && c <= CANCEL_TAG)
1010 /* unicode-category-table may not be available during
1011 dumping. */
1012 || (CHAR_TABLE_P (Vunicode_category_table)
1013 && (val = CHAR_TABLE_REF (Vunicode_category_table, c),
1014 (FIXNUMP (val)
1015 && (XFIXNUM (val) <= UNICODE_CATEGORY_Zs))))));
1016 }
1017
1018 static inline bool
1019 inhibit_auto_composition (void)
1020 {
1021 if (NILP (Vauto_composition_mode))
1022 return true;
1023
1024 if (STRINGP (Vauto_composition_mode))
1025 {
1026 char *name = tty_type_name (Qnil);
1027
1028 if (name && ! strcmp (SSDATA (Vauto_composition_mode), name))
1029 return true;
1030 }
1031
1032 return false;
1033 }
1034
1035 /* Update cmp_it->stop_pos to the next position after CHARPOS (and
1036 BYTEPOS) where character composition may happen. If BYTEPOS is
1037 negative, compute it. ENDPOS is a limit of searching. If it is
1038 less than CHARPOS, search backward to ENDPOS+1 assuming that
1039 set_iterator_to_next works in reverse order. In this case, if a
1040 composition closest to CHARPOS is found, set cmp_it->stop_pos to
1041 the last character of the composition. STRING, if non-nil, is
1042 the string (as opposed to a buffer) whose characters should be
1043 tested for being composable. INCLUDE_STATIC non-zero means
1044 consider both static and automatic compositions; if zero, look
1045 only for potential automatic compositions.
1046
1047 If no composition is found, set cmp_it->ch to -2. If a static
1048 composition is found, set cmp_it->ch to -1. Otherwise, set
1049 cmp_it->ch to the character that triggers the automatic
1050 composition. */
1051
1052 void
1053 composition_compute_stop_pos (struct composition_it *cmp_it, ptrdiff_t charpos,
1054 ptrdiff_t bytepos, ptrdiff_t endpos,
1055 Lisp_Object string, bool include_static)
1056 {
1057 ptrdiff_t start, end;
1058 int c;
1059 Lisp_Object prop, val;
1060 /* This is from forward_to_next_line_start in xdisp.c. */
1061 const int MAX_NEWLINE_DISTANCE = 500;
1062
1063 if (charpos < endpos)
1064 {
1065 if (endpos > charpos + MAX_NEWLINE_DISTANCE)
1066 endpos = charpos + MAX_NEWLINE_DISTANCE;
1067 }
1068 else if (endpos < charpos)
1069 {
1070 /* We search backward for a position to check composition. */
1071 if (endpos < 0)
1072 {
1073 /* But we don't know where to stop the searching. */
1074 endpos = NILP (string) ? BEGV - 1 : -1;
1075 /* Usually we don't reach ENDPOS because we stop searching
1076 at an uncomposable character (NL, LRE, etc). In buffers
1077 with long lines, however, NL might be far away, so
1078 pretend that the buffer is smaller. */
1079 if (current_buffer->long_line_optimizations_p)
1080 endpos = get_small_narrowing_begv (cmp_it->parent_it->w, charpos);
1081 }
1082 }
1083 cmp_it->id = -1;
1084 cmp_it->ch = -2;
1085 cmp_it->reversed_p = 0;
1086 cmp_it->stop_pos = endpos;
1087 if (charpos == endpos)
1088 return;
1089 /* Look for static compositions. */
1090 /* FIXME: Bidi is not yet handled well in static composition. */
1091 if (include_static
1092 && charpos < endpos
1093 && find_composition (charpos, endpos, &start, &end, &prop, string)
1094 && start >= charpos
1095 && composition_valid_p (start, end, prop))
1096 {
1097 cmp_it->stop_pos = endpos = start;
1098 cmp_it->ch = -1;
1099 }
1100 if ((NILP (string)
1101 && NILP (BVAR (current_buffer, enable_multibyte_characters)))
1102 || (STRINGP (string) && !STRING_MULTIBYTE (string))
1103 || inhibit_auto_composition ())
1104 return;
1105 if (bytepos < 0)
1106 {
1107 if (NILP (string))
1108 bytepos = CHAR_TO_BYTE (charpos);
1109 else
1110 bytepos = string_char_to_byte (string, charpos);
1111 }
1112
1113 /* Look for automatic compositions. */
1114 start = charpos;
1115 if (charpos < endpos)
1116 {
1117 /* Forward search. */
1118 while (charpos < endpos)
1119 {
1120 c = (STRINGP (string)
1121 ? fetch_string_char_advance (string, &charpos, &bytepos)
1122 : fetch_char_advance (&charpos, &bytepos));
1123 if (c == '\n')
1124 {
1125 cmp_it->ch = -2;
1126 break;
1127 }
1128 val = CHAR_TABLE_REF (Vcomposition_function_table, c);
1129 if (! NILP (val))
1130 {
1131 for (EMACS_INT ridx = 0; CONSP (val); val = XCDR (val), ridx++)
1132 {
1133 Lisp_Object elt = XCAR (val);
1134 if (VECTORP (elt) && ASIZE (elt) == 3
1135 && FIXNATP (AREF (elt, 1))
1136 && charpos - 1 - XFIXNAT (AREF (elt, 1)) >= start)
1137 {
1138 cmp_it->rule_idx = ridx;
1139 cmp_it->lookback = XFIXNAT (AREF (elt, 1));
1140 cmp_it->stop_pos = charpos - 1 - cmp_it->lookback;
1141 cmp_it->ch = c;
1142 return;
1143 }
1144 }
1145 }
1146 }
1147 if (charpos == endpos
1148 && !(STRINGP (string) && endpos == SCHARS (string)))
1149 {
1150 /* We couldn't find a composition point before ENDPOS. But,
1151 some character after ENDPOS may be composed with
1152 characters before ENDPOS. So, we should stop at the safe
1153 point. */
1154 charpos = endpos - MAX_AUTO_COMPOSITION_LOOKBACK;
1155 if (charpos < start)
1156 charpos = start;
1157 }
1158 }
1159 else if (charpos > endpos)
1160 {
1161 /* Search backward for a pattern that may be composed and the
1162 position of (possibly) the last character of the match is
1163 closest to (but not after) START. The reason for the last
1164 character is that set_iterator_to_next works in reverse order,
1165 and thus we must stop at the last character for composition
1166 check. */
1167 unsigned char *p;
1168 int len;
1169 /* Limit byte position used in fast_looking_at. This is the
1170 byte position of the character after START. */
1171 ptrdiff_t limit;
1172
1173 if (NILP (string))
1174 p = BYTE_POS_ADDR (bytepos);
1175 else
1176 p = SDATA (string) + bytepos;
1177 c = string_char_and_length (p, &len);
1178 limit = bytepos + len;
1179 while (char_composable_p (c))
1180 {
1181 val = CHAR_TABLE_REF (Vcomposition_function_table, c);
1182 for (EMACS_INT ridx = 0; CONSP (val); val = XCDR (val), ridx++)
1183 {
1184 Lisp_Object elt = XCAR (val);
1185 if (VECTORP (elt) && ASIZE (elt) == 3
1186 && FIXNATP (AREF (elt, 1))
1187 && charpos - XFIXNAT (AREF (elt, 1)) > endpos)
1188 {
1189 ptrdiff_t back = XFIXNAT (AREF (elt, 1));
1190 ptrdiff_t cpos = charpos - back, bpos;
1191
1192 if (back == 0)
1193 bpos = bytepos;
1194 else
1195 bpos = (NILP (string) ? CHAR_TO_BYTE (cpos)
1196 : string_char_to_byte (string, cpos));
1197 ptrdiff_t blen
1198 = (STRINGP (AREF (elt, 0))
1199 ? fast_looking_at (AREF (elt, 0), cpos, bpos,
1200 start + 1, limit, string)
1201 : 1);
1202 if (blen > 0)
1203 {
1204 /* Make CPOS point to the last character of
1205 match. Note that BLEN is byte-length. */
1206 if (blen > 1)
1207 {
1208 bpos += blen;
1209 if (NILP (string))
1210 cpos = BYTE_TO_CHAR (bpos) - 1;
1211 else
1212 cpos = string_byte_to_char (string, bpos) - 1;
1213 }
1214 back = cpos - (charpos - back);
1215 if (cmp_it->stop_pos < cpos
1216 || (cmp_it->stop_pos == cpos
1217 && cmp_it->lookback < back))
1218 {
1219 cmp_it->rule_idx = ridx;
1220 cmp_it->stop_pos = cpos;
1221 cmp_it->ch = c;
1222 cmp_it->lookback = back;
1223 cmp_it->nchars = back + 1;
1224 }
1225 }
1226 }
1227 }
1228 if (charpos - 1 == endpos)
1229 break;
1230 if (STRINGP (string))
1231 {
1232 p--, bytepos--;
1233 while (! CHAR_HEAD_P (*p))
1234 p--, bytepos--;
1235 charpos--;
1236 }
1237 else
1238 {
1239 dec_both (&charpos, &bytepos);
1240 p = BYTE_POS_ADDR (bytepos);
1241 }
1242 c = STRING_CHAR (p);
1243 }
1244 if (cmp_it->ch >= 0)
1245 /* We found a position to check. */
1246 return;
1247 /* Skip all uncomposable characters. */
1248 if (NILP (string))
1249 {
1250 while (charpos - 1 > endpos && ! char_composable_p (c))
1251 {
1252 dec_both (&charpos, &bytepos);
1253 c = FETCH_MULTIBYTE_CHAR (bytepos);
1254 }
1255 }
1256 else
1257 {
1258 while (charpos - 1 > endpos && ! char_composable_p (c))
1259 {
1260 p--;
1261 while (! CHAR_HEAD_P (*p))
1262 p--;
1263 charpos--;
1264 c = STRING_CHAR (p);
1265 }
1266 }
1267 }
1268 cmp_it->stop_pos = charpos;
1269 }
1270
1271 /* Check if the character at CHARPOS (and BYTEPOS) is composed
1272 (possibly with the following characters) on window W. ENDPOS limits
1273 characters to be composed. FACE, if non-NULL, is a base face of
1274 the character. If STRING is not nil, it is a string containing the
1275 character to check, and CHARPOS and BYTEPOS are indices in the
1276 string. In that case, FACE must not be NULL. BIDI_LEVEL is the bidi
1277 embedding level of the current paragraph, and is used to calculate the
1278 direction argument to pass to the font shaper; value of -1 means the
1279 caller doesn't know the embedding level (used by callers which didn't
1280 invoke the display routines that perform bidi-display-reordering).
1281
1282 If the character is composed, setup members of CMP_IT (id, nglyphs,
1283 from, to, reversed_p), and return true. Otherwise, update
1284 CMP_IT->stop_pos, and return false. */
1285
1286 bool
1287 composition_reseat_it (struct composition_it *cmp_it, ptrdiff_t charpos,
1288 ptrdiff_t bytepos, ptrdiff_t endpos, struct window *w,
1289 signed char bidi_level, struct face *face, Lisp_Object string)
1290 {
1291 if (cmp_it->ch == -2)
1292 {
1293 composition_compute_stop_pos (cmp_it, charpos, bytepos, endpos, string,
1294 true);
1295 if (cmp_it->ch == -2 || cmp_it->stop_pos != charpos)
1296 /* The current position is not composed. */
1297 return 0;
1298 }
1299
1300 if (endpos < 0)
1301 endpos = NILP (string) ? BEGV : 0;
1302
1303 if (cmp_it->ch < 0)
1304 {
1305 /* We are looking at a static composition. */
1306 ptrdiff_t start, end;
1307 Lisp_Object prop;
1308
1309 find_composition (charpos, -1, &start, &end, &prop, string);
1310 cmp_it->id = get_composition_id (charpos, bytepos, end - start,
1311 prop, string);
1312 if (cmp_it->id < 0)
1313 goto no_composition;
1314 cmp_it->nchars = end - start;
1315 cmp_it->nglyphs = composition_table[cmp_it->id]->glyph_len;
1316 }
1317 else if (w)
1318 {
1319 Lisp_Object lgstring = Qnil;
1320 Lisp_Object val, elt, direction = Qnil;
1321
1322 val = CHAR_TABLE_REF (Vcomposition_function_table, cmp_it->ch);
1323 for (EMACS_INT i = 0; i < cmp_it->rule_idx; i++, val = XCDR (val))
1324 continue;
1325 if (charpos < endpos)
1326 {
1327 if (bidi_level < 0)
1328 direction = Qnil;
1329 else if ((bidi_level & 1) == 0)
1330 direction = QL2R;
1331 else
1332 direction = QR2L;
1333 for (; CONSP (val); val = XCDR (val))
1334 {
1335 elt = XCAR (val);
1336 if (! VECTORP (elt) || ASIZE (elt) != 3
1337 || ! FIXNUMP (AREF (elt, 1)))
1338 continue;
1339 if (XFIXNAT (AREF (elt, 1)) != cmp_it->lookback)
1340 goto no_composition;
1341 lgstring = autocmp_chars (elt, charpos, bytepos, endpos,
1342 w, face, string, direction, cmp_it->ch);
1343 if (composition_gstring_p (lgstring))
1344 break;
1345 lgstring = Qnil;
1346 /* Composition failed perhaps because the font doesn't
1347 support sufficient range of characters. Try the
1348 other composition rules if any. */
1349 }
1350 cmp_it->reversed_p = 0;
1351 }
1352 else
1353 {
1354 ptrdiff_t cpos = charpos, bpos = bytepos;
1355
1356 cmp_it->reversed_p = 1;
1357 elt = XCAR (val);
1358 if (cmp_it->lookback > 0)
1359 {
1360 cpos = charpos - cmp_it->lookback;
1361 /* Reject the composition if it starts before ENDPOS,
1362 which here can only happen if
1363 composition-break-at-point is non-nil and point is
1364 inside the composition. */
1365 if (cpos < endpos)
1366 {
1367 eassert (composition_break_at_point);
1368 eassert (endpos == PT);
1369 goto no_composition;
1370 }
1371 if (STRINGP (string))
1372 bpos = string_char_to_byte (string, cpos);
1373 else
1374 bpos = CHAR_TO_BYTE (cpos);
1375 }
1376 /* The bidi_level < 0 case below strictly speaking should
1377 never happen, since we get here when bidi scan direction
1378 is backward in the buffer, which can only happen if the
1379 display routines were called to perform the bidi
1380 reordering. But it doesn't harm to test for that, and
1381 avoid someone raising their brows and thinking it's a
1382 subtle bug... */
1383 if (bidi_level < 0)
1384 direction = Qnil;
1385 else if ((bidi_level & 1) == 0)
1386 direction = QL2R;
1387 else
1388 direction = QR2L;
1389 lgstring = autocmp_chars (elt, cpos, bpos, charpos + 1, w, face,
1390 string, direction, cmp_it->ch);
1391 if (! composition_gstring_p (lgstring)
1392 || cpos + LGSTRING_CHAR_LEN (lgstring) - 1 != charpos)
1393 /* Composition failed or didn't cover the current
1394 character. */
1395 goto no_composition;
1396 }
1397 if (NILP (lgstring))
1398 goto no_composition;
1399 if (NILP (LGSTRING_ID (lgstring)))
1400 lgstring = composition_gstring_put_cache (lgstring, -1);
1401 cmp_it->id = XFIXNUM (LGSTRING_ID (lgstring));
1402 int i;
1403 for (i = 0; i < LGSTRING_GLYPH_LEN (lgstring); i++)
1404 if (NILP (LGSTRING_GLYPH (lgstring, i)))
1405 break;
1406 cmp_it->nglyphs = i;
1407 cmp_it->from = 0;
1408 cmp_it->to = i;
1409 }
1410 else
1411 goto no_composition;
1412 return 1;
1413
1414 no_composition:
1415 if (charpos == endpos)
1416 return 0;
1417 if (charpos < endpos)
1418 {
1419 charpos++;
1420 if (NILP (string))
1421 bytepos += next_char_len (bytepos);
1422 else
1423 bytepos += BYTES_BY_CHAR_HEAD (*(SDATA (string) + bytepos));
1424 }
1425 else
1426 {
1427 charpos--;
1428 /* BYTEPOS is calculated in composition_compute_stop_pos */
1429 bytepos = -1;
1430 }
1431 if (cmp_it->reversed_p)
1432 endpos = -1;
1433 composition_compute_stop_pos (cmp_it, charpos, bytepos, endpos, string, true);
1434 return 0;
1435 }
1436
1437 /* Update charpos, nchars, nbytes, and width of the current grapheme
1438 cluster.
1439
1440 If the composition is static or automatic in L2R context, the
1441 cluster is identified by CMP_IT->from, and CHARPOS is the position
1442 of the first character of the cluster. In this case, update
1443 CMP_IT->to too.
1444
1445 If the composition is automatic in R2L context, the cluster is
1446 identified by CMP_IT->to, and CHARPOS is the position of the last
1447 character of the cluster. In this case, update CMP_IT->from too.
1448
1449 The return value is the character code of the first character of
1450 the cluster, or -1 if the composition is somehow broken. */
1451
1452 int
1453 composition_update_it (struct composition_it *cmp_it, ptrdiff_t charpos, ptrdiff_t bytepos, Lisp_Object string)
1454 {
1455 int i;
1456 int c UNINIT;
1457
1458 if (cmp_it->ch < 0)
1459 {
1460 /* static composition */
1461 struct composition *cmp = composition_table[cmp_it->id];
1462
1463 cmp_it->charpos = charpos;
1464 cmp_it->to = cmp_it->nglyphs;
1465 if (cmp_it->nglyphs == 0)
1466 c = -1;
1467 else
1468 {
1469 for (i = 0; i < cmp->glyph_len; i++)
1470 /* TAB in a composition means display glyphs with padding
1471 space on the left or right. */
1472 if ((c = COMPOSITION_GLYPH (cmp, i)) != '\t')
1473 break;
1474 if (c == '\t')
1475 c = ' ';
1476 }
1477 cmp_it->width = cmp->width;
1478 charpos += cmp_it->nchars;
1479 if (STRINGP (string))
1480 cmp_it->nbytes = string_char_to_byte (string, charpos) - bytepos;
1481 else
1482 cmp_it->nbytes = CHAR_TO_BYTE (charpos) - bytepos;
1483 }
1484 else
1485 {
1486 /* Automatic composition. */
1487 Lisp_Object gstring = composition_gstring_from_id (cmp_it->id);
1488 Lisp_Object glyph;
1489 ptrdiff_t from;
1490
1491 if (cmp_it->nglyphs == 0)
1492 {
1493 cmp_it->nchars = LGSTRING_CHAR_LEN (gstring);
1494 cmp_it->width = 0;
1495 cmp_it->from = cmp_it->to = 0;
1496 return -1;
1497 }
1498 if (! cmp_it->reversed_p)
1499 {
1500 glyph = LGSTRING_GLYPH (gstring, cmp_it->from);
1501 from = LGLYPH_FROM (glyph);
1502 for (cmp_it->to = cmp_it->from + 1; cmp_it->to < cmp_it->nglyphs;
1503 cmp_it->to++)
1504 {
1505 glyph = LGSTRING_GLYPH (gstring, cmp_it->to);
1506 if (LGLYPH_FROM (glyph) != from)
1507 break;
1508 }
1509 cmp_it->charpos = charpos;
1510 }
1511 else
1512 {
1513 glyph = LGSTRING_GLYPH (gstring, cmp_it->to - 1);
1514 from = LGLYPH_FROM (glyph);
1515 cmp_it->charpos = charpos - (LGLYPH_TO (glyph) - from);
1516 for (cmp_it->from = cmp_it->to - 1; cmp_it->from > 0;
1517 cmp_it->from--)
1518 {
1519 glyph = LGSTRING_GLYPH (gstring, cmp_it->from - 1);
1520 if (LGLYPH_FROM (glyph) != from)
1521 break;
1522 }
1523 }
1524 glyph = LGSTRING_GLYPH (gstring, cmp_it->from);
1525 cmp_it->nchars = LGLYPH_TO (glyph) + 1 - from;
1526 cmp_it->nbytes = 0;
1527 cmp_it->width = 0;
1528 for (i = cmp_it->nchars - 1; i >= 0; i--)
1529 {
1530 c = XFIXNUM (LGSTRING_CHAR (gstring, from + i));
1531 cmp_it->nbytes += CHAR_BYTES (c);
1532 cmp_it->width += CHARACTER_WIDTH (c);
1533 }
1534 }
1535 return c;
1536 }
1537
1538
1539 struct position_record
1540 {
1541 ptrdiff_t pos, pos_byte;
1542 unsigned char *p;
1543 };
1544
1545 /* Update the members of POSITION to the next character boundary. */
1546 #define FORWARD_CHAR(POSITION, STOP) \
1547 do { \
1548 (POSITION).pos++; \
1549 if ((POSITION).pos == (STOP)) \
1550 { \
1551 (POSITION).p = GAP_END_ADDR; \
1552 (POSITION).pos_byte = GPT_BYTE; \
1553 } \
1554 else \
1555 { \
1556 (POSITION).pos_byte += BYTES_BY_CHAR_HEAD (*((POSITION).p)); \
1557 (POSITION).p += BYTES_BY_CHAR_HEAD (*((POSITION).p)); \
1558 } \
1559 } while (0)
1560
1561 /* Update the members of POSITION to the previous character boundary. */
1562 #define BACKWARD_CHAR(POSITION, STOP) \
1563 do { \
1564 if ((POSITION).pos == (STOP)) \
1565 (POSITION).p = GPT_ADDR; \
1566 do { \
1567 (POSITION).pos_byte--; \
1568 (POSITION).p--; \
1569 } while (! CHAR_HEAD_P (*((POSITION).p))); \
1570 (POSITION).pos--; \
1571 } while (0)
1572
1573 /* Similar to find_composition, but find an automatic composition instead.
1574
1575 This function looks for automatic composition at or near position
1576 POS of STRING object, either a buffer or a Lisp string. If STRING
1577 is nil, it defaults to the current buffer. It must be assured that
1578 POS is not within a static composition. Also, the current buffer
1579 must be displayed in some window, otherwise the function will
1580 return FALSE.
1581
1582 If LIMIT is negative, and there's no composition that includes POS
1583 (i.e. starts at or before POS and ends at or after POS), return
1584 FALSE. In this case, the function is allowed to look from POS as
1585 far back as BACKLIM, and as far forward as POS+1 plus
1586 MAX_AUTO_COMPOSITION_LOOKBACK, the maximum number of look-back for
1587 automatic compositions (3) -- this is a limitation imposed by
1588 composition rules in composition-function-table, which see. If
1589 BACKLIM is negative, it stands for the beginning of STRING object:
1590 BEGV for a buffer or position zero for a string.
1591
1592 If LIMIT is positive, search for a composition forward (LIMIT >
1593 POS) or backward (LIMIT < POS). In this case, LIMIT bounds the
1594 search for the first character of a composed sequence.
1595 (LIMIT == POS is the same as LIMIT < 0.) If LIMIT > POS, the
1596 function can find a composition that starts after POS.
1597
1598 BACKLIM limits how far back is the function allowed to look in
1599 STRING object while trying to find a position where it is safe to
1600 start searching forward for compositions. Such a safe place is
1601 generally the position after a character that can never be
1602 composed.
1603
1604 If BACKLIM is negative, that means the first character position of
1605 STRING object; this is useful when calling the function for the
1606 first time for a given buffer or string, since it is possible that
1607 a composition begins before POS. However, if POS is very far from
1608 the beginning of STRING object, a negative value of BACKLIM could
1609 make the function slow. For that reason, when STRING is a buffer
1610 or nil, we restrict the search back to the first newline before
1611 POS. Also, in this case the function may return START and END that
1612 do not include POS, something that is not necessarily wanted, and
1613 needs to be explicitly checked by the caller.
1614
1615 When calling the function in a loop for the same buffer/string, the
1616 caller should generally set BACKLIM equal to POS, to avoid costly
1617 repeated searches backward. This is because if the previous
1618 positions were already checked for compositions, there should be no
1619 reason to re-check them.
1620
1621 If BACKLIM is positive, it must be less or equal to LIMIT.
1622
1623 If an automatic composition satisfying the above conditions is
1624 found, set *GSTRING to the Lispy glyph-string representing the
1625 composition, set *START and *END to the start and end of the
1626 composed sequence, and return TRUE. Otherwise, set *GSTRING to
1627 nil, and return FALSE. */
1628
1629 bool
1630 find_automatic_composition (ptrdiff_t pos, ptrdiff_t limit, ptrdiff_t backlim,
1631 ptrdiff_t *start, ptrdiff_t *end,
1632 Lisp_Object *gstring, Lisp_Object string)
1633 {
1634 ptrdiff_t head, tail, stop;
1635 /* Forward limit position of checking a composition taking a
1636 looking-back count into account. */
1637 ptrdiff_t fore_check_limit;
1638 struct position_record cur, prev;
1639 int c;
1640 Lisp_Object window;
1641 struct window *w;
1642 bool need_adjustment = 0;
1643
1644 window = Fget_buffer_window (Fcurrent_buffer (), Qnil);
1645 if (NILP (window))
1646 return 0;
1647 w = XWINDOW (window);
1648
1649 cur.pos = pos;
1650 if (NILP (string))
1651 {
1652 if (backlim < 0)
1653 {
1654 /* This assumes a newline can never be composed. */
1655 head = find_newline (pos, -1, 0, -1, -1, NULL, NULL, false);
1656 }
1657 else
1658 head = backlim;
1659 if (current_buffer->long_line_optimizations_p)
1660 {
1661 /* In buffers with very long lines, this function becomes very
1662 slow. Pretend that the buffer is narrowed to make it fast. */
1663 ptrdiff_t begv = get_small_narrowing_begv (w, window_point (w));
1664 if (pos > begv)
1665 head = begv;
1666 }
1667 tail = ZV;
1668 stop = GPT;
1669 cur.pos_byte = CHAR_TO_BYTE (cur.pos);
1670 cur.p = BYTE_POS_ADDR (cur.pos_byte);
1671 }
1672 else
1673 {
1674 head = backlim < 0 ? 0 : backlim, tail = SCHARS (string), stop = -1;
1675 cur.pos_byte = string_char_to_byte (string, cur.pos);
1676 cur.p = SDATA (string) + cur.pos_byte;
1677 }
1678 if (limit < 0)
1679 /* Finding a composition covering the character after POS is the
1680 same as setting LIMIT to POS. */
1681 limit = pos;
1682
1683 eassert (backlim < 0 || backlim <= limit);
1684
1685 if (limit <= pos)
1686 fore_check_limit = min (tail, pos + 1 + MAX_AUTO_COMPOSITION_LOOKBACK);
1687 else
1688 fore_check_limit = min (tail, limit + MAX_AUTO_COMPOSITION_LOOKBACK);
1689
1690 /* Provided that we have these possible compositions now:
1691
1692 POS: 1 2 3 4 5 6 7 8 9
1693 |-A-|
1694 |-B-|-C-|--D--|
1695
1696 Here, it is known that characters after positions 1 and 9 can
1697 never be composed (i.e. ! char_composable_p (CH)), and
1698 composition A is an invalid one because it's partially covered by
1699 the valid composition C. And to know whether a composition is
1700 valid or not, the only way is to start searching forward from a
1701 position that can not be a tail part of composition (it's 2 in
1702 the above case).
1703
1704 Now we have these cases (1 through 4):
1705
1706 -- character after POS is ... --
1707 not composable composable
1708 LIMIT <= POS (1) (3)
1709 POS < LIMIT (2) (4)
1710
1711 Among them, in case (2), we simply search forward from POS.
1712
1713 In the other cases, we at first rewind back to the position where
1714 the previous character is not composable or the beginning of
1715 buffer (string), then search compositions forward. In case (1)
1716 and (3) we repeat this process until a composition is found. */
1717
1718 while (1)
1719 {
1720 c = STRING_CHAR (cur.p);
1721 if (! char_composable_p (c))
1722 {
1723 if (limit <= pos) /* case (1) */
1724 {
1725 do {
1726 if (cur.pos <= limit)
1727 return 0;
1728 BACKWARD_CHAR (cur, stop);
1729 c = STRING_CHAR (cur.p);
1730 } while (! char_composable_p (c));
1731 fore_check_limit = cur.pos + 1;
1732 }
1733 else /* case (2) */
1734 /* No need of rewinding back. */
1735 goto search_forward;
1736 }
1737
1738 /* Rewind back to the position where we can safely search
1739 forward for compositions. It is assured that the character
1740 at cur.pos is composable. */
1741 while (head < cur.pos)
1742 {
1743 prev = cur;
1744 BACKWARD_CHAR (cur, stop);
1745 c = STRING_CHAR (cur.p);
1746 if (! char_composable_p (c))
1747 {
1748 cur = prev;
1749 break;
1750 }
1751 }
1752
1753 search_forward:
1754 /* Now search forward. */
1755 *gstring = Qnil;
1756 prev = cur; /* remember the start of searching position. */
1757 while (cur.pos < fore_check_limit)
1758 {
1759 Lisp_Object val;
1760
1761 c = STRING_CHAR (cur.p);
1762 for (val = CHAR_TABLE_REF (Vcomposition_function_table, c);
1763 CONSP (val); val = XCDR (val))
1764 {
1765 Lisp_Object elt = XCAR (val);
1766
1767 if (VECTORP (elt) && ASIZE (elt) == 3 && FIXNATP (AREF (elt, 1)))
1768 {
1769 EMACS_INT check_pos = cur.pos - XFIXNAT (AREF (elt, 1));
1770 struct position_record check;
1771
1772 if (check_pos < head
1773 || (limit <= pos ? pos < check_pos
1774 : limit <= check_pos))
1775 continue;
1776 for (check = cur; check_pos < check.pos; )
1777 BACKWARD_CHAR (check, stop);
1778 *gstring = autocmp_chars (elt, check.pos, check.pos_byte,
1779 tail, w, NULL, string, Qnil, c);
1780 need_adjustment = 1;
1781 if (NILP (*gstring))
1782 {
1783 /* As we have called Lisp, there's a possibility
1784 that buffer/string is relocated. */
1785 if (NILP (string))
1786 cur.p = BYTE_POS_ADDR (cur.pos_byte);
1787 else
1788 cur.p = SDATA (string) + cur.pos_byte;
1789 }
1790 else
1791 {
1792 /* We found a candidate of a target composition. */
1793 *start = check.pos;
1794 *end = check.pos + LGSTRING_CHAR_LEN (*gstring);
1795 if (pos < limit
1796 ? pos < *end
1797 : *start <= pos && pos < *end)
1798 /* This is the target composition. */
1799 return 1;
1800 cur.pos = *end;
1801 if (NILP (string))
1802 {
1803 cur.pos_byte = CHAR_TO_BYTE (cur.pos);
1804 cur.p = BYTE_POS_ADDR (cur.pos_byte);
1805 }
1806 else
1807 {
1808 cur.pos_byte = string_char_to_byte (string, cur.pos);
1809 cur.p = SDATA (string) + cur.pos_byte;
1810 }
1811 break;
1812 }
1813 }
1814 }
1815 if (! CONSP (val))
1816 /* We found no composition here. */
1817 FORWARD_CHAR (cur, stop);
1818 }
1819
1820 if (pos < limit) /* case (2) and (4)*/
1821 return 0;
1822 if (! NILP (*gstring))
1823 return 1;
1824 if (prev.pos == head)
1825 return 0;
1826 cur = prev;
1827 if (need_adjustment)
1828 {
1829 if (NILP (string))
1830 cur.p = BYTE_POS_ADDR (cur.pos_byte);
1831 else
1832 cur.p = SDATA (string) + cur.pos_byte;
1833 }
1834 BACKWARD_CHAR (cur, stop);
1835 }
1836 }
1837
1838 /* Return the adjusted point provided that point is moved from LAST_PT
1839 to NEW_PT. */
1840
1841 ptrdiff_t
1842 composition_adjust_point (ptrdiff_t last_pt, ptrdiff_t new_pt)
1843 {
1844 ptrdiff_t i, beg, end;
1845 Lisp_Object val;
1846
1847 if (new_pt == BEGV || new_pt == ZV)
1848 return new_pt;
1849
1850 /* At first check the static composition. */
1851 if (get_property_and_range (new_pt, Qcomposition, &val, &beg, &end, Qnil)
1852 && composition_valid_p (beg, end, val))
1853 {
1854 if (beg < new_pt /* && end > new_pt <- It's always the case. */
1855 && (last_pt <= beg || last_pt >= end))
1856 return (new_pt < last_pt ? beg : end);
1857 return new_pt;
1858 }
1859
1860 if (NILP (BVAR (current_buffer, enable_multibyte_characters))
1861 || inhibit_auto_composition ())
1862 return new_pt;
1863
1864 /* Next check the automatic composition. */
1865 if (! find_automatic_composition (new_pt, (ptrdiff_t) -1, (ptrdiff_t) -1,
1866 &beg, &end, &val, Qnil)
1867 || beg == new_pt)
1868 return new_pt;
1869 for (i = 0; i < LGSTRING_GLYPH_LEN (val); i++)
1870 {
1871 Lisp_Object glyph = LGSTRING_GLYPH (val, i);
1872
1873 if (NILP (glyph))
1874 break;
1875 if (beg + LGLYPH_FROM (glyph) == new_pt)
1876 return new_pt;
1877 if (beg + LGLYPH_TO (glyph) >= new_pt)
1878 return (new_pt < last_pt
1879 ? beg + LGLYPH_FROM (glyph)
1880 : beg + LGLYPH_TO (glyph) + 1);
1881 }
1882 return new_pt;
1883 }
1884
1885 DEFUN ("composition-get-gstring", Fcomposition_get_gstring,
1886 Scomposition_get_gstring, 4, 4, 0,
1887 doc: /* Return a glyph-string for characters between FROM and TO.
1888 If the glyph string is for graphic display, FONT-OBJECT must be
1889 a font-object to use for those characters.
1890 Otherwise (for terminal display), FONT-OBJECT must be a terminal ID, a
1891 frame, or nil for the selected frame's terminal device.
1892
1893 If the optional 4th argument STRING is not nil, it is a string
1894 containing the target characters between indices FROM and TO,
1895 which are treated as in `substring'. Otherwise FROM and TO are
1896 character positions in current buffer; they can be in either order,
1897 and can be integers or markers.
1898
1899 A glyph-string is a vector containing information about how to display
1900 a specific character sequence. The format is:
1901 [HEADER ID GLYPH ...]
1902
1903 HEADER is a vector of this form:
1904 [FONT-OBJECT CHAR ...]
1905 where
1906 FONT-OBJECT is a font-object for all glyphs in the glyph-string,
1907 or the terminal coding system of the specified terminal.
1908 CHARs are characters to be composed by GLYPHs.
1909
1910 ID is an identification number of the glyph-string. It may be nil if
1911 not yet shaped.
1912
1913 GLYPH is a vector whose elements have this form:
1914 [ FROM-IDX TO-IDX C CODE WIDTH LBEARING RBEARING ASCENT DESCENT
1915 [ [X-OFF Y-OFF WADJUST] | nil] ]
1916 where
1917 FROM-IDX and TO-IDX are used internally and should not be touched.
1918 C is the character of the glyph.
1919 CODE is the glyph-code of C in FONT-OBJECT.
1920 WIDTH thru DESCENT are the metrics (in pixels) of the glyph.
1921 X-OFF and Y-OFF are offsets to the base position for the glyph.
1922 WADJUST is the adjustment to the normal width of the glyph.
1923
1924 If GLYPH is nil, the remaining elements of the glyph-string vector
1925 should be ignored. */)
1926 (Lisp_Object from, Lisp_Object to, Lisp_Object font_object, Lisp_Object string)
1927 {
1928 Lisp_Object gstring, header;
1929 ptrdiff_t frompos, frombyte, topos;
1930
1931 if (! FONT_OBJECT_P (font_object))
1932 {
1933 struct coding_system *coding;
1934 struct terminal *terminal = decode_live_terminal (font_object);
1935
1936 coding = ((TERMINAL_TERMINAL_CODING (terminal)->common_flags
1937 & CODING_REQUIRE_ENCODING_MASK)
1938 ? TERMINAL_TERMINAL_CODING (terminal) : &safe_terminal_coding);
1939 font_object = CODING_ID_NAME (coding->id);
1940 }
1941
1942 if (NILP (string))
1943 {
1944 if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
1945 error ("Attempt to shape unibyte text");
1946 validate_region (&from, &to);
1947 frompos = XFIXNAT (from);
1948 topos = XFIXNAT (to);
1949 frombyte = CHAR_TO_BYTE (frompos);
1950 }
1951 else
1952 {
1953 CHECK_STRING (string);
1954 ptrdiff_t chars = SCHARS (string);
1955 validate_subarray (string, from, to, chars, &frompos, &topos);
1956 if (! STRING_MULTIBYTE (string))
1957 {
1958 ptrdiff_t i;
1959
1960 for (i = SBYTES (string) - 1; i >= 0; i--)
1961 if (!ASCII_CHAR_P (SREF (string, i)))
1962 error ("Attempt to shape unibyte text");
1963 /* STRING is a pure-ASCII string, so we can convert it (or,
1964 rather, its copy) to multibyte and use that thereafter. */
1965 /* FIXME: Not clear why we need to do that: AFAICT the rest of
1966 the code should work on an ASCII-only unibyte string just
1967 as well (bug#56347). */
1968 string = make_multibyte_string (SSDATA (string), chars, chars);
1969 }
1970 frombyte = string_char_to_byte (string, frompos);
1971 }
1972
1973 header = fill_gstring_header (frompos, frombyte,
1974 topos, font_object, string);
1975 gstring = composition_gstring_lookup_cache (header);
1976 if (! NILP (gstring))
1977 return gstring;
1978
1979 if (LGSTRING_GLYPH_LEN (gstring_work) < topos - frompos)
1980 gstring_work = make_nil_vector (topos - frompos + 2);
1981 LGSTRING_SET_HEADER (gstring_work, header);
1982 LGSTRING_SET_ID (gstring_work, Qnil);
1983 fill_gstring_body (gstring_work);
1984 return gstring_work;
1985 }
1986
1987
1988 /* Emacs Lisp APIs. */
1989
1990 DEFUN ("compose-region-internal", Fcompose_region_internal,
1991 Scompose_region_internal, 2, 4, 0,
1992 doc: /* Internal use only.
1993
1994 Compose text in the region between START and END.
1995 Optional 3rd and 4th arguments are COMPONENTS and MODIFICATION-FUNC
1996 for the composition. See `compose-region' for more details. */)
1997 (Lisp_Object start, Lisp_Object end, Lisp_Object components, Lisp_Object modification_func)
1998 {
1999 validate_region (&start, &end);
2000 if (!NILP (components)
2001 && !FIXNUMP (components)
2002 && !CONSP (components)
2003 && !STRINGP (components))
2004 CHECK_VECTOR (components);
2005
2006 compose_text (XFIXNUM (start), XFIXNUM (end), components, modification_func, Qnil);
2007 return Qnil;
2008 }
2009
2010 DEFUN ("compose-string-internal", Fcompose_string_internal,
2011 Scompose_string_internal, 3, 5, 0,
2012 doc: /* Internal use only.
2013
2014 Compose text between indices START and END of STRING, where
2015 START and END are treated as in `substring'. Optional 4th
2016 and 5th arguments are COMPONENTS and MODIFICATION-FUNC
2017 for the composition. See `compose-string' for more details. */)
2018 (Lisp_Object string, Lisp_Object start, Lisp_Object end,
2019 Lisp_Object components, Lisp_Object modification_func)
2020 {
2021 ptrdiff_t from, to;
2022
2023 CHECK_STRING (string);
2024 validate_subarray (string, start, end, SCHARS (string), &from, &to);
2025 compose_text (from, to, components, modification_func, string);
2026 return string;
2027 }
2028
2029 DEFUN ("find-composition-internal", Ffind_composition_internal,
2030 Sfind_composition_internal, 4, 4, 0,
2031 doc: /* Internal use only.
2032
2033 Return information about composition at or nearest to position POS.
2034 See `find-composition' for more details. */)
2035 (Lisp_Object pos, Lisp_Object limit, Lisp_Object string, Lisp_Object detail_p)
2036 {
2037 Lisp_Object prop, tail, gstring;
2038 ptrdiff_t start, end, from, to;
2039 int id;
2040
2041 EMACS_INT fixed_pos = fix_position (pos);
2042 if (!NILP (limit))
2043 to = clip_to_bounds (PTRDIFF_MIN, fix_position (limit), ZV);
2044 else
2045 to = -1;
2046
2047 if (!NILP (string))
2048 {
2049 CHECK_STRING (string);
2050 if (! (0 <= fixed_pos && fixed_pos <= SCHARS (string)))
2051 args_out_of_range (string, pos);
2052 }
2053 else
2054 {
2055 if (! (BEGV <= fixed_pos && fixed_pos <= ZV))
2056 args_out_of_range (Fcurrent_buffer (), pos);
2057 }
2058 from = fixed_pos;
2059
2060 if (!find_composition (from, to, &start, &end, &prop, string))
2061 {
2062 if (((NILP (string)
2063 && !NILP (BVAR (current_buffer, enable_multibyte_characters)))
2064 || (!NILP (string) && STRING_MULTIBYTE (string)))
2065 && ! inhibit_auto_composition ()
2066 && find_automatic_composition (from, to, (ptrdiff_t) -1,
2067 &start, &end, &gstring, string))
2068 return list3 (make_fixnum (start), make_fixnum (end), gstring);
2069 return Qnil;
2070 }
2071 if (! (start <= fixed_pos && fixed_pos < end))
2072 {
2073 ptrdiff_t s, e;
2074
2075 if (find_automatic_composition (from, to, (ptrdiff_t) -1,
2076 &s, &e, &gstring, string)
2077 && (e <= fixed_pos ? e > end : s < start))
2078 return list3 (make_fixnum (s), make_fixnum (e), gstring);
2079 }
2080 if (!composition_valid_p (start, end, prop))
2081 return list3 (make_fixnum (start), make_fixnum (end), Qnil);
2082 if (NILP (detail_p))
2083 return list3 (make_fixnum (start), make_fixnum (end), Qt);
2084
2085 if (composition_registered_p (prop))
2086 id = COMPOSITION_ID (prop);
2087 else
2088 {
2089 ptrdiff_t start_byte = (NILP (string)
2090 ? CHAR_TO_BYTE (start)
2091 : string_char_to_byte (string, start));
2092 id = get_composition_id (start, start_byte, end - start, prop, string);
2093 }
2094
2095 if (id >= 0)
2096 {
2097 Lisp_Object components, relative_p, mod_func;
2098 enum composition_method method = composition_method (prop);
2099 int width = composition_table[id]->width;
2100
2101 components = Fcopy_sequence (COMPOSITION_COMPONENTS (prop));
2102 relative_p = (method == COMPOSITION_WITH_RULE_ALTCHARS
2103 ? Qnil : Qt);
2104 mod_func = COMPOSITION_MODIFICATION_FUNC (prop);
2105 tail = list4 (components, relative_p, mod_func, make_fixnum (width));
2106 }
2107 else
2108 tail = Qnil;
2109
2110 return Fcons (make_fixnum (start), Fcons (make_fixnum (end), tail));
2111 }
2112
2113 static int
2114 compare_composition_rules (const void *r1, const void *r2)
2115 {
2116 Lisp_Object vec1 = *(Lisp_Object *)r1, vec2 = *(Lisp_Object *)r2;
2117
2118 return XFIXNAT (AREF (vec2, 1)) - XFIXNAT (AREF (vec1, 1));
2119 }
2120
2121 DEFUN ("composition-sort-rules", Fcomposition_sort_rules,
2122 Scomposition_sort_rules, 1, 1, 0,
2123 doc: /* Sort composition RULES by their LOOKBACK parameter.
2124
2125 If RULES include just one rule, return RULES.
2126 Otherwise, return a new list of rules where all the rules are
2127 arranged in decreasing order of the LOOKBACK parameter of the
2128 rules (the second element of the rule's vector). This is required
2129 when combining composition rules from different sources, because
2130 of the way buffer text is examined for matching one of the rules. */)
2131 (Lisp_Object rules)
2132 {
2133 ptrdiff_t nrules;
2134 USE_SAFE_ALLOCA;
2135
2136 CHECK_LIST (rules);
2137 nrules = list_length (rules);
2138 if (nrules > 1)
2139 {
2140 ptrdiff_t i;
2141 Lisp_Object *sortvec;
2142
2143 SAFE_NALLOCA (sortvec, 1, nrules);
2144 for (i = 0; i < nrules; i++)
2145 {
2146 Lisp_Object elt = XCAR (rules);
2147 if (VECTORP (elt) && ASIZE (elt) == 3 && FIXNATP (AREF (elt, 1)))
2148 sortvec[i] = elt;
2149 else
2150 error ("Invalid composition rule in RULES argument");
2151 rules = XCDR (rules);
2152 }
2153 qsort (sortvec, nrules, sizeof (Lisp_Object), compare_composition_rules);
2154 rules = Flist (nrules, sortvec);
2155 SAFE_FREE ();
2156 }
2157
2158 return rules;
2159 }
2160
2161
2162 void
2163 syms_of_composite (void)
2164 {
2165 int i;
2166
2167 DEFSYM (Qcomposition, "composition");
2168
2169 /* Make a hash table for static composition. */
2170 /* We used to make the hash table weak so that unreferenced
2171 compositions can be garbage-collected. But, usually once
2172 created compositions are repeatedly used in an Emacs session,
2173 and thus it's not worth to save memory in such a way. So, we
2174 make the table not weak. */
2175 Lisp_Object args[] = {QCtest, Qequal, QCsize, make_fixnum (311)};
2176 composition_hash_table = CALLMANY (Fmake_hash_table, args);
2177 staticpro (&composition_hash_table);
2178
2179 /* Make a hash table for glyph-string. */
2180 gstring_hash_table = CALLMANY (Fmake_hash_table, args);
2181 staticpro (&gstring_hash_table);
2182
2183 staticpro (&gstring_work_headers);
2184 gstring_work_headers = make_nil_vector (8);
2185 for (i = 0; i < 8; i++)
2186 ASET (gstring_work_headers, i, make_nil_vector (i + 2));
2187 staticpro (&gstring_work);
2188 gstring_work = make_nil_vector (10);
2189
2190 /* Text property `composition' should be nonsticky by default. */
2191 Vtext_property_default_nonsticky
2192 = Fcons (Fcons (Qcomposition, Qt), Vtext_property_default_nonsticky);
2193
2194 DEFVAR_LISP ("compose-chars-after-function", Vcompose_chars_after_function,
2195 doc: /* Function to adjust composition of buffer text.
2196
2197 This function is called with three arguments: FROM, TO, and OBJECT.
2198 FROM and TO specify the range of text whose composition should be
2199 adjusted. OBJECT, if non-nil, is a string that contains the text.
2200
2201 This function is called after a text with `composition' property is
2202 inserted or deleted to keep `composition' property of buffer text
2203 valid.
2204
2205 The default value is the function `compose-chars-after'. */);
2206 Vcompose_chars_after_function = intern_c_string ("compose-chars-after");
2207
2208 DEFSYM (Qauto_composed, "auto-composed");
2209
2210 DEFVAR_LISP ("auto-composition-mode", Vauto_composition_mode,
2211 doc: /* Non-nil if Auto-Composition mode is enabled.
2212 Use the command `auto-composition-mode' to change this variable.
2213
2214 If this variable is a string, `auto-composition-mode' will be disabled in
2215 buffers displayed on a terminal whose type, as reported by `tty-type',
2216 compares equal to that string. */);
2217 Vauto_composition_mode = Qt;
2218
2219 DEFVAR_LISP ("auto-composition-function", Vauto_composition_function,
2220 doc: /* Function to call to compose characters automatically.
2221 This function is called from the display engine with 6 arguments:
2222 FUNC, FROM, TO, FONT-OBJECT, STRING, and DIRECTION.
2223
2224 FUNC is the function to compose characters. On text-mode display,
2225 FUNC is ignored and `compose-gstring-for-terminal' is used instead.
2226
2227 If STRING is nil, the function must compose characters in the region
2228 between FROM and TO in the current buffer.
2229
2230 Otherwise, STRING is a string, and FROM and TO are indices into the
2231 string. In this case, the function must compose characters in the
2232 string.
2233
2234 FONT-OBJECT is the font to use, or nil if characters are to be
2235 composed on a text-mode display.
2236
2237 DIRECTION is the bidi directionality of the text to shape. It could
2238 be L2R or R2L, or nil if unknown. */);
2239 Vauto_composition_function = Qnil;
2240
2241 DEFVAR_LISP ("composition-function-table", Vcomposition_function_table,
2242 doc: /* Char-table of functions for automatic character composition.
2243 For each character that has to be composed automatically with
2244 preceding and/or following characters, this char-table contains
2245 a function to call to compose that character.
2246
2247 The element at index C in the table, if non-nil, is a list of
2248 composition rules of the form ([PATTERN PREV-CHARS FUNC] ...);
2249 the rules must be specified in the descending order of PREV-CHARS
2250 values.
2251
2252 PATTERN is a regular expression which C and the surrounding
2253 characters must match.
2254
2255 PREV-CHARS is a non-negative integer (less than 4) specifying how many
2256 characters before C to check the matching with PATTERN. If it is 0,
2257 PATTERN must match C and the following characters. If it is 1,
2258 PATTERN must match a character before C and the following characters.
2259
2260 If PREV-CHARS is 0, PATTERN can be nil, which means that the
2261 single character C should be composed.
2262
2263 FUNC is a function to return a glyph-string representing a
2264 composition of the characters that match PATTERN. It is
2265 called with one argument GSTRING.
2266
2267 GSTRING is a template of a glyph-string to return. It is already
2268 filled with a proper header for the characters to compose, and
2269 glyphs corresponding to those characters one by one. The
2270 function must return a new glyph-string with the same header as
2271 GSTRING, or modify GSTRING itself and return it.
2272
2273 See also the documentation of `auto-composition-mode'. */);
2274 Vcomposition_function_table = Fmake_char_table (Qnil, Qnil);
2275
2276 DEFVAR_LISP ("auto-composition-emoji-eligible-codepoints", Vauto_composition_emoji_eligible_codepoints,
2277 doc: /* List of codepoints for which auto-composition will check for an emoji font.
2278
2279 These are codepoints which have Emoji_Presentation = No, and thus by
2280 default are not displayed as emoji. In certain circumstances, such as
2281 when followed by U+FE0F (VS-16) the emoji font should be used for
2282 them anyway.
2283
2284 This list is auto-generated, you should not need to modify it. */);
2285 Vauto_composition_emoji_eligible_codepoints = Qnil;
2286
2287 defsubr (&Scompose_region_internal);
2288 defsubr (&Scompose_string_internal);
2289 defsubr (&Sfind_composition_internal);
2290 defsubr (&Scomposition_get_gstring);
2291 defsubr (&Sclear_composition_cache);
2292 defsubr (&Scomposition_sort_rules);
2293 }