root/src/composite.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. get_composition_id
  2. find_composition
  3. run_composition_function
  4. update_compositions
  5. make_composition_value_copy
  6. compose_text
  7. composition_gstring_lookup_cache
  8. composition_gstring_put_cache
  9. composition_gstring_from_id
  10. composition_gstring_cache_clear_font
  11. DEFUN
  12. composition_gstring_p
  13. composition_gstring_width
  14. composition_gstring_adjust_zero_width
  15. fill_gstring_header
  16. fill_gstring_body
  17. autocmp_chars
  18. char_composable_p
  19. inhibit_auto_composition
  20. composition_compute_stop_pos
  21. composition_reseat_it
  22. composition_update_it
  23. find_automatic_composition
  24. composition_adjust_point
  25. compare_composition_rules
  26. DEFUN
  27. syms_of_composite

     1 /* Composite sequence support.
     2    Copyright (C) 2001-2023 Free Software Foundation, Inc.
     3    Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
     4      National Institute of Advanced Industrial Science and Technology (AIST)
     5      Registration Number H14PRO021
     6    Copyright (C) 2003, 2006
     7      National Institute of Advanced Industrial Science and Technology (AIST)
     8      Registration Number H13PRO009
     9 
    10 This file is part of GNU Emacs.
    11 
    12 GNU Emacs is free software: you can redistribute it and/or modify
    13 it under the terms of the GNU General Public License as published by
    14 the Free Software Foundation, either version 3 of the License, or (at
    15 your option) any later version.
    16 
    17 GNU Emacs is distributed in the hope that it will be useful,
    18 but WITHOUT ANY WARRANTY; without even the implied warranty of
    19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    20 GNU General Public License for more details.
    21 
    22 You should have received a copy of the GNU General Public License
    23 along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
    24 
    25 #include <config.h>
    26 
    27 #include <stdlib.h>             /* for qsort */
    28 
    29 #include "lisp.h"
    30 #include "character.h"
    31 #include "composite.h"
    32 #include "buffer.h"
    33 #include "coding.h"
    34 #include "intervals.h"
    35 #include "frame.h"
    36 #include "dispextern.h"
    37 #include "termhooks.h"
    38 
    39 
    40 /* Emacs uses special text property `composition' to support character
    41    composition.  A sequence of characters that have the same (i.e. eq)
    42    `composition' property value is treated as a single composite
    43    sequence (we call it just `composition' here after).  Characters in
    44    a composition are all composed somehow on the screen.
    45 
    46    The property value has this form when the composition is made:
    47         ((LENGTH . COMPONENTS) . MODIFICATION-FUNC)
    48    then turns to this form:
    49         (COMPOSITION-ID . (LENGTH COMPONENTS-VEC . MODIFICATION-FUNC))
    50    when the composition is registered in composition_hash_table and
    51    composition_table.  These rather peculiar structures were designed
    52    to make it easy to distinguish them quickly (we can do that by
    53    checking only the first element) and to extract LENGTH (from the
    54    former form) and COMPOSITION-ID (from the latter form).
    55 
    56    We register a composition when it is displayed, or when the width
    57    is required (for instance, to calculate columns).
    58 
    59    LENGTH -- Length of the composition.  This information is used to
    60         check the validity of the composition.
    61 
    62    COMPONENTS --  Character, string, vector, list, or nil.
    63 
    64         If it is nil, characters in the text are composed relatively
    65         according to their metrics in font glyphs.
    66 
    67         If it is a character or a string, the character or characters
    68         in the string are composed relatively.
    69 
    70         If it is a vector or list of integers, the element is a
    71         character or an encoded composition rule.  The characters are
    72         composed according to the rules.  (2N)th elements are
    73         characters to be composed and (2N+1)th elements are
    74         composition rules to tell how to compose (2N+2)th element with
    75         the previously composed 2N glyphs.
    76 
    77    COMPONENTS-VEC -- Vector of integers.  In a relative composition,
    78         the elements are the characters to be composed.  In a rule-base
    79         composition, the elements are characters or encoded
    80         composition rules.
    81 
    82    MODIFICATION-FUNC -- If non-nil, it is a function to call when the
    83         composition gets invalid after a modification in a buffer.  If
    84         it is nil, a function in `composition-function-table' of the
    85         first character in the sequence is called.
    86 
    87    COMPOSITION-ID --Identification number of the composition.  It is
    88         used as an index to composition_table for the composition.
    89 
    90    When Emacs has to display a composition or has to know its
    91    displaying width, the function get_composition_id is called.  It
    92    returns COMPOSITION-ID so that the caller can access the
    93    information about the composition through composition_table.  If a
    94    COMPOSITION-ID has not yet been assigned to the composition,
    95    get_composition_id checks the validity of `composition' property,
    96    and, if valid, assigns a new ID, registers the information in
    97    composition_hash_table and composition_table, and changes the form
    98    of the property value.  If the property is invalid,
    99    get_composition_id returns -1 without changing the property value.
   100 
   101    We use two tables to keep the information about composition;
   102    composition_hash_table and composition_table.
   103 
   104    The former is a hash table whose keys are COMPONENTS-VECs and
   105    values are the corresponding COMPOSITION-IDs.  This hash table is
   106    weak, but as each key (COMPONENTS-VEC) is also kept as a value of the
   107    `composition' property, it won't be collected as garbage until all
   108    bits of text that have the same COMPONENTS-VEC are deleted.
   109 
   110    The latter is a table of pointers to `struct composition' indexed
   111    by COMPOSITION-ID.  This structure keeps the other information (see
   112    composite.h).
   113 
   114    In general, a text property holds information about individual
   115    characters.  But, a `composition' property holds information about
   116    a sequence of characters (in this sense, it is like the `intangible'
   117    property).  That means that we should not share the property value
   118    in adjacent compositions -- we can't distinguish them if they have the
   119    same property.  So, after any changes, we call
   120    `update_compositions' and change a property of one of adjacent
   121    compositions to a copy of it.  This function also runs a proper
   122    composition modification function to make a composition that gets
   123    invalid by the change valid again.
   124 
   125    As the value of the `composition' property holds information about a
   126    specific range of text, the value gets invalid if we change the
   127    text in the range.  We treat the `composition' property as always
   128    rear-nonsticky (currently by setting default-text-properties to
   129    (rear-nonsticky (composition))) and we never make properties of
   130    adjacent compositions identical.  Thus, any such changes make the
   131    range just shorter.  So, we can check the validity of the `composition'
   132    property by comparing LENGTH information with the actual length of
   133    the composition.
   134 
   135 */
   136 
   137 
   138 /* Table of pointers to the structure `composition' indexed by
   139    COMPOSITION-ID.  This structure is for storing information about
   140    each composition except for COMPONENTS-VEC.  */
   141 struct composition **composition_table;
   142 
   143 /* The current size of `composition_table'.  */
   144 static ptrdiff_t composition_table_size;
   145 
   146 /* Number of compositions currently made. */
   147 ptrdiff_t n_compositions;
   148 
   149 /* Hash table for compositions.  The key is COMPONENTS-VEC of
   150    `composition' property.  The value is the corresponding
   151    COMPOSITION-ID.  */
   152 Lisp_Object composition_hash_table;
   153 
   154 /* Maximum number of characters to look back for
   155    auto-compositions.  */
   156 #define MAX_AUTO_COMPOSITION_LOOKBACK 3
   157 
   158 /* Return COMPOSITION-ID of a composition at buffer position
   159    CHARPOS/BYTEPOS and length NCHARS.  The `composition' property of
   160    the sequence is PROP.  STRING, if non-nil, is a string that
   161    contains the composition instead of the current buffer.
   162 
   163    If the composition is invalid, return -1.  */
   164 
   165 ptrdiff_t
   166 get_composition_id (ptrdiff_t charpos, ptrdiff_t bytepos, ptrdiff_t nchars,
   167                     Lisp_Object prop, Lisp_Object string)
   168 {
   169   Lisp_Object id, length, components, key, *key_contents, hash_code;
   170   ptrdiff_t glyph_len;
   171   struct Lisp_Hash_Table *hash_table = XHASH_TABLE (composition_hash_table);
   172   ptrdiff_t hash_index;
   173   enum composition_method method;
   174   struct composition *cmp;
   175   int ch;
   176 
   177   /* Maximum length of a string of glyphs.  XftGlyphExtents limits
   178      this to INT_MAX, and Emacs limits it further.  Divide INT_MAX - 1
   179      by 2 because gui_produce_glyphs computes glyph_len * 2 + 1.  Divide
   180      the size by MAX_MULTIBYTE_LENGTH because encode_terminal_code
   181      multiplies glyph_len by MAX_MULTIBYTE_LENGTH.  */
   182   enum {
   183     GLYPH_LEN_MAX = min ((INT_MAX - 1) / 2,
   184                          min (PTRDIFF_MAX, SIZE_MAX) / MAX_MULTIBYTE_LENGTH)
   185   };
   186 
   187   /* PROP should be
   188         Form-A: ((LENGTH . COMPONENTS) . MODIFICATION-FUNC)
   189      or
   190         Form-B: (COMPOSITION-ID . (LENGTH COMPONENTS-VEC . MODIFICATION-FUNC))
   191   */
   192   if (nchars == 0 || !CONSP (prop))
   193     goto invalid_composition;
   194 
   195   id = XCAR (prop);
   196   if (FIXNUMP (id))
   197     {
   198       /* PROP should be Form-B.  */
   199       if (XFIXNUM (id) < 0 || XFIXNUM (id) >= n_compositions)
   200         goto invalid_composition;
   201       return XFIXNUM (id);
   202     }
   203 
   204   /* PROP should be Form-A.
   205      Thus, ID should be (LENGTH . COMPONENTS).  */
   206   if (!CONSP (id))
   207     goto invalid_composition;
   208   length = XCAR (id);
   209   if (!FIXNUMP (length) || XFIXNUM (length) != nchars)
   210     goto invalid_composition;
   211 
   212   components = XCDR (id);
   213 
   214   /* Check if the same composition has already been registered or not
   215      by consulting composition_hash_table.  The key for this table is
   216      COMPONENTS (converted to a vector COMPONENTS-VEC) or, if it is
   217      nil, vector of characters in the composition range.  */
   218   if (FIXNUMP (components))
   219     key = make_vector (1, components);
   220   else if (STRINGP (components) || CONSP (components))
   221     key = Fvconcat (1, &components);
   222   else if (VECTORP (components))
   223     key = components;
   224   else if (NILP (components))
   225     {
   226       key = make_uninit_vector (nchars);
   227       if (STRINGP (string))
   228         for (ptrdiff_t i = 0; i < nchars; i++)
   229           {
   230             ch = fetch_string_char_advance (string, &charpos, &bytepos);
   231             ASET (key, i, make_fixnum (ch));
   232           }
   233       else
   234         for (ptrdiff_t i = 0; i < nchars; i++)
   235           {
   236             ch = fetch_char_advance (&charpos, &bytepos);
   237             ASET (key, i, make_fixnum (ch));
   238           }
   239     }
   240   else
   241     goto invalid_composition;
   242 
   243   hash_index = hash_lookup (hash_table, key, &hash_code);
   244   if (hash_index >= 0)
   245     {
   246       /* We have already registered the same composition.  Change PROP
   247          from Form-A above to Form-B while replacing COMPONENTS with
   248          COMPONENTS-VEC stored in the hash table.  We can directly
   249          modify the cons cell of PROP because it is not shared.  */
   250       key = HASH_KEY (hash_table, hash_index);
   251       id = HASH_VALUE (hash_table, hash_index);
   252       XSETCAR (prop, id);
   253       XSETCDR (prop, Fcons (make_fixnum (nchars), Fcons (key, XCDR (prop))));
   254       return XFIXNUM (id);
   255     }
   256 
   257   /* This composition is a new one.  We must register it.  */
   258 
   259   /* Check if we have sufficient memory to store this information.  */
   260   if (composition_table_size <= n_compositions)
   261     composition_table = xpalloc (composition_table, &composition_table_size,
   262                                  1, -1, sizeof *composition_table);
   263 
   264   key_contents = XVECTOR (key)->contents;
   265 
   266   /* Check if the contents of COMPONENTS are valid if COMPONENTS is a
   267      vector or a list.  It should be a sequence of:
   268         char1 rule1 char2 rule2 char3 ...    ruleN charN+1  */
   269 
   270   if (VECTORP (components)
   271       && ASIZE (components) >= 2
   272       && VECTORP (AREF (components, 0)))
   273     {
   274       /* COMPONENTS is a glyph-string.  */
   275       ptrdiff_t len = ASIZE (key);
   276 
   277       for (ptrdiff_t i = 1; i < len; i++)
   278         if (! VECTORP (AREF (key, i)))
   279           goto invalid_composition;
   280     }
   281   else if (VECTORP (components) || CONSP (components))
   282     {
   283       ptrdiff_t len = ASIZE (key);
   284 
   285       /* The number of elements should be odd.  */
   286       if ((len % 2) == 0)
   287         goto invalid_composition;
   288       /* All elements should be integers (character or encoded
   289          composition rule).  */
   290       for (ptrdiff_t i = 0; i < len; i++)
   291         {
   292           if (!FIXNUMP (key_contents[i]))
   293             goto invalid_composition;
   294         }
   295     }
   296 
   297   /* Change PROP from Form-A above to Form-B.  We can directly modify
   298      the cons cell of PROP because it is not shared.  */
   299   XSETFASTINT (id, n_compositions);
   300   XSETCAR (prop, id);
   301   XSETCDR (prop, Fcons (make_fixnum (nchars), Fcons (key, XCDR (prop))));
   302 
   303   /* Register the composition in composition_hash_table.  */
   304   hash_index = hash_put (hash_table, key, id, hash_code);
   305 
   306   method = (NILP (components)
   307             ? COMPOSITION_RELATIVE
   308             : ((FIXNUMP (components) || STRINGP (components))
   309                ? COMPOSITION_WITH_ALTCHARS
   310                : COMPOSITION_WITH_RULE_ALTCHARS));
   311 
   312   glyph_len = (method == COMPOSITION_WITH_RULE_ALTCHARS
   313                ? (ASIZE (key) + 1) / 2
   314                : ASIZE (key));
   315 
   316   if (GLYPH_LEN_MAX < glyph_len)
   317     memory_full (SIZE_MAX);
   318 
   319   /* Register the composition in composition_table.  */
   320   cmp = xmalloc (sizeof *cmp);
   321 
   322   cmp->method = method;
   323   cmp->hash_index = hash_index;
   324   cmp->glyph_len = glyph_len;
   325   cmp->offsets = xnmalloc (glyph_len, 2 * sizeof *cmp->offsets);
   326   cmp->font = NULL;
   327 
   328   if (cmp->method != COMPOSITION_WITH_RULE_ALTCHARS)
   329     {
   330       /* Relative composition.  */
   331       cmp->width = 0;
   332       for (ptrdiff_t i = 0; i < glyph_len; i++)
   333         {
   334           int this_width;
   335           ch = XFIXNUM (key_contents[i]);
   336           /* TAB in a composition means display glyphs with padding
   337              space on the left or right.  */
   338           this_width = (ch == '\t' ? 1 : CHARACTER_WIDTH (ch));
   339           if (cmp->width < this_width)
   340             cmp->width = this_width;
   341         }
   342     }
   343   else
   344     {
   345       /* Rule-base composition.  */
   346       double leftmost = 0.0, rightmost;
   347 
   348       ch = XFIXNUM (key_contents[0]);
   349       rightmost = ch != '\t' ? CHARACTER_WIDTH (ch) : 1;
   350 
   351       for (ptrdiff_t i = 1; i < glyph_len; i += 2)
   352         {
   353           int rule, gref, nref;
   354           int this_width;
   355           double this_left;
   356 
   357           rule = XFIXNUM (key_contents[i]);
   358           ch = XFIXNUM (key_contents[i + 1]);
   359           this_width = ch != '\t' ? CHARACTER_WIDTH (ch) : 1;
   360 
   361           /* A composition rule is specified by an integer value
   362              that encodes global and new reference points (GREF and
   363              NREF).  GREF and NREF are specified by numbers as
   364              below:
   365                 0---1---2 -- ascent
   366                 |       |
   367                 |       |
   368                 |       |
   369                 9--10--11 -- center
   370                 |       |
   371              ---3---4---5--- baseline
   372                 |       |
   373                 6---7---8 -- descent
   374           */
   375           COMPOSITION_DECODE_REFS (rule, gref, nref);
   376           this_left = (leftmost
   377                        + (gref % 3) * (rightmost - leftmost) / 2.0
   378                        - (nref % 3) * this_width / 2.0);
   379 
   380           if (this_left < leftmost)
   381             leftmost = this_left;
   382           if (this_left + this_width > rightmost)
   383             rightmost = this_left + this_width;
   384         }
   385 
   386       cmp->width = rightmost - leftmost;
   387       if (cmp->width < (rightmost - leftmost))
   388         /* To get a ceiling integer value.  */
   389         cmp->width++;
   390     }
   391 
   392   composition_table[n_compositions] = cmp;
   393 
   394   return n_compositions++;
   395 
   396  invalid_composition:
   397   /* Would it be better to remove this `composition' property?  */
   398   return -1;
   399 }
   400 
   401 
   402 /* Find a static composition at or nearest to position POS of OBJECT
   403    (buffer or string).
   404 
   405    OBJECT defaults to the current buffer.  If there's a composition at
   406    POS, set *START and *END to the start and end of the sequence,
   407    *PROP to the `composition' property, and return 1.
   408 
   409    If there's no composition at POS and LIMIT is negative, return 0.
   410 
   411    Otherwise, search for a composition forward (LIMIT > POS) or
   412    backward (LIMIT < POS).  In this case, LIMIT bounds the search.
   413 
   414    If a composition is found, set *START, *END, and *PROP as above,
   415    and return 1, else return 0.
   416 
   417    This doesn't check the validity of composition.  */
   418 
   419 bool
   420 find_composition (ptrdiff_t pos, ptrdiff_t limit,
   421                   ptrdiff_t *start, ptrdiff_t *end,
   422                   Lisp_Object *prop, Lisp_Object object)
   423 {
   424   Lisp_Object val;
   425 
   426   if (get_property_and_range (pos, Qcomposition, prop, start, end, object))
   427     return 1;
   428 
   429   if (limit < 0 || limit == pos)
   430     return 0;
   431 
   432   if (limit > pos)              /* search forward */
   433     {
   434       val = Fnext_single_property_change (make_fixnum (pos), Qcomposition,
   435                                           object, make_fixnum (limit));
   436       pos = XFIXNUM (val);
   437       if (pos == limit)
   438         return 0;
   439     }
   440   else                          /* search backward */
   441     {
   442       if (get_property_and_range (pos - 1, Qcomposition, prop, start, end,
   443                                   object))
   444         return 1;
   445       val = Fprevious_single_property_change (make_fixnum (pos), Qcomposition,
   446                                               object, make_fixnum (limit));
   447       pos = XFIXNUM (val);
   448       if (pos == limit)
   449         return 0;
   450       pos--;
   451     }
   452   get_property_and_range (pos, Qcomposition, prop, start, end, object);
   453   return 1;
   454 }
   455 
   456 /* Run a proper function to adjust the composition sitting between
   457    FROM and TO with property PROP.  */
   458 
   459 static void
   460 run_composition_function (ptrdiff_t from, ptrdiff_t to, Lisp_Object prop)
   461 {
   462   Lisp_Object func;
   463   ptrdiff_t start, end;
   464 
   465   func = COMPOSITION_MODIFICATION_FUNC (prop);
   466   /* If an invalid composition precedes or follows, try to make them
   467      valid too.  */
   468   if (from > BEGV
   469       && find_composition (from - 1, -1, &start, &end, &prop, Qnil)
   470       && !composition_valid_p (start, end, prop))
   471     from = start;
   472   if (to < ZV
   473       && find_composition (to, -1, &start, &end, &prop, Qnil)
   474       && !composition_valid_p (start, end, prop))
   475     to = end;
   476   if (!NILP (Ffboundp (func)))
   477     call2 (func, make_fixnum (from), make_fixnum (to));
   478 }
   479 
   480 /* Make invalid compositions adjacent to or inside FROM and TO valid.
   481    CHECK_MASK is bitwise `or' of mask bits defined by macros
   482    CHECK_XXX (see the comment in composite.h).
   483 
   484    It also resets the text-property `auto-composed' to a proper region
   485    so that automatic character composition works correctly later while
   486    displaying the region.
   487 
   488    This function is called when a buffer text is changed.  If the
   489    change is deletion, FROM == TO.  Otherwise, FROM < TO.  */
   490 
   491 void
   492 update_compositions (ptrdiff_t from, ptrdiff_t to, int check_mask)
   493 {
   494   Lisp_Object prop;
   495   ptrdiff_t start, end;
   496   /* The beginning and end of the region to set the property
   497      `auto-composed' to nil.  */
   498   ptrdiff_t min_pos = from, max_pos = to;
   499 
   500   if (inhibit_modification_hooks)
   501     return;
   502 
   503   /* If FROM and TO are not in a valid range, do nothing.  */
   504   if (! (BEGV <= from && from <= to && to <= ZV))
   505     return;
   506 
   507   if (check_mask & CHECK_HEAD)
   508     {
   509       /* FROM should be at composition boundary.  But, insertion or
   510          deletion will make two compositions adjacent and
   511          indistinguishable when they have same (eq) property.  To
   512          avoid it, in such a case, we change the property of the
   513          latter to the copy of it.  */
   514       if (from > BEGV
   515           && find_composition (from - 1, -1, &start, &end, &prop, Qnil)
   516           && composition_valid_p (start, end, prop))
   517         {
   518           min_pos = start;
   519           if (end > to)
   520             max_pos = end;
   521           if (from < end)
   522             Fput_text_property (make_fixnum (from), make_fixnum (end),
   523                                 Qcomposition,
   524                                 Fcons (XCAR (prop), XCDR (prop)), Qnil);
   525           run_composition_function (start, end, prop);
   526           from = end;
   527         }
   528       else if (from < ZV
   529                && find_composition (from, -1, &start, &from, &prop, Qnil)
   530                && composition_valid_p (start, from, prop))
   531         {
   532           if (from > to)
   533             max_pos = from;
   534           run_composition_function (start, from, prop);
   535         }
   536     }
   537 
   538   if (check_mask & CHECK_INSIDE)
   539     {
   540       /* In this case, we are sure that (check & CHECK_TAIL) is also
   541          nonzero.  Thus, here we should check only compositions before
   542          (to - 1).  */
   543       while (from < to - 1
   544              && find_composition (from, to, &start, &from, &prop, Qnil)
   545              && composition_valid_p (start, from, prop)
   546              && from < to - 1)
   547         run_composition_function (start, from, prop);
   548     }
   549 
   550   if (check_mask & CHECK_TAIL)
   551     {
   552       if (from < to
   553           && find_composition (to - 1, -1, &start, &end, &prop, Qnil)
   554           && composition_valid_p (start, end, prop))
   555         {
   556           /* TO should be also at composition boundary.  But,
   557              insertion or deletion will make two compositions adjacent
   558              and indistinguishable when they have same (eq) property.
   559              To avoid it, in such a case, we change the property of
   560              the former to the copy of it.  */
   561           if (to < end)
   562             {
   563               Fput_text_property (make_fixnum (start), make_fixnum (to),
   564                                   Qcomposition,
   565                                   Fcons (XCAR (prop), XCDR (prop)), Qnil);
   566               max_pos = end;
   567             }
   568           run_composition_function (start, end, prop);
   569         }
   570       else if (to < ZV
   571                && find_composition (to, -1, &start, &end, &prop, Qnil)
   572                && composition_valid_p (start, end, prop))
   573         {
   574           run_composition_function (start, end, prop);
   575           max_pos = end;
   576         }
   577     }
   578   if (min_pos < max_pos)
   579     {
   580       specpdl_ref count = SPECPDL_INDEX ();
   581 
   582       specbind (Qinhibit_read_only, Qt);
   583       specbind (Qinhibit_modification_hooks, Qt);
   584       specbind (Qinhibit_point_motion_hooks, Qt);
   585       Fremove_list_of_text_properties (make_fixnum (min_pos),
   586                                        make_fixnum (max_pos),
   587                                        list1 (Qauto_composed), Qnil);
   588       unbind_to (count, Qnil);
   589     }
   590 }
   591 
   592 
   593 /* Modify composition property values in LIST destructively.  LIST is
   594    a list as returned from text_property_list.  Change values to the
   595    top-level copies of them so that none of them are `eq'.  */
   596 
   597 void
   598 make_composition_value_copy (Lisp_Object list)
   599 {
   600   Lisp_Object plist, val;
   601 
   602   for (; CONSP (list); list = XCDR (list))
   603     {
   604       plist = XCAR (XCDR (XCDR (XCAR (list))));
   605       while (CONSP (plist) && CONSP (XCDR (plist)))
   606         {
   607           if (EQ (XCAR (plist), Qcomposition)
   608               && (val = XCAR (XCDR (plist)), CONSP (val)))
   609             XSETCAR (XCDR (plist), Fcons (XCAR (val), XCDR (val)));
   610           plist = XCDR (XCDR (plist));
   611         }
   612     }
   613 }
   614 
   615 
   616 /* Make text in the region between START and END a composition that
   617    has COMPONENTS and MODIFICATION-FUNC.
   618 
   619    If STRING is non-nil, then operate on characters contained between
   620    indices START and END in STRING.  */
   621 
   622 void
   623 compose_text (ptrdiff_t start, ptrdiff_t end, Lisp_Object components,
   624               Lisp_Object modification_func, Lisp_Object string)
   625 {
   626   Lisp_Object prop;
   627 
   628   prop = Fcons (Fcons (make_fixnum (end - start), components),
   629                 modification_func);
   630   Fput_text_property  (make_fixnum (start), make_fixnum (end),
   631                        Qcomposition, prop, string);
   632 }
   633 
   634 /* Lisp glyph-string handlers.  */
   635 
   636 /* Hash table for automatic composition.  The key is a header of a
   637    lgstring (Lispy glyph-string), and the value is a body of a
   638    lgstring.  */
   639 
   640 static Lisp_Object gstring_hash_table;
   641 
   642 Lisp_Object
   643 composition_gstring_lookup_cache (Lisp_Object header)
   644 {
   645   struct Lisp_Hash_Table *h = XHASH_TABLE (gstring_hash_table);
   646   ptrdiff_t i = hash_lookup (h, header, NULL);
   647 
   648   return (i >= 0 ? HASH_VALUE (h, i) : Qnil);
   649 }
   650 
   651 Lisp_Object
   652 composition_gstring_put_cache (Lisp_Object gstring, ptrdiff_t len)
   653 {
   654   struct Lisp_Hash_Table *h = XHASH_TABLE (gstring_hash_table);
   655   Lisp_Object header = LGSTRING_HEADER (gstring);
   656   Lisp_Object hash = h->test.hashfn (header, h);
   657   if (len < 0)
   658     {
   659       ptrdiff_t glyph_len = LGSTRING_GLYPH_LEN (gstring);
   660       for (len = 0; len < glyph_len; len++)
   661         if (NILP (LGSTRING_GLYPH (gstring, len)))
   662           break;
   663     }
   664 
   665   Lisp_Object copy = make_nil_vector (len + 2);
   666   LGSTRING_SET_HEADER (copy, Fcopy_sequence (header));
   667   for (ptrdiff_t i = 0; i < len; i++)
   668     LGSTRING_SET_GLYPH (copy, i, Fcopy_sequence (LGSTRING_GLYPH (gstring, i)));
   669   ptrdiff_t id = hash_put (h, LGSTRING_HEADER (copy), copy, hash);
   670   LGSTRING_SET_ID (copy, make_fixnum (id));
   671   return copy;
   672 }
   673 
   674 Lisp_Object
   675 composition_gstring_from_id (ptrdiff_t id)
   676 {
   677   struct Lisp_Hash_Table *h = XHASH_TABLE (gstring_hash_table);
   678 
   679   return HASH_VALUE (h, id);
   680 }
   681 
   682 /* Remove from the composition hash table every lgstring that
   683    references the given FONT_OBJECT.  */
   684 void
   685 composition_gstring_cache_clear_font (Lisp_Object font_object)
   686 {
   687   struct Lisp_Hash_Table *h = XHASH_TABLE (gstring_hash_table);
   688 
   689   for (ptrdiff_t i = 0; i < HASH_TABLE_SIZE (h); ++i)
   690     {
   691       Lisp_Object k = HASH_KEY (h, i);
   692 
   693       if (!BASE_EQ (k, Qunbound))
   694         {
   695           Lisp_Object gstring = HASH_VALUE (h, i);
   696 
   697           if (EQ (LGSTRING_FONT (gstring), font_object))
   698             hash_remove_from_table (h, k);
   699         }
   700     }
   701 }
   702 
   703 DEFUN ("clear-composition-cache", Fclear_composition_cache,
   704        Sclear_composition_cache, 0, 0, 0,
   705        doc: /* Internal use only.
   706 Clear composition cache.  */)
   707   (void)
   708 {
   709   gstring_hash_table = CALLN (Fmake_hash_table, QCtest, Qequal,
   710                               QCsize, make_fixnum (311));
   711   /* Fixme: We call Fclear_face_cache to force complete re-building of
   712      display glyphs.  But, it may be better to call this function from
   713      Fclear_face_cache instead.  */
   714   return Fclear_face_cache (Qt);
   715 }
   716 
   717 bool
   718 composition_gstring_p (Lisp_Object gstring)
   719 {
   720   Lisp_Object header;
   721   ptrdiff_t i;
   722 
   723   if (! VECTORP (gstring) || ASIZE (gstring) < 2)
   724     return 0;
   725   header = LGSTRING_HEADER (gstring);
   726   if (! VECTORP (header) || ASIZE (header) < 2)
   727     return 0;
   728   if (! NILP (LGSTRING_FONT (gstring))
   729       && (! FONT_OBJECT_P (LGSTRING_FONT (gstring))
   730           && ! CODING_SYSTEM_P (LGSTRING_FONT (gstring))))
   731     return 0;
   732   for (i = 1; i < ASIZE (LGSTRING_HEADER (gstring)); i++)
   733     if (! FIXNATP (AREF (LGSTRING_HEADER (gstring), i)))
   734       return 0;
   735   if (! NILP (LGSTRING_ID (gstring)) && ! FIXNATP (LGSTRING_ID (gstring)))
   736     return 0;
   737   for (i = 0; i < LGSTRING_GLYPH_LEN (gstring); i++)
   738     {
   739       Lisp_Object glyph = LGSTRING_GLYPH (gstring, i);
   740       if (NILP (glyph))
   741         break;
   742       if (! VECTORP (glyph) || ASIZE (glyph) != LGLYPH_SIZE)
   743         return 0;
   744     }
   745   return 1;
   746 }
   747 
   748 int
   749 composition_gstring_width (Lisp_Object gstring, ptrdiff_t from, ptrdiff_t to,
   750                            struct font_metrics *metrics)
   751 {
   752   Lisp_Object *glyph;
   753   int width = 0;
   754 
   755   if (metrics)
   756     {
   757       Lisp_Object font_object = LGSTRING_FONT (gstring);
   758 
   759       if (FONT_OBJECT_P (font_object))
   760         {
   761           struct font *font = XFONT_OBJECT (font_object);
   762           int font_ascent, font_descent;
   763 
   764           get_font_ascent_descent (font, &font_ascent, &font_descent);
   765           metrics->ascent = font_ascent;
   766           metrics->descent = font_descent;
   767         }
   768       else
   769         {
   770           metrics->ascent = 1;
   771           metrics->descent = 0;
   772         }
   773       metrics->width = metrics->lbearing = metrics->rbearing = 0;
   774     }
   775   for (glyph = lgstring_glyph_addr (gstring, from); from < to; from++, glyph++)
   776     {
   777       int x;
   778 
   779       if (NILP (LGLYPH_ADJUSTMENT (*glyph)))
   780         width += LGLYPH_WIDTH (*glyph);
   781       else
   782         width += LGLYPH_WADJUST (*glyph);
   783       if (metrics)
   784         {
   785           x = metrics->width + LGLYPH_LBEARING (*glyph) + LGLYPH_XOFF (*glyph);
   786           if (metrics->lbearing > x)
   787             metrics->lbearing = x;
   788           x = metrics->width + LGLYPH_RBEARING (*glyph) + LGLYPH_XOFF (*glyph);
   789           if (metrics->rbearing < x)
   790             metrics->rbearing = x;
   791           metrics->width = width;
   792           x = LGLYPH_ASCENT (*glyph) - LGLYPH_YOFF (*glyph);
   793           if (metrics->ascent < x)
   794             metrics->ascent = x;
   795           x = LGLYPH_DESCENT (*glyph) + LGLYPH_YOFF (*glyph);
   796           if (metrics->descent < x)
   797             metrics->descent = x;
   798         }
   799     }
   800   return width;
   801 }
   802 
   803 /* Adjust the width of each grapheme cluster of GSTRING because
   804    zero-width grapheme clusters are not displayed.  If the width is
   805    zero, then the width of the last glyph in the cluster is
   806    incremented.  */
   807 
   808 void
   809 composition_gstring_adjust_zero_width (Lisp_Object gstring)
   810 {
   811   ptrdiff_t from = 0;
   812   int width = 0;
   813 
   814   for (ptrdiff_t i = 0; ; i++)
   815     {
   816       Lisp_Object glyph;
   817 
   818       if (i < LGSTRING_GLYPH_LEN (gstring))
   819         glyph = LGSTRING_GLYPH (gstring, i);
   820       else
   821         glyph = Qnil;
   822 
   823       if (NILP (glyph) || from != LGLYPH_FROM (glyph))
   824         {
   825           eassert (i > 0);
   826           Lisp_Object last = LGSTRING_GLYPH (gstring, i - 1);
   827 
   828           if (width == 0)
   829             {
   830               if (NILP (LGLYPH_ADJUSTMENT (last)))
   831                 LGLYPH_SET_ADJUSTMENT (last,
   832                                        CALLN (Fvector,
   833                                               make_fixnum (0), make_fixnum (0),
   834                                               make_fixnum (LGLYPH_WIDTH (last)
   835                                                            + 1)));
   836               else
   837                 ASET (LGLYPH_ADJUSTMENT (last), 2,
   838                       make_fixnum (LGLYPH_WADJUST (last) + 1));
   839             }
   840           if (NILP (glyph))
   841             break;
   842           from = LGLYPH_FROM (glyph);
   843           width = 0;
   844         }
   845       width += (NILP (LGLYPH_ADJUSTMENT (glyph))
   846                 ? LGLYPH_WIDTH (glyph) : LGLYPH_WADJUST (glyph));
   847     }
   848 }
   849 
   850 
   851 static Lisp_Object gstring_work;
   852 static Lisp_Object gstring_work_headers;
   853 
   854 static Lisp_Object
   855 fill_gstring_header (ptrdiff_t from, ptrdiff_t from_byte,
   856                      ptrdiff_t to, Lisp_Object font_object, Lisp_Object string)
   857 {
   858   ptrdiff_t len = to - from;
   859   if (len == 0)
   860     error ("Attempt to shape zero-length text");
   861   eassume (0 < len);
   862   Lisp_Object header = (len <= 8
   863                         ? AREF (gstring_work_headers, len - 1)
   864                         : make_uninit_vector (len + 1));
   865 
   866   ASET (header, 0, font_object);
   867   for (ptrdiff_t i = 0; i < len; i++)
   868     {
   869       int c
   870         = (NILP (string)
   871            ? fetch_char_advance_no_check (&from, &from_byte)
   872            : fetch_string_char_advance_no_check (string, &from, &from_byte));
   873       ASET (header, i + 1, make_fixnum (c));
   874     }
   875   return header;
   876 }
   877 
   878 static void
   879 fill_gstring_body (Lisp_Object gstring)
   880 {
   881   Lisp_Object font_object = LGSTRING_FONT (gstring);
   882   Lisp_Object header = AREF (gstring, 0);
   883   ptrdiff_t len = LGSTRING_CHAR_LEN (gstring);
   884   ptrdiff_t i;
   885   struct font *font = NULL;
   886   unsigned int code;
   887 
   888   if (FONT_OBJECT_P (font_object))
   889     font = XFONT_OBJECT (font_object);
   890 
   891   for (i = 0; i < len; i++)
   892     {
   893       Lisp_Object g = LGSTRING_GLYPH (gstring, i);
   894       int c = XFIXNAT (AREF (header, i + 1));
   895 
   896       if (NILP (g))
   897         {
   898           g = LGLYPH_NEW ();
   899           LGSTRING_SET_GLYPH (gstring, i, g);
   900         }
   901       LGLYPH_SET_FROM (g, i);
   902       LGLYPH_SET_TO (g, i);
   903       LGLYPH_SET_CHAR (g, c);
   904 
   905       if (font != NULL)
   906         code = font->driver->encode_char (font, LGLYPH_CHAR (g));
   907       else
   908         code = FONT_INVALID_CODE;
   909       if (code != FONT_INVALID_CODE)
   910         {
   911           font_fill_lglyph_metrics (g, font, code);
   912         }
   913       else
   914         {
   915           int width = XFIXNAT (CHAR_TABLE_REF (Vchar_width_table, c));
   916 
   917           LGLYPH_SET_CODE (g, c);
   918           LGLYPH_SET_LBEARING (g, 0);
   919           LGLYPH_SET_RBEARING (g, width);
   920           LGLYPH_SET_WIDTH (g, width);
   921           LGLYPH_SET_ASCENT (g, 1);
   922           LGLYPH_SET_DESCENT (g, 0);
   923         }
   924       LGLYPH_SET_ADJUSTMENT (g, Qnil);
   925     }
   926   len = LGSTRING_GLYPH_LEN (gstring);
   927   for (; i < len; i++)
   928     LGSTRING_SET_GLYPH (gstring, i, Qnil);
   929 }
   930 
   931 
   932 /* Try to compose the characters at CHARPOS according to composition
   933    rule RULE ([PATTERN PREV-CHARS FUNC]).  LIMIT limits the characters
   934    to compose.  STRING, if not nil, is a target string.  WIN is a
   935    window where the characters are being displayed.  CH is the
   936    character that triggered the composition check.  If characters are
   937    successfully composed, return the composition as a glyph-string
   938    object.  Otherwise return nil.  */
   939 
   940 static Lisp_Object
   941 autocmp_chars (Lisp_Object rule, ptrdiff_t charpos, ptrdiff_t bytepos,
   942                ptrdiff_t limit, struct window *win, struct face *face,
   943                Lisp_Object string, Lisp_Object direction, int ch)
   944 {
   945   specpdl_ref count = SPECPDL_INDEX ();
   946   Lisp_Object pos = make_fixnum (charpos);
   947   ptrdiff_t to;
   948   ptrdiff_t pt = PT, pt_byte = PT_BYTE;
   949   Lisp_Object re, font_object, lgstring;
   950   ptrdiff_t len;
   951 
   952   record_unwind_save_match_data ();
   953   re = AREF (rule, 0);
   954   if (NILP (re))
   955     len = 1;
   956   else if (! STRINGP (re))
   957     return unbind_to (count, Qnil);
   958   else if ((len = fast_looking_at (re, charpos, bytepos, limit, -1, string))
   959            > 0)
   960     {
   961       if (NILP (string))
   962         len = BYTE_TO_CHAR (bytepos + len) - charpos;
   963       else
   964         len = string_byte_to_char (string, bytepos + len) - charpos;
   965     }
   966   if (len <= 0)
   967     return unbind_to (count, Qnil);
   968   to = limit = charpos + len;
   969   font_object = win->frame;
   970 #ifdef HAVE_WINDOW_SYSTEM
   971   struct frame *f = XFRAME (font_object);
   972   if (FRAME_WINDOW_P (f))
   973     {
   974       font_object = font_range (charpos, bytepos, &to, win, face, string, ch);
   975       if (! FONT_OBJECT_P (font_object)
   976           || (! NILP (re)
   977               && to < limit
   978               && (fast_looking_at (re, charpos, bytepos, to, -1, string) <= 0)))
   979         return unbind_to (count, Qnil);
   980     }
   981 #endif
   982   lgstring = Fcomposition_get_gstring (pos, make_fixnum (to), font_object,
   983                                        string);
   984   if (NILP (LGSTRING_ID (lgstring)))
   985     {
   986       /* Save point as marker before calling out to lisp.  */
   987       if (NILP (string))
   988         record_unwind_protect (restore_point_unwind,
   989                                build_marker (current_buffer, pt, pt_byte));
   990       lgstring = safe_call (7, Vauto_composition_function, AREF (rule, 2),
   991                             pos, make_fixnum (to), font_object, string,
   992                             direction);
   993     }
   994   return unbind_to (count, lgstring);
   995 }
   996 
   997 /* 1 iff the character C is composable.  Characters of general
   998    category Z? or C? are not composable except for ZWNJ and ZWJ,
   999    and characters of category Zs. */
  1000 
  1001 static bool
  1002 char_composable_p (int c)
  1003 {
  1004   Lisp_Object val;
  1005   return (c >= ' '
  1006           && (c == ZERO_WIDTH_NON_JOINER || c == ZERO_WIDTH_JOINER
  1007               /* Per Unicode TR51, these tag characters can be part of
  1008                  Emoji sequences.  */
  1009               || (TAG_SPACE <= c && c <= CANCEL_TAG)
  1010               /* unicode-category-table may not be available during
  1011                  dumping.  */
  1012               || (CHAR_TABLE_P (Vunicode_category_table)
  1013                   && (val = CHAR_TABLE_REF (Vunicode_category_table, c),
  1014                       (FIXNUMP (val)
  1015                        && (XFIXNUM (val) <= UNICODE_CATEGORY_Zs))))));
  1016 }
  1017 
  1018 static inline bool
  1019 inhibit_auto_composition (void)
  1020 {
  1021   if (NILP (Vauto_composition_mode))
  1022     return true;
  1023 
  1024   if (STRINGP (Vauto_composition_mode))
  1025     {
  1026       char *name = tty_type_name (Qnil);
  1027 
  1028       if (name && ! strcmp (SSDATA (Vauto_composition_mode), name))
  1029         return true;
  1030     }
  1031 
  1032   return false;
  1033 }
  1034 
  1035 /* Update cmp_it->stop_pos to the next position after CHARPOS (and
  1036    BYTEPOS) where character composition may happen.  If BYTEPOS is
  1037    negative, compute it.  ENDPOS is a limit of searching.  If it is
  1038    less than CHARPOS, search backward to ENDPOS+1 assuming that
  1039    set_iterator_to_next works in reverse order.  In this case, if a
  1040    composition closest to CHARPOS is found, set cmp_it->stop_pos to
  1041    the last character of the composition.  STRING, if non-nil, is
  1042    the string (as opposed to a buffer) whose characters should be
  1043    tested for being composable.  INCLUDE_STATIC non-zero means
  1044    consider both static and automatic compositions; if zero, look
  1045    only for potential automatic compositions.
  1046 
  1047    If no composition is found, set cmp_it->ch to -2.  If a static
  1048    composition is found, set cmp_it->ch to -1.  Otherwise, set
  1049    cmp_it->ch to the character that triggers the automatic
  1050    composition.  */
  1051 
  1052 void
  1053 composition_compute_stop_pos (struct composition_it *cmp_it, ptrdiff_t charpos,
  1054                               ptrdiff_t bytepos, ptrdiff_t endpos,
  1055                               Lisp_Object string, bool include_static)
  1056 {
  1057   ptrdiff_t start, end;
  1058   int c;
  1059   Lisp_Object prop, val;
  1060   /* This is from forward_to_next_line_start in xdisp.c.  */
  1061   const int MAX_NEWLINE_DISTANCE = 500;
  1062 
  1063   if (charpos < endpos)
  1064     {
  1065       if (endpos > charpos + MAX_NEWLINE_DISTANCE)
  1066         endpos = charpos + MAX_NEWLINE_DISTANCE;
  1067     }
  1068   else if (endpos < charpos)
  1069     {
  1070       /* We search backward for a position to check composition.  */
  1071       if (endpos < 0)
  1072         {
  1073           /* But we don't know where to stop the searching.  */
  1074           endpos = NILP (string) ? BEGV - 1 : -1;
  1075           /* Usually we don't reach ENDPOS because we stop searching
  1076              at an uncomposable character (NL, LRE, etc).  In buffers
  1077              with long lines, however, NL might be far away, so
  1078              pretend that the buffer is smaller.  */
  1079           if (current_buffer->long_line_optimizations_p)
  1080             endpos = get_small_narrowing_begv (cmp_it->parent_it->w, charpos);
  1081         }
  1082     }
  1083   cmp_it->id = -1;
  1084   cmp_it->ch = -2;
  1085   cmp_it->reversed_p = 0;
  1086   cmp_it->stop_pos = endpos;
  1087   if (charpos == endpos)
  1088     return;
  1089   /* Look for static compositions.  */
  1090   /* FIXME: Bidi is not yet handled well in static composition.  */
  1091   if (include_static
  1092       && charpos < endpos
  1093       && find_composition (charpos, endpos, &start, &end, &prop, string)
  1094       && start >= charpos
  1095       && composition_valid_p (start, end, prop))
  1096     {
  1097       cmp_it->stop_pos = endpos = start;
  1098       cmp_it->ch = -1;
  1099     }
  1100   if ((NILP (string)
  1101        && NILP (BVAR (current_buffer, enable_multibyte_characters)))
  1102       || (STRINGP (string) && !STRING_MULTIBYTE (string))
  1103       || inhibit_auto_composition ())
  1104     return;
  1105   if (bytepos < 0)
  1106     {
  1107       if (NILP (string))
  1108         bytepos = CHAR_TO_BYTE (charpos);
  1109       else
  1110         bytepos = string_char_to_byte (string, charpos);
  1111     }
  1112 
  1113   /* Look for automatic compositions.  */
  1114   start = charpos;
  1115   if (charpos < endpos)
  1116     {
  1117       /* Forward search.  */
  1118       while (charpos < endpos)
  1119         {
  1120           c = (STRINGP (string)
  1121                ? fetch_string_char_advance (string, &charpos, &bytepos)
  1122                : fetch_char_advance (&charpos, &bytepos));
  1123           if (c == '\n')
  1124             {
  1125               cmp_it->ch = -2;
  1126               break;
  1127             }
  1128           val = CHAR_TABLE_REF (Vcomposition_function_table, c);
  1129           if (! NILP (val))
  1130             {
  1131               for (EMACS_INT ridx = 0; CONSP (val); val = XCDR (val), ridx++)
  1132                 {
  1133                   Lisp_Object elt = XCAR (val);
  1134                   if (VECTORP (elt) && ASIZE (elt) == 3
  1135                       && FIXNATP (AREF (elt, 1))
  1136                       && charpos - 1 - XFIXNAT (AREF (elt, 1)) >= start)
  1137                     {
  1138                       cmp_it->rule_idx = ridx;
  1139                       cmp_it->lookback = XFIXNAT (AREF (elt, 1));
  1140                       cmp_it->stop_pos = charpos - 1 - cmp_it->lookback;
  1141                       cmp_it->ch = c;
  1142                       return;
  1143                     }
  1144                 }
  1145             }
  1146         }
  1147       if (charpos == endpos
  1148           && !(STRINGP (string) && endpos == SCHARS (string)))
  1149         {
  1150           /* We couldn't find a composition point before ENDPOS.  But,
  1151              some character after ENDPOS may be composed with
  1152              characters before ENDPOS.  So, we should stop at the safe
  1153              point.  */
  1154           charpos = endpos - MAX_AUTO_COMPOSITION_LOOKBACK;
  1155           if (charpos < start)
  1156             charpos = start;
  1157         }
  1158     }
  1159   else if (charpos > endpos)
  1160     {
  1161       /* Search backward for a pattern that may be composed and the
  1162          position of (possibly) the last character of the match is
  1163          closest to (but not after) START.  The reason for the last
  1164          character is that set_iterator_to_next works in reverse order,
  1165          and thus we must stop at the last character for composition
  1166          check.  */
  1167       unsigned char *p;
  1168       int len;
  1169       /* Limit byte position used in fast_looking_at.  This is the
  1170          byte position of the character after START. */
  1171       ptrdiff_t limit;
  1172 
  1173       if (NILP (string))
  1174         p = BYTE_POS_ADDR (bytepos);
  1175       else
  1176         p = SDATA (string) + bytepos;
  1177       c = string_char_and_length (p, &len);
  1178       limit = bytepos + len;
  1179       while (char_composable_p (c))
  1180         {
  1181           val = CHAR_TABLE_REF (Vcomposition_function_table, c);
  1182           for (EMACS_INT ridx = 0; CONSP (val); val = XCDR (val), ridx++)
  1183             {
  1184               Lisp_Object elt = XCAR (val);
  1185               if (VECTORP (elt) && ASIZE (elt) == 3
  1186                   && FIXNATP (AREF (elt, 1))
  1187                   && charpos - XFIXNAT (AREF (elt, 1)) > endpos)
  1188                 {
  1189                   ptrdiff_t back = XFIXNAT (AREF (elt, 1));
  1190                   ptrdiff_t cpos = charpos - back, bpos;
  1191 
  1192                   if (back == 0)
  1193                     bpos = bytepos;
  1194                   else
  1195                     bpos = (NILP (string) ? CHAR_TO_BYTE (cpos)
  1196                             : string_char_to_byte (string, cpos));
  1197                   ptrdiff_t blen
  1198                     = (STRINGP (AREF (elt, 0))
  1199                        ? fast_looking_at (AREF (elt, 0), cpos, bpos,
  1200                                           start + 1, limit, string)
  1201                        : 1);
  1202                   if (blen > 0)
  1203                     {
  1204                       /* Make CPOS point to the last character of
  1205                          match.  Note that BLEN is byte-length.  */
  1206                       if (blen > 1)
  1207                         {
  1208                           bpos += blen;
  1209                           if (NILP (string))
  1210                             cpos = BYTE_TO_CHAR (bpos) - 1;
  1211                           else
  1212                             cpos = string_byte_to_char (string, bpos) - 1;
  1213                         }
  1214                       back = cpos - (charpos - back);
  1215                       if (cmp_it->stop_pos < cpos
  1216                           || (cmp_it->stop_pos == cpos
  1217                               && cmp_it->lookback < back))
  1218                         {
  1219                           cmp_it->rule_idx = ridx;
  1220                           cmp_it->stop_pos = cpos;
  1221                           cmp_it->ch = c;
  1222                           cmp_it->lookback = back;
  1223                           cmp_it->nchars = back + 1;
  1224                         }
  1225                     }
  1226                 }
  1227             }
  1228           if (charpos - 1 == endpos)
  1229             break;
  1230           if (STRINGP (string))
  1231             {
  1232               p--, bytepos--;
  1233               while (! CHAR_HEAD_P (*p))
  1234                 p--, bytepos--;
  1235               charpos--;
  1236             }
  1237           else
  1238             {
  1239               dec_both (&charpos, &bytepos);
  1240               p = BYTE_POS_ADDR (bytepos);
  1241             }
  1242           c = STRING_CHAR (p);
  1243         }
  1244       if (cmp_it->ch >= 0)
  1245         /* We found a position to check.  */
  1246         return;
  1247       /* Skip all uncomposable characters.  */
  1248       if (NILP (string))
  1249         {
  1250           while (charpos - 1 > endpos && ! char_composable_p (c))
  1251             {
  1252               dec_both (&charpos, &bytepos);
  1253               c = FETCH_MULTIBYTE_CHAR (bytepos);
  1254             }
  1255         }
  1256       else
  1257         {
  1258           while (charpos - 1 > endpos && ! char_composable_p (c))
  1259             {
  1260               p--;
  1261               while (! CHAR_HEAD_P (*p))
  1262                 p--;
  1263               charpos--;
  1264               c = STRING_CHAR (p);
  1265             }
  1266         }
  1267     }
  1268   cmp_it->stop_pos = charpos;
  1269 }
  1270 
  1271 /* Check if the character at CHARPOS (and BYTEPOS) is composed
  1272    (possibly with the following characters) on window W.  ENDPOS limits
  1273    characters to be composed.  FACE, if non-NULL, is a base face of
  1274    the character.  If STRING is not nil, it is a string containing the
  1275    character to check, and CHARPOS and BYTEPOS are indices in the
  1276    string.  In that case, FACE must not be NULL.  BIDI_LEVEL is the bidi
  1277    embedding level of the current paragraph, and is used to calculate the
  1278    direction argument to pass to the font shaper; value of -1 means the
  1279    caller doesn't know the embedding level (used by callers which didn't
  1280    invoke the display routines that perform bidi-display-reordering).
  1281 
  1282    If the character is composed, setup members of CMP_IT (id, nglyphs,
  1283    from, to, reversed_p), and return true.  Otherwise, update
  1284    CMP_IT->stop_pos, and return false.  */
  1285 
  1286 bool
  1287 composition_reseat_it (struct composition_it *cmp_it, ptrdiff_t charpos,
  1288                        ptrdiff_t bytepos, ptrdiff_t endpos, struct window *w,
  1289                        signed char bidi_level, struct face *face, Lisp_Object string)
  1290 {
  1291   if (cmp_it->ch == -2)
  1292     {
  1293       composition_compute_stop_pos (cmp_it, charpos, bytepos, endpos, string,
  1294                                     true);
  1295       if (cmp_it->ch == -2 || cmp_it->stop_pos != charpos)
  1296         /* The current position is not composed.  */
  1297         return 0;
  1298     }
  1299 
  1300   if (endpos < 0)
  1301     endpos = NILP (string) ? BEGV : 0;
  1302 
  1303   if (cmp_it->ch < 0)
  1304     {
  1305       /* We are looking at a static composition.  */
  1306       ptrdiff_t start, end;
  1307       Lisp_Object prop;
  1308 
  1309       find_composition (charpos, -1, &start, &end, &prop, string);
  1310       cmp_it->id = get_composition_id (charpos, bytepos, end - start,
  1311                                        prop, string);
  1312       if (cmp_it->id < 0)
  1313         goto no_composition;
  1314       cmp_it->nchars = end - start;
  1315       cmp_it->nglyphs = composition_table[cmp_it->id]->glyph_len;
  1316     }
  1317   else if (w)
  1318     {
  1319       Lisp_Object lgstring = Qnil;
  1320       Lisp_Object val, elt, direction = Qnil;
  1321 
  1322       val = CHAR_TABLE_REF (Vcomposition_function_table, cmp_it->ch);
  1323       for (EMACS_INT i = 0; i < cmp_it->rule_idx; i++, val = XCDR (val))
  1324         continue;
  1325       if (charpos < endpos)
  1326         {
  1327           if (bidi_level < 0)
  1328             direction = Qnil;
  1329           else if ((bidi_level & 1) == 0)
  1330             direction = QL2R;
  1331           else
  1332             direction = QR2L;
  1333           for (; CONSP (val); val = XCDR (val))
  1334             {
  1335               elt = XCAR (val);
  1336               if (! VECTORP (elt) || ASIZE (elt) != 3
  1337                   || ! FIXNUMP (AREF (elt, 1)))
  1338                 continue;
  1339               if (XFIXNAT (AREF (elt, 1)) != cmp_it->lookback)
  1340                 goto no_composition;
  1341               lgstring = autocmp_chars (elt, charpos, bytepos, endpos,
  1342                                         w, face, string, direction, cmp_it->ch);
  1343               if (composition_gstring_p (lgstring))
  1344                 break;
  1345               lgstring = Qnil;
  1346               /* Composition failed perhaps because the font doesn't
  1347                  support sufficient range of characters.  Try the
  1348                  other composition rules if any.  */
  1349             }
  1350           cmp_it->reversed_p = 0;
  1351         }
  1352       else
  1353         {
  1354           ptrdiff_t cpos = charpos, bpos = bytepos;
  1355 
  1356           cmp_it->reversed_p = 1;
  1357           elt = XCAR (val);
  1358           if (cmp_it->lookback > 0)
  1359             {
  1360               cpos = charpos - cmp_it->lookback;
  1361               /* Reject the composition if it starts before ENDPOS,
  1362                  which here can only happen if
  1363                  composition-break-at-point is non-nil and point is
  1364                  inside the composition.  */
  1365               if (cpos < endpos)
  1366                 {
  1367                   eassert (composition_break_at_point);
  1368                   eassert (endpos == PT);
  1369                   goto no_composition;
  1370                 }
  1371               if (STRINGP (string))
  1372                 bpos = string_char_to_byte (string, cpos);
  1373               else
  1374                 bpos = CHAR_TO_BYTE (cpos);
  1375             }
  1376           /* The bidi_level < 0 case below strictly speaking should
  1377              never happen, since we get here when bidi scan direction
  1378              is backward in the buffer, which can only happen if the
  1379              display routines were called to perform the bidi
  1380              reordering.  But it doesn't harm to test for that, and
  1381              avoid someone raising their brows and thinking it's a
  1382              subtle bug...  */
  1383           if (bidi_level < 0)
  1384             direction = Qnil;
  1385           else if ((bidi_level & 1) == 0)
  1386             direction = QL2R;
  1387           else
  1388             direction = QR2L;
  1389           lgstring = autocmp_chars (elt, cpos, bpos, charpos + 1, w, face,
  1390                                     string, direction, cmp_it->ch);
  1391           if (! composition_gstring_p (lgstring)
  1392               || cpos + LGSTRING_CHAR_LEN (lgstring) - 1 != charpos)
  1393             /* Composition failed or didn't cover the current
  1394                character.  */
  1395             goto no_composition;
  1396         }
  1397       if (NILP (lgstring))
  1398         goto no_composition;
  1399       if (NILP (LGSTRING_ID (lgstring)))
  1400         lgstring = composition_gstring_put_cache (lgstring, -1);
  1401       cmp_it->id = XFIXNUM (LGSTRING_ID (lgstring));
  1402       int i;
  1403       for (i = 0; i < LGSTRING_GLYPH_LEN (lgstring); i++)
  1404         if (NILP (LGSTRING_GLYPH (lgstring, i)))
  1405           break;
  1406       cmp_it->nglyphs = i;
  1407       cmp_it->from = 0;
  1408       cmp_it->to = i;
  1409     }
  1410   else
  1411     goto no_composition;
  1412   return 1;
  1413 
  1414  no_composition:
  1415   if (charpos == endpos)
  1416     return 0;
  1417   if (charpos < endpos)
  1418     {
  1419       charpos++;
  1420       if (NILP (string))
  1421         bytepos += next_char_len (bytepos);
  1422       else
  1423         bytepos += BYTES_BY_CHAR_HEAD (*(SDATA (string) + bytepos));
  1424     }
  1425   else
  1426     {
  1427       charpos--;
  1428       /* BYTEPOS is calculated in composition_compute_stop_pos */
  1429       bytepos = -1;
  1430     }
  1431   if (cmp_it->reversed_p)
  1432     endpos = -1;
  1433   composition_compute_stop_pos (cmp_it, charpos, bytepos, endpos, string, true);
  1434   return 0;
  1435 }
  1436 
  1437 /* Update charpos, nchars, nbytes, and width of the current grapheme
  1438    cluster.
  1439 
  1440    If the composition is static or automatic in L2R context, the
  1441    cluster is identified by CMP_IT->from, and CHARPOS is the position
  1442    of the first character of the cluster.  In this case, update
  1443    CMP_IT->to too.
  1444 
  1445    If the composition is automatic in R2L context, the cluster is
  1446    identified by CMP_IT->to, and CHARPOS is the position of the last
  1447    character of the cluster.  In this case, update CMP_IT->from too.
  1448 
  1449    The return value is the character code of the first character of
  1450    the cluster, or -1 if the composition is somehow broken.  */
  1451 
  1452 int
  1453 composition_update_it (struct composition_it *cmp_it, ptrdiff_t charpos, ptrdiff_t bytepos, Lisp_Object string)
  1454 {
  1455   int i;
  1456   int c UNINIT;
  1457 
  1458   if (cmp_it->ch < 0)
  1459     {
  1460       /* static composition */
  1461       struct composition *cmp = composition_table[cmp_it->id];
  1462 
  1463       cmp_it->charpos = charpos;
  1464       cmp_it->to = cmp_it->nglyphs;
  1465       if (cmp_it->nglyphs == 0)
  1466         c = -1;
  1467       else
  1468         {
  1469           for (i = 0; i < cmp->glyph_len; i++)
  1470             /* TAB in a composition means display glyphs with padding
  1471                space on the left or right.  */
  1472             if ((c = COMPOSITION_GLYPH (cmp, i)) != '\t')
  1473               break;
  1474           if (c == '\t')
  1475             c = ' ';
  1476         }
  1477       cmp_it->width = cmp->width;
  1478       charpos += cmp_it->nchars;
  1479       if (STRINGP (string))
  1480         cmp_it->nbytes = string_char_to_byte (string, charpos) - bytepos;
  1481       else
  1482         cmp_it->nbytes = CHAR_TO_BYTE (charpos) - bytepos;
  1483     }
  1484   else
  1485     {
  1486       /* Automatic composition.  */
  1487       Lisp_Object gstring = composition_gstring_from_id (cmp_it->id);
  1488       Lisp_Object glyph;
  1489       ptrdiff_t from;
  1490 
  1491       if (cmp_it->nglyphs == 0)
  1492         {
  1493           cmp_it->nchars = LGSTRING_CHAR_LEN (gstring);
  1494           cmp_it->width = 0;
  1495           cmp_it->from = cmp_it->to = 0;
  1496           return -1;
  1497         }
  1498       if (! cmp_it->reversed_p)
  1499         {
  1500           glyph = LGSTRING_GLYPH (gstring, cmp_it->from);
  1501           from = LGLYPH_FROM (glyph);
  1502           for (cmp_it->to = cmp_it->from + 1; cmp_it->to < cmp_it->nglyphs;
  1503                cmp_it->to++)
  1504             {
  1505               glyph = LGSTRING_GLYPH (gstring, cmp_it->to);
  1506               if (LGLYPH_FROM (glyph) != from)
  1507                 break;
  1508             }
  1509           cmp_it->charpos = charpos;
  1510         }
  1511       else
  1512         {
  1513           glyph = LGSTRING_GLYPH (gstring, cmp_it->to - 1);
  1514           from = LGLYPH_FROM (glyph);
  1515           cmp_it->charpos = charpos - (LGLYPH_TO (glyph) - from);
  1516           for (cmp_it->from = cmp_it->to - 1; cmp_it->from > 0;
  1517                cmp_it->from--)
  1518             {
  1519               glyph = LGSTRING_GLYPH (gstring, cmp_it->from - 1);
  1520               if (LGLYPH_FROM (glyph) != from)
  1521                 break;
  1522             }
  1523         }
  1524       glyph = LGSTRING_GLYPH (gstring, cmp_it->from);
  1525       cmp_it->nchars = LGLYPH_TO (glyph) + 1 - from;
  1526       cmp_it->nbytes = 0;
  1527       cmp_it->width = 0;
  1528       for (i = cmp_it->nchars - 1; i >= 0; i--)
  1529         {
  1530           c = XFIXNUM (LGSTRING_CHAR (gstring, from + i));
  1531           cmp_it->nbytes += CHAR_BYTES (c);
  1532           cmp_it->width += CHARACTER_WIDTH (c);
  1533         }
  1534     }
  1535   return c;
  1536 }
  1537 
  1538 
  1539 struct position_record
  1540 {
  1541   ptrdiff_t pos, pos_byte;
  1542   unsigned char *p;
  1543 };
  1544 
  1545 /* Update the members of POSITION to the next character boundary.  */
  1546 #define FORWARD_CHAR(POSITION, STOP)                                    \
  1547   do {                                                                  \
  1548     (POSITION).pos++;                                                   \
  1549     if ((POSITION).pos == (STOP))                                       \
  1550       {                                                                 \
  1551         (POSITION).p = GAP_END_ADDR;                                    \
  1552         (POSITION).pos_byte = GPT_BYTE;                                 \
  1553       }                                                                 \
  1554     else                                                                \
  1555       {                                                                 \
  1556         (POSITION).pos_byte += BYTES_BY_CHAR_HEAD (*((POSITION).p));    \
  1557         (POSITION).p += BYTES_BY_CHAR_HEAD (*((POSITION).p));           \
  1558       }                                                                 \
  1559   } while (0)
  1560 
  1561 /* Update the members of POSITION to the previous character boundary.  */
  1562 #define BACKWARD_CHAR(POSITION, STOP)           \
  1563   do {                                          \
  1564     if ((POSITION).pos == (STOP))               \
  1565       (POSITION).p = GPT_ADDR;                  \
  1566     do {                                        \
  1567       (POSITION).pos_byte--;                    \
  1568       (POSITION).p--;                           \
  1569     } while (! CHAR_HEAD_P (*((POSITION).p)));  \
  1570     (POSITION).pos--;                           \
  1571   } while (0)
  1572 
  1573 /* Similar to find_composition, but find an automatic composition instead.
  1574 
  1575    This function looks for automatic composition at or near position
  1576    POS of STRING object, either a buffer or a Lisp string.  If STRING
  1577    is nil, it defaults to the current buffer.  It must be assured that
  1578    POS is not within a static composition.  Also, the current buffer
  1579    must be displayed in some window, otherwise the function will
  1580    return FALSE.
  1581 
  1582    If LIMIT is negative, and there's no composition that includes POS
  1583    (i.e. starts at or before POS and ends at or after POS), return
  1584    FALSE.  In this case, the function is allowed to look from POS as
  1585    far back as BACKLIM, and as far forward as POS+1 plus
  1586    MAX_AUTO_COMPOSITION_LOOKBACK, the maximum number of look-back for
  1587    automatic compositions (3) -- this is a limitation imposed by
  1588    composition rules in composition-function-table, which see.  If
  1589    BACKLIM is negative, it stands for the beginning of STRING object:
  1590    BEGV for a buffer or position zero for a string.
  1591 
  1592    If LIMIT is positive, search for a composition forward (LIMIT >
  1593    POS) or backward (LIMIT < POS).  In this case, LIMIT bounds the
  1594    search for the first character of a composed sequence.
  1595    (LIMIT == POS is the same as LIMIT < 0.)  If LIMIT > POS, the
  1596    function can find a composition that starts after POS.
  1597 
  1598    BACKLIM limits how far back is the function allowed to look in
  1599    STRING object while trying to find a position where it is safe to
  1600    start searching forward for compositions.  Such a safe place is
  1601    generally the position after a character that can never be
  1602    composed.
  1603 
  1604    If BACKLIM is negative, that means the first character position of
  1605    STRING object; this is useful when calling the function for the
  1606    first time for a given buffer or string, since it is possible that
  1607    a composition begins before POS.  However, if POS is very far from
  1608    the beginning of STRING object, a negative value of BACKLIM could
  1609    make the function slow.  For that reason, when STRING is a buffer
  1610    or nil, we restrict the search back to the first newline before
  1611    POS.  Also, in this case the function may return START and END that
  1612    do not include POS, something that is not necessarily wanted, and
  1613    needs to be explicitly checked by the caller.
  1614 
  1615    When calling the function in a loop for the same buffer/string, the
  1616    caller should generally set BACKLIM equal to POS, to avoid costly
  1617    repeated searches backward.  This is because if the previous
  1618    positions were already checked for compositions, there should be no
  1619    reason to re-check them.
  1620 
  1621    If BACKLIM is positive, it must be less or equal to LIMIT.
  1622 
  1623    If an automatic composition satisfying the above conditions is
  1624    found, set *GSTRING to the Lispy glyph-string representing the
  1625    composition, set *START and *END to the start and end of the
  1626    composed sequence, and return TRUE.  Otherwise, set *GSTRING to
  1627    nil, and return FALSE.  */
  1628 
  1629 bool
  1630 find_automatic_composition (ptrdiff_t pos, ptrdiff_t limit, ptrdiff_t backlim,
  1631                             ptrdiff_t *start, ptrdiff_t *end,
  1632                             Lisp_Object *gstring, Lisp_Object string)
  1633 {
  1634   ptrdiff_t head, tail, stop;
  1635   /* Forward limit position of checking a composition taking a
  1636      looking-back count into account.  */
  1637   ptrdiff_t fore_check_limit;
  1638   struct position_record cur, prev;
  1639   int c;
  1640   Lisp_Object window;
  1641   struct window *w;
  1642   bool need_adjustment = 0;
  1643 
  1644   window = Fget_buffer_window (Fcurrent_buffer (), Qnil);
  1645   if (NILP (window))
  1646     return 0;
  1647   w = XWINDOW (window);
  1648 
  1649   cur.pos = pos;
  1650   if (NILP (string))
  1651     {
  1652       if (backlim < 0)
  1653         {
  1654           /* This assumes a newline can never be composed.  */
  1655           head = find_newline (pos, -1, 0, -1, -1, NULL, NULL, false);
  1656         }
  1657       else
  1658         head = backlim;
  1659       if (current_buffer->long_line_optimizations_p)
  1660         {
  1661           /* In buffers with very long lines, this function becomes very
  1662              slow.  Pretend that the buffer is narrowed to make it fast.  */
  1663           ptrdiff_t begv = get_small_narrowing_begv (w, window_point (w));
  1664           if (pos > begv)
  1665             head = begv;
  1666         }
  1667       tail = ZV;
  1668       stop = GPT;
  1669       cur.pos_byte = CHAR_TO_BYTE (cur.pos);
  1670       cur.p = BYTE_POS_ADDR (cur.pos_byte);
  1671     }
  1672   else
  1673     {
  1674       head = backlim < 0 ? 0 : backlim, tail = SCHARS (string), stop = -1;
  1675       cur.pos_byte = string_char_to_byte (string, cur.pos);
  1676       cur.p = SDATA (string) + cur.pos_byte;
  1677     }
  1678   if (limit < 0)
  1679     /* Finding a composition covering the character after POS is the
  1680        same as setting LIMIT to POS.  */
  1681     limit = pos;
  1682 
  1683   eassert (backlim < 0 || backlim <= limit);
  1684 
  1685   if (limit <= pos)
  1686     fore_check_limit = min (tail, pos + 1 + MAX_AUTO_COMPOSITION_LOOKBACK);
  1687   else
  1688     fore_check_limit = min (tail, limit + MAX_AUTO_COMPOSITION_LOOKBACK);
  1689 
  1690   /* Provided that we have these possible compositions now:
  1691 
  1692            POS: 1 2 3 4 5 6 7 8 9
  1693                         |-A-|
  1694                   |-B-|-C-|--D--|
  1695 
  1696      Here, it is known that characters after positions 1 and 9 can
  1697      never be composed (i.e. ! char_composable_p (CH)), and
  1698      composition A is an invalid one because it's partially covered by
  1699      the valid composition C.  And to know whether a composition is
  1700      valid or not, the only way is to start searching forward from a
  1701      position that can not be a tail part of composition (it's 2 in
  1702      the above case).
  1703 
  1704      Now we have these cases (1 through 4):
  1705 
  1706                    -- character after POS is ... --
  1707                    not composable         composable
  1708      LIMIT <= POS  (1)                    (3)
  1709      POS < LIMIT   (2)                    (4)
  1710 
  1711      Among them, in case (2), we simply search forward from POS.
  1712 
  1713      In the other cases, we at first rewind back to the position where
  1714      the previous character is not composable or the beginning of
  1715      buffer (string), then search compositions forward.  In case (1)
  1716      and (3) we repeat this process until a composition is found.  */
  1717 
  1718   while (1)
  1719     {
  1720       c = STRING_CHAR (cur.p);
  1721       if (! char_composable_p (c))
  1722         {
  1723           if (limit <= pos)     /* case (1)  */
  1724             {
  1725               do {
  1726                 if (cur.pos <= limit)
  1727                   return 0;
  1728                 BACKWARD_CHAR (cur, stop);
  1729                 c = STRING_CHAR (cur.p);
  1730               } while (! char_composable_p (c));
  1731               fore_check_limit = cur.pos + 1;
  1732             }
  1733           else                  /* case (2) */
  1734             /* No need of rewinding back.  */
  1735             goto search_forward;
  1736         }
  1737 
  1738       /* Rewind back to the position where we can safely search
  1739          forward for compositions.  It is assured that the character
  1740          at cur.pos is composable.  */
  1741       while (head < cur.pos)
  1742         {
  1743           prev = cur;
  1744           BACKWARD_CHAR (cur, stop);
  1745           c = STRING_CHAR (cur.p);
  1746           if (! char_composable_p (c))
  1747             {
  1748               cur = prev;
  1749               break;
  1750             }
  1751         }
  1752 
  1753     search_forward:
  1754       /* Now search forward. */
  1755       *gstring = Qnil;
  1756       prev = cur;       /* remember the start of searching position. */
  1757       while (cur.pos < fore_check_limit)
  1758         {
  1759           Lisp_Object val;
  1760 
  1761           c = STRING_CHAR (cur.p);
  1762           for (val = CHAR_TABLE_REF (Vcomposition_function_table, c);
  1763                CONSP (val); val = XCDR (val))
  1764             {
  1765               Lisp_Object elt = XCAR (val);
  1766 
  1767               if (VECTORP (elt) && ASIZE (elt) == 3 && FIXNATP (AREF (elt, 1)))
  1768                 {
  1769                   EMACS_INT check_pos = cur.pos - XFIXNAT (AREF (elt, 1));
  1770                   struct position_record check;
  1771 
  1772                   if (check_pos < head
  1773                       || (limit <= pos ? pos < check_pos
  1774                           : limit <= check_pos))
  1775                     continue;
  1776                   for (check = cur; check_pos < check.pos; )
  1777                     BACKWARD_CHAR (check, stop);
  1778                   *gstring = autocmp_chars (elt, check.pos, check.pos_byte,
  1779                                             tail, w, NULL, string, Qnil, c);
  1780                   need_adjustment = 1;
  1781                   if (NILP (*gstring))
  1782                     {
  1783                       /* As we have called Lisp, there's a possibility
  1784                          that buffer/string is relocated.  */
  1785                       if (NILP (string))
  1786                         cur.p  = BYTE_POS_ADDR (cur.pos_byte);
  1787                       else
  1788                         cur.p = SDATA (string) + cur.pos_byte;
  1789                     }
  1790                   else
  1791                     {
  1792                       /* We found a candidate of a target composition.  */
  1793                       *start = check.pos;
  1794                       *end = check.pos + LGSTRING_CHAR_LEN (*gstring);
  1795                       if (pos < limit
  1796                           ? pos < *end
  1797                           : *start <= pos && pos < *end)
  1798                         /* This is the target composition. */
  1799                         return 1;
  1800                       cur.pos = *end;
  1801                       if (NILP (string))
  1802                         {
  1803                           cur.pos_byte = CHAR_TO_BYTE (cur.pos);
  1804                           cur.p  = BYTE_POS_ADDR (cur.pos_byte);
  1805                         }
  1806                       else
  1807                         {
  1808                           cur.pos_byte = string_char_to_byte (string, cur.pos);
  1809                           cur.p = SDATA (string) + cur.pos_byte;
  1810                         }
  1811                       break;
  1812                     }
  1813                 }
  1814             }
  1815           if (! CONSP (val))
  1816             /* We found no composition here.  */
  1817             FORWARD_CHAR (cur, stop);
  1818         }
  1819 
  1820       if (pos < limit)          /* case (2) and (4)*/
  1821         return 0;
  1822       if (! NILP (*gstring))
  1823         return 1;
  1824       if (prev.pos == head)
  1825         return 0;
  1826       cur = prev;
  1827       if (need_adjustment)
  1828         {
  1829           if (NILP (string))
  1830             cur.p  = BYTE_POS_ADDR (cur.pos_byte);
  1831           else
  1832             cur.p = SDATA (string) + cur.pos_byte;
  1833         }
  1834       BACKWARD_CHAR (cur, stop);
  1835     }
  1836 }
  1837 
  1838 /* Return the adjusted point provided that point is moved from LAST_PT
  1839    to NEW_PT.  */
  1840 
  1841 ptrdiff_t
  1842 composition_adjust_point (ptrdiff_t last_pt, ptrdiff_t new_pt)
  1843 {
  1844   ptrdiff_t i, beg, end;
  1845   Lisp_Object val;
  1846 
  1847   if (new_pt == BEGV || new_pt == ZV)
  1848     return new_pt;
  1849 
  1850   /* At first check the static composition. */
  1851   if (get_property_and_range (new_pt, Qcomposition, &val, &beg, &end, Qnil)
  1852       && composition_valid_p (beg, end, val))
  1853     {
  1854       if (beg < new_pt /* && end > new_pt   <- It's always the case.  */
  1855           && (last_pt <= beg || last_pt >= end))
  1856         return (new_pt < last_pt ? beg : end);
  1857       return new_pt;
  1858     }
  1859 
  1860   if (NILP (BVAR (current_buffer, enable_multibyte_characters))
  1861       || inhibit_auto_composition ())
  1862     return new_pt;
  1863 
  1864   /* Next check the automatic composition.  */
  1865   if (! find_automatic_composition (new_pt, (ptrdiff_t) -1, (ptrdiff_t) -1,
  1866                                     &beg, &end, &val, Qnil)
  1867       || beg == new_pt)
  1868     return new_pt;
  1869   for (i = 0; i < LGSTRING_GLYPH_LEN (val); i++)
  1870     {
  1871       Lisp_Object glyph = LGSTRING_GLYPH (val, i);
  1872 
  1873       if (NILP (glyph))
  1874         break;
  1875       if (beg + LGLYPH_FROM (glyph) == new_pt)
  1876         return new_pt;
  1877       if (beg + LGLYPH_TO (glyph) >= new_pt)
  1878         return (new_pt < last_pt
  1879                 ? beg + LGLYPH_FROM (glyph)
  1880                 : beg + LGLYPH_TO (glyph) + 1);
  1881     }
  1882   return new_pt;
  1883 }
  1884 
  1885 DEFUN ("composition-get-gstring", Fcomposition_get_gstring,
  1886        Scomposition_get_gstring, 4, 4, 0,
  1887        doc: /* Return a glyph-string for characters between FROM and TO.
  1888 If the glyph string is for graphic display, FONT-OBJECT must be
  1889 a font-object to use for those characters.
  1890 Otherwise (for terminal display), FONT-OBJECT must be a terminal ID, a
  1891 frame, or nil for the selected frame's terminal device.
  1892 
  1893 If the optional 4th argument STRING is not nil, it is a string
  1894 containing the target characters between indices FROM and TO,
  1895 which are treated as in `substring'.  Otherwise FROM and TO are
  1896 character positions in current buffer; they can be in either order,
  1897 and can be integers or markers.
  1898 
  1899 A glyph-string is a vector containing information about how to display
  1900 a specific character sequence.  The format is:
  1901    [HEADER ID GLYPH ...]
  1902 
  1903 HEADER is a vector of this form:
  1904     [FONT-OBJECT CHAR ...]
  1905 where
  1906     FONT-OBJECT is a font-object for all glyphs in the glyph-string,
  1907     or the terminal coding system of the specified terminal.
  1908     CHARs are characters to be composed by GLYPHs.
  1909 
  1910 ID is an identification number of the glyph-string.  It may be nil if
  1911 not yet shaped.
  1912 
  1913 GLYPH is a vector whose elements have this form:
  1914     [ FROM-IDX TO-IDX C CODE WIDTH LBEARING RBEARING ASCENT DESCENT
  1915       [ [X-OFF Y-OFF WADJUST] | nil] ]
  1916 where
  1917     FROM-IDX and TO-IDX are used internally and should not be touched.
  1918     C is the character of the glyph.
  1919     CODE is the glyph-code of C in FONT-OBJECT.
  1920     WIDTH thru DESCENT are the metrics (in pixels) of the glyph.
  1921     X-OFF and Y-OFF are offsets to the base position for the glyph.
  1922     WADJUST is the adjustment to the normal width of the glyph.
  1923 
  1924 If GLYPH is nil, the remaining elements of the glyph-string vector
  1925 should be ignored.  */)
  1926   (Lisp_Object from, Lisp_Object to, Lisp_Object font_object, Lisp_Object string)
  1927 {
  1928   Lisp_Object gstring, header;
  1929   ptrdiff_t frompos, frombyte, topos;
  1930 
  1931   if (! FONT_OBJECT_P (font_object))
  1932     {
  1933       struct coding_system *coding;
  1934       struct terminal *terminal = decode_live_terminal (font_object);
  1935 
  1936       coding = ((TERMINAL_TERMINAL_CODING (terminal)->common_flags
  1937                  & CODING_REQUIRE_ENCODING_MASK)
  1938                 ? TERMINAL_TERMINAL_CODING (terminal) : &safe_terminal_coding);
  1939       font_object = CODING_ID_NAME (coding->id);
  1940     }
  1941 
  1942   if (NILP (string))
  1943     {
  1944       if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
  1945         error ("Attempt to shape unibyte text");
  1946       validate_region (&from, &to);
  1947       frompos = XFIXNAT (from);
  1948       topos = XFIXNAT (to);
  1949       frombyte = CHAR_TO_BYTE (frompos);
  1950     }
  1951   else
  1952     {
  1953       CHECK_STRING (string);
  1954       ptrdiff_t chars = SCHARS (string);
  1955       validate_subarray (string, from, to, chars, &frompos, &topos);
  1956       if (! STRING_MULTIBYTE (string))
  1957         {
  1958           ptrdiff_t i;
  1959 
  1960           for (i = SBYTES (string) - 1; i >= 0; i--)
  1961             if (!ASCII_CHAR_P (SREF (string, i)))
  1962               error ("Attempt to shape unibyte text");
  1963           /* STRING is a pure-ASCII string, so we can convert it (or,
  1964              rather, its copy) to multibyte and use that thereafter.  */
  1965           /* FIXME: Not clear why we need to do that: AFAICT the rest of
  1966              the code should work on an ASCII-only unibyte string just
  1967              as well (bug#56347).  */
  1968           string = make_multibyte_string (SSDATA (string), chars, chars);
  1969         }
  1970       frombyte = string_char_to_byte (string, frompos);
  1971     }
  1972 
  1973   header = fill_gstring_header (frompos, frombyte,
  1974                                 topos, font_object, string);
  1975   gstring = composition_gstring_lookup_cache (header);
  1976   if (! NILP (gstring))
  1977     return gstring;
  1978 
  1979   if (LGSTRING_GLYPH_LEN (gstring_work) < topos - frompos)
  1980     gstring_work = make_nil_vector (topos - frompos + 2);
  1981   LGSTRING_SET_HEADER (gstring_work, header);
  1982   LGSTRING_SET_ID (gstring_work, Qnil);
  1983   fill_gstring_body (gstring_work);
  1984   return gstring_work;
  1985 }
  1986 
  1987 
  1988 /* Emacs Lisp APIs.  */
  1989 
  1990 DEFUN ("compose-region-internal", Fcompose_region_internal,
  1991        Scompose_region_internal, 2, 4, 0,
  1992        doc: /* Internal use only.
  1993 
  1994 Compose text in the region between START and END.
  1995 Optional 3rd and 4th arguments are COMPONENTS and MODIFICATION-FUNC
  1996 for the composition.  See `compose-region' for more details.  */)
  1997   (Lisp_Object start, Lisp_Object end, Lisp_Object components, Lisp_Object modification_func)
  1998 {
  1999   validate_region (&start, &end);
  2000   if (!NILP (components)
  2001       && !FIXNUMP (components)
  2002       && !CONSP (components)
  2003       && !STRINGP (components))
  2004     CHECK_VECTOR (components);
  2005 
  2006   compose_text (XFIXNUM (start), XFIXNUM (end), components, modification_func, Qnil);
  2007   return Qnil;
  2008 }
  2009 
  2010 DEFUN ("compose-string-internal", Fcompose_string_internal,
  2011        Scompose_string_internal, 3, 5, 0,
  2012        doc: /* Internal use only.
  2013 
  2014 Compose text between indices START and END of STRING, where
  2015 START and END are treated as in `substring'.  Optional 4th
  2016 and 5th arguments are COMPONENTS and MODIFICATION-FUNC
  2017 for the composition.  See `compose-string' for more details.  */)
  2018   (Lisp_Object string, Lisp_Object start, Lisp_Object end,
  2019    Lisp_Object components, Lisp_Object modification_func)
  2020 {
  2021   ptrdiff_t from, to;
  2022 
  2023   CHECK_STRING (string);
  2024   validate_subarray (string, start, end, SCHARS (string), &from, &to);
  2025   compose_text (from, to, components, modification_func, string);
  2026   return string;
  2027 }
  2028 
  2029 DEFUN ("find-composition-internal", Ffind_composition_internal,
  2030        Sfind_composition_internal, 4, 4, 0,
  2031        doc: /* Internal use only.
  2032 
  2033 Return information about composition at or nearest to position POS.
  2034 See `find-composition' for more details.  */)
  2035   (Lisp_Object pos, Lisp_Object limit, Lisp_Object string, Lisp_Object detail_p)
  2036 {
  2037   Lisp_Object prop, tail, gstring;
  2038   ptrdiff_t start, end, from, to;
  2039   int id;
  2040 
  2041   EMACS_INT fixed_pos = fix_position (pos);
  2042   if (!NILP (limit))
  2043     to = clip_to_bounds (PTRDIFF_MIN, fix_position (limit), ZV);
  2044   else
  2045     to = -1;
  2046 
  2047   if (!NILP (string))
  2048     {
  2049       CHECK_STRING (string);
  2050       if (! (0 <= fixed_pos && fixed_pos <= SCHARS (string)))
  2051         args_out_of_range (string, pos);
  2052     }
  2053   else
  2054     {
  2055       if (! (BEGV <= fixed_pos && fixed_pos <= ZV))
  2056         args_out_of_range (Fcurrent_buffer (), pos);
  2057     }
  2058   from = fixed_pos;
  2059 
  2060   if (!find_composition (from, to, &start, &end, &prop, string))
  2061     {
  2062       if (((NILP (string)
  2063             && !NILP (BVAR (current_buffer, enable_multibyte_characters)))
  2064            || (!NILP (string) && STRING_MULTIBYTE (string)))
  2065           && ! inhibit_auto_composition ()
  2066           && find_automatic_composition (from, to, (ptrdiff_t) -1,
  2067                                          &start, &end, &gstring, string))
  2068         return list3 (make_fixnum (start), make_fixnum (end), gstring);
  2069       return Qnil;
  2070     }
  2071   if (! (start <= fixed_pos && fixed_pos < end))
  2072     {
  2073       ptrdiff_t s, e;
  2074 
  2075       if (find_automatic_composition (from, to, (ptrdiff_t) -1,
  2076                                       &s, &e, &gstring, string)
  2077           && (e <= fixed_pos ? e > end : s < start))
  2078         return list3 (make_fixnum (s), make_fixnum (e), gstring);
  2079     }
  2080   if (!composition_valid_p (start, end, prop))
  2081     return list3 (make_fixnum (start), make_fixnum (end), Qnil);
  2082   if (NILP (detail_p))
  2083     return list3 (make_fixnum (start), make_fixnum (end), Qt);
  2084 
  2085   if (composition_registered_p (prop))
  2086     id = COMPOSITION_ID (prop);
  2087   else
  2088     {
  2089       ptrdiff_t start_byte = (NILP (string)
  2090                               ? CHAR_TO_BYTE (start)
  2091                               : string_char_to_byte (string, start));
  2092       id = get_composition_id (start, start_byte, end - start, prop, string);
  2093     }
  2094 
  2095   if (id >= 0)
  2096     {
  2097       Lisp_Object components, relative_p, mod_func;
  2098       enum composition_method method = composition_method (prop);
  2099       int width = composition_table[id]->width;
  2100 
  2101       components = Fcopy_sequence (COMPOSITION_COMPONENTS (prop));
  2102       relative_p = (method == COMPOSITION_WITH_RULE_ALTCHARS
  2103                     ? Qnil : Qt);
  2104       mod_func = COMPOSITION_MODIFICATION_FUNC (prop);
  2105       tail = list4 (components, relative_p, mod_func, make_fixnum (width));
  2106     }
  2107   else
  2108     tail = Qnil;
  2109 
  2110   return Fcons (make_fixnum (start), Fcons (make_fixnum (end), tail));
  2111 }
  2112 
  2113 static int
  2114 compare_composition_rules (const void *r1, const void *r2)
  2115 {
  2116   Lisp_Object vec1 = *(Lisp_Object *)r1, vec2 = *(Lisp_Object *)r2;
  2117 
  2118   return XFIXNAT (AREF (vec2, 1)) - XFIXNAT (AREF (vec1, 1));
  2119 }
  2120 
  2121 DEFUN ("composition-sort-rules", Fcomposition_sort_rules,
  2122        Scomposition_sort_rules, 1, 1, 0,
  2123        doc: /* Sort composition RULES by their LOOKBACK parameter.
  2124 
  2125 If RULES include just one rule, return RULES.
  2126 Otherwise, return a new list of rules where all the rules are
  2127 arranged in decreasing order of the LOOKBACK parameter of the
  2128 rules (the second element of the rule's vector).  This is required
  2129 when combining composition rules from different sources, because
  2130 of the way buffer text is examined for matching one of the rules.  */)
  2131   (Lisp_Object rules)
  2132 {
  2133   ptrdiff_t nrules;
  2134   USE_SAFE_ALLOCA;
  2135 
  2136   CHECK_LIST (rules);
  2137   nrules = list_length (rules);
  2138   if (nrules > 1)
  2139     {
  2140       ptrdiff_t i;
  2141       Lisp_Object *sortvec;
  2142 
  2143       SAFE_NALLOCA (sortvec, 1, nrules);
  2144       for (i = 0; i < nrules; i++)
  2145         {
  2146           Lisp_Object elt = XCAR (rules);
  2147           if (VECTORP (elt) && ASIZE (elt) == 3 && FIXNATP (AREF (elt, 1)))
  2148             sortvec[i] = elt;
  2149           else
  2150             error ("Invalid composition rule in RULES argument");
  2151           rules = XCDR (rules);
  2152         }
  2153       qsort (sortvec, nrules, sizeof (Lisp_Object), compare_composition_rules);
  2154       rules = Flist (nrules, sortvec);
  2155       SAFE_FREE ();
  2156     }
  2157 
  2158   return rules;
  2159 }
  2160 
  2161 
  2162 void
  2163 syms_of_composite (void)
  2164 {
  2165   int i;
  2166 
  2167   DEFSYM (Qcomposition, "composition");
  2168 
  2169   /* Make a hash table for static composition.  */
  2170   /* We used to make the hash table weak so that unreferenced
  2171      compositions can be garbage-collected.  But, usually once
  2172      created compositions are repeatedly used in an Emacs session,
  2173      and thus it's not worth to save memory in such a way.  So, we
  2174      make the table not weak.  */
  2175   Lisp_Object args[] = {QCtest, Qequal, QCsize, make_fixnum (311)};
  2176   composition_hash_table = CALLMANY (Fmake_hash_table, args);
  2177   staticpro (&composition_hash_table);
  2178 
  2179   /* Make a hash table for glyph-string.  */
  2180   gstring_hash_table = CALLMANY (Fmake_hash_table, args);
  2181   staticpro (&gstring_hash_table);
  2182 
  2183   staticpro (&gstring_work_headers);
  2184   gstring_work_headers = make_nil_vector (8);
  2185   for (i = 0; i < 8; i++)
  2186     ASET (gstring_work_headers, i, make_nil_vector (i + 2));
  2187   staticpro (&gstring_work);
  2188   gstring_work = make_nil_vector (10);
  2189 
  2190   /* Text property `composition' should be nonsticky by default.  */
  2191   Vtext_property_default_nonsticky
  2192     = Fcons (Fcons (Qcomposition, Qt), Vtext_property_default_nonsticky);
  2193 
  2194   DEFVAR_LISP ("compose-chars-after-function", Vcompose_chars_after_function,
  2195                doc: /* Function to adjust composition of buffer text.
  2196 
  2197 This function is called with three arguments: FROM, TO, and OBJECT.
  2198 FROM and TO specify the range of text whose composition should be
  2199 adjusted.  OBJECT, if non-nil, is a string that contains the text.
  2200 
  2201 This function is called after a text with `composition' property is
  2202 inserted or deleted to keep `composition' property of buffer text
  2203 valid.
  2204 
  2205 The default value is the function `compose-chars-after'.  */);
  2206   Vcompose_chars_after_function = intern_c_string ("compose-chars-after");
  2207 
  2208   DEFSYM (Qauto_composed, "auto-composed");
  2209 
  2210   DEFVAR_LISP ("auto-composition-mode", Vauto_composition_mode,
  2211                doc: /* Non-nil if Auto-Composition mode is enabled.
  2212 Use the command `auto-composition-mode' to change this variable.
  2213 
  2214 If this variable is a string, `auto-composition-mode' will be disabled in
  2215 buffers displayed on a terminal whose type, as reported by `tty-type',
  2216 compares equal to that string.  */);
  2217   Vauto_composition_mode = Qt;
  2218 
  2219   DEFVAR_LISP ("auto-composition-function", Vauto_composition_function,
  2220                doc: /* Function to call to compose characters automatically.
  2221 This function is called from the display engine with 6 arguments:
  2222 FUNC, FROM, TO, FONT-OBJECT, STRING, and DIRECTION.
  2223 
  2224 FUNC is the function to compose characters.  On text-mode display,
  2225 FUNC is ignored and `compose-gstring-for-terminal' is used instead.
  2226 
  2227 If STRING is nil, the function must compose characters in the region
  2228 between FROM and TO in the current buffer.
  2229 
  2230 Otherwise, STRING is a string, and FROM and TO are indices into the
  2231 string.  In this case, the function must compose characters in the
  2232 string.
  2233 
  2234 FONT-OBJECT is the font to use, or nil if characters are to be
  2235 composed on a text-mode display.
  2236 
  2237 DIRECTION is the bidi directionality of the text to shape.  It could
  2238 be L2R or R2L, or nil if unknown.  */);
  2239   Vauto_composition_function = Qnil;
  2240 
  2241   DEFVAR_LISP ("composition-function-table", Vcomposition_function_table,
  2242                doc: /* Char-table of functions for automatic character composition.
  2243 For each character that has to be composed automatically with
  2244 preceding and/or following characters, this char-table contains
  2245 a function to call to compose that character.
  2246 
  2247 The element at index C in the table, if non-nil, is a list of
  2248 composition rules of the form ([PATTERN PREV-CHARS FUNC] ...);
  2249 the rules must be specified in the descending order of PREV-CHARS
  2250 values.
  2251 
  2252 PATTERN is a regular expression which C and the surrounding
  2253 characters must match.
  2254 
  2255 PREV-CHARS is a non-negative integer (less than 4) specifying how many
  2256 characters before C to check the matching with PATTERN.  If it is 0,
  2257 PATTERN must match C and the following characters.  If it is 1,
  2258 PATTERN must match a character before C and the following characters.
  2259 
  2260 If PREV-CHARS is 0, PATTERN can be nil, which means that the
  2261 single character C should be composed.
  2262 
  2263 FUNC is a function to return a glyph-string representing a
  2264 composition of the characters that match PATTERN.  It is
  2265 called with one argument GSTRING.
  2266 
  2267 GSTRING is a template of a glyph-string to return.  It is already
  2268 filled with a proper header for the characters to compose, and
  2269 glyphs corresponding to those characters one by one.  The
  2270 function must return a new glyph-string with the same header as
  2271 GSTRING, or modify GSTRING itself and return it.
  2272 
  2273 See also the documentation of `auto-composition-mode'.  */);
  2274   Vcomposition_function_table = Fmake_char_table (Qnil, Qnil);
  2275 
  2276   DEFVAR_LISP ("auto-composition-emoji-eligible-codepoints", Vauto_composition_emoji_eligible_codepoints,
  2277                doc: /* List of codepoints for which auto-composition will check for an emoji font.
  2278 
  2279 These are codepoints which have Emoji_Presentation = No, and thus by
  2280 default are not displayed as emoji.  In certain circumstances, such as
  2281 when followed by U+FE0F (VS-16) the emoji font should be used for
  2282 them anyway.
  2283 
  2284 This list is auto-generated, you should not need to modify it.  */);
  2285   Vauto_composition_emoji_eligible_codepoints = Qnil;
  2286 
  2287   defsubr (&Scompose_region_internal);
  2288   defsubr (&Scompose_string_internal);
  2289   defsubr (&Sfind_composition_internal);
  2290   defsubr (&Scomposition_get_gstring);
  2291   defsubr (&Sclear_composition_cache);
  2292   defsubr (&Scomposition_sort_rules);
  2293 }

/* [<][>][^][v][top][bottom][index][help] */