root/src/bidi.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. bidi_get_type
  2. bidi_check_type
  3. bidi_get_category
  4. bidi_isolate_fmt_char
  5. bidi_mirror_char
  6. bidi_paired_bracket_type
  7. bidi_set_sos_type
  8. bidi_push_embedding_level
  9. bidi_pop_embedding_level
  10. bidi_remember_char
  11. bidi_copy_it
  12. bidi_cache_reset_to
  13. bidi_cache_reset
  14. bidi_cache_shrink
  15. bidi_cache_fetch_state
  16. bidi_cache_search
  17. bidi_cache_find_level_change
  18. bidi_cache_ensure_space
  19. bidi_cache_iterator_state
  20. bidi_cache_find
  21. bidi_peek_at_next_level
  22. bidi_push_it
  23. bidi_pop_it
  24. bidi_shelve_cache
  25. bidi_unshelve_cache
  26. bidi_initialize
  27. bidi_set_paragraph_end
  28. bidi_init_it
  29. bidi_line_init
  30. bidi_count_bytes
  31. bidi_char_at_pos
  32. bidi_fetch_char
  33. bidi_fetch_char_skip_isolates
  34. bidi_at_paragraph_end
  35. bidi_paragraph_cache_on_off
  36. bidi_find_paragraph_start
  37. find_first_strong_char
  38. bidi_paragraph_init
  39. bidi_explicit_dir_char
  40. bidi_resolve_explicit
  41. bidi_resolve_weak
  42. bidi_resolve_neutral_1
  43. bidi_find_bracket_pairs
  44. bidi_record_type_for_neutral
  45. bidi_resolve_brackets
  46. bidi_resolve_neutral
  47. bidi_type_of_next_char
  48. bidi_level_of_next_char
  49. bidi_find_other_level_edge
  50. bidi_move_to_visually_next
  51. bidi_find_first_overridden
  52. bidi_dump_cached_states

     1 /* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
     2 
     3 Copyright (C) 2000-2001, 2004-2005, 2009-2023 Free Software Foundation,
     4 Inc.
     5 
     6 Author: Eli Zaretskii <eliz@gnu.org>
     7 
     8 This file is part of GNU Emacs.
     9 
    10 GNU Emacs is free software: you can redistribute it and/or modify
    11 it under the terms of the GNU General Public License as published by
    12 the Free Software Foundation, either version 3 of the License, or (at
    13 your option) any later version.
    14 
    15 GNU Emacs is distributed in the hope that it will be useful,
    16 but WITHOUT ANY WARRANTY; without even the implied warranty of
    17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    18 GNU General Public License for more details.
    19 
    20 You should have received a copy of the GNU General Public License
    21 along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
    22 
    23 /* A sequential implementation of the Unicode Bidirectional algorithm,
    24    (UBA) as per UAX#9, a part of the Unicode Standard.
    25 
    26    Unlike the Reference Implementation and most other implementations,
    27    this one is designed to be called once for every character in the
    28    buffer or string.  That way, we can leave intact the design of the
    29    Emacs display engine, whereby an iterator object is used to
    30    traverse buffer or string text character by character, and generate
    31    the necessary data for displaying each character in 'struct glyph'
    32    objects.  (See xdisp.c for the details of that iteration.)  The
    33    functions on this file replace the original linear iteration in the
    34    logical order of the text with a non-linear iteration in the visual
    35    order, i.e. in the order characters should be shown on display.
    36 
    37    The main entry point is bidi_move_to_visually_next.  Each time it
    38    is called, it finds the next character in the visual order, and
    39    returns its information in a special structure.  The caller is then
    40    expected to process this character for display or any other
    41    purposes, and call bidi_move_to_visually_next for the next
    42    character.  See the comments in bidi_move_to_visually_next for more
    43    details about its algorithm that finds the next visual-order
    44    character by resolving their levels on the fly.
    45 
    46    Two other entry points are bidi_paragraph_init and
    47    bidi_mirror_char.  The first determines the base direction of a
    48    paragraph, while the second returns the mirrored version of its
    49    argument character.
    50 
    51    A few auxiliary entry points are used to initialize the bidi
    52    iterator for iterating an object (buffer or string), push and pop
    53    the bidi iterator state, and save and restore the state of the bidi
    54    cache.
    55 
    56    If you want to understand the code, you will have to read it
    57    together with the relevant portions of UAX#9.  The comments include
    58    references to UAX#9 rules, for that very reason.
    59 
    60    A note about references to UAX#9 rules: if the reference says
    61    something like "X9/Retaining", it means that you need to refer to
    62    rule X9 and to its modifications described in the "Implementation
    63    Notes" section of UAX#9, under "Retaining Format Codes".
    64 
    65    Here's the overview of the design of the reordering engine
    66    implemented by this file.
    67 
    68    Basic implementation structure
    69    ------------------------------
    70 
    71    The sequential processing steps described by UAX#9 are implemented
    72    as recursive levels of processing, all of which examine the next
    73    character in the logical order.  This hierarchy of processing looks
    74    as follows, from the innermost (deepest) to the outermost level,
    75    omitting some subroutines used by each level:
    76 
    77      bidi_fetch_char         -- fetch next character
    78      bidi_resolve_explicit   -- resolve explicit levels and directions
    79      bidi_resolve_weak       -- resolve weak types
    80      bidi_resolve_brackets   -- resolve "paired brackets" neutral types
    81      bidi_resolve_neutral    -- resolve neutral types
    82      bidi_level_of_next_char -- resolve implicit levels
    83 
    84    Each level calls the level below it, and works on the result
    85    returned by the lower level, including all of its sub-levels.
    86 
    87    Unlike all the levels below it, bidi_level_of_next_char can return
    88    the information about either the next or previous character in the
    89    logical order, depending on the current direction of scanning the
    90    buffer or string.  For the next character, it calls all the levels
    91    below it; for the previous character, it uses the cache, described
    92    below.
    93 
    94    Thus, the result of calling bidi_level_of_next_char is the resolved
    95    level of the next or the previous character in the logical order.
    96    Based on this information, the function bidi_move_to_visually_next
    97    finds the next character in the visual order and updates the
    98    direction in which the buffer is scanned, either forward or
    99    backward, to find the next character to be displayed.  (Text is
   100    scanned backwards when it needs to be reversed for display, i.e. if
   101    the visual order is the inverse of the logical order.)  This
   102    implements the last, reordering steps of the UBA, by successively
   103    calling bidi_level_of_next_char until the character of the required
   104    embedding level is found; the scan direction is dynamically updated
   105    as a side effect.  See the commentary before the 'while' loop in
   106    bidi_move_to_visually_next, for the details.
   107 
   108    Fetching characters
   109    -------------------
   110 
   111    In a nutshell, fetching the next character boils down to calling
   112    string_char_and_length, passing it the address of a buffer or
   113    string position.  See bidi_fetch_char.  However, if the next
   114    character is "covered" by a display property of some kind,
   115    bidi_fetch_char returns the u+FFFC "object replacement character"
   116    that represents the entire run of text covered by the display
   117    property.  (The ch_len and nchars members of 'struct bidi_it'
   118    reflect the length in bytes and characters of that text.)  This is
   119    so we reorder text on both sides of the display property as
   120    appropriate for an image or embedded string.  Similarly, text
   121    covered by a display spec of the form '(space ...)', is replaced
   122    with the u+2029 paragraph separator character, so such display
   123    specs produce the same effect as a TAB under UBA.  Both these
   124    special characters are not actually displayed -- the display
   125    property is displayed instead -- but just used to compute the
   126    embedding level of the surrounding text so as to produce the
   127    required effect.
   128 
   129    Bidi iterator states
   130    --------------------
   131 
   132    The UBA is highly context dependent in some of its parts,
   133    i.e. results of processing a character can generally depend on
   134    characters very far away.  The UAX#9 description of the UBA
   135    prescribes a stateful processing of each character, whereby the
   136    results of this processing depend on various state variables, such
   137    as the current embedding level, level stack, and directional
   138    override status.  In addition, the UAX#9 description includes many
   139    passages like this (from rule W2 in this case):
   140 
   141      Search backward from each instance of a European number until the
   142      first strong type (R, L, AL, or sos) is found. If an AL is found,
   143      change the type of the European number to Arabic number.
   144 
   145    To support this, we use a bidi iterator object, 'struct bidi_it',
   146    which is a sub-structure of 'struct it' used by xdisp.c (see
   147    dispextern.h for the definition of both of these structures).  The
   148    bidi iterator holds the entire state of the iteration required by
   149    the UBA, and is updated as the text is traversed.  In particular,
   150    the embedding level of the current character being resolved is
   151    recorded in the iterator state.  To avoid costly searches backward
   152    in support of rules like W2 above, the necessary character types
   153    are also recorded in the iterator state as they are found during
   154    the forward scan, and then used when such rules need to be applied.
   155    (Forward scans cannot be avoided in this way; they need to be
   156    performed at least once, and the results recorded in the iterator
   157    state, to be reused until the forward scan oversteps the recorded
   158    position.)
   159 
   160    In this manner, the iterator state acts as a mini-cache of
   161    contextual information required for resolving the level of the
   162    current character by various UBA rules.
   163 
   164    Caching of bidi iterator states
   165    -------------------------------
   166 
   167    As described above, the reordering engine uses the information
   168    recorded in the bidi iterator state in order to resolve the
   169    embedding level of the current character.  When the reordering
   170    engine needs to process the next character in the logical order, it
   171    fetches it and applies to it all the UBA levels, updating the
   172    iterator state as it goes.  But when the buffer or string is
   173    scanned backwards, i.e. in the reverse order of buffer/string
   174    positions, the scanned characters were already processed during the
   175    preceding forward scan (see bidi_find_other_level_edge).  To avoid
   176    costly re-processing of characters that were already processed
   177    during the forward scan, the iterator states computed while
   178    scanning forward are cached.
   179 
   180    The cache is just a linear array of 'struct bidi_it' objects, which
   181    is dynamically allocated and reallocated as needed, since the size
   182    of the cache depends on the text being processed.  We only need the
   183    cache while processing embedded levels higher than the base
   184    paragraph embedding level, because these higher levels require
   185    changes in scan direction.  Therefore, as soon as we are back to
   186    the base embedding level, we can free the cache; see the calls to
   187    bidi_cache_reset and bidi_cache_shrink, for the conditions to do
   188    this.
   189 
   190    The cache maintains the index of the next unused cache slot -- this
   191    is where the next iterator state will be cached.  The function
   192    bidi_cache_iterator_state saves an instance of the state in the
   193    cache and increments the unused slot index.  The companion function
   194    bidi_cache_find looks up a cached state that corresponds to a given
   195    buffer/string position.  All of the cached states must correspond
   196    1:1 to the buffer or string region whose processing they reflect;
   197    bidi.c will abort if it finds cache slots that violate this 1:1
   198    correspondence.
   199 
   200    When the parent iterator 'struct it' is pushed (see push_it in
   201    xdisp.c) to pause the current iteration and start iterating over a
   202    different object (e.g., a 'display' string that covers some buffer
   203    text), the bidi iterator cache needs to be "pushed" as well, so
   204    that a new empty cache could be used while iterating over the new
   205    object.  Later, when the new object is exhausted, and xdisp.c calls
   206    pop_it, we need to "pop" the bidi cache as well and return to the
   207    original cache.  See bidi_push_it and bidi_pop_it for how this is
   208    done.
   209 
   210    Some functions of the display engine save copies of 'struct it' in
   211    local variables, and restore them later.  For examples, see
   212    pos_visible_p and move_it_in_display_line_to in xdisp.c, and
   213    window_scroll_pixel_based in window.c.  When this happens, we need
   214    to save and restore the bidi cache as well, because conceptually
   215    the cache is part of the 'struct it' state, and needs to be in
   216    perfect sync with the portion of the buffer/string that is being
   217    processed.  This saving and restoring of the cache state is handled
   218    by bidi_shelve_cache and bidi_unshelve_cache, and the helper macros
   219    SAVE_IT and RESTORE_IT defined on xdisp.c.
   220 
   221    Note that, because reordering is implemented below the level in
   222    xdisp.c that breaks glyphs into screen lines, we are violating
   223    paragraph 3.4 of UAX#9. which mandates that line breaking shall be
   224    done before reordering each screen line separately.  However,
   225    following UAX#9 to the letter in this matter goes against the basic
   226    design of the Emacs display engine, and so we choose here this
   227    minor deviation from the UBA letter in preference to redesign of
   228    the display engine.  The effect of this is only seen in continued
   229    lines that are broken into screen lines in the middle of a run
   230    whose direction is opposite to the paragraph's base direction.
   231 
   232    Important design and implementation note: when the code needs to
   233    scan far ahead, be sure to avoid such scans as much as possible
   234    when the buffer/string doesn't contain any RTL characters.  Users
   235    of left-to-right scripts will never forgive you if you introduce
   236    some slow-down due to bidi in situations that don't involve any
   237    bidirectional text.  See the large comment near the beginning of
   238    bidi_resolve_neutral, for one situation where such shortcut was
   239    necessary.  */
   240 
   241 #include <config.h>
   242 
   243 #include "lisp.h"
   244 #include "character.h"
   245 #include "buffer.h"
   246 #include "dispextern.h"
   247 #include "region-cache.h"
   248 #include "sysstdio.h"
   249 
   250 static bool bidi_initialized = 0;
   251 
   252 static Lisp_Object bidi_type_table, bidi_mirror_table, bidi_brackets_table;
   253 
   254 #define BIDI_EOB   (-1)
   255 
   256 /* Data type for describing the bidirectional character categories.  */
   257 typedef enum {
   258   UNKNOWN_BC,
   259   NEUTRAL,
   260   WEAK,
   261   STRONG,
   262   EXPLICIT_FORMATTING
   263 } bidi_category_t;
   264 
   265 static Lisp_Object paragraph_start_re, paragraph_separate_re;
   266 
   267 
   268 /***********************************************************************
   269                         Utilities
   270  ***********************************************************************/
   271 
   272 /* Return the bidi type of a character CH, subject to the current
   273    directional OVERRIDE.  */
   274 static bidi_type_t
   275 bidi_get_type (int ch, bidi_dir_t override)
   276 {
   277   bidi_type_t default_type;
   278 
   279   if (ch == BIDI_EOB)
   280     return NEUTRAL_B;
   281   if (ch < 0 || ch > MAX_CHAR)
   282     emacs_abort ();
   283 
   284   default_type = (bidi_type_t) XFIXNUM (CHAR_TABLE_REF (bidi_type_table, ch));
   285   /* Every valid character code, even those that are unassigned by the
   286      UCD, have some bidi-class property, according to
   287      DerivedBidiClass.txt file.  Therefore, if we ever get UNKNOWN_BT
   288      (= zero) code from CHAR_TABLE_REF, that's a bug.  */
   289   if (default_type == UNKNOWN_BT)
   290     emacs_abort ();
   291 
   292   switch (default_type)
   293     {
   294       case WEAK_BN:
   295       case NEUTRAL_B:
   296       case LRE:
   297       case LRO:
   298       case RLE:
   299       case RLO:
   300       case PDF:
   301       case LRI:
   302       case RLI:
   303       case FSI:
   304       case PDI:
   305         return default_type;
   306       default:
   307         if (override == L2R)
   308           return STRONG_L;
   309         else if (override == R2L)
   310           return STRONG_R;
   311         else
   312           return default_type;
   313     }
   314 }
   315 
   316 static void
   317 bidi_check_type (bidi_type_t type)
   318 {
   319   eassert (UNKNOWN_BT <= type && type <= NEUTRAL_ON);
   320 }
   321 
   322 /* Given a bidi TYPE of a character, return its category.  */
   323 static bidi_category_t
   324 bidi_get_category (bidi_type_t type)
   325 {
   326   switch (type)
   327     {
   328       case UNKNOWN_BT:
   329         return UNKNOWN_BC;
   330       case STRONG_L:
   331       case STRONG_R:
   332       case STRONG_AL:
   333         return STRONG;
   334       case WEAK_EN:
   335       case WEAK_ES:
   336       case WEAK_ET:
   337       case WEAK_AN:
   338       case WEAK_CS:
   339       case WEAK_NSM:
   340       case WEAK_BN:
   341         return WEAK;
   342       case NEUTRAL_B:
   343       case NEUTRAL_S:
   344       case NEUTRAL_WS:
   345       case NEUTRAL_ON:
   346         return NEUTRAL;
   347       case LRE:
   348       case LRO:
   349       case RLE:
   350       case RLO:
   351       case PDF:
   352       case LRI:
   353       case RLI:
   354       case FSI:
   355       case PDI:
   356         return EXPLICIT_FORMATTING;
   357       default:
   358         emacs_abort ();
   359     }
   360 }
   361 
   362 static bool
   363 bidi_isolate_fmt_char (bidi_type_t ch_type)
   364 {
   365   return (ch_type == LRI || ch_type == RLI || ch_type == PDI || ch_type == FSI);
   366 }
   367 
   368 /* Return the mirrored character of C, if it has one.  If C has no
   369    mirrored counterpart, return C.
   370    Note: The conditions in UAX#9 clause L4 regarding the surrounding
   371    context must be tested by the caller.  */
   372 int
   373 bidi_mirror_char (int c)
   374 {
   375   Lisp_Object val;
   376 
   377   if (c == BIDI_EOB)
   378     return c;
   379   if (c < 0 || c > MAX_CHAR)
   380     emacs_abort ();
   381 
   382   val = CHAR_TABLE_REF (bidi_mirror_table, c);
   383   if (FIXNUMP (val))
   384     {
   385       int v;
   386 
   387       /* When debugging, check before assigning to V, so that the check
   388          isn't broken by undefined behavior due to int overflow.  */
   389       eassert (CHAR_VALID_P (XFIXNUM (val)));
   390 
   391       v = XFIXNUM (val);
   392 
   393       /* Minimal test we must do in optimized builds, to prevent weird
   394          crashes further down the road.  */
   395       if (v < 0 || v > MAX_CHAR)
   396         emacs_abort ();
   397 
   398       return v;
   399     }
   400 
   401   return c;
   402 }
   403 
   404 /* Return the Bidi_Paired_Bracket_Type property of the character C.  */
   405 static bidi_bracket_type_t
   406 bidi_paired_bracket_type (int c)
   407 {
   408   if (c == BIDI_EOB || bidi_inhibit_bpa)
   409     return BIDI_BRACKET_NONE;
   410   if (c < 0 || c > MAX_CHAR)
   411     emacs_abort ();
   412 
   413   return (bidi_bracket_type_t) XFIXNUM (CHAR_TABLE_REF (bidi_brackets_table, c));
   414 }
   415 
   416 /* Determine the start-of-sequence (sos) directional type given the two
   417    embedding levels on either side of the run boundary.  Also, update
   418    the saved info about previously seen characters, since that info is
   419    generally valid for a single level run.  */
   420 static void
   421 bidi_set_sos_type (struct bidi_it *bidi_it, int level_before, int level_after)
   422 {
   423   int higher_level = (level_before > level_after ? level_before : level_after);
   424 
   425   /* FIXME: should the default sos direction be user selectable?  */
   426   bidi_it->sos = ((higher_level & 1) != 0 ? R2L : L2R); /* X10 */
   427 
   428   bidi_it->prev.type = UNKNOWN_BT;
   429   bidi_it->last_strong.type = bidi_it->last_strong.orig_type = UNKNOWN_BT;
   430   bidi_it->prev_for_neutral.type = (bidi_it->sos == R2L ? STRONG_R : STRONG_L);
   431   bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
   432   bidi_it->next_for_neutral.type
   433     = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
   434 }
   435 
   436 #define ISOLATE_STATUS(BIDI_IT, IDX)  ((BIDI_IT)->level_stack[IDX].flags & 1)
   437 #define OVERRIDE(BIDI_IT, IDX)  (((BIDI_IT)->level_stack[IDX].flags >> 1) & 3)
   438 
   439 /* Push the current embedding level and override status; reset the
   440    current level to LEVEL and the current override status to OVERRIDE.  */
   441 static void
   442 bidi_push_embedding_level (struct bidi_it *bidi_it,
   443                            int level, bidi_dir_t override, bool isolate_status)
   444 {
   445   struct bidi_stack *st;
   446   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
   447 
   448   bidi_it->stack_idx++;
   449   eassert (bidi_it->stack_idx < BIDI_MAXDEPTH+2+1);
   450   st = &bidi_it->level_stack[bidi_it->stack_idx];
   451   eassert (level <= (1 << 7));
   452   st->level = level;
   453   st->flags = (((override & 3) << 1) | (isolate_status != 0));
   454   if (isolate_status)
   455     {
   456       st->last_strong_type = bidi_it->last_strong.type;
   457       st->prev_for_neutral_type = bidi_it->prev_for_neutral.type;
   458       st->next_for_neutral_type = bidi_it->next_for_neutral.type;
   459       st->next_for_neutral_pos = bidi_it->next_for_neutral.charpos;
   460       st->flags |= ((bidi_it->sos == L2R ? 0 : 1) << 3);
   461     }
   462   /* We've got a new isolating sequence, compute the directional type
   463      of sos and initialize per-sequence variables (UAX#9, clause X10).  */
   464   bidi_set_sos_type (bidi_it, prev_level, level);
   465 }
   466 
   467 /* Pop from the stack the embedding level, the directional override
   468    status, and optionally saved information for the isolating run
   469    sequence.  Return the new level.  */
   470 static int
   471 bidi_pop_embedding_level (struct bidi_it *bidi_it)
   472 {
   473   int level;
   474 
   475   /* UAX#9 says to ignore invalid PDFs (X7, last bullet)
   476      and PDIs (X6a, 2nd bullet).  */
   477   if (bidi_it->stack_idx > 0)
   478     {
   479       bool isolate_status = ISOLATE_STATUS (bidi_it, bidi_it->stack_idx);
   480       int old_level = bidi_it->level_stack[bidi_it->stack_idx].level;
   481 
   482       struct bidi_stack st;
   483 
   484       st = bidi_it->level_stack[bidi_it->stack_idx];
   485       if (isolate_status)
   486         {
   487           bidi_dir_t sos = ((st.flags >> 3) & 1);
   488           /* PREV is used in W1 for resolving WEAK_NSM.  By the time
   489              we get to an NSM, we must have gotten past at least one
   490              character: the PDI that ends the isolate from which we
   491              are popping here.  So PREV will have been filled up by
   492              the time we first use it.  We initialize it here to
   493              UNKNOWN_BT to be able to catch any blunders in this
   494              logic.  */
   495           bidi_it->prev.orig_type = bidi_it->prev.type = UNKNOWN_BT;
   496           bidi_it->last_strong.type = st.last_strong_type;
   497           bidi_it->prev_for_neutral.type = st.prev_for_neutral_type;
   498           bidi_it->next_for_neutral.type = st.next_for_neutral_type;
   499           bidi_it->next_for_neutral.charpos = st.next_for_neutral_pos;
   500           bidi_it->sos = (sos == 0 ? L2R : R2L);
   501         }
   502       else
   503         bidi_set_sos_type (bidi_it, old_level,
   504                            bidi_it->level_stack[bidi_it->stack_idx - 1].level);
   505 
   506       bidi_it->stack_idx--;
   507     }
   508   level = bidi_it->level_stack[bidi_it->stack_idx].level;
   509   eassert (0 <= level && level <= BIDI_MAXDEPTH + 1);
   510   return level;
   511 }
   512 
   513 /* Record in SAVED_INFO the information about the current character.  */
   514 static void
   515 bidi_remember_char (struct bidi_saved_info *saved_info,
   516                     struct bidi_it *bidi_it, bool from_type)
   517 {
   518   saved_info->charpos = bidi_it->charpos;
   519   if (from_type)
   520     saved_info->type = bidi_it->type;
   521   else
   522     saved_info->type = bidi_it->type_after_wn;
   523   bidi_check_type (saved_info->type);
   524   saved_info->orig_type = bidi_it->orig_type;
   525   bidi_check_type (saved_info->orig_type);
   526 }
   527 
   528 /* Copy the bidi iterator from FROM to TO.  To save cycles, this only
   529    copies the part of the level stack that is actually in use.  */
   530 static void
   531 bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
   532 {
   533   /* Copy everything from the start through the active part of
   534      the level stack.  */
   535   memcpy (to, from,
   536           (offsetof (struct bidi_it, level_stack) + sizeof from->level_stack[0]
   537            + from->stack_idx * sizeof from->level_stack[0]));
   538 }
   539 
   540 
   541 /***********************************************************************
   542                         Caching the bidi iterator states
   543  ***********************************************************************/
   544 
   545 /* We allocate and de-allocate the cache in chunks of this size (in
   546    characters).  200 was chosen as an upper limit for reasonably-long
   547    lines in a text file/buffer.  */
   548 #define BIDI_CACHE_CHUNK 200
   549 /* Maximum size we allow the cache to become, per iterator stack slot,
   550    in units of struct bidi_it size.  If we allow unlimited growth, we
   551    could run out of memory for pathologically long bracketed text or
   552    very long text lines that need to be reordered.  This is aggravated
   553    when word-wrap is in effect, since then functions display_line and
   554    move_it_in_display_line_to need to keep up to 4 copies of the
   555    cache.
   556 
   557    This limitation means there can be no more than that amount of
   558    contiguous RTL text on any single physical line in a LTR paragraph,
   559    and similarly with contiguous LTR + numeric text in a RTL
   560    paragraph.  (LTR text in a LTR paragraph and RTL text in a RTL
   561    paragraph are not reordered, and so don't need the cache, and
   562    cannot hit this limit.)  More importantly, no single line can have
   563    text longer than this inside paired brackets (because bracket pairs
   564    resolution uses the cache).  If the limit is exceeded, the fallback
   565    code will produce visual order that will be incorrect if there are
   566    RTL characters in the offending line of text.  */
   567 /* Do we need to allow customization of this limit?  */
   568 #define BIDI_CACHE_MAX_ELTS_PER_SLOT 50000
   569 verify (BIDI_CACHE_CHUNK < BIDI_CACHE_MAX_ELTS_PER_SLOT);
   570 static ptrdiff_t bidi_cache_max_elts = BIDI_CACHE_MAX_ELTS_PER_SLOT;
   571 static struct bidi_it *bidi_cache;
   572 static ptrdiff_t bidi_cache_size = 0;
   573 enum { elsz = sizeof (struct bidi_it) };
   574 static ptrdiff_t bidi_cache_idx;        /* next unused cache slot */
   575 static ptrdiff_t bidi_cache_last_idx;   /* slot of last cache hit */
   576 static ptrdiff_t bidi_cache_start = 0;  /* start of cache for this
   577                                            "stack" level */
   578 
   579 /* 5-slot stack for saving the start of the previous level of the
   580    cache.  xdisp.c maintains a 5-slot stack for its iterator state,
   581    and we need the same size of our stack.  */
   582 static ptrdiff_t bidi_cache_start_stack[IT_STACK_SIZE];
   583 static int bidi_cache_sp;
   584 
   585 /* Size of header used by bidi_shelve_cache.  */
   586 enum
   587   {
   588     bidi_shelve_header_size
   589       = (sizeof (bidi_cache_idx) + sizeof (bidi_cache_start_stack)
   590          + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start)
   591          + sizeof (bidi_cache_last_idx) + sizeof (bidi_cache_max_elts))
   592   };
   593 
   594 /* Effectively remove the cached states beyond the Nth state from the
   595    part of the cache relevant to iteration of the current object
   596    (buffer or string).  */
   597 static void
   598 bidi_cache_reset_to (int n)
   599 {
   600   bidi_cache_idx = bidi_cache_start + n;
   601   bidi_cache_last_idx = -1;
   602 }
   603 
   604 /* Reset the cache state to the empty state.  We only reset the part
   605    of the cache relevant to iteration of the current object.  Previous
   606    objects, which are pushed on the display iterator's stack, are left
   607    intact.  This is called when the cached information is no more
   608    useful for the current iteration, e.g. when we were reseated to a
   609    new position on the same object.  */
   610 static void
   611 bidi_cache_reset (void)
   612 {
   613   bidi_cache_reset_to (0);
   614 }
   615 
   616 /* Shrink the cache to its minimal size.  Called when we init the bidi
   617    iterator for reordering a buffer or a string that does not come
   618    from display properties, because that means all the previously
   619    cached info is of no further use.  */
   620 static void
   621 bidi_cache_shrink (void)
   622 {
   623   if (bidi_cache_size > BIDI_CACHE_CHUNK)
   624     {
   625       bidi_cache = xrealloc (bidi_cache, BIDI_CACHE_CHUNK * elsz);
   626       bidi_cache_size = BIDI_CACHE_CHUNK;
   627     }
   628   bidi_cache_reset ();
   629   bidi_cache_max_elts = BIDI_CACHE_MAX_ELTS_PER_SLOT;
   630 }
   631 
   632 static void
   633 bidi_cache_fetch_state (ptrdiff_t idx, struct bidi_it *bidi_it)
   634 {
   635   int current_scan_dir = bidi_it->scan_dir;
   636 
   637   if (idx < bidi_cache_start || idx >= bidi_cache_idx)
   638     emacs_abort ();
   639 
   640   bidi_copy_it (bidi_it, &bidi_cache[idx]);
   641   bidi_it->scan_dir = current_scan_dir;
   642   bidi_cache_last_idx = idx;
   643 }
   644 
   645 /* Find a cached state with a given CHARPOS and resolved embedding
   646    level less or equal to LEVEL.  If LEVEL is -1, disregard the
   647    resolved levels in cached states.  DIR, if non-zero, means search
   648    in that direction from the last cache hit.
   649 
   650    Value is the index of the cached state, or -1 if not found.  */
   651 static ptrdiff_t
   652 bidi_cache_search (ptrdiff_t charpos, int level, int dir)
   653 {
   654   ptrdiff_t i, i_start;
   655 
   656   if (bidi_cache_idx > bidi_cache_start)
   657     {
   658       if (bidi_cache_last_idx == -1)
   659         bidi_cache_last_idx = bidi_cache_idx - 1;
   660       if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
   661         {
   662           dir = -1;
   663           i_start = bidi_cache_last_idx - 1;
   664         }
   665       else if (charpos > (bidi_cache[bidi_cache_last_idx].charpos
   666                           + bidi_cache[bidi_cache_last_idx].nchars - 1))
   667         {
   668           dir = 1;
   669           i_start = bidi_cache_last_idx + 1;
   670         }
   671       else if (dir)
   672         i_start = bidi_cache_last_idx;
   673       else
   674         {
   675           dir = -1;
   676           i_start = bidi_cache_idx - 1;
   677         }
   678 
   679       if (dir < 0)
   680         {
   681           /* Linear search for now; FIXME!  */
   682           for (i = i_start; i >= bidi_cache_start; i--)
   683             if (bidi_cache[i].charpos <= charpos
   684                 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
   685                 && (level == -1 || bidi_cache[i].resolved_level <= level))
   686               return i;
   687         }
   688       else
   689         {
   690           for (i = i_start; i < bidi_cache_idx; i++)
   691             if (bidi_cache[i].charpos <= charpos
   692                 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
   693                 && (level == -1 || bidi_cache[i].resolved_level <= level))
   694               return i;
   695         }
   696     }
   697 
   698   return -1;
   699 }
   700 
   701 /* Find a cached state where the resolved level changes to a value
   702    that is lower than LEVEL, and return its cache slot index.  DIR is
   703    the direction to search, starting with the last used cache slot.
   704    If DIR is zero, we search backwards from the last occupied cache
   705    slot.  BEFORE means return the index of the slot that
   706    is ``before'' the level change in the search direction.  That is,
   707    given the cached levels like this:
   708 
   709          1122333442211
   710           AB        C
   711 
   712    and assuming we are at the position cached at the slot marked with
   713    C, searching backwards (DIR = -1) for LEVEL = 2 will return the
   714    index of slot B or A, depending whether BEFORE is, respectively,
   715    true or false.  */
   716 static ptrdiff_t
   717 bidi_cache_find_level_change (int level, int dir, bool before)
   718 {
   719   if (bidi_cache_idx)
   720     {
   721       ptrdiff_t i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
   722       int incr = before ? 1 : 0;
   723 
   724       if (i < 0)  /* cache overflowed? */
   725         i = 0;
   726 
   727       if (!dir)
   728         dir = -1;
   729       else if (!incr)
   730         i += dir;
   731 
   732       if (dir < 0)
   733         {
   734           while (i >= bidi_cache_start + incr)
   735             {
   736               if (bidi_cache[i - incr].resolved_level >= 0
   737                   && bidi_cache[i - incr].resolved_level < level)
   738                 return i;
   739               i--;
   740             }
   741         }
   742       else
   743         {
   744           while (i < bidi_cache_idx - incr)
   745             {
   746               if (bidi_cache[i + incr].resolved_level >= 0
   747                   && bidi_cache[i + incr].resolved_level < level)
   748                 return i;
   749               i++;
   750             }
   751         }
   752     }
   753 
   754   return -1;
   755 }
   756 
   757 static void
   758 bidi_cache_ensure_space (ptrdiff_t idx)
   759 {
   760   /* Enlarge the cache as needed.  */
   761   if (idx >= bidi_cache_size)
   762     {
   763       ptrdiff_t chunk_size = BIDI_CACHE_CHUNK;
   764 
   765       if (bidi_cache_size > bidi_cache_max_elts - chunk_size)
   766         chunk_size = bidi_cache_max_elts - bidi_cache_size;
   767 
   768       if (max (idx + 1,
   769                bidi_cache_size + chunk_size) <= bidi_cache_max_elts)
   770         {
   771           /* The bidi cache cannot be larger than the largest Lisp
   772              string or buffer.  */
   773           ptrdiff_t string_or_buffer_bound
   774             = max (BUF_BYTES_MAX, STRING_BYTES_BOUND);
   775 
   776           /* Also, it cannot be larger than what C can represent.  */
   777           ptrdiff_t c_bound
   778             = (min (PTRDIFF_MAX, SIZE_MAX) - bidi_shelve_header_size) / elsz;
   779           ptrdiff_t max_elts = bidi_cache_max_elts;
   780 
   781           max_elts = min (max_elts, min (string_or_buffer_bound, c_bound));
   782 
   783           /* Force xpalloc not to over-allocate by passing it MAX_ELTS
   784              as its 4th argument.  */
   785           bidi_cache = xpalloc (bidi_cache, &bidi_cache_size,
   786                                 max (chunk_size, idx - bidi_cache_size + 1),
   787                                 max_elts, elsz);
   788           eassert (bidi_cache_size > idx);
   789         }
   790     }
   791 }
   792 
   793 static int
   794 bidi_cache_iterator_state (struct bidi_it *bidi_it, bool resolved,
   795                            bool update_only)
   796 {
   797   ptrdiff_t idx;
   798 
   799   /* We should never cache on backward scans.  */
   800   if (bidi_it->scan_dir == -1)
   801     emacs_abort ();
   802   idx = bidi_cache_search (bidi_it->charpos, -1, 1);
   803 
   804   if (idx < 0 && update_only)
   805     return 0;
   806 
   807   if (idx < 0)
   808     {
   809       idx = bidi_cache_idx;
   810       bidi_cache_ensure_space (idx);
   811       /* Character positions should correspond to cache positions 1:1.
   812          If we are outside the range of cached positions, the cache is
   813          useless and must be reset.  */
   814       if (bidi_cache_start < idx && idx < bidi_cache_size
   815           && (bidi_it->charpos > (bidi_cache[idx - 1].charpos
   816                                   + bidi_cache[idx - 1].nchars)
   817               || bidi_it->charpos < bidi_cache[bidi_cache_start].charpos))
   818         {
   819           bidi_cache_reset ();
   820           idx = bidi_cache_start;
   821         }
   822       if (bidi_it->nchars <= 0)
   823         emacs_abort ();
   824       /* Don't cache if no available space in the cache.  */
   825       if (bidi_cache_size > idx)
   826         {
   827           bidi_copy_it (&bidi_cache[idx], bidi_it);
   828           if (!resolved)
   829             bidi_cache[idx].resolved_level = -1;
   830         }
   831     }
   832   else
   833     {
   834       /* Copy only the members which could have changed, to avoid
   835          costly copying of the entire struct.  */
   836       bidi_cache[idx].type = bidi_it->type;
   837       bidi_check_type (bidi_it->type);
   838       bidi_cache[idx].type_after_wn = bidi_it->type_after_wn;
   839       bidi_check_type (bidi_it->type_after_wn);
   840       if (resolved)
   841         bidi_cache[idx].resolved_level = bidi_it->resolved_level;
   842       else
   843         bidi_cache[idx].resolved_level = -1;
   844       bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
   845       bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
   846       bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
   847       bidi_cache[idx].disp_pos = bidi_it->disp_pos;
   848       bidi_cache[idx].disp_prop = bidi_it->disp_prop;
   849       bidi_cache[idx].bracket_pairing_pos = bidi_it->bracket_pairing_pos;
   850       bidi_cache[idx].bracket_enclosed_type = bidi_it->bracket_enclosed_type;
   851     }
   852 
   853   if (bidi_cache_size > idx)
   854     {
   855       bidi_cache_last_idx = idx;
   856       if (idx >= bidi_cache_idx)
   857         bidi_cache_idx = idx + 1;
   858       return 1;
   859     }
   860   else
   861     {
   862       /* The cache overflowed.  */
   863       bidi_cache_last_idx = -1;
   864       return 0;
   865     }
   866 }
   867 
   868 /* Look for a cached iterator state that corresponds to CHARPOS.  If
   869    found, copy the cached state into BIDI_IT and return the type of
   870    the cached entry.  If not found, return UNKNOWN_BT.  RESOLVED_ONLY
   871    zero means it is OK to return cached states that were not fully
   872    resolved yet.  This can happen if the state was cached before it
   873    was resolved in bidi_resolve_neutral.  */
   874 static bidi_type_t
   875 bidi_cache_find (ptrdiff_t charpos, bool resolved_only, struct bidi_it *bidi_it)
   876 {
   877   ptrdiff_t i = bidi_cache_search (charpos, -1, bidi_it->scan_dir);
   878 
   879   if (i >= bidi_cache_start
   880       && (!resolved_only
   881           /* Callers that want only fully resolved states (and set
   882              resolved_only = true) need to be sure that there's enough
   883              info in the cached state to return the state as final,
   884              and if not, they don't want the cached state.  */
   885           || bidi_cache[i].resolved_level >= 0))
   886     {
   887       bidi_dir_t current_scan_dir = bidi_it->scan_dir;
   888 
   889       bidi_copy_it (bidi_it, &bidi_cache[i]);
   890       bidi_cache_last_idx = i;
   891       /* Don't let scan direction from the cached state override
   892          the current scan direction.  */
   893       bidi_it->scan_dir = current_scan_dir;
   894       return bidi_it->type;
   895     }
   896 
   897   return UNKNOWN_BT;
   898 }
   899 
   900 static int
   901 bidi_peek_at_next_level (struct bidi_it *bidi_it)
   902 {
   903   if (bidi_cache_idx == bidi_cache_start)
   904     emacs_abort ();
   905   /* If the cache overflowed, return the level of the last cached
   906      character.  */
   907   if (bidi_cache_last_idx == -1
   908       || (bidi_cache_last_idx >= bidi_cache_idx - 1 && bidi_it->scan_dir > 0))
   909     return bidi_cache[bidi_cache_idx - 1].resolved_level;
   910   return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
   911 }
   912 
   913 
   914 /***********************************************************************
   915              Pushing and popping the bidi iterator state
   916  ***********************************************************************/
   917 
   918 /* Push the bidi iterator state in preparation for reordering a
   919    different object, e.g. display string found at certain buffer
   920    position.  Pushing the bidi iterator boils down to saving its
   921    entire state on the cache and starting a new cache "stacked" on top
   922    of the current cache.  */
   923 void
   924 bidi_push_it (struct bidi_it *bidi_it)
   925 {
   926   /* Give this stack slot its cache room.  */
   927   bidi_cache_max_elts += BIDI_CACHE_MAX_ELTS_PER_SLOT;
   928   /* Save the current iterator state in its entirety after the last
   929      used cache slot.  */
   930   bidi_cache_ensure_space (bidi_cache_idx);
   931   bidi_cache[bidi_cache_idx++] = *bidi_it;
   932 
   933   /* Push the current cache start onto the stack.  */
   934   eassert (bidi_cache_sp < IT_STACK_SIZE);
   935   bidi_cache_start_stack[bidi_cache_sp++] = bidi_cache_start;
   936 
   937   /* Start a new level of cache, and make it empty.  */
   938   bidi_cache_start = bidi_cache_idx;
   939   bidi_cache_last_idx = -1;
   940 }
   941 
   942 /* Restore the iterator state saved by bidi_push_it and return the
   943    cache to the corresponding state.  */
   944 void
   945 bidi_pop_it (struct bidi_it *bidi_it)
   946 {
   947   if (bidi_cache_start <= 0)
   948     emacs_abort ();
   949 
   950   /* Reset the next free cache slot index to what it was before the
   951      call to bidi_push_it.  */
   952   bidi_cache_idx = bidi_cache_start - 1;
   953 
   954   /* Restore the bidi iterator state saved in the cache.  */
   955   *bidi_it = bidi_cache[bidi_cache_idx];
   956 
   957   /* Pop the previous cache start from the stack.  */
   958   if (bidi_cache_sp <= 0)
   959     emacs_abort ();
   960   bidi_cache_start = bidi_cache_start_stack[--bidi_cache_sp];
   961 
   962   /* Invalidate the last-used cache slot data.  */
   963   bidi_cache_last_idx = -1;
   964 
   965   bidi_cache_max_elts -= BIDI_CACHE_MAX_ELTS_PER_SLOT;
   966   eassert (bidi_cache_max_elts > 0);
   967 }
   968 
   969 static ptrdiff_t bidi_cache_total_alloc;
   970 
   971 /* Stash away a copy of the cache and its control variables.  */
   972 void *
   973 bidi_shelve_cache (void)
   974 {
   975   unsigned char *databuf;
   976   ptrdiff_t alloc;
   977 
   978   /* Empty cache.  */
   979   if (bidi_cache_idx == 0)
   980     return NULL;
   981 
   982   alloc = (bidi_shelve_header_size
   983            + bidi_cache_idx * sizeof (struct bidi_it));
   984   databuf = xmalloc (alloc);
   985   bidi_cache_total_alloc += alloc;
   986 
   987   memcpy (databuf, &bidi_cache_idx, sizeof (bidi_cache_idx));
   988   memcpy (databuf + sizeof (bidi_cache_idx),
   989           bidi_cache, bidi_cache_idx * sizeof (struct bidi_it));
   990   memcpy (databuf + sizeof (bidi_cache_idx)
   991           + bidi_cache_idx * sizeof (struct bidi_it),
   992           bidi_cache_start_stack, sizeof (bidi_cache_start_stack));
   993   memcpy (databuf + sizeof (bidi_cache_idx)
   994           + bidi_cache_idx * sizeof (struct bidi_it)
   995           + sizeof (bidi_cache_start_stack),
   996           &bidi_cache_sp, sizeof (bidi_cache_sp));
   997   memcpy (databuf + sizeof (bidi_cache_idx)
   998           + bidi_cache_idx * sizeof (struct bidi_it)
   999           + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
  1000           &bidi_cache_start, sizeof (bidi_cache_start));
  1001   memcpy (databuf + sizeof (bidi_cache_idx)
  1002           + bidi_cache_idx * sizeof (struct bidi_it)
  1003           + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
  1004           + sizeof (bidi_cache_start),
  1005           &bidi_cache_last_idx, sizeof (bidi_cache_last_idx));
  1006   memcpy (databuf + sizeof (bidi_cache_idx)
  1007           + bidi_cache_idx * sizeof (struct bidi_it)
  1008           + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
  1009           + sizeof (bidi_cache_start) + sizeof (bidi_cache_last_idx),
  1010           &bidi_cache_max_elts, sizeof (bidi_cache_max_elts));
  1011 
  1012   return databuf;
  1013 }
  1014 
  1015 /* Restore the cache state from a copy stashed away by
  1016    bidi_shelve_cache, and free the buffer used to stash that copy.
  1017    JUST_FREE means free the buffer, but don't restore the
  1018    cache; used when the corresponding iterator is discarded instead of
  1019    being restored.  */
  1020 void
  1021 bidi_unshelve_cache (void *databuf, bool just_free)
  1022 {
  1023   unsigned char *p = databuf;
  1024 
  1025   if (!p)
  1026     {
  1027       if (!just_free)
  1028         {
  1029           /* A NULL pointer means an empty cache.  */
  1030           bidi_cache_start = 0;
  1031           bidi_cache_sp = 0;
  1032           bidi_cache_max_elts = BIDI_CACHE_MAX_ELTS_PER_SLOT;
  1033           bidi_cache_reset ();
  1034         }
  1035     }
  1036   else
  1037     {
  1038       if (just_free)
  1039         {
  1040           ptrdiff_t idx;
  1041 
  1042           memcpy (&idx, p, sizeof (bidi_cache_idx));
  1043           bidi_cache_total_alloc
  1044             -= bidi_shelve_header_size + idx * sizeof (struct bidi_it);
  1045         }
  1046       else
  1047         {
  1048           memcpy (&bidi_cache_idx, p, sizeof (bidi_cache_idx));
  1049           bidi_cache_ensure_space (bidi_cache_idx);
  1050           memcpy (bidi_cache, p + sizeof (bidi_cache_idx),
  1051                   bidi_cache_idx * sizeof (struct bidi_it));
  1052           memcpy (bidi_cache_start_stack,
  1053                   p + sizeof (bidi_cache_idx)
  1054                   + bidi_cache_idx * sizeof (struct bidi_it),
  1055                   sizeof (bidi_cache_start_stack));
  1056           memcpy (&bidi_cache_sp,
  1057                   p + sizeof (bidi_cache_idx)
  1058                   + bidi_cache_idx * sizeof (struct bidi_it)
  1059                   + sizeof (bidi_cache_start_stack),
  1060                   sizeof (bidi_cache_sp));
  1061           memcpy (&bidi_cache_start,
  1062                   p + sizeof (bidi_cache_idx)
  1063                   + bidi_cache_idx * sizeof (struct bidi_it)
  1064                   + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
  1065                   sizeof (bidi_cache_start));
  1066           memcpy (&bidi_cache_last_idx,
  1067                   p + sizeof (bidi_cache_idx)
  1068                   + bidi_cache_idx * sizeof (struct bidi_it)
  1069                   + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
  1070                   + sizeof (bidi_cache_start),
  1071                   sizeof (bidi_cache_last_idx));
  1072           memcpy (&bidi_cache_max_elts,
  1073                   p + sizeof (bidi_cache_idx)
  1074                   + bidi_cache_idx * sizeof (struct bidi_it)
  1075                   + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
  1076                   + sizeof (bidi_cache_start) + sizeof (bidi_cache_last_idx),
  1077                   sizeof (bidi_cache_max_elts));
  1078           bidi_cache_total_alloc
  1079             -= (bidi_shelve_header_size
  1080                 + bidi_cache_idx * sizeof (struct bidi_it));
  1081         }
  1082 
  1083       xfree (p);
  1084     }
  1085 }
  1086 
  1087 
  1088 /***********************************************************************
  1089                         Initialization
  1090  ***********************************************************************/
  1091 static void
  1092 bidi_initialize (void)
  1093 {
  1094   bidi_type_table = uniprop_table (intern ("bidi-class"));
  1095   if (NILP (bidi_type_table))
  1096     emacs_abort ();
  1097   staticpro (&bidi_type_table);
  1098 
  1099   bidi_mirror_table = uniprop_table (intern ("mirroring"));
  1100   if (NILP (bidi_mirror_table))
  1101     emacs_abort ();
  1102   staticpro (&bidi_mirror_table);
  1103 
  1104   bidi_brackets_table = uniprop_table (intern ("bracket-type"));
  1105   if (NILP (bidi_brackets_table))
  1106     emacs_abort ();
  1107   staticpro (&bidi_brackets_table);
  1108 
  1109   paragraph_start_re = build_string ("^\\(\f\\|[ \t]*\\)$");
  1110   staticpro (&paragraph_start_re);
  1111   paragraph_separate_re = build_string ("^[ \t\f]*$");
  1112   staticpro (&paragraph_separate_re);
  1113 
  1114   bidi_cache_sp = 0;
  1115   bidi_cache_total_alloc = 0;
  1116   bidi_cache_max_elts = BIDI_CACHE_MAX_ELTS_PER_SLOT;
  1117 
  1118   bidi_initialized = 1;
  1119 }
  1120 
  1121 /* Do whatever UAX#9 clause X8 says should be done at paragraph's
  1122    end.  */
  1123 static void
  1124 bidi_set_paragraph_end (struct bidi_it *bidi_it)
  1125 {
  1126   bidi_it->invalid_levels = 0;
  1127   bidi_it->invalid_isolates = 0;
  1128   bidi_it->stack_idx = 0;
  1129   bidi_it->isolate_level = 0;
  1130   bidi_it->resolved_level = bidi_it->level_stack[0].level;
  1131 }
  1132 
  1133 /* Initialize the bidi iterator from buffer/string position CHARPOS.  */
  1134 void
  1135 bidi_init_it (ptrdiff_t charpos, ptrdiff_t bytepos, bool frame_window_p,
  1136               struct bidi_it *bidi_it)
  1137 {
  1138   if (! bidi_initialized)
  1139     bidi_initialize ();
  1140   if (charpos >= 0)
  1141     bidi_it->charpos = charpos;
  1142   if (bytepos >= 0)
  1143     bidi_it->bytepos = bytepos;
  1144   bidi_it->frame_window_p = frame_window_p;
  1145   bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit */
  1146   bidi_it->first_elt = 1;
  1147   bidi_set_paragraph_end (bidi_it);
  1148   bidi_it->new_paragraph = 1;
  1149   bidi_it->separator_limit = -1;
  1150   bidi_it->type = NEUTRAL_B;
  1151   bidi_it->type_after_wn = NEUTRAL_B;
  1152   bidi_it->orig_type = NEUTRAL_B;
  1153   /* FIXME: Review this!!! */
  1154   bidi_it->prev.type = bidi_it->prev.orig_type = UNKNOWN_BT;
  1155   bidi_it->last_strong.type = bidi_it->last_strong.orig_type = UNKNOWN_BT;
  1156   bidi_it->next_for_neutral.charpos = -1;
  1157   bidi_it->next_for_neutral.type
  1158     = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
  1159   bidi_it->prev_for_neutral.charpos = -1;
  1160   bidi_it->prev_for_neutral.type
  1161     = bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
  1162   bidi_it->bracket_pairing_pos = -1;
  1163   bidi_it->sos = L2R;    /* FIXME: should it be user-selectable? */
  1164   bidi_it->disp_pos = -1;       /* invalid/unknown */
  1165   bidi_it->disp_prop = 0;
  1166   /* We can only shrink the cache if we are at the bottom level of its
  1167      "stack".  */
  1168   if (bidi_cache_start == 0)
  1169     bidi_cache_shrink ();
  1170   else
  1171     bidi_cache_reset ();
  1172 }
  1173 
  1174 /* Perform initializations for reordering a new line of bidi text.  */
  1175 static void
  1176 bidi_line_init (struct bidi_it *bidi_it)
  1177 {
  1178   bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
  1179   bidi_it->stack_idx = 0;
  1180   bidi_it->resolved_level = bidi_it->level_stack[0].level;
  1181   bidi_it->level_stack[0].flags = 0; /* NEUTRAL_DIR, false per X1 */
  1182   bidi_it->invalid_levels = 0;
  1183   bidi_it->isolate_level = 0;    /* X1 */
  1184   bidi_it->invalid_isolates = 0; /* X1 */
  1185   /* Setting this to zero will force its recomputation the first time
  1186      we need it for W5.  */
  1187   bidi_it->next_en_pos = 0;
  1188   bidi_it->next_en_type = UNKNOWN_BT;
  1189   bidi_it->next_for_ws.charpos = -1;
  1190   bidi_it->next_for_ws.type = UNKNOWN_BT;
  1191   bidi_it->bracket_pairing_pos = -1;
  1192   bidi_set_sos_type (bidi_it,
  1193                      (bidi_it->paragraph_dir == R2L ? 1 : 0),
  1194                      bidi_it->level_stack[0].level); /* X10 */
  1195 
  1196   bidi_cache_reset ();
  1197 }
  1198 
  1199 
  1200 /***********************************************************************
  1201                         Fetching characters
  1202  ***********************************************************************/
  1203 
  1204 /* Count bytes in string S between BEG/BEGBYTE and END.  BEG and END
  1205    are zero-based character positions in S, BEGBYTE is byte position
  1206    corresponding to BEG.  UNIBYTE means S is a unibyte string.  */
  1207 static ptrdiff_t
  1208 bidi_count_bytes (const unsigned char *s, ptrdiff_t beg,
  1209                   ptrdiff_t begbyte, ptrdiff_t end, bool unibyte)
  1210 {
  1211   ptrdiff_t pos = beg;
  1212   const unsigned char *p = s + begbyte, *start = p;
  1213 
  1214   if (unibyte)
  1215     p = s + end;
  1216   else
  1217     {
  1218       if (!CHAR_HEAD_P (*p))
  1219         emacs_abort ();
  1220 
  1221       while (pos < end)
  1222         {
  1223           p += BYTES_BY_CHAR_HEAD (*p);
  1224           pos++;
  1225         }
  1226     }
  1227 
  1228   return p - start;
  1229 }
  1230 
  1231 /* Fetch and return the character at byte position BYTEPOS.  If S is
  1232    non-NULL, fetch the character from string S; otherwise fetch the
  1233    character from the current buffer.  UNIBYTE means S is a
  1234    unibyte string.  */
  1235 static int
  1236 bidi_char_at_pos (ptrdiff_t bytepos, const unsigned char *s, bool unibyte)
  1237 {
  1238   if (s)
  1239     {
  1240       s += bytepos;
  1241       if (unibyte)
  1242         return *s;
  1243     }
  1244   else
  1245     s = BYTE_POS_ADDR (bytepos);
  1246   return STRING_CHAR (s);
  1247 }
  1248 
  1249 /* Fetch and return the character at CHARPOS/BYTEPOS.  If that
  1250    character is covered by a display string, treat the entire run of
  1251    covered characters as a single character, either u+2029 or u+FFFC,
  1252    and return their combined length in CH_LEN and NCHARS.  DISP_POS
  1253    specifies the character position of the next display string, or -1
  1254    if not yet computed.  When the next character is at or beyond that
  1255    position, the function updates DISP_POS with the position of the
  1256    next display string.  *DISP_PROP non-zero means that there's really
  1257    a display string at DISP_POS, as opposed to when we searched till
  1258    DISP_POS without finding one.  If *DISP_PROP is 2, it means the
  1259    display spec is of the form `(space ...)', which is replaced with
  1260    u+2029 to handle it as a paragraph separator.  STRING->s is the C
  1261    string to iterate, or NULL if iterating over a buffer or a Lisp
  1262    string; in the latter case, STRING->lstring is the Lisp string.  */
  1263 static int
  1264 bidi_fetch_char (ptrdiff_t charpos, ptrdiff_t bytepos, ptrdiff_t *disp_pos,
  1265                  int *disp_prop, struct bidi_string_data *string,
  1266                  struct window *w,
  1267                  bool frame_window_p, ptrdiff_t *ch_len, ptrdiff_t *nchars)
  1268 {
  1269   int ch;
  1270   ptrdiff_t endpos
  1271     = (string->s || STRINGP (string->lstring)) ? string->schars : ZV;
  1272   struct text_pos pos;
  1273 
  1274   /* If we got past the last known position of display string, compute
  1275      the position of the next one.  That position could be at CHARPOS.  */
  1276   if (charpos < endpos && charpos > *disp_pos)
  1277     {
  1278       SET_TEXT_POS (pos, charpos, bytepos);
  1279       *disp_pos = compute_display_string_pos (&pos, string, w, frame_window_p,
  1280                                               disp_prop);
  1281       /* The factor of 100 below is a heuristic that needs to be
  1282          tuned.  It means we consider 100 buffer positions examined by
  1283          the above call roughly equivalent to the display engine
  1284          iterating over a single buffer position.  */
  1285       if (max_redisplay_ticks > 0 && *disp_pos > charpos)
  1286         update_redisplay_ticks ((*disp_pos - charpos) / 100 + 1, w);
  1287     }
  1288 
  1289   /* Fetch the character at BYTEPOS.  */
  1290   if (charpos >= endpos)
  1291     {
  1292       ch = BIDI_EOB;
  1293       *ch_len = 1;
  1294       *nchars = 1;
  1295       *disp_pos = endpos;
  1296       *disp_prop = 0;
  1297     }
  1298   else if (charpos >= *disp_pos && *disp_prop)
  1299     {
  1300       ptrdiff_t disp_end_pos;
  1301 
  1302       /* We don't expect to find ourselves in the middle of a display
  1303          property.  Hopefully, it will never be needed.  */
  1304       if (charpos > *disp_pos)
  1305         emacs_abort ();
  1306       /* Text covered by `display' properties and overlays with
  1307          display properties or display strings is handled as a single
  1308          character that represents the entire run of characters
  1309          covered by the display property.  */
  1310       if (*disp_prop == 2)
  1311         {
  1312           /* `(space ...)' display specs are handled as paragraph
  1313              separators for the purposes of the reordering; see UAX#9
  1314              section 3 and clause HL1 in section 4.3 there.  */
  1315           ch = PARAGRAPH_SEPARATOR;
  1316         }
  1317       else
  1318         {
  1319           /* All other display specs are handled as the Unicode Object
  1320              Replacement Character.  */
  1321           ch = OBJECT_REPLACEMENT_CHARACTER;
  1322         }
  1323       disp_end_pos = compute_display_string_end (*disp_pos, string);
  1324       if (disp_end_pos < 0)
  1325         {
  1326           /* Somebody removed the display string from the buffer
  1327              behind our back.  Recover by processing this buffer
  1328              position as if no display property were present there to
  1329              begin with.  */
  1330           *disp_prop = 0;
  1331           goto normal_char;
  1332         }
  1333       *nchars = disp_end_pos - *disp_pos;
  1334       if (*nchars <= 0)
  1335         emacs_abort ();
  1336       if (string->s)
  1337         *ch_len = bidi_count_bytes (string->s, *disp_pos, bytepos,
  1338                                     disp_end_pos, string->unibyte);
  1339       else if (STRINGP (string->lstring))
  1340         *ch_len = bidi_count_bytes (SDATA (string->lstring), *disp_pos,
  1341                                     bytepos, disp_end_pos, string->unibyte);
  1342       else
  1343         *ch_len = CHAR_TO_BYTE (disp_end_pos) - bytepos;
  1344     }
  1345   else
  1346     {
  1347     normal_char:
  1348       if (string->s)
  1349         {
  1350           if (!string->unibyte)
  1351             {
  1352               int len;
  1353               ch = string_char_and_length (string->s + bytepos, &len);
  1354               *ch_len = len;
  1355             }
  1356           else
  1357             {
  1358               ch = UNIBYTE_TO_CHAR (string->s[bytepos]);
  1359               *ch_len = 1;
  1360             }
  1361         }
  1362       else if (STRINGP (string->lstring))
  1363         {
  1364           if (!string->unibyte)
  1365             {
  1366               int len;
  1367               ch = string_char_and_length (SDATA (string->lstring) + bytepos,
  1368                                            &len);
  1369               *ch_len = len;
  1370             }
  1371           else
  1372             {
  1373               ch = UNIBYTE_TO_CHAR (SREF (string->lstring, bytepos));
  1374               *ch_len = 1;
  1375             }
  1376         }
  1377       else
  1378         {
  1379           int len;
  1380           ch = string_char_and_length (BYTE_POS_ADDR (bytepos), &len);
  1381           *ch_len = len;
  1382         }
  1383 
  1384       *nchars = 1;
  1385     }
  1386 
  1387   /* If we just entered a run of characters covered by a display
  1388      string, compute the position of the next display string.  */
  1389   if (charpos + *nchars <= endpos && charpos + *nchars > *disp_pos
  1390       && *disp_prop)
  1391     {
  1392       SET_TEXT_POS (pos, charpos + *nchars, bytepos + *ch_len);
  1393       *disp_pos = compute_display_string_pos (&pos, string, w, frame_window_p,
  1394                                               disp_prop);
  1395       if (max_redisplay_ticks > 0 && *disp_pos > charpos + *nchars)
  1396         update_redisplay_ticks ((*disp_pos - charpos - *nchars) / 100 + 1, w);
  1397     }
  1398 
  1399   return ch;
  1400 }
  1401 
  1402 /* Like bidi_fetch_char, but ignore any text between an isolate
  1403    initiator and its matching PDI or, if it has no matching PDI, the
  1404    end of the paragraph.  If isolates were skipped, CH_LEN and NCHARS
  1405    are set to the number of bytes and characters between BYTEPOS/CHARPOS
  1406    and the character that was fetched after skipping the isolates.  */
  1407 static int
  1408 bidi_fetch_char_skip_isolates (ptrdiff_t charpos, ptrdiff_t bytepos,
  1409                                ptrdiff_t *disp_pos, int *disp_prop,
  1410                                struct bidi_string_data *string,
  1411                                struct window *w, bool frame_window_p,
  1412                                ptrdiff_t *ch_len, ptrdiff_t *nchars)
  1413 {
  1414   ptrdiff_t orig_charpos = charpos, orig_bytepos = bytepos;
  1415   int ch = bidi_fetch_char (charpos, bytepos, disp_pos, disp_prop, string, w,
  1416                             frame_window_p, ch_len, nchars);
  1417   bidi_type_t ch_type = bidi_get_type (ch, NEUTRAL_DIR);
  1418   ptrdiff_t level = 0;
  1419 
  1420   if (ch_type == LRI || ch_type == RLI || ch_type == FSI)
  1421     {
  1422       level++;
  1423       while (level > 0 && ch_type != NEUTRAL_B)
  1424         {
  1425           charpos += *nchars;
  1426           bytepos += *ch_len;
  1427           ch = bidi_fetch_char (charpos, bytepos, disp_pos, disp_prop, string,
  1428                                 w, frame_window_p, ch_len, nchars);
  1429           ch_type = bidi_get_type (ch, NEUTRAL_DIR);
  1430           /* A Note to P2 says to ignore max_depth limit.  */
  1431           if (ch_type == LRI || ch_type == RLI || ch_type == FSI)
  1432             level++;
  1433           else if (ch_type == PDI)
  1434             level--;
  1435         }
  1436     }
  1437 
  1438   /* Communicate to the caller how much did we skip, so it could get
  1439      past the last character position we examined.  */
  1440   *nchars += charpos - orig_charpos;
  1441   *ch_len += bytepos - orig_bytepos;
  1442   return ch;
  1443 }
  1444 
  1445 
  1446 
  1447 /***********************************************************************
  1448                         Determining paragraph direction
  1449  ***********************************************************************/
  1450 
  1451 /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
  1452    Value is the non-negative length of the paragraph separator
  1453    following the buffer position, -1 if position is at the beginning
  1454    of a new paragraph, or -2 if position is neither at beginning nor
  1455    at end of a paragraph.  */
  1456 static ptrdiff_t
  1457 bidi_at_paragraph_end (ptrdiff_t charpos, ptrdiff_t bytepos)
  1458 {
  1459   Lisp_Object sep_re;
  1460   Lisp_Object start_re;
  1461   ptrdiff_t val;
  1462 
  1463   if (STRINGP (BVAR (current_buffer, bidi_paragraph_separate_re)))
  1464     sep_re = BVAR (current_buffer, bidi_paragraph_separate_re);
  1465   else
  1466     sep_re = paragraph_separate_re;
  1467   if (STRINGP (BVAR (current_buffer, bidi_paragraph_start_re)))
  1468     start_re = BVAR (current_buffer, bidi_paragraph_start_re);
  1469   else
  1470     start_re = paragraph_start_re;
  1471 
  1472   /* Prevent quitting inside re_match_2, as redisplay_window could
  1473      have temporarily moved point.  */
  1474   specpdl_ref count = SPECPDL_INDEX ();
  1475   specbind (Qinhibit_quit, Qt);
  1476 
  1477   val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
  1478   if (val < 0)
  1479     {
  1480       if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
  1481         val = -1;
  1482       else
  1483         val = -2;
  1484     }
  1485 
  1486   unbind_to (count, Qnil);
  1487   return val;
  1488 }
  1489 
  1490 /* If the user has requested the long scans caching, make sure that
  1491    BIDI cache is enabled.  Otherwise, make sure it's disabled.  */
  1492 
  1493 static struct region_cache *
  1494 bidi_paragraph_cache_on_off (void)
  1495 {
  1496   struct buffer *cache_buffer = current_buffer;
  1497   bool indirect_p = false;
  1498 
  1499   /* For indirect buffers, make sure to use the cache of their base
  1500      buffer.  */
  1501   if (cache_buffer->base_buffer)
  1502     {
  1503       cache_buffer = cache_buffer->base_buffer;
  1504       indirect_p = true;
  1505     }
  1506 
  1507   /* Don't turn on or off the cache in the base buffer, if the value
  1508      of cache-long-scans of the base buffer is inconsistent with that.
  1509      This is because doing so will just make the cache pure overhead,
  1510      since if we turn it on via indirect buffer, it will be
  1511      immediately turned off by its base buffer.  */
  1512   if (NILP (BVAR (current_buffer, cache_long_scans)))
  1513     {
  1514       if (!indirect_p
  1515           || NILP (BVAR (cache_buffer, cache_long_scans)))
  1516         {
  1517           if (cache_buffer->bidi_paragraph_cache)
  1518             {
  1519               free_region_cache (cache_buffer->bidi_paragraph_cache);
  1520               cache_buffer->bidi_paragraph_cache = 0;
  1521             }
  1522         }
  1523       return NULL;
  1524     }
  1525   else
  1526     {
  1527       if (!indirect_p
  1528           || !NILP (BVAR (cache_buffer, cache_long_scans)))
  1529         {
  1530           if (!cache_buffer->bidi_paragraph_cache)
  1531             cache_buffer->bidi_paragraph_cache = new_region_cache ();
  1532         }
  1533       return cache_buffer->bidi_paragraph_cache;
  1534     }
  1535 }
  1536 
  1537 /* On my 2005-vintage machine, searching back for paragraph start
  1538    takes ~1 ms per line.  And bidi_paragraph_init is called 4 times
  1539    when user types C-p.  The number below limits each call to
  1540    bidi_paragraph_init to about 10 ms.  */
  1541 #define MAX_PARAGRAPH_SEARCH 7500
  1542 
  1543 /* Find the beginning of this paragraph by looking back in the buffer.
  1544    Value is the byte position of the paragraph's beginning, or
  1545    BEGV_BYTE if paragraph_start_re is still not found after looking
  1546    back MAX_PARAGRAPH_SEARCH lines in the buffer.  */
  1547 static ptrdiff_t
  1548 bidi_find_paragraph_start (ptrdiff_t pos, ptrdiff_t pos_byte)
  1549 {
  1550   Lisp_Object re =
  1551     STRINGP (BVAR (current_buffer, bidi_paragraph_start_re))
  1552     ? BVAR (current_buffer, bidi_paragraph_start_re)
  1553     : paragraph_start_re;
  1554   ptrdiff_t limit = ZV, limit_byte = ZV_BYTE;
  1555   struct region_cache *bpc = bidi_paragraph_cache_on_off ();
  1556   ptrdiff_t n = 0, oldpos = pos, next;
  1557   struct buffer *cache_buffer = current_buffer;
  1558 
  1559   if (cache_buffer->base_buffer)
  1560     cache_buffer = cache_buffer->base_buffer;
  1561 
  1562   /* Prevent quitting inside re_match_2, as redisplay_window could
  1563      have temporarily moved point.  */
  1564   specpdl_ref count = SPECPDL_INDEX ();
  1565   specbind (Qinhibit_quit, Qt);
  1566 
  1567   while (pos_byte > BEGV_BYTE
  1568          && n++ < MAX_PARAGRAPH_SEARCH
  1569          && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
  1570     {
  1571       /* FIXME: What if the paragraph beginning is covered by a
  1572          display string?  And what if a display string covering some
  1573          of the text over which we scan back includes
  1574          paragraph_start_re?  */
  1575       dec_both (&pos, &pos_byte);
  1576       if (bpc && region_cache_backward (cache_buffer, bpc, pos, &next))
  1577         {
  1578           pos = next, pos_byte = CHAR_TO_BYTE (pos);
  1579           break;
  1580         }
  1581       else
  1582         pos = find_newline_no_quit (pos, pos_byte, -1, &pos_byte);
  1583     }
  1584   unbind_to (count, Qnil);
  1585   if (n >= MAX_PARAGRAPH_SEARCH)
  1586     pos = BEGV, pos_byte = BEGV_BYTE;
  1587   if (bpc)
  1588     know_region_cache (cache_buffer, bpc, pos, oldpos);
  1589   /* Positions returned by the region cache are not limited to
  1590      BEGV..ZV range, so we limit them here.  */
  1591   pos_byte = clip_to_bounds (BEGV_BYTE, pos_byte, ZV_BYTE);
  1592   return pos_byte;
  1593 }
  1594 
  1595 /* This tracks how far we needed to search for first strong character.  */
  1596 static ptrdiff_t nsearch_for_strong;
  1597 
  1598 /* On a 3.4 GHz machine, searching forward for a strong directional
  1599    character in a long paragraph full of weaks or neutrals takes about
  1600    1 ms for each 20K characters.  The number below limits each call to
  1601    bidi_paragraph_init to less than 10 ms even on slow machines.  */
  1602 #define MAX_STRONG_CHAR_SEARCH 100000
  1603 
  1604 /* Starting from POS, find the first strong (L, R, or AL) character,
  1605    while skipping over any characters between an isolate initiator and
  1606    its matching PDI.  STOP_AT_PDI non-zero means stop at the PDI that
  1607    matches the isolate initiator at POS.  Return the bidi type of the
  1608    character where the search stopped.  Give up if after examining
  1609    MAX_STRONG_CHAR_SEARCH buffer or string positions no strong
  1610    character was found.  */
  1611 static bidi_type_t
  1612 find_first_strong_char (ptrdiff_t pos, ptrdiff_t bytepos, ptrdiff_t end,
  1613                         ptrdiff_t *disp_pos, int *disp_prop,
  1614                         struct bidi_string_data *string, struct window *w,
  1615                         bool string_p, bool frame_window_p,
  1616                         ptrdiff_t *ch_len, ptrdiff_t *nchars, bool stop_at_pdi)
  1617 {
  1618   ptrdiff_t pos1;
  1619   bidi_type_t type;
  1620   int ch;
  1621 
  1622   if (stop_at_pdi)
  1623     {
  1624       /* If STOP_AT_PDI is non-zero, we must have been called with FSI
  1625          at POS.  Get past it.  */
  1626 #ifdef ENABLE_CHECKING
  1627       ch = bidi_fetch_char (pos, bytepos, disp_pos, disp_prop, string, w,
  1628                             frame_window_p, ch_len, nchars);
  1629       type = bidi_get_type (ch, NEUTRAL_DIR);
  1630       eassert (type == FSI /* || type == LRI || type == RLI */);
  1631 #endif
  1632       pos += *nchars;
  1633       bytepos += *ch_len;
  1634     }
  1635   ch = bidi_fetch_char_skip_isolates (pos, bytepos, disp_pos, disp_prop, string,
  1636                                       w, frame_window_p, ch_len, nchars);
  1637   type = bidi_get_type (ch, NEUTRAL_DIR);
  1638 
  1639   pos1 = pos;
  1640   for (pos += *nchars, bytepos += *ch_len;
  1641        bidi_get_category (type) != STRONG
  1642          /* If requested to stop at first PDI, stop there.  */
  1643          && !(stop_at_pdi && type == PDI)
  1644          /* Stop when searched too far into an abnormally large
  1645             paragraph full of weak or neutral characters.  */
  1646          && pos - pos1 < MAX_STRONG_CHAR_SEARCH;
  1647        type = bidi_get_type (ch, NEUTRAL_DIR))
  1648     {
  1649       if (pos >= end)
  1650         {
  1651           /* Pretend there's a paragraph separator at end of
  1652              buffer/string.  */
  1653           type = NEUTRAL_B;
  1654           break;
  1655         }
  1656       if (!string_p
  1657           && type == NEUTRAL_B
  1658           && bidi_at_paragraph_end (pos, bytepos) >= -1)
  1659         break;
  1660       /* Fetch next character and advance to get past it.  */
  1661       ch = bidi_fetch_char_skip_isolates (pos, bytepos, disp_pos, disp_prop,
  1662                                           string, w, frame_window_p,
  1663                                           ch_len, nchars);
  1664       pos += *nchars;
  1665       bytepos += *ch_len;
  1666     }
  1667 
  1668   nsearch_for_strong += pos - pos1;
  1669   return type;
  1670 }
  1671 
  1672 /* Determine the base direction, a.k.a. base embedding level, of the
  1673    paragraph we are about to iterate through.  If DIR is either L2R or
  1674    R2L, just use that.  Otherwise, determine the paragraph direction
  1675    from the first strong directional character of the paragraph.
  1676 
  1677    NO_DEFAULT_P means don't default to L2R if the paragraph
  1678    has no strong directional characters and both DIR and
  1679    bidi_it->paragraph_dir are NEUTRAL_DIR.  In that case, search back
  1680    in the buffer until a paragraph is found with a strong character,
  1681    or until hitting BEGV.  In the latter case, fall back to L2R.  This
  1682    flag is used in current-bidi-paragraph-direction.
  1683 
  1684    Note that this function gives the paragraph separator the same
  1685    direction as the preceding paragraph, even though Emacs generally
  1686    views the separator as not belonging to any paragraph.  */
  1687 void
  1688 bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, bool no_default_p)
  1689 {
  1690   ptrdiff_t bytepos = bidi_it->bytepos;
  1691   bool string_p = bidi_it->string.s || STRINGP (bidi_it->string.lstring);
  1692   ptrdiff_t pstartbyte;
  1693   /* Note that begbyte is a byte position, while end is a character
  1694      position.  Yes, this is ugly, but we are trying to avoid costly
  1695      calls to BYTE_TO_CHAR and its ilk.  */
  1696   ptrdiff_t begbyte = string_p ? 0 : BEGV_BYTE;
  1697   ptrdiff_t end = string_p ? bidi_it->string.schars : ZV;
  1698   ptrdiff_t pos = bidi_it->charpos;
  1699 
  1700   nsearch_for_strong = 0;
  1701 
  1702   /* Special case for an empty buffer. */
  1703   if (bytepos == begbyte && bidi_it->charpos == end)
  1704     dir = L2R;
  1705   /* We should never be called at EOB or before BEGV.  */
  1706   else if (bidi_it->charpos >= end || bytepos < begbyte)
  1707     emacs_abort ();
  1708 
  1709   if (dir == L2R)
  1710     {
  1711       bidi_it->paragraph_dir = L2R;
  1712       bidi_it->new_paragraph = 0;
  1713     }
  1714   else if (dir == R2L)
  1715     {
  1716       bidi_it->paragraph_dir = R2L;
  1717       bidi_it->new_paragraph = 0;
  1718     }
  1719   else if (dir == NEUTRAL_DIR)  /* P2 */
  1720     {
  1721       ptrdiff_t ch_len, nchars;
  1722       ptrdiff_t disp_pos = -1;
  1723       int disp_prop = 0;
  1724       bidi_type_t type;
  1725       const unsigned char *s;
  1726 
  1727       if (!bidi_initialized)
  1728         bidi_initialize ();
  1729 
  1730       /* If we are inside a paragraph separator, we are just waiting
  1731          for the separator to be exhausted; use the previous paragraph
  1732          direction.  But don't do that if we have been just reseated,
  1733          because we need to reinitialize below in that case.  */
  1734       if (!bidi_it->first_elt
  1735           && bidi_it->charpos < bidi_it->separator_limit)
  1736         return;
  1737 
  1738       /* If we are on a newline, get past it to where the next
  1739          paragraph might start.  But don't do that at BEGV since then
  1740          we are potentially in a new paragraph that doesn't yet
  1741          exist.  */
  1742       pos = bidi_it->charpos;
  1743       s = (STRINGP (bidi_it->string.lstring)
  1744            ? SDATA (bidi_it->string.lstring)
  1745            : bidi_it->string.s);
  1746       if (bytepos > begbyte
  1747           && bidi_char_at_pos (bytepos, s, bidi_it->string.unibyte) == '\n')
  1748         {
  1749           bytepos++;
  1750           pos++;
  1751         }
  1752 
  1753       /* We are either at the beginning of a paragraph or in the
  1754          middle of it.  Find where this paragraph starts.  */
  1755       if (string_p)
  1756         {
  1757           /* We don't support changes of paragraph direction inside a
  1758              string.  It is treated as a single paragraph.  */
  1759           pstartbyte = 0;
  1760         }
  1761       else
  1762         pstartbyte = bidi_find_paragraph_start (pos, bytepos);
  1763       bidi_it->separator_limit = -1;
  1764       bidi_it->new_paragraph = 0;
  1765 
  1766       /* The following loop is run more than once only if NO_DEFAULT_P,
  1767          and only if we are iterating on a buffer.  */
  1768       do {
  1769         bytepos = pstartbyte;
  1770         if (!string_p)
  1771           pos = BYTE_TO_CHAR (bytepos);
  1772         type = find_first_strong_char (pos, bytepos, end, &disp_pos, &disp_prop,
  1773                                        &bidi_it->string, bidi_it->w,
  1774                                        string_p, bidi_it->frame_window_p,
  1775                                        &ch_len, &nchars, false);
  1776         if (type == STRONG_R || type == STRONG_AL) /* P3 */
  1777           bidi_it->paragraph_dir = R2L;
  1778         else if (type == STRONG_L)
  1779           bidi_it->paragraph_dir = L2R;
  1780         if (!string_p
  1781             && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
  1782           {
  1783             /* If this paragraph is at BEGV, default to L2R.  */
  1784             if (pstartbyte == BEGV_BYTE)
  1785               bidi_it->paragraph_dir = L2R; /* P3 and HL1 */
  1786             else
  1787               {
  1788                 ptrdiff_t prevpbyte = pstartbyte;
  1789                 ptrdiff_t p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;
  1790 
  1791                 /* Find the beginning of the previous paragraph, if any.  */
  1792                 while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
  1793                   {
  1794                     /* FXIME: What if p is covered by a display
  1795                        string?  See also a FIXME inside
  1796                        bidi_find_paragraph_start.  */
  1797                     dec_both (&p, &pbyte);
  1798                     prevpbyte = bidi_find_paragraph_start (p, pbyte);
  1799                   }
  1800                 pstartbyte = prevpbyte;
  1801               }
  1802           }
  1803       } while (!string_p
  1804                && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
  1805     }
  1806   else
  1807     emacs_abort ();
  1808 
  1809   /* Contrary to UAX#9 clause P3, we only default the paragraph
  1810      direction to L2R if we have no previous usable paragraph
  1811      direction.  This is allowed by the HL1 clause.  */
  1812   if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
  1813     bidi_it->paragraph_dir = L2R; /* P3 and HL1 ``higher-level protocols'' */
  1814   if (bidi_it->paragraph_dir == R2L)
  1815     bidi_it->level_stack[0].level = 1;
  1816   else
  1817     bidi_it->level_stack[0].level = 0;
  1818 
  1819   bidi_line_init (bidi_it);
  1820 
  1821   /* The factor of 50 below is a heuristic that needs to be tuned.  It
  1822      means we consider 50 buffer positions examined by this function
  1823      roughly equivalent to the display engine iterating over a single
  1824      buffer position.  */
  1825   ptrdiff_t nexamined = bidi_it->charpos - pos + nsearch_for_strong;
  1826   if (max_redisplay_ticks > 0 && nexamined > 0)
  1827     update_redisplay_ticks (nexamined / 50, bidi_it->w);
  1828 }
  1829 
  1830 
  1831 /***********************************************************************
  1832                  Resolving explicit and implicit levels.
  1833   The rest of this file constitutes the core of the UBA implementation.
  1834  ***********************************************************************/
  1835 
  1836 static bool
  1837 bidi_explicit_dir_char (int ch)
  1838 {
  1839   bidi_type_t ch_type;
  1840 
  1841   if (!bidi_initialized)
  1842     emacs_abort ();
  1843   if (ch < 0)
  1844     {
  1845       eassert (ch == BIDI_EOB);
  1846       return false;
  1847     }
  1848   ch_type = (bidi_type_t) XFIXNUM (CHAR_TABLE_REF (bidi_type_table, ch));
  1849   return (ch_type == LRE || ch_type == LRO
  1850           || ch_type == RLE || ch_type == RLO
  1851           || ch_type == PDF);
  1852 }
  1853 
  1854 /* Given an iterator state in BIDI_IT, advance one character position
  1855    in the buffer/string to the next character (in the logical order),
  1856    resolve any explicit embeddings, directional overrides, and isolate
  1857    initiators and terminators, and return the embedding level of the
  1858    character after resolving these explicit directives.  */
  1859 static int
  1860 bidi_resolve_explicit (struct bidi_it *bidi_it)
  1861 {
  1862   int curchar;
  1863   bidi_type_t type, typ1, prev_type = UNKNOWN_BT;
  1864   int current_level;
  1865   int new_level;
  1866   bidi_dir_t override;
  1867   bool isolate_status;
  1868   bool string_p = bidi_it->string.s || STRINGP (bidi_it->string.lstring);
  1869   ptrdiff_t ch_len, nchars, disp_pos, end;
  1870   int disp_prop;
  1871   ptrdiff_t eob
  1872     = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
  1873        ? bidi_it->string.schars : ZV);
  1874 
  1875   /* Record the info about the previous character.  */
  1876   if (bidi_it->type_after_wn != WEAK_BN /* W1/Retaining */
  1877       && bidi_it->type != WEAK_BN)
  1878     {
  1879       /* This special case is needed in support of Unicode 8.0
  1880          correction to N0, as implemented in bidi_resolve_weak/W1
  1881          below.  */
  1882       if (bidi_it->type_after_wn == NEUTRAL_ON
  1883           && bidi_get_category (bidi_it->type) == STRONG
  1884           && bidi_paired_bracket_type (bidi_it->ch) == BIDI_BRACKET_CLOSE)
  1885         bidi_remember_char (&bidi_it->prev, bidi_it, 1);
  1886       else
  1887         bidi_remember_char (&bidi_it->prev, bidi_it, 0);
  1888     }
  1889   if (bidi_it->type_after_wn == STRONG_R
  1890       || bidi_it->type_after_wn == STRONG_L
  1891       || bidi_it->type_after_wn == STRONG_AL)
  1892     bidi_remember_char (&bidi_it->last_strong, bidi_it, 0);
  1893   if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
  1894       || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
  1895     bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it, 1);
  1896 
  1897   /* If we overstepped the characters used for resolving neutrals
  1898      and whitespace, invalidate their info in the iterator.  */
  1899   if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
  1900     {
  1901       bidi_it->next_for_neutral.type = UNKNOWN_BT;
  1902       /* If needed, reset the "magical" value of pairing bracket
  1903          position, so that bidi_resolve_brackets will resume
  1904          resolution of brackets according to BPA.  */
  1905       if (bidi_it->bracket_pairing_pos == eob)
  1906         bidi_it->bracket_pairing_pos = -1;
  1907     }
  1908   if (bidi_it->next_en_pos >= 0
  1909       && bidi_it->charpos >= bidi_it->next_en_pos)
  1910     {
  1911       bidi_it->next_en_pos = 0;
  1912       bidi_it->next_en_type = UNKNOWN_BT;
  1913     }
  1914 
  1915   /* Reset the bracket resolution info, unless we previously decided
  1916      (in bidi_find_bracket_pairs) that brackets in this level run
  1917      should be resolved as neutrals.  */
  1918   if (bidi_it->bracket_pairing_pos != eob)
  1919     {
  1920       bidi_it->bracket_pairing_pos = -1;
  1921       bidi_it->bracket_enclosed_type = UNKNOWN_BT;
  1922     }
  1923 
  1924   /* If reseat()'ed, don't advance, so as to start iteration from the
  1925      position where we were reseated.  bidi_it->bytepos can be less
  1926      than BEGV_BYTE after reseat to BEGV.  */
  1927   if (bidi_it->bytepos < (string_p ? 0 : BEGV_BYTE)
  1928       || bidi_it->first_elt)
  1929     {
  1930       bidi_it->first_elt = 0;
  1931       if (string_p)
  1932         {
  1933           const unsigned char *p
  1934             = (STRINGP (bidi_it->string.lstring)
  1935                ? SDATA (bidi_it->string.lstring)
  1936                : bidi_it->string.s);
  1937 
  1938           if (bidi_it->charpos < 0)
  1939             bidi_it->charpos = bidi_it->bytepos = 0;
  1940           eassert (bidi_it->bytepos == bidi_count_bytes (p, 0, 0,
  1941                                                          bidi_it->charpos,
  1942                                                          bidi_it->string.unibyte));
  1943         }
  1944       else
  1945         {
  1946           if (bidi_it->charpos < BEGV)
  1947             {
  1948               bidi_it->charpos = BEGV;
  1949               bidi_it->bytepos = BEGV_BYTE;
  1950             }
  1951           eassert (bidi_it->bytepos == CHAR_TO_BYTE (bidi_it->charpos));
  1952         }
  1953       /* Determine the original bidi type of the previous character,
  1954          which is needed for handling isolate initiators and PDF.  The
  1955          type of the previous character will be non-trivial only if
  1956          our caller moved through some previous text in
  1957          get_visually_first_element, in which case bidi_it->prev holds
  1958          the information we want.  */
  1959       if (bidi_it->first_elt && bidi_it->prev.type != UNKNOWN_BT)
  1960         {
  1961           eassert (bidi_it->prev.charpos == bidi_it->charpos - 1);
  1962           prev_type = bidi_it->prev.orig_type;
  1963         }
  1964     }
  1965   /* Don't move at end of buffer/string.  */
  1966   else if (bidi_it->charpos < (string_p ? bidi_it->string.schars : ZV))
  1967     {
  1968       /* Advance to the next character, skipping characters covered by
  1969          display strings (nchars > 1).  */
  1970       if (bidi_it->nchars <= 0)
  1971         emacs_abort ();
  1972       bidi_it->charpos += bidi_it->nchars;
  1973       if (bidi_it->ch_len == 0)
  1974         emacs_abort ();
  1975       bidi_it->bytepos += bidi_it->ch_len;
  1976       prev_type = bidi_it->orig_type;
  1977     }
  1978   else  /* EOB or end of string */
  1979     prev_type = NEUTRAL_B;
  1980 
  1981   current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
  1982   isolate_status = ISOLATE_STATUS (bidi_it, bidi_it->stack_idx);
  1983   override = OVERRIDE (bidi_it, bidi_it->stack_idx);
  1984   new_level = current_level;
  1985 
  1986   if (bidi_it->charpos >= (string_p ? bidi_it->string.schars : ZV))
  1987     {
  1988       curchar = BIDI_EOB;
  1989       bidi_it->ch_len = 1;
  1990       bidi_it->nchars = 1;
  1991       bidi_it->disp_pos = (string_p ? bidi_it->string.schars : ZV);
  1992       bidi_it->disp_prop = 0;
  1993     }
  1994   else
  1995     {
  1996       /* LRI, RLI, and FSI increment, and PDF decrements, the
  1997          embedding level of the _following_ characters, so we must
  1998          first look at the type of the previous character to support
  1999          that.  */
  2000       switch (prev_type)
  2001         {
  2002         case RLI:       /* X5a */
  2003           if (current_level < BIDI_MAXDEPTH
  2004               && bidi_it->invalid_levels == 0
  2005               && bidi_it->invalid_isolates == 0)
  2006             {
  2007               new_level = ((current_level + 1) & ~1) + 1;
  2008               bidi_it->isolate_level++;
  2009               bidi_push_embedding_level (bidi_it, new_level,
  2010                                          NEUTRAL_DIR, true);
  2011             }
  2012           else
  2013             bidi_it->invalid_isolates++;
  2014           break;
  2015         case LRI:       /* X5b */
  2016           if (current_level < BIDI_MAXDEPTH - 1
  2017               && bidi_it->invalid_levels == 0
  2018               && bidi_it->invalid_isolates == 0)
  2019             {
  2020               new_level = ((current_level + 2) & ~1);
  2021               bidi_it->isolate_level++;
  2022               bidi_push_embedding_level (bidi_it, new_level,
  2023                                          NEUTRAL_DIR, true);
  2024             }
  2025           else
  2026             bidi_it->invalid_isolates++;
  2027           break;
  2028         case PDF:       /* X7 */
  2029           if (!bidi_it->invalid_isolates)
  2030             {
  2031               if (bidi_it->invalid_levels)
  2032                 bidi_it->invalid_levels--;
  2033               else if (!isolate_status && bidi_it->stack_idx >= 1)
  2034                 new_level = bidi_pop_embedding_level (bidi_it);
  2035             }
  2036           break;
  2037         default:
  2038           eassert (prev_type != FSI);
  2039           /* Nothing.  */
  2040           break;
  2041         }
  2042       /* Fetch the character at BYTEPOS.  If it is covered by a
  2043          display string, treat the entire run of covered characters as
  2044          a single character u+FFFC.  */
  2045       curchar = bidi_fetch_char (bidi_it->charpos, bidi_it->bytepos,
  2046                                  &bidi_it->disp_pos, &bidi_it->disp_prop,
  2047                                  &bidi_it->string, bidi_it->w,
  2048                                  bidi_it->frame_window_p,
  2049                                  &bidi_it->ch_len, &bidi_it->nchars);
  2050     }
  2051   bidi_it->ch = curchar;
  2052   bidi_it->resolved_level = new_level;
  2053 
  2054   /* Don't apply directional override here, as all the types we handle
  2055      below will not be affected by the override anyway, and we need
  2056      the original type unaltered.  The override will be applied in
  2057      bidi_resolve_weak.  */
  2058   type = bidi_get_type (curchar, NEUTRAL_DIR);
  2059   bidi_it->orig_type = type;
  2060   bidi_check_type (bidi_it->orig_type);
  2061 
  2062   bidi_it->type_after_wn = UNKNOWN_BT;
  2063 
  2064   switch (type)
  2065     {
  2066     case RLE:   /* X2 */
  2067     case RLO:   /* X4 */
  2068       bidi_it->type_after_wn = type;
  2069       bidi_check_type (bidi_it->type_after_wn);
  2070       type = WEAK_BN; /* X9/Retaining */
  2071       if (new_level < BIDI_MAXDEPTH
  2072           && bidi_it->invalid_levels == 0
  2073           && bidi_it->invalid_isolates == 0)
  2074         {
  2075           /* Compute the least odd embedding level greater than
  2076              the current level.  */
  2077           new_level = ((new_level + 1) & ~1) + 1;
  2078           if (bidi_it->type_after_wn == RLE)
  2079             override = NEUTRAL_DIR;
  2080           else
  2081             override = R2L;
  2082           bidi_push_embedding_level (bidi_it, new_level, override, false);
  2083           bidi_it->resolved_level = new_level;
  2084         }
  2085       else
  2086         {
  2087           if (bidi_it->invalid_isolates == 0)
  2088             bidi_it->invalid_levels++;
  2089         }
  2090       break;
  2091     case LRE:   /* X3 */
  2092     case LRO:   /* X5 */
  2093       bidi_it->type_after_wn = type;
  2094       bidi_check_type (bidi_it->type_after_wn);
  2095       type = WEAK_BN; /* X9/Retaining */
  2096       if (new_level < BIDI_MAXDEPTH - 1
  2097           && bidi_it->invalid_levels == 0
  2098           && bidi_it->invalid_isolates == 0)
  2099         {
  2100           /* Compute the least even embedding level greater than
  2101              the current level.  */
  2102           new_level = ((new_level + 2) & ~1);
  2103           if (bidi_it->type_after_wn == LRE)
  2104             override = NEUTRAL_DIR;
  2105           else
  2106             override = L2R;
  2107           bidi_push_embedding_level (bidi_it, new_level, override, false);
  2108           bidi_it->resolved_level = new_level;
  2109         }
  2110       else
  2111         {
  2112           if (bidi_it->invalid_isolates == 0)
  2113             bidi_it->invalid_levels++;
  2114         }
  2115       break;
  2116     case FSI:   /* X5c */
  2117       end = string_p ? bidi_it->string.schars : ZV;
  2118       disp_pos = bidi_it->disp_pos;
  2119       disp_prop = bidi_it->disp_prop;
  2120       nchars = bidi_it->nchars;
  2121       ch_len = bidi_it->ch_len;
  2122       typ1 = find_first_strong_char (bidi_it->charpos,
  2123                                      bidi_it->bytepos, end,
  2124                                      &disp_pos, &disp_prop,
  2125                                      &bidi_it->string, bidi_it->w,
  2126                                      string_p, bidi_it->frame_window_p,
  2127                                      &ch_len, &nchars, true);
  2128       if (typ1 != STRONG_R && typ1 != STRONG_AL)
  2129         {
  2130           type = LRI;
  2131           /* Override orig_type, which will be needed when we come to
  2132              examine the next character, which is the first character
  2133              inside the isolate.  */
  2134           bidi_it->orig_type = type;
  2135           goto fsi_as_lri;
  2136         }
  2137       else
  2138         {
  2139           type = RLI;
  2140           bidi_it->orig_type = type;
  2141         }
  2142       FALLTHROUGH;
  2143     case RLI:   /* X5a */
  2144       if (override == NEUTRAL_DIR)
  2145         bidi_it->type_after_wn = type;
  2146       else      /* Unicode 8.0 correction.  */
  2147         bidi_it->type_after_wn = (override == L2R ? STRONG_L : STRONG_R);
  2148       bidi_check_type (bidi_it->type_after_wn);
  2149       break;
  2150     case LRI:   /* X5b */
  2151     fsi_as_lri:
  2152       if (override == NEUTRAL_DIR)
  2153         bidi_it->type_after_wn = type;
  2154       else      /* Unicode 8.0 correction.  */
  2155         bidi_it->type_after_wn = (override == L2R ? STRONG_L : STRONG_R);
  2156       bidi_check_type (bidi_it->type_after_wn);
  2157       break;
  2158     case PDI:   /* X6a */
  2159       if (bidi_it->invalid_isolates)
  2160         bidi_it->invalid_isolates--;
  2161       else if (bidi_it->isolate_level > 0)
  2162         {
  2163           bidi_it->invalid_levels = 0;
  2164           while (!ISOLATE_STATUS (bidi_it, bidi_it->stack_idx))
  2165             bidi_pop_embedding_level (bidi_it);
  2166           eassert (bidi_it->stack_idx > 0);
  2167           new_level = bidi_pop_embedding_level (bidi_it);
  2168           bidi_it->isolate_level--;
  2169         }
  2170       bidi_it->resolved_level = new_level;
  2171       /* Unicode 8.0 correction.  */
  2172       {
  2173         bidi_dir_t stack_override = OVERRIDE (bidi_it, bidi_it->stack_idx);
  2174         if (stack_override == L2R)
  2175           bidi_it->type_after_wn = STRONG_L;
  2176         else if (stack_override == R2L)
  2177           bidi_it->type_after_wn = STRONG_R;
  2178         else
  2179           bidi_it->type_after_wn = type;
  2180       }
  2181       break;
  2182     case PDF:   /* X7 */
  2183       bidi_it->type_after_wn = type;
  2184       bidi_check_type (bidi_it->type_after_wn);
  2185       type = WEAK_BN; /* X9/Retaining */
  2186       break;
  2187     default:
  2188       /* Nothing.  */
  2189       break;
  2190     }
  2191 
  2192   bidi_it->type = type;
  2193   bidi_check_type (bidi_it->type);
  2194 
  2195   if (bidi_it->type == NEUTRAL_B)       /* X8 */
  2196     {
  2197       bidi_set_paragraph_end (bidi_it);
  2198       /* This is needed by bidi_resolve_weak below, and in L1.  */
  2199       bidi_it->type_after_wn = bidi_it->type;
  2200     }
  2201 
  2202   eassert (bidi_it->resolved_level >= 0);
  2203   return bidi_it->resolved_level;
  2204 }
  2205 
  2206 /* Advance in the buffer/string, resolve weak types and return the
  2207    type of the next character after weak type resolution.  */
  2208 static bidi_type_t
  2209 bidi_resolve_weak (struct bidi_it *bidi_it)
  2210 {
  2211   bidi_type_t type;
  2212   bidi_dir_t override;
  2213   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
  2214   int new_level  = bidi_resolve_explicit (bidi_it);
  2215   int next_char;
  2216   bidi_type_t type_of_next;
  2217   struct bidi_it saved_it;
  2218   ptrdiff_t eob
  2219     = ((STRINGP (bidi_it->string.lstring) || bidi_it->string.s)
  2220        ? bidi_it->string.schars : ZV);
  2221 
  2222   type = bidi_it->type;
  2223   override = OVERRIDE (bidi_it, bidi_it->stack_idx);
  2224 
  2225   eassert (!(type == UNKNOWN_BT
  2226              || type == LRE
  2227              || type == LRO
  2228              || type == RLE
  2229              || type == RLO
  2230              || type == PDF));
  2231 
  2232   eassert (prev_level >= 0);
  2233   if (bidi_it->type == NEUTRAL_B)
  2234     {
  2235       /* We've got a new isolating sequence, compute the directional
  2236          type of sos and initialize per-run variables (UAX#9, clause
  2237          X10).  */
  2238       bidi_set_sos_type (bidi_it, prev_level, new_level);
  2239     }
  2240   if (type == NEUTRAL_S || type == NEUTRAL_WS
  2241       || type == WEAK_BN || type == STRONG_AL)
  2242     bidi_it->type_after_wn = type;      /* needed in L1 */
  2243   bidi_check_type (bidi_it->type_after_wn);
  2244 
  2245   /* Level and directional override status are already recorded in
  2246      bidi_it, and do not need any change; see X6.  */
  2247   if (override == R2L)          /* X6 */
  2248     type = STRONG_R;
  2249   else if (override == L2R)
  2250     type = STRONG_L;
  2251   else
  2252     {
  2253       if (type == WEAK_NSM)     /* W1 */
  2254         {
  2255           /* Note that we don't need to consider the case where the
  2256              prev character has its type overridden by an RLO or LRO,
  2257              because then either the type of this NSM would have been
  2258              also overridden, or the previous character is outside the
  2259              current level run, and thus not relevant to this NSM.
  2260              This is why NSM gets the type_after_wn of the previous
  2261              character.  */
  2262           /* bidi_set_sos_type sets type_after_wn to UNKNOWN_BT.  */
  2263           if (bidi_it->prev.type != UNKNOWN_BT
  2264               /* If type_after_wn is NEUTRAL_B, this NSM is at sos.  */
  2265               && bidi_it->prev.type != NEUTRAL_B)
  2266             {
  2267               if (bidi_isolate_fmt_char (bidi_it->prev.type))
  2268                 {
  2269                   /* From W1: "Note that in an isolating run sequence,
  2270                      an isolate initiator followed by an NSM or any
  2271                      type other than PDI must be an overflow isolate
  2272                      initiator."  */
  2273                   eassert (bidi_it->invalid_isolates > 0);
  2274                   type = NEUTRAL_ON;
  2275                 }
  2276               else
  2277                 {
  2278                   /* This includes the Unicode 8.0 correction for N0,
  2279                      due to how we set prev.type in bidi_resolve_explicit,
  2280                      which see.  */
  2281                   type = bidi_it->prev.type;
  2282                 }
  2283             }
  2284           else if (bidi_it->sos == R2L)
  2285             type = STRONG_R;
  2286           else if (bidi_it->sos == L2R)
  2287             type = STRONG_L;
  2288           else /* shouldn't happen! */
  2289             emacs_abort ();
  2290         }
  2291       if (type == WEAK_EN       /* W2 */
  2292           && bidi_it->last_strong.type == STRONG_AL)
  2293         type = WEAK_AN;
  2294       else if (type == STRONG_AL) /* W3 */
  2295         type = STRONG_R;
  2296       else if ((type == WEAK_ES /* W4 */
  2297                 && bidi_it->prev.type == WEAK_EN
  2298                 && bidi_it->prev.orig_type == WEAK_EN)
  2299                || (type == WEAK_CS
  2300                    && ((bidi_it->prev.type == WEAK_EN
  2301                         && bidi_it->prev.orig_type == WEAK_EN)
  2302                        || bidi_it->prev.type == WEAK_AN)))
  2303         {
  2304           const unsigned char *s
  2305             = (STRINGP (bidi_it->string.lstring)
  2306                ? SDATA (bidi_it->string.lstring)
  2307                : bidi_it->string.s);
  2308 
  2309           next_char = (bidi_it->charpos + bidi_it->nchars >= eob
  2310                        ? BIDI_EOB
  2311                        : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len,
  2312                                            s, bidi_it->string.unibyte));
  2313           type_of_next = bidi_get_type (next_char, override);
  2314 
  2315           if (type_of_next == WEAK_BN
  2316               || bidi_explicit_dir_char (next_char))
  2317             {
  2318               bidi_copy_it (&saved_it, bidi_it);
  2319               while (bidi_resolve_explicit (bidi_it) == new_level
  2320                      && bidi_it->type == WEAK_BN)
  2321                 type_of_next = bidi_it->type;
  2322               bidi_copy_it (bidi_it, &saved_it);
  2323             }
  2324 
  2325           /* If the next character is EN, but the last strong-type
  2326              character is AL, that next EN will be changed to AN when
  2327              we process it in W2 above.  So in that case, this ES
  2328              should not be changed into EN.  */
  2329           if (type == WEAK_ES
  2330               && type_of_next == WEAK_EN
  2331               && bidi_it->last_strong.type != STRONG_AL)
  2332             type = WEAK_EN;
  2333           else if (type == WEAK_CS)
  2334             {
  2335               if (bidi_it->prev.type == WEAK_AN
  2336                   && (type_of_next == WEAK_AN
  2337                       /* If the next character is EN, but the last
  2338                          strong-type character is AL, EN will be later
  2339                          changed to AN when we process it in W2 above.
  2340                          So in that case, this ES should not be
  2341                          changed into EN.  */
  2342                       || (type_of_next == WEAK_EN
  2343                           && bidi_it->last_strong.type == STRONG_AL)))
  2344                 type = WEAK_AN;
  2345               else if (bidi_it->prev.type == WEAK_EN
  2346                        && type_of_next == WEAK_EN
  2347                        && bidi_it->last_strong.type != STRONG_AL)
  2348                 type = WEAK_EN;
  2349             }
  2350         }
  2351       else if (type == WEAK_ET  /* W5: ET with EN before or after it */
  2352                || type == WEAK_BN)      /* W5/Retaining */
  2353         {
  2354           if (bidi_it->prev.type == WEAK_EN) /* ET/BN w/EN before it */
  2355             type = WEAK_EN;
  2356           else if (bidi_it->next_en_pos > bidi_it->charpos
  2357                    && bidi_it->next_en_type != WEAK_BN)
  2358             {
  2359               if (bidi_it->next_en_type == WEAK_EN) /* ET/BN with EN after it */
  2360                 type = WEAK_EN;
  2361             }
  2362           else if (type == WEAK_BN
  2363                    /* This condition is for the following important case:
  2364 
  2365                       . we are at level zero
  2366                       . either previous strong character was L,
  2367                          or we've seen no strong characters since sos
  2368                          and the base paragraph direction is L2R
  2369                       . this BN is NOT a bidi directional control
  2370 
  2371                       For such a situation, either this BN will be
  2372                       converted to EN per W5, and then to L by virtue
  2373                       of W7; or it will become ON per W6, and then L
  2374                       because of N1/N2.  So we take a shortcut here
  2375                       and make it L right away, to avoid the
  2376                       potentially costly loop below.  This is
  2377                       important when the buffer has a long series of
  2378                       control characters, like binary nulls, and no
  2379                       R2L characters at all.  */
  2380                    && new_level == 0
  2381                    && !bidi_explicit_dir_char (bidi_it->ch)
  2382                    && ((bidi_it->last_strong.type == STRONG_L)
  2383                        || (bidi_it->last_strong.type == UNKNOWN_BT
  2384                            && bidi_it->sos == L2R)))
  2385             type = STRONG_L;
  2386           else if (bidi_it->next_en_pos >= 0)
  2387             {
  2388               /* We overstepped the last known position for ET
  2389                  resolution but there could be other such characters
  2390                  in this paragraph (when we are sure there are no more
  2391                  such positions, we set next_en_pos to a negative
  2392                  value).  Try to find the next position for ET
  2393                  resolution.  */
  2394               ptrdiff_t en_pos = bidi_it->charpos + bidi_it->nchars;
  2395               const unsigned char *s = (STRINGP (bidi_it->string.lstring)
  2396                                         ? SDATA (bidi_it->string.lstring)
  2397                                         : bidi_it->string.s);
  2398 
  2399               if (bidi_it->nchars <= 0)
  2400                 emacs_abort ();
  2401               next_char
  2402                 = (bidi_it->charpos + bidi_it->nchars >= eob
  2403                    ? BIDI_EOB
  2404                    : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s,
  2405                                        bidi_it->string.unibyte));
  2406               type_of_next = bidi_get_type (next_char, override);
  2407 
  2408               if (type_of_next == WEAK_ET
  2409                   || type_of_next == WEAK_BN
  2410                   || bidi_explicit_dir_char (next_char))
  2411                 {
  2412                   bidi_copy_it (&saved_it, bidi_it);
  2413                   while (bidi_resolve_explicit (bidi_it) == new_level
  2414                          && (bidi_it->type == WEAK_BN
  2415                              || bidi_it->type == WEAK_ET))
  2416                     type_of_next = bidi_it->type;
  2417                   if (type == WEAK_BN
  2418                       && bidi_it->charpos == saved_it.charpos + saved_it.nchars)
  2419                     {
  2420                       /* If we entered the above loop with a BN that
  2421                          changes the level, the type of next
  2422                          character, which is in a different level, is
  2423                          not relevant to resolving this series of ET
  2424                          and BN.  */
  2425                       en_pos = saved_it.charpos;
  2426                       type_of_next = type;
  2427                     }
  2428                   else
  2429                     en_pos = bidi_it->charpos;
  2430                   bidi_copy_it (bidi_it, &saved_it);
  2431                 }
  2432               /* Remember this position, to speed up processing of the
  2433                  next ETs.  */
  2434               bidi_it->next_en_pos = en_pos;
  2435               if (type_of_next == WEAK_EN)
  2436                 {
  2437                   /* If the last strong character is AL, the EN we've
  2438                      found will become AN when we get to it (W2). */
  2439                   if (bidi_it->last_strong.type == STRONG_AL)
  2440                     type_of_next = WEAK_AN;
  2441                   else if (type == WEAK_BN)
  2442                     type = NEUTRAL_ON; /* W6/Retaining */
  2443                   else
  2444                     type = WEAK_EN;
  2445                 }
  2446               else if (type_of_next == NEUTRAL_B)
  2447                 /* Record the fact that there are no more ENs from
  2448                    here to the end of paragraph, to avoid entering the
  2449                    loop above ever again in this paragraph.  */
  2450                 bidi_it->next_en_pos = -1;
  2451               /* Record the type of the character where we ended our search.  */
  2452               bidi_it->next_en_type = type_of_next;
  2453             }
  2454         }
  2455     }
  2456 
  2457   if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
  2458       || (type == WEAK_BN
  2459           && (bidi_it->prev.type == WEAK_CS         /* W6/Retaining */
  2460               || bidi_it->prev.type == WEAK_ES
  2461               || bidi_it->prev.type == WEAK_ET)))
  2462     type = NEUTRAL_ON;
  2463 
  2464   /* Store the type we've got so far, before we clobber it with strong
  2465      types in W7 and while resolving neutral types.  But leave alone
  2466      the original types that were recorded above, because we will need
  2467      them for the L1 clause.  */
  2468   if (bidi_it->type_after_wn == UNKNOWN_BT)
  2469     bidi_it->type_after_wn = type;
  2470   bidi_check_type (bidi_it->type_after_wn);
  2471 
  2472   if (type == WEAK_EN)  /* W7 */
  2473     {
  2474       if ((bidi_it->last_strong.type == STRONG_L)
  2475           || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sos == L2R))
  2476         type = STRONG_L;
  2477     }
  2478 
  2479   bidi_it->type = type;
  2480   bidi_check_type (bidi_it->type);
  2481   return type;
  2482 }
  2483 
  2484 /* Resolve the type of a neutral character according to the type of
  2485    surrounding strong text and the current embedding level.  */
  2486 static bidi_type_t
  2487 bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
  2488 {
  2489   /* N1: "European and Arabic numbers act as if they were R in terms
  2490      of their influence on NIs."  */
  2491   if (next_type == WEAK_EN || next_type == WEAK_AN)
  2492     next_type = STRONG_R;
  2493   if (prev_type == WEAK_EN || prev_type == WEAK_AN)
  2494     prev_type = STRONG_R;
  2495 
  2496   if (next_type == prev_type)   /* N1 */
  2497     return next_type;
  2498   else if ((lev & 1) == 0)      /* N2 */
  2499     return STRONG_L;
  2500   else
  2501     return STRONG_R;
  2502 }
  2503 
  2504 #define FLAG_EMBEDDING_INSIDE  1
  2505 #define FLAG_OPPOSITE_INSIDE   2
  2506 
  2507 /* A data type used in the stack maintained by
  2508    bidi_find_bracket_pairs below.  */
  2509 typedef struct bpa_stack_entry {
  2510   int close_bracket_char;
  2511   int open_bracket_idx;
  2512 #ifdef ENABLE_CHECKING
  2513   ptrdiff_t open_bracket_pos;
  2514 #endif
  2515   unsigned flags : 2;
  2516 } bpa_stack_entry;
  2517 
  2518 /* Allow for the two struct bidi_it objects too, since they can be big.
  2519    With MAX_ALLOCA of 16 KiB, this should allow at least 900 slots in the
  2520    BPA stack, which should be more than enough for actual bidi text.  */
  2521 enum { MAX_BPA_STACK = max (1, ((MAX_ALLOCA - 2 * sizeof (struct bidi_it))
  2522                                 / sizeof (bpa_stack_entry))) };
  2523 
  2524 /* UAX#9 says to match opening brackets with the matching closing
  2525    brackets or their canonical equivalents.  As of Unicode 8.0, there
  2526    are only 2 bracket characters that have canonical equivalence
  2527    decompositions: u+2329 and u+232A.  So instead of accessing the
  2528    table in uni-decomposition.el, we just handle these 2 characters
  2529    with this simple macro.  Note that ASCII characters don't have
  2530    canonical equivalents by definition.  */
  2531 
  2532 /* To find all the characters that need to be processed by
  2533    CANONICAL_EQU, first find all the characters which have
  2534    decompositions in UnicodeData.txt, with this Awk script:
  2535 
  2536     awk -F ";" " {if ($6 != \"\") print $1, $6}" UnicodeData.txt
  2537 
  2538    Then produce a list of all the bracket characters in BidiBrackets.txt:
  2539 
  2540     awk -F "[ ;]" " {if ($1 != \"#\" && $1 != \"\") print $1}" BidiBrackets.txt
  2541 
  2542    And finally, cross-reference these two:
  2543 
  2544     grep -Fw -f brackets.txt decompositions.txt
  2545 
  2546    where "decompositions.txt" was produced by the 1st script, and
  2547    "brackets.txt" by the 2nd script.  In the output of grep, look
  2548    only for decompositions that don't begin with some compatibility
  2549    formatting tag, such as "<compat>".  Only decompositions that
  2550    consist solely of character codepoints are relevant to bidi
  2551    brackets processing.  */
  2552 
  2553 #define CANONICAL_EQU(c)                                        \
  2554   ( ASCII_CHAR_P (c) ? c                                        \
  2555     : (c) == LEFT_POINTING_ANGLE_BRACKET ? LEFT_ANGLE_BRACKET   \
  2556     : (c) == RIGHT_POINTING_ANGLE_BRACKET ? RIGHT_ANGLE_BRACKET \
  2557     : c )
  2558 
  2559 #ifdef ENABLE_CHECKING
  2560 # define STORE_BRACKET_CHARPOS \
  2561    bpa_stack[bpa_sp].open_bracket_pos = bidi_it->charpos
  2562 #else
  2563 # define STORE_BRACKET_CHARPOS  /* nothing */
  2564 #endif
  2565 
  2566 #define PUSH_BPA_STACK                                                  \
  2567   do {                                                                  \
  2568     int ch;                                                             \
  2569     if (bpa_sp < MAX_BPA_STACK - 1 && bidi_cache_last_idx <= INT_MAX)   \
  2570       {                                                                 \
  2571         bpa_sp++;                                                       \
  2572         ch = CANONICAL_EQU (bidi_it->ch);                               \
  2573         bpa_stack[bpa_sp].close_bracket_char = bidi_mirror_char (ch);   \
  2574         bpa_stack[bpa_sp].open_bracket_idx = bidi_cache_last_idx;       \
  2575         bpa_stack[bpa_sp].flags = 0;                                    \
  2576         STORE_BRACKET_CHARPOS;                                          \
  2577       }                                                                 \
  2578   } while (0)
  2579 
  2580 
  2581 /* This function implements BPA, the Bidi Parenthesis Algorithm,
  2582    described in BD16 and N0 of UAX#9.  It finds all the bracket pairs
  2583    in the current isolating sequence, and records the enclosed type
  2584    and the position of the matching bracket in the cache.  It returns
  2585    non-zero if called with the iterator on the opening bracket which
  2586    has a matching closing bracket in the current isolating sequence,
  2587    zero otherwise.  */
  2588 static bool
  2589 bidi_find_bracket_pairs (struct bidi_it *bidi_it)
  2590 {
  2591   bidi_bracket_type_t btype;
  2592   bidi_type_t type = bidi_it->type;
  2593   bool retval = false;
  2594   ptrdiff_t n = 0;
  2595 
  2596   /* When scanning backwards, we don't expect any unresolved bidi
  2597      bracket characters.  */
  2598   if (bidi_it->scan_dir != 1)
  2599     emacs_abort ();
  2600 
  2601   btype = bidi_paired_bracket_type (bidi_it->ch);
  2602   if (btype == BIDI_BRACKET_OPEN)
  2603     {
  2604       bpa_stack_entry bpa_stack[MAX_BPA_STACK];
  2605       int bpa_sp = -1;
  2606       struct bidi_it saved_it;
  2607       int base_level = bidi_it->level_stack[0].level;
  2608       int embedding_level = bidi_it->level_stack[bidi_it->stack_idx].level;
  2609       int maxlevel = embedding_level;
  2610       bidi_type_t embedding_type = (embedding_level & 1) ? STRONG_R : STRONG_L;
  2611       struct bidi_it tem_it;
  2612       bool l2r_seen = false, r2l_seen = false;
  2613       ptrdiff_t pairing_pos;
  2614       int idx_at_entry = bidi_cache_idx;
  2615 
  2616       verify (MAX_BPA_STACK >= 100);
  2617       bidi_copy_it (&saved_it, bidi_it);
  2618       /* bidi_cache_iterator_state refuses to cache on backward scans,
  2619          and bidi_cache_fetch_state doesn't bring scan_dir from the
  2620          cache, so we must initialize this explicitly.  */
  2621       tem_it.scan_dir = 1;
  2622 
  2623       while (1)
  2624         {
  2625           int old_sidx, new_sidx;
  2626           int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
  2627 
  2628           if (maxlevel < current_level)
  2629             maxlevel = current_level;
  2630           /* Mark every opening bracket character we've traversed by
  2631              putting its own position into bracket_pairing_pos.  This
  2632              is examined in bidi_resolve_brackets to distinguish
  2633              brackets that were already resolved to stay NEUTRAL_ON,
  2634              and those that were not yet processed by this function
  2635              (because they were skipped when we skip higher embedding
  2636              levels below).  */
  2637           if (btype == BIDI_BRACKET_OPEN && bidi_it->bracket_pairing_pos == -1)
  2638             bidi_it->bracket_pairing_pos = bidi_it->charpos;
  2639           if (!bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B, 0))
  2640             {
  2641               /* No more space in cache -- give up and let the opening
  2642                  bracket that started this be processed as a
  2643                  NEUTRAL_ON.  */
  2644               bidi_cache_reset_to (idx_at_entry - bidi_cache_start);
  2645               bidi_copy_it (bidi_it, &saved_it);
  2646               goto give_up;
  2647             }
  2648           if (btype == BIDI_BRACKET_OPEN)
  2649             PUSH_BPA_STACK;
  2650           else if (btype == BIDI_BRACKET_CLOSE)
  2651             {
  2652               int sp = bpa_sp;
  2653               int curchar = CANONICAL_EQU (bidi_it->ch);
  2654 
  2655               eassert (sp >= 0);
  2656               while (sp >= 0 && bpa_stack[sp].close_bracket_char != curchar)
  2657                 sp--;
  2658               if (sp >= 0)
  2659                 {
  2660                   /* Update and cache the corresponding opening bracket.  */
  2661                   bidi_cache_fetch_state (bpa_stack[sp].open_bracket_idx,
  2662                                           &tem_it);
  2663 #ifdef ENABLE_CHECKING
  2664                   eassert (bpa_stack[sp].open_bracket_pos == tem_it.charpos);
  2665 #endif
  2666                   /* Determine the enclosed type for this bracket
  2667                      pair's type resolution according to N0.  */
  2668                   if (bpa_stack[sp].flags & FLAG_EMBEDDING_INSIDE)
  2669                     tem_it.bracket_enclosed_type = embedding_type; /* N0b */
  2670                   else if (bpa_stack[sp].flags & FLAG_OPPOSITE_INSIDE)
  2671                     tem_it.bracket_enclosed_type                   /* N0c */
  2672                       = (embedding_type == STRONG_L ? STRONG_R : STRONG_L);
  2673                   else                                             /* N0d */
  2674                     tem_it.bracket_enclosed_type = UNKNOWN_BT;
  2675 
  2676                   /* Record the position of the matching closing
  2677                      bracket, and update the cache.  */
  2678                   tem_it.bracket_pairing_pos = bidi_it->charpos;
  2679                   bidi_cache_iterator_state (&tem_it, 0, 1);
  2680 
  2681                   /* Pop the BPA stack.  */
  2682                   bpa_sp = sp - 1;
  2683                 }
  2684               if (bpa_sp < 0)
  2685                 {
  2686                   retval = true;
  2687                   break;
  2688                 }
  2689             }
  2690           else if (bidi_get_category (bidi_it->type_after_wn) != NEUTRAL)
  2691             {
  2692               unsigned flag = 0;
  2693               int sp;
  2694 
  2695               /* Whenever we see a strong type, update the flags of
  2696                  all the slots on the stack.  */
  2697               switch (bidi_it->type)
  2698                 {
  2699                 case STRONG_L:
  2700                   flag = ((embedding_level & 1) == 0
  2701                           ? FLAG_EMBEDDING_INSIDE
  2702                           : FLAG_OPPOSITE_INSIDE);
  2703                   l2r_seen = true;
  2704                   break;
  2705                 case STRONG_R:
  2706                 case WEAK_EN:
  2707                 case WEAK_AN:
  2708                   flag = ((embedding_level & 1) == 1
  2709                           ? FLAG_EMBEDDING_INSIDE
  2710                           : FLAG_OPPOSITE_INSIDE);
  2711                   r2l_seen = true;
  2712                   break;
  2713                 default:
  2714                   break;
  2715                 }
  2716               if (flag)
  2717                 {
  2718                   for (sp = bpa_sp; sp >= 0; sp--)
  2719                     bpa_stack[sp].flags |= flag;
  2720                 }
  2721             }
  2722           old_sidx = bidi_it->stack_idx;
  2723           type = bidi_resolve_weak (bidi_it);
  2724           n++;
  2725           /* Skip level runs excluded from this isolating run sequence.  */
  2726           new_sidx = bidi_it->stack_idx;
  2727           if (bidi_it->level_stack[new_sidx].level > current_level
  2728               && (ISOLATE_STATUS (bidi_it, new_sidx)
  2729                   || (new_sidx > old_sidx + 1
  2730                       && ISOLATE_STATUS (bidi_it, new_sidx - 1))))
  2731             {
  2732               while (bidi_it->level_stack[bidi_it->stack_idx].level
  2733                      > current_level)
  2734                 {
  2735                   if (maxlevel < bidi_it->level_stack[bidi_it->stack_idx].level)
  2736                     maxlevel = bidi_it->level_stack[bidi_it->stack_idx].level;
  2737                   if (!bidi_cache_iterator_state (bidi_it,
  2738                                                   type == NEUTRAL_B, 0))
  2739                     {
  2740                       /* No more space in cache -- give up and let the
  2741                          opening bracket that started this be
  2742                          processed as any other NEUTRAL_ON.  */
  2743                       bidi_cache_reset_to (idx_at_entry - bidi_cache_start);
  2744                       bidi_copy_it (bidi_it, &saved_it);
  2745                       goto give_up;
  2746                     }
  2747                   type = bidi_resolve_weak (bidi_it);
  2748                   n++;
  2749                 }
  2750             }
  2751           if (type == NEUTRAL_B
  2752               || (bidi_it->level_stack[bidi_it->stack_idx].level
  2753                   != current_level))
  2754             {
  2755               /* We've marched all the way to the end of this
  2756                  isolating run sequence, and didn't find matching
  2757                  closing brackets for some opening brackets.  Leave
  2758                  their type unchanged.  */
  2759               pairing_pos = bidi_it->charpos;
  2760               break;
  2761             }
  2762           if (bidi_it->type_after_wn == NEUTRAL_ON) /* Unicode 8.0 correction */
  2763             btype = bidi_paired_bracket_type (bidi_it->ch);
  2764           else
  2765             btype = BIDI_BRACKET_NONE;
  2766         }
  2767 
  2768       /* Restore bidi_it from the cache, which should have the bracket
  2769          resolution members set as determined by the above loop.  */
  2770       type = bidi_cache_find (saved_it.charpos, 0, bidi_it);
  2771       eassert (type == NEUTRAL_ON);
  2772 
  2773       /* The following is an optimization for bracketed text that has
  2774          only one level which is equal to the paragraph's base
  2775          embedding level.  That is, only L2R and weak/neutral
  2776          characters in a L2R paragraph, or only R2L and weak/neutral
  2777          characters in a R2L paragraph.  Such brackets can be resolved
  2778          by bidi_resolve_neutral, which has a further shortcut for
  2779          this case.  So we pretend we did not resolve the brackets in
  2780          this case, set up next_for_neutral for the entire bracketed
  2781          text, and reset the cache to the character before the opening
  2782          bracket.  The upshot is to allow bidi_move_to_visually_next
  2783          reset the cache when it returns this opening bracket, thus
  2784          cutting significantly on the size of the cache, which is
  2785          important with long lines, especially if word-wrap is non-nil
  2786          (which requires the display engine to copy the cache back and
  2787          forth many times).  */
  2788       if (maxlevel == base_level
  2789           && (l2r_seen || r2l_seen) /* N0d */
  2790           && ((base_level == 0 && !r2l_seen)
  2791               || (base_level == 1 && !l2r_seen)))
  2792         {
  2793           ptrdiff_t eob
  2794             = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
  2795                ? bidi_it->string.schars : ZV);
  2796 
  2797           if (retval)
  2798             pairing_pos = bidi_it->bracket_pairing_pos;
  2799 
  2800           /* This special value (which cannot possibly happen when
  2801              brackets are resolved, since there's no character at ZV)
  2802              will be noticed by bidi_resolve_explicit, and will be
  2803              copied to the following iterator states, instead of being
  2804              reset to -1.  */
  2805           bidi_it->bracket_pairing_pos = eob;
  2806           /* This type value will be used for resolving the outermost
  2807              closing bracket in bidi_resolve_brackets.  */
  2808           bidi_it->bracket_enclosed_type = embedding_type;
  2809           /* bidi_cache_last_idx is set to the index of the current
  2810              state, because we just called bidi_cache_find above.
  2811              That state describes the outermost opening bracket, the
  2812              one with which we entered this function.  Force the cache
  2813              to "forget" all the cached states starting from that state.  */
  2814           bidi_cache_reset_to (bidi_cache_last_idx - bidi_cache_start);
  2815           /* Set up the next_for_neutral member, to help
  2816              bidi_resolve_neutral.  */
  2817           bidi_it->next_for_neutral.type = embedding_type;
  2818           bidi_it->next_for_neutral.charpos = pairing_pos;
  2819           /* Pretend we didn't resolve this bracket.  */
  2820           retval = false;
  2821         }
  2822     }
  2823 
  2824  give_up:
  2825   /* The factor of 20 below is a heuristic that needs to be tuned.  It
  2826      means we consider 20 buffer positions examined by this function
  2827      roughly equivalent to the display engine iterating over a single
  2828      buffer position.  */
  2829   if (max_redisplay_ticks > 0 && n > 0)
  2830     update_redisplay_ticks (n / 20 + 1, bidi_it->w);
  2831   return retval;
  2832 }
  2833 
  2834 static void
  2835 bidi_record_type_for_neutral (struct bidi_saved_info *info, int level,
  2836                               bool nextp)
  2837 {
  2838   int idx;
  2839 
  2840   for (idx = bidi_cache_last_idx + 1; idx < bidi_cache_idx; idx++)
  2841     {
  2842       int lev = bidi_cache[idx].level_stack[bidi_cache[idx].stack_idx].level;
  2843 
  2844       if (lev <= level)
  2845         {
  2846           eassert (lev == level);
  2847           if (nextp)
  2848             bidi_cache[idx].next_for_neutral = *info;
  2849           else
  2850             bidi_cache[idx].prev_for_neutral = *info;
  2851           break;
  2852         }
  2853     }
  2854 }
  2855 
  2856 static bidi_type_t
  2857 bidi_resolve_brackets (struct bidi_it *bidi_it)
  2858 {
  2859   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
  2860   bool resolve_bracket = false;
  2861   bidi_type_t type = UNKNOWN_BT;
  2862   int ch;
  2863   struct bidi_saved_info prev_for_neutral, next_for_neutral;
  2864   ptrdiff_t eob
  2865     = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
  2866        ? bidi_it->string.schars : ZV);
  2867 
  2868   /* Record the prev_for_neutral type either from the previous
  2869      character, if it was a strong or AN/EN, or from the
  2870      prev_for_neutral information recorded previously.  */
  2871   if (bidi_it->type == STRONG_L || bidi_it->type == STRONG_R
  2872       || bidi_it->type == WEAK_AN || bidi_it->type == WEAK_EN)
  2873     bidi_remember_char (&prev_for_neutral, bidi_it, 1);
  2874   else
  2875     prev_for_neutral = bidi_it->prev_for_neutral;
  2876   /* Record the next_for_neutral type information.  */
  2877   if (bidi_it->next_for_neutral.charpos > bidi_it->charpos)
  2878     next_for_neutral = bidi_it->next_for_neutral;
  2879   else
  2880     next_for_neutral.charpos = -1;
  2881   if (!bidi_it->first_elt)
  2882     {
  2883       type = bidi_cache_find (bidi_it->charpos + bidi_it->nchars, 0, bidi_it);
  2884       ch = bidi_it->ch;
  2885     }
  2886   if (type == UNKNOWN_BT)
  2887     {
  2888       type = bidi_resolve_weak (bidi_it);
  2889       if (type == NEUTRAL_ON)
  2890         {
  2891           /* bracket_pairing_pos == eob means this bracket does not
  2892              need to be resolved as a bracket, but as a neutral, see
  2893              the optimization trick we play near the end of
  2894              bidi_find_bracket_pairs.  */
  2895           if (bidi_it->bracket_pairing_pos == eob)
  2896             {
  2897               /* If this is the outermost closing bracket of a run of
  2898                  characters in which we decided to resolve brackets as
  2899                  neutrals, use the embedding level's type, recorded in
  2900                  bracket_enclosed_type, to resolve the bracket.  */
  2901               if (bidi_it->next_for_neutral.charpos == bidi_it->charpos
  2902                   && bidi_paired_bracket_type (bidi_it->ch) == BIDI_BRACKET_CLOSE)
  2903                 type = bidi_it->bracket_enclosed_type;
  2904             }
  2905           else if (bidi_find_bracket_pairs (bidi_it))
  2906             resolve_bracket = true;
  2907         }
  2908     }
  2909   else if (bidi_it->bracket_pairing_pos != eob)
  2910     {
  2911       eassert (bidi_it->resolved_level == -1);
  2912       /* If the cached state shows an increase of embedding level due
  2913          to an isolate initiator, we need to update the 1st cached
  2914          state of the next run of the current isolating sequence with
  2915          the prev_for_neutral and next_for_neutral information, so
  2916          that it will be picked up when we advance to that next run.  */
  2917       if (bidi_it->level_stack[bidi_it->stack_idx].level > prev_level
  2918           && ISOLATE_STATUS (bidi_it, bidi_it->stack_idx))
  2919         {
  2920           bidi_record_type_for_neutral (&prev_for_neutral, prev_level, 0);
  2921           bidi_record_type_for_neutral (&next_for_neutral, prev_level, 1);
  2922         }
  2923       if (type == NEUTRAL_ON
  2924           && bidi_paired_bracket_type (ch) == BIDI_BRACKET_OPEN)
  2925         {
  2926           if (bidi_it->bracket_pairing_pos > bidi_it->charpos)
  2927             {
  2928               /* A cached opening bracket that wasn't completely
  2929                  resolved yet.  */
  2930               resolve_bracket = true;
  2931             }
  2932           else if (bidi_it->bracket_pairing_pos == -1)
  2933             {
  2934               /* Higher levels were not BPA-resolved yet, even if
  2935                  cached by bidi_find_bracket_pairs.  Force application
  2936                  of BPA to the new level now.  */
  2937               if (bidi_find_bracket_pairs (bidi_it))
  2938                 resolve_bracket = true;
  2939             }
  2940         }
  2941       /* Keep track of the prev_for_neutral and next_for_neutral
  2942          types, needed for resolving brackets below and for resolving
  2943          neutrals in bidi_resolve_neutral.  */
  2944       if (bidi_it->level_stack[bidi_it->stack_idx].level == prev_level)
  2945         {
  2946           bidi_it->prev_for_neutral = prev_for_neutral;
  2947           if (next_for_neutral.charpos > 0)
  2948             bidi_it->next_for_neutral = next_for_neutral;
  2949         }
  2950     }
  2951 
  2952   /* If needed, resolve the bracket type according to N0.  */
  2953   if (resolve_bracket)
  2954     {
  2955       int embedding_level = bidi_it->level_stack[bidi_it->stack_idx].level;
  2956       bidi_type_t embedding_type = (embedding_level & 1) ? STRONG_R : STRONG_L;
  2957 
  2958       eassert (bidi_it->bracket_pairing_pos > bidi_it->charpos);
  2959       if (bidi_it->bracket_enclosed_type == embedding_type) /* N0b */
  2960         type = embedding_type;
  2961       else if (bidi_it->bracket_enclosed_type == STRONG_L   /* N0c, N0d */
  2962                || bidi_it->bracket_enclosed_type == STRONG_R)
  2963         {
  2964           bidi_type_t prev_type_for_neutral = bidi_it->prev_for_neutral.type;
  2965 
  2966           if (prev_type_for_neutral == UNKNOWN_BT)
  2967             prev_type_for_neutral = embedding_type;
  2968           switch (prev_type_for_neutral)
  2969             {
  2970             case STRONG_R:
  2971             case WEAK_EN:
  2972             case WEAK_AN:
  2973               type =
  2974                 (bidi_it->bracket_enclosed_type == STRONG_R) /* N0c */
  2975                 ? STRONG_R                                   /* N0c1 */
  2976                 : embedding_type;                            /* N0c2 */
  2977               break;
  2978             case STRONG_L:
  2979               type =
  2980                 (bidi_it->bracket_enclosed_type == STRONG_L) /* N0c */
  2981                 ? STRONG_L                                   /* N0c1 */
  2982                 : embedding_type;                            /* N0c2 */
  2983               break;
  2984             default:
  2985               /* N0d: Do not set the type for that bracket pair.  */
  2986               break;
  2987             }
  2988         }
  2989       eassert (type == STRONG_L || type == STRONG_R || type == NEUTRAL_ON);
  2990 
  2991       /* Update the type of the paired closing bracket to the same
  2992          type as for the resolved opening bracket.  */
  2993       if (type != NEUTRAL_ON)
  2994         {
  2995           ptrdiff_t idx = bidi_cache_search (bidi_it->bracket_pairing_pos,
  2996                                              -1, 1);
  2997 
  2998           if (idx < bidi_cache_start)
  2999             emacs_abort ();
  3000           bidi_cache[idx].type = type;
  3001         }
  3002     }
  3003 
  3004   return type;
  3005 }
  3006 
  3007 static bidi_type_t
  3008 bidi_resolve_neutral (struct bidi_it *bidi_it)
  3009 {
  3010   bidi_type_t type = bidi_resolve_brackets (bidi_it);
  3011   int current_level;
  3012   bool is_neutral;
  3013 
  3014   eassert (type == STRONG_R
  3015            || type == STRONG_L
  3016            || type == WEAK_BN
  3017            || type == WEAK_EN
  3018            || type == WEAK_AN
  3019            || type == NEUTRAL_B
  3020            || type == NEUTRAL_S
  3021            || type == NEUTRAL_WS
  3022            || type == NEUTRAL_ON
  3023            || type == LRI
  3024            || type == RLI
  3025            || type == PDI);
  3026 
  3027   current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
  3028   eassert (current_level >= 0);
  3029   is_neutral = bidi_get_category (type) == NEUTRAL;
  3030 
  3031   if ((type != NEUTRAL_B /* Don't risk entering the long loop below if
  3032                             we are already at paragraph end.  */
  3033        && (is_neutral || bidi_isolate_fmt_char (type)))
  3034       /* N1-N2/Retaining */
  3035       || type == WEAK_BN)
  3036     {
  3037       if (bidi_it->next_for_neutral.type != UNKNOWN_BT
  3038           && (bidi_it->next_for_neutral.charpos > bidi_it->charpos
  3039               /* PDI defines an eos, so it's OK for it to serve as its
  3040                  own next_for_neutral.  */
  3041               || (bidi_it->next_for_neutral.charpos == bidi_it->charpos
  3042                   && bidi_it->type == PDI)))
  3043         {
  3044           type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
  3045                                          bidi_it->next_for_neutral.type,
  3046                                          current_level);
  3047         }
  3048       /* The next two "else if" clauses are shortcuts for the
  3049          important special case when we have a long sequence of
  3050          neutral or WEAK_BN characters, such as whitespace or nulls or
  3051          other control characters, on the base embedding level of the
  3052          paragraph, and that sequence goes all the way to the end of
  3053          the paragraph and follows a character whose resolved
  3054          directionality is identical to the base embedding level.
  3055          (This is what happens in a buffer with plain L2R text that
  3056          happens to include long sequences of control characters.)  By
  3057          virtue of N1, the result of examining this long sequence will
  3058          always be either STRONG_L or STRONG_R, depending on the base
  3059          embedding level.  So we use this fact directly instead of
  3060          entering the expensive loop in the "else" clause.  */
  3061       else if (current_level == 0
  3062                && bidi_it->prev_for_neutral.type == STRONG_L
  3063                && (ASCII_CHAR_P (bidi_it->ch)
  3064                    || (type != WEAK_BN
  3065                        && !bidi_explicit_dir_char (bidi_it->ch)
  3066                        && !bidi_isolate_fmt_char (type))))
  3067         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
  3068                                        STRONG_L, current_level);
  3069       else if (/* current level is 1 */
  3070                current_level == 1
  3071                /* base embedding level is also 1 */
  3072                && bidi_it->level_stack[0].level == 1
  3073                /* previous character is one of those considered R for
  3074                   the purposes of W5 */
  3075                && (bidi_it->prev_for_neutral.type == STRONG_R
  3076                    || bidi_it->prev_for_neutral.type == WEAK_EN
  3077                    || bidi_it->prev_for_neutral.type == WEAK_AN)
  3078                && type != WEAK_BN
  3079                && !bidi_explicit_dir_char (bidi_it->ch)
  3080                && !bidi_isolate_fmt_char (type))
  3081         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
  3082                                        STRONG_R, current_level);
  3083       else
  3084         {
  3085           /* Arrrgh!!  The UAX#9 algorithm is too deeply entrenched in
  3086              the assumption of batch-style processing; see clauses W4,
  3087              W5, and especially N1, which require looking far forward
  3088              (as well as back) in the buffer/string.  May the fleas of
  3089              a thousand camels infest the armpits of those who design
  3090              supposedly general-purpose algorithms by looking at their
  3091              own implementations, and fail to consider other possible
  3092              implementations!  */
  3093           struct bidi_it saved_it;
  3094           bidi_type_t next_type;
  3095           bool adjacent_to_neutrals = is_neutral;
  3096 
  3097           bidi_copy_it (&saved_it, bidi_it);
  3098           /* Scan the text forward until we find the first non-neutral
  3099              character, and then use that to resolve the neutral we
  3100              are dealing with now.  We also cache the scanned iterator
  3101              states, to salvage some of the effort later.  */
  3102           do {
  3103             int old_sidx, new_sidx;
  3104 
  3105             /* Paragraph separators have their levels fully resolved
  3106                at this point, so cache them as resolved.  */
  3107             bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B, 0);
  3108             old_sidx = bidi_it->stack_idx;
  3109             type = bidi_resolve_brackets (bidi_it);
  3110             /* Skip level runs excluded from this isolating run sequence.  */
  3111             new_sidx = bidi_it->stack_idx;
  3112             if (bidi_it->level_stack[new_sidx].level > current_level
  3113                 && (ISOLATE_STATUS (bidi_it, new_sidx)
  3114                     /* This is for when we have an isolate initiator
  3115                        immediately followed by an embedding or
  3116                        override initiator, in which case we get the
  3117                        level stack pushed twice by the single call to
  3118                        bidi_resolve_weak above.  */
  3119                     || (new_sidx > old_sidx + 1
  3120                         && ISOLATE_STATUS (bidi_it, new_sidx - 1))))
  3121               {
  3122                 while (bidi_it->level_stack[bidi_it->stack_idx].level
  3123                        > current_level)
  3124                   {
  3125                     bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B, 0);
  3126                     type = bidi_resolve_brackets (bidi_it);
  3127                   }
  3128               }
  3129             if (!adjacent_to_neutrals
  3130                 && (bidi_get_category (type) == NEUTRAL
  3131                     || bidi_isolate_fmt_char (type)))
  3132               adjacent_to_neutrals = true;
  3133           } while (!(type == NEUTRAL_B
  3134                      || (type != WEAK_BN
  3135                          && bidi_get_category (type) != NEUTRAL
  3136                          && !bidi_isolate_fmt_char (type))
  3137                      /* This is all per level run, so stop when we
  3138                         reach the end of this level run.  */
  3139                      || (bidi_it->level_stack[bidi_it->stack_idx].level
  3140                          != current_level)));
  3141 
  3142           /* Record the character we stopped at.  */
  3143           bidi_remember_char (&saved_it.next_for_neutral, bidi_it, 1);
  3144 
  3145           if ((bidi_it->level_stack[bidi_it->stack_idx].level != current_level)
  3146               || type == NEUTRAL_B)
  3147             {
  3148               /* Marched all the way to the end of this level run.  We
  3149                  need to use the eos type, whose information is stored
  3150                  by bidi_set_sos_type in the prev_for_neutral
  3151                  member.  */
  3152               if (adjacent_to_neutrals)
  3153                 next_type = bidi_it->prev_for_neutral.type;
  3154               else
  3155                 {
  3156                   /* This is a BN which does not adjoin neutrals.
  3157                      Leave its type alone.  */
  3158                   bidi_copy_it (bidi_it, &saved_it);
  3159                   return bidi_it->type;
  3160                 }
  3161             }
  3162           else
  3163             {
  3164               switch (type)
  3165                 {
  3166                 case STRONG_L:
  3167                 case STRONG_R:
  3168                 case STRONG_AL:
  3169                   /* Actually, STRONG_AL cannot happen here, because
  3170                      bidi_resolve_weak converts it to STRONG_R, per W3.  */
  3171                   eassert (type != STRONG_AL);
  3172                   next_type = type;
  3173                   break;
  3174                 case WEAK_EN:
  3175                 case WEAK_AN:
  3176                   /* N1: "European and Arabic numbers act as if they
  3177                      were R in terms of their influence on NIs."  */
  3178                   next_type = STRONG_R;
  3179                   break;
  3180                 default:
  3181                   emacs_abort ();
  3182                   break;
  3183                 }
  3184             }
  3185           /* Resolve the type of all the NIs found during the above loop.  */
  3186           type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
  3187                                          next_type, current_level);
  3188           /* Update next_for_neutral with the resolved type, so we
  3189              could use it for all the other NIs up to the place where
  3190              we exited the loop.  */
  3191           saved_it.next_for_neutral.type = next_type;
  3192           bidi_check_type (type);
  3193           /* Update the character which caused us to enter the above loop.  */
  3194           saved_it.type = type;
  3195           bidi_check_type (next_type);
  3196           bidi_copy_it (bidi_it, &saved_it);
  3197         }
  3198     }
  3199   return type;
  3200 }
  3201 
  3202 /* Given an iterator state in BIDI_IT, advance one character position
  3203    in the buffer/string to the next character (in the logical order),
  3204    resolve the bidi type of that next character, and return that
  3205    type.  */
  3206 static bidi_type_t
  3207 bidi_type_of_next_char (struct bidi_it *bidi_it)
  3208 {
  3209   bidi_type_t type;
  3210 
  3211   /* This should always be called during a forward scan.  */
  3212   if (bidi_it->scan_dir != 1)
  3213     emacs_abort ();
  3214 
  3215   type = bidi_resolve_neutral (bidi_it);
  3216 
  3217   return type;
  3218 }
  3219 
  3220 /* Given an iterator state BIDI_IT, advance one character position in
  3221    the buffer/string to the next character (in the current scan
  3222    direction), resolve the embedding and implicit levels of that next
  3223    character, and return the resulting level.  */
  3224 static int
  3225 bidi_level_of_next_char (struct bidi_it *bidi_it)
  3226 {
  3227   bidi_type_t type = UNKNOWN_BT;
  3228   int level;
  3229   ptrdiff_t next_char_pos = -2;
  3230 
  3231   if (bidi_it->scan_dir == 1)
  3232     {
  3233       ptrdiff_t eob
  3234         = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
  3235            ? bidi_it->string.schars : ZV);
  3236 
  3237       /* There's no sense in trying to advance if we've already hit
  3238          the end of text.  */
  3239       if (bidi_it->charpos >= eob)
  3240         {
  3241           eassert (bidi_it->resolved_level >= 0);
  3242           return bidi_it->resolved_level;
  3243         }
  3244     }
  3245 
  3246   /* Perhaps the character we want is already cached as fully resolved.
  3247      If it is, the call to bidi_cache_find below will return a type
  3248      other than UNKNOWN_BT.  */
  3249   if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt)
  3250     {
  3251       int bob = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
  3252                  ? 0 : 1);
  3253 
  3254       if (bidi_it->scan_dir > 0)
  3255         {
  3256           if (bidi_it->nchars <= 0)
  3257             emacs_abort ();
  3258           next_char_pos = bidi_it->charpos + bidi_it->nchars;
  3259         }
  3260       else if (bidi_it->charpos >= bob)
  3261         /* Implementation note: we allow next_char_pos to be as low as
  3262            0 for buffers or -1 for strings, and that is okay because
  3263            that's the "position" of the sentinel iterator state we
  3264            cached at the beginning of the iteration.  */
  3265         next_char_pos = bidi_it->charpos - 1;
  3266       if (next_char_pos >= bob - 1)
  3267         type = bidi_cache_find (next_char_pos, 1, bidi_it);
  3268       if (type != UNKNOWN_BT)
  3269         {
  3270           /* We asked the cache for fully resolved states.  */
  3271           eassert (bidi_it->resolved_level >= 0);
  3272           return bidi_it->resolved_level;
  3273         }
  3274     }
  3275 
  3276   if (bidi_it->scan_dir == -1)
  3277     /* If we are going backwards, the iterator state is already cached
  3278        from previous scans, and should be fully resolved.  */
  3279     emacs_abort ();
  3280 
  3281   if (type == UNKNOWN_BT)
  3282     type = bidi_type_of_next_char (bidi_it);
  3283 
  3284   if (type == NEUTRAL_B)
  3285     {
  3286       eassert (bidi_it->resolved_level >= 0);
  3287       return bidi_it->resolved_level;
  3288     }
  3289 
  3290   level = bidi_it->level_stack[bidi_it->stack_idx].level;
  3291 
  3292   eassert ((type == STRONG_R
  3293             || type == STRONG_L
  3294             || type == WEAK_BN
  3295             || type == WEAK_EN
  3296             || type == WEAK_AN));
  3297   bidi_it->type = type;
  3298   bidi_check_type (bidi_it->type);
  3299 
  3300   /* For L1 below, we need to know, for each WS character, whether
  3301      it belongs to a sequence of WS characters preceding a newline
  3302      or a TAB or a paragraph separator.  */
  3303   if ((bidi_it->orig_type == NEUTRAL_WS
  3304        || (bidi_it->orig_type == WEAK_BN
  3305            /* If this BN character is already at base level, we don't
  3306               need to consider resetting it, since I1 and I2 below
  3307               will not change the level, so avoid the potentially
  3308               costly loop below.  */
  3309            && level != bidi_it->level_stack[0].level)
  3310        || bidi_isolate_fmt_char (bidi_it->orig_type))
  3311       /* This means the informaition about WS resolution is not valid.  */
  3312       && bidi_it->next_for_ws.charpos < bidi_it->charpos)
  3313     {
  3314       int ch;
  3315       ptrdiff_t clen = bidi_it->ch_len;
  3316       ptrdiff_t bpos = bidi_it->bytepos;
  3317       ptrdiff_t cpos = bidi_it->charpos;
  3318       ptrdiff_t disp_pos = bidi_it->disp_pos;
  3319       ptrdiff_t nc = bidi_it->nchars;
  3320       struct bidi_string_data bs = bidi_it->string;
  3321       bidi_type_t chtype;
  3322       bool fwp = bidi_it->frame_window_p;
  3323       int dpp = bidi_it->disp_prop;
  3324 
  3325       if (bidi_it->nchars <= 0)
  3326         emacs_abort ();
  3327       do {
  3328         ch = bidi_fetch_char (cpos += nc, bpos += clen, &disp_pos, &dpp, &bs,
  3329                               bidi_it->w, fwp, &clen, &nc);
  3330         chtype = bidi_get_type (ch, NEUTRAL_DIR);
  3331       } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
  3332                || bidi_isolate_fmt_char (chtype)
  3333                || bidi_explicit_dir_char (ch)); /* L1/Retaining */
  3334       bidi_it->next_for_ws.type = chtype;
  3335       bidi_check_type (bidi_it->next_for_ws.type);
  3336       bidi_it->next_for_ws.charpos = cpos;
  3337     }
  3338 
  3339   /* Update the cache, but only if this state was already cached.  */
  3340   bidi_cache_iterator_state (bidi_it, 1, 1);
  3341 
  3342   /* Resolve implicit levels.  */
  3343   if (bidi_it->orig_type == NEUTRAL_B /* L1 */
  3344       || bidi_it->orig_type == NEUTRAL_S
  3345       || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
  3346       || ((bidi_it->orig_type == NEUTRAL_WS
  3347            || bidi_it->orig_type == WEAK_BN /* L1/Retaining */
  3348            || bidi_isolate_fmt_char (bidi_it->orig_type)
  3349            || bidi_explicit_dir_char (bidi_it->ch))
  3350           && (bidi_it->next_for_ws.type == NEUTRAL_B
  3351               || bidi_it->next_for_ws.type == NEUTRAL_S)))
  3352     level = bidi_it->level_stack[0].level;
  3353   else if ((level & 1) == 0) /* I1 */
  3354     {
  3355       if (type == STRONG_R)
  3356         level++;
  3357       else if (type == WEAK_EN || type == WEAK_AN)
  3358         level += 2;
  3359     }
  3360   else                  /* I2 */
  3361     {
  3362       if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
  3363         level++;
  3364     }
  3365 
  3366   bidi_it->resolved_level = level;
  3367   return level;
  3368 }
  3369 
  3370 /* Move to the other edge of a level given by LEVEL.  If END_FLAG,
  3371    we are at the end of a level, and we need to prepare to
  3372    resume the scan of the lower level.
  3373 
  3374    If this level's other edge is cached, we simply jump to it, filling
  3375    the iterator structure with the iterator state on the other edge.
  3376    Otherwise, we walk the buffer or string until we come back to the
  3377    same level as LEVEL.
  3378 
  3379    Note: we are not talking here about a ``level run'' in the UAX#9
  3380    sense of the term, but rather about a ``level'' which includes
  3381    all the levels higher than it.  In other words, given the levels
  3382    like this:
  3383 
  3384          11111112222222333333334443343222222111111112223322111
  3385                 A      B                    C
  3386 
  3387    and assuming we are at point A scanning left to right, this
  3388    function moves to point C, whereas the UAX#9 ``level 2 run'' ends
  3389    at point B.  */
  3390 static void
  3391 bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, bool end_flag)
  3392 {
  3393   int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir;
  3394   ptrdiff_t idx;
  3395 
  3396   /* Try the cache first.  */
  3397   if ((idx = bidi_cache_find_level_change (level, dir, end_flag))
  3398       >= bidi_cache_start)
  3399     bidi_cache_fetch_state (idx, bidi_it);
  3400   else
  3401     {
  3402       int new_level;
  3403       ptrdiff_t pos0 = bidi_it->charpos;
  3404 
  3405       /* If we are at end of level, its edges must be cached.  */
  3406       if (end_flag)
  3407         emacs_abort ();
  3408 
  3409       if (!bidi_cache_iterator_state (bidi_it, 1, 0))
  3410         {
  3411           /* Can't happen: if the cache needs to grow, it means we
  3412              were at base embedding level, so the cache should have
  3413              been either empty or already large enough to cover this
  3414              character position.  */
  3415           emacs_abort ();
  3416         }
  3417       do {
  3418         new_level = bidi_level_of_next_char (bidi_it);
  3419         /* If the cache is full, perform an emergency return by
  3420            pretending that the level ended.  */
  3421         if (!bidi_cache_iterator_state (bidi_it, 1, 0))
  3422           {
  3423             new_level = level - 1;
  3424             /* Since the cache should only grow when we are scanning
  3425                forward looking for the edge of the level that is one
  3426                above the base embedding level, we can only have this
  3427                contingency when LEVEL - 1 is the base embedding
  3428                level.  */
  3429             eassert (new_level == bidi_it->level_stack[0].level);
  3430             /* Plan B, for when the cache overflows: Back up to the
  3431                previous character by fetching the last cached state,
  3432                and force the resolved level of that character be the
  3433                base embedding level.  */
  3434             bidi_cache_fetch_state (bidi_cache_idx - 1, bidi_it);
  3435             bidi_it->resolved_level = new_level;
  3436             bidi_cache_iterator_state (bidi_it, 1, 1);
  3437           }
  3438       } while (new_level >= level);
  3439       /* The factor of 50 below is a heuristic that needs to be
  3440          tuned.  It means we consider 50 buffer positions examined by
  3441          the above call roughly equivalent to the display engine
  3442          iterating over a single buffer position.  */
  3443       if (max_redisplay_ticks > 0 && bidi_it->charpos > pos0)
  3444         update_redisplay_ticks ((bidi_it->charpos - pos0) / 50 + 1, bidi_it->w);
  3445     }
  3446 }
  3447 
  3448 void
  3449 bidi_move_to_visually_next (struct bidi_it *bidi_it)
  3450 {
  3451   int old_level, new_level, next_level;
  3452   struct bidi_it sentinel;
  3453 
  3454   if (bidi_it->charpos < 0 || bidi_it->bytepos < 0)
  3455     emacs_abort ();
  3456 
  3457   if (bidi_it->scan_dir == 0)
  3458     {
  3459       bidi_it->scan_dir = 1;    /* default to logical order */
  3460     }
  3461 
  3462   /* If we just passed a newline, initialize for the next line.  */
  3463   if (!bidi_it->first_elt
  3464       && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
  3465     bidi_line_init (bidi_it);
  3466 
  3467   /* Prepare the sentinel iterator state, and cache it.  When we bump
  3468      into it, scanning backwards, we'll know that the last non-base
  3469      level is exhausted.  */
  3470   if (bidi_cache_idx == bidi_cache_start)
  3471     {
  3472       bidi_copy_it (&sentinel, bidi_it);
  3473       if (bidi_it->first_elt)
  3474         {
  3475           sentinel.charpos--;   /* cached charpos needs to be monotonic */
  3476           sentinel.bytepos--;
  3477           sentinel.ch = '\n';   /* doesn't matter, but why not? */
  3478           sentinel.ch_len = 1;
  3479           sentinel.nchars = 1;
  3480         }
  3481       bidi_cache_iterator_state (&sentinel, 1, 0);
  3482     }
  3483 
  3484   old_level = bidi_it->resolved_level;
  3485   new_level = bidi_level_of_next_char (bidi_it);
  3486 
  3487   /* Reordering of resolved levels (clause L2) is implemented by
  3488      jumping to the other edge of the level and flipping direction of
  3489      scanning the text whenever we find a level change.  */
  3490   if (new_level != old_level)
  3491     {
  3492       bool ascending = new_level > old_level;
  3493       int level_to_search = ascending ? old_level + 1 : old_level;
  3494       int incr = ascending ? 1 : -1;
  3495       int expected_next_level = old_level + incr;
  3496 
  3497       /* Jump (or walk) to the other edge of this level.  */
  3498       bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
  3499       /* Switch scan direction and peek at the next character in the
  3500          new direction.  */
  3501       bidi_it->scan_dir = -bidi_it->scan_dir;
  3502 
  3503       /* The following loop handles the case where the resolved level
  3504          jumps by more than one.  This is typical for numbers inside a
  3505          run of text with left-to-right embedding direction, but can
  3506          also happen in other situations.  In those cases the decision
  3507          where to continue after a level change, and in what direction,
  3508          is tricky.  For example, given a text like below:
  3509 
  3510                   abcdefgh
  3511                   11336622
  3512 
  3513          (where the numbers below the text show the resolved levels),
  3514          the result of reordering according to UAX#9 should be this:
  3515 
  3516                   efdcghba
  3517 
  3518          This is implemented by the loop below which flips direction
  3519          and jumps to the other edge of the level each time it finds
  3520          the new level not to be the expected one.  The expected level
  3521          is always one more or one less than the previous one.  */
  3522       next_level = bidi_peek_at_next_level (bidi_it);
  3523       while (next_level != expected_next_level)
  3524         {
  3525           /* If next_level is -1, it means we have an unresolved level
  3526              in the cache, which at this point should not happen.  If
  3527              it does, we will infloop.  */
  3528           eassert (next_level >= 0);
  3529           /* If next_level is not consistent with incr, we might
  3530              infloop.  */
  3531           eassert (incr > 0
  3532                    ? next_level > expected_next_level
  3533                    : next_level < expected_next_level);
  3534           expected_next_level += incr;
  3535           level_to_search += incr;
  3536           bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
  3537           bidi_it->scan_dir = -bidi_it->scan_dir;
  3538           next_level = bidi_peek_at_next_level (bidi_it);
  3539         }
  3540 
  3541       /* Finally, deliver the next character in the new direction.  */
  3542       next_level = bidi_level_of_next_char (bidi_it);
  3543     }
  3544 
  3545   /* Take note when we have just processed the newline that precedes
  3546      the end of the paragraph.  The next time we are about to be
  3547      called, set_iterator_to_next will automatically reinit the
  3548      paragraph direction, if needed.  We do this at the newline before
  3549      the paragraph separator, because the next character might not be
  3550      the first character of the next paragraph, due to the bidi
  3551      reordering, whereas we _must_ know the paragraph base direction
  3552      _before_ we process the paragraph's text, since the base
  3553      direction affects the reordering.  */
  3554   if (bidi_it->scan_dir == 1
  3555       && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
  3556     {
  3557       /* The paragraph direction of the entire string, once
  3558          determined, is in effect for the entire string.  Setting the
  3559          separator limit to the end of the string prevents
  3560          bidi_paragraph_init from being called automatically on this
  3561          string.  */
  3562       if (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
  3563         bidi_it->separator_limit = bidi_it->string.schars;
  3564       else if (bidi_it->bytepos < ZV_BYTE)
  3565         {
  3566           ptrdiff_t sep_len
  3567             = bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
  3568                                      bidi_it->bytepos + bidi_it->ch_len);
  3569           if (bidi_it->nchars <= 0)
  3570             emacs_abort ();
  3571           if (sep_len >= 0)
  3572             {
  3573               bidi_it->new_paragraph = 1;
  3574               /* Record the buffer position of the last character of
  3575                  the paragraph separator.  If the paragraph separator
  3576                  is an empty string (e.g., the regex is "^"), the
  3577                  newline that precedes the end of the paragraph is
  3578                  that last character.  */
  3579               if (sep_len > 0)
  3580                 bidi_it->separator_limit
  3581                   = bidi_it->charpos + bidi_it->nchars + sep_len;
  3582               else
  3583                 bidi_it->separator_limit = bidi_it->charpos;
  3584             }
  3585         }
  3586     }
  3587 
  3588   if (bidi_it->scan_dir == 1 && bidi_cache_idx > bidi_cache_start)
  3589     {
  3590       /* If we are at paragraph's base embedding level and beyond the
  3591          last cached position, the cache's job is done and we can
  3592          discard it.  */
  3593       if (bidi_it->resolved_level == bidi_it->level_stack[0].level
  3594           && bidi_it->charpos > (bidi_cache[bidi_cache_idx - 1].charpos
  3595                                  + bidi_cache[bidi_cache_idx - 1].nchars - 1))
  3596         bidi_cache_reset ();
  3597       /* Also reset the cache if it overflowed and we have just
  3598          emergency-exited using Plan B.  */
  3599       else if (bidi_it->resolved_level == bidi_it->level_stack[0].level
  3600                && bidi_cache_idx >= bidi_cache_size
  3601                && bidi_it->charpos == bidi_cache[bidi_cache_idx - 1].charpos)
  3602         bidi_cache_reset ();
  3603         /* But as long as we are caching during forward scan, we must
  3604            cache each state, or else the cache integrity will be
  3605            compromised: it assumes cached states correspond to buffer
  3606            positions 1:1.  */
  3607       else
  3608         bidi_cache_iterator_state (bidi_it, 1, 0);
  3609     }
  3610 
  3611   eassert (bidi_it->resolved_level >= 0
  3612            && bidi_it->resolved_level <= BIDI_MAXDEPTH + 2);
  3613 }
  3614 
  3615 /* Utility function for looking for strong directional characters
  3616    whose bidi type was overridden by directional override or embedding
  3617    or isolate control characters.  */
  3618 ptrdiff_t
  3619 bidi_find_first_overridden (struct bidi_it *bidi_it)
  3620 {
  3621   ptrdiff_t eob
  3622     = STRINGP (bidi_it->string.lstring) ? bidi_it->string.schars : ZV;
  3623   ptrdiff_t found_pos = eob;
  3624   /* Maximum bidi levels we allow for L2R and R2L characters.  Note
  3625      that these are levels after resolving explicit embeddings,
  3626      overrides, and isolates, i.e. before resolving implicit levels.  */
  3627   int max_l2r = bidi_it->paragraph_dir == L2R ? 0 : 2;
  3628   int max_r2l = 1;
  3629   /* Same for WEAK and NEUTRAL_ON types.  */
  3630   int max_weak = bidi_it->paragraph_dir == L2R ? 1 : 2;
  3631 
  3632   do
  3633     {
  3634       /* Need to call bidi_resolve_weak, not bidi_resolve_explicit,
  3635          because the directional overrides are applied by the
  3636          former.  */
  3637       bidi_type_t type = bidi_resolve_weak (bidi_it);
  3638       unsigned level = bidi_it->level_stack[bidi_it->stack_idx].level;
  3639       bidi_category_t category = bidi_get_category (bidi_it->orig_type);
  3640 
  3641       /* Detect strong L or R types that have been overridden by
  3642          explicit overrides.  */
  3643       if ((type == STRONG_R && bidi_it->orig_type == STRONG_L)
  3644           || (type == STRONG_L
  3645               && (bidi_it->orig_type == STRONG_R
  3646                   || bidi_it->orig_type == STRONG_AL))
  3647           /* Detect strong L or R types or WEAK_EN types that were
  3648              pushed into higher embedding levels (and will thus
  3649              reorder) by explicit embeddings and isolates.  */
  3650           || ((bidi_it->orig_type == STRONG_L
  3651                || bidi_it->orig_type == WEAK_EN)
  3652               && level > max_l2r)
  3653           || ((bidi_it->orig_type == STRONG_R
  3654                || bidi_it->orig_type == STRONG_AL)
  3655               && level > max_r2l)
  3656           /* Detect other weak or neutral types whose level was
  3657              tweaked by explicit embeddings and isolates.  */
  3658           || ((category == WEAK || bidi_it->orig_type == NEUTRAL_ON)
  3659               && level > max_weak))
  3660         found_pos = bidi_it->charpos;
  3661     } while (found_pos == eob
  3662              && bidi_it->charpos < eob
  3663              && bidi_it->ch != BIDI_EOB
  3664              && bidi_it->ch != '\n');
  3665 
  3666   return found_pos;
  3667 }
  3668 
  3669 /* This is meant to be called from within the debugger, whenever you
  3670    wish to examine the cache contents.  */
  3671 void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE;
  3672 void
  3673 bidi_dump_cached_states (void)
  3674 {
  3675   ptrdiff_t i;
  3676   int ndigits = 1;
  3677 
  3678   if (bidi_cache_idx == 0)
  3679     {
  3680       fputs ("The cache is empty.\n", stderr);
  3681       return;
  3682     }
  3683   fprintf (stderr, "Total of  %"pD"d state%s in cache:\n",
  3684            bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
  3685 
  3686   for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10)
  3687     ndigits++;
  3688   fputs ("ch  ", stderr);
  3689   for (i = 0; i < bidi_cache_idx; i++)
  3690     fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch);
  3691   fputs ("\nlvl ", stderr);
  3692   for (i = 0; i < bidi_cache_idx; i++)
  3693     fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level);
  3694   fputs ("\npos ", stderr);
  3695   for (i = 0; i < bidi_cache_idx; i++)
  3696     fprintf (stderr, "%*"pD"d", ndigits, bidi_cache[i].charpos);
  3697   putc ('\n', stderr);
  3698 }

/* [<][>][^][v][top][bottom][index][help] */