root/src/charset.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. load_charset_map
  2. read_hex
  3. load_charset_map_from_file
  4. load_charset_map_from_vector
  5. load_charset
  6. DEFUN
  7. map_charset_for_dump
  8. map_charset_chars
  9. define_charset_internal
  10. DEFUN
  11. check_iso_charset_parameter
  12. string_xstring_p
  13. find_charsets_in_text
  14. maybe_unify_char
  15. decode_char
  16. encode_char
  17. char_charset
  18. DEFUN
  19. DEFUN
  20. DEFUN
  21. DEFUN
  22. DEFUN
  23. charset_compare
  24. DEFUN
  25. init_charset
  26. init_charset_once
  27. syms_of_charset

     1 /* Basic character set support.
     2 
     3 Copyright (C) 2001-2023 Free Software Foundation, Inc.
     4 
     5 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
     6   2005, 2006, 2007, 2008, 2009, 2010, 2011
     7   National Institute of Advanced Industrial Science and Technology (AIST)
     8   Registration Number H14PRO021
     9 
    10 Copyright (C) 2003, 2004
    11   National Institute of Advanced Industrial Science and Technology (AIST)
    12   Registration Number H13PRO009
    13 
    14 This file is part of GNU Emacs.
    15 
    16 GNU Emacs is free software: you can redistribute it and/or modify
    17 it under the terms of the GNU General Public License as published by
    18 the Free Software Foundation, either version 3 of the License, or (at
    19 your option) any later version.
    20 
    21 GNU Emacs is distributed in the hope that it will be useful,
    22 but WITHOUT ANY WARRANTY; without even the implied warranty of
    23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    24 GNU General Public License for more details.
    25 
    26 You should have received a copy of the GNU General Public License
    27 along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
    28 
    29 #include <config.h>
    30 
    31 #include <errno.h>
    32 #include <stdlib.h>
    33 #include <unistd.h>
    34 #include <limits.h>
    35 #include <sys/types.h>
    36 #include "lisp.h"
    37 #include "character.h"
    38 #include "charset.h"
    39 #include "coding.h"
    40 #include "buffer.h"
    41 #include "sysstdio.h"
    42 #include "pdumper.h"
    43 
    44 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
    45 
    46   A coded character set ("charset" hereafter) is a meaningful
    47   collection (i.e. language, culture, functionality, etc.) of
    48   characters.  Emacs handles multiple charsets at once.  In Emacs Lisp
    49   code, a charset is represented by a symbol.  In C code, a charset is
    50   represented by its ID number or by a pointer to a struct charset.
    51 
    52   The actual information about each charset is stored in two places.
    53   Lispy information is stored in the hash table Vcharset_hash_table as
    54   a vector (charset attributes).  The other information is stored in
    55   charset_table as a struct charset.
    56 
    57 */
    58 
    59 /* Hash table that contains attributes of each charset.  Keys are
    60    charset symbols, and values are vectors of charset attributes.  */
    61 Lisp_Object Vcharset_hash_table;
    62 
    63 /* Table of struct charset.  */
    64 struct charset *charset_table;
    65 int charset_table_size;
    66 int charset_table_used;
    67 
    68 /* Special charsets corresponding to symbols.  */
    69 int charset_ascii;
    70 int charset_eight_bit;
    71 static int charset_iso_8859_1;
    72 int charset_unicode;
    73 static int charset_emacs;
    74 
    75 /* The other special charsets.  */
    76 int charset_jisx0201_roman;
    77 int charset_jisx0208_1978;
    78 int charset_jisx0208;
    79 int charset_ksc5601;
    80 
    81 /* Charset of unibyte characters.  */
    82 int charset_unibyte;
    83 
    84 /* List of charsets ordered by the priority.  */
    85 Lisp_Object Vcharset_ordered_list;
    86 
    87 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
    88    charsets.  */
    89 Lisp_Object Vcharset_non_preferred_head;
    90 
    91 /* Incremented every time we change the priority of charsets.
    92    Wraps around.  */
    93 EMACS_UINT charset_ordered_list_tick;
    94 
    95 /* List of iso-2022 charsets.  */
    96 Lisp_Object Viso_2022_charset_list;
    97 
    98 /* List of emacs-mule charsets.  */
    99 Lisp_Object Vemacs_mule_charset_list;
   100 
   101 int emacs_mule_charset[256];
   102 
   103 /* Mapping table from ISO2022's charset (specified by DIMENSION,
   104    CHARS, and FINAL-CHAR) to Emacs' charset.  */
   105 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
   106 
   107 #define CODE_POINT_TO_INDEX(charset, code)                              \
   108   ((charset)->code_linear_p                                             \
   109    ? (int) ((code) - (charset)->min_code)                               \
   110    : (((charset)->code_space_mask[(code) >> 24] & 0x8)                  \
   111       && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4)      \
   112       && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2)       \
   113       && ((charset)->code_space_mask[(code) & 0xFF] & 0x1))             \
   114    ? (int) (((((code) >> 24) - (charset)->code_space[12])               \
   115              * (charset)->code_space[11])                               \
   116             + (((((code) >> 16) & 0xFF) - (charset)->code_space[8])     \
   117                * (charset)->code_space[7])                              \
   118             + (((((code) >> 8) & 0xFF) - (charset)->code_space[4])      \
   119                * (charset)->code_space[3])                              \
   120             + (((code) & 0xFF) - (charset)->code_space[0])              \
   121             - ((charset)->char_index_offset))                           \
   122    : -1)
   123 
   124 
   125 /* Return the code-point for the character index IDX in CHARSET.
   126    IDX should be an unsigned int variable in a valid range (which is
   127    always in nonnegative int range too).  IDX contains garbage afterwards.  */
   128 
   129 #define INDEX_TO_CODE_POINT(charset, idx)                                    \
   130   ((charset)->code_linear_p                                                  \
   131    ? (idx) + (charset)->min_code                                             \
   132    : (idx += (charset)->char_index_offset,                                   \
   133       (((charset)->code_space[0] + (idx) % (charset)->code_space[2])         \
   134        | (((charset)->code_space[4]                                          \
   135            + ((idx) / (charset)->code_space[3] % (charset)->code_space[6]))  \
   136           << 8)                                                              \
   137        | (((charset)->code_space[8]                                          \
   138            + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
   139           << 16)                                                             \
   140        | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11]))  \
   141           << 24))))
   142 
   143 /* Structure to hold mapping tables for a charset.  Used by temacs
   144    invoked for dumping.  */
   145 
   146 static struct
   147 {
   148   /* The current charset for which the following tables are setup.  */
   149   struct charset *current;
   150 
   151   /* 1 iff the following table is used for encoder.  */
   152   short for_encoder;
   153 
   154   /* When the following table is used for encoding, minimum and
   155      maximum character of the current charset.  */
   156   int min_char, max_char;
   157 
   158   /* A Unicode character corresponding to the code index 0 (i.e. the
   159      minimum code-point) of the current charset, or -1 if the code
   160      index 0 is not a Unicode character.  This is checked when
   161      table.encoder[CHAR] is zero.  */
   162   int zero_index_char;
   163 
   164   union {
   165     /* Table mapping code-indices (not code-points) of the current
   166        charset to Unicode characters.  If decoder[CHAR] is -1, CHAR
   167        doesn't belong to the current charset.  */
   168     int decoder[0x10000];
   169     /* Table mapping Unicode characters to code-indices of the current
   170        charset.  The first 0x10000 elements are for BMP (0..0xFFFF),
   171        and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
   172        (0x20000..0x2FFFF).  Note that there is no charset map that
   173        uses both SMP and SIP.  */
   174     unsigned short encoder[0x20000];
   175   } table;
   176 } *temp_charset_work;
   177 
   178 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE)                  \
   179   do {                                                          \
   180     if ((CODE) == 0)                                            \
   181       temp_charset_work->zero_index_char = (C);                 \
   182     else if ((C) < 0x20000)                                     \
   183       temp_charset_work->table.encoder[(C)] = (CODE);           \
   184     else                                                        \
   185       temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
   186   } while (0)
   187 
   188 #define GET_TEMP_CHARSET_WORK_ENCODER(C)                                  \
   189   ((C) == temp_charset_work->zero_index_char ? 0                          \
   190    : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)]               \
   191                       ? (int) temp_charset_work->table.encoder[(C)] : -1) \
   192    : temp_charset_work->table.encoder[(C) - 0x10000]                      \
   193    ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
   194 
   195 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE)  \
   196   (temp_charset_work->table.decoder[(CODE)] = (C))
   197 
   198 #define GET_TEMP_CHARSET_WORK_DECODER(CODE)     \
   199   (temp_charset_work->table.decoder[(CODE)])
   200 
   201 
   202 /* Set to 1 to warn that a charset map is loaded and thus a buffer
   203    text and a string data may be relocated.  */
   204 bool charset_map_loaded;
   205 
   206 struct charset_map_entries
   207 {
   208   struct {
   209     unsigned from, to;
   210     int c;
   211   } entry[0x10000];
   212   struct charset_map_entries *next;
   213 };
   214 
   215 /* Load the mapping information of CHARSET from ENTRIES for
   216    initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
   217    encoding (CONTROL_FLAG == 2).
   218 
   219    If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
   220    and CHARSET->fast_map.
   221 
   222    If CONTROL_FLAG is 1, setup the following tables according to
   223    CHARSET->method and inhibit_load_charset_map.
   224 
   225    CHARSET->method       | inhibit_lcm == 0   | inhibit_lcm == 1
   226    ----------------------+--------------------+---------------------------
   227    CHARSET_METHOD_MAP    | CHARSET->decoder   | temp_charset_work->decoder
   228    ----------------------+--------------------+---------------------------
   229    CHARSET_METHOD_OFFSET | Vchar_unify_table  | temp_charset_work->decoder
   230 
   231    If CONTROL_FLAG is 2, setup the following tables.
   232 
   233    CHARSET->method       | inhibit_lcm == 0   | inhibit_lcm == 1
   234    ----------------------+--------------------+---------------------------
   235    CHARSET_METHOD_MAP    | CHARSET->encoder   | temp_charset_work->encoder
   236    ----------------------+--------------------+--------------------------
   237    CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
   238 */
   239 
   240 static void
   241 load_charset_map (struct charset *charset, struct charset_map_entries *entries, int n_entries, int control_flag)
   242 {
   243   Lisp_Object vec UNINIT;
   244   Lisp_Object table UNINIT;
   245   unsigned max_code = CHARSET_MAX_CODE (charset);
   246   bool ascii_compatible_p = charset->ascii_compatible_p;
   247   int min_char, max_char, nonascii_min_char;
   248   int i;
   249   unsigned char *fast_map = charset->fast_map;
   250 
   251   if (n_entries <= 0)
   252     return;
   253 
   254   if (control_flag)
   255     {
   256       if (! inhibit_load_charset_map)
   257         {
   258           if (control_flag == 1)
   259             {
   260               if (charset->method == CHARSET_METHOD_MAP)
   261                 {
   262                   int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
   263 
   264                   vec = make_vector (n, make_fixnum (-1));
   265                   set_charset_attr (charset, charset_decoder, vec);
   266                 }
   267               else
   268                 {
   269                   char_table_set_range (Vchar_unify_table,
   270                                         charset->min_char, charset->max_char,
   271                                         Qnil);
   272                 }
   273             }
   274           else
   275             {
   276               table = Fmake_char_table (Qnil, Qnil);
   277               set_charset_attr (charset,
   278                                 (charset->method == CHARSET_METHOD_MAP
   279                                  ? charset_encoder : charset_deunifier),
   280                                 table);
   281             }
   282         }
   283       else
   284         {
   285           if (! temp_charset_work)
   286             temp_charset_work = xmalloc (sizeof *temp_charset_work);
   287           if (control_flag == 1)
   288             {
   289               memset (temp_charset_work->table.decoder, -1,
   290                       sizeof (int) * 0x10000);
   291             }
   292           else
   293             {
   294               memset (temp_charset_work->table.encoder, 0,
   295                       sizeof (unsigned short) * 0x20000);
   296               temp_charset_work->zero_index_char = -1;
   297             }
   298           temp_charset_work->current = charset;
   299           temp_charset_work->for_encoder = (control_flag == 2);
   300           control_flag += 2;
   301         }
   302       charset_map_loaded = 1;
   303     }
   304 
   305   min_char = max_char = entries->entry[0].c;
   306   nonascii_min_char = MAX_CHAR;
   307   for (i = 0; i < n_entries; i++)
   308     {
   309       unsigned from, to;
   310       int from_index, to_index, lim_index;
   311       int from_c, to_c;
   312       int idx = i % 0x10000;
   313 
   314       if (i > 0 && idx == 0)
   315         entries = entries->next;
   316       from = entries->entry[idx].from;
   317       to = entries->entry[idx].to;
   318       from_c = entries->entry[idx].c;
   319       from_index = CODE_POINT_TO_INDEX (charset, from);
   320       if (from == to)
   321         {
   322           to_index = from_index;
   323           to_c = from_c;
   324         }
   325       else
   326         {
   327           to_index = CODE_POINT_TO_INDEX (charset, to);
   328           to_c = from_c + (to_index - from_index);
   329         }
   330       if (from_index < 0 || to_index < 0)
   331         continue;
   332       lim_index = to_index + 1;
   333 
   334       if (to_c > max_char)
   335         max_char = to_c;
   336       else if (from_c < min_char)
   337         min_char = from_c;
   338 
   339       if (control_flag == 1)
   340         {
   341           if (charset->method == CHARSET_METHOD_MAP)
   342             for (; from_index < lim_index; from_index++, from_c++)
   343               ASET (vec, from_index, make_fixnum (from_c));
   344           else
   345             for (; from_index < lim_index; from_index++, from_c++)
   346               CHAR_TABLE_SET (Vchar_unify_table,
   347                               CHARSET_CODE_OFFSET (charset) + from_index,
   348                               make_fixnum (from_c));
   349         }
   350       else if (control_flag == 2)
   351         {
   352           if (charset->method == CHARSET_METHOD_MAP
   353               && CHARSET_COMPACT_CODES_P (charset))
   354             for (; from_index < lim_index; from_index++, from_c++)
   355               {
   356                 unsigned code = from_index;
   357                 code = INDEX_TO_CODE_POINT (charset, code);
   358 
   359                 if (NILP (CHAR_TABLE_REF (table, from_c)))
   360                   CHAR_TABLE_SET (table, from_c, make_fixnum (code));
   361               }
   362           else
   363             for (; from_index < lim_index; from_index++, from_c++)
   364               {
   365                 if (NILP (CHAR_TABLE_REF (table, from_c)))
   366                   CHAR_TABLE_SET (table, from_c, make_fixnum (from_index));
   367               }
   368         }
   369       else if (control_flag == 3)
   370         for (; from_index < lim_index; from_index++, from_c++)
   371           SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
   372       else if (control_flag == 4)
   373         for (; from_index < lim_index; from_index++, from_c++)
   374           SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
   375       else                      /* control_flag == 0 */
   376         {
   377           if (ascii_compatible_p)
   378             {
   379               if (! ASCII_CHAR_P (from_c))
   380                 {
   381                   if (from_c < nonascii_min_char)
   382                     nonascii_min_char = from_c;
   383                 }
   384               else if (! ASCII_CHAR_P (to_c))
   385                 {
   386                   nonascii_min_char = 0x80;
   387                 }
   388             }
   389 
   390           for (; from_c <= to_c; from_c++)
   391             CHARSET_FAST_MAP_SET (from_c, fast_map);
   392         }
   393     }
   394 
   395   if (control_flag == 0)
   396     {
   397       CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
   398                                     ? nonascii_min_char : min_char);
   399       CHARSET_MAX_CHAR (charset) = max_char;
   400     }
   401   else if (control_flag == 4)
   402     {
   403       temp_charset_work->min_char = min_char;
   404       temp_charset_work->max_char = max_char;
   405     }
   406 }
   407 
   408 
   409 /* Read a hexadecimal number (preceded by "0x") from the file FP while
   410    paying attention to comment character '#'.  LOOKAHEAD is the
   411    lookahead byte if it is nonnegative.  Store into *TERMINATOR the
   412    input byte after the number, or EOF if an end-of-file or input
   413    error occurred.  Set *OVERFLOW if the number overflows.  */
   414 
   415 static unsigned
   416 read_hex (FILE *fp, int lookahead, int *terminator, bool *overflow)
   417 {
   418   int c = lookahead < 0 ? getc (fp) : lookahead;
   419 
   420   while (true)
   421     {
   422       if (c == '#')
   423         do
   424           c = getc (fp);
   425         while (0 <= c && c != '\n');
   426       else if (c == '0')
   427         {
   428           c = getc (fp);
   429           if (c < 0 || c == 'x')
   430             break;
   431         }
   432       if (c < 0)
   433         break;
   434       c = getc (fp);
   435     }
   436 
   437   unsigned n = 0;
   438   bool v = false;
   439 
   440   if (0 <= c)
   441     while (true)
   442       {
   443         c = getc (fp);
   444         int digit = char_hexdigit (c);
   445         if (digit < 0)
   446           break;
   447         v |= INT_LEFT_SHIFT_OVERFLOW (n, 4);
   448         n = (n << 4) + digit;
   449       }
   450 
   451   *terminator = c;
   452   *overflow |= v;
   453   return n;
   454 }
   455 
   456 /* Return a mapping vector for CHARSET loaded from MAPFILE.
   457    Each line of MAPFILE has this form
   458         0xAAAA 0xCCCC
   459    where 0xAAAA is a code-point and 0xCCCC is the corresponding
   460    character code, or this form
   461         0xAAAA-0xBBBB 0xCCCC
   462    where 0xAAAA and 0xBBBB are code-points specifying a range, and
   463    0xCCCC is the first character code of the range.
   464 
   465    The returned vector has this form:
   466         [ CODE1 CHAR1 CODE2 CHAR2 .... ]
   467    where CODE1 is a code-point or a cons of code-points specifying a
   468    range.
   469 
   470    Note that this function uses `openp' to open MAPFILE but ignores
   471    `file-name-handler-alist' to avoid running any Lisp code.  */
   472 
   473 static void
   474 load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile,
   475                             int control_flag)
   476 {
   477   unsigned min_code = CHARSET_MIN_CODE (charset);
   478   unsigned max_code = CHARSET_MAX_CODE (charset);
   479   int fd;
   480   FILE *fp;
   481   struct charset_map_entries *head, *entries;
   482   int n_entries;
   483   AUTO_STRING (map, ".map");
   484   AUTO_STRING (txt, ".txt");
   485   AUTO_LIST2 (suffixes, map, txt);
   486   specpdl_ref count = SPECPDL_INDEX ();
   487   record_unwind_protect_nothing ();
   488   specbind (Qfile_name_handler_alist, Qnil);
   489   fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil, false, false);
   490   fp = fd < 0 ? 0 : fdopen (fd, "r");
   491   if (!fp)
   492     {
   493       int open_errno = errno;
   494       emacs_close (fd);
   495       report_file_errno ("Loading charset map", mapfile, open_errno);
   496     }
   497   set_unwind_protect_ptr (count, fclose_unwind, fp);
   498   unbind_to (specpdl_ref_add (count, 1), Qnil);
   499 
   500   /* Use record_xmalloc, as `charset_map_entries' is
   501      large (larger than MAX_ALLOCA).  */
   502   head = record_xmalloc (sizeof *head);
   503   entries = head;
   504   memset (entries, 0, sizeof (struct charset_map_entries));
   505 
   506   n_entries = 0;
   507   int ch = -1;
   508   while (true)
   509     {
   510       bool overflow = false;
   511       unsigned from = read_hex (fp, ch, &ch, &overflow), to;
   512       if (ch < 0)
   513         break;
   514       if (ch == '-')
   515         {
   516           to = read_hex (fp, -1, &ch, &overflow);
   517           if (ch < 0)
   518             break;
   519         }
   520       else
   521         {
   522           to = from;
   523           ch = -1;
   524         }
   525       unsigned c = read_hex (fp, ch, &ch, &overflow);
   526       if (ch < 0)
   527         break;
   528 
   529       if (overflow)
   530         continue;
   531       if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
   532         continue;
   533 
   534       if (n_entries == 0x10000)
   535         {
   536           entries->next = record_xmalloc (sizeof *entries->next);
   537           entries = entries->next;
   538           memset (entries, 0, sizeof (struct charset_map_entries));
   539           n_entries = 0;
   540         }
   541       int idx = n_entries;
   542       entries->entry[idx].from = from;
   543       entries->entry[idx].to = to;
   544       entries->entry[idx].c = c;
   545       n_entries++;
   546     }
   547   fclose (fp);
   548   clear_unwind_protect (count);
   549 
   550   load_charset_map (charset, head, n_entries, control_flag);
   551   unbind_to (count, Qnil);
   552 }
   553 
   554 static void
   555 load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int control_flag)
   556 {
   557   unsigned min_code = CHARSET_MIN_CODE (charset);
   558   unsigned max_code = CHARSET_MAX_CODE (charset);
   559   struct charset_map_entries *head, *entries;
   560   int n_entries;
   561   int len = ASIZE (vec);
   562   int i;
   563   USE_SAFE_ALLOCA;
   564 
   565   if (len % 2 == 1)
   566     {
   567       add_to_log ("Failure in loading charset map: %V", vec);
   568       return;
   569     }
   570 
   571   /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
   572      large (larger than MAX_ALLOCA).  */
   573   head = SAFE_ALLOCA (sizeof *head);
   574   entries = head;
   575   memset (entries, 0, sizeof (struct charset_map_entries));
   576 
   577   n_entries = 0;
   578   for (i = 0; i < len; i += 2)
   579     {
   580       Lisp_Object val, val2;
   581       unsigned from, to;
   582       EMACS_INT c;
   583       int idx;
   584 
   585       val = AREF (vec, i);
   586       if (CONSP (val))
   587         {
   588           val2 = XCDR (val);
   589           val = XCAR (val);
   590           from = XFIXNAT (val);
   591           to = XFIXNAT (val2);
   592         }
   593       else
   594         from = to = XFIXNAT (val);
   595       val = AREF (vec, i + 1);
   596       CHECK_FIXNAT (val);
   597       c = XFIXNAT (val);
   598 
   599       if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
   600         continue;
   601 
   602       if (n_entries > 0 && (n_entries % 0x10000) == 0)
   603         {
   604           entries->next = SAFE_ALLOCA (sizeof *entries->next);
   605           entries = entries->next;
   606           memset (entries, 0, sizeof (struct charset_map_entries));
   607         }
   608       idx = n_entries % 0x10000;
   609       entries->entry[idx].from = from;
   610       entries->entry[idx].to = to;
   611       entries->entry[idx].c = c;
   612       n_entries++;
   613     }
   614 
   615   load_charset_map (charset, head, n_entries, control_flag);
   616   SAFE_FREE ();
   617 }
   618 
   619 
   620 /* Load a mapping table for CHARSET.  CONTROL-FLAG tells what kind of
   621    map it is (see the comment of load_charset_map for the detail).  */
   622 
   623 static void
   624 load_charset (struct charset *charset, int control_flag)
   625 {
   626   Lisp_Object map;
   627 
   628   if (inhibit_load_charset_map
   629       && temp_charset_work
   630       && charset == temp_charset_work->current
   631       && ((control_flag == 2) == temp_charset_work->for_encoder))
   632     return;
   633 
   634   if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
   635     map = CHARSET_MAP (charset);
   636   else
   637     {
   638       if (! CHARSET_UNIFIED_P (charset))
   639         emacs_abort ();
   640       map = CHARSET_UNIFY_MAP (charset);
   641     }
   642   if (STRINGP (map))
   643     load_charset_map_from_file (charset, map, control_flag);
   644   else
   645     load_charset_map_from_vector (charset, map, control_flag);
   646 }
   647 
   648 
   649 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
   650        doc: /* Return non-nil if and only if OBJECT is a charset.*/)
   651   (Lisp_Object object)
   652 {
   653   return (CHARSETP (object) ? Qt : Qnil);
   654 }
   655 
   656 
   657 static void
   658 map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object),
   659                       Lisp_Object function, Lisp_Object arg,
   660                       unsigned int from, unsigned int to)
   661 {
   662   int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
   663   int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
   664   Lisp_Object range = Fcons (Qnil, Qnil);
   665   int c, stop;
   666 
   667   c = temp_charset_work->min_char;
   668   stop = (temp_charset_work->max_char < 0x20000
   669           ? temp_charset_work->max_char : 0xFFFF);
   670 
   671   while (1)
   672     {
   673       int idx = GET_TEMP_CHARSET_WORK_ENCODER (c);
   674 
   675       if (idx >= from_idx && idx <= to_idx)
   676         {
   677           if (NILP (XCAR (range)))
   678             XSETCAR (range, make_fixnum (c));
   679         }
   680       else if (! NILP (XCAR (range)))
   681         {
   682           XSETCDR (range, make_fixnum (c - 1));
   683           if (c_function)
   684             (*c_function) (arg, range);
   685           else
   686             call2 (function, range, arg);
   687           XSETCAR (range, Qnil);
   688         }
   689       if (c == stop)
   690         {
   691           if (c == temp_charset_work->max_char)
   692             {
   693               if (! NILP (XCAR (range)))
   694                 {
   695                   XSETCDR (range, make_fixnum (c));
   696                   if (c_function)
   697                     (*c_function) (arg, range);
   698                   else
   699                     call2 (function, range, arg);
   700                 }
   701               break;
   702             }
   703           c = 0x1FFFF;
   704           stop = temp_charset_work->max_char;
   705         }
   706       c++;
   707     }
   708 }
   709 
   710 void
   711 map_charset_chars (void (*c_function)(Lisp_Object, Lisp_Object), Lisp_Object function,
   712                    Lisp_Object arg, struct charset *charset, unsigned from, unsigned to)
   713 {
   714   Lisp_Object range;
   715   bool partial = (from > CHARSET_MIN_CODE (charset)
   716                   || to < CHARSET_MAX_CODE (charset));
   717 
   718   if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
   719     {
   720       int from_idx = CODE_POINT_TO_INDEX (charset, from);
   721       int to_idx = CODE_POINT_TO_INDEX (charset, to);
   722       int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
   723       int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
   724 
   725       if (CHARSET_UNIFIED_P (charset))
   726         {
   727           if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
   728             load_charset (charset, 2);
   729           if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
   730             map_char_table_for_charset (c_function, function,
   731                                         CHARSET_DEUNIFIER (charset), arg,
   732                                         partial ? charset : NULL, from, to);
   733           else
   734             map_charset_for_dump (c_function, function, arg, from, to);
   735         }
   736 
   737       range = Fcons (make_fixnum (from_c), make_fixnum (to_c));
   738       if (NILP (function))
   739         (*c_function) (arg, range);
   740       else
   741         call2 (function, range, arg);
   742     }
   743   else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
   744     {
   745       if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
   746         load_charset (charset, 2);
   747       if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
   748         map_char_table_for_charset (c_function, function,
   749                                     CHARSET_ENCODER (charset), arg,
   750                                     partial ? charset : NULL, from, to);
   751       else
   752         map_charset_for_dump (c_function, function, arg, from, to);
   753     }
   754   else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
   755     {
   756       Lisp_Object subset_info;
   757       int offset;
   758 
   759       subset_info = CHARSET_SUBSET (charset);
   760       charset = CHARSET_FROM_ID (XFIXNAT (AREF (subset_info, 0)));
   761       offset = XFIXNUM (AREF (subset_info, 3));
   762       from -= offset;
   763       if (from < XFIXNAT (AREF (subset_info, 1)))
   764         from = XFIXNAT (AREF (subset_info, 1));
   765       to -= offset;
   766       if (to > XFIXNAT (AREF (subset_info, 2)))
   767         to = XFIXNAT (AREF (subset_info, 2));
   768       map_charset_chars (c_function, function, arg, charset, from, to);
   769     }
   770   else                          /* i.e. CHARSET_METHOD_SUPERSET */
   771     {
   772       Lisp_Object parents;
   773 
   774       for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
   775            parents = XCDR (parents))
   776         {
   777           int offset;
   778           unsigned this_from, this_to;
   779 
   780           charset = CHARSET_FROM_ID (XFIXNAT (XCAR (XCAR (parents))));
   781           offset = XFIXNUM (XCDR (XCAR (parents)));
   782           this_from = from > offset ? from - offset : 0;
   783           this_to = to > offset ? to - offset : 0;
   784           if (this_from < CHARSET_MIN_CODE (charset))
   785             this_from = CHARSET_MIN_CODE (charset);
   786           if (this_to > CHARSET_MAX_CODE (charset))
   787             this_to = CHARSET_MAX_CODE (charset);
   788           map_charset_chars (c_function, function, arg, charset,
   789                              this_from, this_to);
   790         }
   791     }
   792 }
   793 
   794 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
   795        doc: /* Call FUNCTION for all characters in CHARSET.
   796 Optional 3rd argument ARG is an additional argument to be passed
   797 to FUNCTION, see below.
   798 Optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
   799 range of code points (in CHARSET) of target characters on which to
   800 map the FUNCTION.  Note that these are not character codes, but code
   801 points of CHARSET; for the difference see `decode-char' and
   802 `list-charset-chars'.  If FROM-CODE is nil or imitted, it stands for
   803 the first code point of CHARSET; if TO-CODE is nil or omitted, it
   804 stands for the last code point of CHARSET.
   805 
   806 FUNCTION will be called with two arguments: RANGE and ARG.
   807 RANGE is a cons (FROM .  TO), where FROM and TO specify a range of
   808 characters that belong to CHARSET on which FUNCTION should do its
   809 job.  FROM and TO are Emacs character codes, unlike FROM-CODE and
   810 TO-CODE, which are CHARSET code points.  */)
   811   (Lisp_Object function, Lisp_Object charset, Lisp_Object arg, Lisp_Object from_code, Lisp_Object to_code)
   812 {
   813   struct charset *cs;
   814   unsigned from, to;
   815 
   816   CHECK_CHARSET_GET_CHARSET (charset, cs);
   817   if (NILP (from_code))
   818     from = CHARSET_MIN_CODE (cs);
   819   else
   820     {
   821       from = XFIXNUM (from_code);
   822       if (from < CHARSET_MIN_CODE (cs))
   823         from = CHARSET_MIN_CODE (cs);
   824     }
   825   if (NILP (to_code))
   826     to = CHARSET_MAX_CODE (cs);
   827   else
   828     {
   829       to = XFIXNUM (to_code);
   830       if (to > CHARSET_MAX_CODE (cs))
   831         to = CHARSET_MAX_CODE (cs);
   832     }
   833   map_charset_chars (NULL, function, arg, cs, from, to);
   834   return Qnil;
   835 }
   836 
   837 
   838 /* Define a charset according to the arguments.  The Nth argument is
   839    the Nth attribute of the charset (the last attribute `charset-id'
   840    is not included).  See the docstring of `define-charset' for the
   841    detail.  */
   842 
   843 DEFUN ("define-charset-internal", Fdefine_charset_internal,
   844        Sdefine_charset_internal, charset_arg_max, MANY, 0,
   845        doc: /* For internal use only.
   846 usage: (define-charset-internal ...)  */)
   847   (ptrdiff_t nargs, Lisp_Object *args)
   848 {
   849   /* Charset attr vector.  */
   850   Lisp_Object attrs;
   851   Lisp_Object val;
   852   Lisp_Object hash_code;
   853   struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
   854   int i, j;
   855   struct charset charset;
   856   int id;
   857   int dimension;
   858   bool new_definition_p;
   859   int nchars;
   860 
   861   memset (&charset, 0, sizeof (charset));
   862 
   863   if (nargs != charset_arg_max)
   864     Fsignal (Qwrong_number_of_arguments,
   865              Fcons (intern ("define-charset-internal"),
   866                     make_fixnum (nargs)));
   867 
   868   attrs = make_nil_vector (charset_attr_max);
   869 
   870   CHECK_SYMBOL (args[charset_arg_name]);
   871   ASET (attrs, charset_name, args[charset_arg_name]);
   872 
   873   val = args[charset_arg_code_space];
   874   for (i = 0, dimension = 0, nchars = 1; ; i++)
   875     {
   876       Lisp_Object min_byte_obj = Faref (val, make_fixnum (i * 2));
   877       Lisp_Object max_byte_obj = Faref (val, make_fixnum (i * 2 + 1));
   878       int min_byte = check_integer_range (min_byte_obj, 0, 255);
   879       int max_byte = check_integer_range (max_byte_obj, min_byte, 255);
   880       charset.code_space[i * 4] = min_byte;
   881       charset.code_space[i * 4 + 1] = max_byte;
   882       charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
   883       if (max_byte > 0)
   884         dimension = i + 1;
   885       if (i == 3)
   886         break;
   887       nchars *= charset.code_space[i * 4 + 2];
   888       charset.code_space[i * 4 + 3] = nchars;
   889     }
   890 
   891   val = args[charset_arg_dimension];
   892   charset.dimension
   893     = !NILP (val) ? check_integer_range (val, 1, 4) : dimension;
   894 
   895   charset.code_linear_p
   896     = (charset.dimension == 1
   897        || (charset.code_space[2] == 256
   898            && (charset.dimension == 2
   899                || (charset.code_space[6] == 256
   900                    && (charset.dimension == 3
   901                        || charset.code_space[10] == 256)))));
   902 
   903   if (! charset.code_linear_p)
   904     {
   905       charset.code_space_mask = xzalloc (256);
   906       for (i = 0; i < 4; i++)
   907         for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
   908              j++)
   909           charset.code_space_mask[j] |= (1 << i);
   910     }
   911 
   912   charset.iso_chars_96 = charset.code_space[2] == 96;
   913 
   914   charset.min_code = (charset.code_space[0]
   915                       | (charset.code_space[4] << 8)
   916                       | (charset.code_space[8] << 16)
   917                       | ((unsigned) charset.code_space[12] << 24));
   918   charset.max_code = (charset.code_space[1]
   919                       | (charset.code_space[5] << 8)
   920                       | (charset.code_space[9] << 16)
   921                       | ((unsigned) charset.code_space[13] << 24));
   922   charset.char_index_offset = 0;
   923 
   924   val = args[charset_arg_min_code];
   925   if (! NILP (val))
   926     {
   927       unsigned code = cons_to_unsigned (val, UINT_MAX);
   928 
   929       if (code < charset.min_code
   930           || code > charset.max_code)
   931         args_out_of_range_3 (INT_TO_INTEGER (charset.min_code),
   932                              INT_TO_INTEGER (charset.max_code), val);
   933       charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
   934       charset.min_code = code;
   935     }
   936 
   937   val = args[charset_arg_max_code];
   938   if (! NILP (val))
   939     {
   940       unsigned code = cons_to_unsigned (val, UINT_MAX);
   941 
   942       if (code < charset.min_code
   943           || code > charset.max_code)
   944         args_out_of_range_3 (INT_TO_INTEGER (charset.min_code),
   945                              INT_TO_INTEGER (charset.max_code), val);
   946       charset.max_code = code;
   947     }
   948 
   949   charset.compact_codes_p = charset.max_code < 0x10000;
   950 
   951   val = args[charset_arg_invalid_code];
   952   if (NILP (val))
   953     {
   954       if (charset.min_code > 0)
   955         charset.invalid_code = 0;
   956       else
   957         {
   958           if (charset.max_code < UINT_MAX)
   959             charset.invalid_code = charset.max_code + 1;
   960           else
   961             error ("Attribute :invalid-code must be specified");
   962         }
   963     }
   964   else
   965     charset.invalid_code = cons_to_unsigned (val, UINT_MAX);
   966 
   967   val = args[charset_arg_iso_final];
   968   if (NILP (val))
   969     charset.iso_final = -1;
   970   else
   971     {
   972       CHECK_FIXNUM (val);
   973       if (XFIXNUM (val) < '0' || XFIXNUM (val) > 127)
   974         error ("Invalid iso-final-char: %"pI"d", XFIXNUM (val));
   975       charset.iso_final = XFIXNUM (val);
   976     }
   977 
   978   val = args[charset_arg_iso_revision];
   979   charset.iso_revision = !NILP (val) ? check_integer_range (val, -1, 63) : -1;
   980 
   981   val = args[charset_arg_emacs_mule_id];
   982   if (NILP (val))
   983     charset.emacs_mule_id = -1;
   984   else
   985     {
   986       CHECK_FIXNAT (val);
   987       if ((XFIXNUM (val) > 0 && XFIXNUM (val) <= 128) || XFIXNUM (val) >= 256)
   988         error ("Invalid emacs-mule-id: %"pI"d", XFIXNUM (val));
   989       charset.emacs_mule_id = XFIXNUM (val);
   990     }
   991 
   992   charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
   993 
   994   charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
   995 
   996   charset.unified_p = 0;
   997 
   998   memset (charset.fast_map, 0, sizeof (charset.fast_map));
   999 
  1000   if (! NILP (args[charset_arg_code_offset]))
  1001     {
  1002       val = args[charset_arg_code_offset];
  1003       CHECK_CHARACTER (val);
  1004 
  1005       charset.method = CHARSET_METHOD_OFFSET;
  1006       charset.code_offset = XFIXNUM (val);
  1007 
  1008       i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
  1009       if (MAX_CHAR - charset.code_offset < i)
  1010         error ("Unsupported max char: %d", charset.max_char);
  1011       charset.max_char = i + charset.code_offset;
  1012       i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
  1013       charset.min_char = i + charset.code_offset;
  1014 
  1015       i = (charset.min_char >> 7) << 7;
  1016       for (; i < 0x10000 && i <= charset.max_char; i += 128)
  1017         CHARSET_FAST_MAP_SET (i, charset.fast_map);
  1018       i = (i >> 12) << 12;
  1019       for (; i <= charset.max_char; i += 0x1000)
  1020         CHARSET_FAST_MAP_SET (i, charset.fast_map);
  1021       if (charset.code_offset == 0 && charset.max_char >= 0x80)
  1022         charset.ascii_compatible_p = 1;
  1023     }
  1024   else if (! NILP (args[charset_arg_map]))
  1025     {
  1026       val = args[charset_arg_map];
  1027       ASET (attrs, charset_map, val);
  1028       charset.method = CHARSET_METHOD_MAP;
  1029     }
  1030   else if (! NILP (args[charset_arg_subset]))
  1031     {
  1032       Lisp_Object parent;
  1033       Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
  1034       struct charset *parent_charset;
  1035 
  1036       val = args[charset_arg_subset];
  1037       parent = Fcar (val);
  1038       CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
  1039       parent_min_code = Fnth (make_fixnum (1), val);
  1040       CHECK_FIXNAT (parent_min_code);
  1041       parent_max_code = Fnth (make_fixnum (2), val);
  1042       CHECK_FIXNAT (parent_max_code);
  1043       parent_code_offset = Fnth (make_fixnum (3), val);
  1044       CHECK_FIXNUM (parent_code_offset);
  1045       ASET (attrs, charset_subset,
  1046             CALLN (Fvector, make_fixnum (parent_charset->id),
  1047                    parent_min_code, parent_max_code, parent_code_offset));
  1048 
  1049       charset.method = CHARSET_METHOD_SUBSET;
  1050       /* Here, we just copy the parent's fast_map.  It's not accurate,
  1051          but at least it works for quickly detecting which character
  1052          DOESN'T belong to this charset.  */
  1053       memcpy (charset.fast_map, parent_charset->fast_map,
  1054               sizeof charset.fast_map);
  1055 
  1056       /* We also copy these for parents.  */
  1057       charset.min_char = parent_charset->min_char;
  1058       charset.max_char = parent_charset->max_char;
  1059     }
  1060   else if (! NILP (args[charset_arg_superset]))
  1061     {
  1062       val = args[charset_arg_superset];
  1063       charset.method = CHARSET_METHOD_SUPERSET;
  1064       val = Fcopy_sequence (val);
  1065       ASET (attrs, charset_superset, val);
  1066 
  1067       charset.min_char = MAX_CHAR;
  1068       charset.max_char = 0;
  1069       for (; ! NILP (val); val = Fcdr (val))
  1070         {
  1071           Lisp_Object elt, car_part, cdr_part;
  1072           int this_id, offset;
  1073           struct charset *this_charset;
  1074 
  1075           elt = Fcar (val);
  1076           if (CONSP (elt))
  1077             {
  1078               car_part = XCAR (elt);
  1079               cdr_part = XCDR (elt);
  1080               CHECK_CHARSET_GET_ID (car_part, this_id);
  1081               offset = check_integer_range (cdr_part, INT_MIN, INT_MAX);
  1082             }
  1083           else
  1084             {
  1085               CHECK_CHARSET_GET_ID (elt, this_id);
  1086               offset = 0;
  1087             }
  1088           XSETCAR (val, Fcons (make_fixnum (this_id), make_fixnum (offset)));
  1089 
  1090           this_charset = CHARSET_FROM_ID (this_id);
  1091           if (charset.min_char > this_charset->min_char)
  1092             charset.min_char = this_charset->min_char;
  1093           if (charset.max_char < this_charset->max_char)
  1094             charset.max_char = this_charset->max_char;
  1095           for (i = 0; i < 190; i++)
  1096             charset.fast_map[i] |= this_charset->fast_map[i];
  1097         }
  1098     }
  1099   else
  1100     error ("None of :code-offset, :map, :parents are specified");
  1101 
  1102   val = args[charset_arg_unify_map];
  1103   if (! NILP (val) && !STRINGP (val))
  1104     CHECK_VECTOR (val);
  1105   ASET (attrs, charset_unify_map, val);
  1106 
  1107   CHECK_LIST (args[charset_arg_plist]);
  1108   ASET (attrs, charset_plist, args[charset_arg_plist]);
  1109 
  1110   charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
  1111                                     &hash_code);
  1112   if (charset.hash_index >= 0)
  1113     {
  1114       new_definition_p = 0;
  1115       id = XFIXNAT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
  1116       set_hash_value_slot (hash_table, charset.hash_index, attrs);
  1117     }
  1118   else
  1119     {
  1120       charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
  1121                                      hash_code);
  1122       if (charset_table_used == charset_table_size)
  1123         {
  1124           /* Ensure that charset IDs fit into 'int' as well as into the
  1125              restriction imposed by fixnums.  Although the 'int' restriction
  1126              could be removed, too much other code would need altering; for
  1127              example, the IDs are stuffed into struct
  1128              coding_system.charbuf[i] entries, which are 'int'.  */
  1129           int old_size = charset_table_size;
  1130           ptrdiff_t new_size = old_size;
  1131           struct charset *new_table =
  1132             xpalloc (0, &new_size, 1,
  1133                      min (INT_MAX, MOST_POSITIVE_FIXNUM),
  1134                      sizeof *charset_table);
  1135           memcpy (new_table, charset_table, old_size * sizeof *new_table);
  1136           charset_table = new_table;
  1137           charset_table_size = new_size;
  1138           /* FIXME: This leaks memory, as the old charset_table becomes
  1139              unreachable.  If the old charset table is charset_table_init
  1140              then this leak is intentional; otherwise, it's unclear.
  1141              If the latter memory leak is intentional, a
  1142              comment should be added to explain this.  If not, the old
  1143              charset_table should be freed, by passing it as the 1st argument
  1144              to xpalloc and removing the memcpy.  */
  1145         }
  1146       id = charset_table_used++;
  1147       new_definition_p = 1;
  1148     }
  1149 
  1150   ASET (attrs, charset_id, make_fixnum (id));
  1151   charset.id = id;
  1152   charset_table[id] = charset;
  1153 
  1154   if (charset.method == CHARSET_METHOD_MAP)
  1155     {
  1156       load_charset (&charset, 0);
  1157       charset_table[id] = charset;
  1158     }
  1159 
  1160   if (charset.iso_final >= 0)
  1161     {
  1162       ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
  1163                          charset.iso_final) = id;
  1164       if (new_definition_p)
  1165         Viso_2022_charset_list = nconc2 (Viso_2022_charset_list, list1i (id));
  1166       if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
  1167         charset_jisx0201_roman = id;
  1168       else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
  1169         charset_jisx0208_1978 = id;
  1170       else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
  1171         charset_jisx0208 = id;
  1172       else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
  1173         charset_ksc5601 = id;
  1174     }
  1175 
  1176   if (charset.emacs_mule_id >= 0)
  1177     {
  1178       emacs_mule_charset[charset.emacs_mule_id] = id;
  1179       if (charset.emacs_mule_id < 0xA0)
  1180         emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
  1181       else
  1182         emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
  1183       if (new_definition_p)
  1184         Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
  1185                                            list1i (id));
  1186     }
  1187 
  1188   if (new_definition_p)
  1189     {
  1190       Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
  1191       if (charset.supplementary_p)
  1192         Vcharset_ordered_list = nconc2 (Vcharset_ordered_list, list1i (id));
  1193       else
  1194         {
  1195           Lisp_Object tail;
  1196 
  1197           for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
  1198             {
  1199               struct charset *cs = CHARSET_FROM_ID (XFIXNUM (XCAR (tail)));
  1200 
  1201               if (cs->supplementary_p)
  1202                 break;
  1203             }
  1204           if (EQ (tail, Vcharset_ordered_list))
  1205             Vcharset_ordered_list = Fcons (make_fixnum (id),
  1206                                            Vcharset_ordered_list);
  1207           else if (NILP (tail))
  1208             Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
  1209                                             list1i (id));
  1210           else
  1211             {
  1212               val = Fcons (XCAR (tail), XCDR (tail));
  1213               XSETCDR (tail, val);
  1214               XSETCAR (tail, make_fixnum (id));
  1215             }
  1216         }
  1217       charset_ordered_list_tick++;
  1218     }
  1219 
  1220   return Qnil;
  1221 }
  1222 
  1223 
  1224 /* Same as Fdefine_charset_internal but arguments are more convenient
  1225    to call from C (typically in syms_of_charset).  This can define a
  1226    charset of `offset' method only.  Return the ID of the new
  1227    charset.  */
  1228 
  1229 static int
  1230 define_charset_internal (Lisp_Object name,
  1231                          int dimension,
  1232                          const char *code_space_chars,
  1233                          unsigned min_code, unsigned max_code,
  1234                          int iso_final, int iso_revision, int emacs_mule_id,
  1235                          bool ascii_compatible, bool supplementary,
  1236                          int code_offset)
  1237 {
  1238   const unsigned char *code_space = (const unsigned char *) code_space_chars;
  1239   Lisp_Object args[charset_arg_max];
  1240   Lisp_Object val;
  1241   int i;
  1242 
  1243   args[charset_arg_name] = name;
  1244   args[charset_arg_dimension] = make_fixnum (dimension);
  1245   val = make_uninit_vector (8);
  1246   for (i = 0; i < 8; i++)
  1247     ASET (val, i, make_fixnum (code_space[i]));
  1248   args[charset_arg_code_space] = val;
  1249   args[charset_arg_min_code] = make_fixnum (min_code);
  1250   args[charset_arg_max_code] = make_fixnum (max_code);
  1251   args[charset_arg_iso_final]
  1252     = (iso_final < 0 ? Qnil : make_fixnum (iso_final));
  1253   args[charset_arg_iso_revision] = make_fixnum (iso_revision);
  1254   args[charset_arg_emacs_mule_id]
  1255     = (emacs_mule_id < 0 ? Qnil : make_fixnum (emacs_mule_id));
  1256   args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
  1257   args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
  1258   args[charset_arg_invalid_code] = Qnil;
  1259   args[charset_arg_code_offset] = make_fixnum (code_offset);
  1260   args[charset_arg_map] = Qnil;
  1261   args[charset_arg_subset] = Qnil;
  1262   args[charset_arg_superset] = Qnil;
  1263   args[charset_arg_unify_map] = Qnil;
  1264 
  1265   args[charset_arg_plist] =
  1266      list (QCname,
  1267            args[charset_arg_name],
  1268            intern_c_string (":dimension"),
  1269            args[charset_arg_dimension],
  1270            intern_c_string (":code-space"),
  1271            args[charset_arg_code_space],
  1272            intern_c_string (":iso-final-char"),
  1273            args[charset_arg_iso_final],
  1274            intern_c_string (":emacs-mule-id"),
  1275            args[charset_arg_emacs_mule_id],
  1276            QCascii_compatible_p,
  1277            args[charset_arg_ascii_compatible_p],
  1278            intern_c_string (":code-offset"),
  1279            args[charset_arg_code_offset]);
  1280   Fdefine_charset_internal (charset_arg_max, args);
  1281 
  1282   return XFIXNUM (CHARSET_SYMBOL_ID (name));
  1283 }
  1284 
  1285 
  1286 DEFUN ("define-charset-alias", Fdefine_charset_alias,
  1287        Sdefine_charset_alias, 2, 2, 0,
  1288        doc: /* Define ALIAS as an alias for charset CHARSET.  */)
  1289   (Lisp_Object alias, Lisp_Object charset)
  1290 {
  1291   Lisp_Object attr;
  1292 
  1293   CHECK_CHARSET_GET_ATTR (charset, attr);
  1294   Fputhash (alias, attr, Vcharset_hash_table);
  1295   Vcharset_list = Fcons (alias, Vcharset_list);
  1296   return Qnil;
  1297 }
  1298 
  1299 
  1300 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
  1301        doc: /* Return the property list of CHARSET.  */)
  1302   (Lisp_Object charset)
  1303 {
  1304   Lisp_Object attrs;
  1305 
  1306   CHECK_CHARSET_GET_ATTR (charset, attrs);
  1307   return CHARSET_ATTR_PLIST (attrs);
  1308 }
  1309 
  1310 
  1311 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
  1312        doc: /* Set CHARSET's property list to PLIST.  */)
  1313   (Lisp_Object charset, Lisp_Object plist)
  1314 {
  1315   Lisp_Object attrs;
  1316 
  1317   CHECK_CHARSET_GET_ATTR (charset, attrs);
  1318   ASET (attrs, charset_plist, plist);
  1319   return plist;
  1320 }
  1321 
  1322 
  1323 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
  1324        doc: /* Unify characters of CHARSET with Unicode.
  1325 This means reading the relevant file and installing the table defined
  1326 by CHARSET's `:unify-map' property.
  1327 
  1328 Optional second arg UNIFY-MAP is a file name string or a vector.  It has
  1329 the same meaning as the `:unify-map' attribute in the function
  1330 `define-charset' (which see).
  1331 
  1332 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET.  */)
  1333   (Lisp_Object charset, Lisp_Object unify_map, Lisp_Object deunify)
  1334 {
  1335   int id;
  1336   struct charset *cs;
  1337 
  1338   CHECK_CHARSET_GET_ID (charset, id);
  1339   cs = CHARSET_FROM_ID (id);
  1340   if (NILP (deunify)
  1341       ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
  1342       : ! CHARSET_UNIFIED_P (cs))
  1343     return Qnil;
  1344 
  1345   CHARSET_UNIFIED_P (cs) = 0;
  1346   if (NILP (deunify))
  1347     {
  1348       if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
  1349           || CHARSET_CODE_OFFSET (cs) < 0x110000)
  1350         error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
  1351       if (NILP (unify_map))
  1352         unify_map = CHARSET_UNIFY_MAP (cs);
  1353       else
  1354         {
  1355           if (! STRINGP (unify_map) && ! VECTORP (unify_map))
  1356             signal_error ("Bad unify-map", unify_map);
  1357           set_charset_attr (cs, charset_unify_map, unify_map);
  1358         }
  1359       if (NILP (Vchar_unify_table))
  1360         Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
  1361       char_table_set_range (Vchar_unify_table,
  1362                             cs->min_char, cs->max_char, charset);
  1363       CHARSET_UNIFIED_P (cs) = 1;
  1364     }
  1365   else if (CHAR_TABLE_P (Vchar_unify_table))
  1366     {
  1367       unsigned min_code = CHARSET_MIN_CODE (cs);
  1368       unsigned max_code = CHARSET_MAX_CODE (cs);
  1369       int min_char = DECODE_CHAR (cs, min_code);
  1370       int max_char = DECODE_CHAR (cs, max_code);
  1371 
  1372       char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
  1373     }
  1374 
  1375   return Qnil;
  1376 }
  1377 
  1378 /* Check that DIMENSION, CHARS, and FINAL_CHAR specify a valid ISO charset.
  1379    Return true if it's a 96-character set, false if 94.  */
  1380 
  1381 static bool
  1382 check_iso_charset_parameter (Lisp_Object dimension, Lisp_Object chars,
  1383                              Lisp_Object final_char)
  1384 {
  1385   CHECK_FIXNUM (dimension);
  1386   CHECK_FIXNUM (chars);
  1387   CHECK_CHARACTER (final_char);
  1388 
  1389   if (! (1 <= XFIXNUM (dimension) && XFIXNUM (dimension) <= 3))
  1390     error ("Invalid DIMENSION %"pI"d, it should be 1, 2, or 3",
  1391            XFIXNUM (dimension));
  1392 
  1393   bool chars_flag = XFIXNUM (chars) == 96;
  1394   if (! (chars_flag || XFIXNUM (chars) == 94))
  1395     error ("Invalid CHARS %"pI"d, it should be 94 or 96", XFIXNUM (chars));
  1396 
  1397   int final_ch = XFIXNAT (final_char);
  1398   if (! ('0' <= final_ch && final_ch <= '~'))
  1399     error ("Invalid FINAL-CHAR `%c', it should be `0'..`~'", final_ch);
  1400 
  1401   return chars_flag;
  1402 }
  1403 
  1404 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
  1405        Sget_unused_iso_final_char, 2, 2, 0,
  1406        doc: /*
  1407 Return an unused ISO final char for a charset of DIMENSION and CHARS.
  1408 DIMENSION is the number of bytes to represent a character: 1 or 2.
  1409 CHARS is the number of characters in a dimension: 94 or 96.
  1410 
  1411 This final char is for private use, thus the range is `0' (48) .. `?' (63).
  1412 If there's no unused final char for the specified kind of charset,
  1413 return nil.  */)
  1414   (Lisp_Object dimension, Lisp_Object chars)
  1415 {
  1416   bool chars_flag = check_iso_charset_parameter (dimension, chars,
  1417                                                  make_fixnum ('0'));
  1418   for (int final_char = '0'; final_char <= '?'; final_char++)
  1419     if (ISO_CHARSET_TABLE (XFIXNUM (dimension), chars_flag, final_char) < 0)
  1420       return make_fixnum (final_char);
  1421   return Qnil;
  1422 }
  1423 
  1424 
  1425 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
  1426        4, 4, 0,
  1427        doc: /* Declare an equivalent charset for ISO-2022 decoding.
  1428 
  1429 On decoding by an ISO-2022 base coding system, when a charset
  1430 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
  1431 if CHARSET is designated instead.  */)
  1432   (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char, Lisp_Object charset)
  1433 {
  1434   int id;
  1435 
  1436   CHECK_CHARSET_GET_ID (charset, id);
  1437   bool chars_flag = check_iso_charset_parameter (dimension, chars, final_char);
  1438   ISO_CHARSET_TABLE (XFIXNUM (dimension), chars_flag, XFIXNAT (final_char)) = id;
  1439   return Qnil;
  1440 }
  1441 
  1442 
  1443 /* Return information about charsets in the text at PTR of NBYTES
  1444    bytes, which are NCHARS characters.  The value is:
  1445 
  1446         0: Each character is represented by one byte.  This is always
  1447            true for a unibyte string.  For a multibyte string, true if
  1448            it contains only ASCII characters.
  1449 
  1450         1: No charsets other than ascii, control-1, and latin-1 are
  1451            found.
  1452 
  1453         2: Otherwise.
  1454 */
  1455 
  1456 int
  1457 string_xstring_p (Lisp_Object string)
  1458 {
  1459   const unsigned char *p = SDATA (string);
  1460   const unsigned char *endp = p + SBYTES (string);
  1461 
  1462   if (SCHARS (string) == SBYTES (string))
  1463     return 0;
  1464 
  1465   while (p < endp)
  1466     {
  1467       int c = string_char_advance (&p);
  1468 
  1469       if (c >= 0x100)
  1470         return 2;
  1471     }
  1472   return 1;
  1473 }
  1474 
  1475 
  1476 /* Find charsets in the string at PTR of NCHARS and NBYTES.
  1477 
  1478    CHARSETS is a vector.  If Nth element is non-nil, it means the
  1479    charset whose id is N is already found.
  1480 
  1481    It may lookup a translation table TABLE if supplied.  */
  1482 
  1483 static void
  1484 find_charsets_in_text (const unsigned char *ptr, ptrdiff_t nchars,
  1485                        ptrdiff_t nbytes, Lisp_Object charsets,
  1486                        Lisp_Object table, bool multibyte)
  1487 {
  1488   const unsigned char *pend = ptr + nbytes;
  1489 
  1490   if (nchars == nbytes)
  1491     {
  1492       if (multibyte)
  1493         ASET (charsets, charset_ascii, Qt);
  1494       else
  1495         while (ptr < pend)
  1496           {
  1497             int c = *ptr++;
  1498 
  1499             if (!NILP (table))
  1500               c = translate_char (table, c);
  1501             if (ASCII_CHAR_P (c))
  1502               ASET (charsets, charset_ascii, Qt);
  1503             else
  1504               ASET (charsets, charset_eight_bit, Qt);
  1505           }
  1506     }
  1507   else
  1508     {
  1509       while (ptr < pend)
  1510         {
  1511           int c = string_char_advance (&ptr);
  1512           struct charset *charset;
  1513 
  1514           if (!NILP (table))
  1515             c = translate_char (table, c);
  1516           charset = CHAR_CHARSET (c);
  1517           ASET (charsets, CHARSET_ID (charset), Qt);
  1518         }
  1519     }
  1520 }
  1521 
  1522 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
  1523        2, 3, 0,
  1524        doc: /* Return a list of charsets in the region between BEG and END.
  1525 BEG and END are buffer positions.
  1526 Optional arg TABLE if non-nil is a translation table to look up.
  1527 
  1528 If the current buffer is unibyte, the returned list may contain
  1529 only `ascii', `eight-bit-control', and `eight-bit-graphic'.  */)
  1530   (Lisp_Object beg, Lisp_Object end, Lisp_Object table)
  1531 {
  1532   Lisp_Object charsets;
  1533   ptrdiff_t from, from_byte, to, stop, stop_byte;
  1534   int i;
  1535   Lisp_Object val;
  1536   bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
  1537 
  1538   validate_region (&beg, &end);
  1539   from = XFIXNAT (beg);
  1540   stop = to = XFIXNAT (end);
  1541 
  1542   if (from < GPT && GPT < to)
  1543     {
  1544       stop = GPT;
  1545       stop_byte = GPT_BYTE;
  1546     }
  1547   else
  1548     stop_byte = CHAR_TO_BYTE (stop);
  1549 
  1550   from_byte = CHAR_TO_BYTE (from);
  1551 
  1552   charsets = make_nil_vector (charset_table_used);
  1553   while (1)
  1554     {
  1555       find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
  1556                              stop_byte - from_byte, charsets, table,
  1557                              multibyte);
  1558       if (stop < to)
  1559         {
  1560           from = stop, from_byte = stop_byte;
  1561           stop = to, stop_byte = CHAR_TO_BYTE (stop);
  1562         }
  1563       else
  1564         break;
  1565     }
  1566 
  1567   val = Qnil;
  1568   for (i = charset_table_used - 1; i >= 0; i--)
  1569     if (!NILP (AREF (charsets, i)))
  1570       val = Fcons (CHARSET_NAME (charset_table + i), val);
  1571   return val;
  1572 }
  1573 
  1574 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
  1575        1, 2, 0,
  1576        doc: /* Return a list of charsets in STR.
  1577 Optional arg TABLE if non-nil is a translation table to look up.
  1578 
  1579 If STR is unibyte, the returned list may contain
  1580 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
  1581   (Lisp_Object str, Lisp_Object table)
  1582 {
  1583   CHECK_STRING (str);
  1584 
  1585   Lisp_Object charsets = make_nil_vector (charset_table_used);
  1586   find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
  1587                          charsets, table,
  1588                          STRING_MULTIBYTE (str));
  1589   Lisp_Object val = Qnil;
  1590   for (int i = charset_table_used - 1; i >= 0; i--)
  1591     if (!NILP (AREF (charsets, i)))
  1592       val = Fcons (CHARSET_NAME (charset_table + i), val);
  1593   return val;
  1594 }
  1595 
  1596 
  1597 
  1598 /* Return a unified character code for C (>= 0x110000).  VAL is a
  1599    value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
  1600    charset symbol.  */
  1601 static int
  1602 maybe_unify_char (int c, Lisp_Object val)
  1603 {
  1604   struct charset *charset;
  1605 
  1606   if (FIXNUMP (val))
  1607     return XFIXNAT (val);
  1608   if (NILP (val))
  1609     return c;
  1610 
  1611   CHECK_CHARSET_GET_CHARSET (val, charset);
  1612 #ifdef REL_ALLOC
  1613   /* The call to load_charset below can allocate memory, which screws
  1614      callers of this function through STRING_CHAR_* macros that hold C
  1615      pointers to buffer text, if REL_ALLOC is used.  */
  1616   r_alloc_inhibit_buffer_relocation (1);
  1617 #endif
  1618   load_charset (charset, 1);
  1619   if (! inhibit_load_charset_map)
  1620     {
  1621       val = CHAR_TABLE_REF (Vchar_unify_table, c);
  1622       if (! NILP (val))
  1623         c = XFIXNAT (val);
  1624     }
  1625   else
  1626     {
  1627       int code_index = c - CHARSET_CODE_OFFSET (charset);
  1628       int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
  1629 
  1630       if (unified > 0)
  1631         c = unified;
  1632     }
  1633 #ifdef REL_ALLOC
  1634   r_alloc_inhibit_buffer_relocation (0);
  1635 #endif
  1636   return c;
  1637 }
  1638 
  1639 
  1640 /* Return a character corresponding to the code-point CODE of
  1641    CHARSET.  */
  1642 
  1643 int
  1644 decode_char (struct charset *charset, unsigned int code)
  1645 {
  1646   int c, char_index;
  1647   enum charset_method method = CHARSET_METHOD (charset);
  1648 
  1649   if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
  1650     return -1;
  1651 
  1652   if (method == CHARSET_METHOD_SUBSET)
  1653     {
  1654       Lisp_Object subset_info;
  1655 
  1656       subset_info = CHARSET_SUBSET (charset);
  1657       charset = CHARSET_FROM_ID (XFIXNAT (AREF (subset_info, 0)));
  1658       code -= XFIXNUM (AREF (subset_info, 3));
  1659       if (code < XFIXNAT (AREF (subset_info, 1))
  1660           || code > XFIXNAT (AREF (subset_info, 2)))
  1661         c = -1;
  1662       else
  1663         c = DECODE_CHAR (charset, code);
  1664     }
  1665   else if (method == CHARSET_METHOD_SUPERSET)
  1666     {
  1667       Lisp_Object parents;
  1668 
  1669       parents = CHARSET_SUPERSET (charset);
  1670       c = -1;
  1671       for (; CONSP (parents); parents = XCDR (parents))
  1672         {
  1673           int id = XFIXNUM (XCAR (XCAR (parents)));
  1674           int code_offset = XFIXNUM (XCDR (XCAR (parents)));
  1675           unsigned this_code = code - code_offset;
  1676 
  1677           charset = CHARSET_FROM_ID (id);
  1678           if ((c = DECODE_CHAR (charset, this_code)) >= 0)
  1679             break;
  1680         }
  1681     }
  1682   else
  1683     {
  1684       char_index = CODE_POINT_TO_INDEX (charset, code);
  1685       if (char_index < 0)
  1686         return -1;
  1687 
  1688       if (method == CHARSET_METHOD_MAP)
  1689         {
  1690           Lisp_Object decoder;
  1691 
  1692           decoder = CHARSET_DECODER (charset);
  1693           if (! VECTORP (decoder))
  1694             {
  1695               load_charset (charset, 1);
  1696               decoder = CHARSET_DECODER (charset);
  1697             }
  1698           if (VECTORP (decoder))
  1699             c = XFIXNUM (AREF (decoder, char_index));
  1700           else
  1701             c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
  1702         }
  1703       else                      /* method == CHARSET_METHOD_OFFSET */
  1704         {
  1705           c = char_index + CHARSET_CODE_OFFSET (charset);
  1706           if (CHARSET_UNIFIED_P (charset)
  1707               && MAX_UNICODE_CHAR < c && c <= MAX_5_BYTE_CHAR)
  1708             {
  1709               /* Unify C with a Unicode character if possible.  */
  1710               Lisp_Object val = CHAR_TABLE_REF (Vchar_unify_table, c);
  1711               c = maybe_unify_char (c, val);
  1712             }
  1713         }
  1714     }
  1715 
  1716   return c;
  1717 }
  1718 
  1719 /* Variable used temporarily by the macro ENCODE_CHAR.  */
  1720 Lisp_Object charset_work;
  1721 
  1722 /* Return a code-point of C in CHARSET.  If C doesn't belong to
  1723    CHARSET, return CHARSET_INVALID_CODE (CHARSET).  If STRICT is true,
  1724    use CHARSET's strict_max_char instead of max_char.  */
  1725 
  1726 unsigned
  1727 encode_char (struct charset *charset, int c)
  1728 {
  1729   unsigned code;
  1730   enum charset_method method = CHARSET_METHOD (charset);
  1731 
  1732   if (CHARSET_UNIFIED_P (charset))
  1733     {
  1734       Lisp_Object deunifier;
  1735       int code_index = -1;
  1736 
  1737       deunifier = CHARSET_DEUNIFIER (charset);
  1738       if (! CHAR_TABLE_P (deunifier))
  1739         {
  1740           load_charset (charset, 2);
  1741           deunifier = CHARSET_DEUNIFIER (charset);
  1742         }
  1743       if (CHAR_TABLE_P (deunifier))
  1744         {
  1745           Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
  1746 
  1747           if (FIXNUMP (deunified))
  1748             code_index = XFIXNUM (deunified);
  1749         }
  1750       else
  1751         {
  1752           code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
  1753         }
  1754       if (code_index >= 0)
  1755         c = CHARSET_CODE_OFFSET (charset) + code_index;
  1756     }
  1757 
  1758   if (method == CHARSET_METHOD_SUBSET)
  1759     {
  1760       Lisp_Object subset_info;
  1761       struct charset *this_charset;
  1762 
  1763       subset_info = CHARSET_SUBSET (charset);
  1764       this_charset = CHARSET_FROM_ID (XFIXNAT (AREF (subset_info, 0)));
  1765       code = ENCODE_CHAR (this_charset, c);
  1766       if (code == CHARSET_INVALID_CODE (this_charset)
  1767           || code < XFIXNAT (AREF (subset_info, 1))
  1768           || code > XFIXNAT (AREF (subset_info, 2)))
  1769         return CHARSET_INVALID_CODE (charset);
  1770       code += XFIXNUM (AREF (subset_info, 3));
  1771       return code;
  1772     }
  1773 
  1774   if (method == CHARSET_METHOD_SUPERSET)
  1775     {
  1776       Lisp_Object parents;
  1777 
  1778       parents = CHARSET_SUPERSET (charset);
  1779       for (; CONSP (parents); parents = XCDR (parents))
  1780         {
  1781           int id = XFIXNUM (XCAR (XCAR (parents)));
  1782           int code_offset = XFIXNUM (XCDR (XCAR (parents)));
  1783           struct charset *this_charset = CHARSET_FROM_ID (id);
  1784 
  1785           code = ENCODE_CHAR (this_charset, c);
  1786           if (code != CHARSET_INVALID_CODE (this_charset))
  1787             return code + code_offset;
  1788         }
  1789       return CHARSET_INVALID_CODE (charset);
  1790     }
  1791 
  1792   if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
  1793       || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
  1794     return CHARSET_INVALID_CODE (charset);
  1795 
  1796   if (method == CHARSET_METHOD_MAP)
  1797     {
  1798       Lisp_Object encoder;
  1799       Lisp_Object val;
  1800 
  1801       encoder = CHARSET_ENCODER (charset);
  1802       if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
  1803         {
  1804           load_charset (charset, 2);
  1805           encoder = CHARSET_ENCODER (charset);
  1806         }
  1807       if (CHAR_TABLE_P (encoder))
  1808         {
  1809           val = CHAR_TABLE_REF (encoder, c);
  1810           if (NILP (val))
  1811             return CHARSET_INVALID_CODE (charset);
  1812           code = XFIXNUM (val);
  1813           if (! CHARSET_COMPACT_CODES_P (charset))
  1814             code = INDEX_TO_CODE_POINT (charset, code);
  1815         }
  1816       else
  1817         {
  1818           code = GET_TEMP_CHARSET_WORK_ENCODER (c);
  1819           code = INDEX_TO_CODE_POINT (charset, code);
  1820         }
  1821     }
  1822   else                          /* method == CHARSET_METHOD_OFFSET */
  1823     {
  1824       unsigned code_index = c - CHARSET_CODE_OFFSET (charset);
  1825 
  1826       code = INDEX_TO_CODE_POINT (charset, code_index);
  1827     }
  1828 
  1829   return code;
  1830 }
  1831 
  1832 
  1833 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 2, 0,
  1834        doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
  1835 Return nil if CODE-POINT is not valid in CHARSET.
  1836 
  1837 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE),
  1838 although this usage is obsolescent.  */)
  1839   (Lisp_Object charset, Lisp_Object code_point)
  1840 {
  1841   int c, id;
  1842   unsigned code;
  1843   struct charset *charsetp;
  1844 
  1845   CHECK_CHARSET_GET_ID (charset, id);
  1846   code = cons_to_unsigned (code_point, UINT_MAX);
  1847   charsetp = CHARSET_FROM_ID (id);
  1848   c = DECODE_CHAR (charsetp, code);
  1849   return (c >= 0 ? make_fixnum (c) : Qnil);
  1850 }
  1851 
  1852 
  1853 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 2, 0,
  1854        doc: /* Encode the character CH into a code-point of CHARSET.
  1855 Return the encoded code-point as an integer,
  1856 or nil if CHARSET doesn't support CH.  */)
  1857   (Lisp_Object ch, Lisp_Object charset)
  1858 {
  1859   int c, id;
  1860   unsigned code;
  1861   struct charset *charsetp;
  1862 
  1863   CHECK_CHARSET_GET_ID (charset, id);
  1864   CHECK_CHARACTER (ch);
  1865   c = XFIXNAT (ch);
  1866   charsetp = CHARSET_FROM_ID (id);
  1867   code = ENCODE_CHAR (charsetp, c);
  1868   if (code == CHARSET_INVALID_CODE (charsetp))
  1869     return Qnil;
  1870   /* There are much fewer codepoints in the world than we have positive
  1871      fixnums, so it could be argued that we never really need a bignum,
  1872      e.g. Unicode codepoints only need 21bit, and China's GB-10830
  1873      can fit in 22bit.  Yet we encode GB-10830's chars in a sparse way
  1874      (we just take the 4byte sequences as a 32bit int), so some
  1875      GB-10830 chars (such as 0x81308130 in etc/charsets/gb108304.map) end
  1876      up represented as bignums if EMACS_INT is 32 bits.  */
  1877   return INT_TO_INTEGER (code);
  1878 }
  1879 
  1880 
  1881 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
  1882        doc:
  1883        /* Return a character of CHARSET whose position codes are CODEn.
  1884 
  1885 CODE1 through CODE4 are optional, but if you don't supply sufficient
  1886 position codes, it is assumed that the minimum code in each dimension
  1887 is specified.  */)
  1888   (Lisp_Object charset, Lisp_Object code1, Lisp_Object code2, Lisp_Object code3, Lisp_Object code4)
  1889 {
  1890   int id, dimension;
  1891   struct charset *charsetp;
  1892   unsigned code;
  1893   int c;
  1894 
  1895   CHECK_CHARSET_GET_ID (charset, id);
  1896   charsetp = CHARSET_FROM_ID (id);
  1897 
  1898   dimension = CHARSET_DIMENSION (charsetp);
  1899   if (NILP (code1))
  1900     code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
  1901             ? 0 : CHARSET_MIN_CODE (charsetp));
  1902   else
  1903     {
  1904       CHECK_FIXNAT (code1);
  1905       if (XFIXNAT (code1) >= 0x100)
  1906         args_out_of_range (make_fixnum (0xFF), code1);
  1907       code = XFIXNAT (code1);
  1908 
  1909       if (dimension > 1)
  1910         {
  1911           code <<= 8;
  1912           if (NILP (code2))
  1913             code |= charsetp->code_space[(dimension - 2) * 4];
  1914           else
  1915             {
  1916               CHECK_FIXNAT (code2);
  1917               if (XFIXNAT (code2) >= 0x100)
  1918                 args_out_of_range (make_fixnum (0xFF), code2);
  1919               code |= XFIXNAT (code2);
  1920             }
  1921 
  1922           if (dimension > 2)
  1923             {
  1924               code <<= 8;
  1925               if (NILP (code3))
  1926                 code |= charsetp->code_space[(dimension - 3) * 4];
  1927               else
  1928                 {
  1929                   CHECK_FIXNAT (code3);
  1930                   if (XFIXNAT (code3) >= 0x100)
  1931                     args_out_of_range (make_fixnum (0xFF), code3);
  1932                   code |= XFIXNAT (code3);
  1933                 }
  1934 
  1935               if (dimension > 3)
  1936                 {
  1937                   code <<= 8;
  1938                   if (NILP (code4))
  1939                     code |= charsetp->code_space[0];
  1940                   else
  1941                     {
  1942                       CHECK_FIXNAT (code4);
  1943                       if (XFIXNAT (code4) >= 0x100)
  1944                         args_out_of_range (make_fixnum (0xFF), code4);
  1945                       code |= XFIXNAT (code4);
  1946                     }
  1947                 }
  1948             }
  1949         }
  1950     }
  1951 
  1952   if (CHARSET_ISO_FINAL (charsetp) >= 0)
  1953     code &= 0x7F7F7F7F;
  1954   c = DECODE_CHAR (charsetp, code);
  1955   if (c < 0)
  1956     error ("Invalid code(s)");
  1957   return make_fixnum (c);
  1958 }
  1959 
  1960 
  1961 /* Return the first charset in CHARSET_LIST that contains C.
  1962    CHARSET_LIST is a list of charset IDs.  If it is nil, use
  1963    Vcharset_ordered_list.  */
  1964 
  1965 struct charset *
  1966 char_charset (int c, Lisp_Object charset_list, unsigned int *code_return)
  1967 {
  1968   bool maybe_null = 0;
  1969 
  1970   if (NILP (charset_list))
  1971     charset_list = Vcharset_ordered_list;
  1972   else
  1973     maybe_null = 1;
  1974 
  1975   while (CONSP (charset_list))
  1976     {
  1977       struct charset *charset = CHARSET_FROM_ID (XFIXNUM (XCAR (charset_list)));
  1978       unsigned code = ENCODE_CHAR (charset, c);
  1979 
  1980       if (code != CHARSET_INVALID_CODE (charset))
  1981         {
  1982           if (code_return)
  1983             *code_return = code;
  1984           return charset;
  1985         }
  1986       charset_list = XCDR (charset_list);
  1987       if (! maybe_null
  1988           && c <= MAX_UNICODE_CHAR
  1989           && EQ (charset_list, Vcharset_non_preferred_head))
  1990         return CHARSET_FROM_ID (charset_unicode);
  1991     }
  1992   return (maybe_null ? NULL
  1993           : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
  1994           : CHARSET_FROM_ID (charset_eight_bit));
  1995 }
  1996 
  1997 
  1998 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
  1999        doc:
  2000        /*Return list of charset and one to four position-codes of CH.
  2001 The charset is decided by the current priority order of charsets.
  2002 A position-code is a byte value of each dimension of the code-point of
  2003 CH in the charset.  */)
  2004   (Lisp_Object ch)
  2005 {
  2006   struct charset *charset;
  2007   int c, dimension;
  2008   unsigned code;
  2009   Lisp_Object val;
  2010 
  2011   CHECK_CHARACTER (ch);
  2012   c = XFIXNAT (ch);
  2013   charset = CHAR_CHARSET (c);
  2014   if (! charset)
  2015     emacs_abort ();
  2016   code = ENCODE_CHAR (charset, c);
  2017   if (code == CHARSET_INVALID_CODE (charset))
  2018     emacs_abort ();
  2019   dimension = CHARSET_DIMENSION (charset);
  2020   for (val = Qnil; dimension > 0; dimension--)
  2021     {
  2022       val = Fcons (make_fixnum (code & 0xFF), val);
  2023       code >>= 8;
  2024     }
  2025   return Fcons (CHARSET_NAME (charset), val);
  2026 }
  2027 
  2028 
  2029 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
  2030        doc: /* Return the charset of highest priority that contains CH.
  2031 ASCII characters are an exception: for them, this function always
  2032 returns `ascii'.
  2033 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
  2034 from which to find the charset.  It may also be a coding system.  In
  2035 that case, find the charset from what supported by that coding system.  */)
  2036   (Lisp_Object ch, Lisp_Object restriction)
  2037 {
  2038   struct charset *charset;
  2039 
  2040   CHECK_CHARACTER (ch);
  2041   if (NILP (restriction))
  2042     charset = CHAR_CHARSET (XFIXNUM (ch));
  2043   else
  2044     {
  2045       if (CONSP (restriction))
  2046         {
  2047           int c = XFIXNAT (ch);
  2048 
  2049           for (; CONSP (restriction); restriction = XCDR (restriction))
  2050             {
  2051               struct charset *rcharset;
  2052 
  2053               CHECK_CHARSET_GET_CHARSET (XCAR (restriction), rcharset);
  2054               if (ENCODE_CHAR (rcharset, c) != CHARSET_INVALID_CODE (rcharset))
  2055                 return XCAR (restriction);
  2056             }
  2057           return Qnil;
  2058         }
  2059       restriction = coding_system_charset_list (restriction);
  2060       charset = char_charset (XFIXNUM (ch), restriction, NULL);
  2061       if (! charset)
  2062         return Qnil;
  2063     }
  2064   return (CHARSET_NAME (charset));
  2065 }
  2066 
  2067 
  2068 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
  2069        doc: /*
  2070 Return charset of a character in the current buffer at position POS.
  2071 If POS is nil, it defaults to the current point.
  2072 If POS is out of range, the value is nil.  */)
  2073   (Lisp_Object pos)
  2074 {
  2075   Lisp_Object ch;
  2076   struct charset *charset;
  2077 
  2078   ch = Fchar_after (pos);
  2079   if (! FIXNUMP (ch))
  2080     return ch;
  2081   charset = CHAR_CHARSET (XFIXNUM (ch));
  2082   return (CHARSET_NAME (charset));
  2083 }
  2084 
  2085 
  2086 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
  2087        doc: /*
  2088 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
  2089 
  2090 ISO 2022's designation sequence (escape sequence) distinguishes charsets
  2091 by their DIMENSION, CHARS, and FINAL-CHAR,
  2092 whereas Emacs distinguishes them by charset symbol.
  2093 See the documentation of the function `charset-info' for the meanings of
  2094 DIMENSION, CHARS, and FINAL-CHAR.  */)
  2095   (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
  2096 {
  2097   bool chars_flag = check_iso_charset_parameter (dimension, chars, final_char);
  2098   int id = ISO_CHARSET_TABLE (XFIXNUM (dimension), chars_flag,
  2099                               XFIXNAT (final_char));
  2100   return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
  2101 }
  2102 
  2103 
  2104 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
  2105        0, 0, 0,
  2106        doc: /*
  2107 Internal use only.
  2108 Clear temporary charset mapping tables.
  2109 It should be called only from temacs invoked for dumping.  */)
  2110   (void)
  2111 {
  2112   if (temp_charset_work)
  2113     {
  2114       xfree (temp_charset_work);
  2115       temp_charset_work = NULL;
  2116     }
  2117 
  2118   if (CHAR_TABLE_P (Vchar_unify_table))
  2119     Foptimize_char_table (Vchar_unify_table, Qnil);
  2120 
  2121   return Qnil;
  2122 }
  2123 
  2124 DEFUN ("charset-priority-list", Fcharset_priority_list,
  2125        Scharset_priority_list, 0, 1, 0,
  2126        doc: /* Return the list of charsets ordered by priority.
  2127 HIGHESTP non-nil means just return the highest priority one.  */)
  2128   (Lisp_Object highestp)
  2129 {
  2130   Lisp_Object val = Qnil, list = Vcharset_ordered_list;
  2131 
  2132   if (!NILP (highestp))
  2133     return CHARSET_NAME (CHARSET_FROM_ID (XFIXNUM (Fcar (list))));
  2134 
  2135   while (!NILP (list))
  2136     {
  2137       val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XFIXNUM (XCAR (list)))), val);
  2138       list = XCDR (list);
  2139     }
  2140   return Fnreverse (val);
  2141 }
  2142 
  2143 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
  2144        1, MANY, 0,
  2145        doc: /* Assign higher priority to the charsets given as arguments.
  2146 usage: (set-charset-priority &rest charsets)  */)
  2147   (ptrdiff_t nargs, Lisp_Object *args)
  2148 {
  2149   Lisp_Object new_head, old_list;
  2150   Lisp_Object list_2022, list_emacs_mule;
  2151   ptrdiff_t i;
  2152   int id;
  2153 
  2154   old_list = Fcopy_sequence (Vcharset_ordered_list);
  2155   new_head = Qnil;
  2156   for (i = 0; i < nargs; i++)
  2157     {
  2158       CHECK_CHARSET_GET_ID (args[i], id);
  2159       if (! NILP (Fmemq (make_fixnum (id), old_list)))
  2160         {
  2161           old_list = Fdelq (make_fixnum (id), old_list);
  2162           new_head = Fcons (make_fixnum (id), new_head);
  2163         }
  2164     }
  2165   Vcharset_non_preferred_head = old_list;
  2166   Vcharset_ordered_list = nconc2 (Fnreverse (new_head), old_list);
  2167 
  2168   charset_ordered_list_tick++;
  2169 
  2170   charset_unibyte = -1;
  2171   for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
  2172        CONSP (old_list); old_list = XCDR (old_list))
  2173     {
  2174       if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
  2175         list_2022 = Fcons (XCAR (old_list), list_2022);
  2176       if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
  2177         list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
  2178       if (charset_unibyte < 0)
  2179         {
  2180           struct charset *charset = CHARSET_FROM_ID (XFIXNUM (XCAR (old_list)));
  2181 
  2182           if (CHARSET_DIMENSION (charset) == 1
  2183               && CHARSET_ASCII_COMPATIBLE_P (charset)
  2184               && CHARSET_MAX_CHAR (charset) >= 0x80)
  2185             charset_unibyte = CHARSET_ID (charset);
  2186         }
  2187     }
  2188   Viso_2022_charset_list = Fnreverse (list_2022);
  2189   Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
  2190   if (charset_unibyte < 0)
  2191     charset_unibyte = charset_iso_8859_1;
  2192 
  2193   return Qnil;
  2194 }
  2195 
  2196 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
  2197        0, 1, 0,
  2198        doc: /* Internal use only.
  2199 Return charset identification number of CHARSET.  */)
  2200   (Lisp_Object charset)
  2201 {
  2202   int id;
  2203 
  2204   CHECK_CHARSET_GET_ID (charset, id);
  2205   return make_fixnum (id);
  2206 }
  2207 
  2208 struct charset_sort_data
  2209 {
  2210   Lisp_Object charset;
  2211   int id;
  2212   ptrdiff_t priority;
  2213 };
  2214 
  2215 static int
  2216 charset_compare (const void *d1, const void *d2)
  2217 {
  2218   const struct charset_sort_data *data1 = d1, *data2 = d2;
  2219   if (data1->priority != data2->priority)
  2220     return data1->priority < data2->priority ? -1 : 1;
  2221   return 0;
  2222 }
  2223 
  2224 DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0,
  2225        doc: /* Sort charset list CHARSETS by a priority of each charset.
  2226 Return the sorted list.  CHARSETS is modified by side effects.
  2227 See also `charset-priority-list' and `set-charset-priority'.  */)
  2228      (Lisp_Object charsets)
  2229 {
  2230   ptrdiff_t n = list_length (charsets), i, j;
  2231   int done;
  2232   Lisp_Object tail, elt, attrs;
  2233   struct charset_sort_data *sort_data;
  2234   int id, min_id = INT_MAX, max_id = INT_MIN;
  2235   USE_SAFE_ALLOCA;
  2236 
  2237   if (n == 0)
  2238     return Qnil;
  2239   SAFE_NALLOCA (sort_data, 1, n);
  2240   for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++)
  2241     {
  2242       elt = XCAR (tail);
  2243       CHECK_CHARSET_GET_ATTR (elt, attrs);
  2244       sort_data[i].charset = elt;
  2245       sort_data[i].id = id = XFIXNUM (CHARSET_ATTR_ID (attrs));
  2246       if (id < min_id)
  2247         min_id = id;
  2248       if (id > max_id)
  2249         max_id = id;
  2250     }
  2251   for (done = 0, tail = Vcharset_ordered_list, i = 0;
  2252        done < n && CONSP (tail); tail = XCDR (tail), i++)
  2253     {
  2254       elt = XCAR (tail);
  2255       id = XFIXNAT (elt);
  2256       if (id >= min_id && id <= max_id)
  2257         for (j = 0; j < n; j++)
  2258           if (sort_data[j].id == id)
  2259             {
  2260               sort_data[j].priority = i;
  2261               done++;
  2262             }
  2263     }
  2264   qsort (sort_data, n, sizeof *sort_data, charset_compare);
  2265   for (i = 0, tail = charsets; CONSP (tail); tail = XCDR (tail), i++)
  2266     XSETCAR (tail, sort_data[i].charset);
  2267   SAFE_FREE ();
  2268   return charsets;
  2269 }
  2270 
  2271 
  2272 void
  2273 init_charset (void)
  2274 {
  2275   Lisp_Object tempdir;
  2276   tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
  2277   if (! file_accessible_directory_p (tempdir))
  2278     {
  2279       /* This used to be non-fatal (dir_warning), but it should not
  2280          happen, and if it does sooner or later it will cause some
  2281          obscure problem (eg bug#6401), so better exit.  */
  2282       fprintf (stderr,
  2283                ("Error: %s: %s\n"
  2284                 "Emacs will not function correctly "
  2285                 "without the character map files.\n"
  2286                 "%s"
  2287                 "Please check your installation!\n"),
  2288                SDATA (tempdir), strerror (errno),
  2289                (egetenv ("EMACSDATA")
  2290                 ? ("The EMACSDATA environment variable is set.  "
  2291                    "Maybe it has the wrong value?\n")
  2292                 : ""));
  2293       exit (1);
  2294     }
  2295 
  2296   Vcharset_map_path = list1 (tempdir);
  2297 }
  2298 
  2299 
  2300 void
  2301 init_charset_once (void)
  2302 {
  2303   int i, j, k;
  2304 
  2305   for (i = 0; i < ISO_MAX_DIMENSION; i++)
  2306     for (j = 0; j < ISO_MAX_CHARS; j++)
  2307       for (k = 0; k < ISO_MAX_FINAL; k++)
  2308         iso_charset_table[i][j][k] = -1;
  2309 
  2310   PDUMPER_REMEMBER_SCALAR (iso_charset_table);
  2311 
  2312   for (i = 0; i < 256; i++)
  2313     emacs_mule_charset[i] = -1;
  2314 
  2315   PDUMPER_REMEMBER_SCALAR (emacs_mule_charset);
  2316 
  2317   charset_jisx0201_roman = -1;
  2318   PDUMPER_REMEMBER_SCALAR (charset_jisx0201_roman);
  2319 
  2320   charset_jisx0208_1978 = -1;
  2321   PDUMPER_REMEMBER_SCALAR (charset_jisx0208_1978);
  2322 
  2323   charset_jisx0208 = -1;
  2324   PDUMPER_REMEMBER_SCALAR (charset_jisx0208);
  2325 
  2326   charset_ksc5601 = -1;
  2327   PDUMPER_REMEMBER_SCALAR (charset_ksc5601);
  2328 }
  2329 
  2330 /* Allocate an initial charset table that is large enough to handle
  2331    Emacs while it is bootstrapping.  As of September 2011, the size
  2332    needs to be at least 166; make it a bit bigger to allow for future
  2333    expansion.
  2334 
  2335    Don't make the value so small that the table is reallocated during
  2336    bootstrapping, as glibc malloc calls larger than just under 64 KiB
  2337    during an initial bootstrap wreak havoc after dumping; see the
  2338    M_MMAP_THRESHOLD value in alloc.c, plus there is an extra overhead
  2339    internal to glibc malloc and perhaps to Emacs malloc debugging.  */
  2340 static struct charset charset_table_init[180];
  2341 
  2342 void
  2343 syms_of_charset (void)
  2344 {
  2345   DEFSYM (Qcharsetp, "charsetp");
  2346 
  2347   /* Special charset symbols.  */
  2348   DEFSYM (Qascii, "ascii");
  2349   DEFSYM (Qunicode, "unicode");
  2350   DEFSYM (Qemacs, "emacs");
  2351   DEFSYM (Qeight_bit, "eight-bit");
  2352   DEFSYM (Qiso_8859_1, "iso-8859-1");
  2353 
  2354   staticpro (&Vcharset_ordered_list);
  2355   Vcharset_ordered_list = Qnil;
  2356 
  2357   staticpro (&Viso_2022_charset_list);
  2358   Viso_2022_charset_list = Qnil;
  2359 
  2360   staticpro (&Vemacs_mule_charset_list);
  2361   Vemacs_mule_charset_list = Qnil;
  2362 
  2363   staticpro (&Vcharset_hash_table);
  2364   Vcharset_hash_table = CALLN (Fmake_hash_table, QCtest, Qeq);
  2365 
  2366   charset_table = charset_table_init;
  2367   charset_table_size = ARRAYELTS (charset_table_init);
  2368   PDUMPER_REMEMBER_SCALAR (charset_table_size);
  2369   charset_table_used = 0;
  2370   PDUMPER_REMEMBER_SCALAR (charset_table_used);
  2371 
  2372   defsubr (&Scharsetp);
  2373   defsubr (&Smap_charset_chars);
  2374   defsubr (&Sdefine_charset_internal);
  2375   defsubr (&Sdefine_charset_alias);
  2376   defsubr (&Scharset_plist);
  2377   defsubr (&Sset_charset_plist);
  2378   defsubr (&Sunify_charset);
  2379   defsubr (&Sget_unused_iso_final_char);
  2380   defsubr (&Sdeclare_equiv_charset);
  2381   defsubr (&Sfind_charset_region);
  2382   defsubr (&Sfind_charset_string);
  2383   defsubr (&Sdecode_char);
  2384   defsubr (&Sencode_char);
  2385   defsubr (&Ssplit_char);
  2386   defsubr (&Smake_char);
  2387   defsubr (&Schar_charset);
  2388   defsubr (&Scharset_after);
  2389   defsubr (&Siso_charset);
  2390   defsubr (&Sclear_charset_maps);
  2391   defsubr (&Scharset_priority_list);
  2392   defsubr (&Sset_charset_priority);
  2393   defsubr (&Scharset_id_internal);
  2394   defsubr (&Ssort_charsets);
  2395 
  2396   DEFVAR_LISP ("charset-map-path", Vcharset_map_path,
  2397                doc: /* List of directories to search for charset map files.  */);
  2398   Vcharset_map_path = Qnil;
  2399 
  2400   DEFVAR_BOOL ("inhibit-load-charset-map", inhibit_load_charset_map,
  2401                doc: /* Inhibit loading of charset maps.  Used when dumping Emacs.  */);
  2402   inhibit_load_charset_map = 0;
  2403 
  2404   DEFVAR_LISP ("charset-list", Vcharset_list,
  2405                doc: /* List of all charsets ever defined.  */);
  2406   Vcharset_list = Qnil;
  2407 
  2408   DEFVAR_LISP ("current-iso639-language", Vcurrent_iso639_language,
  2409                doc: /* ISO639 language mnemonic symbol for the current language environment.
  2410 If the current language environment is for multiple languages (e.g. "Latin-1"),
  2411 the value may be a list of mnemonics.  */);
  2412   Vcurrent_iso639_language = Qnil;
  2413 
  2414   charset_ascii
  2415     = define_charset_internal (Qascii, 1, "\x00\x7F\0\0\0\0\0",
  2416                                0, 127, 'B', -1, 0, 1, 0, 0);
  2417   PDUMPER_REMEMBER_SCALAR (charset_ascii);
  2418 
  2419   charset_iso_8859_1
  2420     = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\0\0\0\0\0",
  2421                                0, 255, -1, -1, -1, 1, 0, 0);
  2422   PDUMPER_REMEMBER_SCALAR (charset_iso_8859_1);
  2423 
  2424   charset_unicode
  2425     = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10\0",
  2426                                0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
  2427   PDUMPER_REMEMBER_SCALAR (charset_unicode);
  2428 
  2429   charset_emacs
  2430     = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F\0",
  2431                                0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
  2432   PDUMPER_REMEMBER_SCALAR (charset_emacs);
  2433 
  2434   charset_eight_bit
  2435     = define_charset_internal (Qeight_bit, 1, "\x80\xFF\0\0\0\0\0",
  2436                                128, 255, -1, 0, -1, 0, 1,
  2437                                MAX_5_BYTE_CHAR + 1);
  2438   PDUMPER_REMEMBER_SCALAR (charset_eight_bit);
  2439 
  2440   charset_unibyte = charset_iso_8859_1;
  2441   PDUMPER_REMEMBER_SCALAR (charset_unibyte);
  2442 }

/* [<][>][^][v][top][bottom][index][help] */