root/src/charset.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. load_charset_map
  2. read_hex
  3. load_charset_map_from_file
  4. load_charset_map_from_vector
  5. load_charset
  6. DEFUN
  7. map_charset_for_dump
  8. map_charset_chars
  9. define_charset_internal
  10. DEFUN
  11. check_iso_charset_parameter
  12. string_xstring_p
  13. find_charsets_in_text
  14. maybe_unify_char
  15. decode_char
  16. encode_char
  17. char_charset
  18. DEFUN
  19. DEFUN
  20. DEFUN
  21. DEFUN
  22. DEFUN
  23. charset_compare
  24. DEFUN
  25. init_charset
  26. init_charset_once
  27. syms_of_charset

     1 /* Basic character set support.
     2 
     3 Copyright (C) 2001-2023 Free Software Foundation, Inc.
     4 
     5 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
     6   2005, 2006, 2007, 2008, 2009, 2010, 2011
     7   National Institute of Advanced Industrial Science and Technology (AIST)
     8   Registration Number H14PRO021
     9 
    10 Copyright (C) 2003, 2004
    11   National Institute of Advanced Industrial Science and Technology (AIST)
    12   Registration Number H13PRO009
    13 
    14 This file is part of GNU Emacs.
    15 
    16 GNU Emacs is free software: you can redistribute it and/or modify
    17 it under the terms of the GNU General Public License as published by
    18 the Free Software Foundation, either version 3 of the License, or (at
    19 your option) any later version.
    20 
    21 GNU Emacs is distributed in the hope that it will be useful,
    22 but WITHOUT ANY WARRANTY; without even the implied warranty of
    23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    24 GNU General Public License for more details.
    25 
    26 You should have received a copy of the GNU General Public License
    27 along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
    28 
    29 #include <config.h>
    30 
    31 #include <errno.h>
    32 #include <stdlib.h>
    33 #include <unistd.h>
    34 #include <limits.h>
    35 #include <sys/types.h>
    36 #include "lisp.h"
    37 #include "character.h"
    38 #include "charset.h"
    39 #include "coding.h"
    40 #include "buffer.h"
    41 #include "sysstdio.h"
    42 #include "pdumper.h"
    43 
    44 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
    45 
    46   A coded character set ("charset" hereafter) is a meaningful
    47   collection (i.e. language, culture, functionality, etc.) of
    48   characters.  Emacs handles multiple charsets at once.  In Emacs Lisp
    49   code, a charset is represented by a symbol.  In C code, a charset is
    50   represented by its ID number or by a pointer to a struct charset.
    51 
    52   The actual information about each charset is stored in two places.
    53   Lispy information is stored in the hash table Vcharset_hash_table as
    54   a vector (charset attributes).  The other information is stored in
    55   charset_table as a struct charset.
    56 
    57 */
    58 
    59 /* Hash table that contains attributes of each charset.  Keys are
    60    charset symbols, and values are vectors of charset attributes.  */
    61 Lisp_Object Vcharset_hash_table;
    62 
    63 /* Table of struct charset.  */
    64 struct charset *charset_table;
    65 int charset_table_size;
    66 int charset_table_used;
    67 
    68 /* Special charsets corresponding to symbols.  */
    69 int charset_ascii;
    70 int charset_eight_bit;
    71 static int charset_iso_8859_1;
    72 int charset_unicode;
    73 static int charset_emacs;
    74 
    75 /* The other special charsets.  */
    76 int charset_jisx0201_roman;
    77 int charset_jisx0208_1978;
    78 int charset_jisx0208;
    79 int charset_ksc5601;
    80 
    81 /* Charset of unibyte characters.  */
    82 int charset_unibyte;
    83 
    84 /* List of charsets ordered by the priority.  */
    85 Lisp_Object Vcharset_ordered_list;
    86 
    87 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
    88    charsets.  */
    89 Lisp_Object Vcharset_non_preferred_head;
    90 
    91 /* Incremented every time we change the priority of charsets.
    92    Wraps around.  */
    93 EMACS_UINT charset_ordered_list_tick;
    94 
    95 /* List of iso-2022 charsets.  */
    96 Lisp_Object Viso_2022_charset_list;
    97 
    98 /* List of emacs-mule charsets.  */
    99 Lisp_Object Vemacs_mule_charset_list;
   100 
   101 int emacs_mule_charset[256];
   102 
   103 /* Mapping table from ISO2022's charset (specified by DIMENSION,
   104    CHARS, and FINAL-CHAR) to Emacs' charset.  */
   105 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
   106 
   107 #define CODE_POINT_TO_INDEX(charset, code)                              \
   108   ((charset)->code_linear_p                                             \
   109    ? (int) ((code) - (charset)->min_code)                               \
   110    : (((charset)->code_space_mask[(code) >> 24] & 0x8)                  \
   111       && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4)      \
   112       && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2)       \
   113       && ((charset)->code_space_mask[(code) & 0xFF] & 0x1))             \
   114    ? (int) (((((code) >> 24) - (charset)->code_space[12])               \
   115              * (charset)->code_space[11])                               \
   116             + (((((code) >> 16) & 0xFF) - (charset)->code_space[8])     \
   117                * (charset)->code_space[7])                              \
   118             + (((((code) >> 8) & 0xFF) - (charset)->code_space[4])      \
   119                * (charset)->code_space[3])                              \
   120             + (((code) & 0xFF) - (charset)->code_space[0])              \
   121             - ((charset)->char_index_offset))                           \
   122    : -1)
   123 
   124 
   125 /* Return the code-point for the character index IDX in CHARSET.
   126    IDX should be an unsigned int variable in a valid range (which is
   127    always in nonnegative int range too).  IDX contains garbage afterwards.  */
   128 
   129 #define INDEX_TO_CODE_POINT(charset, idx)                                    \
   130   ((charset)->code_linear_p                                                  \
   131    ? (idx) + (charset)->min_code                                             \
   132    : (idx += (charset)->char_index_offset,                                   \
   133       (((charset)->code_space[0] + (idx) % (charset)->code_space[2])         \
   134        | (((charset)->code_space[4]                                          \
   135            + ((idx) / (charset)->code_space[3] % (charset)->code_space[6]))  \
   136           << 8)                                                              \
   137        | (((charset)->code_space[8]                                          \
   138            + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
   139           << 16)                                                             \
   140        | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11]))  \
   141           << 24))))
   142 
   143 /* Structure to hold mapping tables for a charset.  Used by temacs
   144    invoked for dumping.  */
   145 
   146 static struct
   147 {
   148   /* The current charset for which the following tables are setup.  */
   149   struct charset *current;
   150 
   151   /* 1 iff the following table is used for encoder.  */
   152   short for_encoder;
   153 
   154   /* When the following table is used for encoding, minimum and
   155      maximum character of the current charset.  */
   156   int min_char, max_char;
   157 
   158   /* A Unicode character corresponding to the code index 0 (i.e. the
   159      minimum code-point) of the current charset, or -1 if the code
   160      index 0 is not a Unicode character.  This is checked when
   161      table.encoder[CHAR] is zero.  */
   162   int zero_index_char;
   163 
   164   union {
   165     /* Table mapping code-indices (not code-points) of the current
   166        charset to Unicode characters.  If decoder[CHAR] is -1, CHAR
   167        doesn't belong to the current charset.  */
   168     int decoder[0x10000];
   169     /* Table mapping Unicode characters to code-indices of the current
   170        charset.  The first 0x10000 elements are for BMP (0..0xFFFF),
   171        and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
   172        (0x20000..0x2FFFF).  Note that there is no charset map that
   173        uses both SMP and SIP.  */
   174     unsigned short encoder[0x20000];
   175   } table;
   176 } *temp_charset_work;
   177 
   178 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE)                  \
   179   do {                                                          \
   180     if ((CODE) == 0)                                            \
   181       temp_charset_work->zero_index_char = (C);                 \
   182     else if ((C) < 0x20000)                                     \
   183       temp_charset_work->table.encoder[(C)] = (CODE);           \
   184     else                                                        \
   185       temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
   186   } while (0)
   187 
   188 #define GET_TEMP_CHARSET_WORK_ENCODER(C)                                  \
   189   ((C) == temp_charset_work->zero_index_char ? 0                          \
   190    : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)]               \
   191                       ? (int) temp_charset_work->table.encoder[(C)] : -1) \
   192    : temp_charset_work->table.encoder[(C) - 0x10000]                      \
   193    ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
   194 
   195 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE)  \
   196   (temp_charset_work->table.decoder[(CODE)] = (C))
   197 
   198 #define GET_TEMP_CHARSET_WORK_DECODER(CODE)     \
   199   (temp_charset_work->table.decoder[(CODE)])
   200 
   201 
   202 /* Set to 1 to warn that a charset map is loaded and thus a buffer
   203    text and a string data may be relocated.  */
   204 bool charset_map_loaded;
   205 
   206 struct charset_map_entries
   207 {
   208   struct {
   209     unsigned from, to;
   210     int c;
   211   } entry[0x10000];
   212   struct charset_map_entries *next;
   213 };
   214 
   215 /* Load the mapping information of CHARSET from ENTRIES for
   216    initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
   217    encoding (CONTROL_FLAG == 2).
   218 
   219    If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
   220    and CHARSET->fast_map.
   221 
   222    If CONTROL_FLAG is 1, setup the following tables according to
   223    CHARSET->method and inhibit_load_charset_map.
   224 
   225    CHARSET->method       | inhibit_lcm == 0   | inhibit_lcm == 1
   226    ----------------------+--------------------+---------------------------
   227    CHARSET_METHOD_MAP    | CHARSET->decoder   | temp_charset_work->decoder
   228    ----------------------+--------------------+---------------------------
   229    CHARSET_METHOD_OFFSET | Vchar_unify_table  | temp_charset_work->decoder
   230 
   231    If CONTROL_FLAG is 2, setup the following tables.
   232 
   233    CHARSET->method       | inhibit_lcm == 0   | inhibit_lcm == 1
   234    ----------------------+--------------------+---------------------------
   235    CHARSET_METHOD_MAP    | CHARSET->encoder   | temp_charset_work->encoder
   236    ----------------------+--------------------+--------------------------
   237    CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
   238 */
   239 
   240 static void
   241 load_charset_map (struct charset *charset, struct charset_map_entries *entries, int n_entries, int control_flag)
   242 {
   243   Lisp_Object vec UNINIT;
   244   Lisp_Object table UNINIT;
   245   unsigned max_code = CHARSET_MAX_CODE (charset);
   246   bool ascii_compatible_p = charset->ascii_compatible_p;
   247   int min_char, max_char, nonascii_min_char;
   248   int i;
   249   unsigned char *fast_map = charset->fast_map;
   250 
   251   if (n_entries <= 0)
   252     return;
   253 
   254   if (control_flag)
   255     {
   256       if (! inhibit_load_charset_map)
   257         {
   258           if (control_flag == 1)
   259             {
   260               if (charset->method == CHARSET_METHOD_MAP)
   261                 {
   262                   int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
   263 
   264                   vec = make_vector (n, make_fixnum (-1));
   265                   set_charset_attr (charset, charset_decoder, vec);
   266                 }
   267               else
   268                 {
   269                   char_table_set_range (Vchar_unify_table,
   270                                         charset->min_char, charset->max_char,
   271                                         Qnil);
   272                 }
   273             }
   274           else
   275             {
   276               table = Fmake_char_table (Qnil, Qnil);
   277               set_charset_attr (charset,
   278                                 (charset->method == CHARSET_METHOD_MAP
   279                                  ? charset_encoder : charset_deunifier),
   280                                 table);
   281             }
   282         }
   283       else
   284         {
   285           if (! temp_charset_work)
   286             temp_charset_work = xmalloc (sizeof *temp_charset_work);
   287           if (control_flag == 1)
   288             {
   289               memset (temp_charset_work->table.decoder, -1,
   290                       sizeof (int) * 0x10000);
   291             }
   292           else
   293             {
   294               memset (temp_charset_work->table.encoder, 0,
   295                       sizeof (unsigned short) * 0x20000);
   296               temp_charset_work->zero_index_char = -1;
   297             }
   298           temp_charset_work->current = charset;
   299           temp_charset_work->for_encoder = (control_flag == 2);
   300           control_flag += 2;
   301         }
   302       charset_map_loaded = 1;
   303     }
   304 
   305   min_char = max_char = entries->entry[0].c;
   306   nonascii_min_char = MAX_CHAR;
   307   for (i = 0; i < n_entries; i++)
   308     {
   309       unsigned from, to;
   310       int from_index, to_index, lim_index;
   311       int from_c, to_c;
   312       int idx = i % 0x10000;
   313 
   314       if (i > 0 && idx == 0)
   315         entries = entries->next;
   316       from = entries->entry[idx].from;
   317       to = entries->entry[idx].to;
   318       from_c = entries->entry[idx].c;
   319       from_index = CODE_POINT_TO_INDEX (charset, from);
   320       if (from == to)
   321         {
   322           to_index = from_index;
   323           to_c = from_c;
   324         }
   325       else
   326         {
   327           to_index = CODE_POINT_TO_INDEX (charset, to);
   328           to_c = from_c + (to_index - from_index);
   329         }
   330       if (from_index < 0 || to_index < 0)
   331         continue;
   332       lim_index = to_index + 1;
   333 
   334       if (to_c > max_char)
   335         max_char = to_c;
   336       else if (from_c < min_char)
   337         min_char = from_c;
   338 
   339       if (control_flag == 1)
   340         {
   341           if (charset->method == CHARSET_METHOD_MAP)
   342             for (; from_index < lim_index; from_index++, from_c++)
   343               ASET (vec, from_index, make_fixnum (from_c));
   344           else
   345             for (; from_index < lim_index; from_index++, from_c++)
   346               CHAR_TABLE_SET (Vchar_unify_table,
   347                               CHARSET_CODE_OFFSET (charset) + from_index,
   348                               make_fixnum (from_c));
   349         }
   350       else if (control_flag == 2)
   351         {
   352           if (charset->method == CHARSET_METHOD_MAP
   353               && CHARSET_COMPACT_CODES_P (charset))
   354             for (; from_index < lim_index; from_index++, from_c++)
   355               {
   356                 unsigned code = from_index;
   357                 code = INDEX_TO_CODE_POINT (charset, code);
   358 
   359                 if (NILP (CHAR_TABLE_REF (table, from_c)))
   360                   CHAR_TABLE_SET (table, from_c, make_fixnum (code));
   361               }
   362           else
   363             for (; from_index < lim_index; from_index++, from_c++)
   364               {
   365                 if (NILP (CHAR_TABLE_REF (table, from_c)))
   366                   CHAR_TABLE_SET (table, from_c, make_fixnum (from_index));
   367               }
   368         }
   369       else if (control_flag == 3)
   370         for (; from_index < lim_index; from_index++, from_c++)
   371           SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
   372       else if (control_flag == 4)
   373         for (; from_index < lim_index; from_index++, from_c++)
   374           SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
   375       else                      /* control_flag == 0 */
   376         {
   377           if (ascii_compatible_p)
   378             {
   379               if (! ASCII_CHAR_P (from_c))
   380                 {
   381                   if (from_c < nonascii_min_char)
   382                     nonascii_min_char = from_c;
   383                 }
   384               else if (! ASCII_CHAR_P (to_c))
   385                 {
   386                   nonascii_min_char = 0x80;
   387                 }
   388             }
   389 
   390           for (; from_c <= to_c; from_c++)
   391             CHARSET_FAST_MAP_SET (from_c, fast_map);
   392         }
   393     }
   394 
   395   if (control_flag == 0)
   396     {
   397       CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
   398                                     ? nonascii_min_char : min_char);
   399       CHARSET_MAX_CHAR (charset) = max_char;
   400     }
   401   else if (control_flag == 4)
   402     {
   403       temp_charset_work->min_char = min_char;
   404       temp_charset_work->max_char = max_char;
   405     }
   406 }
   407 
   408 
   409 /* Read a hexadecimal number (preceded by "0x") from the file FP while
   410    paying attention to comment character '#'.  LOOKAHEAD is the
   411    lookahead byte if it is nonnegative.  Store into *TERMINATOR the
   412    input byte after the number, or EOF if an end-of-file or input
   413    error occurred.  Set *OVERFLOW if the number overflows.  */
   414 
   415 static unsigned
   416 read_hex (FILE *fp, int lookahead, int *terminator, bool *overflow)
   417 {
   418   int c = lookahead < 0 ? getc (fp) : lookahead;
   419 
   420   while (true)
   421     {
   422       if (c == '#')
   423         do
   424           c = getc (fp);
   425         while (0 <= c && c != '\n');
   426       else if (c == '0')
   427         {
   428           c = getc (fp);
   429           if (c < 0 || c == 'x')
   430             break;
   431         }
   432       if (c < 0)
   433         break;
   434       c = getc (fp);
   435     }
   436 
   437   unsigned n = 0;
   438   bool v = false;
   439 
   440   if (0 <= c)
   441     while (true)
   442       {
   443         c = getc (fp);
   444         int digit = char_hexdigit (c);
   445         if (digit < 0)
   446           break;
   447         v |= INT_LEFT_SHIFT_OVERFLOW (n, 4);
   448         n = (n << 4) + digit;
   449       }
   450 
   451   *terminator = c;
   452   *overflow |= v;
   453   return n;
   454 }
   455 
   456 /* Return a mapping vector for CHARSET loaded from MAPFILE.
   457    Each line of MAPFILE has this form
   458         0xAAAA 0xCCCC
   459    where 0xAAAA is a code-point and 0xCCCC is the corresponding
   460    character code, or this form
   461         0xAAAA-0xBBBB 0xCCCC
   462    where 0xAAAA and 0xBBBB are code-points specifying a range, and
   463    0xCCCC is the first character code of the range.
   464 
   465    The returned vector has this form:
   466         [ CODE1 CHAR1 CODE2 CHAR2 .... ]
   467    where CODE1 is a code-point or a cons of code-points specifying a
   468    range.
   469 
   470    Note that this function uses `openp' to open MAPFILE but ignores
   471    `file-name-handler-alist' to avoid running any Lisp code.  */
   472 
   473 static void
   474 load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile,
   475                             int control_flag)
   476 {
   477   unsigned min_code = CHARSET_MIN_CODE (charset);
   478   unsigned max_code = CHARSET_MAX_CODE (charset);
   479   int fd;
   480   FILE *fp;
   481   struct charset_map_entries *head, *entries;
   482   int n_entries;
   483   AUTO_STRING (map, ".map");
   484   AUTO_STRING (txt, ".txt");
   485   AUTO_LIST2 (suffixes, map, txt);
   486   specpdl_ref count = SPECPDL_INDEX ();
   487   record_unwind_protect_nothing ();
   488   specbind (Qfile_name_handler_alist, Qnil);
   489   fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil, false, false,
   490               NULL);
   491   fp = fd < 0 ? 0 : emacs_fdopen (fd, "r");
   492   if (!fp)
   493     {
   494       int open_errno = errno;
   495       emacs_close (fd);
   496       report_file_errno ("Loading charset map", mapfile, open_errno);
   497     }
   498   set_unwind_protect_ptr (count, fclose_unwind, fp);
   499   unbind_to (specpdl_ref_add (count, 1), Qnil);
   500 
   501   /* Use record_xmalloc, as `charset_map_entries' is
   502      large (larger than MAX_ALLOCA).  */
   503   head = record_xmalloc (sizeof *head);
   504   entries = head;
   505   memset (entries, 0, sizeof (struct charset_map_entries));
   506 
   507   n_entries = 0;
   508   int ch = -1;
   509   while (true)
   510     {
   511       bool overflow = false;
   512       unsigned from = read_hex (fp, ch, &ch, &overflow), to;
   513       if (ch < 0)
   514         break;
   515       if (ch == '-')
   516         {
   517           to = read_hex (fp, -1, &ch, &overflow);
   518           if (ch < 0)
   519             break;
   520         }
   521       else
   522         {
   523           to = from;
   524           ch = -1;
   525         }
   526       unsigned c = read_hex (fp, ch, &ch, &overflow);
   527       if (ch < 0)
   528         break;
   529 
   530       if (overflow)
   531         continue;
   532       if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
   533         continue;
   534 
   535       if (n_entries == 0x10000)
   536         {
   537           entries->next = record_xmalloc (sizeof *entries->next);
   538           entries = entries->next;
   539           memset (entries, 0, sizeof (struct charset_map_entries));
   540           n_entries = 0;
   541         }
   542       int idx = n_entries;
   543       entries->entry[idx].from = from;
   544       entries->entry[idx].to = to;
   545       entries->entry[idx].c = c;
   546       n_entries++;
   547     }
   548   emacs_fclose (fp);
   549   clear_unwind_protect (count);
   550 
   551   load_charset_map (charset, head, n_entries, control_flag);
   552   unbind_to (count, Qnil);
   553 }
   554 
   555 static void
   556 load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int control_flag)
   557 {
   558   unsigned min_code = CHARSET_MIN_CODE (charset);
   559   unsigned max_code = CHARSET_MAX_CODE (charset);
   560   struct charset_map_entries *head, *entries;
   561   int n_entries;
   562   int len = ASIZE (vec);
   563   int i;
   564   USE_SAFE_ALLOCA;
   565 
   566   if (len % 2 == 1)
   567     {
   568       add_to_log ("Failure in loading charset map: %V", vec);
   569       return;
   570     }
   571 
   572   /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
   573      large (larger than MAX_ALLOCA).  */
   574   head = SAFE_ALLOCA (sizeof *head);
   575   entries = head;
   576   memset (entries, 0, sizeof (struct charset_map_entries));
   577 
   578   n_entries = 0;
   579   for (i = 0; i < len; i += 2)
   580     {
   581       Lisp_Object val, val2;
   582       unsigned from, to;
   583       EMACS_INT c;
   584       int idx;
   585 
   586       val = AREF (vec, i);
   587       if (CONSP (val))
   588         {
   589           val2 = XCDR (val);
   590           val = XCAR (val);
   591           from = XFIXNAT (val);
   592           to = XFIXNAT (val2);
   593         }
   594       else
   595         from = to = XFIXNAT (val);
   596       val = AREF (vec, i + 1);
   597       CHECK_FIXNAT (val);
   598       c = XFIXNAT (val);
   599 
   600       if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
   601         continue;
   602 
   603       if (n_entries > 0 && (n_entries % 0x10000) == 0)
   604         {
   605           entries->next = SAFE_ALLOCA (sizeof *entries->next);
   606           entries = entries->next;
   607           memset (entries, 0, sizeof (struct charset_map_entries));
   608         }
   609       idx = n_entries % 0x10000;
   610       entries->entry[idx].from = from;
   611       entries->entry[idx].to = to;
   612       entries->entry[idx].c = c;
   613       n_entries++;
   614     }
   615 
   616   load_charset_map (charset, head, n_entries, control_flag);
   617   SAFE_FREE ();
   618 }
   619 
   620 
   621 /* Load a mapping table for CHARSET.  CONTROL-FLAG tells what kind of
   622    map it is (see the comment of load_charset_map for the detail).  */
   623 
   624 static void
   625 load_charset (struct charset *charset, int control_flag)
   626 {
   627   Lisp_Object map;
   628 
   629   if (inhibit_load_charset_map
   630       && temp_charset_work
   631       && charset == temp_charset_work->current
   632       && ((control_flag == 2) == temp_charset_work->for_encoder))
   633     return;
   634 
   635   if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
   636     map = CHARSET_MAP (charset);
   637   else
   638     {
   639       if (! CHARSET_UNIFIED_P (charset))
   640         emacs_abort ();
   641       map = CHARSET_UNIFY_MAP (charset);
   642     }
   643   if (STRINGP (map))
   644     load_charset_map_from_file (charset, map, control_flag);
   645   else
   646     load_charset_map_from_vector (charset, map, control_flag);
   647 }
   648 
   649 
   650 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
   651        doc: /* Return non-nil if and only if OBJECT is a charset.*/)
   652   (Lisp_Object object)
   653 {
   654   return (CHARSETP (object) ? Qt : Qnil);
   655 }
   656 
   657 
   658 static void
   659 map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object),
   660                       Lisp_Object function, Lisp_Object arg,
   661                       unsigned int from, unsigned int to)
   662 {
   663   int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
   664   int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
   665   Lisp_Object range = Fcons (Qnil, Qnil);
   666   int c, stop;
   667 
   668   c = temp_charset_work->min_char;
   669   stop = (temp_charset_work->max_char < 0x20000
   670           ? temp_charset_work->max_char : 0xFFFF);
   671 
   672   while (1)
   673     {
   674       int idx = GET_TEMP_CHARSET_WORK_ENCODER (c);
   675 
   676       if (idx >= from_idx && idx <= to_idx)
   677         {
   678           if (NILP (XCAR (range)))
   679             XSETCAR (range, make_fixnum (c));
   680         }
   681       else if (! NILP (XCAR (range)))
   682         {
   683           XSETCDR (range, make_fixnum (c - 1));
   684           if (c_function)
   685             (*c_function) (arg, range);
   686           else
   687             call2 (function, range, arg);
   688           XSETCAR (range, Qnil);
   689         }
   690       if (c == stop)
   691         {
   692           if (c == temp_charset_work->max_char)
   693             {
   694               if (! NILP (XCAR (range)))
   695                 {
   696                   XSETCDR (range, make_fixnum (c));
   697                   if (c_function)
   698                     (*c_function) (arg, range);
   699                   else
   700                     call2 (function, range, arg);
   701                 }
   702               break;
   703             }
   704           c = 0x1FFFF;
   705           stop = temp_charset_work->max_char;
   706         }
   707       c++;
   708     }
   709 }
   710 
   711 void
   712 map_charset_chars (void (*c_function)(Lisp_Object, Lisp_Object), Lisp_Object function,
   713                    Lisp_Object arg, struct charset *charset, unsigned from, unsigned to)
   714 {
   715   Lisp_Object range;
   716   bool partial = (from > CHARSET_MIN_CODE (charset)
   717                   || to < CHARSET_MAX_CODE (charset));
   718 
   719   if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
   720     {
   721       int from_idx = CODE_POINT_TO_INDEX (charset, from);
   722       int to_idx = CODE_POINT_TO_INDEX (charset, to);
   723       int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
   724       int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
   725 
   726       if (CHARSET_UNIFIED_P (charset))
   727         {
   728           if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
   729             load_charset (charset, 2);
   730           if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
   731             map_char_table_for_charset (c_function, function,
   732                                         CHARSET_DEUNIFIER (charset), arg,
   733                                         partial ? charset : NULL, from, to);
   734           else
   735             map_charset_for_dump (c_function, function, arg, from, to);
   736         }
   737 
   738       range = Fcons (make_fixnum (from_c), make_fixnum (to_c));
   739       if (NILP (function))
   740         (*c_function) (arg, range);
   741       else
   742         call2 (function, range, arg);
   743     }
   744   else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
   745     {
   746       if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
   747         load_charset (charset, 2);
   748       if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
   749         map_char_table_for_charset (c_function, function,
   750                                     CHARSET_ENCODER (charset), arg,
   751                                     partial ? charset : NULL, from, to);
   752       else
   753         map_charset_for_dump (c_function, function, arg, from, to);
   754     }
   755   else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
   756     {
   757       Lisp_Object subset_info;
   758       int offset;
   759 
   760       subset_info = CHARSET_SUBSET (charset);
   761       charset = CHARSET_FROM_ID (XFIXNAT (AREF (subset_info, 0)));
   762       offset = XFIXNUM (AREF (subset_info, 3));
   763       from -= offset;
   764       if (from < XFIXNAT (AREF (subset_info, 1)))
   765         from = XFIXNAT (AREF (subset_info, 1));
   766       to -= offset;
   767       if (to > XFIXNAT (AREF (subset_info, 2)))
   768         to = XFIXNAT (AREF (subset_info, 2));
   769       map_charset_chars (c_function, function, arg, charset, from, to);
   770     }
   771   else                          /* i.e. CHARSET_METHOD_SUPERSET */
   772     {
   773       Lisp_Object parents;
   774 
   775       for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
   776            parents = XCDR (parents))
   777         {
   778           int offset;
   779           unsigned this_from, this_to;
   780 
   781           charset = CHARSET_FROM_ID (XFIXNAT (XCAR (XCAR (parents))));
   782           offset = XFIXNUM (XCDR (XCAR (parents)));
   783           this_from = from > offset ? from - offset : 0;
   784           this_to = to > offset ? to - offset : 0;
   785           if (this_from < CHARSET_MIN_CODE (charset))
   786             this_from = CHARSET_MIN_CODE (charset);
   787           if (this_to > CHARSET_MAX_CODE (charset))
   788             this_to = CHARSET_MAX_CODE (charset);
   789           map_charset_chars (c_function, function, arg, charset,
   790                              this_from, this_to);
   791         }
   792     }
   793 }
   794 
   795 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
   796        doc: /* Call FUNCTION for all characters in CHARSET.
   797 Optional 3rd argument ARG is an additional argument to be passed
   798 to FUNCTION, see below.
   799 Optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
   800 range of code points (in CHARSET) of target characters on which to
   801 map the FUNCTION.  Note that these are not character codes, but code
   802 points of CHARSET; for the difference see `decode-char' and
   803 `list-charset-chars'.  If FROM-CODE is nil or imitted, it stands for
   804 the first code point of CHARSET; if TO-CODE is nil or omitted, it
   805 stands for the last code point of CHARSET.
   806 
   807 FUNCTION will be called with two arguments: RANGE and ARG.
   808 RANGE is a cons (FROM .  TO), where FROM and TO specify a range of
   809 characters that belong to CHARSET on which FUNCTION should do its
   810 job.  FROM and TO are Emacs character codes, unlike FROM-CODE and
   811 TO-CODE, which are CHARSET code points.  */)
   812   (Lisp_Object function, Lisp_Object charset, Lisp_Object arg, Lisp_Object from_code, Lisp_Object to_code)
   813 {
   814   struct charset *cs;
   815   unsigned from, to;
   816 
   817   CHECK_CHARSET_GET_CHARSET (charset, cs);
   818   if (NILP (from_code))
   819     from = CHARSET_MIN_CODE (cs);
   820   else
   821     {
   822       from = XFIXNUM (from_code);
   823       if (from < CHARSET_MIN_CODE (cs))
   824         from = CHARSET_MIN_CODE (cs);
   825     }
   826   if (NILP (to_code))
   827     to = CHARSET_MAX_CODE (cs);
   828   else
   829     {
   830       to = XFIXNUM (to_code);
   831       if (to > CHARSET_MAX_CODE (cs))
   832         to = CHARSET_MAX_CODE (cs);
   833     }
   834   map_charset_chars (NULL, function, arg, cs, from, to);
   835   return Qnil;
   836 }
   837 
   838 
   839 /* Define a charset according to the arguments.  The Nth argument is
   840    the Nth attribute of the charset (the last attribute `charset-id'
   841    is not included).  See the docstring of `define-charset' for the
   842    detail.  */
   843 
   844 DEFUN ("define-charset-internal", Fdefine_charset_internal,
   845        Sdefine_charset_internal, charset_arg_max, MANY, 0,
   846        doc: /* For internal use only.
   847 usage: (define-charset-internal ...)  */)
   848   (ptrdiff_t nargs, Lisp_Object *args)
   849 {
   850   /* Charset attr vector.  */
   851   Lisp_Object attrs;
   852   Lisp_Object val;
   853   Lisp_Object hash_code;
   854   struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
   855   int i, j;
   856   struct charset charset;
   857   int id;
   858   int dimension;
   859   bool new_definition_p;
   860   int nchars;
   861 
   862   memset (&charset, 0, sizeof (charset));
   863 
   864   if (nargs != charset_arg_max)
   865     Fsignal (Qwrong_number_of_arguments,
   866              Fcons (intern ("define-charset-internal"),
   867                     make_fixnum (nargs)));
   868 
   869   attrs = make_nil_vector (charset_attr_max);
   870 
   871   CHECK_SYMBOL (args[charset_arg_name]);
   872   ASET (attrs, charset_name, args[charset_arg_name]);
   873 
   874   val = args[charset_arg_code_space];
   875   for (i = 0, dimension = 0, nchars = 1; ; i++)
   876     {
   877       Lisp_Object min_byte_obj = Faref (val, make_fixnum (i * 2));
   878       Lisp_Object max_byte_obj = Faref (val, make_fixnum (i * 2 + 1));
   879       int min_byte = check_integer_range (min_byte_obj, 0, 255);
   880       int max_byte = check_integer_range (max_byte_obj, min_byte, 255);
   881       charset.code_space[i * 4] = min_byte;
   882       charset.code_space[i * 4 + 1] = max_byte;
   883       charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
   884       if (max_byte > 0)
   885         dimension = i + 1;
   886       if (i == 3)
   887         break;
   888       nchars *= charset.code_space[i * 4 + 2];
   889       charset.code_space[i * 4 + 3] = nchars;
   890     }
   891 
   892   val = args[charset_arg_dimension];
   893   charset.dimension
   894     = !NILP (val) ? check_integer_range (val, 1, 4) : dimension;
   895 
   896   charset.code_linear_p
   897     = (charset.dimension == 1
   898        || (charset.code_space[2] == 256
   899            && (charset.dimension == 2
   900                || (charset.code_space[6] == 256
   901                    && (charset.dimension == 3
   902                        || charset.code_space[10] == 256)))));
   903 
   904   if (! charset.code_linear_p)
   905     {
   906       charset.code_space_mask = xzalloc (256);
   907       for (i = 0; i < 4; i++)
   908         for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
   909              j++)
   910           charset.code_space_mask[j] |= (1 << i);
   911     }
   912 
   913   charset.iso_chars_96 = charset.code_space[2] == 96;
   914 
   915   charset.min_code = (charset.code_space[0]
   916                       | (charset.code_space[4] << 8)
   917                       | (charset.code_space[8] << 16)
   918                       | ((unsigned) charset.code_space[12] << 24));
   919   charset.max_code = (charset.code_space[1]
   920                       | (charset.code_space[5] << 8)
   921                       | (charset.code_space[9] << 16)
   922                       | ((unsigned) charset.code_space[13] << 24));
   923   charset.char_index_offset = 0;
   924 
   925   val = args[charset_arg_min_code];
   926   if (! NILP (val))
   927     {
   928       unsigned code = cons_to_unsigned (val, UINT_MAX);
   929 
   930       if (code < charset.min_code
   931           || code > charset.max_code)
   932         args_out_of_range_3 (INT_TO_INTEGER (charset.min_code),
   933                              INT_TO_INTEGER (charset.max_code), val);
   934       charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
   935       charset.min_code = code;
   936     }
   937 
   938   val = args[charset_arg_max_code];
   939   if (! NILP (val))
   940     {
   941       unsigned code = cons_to_unsigned (val, UINT_MAX);
   942 
   943       if (code < charset.min_code
   944           || code > charset.max_code)
   945         args_out_of_range_3 (INT_TO_INTEGER (charset.min_code),
   946                              INT_TO_INTEGER (charset.max_code), val);
   947       charset.max_code = code;
   948     }
   949 
   950   charset.compact_codes_p = charset.max_code < 0x10000;
   951 
   952   val = args[charset_arg_invalid_code];
   953   if (NILP (val))
   954     {
   955       if (charset.min_code > 0)
   956         charset.invalid_code = 0;
   957       else
   958         {
   959           if (charset.max_code < UINT_MAX)
   960             charset.invalid_code = charset.max_code + 1;
   961           else
   962             error ("Attribute :invalid-code must be specified");
   963         }
   964     }
   965   else
   966     charset.invalid_code = cons_to_unsigned (val, UINT_MAX);
   967 
   968   val = args[charset_arg_iso_final];
   969   if (NILP (val))
   970     charset.iso_final = -1;
   971   else
   972     {
   973       CHECK_FIXNUM (val);
   974       if (XFIXNUM (val) < '0' || XFIXNUM (val) > 127)
   975         error ("Invalid iso-final-char: %"pI"d", XFIXNUM (val));
   976       charset.iso_final = XFIXNUM (val);
   977     }
   978 
   979   val = args[charset_arg_iso_revision];
   980   charset.iso_revision = !NILP (val) ? check_integer_range (val, -1, 63) : -1;
   981 
   982   val = args[charset_arg_emacs_mule_id];
   983   if (NILP (val))
   984     charset.emacs_mule_id = -1;
   985   else
   986     {
   987       CHECK_FIXNAT (val);
   988       if ((XFIXNUM (val) > 0 && XFIXNUM (val) <= 128) || XFIXNUM (val) >= 256)
   989         error ("Invalid emacs-mule-id: %"pI"d", XFIXNUM (val));
   990       charset.emacs_mule_id = XFIXNUM (val);
   991     }
   992 
   993   charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
   994 
   995   charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
   996 
   997   charset.unified_p = 0;
   998 
   999   memset (charset.fast_map, 0, sizeof (charset.fast_map));
  1000 
  1001   if (! NILP (args[charset_arg_code_offset]))
  1002     {
  1003       val = args[charset_arg_code_offset];
  1004       CHECK_CHARACTER (val);
  1005 
  1006       charset.method = CHARSET_METHOD_OFFSET;
  1007       charset.code_offset = XFIXNUM (val);
  1008 
  1009       i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
  1010       if (MAX_CHAR - charset.code_offset < i)
  1011         error ("Unsupported max char: %d", charset.max_char);
  1012       charset.max_char = i + charset.code_offset;
  1013       i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
  1014       charset.min_char = i + charset.code_offset;
  1015 
  1016       i = (charset.min_char >> 7) << 7;
  1017       for (; i < 0x10000 && i <= charset.max_char; i += 128)
  1018         CHARSET_FAST_MAP_SET (i, charset.fast_map);
  1019       i = (i >> 12) << 12;
  1020       for (; i <= charset.max_char; i += 0x1000)
  1021         CHARSET_FAST_MAP_SET (i, charset.fast_map);
  1022       if (charset.code_offset == 0 && charset.max_char >= 0x80)
  1023         charset.ascii_compatible_p = 1;
  1024     }
  1025   else if (! NILP (args[charset_arg_map]))
  1026     {
  1027       val = args[charset_arg_map];
  1028       ASET (attrs, charset_map, val);
  1029       charset.method = CHARSET_METHOD_MAP;
  1030     }
  1031   else if (! NILP (args[charset_arg_subset]))
  1032     {
  1033       Lisp_Object parent;
  1034       Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
  1035       struct charset *parent_charset;
  1036 
  1037       val = args[charset_arg_subset];
  1038       parent = Fcar (val);
  1039       CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
  1040       parent_min_code = Fnth (make_fixnum (1), val);
  1041       CHECK_FIXNAT (parent_min_code);
  1042       parent_max_code = Fnth (make_fixnum (2), val);
  1043       CHECK_FIXNAT (parent_max_code);
  1044       parent_code_offset = Fnth (make_fixnum (3), val);
  1045       CHECK_FIXNUM (parent_code_offset);
  1046       ASET (attrs, charset_subset,
  1047             CALLN (Fvector, make_fixnum (parent_charset->id),
  1048                    parent_min_code, parent_max_code, parent_code_offset));
  1049 
  1050       charset.method = CHARSET_METHOD_SUBSET;
  1051       /* Here, we just copy the parent's fast_map.  It's not accurate,
  1052          but at least it works for quickly detecting which character
  1053          DOESN'T belong to this charset.  */
  1054       memcpy (charset.fast_map, parent_charset->fast_map,
  1055               sizeof charset.fast_map);
  1056 
  1057       /* We also copy these for parents.  */
  1058       charset.min_char = parent_charset->min_char;
  1059       charset.max_char = parent_charset->max_char;
  1060     }
  1061   else if (! NILP (args[charset_arg_superset]))
  1062     {
  1063       val = args[charset_arg_superset];
  1064       charset.method = CHARSET_METHOD_SUPERSET;
  1065       val = Fcopy_sequence (val);
  1066       ASET (attrs, charset_superset, val);
  1067 
  1068       charset.min_char = MAX_CHAR;
  1069       charset.max_char = 0;
  1070       for (; ! NILP (val); val = Fcdr (val))
  1071         {
  1072           Lisp_Object elt, car_part, cdr_part;
  1073           int this_id, offset;
  1074           struct charset *this_charset;
  1075 
  1076           elt = Fcar (val);
  1077           if (CONSP (elt))
  1078             {
  1079               car_part = XCAR (elt);
  1080               cdr_part = XCDR (elt);
  1081               CHECK_CHARSET_GET_ID (car_part, this_id);
  1082               offset = check_integer_range (cdr_part, INT_MIN, INT_MAX);
  1083             }
  1084           else
  1085             {
  1086               CHECK_CHARSET_GET_ID (elt, this_id);
  1087               offset = 0;
  1088             }
  1089           XSETCAR (val, Fcons (make_fixnum (this_id), make_fixnum (offset)));
  1090 
  1091           this_charset = CHARSET_FROM_ID (this_id);
  1092           if (charset.min_char > this_charset->min_char)
  1093             charset.min_char = this_charset->min_char;
  1094           if (charset.max_char < this_charset->max_char)
  1095             charset.max_char = this_charset->max_char;
  1096           for (i = 0; i < 190; i++)
  1097             charset.fast_map[i] |= this_charset->fast_map[i];
  1098         }
  1099     }
  1100   else
  1101     error ("None of :code-offset, :map, :parents are specified");
  1102 
  1103   val = args[charset_arg_unify_map];
  1104   if (! NILP (val) && !STRINGP (val))
  1105     CHECK_VECTOR (val);
  1106   ASET (attrs, charset_unify_map, val);
  1107 
  1108   CHECK_LIST (args[charset_arg_plist]);
  1109   ASET (attrs, charset_plist, args[charset_arg_plist]);
  1110 
  1111   charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
  1112                                     &hash_code);
  1113   if (charset.hash_index >= 0)
  1114     {
  1115       new_definition_p = 0;
  1116       id = XFIXNAT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
  1117       set_hash_value_slot (hash_table, charset.hash_index, attrs);
  1118     }
  1119   else
  1120     {
  1121       charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
  1122                                      hash_code);
  1123       if (charset_table_used == charset_table_size)
  1124         {
  1125           /* Ensure that charset IDs fit into 'int' as well as into the
  1126              restriction imposed by fixnums.  Although the 'int' restriction
  1127              could be removed, too much other code would need altering; for
  1128              example, the IDs are stuffed into struct
  1129              coding_system.charbuf[i] entries, which are 'int'.  */
  1130           int old_size = charset_table_size;
  1131           ptrdiff_t new_size = old_size;
  1132           struct charset *new_table =
  1133             xpalloc (0, &new_size, 1,
  1134                      min (INT_MAX, MOST_POSITIVE_FIXNUM),
  1135                      sizeof *charset_table);
  1136           memcpy (new_table, charset_table, old_size * sizeof *new_table);
  1137           charset_table = new_table;
  1138           charset_table_size = new_size;
  1139           /* FIXME: This leaks memory, as the old charset_table becomes
  1140              unreachable.  If the old charset table is charset_table_init
  1141              then this leak is intentional; otherwise, it's unclear.
  1142              If the latter memory leak is intentional, a
  1143              comment should be added to explain this.  If not, the old
  1144              charset_table should be freed, by passing it as the 1st argument
  1145              to xpalloc and removing the memcpy.  */
  1146         }
  1147       id = charset_table_used++;
  1148       new_definition_p = 1;
  1149     }
  1150 
  1151   ASET (attrs, charset_id, make_fixnum (id));
  1152   charset.id = id;
  1153   charset_table[id] = charset;
  1154 
  1155   if (charset.method == CHARSET_METHOD_MAP)
  1156     {
  1157       load_charset (&charset, 0);
  1158       charset_table[id] = charset;
  1159     }
  1160 
  1161   if (charset.iso_final >= 0)
  1162     {
  1163       ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
  1164                          charset.iso_final) = id;
  1165       if (new_definition_p)
  1166         Viso_2022_charset_list = nconc2 (Viso_2022_charset_list, list1i (id));
  1167       if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
  1168         charset_jisx0201_roman = id;
  1169       else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
  1170         charset_jisx0208_1978 = id;
  1171       else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
  1172         charset_jisx0208 = id;
  1173       else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
  1174         charset_ksc5601 = id;
  1175     }
  1176 
  1177   if (charset.emacs_mule_id >= 0)
  1178     {
  1179       emacs_mule_charset[charset.emacs_mule_id] = id;
  1180       if (charset.emacs_mule_id < 0xA0)
  1181         emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
  1182       else
  1183         emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
  1184       if (new_definition_p)
  1185         Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
  1186                                            list1i (id));
  1187     }
  1188 
  1189   if (new_definition_p)
  1190     {
  1191       Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
  1192       if (charset.supplementary_p)
  1193         Vcharset_ordered_list = nconc2 (Vcharset_ordered_list, list1i (id));
  1194       else
  1195         {
  1196           Lisp_Object tail;
  1197 
  1198           for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
  1199             {
  1200               struct charset *cs = CHARSET_FROM_ID (XFIXNUM (XCAR (tail)));
  1201 
  1202               if (cs->supplementary_p)
  1203                 break;
  1204             }
  1205           if (EQ (tail, Vcharset_ordered_list))
  1206             Vcharset_ordered_list = Fcons (make_fixnum (id),
  1207                                            Vcharset_ordered_list);
  1208           else if (NILP (tail))
  1209             Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
  1210                                             list1i (id));
  1211           else
  1212             {
  1213               val = Fcons (XCAR (tail), XCDR (tail));
  1214               XSETCDR (tail, val);
  1215               XSETCAR (tail, make_fixnum (id));
  1216             }
  1217         }
  1218       charset_ordered_list_tick++;
  1219     }
  1220 
  1221   return Qnil;
  1222 }
  1223 
  1224 
  1225 /* Same as Fdefine_charset_internal but arguments are more convenient
  1226    to call from C (typically in syms_of_charset).  This can define a
  1227    charset of `offset' method only.  Return the ID of the new
  1228    charset.  */
  1229 
  1230 static int
  1231 define_charset_internal (Lisp_Object name,
  1232                          int dimension,
  1233                          const char *code_space_chars,
  1234                          unsigned min_code, unsigned max_code,
  1235                          int iso_final, int iso_revision, int emacs_mule_id,
  1236                          bool ascii_compatible, bool supplementary,
  1237                          int code_offset)
  1238 {
  1239   const unsigned char *code_space = (const unsigned char *) code_space_chars;
  1240   Lisp_Object args[charset_arg_max];
  1241   Lisp_Object val;
  1242   int i;
  1243 
  1244   args[charset_arg_name] = name;
  1245   args[charset_arg_dimension] = make_fixnum (dimension);
  1246   val = make_uninit_vector (8);
  1247   for (i = 0; i < 8; i++)
  1248     ASET (val, i, make_fixnum (code_space[i]));
  1249   args[charset_arg_code_space] = val;
  1250   args[charset_arg_min_code] = make_fixnum (min_code);
  1251   args[charset_arg_max_code] = make_fixnum (max_code);
  1252   args[charset_arg_iso_final]
  1253     = (iso_final < 0 ? Qnil : make_fixnum (iso_final));
  1254   args[charset_arg_iso_revision] = make_fixnum (iso_revision);
  1255   args[charset_arg_emacs_mule_id]
  1256     = (emacs_mule_id < 0 ? Qnil : make_fixnum (emacs_mule_id));
  1257   args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
  1258   args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
  1259   args[charset_arg_invalid_code] = Qnil;
  1260   args[charset_arg_code_offset] = make_fixnum (code_offset);
  1261   args[charset_arg_map] = Qnil;
  1262   args[charset_arg_subset] = Qnil;
  1263   args[charset_arg_superset] = Qnil;
  1264   args[charset_arg_unify_map] = Qnil;
  1265 
  1266   args[charset_arg_plist] =
  1267      list (QCname,
  1268            args[charset_arg_name],
  1269            intern_c_string (":dimension"),
  1270            args[charset_arg_dimension],
  1271            intern_c_string (":code-space"),
  1272            args[charset_arg_code_space],
  1273            intern_c_string (":iso-final-char"),
  1274            args[charset_arg_iso_final],
  1275            intern_c_string (":emacs-mule-id"),
  1276            args[charset_arg_emacs_mule_id],
  1277            QCascii_compatible_p,
  1278            args[charset_arg_ascii_compatible_p],
  1279            intern_c_string (":code-offset"),
  1280            args[charset_arg_code_offset]);
  1281   Fdefine_charset_internal (charset_arg_max, args);
  1282 
  1283   return XFIXNUM (CHARSET_SYMBOL_ID (name));
  1284 }
  1285 
  1286 
  1287 DEFUN ("define-charset-alias", Fdefine_charset_alias,
  1288        Sdefine_charset_alias, 2, 2, 0,
  1289        doc: /* Define ALIAS as an alias for charset CHARSET.  */)
  1290   (Lisp_Object alias, Lisp_Object charset)
  1291 {
  1292   Lisp_Object attr;
  1293 
  1294   CHECK_CHARSET_GET_ATTR (charset, attr);
  1295   Fputhash (alias, attr, Vcharset_hash_table);
  1296   Vcharset_list = Fcons (alias, Vcharset_list);
  1297   return Qnil;
  1298 }
  1299 
  1300 
  1301 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
  1302        doc: /* Return the property list of CHARSET.  */)
  1303   (Lisp_Object charset)
  1304 {
  1305   Lisp_Object attrs;
  1306 
  1307   CHECK_CHARSET_GET_ATTR (charset, attrs);
  1308   return CHARSET_ATTR_PLIST (attrs);
  1309 }
  1310 
  1311 
  1312 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
  1313        doc: /* Set CHARSET's property list to PLIST.  */)
  1314   (Lisp_Object charset, Lisp_Object plist)
  1315 {
  1316   Lisp_Object attrs;
  1317 
  1318   CHECK_CHARSET_GET_ATTR (charset, attrs);
  1319   ASET (attrs, charset_plist, plist);
  1320   return plist;
  1321 }
  1322 
  1323 
  1324 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
  1325        doc: /* Unify characters of CHARSET with Unicode.
  1326 This means reading the relevant file and installing the table defined
  1327 by CHARSET's `:unify-map' property.
  1328 
  1329 Optional second arg UNIFY-MAP is a file name string or a vector.  It has
  1330 the same meaning as the `:unify-map' attribute in the function
  1331 `define-charset' (which see).
  1332 
  1333 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET.  */)
  1334   (Lisp_Object charset, Lisp_Object unify_map, Lisp_Object deunify)
  1335 {
  1336   int id;
  1337   struct charset *cs;
  1338 
  1339   CHECK_CHARSET_GET_ID (charset, id);
  1340   cs = CHARSET_FROM_ID (id);
  1341   if (NILP (deunify)
  1342       ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
  1343       : ! CHARSET_UNIFIED_P (cs))
  1344     return Qnil;
  1345 
  1346   CHARSET_UNIFIED_P (cs) = 0;
  1347   if (NILP (deunify))
  1348     {
  1349       if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
  1350           || CHARSET_CODE_OFFSET (cs) < 0x110000)
  1351         error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
  1352       if (NILP (unify_map))
  1353         unify_map = CHARSET_UNIFY_MAP (cs);
  1354       else
  1355         {
  1356           if (! STRINGP (unify_map) && ! VECTORP (unify_map))
  1357             signal_error ("Bad unify-map", unify_map);
  1358           set_charset_attr (cs, charset_unify_map, unify_map);
  1359         }
  1360       if (NILP (Vchar_unify_table))
  1361         Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
  1362       char_table_set_range (Vchar_unify_table,
  1363                             cs->min_char, cs->max_char, charset);
  1364       CHARSET_UNIFIED_P (cs) = 1;
  1365     }
  1366   else if (CHAR_TABLE_P (Vchar_unify_table))
  1367     {
  1368       unsigned min_code = CHARSET_MIN_CODE (cs);
  1369       unsigned max_code = CHARSET_MAX_CODE (cs);
  1370       int min_char = DECODE_CHAR (cs, min_code);
  1371       int max_char = DECODE_CHAR (cs, max_code);
  1372 
  1373       char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
  1374     }
  1375 
  1376   return Qnil;
  1377 }
  1378 
  1379 /* Check that DIMENSION, CHARS, and FINAL_CHAR specify a valid ISO charset.
  1380    Return true if it's a 96-character set, false if 94.  */
  1381 
  1382 static bool
  1383 check_iso_charset_parameter (Lisp_Object dimension, Lisp_Object chars,
  1384                              Lisp_Object final_char)
  1385 {
  1386   CHECK_FIXNUM (dimension);
  1387   CHECK_FIXNUM (chars);
  1388   CHECK_CHARACTER (final_char);
  1389 
  1390   if (! (1 <= XFIXNUM (dimension) && XFIXNUM (dimension) <= 3))
  1391     error ("Invalid DIMENSION %"pI"d, it should be 1, 2, or 3",
  1392            XFIXNUM (dimension));
  1393 
  1394   bool chars_flag = XFIXNUM (chars) == 96;
  1395   if (! (chars_flag || XFIXNUM (chars) == 94))
  1396     error ("Invalid CHARS %"pI"d, it should be 94 or 96", XFIXNUM (chars));
  1397 
  1398   int final_ch = XFIXNAT (final_char);
  1399   if (! ('0' <= final_ch && final_ch <= '~'))
  1400     error ("Invalid FINAL-CHAR `%c', it should be `0'..`~'", final_ch);
  1401 
  1402   return chars_flag;
  1403 }
  1404 
  1405 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
  1406        Sget_unused_iso_final_char, 2, 2, 0,
  1407        doc: /*
  1408 Return an unused ISO final char for a charset of DIMENSION and CHARS.
  1409 DIMENSION is the number of bytes to represent a character: 1 or 2.
  1410 CHARS is the number of characters in a dimension: 94 or 96.
  1411 
  1412 This final char is for private use, thus the range is `0' (48) .. `?' (63).
  1413 If there's no unused final char for the specified kind of charset,
  1414 return nil.  */)
  1415   (Lisp_Object dimension, Lisp_Object chars)
  1416 {
  1417   bool chars_flag = check_iso_charset_parameter (dimension, chars,
  1418                                                  make_fixnum ('0'));
  1419   for (int final_char = '0'; final_char <= '?'; final_char++)
  1420     if (ISO_CHARSET_TABLE (XFIXNUM (dimension), chars_flag, final_char) < 0)
  1421       return make_fixnum (final_char);
  1422   return Qnil;
  1423 }
  1424 
  1425 
  1426 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
  1427        4, 4, 0,
  1428        doc: /* Declare an equivalent charset for ISO-2022 decoding.
  1429 
  1430 On decoding by an ISO-2022 base coding system, when a charset
  1431 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
  1432 if CHARSET is designated instead.  */)
  1433   (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char, Lisp_Object charset)
  1434 {
  1435   int id;
  1436 
  1437   CHECK_CHARSET_GET_ID (charset, id);
  1438   bool chars_flag = check_iso_charset_parameter (dimension, chars, final_char);
  1439   ISO_CHARSET_TABLE (XFIXNUM (dimension), chars_flag, XFIXNAT (final_char)) = id;
  1440   return Qnil;
  1441 }
  1442 
  1443 
  1444 /* Return information about charsets in the text at PTR of NBYTES
  1445    bytes, which are NCHARS characters.  The value is:
  1446 
  1447         0: Each character is represented by one byte.  This is always
  1448            true for a unibyte string.  For a multibyte string, true if
  1449            it contains only ASCII characters.
  1450 
  1451         1: No charsets other than ascii, control-1, and latin-1 are
  1452            found.
  1453 
  1454         2: Otherwise.
  1455 */
  1456 
  1457 int
  1458 string_xstring_p (Lisp_Object string)
  1459 {
  1460   const unsigned char *p = SDATA (string);
  1461   const unsigned char *endp = p + SBYTES (string);
  1462 
  1463   if (SCHARS (string) == SBYTES (string))
  1464     return 0;
  1465 
  1466   while (p < endp)
  1467     {
  1468       int c = string_char_advance (&p);
  1469 
  1470       if (c >= 0x100)
  1471         return 2;
  1472     }
  1473   return 1;
  1474 }
  1475 
  1476 
  1477 /* Find charsets in the string at PTR of NCHARS and NBYTES.
  1478 
  1479    CHARSETS is a vector.  If Nth element is non-nil, it means the
  1480    charset whose id is N is already found.
  1481 
  1482    It may lookup a translation table TABLE if supplied.  */
  1483 
  1484 static void
  1485 find_charsets_in_text (const unsigned char *ptr, ptrdiff_t nchars,
  1486                        ptrdiff_t nbytes, Lisp_Object charsets,
  1487                        Lisp_Object table, bool multibyte)
  1488 {
  1489   const unsigned char *pend = ptr + nbytes;
  1490 
  1491   if (nchars == nbytes)
  1492     {
  1493       if (multibyte)
  1494         ASET (charsets, charset_ascii, Qt);
  1495       else
  1496         while (ptr < pend)
  1497           {
  1498             int c = *ptr++;
  1499 
  1500             if (!NILP (table))
  1501               c = translate_char (table, c);
  1502             if (ASCII_CHAR_P (c))
  1503               ASET (charsets, charset_ascii, Qt);
  1504             else
  1505               ASET (charsets, charset_eight_bit, Qt);
  1506           }
  1507     }
  1508   else
  1509     {
  1510       while (ptr < pend)
  1511         {
  1512           int c = string_char_advance (&ptr);
  1513           struct charset *charset;
  1514 
  1515           if (!NILP (table))
  1516             c = translate_char (table, c);
  1517           charset = CHAR_CHARSET (c);
  1518           ASET (charsets, CHARSET_ID (charset), Qt);
  1519         }
  1520     }
  1521 }
  1522 
  1523 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
  1524        2, 3, 0,
  1525        doc: /* Return a list of charsets in the region between BEG and END.
  1526 BEG and END are buffer positions.
  1527 Optional arg TABLE if non-nil is a translation table to look up.
  1528 
  1529 If the current buffer is unibyte, the returned list may contain
  1530 only `ascii', `eight-bit-control', and `eight-bit-graphic'.  */)
  1531   (Lisp_Object beg, Lisp_Object end, Lisp_Object table)
  1532 {
  1533   Lisp_Object charsets;
  1534   ptrdiff_t from, from_byte, to, stop, stop_byte;
  1535   int i;
  1536   Lisp_Object val;
  1537   bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
  1538 
  1539   validate_region (&beg, &end);
  1540   from = XFIXNAT (beg);
  1541   stop = to = XFIXNAT (end);
  1542 
  1543   if (from < GPT && GPT < to)
  1544     {
  1545       stop = GPT;
  1546       stop_byte = GPT_BYTE;
  1547     }
  1548   else
  1549     stop_byte = CHAR_TO_BYTE (stop);
  1550 
  1551   from_byte = CHAR_TO_BYTE (from);
  1552 
  1553   charsets = make_nil_vector (charset_table_used);
  1554   while (1)
  1555     {
  1556       find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
  1557                              stop_byte - from_byte, charsets, table,
  1558                              multibyte);
  1559       if (stop < to)
  1560         {
  1561           from = stop, from_byte = stop_byte;
  1562           stop = to, stop_byte = CHAR_TO_BYTE (stop);
  1563         }
  1564       else
  1565         break;
  1566     }
  1567 
  1568   val = Qnil;
  1569   for (i = charset_table_used - 1; i >= 0; i--)
  1570     if (!NILP (AREF (charsets, i)))
  1571       val = Fcons (CHARSET_NAME (charset_table + i), val);
  1572   return val;
  1573 }
  1574 
  1575 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
  1576        1, 2, 0,
  1577        doc: /* Return a list of charsets in STR.
  1578 Optional arg TABLE if non-nil is a translation table to look up.
  1579 
  1580 If STR is unibyte, the returned list may contain
  1581 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
  1582   (Lisp_Object str, Lisp_Object table)
  1583 {
  1584   CHECK_STRING (str);
  1585 
  1586   Lisp_Object charsets = make_nil_vector (charset_table_used);
  1587   find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
  1588                          charsets, table,
  1589                          STRING_MULTIBYTE (str));
  1590   Lisp_Object val = Qnil;
  1591   for (int i = charset_table_used - 1; i >= 0; i--)
  1592     if (!NILP (AREF (charsets, i)))
  1593       val = Fcons (CHARSET_NAME (charset_table + i), val);
  1594   return val;
  1595 }
  1596 
  1597 
  1598 
  1599 /* Return a unified character code for C (>= 0x110000).  VAL is a
  1600    value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
  1601    charset symbol.  */
  1602 static int
  1603 maybe_unify_char (int c, Lisp_Object val)
  1604 {
  1605   struct charset *charset;
  1606 
  1607   if (FIXNUMP (val))
  1608     return XFIXNAT (val);
  1609   if (NILP (val))
  1610     return c;
  1611 
  1612   CHECK_CHARSET_GET_CHARSET (val, charset);
  1613 #ifdef REL_ALLOC
  1614   /* The call to load_charset below can allocate memory, which screws
  1615      callers of this function through STRING_CHAR_* macros that hold C
  1616      pointers to buffer text, if REL_ALLOC is used.  */
  1617   r_alloc_inhibit_buffer_relocation (1);
  1618 #endif
  1619   load_charset (charset, 1);
  1620   if (! inhibit_load_charset_map)
  1621     {
  1622       val = CHAR_TABLE_REF (Vchar_unify_table, c);
  1623       if (! NILP (val))
  1624         c = XFIXNAT (val);
  1625     }
  1626   else
  1627     {
  1628       int code_index = c - CHARSET_CODE_OFFSET (charset);
  1629       int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
  1630 
  1631       if (unified > 0)
  1632         c = unified;
  1633     }
  1634 #ifdef REL_ALLOC
  1635   r_alloc_inhibit_buffer_relocation (0);
  1636 #endif
  1637   return c;
  1638 }
  1639 
  1640 
  1641 /* Return a character corresponding to the code-point CODE of
  1642    CHARSET.  */
  1643 
  1644 int
  1645 decode_char (struct charset *charset, unsigned int code)
  1646 {
  1647   int c, char_index;
  1648   enum charset_method method = CHARSET_METHOD (charset);
  1649 
  1650   if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
  1651     return -1;
  1652 
  1653   if (method == CHARSET_METHOD_SUBSET)
  1654     {
  1655       Lisp_Object subset_info;
  1656 
  1657       subset_info = CHARSET_SUBSET (charset);
  1658       charset = CHARSET_FROM_ID (XFIXNAT (AREF (subset_info, 0)));
  1659       code -= XFIXNUM (AREF (subset_info, 3));
  1660       if (code < XFIXNAT (AREF (subset_info, 1))
  1661           || code > XFIXNAT (AREF (subset_info, 2)))
  1662         c = -1;
  1663       else
  1664         c = DECODE_CHAR (charset, code);
  1665     }
  1666   else if (method == CHARSET_METHOD_SUPERSET)
  1667     {
  1668       Lisp_Object parents;
  1669 
  1670       parents = CHARSET_SUPERSET (charset);
  1671       c = -1;
  1672       for (; CONSP (parents); parents = XCDR (parents))
  1673         {
  1674           int id = XFIXNUM (XCAR (XCAR (parents)));
  1675           int code_offset = XFIXNUM (XCDR (XCAR (parents)));
  1676           unsigned this_code = code - code_offset;
  1677 
  1678           charset = CHARSET_FROM_ID (id);
  1679           if ((c = DECODE_CHAR (charset, this_code)) >= 0)
  1680             break;
  1681         }
  1682     }
  1683   else
  1684     {
  1685       char_index = CODE_POINT_TO_INDEX (charset, code);
  1686       if (char_index < 0)
  1687         return -1;
  1688 
  1689       if (method == CHARSET_METHOD_MAP)
  1690         {
  1691           Lisp_Object decoder;
  1692 
  1693           decoder = CHARSET_DECODER (charset);
  1694           if (! VECTORP (decoder))
  1695             {
  1696               load_charset (charset, 1);
  1697               decoder = CHARSET_DECODER (charset);
  1698             }
  1699           if (VECTORP (decoder))
  1700             c = XFIXNUM (AREF (decoder, char_index));
  1701           else
  1702             c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
  1703         }
  1704       else                      /* method == CHARSET_METHOD_OFFSET */
  1705         {
  1706           c = char_index + CHARSET_CODE_OFFSET (charset);
  1707           if (CHARSET_UNIFIED_P (charset)
  1708               && MAX_UNICODE_CHAR < c && c <= MAX_5_BYTE_CHAR)
  1709             {
  1710               /* Unify C with a Unicode character if possible.  */
  1711               Lisp_Object val = CHAR_TABLE_REF (Vchar_unify_table, c);
  1712               c = maybe_unify_char (c, val);
  1713             }
  1714         }
  1715     }
  1716 
  1717   return c;
  1718 }
  1719 
  1720 /* Variable used temporarily by the macro ENCODE_CHAR.  */
  1721 Lisp_Object charset_work;
  1722 
  1723 /* Return a code-point of C in CHARSET.  If C doesn't belong to
  1724    CHARSET, return CHARSET_INVALID_CODE (CHARSET).  If STRICT is true,
  1725    use CHARSET's strict_max_char instead of max_char.  */
  1726 
  1727 unsigned
  1728 encode_char (struct charset *charset, int c)
  1729 {
  1730   unsigned code;
  1731   enum charset_method method = CHARSET_METHOD (charset);
  1732 
  1733   if (CHARSET_UNIFIED_P (charset))
  1734     {
  1735       Lisp_Object deunifier;
  1736       int code_index = -1;
  1737 
  1738       deunifier = CHARSET_DEUNIFIER (charset);
  1739       if (! CHAR_TABLE_P (deunifier))
  1740         {
  1741           load_charset (charset, 2);
  1742           deunifier = CHARSET_DEUNIFIER (charset);
  1743         }
  1744       if (CHAR_TABLE_P (deunifier))
  1745         {
  1746           Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
  1747 
  1748           if (FIXNUMP (deunified))
  1749             code_index = XFIXNUM (deunified);
  1750         }
  1751       else
  1752         {
  1753           code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
  1754         }
  1755       if (code_index >= 0)
  1756         c = CHARSET_CODE_OFFSET (charset) + code_index;
  1757     }
  1758 
  1759   if (method == CHARSET_METHOD_SUBSET)
  1760     {
  1761       Lisp_Object subset_info;
  1762       struct charset *this_charset;
  1763 
  1764       subset_info = CHARSET_SUBSET (charset);
  1765       this_charset = CHARSET_FROM_ID (XFIXNAT (AREF (subset_info, 0)));
  1766       code = ENCODE_CHAR (this_charset, c);
  1767       if (code == CHARSET_INVALID_CODE (this_charset)
  1768           || code < XFIXNAT (AREF (subset_info, 1))
  1769           || code > XFIXNAT (AREF (subset_info, 2)))
  1770         return CHARSET_INVALID_CODE (charset);
  1771       code += XFIXNUM (AREF (subset_info, 3));
  1772       return code;
  1773     }
  1774 
  1775   if (method == CHARSET_METHOD_SUPERSET)
  1776     {
  1777       Lisp_Object parents;
  1778 
  1779       parents = CHARSET_SUPERSET (charset);
  1780       for (; CONSP (parents); parents = XCDR (parents))
  1781         {
  1782           int id = XFIXNUM (XCAR (XCAR (parents)));
  1783           int code_offset = XFIXNUM (XCDR (XCAR (parents)));
  1784           struct charset *this_charset = CHARSET_FROM_ID (id);
  1785 
  1786           code = ENCODE_CHAR (this_charset, c);
  1787           if (code != CHARSET_INVALID_CODE (this_charset))
  1788             return code + code_offset;
  1789         }
  1790       return CHARSET_INVALID_CODE (charset);
  1791     }
  1792 
  1793   if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
  1794       || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
  1795     return CHARSET_INVALID_CODE (charset);
  1796 
  1797   if (method == CHARSET_METHOD_MAP)
  1798     {
  1799       Lisp_Object encoder;
  1800       Lisp_Object val;
  1801 
  1802       encoder = CHARSET_ENCODER (charset);
  1803       if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
  1804         {
  1805           load_charset (charset, 2);
  1806           encoder = CHARSET_ENCODER (charset);
  1807         }
  1808       if (CHAR_TABLE_P (encoder))
  1809         {
  1810           val = CHAR_TABLE_REF (encoder, c);
  1811           if (NILP (val))
  1812             return CHARSET_INVALID_CODE (charset);
  1813           code = XFIXNUM (val);
  1814           if (! CHARSET_COMPACT_CODES_P (charset))
  1815             code = INDEX_TO_CODE_POINT (charset, code);
  1816         }
  1817       else
  1818         {
  1819           code = GET_TEMP_CHARSET_WORK_ENCODER (c);
  1820           code = INDEX_TO_CODE_POINT (charset, code);
  1821         }
  1822     }
  1823   else                          /* method == CHARSET_METHOD_OFFSET */
  1824     {
  1825       unsigned code_index = c - CHARSET_CODE_OFFSET (charset);
  1826 
  1827       code = INDEX_TO_CODE_POINT (charset, code_index);
  1828     }
  1829 
  1830   return code;
  1831 }
  1832 
  1833 
  1834 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 2, 0,
  1835        doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
  1836 Return nil if CODE-POINT is not valid in CHARSET.
  1837 
  1838 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE),
  1839 although this usage is obsolescent.  */)
  1840   (Lisp_Object charset, Lisp_Object code_point)
  1841 {
  1842   int c, id;
  1843   unsigned code;
  1844   struct charset *charsetp;
  1845 
  1846   CHECK_CHARSET_GET_ID (charset, id);
  1847   code = cons_to_unsigned (code_point, UINT_MAX);
  1848   charsetp = CHARSET_FROM_ID (id);
  1849   c = DECODE_CHAR (charsetp, code);
  1850   return (c >= 0 ? make_fixnum (c) : Qnil);
  1851 }
  1852 
  1853 
  1854 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 2, 0,
  1855        doc: /* Encode the character CH into a code-point of CHARSET.
  1856 Return the encoded code-point as an integer,
  1857 or nil if CHARSET doesn't support CH.  */)
  1858   (Lisp_Object ch, Lisp_Object charset)
  1859 {
  1860   int c, id;
  1861   unsigned code;
  1862   struct charset *charsetp;
  1863 
  1864   CHECK_CHARSET_GET_ID (charset, id);
  1865   CHECK_CHARACTER (ch);
  1866   c = XFIXNAT (ch);
  1867   charsetp = CHARSET_FROM_ID (id);
  1868   code = ENCODE_CHAR (charsetp, c);
  1869   if (code == CHARSET_INVALID_CODE (charsetp))
  1870     return Qnil;
  1871   /* There are much fewer codepoints in the world than we have positive
  1872      fixnums, so it could be argued that we never really need a bignum,
  1873      e.g. Unicode codepoints only need 21bit, and China's GB-10830
  1874      can fit in 22bit.  Yet we encode GB-10830's chars in a sparse way
  1875      (we just take the 4byte sequences as a 32bit int), so some
  1876      GB-10830 chars (such as 0x81308130 in etc/charsets/gb108304.map) end
  1877      up represented as bignums if EMACS_INT is 32 bits.  */
  1878   return INT_TO_INTEGER (code);
  1879 }
  1880 
  1881 
  1882 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
  1883        doc:
  1884        /* Return a character of CHARSET whose position codes are CODEn.
  1885 
  1886 CODE1 through CODE4 are optional, but if you don't supply sufficient
  1887 position codes, it is assumed that the minimum code in each dimension
  1888 is specified.  */)
  1889   (Lisp_Object charset, Lisp_Object code1, Lisp_Object code2, Lisp_Object code3, Lisp_Object code4)
  1890 {
  1891   int id, dimension;
  1892   struct charset *charsetp;
  1893   unsigned code;
  1894   int c;
  1895 
  1896   CHECK_CHARSET_GET_ID (charset, id);
  1897   charsetp = CHARSET_FROM_ID (id);
  1898 
  1899   dimension = CHARSET_DIMENSION (charsetp);
  1900   if (NILP (code1))
  1901     code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
  1902             ? 0 : CHARSET_MIN_CODE (charsetp));
  1903   else
  1904     {
  1905       CHECK_FIXNAT (code1);
  1906       if (XFIXNAT (code1) >= 0x100)
  1907         args_out_of_range (make_fixnum (0xFF), code1);
  1908       code = XFIXNAT (code1);
  1909 
  1910       if (dimension > 1)
  1911         {
  1912           code <<= 8;
  1913           if (NILP (code2))
  1914             code |= charsetp->code_space[(dimension - 2) * 4];
  1915           else
  1916             {
  1917               CHECK_FIXNAT (code2);
  1918               if (XFIXNAT (code2) >= 0x100)
  1919                 args_out_of_range (make_fixnum (0xFF), code2);
  1920               code |= XFIXNAT (code2);
  1921             }
  1922 
  1923           if (dimension > 2)
  1924             {
  1925               code <<= 8;
  1926               if (NILP (code3))
  1927                 code |= charsetp->code_space[(dimension - 3) * 4];
  1928               else
  1929                 {
  1930                   CHECK_FIXNAT (code3);
  1931                   if (XFIXNAT (code3) >= 0x100)
  1932                     args_out_of_range (make_fixnum (0xFF), code3);
  1933                   code |= XFIXNAT (code3);
  1934                 }
  1935 
  1936               if (dimension > 3)
  1937                 {
  1938                   code <<= 8;
  1939                   if (NILP (code4))
  1940                     code |= charsetp->code_space[0];
  1941                   else
  1942                     {
  1943                       CHECK_FIXNAT (code4);
  1944                       if (XFIXNAT (code4) >= 0x100)
  1945                         args_out_of_range (make_fixnum (0xFF), code4);
  1946                       code |= XFIXNAT (code4);
  1947                     }
  1948                 }
  1949             }
  1950         }
  1951     }
  1952 
  1953   if (CHARSET_ISO_FINAL (charsetp) >= 0)
  1954     code &= 0x7F7F7F7F;
  1955   c = DECODE_CHAR (charsetp, code);
  1956   if (c < 0)
  1957     error ("Invalid code(s)");
  1958   return make_fixnum (c);
  1959 }
  1960 
  1961 
  1962 /* Return the first charset in CHARSET_LIST that contains C.
  1963    CHARSET_LIST is a list of charset IDs.  If it is nil, use
  1964    Vcharset_ordered_list.  */
  1965 
  1966 struct charset *
  1967 char_charset (int c, Lisp_Object charset_list, unsigned int *code_return)
  1968 {
  1969   bool maybe_null = 0;
  1970 
  1971   if (NILP (charset_list))
  1972     charset_list = Vcharset_ordered_list;
  1973   else
  1974     maybe_null = 1;
  1975 
  1976   while (CONSP (charset_list))
  1977     {
  1978       struct charset *charset = CHARSET_FROM_ID (XFIXNUM (XCAR (charset_list)));
  1979       unsigned code = ENCODE_CHAR (charset, c);
  1980 
  1981       if (code != CHARSET_INVALID_CODE (charset))
  1982         {
  1983           if (code_return)
  1984             *code_return = code;
  1985           return charset;
  1986         }
  1987       charset_list = XCDR (charset_list);
  1988       if (! maybe_null
  1989           && c <= MAX_UNICODE_CHAR
  1990           && EQ (charset_list, Vcharset_non_preferred_head))
  1991         return CHARSET_FROM_ID (charset_unicode);
  1992     }
  1993   return (maybe_null ? NULL
  1994           : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
  1995           : CHARSET_FROM_ID (charset_eight_bit));
  1996 }
  1997 
  1998 
  1999 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
  2000        doc:
  2001        /*Return list of charset and one to four position-codes of CH.
  2002 The charset is decided by the current priority order of charsets.
  2003 A position-code is a byte value of each dimension of the code-point of
  2004 CH in the charset.  */)
  2005   (Lisp_Object ch)
  2006 {
  2007   struct charset *charset;
  2008   int c, dimension;
  2009   unsigned code;
  2010   Lisp_Object val;
  2011 
  2012   CHECK_CHARACTER (ch);
  2013   c = XFIXNAT (ch);
  2014   charset = CHAR_CHARSET (c);
  2015   if (! charset)
  2016     emacs_abort ();
  2017   code = ENCODE_CHAR (charset, c);
  2018   if (code == CHARSET_INVALID_CODE (charset))
  2019     emacs_abort ();
  2020   dimension = CHARSET_DIMENSION (charset);
  2021   for (val = Qnil; dimension > 0; dimension--)
  2022     {
  2023       val = Fcons (make_fixnum (code & 0xFF), val);
  2024       code >>= 8;
  2025     }
  2026   return Fcons (CHARSET_NAME (charset), val);
  2027 }
  2028 
  2029 
  2030 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
  2031        doc: /* Return the charset of highest priority that contains CH.
  2032 ASCII characters are an exception: for them, this function always
  2033 returns `ascii'.
  2034 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
  2035 from which to find the charset.  It may also be a coding system.  In
  2036 that case, find the charset from what supported by that coding system.  */)
  2037   (Lisp_Object ch, Lisp_Object restriction)
  2038 {
  2039   struct charset *charset;
  2040 
  2041   CHECK_CHARACTER (ch);
  2042   if (NILP (restriction))
  2043     charset = CHAR_CHARSET (XFIXNUM (ch));
  2044   else
  2045     {
  2046       if (CONSP (restriction))
  2047         {
  2048           int c = XFIXNAT (ch);
  2049 
  2050           for (; CONSP (restriction); restriction = XCDR (restriction))
  2051             {
  2052               struct charset *rcharset;
  2053 
  2054               CHECK_CHARSET_GET_CHARSET (XCAR (restriction), rcharset);
  2055               if (ENCODE_CHAR (rcharset, c) != CHARSET_INVALID_CODE (rcharset))
  2056                 return XCAR (restriction);
  2057             }
  2058           return Qnil;
  2059         }
  2060       restriction = coding_system_charset_list (restriction);
  2061       charset = char_charset (XFIXNUM (ch), restriction, NULL);
  2062       if (! charset)
  2063         return Qnil;
  2064     }
  2065   return (CHARSET_NAME (charset));
  2066 }
  2067 
  2068 
  2069 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
  2070        doc: /*
  2071 Return charset of a character in the current buffer at position POS.
  2072 If POS is nil, it defaults to the current point.
  2073 If POS is out of range, the value is nil.  */)
  2074   (Lisp_Object pos)
  2075 {
  2076   Lisp_Object ch;
  2077   struct charset *charset;
  2078 
  2079   ch = Fchar_after (pos);
  2080   if (! FIXNUMP (ch))
  2081     return ch;
  2082   charset = CHAR_CHARSET (XFIXNUM (ch));
  2083   return (CHARSET_NAME (charset));
  2084 }
  2085 
  2086 
  2087 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
  2088        doc: /*
  2089 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
  2090 
  2091 ISO 2022's designation sequence (escape sequence) distinguishes charsets
  2092 by their DIMENSION, CHARS, and FINAL-CHAR,
  2093 whereas Emacs distinguishes them by charset symbol.
  2094 See the documentation of the function `charset-info' for the meanings of
  2095 DIMENSION, CHARS, and FINAL-CHAR.  */)
  2096   (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
  2097 {
  2098   bool chars_flag = check_iso_charset_parameter (dimension, chars, final_char);
  2099   int id = ISO_CHARSET_TABLE (XFIXNUM (dimension), chars_flag,
  2100                               XFIXNAT (final_char));
  2101   return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
  2102 }
  2103 
  2104 
  2105 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
  2106        0, 0, 0,
  2107        doc: /*
  2108 Internal use only.
  2109 Clear temporary charset mapping tables.
  2110 It should be called only from temacs invoked for dumping.  */)
  2111   (void)
  2112 {
  2113   if (temp_charset_work)
  2114     {
  2115       xfree (temp_charset_work);
  2116       temp_charset_work = NULL;
  2117     }
  2118 
  2119   if (CHAR_TABLE_P (Vchar_unify_table))
  2120     Foptimize_char_table (Vchar_unify_table, Qnil);
  2121 
  2122   return Qnil;
  2123 }
  2124 
  2125 DEFUN ("charset-priority-list", Fcharset_priority_list,
  2126        Scharset_priority_list, 0, 1, 0,
  2127        doc: /* Return the list of charsets ordered by priority.
  2128 HIGHESTP non-nil means just return the highest priority one.  */)
  2129   (Lisp_Object highestp)
  2130 {
  2131   Lisp_Object val = Qnil, list = Vcharset_ordered_list;
  2132 
  2133   if (!NILP (highestp))
  2134     return CHARSET_NAME (CHARSET_FROM_ID (XFIXNUM (Fcar (list))));
  2135 
  2136   while (!NILP (list))
  2137     {
  2138       val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XFIXNUM (XCAR (list)))), val);
  2139       list = XCDR (list);
  2140     }
  2141   return Fnreverse (val);
  2142 }
  2143 
  2144 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
  2145        1, MANY, 0,
  2146        doc: /* Assign higher priority to the charsets given as arguments.
  2147 usage: (set-charset-priority &rest charsets)  */)
  2148   (ptrdiff_t nargs, Lisp_Object *args)
  2149 {
  2150   Lisp_Object new_head, old_list;
  2151   Lisp_Object list_2022, list_emacs_mule;
  2152   ptrdiff_t i;
  2153   int id;
  2154 
  2155   old_list = Fcopy_sequence (Vcharset_ordered_list);
  2156   new_head = Qnil;
  2157   for (i = 0; i < nargs; i++)
  2158     {
  2159       CHECK_CHARSET_GET_ID (args[i], id);
  2160       if (! NILP (Fmemq (make_fixnum (id), old_list)))
  2161         {
  2162           old_list = Fdelq (make_fixnum (id), old_list);
  2163           new_head = Fcons (make_fixnum (id), new_head);
  2164         }
  2165     }
  2166   Vcharset_non_preferred_head = old_list;
  2167   Vcharset_ordered_list = nconc2 (Fnreverse (new_head), old_list);
  2168 
  2169   charset_ordered_list_tick++;
  2170 
  2171   charset_unibyte = -1;
  2172   for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
  2173        CONSP (old_list); old_list = XCDR (old_list))
  2174     {
  2175       if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
  2176         list_2022 = Fcons (XCAR (old_list), list_2022);
  2177       if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
  2178         list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
  2179       if (charset_unibyte < 0)
  2180         {
  2181           struct charset *charset = CHARSET_FROM_ID (XFIXNUM (XCAR (old_list)));
  2182 
  2183           if (CHARSET_DIMENSION (charset) == 1
  2184               && CHARSET_ASCII_COMPATIBLE_P (charset)
  2185               && CHARSET_MAX_CHAR (charset) >= 0x80)
  2186             charset_unibyte = CHARSET_ID (charset);
  2187         }
  2188     }
  2189   Viso_2022_charset_list = Fnreverse (list_2022);
  2190   Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
  2191   if (charset_unibyte < 0)
  2192     charset_unibyte = charset_iso_8859_1;
  2193 
  2194   return Qnil;
  2195 }
  2196 
  2197 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
  2198        0, 1, 0,
  2199        doc: /* Internal use only.
  2200 Return charset identification number of CHARSET.  */)
  2201   (Lisp_Object charset)
  2202 {
  2203   int id;
  2204 
  2205   CHECK_CHARSET_GET_ID (charset, id);
  2206   return make_fixnum (id);
  2207 }
  2208 
  2209 struct charset_sort_data
  2210 {
  2211   Lisp_Object charset;
  2212   int id;
  2213   ptrdiff_t priority;
  2214 };
  2215 
  2216 static int
  2217 charset_compare (const void *d1, const void *d2)
  2218 {
  2219   const struct charset_sort_data *data1 = d1, *data2 = d2;
  2220   if (data1->priority != data2->priority)
  2221     return data1->priority < data2->priority ? -1 : 1;
  2222   return 0;
  2223 }
  2224 
  2225 DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0,
  2226        doc: /* Sort charset list CHARSETS by a priority of each charset.
  2227 Return the sorted list.  CHARSETS is modified by side effects.
  2228 See also `charset-priority-list' and `set-charset-priority'.  */)
  2229      (Lisp_Object charsets)
  2230 {
  2231   ptrdiff_t n = list_length (charsets), i, j;
  2232   int done;
  2233   Lisp_Object tail, elt, attrs;
  2234   struct charset_sort_data *sort_data;
  2235   int id, min_id = INT_MAX, max_id = INT_MIN;
  2236   USE_SAFE_ALLOCA;
  2237 
  2238   if (n == 0)
  2239     return Qnil;
  2240   SAFE_NALLOCA (sort_data, 1, n);
  2241   for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++)
  2242     {
  2243       elt = XCAR (tail);
  2244       CHECK_CHARSET_GET_ATTR (elt, attrs);
  2245       sort_data[i].charset = elt;
  2246       sort_data[i].id = id = XFIXNUM (CHARSET_ATTR_ID (attrs));
  2247       if (id < min_id)
  2248         min_id = id;
  2249       if (id > max_id)
  2250         max_id = id;
  2251     }
  2252   for (done = 0, tail = Vcharset_ordered_list, i = 0;
  2253        done < n && CONSP (tail); tail = XCDR (tail), i++)
  2254     {
  2255       elt = XCAR (tail);
  2256       id = XFIXNAT (elt);
  2257       if (id >= min_id && id <= max_id)
  2258         for (j = 0; j < n; j++)
  2259           if (sort_data[j].id == id)
  2260             {
  2261               sort_data[j].priority = i;
  2262               done++;
  2263             }
  2264     }
  2265   qsort (sort_data, n, sizeof *sort_data, charset_compare);
  2266   for (i = 0, tail = charsets; CONSP (tail); tail = XCDR (tail), i++)
  2267     XSETCAR (tail, sort_data[i].charset);
  2268   SAFE_FREE ();
  2269   return charsets;
  2270 }
  2271 
  2272 
  2273 void
  2274 init_charset (void)
  2275 {
  2276   Lisp_Object tempdir;
  2277   tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
  2278   if (! file_accessible_directory_p (tempdir))
  2279     {
  2280       /* This used to be non-fatal (dir_warning), but it should not
  2281          happen, and if it does sooner or later it will cause some
  2282          obscure problem (eg bug#6401), so better exit.  */
  2283       fprintf (stderr,
  2284                ("Error: %s: %s\n"
  2285                 "Emacs will not function correctly "
  2286                 "without the character map files.\n"
  2287                 "%s"
  2288                 "Please check your installation!\n"),
  2289                SDATA (tempdir), strerror (errno),
  2290                (egetenv ("EMACSDATA")
  2291                 ? ("The EMACSDATA environment variable is set.  "
  2292                    "Maybe it has the wrong value?\n")
  2293                 : ""));
  2294       exit (1);
  2295     }
  2296 
  2297   Vcharset_map_path = list1 (tempdir);
  2298 }
  2299 
  2300 
  2301 void
  2302 init_charset_once (void)
  2303 {
  2304   int i, j, k;
  2305 
  2306   for (i = 0; i < ISO_MAX_DIMENSION; i++)
  2307     for (j = 0; j < ISO_MAX_CHARS; j++)
  2308       for (k = 0; k < ISO_MAX_FINAL; k++)
  2309         iso_charset_table[i][j][k] = -1;
  2310 
  2311   PDUMPER_REMEMBER_SCALAR (iso_charset_table);
  2312 
  2313   for (i = 0; i < 256; i++)
  2314     emacs_mule_charset[i] = -1;
  2315 
  2316   PDUMPER_REMEMBER_SCALAR (emacs_mule_charset);
  2317 
  2318   charset_jisx0201_roman = -1;
  2319   PDUMPER_REMEMBER_SCALAR (charset_jisx0201_roman);
  2320 
  2321   charset_jisx0208_1978 = -1;
  2322   PDUMPER_REMEMBER_SCALAR (charset_jisx0208_1978);
  2323 
  2324   charset_jisx0208 = -1;
  2325   PDUMPER_REMEMBER_SCALAR (charset_jisx0208);
  2326 
  2327   charset_ksc5601 = -1;
  2328   PDUMPER_REMEMBER_SCALAR (charset_ksc5601);
  2329 }
  2330 
  2331 /* Allocate an initial charset table that is large enough to handle
  2332    Emacs while it is bootstrapping.  As of September 2011, the size
  2333    needs to be at least 166; make it a bit bigger to allow for future
  2334    expansion.
  2335 
  2336    Don't make the value so small that the table is reallocated during
  2337    bootstrapping, as glibc malloc calls larger than just under 64 KiB
  2338    during an initial bootstrap wreak havoc after dumping; see the
  2339    M_MMAP_THRESHOLD value in alloc.c, plus there is an extra overhead
  2340    internal to glibc malloc and perhaps to Emacs malloc debugging.  */
  2341 static struct charset charset_table_init[180];
  2342 
  2343 void
  2344 syms_of_charset (void)
  2345 {
  2346   DEFSYM (Qcharsetp, "charsetp");
  2347 
  2348   /* Special charset symbols.  */
  2349   DEFSYM (Qascii, "ascii");
  2350   DEFSYM (Qunicode, "unicode");
  2351   DEFSYM (Qemacs, "emacs");
  2352   DEFSYM (Qeight_bit, "eight-bit");
  2353   DEFSYM (Qiso_8859_1, "iso-8859-1");
  2354 
  2355   staticpro (&Vcharset_ordered_list);
  2356   Vcharset_ordered_list = Qnil;
  2357 
  2358   staticpro (&Viso_2022_charset_list);
  2359   Viso_2022_charset_list = Qnil;
  2360 
  2361   staticpro (&Vemacs_mule_charset_list);
  2362   Vemacs_mule_charset_list = Qnil;
  2363 
  2364   staticpro (&Vcharset_hash_table);
  2365   Vcharset_hash_table = CALLN (Fmake_hash_table, QCtest, Qeq);
  2366 
  2367   charset_table = charset_table_init;
  2368   charset_table_size = ARRAYELTS (charset_table_init);
  2369   PDUMPER_REMEMBER_SCALAR (charset_table_size);
  2370   charset_table_used = 0;
  2371   PDUMPER_REMEMBER_SCALAR (charset_table_used);
  2372 
  2373   defsubr (&Scharsetp);
  2374   defsubr (&Smap_charset_chars);
  2375   defsubr (&Sdefine_charset_internal);
  2376   defsubr (&Sdefine_charset_alias);
  2377   defsubr (&Scharset_plist);
  2378   defsubr (&Sset_charset_plist);
  2379   defsubr (&Sunify_charset);
  2380   defsubr (&Sget_unused_iso_final_char);
  2381   defsubr (&Sdeclare_equiv_charset);
  2382   defsubr (&Sfind_charset_region);
  2383   defsubr (&Sfind_charset_string);
  2384   defsubr (&Sdecode_char);
  2385   defsubr (&Sencode_char);
  2386   defsubr (&Ssplit_char);
  2387   defsubr (&Smake_char);
  2388   defsubr (&Schar_charset);
  2389   defsubr (&Scharset_after);
  2390   defsubr (&Siso_charset);
  2391   defsubr (&Sclear_charset_maps);
  2392   defsubr (&Scharset_priority_list);
  2393   defsubr (&Sset_charset_priority);
  2394   defsubr (&Scharset_id_internal);
  2395   defsubr (&Ssort_charsets);
  2396 
  2397   DEFVAR_LISP ("charset-map-path", Vcharset_map_path,
  2398                doc: /* List of directories to search for charset map files.  */);
  2399   Vcharset_map_path = Qnil;
  2400 
  2401   DEFVAR_BOOL ("inhibit-load-charset-map", inhibit_load_charset_map,
  2402                doc: /* Inhibit loading of charset maps.  Used when dumping Emacs.  */);
  2403   inhibit_load_charset_map = 0;
  2404 
  2405   DEFVAR_LISP ("charset-list", Vcharset_list,
  2406                doc: /* List of all charsets ever defined.  */);
  2407   Vcharset_list = Qnil;
  2408 
  2409   DEFVAR_LISP ("current-iso639-language", Vcurrent_iso639_language,
  2410                doc: /* ISO639 language mnemonic symbol for the current language environment.
  2411 If the current language environment is for multiple languages (e.g. "Latin-1"),
  2412 the value may be a list of mnemonics.  */);
  2413   Vcurrent_iso639_language = Qnil;
  2414 
  2415   charset_ascii
  2416     = define_charset_internal (Qascii, 1, "\x00\x7F\0\0\0\0\0",
  2417                                0, 127, 'B', -1, 0, 1, 0, 0);
  2418   PDUMPER_REMEMBER_SCALAR (charset_ascii);
  2419 
  2420   charset_iso_8859_1
  2421     = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\0\0\0\0\0",
  2422                                0, 255, -1, -1, -1, 1, 0, 0);
  2423   PDUMPER_REMEMBER_SCALAR (charset_iso_8859_1);
  2424 
  2425   charset_unicode
  2426     = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10\0",
  2427                                0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
  2428   PDUMPER_REMEMBER_SCALAR (charset_unicode);
  2429 
  2430   charset_emacs
  2431     = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F\0",
  2432                                0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
  2433   PDUMPER_REMEMBER_SCALAR (charset_emacs);
  2434 
  2435   charset_eight_bit
  2436     = define_charset_internal (Qeight_bit, 1, "\x80\xFF\0\0\0\0\0",
  2437                                128, 255, -1, 0, -1, 0, 1,
  2438                                MAX_5_BYTE_CHAR + 1);
  2439   PDUMPER_REMEMBER_SCALAR (charset_eight_bit);
  2440 
  2441   charset_unibyte = charset_iso_8859_1;
  2442   PDUMPER_REMEMBER_SCALAR (charset_unibyte);
  2443 }

/* [<][>][^][v][top][bottom][index][help] */