root/src/category.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. bset_category_table
  2. hash_get_category_set
  3. set_category_set
  4. DEFUN
  5. DEFUN
  6. DEFUN
  7. check_category_table
  8. DEFUN
  9. DEFUN
  10. copy_category_entry
  11. copy_category_table
  12. DEFUN
  13. DEFUN
  14. DEFUN
  15. char_category_set
  16. DEFUN
  17. DEFUN
  18. word_boundary_p
  19. init_category_once
  20. syms_of_category

     1 /* GNU Emacs routines to deal with category tables.
     2 
     3 Copyright (C) 1998, 2001-2023 Free Software Foundation, Inc.
     4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
     5   2005, 2006, 2007, 2008, 2009, 2010, 2011
     6   National Institute of Advanced Industrial Science and Technology (AIST)
     7   Registration Number H14PRO021
     8 Copyright (C) 2003
     9   National Institute of Advanced Industrial Science and Technology (AIST)
    10   Registration Number H13PRO009
    11 
    12 This file is part of GNU Emacs.
    13 
    14 GNU Emacs is free software: you can redistribute it and/or modify
    15 it under the terms of the GNU General Public License as published by
    16 the Free Software Foundation, either version 3 of the License, or (at
    17 your option) any later version.
    18 
    19 GNU Emacs is distributed in the hope that it will be useful,
    20 but WITHOUT ANY WARRANTY; without even the implied warranty of
    21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    22 GNU General Public License for more details.
    23 
    24 You should have received a copy of the GNU General Public License
    25 along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
    26 
    27 
    28 /* Here we handle three objects: category, category set, and category
    29    table.  Read comments in the file category.h to understand them.  */
    30 
    31 #include <config.h>
    32 
    33 #include "lisp.h"
    34 #include "character.h"
    35 #include "buffer.h"
    36 #include "category.h"
    37 
    38 /* This setter is used only in this file, so it can be private.  */
    39 static void
    40 bset_category_table (struct buffer *b, Lisp_Object val)
    41 {
    42   b->category_table_ = val;
    43 }
    44 
    45 
    46 /* Category set staff.  */
    47 
    48 static Lisp_Object
    49 hash_get_category_set (Lisp_Object table, Lisp_Object category_set)
    50 {
    51   if (NILP (XCHAR_TABLE (table)->extras[1]))
    52     set_char_table_extras
    53       (table, 1,
    54        make_hash_table (hashtest_equal, DEFAULT_HASH_SIZE,
    55                         DEFAULT_REHASH_SIZE, DEFAULT_REHASH_THRESHOLD,
    56                         Qnil, false));
    57   struct Lisp_Hash_Table *h = XHASH_TABLE (XCHAR_TABLE (table)->extras[1]);
    58   Lisp_Object hash;
    59   ptrdiff_t i = hash_lookup (h, category_set, &hash);
    60   if (i >= 0)
    61     return HASH_KEY (h, i);
    62   hash_put (h, category_set, Qnil, hash);
    63   return category_set;
    64 }
    65 
    66 /* Make CATEGORY_SET include (if VAL) or exclude (if !VAL) CATEGORY.  */
    67 
    68 static void
    69 set_category_set (Lisp_Object category_set, EMACS_INT category, bool val)
    70 {
    71   bool_vector_set (category_set, category, val);
    72 }
    73 
    74 DEFUN ("make-category-set", Fmake_category_set, Smake_category_set, 1, 1, 0,
    75        doc: /* Return a newly created category-set which contains CATEGORIES.
    76 CATEGORIES is a string of category mnemonics.
    77 The value is a bool-vector which has t at the indices corresponding to
    78 those categories.  */)
    79   (Lisp_Object categories)
    80 {
    81   Lisp_Object val;
    82   ptrdiff_t len;
    83 
    84   CHECK_STRING (categories);
    85   val = MAKE_CATEGORY_SET;
    86 
    87   if (STRING_MULTIBYTE (categories))
    88     error ("Multibyte string in `make-category-set'");
    89 
    90   len = SCHARS (categories);
    91   while (--len >= 0)
    92     {
    93       unsigned char cat = SREF (categories, len);
    94       Lisp_Object category = make_fixnum (cat);
    95 
    96       CHECK_CATEGORY (category);
    97       set_category_set (val, cat, 1);
    98     }
    99   return val;
   100 }
   101 
   102 
   103 /* Category staff.  */
   104 
   105 static Lisp_Object check_category_table (Lisp_Object table);
   106 
   107 DEFUN ("define-category", Fdefine_category, Sdefine_category, 2, 3, 0,
   108        doc: /* Define CATEGORY as a category which is described by DOCSTRING.
   109 CATEGORY should be an ASCII printing character in the range ` ' to `~'.
   110 DOCSTRING is the documentation string of the category.  The first line
   111 should be a terse text (preferably less than 16 characters),
   112 and the rest lines should be the full description.
   113 The category is defined only in category table TABLE, which defaults to
   114 the current buffer's category table.  */)
   115   (Lisp_Object category, Lisp_Object docstring, Lisp_Object table)
   116 {
   117   CHECK_CATEGORY (category);
   118   CHECK_STRING (docstring);
   119   table = check_category_table (table);
   120 
   121   if (!NILP (CATEGORY_DOCSTRING (table, XFIXNAT (category))))
   122     error ("Category `%c' is already defined", (int) XFIXNAT (category));
   123   if (!NILP (Vpurify_flag))
   124     docstring = Fpurecopy (docstring);
   125   SET_CATEGORY_DOCSTRING (table, XFIXNAT (category), docstring);
   126 
   127   return Qnil;
   128 }
   129 
   130 DEFUN ("category-docstring", Fcategory_docstring, Scategory_docstring, 1, 2, 0,
   131        doc: /* Return the documentation string of CATEGORY, as defined in TABLE.
   132 TABLE should be a category table and defaults to the current buffer's
   133 category table.  */)
   134   (Lisp_Object category, Lisp_Object table)
   135 {
   136   CHECK_CATEGORY (category);
   137   table = check_category_table (table);
   138 
   139   return CATEGORY_DOCSTRING (table, XFIXNAT (category));
   140 }
   141 
   142 DEFUN ("get-unused-category", Fget_unused_category, Sget_unused_category,
   143        0, 1, 0,
   144        doc: /* Return a category which is not yet defined in TABLE.
   145 If no category remains available, return nil.
   146 The optional argument TABLE specifies which category table to modify;
   147 it defaults to the current buffer's category table.  */)
   148   (Lisp_Object table)
   149 {
   150   int i;
   151 
   152   table = check_category_table (table);
   153 
   154   for (i = ' '; i <= '~'; i++)
   155     if (NILP (CATEGORY_DOCSTRING (table, i)))
   156       return make_fixnum (i);
   157 
   158   return Qnil;
   159 }
   160 
   161 
   162 /* Category-table staff.  */
   163 
   164 DEFUN ("category-table-p", Fcategory_table_p, Scategory_table_p, 1, 1, 0,
   165        doc: /* Return t if ARG is a category table.  */)
   166   (Lisp_Object arg)
   167 {
   168   if (CHAR_TABLE_P (arg)
   169       && EQ (XCHAR_TABLE (arg)->purpose, Qcategory_table))
   170     return Qt;
   171   return Qnil;
   172 }
   173 
   174 /* If TABLE is nil, return the current category table.  If TABLE is
   175    not nil, check the validity of TABLE as a category table.  If
   176    valid, return TABLE itself, but if not valid, signal an error of
   177    wrong-type-argument.  */
   178 
   179 static Lisp_Object
   180 check_category_table (Lisp_Object table)
   181 {
   182   if (NILP (table))
   183     return BVAR (current_buffer, category_table);
   184   CHECK_TYPE (!NILP (Fcategory_table_p (table)), Qcategory_table_p, table);
   185   return table;
   186 }
   187 
   188 DEFUN ("category-table", Fcategory_table, Scategory_table, 0, 0, 0,
   189        doc: /* Return the current category table.
   190 This is the one specified by the current buffer.  */)
   191   (void)
   192 {
   193   return BVAR (current_buffer, category_table);
   194 }
   195 
   196 DEFUN ("standard-category-table", Fstandard_category_table,
   197    Sstandard_category_table, 0, 0, 0,
   198        doc: /* Return the standard category table.
   199 This is the one used for new buffers.  */)
   200   (void)
   201 {
   202   return Vstandard_category_table;
   203 }
   204 
   205 
   206 static void
   207 copy_category_entry (Lisp_Object table, Lisp_Object c, Lisp_Object val)
   208 {
   209   val = Fcopy_sequence (val);
   210   if (CONSP (c))
   211     char_table_set_range (table, XFIXNUM (XCAR (c)), XFIXNUM (XCDR (c)), val);
   212   else
   213     char_table_set (table, XFIXNUM (c), val);
   214 }
   215 
   216 /* Return a copy of category table TABLE.  We can't simply use the
   217    function copy-sequence because no contents should be shared between
   218    the original and the copy.  This function is called recursively by
   219    binding TABLE to a sub char table.  */
   220 
   221 static Lisp_Object
   222 copy_category_table (Lisp_Object table)
   223 {
   224   table = copy_char_table (table);
   225 
   226   if (! NILP (XCHAR_TABLE (table)->defalt))
   227     set_char_table_defalt (table,
   228                            Fcopy_sequence (XCHAR_TABLE (table)->defalt));
   229   set_char_table_extras
   230     (table, 0, Fcopy_sequence (XCHAR_TABLE (table)->extras[0]));
   231   map_char_table (copy_category_entry, Qnil, table, table);
   232 
   233   return table;
   234 }
   235 
   236 DEFUN ("copy-category-table", Fcopy_category_table, Scopy_category_table,
   237        0, 1, 0,
   238        doc: /* Construct a new category table and return it.
   239 It is a copy of the TABLE, which defaults to the standard category table.  */)
   240   (Lisp_Object table)
   241 {
   242   if (!NILP (table))
   243     check_category_table (table);
   244   else
   245     table = Vstandard_category_table;
   246 
   247   return copy_category_table (table);
   248 }
   249 
   250 DEFUN ("make-category-table", Fmake_category_table, Smake_category_table,
   251        0, 0, 0,
   252        doc: /* Construct a new and empty category table and return it.  */)
   253   (void)
   254 {
   255   Lisp_Object val;
   256   int i;
   257 
   258   val = Fmake_char_table (Qcategory_table, Qnil);
   259   set_char_table_defalt (val, MAKE_CATEGORY_SET);
   260   for (i = 0; i < (1 << CHARTAB_SIZE_BITS_0); i++)
   261     set_char_table_contents (val, i, MAKE_CATEGORY_SET);
   262   Fset_char_table_extra_slot (val, make_fixnum (0), make_nil_vector (95));
   263   return val;
   264 }
   265 
   266 DEFUN ("set-category-table", Fset_category_table, Sset_category_table, 1, 1, 0,
   267        doc: /* Specify TABLE as the category table for the current buffer.
   268 Return TABLE.  */)
   269   (Lisp_Object table)
   270 {
   271   int idx;
   272   table = check_category_table (table);
   273   bset_category_table (current_buffer, table);
   274   /* Indicate that this buffer now has a specified category table.  */
   275   idx = PER_BUFFER_VAR_IDX (category_table);
   276   SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
   277   return table;
   278 }
   279 
   280 
   281 Lisp_Object
   282 char_category_set (int c)
   283 {
   284   return CHAR_TABLE_REF (BVAR (current_buffer, category_table), c);
   285 }
   286 
   287 DEFUN ("char-category-set", Fchar_category_set, Schar_category_set, 1, 1, 0,
   288        doc: /* Return the category set of CHAR.
   289 usage: (char-category-set CHAR)  */)
   290   (Lisp_Object ch)
   291 {
   292   CHECK_CHARACTER (ch);
   293   return CATEGORY_SET (XFIXNAT (ch));
   294 }
   295 
   296 DEFUN ("category-set-mnemonics", Fcategory_set_mnemonics,
   297        Scategory_set_mnemonics, 1, 1, 0,
   298        doc: /* Return a string containing mnemonics of the categories in CATEGORY-SET.
   299 CATEGORY-SET is a bool-vector, and the categories \"in\" it are those
   300 that are indexes where t occurs in the bool-vector.
   301 The return value is a string containing those same categories.  */)
   302   (Lisp_Object category_set)
   303 {
   304   int i, j;
   305   char str[96];
   306 
   307   CHECK_CATEGORY_SET (category_set);
   308 
   309   j = 0;
   310   for (i = 32; i < 127; i++)
   311     if (CATEGORY_MEMBER (i, category_set))
   312       str[j++] = i;
   313   str[j] = '\0';
   314 
   315   return build_string (str);
   316 }
   317 
   318 DEFUN ("modify-category-entry", Fmodify_category_entry,
   319        Smodify_category_entry, 2, 4, 0,
   320        doc: /* Modify the category set of CHARACTER by adding CATEGORY to it.
   321 The category is changed only for table TABLE, which defaults to
   322 the current buffer's category table.
   323 CHARACTER can be either a single character or a cons representing the
   324 lower and upper ends of an inclusive character range to modify.
   325 CATEGORY must be a category name (a character between ` ' and `~').
   326 Use `describe-categories' to see existing category names.
   327 If optional fourth argument RESET is non-nil,
   328 then delete CATEGORY from the category set instead of adding it.  */)
   329   (Lisp_Object character, Lisp_Object category, Lisp_Object table, Lisp_Object reset)
   330 {
   331   bool set_value;       /* Actual value to be set in category sets.  */
   332   Lisp_Object category_set;
   333   int start, end;
   334   int from, to;
   335 
   336   if (FIXNUMP (character))
   337     {
   338       CHECK_CHARACTER (character);
   339       start = end = XFIXNAT (character);
   340     }
   341   else
   342     {
   343       CHECK_CONS (character);
   344       CHECK_CHARACTER_CAR (character);
   345       CHECK_CHARACTER_CDR (character);
   346       start = XFIXNAT (XCAR (character));
   347       end = XFIXNAT (XCDR (character));
   348     }
   349 
   350   CHECK_CATEGORY (category);
   351   table = check_category_table (table);
   352 
   353   if (NILP (CATEGORY_DOCSTRING (table, XFIXNAT (category))))
   354     error ("Undefined category: %c", (int) XFIXNAT (category));
   355 
   356   set_value = NILP (reset);
   357 
   358   while (start <= end)
   359     {
   360       from = start, to = end;
   361       category_set = char_table_ref_and_range (table, start, &from, &to);
   362       if (CATEGORY_MEMBER (XFIXNAT (category), category_set) != NILP (reset))
   363         {
   364           category_set = Fcopy_sequence (category_set);
   365           set_category_set (category_set, XFIXNAT (category), set_value);
   366           category_set = hash_get_category_set (table, category_set);
   367           char_table_set_range (table, start, to, category_set);
   368         }
   369       start = to + 1;
   370     }
   371 
   372   return Qnil;
   373 }
   374 
   375 /* Return true if there is a word boundary between two word-constituent
   376    characters C1 and C2 if they appear in this order.
   377    Use the macro WORD_BOUNDARY_P instead of calling this function
   378    directly.  */
   379 
   380 bool
   381 word_boundary_p (int c1, int c2)
   382 {
   383   Lisp_Object category_set1, category_set2;
   384   Lisp_Object tail;
   385   bool default_result;
   386 
   387   if (EQ (CHAR_TABLE_REF (Vchar_script_table, c1),
   388           CHAR_TABLE_REF (Vchar_script_table, c2)))
   389     {
   390       tail = Vword_separating_categories;
   391       default_result = 0;
   392     }
   393   else
   394     {
   395       tail = Vword_combining_categories;
   396       default_result = 1;
   397     }
   398 
   399   category_set1 = CATEGORY_SET (c1);
   400   if (NILP (category_set1))
   401     return default_result;
   402   category_set2 = CATEGORY_SET (c2);
   403   if (NILP (category_set2))
   404     return default_result;
   405 
   406   for (; CONSP (tail); tail = XCDR (tail))
   407     {
   408       Lisp_Object elt = XCAR (tail);
   409 
   410       if (CONSP (elt)
   411           && (NILP (XCAR (elt))
   412               || (CATEGORYP (XCAR (elt))
   413                   && CATEGORY_MEMBER (XFIXNAT (XCAR (elt)), category_set1)
   414                   && ! CATEGORY_MEMBER (XFIXNAT (XCAR (elt)), category_set2)))
   415           && (NILP (XCDR (elt))
   416               || (CATEGORYP (XCDR (elt))
   417                   && ! CATEGORY_MEMBER (XFIXNAT (XCDR (elt)), category_set1)
   418                   && CATEGORY_MEMBER (XFIXNAT (XCDR (elt)), category_set2))))
   419         return !default_result;
   420     }
   421   return default_result;
   422 }
   423 
   424 
   425 void
   426 init_category_once (void)
   427 {
   428   /* This has to be done here, before we call Fmake_char_table.  */
   429   DEFSYM (Qcategory_table, "category-table");
   430   Fput (Qcategory_table, Qchar_table_extra_slots, make_fixnum (2));
   431 
   432   Vstandard_category_table = Fmake_char_table (Qcategory_table, Qnil);
   433   /* Set a category set which contains nothing to the default.  */
   434   set_char_table_defalt (Vstandard_category_table, MAKE_CATEGORY_SET);
   435   Fset_char_table_extra_slot (Vstandard_category_table, make_fixnum (0),
   436                               make_nil_vector (95));
   437 }
   438 
   439 void
   440 syms_of_category (void)
   441 {
   442   DEFSYM (Qcategoryp, "categoryp");
   443   DEFSYM (Qcategorysetp, "categorysetp");
   444   DEFSYM (Qcategory_table_p, "category-table-p");
   445 
   446   DEFVAR_LISP ("word-combining-categories", Vword_combining_categories,
   447                doc: /* List of pair (cons) of categories to determine word boundary.
   448 
   449 Emacs treats a sequence of word constituent characters as a single
   450 word (i.e. finds no word boundary between them) only if they belong to
   451 the same script.  But, exceptions are allowed in the following cases.
   452 
   453 \(1) The case that characters are in different scripts is controlled
   454 by the variable `word-combining-categories'.
   455 
   456 Emacs finds no word boundary between characters of different scripts
   457 if they have categories matching some element of this list.
   458 
   459 More precisely, if an element of this list is a cons of category CAT1
   460 and CAT2, and a multibyte character C1 which has CAT1 is followed by
   461 C2 which has CAT2, there's no word boundary between C1 and C2.
   462 
   463 For instance, to tell that Han characters followed by Hiragana
   464 characters can form a single word, the element `(?C . ?H)' should be
   465 in this list.
   466 
   467 \(2) The case that character are in the same script is controlled by
   468 the variable `word-separating-categories'.
   469 
   470 Emacs finds a word boundary between characters of the same script
   471 if they have categories matching some element of this list.
   472 
   473 More precisely, if an element of this list is a cons of category CAT1
   474 and CAT2, and a multibyte character C1 which has CAT1 but not CAT2 is
   475 followed by C2 which has CAT2 but not CAT1, there's a word boundary
   476 between C1 and C2.
   477 
   478 For instance, to tell that there's a word boundary between Hiragana
   479 and Katakana (both are in the same script `kana'),
   480 the element `(?H . ?K)' should be in this list.  */);
   481 
   482   Vword_combining_categories = Qnil;
   483 
   484   DEFVAR_LISP ("word-separating-categories", Vword_separating_categories,
   485                doc: /* List of pair (cons) of categories to determine word boundary.
   486 See the documentation of the variable `word-combining-categories'.  */);
   487 
   488   Vword_separating_categories = Qnil;
   489 
   490   defsubr (&Smake_category_set);
   491   defsubr (&Sdefine_category);
   492   defsubr (&Scategory_docstring);
   493   defsubr (&Sget_unused_category);
   494   defsubr (&Scategory_table_p);
   495   defsubr (&Scategory_table);
   496   defsubr (&Sstandard_category_table);
   497   defsubr (&Scopy_category_table);
   498   defsubr (&Smake_category_table);
   499   defsubr (&Sset_category_table);
   500   defsubr (&Schar_category_set);
   501   defsubr (&Scategory_set_mnemonics);
   502   defsubr (&Smodify_category_entry);
   503 }

/* [<][>][^][v][top][bottom][index][help] */