1 /* GNU Emacs routines to deal with category tables.
2 
3 Copyright (C) 1998, 2001-2021 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5   2005, 2006, 2007, 2008, 2009, 2010, 2011
6   National Institute of Advanced Industrial Science and Technology (AIST)
7   Registration Number H14PRO021
8 Copyright (C) 2003
9   National Institute of Advanced Industrial Science and Technology (AIST)
10   Registration Number H13PRO009
11 
12 This file is part of GNU Emacs.
13 
14 GNU Emacs is free software: you can redistribute it and/or modify
15 it under the terms of the GNU General Public License as published by
16 the Free Software Foundation, either version 3 of the License, or (at
17 your option) any later version.
18 
19 GNU Emacs is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22 GNU General Public License for more details.
23 
24 You should have received a copy of the GNU General Public License
25 along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
26 
27 
28 /* Here we handle three objects: category, category set, and category
29    table.  Read comments in the file category.h to understand them.  */
30 
31 #include <config.h>
32 
33 #include "lisp.h"
34 #include "character.h"
35 #include "buffer.h"
36 #include "category.h"
37 
38 /* This setter is used only in this file, so it can be private.  */
39 static void
bset_category_table(struct buffer * b,Lisp_Object val)40 bset_category_table (struct buffer *b, Lisp_Object val)
41 {
42   b->category_table_ = val;
43 }
44 
45 
46 /* Category set staff.  */
47 
48 static Lisp_Object
hash_get_category_set(Lisp_Object table,Lisp_Object category_set)49 hash_get_category_set (Lisp_Object table, Lisp_Object category_set)
50 {
51   if (NILP (XCHAR_TABLE (table)->extras[1]))
52     set_char_table_extras
53       (table, 1,
54        make_hash_table (hashtest_equal, DEFAULT_HASH_SIZE,
55 			DEFAULT_REHASH_SIZE, DEFAULT_REHASH_THRESHOLD,
56 			Qnil, false));
57   struct Lisp_Hash_Table *h = XHASH_TABLE (XCHAR_TABLE (table)->extras[1]);
58   Lisp_Object hash;
59   ptrdiff_t i = hash_lookup (h, category_set, &hash);
60   if (i >= 0)
61     return HASH_KEY (h, i);
62   hash_put (h, category_set, Qnil, hash);
63   return category_set;
64 }
65 
66 /* Make CATEGORY_SET include (if VAL) or exclude (if !VAL) CATEGORY.  */
67 
68 static void
set_category_set(Lisp_Object category_set,EMACS_INT category,bool val)69 set_category_set (Lisp_Object category_set, EMACS_INT category, bool val)
70 {
71   bool_vector_set (category_set, category, val);
72 }
73 
74 DEFUN ("make-category-set", Fmake_category_set, Smake_category_set, 1, 1, 0,
75        doc: /* Return a newly created category-set which contains CATEGORIES.
76 CATEGORIES is a string of category mnemonics.
77 The value is a bool-vector which has t at the indices corresponding to
78 those categories.  */)
79   (Lisp_Object categories)
80 {
81   Lisp_Object val;
82   ptrdiff_t len;
83 
84   CHECK_STRING (categories);
85   val = MAKE_CATEGORY_SET;
86 
87   if (STRING_MULTIBYTE (categories))
88     error ("Multibyte string in `make-category-set'");
89 
90   len = SCHARS (categories);
91   while (--len >= 0)
92     {
93       unsigned char cat = SREF (categories, len);
94       Lisp_Object category = make_fixnum (cat);
95 
96       CHECK_CATEGORY (category);
97       set_category_set (val, cat, 1);
98     }
99   return val;
100 }
101 
102 
103 /* Category staff.  */
104 
105 static Lisp_Object check_category_table (Lisp_Object table);
106 
107 DEFUN ("define-category", Fdefine_category, Sdefine_category, 2, 3, 0,
108        doc: /* Define CATEGORY as a category which is described by DOCSTRING.
109 CATEGORY should be an ASCII printing character in the range ` ' to `~'.
110 DOCSTRING is the documentation string of the category.  The first line
111 should be a terse text (preferably less than 16 characters),
112 and the rest lines should be the full description.
113 The category is defined only in category table TABLE, which defaults to
114 the current buffer's category table.  */)
115   (Lisp_Object category, Lisp_Object docstring, Lisp_Object table)
116 {
117   CHECK_CATEGORY (category);
118   CHECK_STRING (docstring);
119   table = check_category_table (table);
120 
121   if (!NILP (CATEGORY_DOCSTRING (table, XFIXNAT (category))))
122     error ("Category `%c' is already defined", (int) XFIXNAT (category));
123   if (!NILP (Vpurify_flag))
124     docstring = Fpurecopy (docstring);
125   SET_CATEGORY_DOCSTRING (table, XFIXNAT (category), docstring);
126 
127   return Qnil;
128 }
129 
130 DEFUN ("category-docstring", Fcategory_docstring, Scategory_docstring, 1, 2, 0,
131        doc: /* Return the documentation string of CATEGORY, as defined in TABLE.
132 TABLE should be a category table and defaults to the current buffer's
133 category table.  */)
134   (Lisp_Object category, Lisp_Object table)
135 {
136   CHECK_CATEGORY (category);
137   table = check_category_table (table);
138 
139   return CATEGORY_DOCSTRING (table, XFIXNAT (category));
140 }
141 
142 DEFUN ("get-unused-category", Fget_unused_category, Sget_unused_category,
143        0, 1, 0,
144        doc: /* Return a category which is not yet defined in TABLE.
145 If no category remains available, return nil.
146 The optional argument TABLE specifies which category table to modify;
147 it defaults to the current buffer's category table.  */)
148   (Lisp_Object table)
149 {
150   int i;
151 
152   table = check_category_table (table);
153 
154   for (i = ' '; i <= '~'; i++)
155     if (NILP (CATEGORY_DOCSTRING (table, i)))
156       return make_fixnum (i);
157 
158   return Qnil;
159 }
160 
161 
162 /* Category-table staff.  */
163 
164 DEFUN ("category-table-p", Fcategory_table_p, Scategory_table_p, 1, 1, 0,
165        doc: /* Return t if ARG is a category table.  */)
166   (Lisp_Object arg)
167 {
168   if (CHAR_TABLE_P (arg)
169       && EQ (XCHAR_TABLE (arg)->purpose, Qcategory_table))
170     return Qt;
171   return Qnil;
172 }
173 
174 /* If TABLE is nil, return the current category table.  If TABLE is
175    not nil, check the validity of TABLE as a category table.  If
176    valid, return TABLE itself, but if not valid, signal an error of
177    wrong-type-argument.  */
178 
179 static Lisp_Object
check_category_table(Lisp_Object table)180 check_category_table (Lisp_Object table)
181 {
182   if (NILP (table))
183     return BVAR (current_buffer, category_table);
184   CHECK_TYPE (!NILP (Fcategory_table_p (table)), Qcategory_table_p, table);
185   return table;
186 }
187 
188 DEFUN ("category-table", Fcategory_table, Scategory_table, 0, 0, 0,
189        doc: /* Return the current category table.
190 This is the one specified by the current buffer.  */)
191   (void)
192 {
193   return BVAR (current_buffer, category_table);
194 }
195 
196 DEFUN ("standard-category-table", Fstandard_category_table,
197    Sstandard_category_table, 0, 0, 0,
198        doc: /* Return the standard category table.
199 This is the one used for new buffers.  */)
200   (void)
201 {
202   return Vstandard_category_table;
203 }
204 
205 
206 static void
copy_category_entry(Lisp_Object table,Lisp_Object c,Lisp_Object val)207 copy_category_entry (Lisp_Object table, Lisp_Object c, Lisp_Object val)
208 {
209   val = Fcopy_sequence (val);
210   if (CONSP (c))
211     char_table_set_range (table, XFIXNUM (XCAR (c)), XFIXNUM (XCDR (c)), val);
212   else
213     char_table_set (table, XFIXNUM (c), val);
214 }
215 
216 /* Return a copy of category table TABLE.  We can't simply use the
217    function copy-sequence because no contents should be shared between
218    the original and the copy.  This function is called recursively by
219    binding TABLE to a sub char table.  */
220 
221 static Lisp_Object
copy_category_table(Lisp_Object table)222 copy_category_table (Lisp_Object table)
223 {
224   table = copy_char_table (table);
225 
226   if (! NILP (XCHAR_TABLE (table)->defalt))
227     set_char_table_defalt (table,
228 			   Fcopy_sequence (XCHAR_TABLE (table)->defalt));
229   set_char_table_extras
230     (table, 0, Fcopy_sequence (XCHAR_TABLE (table)->extras[0]));
231   map_char_table (copy_category_entry, Qnil, table, table);
232 
233   return table;
234 }
235 
236 DEFUN ("copy-category-table", Fcopy_category_table, Scopy_category_table,
237        0, 1, 0,
238        doc: /* Construct a new category table and return it.
239 It is a copy of the TABLE, which defaults to the standard category table.  */)
240   (Lisp_Object table)
241 {
242   if (!NILP (table))
243     check_category_table (table);
244   else
245     table = Vstandard_category_table;
246 
247   return copy_category_table (table);
248 }
249 
250 DEFUN ("make-category-table", Fmake_category_table, Smake_category_table,
251        0, 0, 0,
252        doc: /* Construct a new and empty category table and return it.  */)
253   (void)
254 {
255   Lisp_Object val;
256   int i;
257 
258   val = Fmake_char_table (Qcategory_table, Qnil);
259   set_char_table_defalt (val, MAKE_CATEGORY_SET);
260   for (i = 0; i < (1 << CHARTAB_SIZE_BITS_0); i++)
261     set_char_table_contents (val, i, MAKE_CATEGORY_SET);
262   Fset_char_table_extra_slot (val, make_fixnum (0), make_nil_vector (95));
263   return val;
264 }
265 
266 DEFUN ("set-category-table", Fset_category_table, Sset_category_table, 1, 1, 0,
267        doc: /* Specify TABLE as the category table for the current buffer.
268 Return TABLE.  */)
269   (Lisp_Object table)
270 {
271   int idx;
272   table = check_category_table (table);
273   bset_category_table (current_buffer, table);
274   /* Indicate that this buffer now has a specified category table.  */
275   idx = PER_BUFFER_VAR_IDX (category_table);
276   SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
277   return table;
278 }
279 
280 
281 Lisp_Object
char_category_set(int c)282 char_category_set (int c)
283 {
284   return CHAR_TABLE_REF (BVAR (current_buffer, category_table), c);
285 }
286 
287 DEFUN ("char-category-set", Fchar_category_set, Schar_category_set, 1, 1, 0,
288        doc: /* Return the category set of CHAR.
289 usage: (char-category-set CHAR)  */)
290   (Lisp_Object ch)
291 {
292   CHECK_CHARACTER (ch);
293   return CATEGORY_SET (XFIXNAT (ch));
294 }
295 
296 DEFUN ("category-set-mnemonics", Fcategory_set_mnemonics,
297        Scategory_set_mnemonics, 1, 1, 0,
298        doc: /* Return a string containing mnemonics of the categories in CATEGORY-SET.
299 CATEGORY-SET is a bool-vector, and the categories \"in\" it are those
300 that are indexes where t occurs in the bool-vector.
301 The return value is a string containing those same categories.  */)
302   (Lisp_Object category_set)
303 {
304   int i, j;
305   char str[96];
306 
307   CHECK_CATEGORY_SET (category_set);
308 
309   j = 0;
310   for (i = 32; i < 127; i++)
311     if (CATEGORY_MEMBER (i, category_set))
312       str[j++] = i;
313   str[j] = '\0';
314 
315   return build_string (str);
316 }
317 
318 DEFUN ("modify-category-entry", Fmodify_category_entry,
319        Smodify_category_entry, 2, 4, 0,
320        doc: /* Modify the category set of CHARACTER by adding CATEGORY to it.
321 The category is changed only for table TABLE, which defaults to
322 the current buffer's category table.
323 CHARACTER can be either a single character or a cons representing the
324 lower and upper ends of an inclusive character range to modify.
325 CATEGORY must be a category name (a character between ` ' and `~').
326 Use `describe-categories' to see existing category names.
327 If optional fourth argument RESET is non-nil,
328 then delete CATEGORY from the category set instead of adding it.  */)
329   (Lisp_Object character, Lisp_Object category, Lisp_Object table, Lisp_Object reset)
330 {
331   bool set_value;	/* Actual value to be set in category sets.  */
332   Lisp_Object category_set;
333   int start, end;
334   int from, to;
335 
336   if (FIXNUMP (character))
337     {
338       CHECK_CHARACTER (character);
339       start = end = XFIXNAT (character);
340     }
341   else
342     {
343       CHECK_CONS (character);
344       CHECK_CHARACTER_CAR (character);
345       CHECK_CHARACTER_CDR (character);
346       start = XFIXNAT (XCAR (character));
347       end = XFIXNAT (XCDR (character));
348     }
349 
350   CHECK_CATEGORY (category);
351   table = check_category_table (table);
352 
353   if (NILP (CATEGORY_DOCSTRING (table, XFIXNAT (category))))
354     error ("Undefined category: %c", (int) XFIXNAT (category));
355 
356   set_value = NILP (reset);
357 
358   while (start <= end)
359     {
360       from = start, to = end;
361       category_set = char_table_ref_and_range (table, start, &from, &to);
362       if (CATEGORY_MEMBER (XFIXNAT (category), category_set) != NILP (reset))
363 	{
364 	  category_set = Fcopy_sequence (category_set);
365 	  set_category_set (category_set, XFIXNAT (category), set_value);
366 	  category_set = hash_get_category_set (table, category_set);
367 	  char_table_set_range (table, start, to, category_set);
368 	}
369       start = to + 1;
370     }
371 
372   return Qnil;
373 }
374 
375 /* Return true if there is a word boundary between two word-constituent
376    characters C1 and C2 if they appear in this order.
377    Use the macro WORD_BOUNDARY_P instead of calling this function
378    directly.  */
379 
380 bool
word_boundary_p(int c1,int c2)381 word_boundary_p (int c1, int c2)
382 {
383   Lisp_Object category_set1, category_set2;
384   Lisp_Object tail;
385   bool default_result;
386 
387   if (EQ (CHAR_TABLE_REF (Vchar_script_table, c1),
388 	  CHAR_TABLE_REF (Vchar_script_table, c2)))
389     {
390       tail = Vword_separating_categories;
391       default_result = 0;
392     }
393   else
394     {
395       tail = Vword_combining_categories;
396       default_result = 1;
397     }
398 
399   category_set1 = CATEGORY_SET (c1);
400   if (NILP (category_set1))
401     return default_result;
402   category_set2 = CATEGORY_SET (c2);
403   if (NILP (category_set2))
404     return default_result;
405 
406   for (; CONSP (tail); tail = XCDR (tail))
407     {
408       Lisp_Object elt = XCAR (tail);
409 
410       if (CONSP (elt)
411 	  && (NILP (XCAR (elt))
412 	      || (CATEGORYP (XCAR (elt))
413 		  && CATEGORY_MEMBER (XFIXNAT (XCAR (elt)), category_set1)
414 		  && ! CATEGORY_MEMBER (XFIXNAT (XCAR (elt)), category_set2)))
415 	  && (NILP (XCDR (elt))
416 	      || (CATEGORYP (XCDR (elt))
417 		  && ! CATEGORY_MEMBER (XFIXNAT (XCDR (elt)), category_set1)
418 		  && CATEGORY_MEMBER (XFIXNAT (XCDR (elt)), category_set2))))
419 	return !default_result;
420     }
421   return default_result;
422 }
423 
424 
425 void
init_category_once(void)426 init_category_once (void)
427 {
428   /* This has to be done here, before we call Fmake_char_table.  */
429   DEFSYM (Qcategory_table, "category-table");
430   Fput (Qcategory_table, Qchar_table_extra_slots, make_fixnum (2));
431 
432   Vstandard_category_table = Fmake_char_table (Qcategory_table, Qnil);
433   /* Set a category set which contains nothing to the default.  */
434   set_char_table_defalt (Vstandard_category_table, MAKE_CATEGORY_SET);
435   Fset_char_table_extra_slot (Vstandard_category_table, make_fixnum (0),
436 			      make_nil_vector (95));
437 }
438 
439 void
syms_of_category(void)440 syms_of_category (void)
441 {
442   DEFSYM (Qcategoryp, "categoryp");
443   DEFSYM (Qcategorysetp, "categorysetp");
444   DEFSYM (Qcategory_table_p, "category-table-p");
445 
446   DEFVAR_LISP ("word-combining-categories", Vword_combining_categories,
447 	       doc: /* List of pair (cons) of categories to determine word boundary.
448 
449 Emacs treats a sequence of word constituent characters as a single
450 word (i.e. finds no word boundary between them) only if they belong to
451 the same script.  But, exceptions are allowed in the following cases.
452 
453 \(1) The case that characters are in different scripts is controlled
454 by the variable `word-combining-categories'.
455 
456 Emacs finds no word boundary between characters of different scripts
457 if they have categories matching some element of this list.
458 
459 More precisely, if an element of this list is a cons of category CAT1
460 and CAT2, and a multibyte character C1 which has CAT1 is followed by
461 C2 which has CAT2, there's no word boundary between C1 and C2.
462 
463 For instance, to tell that Han characters followed by Hiragana
464 characters can form a single word, the element `(?C . ?H)' should be
465 in this list.
466 
467 \(2) The case that character are in the same script is controlled by
468 the variable `word-separating-categories'.
469 
470 Emacs finds a word boundary between characters of the same script
471 if they have categories matching some element of this list.
472 
473 More precisely, if an element of this list is a cons of category CAT1
474 and CAT2, and a multibyte character C1 which has CAT1 but not CAT2 is
475 followed by C2 which has CAT2 but not CAT1, there's a word boundary
476 between C1 and C2.
477 
478 For instance, to tell that there's a word boundary between Hiragana
479 and Katakana (both are in the same script `kana'),
480 the element `(?H . ?K)' should be in this list.  */);
481 
482   Vword_combining_categories = Qnil;
483 
484   DEFVAR_LISP ("word-separating-categories", Vword_separating_categories,
485 	       doc: /* List of pair (cons) of categories to determine word boundary.
486 See the documentation of the variable `word-combining-categories'.  */);
487 
488   Vword_separating_categories = Qnil;
489 
490   defsubr (&Smake_category_set);
491   defsubr (&Sdefine_category);
492   defsubr (&Scategory_docstring);
493   defsubr (&Sget_unused_category);
494   defsubr (&Scategory_table_p);
495   defsubr (&Scategory_table);
496   defsubr (&Sstandard_category_table);
497   defsubr (&Scopy_category_table);
498   defsubr (&Smake_category_table);
499   defsubr (&Sset_category_table);
500   defsubr (&Schar_category_set);
501   defsubr (&Scategory_set_mnemonics);
502   defsubr (&Smodify_category_entry);
503 }
504