1 /* GNU Emacs routines to deal with category tables.
2
3 Copyright (C) 1998, 2001-2021 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009, 2010, 2011
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
8 Copyright (C) 2003
9 National Institute of Advanced Industrial Science and Technology (AIST)
10 Registration Number H13PRO009
11
12 This file is part of GNU Emacs.
13
14 GNU Emacs is free software: you can redistribute it and/or modify
15 it under the terms of the GNU General Public License as published by
16 the Free Software Foundation, either version 3 of the License, or (at
17 your option) any later version.
18
19 GNU Emacs is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
26
27
28 /* Here we handle three objects: category, category set, and category
29 table. Read comments in the file category.h to understand them. */
30
31 #include <config.h>
32
33 #include "lisp.h"
34 #include "character.h"
35 #include "buffer.h"
36 #include "category.h"
37
38 /* This setter is used only in this file, so it can be private. */
39 static void
bset_category_table(struct buffer * b,Lisp_Object val)40 bset_category_table (struct buffer *b, Lisp_Object val)
41 {
42 b->category_table_ = val;
43 }
44
45
46 /* Category set staff. */
47
48 static Lisp_Object
hash_get_category_set(Lisp_Object table,Lisp_Object category_set)49 hash_get_category_set (Lisp_Object table, Lisp_Object category_set)
50 {
51 if (NILP (XCHAR_TABLE (table)->extras[1]))
52 set_char_table_extras
53 (table, 1,
54 make_hash_table (hashtest_equal, DEFAULT_HASH_SIZE,
55 DEFAULT_REHASH_SIZE, DEFAULT_REHASH_THRESHOLD,
56 Qnil, false));
57 struct Lisp_Hash_Table *h = XHASH_TABLE (XCHAR_TABLE (table)->extras[1]);
58 Lisp_Object hash;
59 ptrdiff_t i = hash_lookup (h, category_set, &hash);
60 if (i >= 0)
61 return HASH_KEY (h, i);
62 hash_put (h, category_set, Qnil, hash);
63 return category_set;
64 }
65
66 /* Make CATEGORY_SET include (if VAL) or exclude (if !VAL) CATEGORY. */
67
68 static void
set_category_set(Lisp_Object category_set,EMACS_INT category,bool val)69 set_category_set (Lisp_Object category_set, EMACS_INT category, bool val)
70 {
71 bool_vector_set (category_set, category, val);
72 }
73
74 DEFUN ("make-category-set", Fmake_category_set, Smake_category_set, 1, 1, 0,
75 doc: /* Return a newly created category-set which contains CATEGORIES.
76 CATEGORIES is a string of category mnemonics.
77 The value is a bool-vector which has t at the indices corresponding to
78 those categories. */)
79 (Lisp_Object categories)
80 {
81 Lisp_Object val;
82 ptrdiff_t len;
83
84 CHECK_STRING (categories);
85 val = MAKE_CATEGORY_SET;
86
87 if (STRING_MULTIBYTE (categories))
88 error ("Multibyte string in `make-category-set'");
89
90 len = SCHARS (categories);
91 while (--len >= 0)
92 {
93 unsigned char cat = SREF (categories, len);
94 Lisp_Object category = make_fixnum (cat);
95
96 CHECK_CATEGORY (category);
97 set_category_set (val, cat, 1);
98 }
99 return val;
100 }
101
102
103 /* Category staff. */
104
105 static Lisp_Object check_category_table (Lisp_Object table);
106
107 DEFUN ("define-category", Fdefine_category, Sdefine_category, 2, 3, 0,
108 doc: /* Define CATEGORY as a category which is described by DOCSTRING.
109 CATEGORY should be an ASCII printing character in the range ` ' to `~'.
110 DOCSTRING is the documentation string of the category. The first line
111 should be a terse text (preferably less than 16 characters),
112 and the rest lines should be the full description.
113 The category is defined only in category table TABLE, which defaults to
114 the current buffer's category table. */)
115 (Lisp_Object category, Lisp_Object docstring, Lisp_Object table)
116 {
117 CHECK_CATEGORY (category);
118 CHECK_STRING (docstring);
119 table = check_category_table (table);
120
121 if (!NILP (CATEGORY_DOCSTRING (table, XFIXNAT (category))))
122 error ("Category `%c' is already defined", (int) XFIXNAT (category));
123 if (!NILP (Vpurify_flag))
124 docstring = Fpurecopy (docstring);
125 SET_CATEGORY_DOCSTRING (table, XFIXNAT (category), docstring);
126
127 return Qnil;
128 }
129
130 DEFUN ("category-docstring", Fcategory_docstring, Scategory_docstring, 1, 2, 0,
131 doc: /* Return the documentation string of CATEGORY, as defined in TABLE.
132 TABLE should be a category table and defaults to the current buffer's
133 category table. */)
134 (Lisp_Object category, Lisp_Object table)
135 {
136 CHECK_CATEGORY (category);
137 table = check_category_table (table);
138
139 return CATEGORY_DOCSTRING (table, XFIXNAT (category));
140 }
141
142 DEFUN ("get-unused-category", Fget_unused_category, Sget_unused_category,
143 0, 1, 0,
144 doc: /* Return a category which is not yet defined in TABLE.
145 If no category remains available, return nil.
146 The optional argument TABLE specifies which category table to modify;
147 it defaults to the current buffer's category table. */)
148 (Lisp_Object table)
149 {
150 int i;
151
152 table = check_category_table (table);
153
154 for (i = ' '; i <= '~'; i++)
155 if (NILP (CATEGORY_DOCSTRING (table, i)))
156 return make_fixnum (i);
157
158 return Qnil;
159 }
160
161
162 /* Category-table staff. */
163
164 DEFUN ("category-table-p", Fcategory_table_p, Scategory_table_p, 1, 1, 0,
165 doc: /* Return t if ARG is a category table. */)
166 (Lisp_Object arg)
167 {
168 if (CHAR_TABLE_P (arg)
169 && EQ (XCHAR_TABLE (arg)->purpose, Qcategory_table))
170 return Qt;
171 return Qnil;
172 }
173
174 /* If TABLE is nil, return the current category table. If TABLE is
175 not nil, check the validity of TABLE as a category table. If
176 valid, return TABLE itself, but if not valid, signal an error of
177 wrong-type-argument. */
178
179 static Lisp_Object
check_category_table(Lisp_Object table)180 check_category_table (Lisp_Object table)
181 {
182 if (NILP (table))
183 return BVAR (current_buffer, category_table);
184 CHECK_TYPE (!NILP (Fcategory_table_p (table)), Qcategory_table_p, table);
185 return table;
186 }
187
188 DEFUN ("category-table", Fcategory_table, Scategory_table, 0, 0, 0,
189 doc: /* Return the current category table.
190 This is the one specified by the current buffer. */)
191 (void)
192 {
193 return BVAR (current_buffer, category_table);
194 }
195
196 DEFUN ("standard-category-table", Fstandard_category_table,
197 Sstandard_category_table, 0, 0, 0,
198 doc: /* Return the standard category table.
199 This is the one used for new buffers. */)
200 (void)
201 {
202 return Vstandard_category_table;
203 }
204
205
206 static void
copy_category_entry(Lisp_Object table,Lisp_Object c,Lisp_Object val)207 copy_category_entry (Lisp_Object table, Lisp_Object c, Lisp_Object val)
208 {
209 val = Fcopy_sequence (val);
210 if (CONSP (c))
211 char_table_set_range (table, XFIXNUM (XCAR (c)), XFIXNUM (XCDR (c)), val);
212 else
213 char_table_set (table, XFIXNUM (c), val);
214 }
215
216 /* Return a copy of category table TABLE. We can't simply use the
217 function copy-sequence because no contents should be shared between
218 the original and the copy. This function is called recursively by
219 binding TABLE to a sub char table. */
220
221 static Lisp_Object
copy_category_table(Lisp_Object table)222 copy_category_table (Lisp_Object table)
223 {
224 table = copy_char_table (table);
225
226 if (! NILP (XCHAR_TABLE (table)->defalt))
227 set_char_table_defalt (table,
228 Fcopy_sequence (XCHAR_TABLE (table)->defalt));
229 set_char_table_extras
230 (table, 0, Fcopy_sequence (XCHAR_TABLE (table)->extras[0]));
231 map_char_table (copy_category_entry, Qnil, table, table);
232
233 return table;
234 }
235
236 DEFUN ("copy-category-table", Fcopy_category_table, Scopy_category_table,
237 0, 1, 0,
238 doc: /* Construct a new category table and return it.
239 It is a copy of the TABLE, which defaults to the standard category table. */)
240 (Lisp_Object table)
241 {
242 if (!NILP (table))
243 check_category_table (table);
244 else
245 table = Vstandard_category_table;
246
247 return copy_category_table (table);
248 }
249
250 DEFUN ("make-category-table", Fmake_category_table, Smake_category_table,
251 0, 0, 0,
252 doc: /* Construct a new and empty category table and return it. */)
253 (void)
254 {
255 Lisp_Object val;
256 int i;
257
258 val = Fmake_char_table (Qcategory_table, Qnil);
259 set_char_table_defalt (val, MAKE_CATEGORY_SET);
260 for (i = 0; i < (1 << CHARTAB_SIZE_BITS_0); i++)
261 set_char_table_contents (val, i, MAKE_CATEGORY_SET);
262 Fset_char_table_extra_slot (val, make_fixnum (0), make_nil_vector (95));
263 return val;
264 }
265
266 DEFUN ("set-category-table", Fset_category_table, Sset_category_table, 1, 1, 0,
267 doc: /* Specify TABLE as the category table for the current buffer.
268 Return TABLE. */)
269 (Lisp_Object table)
270 {
271 int idx;
272 table = check_category_table (table);
273 bset_category_table (current_buffer, table);
274 /* Indicate that this buffer now has a specified category table. */
275 idx = PER_BUFFER_VAR_IDX (category_table);
276 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
277 return table;
278 }
279
280
281 Lisp_Object
char_category_set(int c)282 char_category_set (int c)
283 {
284 return CHAR_TABLE_REF (BVAR (current_buffer, category_table), c);
285 }
286
287 DEFUN ("char-category-set", Fchar_category_set, Schar_category_set, 1, 1, 0,
288 doc: /* Return the category set of CHAR.
289 usage: (char-category-set CHAR) */)
290 (Lisp_Object ch)
291 {
292 CHECK_CHARACTER (ch);
293 return CATEGORY_SET (XFIXNAT (ch));
294 }
295
296 DEFUN ("category-set-mnemonics", Fcategory_set_mnemonics,
297 Scategory_set_mnemonics, 1, 1, 0,
298 doc: /* Return a string containing mnemonics of the categories in CATEGORY-SET.
299 CATEGORY-SET is a bool-vector, and the categories \"in\" it are those
300 that are indexes where t occurs in the bool-vector.
301 The return value is a string containing those same categories. */)
302 (Lisp_Object category_set)
303 {
304 int i, j;
305 char str[96];
306
307 CHECK_CATEGORY_SET (category_set);
308
309 j = 0;
310 for (i = 32; i < 127; i++)
311 if (CATEGORY_MEMBER (i, category_set))
312 str[j++] = i;
313 str[j] = '\0';
314
315 return build_string (str);
316 }
317
318 DEFUN ("modify-category-entry", Fmodify_category_entry,
319 Smodify_category_entry, 2, 4, 0,
320 doc: /* Modify the category set of CHARACTER by adding CATEGORY to it.
321 The category is changed only for table TABLE, which defaults to
322 the current buffer's category table.
323 CHARACTER can be either a single character or a cons representing the
324 lower and upper ends of an inclusive character range to modify.
325 CATEGORY must be a category name (a character between ` ' and `~').
326 Use `describe-categories' to see existing category names.
327 If optional fourth argument RESET is non-nil,
328 then delete CATEGORY from the category set instead of adding it. */)
329 (Lisp_Object character, Lisp_Object category, Lisp_Object table, Lisp_Object reset)
330 {
331 bool set_value; /* Actual value to be set in category sets. */
332 Lisp_Object category_set;
333 int start, end;
334 int from, to;
335
336 if (FIXNUMP (character))
337 {
338 CHECK_CHARACTER (character);
339 start = end = XFIXNAT (character);
340 }
341 else
342 {
343 CHECK_CONS (character);
344 CHECK_CHARACTER_CAR (character);
345 CHECK_CHARACTER_CDR (character);
346 start = XFIXNAT (XCAR (character));
347 end = XFIXNAT (XCDR (character));
348 }
349
350 CHECK_CATEGORY (category);
351 table = check_category_table (table);
352
353 if (NILP (CATEGORY_DOCSTRING (table, XFIXNAT (category))))
354 error ("Undefined category: %c", (int) XFIXNAT (category));
355
356 set_value = NILP (reset);
357
358 while (start <= end)
359 {
360 from = start, to = end;
361 category_set = char_table_ref_and_range (table, start, &from, &to);
362 if (CATEGORY_MEMBER (XFIXNAT (category), category_set) != NILP (reset))
363 {
364 category_set = Fcopy_sequence (category_set);
365 set_category_set (category_set, XFIXNAT (category), set_value);
366 category_set = hash_get_category_set (table, category_set);
367 char_table_set_range (table, start, to, category_set);
368 }
369 start = to + 1;
370 }
371
372 return Qnil;
373 }
374
375 /* Return true if there is a word boundary between two word-constituent
376 characters C1 and C2 if they appear in this order.
377 Use the macro WORD_BOUNDARY_P instead of calling this function
378 directly. */
379
380 bool
word_boundary_p(int c1,int c2)381 word_boundary_p (int c1, int c2)
382 {
383 Lisp_Object category_set1, category_set2;
384 Lisp_Object tail;
385 bool default_result;
386
387 if (EQ (CHAR_TABLE_REF (Vchar_script_table, c1),
388 CHAR_TABLE_REF (Vchar_script_table, c2)))
389 {
390 tail = Vword_separating_categories;
391 default_result = 0;
392 }
393 else
394 {
395 tail = Vword_combining_categories;
396 default_result = 1;
397 }
398
399 category_set1 = CATEGORY_SET (c1);
400 if (NILP (category_set1))
401 return default_result;
402 category_set2 = CATEGORY_SET (c2);
403 if (NILP (category_set2))
404 return default_result;
405
406 for (; CONSP (tail); tail = XCDR (tail))
407 {
408 Lisp_Object elt = XCAR (tail);
409
410 if (CONSP (elt)
411 && (NILP (XCAR (elt))
412 || (CATEGORYP (XCAR (elt))
413 && CATEGORY_MEMBER (XFIXNAT (XCAR (elt)), category_set1)
414 && ! CATEGORY_MEMBER (XFIXNAT (XCAR (elt)), category_set2)))
415 && (NILP (XCDR (elt))
416 || (CATEGORYP (XCDR (elt))
417 && ! CATEGORY_MEMBER (XFIXNAT (XCDR (elt)), category_set1)
418 && CATEGORY_MEMBER (XFIXNAT (XCDR (elt)), category_set2))))
419 return !default_result;
420 }
421 return default_result;
422 }
423
424
425 void
init_category_once(void)426 init_category_once (void)
427 {
428 /* This has to be done here, before we call Fmake_char_table. */
429 DEFSYM (Qcategory_table, "category-table");
430 Fput (Qcategory_table, Qchar_table_extra_slots, make_fixnum (2));
431
432 Vstandard_category_table = Fmake_char_table (Qcategory_table, Qnil);
433 /* Set a category set which contains nothing to the default. */
434 set_char_table_defalt (Vstandard_category_table, MAKE_CATEGORY_SET);
435 Fset_char_table_extra_slot (Vstandard_category_table, make_fixnum (0),
436 make_nil_vector (95));
437 }
438
439 void
syms_of_category(void)440 syms_of_category (void)
441 {
442 DEFSYM (Qcategoryp, "categoryp");
443 DEFSYM (Qcategorysetp, "categorysetp");
444 DEFSYM (Qcategory_table_p, "category-table-p");
445
446 DEFVAR_LISP ("word-combining-categories", Vword_combining_categories,
447 doc: /* List of pair (cons) of categories to determine word boundary.
448
449 Emacs treats a sequence of word constituent characters as a single
450 word (i.e. finds no word boundary between them) only if they belong to
451 the same script. But, exceptions are allowed in the following cases.
452
453 \(1) The case that characters are in different scripts is controlled
454 by the variable `word-combining-categories'.
455
456 Emacs finds no word boundary between characters of different scripts
457 if they have categories matching some element of this list.
458
459 More precisely, if an element of this list is a cons of category CAT1
460 and CAT2, and a multibyte character C1 which has CAT1 is followed by
461 C2 which has CAT2, there's no word boundary between C1 and C2.
462
463 For instance, to tell that Han characters followed by Hiragana
464 characters can form a single word, the element `(?C . ?H)' should be
465 in this list.
466
467 \(2) The case that character are in the same script is controlled by
468 the variable `word-separating-categories'.
469
470 Emacs finds a word boundary between characters of the same script
471 if they have categories matching some element of this list.
472
473 More precisely, if an element of this list is a cons of category CAT1
474 and CAT2, and a multibyte character C1 which has CAT1 but not CAT2 is
475 followed by C2 which has CAT2 but not CAT1, there's a word boundary
476 between C1 and C2.
477
478 For instance, to tell that there's a word boundary between Hiragana
479 and Katakana (both are in the same script `kana'),
480 the element `(?H . ?K)' should be in this list. */);
481
482 Vword_combining_categories = Qnil;
483
484 DEFVAR_LISP ("word-separating-categories", Vword_separating_categories,
485 doc: /* List of pair (cons) of categories to determine word boundary.
486 See the documentation of the variable `word-combining-categories'. */);
487
488 Vword_separating_categories = Qnil;
489
490 defsubr (&Smake_category_set);
491 defsubr (&Sdefine_category);
492 defsubr (&Scategory_docstring);
493 defsubr (&Sget_unused_category);
494 defsubr (&Scategory_table_p);
495 defsubr (&Scategory_table);
496 defsubr (&Sstandard_category_table);
497 defsubr (&Scopy_category_table);
498 defsubr (&Smake_category_table);
499 defsubr (&Sset_category_table);
500 defsubr (&Schar_category_set);
501 defsubr (&Scategory_set_mnemonics);
502 defsubr (&Smodify_category_entry);
503 }
504