This source file includes following definitions.
- prepare_casing_context
- case_character_impl
- case_single_character
- case_character
- make_char_unibyte
- do_casify_natnum
- do_casify_multibyte_string
- ascii_casify_character
- do_casify_unibyte_string
- casify_object
- DEFUN
- DEFUN
- DEFUN
- DEFUN
- do_casify_unibyte_region
- do_casify_multibyte_region
- casify_region
- casify_pnc_region
- casify_word
- DEFUN
- DEFUN
- DEFUN
- syms_of_casefiddle
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 #include <config.h>
24
25 #include "lisp.h"
26 #include "character.h"
27 #include "buffer.h"
28 #include "commands.h"
29 #include "syntax.h"
30 #include "composite.h"
31 #include "keymap.h"
32
33 #ifdef HAVE_TREE_SITTER
34 #include "treesit.h"
35 #endif
36
37 enum case_action {CASE_UP, CASE_DOWN, CASE_CAPITALIZE, CASE_CAPITALIZE_UP};
38
39
40 struct casing_context
41 {
42
43
44 Lisp_Object titlecase_char_table;
45
46
47
48 Lisp_Object specialcase_char_tables[3];
49
50
51 enum case_action flag;
52
53
54
55
56
57 bool inbuffer;
58
59
60 bool inword;
61
62
63 bool downcase_last;
64 };
65
66
67 static void
68 prepare_casing_context (struct casing_context *ctx,
69 enum case_action flag, bool inbuffer)
70 {
71 ctx->flag = flag;
72 ctx->inbuffer = inbuffer;
73 ctx->inword = false;
74 ctx->titlecase_char_table
75 = (flag < CASE_CAPITALIZE ? Qnil
76 : uniprop_table (Qtitlecase));
77 ctx->specialcase_char_tables[CASE_UP]
78 = (flag == CASE_DOWN ? Qnil
79 : uniprop_table (Qspecial_uppercase));
80 ctx->specialcase_char_tables[CASE_DOWN]
81 = (flag == CASE_UP ? Qnil
82 : uniprop_table (Qspecial_lowercase));
83 ctx->specialcase_char_tables[CASE_CAPITALIZE]
84 = (flag < CASE_CAPITALIZE ? Qnil
85 : uniprop_table (Qspecial_titlecase));
86
87
88 if (NILP (XCHAR_TABLE (BVAR (current_buffer, downcase_table))->extras[1]))
89 Fset_case_table (BVAR (current_buffer, downcase_table));
90
91 if (inbuffer && flag >= CASE_CAPITALIZE)
92 SETUP_BUFFER_SYNTAX_TABLE ();
93 }
94
95 struct casing_str_buf
96 {
97 unsigned char data[max (6, MAX_MULTIBYTE_LENGTH)];
98 unsigned char len_chars;
99 unsigned char len_bytes;
100 };
101
102
103
104
105
106
107
108 static int
109 case_character_impl (struct casing_str_buf *buf,
110 struct casing_context *ctx, int ch)
111 {
112 enum case_action flag;
113 Lisp_Object prop;
114 int cased;
115
116
117 bool was_inword = ctx->inword;
118 ctx->inword = SYNTAX (ch) == Sword &&
119 (!ctx->inbuffer || was_inword || !syntax_prefix_flag_p (ch));
120
121
122 if (ctx->flag == CASE_CAPITALIZE)
123 flag = ctx->flag - was_inword;
124 else if (ctx->flag != CASE_CAPITALIZE_UP)
125 flag = ctx->flag;
126 else if (!was_inword)
127 flag = CASE_CAPITALIZE;
128 else
129 {
130 cased = ch;
131 goto done;
132 }
133
134
135 if (buf && !NILP (ctx->specialcase_char_tables[flag]))
136 {
137 prop = CHAR_TABLE_REF (ctx->specialcase_char_tables[flag], ch);
138 if (STRINGP (prop))
139 {
140 struct Lisp_String *str = XSTRING (prop);
141 if (STRING_BYTES (str) <= sizeof buf->data)
142 {
143 buf->len_chars = str->u.s.size;
144 buf->len_bytes = STRING_BYTES (str);
145 memcpy (buf->data, str->u.s.data, buf->len_bytes);
146 return 1;
147 }
148 }
149 }
150
151
152 if (flag == CASE_DOWN)
153 {
154 cased = downcase (ch);
155 ctx->downcase_last = true;
156 }
157 else
158 {
159 bool cased_is_set = false;
160 ctx->downcase_last = false;
161 if (!NILP (ctx->titlecase_char_table))
162 {
163 prop = CHAR_TABLE_REF (ctx->titlecase_char_table, ch);
164 if (CHARACTERP (prop))
165 {
166 cased = XFIXNAT (prop);
167 cased_is_set = true;
168 }
169 }
170 if (!cased_is_set)
171 cased = upcase (ch);
172 }
173
174
175 done:
176 if (!buf)
177 return cased;
178 buf->len_chars = 1;
179 buf->len_bytes = CHAR_STRING (cased, buf->data);
180 return cased != ch;
181 }
182
183
184
185
186
187
188
189
190 enum { GREEK_CAPITAL_LETTER_SIGMA = 0x03A3 };
191 enum { GREEK_SMALL_LETTER_FINAL_SIGMA = 0x03C2 };
192
193
194
195
196
197
198
199 static inline int
200 case_single_character (struct casing_context *ctx, int ch)
201 {
202 return case_character_impl (NULL, ctx, ch);
203 }
204
205
206
207
208
209
210
211
212
213
214 static bool
215 case_character (struct casing_str_buf *buf, struct casing_context *ctx,
216 int ch, const unsigned char *next)
217 {
218 bool was_inword = ctx->inword;
219 bool changed = case_character_impl (buf, ctx, ch);
220
221
222
223
224 if (was_inword && ch == GREEK_CAPITAL_LETTER_SIGMA && changed
225 && (!next || SYNTAX (STRING_CHAR (next)) != Sword))
226 {
227 buf->len_bytes = CHAR_STRING (GREEK_SMALL_LETTER_FINAL_SIGMA, buf->data);
228 buf->len_chars = 1;
229 }
230
231 return changed;
232 }
233
234
235 static inline int
236 make_char_unibyte (int c)
237 {
238 return ASCII_CHAR_P (c) ? c : CHAR_TO_BYTE8 (c);
239 }
240
241 static Lisp_Object
242 do_casify_natnum (struct casing_context *ctx, Lisp_Object obj)
243 {
244 int flagbits = (CHAR_ALT | CHAR_SUPER | CHAR_HYPER
245 | CHAR_SHIFT | CHAR_CTL | CHAR_META);
246 int ch = XFIXNAT (obj);
247
248
249
250 if (! (0 <= ch && ch <= flagbits))
251 return obj;
252
253 int flags = ch & flagbits;
254 ch = ch & ~flagbits;
255
256
257
258
259
260 bool multibyte = (ch >= 256
261 || !NILP (BVAR (current_buffer,
262 enable_multibyte_characters)));
263 if (! multibyte)
264 ch = make_char_multibyte (ch);
265 int cased = case_single_character (ctx, ch);
266 if (cased == ch)
267 return obj;
268
269 if (! multibyte)
270 cased = make_char_unibyte (cased);
271 return make_fixed_natnum (cased | flags);
272 }
273
274 static Lisp_Object
275 do_casify_multibyte_string (struct casing_context *ctx, Lisp_Object obj)
276 {
277
278
279
280
281
282 verify (offsetof (struct casing_str_buf, data) == 0);
283
284 ptrdiff_t size = SCHARS (obj), n;
285 USE_SAFE_ALLOCA;
286 if (ckd_mul (&n, size, MAX_MULTIBYTE_LENGTH)
287 || ckd_add (&n, n, sizeof (struct casing_str_buf)))
288 n = PTRDIFF_MAX;
289 unsigned char *dst = SAFE_ALLOCA (n);
290 unsigned char *dst_end = dst + n;
291 unsigned char *o = dst;
292
293 const unsigned char *src = SDATA (obj);
294
295 for (n = 0; size; --size)
296 {
297 if (dst_end - o < sizeof (struct casing_str_buf))
298 string_overflow ();
299 int ch = string_char_advance (&src);
300 case_character ((struct casing_str_buf *) o, ctx, ch,
301 size > 1 ? src : NULL);
302 n += ((struct casing_str_buf *) o)->len_chars;
303 o += ((struct casing_str_buf *) o)->len_bytes;
304 }
305 eassert (o <= dst_end);
306 obj = make_multibyte_string ((char *) dst, n, o - dst);
307 SAFE_FREE ();
308 return obj;
309 }
310
311 static int
312 ascii_casify_character (bool downcase, int c)
313 {
314 Lisp_Object cased = CHAR_TABLE_REF (downcase?
315 uniprop_table (Qlowercase) :
316 uniprop_table (Quppercase),
317 c);
318 return FIXNATP (cased) ? XFIXNAT (cased) : c;
319 }
320
321 static Lisp_Object
322 do_casify_unibyte_string (struct casing_context *ctx, Lisp_Object obj)
323 {
324 ptrdiff_t i, size = SCHARS (obj);
325 int ch, cased;
326
327 obj = Fcopy_sequence (obj);
328 for (i = 0; i < size; i++)
329 {
330 ch = make_char_multibyte (SREF (obj, i));
331 cased = case_single_character (ctx, ch);
332 if (ch == cased)
333 continue;
334
335
336
337 if (ASCII_CHAR_P (ch) && !SINGLE_BYTE_CHAR_P (cased))
338 cased = ascii_casify_character (ctx->downcase_last, ch);
339 SSET (obj, i, make_char_unibyte (cased));
340 }
341 return obj;
342 }
343
344 static Lisp_Object
345 casify_object (enum case_action flag, Lisp_Object obj)
346 {
347 struct casing_context ctx;
348 prepare_casing_context (&ctx, flag, false);
349
350 if (FIXNATP (obj))
351 return do_casify_natnum (&ctx, obj);
352 else if (!STRINGP (obj))
353 wrong_type_argument (Qchar_or_string_p, obj);
354 else if (!SCHARS (obj))
355 return obj;
356 else if (STRING_MULTIBYTE (obj))
357 return do_casify_multibyte_string (&ctx, obj);
358 else
359 return do_casify_unibyte_string (&ctx, obj);
360 }
361
362 DEFUN ("upcase", Fupcase, Supcase, 1, 1, 0,
363 doc:
364
365
366
367
368
369
370
371 )
372 (Lisp_Object obj)
373 {
374 return casify_object (CASE_UP, obj);
375 }
376
377 DEFUN ("downcase", Fdowncase, Sdowncase, 1, 1, 0,
378 doc:
379
380
381
382
383
384
385
386
387
388 )
389 (Lisp_Object obj)
390 {
391 return casify_object (CASE_DOWN, obj);
392 }
393
394 DEFUN ("capitalize", Fcapitalize, Scapitalize, 1, 1, 0,
395 doc:
396
397
398
399
400
401
402
403
404 )
405 (Lisp_Object obj)
406 {
407 return casify_object (CASE_CAPITALIZE, obj);
408 }
409
410
411
412 DEFUN ("upcase-initials", Fupcase_initials, Supcase_initials, 1, 1, 0,
413 doc:
414
415
416
417
418
419
420
421
422 )
423 (Lisp_Object obj)
424 {
425 return casify_object (CASE_CAPITALIZE_UP, obj);
426 }
427
428
429
430
431
432
433
434
435
436 static ptrdiff_t
437 do_casify_unibyte_region (struct casing_context *ctx,
438 ptrdiff_t *startp, ptrdiff_t *endp)
439 {
440 ptrdiff_t first = -1, last = -1;
441 ptrdiff_t end = *endp;
442
443 for (ptrdiff_t pos = *startp; pos < end; ++pos)
444 {
445 int ch = make_char_multibyte (FETCH_BYTE (pos));
446 int cased = case_single_character (ctx, ch);
447 if (cased == ch)
448 continue;
449
450 last = pos + 1;
451 if (first < 0)
452 first = pos;
453
454 FETCH_BYTE (pos) = make_char_unibyte (cased);
455 }
456
457 *startp = first;
458 *endp = last;
459 return 0;
460 }
461
462
463
464
465
466
467
468 static ptrdiff_t
469 do_casify_multibyte_region (struct casing_context *ctx,
470 ptrdiff_t *startp, ptrdiff_t *endp)
471 {
472 ptrdiff_t first = -1, last = -1;
473 ptrdiff_t pos = *startp, pos_byte = CHAR_TO_BYTE (pos), size = *endp - pos;
474 ptrdiff_t opoint = PT, added = 0;
475
476 for (; size; --size)
477 {
478 int len, ch = string_char_and_length (BYTE_POS_ADDR (pos_byte), &len);
479 struct casing_str_buf buf;
480 if (!case_character (&buf, ctx, ch,
481 size > 1 ? BYTE_POS_ADDR (pos_byte + len) : NULL))
482 {
483 pos_byte += len;
484 ++pos;
485 continue;
486 }
487
488 last = pos + buf.len_chars;
489 if (first < 0)
490 first = pos;
491
492 if (buf.len_chars == 1 && buf.len_bytes == len)
493 memcpy (BYTE_POS_ADDR (pos_byte), buf.data, len);
494 else
495 {
496
497
498 replace_range_2 (pos, pos_byte, pos + 1, pos_byte + len,
499 (const char *) buf.data, buf.len_chars,
500 buf.len_bytes,
501 0);
502 added += (ptrdiff_t) buf.len_chars - 1;
503 if (opoint > pos)
504 opoint += (ptrdiff_t) buf.len_chars - 1;
505 }
506
507 pos_byte += buf.len_bytes;
508 pos += buf.len_chars;
509 }
510
511 if (PT != opoint)
512 TEMP_SET_PT_BOTH (opoint, CHAR_TO_BYTE (opoint));
513
514 *startp = first;
515 *endp = last;
516 return added;
517 }
518
519
520
521
522 static ptrdiff_t
523 casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e)
524 {
525 ptrdiff_t added;
526 struct casing_context ctx;
527
528 validate_region (&b, &e);
529 ptrdiff_t start = XFIXNAT (b);
530 ptrdiff_t end = XFIXNAT (e);
531 if (start == end)
532
533 return end;
534 modify_text (start, end);
535 prepare_casing_context (&ctx, flag, true);
536
537 #ifdef HAVE_TREE_SITTER
538 ptrdiff_t start_byte = CHAR_TO_BYTE (start);
539 ptrdiff_t old_end_byte = CHAR_TO_BYTE (end);
540 #endif
541
542 ptrdiff_t orig_end = end;
543 record_delete (start, make_buffer_string (start, end, true), false);
544 if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
545 {
546 record_insert (start, end - start);
547 added = do_casify_unibyte_region (&ctx, &start, &end);
548 }
549 else
550 {
551 ptrdiff_t len = end - start, ostart = start;
552 added = do_casify_multibyte_region (&ctx, &start, &end);
553 record_insert (ostart, len + added);
554 }
555
556 if (start >= 0)
557 {
558 signal_after_change (start, end - start - added, end - start);
559 update_compositions (start, end, CHECK_ALL);
560 }
561 #ifdef HAVE_TREE_SITTER
562 treesit_record_change (start_byte, old_end_byte,
563 CHAR_TO_BYTE (orig_end + added));
564 #endif
565
566 return orig_end + added;
567 }
568
569
570
571
572
573
574 static Lisp_Object
575 casify_pnc_region (enum case_action flag, Lisp_Object beg, Lisp_Object end,
576 Lisp_Object region_noncontiguous_p)
577 {
578 if (!NILP (region_noncontiguous_p))
579 {
580 Lisp_Object bounds = call1 (Vregion_extract_function, Qbounds);
581 FOR_EACH_TAIL (bounds)
582 {
583 CHECK_CONS (XCAR (bounds));
584 casify_region (flag, XCAR (XCAR (bounds)), XCDR (XCAR (bounds)));
585 }
586 CHECK_LIST_END (bounds, bounds);
587 }
588 else
589 casify_region (flag, beg, end);
590
591 return Qnil;
592 }
593
594 DEFUN ("upcase-region", Fupcase_region, Supcase_region, 2, 3,
595 "(list (region-beginning) (region-end) (region-noncontiguous-p))",
596 doc:
597
598
599
600 )
601 (Lisp_Object beg, Lisp_Object end, Lisp_Object region_noncontiguous_p)
602 {
603 return casify_pnc_region (CASE_UP, beg, end, region_noncontiguous_p);
604 }
605
606 DEFUN ("downcase-region", Fdowncase_region, Sdowncase_region, 2, 3,
607 "(list (region-beginning) (region-end) (region-noncontiguous-p))",
608 doc:
609
610
611 )
612 (Lisp_Object beg, Lisp_Object end, Lisp_Object region_noncontiguous_p)
613 {
614 return casify_pnc_region (CASE_DOWN, beg, end, region_noncontiguous_p);
615 }
616
617 DEFUN ("capitalize-region", Fcapitalize_region, Scapitalize_region, 2, 3,
618 "(list (region-beginning) (region-end) (region-noncontiguous-p))",
619 doc:
620
621
622
623 )
624 (Lisp_Object beg, Lisp_Object end, Lisp_Object region_noncontiguous_p)
625 {
626 return casify_pnc_region (CASE_CAPITALIZE, beg, end, region_noncontiguous_p);
627 }
628
629
630
631 DEFUN ("upcase-initials-region", Fupcase_initials_region,
632 Supcase_initials_region, 2, 3,
633 "(list (region-beginning) (region-end) (region-noncontiguous-p))",
634 doc:
635
636
637
638 )
639 (Lisp_Object beg, Lisp_Object end, Lisp_Object region_noncontiguous_p)
640 {
641 return casify_pnc_region (CASE_CAPITALIZE_UP, beg, end,
642 region_noncontiguous_p);
643 }
644
645 static Lisp_Object
646 casify_word (enum case_action flag, Lisp_Object arg)
647 {
648 CHECK_FIXNUM (arg);
649 ptrdiff_t farend = scan_words (PT, XFIXNUM (arg));
650 if (!farend)
651 farend = XFIXNUM (arg) <= 0 ? BEGV : ZV;
652 SET_PT (casify_region (flag, make_fixnum (PT), make_fixnum (farend)));
653 return Qnil;
654 }
655
656 DEFUN ("upcase-word", Fupcase_word, Supcase_word, 1, 1, "p",
657 doc:
658
659
660
661
662
663 )
664 (Lisp_Object arg)
665 {
666 return casify_word (CASE_UP, arg);
667 }
668
669 DEFUN ("downcase-word", Fdowncase_word, Sdowncase_word, 1, 1, "p",
670 doc:
671
672
673
674
675 )
676 (Lisp_Object arg)
677 {
678 return casify_word (CASE_DOWN, arg);
679 }
680
681 DEFUN ("capitalize-word", Fcapitalize_word, Scapitalize_word, 1, 1, "p",
682 doc:
683
684
685
686
687
688
689
690 )
691 (Lisp_Object arg)
692 {
693 return casify_word (CASE_CAPITALIZE, arg);
694 }
695
696 void
697 syms_of_casefiddle (void)
698 {
699 DEFSYM (Qbounds, "bounds");
700 DEFSYM (Qidentity, "identity");
701 DEFSYM (Qtitlecase, "titlecase");
702 DEFSYM (Qlowercase, "lowercase");
703 DEFSYM (Quppercase, "uppercase");
704 DEFSYM (Qspecial_uppercase, "special-uppercase");
705 DEFSYM (Qspecial_lowercase, "special-lowercase");
706 DEFSYM (Qspecial_titlecase, "special-titlecase");
707
708 DEFVAR_LISP ("region-extract-function", Vregion_extract_function,
709 doc:
710
711
712
713
714
715
716
717
718
719
720 );
721 Vregion_extract_function = Qnil;
722
723 defsubr (&Supcase);
724 defsubr (&Sdowncase);
725 defsubr (&Scapitalize);
726 defsubr (&Supcase_initials);
727 defsubr (&Supcase_region);
728 defsubr (&Sdowncase_region);
729 defsubr (&Scapitalize_region);
730 defsubr (&Supcase_initials_region);
731 defsubr (&Supcase_word);
732 defsubr (&Sdowncase_word);
733 defsubr (&Scapitalize_word);
734 }