/* Emacs regular expression API

   Copyright (C) 1985, 1989-1993, 1995, 2000-2023 Free Software
   Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

#ifndef EMACS_REGEX_H
#define EMACS_REGEX_H 1

#include <stddef.h>

/* This is the structure we store register match data in.
   Declare this before including lisp.h, since lisp.h (via thread.h)
   uses struct re_registers.

   NOTE(review): judging from the contract documented at
   're_set_registers' below, NUM_REGS is presumably the number of
   registers and START/END are arrays with room for at least that
   many elements -- confirm against the implementation.  */
struct re_registers
{
  ptrdiff_t num_regs;
  ptrdiff_t *start;
  ptrdiff_t *end;
};

#include "lisp.h"

/* The string or buffer being matched.
   It is used for looking up syntax properties.

   If the value is a Lisp string object, match text in that string; if
   it's nil, match text in the current buffer; if it's t, match text
   in a C string.

   This value is effectively another parameter to re_search_2 and
   re_match_2.  No calls into Lisp or thread switches are allowed
   between setting re_match_object and calling into the regex search
   and match functions.  These functions capture the current value of
   re_match_object into gl_state on entry.

   TODO: turn into an actual function parameter.  */
extern Lisp_Object re_match_object;

/* Roughly the maximum number of failure points on the stack.  */
extern ptrdiff_t emacs_re_max_failures;

/* Amount of memory that we can safely stack allocate.  */
extern ptrdiff_t emacs_re_safe_alloca;

/* This data structure represents a compiled pattern.  Before calling
   the pattern compiler, the fields 'buffer', 'allocated', 'fastmap',
   and 'translate' can be set.  After the pattern has been
   compiled, the 're_nsub' field is available.  All other fields are
   private to the regex routines.  */

struct re_pattern_buffer
{
  /* Space that holds the compiled pattern.  It is declared as
     'unsigned char *' because its elements are
     sometimes used as array indexes.  */
  unsigned char *buffer;

  /* Number of bytes to which 'buffer' points.  */
  ptrdiff_t allocated;

  /* Number of bytes actually used in 'buffer'.  */
  ptrdiff_t used;

  /* Charset of unibyte characters at compiling time.  */
  int charset_unibyte;

  /* Pointer to a fastmap, if any, otherwise zero.  re_search uses
     the fastmap, if there is one, to skip over impossible
     starting points for matches.  */
  char *fastmap;

  /* Either a translate table to apply to all characters before
     comparing them, or zero for no translation.  The translation
     applies to a pattern when it is compiled and to a string
     when it is matched.  */
  Lisp_Object translate;

  /* Number of subexpressions found by the compiler.  */
  ptrdiff_t re_nsub;

  /* True if and only if this pattern can match the empty string.
     Well, in truth it's used only in 're_search_2', to see
     whether or not we should use the fastmap, so we don't set
     this absolutely perfectly; see 're_compile_fastmap'.  */
  bool_bf can_be_null : 1;

  /* If REGS_UNALLOCATED, allocate space in the 'regs' structure
     for at least (re_nsub + 1) groups.
     If REGS_REALLOCATE, reallocate space if necessary.
     If REGS_FIXED, use what's there.
     (The REGS_* constants are not defined in this header.)  */
  unsigned regs_allocated : 2;

  /* Set to false when 'regex_compile' compiles a pattern; set to true
     by 're_compile_fastmap' if it updates the fastmap.  */
  bool_bf fastmap_accurate : 1;

  /* If true, the compilation of the pattern had to look up the syntax table,
     so the compiled pattern is valid for the current syntax table only.  */
  bool_bf used_syntax : 1;

  /* If true, multi-byte form in the regexp pattern should be
     recognized as a multibyte character.  */
  bool_bf multibyte : 1;

  /* If true, multi-byte form in the target of match should be
     recognized as a multibyte character.  */
  bool_bf target_multibyte : 1;
};

/* Declarations for routines.  */

/* Compile the regular expression PATTERN, with length LENGTH
   and syntax given by the global 're_syntax_options', into the buffer
   BUFFER.  Return NULL if successful, and an error string if not.

   NOTE(review): this header declares no 're_syntax_options', and the
   POSIX_BACKTRACKING and WHITESPACE_REGEXP parameters are not
   described here -- check the definition for their meaning and for
   whether the reference to 're_syntax_options' is still accurate.  */
extern const char *re_compile_pattern (const char *pattern, ptrdiff_t length,
                                       bool posix_backtracking,
                                       const char *whitespace_regexp,
                                       struct re_pattern_buffer *buffer);


/* Search in the string STRING (with length LENGTH) for the pattern
   compiled into BUFFER.  Start searching at position START, for RANGE
   characters.  Return the starting position of the match, -1 for no
   match, or -2 for an internal error.  Also return register
   information in REGS (if REGS is non-null).  */
extern ptrdiff_t re_search (struct re_pattern_buffer *buffer,
                            const char *string, ptrdiff_t length,
                            ptrdiff_t start, ptrdiff_t range,
                            struct re_registers *regs);


/* Like 're_search', but search in the concatenation of STRING1 and
   STRING2 (of lengths LENGTH1 and LENGTH2).  Also, stop searching at
   index START + STOP.  */
extern ptrdiff_t re_search_2 (struct re_pattern_buffer *buffer,
                              const char *string1, ptrdiff_t length1,
                              const char *string2, ptrdiff_t length2,
                              ptrdiff_t start, ptrdiff_t range,
                              struct re_registers *regs,
                              ptrdiff_t stop);


/* Like 're_search_2', but return how many characters in the string
   (given, as for 're_search_2', by STRING1 and STRING2) the regexp
   in BUFFER matched, starting at position START.  */
extern ptrdiff_t re_match_2 (struct re_pattern_buffer *buffer,
                             const char *string1, ptrdiff_t length1,
                             const char *string2, ptrdiff_t length2,
                             ptrdiff_t start, struct re_registers *regs,
                             ptrdiff_t stop);


/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
   ENDS.  Subsequent matches using BUFFER and REGS will use this memory
   for recording register information.  STARTS and ENDS must be
   allocated with malloc, and must each be at least 'NUM_REGS * sizeof
   (ptrdiff_t)' bytes long.

   If NUM_REGS == 0, then subsequent matches should allocate their own
   register data.

   Unless this function is called, the first search or match using
   BUFFER will allocate its own register data, without
   freeing the old data.  */
extern void re_set_registers (struct re_pattern_buffer *buffer,
                              struct re_registers *regs,
                              ptrdiff_t num_regs,
                              ptrdiff_t *starts, ptrdiff_t *ends);

/* Character classes.  RECC_ERROR is 0 and the classes proper are
   numbered consecutively from 1.  RECC_NUM_CLASSES is an alias for
   the last class, so its value equals the number of classes not
   counting RECC_ERROR; any class added later must be inserted before
   it and the alias updated.  */
typedef enum { RECC_ERROR = 0,
               RECC_ALNUM, RECC_ALPHA, RECC_WORD,
               RECC_GRAPH, RECC_PRINT,
               RECC_LOWER, RECC_UPPER,
               RECC_PUNCT, RECC_CNTRL,
               RECC_DIGIT, RECC_XDIGIT,
               RECC_BLANK, RECC_SPACE,
               RECC_MULTIBYTE, RECC_NONASCII,
               RECC_ASCII, RECC_UNIBYTE,
               RECC_NUM_CLASSES = RECC_UNIBYTE
} re_wctype_t;

/* NOTE(review): presumably reports whether character CH belongs to
   class CC -- confirm against the definition.  */
extern bool re_iswctype (int ch, re_wctype_t cc);

/* NOTE(review): presumably parses a character-class name from the
   text at *STRP, examining at most LIMIT bytes, and returns the
   matching class (RECC_ERROR looks like the failure value); whether
   and how *STRP is advanced is not visible here -- confirm against
   the definition.  */
extern re_wctype_t re_wctype_parse (const unsigned char **strp,
                                    ptrdiff_t limit);

#endif /* EMACS_REGEX_H */