root/lib-src/etags.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. memcpyz
  2. streq
  3. strcaseeq
  4. strneq
  5. strncaseeq
  6. notinname
  7. begtoken
  8. intoken
  9. endtoken
  10. print_language_names
  11. print_version
  12. print_help
  13. main
  14. cleanup_tags_file
  15. get_compressor_from_suffix
  16. get_language_from_langname
  17. get_language_from_interpreter
  18. get_language_from_filename
  19. process_file_name
  20. process_file
  21. reset_input
  22. find_entries
  23. make_tag
  24. pfnote
  25. push_node
  26. pop_node
  27. free_tree
  28. free_fdesc
  29. add_node
  30. invalidate_nodes
  31. number_len
  32. total_size_of_entries
  33. put_entry
  34. put_entries
  35. hash
  36. in_word_set
  37. C_symtype
  38. pushclass_above
  39. popclass_above
  40. write_classname
  41. consider_token
  42. make_C_tag
  43. perhaps_more_input
  44. C_entries
  45. default_C_entries
  46. plain_C_entries
  47. Cplusplus_entries
  48. Cjava_entries
  49. Cstar_entries
  50. Yacc_entries
  51. just_read_file
  52. F_takeprec
  53. F_getit
  54. Fortran_functions
  55. Go_functions
  56. Ada_getit
  57. Ada_funcs
  58. Asm_labels
  59. Perl_functions
  60. Python_functions
  61. Ruby_functions
  62. Rust_entries
  63. PHP_functions
  64. Cobol_paragraphs
  65. Makefile_targets
  66. Pascal_functions
  67. L_getit
  68. Lisp_functions
  69. Lua_functions
  70. PS_functions
  71. Forth_words
  72. Scheme_functions
  73. TeX_commands
  74. TEX_decode_env
  75. Texinfo_nodes
  76. HTML_labels
  77. Prolog_functions
  78. prolog_skip_comment
  79. prolog_pr
  80. prolog_atom
  81. test_objc_is_mercury
  82. Mercury_functions
  83. mercury_skip_comment
  84. mercury_decl
  85. mercury_pr
  86. Erlang_functions
  87. erlang_func
  88. erlang_attribute
  89. erlang_atom
  90. scan_separators
  91. analyze_regex
  92. add_regex
  93. substitute
  94. free_regexps
  95. regex_tag_multiline
  96. nocase_tail
  97. get_tag
  98. get_lispy_tag
  99. readline_internal
  100. readline
  101. savestr
  102. savenstr
  103. skip_spaces
  104. skip_non_spaces
  105. skip_name
  106. fatal
  107. pfatal
  108. suggest_asking_for_help
  109. error
  110. verror
  111. concat
  112. etags_getcwd
  113. etags_mktmp
  114. escape_shell_arg_string
  115. do_move_file
  116. relative_filename
  117. absolute_filename
  118. absolute_dirname
  119. filename_is_absolute
  120. canonicalize_filename
  121. linebuffer_init
  122. linebuffer_setlen
  123. memory_full
  124. xmalloc
  125. xnmalloc
  126. xnrealloc

     1 /* Tags file maker to go with GNU Emacs           -*- coding: utf-8 -*-
     2 
     3 Copyright (C) 1984 The Regents of the University of California
     4 
     5 Redistribution and use in source and binary forms, with or without
     6 modification, are permitted provided that the following conditions are
     7 met:
     8 1. Redistributions of source code must retain the above copyright
     9    notice, this list of conditions and the following disclaimer.
    10 2. Redistributions in binary form must reproduce the above copyright
    11    notice, this list of conditions and the following disclaimer in the
    12    documentation and/or other materials provided with the
    13    distribution.
    14 3. Neither the name of the University nor the names of its
    15    contributors may be used to endorse or promote products derived
    16    from this software without specific prior written permission.
    17 
    18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
    19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
    20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
    21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
    22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
    25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
    26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
    27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
    28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    29 
    30 
    31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2023 Free Software
    32 Foundation, Inc.
    33 
    34 This file is not considered part of GNU Emacs.
    35 
    36 This program is free software: you can redistribute it and/or modify
    37 it under the terms of the GNU General Public License as published by
    38 the Free Software Foundation, either version 3 of the License, or (at
    39 your option) any later version.
    40 
    41 This program is distributed in the hope that it will be useful,
    42 but WITHOUT ANY WARRANTY; without even the implied warranty of
    43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    44 GNU General Public License for more details.
    45 
    46 You should have received a copy of the GNU General Public License
    47 along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
    48 
    49 
    50 /* NB To comply with the above BSD license, copyright information is
    51 reproduced in etc/ETAGS.README.  That file should be updated when the
    52 above notices are.
    53 
    54 To the best of our knowledge, this code was originally based on the
    55 ctags.c distributed with BSD4.2, which was copyrighted by the
    56 University of California, as described above. */
    57 
    58 
    59 /*
    60  * Authors:
    61  * 1983 Ctags originally by Ken Arnold.
    62  * 1984 Fortran added by Jim Kleckner.
    63  * 1984 Ed Pelegri-Llopart added C typedefs.
    64  * 1985 Emacs TAGS format by Richard Stallman.
    65  * 1989 Sam Kendall added C++.
    66  * 1992 Joseph B. Wells improved C and C++ parsing.
    67  * 1993 Francesco Potortì reorganized C and C++.
    68  * 1994 Line-by-line regexp tags by Tom Tromey.
    69  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
    70  * 2002 #line directives by Francesco Potortì.
    71  * Francesco Potortì maintained and improved it for many years
    72    starting in 1993.
    73  */
    74 
    75 /*
    76  * If you want to add support for a new language, start by looking at the LUA
    77  * language, which is the simplest.  Alternatively, consider distributing etags
    78  * together with a configuration file containing regexp definitions for etags.
    79  */
    80 
    81 #ifdef DEBUG
    82 #  undef DEBUG
    83 #  define DEBUG true
    84 #else
    85 #  define DEBUG false
    86 #endif
    87 
    88 #include <config.h>
    89 
    90 #ifdef MSDOS
    91 # undef MSDOS
    92 # define MSDOS true
    93 # include <sys/param.h>
    94 #else
    95 # define MSDOS false
    96 #endif /* MSDOS */
    97 
    98 #ifdef WINDOWSNT
    99 # include <direct.h>
   100 # undef HAVE_NTGUI
   101 # undef  DOS_NT
   102 # define DOS_NT
   103 /* The WINDOWSNT build doesn't use Gnulib's fcntl.h.  */
   104 # define O_CLOEXEC O_NOINHERIT
   105 #endif /* WINDOWSNT */
   106 
   107 #include <attribute.h>
   108 #include <inttypes.h>
   109 #include <limits.h>
   110 #include <unistd.h>
   111 #include <stdarg.h>
   112 #include <stdlib.h>
   113 #include <string.h>
   114 #include <sysstdio.h>
   115 #include <errno.h>
   116 #include <fcntl.h>
   117 #include <binary-io.h>
   118 #include <intprops.h>
   119 #include <unlocked-io.h>
   120 #include <verify.h>
   121 #include <c-ctype.h>
   122 #include <c-strcase.h>
   123 
   124 #include <assert.h>
   125 #include <getopt.h>
   126 #include <regex.h>
   127 
   128 /* Define CTAGS to make the program "ctags" compatible with the usual one.
   129  Leave it undefined to make the program "etags", which makes emacs-style
   130  tag tables and tags typedefs, #defines and struct/union/enum by default. */
   131 #ifdef CTAGS
   132 # undef  CTAGS
   133 # define CTAGS true
   134 #else
   135 # define CTAGS false
   136 #endif
   137 
   138 /* Define MERCURY_HEURISTICS_RATIO as it was necessary to disambiguate
   139    Mercury from Objective C, which have same file extensions .m
   140    See comments before function test_objc_is_mercury for details.  */
   141 #ifndef  MERCURY_HEURISTICS_RATIO
   142 # define MERCURY_HEURISTICS_RATIO 0.5
   143 #endif
   144 
   145 /* COPY to DEST from SRC (containing LEN bytes), and append a NUL byte.  */
   146 static void
   147 memcpyz (void *dest, void const *src, ptrdiff_t len)
   148 {
   149   char *e = mempcpy (dest, src, len);
   150   *e = '\0';
   151 }
   152 
   153 static bool
   154 streq (char const *s, char const *t)
   155 {
   156   return strcmp (s, t) == 0;
   157 }
   158 
   159 static bool
   160 strcaseeq (char const *s, char const *t)
   161 {
   162   return c_strcasecmp (s, t) == 0;
   163 }
   164 
   165 static bool
   166 strneq (char const *s, char const *t, size_t n)
   167 {
   168   return strncmp (s, t, n) == 0;
   169 }
   170 
   171 static bool
   172 strncaseeq (char const *s, char const *t, size_t n)
   173 {
   174   return c_strncasecmp (s, t, n) == 0;
   175 }
   176 
   177 /* C is not in a name.  */
   178 static bool
   179 notinname (unsigned char c)
   180 {
   181   /* Look at make_tag before modifying!  */
   182   static bool const table[UCHAR_MAX + 1] = {
   183     ['\0']=1, ['\t']=1, ['\n']=1, ['\f']=1, ['\r']=1, [' ']=1,
   184     ['(']=1, [')']=1, [',']=1, [';']=1, ['=']=1
   185   };
   186   return table[c];
   187 }
   188 
   189 /* C can start a token.  */
   190 static bool
   191 begtoken (unsigned char c)
   192 {
   193   static bool const table[UCHAR_MAX + 1] = {
   194     ['$']=1, ['@']=1,
   195     ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
   196     ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
   197     ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
   198     ['Y']=1, ['Z']=1,
   199     ['_']=1,
   200     ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
   201     ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
   202     ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
   203     ['y']=1, ['z']=1,
   204     ['~']=1
   205   };
   206   return table[c];
   207 }
   208 
   209 /* C can be in the middle of a token.  */
   210 static bool
   211 intoken (unsigned char c)
   212 {
   213   static bool const table[UCHAR_MAX + 1] = {
   214     ['$']=1,
   215     ['0']=1, ['1']=1, ['2']=1, ['3']=1, ['4']=1,
   216     ['5']=1, ['6']=1, ['7']=1, ['8']=1, ['9']=1,
   217     ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
   218     ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
   219     ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
   220     ['Y']=1, ['Z']=1,
   221     ['_']=1,
   222     ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
   223     ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
   224     ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
   225     ['y']=1, ['z']=1
   226   };
   227   return table[c];
   228 }
   229 
   230 /* C can end a token.  */
   231 static bool
   232 endtoken (unsigned char c)
   233 {
   234   static bool const table[UCHAR_MAX + 1] = {
   235     ['\0']=1, ['\t']=1, ['\n']=1, ['\r']=1, [' ']=1,
   236     ['!']=1, ['"']=1, ['#']=1, ['%']=1, ['&']=1, ['\'']=1, ['(']=1, [')']=1,
   237     ['*']=1, ['+']=1, [',']=1, ['-']=1, ['.']=1, ['/']=1, [':']=1, [';']=1,
   238     ['<']=1, ['=']=1, ['>']=1, ['?']=1, ['[']=1, [']']=1, ['^']=1,
   239     ['{']=1, ['|']=1, ['}']=1, ['~']=1
   240   };
   241   return table[c];
   242 }
   243 
   244 /*
   245  *      xnew, xrnew -- allocate, reallocate storage
   246  *
   247  * SYNOPSIS:    Type *xnew (ptrdiff_t n, Type);
   248  *              void xrnew (OldPointer, ptrdiff_t n, int multiplier);
   249  */
   250 #define xnew(n, Type) ((Type *) xnmalloc (n, sizeof (Type)))
   251 #define xrnew(op, n, m) ((op) = xnrealloc (op, n, (m) * sizeof *(op)))
   252 
   253 typedef void Lang_function (FILE *);
   254 
   255 typedef struct
   256 {
   257   const char *suffix;           /* file name suffix for this compressor */
   258   const char *command;          /* takes one arg and decompresses to stdout */
   259 } compressor;
   260 
   261 typedef struct
   262 {
   263   const char *name;             /* language name */
   264   const char *help;             /* detailed help for the language */
   265   Lang_function *function;      /* parse function */
   266   const char **suffixes;        /* name suffixes of this language's files */
   267   const char **filenames;       /* names of this language's files */
   268   const char **interpreters;    /* interpreters for this language */
   269   bool metasource;              /* source used to generate other sources */
   270 } language;
   271 
   272 typedef struct fdesc
   273 {
   274   struct fdesc *next;           /* for the linked list */
   275   char *infname;                /* uncompressed input file name */
   276   char *infabsname;             /* absolute uncompressed input file name */
   277   char *infabsdir;              /* absolute dir of input file */
   278   char *taggedfname;            /* file name to write in tagfile */
   279   language *lang;               /* language of file */
   280   char *prop;                   /* file properties to write in tagfile */
   281   bool usecharno;               /* etags tags shall contain char number */
   282   bool written;                 /* entry written in the tags file */
   283 } fdesc;
   284 
   285 typedef struct node_st
   286 {                               /* sorting structure */
   287   struct node_st *left, *right; /* left and right sons */
   288   fdesc *fdp;                   /* description of file to whom tag belongs */
   289   char *name;                   /* tag name */
   290   char *regex;                  /* search regexp */
   291   bool valid;                   /* write this tag on the tag file */
   292   bool is_func;                 /* function tag: use regexp in CTAGS mode */
   293   bool been_warned;             /* warning already given for duplicated tag */
   294   intmax_t lno;                 /* line number tag is on */
   295   intmax_t cno;                 /* character number line starts on */
   296 } node;
   297 
   298 /*
   299  * A `linebuffer' is a structure which holds a line of text.
   300  * `readline_internal' reads a line from a stream into a linebuffer
   301  * and works regardless of the length of the line.
   302  * SIZE is the size of BUFFER, LEN is the length of the string in
   303  * BUFFER after readline reads it.
   304  */
   305 typedef struct
   306 {
   307   ptrdiff_t size;
   308   ptrdiff_t len;
   309   char *buffer;
   310 } linebuffer;
   311 
   312 /* Used to support mixing of --lang and file names. */
   313 typedef struct
   314 {
   315   enum {
   316     at_language,                /* a language specification */
   317     at_regexp,                  /* a regular expression */
   318     at_filename,                /* a file name */
   319     at_stdin,                   /* read from stdin here */
   320     at_end                      /* stop parsing the list */
   321   } arg_type;                   /* argument type */
   322   language *lang;               /* language associated with the argument */
   323   char *what;                   /* the argument itself */
   324 } argument;
   325 
   326 /* Structure defining a regular expression. */
   327 typedef struct regexp
   328 {
   329   struct regexp *p_next;        /* pointer to next in list */
   330   language *lang;               /* if set, use only for this language */
   331   char *pattern;                /* the regexp pattern */
   332   char *name;                   /* tag name */
   333   struct re_pattern_buffer *pat; /* the compiled pattern */
   334   struct re_registers regs;     /* re registers */
   335   bool error_signaled;          /* already signaled for this regexp */
   336   bool ignore_case;             /* ignore case when matching */
   337   bool multi_line;              /* do a multi-line match on the whole file */
   338 } regexp;
   339 
   340 
   341 /* Many compilers barf on this:
   342         Lang_function Ada_funcs;
   343    so let's write it this way */
   344 static void Ada_funcs (FILE *);
   345 static void Asm_labels (FILE *);
   346 static void C_entries (int c_ext, FILE *);
   347 static void default_C_entries (FILE *);
   348 static void plain_C_entries (FILE *);
   349 static void Cjava_entries (FILE *);
   350 static void Cobol_paragraphs (FILE *);
   351 static void Cplusplus_entries (FILE *);
   352 static void Cstar_entries (FILE *);
   353 static void Erlang_functions (FILE *);
   354 static void Forth_words (FILE *);
   355 static void Fortran_functions (FILE *);
   356 static void Go_functions (FILE *);
   357 static void HTML_labels (FILE *);
   358 static void Lisp_functions (FILE *);
   359 static void Lua_functions (FILE *);
   360 static void Makefile_targets (FILE *);
   361 static void Mercury_functions (FILE *);
   362 static void Pascal_functions (FILE *);
   363 static void Perl_functions (FILE *);
   364 static void PHP_functions (FILE *);
   365 static void PS_functions (FILE *);
   366 static void Prolog_functions (FILE *);
   367 static void Python_functions (FILE *);
   368 static void Ruby_functions (FILE *);
   369 static void Rust_entries (FILE *);
   370 static void Scheme_functions (FILE *);
   371 static void TeX_commands (FILE *);
   372 static void Texinfo_nodes (FILE *);
   373 static void Yacc_entries (FILE *);
   374 static void just_read_file (FILE *);
   375 
   376 static language *get_language_from_langname (const char *);
   377 static void readline (linebuffer *, FILE *);
   378 static ptrdiff_t readline_internal (linebuffer *, FILE *, char const *, const bool);
   379 static bool nocase_tail (const char *);
   380 static void get_tag (char *, char **);
   381 static void get_lispy_tag (char *);
   382 static void test_objc_is_mercury (char *, language **);
   383 
   384 static void analyze_regex (char *);
   385 static void free_regexps (void);
   386 static void regex_tag_multiline (void);
   387 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
   388 static void verror (char const *, va_list) ATTRIBUTE_FORMAT_PRINTF (1, 0);
   389 static _Noreturn void suggest_asking_for_help (void);
   390 static _Noreturn void fatal (char const *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
   391 static _Noreturn void pfatal (const char *);
   392 static void add_node (node *, node **);
   393 
   394 static void process_file_name (char *, language *);
   395 static void process_file (FILE *, char *, language *);
   396 static void find_entries (FILE *);
   397 static void free_tree (node *);
   398 static void free_fdesc (fdesc *);
   399 static void pfnote (char *, bool, char *, ptrdiff_t, intmax_t, intmax_t);
   400 static void invalidate_nodes (fdesc *, node **);
   401 static void put_entries (node *);
   402 static void cleanup_tags_file (char const * const, char const * const);
   403 
   404 #if !MSDOS && !defined (DOS_NT)
   405 static char *escape_shell_arg_string (char *);
   406 #endif
   407 static void do_move_file (const char *, const char *);
   408 static char *concat (const char *, const char *, const char *);
   409 static char *skip_spaces (char *);
   410 static char *skip_non_spaces (char *);
   411 static char *skip_name (char *);
   412 static char *savenstr (const char *, ptrdiff_t);
   413 static char *savestr (const char *);
   414 static char *etags_getcwd (void);
   415 static char *relative_filename (char *, char *);
   416 static char *absolute_filename (char *, char *);
   417 static char *absolute_dirname (char *, char *);
   418 static bool filename_is_absolute (char *f);
   419 static void canonicalize_filename (char *);
   420 static char *etags_mktmp (void);
   421 static void linebuffer_init (linebuffer *);
   422 static void linebuffer_setlen (linebuffer *, ptrdiff_t);
   423 static void *xmalloc (ptrdiff_t) ATTRIBUTE_MALLOC_SIZE ((1));
   424 static void *xnmalloc (ptrdiff_t, ptrdiff_t) ATTRIBUTE_MALLOC_SIZE ((1,2));
   425 static void *xnrealloc (void *, ptrdiff_t, ptrdiff_t)
   426   ATTRIBUTE_ALLOC_SIZE ((2,3));
   427 
   428 
   429 static char searchar = '/';     /* use /.../ searches */
   430 
   431 static char *tagfile;           /* output file */
   432 static char *progname;          /* name this program was invoked with */
   433 static char *cwd;               /* current working directory */
   434 static char *tagfiledir;        /* directory of tagfile */
   435 static FILE *tagf;              /* ioptr for tags file */
   436 static ptrdiff_t whatlen_max;   /* maximum length of any 'what' member */
   437 
   438 static fdesc *fdhead;           /* head of file description list */
   439 static fdesc *curfdp;           /* current file description */
   440 static char *infilename;        /* current input file name */
   441 static intmax_t lineno;         /* line number of current line */
   442 static intmax_t charno;         /* current character number */
   443 static intmax_t linecharno;     /* charno of start of current line */
   444 static char *dbp;               /* pointer to start of current tag */
   445 
   446 static intmax_t const invalidcharno = -1;
   447 
   448 static node *nodehead;          /* the head of the binary tree of tags */
   449 static node *last_node;         /* the last node created */
   450 
   451 static linebuffer lb;           /* the current line */
   452 static linebuffer filebuf;      /* a buffer containing the whole file */
   453 static linebuffer token_name;   /* a buffer containing a tag name */
   454 
   455 static bool append_to_tagfile;  /* -a: append to tags */
   456 /* The next five default to true in C and derived languages.  */
   457 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
   458 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
   459                                 /* 0 struct/enum/union decls, and C++ */
   460                                 /* member functions. */
   461 static bool constantypedefs;    /* -d: create tags for C #define, enum */
   462                                 /* constants and variables. */
   463                                 /* -D: opposite of -d.  Default under ctags. */
   464 static int globals;             /* create tags for global variables */
   465 static int members;             /* create tags for C member variables */
   466 static int declarations;        /* --declarations: tag them and extern in C&Co*/
   467 static int no_line_directive;   /* ignore #line directives (undocumented) */
   468 static int no_duplicates;       /* no duplicate tags for ctags (undocumented) */
   469 static bool update;             /* -u: update tags */
   470 static bool vgrind_style;       /* -v: create vgrind style index output */
   471 static bool no_warnings;        /* -w: suppress warnings (undocumented) */
   472 static bool cxref_style;        /* -x: create cxref style output */
   473 static bool cplusplus;          /* .[hc] means C++, not C (undocumented) */
   474 static bool ignoreindent;       /* -I: ignore indentation in C */
   475 static int packages_only;       /* --packages-only: in Ada, only tag packages*/
   476 static int class_qualify;       /* -Q: produce class-qualified tags in C++/Java */
   477 static int debug;               /* --debug */
   478 
   479 /* STDIN is defined in LynxOS system headers */
   480 #ifdef STDIN
   481 # undef STDIN
   482 #endif
   483 
   484 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
   485 static bool parsing_stdin;      /* --parse-stdin used */
   486 
   487 static regexp *p_head;          /* list of all regexps */
   488 static bool need_filebuf;       /* some regexes are multi-line */
   489 
   490 static struct option longopts[] =
   491 {
   492   { "append",             no_argument,       NULL,               'a'   },
   493   { "packages-only",      no_argument,       &packages_only,     1     },
   494   { "c++",                no_argument,       NULL,               'C'   },
   495   { "debug",              no_argument,       &debug,             1     },
   496   { "declarations",       no_argument,       &declarations,      1     },
   497   { "no-line-directive",  no_argument,       &no_line_directive, 1     },
   498   { "no-duplicates",      no_argument,       &no_duplicates,     1     },
   499   { "help",               no_argument,       NULL,               'h'   },
   500   { "help",               no_argument,       NULL,               'H'   },
   501   { "ignore-indentation", no_argument,       NULL,               'I'   },
   502   { "language",           required_argument, NULL,               'l'   },
   503   { "members",            no_argument,       &members,           1     },
   504   { "no-members",         no_argument,       &members,           0     },
   505   { "output",             required_argument, NULL,               'o'   },
   506   { "class-qualify",      no_argument,       &class_qualify,     'Q'   },
   507   { "regex",              required_argument, NULL,               'r'   },
   508   { "no-regex",           no_argument,       NULL,               'R'   },
   509   { "ignore-case-regex",  required_argument, NULL,               'c'   },
   510   { "parse-stdin",        required_argument, NULL,               STDIN },
   511   { "version",            no_argument,       NULL,               'V'   },
   512 
   513 #if CTAGS /* Ctags options */
   514   { "backward-search",    no_argument,       NULL,               'B'   },
   515   { "cxref",              no_argument,       NULL,               'x'   },
   516   { "defines",            no_argument,       NULL,               'd'   },
   517   { "globals",            no_argument,       &globals,           1     },
   518   { "typedefs",           no_argument,       NULL,               't'   },
   519   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
   520   { "update",             no_argument,       NULL,               'u'   },
   521   { "vgrind",             no_argument,       NULL,               'v'   },
   522   { "no-warn",            no_argument,       NULL,               'w'   },
   523 
   524 #else /* Etags options */
   525   { "no-defines",         no_argument,       NULL,               'D'   },
   526   { "no-globals",         no_argument,       &globals,           0     },
   527   { "include",            required_argument, NULL,               'i'   },
   528 #endif
   529   { NULL }
   530 };
   531 
   532 static compressor compressors[] =
   533 {
   534   { "z", "gzip -d -c"},
   535   { "Z", "gzip -d -c"},
   536   { "gz", "gzip -d -c"},
   537   { "GZ", "gzip -d -c"},
   538   { "bz2", "bzip2 -d -c" },
   539   { "xz", "xz -d -c" },
   540   { "zst", "zstd -d -c" },
   541   { NULL }
   542 };
   543 
   544 /*
   545  * Language stuff.
   546  */
   547 
   548 /* Ada code */
   549 static const char *Ada_suffixes [] =
   550   { "ads", "adb", "ada", NULL };
   551 static const char Ada_help [] =
   552 "In Ada code, functions, procedures, packages, tasks and types are\n\
   553 tags.  Use the '--packages-only' option to create tags for\n\
   554 packages only.\n\
   555 Ada tag names have suffixes indicating the type of entity:\n\
   556         Entity type:    Qualifier:\n\
   557         ------------    ----------\n\
   558         function        /f\n\
   559         procedure       /p\n\
   560         package spec    /s\n\
   561         package body    /b\n\
   562         type            /t\n\
   563         task            /k\n\
   564 Thus, 'M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
   565 body of the package 'bidule', while 'M-x find-tag <RET> bidule <RET>'\n\
   566 will just search for any tag 'bidule'.";
   567 
   568 /* Assembly code */
   569 static const char *Asm_suffixes [] =
   570   { "a",        /* Unix assembler */
   571     "asm", /* Microcontroller assembly */
   572     "def", /* BSO/Tasking definition includes  */
   573     "inc", /* Microcontroller include files */
   574     "ins", /* Microcontroller include files */
   575     "s", "sa", /* Unix assembler */
   576     "S",   /* cpp-processed Unix assembler */
   577     "src", /* BSO/Tasking C compiler output */
   578     NULL
   579   };
   580 static const char Asm_help [] =
   581 "In assembler code, labels appearing at the beginning of a line,\n\
   582 followed by a colon, are tags.";
   583 
   584 
   585 /* Note that .c and .h can be considered C++, if the --c++ flag was
   586    given, or if the `class' or `template' keywords are met inside the file.
   587    That is why default_C_entries is called for these. */
   588 static const char *default_C_suffixes [] =
   589   { "c", "h", NULL };
   590 #if CTAGS                               /* C help for Ctags */
   591 static const char default_C_help [] =
   592 "In C code, any C function is a tag.  Use -t to tag typedefs.\n\
   593 Use -T to tag definitions of 'struct', 'union' and 'enum'.\n\
   594 Use -d to tag '#define' macro definitions and 'enum' constants.\n\
   595 Use --globals to tag global variables.\n\
   596 You can tag function declarations and external variables by\n\
   597 using '--declarations', and struct members by using '--members'.";
   598 #else                                   /* C help for Etags */
   599 static const char default_C_help [] =
   600 "In C code, any C function or typedef is a tag, and so are\n\
   601 definitions of 'struct', 'union' and 'enum'.  '#define' macro\n\
   602 definitions and 'enum' constants are tags unless you specify\n\
   603 '--no-defines'.  Global variables are tags unless you specify\n\
   604 '--no-globals' and so are struct members unless you specify\n\
   605 '--no-members'.  Use of '--no-globals', '--no-defines' and\n\
   606 '--no-members' can make the tags table file much smaller.\n\
   607 You can tag function declarations and external variables by\n\
   608 using '--declarations'.";
   609 #endif  /* C help for Ctags and Etags */
   610 
   611 static const char *Cplusplus_suffixes [] =
   612   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
   613     "M",                        /* Objective C++ */
   614     "pdb",                      /* PostScript with C syntax */
   615     NULL };
   616 static const char Cplusplus_help [] =
   617 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
   618 --help --lang=c --lang=c++ for full help.)\n\
   619 In addition to C tags, member functions are also recognized.  Member\n\
   620 variables are recognized unless you use the '--no-members' option.\n\
   621 Tags for variables and functions in classes are named 'CLASS::VARIABLE'\n\
   622 and 'CLASS::FUNCTION'.  'operator' definitions have tag names like\n\
   623 'operator+'.";
   624 
   625 static const char *Cjava_suffixes [] =
   626   { "java", NULL };
   627 static char Cjava_help [] =
   628 "In Java code, all the tags constructs of C and C++ code are\n\
   629 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
   630 
   631 
       /* Cobol: default suffixes and help text for the "cobol" row of
          lang_names.  Cobol_help is const, like the other *_help strings.  */
   632 static const char *Cobol_suffixes [] =
   633   { "COB", "cob", NULL };
   634 static const char Cobol_help [] =
   635 "In Cobol code, tags are paragraph names; that is, any word\n\
   636 starting in column 8 and followed by a period.";
   637 
       /* C*: default suffixes for the "c*" row of lang_names.  That row has
          no help text of its own; it uses no_lang_help.  */
   638 static const char *Cstar_suffixes [] =
   639   { "cs", "hs", NULL };
   640 
       /* Erlang: default suffixes, help text and interpreter names for the
          "erlang" row of lang_names.  */
   641 static const char *Erlang_suffixes [] =
   642   { "erl", "hrl", NULL };
   643 static const char Erlang_help [] =
   644 "In Erlang code, the tags are the functions, records and macros\n\
   645 defined in the file.";
   646 static const char *Erlang_interpreters [] =
   647   { "escript", NULL };
   648 
       /* Forth: default suffixes and help text for the "forth" row of
          lang_names.  */
   649 static const char *Forth_suffixes [] =
   650   { "fth", "tok", NULL };
   651 static const char Forth_help [] =
   652 "In Forth code, tags are words defined by ':',\n\
   653 constant, code, create, defer, value, variable, buffer:, field.";
   654 
       /* Fortran: default suffixes and help text for the "fortran" row of
          lang_names.  */
   655 static const char *Fortran_suffixes [] =
   656   { "F", "f", "f90", "for", NULL };
   657 static const char Fortran_help [] =
   658 "In Fortran code, functions, subroutines and block data are tags.";
   659 
       /* Go: default suffixes and help text for the "go" row of lang_names.  */
   660 static const char *Go_suffixes [] = {"go", NULL};
   661 static const char Go_help [] =
   662   "In Go code, functions, interfaces and packages are tags.";
   663 
       /* HTML: default suffixes and help text for the "html" row of
          lang_names.  */
   664 static const char *HTML_suffixes [] =
   665   { "htm", "html", "shtml", NULL };
   666 static const char HTML_help [] =
   667 "In HTML input files, the tags are the 'title' and the 'h1', 'h2',\n\
   668 'h3' headers.  Also, tags are 'name=' in anchors and all\n\
   669 occurrences of 'id='.";
   670 
       /* Lisp: default suffixes and help text for the "lisp" row of
          lang_names.  */
   671 static const char *Lisp_suffixes [] =
   672   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
   673 static const char Lisp_help [] =
   674 "In Lisp code, any function defined with 'defun', any variable\n\
   675 defined with 'defvar' or 'defconst', and in general the first\n\
   676 argument of any expression that starts with '(def' in column zero\n\
   677 is a tag.\n\
   678 The '--declarations' option tags \"(defvar foo)\" constructs too.";
   679 
       /* Lua: default suffixes, help text and interpreter names for the
          "lua" row of lang_names.  */
   680 static const char *Lua_suffixes [] =
   681   { "lua", "LUA", NULL };
   682 static const char Lua_help [] =
   683 "In Lua scripts, all functions are tags.";
   684 static const char *Lua_interpreters [] =
   685   { "lua", NULL };
   686 
       /* Makefiles: default file names and help text for the "makefile" row
          of lang_names.  That row passes NULL for its suffix list.  */
   687 static const char *Makefile_filenames [] =
   688   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
   689 static const char Makefile_help [] =
   690 "In makefiles, targets are tags; additionally, variables are tags\n\
   691 unless you specify '--no-globals'.";
   692 
   693 /* Mercury and Objective C share the same .m file extensions.  */
       /* Default suffixes and help text for the "mercury" row of lang_names.  */
   694 static const char *Mercury_suffixes [] =
   695   {"m",
   696    NULL};
   697 static const char Mercury_help [] =
   698   "In Mercury code, tags are all declarations beginning a line with ':-'\n\
   699 and optionally Prolog-like definitions (first rule for a predicate or \
   700 function).\n\
   701 To enable this behavior, run etags using --declarations.";
       /* Mercury-specific settings; neither is referenced in this part of
          the file.  NOTE(review): presumably read by the Mercury parser and
          by the Objective C/Mercury disambiguation of .m files -- confirm.  */
   702 static bool with_mercury_definitions = false;
   703 float mercury_heuristics_ratio = MERCURY_HEURISTICS_RATIO;
   704 
       /* Objective C: default suffixes and help text for the "objc" row of
          lang_names.  The "m" suffix is also listed in Mercury_suffixes.  */
   705 static const char *Objc_suffixes [] =
   706   { "lm",                       /* Objective lex file  */
   707     "m",                        /* By default, Objective C file will be assumed.  */
   708      NULL};
   709 static const char Objc_help [] =
   710 "In Objective C code, tags include Objective C definitions for classes,\n\
   711 class categories, methods and protocols.  Tags for variables and\n\
   712 functions in classes are named 'CLASS::VARIABLE' and 'CLASS::FUNCTION'.\
   713 \n(Use --help --lang=c --lang=objc --lang=java for full help.)";
   714 
       /* Pascal: default suffixes and help text for the "pascal" row of
          lang_names.  */
   715 static const char *Pascal_suffixes [] =
   716   { "p", "pas", NULL };
   717 static const char Pascal_help [] =
   718 "In Pascal code, the tags are the functions and procedures defined\n\
   719 in the file.";
   720 /* " // this is for working around an Emacs highlighting bug... */
   721 
       /* Perl: default suffixes, interpreter names and help text for the
          "perl" row of lang_names.  */
   722 static const char *Perl_suffixes [] =
   723   { "pl", "pm", NULL };
   724 static const char *Perl_interpreters [] =
   725   { "perl", "@PERL@", NULL };
   726 static const char Perl_help [] =
   727 "In Perl code, the tags are the packages, subroutines and variables\n\
   728 defined by the 'package', 'sub', 'my' and 'local' keywords.  Use\n\
   729 '--globals' if you want to tag global variables.  Tags for\n\
   730 subroutines are named 'PACKAGE::SUB'.  The name for subroutines\n\
   731 defined in the default package is 'main::SUB'.";
   732 
       /* PHP: default suffixes and help text for the "php" row of
          lang_names.  */
   733 static const char *PHP_suffixes [] =
   734   { "php", "php3", "php4", NULL };
   735 static const char PHP_help [] =
   736 "In PHP code, tags are functions, classes and defines.  Unless you use\n\
   737 the '--no-members' option, vars are tags too.";
   738 
       /* Pro*C: default suffix for the "proc" row of lang_names, which uses
          no_lang_help as its help text.  */
   739 static const char *plain_C_suffixes [] =
   740   { "pc",                       /* Pro*C file */
   741      NULL };
   742 
       /* PostScript: default suffixes and help text for the "postscript"
          row of lang_names.  */
   743 static const char *PS_suffixes [] =
   744   { "ps", "psw", NULL };        /* .psw is for PSWrap */
   745 static const char PS_help [] =
   746 "In PostScript code, the tags are the functions.";
   747 
       /* Prolog: default suffixes, help text and interpreter names for the
          "prolog" row of lang_names.  */
   748 static const char *Prolog_suffixes [] =
   749   { "prolog", NULL };
   750 static const char Prolog_help [] =
   751 "In Prolog code, tags are predicates and rules at the beginning of\n\
   752 line.";
   753 static const char *Prolog_interpreters [] =
   754   { "gprolog", "pl", "yap", "swipl", "prolog", NULL };
   755 
       /* Python: default suffixes, help text and interpreter names for the
          "python" row of lang_names.  */
   756 static const char *Python_suffixes [] =
   757   { "py", NULL };
   758 static const char Python_help [] =
   759 "In Python code, 'def' or 'class' at the beginning of a line\n\
   760 generate a tag.";
   761 static const char *Python_interpreters [] =
   762   { "python", NULL };
   763 
       /* Ruby: default suffixes, default file names, help text and
          interpreter names for the "ruby" row of lang_names.  */
   764 static const char *Ruby_suffixes [] =
   765   { "rb", "ru", "rbw", NULL };
   766 static const char *Ruby_filenames [] =
   767   { "Rakefile", "Thorfile", NULL };
   768 static const char Ruby_help [] =
   769   "In Ruby code, 'def' or 'class' or 'module' at the beginning of\n\
   770 a line generate a tag.  Constants also generate a tag.";
   771 static const char *Ruby_interpreters [] =
   772   { "ruby", NULL };
   773 
       /* Rust: default suffixes and help text for the "rust" row of
          lang_names.  */
   774 static const char *Rust_suffixes [] =
   775   { "rs", NULL };
   776 static const char Rust_help [] =
   777   "In Rust code, tags anything defined with 'fn', 'enum', \n\
   778 'struct' or 'macro_rules!'.";
   779 
   780 /* Can't do the `SCM' or `scm' prefix with a version number. */
       /* Scheme: default suffixes and help text for the "scheme" row of
          lang_names.  */
   781 static const char *Scheme_suffixes [] =
   782   { "oak", "rkt", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
   783 static const char Scheme_help [] =
   784 "In Scheme code, tags include anything defined with 'def' or with a\n\
   785 construct whose name starts with 'def'.  They also include\n\
   786 variables set with 'set!' at top level in the file.";
   787 
       /* TeX/LaTeX: default suffixes and help text for the "tex" row of
          lang_names.  */
   788 static const char *TeX_suffixes [] =
   789   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
   790 static const char TeX_help [] =
   791 "In LaTeX text, the argument of any of the commands '\\chapter',\n\
   792 '\\section', '\\subsection', '\\subsubsection', '\\eqno', '\\label',\n\
   793 '\\ref', '\\cite', '\\bibitem', '\\part', '\\appendix', '\\entry',\n\
   794 '\\index', '\\def', '\\newcommand', '\\renewcommand',\n\
   795 '\\newenvironment' or '\\renewenvironment' is a tag.\n\
   796 \n\
   797 Other commands can be specified by setting the environment variable\n\
   798 'TEXTAGS' to a colon-separated list like, for example,\n\
   799      TEXTAGS=\"mycommand:myothercommand\".";
   800 
   801 
       /* Texinfo: default suffixes and help text for the "texinfo" row of
          lang_names.  */
   802 static const char *Texinfo_suffixes [] =
   803   { "texi", "texinfo", "txi", NULL };
   804 static const char Texinfo_help [] =
   805 "for texinfo files, lines starting with @node are tagged.";
   806 
       /* Yacc/Bison: default suffixes and help text for the "yacc" row of
          lang_names.  */
   807 static const char *Yacc_suffixes [] =
   808   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
   809 static const char Yacc_help [] =
   810 "In Bison or Yacc input files, each rule defines as a tag the\n\
   811 nonterminal it constructs.  The portions of the file that contain\n\
   812 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
   813 for full help).";
   814 
       /* Help text for the "auto" pseudo-language row of lang_names.  The
          wording ("based on") matches the note in print_language_names.  */
   815 static const char auto_help [] =
   816 "'auto' is not a real language, it indicates to use\n\
   817 a default language for files based on file name suffix and file contents.";
   818 
       /* Help text for the "none" pseudo-language row of lang_names.  */
   819 static const char none_help [] =
   820 "'none' is not a real language, it indicates to only do\n\
   821 regexp processing on files.";
   822 
       /* Fallback help text for rows of lang_names that have no help of
          their own ("c*" and "proc").  */
   823 static const char no_lang_help [] =
   824 "No detailed help available for this language.";
   825 
   826 
   827 /*
   828  * Table of languages.
   829  *
   830  * It is ok for a given function to be listed under more than one
   831  * name; plain_C_entries, for example, serves both "objc" and "proc".
   832  */
   833 
       /* Each row lists, in order: language name, help text, tagging
          function, default suffixes, default file names, interpreter names,
          and a final boolean that only the "yacc" row sets.
          NOTE(review): only the name, help, suffixes and filenames fields of
          `language' are visible in this part of the file -- confirm the
          remaining field names against the struct definition.
          Trailing fields left out of a row are zero-initialized.  The
          { NULL } row ends the table; print_language_names stops at the
          first row whose name is NULL.  */
   834 static language lang_names [] =
   835 {
   836   { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
   837   { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
   838   { "c",         default_C_help, default_C_entries, default_C_suffixes },
   839   { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
   840   { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
   841   { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
   842   { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes,
   843                  NULL,           Erlang_interpreters },
   844   { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
   845   { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
   846   { "go",        Go_help,        Go_functions,      Go_suffixes        },
   847   { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
   848   { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
   849   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
   850   { "lua",       Lua_help,Lua_functions,Lua_suffixes,NULL,Lua_interpreters},
   851   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
   852   /* objc listed before mercury as it is a better default for .m extensions.  */
   853   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
   854   { "mercury",   Mercury_help,   Mercury_functions, Mercury_suffixes   },
   855   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
   856   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
   857   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
   858   { "postscript",PS_help,        PS_functions,      PS_suffixes        },
   859   { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
   860   { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes,
   861                  NULL,           Prolog_interpreters },
   862   { "python",    Python_help,    Python_functions,  Python_suffixes,
   863                  NULL,           Python_interpreters },
   864   { "ruby",      Ruby_help,      Ruby_functions,    Ruby_suffixes,
   865                  Ruby_filenames, Ruby_interpreters },
   866   { "rust",      Rust_help,      Rust_entries,      Rust_suffixes      },
   867   { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
   868   { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
   869   { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
   870   { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,true},
   871   { "auto",      auto_help },                      /* default guessing scheme */
   872   { "none",      none_help,      just_read_file }, /* regexp matching only */
   873   { NULL }                /* end of list */
   874 };
   875 
   876 
       /* Print the supported-languages section of the help text on stdout.
          For every row of lang_names up to the NULL-named terminator, print
          the language name left-justified in 10 columns, then its default
          file names and its suffixes (each with a leading dot), if any.
          Finish with notes on language detection, compressed files and
          per-language help.  */
   877 static void
   878 print_language_names (void)
   879 {
   880   const language *entry;
   881 
   882   puts ("\nThese are the currently supported languages, along with the\n"
   883         "default file names and dot suffixes:");
   884   for (entry = lang_names; entry->name != NULL; entry++)
   885     {
   886       const char **fname = entry->filenames;
   887       const char **suffix = entry->suffixes;
   888 
   889       printf ("  %-*s", 10, entry->name);
   890       while (fname != NULL && *fname != NULL)
   891         printf (" %s", *fname++);
   892       while (suffix != NULL && *suffix != NULL)
   893         printf (" .%s", *suffix++);
   894       puts ("");
   895     }
   896   puts ("where 'auto' means use default language for files based on file\n"
   897         "name suffix, and 'none' means only do regexp processing on files.\n"
   898         "If no language is specified and no matching suffix is found,\n"
   899         "the first line of the file is read for a sharp-bang (#!) sequence\n"
   900         "followed by the name of an interpreter.  If no such sequence is found,\n"
   901         "Fortran is tried first; if no tags are found, C is tried next.\n"
   902         "When parsing any C file, a \"class\" or \"template\" keyword\n"
   903         "switches to C++.");
   904   puts ("Compressed files are supported using gzip, bzip2, xz, and zstd.\n"
   905         "\n"
   906         "For detailed help on a given language use, for example,\n"
   907         "etags --help --lang=ada.");
   908 }
   909 
       /* Name under which this build identifies itself in print_version.  */
   910 #if !CTAGS
   911 # define PROGRAM_NAME "etags"
   912 #else
   913 # define PROGRAM_NAME "ctags"
   914 #endif
       /* Write the version banner (program name, PACKAGE_NAME,
          PACKAGE_VERSION, the COPYRIGHT string and a pointer to
          ETAGS.README) to stdout, then exit with EXIT_SUCCESS.  */
   915 static _Noreturn void
   916 print_version (void)
   917 {
   918   static const char banner[] =
   919     PROGRAM_NAME " (" PACKAGE_NAME " " PACKAGE_VERSION ")\n"
   920     COPYRIGHT "\nThis program is distributed under the terms in ETAGS.README\n";
   921   fputs (banner, stdout);
   922   exit (EXIT_SUCCESS);
   923 }
   924 
       /* Compile-time switch, false unless already defined: when true,
          print_help also shows the two '-w' entries it guards in the ctags
          option list.  */
   925 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
   926 # define PRINT_UNDOCUMENTED_OPTIONS_HELP false
   927 #endif
   928 
       /* Print help on stdout and exit with EXIT_SUCCESS; never returns.

          ARGBUFFER is scanned up to its at_end element.  If it contains
          at_language entries, only the help strings of those languages are
          printed, separated by blank lines.  Otherwise the usage line, the
          option list (whose entries depend on CTAGS), the list of supported
          languages and the bug-report address are printed.  */
   929 static _Noreturn void
   930 print_help (argument *argbuffer)
   931 {
   932   bool help_for_lang = false;
   933 
         /* Language-specific help: one help string per at_language entry,
            with an empty line between consecutive ones.  */
   934   for (; argbuffer->arg_type != at_end; argbuffer++)
   935     if (argbuffer->arg_type == at_language)
   936       {
   937         if (help_for_lang)
   938           puts ("");
   939         puts (argbuffer->lang->help);
   940         help_for_lang = true;
   941       }
   942 
         /* Language help was printed; skip the general usage text.  */
   943   if (help_for_lang)
   944     exit (EXIT_SUCCESS);
   945 
   946   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
   947 \n\
   948 These are the options accepted by %s.\n", progname, progname);
   949   puts ("You may use unambiguous abbreviations for the long option names.");
   950   puts ("  A - as file name means read names from stdin (one per line).\n\
   951 Absolute names are stored in the output file as they are.\n\
   952 Relative ones are stored relative to the output file's directory.\n");
   953 
   954   puts ("-a, --append\n\
   955         Append tag entries to existing tags file.");
   956 
   957   puts ("--packages-only\n\
   958         For Ada files, only generate tags for packages.");
   959 
   960   if (CTAGS)
   961     puts ("-B, --backward-search\n\
   962         Write the search commands for the tag entries using '?', the\n\
   963         backward-search command instead of '/', the forward-search command.");
   964 
   965   /* This option is mostly obsolete, because etags can now automatically
   966      detect C++.  Retained for backward compatibility and for debugging and
   967      experimentation.  In principle, we could want to tag as C++ even
   968      before any "class" or "template" keyword.
   969   puts ("-C, --c++\n\
   970         Treat files whose name suffix defaults to C language as C++ files.");
   971   */
   972 
   973   puts ("--declarations\n\
   974         In C and derived languages, create tags for function declarations,");
   975   if (CTAGS)
   976     puts ("\tand create tags for extern variables if --globals is used.");
   977   else
   978     puts
   979       ("\tand create tags for extern variables unless --no-globals is used.");
   980 
   981   puts ("\tIn Mercury, tag both declarations starting a line with ':-' and\n\
   982         first predicates or functions in clauses.");
   983 
   984   if (CTAGS)
   985     puts ("-d, --defines\n\
   986         Create tag entries for C #define constants and enum constants, too.");
   987   else
   988     puts ("-D, --no-defines\n\
   989         Don't create tag entries for C #define constants and enum constants.\n\
   990         This makes the tags file smaller.");
   991 
   992   if (!CTAGS)
   993     puts ("-i FILE, --include=FILE\n\
   994         Include a note in tag file indicating that, when searching for\n\
   995         a tag, one should also consult the tags file FILE after\n\
   996         checking the current file.");
   997 
   998   puts ("-l LANG, --language=LANG\n\
   999         Force the following files to be considered as written in the\n\
  1000         named language up to the next --language=LANG option.");
  1001 
  1002   if (CTAGS)
  1003     puts ("--globals\n\
  1004         Create tag entries for global variables in some languages.");
  1005   else
  1006     puts ("--no-globals\n\
  1007         Do not create tag entries for global variables in some\n\
  1008         languages.  This makes the tags file smaller.");
  1009 
  1010   puts ("--no-line-directive\n\
  1011         Ignore #line preprocessor directives in C and derived languages.");
  1012 
  1013   if (CTAGS)
  1014     puts ("--members\n\
  1015         Create tag entries for members of structures in some languages.");
  1016   else
  1017     puts ("--no-members\n\
  1018         Do not create tag entries for members of structures\n\
  1019         in some languages.");
  1020 
  1021   puts ("-Q, --class-qualify\n\
  1022         Qualify tag names with their class name in C++, ObjC, Java, and Perl.\n\
  1023         This produces tag names of the form \"class::member\" for C++,\n\
  1024         \"class(category)\" for Objective C, and \"class.member\" for Java.\n\
  1025         For Objective C, this also produces class methods qualified with\n\
  1026         their arguments, as in \"foo:bar:baz:more\".\n\
  1027         For Perl, this produces \"package::member\".");
  1028   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
  1029         Make a tag for each line matching a regular expression pattern\n\
  1030         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
  1031         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
  1032         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
  1033         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
  1034   puts ("       If TAGNAME/ is present, the tags created are named.\n\
  1035         For example Tcl named tags can be created with:\n\
  1036           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
  1037         MODS are optional one-letter modifiers: 'i' means to ignore case,\n\
  1038         'm' means to allow multi-line matches, 's' implies 'm' and\n\
  1039         causes dot to match any character, including newline.");
  1040 
  1041   puts ("-R, --no-regex\n\
  1042         Don't create tags from regexps for the following files.");
  1043 
  1044   puts ("-I, --ignore-indentation\n\
  1045         In C and C++ do not assume that a closing brace in the first\n\
  1046         column is the final brace of a function or structure definition.");
  1047 
  1048   puts ("-o FILE, --output=FILE\n\
  1049         Write the tags to FILE.");
  1050 
  1051   puts ("--parse-stdin=NAME\n\
  1052         Read from standard input and record tags as belonging to file NAME.");
  1053 
  1054   if (CTAGS)
  1055     {
  1056       puts ("-t, --typedefs\n\
  1057         Generate tag entries for C and Ada typedefs.");
  1058       puts ("-T, --typedefs-and-c++\n\
  1059         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
  1060         and C++ member functions.");
  1061     }
  1062 
  1063   if (CTAGS)
  1064     puts ("-u, --update\n\
  1065         Update the tag entries for the given files, leaving tag\n\
  1066         entries for other files in place.  Currently, this is\n\
  1067         implemented by deleting the existing entries for the given\n\
  1068         files and then rewriting the new entries at the end of the\n\
  1069         tags file.  It is often faster to simply rebuild the entire\n\
  1070         tag file than to use this.");
  1071 
  1072   if (CTAGS)
  1073     {
  1074       puts ("-v, --vgrind\n\
  1075         Print on the standard output an index of items intended for\n\
  1076         human consumption, similar to the output of vgrind.  The index\n\
  1077         is sorted, and gives the page number of each item.");
  1078 
             /* Both entries below describe '-w' and are printed only when
                PRINT_UNDOCUMENTED_OPTIONS_HELP is true.  */
  1079       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
  1080         puts ("-w, --no-duplicates\n\
  1081         Do not create duplicate tag entries, for compatibility with\n\
  1082         traditional ctags.");
  1083 
  1084       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
  1085         puts ("-w, --no-warn\n\
  1086         Suppress warning messages about duplicate tag entries.");
  1087 
  1088       puts ("-x, --cxref\n\
  1089         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
  1090         The output uses line numbers instead of page numbers, but\n\
  1091         beyond that the differences are cosmetic; try both to see\n\
  1092         which you like.");
  1093     }
  1094 
  1095   puts ("-V, --version\n\
  1096         Print the version of the program.\n\
  1097 -h, --help\n\
  1098         Print this help message.\n\
  1099         Followed by one or more '--language' options prints detailed\n\
  1100         help about tag generation for the specified languages.");
  1101 
  1102   print_language_names ();
  1103 
  1104   puts ("");
  1105   puts ("Report bugs to bug-gnu-emacs@gnu.org");
  1106 
  1107   exit (EXIT_SUCCESS);
  1108 }
  1109 
  1110 
  1111 int
  1112 main (int argc, char **argv)
  1113 {
  1114   int i;
  1115   int nincluded_files;
  1116   char **included_files;
  1117   argument *argbuffer;
  1118   int current_arg, file_count;
  1119   linebuffer filename_lb;
  1120   bool help_asked = false;
  1121   ptrdiff_t len;
  1122   char *optstring;
  1123   int opt;
  1124 
  1125   progname = argv[0];
  1126   nincluded_files = 0;
  1127   included_files = xnew (argc, char *);
  1128   current_arg = 0;
  1129   file_count = 0;
  1130 
  1131   /* Allocate enough no matter what happens.  Overkill, but each one
  1132      is small. */
  1133   argbuffer = xnew (argc, argument);
  1134 
  1135   /*
  1136    * Always find typedefs and structure tags.
  1137    * Also default to find macro constants, enum constants, struct
  1138    * members and global variables.  Do it for both etags and ctags.
  1139    */
  1140   typedefs = typedefs_or_cplusplus = constantypedefs = true;
  1141   globals = members = true;
  1142 
  1143   /* When the optstring begins with a '-' getopt_long does not rearrange the
  1144      non-options arguments to be at the end, but leaves them alone. */
  1145   optstring = concat ("-ac:Cf:Il:o:Qr:RSVhH",
  1146                       (CTAGS) ? "BxdtTuvw" : "Di:",
  1147                       "");
  1148 
  1149   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
  1150     switch (opt)
  1151       {
  1152       case 0:
  1153         /* If getopt returns 0, then it has already processed a
  1154            long-named option.  We should do nothing.  */
  1155         break;
  1156 
  1157       case 1:
  1158         /* This means that a file name has been seen.  Record it. */
  1159         argbuffer[current_arg].arg_type = at_filename;
  1160         argbuffer[current_arg].what     = optarg;
  1161         len = strlen (optarg);
  1162         if (whatlen_max < len)
  1163           whatlen_max = len;
  1164         ++current_arg;
  1165         ++file_count;
  1166         break;
  1167 
  1168       case STDIN:
  1169         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
  1170         argbuffer[current_arg].arg_type = at_stdin;
  1171         argbuffer[current_arg].what     = optarg;
  1172         len = strlen (optarg);
  1173         if (whatlen_max < len)
  1174           whatlen_max = len;
  1175         ++current_arg;
  1176         ++file_count;
  1177         if (parsing_stdin)
  1178           fatal ("cannot parse standard input more than once");
  1179         parsing_stdin = true;
  1180         break;
  1181 
  1182         /* Common options. */
  1183       case 'a': append_to_tagfile = true;       break;
  1184       case 'C': cplusplus = true;               break;
  1185       case 'f':         /* for compatibility with old makefiles */
  1186       case 'o':
  1187         if (tagfile)
  1188           {
  1189             error ("-o option may only be given once.");
  1190             suggest_asking_for_help ();
  1191           }
  1192         tagfile = optarg;
  1193         break;
  1194       case 'I':
  1195       case 'S':         /* for backward compatibility */
  1196         ignoreindent = true;
  1197         break;
  1198       case 'l':
  1199         {
  1200           language *lang = get_language_from_langname (optarg);
  1201           if (lang != NULL)
  1202             {
  1203               argbuffer[current_arg].lang = lang;
  1204               argbuffer[current_arg].arg_type = at_language;
  1205               ++current_arg;
  1206             }
  1207         }
  1208         break;
  1209       case 'c':
  1210         /* Backward compatibility: support obsolete --ignore-case-regexp. */
  1211         optarg = concat (optarg, "i", ""); /* memory leak here */
  1212         FALLTHROUGH;
  1213       case 'r':
  1214         argbuffer[current_arg].arg_type = at_regexp;
  1215         argbuffer[current_arg].what = optarg;
  1216         len = strlen (optarg);
  1217         if (whatlen_max < len)
  1218           whatlen_max = len;
  1219         ++current_arg;
  1220         break;
  1221       case 'R':
  1222         argbuffer[current_arg].arg_type = at_regexp;
  1223         argbuffer[current_arg].what = NULL;
  1224         ++current_arg;
  1225         break;
  1226       case 'V':
  1227         print_version ();
  1228         break;
  1229       case 'h':
  1230       case 'H':
  1231         help_asked = true;
  1232         break;
  1233       case 'Q':
  1234         class_qualify = 1;
  1235         break;
  1236 
  1237         /* Etags options */
  1238       case 'D': constantypedefs = false;                        break;
  1239       case 'i': included_files[nincluded_files++] = optarg;     break;
  1240 
  1241         /* Ctags options. */
  1242       case 'B': searchar = '?';                                 break;
  1243       case 'd': constantypedefs = true;                         break;
  1244       case 't': typedefs = true;                                break;
  1245       case 'T': typedefs = typedefs_or_cplusplus = true;        break;
  1246       case 'u': update = true;                                  break;
  1247       case 'v': vgrind_style = true;                            FALLTHROUGH;
  1248       case 'x': cxref_style = true;                             break;
  1249       case 'w': no_warnings = true;                             break;
  1250       default:
  1251         suggest_asking_for_help ();
  1252       }
  1253 
  1254   /* No more options.  Store the rest of arguments. */
  1255   for (; optind < argc; optind++)
  1256     {
  1257       argbuffer[current_arg].arg_type = at_filename;
  1258       argbuffer[current_arg].what = argv[optind];
  1259       len = strlen (argv[optind]);
  1260       if (whatlen_max < len)
  1261         whatlen_max = len;
  1262       ++current_arg;
  1263       ++file_count;
  1264     }
  1265 
  1266   argbuffer[current_arg].arg_type = at_end;
  1267 
  1268   if (help_asked)
  1269     print_help (argbuffer);
  1270 
  1271   if (nincluded_files == 0 && file_count == 0)
  1272     {
  1273       error ("no input files specified.");
  1274       suggest_asking_for_help ();
  1275     }
  1276 
  1277   if (tagfile == NULL)
  1278     tagfile = savestr (CTAGS ? "tags" : "TAGS");
  1279   cwd = etags_getcwd ();        /* the current working directory */
  1280   if (cwd[strlen (cwd) - 1] != '/')
  1281     {
  1282       char *oldcwd = cwd;
  1283       cwd = concat (oldcwd, "/", "");
  1284       free (oldcwd);
  1285     }
  1286 
  1287   /* Compute base directory for relative file names. */
  1288   if (streq (tagfile, "-")
  1289       || strneq (tagfile, "/dev/", 5))
  1290     tagfiledir = cwd;            /* relative file names are relative to cwd */
  1291   else
  1292     {
  1293       canonicalize_filename (tagfile);
  1294       tagfiledir = absolute_dirname (tagfile, cwd);
  1295     }
  1296 
  1297   linebuffer_init (&lb);
  1298   linebuffer_init (&filename_lb);
  1299   linebuffer_init (&filebuf);
  1300   linebuffer_init (&token_name);
  1301 
  1302   if (!CTAGS)
  1303     {
  1304       if (streq (tagfile, "-"))
  1305         {
  1306           tagf = stdout;
  1307           set_binary_mode (STDOUT_FILENO, O_BINARY);
  1308         }
  1309       else
  1310         tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
  1311       if (tagf == NULL)
  1312         pfatal (tagfile);
  1313     }
  1314 
  1315   /*
  1316    * Loop through files finding functions.
  1317    */
  1318   for (i = 0; i < current_arg; i++)
  1319     {
  1320       static language *lang;    /* non-NULL if language is forced */
  1321       char *this_file;
  1322 
  1323       switch (argbuffer[i].arg_type)
  1324         {
  1325         case at_language:
  1326           lang = argbuffer[i].lang;
  1327           break;
  1328         case at_regexp:
  1329           analyze_regex (argbuffer[i].what);
  1330           break;
  1331         case at_filename:
  1332               this_file = argbuffer[i].what;
  1333               /* Input file named "-" means read file names from stdin
  1334                  (one per line) and use them. */
  1335               if (streq (this_file, "-"))
  1336                 {
  1337                   if (parsing_stdin)
  1338                     fatal ("cannot parse standard input "
  1339                            "AND read file names from it");
  1340                   while (readline_internal (&filename_lb, stdin, "-", false) > 0)
  1341                     process_file_name (filename_lb.buffer, lang);
  1342                 }
  1343               else
  1344                 process_file_name (this_file, lang);
  1345           break;
  1346         case at_stdin:
  1347           this_file = argbuffer[i].what;
  1348           process_file (stdin, this_file, lang);
  1349           break;
  1350         default:
  1351           error ("internal error: arg_type");
  1352         }
  1353     }
  1354 
  1355   free_regexps ();
  1356   free (lb.buffer);
  1357   free (filebuf.buffer);
  1358   free (token_name.buffer);
  1359 
  1360   if (!CTAGS || cxref_style)
  1361     {
  1362       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
  1363       put_entries (nodehead);
  1364       free_tree (nodehead);
  1365       nodehead = NULL;
  1366       if (!CTAGS)
  1367         {
  1368           fdesc *fdp;
  1369 
  1370           /* Output file entries that have no tags. */
  1371           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
  1372             if (!fdp->written)
  1373               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
  1374 
  1375           while (nincluded_files-- > 0)
  1376             fprintf (tagf, "\f\n%s,include\n", *included_files++);
  1377 
  1378           if (fclose (tagf) == EOF)
  1379             pfatal (tagfile);
  1380         }
  1381 
  1382       return EXIT_SUCCESS;
  1383     }
  1384 
  1385   /* From here on, we are in (CTAGS && !cxref_style) */
  1386   if (update)
  1387     {
  1388       for (i = 0; i < current_arg; ++i)
  1389         {
  1390           switch (argbuffer[i].arg_type)
  1391             {
  1392             case at_filename:
  1393             case at_stdin:
  1394               break;
  1395             default:
  1396               continue;         /* the for loop */
  1397             }
  1398           cleanup_tags_file (tagfile, argbuffer[i].what);
  1399         }
  1400       append_to_tagfile = true;
  1401     }
  1402 
  1403   tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
  1404   if (tagf == NULL)
  1405     pfatal (tagfile);
  1406   put_entries (nodehead);       /* write all the tags (CTAGS) */
  1407   free_tree (nodehead);
  1408   nodehead = NULL;
  1409   if (fclose (tagf) == EOF)
  1410     pfatal (tagfile);
  1411 
  1412   if (CTAGS)
  1413     if (append_to_tagfile || update)
  1414       {
  1415         /* Maybe these should be used:
  1416            setenv ("LC_COLLATE", "C", 1);
  1417            setenv ("LC_ALL", "C", 1); */
  1418         char *cmd = xmalloc (8 * strlen (tagfile) + sizeof "sort -u -o '' ''");
  1419 #if defined WINDOWSNT || MSDOS
  1420         /* Quote "like this".  No need to escape the quotes in the file name,
  1421            since it is not allowed in file names on these systems.  */
  1422         char *z = stpcpy (cmd, "sort -u -o \"");
  1423         z = stpcpy (z, tagfile);
  1424         z = stpcpy (z, "\" \"");
  1425         z = stpcpy (z, tagfile);
  1426         stpcpy (z, "\"");
  1427 #else
  1428         /* Quote 'like this', and escape the apostrophe in the file name.  */
  1429         char *z = stpcpy (cmd, "sort -u -o '");
  1430         char *escaped_tagfile = z;
  1431         for (; *tagfile; *z++ = *tagfile++)
  1432           if (*tagfile == '\'')
  1433             z = stpcpy (z, "'\\'");
  1434         ptrdiff_t escaped_tagfile_len = z - escaped_tagfile;
  1435         z = stpcpy (z, "' '");
  1436         z = mempcpy (z, escaped_tagfile, escaped_tagfile_len);
  1437         strcpy (z, "'");
  1438 #endif
  1439         return system (cmd);
  1440       }
  1441   return EXIT_SUCCESS;
  1442 }
  1443 
  1444 /*
  1445  * Equivalent to: mv tags OTAGS;grep -Fv ' filename ' OTAGS >tags;rm OTAGS
  1446  */
  1447 static void
  1448 cleanup_tags_file (const char* tagfile, const char* match_file_name)
  1449 {
  1450   FILE *otags_f = fopen ("OTAGS", "wb");
  1451   FILE *tag_f = fopen (tagfile, "rb");
  1452 
  1453   if (otags_f == NULL)
  1454     pfatal ("OTAGS");
  1455 
  1456   if (tag_f == NULL)
  1457     pfatal (tagfile);
  1458 
  1459   int buf_len = strlen (match_file_name) + sizeof ("\t\t ") + 1;
  1460   char *buf = xmalloc (buf_len);
  1461   snprintf (buf, buf_len, "\t%s\t", match_file_name);
  1462 
  1463   linebuffer line;
  1464   linebuffer_init (&line);
  1465   while (readline_internal (&line, tag_f, tagfile, true) > 0)
  1466     {
  1467       if (ferror (tag_f))
  1468         pfatal (tagfile);
  1469 
  1470       if (strstr (line.buffer, buf) == NULL)
  1471         {
  1472           fprintf (otags_f, "%s\n", line.buffer);
  1473           if (ferror (tag_f))
  1474             pfatal (tagfile);
  1475         }
  1476     }
  1477   free (buf);
  1478   free (line.buffer);
  1479 
  1480   if (fclose (otags_f) == EOF)
  1481     pfatal ("OTAGS");
  1482 
  1483   if (fclose (tag_f) == EOF)
  1484     pfatal (tagfile);
  1485 
  1486   do_move_file ("OTAGS", tagfile);
  1487   return;
  1488 }
  1489 
  1490 /*
  1491  * Return a compressor given the file name.  If EXTPTR is non-zero,
  1492  * return a pointer into FILE where the compressor-specific
  1493  * extension begins.  If no compressor is found, NULL is returned
  1494  * and EXTPTR is not significant.
  1495  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
  1496  */
  1497 static compressor *
  1498 get_compressor_from_suffix (char *file, char **extptr)
  1499 {
  1500   compressor *compr;
  1501   char *slash, *suffix;
  1502 
  1503   /* File has been processed by canonicalize_filename,
  1504      so we don't need to consider backslashes on DOS_NT.  */
  1505   slash = strrchr (file, '/');
  1506   suffix = strrchr (file, '.');
  1507   if (suffix == NULL || suffix < slash)
  1508     return NULL;
  1509   if (extptr != NULL)
  1510     *extptr = suffix;
  1511   suffix += 1;
  1512   /* Let those poor souls who live with DOS 8+3 file name limits get
  1513      some solace by treating foo.cgz as if it were foo.c.gz, etc.
  1514      Only the first do loop is run if not MSDOS */
  1515   do
  1516     {
  1517       for (compr = compressors; compr->suffix != NULL; compr++)
  1518         if (streq (compr->suffix, suffix))
  1519           return compr;
  1520       if (!MSDOS)
  1521         break;                  /* do it only once: not really a loop */
  1522       if (extptr != NULL)
  1523         *extptr = ++suffix;
  1524     } while (*suffix != '\0');
  1525   return NULL;
  1526 }
  1527 
  1528 
  1529 
  1530 /*
  1531  * Return a language given the name.
  1532  */
  1533 static language *
  1534 get_language_from_langname (const char *name)
  1535 {
  1536   language *lang;
  1537 
  1538   if (name == NULL)
  1539     error ("empty language name");
  1540   else
  1541     {
  1542       for (lang = lang_names; lang->name != NULL; lang++)
  1543         if (streq (name, lang->name))
  1544           return lang;
  1545       error ("unknown language \"%s\"", name);
  1546     }
  1547 
  1548   return NULL;
  1549 }
  1550 
  1551 
  1552 /*
  1553  * Return a language given the interpreter name.
  1554  */
  1555 static language *
  1556 get_language_from_interpreter (char *interpreter)
  1557 {
  1558   language *lang;
  1559   const char **iname;
  1560 
  1561   if (interpreter == NULL)
  1562     return NULL;
  1563   for (lang = lang_names; lang->name != NULL; lang++)
  1564     if (lang->interpreters != NULL)
  1565       for (iname = lang->interpreters; *iname != NULL; iname++)
  1566         if (streq (*iname, interpreter))
  1567             return lang;
  1568 
  1569   return NULL;
  1570 }
  1571 
  1572 
  1573 
  1574 /*
  1575  * Return a language given the file name.
  1576  */
  1577 static language *
  1578 get_language_from_filename (char *file, bool case_sensitive)
  1579 {
  1580   language *lang;
  1581   const char **name, **ext, *suffix;
  1582   char *slash;
  1583 
  1584   /* Try whole file name first. */
  1585   slash = strrchr (file, '/');
  1586   if (slash != NULL)
  1587     file = slash + 1;
  1588 #ifdef DOS_NT
  1589   else if (file[0] && file[1] == ':')
  1590     file += 2;
  1591 #endif
  1592   for (lang = lang_names; lang->name != NULL; lang++)
  1593     if (lang->filenames != NULL)
  1594       for (name = lang->filenames; *name != NULL; name++)
  1595         if ((case_sensitive)
  1596             ? streq (*name, file)
  1597             : strcaseeq (*name, file))
  1598           return lang;
  1599 
  1600   /* If not found, try suffix after last dot. */
  1601   suffix = strrchr (file, '.');
  1602   if (suffix == NULL)
  1603     return NULL;
  1604   suffix += 1;
  1605   for (lang = lang_names; lang->name != NULL; lang++)
  1606     if (lang->suffixes != NULL)
  1607       for (ext = lang->suffixes; *ext != NULL; ext++)
  1608         if ((case_sensitive)
  1609             ? streq (*ext, suffix)
  1610             : strcaseeq (*ext, suffix))
  1611           return lang;
  1612   return NULL;
  1613 }
  1614 
  1615 
  1616 /*
  1617  * This routine is called on each file argument.
  1618  */
  1619 static void
  1620 process_file_name (char *file, language *lang)
  1621 {
  1622   FILE *inf;
  1623   fdesc *fdp;
  1624   compressor *compr;
  1625   char *compressed_name, *uncompressed_name;
  1626   char *ext, *real_name UNINIT, *tmp_name UNINIT;
  1627   int retval;
  1628 
  1629   canonicalize_filename (file);
  1630   if (streq (file, tagfile) && !streq (tagfile, "-"))
  1631     {
  1632       error ("skipping inclusion of %s in self.", file);
  1633       return;
  1634     }
  1635   compr = get_compressor_from_suffix (file, &ext);
  1636   if (compr)
  1637     {
  1638       compressed_name = file;
  1639       uncompressed_name = savenstr (file, ext - file);
  1640     }
  1641   else
  1642     {
  1643       compressed_name = NULL;
  1644       uncompressed_name = file;
  1645     }
  1646 
  1647   /* If the canonicalized uncompressed name
  1648      has already been dealt with, skip it silently. */
  1649   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
  1650     {
  1651       assert (fdp->infname != NULL);
  1652       if (streq (uncompressed_name, fdp->infname))
  1653         goto cleanup;
  1654     }
  1655 
  1656   inf = fopen (file, "r" FOPEN_BINARY);
  1657   if (inf)
  1658     real_name = file;
  1659   else
  1660     {
  1661       int file_errno = errno;
  1662       if (compressed_name)
  1663         {
  1664           /* Try with the given suffix.  */
  1665           inf = fopen (uncompressed_name, "r" FOPEN_BINARY);
  1666           if (inf)
  1667             real_name = uncompressed_name;
  1668         }
  1669       else
  1670         {
  1671           /* Try all possible suffixes.  */
  1672           for (compr = compressors; compr->suffix != NULL; compr++)
  1673             {
  1674               compressed_name = concat (file, ".", compr->suffix);
  1675               inf = fopen (compressed_name, "r" FOPEN_BINARY);
  1676               if (inf)
  1677                 {
  1678                   real_name = compressed_name;
  1679                   break;
  1680                 }
  1681               if (MSDOS)
  1682                 {
  1683                   char *suf = compressed_name + strlen (file);
  1684                   size_t suflen = strlen (compr->suffix) + 1;
  1685                   for ( ; suf[1]; suf++, suflen--)
  1686                     {
  1687                       memmove (suf, suf + 1, suflen);
  1688                       inf = fopen (compressed_name, "r" FOPEN_BINARY);
  1689                       if (inf)
  1690                         {
  1691                           real_name = compressed_name;
  1692                           break;
  1693                         }
  1694                     }
  1695                   if (inf)
  1696                     break;
  1697                 }
  1698               free (compressed_name);
  1699               compressed_name = NULL;
  1700             }
  1701         }
  1702       if (! inf)
  1703         {
  1704           errno = file_errno;
  1705           perror (file);
  1706           goto cleanup;
  1707         }
  1708     }
  1709 
  1710   if (real_name == compressed_name)
  1711     {
  1712       fclose (inf);
  1713       tmp_name = etags_mktmp ();
  1714       if (!tmp_name)
  1715         inf = NULL;
  1716       else
  1717         {
  1718 #if MSDOS || defined (DOS_NT)
  1719           int buf_len =
  1720             strlen (compr->command)
  1721             + strlen (" \"\" > \"\"") + strlen (real_name)
  1722             + strlen (tmp_name) + 1;
  1723           char *cmd = xmalloc (buf_len);
  1724           snprintf (cmd, buf_len, "%s \"%s\" > \"%s\"",
  1725                     compr->command, real_name, tmp_name);
  1726 #else
  1727           char *new_real_name = escape_shell_arg_string (real_name);
  1728           char *new_tmp_name = escape_shell_arg_string (tmp_name);
  1729           int buf_len =
  1730             strlen (compr->command) + strlen ("  > ") + strlen (new_real_name)
  1731             + strlen (new_tmp_name) + 1;
  1732           char *cmd = xmalloc (buf_len);
  1733           snprintf (cmd, buf_len, "%s %s > %s",
  1734                     compr->command, new_real_name, new_tmp_name);
  1735           free (new_real_name);
  1736           free (new_tmp_name);
  1737 #endif
  1738           inf = (system (cmd) == -1
  1739                  ? NULL
  1740                  : fopen (tmp_name, "r" FOPEN_BINARY));
  1741           free (cmd);
  1742         }
  1743 
  1744       if (!inf)
  1745         {
  1746           perror (real_name);
  1747           goto cleanup;
  1748         }
  1749     }
  1750 
  1751   process_file (inf, uncompressed_name, lang);
  1752 
  1753   retval = fclose (inf);
  1754   if (real_name == compressed_name)
  1755     {
  1756       remove (tmp_name);
  1757       free (tmp_name);
  1758     }
  1759   if (retval < 0)
  1760     pfatal (file);
  1761 
  1762  cleanup:
  1763   if (compressed_name != file)
  1764     free (compressed_name);
  1765   if (uncompressed_name != file)
  1766     free (uncompressed_name);
  1767   last_node = NULL;
  1768   curfdp = NULL;
  1769   return;
  1770 }
  1771 
  1772 static void
  1773 process_file (FILE *fh, char *fn, language *lang)
  1774 {
  1775   static const fdesc emptyfdesc;
  1776   fdesc *fdp;
  1777 
  1778   infilename = fn;
  1779   /* Create a new input file description entry. */
  1780   fdp = xnew (1, fdesc);
  1781   *fdp = emptyfdesc;
  1782   fdp->next = fdhead;
  1783   fdp->infname = savestr (fn);
  1784   fdp->lang = lang;
  1785   fdp->infabsname = absolute_filename (fn, cwd);
  1786   fdp->infabsdir = absolute_dirname (fn, cwd);
  1787   if (filename_is_absolute (fn))
  1788     {
  1789       /* An absolute file name.  Canonicalize it. */
  1790       fdp->taggedfname = absolute_filename (fn, NULL);
  1791     }
  1792   else
  1793     {
  1794       /* A file name relative to cwd.  Make it relative
  1795          to the directory of the tags file. */
  1796       fdp->taggedfname = relative_filename (fn, tagfiledir);
  1797     }
  1798   fdp->usecharno = true;        /* use char position when making tags */
  1799   fdp->prop = NULL;
  1800   fdp->written = false;         /* not written on tags file yet */
  1801 
  1802   fdhead = fdp;
  1803   curfdp = fdhead;              /* the current file description */
  1804 
  1805   find_entries (fh);
  1806 
  1807   /* If not Ctags, and if this is not metasource and if it contained no #line
  1808      directives, we can write the tags and free all nodes pointing to
  1809      curfdp. */
  1810   if (!CTAGS
  1811       && curfdp->usecharno      /* no #line directives in this file */
  1812       && !curfdp->lang->metasource)
  1813     {
  1814       node *np, *prev;
  1815 
  1816       /* Look for the head of the sublist relative to this file.  See add_node
  1817          for the structure of the node tree. */
  1818       prev = NULL;
  1819       for (np = nodehead; np != NULL; prev = np, np = np->left)
  1820         if (np->fdp == curfdp)
  1821           break;
  1822 
  1823       /* If we generated tags for this file, write and delete them. */
  1824       if (np != NULL)
  1825         {
  1826           /* This is the head of the last sublist, if any.  The following
  1827              instructions depend on this being true. */
  1828           assert (np->left == NULL);
  1829 
  1830           assert (fdhead == curfdp);
  1831           assert (last_node->fdp == curfdp);
  1832           put_entries (np);     /* write tags for file curfdp->taggedfname */
  1833           free_tree (np);       /* remove the written nodes */
  1834           if (prev == NULL)
  1835             nodehead = NULL;    /* no nodes left */
  1836           else
  1837             prev->left = NULL;  /* delete the pointer to the sublist */
  1838         }
  1839     }
  1840 }
  1841 
  1842 static void
  1843 reset_input (FILE *inf)
  1844 {
  1845   if (fseek (inf, 0, SEEK_SET) != 0)
  1846     perror (infilename);
  1847 }
  1848 
  1849 /*
  1850  * This routine selects a parser for the already open stream INF
  1851  * (described by curfdp) and calls it to find the definitions.
        *
        * The parser is looked for, in this order:
        *  1. the language already recorded in curfdp->lang;
        *  2. the file name, compared case-sensitively;
        *  3. the interpreter named on a leading "#!" line;
        *  4. the file name, compared case-insensitively;
        *  5. Fortran, and if that adds no node, C or C++.
        * Whenever a language is guessed it is stored in curfdp->lang.
  1852  */
  1853 static void
  1854 find_entries (FILE *inf)
  1855 {
  1856   char *cp;
  1857   language *lang = curfdp->lang;
  1858   Lang_function *parser = NULL;
  1859 
  1860   /* If user specified a language, use it. */
  1861   if (lang != NULL && lang->function != NULL)
  1862     {
  1863       parser = lang->function;
  1864     }
  1865 
  1866   /* Else try to guess the language given the file name. */
  1867   if (parser == NULL)
  1868     {
  1869       lang = get_language_from_filename (curfdp->infname, true);
  1870 
  1871       /* Disambiguate file names between Objc and Mercury. */
  1872       if (lang != NULL && strcmp (lang->name, "objc") == 0)
  1873         test_objc_is_mercury (curfdp->infname, &lang);
  1874 
  1875       if (lang != NULL && lang->function != NULL)
  1876         {
  1877           curfdp->lang = lang;
  1878           parser = lang->function;
  1879         }
  1880     }
  1881 
  1882   /* Else look for sharp-bang as the first two characters. */
         /* Note that this reads one line from INF into the shared buffer
            lb, which is why the stream is rewound further down.  */
  1883   if (parser == NULL
  1884       && readline_internal (&lb, inf, infilename, false) > 0
  1885       && lb.len >= 2
  1886       && lb.buffer[0] == '#'
  1887       && lb.buffer[1] == '!')
  1888     {
  1889       char *lp;
  1890 
  1891       /* Set lp to point at the first char after the last slash in the
  1892          line or, if no slashes, at the first nonblank.  Then set cp to
  1893          the first successive blank and terminate the string. */
  1894       lp = strrchr (lb.buffer+2, '/');
  1895       if (lp != NULL)
  1896         lp += 1;
  1897       else
  1898         lp = skip_spaces (lb.buffer + 2);
  1899       cp = skip_non_spaces (lp);
  1900       /* If the "interpreter" turns out to be "env", the real interpreter is
  1901          the next word.  */
  1902       if (cp > lp && strneq (lp, "env", cp - lp))
  1903         {
  1904           lp = skip_spaces (cp);
  1905           cp = skip_non_spaces (lp);
  1906         }
             /* Cut the line buffer right after the interpreter name.  */
  1907       *cp = '\0';
  1908 
  1909       if (*lp)
  1910         {
  1911           lang = get_language_from_interpreter (lp);
  1912           if (lang != NULL && lang->function != NULL)
  1913             {
  1914               curfdp->lang = lang;
  1915               parser = lang->function;
  1916             }
  1917         }
  1918     }
  1919 
         /* Rewind unconditionally, whether or not a line was read above.  */
  1920   reset_input (inf);
  1921 
  1922   /* Else try to guess the language given the case insensitive file name. */
  1923   if (parser == NULL)
  1924     {
  1925       lang = get_language_from_filename (curfdp->infname, false);
  1926       if (lang != NULL && lang->function != NULL)
  1927         {
  1928           curfdp->lang = lang;
  1929           parser = lang->function;
  1930         }
  1931     }
  1932 
  1933   /* Else try Fortran or C. */
         /* This is done by setting curfdp->lang and calling ourselves again;
            "no entries found" means last_node did not change.
            NOTE(review): presumably the "fortran", "c" and "c++" entries
            all have a non-NULL function, so that the nested call stops at
            the first test above -- confirm against lang_names.  */
  1934   if (parser == NULL)
  1935     {
  1936       node *old_last_node = last_node;
  1937 
  1938       curfdp->lang = get_language_from_langname ("fortran");
  1939       find_entries (inf);
  1940 
  1941       if (old_last_node == last_node)
  1942         /* No Fortran entries found.  Try C. */
  1943         {
  1944           reset_input (inf);
  1945           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
  1946           find_entries (inf);
  1947         }
  1948       return;
  1949     }
  1950 
  1951   if (!no_line_directive
  1952       && curfdp->lang != NULL && curfdp->lang->metasource)
  1953     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
  1954        file, or anyway we parsed a file that is automatically generated from
  1955        this one.  If this is the case, the bingo.c file contained #line
  1956        directives that generated tags pointing to this file.  Let's delete
  1957        them all before parsing this file, which is the real source. */
  1958     {
             /* fdpp always points at the link that leads to the description
                being examined, so an element can be unlinked in place.  */
  1959       fdesc **fdpp = &fdhead;
  1960       while (*fdpp != NULL)
  1961         if (*fdpp != curfdp
  1962             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
  1963           /* We found one of those!  We must delete both the file description
  1964              and all tags referring to it. */
  1965           {
  1966             fdesc *badfdp = *fdpp;
  1967 
  1968             /* Delete the tags referring to badfdp->taggedfname
  1969                that were obtained from badfdp->infname. */
  1970             invalidate_nodes (badfdp, &nodehead);
  1971 
  1972             *fdpp = badfdp->next; /* remove the bad description from the list */
  1973             free_fdesc (badfdp);
  1974           }
  1975         else
  1976           fdpp = &(*fdpp)->next; /* advance the list pointer */
  1977     }
  1978 
  1979   assert (parser != NULL);
  1980 
  1981   /* Generic initializations before reading from file. */
  1982   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
  1983 
  1984   /* Generic initializations before parsing file with readline. */
  1985   lineno = 0;                  /* reset global line number */
  1986   charno = 0;                  /* reset global char number */
  1987   linecharno = 0;              /* reset global char number of line start */
  1988 
  1989   parser (inf);
  1990 
         /* Runs after every parser, whichever language was selected.  */
  1991   regex_tag_multiline ();
  1992 }
  1993 
  1994 
  1995 /*
  1996  * Check whether an implicitly named tag should be created,
  1997  * then call `pfnote'.
  1998  * NAME is a string that is internally copied by this function.
  1999  *
  2000  * TAGS format specification
  2001  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
  2002  * The following is explained in some more detail in etc/ETAGS.EBNF.
  2003  *
  2004  * make_tag creates tags with "implicit tag names" (unnamed tags)
  2005  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
  2006  *  1. NAME does not contain any of the characters in NONAM;
  2007  *  2. LINESTART contains name as either a rightmost, or rightmost but
  2008  *     one character, substring;
  2009  *  3. the character, if any, immediately before NAME in LINESTART must
  2010  *     be a character in NONAM;
  2011  *  4. the character, if any, immediately after NAME in LINESTART must
  2012  *     also be a character in NONAM.
  2013  *
  2014  * The implementation uses the notinname() macro, which recognizes the
  2015  * characters stored in the string `nonam'.
  2016  * etags.el needs to use the same characters that are in NONAM.
  2017  */
  2018 static void
  2019 make_tag (const char *name,     /* tag name, or NULL if unnamed */
  2020           ptrdiff_t namelen,    /* tag length */
  2021           bool is_func,         /* tag is a function */
  2022           char *linestart,      /* start of the line where tag is */
  2023           ptrdiff_t linelen,    /* length of the line where tag is */
  2024           intmax_t lno,         /* line number */
  2025           intmax_t cno)         /* character number */
  2026 {
  2027   bool named = (name != NULL && namelen > 0);
  2028   char *nname = NULL;
  2029 
  2030   if (debug)
  2031     fprintf (stderr, "%s on %s:%"PRIdMAX": %s\n",
  2032              named ? name : "(unnamed)", curfdp->taggedfname, lno, linestart);
  2033 
  2034   if (!CTAGS && named)          /* maybe set named to false */
  2035     /* Let's try to make an implicit tag name, that is, create an unnamed tag
  2036        such that etags.el can guess a name from it. */
  2037     {
  2038       ptrdiff_t i;
  2039       const char *cp = name;
  2040 
  2041       for (i = 0; i < namelen; i++)
  2042         if (notinname (*cp++))
  2043           break;
  2044       if (i == namelen)                         /* rule #1 */
  2045         {
  2046           cp = linestart + linelen - namelen;
  2047           if (notinname (linestart[linelen-1]))
  2048             cp -= 1;                            /* rule #4 */
  2049           if (cp >= linestart                   /* rule #2 */
  2050               && (cp == linestart
  2051                   || notinname (cp[-1]))        /* rule #3 */
  2052               && strneq (name, cp, namelen))    /* rule #2 */
  2053             named = false;      /* use implicit tag name */
  2054         }
  2055     }
  2056 
  2057   if (named)
  2058     nname = savenstr (name, namelen);
  2059 
  2060   pfnote (nname, is_func, linestart, linelen, lno, cno);
  2061 }
  2062 
  2063 /* Record a tag. */
  2064 static void
  2065 pfnote (char *name,             /* tag name, or NULL if unnamed */
  2066         bool is_func,           /* tag is a function */
  2067         char *linestart,        /* start of the line where tag is */
  2068         ptrdiff_t linelen,      /* length of the line where tag is */
  2069         intmax_t lno,           /* line number */
  2070         intmax_t cno)           /* character number */
  2071 
  2072 {
  2073   register node *np;
  2074 
  2075   if ((CTAGS && name == NULL)
  2076       /* We used to have an assertion here for the case below, but if we hit
  2077          that case, it just means our parser got confused, and there's nothing
  2078          to do about such empty "tags".  */
  2079       || (!CTAGS && name && name[0] == '\0'))
  2080     return;
  2081 
  2082   np = xnew (1, node);
  2083 
  2084   /* If ctags mode, change name "main" to M<thisfilename>. */
  2085   if (CTAGS && !cxref_style && streq (name, "main"))
  2086     {
  2087       char *fp = strrchr (curfdp->taggedfname, '/');
  2088       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
  2089       fp = strrchr (np->name, '.');
  2090       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
  2091         fp[0] = '\0';
  2092     }
  2093   else
  2094     np->name = name;
  2095   np->valid = true;
  2096   np->been_warned = false;
  2097   np->fdp = curfdp;
  2098   np->is_func = is_func;
  2099   np->lno = lno;
  2100   if (np->fdp->usecharno)
  2101     /* Our char numbers are 0-base, because of C language tradition?
  2102        ctags compatibility?  old versions compatibility?   I don't know.
  2103        Anyway, since emacs's are 1-base we expect etags.el to take care
  2104        of the difference.  If we wanted to have 1-based numbers, we would
  2105        uncomment the +1 below. */
  2106     np->cno = cno /* + 1 */ ;
  2107   else
  2108     np->cno = invalidcharno;
  2109   np->left = np->right = NULL;
  2110   if (CTAGS && !cxref_style)
  2111     {
  2112       if (strnlen (linestart, 50) < 50)
  2113         np->regex = concat (linestart, "$", "");
  2114       else
  2115         np->regex = savenstr (linestart, 50);
  2116     }
  2117   else
  2118     np->regex = savenstr (linestart, linelen);
  2119 
  2120   add_node (np, &nodehead);
  2121 }
  2122 
  2123 /*
  2124  * Utility functions and data to avoid recursion.
  2125  */
  2126 
       /* One element of an explicit, singly linked stack of tree nodes,
          handled by push_node and pop_node.  */
  2127 typedef struct stack_entry {
  2128   node *np;                     /* the node kept on the stack */
  2129   struct stack_entry *next;     /* the entry pushed just before this one */
  2130 } stkentry;
  2131 
  2132 static void
  2133 push_node (node *np, stkentry **stack_top)
  2134 {
  2135   if (np)
  2136     {
  2137       stkentry *new = xnew (1, stkentry);
  2138 
  2139       new->np = np;
  2140       new->next = *stack_top;
  2141       *stack_top = new;
  2142     }
  2143 }
  2144 
  2145 static node *
  2146 pop_node (stkentry **stack_top)
  2147 {
  2148   node *ret = NULL;
  2149 
  2150   if (*stack_top)
  2151     {
  2152       stkentry *old_start = *stack_top;
  2153 
  2154       ret = (*stack_top)->np;
  2155       *stack_top = (*stack_top)->next;
  2156       free (old_start);
  2157     }
  2158   return ret;
  2159 }
  2160 
  2161 /*
  2162  * free_tree ()
  2163  *      emulate recursion on left children, iterate on right children.
  2164  */
  2165 static void
  2166 free_tree (register node *np)
  2167 {
  2168   stkentry *stack = NULL;
  2169 
  2170   while (np)
  2171     {
  2172       /* Descent on left children.  */
  2173       while (np->left)
  2174         {
  2175           push_node (np, &stack);
  2176           np = np->left;
  2177         }
  2178       /* Free node without left children.  */
  2179       node *node_right = np->right;
  2180       free (np->name);
  2181       free (np->regex);
  2182       free (np);
  2183       if (!node_right)
  2184         {
  2185           /* Backtrack to find a node with right children, while freeing nodes
  2186              that don't have right children.  */
  2187           while (node_right == NULL && (np = pop_node (&stack)) != NULL)
  2188             {
  2189               node_right = np->right;
  2190               free (np->name);
  2191               free (np->regex);
  2192               free (np);
  2193             }
  2194         }
  2195       /* Free right children.  */
  2196       np = node_right;
  2197     }
  2198 }
  2199 
  2200 /*
  2201  * free_fdesc ()
  2202  *      delete a file description
  2203  */
  2204 static void
  2205 free_fdesc (register fdesc *fdp)
  2206 {
  2207   free (fdp->infname);
  2208   free (fdp->infabsname);
  2209   free (fdp->infabsdir);
  2210   free (fdp->taggedfname);
  2211   free (fdp->prop);
  2212   free (fdp);
  2213 }
  2214 
  2215 /*
  2216  * add_node ()
  2217  *      Adds a node to the tree of nodes.  In etags mode, sort by file
  2218  *      name.  In ctags mode, sort by tag name.  Make no attempt at
  2219  *      balancing.
  2220  *
  2221  *      add_node is the only function allowed to add nodes, so it can
  2222  *      maintain state.
        *
        *      NP is the node to add and *CUR_NODE_P the root of the tree,
        *      which is set to NP when the tree is empty.  The state kept
        *      is last_node, which is set to NP whenever NP is linked in.
        *      In ctags mode with no_duplicates set, a node whose name is
        *      already present is not linked in.
  2223  */
  2224 static void
  2225 add_node (node *np, node **cur_node_p)
  2226 {
  2227   node *cur_node = *cur_node_p;
  2228 
  2229   /* Make the first node.  */
  2230   if (cur_node == NULL)
  2231     {
  2232       *cur_node_p = np;
  2233       last_node = np;
  2234       return;
  2235     }
  2236 
  2237   if (!CTAGS)
  2238     /* Etags Mode */
  2239     {
  2240       /* For each file name, tags are in a linked sublist on the right
  2241          pointer.  The first tags of different files are a linked list
  2242          on the left pointer.  last_node points to the end of the last
  2243          used sublist. */
  2244       if (last_node != NULL && last_node->fdp == np->fdp)
  2245         {
  2246           /* Let's use the same sublist as the last added node. */
  2247           assert (last_node->right == NULL);
  2248           last_node->right = np;
  2249           last_node = np;
  2250         }
  2251       else
  2252         {
                  /* Walk the sublist heads; stop at the one for np's file, or
                     at the last head when no head matches.  */
  2253            while (cur_node->fdp != np->fdp)
  2254              {
  2255                if (cur_node->left == NULL)
  2256                  break;
  2257                /* The head of this sublist is not good for us.  Let's try the
  2258                   next one. */
  2259                cur_node = cur_node->left;
  2260              }
                  /* NOTE(review): "a head was found" is deduced from left being
                     non-NULL.  If the matching head happens to be the last one
                     on the left chain, the else branch below runs and starts a
                     second sublist for the same file -- confirm whether that
                     case can arise.  */
  2261            if (cur_node->left)
  2262              {
  2263                /* Scanning the list we found the head of a sublist which is
  2264                   good for us.  Let's scan this sublist. */
  2265                if (cur_node->right)
  2266                  {
  2267                    cur_node = cur_node->right;
  2268                    while (cur_node->right)
  2269                      cur_node = cur_node->right;
  2270                  }
  2271                /* Make a new node in this sublist.  */
  2272                cur_node->right = np;
  2273              }
  2274            else
  2275              {
  2276                /* Make a new sublist.  */
  2277                cur_node->left = np;
  2278              }
  2279            last_node = np;
  2280         }
  2281     } /* if ETAGS mode */
  2282   else
  2283     {
  2284       /* Ctags Mode */
             /* next_node is the link through which the node being looked at
                was reached; it starts at the local copy of the root, which is
                known to be non-NULL here, and ends at the NULL child link
                where NP belongs.  */
  2285       node **next_node = &cur_node;
  2286 
  2287       while ((cur_node = *next_node) != NULL)
  2288         {
  2289           int dif = strcmp (np->name, cur_node->name);
  2290           /*
  2291            * If this tag name matches an existing one, then
  2292            * do not add the node, but maybe print a warning.
  2293            */
  2294           if (!dif && no_duplicates)
  2295             {
  2296               if (np->fdp == cur_node->fdp)
  2297                 {
  2298                   if (!no_warnings)
  2299                     {
                              /* The line reported is the global lineno, not
                                 np->lno.  */
  2300                       fprintf (stderr,
  2301                                ("Duplicate entry in file %s, "
  2302                                 "line %"PRIdMAX": %s\n"),
  2303                                np->fdp->infname, lineno, np->name);
  2304                       fprintf (stderr, "Second entry ignored\n");
  2305                     }
  2306                 }
  2307               else if (!cur_node->been_warned && !no_warnings)
  2308                 {
  2309                   fprintf
  2310                     (stderr,
  2311                      "Duplicate entry in files %s and %s: %s (Warning only)\n",
  2312                      np->fdp->infname, cur_node->fdp->infname, np->name);
  2313                   cur_node->been_warned = true;
  2314                 }
                      /* NOTE(review): NP is neither linked in nor released on
                         this path -- confirm whether callers expect to keep
                         ownership of a rejected node.  */
  2315               return;
  2316             }
  2317           else
  2318             next_node = dif < 0 ? &cur_node->left : &cur_node->right;
  2319         }
  2320       *next_node = np;
  2321       last_node = np;
  2322     } /* if CTAGS mode */
  2323 }
  2324 
  2325 /*
  2326  * invalidate_nodes ()
  2327  *      Scan the node tree and invalidate all nodes pointing to the
  2328  *      given file description (CTAGS case) or free them (ETAGS case).
  2329  */
  2330 static void
  2331 invalidate_nodes (fdesc *badfdp, node **npp)
  2332 {
  2333   node *np = *npp;
  2334   stkentry *stack = NULL;
  2335 
  2336   if (CTAGS)
  2337     {
  2338       while (np)
  2339         {
  2340           /* Push all the left children on the stack.  */
  2341           while (np->left != NULL)
  2342             {
  2343               push_node (np, &stack);
  2344               np = np->left;
  2345             }
  2346           /* Invalidate this node.  */
  2347           if (np->fdp == badfdp)
  2348             np->valid = false;
  2349           if (!np->right)
  2350             {
  2351               /* Pop nodes from stack, invalidating them, until we find one
  2352                  with a right child.  */
  2353               while ((np = pop_node (&stack)) != NULL)
  2354                 {
  2355                   if (np->fdp == badfdp)
  2356                     np->valid = false;
  2357                   if (np->right != NULL)
  2358                     break;
  2359                 }
  2360             }
  2361           /* Process the right child, if any.  */
  2362           if (np)
  2363             np = np->right;
  2364         }
  2365     }
  2366   else
  2367     {
  2368       node super_root, *np_parent = NULL;
  2369 
  2370       super_root.left = np;
  2371       super_root.fdp = (fdesc *) -1;
  2372       np = &super_root;
  2373 
  2374       while (np)
  2375         {
  2376           /* Descent on left children until node with BADFP.  */
  2377           while (np && np->fdp != badfdp)
  2378             {
  2379               assert (np->fdp != NULL);
  2380               np_parent = np;
  2381               np = np->left;
  2382             }
  2383           if (np)
  2384             {
  2385               np_parent->left = np->left; /* detach subtree from the tree */
  2386               np->left = NULL;            /* isolate it */
  2387               free_tree (np);             /* free it */
  2388 
  2389               /* Continue with rest of tree.  */
  2390               np = np_parent->left;
  2391             }
  2392         }
  2393       *npp = super_root.left;
  2394     }
  2395 }
  2396 
  2397 
  2398 static ptrdiff_t total_size_of_entries (node *);
  2399 static int number_len (intmax_t) ATTRIBUTE_CONST;
  2400 
  2401 /* Length of a non-negative number's decimal representation. */
  2402 static int
  2403 number_len (intmax_t num)
  2404 {
  2405   int len = 1;
  2406   while ((num /= 10) > 0)
  2407     len += 1;
  2408   return len;
  2409 }
  2410 
  2411 /*
  2412  * Return total number of characters that put_entries will output for
  2413  * the nodes in the linked list at the right of the specified node.
  2414  * This count is irrelevant with etags.el since emacs 19.34 at least,
  2415  * but is still supplied for backward compatibility.
  2416  */
  2417 static ptrdiff_t
  2418 total_size_of_entries (node *np)
  2419 {
  2420   ptrdiff_t total = 0;
  2421 
  2422   for (; np != NULL; np = np->right)
  2423     if (np->valid)
  2424       {
  2425         total += strlen (np->regex) + 1;                /* pat\177 */
  2426         if (np->name != NULL)
  2427           total += strlen (np->name) + 1;               /* name\001 */
  2428         total += number_len (np->lno) + 1;              /* lno, */
  2429         if (np->cno != invalidcharno)                   /* cno */
  2430           total += number_len (np->cno);
  2431         total += 1;                                     /* newline */
  2432       }
  2433 
  2434   return total;
  2435 }
  2436 
  2437 static void
  2438 put_entry (node *np)
  2439 {
  2440   register char *sp;
  2441   static fdesc *fdp = NULL;
  2442 
  2443   /* Output this entry */
  2444   if (np->valid)
  2445     {
  2446       if (!CTAGS)
  2447         {
  2448           /* Etags mode */
  2449           if (fdp != np->fdp)
  2450             {
  2451               fdp = np->fdp;
  2452               fprintf (tagf, "\f\n%s,%"PRIdPTR"\n",
  2453                        fdp->taggedfname, total_size_of_entries (np));
  2454               fdp->written = true;
  2455             }
  2456           fputs (np->regex, tagf);
  2457           fputc ('\177', tagf);
  2458           if (np->name != NULL)
  2459             {
  2460               fputs (np->name, tagf);
  2461               fputc ('\001', tagf);
  2462             }
  2463           fprintf (tagf, "%"PRIdMAX",", np->lno);
  2464           if (np->cno != invalidcharno)
  2465             fprintf (tagf, "%"PRIdMAX, np->cno);
  2466           fputs ("\n", tagf);
  2467         }
  2468       else
  2469         {
  2470           /* Ctags mode */
  2471           if (np->name == NULL)
  2472             error ("internal error: NULL name in ctags mode.");
  2473 
  2474           if (cxref_style)
  2475             {
  2476               if (vgrind_style)
  2477                 fprintf (stdout, "%s %s %"PRIdMAX"\n",
  2478                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
  2479               else
  2480                 fprintf (stdout, "%-16s %3"PRIdMAX" %-16s %s\n",
  2481                          np->name, np->lno, np->fdp->taggedfname, np->regex);
  2482             }
  2483           else
  2484             {
  2485               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
  2486 
  2487               if (np->is_func)
  2488                 {               /* function or #define macro with args */
  2489                   putc (searchar, tagf);
  2490                   putc ('^', tagf);
  2491 
  2492                   for (sp = np->regex; *sp; sp++)
  2493                     {
  2494                       if (*sp == '\\' || *sp == searchar)
  2495                         putc ('\\', tagf);
  2496                       putc (*sp, tagf);
  2497                     }
  2498                   putc (searchar, tagf);
  2499                 }
  2500               else
  2501                 {               /* anything else; text pattern inadequate */
  2502                   fprintf (tagf, "%"PRIdMAX, np->lno);
  2503                 }
  2504               putc ('\n', tagf);
  2505             }
  2506         }
  2507     } /* if this node contains a valid tag */
  2508 }
  2509 
  2510 static void
  2511 put_entries (node *np)
  2512 {
  2513   stkentry *stack = NULL;
  2514 
  2515   if (np == NULL)
  2516     return;
  2517 
  2518   if (CTAGS)
  2519     {
  2520       while (np)
  2521         {
  2522           /* Stack subentries that precede this one.  */
  2523           while (np->left)
  2524             {
  2525               push_node (np, &stack);
  2526               np = np->left;
  2527             }
  2528           /* Output this subentry.  */
  2529           put_entry (np);
  2530           /* Stack subentries that follow this one.  */
  2531           while (!np->right)
  2532             {
  2533               /* Output subentries that precede the next one.  */
  2534               np = pop_node (&stack);
  2535               if (!np)
  2536                 break;
  2537               put_entry (np);
  2538             }
  2539           if (np)
  2540             np = np->right;
  2541         }
  2542     }
  2543   else
  2544     {
  2545       push_node (np, &stack);
  2546       while ((np = pop_node (&stack)) != NULL)
  2547         {
  2548           /* Output this subentry.  */
  2549           put_entry (np);
  2550           while (np->right)
  2551             {
  2552               /* Output subentries that follow this one.  */
  2553               put_entry (np->right);
  2554               /* Stack subentries from the following files.  */
  2555               push_node (np->left, &stack);
  2556               np = np->right;
  2557             }
  2558           push_node (np->left, &stack);
  2559         }
  2560     }
  2561 }
  2562 
  2563 
  2564 /* C extensions. */
  2565 #define C_EXT   0x00fff         /* C extensions */
  2566 #define C_PLAIN 0x00000         /* C */
  2567 #define C_PLPL  0x00001         /* C++ */
  2568 #define C_STAR  0x00003         /* C* */
  2569 #define C_JAVA  0x00005         /* JAVA */
  2570 #define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
  2571 #define YACC    0x10000         /* yacc file */
  2572 
  2573 /*
  2574  * The C symbol tables.
  2575  */
  2576 enum sym_type
  2577 {
  2578   st_none,
  2579   st_C_objprot, st_C_objimpl, st_C_objend,
  2580   st_C_gnumacro,
  2581   st_C_ignore, st_C_attribute, st_C_enum_bf,
  2582   st_C_javastruct,
  2583   st_C_operator,
  2584   st_C_class, st_C_template,
  2585   st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
  2586 };
  2587 
  2588 /* Feed stuff between (but not including) %[ and %] lines to:
  2589      gperf -m 5
  2590 %[
  2591 %compare-strncmp
  2592 %enum
  2593 %struct-type
  2594 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
  2595 %%
  2596 if,             0,                      st_C_ignore
  2597 for,            0,                      st_C_ignore
  2598 while,          0,                      st_C_ignore
  2599 switch,         0,                      st_C_ignore
  2600 return,         0,                      st_C_ignore
  2601 __attribute__,  0,                      st_C_attribute
  2602 GTY,            0,                      st_C_attribute
  2603 @interface,     0,                      st_C_objprot
  2604 @protocol,      0,                      st_C_objprot
  2605 @implementation,0,                      st_C_objimpl
  2606 @end,           0,                      st_C_objend
  2607 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
  2608 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
  2609 friend,         C_PLPL,                 st_C_ignore
  2610 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
  2611 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
  2612 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
  2613 class,          0,                      st_C_class
  2614 namespace,      C_PLPL,                 st_C_struct
  2615 domain,         C_STAR,                 st_C_struct
  2616 union,          0,                      st_C_struct
  2617 struct,         0,                      st_C_struct
  2618 extern,         0,                      st_C_extern
  2619 enum,           0,                      st_C_enum
  2620 typedef,        0,                      st_C_typedef
  2621 define,         0,                      st_C_define
  2622 undef,          0,                      st_C_define
  2623 operator,       C_PLPL,                 st_C_operator
  2624 template,       0,                      st_C_template
  2625 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
  2626 DEFUN,          0,                      st_C_gnumacro
  2627 SYSCALL,        0,                      st_C_gnumacro
  2628 ENTRY,          0,                      st_C_gnumacro
  2629 PSEUDO,         0,                      st_C_gnumacro
  2630 ENUM_BF,        0,                      st_C_enum_bf
  2631 # These are defined inside C functions, so currently they are not met.
  2632 # EXFUN used in glibc, DEFVAR_* in emacs.
  2633 #EXFUN,         0,                      st_C_gnumacro
  2634 #DEFVAR_,       0,                      st_C_gnumacro
  2635 %]
  2636 and replace lines between %< and %> with its output, then:
  2637  - remove the #if characterset check
  2638  - remove any #line directives
  2639  - make in_word_set static and not inline
  2640  - remove any 'register' qualifications from variable decls. */
  2641 /*%<*/
  2642 /* C code produced by gperf version 3.0.1 */
  2643 /* Command-line: gperf -m 5 */
  2644 /* Computed positions: -k'2-3' */
  2645 
  2646 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
  2647 /* maximum key range = 34, duplicates = 0 */
  2648 
  2649 static int
  2650 hash (const char *str, int len)
  2651 {
  2652   static char const asso_values[] =
  2653     {
  2654       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2655       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2656       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2657       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2658       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2659       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2660       36, 36, 36, 36, 36, 36, 36, 36, 36,  3,
  2661       27, 36, 36, 36, 36, 36, 36, 36, 26, 36,
  2662       36, 36, 36, 25,  0,  0, 36, 36, 36,  0,
  2663       36, 36, 36, 36, 36,  1, 36, 16, 36,  6,
  2664       23,  0,  0, 36, 22,  0, 36, 36,  5,  0,
  2665        0, 15,  1, 36,  6, 36,  8, 19, 36, 16,
  2666        4,  5, 36, 36, 36, 36, 36, 36, 36, 36,
  2667       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2668       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2669       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2670       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2671       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2672       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2673       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2674       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2675       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2676       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2677       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2678       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2679       36, 36, 36, 36, 36, 36
  2680     };
  2681   int hval = len;
  2682 
  2683   switch (hval)
  2684     {
  2685       default:
  2686         hval += asso_values[(unsigned char) str[2]];
  2687         FALLTHROUGH;
  2688       case 2:
  2689         hval += asso_values[(unsigned char) str[1]];
  2690         break;
  2691     }
  2692   return hval;
  2693 }
  2694 
  2695 static struct C_stab_entry *
  2696 in_word_set (const char *str, ptrdiff_t len)
  2697 {
  2698   enum
  2699     {
  2700       TOTAL_KEYWORDS = 34,
  2701       MIN_WORD_LENGTH = 2,
  2702       MAX_WORD_LENGTH = 15,
  2703       MIN_HASH_VALUE = 2,
  2704       MAX_HASH_VALUE = 35
  2705     };
  2706 
  2707   static struct C_stab_entry wordlist[] =
  2708     {
  2709       {""}, {""},
  2710       {"if",            0,                      st_C_ignore},
  2711       {"GTY",           0,                      st_C_attribute},
  2712       {"@end",          0,                      st_C_objend},
  2713       {"union",         0,                      st_C_struct},
  2714       {"define",                0,                      st_C_define},
  2715       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
  2716       {"template",      0,                      st_C_template},
  2717       {"operator",      C_PLPL,                 st_C_operator},
  2718       {"@interface",    0,                      st_C_objprot},
  2719       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
  2720       {"friend",                C_PLPL,                 st_C_ignore},
  2721       {"typedef",       0,                      st_C_typedef},
  2722       {"return",                0,                      st_C_ignore},
  2723       {"@implementation",0,                     st_C_objimpl},
  2724       {"@protocol",     0,                      st_C_objprot},
  2725       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
  2726       {"extern",                0,                      st_C_extern},
  2727       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
  2728       {"struct",                0,                      st_C_struct},
  2729       {"domain",                C_STAR,                 st_C_struct},
  2730       {"switch",                0,                      st_C_ignore},
  2731       {"enum",          0,                      st_C_enum},
  2732       {"for",           0,                      st_C_ignore},
  2733       {"namespace",     C_PLPL,                 st_C_struct},
  2734       {"class",         0,                      st_C_class},
  2735       {"while",         0,                      st_C_ignore},
  2736       {"undef",         0,                      st_C_define},
  2737       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
  2738       {"__attribute__", 0,                      st_C_attribute},
  2739       {"ENTRY",         0,                      st_C_gnumacro},
  2740       {"SYSCALL",       0,                      st_C_gnumacro},
  2741       {"ENUM_BF",       0,                      st_C_enum_bf},
  2742       {"PSEUDO",                0,                      st_C_gnumacro},
  2743       {"DEFUN",         0,                      st_C_gnumacro}
  2744     };
  2745 
  2746   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
  2747     {
  2748       int key = hash (str, len);
  2749 
  2750       if (key <= MAX_HASH_VALUE && key >= 0)
  2751         {
  2752           const char *s = wordlist[key].name;
  2753 
  2754           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
  2755             return &wordlist[key];
  2756         }
  2757     }
  2758   return 0;
  2759 }
  2760 /*%>*/
  2761 
  2762 static enum sym_type
  2763 C_symtype (char *str, ptrdiff_t len, int c_ext)
  2764 {
  2765   struct C_stab_entry *se = in_word_set (str, len);
  2766 
  2767   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
  2768     return st_none;
  2769   return se->type;
  2770 }
  2771 
  2772 
  2773 /*
  2774  * Ignoring __attribute__ ((list))
  2775  */
  2776 static bool inattribute;        /* looking at an __attribute__ construct */
  2777 
  2778 /* Ignoring ENUM_BF (type)
  2779  *
  2780  */
  2781 static bool in_enum_bf;         /* inside parentheses following ENUM_BF */
  2782 
  2783 /*
  2784  * C functions and variables are recognized using a simple
  2785  * finite automaton.  fvdef is its state variable.
  2786  */
  2787 static enum
  2788 {
  2789   fvnone,                       /* nothing seen */
  2790   fdefunkey,                    /* Emacs DEFUN keyword seen */
  2791   fdefunname,                   /* Emacs DEFUN name seen */
  2792   foperator,                    /* func: operator keyword seen (cplpl) */
  2793   fvnameseen,                   /* function or variable name seen */
  2794   fstartlist,                   /* func: just after open parenthesis */
  2795   finlist,                      /* func: in parameter list */
  2796   flistseen,                    /* func: after parameter list */
  2797   fignore,                      /* func: before open brace */
  2798   vignore                       /* var-like: ignore until ';' */
  2799 } fvdef;
  2800 
  2801 static bool fvextern;           /* func or var: extern keyword seen; */
  2802 
  2803 /*
  2804  * typedefs are recognized using a simple finite automaton.
  2805  * typdef is its state variable.
  2806  */
  2807 static enum
  2808 {
  2809   tnone,                        /* nothing seen */
  2810   tkeyseen,                     /* typedef keyword seen */
  2811   ttypeseen,                    /* defined type seen */
  2812   tinbody,                      /* inside typedef body */
  2813   tend,                         /* just before typedef tag */
  2814   tignore                       /* junk after typedef tag */
  2815 } typdef;
  2816 
  2817 /*
  2818  * struct-like structures (enum, struct and union) are recognized
  2819  * using another simple finite automaton.  `structdef' is its state
  2820  * variable.
  2821  */
  2822 static enum
  2823 {
  2824   snone,                        /* nothing seen yet,
  2825                                    or in struct body if bracelev > 0 */
  2826   skeyseen,                     /* struct-like keyword seen */
  2827   stagseen,                     /* struct-like tag seen */
  2828   scolonseen                    /* colon seen after struct-like tag */
  2829 } structdef;
  2830 
  2831 /*
  2832  * When objdef is different from onone, objtag is the name of the class.
  2833  */
  2834 static const char *objtag = "<uninited>";
  2835 
  2836 /*
  2837  * Yet another little state machine to deal with preprocessor lines.
  2838  */
  2839 static enum
  2840 {
  2841   dnone,                        /* nothing seen */
  2842   dsharpseen,                   /* '#' seen as first char on line */
  2843   ddefineseen,                  /* '#' and 'define' seen */
  2844   dignorerest                   /* ignore rest of line */
  2845 } definedef;
  2846 
  2847 /*
  2848  * State machine for Objective C protocols and implementations.
  2849  * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
  2850  */
  2851 static enum
  2852 {
  2853   onone,                        /* nothing seen */
  2854   oprotocol,                    /* @interface or @protocol seen */
  2855   oimplementation,              /* @implementations seen */
  2856   otagseen,                     /* class name seen */
  2857   oparenseen,                   /* parenthesis before category seen */
  2858   ocatseen,                     /* category name seen */
  2859   oinbody,                      /* in @implementation body */
  2860   omethodsign,                  /* in @implementation body, after +/- */
  2861   omethodtag,                   /* after method name */
  2862   omethodcolon,                 /* after method colon */
  2863   omethodparm,                  /* after method parameter */
  2864   oignore                       /* wait for @end */
  2865 } objdef;
  2866 
  2867 
  2868 /*
  2869  * Use this structure to keep info about the token read, and how it
  2870  * should be tagged.  Used by the make_C_tag function to build a tag.
  2871  */
  2872 static struct tok
  2873 {
  2874   char *line;                   /* string containing the token */
  2875   ptrdiff_t offset;             /* where the token starts in LINE */
  2876   ptrdiff_t length;             /* token length */
  2877   /*
  2878     The previous members can be used to pass strings around for generic
  2879     purposes.  The following ones specifically refer to creating tags.  In this
  2880     case the token contained here is the pattern that will be used to create a
  2881     tag.
  2882   */
  2883   bool valid;                   /* do not create a tag; the token should be
  2884                                    invalidated whenever a state machine is
  2885                                    reset prematurely */
  2886   bool named;                   /* create a named tag */
  2887   intmax_t lineno;              /* source line number of tag */
  2888   intmax_t linepos;             /* source char number of tag */
  2889 } token;                        /* latest token read */
  2890 
  2891 /*
  2892  * Variables and functions for dealing with nested structures.
  2893  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
  2894  */
  2895 static void pushclass_above (ptrdiff_t, char *, ptrdiff_t);
  2896 static void popclass_above (ptrdiff_t);
  2897 static void write_classname (linebuffer *, const char *qualifier);
  2898 
  2899 static struct {
  2900   char **cname;                 /* nested class names */
  2901   ptrdiff_t *bracelev;          /* nested class brace level */
  2902   ptrdiff_t nl;                 /* class nesting level (elements used) */
  2903   ptrdiff_t size;               /* length of the array */
  2904 } cstack;                       /* stack for nested declaration tags */
  2905 /* Current struct nesting depth (namespace, class, struct, union, enum). */
  2906 #define nestlev         (cstack.nl)
  2907 /* After struct keyword or in struct body, not inside a nested function. */
  2908 #define instruct        (structdef == snone && nestlev > 0                      \
  2909                          && bracelev == cstack.bracelev[nestlev-1] + 1)
  2910 
  2911 static void
  2912 pushclass_above (ptrdiff_t bracelev, char *str, ptrdiff_t len)
  2913 {
  2914   ptrdiff_t nl;
  2915 
  2916   popclass_above (bracelev);
  2917   nl = cstack.nl;
  2918   if (nl >= cstack.size)
  2919     {
  2920       xrnew (cstack.cname, cstack.size, 2);
  2921       xrnew (cstack.bracelev, cstack.size, 2);
  2922       cstack.size *= 2;
  2923     }
  2924   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
  2925   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
  2926   cstack.bracelev[nl] = bracelev;
  2927   cstack.nl = nl + 1;
  2928 }
  2929 
  2930 static void
  2931 popclass_above (ptrdiff_t bracelev)
  2932 {
  2933   for (ptrdiff_t nl = cstack.nl - 1;
  2934        nl >= 0 && cstack.bracelev[nl] >= bracelev;
  2935        nl--)
  2936     {
  2937       free (cstack.cname[nl]);
  2938       cstack.nl = nl;
  2939     }
  2940 }
  2941 
  2942 static void
  2943 write_classname (linebuffer *cn, const char *qualifier)
  2944 {
  2945   ptrdiff_t len;
  2946 
  2947   if (cstack.nl == 0 || cstack.cname[0] == NULL)
  2948     {
  2949       len = 0;
  2950       cn->len = 0;
  2951       cn->buffer[0] = '\0';
  2952     }
  2953   else
  2954     {
  2955       len = strlen (cstack.cname[0]);
  2956       linebuffer_setlen (cn, len);
  2957       strcpy (cn->buffer, cstack.cname[0]);
  2958     }
  2959   for (ptrdiff_t i = 1; i < cstack.nl; i++)
  2960     {
  2961       char *s = cstack.cname[i];
  2962       if (s == NULL)
  2963         continue;
  2964       int qlen = strlen (qualifier);
  2965       ptrdiff_t slen = strlen (s);
  2966       linebuffer_setlen (cn, len + qlen + slen);
  2967       memcpyz (stpcpy (cn->buffer + len, qualifier), s, slen);
  2968       len += qlen + slen;
  2969     }
  2970 }
  2971 
  2972 
  2973 static bool consider_token (char *, ptrdiff_t, int, int *,
  2974                             ptrdiff_t, ptrdiff_t, bool *);
  2975 static void make_C_tag (bool);
  2976 
  2977 /*
  2978  * consider_token ()
  2979  *      checks to see if the current token is at the start of a
  2980  *      function or variable, or corresponds to a typedef, or
  2981  *      is a struct/union/enum tag, or #define, or an enum constant.
  2982  *
  2983  *      *IS_FUNC_OR_VAR gets true if the token is a function or #define macro
  2984  *      with args.  C_EXTP points to which language we are looking at.
  2985  *
  2986  * Globals
  2987  *      fvdef                   IN OUT
  2988  *      structdef               IN OUT
  2989  *      definedef               IN OUT
  2990  *      typdef                  IN OUT
  2991  *      objdef                  IN OUT
  2992  */
  2993 
  2994 static bool
  2995 consider_token (char *str,            /* IN: token pointer */
  2996                 ptrdiff_t len,        /* IN: token length */
  2997                 int c,                /* IN: first char after the token */
  2998                 int *c_extp,          /* IN, OUT: C extensions mask */
  2999                 ptrdiff_t bracelev,   /* IN: brace level */
  3000                 ptrdiff_t parlev,     /* IN: parenthesis level */
  3001                 bool *is_func_or_var) /* OUT: function or variable found */
  3002 {
  3003   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
  3004      structtype is the type of the preceding struct-like keyword, and
  3005      structbracelev is the brace level where it has been seen. */
  3006   static enum sym_type structtype;
  3007   static ptrdiff_t structbracelev;
  3008   static enum sym_type toktype;
  3009 
  3010 
  3011   toktype = C_symtype (str, len, *c_extp);
  3012 
  3013   /*
  3014    * Skip __attribute__
  3015    */
  3016   if (toktype == st_C_attribute)
  3017     {
  3018       inattribute = true;
  3019       return false;
  3020      }
  3021 
  3022   /*
  3023    * Skip ENUM_BF
  3024    */
  3025   if (toktype == st_C_enum_bf && definedef == dnone)
  3026     {
  3027       in_enum_bf = true;
  3028       return false;
  3029     }
  3030 
  3031    /*
  3032     * Advance the definedef state machine.
  3033     */
  3034    switch (definedef)
  3035      {
  3036      case dnone:
  3037        /* We're not on a preprocessor line. */
  3038        if (toktype == st_C_gnumacro)
  3039          {
  3040            fvdef = fdefunkey;
  3041            return false;
  3042          }
  3043        break;
  3044      case dsharpseen:
  3045        if (toktype == st_C_define)
  3046          {
  3047            definedef = ddefineseen;
  3048          }
  3049        else
  3050          {
  3051            definedef = dignorerest;
  3052          }
  3053        return false;
  3054      case ddefineseen:
  3055        /*
  3056         * Make a tag for any macro, unless it is a constant
  3057         * and constantypedefs is false.
  3058         */
  3059        definedef = dignorerest;
  3060        *is_func_or_var = (c == '(');
  3061        if (!*is_func_or_var && !constantypedefs)
  3062          return false;
  3063        else
  3064          return true;
  3065      case dignorerest:
  3066        return false;
  3067      default:
  3068        error ("internal error: definedef value.");
  3069      }
  3070 
  3071    /*
  3072     * Now typedefs
  3073     */
  3074    switch (typdef)
  3075      {
  3076      case tnone:
  3077        if (toktype == st_C_typedef)
  3078          {
  3079            if (typedefs)
  3080              typdef = tkeyseen;
  3081            fvextern = false;
  3082            fvdef = fvnone;
  3083            return false;
  3084          }
  3085        break;
  3086      case tkeyseen:
  3087        switch (toktype)
  3088          {
  3089          case st_none:
  3090          case st_C_class:
  3091          case st_C_struct:
  3092          case st_C_enum:
  3093            typdef = ttypeseen;
  3094            break;
  3095          default:
  3096            break;
  3097          }
  3098        break;
  3099      case ttypeseen:
  3100        if (structdef == snone && fvdef == fvnone)
  3101          {
  3102            fvdef = fvnameseen;
  3103            return true;
  3104          }
  3105        break;
  3106      case tend:
  3107        switch (toktype)
  3108          {
  3109          case st_C_class:
  3110          case st_C_struct:
  3111          case st_C_enum:
  3112            return false;
  3113          default:
  3114            return true;
  3115          }
  3116      default:
  3117        break;
  3118      }
  3119 
  3120    switch (toktype)
  3121      {
  3122      case st_C_javastruct:
  3123        if (structdef == stagseen)
  3124          structdef = scolonseen;
  3125        return false;
  3126      case st_C_template:
  3127      case st_C_class:
  3128        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
  3129            && bracelev == 0
  3130            && definedef == dnone && structdef == snone
  3131            && typdef == tnone && fvdef == fvnone)
  3132          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
  3133        if (toktype == st_C_template)
  3134          break;
  3135        FALLTHROUGH;
  3136      case st_C_struct:
  3137      case st_C_enum:
  3138        if (parlev == 0
  3139            && fvdef != vignore
  3140            && (typdef == tkeyseen
  3141                || (typedefs_or_cplusplus && structdef == snone)))
  3142          {
  3143            structdef = skeyseen;
  3144            structtype = toktype;
  3145            structbracelev = bracelev;
  3146            if (fvdef == fvnameseen)
  3147              fvdef = fvnone;
  3148          }
  3149        return false;
  3150      default:
  3151        break;
  3152      }
  3153 
  3154    if (structdef == skeyseen)
  3155      {
  3156        structdef = stagseen;
  3157        return true;
  3158      }
  3159 
  3160    if (typdef != tnone)
  3161      definedef = dnone;
  3162 
  3163    /* Detect Objective C constructs. */
  3164    switch (objdef)
  3165      {
  3166      case onone:
  3167        switch (toktype)
  3168          {
  3169          case st_C_objprot:
  3170            objdef = oprotocol;
  3171            return false;
  3172          case st_C_objimpl:
  3173            objdef = oimplementation;
  3174            return false;
  3175          default:
  3176            break;
  3177          }
  3178        break;
  3179      case oimplementation:
  3180        /* Save the class tag for functions or variables defined inside. */
  3181        objtag = savenstr (str, len);
  3182        objdef = oinbody;
  3183        return false;
  3184      case oprotocol:
  3185        /* Save the class tag for categories. */
  3186        objtag = savenstr (str, len);
  3187        objdef = otagseen;
  3188        *is_func_or_var = true;
  3189        return true;
  3190      case oparenseen:
  3191        objdef = ocatseen;
  3192        *is_func_or_var = true;
  3193        return true;
  3194      case oinbody:
  3195        break;
  3196      case omethodsign:
  3197        if (parlev == 0)
  3198          {
  3199            fvdef = fvnone;
  3200            objdef = omethodtag;
  3201            linebuffer_setlen (&token_name, len);
  3202            memcpyz (token_name.buffer, str, len);
  3203            return true;
  3204          }
  3205        return false;
  3206      case omethodcolon:
  3207        if (parlev == 0)
  3208          objdef = omethodparm;
  3209        return false;
  3210      case omethodparm:
  3211        if (parlev == 0)
  3212          {
  3213            objdef = omethodtag;
  3214            if (class_qualify)
  3215              {
  3216                ptrdiff_t oldlen = token_name.len;
  3217                fvdef = fvnone;
  3218                linebuffer_setlen (&token_name, oldlen + len);
  3219                memcpyz (token_name.buffer + oldlen, str, len);
  3220              }
  3221            return true;
  3222          }
  3223        return false;
  3224      case oignore:
  3225        if (toktype == st_C_objend)
  3226          {
  3227            /* Memory leakage here: the string pointed by objtag is
  3228               never released, because many tests would be needed to
  3229               avoid breaking on incorrect input code.  The amount of
  3230               memory leaked here is the sum of the lengths of the
  3231               class tags.
  3232            free (objtag); */
  3233            objdef = onone;
  3234          }
  3235        return false;
  3236      default:
  3237        break;
  3238      }
  3239 
  3240    /* A function, variable or enum constant? */
  3241    switch (toktype)
  3242      {
  3243      case st_C_extern:
  3244        fvextern = true;
  3245        switch  (fvdef)
  3246          {
  3247          case finlist:
  3248          case flistseen:
  3249          case fignore:
  3250          case vignore:
  3251            break;
  3252          default:
  3253            fvdef = fvnone;
  3254          }
  3255        return false;
  3256      case st_C_ignore:
  3257        fvextern = false;
  3258        fvdef = vignore;
  3259        return false;
  3260      case st_C_operator:
  3261        fvdef = foperator;
  3262        *is_func_or_var = true;
  3263        return true;
  3264      case st_none:
  3265        if (constantypedefs
  3266            && structdef == snone
  3267            && structtype == st_C_enum && bracelev > structbracelev
  3268            /* Don't tag tokens in expressions that assign values to enum
  3269               constants.  */
  3270            && fvdef != vignore)
  3271          return true;           /* enum constant */
  3272        switch (fvdef)
  3273          {
  3274          case fdefunkey:
  3275            if (bracelev > 0)
  3276              break;
  3277            fvdef = fdefunname;  /* GNU macro */
  3278            *is_func_or_var = true;
  3279            return true;
  3280          case fvnone:
  3281            switch (typdef)
  3282              {
  3283              case ttypeseen:
  3284                return false;
  3285              case tnone:
  3286                if ((strneq (str, "asm", 3) && endtoken (str[3]))
  3287                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
  3288                  {
  3289                    fvdef = vignore;
  3290                    return false;
  3291                  }
  3292                break;
  3293              default:
  3294                break;
  3295              }
  3296            FALLTHROUGH;
  3297           case fvnameseen:
  3298           if (len >= 10 && strneq (str+len-10, "::operator", 10))
  3299             {
  3300               if (*c_extp & C_AUTO) /* automatic detection of C++ */
  3301                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
  3302               fvdef = foperator;
  3303               *is_func_or_var = true;
  3304               return true;
  3305             }
  3306           if (bracelev > 0 && !instruct)
  3307             break;
  3308           fvdef = fvnameseen;   /* function or variable */
  3309           *is_func_or_var = true;
  3310           return true;
  3311          default:
  3312            break;
  3313         }
  3314       break;
  3315      default:
  3316        break;
  3317     }
  3318 
  3319   return false;
  3320 }
  3321 
  3322 
  3323 /*
  3324  * C_entries often keeps pointers to tokens or lines which are older than
  3325  * the line currently read.  By keeping two line buffers, and switching
  3326  * them at end of line, it is possible to use those pointers.
  3327  */
  3328 static struct
  3329 {
  3330   intmax_t linepos;
  3331   linebuffer lb;
  3332 } lbs[2];
  3333 
  3334 #define current_lb_is_new (newndx == curndx)
  3335 #define switch_line_buffers() (curndx = 1 - curndx)
  3336 
  3337 #define curlb (lbs[curndx].lb)
  3338 #define newlb (lbs[newndx].lb)
  3339 #define curlinepos (lbs[curndx].linepos)
  3340 #define newlinepos (lbs[newndx].linepos)
  3341 
  3342 #define plainc ((c_ext & C_EXT) == C_PLAIN)
  3343 #define cplpl (c_ext & C_PLPL)
  3344 #define cjava ((c_ext & C_JAVA) == C_JAVA)
  3345 
  3346 #define CNL_SAVE_DEFINEDEF()                                            \
  3347 do {                                                                    \
  3348   curlinepos = charno;                                                  \
  3349   readline (&curlb, inf);                                               \
  3350   lp = curlb.buffer;                                                    \
  3351   quotednl = false;                                                     \
  3352   newndx = curndx;                                                      \
  3353 } while (0)
  3354 
  3355 #define CNL()                                                           \
  3356 do {                                                                    \
  3357   CNL_SAVE_DEFINEDEF ();                                                \
  3358   if (savetoken.valid)                                                  \
  3359     {                                                                   \
  3360       token = savetoken;                                                \
  3361       savetoken.valid = false;                                          \
  3362     }                                                                   \
  3363   definedef = dnone;                                                    \
  3364 } while (0)
  3365 
  3366 
  3367 static void
  3368 make_C_tag (bool isfun)
  3369 {
  3370   /* This function is never called when token.valid is false, but
  3371      we must protect against invalid input or internal errors. */
  3372   if (token.valid)
  3373     make_tag (token_name.buffer, token_name.len, isfun, token.line,
  3374               token.offset+token.length+1, token.lineno, token.linepos);
  3375   else if (DEBUG)
  3376     {                             /* this branch is optimized away if !DEBUG */
  3377       make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
  3378                 token_name.len + 17, isfun, token.line,
  3379                 token.offset+token.length+1, token.lineno, token.linepos);
  3380       error ("INVALID TOKEN");
  3381     }
  3382 
  3383   token.valid = false;
  3384 }
  3385 
  /* Return true as long as neither the end-of-file indicator nor the
     error indicator is set on INF, i.e. while a further read attempt
     on it may still yield data.  */
  static bool
  perhaps_more_input (FILE *inf)
  {
    if (feof (inf))
      return false;
    return ferror (inf) == 0;
  }
  3391 
  3392 
  3393 /*
  3394  * C_entries ()
  3395  *      This routine finds functions, variables, typedefs,
  3396  *      #define's, enum constants and struct/union/enum definitions in
  3397  *      C syntax and adds them to the list.
  3398  */
  3399 static void
  3400 C_entries (int c_ext,           /* extension of C */
  3401            FILE *inf)           /* input file */
  3402 {
  3403   char c;                       /* latest char read; '\0' for end of line */
  3404   char *lp;                     /* pointer one beyond the character `c' */
  3405   bool curndx, newndx;          /* indices for current and new lb */
  3406   ptrdiff_t tokoff;             /* offset in line of start of current token */
  3407   ptrdiff_t toklen;             /* length of current token */
  3408   const char *qualifier;        /* string used to qualify names */
  3409   int qlen;                     /* length of qualifier */
  3410   ptrdiff_t bracelev;           /* current brace level */
  3411   ptrdiff_t bracketlev;         /* current bracket level */
  3412   ptrdiff_t parlev;             /* current parenthesis level */
  3413   ptrdiff_t attrparlev;         /* __attribute__ parenthesis level */
  3414   ptrdiff_t templatelev;        /* current template level */
  3415   ptrdiff_t typdefbracelev;     /* bracelev where a typedef struct body begun */
  3416   bool incomm, inquote, inchar, quotednl, midtoken;
  3417   bool yacc_rules;              /* in the rules part of a yacc file */
  3418   struct tok savetoken = {0};   /* token saved during preprocessor handling */
  3419 
  3420 
  3421   linebuffer_init (&lbs[0].lb);
  3422   linebuffer_init (&lbs[1].lb);
  3423   if (cstack.size == 0)
  3424     {
  3425       cstack.size = (DEBUG) ? 1 : 4;
  3426       cstack.nl = 0;
  3427       cstack.cname = xnew (cstack.size, char *);
  3428       cstack.bracelev = xnew (cstack.size, ptrdiff_t);
  3429     }
  3430 
  3431   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
  3432   curndx = newndx = 0;
  3433   lp = curlb.buffer;
  3434   *lp = 0;
  3435 
  3436   fvdef = fvnone; fvextern = false; typdef = tnone;
  3437   structdef = snone; definedef = dnone; objdef = onone;
  3438   yacc_rules = false;
  3439   midtoken = inquote = inchar = incomm = quotednl = false;
  3440   token.valid = savetoken.valid = false;
  3441   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
  3442   if (cjava)
  3443     { qualifier = "."; qlen = 1; }
  3444   else
  3445     { qualifier = "::"; qlen = 2; }
  3446 
  3447 
  3448   while (perhaps_more_input (inf))
  3449     {
  3450       c = *lp++;
  3451       if (c == '\\')
  3452         {
  3453           /* If we are at the end of the line, the next character is a
  3454              '\0'; do not skip it, because it is what tells us
  3455              to read the next line.  */
  3456           if (*lp == '\0')
  3457             {
  3458               quotednl = true;
  3459               continue;
  3460             }
  3461           lp++;
  3462           c = ' ';
  3463         }
  3464       else if (incomm)
  3465         {
  3466           switch (c)
  3467             {
  3468             case '*':
  3469               if (*lp == '/')
  3470                 {
  3471                   c = *lp++;
  3472                   incomm = false;
  3473                 }
  3474               break;
  3475             case '\0':
  3476               /* Newlines inside comments do not end macro definitions in
  3477                  traditional cpp. */
  3478               CNL_SAVE_DEFINEDEF ();
  3479               break;
  3480             }
  3481           continue;
  3482         }
  3483       else if (inquote)
  3484         {
  3485           switch (c)
  3486             {
  3487             case '"':
  3488               inquote = false;
  3489               break;
  3490             case '\0':
  3491               /* Newlines inside strings do not end macro definitions
  3492                  in traditional cpp, even though compilers don't
  3493                  usually accept them. */
  3494               CNL_SAVE_DEFINEDEF ();
  3495               break;
  3496             }
  3497           continue;
  3498         }
  3499       else if (inchar)
  3500         {
  3501           switch (c)
  3502             {
  3503             case '\0':
  3504               /* Hmmm, something went wrong. */
  3505               CNL ();
  3506               FALLTHROUGH;
  3507             case '\'':
  3508               inchar = false;
  3509               break;
  3510             }
  3511           continue;
  3512         }
  3513       else switch (c)
  3514         {
  3515         case '"':
  3516           inquote = true;
  3517           if (bracketlev > 0)
  3518             continue;
  3519           if (inattribute)
  3520             break;
  3521           switch (fvdef)
  3522             {
  3523             case fdefunkey:
  3524             case fstartlist:
  3525             case finlist:
  3526             case fignore:
  3527             case vignore:
  3528               break;
  3529             default:
  3530               fvextern = false;
  3531               fvdef = fvnone;
  3532             }
  3533           continue;
  3534         case '\'':
  3535           inchar = true;
  3536           if (bracketlev > 0)
  3537             continue;
  3538           if (inattribute)
  3539             break;
  3540           if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
  3541             {
  3542               fvextern = false;
  3543               fvdef = fvnone;
  3544             }
  3545           continue;
  3546         case '/':
  3547           if (*lp == '*')
  3548             {
  3549               incomm = true;
  3550               lp++;
  3551               c = ' ';
  3552               if (bracketlev > 0)
  3553                 continue;
  3554             }
  3555           else if (/* cplpl && */ *lp == '/')
  3556             {
  3557               c = '\0';
  3558             }
  3559           break;
  3560         case '%':
  3561           if ((c_ext & YACC) && *lp == '%')
  3562             {
  3563               /* Entering or exiting rules section in yacc file. */
  3564               lp++;
  3565               definedef = dnone; fvdef = fvnone; fvextern = false;
  3566               typdef = tnone; structdef = snone;
  3567               midtoken = inquote = inchar = incomm = quotednl = false;
  3568               bracelev = 0;
  3569               yacc_rules = !yacc_rules;
  3570               continue;
  3571             }
  3572           else
  3573             break;
  3574         case '#':
  3575           if (definedef == dnone)
  3576             {
  3577               char *cp;
  3578               bool cpptoken = true;
  3579 
  3580               /* Look back on this line.  If all blanks, or nonblanks
  3581                  followed by an end of comment, this is a preprocessor
  3582                  token. */
  3583               for (cp = newlb.buffer; cp < lp-1; cp++)
  3584                 if (!c_isspace (*cp))
  3585                   {
  3586                     if (*cp == '*' && cp[1] == '/')
  3587                       {
  3588                         cp++;
  3589                         cpptoken = true;
  3590                       }
  3591                     else
  3592                       cpptoken = false;
  3593                   }
  3594               if (cpptoken)
  3595                 {
  3596                   definedef = dsharpseen;
  3597                   /* This is needed for tagging enum values: when there are
  3598                      preprocessor conditionals inside the enum, we need to
  3599                      reset the value of fvdef so that the next enum value is
  3600                      tagged even though the one before it did not end in a
  3601                      comma.  */
  3602                   if (fvdef == vignore && instruct && parlev == 0)
  3603                     {
  3604                       if (strneq (cp, "#if", 3) || strneq (cp, "#el", 3))
  3605                         fvdef = fvnone;
  3606                     }
  3607                 }
  3608             } /* if (definedef == dnone) */
  3609           continue;
  3610         case '[':
  3611           bracketlev++;
  3612           continue;
  3613         default:
  3614           if (bracketlev > 0)
  3615             {
  3616               if (c == ']')
  3617                 --bracketlev;
  3618               else if (c == '\0')
  3619                 CNL_SAVE_DEFINEDEF ();
  3620               continue;
  3621             }
  3622           break;
  3623         } /* switch (c) */
  3624 
  3625 
  3626       /* Consider token only if some involved conditions are satisfied. */
  3627       if (typdef != tignore
  3628           && definedef != dignorerest
  3629           && fvdef != finlist
  3630           && templatelev == 0
  3631           && (definedef != dnone
  3632               || structdef != scolonseen)
  3633           && !inattribute
  3634           && !in_enum_bf)
  3635         {
  3636           if (midtoken)
  3637             {
  3638               if (endtoken (c))
  3639                 {
  3640                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
  3641                     /* This handles :: in the middle,
  3642                        but not at the beginning of an identifier.
  3643                        Also, space-separated :: is not recognized. */
  3644                     {
  3645                       if (c_ext & C_AUTO) /* automatic detection of C++ */
  3646                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
  3647                       lp += 2;
  3648                       toklen += 2;
  3649                       c = lp[-1];
  3650                       goto still_in_token;
  3651                     }
  3652                   else
  3653                     {
  3654                       bool funorvar = false;
  3655 
  3656                       if (yacc_rules
  3657                           || consider_token (newlb.buffer + tokoff, toklen, c,
  3658                                              &c_ext, bracelev, parlev,
  3659                                              &funorvar))
  3660                         {
  3661                           if (fvdef == foperator)
  3662                             {
  3663                               char *oldlp = lp;
  3664                               lp = skip_spaces (lp-1);
  3665                               if (*lp != '\0')
  3666                                 lp += 1;
  3667                               while (*lp != '\0'
  3668                                      && !c_isspace (*lp) && *lp != '(')
  3669                                 lp += 1;
  3670                               c = *lp++;
  3671                               toklen += lp - oldlp;
  3672                             }
  3673                           token.named = false;
  3674                           if (!plainc
  3675                               && nestlev > 0 && definedef == dnone)
  3676                             /* in struct body */
  3677                             {
  3678                               if (class_qualify)
  3679                                 {
  3680                                   write_classname (&token_name, qualifier);
  3681                                   ptrdiff_t len = token_name.len;
  3682                                   linebuffer_setlen (&token_name,
  3683                                                      len + qlen + toklen);
  3684                                   memcpyz (stpcpy (token_name.buffer + len,
  3685                                                    qualifier),
  3686                                            newlb.buffer + tokoff, toklen);
  3687                                 }
  3688                               else
  3689                                 {
  3690                                   linebuffer_setlen (&token_name, toklen);
  3691                                   memcpyz (token_name.buffer,
  3692                                            newlb.buffer + tokoff, toklen);
  3693                                 }
  3694                               token.named = true;
  3695                             }
  3696                           else if (objdef == ocatseen)
  3697                             /* Objective C category */
  3698                             {
  3699                               if (class_qualify)
  3700                                 {
  3701                                   ptrdiff_t len = strlen (objtag) + 2 + toklen;
  3702                                   linebuffer_setlen (&token_name, len);
  3703                                   char *p1 = stpcpy (token_name.buffer, objtag);
  3704                                   char *p2 = stpcpy (p1, "(");
  3705                                   char *p3 = mempcpy (p2, newlb.buffer + tokoff,
  3706                                                       toklen);
  3707                                   strcpy (p3, ")");
  3708                                 }
  3709                               else
  3710                                 {
  3711                                   linebuffer_setlen (&token_name, toklen);
  3712                                   memcpyz (token_name.buffer,
  3713                                            newlb.buffer + tokoff, toklen);
  3714                                 }
  3715                               token.named = true;
  3716                             }
  3717                           else if (objdef == omethodtag
  3718                                    || objdef == omethodparm)
  3719                             /* Objective C method */
  3720                             {
  3721                               token.named = true;
  3722                             }
  3723                           else if (fvdef == fdefunname)
  3724                             /* GNU DEFUN and similar macros */
  3725                             {
  3726                               bool defun = (newlb.buffer[tokoff] == 'F');
  3727                               ptrdiff_t off = tokoff;
  3728                               ptrdiff_t len = toklen;
  3729 
  3730                               if (defun)
  3731                                 {
  3732                                   off += 1;
  3733                                   len -= 1;
  3734 
  3735                                   /* First, tag it as its C name */
  3736                                   linebuffer_setlen (&token_name, toklen);
  3737                                   memcpyz (token_name.buffer,
  3738                                            newlb.buffer + tokoff, toklen);
  3739                                   token.named = true;
  3740                                   token.lineno = lineno;
  3741                                   token.offset = tokoff;
  3742                                   token.length = toklen;
  3743                                   token.line = newlb.buffer;
  3744                                   token.linepos = newlinepos;
  3745                                   token.valid = true;
  3746                                   make_C_tag (funorvar);
  3747                                 }
  3748                               /* Rewrite the tag so that emacs lisp DEFUNs
  3749                                  can be found also by their elisp name */
  3750                               linebuffer_setlen (&token_name, len);
  3751                               memcpyz (token_name.buffer,
  3752                                        newlb.buffer + off, len);
  3753                               if (defun)
  3754                                 while (--len >= 0)
  3755                                   if (token_name.buffer[len] == '_')
  3756                                     token_name.buffer[len] = '-';
  3757                               token.named = defun;
  3758                             }
  3759                           else
  3760                             {
  3761                               linebuffer_setlen (&token_name, toklen);
  3762                               memcpyz (token_name.buffer,
  3763                                        newlb.buffer + tokoff, toklen);
  3764                               /* Name macros and members. */
  3765                               token.named = (structdef == stagseen
  3766                                              || typdef == ttypeseen
  3767                                              || typdef == tend
  3768                                              || (funorvar
  3769                                                  && definedef == dignorerest)
  3770                                              || (funorvar
  3771                                                  && definedef == dnone
  3772                                                  && structdef == snone
  3773                                                  && bracelev > 0));
  3774                             }
  3775                           token.lineno = lineno;
  3776                           token.offset = tokoff;
  3777                           token.length = toklen;
  3778                           token.line = newlb.buffer;
  3779                           token.linepos = newlinepos;
  3780                           token.valid = true;
  3781 
  3782                           if (definedef == dnone
  3783                               && (fvdef == fvnameseen
  3784                                   || fvdef == foperator
  3785                                   || structdef == stagseen
  3786                                   || typdef == tend
  3787                                   || typdef == ttypeseen
  3788                                   || objdef != onone))
  3789                             {
  3790                               if (current_lb_is_new)
  3791                                 switch_line_buffers ();
  3792                             }
  3793                           else if (definedef != dnone
  3794                                    || fvdef == fdefunname
  3795                                    || instruct)
  3796                             make_C_tag (funorvar);
  3797                         }
  3798                       else /* not yacc and consider_token failed */
  3799                         {
  3800                           if (inattribute && fvdef == fignore)
  3801                             {
  3802                               /* We have just met __attribute__ after a
  3803                                  function parameter list: do not tag the
  3804                                  function again. */
  3805                               fvdef = fvnone;
  3806                             }
  3807                         }
  3808                       midtoken = false;
  3809                     }
  3810                 } /* if (endtoken (c)) */
  3811               else if (intoken (c))
  3812                 still_in_token:
  3813                 {
  3814                   toklen++;
  3815                   continue;
  3816                 }
  3817             } /* if (midtoken) */
  3818           else if (begtoken (c))
  3819             {
  3820               switch (definedef)
  3821                 {
  3822                 case dnone:
  3823                   switch (fvdef)
  3824                     {
  3825                     case fstartlist:
  3826                       /* This prevents tagging fb in
  3827                          void (__attribute__((noreturn)) *fb) (void);
  3828                          Fixing this is not easy and not very important. */
  3829                       fvdef = finlist;
  3830                       continue;
  3831                     case flistseen:
  3832                       if (plainc || declarations)
  3833                         {
  3834                           make_C_tag (true); /* a function */
  3835                           fvdef = fignore;
  3836                         }
  3837                       break;
  3838                     default:
  3839                       break;
  3840                     }
  3841                   if (structdef == stagseen && !cjava)
  3842                     {
  3843                       popclass_above (bracelev);
  3844                       structdef = snone;
  3845                     }
  3846                   break;
  3847                 case dsharpseen:
  3848                   savetoken = token;
  3849                   break;
  3850                 default:
  3851                   break;
  3852                 }
  3853               if (!yacc_rules || lp == newlb.buffer + 1)
  3854                 {
  3855                   tokoff = lp - 1 - newlb.buffer;
  3856                   toklen = 1;
  3857                   midtoken = true;
  3858                 }
  3859               continue;
  3860             } /* if (begtoken) */
  3861         } /* if must look at token */
  3862 
  3863 
  3864       /* Detect end of line, colon, comma, semicolon and various braces
  3865          after having handled a token.*/
  3866       switch (c)
  3867         {
  3868         case ':':
  3869           if (inattribute)
  3870             break;
  3871           if (yacc_rules && token.offset == 0 && token.valid)
  3872             {
  3873               make_C_tag (false); /* a yacc function */
  3874               break;
  3875             }
  3876           if (definedef != dnone)
  3877             break;
  3878           switch (objdef)
  3879             {
  3880             case otagseen:
  3881               objdef = oignore;
  3882               make_C_tag (true); /* an Objective C class */
  3883               break;
  3884             case omethodtag:
  3885             case omethodparm:
  3886               objdef = omethodcolon;
  3887               if (class_qualify)
  3888                 {
  3889                   ptrdiff_t toklen = token_name.len;
  3890                   linebuffer_setlen (&token_name, toklen + 1);
  3891                   strcpy (token_name.buffer + toklen, ":");
  3892                 }
  3893               break;
  3894             default:
  3895               break;
  3896             }
  3897           if (structdef == stagseen)
  3898             {
  3899               structdef = scolonseen;
  3900               break;
  3901             }
  3902           /* Should be useless, but may be work as a safety net. */
  3903           if (cplpl && fvdef == flistseen)
  3904             {
  3905               make_C_tag (true); /* a function */
  3906               fvdef = fignore;
  3907               break;
  3908             }
  3909           break;
  3910         case ';':
  3911           if (definedef != dnone || inattribute)
  3912             break;
  3913           switch (typdef)
  3914             {
  3915             case tend:
  3916             case ttypeseen:
  3917               make_C_tag (false); /* a typedef */
  3918               typdef = tnone;
  3919               fvdef = fvnone;
  3920               break;
  3921             case tnone:
  3922             case tinbody:
  3923             case tignore:
  3924               switch (fvdef)
  3925                 {
  3926                 case fignore:
  3927                   if (typdef == tignore || cplpl)
  3928                     fvdef = fvnone;
  3929                   break;
  3930                 case fvnameseen:
  3931                   if ((globals && bracelev == 0 && (!fvextern || declarations))
  3932                       || (members && instruct))
  3933                     make_C_tag (false); /* a variable */
  3934                   fvextern = false;
  3935                   fvdef = fvnone;
  3936                   token.valid = false;
  3937                   break;
  3938                 case flistseen:
  3939                   if ((declarations
  3940                        && (cplpl || !instruct)
  3941                        && (typdef == tnone || (typdef != tignore && instruct)))
  3942                       || (members
  3943                           && plainc && instruct))
  3944                     make_C_tag (true);  /* a function */
  3945                   FALLTHROUGH;
  3946                 default:
  3947                   fvextern = false;
  3948                   fvdef = fvnone;
  3949                   if (declarations
  3950                        && cplpl && structdef == stagseen)
  3951                     make_C_tag (false); /* forward declaration */
  3952                   else
  3953                     token.valid = false;
  3954                 } /* switch (fvdef) */
  3955               FALLTHROUGH;
  3956             default:
  3957               if (!instruct)
  3958                 typdef = tnone;
  3959             }
  3960           if (structdef == stagseen)
  3961             structdef = snone;
  3962           break;
  3963         case ',':
  3964           if (definedef != dnone || inattribute)
  3965             break;
  3966           switch (objdef)
  3967             {
  3968             case omethodtag:
  3969             case omethodparm:
  3970               make_C_tag (true); /* an Objective C method */
  3971               objdef = oinbody;
  3972               break;
  3973             default:
  3974               break;
  3975             }
  3976           switch (fvdef)
  3977             {
  3978             case fdefunkey:
  3979             case foperator:
  3980             case fstartlist:
  3981             case finlist:
  3982             case fignore:
  3983               break;
  3984             case vignore:
  3985               if (instruct && parlev == 0)
  3986                 fvdef = fvnone;
  3987               break;
  3988             case fdefunname:
  3989               fvdef = fignore;
  3990               break;
  3991             case fvnameseen:
  3992               if (parlev == 0
  3993                   && ((globals
  3994                        && bracelev == 0
  3995                        && templatelev == 0
  3996                        && (!fvextern || declarations))
  3997                       || (members && instruct)))
  3998                   make_C_tag (false); /* a variable */
  3999               break;
  4000             case flistseen:
  4001               if ((declarations && typdef == tnone && !instruct)
  4002                   || (members && typdef != tignore && instruct))
  4003                 {
  4004                   make_C_tag (true); /* a function */
  4005                   fvdef = fvnameseen;
  4006                 }
  4007               else if (!declarations)
  4008                 fvdef = fvnone;
  4009               token.valid = false;
  4010               break;
  4011             default:
  4012               fvdef = fvnone;
  4013             }
  4014           if (structdef == stagseen)
  4015             structdef = snone;
  4016           break;
  4017         case ']':
  4018           if (definedef != dnone || inattribute)
  4019             break;
  4020           if (structdef == stagseen)
  4021             structdef = snone;
  4022           switch (typdef)
  4023             {
  4024             case ttypeseen:
  4025             case tend:
  4026               typdef = tignore;
  4027               make_C_tag (false);       /* a typedef */
  4028               break;
  4029             case tnone:
  4030             case tinbody:
  4031               switch (fvdef)
  4032                 {
  4033                 case foperator:
  4034                 case finlist:
  4035                 case fignore:
  4036                 case vignore:
  4037                   break;
  4038                 case fvnameseen:
  4039                   if ((members && bracelev == 1)
  4040                       || (globals && bracelev == 0
  4041                           && (!fvextern || declarations)))
  4042                     make_C_tag (false); /* a variable */
  4043                   FALLTHROUGH;
  4044                 default:
  4045                   fvdef = fvnone;
  4046                 }
  4047               break;
  4048             default:
  4049               break;
  4050             }
  4051           break;
  4052         case '(':
  4053           if (inattribute)
  4054             {
  4055               attrparlev++;
  4056               break;
  4057             }
  4058           if (definedef != dnone)
  4059             break;
  4060           if (objdef == otagseen && parlev == 0)
  4061             objdef = oparenseen;
  4062           switch (fvdef)
  4063             {
  4064             case fvnameseen:
  4065               if (typdef == ttypeseen
  4066                   && *lp != '*'
  4067                   && !instruct)
  4068                 {
  4069                   /* This handles constructs like:
  4070                      typedef void OperatorFun (int fun); */
  4071                   make_C_tag (false);
  4072                   typdef = tignore;
  4073                   fvdef = fignore;
  4074                   break;
  4075                 }
  4076               FALLTHROUGH;
  4077             case foperator:
  4078               fvdef = fstartlist;
  4079               break;
  4080             case flistseen:
  4081               fvdef = finlist;
  4082               break;
  4083             default:
  4084               break;
  4085             }
  4086           parlev++;
  4087           break;
  4088         case ')':
  4089           if (inattribute)
  4090             {
  4091               if (--attrparlev == 0)
  4092                 inattribute = false;
  4093               break;
  4094             }
  4095           if (in_enum_bf)
  4096             {
  4097               if (--parlev == 0)
  4098                 in_enum_bf = false;
  4099               break;
  4100             }
  4101           if (definedef != dnone)
  4102             break;
  4103           if (objdef == ocatseen && parlev == 1)
  4104             {
  4105               make_C_tag (true); /* an Objective C category */
  4106               objdef = oignore;
  4107             }
  4108           if (--parlev == 0)
  4109             {
  4110               switch (fvdef)
  4111                 {
  4112                 case fstartlist:
  4113                 case finlist:
  4114                   fvdef = flistseen;
  4115                   break;
  4116                 default:
  4117                   break;
  4118                 }
  4119               if (!instruct
  4120                   && (typdef == tend
  4121                       || typdef == ttypeseen))
  4122                 {
  4123                   typdef = tignore;
  4124                   make_C_tag (false); /* a typedef */
  4125                 }
  4126             }
  4127           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
  4128             parlev = 0;
  4129           break;
  4130         case '{':
  4131           if (definedef != dnone)
  4132             break;
  4133           if (typdef == ttypeseen)
  4134             {
  4135               /* Whenever typdef is set to tinbody (currently only
  4136                  here), typdefbracelev should be set to bracelev. */
  4137               typdef = tinbody;
  4138               typdefbracelev = bracelev;
  4139             }
  4140           switch (fvdef)
  4141             {
  4142             case flistseen:
  4143               if (cplpl && !class_qualify)
  4144                 {
  4145                   /* Remove class and namespace qualifiers from the token,
  4146                      leaving only the method/member name.  */
  4147                   char *cc, *uqname = token_name.buffer;
  4148                   char *tok_end = token_name.buffer + token_name.len;
  4149 
  4150                   for (cc = token_name.buffer; cc < tok_end; cc++)
  4151                     {
  4152                       if (*cc == ':' && cc[1] == ':')
  4153                         {
  4154                           uqname = cc + 2;
  4155                           cc++;
  4156                         }
  4157                     }
  4158                   if (uqname > token_name.buffer)
  4159                     {
  4160                       ptrdiff_t uqlen = strlen (uqname);
  4161                       linebuffer_setlen (&token_name, uqlen);
  4162                       memmove (token_name.buffer, uqname, uqlen + 1);
  4163                     }
  4164                 }
  4165               make_C_tag (true);    /* a function */
  4166               FALLTHROUGH;
  4167             case fignore:
  4168               fvdef = fvnone;
  4169               break;
  4170             case fvnone:
  4171               switch (objdef)
  4172                 {
  4173                 case otagseen:
  4174                   make_C_tag (true); /* an Objective C class */
  4175                   objdef = oignore;
  4176                   break;
  4177                 case omethodtag:
  4178                 case omethodparm:
  4179                   make_C_tag (true); /* an Objective C method */
  4180                   objdef = oinbody;
  4181                   break;
  4182                 default:
  4183                   /* Neutralize `extern "C" {' grot. */
  4184                   if (bracelev == 0 && structdef == snone && nestlev == 0
  4185                       && typdef == tnone)
  4186                     bracelev = -1;
  4187                 }
  4188               break;
  4189             default:
  4190               break;
  4191             }
  4192           switch (structdef)
  4193             {
  4194             case skeyseen:         /* unnamed struct */
  4195               pushclass_above (bracelev, NULL, 0);
  4196               structdef = snone;
  4197               break;
  4198             case stagseen:         /* named struct or enum */
  4199             case scolonseen:       /* a class */
  4200               pushclass_above (bracelev,token.line+token.offset, token.length);
  4201               structdef = snone;
  4202               make_C_tag (false);  /* a struct or enum */
  4203               break;
  4204             default:
  4205               break;
  4206             }
  4207           bracelev += 1;
  4208           break;
  4209         case '*':
  4210           if (definedef != dnone)
  4211             break;
  4212           if (fvdef == fstartlist)
  4213             {
  4214               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
  4215               token.valid = false;
  4216             }
  4217           break;
  4218         case '}':
  4219           if (definedef != dnone)
  4220             break;
  4221           bracelev -= 1;
  4222           /* If we see a closing brace in column zero, and we weren't told to
  4223              ignore indentation, we assume this the final brace of a function
  4224              or struct definition, and reset bracelev to zero.  */
  4225           if (!ignoreindent && lp == newlb.buffer + 1)
  4226             {
  4227               if (bracelev != 0)
  4228                 token.valid = false; /* unexpected value, token unreliable */
  4229               bracelev = 0;     /* reset brace level if first column */
  4230               parlev = 0;       /* also reset paren level, just in case... */
  4231             }
  4232           else if (bracelev < 0)
  4233             {
  4234               token.valid = false; /* something gone amiss, token unreliable */
  4235               bracelev = 0;
  4236             }
  4237           if (bracelev == 0 && fvdef == vignore)
  4238             fvdef = fvnone;             /* end of function */
  4239           popclass_above (bracelev);
  4240           structdef = snone;
  4241           /* Only if typdef == tinbody is typdefbracelev significant. */
  4242           if (typdef == tinbody && bracelev <= typdefbracelev)
  4243             {
  4244               assert (bracelev == typdefbracelev);
  4245               typdef = tend;
  4246             }
  4247           break;
  4248         case '=':
  4249           if (definedef != dnone)
  4250             break;
  4251           switch (fvdef)
  4252             {
  4253             case foperator:
  4254             case finlist:
  4255             case fignore:
  4256             case vignore:
  4257               break;
  4258             case fvnameseen:
  4259               if ((members && bracelev == 1)
  4260                   || (globals && bracelev == 0 && (!fvextern || declarations)))
  4261                 make_C_tag (false); /* a variable */
  4262               FALLTHROUGH;
  4263             default:
  4264               fvdef = vignore;
  4265             }
  4266           break;
  4267         case '<':
  4268           if (cplpl
  4269               && (structdef == stagseen || fvdef == fvnameseen))
  4270             {
  4271               templatelev++;
  4272               break;
  4273             }
  4274           goto resetfvdef;
  4275         case '>':
  4276           if (templatelev > 0)
  4277             {
  4278               templatelev--;
  4279               break;
  4280             }
  4281           goto resetfvdef;
  4282         case '+':
  4283         case '-':
  4284           if (objdef == oinbody && bracelev == 0)
  4285             {
  4286               objdef = omethodsign;
  4287               break;
  4288             }
  4289           FALLTHROUGH;
  4290         case '#': case '~': case '&': case '%': case '/':
  4291         case '|': case '^': case '!': case '.': case '?':
  4292         resetfvdef:
  4293           if (definedef != dnone)
  4294             break;
  4295           /* These surely cannot follow a function tag in C. */
  4296           switch (fvdef)
  4297             {
  4298             case foperator:
  4299             case finlist:
  4300             case fignore:
  4301             case vignore:
  4302               break;
  4303             default:
  4304               fvdef = fvnone;
  4305             }
  4306           break;
  4307         case '\0':
  4308           if (objdef == otagseen)
  4309             {
  4310               make_C_tag (true); /* an Objective C class */
  4311               objdef = oignore;
  4312             }
  4313           /* If a macro spans multiple lines don't reset its state. */
  4314           if (quotednl)
  4315             CNL_SAVE_DEFINEDEF ();
  4316           else
  4317             CNL ();
  4318           break;
  4319         } /* switch (c) */
  4320 
  4321     } /* while not eof */
  4322 
  4323   free (lbs[0].lb.buffer);
  4324   free (lbs[1].lb.buffer);
  4325 }
  4326 
  4327 /*
  4328  * Process either a C++ file or a C file depending on the setting
  4329  * of a global flag.
  4330  */
  4331 static void
  4332 default_C_entries (FILE *inf)
  4333 {
  4334   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
  4335 }
  4336 
  4337 /* Always do plain C. */
  4338 static void
  4339 plain_C_entries (FILE *inf)
  4340 {
  4341   C_entries (0, inf);
  4342 }
  4343 
  4344 /* Always do C++. */
  4345 static void
  4346 Cplusplus_entries (FILE *inf)
  4347 {
  4348   C_entries (C_PLPL, inf);
  4349 }
  4350 
  4351 /* Always do Java. */
  4352 static void
  4353 Cjava_entries (FILE *inf)
  4354 {
  4355   C_entries (C_JAVA, inf);
  4356 }
  4357 
  4358 /* Always do C*. */
  4359 static void
  4360 Cstar_entries (FILE *inf)
  4361 {
  4362   C_entries (C_STAR, inf);
  4363 }
  4364 
  4365 /* Always do Yacc. */
  4366 static void
  4367 Yacc_entries (FILE *inf)
  4368 {
  4369   C_entries (YACC, inf);
  4370 }
  4371 
  4372 
  4373 /* Useful macros. */
  4374 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  4375   while (perhaps_more_input (file_pointer)                              \
  4376          && (readline (&(line_buffer), file_pointer),                   \
  4377              (char_pointer) = (line_buffer).buffer,                     \
  4378              true))
  4379 
  4380 #define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */   \
  4381   ((assert ("" kw), true)   /* syntax error if not a literal string */  \
  4382    && strneq ((cp), kw, sizeof (kw)-1)          /* cp points at kw */   \
  4383    && notinname ((cp)[sizeof (kw)-1])           /* end of kw */         \
  4384    && ((cp) = skip_spaces ((cp) + sizeof (kw) - 1), true)) /* skip spaces */
  4385 
  4386 /* Similar to LOOKING_AT but does not use notinname, does not skip */
  4387 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
  4388   ((assert ("" kw), true) /* syntax error if not a literal string */    \
  4389    && strncaseeq ((cp), kw, sizeof (kw)-1)      /* cp points at kw */   \
  4390    && ((cp) += sizeof (kw) - 1, true))          /* skip spaces */
  4391 
  4392 /*
  4393  * Read a file, but do no processing.  This is used to do regexp
  4394  * matching on files that have no language defined.
  4395  */
  4396 static void
  4397 just_read_file (FILE *inf)
  4398 {
  4399   while (perhaps_more_input (inf))
  4400     readline (&lb, inf);
  4401 }
  4402 
  4403 
  4404 /* Fortran parsing */
  4405 
  4406 static void F_takeprec (void);
  4407 static void F_getit (FILE *);
  4408 
  4409 static void
  4410 F_takeprec (void)
  4411 {
  4412   dbp = skip_spaces (dbp);
  4413   if (*dbp != '*')
  4414     return;
  4415   dbp++;
  4416   dbp = skip_spaces (dbp);
  4417   if (strneq (dbp, "(*)", 3))
  4418     {
  4419       dbp += 3;
  4420       return;
  4421     }
  4422   if (!c_isdigit (*dbp))
  4423     {
  4424       --dbp;                    /* force failure */
  4425       return;
  4426     }
  4427   do
  4428     dbp++;
  4429   while (c_isdigit (*dbp));
  4430 }
  4431 
  4432 static void
  4433 F_getit (FILE *inf)
  4434 {
  4435   register char *cp;
  4436 
  4437   dbp = skip_spaces (dbp);
  4438   if (*dbp == '\0')
  4439     {
  4440       readline (&lb, inf);
  4441       dbp = lb.buffer;
  4442       if (dbp[5] != '&')
  4443         return;
  4444       dbp += 6;
  4445       dbp = skip_spaces (dbp);
  4446     }
  4447   if (!c_isalpha (*dbp) && *dbp != '_' && *dbp != '$')
  4448     return;
  4449   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
  4450     continue;
  4451   make_tag (dbp, cp-dbp, true,
  4452             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  4453 }
  4454 
  4455 
  4456 static void
  4457 Fortran_functions (FILE *inf)
  4458 {
  4459   LOOP_ON_INPUT_LINES (inf, lb, dbp)
  4460     {
  4461       if (*dbp == '%')
  4462         dbp++;                  /* Ratfor escape to fortran */
  4463       dbp = skip_spaces (dbp);
  4464       if (*dbp == '\0')
  4465         continue;
  4466 
  4467       if (LOOKING_AT_NOCASE (dbp, "recursive"))
  4468         dbp = skip_spaces (dbp);
  4469 
  4470       if (LOOKING_AT_NOCASE (dbp, "pure"))
  4471         dbp = skip_spaces (dbp);
  4472 
  4473       if (LOOKING_AT_NOCASE (dbp, "elemental"))
  4474         dbp = skip_spaces (dbp);
  4475 
  4476       switch (c_tolower (*dbp))
  4477         {
  4478         case 'i':
  4479           if (nocase_tail ("integer"))
  4480             F_takeprec ();
  4481           break;
  4482         case 'r':
  4483           if (nocase_tail ("real"))
  4484             F_takeprec ();
  4485           break;
  4486         case 'l':
  4487           if (nocase_tail ("logical"))
  4488             F_takeprec ();
  4489           break;
  4490         case 'c':
  4491           if (nocase_tail ("complex") || nocase_tail ("character"))
  4492             F_takeprec ();
  4493           break;
  4494         case 'd':
  4495           if (nocase_tail ("double"))
  4496             {
  4497               dbp = skip_spaces (dbp);
  4498               if (*dbp == '\0')
  4499                 continue;
  4500               if (nocase_tail ("precision"))
  4501                 break;
  4502               continue;
  4503             }
  4504           break;
  4505         }
  4506       dbp = skip_spaces (dbp);
  4507       if (*dbp == '\0')
  4508         continue;
  4509       switch (c_tolower (*dbp))
  4510         {
  4511         case 'f':
  4512           if (nocase_tail ("function"))
  4513             F_getit (inf);
  4514           continue;
  4515         case 's':
  4516           if (nocase_tail ("subroutine"))
  4517             F_getit (inf);
  4518           continue;
  4519         case 'e':
  4520           if (nocase_tail ("entry"))
  4521             F_getit (inf);
  4522           continue;
  4523         case 'b':
  4524           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
  4525             {
  4526               dbp = skip_spaces (dbp);
  4527               if (*dbp == '\0') /* assume un-named */
  4528                 make_tag ("blockdata", 9, true,
  4529                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
  4530               else
  4531                 F_getit (inf);  /* look for name */
  4532             }
  4533           continue;
  4534         }
  4535     }
  4536 }
  4537 
  4538 
  4539 /*
  4540  * Go language support
  4541  * Original code by Xi Lu <lx@shellcodes.org> (2016)
  4542  */
  4543 static void
  4544 Go_functions(FILE *inf)
  4545 {
  4546   char *cp, *name;
  4547 
  4548   LOOP_ON_INPUT_LINES(inf, lb, cp)
  4549     {
  4550       cp = skip_spaces (cp);
  4551 
  4552       if (LOOKING_AT (cp, "package"))
  4553         {
  4554           name = cp;
  4555           while (!notinname (*cp) && *cp != '\0')
  4556             cp++;
  4557           make_tag (name, cp - name, false, lb.buffer,
  4558                     cp - lb.buffer + 1, lineno, linecharno);
  4559         }
  4560       else if (LOOKING_AT (cp, "func"))
  4561         {
  4562           /* Go implementation of interface, such as:
  4563              func (n *Integer) Add(m Integer) ...
  4564              skip `(n *Integer)` part.
  4565           */
  4566           if (*cp == '(')
  4567             {
  4568               while (*cp != ')')
  4569                 cp++;
  4570               cp = skip_spaces (cp+1);
  4571             }
  4572 
  4573           if (*cp)
  4574             {
  4575               name = cp;
  4576 
  4577               while (!notinname (*cp))
  4578                 cp++;
  4579 
  4580               make_tag (name, cp - name, true, lb.buffer,
  4581                         cp - lb.buffer + 1, lineno, linecharno);
  4582             }
  4583         }
  4584       else if (members && LOOKING_AT (cp, "type"))
  4585         {
  4586           name = cp;
  4587 
  4588           /* Ignore the likes of the following:
  4589              type (
  4590                     A
  4591              )
  4592            */
  4593           if (*cp == '(')
  4594             return;
  4595 
  4596           while (!notinname (*cp) && *cp != '\0')
  4597             cp++;
  4598 
  4599           make_tag (name, cp - name, false, lb.buffer,
  4600                     cp - lb.buffer + 1, lineno, linecharno);
  4601         }
  4602     }
  4603 }
  4604 
  4605 
  4606 /*
  4607  * Ada parsing
  4608  * Original code by
  4609  * Philippe Waroquiers (1998)
  4610  */
  4611 
  4612 /* Once we are positioned after an "interesting" keyword, let's get
  4613    the real tag value necessary. */
  4614 static void
  4615 Ada_getit (FILE *inf, const char *name_qualifier)
  4616 {
  4617   register char *cp;
  4618   char *name;
  4619   char c;
  4620 
  4621   while (perhaps_more_input (inf))
  4622     {
  4623       dbp = skip_spaces (dbp);
  4624       if (*dbp == '\0'
  4625           || (dbp[0] == '-' && dbp[1] == '-'))
  4626         {
  4627           readline (&lb, inf);
  4628           dbp = lb.buffer;
  4629         }
  4630       switch (c_tolower (*dbp))
  4631         {
  4632         case 'b':
  4633           if (nocase_tail ("body"))
  4634             {
  4635               /* Skipping body of   procedure body   or   package body or ....
  4636                  resetting qualifier to body instead of spec. */
  4637               name_qualifier = "/b";
  4638               continue;
  4639             }
  4640           break;
  4641         case 't':
  4642           /* Skipping type of   task type   or   protected type ... */
  4643           if (nocase_tail ("type"))
  4644             continue;
  4645           break;
  4646         }
  4647       if (*dbp == '"')
  4648         {
  4649           dbp += 1;
  4650           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
  4651             continue;
  4652         }
  4653       else
  4654         {
  4655           dbp = skip_spaces (dbp);
  4656           for (cp = dbp;
  4657                c_isalnum (*cp) || *cp == '_' || *cp == '.';
  4658                cp++)
  4659             continue;
  4660           if (cp == dbp)
  4661             return;
  4662         }
  4663       c = *cp;
  4664       *cp = '\0';
  4665       name = concat (dbp, name_qualifier, "");
  4666       *cp = c;
  4667       make_tag (name, strlen (name), true,
  4668                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  4669       free (name);
  4670       if (c == '"')
  4671         dbp = cp + 1;
  4672       return;
  4673     }
  4674 }
  4675 
  4676 static void
  4677 Ada_funcs (FILE *inf)
  4678 {
  4679   bool inquote = false;
  4680   bool skip_till_semicolumn = false;
  4681 
  4682   LOOP_ON_INPUT_LINES (inf, lb, dbp)
  4683     {
  4684       while (*dbp != '\0')
  4685         {
  4686           /* Skip a string i.e. "abcd". */
  4687           if (inquote || (*dbp == '"'))
  4688             {
  4689               dbp = strchr (dbp + !inquote, '"');
  4690               if (dbp != NULL)
  4691                 {
  4692                   inquote = false;
  4693                   dbp += 1;
  4694                   continue;     /* advance char */
  4695                 }
  4696               else
  4697                 {
  4698                   inquote = true;
  4699                   break;        /* advance line */
  4700                 }
  4701             }
  4702 
  4703           /* Skip comments. */
  4704           if (dbp[0] == '-' && dbp[1] == '-')
  4705             break;              /* advance line */
  4706 
  4707           /* Skip character enclosed in single quote i.e. 'a'
  4708              and skip single quote starting an attribute i.e. 'Image. */
  4709           if (*dbp == '\'')
  4710             {
  4711               dbp++ ;
  4712               if (*dbp != '\0')
  4713                 dbp++;
  4714               continue;
  4715             }
  4716 
  4717           if (skip_till_semicolumn)
  4718             {
  4719               if (*dbp == ';')
  4720                 skip_till_semicolumn = false;
  4721               dbp++;
  4722               continue;         /* advance char */
  4723             }
  4724 
  4725           /* Search for beginning of a token.  */
  4726           if (!begtoken (*dbp))
  4727             {
  4728               dbp++;
  4729               continue;         /* advance char */
  4730             }
  4731 
  4732           /* We are at the beginning of a token. */
  4733           switch (c_tolower (*dbp))
  4734             {
  4735             case 'f':
  4736               if (!packages_only && nocase_tail ("function"))
  4737                 Ada_getit (inf, "/f");
  4738               else
  4739                 break;          /* from switch */
  4740               continue;         /* advance char */
  4741             case 'p':
  4742               if (!packages_only && nocase_tail ("procedure"))
  4743                 Ada_getit (inf, "/p");
  4744               else if (nocase_tail ("package"))
  4745                 Ada_getit (inf, "/s");
  4746               else if (nocase_tail ("protected")) /* protected type */
  4747                 Ada_getit (inf, "/t");
  4748               else
  4749                 break;          /* from switch */
  4750               continue;         /* advance char */
  4751 
  4752             case 'u':
  4753               if (typedefs && !packages_only && nocase_tail ("use"))
  4754                 {
  4755                   /* when tagging types, avoid tagging  use type Pack.Typename;
  4756                      for this, we will skip everything till a ; */
  4757                   skip_till_semicolumn = true;
  4758                   continue;     /* advance char */
  4759                 }
  4760 
  4761             case 't':
  4762               if (!packages_only && nocase_tail ("task"))
  4763                 Ada_getit (inf, "/k");
  4764               else if (typedefs && !packages_only && nocase_tail ("type"))
  4765                 {
  4766                   Ada_getit (inf, "/t");
  4767                   while (*dbp != '\0')
  4768                     dbp += 1;
  4769                 }
  4770               else
  4771                 break;          /* from switch */
  4772               continue;         /* advance char */
  4773             }
  4774 
  4775           /* Look for the end of the token. */
  4776           while (!endtoken (*dbp))
  4777             dbp++;
  4778 
  4779         } /* advance char */
  4780     } /* advance line */
  4781 }
  4782 
  4783 
  4784 /*
  4785  * Unix and microcontroller assembly tag handling
  4786  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
  4787  * Idea by Bob Weiner, Motorola Inc. (1994)
  4788  */
  4789 static void
  4790 Asm_labels (FILE *inf)
  4791 {
  4792   register char *cp;
  4793 
  4794   LOOP_ON_INPUT_LINES (inf, lb, cp)
  4795     {
  4796       /* If first char is alphabetic or one of [_.$], test for colon
  4797          following identifier. */
  4798       if (c_isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
  4799         {
  4800           /* Read past label. */
  4801           cp++;
  4802           while (c_isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
  4803             cp++;
  4804           if (*cp == ':' || c_isspace (*cp))
  4805             /* Found end of label, so copy it and add it to the table. */
  4806             make_tag (lb.buffer, cp - lb.buffer, true,
  4807                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  4808         }
  4809     }
  4810 }
  4811 
  4812 
  4813 /*
  4814  * Perl support
  4815  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
  4816  *                 /^use constant[ \t\n]+[^ \t\n{=,;]+/
  4817  * Perl variable names: /^(my|local).../
  4818  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
  4819  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
  4820  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
  4821  */
  4822 static void
  4823 Perl_functions (FILE *inf)
  4824 {
  4825   char *package = savestr ("main"); /* current package name */
  4826   register char *cp;
  4827 
  4828   LOOP_ON_INPUT_LINES (inf, lb, cp)
  4829     {
  4830       cp = skip_spaces (cp);
  4831 
  4832       if (LOOKING_AT (cp, "package"))
  4833         {
  4834           free (package);
  4835           get_tag (cp, &package);
  4836         }
  4837       else if (LOOKING_AT (cp, "sub"))
  4838         {
  4839           char *pos, *sp;
  4840 
  4841         subr:
  4842           sp = cp;
  4843           while (!notinname (*cp))
  4844             cp++;
  4845           if (cp == sp)
  4846             continue;           /* nothing found */
  4847           pos = strchr (sp, ':');
  4848           if (pos && pos < cp && pos[1] == ':')
  4849             {
  4850               /* The name is already qualified. */
  4851               if (!class_qualify)
  4852                 {
  4853                   char *q = pos + 2, *qpos;
  4854                   while ((qpos = strchr (q, ':')) != NULL
  4855                          && qpos < cp
  4856                          && qpos[1] == ':')
  4857                     q = qpos + 2;
  4858                   sp = q;
  4859                 }
  4860               make_tag (sp, cp - sp, true,
  4861                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  4862             }
  4863           else if (class_qualify)
  4864             /* Qualify it. */
  4865             {
  4866               char savechar, *name;
  4867 
  4868               savechar = *cp;
  4869               *cp = '\0';
  4870               name = concat (package, "::", sp);
  4871               *cp = savechar;
  4872               make_tag (name, strlen (name), true,
  4873                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  4874               free (name);
  4875             }
  4876           else
  4877             make_tag (sp, cp - sp, true,
  4878                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  4879         }
  4880       else if (LOOKING_AT (cp, "use constant")
  4881                || LOOKING_AT (cp, "use constant::defer"))
  4882         {
  4883           /* For hash style multi-constant like
  4884                 use constant { FOO => 123,
  4885                                BAR => 456 };
  4886              only the first FOO is picked up.  Parsing across the value
  4887              expressions would be difficult in general, due to possible nested
  4888              hashes, here-documents, etc.  */
  4889           if (*cp == '{')
  4890             cp = skip_spaces (cp+1);
  4891           goto subr;
  4892         }
  4893       else if (globals) /* only if we are tagging global vars */
  4894         {
  4895           /* Skip a qualifier, if any. */
  4896           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
  4897           /* After "my" or "local", but before any following paren or space. */
  4898           char *varstart = cp;
  4899 
  4900           if (qual              /* should this be removed?  If yes, how? */
  4901               && (*cp == '$' || *cp == '@' || *cp == '%'))
  4902             {
  4903               varstart += 1;
  4904               do
  4905                 cp++;
  4906               while (c_isalnum (*cp) || *cp == '_');
  4907             }
  4908           else if (qual)
  4909             {
  4910               /* Should be examining a variable list at this point;
  4911                  could insist on seeing an open parenthesis. */
  4912               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
  4913                 cp++;
  4914             }
  4915           else
  4916             continue;
  4917 
  4918           make_tag (varstart, cp - varstart, false,
  4919                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  4920         }
  4921     }
  4922   free (package);
  4923 }
  4924 
  4925 
  4926 /*
  4927  * Python support
  4928  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
  4929  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
  4930  * More ideas by seb bacon <seb@jamkit.com> (2002)
  4931  */
  4932 static void
  4933 Python_functions (FILE *inf)
  4934 {
  4935   register char *cp;
  4936 
  4937   LOOP_ON_INPUT_LINES (inf, lb, cp)
  4938     {
  4939       cp = skip_spaces (cp);
  4940       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
  4941         {
  4942           char *name = cp;
  4943           while (!notinname (*cp) && *cp != ':')
  4944             cp++;
  4945           make_tag (name, cp - name, true,
  4946                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  4947         }
  4948     }
  4949 }
  4950 
  4951 /*
  4952  * Ruby support
  4953  * Original code by Xi Lu <lx@shellcodes.org> (2015)
  4954  */
  4955 static void
  4956 Ruby_functions (FILE *inf)
  4957 {
         /* Scan INF one line at a time and tag three kinds of things:
            constants assigned at the start of a line, "def"/"class"/"module"
            definitions, and the symbols listed after attr_reader,
            attr_writer, attr_accessor and alias_method.  */
  4958   char *cp = NULL;
         /* These flags are declared outside the line loop on purpose: an
            accessor or alias list that ends in a comma leaves CONTINUATION
            set, and the next line is then parsed as more of the same list.  */
  4959   bool reader = false, writer = false, alias = false, continuation = false;
  4960 
  4961   LOOP_ON_INPUT_LINES (inf, lb, cp)
  4962     {
  4963       bool is_class = false;
  4964       bool is_method = false;
  4965       char *name;
  4966 
  4967       cp = skip_spaces (cp);
  4968       if (!continuation
  4969           /* Constants.  */
  4970           && c_isalpha (*cp) && c_isupper (*cp))
  4971         {
  4972           char *bp, *colon = NULL;
  4973 
  4974           name = cp;
  4975 
                 /* Scan the (possibly "::"-qualified) identifier, remembering
                    where its last colon is.  */
  4976           for (cp++; c_isalnum (*cp) || *cp == '_' || *cp == ':'; cp++)
  4977             {
  4978               if (*cp == ':')
  4979                 colon = cp;
  4980             }
                 /* Only names of at least two characters are considered.  */
  4981           if (cp > name + 1)
  4982             {
  4983               bp = skip_spaces (cp);
                     /* Require a plain '=' next; "==" and "=>" do not count.  */
  4984               if (*bp == '=' && !(bp[1] == '=' || bp[1] == '>'))
  4985                 {
                         /* Tag only the part after the last colon, unless
                            that colon is followed by white space.  */
  4986                   if (colon && !c_isspace (colon[1]))
  4987                     name = colon + 1;
  4988                   make_tag (name, cp - name, false,
  4989                             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  4990                 }
  4991             }
  4992         }
  4993       else if (!continuation
  4994                /* Modules, classes, methods.  */
  4995                && ((is_method = LOOKING_AT (cp, "def"))
  4996                    || (is_class = LOOKING_AT (cp, "class"))
  4997                    || LOOKING_AT (cp, "module")))
  4998         {
  4999           const char self_name[] = "self.";
  5000           const size_t self_size1 = sizeof (self_name) - 1;
  5001 
  5002           name = cp;
  5003 
  5004          /* Ruby method names can end in a '='.  Also, operator overloading can
  5005             define operators whose names include '='.  */
  5006           while (!notinname (*cp) || *cp == '=')
  5007             cp++;
  5008 
  5009           /* Remove "self." from the method name.  */
  5010           if (cp - name > self_size1
  5011               && strneq (name, self_name, self_size1))
  5012             name += self_size1;
  5013 
  5014           /* Remove the class/module qualifiers from method names.  */
  5015           if (is_method)
  5016             {
  5017               char *q;
  5018 
                     /* Find the first '.' inside the name, if there is one.  */
  5019               for (q = name; q < cp && *q != '.'; q++)
  5020                 ;
  5021               if (q < cp - 1)   /* punt if we see just "FOO." */
  5022                 name = q + 1;
  5023             }
  5024 
  5025           /* Don't tag singleton classes.  */
  5026           if (is_class && strneq (name, "<<", 2) && cp == name + 2)
  5027             continue;
  5028 
  5029           make_tag (name, cp - name, true,
  5030                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  5031         }
  5032       else
  5033         {
  5034           /* Tag accessors and aliases.  */
  5035 
  5036           if (!continuation)
  5037             reader = writer = alias = false;
  5038 
                 /* Examine the line word by word, stopping at a comment.  */
  5039           while (*cp && *cp != '#')
  5040             {
  5041               if (!continuation)
  5042                 {
                         /* Not inside a list carried over from a previous
                            line: see whether this word starts a new one.  */
  5043                   reader = writer = alias = false;
  5044                   if (LOOKING_AT (cp, "attr_reader"))
  5045                     reader = true;
  5046                   else if (LOOKING_AT (cp, "attr_writer"))
  5047                     writer = true;
  5048                   else if (LOOKING_AT (cp, "attr_accessor"))
  5049                     {
  5050                       reader = true;
  5051                       writer = true;
  5052                     }
  5053                   else if (LOOKING_AT (cp, "alias_method"))
  5054                     alias = true;
  5055                 }
  5056               if (reader || writer || alias)
  5057                 {
                         /* One pass of this loop handles one list item.  */
  5058                   do {
  5059                     char *np;
  5060 
  5061                     cp = skip_spaces (cp);
  5062                     if (*cp == '(')
  5063                       cp = skip_spaces (cp + 1);
  5064                     np = cp;
  5065                     cp = skip_name (cp);
                           /* Only symbols (":name") are tagged.  Inside a
                              do-while, `continue' jumps straight to the loop
                              condition below.  */
  5066                     if (*np != ':')
  5067                       continue;
  5068                     np++;
  5069                     if (reader)
  5070                       {
  5071                         make_tag (np, cp - np, true,
  5072                                   lb.buffer, cp - lb.buffer + 1,
  5073                                   lineno, linecharno);
  5074                         continuation = false;
  5075                       }
  5076                     if (writer)
  5077                       {
                               /* The writer tag is the symbol name with '='
                                  appended: NAME_LEN counts that '=', and one
                                  more byte is allocated for the NUL.  */
  5078                         size_t name_len = cp - np + 1;
  5079                         char *wr_name = xnew (name_len + 1, char);
  5080 
  5081                         strcpy (mempcpy (wr_name, np, name_len - 1), "=");
                               /* NOTE(review): wr_name is handed to pfnote and
                                  not freed here; presumably pfnote keeps the
                                  pointer -- confirm.  */
  5082                         pfnote (wr_name, true, lb.buffer, cp - lb.buffer + 1,
  5083                                 lineno, linecharno);
  5084                         if (debug)
  5085                           fprintf (stderr, "%s on %s:%"PRIdMAX": %s\n", wr_name,
  5086                                    curfdp->taggedfname, lineno, lb.buffer);
  5087                         continuation = false;
  5088                       }
  5089                     if (alias)
  5090                       {
                               /* Tag the first symbol after alias_method; when
                                  resuming a list from the previous line,
                                  nothing is tagged.  */
  5091                         if (!continuation)
  5092                           make_tag (np, cp - np, true,
  5093                                     lb.buffer, cp - lb.buffer + 1,
  5094                                     lineno, linecharno);
  5095                         continuation = false;
                               /* Skip the rest of the statement.  CONTINUATION
                                  ends up true only if the last non-space
                                  character seen was a comma.  */
  5096                         while (*cp && *cp != '#' && *cp != ';')
  5097                           {
  5098                             if (*cp == ',')
  5099                               continuation = true;
  5100                             else if (!c_isspace (*cp))
  5101                               continuation = false;
  5102                             cp++;
  5103                           }
  5104                         if (*cp == ';')
  5105                           continuation = false;
  5106                       }
  5107                     cp = skip_spaces (cp);
                         /* Go on to another item only after a comma.  For
                            accessors the test also records in CONTINUATION
                            whether a comma was seen, so a comma that ends the
                            line carries the list over to the next line.  */
  5108                   } while ((alias
  5109                             ? (*cp == ',')
  5110                             : (continuation = (*cp == ',')))
  5111                            && (cp = skip_spaces (cp + 1), *cp && *cp != '#'));
  5112                 }
                     /* Step over the current word and any separators that
                        follow it.  */
  5113               if (*cp != '#')
  5114                 cp = skip_name (cp);
  5115               while (*cp && *cp != '#' && notinname (*cp))
  5116                 cp++;
  5117             }
  5118         }
  5119     }
  5120 }
  5121 
  5122 
  5123 /*
  5124  * Rust support
  5125  * Look for:
  5126  *  - fn: Function
  5127  *  - struct: Structure
  5128  *  - enum: Enumeration
  5129  *  - macro_rules!: Macro
  5130  */
  5131 static void
  5132 Rust_entries (FILE *inf)
  5133 {
  5134   char *cp, *name;
  5135   bool is_func = false;
  5136 
  5137   LOOP_ON_INPUT_LINES(inf, lb, cp)
  5138     {
  5139       cp = skip_spaces(cp);
  5140       name = cp;
  5141 
  5142       // Skip 'pub' keyworld
  5143       (void)LOOKING_AT (cp, "pub");
  5144 
  5145       // Look for define
  5146       if ((is_func = LOOKING_AT (cp, "fn"))
  5147           || LOOKING_AT (cp, "enum")
  5148           || LOOKING_AT (cp, "struct")
  5149           || (is_func = LOOKING_AT (cp, "macro_rules!")))
  5150         {
  5151           cp = skip_spaces (cp);
  5152           name = cp;
  5153 
  5154           while (!notinname (*cp))
  5155             cp++;
  5156 
  5157           make_tag (name, cp - name, is_func,
  5158                     lb.buffer, cp - lb.buffer + 1,
  5159                     lineno, linecharno);
  5160           is_func = false;
  5161         }
  5162     }
  5163 }
  5164 
  5165 
  5166 /*
  5167  * PHP support
  5168  * Look for:
  5169  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
  5170  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
  5171  *  - /^[ \t]*define\(\"[^\"]+/
  5172  * Only with --members:
  5173  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
  5174  * Idea by Diez B. Roggisch (2001)
  5175  */
  5176 static void
  5177 PHP_functions (FILE *inf)
  5178 {
  5179   char *cp, *name;
  5180   bool search_identifier = false;
  5181 
  5182   LOOP_ON_INPUT_LINES (inf, lb, cp)
  5183     {
  5184       cp = skip_spaces (cp);
  5185       name = cp;
  5186       if (search_identifier
  5187           && *cp != '\0')
  5188         {
  5189           while (!notinname (*cp))
  5190             cp++;
  5191           make_tag (name, cp - name, true,
  5192                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  5193           search_identifier = false;
  5194         }
  5195       else if (LOOKING_AT (cp, "function"))
  5196         {
  5197           if (*cp == '&')
  5198             cp = skip_spaces (cp+1);
  5199           if (*cp != '\0')
  5200             {
  5201               name = cp;
  5202               while (!notinname (*cp))
  5203                 cp++;
  5204               make_tag (name, cp - name, true,
  5205                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  5206             }
  5207           else
  5208             search_identifier = true;
  5209         }
  5210       else if (LOOKING_AT (cp, "class"))
  5211         {
  5212           if (*cp != '\0')
  5213             {
  5214               name = cp;
  5215               while (*cp != '\0' && !c_isspace (*cp))
  5216                 cp++;
  5217               make_tag (name, cp - name, false,
  5218                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  5219             }
  5220           else
  5221             search_identifier = true;
  5222         }
  5223       else if (strneq (cp, "define", 6)
  5224                && (cp = skip_spaces (cp+6))
  5225                && *cp++ == '('
  5226                && (*cp == '"' || *cp == '\''))
  5227         {
  5228           char quote = *cp++;
  5229           name = cp;
  5230           while (*cp != quote && *cp != '\0')
  5231             cp++;
  5232           make_tag (name, cp - name, false,
  5233                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  5234         }
  5235       else if (members
  5236                && LOOKING_AT (cp, "var")
  5237                && *cp == '$')
  5238         {
  5239           name = cp;
  5240           while (!notinname (*cp))
  5241             cp++;
  5242           make_tag (name, cp - name, false,
  5243                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  5244         }
  5245     }
  5246 }
  5247 
  5248 
  5249 /*
  5250  * Cobol tag functions
  5251  * We could look for anything that could be a paragraph name.
  5252  * i.e. anything that starts in column 8 is one word and ends in a full stop.
  5253  * Idea by Corny de Souza (1993)
  5254  */
  5255 static void
  5256 Cobol_paragraphs (FILE *inf)
  5257 {
  5258   register char *bp, *ep;
  5259 
  5260   LOOP_ON_INPUT_LINES (inf, lb, bp)
  5261     {
  5262       if (lb.len < 9)
  5263         continue;
  5264       bp += 8;
  5265 
  5266       /* If eoln, compiler option or comment ignore whole line. */
  5267       if (bp[-1] != ' ' || !c_isalnum (bp[0]))
  5268         continue;
  5269 
  5270       for (ep = bp; c_isalnum (*ep) || *ep == '-'; ep++)
  5271         continue;
  5272       if (*ep++ == '.')
  5273         make_tag (bp, ep - bp, true,
  5274                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
  5275     }
  5276 }
  5277 
  5278 
  5279 /*
  5280  * Makefile support
  5281  * Ideas by Assar Westerlund <assar@sics.se> (2001)
  5282  */
  5283 static void
  5284 Makefile_targets (FILE *inf)
  5285 {
  5286   register char *bp;
  5287 
  5288   LOOP_ON_INPUT_LINES (inf, lb, bp)
  5289     {
  5290       if (*bp == '\t' || *bp == '#')
  5291         continue;
  5292       while (*bp != '\0' && *bp != '=' && *bp != ':')
  5293         bp++;
  5294       if (*bp == ':' || (globals && *bp == '='))
  5295         {
  5296           /* We should detect if there is more than one tag, but we do not.
  5297              We just skip initial and final spaces. */
  5298           char * namestart = skip_spaces (lb.buffer);
  5299           while (--bp > namestart)
  5300             if (!notinname (*bp))
  5301               break;
  5302           make_tag (namestart, bp - namestart + 1, true,
  5303                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
  5304         }
  5305     }
  5306 }
  5307 
  5308 
  5309 /*
  5310  * Pascal parsing
  5311  * Original code by Mosur K. Mohan (1989)
  5312  *
  5313  *  Locates tags for procedures & functions.  Doesn't do any type- or
  5314  *  var-definitions.  It does look for the keyword "extern" or
  5315  *  "forward" immediately following the procedure statement; if found,
  5316  *  the tag is skipped.
  5317  */
  5318 static void
  5319 Pascal_functions (FILE *inf)
  5320 {
         /* Character-at-a-time state machine over INF.  When the name that
            follows PROCEDURE/FUNCTION is seen, everything needed for its
            tag is saved; the tag is made only after the ';' that ends the
            heading, and only if what follows is not "extern" or "forward".  */
  5321   linebuffer tline;             /* mostly copied from C_entries */
  5322   intmax_t save_lcno, save_lineno;
  5323   ptrdiff_t namelen, taglen;
  5324   char c, *name;
  5325 
  5326   bool                          /* each of these flags is true if: */
  5327     incomment,                  /* point is inside a comment */
  5328     inquote,                    /* point is inside '..' string */
  5329     get_tagname,                /* point is after PROCEDURE/FUNCTION
  5330                                    keyword, so next item = potential tag */
  5331     found_tag,                  /* point is after a potential tag */
  5332     inparms,                    /* point is within parameter-list */
  5333     verify_tag;                 /* point has passed the parm-list, so the
  5334                                    next token will determine whether this
  5335                                    is a FORWARD/EXTERN to be ignored, or
  5336                                    whether it is a real tag */
  5337 
  5338   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
  5339   name = NULL;                  /* keep compiler quiet */
         /* Start with an empty line buffer, so that the first pass through
            the loop below sees end of line and reads the first real line.  */
  5340   dbp = lb.buffer;
  5341   *dbp = '\0';
  5342   linebuffer_init (&tline);
  5343 
  5344   incomment = inquote = false;
  5345   found_tag = false;            /* have a proc name; check if extern */
  5346   get_tagname = false;          /* found "procedure" keyword         */
  5347   inparms = false;              /* found '(' after "proc"            */
  5348   verify_tag = false;           /* check if "extern" is ahead        */
  5349 
  5350 
  5351   while (perhaps_more_input (inf)) /* long main loop to get next char */
  5352     {
  5353       c = *dbp++;
  5354       if (c == '\0')            /* if end of line */
  5355         {
  5356           readline (&lb, inf);
  5357           dbp = lb.buffer;
  5358           if (*dbp == '\0')
  5359             continue;
  5360           if (!((found_tag && verify_tag)
  5361                 || get_tagname))
  5362             c = *dbp++;         /* only if don't need *dbp pointing
  5363                                    to the beginning of the name of
  5364                                    the procedure or function */
  5365         }
  5366       if (incomment)
  5367         {
  5368           if (c == '}')         /* within { } comments */
  5369             incomment = false;
  5370           else if (c == '*' && *dbp == ')') /* within (* *) comments */
  5371             {
  5372               dbp++;
  5373               incomment = false;
  5374             }
  5375           continue;
  5376         }
  5377       else if (inquote)
  5378         {
  5379           if (c == '\'')
  5380             inquote = false;
  5381           continue;
  5382         }
  5383       else
             /* Every case here ends the iteration with `continue', except
                a ';' that closes a heading: that one leaves the switch so
                that the verification code below runs.  */
  5384         switch (c)
  5385           {
  5386           case '\'':
  5387             inquote = true;     /* found first quote */
  5388             continue;
  5389           case '{':             /* found open { comment */
  5390             incomment = true;
  5391             continue;
  5392           case '(':
  5393             if (*dbp == '*')    /* found open (* comment */
  5394               {
  5395                 incomment = true;
  5396                 dbp++;
  5397               }
  5398             else if (found_tag) /* found '(' after tag, i.e., parm-list */
  5399               inparms = true;
  5400             continue;
  5401           case ')':             /* end of parms list */
  5402             if (inparms)
  5403               inparms = false;
  5404             continue;
  5405           case ';':
  5406             if (found_tag && !inparms) /* end of proc or fn stmt */
  5407               {
  5408                 verify_tag = true;
  5409                 break;
  5410               }
  5411             continue;
  5412           }
  5413       if (found_tag && verify_tag && (*dbp != ' '))
  5414         {
  5415           /* Check if this is an "extern" declaration. */
  5416           if (*dbp == '\0')
  5417             continue;
  5418           if (c_tolower (*dbp) == 'e')
  5419             {
  5420               if (nocase_tail ("extern")) /* superfluous, really! */
  5421                 {
  5422                   found_tag = false;
  5423                   verify_tag = false;
  5424                 }
  5425             }
  5426           else if (c_tolower (*dbp) == 'f')
  5427             {
  5428               if (nocase_tail ("forward")) /* check for forward reference */
  5429                 {
  5430                   found_tag = false;
  5431                   verify_tag = false;
  5432                 }
  5433             }
  5434           if (found_tag && verify_tag) /* not external proc, so make tag */
  5435             {
  5436               found_tag = false;
  5437               verify_tag = false;
                     /* Use the values saved when the name was found; LB may
                        have been refilled with later lines since then.  */
  5438               make_tag (name, namelen, true,
  5439                         tline.buffer, taglen, save_lineno, save_lcno);
  5440               continue;
  5441             }
  5442         }
  5443       if (get_tagname)          /* grab name of proc or fn */
  5444         {
  5445           char *cp;
  5446 
  5447           if (*dbp == '\0')
  5448             continue;
  5449 
  5450           /* Find block name. */
  5451           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
  5452             continue;
  5453 
  5454           /* Save all values for later tagging. */
                 /* The whole line is copied into TLINE, and NAME is made to
                    point into that copy, because LB is overwritten by the
                    readline calls that happen before the tag is made.  */
  5455           linebuffer_setlen (&tline, lb.len);
  5456           strcpy (tline.buffer, lb.buffer);
  5457           save_lineno = lineno;
  5458           save_lcno = linecharno;
  5459           name = tline.buffer + (dbp - lb.buffer);
  5460           namelen = cp - dbp;
  5461           taglen = cp - lb.buffer + 1;
  5462 
  5463           dbp = cp;             /* set dbp to e-o-token */
  5464           get_tagname = false;
  5465           found_tag = true;
  5466           continue;
  5467 
  5468           /* And proceed to check for "extern". */
  5469         }
  5470       else if (!incomment && !inquote && !found_tag)
  5471         {
  5472           /* Check for proc/fn keywords. */
  5473           switch (c_tolower (c))
  5474             {
  5475             case 'p':
  5476               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
  5477                 get_tagname = true;
  5478               continue;
  5479             case 'f':
  5480               if (nocase_tail ("unction"))
  5481                 get_tagname = true;
  5482               continue;
  5483             }
  5484         }
  5485     } /* while not eof */
  5486 
  5487   free (tline.buffer);
  5488 }
  5489 
  5490 
  5491 /*
  5492  * Lisp tag functions
  5493  *  look for (def or (DEF, quote or QUOTE
  5494  */
  5495 
  5496 static void L_getit (void);
  5497 
  5498 static void
  5499 L_getit (void)
  5500 {
  5501   if (*dbp == '\'')             /* Skip prefix quote */
  5502     dbp++;
  5503   else if (*dbp == '(')
  5504   {
  5505     dbp++;
  5506     /* Try to skip "(quote " */
  5507     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
  5508       /* Ok, then skip "(" before name in (defstruct (foo)) */
  5509       dbp = skip_spaces (dbp);
  5510   }
  5511   get_lispy_tag (dbp);
  5512 }
  5513 
  5514 static void
  5515 Lisp_functions (FILE *inf)
  5516 {
  5517   LOOP_ON_INPUT_LINES (inf, lb, dbp)
  5518     {
  5519       if (dbp[0] != '(')
  5520         continue;
  5521 
  5522       /* "(defvar foo)" is a declaration rather than a definition.  */
  5523       if (! declarations)
  5524         {
  5525           char *p = dbp + 1;
  5526           if (LOOKING_AT (p, "defvar"))
  5527             {
  5528               p = skip_name (p); /* past var name */
  5529               p = skip_spaces (p);
  5530               if (*p == ')')
  5531                 continue;
  5532             }
  5533         }
  5534 
  5535       if (strneq (dbp + 1, "cl-", 3) || strneq (dbp + 1, "CL-", 3))
  5536         dbp += 3;
  5537 
  5538       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
  5539         {
  5540           dbp = skip_non_spaces (dbp);
  5541           dbp = skip_spaces (dbp);
  5542           L_getit ();
  5543         }
  5544       else
  5545         {
  5546           /* Check for (foo::defmumble name-defined ... */
  5547           do
  5548             dbp++;
  5549           while (!notinname (*dbp) && *dbp != ':');
  5550           if (*dbp == ':')
  5551             {
  5552               do
  5553                 dbp++;
  5554               while (*dbp == ':');
  5555 
  5556               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
  5557                 {
  5558                   dbp = skip_non_spaces (dbp);
  5559                   dbp = skip_spaces (dbp);
  5560                   L_getit ();
  5561                 }
  5562             }
  5563         }
  5564     }
  5565 }
  5566 
  5567 
  5568 /*
  5569  * Lua script language parsing
  5570  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
  5571  *
  5572  *  "function" and "local function" are tags if they are the first
        *  non-blank text on a line.
  5573  */
  5574 static void
  5575 Lua_functions (FILE *inf)
  5576 {
  5577   register char *bp;
  5578 
  5579   LOOP_ON_INPUT_LINES (inf, lb, bp)
  5580     {
  5581       bp = skip_spaces (bp);
  5582       if (bp[0] != 'f' && bp[0] != 'l')
  5583         continue;
  5584 
  5585       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
  5586 
  5587       if (LOOKING_AT (bp, "function"))
  5588         {
  5589           char *tag_name, *tp_dot, *tp_colon;
  5590 
  5591           get_tag (bp, &tag_name);
  5592           /* If the tag ends with ".foo" or ":foo", make an additional tag for
  5593              "foo".  */
  5594           tp_dot = strrchr (tag_name, '.');
  5595           tp_colon = strrchr (tag_name, ':');
  5596           if (tp_dot || tp_colon)
  5597             {
  5598               char *p = tp_dot > tp_colon ? tp_dot : tp_colon;
  5599               ptrdiff_t len_add = p - tag_name + 1;
  5600 
  5601               get_tag (bp + len_add, NULL);
  5602             }
  5603         }
  5604     }
  5605 }
  5606 
  5607 
  5608 /*
  5609  * PostScript tags
  5610  * Just look for lines where the first character is '/'
  5611  * Also look at "defineps" for PSWrap
  5612  * Ideas by:
  5613  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
  5614  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
  5615  */
  5616 static void
  5617 PS_functions (FILE *inf)
  5618 {
  5619   register char *bp, *ep;
  5620 
  5621   LOOP_ON_INPUT_LINES (inf, lb, bp)
  5622     {
  5623       if (bp[0] == '/')
  5624         {
  5625           for (ep = bp+1;
  5626                *ep != '\0' && *ep != ' ' && *ep != '{';
  5627                ep++)
  5628             continue;
  5629           make_tag (bp, ep - bp, true,
  5630                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
  5631         }
  5632       else if (LOOKING_AT (bp, "defineps"))
  5633         get_tag (bp, NULL);
  5634     }
  5635 }
  5636 
  5637 
  5638 /*
  5639  * Forth tags
  5640  * Ignore anything after \ followed by space or in ( )
  5641  * Look for words defined by :
  5642  * Look for constant, code, create, defer, value, and variable
  5643  * OBP extensions:  Look for buffer:, field,
  5644  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
  5645  */
  5646 static void
  5647 Forth_words (FILE *inf)
  5648 {
  5649   register char *bp;
  5650 
  5651   LOOP_ON_INPUT_LINES (inf, lb, bp)
  5652     while ((bp = skip_spaces (bp))[0] != '\0')
  5653       if (bp[0] == '\\' && c_isspace (bp[1]))
  5654         break;                  /* read next line */
  5655       else if (bp[0] == '(' && c_isspace (bp[1]))
  5656         do                      /* skip to ) or eol */
  5657           bp++;
  5658         while (*bp != ')' && *bp != '\0');
  5659       else if (((bp[0] == ':' && c_isspace (bp[1]) && bp++)
  5660                 || LOOKING_AT_NOCASE (bp, "constant")
  5661                 || LOOKING_AT_NOCASE (bp, "2constant")
  5662                 || LOOKING_AT_NOCASE (bp, "fconstant")
  5663                 || LOOKING_AT_NOCASE (bp, "code")
  5664                 || LOOKING_AT_NOCASE (bp, "create")
  5665                 || LOOKING_AT_NOCASE (bp, "defer")
  5666                 || LOOKING_AT_NOCASE (bp, "value")
  5667                 || LOOKING_AT_NOCASE (bp, "2value")
  5668                 || LOOKING_AT_NOCASE (bp, "fvalue")
  5669                 || LOOKING_AT_NOCASE (bp, "variable")
  5670                 || LOOKING_AT_NOCASE (bp, "2variable")
  5671                 || LOOKING_AT_NOCASE (bp, "fvariable")
  5672                 || LOOKING_AT_NOCASE (bp, "buffer:")
  5673                 || LOOKING_AT_NOCASE (bp, "field:")
  5674                 || LOOKING_AT_NOCASE (bp, "+field")
  5675                 || LOOKING_AT_NOCASE (bp, "field") /* not standard? */
  5676                 || LOOKING_AT_NOCASE (bp, "begin-structure")
  5677                 || LOOKING_AT_NOCASE (bp, "synonym")
  5678                 )
  5679                && c_isspace (bp[0]))
  5680         {
  5681           /* Yay!  A definition! */
  5682           char* name_start = skip_spaces (bp);
  5683           char* name_end = skip_non_spaces (name_start);
  5684           if (name_start < name_end)
  5685             make_tag (name_start, name_end - name_start,
  5686                       true, lb.buffer, name_end - lb.buffer,
  5687                       lineno, linecharno);
  5688           bp = name_end;
  5689         }
  5690       else
  5691         bp = skip_non_spaces (bp);
  5692 }
  5693 
  5694 
  5695 /*
  5696  * Scheme tag functions
  5697  * look for (def... xyzzy
  5698  *          (def... (xyzzy
  5699  *          (def ... ((...(xyzzy ....
  5700  *          (set! xyzzy
  5701  * Original code by Ken Haase (1985?)
  5702  */
  5703 static void
  5704 Scheme_functions (FILE *inf)
  5705 {
  5706   register char *bp;
  5707 
  5708   LOOP_ON_INPUT_LINES (inf, lb, bp)
  5709     {
  5710       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
  5711         {
  5712           bp = skip_non_spaces (bp+4);
  5713           /* Skip over open parens and white space.
  5714              Don't continue past '\0' or '='. */
  5715           while (*bp && notinname (*bp) && *bp != '=')
  5716             bp++;
  5717           get_lispy_tag (bp);
  5718         }
  5719       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
  5720         get_lispy_tag (bp);
  5721     }
  5722 }
  5723 
  5724 
  5725 /* Find tags in TeX and LaTeX input files.  */
  5726 
  5727 /* TEX_toktab is a table of TeX control sequences that define tags.
  5728  * Each entry records one such control sequence.
  5729  *
  5730  * Original code from who knows whom.
  5731  * Ideas by:
  5732  *   Stefan Monnier (2002)
  5733  */
  5734 
  5735 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
  5736 
  5737 /* Default set of control sequences to put into TEX_toktab.
  5738    The value of environment var TEXTAGS is prepended to this.  */
  5739 static const char *TEX_defenv = "\
  5740 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
  5741 :part:appendix:entry:index:def\
  5742 :newcommand:renewcommand:newenvironment:renewenvironment";
  5743 
  5744 static void TEX_decode_env (const char *, const char *);
  5745 
  5746 /*
  5747  * TeX/LaTeX scanning loop.
  5748  */
  5749 static void
  5750 TeX_commands (FILE *inf)
  5751 {
  5752   char *cp;
  5753   linebuffer *key;
  5754 
  5755   char TEX_esc = '\0';
  5756   char TEX_opgrp UNINIT, TEX_clgrp UNINIT;
  5757 
  5758   /* Initialize token table once from environment. */
  5759   if (TEX_toktab == NULL)
  5760     TEX_decode_env ("TEXTAGS", TEX_defenv);
  5761 
  5762   LOOP_ON_INPUT_LINES (inf, lb, cp)
  5763     {
  5764       /* Look at each TEX keyword in line. */
  5765       for (;;)
  5766         {
  5767           /* Look for a TEX escape. */
  5768           while (true)
  5769             {
  5770               char c = *cp++;
  5771               if (c == '\0' || c == '%')
  5772                 goto tex_next_line;
  5773 
  5774               /* Select either \ or ! as escape character, whichever comes
  5775                  first outside a comment.  */
  5776               if (!TEX_esc)
  5777                 switch (c)
  5778                   {
  5779                   case '\\':
  5780                     TEX_esc = c;
  5781                     TEX_opgrp = '{';
  5782                     TEX_clgrp = '}';
  5783                     break;
  5784 
  5785                   case '!':
  5786                     TEX_esc = c;
  5787                     TEX_opgrp = '<';
  5788                     TEX_clgrp = '>';
  5789                     break;
  5790                   }
  5791 
  5792               if (c == TEX_esc)
  5793                 break;
  5794             }
  5795 
  5796           for (key = TEX_toktab; key->buffer != NULL; key++)
  5797             if (strneq (cp, key->buffer, key->len))
  5798               {
  5799                 char *p;
  5800                 ptrdiff_t namelen, linelen;
  5801                 bool opgrp = false;
  5802 
  5803                 cp = skip_spaces (cp + key->len);
  5804                 if (*cp == TEX_opgrp)
  5805                   {
  5806                     opgrp = true;
  5807                     cp++;
  5808                   }
  5809                 for (p = cp;
  5810                      (!c_isspace (*p) && *p != '#' &&
  5811                       *p != TEX_opgrp && *p != TEX_clgrp);
  5812                      p++)
  5813                   continue;
  5814                 namelen = p - cp;
  5815                 linelen = lb.len;
  5816                 if (!opgrp || *p == TEX_clgrp)
  5817                   {
  5818                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
  5819                       p++;
  5820                     linelen = p - lb.buffer + 1;
  5821                   }
  5822                 make_tag (cp, namelen, true,
  5823                           lb.buffer, linelen, lineno, linecharno);
  5824                 goto tex_next_line; /* We only tag a line once */
  5825               }
  5826         }
  5827     tex_next_line:
  5828       ;
  5829     }
  5830 }
  5831 
  5832 /* Read environment and prepend it to the default string.
  5833    Build token table. */
  5834 static void
  5835 TEX_decode_env (const char *evarname, const char *defenv)
  5836 {
  5837   const char *env, *p;
  5838   ptrdiff_t len = 1;
  5839 
  5840   /* Append default string to environment. */
  5841   env = getenv (evarname);
  5842   if (!env)
  5843     env = defenv;
  5844   else
  5845     env = concat (env, defenv, "");
  5846 
  5847   /* If the environment variable doesn't start with a colon, increase
  5848      the length of the token table.  */
  5849   if (*env != ':')
  5850     len++;
  5851 
  5852   /* Allocate a token table */
  5853   for (p = env; (p = strchr (p, ':')); )
  5854     if (*++p)
  5855       len++;
  5856   TEX_toktab = xnew (len, linebuffer);
  5857 
  5858   /* Unpack environment string into token table. Be careful about */
  5859   /* zero-length strings (leading ':', "::" and trailing ':') */
  5860   for (ptrdiff_t i = 0; *env != '\0'; )
  5861     {
  5862       p = strchr (env, ':');
  5863       if (!p)                   /* End of environment string. */
  5864         p = env + strlen (env);
  5865       if (p - env > 0)
  5866         {                       /* Only non-zero strings. */
  5867           TEX_toktab[i].buffer = savenstr (env, p - env);
  5868           TEX_toktab[i].len = p - env;
  5869           i++;
  5870         }
  5871       if (*p)
  5872         env = p + 1;
  5873       else
  5874         {
  5875           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
  5876           TEX_toktab[i].len = 0;
  5877           break;
  5878         }
  5879     }
  5880 }
  5881 
  5882 
  5883 /* Texinfo support.  Dave Love, Mar. 2000.  */
  5884 static void
  5885 Texinfo_nodes (FILE *inf)
  5886 {
  5887   char *cp, *start;
  5888   LOOP_ON_INPUT_LINES (inf, lb, cp)
  5889     if (LOOKING_AT (cp, "@node"))
  5890       {
  5891         start = cp;
  5892         while (*cp != '\0' && *cp != ',')
  5893           cp++;
  5894         make_tag (start, cp - start, true,
  5895                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  5896       }
  5897 }
  5898 
  5899 
  5900 /*
  5901  * HTML support.
  5902  * Contents of <title>, <h1>, <h2>, <h3> are tags.
  5903  * Contents of <a name=xxx> are tags with name xxx.
  5904  *
  5905  * Francesco Potortì, 2002.
  5906  */
  5907 static void
  5908 HTML_labels (FILE *inf)
  5909 {
  5910   bool getnext = false;         /* next text outside of HTML tags is a tag */
  5911   bool skiptag = false;         /* skip to the end of the current HTML tag */
  5912   bool intag = false;           /* inside an html tag, looking for ID= */
  5913   bool inanchor = false;        /* when INTAG, is an anchor, look for NAME= */
  5914   char *end;
  5915 
  5916 
  5917   linebuffer_setlen (&token_name, 0); /* no name in buffer */
  5918 
  5919   LOOP_ON_INPUT_LINES (inf, lb, dbp)
  5920     for (;;)                    /* loop on the same line */
  5921       {
  5922         if (skiptag)            /* skip HTML tag */
  5923           {
  5924             while (*dbp != '\0' && *dbp != '>')
  5925               dbp++;
  5926             if (*dbp == '>')
  5927               {
  5928                 dbp += 1;
  5929                 skiptag = false;
  5930                 continue;       /* look on the same line */
  5931               }
  5932             break;              /* go to next line */
  5933           }
  5934 
  5935         else if (intag) /* look for "name=" or "id=" */
  5936           {
  5937             while (*dbp != '\0' && *dbp != '>'
  5938                    && c_tolower (*dbp) != 'n' && c_tolower (*dbp) != 'i')
  5939               dbp++;
  5940             if (*dbp == '\0')
  5941               break;            /* go to next line */
  5942             if (*dbp == '>')
  5943               {
  5944                 dbp += 1;
  5945                 intag = false;
  5946                 continue;       /* look on the same line */
  5947               }
  5948             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
  5949                 || LOOKING_AT_NOCASE (dbp, "id="))
  5950               {
  5951                 bool quoted = (dbp[0] == '"');
  5952 
  5953                 if (quoted)
  5954                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
  5955                     continue;
  5956                 else
  5957                   for (end = dbp; *end != '\0' && intoken (*end); end++)
  5958                     continue;
  5959                 linebuffer_setlen (&token_name, end - dbp);
  5960                 memcpyz (token_name.buffer, dbp, end - dbp);
  5961 
  5962                 dbp = end;
  5963                 intag = false;  /* we found what we looked for */
  5964                 skiptag = true; /* skip to the end of the tag */
  5965                 getnext = true; /* then grab the text */
  5966                 continue;       /* look on the same line */
  5967               }
  5968             dbp += 1;
  5969           }
  5970 
  5971         else if (getnext)       /* grab next tokens and tag them */
  5972           {
  5973             dbp = skip_spaces (dbp);
  5974             if (*dbp == '\0')
  5975               break;            /* go to next line */
  5976             if (*dbp == '<')
  5977               {
  5978                 intag = true;
  5979                 inanchor = (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]));
  5980                 continue;       /* look on the same line */
  5981               }
  5982 
  5983             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
  5984               continue;
  5985             make_tag (token_name.buffer, token_name.len, true,
  5986                       dbp, end - dbp, lineno, linecharno);
  5987             linebuffer_setlen (&token_name, 0); /* no name in buffer */
  5988             getnext = false;
  5989             break;              /* go to next line */
  5990           }
  5991 
  5992         else                    /* look for an interesting HTML tag */
  5993           {
  5994             while (*dbp != '\0' && *dbp != '<')
  5995               dbp++;
  5996             if (*dbp == '\0')
  5997               break;            /* go to next line */
  5998             intag = true;
  5999             if (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]))
  6000               {
  6001                 inanchor = true;
  6002                 continue;       /* look on the same line */
  6003               }
  6004             else if (LOOKING_AT_NOCASE (dbp, "<title>")
  6005                      || LOOKING_AT_NOCASE (dbp, "<h1>")
  6006                      || LOOKING_AT_NOCASE (dbp, "<h2>")
  6007                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
  6008               {
  6009                 intag = false;
  6010                 getnext = true;
  6011                 continue;       /* look on the same line */
  6012               }
  6013             dbp += 1;
  6014           }
  6015       }
  6016 }
  6017 
  6018 
  6019 /*
  6020  * Prolog support
  6021  *
  6022  * Assumes that the predicate or rule starts at column 0.
  6023  * Only the first clause of a predicate or rule is added.
  6024  * Original code by Sunichirou Sugou (1989)
  6025  * Rewritten by Anders Lindgren (1996)
  6026  */
  6027 static ptrdiff_t prolog_pr (char *, char *, ptrdiff_t);
  6028 static void prolog_skip_comment (linebuffer *, FILE *);
  6029 static size_t prolog_atom (char *, size_t);
  6030 
  6031 static void
  6032 Prolog_functions (FILE *inf)
  6033 {
  6034   char *cp, *last = NULL;
  6035   ptrdiff_t lastlen = 0, allocated = 0;
  6036 
  6037   LOOP_ON_INPUT_LINES (inf, lb, cp)
  6038     {
  6039       if (cp[0] == '\0')        /* Empty line */
  6040         continue;
  6041       else if (c_isspace (cp[0])) /* Not a predicate */
  6042         continue;
  6043       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
  6044         prolog_skip_comment (&lb, inf);
  6045       else
  6046         {
  6047           ptrdiff_t len = prolog_pr (cp, last, lastlen);
  6048           if (0 < len)
  6049             {
  6050               /* Store the predicate name to avoid generating duplicate
  6051                  tags later.  */
  6052               if (allocated <= len)
  6053                 {
  6054                   xrnew (last, len + 1, 1);
  6055                   allocated = len + 1;
  6056                 }
  6057               memcpyz (last, cp, len);
  6058               lastlen = len;
  6059             }
  6060         }
  6061     }
  6062   free (last);
  6063 }
  6064 
  6065 
  6066 static void
  6067 prolog_skip_comment (linebuffer *plb, FILE *inf)
  6068 {
  6069   char *cp;
  6070 
  6071   do
  6072     {
  6073       for (cp = plb->buffer; *cp != '\0'; cp++)
  6074         if (cp[0] == '*' && cp[1] == '/')
  6075           return;
  6076       readline (plb, inf);
  6077     }
  6078   while (perhaps_more_input (inf));
  6079 }
  6080 
  6081 /*
  6082  * A predicate or rule definition is added if it matches:
  6083  *     <beginning of line><Prolog Atom><whitespace>(
  6084  * or  <beginning of line><Prolog Atom><whitespace>:-
  6085  *
  6086  * It is added to the tags database if it doesn't match the
  6087  * name of the previous clause header.
  6088  *
  6089  * Return the size of the name of the predicate or rule, or 0 if no
  6090  * header was found.
  6091  */
  6092 static ptrdiff_t
  6093 prolog_pr (char *s, char *last, ptrdiff_t lastlen)
  6094 {
  6095   ptrdiff_t len = prolog_atom (s, 0);
  6096   if (len == 0)
  6097     return 0;
  6098   ptrdiff_t pos = skip_spaces (s + len) - s;
  6099 
  6100   /* Save only the first clause.  */
  6101   if ((s[pos] == '.'
  6102        || (s[pos] == '(' && (pos += 1))
  6103        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
  6104       && ! (lastlen == len && memcmp (s, last, len) == 0))
  6105     {
  6106       make_tag (s, len, true, s, pos, lineno, linecharno);
  6107       return len;
  6108     }
  6109 
  6110   return 0;
  6111 }
  6112 
  6113 /*
  6114  * Consume a Prolog atom.
  6115  * Return the number of bytes consumed, or 0 if there was an error.
  6116  *
  6117  * A prolog atom, in this context, could be one of:
  6118  * - An alphanumeric sequence, starting with a lower case letter.
  6119  * - A quoted arbitrary string. Single quotes can escape themselves.
  6120  *   Backslash quotes everything.
  6121  */
  6122 static size_t
  6123 prolog_atom (char *s, size_t pos)
  6124 {
  6125   size_t origpos;
  6126 
  6127   origpos = pos;
  6128 
  6129   if (c_islower (s[pos]) || s[pos] == '_')
  6130     {
  6131       /* The atom is unquoted. */
  6132       pos++;
  6133       while (c_isalnum (s[pos]) || s[pos] == '_')
  6134         {
  6135           pos++;
  6136         }
  6137       return pos - origpos;
  6138     }
  6139   else if (s[pos] == '\'')
  6140     {
  6141       pos++;
  6142 
  6143       for (;;)
  6144         {
  6145           if (s[pos] == '\'')
  6146             {
  6147               pos++;
  6148               if (s[pos] != '\'')
  6149                 break;
  6150               pos++;            /* A double quote  */
  6151             }
  6152           else if (s[pos] == '\0')
  6153             /* Multiline quoted atoms are ignored.  */
  6154             return 0;
  6155           else if (s[pos] == '\\')
  6156             {
  6157               if (s[pos+1] == '\0')
  6158                 return 0;
  6159               pos += 2;
  6160             }
  6161           else
  6162             pos++;
  6163         }
  6164       return pos - origpos;
  6165     }
  6166   else
  6167     return 0;
  6168 }
  6169 
  6170 
  6171 /*
  6172  * Support for Mercury
  6173  *
  6174  * Assumes that the declarations start at column 0.
  6175  * Original code by Sunichirou Sugou (1989) for Prolog.
  6176  * Rewritten by Anders Lindgren (1996) for Prolog.
  6177  * Adapted by Fabrice Nicol (2021) for Mercury.
  6178  * Note: Prolog-support behavior is preserved if
  6179  * --declarations is used, corresponding to
  6180  * with_mercury_definitions=true.
  6181  */
  6182 
  6183 static ptrdiff_t mercury_pr (char *, char *, ptrdiff_t);
  6184 static void mercury_skip_comment (linebuffer *, FILE *);
  6185 static bool is_mercury_type = false;
  6186 static bool is_mercury_quantifier = false;
  6187 static bool is_mercury_declaration = false;
  6188 typedef struct
  6189 {
  6190   size_t pos;          /* Position reached in parsing tag name.  */
  6191   size_t namelength;   /* Length of tag name  */
  6192   size_t totlength;    /* Total length of parsed tag: this field is currently
  6193                           reserved for control and debugging.   */
  6194 } mercury_pos_t;
  6195 
  6196 /*
  6197  * Objective-C and Mercury have identical file extension .m.
  6198  * To disambiguate between Objective C and Mercury, parse file
  6199  * with the following heuristics hook:
  6200  *   - if line starts with :-, choose Mercury unconditionally;
  6201  *   - if line starts with #, @, choose Objective-C;
  6202  *   - otherwise compute the following ratio:
  6203  *
  6204  *     r = (number of lines with :-
  6205  *          or % in non-commented parts or . at trimmed EOL)
  6206  *         / (number of lines - number of lines starting by any amount
  6207  *                        of whitespace, optionally followed by comment(s))
  6208  *
  6209  * Note: strings are neglected in counts.
  6210  *
  6211  * If r > mercury_heuristics_ratio, choose Mercury.
  6212  * Experimental tests show that a possibly optimal default value for
  6213  * this floor value is around 0.5.  This is the default value for
  6214  * MERCURY_HEURISTICS_RATIO, defined in the first lines of this file.
  6215  * The closer r is to 0.5, the closer the source code to pure Prolog.
  6216  * Idiomatic Mercury is scored either with r = 1.0 or higher.
  6217  * Objective-C is scored with r = 0.0.  When this fails, the r-score
  6218  * never rose above 0.1 in Objective-C tests.
  6219  */
  6220 
  6221 static void
  6222 test_objc_is_mercury (char *this_file, language **lang)
  6223 {
  6224   if (this_file == NULL) return;
  6225   FILE* fp = fopen (this_file, "r");
  6226   if (fp == NULL)
  6227     pfatal (this_file);
  6228 
  6229   bool blank_line = false; /* Line starting with any amount of white space
  6230                               followed by optional comment(s).  */
  6231   bool commented_line = false;
  6232   bool found_dot = false;
  6233   bool only_space_before = true;
  6234   bool start_of_line = true;
  6235   int c;
  6236   intmax_t lines = 1;
  6237   intmax_t mercury_dots = 0;
  6238   intmax_t percentage_signs = 0;
  6239   intmax_t rule_signs = 0;
  6240   float ratio = 0;
  6241 
  6242   while ((c = fgetc (fp)) != EOF)
  6243     {
  6244       switch (c)
  6245         {
  6246         case '\n':
  6247           if (! blank_line) ++lines;
  6248           blank_line = true;
  6249           commented_line = false;
  6250           start_of_line = true;
  6251           if (found_dot) ++mercury_dots;
  6252           found_dot = false;
  6253           only_space_before = true;
  6254           break;
  6255         case '.':
  6256           found_dot = ! commented_line;
  6257           only_space_before = false;
  6258           break;
  6259         case  '%': /* More frequent in Mercury.  May be modulo in Obj.-C.  */
  6260           if (! commented_line)
  6261             {
  6262               ++percentage_signs;
  6263               /* Cannot tell if it is a comment or modulo yet for sure.
  6264                  Yet works for heuristic purposes.  */
  6265               commented_line = true;
  6266             }
  6267           found_dot = false;
  6268           start_of_line = false;
  6269           only_space_before = false;
  6270           break;
  6271         case  '/':
  6272           {
  6273             int d = fgetc (fp);
  6274             found_dot = false;
  6275             only_space_before = false;
  6276             if (! commented_line)
  6277               {
  6278                 if (d == '*')
  6279                   commented_line = true;
  6280                 else
  6281                   /* If d == '/', cannot tell if it is an Obj.-C comment:
  6282                      may be Mercury integ. division.  */
  6283                     blank_line = false;
  6284               }
  6285           }
  6286           FALLTHROUGH;
  6287         case  ' ':
  6288         case '\t':
  6289           start_of_line = false;
  6290           break;
  6291         case ':':
  6292           c = fgetc (fp);
  6293           if (start_of_line)
  6294             {
  6295               if (c == '-')
  6296                 {
  6297                   ratio = 1.0; /* Failsafe, not an operator in Obj.-C.  */
  6298                   goto out;
  6299                 }
  6300               start_of_line = false;
  6301             }
  6302           else
  6303             {
  6304               /* p :- q.  Frequent in Mercury.
  6305                  Rare or in quoted exprs in Obj.-C.  */
  6306               if (c == '-' && ! commented_line)
  6307                 ++rule_signs;
  6308             }
  6309           blank_line = false;
  6310           found_dot = false;
  6311           only_space_before = false;
  6312           break;
  6313         case '@':
  6314         case '#':
  6315           if (start_of_line || only_space_before)
  6316             {
  6317               ratio = 0.0;
  6318               goto out;
  6319             }
  6320           FALLTHROUGH;
  6321         default:
  6322           start_of_line = false;
  6323           blank_line = false;
  6324           found_dot = false;
  6325           only_space_before = false;
  6326         }
  6327     }
  6328 
  6329   /* Fallback heuristic test.  Not failsafe but errless in practice.  */
  6330   ratio = ((float) rule_signs + percentage_signs + mercury_dots) / lines;
  6331 
  6332  out:
  6333   if (fclose (fp) == EOF)
  6334     pfatal (this_file);
  6335 
  6336   if (ratio > mercury_heuristics_ratio)
  6337     {
  6338       /* Change the language from Objective-C to Mercury.  */
  6339       static language lang0 = { "mercury", Mercury_help, Mercury_functions,
  6340         Mercury_suffixes };
  6341       *lang = &lang0;
  6342     }
  6343 }
  6344 
  6345 static void
  6346 Mercury_functions (FILE *inf)
  6347 {
  6348   char *cp, *last = NULL;
  6349   ptrdiff_t lastlen = 0, allocated = 0;
  6350   if (declarations) with_mercury_definitions = true;
  6351 
  6352   LOOP_ON_INPUT_LINES (inf, lb, cp)
  6353     {
  6354       if (cp[0] == '\0')   /* Empty line.  */
  6355         continue;
  6356       else if (c_isspace (cp[0]) || cp[0] == '%')
  6357         /*  A Prolog-type comment or anything other than a declaration.  */
  6358         continue;
  6359       else if (cp[0] == '/' && cp[1] == '*')  /* Mercury C-type comment.  */
  6360         mercury_skip_comment (&lb, inf);
  6361       else
  6362         {
  6363           is_mercury_declaration = (cp[0] == ':' && cp[1] == '-');
  6364 
  6365           if (is_mercury_declaration
  6366               || with_mercury_definitions)
  6367             {
  6368               ptrdiff_t len = mercury_pr (cp, last, lastlen);
  6369               if (0 < len)
  6370                 {
  6371                   /* Store the declaration to avoid generating duplicate
  6372                      tags later.  */
  6373                   if (allocated <= len)
  6374                     {
  6375                       xrnew (last, len + 1, 1);
  6376                       allocated = len + 1;
  6377                     }
  6378                   memcpyz (last, cp, len);
  6379                   lastlen = len;
  6380                 }
  6381             }
  6382         }
  6383     }
  6384   free (last);
  6385 }
  6386 
  6387 static void
  6388 mercury_skip_comment (linebuffer *plb, FILE *inf)
  6389 {
  6390   char *cp;
  6391 
  6392   do
  6393     {
  6394       for (cp = plb->buffer; *cp != '\0'; ++cp)
  6395         if (cp[0] == '*' && cp[1] == '/')
  6396           return;
  6397       readline (plb, inf);
  6398     }
  6399   while (perhaps_more_input (inf));
  6400 }
  6401 
  6402 /*
  6403  * A declaration is added if it matches:
  6404  *     <beginning of line>:-<whitespace><Mercury Term><whitespace>(
  6405  * If with_mercury_definitions == true, we also add:
  6406  *     <beginning of line><Mercury item><whitespace>(
  6407  * or  <beginning of line><Mercury item><whitespace>:-
  6408  * As for Prolog support, different arities and types are not taken into
  6409  * consideration.
  6410  * Item is added to the tags database if it doesn't match the
  6411  * name of the previous declaration.
  6412  *
  6413  * Consume a Mercury declaration.
  6414  * Return the number of bytes consumed, or 0 if there was an error.
  6415  *
  6416  * A Mercury declaration must be one of:
  6417  *  :- type
  6418  *  :- solver type
  6419  *  :- pred
  6420  *  :- func
  6421  *  :- inst
  6422  *  :- mode
  6423  *  :- typeclass
  6424  *  :- instance
  6425  *  :- pragma
  6426  *  :- promise
  6427  *  :- initialise
  6428  *  :- finalise
  6429  *  :- mutable
  6430  *  :- module
  6431  *  :- interface
  6432  *  :- implementation
  6433  *  :- import_module
  6434  *  :- use_module
  6435  *  :- include_module
  6436  *  :- end_module
  6437  * followed on the same line by an alphanumeric sequence, starting with a lower
  6438  * case letter or by a single-quoted arbitrary string.
  6439  * Single quotes can escape themselves.  Backslash quotes everything.
  6440  *
  6441  * Return the size of the name of the declaration or 0 if no header was found.
  6442  * As quantifiers may precede functions or predicates, we must list them too.
  6443  */
  6444 
  6445 static const char *Mercury_decl_tags[] = {"type", "solver type", "pred",
  6446   "func", "inst", "mode", "typeclass", "instance", "pragma", "promise",
  6447   "initialise", "finalise", "mutable", "module", "interface", "implementation",
  6448   "import_module", "use_module", "include_module", "end_module", "some", "all"};
  6449 
  6450 static mercury_pos_t
  6451 mercury_decl (char *s, size_t pos)
  6452 {
  6453   mercury_pos_t null_pos = {0, 0, 0};
  6454 
  6455   if (s == NULL) return null_pos;
  6456 
  6457   size_t origpos;
  6458   origpos = pos;
  6459 
  6460   while (c_isalnum (s[pos]) || s[pos] == '_')
  6461     pos++;
  6462 
  6463   unsigned char decl_type_length = pos - origpos;
  6464   char buf[decl_type_length + 1];
  6465   memset (buf, 0, decl_type_length + 1);
  6466 
  6467   /* Mercury declaration tags.  Consume them, then check the declaration item
  6468      following :- is legitimate, then go on as in the prolog case.  */
  6469 
  6470   memcpy (buf, &s[origpos], decl_type_length);
  6471 
  6472   bool found_decl_tag = false;
  6473 
  6474   if (is_mercury_quantifier)
  6475     {
  6476       if (strcmp (buf, "pred") != 0 && strcmp (buf, "func") != 0) /* Bad syntax.  */
  6477         return null_pos;
  6478 
  6479       is_mercury_quantifier = false; /* Reset to base value.  */
  6480       found_decl_tag = true;
  6481     }
  6482   else
  6483     {
  6484       for (int j = 0; j < sizeof (Mercury_decl_tags) / sizeof (char*); ++j)
  6485         {
  6486           if (strcmp (buf, Mercury_decl_tags[j]) == 0)
  6487             {
  6488               found_decl_tag = true;
  6489               if (strcmp (buf, "type") == 0)
  6490                 is_mercury_type = true;
  6491 
  6492               if (strcmp (buf, "some") == 0
  6493                   || strcmp (buf, "all") == 0)
  6494                 {
  6495                   is_mercury_quantifier = true;
  6496                 }
  6497 
  6498               break;  /* Found declaration tag of rank j.  */
  6499             }
  6500           else
  6501             /* 'solver type' has a blank in the middle,
  6502                so this is the hard case.  */
  6503             if (strcmp (buf, "solver") == 0)
  6504               {
  6505                 do
  6506                   pos++;
  6507                 while (c_isalnum (s[pos]) || s[pos] == '_');
  6508 
  6509                 decl_type_length = pos - origpos;
  6510                 char buf2[decl_type_length + 1];
  6511                 memset (buf2, 0, decl_type_length + 1);
  6512                 memcpy (buf2, &s[origpos], decl_type_length);
  6513 
  6514                 if (strcmp (buf2, "solver type") == 0)
  6515                   {
  6516                     found_decl_tag = false;
  6517                     break;  /* Found declaration tag of rank j.  */
  6518                   }
  6519               }
  6520         }
  6521     }
  6522 
  6523   /* If with_mercury_definitions == false
  6524    * this is a Mercury syntax error, ignoring... */
  6525 
  6526   if (with_mercury_definitions)
  6527     {
  6528       if (found_decl_tag)
  6529         pos = skip_spaces (s + pos) - s; /* Skip len blanks again.  */
  6530       else
  6531         /* Prolog-like behavior
  6532          * we have parsed the predicate once, yet inappropriately
  6533          * so restarting again the parsing step.  */
  6534         pos = 0;
  6535     }
  6536   else
  6537     {
  6538       if (found_decl_tag)
  6539         pos = skip_spaces (s + pos) - s; /* Skip len blanks again.  */
  6540       else
  6541         return null_pos;
  6542     }
  6543 
  6544   /* From now on it is the same as for Prolog except for module dots.  */
  6545 
  6546   size_t start_of_name = pos;
  6547 
  6548   if (c_islower (s[pos]) || s[pos] == '_' )
  6549     {
  6550       /* The name is unquoted.
  6551          Do not confuse module dots with end-of-declaration dots.  */
  6552       int module_dot_pos = 0;
  6553 
  6554       while (c_isalnum (s[pos])
  6555              || s[pos] == '_'
  6556              || (s[pos] == '.' /* A module dot.  */
  6557                  && (c_isalnum (s[pos + 1]) || s[pos + 1] == '_')
  6558                  && (module_dot_pos = pos)))  /* Record module dot position.
  6559                                                  Erase module from name.  */
  6560         ++pos;
  6561 
  6562       if (module_dot_pos)
  6563         {
  6564           start_of_name = module_dot_pos + 2;
  6565           ++pos;
  6566         }
  6567 
  6568       mercury_pos_t position = {pos, pos - start_of_name + 1, pos - origpos};
  6569       return position;
  6570     }
  6571   else if (s[pos] == '\'')
  6572     {
  6573       ++pos;
  6574       for (;;)
  6575         {
  6576           if (s[pos] == '\'')
  6577             {
  6578               ++pos;
  6579               if (s[pos] != '\'')
  6580                 break;
  6581               ++pos; /* A double quote.  */
  6582             }
  6583           else if (s[pos] == '\0')  /* Multiline quoted atoms are ignored.  */
  6584             return null_pos;
  6585           else if (s[pos] == '\\')
  6586             {
  6587               if (s[pos+1] == '\0')
  6588                 return null_pos;
  6589               pos += 2;
  6590             }
  6591           else
  6592             ++pos;
  6593         }
  6594 
  6595       mercury_pos_t position = {pos, pos - start_of_name + 1, pos - origpos};
  6596       return position;
  6597     }
  6598   else if (is_mercury_quantifier && s[pos] == '[')   /* :- some [T] pred/func.  */
  6599     {
  6600       char *close_bracket = strchr (s + pos + 1, ']');
  6601       if (!close_bracket)
  6602         return null_pos;
  6603       pos = skip_spaces (close_bracket + 1) - s;
  6604       mercury_pos_t position = mercury_decl (s, pos);
  6605       position.totlength += pos - origpos;
  6606       return position;
  6607     }
  6608   else if (s[pos] == '.')  /* as in ':- interface.'  */
  6609     {
  6610       mercury_pos_t position = {pos, pos - origpos + 1, pos - origpos};
  6611       return position;
  6612     }
  6613   else
  6614     return null_pos;
  6615 }
  6616 
  6617 static ptrdiff_t
  6618 mercury_pr (char *s, char *last, ptrdiff_t lastlen)
  6619 {
  6620   size_t len0 = 0;
  6621   is_mercury_type = false;
  6622   is_mercury_quantifier = false;
  6623   bool stop_at_rule = false;
  6624 
  6625   if (is_mercury_declaration)
  6626     {
  6627       /* Skip len0 blanks only for declarations.  */
  6628       len0 = skip_spaces (s + 2) - s;
  6629     }
  6630 
  6631   mercury_pos_t position = mercury_decl (s, len0);
  6632   size_t pos = position.pos;
  6633   int offset = 0;  /* may be < 0  */
  6634   if (pos == 0) return 0;
  6635 
  6636   /* Skip white space for:
  6637      a. rules in definitions before :-
  6638      b. 0-arity predicates with inlined modes.
  6639      c. possibly multiline type definitions  */
  6640 
  6641   while (c_isspace (s[pos])) { ++pos; ++offset; }
  6642 
  6643   if (( ((s[pos] == '.' && (pos += 1))     /* case 1
  6644                                               This is a statement dot,
  6645                                               not a module dot. */
  6646          || c_isalnum(s[pos])              /* 0-arity procedures  */
  6647          || (s[pos] == '(' && (pos += 1))  /* case 2: arity > 0   */
  6648          || ((s[pos] == ':')               /* case 3: rules  */
  6649              && s[pos + 1] == '-' && (stop_at_rule = true)))
  6650      && (lastlen != pos || memcmp (s, last, pos) != 0)
  6651         )
  6652       /* Types are often declared on several lines so keeping just
  6653          the first line.  */
  6654 
  6655       || is_mercury_type)  /* When types are implemented.  */
  6656     {
  6657       size_t namelength = position.namelength;
  6658       if (stop_at_rule && offset) --offset;
  6659 
  6660       /* Left-trim type definitions.  */
  6661 
  6662       while (pos > namelength + offset
  6663              && c_isspace (s[pos - namelength - offset]))
  6664         --offset;
  6665 
  6666       make_tag (s + pos - namelength - offset, namelength - 1, true,
  6667                                 s, pos - offset - 1, lineno, linecharno);
  6668       return pos;
  6669     }
  6670 
  6671   return 0;
  6672 }
  6673 
  6674 
  6675 /*
  6676  * Support for Erlang
  6677  *
  6678  * Generates tags for functions, defines, and records.
  6679  * Assumes that Erlang functions start at column 0.
  6680  * Original code by Anders Lindgren (1996)
  6681  */
  6682 static ptrdiff_t erlang_func (char *, char *, ptrdiff_t, ptrdiff_t *);
  6683 static void erlang_attribute (char *);
  6684 static ptrdiff_t erlang_atom (char *);
  6685 
  6686 static void
  6687 Erlang_functions (FILE *inf)
  6688 {
  6689   char *cp, *last = NULL;
  6690   ptrdiff_t lastlen = 0, allocated = 0;
  6691 
  6692   LOOP_ON_INPUT_LINES (inf, lb, cp)
  6693     {
  6694       if (cp[0] == '\0')        /* Empty line */
  6695         continue;
  6696       else if (c_isspace (cp[0])) /* Not function nor attribute */
  6697         continue;
  6698       else if (cp[0] == '%')    /* comment */
  6699         continue;
  6700       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
  6701         continue;
  6702       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
  6703         {
  6704           erlang_attribute (cp);
  6705           if (last != NULL)
  6706             {
  6707               free (last);
  6708               last = NULL;
  6709               allocated = lastlen = 0;
  6710             }
  6711         }
  6712       else
  6713         {
  6714           ptrdiff_t name_offset;
  6715           ptrdiff_t len = erlang_func (cp, last, lastlen, &name_offset);
  6716           if (0 < len)
  6717             {
  6718               /* Store the function name to avoid generating duplicate
  6719                  tags later.  */
  6720               if (allocated <= len)
  6721                 {
  6722                   xrnew (last, len + 1, 1);
  6723                   allocated = len + 1;
  6724                 }
  6725               memcpyz (last, cp + name_offset, len);
  6726               lastlen = len;
  6727             }
  6728         }
  6729     }
  6730   free (last);
  6731 }
  6732 
  6733 
  6734 /*
  6735  * A function definition is added if it matches:
  6736  *     <beginning of line><Erlang Atom><whitespace>(
  6737  *
  6738  * It is added to the tags database if it doesn't match the
  6739  * name of the previous clause header.
  6740  *
  6741  * Return the size of the name of the function, or 0 if no function
  6742  * was found.
  6743  */
  6744 static ptrdiff_t
  6745 erlang_func (char *s, char *last, ptrdiff_t lastlen, ptrdiff_t *name_offset)
  6746 {
  6747   char *name = s;
  6748   ptrdiff_t len = erlang_atom (s);
  6749   if (len == 0)
  6750     return 0;
  6751   ptrdiff_t pos = skip_spaces (s + len) - s;
  6752 
  6753   /* If the name is quoted, the quotes are not part of the name. */
  6754   bool quoted = 2 < len && name[0] == '\'' && name[len - 1] == '\'';
  6755   name += quoted;
  6756   len -= 2 * quoted;
  6757 
  6758   /* Save only the first clause. */
  6759   if (s[pos++] == '('
  6760       && ! (lastlen == len && memcmp (name, last, len) == 0))
  6761     {
  6762       make_tag (s, len, true, s, pos, lineno, linecharno);
  6763       *name_offset = quoted;
  6764       return len;
  6765     }
  6766 
  6767   return 0;
  6768 }
  6769 
  6770 
  6771 /*
  6772  * Handle attributes.  Currently, tags are generated for defines
  6773  * and records.
  6774  *
  6775  * They are on the form:
  6776  * -define(foo, bar).
  6777  * -define(Foo(M, N), M+N).
  6778  * -record(graph, {vtab = notable, cyclic = true}).
  6779  */
  6780 static void
  6781 erlang_attribute (char *s)
  6782 {
  6783   char *cp = s;
  6784 
  6785   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
  6786       && *cp++ == '(')
  6787     {
  6788       cp = skip_spaces (cp);
  6789       ptrdiff_t len = erlang_atom (cp);
  6790       ptrdiff_t pos = cp + len - s;
  6791       if (len > 0)
  6792         {
  6793           /* If the name is quoted, the quotes are not part of the name. */
  6794           if (len > 2 && cp[0] == '\'' && cp[len - 1] == '\'')
  6795             {
  6796               cp++;
  6797               len -= 2;
  6798             }
  6799           make_tag (cp, len, true, s, pos, lineno, linecharno);
  6800         }
  6801     }
  6802   return;
  6803 }
  6804 
  6805 
  6806 /*
  6807  * Consume an Erlang atom (or variable).
  6808  * Return the number of bytes consumed, or -1 if there was an error.
  6809  */
  6810 static ptrdiff_t
  6811 erlang_atom (char *s)
  6812 {
  6813   ptrdiff_t pos = 0;
  6814 
  6815   if (c_isalpha (s[pos]) || s[pos] == '_')
  6816     {
  6817       /* The atom is unquoted. */
  6818       do
  6819         pos++;
  6820       while (c_isalnum (s[pos]) || s[pos] == '_');
  6821     }
  6822   else if (s[pos] == '\'')
  6823     {
  6824       for (pos++; s[pos] != '\''; pos++)
  6825         if (s[pos] == '\0'      /* multiline quoted atoms are ignored */
  6826             || (s[pos] == '\\' && s[++pos] == '\0'))
  6827           return 0;
  6828       pos++;
  6829     }
  6830 
  6831   return pos;
  6832 }
  6833 
  6834 
  6835 static char *scan_separators (char *);
  6836 static void add_regex (char *, language *);
  6837 static char *substitute (char *, char *, struct re_registers *);
  6838 
  6839 /*
  6840  * Take a string like "/blah/" and turn it into "blah", verifying
  6841  * that the first and last characters are the same, and handling
  6842  * quoted separator characters.  Actually, stops on the occurrence of
  6843  * an unquoted separator.  Also process \t, \n, etc. and turn into
  6844  * appropriate characters. Works in place.  Null terminates name string.
  6845  * Returns pointer to terminating separator, or NULL for
  6846  * unterminated regexps.
  6847  */
  6848 static char *
  6849 scan_separators (char *name)
  6850 {
  6851   char sep = name[0];
  6852   char *copyto = name;
  6853   bool quoted = false;
  6854 
  6855   for (++name; *name != '\0'; ++name)
  6856     {
  6857       if (quoted)
  6858         {
  6859           switch (*name)
  6860             {
  6861             case 'a': *copyto++ = '\007'; break; /* BEL (bell)           */
  6862             case 'b': *copyto++ = '\b'; break;   /* BS (back space)      */
  6863             case 'd': *copyto++ = 0177; break;   /* DEL (delete)         */
  6864             case 'e': *copyto++ = 033; break;    /* ESC (delete)         */
  6865             case 'f': *copyto++ = '\f'; break;   /* FF (form feed)       */
  6866             case 'n': *copyto++ = '\n'; break;   /* NL (new line)        */
  6867             case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
  6868             case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
  6869             case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)    */
  6870             default:
  6871               if (*name == sep)
  6872                 *copyto++ = sep;
  6873               else
  6874                 {
  6875                   /* Something else is quoted, so preserve the quote. */
  6876                   *copyto++ = '\\';
  6877                   *copyto++ = *name;
  6878                 }
  6879               break;
  6880             }
  6881           quoted = false;
  6882         }
  6883       else if (*name == '\\')
  6884         quoted = true;
  6885       else if (*name == sep)
  6886         break;
  6887       else
  6888         *copyto++ = *name;
  6889     }
  6890   if (*name != sep)
  6891     name = NULL;                /* signal unterminated regexp */
  6892 
  6893   /* Terminate copied string. */
  6894   *copyto = '\0';
  6895   return name;
  6896 }
  6897 
  6898 /* Look at the argument of --regex or --no-regex and do the right
  6899    thing.  Same for each line of a regexp file. */
  6900 static void
  6901 analyze_regex (char *regex_arg)
  6902 {
  6903   if (regex_arg == NULL)
  6904     {
  6905       free_regexps ();          /* --no-regex: remove existing regexps */
  6906       return;
  6907     }
  6908 
  6909   /* A real --regexp option or a line in a regexp file. */
  6910   switch (regex_arg[0])
  6911     {
  6912       /* Comments in regexp file or null arg to --regex. */
  6913     case '\0':
  6914     case ' ':
  6915     case '\t':
  6916       break;
  6917 
  6918       /* Read a regex file.  This is recursive and may result in a
  6919          loop, which will stop when the file descriptors are exhausted. */
  6920     case '@':
  6921       {
  6922         FILE *regexfp;
  6923         linebuffer regexbuf;
  6924         char *regexfile = regex_arg + 1;
  6925 
  6926         /* regexfile is a file containing regexps, one per line. */
  6927         regexfp = fopen (regexfile, "r" FOPEN_BINARY);
  6928         if (regexfp == NULL)
  6929           pfatal (regexfile);
  6930         linebuffer_init (&regexbuf);
  6931         while (readline_internal (&regexbuf, regexfp, regexfile, false) > 0)
  6932           analyze_regex (regexbuf.buffer);
  6933         free (regexbuf.buffer);
  6934         if (fclose (regexfp) != 0)
  6935           pfatal (regexfile);
  6936       }
  6937       break;
  6938 
  6939       /* Regexp to be used for a specific language only. */
  6940     case '{':
  6941       {
  6942         language *lang;
  6943         char *lang_name = regex_arg + 1;
  6944         char *cp;
  6945 
  6946         for (cp = lang_name; *cp != '}'; cp++)
  6947           if (*cp == '\0')
  6948             {
  6949               error ("unterminated language name in regex: %s", regex_arg);
  6950               return;
  6951             }
  6952         *cp++ = '\0';
  6953         lang = get_language_from_langname (lang_name);
  6954         if (lang == NULL)
  6955           return;
  6956         add_regex (cp, lang);
  6957       }
  6958       break;
  6959 
  6960       /* Regexp to be used for any language. */
  6961     default:
  6962       add_regex (regex_arg, NULL);
  6963       break;
  6964     }
  6965 }
  6966 
  6967 /* Separate the regexp pattern, compile it,
  6968    and care for optional name and modifiers. */
  6969 static void
  6970 add_regex (char *regexp_pattern, language *lang)
  6971 {
  6972   static struct re_pattern_buffer zeropattern;
  6973   char sep, *pat, *name, *modifiers;
  6974   char empty = '\0';
  6975   const char *err;
  6976   struct re_pattern_buffer *patbuf;
  6977   regexp *rp;
  6978   bool
  6979     ignore_case = false,        /* case is significant */
  6980     multi_line = false,         /* matches are done one line at a time */
  6981     single_line = false;        /* dot does not match newline */
  6982 
  6983 
  6984   if (strnlen (regexp_pattern, 3) < 3)
  6985     {
  6986       error ("null regexp");
  6987       return;
  6988     }
  6989   sep = regexp_pattern[0];
  6990   name = scan_separators (regexp_pattern);
  6991   if (name == NULL)
  6992     {
  6993       error ("%s: unterminated regexp", regexp_pattern);
  6994       return;
  6995     }
  6996   if (name[1] == sep)
  6997     {
  6998       error ("null name for regexp \"%s\"", regexp_pattern);
  6999       return;
  7000     }
  7001   modifiers = scan_separators (name);
  7002   if (modifiers == NULL)        /* no terminating separator --> no name */
  7003     {
  7004       modifiers = name;
  7005       name = &empty;
  7006     }
  7007   else
  7008     modifiers += 1;             /* skip separator */
  7009 
  7010   /* Parse regex modifiers. */
  7011   for (; modifiers[0] != '\0'; modifiers++)
  7012     switch (modifiers[0])
  7013       {
  7014       case 'N':
  7015         if (modifiers == name)
  7016           error ("forcing explicit tag name but no name, ignoring");
  7017         /* This option has no effect and is present only for backward
  7018            compatibility.  */
  7019         break;
  7020       case 'i':
  7021         ignore_case = true;
  7022         break;
  7023       case 's':
  7024         single_line = true;
  7025         FALLTHROUGH;
  7026       case 'm':
  7027         multi_line = true;
  7028         need_filebuf = true;
  7029         break;
  7030       default:
  7031         error ("invalid regexp modifier '%c', ignoring", modifiers[0]);
  7032         break;
  7033       }
  7034 
  7035   patbuf = xnew (1, struct re_pattern_buffer);
  7036   *patbuf = zeropattern;
  7037   if (ignore_case)
  7038     {
  7039       static unsigned char lc_trans[UCHAR_MAX + 1];
  7040       int i;
  7041       for (i = 0; i < UCHAR_MAX + 1; i++)
  7042         lc_trans[i] = c_tolower (i);
  7043       patbuf->translate = lc_trans;     /* translation table to fold case  */
  7044     }
  7045 
  7046   if (multi_line)
  7047     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
  7048   else
  7049     pat = regexp_pattern;
  7050 
  7051   if (single_line)
  7052     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
  7053   else
  7054     re_set_syntax (RE_SYNTAX_EMACS);
  7055 
  7056   err = re_compile_pattern (pat, strlen (pat), patbuf);
  7057   if (multi_line)
  7058     free (pat);
  7059   if (err != NULL)
  7060     {
  7061       error ("%s while compiling pattern", err);
  7062       return;
  7063     }
  7064 
  7065   rp = p_head;
  7066   p_head = xnew (1, regexp);
  7067   p_head->pattern = savestr (regexp_pattern);
  7068   p_head->p_next = rp;
  7069   p_head->lang = lang;
  7070   p_head->pat = patbuf;
  7071   p_head->name = savestr (name);
  7072   p_head->error_signaled = false;
  7073   p_head->ignore_case = ignore_case;
  7074   p_head->multi_line = multi_line;
  7075 }
  7076 
  7077 /*
  7078  * Do the substitutions indicated by the regular expression and
  7079  * arguments.
  7080  */
  7081 static char *
  7082 substitute (char *in, char *out, struct re_registers *regs)
  7083 {
  7084   char *result, *t;
  7085 
  7086   result = NULL;
  7087   ptrdiff_t size = strlen (out);
  7088 
  7089   /* Pass 1: figure out how much to allocate by finding all \N strings. */
  7090   if (out[size - 1] == '\\')
  7091     fatal ("pattern error in \"%s\"", out);
  7092   for (t = strchr (out, '\\');
  7093        t != NULL;
  7094        t = strchr (t + 2, '\\'))
  7095     if (c_isdigit (t[1]))
  7096       {
  7097         int dig = t[1] - '0';
  7098         ptrdiff_t diglen = regs->end[dig] - regs->start[dig];
  7099         size += diglen - 2;
  7100       }
  7101     else
  7102       size -= 1;
  7103 
  7104   /* Allocate space and do the substitutions. */
  7105   assert (size >= 0);
  7106   result = xnew (size + 1, char);
  7107 
  7108   for (t = result; *out != '\0'; out++)
  7109     if (*out == '\\' && c_isdigit (*++out))
  7110       {
  7111         int dig = *out - '0';
  7112         ptrdiff_t diglen = regs->end[dig] - regs->start[dig];
  7113         memcpy (t, in + regs->start[dig], diglen);
  7114         t += diglen;
  7115       }
  7116     else
  7117       *t++ = *out;
  7118   *t = '\0';
  7119 
  7120   assert (t <= result + size);
  7121   assert (t == result + strlen (result));
  7122 
  7123   return result;
  7124 }
  7125 
  7126 /* Deallocate all regexps. */
  7127 static void
  7128 free_regexps (void)
  7129 {
  7130   regexp *rp;
  7131   while (p_head != NULL)
  7132     {
  7133       rp = p_head->p_next;
  7134       free (p_head->pattern);
  7135       free (p_head->name);
  7136       free (p_head);
  7137       p_head = rp;
  7138     }
  7139   return;
  7140 }
  7141 
  7142 /*
  7143  * Reads the whole file as a single string from `filebuf' and looks for
  7144  * multi-line regular expressions, creating tags on matches.
  7145  * readline already dealt with normal regexps.
  7146  *
  7147  * Idea by Ben Wing <ben@666.com> (2002).
  7148  */
  7149 static void
  7150 regex_tag_multiline (void)
  7151 {
  7152   char *buffer = filebuf.buffer;
  7153   regexp *rp;
  7154   char *name;
  7155 
  7156   for (rp = p_head; rp != NULL; rp = rp->p_next)
  7157     {
  7158       ptrdiff_t match = 0;
  7159 
  7160       if (!rp->multi_line)
  7161         continue;               /* skip normal regexps */
  7162 
  7163       /* Generic initializations before parsing file from memory. */
  7164       lineno = 1;               /* reset global line number */
  7165       charno = 0;               /* reset global char number */
  7166       linecharno = 0;           /* reset global char number of line start */
  7167 
  7168       /* Only use generic regexps or those for the current language. */
  7169       if (rp->lang != NULL && rp->lang != curfdp->lang)
  7170         continue;
  7171 
  7172       while (match >= 0 && match < filebuf.len)
  7173         {
  7174           match = re_search (rp->pat, buffer, filebuf.len, charno,
  7175                              filebuf.len - match, &rp->regs);
  7176           switch (match)
  7177             {
  7178             case -2:
  7179               /* Some error. */
  7180               if (!rp->error_signaled)
  7181                 {
  7182                   error ("regexp stack overflow while matching \"%s\"",
  7183                          rp->pattern);
  7184                   rp->error_signaled = true;
  7185                 }
  7186               break;
  7187             case -1:
  7188               /* No match. */
  7189               break;
  7190             default:
  7191               if (match == rp->regs.end[0])
  7192                 {
  7193                   if (!rp->error_signaled)
  7194                     {
  7195                       error ("regexp matches the empty string: \"%s\"",
  7196                              rp->pattern);
  7197                       rp->error_signaled = true;
  7198                     }
  7199                   match = -3;   /* exit from while loop */
  7200                   break;
  7201                 }
  7202 
  7203               /* Match occurred.  Construct a tag. */
  7204               while (charno < rp->regs.end[0])
  7205                 if (buffer[charno++] == '\n')
  7206                   lineno++, linecharno = charno;
  7207               name = rp->name;
  7208               if (name[0] == '\0')
  7209                 name = NULL;
  7210               else /* make a named tag */
  7211                 name = substitute (buffer, rp->name, &rp->regs);
  7212 
  7213               /* Force explicit tag name, if a name is there. */
  7214               pfnote (name, true, buffer + linecharno,
  7215                       charno - linecharno + 1, lineno, linecharno);
  7216 
  7217               if (debug)
  7218                 fprintf (stderr, "%s on %s:%"PRIdMAX": %s\n",
  7219                          name ? name : "(unnamed)", curfdp->taggedfname,
  7220                          lineno, buffer + linecharno);
  7221               break;
  7222             }
  7223         }
  7224     }
  7225 }
  7226 
  7227 
  7228 static bool
  7229 nocase_tail (const char *cp)
  7230 {
  7231   ptrdiff_t len = 0;
  7232 
  7233   while (*cp != '\0' && c_tolower (*cp) == c_tolower (dbp[len]))
  7234     cp++, len++;
  7235   if (*cp == '\0' && !intoken (dbp[len]))
  7236     {
  7237       dbp += len;
  7238       return true;
  7239     }
  7240   return false;
  7241 }
  7242 
  7243 static void
  7244 get_tag (register char *bp, char **namepp)
  7245 {
  7246   register char *cp = bp;
  7247 
  7248   if (*bp != '\0')
  7249     {
  7250       /* Go till you get to white space or a syntactic break */
  7251       for (cp = bp + 1; !notinname (*cp); cp++)
  7252         continue;
  7253       make_tag (bp, cp - bp, true,
  7254                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  7255     }
  7256 
  7257   if (namepp != NULL)
  7258     *namepp = savenstr (bp, cp - bp);
  7259 }
  7260 
  7261 /* Similar to get_tag, but include '=' as part of the tag. */
  7262 static void
  7263 get_lispy_tag (register char *bp)
  7264 {
  7265   register char *cp = bp;
  7266 
  7267   if (*bp != '\0')
  7268     {
  7269       /* Go till you get to white space or a syntactic break */
  7270       for (cp = bp + 1; !notinname (*cp) || *cp == '='; cp++)
  7271         continue;
  7272       make_tag (bp, cp - bp, true,
  7273                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  7274     }
  7275 }
  7276 
  7277 /*
  7278  * Read a line of text from `stream' into `lbp', excluding the
  7279  * newline or CR-NL (if `leave_cr` is false), if any.  Return the
  7280  * number of characters read from `stream', which is the length
  7281  * of the line including the newline.
  7282  *
  7283  * On DOS or Windows, if `leave_cr` is false, we do not count the
  7284  * CR character, if any before the NL, in the returned length;
  7285  * this mirrors the behavior of Emacs on those
  7286  * platforms (for text files, it translates CR-NL to NL as it reads in the
  7287  * file).
  7288  *
  7289  * If multi-line regular expressions are requested, each line read is
  7290  * appended to `filebuf'.
  7291  */
  7292 static ptrdiff_t
  7293 readline_internal (linebuffer *lbp, FILE *stream, char const *filename,
  7294                    const bool leave_cr)
  7295 {
  7296   char *buffer = lbp->buffer;
  7297   char *p = lbp->buffer;
  7298   char *pend;
  7299   int chars_deleted;
  7300 
  7301   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
  7302 
  7303   for (;;)
  7304     {
  7305       register int c = getc (stream);
  7306       if (p == pend)
  7307         {
  7308           /* We're at the end of linebuffer: expand it. */
  7309           xrnew (buffer, lbp->size, 2);
  7310           p = buffer + lbp->size;
  7311           lbp->size *= 2;
  7312           pend = buffer + lbp->size;
  7313           lbp->buffer = buffer;
  7314         }
  7315       if (c == EOF)
  7316         {
  7317           if (ferror (stream))
  7318             perror (filename);
  7319           *p = '\0';
  7320           chars_deleted = 0;
  7321           break;
  7322         }
  7323       if (c == '\n')
  7324         {
  7325           if (!leave_cr && p > buffer && p[-1] == '\r')
  7326             {
  7327               p -= 1;
  7328               chars_deleted = 2;
  7329             }
  7330           else
  7331             {
  7332               chars_deleted = 1;
  7333             }
  7334           *p = '\0';
  7335           break;
  7336         }
  7337       *p++ = c;
  7338     }
  7339   lbp->len = p - buffer;
  7340 
  7341   if (need_filebuf              /* we need filebuf for multi-line regexps */
  7342       && chars_deleted > 0)     /* not at EOF */
  7343     {
  7344       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
  7345         {
  7346           /* Expand filebuf. */
  7347           xrnew (filebuf.buffer, filebuf.size, 2);
  7348           filebuf.size *= 2;
  7349         }
  7350       strcpy (mempcpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len),
  7351               "\n");
  7352       filebuf.len += lbp->len + 1;
  7353     }
  7354 
  7355   return lbp->len + chars_deleted;
  7356 }
  7357 
  7358 /*
  7359  * Like readline_internal, above, but in addition try to match the
  7360  * input line against relevant regular expressions and manage #line
  7361  * directives.
  7362  */
  7363 static void
  7364 readline (linebuffer *lbp, FILE *stream)
  7365 {
  7366   linecharno = charno;          /* update global char number of line start */
  7367   ptrdiff_t result = readline_internal (lbp, stream, infilename, false);
  7368   lineno += 1;                  /* increment global line number */
  7369   charno += result;             /* increment global char number */
  7370 
  7371   /* Honor #line directives. */
  7372   if (!no_line_directive)
  7373     {
  7374       static bool discard_until_line_directive;
  7375 
  7376       /* Check whether this is a #line directive. */
  7377       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
  7378         {
  7379           intmax_t lno;
  7380           int start = 0;
  7381 
  7382           if (sscanf (lbp->buffer, "#line %"SCNdMAX" \"%n", &lno, &start) >= 1
  7383               && start > 0)     /* double quote character found */
  7384             {
  7385               char *endp = lbp->buffer + start;
  7386 
  7387               while ((endp = strchr (endp, '"')) != NULL
  7388                      && endp[-1] == '\\')
  7389                 endp++;
  7390               if (endp != NULL)
  7391                 /* Ok, this is a real #line directive.  Let's deal with it. */
  7392                 {
  7393                   char *taggedabsname;  /* absolute name of original file */
  7394                   char *taggedfname;    /* name of original file as given */
  7395                   char *name;           /* temp var */
  7396 
  7397                   discard_until_line_directive = false; /* found it */
  7398                   name = lbp->buffer + start;
  7399                   *endp = '\0';
  7400                   canonicalize_filename (name);
  7401                   taggedabsname = absolute_filename (name, tagfiledir);
  7402                   if (filename_is_absolute (name)
  7403                       || filename_is_absolute (curfdp->infname))
  7404                     taggedfname = savestr (taggedabsname);
  7405                   else
  7406                     taggedfname = relative_filename (taggedabsname,tagfiledir);
  7407 
  7408                   if (streq (curfdp->taggedfname, taggedfname))
  7409                     /* The #line directive is only a line number change.  We
  7410                        deal with this afterwards. */
  7411                     free (taggedfname);
  7412                   else
  7413                     /* The tags following this #line directive should be
  7414                        attributed to taggedfname.  In order to do this, set
  7415                        curfdp accordingly. */
  7416                     {
  7417                       fdesc *fdp; /* file description pointer */
  7418 
  7419                       /* Go look for a file description already set up for the
  7420                          file indicated in the #line directive.  If there is
  7421                          one, use it from now until the next #line
  7422                          directive. */
  7423                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
  7424                         if (streq (fdp->infname, curfdp->infname)
  7425                             && streq (fdp->taggedfname, taggedfname))
  7426                           /* If we remove the second test above (after the &&)
  7427                              then all entries pertaining to the same file are
  7428                              coalesced in the tags file.  If we use it, then
  7429                              entries pertaining to the same file but generated
  7430                              from different files (via #line directives) will
  7431                              go into separate sections in the tags file.  These
  7432                              alternatives look equivalent.  The first one
  7433                              destroys some apparently useless information. */
  7434                           {
  7435                             curfdp = fdp;
  7436                             free (taggedfname);
  7437                             break;
  7438                           }
  7439                       /* Else, if we already tagged the real file, skip all
  7440                          input lines until the next #line directive. */
  7441                       if (fdp == NULL) /* not found */
  7442                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
  7443                           if (streq (fdp->infabsname, taggedabsname))
  7444                             {
  7445                               discard_until_line_directive = true;
  7446                               free (taggedfname);
  7447                               break;
  7448                             }
  7449                       /* Else create a new file description and use that from
  7450                          now on, until the next #line directive. */
  7451                       if (fdp == NULL) /* not found */
  7452                         {
  7453                           fdp = fdhead;
  7454                           fdhead = xnew (1, fdesc);
  7455                           *fdhead = *curfdp; /* copy curr. file description */
  7456                           fdhead->next = fdp;
  7457                           fdhead->infname = savestr (curfdp->infname);
  7458                           fdhead->infabsname = savestr (curfdp->infabsname);
  7459                           fdhead->infabsdir = savestr (curfdp->infabsdir);
  7460                           fdhead->taggedfname = taggedfname;
  7461                           fdhead->usecharno = false;
  7462                           fdhead->prop = NULL;
  7463                           fdhead->written = false;
  7464                           curfdp = fdhead;
  7465                         }
  7466                     }
  7467                   free (taggedabsname);
  7468                   lineno = lno - 1;
  7469                   readline (lbp, stream);
  7470                   return;
  7471                 } /* if a real #line directive */
  7472             } /* if #line is followed by a number */
  7473         } /* if line begins with "#line " */
  7474 
  7475       /* If we are here, no #line directive was found. */
  7476       if (discard_until_line_directive)
  7477         {
  7478           if (result > 0)
  7479             {
  7480               /* Do a tail recursion on ourselves, thus discarding the contents
  7481                  of the line buffer. */
  7482               readline (lbp, stream);
  7483               return;
  7484             }
  7485           /* End of file. */
  7486           discard_until_line_directive = false;
  7487           return;
  7488         }
  7489     } /* if #line directives should be considered */
  7490 
  7491   {
  7492     ptrdiff_t match;
  7493     regexp *rp;
  7494     char *name;
  7495 
  7496     /* Match against relevant regexps. */
  7497     if (lbp->len > 0)
  7498       for (rp = p_head; rp != NULL; rp = rp->p_next)
  7499         {
  7500           /* Only use generic regexps or those for the current language.
  7501              Also do not use multiline regexps, which is the job of
  7502              regex_tag_multiline. */
  7503           if ((rp->lang != NULL && rp->lang != fdhead->lang)
  7504               || rp->multi_line)
  7505             continue;
  7506 
  7507           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
  7508           switch (match)
  7509             {
  7510             case -2:
  7511               /* Some error. */
  7512               if (!rp->error_signaled)
  7513                 {
  7514                   error ("regexp stack overflow while matching \"%s\"",
  7515                          rp->pattern);
  7516                   rp->error_signaled = true;
  7517                 }
  7518               break;
  7519             case -1:
  7520               /* No match. */
  7521               break;
  7522             case 0:
  7523               /* Empty string matched. */
  7524               if (!rp->error_signaled)
  7525                 {
  7526                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
  7527                   rp->error_signaled = true;
  7528                 }
  7529               break;
  7530             default:
  7531               /* Match occurred.  Construct a tag. */
  7532               name = rp->name;
  7533               if (name[0] == '\0')
  7534                 name = NULL;
  7535               else /* make a named tag */
  7536                 name = substitute (lbp->buffer, rp->name, &rp->regs);
  7537 
  7538               /* Force explicit tag name, if a name is there. */
  7539               pfnote (name, true, lbp->buffer, match, lineno, linecharno);
  7540 
  7541               if (debug)
  7542                 fprintf (stderr, "%s on %s:%"PRIdMAX": %s\n",
  7543                          name ? name : "(unnamed)", curfdp->taggedfname,
  7544                          lineno, lbp->buffer);
  7545               break;
  7546             }
  7547         }
  7548   }
  7549 }
  7550 
  7551 
  7552 /*
  7553  * Return a pointer to a space of size strlen(cp)+1 allocated
  7554  * with xnew where the string CP has been copied.
  7555  */
  7556 static char *
  7557 savestr (const char *cp)
  7558 {
  7559   return savenstr (cp, strlen (cp));
  7560 }
  7561 
  7562 /*
  7563  * Return a pointer to a space of size LEN+1 allocated with xnew
  7564  * with a copy of CP (containing LEN bytes) followed by a NUL byte.
  7565  */
  7566 static char *
  7567 savenstr (const char *cp, ptrdiff_t len)
  7568 {
  7569   char *dp = xnew (len + 1, char);
  7570   dp[len] = '\0';
  7571   return memcpy (dp, cp, len);
  7572 }
  7573 
  7574 /* Skip spaces (end of string is not space), return new pointer. */
  7575 static char *
  7576 skip_spaces (char *cp)
  7577 {
  7578   while (c_isspace (*cp))
  7579     cp++;
  7580   return cp;
  7581 }
  7582 
  7583 /* Skip non spaces, except end of string, return new pointer. */
  7584 static char *
  7585 skip_non_spaces (char *cp)
  7586 {
  7587   while (*cp != '\0' && !c_isspace (*cp))
  7588     cp++;
  7589   return cp;
  7590 }
  7591 
  7592 /* Skip any chars in the "name" class.*/
  7593 static char *
  7594 skip_name (char *cp)
  7595 {
  7596   /* '\0' is a notinname() so loop stops there too */
  7597   while (! notinname (*cp))
  7598     cp++;
  7599   return cp;
  7600 }
  7601 
  7602 /* Print error message and exit.  */
  7603 static void
  7604 fatal (char const *format, ...)
  7605 {
  7606   va_list ap;
  7607   va_start (ap, format);
  7608   verror (format, ap);
  7609   va_end (ap);
  7610   exit (EXIT_FAILURE);
  7611 }
  7612 
  7613 static void
  7614 pfatal (const char *s1)
  7615 {
  7616   perror (s1);
  7617   exit (EXIT_FAILURE);
  7618 }
  7619 
  7620 static void
  7621 suggest_asking_for_help (void)
  7622 {
  7623   fprintf (stderr, "\tTry '%s --help' for a complete list of options.\n",
  7624            progname);
  7625   exit (EXIT_FAILURE);
  7626 }
  7627 
  7628 /* Output a diagnostic with printf-style FORMAT and args.  */
  7629 static void
  7630 error (const char *format, ...)
  7631 {
  7632   va_list ap;
  7633   va_start (ap, format);
  7634   verror (format, ap);
  7635   va_end (ap);
  7636 }
  7637 
  7638 static void
  7639 verror (char const *format, va_list ap)
  7640 {
  7641   fprintf (stderr, "%s: ", progname);
  7642   vfprintf (stderr, format, ap);
  7643   fprintf (stderr, "\n");
  7644 }
  7645 
  7646 /* Return a newly-allocated string whose contents
  7647    concatenate those of s1, s2, s3.  */
  7648 static char *
  7649 concat (const char *s1, const char *s2, const char *s3)
  7650 {
  7651   ptrdiff_t len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
  7652   char *result = xnew (len1 + len2 + len3 + 1, char);
  7653   strcpy (stpcpy (stpcpy (result, s1), s2), s3);
  7654   return result;
  7655 }
  7656 
  7657 
  7658 /* Does the same work as the system V getcwd, but does not need to
  7659    guess the buffer size in advance. */
  7660 static char *
  7661 etags_getcwd (void)
  7662 {
  7663   ptrdiff_t bufsize = 200;
  7664   char *path = xnew (bufsize, char);
  7665 
  7666   while (getcwd (path, bufsize) == NULL)
  7667     {
  7668       if (errno != ERANGE)
  7669         pfatal ("getcwd");
  7670       free (path);
  7671       path = xnmalloc (bufsize, 2 * sizeof *path);
  7672       bufsize *= 2;
  7673     }
  7674 
  7675   canonicalize_filename (path);
  7676   return path;
  7677 }
  7678 
  7679 /* Return a newly allocated string containing a name of a temporary file.  */
  7680 static char *
  7681 etags_mktmp (void)
  7682 {
  7683   const char *tmpdir = getenv ("TMPDIR");
  7684   const char *slash = "/";
  7685 
  7686 #if MSDOS || defined (DOS_NT)
  7687   if (!tmpdir)
  7688     tmpdir = getenv ("TEMP");
  7689   if (!tmpdir)
  7690     tmpdir = getenv ("TMP");
  7691   if (!tmpdir)
  7692     tmpdir = ".";
  7693   if (tmpdir[strlen (tmpdir) - 1] == '/'
  7694       || tmpdir[strlen (tmpdir) - 1] == '\\')
  7695     slash = "";
  7696 #else
  7697   if (!tmpdir)
  7698     tmpdir = "/tmp";
  7699   if (tmpdir[strlen (tmpdir) - 1] == '/')
  7700     slash = "";
  7701 #endif
  7702 
  7703   char *templt = concat (tmpdir, slash, "etXXXXXX");
  7704   int fd = mkostemp (templt, O_CLOEXEC);
  7705   if (fd < 0 || close (fd) != 0)
  7706     {
  7707       free (templt);
  7708       templt = NULL;
  7709     }
  7710 #if defined (DOS_NT)
  7711   else
  7712     {
  7713       /* The file name will be used in shell redirection, so it needs to have
  7714          DOS-style backslashes, or else the Windows shell will barf.  */
  7715       char *p;
  7716       for (p = templt; *p; p++)
  7717         if (*p == '/')
  7718           *p = '\\';
  7719     }
  7720 #endif
  7721 
  7722   return templt;
  7723 }
  7724 
  7725 #if !MSDOS && !defined (DOS_NT)
  7726 /*
  7727  * Add single quotes around a string, and escape any single quotes.
  7728  * Return a newly-allocated string.
  7729  *
  7730  * For example:
  7731  * escape_shell_arg_string ("test.txt")  => "'test.txt'"
  7732  * escape_shell_arg_string ("'test.txt") => "''\''test.txt'"
  7733  */
  7734 static char *
  7735 escape_shell_arg_string (char *str)
  7736 {
  7737   char *p = str;
  7738   int need_space = 2;           /* ' at begin and end */
  7739 
  7740   while (*p != '\0')
  7741     {
  7742       if (*p == '\'')
  7743         need_space += 4;        /* ' to '\'', length is 4 */
  7744       else
  7745         need_space++;
  7746 
  7747       p++;
  7748     }
  7749 
  7750   char *new_str = xnew (need_space + 1, char);
  7751   new_str[0] = '\'';
  7752   new_str[need_space-1] = '\'';
  7753 
  7754   int i = 1;                    /* skip first byte */
  7755   p = str;
  7756   while (*p != '\0')
  7757     {
  7758       new_str[i] = *p;
  7759       if (*p == '\'')
  7760         {
  7761           new_str[i+1] = '\\';
  7762           new_str[i+2] = '\'';
  7763           new_str[i+3] = '\'';
  7764           i += 3;
  7765         }
  7766 
  7767       i++;
  7768       p++;
  7769     }
  7770 
  7771   new_str[need_space] = '\0';
  7772   return new_str;
  7773 }
  7774 #endif
  7775 
  7776 static void
  7777 do_move_file(const char *src_file, const char *dst_file)
  7778 {
  7779   if (rename (src_file, dst_file) == 0)
  7780     return;
  7781 
  7782   FILE *src_f = fopen (src_file, "rb");
  7783   FILE *dst_f = fopen (dst_file, "wb");
  7784 
  7785   if (src_f == NULL)
  7786     pfatal (src_file);
  7787 
  7788   if (dst_f == NULL)
  7789     pfatal (dst_file);
  7790 
  7791   int c;
  7792   while ((c = fgetc (src_f)) != EOF)
  7793     {
  7794       if (ferror (src_f))
  7795         pfatal (src_file);
  7796 
  7797       if (ferror (dst_f))
  7798         pfatal (dst_file);
  7799 
  7800       if (fputc (c, dst_f) == EOF)
  7801         pfatal ("cannot write");
  7802     }
  7803 
  7804   if (fclose (src_f) == EOF)
  7805     pfatal (src_file);
  7806 
  7807   if (fclose (dst_f) == EOF)
  7808     pfatal (dst_file);
  7809 
  7810   if (unlink (src_file) == -1)
  7811     pfatal ("unlink error");
  7812 
  7813   return;
  7814 }
  7815 
  7816 /* Return a newly allocated string containing the file name of FILE
  7817    relative to the absolute directory DIR (which should end with a slash). */
  7818 static char *
  7819 relative_filename (char *file, char *dir)
  7820 {
  7821   char *fp, *dp, *afn, *res;
  7822   ptrdiff_t i;
  7823   char *dir_last_slash UNINIT;
  7824 
  7825   /* Find the common root of file and dir (with a trailing slash). */
  7826   afn = absolute_filename (file, cwd);
  7827   fp = afn;
  7828   dp = dir;
  7829   while (*fp++ == *dp++)
  7830     if (dp[-1] == '/')
  7831       dir_last_slash = dp - 1;
  7832 #ifdef DOS_NT
  7833   if (fp - 1 == afn && afn[0] != '/')
  7834     return afn; /* Cannot build a relative name.  */
  7835 #endif
  7836   fp -= dp - dir_last_slash;
  7837   dp = dir_last_slash;
  7838 
  7839   /* Build a sequence of "../" strings for the resulting relative file name. */
  7840   i = 0;
  7841   while ((dp = strchr (dp + 1, '/')) != NULL)
  7842     i += 1;
  7843   res = xnew (3*i + strlen (fp + 1) + 1, char);
  7844   char *z = res;
  7845   while (i-- > 0)
  7846     z = stpcpy (z, "../");
  7847 
  7848   /* Add the file name relative to the common root of file and dir. */
  7849   strcpy (z, fp + 1);
  7850   free (afn);
  7851 
  7852   return res;
  7853 }
  7854 
  7855 /* Return a newly allocated string containing the absolute file name
  7856    of FILE given DIR (which should end with a slash). */
  7857 static char *
  7858 absolute_filename (char *file, char *dir)
  7859 {
  7860   char *slashp, *cp, *res;
  7861 
  7862   if (filename_is_absolute (file))
  7863     res = savestr (file);
  7864 #ifdef DOS_NT
  7865   /* We don't support non-absolute file names with a drive
  7866      letter, like `d:NAME' (it's too much hassle).  */
  7867   else if (file[1] == ':')
  7868     fatal ("%s: relative file names with drive letters not supported", file);
  7869 #endif
  7870   else
  7871     res = concat (dir, file, "");
  7872 
  7873   /* Delete the "/dirname/.." and "/." substrings. */
  7874   slashp = strchr (res, '/');
  7875   while (slashp != NULL && slashp[0] != '\0')
  7876     {
  7877       if (slashp[1] == '.')
  7878         {
  7879           if (slashp[2] == '.'
  7880               && (slashp[3] == '/' || slashp[3] == '\0'))
  7881             {
  7882               cp = slashp;
  7883               do
  7884                 cp--;
  7885               while (cp >= res && !filename_is_absolute (cp));
  7886               if (cp < res)
  7887                 cp = slashp;    /* the absolute name begins with "/.." */
  7888 #ifdef DOS_NT
  7889               /* Under MSDOS and NT we get `d:/NAME' as absolute
  7890                  file name, so the luser could say `d:/../NAME'.
  7891                  We silently treat this as `d:/NAME'.  */
  7892               else if (cp[0] != '/')
  7893                 cp = slashp;
  7894 #endif
  7895               memmove (cp, slashp + 3, strlen (slashp + 2));
  7896               slashp = cp;
  7897               continue;
  7898             }
  7899           else if (slashp[2] == '/' || slashp[2] == '\0')
  7900             {
  7901               memmove (slashp, slashp + 2, strlen (slashp + 1));
  7902               continue;
  7903             }
  7904         }
  7905 
  7906       slashp = strchr (slashp + 1, '/');
  7907     }
  7908 
  7909   if (res[0] == '\0')           /* just a safety net: should never happen */
  7910     {
  7911       free (res);
  7912       return savestr ("/");
  7913     }
  7914   else
  7915     return res;
  7916 }
  7917 
  7918 /* Return a newly allocated string containing the absolute
  7919    file name of dir where FILE resides given DIR (which should
  7920    end with a slash). */
  7921 static char *
  7922 absolute_dirname (char *file, char *dir)
  7923 {
  7924   char *slashp, *res;
  7925   char save;
  7926 
  7927   slashp = strrchr (file, '/');
  7928   if (slashp == NULL)
  7929     return savestr (dir);
  7930   save = slashp[1];
  7931   slashp[1] = '\0';
  7932   res = absolute_filename (file, dir);
  7933   slashp[1] = save;
  7934 
  7935   return res;
  7936 }
  7937 
  7938 /* Whether the argument string is an absolute file name.  The argument
  7939    string must have been canonicalized with canonicalize_filename. */
  7940 static bool
  7941 filename_is_absolute (char *fn)
  7942 {
  7943   return (fn[0] == '/'
  7944 #ifdef DOS_NT
  7945           || (c_isalpha (fn[0]) && fn[1] == ':' && fn[2] == '/')
  7946 #endif
  7947           );
  7948 }
  7949 
  7950 /* Downcase DOS drive letter and collapse separators into single slashes.
  7951    Works in place. */
  7952 static void
  7953 canonicalize_filename (register char *fn)
  7954 {
  7955   register char* cp;
  7956 
  7957 #ifdef DOS_NT
  7958   /* Canonicalize drive letter case.  */
  7959   if (c_isupper (fn[0]) && fn[1] == ':')
  7960     fn[0] = c_tolower (fn[0]);
  7961 
  7962   /* Collapse multiple forward- and back-slashes into a single forward
  7963      slash. */
  7964   for (cp = fn; *cp != '\0'; cp++, fn++)
  7965     if (*cp == '/' || *cp == '\\')
  7966       {
  7967         *fn = '/';
  7968         while (cp[1] == '/' || cp[1] == '\\')
  7969           cp++;
  7970       }
  7971     else
  7972       *fn = *cp;
  7973 
  7974 #else  /* !DOS_NT */
  7975 
  7976   /* Collapse multiple slashes into a single slash. */
  7977   for (cp = fn; *cp != '\0'; cp++, fn++)
  7978     if (*cp == '/')
  7979       {
  7980         *fn = '/';
  7981         while (cp[1] == '/')
  7982           cp++;
  7983       }
  7984     else
  7985       *fn = *cp;
  7986 
  7987 #endif  /* !DOS_NT */
  7988 
  7989   *fn = '\0';
  7990 }
  7991 
  7992 
  7993 /* Initialize a linebuffer for use. */
  7994 static void
  7995 linebuffer_init (linebuffer *lbp)
  7996 {
  7997   lbp->size = (DEBUG) ? 3 : 200;
  7998   lbp->buffer = xnew (lbp->size, char);
  7999   lbp->buffer[0] = '\0';
  8000   lbp->len = 0;
  8001 }
  8002 
  8003 /* Set the minimum size of a string contained in a linebuffer. */
  8004 static void
  8005 linebuffer_setlen (linebuffer *lbp, ptrdiff_t toksize)
  8006 {
  8007   if (lbp->size <= toksize)
  8008     {
  8009       ptrdiff_t multiplier = toksize / lbp->size + 1;
  8010       xrnew (lbp->buffer, lbp->size, multiplier);
  8011       lbp->size *= multiplier;
  8012     }
  8013   lbp->len = toksize;
  8014 }
  8015 
  8016 /* Memory allocators with a fatal error if memory is exhausted.  */
  8017 
  8018 static void
  8019 memory_full (void)
  8020 {
  8021   fatal ("virtual memory exhausted");
  8022 }
  8023 
  8024 static void *
  8025 xmalloc (ptrdiff_t size)
  8026 {
  8027   if (SIZE_MAX < size)
  8028     memory_full ();
  8029   void *result = malloc (size);
  8030   if (result == NULL)
  8031     memory_full ();
  8032   return result;
  8033 }
  8034 
  8035 static void *
  8036 xnmalloc (ptrdiff_t nitems, ptrdiff_t item_size)
  8037 {
  8038   ptrdiff_t nbytes;
  8039   assume (0 <= nitems);
  8040   assume (0 < item_size);
  8041   if (INT_MULTIPLY_WRAPV (nitems, item_size, &nbytes))
  8042     memory_full ();
  8043   return xmalloc (nbytes);
  8044 }
  8045 
  8046 static void *
  8047 xnrealloc (void *pa, ptrdiff_t nitems, ptrdiff_t item_size)
  8048 {
  8049   ptrdiff_t nbytes;
  8050   assume (0 <= nitems);
  8051   assume (0 < item_size);
  8052   if (INT_MULTIPLY_WRAPV (nitems, item_size, &nbytes) || SIZE_MAX < nbytes)
  8053     memory_full ();
  8054   void *result = realloc (pa, nbytes);
  8055   if (!result)
  8056     memory_full ();
  8057   return result;
  8058 }
  8059 
  8060 /*
  8061  * Local Variables:
  8062  * indent-tabs-mode: t
  8063  * tab-width: 8
  8064  * fill-column: 79
  8065  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
  8066  * c-file-style: "gnu"
  8067  * End:
  8068  */
  8069 
  8070 /* etags.c ends here */

/* [<][>][^][v][top][bottom][index][help] */