root/lib-src/etags.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. memcpyz
  2. streq
  3. strcaseeq
  4. strneq
  5. strncaseeq
  6. notinname
  7. begtoken
  8. intoken
  9. endtoken
  10. print_language_names
  11. print_version
  12. print_help
  13. main
  14. cleanup_tags_file
  15. get_compressor_from_suffix
  16. get_language_from_langname
  17. get_language_from_interpreter
  18. get_language_from_filename
  19. process_file_name
  20. process_file
  21. reset_input
  22. find_entries
  23. make_tag
  24. pfnote
  25. push_node
  26. pop_node
  27. free_tree
  28. free_fdesc
  29. add_node
  30. invalidate_nodes
  31. number_len
  32. total_size_of_entries
  33. put_entry
  34. put_entries
  35. hash
  36. in_word_set
  37. C_symtype
  38. pushclass_above
  39. popclass_above
  40. write_classname
  41. consider_token
  42. make_C_tag
  43. perhaps_more_input
  44. C_entries
  45. default_C_entries
  46. plain_C_entries
  47. Cplusplus_entries
  48. Cjava_entries
  49. Cstar_entries
  50. Yacc_entries
  51. just_read_file
  52. F_takeprec
  53. F_getit
  54. Fortran_functions
  55. Go_functions
  56. Ada_getit
  57. Ada_funcs
  58. Asm_labels
  59. Perl_functions
  60. Python_functions
  61. Ruby_functions
  62. Rust_entries
  63. PHP_functions
  64. Cobol_paragraphs
  65. Makefile_targets
  66. Pascal_functions
  67. L_getit
  68. Lisp_functions
  69. Lua_functions
  70. PS_functions
  71. Forth_words
  72. Scheme_functions
  73. TeX_commands
  74. TEX_decode_env
  75. Texinfo_nodes
  76. HTML_labels
  77. Prolog_functions
  78. prolog_skip_comment
  79. prolog_pr
  80. prolog_atom
  81. test_objc_is_mercury
  82. Mercury_functions
  83. mercury_skip_comment
  84. mercury_decl
  85. mercury_pr
  86. Erlang_functions
  87. erlang_func
  88. erlang_attribute
  89. erlang_atom
  90. scan_separators
  91. analyze_regex
  92. add_regex
  93. substitute
  94. free_regexps
  95. regex_tag_multiline
  96. nocase_tail
  97. get_tag
  98. get_lispy_tag
  99. readline_internal
  100. readline
  101. savestr
  102. savenstr
  103. skip_spaces
  104. skip_non_spaces
  105. skip_name
  106. fatal
  107. pfatal
  108. suggest_asking_for_help
  109. error
  110. verror
  111. concat
  112. etags_getcwd
  113. etags_mktmp
  114. escape_shell_arg_string
  115. do_move_file
  116. relative_filename
  117. absolute_filename
  118. absolute_dirname
  119. filename_is_absolute
  120. canonicalize_filename
  121. linebuffer_init
  122. linebuffer_setlen
  123. memory_full
  124. xmalloc
  125. xnmalloc
  126. xnrealloc

     1 /* Tags file maker to go with GNU Emacs           -*- coding: utf-8 -*-
     2 
     3 Copyright (C) 1984 The Regents of the University of California
     4 
     5 Redistribution and use in source and binary forms, with or without
     6 modification, are permitted provided that the following conditions are
     7 met:
     8 1. Redistributions of source code must retain the above copyright
     9    notice, this list of conditions and the following disclaimer.
    10 2. Redistributions in binary form must reproduce the above copyright
    11    notice, this list of conditions and the following disclaimer in the
    12    documentation and/or other materials provided with the
    13    distribution.
    14 3. Neither the name of the University nor the names of its
    15    contributors may be used to endorse or promote products derived
    16    from this software without specific prior written permission.
    17 
    18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
    19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
    20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
    21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
    22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
    25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
    26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
    27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
    28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    29 
    30 
    31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2023 Free Software
    32 Foundation, Inc.
    33 
    34 This file is not considered part of GNU Emacs.
    35 
    36 This program is free software: you can redistribute it and/or modify
    37 it under the terms of the GNU General Public License as published by
    38 the Free Software Foundation, either version 3 of the License, or (at
    39 your option) any later version.
    40 
    41 This program is distributed in the hope that it will be useful,
    42 but WITHOUT ANY WARRANTY; without even the implied warranty of
    43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    44 GNU General Public License for more details.
    45 
    46 You should have received a copy of the GNU General Public License
    47 along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
    48 
    49 
    50 /* NB To comply with the above BSD license, copyright information is
    51 reproduced in etc/ETAGS.README.  That file should be updated when the
    52 above notices are.
    53 
    54 To the best of our knowledge, this code was originally based on the
    55 ctags.c distributed with BSD4.2, which was copyrighted by the
    56 University of California, as described above. */
    57 
    58 
    59 /*
    60  * Authors:
    61  * 1983 Ctags originally by Ken Arnold.
    62  * 1984 Fortran added by Jim Kleckner.
    63  * 1984 Ed Pelegri-Llopart added C typedefs.
    64  * 1985 Emacs TAGS format by Richard Stallman.
    65  * 1989 Sam Kendall added C++.
    66  * 1992 Joseph B. Wells improved C and C++ parsing.
    67  * 1993 Francesco Potortì reorganized C and C++.
    68  * 1994 Line-by-line regexp tags by Tom Tromey.
    69  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
    70  * 2002 #line directives by Francesco Potortì.
    71  * Francesco Potortì maintained and improved it for many years
    72    starting in 1993.
    73  */
    74 
    75 /*
    76  * If you want to add support for a new language, start by looking at the LUA
    77  * language, which is the simplest.  Alternatively, consider distributing etags
    78  * together with a configuration file containing regexp definitions for etags.
    79  */
    80 
    81 #ifdef DEBUG
    82 #  undef DEBUG
    83 #  define DEBUG true
    84 #else
    85 #  define DEBUG false
    86 #endif
    87 
    88 #include <config.h>
    89 
    90 #ifdef MSDOS
    91 # undef MSDOS
    92 # define MSDOS true
    93 # include <sys/param.h>
    94 #else
    95 # define MSDOS false
    96 #endif /* MSDOS */
    97 
    98 #ifdef WINDOWSNT
    99 # include <direct.h>
   100 # undef HAVE_NTGUI
   101 # undef  DOS_NT
   102 # define DOS_NT
   103 /* The WINDOWSNT build doesn't use Gnulib's fcntl.h.  */
   104 # define O_CLOEXEC O_NOINHERIT
   105 #endif /* WINDOWSNT */
   106 
   107 #include <attribute.h>
   108 #include <inttypes.h>
   109 #include <limits.h>
   110 #include <unistd.h>
   111 #include <stdarg.h>
   112 #include <stdckdint.h>
   113 #include <stdlib.h>
   114 #include <string.h>
   115 #include <sysstdio.h>
   116 #include <errno.h>
   117 #include <fcntl.h>
   118 #include <binary-io.h>
   119 #include <intprops.h>
   120 #include <unlocked-io.h>
   121 #include <verify.h>
   122 #include <c-ctype.h>
   123 #include <c-strcase.h>
   124 
   125 #include <assert.h>
   126 #include <getopt.h>
   127 #include <regex.h>
   128 
   129 /* Define CTAGS to make the program "ctags" compatible with the usual one.
   130  Leave it undefined to make the program "etags", which makes emacs-style
   131  tag tables and tags typedefs, #defines and struct/union/enum by default. */
   132 #ifdef CTAGS
   133 # undef  CTAGS
   134 # define CTAGS true
   135 #else
   136 # define CTAGS false
   137 #endif
   138 
   139 /* MERCURY_HEURISTICS_RATIO is needed to disambiguate Mercury from
   140    Objective C, which share the file extension .m.
   141    See comments before function test_objc_is_mercury for details.  */
   142 #ifndef  MERCURY_HEURISTICS_RATIO
   143 # define MERCURY_HEURISTICS_RATIO 0.5
   144 #endif
   145 
   146 /* COPY to DEST from SRC (containing LEN bytes), and append a NUL byte.  */
   147 static void
   148 memcpyz (void *dest, void const *src, ptrdiff_t len)
   149 {
   150   char *e = mempcpy (dest, src, len);
   151   *e = '\0';
   152 }
   153 
   154 static bool
   155 streq (char const *s, char const *t)
   156 {
   157   return strcmp (s, t) == 0;
   158 }
   159 
   160 static bool
   161 strcaseeq (char const *s, char const *t)
   162 {
   163   return c_strcasecmp (s, t) == 0;
   164 }
   165 
   166 static bool
   167 strneq (char const *s, char const *t, size_t n)
   168 {
   169   return strncmp (s, t, n) == 0;
   170 }
   171 
   172 static bool
   173 strncaseeq (char const *s, char const *t, size_t n)
   174 {
   175   return c_strncasecmp (s, t, n) == 0;
   176 }
   177 
   178 /* C is not in a name.  */
   179 static bool
   180 notinname (unsigned char c)
   181 {
   182   /* Look at make_tag before modifying!  */
   183   static bool const table[UCHAR_MAX + 1] = {
   184     ['\0']=1, ['\t']=1, ['\n']=1, ['\f']=1, ['\r']=1, [' ']=1,
   185     ['(']=1, [')']=1, [',']=1, [';']=1, ['=']=1
   186   };
   187   return table[c];
   188 }
   189 
   190 /* C can start a token.  */
   191 static bool
   192 begtoken (unsigned char c)
   193 {
   194   static bool const table[UCHAR_MAX + 1] = {
   195     ['$']=1, ['@']=1,
   196     ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
   197     ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
   198     ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
   199     ['Y']=1, ['Z']=1,
   200     ['_']=1,
   201     ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
   202     ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
   203     ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
   204     ['y']=1, ['z']=1,
   205     ['~']=1
   206   };
   207   return table[c];
   208 }
   209 
   210 /* C can be in the middle of a token.  */
   211 static bool
   212 intoken (unsigned char c)
   213 {
   214   static bool const table[UCHAR_MAX + 1] = {
   215     ['$']=1,
   216     ['0']=1, ['1']=1, ['2']=1, ['3']=1, ['4']=1,
   217     ['5']=1, ['6']=1, ['7']=1, ['8']=1, ['9']=1,
   218     ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
   219     ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
   220     ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
   221     ['Y']=1, ['Z']=1,
   222     ['_']=1,
   223     ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
   224     ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
   225     ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
   226     ['y']=1, ['z']=1
   227   };
   228   return table[c];
   229 }
   230 
   231 /* C can end a token.  */
   232 static bool
   233 endtoken (unsigned char c)
   234 {
   235   static bool const table[UCHAR_MAX + 1] = {
   236     ['\0']=1, ['\t']=1, ['\n']=1, ['\r']=1, [' ']=1,
   237     ['!']=1, ['"']=1, ['#']=1, ['%']=1, ['&']=1, ['\'']=1, ['(']=1, [')']=1,
   238     ['*']=1, ['+']=1, [',']=1, ['-']=1, ['.']=1, ['/']=1, [':']=1, [';']=1,
   239     ['<']=1, ['=']=1, ['>']=1, ['?']=1, ['[']=1, [']']=1, ['^']=1,
   240     ['{']=1, ['|']=1, ['}']=1, ['~']=1
   241   };
   242   return table[c];
   243 }
   244 
   245 /*
   246  *      xnew, xrnew -- allocate, reallocate storage
   247  *
   248  * SYNOPSIS:    Type *xnew (ptrdiff_t n, Type);
   249  *              void xrnew (OldPointer, ptrdiff_t n, int multiplier);
        *
        * xnew passes N and sizeof (Type) to xnmalloc and casts the result
        * to Type *.  xrnew passes OldPointer, N and
        * multiplier * sizeof *OldPointer to xnrealloc and stores the result
        * back into OldPointer, so OldPointer must be an lvalue and is
        * evaluated more than once.
   250  */
   251 #define xnew(n, Type) ((Type *) xnmalloc (n, sizeof (Type)))
   252 #define xrnew(op, n, m) ((op) = xnrealloc (op, n, (m) * sizeof *(op)))
   253 
       /* Type of a language parse function (see the `function' member of
          `language'): it takes a FILE *, presumably the input stream to be
          tagged, and returns nothing.  */
   254 typedef void Lang_function (FILE *);
   255 
       /* Associates a compressed-file name suffix with the command used to
          decompress files having that suffix.  */
   256 typedef struct
   257 {
   258   const char *suffix;           /* file name suffix for this compressor */
   259   const char *command;          /* takes one arg and decompresses to stdout */
   260 } compressor;
   261 
       /* Describes one supported language: its name and help text, the
          function that parses it, and the file name suffixes, file names and
          interpreters by which its files are recognized.  The three string
          lists are presumably NULL-terminated, like the *_suffixes arrays
          defined in this file.  */
   262 typedef struct
   263 {
   264   const char *name;             /* language name */
   265   const char *help;             /* detailed help for the language */
   266   Lang_function *function;      /* parse function */
   267   const char **suffixes;        /* name suffixes of this language's files */
   268   const char **filenames;       /* names of this language's files */
   269   const char **interpreters;    /* interpreters for this language */
   270   bool metasource;              /* source used to generate other sources */
   271 } language;
   272 
       /* Description of one input file.  Descriptions are chained through
          NEXT into a singly linked list (the global `fdhead' is its head).  */
   273 typedef struct fdesc
   274 {
   275   struct fdesc *next;           /* for the linked list */
   276   char *infname;                /* uncompressed input file name */
   277   char *infabsname;             /* absolute uncompressed input file name */
   278   char *infabsdir;              /* absolute dir of input file */
   279   char *taggedfname;            /* file name to write in tagfile */
   280   language *lang;               /* language of file */
   281   char *prop;                   /* file properties to write in tagfile */
   282   bool usecharno;               /* etags tags shall contain char number */
   283   bool written;                 /* entry written in the tags file */
   284 } fdesc;
   285 
       /* One tag.  Nodes are linked through LEFT and RIGHT into a binary tree
          (the global `nodehead' is documented as the head of that tree), and
          each node points back at the description of the file it came from.  */
   286 typedef struct node_st
   287 {                               /* sorting structure */
   288   struct node_st *left, *right; /* left and right sons */
   289   fdesc *fdp;                   /* description of file to whom tag belongs */
   290   char *name;                   /* tag name */
   291   char *regex;                  /* search regexp */
   292   bool valid;                   /* write this tag on the tag file */
   293   bool is_func;                 /* function tag: use regexp in CTAGS mode */
   294   bool been_warned;             /* warning already given for duplicated tag */
   295   intmax_t lno;                 /* line number tag is on */
   296   intmax_t cno;                 /* character number line starts on */
   297 } node;
   298 
   299 /*
   300  * A `linebuffer' is a structure which holds a line of text.
   301  * `readline_internal' reads a line from a stream into a linebuffer
   302  * and works regardless of the length of the line.
   303  * SIZE is the size of BUFFER, LEN is the length of the string in
   304  * BUFFER after readline reads it.
   305  */
   306 typedef struct
   307 {
   308   ptrdiff_t size;               /* size of BUFFER */
   309   ptrdiff_t len;                /* length of the string held in BUFFER */
   310   char *buffer;                 /* the text itself */
   311 } linebuffer;
   312 
   313 /* Describes one command-line argument together with its kind
          (ARG_TYPE).  Used to support mixing of --lang and file names. */
   314 typedef struct
   315 {
   316   enum {
   317     at_language,                /* a language specification */
   318     at_regexp,                  /* a regular expression */
   319     at_filename,                /* a file name */
   320     at_stdin,                   /* read from stdin here */
   321     at_end                      /* stop parsing the list */
   322   } arg_type;                   /* argument type */
   323   language *lang;               /* language associated with the argument */
   324   char *what;                   /* the argument itself */
   325 } argument;
   326 
   327 /* Structure defining a regular expression.  Instances are chained
          through P_NEXT (the global `p_head' is documented as the list of
          all regexps).  */
   328 typedef struct regexp
   329 {
   330   struct regexp *p_next;        /* pointer to next in list */
   331   language *lang;               /* if set, use only for this language */
   332   char *pattern;                /* the regexp pattern */
   333   char *name;                   /* tag name */
   334   struct re_pattern_buffer *pat; /* the compiled pattern */
   335   struct re_registers regs;     /* re registers */
   336   bool error_signaled;          /* already signaled for this regexp */
   337   bool ignore_case;             /* ignore case when matching */
   338   bool multi_line;              /* do a multi-line match on the whole file */
   339 } regexp;
   340 
   341 
   342 /* Many compilers barf on this:
   343         Lang_function Ada_funcs;
   344    so let's write it this way */
   345 static void Ada_funcs (FILE *);
   346 static void Asm_labels (FILE *);
   347 static void C_entries (int c_ext, FILE *);
   348 static void default_C_entries (FILE *);
   349 static void plain_C_entries (FILE *);
   350 static void Cjava_entries (FILE *);
   351 static void Cobol_paragraphs (FILE *);
   352 static void Cplusplus_entries (FILE *);
   353 static void Cstar_entries (FILE *);
   354 static void Erlang_functions (FILE *);
   355 static void Forth_words (FILE *);
   356 static void Fortran_functions (FILE *);
   357 static void Go_functions (FILE *);
   358 static void HTML_labels (FILE *);
   359 static void Lisp_functions (FILE *);
   360 static void Lua_functions (FILE *);
   361 static void Makefile_targets (FILE *);
   362 static void Mercury_functions (FILE *);
   363 static void Pascal_functions (FILE *);
   364 static void Perl_functions (FILE *);
   365 static void PHP_functions (FILE *);
   366 static void PS_functions (FILE *);
   367 static void Prolog_functions (FILE *);
   368 static void Python_functions (FILE *);
   369 static void Ruby_functions (FILE *);
   370 static void Rust_entries (FILE *);
   371 static void Scheme_functions (FILE *);
   372 static void TeX_commands (FILE *);
   373 static void Texinfo_nodes (FILE *);
   374 static void Yacc_entries (FILE *);
   375 static void just_read_file (FILE *);
   376 
   377 static language *get_language_from_langname (const char *);
   378 static void readline (linebuffer *, FILE *);
   379 static ptrdiff_t readline_internal (linebuffer *, FILE *, char const *, const bool);
   380 static bool nocase_tail (const char *);
   381 static void get_tag (char *, char **);
   382 static void get_lispy_tag (char *);
   383 static void test_objc_is_mercury (char *, language **);
   384 
   385 static void analyze_regex (char *);
   386 static void free_regexps (void);
   387 static void regex_tag_multiline (void);
   388 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
   389 static void verror (char const *, va_list) ATTRIBUTE_FORMAT_PRINTF (1, 0);
   390 static _Noreturn void suggest_asking_for_help (void);
   391 static _Noreturn void fatal (char const *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
   392 static _Noreturn void pfatal (const char *);
   393 static void add_node (node *, node **);
   394 
   395 static void process_file_name (char *, language *);
   396 static void process_file (FILE *, char *, language *);
   397 static void find_entries (FILE *);
   398 static void free_tree (node *);
   399 static void free_fdesc (fdesc *);
   400 static void pfnote (char *, bool, char *, ptrdiff_t, intmax_t, intmax_t);
   401 static void invalidate_nodes (fdesc *, node **);
   402 static void put_entries (node *);
   403 static void cleanup_tags_file (char const * const, char const * const);
   404 
   405 #if !MSDOS && !defined (DOS_NT)
   406 static char *escape_shell_arg_string (char *);
   407 #endif
   408 static void do_move_file (const char *, const char *);
   409 static char *concat (const char *, const char *, const char *);
   410 static char *skip_spaces (char *);
   411 static char *skip_non_spaces (char *);
   412 static char *skip_name (char *);
   413 static char *savenstr (const char *, ptrdiff_t);
   414 static char *savestr (const char *);
   415 static char *etags_getcwd (void);
   416 static char *relative_filename (char *, char *);
   417 static char *absolute_filename (char *, char *);
   418 static char *absolute_dirname (char *, char *);
   419 static bool filename_is_absolute (char *f);
   420 static void canonicalize_filename (char *);
   421 static char *etags_mktmp (void);
   422 static void linebuffer_init (linebuffer *);
   423 static void linebuffer_setlen (linebuffer *, ptrdiff_t);
   424 static void *xmalloc (ptrdiff_t) ATTRIBUTE_MALLOC_SIZE ((1));
   425 static void *xnmalloc (ptrdiff_t, ptrdiff_t) ATTRIBUTE_MALLOC_SIZE ((1,2));
   426 static void *xnrealloc (void *, ptrdiff_t, ptrdiff_t)
   427   ATTRIBUTE_ALLOC_SIZE ((2,3));
   428 
   429 
       /* File-scope state: output settings, bookkeeping for the input file
          currently being read, the tag tree, shared line buffers, and the
          option flags.  */
   430 static char searchar = '/';     /* use /.../ searches */
   431 
   432 static char *tagfile;           /* output file */
   433 static char *progname;          /* name this program was invoked with */
   434 static char *cwd;               /* current working directory */
   435 static char *tagfiledir;        /* directory of tagfile */
   436 static FILE *tagf;              /* ioptr for tags file */
   437 static ptrdiff_t whatlen_max;   /* maximum length of any 'what' member */
   438 
   439 static fdesc *fdhead;           /* head of file description list */
   440 static fdesc *curfdp;           /* current file description */
   441 static char *infilename;        /* current input file name */
   442 static intmax_t lineno;         /* line number of current line */
   443 static intmax_t charno;         /* current character number */
   444 static intmax_t linecharno;     /* charno of start of current line */
   445 static char *dbp;               /* pointer to start of current tag */
   446 
   447 static intmax_t const invalidcharno = -1;
   448 
   449 static node *nodehead;          /* the head of the binary tree of tags */
   450 static node *last_node;         /* the last node created */
   451 
   452 static linebuffer lb;           /* the current line */
   453 static linebuffer filebuf;      /* a buffer containing the whole file */
   454 static linebuffer token_name;   /* a buffer containing a tag name */
   455 
   456 static bool append_to_tagfile;  /* -a: append to tags */
       /* The flags declared `int' rather than `bool' in the list below are
          exactly those whose address is stored in the `longopts' table, whose
          flag member is an int pointer.  */
   457 /* The next five default to true in C and derived languages.  */
   458 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
   459 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
   460                                 /* 0 struct/enum/union decls, and C++ */
   461                                 /* member functions. */
   462 static bool constantypedefs;    /* -d: create tags for C #define, enum */
   463                                 /* constants and variables. */
   464                                 /* -D: opposite of -d.  Default under ctags. */
   465 static int globals;             /* create tags for global variables */
   466 static int members;             /* create tags for C member variables */
   467 static int declarations;        /* --declarations: tag them and extern in C&Co*/
   468 static int no_line_directive;   /* ignore #line directives (undocumented) */
   469 static int no_duplicates;       /* no duplicate tags for ctags (undocumented) */
   470 static bool update;             /* -u: update tags */
   471 static bool vgrind_style;       /* -v: create vgrind style index output */
   472 static bool no_warnings;        /* -w: suppress warnings (undocumented) */
   473 static bool cxref_style;        /* -x: create cxref style output */
   474 static bool cplusplus;          /* .[hc] means C++, not C (undocumented) */
   475 static bool ignoreindent;       /* -I: ignore indentation in C */
   476 static int packages_only;       /* --packages-only: in Ada, only tag packages*/
   477 static int class_qualify;       /* -Q: produce class-qualified tags in C++/Java */
   478 static int debug;               /* --debug */
   479 
   480 /* STDIN is defined in LynxOS system headers */
   481 #ifdef STDIN
   482 # undef STDIN
   483 #endif
   484 
   485 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
   486 static bool parsing_stdin;      /* --parse-stdin used */
   487 
   488 static regexp *p_head;          /* list of all regexps */
   489 static bool need_filebuf;       /* some regexes are multi-line */
   490 
       /* Long option table for getopt_long.  Each entry is
          { name, argument requirement, flag, val }: when FLAG is NULL,
          getopt_long returns VAL (a short-option character, or STDIN);
          otherwise it stores VAL into *FLAG.  The CTAGS macro selects which
          of the two program-specific option sets is compiled in.  The table
          ends with an all-zero entry.
          NOTE(review): "help" is listed twice, with 'h' and with 'H' --
          confirm which entry getopt_long actually selects.
          NOTE(review): "class-qualify" stores 'Q' into class_qualify whereas
          the other flag options store 1 or 0; presumably the variable is
          only tested for being nonzero -- confirm before changing.  */
   491 static struct option longopts[] =
   492 {
   493   { "append",             no_argument,       NULL,               'a'   },
   494   { "packages-only",      no_argument,       &packages_only,     1     },
   495   { "c++",                no_argument,       NULL,               'C'   },
   496   { "debug",              no_argument,       &debug,             1     },
   497   { "declarations",       no_argument,       &declarations,      1     },
   498   { "no-line-directive",  no_argument,       &no_line_directive, 1     },
   499   { "no-duplicates",      no_argument,       &no_duplicates,     1     },
   500   { "help",               no_argument,       NULL,               'h'   },
   501   { "help",               no_argument,       NULL,               'H'   },
   502   { "ignore-indentation", no_argument,       NULL,               'I'   },
   503   { "language",           required_argument, NULL,               'l'   },
   504   { "members",            no_argument,       &members,           1     },
   505   { "no-members",         no_argument,       &members,           0     },
   506   { "output",             required_argument, NULL,               'o'   },
   507   { "class-qualify",      no_argument,       &class_qualify,     'Q'   },
   508   { "regex",              required_argument, NULL,               'r'   },
   509   { "no-regex",           no_argument,       NULL,               'R'   },
   510   { "ignore-case-regex",  required_argument, NULL,               'c'   },
   511   { "parse-stdin",        required_argument, NULL,               STDIN },
   512   { "version",            no_argument,       NULL,               'V'   },
   513 
   514 #if CTAGS /* Ctags options */
   515   { "backward-search",    no_argument,       NULL,               'B'   },
   516   { "cxref",              no_argument,       NULL,               'x'   },
   517   { "defines",            no_argument,       NULL,               'd'   },
   518   { "globals",            no_argument,       &globals,           1     },
   519   { "typedefs",           no_argument,       NULL,               't'   },
   520   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
   521   { "update",             no_argument,       NULL,               'u'   },
   522   { "vgrind",             no_argument,       NULL,               'v'   },
   523   { "no-warn",            no_argument,       NULL,               'w'   },
   524 
   525 #else /* Etags options */
   526   { "no-defines",         no_argument,       NULL,               'D'   },
   527   { "no-globals",         no_argument,       &globals,           0     },
   528   { "include",            required_argument, NULL,               'i'   },
   529 #endif
   530   { NULL }
   531 };
   532 
       /* Known compressed-file suffixes and the command that decompresses
          each kind of file to standard output.  The list ends with an entry
          whose suffix is NULL.  */
   533 static compressor compressors[] =
   534 {
   535   { "z", "gzip -d -c"},
   536   { "Z", "gzip -d -c"},
   537   { "gz", "gzip -d -c"},
   538   { "GZ", "gzip -d -c"},
   539   { "bz2", "bzip2 -d -c" },
   540   { "xz", "xz -d -c" },
   541   { "zst", "zstd -d -c" },
   542   { NULL }
   543 };
   544 
   545 /*
   546  * Language stuff.
   547  */
   548 
   549 /* Ada code */
   550 static const char *Ada_suffixes [] =
   551   { "ads", "adb", "ada", NULL };
   552 static const char Ada_help [] =
   553 "In Ada code, functions, procedures, packages, tasks and types are\n\
   554 tags.  Use the '--packages-only' option to create tags for\n\
   555 packages only.\n\
   556 Ada tag names have suffixes indicating the type of entity:\n\
   557         Entity type:    Qualifier:\n\
   558         ------------    ----------\n\
   559         function        /f\n\
   560         procedure       /p\n\
   561         package spec    /s\n\
   562         package body    /b\n\
   563         type            /t\n\
   564         task            /k\n\
   565 Thus, 'M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
   566 body of the package 'bidule', while 'M-x find-tag <RET> bidule <RET>'\n\
   567 will just search for any tag 'bidule'.";
   568 
   569 /* Assembly code */
   570 static const char *Asm_suffixes [] =
   571   { "a",        /* Unix assembler */
   572     "asm", /* Microcontroller assembly */
   573     "def", /* BSO/Tasking definition includes  */
   574     "inc", /* Microcontroller include files */
   575     "ins", /* Microcontroller include files */
   576     "s", "sa", /* Unix assembler */
   577     "S",   /* cpp-processed Unix assembler */
   578     "src", /* BSO/Tasking C compiler output */
   579     NULL
   580   };
   581 static const char Asm_help [] =
   582 "In assembler code, labels appearing at the beginning of a line,\n\
   583 followed by a colon, are tags.";
   584 
   585 
   586 /* Note that .c and .h can be considered C++, if the --c++ flag was
   587    given, or if the `class' or `template' keywords are met inside the file.
   588    That is why default_C_entries is called for these. */
   589 static const char *default_C_suffixes [] =
   590   { "c", "h", NULL };
   591 #if CTAGS                               /* C help for Ctags */
   592 static const char default_C_help [] =
   593 "In C code, any C function is a tag.  Use -t to tag typedefs.\n\
   594 Use -T to tag definitions of 'struct', 'union' and 'enum'.\n\
   595 Use -d to tag '#define' macro definitions and 'enum' constants.\n\
   596 Use --globals to tag global variables.\n\
   597 You can tag function declarations and external variables by\n\
   598 using '--declarations', and struct members by using '--members'.";
   599 #else                                   /* C help for Etags */
   600 static const char default_C_help [] =
   601 "In C code, any C function or typedef is a tag, and so are\n\
   602 definitions of 'struct', 'union' and 'enum'.  '#define' macro\n\
   603 definitions and 'enum' constants are tags unless you specify\n\
   604 '--no-defines'.  Global variables are tags unless you specify\n\
   605 '--no-globals' and so are struct members unless you specify\n\
   606 '--no-members'.  Use of '--no-globals', '--no-defines' and\n\
   607 '--no-members' can make the tags table file much smaller.\n\
   608 You can tag function declarations and external variables by\n\
   609 using '--declarations'.";
   610 #endif  /* C help for Ctags and Etags */
   611 
   612 static const char *Cplusplus_suffixes [] =
   613   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
   614     "M",                        /* Objective C++ */
   615     "pdb",                      /* PostScript with C syntax */
   616     NULL };
   617 static const char Cplusplus_help [] =
   618 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
   619 --help --lang=c --lang=c++ for full help.)\n\
   620 In addition to C tags, member functions are also recognized.  Member\n\
   621 variables are recognized unless you use the '--no-members' option.\n\
   622 Tags for variables and functions in classes are named 'CLASS::VARIABLE'\n\
   623 and 'CLASS::FUNCTION'.  'operator' definitions have tag names like\n\
   624 'operator+'.";
   625 
   626 static const char *Cjava_suffixes [] =
   627   { "java", NULL };
   628 static char Cjava_help [] =
   629 "In Java code, all the tags constructs of C and C++ code are\n\
   630 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
   631 
   632 
   633 static const char *Cobol_suffixes [] =
   634   { "COB", "cob", NULL };
   635 static char Cobol_help [] =
   636 "In Cobol code, tags are paragraph names; that is, any word\n\
   637 starting in column 8 and followed by a period.";
   638 
   639 static const char *Cstar_suffixes [] =
   640   { "cs", "hs", NULL };
   641 
   642 static const char *Erlang_suffixes [] =
   643   { "erl", "hrl", NULL };
   644 static const char Erlang_help [] =
   645 "In Erlang code, the tags are the functions, records and macros\n\
   646 defined in the file.";
   647 static const char *Erlang_interpreters [] =
   648   { "escript", NULL };
   649 
   650 static const char *Forth_suffixes [] =
   651   { "fth", "tok", NULL };
   652 static const char Forth_help [] =
   653 "In Forth code, tags are words defined by ':',\n\
   654 constant, code, create, defer, value, variable, buffer:, field.";
   655 
   656 static const char *Fortran_suffixes [] =
   657   { "F", "f", "f90", "for", NULL };
   658 static const char Fortran_help [] =
   659 "In Fortran code, functions, subroutines and block data are tags.";
   660 
   661 static const char *Go_suffixes [] = {"go", NULL};
   662 static const char Go_help [] =
   663   "In Go code, functions, interfaces and packages are tags.";
   664 
   665 static const char *HTML_suffixes [] =
   666   { "htm", "html", "shtml", NULL };
   667 static const char HTML_help [] =
   668 "In HTML input files, the tags are the 'title' and the 'h1', 'h2',\n\
   669 'h3' headers.  Also, tags are 'name=' in anchors and all\n\
   670 occurrences of 'id='.";
   671 
   672 static const char *Lisp_suffixes [] =
   673   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
   674 static const char Lisp_help [] =
   675 "In Lisp code, any function defined with 'defun', any variable\n\
   676 defined with 'defvar' or 'defconst', and in general the first\n\
   677 argument of any expression that starts with '(def' in column zero\n\
   678 is a tag.\n\
   679 The '--declarations' option tags \"(defvar foo)\" constructs too.";
   680 
   681 static const char *Lua_suffixes [] =
   682   { "lua", "LUA", NULL };
   683 static const char Lua_help [] =
   684 "In Lua scripts, all functions are tags.";
   685 static const char *Lua_interpreters [] =
   686   { "lua", NULL };
   687 
   688 static const char *Makefile_filenames [] =
   689   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
   690 static const char Makefile_help [] =
   691 "In makefiles, targets are tags; additionally, variables are tags\n\
   692 unless you specify '--no-globals'.";
   693 
   694 /* Mercury and Objective C share the same .m file extensions.  */
   695 static const char *Mercury_suffixes [] =
   696   {"m",
   697    NULL};
   698 static const char Mercury_help [] =
   699   "In Mercury code, tags are all declarations beginning a line with ':-'\n\
   700 and optionally Prolog-like definitions (first rule for a predicate or \
   701 function).\n\
   702 To enable this behavior, run etags using --declarations.";
   703 static bool with_mercury_definitions = false;
   704 float mercury_heuristics_ratio = MERCURY_HEURISTICS_RATIO;
   705 
   706 static const char *Objc_suffixes [] =
   707   { "lm",                       /* Objective lex file  */
   708     "m",                        /* By default, Objective C file will be assumed.  */
   709      NULL};
   710 static const char Objc_help [] =
   711 "In Objective C code, tags include Objective C definitions for classes,\n\
   712 class categories, methods and protocols.  Tags for variables and\n\
   713 functions in classes are named 'CLASS::VARIABLE' and 'CLASS::FUNCTION'.\
   714 \n(Use --help --lang=c --lang=objc --lang=java for full help.)";
   715 
   716 static const char *Pascal_suffixes [] =
   717   { "p", "pas", NULL };
   718 static const char Pascal_help [] =
   719 "In Pascal code, the tags are the functions and procedures defined\n\
   720 in the file.";
   721 /* " // this is for working around an Emacs highlighting bug... */
   722 
   723 static const char *Perl_suffixes [] =
   724   { "pl", "pm", NULL };
   725 static const char *Perl_interpreters [] =
   726   { "perl", "@PERL@", NULL };
   727 static const char Perl_help [] =
   728 "In Perl code, the tags are the packages, subroutines and variables\n\
   729 defined by the 'package', 'sub', 'my' and 'local' keywords.  Use\n\
   730 '--globals' if you want to tag global variables.  Tags for\n\
   731 subroutines are named 'PACKAGE::SUB'.  The name for subroutines\n\
   732 defined in the default package is 'main::SUB'.";
   733 
   734 static const char *PHP_suffixes [] =
   735   { "php", "php3", "php4", NULL };
   736 static const char PHP_help [] =
   737 "In PHP code, tags are functions, classes and defines.  Unless you use\n\
   738 the '--no-members' option, vars are tags too.";
   739 
   740 static const char *plain_C_suffixes [] =
   741   { "pc",                       /* Pro*C file */
   742      NULL };
   743 
   744 static const char *PS_suffixes [] =
   745   { "ps", "psw", NULL };        /* .psw is for PSWrap */
   746 static const char PS_help [] =
   747 "In PostScript code, the tags are the functions.";
   748 
   749 static const char *Prolog_suffixes [] =
   750   { "prolog", NULL };
   751 static const char Prolog_help [] =
   752 "In Prolog code, tags are predicates and rules at the beginning of\n\
   753 line.";
   754 static const char *Prolog_interpreters [] =
   755   { "gprolog", "pl", "yap", "swipl", "prolog", NULL };
   756 
   757 static const char *Python_suffixes [] =
   758   { "py", NULL };
   759 static const char Python_help [] =
   760 "In Python code, 'def' or 'class' at the beginning of a line\n\
   761 generate a tag.";
   762 static const char *Python_interpreters [] =
   763   { "python", NULL };
   764 
   765 static const char *Ruby_suffixes [] =
   766   { "rb", "ru", "rbw", NULL };
   767 static const char *Ruby_filenames [] =
   768   { "Rakefile", "Thorfile", NULL };
   769 static const char Ruby_help [] =
   770   "In Ruby code, 'def' or 'class' or 'module' at the beginning of\n\
   771 a line generate a tag.  Constants also generate a tag.";
   772 static const char *Ruby_interpreters [] =
   773   { "ruby", NULL };
   774 
   775 static const char *Rust_suffixes [] =
   776   { "rs", NULL };
   777 static const char Rust_help [] =
   778   "In Rust code, tags anything defined with 'fn', 'enum', \n\
   779 'struct' or 'macro_rules!'.";
   780 
   781 /* Can't do the `SCM' or `scm' prefix with a version number. */
   782 static const char *Scheme_suffixes [] =
   783   { "oak", "rkt", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
   784 static const char Scheme_help [] =
   785 "In Scheme code, tags include anything defined with 'def' or with a\n\
   786 construct whose name starts with 'def'.  They also include\n\
   787 variables set with 'set!' at top level in the file.";
   788 
   789 static const char *TeX_suffixes [] =
   790   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
   791 static const char TeX_help [] =
   792 "In LaTeX text, the argument of any of the commands '\\chapter',\n\
   793 '\\section', '\\subsection', '\\subsubsection', '\\eqno', '\\label',\n\
   794 '\\ref', '\\cite', '\\bibitem', '\\part', '\\appendix', '\\entry',\n\
   795 '\\index', '\\def', '\\newcommand', '\\renewcommand',\n\
   796 '\\newenvironment' or '\\renewenvironment' is a tag.\n\
   797 \n\
   798 Other commands can be specified by setting the environment variable\n\
   799 'TEXTAGS' to a colon-separated list like, for example,\n\
   800      TEXTAGS=\"mycommand:myothercommand\".";
   801 
   802 
   803 static const char *Texinfo_suffixes [] =
   804   { "texi", "texinfo", "txi", NULL };
   805 static const char Texinfo_help [] =
   806 "for texinfo files, lines starting with @node are tagged.";
   807 
   808 static const char *Yacc_suffixes [] =
   809   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
   810 static const char Yacc_help [] =
   811 "In Bison or Yacc input files, each rule defines as a tag the\n\
   812 nonterminal it constructs.  The portions of the file that contain\n\
   813 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
   814 for full help).";
   815 
   816 static const char auto_help [] =
   817 "'auto' is not a real language, it indicates to use\n\
   818 a default language for files base on file name suffix and file contents.";
   819 
   820 static const char none_help [] =
   821 "'none' is not a real language, it indicates to only do\n\
   822 regexp processing on files.";
   823 
   824 static const char no_lang_help [] =
   825 "No detailed help available for this language.";
   826 
   827 
/*
 * Table of languages.
 *
 * It is ok for a given function to be listed under more than one
 * name.  I just didn't.
 *
 * Entries are initialized positionally.  In order, the initializers
 * are: the language name, its help text, a function (presumably the
 * tag finder for that language -- the 'language' type is defined
 * elsewhere, confirm there), the list of file name suffixes, and then
 * optionally a list of whole file names, a list of interpreter names,
 * and a trailing flag that only "yacc" sets (NOTE(review): meaning
 * not visible from here).  Trailing initializers that are left out
 * are zero/NULL.
 *
 * The table is terminated by an entry whose name is NULL; that is the
 * condition print_language_names stops on.
 */

static language lang_names [] =
{
  { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
  { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
  { "c",         default_C_help, default_C_entries, default_C_suffixes },
  { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
  { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
  { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
  { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes,
                 NULL,           Erlang_interpreters },
  { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
  { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
  { "go",        Go_help,        Go_functions,      Go_suffixes        },
  { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
  { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
  { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
  { "lua",       Lua_help,Lua_functions,Lua_suffixes,NULL,Lua_interpreters},
  /* Makefiles have no suffix list; they are listed by file name only.  */
  { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
  /* objc listed before mercury as it is a better default for .m extensions.  */
  { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
  { "mercury",   Mercury_help,   Mercury_functions, Mercury_suffixes   },
  { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
  { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
  { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
  { "postscript",PS_help,        PS_functions,      PS_suffixes        },
  { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
  { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes,
                 NULL,           Prolog_interpreters },
  { "python",    Python_help,    Python_functions,  Python_suffixes,
                 NULL,           Python_interpreters },
  { "ruby",      Ruby_help,      Ruby_functions,    Ruby_suffixes,
                 Ruby_filenames, Ruby_interpreters },
  { "rust",      Rust_help,      Rust_entries,      Rust_suffixes      },
  { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
  { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
  { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
  { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,true},
  { "auto",      auto_help },                      /* default guessing scheme */
  { "none",      none_help,      just_read_file }, /* regexp matching only */
  { NULL }                /* end of list */
};
   876 
   877 
   878 static void
   879 print_language_names (void)
   880 {
   881   language *lang;
   882   const char **name, **ext;
   883 
   884   puts ("\nThese are the currently supported languages, along with the\n\
   885 default file names and dot suffixes:");
   886   for (lang = lang_names; lang->name != NULL; lang++)
   887     {
   888       printf ("  %-*s", 10, lang->name);
   889       if (lang->filenames != NULL)
   890         for (name = lang->filenames; *name != NULL; name++)
   891           printf (" %s", *name);
   892       if (lang->suffixes != NULL)
   893         for (ext = lang->suffixes; *ext != NULL; ext++)
   894           printf (" .%s", *ext);
   895       puts ("");
   896     }
   897   puts ("where 'auto' means use default language for files based on file\n\
   898 name suffix, and 'none' means only do regexp processing on files.\n\
   899 If no language is specified and no matching suffix is found,\n\
   900 the first line of the file is read for a sharp-bang (#!) sequence\n\
   901 followed by the name of an interpreter.  If no such sequence is found,\n\
   902 Fortran is tried first; if no tags are found, C is tried next.\n\
   903 When parsing any C file, a \"class\" or \"template\" keyword\n\
   904 switches to C++.");
   905   puts ("Compressed files are supported using gzip, bzip2, xz, and zstd.\n\
   906 \n\
   907 For detailed help on a given language use, for example,\n\
   908 etags --help --lang=ada.");
   909 }
   910 
   911 #if CTAGS
   912 # define PROGRAM_NAME "ctags"
   913 #else
   914 # define PROGRAM_NAME "etags"
   915 #endif
   916 static _Noreturn void
   917 print_version (void)
   918 {
   919   fputs ((PROGRAM_NAME " (" PACKAGE_NAME " " PACKAGE_VERSION ")\n"
   920           COPYRIGHT "\n"
   921           "This program is distributed under the terms in ETAGS.README\n"),
   922          stdout);
   923   exit (EXIT_SUCCESS);
   924 }
   925 
/* Unless the build overrides it, help for the undocumented options
   (the two "-w" entries below) is not printed.  */
#ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
# define PRINT_UNDOCUMENTED_OPTIONS_HELP false
#endif

/* Print help on standard output and exit successfully; never returns.

   ARGBUFFER is the list of collected arguments, terminated by an
   element whose arg_type is at_end.  If it contains any at_language
   elements, only the detailed help of those languages is printed,
   with an empty line between consecutive ones.  Otherwise the general
   usage message is printed: the option list (several entries depend
   on whether this is built as ctags or etags), the table of supported
   languages, and the bug-report address.  */
static _Noreturn void
print_help (argument *argbuffer)
{
  bool help_for_lang = false;

  /* Language-specific help: one text per --language argument seen.  */
  for (; argbuffer->arg_type != at_end; argbuffer++)
    if (argbuffer->arg_type == at_language)
      {
        if (help_for_lang)
          puts ("");
        puts (argbuffer->lang->help);
        help_for_lang = true;
      }

  /* If any language help was printed, that is all the user gets.  */
  if (help_for_lang)
    exit (EXIT_SUCCESS);

  /* General help from here on.  */
  printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
\n\
These are the options accepted by %s.\n", progname, progname);
  puts ("You may use unambiguous abbreviations for the long option names.");
  puts ("  A - as file name means read names from stdin (one per line).\n\
Absolute names are stored in the output file as they are.\n\
Relative ones are stored relative to the output file's directory.\n");

  puts ("-a, --append\n\
        Append tag entries to existing tags file.");

  puts ("--packages-only\n\
        For Ada files, only generate tags for packages.");

  if (CTAGS)
    puts ("-B, --backward-search\n\
        Write the search commands for the tag entries using '?', the\n\
        backward-search command instead of '/', the forward-search command.");

  /* This option is mostly obsolete, because etags can now automatically
     detect C++.  Retained for backward compatibility and for debugging and
     experimentation.  In principle, we could want to tag as C++ even
     before any "class" or "template" keyword.
  puts ("-C, --c++\n\
        Treat files whose name suffix defaults to C language as C++ files.");
  */

  puts ("--declarations\n\
        In C and derived languages, create tags for function declarations,");
  if (CTAGS)
    puts ("\tand create tags for extern variables if --globals is used.");
  else
    puts
      ("\tand create tags for extern variables unless --no-globals is used.");

  puts ("\tIn Mercury, tag both declarations starting a line with ':-' and\n\
        first predicates or functions in clauses.");

  if (CTAGS)
    puts ("-d, --defines\n\
        Create tag entries for C #define constants and enum constants, too.");
  else
    puts ("-D, --no-defines\n\
        Don't create tag entries for C #define constants and enum constants.\n\
        This makes the tags file smaller.");

  if (!CTAGS)
    puts ("-i FILE, --include=FILE\n\
        Include a note in tag file indicating that, when searching for\n\
        a tag, one should also consult the tags file FILE after\n\
        checking the current file.");

  puts ("-l LANG, --language=LANG\n\
        Force the following files to be considered as written in the\n\
        named language up to the next --language=LANG option.");

  if (CTAGS)
    puts ("--globals\n\
        Create tag entries for global variables in some languages.");
  else
    puts ("--no-globals\n\
        Do not create tag entries for global variables in some\n\
        languages.  This makes the tags file smaller.");

  puts ("--no-line-directive\n\
        Ignore #line preprocessor directives in C and derived languages.");

  if (CTAGS)
    puts ("--members\n\
        Create tag entries for members of structures in some languages.");
  else
    puts ("--no-members\n\
        Do not create tag entries for members of structures\n\
        in some languages.");

  puts ("-Q, --class-qualify\n\
        Qualify tag names with their class name in C++, ObjC, Java, and Perl.\n\
        This produces tag names of the form \"class::member\" for C++,\n\
        \"class(category)\" for Objective C, and \"class.member\" for Java.\n\
        For Objective C, this also produces class methods qualified with\n\
        their arguments, as in \"foo:bar:baz:more\".\n\
        For Perl, this produces \"package::member\".");
  puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
        Make a tag for each line matching a regular expression pattern\n\
        in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
        files only.  REGEXFILE is a file containing one REGEXP per line.\n\
        REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
        optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
  puts ("       If TAGNAME/ is present, the tags created are named.\n\
        For example Tcl named tags can be created with:\n\
          --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
        MODS are optional one-letter modifiers: 'i' means to ignore case,\n\
        'm' means to allow multi-line matches, 's' implies 'm' and\n\
        causes dot to match any character, including newline.");

  puts ("-R, --no-regex\n\
        Don't create tags from regexps for the following files.");

  puts ("-I, --ignore-indentation\n\
        In C and C++ do not assume that a closing brace in the first\n\
        column is the final brace of a function or structure definition.");

  puts ("-o FILE, --output=FILE\n\
        Write the tags to FILE.");

  puts ("--parse-stdin=NAME\n\
        Read from standard input and record tags as belonging to file NAME.");

  if (CTAGS)
    {
      puts ("-t, --typedefs\n\
        Generate tag entries for C and Ada typedefs.");
      puts ("-T, --typedefs-and-c++\n\
        Generate tag entries for C typedefs, C struct/enum/union tags,\n\
        and C++ member functions.");
    }

  if (CTAGS)
    puts ("-u, --update\n\
        Update the tag entries for the given files, leaving tag\n\
        entries for other files in place.  Currently, this is\n\
        implemented by deleting the existing entries for the given\n\
        files and then rewriting the new entries at the end of the\n\
        tags file.  It is often faster to simply rebuild the entire\n\
        tag file than to use this.");

  if (CTAGS)
    {
      puts ("-v, --vgrind\n\
        Print on the standard output an index of items intended for\n\
        human consumption, similar to the output of vgrind.  The index\n\
        is sorted, and gives the page number of each item.");

      /* The two "-w" entries are shown only when help for
         undocumented options has been enabled at build time.  */
      if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
        puts ("-w, --no-duplicates\n\
        Do not create duplicate tag entries, for compatibility with\n\
        traditional ctags.");

      if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
        puts ("-w, --no-warn\n\
        Suppress warning messages about duplicate tag entries.");

      puts ("-x, --cxref\n\
        Like --vgrind, but in the style of cxref, rather than vgrind.\n\
        The output uses line numbers instead of page numbers, but\n\
        beyond that the differences are cosmetic; try both to see\n\
        which you like.");
    }

  puts ("-V, --version\n\
        Print the version of the program.\n\
-h, --help\n\
        Print this help message.\n\
        Followed by one or more '--language' options prints detailed\n\
        help about tag generation for the specified languages.");

  print_language_names ();

  puts ("");
  puts ("Report bugs to bug-gnu-emacs@gnu.org");

  exit (EXIT_SUCCESS);
}
  1110 
  1111 
  1112 int
  1113 main (int argc, char **argv)
  1114 {
  1115   int i;
  1116   int nincluded_files;
  1117   char **included_files;
  1118   argument *argbuffer;
  1119   int current_arg, file_count;
  1120   linebuffer filename_lb;
  1121   bool help_asked = false;
  1122   ptrdiff_t len;
  1123   char *optstring;
  1124   int opt;
  1125 
  1126   progname = argv[0];
  1127   nincluded_files = 0;
  1128   included_files = xnew (argc, char *);
  1129   current_arg = 0;
  1130   file_count = 0;
  1131 
  1132   /* Allocate enough no matter what happens.  Overkill, but each one
  1133      is small. */
  1134   argbuffer = xnew (argc, argument);
  1135 
  1136   /*
  1137    * Always find typedefs and structure tags.
  1138    * Also default to find macro constants, enum constants, struct
  1139    * members and global variables.  Do it for both etags and ctags.
  1140    */
  1141   typedefs = typedefs_or_cplusplus = constantypedefs = true;
  1142   globals = members = true;
  1143 
  1144   /* When the optstring begins with a '-' getopt_long does not rearrange the
  1145      non-options arguments to be at the end, but leaves them alone. */
  1146   optstring = concat ("-ac:Cf:Il:o:Qr:RSVhH",
  1147                       (CTAGS) ? "BxdtTuvw" : "Di:",
  1148                       "");
  1149 
  1150   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
  1151     switch (opt)
  1152       {
  1153       case 0:
  1154         /* If getopt returns 0, then it has already processed a
  1155            long-named option.  We should do nothing.  */
  1156         break;
  1157 
  1158       case 1:
  1159         /* This means that a file name has been seen.  Record it. */
  1160         argbuffer[current_arg].arg_type = at_filename;
  1161         argbuffer[current_arg].what     = optarg;
  1162         len = strlen (optarg);
  1163         if (whatlen_max < len)
  1164           whatlen_max = len;
  1165         ++current_arg;
  1166         ++file_count;
  1167         break;
  1168 
  1169       case STDIN:
  1170         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
  1171         argbuffer[current_arg].arg_type = at_stdin;
  1172         argbuffer[current_arg].what     = optarg;
  1173         len = strlen (optarg);
  1174         if (whatlen_max < len)
  1175           whatlen_max = len;
  1176         ++current_arg;
  1177         ++file_count;
  1178         if (parsing_stdin)
  1179           fatal ("cannot parse standard input more than once");
  1180         parsing_stdin = true;
  1181         break;
  1182 
  1183         /* Common options. */
  1184       case 'a': append_to_tagfile = true;       break;
  1185       case 'C': cplusplus = true;               break;
  1186       case 'f':         /* for compatibility with old makefiles */
  1187       case 'o':
  1188         if (tagfile)
  1189           {
  1190             error ("-o option may only be given once.");
  1191             suggest_asking_for_help ();
  1192           }
  1193         tagfile = optarg;
  1194         break;
  1195       case 'I':
  1196       case 'S':         /* for backward compatibility */
  1197         ignoreindent = true;
  1198         break;
  1199       case 'l':
  1200         {
  1201           language *lang = get_language_from_langname (optarg);
  1202           if (lang != NULL)
  1203             {
  1204               argbuffer[current_arg].lang = lang;
  1205               argbuffer[current_arg].arg_type = at_language;
  1206               ++current_arg;
  1207             }
  1208         }
  1209         break;
  1210       case 'c':
  1211         /* Backward compatibility: support obsolete --ignore-case-regexp. */
  1212         optarg = concat (optarg, "i", ""); /* memory leak here */
  1213         FALLTHROUGH;
  1214       case 'r':
  1215         argbuffer[current_arg].arg_type = at_regexp;
  1216         argbuffer[current_arg].what = optarg;
  1217         len = strlen (optarg);
  1218         if (whatlen_max < len)
  1219           whatlen_max = len;
  1220         ++current_arg;
  1221         break;
  1222       case 'R':
  1223         argbuffer[current_arg].arg_type = at_regexp;
  1224         argbuffer[current_arg].what = NULL;
  1225         ++current_arg;
  1226         break;
  1227       case 'V':
  1228         print_version ();
  1229         break;
  1230       case 'h':
  1231       case 'H':
  1232         help_asked = true;
  1233         break;
  1234       case 'Q':
  1235         class_qualify = 1;
  1236         break;
  1237 
  1238         /* Etags options */
  1239       case 'D': constantypedefs = false;                        break;
  1240       case 'i': included_files[nincluded_files++] = optarg;     break;
  1241 
  1242         /* Ctags options. */
  1243       case 'B': searchar = '?';                                 break;
  1244       case 'd': constantypedefs = true;                         break;
  1245       case 't': typedefs = true;                                break;
  1246       case 'T': typedefs = typedefs_or_cplusplus = true;        break;
  1247       case 'u': update = true;                                  break;
  1248       case 'v': vgrind_style = true;                            FALLTHROUGH;
  1249       case 'x': cxref_style = true;                             break;
  1250       case 'w': no_warnings = true;                             break;
  1251       default:
  1252         suggest_asking_for_help ();
  1253       }
  1254 
  1255   /* No more options.  Store the rest of arguments. */
  1256   for (; optind < argc; optind++)
  1257     {
  1258       argbuffer[current_arg].arg_type = at_filename;
  1259       argbuffer[current_arg].what = argv[optind];
  1260       len = strlen (argv[optind]);
  1261       if (whatlen_max < len)
  1262         whatlen_max = len;
  1263       ++current_arg;
  1264       ++file_count;
  1265     }
  1266 
  1267   argbuffer[current_arg].arg_type = at_end;
  1268 
  1269   if (help_asked)
  1270     print_help (argbuffer);
  1271 
  1272   if (nincluded_files == 0 && file_count == 0)
  1273     {
  1274       error ("no input files specified.");
  1275       suggest_asking_for_help ();
  1276     }
  1277 
  1278   if (tagfile == NULL)
  1279     tagfile = savestr (CTAGS ? "tags" : "TAGS");
  1280   cwd = etags_getcwd ();        /* the current working directory */
  1281   if (cwd[strlen (cwd) - 1] != '/')
  1282     {
  1283       char *oldcwd = cwd;
  1284       cwd = concat (oldcwd, "/", "");
  1285       free (oldcwd);
  1286     }
  1287 
  1288   /* Compute base directory for relative file names. */
  1289   if (streq (tagfile, "-")
  1290       || strneq (tagfile, "/dev/", 5))
  1291     tagfiledir = cwd;            /* relative file names are relative to cwd */
  1292   else
  1293     {
  1294       canonicalize_filename (tagfile);
  1295       tagfiledir = absolute_dirname (tagfile, cwd);
  1296     }
  1297 
  1298   linebuffer_init (&lb);
  1299   linebuffer_init (&filename_lb);
  1300   linebuffer_init (&filebuf);
  1301   linebuffer_init (&token_name);
  1302 
  1303   if (!CTAGS)
  1304     {
  1305       if (streq (tagfile, "-"))
  1306         {
  1307           tagf = stdout;
  1308           set_binary_mode (STDOUT_FILENO, O_BINARY);
  1309         }
  1310       else
  1311         tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
  1312       if (tagf == NULL)
  1313         pfatal (tagfile);
  1314     }
  1315 
  1316   /*
  1317    * Loop through files finding functions.
  1318    */
  1319   for (i = 0; i < current_arg; i++)
  1320     {
  1321       static language *lang;    /* non-NULL if language is forced */
  1322       char *this_file;
  1323 
  1324       switch (argbuffer[i].arg_type)
  1325         {
  1326         case at_language:
  1327           lang = argbuffer[i].lang;
  1328           break;
  1329         case at_regexp:
  1330           analyze_regex (argbuffer[i].what);
  1331           break;
  1332         case at_filename:
  1333               this_file = argbuffer[i].what;
  1334               /* Input file named "-" means read file names from stdin
  1335                  (one per line) and use them. */
  1336               if (streq (this_file, "-"))
  1337                 {
  1338                   if (parsing_stdin)
  1339                     fatal ("cannot parse standard input "
  1340                            "AND read file names from it");
  1341                   while (readline_internal (&filename_lb, stdin, "-", false) > 0)
  1342                     process_file_name (filename_lb.buffer, lang);
  1343                 }
  1344               else
  1345                 process_file_name (this_file, lang);
  1346           break;
  1347         case at_stdin:
  1348           this_file = argbuffer[i].what;
  1349           process_file (stdin, this_file, lang);
  1350           break;
  1351         default:
  1352           error ("internal error: arg_type");
  1353         }
  1354     }
  1355 
  1356   free_regexps ();
  1357   free (lb.buffer);
  1358   free (filebuf.buffer);
  1359   free (token_name.buffer);
  1360 
  1361   if (!CTAGS || cxref_style)
  1362     {
  1363       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
  1364       put_entries (nodehead);
  1365       free_tree (nodehead);
  1366       nodehead = NULL;
  1367       if (!CTAGS)
  1368         {
  1369           fdesc *fdp;
  1370 
  1371           /* Output file entries that have no tags. */
  1372           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
  1373             if (!fdp->written)
  1374               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
  1375 
  1376           while (nincluded_files-- > 0)
  1377             fprintf (tagf, "\f\n%s,include\n", *included_files++);
  1378 
  1379           if (fclose (tagf) == EOF)
  1380             pfatal (tagfile);
  1381         }
  1382 
  1383       return EXIT_SUCCESS;
  1384     }
  1385 
  1386   /* From here on, we are in (CTAGS && !cxref_style) */
  1387   if (update)
  1388     {
  1389       for (i = 0; i < current_arg; ++i)
  1390         {
  1391           switch (argbuffer[i].arg_type)
  1392             {
  1393             case at_filename:
  1394             case at_stdin:
  1395               break;
  1396             default:
  1397               continue;         /* the for loop */
  1398             }
  1399           cleanup_tags_file (tagfile, argbuffer[i].what);
  1400         }
  1401       append_to_tagfile = true;
  1402     }
  1403 
  1404   tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
  1405   if (tagf == NULL)
  1406     pfatal (tagfile);
  1407   put_entries (nodehead);       /* write all the tags (CTAGS) */
  1408   free_tree (nodehead);
  1409   nodehead = NULL;
  1410   if (fclose (tagf) == EOF)
  1411     pfatal (tagfile);
  1412 
  1413   if (CTAGS)
  1414     if (append_to_tagfile || update)
  1415       {
  1416         /* Maybe these should be used:
  1417            setenv ("LC_COLLATE", "C", 1);
  1418            setenv ("LC_ALL", "C", 1); */
  1419         char *cmd = xmalloc (8 * strlen (tagfile) + sizeof "sort -u -o '' ''");
  1420 #if defined WINDOWSNT || MSDOS
  1421         /* Quote "like this".  No need to escape the quotes in the file name,
  1422            since it is not allowed in file names on these systems.  */
  1423         char *z = stpcpy (cmd, "sort -u -o \"");
  1424         z = stpcpy (z, tagfile);
  1425         z = stpcpy (z, "\" \"");
  1426         z = stpcpy (z, tagfile);
  1427         stpcpy (z, "\"");
  1428 #else
  1429         /* Quote 'like this', and escape the apostrophe in the file name.  */
  1430         char *z = stpcpy (cmd, "sort -u -o '");
  1431         char *escaped_tagfile = z;
  1432         for (; *tagfile; *z++ = *tagfile++)
  1433           if (*tagfile == '\'')
  1434             z = stpcpy (z, "'\\'");
  1435         ptrdiff_t escaped_tagfile_len = z - escaped_tagfile;
  1436         z = stpcpy (z, "' '");
  1437         z = mempcpy (z, escaped_tagfile, escaped_tagfile_len);
  1438         strcpy (z, "'");
  1439 #endif
  1440         return system (cmd);
  1441       }
  1442   return EXIT_SUCCESS;
  1443 }
  1444 
  1445 /*
  1446  * Equivalent to: mv tags OTAGS;grep -Fv ' filename ' OTAGS >tags;rm OTAGS
  1447  */
  1448 static void
  1449 cleanup_tags_file (const char* tagfile, const char* match_file_name)
  1450 {
  1451   FILE *otags_f = fopen ("OTAGS", "wb");
  1452   FILE *tag_f = fopen (tagfile, "rb");
  1453 
  1454   if (otags_f == NULL)
  1455     pfatal ("OTAGS");
  1456 
  1457   if (tag_f == NULL)
  1458     pfatal (tagfile);
  1459 
  1460   int buf_len = strlen (match_file_name) + sizeof ("\t\t ") + 1;
  1461   char *buf = xmalloc (buf_len);
  1462   snprintf (buf, buf_len, "\t%s\t", match_file_name);
  1463 
  1464   linebuffer line;
  1465   linebuffer_init (&line);
  1466   while (readline_internal (&line, tag_f, tagfile, true) > 0)
  1467     {
  1468       if (ferror (tag_f))
  1469         pfatal (tagfile);
  1470 
  1471       if (strstr (line.buffer, buf) == NULL)
  1472         {
  1473           fprintf (otags_f, "%s\n", line.buffer);
  1474           if (ferror (tag_f))
  1475             pfatal (tagfile);
  1476         }
  1477     }
  1478   free (buf);
  1479   free (line.buffer);
  1480 
  1481   if (fclose (otags_f) == EOF)
  1482     pfatal ("OTAGS");
  1483 
  1484   if (fclose (tag_f) == EOF)
  1485     pfatal (tagfile);
  1486 
  1487   do_move_file ("OTAGS", tagfile);
  1488   return;
  1489 }
  1490 
  1491 /*
  1492  * Return a compressor given the file name.  If EXTPTR is non-zero,
  1493  * return a pointer into FILE where the compressor-specific
  1494  * extension begins.  If no compressor is found, NULL is returned
  1495  * and EXTPTR is not significant.
  1496  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
  1497  */
  1498 static compressor *
  1499 get_compressor_from_suffix (char *file, char **extptr)
  1500 {
  1501   compressor *compr;
  1502   char *slash, *suffix;
  1503 
  1504   /* File has been processed by canonicalize_filename,
  1505      so we don't need to consider backslashes on DOS_NT.  */
  1506   slash = strrchr (file, '/');
  1507   suffix = strrchr (file, '.');
  1508   if (suffix == NULL || suffix < slash)
  1509     return NULL;
  1510   if (extptr != NULL)
  1511     *extptr = suffix;
  1512   suffix += 1;
  1513   /* Let those poor souls who live with DOS 8+3 file name limits get
  1514      some solace by treating foo.cgz as if it were foo.c.gz, etc.
  1515      Only the first do loop is run if not MSDOS */
  1516   do
  1517     {
  1518       for (compr = compressors; compr->suffix != NULL; compr++)
  1519         if (streq (compr->suffix, suffix))
  1520           return compr;
  1521       if (!MSDOS)
  1522         break;                  /* do it only once: not really a loop */
  1523       if (extptr != NULL)
  1524         *extptr = ++suffix;
  1525     } while (*suffix != '\0');
  1526   return NULL;
  1527 }
  1528 
  1529 
  1530 
  1531 /*
  1532  * Return a language given the name.
  1533  */
  1534 static language *
  1535 get_language_from_langname (const char *name)
  1536 {
  1537   language *lang;
  1538 
  1539   if (name == NULL)
  1540     error ("empty language name");
  1541   else
  1542     {
  1543       for (lang = lang_names; lang->name != NULL; lang++)
  1544         if (streq (name, lang->name))
  1545           return lang;
  1546       error ("unknown language \"%s\"", name);
  1547     }
  1548 
  1549   return NULL;
  1550 }
  1551 
  1552 
  1553 /*
  1554  * Return a language given the interpreter name.
  1555  */
  1556 static language *
  1557 get_language_from_interpreter (char *interpreter)
  1558 {
  1559   language *lang;
  1560   const char **iname;
  1561 
  1562   if (interpreter == NULL)
  1563     return NULL;
  1564   for (lang = lang_names; lang->name != NULL; lang++)
  1565     if (lang->interpreters != NULL)
  1566       for (iname = lang->interpreters; *iname != NULL; iname++)
  1567         if (streq (*iname, interpreter))
  1568             return lang;
  1569 
  1570   return NULL;
  1571 }
  1572 
  1573 
  1574 
  1575 /*
  1576  * Return a language given the file name.
  1577  */
  1578 static language *
  1579 get_language_from_filename (char *file, bool case_sensitive)
  1580 {
  1581   language *lang;
  1582   const char **name, **ext, *suffix;
  1583   char *slash;
  1584 
  1585   /* Try whole file name first. */
  1586   slash = strrchr (file, '/');
  1587   if (slash != NULL)
  1588     file = slash + 1;
  1589 #ifdef DOS_NT
  1590   else if (file[0] && file[1] == ':')
  1591     file += 2;
  1592 #endif
  1593   for (lang = lang_names; lang->name != NULL; lang++)
  1594     if (lang->filenames != NULL)
  1595       for (name = lang->filenames; *name != NULL; name++)
  1596         if ((case_sensitive)
  1597             ? streq (*name, file)
  1598             : strcaseeq (*name, file))
  1599           return lang;
  1600 
  1601   /* If not found, try suffix after last dot. */
  1602   suffix = strrchr (file, '.');
  1603   if (suffix == NULL)
  1604     return NULL;
  1605   suffix += 1;
  1606   for (lang = lang_names; lang->name != NULL; lang++)
  1607     if (lang->suffixes != NULL)
  1608       for (ext = lang->suffixes; *ext != NULL; ext++)
  1609         if ((case_sensitive)
  1610             ? streq (*ext, suffix)
  1611             : strcaseeq (*ext, suffix))
  1612           return lang;
  1613   return NULL;
  1614 }
  1615 
  1616 
  1617 /*
  1618  * This routine is called on each file argument.
  1619  */
  1620 static void
  1621 process_file_name (char *file, language *lang)
  1622 {
  1623   FILE *inf;
  1624   fdesc *fdp;
  1625   compressor *compr;
  1626   char *compressed_name, *uncompressed_name;
  1627   char *ext, *real_name UNINIT, *tmp_name UNINIT;
  1628   int retval;
  1629 
  1630   canonicalize_filename (file);
  1631   if (streq (file, tagfile) && !streq (tagfile, "-"))
  1632     {
  1633       error ("skipping inclusion of %s in self.", file);
  1634       return;
  1635     }
  1636   compr = get_compressor_from_suffix (file, &ext);
  1637   if (compr)
  1638     {
  1639       compressed_name = file;
  1640       uncompressed_name = savenstr (file, ext - file);
  1641     }
  1642   else
  1643     {
  1644       compressed_name = NULL;
  1645       uncompressed_name = file;
  1646     }
  1647 
  1648   /* If the canonicalized uncompressed name
  1649      has already been dealt with, skip it silently. */
  1650   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
  1651     {
  1652       assert (fdp->infname != NULL);
  1653       if (streq (uncompressed_name, fdp->infname))
  1654         goto cleanup;
  1655     }
  1656 
  1657   inf = fopen (file, "r" FOPEN_BINARY);
  1658   if (inf)
  1659     real_name = file;
  1660   else
  1661     {
  1662       int file_errno = errno;
  1663       if (compressed_name)
  1664         {
  1665           /* Try with the given suffix.  */
  1666           inf = fopen (uncompressed_name, "r" FOPEN_BINARY);
  1667           if (inf)
  1668             real_name = uncompressed_name;
  1669         }
  1670       else
  1671         {
  1672           /* Try all possible suffixes.  */
  1673           for (compr = compressors; compr->suffix != NULL; compr++)
  1674             {
  1675               compressed_name = concat (file, ".", compr->suffix);
  1676               inf = fopen (compressed_name, "r" FOPEN_BINARY);
  1677               if (inf)
  1678                 {
  1679                   real_name = compressed_name;
  1680                   break;
  1681                 }
  1682               if (MSDOS)
  1683                 {
  1684                   char *suf = compressed_name + strlen (file);
  1685                   size_t suflen = strlen (compr->suffix) + 1;
  1686                   for ( ; suf[1]; suf++, suflen--)
  1687                     {
  1688                       memmove (suf, suf + 1, suflen);
  1689                       inf = fopen (compressed_name, "r" FOPEN_BINARY);
  1690                       if (inf)
  1691                         {
  1692                           real_name = compressed_name;
  1693                           break;
  1694                         }
  1695                     }
  1696                   if (inf)
  1697                     break;
  1698                 }
  1699               free (compressed_name);
  1700               compressed_name = NULL;
  1701             }
  1702         }
  1703       if (! inf)
  1704         {
  1705           errno = file_errno;
  1706           perror (file);
  1707           goto cleanup;
  1708         }
  1709     }
  1710 
  1711   if (real_name == compressed_name)
  1712     {
  1713       fclose (inf);
  1714       tmp_name = etags_mktmp ();
  1715       if (!tmp_name)
  1716         inf = NULL;
  1717       else
  1718         {
  1719 #if MSDOS || defined (DOS_NT)
  1720           int buf_len =
  1721             strlen (compr->command)
  1722             + strlen (" \"\" > \"\"") + strlen (real_name)
  1723             + strlen (tmp_name) + 1;
  1724           char *cmd = xmalloc (buf_len);
  1725           snprintf (cmd, buf_len, "%s \"%s\" > \"%s\"",
  1726                     compr->command, real_name, tmp_name);
  1727 #else
  1728           char *new_real_name = escape_shell_arg_string (real_name);
  1729           char *new_tmp_name = escape_shell_arg_string (tmp_name);
  1730           int buf_len =
  1731             strlen (compr->command) + strlen ("  > ") + strlen (new_real_name)
  1732             + strlen (new_tmp_name) + 1;
  1733           char *cmd = xmalloc (buf_len);
  1734           snprintf (cmd, buf_len, "%s %s > %s",
  1735                     compr->command, new_real_name, new_tmp_name);
  1736           free (new_real_name);
  1737           free (new_tmp_name);
  1738 #endif
  1739           inf = (system (cmd) == -1
  1740                  ? NULL
  1741                  : fopen (tmp_name, "r" FOPEN_BINARY));
  1742           free (cmd);
  1743         }
  1744 
  1745       if (!inf)
  1746         {
  1747           perror (real_name);
  1748           goto cleanup;
  1749         }
  1750     }
  1751 
  1752   process_file (inf, uncompressed_name, lang);
  1753 
  1754   retval = fclose (inf);
  1755   if (real_name == compressed_name)
  1756     {
  1757       remove (tmp_name);
  1758       free (tmp_name);
  1759     }
  1760   if (retval < 0)
  1761     pfatal (file);
  1762 
  1763  cleanup:
  1764   if (compressed_name != file)
  1765     free (compressed_name);
  1766   if (uncompressed_name != file)
  1767     free (uncompressed_name);
  1768   last_node = NULL;
  1769   curfdp = NULL;
  1770   return;
  1771 }
  1772 
  1773 static void
  1774 process_file (FILE *fh, char *fn, language *lang)
  1775 {
  1776   static const fdesc emptyfdesc;
  1777   fdesc *fdp;
  1778 
  1779   infilename = fn;
  1780   /* Create a new input file description entry. */
  1781   fdp = xnew (1, fdesc);
  1782   *fdp = emptyfdesc;
  1783   fdp->next = fdhead;
  1784   fdp->infname = savestr (fn);
  1785   fdp->lang = lang;
  1786   fdp->infabsname = absolute_filename (fn, cwd);
  1787   fdp->infabsdir = absolute_dirname (fn, cwd);
  1788   if (filename_is_absolute (fn))
  1789     {
  1790       /* An absolute file name.  Canonicalize it. */
  1791       fdp->taggedfname = absolute_filename (fn, NULL);
  1792     }
  1793   else
  1794     {
  1795       /* A file name relative to cwd.  Make it relative
  1796          to the directory of the tags file. */
  1797       fdp->taggedfname = relative_filename (fn, tagfiledir);
  1798     }
  1799   fdp->usecharno = true;        /* use char position when making tags */
  1800   fdp->prop = NULL;
  1801   fdp->written = false;         /* not written on tags file yet */
  1802 
  1803   fdhead = fdp;
  1804   curfdp = fdhead;              /* the current file description */
  1805 
  1806   find_entries (fh);
  1807 
  1808   /* If not Ctags, and if this is not metasource and if it contained no #line
  1809      directives, we can write the tags and free all nodes pointing to
  1810      curfdp. */
  1811   if (!CTAGS
  1812       && curfdp->usecharno      /* no #line directives in this file */
  1813       && !curfdp->lang->metasource)
  1814     {
  1815       node *np, *prev;
  1816 
  1817       /* Look for the head of the sublist relative to this file.  See add_node
  1818          for the structure of the node tree. */
  1819       prev = NULL;
  1820       for (np = nodehead; np != NULL; prev = np, np = np->left)
  1821         if (np->fdp == curfdp)
  1822           break;
  1823 
  1824       /* If we generated tags for this file, write and delete them. */
  1825       if (np != NULL)
  1826         {
  1827           /* This is the head of the last sublist, if any.  The following
  1828              instructions depend on this being true. */
  1829           assert (np->left == NULL);
  1830 
  1831           assert (fdhead == curfdp);
  1832           assert (last_node->fdp == curfdp);
  1833           put_entries (np);     /* write tags for file curfdp->taggedfname */
  1834           free_tree (np);       /* remove the written nodes */
  1835           if (prev == NULL)
  1836             nodehead = NULL;    /* no nodes left */
  1837           else
  1838             prev->left = NULL;  /* delete the pointer to the sublist */
  1839         }
  1840     }
  1841 }
  1842 
  1843 static void
  1844 reset_input (FILE *inf)
  1845 {
  1846   if (fseek (inf, 0, SEEK_SET) != 0)
  1847     perror (infilename);
  1848 }
  1849 
  1850 /*
  1851  * This routine opens the specified file and calls the function
  1852  * which finds the function and type definitions.
  1853  */
  1854 static void
  1855 find_entries (FILE *inf)
  1856 {
  1857   char *cp;
  1858   language *lang = curfdp->lang;
  1859   Lang_function *parser = NULL;
  1860 
  1861   /* If user specified a language, use it. */
  1862   if (lang != NULL && lang->function != NULL)
  1863     {
  1864       parser = lang->function;
  1865     }
  1866 
  1867   /* Else try to guess the language given the file name. */
  1868   if (parser == NULL)
  1869     {
  1870       lang = get_language_from_filename (curfdp->infname, true);
  1871 
  1872       /* Disambiguate file names between Objc and Mercury. */
  1873       if (lang != NULL && strcmp (lang->name, "objc") == 0)
  1874         test_objc_is_mercury (curfdp->infname, &lang);
  1875 
  1876       if (lang != NULL && lang->function != NULL)
  1877         {
  1878           curfdp->lang = lang;
  1879           parser = lang->function;
  1880         }
  1881     }
  1882 
  1883   /* Else look for sharp-bang as the first two characters. */
  1884   if (parser == NULL
  1885       && readline_internal (&lb, inf, infilename, false) > 0
  1886       && lb.len >= 2
  1887       && lb.buffer[0] == '#'
  1888       && lb.buffer[1] == '!')
  1889     {
  1890       char *lp;
  1891 
  1892       /* Set lp to point at the first char after the last slash in the
  1893          line or, if no slashes, at the first nonblank.  Then set cp to
  1894          the first successive blank and terminate the string. */
  1895       lp = strrchr (lb.buffer+2, '/');
  1896       if (lp != NULL)
  1897         lp += 1;
  1898       else
  1899         lp = skip_spaces (lb.buffer + 2);
  1900       cp = skip_non_spaces (lp);
  1901       /* If the "interpreter" turns out to be "env", the real interpreter is
  1902          the next word.  */
  1903       if (cp > lp && strneq (lp, "env", cp - lp))
  1904         {
  1905           lp = skip_spaces (cp);
  1906           cp = skip_non_spaces (lp);
  1907         }
  1908       *cp = '\0';
  1909 
  1910       if (*lp)
  1911         {
  1912           lang = get_language_from_interpreter (lp);
  1913           if (lang != NULL && lang->function != NULL)
  1914             {
  1915               curfdp->lang = lang;
  1916               parser = lang->function;
  1917             }
  1918         }
  1919     }
  1920 
  1921   reset_input (inf);
  1922 
  1923   /* Else try to guess the language given the case insensitive file name. */
  1924   if (parser == NULL)
  1925     {
  1926       lang = get_language_from_filename (curfdp->infname, false);
  1927       if (lang != NULL && lang->function != NULL)
  1928         {
  1929           curfdp->lang = lang;
  1930           parser = lang->function;
  1931         }
  1932     }
  1933 
  1934   /* Else try Fortran or C. */
  1935   if (parser == NULL)
  1936     {
  1937       node *old_last_node = last_node;
  1938 
  1939       curfdp->lang = get_language_from_langname ("fortran");
  1940       find_entries (inf);
  1941 
  1942       if (old_last_node == last_node)
  1943         /* No Fortran entries found.  Try C. */
  1944         {
  1945           reset_input (inf);
  1946           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
  1947           find_entries (inf);
  1948         }
  1949       return;
  1950     }
  1951 
  1952   if (!no_line_directive
  1953       && curfdp->lang != NULL && curfdp->lang->metasource)
  1954     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
  1955        file, or anyway we parsed a file that is automatically generated from
  1956        this one.  If this is the case, the bingo.c file contained #line
  1957        directives that generated tags pointing to this file.  Let's delete
  1958        them all before parsing this file, which is the real source. */
  1959     {
  1960       fdesc **fdpp = &fdhead;
  1961       while (*fdpp != NULL)
  1962         if (*fdpp != curfdp
  1963             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
  1964           /* We found one of those!  We must delete both the file description
  1965              and all tags referring to it. */
  1966           {
  1967             fdesc *badfdp = *fdpp;
  1968 
  1969             /* Delete the tags referring to badfdp->taggedfname
  1970                that were obtained from badfdp->infname. */
  1971             invalidate_nodes (badfdp, &nodehead);
  1972 
  1973             *fdpp = badfdp->next; /* remove the bad description from the list */
  1974             free_fdesc (badfdp);
  1975           }
  1976         else
  1977           fdpp = &(*fdpp)->next; /* advance the list pointer */
  1978     }
  1979 
  1980   assert (parser != NULL);
  1981 
  1982   /* Generic initializations before reading from file. */
  1983   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
  1984 
  1985   /* Generic initializations before parsing file with readline. */
  1986   lineno = 0;                  /* reset global line number */
  1987   charno = 0;                  /* reset global char number */
  1988   linecharno = 0;              /* reset global char number of line start */
  1989 
  1990   parser (inf);
  1991 
  1992   regex_tag_multiline ();
  1993 }
  1994 
  1995 
  1996 /*
  1997  * Check whether an implicitly named tag should be created,
  1998  * then call `pfnote'.
  1999  * NAME is a string that is internally copied by this function.
  2000  *
  2001  * TAGS format specification
  2002  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
  2003  * The following is explained in some more detail in etc/ETAGS.EBNF.
  2004  *
  2005  * make_tag creates tags with "implicit tag names" (unnamed tags)
  2006  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
  2007  *  1. NAME does not contain any of the characters in NONAM;
  2008  *  2. LINESTART contains name as either a rightmost, or rightmost but
  2009  *     one character, substring;
  2010  *  3. the character, if any, immediately before NAME in LINESTART must
  2011  *     be a character in NONAM;
  2012  *  4. the character, if any, immediately after NAME in LINESTART must
  2013  *     also be a character in NONAM.
  2014  *
  2015  * The implementation uses the notinname() macro, which recognizes the
  2016  * characters stored in the string `nonam'.
  2017  * etags.el needs to use the same characters that are in NONAM.
  2018  */
  2019 static void
  2020 make_tag (const char *name,     /* tag name, or NULL if unnamed */
  2021           ptrdiff_t namelen,    /* tag length */
  2022           bool is_func,         /* tag is a function */
  2023           char *linestart,      /* start of the line where tag is */
  2024           ptrdiff_t linelen,    /* length of the line where tag is */
  2025           intmax_t lno,         /* line number */
  2026           intmax_t cno)         /* character number */
  2027 {
  2028   bool named = (name != NULL && namelen > 0);
  2029   char *nname = NULL;
  2030 
  2031   if (debug)
  2032     fprintf (stderr, "%s on %s:%"PRIdMAX": %s\n",
  2033              named ? name : "(unnamed)", curfdp->taggedfname, lno, linestart);
  2034 
  2035   if (!CTAGS && named)          /* maybe set named to false */
  2036     /* Let's try to make an implicit tag name, that is, create an unnamed tag
  2037        such that etags.el can guess a name from it. */
  2038     {
  2039       ptrdiff_t i;
  2040       const char *cp = name;
  2041 
  2042       for (i = 0; i < namelen; i++)
  2043         if (notinname (*cp++))
  2044           break;
  2045       if (i == namelen)                         /* rule #1 */
  2046         {
  2047           cp = linestart + linelen - namelen;
  2048           if (notinname (linestart[linelen-1]))
  2049             cp -= 1;                            /* rule #4 */
  2050           if (cp >= linestart                   /* rule #2 */
  2051               && (cp == linestart
  2052                   || notinname (cp[-1]))        /* rule #3 */
  2053               && strneq (name, cp, namelen))    /* rule #2 */
  2054             named = false;      /* use implicit tag name */
  2055         }
  2056     }
  2057 
  2058   if (named)
  2059     nname = savenstr (name, namelen);
  2060 
  2061   pfnote (nname, is_func, linestart, linelen, lno, cno);
  2062 }
  2063 
  2064 /* Record a tag. */
  2065 static void
  2066 pfnote (char *name,             /* tag name, or NULL if unnamed */
  2067         bool is_func,           /* tag is a function */
  2068         char *linestart,        /* start of the line where tag is */
  2069         ptrdiff_t linelen,      /* length of the line where tag is */
  2070         intmax_t lno,           /* line number */
  2071         intmax_t cno)           /* character number */
  2072 
  2073 {
  2074   register node *np;
  2075 
  2076   if ((CTAGS && name == NULL)
  2077       /* We used to have an assertion here for the case below, but if we hit
  2078          that case, it just means our parser got confused, and there's nothing
  2079          to do about such empty "tags".  */
  2080       || (!CTAGS && name && name[0] == '\0'))
  2081     return;
  2082 
  2083   np = xnew (1, node);
  2084 
  2085   /* If ctags mode, change name "main" to M<thisfilename>. */
  2086   if (CTAGS && !cxref_style && streq (name, "main"))
  2087     {
  2088       char *fp = strrchr (curfdp->taggedfname, '/');
  2089       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
  2090       fp = strrchr (np->name, '.');
  2091       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
  2092         fp[0] = '\0';
  2093     }
  2094   else
  2095     np->name = name;
  2096   np->valid = true;
  2097   np->been_warned = false;
  2098   np->fdp = curfdp;
  2099   np->is_func = is_func;
  2100   np->lno = lno;
  2101   if (np->fdp->usecharno)
  2102     /* Our char numbers are 0-base, because of C language tradition?
  2103        ctags compatibility?  old versions compatibility?   I don't know.
  2104        Anyway, since emacs's are 1-base we expect etags.el to take care
  2105        of the difference.  If we wanted to have 1-based numbers, we would
  2106        uncomment the +1 below. */
  2107     np->cno = cno /* + 1 */ ;
  2108   else
  2109     np->cno = invalidcharno;
  2110   np->left = np->right = NULL;
  2111   if (CTAGS && !cxref_style)
  2112     {
  2113       if (strnlen (linestart, 50) < 50)
  2114         np->regex = concat (linestart, "$", "");
  2115       else
  2116         np->regex = savenstr (linestart, 50);
  2117     }
  2118   else
  2119     np->regex = savenstr (linestart, linelen);
  2120 
  2121   add_node (np, &nodehead);
  2122 }
  2123 
  2124 /*
  2125  * Utility functions and data to avoid recursion.
  2126  */
  2127 
  2128 typedef struct stack_entry {
  2129   node *np;
  2130   struct stack_entry *next;
  2131 } stkentry;
  2132 
  2133 static void
  2134 push_node (node *np, stkentry **stack_top)
  2135 {
  2136   if (np)
  2137     {
  2138       stkentry *new = xnew (1, stkentry);
  2139 
  2140       new->np = np;
  2141       new->next = *stack_top;
  2142       *stack_top = new;
  2143     }
  2144 }
  2145 
  2146 static node *
  2147 pop_node (stkentry **stack_top)
  2148 {
  2149   node *ret = NULL;
  2150 
  2151   if (*stack_top)
  2152     {
  2153       stkentry *old_start = *stack_top;
  2154 
  2155       ret = (*stack_top)->np;
  2156       *stack_top = (*stack_top)->next;
  2157       free (old_start);
  2158     }
  2159   return ret;
  2160 }
  2161 
  2162 /*
  2163  * free_tree ()
  2164  *      emulate recursion on left children, iterate on right children.
  2165  */
  2166 static void
  2167 free_tree (register node *np)
  2168 {
  2169   stkentry *stack = NULL;
  2170 
  2171   while (np)
  2172     {
  2173       /* Descent on left children.  */
  2174       while (np->left)
  2175         {
  2176           push_node (np, &stack);
  2177           np = np->left;
  2178         }
  2179       /* Free node without left children.  */
  2180       node *node_right = np->right;
  2181       free (np->name);
  2182       free (np->regex);
  2183       free (np);
  2184       if (!node_right)
  2185         {
  2186           /* Backtrack to find a node with right children, while freeing nodes
  2187              that don't have right children.  */
  2188           while (node_right == NULL && (np = pop_node (&stack)) != NULL)
  2189             {
  2190               node_right = np->right;
  2191               free (np->name);
  2192               free (np->regex);
  2193               free (np);
  2194             }
  2195         }
  2196       /* Free right children.  */
  2197       np = node_right;
  2198     }
  2199 }
  2200 
  2201 /*
  2202  * free_fdesc ()
  2203  *      delete a file description
  2204  */
  2205 static void
  2206 free_fdesc (register fdesc *fdp)
  2207 {
  2208   free (fdp->infname);
  2209   free (fdp->infabsname);
  2210   free (fdp->infabsdir);
  2211   free (fdp->taggedfname);
  2212   free (fdp->prop);
  2213   free (fdp);
  2214 }
  2215 
  2216 /*
  2217  * add_node ()
  2218  *      Adds a node to the tree of nodes.  In etags mode, sort by file
  2219  *      name.  In ctags mode, sort by tag name.  Make no attempt at
  2220  *      balancing.
  2221  *
  2222  *      add_node is the only function allowed to add nodes, so it can
  2223  *      maintain state.
  2224  */
  2225 static void
  2226 add_node (node *np, node **cur_node_p)
  2227 {
  2228   node *cur_node = *cur_node_p;
  2229 
  2230   /* Make the first node.  */
  2231   if (cur_node == NULL)
  2232     {
  2233       *cur_node_p = np;
  2234       last_node = np;
  2235       return;
  2236     }
  2237 
  2238   if (!CTAGS)
  2239     /* Etags Mode */
  2240     {
  2241       /* For each file name, tags are in a linked sublist on the right
  2242          pointer.  The first tags of different files are a linked list
  2243          on the left pointer.  last_node points to the end of the last
  2244          used sublist. */
  2245       if (last_node != NULL && last_node->fdp == np->fdp)
  2246         {
  2247           /* Let's use the same sublist as the last added node. */
  2248           assert (last_node->right == NULL);
  2249           last_node->right = np;
  2250           last_node = np;
  2251         }
  2252       else
  2253         {
  2254            while (cur_node->fdp != np->fdp)
  2255              {
  2256                if (cur_node->left == NULL)
  2257                  break;
  2258                /* The head of this sublist is not good for us.  Let's try the
  2259                   next one. */
  2260                cur_node = cur_node->left;
  2261              }
  2262            if (cur_node->left)
  2263              {
  2264                /* Scanning the list we found the head of a sublist which is
  2265                   good for us.  Let's scan this sublist. */
  2266                if (cur_node->right)
  2267                  {
  2268                    cur_node = cur_node->right;
  2269                    while (cur_node->right)
  2270                      cur_node = cur_node->right;
  2271                  }
  2272                /* Make a new node in this sublist.  */
  2273                cur_node->right = np;
  2274              }
  2275            else
  2276              {
  2277                /* Make a new sublist.  */
  2278                cur_node->left = np;
  2279              }
  2280            last_node = np;
  2281         }
  2282     } /* if ETAGS mode */
  2283   else
  2284     {
  2285       /* Ctags Mode */
  2286       node **next_node = &cur_node;
  2287 
  2288       while ((cur_node = *next_node) != NULL)
  2289         {
  2290           int dif = strcmp (np->name, cur_node->name);
  2291           /*
  2292            * If this tag name matches an existing one, then
  2293            * do not add the node, but maybe print a warning.
  2294            */
  2295           if (!dif && no_duplicates)
  2296             {
  2297               if (np->fdp == cur_node->fdp)
  2298                 {
  2299                   if (!no_warnings)
  2300                     {
  2301                       fprintf (stderr,
  2302                                ("Duplicate entry in file %s, "
  2303                                 "line %"PRIdMAX": %s\n"),
  2304                                np->fdp->infname, lineno, np->name);
  2305                       fprintf (stderr, "Second entry ignored\n");
  2306                     }
  2307                 }
  2308               else if (!cur_node->been_warned && !no_warnings)
  2309                 {
  2310                   fprintf
  2311                     (stderr,
  2312                      "Duplicate entry in files %s and %s: %s (Warning only)\n",
  2313                      np->fdp->infname, cur_node->fdp->infname, np->name);
  2314                   cur_node->been_warned = true;
  2315                 }
  2316               return;
  2317             }
  2318           else
  2319             next_node = dif < 0 ? &cur_node->left : &cur_node->right;
  2320         }
  2321       *next_node = np;
  2322       last_node = np;
  2323     } /* if CTAGS mode */
  2324 }
  2325 
  2326 /*
  2327  * invalidate_nodes ()
  2328  *      Scan the node tree and invalidate all nodes pointing to the
  2329  *      given file description (CTAGS case) or free them (ETAGS case).
  2330  */
  2331 static void
  2332 invalidate_nodes (fdesc *badfdp, node **npp)
  2333 {
  2334   node *np = *npp;
  2335   stkentry *stack = NULL;
  2336 
  2337   if (CTAGS)
  2338     {
  2339       while (np)
  2340         {
  2341           /* Push all the left children on the stack.  */
  2342           while (np->left != NULL)
  2343             {
  2344               push_node (np, &stack);
  2345               np = np->left;
  2346             }
  2347           /* Invalidate this node.  */
  2348           if (np->fdp == badfdp)
  2349             np->valid = false;
  2350           if (!np->right)
  2351             {
  2352               /* Pop nodes from stack, invalidating them, until we find one
  2353                  with a right child.  */
  2354               while ((np = pop_node (&stack)) != NULL)
  2355                 {
  2356                   if (np->fdp == badfdp)
  2357                     np->valid = false;
  2358                   if (np->right != NULL)
  2359                     break;
  2360                 }
  2361             }
  2362           /* Process the right child, if any.  */
  2363           if (np)
  2364             np = np->right;
  2365         }
  2366     }
  2367   else
  2368     {
  2369       node super_root, *np_parent = NULL;
  2370 
  2371       super_root.left = np;
  2372       super_root.fdp = (fdesc *) -1;
  2373       np = &super_root;
  2374 
  2375       while (np)
  2376         {
  2377           /* Descent on left children until node with BADFP.  */
  2378           while (np && np->fdp != badfdp)
  2379             {
  2380               assert (np->fdp != NULL);
  2381               np_parent = np;
  2382               np = np->left;
  2383             }
  2384           if (np)
  2385             {
  2386               np_parent->left = np->left; /* detach subtree from the tree */
  2387               np->left = NULL;            /* isolate it */
  2388               free_tree (np);             /* free it */
  2389 
  2390               /* Continue with rest of tree.  */
  2391               np = np_parent->left;
  2392             }
  2393         }
  2394       *npp = super_root.left;
  2395     }
  2396 }
  2397 
  2398 
  2399 static ptrdiff_t total_size_of_entries (node *);
  2400 static int number_len (intmax_t) ATTRIBUTE_CONST;
  2401 
  2402 /* Length of a non-negative number's decimal representation. */
  2403 static int
  2404 number_len (intmax_t num)
  2405 {
  2406   int len = 1;
  2407   while ((num /= 10) > 0)
  2408     len += 1;
  2409   return len;
  2410 }
  2411 
  2412 /*
  2413  * Return total number of characters that put_entries will output for
  2414  * the nodes in the linked list at the right of the specified node.
  2415  * This count is irrelevant with etags.el since emacs 19.34 at least,
  2416  * but is still supplied for backward compatibility.
  2417  */
  2418 static ptrdiff_t
  2419 total_size_of_entries (node *np)
  2420 {
  2421   ptrdiff_t total = 0;
  2422 
  2423   for (; np != NULL; np = np->right)
  2424     if (np->valid)
  2425       {
  2426         total += strlen (np->regex) + 1;                /* pat\177 */
  2427         if (np->name != NULL)
  2428           total += strlen (np->name) + 1;               /* name\001 */
  2429         total += number_len (np->lno) + 1;              /* lno, */
  2430         if (np->cno != invalidcharno)                   /* cno */
  2431           total += number_len (np->cno);
  2432         total += 1;                                     /* newline */
  2433       }
  2434 
  2435   return total;
  2436 }
  2437 
  2438 static void
  2439 put_entry (node *np)
  2440 {
  2441   register char *sp;
  2442   static fdesc *fdp = NULL;
  2443 
  2444   /* Output this entry */
  2445   if (np->valid)
  2446     {
  2447       if (!CTAGS)
  2448         {
  2449           /* Etags mode */
  2450           if (fdp != np->fdp)
  2451             {
  2452               fdp = np->fdp;
  2453               fprintf (tagf, "\f\n%s,%"PRIdPTR"\n",
  2454                        fdp->taggedfname, total_size_of_entries (np));
  2455               fdp->written = true;
  2456             }
  2457           fputs (np->regex, tagf);
  2458           fputc ('\177', tagf);
  2459           if (np->name != NULL)
  2460             {
  2461               fputs (np->name, tagf);
  2462               fputc ('\001', tagf);
  2463             }
  2464           fprintf (tagf, "%"PRIdMAX",", np->lno);
  2465           if (np->cno != invalidcharno)
  2466             fprintf (tagf, "%"PRIdMAX, np->cno);
  2467           fputs ("\n", tagf);
  2468         }
  2469       else
  2470         {
  2471           /* Ctags mode */
  2472           if (np->name == NULL)
  2473             error ("internal error: NULL name in ctags mode.");
  2474 
  2475           if (cxref_style)
  2476             {
  2477               if (vgrind_style)
  2478                 fprintf (stdout, "%s %s %"PRIdMAX"\n",
  2479                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
  2480               else
  2481                 fprintf (stdout, "%-16s %3"PRIdMAX" %-16s %s\n",
  2482                          np->name, np->lno, np->fdp->taggedfname, np->regex);
  2483             }
  2484           else
  2485             {
  2486               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
  2487 
  2488               if (np->is_func)
  2489                 {               /* function or #define macro with args */
  2490                   putc (searchar, tagf);
  2491                   putc ('^', tagf);
  2492 
  2493                   for (sp = np->regex; *sp; sp++)
  2494                     {
  2495                       if (*sp == '\\' || *sp == searchar)
  2496                         putc ('\\', tagf);
  2497                       putc (*sp, tagf);
  2498                     }
  2499                   putc (searchar, tagf);
  2500                 }
  2501               else
  2502                 {               /* anything else; text pattern inadequate */
  2503                   fprintf (tagf, "%"PRIdMAX, np->lno);
  2504                 }
  2505               putc ('\n', tagf);
  2506             }
  2507         }
  2508     } /* if this node contains a valid tag */
  2509 }
  2510 
  2511 static void
  2512 put_entries (node *np)
  2513 {
  2514   stkentry *stack = NULL;
  2515 
  2516   if (np == NULL)
  2517     return;
  2518 
  2519   if (CTAGS)
  2520     {
  2521       while (np)
  2522         {
  2523           /* Stack subentries that precede this one.  */
  2524           while (np->left)
  2525             {
  2526               push_node (np, &stack);
  2527               np = np->left;
  2528             }
  2529           /* Output this subentry.  */
  2530           put_entry (np);
  2531           /* Stack subentries that follow this one.  */
  2532           while (!np->right)
  2533             {
  2534               /* Output subentries that precede the next one.  */
  2535               np = pop_node (&stack);
  2536               if (!np)
  2537                 break;
  2538               put_entry (np);
  2539             }
  2540           if (np)
  2541             np = np->right;
  2542         }
  2543     }
  2544   else
  2545     {
  2546       push_node (np, &stack);
  2547       while ((np = pop_node (&stack)) != NULL)
  2548         {
  2549           /* Output this subentry.  */
  2550           put_entry (np);
  2551           while (np->right)
  2552             {
  2553               /* Output subentries that follow this one.  */
  2554               put_entry (np->right);
  2555               /* Stack subentries from the following files.  */
  2556               push_node (np->left, &stack);
  2557               np = np->right;
  2558             }
  2559           push_node (np->left, &stack);
  2560         }
  2561     }
  2562 }
  2563 
  2564 
  2565 /* C extensions.
          These values are bit masks.  C_symtype accepts a keyword whose
          table entry has a nonzero mask only when that mask shares a bit
          with the current language mask.  C_STAR and C_JAVA both contain
          the C_PLPL bit (0x1), which is why the keyword table writes
          (C_JAVA & ~C_PLPL) for the words that are Java-only.  */
  2566 #define C_EXT   0x00fff         /* C extensions */
  2567 #define C_PLAIN 0x00000         /* C */
  2568 #define C_PLPL  0x00001         /* C++ */
  2569 #define C_STAR  0x00003         /* C* */
  2570 #define C_JAVA  0x00005         /* JAVA */
  2571 #define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
  2572 #define YACC    0x10000         /* yacc file */
  2573 
  2574 /*
  2575  * The C symbol tables.
        *
        * enum sym_type is the class of a token looked up in the keyword
        * table.  C_symtype returns st_none when the token is not in the
        * table, or when the keyword is restricted to languages that the
        * current mask does not include.  The keywords named below are
        * those of the gperf input that follows this enum.
  2576  */
  2577 enum sym_type
  2578 {
  2579   st_none,
         /* @interface and @protocol; @implementation; @end */
  2580   st_C_objprot, st_C_objimpl, st_C_objend,
         /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  2581   st_C_gnumacro,
         /* if, for, while, switch, return, import, package, friend;
            __attribute__ and GTY; ENUM_BF */
  2582   st_C_ignore, st_C_attribute, st_C_enum_bf,
         /* extends, implements */
  2583   st_C_javastruct,
  2584   st_C_operator,
  2585   st_C_class, st_C_template,
         /* st_C_struct: struct, union, namespace, domain, interface;
            st_C_define: define and undef */
  2586   st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
  2587 };
  2588 
  2589 /* Feed stuff between (but not including) %[ and %] lines to:
  2590      gperf -m 5
  2591 %[
  2592 %compare-strncmp
  2593 %enum
  2594 %struct-type
  2595 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
  2596 %%
  2597 if,             0,                      st_C_ignore
  2598 for,            0,                      st_C_ignore
  2599 while,          0,                      st_C_ignore
  2600 switch,         0,                      st_C_ignore
  2601 return,         0,                      st_C_ignore
  2602 __attribute__,  0,                      st_C_attribute
  2603 GTY,            0,                      st_C_attribute
  2604 @interface,     0,                      st_C_objprot
  2605 @protocol,      0,                      st_C_objprot
  2606 @implementation,0,                      st_C_objimpl
  2607 @end,           0,                      st_C_objend
  2608 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
  2609 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
  2610 friend,         C_PLPL,                 st_C_ignore
  2611 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
  2612 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
  2613 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
  2614 class,          0,                      st_C_class
  2615 namespace,      C_PLPL,                 st_C_struct
  2616 domain,         C_STAR,                 st_C_struct
  2617 union,          0,                      st_C_struct
  2618 struct,         0,                      st_C_struct
  2619 extern,         0,                      st_C_extern
  2620 enum,           0,                      st_C_enum
  2621 typedef,        0,                      st_C_typedef
  2622 define,         0,                      st_C_define
  2623 undef,          0,                      st_C_define
  2624 operator,       C_PLPL,                 st_C_operator
  2625 template,       0,                      st_C_template
  2626 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
  2627 DEFUN,          0,                      st_C_gnumacro
  2628 SYSCALL,        0,                      st_C_gnumacro
  2629 ENTRY,          0,                      st_C_gnumacro
  2630 PSEUDO,         0,                      st_C_gnumacro
  2631 ENUM_BF,        0,                      st_C_enum_bf
  2632 # These are defined inside C functions, so currently they are not met.
  2633 # EXFUN used in glibc, DEFVAR_* in emacs.
  2634 #EXFUN,         0,                      st_C_gnumacro
  2635 #DEFVAR_,       0,                      st_C_gnumacro
  2636 %]
  2637 and replace lines between %< and %> with its output, then:
  2638  - remove the #if characterset check
  2639  - remove any #line directives
  2640  - make in_word_set static and not inline
  2641  - remove any 'register' qualifications from variable decls. */
  2642 /*%<*/
  2643 /* C code produced by gperf version 3.0.1 */
  2644 /* Command-line: gperf -m 5 */
  2645 /* Computed positions: -k'2-3' */
  2646 
       /* One keyword table entry: NAME is the keyword text, C_EXT the
          language mask tested by C_symtype (0 means the keyword is
          accepted whatever the current mask is), TYPE its token class.  */
  2647 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
  2648 /* maximum key range = 34, duplicates = 0 */
  2649 
       /* Hash function generated by gperf for the keyword table.
          The result is LEN plus asso_values[] of the second character
          and, when LEN is not 2, of the third character as well; only
          str[1] and str[2] are ever read.  STR must therefore hold at
          least two characters: the switch has no case for shorter
          lengths, so the default branch would read str[2].  in_word_set
          checks the length before calling.  */
  2650 static int
  2651 hash (const char *str, int len)
  2652 {
         /* Indexed by character code (256 entries).  36 is larger than
            in_word_set's MAX_HASH_VALUE, so a character carrying that
            value makes the key fall outside the table.  */
  2653   static char const asso_values[] =
  2654     {
  2655       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2656       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2657       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2658       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2659       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2660       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2661       36, 36, 36, 36, 36, 36, 36, 36, 36,  3,
  2662       27, 36, 36, 36, 36, 36, 36, 36, 26, 36,
  2663       36, 36, 36, 25,  0,  0, 36, 36, 36,  0,
  2664       36, 36, 36, 36, 36,  1, 36, 16, 36,  6,
  2665       23,  0,  0, 36, 22,  0, 36, 36,  5,  0,
  2666        0, 15,  1, 36,  6, 36,  8, 19, 36, 16,
  2667        4,  5, 36, 36, 36, 36, 36, 36, 36, 36,
  2668       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2669       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2670       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2671       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2672       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2673       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2674       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2675       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2676       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2677       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2678       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2679       36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
  2680       36, 36, 36, 36, 36, 36
  2681     };
  2682   int hval = len;
  2683 
  2684   switch (hval)
  2685     {
  2686       default:
               /* Length other than 2: the third character counts too.  */
  2687         hval += asso_values[(unsigned char) str[2]];
  2688         FALLTHROUGH;
  2689       case 2:
  2690         hval += asso_values[(unsigned char) str[1]];
  2691         break;
  2692     }
  2693   return hval;
  2694 }
  2695 
       /* Keyword lookup generated by gperf.
          Return a pointer to the table entry whose name is exactly the
          LEN characters starting at STR, or 0 when there is none.  STR
          does not need to be null-terminated at LEN: the comparison is
          bounded by LEN, and the entry's own terminator is checked.  */
  2696 static struct C_stab_entry *
  2697 in_word_set (const char *str, ptrdiff_t len)
  2698 {
  2699   enum
  2700     {
  2701       TOTAL_KEYWORDS = 34,
  2702       MIN_WORD_LENGTH = 2,
  2703       MAX_WORD_LENGTH = 15,
  2704       MIN_HASH_VALUE = 2,
  2705       MAX_HASH_VALUE = 35
  2706     };
  2707 
         /* Indexed by the value of hash ().  The two leading empty
            entries fill the slots below MIN_HASH_VALUE.  */
  2708   static struct C_stab_entry wordlist[] =
  2709     {
  2710       {""}, {""},
  2711       {"if",            0,                      st_C_ignore},
  2712       {"GTY",           0,                      st_C_attribute},
  2713       {"@end",          0,                      st_C_objend},
  2714       {"union",         0,                      st_C_struct},
  2715       {"define",                0,                      st_C_define},
  2716       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
  2717       {"template",      0,                      st_C_template},
  2718       {"operator",      C_PLPL,                 st_C_operator},
  2719       {"@interface",    0,                      st_C_objprot},
  2720       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
  2721       {"friend",                C_PLPL,                 st_C_ignore},
  2722       {"typedef",       0,                      st_C_typedef},
  2723       {"return",                0,                      st_C_ignore},
  2724       {"@implementation",0,                     st_C_objimpl},
  2725       {"@protocol",     0,                      st_C_objprot},
  2726       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
  2727       {"extern",                0,                      st_C_extern},
  2728       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
  2729       {"struct",                0,                      st_C_struct},
  2730       {"domain",                C_STAR,                 st_C_struct},
  2731       {"switch",                0,                      st_C_ignore},
  2732       {"enum",          0,                      st_C_enum},
  2733       {"for",           0,                      st_C_ignore},
  2734       {"namespace",     C_PLPL,                 st_C_struct},
  2735       {"class",         0,                      st_C_class},
  2736       {"while",         0,                      st_C_ignore},
  2737       {"undef",         0,                      st_C_define},
  2738       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
  2739       {"__attribute__", 0,                      st_C_attribute},
  2740       {"ENTRY",         0,                      st_C_gnumacro},
  2741       {"SYSCALL",       0,                      st_C_gnumacro},
  2742       {"ENUM_BF",       0,                      st_C_enum_bf},
  2743       {"PSEUDO",                0,                      st_C_gnumacro},
  2744       {"DEFUN",         0,                      st_C_gnumacro}
  2745     };
  2746 
         /* The length test also guarantees that hash () only reads
            characters that belong to the token.  */
  2747   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
  2748     {
  2749       int key = hash (str, len);
  2750 
  2751       if (key <= MAX_HASH_VALUE && key >= 0)
  2752         {
  2753           const char *s = wordlist[key].name;
  2754 
               /* Same first character, same LEN - 1 following characters,
                  and the table name ends exactly there.  */
  2755           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
  2756             return &wordlist[key];
  2757         }
  2758     }
  2759   return 0;
  2760 }
  2761 /*%>*/
  2762 
  2763 static enum sym_type
  2764 C_symtype (char *str, ptrdiff_t len, int c_ext)
  2765 {
  2766   struct C_stab_entry *se = in_word_set (str, len);
  2767 
  2768   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
  2769     return st_none;
  2770   return se->type;
  2771 }
  2772 
  2773 
  2774 /*
  2775  * Ignoring __attribute__ ((list))
        *
        * consider_token sets this flag when it meets a keyword of class
        * st_C_attribute, i.e. __attribute__ or GTY.
  2776  */
  2777 static bool inattribute;        /* looking at an __attribute__ construct */
  2778 
  2779 /* Ignoring ENUM_BF (type)
  2780  *
        * consider_token sets this flag when it meets ENUM_BF while not on
        * a preprocessor line (definedef == dnone).
  2781  */
  2782 static bool in_enum_bf;         /* inside parentheses following ENUM_BF */
  2783 
  2784 /*
  2785  * C functions and variables are recognized using a simple
  2786  * finite automaton.  fvdef is its state variable.
        *
        * In consider_token, a keyword of class st_C_gnumacro (DEFUN,
        * SYSCALL, ENTRY, PSEUDO) met outside a preprocessor line moves the
        * automaton to fdefunkey, and the typedef keyword resets it to
        * fvnone.
  2787  */
  2788 static enum
  2789 {
  2790   fvnone,                       /* nothing seen */
  2791   fdefunkey,                    /* Emacs DEFUN keyword seen */
  2792   fdefunname,                   /* Emacs DEFUN name seen */
  2793   foperator,                    /* func: operator keyword seen (cplpl) */
  2794   fvnameseen,                   /* function or variable name seen */
  2795   fstartlist,                   /* func: just after open parenthesis */
  2796   finlist,                      /* func: in parameter list */
  2797   flistseen,                    /* func: after parameter list */
  2798   fignore,                      /* func: before open brace */
  2799   vignore                       /* var-like: ignore until ';' */
  2800 } fvdef;
  2801 
       /* consider_token clears this flag when it meets the typedef keyword.  */
  2802 static bool fvextern;           /* func or var: extern keyword seen; */
  2803 
  2804 /*
  2805  * typedefs are recognized using a simple finite automaton.
  2806  * typdef is its state variable.
        *
        * In consider_token: tnone becomes tkeyseen on the typedef keyword,
        * but only when `typedefs' is set; tkeyseen becomes ttypeseen on a
        * token of class st_none, st_C_class, st_C_struct or st_C_enum.
  2807  */
  2808 static enum
  2809 {
  2810   tnone,                        /* nothing seen */
  2811   tkeyseen,                     /* typedef keyword seen */
  2812   ttypeseen,                    /* defined type seen */
  2813   tinbody,                      /* inside typedef body */
  2814   tend,                         /* just before typedef tag */
  2815   tignore                       /* junk after typedef tag */
  2816 } typdef;
  2817 
  2818 /*
  2819  * struct-like structures (enum, struct and union) are recognized
  2820  * using another simple finite automaton.  `structdef' is its state
  2821  * variable.
        *
        * In consider_token, skeyseen is entered on a keyword of class
        * st_C_class, st_C_struct or st_C_enum (subject to conditions on
        * parlev, fvdef and typdef); a later token that reaches the
        * skeyseen test moves the automaton to stagseen and is reported as
        * a tag.  A keyword of class st_C_javastruct (extends, implements)
        * moves stagseen to scolonseen.
  2822  */
  2823 static enum
  2824 {
  2825   snone,                        /* nothing seen yet,
  2826                                    or in struct body if bracelev > 0 */
  2827   skeyseen,                     /* struct-like keyword seen */
  2828   stagseen,                     /* struct-like tag seen */
  2829   scolonseen                    /* colon seen after struct-like tag */
  2830 } structdef;
  2831 
  2832 /*
  2833  * When objdef is different from onone, objtag is the name of the class.
        * Until then it holds the placeholder string it is initialized with.
  2834  */
  2835 static const char *objtag = "<uninited>";
  2836 
  2837 /*
  2838  * Yet another little state machine to deal with preprocessor lines.
        *
        * In consider_token: in state dsharpseen, a keyword of class
        * st_C_define (define or undef) leads to ddefineseen and any other
        * token to dignorerest; in state ddefineseen the token is the macro
        * name and the state becomes dignorerest.
  2839  */
  2840 static enum
  2841 {
  2842   dnone,                        /* nothing seen */
  2843   dsharpseen,                   /* '#' seen as first char on line */
  2844   ddefineseen,                  /* '#' and 'define' seen */
  2845   dignorerest                   /* ignore rest of line */
  2846 } definedef;
  2847 
  2848 /*
  2849  * State machine for Objective C protocols and implementations.
  2850  * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
        *
        * In consider_token, state onone becomes oprotocol on a keyword of
        * class st_C_objprot (@interface, @protocol) and oimplementation on
        * a keyword of class st_C_objimpl (@implementation).
  2851  */
  2852 static enum
  2853 {
  2854   onone,                        /* nothing seen */
  2855   oprotocol,                    /* @interface or @protocol seen */
  2856   oimplementation,              /* @implementation seen */
  2857   otagseen,                     /* class name seen */
  2858   oparenseen,                   /* parenthesis before category seen */
  2859   ocatseen,                     /* category name seen */
  2860   oinbody,                      /* in @implementation body */
  2861   omethodsign,                  /* in @implementation body, after +/- */
  2862   omethodtag,                   /* after method name */
  2863   omethodcolon,                 /* after method colon */
  2864   omethodparm,                  /* after method parameter */
  2865   oignore                       /* wait for @end */
  2866 } objdef;
  2867 
  2868 
  2869 /*
  2870  * Use this structure to keep info about the token read, and how it
  2871  * should be tagged.  Used by the make_C_tag function to build a tag.
  2872  */
  2873 static struct tok
  2874 {
  2875   char *line;                   /* string containing the token */
  2876   ptrdiff_t offset;             /* where the token starts in LINE */
  2877   ptrdiff_t length;             /* token length */
  2878   /*
  2879     The previous members can be used to pass strings around for generic
  2880     purposes.  The following ones specifically refer to creating tags.  In this
  2881     case the token contained here is the pattern that will be used to create a
  2882     tag.
  2883   */
         /* NOTE(review): the wording below presumably describes what a
            false value means (no tag is created) -- confirm against
            make_C_tag.  */
  2884   bool valid;                   /* do not create a tag; the token should be
  2885                                    invalidated whenever a state machine is
  2886                                    reset prematurely */
  2887   bool named;                   /* create a named tag */
  2888   intmax_t lineno;              /* source line number of tag */
  2889   intmax_t linepos;             /* source char number of tag */
  2890 } token;                        /* latest token read */
  2891 
  2892 /*
  2893  * Variables and functions for dealing with nested structures.
  2894  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
  2895  */
  2896 static void pushclass_above (ptrdiff_t, char *, ptrdiff_t);
  2897 static void popclass_above (ptrdiff_t);
  2898 static void write_classname (linebuffer *, const char *qualifier);
  2899 
       /* cname and bracelev are parallel arrays: entry I of each describes
          the same nesting level.  pushclass_above appends an entry and
          popclass_above removes entries from the top.  */
  2900 static struct {
  2901   char **cname;                 /* nested class names; an entry is NULL
                                          when pushclass_above was given a
                                          NULL name */
  2902   ptrdiff_t *bracelev;          /* nested class brace level */
  2903   ptrdiff_t nl;                 /* class nesting level (elements used) */
  2904   ptrdiff_t size;               /* length of the array; doubled by
                                          pushclass_above when nl reaches it */
  2905 } cstack;                       /* stack for nested declaration tags */
  2906 /* Current struct nesting depth (namespace, class, struct, union, enum). */
  2907 #define nestlev         (cstack.nl)
  2908 /* After struct keyword or in struct body, not inside a nested function.
          The expansion refers to a variable named bracelev, which must be
          in scope wherever the macro is used.  */
  2909 #define instruct        (structdef == snone && nestlev > 0                      \
  2910                          && bracelev == cstack.bracelev[nestlev-1] + 1)
  2911 
  2912 static void
  2913 pushclass_above (ptrdiff_t bracelev, char *str, ptrdiff_t len)
  2914 {
  2915   ptrdiff_t nl;
  2916 
  2917   popclass_above (bracelev);
  2918   nl = cstack.nl;
  2919   if (nl >= cstack.size)
  2920     {
  2921       xrnew (cstack.cname, cstack.size, 2);
  2922       xrnew (cstack.bracelev, cstack.size, 2);
  2923       cstack.size *= 2;
  2924     }
  2925   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
  2926   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
  2927   cstack.bracelev[nl] = bracelev;
  2928   cstack.nl = nl + 1;
  2929 }
  2930 
  2931 static void
  2932 popclass_above (ptrdiff_t bracelev)
  2933 {
  2934   for (ptrdiff_t nl = cstack.nl - 1;
  2935        nl >= 0 && cstack.bracelev[nl] >= bracelev;
  2936        nl--)
  2937     {
  2938       free (cstack.cname[nl]);
  2939       cstack.nl = nl;
  2940     }
  2941 }
  2942 
  2943 static void
  2944 write_classname (linebuffer *cn, const char *qualifier)
  2945 {
  2946   ptrdiff_t len;
  2947 
  2948   if (cstack.nl == 0 || cstack.cname[0] == NULL)
  2949     {
  2950       len = 0;
  2951       cn->len = 0;
  2952       cn->buffer[0] = '\0';
  2953     }
  2954   else
  2955     {
  2956       len = strlen (cstack.cname[0]);
  2957       linebuffer_setlen (cn, len);
  2958       strcpy (cn->buffer, cstack.cname[0]);
  2959     }
  2960   for (ptrdiff_t i = 1; i < cstack.nl; i++)
  2961     {
  2962       char *s = cstack.cname[i];
  2963       if (s == NULL)
  2964         continue;
  2965       int qlen = strlen (qualifier);
  2966       ptrdiff_t slen = strlen (s);
  2967       linebuffer_setlen (cn, len + qlen + slen);
  2968       memcpyz (stpcpy (cn->buffer + len, qualifier), s, slen);
  2969       len += qlen + slen;
  2970     }
  2971 }
  2972 
  2973 
  2974 static bool consider_token (char *, ptrdiff_t, int, int *,
  2975                             ptrdiff_t, ptrdiff_t, bool *);
  2976 static void make_C_tag (bool);
  2977 
  2978 /*
  2979  * consider_token ()
  2980  *      checks to see if the current token is at the start of a
  2981  *      function or variable, or corresponds to a typedef, or
  2982  *      is a struct/union/enum tag, or #define, or an enum constant.
  2983  *
  2984  *      *IS_FUNC_OR_VAR gets true if the token is a function or #define macro
  2985  *      with args.  C_EXTP points to which language we are looking at.
  2986  *
  2987  * Globals
  2988  *      fvdef                   IN OUT
  2989  *      structdef               IN OUT
  2990  *      definedef               IN OUT
  2991  *      typdef                  IN OUT
  2992  *      objdef                  IN OUT
  2993  */
  2994 
  2995 static bool
  2996 consider_token (char *str,            /* IN: token pointer */
  2997                 ptrdiff_t len,        /* IN: token length */
  2998                 int c,                /* IN: first char after the token */
  2999                 int *c_extp,          /* IN, OUT: C extensions mask */
  3000                 ptrdiff_t bracelev,   /* IN: brace level */
  3001                 ptrdiff_t parlev,     /* IN: parenthesis level */
  3002                 bool *is_func_or_var) /* OUT: function or variable found */
  3003 {
  3004   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
  3005      structtype is the type of the preceding struct-like keyword, and
  3006      structbracelev is the brace level where it has been seen. */
  3007   static enum sym_type structtype;
  3008   static ptrdiff_t structbracelev;
  3009   static enum sym_type toktype;
  3010 
  3011 
  3012   toktype = C_symtype (str, len, *c_extp);
  3013 
  3014   /*
  3015    * Skip __attribute__
  3016    */
  3017   if (toktype == st_C_attribute)
  3018     {
  3019       inattribute = true;
  3020       return false;
  3021      }
  3022 
  3023   /*
  3024    * Skip ENUM_BF
  3025    */
  3026   if (toktype == st_C_enum_bf && definedef == dnone)
  3027     {
  3028       in_enum_bf = true;
  3029       return false;
  3030     }
  3031 
  3032    /*
  3033     * Advance the definedef state machine.
  3034     */
  3035    switch (definedef)
  3036      {
  3037      case dnone:
  3038        /* We're not on a preprocessor line. */
  3039        if (toktype == st_C_gnumacro)
  3040          {
  3041            fvdef = fdefunkey;
  3042            return false;
  3043          }
  3044        break;
  3045      case dsharpseen:
  3046        if (toktype == st_C_define)
  3047          {
  3048            definedef = ddefineseen;
  3049          }
  3050        else
  3051          {
  3052            definedef = dignorerest;
  3053          }
  3054        return false;
  3055      case ddefineseen:
  3056        /*
  3057         * Make a tag for any macro, unless it is a constant
  3058         * and constantypedefs is false.
  3059         */
  3060        definedef = dignorerest;
  3061        *is_func_or_var = (c == '(');
  3062        if (!*is_func_or_var && !constantypedefs)
  3063          return false;
  3064        else
  3065          return true;
  3066      case dignorerest:
  3067        return false;
  3068      default:
  3069        error ("internal error: definedef value.");
  3070      }
  3071 
  3072    /*
  3073     * Now typedefs
  3074     */
  3075    switch (typdef)
  3076      {
  3077      case tnone:
  3078        if (toktype == st_C_typedef)
  3079          {
  3080            if (typedefs)
  3081              typdef = tkeyseen;
  3082            fvextern = false;
  3083            fvdef = fvnone;
  3084            return false;
  3085          }
  3086        break;
  3087      case tkeyseen:
  3088        switch (toktype)
  3089          {
  3090          case st_none:
  3091          case st_C_class:
  3092          case st_C_struct:
  3093          case st_C_enum:
  3094            typdef = ttypeseen;
  3095            break;
  3096          default:
  3097            break;
  3098          }
  3099        break;
  3100      case ttypeseen:
  3101        if (structdef == snone && fvdef == fvnone)
  3102          {
  3103            fvdef = fvnameseen;
  3104            return true;
  3105          }
  3106        break;
  3107      case tend:
  3108        switch (toktype)
  3109          {
  3110          case st_C_class:
  3111          case st_C_struct:
  3112          case st_C_enum:
  3113            return false;
  3114          default:
  3115            return true;
  3116          }
  3117      default:
  3118        break;
  3119      }
  3120 
  3121    switch (toktype)
  3122      {
  3123      case st_C_javastruct:
  3124        if (structdef == stagseen)
  3125          structdef = scolonseen;
  3126        return false;
  3127      case st_C_template:
  3128      case st_C_class:
  3129        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
  3130            && bracelev == 0
  3131            && definedef == dnone && structdef == snone
  3132            && typdef == tnone && fvdef == fvnone)
  3133          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
  3134        if (toktype == st_C_template)
  3135          break;
  3136        FALLTHROUGH;
  3137      case st_C_struct:
  3138      case st_C_enum:
  3139        if (parlev == 0
  3140            && fvdef != vignore
  3141            && (typdef == tkeyseen
  3142                || (typedefs_or_cplusplus && structdef == snone)))
  3143          {
  3144            structdef = skeyseen;
  3145            structtype = toktype;
  3146            structbracelev = bracelev;
  3147            if (fvdef == fvnameseen)
  3148              fvdef = fvnone;
  3149          }
  3150        return false;
  3151      default:
  3152        break;
  3153      }
  3154 
  3155    if (structdef == skeyseen)
  3156      {
  3157        structdef = stagseen;
  3158        return true;
  3159      }
  3160 
  3161    if (typdef != tnone)
  3162      definedef = dnone;
  3163 
  3164    /* Detect Objective C constructs. */
  3165    switch (objdef)
  3166      {
  3167      case onone:
  3168        switch (toktype)
  3169          {
  3170          case st_C_objprot:
  3171            objdef = oprotocol;
  3172            return false;
  3173          case st_C_objimpl:
  3174            objdef = oimplementation;
  3175            return false;
  3176          default:
  3177            break;
  3178          }
  3179        break;
  3180      case oimplementation:
  3181        /* Save the class tag for functions or variables defined inside. */
  3182        objtag = savenstr (str, len);
  3183        objdef = oinbody;
  3184        return false;
  3185      case oprotocol:
  3186        /* Save the class tag for categories. */
  3187        objtag = savenstr (str, len);
  3188        objdef = otagseen;
  3189        *is_func_or_var = true;
  3190        return true;
  3191      case oparenseen:
  3192        objdef = ocatseen;
  3193        *is_func_or_var = true;
  3194        return true;
  3195      case oinbody:
  3196        break;
  3197      case omethodsign:
  3198        if (parlev == 0)
  3199          {
  3200            fvdef = fvnone;
  3201            objdef = omethodtag;
  3202            linebuffer_setlen (&token_name, len);
  3203            memcpyz (token_name.buffer, str, len);
  3204            return true;
  3205          }
  3206        return false;
  3207      case omethodcolon:
  3208        if (parlev == 0)
  3209          objdef = omethodparm;
  3210        return false;
  3211      case omethodparm:
  3212        if (parlev == 0)
  3213          {
  3214            objdef = omethodtag;
  3215            if (class_qualify)
  3216              {
  3217                ptrdiff_t oldlen = token_name.len;
  3218                fvdef = fvnone;
  3219                linebuffer_setlen (&token_name, oldlen + len);
  3220                memcpyz (token_name.buffer + oldlen, str, len);
  3221              }
  3222            return true;
  3223          }
  3224        return false;
  3225      case oignore:
  3226        if (toktype == st_C_objend)
  3227          {
  3228            /* Memory leakage here: the string pointed by objtag is
  3229               never released, because many tests would be needed to
  3230               avoid breaking on incorrect input code.  The amount of
  3231               memory leaked here is the sum of the lengths of the
  3232               class tags.
  3233            free (objtag); */
  3234            objdef = onone;
  3235          }
  3236        return false;
  3237      default:
  3238        break;
  3239      }
  3240 
  3241    /* A function, variable or enum constant? */
  3242    switch (toktype)
  3243      {
  3244      case st_C_extern:
  3245        fvextern = true;
  3246        switch  (fvdef)
  3247          {
  3248          case finlist:
  3249          case flistseen:
  3250          case fignore:
  3251          case vignore:
  3252            break;
  3253          default:
  3254            fvdef = fvnone;
  3255          }
  3256        return false;
  3257      case st_C_ignore:
  3258        fvextern = false;
  3259        fvdef = vignore;
  3260        return false;
  3261      case st_C_operator:
  3262        fvdef = foperator;
  3263        *is_func_or_var = true;
  3264        return true;
  3265      case st_none:
  3266        if (constantypedefs
  3267            && structdef == snone
  3268            && structtype == st_C_enum && bracelev > structbracelev
  3269            /* Don't tag tokens in expressions that assign values to enum
  3270               constants.  */
  3271            && fvdef != vignore)
  3272          return true;           /* enum constant */
  3273        switch (fvdef)
  3274          {
  3275          case fdefunkey:
  3276            if (bracelev > 0)
  3277              break;
  3278            fvdef = fdefunname;  /* GNU macro */
  3279            *is_func_or_var = true;
  3280            return true;
  3281          case fvnone:
  3282            switch (typdef)
  3283              {
  3284              case ttypeseen:
  3285                return false;
  3286              case tnone:
  3287                if ((strneq (str, "asm", 3) && endtoken (str[3]))
  3288                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
  3289                  {
  3290                    fvdef = vignore;
  3291                    return false;
  3292                  }
  3293                break;
  3294              default:
  3295                break;
  3296              }
  3297            FALLTHROUGH;
  3298           case fvnameseen:
  3299           if (len >= 10 && strneq (str+len-10, "::operator", 10))
  3300             {
  3301               if (*c_extp & C_AUTO) /* automatic detection of C++ */
  3302                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
  3303               fvdef = foperator;
  3304               *is_func_or_var = true;
  3305               return true;
  3306             }
  3307           if (bracelev > 0 && !instruct)
  3308             break;
  3309           fvdef = fvnameseen;   /* function or variable */
  3310           *is_func_or_var = true;
  3311           return true;
  3312          default:
  3313            break;
  3314         }
  3315       break;
  3316      default:
  3317        break;
  3318     }
  3319 
  3320   return false;
  3321 }
  3322 
  3323 
  3324 /*
  3325  * C_entries often keeps pointers to tokens or lines which are older than
  3326  * the line currently read.  By keeping two line buffers, and switching
  3327  * them at end of line, it is possible to use those pointers.
  3328  */
  3329 static struct
  3330 {
  3331   intmax_t linepos;   /* charno as it stood just before this line was read */
  3332   linebuffer lb;      /* the line itself, as filled in by readline */
  3333 } lbs[2];            /* indexed by curndx and newndx */
  3334 
  /* Line-buffer bookkeeping.  These expand to expressions over curndx and
     newndx, which must be in scope wherever the macros are used.
     current_lb_is_new is true when both indices name the same entry of
     lbs; switch_line_buffers flips curndx between 0 and 1.  */
  3335 #define current_lb_is_new (newndx == curndx)
  3336 #define switch_line_buffers() (curndx = 1 - curndx)
  3337 
  /* The buffer and saved position of the lbs entries selected by curndx
     and by newndx.  */
  3338 #define curlb (lbs[curndx].lb)
  3339 #define newlb (lbs[newndx].lb)
  3340 #define curlinepos (lbs[curndx].linepos)
  3341 #define newlinepos (lbs[newndx].linepos)
  3342 
  /* Tests on the c_ext flag word in scope at the point of use: plainc
     holds when the C_EXT bits equal C_PLAIN, cplpl is nonzero when
     C_PLPL is set, and cjava holds when all C_JAVA bits are set.  */
  3343 #define plainc ((c_ext & C_EXT) == C_PLAIN)
  3344 #define cplpl (c_ext & C_PLPL)
  3345 #define cjava ((c_ext & C_JAVA) == C_JAVA)
  3346 
  /* Read the next input line into the current line buffer.  The macro
     itself does not assign definedef.  In order: save charno (as it
     stands before the read) as the position of the current buffer, call
     readline on inf, point lp at the start of the freshly read text,
     clear quotednl, and make newndx equal to curndx.  Expects lp,
     quotednl, curndx, newndx and inf to be in scope where it is
     expanded.  */
  3347 #define CNL_SAVE_DEFINEDEF()                                            \
  3348 do {                                                                    \
  3349   curlinepos = charno;                                                  \
  3350   readline (&curlb, inf);                                               \
  3351   lp = curlb.buffer;                                                    \
  3352   quotednl = false;                                                     \
  3353   newndx = curndx;                                                      \
  3354 } while (0)
  3355 
  /* Full new-line handling: do everything CNL_SAVE_DEFINEDEF does, then,
     if savetoken.valid is set, copy savetoken back into token and mark
     savetoken as no longer valid; finally reset definedef to dnone.
     Expects savetoken to be in scope in addition to what
     CNL_SAVE_DEFINEDEF needs.  */
  3356 #define CNL()                                                           \
  3357 do {                                                                    \
  3358   CNL_SAVE_DEFINEDEF ();                                                \
  3359   if (savetoken.valid)                                                  \
  3360     {                                                                   \
  3361       token = savetoken;                                                \
  3362       savetoken.valid = false;                                          \
  3363     }                                                                   \
  3364   definedef = dnone;                                                    \
  3365 } while (0)
  3366 
  3367 
  3368 static void
  3369 make_C_tag (bool isfun)
  3370 {
  3371   /* This function is never called when token.valid is false, but
  3372      we must protect against invalid input or internal errors. */
  3373   if (token.valid)
  3374     make_tag (token_name.buffer, token_name.len, isfun, token.line,
  3375               token.offset+token.length+1, token.lineno, token.linepos);
  3376   else if (DEBUG)
  3377     {                             /* this branch is optimized away if !DEBUG */
  3378       make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
  3379                 token_name.len + 17, isfun, token.line,
  3380                 token.offset+token.length+1, token.lineno, token.linepos);
  3381       error ("INVALID TOKEN");
  3382     }
  3383 
  3384   token.valid = false;
  3385 }
  3386 
  /* Return true as long as reading from INF may still yield data, that
     is, while neither the end-of-file indicator nor the error indicator
     of the stream is set.  */
  static bool
  perhaps_more_input (FILE *inf)
  {
    if (feof (inf))
      return false;
    return ferror (inf) == 0;
  }
  3392 
  3393 
  3394 /*
  3395  * C_entries ()
  3396  *      This routine finds functions, variables, typedefs,
  3397  *      #define's, enum constants and struct/union/enum definitions in
  3398  *      C syntax and adds them to the list.
  3399  */
  3400 static void
  3401 C_entries (int c_ext,           /* extension of C */
  3402            FILE *inf)           /* input file */
  3403 {
  3404   char c;                       /* latest char read; '\0' for end of line */
  3405   char *lp;                     /* pointer one beyond the character `c' */
  3406   bool curndx, newndx;          /* indices for current and new lb */
  3407   ptrdiff_t tokoff;             /* offset in line of start of current token */
  3408   ptrdiff_t toklen;             /* length of current token */
  3409   const char *qualifier;        /* string used to qualify names */
  3410   int qlen;                     /* length of qualifier */
  3411   ptrdiff_t bracelev;           /* current brace level */
  3412   ptrdiff_t bracketlev;         /* current bracket level */
  3413   ptrdiff_t parlev;             /* current parenthesis level */
  3414   ptrdiff_t attrparlev;         /* __attribute__ parenthesis level */
  3415   ptrdiff_t templatelev;        /* current template level */
  3416   ptrdiff_t typdefbracelev;     /* bracelev where a typedef struct body begun */
  3417   bool incomm, inquote, inchar, quotednl, midtoken;
  3418   bool yacc_rules;              /* in the rules part of a yacc file */
  3419   struct tok savetoken = {0};   /* token saved during preprocessor handling */
  3420 
  3421 
  3422   linebuffer_init (&lbs[0].lb);
  3423   linebuffer_init (&lbs[1].lb);
  3424   if (cstack.size == 0)
  3425     {
  3426       cstack.size = (DEBUG) ? 1 : 4;
  3427       cstack.nl = 0;
  3428       cstack.cname = xnew (cstack.size, char *);
  3429       cstack.bracelev = xnew (cstack.size, ptrdiff_t);
  3430     }
  3431 
  3432   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
  3433   curndx = newndx = 0;
  3434   lp = curlb.buffer;
  3435   *lp = 0;
  3436 
  3437   fvdef = fvnone; fvextern = false; typdef = tnone;
  3438   structdef = snone; definedef = dnone; objdef = onone;
  3439   yacc_rules = false;
  3440   midtoken = inquote = inchar = incomm = quotednl = false;
  3441   token.valid = savetoken.valid = false;
  3442   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
  3443   if (cjava)
  3444     { qualifier = "."; qlen = 1; }
  3445   else
  3446     { qualifier = "::"; qlen = 2; }
  3447 
  3448 
  3449   while (perhaps_more_input (inf))
  3450     {
  3451       c = *lp++;
  3452       if (c == '\\')
  3453         {
  3454           /* If we are at the end of the line, the next character is a
  3455              '\0'; do not skip it, because it is what tells us
  3456              to read the next line.  */
  3457           if (*lp == '\0')
  3458             {
  3459               quotednl = true;
  3460               continue;
  3461             }
  3462           lp++;
  3463           c = ' ';
  3464         }
  3465       else if (incomm)
  3466         {
  3467           switch (c)
  3468             {
  3469             case '*':
  3470               if (*lp == '/')
  3471                 {
  3472                   c = *lp++;
  3473                   incomm = false;
  3474                 }
  3475               break;
  3476             case '\0':
  3477               /* Newlines inside comments do not end macro definitions in
  3478                  traditional cpp. */
  3479               CNL_SAVE_DEFINEDEF ();
  3480               break;
  3481             }
  3482           continue;
  3483         }
  3484       else if (inquote)
  3485         {
  3486           switch (c)
  3487             {
  3488             case '"':
  3489               inquote = false;
  3490               break;
  3491             case '\0':
  3492               /* Newlines inside strings do not end macro definitions
  3493                  in traditional cpp, even though compilers don't
  3494                  usually accept them. */
  3495               CNL_SAVE_DEFINEDEF ();
  3496               break;
  3497             }
  3498           continue;
  3499         }
  3500       else if (inchar)
  3501         {
  3502           switch (c)
  3503             {
  3504             case '\0':
  3505               /* Hmmm, something went wrong. */
  3506               CNL ();
  3507               FALLTHROUGH;
  3508             case '\'':
  3509               inchar = false;
  3510               break;
  3511             }
  3512           continue;
  3513         }
  3514       else switch (c)
  3515         {
  3516         case '"':
  3517           inquote = true;
  3518           if (bracketlev > 0)
  3519             continue;
  3520           if (inattribute)
  3521             break;
  3522           switch (fvdef)
  3523             {
  3524             case fdefunkey:
  3525             case fstartlist:
  3526             case finlist:
  3527             case fignore:
  3528             case vignore:
  3529               break;
  3530             default:
  3531               fvextern = false;
  3532               fvdef = fvnone;
  3533             }
  3534           continue;
  3535         case '\'':
  3536           inchar = true;
  3537           if (bracketlev > 0)
  3538             continue;
  3539           if (inattribute)
  3540             break;
  3541           if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
  3542             {
  3543               fvextern = false;
  3544               fvdef = fvnone;
  3545             }
  3546           continue;
  3547         case '/':
  3548           if (*lp == '*')
  3549             {
  3550               incomm = true;
  3551               lp++;
  3552               c = ' ';
  3553               if (bracketlev > 0)
  3554                 continue;
  3555             }
  3556           else if (/* cplpl && */ *lp == '/')
  3557             {
  3558               c = '\0';
  3559             }
  3560           break;
  3561         case '%':
  3562           if ((c_ext & YACC) && *lp == '%')
  3563             {
  3564               /* Entering or exiting rules section in yacc file. */
  3565               lp++;
  3566               definedef = dnone; fvdef = fvnone; fvextern = false;
  3567               typdef = tnone; structdef = snone;
  3568               midtoken = inquote = inchar = incomm = quotednl = false;
  3569               bracelev = 0;
  3570               yacc_rules = !yacc_rules;
  3571               continue;
  3572             }
  3573           else
  3574             break;
  3575         case '#':
  3576           if (definedef == dnone)
  3577             {
  3578               char *cp;
  3579               bool cpptoken = true;
  3580 
  3581               /* Look back on this line.  If all blanks, or nonblanks
  3582                  followed by an end of comment, this is a preprocessor
  3583                  token. */
  3584               for (cp = newlb.buffer; cp < lp-1; cp++)
  3585                 if (!c_isspace (*cp))
  3586                   {
  3587                     if (*cp == '*' && cp[1] == '/')
  3588                       {
  3589                         cp++;
  3590                         cpptoken = true;
  3591                       }
  3592                     else
  3593                       cpptoken = false;
  3594                   }
  3595               if (cpptoken)
  3596                 {
  3597                   definedef = dsharpseen;
  3598                   /* This is needed for tagging enum values: when there are
  3599                      preprocessor conditionals inside the enum, we need to
  3600                      reset the value of fvdef so that the next enum value is
  3601                      tagged even though the one before it did not end in a
  3602                      comma.  */
  3603                   if (fvdef == vignore && instruct && parlev == 0)
  3604                     {
  3605                       if (strneq (cp, "#if", 3) || strneq (cp, "#el", 3))
  3606                         fvdef = fvnone;
  3607                     }
  3608                 }
  3609             } /* if (definedef == dnone) */
  3610           continue;
  3611         case '[':
  3612           bracketlev++;
  3613           continue;
  3614         default:
  3615           if (bracketlev > 0)
  3616             {
  3617               if (c == ']')
  3618                 --bracketlev;
  3619               else if (c == '\0')
  3620                 CNL_SAVE_DEFINEDEF ();
  3621               continue;
  3622             }
  3623           break;
  3624         } /* switch (c) */
  3625 
  3626 
  3627       /* Consider token only if some involved conditions are satisfied. */
  3628       if (typdef != tignore
  3629           && definedef != dignorerest
  3630           && fvdef != finlist
  3631           && templatelev == 0
  3632           && (definedef != dnone
  3633               || structdef != scolonseen)
  3634           && !inattribute
  3635           && !in_enum_bf)
  3636         {
  3637           if (midtoken)
  3638             {
  3639               if (endtoken (c))
  3640                 {
  3641                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
  3642                     /* This handles :: in the middle,
  3643                        but not at the beginning of an identifier.
  3644                        Also, space-separated :: is not recognized. */
  3645                     {
  3646                       if (c_ext & C_AUTO) /* automatic detection of C++ */
  3647                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
  3648                       lp += 2;
  3649                       toklen += 2;
  3650                       c = lp[-1];
  3651                       goto still_in_token;
  3652                     }
  3653                   else
  3654                     {
  3655                       bool funorvar = false;
  3656 
  3657                       if (yacc_rules
  3658                           || consider_token (newlb.buffer + tokoff, toklen, c,
  3659                                              &c_ext, bracelev, parlev,
  3660                                              &funorvar))
  3661                         {
  3662                           if (fvdef == foperator)
  3663                             {
  3664                               char *oldlp = lp;
  3665                               lp = skip_spaces (lp-1);
  3666                               if (*lp != '\0')
  3667                                 lp += 1;
  3668                               while (*lp != '\0'
  3669                                      && !c_isspace (*lp) && *lp != '(')
  3670                                 lp += 1;
  3671                               c = *lp++;
  3672                               toklen += lp - oldlp;
  3673                             }
  3674                           token.named = false;
  3675                           if (!plainc
  3676                               && nestlev > 0 && definedef == dnone)
  3677                             /* in struct body */
  3678                             {
  3679                               if (class_qualify)
  3680                                 {
  3681                                   write_classname (&token_name, qualifier);
  3682                                   ptrdiff_t len = token_name.len;
  3683                                   linebuffer_setlen (&token_name,
  3684                                                      len + qlen + toklen);
  3685                                   memcpyz (stpcpy (token_name.buffer + len,
  3686                                                    qualifier),
  3687                                            newlb.buffer + tokoff, toklen);
  3688                                 }
  3689                               else
  3690                                 {
  3691                                   linebuffer_setlen (&token_name, toklen);
  3692                                   memcpyz (token_name.buffer,
  3693                                            newlb.buffer + tokoff, toklen);
  3694                                 }
  3695                               token.named = true;
  3696                             }
  3697                           else if (objdef == ocatseen)
  3698                             /* Objective C category */
  3699                             {
  3700                               if (class_qualify)
  3701                                 {
  3702                                   ptrdiff_t len = strlen (objtag) + 2 + toklen;
  3703                                   linebuffer_setlen (&token_name, len);
  3704                                   char *p1 = stpcpy (token_name.buffer, objtag);
  3705                                   char *p2 = stpcpy (p1, "(");
  3706                                   char *p3 = mempcpy (p2, newlb.buffer + tokoff,
  3707                                                       toklen);
  3708                                   strcpy (p3, ")");
  3709                                 }
  3710                               else
  3711                                 {
  3712                                   linebuffer_setlen (&token_name, toklen);
  3713                                   memcpyz (token_name.buffer,
  3714                                            newlb.buffer + tokoff, toklen);
  3715                                 }
  3716                               token.named = true;
  3717                             }
  3718                           else if (objdef == omethodtag
  3719                                    || objdef == omethodparm)
  3720                             /* Objective C method */
  3721                             {
  3722                               token.named = true;
  3723                             }
  3724                           else if (fvdef == fdefunname)
  3725                             /* GNU DEFUN and similar macros */
  3726                             {
  3727                               bool defun = (newlb.buffer[tokoff] == 'F');
  3728                               ptrdiff_t off = tokoff;
  3729                               ptrdiff_t len = toklen;
  3730 
  3731                               if (defun)
  3732                                 {
  3733                                   off += 1;
  3734                                   len -= 1;
  3735 
  3736                                   /* First, tag it as its C name */
  3737                                   linebuffer_setlen (&token_name, toklen);
  3738                                   memcpyz (token_name.buffer,
  3739                                            newlb.buffer + tokoff, toklen);
  3740                                   token.named = true;
  3741                                   token.lineno = lineno;
  3742                                   token.offset = tokoff;
  3743                                   token.length = toklen;
  3744                                   token.line = newlb.buffer;
  3745                                   token.linepos = newlinepos;
  3746                                   token.valid = true;
  3747                                   make_C_tag (funorvar);
  3748                                 }
  3749                               /* Rewrite the tag so that emacs lisp DEFUNs
  3750                                  can be found also by their elisp name */
  3751                               linebuffer_setlen (&token_name, len);
  3752                               memcpyz (token_name.buffer,
  3753                                        newlb.buffer + off, len);
  3754                               if (defun)
  3755                                 while (--len >= 0)
  3756                                   if (token_name.buffer[len] == '_')
  3757                                     token_name.buffer[len] = '-';
  3758                               token.named = defun;
  3759                             }
  3760                           else
  3761                             {
  3762                               linebuffer_setlen (&token_name, toklen);
  3763                               memcpyz (token_name.buffer,
  3764                                        newlb.buffer + tokoff, toklen);
  3765                               /* Name macros and members. */
  3766                               token.named = (structdef == stagseen
  3767                                              || typdef == ttypeseen
  3768                                              || typdef == tend
  3769                                              || (funorvar
  3770                                                  && definedef == dignorerest)
  3771                                              || (funorvar
  3772                                                  && definedef == dnone
  3773                                                  && structdef == snone
  3774                                                  && bracelev > 0));
  3775                             }
  3776                           token.lineno = lineno;
  3777                           token.offset = tokoff;
  3778                           token.length = toklen;
  3779                           token.line = newlb.buffer;
  3780                           token.linepos = newlinepos;
  3781                           token.valid = true;
  3782 
  3783                           if (definedef == dnone
  3784                               && (fvdef == fvnameseen
  3785                                   || fvdef == foperator
  3786                                   || structdef == stagseen
  3787                                   || typdef == tend
  3788                                   || typdef == ttypeseen
  3789                                   || objdef != onone))
  3790                             {
  3791                               if (current_lb_is_new)
  3792                                 switch_line_buffers ();
  3793                             }
  3794                           else if (definedef != dnone
  3795                                    || fvdef == fdefunname
  3796                                    || instruct)
  3797                             make_C_tag (funorvar);
  3798                         }
  3799                       else /* not yacc and consider_token failed */
  3800                         {
  3801                           if (inattribute && fvdef == fignore)
  3802                             {
  3803                               /* We have just met __attribute__ after a
  3804                                  function parameter list: do not tag the
  3805                                  function again. */
  3806                               fvdef = fvnone;
  3807                             }
  3808                         }
  3809                       midtoken = false;
  3810                     }
  3811                 } /* if (endtoken (c)) */
  3812               else if (intoken (c))
  3813                 still_in_token:
  3814                 {
  3815                   toklen++;
  3816                   continue;
  3817                 }
  3818             } /* if (midtoken) */
  3819           else if (begtoken (c))
  3820             {
  3821               switch (definedef)
  3822                 {
  3823                 case dnone:
  3824                   switch (fvdef)
  3825                     {
  3826                     case fstartlist:
  3827                       /* This prevents tagging fb in
  3828                          void (__attribute__((noreturn)) *fb) (void);
  3829                          Fixing this is not easy and not very important. */
  3830                       fvdef = finlist;
  3831                       continue;
  3832                     case flistseen:
  3833                       if (plainc || declarations)
  3834                         {
  3835                           make_C_tag (true); /* a function */
  3836                           fvdef = fignore;
  3837                         }
  3838                       break;
  3839                     default:
  3840                       break;
  3841                     }
  3842                   if (structdef == stagseen && !cjava)
  3843                     {
  3844                       popclass_above (bracelev);
  3845                       structdef = snone;
  3846                     }
  3847                   break;
  3848                 case dsharpseen:
  3849                   savetoken = token;
  3850                   break;
  3851                 default:
  3852                   break;
  3853                 }
  3854               if (!yacc_rules || lp == newlb.buffer + 1)
  3855                 {
  3856                   tokoff = lp - 1 - newlb.buffer;
  3857                   toklen = 1;
  3858                   midtoken = true;
  3859                 }
  3860               continue;
  3861             } /* if (begtoken) */
  3862         } /* if must look at token */
  3863 
  3864 
  3865       /* Detect end of line, colon, comma, semicolon and various braces
  3866          after having handled a token.*/
  3867       switch (c)
  3868         {
  3869         case ':':
  3870           if (inattribute)
  3871             break;
  3872           if (yacc_rules && token.offset == 0 && token.valid)
  3873             {
  3874               make_C_tag (false); /* a yacc function */
  3875               break;
  3876             }
  3877           if (definedef != dnone)
  3878             break;
  3879           switch (objdef)
  3880             {
  3881             case otagseen:
  3882               objdef = oignore;
  3883               make_C_tag (true); /* an Objective C class */
  3884               break;
  3885             case omethodtag:
  3886             case omethodparm:
  3887               objdef = omethodcolon;
  3888               if (class_qualify)
  3889                 {
  3890                   ptrdiff_t toklen = token_name.len;
  3891                   linebuffer_setlen (&token_name, toklen + 1);
  3892                   strcpy (token_name.buffer + toklen, ":");
  3893                 }
  3894               break;
  3895             default:
  3896               break;
  3897             }
  3898           if (structdef == stagseen)
  3899             {
  3900               structdef = scolonseen;
  3901               break;
  3902             }
  3903           /* Should be useless, but may be work as a safety net. */
  3904           if (cplpl && fvdef == flistseen)
  3905             {
  3906               make_C_tag (true); /* a function */
  3907               fvdef = fignore;
  3908               break;
  3909             }
  3910           break;
  3911         case ';':
  3912           if (definedef != dnone || inattribute)
  3913             break;
  3914           switch (typdef)
  3915             {
  3916             case tend:
  3917             case ttypeseen:
  3918               make_C_tag (false); /* a typedef */
  3919               typdef = tnone;
  3920               fvdef = fvnone;
  3921               break;
  3922             case tnone:
  3923             case tinbody:
  3924             case tignore:
  3925               switch (fvdef)
  3926                 {
  3927                 case fignore:
  3928                   if (typdef == tignore || cplpl)
  3929                     fvdef = fvnone;
  3930                   break;
  3931                 case fvnameseen:
  3932                   if ((globals && bracelev == 0 && (!fvextern || declarations))
  3933                       || (members && instruct))
  3934                     make_C_tag (false); /* a variable */
  3935                   fvextern = false;
  3936                   fvdef = fvnone;
  3937                   token.valid = false;
  3938                   break;
  3939                 case flistseen:
  3940                   if ((declarations
  3941                        && (cplpl || !instruct)
  3942                        && (typdef == tnone || (typdef != tignore && instruct)))
  3943                       || (members
  3944                           && plainc && instruct))
  3945                     make_C_tag (true);  /* a function */
  3946                   FALLTHROUGH;
  3947                 default:
  3948                   fvextern = false;
  3949                   fvdef = fvnone;
  3950                   if (declarations
  3951                        && cplpl && structdef == stagseen)
  3952                     make_C_tag (false); /* forward declaration */
  3953                   else
  3954                     token.valid = false;
  3955                 } /* switch (fvdef) */
  3956               FALLTHROUGH;
  3957             default:
  3958               if (!instruct)
  3959                 typdef = tnone;
  3960             }
  3961           if (structdef == stagseen)
  3962             structdef = snone;
  3963           break;
  3964         case ',':
  3965           if (definedef != dnone || inattribute)
  3966             break;
  3967           switch (objdef)
  3968             {
  3969             case omethodtag:
  3970             case omethodparm:
  3971               make_C_tag (true); /* an Objective C method */
  3972               objdef = oinbody;
  3973               break;
  3974             default:
  3975               break;
  3976             }
  3977           switch (fvdef)
  3978             {
  3979             case fdefunkey:
  3980             case foperator:
  3981             case fstartlist:
  3982             case finlist:
  3983             case fignore:
  3984               break;
  3985             case vignore:
  3986               if (instruct && parlev == 0)
  3987                 fvdef = fvnone;
  3988               break;
  3989             case fdefunname:
  3990               fvdef = fignore;
  3991               break;
  3992             case fvnameseen:
  3993               if (parlev == 0
  3994                   && ((globals
  3995                        && bracelev == 0
  3996                        && templatelev == 0
  3997                        && (!fvextern || declarations))
  3998                       || (members && instruct)))
  3999                   make_C_tag (false); /* a variable */
  4000               break;
  4001             case flistseen:
  4002               if ((declarations && typdef == tnone && !instruct)
  4003                   || (members && typdef != tignore && instruct))
  4004                 {
  4005                   make_C_tag (true); /* a function */
  4006                   fvdef = fvnameseen;
  4007                 }
  4008               else if (!declarations)
  4009                 fvdef = fvnone;
  4010               token.valid = false;
  4011               break;
  4012             default:
  4013               fvdef = fvnone;
  4014             }
  4015           if (structdef == stagseen)
  4016             structdef = snone;
  4017           break;
  4018         case ']':
  4019           if (definedef != dnone || inattribute)
  4020             break;
  4021           if (structdef == stagseen)
  4022             structdef = snone;
  4023           switch (typdef)
  4024             {
  4025             case ttypeseen:
  4026             case tend:
  4027               typdef = tignore;
  4028               make_C_tag (false);       /* a typedef */
  4029               break;
  4030             case tnone:
  4031             case tinbody:
  4032               switch (fvdef)
  4033                 {
  4034                 case foperator:
  4035                 case finlist:
  4036                 case fignore:
  4037                 case vignore:
  4038                   break;
  4039                 case fvnameseen:
  4040                   if ((members && bracelev == 1)
  4041                       || (globals && bracelev == 0
  4042                           && (!fvextern || declarations)))
  4043                     make_C_tag (false); /* a variable */
  4044                   FALLTHROUGH;
  4045                 default:
  4046                   fvdef = fvnone;
  4047                 }
  4048               break;
  4049             default:
  4050               break;
  4051             }
  4052           break;
  4053         case '(':
  4054           if (inattribute)
  4055             {
  4056               attrparlev++;
  4057               break;
  4058             }
  4059           if (definedef != dnone)
  4060             break;
  4061           if (objdef == otagseen && parlev == 0)
  4062             objdef = oparenseen;
  4063           switch (fvdef)
  4064             {
  4065             case fvnameseen:
  4066               if (typdef == ttypeseen
  4067                   && *lp != '*'
  4068                   && !instruct)
  4069                 {
  4070                   /* This handles constructs like:
  4071                      typedef void OperatorFun (int fun); */
  4072                   make_C_tag (false);
  4073                   typdef = tignore;
  4074                   fvdef = fignore;
  4075                   break;
  4076                 }
  4077               FALLTHROUGH;
  4078             case foperator:
  4079               fvdef = fstartlist;
  4080               break;
  4081             case flistseen:
  4082               fvdef = finlist;
  4083               break;
  4084             default:
  4085               break;
  4086             }
  4087           parlev++;
  4088           break;
  4089         case ')':
  4090           if (inattribute)
  4091             {
  4092               if (--attrparlev == 0)
  4093                 inattribute = false;
  4094               break;
  4095             }
  4096           if (in_enum_bf)
  4097             {
  4098               if (--parlev == 0)
  4099                 in_enum_bf = false;
  4100               break;
  4101             }
  4102           if (definedef != dnone)
  4103             break;
  4104           if (objdef == ocatseen && parlev == 1)
  4105             {
  4106               make_C_tag (true); /* an Objective C category */
  4107               objdef = oignore;
  4108             }
  4109           if (--parlev == 0)
  4110             {
  4111               switch (fvdef)
  4112                 {
  4113                 case fstartlist:
  4114                 case finlist:
  4115                   fvdef = flistseen;
  4116                   break;
  4117                 default:
  4118                   break;
  4119                 }
  4120               if (!instruct
  4121                   && (typdef == tend
  4122                       || typdef == ttypeseen))
  4123                 {
  4124                   typdef = tignore;
  4125                   make_C_tag (false); /* a typedef */
  4126                 }
  4127             }
  4128           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
  4129             parlev = 0;
  4130           break;
  4131         case '{':
  4132           if (definedef != dnone)
  4133             break;
  4134           if (typdef == ttypeseen)
  4135             {
  4136               /* Whenever typdef is set to tinbody (currently only
  4137                  here), typdefbracelev should be set to bracelev. */
  4138               typdef = tinbody;
  4139               typdefbracelev = bracelev;
  4140             }
  4141           switch (fvdef)
  4142             {
  4143             case flistseen:
  4144               if (cplpl && !class_qualify)
  4145                 {
  4146                   /* Remove class and namespace qualifiers from the token,
  4147                      leaving only the method/member name.  */
  4148                   char *cc, *uqname = token_name.buffer;
  4149                   char *tok_end = token_name.buffer + token_name.len;
  4150 
  4151                   for (cc = token_name.buffer; cc < tok_end; cc++)
  4152                     {
  4153                       if (*cc == ':' && cc[1] == ':')
  4154                         {
  4155                           uqname = cc + 2;
  4156                           cc++;
  4157                         }
  4158                     }
  4159                   if (uqname > token_name.buffer)
  4160                     {
  4161                       ptrdiff_t uqlen = strlen (uqname);
  4162                       linebuffer_setlen (&token_name, uqlen);
  4163                       memmove (token_name.buffer, uqname, uqlen + 1);
  4164                     }
  4165                 }
  4166               make_C_tag (true);    /* a function */
  4167               FALLTHROUGH;
  4168             case fignore:
  4169               fvdef = fvnone;
  4170               break;
  4171             case fvnone:
  4172               switch (objdef)
  4173                 {
  4174                 case otagseen:
  4175                   make_C_tag (true); /* an Objective C class */
  4176                   objdef = oignore;
  4177                   break;
  4178                 case omethodtag:
  4179                 case omethodparm:
  4180                   make_C_tag (true); /* an Objective C method */
  4181                   objdef = oinbody;
  4182                   break;
  4183                 default:
  4184                   /* Neutralize `extern "C" {' grot. */
  4185                   if (bracelev == 0 && structdef == snone && nestlev == 0
  4186                       && typdef == tnone)
  4187                     bracelev = -1;
  4188                 }
  4189               break;
  4190             default:
  4191               break;
  4192             }
  4193           switch (structdef)
  4194             {
  4195             case skeyseen:         /* unnamed struct */
  4196               pushclass_above (bracelev, NULL, 0);
  4197               structdef = snone;
  4198               break;
  4199             case stagseen:         /* named struct or enum */
  4200             case scolonseen:       /* a class */
  4201               pushclass_above (bracelev,token.line+token.offset, token.length);
  4202               structdef = snone;
  4203               make_C_tag (false);  /* a struct or enum */
  4204               break;
  4205             default:
  4206               break;
  4207             }
  4208           bracelev += 1;
  4209           break;
  4210         case '*':
  4211           if (definedef != dnone)
  4212             break;
  4213           if (fvdef == fstartlist)
  4214             {
  4215               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
  4216               token.valid = false;
  4217             }
  4218           break;
  4219         case '}':
  4220           if (definedef != dnone)
  4221             break;
  4222           bracelev -= 1;
  4223           /* If we see a closing brace in column zero, and we weren't told to
  4224              ignore indentation, we assume this the final brace of a function
  4225              or struct definition, and reset bracelev to zero.  */
  4226           if (!ignoreindent && lp == newlb.buffer + 1)
  4227             {
  4228               if (bracelev != 0)
  4229                 token.valid = false; /* unexpected value, token unreliable */
  4230               bracelev = 0;     /* reset brace level if first column */
  4231               parlev = 0;       /* also reset paren level, just in case... */
  4232             }
  4233           else if (bracelev < 0)
  4234             {
  4235               token.valid = false; /* something gone amiss, token unreliable */
  4236               bracelev = 0;
  4237             }
  4238           if (bracelev == 0 && fvdef == vignore)
  4239             fvdef = fvnone;             /* end of function */
  4240           popclass_above (bracelev);
  4241           structdef = snone;
  4242           /* Only if typdef == tinbody is typdefbracelev significant. */
  4243           if (typdef == tinbody && bracelev <= typdefbracelev)
  4244             {
  4245               assert (bracelev == typdefbracelev);
  4246               typdef = tend;
  4247             }
  4248           break;
  4249         case '=':
  4250           if (definedef != dnone)
  4251             break;
  4252           switch (fvdef)
  4253             {
  4254             case foperator:
  4255             case finlist:
  4256             case fignore:
  4257             case vignore:
  4258               break;
  4259             case fvnameseen:
  4260               if ((members && bracelev == 1)
  4261                   || (globals && bracelev == 0 && (!fvextern || declarations)))
  4262                 make_C_tag (false); /* a variable */
  4263               FALLTHROUGH;
  4264             default:
  4265               fvdef = vignore;
  4266             }
  4267           break;
  4268         case '<':
  4269           if (cplpl
  4270               && (structdef == stagseen || fvdef == fvnameseen))
  4271             {
  4272               templatelev++;
  4273               break;
  4274             }
  4275           goto resetfvdef;
  4276         case '>':
  4277           if (templatelev > 0)
  4278             {
  4279               templatelev--;
  4280               break;
  4281             }
  4282           goto resetfvdef;
  4283         case '+':
  4284         case '-':
  4285           if (objdef == oinbody && bracelev == 0)
  4286             {
  4287               objdef = omethodsign;
  4288               break;
  4289             }
  4290           FALLTHROUGH;
  4291         case '#': case '~': case '&': case '%': case '/':
  4292         case '|': case '^': case '!': case '.': case '?':
  4293         resetfvdef:
  4294           if (definedef != dnone)
  4295             break;
  4296           /* These surely cannot follow a function tag in C. */
  4297           switch (fvdef)
  4298             {
  4299             case foperator:
  4300             case finlist:
  4301             case fignore:
  4302             case vignore:
  4303               break;
  4304             default:
  4305               fvdef = fvnone;
  4306             }
  4307           break;
  4308         case '\0':
  4309           if (objdef == otagseen)
  4310             {
  4311               make_C_tag (true); /* an Objective C class */
  4312               objdef = oignore;
  4313             }
  4314           /* If a macro spans multiple lines don't reset its state. */
  4315           if (quotednl)
  4316             CNL_SAVE_DEFINEDEF ();
  4317           else
  4318             CNL ();
  4319           break;
  4320         } /* switch (c) */
  4321 
  4322     } /* while not eof */
  4323 
  4324   free (lbs[0].lb.buffer);
  4325   free (lbs[1].lb.buffer);
  4326 }
  4327 
  4328 /*
  4329  * Process either a C++ file or a C file depending on the setting
  4330  * of a global flag.
  4331  */
  4332 static void
  4333 default_C_entries (FILE *inf)
  4334 {
  4335   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
  4336 }
  4337 
  4338 /* Always do plain C. */
  4339 static void
  4340 plain_C_entries (FILE *inf)
  4341 {
  4342   C_entries (0, inf);
  4343 }
  4344 
  4345 /* Always do C++. */
  4346 static void
  4347 Cplusplus_entries (FILE *inf)
  4348 {
  4349   C_entries (C_PLPL, inf);
  4350 }
  4351 
  4352 /* Always do Java. */
  4353 static void
  4354 Cjava_entries (FILE *inf)
  4355 {
  4356   C_entries (C_JAVA, inf);
  4357 }
  4358 
  4359 /* Always do C*. */
  4360 static void
  4361 Cstar_entries (FILE *inf)
  4362 {
  4363   C_entries (C_STAR, inf);
  4364 }
  4365 
  4366 /* Always do Yacc. */
  4367 static void
  4368 Yacc_entries (FILE *inf)
  4369 {
  4370   C_entries (YACC, inf);
  4371 }
  4372 
  4373 
  4374 /* Useful macros. */
  4375 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  4376   while (perhaps_more_input (file_pointer)                              \
  4377          && (readline (&(line_buffer), file_pointer),                   \
  4378              (char_pointer) = (line_buffer).buffer,                     \
  4379              true))
  4380 
  4381 #define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */   \
  4382   ((assert ("" kw), true)   /* syntax error if not a literal string */  \
  4383    && strneq ((cp), kw, sizeof (kw)-1)          /* cp points at kw */   \
  4384    && notinname ((cp)[sizeof (kw)-1])           /* end of kw */         \
  4385    && ((cp) = skip_spaces ((cp) + sizeof (kw) - 1), true)) /* skip spaces */
  4386 
  4387 /* Similar to LOOKING_AT but does not use notinname, does not skip */
  4388 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
  4389   ((assert ("" kw), true) /* syntax error if not a literal string */    \
  4390    && strncaseeq ((cp), kw, sizeof (kw)-1)      /* cp points at kw */   \
  4391    && ((cp) += sizeof (kw) - 1, true))          /* skip spaces */
  4392 
  4393 /*
  4394  * Read a file, but do no processing.  This is used to do regexp
  4395  * matching on files that have no language defined.
  4396  */
  4397 static void
  4398 just_read_file (FILE *inf)
  4399 {
  4400   while (perhaps_more_input (inf))
  4401     readline (&lb, inf);
  4402 }
  4403 
  4404 
  4405 /* Fortran parsing */
  4406 
  4407 static void F_takeprec (void);
  4408 static void F_getit (FILE *);
  4409 
  4410 static void
  4411 F_takeprec (void)
  4412 {
  4413   dbp = skip_spaces (dbp);
  4414   if (*dbp != '*')
  4415     return;
  4416   dbp++;
  4417   dbp = skip_spaces (dbp);
  4418   if (strneq (dbp, "(*)", 3))
  4419     {
  4420       dbp += 3;
  4421       return;
  4422     }
  4423   if (!c_isdigit (*dbp))
  4424     {
  4425       --dbp;                    /* force failure */
  4426       return;
  4427     }
  4428   do
  4429     dbp++;
  4430   while (c_isdigit (*dbp));
  4431 }
  4432 
  4433 static void
  4434 F_getit (FILE *inf)
  4435 {
  4436   register char *cp;
  4437 
  4438   dbp = skip_spaces (dbp);
  4439   if (*dbp == '\0')
  4440     {
  4441       readline (&lb, inf);
  4442       dbp = lb.buffer;
  4443       if (dbp[5] != '&')
  4444         return;
  4445       dbp += 6;
  4446       dbp = skip_spaces (dbp);
  4447     }
  4448   if (!c_isalpha (*dbp) && *dbp != '_' && *dbp != '$')
  4449     return;
  4450   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
  4451     continue;
  4452   make_tag (dbp, cp-dbp, true,
  4453             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  4454 }
  4455 
  4456 
  4457 static void
  4458 Fortran_functions (FILE *inf)
  4459 {
  4460   LOOP_ON_INPUT_LINES (inf, lb, dbp)
  4461     {
  4462       if (*dbp == '%')
  4463         dbp++;                  /* Ratfor escape to fortran */
  4464       dbp = skip_spaces (dbp);
  4465       if (*dbp == '\0')
  4466         continue;
  4467 
  4468       if (LOOKING_AT_NOCASE (dbp, "recursive"))
  4469         dbp = skip_spaces (dbp);
  4470 
  4471       if (LOOKING_AT_NOCASE (dbp, "pure"))
  4472         dbp = skip_spaces (dbp);
  4473 
  4474       if (LOOKING_AT_NOCASE (dbp, "elemental"))
  4475         dbp = skip_spaces (dbp);
  4476 
  4477       switch (c_tolower (*dbp))
  4478         {
  4479         case 'i':
  4480           if (nocase_tail ("integer"))
  4481             F_takeprec ();
  4482           break;
  4483         case 'r':
  4484           if (nocase_tail ("real"))
  4485             F_takeprec ();
  4486           break;
  4487         case 'l':
  4488           if (nocase_tail ("logical"))
  4489             F_takeprec ();
  4490           break;
  4491         case 'c':
  4492           if (nocase_tail ("complex") || nocase_tail ("character"))
  4493             F_takeprec ();
  4494           break;
  4495         case 'd':
  4496           if (nocase_tail ("double"))
  4497             {
  4498               dbp = skip_spaces (dbp);
  4499               if (*dbp == '\0')
  4500                 continue;
  4501               if (nocase_tail ("precision"))
  4502                 break;
  4503               continue;
  4504             }
  4505           break;
  4506         }
  4507       dbp = skip_spaces (dbp);
  4508       if (*dbp == '\0')
  4509         continue;
  4510       switch (c_tolower (*dbp))
  4511         {
  4512         case 'f':
  4513           if (nocase_tail ("function"))
  4514             F_getit (inf);
  4515           continue;
  4516         case 's':
  4517           if (nocase_tail ("subroutine"))
  4518             F_getit (inf);
  4519           continue;
  4520         case 'e':
  4521           if (nocase_tail ("entry"))
  4522             F_getit (inf);
  4523           continue;
  4524         case 'b':
  4525           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
  4526             {
  4527               dbp = skip_spaces (dbp);
  4528               if (*dbp == '\0') /* assume un-named */
  4529                 make_tag ("blockdata", 9, true,
  4530                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
  4531               else
  4532                 F_getit (inf);  /* look for name */
  4533             }
  4534           continue;
  4535         }
  4536     }
  4537 }
  4538 
  4539 
  4540 /*
  4541  * Go language support
  4542  * Original code by Xi Lu <lx@shellcodes.org> (2016)
  4543  */
  4544 static void
  4545 Go_functions(FILE *inf)
  4546 {
  4547   char *cp, *name;
  4548 
  4549   LOOP_ON_INPUT_LINES(inf, lb, cp)
  4550     {
  4551       cp = skip_spaces (cp);
  4552 
  4553       if (LOOKING_AT (cp, "package"))
  4554         {
  4555           name = cp;
  4556           while (!notinname (*cp) && *cp != '\0')
  4557             cp++;
  4558           make_tag (name, cp - name, false, lb.buffer,
  4559                     cp - lb.buffer + 1, lineno, linecharno);
  4560         }
  4561       else if (LOOKING_AT (cp, "func"))
  4562         {
  4563           /* Go implementation of interface, such as:
  4564              func (n *Integer) Add(m Integer) ...
  4565              skip `(n *Integer)` part.
  4566           */
  4567           if (*cp == '(')
  4568             {
  4569               while (*cp != ')')
  4570                 cp++;
  4571               cp = skip_spaces (cp+1);
  4572             }
  4573 
  4574           if (*cp)
  4575             {
  4576               name = cp;
  4577 
  4578               while (!notinname (*cp))
  4579                 cp++;
  4580 
  4581               make_tag (name, cp - name, true, lb.buffer,
  4582                         cp - lb.buffer + 1, lineno, linecharno);
  4583             }
  4584         }
  4585       else if (members && LOOKING_AT (cp, "type"))
  4586         {
  4587           name = cp;
  4588 
  4589           /* Ignore the likes of the following:
  4590              type (
  4591                     A
  4592              )
  4593            */
  4594           if (*cp == '(')
  4595             return;
  4596 
  4597           while (!notinname (*cp) && *cp != '\0')
  4598             cp++;
  4599 
  4600           make_tag (name, cp - name, false, lb.buffer,
  4601                     cp - lb.buffer + 1, lineno, linecharno);
  4602         }
  4603     }
  4604 }
  4605 
  4606 
  4607 /*
  4608  * Ada parsing
  4609  * Original code by
  4610  * Philippe Waroquiers (1998)
  4611  */
  4612 
  4613 /* Once we are positioned after an "interesting" keyword, let's get
  4614    the real tag value necessary. */
  4615 static void
  4616 Ada_getit (FILE *inf, const char *name_qualifier)
  4617 {
  4618   register char *cp;
  4619   char *name;
  4620   char c;
  4621 
  4622   while (perhaps_more_input (inf))
  4623     {
  4624       dbp = skip_spaces (dbp);
  4625       if (*dbp == '\0'
  4626           || (dbp[0] == '-' && dbp[1] == '-'))
  4627         {
  4628           readline (&lb, inf);
  4629           dbp = lb.buffer;
  4630         }
  4631       switch (c_tolower (*dbp))
  4632         {
  4633         case 'b':
  4634           if (nocase_tail ("body"))
  4635             {
  4636               /* Skipping body of   procedure body   or   package body or ....
  4637                  resetting qualifier to body instead of spec. */
  4638               name_qualifier = "/b";
  4639               continue;
  4640             }
  4641           break;
  4642         case 't':
  4643           /* Skipping type of   task type   or   protected type ... */
  4644           if (nocase_tail ("type"))
  4645             continue;
  4646           break;
  4647         }
  4648       if (*dbp == '"')
  4649         {
  4650           dbp += 1;
  4651           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
  4652             continue;
  4653         }
  4654       else
  4655         {
  4656           dbp = skip_spaces (dbp);
  4657           for (cp = dbp;
  4658                c_isalnum (*cp) || *cp == '_' || *cp == '.';
  4659                cp++)
  4660             continue;
  4661           if (cp == dbp)
  4662             return;
  4663         }
  4664       c = *cp;
  4665       *cp = '\0';
  4666       name = concat (dbp, name_qualifier, "");
  4667       *cp = c;
  4668       make_tag (name, strlen (name), true,
  4669                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  4670       free (name);
  4671       if (c == '"')
  4672         dbp = cp + 1;
  4673       return;
  4674     }
  4675 }
  4676 
  4677 static void
  4678 Ada_funcs (FILE *inf)
  4679 {
  4680   bool inquote = false;
  4681   bool skip_till_semicolumn = false;
  4682 
  4683   LOOP_ON_INPUT_LINES (inf, lb, dbp)
  4684     {
  4685       while (*dbp != '\0')
  4686         {
  4687           /* Skip a string i.e. "abcd". */
  4688           if (inquote || (*dbp == '"'))
  4689             {
  4690               dbp = strchr (dbp + !inquote, '"');
  4691               if (dbp != NULL)
  4692                 {
  4693                   inquote = false;
  4694                   dbp += 1;
  4695                   continue;     /* advance char */
  4696                 }
  4697               else
  4698                 {
  4699                   inquote = true;
  4700                   break;        /* advance line */
  4701                 }
  4702             }
  4703 
  4704           /* Skip comments. */
  4705           if (dbp[0] == '-' && dbp[1] == '-')
  4706             break;              /* advance line */
  4707 
  4708           /* Skip character enclosed in single quote i.e. 'a'
  4709              and skip single quote starting an attribute i.e. 'Image. */
  4710           if (*dbp == '\'')
  4711             {
  4712               dbp++ ;
  4713               if (*dbp != '\0')
  4714                 dbp++;
  4715               continue;
  4716             }
  4717 
  4718           if (skip_till_semicolumn)
  4719             {
  4720               if (*dbp == ';')
  4721                 skip_till_semicolumn = false;
  4722               dbp++;
  4723               continue;         /* advance char */
  4724             }
  4725 
  4726           /* Search for beginning of a token.  */
  4727           if (!begtoken (*dbp))
  4728             {
  4729               dbp++;
  4730               continue;         /* advance char */
  4731             }
  4732 
  4733           /* We are at the beginning of a token. */
  4734           switch (c_tolower (*dbp))
  4735             {
  4736             case 'f':
  4737               if (!packages_only && nocase_tail ("function"))
  4738                 Ada_getit (inf, "/f");
  4739               else
  4740                 break;          /* from switch */
  4741               continue;         /* advance char */
  4742             case 'p':
  4743               if (!packages_only && nocase_tail ("procedure"))
  4744                 Ada_getit (inf, "/p");
  4745               else if (nocase_tail ("package"))
  4746                 Ada_getit (inf, "/s");
  4747               else if (nocase_tail ("protected")) /* protected type */
  4748                 Ada_getit (inf, "/t");
  4749               else
  4750                 break;          /* from switch */
  4751               continue;         /* advance char */
  4752 
  4753             case 'u':
  4754               if (typedefs && !packages_only && nocase_tail ("use"))
  4755                 {
  4756                   /* when tagging types, avoid tagging  use type Pack.Typename;
  4757                      for this, we will skip everything till a ; */
  4758                   skip_till_semicolumn = true;
  4759                   continue;     /* advance char */
  4760                 }
  4761 
  4762             case 't':
  4763               if (!packages_only && nocase_tail ("task"))
  4764                 Ada_getit (inf, "/k");
  4765               else if (typedefs && !packages_only && nocase_tail ("type"))
  4766                 {
  4767                   Ada_getit (inf, "/t");
  4768                   while (*dbp != '\0')
  4769                     dbp += 1;
  4770                 }
  4771               else
  4772                 break;          /* from switch */
  4773               continue;         /* advance char */
  4774             }
  4775 
  4776           /* Look for the end of the token. */
  4777           while (!endtoken (*dbp))
  4778             dbp++;
  4779 
  4780         } /* advance char */
  4781     } /* advance line */
  4782 }
  4783 
  4784 
  4785 /*
  4786  * Unix and microcontroller assembly tag handling
  4787  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
  4788  * Idea by Bob Weiner, Motorola Inc. (1994)
  4789  */
  4790 static void
  4791 Asm_labels (FILE *inf)
  4792 {
  4793   register char *cp;
  4794 
  4795   LOOP_ON_INPUT_LINES (inf, lb, cp)
  4796     {
  4797       /* If first char is alphabetic or one of [_.$], test for colon
  4798          following identifier. */
  4799       if (c_isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
  4800         {
  4801           /* Read past label. */
  4802           cp++;
  4803           while (c_isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
  4804             cp++;
  4805           if (*cp == ':' || c_isspace (*cp))
  4806             /* Found end of label, so copy it and add it to the table. */
  4807             make_tag (lb.buffer, cp - lb.buffer, true,
  4808                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  4809         }
  4810     }
  4811 }
  4812 
  4813 
  4814 /*
  4815  * Perl support
  4816  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
  4817  *                 /^use constant[ \t\n]+[^ \t\n{=,;]+/
  4818  * Perl variable names: /^(my|local).../
  4819  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
  4820  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
  4821  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
  4822  */
  4823 static void
  4824 Perl_functions (FILE *inf)
  4825 {
  4826   char *package = savestr ("main"); /* current package name */
  4827   register char *cp;
  4828 
  4829   LOOP_ON_INPUT_LINES (inf, lb, cp)
  4830     {
  4831       cp = skip_spaces (cp);
  4832 
  4833       if (LOOKING_AT (cp, "package"))
  4834         {
  4835           free (package);
  4836           get_tag (cp, &package);
  4837         }
  4838       else if (LOOKING_AT (cp, "sub"))
  4839         {
  4840           char *pos, *sp;
  4841 
  4842         subr:
  4843           sp = cp;
  4844           while (!notinname (*cp))
  4845             cp++;
  4846           if (cp == sp)
  4847             continue;           /* nothing found */
  4848           pos = strchr (sp, ':');
  4849           if (pos && pos < cp && pos[1] == ':')
  4850             {
  4851               /* The name is already qualified. */
  4852               if (!class_qualify)
  4853                 {
  4854                   char *q = pos + 2, *qpos;
  4855                   while ((qpos = strchr (q, ':')) != NULL
  4856                          && qpos < cp
  4857                          && qpos[1] == ':')
  4858                     q = qpos + 2;
  4859                   sp = q;
  4860                 }
  4861               make_tag (sp, cp - sp, true,
  4862                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  4863             }
  4864           else if (class_qualify)
  4865             /* Qualify it. */
  4866             {
  4867               char savechar, *name;
  4868 
  4869               savechar = *cp;
  4870               *cp = '\0';
  4871               name = concat (package, "::", sp);
  4872               *cp = savechar;
  4873               make_tag (name, strlen (name), true,
  4874                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  4875               free (name);
  4876             }
  4877           else
  4878             make_tag (sp, cp - sp, true,
  4879                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  4880         }
  4881       else if (LOOKING_AT (cp, "use constant")
  4882                || LOOKING_AT (cp, "use constant::defer"))
  4883         {
  4884           /* For hash style multi-constant like
  4885                 use constant { FOO => 123,
  4886                                BAR => 456 };
  4887              only the first FOO is picked up.  Parsing across the value
  4888              expressions would be difficult in general, due to possible nested
  4889              hashes, here-documents, etc.  */
  4890           if (*cp == '{')
  4891             cp = skip_spaces (cp+1);
  4892           goto subr;
  4893         }
  4894       else if (globals) /* only if we are tagging global vars */
  4895         {
  4896           /* Skip a qualifier, if any. */
  4897           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
  4898           /* After "my" or "local", but before any following paren or space. */
  4899           char *varstart = cp;
  4900 
  4901           if (qual              /* should this be removed?  If yes, how? */
  4902               && (*cp == '$' || *cp == '@' || *cp == '%'))
  4903             {
  4904               varstart += 1;
  4905               do
  4906                 cp++;
  4907               while (c_isalnum (*cp) || *cp == '_');
  4908             }
  4909           else if (qual)
  4910             {
  4911               /* Should be examining a variable list at this point;
  4912                  could insist on seeing an open parenthesis. */
  4913               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
  4914                 cp++;
  4915             }
  4916           else
  4917             continue;
  4918 
  4919           make_tag (varstart, cp - varstart, false,
  4920                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  4921         }
  4922     }
  4923   free (package);
  4924 }
  4925 
  4926 
  4927 /*
  4928  * Python support
  4929  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
  4930  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
  4931  * More ideas by seb bacon <seb@jamkit.com> (2002)
  4932  */
  4933 static void
  4934 Python_functions (FILE *inf)
  4935 {
  4936   register char *cp;
  4937 
  4938   LOOP_ON_INPUT_LINES (inf, lb, cp)
  4939     {
  4940       cp = skip_spaces (cp);
  4941       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
  4942         {
  4943           char *name = cp;
  4944           while (!notinname (*cp) && *cp != ':')
  4945             cp++;
  4946           make_tag (name, cp - name, true,
  4947                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  4948         }
  4949     }
  4950 }
  4951 
  4952 /*
  4953  * Ruby support
  4954  * Original code by Xi Lu <lx@shellcodes.org> (2015)
        *
        * Reads INF line by line and tags:
        *  - constants: an identifier of at least two characters that starts
        *    with an uppercase letter and is followed by `=' (but not by `=='
        *    or `=>'); if it contains a `:' that is not followed by
        *    whitespace, only the part after the last `:' is tagged;
        *  - the name following "def", "class" or "module", with a leading
        *    "self." removed and, for "def", everything up to and including
        *    the first `.' removed;
        *  - the `:symbol' arguments of attr_reader, attr_writer,
        *    attr_accessor and alias_method.
        * An accessor or alias argument list that ends in a comma is resumed
        * on the following line; the `continuation' flag carries that state
        * from one line to the next.
  4955  */
  4956 static void
  4957 Ruby_functions (FILE *inf)
  4958 {
  4959   char *cp = NULL;
         /* These four flags deliberately live outside the line loop, so
            that they survive from one input line to the next.  */
  4960   bool reader = false, writer = false, alias = false, continuation = false;
  4961 
  4962   LOOP_ON_INPUT_LINES (inf, lb, cp)
  4963     {
  4964       bool is_class = false;
  4965       bool is_method = false;
  4966       char *name;
  4967 
  4968       cp = skip_spaces (cp);
  4969       if (!continuation
  4970           /* Constants.  */
  4971           && c_isalpha (*cp) && c_isupper (*cp))
  4972         {
  4973           char *bp, *colon = NULL;
  4974 
  4975           name = cp;
  4976 
                 /* Scan the identifier, remembering the last `:' seen.  */
  4977           for (cp++; c_isalnum (*cp) || *cp == '_' || *cp == ':'; cp++)
  4978             {
  4979               if (*cp == ':')
  4980                 colon = cp;
  4981             }
                 /* Single-character identifiers are ignored.  */
  4982           if (cp > name + 1)
  4983             {
  4984               bp = skip_spaces (cp);
                     /* Require an assignment; reject `==' and `=>'.  */
  4985               if (*bp == '=' && !(bp[1] == '=' || bp[1] == '>'))
  4986                 {
  4987                   if (colon && !c_isspace (colon[1]))
  4988                     name = colon + 1;
  4989                   make_tag (name, cp - name, false,
  4990                             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  4991                 }
  4992             }
  4993         }
  4994       else if (!continuation
  4995                /* Modules, classes, methods.  */
  4996                && ((is_method = LOOKING_AT (cp, "def"))
  4997                    || (is_class = LOOKING_AT (cp, "class"))
  4998                    || LOOKING_AT (cp, "module")))
  4999         {
  5000           const char self_name[] = "self.";
                 /* Length of "self." without its terminating NUL.  */
  5001           const size_t self_size1 = sizeof (self_name) - 1;
  5002 
  5003           name = cp;
  5004 
  5005          /* Ruby method names can end in a '='.  Also, operator overloading can
  5006             define operators whose names include '='.  */
  5007           while (!notinname (*cp) || *cp == '=')
  5008             cp++;
  5009 
  5010           /* Remove "self." from the method name.  */
  5011           if (cp - name > self_size1
  5012               && strneq (name, self_name, self_size1))
  5013             name += self_size1;
  5014 
  5015           /* Remove the class/module qualifiers from method names.  */
  5016           if (is_method)
  5017             {
  5018               char *q;
  5019 
                     /* Find the first `.' inside the name, if there is one.  */
  5020               for (q = name; q < cp && *q != '.'; q++)
  5021                 ;
  5022               if (q < cp - 1)   /* punt if we see just "FOO." */
  5023                 name = q + 1;
  5024             }
  5025 
  5026           /* Don't tag singleton classes.  */
  5027           if (is_class && strneq (name, "<<", 2) && cp == name + 2)
  5028             continue;
  5029 
  5030           make_tag (name, cp - name, true,
  5031                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  5032         }
  5033       else
  5034         {
  5035           /* Tag accessors and aliases.  */
  5036 
                 /* On a fresh line forget the previous keyword; on a
                    continuation line reader/writer/alias keep the values
                    they had on the preceding line.  */
  5037           if (!continuation)
  5038             reader = writer = alias = false;
  5039 
                 /* Walk the line word by word; a `#' ends the scan.  */
  5040           while (*cp && *cp != '#')
  5041             {
  5042               if (!continuation)
  5043                 {
  5044                   reader = writer = alias = false;
  5045                   if (LOOKING_AT (cp, "attr_reader"))
  5046                     reader = true;
  5047                   else if (LOOKING_AT (cp, "attr_writer"))
  5048                     writer = true;
  5049                   else if (LOOKING_AT (cp, "attr_accessor"))
  5050                     {
  5051                       reader = true;
  5052                       writer = true;
  5053                     }
  5054                   else if (LOOKING_AT (cp, "alias_method"))
  5055                     alias = true;
  5056                 }
  5057               if (reader || writer || alias)
  5058                 {
                         /* One iteration per comma-separated argument.  */
  5059                   do {
  5060                     char *np;
  5061 
  5062                     cp = skip_spaces (cp);
  5063                     if (*cp == '(')
  5064                       cp = skip_spaces (cp + 1);
  5065                     np = cp;
  5066                     cp = skip_name (cp);
                           /* Only `:symbol' arguments are tagged.  Note that
                              `continue' inside a do-while jumps to the loop
                              condition below, not to the top of the body.  */
  5067                     if (*np != ':')
  5068                       continue;
  5069                     np++;
  5070                     if (reader)
  5071                       {
  5072                         make_tag (np, cp - np, true,
  5073                                   lb.buffer, cp - lb.buffer + 1,
  5074                                   lineno, linecharno);
  5075                         continuation = false;
  5076                       }
  5077                     if (writer)
  5078                       {
                               /* Build the writer name "NAME=": the symbol
                                  text plus one `=' (NUL counted separately
                                  in the allocation).  */
  5079                         size_t name_len = cp - np + 1;
  5080                         char *wr_name = xnew (name_len + 1, char);
  5081 
  5082                         strcpy (mempcpy (wr_name, np, name_len - 1), "=");
                               /* NOTE(review): wr_name is handed to pfnote and
                                  not freed here; presumably pfnote keeps the
                                  pointer -- confirm against pfnote.  */
  5083                         pfnote (wr_name, true, lb.buffer, cp - lb.buffer + 1,
  5084                                 lineno, linecharno);
  5085                         if (debug)
  5086                           fprintf (stderr, "%s on %s:%"PRIdMAX": %s\n", wr_name,
  5087                                    curfdp->taggedfname, lineno, lb.buffer);
  5088                         continuation = false;
  5089                       }
  5090                     if (alias)
  5091                       {
                               /* Only the first symbol of alias_method is
                                  tagged; on a continuation line nothing is.  */
  5092                         if (!continuation)
  5093                           make_tag (np, cp - np, true,
  5094                                     lb.buffer, cp - lb.buffer + 1,
  5095                                     lineno, linecharno);
  5096                         continuation = false;
                               /* Skip to `#', `;' or end of line; the line
                                  is continued if its last non-blank
                                  character before that point is a comma.  */
  5097                         while (*cp && *cp != '#' && *cp != ';')
  5098                           {
  5099                             if (*cp == ',')
  5100                               continuation = true;
  5101                             else if (!c_isspace (*cp))
  5102                               continuation = false;
  5103                             cp++;
  5104                           }
  5105                         if (*cp == ';')
  5106                           continuation = false;
  5107                       }
  5108                     cp = skip_spaces (cp);
                         /* For accessors the test below also records in
                            `continuation' whether a comma follows; the list
                            goes on while a comma is followed by more text on
                            this line.  */
  5109                   } while ((alias
  5110                             ? (*cp == ',')
  5111                             : (continuation = (*cp == ',')))
  5112                            && (cp = skip_spaces (cp + 1), *cp && *cp != '#'));
  5113                 }
                     /* Advance past the current word and any separators
                        that follow it.  */
  5114               if (*cp != '#')
  5115                 cp = skip_name (cp);
  5116               while (*cp && *cp != '#' && notinname (*cp))
  5117                 cp++;
  5118             }
  5119         }
  5120     }
  5121 }
  5122 
  5123 
  5124 /*
  5125  * Rust support
  5126  * Look for:
  5127  *  - fn: Function
  5128  *  - struct: Structure
  5129  *  - enum: Enumeration
  5130  *  - macro_rules!: Macro
  5131  */
  5132 static void
  5133 Rust_entries (FILE *inf)
  5134 {
  5135   char *cp, *name;
  5136   bool is_func = false;
  5137 
  5138   LOOP_ON_INPUT_LINES(inf, lb, cp)
  5139     {
  5140       cp = skip_spaces(cp);
  5141       name = cp;
  5142 
  5143       // Skip 'pub' keyworld
  5144       (void)LOOKING_AT (cp, "pub");
  5145 
  5146       // Look for define
  5147       if ((is_func = LOOKING_AT (cp, "fn"))
  5148           || LOOKING_AT (cp, "enum")
  5149           || LOOKING_AT (cp, "struct")
  5150           || (is_func = LOOKING_AT (cp, "macro_rules!")))
  5151         {
  5152           cp = skip_spaces (cp);
  5153           name = cp;
  5154 
  5155           while (!notinname (*cp))
  5156             cp++;
  5157 
  5158           make_tag (name, cp - name, is_func,
  5159                     lb.buffer, cp - lb.buffer + 1,
  5160                     lineno, linecharno);
  5161           is_func = false;
  5162         }
  5163     }
  5164 }
  5165 
  5166 
  5167 /*
  5168  * PHP support
  5169  * Look for:
  5170  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
  5171  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
  5172  *  - /^[ \t]*define\(\"[^\"]+/
  5173  * Only with --members:
  5174  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
  5175  * Idea by Diez B. Roggisch (2001)
  5176  */
  5177 static void
  5178 PHP_functions (FILE *inf)
  5179 {
  5180   char *cp, *name;
  5181   bool search_identifier = false;
  5182 
  5183   LOOP_ON_INPUT_LINES (inf, lb, cp)
  5184     {
  5185       cp = skip_spaces (cp);
  5186       name = cp;
  5187       if (search_identifier
  5188           && *cp != '\0')
  5189         {
  5190           while (!notinname (*cp))
  5191             cp++;
  5192           make_tag (name, cp - name, true,
  5193                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  5194           search_identifier = false;
  5195         }
  5196       else if (LOOKING_AT (cp, "function"))
  5197         {
  5198           if (*cp == '&')
  5199             cp = skip_spaces (cp+1);
  5200           if (*cp != '\0')
  5201             {
  5202               name = cp;
  5203               while (!notinname (*cp))
  5204                 cp++;
  5205               make_tag (name, cp - name, true,
  5206                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  5207             }
  5208           else
  5209             search_identifier = true;
  5210         }
  5211       else if (LOOKING_AT (cp, "class"))
  5212         {
  5213           if (*cp != '\0')
  5214             {
  5215               name = cp;
  5216               while (*cp != '\0' && !c_isspace (*cp))
  5217                 cp++;
  5218               make_tag (name, cp - name, false,
  5219                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  5220             }
  5221           else
  5222             search_identifier = true;
  5223         }
  5224       else if (strneq (cp, "define", 6)
  5225                && (cp = skip_spaces (cp+6))
  5226                && *cp++ == '('
  5227                && (*cp == '"' || *cp == '\''))
  5228         {
  5229           char quote = *cp++;
  5230           name = cp;
  5231           while (*cp != quote && *cp != '\0')
  5232             cp++;
  5233           make_tag (name, cp - name, false,
  5234                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  5235         }
  5236       else if (members
  5237                && LOOKING_AT (cp, "var")
  5238                && *cp == '$')
  5239         {
  5240           name = cp;
  5241           while (!notinname (*cp))
  5242             cp++;
  5243           make_tag (name, cp - name, false,
  5244                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  5245         }
  5246     }
  5247 }
  5248 
  5249 
  5250 /*
  5251  * Cobol tag functions
  5252  * We could look for anything that could be a paragraph name.
  5253  * i.e. anything that starts in column 8 is one word and ends in a full stop.
  5254  * Idea by Corny de Souza (1993)
  5255  */
  5256 static void
  5257 Cobol_paragraphs (FILE *inf)
  5258 {
  5259   register char *bp, *ep;
  5260 
  5261   LOOP_ON_INPUT_LINES (inf, lb, bp)
  5262     {
  5263       if (lb.len < 9)
  5264         continue;
  5265       bp += 8;
  5266 
  5267       /* If eoln, compiler option or comment ignore whole line. */
  5268       if (bp[-1] != ' ' || !c_isalnum (bp[0]))
  5269         continue;
  5270 
  5271       for (ep = bp; c_isalnum (*ep) || *ep == '-'; ep++)
  5272         continue;
  5273       if (*ep++ == '.')
  5274         make_tag (bp, ep - bp, true,
  5275                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
  5276     }
  5277 }
  5278 
  5279 
  5280 /*
  5281  * Makefile support
  5282  * Ideas by Assar Westerlund <assar@sics.se> (2001)
  5283  */
  5284 static void
  5285 Makefile_targets (FILE *inf)
  5286 {
  5287   register char *bp;
  5288 
  5289   LOOP_ON_INPUT_LINES (inf, lb, bp)
  5290     {
  5291       if (*bp == '\t' || *bp == '#')
  5292         continue;
  5293       while (*bp != '\0' && *bp != '=' && *bp != ':')
  5294         bp++;
  5295       if (*bp == ':' || (globals && *bp == '='))
  5296         {
  5297           /* We should detect if there is more than one tag, but we do not.
  5298              We just skip initial and final spaces. */
  5299           char * namestart = skip_spaces (lb.buffer);
  5300           while (--bp > namestart)
  5301             if (!notinname (*bp))
  5302               break;
  5303           make_tag (namestart, bp - namestart + 1, true,
  5304                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
  5305         }
  5306     }
  5307 }
  5308 
  5309 
  5310 /*
  5311  * Pascal parsing
  5312  * Original code by Mosur K. Mohan (1989)
  5313  *
  5314  *  Locates tags for procedures & functions.  Doesn't do any type- or
  5315  *  var-definitions.  It does look for the keyword "extern" or
  5316  *  "forward" immediately following the procedure statement; if found,
  5317  *  the tag is skipped.
        *
        *  The input INF is consumed one character at a time through the
        *  file-level cursor dbp, refilling the line buffer lb whenever the
        *  end of the current line is reached.  Because the decision to emit
        *  a tag is only taken after the `;' that ends the heading (possibly
        *  on a later line), the line holding the name is copied to a private
        *  buffer (tline) and the tag is made from that copy.
  5318  */
  5319 static void
  5320 Pascal_functions (FILE *inf)
  5321 {
  5322   linebuffer tline;             /* mostly copied from C_entries */
         /* Position information and name bounds saved when a candidate name
            is found, for use when the tag is finally made.  */
  5323   intmax_t save_lcno, save_lineno;
  5324   ptrdiff_t namelen, taglen;
  5325   char c, *name;
  5326 
  5327   bool                          /* each of these flags is true if: */
  5328     incomment,                  /* point is inside a comment */
  5329     inquote,                    /* point is inside '..' string */
  5330     get_tagname,                /* point is after PROCEDURE/FUNCTION
  5331                                    keyword, so next item = potential tag */
  5332     found_tag,                  /* point is after a potential tag */
  5333     inparms,                    /* point is within parameter-list */
  5334     verify_tag;                 /* point has passed the parm-list, so the
  5335                                    next token will determine whether this
  5336                                    is a FORWARD/EXTERN to be ignored, or
  5337                                    whether it is a real tag */
  5338 
  5339   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
  5340   name = NULL;                  /* keep compiler quiet */
         /* Start with an empty line so that the first pass through the loop
            immediately reads a line from INF.  */
  5341   dbp = lb.buffer;
  5342   *dbp = '\0';
  5343   linebuffer_init (&tline);
  5344 
  5345   incomment = inquote = false;
  5346   found_tag = false;            /* have a proc name; check if extern */
  5347   get_tagname = false;          /* found "procedure" keyword         */
  5348   inparms = false;              /* found '(' after "proc"            */
  5349   verify_tag = false;           /* check if "extern" is ahead        */
  5350 
  5351 
  5352   while (perhaps_more_input (inf)) /* long main loop to get next char */
  5353     {
  5354       c = *dbp++;
  5355       if (c == '\0')            /* if end of line */
  5356         {
  5357           readline (&lb, inf);
  5358           dbp = lb.buffer;
                 /* An empty line carries nothing to examine.  */
  5359           if (*dbp == '\0')
  5360             continue;
  5361           if (!((found_tag && verify_tag)
  5362                 || get_tagname))
  5363             c = *dbp++;         /* only if don't need *dbp pointing
  5364                                    to the beginning of the name of
  5365                                    the procedure or function */
  5366         }
             /* Inside a comment or a string only the closing delimiter is
                of interest; everything else is skipped.  */
  5367       if (incomment)
  5368         {
  5369           if (c == '}')         /* within { } comments */
  5370             incomment = false;
  5371           else if (c == '*' && *dbp == ')') /* within (* *) comments */
  5372             {
  5373               dbp++;
  5374               incomment = false;
  5375             }
  5376           continue;
  5377         }
  5378       else if (inquote)
  5379         {
  5380           if (c == '\'')
  5381             inquote = false;
  5382           continue;
  5383         }
  5384       else
               /* Structural characters.  Every case except the `;' that ends
                  a heading goes straight on to the next character; any other
                  character falls out of the switch to the code below.  */
  5385         switch (c)
  5386           {
  5387           case '\'':
  5388             inquote = true;     /* found first quote */
  5389             continue;
  5390           case '{':             /* found open { comment */
  5391             incomment = true;
  5392             continue;
  5393           case '(':
  5394             if (*dbp == '*')    /* found open (* comment */
  5395               {
  5396                 incomment = true;
  5397                 dbp++;
  5398               }
  5399             else if (found_tag) /* found '(' after tag, i.e., parm-list */
  5400               inparms = true;
  5401             continue;
  5402           case ')':             /* end of parms list */
  5403             if (inparms)
  5404               inparms = false;
  5405             continue;
  5406           case ';':
  5407             if (found_tag && !inparms) /* end of proc or fn stmt */
  5408               {
  5409                 verify_tag = true;
  5410                 break;
  5411               }
  5412             continue;
  5413           }
             /* Past the heading's `;': the next non-blank text decides
                whether the saved name is tagged.  A space at *dbp postpones
                the decision to a later iteration.  */
  5414       if (found_tag && verify_tag && (*dbp != ' '))
  5415         {
  5416           /* Check if this is an "extern" declaration. */
  5417           if (*dbp == '\0')
  5418             continue;
  5419           if (c_tolower (*dbp) == 'e')
  5420             {
  5421               if (nocase_tail ("extern")) /* superfluous, really! */
  5422                 {
  5423                   found_tag = false;
  5424                   verify_tag = false;
  5425                 }
  5426             }
  5427           else if (c_tolower (*dbp) == 'f')
  5428             {
  5429               if (nocase_tail ("forward")) /* check for forward reference */
  5430                 {
  5431                   found_tag = false;
  5432                   verify_tag = false;
  5433                 }
  5434             }
  5435           if (found_tag && verify_tag) /* not external proc, so make tag */
  5436             {
  5437               found_tag = false;
  5438               verify_tag = false;
                     /* Use the values saved when the name was seen; lb may
                        hold a different line by now.  */
  5439               make_tag (name, namelen, true,
  5440                         tline.buffer, taglen, save_lineno, save_lcno);
  5441               continue;
  5442             }
  5443         }
  5444       if (get_tagname)          /* grab name of proc or fn */
  5445         {
  5446           char *cp;
  5447 
  5448           if (*dbp == '\0')
  5449             continue;
  5450 
  5451           /* Find block name. */
  5452           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
  5453             continue;
  5454 
  5455           /* Save all values for later tagging. */
  5456           linebuffer_setlen (&tline, lb.len);
  5457           strcpy (tline.buffer, lb.buffer);
  5458           save_lineno = lineno;
  5459           save_lcno = linecharno;
                 /* NAME points into the copy, at the same offset the name
                    has in the current line.  */
  5460           name = tline.buffer + (dbp - lb.buffer);
  5461           namelen = cp - dbp;
  5462           taglen = cp - lb.buffer + 1;
  5463 
  5464           dbp = cp;             /* set dbp to e-o-token */
  5465           get_tagname = false;
  5466           found_tag = true;
  5467           continue;
  5468 
  5469           /* And proceed to check for "extern". */
  5470         }
  5471       else if (!incomment && !inquote && !found_tag)
  5472         {
  5473           /* Check for proc/fn keywords. */
                 /* NOTE(review): nocase_tail is defined elsewhere; judging by
                    its use here it matches its argument case-insensitively at
                    dbp and advances dbp on success -- confirm.  */
  5474           switch (c_tolower (c))
  5475             {
  5476             case 'p':
  5477               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
  5478                 get_tagname = true;
  5479               continue;
  5480             case 'f':
  5481               if (nocase_tail ("unction"))
  5482                 get_tagname = true;
  5483               continue;
  5484             }
  5485         }
  5486     } /* while not eof */
  5487 
         /* Release the private copy of the tag line.  */
  5488   free (tline.buffer);
  5489 }
  5490 
  5491 
  5492 /*
  5493  * Lisp tag functions
  5494  *  look for (def or (DEF, quote or QUOTE
  5495  */
  5496 
  5497 static void L_getit (void);
  5498 
  5499 static void
  5500 L_getit (void)
  5501 {
  5502   if (*dbp == '\'')             /* Skip prefix quote */
  5503     dbp++;
  5504   else if (*dbp == '(')
  5505   {
  5506     dbp++;
  5507     /* Try to skip "(quote " */
  5508     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
  5509       /* Ok, then skip "(" before name in (defstruct (foo)) */
  5510       dbp = skip_spaces (dbp);
  5511   }
  5512   get_lispy_tag (dbp);
  5513 }
  5514 
  5515 static void
  5516 Lisp_functions (FILE *inf)
  5517 {
  5518   LOOP_ON_INPUT_LINES (inf, lb, dbp)
  5519     {
  5520       if (dbp[0] != '(')
  5521         continue;
  5522 
  5523       /* "(defvar foo)" is a declaration rather than a definition.  */
  5524       if (! declarations)
  5525         {
  5526           char *p = dbp + 1;
  5527           if (LOOKING_AT (p, "defvar"))
  5528             {
  5529               p = skip_name (p); /* past var name */
  5530               p = skip_spaces (p);
  5531               if (*p == ')')
  5532                 continue;
  5533             }
  5534         }
  5535 
  5536       if (strneq (dbp + 1, "cl-", 3) || strneq (dbp + 1, "CL-", 3))
  5537         dbp += 3;
  5538 
  5539       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
  5540         {
  5541           dbp = skip_non_spaces (dbp);
  5542           dbp = skip_spaces (dbp);
  5543           L_getit ();
  5544         }
  5545       else
  5546         {
  5547           /* Check for (foo::defmumble name-defined ... */
  5548           do
  5549             dbp++;
  5550           while (!notinname (*dbp) && *dbp != ':');
  5551           if (*dbp == ':')
  5552             {
  5553               do
  5554                 dbp++;
  5555               while (*dbp == ':');
  5556 
  5557               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
  5558                 {
  5559                   dbp = skip_non_spaces (dbp);
  5560                   dbp = skip_spaces (dbp);
  5561                   L_getit ();
  5562                 }
  5563             }
  5564         }
  5565     }
  5566 }
  5567 
  5568 
  5569 /*
  5570  * Lua script language parsing
  5571  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
  5572  *
  5573  *  "function" and "local function" are tags if they start at column 1.
  5574  */
  5575 static void
  5576 Lua_functions (FILE *inf)
  5577 {
  5578   register char *bp;
  5579 
  5580   LOOP_ON_INPUT_LINES (inf, lb, bp)
  5581     {
  5582       bp = skip_spaces (bp);
  5583       if (bp[0] != 'f' && bp[0] != 'l')
  5584         continue;
  5585 
  5586       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
  5587 
  5588       if (LOOKING_AT (bp, "function"))
  5589         {
  5590           char *tag_name, *tp_dot, *tp_colon;
  5591 
  5592           get_tag (bp, &tag_name);
  5593           /* If the tag ends with ".foo" or ":foo", make an additional tag for
  5594              "foo".  */
  5595           tp_dot = strrchr (tag_name, '.');
  5596           tp_colon = strrchr (tag_name, ':');
  5597           if (tp_dot || tp_colon)
  5598             {
  5599               char *p = tp_dot > tp_colon ? tp_dot : tp_colon;
  5600               ptrdiff_t len_add = p - tag_name + 1;
  5601 
  5602               get_tag (bp + len_add, NULL);
  5603             }
  5604         }
  5605     }
  5606 }
  5607 
  5608 
  5609 /*
  5610  * PostScript tags
  5611  * Just look for lines where the first character is '/'
  5612  * Also look at "defineps" for PSWrap
  5613  * Ideas by:
  5614  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
  5615  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
  5616  */
  5617 static void
  5618 PS_functions (FILE *inf)
  5619 {
  5620   register char *bp, *ep;
  5621 
  5622   LOOP_ON_INPUT_LINES (inf, lb, bp)
  5623     {
  5624       if (bp[0] == '/')
  5625         {
  5626           for (ep = bp+1;
  5627                *ep != '\0' && *ep != ' ' && *ep != '{';
  5628                ep++)
  5629             continue;
  5630           make_tag (bp, ep - bp, true,
  5631                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
  5632         }
  5633       else if (LOOKING_AT (bp, "defineps"))
  5634         get_tag (bp, NULL);
  5635     }
  5636 }
  5637 
  5638 
  5639 /*
  5640  * Forth tags
  5641  * Ignore anything after \ followed by space or in ( )
  5642  * Look for words defined by :
  5643  * Look for constant, code, create, defer, value, and variable
  5644  * OBP extensions:  Look for buffer:, field,
  5645  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
  5646  */
  5647 static void
  5648 Forth_words (FILE *inf)
  5649 {
  5650   register char *bp;
  5651 
  5652   LOOP_ON_INPUT_LINES (inf, lb, bp)
  5653     while ((bp = skip_spaces (bp))[0] != '\0')
  5654       if (bp[0] == '\\' && c_isspace (bp[1]))
  5655         break;                  /* read next line */
  5656       else if (bp[0] == '(' && c_isspace (bp[1]))
  5657         do                      /* skip to ) or eol */
  5658           bp++;
  5659         while (*bp != ')' && *bp != '\0');
  5660       else if (((bp[0] == ':' && c_isspace (bp[1]) && bp++)
  5661                 || LOOKING_AT_NOCASE (bp, "constant")
  5662                 || LOOKING_AT_NOCASE (bp, "2constant")
  5663                 || LOOKING_AT_NOCASE (bp, "fconstant")
  5664                 || LOOKING_AT_NOCASE (bp, "code")
  5665                 || LOOKING_AT_NOCASE (bp, "create")
  5666                 || LOOKING_AT_NOCASE (bp, "defer")
  5667                 || LOOKING_AT_NOCASE (bp, "value")
  5668                 || LOOKING_AT_NOCASE (bp, "2value")
  5669                 || LOOKING_AT_NOCASE (bp, "fvalue")
  5670                 || LOOKING_AT_NOCASE (bp, "variable")
  5671                 || LOOKING_AT_NOCASE (bp, "2variable")
  5672                 || LOOKING_AT_NOCASE (bp, "fvariable")
  5673                 || LOOKING_AT_NOCASE (bp, "buffer:")
  5674                 || LOOKING_AT_NOCASE (bp, "field:")
  5675                 || LOOKING_AT_NOCASE (bp, "+field")
  5676                 || LOOKING_AT_NOCASE (bp, "field") /* not standard? */
  5677                 || LOOKING_AT_NOCASE (bp, "begin-structure")
  5678                 || LOOKING_AT_NOCASE (bp, "synonym")
  5679                 )
  5680                && c_isspace (bp[0]))
  5681         {
  5682           /* Yay!  A definition! */
  5683           char* name_start = skip_spaces (bp);
  5684           char* name_end = skip_non_spaces (name_start);
  5685           if (name_start < name_end)
  5686             make_tag (name_start, name_end - name_start,
  5687                       true, lb.buffer, name_end - lb.buffer,
  5688                       lineno, linecharno);
  5689           bp = name_end;
  5690         }
  5691       else
  5692         bp = skip_non_spaces (bp);
  5693 }
  5694 
  5695 
  5696 /*
  5697  * Scheme tag functions
  5698  * look for (def... xyzzy
  5699  *          (def... (xyzzy
  5700  *          (def ... ((...(xyzzy ....
  5701  *          (set! xyzzy
  5702  * Original code by Ken Haase (1985?)
  5703  */
  5704 static void
  5705 Scheme_functions (FILE *inf)
  5706 {
  5707   register char *bp;
  5708 
  5709   LOOP_ON_INPUT_LINES (inf, lb, bp)
  5710     {
  5711       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
  5712         {
  5713           bp = skip_non_spaces (bp+4);
  5714           /* Skip over open parens and white space.
  5715              Don't continue past '\0' or '='. */
  5716           while (*bp && notinname (*bp) && *bp != '=')
  5717             bp++;
  5718           get_lispy_tag (bp);
  5719         }
  5720       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
  5721         get_lispy_tag (bp);
  5722     }
  5723 }
  5724 
  5725 
  5726 /* Find tags in TeX and LaTeX input files.  */
  5727 
  5728 /* TEX_toktab is a table of TeX control sequences that define tags.
  5729  * Each entry records one such control sequence.
  5730  *
  5731  * Original code from who knows whom.
  5732  * Ideas by:
  5733  *   Stefan Monnier (2002)
  5734  */
  5735 
/* Table with tag tokens.  It is NULL until TeX_commands first runs and
   asks TEX_decode_env to build it; TeX_commands walks it until it
   reaches an entry whose buffer is NULL.  */
static linebuffer *TEX_toktab = NULL;

/* Default set of control sequences to put into TEX_toktab.
   The value of environment var TEXTAGS is prepended to this.
   Control sequence names are separated by ':' and are written
   without the escape character.  */
static const char *TEX_defenv = "\
:chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
:part:appendix:entry:index:def\
:newcommand:renewcommand:newenvironment:renewenvironment";

/* Defined after TeX_commands, which calls it.  */
static void TEX_decode_env (const char *, const char *);
  5746 
  5747 /*
  5748  * TeX/LaTeX scanning loop.
  5749  */
  5750 static void
  5751 TeX_commands (FILE *inf)
  5752 {
  5753   char *cp;
  5754   linebuffer *key;
  5755 
  5756   char TEX_esc = '\0';
  5757   char TEX_opgrp UNINIT, TEX_clgrp UNINIT;
  5758 
  5759   /* Initialize token table once from environment. */
  5760   if (TEX_toktab == NULL)
  5761     TEX_decode_env ("TEXTAGS", TEX_defenv);
  5762 
  5763   LOOP_ON_INPUT_LINES (inf, lb, cp)
  5764     {
  5765       /* Look at each TEX keyword in line. */
  5766       for (;;)
  5767         {
  5768           /* Look for a TEX escape. */
  5769           while (true)
  5770             {
  5771               char c = *cp++;
  5772               if (c == '\0' || c == '%')
  5773                 goto tex_next_line;
  5774 
  5775               /* Select either \ or ! as escape character, whichever comes
  5776                  first outside a comment.  */
  5777               if (!TEX_esc)
  5778                 switch (c)
  5779                   {
  5780                   case '\\':
  5781                     TEX_esc = c;
  5782                     TEX_opgrp = '{';
  5783                     TEX_clgrp = '}';
  5784                     break;
  5785 
  5786                   case '!':
  5787                     TEX_esc = c;
  5788                     TEX_opgrp = '<';
  5789                     TEX_clgrp = '>';
  5790                     break;
  5791                   }
  5792 
  5793               if (c == TEX_esc)
  5794                 break;
  5795             }
  5796 
  5797           for (key = TEX_toktab; key->buffer != NULL; key++)
  5798             if (strneq (cp, key->buffer, key->len))
  5799               {
  5800                 char *p;
  5801                 ptrdiff_t namelen, linelen;
  5802                 bool opgrp = false;
  5803 
  5804                 cp = skip_spaces (cp + key->len);
  5805                 if (*cp == TEX_opgrp)
  5806                   {
  5807                     opgrp = true;
  5808                     cp++;
  5809                   }
  5810                 for (p = cp;
  5811                      (!c_isspace (*p) && *p != '#' &&
  5812                       *p != TEX_opgrp && *p != TEX_clgrp);
  5813                      p++)
  5814                   continue;
  5815                 namelen = p - cp;
  5816                 linelen = lb.len;
  5817                 if (!opgrp || *p == TEX_clgrp)
  5818                   {
  5819                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
  5820                       p++;
  5821                     linelen = p - lb.buffer + 1;
  5822                   }
  5823                 make_tag (cp, namelen, true,
  5824                           lb.buffer, linelen, lineno, linecharno);
  5825                 goto tex_next_line; /* We only tag a line once */
  5826               }
  5827         }
  5828     tex_next_line:
  5829       ;
  5830     }
  5831 }
  5832 
  5833 /* Read environment and prepend it to the default string.
  5834    Build token table. */
  5835 static void
  5836 TEX_decode_env (const char *evarname, const char *defenv)
  5837 {
  5838   const char *env, *p;
  5839   ptrdiff_t len = 1;
  5840 
  5841   /* Append default string to environment. */
  5842   env = getenv (evarname);
  5843   if (!env)
  5844     env = defenv;
  5845   else
  5846     env = concat (env, defenv, "");
  5847 
  5848   /* If the environment variable doesn't start with a colon, increase
  5849      the length of the token table.  */
  5850   if (*env != ':')
  5851     len++;
  5852 
  5853   /* Allocate a token table */
  5854   for (p = env; (p = strchr (p, ':')); )
  5855     if (*++p)
  5856       len++;
  5857   TEX_toktab = xnew (len, linebuffer);
  5858 
  5859   /* Unpack environment string into token table. Be careful about */
  5860   /* zero-length strings (leading ':', "::" and trailing ':') */
  5861   for (ptrdiff_t i = 0; *env != '\0'; )
  5862     {
  5863       p = strchr (env, ':');
  5864       if (!p)                   /* End of environment string. */
  5865         p = env + strlen (env);
  5866       if (p - env > 0)
  5867         {                       /* Only non-zero strings. */
  5868           TEX_toktab[i].buffer = savenstr (env, p - env);
  5869           TEX_toktab[i].len = p - env;
  5870           i++;
  5871         }
  5872       if (*p)
  5873         env = p + 1;
  5874       else
  5875         {
  5876           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
  5877           TEX_toktab[i].len = 0;
  5878           break;
  5879         }
  5880     }
  5881 }
  5882 
  5883 
  5884 /* Texinfo support.  Dave Love, Mar. 2000.  */
  5885 static void
  5886 Texinfo_nodes (FILE *inf)
  5887 {
  5888   char *cp, *start;
  5889   LOOP_ON_INPUT_LINES (inf, lb, cp)
  5890     if (LOOKING_AT (cp, "@node"))
  5891       {
  5892         start = cp;
  5893         while (*cp != '\0' && *cp != ',')
  5894           cp++;
  5895         make_tag (start, cp - start, true,
  5896                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  5897       }
  5898 }
  5899 
  5900 
  5901 /*
  5902  * HTML support.
  5903  * Contents of <title>, <h1>, <h2>, <h3> are tags.
  5904  * Contents of <a name=xxx> are tags with name xxx.
  5905  *
  5906  * Francesco Potortì, 2002.
  5907  */
  5908 static void
  5909 HTML_labels (FILE *inf)
  5910 {
  5911   bool getnext = false;         /* next text outside of HTML tags is a tag */
  5912   bool skiptag = false;         /* skip to the end of the current HTML tag */
  5913   bool intag = false;           /* inside an html tag, looking for ID= */
  5914   bool inanchor = false;        /* when INTAG, is an anchor, look for NAME= */
  5915   char *end;
  5916 
  5917 
  5918   linebuffer_setlen (&token_name, 0); /* no name in buffer */
  5919 
  5920   LOOP_ON_INPUT_LINES (inf, lb, dbp)
  5921     for (;;)                    /* loop on the same line */
  5922       {
  5923         if (skiptag)            /* skip HTML tag */
  5924           {
  5925             while (*dbp != '\0' && *dbp != '>')
  5926               dbp++;
  5927             if (*dbp == '>')
  5928               {
  5929                 dbp += 1;
  5930                 skiptag = false;
  5931                 continue;       /* look on the same line */
  5932               }
  5933             break;              /* go to next line */
  5934           }
  5935 
  5936         else if (intag) /* look for "name=" or "id=" */
  5937           {
  5938             while (*dbp != '\0' && *dbp != '>'
  5939                    && c_tolower (*dbp) != 'n' && c_tolower (*dbp) != 'i')
  5940               dbp++;
  5941             if (*dbp == '\0')
  5942               break;            /* go to next line */
  5943             if (*dbp == '>')
  5944               {
  5945                 dbp += 1;
  5946                 intag = false;
  5947                 continue;       /* look on the same line */
  5948               }
  5949             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
  5950                 || LOOKING_AT_NOCASE (dbp, "id="))
  5951               {
  5952                 bool quoted = (dbp[0] == '"');
  5953 
  5954                 if (quoted)
  5955                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
  5956                     continue;
  5957                 else
  5958                   for (end = dbp; *end != '\0' && intoken (*end); end++)
  5959                     continue;
  5960                 linebuffer_setlen (&token_name, end - dbp);
  5961                 memcpyz (token_name.buffer, dbp, end - dbp);
  5962 
  5963                 dbp = end;
  5964                 intag = false;  /* we found what we looked for */
  5965                 skiptag = true; /* skip to the end of the tag */
  5966                 getnext = true; /* then grab the text */
  5967                 continue;       /* look on the same line */
  5968               }
  5969             dbp += 1;
  5970           }
  5971 
  5972         else if (getnext)       /* grab next tokens and tag them */
  5973           {
  5974             dbp = skip_spaces (dbp);
  5975             if (*dbp == '\0')
  5976               break;            /* go to next line */
  5977             if (*dbp == '<')
  5978               {
  5979                 intag = true;
  5980                 inanchor = (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]));
  5981                 continue;       /* look on the same line */
  5982               }
  5983 
  5984             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
  5985               continue;
  5986             make_tag (token_name.buffer, token_name.len, true,
  5987                       dbp, end - dbp, lineno, linecharno);
  5988             linebuffer_setlen (&token_name, 0); /* no name in buffer */
  5989             getnext = false;
  5990             break;              /* go to next line */
  5991           }
  5992 
  5993         else                    /* look for an interesting HTML tag */
  5994           {
  5995             while (*dbp != '\0' && *dbp != '<')
  5996               dbp++;
  5997             if (*dbp == '\0')
  5998               break;            /* go to next line */
  5999             intag = true;
  6000             if (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]))
  6001               {
  6002                 inanchor = true;
  6003                 continue;       /* look on the same line */
  6004               }
  6005             else if (LOOKING_AT_NOCASE (dbp, "<title>")
  6006                      || LOOKING_AT_NOCASE (dbp, "<h1>")
  6007                      || LOOKING_AT_NOCASE (dbp, "<h2>")
  6008                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
  6009               {
  6010                 intag = false;
  6011                 getnext = true;
  6012                 continue;       /* look on the same line */
  6013               }
  6014             dbp += 1;
  6015           }
  6016       }
  6017 }
  6018 
  6019 
  6020 /*
  6021  * Prolog support
  6022  *
  6023  * Assumes that the predicate or rule starts at column 0.
  6024  * Only the first clause of a predicate or rule is added.
  6025  * Original code by Sunichirou Sugou (1989)
  6026  * Rewritten by Anders Lindgren (1996)
  6027  */
/* Forward declarations of the helpers used by Prolog_functions.  */

/* Tag the clause header at the start of a line, if there is one.  */
static ptrdiff_t prolog_pr (char *, char *, ptrdiff_t);
/* Read on until a line containing the end of a C-style comment.  */
static void prolog_skip_comment (linebuffer *, FILE *);
/* Measure the atom found at a given position of a string.  */
static size_t prolog_atom (char *, size_t);
  6031 
  6032 static void
  6033 Prolog_functions (FILE *inf)
  6034 {
  6035   char *cp, *last = NULL;
  6036   ptrdiff_t lastlen = 0, allocated = 0;
  6037 
  6038   LOOP_ON_INPUT_LINES (inf, lb, cp)
  6039     {
  6040       if (cp[0] == '\0')        /* Empty line */
  6041         continue;
  6042       else if (c_isspace (cp[0])) /* Not a predicate */
  6043         continue;
  6044       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
  6045         prolog_skip_comment (&lb, inf);
  6046       else
  6047         {
  6048           ptrdiff_t len = prolog_pr (cp, last, lastlen);
  6049           if (0 < len)
  6050             {
  6051               /* Store the predicate name to avoid generating duplicate
  6052                  tags later.  */
  6053               if (allocated <= len)
  6054                 {
  6055                   xrnew (last, len + 1, 1);
  6056                   allocated = len + 1;
  6057                 }
  6058               memcpyz (last, cp, len);
  6059               lastlen = len;
  6060             }
  6061         }
  6062     }
  6063   free (last);
  6064 }
  6065 
  6066 
  6067 static void
  6068 prolog_skip_comment (linebuffer *plb, FILE *inf)
  6069 {
  6070   char *cp;
  6071 
  6072   do
  6073     {
  6074       for (cp = plb->buffer; *cp != '\0'; cp++)
  6075         if (cp[0] == '*' && cp[1] == '/')
  6076           return;
  6077       readline (plb, inf);
  6078     }
  6079   while (perhaps_more_input (inf));
  6080 }
  6081 
  6082 /*
  6083  * A predicate or rule definition is added if it matches:
  6084  *     <beginning of line><Prolog Atom><whitespace>(
  6085  * or  <beginning of line><Prolog Atom><whitespace>:-
  6086  *
  6087  * It is added to the tags database if it doesn't match the
  6088  * name of the previous clause header.
  6089  *
  6090  * Return the size of the name of the predicate or rule, or 0 if no
  6091  * header was found.
  6092  */
  6093 static ptrdiff_t
  6094 prolog_pr (char *s, char *last, ptrdiff_t lastlen)
  6095 {
  6096   ptrdiff_t len = prolog_atom (s, 0);
  6097   if (len == 0)
  6098     return 0;
  6099   ptrdiff_t pos = skip_spaces (s + len) - s;
  6100 
  6101   /* Save only the first clause.  */
  6102   if ((s[pos] == '.'
  6103        || (s[pos] == '(' && (pos += 1))
  6104        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
  6105       && ! (lastlen == len && memcmp (s, last, len) == 0))
  6106     {
  6107       make_tag (s, len, true, s, pos, lineno, linecharno);
  6108       return len;
  6109     }
  6110 
  6111   return 0;
  6112 }
  6113 
  6114 /*
  6115  * Consume a Prolog atom.
  6116  * Return the number of bytes consumed, or 0 if there was an error.
  6117  *
  6118  * A prolog atom, in this context, could be one of:
  6119  * - An alphanumeric sequence, starting with a lower case letter.
  6120  * - A quoted arbitrary string. Single quotes can escape themselves.
  6121  *   Backslash quotes everything.
  6122  */
  6123 static size_t
  6124 prolog_atom (char *s, size_t pos)
  6125 {
  6126   size_t origpos;
  6127 
  6128   origpos = pos;
  6129 
  6130   if (c_islower (s[pos]) || s[pos] == '_')
  6131     {
  6132       /* The atom is unquoted. */
  6133       pos++;
  6134       while (c_isalnum (s[pos]) || s[pos] == '_')
  6135         {
  6136           pos++;
  6137         }
  6138       return pos - origpos;
  6139     }
  6140   else if (s[pos] == '\'')
  6141     {
  6142       pos++;
  6143 
  6144       for (;;)
  6145         {
  6146           if (s[pos] == '\'')
  6147             {
  6148               pos++;
  6149               if (s[pos] != '\'')
  6150                 break;
  6151               pos++;            /* A double quote  */
  6152             }
  6153           else if (s[pos] == '\0')
  6154             /* Multiline quoted atoms are ignored.  */
  6155             return 0;
  6156           else if (s[pos] == '\\')
  6157             {
  6158               if (s[pos+1] == '\0')
  6159                 return 0;
  6160               pos += 2;
  6161             }
  6162           else
  6163             pos++;
  6164         }
  6165       return pos - origpos;
  6166     }
  6167   else
  6168     return 0;
  6169 }
  6170 
  6171 
  6172 /*
  6173  * Support for Mercury
  6174  *
  6175  * Assumes that the declarations start at column 0.
  6176  * Original code by Sunichirou Sugou (1989) for Prolog.
  6177  * Rewritten by Anders Lindgren (1996) for Prolog.
  6178  * Adapted by Fabrice Nicol (2021) for Mercury.
  6179  * Note: Prolog-support behavior is preserved if
  6180  * --declarations is used, corresponding to
  6181  * with_mercury_definitions=true.
  6182  */
  6183 
/* Forward declarations of helpers defined further down.  */
static ptrdiff_t mercury_pr (char *, char *, ptrdiff_t);
static void mercury_skip_comment (linebuffer *, FILE *);
/* Set by mercury_decl when the declaration keyword is "type".  */
static bool is_mercury_type = false;
/* Set by mercury_decl when the keyword is "some" or "all"; cleared by
   mercury_decl once it has seen the "pred" or "func" that follows.  */
static bool is_mercury_quantifier = false;
/* Set by Mercury_functions for each line it examines: true if the line
   starts with ":-".  */
static bool is_mercury_declaration = false;
/* Value returned by mercury_decl.  */
typedef struct
{
  size_t pos;          /* Position reached in parsing tag name.  */
  size_t namelength;   /* Length of tag name  */
  size_t totlength;    /* Total length of parsed tag: this field is currently
                          reserved for control and debugging.   */
} mercury_pos_t;
  6196 
  6197 /*
  6198  * Objective-C and Mercury have identical file extension .m.
  6199  * To disambiguate between Objective C and Mercury, parse file
  6200  * with the following heuristics hook:
  6201  *   - if line starts with :-, choose Mercury unconditionally;
  6202  *   - if line starts with #, @, choose Objective-C;
  6203  *   - otherwise compute the following ratio:
  6204  *
  6205  *     r = (number of lines with :-
  6206  *          or % in non-commented parts or . at trimmed EOL)
  6207  *         / (number of lines - number of lines starting by any amount
  6208  *                        of whitespace, optionally followed by comment(s))
  6209  *
  6210  * Note: strings are neglected in counts.
  6211  *
  6212  * If r > mercury_heuristics_ratio, choose Mercury.
  6213  * Experimental tests show that a possibly optimal default value for
  6214  * this floor value is around 0.5.  This is the default value for
  6215  * MERCURY_HEURISTICS_RATIO, defined in the first lines of this file.
  6216  * The closer r is to 0.5, the closer the source code to pure Prolog.
  6217  * Idiomatic Mercury is scored either with r = 1.0 or higher.
  6218  * Objective-C is scored with r = 0.0.  When this fails, the r-score
  6219  * never rose above 0.1 in Objective-C tests.
  6220  */
  6221 
  6222 static void
  6223 test_objc_is_mercury (char *this_file, language **lang)
  6224 {
  6225   if (this_file == NULL) return;
  6226   FILE* fp = fopen (this_file, "r");
  6227   if (fp == NULL)
  6228     pfatal (this_file);
  6229 
  6230   bool blank_line = false; /* Line starting with any amount of white space
  6231                               followed by optional comment(s).  */
  6232   bool commented_line = false;
  6233   bool found_dot = false;
  6234   bool only_space_before = true;
  6235   bool start_of_line = true;
  6236   int c;
  6237   intmax_t lines = 1;
  6238   intmax_t mercury_dots = 0;
  6239   intmax_t percentage_signs = 0;
  6240   intmax_t rule_signs = 0;
  6241   float ratio = 0;
  6242 
  6243   while ((c = fgetc (fp)) != EOF)
  6244     {
  6245       switch (c)
  6246         {
  6247         case '\n':
  6248           if (! blank_line) ++lines;
  6249           blank_line = true;
  6250           commented_line = false;
  6251           start_of_line = true;
  6252           if (found_dot) ++mercury_dots;
  6253           found_dot = false;
  6254           only_space_before = true;
  6255           break;
  6256         case '.':
  6257           found_dot = ! commented_line;
  6258           only_space_before = false;
  6259           break;
  6260         case  '%': /* More frequent in Mercury.  May be modulo in Obj.-C.  */
  6261           if (! commented_line)
  6262             {
  6263               ++percentage_signs;
  6264               /* Cannot tell if it is a comment or modulo yet for sure.
  6265                  Yet works for heuristic purposes.  */
  6266               commented_line = true;
  6267             }
  6268           found_dot = false;
  6269           start_of_line = false;
  6270           only_space_before = false;
  6271           break;
  6272         case  '/':
  6273           {
  6274             int d = fgetc (fp);
  6275             found_dot = false;
  6276             only_space_before = false;
  6277             if (! commented_line)
  6278               {
  6279                 if (d == '*')
  6280                   commented_line = true;
  6281                 else
  6282                   /* If d == '/', cannot tell if it is an Obj.-C comment:
  6283                      may be Mercury integ. division.  */
  6284                     blank_line = false;
  6285               }
  6286           }
  6287           FALLTHROUGH;
  6288         case  ' ':
  6289         case '\t':
  6290           start_of_line = false;
  6291           break;
  6292         case ':':
  6293           c = fgetc (fp);
  6294           if (start_of_line)
  6295             {
  6296               if (c == '-')
  6297                 {
  6298                   ratio = 1.0; /* Failsafe, not an operator in Obj.-C.  */
  6299                   goto out;
  6300                 }
  6301               start_of_line = false;
  6302             }
  6303           else
  6304             {
  6305               /* p :- q.  Frequent in Mercury.
  6306                  Rare or in quoted exprs in Obj.-C.  */
  6307               if (c == '-' && ! commented_line)
  6308                 ++rule_signs;
  6309             }
  6310           blank_line = false;
  6311           found_dot = false;
  6312           only_space_before = false;
  6313           break;
  6314         case '@':
  6315         case '#':
  6316           if (start_of_line || only_space_before)
  6317             {
  6318               ratio = 0.0;
  6319               goto out;
  6320             }
  6321           FALLTHROUGH;
  6322         default:
  6323           start_of_line = false;
  6324           blank_line = false;
  6325           found_dot = false;
  6326           only_space_before = false;
  6327         }
  6328     }
  6329 
  6330   /* Fallback heuristic test.  Not failsafe but errless in practice.  */
  6331   ratio = ((float) rule_signs + percentage_signs + mercury_dots) / lines;
  6332 
  6333  out:
  6334   if (fclose (fp) == EOF)
  6335     pfatal (this_file);
  6336 
  6337   if (ratio > mercury_heuristics_ratio)
  6338     {
  6339       /* Change the language from Objective-C to Mercury.  */
  6340       static language lang0 = { "mercury", Mercury_help, Mercury_functions,
  6341         Mercury_suffixes };
  6342       *lang = &lang0;
  6343     }
  6344 }
  6345 
  6346 static void
  6347 Mercury_functions (FILE *inf)
  6348 {
  6349   char *cp, *last = NULL;
  6350   ptrdiff_t lastlen = 0, allocated = 0;
  6351   if (declarations) with_mercury_definitions = true;
  6352 
  6353   LOOP_ON_INPUT_LINES (inf, lb, cp)
  6354     {
  6355       if (cp[0] == '\0')   /* Empty line.  */
  6356         continue;
  6357       else if (c_isspace (cp[0]) || cp[0] == '%')
  6358         /*  A Prolog-type comment or anything other than a declaration.  */
  6359         continue;
  6360       else if (cp[0] == '/' && cp[1] == '*')  /* Mercury C-type comment.  */
  6361         mercury_skip_comment (&lb, inf);
  6362       else
  6363         {
  6364           is_mercury_declaration = (cp[0] == ':' && cp[1] == '-');
  6365 
  6366           if (is_mercury_declaration
  6367               || with_mercury_definitions)
  6368             {
  6369               ptrdiff_t len = mercury_pr (cp, last, lastlen);
  6370               if (0 < len)
  6371                 {
  6372                   /* Store the declaration to avoid generating duplicate
  6373                      tags later.  */
  6374                   if (allocated <= len)
  6375                     {
  6376                       xrnew (last, len + 1, 1);
  6377                       allocated = len + 1;
  6378                     }
  6379                   memcpyz (last, cp, len);
  6380                   lastlen = len;
  6381                 }
  6382             }
  6383         }
  6384     }
  6385   free (last);
  6386 }
  6387 
  6388 static void
  6389 mercury_skip_comment (linebuffer *plb, FILE *inf)
  6390 {
  6391   char *cp;
  6392 
  6393   do
  6394     {
  6395       for (cp = plb->buffer; *cp != '\0'; ++cp)
  6396         if (cp[0] == '*' && cp[1] == '/')
  6397           return;
  6398       readline (plb, inf);
  6399     }
  6400   while (perhaps_more_input (inf));
  6401 }
  6402 
  6403 /*
  6404  * A declaration is added if it matches:
  6405  *     <beginning of line>:-<whitespace><Mercury Term><whitespace>(
  6406  * If with_mercury_definitions == true, we also add:
  6407  *     <beginning of line><Mercury item><whitespace>(
  6408  * or  <beginning of line><Mercury item><whitespace>:-
  6409  * As for Prolog support, different arities and types are not taken into
  6410  * consideration.
  6411  * Item is added to the tags database if it doesn't match the
  6412  * name of the previous declaration.
  6413  *
  6414  * Consume a Mercury declaration.
  6415  * Return the number of bytes consumed, or 0 if there was an error.
  6416  *
  6417  * A Mercury declaration must be one of:
  6418  *  :- type
  6419  *  :- solver type
  6420  *  :- pred
  6421  *  :- func
  6422  *  :- inst
  6423  *  :- mode
  6424  *  :- typeclass
  6425  *  :- instance
  6426  *  :- pragma
  6427  *  :- promise
  6428  *  :- initialise
  6429  *  :- finalise
  6430  *  :- mutable
  6431  *  :- module
  6432  *  :- interface
  6433  *  :- implementation
  6434  *  :- import_module
  6435  *  :- use_module
  6436  *  :- include_module
  6437  *  :- end_module
  6438  * followed on the same line by an alphanumeric sequence, starting with a lower
  6439  * case letter or by a single-quoted arbitrary string.
  6440  * Single quotes can escape themselves.  Backslash quotes everything.
  6441  *
  6442  * Return the size of the name of the declaration or 0 if no header was found.
  6443  * As quantifiers may precede functions or predicates, we must list them too.
  6444  */
  6445 
  6446 static const char *Mercury_decl_tags[] = {"type", "solver type", "pred",
  6447   "func", "inst", "mode", "typeclass", "instance", "pragma", "promise",
  6448   "initialise", "finalise", "mutable", "module", "interface", "implementation",
  6449   "import_module", "use_module", "include_module", "end_module", "some", "all"};
  6450 
  6451 static mercury_pos_t
  6452 mercury_decl (char *s, size_t pos)
  6453 {
  6454   mercury_pos_t null_pos = {0, 0, 0};
  6455 
  6456   if (s == NULL) return null_pos;
  6457 
  6458   size_t origpos;
  6459   origpos = pos;
  6460 
  6461   while (c_isalnum (s[pos]) || s[pos] == '_')
  6462     pos++;
  6463 
  6464   unsigned char decl_type_length = pos - origpos;
  6465   char buf[decl_type_length + 1];
  6466   memset (buf, 0, decl_type_length + 1);
  6467 
  6468   /* Mercury declaration tags.  Consume them, then check the declaration item
  6469      following :- is legitimate, then go on as in the prolog case.  */
  6470 
  6471   memcpy (buf, &s[origpos], decl_type_length);
  6472 
  6473   bool found_decl_tag = false;
  6474 
  6475   if (is_mercury_quantifier)
  6476     {
  6477       if (strcmp (buf, "pred") != 0 && strcmp (buf, "func") != 0) /* Bad syntax.  */
  6478         return null_pos;
  6479 
  6480       is_mercury_quantifier = false; /* Reset to base value.  */
  6481       found_decl_tag = true;
  6482     }
  6483   else
  6484     {
  6485       for (int j = 0; j < sizeof (Mercury_decl_tags) / sizeof (char*); ++j)
  6486         {
  6487           if (strcmp (buf, Mercury_decl_tags[j]) == 0)
  6488             {
  6489               found_decl_tag = true;
  6490               if (strcmp (buf, "type") == 0)
  6491                 is_mercury_type = true;
  6492 
  6493               if (strcmp (buf, "some") == 0
  6494                   || strcmp (buf, "all") == 0)
  6495                 {
  6496                   is_mercury_quantifier = true;
  6497                 }
  6498 
  6499               break;  /* Found declaration tag of rank j.  */
  6500             }
  6501           else
  6502             /* 'solver type' has a blank in the middle,
  6503                so this is the hard case.  */
  6504             if (strcmp (buf, "solver") == 0)
  6505               {
  6506                 do
  6507                   pos++;
  6508                 while (c_isalnum (s[pos]) || s[pos] == '_');
  6509 
  6510                 decl_type_length = pos - origpos;
  6511                 char buf2[decl_type_length + 1];
  6512                 memset (buf2, 0, decl_type_length + 1);
  6513                 memcpy (buf2, &s[origpos], decl_type_length);
  6514 
  6515                 if (strcmp (buf2, "solver type") == 0)
  6516                   {
  6517                     found_decl_tag = false;
  6518                     break;  /* Found declaration tag of rank j.  */
  6519                   }
  6520               }
  6521         }
  6522     }
  6523 
  6524   /* If with_mercury_definitions == false
  6525    * this is a Mercury syntax error, ignoring... */
  6526 
  6527   if (with_mercury_definitions)
  6528     {
  6529       if (found_decl_tag)
  6530         pos = skip_spaces (s + pos) - s; /* Skip len blanks again.  */
  6531       else
  6532         /* Prolog-like behavior
  6533          * we have parsed the predicate once, yet inappropriately
  6534          * so restarting again the parsing step.  */
  6535         pos = 0;
  6536     }
  6537   else
  6538     {
  6539       if (found_decl_tag)
  6540         pos = skip_spaces (s + pos) - s; /* Skip len blanks again.  */
  6541       else
  6542         return null_pos;
  6543     }
  6544 
  6545   /* From now on it is the same as for Prolog except for module dots.  */
  6546 
  6547   size_t start_of_name = pos;
  6548 
  6549   if (c_islower (s[pos]) || s[pos] == '_' )
  6550     {
  6551       /* The name is unquoted.
  6552          Do not confuse module dots with end-of-declaration dots.  */
  6553       int module_dot_pos = 0;
  6554 
  6555       while (c_isalnum (s[pos])
  6556              || s[pos] == '_'
  6557              || (s[pos] == '.' /* A module dot.  */
  6558                  && (c_isalnum (s[pos + 1]) || s[pos + 1] == '_')
  6559                  && (module_dot_pos = pos)))  /* Record module dot position.
  6560                                                  Erase module from name.  */
  6561         ++pos;
  6562 
  6563       if (module_dot_pos)
  6564         {
  6565           start_of_name = module_dot_pos + 2;
  6566           ++pos;
  6567         }
  6568 
  6569       mercury_pos_t position = {pos, pos - start_of_name + 1, pos - origpos};
  6570       return position;
  6571     }
  6572   else if (s[pos] == '\'')
  6573     {
  6574       ++pos;
  6575       for (;;)
  6576         {
  6577           if (s[pos] == '\'')
  6578             {
  6579               ++pos;
  6580               if (s[pos] != '\'')
  6581                 break;
  6582               ++pos; /* A double quote.  */
  6583             }
  6584           else if (s[pos] == '\0')  /* Multiline quoted atoms are ignored.  */
  6585             return null_pos;
  6586           else if (s[pos] == '\\')
  6587             {
  6588               if (s[pos+1] == '\0')
  6589                 return null_pos;
  6590               pos += 2;
  6591             }
  6592           else
  6593             ++pos;
  6594         }
  6595 
  6596       mercury_pos_t position = {pos, pos - start_of_name + 1, pos - origpos};
  6597       return position;
  6598     }
  6599   else if (is_mercury_quantifier && s[pos] == '[')   /* :- some [T] pred/func.  */
  6600     {
  6601       char *close_bracket = strchr (s + pos + 1, ']');
  6602       if (!close_bracket)
  6603         return null_pos;
  6604       pos = skip_spaces (close_bracket + 1) - s;
  6605       mercury_pos_t position = mercury_decl (s, pos);
  6606       position.totlength += pos - origpos;
  6607       return position;
  6608     }
  6609   else if (s[pos] == '.')  /* as in ':- interface.'  */
  6610     {
  6611       mercury_pos_t position = {pos, pos - origpos + 1, pos - origpos};
  6612       return position;
  6613     }
  6614   else
  6615     return null_pos;
  6616 }
  6617 
  6618 static ptrdiff_t
  6619 mercury_pr (char *s, char *last, ptrdiff_t lastlen)
  6620 {
  6621   size_t len0 = 0;
  6622   is_mercury_type = false;
  6623   is_mercury_quantifier = false;
  6624   bool stop_at_rule = false;
  6625 
  6626   if (is_mercury_declaration)
  6627     {
  6628       /* Skip len0 blanks only for declarations.  */
  6629       len0 = skip_spaces (s + 2) - s;
  6630     }
  6631 
  6632   mercury_pos_t position = mercury_decl (s, len0);
  6633   size_t pos = position.pos;
  6634   int offset = 0;  /* may be < 0  */
  6635   if (pos == 0) return 0;
  6636 
  6637   /* Skip white space for:
  6638      a. rules in definitions before :-
  6639      b. 0-arity predicates with inlined modes.
  6640      c. possibly multiline type definitions  */
  6641 
  6642   while (c_isspace (s[pos])) { ++pos; ++offset; }
  6643 
  6644   if (( ((s[pos] == '.' && (pos += 1))     /* case 1
  6645                                               This is a statement dot,
  6646                                               not a module dot. */
  6647          || c_isalnum(s[pos])              /* 0-arity procedures  */
  6648          || (s[pos] == '(' && (pos += 1))  /* case 2: arity > 0   */
  6649          || ((s[pos] == ':')               /* case 3: rules  */
  6650              && s[pos + 1] == '-' && (stop_at_rule = true)))
  6651      && (lastlen != pos || memcmp (s, last, pos) != 0)
  6652         )
  6653       /* Types are often declared on several lines so keeping just
  6654          the first line.  */
  6655 
  6656       || is_mercury_type)  /* When types are implemented.  */
  6657     {
  6658       size_t namelength = position.namelength;
  6659       if (stop_at_rule && offset) --offset;
  6660 
  6661       /* Left-trim type definitions.  */
  6662 
  6663       while (pos > namelength + offset
  6664              && c_isspace (s[pos - namelength - offset]))
  6665         --offset;
  6666 
  6667       make_tag (s + pos - namelength - offset, namelength - 1, true,
  6668                                 s, pos - offset - 1, lineno, linecharno);
  6669       return pos;
  6670     }
  6671 
  6672   return 0;
  6673 }
  6674 
  6675 
  6676 /*
  6677  * Support for Erlang
  6678  *
  6679  * Generates tags for functions, defines, and records.
  6680  * Assumes that Erlang functions start at column 0.
  6681  * Original code by Anders Lindgren (1996)
  6682  */
  6683 static ptrdiff_t erlang_func (char *, char *, ptrdiff_t, ptrdiff_t *);
  6684 static void erlang_attribute (char *);
  6685 static ptrdiff_t erlang_atom (char *);
  6686 
  6687 static void
  6688 Erlang_functions (FILE *inf)
  6689 {
  6690   char *cp, *last = NULL;
  6691   ptrdiff_t lastlen = 0, allocated = 0;
  6692 
  6693   LOOP_ON_INPUT_LINES (inf, lb, cp)
  6694     {
  6695       if (cp[0] == '\0')        /* Empty line */
  6696         continue;
  6697       else if (c_isspace (cp[0])) /* Not function nor attribute */
  6698         continue;
  6699       else if (cp[0] == '%')    /* comment */
  6700         continue;
  6701       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
  6702         continue;
  6703       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
  6704         {
  6705           erlang_attribute (cp);
  6706           if (last != NULL)
  6707             {
  6708               free (last);
  6709               last = NULL;
  6710               allocated = lastlen = 0;
  6711             }
  6712         }
  6713       else
  6714         {
  6715           ptrdiff_t name_offset;
  6716           ptrdiff_t len = erlang_func (cp, last, lastlen, &name_offset);
  6717           if (0 < len)
  6718             {
  6719               /* Store the function name to avoid generating duplicate
  6720                  tags later.  */
  6721               if (allocated <= len)
  6722                 {
  6723                   xrnew (last, len + 1, 1);
  6724                   allocated = len + 1;
  6725                 }
  6726               memcpyz (last, cp + name_offset, len);
  6727               lastlen = len;
  6728             }
  6729         }
  6730     }
  6731   free (last);
  6732 }
  6733 
  6734 
  6735 /*
  6736  * A function definition is added if it matches:
  6737  *     <beginning of line><Erlang Atom><whitespace>(
  6738  *
  6739  * It is added to the tags database if it doesn't match the
  6740  * name of the previous clause header.
  6741  *
  6742  * Return the size of the name of the function, or 0 if no function
  6743  * was found.
  6744  */
  6745 static ptrdiff_t
  6746 erlang_func (char *s, char *last, ptrdiff_t lastlen, ptrdiff_t *name_offset)
  6747 {
  6748   char *name = s;
  6749   ptrdiff_t len = erlang_atom (s);
  6750   if (len == 0)
  6751     return 0;
  6752   ptrdiff_t pos = skip_spaces (s + len) - s;
  6753 
  6754   /* If the name is quoted, the quotes are not part of the name. */
  6755   bool quoted = 2 < len && name[0] == '\'' && name[len - 1] == '\'';
  6756   name += quoted;
  6757   len -= 2 * quoted;
  6758 
  6759   /* Save only the first clause. */
  6760   if (s[pos++] == '('
  6761       && ! (lastlen == len && memcmp (name, last, len) == 0))
  6762     {
  6763       make_tag (s, len, true, s, pos, lineno, linecharno);
  6764       *name_offset = quoted;
  6765       return len;
  6766     }
  6767 
  6768   return 0;
  6769 }
  6770 
  6771 
  6772 /*
  6773  * Handle attributes.  Currently, tags are generated for defines
  6774  * and records.
  6775  *
  6776  * They are on the form:
  6777  * -define(foo, bar).
  6778  * -define(Foo(M, N), M+N).
  6779  * -record(graph, {vtab = notable, cyclic = true}).
  6780  */
  6781 static void
  6782 erlang_attribute (char *s)
  6783 {
  6784   char *cp = s;
  6785 
  6786   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
  6787       && *cp++ == '(')
  6788     {
  6789       cp = skip_spaces (cp);
  6790       ptrdiff_t len = erlang_atom (cp);
  6791       ptrdiff_t pos = cp + len - s;
  6792       if (len > 0)
  6793         {
  6794           /* If the name is quoted, the quotes are not part of the name. */
  6795           if (len > 2 && cp[0] == '\'' && cp[len - 1] == '\'')
  6796             {
  6797               cp++;
  6798               len -= 2;
  6799             }
  6800           make_tag (cp, len, true, s, pos, lineno, linecharno);
  6801         }
  6802     }
  6803   return;
  6804 }
  6805 
  6806 
  /*
   * Consume an Erlang atom (or variable) at the start of S.
   *
   * An unquoted atom starts with a letter or '_' and continues with
   * letters, digits and '_'.  A quoted atom is enclosed in single
   * quotes; inside it a backslash protects the following character.
   *
   * Return the number of bytes consumed, including the quotes of a
   * quoted atom.  Return 0 if S does not start an atom or if a quoted
   * atom is not closed before the end of the string.
   */
  static ptrdiff_t
  erlang_atom (char *s)
  {
    if (c_isalpha (s[0]) || s[0] == '_')
      {
        /* The atom is unquoted. */
        ptrdiff_t end = 1;
        while (c_isalnum (s[end]) || s[end] == '_')
          end++;
        return end;
      }

    if (s[0] != '\'')
      return 0;

    /* The atom is quoted: look for the closing quote.  */
    ptrdiff_t i = 1;
    while (s[i] != '\'')
      {
        if (s[i] == '\0')       /* multiline quoted atoms are ignored */
          return 0;
        if (s[i] == '\\' && s[++i] == '\0')
          return 0;
        i++;
      }

    return i + 1;               /* Count the closing quote too.  */
  }
  6834 
  6835 
  6836 static char *scan_separators (char *);
  6837 static void add_regex (char *, language *);
  6838 static char *substitute (char *, char *, struct re_registers *);
  6839 
  /*
   * Take a string like "/blah/" and turn it into "blah".
   *
   * NAME[0] is the separator.  The text after it is copied down to the
   * start of NAME, in place, until the first separator that is not
   * preceded by a backslash, and is then null-terminated.  While
   * copying, \a \b \d \e \f \n \r \t \v are turned into the
   * corresponding control characters and a backslash followed by the
   * separator yields the separator alone; any other backslash pair is
   * kept as is.
   *
   * Return a pointer to the terminating separator (at its original
   * place in the string), or NULL for unterminated regexps.
   */
  static char *
  scan_separators (char *name)
  {
    const char sep = name[0];
    char *dst = name;           /* Where the next output byte goes.  */
    char *src = name + 1;       /* Next input byte; always ahead of DST.  */
    bool escaped = false;       /* Previous byte was an unquoted '\\'.  */

    while (*src != '\0')
      {
        const char c = *src;

        if (escaped)
          {
            int decoded;

            switch (c)
              {
              case 'a': decoded = '\007'; break; /* BEL (bell)            */
              case 'b': decoded = '\b'; break;   /* BS (back space)       */
              case 'd': decoded = 0177; break;   /* DEL (delete)          */
              case 'e': decoded = 033; break;    /* ESC (escape)          */
              case 'f': decoded = '\f'; break;   /* FF (form feed)        */
              case 'n': decoded = '\n'; break;   /* NL (new line)         */
              case 'r': decoded = '\r'; break;   /* CR (carriage return)  */
              case 't': decoded = '\t'; break;   /* TAB (horizontal tab)  */
              case 'v': decoded = '\v'; break;   /* VT (vertical tab)     */
              default:  decoded = -1; break;     /* Not a known escape.   */
              }

            if (decoded >= 0)
              *dst++ = decoded;
            else if (c == sep)
              *dst++ = sep;
            else
              {
                /* Something else is quoted, so preserve the quote. */
                *dst++ = '\\';
                *dst++ = c;
              }
            escaped = false;
          }
        else if (c == '\\')
          escaped = true;
        else if (c == sep)
          break;
        else
          *dst++ = c;

        src++;
      }

    /* Reaching the end of the string without meeting the separator
       signals an unterminated regexp.  */
    char *result = (*src == sep) ? src : NULL;

    /* Terminate copied string. */
    *dst = '\0';
    return result;
  }
  6898 
  6899 /* Look at the argument of --regex or --no-regex and do the right
  6900    thing.  Same for each line of a regexp file. */
  6901 static void
  6902 analyze_regex (char *regex_arg)
  6903 {
  6904   if (regex_arg == NULL)
  6905     {
  6906       free_regexps ();          /* --no-regex: remove existing regexps */
  6907       return;
  6908     }
  6909 
  6910   /* A real --regexp option or a line in a regexp file. */
  6911   switch (regex_arg[0])
  6912     {
  6913       /* Comments in regexp file or null arg to --regex. */
  6914     case '\0':
  6915     case ' ':
  6916     case '\t':
  6917       break;
  6918 
  6919       /* Read a regex file.  This is recursive and may result in a
  6920          loop, which will stop when the file descriptors are exhausted. */
  6921     case '@':
  6922       {
  6923         FILE *regexfp;
  6924         linebuffer regexbuf;
  6925         char *regexfile = regex_arg + 1;
  6926 
  6927         /* regexfile is a file containing regexps, one per line. */
  6928         regexfp = fopen (regexfile, "r" FOPEN_BINARY);
  6929         if (regexfp == NULL)
  6930           pfatal (regexfile);
  6931         linebuffer_init (&regexbuf);
  6932         while (readline_internal (&regexbuf, regexfp, regexfile, false) > 0)
  6933           analyze_regex (regexbuf.buffer);
  6934         free (regexbuf.buffer);
  6935         if (fclose (regexfp) != 0)
  6936           pfatal (regexfile);
  6937       }
  6938       break;
  6939 
  6940       /* Regexp to be used for a specific language only. */
  6941     case '{':
  6942       {
  6943         language *lang;
  6944         char *lang_name = regex_arg + 1;
  6945         char *cp;
  6946 
  6947         for (cp = lang_name; *cp != '}'; cp++)
  6948           if (*cp == '\0')
  6949             {
  6950               error ("unterminated language name in regex: %s", regex_arg);
  6951               return;
  6952             }
  6953         *cp++ = '\0';
  6954         lang = get_language_from_langname (lang_name);
  6955         if (lang == NULL)
  6956           return;
  6957         add_regex (cp, lang);
  6958       }
  6959       break;
  6960 
  6961       /* Regexp to be used for any language. */
  6962     default:
  6963       add_regex (regex_arg, NULL);
  6964       break;
  6965     }
  6966 }
  6967 
  6968 /* Separate the regexp pattern, compile it,
  6969    and care for optional name and modifiers. */
  6970 static void
  6971 add_regex (char *regexp_pattern, language *lang)
  6972 {
  6973   static struct re_pattern_buffer zeropattern;
  6974   char sep, *pat, *name, *modifiers;
  6975   char empty = '\0';
  6976   const char *err;
  6977   struct re_pattern_buffer *patbuf;
  6978   regexp *rp;
  6979   bool
  6980     ignore_case = false,        /* case is significant */
  6981     multi_line = false,         /* matches are done one line at a time */
  6982     single_line = false;        /* dot does not match newline */
  6983 
  6984 
  6985   if (strnlen (regexp_pattern, 3) < 3)
  6986     {
  6987       error ("null regexp");
  6988       return;
  6989     }
  6990   sep = regexp_pattern[0];
  6991   name = scan_separators (regexp_pattern);
  6992   if (name == NULL)
  6993     {
  6994       error ("%s: unterminated regexp", regexp_pattern);
  6995       return;
  6996     }
  6997   if (name[1] == sep)
  6998     {
  6999       error ("null name for regexp \"%s\"", regexp_pattern);
  7000       return;
  7001     }
  7002   modifiers = scan_separators (name);
  7003   if (modifiers == NULL)        /* no terminating separator --> no name */
  7004     {
  7005       modifiers = name;
  7006       name = &empty;
  7007     }
  7008   else
  7009     modifiers += 1;             /* skip separator */
  7010 
  7011   /* Parse regex modifiers. */
  7012   for (; modifiers[0] != '\0'; modifiers++)
  7013     switch (modifiers[0])
  7014       {
  7015       case 'N':
  7016         if (modifiers == name)
  7017           error ("forcing explicit tag name but no name, ignoring");
  7018         /* This option has no effect and is present only for backward
  7019            compatibility.  */
  7020         break;
  7021       case 'i':
  7022         ignore_case = true;
  7023         break;
  7024       case 's':
  7025         single_line = true;
  7026         FALLTHROUGH;
  7027       case 'm':
  7028         multi_line = true;
  7029         need_filebuf = true;
  7030         break;
  7031       default:
  7032         error ("invalid regexp modifier '%c', ignoring", modifiers[0]);
  7033         break;
  7034       }
  7035 
  7036   patbuf = xnew (1, struct re_pattern_buffer);
  7037   *patbuf = zeropattern;
  7038   if (ignore_case)
  7039     {
  7040       static unsigned char lc_trans[UCHAR_MAX + 1];
  7041       int i;
  7042       for (i = 0; i < UCHAR_MAX + 1; i++)
  7043         lc_trans[i] = c_tolower (i);
  7044       patbuf->translate = lc_trans;     /* translation table to fold case  */
  7045     }
  7046 
  7047   if (multi_line)
  7048     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
  7049   else
  7050     pat = regexp_pattern;
  7051 
  7052   if (single_line)
  7053     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
  7054   else
  7055     re_set_syntax (RE_SYNTAX_EMACS);
  7056 
  7057   err = re_compile_pattern (pat, strlen (pat), patbuf);
  7058   if (multi_line)
  7059     free (pat);
  7060   if (err != NULL)
  7061     {
  7062       error ("%s while compiling pattern", err);
  7063       return;
  7064     }
  7065 
  7066   rp = p_head;
  7067   p_head = xnew (1, regexp);
  7068   p_head->pattern = savestr (regexp_pattern);
  7069   p_head->p_next = rp;
  7070   p_head->lang = lang;
  7071   p_head->pat = patbuf;
  7072   p_head->name = savestr (name);
  7073   p_head->error_signaled = false;
  7074   p_head->ignore_case = ignore_case;
  7075   p_head->multi_line = multi_line;
  7076 }
  7077 
  7078 /*
  7079  * Do the substitutions indicated by the regular expression and
  7080  * arguments.
  7081  */
  7082 static char *
  7083 substitute (char *in, char *out, struct re_registers *regs)
  7084 {
  7085   char *result, *t;
  7086 
  7087   result = NULL;
  7088   ptrdiff_t size = strlen (out);
  7089 
  7090   /* Pass 1: figure out how much to allocate by finding all \N strings. */
  7091   if (out[size - 1] == '\\')
  7092     fatal ("pattern error in \"%s\"", out);
  7093   for (t = strchr (out, '\\');
  7094        t != NULL;
  7095        t = strchr (t + 2, '\\'))
  7096     if (c_isdigit (t[1]))
  7097       {
  7098         int dig = t[1] - '0';
  7099         ptrdiff_t diglen = regs->end[dig] - regs->start[dig];
  7100         size += diglen - 2;
  7101       }
  7102     else
  7103       size -= 1;
  7104 
  7105   /* Allocate space and do the substitutions. */
  7106   assert (size >= 0);
  7107   result = xnew (size + 1, char);
  7108 
  7109   for (t = result; *out != '\0'; out++)
  7110     if (*out == '\\' && c_isdigit (*++out))
  7111       {
  7112         int dig = *out - '0';
  7113         ptrdiff_t diglen = regs->end[dig] - regs->start[dig];
  7114         memcpy (t, in + regs->start[dig], diglen);
  7115         t += diglen;
  7116       }
  7117     else
  7118       *t++ = *out;
  7119   *t = '\0';
  7120 
  7121   assert (t <= result + size);
  7122   assert (t == result + strlen (result));
  7123 
  7124   return result;
  7125 }
  7126 
  7127 /* Deallocate all regexps. */
  7128 static void
  7129 free_regexps (void)
  7130 {
  7131   regexp *rp;
  7132   while (p_head != NULL)
  7133     {
  7134       rp = p_head->p_next;
  7135       free (p_head->pattern);
  7136       free (p_head->name);
  7137       free (p_head);
  7138       p_head = rp;
  7139     }
  7140   return;
  7141 }
  7142 
  7143 /*
  7144  * Reads the whole file as a single string from `filebuf' and looks for
  7145  * multi-line regular expressions, creating tags on matches.
  7146  * readline already dealt with normal regexps.
  7147  *
  7148  * Idea by Ben Wing <ben@666.com> (2002).
  7149  */
  7150 static void
  7151 regex_tag_multiline (void)
  7152 {
  7153   char *buffer = filebuf.buffer;
  7154   regexp *rp;
  7155   char *name;
  7156 
  7157   for (rp = p_head; rp != NULL; rp = rp->p_next)
  7158     {
  7159       ptrdiff_t match = 0;
  7160 
  7161       if (!rp->multi_line)
  7162         continue;               /* skip normal regexps */
  7163 
  7164       /* Generic initializations before parsing file from memory. */
  7165       lineno = 1;               /* reset global line number */
  7166       charno = 0;               /* reset global char number */
  7167       linecharno = 0;           /* reset global char number of line start */
  7168 
  7169       /* Only use generic regexps or those for the current language. */
  7170       if (rp->lang != NULL && rp->lang != curfdp->lang)
  7171         continue;
  7172 
  7173       while (match >= 0 && match < filebuf.len)
  7174         {
  7175           match = re_search (rp->pat, buffer, filebuf.len, charno,
  7176                              filebuf.len - match, &rp->regs);
  7177           switch (match)
  7178             {
  7179             case -2:
  7180               /* Some error. */
  7181               if (!rp->error_signaled)
  7182                 {
  7183                   error ("regexp stack overflow while matching \"%s\"",
  7184                          rp->pattern);
  7185                   rp->error_signaled = true;
  7186                 }
  7187               break;
  7188             case -1:
  7189               /* No match. */
  7190               break;
  7191             default:
  7192               if (match == rp->regs.end[0])
  7193                 {
  7194                   if (!rp->error_signaled)
  7195                     {
  7196                       error ("regexp matches the empty string: \"%s\"",
  7197                              rp->pattern);
  7198                       rp->error_signaled = true;
  7199                     }
  7200                   match = -3;   /* exit from while loop */
  7201                   break;
  7202                 }
  7203 
  7204               /* Match occurred.  Construct a tag. */
  7205               while (charno < rp->regs.end[0])
  7206                 if (buffer[charno++] == '\n')
  7207                   lineno++, linecharno = charno;
  7208               name = rp->name;
  7209               if (name[0] == '\0')
  7210                 name = NULL;
  7211               else /* make a named tag */
  7212                 name = substitute (buffer, rp->name, &rp->regs);
  7213 
  7214               /* Force explicit tag name, if a name is there. */
  7215               pfnote (name, true, buffer + linecharno,
  7216                       charno - linecharno + 1, lineno, linecharno);
  7217 
  7218               if (debug)
  7219                 fprintf (stderr, "%s on %s:%"PRIdMAX": %s\n",
  7220                          name ? name : "(unnamed)", curfdp->taggedfname,
  7221                          lineno, buffer + linecharno);
  7222               break;
  7223             }
  7224         }
  7225     }
  7226 }
  7227 
  7228 
  7229 static bool
  7230 nocase_tail (const char *cp)
  7231 {
  7232   ptrdiff_t len = 0;
  7233 
  7234   while (*cp != '\0' && c_tolower (*cp) == c_tolower (dbp[len]))
  7235     cp++, len++;
  7236   if (*cp == '\0' && !intoken (dbp[len]))
  7237     {
  7238       dbp += len;
  7239       return true;
  7240     }
  7241   return false;
  7242 }
  7243 
  7244 static void
  7245 get_tag (register char *bp, char **namepp)
  7246 {
  7247   register char *cp = bp;
  7248 
  7249   if (*bp != '\0')
  7250     {
  7251       /* Go till you get to white space or a syntactic break */
  7252       for (cp = bp + 1; !notinname (*cp); cp++)
  7253         continue;
  7254       make_tag (bp, cp - bp, true,
  7255                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  7256     }
  7257 
  7258   if (namepp != NULL)
  7259     *namepp = savenstr (bp, cp - bp);
  7260 }
  7261 
  7262 /* Similar to get_tag, but include '=' as part of the tag. */
  7263 static void
  7264 get_lispy_tag (register char *bp)
  7265 {
  7266   register char *cp = bp;
  7267 
  7268   if (*bp != '\0')
  7269     {
  7270       /* Go till you get to white space or a syntactic break */
  7271       for (cp = bp + 1; !notinname (*cp) || *cp == '='; cp++)
  7272         continue;
  7273       make_tag (bp, cp - bp, true,
  7274                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  7275     }
  7276 }
  7277 
  7278 /*
  7279  * Read a line of text from `stream' into `lbp', excluding the
  7280  * newline or CR-NL (if `leave_cr` is false), if any.  Return the
  7281  * number of characters read from `stream', which is the length
  7282  * of the line including the newline.
  7283  *
  7284  * On DOS or Windows, if `leave_cr` is false, we do not count the
  7285  * CR character, if any before the NL, in the returned length;
  7286  * this mirrors the behavior of Emacs on those
  7287  * platforms (for text files, it translates CR-NL to NL as it reads in the
  7288  * file).
  7289  *
  7290  * If multi-line regular expressions are requested, each line read is
  7291  * appended to `filebuf'.
  7292  */
  7293 static ptrdiff_t
  7294 readline_internal (linebuffer *lbp, FILE *stream, char const *filename,
  7295                    const bool leave_cr)
  7296 {
  7297   char *buffer = lbp->buffer;
  7298   char *p = lbp->buffer;
  7299   char *pend;
  7300   int chars_deleted;
  7301 
  7302   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
  7303 
  7304   for (;;)
  7305     {
  7306       register int c = getc (stream);
  7307       if (p == pend)
  7308         {
  7309           /* We're at the end of linebuffer: expand it. */
  7310           xrnew (buffer, lbp->size, 2);
  7311           p = buffer + lbp->size;
  7312           lbp->size *= 2;
  7313           pend = buffer + lbp->size;
  7314           lbp->buffer = buffer;
  7315         }
  7316       if (c == EOF)
  7317         {
  7318           if (ferror (stream))
  7319             perror (filename);
  7320           *p = '\0';
  7321           chars_deleted = 0;
  7322           break;
  7323         }
  7324       if (c == '\n')
  7325         {
  7326           if (!leave_cr && p > buffer && p[-1] == '\r')
  7327             {
  7328               p -= 1;
  7329               chars_deleted = 2;
  7330             }
  7331           else
  7332             {
  7333               chars_deleted = 1;
  7334             }
  7335           *p = '\0';
  7336           break;
  7337         }
  7338       *p++ = c;
  7339     }
  7340   lbp->len = p - buffer;
  7341 
  7342   if (need_filebuf              /* we need filebuf for multi-line regexps */
  7343       && chars_deleted > 0)     /* not at EOF */
  7344     {
  7345       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
  7346         {
  7347           /* Expand filebuf. */
  7348           xrnew (filebuf.buffer, filebuf.size, 2);
  7349           filebuf.size *= 2;
  7350         }
  7351       strcpy (mempcpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len),
  7352               "\n");
  7353       filebuf.len += lbp->len + 1;
  7354     }
  7355 
  7356   return lbp->len + chars_deleted;
  7357 }
  7358 
  7359 /*
  7360  * Like readline_internal, above, but in addition try to match the
  7361  * input line against relevant regular expressions and manage #line
  7362  * directives.
  7363  */
  7364 static void
  7365 readline (linebuffer *lbp, FILE *stream)
  7366 {
  7367   linecharno = charno;          /* update global char number of line start */
  7368   ptrdiff_t result = readline_internal (lbp, stream, infilename, false);
  7369   lineno += 1;                  /* increment global line number */
  7370   charno += result;             /* increment global char number */
  7371 
  7372   /* Honor #line directives. */
  7373   if (!no_line_directive)
  7374     {
  7375       static bool discard_until_line_directive;
  7376 
  7377       /* Check whether this is a #line directive. */
  7378       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
  7379         {
  7380           intmax_t lno;
  7381           int start = 0;
  7382 
  7383           if (sscanf (lbp->buffer, "#line %"SCNdMAX" \"%n", &lno, &start) >= 1
  7384               && start > 0)     /* double quote character found */
  7385             {
  7386               char *endp = lbp->buffer + start;
  7387 
  7388               while ((endp = strchr (endp, '"')) != NULL
  7389                      && endp[-1] == '\\')
  7390                 endp++;
  7391               if (endp != NULL)
  7392                 /* Ok, this is a real #line directive.  Let's deal with it. */
  7393                 {
  7394                   char *taggedabsname;  /* absolute name of original file */
  7395                   char *taggedfname;    /* name of original file as given */
  7396                   char *name;           /* temp var */
  7397 
  7398                   discard_until_line_directive = false; /* found it */
  7399                   name = lbp->buffer + start;
  7400                   *endp = '\0';
  7401                   canonicalize_filename (name);
  7402                   taggedabsname = absolute_filename (name, tagfiledir);
  7403                   if (filename_is_absolute (name)
  7404                       || filename_is_absolute (curfdp->infname))
  7405                     taggedfname = savestr (taggedabsname);
  7406                   else
  7407                     taggedfname = relative_filename (taggedabsname,tagfiledir);
  7408 
  7409                   if (streq (curfdp->taggedfname, taggedfname))
  7410                     /* The #line directive is only a line number change.  We
  7411                        deal with this afterwards. */
  7412                     free (taggedfname);
  7413                   else
  7414                     /* The tags following this #line directive should be
  7415                        attributed to taggedfname.  In order to do this, set
  7416                        curfdp accordingly. */
  7417                     {
  7418                       fdesc *fdp; /* file description pointer */
  7419 
  7420                       /* Go look for a file description already set up for the
  7421                          file indicated in the #line directive.  If there is
  7422                          one, use it from now until the next #line
  7423                          directive. */
  7424                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
  7425                         if (streq (fdp->infname, curfdp->infname)
  7426                             && streq (fdp->taggedfname, taggedfname))
  7427                           /* If we remove the second test above (after the &&)
  7428                              then all entries pertaining to the same file are
  7429                              coalesced in the tags file.  If we use it, then
  7430                              entries pertaining to the same file but generated
  7431                              from different files (via #line directives) will
  7432                              go into separate sections in the tags file.  These
  7433                              alternatives look equivalent.  The first one
  7434                              destroys some apparently useless information. */
  7435                           {
  7436                             curfdp = fdp;
  7437                             free (taggedfname);
  7438                             break;
  7439                           }
  7440                       /* Else, if we already tagged the real file, skip all
  7441                          input lines until the next #line directive. */
  7442                       if (fdp == NULL) /* not found */
  7443                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
  7444                           if (streq (fdp->infabsname, taggedabsname))
  7445                             {
  7446                               discard_until_line_directive = true;
  7447                               free (taggedfname);
  7448                               break;
  7449                             }
  7450                       /* Else create a new file description and use that from
  7451                          now on, until the next #line directive. */
  7452                       if (fdp == NULL) /* not found */
  7453                         {
  7454                           fdp = fdhead;
  7455                           fdhead = xnew (1, fdesc);
  7456                           *fdhead = *curfdp; /* copy curr. file description */
  7457                           fdhead->next = fdp;
  7458                           fdhead->infname = savestr (curfdp->infname);
  7459                           fdhead->infabsname = savestr (curfdp->infabsname);
  7460                           fdhead->infabsdir = savestr (curfdp->infabsdir);
  7461                           fdhead->taggedfname = taggedfname;
  7462                           fdhead->usecharno = false;
  7463                           fdhead->prop = NULL;
  7464                           fdhead->written = false;
  7465                           curfdp = fdhead;
  7466                         }
  7467                     }
  7468                   free (taggedabsname);
  7469                   lineno = lno - 1;
  7470                   readline (lbp, stream);
  7471                   return;
  7472                 } /* if a real #line directive */
  7473             } /* if #line is followed by a number */
  7474         } /* if line begins with "#line " */
  7475 
  7476       /* If we are here, no #line directive was found. */
  7477       if (discard_until_line_directive)
  7478         {
  7479           if (result > 0)
  7480             {
  7481               /* Do a tail recursion on ourselves, thus discarding the contents
  7482                  of the line buffer. */
  7483               readline (lbp, stream);
  7484               return;
  7485             }
  7486           /* End of file. */
  7487           discard_until_line_directive = false;
  7488           return;
  7489         }
  7490     } /* if #line directives should be considered */
  7491 
  7492   {
  7493     ptrdiff_t match;
  7494     regexp *rp;
  7495     char *name;
  7496 
  7497     /* Match against relevant regexps. */
  7498     if (lbp->len > 0)
  7499       for (rp = p_head; rp != NULL; rp = rp->p_next)
  7500         {
  7501           /* Only use generic regexps or those for the current language.
  7502              Also do not use multiline regexps, which is the job of
  7503              regex_tag_multiline. */
  7504           if ((rp->lang != NULL && rp->lang != fdhead->lang)
  7505               || rp->multi_line)
  7506             continue;
  7507 
  7508           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
  7509           switch (match)
  7510             {
  7511             case -2:
  7512               /* Some error. */
  7513               if (!rp->error_signaled)
  7514                 {
  7515                   error ("regexp stack overflow while matching \"%s\"",
  7516                          rp->pattern);
  7517                   rp->error_signaled = true;
  7518                 }
  7519               break;
  7520             case -1:
  7521               /* No match. */
  7522               break;
  7523             case 0:
  7524               /* Empty string matched. */
  7525               if (!rp->error_signaled)
  7526                 {
  7527                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
  7528                   rp->error_signaled = true;
  7529                 }
  7530               break;
  7531             default:
  7532               /* Match occurred.  Construct a tag. */
  7533               name = rp->name;
  7534               if (name[0] == '\0')
  7535                 name = NULL;
  7536               else /* make a named tag */
  7537                 name = substitute (lbp->buffer, rp->name, &rp->regs);
  7538 
  7539               /* Force explicit tag name, if a name is there. */
  7540               pfnote (name, true, lbp->buffer, match, lineno, linecharno);
  7541 
  7542               if (debug)
  7543                 fprintf (stderr, "%s on %s:%"PRIdMAX": %s\n",
  7544                          name ? name : "(unnamed)", curfdp->taggedfname,
  7545                          lineno, lbp->buffer);
  7546               break;
  7547             }
  7548         }
  7549   }
  7550 }
  7551 
  7552 
  7553 /*
  7554  * Return a pointer to a space of size strlen(cp)+1 allocated
  7555  * with xnew where the string CP has been copied.
  7556  */
  7557 static char *
  7558 savestr (const char *cp)
  7559 {
  7560   return savenstr (cp, strlen (cp));
  7561 }
  7562 
  7563 /*
  7564  * Return a pointer to a space of size LEN+1 allocated with xnew
  7565  * with a copy of CP (containing LEN bytes) followed by a NUL byte.
  7566  */
  7567 static char *
  7568 savenstr (const char *cp, ptrdiff_t len)
  7569 {
  7570   char *dp = xnew (len + 1, char);
  7571   dp[len] = '\0';
  7572   return memcpy (dp, cp, len);
  7573 }
  7574 
  7575 /* Skip spaces (end of string is not space), return new pointer. */
  7576 static char *
  7577 skip_spaces (char *cp)
  7578 {
  7579   while (c_isspace (*cp))
  7580     cp++;
  7581   return cp;
  7582 }
  7583 
  7584 /* Skip non spaces, except end of string, return new pointer. */
  7585 static char *
  7586 skip_non_spaces (char *cp)
  7587 {
  7588   while (*cp != '\0' && !c_isspace (*cp))
  7589     cp++;
  7590   return cp;
  7591 }
  7592 
  7593 /* Skip any chars in the "name" class.*/
  7594 static char *
  7595 skip_name (char *cp)
  7596 {
  7597   /* '\0' is a notinname() so loop stops there too */
  7598   while (! notinname (*cp))
  7599     cp++;
  7600   return cp;
  7601 }
  7602 
  7603 /* Print error message and exit.  */
  7604 static void
  7605 fatal (char const *format, ...)
  7606 {
  7607   va_list ap;
  7608   va_start (ap, format);
  7609   verror (format, ap);
  7610   va_end (ap);
  7611   exit (EXIT_FAILURE);
  7612 }
  7613 
/* Report the error currently described by errno, prefixed with S1
   (this is what perror prints), then terminate the program with
   EXIT_FAILURE.  */
static void
pfatal (const char *s1)
{
  perror (s1);
  exit (EXIT_FAILURE);
}
  7620 
/* Print on stderr a one-line hint telling the user to run the program
   (named by progname) with --help, then terminate with EXIT_FAILURE.  */
static void
suggest_asking_for_help (void)
{
  fprintf (stderr, "\tTry '%s --help' for a complete list of options.\n",
           progname);
  exit (EXIT_FAILURE);
}
  7628 
  7629 /* Output a diagnostic with printf-style FORMAT and args.  */
  7630 static void
  7631 error (const char *format, ...)
  7632 {
  7633   va_list ap;
  7634   va_start (ap, format);
  7635   verror (format, ap);
  7636   va_end (ap);
  7637 }
  7638 
  7639 static void
  7640 verror (char const *format, va_list ap)
  7641 {
  7642   fprintf (stderr, "%s: ", progname);
  7643   vfprintf (stderr, format, ap);
  7644   fprintf (stderr, "\n");
  7645 }
  7646 
  7647 /* Return a newly-allocated string whose contents
  7648    concatenate those of s1, s2, s3.  */
  7649 static char *
  7650 concat (const char *s1, const char *s2, const char *s3)
  7651 {
  7652   ptrdiff_t len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
  7653   char *result = xnew (len1 + len2 + len3 + 1, char);
  7654   strcpy (stpcpy (stpcpy (result, s1), s2), s3);
  7655   return result;
  7656 }
  7657 
  7658 
  7659 /* Does the same work as the system V getcwd, but does not need to
  7660    guess the buffer size in advance. */
  7661 static char *
  7662 etags_getcwd (void)
  7663 {
  7664   ptrdiff_t bufsize = 200;
  7665   char *path = xnew (bufsize, char);
  7666 
  7667   while (getcwd (path, bufsize) == NULL)
  7668     {
  7669       if (errno != ERANGE)
  7670         pfatal ("getcwd");
  7671       free (path);
  7672       path = xnmalloc (bufsize, 2 * sizeof *path);
  7673       bufsize *= 2;
  7674     }
  7675 
  7676   canonicalize_filename (path);
  7677   return path;
  7678 }
  7679 
  7680 /* Return a newly allocated string containing a name of a temporary file.  */
  7681 static char *
  7682 etags_mktmp (void)
  7683 {
  7684   const char *tmpdir = getenv ("TMPDIR");
  7685   const char *slash = "/";
  7686 
  7687 #if MSDOS || defined (DOS_NT)
  7688   if (!tmpdir)
  7689     tmpdir = getenv ("TEMP");
  7690   if (!tmpdir)
  7691     tmpdir = getenv ("TMP");
  7692   if (!tmpdir)
  7693     tmpdir = ".";
  7694   if (tmpdir[strlen (tmpdir) - 1] == '/'
  7695       || tmpdir[strlen (tmpdir) - 1] == '\\')
  7696     slash = "";
  7697 #else
  7698   if (!tmpdir)
  7699     tmpdir = "/tmp";
  7700   if (tmpdir[strlen (tmpdir) - 1] == '/')
  7701     slash = "";
  7702 #endif
  7703 
  7704   char *templt = concat (tmpdir, slash, "etXXXXXX");
  7705   int fd = mkostemp (templt, O_CLOEXEC);
  7706   if (fd < 0 || close (fd) != 0)
  7707     {
  7708       free (templt);
  7709       templt = NULL;
  7710     }
  7711 #if defined (DOS_NT)
  7712   else
  7713     {
  7714       /* The file name will be used in shell redirection, so it needs to have
  7715          DOS-style backslashes, or else the Windows shell will barf.  */
  7716       char *p;
  7717       for (p = templt; *p; p++)
  7718         if (*p == '/')
  7719           *p = '\\';
  7720     }
  7721 #endif
  7722 
  7723   return templt;
  7724 }
  7725 
  7726 #if !MSDOS && !defined (DOS_NT)
  7727 /*
  7728  * Add single quotes around a string, and escape any single quotes.
  7729  * Return a newly-allocated string.
  7730  *
  7731  * For example:
  7732  * escape_shell_arg_string ("test.txt")  => "'test.txt'"
  7733  * escape_shell_arg_string ("'test.txt") => "''\''test.txt'"
  7734  */
  7735 static char *
  7736 escape_shell_arg_string (char *str)
  7737 {
  7738   char *p = str;
  7739   int need_space = 2;           /* ' at begin and end */
  7740 
  7741   while (*p != '\0')
  7742     {
  7743       if (*p == '\'')
  7744         need_space += 4;        /* ' to '\'', length is 4 */
  7745       else
  7746         need_space++;
  7747 
  7748       p++;
  7749     }
  7750 
  7751   char *new_str = xnew (need_space + 1, char);
  7752   new_str[0] = '\'';
  7753   new_str[need_space-1] = '\'';
  7754 
  7755   int i = 1;                    /* skip first byte */
  7756   p = str;
  7757   while (*p != '\0')
  7758     {
  7759       new_str[i] = *p;
  7760       if (*p == '\'')
  7761         {
  7762           new_str[i+1] = '\\';
  7763           new_str[i+2] = '\'';
  7764           new_str[i+3] = '\'';
  7765           i += 3;
  7766         }
  7767 
  7768       i++;
  7769       p++;
  7770     }
  7771 
  7772   new_str[need_space] = '\0';
  7773   return new_str;
  7774 }
  7775 #endif
  7776 
  7777 static void
  7778 do_move_file (const char *src_file, const char *dst_file)
  7779 {
  7780   if (rename (src_file, dst_file) == 0)
  7781     return;
  7782 
  7783   FILE *src_f = fopen (src_file, "rb");
  7784   FILE *dst_f = fopen (dst_file, "wb");
  7785 
  7786   if (src_f == NULL)
  7787     pfatal (src_file);
  7788 
  7789   if (dst_f == NULL)
  7790     pfatal (dst_file);
  7791 
  7792   int c;
  7793   while ((c = fgetc (src_f)) != EOF)
  7794     {
  7795       if (ferror (src_f))
  7796         pfatal (src_file);
  7797 
  7798       if (ferror (dst_f))
  7799         pfatal (dst_file);
  7800 
  7801       if (fputc (c, dst_f) == EOF)
  7802         pfatal ("cannot write");
  7803     }
  7804 
  7805   if (fclose (src_f) == EOF)
  7806     pfatal (src_file);
  7807 
  7808   if (fclose (dst_f) == EOF)
  7809     pfatal (dst_file);
  7810 
  7811   if (unlink (src_file) == -1)
  7812     pfatal ("unlink error");
  7813 
  7814   return;
  7815 }
  7816 
  7817 /* Return a newly allocated string containing the file name of FILE
  7818    relative to the absolute directory DIR (which should end with a slash). */
  7819 static char *
  7820 relative_filename (char *file, char *dir)
  7821 {
  7822   char *fp, *dp, *afn, *res;
  7823   ptrdiff_t i;
  7824   char *dir_last_slash UNINIT;
  7825 
  7826   /* Find the common root of file and dir (with a trailing slash). */
  7827   afn = absolute_filename (file, cwd);
  7828   fp = afn;
  7829   dp = dir;
  7830   while (*fp++ == *dp++)
  7831     if (dp[-1] == '/')
  7832       dir_last_slash = dp - 1;
  7833 #ifdef DOS_NT
  7834   if (fp - 1 == afn && afn[0] != '/')
  7835     return afn; /* Cannot build a relative name.  */
  7836 #endif
  7837   fp -= dp - dir_last_slash;
  7838   dp = dir_last_slash;
  7839 
  7840   /* Build a sequence of "../" strings for the resulting relative file name. */
  7841   i = 0;
  7842   while ((dp = strchr (dp + 1, '/')) != NULL)
  7843     i += 1;
  7844   res = xnew (3*i + strlen (fp + 1) + 1, char);
  7845   char *z = res;
  7846   while (i-- > 0)
  7847     z = stpcpy (z, "../");
  7848 
  7849   /* Add the file name relative to the common root of file and dir. */
  7850   strcpy (z, fp + 1);
  7851   free (afn);
  7852 
  7853   return res;
  7854 }
  7855 
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is already absolute it is copied; otherwise DIR and FILE are
   concatenated.  The "/dirname/.." and "/." components are then
   removed by editing the new string in place.  */
static char *
absolute_filename (char *file, char *dir)
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  /* SLASHP always points at a slash in RES; each iteration looks at
     the component that follows it. */
  slashp = strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Found "/..": walk CP backwards to the start of the
                 preceding component, i.e. the nearest earlier position
                 at which the remaining text looks absolute. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* slashp[2] is '.', so strlen (slashp + 2) is exactly the
                 number of bytes from slashp + 3 up to and including
                 the terminating NUL. */
              memmove (cp, slashp + 3, strlen (slashp + 2));
              /* Re-examine from CP: what now follows it may itself be
                 a "." or ".." component. */
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Found "/.": drop those two bytes.  slashp[1] is '.',
                 so strlen (slashp + 1) covers the rest of the string
                 from slashp + 2 including the terminating NUL. */
              memmove (slashp, slashp + 2, strlen (slashp + 1));
              continue;
            }
        }

      slashp = strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
  7918 
  7919 /* Return a newly allocated string containing the absolute
  7920    file name of dir where FILE resides given DIR (which should
  7921    end with a slash). */
  7922 static char *
  7923 absolute_dirname (char *file, char *dir)
  7924 {
  7925   char *slashp, *res;
  7926   char save;
  7927 
  7928   slashp = strrchr (file, '/');
  7929   if (slashp == NULL)
  7930     return savestr (dir);
  7931   save = slashp[1];
  7932   slashp[1] = '\0';
  7933   res = absolute_filename (file, dir);
  7934   slashp[1] = save;
  7935 
  7936   return res;
  7937 }
  7938 
  7939 /* Whether the argument string is an absolute file name.  The argument
  7940    string must have been canonicalized with canonicalize_filename. */
  7941 static bool
  7942 filename_is_absolute (char *fn)
  7943 {
  7944   return (fn[0] == '/'
  7945 #ifdef DOS_NT
  7946           || (c_isalpha (fn[0]) && fn[1] == ':' && fn[2] == '/')
  7947 #endif
  7948           );
  7949 }
  7950 
  7951 /* Downcase DOS drive letter and collapse separators into single slashes.
  7952    Works in place. */
  7953 static void
  7954 canonicalize_filename (register char *fn)
  7955 {
  7956   register char* cp;
  7957 
  7958 #ifdef DOS_NT
  7959   /* Canonicalize drive letter case.  */
  7960   if (c_isupper (fn[0]) && fn[1] == ':')
  7961     fn[0] = c_tolower (fn[0]);
  7962 
  7963   /* Collapse multiple forward- and back-slashes into a single forward
  7964      slash. */
  7965   for (cp = fn; *cp != '\0'; cp++, fn++)
  7966     if (*cp == '/' || *cp == '\\')
  7967       {
  7968         *fn = '/';
  7969         while (cp[1] == '/' || cp[1] == '\\')
  7970           cp++;
  7971       }
  7972     else
  7973       *fn = *cp;
  7974 
  7975 #else  /* !DOS_NT */
  7976 
  7977   /* Collapse multiple slashes into a single slash. */
  7978   for (cp = fn; *cp != '\0'; cp++, fn++)
  7979     if (*cp == '/')
  7980       {
  7981         *fn = '/';
  7982         while (cp[1] == '/')
  7983           cp++;
  7984       }
  7985     else
  7986       *fn = *cp;
  7987 
  7988 #endif  /* !DOS_NT */
  7989 
  7990   *fn = '\0';
  7991 }
  7992 
  7993 
  7994 /* Initialize a linebuffer for use. */
  7995 static void
  7996 linebuffer_init (linebuffer *lbp)
  7997 {
  7998   lbp->size = (DEBUG) ? 3 : 200;
  7999   lbp->buffer = xnew (lbp->size, char);
  8000   lbp->buffer[0] = '\0';
  8001   lbp->len = 0;
  8002 }
  8003 
  8004 /* Set the minimum size of a string contained in a linebuffer. */
  8005 static void
  8006 linebuffer_setlen (linebuffer *lbp, ptrdiff_t toksize)
  8007 {
  8008   if (lbp->size <= toksize)
  8009     {
  8010       ptrdiff_t multiplier = toksize / lbp->size + 1;
  8011       xrnew (lbp->buffer, lbp->size, multiplier);
  8012       lbp->size *= multiplier;
  8013     }
  8014   lbp->len = toksize;
  8015 }
  8016 
  8017 /* Memory allocators with a fatal error if memory is exhausted.  */
  8018 
/* Report that memory is exhausted.  The message goes through fatal,
   which prints it and exits, so this function does not return.  */
static void
memory_full (void)
{
  fatal ("virtual memory exhausted");
}
  8024 
  8025 static void *
  8026 xmalloc (ptrdiff_t size)
  8027 {
  8028   if (SIZE_MAX < size)
  8029     memory_full ();
  8030   void *result = malloc (size);
  8031   if (result == NULL)
  8032     memory_full ();
  8033   return result;
  8034 }
  8035 
  8036 static void *
  8037 xnmalloc (ptrdiff_t nitems, ptrdiff_t item_size)
  8038 {
  8039   ptrdiff_t nbytes;
  8040   assume (0 <= nitems);
  8041   assume (0 < item_size);
  8042   if (ckd_mul (&nbytes, nitems, item_size))
  8043     memory_full ();
  8044   return xmalloc (nbytes);
  8045 }
  8046 
  8047 static void *
  8048 xnrealloc (void *pa, ptrdiff_t nitems, ptrdiff_t item_size)
  8049 {
  8050   ptrdiff_t nbytes;
  8051   assume (0 <= nitems);
  8052   assume (0 < item_size);
  8053   if (ckd_mul (&nbytes, nitems, item_size) || SIZE_MAX < nbytes)
  8054     memory_full ();
  8055   void *result = realloc (pa, nbytes);
  8056   if (!result)
  8057     memory_full ();
  8058   return result;
  8059 }
  8060 
  8061 /*
  8062  * Local Variables:
  8063  * indent-tabs-mode: t
  8064  * tab-width: 8
  8065  * fill-column: 79
  8066  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
  8067  * c-file-style: "gnu"
  8068  * End:
  8069  */
  8070 
  8071 /* etags.c ends here */

/* [<][>][^][v][top][bottom][index][help] */