root/src/doprnt.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. parse_format_integer
  2. doprnt_non_null_end
  3. doprnt
  4. esprintf
  5. exprintf
  6. evxprintf

     1 /* Output like sprintf to a buffer of specified size.    -*- coding: utf-8 -*-
     2    Also takes args differently: pass one pointer to the end
     3    of the format string in addition to the format string itself.
     4    Copyright (C) 1985, 2001-2023 Free Software Foundation, Inc.
     5 
     6 This file is part of GNU Emacs.
     7 
     8 GNU Emacs is free software: you can redistribute it and/or modify
     9 it under the terms of the GNU General Public License as published by
    10 the Free Software Foundation, either version 3 of the License, or (at
    11 your option) any later version.
    12 
    13 GNU Emacs is distributed in the hope that it will be useful,
    14 but WITHOUT ANY WARRANTY; without even the implied warranty of
    15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    16 GNU General Public License for more details.
    17 
    18 You should have received a copy of the GNU General Public License
    19 along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
    20 
    21 /* If you think about replacing this with some similar standard C function of
    22    the printf family (such as vsnprintf), please note that this function
    23    supports the following Emacs-specific features:
    24 
    25    . For %c conversions, it produces a string with the multibyte representation
    26      of the (`int') argument, suitable for display in an Emacs buffer.
    27 
    28    . For %s and %c, when field width is specified (e.g., %25s), it accounts for
    29      the display width of each character, according to char-width-table.  That
    30      is, it does not assume that each character takes one column on display.
    31      Nor does it assume that each character is a single byte.
    32 
    33    . If the size of the buffer is not enough to produce the formatted string in
    34      its entirety, it makes sure that truncation does not chop the last
    35      character in the middle of its multibyte sequence, producing an invalid
    36      sequence.
    37 
    38    . It accepts a pointer to the end of the format string, so the format string
    39      could include embedded null characters.
    40 
    41    . It signals an error if the length of the formatted string is about to
    42      overflow ptrdiff_t or size_t, to avoid producing strings longer than what
    43      Emacs can handle.
    44 
    45    OTOH, this function supports only a small subset of the standard C formatted
    46    output facilities.  E.g., %u is not supported, precision is ignored
    47    in %s and %c conversions, and %lld does not necessarily work and
    48    code should use something like %"pM"d with intmax_t instead.
    49    (See below for the detailed documentation of what is supported.)
    50    However, this is okay, as this function is supposed to be called
    51    from 'error' and similar C functions, and thus does not need to
    52    support all the features of 'Fformat_message', which is used by the
    53    Lisp 'error' function.  */
    54 
    55 /* In the FORMAT argument this function supports ` and ' as directives
    56    that output left and right quotes as per ‘text-quoting-style’.  It
    57    also supports the following %-sequences:
    58 
    59    %s means print a string argument.
    60    %S is treated as %s, for loose compatibility with `Fformat_message'.
    61    %d means print a `signed int' argument in decimal.
    62    %o means print an `unsigned int' argument in octal.
    63    %x means print an `unsigned int' argument in hex.
    64    %e means print a `double' argument in exponential notation.
    65    %f means print a `double' argument in decimal-point notation.
    66    %g means print a `double' argument in exponential notation
    67       or in decimal-point notation, depending on the value;
    68       this is often (though not always) the shorter of the two notations.
    69    %c means print a `signed int' argument as a single character.
    70    %% means produce a literal % character.
    71 
    72    A %-sequence other than %% may contain optional flags, width, precision,
    73    and length, as follows:
    74 
    75      %<flags><width><precision><length>character
    76 
    77    where flags is [+ -0], width is [0-9]+, precision is .[0-9]+, and length
    78    is empty or l or the value of the pD or pI or PRIdMAX (sans "d") macros.
    79    A % that does not introduce a valid %-sequence causes undefined behavior.
    80    Bytes in FORMAT other than % are copied through as-is.
    81 
    82    The + flag character inserts a + before any positive number, while a space
    83    inserts a space before any positive number; these flags only affect %d, %o,
    84    %x, %e, %f, and %g sequences.  The - and 0 flags affect the width specifier,
    85    as described below.  For signed numerical arguments only, the ` ' (space)
    86    flag causes the result to be prefixed with a space character if it does not
    87    start with a sign (+ or -).
    88 
    89    The l (lower-case letter ell) length modifier is a `long' data type
    90    modifier: it is supported for %d, %o, and %x conversions of integral
    91    arguments, must immediately precede the conversion specifier, and means that
    92    the respective argument is to be treated as `long int' or `unsigned long
    93    int'.  Similarly, the value of the pD macro means to use ptrdiff_t,
    94    the value of the pI macro means to use EMACS_INT or EMACS_UINT, the
    95    value of the PRIdMAX etc. macros means to use intmax_t or uintmax_t,
    96    and the empty length modifier means `int' or `unsigned int'.
    97 
    98    The width specifier supplies a lower limit for the length of the printed
    99    representation.  The padding, if any, normally goes on the left, but it goes
   100    on the right if the - flag is present.  The padding character is normally a
   101    space, but (for numerical arguments only) it is 0 if the 0 flag is present.
   102    The - flag takes precedence over the 0 flag.
   103 
   104    For %e, %f, and %g sequences, the number after the "." in the precision
   105    specifier says how many decimal places to show; if zero, the decimal point
   106    itself is omitted.  For %d, %o, and %x sequences, the precision specifies
   107    the minimum number of digits to appear.  Precision specifiers are
   108    not supported for other %-sequences.  */
   109 
   110 #include <config.h>
   111 #include <stdio.h>
   112 #include <stdlib.h>
   113 #include <float.h>
   114 #include <unistd.h>
   115 #include <limits.h>
   116 
   117 #include "lisp.h"
   118 
   119 /* Since we use the macro CHAR_HEAD_P, we have to include this, but
   120    don't have to include others because CHAR_HEAD_P does not contain
   121    another macro.  */
   122 #include "character.h"
   123 
/* Slack added to the larger of a %-sequence's width and precision when
   sizing the scratch buffer handed to sprintf; also subtracted from the
   largest width or precision that parse_format_integer accepts.
   Enough to handle floating point formats with large numbers.  */
enum { SIZE_BOUND_EXTRA = DBL_MAX_10_EXP + 50 };
   126 
   127 /* Parse FMT as an unsigned decimal integer, putting its value into *VALUE.
   128    Return the address of the first byte after the integer.
   129    If FMT is not an integer, return FMT and store zero into *VALUE.  */
   130 static char const *
   131 parse_format_integer (char const *fmt, int *value)
   132 {
   133   int n = 0;
   134   bool overflow = false;
   135   for (; '0' <= *fmt && *fmt <= '9'; fmt++)
   136     {
   137       overflow |= INT_MULTIPLY_WRAPV (n, 10, &n);
   138       overflow |= INT_ADD_WRAPV (n, *fmt - '0', &n);
   139     }
   140   if (overflow || min (PTRDIFF_MAX, SIZE_MAX) - SIZE_BOUND_EXTRA < n)
   141     error ("Format width or precision too large");
   142   *value = n;
   143   return fmt;
   144 }
   145 
   146 /* Like doprnt, except FORMAT_END must be non-null.
   147    Although this function is never exercised in current Emacs,
   148    it is retained in case some future Emacs version
   149    contains doprnt callers that need such formats.
   150    Having a separate function helps GCC optimize doprnt better.  */
   151 static ptrdiff_t
   152 doprnt_non_null_end (char *buffer, ptrdiff_t bufsize, char const *format,
   153                      char const *format_end, va_list ap)
   154 {
   155   USE_SAFE_ALLOCA;
   156   ptrdiff_t fmtlen = format_end - format;
   157   char *fmt = SAFE_ALLOCA (fmtlen + 1);
   158   memcpy (fmt, format, fmtlen);
   159   fmt[fmtlen] = 0;
   160   ptrdiff_t nbytes = doprnt (buffer, bufsize, fmt, NULL, ap);
   161   SAFE_FREE ();
   162   return nbytes;
   163 }
   164 
   165 /* Generate output from a format-spec FORMAT,
   166    terminated at either the first NUL or (if FORMAT_END is non-null
   167    and there are no NUL bytes between FORMAT and FORMAT_END)
   168    terminated at position FORMAT_END.
   169    (*FORMAT_END is not part of the format, but must exist and be readable.)
   170    Output goes in BUFFER, which has room for BUFSIZE chars.
   171    BUFSIZE must be positive.  If the output does not fit, truncate it
   172    to fit and return BUFSIZE - 1; if this truncates a multibyte
   173    sequence, store '\0' into the sequence's first byte.
   174    Returns the number of bytes stored into BUFFER, excluding
   175    the terminating null byte.  Output is always null-terminated.
   176    String arguments are passed as C strings.
   177    Integers are passed as C integers.
   178 
   179    FIXME: If FORMAT_END is not at a character boundary
   180    doprnt_non_null_end will cut the string in the middle of the
   181    character and the returned string will have an incomplete character
   182    sequence at the end.  We may prefer to cut at a character
   183    boundary.  */
   184 
   185 ptrdiff_t
   186 doprnt (char *buffer, ptrdiff_t bufsize, const char *format,
   187         const char *format_end, va_list ap)
   188 {
   189   if (format_end)
   190     return doprnt_non_null_end (buffer, bufsize, format, format_end, ap);
   191 
   192   const char *fmt = format;     /* Pointer into format string.  */
   193   char *bufptr = buffer;        /* Pointer into output buffer.  */
   194 
   195   /* Use this for sprintf unless we need something really big.  */
   196   char tembuf[SIZE_BOUND_EXTRA + 50];
   197 
   198   /* Size of sprintf_buffer.  */
   199   ptrdiff_t size_allocated = sizeof (tembuf);
   200 
   201   /* Buffer to use for sprintf.  Either tembuf or same as BIG_BUFFER.  */
   202   char *sprintf_buffer = tembuf;
   203 
   204   /* Buffer we have got with malloc.  */
   205   char *big_buffer = NULL;
   206 
   207   Lisp_Object quoting_style = Ftext_quoting_style ();
   208 
   209   bufsize--;
   210 
   211   /* Loop until end of format string or buffer full. */
   212   while (*fmt && bufsize > 0)
   213     {
   214       char const *fmt0 = fmt;
   215       char fmtchar = *fmt++;
   216       if (fmtchar == '%')
   217         {
   218           ptrdiff_t width;  /* Columns occupied by STRING on display.  */
   219           enum {
   220             pDlen = sizeof pD - 1,
   221             pIlen = sizeof pI - 1,
   222             pMlen = sizeof PRIdMAX - 2,
   223             maxmlen = max (max (1, pDlen), max (pIlen, pMlen))
   224           };
   225           enum {
   226             no_modifier, long_modifier, pD_modifier, pI_modifier, pM_modifier
   227           } length_modifier = no_modifier;
   228           static char const modifier_len[] = { 0, 1, pDlen, pIlen, pMlen };
   229           int mlen;
   230           char charbuf[MAX_MULTIBYTE_LENGTH + 1];       /* Used for %c.  */
   231 
   232           /* Width and precision specified by this %-sequence.  */
   233           int wid = 0, prec = -1;
   234 
   235           /* FMTSTAR will be a "%*.*X"-like version of this %-sequence.
   236              Start by putting '%' into FMTSTAR.  */
   237           char fmtstar[sizeof "%-+ 0*.*d" + maxmlen];
   238           char *string = fmtstar;
   239           *string++ = '%';
   240 
   241           /* Copy at most one instance of each flag into FMTSTAR.  */
   242           bool minusflag = false, plusflag = false, zeroflag = false,
   243             spaceflag = false;
   244           for (;; fmt++)
   245             {
   246               *string = *fmt;
   247               switch (*fmt)
   248                 {
   249                 case '-': string += !minusflag; minusflag = true; continue;
   250                 case '+': string += !plusflag; plusflag = true; continue;
   251                 case ' ': string += !spaceflag; spaceflag = true; continue;
   252                 case '0': string += !zeroflag; zeroflag = true; continue;
   253                 }
   254               break;
   255             }
   256 
   257           /* Parse width and precision, putting "*.*" into FMTSTAR.  */
   258           if ('1' <= *fmt && *fmt <= '9')
   259             fmt = parse_format_integer (fmt, &wid);
   260           if (*fmt == '.')
   261             fmt = parse_format_integer (fmt + 1, &prec);
   262           *string++ = '*';
   263           *string++ = '.';
   264           *string++ = '*';
   265 
   266           /* Check for the length modifiers in textual length order, so
   267              that longer modifiers override shorter ones.  */
   268           for (mlen = 1; mlen <= maxmlen; mlen++)
   269             {
   270               if (mlen == 1 && *fmt == 'l')
   271                 length_modifier = long_modifier;
   272               if (mlen == pDlen && strncmp (fmt, pD, pDlen) == 0)
   273                 length_modifier = pD_modifier;
   274               if (mlen == pIlen && strncmp (fmt, pI, pIlen) == 0)
   275                 length_modifier = pI_modifier;
   276               if (mlen == pMlen && strncmp (fmt, PRIdMAX, pMlen) == 0)
   277                 length_modifier = pM_modifier;
   278             }
   279 
   280           /* Copy optional length modifier and conversion specifier
   281              character into FMTSTAR, and append a NUL.  */
   282           mlen = modifier_len[length_modifier];
   283           string = mempcpy (string, fmt, mlen + 1);
   284           fmt += mlen;
   285           *string = 0;
   286 
   287           /* An idea of how much space we might need.
   288              This might be a field width or a precision; e.g.
   289              %1.1000f and %1000.1f both might need 1000+ bytes.
   290              Make it large enough to handle floating point formats
   291              with large numbers.  */
   292           ptrdiff_t size_bound = max (wid, prec) + SIZE_BOUND_EXTRA;
   293 
   294           /* Make sure we have that much.  */
   295           if (size_bound > size_allocated)
   296             {
   297               if (big_buffer)
   298                 xfree (big_buffer);
   299               big_buffer = xmalloc (size_bound);
   300               sprintf_buffer = big_buffer;
   301               size_allocated = size_bound;
   302             }
   303           int minlen = 0;
   304           ptrdiff_t tem;
   305           switch (*fmt++)
   306             {
   307             default:
   308               error ("Invalid format operation %s", fmt0);
   309 
   310             case 'd':
   311               switch (length_modifier)
   312                 {
   313                 case no_modifier:
   314                   {
   315                     int v = va_arg (ap, int);
   316                     tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v);
   317                   }
   318                   break;
   319                 case long_modifier:
   320                   {
   321                     long v = va_arg (ap, long);
   322                     tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v);
   323                   }
   324                   break;
   325                 case pD_modifier:
   326                 signed_pD_modifier:
   327                   {
   328                     ptrdiff_t v = va_arg (ap, ptrdiff_t);
   329                     tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v);
   330                   }
   331                   break;
   332                 case pI_modifier:
   333                   {
   334                     EMACS_INT v = va_arg (ap, EMACS_INT);
   335                     tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v);
   336                   }
   337                   break;
   338                 case pM_modifier:
   339                   {
   340                     intmax_t v = va_arg (ap, intmax_t);
   341                     tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v);
   342                   }
   343                   break;
   344                 default:
   345                   eassume (false);
   346                 }
   347               /* Now copy into final output, truncating as necessary.  */
   348               string = sprintf_buffer;
   349               goto doit;
   350 
   351             case 'o':
   352             case 'x':
   353               switch (length_modifier)
   354                 {
   355                 case no_modifier:
   356                   {
   357                     unsigned v = va_arg (ap, unsigned);
   358                     tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v);
   359                   }
   360                   break;
   361                 case long_modifier:
   362                   {
   363                     unsigned long v = va_arg (ap, unsigned long);
   364                     tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v);
   365                   }
   366                   break;
   367                 case pD_modifier:
   368                   goto signed_pD_modifier;
   369                 case pI_modifier:
   370                   {
   371                     EMACS_UINT v = va_arg (ap, EMACS_UINT);
   372                     tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v);
   373                   }
   374                   break;
   375                 case pM_modifier:
   376                   {
   377                     uintmax_t v = va_arg (ap, uintmax_t);
   378                     tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v);
   379                   }
   380                   break;
   381                 default:
   382                   eassume (false);
   383                 }
   384               /* Now copy into final output, truncating as necessary.  */
   385               string = sprintf_buffer;
   386               goto doit;
   387 
   388             case 'f':
   389             case 'e':
   390             case 'g':
   391               {
   392                 double d = va_arg (ap, double);
   393                 tem = sprintf (sprintf_buffer, fmtstar, wid, prec, d);
   394                 /* Now copy into final output, truncating as necessary.  */
   395                 string = sprintf_buffer;
   396                 goto doit;
   397               }
   398 
   399             case 'S':
   400             case 's':
   401               minlen = minusflag ? -wid : wid;
   402               string = va_arg (ap, char *);
   403               tem = strnlen (string, STRING_BYTES_BOUND + 1);
   404               if (tem == STRING_BYTES_BOUND + 1)
   405                 error ("String for %%s or %%S format is too long");
   406               width = strwidth (string, tem);
   407               goto doit1;
   408 
   409               /* Copy string into final output, truncating if no room.  */
   410             doit:
   411               eassert (0 <= tem);
   412               /* Coming here means STRING contains ASCII only.  */
   413               if (STRING_BYTES_BOUND < tem)
   414                 error ("Format width or precision too large");
   415               width = tem;
   416             doit1:
   417               /* We have already calculated:
   418                  TEM -- length of STRING,
   419                  WIDTH -- columns occupied by STRING when displayed, and
   420                  MINLEN -- minimum columns of the output.  */
   421               if (minlen > 0)
   422                 {
   423                   while (minlen > width && bufsize > 0)
   424                     {
   425                       *bufptr++ = ' ';
   426                       bufsize--;
   427                       minlen--;
   428                     }
   429                   minlen = 0;
   430                 }
   431               if (tem > bufsize)
   432                 {
   433                   /* Truncate the string at character boundary.  */
   434                   tem = bufsize;
   435                   do
   436                     {
   437                       tem--;
   438                       if (CHAR_HEAD_P (string[tem]))
   439                         {
   440                           if (BYTES_BY_CHAR_HEAD (string[tem]) <= bufsize - tem)
   441                             tem = bufsize;
   442                           break;
   443                         }
   444                     }
   445                   while (tem != 0);
   446 
   447                   memcpy (bufptr, string, tem);
   448                   bufptr[tem] = 0;
   449                   /* Trigger exit from the loop, but make sure we
   450                      return to the caller a value which will indicate
   451                      that the buffer was too small.  */
   452                   bufptr += bufsize;
   453                   bufsize = 0;
   454                   continue;
   455                 }
   456               memcpy (bufptr, string, tem);
   457               bufptr += tem;
   458               bufsize -= tem;
   459               if (minlen < 0)
   460                 {
   461                   while (minlen < - width && bufsize > 0)
   462                     {
   463                       *bufptr++ = ' ';
   464                       bufsize--;
   465                       minlen++;
   466                     }
   467                   minlen = 0;
   468                 }
   469               continue;
   470 
   471             case 'c':
   472               {
   473                 int chr = va_arg (ap, int);
   474                 tem = CHAR_STRING (chr, (unsigned char *) charbuf);
   475                 string = charbuf;
   476                 string[tem] = 0;
   477                 width = strwidth (string, tem);
   478                 minlen = minusflag ? -wid : wid;
   479                 goto doit1;
   480               }
   481 
   482             case '%':
   483               /* Treat this '%' as normal.  */
   484               break;
   485             }
   486         }
   487 
   488       char const *src;
   489       ptrdiff_t srclen;
   490       if (EQ (quoting_style, Qcurve) && fmtchar == '`')
   491         src = uLSQM, srclen = sizeof uLSQM - 1;
   492       else if (EQ (quoting_style, Qcurve) && fmtchar == '\'')
   493         src = uRSQM, srclen = sizeof uRSQM - 1;
   494       else if (! LEADING_CODE_P (fmtchar))
   495         {
   496           if (EQ (quoting_style, Qstraight) && fmtchar == '`')
   497             fmtchar = '\'';
   498 
   499           *bufptr++ = fmtchar;
   500           continue;
   501         }
   502       else
   503         {
   504           int charlen = BYTES_BY_CHAR_HEAD (fmtchar);
   505           src = fmt0;
   506 
   507           /* If the format string ends in the middle of a multibyte
   508              character we don't want to skip over the NUL byte.  */
   509           for (srclen = 1 ; *(src + srclen) != 0 && srclen < charlen ; srclen++);
   510 
   511           fmt = src + srclen;
   512         }
   513 
   514       if (bufsize < srclen)
   515         {
   516           /* Truncate, but return value that will signal to caller
   517              that the buffer was too small.  */
   518           do
   519             *bufptr++ = '\0';
   520           while (--bufsize != 0);
   521         }
   522       else
   523         {
   524           do
   525             *bufptr++ = *src++;
   526           while (--srclen != 0);
   527         }
   528     }
   529 
   530   /* If we had to malloc something, free it.  */
   531   xfree (big_buffer);
   532 
   533   *bufptr = 0;          /* Make sure our string ends with a '\0' */
   534   return bufptr - buffer;
   535 }
   536 
   537 /* Format to an unbounded buffer BUF.  This is like sprintf, except it
   538    is not limited to returning an 'int' so it doesn't have a silly 2
   539    GiB limit on typical 64-bit hosts.  However, it is limited to the
   540    Emacs-style formats that doprnt supports, and it requotes ` and '
   541    as per ‘text-quoting-style’.
   542 
   543    Return the number of bytes put into BUF, excluding the terminating
   544    '\0'.  */
   545 ptrdiff_t
   546 esprintf (char *buf, char const *format, ...)
   547 {
   548   ptrdiff_t nbytes;
   549   va_list ap;
   550   va_start (ap, format);
   551   nbytes = doprnt (buf, TYPE_MAXIMUM (ptrdiff_t), format, 0, ap);
   552   va_end (ap);
   553   return nbytes;
   554 }
   555 
   556 #if defined HAVE_X_WINDOWS && defined USE_X_TOOLKIT
   557 
   558 /* Format to buffer *BUF of positive size *BUFSIZE, reallocating *BUF
   559    and updating *BUFSIZE if the buffer is too small, and otherwise
   560    behaving line esprintf.  When reallocating, free *BUF unless it is
   561    equal to NONHEAPBUF, and if BUFSIZE_MAX is nonnegative then signal
   562    memory exhaustion instead of growing the buffer size past
   563    BUFSIZE_MAX.  */
   564 ptrdiff_t
   565 exprintf (char **buf, ptrdiff_t *bufsize,
   566           char *nonheapbuf, ptrdiff_t bufsize_max,
   567           char const *format, ...)
   568 {
   569   ptrdiff_t nbytes;
   570   va_list ap;
   571   va_start (ap, format);
   572   nbytes = evxprintf (buf, bufsize, nonheapbuf, bufsize_max, format, ap);
   573   va_end (ap);
   574   return nbytes;
   575 }
   576 
   577 #endif
   578 
   579 /* Act like exprintf, except take a va_list.  */
   580 ptrdiff_t
   581 evxprintf (char **buf, ptrdiff_t *bufsize,
   582            char *nonheapbuf, ptrdiff_t bufsize_max,
   583            char const *format, va_list ap)
   584 {
   585   for (;;)
   586     {
   587       ptrdiff_t nbytes;
   588       va_list ap_copy;
   589       va_copy (ap_copy, ap);
   590       nbytes = doprnt (*buf, *bufsize, format, 0, ap_copy);
   591       va_end (ap_copy);
   592       if (nbytes < *bufsize - 1)
   593         return nbytes;
   594       if (*buf != nonheapbuf)
   595         {
   596           xfree (*buf);
   597           *buf = NULL;
   598         }
   599       *buf = xpalloc (NULL, bufsize, 1, bufsize_max, 1);
   600     }
   601 }

/* [<][>][^][v][top][bottom][index][help] */