src/sort.c

/* [<][>][^][v][top][bottom][index][help] */
This source file includes following definitions.
inorder
binarysort
count_run
gallop_left
gallop_right
merge_init
merge_markmem
cleanup_mem
merge_getmem
needmem
merge_lo
merge_hi
merge_at
powerloop
found_new_run
merge_force_collapse
merge_compute_minrun
reverse_vector
tim_sort
     1 /* Timsort for sequences.
     2 
     3 Copyright (C) 2022-2023 Free Software Foundation, Inc.
     4 
     5 This file is part of GNU Emacs.
     6 
     7 GNU Emacs is free software: you can redistribute it and/or modify
     8 it under the terms of the GNU General Public License as published by
     9 the Free Software Foundation, either version 3 of the License, or (at
    10 your option) any later version.
    11 
    12 GNU Emacs is distributed in the hope that it will be useful,
    13 but WITHOUT ANY WARRANTY; without even the implied warranty of
    14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    15 GNU General Public License for more details.
    16 
    17 You should have received a copy of the GNU General Public License
    18 along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
    19 
    20 /* This is a version of the cpython code implementing the TIMSORT
    21    sorting algorithm described in
    22    https://github.com/python/cpython/blob/main/Objects/listsort.txt.
    23    This algorithm identifies and pushes naturally ordered sublists of
    24    the original list, or "runs", onto a stack, and merges them
    25    periodically according to a merge strategy called "powersort".
    26    State is maintained during the sort in a merge_state structure,
    27    which is passed around as an argument to all the subroutines.  A
    28    "stretch" structure includes a pointer to the run BASE of length
    29    LEN along with its POWER (a computed integer used by the powersort
    30    merge strategy that depends on this run and the succeeding run.)  */
    31 
    32 
    33 #include <config.h>
    34 #include "lisp.h"
    35 
    36 
    37 /* MAX_MERGE_PENDING is the maximum number of entries in merge_state's
    38    pending-stretch stack.  For a list with n elements, this needs at most
    39    floor(log2(n)) + 1 entries even if we didn't force runs to a
    40    minimal length.  So the number of bits in a ptrdiff_t is plenty large
    41    enough for all cases.  */
    42 
    43 #define MAX_MERGE_PENDING (sizeof (ptrdiff_t)  * 8)
    44 
    45 /* Once we get into galloping mode, we stay there as long as both runs
    46    win at least GALLOP_WIN_MIN consecutive times.  */
    47 
    48 #define GALLOP_WIN_MIN 7
    49 
    50 /* A small temp array of size MERGESTATE_TEMP_SIZE is used to avoid
    51    malloc when merging small lists.  */
    52 
    53 #define MERGESTATE_TEMP_SIZE 256
    54 
    55 struct stretch
    56 {
    57   Lisp_Object *base;
    58   ptrdiff_t len;
    59   int power;
    60 };
    61 
    62 struct reloc
    63 {
    64   Lisp_Object **src;
    65   Lisp_Object **dst;
    66   ptrdiff_t *size;
    67   int order; /* -1 while in merge_lo; +1 while in merg_hi; 0 otherwise.  */
    68 };
    69 
    70 
    71 typedef struct
    72 {
    73   Lisp_Object *listbase;
    74   ptrdiff_t listlen;
    75 
    76   /* PENDING is a stack of N pending stretches yet to be merged.
    77      Stretch #i starts at address base[i] and extends for len[i]
    78      elements.  */
    79 
    80   int n;
    81   struct stretch pending[MAX_MERGE_PENDING];
    82 
    83   /* The variable MIN_GALLOP, initialized to GALLOP_WIN_MIN, controls
    84      when we get *into* galloping mode.  merge_lo and merge_hi tend to
    85      nudge it higher for random data, and lower for highly structured
    86      data.  */
    87 
    88   ptrdiff_t min_gallop;
    89 
    90   /* 'A' is temporary storage, able to hold ALLOCED elements, to help
    91      with merges.  'A' initially points to TEMPARRAY, and subsequently
    92      to newly allocated memory if needed.  */
    93 
    94   Lisp_Object *a;
    95   ptrdiff_t alloced;
    96   specpdl_ref count;
    97   Lisp_Object temparray[MERGESTATE_TEMP_SIZE];
    98 
    99   /* If an exception is thrown while merging we might have to relocate
   100      some list elements from temporary storage back into the list.
   101      RELOC keeps track of the information needed to do this.  */
   102 
   103   struct reloc reloc;
   104 
   105   /* PREDICATE is the lisp comparison predicate for the sort.  */
   106 
   107   Lisp_Object predicate;
   108 } merge_state;
   109 
   110 
   111 /* Return true iff (PREDICATE A B) is non-nil.  */
   112 
   113 static inline bool
   114 inorder (const Lisp_Object predicate, const Lisp_Object a, const Lisp_Object b)
   115 {
   116   return !NILP (call2 (predicate, a, b));
   117 }
   118 
   119 
   120 /* Sort the list starting at LO and ending at HI using a stable binary
   121    insertion sort algorithm. On entry the sublist [LO, START) (with
   122    START between LO and HIGH) is known to be sorted (pass START == LO
   123    if you are unsure).  Even in case of error, the output will be some
   124    permutation of the input (nothing is lost or duplicated).  */
   125 
   126 static void
   127 binarysort (merge_state *ms, Lisp_Object *lo, const Lisp_Object *hi,
   128             Lisp_Object *start)
   129 {
   130   Lisp_Object pred = ms->predicate;
   131 
   132   eassume (lo <= start && start <= hi);
   133   if (lo == start)
   134     ++start;
   135   for (; start < hi; ++start)
   136     {
   137       Lisp_Object *l = lo;
   138       Lisp_Object *r = start;
   139       Lisp_Object pivot = *r;
   140 
   141       eassume (l < r);
   142       do {
   143         Lisp_Object *p = l + ((r - l) >> 1);
   144         if (inorder (pred, pivot, *p))
   145           r = p;
   146         else
   147           l = p + 1;
   148       } while (l < r);
   149       eassume (l == r);
   150       for (Lisp_Object *p = start; p > l; --p)
   151         p[0] = p[-1];
   152       *l = pivot;
   153     }
   154 }
   155 
   156 
   157 /*  Find and return the length of the "run" (the longest
   158     non-decreasing sequence or the longest strictly decreasing
   159     sequence, with the Boolean *DESCENDING set to 0 in the former
   160     case, or to 1 in the latter) beginning at LO, in the slice [LO,
   161     HI) with LO < HI.  The strictness of the definition of
   162     "descending" ensures there are no equal elements to get out of
   163     order so the caller can safely reverse a descending sequence
   164     without violating stability.  */
   165 
   166 static ptrdiff_t
   167 count_run (merge_state *ms, Lisp_Object *lo, const Lisp_Object *hi,
   168            bool *descending)
   169 {
   170   Lisp_Object pred = ms->predicate;
   171 
   172   eassume (lo < hi);
   173   *descending = 0;
   174   ++lo;
   175   ptrdiff_t n = 1;
   176   if (lo == hi)
   177     return n;
   178 
   179   n = 2;
   180   if (inorder (pred, lo[0], lo[-1]))
   181     {
   182       *descending = 1;
   183       for (lo = lo + 1; lo < hi; ++lo, ++n)
   184         {
   185           if (!inorder (pred, lo[0], lo[-1]))
   186             break;
   187         }
   188     }
   189   else
   190     {
   191       for (lo = lo + 1; lo < hi; ++lo, ++n)
   192         {
   193           if (inorder (pred, lo[0], lo[-1]))
   194             break;
   195         }
   196     }
   197 
   198   return n;
   199 }
   200 
   201 
   202 /*  Locate and return the proper insertion position of KEY in a sorted
   203     vector: if the vector contains an element equal to KEY, return the
   204     position immediately to the left of the leftmost equal element.
   205     [GALLOP_RIGHT does the same except it returns the position to the
   206     right of the rightmost equal element (if any).]
   207 
   208     'A' is a sorted vector of N elements. N must be > 0.
   209 
   210     Elements preceding HINT, a non-negative index less than N, are
   211     skipped.  The closer HINT is to the final result, the faster this
   212     runs.
   213 
   214     The return value is the int k in [0, N] such that
   215 
   216     A[k-1] < KEY <= a[k]
   217 
   218     pretending that *(A-1) precedes all values and *(A+N) succeeds all
   219     values.  In other words, the first k elements of A should precede
   220     KEY, and the last N-k should follow KEY.  */
   221 
   222 static ptrdiff_t
   223 gallop_left (merge_state *ms, const Lisp_Object key, Lisp_Object *a,
   224              const ptrdiff_t n, const ptrdiff_t hint)
   225 {
   226   Lisp_Object pred = ms->predicate;
   227 
   228   eassume (a && n > 0 && hint >= 0 && hint < n);
   229 
   230   a += hint;
   231   ptrdiff_t lastofs = 0;
   232   ptrdiff_t ofs = 1;
   233   if (inorder (pred, *a, key))
   234     {
   235       /* When a[hint] < key, gallop right until
   236          a[hint + lastofs] < key <= a[hint + ofs].  */
   237       const ptrdiff_t maxofs = n - hint; /* This is one after the end of a.  */
   238       while (ofs < maxofs)
   239         {
   240           if (inorder (pred, a[ofs], key))
   241             {
   242               lastofs = ofs;
   243               eassume (ofs <= (PTRDIFF_MAX - 1) / 2);
   244               ofs = (ofs << 1) + 1;
   245             }
   246           else
   247             break; /* Here key <= a[hint+ofs].  */
   248         }
   249       if (ofs > maxofs)
   250         ofs = maxofs;
   251       /* Translate back to offsets relative to &a[0].  */
   252       lastofs += hint;
   253       ofs += hint;
   254     }
   255   else
   256     {
   257       /* When key <= a[hint], gallop left, until
   258          a[hint - ofs] < key <= a[hint - lastofs].  */
   259       const ptrdiff_t maxofs = hint + 1;        /* Here &a[0] is lowest.  */
   260       while (ofs < maxofs)
   261         {
   262           if (inorder (pred, a[-ofs], key))
   263             break;
   264           /* Here key <= a[hint - ofs].  */
   265           lastofs = ofs;
   266           eassume (ofs <= (PTRDIFF_MAX - 1) / 2);
   267           ofs = (ofs << 1) + 1;
   268         }
   269       if (ofs > maxofs)
   270         ofs = maxofs;
   271       /* Translate back to use positive offsets relative to &a[0].  */
   272       ptrdiff_t k = lastofs;
   273       lastofs = hint - ofs;
   274       ofs = hint - k;
   275     }
   276   a -= hint;
   277 
   278   eassume (-1 <= lastofs && lastofs < ofs && ofs <= n);
   279   /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the
   280      right of lastofs but no farther right than ofs.  Do a binary
   281      search, with invariant a[lastofs-1] < key <= a[ofs].  */
   282   ++lastofs;
   283   while (lastofs < ofs)
   284     {
   285       ptrdiff_t m = lastofs + ((ofs - lastofs) >> 1);
   286 
   287       if (inorder (pred, a[m], key))
   288         lastofs = m + 1;            /* Here a[m] < key.  */
   289       else
   290         ofs = m;                    /* Here key <= a[m].  */
   291     }
   292   eassume (lastofs == ofs);         /* Then a[ofs-1] < key <= a[ofs].  */
   293   return ofs;
   294 }
   295 
   296 
   297 /*  Locate and return the proper position of KEY in a sorted vector
   298     exactly like GALLOP_LEFT, except that if KEY already exists in
   299     A[0:N] find the position immediately to the right of the rightmost
   300     equal value.
   301 
   302     The return value is the int k in [0, N] such that
   303 
   304     A[k-1] <= KEY < A[k].  */
   305 
   306 static ptrdiff_t
   307 gallop_right (merge_state *ms, const Lisp_Object key, Lisp_Object *a,
   308               const ptrdiff_t n, const ptrdiff_t hint)
   309 {
   310   Lisp_Object pred = ms->predicate;
   311 
   312   eassume (a && n > 0 && hint >= 0 && hint < n);
   313 
   314   a += hint;
   315   ptrdiff_t lastofs = 0;
   316   ptrdiff_t ofs = 1;
   317   if (inorder (pred, key, *a))
   318     {
   319       /* When key < a[hint], gallop left until
   320          a[hint - ofs] <= key < a[hint - lastofs].  */
   321       const ptrdiff_t maxofs = hint + 1;        /* Here &a[0] is lowest.  */
   322       while (ofs < maxofs)
   323         {
   324           if (inorder (pred, key, a[-ofs]))
   325             {
   326               lastofs = ofs;
   327               eassume (ofs <= (PTRDIFF_MAX - 1) / 2);
   328               ofs = (ofs << 1) + 1;
   329             }
   330           else                /* Here a[hint - ofs] <= key.  */
   331             break;
   332         }
   333       if (ofs > maxofs)
   334         ofs = maxofs;
   335       /* Translate back to use positive offsets relative to &a[0].  */
   336       ptrdiff_t k = lastofs;
   337       lastofs = hint - ofs;
   338       ofs = hint - k;
   339     }
   340   else
   341     {
   342       /* When a[hint] <= key, gallop right, until
   343          a[hint + lastofs] <= key < a[hint + ofs].  */
   344       const ptrdiff_t maxofs = n - hint;        /* Here &a[n-1] is highest.  */
   345       while (ofs < maxofs)
   346         {
   347           if (inorder (pred, key, a[ofs]))
   348             break;
   349           /* Here a[hint + ofs] <= key.  */
   350           lastofs = ofs;
   351           eassume (ofs <= (PTRDIFF_MAX - 1) / 2);
   352           ofs = (ofs << 1) + 1;
   353         }
   354       if (ofs > maxofs)
   355         ofs = maxofs;
   356       /* Translate back to use offsets relative to &a[0].  */
   357       lastofs += hint;
   358       ofs += hint;
   359     }
   360   a -= hint;
   361 
   362   eassume (-1 <= lastofs && lastofs < ofs && ofs <= n);
   363   /* Now a[lastofs] <= key < a[ofs], so key belongs somewhere to the
   364      right of lastofs but no farther right than ofs.  Do a binary
   365      search, with invariant a[lastofs-1] <= key < a[ofs].  */
   366   ++lastofs;
   367   while (lastofs < ofs)
   368     {
   369       ptrdiff_t m = lastofs + ((ofs - lastofs) >> 1);
   370 
   371       if (inorder (pred, key, a[m]))
   372         ofs = m;                    /* Here key < a[m].  */
   373       else
   374         lastofs = m + 1;            /* Here a[m] <= key.  */
   375     }
   376   eassume (lastofs == ofs);         /* Now  a[ofs-1] <= key < a[ofs].  */
   377   return ofs;
   378 }
   379 
   380 
   381 static void
   382 merge_init (merge_state *ms, const ptrdiff_t list_size, Lisp_Object *lo,
   383             const Lisp_Object predicate)
   384 {
   385   eassume (ms != NULL);
   386 
   387   ms->a = ms->temparray;
   388   ms->alloced = MERGESTATE_TEMP_SIZE;
   389 
   390   ms->n = 0;
   391   ms->min_gallop = GALLOP_WIN_MIN;
   392   ms->listlen = list_size;
   393   ms->listbase = lo;
   394   ms->predicate = predicate;
   395   ms->reloc = (struct reloc){NULL, NULL, NULL, 0};
   396 }
   397 
   398 
   399 /* The dynamically allocated memory may hold lisp objects during
   400    merging.  MERGE_MARKMEM marks them so they aren't reaped during
   401    GC.  */
   402 
   403 static void
   404 merge_markmem (void *arg)
   405 {
   406   merge_state *ms = arg;
   407   eassume (ms != NULL);
   408 
   409   if (ms->reloc.size != NULL && *ms->reloc.size > 0)
   410     {
   411       eassume (ms->reloc.src != NULL);
   412       mark_objects (*ms->reloc.src, *ms->reloc.size);
   413     }
   414 }
   415 
   416 
   417 /* Free all temp storage.  If an exception occurs while merging,
   418    relocate any lisp elements in temp storage back to the original
   419    array.  */
   420 
   421 static void
   422 cleanup_mem (void *arg)
   423 {
   424   merge_state *ms = arg;
   425   eassume (ms != NULL);
   426 
   427   /* If we have an exception while merging, some of the list elements
   428      might only live in temp storage; we copy everything remaining in
   429      the temp storage back into the original list.  This ensures that
   430      the original list has all of the original elements, although
   431      their order is unpredictable.  */
   432 
   433   if (ms->reloc.order != 0 && *ms->reloc.size > 0)
   434     {
   435       eassume (*ms->reloc.src != NULL && *ms->reloc.dst != NULL);
   436       ptrdiff_t n = *ms->reloc.size;
   437       ptrdiff_t shift = ms->reloc.order == -1 ? 0 : n - 1;
   438       memcpy (*ms->reloc.dst - shift, *ms->reloc.src, n * word_size);
   439     }
   440 
   441   /* Free any remaining temp storage.  */
   442   xfree (ms->a);
   443 }
   444 
   445 
   446 /* Allocate enough temp memory for NEED array slots.  Any previously
   447    allocated memory is first freed, and a cleanup routine is
   448    registered to free memory at the very end of the sort, or on
   449    exception.  */
   450 
   451 static void
   452 merge_getmem (merge_state *ms, const ptrdiff_t need)
   453 {
   454   eassume (ms != NULL);
   455 
   456   if (ms->a == ms->temparray)
   457     {
   458       /* We only get here if alloc is needed and this is the first
   459          time, so we set up the unwind protection.  */
   460       specpdl_ref count = SPECPDL_INDEX ();
   461       record_unwind_protect_ptr_mark (cleanup_mem, ms, merge_markmem);
   462       ms->count = count;
   463     }
   464   else
   465     {
   466       /* We have previously alloced storage.  Since we don't care
   467          what's in the block we don't use realloc which would waste
   468          cycles copying the old data.  We just free and alloc
   469          again.  */
   470       xfree (ms->a);
   471     }
   472   ms->a = xmalloc (need * word_size);
   473   ms->alloced = need;
   474 }
   475 
   476 
   477 static inline void
   478 needmem (merge_state *ms, ptrdiff_t na)
   479 {
   480   if (na > ms->alloced)
   481     merge_getmem (ms, na);
   482 }
   483 
   484 
   485 /* Stably merge (in-place) the NA elements starting at SSA with the NB
   486    elements starting at SSB = SSA + NA.  NA and NB must be positive.
   487    Require that SSA[NA-1] belongs at the end of the merge, and NA <=
   488    NB.  */
   489 
   490 static void
   491 merge_lo (merge_state *ms, Lisp_Object *ssa, ptrdiff_t na, Lisp_Object *ssb,
   492           ptrdiff_t nb)
   493 {
   494   Lisp_Object pred = ms->predicate;
   495 
   496   eassume (ms && ssa && ssb && na > 0 && nb > 0);
   497   eassume (ssa + na == ssb);
   498   needmem (ms, na);
   499   memcpy (ms->a, ssa, na * word_size);
   500   Lisp_Object *dest = ssa;
   501   ssa = ms->a;
   502 
   503   ms->reloc = (struct reloc){&ssa, &dest, &na, -1};
   504 
   505   *dest++ = *ssb++;
   506   --nb;
   507   if (nb == 0)
   508     goto Succeed;
   509   if (na == 1)
   510     goto CopyB;
   511 
   512   ptrdiff_t min_gallop = ms->min_gallop;
   513   for (;;)
   514     {
   515       ptrdiff_t acount = 0;   /* The # of consecutive times A won.  */
   516 
   517       ptrdiff_t bcount = 0;   /* The # of consecutive times B won.  */
   518 
   519       for (;;)
   520         {
   521           eassume (na > 1 && nb > 0);
   522           if (inorder (pred, *ssb, *ssa))
   523             {
   524               *dest++ = *ssb++ ;
   525               ++bcount;
   526               acount = 0;
   527               --nb;
   528               if (nb == 0)
   529                 goto Succeed;
   530               if (bcount >= min_gallop)
   531                 break;
   532             }
   533           else
   534             {
   535               *dest++ = *ssa++;
   536               ++acount;
   537               bcount = 0;
   538               --na;
   539               if (na == 1)
   540                 goto CopyB;
   541               if (acount >= min_gallop)
   542                 break;
   543             }
   544         }
   545 
   546       /* One run is winning so consistently that galloping may be a
   547          huge speedup.  We try that, and continue galloping until (if
   548          ever) neither run appears to be winning consistently
   549          anymore.  */
   550       ++min_gallop;
   551       do {
   552         eassume (na > 1 && nb > 0);
   553         min_gallop -= min_gallop > 1;
   554         ms->min_gallop = min_gallop;
   555         ptrdiff_t k = gallop_right (ms, ssb[0], ssa, na, 0);
   556         acount = k;
   557         if (k)
   558           {
   559             memcpy (dest, ssa, k * word_size);
   560             dest += k;
   561             ssa += k;
   562             na -= k;
   563             if (na == 1)
   564               goto CopyB;
   565             /* While na==0 is impossible for a consistent comparison
   566                function, we shouldn't assume that it is.  */
   567             if (na == 0)
   568               goto Succeed;
   569           }
   570         *dest++ = *ssb++ ;
   571         --nb;
   572         if (nb == 0)
   573           goto Succeed;
   574 
   575         k = gallop_left (ms, ssa[0], ssb, nb, 0);
   576         bcount = k;
   577         if (k)
   578           {
   579             memmove (dest, ssb, k * word_size);
   580             dest += k;
   581             ssb += k;
   582             nb -= k;
   583             if (nb == 0)
   584               goto Succeed;
   585           }
   586         *dest++ = *ssa++;
   587         --na;
   588         if (na == 1)
   589           goto CopyB;
   590       } while (acount >= GALLOP_WIN_MIN || bcount >= GALLOP_WIN_MIN);
   591       ++min_gallop;   /* Apply a penalty for leaving galloping mode.  */
   592       ms->min_gallop = min_gallop;
   593     }
   594  Succeed:
   595   ms->reloc = (struct reloc){NULL, NULL, NULL, 0};
   596 
   597   if (na)
   598     memcpy (dest, ssa, na * word_size);
   599   return;
   600  CopyB:
   601   eassume (na == 1 && nb > 0);
   602   ms->reloc = (struct reloc){NULL, NULL, NULL, 0};
   603 
   604   /* The last element of ssa belongs at the end of the merge.  */
   605   memmove (dest, ssb, nb * word_size);
   606   dest[nb] = ssa[0];
   607 }
   608 
   609 
   610 /* Stably merge (in-place) the NA elements starting at SSA with the NB
   611    elements starting at SSB = SSA + NA.  NA and NB must be positive.
   612    Require that SSA[NA-1] belongs at the end of the merge, and NA >=
   613    NB.  */
   614 
   615 static void
   616 merge_hi (merge_state *ms, Lisp_Object *ssa, ptrdiff_t na,
   617           Lisp_Object *ssb, ptrdiff_t nb)
   618 {
   619   Lisp_Object pred = ms->predicate;
   620 
   621   eassume (ms && ssa && ssb && na > 0 && nb > 0);
   622   eassume (ssa + na == ssb);
   623   needmem (ms, nb);
   624   Lisp_Object *dest = ssb;
   625   dest += nb - 1;
   626   memcpy(ms->a, ssb, nb * word_size);
   627   Lisp_Object *basea = ssa;
   628   Lisp_Object *baseb = ms->a;
   629   ssb = ms->a + nb - 1;
   630   ssa += na - 1;
   631 
   632   ms->reloc = (struct reloc){&baseb, &dest, &nb, 1};
   633 
   634   *dest-- = *ssa--;
   635   --na;
   636   if (na == 0)
   637     goto Succeed;
   638   if (nb == 1)
   639     goto CopyA;
   640 
   641   ptrdiff_t min_gallop = ms->min_gallop;
   642   for (;;) {
   643     ptrdiff_t acount = 0;   /* The # of consecutive times A won.  */
   644     ptrdiff_t bcount = 0;   /* The # of consecutive times B won.  */
   645 
   646     for (;;) {
   647       eassume (na > 0 && nb > 1);
   648       if (inorder (pred, *ssb, *ssa))
   649         {
   650           *dest-- = *ssa--;
   651           ++acount;
   652           bcount = 0;
   653           --na;
   654           if (na == 0)
   655             goto Succeed;
   656           if (acount >= min_gallop)
   657             break;
   658         }
   659       else
   660         {
   661           *dest-- = *ssb--;
   662           ++bcount;
   663           acount = 0;
   664           --nb;
   665           if (nb == 1)
   666             goto CopyA;
   667           if (bcount >= min_gallop)
   668             break;
   669         }
   670     }
   671 
   672     /* One run is winning so consistently that galloping may be a huge
   673        speedup.  Try that, and continue galloping until (if ever)
   674        neither run appears to be winning consistently anymore.  */
   675     ++min_gallop;
   676     do {
   677       eassume (na > 0 && nb > 1);
   678       min_gallop -= min_gallop > 1;
   679       ms->min_gallop = min_gallop;
   680       ptrdiff_t k = gallop_right (ms, ssb[0], basea, na, na - 1);
   681       k = na - k;
   682       acount = k;
   683       if (k)
   684         {
   685           dest += -k;
   686           ssa += -k;
   687           memmove(dest + 1, ssa + 1, k * word_size);
   688           na -= k;
   689           if (na == 0)
   690             goto Succeed;
   691         }
   692       *dest-- = *ssb--;
   693       --nb;
   694       if (nb == 1)
   695         goto CopyA;
   696 
   697       k = gallop_left (ms, ssa[0], baseb, nb, nb - 1);
   698       k = nb - k;
   699       bcount = k;
   700       if (k)
   701         {
   702           dest += -k;
   703           ssb += -k;
   704           memcpy(dest + 1, ssb + 1, k * word_size);
   705           nb -= k;
   706           if (nb == 1)
   707             goto CopyA;
   708           /* While nb==0 is impossible for a consistent comparison
   709               function we shouldn't assume that it is.  */
   710           if (nb == 0)
   711             goto Succeed;
   712         }
   713       *dest-- = *ssa--;
   714       --na;
   715       if (na == 0)
   716         goto Succeed;
   717     } while (acount >= GALLOP_WIN_MIN || bcount >= GALLOP_WIN_MIN);
   718     ++min_gallop;      /* Apply a penalty for leaving galloping mode.  */
   719     ms->min_gallop = min_gallop;
   720   }
   721  Succeed:
   722   ms->reloc = (struct reloc){NULL, NULL, NULL, 0};
   723   if (nb)
   724     memcpy (dest - nb + 1, baseb, nb * word_size);
   725   return;
   726  CopyA:
   727   eassume (nb == 1 && na > 0);
   728   ms->reloc = (struct reloc){NULL, NULL, NULL, 0};
   729   /* The first element of ssb belongs at the front of the merge.  */
   730   memmove (dest + 1 - na, ssa + 1 - na, na * word_size);
   731   dest += -na;
   732   ssa += -na;
   733   dest[0] = ssb[0];
   734 }
   735 
   736 
   737 /* Merge the two runs at stack indices I and I+1.  */
   738 
   739 static void
   740 merge_at (merge_state *ms, const ptrdiff_t i)
   741 {
   742   eassume (ms != NULL);
   743   eassume (ms->n >= 2);
   744   eassume (i >= 0);
   745   eassume (i == ms->n - 2 || i == ms->n - 3);
   746 
   747   Lisp_Object *ssa = ms->pending[i].base;
   748   ptrdiff_t na = ms->pending[i].len;
   749   Lisp_Object *ssb = ms->pending[i + 1].base;
   750   ptrdiff_t nb = ms->pending[i + 1].len;
   751   eassume (na > 0 && nb > 0);
   752   eassume (ssa + na == ssb);
   753 
   754   /* Record the length of the combined runs. The current run i+1 goes
   755      away after the merge.  If i is the 3rd-last run now, slide the
   756      last run (which isn't involved in this merge) over to i+1.  */
   757   ms->pending[i].len = na + nb;
   758   if (i == ms->n - 3)
   759     ms->pending[i + 1] = ms->pending[i + 2];
   760   --ms->n;
   761 
   762   /* Where does b start in a?  Elements in a before that can be
   763      ignored (they are already in place).  */
   764   ptrdiff_t k = gallop_right (ms, *ssb, ssa, na, 0);
   765   eassume (k >= 0);
   766   ssa += k;
   767   na -= k;
   768   if (na == 0)
   769     return;
   770 
   771   /* Where does a end in b?  Elements in b after that can be ignored
   772      (they are already in place).  */
   773   nb = gallop_left (ms, ssa[na - 1], ssb, nb, nb - 1);
   774   if (nb == 0)
   775     return;
   776   eassume (nb > 0);
   777   /* Merge what remains of the runs using a temp array with size
   778      min(na, nb) elements.  */
   779   if (na <= nb)
   780     merge_lo (ms, ssa, na, ssb, nb);
   781   else
   782     merge_hi (ms, ssa, na, ssb, nb);
   783 }
   784 
   785 
   786 /* Compute the "power" of the first of two adjacent runs beginning at
   787    index S1, with the first having length N1 and the second (starting
   788    at index S1+N1) having length N2.  The run has total length N.  */
   789 
   790 static int
   791 powerloop (const ptrdiff_t s1, const ptrdiff_t n1, const ptrdiff_t n2,
   792            const ptrdiff_t n)
   793 {
   794   eassume (s1 >= 0);
   795   eassume (n1 > 0 && n2 > 0);
   796   eassume (s1 + n1 + n2 <= n);
   797   /* The midpoints a and b are
   798      a = s1 + n1/2
   799      b = s1 + n1 + n2/2 = a + (n1 + n2)/2
   800 
   801      These may not be integers because of the "/2", so we work with
   802      2*a and 2*b instead.  It makes no difference to the outcome,
   803      since the bits in the expansion of (2*i)/n are merely shifted one
   804      position from those of i/n.  */
   805   ptrdiff_t a = 2 * s1 + n1;
   806   ptrdiff_t b = a + n1 + n2;
   807   int result = 0;
   808   /* Emulate a/n and b/n one bit a time, until their bits differ.  */
   809   for (;;)
   810     {
   811       ++result;
   812       if (a >= n)
   813         {  /* Both quotient bits are now 1.  */
   814           eassume (b >= a);
   815           a -= n;
   816           b -= n;
   817         }
   818       else if (b >= n)
   819         {  /* a/n bit is 0 and b/n bit is 1.  */
   820           break;
   821         } /* Otherwise both quotient bits are 0.  */
   822       eassume (a < b && b < n);
   823       a <<= 1;
   824       b <<= 1;
   825     }
   826   return result;
   827 }
   828 
   829 
   830 /* Update the state upon identifying a run of length N2.  If there's
   831    already a stretch on the stack, apply the "powersort" merge
   832    strategy: compute the topmost stretch's "power" (depth in a
   833    conceptual binary merge tree) and merge adjacent runs on the stack
   834    with greater power.  */
   835 
   836 static void
   837 found_new_run (merge_state *ms, const ptrdiff_t n2)
   838 {
   839   eassume (ms != NULL);
   840   if (ms->n)
   841     {
   842       eassume (ms->n > 0);
   843       struct stretch *p = ms->pending;
   844       ptrdiff_t s1 = p[ms->n - 1].base - ms->listbase;
   845       ptrdiff_t n1 = p[ms->n - 1].len;
   846       int power = powerloop (s1, n1, n2, ms->listlen);
   847       while (ms->n > 1 && p[ms->n - 2].power > power)
   848         {
   849           merge_at (ms, ms->n - 2);
   850         }
   851       eassume (ms->n < 2 || p[ms->n - 2].power < power);
   852       p[ms->n - 1].power = power;
   853     }
   854 }
   855 
   856 
   857 /* Unconditionally merge all stretches on the stack until only one
   858    remains.  */
   859 
   860 static void
   861 merge_force_collapse (merge_state *ms)
   862 {
   863   struct stretch *p = ms->pending;
   864 
   865   eassume (ms != NULL);
   866   while (ms->n > 1)
   867     {
   868       ptrdiff_t n = ms->n - 2;
   869       if (n > 0 && p[n - 1].len < p[n + 1].len)
   870         --n;
   871       merge_at (ms, n);
   872     }
   873 }
   874 
   875 
   876 /* Compute a good value for the minimum run length; natural runs
   877    shorter than this are boosted artificially via binary insertion.
   878 
   879    If N < 64, return N (it's too small to bother with fancy stuff).
   880    Otherwise if N is an exact power of 2, return 32.  Finally, return
   881    an int k, 32 <= k <= 64, such that N/k is close to, but strictly
   882    less than, an exact power of 2.  */
   883 
   884 static ptrdiff_t
   885 merge_compute_minrun (ptrdiff_t n)
   886 {
   887   ptrdiff_t r = 0;           /* r will become 1 if any non-zero bits are
   888                                 shifted off.  */
   889 
   890   eassume (n >= 0);
   891   while (n >= 64)
   892     {
   893       r |= n & 1;
   894       n >>= 1;
   895     }
   896   return n + r;
   897 }
   898 
   899 
   900 static void
   901 reverse_vector (Lisp_Object *s, const ptrdiff_t n)
   902 {
   903   for (ptrdiff_t i = 0; i < n >> 1; i++)
   904     {
   905       Lisp_Object tem = s[i];
   906       s[i] =  s[n - i - 1];
   907       s[n - i - 1] = tem;
   908     }
   909 }
   910 
   911 /* Sort the array SEQ with LENGTH elements in the order determined by
   912    PREDICATE.  */
   913 
   914 void
   915 tim_sort (Lisp_Object predicate, Lisp_Object *seq, const ptrdiff_t length)
   916 {
   917   if (SYMBOLP (predicate))
   918     {
   919       /* Attempt to resolve the function as far as possible ahead of time,
   920          to avoid having to do it for each call.  */
   921       Lisp_Object fun = XSYMBOL (predicate)->u.s.function;
   922       if (SYMBOLP (fun))
   923         /* Function was an alias; use slow-path resolution.  */
   924         fun = indirect_function (fun);
   925       /* Don't resolve to an autoload spec; that would be very slow.  */
   926       if (!NILP (fun) && !(CONSP (fun) && EQ (XCAR (fun), Qautoload)))
   927         predicate = fun;
   928     }
   929 
   930   merge_state ms;
   931   Lisp_Object *lo = seq;
   932 
   933   merge_init (&ms, length, lo, predicate);
   934 
   935   /* March over the array once, left to right, finding natural runs,
   936      and extending short natural runs to minrun elements.  */
   937   const ptrdiff_t minrun = merge_compute_minrun (length);
   938   ptrdiff_t nremaining = length;
   939   do {
   940     bool descending;
   941 
   942     /* Identify the next run.  */
   943     ptrdiff_t n = count_run (&ms, lo, lo + nremaining, &descending);
   944     if (descending)
   945       reverse_vector (lo, n);
   946     /* If the run is short, extend it to min(minrun, nremaining).  */
   947     if (n < minrun)
   948       {
   949         const ptrdiff_t force = nremaining <= minrun ?
   950           nremaining : minrun;
   951         binarysort (&ms, lo, lo + force, lo + n);
   952         n = force;
   953       }
   954     eassume (ms.n == 0 || ms.pending[ms.n - 1].base +
   955              ms.pending[ms.n - 1].len == lo);
   956     found_new_run (&ms, n);
   957     /* Push the new run on to the stack.  */
   958     eassume (ms.n < MAX_MERGE_PENDING);
   959     ms.pending[ms.n].base = lo;
   960     ms.pending[ms.n].len = n;
   961     ++ms.n;
   962     /* Advance to find the next run.  */
   963     lo += n;
   964     nremaining -= n;
   965   } while (nremaining);
   966 
   967   merge_force_collapse (&ms);
   968   eassume (ms.n == 1);
   969   eassume (ms.pending[0].len == length);
   970   lo = ms.pending[0].base;
   971 
   972   if (ms.a != ms.temparray)
   973     unbind_to (ms.count, Qnil);
   974 }
/* [<][>][^][v][top][bottom][index][help] */
root/src/sort.c

DEFINITIONS