1 /* Timsort for sequences.
2
3 Copyright (C) 2022-2023 Free Software Foundation, Inc.
4
5 This file is part of GNU Emacs.
6
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or (at
10 your option) any later version.
11
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
19
20 /* This is a version of the cpython code implementing the TIMSORT
21 sorting algorithm described in
22 https://github.com/python/cpython/blob/main/Objects/listsort.txt.
23 This algorithm identifies and pushes naturally ordered sublists of
24 the original list, or "runs", onto a stack, and merges them
25 periodically according to a merge strategy called "powersort".
26 State is maintained during the sort in a merge_state structure,
27 which is passed around as an argument to all the subroutines. A
28 "stretch" structure includes a pointer to the run BASE of length
29 LEN along with its POWER (a computed integer used by the powersort
30 merge strategy that depends on this run and the succeeding run.) */
31
32
33 #include <config.h>
34 #include "lisp.h"
35
36
37 /* MAX_MERGE_PENDING is the maximum number of entries in merge_state's
38 pending-stretch stack. For a list with n elements, this needs at most
39 floor(log2(n)) + 1 entries even if we didn't force runs to a
40 minimal length. So the number of bits in a ptrdiff_t is plenty large
41 enough for all cases. */
42
43 #define MAX_MERGE_PENDING (sizeof (ptrdiff_t) * 8)
44
45 /* Once we get into galloping mode, we stay there as long as both runs
46 win at least GALLOP_WIN_MIN consecutive times. */
47
48 #define GALLOP_WIN_MIN 7
49
50 /* A small temp array of size MERGESTATE_TEMP_SIZE is used to avoid
51 malloc when merging small lists. */
52
53 #define MERGESTATE_TEMP_SIZE 256
54
55 struct stretch
56 {
57 Lisp_Object *base;
58 ptrdiff_t len;
59 int power;
60 };
61
62 struct reloc
63 {
64 Lisp_Object **src;
65 Lisp_Object **dst;
66 ptrdiff_t *size;
67 int order; /* -1 while in merge_lo; +1 while in merg_hi; 0 otherwise. */
68 };
69
70
71 typedef struct
72 {
73 Lisp_Object *listbase;
74 ptrdiff_t listlen;
75
76 /* PENDING is a stack of N pending stretches yet to be merged.
77 Stretch #i starts at address base[i] and extends for len[i]
78 elements. */
79
80 int n;
81 struct stretch pending[MAX_MERGE_PENDING];
82
83 /* The variable MIN_GALLOP, initialized to GALLOP_WIN_MIN, controls
84 when we get *into* galloping mode. merge_lo and merge_hi tend to
85 nudge it higher for random data, and lower for highly structured
86 data. */
87
88 ptrdiff_t min_gallop;
89
90 /* 'A' is temporary storage, able to hold ALLOCED elements, to help
91 with merges. 'A' initially points to TEMPARRAY, and subsequently
92 to newly allocated memory if needed. */
93
94 Lisp_Object *a;
95 ptrdiff_t alloced;
96 specpdl_ref count;
97 Lisp_Object temparray[MERGESTATE_TEMP_SIZE];
98
99 /* If an exception is thrown while merging we might have to relocate
100 some list elements from temporary storage back into the list.
101 RELOC keeps track of the information needed to do this. */
102
103 struct reloc reloc;
104
105 /* PREDICATE is the lisp comparison predicate for the sort. */
106
107 Lisp_Object predicate;
108 } merge_state;
109
110
111 /* Return true iff (PREDICATE A B) is non-nil. */
112
113 static inline bool
114 inorder (const Lisp_Object predicate, const Lisp_Object a, const Lisp_Object b)
115 {
116 return !NILP (call2 (predicate, a, b));
117 }
118
119
120 /* Sort the list starting at LO and ending at HI using a stable binary
121 insertion sort algorithm. On entry the sublist [LO, START) (with
122 START between LO and HIGH) is known to be sorted (pass START == LO
123 if you are unsure). Even in case of error, the output will be some
124 permutation of the input (nothing is lost or duplicated). */
125
126 static void
127 binarysort (merge_state *ms, Lisp_Object *lo, const Lisp_Object *hi,
128 Lisp_Object *start)
129 {
130 Lisp_Object pred = ms->predicate;
131
132 eassume (lo <= start && start <= hi);
133 if (lo == start)
134 ++start;
135 for (; start < hi; ++start)
136 {
137 Lisp_Object *l = lo;
138 Lisp_Object *r = start;
139 Lisp_Object pivot = *r;
140
141 eassume (l < r);
142 do {
143 Lisp_Object *p = l + ((r - l) >> 1);
144 if (inorder (pred, pivot, *p))
145 r = p;
146 else
147 l = p + 1;
148 } while (l < r);
149 eassume (l == r);
150 for (Lisp_Object *p = start; p > l; --p)
151 p[0] = p[-1];
152 *l = pivot;
153 }
154 }
155
156
157 /* Find and return the length of the "run" (the longest
158 non-decreasing sequence or the longest strictly decreasing
159 sequence, with the Boolean *DESCENDING set to 0 in the former
160 case, or to 1 in the latter) beginning at LO, in the slice [LO,
161 HI) with LO < HI. The strictness of the definition of
162 "descending" ensures there are no equal elements to get out of
163 order so the caller can safely reverse a descending sequence
164 without violating stability. */
165
166 static ptrdiff_t
167 count_run (merge_state *ms, Lisp_Object *lo, const Lisp_Object *hi,
168 bool *descending)
169 {
170 Lisp_Object pred = ms->predicate;
171
172 eassume (lo < hi);
173 *descending = 0;
174 ++lo;
175 ptrdiff_t n = 1;
176 if (lo == hi)
177 return n;
178
179 n = 2;
180 if (inorder (pred, lo[0], lo[-1]))
181 {
182 *descending = 1;
183 for (lo = lo + 1; lo < hi; ++lo, ++n)
184 {
185 if (!inorder (pred, lo[0], lo[-1]))
186 break;
187 }
188 }
189 else
190 {
191 for (lo = lo + 1; lo < hi; ++lo, ++n)
192 {
193 if (inorder (pred, lo[0], lo[-1]))
194 break;
195 }
196 }
197
198 return n;
199 }
200
201
202 /* Locate and return the proper insertion position of KEY in a sorted
203 vector: if the vector contains an element equal to KEY, return the
204 position immediately to the left of the leftmost equal element.
205 [GALLOP_RIGHT does the same except it returns the position to the
206 right of the rightmost equal element (if any).]
207
208 'A' is a sorted vector of N elements. N must be > 0.
209
210 Elements preceding HINT, a non-negative index less than N, are
211 skipped. The closer HINT is to the final result, the faster this
212 runs.
213
214 The return value is the int k in [0, N] such that
215
216 A[k-1] < KEY <= a[k]
217
218 pretending that *(A-1) precedes all values and *(A+N) succeeds all
219 values. In other words, the first k elements of A should precede
220 KEY, and the last N-k should follow KEY. */
221
222 static ptrdiff_t
223 gallop_left (merge_state *ms, const Lisp_Object key, Lisp_Object *a,
224 const ptrdiff_t n, const ptrdiff_t hint)
225 {
226 Lisp_Object pred = ms->predicate;
227
228 eassume (a && n > 0 && hint >= 0 && hint < n);
229
230 a += hint;
231 ptrdiff_t lastofs = 0;
232 ptrdiff_t ofs = 1;
233 if (inorder (pred, *a, key))
234 {
235 /* When a[hint] < key, gallop right until
236 a[hint + lastofs] < key <= a[hint + ofs]. */
237 const ptrdiff_t maxofs = n - hint; /* This is one after the end of a. */
238 while (ofs < maxofs)
239 {
240 if (inorder (pred, a[ofs], key))
241 {
242 lastofs = ofs;
243 eassume (ofs <= (PTRDIFF_MAX - 1) / 2);
244 ofs = (ofs << 1) + 1;
245 }
246 else
247 break; /* Here key <= a[hint+ofs]. */
248 }
249 if (ofs > maxofs)
250 ofs = maxofs;
251 /* Translate back to offsets relative to &a[0]. */
252 lastofs += hint;
253 ofs += hint;
254 }
255 else
256 {
257 /* When key <= a[hint], gallop left, until
258 a[hint - ofs] < key <= a[hint - lastofs]. */
259 const ptrdiff_t maxofs = hint + 1; /* Here &a[0] is lowest. */
260 while (ofs < maxofs)
261 {
262 if (inorder (pred, a[-ofs], key))
263 break;
264 /* Here key <= a[hint - ofs]. */
265 lastofs = ofs;
266 eassume (ofs <= (PTRDIFF_MAX - 1) / 2);
267 ofs = (ofs << 1) + 1;
268 }
269 if (ofs > maxofs)
270 ofs = maxofs;
271 /* Translate back to use positive offsets relative to &a[0]. */
272 ptrdiff_t k = lastofs;
273 lastofs = hint - ofs;
274 ofs = hint - k;
275 }
276 a -= hint;
277
278 eassume (-1 <= lastofs && lastofs < ofs && ofs <= n);
279 /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the
280 right of lastofs but no farther right than ofs. Do a binary
281 search, with invariant a[lastofs-1] < key <= a[ofs]. */
282 ++lastofs;
283 while (lastofs < ofs)
284 {
285 ptrdiff_t m = lastofs + ((ofs - lastofs) >> 1);
286
287 if (inorder (pred, a[m], key))
288 lastofs = m + 1; /* Here a[m] < key. */
289 else
290 ofs = m; /* Here key <= a[m]. */
291 }
292 eassume (lastofs == ofs); /* Then a[ofs-1] < key <= a[ofs]. */
293 return ofs;
294 }
295
296
297 /* Locate and return the proper position of KEY in a sorted vector
298 exactly like GALLOP_LEFT, except that if KEY already exists in
299 A[0:N] find the position immediately to the right of the rightmost
300 equal value.
301
302 The return value is the int k in [0, N] such that
303
304 A[k-1] <= KEY < A[k]. */
305
306 static ptrdiff_t
307 gallop_right (merge_state *ms, const Lisp_Object key, Lisp_Object *a,
308 const ptrdiff_t n, const ptrdiff_t hint)
309 {
310 Lisp_Object pred = ms->predicate;
311
312 eassume (a && n > 0 && hint >= 0 && hint < n);
313
314 a += hint;
315 ptrdiff_t lastofs = 0;
316 ptrdiff_t ofs = 1;
317 if (inorder (pred, key, *a))
318 {
319 /* When key < a[hint], gallop left until
320 a[hint - ofs] <= key < a[hint - lastofs]. */
321 const ptrdiff_t maxofs = hint + 1; /* Here &a[0] is lowest. */
322 while (ofs < maxofs)
323 {
324 if (inorder (pred, key, a[-ofs]))
325 {
326 lastofs = ofs;
327 eassume (ofs <= (PTRDIFF_MAX - 1) / 2);
328 ofs = (ofs << 1) + 1;
329 }
330 else /* Here a[hint - ofs] <= key. */
331 break;
332 }
333 if (ofs > maxofs)
334 ofs = maxofs;
335 /* Translate back to use positive offsets relative to &a[0]. */
336 ptrdiff_t k = lastofs;
337 lastofs = hint - ofs;
338 ofs = hint - k;
339 }
340 else
341 {
342 /* When a[hint] <= key, gallop right, until
343 a[hint + lastofs] <= key < a[hint + ofs]. */
344 const ptrdiff_t maxofs = n - hint; /* Here &a[n-1] is highest. */
345 while (ofs < maxofs)
346 {
347 if (inorder (pred, key, a[ofs]))
348 break;
349 /* Here a[hint + ofs] <= key. */
350 lastofs = ofs;
351 eassume (ofs <= (PTRDIFF_MAX - 1) / 2);
352 ofs = (ofs << 1) + 1;
353 }
354 if (ofs > maxofs)
355 ofs = maxofs;
356 /* Translate back to use offsets relative to &a[0]. */
357 lastofs += hint;
358 ofs += hint;
359 }
360 a -= hint;
361
362 eassume (-1 <= lastofs && lastofs < ofs && ofs <= n);
363 /* Now a[lastofs] <= key < a[ofs], so key belongs somewhere to the
364 right of lastofs but no farther right than ofs. Do a binary
365 search, with invariant a[lastofs-1] <= key < a[ofs]. */
366 ++lastofs;
367 while (lastofs < ofs)
368 {
369 ptrdiff_t m = lastofs + ((ofs - lastofs) >> 1);
370
371 if (inorder (pred, key, a[m]))
372 ofs = m; /* Here key < a[m]. */
373 else
374 lastofs = m + 1; /* Here a[m] <= key. */
375 }
376 eassume (lastofs == ofs); /* Now a[ofs-1] <= key < a[ofs]. */
377 return ofs;
378 }
379
380
381 static void
382 merge_init (merge_state *ms, const ptrdiff_t list_size, Lisp_Object *lo,
383 const Lisp_Object predicate)
384 {
385 eassume (ms != NULL);
386
387 ms->a = ms->temparray;
388 ms->alloced = MERGESTATE_TEMP_SIZE;
389
390 ms->n = 0;
391 ms->min_gallop = GALLOP_WIN_MIN;
392 ms->listlen = list_size;
393 ms->listbase = lo;
394 ms->predicate = predicate;
395 ms->reloc = (struct reloc){NULL, NULL, NULL, 0};
396 }
397
398
399 /* The dynamically allocated memory may hold lisp objects during
400 merging. MERGE_MARKMEM marks them so they aren't reaped during
401 GC. */
402
403 static void
404 merge_markmem (void *arg)
405 {
406 merge_state *ms = arg;
407 eassume (ms != NULL);
408
409 if (ms->reloc.size != NULL && *ms->reloc.size > 0)
410 {
411 eassume (ms->reloc.src != NULL);
412 mark_objects (*ms->reloc.src, *ms->reloc.size);
413 }
414 }
415
416
417 /* Free all temp storage. If an exception occurs while merging,
418 relocate any lisp elements in temp storage back to the original
419 array. */
420
421 static void
422 cleanup_mem (void *arg)
423 {
424 merge_state *ms = arg;
425 eassume (ms != NULL);
426
427 /* If we have an exception while merging, some of the list elements
428 might only live in temp storage; we copy everything remaining in
429 the temp storage back into the original list. This ensures that
430 the original list has all of the original elements, although
431 their order is unpredictable. */
432
433 if (ms->reloc.order != 0 && *ms->reloc.size > 0)
434 {
435 eassume (*ms->reloc.src != NULL && *ms->reloc.dst != NULL);
436 ptrdiff_t n = *ms->reloc.size;
437 ptrdiff_t shift = ms->reloc.order == -1 ? 0 : n - 1;
438 memcpy (*ms->reloc.dst - shift, *ms->reloc.src, n * word_size);
439 }
440
441 /* Free any remaining temp storage. */
442 xfree (ms->a);
443 }
444
445
446 /* Allocate enough temp memory for NEED array slots. Any previously
447 allocated memory is first freed, and a cleanup routine is
448 registered to free memory at the very end of the sort, or on
449 exception. */
450
451 static void
452 merge_getmem (merge_state *ms, const ptrdiff_t need)
453 {
454 eassume (ms != NULL);
455
456 if (ms->a == ms->temparray)
457 {
458 /* We only get here if alloc is needed and this is the first
459 time, so we set up the unwind protection. */
460 specpdl_ref count = SPECPDL_INDEX ();
461 record_unwind_protect_ptr_mark (cleanup_mem, ms, merge_markmem);
462 ms->count = count;
463 }
464 else
465 {
466 /* We have previously alloced storage. Since we don't care
467 what's in the block we don't use realloc which would waste
468 cycles copying the old data. We just free and alloc
469 again. */
470 xfree (ms->a);
471 }
472 ms->a = xmalloc (need * word_size);
473 ms->alloced = need;
474 }
475
476
477 static inline void
478 needmem (merge_state *ms, ptrdiff_t na)
479 {
480 if (na > ms->alloced)
481 merge_getmem (ms, na);
482 }
483
484
485 /* Stably merge (in-place) the NA elements starting at SSA with the NB
486 elements starting at SSB = SSA + NA. NA and NB must be positive.
487 Require that SSA[NA-1] belongs at the end of the merge, and NA <=
488 NB. */
489
490 static void
491 merge_lo (merge_state *ms, Lisp_Object *ssa, ptrdiff_t na, Lisp_Object *ssb,
492 ptrdiff_t nb)
493 {
494 Lisp_Object pred = ms->predicate;
495
496 eassume (ms && ssa && ssb && na > 0 && nb > 0);
497 eassume (ssa + na == ssb);
498 needmem (ms, na);
499 memcpy (ms->a, ssa, na * word_size);
500 Lisp_Object *dest = ssa;
501 ssa = ms->a;
502
503 ms->reloc = (struct reloc){&ssa, &dest, &na, -1};
504
505 *dest++ = *ssb++;
506 --nb;
507 if (nb == 0)
508 goto Succeed;
509 if (na == 1)
510 goto CopyB;
511
512 ptrdiff_t min_gallop = ms->min_gallop;
513 for (;;)
514 {
515 ptrdiff_t acount = 0; /* The # of consecutive times A won. */
516
517 ptrdiff_t bcount = 0; /* The # of consecutive times B won. */
518
519 for (;;)
520 {
521 eassume (na > 1 && nb > 0);
522 if (inorder (pred, *ssb, *ssa))
523 {
524 *dest++ = *ssb++ ;
525 ++bcount;
526 acount = 0;
527 --nb;
528 if (nb == 0)
529 goto Succeed;
530 if (bcount >= min_gallop)
531 break;
532 }
533 else
534 {
535 *dest++ = *ssa++;
536 ++acount;
537 bcount = 0;
538 --na;
539 if (na == 1)
540 goto CopyB;
541 if (acount >= min_gallop)
542 break;
543 }
544 }
545
546 /* One run is winning so consistently that galloping may be a
547 huge speedup. We try that, and continue galloping until (if
548 ever) neither run appears to be winning consistently
549 anymore. */
550 ++min_gallop;
551 do {
552 eassume (na > 1 && nb > 0);
553 min_gallop -= min_gallop > 1;
554 ms->min_gallop = min_gallop;
555 ptrdiff_t k = gallop_right (ms, ssb[0], ssa, na, 0);
556 acount = k;
557 if (k)
558 {
559 memcpy (dest, ssa, k * word_size);
560 dest += k;
561 ssa += k;
562 na -= k;
563 if (na == 1)
564 goto CopyB;
565 /* While na==0 is impossible for a consistent comparison
566 function, we shouldn't assume that it is. */
567 if (na == 0)
568 goto Succeed;
569 }
570 *dest++ = *ssb++ ;
571 --nb;
572 if (nb == 0)
573 goto Succeed;
574
575 k = gallop_left (ms, ssa[0], ssb, nb, 0);
576 bcount = k;
577 if (k)
578 {
579 memmove (dest, ssb, k * word_size);
580 dest += k;
581 ssb += k;
582 nb -= k;
583 if (nb == 0)
584 goto Succeed;
585 }
586 *dest++ = *ssa++;
587 --na;
588 if (na == 1)
589 goto CopyB;
590 } while (acount >= GALLOP_WIN_MIN || bcount >= GALLOP_WIN_MIN);
591 ++min_gallop; /* Apply a penalty for leaving galloping mode. */
592 ms->min_gallop = min_gallop;
593 }
594 Succeed:
595 ms->reloc = (struct reloc){NULL, NULL, NULL, 0};
596
597 if (na)
598 memcpy (dest, ssa, na * word_size);
599 return;
600 CopyB:
601 eassume (na == 1 && nb > 0);
602 ms->reloc = (struct reloc){NULL, NULL, NULL, 0};
603
604 /* The last element of ssa belongs at the end of the merge. */
605 memmove (dest, ssb, nb * word_size);
606 dest[nb] = ssa[0];
607 }
608
609
610 /* Stably merge (in-place) the NA elements starting at SSA with the NB
611 elements starting at SSB = SSA + NA. NA and NB must be positive.
612 Require that SSA[NA-1] belongs at the end of the merge, and NA >=
613 NB. */
614
615 static void
616 merge_hi (merge_state *ms, Lisp_Object *ssa, ptrdiff_t na,
617 Lisp_Object *ssb, ptrdiff_t nb)
618 {
619 Lisp_Object pred = ms->predicate;
620
621 eassume (ms && ssa && ssb && na > 0 && nb > 0);
622 eassume (ssa + na == ssb);
623 needmem (ms, nb);
624 Lisp_Object *dest = ssb;
625 dest += nb - 1;
626 memcpy(ms->a, ssb, nb * word_size);
627 Lisp_Object *basea = ssa;
628 Lisp_Object *baseb = ms->a;
629 ssb = ms->a + nb - 1;
630 ssa += na - 1;
631
632 ms->reloc = (struct reloc){&baseb, &dest, &nb, 1};
633
634 *dest-- = *ssa--;
635 --na;
636 if (na == 0)
637 goto Succeed;
638 if (nb == 1)
639 goto CopyA;
640
641 ptrdiff_t min_gallop = ms->min_gallop;
642 for (;;) {
643 ptrdiff_t acount = 0; /* The # of consecutive times A won. */
644 ptrdiff_t bcount = 0; /* The # of consecutive times B won. */
645
646 for (;;) {
647 eassume (na > 0 && nb > 1);
648 if (inorder (pred, *ssb, *ssa))
649 {
650 *dest-- = *ssa--;
651 ++acount;
652 bcount = 0;
653 --na;
654 if (na == 0)
655 goto Succeed;
656 if (acount >= min_gallop)
657 break;
658 }
659 else
660 {
661 *dest-- = *ssb--;
662 ++bcount;
663 acount = 0;
664 --nb;
665 if (nb == 1)
666 goto CopyA;
667 if (bcount >= min_gallop)
668 break;
669 }
670 }
671
672 /* One run is winning so consistently that galloping may be a huge
673 speedup. Try that, and continue galloping until (if ever)
674 neither run appears to be winning consistently anymore. */
675 ++min_gallop;
676 do {
677 eassume (na > 0 && nb > 1);
678 min_gallop -= min_gallop > 1;
679 ms->min_gallop = min_gallop;
680 ptrdiff_t k = gallop_right (ms, ssb[0], basea, na, na - 1);
681 k = na - k;
682 acount = k;
683 if (k)
684 {
685 dest += -k;
686 ssa += -k;
687 memmove(dest + 1, ssa + 1, k * word_size);
688 na -= k;
689 if (na == 0)
690 goto Succeed;
691 }
692 *dest-- = *ssb--;
693 --nb;
694 if (nb == 1)
695 goto CopyA;
696
697 k = gallop_left (ms, ssa[0], baseb, nb, nb - 1);
698 k = nb - k;
699 bcount = k;
700 if (k)
701 {
702 dest += -k;
703 ssb += -k;
704 memcpy(dest + 1, ssb + 1, k * word_size);
705 nb -= k;
706 if (nb == 1)
707 goto CopyA;
708 /* While nb==0 is impossible for a consistent comparison
709 function we shouldn't assume that it is. */
710 if (nb == 0)
711 goto Succeed;
712 }
713 *dest-- = *ssa--;
714 --na;
715 if (na == 0)
716 goto Succeed;
717 } while (acount >= GALLOP_WIN_MIN || bcount >= GALLOP_WIN_MIN);
718 ++min_gallop; /* Apply a penalty for leaving galloping mode. */
719 ms->min_gallop = min_gallop;
720 }
721 Succeed:
722 ms->reloc = (struct reloc){NULL, NULL, NULL, 0};
723 if (nb)
724 memcpy (dest - nb + 1, baseb, nb * word_size);
725 return;
726 CopyA:
727 eassume (nb == 1 && na > 0);
728 ms->reloc = (struct reloc){NULL, NULL, NULL, 0};
729 /* The first element of ssb belongs at the front of the merge. */
730 memmove (dest + 1 - na, ssa + 1 - na, na * word_size);
731 dest += -na;
732 ssa += -na;
733 dest[0] = ssb[0];
734 }
735
736
737 /* Merge the two runs at stack indices I and I+1. */
738
739 static void
740 merge_at (merge_state *ms, const ptrdiff_t i)
741 {
742 eassume (ms != NULL);
743 eassume (ms->n >= 2);
744 eassume (i >= 0);
745 eassume (i == ms->n - 2 || i == ms->n - 3);
746
747 Lisp_Object *ssa = ms->pending[i].base;
748 ptrdiff_t na = ms->pending[i].len;
749 Lisp_Object *ssb = ms->pending[i + 1].base;
750 ptrdiff_t nb = ms->pending[i + 1].len;
751 eassume (na > 0 && nb > 0);
752 eassume (ssa + na == ssb);
753
754 /* Record the length of the combined runs. The current run i+1 goes
755 away after the merge. If i is the 3rd-last run now, slide the
756 last run (which isn't involved in this merge) over to i+1. */
757 ms->pending[i].len = na + nb;
758 if (i == ms->n - 3)
759 ms->pending[i + 1] = ms->pending[i + 2];
760 --ms->n;
761
762 /* Where does b start in a? Elements in a before that can be
763 ignored (they are already in place). */
764 ptrdiff_t k = gallop_right (ms, *ssb, ssa, na, 0);
765 eassume (k >= 0);
766 ssa += k;
767 na -= k;
768 if (na == 0)
769 return;
770
771 /* Where does a end in b? Elements in b after that can be ignored
772 (they are already in place). */
773 nb = gallop_left (ms, ssa[na - 1], ssb, nb, nb - 1);
774 if (nb == 0)
775 return;
776 eassume (nb > 0);
777 /* Merge what remains of the runs using a temp array with size
778 min(na, nb) elements. */
779 if (na <= nb)
780 merge_lo (ms, ssa, na, ssb, nb);
781 else
782 merge_hi (ms, ssa, na, ssb, nb);
783 }
784
785
786 /* Compute the "power" of the first of two adjacent runs beginning at
787 index S1, with the first having length N1 and the second (starting
788 at index S1+N1) having length N2. The run has total length N. */
789
790 static int
791 powerloop (const ptrdiff_t s1, const ptrdiff_t n1, const ptrdiff_t n2,
792 const ptrdiff_t n)
793 {
794 eassume (s1 >= 0);
795 eassume (n1 > 0 && n2 > 0);
796 eassume (s1 + n1 + n2 <= n);
797 /* The midpoints a and b are
798 a = s1 + n1/2
799 b = s1 + n1 + n2/2 = a + (n1 + n2)/2
800
801 These may not be integers because of the "/2", so we work with
802 2*a and 2*b instead. It makes no difference to the outcome,
803 since the bits in the expansion of (2*i)/n are merely shifted one
804 position from those of i/n. */
805 ptrdiff_t a = 2 * s1 + n1;
806 ptrdiff_t b = a + n1 + n2;
807 int result = 0;
808 /* Emulate a/n and b/n one bit a time, until their bits differ. */
809 for (;;)
810 {
811 ++result;
812 if (a >= n)
813 { /* Both quotient bits are now 1. */
814 eassume (b >= a);
815 a -= n;
816 b -= n;
817 }
818 else if (b >= n)
819 { /* a/n bit is 0 and b/n bit is 1. */
820 break;
821 } /* Otherwise both quotient bits are 0. */
822 eassume (a < b && b < n);
823 a <<= 1;
824 b <<= 1;
825 }
826 return result;
827 }
828
829
830 /* Update the state upon identifying a run of length N2. If there's
831 already a stretch on the stack, apply the "powersort" merge
832 strategy: compute the topmost stretch's "power" (depth in a
833 conceptual binary merge tree) and merge adjacent runs on the stack
834 with greater power. */
835
836 static void
837 found_new_run (merge_state *ms, const ptrdiff_t n2)
838 {
839 eassume (ms != NULL);
840 if (ms->n)
841 {
842 eassume (ms->n > 0);
843 struct stretch *p = ms->pending;
844 ptrdiff_t s1 = p[ms->n - 1].base - ms->listbase;
845 ptrdiff_t n1 = p[ms->n - 1].len;
846 int power = powerloop (s1, n1, n2, ms->listlen);
847 while (ms->n > 1 && p[ms->n - 2].power > power)
848 {
849 merge_at (ms, ms->n - 2);
850 }
851 eassume (ms->n < 2 || p[ms->n - 2].power < power);
852 p[ms->n - 1].power = power;
853 }
854 }
855
856
857 /* Unconditionally merge all stretches on the stack until only one
858 remains. */
859
860 static void
861 merge_force_collapse (merge_state *ms)
862 {
863 struct stretch *p = ms->pending;
864
865 eassume (ms != NULL);
866 while (ms->n > 1)
867 {
868 ptrdiff_t n = ms->n - 2;
869 if (n > 0 && p[n - 1].len < p[n + 1].len)
870 --n;
871 merge_at (ms, n);
872 }
873 }
874
875
876 /* Compute a good value for the minimum run length; natural runs
877 shorter than this are boosted artificially via binary insertion.
878
879 If N < 64, return N (it's too small to bother with fancy stuff).
880 Otherwise if N is an exact power of 2, return 32. Finally, return
881 an int k, 32 <= k <= 64, such that N/k is close to, but strictly
882 less than, an exact power of 2. */
883
884 static ptrdiff_t
885 merge_compute_minrun (ptrdiff_t n)
886 {
887 ptrdiff_t r = 0; /* r will become 1 if any non-zero bits are
888 shifted off. */
889
890 eassume (n >= 0);
891 while (n >= 64)
892 {
893 r |= n & 1;
894 n >>= 1;
895 }
896 return n + r;
897 }
898
899
900 static void
901 reverse_vector (Lisp_Object *s, const ptrdiff_t n)
902 {
903 for (ptrdiff_t i = 0; i < n >> 1; i++)
904 {
905 Lisp_Object tem = s[i];
906 s[i] = s[n - i - 1];
907 s[n - i - 1] = tem;
908 }
909 }
910
911 /* Sort the array SEQ with LENGTH elements in the order determined by
912 PREDICATE. */
913
914 void
915 tim_sort (Lisp_Object predicate, Lisp_Object *seq, const ptrdiff_t length)
916 {
917 if (SYMBOLP (predicate))
918 {
919 /* Attempt to resolve the function as far as possible ahead of time,
920 to avoid having to do it for each call. */
921 Lisp_Object fun = XSYMBOL (predicate)->u.s.function;
922 if (SYMBOLP (fun))
923 /* Function was an alias; use slow-path resolution. */
924 fun = indirect_function (fun);
925 /* Don't resolve to an autoload spec; that would be very slow. */
926 if (!NILP (fun) && !(CONSP (fun) && EQ (XCAR (fun), Qautoload)))
927 predicate = fun;
928 }
929
930 merge_state ms;
931 Lisp_Object *lo = seq;
932
933 merge_init (&ms, length, lo, predicate);
934
935 /* March over the array once, left to right, finding natural runs,
936 and extending short natural runs to minrun elements. */
937 const ptrdiff_t minrun = merge_compute_minrun (length);
938 ptrdiff_t nremaining = length;
939 do {
940 bool descending;
941
942 /* Identify the next run. */
943 ptrdiff_t n = count_run (&ms, lo, lo + nremaining, &descending);
944 if (descending)
945 reverse_vector (lo, n);
946 /* If the run is short, extend it to min(minrun, nremaining). */
947 if (n < minrun)
948 {
949 const ptrdiff_t force = nremaining <= minrun ?
950 nremaining : minrun;
951 binarysort (&ms, lo, lo + force, lo + n);
952 n = force;
953 }
954 eassume (ms.n == 0 || ms.pending[ms.n - 1].base +
955 ms.pending[ms.n - 1].len == lo);
956 found_new_run (&ms, n);
957 /* Push the new run on to the stack. */
958 eassume (ms.n < MAX_MERGE_PENDING);
959 ms.pending[ms.n].base = lo;
960 ms.pending[ms.n].len = n;
961 ++ms.n;
962 /* Advance to find the next run. */
963 lo += n;
964 nremaining -= n;
965 } while (nremaining);
966
967 merge_force_collapse (&ms);
968 eassume (ms.n == 1);
969 eassume (ms.pending[0].len == length);
970 lo = ms.pending[0].base;
971
972 if (ms.a != ms.temparray)
973 unbind_to (ms.count, Qnil);
974 }