15205b32dSDag-Erling Smørgrav /*-
25205b32dSDag-Erling Smørgrav * Copyright (c) 2025 Dag-Erling Smørgrav <des@FreeBSD.org>
35205b32dSDag-Erling Smørgrav *
45205b32dSDag-Erling Smørgrav * SPDX-License-Identifier: BSD-2-Clause
55205b32dSDag-Erling Smørgrav */
65205b32dSDag-Erling Smørgrav
75205b32dSDag-Erling Smørgrav #include <atf-c.h>
85205b32dSDag-Erling Smørgrav #include <stdbool.h>
95205b32dSDag-Erling Smørgrav #include <stdio.h>
105205b32dSDag-Erling Smørgrav #include <stdlib.h>
115205b32dSDag-Erling Smørgrav #include <time.h>
125205b32dSDag-Erling Smørgrav #include <unistd.h>
135205b32dSDag-Erling Smørgrav
145205b32dSDag-Erling Smørgrav /*-
155205b32dSDag-Erling Smørgrav * Measures qsort(3) runtime with pathological input and verify that it
165205b32dSDag-Erling Smørgrav * stays close to N * log2(N).
175205b32dSDag-Erling Smørgrav *
185205b32dSDag-Erling Smørgrav * Thanks to Vivian Hussey for the proof of concept.
195205b32dSDag-Erling Smørgrav *
205205b32dSDag-Erling Smørgrav * The input we construct is similar to a sweep from 0 to N where each
215205b32dSDag-Erling Smørgrav * half, except for the first element, has been reversed; for instance,
225205b32dSDag-Erling Smørgrav * with N = 8, we get { 0, 3, 2, 1, 4, 8, 7, 6 }. This triggers a bug in
235205b32dSDag-Erling Smørgrav * the BSD qsort(3) where it will switch to insertion sort if the pivots
245205b32dSDag-Erling Smørgrav * are sorted.
255205b32dSDag-Erling Smørgrav *
265205b32dSDag-Erling Smørgrav * This article goes into more detail about the bug and its origin:
275205b32dSDag-Erling Smørgrav *
285205b32dSDag-Erling Smørgrav * https://www.raygard.net/2022/02/26/Re-engineering-a-qsort-part-3
295205b32dSDag-Erling Smørgrav *
305205b32dSDag-Erling Smørgrav * With this optimization (the `if (swap_cnt == 0)` block), qsort(3) needs
315205b32dSDag-Erling Smørgrav * roughly N * N / 4 comparisons to sort our pathological input. Without
325205b32dSDag-Erling Smørgrav * it, it needs only a little more than N * log2(N) comparisons.
335205b32dSDag-Erling Smørgrav */
345205b32dSDag-Erling Smørgrav
355205b32dSDag-Erling Smørgrav /* we stop testing once a single takes longer than this */
365205b32dSDag-Erling Smørgrav #define MAXRUNSECS 10
375205b32dSDag-Erling Smørgrav
385205b32dSDag-Erling Smørgrav static bool debugging;
395205b32dSDag-Erling Smørgrav
405205b32dSDag-Erling Smørgrav static uintmax_t ncmp;
415205b32dSDag-Erling Smørgrav
425205b32dSDag-Erling Smørgrav static int
intcmp(const void * a,const void * b)435205b32dSDag-Erling Smørgrav intcmp(const void *a, const void *b)
445205b32dSDag-Erling Smørgrav {
455205b32dSDag-Erling Smørgrav ncmp++;
465205b32dSDag-Erling Smørgrav return ((*(int *)a > *(int *)b) - (*(int *)a < *(int *)b));
475205b32dSDag-Erling Smørgrav }
485205b32dSDag-Erling Smørgrav
495205b32dSDag-Erling Smørgrav static void
qsort_bench(int log2n)505205b32dSDag-Erling Smørgrav qsort_bench(int log2n)
515205b32dSDag-Erling Smørgrav {
525205b32dSDag-Erling Smørgrav uintmax_t n = 1LLU << log2n;
535205b32dSDag-Erling Smørgrav int *buf;
545205b32dSDag-Erling Smørgrav
555205b32dSDag-Erling Smørgrav /* fill an array with a pathological pattern */
565205b32dSDag-Erling Smørgrav ATF_REQUIRE(buf = malloc(n * sizeof(*buf)));
575205b32dSDag-Erling Smørgrav buf[0] = 0;
585205b32dSDag-Erling Smørgrav buf[n / 2] = n / 2;
595205b32dSDag-Erling Smørgrav for (unsigned int i = 1; i < n / 2; i++) {
605205b32dSDag-Erling Smørgrav buf[i] = n / 2 - i;
615205b32dSDag-Erling Smørgrav buf[n / 2 + i] = n - i;
625205b32dSDag-Erling Smørgrav }
635205b32dSDag-Erling Smørgrav
645205b32dSDag-Erling Smørgrav ncmp = 0;
655205b32dSDag-Erling Smørgrav qsort(buf, n, sizeof(*buf), intcmp);
665205b32dSDag-Erling Smørgrav
675205b32dSDag-Erling Smørgrav /* check result and free array */
685205b32dSDag-Erling Smørgrav if (debugging) {
695205b32dSDag-Erling Smørgrav for (unsigned int i = 1; i < n; i++) {
705205b32dSDag-Erling Smørgrav ATF_REQUIRE_MSG(buf[i] > buf[i - 1],
715205b32dSDag-Erling Smørgrav "array is not sorted");
725205b32dSDag-Erling Smørgrav }
735205b32dSDag-Erling Smørgrav }
745205b32dSDag-Erling Smørgrav free(buf);
755205b32dSDag-Erling Smørgrav
765205b32dSDag-Erling Smørgrav /* check that runtime does not exceed N² */
775205b32dSDag-Erling Smørgrav ATF_CHECK_MSG(ncmp / n < n,
785205b32dSDag-Erling Smørgrav "runtime %ju exceeds N² for N = %ju", ncmp, n);
795205b32dSDag-Erling Smørgrav
805205b32dSDag-Erling Smørgrav /* check that runtime does not exceed N log N by much */
815205b32dSDag-Erling Smørgrav ATF_CHECK_MSG(ncmp / n <= log2n + 1,
825205b32dSDag-Erling Smørgrav "runtime %ju exceeds N log N for N = %ju", ncmp, n);
835205b32dSDag-Erling Smørgrav }
845205b32dSDag-Erling Smørgrav
855205b32dSDag-Erling Smørgrav ATF_TC_WITHOUT_HEAD(qsort_bench);
ATF_TC_BODY(qsort_bench,tc)865205b32dSDag-Erling Smørgrav ATF_TC_BODY(qsort_bench, tc)
875205b32dSDag-Erling Smørgrav {
885205b32dSDag-Erling Smørgrav struct timespec t0, t1;
895205b32dSDag-Erling Smørgrav uintmax_t tus;
905205b32dSDag-Erling Smørgrav
915205b32dSDag-Erling Smørgrav for (int i = 10; i <= 30; i++) {
925205b32dSDag-Erling Smørgrav clock_gettime(CLOCK_UPTIME, &t0);
935205b32dSDag-Erling Smørgrav qsort_bench(i);
945205b32dSDag-Erling Smørgrav clock_gettime(CLOCK_UPTIME, &t1);
955205b32dSDag-Erling Smørgrav tus = t1.tv_sec * 1000000 + t1.tv_nsec / 1000;
965205b32dSDag-Erling Smørgrav tus -= t0.tv_sec * 1000000 + t0.tv_nsec / 1000;
975205b32dSDag-Erling Smørgrav if (debugging) {
985205b32dSDag-Erling Smørgrav fprintf(stderr, "N = 2^%d in %ju.%06jus\n",
995205b32dSDag-Erling Smørgrav i, tus / 1000000, tus % 1000000);
1005205b32dSDag-Erling Smørgrav }
1015205b32dSDag-Erling Smørgrav /* stop once an individual run exceeds our limit */
1025205b32dSDag-Erling Smørgrav if (tus / 1000000 >= MAXRUNSECS)
1035205b32dSDag-Erling Smørgrav break;
1045205b32dSDag-Erling Smørgrav }
1055205b32dSDag-Erling Smørgrav }
1065205b32dSDag-Erling Smørgrav
ATF_TP_ADD_TCS(tp)1075205b32dSDag-Erling Smørgrav ATF_TP_ADD_TCS(tp)
1085205b32dSDag-Erling Smørgrav {
1095205b32dSDag-Erling Smørgrav debugging = !getenv("__RUNNING_INSIDE_ATF_RUN") &&
1105205b32dSDag-Erling Smørgrav isatty(STDERR_FILENO);
1115205b32dSDag-Erling Smørgrav ATF_TP_ADD_TC(tp, qsort_bench);
1125205b32dSDag-Erling Smørgrav return (atf_no_error());
1135205b32dSDag-Erling Smørgrav }
114