From e3a0287389ecbef998708ae754c9a2fb1065eb0e Mon Sep 17 00:00:00 2001 From: Bob Burger Date: Thu, 16 Apr 2026 16:40:50 -0400 Subject: [PATCH 1/5] export Spopcount and use extrinsics when possible --- boot/pb/scheme.h | 1 + c/alloc.c | 28 +++++++++++++++++++++++++++- c/build.zuo | 2 +- c/fasl.c | 1 - c/gc.c | 1 - c/gcwrapper.c | 1 - c/popcount.h | 35 ----------------------------------- s/mkheader.ss | 1 + 8 files changed, 30 insertions(+), 40 deletions(-) delete mode 100644 c/popcount.h diff --git a/boot/pb/scheme.h b/boot/pb/scheme.h index b8ae62c3f..9dbc61e84 100644 --- a/boot/pb/scheme.h +++ b/boot/pb/scheme.h @@ -140,6 +140,7 @@ typedef unsigned char octet; #define Sstring_length(x) ((iptr)((uptr)(*((iptr *)TO_VOIDP((uptr)(x)+1)))>>4)) #define Sstring_ref(x,i) Schar_value(((string_char *)TO_VOIDP((uptr)(x)+9))[i]) #define Sunbox(x) (*((ptr *)TO_VOIDP((uptr)(x)+9))) +EXPORT int Spopcount(uptr); #define Sstencil_vector_length(x) Spopcount(((uptr)(*((iptr *)TO_VOIDP((uptr)(x)+1))))>>6) #define Sstencil_vector_ref(x,i) (((ptr *)TO_VOIDP((uptr)(x)+9))[i]) EXPORT iptr Sinteger_value(ptr); diff --git a/c/alloc.c b/c/alloc.c index baf4bcc6b..c0371e716 100644 --- a/c/alloc.c +++ b/c/alloc.c @@ -15,7 +15,9 @@ */ #include "system.h" -#include "popcount.h" +#if defined(_MSC_VER) +#include /* for Spopcount below */ +#endif /* locally defined functions */ static void maybe_queue_fire_collector(thread_gc *tgc); @@ -798,6 +800,30 @@ ptr S_null_immutable_string(void) { return v; } +int Spopcount(uptr x) { +#if defined(__clang__) || defined(__GNUC__) +#if ptr_bits == 32 + return __builtin_popcount(x); +#else + return __builtin_popcountl(x); +#endif +#elif defined(_MSC_VER) +#if ptr_bits == 32 + return (int)__popcnt(x); +#else + return (int)__popcnt64(x); +#endif +#else + /* Kernighan's method */ + int count = 0; + while (x != 0) { + x &= x - 1; + ++count; + } + return count; +#endif +} + static ptr stencil_vector(uptr type, uptr mask) { ptr tc; ptr p; iptr d; diff --git a/c/build.zuo b/c/build.zuo index 34df04987..dde5b3b6b 100644 --- a/c/build.zuo +++ b/c/build.zuo @@ -119,7 +119,7 @@ (map at-source (list "system.h" "types.h" "version.h" "globals.h" "externs.h" "segment.h" "atomic.h" "thread.h" "sort.h" "compress-io.h" - "nocurses.h" "popcount.h")) + "nocurses.h")) (list c-config-file) (map at-mach (list "equates.h" diff --git a/c/fasl.c b/c/fasl.c index d6d8d972a..96b2915b6 100644 --- a/c/fasl.c +++ b/c/fasl.c @@ -195,7 +195,6 @@ #include "system.h" #include "zlib.h" -#include "popcount.h" #ifdef WIN32 #include diff --git a/c/gc.c b/c/gc.c index 4906e4a75..905e2e35e 100644 --- a/c/gc.c +++ b/c/gc.c @@ -18,7 +18,6 @@ #ifndef WIN32 #include #endif /* WIN32 */ -#include "popcount.h" #include /* diff --git a/c/gcwrapper.c b/c/gcwrapper.c index 1b4b42096..8d53d9767 100644 --- a/c/gcwrapper.c +++ b/c/gcwrapper.c @@ -15,7 +15,6 @@ */ #include "system.h" -#include "popcount.h" /* locally defined functions */ static void segment_tell(uptr seg); diff --git a/c/popcount.h b/c/popcount.h deleted file mode 100644 index 134b6b368..000000000 --- a/c/popcount.h +++ /dev/null @@ -1,35 +0,0 @@ - -#if __GNUC__ >= 5 -static int Spopcount_32(U32 x) -{ - return __builtin_popcount(x); -} -#else -static int Spopcount_32(U32 x) -{ - /* http://bits.stephan-brumme.com/countBits.html */ - /* count bits of each 2-bit chunk */ - x = x - ((x >> 1) & 0x55555555); - /* count bits of each 4-bit chunk */ - x = (x & 0x33333333) + ((x >> 2) & 0x33333333); - /* count bits of each 8-bit chunk */ - x = x + (x >> 4); - /* mask out junk */ - x &= 0xF0F0F0F; - /* add all four 8-bit chunks */ - return (x * 0x01010101) >> 24; -} -#endif - -#if ptr_bits == 32 -static int Spopcount(uptr x) -{ - return Spopcount_32((U32)x); -} -#elif ptr_bits == 64 -static int Spopcount(uptr x) -{ - return Spopcount_32((U32)(x & 0xFFFFFFFF)) + Spopcount_32((U32)(x >> 32)); -} -#endif - diff --git a/s/mkheader.ss b/s/mkheader.ss index 214611a27..de1b76c6f 100644 --- a/s/mkheader.ss +++ b/s/mkheader.ss @@ -340,6 +340,7 @@ (defref Sunbox box ref) + (export "int" "Spopcount" "(uptr)") (def "Sstencil_vector_length(x)" (format "Spopcount(((uptr)~a)>>~d)" (access "x" stencil-vector type) From f4fca085c2f406832c144cb804153f9a38c7c3ab Mon Sep 17 00:00:00 2001 From: Bob Burger Date: Thu, 16 Apr 2026 16:49:32 -0400 Subject: [PATCH 2/5] update release notes --- release_notes/release_notes.stex | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/release_notes/release_notes.stex b/release_notes/release_notes.stex index 7980eb7cb..57dd3668e 100644 --- a/release_notes/release_notes.stex +++ b/release_notes/release_notes.stex @@ -2987,6 +2987,12 @@ in fasl files does not generally make sense. %----------------------------------------------------------------------------- \section{Bug Fixes}\label{section:bugfixes} + +\subsection{Declare \scheme{Spopcount} in scheme.h (10.4.0)} + +A bug where the header file scheme.h refers to an undeclared \scheme{Spopcount} function +has been fixed. + \subsection{Eager port closing on error in \scheme{open-source-file} (10.4.0)} When \scheme{open-source-file} fails on a non-seekable device, the file descriptor port From 75785e5f2d08789d986d06bb886681250fbd0489 Mon Sep 17 00:00:00 2001 From: Bob Burger Date: Thu, 16 Apr 2026 17:32:17 -0400 Subject: [PATCH 3/5] use Windows intrinsics when the CPU supports them --- c/alloc.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/c/alloc.c b/c/alloc.c index c0371e716..86690f947 100644 --- a/c/alloc.c +++ b/c/alloc.c @@ -15,8 +15,9 @@ */ #include "system.h" -#if defined(_MSC_VER) +#ifdef _MSC_VER #include /* for Spopcount below */ +static int has_popcnt = 0; #endif /* locally defined functions */ @@ -26,6 +27,11 @@ void S_alloc_init(void) { ISPC s; IGEN g; UINT i; if (S_boot_time) { +#ifdef _MSC_VER + int cpuinfo[4]; + __cpuid(cpuinfo, 1); + has_popcnt = (cpuinfo[2] & (1 << 23)) != 0; /* ECX bit 23 */ +#endif ptr tc = TO_PTR(S_G.thread_context); GCDATA(tc) = TO_PTR(&S_G.main_thread_gc); @@ -802,18 +808,21 @@ ptr S_null_immutable_string(void) { int Spopcount(uptr x) { #if defined(__clang__) || defined(__GNUC__) -#if ptr_bits == 32 - return __builtin_popcount(x); -#else - return __builtin_popcountl(x); -#endif -#elif defined(_MSC_VER) -#if ptr_bits == 32 - return (int)__popcnt(x); -#else - return (int)__popcnt64(x); -#endif + if (sizeof(x) <= sizeof(unsigned long)) + return __builtin_popcountl((unsigned long)x); + else + return __builtin_popcountll((unsigned long long)x); #else +# if defined(_MSC_VER) + if (has_popcnt) { +# if defined(_WIN64) + return (int)__popcnt64((unsigned __int64)x); +# else + return (int)__popcnt((unsigned int)x); +# endif +# endif + } + /* Kernighan's method */ int count = 0; while (x != 0) { From 6e8494ed5c6667676632c11e18197f52c7d829dc Mon Sep 17 00:00:00 2001 From: Bob Burger Date: Fri, 17 Apr 2026 16:11:15 -0400 Subject: [PATCH 4/5] put static inline Spopcount definition into scheme.h eliminate __popcnt on Windows --- boot/pb/scheme.h | 17 ++++++++++++++++- c/alloc.c | 36 ------------------------------------ s/mkheader.ss | 26 ++++++++++++++++++++++++-- 3 files changed, 40 insertions(+), 39 deletions(-) diff --git a/boot/pb/scheme.h b/boot/pb/scheme.h index 9dbc61e84..34a51c878 100644 --- a/boot/pb/scheme.h +++ b/boot/pb/scheme.h @@ -140,7 +140,22 @@ typedef unsigned char octet; #define Sstring_length(x) ((iptr)((uptr)(*((iptr *)TO_VOIDP((uptr)(x)+1)))>>4)) #define Sstring_ref(x,i) Schar_value(((string_char *)TO_VOIDP((uptr)(x)+9))[i]) #define Sunbox(x) (*((ptr *)TO_VOIDP((uptr)(x)+9))) -EXPORT int Spopcount(uptr); +static inline int Spopcount(uptr x) { +#if defined(__clang__) || defined(__GNUC__) + return __builtin_popcountll((unsigned long)x); +#else + /* count bits of each 2-bit chunk */ + x = x - ((x >> 1) & 0x5555555555555555ULL); + /* count bits of each 4-bit chunk */ + x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL); + /* count bits of each 8-bit chunk */ + x = x + (x >> 4); + /* mask out junk */ + x &= 0x0F0F0F0F0F0F0F0FULL; + /* add all 8-bit chunks */ + return (x * 0x0101010101010101ULL) >> 56; +#endif +} #define Sstencil_vector_length(x) Spopcount(((uptr)(*((iptr *)TO_VOIDP((uptr)(x)+1))))>>6) #define Sstencil_vector_ref(x,i) (((ptr *)TO_VOIDP((uptr)(x)+9))[i]) EXPORT iptr Sinteger_value(ptr); diff --git a/c/alloc.c b/c/alloc.c index 86690f947..23bb80b02 100644 --- a/c/alloc.c +++ b/c/alloc.c @@ -15,10 +15,6 @@ */ #include "system.h" -#ifdef _MSC_VER -#include /* for Spopcount below */ -static int has_popcnt = 0; -#endif /* locally defined functions */ static void maybe_queue_fire_collector(thread_gc *tgc); @@ -27,11 +23,6 @@ void S_alloc_init(void) { ISPC s; IGEN g; UINT i; if (S_boot_time) { -#ifdef _MSC_VER - int cpuinfo[4]; - __cpuid(cpuinfo, 1); - has_popcnt = (cpuinfo[2] & (1 << 23)) != 0; /* ECX bit 23 */ -#endif ptr tc = TO_PTR(S_G.thread_context); GCDATA(tc) = TO_PTR(&S_G.main_thread_gc); @@ -806,33 +797,6 @@ ptr S_null_immutable_string(void) { return v; } -int Spopcount(uptr x) { -#if defined(__clang__) || defined(__GNUC__) - if (sizeof(x) <= sizeof(unsigned long)) - return __builtin_popcountl((unsigned long)x); - else - return __builtin_popcountll((unsigned long long)x); -#else -# if defined(_MSC_VER) - if (has_popcnt) { -# if defined(_WIN64) - return (int)__popcnt64((unsigned __int64)x); -# else - return (int)__popcnt((unsigned int)x); -# endif -# endif - } - - /* Kernighan's method */ - int count = 0; - while (x != 0) { - x &= x - 1; - ++count; - } - return count; -#endif -} - static ptr stencil_vector(uptr type, uptr mask) { ptr tc; ptr p; iptr d; diff --git a/s/mkheader.ss b/s/mkheader.ss index de1b76c6f..0e4749883 100644 --- a/s/mkheader.ss +++ b/s/mkheader.ss @@ -339,8 +339,30 @@ (format "Schar_value~a" (access "x" "i" string data))) (defref Sunbox box ref) - - (export "int" "Spopcount" "(uptr)") + + (let-values + ([(suffix fives threes junk-mask ones shift) + (constant-case ptr-bits + [(32) (values "l" "0x55555555" "0x33333333" "0xF0F0F0F" "0x01010101" 24)] + [(64) (values "ll" "0x5555555555555555ULL" "0x3333333333333333ULL" + "0x0F0F0F0F0F0F0F0FULL" "0x0101010101010101ULL" 56)])]) + (pr "static inline int Spopcount(uptr x) {\n") + (pr "#if defined(__clang__) || defined(__GNUC__)\n") + (pr " return __builtin_popcount~a((unsigned long)x);\n" suffix) + (pr "#else\n") + (pr " /* count bits of each 2-bit chunk */\n") + (pr " x = x - ((x >> 1) & ~a);\n" fives) + (pr " /* count bits of each 4-bit chunk */\n") + (pr " x = (x & ~a) + ((x >> 2) & ~a);\n" threes threes) + (pr " /* count bits of each 8-bit chunk */\n") + (pr " x = x + (x >> 4);\n") + (pr " /* mask out junk */\n") + (pr " x &= ~a;\n" junk-mask) + (pr " /* add all 8-bit chunks */\n") + (pr " return (x * ~a) >> ~d;\n" ones shift) + (pr "#endif\n") + (pr "}\n")) + (def "Sstencil_vector_length(x)" (format "Spopcount(((uptr)~a)>>~d)" (access "x" stencil-vector type) From 2b005cf7abfbb945c63f41265354fb359120f7a4 Mon Sep 17 00:00:00 2001 From: Bob Burger Date: Fri, 17 Apr 2026 17:04:48 -0400 Subject: [PATCH 5/5] remove extra spaces --- boot/pb/scheme.h | 6 +++--- s/mkheader.ss | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/boot/pb/scheme.h b/boot/pb/scheme.h index 34a51c878..a00c05858 100644 --- a/boot/pb/scheme.h +++ b/boot/pb/scheme.h @@ -145,11 +145,11 @@ static inline int Spopcount(uptr x) { return __builtin_popcountll((unsigned long)x); #else /* count bits of each 2-bit chunk */ - x = x - ((x >> 1) & 0x5555555555555555ULL); + x = x - ((x >> 1) & 0x5555555555555555ULL); /* count bits of each 4-bit chunk */ - x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL); + x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL); /* count bits of each 8-bit chunk */ - x = x + (x >> 4); + x = x + (x >> 4); /* mask out junk */ x &= 0x0F0F0F0F0F0F0F0FULL; /* add all 8-bit chunks */ diff --git a/s/mkheader.ss b/s/mkheader.ss index 0e4749883..8ee98002b 100644 --- a/s/mkheader.ss +++ b/s/mkheader.ss @@ -351,11 +351,11 @@ (pr " return __builtin_popcount~a((unsigned long)x);\n" suffix) (pr "#else\n") (pr " /* count bits of each 2-bit chunk */\n") - (pr " x = x - ((x >> 1) & ~a);\n" fives) + (pr " x = x - ((x >> 1) & ~a);\n" fives) (pr " /* count bits of each 4-bit chunk */\n") - (pr " x = (x & ~a) + ((x >> 2) & ~a);\n" threes threes) + (pr " x = (x & ~a) + ((x >> 2) & ~a);\n" threes threes) (pr " /* count bits of each 8-bit chunk */\n") - (pr " x = x + (x >> 4);\n") + (pr " x = x + (x >> 4);\n") (pr " /* mask out junk */\n") (pr " x &= ~a;\n" junk-mask) (pr " /* add all 8-bit chunks */\n")