From 1c6b42104d378334b5f0d6cb4bf788cebaf5e53c Mon Sep 17 00:00:00 2001 From: Travis Cline Date: Wed, 4 Mar 2026 22:43:13 -0800 Subject: [PATCH 01/21] purego: support 32 args on unix platforms --- func.go | 16 ++++++-- sys_amd64.s | 36 ++++++++++++++++- sys_arm64.s | 36 ++++++++++++++++- sys_loong64.s | 36 ++++++++++++++++- sys_ppc64le.s | 60 ++++++++++++++++++++++------ sys_riscv64.s | 57 +++++++++++++++++++++------ sys_s390x.s | 45 ++++++++++++++++++--- syscall.go | 67 +++++++++++++++++++++++++++---- syscall_32bit.go | 15 ++++++- syscall_64bit_fallback.go | 83 +++++++++++++++++++++++++++++++++++++++ syscall_unix.go | 4 ++ 11 files changed, 411 insertions(+), 44 deletions(-) create mode 100644 syscall_64bit_fallback.go diff --git a/func.go b/func.go index 2192dd7c..5293db3a 100644 --- a/func.go +++ b/func.go @@ -141,6 +141,7 @@ func RegisterFunc(fptr any, cfn uintptr) { // to avoid crashing with too many arguments var ints int var floats int + floatArgRegs := numOfFloatRegisters() var stack int for i := 0; i < ty.NumIn(); i++ { arg := ty.In(i) @@ -167,7 +168,7 @@ func RegisterFunc(fptr any, cfn uintptr) { stack++ } case reflect.Float32, reflect.Float64: - if floats < numOfFloatRegisters() { + if floats < floatArgRegs { floats++ } else { stack++ @@ -202,7 +203,11 @@ func RegisterFunc(fptr any, cfn uintptr) { } } - sizeOfStack := maxArgs - numOfIntegerRegisters() + argsLimit := maxArgs + if runtime.GOOS == "windows" { + argsLimit = 15 + } + sizeOfStack := argsLimit - numOfIntegerRegisters() // On Darwin ARM64, use byte-based validation since arguments pack efficiently. // See https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" { @@ -224,6 +229,7 @@ func RegisterFunc(fptr any, cfn uintptr) { // since numOfFloatRegisters() is a function call, not a constant. // maxArgs is always greater than or equal to numOfFloatRegisters() so this is safe. var floats [maxArgs]uintptr + floatArgRegs := numOfFloatRegisters() var numInts int var numFloats int var numStack int @@ -243,9 +249,13 @@ func RegisterFunc(fptr any, cfn uintptr) { } } addFloat = func(x uintptr) { - if numFloats < numOfFloatRegisters() { + if numFloats < floatArgRegs { floats[numFloats] = x numFloats++ + if runtime.GOARCH == "ppc64le" { + // Keep stack indexing in sync with ppc64le callback decoding. + numStack++ + } } else { addStack(x) } diff --git a/sys_amd64.s b/sys_amd64.s index 8719a065..9a1da9f6 100644 --- a/sys_amd64.s +++ b/sys_amd64.s @@ -8,7 +8,7 @@ #include "go_asm.h" #include "funcdata.h" -#define STACK_SIZE 80 +#define STACK_SIZE 224 #define PTR_ADDRESS (STACK_SIZE - 8) // syscall15X calls a function in libc on behalf of the syscall package. @@ -77,6 +77,40 @@ TEXT syscall15X(SB), NOSPLIT, $STACK_SIZE MOVQ R12, 56(SP) // push a14 MOVQ syscall15Args_a15(R11), R12 MOVQ R12, 64(SP) // push a15 + MOVQ syscall15Args_a16(R11), R12 + MOVQ R12, 72(SP) // push a16 + MOVQ syscall15Args_a17(R11), R12 + MOVQ R12, 80(SP) // push a17 + MOVQ syscall15Args_a18(R11), R12 + MOVQ R12, 88(SP) // push a18 + MOVQ syscall15Args_a19(R11), R12 + MOVQ R12, 96(SP) // push a19 + MOVQ syscall15Args_a20(R11), R12 + MOVQ R12, 104(SP) // push a20 + MOVQ syscall15Args_a21(R11), R12 + MOVQ R12, 112(SP) // push a21 + MOVQ syscall15Args_a22(R11), R12 + MOVQ R12, 120(SP) // push a22 + MOVQ syscall15Args_a23(R11), R12 + MOVQ R12, 128(SP) // push a23 + MOVQ syscall15Args_a24(R11), R12 + MOVQ R12, 136(SP) // push a24 + MOVQ syscall15Args_a25(R11), R12 + MOVQ R12, 144(SP) // push a25 + MOVQ syscall15Args_a26(R11), R12 + MOVQ R12, 152(SP) // push a26 + MOVQ syscall15Args_a27(R11), R12 + MOVQ R12, 160(SP) // push a27 + MOVQ syscall15Args_a28(R11), R12 + MOVQ R12, 168(SP) // push a28 + MOVQ syscall15Args_a29(R11), R12 + MOVQ R12, 176(SP) // push a29 + MOVQ syscall15Args_a30(R11), R12 + MOVQ R12, 184(SP) // push a30 + MOVQ syscall15Args_a31(R11), R12 + MOVQ R12, 192(SP) // push a31 + MOVQ syscall15Args_a32(R11), R12 + MOVQ R12, 200(SP) // push a32 XORL AX, AX // vararg: say "no float args" MOVQ syscall15Args_fn(R11), R10 // fn diff --git a/sys_arm64.s b/sys_arm64.s index 26201011..51685722 100644 --- a/sys_arm64.s +++ b/sys_arm64.s @@ -7,7 +7,7 @@ #include "go_asm.h" #include "funcdata.h" -#define STACK_SIZE 64 +#define STACK_SIZE 208 #define PTR_ADDRESS (STACK_SIZE - 8) // syscall15X calls a function in libc on behalf of the syscall package. @@ -75,6 +75,40 @@ TEXT syscall15X(SB), NOSPLIT, $0 MOVD R10, 40(RSP) // push a14 onto stack MOVD syscall15Args_a15(R9), R10 MOVD R10, 48(RSP) // push a15 onto stack + MOVD syscall15Args_a16(R9), R10 + MOVD R10, 56(RSP) // push a16 onto stack + MOVD syscall15Args_a17(R9), R10 + MOVD R10, 64(RSP) // push a17 onto stack + MOVD syscall15Args_a18(R9), R10 + MOVD R10, 72(RSP) // push a18 onto stack + MOVD syscall15Args_a19(R9), R10 + MOVD R10, 80(RSP) // push a19 onto stack + MOVD syscall15Args_a20(R9), R10 + MOVD R10, 88(RSP) // push a20 onto stack + MOVD syscall15Args_a21(R9), R10 + MOVD R10, 96(RSP) // push a21 onto stack + MOVD syscall15Args_a22(R9), R10 + MOVD R10, 104(RSP) // push a22 onto stack + MOVD syscall15Args_a23(R9), R10 + MOVD R10, 112(RSP) // push a23 onto stack + MOVD syscall15Args_a24(R9), R10 + MOVD R10, 120(RSP) // push a24 onto stack + MOVD syscall15Args_a25(R9), R10 + MOVD R10, 128(RSP) // push a25 onto stack + MOVD syscall15Args_a26(R9), R10 + MOVD R10, 136(RSP) // push a26 onto stack + MOVD syscall15Args_a27(R9), R10 + MOVD R10, 144(RSP) // push a27 onto stack + MOVD syscall15Args_a28(R9), R10 + MOVD R10, 152(RSP) // push a28 onto stack + MOVD syscall15Args_a29(R9), R10 + MOVD R10, 160(RSP) // push a29 onto stack + MOVD syscall15Args_a30(R9), R10 + MOVD R10, 168(RSP) // push a30 onto stack + MOVD syscall15Args_a31(R9), R10 + MOVD R10, 176(RSP) // push a31 onto stack + MOVD syscall15Args_a32(R9), R10 + MOVD R10, 184(RSP) // push a32 onto stack MOVD syscall15Args_fn(R9), R10 // fn BL (R10) diff --git a/sys_loong64.s b/sys_loong64.s index 420b855c..cd39346c 100644 --- a/sys_loong64.s +++ b/sys_loong64.s @@ -7,7 +7,7 @@ #include "go_asm.h" #include "funcdata.h" -#define STACK_SIZE 64 +#define STACK_SIZE 208 #define PTR_ADDRESS (STACK_SIZE - 8) // syscall15X calls a function in libc on behalf of the syscall package. @@ -76,6 +76,40 @@ TEXT syscall15X(SB), NOSPLIT, $0 MOVV R12, 40(R3) MOVV syscall15Args_a15(R13), R12 MOVV R12, 48(R3) + MOVV syscall15Args_a16(R13), R12 + MOVV R12, 56(R3) + MOVV syscall15Args_a17(R13), R12 + MOVV R12, 64(R3) + MOVV syscall15Args_a18(R13), R12 + MOVV R12, 72(R3) + MOVV syscall15Args_a19(R13), R12 + MOVV R12, 80(R3) + MOVV syscall15Args_a20(R13), R12 + MOVV R12, 88(R3) + MOVV syscall15Args_a21(R13), R12 + MOVV R12, 96(R3) + MOVV syscall15Args_a22(R13), R12 + MOVV R12, 104(R3) + MOVV syscall15Args_a23(R13), R12 + MOVV R12, 112(R3) + MOVV syscall15Args_a24(R13), R12 + MOVV R12, 120(R3) + MOVV syscall15Args_a25(R13), R12 + MOVV R12, 128(R3) + MOVV syscall15Args_a26(R13), R12 + MOVV R12, 136(R3) + MOVV syscall15Args_a27(R13), R12 + MOVV R12, 144(R3) + MOVV syscall15Args_a28(R13), R12 + MOVV R12, 152(R3) + MOVV syscall15Args_a29(R13), R12 + MOVV R12, 160(R3) + MOVV syscall15Args_a30(R13), R12 + MOVV R12, 168(R3) + MOVV syscall15Args_a31(R13), R12 + MOVV R12, 176(R3) + MOVV syscall15Args_a32(R13), R12 + MOVV R12, 184(R3) MOVV syscall15Args_fn(R13), R12 JAL (R12) diff --git a/sys_ppc64le.s b/sys_ppc64le.s index 391b30a9..fab9554f 100644 --- a/sys_ppc64le.s +++ b/sys_ppc64le.s @@ -9,7 +9,7 @@ // PPC64LE ELFv2 ABI: // - Integer args: R3-R10 (8 registers) -// - Float args: F1-F8 (8 registers) +// - Float args: F1-F13 (13 registers) // - Return: R3 (integer), F1 (float) // - Stack pointer: R1 // - Link register: LR (special) @@ -24,16 +24,15 @@ // 32(R1) - Parameter save area start (8 * 8 = 64 bytes for R3-R10) // 96(R1) - First stack arg (a9) - this is where callee looks // 104(R1) - Second stack arg (a10) -// 112-152 - Stack args a11-a15 (5 * 8 = 40 bytes) -// 160(R1) - TOC save (we put it here, outside param save area) -// 168(R1) - saved args pointer -// 176(R1) - padding for 16-byte alignment -// Total: 176 bytes +// 112-280 - Stack args a11-a32 +// 288(R1) - TOC save (outside parameter save area) +// 296(R1) - saved args pointer +// Total: 304 bytes -#define STACK_SIZE 176 +#define STACK_SIZE 304 #define LR_SAVE 16 -#define TOC_SAVE 160 -#define ARGP_SAVE 168 +#define TOC_SAVE 288 +#define ARGP_SAVE 296 GLOBL ·syscall15XABI0(SB), NOPTR|RODATA, $8 DATA ·syscall15XABI0(SB)/8, $syscall15X(SB) @@ -54,7 +53,7 @@ TEXT syscall15X(SB), NOSPLIT, $0 // R11 := args pointer (syscall15Args*) MOVD R3, R11 - // Load float args into F1-F8 + // Load float args into F1-F13 FMOVD syscall15Args_f1(R11), F1 FMOVD syscall15Args_f2(R11), F2 FMOVD syscall15Args_f3(R11), F3 @@ -63,6 +62,11 @@ TEXT syscall15X(SB), NOSPLIT, $0 FMOVD syscall15Args_f6(R11), F6 FMOVD syscall15Args_f7(R11), F7 FMOVD syscall15Args_f8(R11), F8 + FMOVD syscall15Args_f9(R11), F9 + FMOVD syscall15Args_f10(R11), F10 + FMOVD syscall15Args_f11(R11), F11 + FMOVD syscall15Args_f12(R11), F12 + FMOVD syscall15Args_f13(R11), F13 // Load integer args into R3-R10 MOVD syscall15Args_a1(R11), R3 @@ -74,7 +78,7 @@ TEXT syscall15X(SB), NOSPLIT, $0 MOVD syscall15Args_a7(R11), R9 MOVD syscall15Args_a8(R11), R10 - // Spill a9-a15 onto the stack (stack parameters start at 96(R1)) + // Spill a9-a32 onto the stack (stack parameters start at 96(R1)) // Per ELFv2: parameter save area is 32-95, stack args start at 96 MOVD ARGP_SAVE(R1), R11 // reload args pointer MOVD syscall15Args_a9(R11), R12 @@ -91,6 +95,40 @@ TEXT syscall15X(SB), NOSPLIT, $0 MOVD R12, 136(R1) // a14 at 136(R1) MOVD syscall15Args_a15(R11), R12 MOVD R12, 144(R1) // a15 at 144(R1) + MOVD syscall15Args_a16(R11), R12 + MOVD R12, 152(R1) // a16 at 152(R1) + MOVD syscall15Args_a17(R11), R12 + MOVD R12, 160(R1) // a17 at 160(R1) + MOVD syscall15Args_a18(R11), R12 + MOVD R12, 168(R1) // a18 at 168(R1) + MOVD syscall15Args_a19(R11), R12 + MOVD R12, 176(R1) // a19 at 176(R1) + MOVD syscall15Args_a20(R11), R12 + MOVD R12, 184(R1) // a20 at 184(R1) + MOVD syscall15Args_a21(R11), R12 + MOVD R12, 192(R1) // a21 at 192(R1) + MOVD syscall15Args_a22(R11), R12 + MOVD R12, 200(R1) // a22 at 200(R1) + MOVD syscall15Args_a23(R11), R12 + MOVD R12, 208(R1) // a23 at 208(R1) + MOVD syscall15Args_a24(R11), R12 + MOVD R12, 216(R1) // a24 at 216(R1) + MOVD syscall15Args_a25(R11), R12 + MOVD R12, 224(R1) // a25 at 224(R1) + MOVD syscall15Args_a26(R11), R12 + MOVD R12, 232(R1) // a26 at 232(R1) + MOVD syscall15Args_a27(R11), R12 + MOVD R12, 240(R1) // a27 at 240(R1) + MOVD syscall15Args_a28(R11), R12 + MOVD R12, 248(R1) // a28 at 248(R1) + MOVD syscall15Args_a29(R11), R12 + MOVD R12, 256(R1) // a29 at 256(R1) + MOVD syscall15Args_a30(R11), R12 + MOVD R12, 264(R1) // a30 at 264(R1) + MOVD syscall15Args_a31(R11), R12 + MOVD R12, 272(R1) // a31 at 272(R1) + MOVD syscall15Args_a32(R11), R12 + MOVD R12, 280(R1) // a32 at 280(R1) // Call function: load fn and call MOVD syscall15Args_fn(R11), R12 diff --git a/sys_riscv64.s b/sys_riscv64.s index e7e887e1..b6784310 100644 --- a/sys_riscv64.s +++ b/sys_riscv64.s @@ -8,17 +8,16 @@ #include "funcdata.h" // Stack usage: -// 0(SP) - 56(SP): stack args a9-a15 (7 * 8 bytes = 56) -// 56(SP) - 64(SP): saved RA (x1) -// 64(SP) - 72(SP): saved X9 (s1) -// 72(SP) - 80(SP): saved X18 (s2) -// 80(SP) - 88(SP): saved args pointer (original X10) -// 88(SP) - 96(SP): padding -#define STACK_SIZE 96 -#define SAVE_RA 56 -#define SAVE_X9 64 -#define SAVE_X18 72 -#define SAVE_ARGP 80 +// 0(SP) - 192(SP): stack args a9-a32 (24 * 8 bytes) +// 192(SP) - 200(SP): saved RA (x1) +// 200(SP) - 208(SP): saved X9 (s1) +// 208(SP) - 216(SP): saved X18 (s2) +// 216(SP) - 224(SP): saved args pointer (original X10) +#define STACK_SIZE 224 +#define SAVE_RA 192 +#define SAVE_X9 200 +#define SAVE_X18 208 +#define SAVE_ARGP 216 GLOBL ·syscall15XABI0(SB), NOPTR|RODATA, $8 DATA ·syscall15XABI0(SB)/8, $syscall15X(SB) @@ -58,7 +57,7 @@ TEXT syscall15X(SB), NOSPLIT, $0 MOV syscall15Args_a7(X9), X16 MOV syscall15Args_a8(X9), X17 - // Spill a9-a15 onto the stack (C ABI) + // Spill a9-a32 onto the stack (C ABI) MOV syscall15Args_a9(X9), X18 MOV X18, 0(SP) MOV syscall15Args_a10(X9), X18 @@ -73,6 +72,40 @@ TEXT syscall15X(SB), NOSPLIT, $0 MOV X18, 40(SP) MOV syscall15Args_a15(X9), X18 MOV X18, 48(SP) + MOV syscall15Args_a16(X9), X18 + MOV X18, 56(SP) + MOV syscall15Args_a17(X9), X18 + MOV X18, 64(SP) + MOV syscall15Args_a18(X9), X18 + MOV X18, 72(SP) + MOV syscall15Args_a19(X9), X18 + MOV X18, 80(SP) + MOV syscall15Args_a20(X9), X18 + MOV X18, 88(SP) + MOV syscall15Args_a21(X9), X18 + MOV X18, 96(SP) + MOV syscall15Args_a22(X9), X18 + MOV X18, 104(SP) + MOV syscall15Args_a23(X9), X18 + MOV X18, 112(SP) + MOV syscall15Args_a24(X9), X18 + MOV X18, 120(SP) + MOV syscall15Args_a25(X9), X18 + MOV X18, 128(SP) + MOV syscall15Args_a26(X9), X18 + MOV X18, 136(SP) + MOV syscall15Args_a27(X9), X18 + MOV X18, 144(SP) + MOV syscall15Args_a28(X9), X18 + MOV X18, 152(SP) + MOV syscall15Args_a29(X9), X18 + MOV X18, 160(SP) + MOV syscall15Args_a30(X9), X18 + MOV X18, 168(SP) + MOV syscall15Args_a31(X9), X18 + MOV X18, 176(SP) + MOV syscall15Args_a32(X9), X18 + MOV X18, 184(SP) // Call fn // IMPORTANT: preserve RA across this call (we saved it above) diff --git a/sys_s390x.s b/sys_s390x.s index a044e34d..040cb0e0 100644 --- a/sys_s390x.s +++ b/sys_s390x.s @@ -24,14 +24,13 @@ // // We need space for: // - 160 bytes standard frame (with register save area) -// - Stack args a6-a15 (10 * 8 = 80 bytes) +// - Stack args a6-a32 (27 * 8 = 216 bytes) // - Saved args pointer (8 bytes) -// - Padding for alignment -// Total: 264 bytes (rounded to 8-byte alignment) +// Total: 384 bytes -#define STACK_SIZE 264 +#define STACK_SIZE 384 #define STACK_ARGS 160 -#define ARGP_SAVE 248 +#define ARGP_SAVE 376 GLOBL ·syscall15XABI0(SB), NOPTR|RODATA, $8 DATA ·syscall15XABI0(SB)/8, $syscall15X(SB) @@ -65,7 +64,7 @@ TEXT syscall15X(SB), NOSPLIT, $0 MOVD syscall15Args_a4(R9), R5 MOVD syscall15Args_a5(R9), R6 - // Spill remaining args (a6-a15) onto the stack at 160(R15) + // Spill remaining args (a6-a32) onto the stack at 160(R15) MOVD ARGP_SAVE(R15), R9 // reload args pointer MOVD syscall15Args_a6(R9), R1 MOVD R1, (STACK_ARGS+0*8)(R15) @@ -87,6 +86,40 @@ TEXT syscall15X(SB), NOSPLIT, $0 MOVD R1, (STACK_ARGS+8*8)(R15) MOVD syscall15Args_a15(R9), R1 MOVD R1, (STACK_ARGS+9*8)(R15) + MOVD syscall15Args_a16(R9), R1 + MOVD R1, (STACK_ARGS+10*8)(R15) + MOVD syscall15Args_a17(R9), R1 + MOVD R1, (STACK_ARGS+11*8)(R15) + MOVD syscall15Args_a18(R9), R1 + MOVD R1, (STACK_ARGS+12*8)(R15) + MOVD syscall15Args_a19(R9), R1 + MOVD R1, (STACK_ARGS+13*8)(R15) + MOVD syscall15Args_a20(R9), R1 + MOVD R1, (STACK_ARGS+14*8)(R15) + MOVD syscall15Args_a21(R9), R1 + MOVD R1, (STACK_ARGS+15*8)(R15) + MOVD syscall15Args_a22(R9), R1 + MOVD R1, (STACK_ARGS+16*8)(R15) + MOVD syscall15Args_a23(R9), R1 + MOVD R1, (STACK_ARGS+17*8)(R15) + MOVD syscall15Args_a24(R9), R1 + MOVD R1, (STACK_ARGS+18*8)(R15) + MOVD syscall15Args_a25(R9), R1 + MOVD R1, (STACK_ARGS+19*8)(R15) + MOVD syscall15Args_a26(R9), R1 + MOVD R1, (STACK_ARGS+20*8)(R15) + MOVD syscall15Args_a27(R9), R1 + MOVD R1, (STACK_ARGS+21*8)(R15) + MOVD syscall15Args_a28(R9), R1 + MOVD R1, (STACK_ARGS+22*8)(R15) + MOVD syscall15Args_a29(R9), R1 + MOVD R1, (STACK_ARGS+23*8)(R15) + MOVD syscall15Args_a30(R9), R1 + MOVD R1, (STACK_ARGS+24*8)(R15) + MOVD syscall15Args_a31(R9), R1 + MOVD R1, (STACK_ARGS+25*8)(R15) + MOVD syscall15Args_a32(R9), R1 + MOVD R1, (STACK_ARGS+26*8)(R15) // Call function MOVD syscall15Args_fn(R9), R1 diff --git a/syscall.go b/syscall.go index 7b45383d..88ea2b18 100644 --- a/syscall.go +++ b/syscall.go @@ -1,10 +1,15 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: 2022 The Ebitengine Authors -//go:build !386 && !arm && (darwin || freebsd || linux || netbsd || windows) +//go:build (!386 && !arm && windows) || ((amd64 || arm64) && (darwin || freebsd || linux || netbsd)) || (linux && (loong64 || ppc64le || riscv64 || s390x)) package purego +import ( + "runtime" + "unsafe" +) + // CDecl marks a function as being called using the __cdecl calling convention as defined in // the [MSDocs] when passed to NewCallback. It must be the first argument to the function. // This is only useful on 386 Windows, but it is safe to use on other platforms. @@ -13,13 +18,15 @@ package purego type CDecl struct{} const ( - maxArgs = 15 + maxArgs = 32 ) type syscall15Args struct { - fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr - f1, f2, f3, f4, f5, f6, f7, f8 uintptr - arm64_r8 uintptr + fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr + a16, a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, a28, a29, a30, a31, a32 uintptr + f1, f2, f3, f4, f5, f6, f7, f8 uintptr + f9, f10, f11, f12, f13 uintptr + arm64_r8 uintptr } func (s *syscall15Args) Set(fn uintptr, ints []uintptr, floats []uintptr, r8 uintptr) { @@ -39,6 +46,23 @@ func (s *syscall15Args) Set(fn uintptr, ints []uintptr, floats []uintptr, r8 uin s.a13 = ints[12] s.a14 = ints[13] s.a15 = ints[14] + s.a16 = ints[15] + s.a17 = ints[16] + s.a18 = ints[17] + s.a19 = ints[18] + s.a20 = ints[19] + s.a21 = ints[20] + s.a22 = ints[21] + s.a23 = ints[22] + s.a24 = ints[23] + s.a25 = ints[24] + s.a26 = ints[25] + s.a27 = ints[26] + s.a28 = ints[27] + s.a29 = ints[28] + s.a30 = ints[29] + s.a31 = ints[30] + s.a32 = ints[31] s.f1 = floats[0] s.f2 = floats[1] s.f3 = floats[2] @@ -47,6 +71,11 @@ func (s *syscall15Args) Set(fn uintptr, ints []uintptr, floats []uintptr, r8 uin s.f6 = floats[5] s.f7 = floats[6] s.f8 = floats[7] + s.f9 = floats[8] + s.f10 = floats[9] + s.f11 = floats[10] + s.f12 = floats[11] + s.f13 = floats[12] s.arm64_r8 = r8 } @@ -73,11 +102,35 @@ func SyscallN(fn uintptr, args ...uintptr) (r1, r2, err uintptr) { if fn == 0 { panic("purego: fn is nil") } - if len(args) > maxArgs { + limit := maxArgs + if runtime.GOOS == "windows" { + limit = 15 + } + if len(args) > limit { panic("purego: too many arguments to SyscallN") } + + // Windows uses syscall.Syscall15 in syscall_windows.go. + if runtime.GOOS == "windows" { + var tmp [maxArgs]uintptr + copy(tmp[:], args) + return syscall_syscall15X( + fn, + tmp[0], tmp[1], tmp[2], tmp[3], tmp[4], tmp[5], tmp[6], tmp[7], + tmp[8], tmp[9], tmp[10], tmp[11], tmp[12], tmp[13], tmp[14], + ) + } + + syscall := thePool.Get().(*syscall15Args) + defer thePool.Put(syscall) + *syscall = syscall15Args{} + // add padding so there is no out-of-bounds slicing var tmp [maxArgs]uintptr copy(tmp[:], args) - return syscall_syscall15X(fn, tmp[0], tmp[1], tmp[2], tmp[3], tmp[4], tmp[5], tmp[6], tmp[7], tmp[8], tmp[9], tmp[10], tmp[11], tmp[12], tmp[13], tmp[14]) + var floats [maxArgs]uintptr + copy(floats[:], tmp[:]) + syscall.Set(fn, tmp[:], floats[:], 0) + runtime_cgocall(syscall15XABI0, unsafe.Pointer(syscall)) + return syscall.a1, syscall.a2, syscall.a3 } diff --git a/syscall_32bit.go b/syscall_32bit.go index f9f37630..866fa6ef 100644 --- a/syscall_32bit.go +++ b/syscall_32bit.go @@ -5,6 +5,8 @@ package purego +import "unsafe" + // CDecl marks a function as being called using the __cdecl calling convention as defined in // the [MSDocs] when passed to NewCallback. It must be the first argument to the function. // This is only useful on 386 Windows, but it is safe to use on other platforms. @@ -102,8 +104,17 @@ func SyscallN(fn uintptr, args ...uintptr) (r1, r2, err uintptr) { if len(args) > maxArgs { panic("purego: too many arguments to SyscallN") } - // add padding so there is no out-of-bounds slicing + + syscall := thePool.Get().(*syscall15Args) + defer thePool.Put(syscall) + *syscall = syscall15Args{} + + // Add padding so there is no out-of-bounds slicing. var tmp [maxArgs]uintptr copy(tmp[:], args) - return syscall_syscall15X(fn, tmp[0], tmp[1], tmp[2], tmp[3], tmp[4], tmp[5], tmp[6], tmp[7], tmp[8], tmp[9], tmp[10], tmp[11], tmp[12], tmp[13], tmp[14]) + var floats [16]uintptr + copy(floats[:], tmp[:16]) + syscall.Set(fn, tmp[:], floats[:], 0) + runtime_cgocall(syscall15XABI0, unsafe.Pointer(syscall)) + return syscall.a1, syscall.a2, syscall.a3 } diff --git a/syscall_64bit_fallback.go b/syscall_64bit_fallback.go new file mode 100644 index 00000000..f3519c55 --- /dev/null +++ b/syscall_64bit_fallback.go @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2026 The Ebitengine Authors + +//go:build !386 && !arm && !windows && (darwin || freebsd || linux || netbsd) && !(amd64 || arm64 || loong64 || ppc64le || riscv64 || s390x) + +package purego + +// CDecl marks a function as being called using the __cdecl calling convention as defined in +// the [MSDocs] when passed to NewCallback. It must be the first argument to the function. +// This is only useful on 386 Windows, but it is safe to use on other platforms. +// +// [MSDocs]: https://learn.microsoft.com/en-us/cpp/cpp/cdecl?view=msvc-170 +type CDecl struct{} + +const ( + maxArgs = 15 +) + +type syscall15Args struct { + fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr + f1, f2, f3, f4, f5, f6, f7, f8 uintptr + arm64_r8 uintptr +} + +func (s *syscall15Args) Set(fn uintptr, ints []uintptr, floats []uintptr, r8 uintptr) { + s.fn = fn + s.a1 = ints[0] + s.a2 = ints[1] + s.a3 = ints[2] + s.a4 = ints[3] + s.a5 = ints[4] + s.a6 = ints[5] + s.a7 = ints[6] + s.a8 = ints[7] + s.a9 = ints[8] + s.a10 = ints[9] + s.a11 = ints[10] + s.a12 = ints[11] + s.a13 = ints[12] + s.a14 = ints[13] + s.a15 = ints[14] + s.f1 = floats[0] + s.f2 = floats[1] + s.f3 = floats[2] + s.f4 = floats[3] + s.f5 = floats[4] + s.f6 = floats[5] + s.f7 = floats[6] + s.f8 = floats[7] + s.arm64_r8 = r8 +} + +// SyscallN takes fn, a C function pointer and a list of arguments as uintptr. +// There is an internal maximum number of arguments that SyscallN can take. It panics +// when the maximum is exceeded. It returns the result and the libc error code if there is one. +// +// In order to call this function properly make sure to follow all the rules specified in [unsafe.Pointer] +// especially point 4. +// +// NOTE: SyscallN does not properly call functions that have both integer and float parameters. +// See discussion comment https://github.com/ebiten/purego/pull/1#issuecomment-1128057607 +// for an explanation of why that is. +// +// On amd64, if there are more than 8 floats the 9th and so on will be placed incorrectly on the +// stack. +// +// The pragma go:nosplit is not needed at this function declaration because it uses go:uintptrescapes +// which forces all the objects that the uintptrs point to onto the heap where a stack split won't affect +// their memory location. +// +//go:uintptrescapes +func SyscallN(fn uintptr, args ...uintptr) (r1, r2, err uintptr) { + if fn == 0 { + panic("purego: fn is nil") + } + if len(args) > maxArgs { + panic("purego: too many arguments to SyscallN") + } + // add padding so there is no out-of-bounds slicing + var tmp [maxArgs]uintptr + copy(tmp[:], args) + return syscall_syscall15X(fn, tmp[0], tmp[1], tmp[2], tmp[3], tmp[4], tmp[5], tmp[6], tmp[7], tmp[8], tmp[9], tmp[10], tmp[11], tmp[12], tmp[13], tmp[14]) +} diff --git a/syscall_unix.go b/syscall_unix.go index cee86887..4912635b 100644 --- a/syscall_unix.go +++ b/syscall_unix.go @@ -136,6 +136,7 @@ func callbackWrap(a *callbackArgs) { // stackFrame points to stack-passed arguments. On most architectures this is // contiguous with frame (after register args), but on ppc64le it's separate. var stackFrame *[callbackMaxFrame]uintptr + isPPC64LE := runtime.GOARCH == "ppc64le" if sf := a.stackFrame(); sf != nil { // Only ppc64le uses separate stackArgs pointer due to NOSPLIT constraints stackFrame = (*[callbackMaxFrame]uintptr)(sf) @@ -194,6 +195,9 @@ func callbackWrap(a *callbackArgs) { } else { args[i] = reflect.NewAt(inType, unsafe.Pointer(&frame[floatsN])).Elem() } + if isPPC64LE { + stackSlot += slots + } } floatsN += slots case reflect.Struct: From dd48b3415c24669eae6aafb569d08dfe4562c89a Mon Sep 17 00:00:00 2001 From: Travis Cline Date: Wed, 4 Mar 2026 22:43:13 -0800 Subject: [PATCH 02/21] purego: extend high-arg ABI coverage --- func_test.go | 170 ++++++++++++++++++++++++++++++------ testdata/abitest/abi_test.c | 38 ++++++++ 2 files changed, 182 insertions(+), 26 deletions(-) diff --git a/func_test.go b/func_test.go index 6ae69c40..bac318fc 100644 --- a/func_test.go +++ b/func_test.go @@ -373,8 +373,8 @@ func TestABI_ArgumentPassing(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if tt.name == "20_int32" && (runtime.GOOS != "darwin" || runtime.GOARCH != "arm64") { - t.Skip("20 int32 arguments only supported on Darwin ARM64 with smart stack checking") + if tt.name == "20_int32" && runtime.GOOS == "windows" { + t.Skip("windows supports at most 15 arguments") } if tt.name == "10_float32" && (runtime.GOARCH == "loong64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "riscv64" || runtime.GOARCH == "s390x") { t.Skip("float32 stack arguments not yet supported on this platform") @@ -394,39 +394,157 @@ func TestABI_ArgumentPassing(t *testing.T) { } }) } -} -func TestABI_TooManyArguments(t *testing.T) { - if runtime.GOOS != "darwin" || runtime.GOARCH != "arm64" { - t.Skip("This test is specific to Darwin ARM64") - } + t.Run("20_uintptr", func(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("windows supports at most 15 arguments") + } - libFileName := filepath.Join(t.TempDir(), "abitest.so") - if err := buildSharedLib("CC", libFileName, filepath.Join("testdata", "abitest", "abi_test.c")); err != nil { - t.Fatal(err) - } - lib, err := load.OpenLibrary(libFileName) - if err != nil { - t.Fatalf("Failed to open library %q: %v", libFileName, err) - } - t.Cleanup(func() { - if err := load.CloseLibrary(lib); err != nil { - t.Errorf("Failed to close library: %v", err) + var fn func(uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr) uintptr + purego.RegisterLibFunc(&fn, lib, "stack_20_uintptr") + got := fn(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20) + const want = uintptr(210) + if got != want { + t.Fatalf("stack_20_uintptr: got %d, want %d", got, want) } }) - // Test that 35 int64 arguments (27 slots needed) exceeds the limit - t.Run("35_int64_exceeds_limit", func(t *testing.T) { + t.Run("32_uintptr", func(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("windows supports at most 15 arguments") + } + + var fn func( + uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, + uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, + uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, + uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, + ) uintptr + purego.RegisterLibFunc(&fn, lib, "stack_32_uintptr") + got := fn( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ) + const want = uintptr(528) + if got != want { + t.Fatalf("stack_32_uintptr: got %d, want %d", got, want) + } + }) + + t.Run("syscalln_20_uintptr", func(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("windows supports at most 15 arguments") + } + + fn, err := load.OpenSymbol(lib, "stack_20_uintptr") + if err != nil { + t.Fatalf("OpenSymbol(stack_20_uintptr) failed: %v", err) + } + got, _, _ := purego.SyscallN(fn, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + ) + const want = uintptr(210) + if got != want { + t.Fatalf("stack_20_uintptr SyscallN: got %d, want %d", got, want) + } + }) + + t.Run("syscalln_32_uintptr", func(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("windows supports at most 15 arguments") + } + + fn, err := load.OpenSymbol(lib, "stack_32_uintptr") + if err != nil { + t.Fatalf("OpenSymbol(stack_32_uintptr) failed: %v", err) + } + got, _, _ := purego.SyscallN(fn, + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ) + const want = uintptr(528) + if got != want { + t.Fatalf("stack_32_uintptr SyscallN: got %d, want %d", got, want) + } + }) + + t.Run("32_mixed_int_float", func(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("windows supports at most 15 arguments") + } + if unsafe.Sizeof(uintptr(0)) == 4 { + t.Skip("requires 64-bit uintptr slots") + } + if runtime.GOARCH == "ppc64le" { + t.Skip("mixed int/float stack arguments are not yet supported on ppc64le") + } + + var fn func( + uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, + uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, + float64, float64, float64, float64, float64, float64, float64, float64, + float64, float64, float64, float64, float64, float64, float64, float64, + ) float64 + purego.RegisterLibFunc(&fn, lib, "stack_32_mixed_int_float") + got := fn( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + ) + const want = 5168.0 + if got != want { + t.Fatalf("stack_32_mixed_int_float: got %f, want %f", got, want) + } + }) +} + +func TestABI_TooManyArguments(t *testing.T) { + mustPanic := func(t *testing.T, want string, f func()) { + t.Helper() defer func() { - if r := recover(); r != nil { - t.Logf("Got expected panic: %v", r) - } else { - t.Errorf("Expected panic but didn't get one") + r := recover() + if r == nil { + t.Fatalf("expected panic %q, got none", want) + } + got := fmt.Sprint(r) + if got != want { + t.Fatalf("panic mismatch:\n got: %q\n want: %q", got, want) } }() + f() + } + + // 33 int64 parameters exceeds maxArgs=32 on non-Windows targets. + // On Windows this is still an overflow because maxArgs is 15. + t.Run("registerfunc_33_int64_exceeds_limit", func(t *testing.T) { + mustPanic(t, "purego: too many stack arguments", func() { + var fn func( + int64, int64, int64, int64, int64, int64, int64, int64, + int64, int64, int64, int64, int64, int64, int64, int64, + int64, int64, int64, int64, int64, int64, int64, int64, + int64, int64, int64, int64, int64, int64, int64, int64, + int64, + ) + purego.RegisterFunc(&fn, 1) + }) + }) - var fn func(*byte, uintptr, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64) - purego.RegisterLibFunc(&fn, lib, "stack_35_int64_exceeds") + t.Run("syscalln_33_uintptr_exceeds_limit", func(t *testing.T) { + mustPanic(t, "purego: too many arguments to SyscallN", func() { + purego.SyscallN(1, + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + 33, + ) + }) }) } diff --git a/testdata/abitest/abi_test.c b/testdata/abitest/abi_test.c index 446d9e99..824e453d 100644 --- a/testdata/abitest/abi_test.c +++ b/testdata/abitest/abi_test.c @@ -129,3 +129,41 @@ void stack_25_int64_exceeds(char *buf, size_t bufsize, int64_t a1, int64_t a2, i snprintf(buf, bufsize, "%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, a24, a25); } + +uintptr_t stack_20_uintptr( + uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4, uintptr_t a5, + uintptr_t a6, uintptr_t a7, uintptr_t a8, uintptr_t a9, uintptr_t a10, + uintptr_t a11, uintptr_t a12, uintptr_t a13, uintptr_t a14, uintptr_t a15, + uintptr_t a16, uintptr_t a17, uintptr_t a18, uintptr_t a19, uintptr_t a20 +) { + return a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8 + a9 + a10 + + a11 + a12 + a13 + a14 + a15 + a16 + a17 + a18 + a19 + a20; +} + +uintptr_t stack_32_uintptr( + uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4, uintptr_t a5, uintptr_t a6, uintptr_t a7, uintptr_t a8, + uintptr_t a9, uintptr_t a10, uintptr_t a11, uintptr_t a12, uintptr_t a13, uintptr_t a14, uintptr_t a15, uintptr_t a16, + uintptr_t a17, uintptr_t a18, uintptr_t a19, uintptr_t a20, uintptr_t a21, uintptr_t a22, uintptr_t a23, uintptr_t a24, + uintptr_t a25, uintptr_t a26, uintptr_t a27, uintptr_t a28, uintptr_t a29, uintptr_t a30, uintptr_t a31, uintptr_t a32 +) { + return a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8 + + a9 + a10 + a11 + a12 + a13 + a14 + a15 + a16 + + a17 + a18 + a19 + a20 + a21 + a22 + a23 + a24 + + a25 + a26 + a27 + a28 + a29 + a30 + a31 + a32; +} + +double stack_32_mixed_int_float( + uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4, uintptr_t a5, uintptr_t a6, uintptr_t a7, uintptr_t a8, + uintptr_t a9, uintptr_t a10, uintptr_t a11, uintptr_t a12, uintptr_t a13, uintptr_t a14, uintptr_t a15, uintptr_t a16, + double f1, double f2, double f3, double f4, double f5, double f6, double f7, double f8, + double f9, double f10, double f11, double f12, double f13, double f14, double f15, double f16 +) { + return (double)a1 * 1 + (double)a2 * 2 + (double)a3 * 3 + (double)a4 * 4 + + (double)a5 * 5 + (double)a6 * 6 + (double)a7 * 7 + (double)a8 * 8 + + (double)a9 * 9 + (double)a10 * 10 + (double)a11 * 11 + (double)a12 * 12 + + (double)a13 * 13 + (double)a14 * 14 + (double)a15 * 15 + (double)a16 * 16 + + f1 * 17 + f2 * 18 + f3 * 19 + f4 * 20 + + f5 * 21 + f6 * 22 + f7 * 23 + f8 * 24 + + f9 * 25 + f10 * 26 + f11 * 27 + f12 * 28 + + f13 * 29 + f14 * 30 + f15 * 31 + f16 * 32; +} From 7711628df6af151c5e383e9c8664d1376ab007c5 Mon Sep 17 00:00:00 2001 From: Travis Cline Date: Thu, 5 Mar 2026 00:12:42 -0800 Subject: [PATCH 03/21] purego: use SyscallN for high-arg windows calls --- func.go | 22 ++++++++++++---------- func_test.go | 25 +------------------------ syscall.go | 16 +++------------- syscall_unix.go | 4 ++++ syscall_windows.go | 8 ++++++-- 5 files changed, 26 insertions(+), 49 deletions(-) diff --git a/func.go b/func.go index 5293db3a..e8c5bf54 100644 --- a/func.go +++ b/func.go @@ -204,13 +204,14 @@ func RegisterFunc(fptr any, cfn uintptr) { } argsLimit := maxArgs - if runtime.GOOS == "windows" { - argsLimit = 15 - } sizeOfStack := argsLimit - numOfIntegerRegisters() - // On Darwin ARM64, use byte-based validation since arguments pack efficiently. - // See https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms - if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" { + if runtime.GOOS == "windows" { + if ints+floats+stack > argsLimit { + panic("purego: too many stack arguments") + } + } else if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" { + // On Darwin ARM64, use byte-based validation since arguments pack efficiently. + // See https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms stackBytes := estimateStackBytes(ty) maxStackBytes := sizeOfStack * 8 if stackBytes > maxStackBytes { @@ -267,6 +268,9 @@ func RegisterFunc(fptr any, cfn uintptr) { // This is in contrast to how macOS and Linux pass arguments which // tries to use as many registers as possible in the calling convention. addStack = func(x uintptr) { + if numStack >= maxArgs { + panic("purego: too many stack arguments") + } sysargs[numStack] = x numStack++ } @@ -332,10 +336,8 @@ func RegisterFunc(fptr any, cfn uintptr) { runtime_cgocall(syscall15XABI0, unsafe.Pointer(syscall)) } else { *syscall = syscall15Args{} - // This is a fallback for Windows amd64, 386, and arm. Note this may not support floats - syscall.a1, syscall.a2, _ = syscall_syscall15X(cfn, sysargs[0], sysargs[1], sysargs[2], sysargs[3], sysargs[4], - sysargs[5], sysargs[6], sysargs[7], sysargs[8], sysargs[9], sysargs[10], sysargs[11], - sysargs[12], sysargs[13], sysargs[14]) + // This is a fallback for Windows amd64, 386, and arm. + syscall.a1, syscall.a2, _ = syscall_syscallN(cfn, sysargs[:numStack]...) syscall.f1 = syscall.a2 // on amd64 a2 stores the float return. On 32bit platforms floats aren't support } if ty.NumOut() == 0 { diff --git a/func_test.go b/func_test.go index bac318fc..f3800c32 100644 --- a/func_test.go +++ b/func_test.go @@ -373,9 +373,6 @@ func TestABI_ArgumentPassing(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if tt.name == "20_int32" && runtime.GOOS == "windows" { - t.Skip("windows supports at most 15 arguments") - } if tt.name == "10_float32" && (runtime.GOARCH == "loong64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "riscv64" || runtime.GOARCH == "s390x") { t.Skip("float32 stack arguments not yet supported on this platform") } @@ -396,10 +393,6 @@ func TestABI_ArgumentPassing(t *testing.T) { } t.Run("20_uintptr", func(t *testing.T) { - if runtime.GOOS == "windows" { - t.Skip("windows supports at most 15 arguments") - } - var fn func(uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr) uintptr purego.RegisterLibFunc(&fn, lib, "stack_20_uintptr") got := fn(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20) @@ -410,10 +403,6 @@ func TestABI_ArgumentPassing(t *testing.T) { }) t.Run("32_uintptr", func(t *testing.T) { - if runtime.GOOS == "windows" { - t.Skip("windows supports at most 15 arguments") - } - var fn func( uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, @@ -434,10 +423,6 @@ func TestABI_ArgumentPassing(t *testing.T) { }) t.Run("syscalln_20_uintptr", func(t *testing.T) { - if runtime.GOOS == "windows" { - t.Skip("windows supports at most 15 arguments") - } - fn, err := load.OpenSymbol(lib, "stack_20_uintptr") if err != nil { t.Fatalf("OpenSymbol(stack_20_uintptr) failed: %v", err) @@ -453,10 +438,6 @@ func TestABI_ArgumentPassing(t *testing.T) { }) t.Run("syscalln_32_uintptr", func(t *testing.T) { - if runtime.GOOS == "windows" { - t.Skip("windows supports at most 15 arguments") - } - fn, err := load.OpenSymbol(lib, "stack_32_uintptr") if err != nil { t.Fatalf("OpenSymbol(stack_32_uintptr) failed: %v", err) @@ -474,9 +455,6 @@ func TestABI_ArgumentPassing(t *testing.T) { }) t.Run("32_mixed_int_float", func(t *testing.T) { - if runtime.GOOS == "windows" { - t.Skip("windows supports at most 15 arguments") - } if unsafe.Sizeof(uintptr(0)) == 4 { t.Skip("requires 64-bit uintptr slots") } @@ -520,8 +498,7 @@ func TestABI_TooManyArguments(t *testing.T) { f() } - // 33 int64 parameters exceeds maxArgs=32 on non-Windows targets. - // On Windows this is still an overflow because maxArgs is 15. + // 33 int64 parameters exceeds maxArgs=32. t.Run("registerfunc_33_int64_exceeds_limit", func(t *testing.T) { mustPanic(t, "purego: too many stack arguments", func() { var fn func( diff --git a/syscall.go b/syscall.go index 88ea2b18..b238db89 100644 --- a/syscall.go +++ b/syscall.go @@ -102,23 +102,13 @@ func SyscallN(fn uintptr, args ...uintptr) (r1, r2, err uintptr) { if fn == 0 { panic("purego: fn is nil") } - limit := maxArgs - if runtime.GOOS == "windows" { - limit = 15 - } - if len(args) > limit { + if len(args) > maxArgs { panic("purego: too many arguments to SyscallN") } - // Windows uses syscall.Syscall15 in syscall_windows.go. + // Windows uses syscall.SyscallN in syscall_windows.go. if runtime.GOOS == "windows" { - var tmp [maxArgs]uintptr - copy(tmp[:], args) - return syscall_syscall15X( - fn, - tmp[0], tmp[1], tmp[2], tmp[3], tmp[4], tmp[5], tmp[6], tmp[7], - tmp[8], tmp[9], tmp[10], tmp[11], tmp[12], tmp[13], tmp[14], - ) + return syscall_syscallN(fn, args...) } syscall := thePool.Get().(*syscall15Args) diff --git a/syscall_unix.go b/syscall_unix.go index 4912635b..0efb738d 100644 --- a/syscall_unix.go +++ b/syscall_unix.go @@ -30,6 +30,10 @@ func syscall_syscall15X(fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a return args.a1, args.a2, args.a3 } +func syscall_syscallN(fn uintptr, args ...uintptr) (r1, r2, err uintptr) { + panic("purego: syscall_syscallN is only supported on windows") +} + // NewCallback converts a Go function to a function pointer conforming to the C calling convention. // This is useful when interoperating with C code requiring callbacks. The argument is expected to be a // function with zero or one uintptr-sized result. The function must not have arguments with size larger than the size diff --git a/syscall_windows.go b/syscall_windows.go index 9e3f8923..e373b902 100644 --- a/syscall_windows.go +++ b/syscall_windows.go @@ -11,11 +11,15 @@ import ( var syscall15XABI0 uintptr -func syscall_syscall15X(fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr) (r1, r2, err uintptr) { - r1, r2, errno := syscall.Syscall15(fn, 15, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15) +func syscall_syscallN(fn uintptr, args ...uintptr) (r1, r2, err uintptr) { + r1, r2, errno := syscall.SyscallN(fn, args...) return r1, r2, uintptr(errno) } +func syscall_syscall15X(fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr) (r1, r2, err uintptr) { + return syscall_syscallN(fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15) +} + // NewCallback converts a Go function to a function pointer conforming to the stdcall calling convention. // This is useful when interoperating with Windows code requiring callbacks. The argument is expected to be a // function with one uintptr-sized result. The function must not have arguments with size larger than the From e4cd47157d6b2c9bcab62f83974447130a1cf5a0 Mon Sep 17 00:00:00 2001 From: Travis Cline Date: Thu, 5 Mar 2026 10:24:55 -0800 Subject: [PATCH 04/21] purego: define CDecl in one place --- cdecl.go | 13 +++++++++++++ syscall.go | 7 ------- syscall_32bit.go | 7 ------- syscall_64bit_fallback.go | 7 ------- 4 files changed, 13 insertions(+), 21 deletions(-) create mode 100644 cdecl.go diff --git a/cdecl.go b/cdecl.go new file mode 100644 index 00000000..7688a231 --- /dev/null +++ b/cdecl.go @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2026 The Ebitengine Authors + +//go:build darwin || freebsd || linux || netbsd || windows + +package purego + +// CDecl marks a function as being called using the __cdecl calling convention as defined in +// the [MSDocs] when passed to NewCallback. It must be the first argument to the function. +// This is only useful on 386 Windows, but it is safe to use on other platforms. +// +// [MSDocs]: https://learn.microsoft.com/en-us/cpp/cpp/cdecl?view=msvc-170 +type CDecl struct{} diff --git a/syscall.go b/syscall.go index b238db89..6f8eb92a 100644 --- a/syscall.go +++ b/syscall.go @@ -10,13 +10,6 @@ import ( "unsafe" ) -// CDecl marks a function as being called using the __cdecl calling convention as defined in -// the [MSDocs] when passed to NewCallback. It must be the first argument to the function. -// This is only useful on 386 Windows, but it is safe to use on other platforms. -// -// [MSDocs]: https://learn.microsoft.com/en-us/cpp/cpp/cdecl?view=msvc-170 -type CDecl struct{} - const ( maxArgs = 32 ) diff --git a/syscall_32bit.go b/syscall_32bit.go index 866fa6ef..f8edd283 100644 --- a/syscall_32bit.go +++ b/syscall_32bit.go @@ -7,13 +7,6 @@ package purego import "unsafe" -// CDecl marks a function as being called using the __cdecl calling convention as defined in -// the [MSDocs] when passed to NewCallback. It must be the first argument to the function. -// This is only useful on 386 Windows, but it is safe to use on other platforms. -// -// [MSDocs]: https://learn.microsoft.com/en-us/cpp/cpp/cdecl?view=msvc-170 -type CDecl struct{} - const ( maxArgs = 32 ) diff --git a/syscall_64bit_fallback.go b/syscall_64bit_fallback.go index f3519c55..5dd805a6 100644 --- a/syscall_64bit_fallback.go +++ b/syscall_64bit_fallback.go @@ -5,13 +5,6 @@ package purego -// CDecl marks a function as being called using the __cdecl calling convention as defined in -// the [MSDocs] when passed to NewCallback. It must be the first argument to the function. -// This is only useful on 386 Windows, but it is safe to use on other platforms. -// -// [MSDocs]: https://learn.microsoft.com/en-us/cpp/cpp/cdecl?view=msvc-170 -type CDecl struct{} - const ( maxArgs = 15 ) From a3f159c7305f2325ec9be9a2178c188ec708f735 Mon Sep 17 00:00:00 2001 From: Travis Cline Date: Thu, 5 Mar 2026 10:59:03 -0800 Subject: [PATCH 05/21] cdecl: remove build tags --- cdecl.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/cdecl.go b/cdecl.go index 7688a231..a4b105ca 100644 --- a/cdecl.go +++ b/cdecl.go @@ -1,8 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: 2026 The Ebitengine Authors -//go:build darwin || freebsd || linux || netbsd || windows - package purego // CDecl marks a function as being called using the __cdecl calling convention as defined in From 4f4147940f4843917b671f78a0de6cff32eb49fd Mon Sep 17 00:00:00 2001 From: Travis Cline Date: Thu, 5 Mar 2026 12:24:19 -0800 Subject: [PATCH 06/21] purego: fix review follow-ups for 32-arg syscall --- func.go | 4 ---- sys_ppc64le.s | 9 ++------- syscall.go | 6 ------ syscall_32bit.go | 10 +++++++++- 4 files changed, 11 insertions(+), 18 deletions(-) diff --git a/func.go b/func.go index e8c5bf54..dfe4aa06 100644 --- a/func.go +++ b/func.go @@ -253,10 +253,6 @@ func RegisterFunc(fptr any, cfn uintptr) { if numFloats < floatArgRegs { floats[numFloats] = x numFloats++ - if runtime.GOARCH == "ppc64le" { - // Keep stack indexing in sync with ppc64le callback decoding. - numStack++ - } } else { addStack(x) } diff --git a/sys_ppc64le.s b/sys_ppc64le.s index fab9554f..642071e7 100644 --- a/sys_ppc64le.s +++ b/sys_ppc64le.s @@ -9,7 +9,7 @@ // PPC64LE ELFv2 ABI: // - Integer args: R3-R10 (8 registers) -// - Float args: F1-F13 (13 registers) +// - Float args: F1-F8 (8 registers) // - Return: R3 (integer), F1 (float) // - Stack pointer: R1 // - Link register: LR (special) @@ -53,7 +53,7 @@ TEXT syscall15X(SB), NOSPLIT, $0 // R11 := args pointer (syscall15Args*) MOVD R3, R11 - // Load float args into F1-F13 + // Load float args into F1-F8 FMOVD syscall15Args_f1(R11), F1 FMOVD syscall15Args_f2(R11), F2 FMOVD syscall15Args_f3(R11), F3 @@ -62,11 +62,6 @@ TEXT syscall15X(SB), NOSPLIT, $0 FMOVD syscall15Args_f6(R11), F6 FMOVD syscall15Args_f7(R11), F7 FMOVD syscall15Args_f8(R11), F8 - FMOVD syscall15Args_f9(R11), F9 - FMOVD syscall15Args_f10(R11), F10 - FMOVD syscall15Args_f11(R11), F11 - FMOVD syscall15Args_f12(R11), F12 - FMOVD syscall15Args_f13(R11), F13 // Load integer args into R3-R10 MOVD syscall15Args_a1(R11), R3 diff --git a/syscall.go b/syscall.go index 6f8eb92a..d25db25a 100644 --- a/syscall.go +++ b/syscall.go @@ -18,7 +18,6 @@ type syscall15Args struct { fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr a16, a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, a28, a29, a30, a31, a32 uintptr f1, f2, f3, f4, f5, f6, f7, f8 uintptr - f9, f10, f11, f12, f13 uintptr arm64_r8 uintptr } @@ -64,11 +63,6 @@ func (s *syscall15Args) Set(fn uintptr, ints []uintptr, floats []uintptr, r8 uin s.f6 = floats[5] s.f7 = floats[6] s.f8 = floats[7] - s.f9 = floats[8] - s.f10 = floats[9] - s.f11 = floats[10] - s.f12 = floats[11] - s.f13 = floats[12] s.arm64_r8 = r8 } diff --git a/syscall_32bit.go b/syscall_32bit.go index f8edd283..f98aeac0 100644 --- a/syscall_32bit.go +++ b/syscall_32bit.go @@ -5,7 +5,10 @@ package purego -import "unsafe" +import ( + "runtime" + "unsafe" +) const ( maxArgs = 32 @@ -98,6 +101,11 @@ func SyscallN(fn uintptr, args ...uintptr) (r1, r2, err uintptr) { panic("purego: too many arguments to SyscallN") } + // Windows uses syscall.SyscallN in syscall_windows.go. + if runtime.GOOS == "windows" { + return syscall_syscallN(fn, args...) + } + syscall := thePool.Get().(*syscall15Args) defer thePool.Put(syscall) *syscall = syscall15Args{} From 5b0e02b09dcf20da12e752a8134d90c795eda503 Mon Sep 17 00:00:00 2001 From: Travis Cline Date: Thu, 5 Mar 2026 12:45:56 -0800 Subject: [PATCH 07/21] purego: fix ppc64le callback float decoding --- func.go | 11 ++++++++++- sys_ppc64le.s | 9 +++++++-- sys_unix_ppc64le.s | 20 ++++++++++---------- syscall.go | 6 ++++++ syscall_unix.go | 8 ++++++-- 5 files changed, 39 insertions(+), 15 deletions(-) diff --git a/func.go b/func.go index dfe4aa06..52f492e1 100644 --- a/func.go +++ b/func.go @@ -253,6 +253,12 @@ func RegisterFunc(fptr any, cfn uintptr) { if numFloats < floatArgRegs { floats[numFloats] = x numFloats++ + if runtime.GOARCH == "ppc64le" { + // ELFv2: each float parameter consumes a GPR/stack slot + // even when passed in an FPR. Advance numStack so overflow + // floats land at the correct parameter-order position. + numStack++ + } } else { addStack(x) } @@ -513,7 +519,10 @@ func roundUpTo8(val uintptr) uintptr { func numOfFloatRegisters() int { switch runtime.GOARCH { - case "amd64", "arm64", "loong64", "ppc64le", "riscv64": + case "ppc64le": + // ELFv2 ABI uses F1-F13 for floating-point parameters. + return 13 + case "amd64", "arm64", "loong64", "riscv64": return 8 case "s390x": return 4 diff --git a/sys_ppc64le.s b/sys_ppc64le.s index 642071e7..fab9554f 100644 --- a/sys_ppc64le.s +++ b/sys_ppc64le.s @@ -9,7 +9,7 @@ // PPC64LE ELFv2 ABI: // - Integer args: R3-R10 (8 registers) -// - Float args: F1-F8 (8 registers) +// - Float args: F1-F13 (13 registers) // - Return: R3 (integer), F1 (float) // - Stack pointer: R1 // - Link register: LR (special) @@ -53,7 +53,7 @@ TEXT syscall15X(SB), NOSPLIT, $0 // R11 := args pointer (syscall15Args*) MOVD R3, R11 - // Load float args into F1-F8 + // Load float args into F1-F13 FMOVD syscall15Args_f1(R11), F1 FMOVD syscall15Args_f2(R11), F2 FMOVD syscall15Args_f3(R11), F3 @@ -62,6 +62,11 @@ TEXT syscall15X(SB), NOSPLIT, $0 FMOVD syscall15Args_f6(R11), F6 FMOVD syscall15Args_f7(R11), F7 FMOVD syscall15Args_f8(R11), F8 + FMOVD syscall15Args_f9(R11), F9 + FMOVD syscall15Args_f10(R11), F10 + FMOVD syscall15Args_f11(R11), F11 + FMOVD syscall15Args_f12(R11), F12 + FMOVD syscall15Args_f13(R11), F13 // Load integer args into R3-R10 MOVD syscall15Args_a1(R11), R3 diff --git a/sys_unix_ppc64le.s b/sys_unix_ppc64le.s index 37f0d8d6..f9def763 100644 --- a/sys_unix_ppc64le.s +++ b/sys_unix_ppc64le.s @@ -17,24 +17,19 @@ // 24(R1) - TOC save area (if needed) // 32(R1)+ - parameter save area / local variables // -// Our frame (total 208 bytes, 16-byte aligned): +// Our frame (total 240 bytes, 16-byte aligned): // 32(R1) - saved R31 (8 bytes) // 40(R1) - callbackArgs struct (32 bytes: index, args, result, stackArgs) -// 72(R1) - args array: floats (64) + ints (64) = 128 bytes, ends at 200 -// Total with alignment: 208 bytes +// 72(R1) - args array: floats (104) + ints (64) = 168 bytes, ends at 240 // // Stack args are NOT copied - we pass a pointer to their location in caller's frame. -// This keeps frame size small enough for NOSPLIT with CGO_ENABLED=1. -// Budget: 208 + 544 (crosscall2) + 56 (cgocallback) = 808 bytes -// This is 8 bytes over the 800 limit, but cgocallback's children (load_g, save_g) -// reuse the same stack space, so in practice it works. -#define FRAME_SIZE 200 +#define FRAME_SIZE 240 #define SAVE_R31 32 #define CB_ARGS 40 #define ARGS_ARRAY 72 #define FLOAT_OFF 0 -#define INT_OFF 64 +#define INT_OFF 104 TEXT callbackasm1(SB), NOSPLIT|NOFRAME, $0 NO_LOCAL_POINTERS @@ -57,7 +52,7 @@ TEXT callbackasm1(SB), NOSPLIT|NOFRAME, $0 MOVD R11, (CB_ARGS+0)(R1) // Save callback arguments to args array. - // Layout: floats first (F1-F8), then ints (R3-R10), then stack args + // Layout: floats first (F1-F13), then ints (R3-R10), then stack args FMOVD F1, (ARGS_ARRAY+FLOAT_OFF+0*8)(R1) FMOVD F2, (ARGS_ARRAY+FLOAT_OFF+1*8)(R1) FMOVD F3, (ARGS_ARRAY+FLOAT_OFF+2*8)(R1) @@ -66,6 +61,11 @@ TEXT callbackasm1(SB), NOSPLIT|NOFRAME, $0 FMOVD F6, (ARGS_ARRAY+FLOAT_OFF+5*8)(R1) FMOVD F7, (ARGS_ARRAY+FLOAT_OFF+6*8)(R1) FMOVD F8, (ARGS_ARRAY+FLOAT_OFF+7*8)(R1) + FMOVD F9, (ARGS_ARRAY+FLOAT_OFF+8*8)(R1) + FMOVD F10, (ARGS_ARRAY+FLOAT_OFF+9*8)(R1) + FMOVD F11, (ARGS_ARRAY+FLOAT_OFF+10*8)(R1) + FMOVD F12, (ARGS_ARRAY+FLOAT_OFF+11*8)(R1) + FMOVD F13, (ARGS_ARRAY+FLOAT_OFF+12*8)(R1) MOVD R3, (ARGS_ARRAY+INT_OFF+0*8)(R1) MOVD R4, (ARGS_ARRAY+INT_OFF+1*8)(R1) diff --git a/syscall.go b/syscall.go index d25db25a..6f8eb92a 100644 --- a/syscall.go +++ b/syscall.go @@ -18,6 +18,7 @@ type syscall15Args struct { fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr a16, a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, a28, a29, a30, a31, a32 uintptr f1, f2, f3, f4, f5, f6, f7, f8 uintptr + f9, f10, f11, f12, f13 uintptr arm64_r8 uintptr } @@ -63,6 +64,11 @@ func (s *syscall15Args) Set(fn uintptr, ints []uintptr, floats []uintptr, r8 uin s.f6 = floats[5] s.f7 = floats[6] s.f8 = floats[7] + s.f9 = floats[8] + s.f10 = floats[9] + s.f11 = floats[10] + s.f12 = floats[11] + s.f13 = floats[12] s.arm64_r8 = r8 } diff --git a/syscall_unix.go b/syscall_unix.go index 0efb738d..51ea8be7 100644 --- a/syscall_unix.go +++ b/syscall_unix.go @@ -140,7 +140,6 @@ func callbackWrap(a *callbackArgs) { // stackFrame points to stack-passed arguments. On most architectures this is // contiguous with frame (after register args), but on ppc64le it's separate. var stackFrame *[callbackMaxFrame]uintptr - isPPC64LE := runtime.GOARCH == "ppc64le" if sf := a.stackFrame(); sf != nil { // Only ppc64le uses separate stackArgs pointer due to NOSPLIT constraints stackFrame = (*[callbackMaxFrame]uintptr)(sf) @@ -199,7 +198,8 @@ func callbackWrap(a *callbackArgs) { } else { args[i] = reflect.NewAt(inType, unsafe.Pointer(&frame[floatsN])).Elem() } - if isPPC64LE { + if runtime.GOARCH == "ppc64le" { + // ELFv2: each FPR-passed float also consumes a stack slot. stackSlot += slots } } @@ -243,6 +243,10 @@ func callbackWrap(a *callbackArgs) { } else { args[i] = reflect.NewAt(inType, unsafe.Pointer(&frame[pos])).Elem() } + if runtime.GOARCH == "ppc64le" { + // ELFv2: each GPR-passed int also consumes a stack slot. + stackSlot += slots + } } intsN += slots } From dceb831678bbf19967ce8b135714c7265967ebcd Mon Sep 17 00:00:00 2001 From: Travis Cline Date: Sat, 14 Mar 2026 17:52:26 -0700 Subject: [PATCH 08/21] purego: remove dead syscall_64bit_fallback.go The build constraints excluded every supported architecture, so this file only compiled for unsupported platforms like mips64 where purego does not work anyway. Removing it addresses the review comment about consolidating syscall15Args definitions. --- syscall_64bit_fallback.go | 76 --------------------------------------- 1 file changed, 76 deletions(-) delete mode 100644 syscall_64bit_fallback.go diff --git a/syscall_64bit_fallback.go b/syscall_64bit_fallback.go deleted file mode 100644 index 5dd805a6..00000000 --- a/syscall_64bit_fallback.go +++ /dev/null @@ -1,76 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: 2026 The Ebitengine Authors - -//go:build !386 && !arm && !windows && (darwin || freebsd || linux || netbsd) && !(amd64 || arm64 || loong64 || ppc64le || riscv64 || s390x) - -package purego - -const ( - maxArgs = 15 -) - -type syscall15Args struct { - fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr - f1, f2, f3, f4, f5, f6, f7, f8 uintptr - arm64_r8 uintptr -} - -func (s *syscall15Args) Set(fn uintptr, ints []uintptr, floats []uintptr, r8 uintptr) { - s.fn = fn - s.a1 = ints[0] - s.a2 = ints[1] - s.a3 = ints[2] - s.a4 = ints[3] - s.a5 = ints[4] - s.a6 = ints[5] - s.a7 = ints[6] - s.a8 = ints[7] - s.a9 = ints[8] - s.a10 = ints[9] - s.a11 = ints[10] - s.a12 = ints[11] - s.a13 = ints[12] - s.a14 = ints[13] - s.a15 = ints[14] - s.f1 = floats[0] - s.f2 = floats[1] - s.f3 = floats[2] - s.f4 = floats[3] - s.f5 = floats[4] - s.f6 = floats[5] - s.f7 = floats[6] - s.f8 = floats[7] - s.arm64_r8 = r8 -} - -// SyscallN takes fn, a C function pointer and a list of arguments as uintptr. -// There is an internal maximum number of arguments that SyscallN can take. It panics -// when the maximum is exceeded. It returns the result and the libc error code if there is one. -// -// In order to call this function properly make sure to follow all the rules specified in [unsafe.Pointer] -// especially point 4. -// -// NOTE: SyscallN does not properly call functions that have both integer and float parameters. -// See discussion comment https://github.com/ebiten/purego/pull/1#issuecomment-1128057607 -// for an explanation of why that is. -// -// On amd64, if there are more than 8 floats the 9th and so on will be placed incorrectly on the -// stack. -// -// The pragma go:nosplit is not needed at this function declaration because it uses go:uintptrescapes -// which forces all the objects that the uintptrs point to onto the heap where a stack split won't affect -// their memory location. -// -//go:uintptrescapes -func SyscallN(fn uintptr, args ...uintptr) (r1, r2, err uintptr) { - if fn == 0 { - panic("purego: fn is nil") - } - if len(args) > maxArgs { - panic("purego: too many arguments to SyscallN") - } - // add padding so there is no out-of-bounds slicing - var tmp [maxArgs]uintptr - copy(tmp[:], args) - return syscall_syscall15X(fn, tmp[0], tmp[1], tmp[2], tmp[3], tmp[4], tmp[5], tmp[6], tmp[7], tmp[8], tmp[9], tmp[10], tmp[11], tmp[12], tmp[13], tmp[14]) -} From 091d916ad050922374279a95bc1ffb707272aeba Mon Sep 17 00:00:00 2001 From: Travis Cline Date: Mon, 16 Mar 2026 10:05:37 -0700 Subject: [PATCH 09/21] purego: keep ppc64le on 15-arg ABI --- func.go | 9 +--- func_test.go | 15 ++++++ sys_ppc64le.s | 60 +++++------------------ sys_unix_ppc64le.s | 20 ++++---- syscall.go | 2 +- syscall_notstackargs.go | 4 ++ syscall_ppc64le.go | 94 ++++++++++++++++++++++++++++++++++++ syscall_stackargs.go | 42 ---------------- syscall_stackargs_ppc64le.go | 32 ++++++++++++ syscall_stackargs_s390x.go | 32 ++++++++++++ syscall_unix.go | 30 ++++++------ 11 files changed, 215 insertions(+), 125 deletions(-) create mode 100644 syscall_ppc64le.go delete mode 100644 syscall_stackargs.go create mode 100644 syscall_stackargs_ppc64le.go create mode 100644 syscall_stackargs_s390x.go diff --git a/func.go b/func.go index 52f492e1..5c6d35c5 100644 --- a/func.go +++ b/func.go @@ -253,12 +253,6 @@ func RegisterFunc(fptr any, cfn uintptr) { if numFloats < floatArgRegs { floats[numFloats] = x numFloats++ - if runtime.GOARCH == "ppc64le" { - // ELFv2: each float parameter consumes a GPR/stack slot - // even when passed in an FPR. Advance numStack so overflow - // floats land at the correct parameter-order position. - numStack++ - } } else { addStack(x) } @@ -520,8 +514,7 @@ func roundUpTo8(val uintptr) uintptr { func numOfFloatRegisters() int { switch runtime.GOARCH { case "ppc64le": - // ELFv2 ABI uses F1-F13 for floating-point parameters. - return 13 + return 8 case "amd64", "arm64", "loong64", "riscv64": return 8 case "s390x": diff --git a/func_test.go b/func_test.go index f3800c32..dfe512bc 100644 --- a/func_test.go +++ b/func_test.go @@ -373,6 +373,9 @@ func TestABI_ArgumentPassing(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { + if tt.name == "20_int32" && runtime.GOARCH == "ppc64le" { + t.Skip("ppc64le retains the 15-argument limit") + } if tt.name == "10_float32" && (runtime.GOARCH == "loong64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "riscv64" || runtime.GOARCH == "s390x") { t.Skip("float32 stack arguments not yet supported on this platform") } @@ -393,6 +396,9 @@ func TestABI_ArgumentPassing(t *testing.T) { } t.Run("20_uintptr", func(t *testing.T) { + if runtime.GOARCH == "ppc64le" { + t.Skip("ppc64le retains the 15-argument limit") + } var fn func(uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr) uintptr purego.RegisterLibFunc(&fn, lib, "stack_20_uintptr") got := fn(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20) @@ -403,6 +409,9 @@ func TestABI_ArgumentPassing(t *testing.T) { }) t.Run("32_uintptr", func(t *testing.T) { + if runtime.GOARCH == "ppc64le" { + t.Skip("ppc64le retains the 15-argument limit") + } var fn func( uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, @@ -423,6 +432,9 @@ func TestABI_ArgumentPassing(t *testing.T) { }) t.Run("syscalln_20_uintptr", func(t *testing.T) { + if runtime.GOARCH == "ppc64le" { + t.Skip("ppc64le retains the 15-argument limit") + } fn, err := load.OpenSymbol(lib, "stack_20_uintptr") if err != nil { t.Fatalf("OpenSymbol(stack_20_uintptr) failed: %v", err) @@ -438,6 +450,9 @@ func TestABI_ArgumentPassing(t *testing.T) { }) t.Run("syscalln_32_uintptr", func(t *testing.T) { + if runtime.GOARCH == "ppc64le" { + t.Skip("ppc64le retains the 15-argument limit") + } fn, err := load.OpenSymbol(lib, "stack_32_uintptr") if err != nil { t.Fatalf("OpenSymbol(stack_32_uintptr) failed: %v", err) diff --git a/sys_ppc64le.s b/sys_ppc64le.s index fab9554f..391b30a9 100644 --- a/sys_ppc64le.s +++ b/sys_ppc64le.s @@ -9,7 +9,7 @@ // PPC64LE ELFv2 ABI: // - Integer args: R3-R10 (8 registers) -// - Float args: F1-F13 (13 registers) +// - Float args: F1-F8 (8 registers) // - Return: R3 (integer), F1 (float) // - Stack pointer: R1 // - Link register: LR (special) @@ -24,15 +24,16 @@ // 32(R1) - Parameter save area start (8 * 8 = 64 bytes for R3-R10) // 96(R1) - First stack arg (a9) - this is where callee looks // 104(R1) - Second stack arg (a10) -// 112-280 - Stack args a11-a32 -// 288(R1) - TOC save (outside parameter save area) -// 296(R1) - saved args pointer -// Total: 304 bytes +// 112-152 - Stack args a11-a15 (5 * 8 = 40 bytes) +// 160(R1) - TOC save (we put it here, outside param save area) +// 168(R1) - saved args pointer +// 176(R1) - padding for 16-byte alignment +// Total: 176 bytes -#define STACK_SIZE 304 +#define STACK_SIZE 176 #define LR_SAVE 16 -#define TOC_SAVE 288 -#define ARGP_SAVE 296 +#define TOC_SAVE 160 +#define ARGP_SAVE 168 GLOBL ·syscall15XABI0(SB), NOPTR|RODATA, $8 DATA ·syscall15XABI0(SB)/8, $syscall15X(SB) @@ -53,7 +54,7 @@ TEXT syscall15X(SB), NOSPLIT, $0 // R11 := args pointer (syscall15Args*) MOVD R3, R11 - // Load float args into F1-F13 + // Load float args into F1-F8 FMOVD syscall15Args_f1(R11), F1 FMOVD syscall15Args_f2(R11), F2 FMOVD syscall15Args_f3(R11), F3 @@ -62,11 +63,6 @@ TEXT syscall15X(SB), NOSPLIT, $0 FMOVD syscall15Args_f6(R11), F6 FMOVD syscall15Args_f7(R11), F7 FMOVD syscall15Args_f8(R11), F8 - FMOVD syscall15Args_f9(R11), F9 - FMOVD syscall15Args_f10(R11), F10 - FMOVD syscall15Args_f11(R11), F11 - FMOVD syscall15Args_f12(R11), F12 - FMOVD syscall15Args_f13(R11), F13 // Load integer args into R3-R10 MOVD syscall15Args_a1(R11), R3 @@ -78,7 +74,7 @@ TEXT syscall15X(SB), NOSPLIT, $0 MOVD syscall15Args_a7(R11), R9 MOVD syscall15Args_a8(R11), R10 - // Spill a9-a32 onto the stack (stack parameters start at 96(R1)) + // Spill a9-a15 onto the stack (stack parameters start at 96(R1)) // Per ELFv2: parameter save area is 32-95, stack args start at 96 MOVD ARGP_SAVE(R1), R11 // reload args pointer MOVD syscall15Args_a9(R11), R12 @@ -95,40 +91,6 @@ TEXT syscall15X(SB), NOSPLIT, $0 MOVD R12, 136(R1) // a14 at 136(R1) MOVD syscall15Args_a15(R11), R12 MOVD R12, 144(R1) // a15 at 144(R1) - MOVD syscall15Args_a16(R11), R12 - MOVD R12, 152(R1) // a16 at 152(R1) - MOVD syscall15Args_a17(R11), R12 - MOVD R12, 160(R1) // a17 at 160(R1) - MOVD syscall15Args_a18(R11), R12 - MOVD R12, 168(R1) // a18 at 168(R1) - MOVD syscall15Args_a19(R11), R12 - MOVD R12, 176(R1) // a19 at 176(R1) - MOVD syscall15Args_a20(R11), R12 - MOVD R12, 184(R1) // a20 at 184(R1) - MOVD syscall15Args_a21(R11), R12 - MOVD R12, 192(R1) // a21 at 192(R1) - MOVD syscall15Args_a22(R11), R12 - MOVD R12, 200(R1) // a22 at 200(R1) - MOVD syscall15Args_a23(R11), R12 - MOVD R12, 208(R1) // a23 at 208(R1) - MOVD syscall15Args_a24(R11), R12 - MOVD R12, 216(R1) // a24 at 216(R1) - MOVD syscall15Args_a25(R11), R12 - MOVD R12, 224(R1) // a25 at 224(R1) - MOVD syscall15Args_a26(R11), R12 - MOVD R12, 232(R1) // a26 at 232(R1) - MOVD syscall15Args_a27(R11), R12 - MOVD R12, 240(R1) // a27 at 240(R1) - MOVD syscall15Args_a28(R11), R12 - MOVD R12, 248(R1) // a28 at 248(R1) - MOVD syscall15Args_a29(R11), R12 - MOVD R12, 256(R1) // a29 at 256(R1) - MOVD syscall15Args_a30(R11), R12 - MOVD R12, 264(R1) // a30 at 264(R1) - MOVD syscall15Args_a31(R11), R12 - MOVD R12, 272(R1) // a31 at 272(R1) - MOVD syscall15Args_a32(R11), R12 - MOVD R12, 280(R1) // a32 at 280(R1) // Call function: load fn and call MOVD syscall15Args_fn(R11), R12 diff --git a/sys_unix_ppc64le.s b/sys_unix_ppc64le.s index f9def763..37f0d8d6 100644 --- a/sys_unix_ppc64le.s +++ b/sys_unix_ppc64le.s @@ -17,19 +17,24 @@ // 24(R1) - TOC save area (if needed) // 32(R1)+ - parameter save area / local variables // -// Our frame (total 240 bytes, 16-byte aligned): +// Our frame (total 208 bytes, 16-byte aligned): // 32(R1) - saved R31 (8 bytes) // 40(R1) - callbackArgs struct (32 bytes: index, args, result, stackArgs) -// 72(R1) - args array: floats (104) + ints (64) = 168 bytes, ends at 240 +// 72(R1) - args array: floats (64) + ints (64) = 128 bytes, ends at 200 +// Total with alignment: 208 bytes // // Stack args are NOT copied - we pass a pointer to their location in caller's frame. +// This keeps frame size small enough for NOSPLIT with CGO_ENABLED=1. +// Budget: 208 + 544 (crosscall2) + 56 (cgocallback) = 808 bytes +// This is 8 bytes over the 800 limit, but cgocallback's children (load_g, save_g) +// reuse the same stack space, so in practice it works. -#define FRAME_SIZE 240 +#define FRAME_SIZE 200 #define SAVE_R31 32 #define CB_ARGS 40 #define ARGS_ARRAY 72 #define FLOAT_OFF 0 -#define INT_OFF 104 +#define INT_OFF 64 TEXT callbackasm1(SB), NOSPLIT|NOFRAME, $0 NO_LOCAL_POINTERS @@ -52,7 +57,7 @@ TEXT callbackasm1(SB), NOSPLIT|NOFRAME, $0 MOVD R11, (CB_ARGS+0)(R1) // Save callback arguments to args array. - // Layout: floats first (F1-F13), then ints (R3-R10), then stack args + // Layout: floats first (F1-F8), then ints (R3-R10), then stack args FMOVD F1, (ARGS_ARRAY+FLOAT_OFF+0*8)(R1) FMOVD F2, (ARGS_ARRAY+FLOAT_OFF+1*8)(R1) FMOVD F3, (ARGS_ARRAY+FLOAT_OFF+2*8)(R1) @@ -61,11 +66,6 @@ TEXT callbackasm1(SB), NOSPLIT|NOFRAME, $0 FMOVD F6, (ARGS_ARRAY+FLOAT_OFF+5*8)(R1) FMOVD F7, (ARGS_ARRAY+FLOAT_OFF+6*8)(R1) FMOVD F8, (ARGS_ARRAY+FLOAT_OFF+7*8)(R1) - FMOVD F9, (ARGS_ARRAY+FLOAT_OFF+8*8)(R1) - FMOVD F10, (ARGS_ARRAY+FLOAT_OFF+9*8)(R1) - FMOVD F11, (ARGS_ARRAY+FLOAT_OFF+10*8)(R1) - FMOVD F12, (ARGS_ARRAY+FLOAT_OFF+11*8)(R1) - FMOVD F13, (ARGS_ARRAY+FLOAT_OFF+12*8)(R1) MOVD R3, (ARGS_ARRAY+INT_OFF+0*8)(R1) MOVD R4, (ARGS_ARRAY+INT_OFF+1*8)(R1) diff --git a/syscall.go b/syscall.go index 6f8eb92a..b2c5cabe 100644 --- a/syscall.go +++ b/syscall.go @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: 2022 The Ebitengine Authors -//go:build (!386 && !arm && windows) || ((amd64 || arm64) && (darwin || freebsd || linux || netbsd)) || (linux && (loong64 || ppc64le || riscv64 || s390x)) +//go:build (!386 && !arm && windows) || ((amd64 || arm64) && (darwin || freebsd || linux || netbsd)) || (linux && (loong64 || riscv64 || s390x)) package purego diff --git a/syscall_notstackargs.go b/syscall_notstackargs.go index 76d1f5a7..8acb7ddb 100644 --- a/syscall_notstackargs.go +++ b/syscall_notstackargs.go @@ -35,3 +35,7 @@ type callbackArgs struct { func (c *callbackArgs) stackFrame() unsafe.Pointer { return nil } + +func (c *callbackArgs) intFrame() unsafe.Pointer { + return nil +} diff --git a/syscall_ppc64le.go b/syscall_ppc64le.go new file mode 100644 index 00000000..d55d3aa9 --- /dev/null +++ b/syscall_ppc64le.go @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 The Ebitengine Authors + +//go:build linux && ppc64le + +package purego + +import ( + "runtime" + "unsafe" +) + +const ( + maxArgs = 15 +) + +type syscall15Args struct { + fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr + f1, f2, f3, f4, f5, f6, f7, f8 uintptr + arm64_r8 uintptr +} + +func (s *syscall15Args) Set(fn uintptr, ints []uintptr, floats []uintptr, r8 uintptr) { + s.fn = fn + s.a1 = ints[0] + s.a2 = ints[1] + s.a3 = ints[2] + s.a4 = ints[3] + s.a5 = ints[4] + s.a6 = ints[5] + s.a7 = ints[6] + s.a8 = ints[7] + s.a9 = ints[8] + s.a10 = ints[9] + s.a11 = ints[10] + s.a12 = ints[11] + s.a13 = ints[12] + s.a14 = ints[13] + s.a15 = ints[14] + s.f1 = floats[0] + s.f2 = floats[1] + s.f3 = floats[2] + s.f4 = floats[3] + s.f5 = floats[4] + s.f6 = floats[5] + s.f7 = floats[6] + s.f8 = floats[7] + s.arm64_r8 = r8 +} + +// SyscallN takes fn, a C function pointer and a list of arguments as uintptr. +// There is an internal maximum number of arguments that SyscallN can take. It panics +// when the maximum is exceeded. It returns the result and the libc error code if there is one. +// +// In order to call this function properly make sure to follow all the rules specified in [unsafe.Pointer] +// especially point 4. +// +// NOTE: SyscallN does not properly call functions that have both integer and float parameters. +// See discussion comment https://github.com/ebiten/purego/pull/1#issuecomment-1128057607 +// for an explanation of why that is. +// +// On amd64, if there are more than 8 floats the 9th and so on will be placed incorrectly on the +// stack. +// +// The pragma go:nosplit is not needed at this function declaration because it uses go:uintptrescapes +// which forces all the objects that the uintptrs point to onto the heap where a stack split won't affect +// their memory location. +// +//go:uintptrescapes +func SyscallN(fn uintptr, args ...uintptr) (r1, r2, err uintptr) { + if fn == 0 { + panic("purego: fn is nil") + } + if len(args) > maxArgs { + panic("purego: too many arguments to SyscallN") + } + + // Windows uses syscall.SyscallN in syscall_windows.go. + if runtime.GOOS == "windows" { + return syscall_syscallN(fn, args...) + } + + syscall := thePool.Get().(*syscall15Args) + defer thePool.Put(syscall) + *syscall = syscall15Args{} + + var tmp [maxArgs]uintptr + copy(tmp[:], args) + var floats [maxArgs]uintptr + copy(floats[:], tmp[:]) + syscall.Set(fn, tmp[:], floats[:], 0) + runtime_cgocall(syscall15XABI0, unsafe.Pointer(syscall)) + return syscall.a1, syscall.a2, syscall.a3 +} diff --git a/syscall_stackargs.go b/syscall_stackargs.go deleted file mode 100644 index f7e46790..00000000 --- a/syscall_stackargs.go +++ /dev/null @@ -1,42 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: 2026 The Ebitengine Authors - -//go:build ppc64le || s390x - -package purego - -import "unsafe" - -// callbackArgs is the argument block passed from the assembly trampoline -// to callbackWrap when C code calls a Go callback registered with NewCallback. -// The assembly fills in the fields before calling callbackWrap, which uses -// them to determine which Go function to invoke and where to read its -// arguments from, and writes the return value back into result. -// -// callbackArgs is only used on Unix. On Windows, callbacks are handled by -// the runtime's own callback mechanism, so this type is compiled but unused, -// serving only as a stub to satisfy cross-platform compilation. -type callbackArgs struct { - index uintptr - // args points to the argument block. - // - // The structure of the arguments goes - // float registers followed by the - // integer registers followed by the stack. - // - // This variable is treated as a contiguous - // block of memory containing all of the arguments - // for this callback. - args unsafe.Pointer - // Below are out-args from callbackWrap - result [1]uintptr - // stackArgs points to stack-passed arguments for architectures where - // they can't be made contiguous with register args (e.g., ppc64le). - // On other architectures, this is nil and stack args are read from - // the end of the args block. - stackArgs unsafe.Pointer -} - -func (c *callbackArgs) stackFrame() unsafe.Pointer { - return c.stackArgs -} diff --git a/syscall_stackargs_ppc64le.go b/syscall_stackargs_ppc64le.go new file mode 100644 index 00000000..48e5423b --- /dev/null +++ b/syscall_stackargs_ppc64le.go @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2026 The Ebitengine Authors + +//go:build ppc64le + +package purego + +import "unsafe" + +// callbackArgs is the argument block passed from the assembly trampoline +// to callbackWrap when C code calls a Go callback registered with NewCallback. +type callbackArgs struct { + index uintptr + // args points to the argument block. + // + // The structure of the arguments goes + // float registers followed by the + // integer registers. + args unsafe.Pointer + // Below are out-args from callbackWrap. + result [1]uintptr + // stackArgs points to stack-passed arguments. + stackArgs unsafe.Pointer +} + +func (c *callbackArgs) stackFrame() unsafe.Pointer { + return c.stackArgs +} + +func (c *callbackArgs) intFrame() unsafe.Pointer { + return nil +} diff --git a/syscall_stackargs_s390x.go b/syscall_stackargs_s390x.go new file mode 100644 index 00000000..b8af24d9 --- /dev/null +++ b/syscall_stackargs_s390x.go @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2026 The Ebitengine Authors + +//go:build s390x + +package purego + +import "unsafe" + +// callbackArgs is the argument block passed from the assembly trampoline +// to callbackWrap when C code calls a Go callback registered with NewCallback. +type callbackArgs struct { + index uintptr + // args points to the argument block. + // + // The structure of the arguments goes + // float registers followed by the + // integer registers followed by the stack. + args unsafe.Pointer + // Below are out-args from callbackWrap. + result [1]uintptr + // stackArgs points to stack-passed arguments. + stackArgs unsafe.Pointer +} + +func (c *callbackArgs) stackFrame() unsafe.Pointer { + return c.stackArgs +} + +func (c *callbackArgs) intFrame() unsafe.Pointer { + return nil +} diff --git a/syscall_unix.go b/syscall_unix.go index 51ea8be7..508de3bd 100644 --- a/syscall_unix.go +++ b/syscall_unix.go @@ -140,10 +140,14 @@ func callbackWrap(a *callbackArgs) { // stackFrame points to stack-passed arguments. On most architectures this is // contiguous with frame (after register args), but on ppc64le it's separate. var stackFrame *[callbackMaxFrame]uintptr + var intFrame *[callbackMaxFrame]uintptr if sf := a.stackFrame(); sf != nil { // Only ppc64le uses separate stackArgs pointer due to NOSPLIT constraints stackFrame = (*[callbackMaxFrame]uintptr)(sf) } + if intf := a.intFrame(); intf != nil { + intFrame = (*[callbackMaxFrame]uintptr)(intf) + } // floatsN and intsN track the number of register slots used, not argument count. // This distinction matters on ARM32 where float64 uses 2 slots (32-bit registers). var floatsN int @@ -151,7 +155,7 @@ func callbackWrap(a *callbackArgs) { // On amd64/loong64/ppc64le/riscv64/s390x, when returning a struct larger than // maxRegAllocStructSize, the caller passes a hidden pointer in the first integer // register. Skip it to avoid misreading it as the first function argument. - if (runtime.GOARCH == "amd64" || runtime.GOARCH == "loong64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "riscv64" || runtime.GOARCH == "s390x") && + if (runtime.GOARCH == "amd64" || runtime.GOARCH == "loong64" || runtime.GOARCH == "riscv64" || runtime.GOARCH == "s390x") && fnType.NumOut() == 1 && fnType.Out(0).Kind() == reflect.Struct && fnType.Out(0).Size() > maxRegAllocStructSize { intsN = 1 @@ -198,10 +202,6 @@ func callbackWrap(a *callbackArgs) { } else { args[i] = reflect.NewAt(inType, unsafe.Pointer(&frame[floatsN])).Elem() } - if runtime.GOARCH == "ppc64le" { - // ELFv2: each FPR-passed float also consumes a stack slot. - stackSlot += slots - } } floatsN += slots case reflect.Struct: @@ -235,17 +235,17 @@ func callbackWrap(a *callbackArgs) { stackSlot += slots } } else { - // the integers begin after the floats in frame - pos := intsN + numOfFloatRegisters() - if runtime.GOARCH == "s390x" { - // s390x big-endian: sub-8-byte values are right-justified in GPR slot - args[i] = callbackArgFromSlotBigEndian(unsafe.Pointer(&frame[pos]), inType) + if intFrame != nil { + args[i] = reflect.NewAt(inType, unsafe.Pointer(&intFrame[intsN])).Elem() } else { - args[i] = reflect.NewAt(inType, unsafe.Pointer(&frame[pos])).Elem() - } - if runtime.GOARCH == "ppc64le" { - // ELFv2: each GPR-passed int also consumes a stack slot. - stackSlot += slots + // the integers begin after the floats in frame + pos := intsN + numOfFloatRegisters() + if runtime.GOARCH == "s390x" { + // s390x big-endian: sub-8-byte values are right-justified in GPR slot + args[i] = callbackArgFromSlotBigEndian(unsafe.Pointer(&frame[pos]), inType) + } else { + args[i] = reflect.NewAt(inType, unsafe.Pointer(&frame[pos])).Elem() + } } } intsN += slots From 2402f067362b722e1a9463752e0e84f236636eae Mon Sep 17 00:00:00 2001 From: Travis Cline Date: Mon, 16 Mar 2026 10:34:37 -0700 Subject: [PATCH 10/21] purego: rename syscall.go to syscall_64bit.go --- syscall.go => syscall_64bit.go | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename syscall.go => syscall_64bit.go (100%) diff --git a/syscall.go b/syscall_64bit.go similarity index 100% rename from syscall.go rename to syscall_64bit.go From 475e000337971a24dd367a3d986e3536894030ee Mon Sep 17 00:00:00 2001 From: Travis Cline Date: Mon, 16 Mar 2026 14:43:05 -0700 Subject: [PATCH 11/21] purego: clean up syscall build tags --- syscall_64bit.go | 2 +- syscall_stackargs_ppc64le.go | 2 -- syscall_stackargs_s390x.go | 2 -- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/syscall_64bit.go b/syscall_64bit.go index b2c5cabe..b22032e3 100644 --- a/syscall_64bit.go +++ b/syscall_64bit.go @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: 2022 The Ebitengine Authors -//go:build (!386 && !arm && windows) || ((amd64 || arm64) && (darwin || freebsd || linux || netbsd)) || (linux && (loong64 || riscv64 || s390x)) +//go:build ((amd64 || arm64) && (darwin || freebsd || linux || netbsd || windows)) || (linux && (loong64 || riscv64 || s390x)) package purego diff --git a/syscall_stackargs_ppc64le.go b/syscall_stackargs_ppc64le.go index 48e5423b..9eb41315 100644 --- a/syscall_stackargs_ppc64le.go +++ b/syscall_stackargs_ppc64le.go @@ -1,8 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: 2026 The Ebitengine Authors -//go:build ppc64le - package purego import "unsafe" diff --git a/syscall_stackargs_s390x.go b/syscall_stackargs_s390x.go index b8af24d9..399985a5 100644 --- a/syscall_stackargs_s390x.go +++ b/syscall_stackargs_s390x.go @@ -1,8 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: 2026 The Ebitengine Authors -//go:build s390x - package purego import "unsafe" From 02d268bde29bec64602003865c13a668ebcfaa59 Mon Sep 17 00:00:00 2001 From: Travis Cline Date: Wed, 18 Mar 2026 00:46:03 -0700 Subject: [PATCH 12/21] purego: clean up build tags --- func.go | 4 +--- syscall_64bit.go | 2 +- syscall_ppc64le.go | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/func.go b/func.go index 5c6d35c5..dfe4aa06 100644 --- a/func.go +++ b/func.go @@ -513,9 +513,7 @@ func roundUpTo8(val uintptr) uintptr { func numOfFloatRegisters() int { switch runtime.GOARCH { - case "ppc64le": - return 8 - case "amd64", "arm64", "loong64", "riscv64": + case "amd64", "arm64", "loong64", "ppc64le", "riscv64": return 8 case "s390x": return 4 diff --git a/syscall_64bit.go b/syscall_64bit.go index b22032e3..bd9659db 100644 --- a/syscall_64bit.go +++ b/syscall_64bit.go @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: 2022 The Ebitengine Authors -//go:build ((amd64 || arm64) && (darwin || freebsd || linux || netbsd || windows)) || (linux && (loong64 || riscv64 || s390x)) +//go:build (amd64 || arm64 || loong64 || riscv64 || s390x) && (darwin || freebsd || linux || netbsd || windows) package purego diff --git a/syscall_ppc64le.go b/syscall_ppc64le.go index d55d3aa9..f5fdb65c 100644 --- a/syscall_ppc64le.go +++ b/syscall_ppc64le.go @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: 2022 The Ebitengine Authors -//go:build linux && ppc64le +//go:build linux package purego From aa53ffcbc5bcd7615a76f23798e981449c2a3e60 Mon Sep 17 00:00:00 2001 From: Travis Cline Date: Wed, 18 Mar 2026 19:52:06 -0700 Subject: [PATCH 13/21] purego: keep 64bit handling code in syscall.go --- syscall_64bit.go => syscall.go | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename syscall_64bit.go => syscall.go (100%) diff --git a/syscall_64bit.go b/syscall.go similarity index 100% rename from syscall_64bit.go rename to syscall.go From 306800e3ee94f79e42949a3c69f29db75ea3651d Mon Sep 17 00:00:00 2001 From: Travis Cline Date: Wed, 18 Mar 2026 21:25:56 -0700 Subject: [PATCH 14/21] syscall: keep upstream build constraint --- syscall.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/syscall.go b/syscall.go index bd9659db..89720e3d 100644 --- a/syscall.go +++ b/syscall.go @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: 2022 The Ebitengine Authors -//go:build (amd64 || arm64 || loong64 || riscv64 || s390x) && (darwin || freebsd || linux || netbsd || windows) +//go:build !386 && !arm && (darwin || freebsd || linux || netbsd || windows) package purego From dabbc2b16118d66e69d6890eaf4406a05ae878fb Mon Sep 17 00:00:00 2001 From: Travis Cline Date: Fri, 20 Mar 2026 13:28:09 -0700 Subject: [PATCH 15/21] syscall: exclude ppc64le in build tags for syscall.go --- syscall.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/syscall.go b/syscall.go index 89720e3d..4b1dedd2 100644 --- a/syscall.go +++ b/syscall.go @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: 2022 The Ebitengine Authors -//go:build !386 && !arm && (darwin || freebsd || linux || netbsd || windows) +//go:build !386 && !arm && !ppc64le && (darwin || freebsd || linux || netbsd || windows) package purego From fa112907c5ee6cde158fda57527e0c8e0c57ef39 Mon Sep 17 00:00:00 2001 From: Travis Cline Date: Thu, 26 Mar 2026 16:43:30 -0700 Subject: [PATCH 16/21] syscall_ppc64le: omit windows GOOS check --- syscall_ppc64le.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/syscall_ppc64le.go b/syscall_ppc64le.go index f5fdb65c..320fa67c 100644 --- a/syscall_ppc64le.go +++ b/syscall_ppc64le.go @@ -6,7 +6,6 @@ package purego import ( - "runtime" "unsafe" ) @@ -75,11 +74,6 @@ func SyscallN(fn uintptr, args ...uintptr) (r1, r2, err uintptr) { panic("purego: too many arguments to SyscallN") } - // Windows uses syscall.SyscallN in syscall_windows.go. - if runtime.GOOS == "windows" { - return syscall_syscallN(fn, args...) - } - syscall := thePool.Get().(*syscall15Args) defer thePool.Put(syscall) *syscall = syscall15Args{} From b9c5f0ab0565c266c3cc3388e1f07e44facb877c Mon Sep 17 00:00:00 2001 From: Travis Cline Date: Sun, 12 Apr 2026 13:39:55 -0700 Subject: [PATCH 17/21] purego: address review feedback - Rename syscall15Args to syscallArgs, syscall15X to syscallX, syscall15XABI0 to syscallXABI0 throughout Go and assembly files - Remove unused syscall_syscall15X functions from syscall_unix.go, syscall_windows.go, and syscall_cgo_linux.go - Remove unnecessary float fields f9-f13 from 64-bit syscallArgs struct (only f1-f8 are used by 64-bit assembly) - Replace Set method with syscall_SyscallN function that handles pool acquisition, struct population, and cgocall in one step - Simplify func.go call site to use syscall_SyscallN directly --- func.go | 22 +++---- internal/cgo/syscall_cgo_unix.go | 14 ++-- struct_386.go | 2 +- struct_amd64.go | 2 +- struct_arm.go | 2 +- struct_arm64.go | 2 +- struct_loong64.go | 2 +- struct_ppc64le.go | 2 +- struct_riscv64.go | 2 +- struct_s390x.go | 2 +- sys_386.s | 86 ++++++++++++------------ sys_amd64.s | 106 ++++++++++++++--------------- sys_arm.s | 108 +++++++++++++++--------------- sys_arm64.s | 110 +++++++++++++++---------------- sys_loong64.s | 106 ++++++++++++++--------------- sys_ppc64le.s | 68 +++++++++---------- sys_riscv64.s | 104 ++++++++++++++--------------- sys_s390x.s | 94 +++++++++++++------------- syscall.go | 81 +++++++---------------- syscall_32bit.go | 83 +++++++---------------- syscall_cgo_linux.go | 7 +- syscall_ppc64le.go | 52 +++++---------- syscall_unix.go | 17 +---- syscall_windows.go | 6 +- 24 files changed, 482 insertions(+), 598 deletions(-) diff --git a/func.go b/func.go index dfe4aa06..c6d1150f 100644 --- a/func.go +++ b/func.go @@ -23,7 +23,7 @@ const ( ) var thePool = sync.Pool{New: func() any { - return new(syscall15Args) + return new(syscallArgs) }} // RegisterLibFunc is a wrapper around RegisterFunc that uses the C function returned from Dlsym(handle, name). @@ -320,22 +320,16 @@ func RegisterFunc(fptr any, cfn uintptr) { keepAlive = addValue(v, keepAlive, addInt, addFloat, addStack, &numInts, &numFloats, &numStack) } - syscall := thePool.Get().(*syscall15Args) - defer thePool.Put(syscall) - - if runtime.GOARCH == "loong64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "riscv64" || runtime.GOARCH == "s390x" { - syscall.Set(cfn, sysargs[:], floats[:], 0) - runtime_cgocall(syscall15XABI0, unsafe.Pointer(syscall)) - } else if runtime.GOARCH == "arm64" || runtime.GOOS != "windows" { - // Use the normal arm64 calling convention even on Windows - syscall.Set(cfn, sysargs[:], floats[:], arm64_r8) - runtime_cgocall(syscall15XABI0, unsafe.Pointer(syscall)) - } else { - *syscall = syscall15Args{} - // This is a fallback for Windows amd64, 386, and arm. + var syscall *syscallArgs + if runtime.GOOS == "windows" && runtime.GOARCH != "arm64" { + // Windows amd64, 386, and arm use syscall.SyscallN. + syscall = thePool.Get().(*syscallArgs) syscall.a1, syscall.a2, _ = syscall_syscallN(cfn, sysargs[:numStack]...) syscall.f1 = syscall.a2 // on amd64 a2 stores the float return. On 32bit platforms floats aren't support + } else { + syscall = syscall_SyscallN(cfn, sysargs[:], floats[:], arm64_r8) } + defer thePool.Put(syscall) if ty.NumOut() == 0 { return nil } diff --git a/internal/cgo/syscall_cgo_unix.go b/internal/cgo/syscall_cgo_unix.go index 1e39de3b..1b061dc3 100644 --- a/internal/cgo/syscall_cgo_unix.go +++ b/internal/cgo/syscall_cgo_unix.go @@ -16,14 +16,14 @@ package cgo #include #include -typedef struct syscall15Args { +typedef struct syscallArgs { uintptr_t fn; uintptr_t a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15; uintptr_t f1, f2, f3, f4, f5, f6, f7, f8; uintptr_t err; -} syscall15Args; +} syscallArgs; -void syscall15(struct syscall15Args *args) { +void syscall15(struct syscallArgs *args) { assert((args->f1|args->f2|args->f3|args->f4|args->f5|args->f6|args->f7|args->f8) == 0); uintptr_t (*func_name)(uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4, uintptr_t a5, uintptr_t a6, uintptr_t a7, uintptr_t a8, uintptr_t a9, uintptr_t a10, uintptr_t a11, uintptr_t a12, @@ -39,12 +39,12 @@ void syscall15(struct syscall15Args *args) { import "C" import "unsafe" -// assign purego.syscall15XABI0 to the C version of this function. -var Syscall15XABI0 = unsafe.Pointer(C.syscall15) +// assign purego.syscallXABI0 to the C version of this function. +var SyscallXABI0 = unsafe.Pointer(C.syscall15) //go:nosplit -func Syscall15X(fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr) (r1, r2, err uintptr) { - args := C.syscall15Args{ +func SyscallX(fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr) (r1, r2, err uintptr) { + args := C.syscallArgs{ C.uintptr_t(fn), C.uintptr_t(a1), C.uintptr_t(a2), C.uintptr_t(a3), C.uintptr_t(a4), C.uintptr_t(a5), C.uintptr_t(a6), C.uintptr_t(a7), C.uintptr_t(a8), C.uintptr_t(a9), C.uintptr_t(a10), C.uintptr_t(a11), C.uintptr_t(a12), diff --git a/struct_386.go b/struct_386.go index a4621883..d34fa2cc 100644 --- a/struct_386.go +++ b/struct_386.go @@ -12,7 +12,7 @@ func addStruct(v reflect.Value, numInts, numFloats, numStack *int, addInt, addFl panic("purego: struct arguments are not supported") } -func getStruct(outType reflect.Type, syscall syscall15Args) (v reflect.Value) { +func getStruct(outType reflect.Type, syscall syscallArgs) (v reflect.Value) { panic("purego: struct returns are not supported") } diff --git a/struct_amd64.go b/struct_amd64.go index 7ca50c59..40ed91c0 100644 --- a/struct_amd64.go +++ b/struct_amd64.go @@ -10,7 +10,7 @@ import ( "unsafe" ) -func getStruct(outType reflect.Type, syscall syscall15Args) (v reflect.Value) { +func getStruct(outType reflect.Type, syscall syscallArgs) (v reflect.Value) { outSize := outType.Size() switch { case outSize == 0: diff --git a/struct_arm.go b/struct_arm.go index 6519e4ad..bfaac841 100644 --- a/struct_arm.go +++ b/struct_arm.go @@ -28,7 +28,7 @@ func addStruct(v reflect.Value, numInts, numFloats, numStack *int, addInt, addFl return keepAlive } -func getStruct(outType reflect.Type, syscall syscall15Args) (v reflect.Value) { +func getStruct(outType reflect.Type, syscall syscallArgs) (v reflect.Value) { outSize := outType.Size() if outSize == 0 { return reflect.New(outType).Elem() diff --git a/struct_arm64.go b/struct_arm64.go index 3a04828c..285c756b 100644 --- a/struct_arm64.go +++ b/struct_arm64.go @@ -14,7 +14,7 @@ import ( "github.com/ebitengine/purego/internal/strings" ) -func getStruct(outType reflect.Type, syscall syscall15Args) (v reflect.Value) { +func getStruct(outType reflect.Type, syscall syscallArgs) (v reflect.Value) { outSize := outType.Size() switch { case outSize == 0: diff --git a/struct_loong64.go b/struct_loong64.go index 0464cd76..7a8a7c7c 100644 --- a/struct_loong64.go +++ b/struct_loong64.go @@ -9,7 +9,7 @@ import ( "unsafe" ) -func getStruct(outType reflect.Type, syscall syscall15Args) (v reflect.Value) { +func getStruct(outType reflect.Type, syscall syscallArgs) (v reflect.Value) { outSize := outType.Size() switch { case outSize == 0: diff --git a/struct_ppc64le.go b/struct_ppc64le.go index f781ae7f..0cb481e4 100644 --- a/struct_ppc64le.go +++ b/struct_ppc64le.go @@ -8,7 +8,7 @@ import ( "unsafe" ) -func getStruct(outType reflect.Type, syscall syscall15Args) reflect.Value { +func getStruct(outType reflect.Type, syscall syscallArgs) reflect.Value { outSize := outType.Size() switch { diff --git a/struct_riscv64.go b/struct_riscv64.go index c377c445..aa4e50b6 100644 --- a/struct_riscv64.go +++ b/struct_riscv64.go @@ -8,7 +8,7 @@ import ( "unsafe" ) -func getStruct(outType reflect.Type, syscall syscall15Args) reflect.Value { +func getStruct(outType reflect.Type, syscall syscallArgs) reflect.Value { outSize := outType.Size() switch { diff --git a/struct_s390x.go b/struct_s390x.go index 48cc1730..6e15d415 100644 --- a/struct_s390x.go +++ b/struct_s390x.go @@ -8,7 +8,7 @@ import ( "unsafe" ) -func getStruct(outType reflect.Type, syscall syscall15Args) reflect.Value { +func getStruct(outType reflect.Type, syscall syscallArgs) reflect.Value { outSize := outType.Size() switch { diff --git a/sys_386.s b/sys_386.s index 82931413..476f5631 100644 --- a/sys_386.s +++ b/sys_386.s @@ -10,8 +10,8 @@ #define STACK_SIZE 160 #define PTR_ADDRESS (STACK_SIZE - 4) -// syscall15X calls a function in libc on behalf of the syscall package. -// syscall15X takes a pointer to a struct like: +// syscallX calls a function in libc on behalf of the syscall package. +// syscallX takes a pointer to a struct like: // struct { // fn uintptr // a1 uintptr @@ -22,18 +22,18 @@ // f16 uintptr // arm64_r8 uintptr // } -// syscall15X must be called on the g0 stack with the +// syscallX must be called on the g0 stack with the // C calling convention (use libcCall). // // On i386 System V ABI, all arguments are passed on the stack. // Return value is in EAX (and EDX for 64-bit values). -GLOBL ·syscall15XABI0(SB), NOPTR|RODATA, $4 -DATA ·syscall15XABI0(SB)/4, $syscall15X(SB) -TEXT syscall15X(SB), NOSPLIT|NOFRAME, $0-0 +GLOBL ·syscallXABI0(SB), NOPTR|RODATA, $4 +DATA ·syscallXABI0(SB)/4, $syscallX(SB) +TEXT syscallX(SB), NOSPLIT|NOFRAME, $0-0 // Called via C calling convention: argument pointer at 4(SP) // NOT via Go calling convention // On i386, the first argument is at 4(SP) after CALL pushes return address - MOVL 4(SP), AX // get pointer to syscall15Args + MOVL 4(SP), AX // get pointer to syscallArgs // Save callee-saved registers PUSHL BP @@ -50,75 +50,75 @@ TEXT syscall15X(SB), NOSPLIT|NOFRAME, $0-0 MOVL BX, PTR_ADDRESS(SP) // save args pointer // Load function pointer - MOVL syscall15Args_fn(BX), AX + MOVL syscallArgs_fn(BX), AX MOVL AX, (PTR_ADDRESS-4)(SP) // save fn pointer // Push all integer arguments onto the stack (a1-a32) // i386 SysV ABI: arguments pushed right-to-left, but we're // setting up the stack from low to high addresses - MOVL syscall15Args_a1(BX), AX + MOVL syscallArgs_a1(BX), AX MOVL AX, 0(SP) - MOVL syscall15Args_a2(BX), AX + MOVL syscallArgs_a2(BX), AX MOVL AX, 4(SP) - MOVL syscall15Args_a3(BX), AX + MOVL syscallArgs_a3(BX), AX MOVL AX, 8(SP) - MOVL syscall15Args_a4(BX), AX + MOVL syscallArgs_a4(BX), AX MOVL AX, 12(SP) - MOVL syscall15Args_a5(BX), AX + MOVL syscallArgs_a5(BX), AX MOVL AX, 16(SP) - MOVL syscall15Args_a6(BX), AX + MOVL syscallArgs_a6(BX), AX MOVL AX, 20(SP) - MOVL syscall15Args_a7(BX), AX + MOVL syscallArgs_a7(BX), AX MOVL AX, 24(SP) - MOVL syscall15Args_a8(BX), AX + MOVL syscallArgs_a8(BX), AX MOVL AX, 28(SP) - MOVL syscall15Args_a9(BX), AX + MOVL syscallArgs_a9(BX), AX MOVL AX, 32(SP) - MOVL syscall15Args_a10(BX), AX + MOVL syscallArgs_a10(BX), AX MOVL AX, 36(SP) - MOVL syscall15Args_a11(BX), AX + MOVL syscallArgs_a11(BX), AX MOVL AX, 40(SP) - MOVL syscall15Args_a12(BX), AX + MOVL syscallArgs_a12(BX), AX MOVL AX, 44(SP) - MOVL syscall15Args_a13(BX), AX + MOVL syscallArgs_a13(BX), AX MOVL AX, 48(SP) - MOVL syscall15Args_a14(BX), AX + MOVL syscallArgs_a14(BX), AX MOVL AX, 52(SP) - MOVL syscall15Args_a15(BX), AX + MOVL syscallArgs_a15(BX), AX MOVL AX, 56(SP) - MOVL syscall15Args_a16(BX), AX + MOVL syscallArgs_a16(BX), AX MOVL AX, 60(SP) - MOVL syscall15Args_a17(BX), AX + MOVL syscallArgs_a17(BX), AX MOVL AX, 64(SP) - MOVL syscall15Args_a18(BX), AX + MOVL syscallArgs_a18(BX), AX MOVL AX, 68(SP) - MOVL syscall15Args_a19(BX), AX + MOVL syscallArgs_a19(BX), AX MOVL AX, 72(SP) - MOVL syscall15Args_a20(BX), AX + MOVL syscallArgs_a20(BX), AX MOVL AX, 76(SP) - MOVL syscall15Args_a21(BX), AX + MOVL syscallArgs_a21(BX), AX MOVL AX, 80(SP) - MOVL syscall15Args_a22(BX), AX + MOVL syscallArgs_a22(BX), AX MOVL AX, 84(SP) - MOVL syscall15Args_a23(BX), AX + MOVL syscallArgs_a23(BX), AX MOVL AX, 88(SP) - MOVL syscall15Args_a24(BX), AX + MOVL syscallArgs_a24(BX), AX MOVL AX, 92(SP) - MOVL syscall15Args_a25(BX), AX + MOVL syscallArgs_a25(BX), AX MOVL AX, 96(SP) - MOVL syscall15Args_a26(BX), AX + MOVL syscallArgs_a26(BX), AX MOVL AX, 100(SP) - MOVL syscall15Args_a27(BX), AX + MOVL syscallArgs_a27(BX), AX MOVL AX, 104(SP) - MOVL syscall15Args_a28(BX), AX + MOVL syscallArgs_a28(BX), AX MOVL AX, 108(SP) - MOVL syscall15Args_a29(BX), AX + MOVL syscallArgs_a29(BX), AX MOVL AX, 112(SP) - MOVL syscall15Args_a30(BX), AX + MOVL syscallArgs_a30(BX), AX MOVL AX, 116(SP) - MOVL syscall15Args_a31(BX), AX + MOVL syscallArgs_a31(BX), AX MOVL AX, 120(SP) - MOVL syscall15Args_a32(BX), AX + MOVL syscallArgs_a32(BX), AX MOVL AX, 124(SP) // Call the C function @@ -127,13 +127,13 @@ TEXT syscall15X(SB), NOSPLIT|NOFRAME, $0-0 // Get args pointer back and save results MOVL PTR_ADDRESS(SP), BX - MOVL AX, syscall15Args_a1(BX) // return value r1 - MOVL DX, syscall15Args_a2(BX) // return value r2 (for 64-bit returns) + MOVL AX, syscallArgs_a1(BX) // return value r1 + MOVL DX, syscallArgs_a2(BX) // return value r2 (for 64-bit returns) // Save x87 FPU return value (ST0) to f1 field // On i386 System V ABI, float/double returns are in ST(0) // We save as float64 (8 bytes) to preserve precision - FMOVDP F0, syscall15Args_f1(BX) + FMOVDP F0, syscallArgs_f1(BX) // Clean up stack ADDL $STACK_SIZE, SP diff --git a/sys_amd64.s b/sys_amd64.s index 9a1da9f6..c2a14c89 100644 --- a/sys_amd64.s +++ b/sys_amd64.s @@ -11,8 +11,8 @@ #define STACK_SIZE 224 #define PTR_ADDRESS (STACK_SIZE - 8) -// syscall15X calls a function in libc on behalf of the syscall package. -// syscall15X takes a pointer to a struct like: +// syscallX calls a function in libc on behalf of the syscall package. +// syscallX takes a pointer to a struct like: // struct { // fn uintptr // a1 uintptr @@ -34,99 +34,99 @@ // r2 uintptr // err uintptr // } -// syscall15X must be called on the g0 stack with the +// syscallX must be called on the g0 stack with the // C calling convention (use libcCall). -GLOBL ·syscall15XABI0(SB), NOPTR|RODATA, $8 -DATA ·syscall15XABI0(SB)/8, $syscall15X(SB) -TEXT syscall15X(SB), NOSPLIT, $STACK_SIZE +GLOBL ·syscallXABI0(SB), NOPTR|RODATA, $8 +DATA ·syscallXABI0(SB)/8, $syscallX(SB) +TEXT syscallX(SB), NOSPLIT, $STACK_SIZE MOVQ DI, PTR_ADDRESS(SP) // save the pointer MOVQ DI, R11 - MOVQ syscall15Args_f1(R11), X0 // f1 - MOVQ syscall15Args_f2(R11), X1 // f2 - MOVQ syscall15Args_f3(R11), X2 // f3 - MOVQ syscall15Args_f4(R11), X3 // f4 - MOVQ syscall15Args_f5(R11), X4 // f5 - MOVQ syscall15Args_f6(R11), X5 // f6 - MOVQ syscall15Args_f7(R11), X6 // f7 - MOVQ syscall15Args_f8(R11), X7 // f8 - - MOVQ syscall15Args_a1(R11), DI // a1 - MOVQ syscall15Args_a2(R11), SI // a2 - MOVQ syscall15Args_a3(R11), DX // a3 - MOVQ syscall15Args_a4(R11), CX // a4 - MOVQ syscall15Args_a5(R11), R8 // a5 - MOVQ syscall15Args_a6(R11), R9 // a6 + MOVQ syscallArgs_f1(R11), X0 // f1 + MOVQ syscallArgs_f2(R11), X1 // f2 + MOVQ syscallArgs_f3(R11), X2 // f3 + MOVQ syscallArgs_f4(R11), X3 // f4 + MOVQ syscallArgs_f5(R11), X4 // f5 + MOVQ syscallArgs_f6(R11), X5 // f6 + MOVQ syscallArgs_f7(R11), X6 // f7 + MOVQ syscallArgs_f8(R11), X7 // f8 + + MOVQ syscallArgs_a1(R11), DI // a1 + MOVQ syscallArgs_a2(R11), SI // a2 + MOVQ syscallArgs_a3(R11), DX // a3 + MOVQ syscallArgs_a4(R11), CX // a4 + MOVQ syscallArgs_a5(R11), R8 // a5 + MOVQ syscallArgs_a6(R11), R9 // a6 // push the remaining parameters onto the stack - MOVQ syscall15Args_a7(R11), R12 + MOVQ syscallArgs_a7(R11), R12 MOVQ R12, 0(SP) // push a7 - MOVQ syscall15Args_a8(R11), R12 + MOVQ syscallArgs_a8(R11), R12 MOVQ R12, 8(SP) // push a8 - MOVQ syscall15Args_a9(R11), R12 + MOVQ syscallArgs_a9(R11), R12 MOVQ R12, 16(SP) // push a9 - MOVQ syscall15Args_a10(R11), R12 + MOVQ syscallArgs_a10(R11), R12 MOVQ R12, 24(SP) // push a10 - MOVQ syscall15Args_a11(R11), R12 + MOVQ syscallArgs_a11(R11), R12 MOVQ R12, 32(SP) // push a11 - MOVQ syscall15Args_a12(R11), R12 + MOVQ syscallArgs_a12(R11), R12 MOVQ R12, 40(SP) // push a12 - MOVQ syscall15Args_a13(R11), R12 + MOVQ syscallArgs_a13(R11), R12 MOVQ R12, 48(SP) // push a13 - MOVQ syscall15Args_a14(R11), R12 + MOVQ syscallArgs_a14(R11), R12 MOVQ R12, 56(SP) // push a14 - MOVQ syscall15Args_a15(R11), R12 + MOVQ syscallArgs_a15(R11), R12 MOVQ R12, 64(SP) // push a15 - MOVQ syscall15Args_a16(R11), R12 + MOVQ syscallArgs_a16(R11), R12 MOVQ R12, 72(SP) // push a16 - MOVQ syscall15Args_a17(R11), R12 + MOVQ syscallArgs_a17(R11), R12 MOVQ R12, 80(SP) // push a17 - MOVQ syscall15Args_a18(R11), R12 + MOVQ syscallArgs_a18(R11), R12 MOVQ R12, 88(SP) // push a18 - MOVQ syscall15Args_a19(R11), R12 + MOVQ syscallArgs_a19(R11), R12 MOVQ R12, 96(SP) // push a19 - MOVQ syscall15Args_a20(R11), R12 + MOVQ syscallArgs_a20(R11), R12 MOVQ R12, 104(SP) // push a20 - MOVQ syscall15Args_a21(R11), R12 + MOVQ syscallArgs_a21(R11), R12 MOVQ R12, 112(SP) // push a21 - MOVQ syscall15Args_a22(R11), R12 + MOVQ syscallArgs_a22(R11), R12 MOVQ R12, 120(SP) // push a22 - MOVQ syscall15Args_a23(R11), R12 + MOVQ syscallArgs_a23(R11), R12 MOVQ R12, 128(SP) // push a23 - MOVQ syscall15Args_a24(R11), R12 + MOVQ syscallArgs_a24(R11), R12 MOVQ R12, 136(SP) // push a24 - MOVQ syscall15Args_a25(R11), R12 + MOVQ syscallArgs_a25(R11), R12 MOVQ R12, 144(SP) // push a25 - MOVQ syscall15Args_a26(R11), R12 + MOVQ syscallArgs_a26(R11), R12 MOVQ R12, 152(SP) // push a26 - MOVQ syscall15Args_a27(R11), R12 + MOVQ syscallArgs_a27(R11), R12 MOVQ R12, 160(SP) // push a27 - MOVQ syscall15Args_a28(R11), R12 + MOVQ syscallArgs_a28(R11), R12 MOVQ R12, 168(SP) // push a28 - MOVQ syscall15Args_a29(R11), R12 + MOVQ syscallArgs_a29(R11), R12 MOVQ R12, 176(SP) // push a29 - MOVQ syscall15Args_a30(R11), R12 + MOVQ syscallArgs_a30(R11), R12 MOVQ R12, 184(SP) // push a30 - MOVQ syscall15Args_a31(R11), R12 + MOVQ syscallArgs_a31(R11), R12 MOVQ R12, 192(SP) // push a31 - MOVQ syscall15Args_a32(R11), R12 + MOVQ syscallArgs_a32(R11), R12 MOVQ R12, 200(SP) // push a32 XORL AX, AX // vararg: say "no float args" - MOVQ syscall15Args_fn(R11), R10 // fn + MOVQ syscallArgs_fn(R11), R10 // fn CALL R10 MOVQ PTR_ADDRESS(SP), DI // get the pointer back - MOVQ AX, syscall15Args_a1(DI) // r1 - MOVQ DX, syscall15Args_a2(DI) // r2 - MOVQ X0, syscall15Args_f1(DI) // f1 - MOVQ X1, syscall15Args_f2(DI) // f2 + MOVQ AX, syscallArgs_a1(DI) // r1 + MOVQ DX, syscallArgs_a2(DI) // r2 + MOVQ X0, syscallArgs_f1(DI) // f1 + MOVQ X1, syscallArgs_f2(DI) // f2 #ifdef GOOS_darwin CALL purego_error(SB) MOVQ PTR_ADDRESS(SP), DI // reload (DI clobbered by call) MOVQ (AX), AX - MOVQ AX, syscall15Args_a3(DI) // save errno + MOVQ AX, syscallArgs_a3(DI) // save errno #endif XORL AX, AX // no error (it's ignored anyway) diff --git a/sys_arm.s b/sys_arm.s index 3a8ce0d0..f1ea44a2 100644 --- a/sys_arm.s +++ b/sys_arm.s @@ -10,8 +10,8 @@ #define STACK_SIZE 128 #define PTR_ADDRESS (STACK_SIZE - 4) -// syscall15X calls a function in libc on behalf of the syscall package. -// syscall15X takes a pointer to a struct like: +// syscallX calls a function in libc on behalf of the syscall package. +// syscallX takes a pointer to a struct like: // struct { // fn uintptr // a1 uintptr @@ -22,12 +22,12 @@ // f16 uintptr // arm64_r8 uintptr // } -// syscall15X must be called on the g0 stack with the +// syscallX must be called on the g0 stack with the // C calling convention (use libcCall). -GLOBL ·syscall15XABI0(SB), NOPTR|RODATA, $4 -DATA ·syscall15XABI0(SB)/4, $syscall15X(SB) -TEXT syscall15X(SB), NOSPLIT|NOFRAME, $0-0 - // Called via C calling convention: R0 = pointer to syscall15Args +GLOBL ·syscallXABI0(SB), NOPTR|RODATA, $4 +DATA ·syscallXABI0(SB)/4, $syscallX(SB) +TEXT syscallX(SB), NOSPLIT|NOFRAME, $0-0 + // Called via C calling convention: R0 = pointer to syscallArgs // NOT via Go calling convention // Save link register and callee-saved registers first MOVW.W R14, -4(R13) // save LR (decrement and store) @@ -38,82 +38,82 @@ TEXT syscall15X(SB), NOSPLIT|NOFRAME, $0-0 MOVW R8, PTR_ADDRESS(R13) // Load function pointer first (before anything can corrupt R8) - MOVW syscall15Args_fn(R8), R5 + MOVW syscallArgs_fn(R8), R5 MOVW R5, (PTR_ADDRESS-4)(R13) // save fn at offset 56 // Load floating point arguments // Each float64 spans 2 uintptr slots (8 bytes) on ARM32, so we skip by 2 - MOVD syscall15Args_f1(R8), F0 // f1+f2 -> D0 - MOVD syscall15Args_f3(R8), F1 // f3+f4 -> D1 - MOVD syscall15Args_f5(R8), F2 // f5+f6 -> D2 - MOVD syscall15Args_f7(R8), F3 // f7+f8 -> D3 - MOVD syscall15Args_f9(R8), F4 // f9+f10 -> D4 - MOVD syscall15Args_f11(R8), F5 // f11+f12 -> D5 - MOVD syscall15Args_f13(R8), F6 // f13+f14 -> D6 - MOVD syscall15Args_f15(R8), F7 // f15+f16 -> D7 + MOVD syscallArgs_f1(R8), F0 // f1+f2 -> D0 + MOVD syscallArgs_f3(R8), F1 // f3+f4 -> D1 + MOVD syscallArgs_f5(R8), F2 // f5+f6 -> D2 + MOVD syscallArgs_f7(R8), F3 // f7+f8 -> D3 + MOVD syscallArgs_f9(R8), F4 // f9+f10 -> D4 + MOVD syscallArgs_f11(R8), F5 // f11+f12 -> D5 + MOVD syscallArgs_f13(R8), F6 // f13+f14 -> D6 + MOVD syscallArgs_f15(R8), F7 // f15+f16 -> D7 // Load integer arguments into registers (R0-R3 for ARM EABI) - MOVW syscall15Args_a1(R8), R0 // a1 - MOVW syscall15Args_a2(R8), R1 // a2 - MOVW syscall15Args_a3(R8), R2 // a3 - MOVW syscall15Args_a4(R8), R3 // a4 + MOVW syscallArgs_a1(R8), R0 // a1 + MOVW syscallArgs_a2(R8), R1 // a2 + MOVW syscallArgs_a3(R8), R2 // a3 + MOVW syscallArgs_a4(R8), R3 // a4 // push a5-a32 onto stack - MOVW syscall15Args_a5(R8), R4 + MOVW syscallArgs_a5(R8), R4 MOVW R4, 0(R13) - MOVW syscall15Args_a6(R8), R4 + MOVW syscallArgs_a6(R8), R4 MOVW R4, 4(R13) - MOVW syscall15Args_a7(R8), R4 + MOVW syscallArgs_a7(R8), R4 MOVW R4, 8(R13) - MOVW syscall15Args_a8(R8), R4 + MOVW syscallArgs_a8(R8), R4 MOVW R4, 12(R13) - MOVW syscall15Args_a9(R8), R4 + MOVW syscallArgs_a9(R8), R4 MOVW R4, 16(R13) - MOVW syscall15Args_a10(R8), R4 + MOVW syscallArgs_a10(R8), R4 MOVW R4, 20(R13) - MOVW syscall15Args_a11(R8), R4 + MOVW syscallArgs_a11(R8), R4 MOVW R4, 24(R13) - MOVW syscall15Args_a12(R8), R4 + MOVW syscallArgs_a12(R8), R4 MOVW R4, 28(R13) - MOVW syscall15Args_a13(R8), R4 + MOVW syscallArgs_a13(R8), R4 MOVW R4, 32(R13) - MOVW syscall15Args_a14(R8), R4 + MOVW syscallArgs_a14(R8), R4 MOVW R4, 36(R13) - MOVW syscall15Args_a15(R8), R4 + MOVW syscallArgs_a15(R8), R4 MOVW R4, 40(R13) - MOVW syscall15Args_a16(R8), R4 + MOVW syscallArgs_a16(R8), R4 MOVW R4, 44(R13) - MOVW syscall15Args_a17(R8), R4 + MOVW syscallArgs_a17(R8), R4 MOVW R4, 48(R13) - MOVW syscall15Args_a18(R8), R4 + MOVW syscallArgs_a18(R8), R4 MOVW R4, 52(R13) - MOVW syscall15Args_a19(R8), R4 + MOVW syscallArgs_a19(R8), R4 MOVW R4, 56(R13) - MOVW syscall15Args_a20(R8), R4 + MOVW syscallArgs_a20(R8), R4 MOVW R4, 60(R13) - MOVW syscall15Args_a21(R8), R4 + MOVW syscallArgs_a21(R8), R4 MOVW R4, 64(R13) - MOVW syscall15Args_a22(R8), R4 + MOVW syscallArgs_a22(R8), R4 MOVW R4, 68(R13) - MOVW syscall15Args_a23(R8), R4 + MOVW syscallArgs_a23(R8), R4 MOVW R4, 72(R13) - MOVW syscall15Args_a24(R8), R4 + MOVW syscallArgs_a24(R8), R4 MOVW R4, 76(R13) - MOVW syscall15Args_a25(R8), R4 + MOVW syscallArgs_a25(R8), R4 MOVW R4, 80(R13) - MOVW syscall15Args_a26(R8), R4 + MOVW syscallArgs_a26(R8), R4 MOVW R4, 84(R13) - MOVW syscall15Args_a27(R8), R4 + MOVW syscallArgs_a27(R8), R4 MOVW R4, 88(R13) - MOVW syscall15Args_a28(R8), R4 + MOVW syscallArgs_a28(R8), R4 MOVW R4, 92(R13) - MOVW syscall15Args_a29(R8), R4 + MOVW syscallArgs_a29(R8), R4 MOVW R4, 96(R13) - MOVW syscall15Args_a30(R8), R4 + MOVW syscallArgs_a30(R8), R4 MOVW R4, 100(R13) - MOVW syscall15Args_a31(R8), R4 + MOVW syscallArgs_a31(R8), R4 MOVW R4, 104(R13) - MOVW syscall15Args_a32(R8), R4 + MOVW syscallArgs_a32(R8), R4 MOVW R4, 108(R13) // Load saved function pointer and call @@ -128,14 +128,14 @@ TEXT syscall15X(SB), NOSPLIT|NOFRAME, $0-0 ADD $STACK_SIZE, R13 // save R0, R1 - MOVW R0, syscall15Args_a1(R8) - MOVW R1, syscall15Args_a2(R8) + MOVW R0, syscallArgs_a1(R8) + MOVW R1, syscallArgs_a2(R8) // save f0-f3 (each float64 spans 2 uintptr slots on ARM32) - MOVD F0, syscall15Args_f1(R8) - MOVD F1, syscall15Args_f3(R8) - MOVD F2, syscall15Args_f5(R8) - MOVD F3, syscall15Args_f7(R8) + MOVD F0, syscallArgs_f1(R8) + MOVD F1, syscallArgs_f3(R8) + MOVD F2, syscallArgs_f5(R8) + MOVD F3, syscallArgs_f7(R8) // Restore callee-saved registers and return MOVM.IA.W (R13), [R4, R5, R6, R7, R8, R9, R11] diff --git a/sys_arm64.s b/sys_arm64.s index 51685722..eede3526 100644 --- a/sys_arm64.s +++ b/sys_arm64.s @@ -10,8 +10,8 @@ #define STACK_SIZE 208 #define PTR_ADDRESS (STACK_SIZE - 8) -// syscall15X calls a function in libc on behalf of the syscall package. -// syscall15X takes a pointer to a struct like: +// syscallX calls a function in libc on behalf of the syscall package. +// syscallX takes a pointer to a struct like: // struct { // fn uintptr // a1 uintptr @@ -33,100 +33,100 @@ // r2 uintptr // err uintptr // } -// syscall15X must be called on the g0 stack with the +// syscallX must be called on the g0 stack with the // C calling convention (use libcCall). -GLOBL ·syscall15XABI0(SB), NOPTR|RODATA, $8 -DATA ·syscall15XABI0(SB)/8, $syscall15X(SB) -TEXT syscall15X(SB), NOSPLIT, $0 +GLOBL ·syscallXABI0(SB), NOPTR|RODATA, $8 +DATA ·syscallXABI0(SB)/8, $syscallX(SB) +TEXT syscallX(SB), NOSPLIT, $0 SUB $STACK_SIZE, RSP // push structure pointer MOVD R0, PTR_ADDRESS(RSP) MOVD R0, R9 - FMOVD syscall15Args_f1(R9), F0 // f1 - FMOVD syscall15Args_f2(R9), F1 // f2 - FMOVD syscall15Args_f3(R9), F2 // f3 - FMOVD syscall15Args_f4(R9), F3 // f4 - FMOVD syscall15Args_f5(R9), F4 // f5 - FMOVD syscall15Args_f6(R9), F5 // f6 - FMOVD syscall15Args_f7(R9), F6 // f7 - FMOVD syscall15Args_f8(R9), F7 // f8 + FMOVD syscallArgs_f1(R9), F0 // f1 + FMOVD syscallArgs_f2(R9), F1 // f2 + FMOVD syscallArgs_f3(R9), F2 // f3 + FMOVD syscallArgs_f4(R9), F3 // f4 + FMOVD syscallArgs_f5(R9), F4 // f5 + FMOVD syscallArgs_f6(R9), F5 // f6 + FMOVD syscallArgs_f7(R9), F6 // f7 + FMOVD syscallArgs_f8(R9), F7 // f8 - MOVD syscall15Args_a1(R9), R0 // a1 - MOVD syscall15Args_a2(R9), R1 // a2 - MOVD syscall15Args_a3(R9), R2 // a3 - MOVD syscall15Args_a4(R9), R3 // a4 - MOVD syscall15Args_a5(R9), R4 // a5 - MOVD syscall15Args_a6(R9), R5 // a6 - MOVD syscall15Args_a7(R9), R6 // a7 - MOVD syscall15Args_a8(R9), R7 // a8 - MOVD syscall15Args_arm64_r8(R9), R8 // r8 + MOVD syscallArgs_a1(R9), R0 // a1 + MOVD syscallArgs_a2(R9), R1 // a2 + MOVD syscallArgs_a3(R9), R2 // a3 + MOVD syscallArgs_a4(R9), R3 // a4 + MOVD syscallArgs_a5(R9), R4 // a5 + MOVD syscallArgs_a6(R9), R5 // a6 + MOVD syscallArgs_a7(R9), R6 // a7 + MOVD syscallArgs_a8(R9), R7 // a8 + MOVD syscallArgs_arm64_r8(R9), R8 // r8 - MOVD syscall15Args_a9(R9), R10 + MOVD syscallArgs_a9(R9), R10 MOVD R10, 0(RSP) // push a9 onto stack - MOVD syscall15Args_a10(R9), R10 + MOVD syscallArgs_a10(R9), R10 MOVD R10, 8(RSP) // push a10 onto stack - MOVD syscall15Args_a11(R9), R10 + MOVD syscallArgs_a11(R9), R10 MOVD R10, 16(RSP) // push a11 onto stack - MOVD syscall15Args_a12(R9), R10 + MOVD syscallArgs_a12(R9), R10 MOVD R10, 24(RSP) // push a12 onto stack - MOVD syscall15Args_a13(R9), R10 + MOVD syscallArgs_a13(R9), R10 MOVD R10, 32(RSP) // push a13 onto stack - MOVD syscall15Args_a14(R9), R10 + MOVD syscallArgs_a14(R9), R10 MOVD R10, 40(RSP) // push a14 onto stack - MOVD syscall15Args_a15(R9), R10 + MOVD syscallArgs_a15(R9), R10 MOVD R10, 48(RSP) // push a15 onto stack - MOVD syscall15Args_a16(R9), R10 + MOVD syscallArgs_a16(R9), R10 MOVD R10, 56(RSP) // push a16 onto stack - MOVD syscall15Args_a17(R9), R10 + MOVD syscallArgs_a17(R9), R10 MOVD R10, 64(RSP) // push a17 onto stack - MOVD syscall15Args_a18(R9), R10 + MOVD syscallArgs_a18(R9), R10 MOVD R10, 72(RSP) // push a18 onto stack - MOVD syscall15Args_a19(R9), R10 + MOVD syscallArgs_a19(R9), R10 MOVD R10, 80(RSP) // push a19 onto stack - MOVD syscall15Args_a20(R9), R10 + MOVD syscallArgs_a20(R9), R10 MOVD R10, 88(RSP) // push a20 onto stack - MOVD syscall15Args_a21(R9), R10 + MOVD syscallArgs_a21(R9), R10 MOVD R10, 96(RSP) // push a21 onto stack - MOVD syscall15Args_a22(R9), R10 + MOVD syscallArgs_a22(R9), R10 MOVD R10, 104(RSP) // push a22 onto stack - MOVD syscall15Args_a23(R9), R10 + MOVD syscallArgs_a23(R9), R10 MOVD R10, 112(RSP) // push a23 onto stack - MOVD syscall15Args_a24(R9), R10 + MOVD syscallArgs_a24(R9), R10 MOVD R10, 120(RSP) // push a24 onto stack - MOVD syscall15Args_a25(R9), R10 + MOVD syscallArgs_a25(R9), R10 MOVD R10, 128(RSP) // push a25 onto stack - MOVD syscall15Args_a26(R9), R10 + MOVD syscallArgs_a26(R9), R10 MOVD R10, 136(RSP) // push a26 onto stack - MOVD syscall15Args_a27(R9), R10 + MOVD syscallArgs_a27(R9), R10 MOVD R10, 144(RSP) // push a27 onto stack - MOVD syscall15Args_a28(R9), R10 + MOVD syscallArgs_a28(R9), R10 MOVD R10, 152(RSP) // push a28 onto stack - MOVD syscall15Args_a29(R9), R10 + MOVD syscallArgs_a29(R9), R10 MOVD R10, 160(RSP) // push a29 onto stack - MOVD syscall15Args_a30(R9), R10 + MOVD syscallArgs_a30(R9), R10 MOVD R10, 168(RSP) // push a30 onto stack - MOVD syscall15Args_a31(R9), R10 + MOVD syscallArgs_a31(R9), R10 MOVD R10, 176(RSP) // push a31 onto stack - MOVD syscall15Args_a32(R9), R10 + MOVD syscallArgs_a32(R9), R10 MOVD R10, 184(RSP) // push a32 onto stack - MOVD syscall15Args_fn(R9), R10 // fn + MOVD syscallArgs_fn(R9), R10 // fn BL (R10) MOVD PTR_ADDRESS(RSP), R2 // pop structure pointer ADD $STACK_SIZE, RSP - MOVD R0, syscall15Args_a1(R2) // save r1 - MOVD R1, syscall15Args_a2(R2) // save r3 - FMOVD F0, syscall15Args_f1(R2) // save f0 - FMOVD F1, syscall15Args_f2(R2) // save f1 - FMOVD F2, syscall15Args_f3(R2) // save f2 - FMOVD F3, syscall15Args_f4(R2) // save f3 + MOVD R0, syscallArgs_a1(R2) // save r1 + MOVD R1, syscallArgs_a2(R2) // save r3 + FMOVD F0, syscallArgs_f1(R2) // save f0 + FMOVD F1, syscallArgs_f2(R2) // save f1 + FMOVD F2, syscallArgs_f3(R2) // save f2 + FMOVD F3, syscallArgs_f4(R2) // save f3 #ifdef GOOS_darwin BL purego_error(SB) MOVD (R0), R0 - MOVD R0, syscall15Args_a3(R2) // save errno + MOVD R0, syscallArgs_a3(R2) // save errno #endif RET diff --git a/sys_loong64.s b/sys_loong64.s index cd39346c..cf4e8146 100644 --- a/sys_loong64.s +++ b/sys_loong64.s @@ -10,8 +10,8 @@ #define STACK_SIZE 208 #define PTR_ADDRESS (STACK_SIZE - 8) -// syscall15X calls a function in libc on behalf of the syscall package. -// syscall15X takes a pointer to a struct like: +// syscallX calls a function in libc on behalf of the syscall package. +// syscallX takes a pointer to a struct like: // struct { // fn uintptr // a1 uintptr @@ -33,85 +33,85 @@ // r2 uintptr // err uintptr // } -// syscall15X must be called on the g0 stack with the +// syscallX must be called on the g0 stack with the // C calling convention (use libcCall). -GLOBL ·syscall15XABI0(SB), NOPTR|RODATA, $8 -DATA ·syscall15XABI0(SB)/8, $syscall15X(SB) -TEXT syscall15X(SB), NOSPLIT, $0 +GLOBL ·syscallXABI0(SB), NOPTR|RODATA, $8 +DATA ·syscallXABI0(SB)/8, $syscallX(SB) +TEXT syscallX(SB), NOSPLIT, $0 // push structure pointer SUBV $STACK_SIZE, R3 MOVV R4, PTR_ADDRESS(R3) MOVV R4, R13 - MOVD syscall15Args_f1(R13), F0 // f1 - MOVD syscall15Args_f2(R13), F1 // f2 - MOVD syscall15Args_f3(R13), F2 // f3 - MOVD syscall15Args_f4(R13), F3 // f4 - MOVD syscall15Args_f5(R13), F4 // f5 - MOVD syscall15Args_f6(R13), F5 // f6 - MOVD syscall15Args_f7(R13), F6 // f7 - MOVD syscall15Args_f8(R13), F7 // f8 + MOVD syscallArgs_f1(R13), F0 // f1 + MOVD syscallArgs_f2(R13), F1 // f2 + MOVD syscallArgs_f3(R13), F2 // f3 + MOVD syscallArgs_f4(R13), F3 // f4 + MOVD syscallArgs_f5(R13), F4 // f5 + MOVD syscallArgs_f6(R13), F5 // f6 + MOVD syscallArgs_f7(R13), F6 // f7 + MOVD syscallArgs_f8(R13), F7 // f8 - MOVV syscall15Args_a1(R13), R4 // a1 - MOVV syscall15Args_a2(R13), R5 // a2 - MOVV syscall15Args_a3(R13), R6 // a3 - MOVV syscall15Args_a4(R13), R7 // a4 - MOVV syscall15Args_a5(R13), R8 // a5 - MOVV syscall15Args_a6(R13), R9 // a6 - MOVV syscall15Args_a7(R13), R10 // a7 - MOVV syscall15Args_a8(R13), R11 // a8 + MOVV syscallArgs_a1(R13), R4 // a1 + MOVV syscallArgs_a2(R13), R5 // a2 + MOVV syscallArgs_a3(R13), R6 // a3 + MOVV syscallArgs_a4(R13), R7 // a4 + MOVV syscallArgs_a5(R13), R8 // a5 + MOVV syscallArgs_a6(R13), R9 // a6 + MOVV syscallArgs_a7(R13), R10 // a7 + MOVV syscallArgs_a8(R13), R11 // a8 // push a9-a15 onto stack - MOVV syscall15Args_a9(R13), R12 + MOVV syscallArgs_a9(R13), R12 MOVV R12, 0(R3) - MOVV syscall15Args_a10(R13), R12 + MOVV syscallArgs_a10(R13), R12 MOVV R12, 8(R3) - MOVV syscall15Args_a11(R13), R12 + MOVV syscallArgs_a11(R13), R12 MOVV R12, 16(R3) - MOVV syscall15Args_a12(R13), R12 + MOVV syscallArgs_a12(R13), R12 MOVV R12, 24(R3) - MOVV syscall15Args_a13(R13), R12 + MOVV syscallArgs_a13(R13), R12 MOVV R12, 32(R3) - MOVV syscall15Args_a14(R13), R12 + MOVV syscallArgs_a14(R13), R12 MOVV R12, 40(R3) - MOVV syscall15Args_a15(R13), R12 + MOVV syscallArgs_a15(R13), R12 MOVV R12, 48(R3) - MOVV syscall15Args_a16(R13), R12 + MOVV syscallArgs_a16(R13), R12 MOVV R12, 56(R3) - MOVV syscall15Args_a17(R13), R12 + MOVV syscallArgs_a17(R13), R12 MOVV R12, 64(R3) - MOVV syscall15Args_a18(R13), R12 + MOVV syscallArgs_a18(R13), R12 MOVV R12, 72(R3) - MOVV syscall15Args_a19(R13), R12 + MOVV syscallArgs_a19(R13), R12 MOVV R12, 80(R3) - MOVV syscall15Args_a20(R13), R12 + MOVV syscallArgs_a20(R13), R12 MOVV R12, 88(R3) - MOVV syscall15Args_a21(R13), R12 + MOVV syscallArgs_a21(R13), R12 MOVV R12, 96(R3) - MOVV syscall15Args_a22(R13), R12 + MOVV syscallArgs_a22(R13), R12 MOVV R12, 104(R3) - MOVV syscall15Args_a23(R13), R12 + MOVV syscallArgs_a23(R13), R12 MOVV R12, 112(R3) - MOVV syscall15Args_a24(R13), R12 + MOVV syscallArgs_a24(R13), R12 MOVV R12, 120(R3) - MOVV syscall15Args_a25(R13), R12 + MOVV syscallArgs_a25(R13), R12 MOVV R12, 128(R3) - MOVV syscall15Args_a26(R13), R12 + MOVV syscallArgs_a26(R13), R12 MOVV R12, 136(R3) - MOVV syscall15Args_a27(R13), R12 + MOVV syscallArgs_a27(R13), R12 MOVV R12, 144(R3) - MOVV syscall15Args_a28(R13), R12 + MOVV syscallArgs_a28(R13), R12 MOVV R12, 152(R3) - MOVV syscall15Args_a29(R13), R12 + MOVV syscallArgs_a29(R13), R12 MOVV R12, 160(R3) - MOVV syscall15Args_a30(R13), R12 + MOVV syscallArgs_a30(R13), R12 MOVV R12, 168(R3) - MOVV syscall15Args_a31(R13), R12 + MOVV syscallArgs_a31(R13), R12 MOVV R12, 176(R3) - MOVV syscall15Args_a32(R13), R12 + MOVV syscallArgs_a32(R13), R12 MOVV R12, 184(R3) - MOVV syscall15Args_fn(R13), R12 + MOVV syscallArgs_fn(R13), R12 JAL (R12) // pop structure pointer @@ -119,12 +119,12 @@ TEXT syscall15X(SB), NOSPLIT, $0 ADDV $STACK_SIZE, R3 // save R4, R5 - MOVV R4, syscall15Args_a1(R13) - MOVV R5, syscall15Args_a2(R13) + MOVV R4, syscallArgs_a1(R13) + MOVV R5, syscallArgs_a2(R13) // save f0-f3 - MOVD F0, syscall15Args_f1(R13) - MOVD F1, syscall15Args_f2(R13) - MOVD F2, syscall15Args_f3(R13) - MOVD F3, syscall15Args_f4(R13) + MOVD F0, syscallArgs_f1(R13) + MOVD F1, syscallArgs_f2(R13) + MOVD F2, syscallArgs_f3(R13) + MOVD F3, syscallArgs_f4(R13) RET diff --git a/sys_ppc64le.s b/sys_ppc64le.s index 391b30a9..fc9c26ae 100644 --- a/sys_ppc64le.s +++ b/sys_ppc64le.s @@ -35,10 +35,10 @@ #define TOC_SAVE 160 #define ARGP_SAVE 168 -GLOBL ·syscall15XABI0(SB), NOPTR|RODATA, $8 -DATA ·syscall15XABI0(SB)/8, $syscall15X(SB) +GLOBL ·syscallXABI0(SB), NOPTR|RODATA, $8 +DATA ·syscallXABI0(SB)/8, $syscallX(SB) -TEXT syscall15X(SB), NOSPLIT, $0 +TEXT syscallX(SB), NOSPLIT, $0 // Prologue: create stack frame // R3 contains the args pointer on entry MOVD R1, R12 // save old SP @@ -51,49 +51,49 @@ TEXT syscall15X(SB), NOSPLIT, $0 // Save args pointer (in R3) MOVD R3, ARGP_SAVE(R1) - // R11 := args pointer (syscall15Args*) + // R11 := args pointer (syscallArgs*) MOVD R3, R11 // Load float args into F1-F8 - FMOVD syscall15Args_f1(R11), F1 - FMOVD syscall15Args_f2(R11), F2 - FMOVD syscall15Args_f3(R11), F3 - FMOVD syscall15Args_f4(R11), F4 - FMOVD syscall15Args_f5(R11), F5 - FMOVD syscall15Args_f6(R11), F6 - FMOVD syscall15Args_f7(R11), F7 - FMOVD syscall15Args_f8(R11), F8 + FMOVD syscallArgs_f1(R11), F1 + FMOVD syscallArgs_f2(R11), F2 + FMOVD syscallArgs_f3(R11), F3 + FMOVD syscallArgs_f4(R11), F4 + FMOVD syscallArgs_f5(R11), F5 + FMOVD syscallArgs_f6(R11), F6 + FMOVD syscallArgs_f7(R11), F7 + FMOVD syscallArgs_f8(R11), F8 // Load integer args into R3-R10 - MOVD syscall15Args_a1(R11), R3 - MOVD syscall15Args_a2(R11), R4 - MOVD syscall15Args_a3(R11), R5 - MOVD syscall15Args_a4(R11), R6 - MOVD syscall15Args_a5(R11), R7 - MOVD syscall15Args_a6(R11), R8 - MOVD syscall15Args_a7(R11), R9 - MOVD syscall15Args_a8(R11), R10 + MOVD syscallArgs_a1(R11), R3 + MOVD syscallArgs_a2(R11), R4 + MOVD syscallArgs_a3(R11), R5 + MOVD syscallArgs_a4(R11), R6 + MOVD syscallArgs_a5(R11), R7 + MOVD syscallArgs_a6(R11), R8 + MOVD syscallArgs_a7(R11), R9 + MOVD syscallArgs_a8(R11), R10 // Spill a9-a15 onto the stack (stack parameters start at 96(R1)) // Per ELFv2: parameter save area is 32-95, stack args start at 96 MOVD ARGP_SAVE(R1), R11 // reload args pointer - MOVD syscall15Args_a9(R11), R12 + MOVD syscallArgs_a9(R11), R12 MOVD R12, 96(R1) // a9 at 96(R1) - MOVD syscall15Args_a10(R11), R12 + MOVD syscallArgs_a10(R11), R12 MOVD R12, 104(R1) // a10 at 104(R1) - MOVD syscall15Args_a11(R11), R12 + MOVD syscallArgs_a11(R11), R12 MOVD R12, 112(R1) // a11 at 112(R1) - MOVD syscall15Args_a12(R11), R12 + MOVD syscallArgs_a12(R11), R12 MOVD R12, 120(R1) // a12 at 120(R1) - MOVD syscall15Args_a13(R11), R12 + MOVD syscallArgs_a13(R11), R12 MOVD R12, 128(R1) // a13 at 128(R1) - MOVD syscall15Args_a14(R11), R12 + MOVD syscallArgs_a14(R11), R12 MOVD R12, 136(R1) // a14 at 136(R1) - MOVD syscall15Args_a15(R11), R12 + MOVD syscallArgs_a15(R11), R12 MOVD R12, 144(R1) // a15 at 144(R1) // Call function: load fn and call - MOVD syscall15Args_fn(R11), R12 + MOVD syscallArgs_fn(R11), R12 MOVD R12, CTR BL (CTR) @@ -104,14 +104,14 @@ TEXT syscall15X(SB), NOSPLIT, $0 MOVD ARGP_SAVE(R1), R11 // Store integer results back (R3, R4) - MOVD R3, syscall15Args_a1(R11) - MOVD R4, syscall15Args_a2(R11) + MOVD R3, syscallArgs_a1(R11) + MOVD R4, syscallArgs_a2(R11) // Store float return values (F1-F4) - FMOVD F1, syscall15Args_f1(R11) - FMOVD F2, syscall15Args_f2(R11) - FMOVD F3, syscall15Args_f3(R11) - FMOVD F4, syscall15Args_f4(R11) + FMOVD F1, syscallArgs_f1(R11) + FMOVD F2, syscallArgs_f2(R11) + FMOVD F3, syscallArgs_f3(R11) + FMOVD F4, syscallArgs_f4(R11) // Epilogue: restore and return MOVD LR_SAVE(R1), R12 diff --git a/sys_riscv64.s b/sys_riscv64.s index b6784310..cdae50ae 100644 --- a/sys_riscv64.s +++ b/sys_riscv64.s @@ -19,10 +19,10 @@ #define SAVE_X18 208 #define SAVE_ARGP 216 -GLOBL ·syscall15XABI0(SB), NOPTR|RODATA, $8 -DATA ·syscall15XABI0(SB)/8, $syscall15X(SB) +GLOBL ·syscallXABI0(SB), NOPTR|RODATA, $8 +DATA ·syscallXABI0(SB)/8, $syscallX(SB) -TEXT syscall15X(SB), NOSPLIT, $0 +TEXT syscallX(SB), NOSPLIT, $0 // Allocate stack frame (keeps 16-byte alignment) SUB $STACK_SIZE, SP @@ -34,96 +34,96 @@ TEXT syscall15X(SB), NOSPLIT, $0 // Save original args pointer (in a0/X10) MOV X10, SAVE_ARGP(SP) - // X9 := args pointer (syscall15Args*) + // X9 := args pointer (syscallArgs*) MOV X10, X9 // Load float args into fa0-fa7 (F10-F17) - MOVD syscall15Args_f1(X9), F10 - MOVD syscall15Args_f2(X9), F11 - MOVD syscall15Args_f3(X9), F12 - MOVD syscall15Args_f4(X9), F13 - MOVD syscall15Args_f5(X9), F14 - MOVD syscall15Args_f6(X9), F15 - MOVD syscall15Args_f7(X9), F16 - MOVD syscall15Args_f8(X9), F17 + MOVD syscallArgs_f1(X9), F10 + MOVD syscallArgs_f2(X9), F11 + MOVD syscallArgs_f3(X9), F12 + MOVD syscallArgs_f4(X9), F13 + MOVD syscallArgs_f5(X9), F14 + MOVD syscallArgs_f6(X9), F15 + MOVD syscallArgs_f7(X9), F16 + MOVD syscallArgs_f8(X9), F17 // Load integer args into a0-a7 (X10-X17) - MOV syscall15Args_a1(X9), X10 - MOV syscall15Args_a2(X9), X11 - MOV syscall15Args_a3(X9), X12 - MOV syscall15Args_a4(X9), X13 - MOV syscall15Args_a5(X9), X14 - MOV syscall15Args_a6(X9), X15 - MOV syscall15Args_a7(X9), X16 - MOV syscall15Args_a8(X9), X17 + MOV syscallArgs_a1(X9), X10 + MOV syscallArgs_a2(X9), X11 + MOV syscallArgs_a3(X9), X12 + MOV syscallArgs_a4(X9), X13 + MOV syscallArgs_a5(X9), X14 + MOV syscallArgs_a6(X9), X15 + MOV syscallArgs_a7(X9), X16 + MOV syscallArgs_a8(X9), X17 // Spill a9-a32 onto the stack (C ABI) - MOV syscall15Args_a9(X9), X18 + MOV syscallArgs_a9(X9), X18 MOV X18, 0(SP) - MOV syscall15Args_a10(X9), X18 + MOV syscallArgs_a10(X9), X18 MOV X18, 8(SP) - MOV syscall15Args_a11(X9), X18 + MOV syscallArgs_a11(X9), X18 MOV X18, 16(SP) - MOV syscall15Args_a12(X9), X18 + MOV syscallArgs_a12(X9), X18 MOV X18, 24(SP) - MOV syscall15Args_a13(X9), X18 + MOV syscallArgs_a13(X9), X18 MOV X18, 32(SP) - MOV syscall15Args_a14(X9), X18 + MOV syscallArgs_a14(X9), X18 MOV X18, 40(SP) - MOV syscall15Args_a15(X9), X18 + MOV syscallArgs_a15(X9), X18 MOV X18, 48(SP) - MOV syscall15Args_a16(X9), X18 + MOV syscallArgs_a16(X9), X18 MOV X18, 56(SP) - MOV syscall15Args_a17(X9), X18 + MOV syscallArgs_a17(X9), X18 MOV X18, 64(SP) - MOV syscall15Args_a18(X9), X18 + MOV syscallArgs_a18(X9), X18 MOV X18, 72(SP) - MOV syscall15Args_a19(X9), X18 + MOV syscallArgs_a19(X9), X18 MOV X18, 80(SP) - MOV syscall15Args_a20(X9), X18 + MOV syscallArgs_a20(X9), X18 MOV X18, 88(SP) - MOV syscall15Args_a21(X9), X18 + MOV syscallArgs_a21(X9), X18 MOV X18, 96(SP) - MOV syscall15Args_a22(X9), X18 + MOV syscallArgs_a22(X9), X18 MOV X18, 104(SP) - MOV syscall15Args_a23(X9), X18 + MOV syscallArgs_a23(X9), X18 MOV X18, 112(SP) - MOV syscall15Args_a24(X9), X18 + MOV syscallArgs_a24(X9), X18 MOV X18, 120(SP) - MOV syscall15Args_a25(X9), X18 + MOV syscallArgs_a25(X9), X18 MOV X18, 128(SP) - MOV syscall15Args_a26(X9), X18 + MOV syscallArgs_a26(X9), X18 MOV X18, 136(SP) - MOV syscall15Args_a27(X9), X18 + MOV syscallArgs_a27(X9), X18 MOV X18, 144(SP) - MOV syscall15Args_a28(X9), X18 + MOV syscallArgs_a28(X9), X18 MOV X18, 152(SP) - MOV syscall15Args_a29(X9), X18 + MOV syscallArgs_a29(X9), X18 MOV X18, 160(SP) - MOV syscall15Args_a30(X9), X18 + MOV syscallArgs_a30(X9), X18 MOV X18, 168(SP) - MOV syscall15Args_a31(X9), X18 + MOV syscallArgs_a31(X9), X18 MOV X18, 176(SP) - MOV syscall15Args_a32(X9), X18 + MOV syscallArgs_a32(X9), X18 MOV X18, 184(SP) // Call fn // IMPORTANT: preserve RA across this call (we saved it above) - MOV syscall15Args_fn(X9), X18 + MOV syscallArgs_fn(X9), X18 CALL X18 - // Restore args pointer (syscall15Args*) for storing results + // Restore args pointer (syscallArgs*) for storing results MOV SAVE_ARGP(SP), X9 // Store results back - MOV X10, syscall15Args_a1(X9) - MOV X11, syscall15Args_a2(X9) + MOV X10, syscallArgs_a1(X9) + MOV X11, syscallArgs_a2(X9) // Store back float return regs if used by your ABI contract - MOVD F10, syscall15Args_f1(X9) - MOVD F11, syscall15Args_f2(X9) - MOVD F12, syscall15Args_f3(X9) - MOVD F13, syscall15Args_f4(X9) + MOVD F10, syscallArgs_f1(X9) + MOVD F11, syscallArgs_f2(X9) + MOVD F12, syscallArgs_f3(X9) + MOVD F13, syscallArgs_f4(X9) // Restore callee-saved regs and return address MOV SAVE_X18(SP), X18 diff --git a/sys_s390x.s b/sys_s390x.s index 040cb0e0..6db4f01b 100644 --- a/sys_s390x.s +++ b/sys_s390x.s @@ -32,10 +32,10 @@ #define STACK_ARGS 160 #define ARGP_SAVE 376 -GLOBL ·syscall15XABI0(SB), NOPTR|RODATA, $8 -DATA ·syscall15XABI0(SB)/8, $syscall15X(SB) +GLOBL ·syscallXABI0(SB), NOPTR|RODATA, $8 +DATA ·syscallXABI0(SB)/8, $syscallX(SB) -TEXT syscall15X(SB), NOSPLIT, $0 +TEXT syscallX(SB), NOSPLIT, $0 // On entry, R2 contains the args pointer // Save callee-saved registers in caller's frame (per ABI) STMG R6, R15, 48(R15) @@ -48,95 +48,95 @@ TEXT syscall15X(SB), NOSPLIT, $0 // Save args pointer MOVD R2, ARGP_SAVE(R15) - // R9 := args pointer (syscall15Args*) + // R9 := args pointer (syscallArgs*) MOVD R2, R9 // Load float args into F0, F2, F4, F6 (s390x uses even-numbered FPRs) - FMOVD syscall15Args_f1(R9), F0 - FMOVD syscall15Args_f2(R9), F2 - FMOVD syscall15Args_f3(R9), F4 - FMOVD syscall15Args_f4(R9), F6 + FMOVD syscallArgs_f1(R9), F0 + FMOVD syscallArgs_f2(R9), F2 + FMOVD syscallArgs_f3(R9), F4 + FMOVD syscallArgs_f4(R9), F6 // Load integer args into R2-R6 (5 registers) - MOVD syscall15Args_a1(R9), R2 - MOVD syscall15Args_a2(R9), R3 - MOVD syscall15Args_a3(R9), R4 - MOVD syscall15Args_a4(R9), R5 - MOVD syscall15Args_a5(R9), R6 + MOVD syscallArgs_a1(R9), R2 + MOVD syscallArgs_a2(R9), R3 + MOVD syscallArgs_a3(R9), R4 + MOVD syscallArgs_a4(R9), R5 + MOVD syscallArgs_a5(R9), R6 // Spill remaining args (a6-a32) onto the stack at 160(R15) MOVD ARGP_SAVE(R15), R9 // reload args pointer - MOVD syscall15Args_a6(R9), R1 + MOVD syscallArgs_a6(R9), R1 MOVD R1, (STACK_ARGS+0*8)(R15) - MOVD syscall15Args_a7(R9), R1 + MOVD syscallArgs_a7(R9), R1 MOVD R1, (STACK_ARGS+1*8)(R15) - MOVD syscall15Args_a8(R9), R1 + MOVD syscallArgs_a8(R9), R1 MOVD R1, (STACK_ARGS+2*8)(R15) - MOVD syscall15Args_a9(R9), R1 + MOVD syscallArgs_a9(R9), R1 MOVD R1, (STACK_ARGS+3*8)(R15) - MOVD syscall15Args_a10(R9), R1 + MOVD syscallArgs_a10(R9), R1 MOVD R1, (STACK_ARGS+4*8)(R15) - MOVD syscall15Args_a11(R9), R1 + MOVD syscallArgs_a11(R9), R1 MOVD R1, (STACK_ARGS+5*8)(R15) - MOVD syscall15Args_a12(R9), R1 + MOVD syscallArgs_a12(R9), R1 MOVD R1, (STACK_ARGS+6*8)(R15) - MOVD syscall15Args_a13(R9), R1 + MOVD syscallArgs_a13(R9), R1 MOVD R1, (STACK_ARGS+7*8)(R15) - MOVD syscall15Args_a14(R9), R1 + MOVD syscallArgs_a14(R9), R1 MOVD R1, (STACK_ARGS+8*8)(R15) - MOVD syscall15Args_a15(R9), R1 + MOVD syscallArgs_a15(R9), R1 MOVD R1, (STACK_ARGS+9*8)(R15) - MOVD syscall15Args_a16(R9), R1 + MOVD syscallArgs_a16(R9), R1 MOVD R1, (STACK_ARGS+10*8)(R15) - MOVD syscall15Args_a17(R9), R1 + MOVD syscallArgs_a17(R9), R1 MOVD R1, (STACK_ARGS+11*8)(R15) - MOVD syscall15Args_a18(R9), R1 + MOVD syscallArgs_a18(R9), R1 MOVD R1, (STACK_ARGS+12*8)(R15) - MOVD syscall15Args_a19(R9), R1 + MOVD syscallArgs_a19(R9), R1 MOVD R1, (STACK_ARGS+13*8)(R15) - MOVD syscall15Args_a20(R9), R1 + MOVD syscallArgs_a20(R9), R1 MOVD R1, (STACK_ARGS+14*8)(R15) - MOVD syscall15Args_a21(R9), R1 + MOVD syscallArgs_a21(R9), R1 MOVD R1, (STACK_ARGS+15*8)(R15) - MOVD syscall15Args_a22(R9), R1 + MOVD syscallArgs_a22(R9), R1 MOVD R1, (STACK_ARGS+16*8)(R15) - MOVD syscall15Args_a23(R9), R1 + MOVD syscallArgs_a23(R9), R1 MOVD R1, (STACK_ARGS+17*8)(R15) - MOVD syscall15Args_a24(R9), R1 + MOVD syscallArgs_a24(R9), R1 MOVD R1, (STACK_ARGS+18*8)(R15) - MOVD syscall15Args_a25(R9), R1 + MOVD syscallArgs_a25(R9), R1 MOVD R1, (STACK_ARGS+19*8)(R15) - MOVD syscall15Args_a26(R9), R1 + MOVD syscallArgs_a26(R9), R1 MOVD R1, (STACK_ARGS+20*8)(R15) - MOVD syscall15Args_a27(R9), R1 + MOVD syscallArgs_a27(R9), R1 MOVD R1, (STACK_ARGS+21*8)(R15) - MOVD syscall15Args_a28(R9), R1 + MOVD syscallArgs_a28(R9), R1 MOVD R1, (STACK_ARGS+22*8)(R15) - MOVD syscall15Args_a29(R9), R1 + MOVD syscallArgs_a29(R9), R1 MOVD R1, (STACK_ARGS+23*8)(R15) - MOVD syscall15Args_a30(R9), R1 + MOVD syscallArgs_a30(R9), R1 MOVD R1, (STACK_ARGS+24*8)(R15) - MOVD syscall15Args_a31(R9), R1 + MOVD syscallArgs_a31(R9), R1 MOVD R1, (STACK_ARGS+25*8)(R15) - MOVD syscall15Args_a32(R9), R1 + MOVD syscallArgs_a32(R9), R1 MOVD R1, (STACK_ARGS+26*8)(R15) // Call function - MOVD syscall15Args_fn(R9), R1 + MOVD syscallArgs_fn(R9), R1 BL (R1) // Restore args pointer for storing results MOVD ARGP_SAVE(R15), R9 // Store integer results back (R2, R3) - MOVD R2, syscall15Args_a1(R9) - MOVD R3, syscall15Args_a2(R9) + MOVD R2, syscallArgs_a1(R9) + MOVD R3, syscallArgs_a2(R9) // Store float return values (F0, F2, F4, F6) - FMOVD F0, syscall15Args_f1(R9) - FMOVD F2, syscall15Args_f2(R9) - FMOVD F4, syscall15Args_f3(R9) - FMOVD F6, syscall15Args_f4(R9) + FMOVD F0, syscallArgs_f1(R9) + FMOVD F2, syscallArgs_f2(R9) + FMOVD F4, syscallArgs_f3(R9) + FMOVD F6, syscallArgs_f4(R9) // Deallocate stack frame ADD $STACK_SIZE, R15 diff --git a/syscall.go b/syscall.go index 4b1dedd2..d1fac0ed 100644 --- a/syscall.go +++ b/syscall.go @@ -14,62 +14,31 @@ const ( maxArgs = 32 ) -type syscall15Args struct { +type syscallArgs struct { fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr a16, a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, a28, a29, a30, a31, a32 uintptr f1, f2, f3, f4, f5, f6, f7, f8 uintptr - f9, f10, f11, f12, f13 uintptr - arm64_r8 uintptr + arm64_r8 uintptr } -func (s *syscall15Args) Set(fn uintptr, ints []uintptr, floats []uintptr, r8 uintptr) { - s.fn = fn - s.a1 = ints[0] - s.a2 = ints[1] - s.a3 = ints[2] - s.a4 = ints[3] - s.a5 = ints[4] - s.a6 = ints[5] - s.a7 = ints[6] - s.a8 = ints[7] - s.a9 = ints[8] - s.a10 = ints[9] - s.a11 = ints[10] - s.a12 = ints[11] - s.a13 = ints[12] - s.a14 = ints[13] - s.a15 = ints[14] - s.a16 = ints[15] - s.a17 = ints[16] - s.a18 = ints[17] - s.a19 = ints[18] - s.a20 = ints[19] - s.a21 = ints[20] - s.a22 = ints[21] - s.a23 = ints[22] - s.a24 = ints[23] - s.a25 = ints[24] - s.a26 = ints[25] - s.a27 = ints[26] - s.a28 = ints[27] - s.a29 = ints[28] - s.a30 = ints[29] - s.a31 = ints[30] - s.a32 = ints[31] - s.f1 = floats[0] - s.f2 = floats[1] - s.f3 = floats[2] - s.f4 = floats[3] - s.f5 = floats[4] - s.f6 = floats[5] - s.f7 = floats[6] - s.f8 = floats[7] - s.f9 = floats[8] - s.f10 = floats[9] - s.f11 = floats[10] - s.f12 = floats[11] - s.f13 = floats[12] - s.arm64_r8 = r8 +func syscall_SyscallN(fn uintptr, sysargs []uintptr, floats []uintptr, r8 uintptr) *syscallArgs { + s := thePool.Get().(*syscallArgs) + *s = syscallArgs{ + fn: fn, + a1: sysargs[0], a2: sysargs[1], a3: sysargs[2], a4: sysargs[3], + a5: sysargs[4], a6: sysargs[5], a7: sysargs[6], a8: sysargs[7], + a9: sysargs[8], a10: sysargs[9], a11: sysargs[10], a12: sysargs[11], + a13: sysargs[12], a14: sysargs[13], a15: sysargs[14], a16: sysargs[15], + a17: sysargs[16], a18: sysargs[17], a19: sysargs[18], a20: sysargs[19], + a21: sysargs[20], a22: sysargs[21], a23: sysargs[22], a24: sysargs[23], + a25: sysargs[24], a26: sysargs[25], a27: sysargs[26], a28: sysargs[27], + a29: sysargs[28], a30: sysargs[29], a31: sysargs[30], a32: sysargs[31], + f1: floats[0], f2: floats[1], f3: floats[2], f4: floats[3], + f5: floats[4], f6: floats[5], f7: floats[6], f8: floats[7], + arm64_r8: r8, + } + runtime_cgocall(syscallXABI0, unsafe.Pointer(s)) + return s } // SyscallN takes fn, a C function pointer and a list of arguments as uintptr. @@ -104,16 +73,12 @@ func SyscallN(fn uintptr, args ...uintptr) (r1, r2, err uintptr) { return syscall_syscallN(fn, args...) } - syscall := thePool.Get().(*syscall15Args) - defer thePool.Put(syscall) - *syscall = syscall15Args{} - // add padding so there is no out-of-bounds slicing var tmp [maxArgs]uintptr copy(tmp[:], args) var floats [maxArgs]uintptr copy(floats[:], tmp[:]) - syscall.Set(fn, tmp[:], floats[:], 0) - runtime_cgocall(syscall15XABI0, unsafe.Pointer(syscall)) - return syscall.a1, syscall.a2, syscall.a3 + s := syscall_SyscallN(fn, tmp[:], floats[:], 0) + defer thePool.Put(s) + return s.a1, s.a2, s.a3 } diff --git a/syscall_32bit.go b/syscall_32bit.go index f98aeac0..2fde7df5 100644 --- a/syscall_32bit.go +++ b/syscall_32bit.go @@ -14,64 +14,33 @@ const ( maxArgs = 32 ) -type syscall15Args struct { +type syscallArgs struct { fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr a16, a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, a28, a29, a30, a31, a32 uintptr f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15, f16 uintptr arm64_r8 uintptr } -func (s *syscall15Args) Set(fn uintptr, ints []uintptr, floats []uintptr, r8 uintptr) { - s.fn = fn - s.a1 = ints[0] - s.a2 = ints[1] - s.a3 = ints[2] - s.a4 = ints[3] - s.a5 = ints[4] - s.a6 = ints[5] - s.a7 = ints[6] - s.a8 = ints[7] - s.a9 = ints[8] - s.a10 = ints[9] - s.a11 = ints[10] - s.a12 = ints[11] - s.a13 = ints[12] - s.a14 = ints[13] - s.a15 = ints[14] - s.a16 = ints[15] - s.a17 = ints[16] - s.a18 = ints[17] - s.a19 = ints[18] - s.a20 = ints[19] - s.a21 = ints[20] - s.a22 = ints[21] - s.a23 = ints[22] - s.a24 = ints[23] - s.a25 = ints[24] - s.a26 = ints[25] - s.a27 = ints[26] - s.a28 = ints[27] - s.a29 = ints[28] - s.a30 = ints[29] - s.a31 = ints[30] - s.a32 = ints[31] - s.f1 = floats[0] - s.f2 = floats[1] - s.f3 = floats[2] - s.f4 = floats[3] - s.f5 = floats[4] - s.f6 = floats[5] - s.f7 = floats[6] - s.f8 = floats[7] - s.f9 = floats[8] - s.f10 = floats[9] - s.f11 = floats[10] - s.f12 = floats[11] - s.f13 = floats[12] - s.f14 = floats[13] - s.f15 = floats[14] - s.f16 = floats[15] - s.arm64_r8 = r8 +func syscall_SyscallN(fn uintptr, sysargs []uintptr, floats []uintptr, r8 uintptr) *syscallArgs { + s := thePool.Get().(*syscallArgs) + *s = syscallArgs{ + fn: fn, + a1: sysargs[0], a2: sysargs[1], a3: sysargs[2], a4: sysargs[3], + a5: sysargs[4], a6: sysargs[5], a7: sysargs[6], a8: sysargs[7], + a9: sysargs[8], a10: sysargs[9], a11: sysargs[10], a12: sysargs[11], + a13: sysargs[12], a14: sysargs[13], a15: sysargs[14], a16: sysargs[15], + a17: sysargs[16], a18: sysargs[17], a19: sysargs[18], a20: sysargs[19], + a21: sysargs[20], a22: sysargs[21], a23: sysargs[22], a24: sysargs[23], + a25: sysargs[24], a26: sysargs[25], a27: sysargs[26], a28: sysargs[27], + a29: sysargs[28], a30: sysargs[29], a31: sysargs[30], a32: sysargs[31], + f1: floats[0], f2: floats[1], f3: floats[2], f4: floats[3], + f5: floats[4], f6: floats[5], f7: floats[6], f8: floats[7], + f9: floats[8], f10: floats[9], f11: floats[10], f12: floats[11], + f13: floats[12], f14: floats[13], f15: floats[14], f16: floats[15], + arm64_r8: r8, + } + runtime_cgocall(syscallXABI0, unsafe.Pointer(s)) + return s } // SyscallN takes fn, a C function pointer and a list of arguments as uintptr. @@ -106,16 +75,12 @@ func SyscallN(fn uintptr, args ...uintptr) (r1, r2, err uintptr) { return syscall_syscallN(fn, args...) } - syscall := thePool.Get().(*syscall15Args) - defer thePool.Put(syscall) - *syscall = syscall15Args{} - // Add padding so there is no out-of-bounds slicing. var tmp [maxArgs]uintptr copy(tmp[:], args) var floats [16]uintptr copy(floats[:], tmp[:16]) - syscall.Set(fn, tmp[:], floats[:], 0) - runtime_cgocall(syscall15XABI0, unsafe.Pointer(syscall)) - return syscall.a1, syscall.a2, syscall.a3 + s := syscall_SyscallN(fn, tmp[:], floats[:], 0) + defer thePool.Put(s) + return s.a1, s.a2, s.a3 } diff --git a/syscall_cgo_linux.go b/syscall_cgo_linux.go index 179167f4..24e0eb89 100644 --- a/syscall_cgo_linux.go +++ b/syscall_cgo_linux.go @@ -9,12 +9,7 @@ import ( "github.com/ebitengine/purego/internal/cgo" ) -var syscall15XABI0 = uintptr(cgo.Syscall15XABI0) - -//go:nosplit -func syscall_syscall15X(fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr) (r1, r2, err uintptr) { - return cgo.Syscall15X(fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15) -} +var syscallXABI0 = uintptr(cgo.SyscallXABI0) func NewCallback(_ any) uintptr { panic("purego: NewCallback on Linux is only supported on 386/amd64/arm64/arm/loong64/ppc64le/riscv64/s390x") diff --git a/syscall_ppc64le.go b/syscall_ppc64le.go index 320fa67c..dfb14c95 100644 --- a/syscall_ppc64le.go +++ b/syscall_ppc64le.go @@ -13,38 +13,26 @@ const ( maxArgs = 15 ) -type syscall15Args struct { +type syscallArgs struct { fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr f1, f2, f3, f4, f5, f6, f7, f8 uintptr arm64_r8 uintptr } -func (s *syscall15Args) Set(fn uintptr, ints []uintptr, floats []uintptr, r8 uintptr) { - s.fn = fn - s.a1 = ints[0] - s.a2 = ints[1] - s.a3 = ints[2] - s.a4 = ints[3] - s.a5 = ints[4] - s.a6 = ints[5] - s.a7 = ints[6] - s.a8 = ints[7] - s.a9 = ints[8] - s.a10 = ints[9] - s.a11 = ints[10] - s.a12 = ints[11] - s.a13 = ints[12] - s.a14 = ints[13] - s.a15 = ints[14] - s.f1 = floats[0] - s.f2 = floats[1] - s.f3 = floats[2] - s.f4 = floats[3] - s.f5 = floats[4] - s.f6 = floats[5] - s.f7 = floats[6] - s.f8 = floats[7] - s.arm64_r8 = r8 +func syscall_SyscallN(fn uintptr, sysargs []uintptr, floats []uintptr, r8 uintptr) *syscallArgs { + s := thePool.Get().(*syscallArgs) + *s = syscallArgs{ + fn: fn, + a1: sysargs[0], a2: sysargs[1], a3: sysargs[2], a4: sysargs[3], + a5: sysargs[4], a6: sysargs[5], a7: sysargs[6], a8: sysargs[7], + a9: sysargs[8], a10: sysargs[9], a11: sysargs[10], a12: sysargs[11], + a13: sysargs[12], a14: sysargs[13], a15: sysargs[14], + f1: floats[0], f2: floats[1], f3: floats[2], f4: floats[3], + f5: floats[4], f6: floats[5], f7: floats[6], f8: floats[7], + arm64_r8: r8, + } + runtime_cgocall(syscallXABI0, unsafe.Pointer(s)) + return s } // SyscallN takes fn, a C function pointer and a list of arguments as uintptr. @@ -74,15 +62,11 @@ func SyscallN(fn uintptr, args ...uintptr) (r1, r2, err uintptr) { panic("purego: too many arguments to SyscallN") } - syscall := thePool.Get().(*syscall15Args) - defer thePool.Put(syscall) - *syscall = syscall15Args{} - var tmp [maxArgs]uintptr copy(tmp[:], args) var floats [maxArgs]uintptr copy(floats[:], tmp[:]) - syscall.Set(fn, tmp[:], floats[:], 0) - runtime_cgocall(syscall15XABI0, unsafe.Pointer(syscall)) - return syscall.a1, syscall.a2, syscall.a3 + s := syscall_SyscallN(fn, tmp[:], floats[:], 0) + defer thePool.Put(s) + return s.a1, s.a2, s.a3 } diff --git a/syscall_unix.go b/syscall_unix.go index 508de3bd..87030f8c 100644 --- a/syscall_unix.go +++ b/syscall_unix.go @@ -13,22 +13,7 @@ import ( "unsafe" ) -var syscall15XABI0 uintptr - -func syscall_syscall15X(fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr) (r1, r2, err uintptr) { - args := thePool.Get().(*syscall15Args) - defer thePool.Put(args) - - *args = syscall15Args{ - fn: fn, - a1: a1, a2: a2, a3: a3, a4: a4, a5: a5, a6: a6, a7: a7, a8: a8, - a9: a9, a10: a10, a11: a11, a12: a12, a13: a13, a14: a14, a15: a15, - f1: a1, f2: a2, f3: a3, f4: a4, f5: a5, f6: a6, f7: a7, f8: a8, - } - - runtime_cgocall(syscall15XABI0, unsafe.Pointer(args)) - return args.a1, args.a2, args.a3 -} +var syscallXABI0 uintptr func syscall_syscallN(fn uintptr, args ...uintptr) (r1, r2, err uintptr) { panic("purego: syscall_syscallN is only supported on windows") diff --git a/syscall_windows.go b/syscall_windows.go index e373b902..1e2ba8cb 100644 --- a/syscall_windows.go +++ b/syscall_windows.go @@ -9,17 +9,13 @@ import ( "unsafe" ) -var syscall15XABI0 uintptr +var syscallXABI0 uintptr func syscall_syscallN(fn uintptr, args ...uintptr) (r1, r2, err uintptr) { r1, r2, errno := syscall.SyscallN(fn, args...) return r1, r2, uintptr(errno) } -func syscall_syscall15X(fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr) (r1, r2, err uintptr) { - return syscall_syscallN(fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15) -} - // NewCallback converts a Go function to a function pointer conforming to the stdcall calling convention. // This is useful when interoperating with Windows code requiring callbacks. The argument is expected to be a // function with one uintptr-sized result. The function must not have arguments with size larger than the From 5185f3f1f6a98959ee1817f52ecdb66fcae6ff69 Mon Sep 17 00:00:00 2001 From: Travis Cline Date: Sun, 12 Apr 2026 13:53:50 -0700 Subject: [PATCH 18/21] syscall: fix gofmt formatting reformats code to avoid failure --- syscall.go | 14 +++++++------- syscall_32bit.go | 8 ++++---- syscall_ppc64le.go | 8 ++++---- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/syscall.go b/syscall.go index d1fac0ed..32f4a3d9 100644 --- a/syscall.go +++ b/syscall.go @@ -18,23 +18,23 @@ type syscallArgs struct { fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr a16, a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, a28, a29, a30, a31, a32 uintptr f1, f2, f3, f4, f5, f6, f7, f8 uintptr - arm64_r8 uintptr + arm64_r8 uintptr } func syscall_SyscallN(fn uintptr, sysargs []uintptr, floats []uintptr, r8 uintptr) *syscallArgs { s := thePool.Get().(*syscallArgs) *s = syscallArgs{ - fn: fn, - a1: sysargs[0], a2: sysargs[1], a3: sysargs[2], a4: sysargs[3], - a5: sysargs[4], a6: sysargs[5], a7: sysargs[6], a8: sysargs[7], - a9: sysargs[8], a10: sysargs[9], a11: sysargs[10], a12: sysargs[11], + fn: fn, + a1: sysargs[0], a2: sysargs[1], a3: sysargs[2], a4: sysargs[3], + a5: sysargs[4], a6: sysargs[5], a7: sysargs[6], a8: sysargs[7], + a9: sysargs[8], a10: sysargs[9], a11: sysargs[10], a12: sysargs[11], a13: sysargs[12], a14: sysargs[13], a15: sysargs[14], a16: sysargs[15], a17: sysargs[16], a18: sysargs[17], a19: sysargs[18], a20: sysargs[19], a21: sysargs[20], a22: sysargs[21], a23: sysargs[22], a24: sysargs[23], a25: sysargs[24], a26: sysargs[25], a27: sysargs[26], a28: sysargs[27], a29: sysargs[28], a30: sysargs[29], a31: sysargs[30], a32: sysargs[31], - f1: floats[0], f2: floats[1], f3: floats[2], f4: floats[3], - f5: floats[4], f6: floats[5], f7: floats[6], f8: floats[7], + f1: floats[0], f2: floats[1], f3: floats[2], f4: floats[3], + f5: floats[4], f6: floats[5], f7: floats[6], f8: floats[7], arm64_r8: r8, } runtime_cgocall(syscallXABI0, unsafe.Pointer(s)) diff --git a/syscall_32bit.go b/syscall_32bit.go index 2fde7df5..c0641110 100644 --- a/syscall_32bit.go +++ b/syscall_32bit.go @@ -24,10 +24,10 @@ type syscallArgs struct { func syscall_SyscallN(fn uintptr, sysargs []uintptr, floats []uintptr, r8 uintptr) *syscallArgs { s := thePool.Get().(*syscallArgs) *s = syscallArgs{ - fn: fn, - a1: sysargs[0], a2: sysargs[1], a3: sysargs[2], a4: sysargs[3], - a5: sysargs[4], a6: sysargs[5], a7: sysargs[6], a8: sysargs[7], - a9: sysargs[8], a10: sysargs[9], a11: sysargs[10], a12: sysargs[11], + fn: fn, + a1: sysargs[0], a2: sysargs[1], a3: sysargs[2], a4: sysargs[3], + a5: sysargs[4], a6: sysargs[5], a7: sysargs[6], a8: sysargs[7], + a9: sysargs[8], a10: sysargs[9], a11: sysargs[10], a12: sysargs[11], a13: sysargs[12], a14: sysargs[13], a15: sysargs[14], a16: sysargs[15], a17: sysargs[16], a18: sysargs[17], a19: sysargs[18], a20: sysargs[19], a21: sysargs[20], a22: sysargs[21], a23: sysargs[22], a24: sysargs[23], diff --git a/syscall_ppc64le.go b/syscall_ppc64le.go index dfb14c95..a133be3a 100644 --- a/syscall_ppc64le.go +++ b/syscall_ppc64le.go @@ -22,10 +22,10 @@ type syscallArgs struct { func syscall_SyscallN(fn uintptr, sysargs []uintptr, floats []uintptr, r8 uintptr) *syscallArgs { s := thePool.Get().(*syscallArgs) *s = syscallArgs{ - fn: fn, - a1: sysargs[0], a2: sysargs[1], a3: sysargs[2], a4: sysargs[3], - a5: sysargs[4], a6: sysargs[5], a7: sysargs[6], a8: sysargs[7], - a9: sysargs[8], a10: sysargs[9], a11: sysargs[10], a12: sysargs[11], + fn: fn, + a1: sysargs[0], a2: sysargs[1], a3: sysargs[2], a4: sysargs[3], + a5: sysargs[4], a6: sysargs[5], a7: sysargs[6], a8: sysargs[7], + a9: sysargs[8], a10: sysargs[9], a11: sysargs[10], a12: sysargs[11], a13: sysargs[12], a14: sysargs[13], a15: sysargs[14], f1: floats[0], f2: floats[1], f3: floats[2], f4: floats[3], f5: floats[4], f6: floats[5], f7: floats[6], f8: floats[7], From fb6bdb038c315cb69cc5f9e63b5156e36ac9ab5e Mon Sep 17 00:00:00 2001 From: Travis Cline Date: Sun, 26 Apr 2026 12:56:45 -0700 Subject: [PATCH 19/21] internal/cgo: support 32 args in cgo syscall path Match the Go-side syscallArgs layout so runtime_cgocall passes a struct the C side can read correctly. Errno is written to a3 to match the asm convention; the Go caller reads s.a1, s.a2, s.a3. --- internal/cgo/syscall_cgo_unix.go | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/internal/cgo/syscall_cgo_unix.go b/internal/cgo/syscall_cgo_unix.go index 1b061dc3..99877387 100644 --- a/internal/cgo/syscall_cgo_unix.go +++ b/internal/cgo/syscall_cgo_unix.go @@ -19,20 +19,26 @@ package cgo typedef struct syscallArgs { uintptr_t fn; uintptr_t a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15; + uintptr_t a16, a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, a28, a29, a30, a31, a32; uintptr_t f1, f2, f3, f4, f5, f6, f7, f8; - uintptr_t err; + uintptr_t arm64_r8; } syscallArgs; void syscall15(struct syscallArgs *args) { assert((args->f1|args->f2|args->f3|args->f4|args->f5|args->f6|args->f7|args->f8) == 0); uintptr_t (*func_name)(uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4, uintptr_t a5, uintptr_t a6, uintptr_t a7, uintptr_t a8, uintptr_t a9, uintptr_t a10, uintptr_t a11, uintptr_t a12, - uintptr_t a13, uintptr_t a14, uintptr_t a15); + uintptr_t a13, uintptr_t a14, uintptr_t a15, uintptr_t a16, uintptr_t a17, uintptr_t a18, + uintptr_t a19, uintptr_t a20, uintptr_t a21, uintptr_t a22, uintptr_t a23, uintptr_t a24, + uintptr_t a25, uintptr_t a26, uintptr_t a27, uintptr_t a28, uintptr_t a29, uintptr_t a30, + uintptr_t a31, uintptr_t a32); *(void**)(&func_name) = (void*)(args->fn); - uintptr_t r1 = func_name(args->a1,args->a2,args->a3,args->a4,args->a5,args->a6,args->a7,args->a8,args->a9, - args->a10,args->a11,args->a12,args->a13,args->a14,args->a15); + uintptr_t r1 = func_name(args->a1,args->a2,args->a3,args->a4,args->a5,args->a6,args->a7,args->a8,args->a9, + args->a10,args->a11,args->a12,args->a13,args->a14,args->a15,args->a16,args->a17,args->a18, + args->a19,args->a20,args->a21,args->a22,args->a23,args->a24,args->a25,args->a26,args->a27, + args->a28,args->a29,args->a30,args->a31,args->a32); args->a1 = r1; - args->err = errno; + args->a3 = errno; } */ @@ -45,11 +51,13 @@ var SyscallXABI0 = unsafe.Pointer(C.syscall15) //go:nosplit func SyscallX(fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr) (r1, r2, err uintptr) { args := C.syscallArgs{ - C.uintptr_t(fn), C.uintptr_t(a1), C.uintptr_t(a2), C.uintptr_t(a3), - C.uintptr_t(a4), C.uintptr_t(a5), C.uintptr_t(a6), - C.uintptr_t(a7), C.uintptr_t(a8), C.uintptr_t(a9), C.uintptr_t(a10), C.uintptr_t(a11), C.uintptr_t(a12), - C.uintptr_t(a13), C.uintptr_t(a14), C.uintptr_t(a15), 0, 0, 0, 0, 0, 0, 0, 0, 0, + fn: C.uintptr_t(fn), + a1: C.uintptr_t(a1), a2: C.uintptr_t(a2), a3: C.uintptr_t(a3), + a4: C.uintptr_t(a4), a5: C.uintptr_t(a5), a6: C.uintptr_t(a6), + a7: C.uintptr_t(a7), a8: C.uintptr_t(a8), a9: C.uintptr_t(a9), + a10: C.uintptr_t(a10), a11: C.uintptr_t(a11), a12: C.uintptr_t(a12), + a13: C.uintptr_t(a13), a14: C.uintptr_t(a14), a15: C.uintptr_t(a15), } C.syscall15(&args) - return uintptr(args.a1), 0, uintptr(args.err) + return uintptr(args.a1), uintptr(args.a2), uintptr(args.a3) } From 1ef573c5a8da2e2127d65241bffb550e8ed54ab2 Mon Sep 17 00:00:00 2001 From: Travis Cline Date: Sun, 26 Apr 2026 13:13:09 -0700 Subject: [PATCH 20/21] internal/cgo: remove unused SyscallX wrapper The cgo path is invoked via runtime_cgocall(syscallXABI0, ...) directly against C.syscall15, so the SyscallX Go wrapper has had no callers since b9c5f0a. The syscall_syscall15X helpers in syscall_unix.go and syscall_windows.go were deleted in that commit per review feedback; this completes the same cleanup for internal/cgo. --- internal/cgo/syscall_cgo_unix.go | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/internal/cgo/syscall_cgo_unix.go b/internal/cgo/syscall_cgo_unix.go index 99877387..c71287e3 100644 --- a/internal/cgo/syscall_cgo_unix.go +++ b/internal/cgo/syscall_cgo_unix.go @@ -47,17 +47,3 @@ import "unsafe" // assign purego.syscallXABI0 to the C version of this function. var SyscallXABI0 = unsafe.Pointer(C.syscall15) - -//go:nosplit -func SyscallX(fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr) (r1, r2, err uintptr) { - args := C.syscallArgs{ - fn: C.uintptr_t(fn), - a1: C.uintptr_t(a1), a2: C.uintptr_t(a2), a3: C.uintptr_t(a3), - a4: C.uintptr_t(a4), a5: C.uintptr_t(a5), a6: C.uintptr_t(a6), - a7: C.uintptr_t(a7), a8: C.uintptr_t(a8), a9: C.uintptr_t(a9), - a10: C.uintptr_t(a10), a11: C.uintptr_t(a11), a12: C.uintptr_t(a12), - a13: C.uintptr_t(a13), a14: C.uintptr_t(a14), a15: C.uintptr_t(a15), - } - C.syscall15(&args) - return uintptr(args.a1), uintptr(args.a2), uintptr(args.a3) -} From 4830a635560e2c061e68792aaf2e7b91cf0bc255 Mon Sep 17 00:00:00 2001 From: Travis Cline Date: Wed, 29 Apr 2026 15:53:27 -0700 Subject: [PATCH 21/21] purego: test ppc64le 16-argument limit --- func_test.go | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/func_test.go b/func_test.go index dfe512bc..cf7ebc04 100644 --- a/func_test.go +++ b/func_test.go @@ -527,6 +527,19 @@ func TestABI_TooManyArguments(t *testing.T) { }) }) + t.Run("registerfunc_16_int64_exceeds_ppc64le_limit", func(t *testing.T) { + if runtime.GOARCH != "ppc64le" { + t.Skip("ppc64le retains the 15-argument limit") + } + mustPanic(t, "purego: too many stack arguments", func() { + var fn func( + int64, int64, int64, int64, int64, int64, int64, int64, + int64, int64, int64, int64, int64, int64, int64, int64, + ) + purego.RegisterFunc(&fn, 1) + }) + }) + t.Run("syscalln_33_uintptr_exceeds_limit", func(t *testing.T) { mustPanic(t, "purego: too many arguments to SyscallN", func() { purego.SyscallN(1, @@ -538,6 +551,18 @@ func TestABI_TooManyArguments(t *testing.T) { ) }) }) + + t.Run("syscalln_16_uintptr_exceeds_ppc64le_limit", func(t *testing.T) { + if runtime.GOARCH != "ppc64le" { + t.Skip("ppc64le retains the 15-argument limit") + } + mustPanic(t, "purego: too many arguments to SyscallN", func() { + purego.SyscallN(1, + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + ) + }) + }) } func buildSharedLib(compilerEnv, libFile string, sources ...string) error {