diff --git a/cdecl.go b/cdecl.go new file mode 100644 index 00000000..a4b105ca --- /dev/null +++ b/cdecl.go @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2026 The Ebitengine Authors + +package purego + +// CDecl marks a function as being called using the __cdecl calling convention as defined in +// the [MSDocs] when passed to NewCallback. It must be the first argument to the function. +// This is only useful on 386 Windows, but it is safe to use on other platforms. +// +// [MSDocs]: https://learn.microsoft.com/en-us/cpp/cpp/cdecl?view=msvc-170 +type CDecl struct{} diff --git a/func.go b/func.go index 2192dd7c..c6d1150f 100644 --- a/func.go +++ b/func.go @@ -23,7 +23,7 @@ const ( ) var thePool = sync.Pool{New: func() any { - return new(syscall15Args) + return new(syscallArgs) }} // RegisterLibFunc is a wrapper around RegisterFunc that uses the C function returned from Dlsym(handle, name). @@ -141,6 +141,7 @@ func RegisterFunc(fptr any, cfn uintptr) { // to avoid crashing with too many arguments var ints int var floats int + floatArgRegs := numOfFloatRegisters() var stack int for i := 0; i < ty.NumIn(); i++ { arg := ty.In(i) @@ -167,7 +168,7 @@ func RegisterFunc(fptr any, cfn uintptr) { stack++ } case reflect.Float32, reflect.Float64: - if floats < numOfFloatRegisters() { + if floats < floatArgRegs { floats++ } else { stack++ @@ -202,10 +203,15 @@ func RegisterFunc(fptr any, cfn uintptr) { } } - sizeOfStack := maxArgs - numOfIntegerRegisters() - // On Darwin ARM64, use byte-based validation since arguments pack efficiently. - // See https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms - if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" { + argsLimit := maxArgs + sizeOfStack := argsLimit - numOfIntegerRegisters() + if runtime.GOOS == "windows" { + if ints+floats+stack > argsLimit { + panic("purego: too many stack arguments") + } + } else if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" { + // On Darwin ARM64, use byte-based validation since arguments pack efficiently. + // See https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms stackBytes := estimateStackBytes(ty) maxStackBytes := sizeOfStack * 8 if stackBytes > maxStackBytes { @@ -224,6 +230,7 @@ func RegisterFunc(fptr any, cfn uintptr) { // since numOfFloatRegisters() is a function call, not a constant. // maxArgs is always greater than or equal to numOfFloatRegisters() so this is safe. var floats [maxArgs]uintptr + floatArgRegs := numOfFloatRegisters() var numInts int var numFloats int var numStack int @@ -243,7 +250,7 @@ func RegisterFunc(fptr any, cfn uintptr) { } } addFloat = func(x uintptr) { - if numFloats < numOfFloatRegisters() { + if numFloats < floatArgRegs { floats[numFloats] = x numFloats++ } else { @@ -257,6 +264,9 @@ func RegisterFunc(fptr any, cfn uintptr) { // This is in contrast to how macOS and Linux pass arguments which // tries to use as many registers as possible in the calling convention. addStack = func(x uintptr) { + if numStack >= maxArgs { + panic("purego: too many stack arguments") + } sysargs[numStack] = x numStack++ } @@ -310,24 +320,16 @@ func RegisterFunc(fptr any, cfn uintptr) { keepAlive = addValue(v, keepAlive, addInt, addFloat, addStack, &numInts, &numFloats, &numStack) } - syscall := thePool.Get().(*syscall15Args) - defer thePool.Put(syscall) - - if runtime.GOARCH == "loong64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "riscv64" || runtime.GOARCH == "s390x" { - syscall.Set(cfn, sysargs[:], floats[:], 0) - runtime_cgocall(syscall15XABI0, unsafe.Pointer(syscall)) - } else if runtime.GOARCH == "arm64" || runtime.GOOS != "windows" { - // Use the normal arm64 calling convention even on Windows - syscall.Set(cfn, sysargs[:], floats[:], arm64_r8) - runtime_cgocall(syscall15XABI0, unsafe.Pointer(syscall)) - } else { - *syscall = syscall15Args{} - // This is a fallback for Windows amd64, 386, and arm. Note this may not support floats - syscall.a1, syscall.a2, _ = syscall_syscall15X(cfn, sysargs[0], sysargs[1], sysargs[2], sysargs[3], sysargs[4], - sysargs[5], sysargs[6], sysargs[7], sysargs[8], sysargs[9], sysargs[10], sysargs[11], - sysargs[12], sysargs[13], sysargs[14]) + var syscall *syscallArgs + if runtime.GOOS == "windows" && runtime.GOARCH != "arm64" { + // Windows amd64, 386, and arm use syscall.SyscallN. + syscall = thePool.Get().(*syscallArgs) + syscall.a1, syscall.a2, _ = syscall_syscallN(cfn, sysargs[:numStack]...) syscall.f1 = syscall.a2 // on amd64 a2 stores the float return. On 32bit platforms floats aren't support + } else { + syscall = syscall_SyscallN(cfn, sysargs[:], floats[:], arm64_r8) } + defer thePool.Put(syscall) if ty.NumOut() == 0 { return nil } diff --git a/func_test.go b/func_test.go index 6ae69c40..cf7ebc04 100644 --- a/func_test.go +++ b/func_test.go @@ -373,8 +373,8 @@ func TestABI_ArgumentPassing(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if tt.name == "20_int32" && (runtime.GOOS != "darwin" || runtime.GOARCH != "arm64") { - t.Skip("20 int32 arguments only supported on Darwin ARM64 with smart stack checking") + if tt.name == "20_int32" && runtime.GOARCH == "ppc64le" { + t.Skip("ppc64le retains the 15-argument limit") } if tt.name == "10_float32" && (runtime.GOARCH == "loong64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "riscv64" || runtime.GOARCH == "s390x") { t.Skip("float32 stack arguments not yet supported on this platform") @@ -394,39 +394,174 @@ func TestABI_ArgumentPassing(t *testing.T) { } }) } -} -func TestABI_TooManyArguments(t *testing.T) { - if runtime.GOOS != "darwin" || runtime.GOARCH != "arm64" { - t.Skip("This test is specific to Darwin ARM64") - } + t.Run("20_uintptr", func(t *testing.T) { + if runtime.GOARCH == "ppc64le" { + t.Skip("ppc64le retains the 15-argument limit") + } + var fn func(uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr) uintptr + purego.RegisterLibFunc(&fn, lib, "stack_20_uintptr") + got := fn(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20) + const want = uintptr(210) + if got != want { + t.Fatalf("stack_20_uintptr: got %d, want %d", got, want) + } + }) - libFileName := filepath.Join(t.TempDir(), "abitest.so") - if err := buildSharedLib("CC", libFileName, filepath.Join("testdata", "abitest", "abi_test.c")); err != nil { - t.Fatal(err) - } - lib, err := load.OpenLibrary(libFileName) - if err != nil { - t.Fatalf("Failed to open library %q: %v", libFileName, err) - } - t.Cleanup(func() { - if err := load.CloseLibrary(lib); err != nil { - t.Errorf("Failed to close library: %v", err) + t.Run("32_uintptr", func(t *testing.T) { + if runtime.GOARCH == "ppc64le" { + t.Skip("ppc64le retains the 15-argument limit") + } + var fn func( + uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, + uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, + uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, + uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, + ) uintptr + purego.RegisterLibFunc(&fn, lib, "stack_32_uintptr") + got := fn( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ) + const want = uintptr(528) + if got != want { + t.Fatalf("stack_32_uintptr: got %d, want %d", got, want) + } + }) + + t.Run("syscalln_20_uintptr", func(t *testing.T) { + if runtime.GOARCH == "ppc64le" { + t.Skip("ppc64le retains the 15-argument limit") + } + fn, err := load.OpenSymbol(lib, "stack_20_uintptr") + if err != nil { + t.Fatalf("OpenSymbol(stack_20_uintptr) failed: %v", err) + } + got, _, _ := purego.SyscallN(fn, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + ) + const want = uintptr(210) + if got != want { + t.Fatalf("stack_20_uintptr SyscallN: got %d, want %d", got, want) + } + }) + + t.Run("syscalln_32_uintptr", func(t *testing.T) { + if runtime.GOARCH == "ppc64le" { + t.Skip("ppc64le retains the 15-argument limit") + } + fn, err := load.OpenSymbol(lib, "stack_32_uintptr") + if err != nil { + t.Fatalf("OpenSymbol(stack_32_uintptr) failed: %v", err) + } + got, _, _ := purego.SyscallN(fn, + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ) + const want = uintptr(528) + if got != want { + t.Fatalf("stack_32_uintptr SyscallN: got %d, want %d", got, want) + } + }) + + t.Run("32_mixed_int_float", func(t *testing.T) { + if unsafe.Sizeof(uintptr(0)) == 4 { + t.Skip("requires 64-bit uintptr slots") + } + if runtime.GOARCH == "ppc64le" { + t.Skip("mixed int/float stack arguments are not yet supported on ppc64le") + } + + var fn func( + uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, + uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, + float64, float64, float64, float64, float64, float64, float64, float64, + float64, float64, float64, float64, float64, float64, float64, float64, + ) float64 + purego.RegisterLibFunc(&fn, lib, "stack_32_mixed_int_float") + got := fn( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + ) + const want = 5168.0 + if got != want { + t.Fatalf("stack_32_mixed_int_float: got %f, want %f", got, want) } }) +} - // Test that 35 int64 arguments (27 slots needed) exceeds the limit - t.Run("35_int64_exceeds_limit", func(t *testing.T) { +func TestABI_TooManyArguments(t *testing.T) { + mustPanic := func(t *testing.T, want string, f func()) { + t.Helper() defer func() { - if r := recover(); r != nil { - t.Logf("Got expected panic: %v", r) - } else { - t.Errorf("Expected panic but didn't get one") + r := recover() + if r == nil { + t.Fatalf("expected panic %q, got none", want) + } + got := fmt.Sprint(r) + if got != want { + t.Fatalf("panic mismatch:\n got: %q\n want: %q", got, want) } }() + f() + } + + // 33 int64 parameters exceeds maxArgs=32. + t.Run("registerfunc_33_int64_exceeds_limit", func(t *testing.T) { + mustPanic(t, "purego: too many stack arguments", func() { + var fn func( + int64, int64, int64, int64, int64, int64, int64, int64, + int64, int64, int64, int64, int64, int64, int64, int64, + int64, int64, int64, int64, int64, int64, int64, int64, + int64, int64, int64, int64, int64, int64, int64, int64, + int64, + ) + purego.RegisterFunc(&fn, 1) + }) + }) - var fn func(*byte, uintptr, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64, int64) - purego.RegisterLibFunc(&fn, lib, "stack_35_int64_exceeds") + t.Run("registerfunc_16_int64_exceeds_ppc64le_limit", func(t *testing.T) { + if runtime.GOARCH != "ppc64le" { + t.Skip("ppc64le retains the 15-argument limit") + } + mustPanic(t, "purego: too many stack arguments", func() { + var fn func( + int64, int64, int64, int64, int64, int64, int64, int64, + int64, int64, int64, int64, int64, int64, int64, int64, + ) + purego.RegisterFunc(&fn, 1) + }) + }) + + t.Run("syscalln_33_uintptr_exceeds_limit", func(t *testing.T) { + mustPanic(t, "purego: too many arguments to SyscallN", func() { + purego.SyscallN(1, + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + 33, + ) + }) + }) + + t.Run("syscalln_16_uintptr_exceeds_ppc64le_limit", func(t *testing.T) { + if runtime.GOARCH != "ppc64le" { + t.Skip("ppc64le retains the 15-argument limit") + } + mustPanic(t, "purego: too many arguments to SyscallN", func() { + purego.SyscallN(1, + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + ) + }) }) } diff --git a/internal/cgo/syscall_cgo_unix.go b/internal/cgo/syscall_cgo_unix.go index 1e39de3b..c71287e3 100644 --- a/internal/cgo/syscall_cgo_unix.go +++ b/internal/cgo/syscall_cgo_unix.go @@ -16,40 +16,34 @@ package cgo #include #include -typedef struct syscall15Args { +typedef struct syscallArgs { uintptr_t fn; uintptr_t a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15; + uintptr_t a16, a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, a28, a29, a30, a31, a32; uintptr_t f1, f2, f3, f4, f5, f6, f7, f8; - uintptr_t err; -} syscall15Args; + uintptr_t arm64_r8; +} syscallArgs; -void syscall15(struct syscall15Args *args) { +void syscall15(struct syscallArgs *args) { assert((args->f1|args->f2|args->f3|args->f4|args->f5|args->f6|args->f7|args->f8) == 0); uintptr_t (*func_name)(uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4, uintptr_t a5, uintptr_t a6, uintptr_t a7, uintptr_t a8, uintptr_t a9, uintptr_t a10, uintptr_t a11, uintptr_t a12, - uintptr_t a13, uintptr_t a14, uintptr_t a15); + uintptr_t a13, uintptr_t a14, uintptr_t a15, uintptr_t a16, uintptr_t a17, uintptr_t a18, + uintptr_t a19, uintptr_t a20, uintptr_t a21, uintptr_t a22, uintptr_t a23, uintptr_t a24, + uintptr_t a25, uintptr_t a26, uintptr_t a27, uintptr_t a28, uintptr_t a29, uintptr_t a30, + uintptr_t a31, uintptr_t a32); *(void**)(&func_name) = (void*)(args->fn); - uintptr_t r1 = func_name(args->a1,args->a2,args->a3,args->a4,args->a5,args->a6,args->a7,args->a8,args->a9, - args->a10,args->a11,args->a12,args->a13,args->a14,args->a15); + uintptr_t r1 = func_name(args->a1,args->a2,args->a3,args->a4,args->a5,args->a6,args->a7,args->a8,args->a9, + args->a10,args->a11,args->a12,args->a13,args->a14,args->a15,args->a16,args->a17,args->a18, + args->a19,args->a20,args->a21,args->a22,args->a23,args->a24,args->a25,args->a26,args->a27, + args->a28,args->a29,args->a30,args->a31,args->a32); args->a1 = r1; - args->err = errno; + args->a3 = errno; } */ import "C" import "unsafe" -// assign purego.syscall15XABI0 to the C version of this function. -var Syscall15XABI0 = unsafe.Pointer(C.syscall15) - -//go:nosplit -func Syscall15X(fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr) (r1, r2, err uintptr) { - args := C.syscall15Args{ - C.uintptr_t(fn), C.uintptr_t(a1), C.uintptr_t(a2), C.uintptr_t(a3), - C.uintptr_t(a4), C.uintptr_t(a5), C.uintptr_t(a6), - C.uintptr_t(a7), C.uintptr_t(a8), C.uintptr_t(a9), C.uintptr_t(a10), C.uintptr_t(a11), C.uintptr_t(a12), - C.uintptr_t(a13), C.uintptr_t(a14), C.uintptr_t(a15), 0, 0, 0, 0, 0, 0, 0, 0, 0, - } - C.syscall15(&args) - return uintptr(args.a1), 0, uintptr(args.err) -} +// assign purego.syscallXABI0 to the C version of this function. +var SyscallXABI0 = unsafe.Pointer(C.syscall15) diff --git a/struct_386.go b/struct_386.go index a4621883..d34fa2cc 100644 --- a/struct_386.go +++ b/struct_386.go @@ -12,7 +12,7 @@ func addStruct(v reflect.Value, numInts, numFloats, numStack *int, addInt, addFl panic("purego: struct arguments are not supported") } -func getStruct(outType reflect.Type, syscall syscall15Args) (v reflect.Value) { +func getStruct(outType reflect.Type, syscall syscallArgs) (v reflect.Value) { panic("purego: struct returns are not supported") } diff --git a/struct_amd64.go b/struct_amd64.go index 7ca50c59..40ed91c0 100644 --- a/struct_amd64.go +++ b/struct_amd64.go @@ -10,7 +10,7 @@ import ( "unsafe" ) -func getStruct(outType reflect.Type, syscall syscall15Args) (v reflect.Value) { +func getStruct(outType reflect.Type, syscall syscallArgs) (v reflect.Value) { outSize := outType.Size() switch { case outSize == 0: diff --git a/struct_arm.go b/struct_arm.go index 6519e4ad..bfaac841 100644 --- a/struct_arm.go +++ b/struct_arm.go @@ -28,7 +28,7 @@ func addStruct(v reflect.Value, numInts, numFloats, numStack *int, addInt, addFl return keepAlive } -func getStruct(outType reflect.Type, syscall syscall15Args) (v reflect.Value) { +func getStruct(outType reflect.Type, syscall syscallArgs) (v reflect.Value) { outSize := outType.Size() if outSize == 0 { return reflect.New(outType).Elem() diff --git a/struct_arm64.go b/struct_arm64.go index 3a04828c..285c756b 100644 --- a/struct_arm64.go +++ b/struct_arm64.go @@ -14,7 +14,7 @@ import ( "github.com/ebitengine/purego/internal/strings" ) -func getStruct(outType reflect.Type, syscall syscall15Args) (v reflect.Value) { +func getStruct(outType reflect.Type, syscall syscallArgs) (v reflect.Value) { outSize := outType.Size() switch { case outSize == 0: diff --git a/struct_loong64.go b/struct_loong64.go index 0464cd76..7a8a7c7c 100644 --- a/struct_loong64.go +++ b/struct_loong64.go @@ -9,7 +9,7 @@ import ( "unsafe" ) -func getStruct(outType reflect.Type, syscall syscall15Args) (v reflect.Value) { +func getStruct(outType reflect.Type, syscall syscallArgs) (v reflect.Value) { outSize := outType.Size() switch { case outSize == 0: diff --git a/struct_ppc64le.go b/struct_ppc64le.go index f781ae7f..0cb481e4 100644 --- a/struct_ppc64le.go +++ b/struct_ppc64le.go @@ -8,7 +8,7 @@ import ( "unsafe" ) -func getStruct(outType reflect.Type, syscall syscall15Args) reflect.Value { +func getStruct(outType reflect.Type, syscall syscallArgs) reflect.Value { outSize := outType.Size() switch { diff --git a/struct_riscv64.go b/struct_riscv64.go index c377c445..aa4e50b6 100644 --- a/struct_riscv64.go +++ b/struct_riscv64.go @@ -8,7 +8,7 @@ import ( "unsafe" ) -func getStruct(outType reflect.Type, syscall syscall15Args) reflect.Value { +func getStruct(outType reflect.Type, syscall syscallArgs) reflect.Value { outSize := outType.Size() switch { diff --git a/struct_s390x.go b/struct_s390x.go index 48cc1730..6e15d415 100644 --- a/struct_s390x.go +++ b/struct_s390x.go @@ -8,7 +8,7 @@ import ( "unsafe" ) -func getStruct(outType reflect.Type, syscall syscall15Args) reflect.Value { +func getStruct(outType reflect.Type, syscall syscallArgs) reflect.Value { outSize := outType.Size() switch { diff --git a/sys_386.s b/sys_386.s index 82931413..476f5631 100644 --- a/sys_386.s +++ b/sys_386.s @@ -10,8 +10,8 @@ #define STACK_SIZE 160 #define PTR_ADDRESS (STACK_SIZE - 4) -// syscall15X calls a function in libc on behalf of the syscall package. -// syscall15X takes a pointer to a struct like: +// syscallX calls a function in libc on behalf of the syscall package. +// syscallX takes a pointer to a struct like: // struct { // fn uintptr // a1 uintptr @@ -22,18 +22,18 @@ // f16 uintptr // arm64_r8 uintptr // } -// syscall15X must be called on the g0 stack with the +// syscallX must be called on the g0 stack with the // C calling convention (use libcCall). // // On i386 System V ABI, all arguments are passed on the stack. // Return value is in EAX (and EDX for 64-bit values). -GLOBL ·syscall15XABI0(SB), NOPTR|RODATA, $4 -DATA ·syscall15XABI0(SB)/4, $syscall15X(SB) -TEXT syscall15X(SB), NOSPLIT|NOFRAME, $0-0 +GLOBL ·syscallXABI0(SB), NOPTR|RODATA, $4 +DATA ·syscallXABI0(SB)/4, $syscallX(SB) +TEXT syscallX(SB), NOSPLIT|NOFRAME, $0-0 // Called via C calling convention: argument pointer at 4(SP) // NOT via Go calling convention // On i386, the first argument is at 4(SP) after CALL pushes return address - MOVL 4(SP), AX // get pointer to syscall15Args + MOVL 4(SP), AX // get pointer to syscallArgs // Save callee-saved registers PUSHL BP @@ -50,75 +50,75 @@ TEXT syscall15X(SB), NOSPLIT|NOFRAME, $0-0 MOVL BX, PTR_ADDRESS(SP) // save args pointer // Load function pointer - MOVL syscall15Args_fn(BX), AX + MOVL syscallArgs_fn(BX), AX MOVL AX, (PTR_ADDRESS-4)(SP) // save fn pointer // Push all integer arguments onto the stack (a1-a32) // i386 SysV ABI: arguments pushed right-to-left, but we're // setting up the stack from low to high addresses - MOVL syscall15Args_a1(BX), AX + MOVL syscallArgs_a1(BX), AX MOVL AX, 0(SP) - MOVL syscall15Args_a2(BX), AX + MOVL syscallArgs_a2(BX), AX MOVL AX, 4(SP) - MOVL syscall15Args_a3(BX), AX + MOVL syscallArgs_a3(BX), AX MOVL AX, 8(SP) - MOVL syscall15Args_a4(BX), AX + MOVL syscallArgs_a4(BX), AX MOVL AX, 12(SP) - MOVL syscall15Args_a5(BX), AX + MOVL syscallArgs_a5(BX), AX MOVL AX, 16(SP) - MOVL syscall15Args_a6(BX), AX + MOVL syscallArgs_a6(BX), AX MOVL AX, 20(SP) - MOVL syscall15Args_a7(BX), AX + MOVL syscallArgs_a7(BX), AX MOVL AX, 24(SP) - MOVL syscall15Args_a8(BX), AX + MOVL syscallArgs_a8(BX), AX MOVL AX, 28(SP) - MOVL syscall15Args_a9(BX), AX + MOVL syscallArgs_a9(BX), AX MOVL AX, 32(SP) - MOVL syscall15Args_a10(BX), AX + MOVL syscallArgs_a10(BX), AX MOVL AX, 36(SP) - MOVL syscall15Args_a11(BX), AX + MOVL syscallArgs_a11(BX), AX MOVL AX, 40(SP) - MOVL syscall15Args_a12(BX), AX + MOVL syscallArgs_a12(BX), AX MOVL AX, 44(SP) - MOVL syscall15Args_a13(BX), AX + MOVL syscallArgs_a13(BX), AX MOVL AX, 48(SP) - MOVL syscall15Args_a14(BX), AX + MOVL syscallArgs_a14(BX), AX MOVL AX, 52(SP) - MOVL syscall15Args_a15(BX), AX + MOVL syscallArgs_a15(BX), AX MOVL AX, 56(SP) - MOVL syscall15Args_a16(BX), AX + MOVL syscallArgs_a16(BX), AX MOVL AX, 60(SP) - MOVL syscall15Args_a17(BX), AX + MOVL syscallArgs_a17(BX), AX MOVL AX, 64(SP) - MOVL syscall15Args_a18(BX), AX + MOVL syscallArgs_a18(BX), AX MOVL AX, 68(SP) - MOVL syscall15Args_a19(BX), AX + MOVL syscallArgs_a19(BX), AX MOVL AX, 72(SP) - MOVL syscall15Args_a20(BX), AX + MOVL syscallArgs_a20(BX), AX MOVL AX, 76(SP) - MOVL syscall15Args_a21(BX), AX + MOVL syscallArgs_a21(BX), AX MOVL AX, 80(SP) - MOVL syscall15Args_a22(BX), AX + MOVL syscallArgs_a22(BX), AX MOVL AX, 84(SP) - MOVL syscall15Args_a23(BX), AX + MOVL syscallArgs_a23(BX), AX MOVL AX, 88(SP) - MOVL syscall15Args_a24(BX), AX + MOVL syscallArgs_a24(BX), AX MOVL AX, 92(SP) - MOVL syscall15Args_a25(BX), AX + MOVL syscallArgs_a25(BX), AX MOVL AX, 96(SP) - MOVL syscall15Args_a26(BX), AX + MOVL syscallArgs_a26(BX), AX MOVL AX, 100(SP) - MOVL syscall15Args_a27(BX), AX + MOVL syscallArgs_a27(BX), AX MOVL AX, 104(SP) - MOVL syscall15Args_a28(BX), AX + MOVL syscallArgs_a28(BX), AX MOVL AX, 108(SP) - MOVL syscall15Args_a29(BX), AX + MOVL syscallArgs_a29(BX), AX MOVL AX, 112(SP) - MOVL syscall15Args_a30(BX), AX + MOVL syscallArgs_a30(BX), AX MOVL AX, 116(SP) - MOVL syscall15Args_a31(BX), AX + MOVL syscallArgs_a31(BX), AX MOVL AX, 120(SP) - MOVL syscall15Args_a32(BX), AX + MOVL syscallArgs_a32(BX), AX MOVL AX, 124(SP) // Call the C function @@ -127,13 +127,13 @@ TEXT syscall15X(SB), NOSPLIT|NOFRAME, $0-0 // Get args pointer back and save results MOVL PTR_ADDRESS(SP), BX - MOVL AX, syscall15Args_a1(BX) // return value r1 - MOVL DX, syscall15Args_a2(BX) // return value r2 (for 64-bit returns) + MOVL AX, syscallArgs_a1(BX) // return value r1 + MOVL DX, syscallArgs_a2(BX) // return value r2 (for 64-bit returns) // Save x87 FPU return value (ST0) to f1 field // On i386 System V ABI, float/double returns are in ST(0) // We save as float64 (8 bytes) to preserve precision - FMOVDP F0, syscall15Args_f1(BX) + FMOVDP F0, syscallArgs_f1(BX) // Clean up stack ADDL $STACK_SIZE, SP diff --git a/sys_amd64.s b/sys_amd64.s index 8719a065..c2a14c89 100644 --- a/sys_amd64.s +++ b/sys_amd64.s @@ -8,11 +8,11 @@ #include "go_asm.h" #include "funcdata.h" -#define STACK_SIZE 80 +#define STACK_SIZE 224 #define PTR_ADDRESS (STACK_SIZE - 8) -// syscall15X calls a function in libc on behalf of the syscall package. -// syscall15X takes a pointer to a struct like: +// syscallX calls a function in libc on behalf of the syscall package. +// syscallX takes a pointer to a struct like: // struct { // fn uintptr // a1 uintptr @@ -34,65 +34,99 @@ // r2 uintptr // err uintptr // } -// syscall15X must be called on the g0 stack with the +// syscallX must be called on the g0 stack with the // C calling convention (use libcCall). -GLOBL ·syscall15XABI0(SB), NOPTR|RODATA, $8 -DATA ·syscall15XABI0(SB)/8, $syscall15X(SB) -TEXT syscall15X(SB), NOSPLIT, $STACK_SIZE +GLOBL ·syscallXABI0(SB), NOPTR|RODATA, $8 +DATA ·syscallXABI0(SB)/8, $syscallX(SB) +TEXT syscallX(SB), NOSPLIT, $STACK_SIZE MOVQ DI, PTR_ADDRESS(SP) // save the pointer MOVQ DI, R11 - MOVQ syscall15Args_f1(R11), X0 // f1 - MOVQ syscall15Args_f2(R11), X1 // f2 - MOVQ syscall15Args_f3(R11), X2 // f3 - MOVQ syscall15Args_f4(R11), X3 // f4 - MOVQ syscall15Args_f5(R11), X4 // f5 - MOVQ syscall15Args_f6(R11), X5 // f6 - MOVQ syscall15Args_f7(R11), X6 // f7 - MOVQ syscall15Args_f8(R11), X7 // f8 - - MOVQ syscall15Args_a1(R11), DI // a1 - MOVQ syscall15Args_a2(R11), SI // a2 - MOVQ syscall15Args_a3(R11), DX // a3 - MOVQ syscall15Args_a4(R11), CX // a4 - MOVQ syscall15Args_a5(R11), R8 // a5 - MOVQ syscall15Args_a6(R11), R9 // a6 + MOVQ syscallArgs_f1(R11), X0 // f1 + MOVQ syscallArgs_f2(R11), X1 // f2 + MOVQ syscallArgs_f3(R11), X2 // f3 + MOVQ syscallArgs_f4(R11), X3 // f4 + MOVQ syscallArgs_f5(R11), X4 // f5 + MOVQ syscallArgs_f6(R11), X5 // f6 + MOVQ syscallArgs_f7(R11), X6 // f7 + MOVQ syscallArgs_f8(R11), X7 // f8 + + MOVQ syscallArgs_a1(R11), DI // a1 + MOVQ syscallArgs_a2(R11), SI // a2 + MOVQ syscallArgs_a3(R11), DX // a3 + MOVQ syscallArgs_a4(R11), CX // a4 + MOVQ syscallArgs_a5(R11), R8 // a5 + MOVQ syscallArgs_a6(R11), R9 // a6 // push the remaining parameters onto the stack - MOVQ syscall15Args_a7(R11), R12 + MOVQ syscallArgs_a7(R11), R12 MOVQ R12, 0(SP) // push a7 - MOVQ syscall15Args_a8(R11), R12 + MOVQ syscallArgs_a8(R11), R12 MOVQ R12, 8(SP) // push a8 - MOVQ syscall15Args_a9(R11), R12 + MOVQ syscallArgs_a9(R11), R12 MOVQ R12, 16(SP) // push a9 - MOVQ syscall15Args_a10(R11), R12 + MOVQ syscallArgs_a10(R11), R12 MOVQ R12, 24(SP) // push a10 - MOVQ syscall15Args_a11(R11), R12 + MOVQ syscallArgs_a11(R11), R12 MOVQ R12, 32(SP) // push a11 - MOVQ syscall15Args_a12(R11), R12 + MOVQ syscallArgs_a12(R11), R12 MOVQ R12, 40(SP) // push a12 - MOVQ syscall15Args_a13(R11), R12 + MOVQ syscallArgs_a13(R11), R12 MOVQ R12, 48(SP) // push a13 - MOVQ syscall15Args_a14(R11), R12 + MOVQ syscallArgs_a14(R11), R12 MOVQ R12, 56(SP) // push a14 - MOVQ syscall15Args_a15(R11), R12 + MOVQ syscallArgs_a15(R11), R12 MOVQ R12, 64(SP) // push a15 + MOVQ syscallArgs_a16(R11), R12 + MOVQ R12, 72(SP) // push a16 + MOVQ syscallArgs_a17(R11), R12 + MOVQ R12, 80(SP) // push a17 + MOVQ syscallArgs_a18(R11), R12 + MOVQ R12, 88(SP) // push a18 + MOVQ syscallArgs_a19(R11), R12 + MOVQ R12, 96(SP) // push a19 + MOVQ syscallArgs_a20(R11), R12 + MOVQ R12, 104(SP) // push a20 + MOVQ syscallArgs_a21(R11), R12 + MOVQ R12, 112(SP) // push a21 + MOVQ syscallArgs_a22(R11), R12 + MOVQ R12, 120(SP) // push a22 + MOVQ syscallArgs_a23(R11), R12 + MOVQ R12, 128(SP) // push a23 + MOVQ syscallArgs_a24(R11), R12 + MOVQ R12, 136(SP) // push a24 + MOVQ syscallArgs_a25(R11), R12 + MOVQ R12, 144(SP) // push a25 + MOVQ syscallArgs_a26(R11), R12 + MOVQ R12, 152(SP) // push a26 + MOVQ syscallArgs_a27(R11), R12 + MOVQ R12, 160(SP) // push a27 + MOVQ syscallArgs_a28(R11), R12 + MOVQ R12, 168(SP) // push a28 + MOVQ syscallArgs_a29(R11), R12 + MOVQ R12, 176(SP) // push a29 + MOVQ syscallArgs_a30(R11), R12 + MOVQ R12, 184(SP) // push a30 + MOVQ syscallArgs_a31(R11), R12 + MOVQ R12, 192(SP) // push a31 + MOVQ syscallArgs_a32(R11), R12 + MOVQ R12, 200(SP) // push a32 XORL AX, AX // vararg: say "no float args" - MOVQ syscall15Args_fn(R11), R10 // fn + MOVQ syscallArgs_fn(R11), R10 // fn CALL R10 MOVQ PTR_ADDRESS(SP), DI // get the pointer back - MOVQ AX, syscall15Args_a1(DI) // r1 - MOVQ DX, syscall15Args_a2(DI) // r2 - MOVQ X0, syscall15Args_f1(DI) // f1 - MOVQ X1, syscall15Args_f2(DI) // f2 + MOVQ AX, syscallArgs_a1(DI) // r1 + MOVQ DX, syscallArgs_a2(DI) // r2 + MOVQ X0, syscallArgs_f1(DI) // f1 + MOVQ X1, syscallArgs_f2(DI) // f2 #ifdef GOOS_darwin CALL purego_error(SB) MOVQ PTR_ADDRESS(SP), DI // reload (DI clobbered by call) MOVQ (AX), AX - MOVQ AX, syscall15Args_a3(DI) // save errno + MOVQ AX, syscallArgs_a3(DI) // save errno #endif XORL AX, AX // no error (it's ignored anyway) diff --git a/sys_arm.s b/sys_arm.s index 3a8ce0d0..f1ea44a2 100644 --- a/sys_arm.s +++ b/sys_arm.s @@ -10,8 +10,8 @@ #define STACK_SIZE 128 #define PTR_ADDRESS (STACK_SIZE - 4) -// syscall15X calls a function in libc on behalf of the syscall package. -// syscall15X takes a pointer to a struct like: +// syscallX calls a function in libc on behalf of the syscall package. +// syscallX takes a pointer to a struct like: // struct { // fn uintptr // a1 uintptr @@ -22,12 +22,12 @@ // f16 uintptr // arm64_r8 uintptr // } -// syscall15X must be called on the g0 stack with the +// syscallX must be called on the g0 stack with the // C calling convention (use libcCall). -GLOBL ·syscall15XABI0(SB), NOPTR|RODATA, $4 -DATA ·syscall15XABI0(SB)/4, $syscall15X(SB) -TEXT syscall15X(SB), NOSPLIT|NOFRAME, $0-0 - // Called via C calling convention: R0 = pointer to syscall15Args +GLOBL ·syscallXABI0(SB), NOPTR|RODATA, $4 +DATA ·syscallXABI0(SB)/4, $syscallX(SB) +TEXT syscallX(SB), NOSPLIT|NOFRAME, $0-0 + // Called via C calling convention: R0 = pointer to syscallArgs // NOT via Go calling convention // Save link register and callee-saved registers first MOVW.W R14, -4(R13) // save LR (decrement and store) @@ -38,82 +38,82 @@ TEXT syscall15X(SB), NOSPLIT|NOFRAME, $0-0 MOVW R8, PTR_ADDRESS(R13) // Load function pointer first (before anything can corrupt R8) - MOVW syscall15Args_fn(R8), R5 + MOVW syscallArgs_fn(R8), R5 MOVW R5, (PTR_ADDRESS-4)(R13) // save fn at offset 56 // Load floating point arguments // Each float64 spans 2 uintptr slots (8 bytes) on ARM32, so we skip by 2 - MOVD syscall15Args_f1(R8), F0 // f1+f2 -> D0 - MOVD syscall15Args_f3(R8), F1 // f3+f4 -> D1 - MOVD syscall15Args_f5(R8), F2 // f5+f6 -> D2 - MOVD syscall15Args_f7(R8), F3 // f7+f8 -> D3 - MOVD syscall15Args_f9(R8), F4 // f9+f10 -> D4 - MOVD syscall15Args_f11(R8), F5 // f11+f12 -> D5 - MOVD syscall15Args_f13(R8), F6 // f13+f14 -> D6 - MOVD syscall15Args_f15(R8), F7 // f15+f16 -> D7 + MOVD syscallArgs_f1(R8), F0 // f1+f2 -> D0 + MOVD syscallArgs_f3(R8), F1 // f3+f4 -> D1 + MOVD syscallArgs_f5(R8), F2 // f5+f6 -> D2 + MOVD syscallArgs_f7(R8), F3 // f7+f8 -> D3 + MOVD syscallArgs_f9(R8), F4 // f9+f10 -> D4 + MOVD syscallArgs_f11(R8), F5 // f11+f12 -> D5 + MOVD syscallArgs_f13(R8), F6 // f13+f14 -> D6 + MOVD syscallArgs_f15(R8), F7 // f15+f16 -> D7 // Load integer arguments into registers (R0-R3 for ARM EABI) - MOVW syscall15Args_a1(R8), R0 // a1 - MOVW syscall15Args_a2(R8), R1 // a2 - MOVW syscall15Args_a3(R8), R2 // a3 - MOVW syscall15Args_a4(R8), R3 // a4 + MOVW syscallArgs_a1(R8), R0 // a1 + MOVW syscallArgs_a2(R8), R1 // a2 + MOVW syscallArgs_a3(R8), R2 // a3 + MOVW syscallArgs_a4(R8), R3 // a4 // push a5-a32 onto stack - MOVW syscall15Args_a5(R8), R4 + MOVW syscallArgs_a5(R8), R4 MOVW R4, 0(R13) - MOVW syscall15Args_a6(R8), R4 + MOVW syscallArgs_a6(R8), R4 MOVW R4, 4(R13) - MOVW syscall15Args_a7(R8), R4 + MOVW syscallArgs_a7(R8), R4 MOVW R4, 8(R13) - MOVW syscall15Args_a8(R8), R4 + MOVW syscallArgs_a8(R8), R4 MOVW R4, 12(R13) - MOVW syscall15Args_a9(R8), R4 + MOVW syscallArgs_a9(R8), R4 MOVW R4, 16(R13) - MOVW syscall15Args_a10(R8), R4 + MOVW syscallArgs_a10(R8), R4 MOVW R4, 20(R13) - MOVW syscall15Args_a11(R8), R4 + MOVW syscallArgs_a11(R8), R4 MOVW R4, 24(R13) - MOVW syscall15Args_a12(R8), R4 + MOVW syscallArgs_a12(R8), R4 MOVW R4, 28(R13) - MOVW syscall15Args_a13(R8), R4 + MOVW syscallArgs_a13(R8), R4 MOVW R4, 32(R13) - MOVW syscall15Args_a14(R8), R4 + MOVW syscallArgs_a14(R8), R4 MOVW R4, 36(R13) - MOVW syscall15Args_a15(R8), R4 + MOVW syscallArgs_a15(R8), R4 MOVW R4, 40(R13) - MOVW syscall15Args_a16(R8), R4 + MOVW syscallArgs_a16(R8), R4 MOVW R4, 44(R13) - MOVW syscall15Args_a17(R8), R4 + MOVW syscallArgs_a17(R8), R4 MOVW R4, 48(R13) - MOVW syscall15Args_a18(R8), R4 + MOVW syscallArgs_a18(R8), R4 MOVW R4, 52(R13) - MOVW syscall15Args_a19(R8), R4 + MOVW syscallArgs_a19(R8), R4 MOVW R4, 56(R13) - MOVW syscall15Args_a20(R8), R4 + MOVW syscallArgs_a20(R8), R4 MOVW R4, 60(R13) - MOVW syscall15Args_a21(R8), R4 + MOVW syscallArgs_a21(R8), R4 MOVW R4, 64(R13) - MOVW syscall15Args_a22(R8), R4 + MOVW syscallArgs_a22(R8), R4 MOVW R4, 68(R13) - MOVW syscall15Args_a23(R8), R4 + MOVW syscallArgs_a23(R8), R4 MOVW R4, 72(R13) - MOVW syscall15Args_a24(R8), R4 + MOVW syscallArgs_a24(R8), R4 MOVW R4, 76(R13) - MOVW syscall15Args_a25(R8), R4 + MOVW syscallArgs_a25(R8), R4 MOVW R4, 80(R13) - MOVW syscall15Args_a26(R8), R4 + MOVW syscallArgs_a26(R8), R4 MOVW R4, 84(R13) - MOVW syscall15Args_a27(R8), R4 + MOVW syscallArgs_a27(R8), R4 MOVW R4, 88(R13) - MOVW syscall15Args_a28(R8), R4 + MOVW syscallArgs_a28(R8), R4 MOVW R4, 92(R13) - MOVW syscall15Args_a29(R8), R4 + MOVW syscallArgs_a29(R8), R4 MOVW R4, 96(R13) - MOVW syscall15Args_a30(R8), R4 + MOVW syscallArgs_a30(R8), R4 MOVW R4, 100(R13) - MOVW syscall15Args_a31(R8), R4 + MOVW syscallArgs_a31(R8), R4 MOVW R4, 104(R13) - MOVW syscall15Args_a32(R8), R4 + MOVW syscallArgs_a32(R8), R4 MOVW R4, 108(R13) // Load saved function pointer and call @@ -128,14 +128,14 @@ TEXT syscall15X(SB), NOSPLIT|NOFRAME, $0-0 ADD $STACK_SIZE, R13 // save R0, R1 - MOVW R0, syscall15Args_a1(R8) - MOVW R1, syscall15Args_a2(R8) + MOVW R0, syscallArgs_a1(R8) + MOVW R1, syscallArgs_a2(R8) // save f0-f3 (each float64 spans 2 uintptr slots on ARM32) - MOVD F0, syscall15Args_f1(R8) - MOVD F1, syscall15Args_f3(R8) - MOVD F2, syscall15Args_f5(R8) - MOVD F3, syscall15Args_f7(R8) + MOVD F0, syscallArgs_f1(R8) + MOVD F1, syscallArgs_f3(R8) + MOVD F2, syscallArgs_f5(R8) + MOVD F3, syscallArgs_f7(R8) // Restore callee-saved registers and return MOVM.IA.W (R13), [R4, R5, R6, R7, R8, R9, R11] diff --git a/sys_arm64.s b/sys_arm64.s index 26201011..eede3526 100644 --- a/sys_arm64.s +++ b/sys_arm64.s @@ -7,11 +7,11 @@ #include "go_asm.h" #include "funcdata.h" -#define STACK_SIZE 64 +#define STACK_SIZE 208 #define PTR_ADDRESS (STACK_SIZE - 8) -// syscall15X calls a function in libc on behalf of the syscall package. -// syscall15X takes a pointer to a struct like: +// syscallX calls a function in libc on behalf of the syscall package. +// syscallX takes a pointer to a struct like: // struct { // fn uintptr // a1 uintptr @@ -33,66 +33,100 @@ // r2 uintptr // err uintptr // } -// syscall15X must be called on the g0 stack with the +// syscallX must be called on the g0 stack with the // C calling convention (use libcCall). -GLOBL ·syscall15XABI0(SB), NOPTR|RODATA, $8 -DATA ·syscall15XABI0(SB)/8, $syscall15X(SB) -TEXT syscall15X(SB), NOSPLIT, $0 +GLOBL ·syscallXABI0(SB), NOPTR|RODATA, $8 +DATA ·syscallXABI0(SB)/8, $syscallX(SB) +TEXT syscallX(SB), NOSPLIT, $0 SUB $STACK_SIZE, RSP // push structure pointer MOVD R0, PTR_ADDRESS(RSP) MOVD R0, R9 - FMOVD syscall15Args_f1(R9), F0 // f1 - FMOVD syscall15Args_f2(R9), F1 // f2 - FMOVD syscall15Args_f3(R9), F2 // f3 - FMOVD syscall15Args_f4(R9), F3 // f4 - FMOVD syscall15Args_f5(R9), F4 // f5 - FMOVD syscall15Args_f6(R9), F5 // f6 - FMOVD syscall15Args_f7(R9), F6 // f7 - FMOVD syscall15Args_f8(R9), F7 // f8 + FMOVD syscallArgs_f1(R9), F0 // f1 + FMOVD syscallArgs_f2(R9), F1 // f2 + FMOVD syscallArgs_f3(R9), F2 // f3 + FMOVD syscallArgs_f4(R9), F3 // f4 + FMOVD syscallArgs_f5(R9), F4 // f5 + FMOVD syscallArgs_f6(R9), F5 // f6 + FMOVD syscallArgs_f7(R9), F6 // f7 + FMOVD syscallArgs_f8(R9), F7 // f8 - MOVD syscall15Args_a1(R9), R0 // a1 - MOVD syscall15Args_a2(R9), R1 // a2 - MOVD syscall15Args_a3(R9), R2 // a3 - MOVD syscall15Args_a4(R9), R3 // a4 - MOVD syscall15Args_a5(R9), R4 // a5 - MOVD syscall15Args_a6(R9), R5 // a6 - MOVD syscall15Args_a7(R9), R6 // a7 - MOVD syscall15Args_a8(R9), R7 // a8 - MOVD syscall15Args_arm64_r8(R9), R8 // r8 + MOVD syscallArgs_a1(R9), R0 // a1 + MOVD syscallArgs_a2(R9), R1 // a2 + MOVD syscallArgs_a3(R9), R2 // a3 + MOVD syscallArgs_a4(R9), R3 // a4 + MOVD syscallArgs_a5(R9), R4 // a5 + MOVD syscallArgs_a6(R9), R5 // a6 + MOVD syscallArgs_a7(R9), R6 // a7 + MOVD syscallArgs_a8(R9), R7 // a8 + MOVD syscallArgs_arm64_r8(R9), R8 // r8 - MOVD syscall15Args_a9(R9), R10 + MOVD syscallArgs_a9(R9), R10 MOVD R10, 0(RSP) // push a9 onto stack - MOVD syscall15Args_a10(R9), R10 + MOVD syscallArgs_a10(R9), R10 MOVD R10, 8(RSP) // push a10 onto stack - MOVD syscall15Args_a11(R9), R10 + MOVD syscallArgs_a11(R9), R10 MOVD R10, 16(RSP) // push a11 onto stack - MOVD syscall15Args_a12(R9), R10 + MOVD syscallArgs_a12(R9), R10 MOVD R10, 24(RSP) // push a12 onto stack - MOVD syscall15Args_a13(R9), R10 + MOVD syscallArgs_a13(R9), R10 MOVD R10, 32(RSP) // push a13 onto stack - MOVD syscall15Args_a14(R9), R10 + MOVD syscallArgs_a14(R9), R10 MOVD R10, 40(RSP) // push a14 onto stack - MOVD syscall15Args_a15(R9), R10 + MOVD syscallArgs_a15(R9), R10 MOVD R10, 48(RSP) // push a15 onto stack + MOVD syscallArgs_a16(R9), R10 + MOVD R10, 56(RSP) // push a16 onto stack + MOVD syscallArgs_a17(R9), R10 + MOVD R10, 64(RSP) // push a17 onto stack + MOVD syscallArgs_a18(R9), R10 + MOVD R10, 72(RSP) // push a18 onto stack + MOVD syscallArgs_a19(R9), R10 + MOVD R10, 80(RSP) // push a19 onto stack + MOVD syscallArgs_a20(R9), R10 + MOVD R10, 88(RSP) // push a20 onto stack + MOVD syscallArgs_a21(R9), R10 + MOVD R10, 96(RSP) // push a21 onto stack + MOVD syscallArgs_a22(R9), R10 + MOVD R10, 104(RSP) // push a22 onto stack + MOVD syscallArgs_a23(R9), R10 + MOVD R10, 112(RSP) // push a23 onto stack + MOVD syscallArgs_a24(R9), R10 + MOVD R10, 120(RSP) // push a24 onto stack + MOVD syscallArgs_a25(R9), R10 + MOVD R10, 128(RSP) // push a25 onto stack + MOVD syscallArgs_a26(R9), R10 + MOVD R10, 136(RSP) // push a26 onto stack + MOVD syscallArgs_a27(R9), R10 + MOVD R10, 144(RSP) // push a27 onto stack + MOVD syscallArgs_a28(R9), R10 + MOVD R10, 152(RSP) // push a28 onto stack + MOVD syscallArgs_a29(R9), R10 + MOVD R10, 160(RSP) // push a29 onto stack + MOVD syscallArgs_a30(R9), R10 + MOVD R10, 168(RSP) // push a30 onto stack + MOVD syscallArgs_a31(R9), R10 + MOVD R10, 176(RSP) // push a31 onto stack + MOVD syscallArgs_a32(R9), R10 + MOVD R10, 184(RSP) // push a32 onto stack - MOVD syscall15Args_fn(R9), R10 // fn + MOVD syscallArgs_fn(R9), R10 // fn BL (R10) MOVD PTR_ADDRESS(RSP), R2 // pop structure pointer ADD $STACK_SIZE, RSP - MOVD R0, syscall15Args_a1(R2) // save r1 - MOVD R1, syscall15Args_a2(R2) // save r3 - FMOVD F0, syscall15Args_f1(R2) // save f0 - FMOVD F1, syscall15Args_f2(R2) // save f1 - FMOVD F2, syscall15Args_f3(R2) // save f2 - FMOVD F3, syscall15Args_f4(R2) // save f3 + MOVD R0, syscallArgs_a1(R2) // save r1 + MOVD R1, syscallArgs_a2(R2) // save r3 + FMOVD F0, syscallArgs_f1(R2) // save f0 + FMOVD F1, syscallArgs_f2(R2) // save f1 + FMOVD F2, syscallArgs_f3(R2) // save f2 + FMOVD F3, syscallArgs_f4(R2) // save f3 #ifdef GOOS_darwin BL purego_error(SB) MOVD (R0), R0 - MOVD R0, syscall15Args_a3(R2) // save errno + MOVD R0, syscallArgs_a3(R2) // save errno #endif RET diff --git a/sys_loong64.s b/sys_loong64.s index 420b855c..cf4e8146 100644 --- a/sys_loong64.s +++ b/sys_loong64.s @@ -7,11 +7,11 @@ #include "go_asm.h" #include "funcdata.h" -#define STACK_SIZE 64 +#define STACK_SIZE 208 #define PTR_ADDRESS (STACK_SIZE - 8) -// syscall15X calls a function in libc on behalf of the syscall package. -// syscall15X takes a pointer to a struct like: +// syscallX calls a function in libc on behalf of the syscall package. +// syscallX takes a pointer to a struct like: // struct { // fn uintptr // a1 uintptr @@ -33,51 +33,85 @@ // r2 uintptr // err uintptr // } -// syscall15X must be called on the g0 stack with the +// syscallX must be called on the g0 stack with the // C calling convention (use libcCall). -GLOBL ·syscall15XABI0(SB), NOPTR|RODATA, $8 -DATA ·syscall15XABI0(SB)/8, $syscall15X(SB) -TEXT syscall15X(SB), NOSPLIT, $0 +GLOBL ·syscallXABI0(SB), NOPTR|RODATA, $8 +DATA ·syscallXABI0(SB)/8, $syscallX(SB) +TEXT syscallX(SB), NOSPLIT, $0 // push structure pointer SUBV $STACK_SIZE, R3 MOVV R4, PTR_ADDRESS(R3) MOVV R4, R13 - MOVD syscall15Args_f1(R13), F0 // f1 - MOVD syscall15Args_f2(R13), F1 // f2 - MOVD syscall15Args_f3(R13), F2 // f3 - MOVD syscall15Args_f4(R13), F3 // f4 - MOVD syscall15Args_f5(R13), F4 // f5 - MOVD syscall15Args_f6(R13), F5 // f6 - MOVD syscall15Args_f7(R13), F6 // f7 - MOVD syscall15Args_f8(R13), F7 // f8 + MOVD syscallArgs_f1(R13), F0 // f1 + MOVD syscallArgs_f2(R13), F1 // f2 + MOVD syscallArgs_f3(R13), F2 // f3 + MOVD syscallArgs_f4(R13), F3 // f4 + MOVD syscallArgs_f5(R13), F4 // f5 + MOVD syscallArgs_f6(R13), F5 // f6 + MOVD syscallArgs_f7(R13), F6 // f7 + MOVD syscallArgs_f8(R13), F7 // f8 - MOVV syscall15Args_a1(R13), R4 // a1 - MOVV syscall15Args_a2(R13), R5 // a2 - MOVV syscall15Args_a3(R13), R6 // a3 - MOVV syscall15Args_a4(R13), R7 // a4 - MOVV syscall15Args_a5(R13), R8 // a5 - MOVV syscall15Args_a6(R13), R9 // a6 - MOVV syscall15Args_a7(R13), R10 // a7 - MOVV syscall15Args_a8(R13), R11 // a8 + MOVV syscallArgs_a1(R13), R4 // a1 + MOVV syscallArgs_a2(R13), R5 // a2 + MOVV syscallArgs_a3(R13), R6 // a3 + MOVV syscallArgs_a4(R13), R7 // a4 + MOVV syscallArgs_a5(R13), R8 // a5 + MOVV syscallArgs_a6(R13), R9 // a6 + MOVV syscallArgs_a7(R13), R10 // a7 + MOVV syscallArgs_a8(R13), R11 // a8 // push a9-a15 onto stack - MOVV syscall15Args_a9(R13), R12 + MOVV syscallArgs_a9(R13), R12 MOVV R12, 0(R3) - MOVV syscall15Args_a10(R13), R12 + MOVV syscallArgs_a10(R13), R12 MOVV R12, 8(R3) - MOVV syscall15Args_a11(R13), R12 + MOVV syscallArgs_a11(R13), R12 MOVV R12, 16(R3) - MOVV syscall15Args_a12(R13), R12 + MOVV syscallArgs_a12(R13), R12 MOVV R12, 24(R3) - MOVV syscall15Args_a13(R13), R12 + MOVV syscallArgs_a13(R13), R12 MOVV R12, 32(R3) - MOVV syscall15Args_a14(R13), R12 + MOVV syscallArgs_a14(R13), R12 MOVV R12, 40(R3) - MOVV syscall15Args_a15(R13), R12 + MOVV syscallArgs_a15(R13), R12 MOVV R12, 48(R3) + MOVV syscallArgs_a16(R13), R12 + MOVV R12, 56(R3) + MOVV syscallArgs_a17(R13), R12 + MOVV R12, 64(R3) + MOVV syscallArgs_a18(R13), R12 + MOVV R12, 72(R3) + MOVV syscallArgs_a19(R13), R12 + MOVV R12, 80(R3) + MOVV syscallArgs_a20(R13), R12 + MOVV R12, 88(R3) + MOVV syscallArgs_a21(R13), R12 + MOVV R12, 96(R3) + MOVV syscallArgs_a22(R13), R12 + MOVV R12, 104(R3) + MOVV syscallArgs_a23(R13), R12 + MOVV R12, 112(R3) + MOVV syscallArgs_a24(R13), R12 + MOVV R12, 120(R3) + MOVV syscallArgs_a25(R13), R12 + MOVV R12, 128(R3) + MOVV syscallArgs_a26(R13), R12 + MOVV R12, 136(R3) + MOVV syscallArgs_a27(R13), R12 + MOVV R12, 144(R3) + MOVV syscallArgs_a28(R13), R12 + MOVV R12, 152(R3) + MOVV syscallArgs_a29(R13), R12 + MOVV R12, 160(R3) + MOVV syscallArgs_a30(R13), R12 + MOVV R12, 168(R3) + MOVV syscallArgs_a31(R13), R12 + MOVV R12, 176(R3) + MOVV syscallArgs_a32(R13), R12 + MOVV R12, 184(R3) - MOVV syscall15Args_fn(R13), R12 + MOVV syscallArgs_fn(R13), R12 JAL (R12) // pop structure pointer @@ -85,12 +119,12 @@ TEXT syscall15X(SB), NOSPLIT, $0 ADDV $STACK_SIZE, R3 // save R4, R5 - MOVV R4, syscall15Args_a1(R13) - MOVV R5, syscall15Args_a2(R13) + MOVV R4, syscallArgs_a1(R13) + MOVV R5, syscallArgs_a2(R13) // save f0-f3 - MOVD F0, syscall15Args_f1(R13) - MOVD F1, syscall15Args_f2(R13) - MOVD F2, syscall15Args_f3(R13) - MOVD F3, syscall15Args_f4(R13) + MOVD F0, syscallArgs_f1(R13) + MOVD F1, syscallArgs_f2(R13) + MOVD F2, syscallArgs_f3(R13) + MOVD F3, syscallArgs_f4(R13) RET diff --git a/sys_ppc64le.s b/sys_ppc64le.s index 391b30a9..fc9c26ae 100644 --- a/sys_ppc64le.s +++ b/sys_ppc64le.s @@ -35,10 +35,10 @@ #define TOC_SAVE 160 #define ARGP_SAVE 168 -GLOBL ·syscall15XABI0(SB), NOPTR|RODATA, $8 -DATA ·syscall15XABI0(SB)/8, $syscall15X(SB) +GLOBL ·syscallXABI0(SB), NOPTR|RODATA, $8 +DATA ·syscallXABI0(SB)/8, $syscallX(SB) -TEXT syscall15X(SB), NOSPLIT, $0 +TEXT syscallX(SB), NOSPLIT, $0 // Prologue: create stack frame // R3 contains the args pointer on entry MOVD R1, R12 // save old SP @@ -51,49 +51,49 @@ TEXT syscall15X(SB), NOSPLIT, $0 // Save args pointer (in R3) MOVD R3, ARGP_SAVE(R1) - // R11 := args pointer (syscall15Args*) + // R11 := args pointer (syscallArgs*) MOVD R3, R11 // Load float args into F1-F8 - FMOVD syscall15Args_f1(R11), F1 - FMOVD syscall15Args_f2(R11), F2 - FMOVD syscall15Args_f3(R11), F3 - FMOVD syscall15Args_f4(R11), F4 - FMOVD syscall15Args_f5(R11), F5 - FMOVD syscall15Args_f6(R11), F6 - FMOVD syscall15Args_f7(R11), F7 - FMOVD syscall15Args_f8(R11), F8 + FMOVD syscallArgs_f1(R11), F1 + FMOVD syscallArgs_f2(R11), F2 + FMOVD syscallArgs_f3(R11), F3 + FMOVD syscallArgs_f4(R11), F4 + FMOVD syscallArgs_f5(R11), F5 + FMOVD syscallArgs_f6(R11), F6 + FMOVD syscallArgs_f7(R11), F7 + FMOVD syscallArgs_f8(R11), F8 // Load integer args into R3-R10 - MOVD syscall15Args_a1(R11), R3 - MOVD syscall15Args_a2(R11), R4 - MOVD syscall15Args_a3(R11), R5 - MOVD syscall15Args_a4(R11), R6 - MOVD syscall15Args_a5(R11), R7 - MOVD syscall15Args_a6(R11), R8 - MOVD syscall15Args_a7(R11), R9 - MOVD syscall15Args_a8(R11), R10 + MOVD syscallArgs_a1(R11), R3 + MOVD syscallArgs_a2(R11), R4 + MOVD syscallArgs_a3(R11), R5 + MOVD syscallArgs_a4(R11), R6 + MOVD syscallArgs_a5(R11), R7 + MOVD syscallArgs_a6(R11), R8 + MOVD syscallArgs_a7(R11), R9 + MOVD syscallArgs_a8(R11), R10 // Spill a9-a15 onto the stack (stack parameters start at 96(R1)) // Per ELFv2: parameter save area is 32-95, stack args start at 96 MOVD ARGP_SAVE(R1), R11 // reload args pointer - MOVD syscall15Args_a9(R11), R12 + MOVD syscallArgs_a9(R11), R12 MOVD R12, 96(R1) // a9 at 96(R1) - MOVD syscall15Args_a10(R11), R12 + MOVD syscallArgs_a10(R11), R12 MOVD R12, 104(R1) // a10 at 104(R1) - MOVD syscall15Args_a11(R11), R12 + MOVD syscallArgs_a11(R11), R12 MOVD R12, 112(R1) // a11 at 112(R1) - MOVD syscall15Args_a12(R11), R12 + MOVD syscallArgs_a12(R11), R12 MOVD R12, 120(R1) // a12 at 120(R1) - MOVD syscall15Args_a13(R11), R12 + MOVD syscallArgs_a13(R11), R12 MOVD R12, 128(R1) // a13 at 128(R1) - MOVD syscall15Args_a14(R11), R12 + MOVD syscallArgs_a14(R11), R12 MOVD R12, 136(R1) // a14 at 136(R1) - MOVD syscall15Args_a15(R11), R12 + MOVD syscallArgs_a15(R11), R12 MOVD R12, 144(R1) // a15 at 144(R1) // Call function: load fn and call - MOVD syscall15Args_fn(R11), R12 + MOVD syscallArgs_fn(R11), R12 MOVD R12, CTR BL (CTR) @@ -104,14 +104,14 @@ TEXT syscall15X(SB), NOSPLIT, $0 MOVD ARGP_SAVE(R1), R11 // Store integer results back (R3, R4) - MOVD R3, syscall15Args_a1(R11) - MOVD R4, syscall15Args_a2(R11) + MOVD R3, syscallArgs_a1(R11) + MOVD R4, syscallArgs_a2(R11) // Store float return values (F1-F4) - FMOVD F1, syscall15Args_f1(R11) - FMOVD F2, syscall15Args_f2(R11) - FMOVD F3, syscall15Args_f3(R11) - FMOVD F4, syscall15Args_f4(R11) + FMOVD F1, syscallArgs_f1(R11) + FMOVD F2, syscallArgs_f2(R11) + FMOVD F3, syscallArgs_f3(R11) + FMOVD F4, syscallArgs_f4(R11) // Epilogue: restore and return MOVD LR_SAVE(R1), R12 diff --git a/sys_riscv64.s b/sys_riscv64.s index e7e887e1..cdae50ae 100644 --- a/sys_riscv64.s +++ b/sys_riscv64.s @@ -8,22 +8,21 @@ #include "funcdata.h" // Stack usage: -// 0(SP) - 56(SP): stack args a9-a15 (7 * 8 bytes = 56) -// 56(SP) - 64(SP): saved RA (x1) -// 64(SP) - 72(SP): saved X9 (s1) -// 72(SP) - 80(SP): saved X18 (s2) -// 80(SP) - 88(SP): saved args pointer (original X10) -// 88(SP) - 96(SP): padding -#define STACK_SIZE 96 -#define SAVE_RA 56 -#define SAVE_X9 64 -#define SAVE_X18 72 -#define SAVE_ARGP 80 - -GLOBL ·syscall15XABI0(SB), NOPTR|RODATA, $8 -DATA ·syscall15XABI0(SB)/8, $syscall15X(SB) - -TEXT syscall15X(SB), NOSPLIT, $0 +// 0(SP) - 192(SP): stack args a9-a32 (24 * 8 bytes) +// 192(SP) - 200(SP): saved RA (x1) +// 200(SP) - 208(SP): saved X9 (s1) +// 208(SP) - 216(SP): saved X18 (s2) +// 216(SP) - 224(SP): saved args pointer (original X10) +#define STACK_SIZE 224 +#define SAVE_RA 192 +#define SAVE_X9 200 +#define SAVE_X18 208 +#define SAVE_ARGP 216 + +GLOBL ·syscallXABI0(SB), NOPTR|RODATA, $8 +DATA ·syscallXABI0(SB)/8, $syscallX(SB) + +TEXT syscallX(SB), NOSPLIT, $0 // Allocate stack frame (keeps 16-byte alignment) SUB $STACK_SIZE, SP @@ -35,62 +34,96 @@ TEXT syscall15X(SB), NOSPLIT, $0 // Save original args pointer (in a0/X10) MOV X10, SAVE_ARGP(SP) - // X9 := args pointer (syscall15Args*) + // X9 := args pointer (syscallArgs*) MOV X10, X9 // Load float args into fa0-fa7 (F10-F17) - MOVD syscall15Args_f1(X9), F10 - MOVD syscall15Args_f2(X9), F11 - MOVD syscall15Args_f3(X9), F12 - MOVD syscall15Args_f4(X9), F13 - MOVD syscall15Args_f5(X9), F14 - MOVD syscall15Args_f6(X9), F15 - MOVD syscall15Args_f7(X9), F16 - MOVD syscall15Args_f8(X9), F17 + MOVD syscallArgs_f1(X9), F10 + MOVD syscallArgs_f2(X9), F11 + MOVD syscallArgs_f3(X9), F12 + MOVD syscallArgs_f4(X9), F13 + MOVD syscallArgs_f5(X9), F14 + MOVD syscallArgs_f6(X9), F15 + MOVD syscallArgs_f7(X9), F16 + MOVD syscallArgs_f8(X9), F17 // Load integer args into a0-a7 (X10-X17) - MOV syscall15Args_a1(X9), X10 - MOV syscall15Args_a2(X9), X11 - MOV syscall15Args_a3(X9), X12 - MOV syscall15Args_a4(X9), X13 - MOV syscall15Args_a5(X9), X14 - MOV syscall15Args_a6(X9), X15 - MOV syscall15Args_a7(X9), X16 - MOV syscall15Args_a8(X9), X17 - - // Spill a9-a15 onto the stack (C ABI) - MOV syscall15Args_a9(X9), X18 + MOV syscallArgs_a1(X9), X10 + MOV syscallArgs_a2(X9), X11 + MOV syscallArgs_a3(X9), X12 + MOV syscallArgs_a4(X9), X13 + MOV syscallArgs_a5(X9), X14 + MOV syscallArgs_a6(X9), X15 + MOV syscallArgs_a7(X9), X16 + MOV syscallArgs_a8(X9), X17 + + // Spill a9-a32 onto the stack (C ABI) + MOV syscallArgs_a9(X9), X18 MOV X18, 0(SP) - MOV syscall15Args_a10(X9), X18 + MOV syscallArgs_a10(X9), X18 MOV X18, 8(SP) - MOV syscall15Args_a11(X9), X18 + MOV syscallArgs_a11(X9), X18 MOV X18, 16(SP) - MOV syscall15Args_a12(X9), X18 + MOV syscallArgs_a12(X9), X18 MOV X18, 24(SP) - MOV syscall15Args_a13(X9), X18 + MOV syscallArgs_a13(X9), X18 MOV X18, 32(SP) - MOV syscall15Args_a14(X9), X18 + MOV syscallArgs_a14(X9), X18 MOV X18, 40(SP) - MOV syscall15Args_a15(X9), X18 + MOV syscallArgs_a15(X9), X18 MOV X18, 48(SP) + MOV syscallArgs_a16(X9), X18 + MOV X18, 56(SP) + MOV syscallArgs_a17(X9), X18 + MOV X18, 64(SP) + MOV syscallArgs_a18(X9), X18 + MOV X18, 72(SP) + MOV syscallArgs_a19(X9), X18 + MOV X18, 80(SP) + MOV syscallArgs_a20(X9), X18 + MOV X18, 88(SP) + MOV syscallArgs_a21(X9), X18 + MOV X18, 96(SP) + MOV syscallArgs_a22(X9), X18 + MOV X18, 104(SP) + MOV syscallArgs_a23(X9), X18 + MOV X18, 112(SP) + MOV syscallArgs_a24(X9), X18 + MOV X18, 120(SP) + MOV syscallArgs_a25(X9), X18 + MOV X18, 128(SP) + MOV syscallArgs_a26(X9), X18 + MOV X18, 136(SP) + MOV syscallArgs_a27(X9), X18 + MOV X18, 144(SP) + MOV syscallArgs_a28(X9), X18 + MOV X18, 152(SP) + MOV syscallArgs_a29(X9), X18 + MOV X18, 160(SP) + MOV syscallArgs_a30(X9), X18 + MOV X18, 168(SP) + MOV syscallArgs_a31(X9), X18 + MOV X18, 176(SP) + MOV syscallArgs_a32(X9), X18 + MOV X18, 184(SP) // Call fn // IMPORTANT: preserve RA across this call (we saved it above) - MOV syscall15Args_fn(X9), X18 + MOV syscallArgs_fn(X9), X18 CALL X18 - // Restore args pointer (syscall15Args*) for storing results + // Restore args pointer (syscallArgs*) for storing results MOV SAVE_ARGP(SP), X9 // Store results back - MOV X10, syscall15Args_a1(X9) - MOV X11, syscall15Args_a2(X9) + MOV X10, syscallArgs_a1(X9) + MOV X11, syscallArgs_a2(X9) // Store back float return regs if used by your ABI contract - MOVD F10, syscall15Args_f1(X9) - MOVD F11, syscall15Args_f2(X9) - MOVD F12, syscall15Args_f3(X9) - MOVD F13, syscall15Args_f4(X9) + MOVD F10, syscallArgs_f1(X9) + MOVD F11, syscallArgs_f2(X9) + MOVD F12, syscallArgs_f3(X9) + MOVD F13, syscallArgs_f4(X9) // Restore callee-saved regs and return address MOV SAVE_X18(SP), X18 diff --git a/sys_s390x.s b/sys_s390x.s index a044e34d..6db4f01b 100644 --- a/sys_s390x.s +++ b/sys_s390x.s @@ -24,19 +24,18 @@ // // We need space for: // - 160 bytes standard frame (with register save area) -// - Stack args a6-a15 (10 * 8 = 80 bytes) +// - Stack args a6-a32 (27 * 8 = 216 bytes) // - Saved args pointer (8 bytes) -// - Padding for alignment -// Total: 264 bytes (rounded to 8-byte alignment) +// Total: 384 bytes -#define STACK_SIZE 264 +#define STACK_SIZE 384 #define STACK_ARGS 160 -#define ARGP_SAVE 248 +#define ARGP_SAVE 376 -GLOBL ·syscall15XABI0(SB), NOPTR|RODATA, $8 -DATA ·syscall15XABI0(SB)/8, $syscall15X(SB) +GLOBL ·syscallXABI0(SB), NOPTR|RODATA, $8 +DATA ·syscallXABI0(SB)/8, $syscallX(SB) -TEXT syscall15X(SB), NOSPLIT, $0 +TEXT syscallX(SB), NOSPLIT, $0 // On entry, R2 contains the args pointer // Save callee-saved registers in caller's frame (per ABI) STMG R6, R15, 48(R15) @@ -49,61 +48,95 @@ TEXT syscall15X(SB), NOSPLIT, $0 // Save args pointer MOVD R2, ARGP_SAVE(R15) - // R9 := args pointer (syscall15Args*) + // R9 := args pointer (syscallArgs*) MOVD R2, R9 // Load float args into F0, F2, F4, F6 (s390x uses even-numbered FPRs) - FMOVD syscall15Args_f1(R9), F0 - FMOVD syscall15Args_f2(R9), F2 - FMOVD syscall15Args_f3(R9), F4 - FMOVD syscall15Args_f4(R9), F6 + FMOVD syscallArgs_f1(R9), F0 + FMOVD syscallArgs_f2(R9), F2 + FMOVD syscallArgs_f3(R9), F4 + FMOVD syscallArgs_f4(R9), F6 // Load integer args into R2-R6 (5 registers) - MOVD syscall15Args_a1(R9), R2 - MOVD syscall15Args_a2(R9), R3 - MOVD syscall15Args_a3(R9), R4 - MOVD syscall15Args_a4(R9), R5 - MOVD syscall15Args_a5(R9), R6 + MOVD syscallArgs_a1(R9), R2 + MOVD syscallArgs_a2(R9), R3 + MOVD syscallArgs_a3(R9), R4 + MOVD syscallArgs_a4(R9), R5 + MOVD syscallArgs_a5(R9), R6 - // Spill remaining args (a6-a15) onto the stack at 160(R15) + // Spill remaining args (a6-a32) onto the stack at 160(R15) MOVD ARGP_SAVE(R15), R9 // reload args pointer - MOVD syscall15Args_a6(R9), R1 + MOVD syscallArgs_a6(R9), R1 MOVD R1, (STACK_ARGS+0*8)(R15) - MOVD syscall15Args_a7(R9), R1 + MOVD syscallArgs_a7(R9), R1 MOVD R1, (STACK_ARGS+1*8)(R15) - MOVD syscall15Args_a8(R9), R1 + MOVD syscallArgs_a8(R9), R1 MOVD R1, (STACK_ARGS+2*8)(R15) - MOVD syscall15Args_a9(R9), R1 + MOVD syscallArgs_a9(R9), R1 MOVD R1, (STACK_ARGS+3*8)(R15) - MOVD syscall15Args_a10(R9), R1 + MOVD syscallArgs_a10(R9), R1 MOVD R1, (STACK_ARGS+4*8)(R15) - MOVD syscall15Args_a11(R9), R1 + MOVD syscallArgs_a11(R9), R1 MOVD R1, (STACK_ARGS+5*8)(R15) - MOVD syscall15Args_a12(R9), R1 + MOVD syscallArgs_a12(R9), R1 MOVD R1, (STACK_ARGS+6*8)(R15) - MOVD syscall15Args_a13(R9), R1 + MOVD syscallArgs_a13(R9), R1 MOVD R1, (STACK_ARGS+7*8)(R15) - MOVD syscall15Args_a14(R9), R1 + MOVD syscallArgs_a14(R9), R1 MOVD R1, (STACK_ARGS+8*8)(R15) - MOVD syscall15Args_a15(R9), R1 + MOVD syscallArgs_a15(R9), R1 MOVD R1, (STACK_ARGS+9*8)(R15) + MOVD syscallArgs_a16(R9), R1 + MOVD R1, (STACK_ARGS+10*8)(R15) + MOVD syscallArgs_a17(R9), R1 + MOVD R1, (STACK_ARGS+11*8)(R15) + MOVD syscallArgs_a18(R9), R1 + MOVD R1, (STACK_ARGS+12*8)(R15) + MOVD syscallArgs_a19(R9), R1 + MOVD R1, (STACK_ARGS+13*8)(R15) + MOVD syscallArgs_a20(R9), R1 + MOVD R1, (STACK_ARGS+14*8)(R15) + MOVD syscallArgs_a21(R9), R1 + MOVD R1, (STACK_ARGS+15*8)(R15) + MOVD syscallArgs_a22(R9), R1 + MOVD R1, (STACK_ARGS+16*8)(R15) + MOVD syscallArgs_a23(R9), R1 + MOVD R1, (STACK_ARGS+17*8)(R15) + MOVD syscallArgs_a24(R9), R1 + MOVD R1, (STACK_ARGS+18*8)(R15) + MOVD syscallArgs_a25(R9), R1 + MOVD R1, (STACK_ARGS+19*8)(R15) + MOVD syscallArgs_a26(R9), R1 + MOVD R1, (STACK_ARGS+20*8)(R15) + MOVD syscallArgs_a27(R9), R1 + MOVD R1, (STACK_ARGS+21*8)(R15) + MOVD syscallArgs_a28(R9), R1 + MOVD R1, (STACK_ARGS+22*8)(R15) + MOVD syscallArgs_a29(R9), R1 + MOVD R1, (STACK_ARGS+23*8)(R15) + MOVD syscallArgs_a30(R9), R1 + MOVD R1, (STACK_ARGS+24*8)(R15) + MOVD syscallArgs_a31(R9), R1 + MOVD R1, (STACK_ARGS+25*8)(R15) + MOVD syscallArgs_a32(R9), R1 + MOVD R1, (STACK_ARGS+26*8)(R15) // Call function - MOVD syscall15Args_fn(R9), R1 + MOVD syscallArgs_fn(R9), R1 BL (R1) // Restore args pointer for storing results MOVD ARGP_SAVE(R15), R9 // Store integer results back (R2, R3) - MOVD R2, syscall15Args_a1(R9) - MOVD R3, syscall15Args_a2(R9) + MOVD R2, syscallArgs_a1(R9) + MOVD R3, syscallArgs_a2(R9) // Store float return values (F0, F2, F4, F6) - FMOVD F0, syscall15Args_f1(R9) - FMOVD F2, syscall15Args_f2(R9) - FMOVD F4, syscall15Args_f3(R9) - FMOVD F6, syscall15Args_f4(R9) + FMOVD F0, syscallArgs_f1(R9) + FMOVD F2, syscallArgs_f2(R9) + FMOVD F4, syscallArgs_f3(R9) + FMOVD F6, syscallArgs_f4(R9) // Deallocate stack frame ADD $STACK_SIZE, R15 diff --git a/syscall.go b/syscall.go index 7b45383d..32f4a3d9 100644 --- a/syscall.go +++ b/syscall.go @@ -1,53 +1,44 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: 2022 The Ebitengine Authors -//go:build !386 && !arm && (darwin || freebsd || linux || netbsd || windows) +//go:build !386 && !arm && !ppc64le && (darwin || freebsd || linux || netbsd || windows) package purego -// CDecl marks a function as being called using the __cdecl calling convention as defined in -// the [MSDocs] when passed to NewCallback. It must be the first argument to the function. -// This is only useful on 386 Windows, but it is safe to use on other platforms. -// -// [MSDocs]: https://learn.microsoft.com/en-us/cpp/cpp/cdecl?view=msvc-170 -type CDecl struct{} +import ( + "runtime" + "unsafe" +) const ( - maxArgs = 15 + maxArgs = 32 ) -type syscall15Args struct { - fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr - f1, f2, f3, f4, f5, f6, f7, f8 uintptr - arm64_r8 uintptr +type syscallArgs struct { + fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr + a16, a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, a28, a29, a30, a31, a32 uintptr + f1, f2, f3, f4, f5, f6, f7, f8 uintptr + arm64_r8 uintptr } -func (s *syscall15Args) Set(fn uintptr, ints []uintptr, floats []uintptr, r8 uintptr) { - s.fn = fn - s.a1 = ints[0] - s.a2 = ints[1] - s.a3 = ints[2] - s.a4 = ints[3] - s.a5 = ints[4] - s.a6 = ints[5] - s.a7 = ints[6] - s.a8 = ints[7] - s.a9 = ints[8] - s.a10 = ints[9] - s.a11 = ints[10] - s.a12 = ints[11] - s.a13 = ints[12] - s.a14 = ints[13] - s.a15 = ints[14] - s.f1 = floats[0] - s.f2 = floats[1] - s.f3 = floats[2] - s.f4 = floats[3] - s.f5 = floats[4] - s.f6 = floats[5] - s.f7 = floats[6] - s.f8 = floats[7] - s.arm64_r8 = r8 +func syscall_SyscallN(fn uintptr, sysargs []uintptr, floats []uintptr, r8 uintptr) *syscallArgs { + s := thePool.Get().(*syscallArgs) + *s = syscallArgs{ + fn: fn, + a1: sysargs[0], a2: sysargs[1], a3: sysargs[2], a4: sysargs[3], + a5: sysargs[4], a6: sysargs[5], a7: sysargs[6], a8: sysargs[7], + a9: sysargs[8], a10: sysargs[9], a11: sysargs[10], a12: sysargs[11], + a13: sysargs[12], a14: sysargs[13], a15: sysargs[14], a16: sysargs[15], + a17: sysargs[16], a18: sysargs[17], a19: sysargs[18], a20: sysargs[19], + a21: sysargs[20], a22: sysargs[21], a23: sysargs[22], a24: sysargs[23], + a25: sysargs[24], a26: sysargs[25], a27: sysargs[26], a28: sysargs[27], + a29: sysargs[28], a30: sysargs[29], a31: sysargs[30], a32: sysargs[31], + f1: floats[0], f2: floats[1], f3: floats[2], f4: floats[3], + f5: floats[4], f6: floats[5], f7: floats[6], f8: floats[7], + arm64_r8: r8, + } + runtime_cgocall(syscallXABI0, unsafe.Pointer(s)) + return s } // SyscallN takes fn, a C function pointer and a list of arguments as uintptr. @@ -76,8 +67,18 @@ func SyscallN(fn uintptr, args ...uintptr) (r1, r2, err uintptr) { if len(args) > maxArgs { panic("purego: too many arguments to SyscallN") } + + // Windows uses syscall.SyscallN in syscall_windows.go. + if runtime.GOOS == "windows" { + return syscall_syscallN(fn, args...) + } + // add padding so there is no out-of-bounds slicing var tmp [maxArgs]uintptr copy(tmp[:], args) - return syscall_syscall15X(fn, tmp[0], tmp[1], tmp[2], tmp[3], tmp[4], tmp[5], tmp[6], tmp[7], tmp[8], tmp[9], tmp[10], tmp[11], tmp[12], tmp[13], tmp[14]) + var floats [maxArgs]uintptr + copy(floats[:], tmp[:]) + s := syscall_SyscallN(fn, tmp[:], floats[:], 0) + defer thePool.Put(s) + return s.a1, s.a2, s.a3 } diff --git a/syscall_32bit.go b/syscall_32bit.go index f9f37630..c0641110 100644 --- a/syscall_32bit.go +++ b/syscall_32bit.go @@ -5,75 +5,42 @@ package purego -// CDecl marks a function as being called using the __cdecl calling convention as defined in -// the [MSDocs] when passed to NewCallback. It must be the first argument to the function. -// This is only useful on 386 Windows, but it is safe to use on other platforms. -// -// [MSDocs]: https://learn.microsoft.com/en-us/cpp/cpp/cdecl?view=msvc-170 -type CDecl struct{} +import ( + "runtime" + "unsafe" +) const ( maxArgs = 32 ) -type syscall15Args struct { +type syscallArgs struct { fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr a16, a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, a28, a29, a30, a31, a32 uintptr f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15, f16 uintptr arm64_r8 uintptr } -func (s *syscall15Args) Set(fn uintptr, ints []uintptr, floats []uintptr, r8 uintptr) { - s.fn = fn - s.a1 = ints[0] - s.a2 = ints[1] - s.a3 = ints[2] - s.a4 = ints[3] - s.a5 = ints[4] - s.a6 = ints[5] - s.a7 = ints[6] - s.a8 = ints[7] - s.a9 = ints[8] - s.a10 = ints[9] - s.a11 = ints[10] - s.a12 = ints[11] - s.a13 = ints[12] - s.a14 = ints[13] - s.a15 = ints[14] - s.a16 = ints[15] - s.a17 = ints[16] - s.a18 = ints[17] - s.a19 = ints[18] - s.a20 = ints[19] - s.a21 = ints[20] - s.a22 = ints[21] - s.a23 = ints[22] - s.a24 = ints[23] - s.a25 = ints[24] - s.a26 = ints[25] - s.a27 = ints[26] - s.a28 = ints[27] - s.a29 = ints[28] - s.a30 = ints[29] - s.a31 = ints[30] - s.a32 = ints[31] - s.f1 = floats[0] - s.f2 = floats[1] - s.f3 = floats[2] - s.f4 = floats[3] - s.f5 = floats[4] - s.f6 = floats[5] - s.f7 = floats[6] - s.f8 = floats[7] - s.f9 = floats[8] - s.f10 = floats[9] - s.f11 = floats[10] - s.f12 = floats[11] - s.f13 = floats[12] - s.f14 = floats[13] - s.f15 = floats[14] - s.f16 = floats[15] - s.arm64_r8 = r8 +func syscall_SyscallN(fn uintptr, sysargs []uintptr, floats []uintptr, r8 uintptr) *syscallArgs { + s := thePool.Get().(*syscallArgs) + *s = syscallArgs{ + fn: fn, + a1: sysargs[0], a2: sysargs[1], a3: sysargs[2], a4: sysargs[3], + a5: sysargs[4], a6: sysargs[5], a7: sysargs[6], a8: sysargs[7], + a9: sysargs[8], a10: sysargs[9], a11: sysargs[10], a12: sysargs[11], + a13: sysargs[12], a14: sysargs[13], a15: sysargs[14], a16: sysargs[15], + a17: sysargs[16], a18: sysargs[17], a19: sysargs[18], a20: sysargs[19], + a21: sysargs[20], a22: sysargs[21], a23: sysargs[22], a24: sysargs[23], + a25: sysargs[24], a26: sysargs[25], a27: sysargs[26], a28: sysargs[27], + a29: sysargs[28], a30: sysargs[29], a31: sysargs[30], a32: sysargs[31], + f1: floats[0], f2: floats[1], f3: floats[2], f4: floats[3], + f5: floats[4], f6: floats[5], f7: floats[6], f8: floats[7], + f9: floats[8], f10: floats[9], f11: floats[10], f12: floats[11], + f13: floats[12], f14: floats[13], f15: floats[14], f16: floats[15], + arm64_r8: r8, + } + runtime_cgocall(syscallXABI0, unsafe.Pointer(s)) + return s } // SyscallN takes fn, a C function pointer and a list of arguments as uintptr. @@ -102,8 +69,18 @@ func SyscallN(fn uintptr, args ...uintptr) (r1, r2, err uintptr) { if len(args) > maxArgs { panic("purego: too many arguments to SyscallN") } - // add padding so there is no out-of-bounds slicing + + // Windows uses syscall.SyscallN in syscall_windows.go. + if runtime.GOOS == "windows" { + return syscall_syscallN(fn, args...) + } + + // Add padding so there is no out-of-bounds slicing. var tmp [maxArgs]uintptr copy(tmp[:], args) - return syscall_syscall15X(fn, tmp[0], tmp[1], tmp[2], tmp[3], tmp[4], tmp[5], tmp[6], tmp[7], tmp[8], tmp[9], tmp[10], tmp[11], tmp[12], tmp[13], tmp[14]) + var floats [16]uintptr + copy(floats[:], tmp[:16]) + s := syscall_SyscallN(fn, tmp[:], floats[:], 0) + defer thePool.Put(s) + return s.a1, s.a2, s.a3 } diff --git a/syscall_cgo_linux.go b/syscall_cgo_linux.go index 179167f4..24e0eb89 100644 --- a/syscall_cgo_linux.go +++ b/syscall_cgo_linux.go @@ -9,12 +9,7 @@ import ( "github.com/ebitengine/purego/internal/cgo" ) -var syscall15XABI0 = uintptr(cgo.Syscall15XABI0) - -//go:nosplit -func syscall_syscall15X(fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr) (r1, r2, err uintptr) { - return cgo.Syscall15X(fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15) -} +var syscallXABI0 = uintptr(cgo.SyscallXABI0) func NewCallback(_ any) uintptr { panic("purego: NewCallback on Linux is only supported on 386/amd64/arm64/arm/loong64/ppc64le/riscv64/s390x") diff --git a/syscall_notstackargs.go b/syscall_notstackargs.go index 76d1f5a7..8acb7ddb 100644 --- a/syscall_notstackargs.go +++ b/syscall_notstackargs.go @@ -35,3 +35,7 @@ type callbackArgs struct { func (c *callbackArgs) stackFrame() unsafe.Pointer { return nil } + +func (c *callbackArgs) intFrame() unsafe.Pointer { + return nil +} diff --git a/syscall_ppc64le.go b/syscall_ppc64le.go new file mode 100644 index 00000000..a133be3a --- /dev/null +++ b/syscall_ppc64le.go @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2022 The Ebitengine Authors + +//go:build linux + +package purego + +import ( + "unsafe" +) + +const ( + maxArgs = 15 +) + +type syscallArgs struct { + fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr + f1, f2, f3, f4, f5, f6, f7, f8 uintptr + arm64_r8 uintptr +} + +func syscall_SyscallN(fn uintptr, sysargs []uintptr, floats []uintptr, r8 uintptr) *syscallArgs { + s := thePool.Get().(*syscallArgs) + *s = syscallArgs{ + fn: fn, + a1: sysargs[0], a2: sysargs[1], a3: sysargs[2], a4: sysargs[3], + a5: sysargs[4], a6: sysargs[5], a7: sysargs[6], a8: sysargs[7], + a9: sysargs[8], a10: sysargs[9], a11: sysargs[10], a12: sysargs[11], + a13: sysargs[12], a14: sysargs[13], a15: sysargs[14], + f1: floats[0], f2: floats[1], f3: floats[2], f4: floats[3], + f5: floats[4], f6: floats[5], f7: floats[6], f8: floats[7], + arm64_r8: r8, + } + runtime_cgocall(syscallXABI0, unsafe.Pointer(s)) + return s +} + +// SyscallN takes fn, a C function pointer and a list of arguments as uintptr. +// There is an internal maximum number of arguments that SyscallN can take. It panics +// when the maximum is exceeded. It returns the result and the libc error code if there is one. +// +// In order to call this function properly make sure to follow all the rules specified in [unsafe.Pointer] +// especially point 4. +// +// NOTE: SyscallN does not properly call functions that have both integer and float parameters. +// See discussion comment https://github.com/ebiten/purego/pull/1#issuecomment-1128057607 +// for an explanation of why that is. +// +// On amd64, if there are more than 8 floats the 9th and so on will be placed incorrectly on the +// stack. +// +// The pragma go:nosplit is not needed at this function declaration because it uses go:uintptrescapes +// which forces all the objects that the uintptrs point to onto the heap where a stack split won't affect +// their memory location. +// +//go:uintptrescapes +func SyscallN(fn uintptr, args ...uintptr) (r1, r2, err uintptr) { + if fn == 0 { + panic("purego: fn is nil") + } + if len(args) > maxArgs { + panic("purego: too many arguments to SyscallN") + } + + var tmp [maxArgs]uintptr + copy(tmp[:], args) + var floats [maxArgs]uintptr + copy(floats[:], tmp[:]) + s := syscall_SyscallN(fn, tmp[:], floats[:], 0) + defer thePool.Put(s) + return s.a1, s.a2, s.a3 +} diff --git a/syscall_stackargs.go b/syscall_stackargs.go deleted file mode 100644 index f7e46790..00000000 --- a/syscall_stackargs.go +++ /dev/null @@ -1,42 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: 2026 The Ebitengine Authors - -//go:build ppc64le || s390x - -package purego - -import "unsafe" - -// callbackArgs is the argument block passed from the assembly trampoline -// to callbackWrap when C code calls a Go callback registered with NewCallback. -// The assembly fills in the fields before calling callbackWrap, which uses -// them to determine which Go function to invoke and where to read its -// arguments from, and writes the return value back into result. -// -// callbackArgs is only used on Unix. On Windows, callbacks are handled by -// the runtime's own callback mechanism, so this type is compiled but unused, -// serving only as a stub to satisfy cross-platform compilation. -type callbackArgs struct { - index uintptr - // args points to the argument block. - // - // The structure of the arguments goes - // float registers followed by the - // integer registers followed by the stack. - // - // This variable is treated as a contiguous - // block of memory containing all of the arguments - // for this callback. - args unsafe.Pointer - // Below are out-args from callbackWrap - result [1]uintptr - // stackArgs points to stack-passed arguments for architectures where - // they can't be made contiguous with register args (e.g., ppc64le). - // On other architectures, this is nil and stack args are read from - // the end of the args block. - stackArgs unsafe.Pointer -} - -func (c *callbackArgs) stackFrame() unsafe.Pointer { - return c.stackArgs -} diff --git a/syscall_stackargs_ppc64le.go b/syscall_stackargs_ppc64le.go new file mode 100644 index 00000000..9eb41315 --- /dev/null +++ b/syscall_stackargs_ppc64le.go @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2026 The Ebitengine Authors + +package purego + +import "unsafe" + +// callbackArgs is the argument block passed from the assembly trampoline +// to callbackWrap when C code calls a Go callback registered with NewCallback. +type callbackArgs struct { + index uintptr + // args points to the argument block. + // + // The structure of the arguments goes + // float registers followed by the + // integer registers. + args unsafe.Pointer + // Below are out-args from callbackWrap. + result [1]uintptr + // stackArgs points to stack-passed arguments. + stackArgs unsafe.Pointer +} + +func (c *callbackArgs) stackFrame() unsafe.Pointer { + return c.stackArgs +} + +func (c *callbackArgs) intFrame() unsafe.Pointer { + return nil +} diff --git a/syscall_stackargs_s390x.go b/syscall_stackargs_s390x.go new file mode 100644 index 00000000..399985a5 --- /dev/null +++ b/syscall_stackargs_s390x.go @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2026 The Ebitengine Authors + +package purego + +import "unsafe" + +// callbackArgs is the argument block passed from the assembly trampoline +// to callbackWrap when C code calls a Go callback registered with NewCallback. +type callbackArgs struct { + index uintptr + // args points to the argument block. + // + // The structure of the arguments goes + // float registers followed by the + // integer registers followed by the stack. + args unsafe.Pointer + // Below are out-args from callbackWrap. + result [1]uintptr + // stackArgs points to stack-passed arguments. + stackArgs unsafe.Pointer +} + +func (c *callbackArgs) stackFrame() unsafe.Pointer { + return c.stackArgs +} + +func (c *callbackArgs) intFrame() unsafe.Pointer { + return nil +} diff --git a/syscall_unix.go b/syscall_unix.go index cee86887..87030f8c 100644 --- a/syscall_unix.go +++ b/syscall_unix.go @@ -13,21 +13,10 @@ import ( "unsafe" ) -var syscall15XABI0 uintptr +var syscallXABI0 uintptr -func syscall_syscall15X(fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr) (r1, r2, err uintptr) { - args := thePool.Get().(*syscall15Args) - defer thePool.Put(args) - - *args = syscall15Args{ - fn: fn, - a1: a1, a2: a2, a3: a3, a4: a4, a5: a5, a6: a6, a7: a7, a8: a8, - a9: a9, a10: a10, a11: a11, a12: a12, a13: a13, a14: a14, a15: a15, - f1: a1, f2: a2, f3: a3, f4: a4, f5: a5, f6: a6, f7: a7, f8: a8, - } - - runtime_cgocall(syscall15XABI0, unsafe.Pointer(args)) - return args.a1, args.a2, args.a3 +func syscall_syscallN(fn uintptr, args ...uintptr) (r1, r2, err uintptr) { + panic("purego: syscall_syscallN is only supported on windows") } // NewCallback converts a Go function to a function pointer conforming to the C calling convention. @@ -136,10 +125,14 @@ func callbackWrap(a *callbackArgs) { // stackFrame points to stack-passed arguments. On most architectures this is // contiguous with frame (after register args), but on ppc64le it's separate. var stackFrame *[callbackMaxFrame]uintptr + var intFrame *[callbackMaxFrame]uintptr if sf := a.stackFrame(); sf != nil { // Only ppc64le uses separate stackArgs pointer due to NOSPLIT constraints stackFrame = (*[callbackMaxFrame]uintptr)(sf) } + if intf := a.intFrame(); intf != nil { + intFrame = (*[callbackMaxFrame]uintptr)(intf) + } // floatsN and intsN track the number of register slots used, not argument count. // This distinction matters on ARM32 where float64 uses 2 slots (32-bit registers). var floatsN int @@ -147,7 +140,7 @@ func callbackWrap(a *callbackArgs) { // On amd64/loong64/ppc64le/riscv64/s390x, when returning a struct larger than // maxRegAllocStructSize, the caller passes a hidden pointer in the first integer // register. Skip it to avoid misreading it as the first function argument. - if (runtime.GOARCH == "amd64" || runtime.GOARCH == "loong64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "riscv64" || runtime.GOARCH == "s390x") && + if (runtime.GOARCH == "amd64" || runtime.GOARCH == "loong64" || runtime.GOARCH == "riscv64" || runtime.GOARCH == "s390x") && fnType.NumOut() == 1 && fnType.Out(0).Kind() == reflect.Struct && fnType.Out(0).Size() > maxRegAllocStructSize { intsN = 1 @@ -227,13 +220,17 @@ func callbackWrap(a *callbackArgs) { stackSlot += slots } } else { - // the integers begin after the floats in frame - pos := intsN + numOfFloatRegisters() - if runtime.GOARCH == "s390x" { - // s390x big-endian: sub-8-byte values are right-justified in GPR slot - args[i] = callbackArgFromSlotBigEndian(unsafe.Pointer(&frame[pos]), inType) + if intFrame != nil { + args[i] = reflect.NewAt(inType, unsafe.Pointer(&intFrame[intsN])).Elem() } else { - args[i] = reflect.NewAt(inType, unsafe.Pointer(&frame[pos])).Elem() + // the integers begin after the floats in frame + pos := intsN + numOfFloatRegisters() + if runtime.GOARCH == "s390x" { + // s390x big-endian: sub-8-byte values are right-justified in GPR slot + args[i] = callbackArgFromSlotBigEndian(unsafe.Pointer(&frame[pos]), inType) + } else { + args[i] = reflect.NewAt(inType, unsafe.Pointer(&frame[pos])).Elem() + } } } intsN += slots diff --git a/syscall_windows.go b/syscall_windows.go index 9e3f8923..1e2ba8cb 100644 --- a/syscall_windows.go +++ b/syscall_windows.go @@ -9,10 +9,10 @@ import ( "unsafe" ) -var syscall15XABI0 uintptr +var syscallXABI0 uintptr -func syscall_syscall15X(fn, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr) (r1, r2, err uintptr) { - r1, r2, errno := syscall.Syscall15(fn, 15, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15) +func syscall_syscallN(fn uintptr, args ...uintptr) (r1, r2, err uintptr) { + r1, r2, errno := syscall.SyscallN(fn, args...) return r1, r2, uintptr(errno) } diff --git a/testdata/abitest/abi_test.c b/testdata/abitest/abi_test.c index 446d9e99..824e453d 100644 --- a/testdata/abitest/abi_test.c +++ b/testdata/abitest/abi_test.c @@ -129,3 +129,41 @@ void stack_25_int64_exceeds(char *buf, size_t bufsize, int64_t a1, int64_t a2, i snprintf(buf, bufsize, "%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64 ":%" PRId64, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, a24, a25); } + +uintptr_t stack_20_uintptr( + uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4, uintptr_t a5, + uintptr_t a6, uintptr_t a7, uintptr_t a8, uintptr_t a9, uintptr_t a10, + uintptr_t a11, uintptr_t a12, uintptr_t a13, uintptr_t a14, uintptr_t a15, + uintptr_t a16, uintptr_t a17, uintptr_t a18, uintptr_t a19, uintptr_t a20 +) { + return a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8 + a9 + a10 + + a11 + a12 + a13 + a14 + a15 + a16 + a17 + a18 + a19 + a20; +} + +uintptr_t stack_32_uintptr( + uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4, uintptr_t a5, uintptr_t a6, uintptr_t a7, uintptr_t a8, + uintptr_t a9, uintptr_t a10, uintptr_t a11, uintptr_t a12, uintptr_t a13, uintptr_t a14, uintptr_t a15, uintptr_t a16, + uintptr_t a17, uintptr_t a18, uintptr_t a19, uintptr_t a20, uintptr_t a21, uintptr_t a22, uintptr_t a23, uintptr_t a24, + uintptr_t a25, uintptr_t a26, uintptr_t a27, uintptr_t a28, uintptr_t a29, uintptr_t a30, uintptr_t a31, uintptr_t a32 +) { + return a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8 + + a9 + a10 + a11 + a12 + a13 + a14 + a15 + a16 + + a17 + a18 + a19 + a20 + a21 + a22 + a23 + a24 + + a25 + a26 + a27 + a28 + a29 + a30 + a31 + a32; +} + +double stack_32_mixed_int_float( + uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4, uintptr_t a5, uintptr_t a6, uintptr_t a7, uintptr_t a8, + uintptr_t a9, uintptr_t a10, uintptr_t a11, uintptr_t a12, uintptr_t a13, uintptr_t a14, uintptr_t a15, uintptr_t a16, + double f1, double f2, double f3, double f4, double f5, double f6, double f7, double f8, + double f9, double f10, double f11, double f12, double f13, double f14, double f15, double f16 +) { + return (double)a1 * 1 + (double)a2 * 2 + (double)a3 * 3 + (double)a4 * 4 + + (double)a5 * 5 + (double)a6 * 6 + (double)a7 * 7 + (double)a8 * 8 + + (double)a9 * 9 + (double)a10 * 10 + (double)a11 * 11 + (double)a12 * 12 + + (double)a13 * 13 + (double)a14 * 14 + (double)a15 * 15 + (double)a16 * 16 + + f1 * 17 + f2 * 18 + f3 * 19 + f4 * 20 + + f5 * 21 + f6 * 22 + f7 * 23 + f8 * 24 + + f9 * 25 + f10 * 26 + f11 * 27 + f12 * 28 + + f13 * 29 + f14 * 30 + f15 * 31 + f16 * 32; +}