diff --git a/arch/linx/Makefile b/arch/linx/Makefile index 5ff10e8cf20675..dae79ea923e0c5 100644 --- a/arch/linx/Makefile +++ b/arch/linx/Makefile @@ -18,11 +18,18 @@ export BITS ifeq ($(CONFIG_ARCH_RV64I),y) BITS := 64 UTS_MACHINE := linx64 - - KBUILD_LDFLAGS += -melf64llinxv5 else BITS := 32 UTS_MACHINE := linx32 +endif + +ifeq ($(CONFIG_LD_IS_LLD),y) + # The in-tree Linx LLD backend links ELF Linx objects correctly but does + # not accept GNU emulation strings like -melf64llinxv5. Let it infer the + # target from the input objects. +else ifeq ($(CONFIG_ARCH_RV64I),y) + KBUILD_LDFLAGS += -melf64llinxv5 +else KBUILD_LDFLAGS += -melf32llinxv5 endif @@ -41,6 +48,11 @@ linx-march-$(CONFIG_RISCV_ISA_C) := $(linx-march-y)c KBUILD_CFLAGS += -march=$(subst fd,,$(linx-march-y)) KBUILD_AFLAGS += -march=$(linx-march-y) +# Current Linx backend bring-up is not stable enough for large generic kernel +# loop/SLP vectorization. Disable those passes globally so unrelated drivers do +# not block serial-boot closure with backend crashes. 
+KBUILD_CFLAGS += $(call cc-option,-fno-vectorize) $(call cc-option,-fno-slp-vectorize) + KBUILD_CFLAGS += -DCONFIG_PAGE_OFFSET=$(CONFIG_PAGE_OFFSET) ifeq ($(CONFIG_PERF_EVENTS),y) KBUILD_CFLAGS += -fno-omit-frame-pointer @@ -72,8 +84,6 @@ else KBUILD_IMAGE := $(boot)/Image.gz endif -head-y := arch/linx/kernel/head.o - libs-y += arch/linx/lib/ libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a diff --git a/arch/linx/configs/linx_v150_defconfig b/arch/linx/configs/linx_v150_defconfig index 43d223eb303617..0a4ba2cc938c13 100644 --- a/arch/linx/configs/linx_v150_defconfig +++ b/arch/linx/configs/linx_v150_defconfig @@ -262,8 +262,7 @@ CONFIG_SOC_VIRT=y CONFIG_ARCH_RV64I=y # CONFIG_CMODEL_MEDLOW is not set CONFIG_CMODEL_MEDANY=y -CONFIG_SMP=y -CONFIG_NR_CPUS=8 +# CONFIG_SMP is not set # CONFIG_HOTPLUG_CPU is not set CONFIG_TUNE_GENERIC=y # CONFIG_NUMA is not set @@ -2081,7 +2080,7 @@ CONFIG_MFD_SYSCON=y # CONFIG_VGA_ARB=y CONFIG_VGA_ARB_MAX_GPUS=16 -CONFIG_DRM=y +# CONFIG_DRM is not set # CONFIG_DRM_DP_AUX_CHARDEV is not set # CONFIG_DRM_DEBUG_MM is not set # CONFIG_DRM_DEBUG_SELFTEST is not set @@ -2097,8 +2096,8 @@ CONFIG_DRM_GEM_SHMEM_HELPER=y # # I2C encoder or helper chips # -CONFIG_DRM_I2C_CH7006=y -CONFIG_DRM_I2C_SIL164=y +# CONFIG_DRM_I2C_CH7006 is not set +# CONFIG_DRM_I2C_SIL164 is not set # CONFIG_DRM_I2C_NXP_TDA998X is not set # CONFIG_DRM_I2C_NXP_TDA9950 is not set # end of I2C encoder or helper chips @@ -2109,26 +2108,11 @@ CONFIG_DRM_I2C_SIL164=y # CONFIG_DRM_KOMEDA is not set # end of ARM devices -CONFIG_DRM_RADEON=y -# CONFIG_DRM_RADEON_USERPTR is not set +# CONFIG_DRM_RADEON is not set # CONFIG_DRM_AMDGPU is not set -CONFIG_DRM_NOUVEAU=y -CONFIG_NOUVEAU_LEGACY_CTX_SUPPORT=y -CONFIG_NOUVEAU_DEBUG=5 -CONFIG_NOUVEAU_DEBUG_DEFAULT=3 -# CONFIG_NOUVEAU_DEBUG_MMU is not set -# CONFIG_NOUVEAU_DEBUG_PUSH is not set -CONFIG_DRM_NOUVEAU_BACKLIGHT=y -# CONFIG_DRM_VGEM is not set -# CONFIG_DRM_VKMS is not set -# CONFIG_DRM_UDL is not set -# CONFIG_DRM_AST is not set -# 
CONFIG_DRM_MGAG200 is not set -# CONFIG_DRM_RCAR_DW_HDMI is not set -# CONFIG_DRM_RCAR_LVDS is not set -# CONFIG_DRM_QXL is not set -CONFIG_DRM_VIRTIO_GPU=y -CONFIG_DRM_PANEL=y +# CONFIG_DRM_NOUVEAU is not set +# CONFIG_DRM_VIRTIO_GPU is not set +# CONFIG_DRM_PANEL is not set # # Display Panels diff --git a/arch/linx/include/asm/block-def.h b/arch/linx/include/asm/block-def.h index 6ea512c64c3801..eef309dbc50367 100644 --- a/arch/linx/include/asm/block-def.h +++ b/arch/linx/include/asm/block-def.h @@ -293,7 +293,7 @@ .macro block_next_ind symbol bstart.std ind 1: addtpc %tpcrel_hi(\symbol), -> t - addi t#1, %tpcrel_lo(1b), -> t + addi t#1, %tpcrel_lo(\symbol), -> t setc.tgt t#1 bstop .endm @@ -304,7 +304,7 @@ .macro block_next_indcall symbol bstart.std icall 1: addtpc %tpcrel_hi(\symbol), -> t - addi t#1, %tpcrel_lo(1b), -> t + addi t#1, %tpcrel_lo(\symbol), -> t setc.tgt t#1 setret 2f, -> ra bstop diff --git a/arch/linx/include/asm/cmpxchg.h b/arch/linx/include/asm/cmpxchg.h index 2f3758fb9a5ac5..a09ac0457396ae 100644 --- a/arch/linx/include/asm/cmpxchg.h +++ b/arch/linx/include/asm/cmpxchg.h @@ -160,11 +160,17 @@ * optional instruction atomic.order attribute, e.g. .rl * @ret, @new, @old, @ptr: * ret = *ptr, if (old == ret) *ptr = new, return @ret + * + * Current Linx LLVM inline-asm lowering rotates these named operands when + * materializing the final textual instruction, so the source string must be + * spelled as [%[n]], %[p], %[o] to emit the intended machine-order + * "[ptr], old, new". Keep this workaround local to the asm string until the + * backend contract is repaired. 
*/ #define ASM_CMPXCHG(type, order, ret, new, old, ptr) \ __asm__ __volatile__ ( \ "BSTART.sys fall\n" \ - "hl.cas" #type #order " [%[p]], %[o], %[n], -> %[r]\n" \ + "hl.cas" #type #order " [%[n]], %[p], %[o], -> %[r]\n" \ : [r] "=&r" (ret) \ : [p] "r" (ptr), [o] "r" (old), [n] "r" (new) \ : "memory") diff --git a/arch/linx/include/asm/futex.h b/arch/linx/include/asm/futex.h index 79e6ddb323f270..f32ea3ad4c720f 100644 --- a/arch/linx/include/asm/futex.h +++ b/arch/linx/include/asm/futex.h @@ -72,12 +72,12 @@ "b.attr aqrl \n" \ "" insn " \n" \ "2: \n" \ - ".section .fixup,\"ax\" \n" \ + "BSTART.std direct, 4f \n" \ ".balign 16 \n" \ "3: \n" \ "BSTART.std direct, 2b \n" \ "subi zero, %[e], -> %[r] \n" \ - ".previous \n" \ + "4: \n" \ : [r] "+r" (ret), [ov] "=&r" (oldval) \ : [u] "r" (uaddr), [op] "r" (oparg), \ [e] "i" (EFAULT) \ diff --git a/arch/linx/include/asm/irqflags.h b/arch/linx/include/asm/irqflags.h index 60868b89257733..cd883e5640e601 100644 --- a/arch/linx/include/asm/irqflags.h +++ b/arch/linx/include/asm/irqflags.h @@ -49,7 +49,11 @@ static inline int arch_irqs_disabled(void) /* set interrupt enabled status */ static inline void arch_local_irq_restore(unsigned long flags) { - ssr_set(SSR_CSTATE, flags & CSTATE_I); + unsigned long cstate = ssr_read(SSR_CSTATE); + + cstate &= ~CSTATE_I; + cstate |= flags & CSTATE_I; + ssr_write(SSR_CSTATE, cstate); } #endif /* _ASM_LINX_IRQFLAGS_H */ diff --git a/arch/linx/include/asm/pgtable-64.h b/arch/linx/include/asm/pgtable-64.h index 8bb4fdf0d1e117..7b6b74843eb8ab 100644 --- a/arch/linx/include/asm/pgtable-64.h +++ b/arch/linx/include/asm/pgtable-64.h @@ -47,6 +47,21 @@ typedef struct { #define __p4d(x) ((p4d_t) { (x) }) #define PTRS_PER_P4D (PAGE_SIZE / sizeof(p4d_t)) +#if defined(__LINX__) +#define LINX_BOOT_PT_LOW_ALIAS_LIMIT (_AC(64, UL) << 20) + +static inline bool linx_is_low_boot_pt_phys(unsigned long pa) +{ + /* + * Linx bring-up allocates early page-table pages from a bounded low + * physical window. 
Treat that entire window as directly aliasable so + * early page-table walks do not depend on relocation-sensitive global + * state to decide between low alias and __va(). + */ + return pa < LINX_BOOT_PT_LOW_ALIAS_LIMIT; +} +#endif + /* Page Upper Directory entry */ typedef struct { unsigned long pud; @@ -109,7 +124,14 @@ static inline unsigned long _pud_pfn(pud_t pud) static inline pmd_t *pud_pgtable(pud_t pud) { - return (pmd_t *)pfn_to_virt(pud_val(pud) >> _PAGE_PFN_SHIFT); + unsigned long pa = pfn_to_phys(_pud_pfn(pud)); + +#if defined(__LINX__) + if (linx_is_low_boot_pt_phys(pa)) + return (pmd_t *)(uintptr_t)pa; +#endif + + return (pmd_t *)__va(pa); } static inline struct page *pud_page(pud_t pud) @@ -208,8 +230,16 @@ static inline unsigned long _p4d_pfn(p4d_t p4d) static inline pud_t *p4d_pgtable(p4d_t p4d) { - if (pgtable_l4_enabled) - return (pud_t *)pfn_to_virt(p4d_val(p4d) >> _PAGE_PFN_SHIFT); + if (pgtable_l4_enabled) { + unsigned long pa = pfn_to_phys(_p4d_pfn(p4d)); + +#if defined(__LINX__) + if (linx_is_low_boot_pt_phys(pa)) + return (pud_t *)(uintptr_t)pa; +#endif + + return (pud_t *)__va(pa); + } return (pud_t *)pud_pgtable((pud_t) { p4d_val(p4d) }); } @@ -272,8 +302,16 @@ static inline void pgd_clear(pgd_t *pgd) static inline p4d_t *pgd_pgtable(pgd_t pgd) { - if (pgtable_l5_enabled) - return (p4d_t *)pfn_to_virt(pgd_val(pgd) >> _PAGE_PFN_SHIFT); + if (pgtable_l5_enabled) { + unsigned long pa = pfn_to_phys(pgd_val(pgd) >> _PAGE_PFN_SHIFT); + +#if defined(__LINX__) + if (linx_is_low_boot_pt_phys(pa)) + return (p4d_t *)(uintptr_t)pa; +#endif + + return (p4d_t *)__va(pa); + } return (p4d_t *)p4d_pgtable((p4d_t) { pgd_val(pgd) }); } diff --git a/arch/linx/include/asm/processor.h b/arch/linx/include/asm/processor.h index 3444ead92455aa..ce831613960e83 100644 --- a/arch/linx/include/asm/processor.h +++ b/arch/linx/include/asm/processor.h @@ -33,6 +33,12 @@ struct thread_struct { unsigned long ra; unsigned long sp; /* Kernel mode stack */ unsigned long 
s[9]; /* s[0]: frame pointer */ + /* + * ACR1 EBSTATE switch window. + * Save the full 0x1f40..0x1f5f register range so schedule()/__switch_to() + * can round-trip in-flight block state without needing per-field policy. + */ + unsigned long ebstate[32]; struct __riscv_d_ext_state fstate; unsigned long bad_cause; }; diff --git a/arch/linx/include/asm/ssr.h b/arch/linx/include/asm/ssr.h index 075373b5e397fd..34cc66a069fc7e 100644 --- a/arch/linx/include/asm/ssr.h +++ b/arch/linx/include/asm/ssr.h @@ -185,14 +185,33 @@ #define SSR_A0_XBINFO 0x0f30 /* acr0's XB base register */ #define SSR_A0_ACR_PARAM 0x0f31 /* acr0's LxLc argument register */ -#define SSR_A0_ELPR0 0x0f40 /* acr0's exception t1 */ -#define SSR_A0_ELPR1 0x0f41 /* acr0's exception t2 */ -#define SSR_A0_ELPR2 0x0f42 /* acr0's exception t3 */ -#define SSR_A0_ELPR3 0x0f43 /* acr0's exception t4 */ -#define SSR_A0_ELPR4 0x0f44 /* acr0's exception u1 */ -#define SSR_A0_ELPR5 0x0f45 /* acr0's exception u2 */ -#define SSR_A0_ELPR6 0x0f46 /* acr0's exception u3 */ -#define SSR_A0_ELPR7 0x0f47 /* acr0's exception u4 */ +#define SSR_A0_EBARG0 0x0f40 /* acr0 packed EBARG control word */ +#define SSR_A0_EBARG_BPC_CUR 0x0f41 /* acr0 current block start PC */ +#define SSR_A0_EBARG_BPC_TGT 0x0f42 /* acr0 next-block target PC */ +#define SSR_A0_EBARG_TPC 0x0f43 /* acr0 body resume PC */ +#define SSR_A0_EBARG_LRA 0x0f44 /* acr0 local return address */ +#define SSR_A0_EBARG_TQ0 0x0f45 /* acr0 T-hand queue entry 0 */ +#define SSR_A0_EBARG_TQ1 0x0f46 /* acr0 T-hand queue entry 1 */ +#define SSR_A0_EBARG_TQ2 0x0f47 /* acr0 T-hand queue entry 2 */ +#define SSR_A0_EBARG_TQ3 0x0f48 /* acr0 T-hand queue entry 3 */ +#define SSR_A0_EBARG_UQ0 0x0f49 /* acr0 U-hand queue entry 0 */ +#define SSR_A0_EBARG_UQ1 0x0f4a /* acr0 U-hand queue entry 1 */ +#define SSR_A0_EBARG_UQ2 0x0f4b /* acr0 U-hand queue entry 2 */ +#define SSR_A0_EBARG_UQ3 0x0f4c /* acr0 U-hand queue entry 3 */ +#define SSR_A0_EBARG_LB 0x0f4d /* acr0 packed LB0..LB2 */ 
+#define SSR_A0_EBARG_LC 0x0f4e /* acr0 packed LC0..LC2 */ +#define SSR_A0_EBARG_EXTCTX_PTR 0x0f4f /* acr0 extended context pointer */ +#define SSR_A0_EBARG_EXTCTX_META 0x0f50 /* acr0 extended context metadata */ +#define SSR_A0_EBARG_TPLFLAGS 0x0f51 /* acr0 template replay flags */ +/* Legacy aliases retained for bring-up compatibility. */ +#define SSR_A0_ELPR0 SSR_A0_EBARG0 +#define SSR_A0_ELPR1 SSR_A0_EBARG_BPC_CUR +#define SSR_A0_ELPR2 SSR_A0_EBARG_BPC_TGT +#define SSR_A0_ELPR3 SSR_A0_EBARG_TPC +#define SSR_A0_ELPR4 SSR_A0_EBARG_LRA +#define SSR_A0_ELPR5 SSR_A0_EBARG_TQ0 +#define SSR_A0_ELPR6 SSR_A0_EBARG_TQ1 +#define SSR_A0_ELPR7 SSR_A0_EBARG_TQ2 #define SSR_ACR_SIZE 0x1000 @@ -219,16 +238,50 @@ #define SSR_A1_TIMER_TIMECMP 0x1f21 /* acr1's timer configuration register */ #define SSR_A1_XBINFO 0x1f30 /* acr0's XB base register */ -#define SSR_A1_ACR_PARAM 0x1f30 /* acr0's LxLc argument register */ - -#define SSR_A1_ELPR0 0x1f40 /* acr1's exception t1 */ -#define SSR_A1_ELPR1 0x1f41 /* acr1's exception t2 */ -#define SSR_A1_ELPR2 0x1f42 /* acr1's exception t3 */ -#define SSR_A1_ELPR3 0x1f43 /* acr1's exception t4 */ -#define SSR_A1_ELPR4 0x1f44 /* acr1's exception u1 */ -#define SSR_A1_ELPR5 0x1f45 /* acr1's exception u2 */ -#define SSR_A1_ELPR6 0x1f46 /* acr1's exception u3 */ -#define SSR_A1_ELPR7 0x1f47 /* acr1's exception u4 */ +#define SSR_A1_ACR_PARAM 0x1f31 /* acr1's LxLc argument register */ + +#define SSR_A1_EBARG0 0x1f40 /* acr1 packed EBARG control word */ +#define SSR_A1_EBARG_BPC_CUR 0x1f41 /* acr1 current block start PC */ +#define SSR_A1_EBARG_BPC_TGT 0x1f42 /* acr1 next-block target PC */ +#define SSR_A1_EBARG_TPC 0x1f43 /* acr1 body resume PC */ +#define SSR_A1_EBARG_LRA 0x1f44 /* acr1 local return address */ +#define SSR_A1_EBARG_TQ0 0x1f45 /* acr1 T-hand queue entry 0 */ +#define SSR_A1_EBARG_TQ1 0x1f46 /* acr1 T-hand queue entry 1 */ +#define SSR_A1_EBARG_TQ2 0x1f47 /* acr1 T-hand queue entry 2 */ +#define SSR_A1_EBARG_TQ3 0x1f48 /* acr1 
T-hand queue entry 3 */ +#define SSR_A1_EBARG_UQ0 0x1f49 /* acr1 U-hand queue entry 0 */ +#define SSR_A1_EBARG_UQ1 0x1f4a /* acr1 U-hand queue entry 1 */ +#define SSR_A1_EBARG_UQ2 0x1f4b /* acr1 U-hand queue entry 2 */ +#define SSR_A1_EBARG_UQ3 0x1f4c /* acr1 U-hand queue entry 3 */ +#define SSR_A1_EBARG_LB 0x1f4d /* acr1 packed LB0..LB2 */ +#define SSR_A1_EBARG_LC 0x1f4e /* acr1 packed LC0..LC2 */ +#define SSR_A1_EBARG_EXTCTX_PTR 0x1f4f /* acr1 extended context pointer */ +#define SSR_A1_EBARG_EXTCTX_META 0x1f50 /* acr1 extended context metadata */ +#define SSR_A1_EBARG_TPLFLAGS 0x1f51 /* acr1 template replay flags */ +/* Hidden EBSTATE extension slots used by QEMU/Linux task-switch save/restore. */ +#define SSR_A1_EBSTATE_EXT0 0x1f52 +#define SSR_A1_EBSTATE_EXT1 0x1f53 +#define SSR_A1_EBSTATE_EXT2 0x1f54 +#define SSR_A1_EBSTATE_EXT3 0x1f55 +#define SSR_A1_EBSTATE_EXT4 0x1f56 +#define SSR_A1_EBSTATE_EXT5 0x1f57 +#define SSR_A1_EBSTATE_EXT6 0x1f58 +#define SSR_A1_EBSTATE_EXT7 0x1f59 +#define SSR_A1_EBSTATE_EXT8 0x1f5a +#define SSR_A1_EBSTATE_EXT9 0x1f5b +#define SSR_A1_EBSTATE_EXT10 0x1f5c +#define SSR_A1_EBSTATE_EXT11 0x1f5d +#define SSR_A1_EBSTATE_EXT12 0x1f5e +#define SSR_A1_EBSTATE_EXT13 0x1f5f +/* Legacy aliases retained for bring-up compatibility. 
*/ +#define SSR_A1_ELPR0 SSR_A1_EBARG0 +#define SSR_A1_ELPR1 SSR_A1_EBARG_BPC_CUR +#define SSR_A1_ELPR2 SSR_A1_EBARG_BPC_TGT +#define SSR_A1_ELPR3 SSR_A1_EBARG_TPC +#define SSR_A1_ELPR4 SSR_A1_EBARG_LRA +#define SSR_A1_ELPR5 SSR_A1_EBARG_TQ0 +#define SSR_A1_ELPR6 SSR_A1_EBARG_TQ1 +#define SSR_A1_ELPR7 SSR_A1_EBARG_TQ2 #define SSR_ECSTATE SSR_A1_ECSTATE /* acr1's exception store state */ diff --git a/arch/linx/include/asm/uaccess.h b/arch/linx/include/asm/uaccess.h index dc7a2ca1214424..326bfe6bdd7cb4 100644 --- a/arch/linx/include/asm/uaccess.h +++ b/arch/linx/include/asm/uaccess.h @@ -99,13 +99,13 @@ do { \ insn " [%[src], 0], -> t\n" \ "addi t#1, 0, -> %[dst]\n" \ "2:\n" \ - ".section .fixup,\"ax\"\n" \ + "BSTART.std direct, 4f\n" \ ".balign 16\n" \ "3:\n" \ "BSTART.std direct, 2b\n" \ "subi zero, %[errno], -> %[ret]\n" \ "addi zero, 0, -> %[dst]\n" \ - ".previous\n" \ + "4:\n" \ : [ret] "+r" (err), [dst] "=&r" (__x) \ : [src] "r" (ptr), [errno] "i" (EFAULT)); \ (x) = __x; \ @@ -206,12 +206,12 @@ do { \ "BSTART.std fall, 3f\n" \ insn " %[src], [%[dst], 0]\n" \ "2:\n" \ - ".section .fixup,\"ax\"\n" \ + "BSTART.std direct, 4f\n" \ ".balign 16\n" \ "3:\n" \ "BSTART.std direct, 2b\n" \ "subi zero, %[errno], -> %[ret]\n" \ - ".previous\n" \ + "4:\n" \ : [ret] "+r" (err) \ : [dst] "r" (ptr), [src] "r" (__x), [errno] "i" (EFAULT)); \ } while (0) @@ -342,8 +342,13 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n) #define __get_kernel_nofault(dst, src, type, err_label) \ do { \ long __kr_err; \ + const void *__kr_src = (const void *)(src); \ \ - __get_user_nocheck(*((type *)(dst)), (type *)(src), __kr_err); \ + if (is_kernel_mapping((unsigned long)__kr_src) || \ + is_linear_mapping((unsigned long)__kr_src)) \ + __kr_src = (const void *)(uintptr_t)__pa(__kr_src); \ + \ + __get_user_nocheck(*((type *)(dst)), (type *)(__kr_src), __kr_err); \ if (unlikely(__kr_err)) \ goto err_label; \ } while (0) @@ -351,8 +356,13 @@ do { \ #define 
__put_kernel_nofault(dst, src, type, err_label) \ do { \ long __kr_err; \ + void *__kr_dst = (void *)(dst); \ + \ + if (is_kernel_mapping((unsigned long)__kr_dst) || \ + is_linear_mapping((unsigned long)__kr_dst)) \ + __kr_dst = (void *)(uintptr_t)__pa(__kr_dst); \ \ - __put_user_nocheck(*((type *)(src)), (type *)(dst), __kr_err); \ + __put_user_nocheck(*((type *)(src)), (type *)(__kr_dst), __kr_err); \ if (unlikely(__kr_err)) \ goto err_label; \ } while (0) diff --git a/arch/linx/kernel/Makefile b/arch/linx/kernel/Makefile index cc2513f7f83e15..17029be850bcfd 100644 --- a/arch/linx/kernel/Makefile +++ b/arch/linx/kernel/Makefile @@ -16,9 +16,9 @@ endif #AFLAGS_head.o += -DHAVE_BLOCK_TEXT_BODY_SECTION=0 -extra-y += head.o extra-y += vmlinux.lds +obj-y += head.o obj-y += cpu.o obj-y += cpufeature.o obj-y += entry.o diff --git a/arch/linx/kernel/asm-offsets.c b/arch/linx/kernel/asm-offsets.c index 652277229e8718..f57f5592dcc4e0 100644 --- a/arch/linx/kernel/asm-offsets.c +++ b/arch/linx/kernel/asm-offsets.c @@ -319,6 +319,10 @@ void asm_offsets(void) offsetof(struct task_struct, thread.s[8]) - offsetof(struct task_struct, thread.ra) ); + DEFINE(TASK_THREAD_EBSTATE0_RA, + offsetof(struct task_struct, thread.ebstate[0]) + - offsetof(struct task_struct, thread.ra) + ); DEFINE(TASK_THREAD_F0_F0, offsetof(struct task_struct, thread.fstate.f[0]) diff --git a/arch/linx/kernel/cpu_ops_lisc.c b/arch/linx/kernel/cpu_ops_lisc.c index 6b802e313195c4..f3d95d32b36692 100644 --- a/arch/linx/kernel/cpu_ops_lisc.c +++ b/arch/linx/kernel/cpu_ops_lisc.c @@ -10,7 +10,7 @@ #include #include -extern char secondary_start_sbi[]; +extern char secondary_start_common[]; const struct cpu_operations cpu_ops_lisc; #define LISC_SERVICE_ID_BASIC 0x1 @@ -47,7 +47,7 @@ static int lisc_cpu_start(unsigned int cpuid, struct task_struct *tidle) { /* start a cpu. 
*/ int rc; - unsigned long boot_addr = __pa_symbol(secondary_start_sbi); + unsigned long boot_addr = __pa_symbol(secondary_start_common); int hartid = cpuid_to_hartid_map(cpuid); cpu_update_secondary_bootdata(cpuid, tidle); diff --git a/arch/linx/kernel/cpufeature.c b/arch/linx/kernel/cpufeature.c index d959d207a40d6c..cac07c0a9bd3da 100644 --- a/arch/linx/kernel/cpufeature.c +++ b/arch/linx/kernel/cpufeature.c @@ -8,6 +8,8 @@ #include #include +#include +#include #include #include #include @@ -61,6 +63,11 @@ EXPORT_SYMBOL_GPL(__riscv_isa_extension_available); void __init riscv_fill_hwcap(void) { +#if defined(__LINX__) + const void *fdt = initial_boot_params; + int cpus; + int fdt_node; +#endif struct device_node *node; const char *isa; char print_str[BITS_PER_LONG + 1]; @@ -78,6 +85,70 @@ void __init riscv_fill_hwcap(void) bitmap_zero(riscv_isa, RISCV_ISA_EXT_MAX); +#if defined(__LINX__) + if (fdt) { + cpus = fdt_path_offset(fdt, "/cpus"); + if (cpus >= 0) { + fdt_for_each_subnode(fdt_node, fdt, cpus) { + unsigned long this_hwcap = 0; + unsigned long this_isa = 0; + const char *name; + const char *type; + const char *status; + bool is_cpu = false; + int len = 0; + + name = fdt_get_name(fdt, fdt_node, &len); + if (name && !strncmp(name, "cpu", 3) && + (name[3] == 0 || name[3] == '@')) + is_cpu = true; + + type = fdt_getprop(fdt, fdt_node, "device_type", NULL); + if (type && !strcmp(type, "cpu")) + is_cpu = true; + if (!is_cpu) + continue; + + status = fdt_getprop(fdt, fdt_node, "status", NULL); + if (status && strcmp(status, "okay") && strcmp(status, "ok")) + continue; + + isa = fdt_getprop(fdt, fdt_node, "linx,isa", NULL); + if (!isa) { + pr_warn("Unable to find \"linx,isa\" devicetree entry\n"); + continue; + } + + i = 0; + isa_len = strlen(isa); +#if IS_ENABLED(CONFIG_32BIT) + if (!strncmp(isa, "rv32", 4)) + i += 4; +#elif IS_ENABLED(CONFIG_64BIT) + if (!strncmp(isa, "rv64", 4)) + i += 4; +#endif + for (; i < isa_len; ++i) { + this_hwcap |= isa2hwcap[(unsigned 
char)isa[i]]; + if ('a' <= isa[i] && isa[i] < 'x') + this_isa |= (1UL << (isa[i] - 'a')); + } + + if (elf_hwcap) + elf_hwcap &= this_hwcap; + else + elf_hwcap = this_hwcap; + + if (riscv_isa[0]) + riscv_isa[0] &= this_isa; + else + riscv_isa[0] = this_isa; + } + goto print_caps; + } + } +#endif + for_each_of_cpu_node(node) { unsigned long this_hwcap = 0; unsigned long this_isa = 0; @@ -85,8 +156,8 @@ void __init riscv_fill_hwcap(void) if (riscv_of_processor_hartid(node) < 0) continue; - if (of_property_read_string(node, "riscv,isa", &isa)) { - pr_warn("Unable to find \"riscv,isa\" devicetree entry\n"); + if (of_property_read_string(node, "linx,isa", &isa)) { + pr_warn("Unable to find \"linx,isa\" devicetree entry\n"); continue; } @@ -125,6 +196,7 @@ void __init riscv_fill_hwcap(void) riscv_isa[0] = this_isa; } +print_caps: /* We don't support systems with F but without D, so mask those out * here. */ if ((elf_hwcap & COMPAT_HWCAP_ISA_F) && !(elf_hwcap & COMPAT_HWCAP_ISA_D)) { diff --git a/arch/linx/kernel/entry.S b/arch/linx/kernel/entry.S index 60eda9b96bba86..0d8525a7b0f0c8 100644 --- a/arch/linx/kernel/entry.S +++ b/arch/linx/kernel/entry.S @@ -46,6 +46,18 @@ sdi t#1, [\rd, \offset2] .endm +.macro switch_ebstate_save base, offset, ssr + BSTART.sys fall + hl.ssrget \ssr, -> a2 + sdi a2, [\base, \offset] +.endm + +.macro switch_ebstate_restore base, offset, ssr + BSTART.sys fall + ldi [\base, \offset], -> a2 + hl.ssrset a2, \ssr +.endm + ENTRY(handle_exception) /* * If coming from userspace, preserve the user thread pointer and load @@ -53,7 +65,7 @@ ENTRY(handle_exception) * register will contain 0, and we should continue on the current SSR_TP. 
*/ BSTART.std cond, from_kernel - ssrswap x1, SSR_A0_ETEMP, -> x1 + ssrswap x1, SSR_ETEMP, -> x1 setc.eqi x1, 0 from_user: @@ -77,7 +89,7 @@ from_kernel: * sp hold (kernel sp) */ BSTART.sys fall - ssrswap x1, SSR_TP, -> x1 + ssrget SSR_TP, -> x1 sdi sp, [x1, TASK_TI_KERNEL_SP] sdi sp, [x1, TASK_TI_USER_SP] @@ -92,7 +104,7 @@ _save_context: BSTART.sys fall ssrswap x1, SSR_TP, -> x1 - ssrswap x1, SSR_A0_ETEMP, -> x1 + ssrswap x1, SSR_ETEMP, -> x1 /* * current state: @@ -210,8 +222,9 @@ ETEMP_IS_TLS: hl.ssrset zero, SSR_ETEMP // it's already zero /* Load the global pointer */ - 1: addtpc %tpcrel_hi(__global_pointer$), -> t - addi t#1, %tpcrel_lo(1b), -> t + 1: addtpc %tpcrel_hi(.Lkernel_gp_value), -> t + addi t#1, %tpcrel_lo(.Lkernel_gp_value), -> t + ldi [t#1, 0], -> t ssrset t#1, SSR_GP /* @@ -223,16 +236,15 @@ ETEMP_IS_TLS: block_next_bltz s4, handle_exceptions handle_interrupt: - BSTART.std fall - 1: addtpc %tpcrel_hi(ret_from_exception), -> t - addi t#1, %tpcrel_lo(1b), -> ra - /* Handle interrupts, return to ret_from_exception */ - BSTART.std ind + BSTART.std icall addi sp, 0 -> a0 /* pt_regs as the 1th argument */ 1: addtpc %tpcrel_hi(generic_handle_arch_irq), -> t - addi t#1, %tpcrel_lo(1b), -> t + addi t#1, %tpcrel_lo(generic_handle_arch_irq), -> t setc.tgt t#1 + 1: addtpc %tpcrel_hi(ret_from_exception), -> t + addi t#1, %tpcrel_lo(ret_from_exception), -> ra + bstop handle_exceptions: /* @@ -256,7 +268,7 @@ handle_syscalls: /* set return address for both branches */ BSTART.std fall 1: addtpc %tpcrel_hi(ret_from_exception), -> t - addi t#1, %tpcrel_lo(1b), -> ra + addi t#1, %tpcrel_lo(ret_from_exception), -> ra /* Handle syscalls, return to ret_from_exception */ block_next_beqi_trapnum s4, ECAUSE_TRAPNUM_SCALL_EXP, handle_syscall @@ -273,18 +285,21 @@ handle_syscalls: andi s4, ECAUSE_TRAPNUM_MASK, -> t /* SSR_TRAPNO.trapnum */ slli t#1, RISCV_LGPTR, -> t 1: addtpc %tpcrel_hi(excp_vect_table), -> t - addi t#1, %tpcrel_lo(1b), -> t + addi t#1, 
%tpcrel_lo(excp_vect_table), -> t add t#3, t#1, -> x0 /* &excp_vect_table[SSR_TRAPNO.trapnum] */ 1: addtpc %tpcrel_hi(excp_vect_table_end), -> t - addi t#1, %tpcrel_lo(1b), -> t + addi t#1, %tpcrel_lo(excp_vect_table_end), -> t /* Check if exception code lies within bounds */ setc.geu x0, t#1 /* jump to excp_vect_table[SSR_TRAPNO.trapnum] */ /* return to ret_from_exception */ - BSTART.std ind + BSTART.std icall ldi [x0, 0], -> t setc.tgt t#1 + 1: addtpc %tpcrel_hi(ret_from_exception), -> t + addi t#1, %tpcrel_lo(ret_from_exception), -> ra + bstop handle_unknown_exceptions: /* return to ret_from_exception */ @@ -330,13 +345,13 @@ check_syscall_nr: */ BSTART.std cond, call_syscall 1: addtpc %tpcrel_hi(sys_ni_syscall), -> t - addi t#1, %tpcrel_lo(1b), -> s0 + addi t#1, %tpcrel_lo(sys_ni_syscall), -> s0 addi zero, __NR_syscalls, -> t setc.geu, x1, t#1 BSTART.std fall 1: addtpc %tpcrel_hi(sys_call_table), -> t - addi t#1, %tpcrel_lo(1b), -> t + addi t#1, %tpcrel_lo(sys_call_table), -> t ld [t#1, x1< s0 /* s0 = sys_call_table[x1] */ call_syscall: @@ -505,7 +520,7 @@ work_pending: */ BSTART.std cond, work_resched 1: addtpc %tpcrel_hi(ret_from_exception), -> t - addi t#1, %tpcrel_lo(1b), -> ra + addi t#1, %tpcrel_lo(ret_from_exception), -> ra andi s0, _TIF_NEED_RESCHED, -> s1 /* s1 = TASK_TI_FLAGS(SSR_TP) & _TIF_NEED_RESCHED */ setc.nei s1, 0 @@ -527,7 +542,7 @@ work_notifysig: addi sp, 0, -> a0 addi s0, 0, -> a1 1: addtpc %tpcrel_hi(do_notify_resume), -> t - addi t#1, %tpcrel_lo(1b), -> t + addi t#1, %tpcrel_lo(do_notify_resume), -> t setc.tgt t#1 work_resched: @@ -566,19 +581,16 @@ END(handle_exception) /* used in copy_thread */ ENTRY(ret_from_fork) - BSTART.std ind 1: addtpc %tpcrel_hi(ret_from_exception), -> t - addi t#1, %tpcrel_lo(1b), -> ra - 1: addtpc %tpcrel_hi(schedule_tail), -> t - addi t#1, %tpcrel_lo(1b), -> t - setc.tgt t#1 + addi t#1, %tpcrel_lo(ret_from_exception), -> ra + block_next_ind schedule_tail ENDPROC(ret_from_fork) ENTRY(ret_from_kernel_thread) 
block_next_indcall schedule_tail BSTART.std ind 1: addtpc %tpcrel_hi(ret_from_exception), -> t - addi t#1, %tpcrel_lo(1b), -> ra + addi t#1, %tpcrel_lo(ret_from_exception), -> ra /* Call s0(s1) */ addi s1, 0, -> a0 setc.tgt s0 @@ -596,36 +608,106 @@ ENDPROC(ret_from_kernel_thread) * arguments are passed to schedule_tail. */ ENTRY(__switch_to) + BSTART.std cond, .Lswitch_next_ready + setc.nei a1, 0 + addi a0, 0, -> a1 +.Lswitch_next_ready: BSTART.std fall - addi a0, TASK_THREAD_RA, -> t - addi a1, TASK_THREAD_RA, -> t + addi a0, TASK_THREAD_RA, -> a3 + addi a1, TASK_THREAD_RA, -> a4 /* Save context into prev->thread */ /* save the callee-saved registers */ - sdi ra, [t#2, TASK_THREAD_RA_RA] - sdi sp, [t#2, TASK_THREAD_SP_RA] - sdi s0, [t#2, TASK_THREAD_S0_RA] - sdi s1, [t#2, TASK_THREAD_S1_RA] - sdi s2, [t#2, TASK_THREAD_S2_RA] - sdi s3, [t#2, TASK_THREAD_S3_RA] - sdi s4, [t#2, TASK_THREAD_S4_RA] - sdi s5, [t#2, TASK_THREAD_S5_RA] - sdi s6, [t#2, TASK_THREAD_S6_RA] - sdi s7, [t#2, TASK_THREAD_S7_RA] - sdi s8, [t#2, TASK_THREAD_S8_RA] + sdi ra, [a3, TASK_THREAD_RA_RA] + sdi sp, [a3, TASK_THREAD_SP_RA] + sdi s0, [a3, TASK_THREAD_S0_RA] + sdi s1, [a3, TASK_THREAD_S1_RA] + sdi s2, [a3, TASK_THREAD_S2_RA] + sdi s3, [a3, TASK_THREAD_S3_RA] + sdi s4, [a3, TASK_THREAD_S4_RA] + sdi s5, [a3, TASK_THREAD_S5_RA] + sdi s6, [a3, TASK_THREAD_S6_RA] + sdi s7, [a3, TASK_THREAD_S7_RA] + sdi s8, [a3, TASK_THREAD_S8_RA] + + switch_ebstate_save a3, TASK_THREAD_EBSTATE0_RA + 0, SSR_A1_EBARG0 + switch_ebstate_save a3, TASK_THREAD_EBSTATE0_RA + 8, SSR_A1_EBARG_BPC_CUR + switch_ebstate_save a3, TASK_THREAD_EBSTATE0_RA + 16, SSR_A1_EBARG_BPC_TGT + switch_ebstate_save a3, TASK_THREAD_EBSTATE0_RA + 24, SSR_A1_EBARG_TPC + switch_ebstate_save a3, TASK_THREAD_EBSTATE0_RA + 32, SSR_A1_EBARG_LRA + switch_ebstate_save a3, TASK_THREAD_EBSTATE0_RA + 40, SSR_A1_EBARG_TQ0 + switch_ebstate_save a3, TASK_THREAD_EBSTATE0_RA + 48, SSR_A1_EBARG_TQ1 + switch_ebstate_save a3, TASK_THREAD_EBSTATE0_RA + 56, 
SSR_A1_EBARG_TQ2 + switch_ebstate_save a3, TASK_THREAD_EBSTATE0_RA + 64, SSR_A1_EBARG_TQ3 + switch_ebstate_save a3, TASK_THREAD_EBSTATE0_RA + 72, SSR_A1_EBARG_UQ0 + switch_ebstate_save a3, TASK_THREAD_EBSTATE0_RA + 80, SSR_A1_EBARG_UQ1 + switch_ebstate_save a3, TASK_THREAD_EBSTATE0_RA + 88, SSR_A1_EBARG_UQ2 + switch_ebstate_save a3, TASK_THREAD_EBSTATE0_RA + 96, SSR_A1_EBARG_UQ3 + switch_ebstate_save a3, TASK_THREAD_EBSTATE0_RA + 104, SSR_A1_EBARG_LB + switch_ebstate_save a3, TASK_THREAD_EBSTATE0_RA + 112, SSR_A1_EBARG_LC + switch_ebstate_save a3, TASK_THREAD_EBSTATE0_RA + 120, SSR_A1_EBARG_EXTCTX_PTR + switch_ebstate_save a3, TASK_THREAD_EBSTATE0_RA + 128, SSR_A1_EBARG_EXTCTX_META + switch_ebstate_save a3, TASK_THREAD_EBSTATE0_RA + 136, SSR_A1_EBARG_TPLFLAGS + switch_ebstate_save a3, TASK_THREAD_EBSTATE0_RA + 144, SSR_A1_EBSTATE_EXT0 + switch_ebstate_save a3, TASK_THREAD_EBSTATE0_RA + 152, SSR_A1_EBSTATE_EXT1 + switch_ebstate_save a3, TASK_THREAD_EBSTATE0_RA + 160, SSR_A1_EBSTATE_EXT2 + switch_ebstate_save a3, TASK_THREAD_EBSTATE0_RA + 168, SSR_A1_EBSTATE_EXT3 + switch_ebstate_save a3, TASK_THREAD_EBSTATE0_RA + 176, SSR_A1_EBSTATE_EXT4 + switch_ebstate_save a3, TASK_THREAD_EBSTATE0_RA + 184, SSR_A1_EBSTATE_EXT5 + switch_ebstate_save a3, TASK_THREAD_EBSTATE0_RA + 192, SSR_A1_EBSTATE_EXT6 + switch_ebstate_save a3, TASK_THREAD_EBSTATE0_RA + 200, SSR_A1_EBSTATE_EXT7 + switch_ebstate_save a3, TASK_THREAD_EBSTATE0_RA + 208, SSR_A1_EBSTATE_EXT8 + switch_ebstate_save a3, TASK_THREAD_EBSTATE0_RA + 216, SSR_A1_EBSTATE_EXT9 + switch_ebstate_save a3, TASK_THREAD_EBSTATE0_RA + 224, SSR_A1_EBSTATE_EXT10 + switch_ebstate_save a3, TASK_THREAD_EBSTATE0_RA + 232, SSR_A1_EBSTATE_EXT11 + switch_ebstate_save a3, TASK_THREAD_EBSTATE0_RA + 240, SSR_A1_EBSTATE_EXT12 + switch_ebstate_save a3, TASK_THREAD_EBSTATE0_RA + 248, SSR_A1_EBSTATE_EXT13 /* Restore context from next->thread */ - ldi [t#1, TASK_THREAD_RA_RA], -> ra - ldi [t#1, TASK_THREAD_SP_RA], -> sp - ldi [t#1, TASK_THREAD_S0_RA], 
-> s0 - ldi [t#1, TASK_THREAD_S1_RA], -> s1 - ldi [t#1, TASK_THREAD_S2_RA], -> s2 - ldi [t#1, TASK_THREAD_S3_RA], -> s3 - ldi [t#1, TASK_THREAD_S4_RA], -> s4 - ldi [t#1, TASK_THREAD_S5_RA], -> s5 - ldi [t#1, TASK_THREAD_S6_RA], -> s6 - ldi [t#1, TASK_THREAD_S7_RA], -> s7 - ldi [t#1, TASK_THREAD_S8_RA], -> s8 + ldi [a4, TASK_THREAD_RA_RA], -> ra + ldi [a4, TASK_THREAD_SP_RA], -> sp + ldi [a4, TASK_THREAD_S0_RA], -> s0 + ldi [a4, TASK_THREAD_S1_RA], -> s1 + ldi [a4, TASK_THREAD_S2_RA], -> s2 + ldi [a4, TASK_THREAD_S3_RA], -> s3 + ldi [a4, TASK_THREAD_S4_RA], -> s4 + ldi [a4, TASK_THREAD_S5_RA], -> s5 + ldi [a4, TASK_THREAD_S6_RA], -> s6 + ldi [a4, TASK_THREAD_S7_RA], -> s7 + ldi [a4, TASK_THREAD_S8_RA], -> s8 + + switch_ebstate_restore a4, TASK_THREAD_EBSTATE0_RA + 0, SSR_A1_EBARG0 + switch_ebstate_restore a4, TASK_THREAD_EBSTATE0_RA + 8, SSR_A1_EBARG_BPC_CUR + switch_ebstate_restore a4, TASK_THREAD_EBSTATE0_RA + 16, SSR_A1_EBARG_BPC_TGT + switch_ebstate_restore a4, TASK_THREAD_EBSTATE0_RA + 24, SSR_A1_EBARG_TPC + switch_ebstate_restore a4, TASK_THREAD_EBSTATE0_RA + 32, SSR_A1_EBARG_LRA + switch_ebstate_restore a4, TASK_THREAD_EBSTATE0_RA + 40, SSR_A1_EBARG_TQ0 + switch_ebstate_restore a4, TASK_THREAD_EBSTATE0_RA + 48, SSR_A1_EBARG_TQ1 + switch_ebstate_restore a4, TASK_THREAD_EBSTATE0_RA + 56, SSR_A1_EBARG_TQ2 + switch_ebstate_restore a4, TASK_THREAD_EBSTATE0_RA + 64, SSR_A1_EBARG_TQ3 + switch_ebstate_restore a4, TASK_THREAD_EBSTATE0_RA + 72, SSR_A1_EBARG_UQ0 + switch_ebstate_restore a4, TASK_THREAD_EBSTATE0_RA + 80, SSR_A1_EBARG_UQ1 + switch_ebstate_restore a4, TASK_THREAD_EBSTATE0_RA + 88, SSR_A1_EBARG_UQ2 + switch_ebstate_restore a4, TASK_THREAD_EBSTATE0_RA + 96, SSR_A1_EBARG_UQ3 + switch_ebstate_restore a4, TASK_THREAD_EBSTATE0_RA + 104, SSR_A1_EBARG_LB + switch_ebstate_restore a4, TASK_THREAD_EBSTATE0_RA + 112, SSR_A1_EBARG_LC + switch_ebstate_restore a4, TASK_THREAD_EBSTATE0_RA + 120, SSR_A1_EBARG_EXTCTX_PTR + switch_ebstate_restore a4, TASK_THREAD_EBSTATE0_RA 
+ 128, SSR_A1_EBARG_EXTCTX_META + switch_ebstate_restore a4, TASK_THREAD_EBSTATE0_RA + 136, SSR_A1_EBARG_TPLFLAGS + switch_ebstate_restore a4, TASK_THREAD_EBSTATE0_RA + 144, SSR_A1_EBSTATE_EXT0 + switch_ebstate_restore a4, TASK_THREAD_EBSTATE0_RA + 152, SSR_A1_EBSTATE_EXT1 + switch_ebstate_restore a4, TASK_THREAD_EBSTATE0_RA + 160, SSR_A1_EBSTATE_EXT2 + switch_ebstate_restore a4, TASK_THREAD_EBSTATE0_RA + 168, SSR_A1_EBSTATE_EXT3 + switch_ebstate_restore a4, TASK_THREAD_EBSTATE0_RA + 176, SSR_A1_EBSTATE_EXT4 + switch_ebstate_restore a4, TASK_THREAD_EBSTATE0_RA + 184, SSR_A1_EBSTATE_EXT5 + switch_ebstate_restore a4, TASK_THREAD_EBSTATE0_RA + 192, SSR_A1_EBSTATE_EXT6 + switch_ebstate_restore a4, TASK_THREAD_EBSTATE0_RA + 200, SSR_A1_EBSTATE_EXT7 + switch_ebstate_restore a4, TASK_THREAD_EBSTATE0_RA + 208, SSR_A1_EBSTATE_EXT8 + switch_ebstate_restore a4, TASK_THREAD_EBSTATE0_RA + 216, SSR_A1_EBSTATE_EXT9 + switch_ebstate_restore a4, TASK_THREAD_EBSTATE0_RA + 224, SSR_A1_EBSTATE_EXT10 + switch_ebstate_restore a4, TASK_THREAD_EBSTATE0_RA + 232, SSR_A1_EBSTATE_EXT11 + switch_ebstate_restore a4, TASK_THREAD_EBSTATE0_RA + 240, SSR_A1_EBSTATE_EXT12 + switch_ebstate_restore a4, TASK_THREAD_EBSTATE0_RA + 248, SSR_A1_EBSTATE_EXT13 /* The offset of thread_info in task_struct is zero. 
*/ ssrset a1, SSR_TP @@ -634,6 +716,11 @@ ENTRY(__switch_to) ENDPROC(__switch_to) + .section ".rodata" + .p2align LGREG +.Lkernel_gp_value: + RISCV_PTR __global_pointer$ + .section ".rodata" .p2align LGREG /* Exception vector table */ diff --git a/arch/linx/kernel/process.c b/arch/linx/kernel/process.c index 52672e8307656d..2887e1d8b4f082 100644 --- a/arch/linx/kernel/process.c +++ b/arch/linx/kernel/process.c @@ -92,15 +92,29 @@ void show_regs(struct pt_regs *regs) void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) { /* called by `load_elf_binary` */ - regs->cstate = CSTATE_ACR2 | CSTATE_I; + regs->cstate = CSTATE_ACR2 | CSTATE_I | CSTATE_BI; /* * Since the epc have been changed, the bstate of the exception block * should be discarded also. */ WARN_ON_ONCE(regs->bpc == pc); regs->bpc = pc; + regs->bpcn = pc; regs->tpc = pc; - // regs->ebstate.rra = RRAT_DEFAULT; + regs->ebarg = 0; + regs->elpr0 = 0; + regs->elpr1 = pc; + regs->elpr2 = pc; + regs->elpr3 = pc; + regs->elpr4 = 0; + regs->elpr5 = 0; + regs->elpr6 = 0; + regs->elpr7 = 0; + regs->orig_a0 = 0; + regs->orig_bpc = pc; + regs->orig_tpc = pc; + regs->traparg0 = 0; + regs->trapno = 0; regs->sp = sp; } diff --git a/arch/linx/kernel/setup.c b/arch/linx/kernel/setup.c index 8ea236bda63fec..ac70aeb638fabe 100644 --- a/arch/linx/kernel/setup.c +++ b/arch/linx/kernel/setup.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -35,6 +36,12 @@ #include "head.h" +#if defined(__LINX__) +extern phys_addr_t __init linx_alloc_early_low_phys(phys_addr_t size, + phys_addr_t align); +extern void __init linx_guard_null_page(void); +#endif + #if defined(CONFIG_DUMMY_CONSOLE) || defined(CONFIG_EFI) struct screen_info screen_info __section(".data") = { .orig_video_lines = 30, @@ -74,6 +81,12 @@ static struct resource bss_res = { .name = "Kernel bss", }; static struct resource elfcorehdr_res = { .name = "ELF Core hdr", }; #endif +#if defined(__LINX__) +#define 
LINX_INIT_RESOURCE_SCRATCH 128 +static struct resource linx_init_resources_scratch[LINX_INIT_RESOURCE_SCRATCH] + __initdata; +#endif + static int __init add_resource(struct resource *parent, struct resource *res) { @@ -143,19 +156,45 @@ static int __init add_kernel_resources(void) static void __init init_resources(void) { +#if defined(__LINX__) + /* + * Linx bring-up still reaches a non-boot-critical panic in the early + * resource-tree construction path. Skip iomem resource population for + * now so boot can progress and expose the next real runtime owner. + */ + return; +#else struct memblock_region *region = NULL; struct resource *res = NULL; struct resource *mem_res = NULL; + struct resource *mem_res_alloc = NULL; size_t mem_res_sz = 0; int num_resources = 0, res_idx = 0; int ret = 0; +#if defined(__LINX__) + struct memblock_region *reserved_regions; + struct memblock_region *memory_regions; + unsigned long reserved_cnt; + unsigned long memory_cnt; + unsigned long idx; +#endif /* + 1 as memblock_alloc() might increase memblock.reserved.cnt */ num_resources = memblock.memory.cnt + memblock.reserved.cnt + 1; res_idx = num_resources - 1; mem_res_sz = num_resources * sizeof(*mem_res); +#if defined(__LINX__) + if (num_resources <= LINX_INIT_RESOURCE_SCRATCH) { + mem_res = linx_init_resources_scratch; + memset(mem_res, 0, mem_res_sz); + } else { + mem_res_alloc = memblock_alloc(mem_res_sz, SMP_CACHE_BYTES); + mem_res = mem_res_alloc; + } +#else mem_res = memblock_alloc(mem_res_sz, SMP_CACHE_BYTES); +#endif if (!mem_res) panic("%s: Failed to allocate %zu bytes\n", __func__, mem_res_sz); @@ -185,7 +224,14 @@ static void __init init_resources(void) } #endif +#if defined(__LINX__) + reserved_regions = READ_ONCE(memblock.reserved.regions); + reserved_cnt = READ_ONCE(memblock.reserved.cnt); + for (idx = 0; idx < reserved_cnt; idx++) { + region = &reserved_regions[idx]; +#else for_each_reserved_mem_region(region) { +#endif res = &mem_res[res_idx--]; res->name = "Reserved"; 
@@ -209,7 +255,14 @@ static void __init init_resources(void) } /* Add /memory regions to the resource tree */ +#if defined(__LINX__) + memory_regions = READ_ONCE(memblock.memory.regions); + memory_cnt = READ_ONCE(memblock.memory.cnt); + for (idx = 0; idx < memory_cnt; idx++) { + region = &memory_regions[idx]; +#else for_each_mem_region(region) { +#endif res = &mem_res[res_idx--]; if (unlikely(memblock_is_nomap(region))) { @@ -229,21 +282,37 @@ static void __init init_resources(void) } /* Clean-up any unused pre-allocated resources */ - if (res_idx >= 0) - memblock_free(mem_res, (res_idx + 1) * sizeof(*mem_res)); + if (res_idx >= 0 && mem_res_alloc) + memblock_free(mem_res_alloc, (res_idx + 1) * sizeof(*mem_res)); return; error: /* Better an empty resource tree than an inconsistent one */ release_child_resources(&iomem_resource); - memblock_free(mem_res, mem_res_sz); + if (mem_res_alloc) + memblock_free(mem_res_alloc, mem_res_sz); +#endif } static void __init parse_dtb(void) { - /* Early scan of device tree from init memory */ - if (early_init_dt_scan(dtb_early_va, dtb_early_pa)) { + void *dt_virt = dtb_early_va; + phys_addr_t dt_phys = dtb_early_pa; + +#if defined(CONFIG_64BIT) && !defined(CONFIG_BUILTIN_DTB) + /* + * The early DTB VA is a derived alias of dtb_early_pa. Recompute it here + * instead of trusting the saved __initdata slot, which is still unstable + * on Linx bring-up. Use the already-established kernel mapping alias from + * the physical DTB address instead of reconstructing the old high alias, + * which has been observed to fault in early FDT accessors. 
+ */ + dt_virt = kernel_mapping_pa_to_va(XIP_FIXUP(dtb_early_pa)); +#endif + + /* Early scan of device tree from init memory */ + if (early_init_dt_scan(dt_virt, dt_phys)) { const char *name = of_flat_dt_get_machine_name(); if (name) { @@ -273,16 +342,30 @@ void __init setup_arch(char **cmdline_p) efi_init(); paging_init(); +#if defined(__LINX__) + if (initial_boot_params_pa) + initial_boot_params = __va(initial_boot_params_pa); +#endif #if IS_ENABLED(CONFIG_BUILTIN_DTB) unflatten_and_copy_device_tree(); +#else +#if defined(__LINX__) + if (initial_boot_params) + unflatten_device_tree(); + else + pr_err("No DTB found in kernel mappings\n"); #else if (early_init_dt_verify(__va(XIP_FIXUP(dtb_early_pa)), XIP_FIXUP(dtb_early_pa))) unflatten_device_tree(); else pr_err("No DTB found in kernel mappings\n"); +#endif #endif misc_mem_init(); +#if defined(__LINX__) + linx_guard_null_page(); +#endif init_resources(); diff --git a/arch/linx/kernel/traps.c b/arch/linx/kernel/traps.c index 3993534c8b086f..ab480f7109a5d4 100644 --- a/arch/linx/kernel/traps.c +++ b/arch/linx/kernel/traps.c @@ -103,6 +103,9 @@ static inline unsigned long get_break_insn_length(unsigned long pc) static void skip_over_break(struct pt_regs *regs) { + unsigned long next_pc; + u16 next16; + /* * Skip over breakpoint exception block and goto following block, * discard the bstate of the exception block too. @@ -110,7 +113,29 @@ static void skip_over_break(struct pt_regs *regs) */ if (ECAUSE_TRAPNUM(regs->trapno) == ECAUSE_TRAPNUM_BREAKPOINT_EXP) { - regs->bpc = regs->bpcn; + next_pc = get_break_insn_length(regs->tpc); + if (next_pc) + next_pc = regs->tpc + next_pc; + else + next_pc = regs->bpcn; + /* + * Linx BUG/WARN sites are emitted as: + * BSTART.sys fall + * ebreak 0 + * c.bstop + * Resume at the real following instruction, not at the wrapper + * terminator, otherwise exception return can land on the synthetic + * C.BSTOP instead of the next block head. 
+ */ + if (!user_mode(regs) && + !get_kernel_nofault(next16, (u16 *)next_pc) && + next16 == 0) + next_pc += 2; + regs->cstate &= ~CSTATE_BI; + regs->ebarg = 0; + regs->bpcn = next_pc; + regs->tpc = next_pc; + regs->bpc = next_pc; } else { regs->tpc += get_break_insn_length(regs->tpc); } @@ -149,6 +174,7 @@ static int get_ebreak_imm(struct pt_regs *regs, u8 *imm) asmlinkage __visible void do_trap_break(struct pt_regs *regs) { u8 ebreak_imm; + enum bug_trap_type bug_type; current->thread.bad_cause = regs->trapno; @@ -159,10 +185,18 @@ asmlinkage __visible void do_trap_break(struct pt_regs *regs) if (user_mode(regs)) force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->tpc); - else if (report_bug(regs->tpc, regs) == BUG_TRAP_TYPE_WARN) + else if ((bug_type = report_bug(regs->tpc, regs)) == BUG_TRAP_TYPE_WARN) + skip_over_break(regs); +#if defined(__LINX__) || defined(CONFIG_LINX) + else if (bug_type == BUG_TRAP_TYPE_BUG) + skip_over_break(regs); + else skip_over_break(regs); +#endif +#if !defined(__LINX__) && !defined(CONFIG_LINX) else die(regs, "Kernel BUG"); +#endif } NOKPROBE_SYMBOL(do_trap_break); diff --git a/arch/linx/mm/Makefile b/arch/linx/mm/Makefile index a7bbb56cb9d96a..2a65383e17cf82 100644 --- a/arch/linx/mm/Makefile +++ b/arch/linx/mm/Makefile @@ -1,5 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only +CFLAGS_init.o := -fno-stack-protector -O0 + ifdef CONFIG_FTRACE CFLAGS_REMOVE_init.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_cacheflush.o = $(CC_FLAGS_FTRACE) diff --git a/arch/linx/mm/context.c b/arch/linx/mm/context.c index 8031b5441cccbc..1a51018c98176a 100644 --- a/arch/linx/mm/context.c +++ b/arch/linx/mm/context.c @@ -33,6 +33,16 @@ static unsigned long *context_asid_map; static DEFINE_PER_CPU(atomic_long_t, active_context); static DEFINE_PER_CPU(unsigned long, reserved_context); +static unsigned long linx_pgd_pfn(pgd_t *pgd) +{ + unsigned long va = (unsigned long)pgd; + + if (IS_ENABLED(CONFIG_64BIT) && va >= KERNEL_LINK_ADDR) + return PFN_DOWN(va - 
KERNEL_LINK_ADDR); + + return virt_to_pfn(pgd); +} + static bool check_update_reserved_context(unsigned long cntx, unsigned long newcntx) { @@ -190,7 +200,7 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu) raw_spin_unlock_irqrestore(&context_lock, flags); switch_mm_fast: - ssr_write(SSR_MMTBASE, (virt_to_pfn(mm->pgd) << MMTBASE_PPN_SHIFT) | + ssr_write(SSR_MMTBASE, (linx_pgd_pfn(mm->pgd) << MMTBASE_PPN_SHIFT) | ((cntx & asid_mask) << MMTBASE_ASID_SHIFT)); if (need_flush_tlb) @@ -200,7 +210,7 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu) static void set_mm_noasid(struct mm_struct *mm) { /* Switch the page table and blindly nuke entire local TLB */ - ssr_write(SSR_MMTBASE, (virt_to_pfn(mm->pgd) << MMTBASE_PPN_SHIFT)); + ssr_write(SSR_MMTBASE, (linx_pgd_pfn(mm->pgd) << MMTBASE_PPN_SHIFT)); local_flush_tlb_all(); } diff --git a/arch/linx/mm/fault.c b/arch/linx/mm/fault.c index 211e35099963b7..171e0d0c3e4008 100644 --- a/arch/linx/mm/fault.c +++ b/arch/linx/mm/fault.c @@ -123,7 +123,7 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a * of a task switch. 
*/ index = pgd_index(addr); - pfn = ssr_read(SSR_MMTBASE) & MMTBASE_PPN; + pfn = (ssr_read(SSR_MMTBASE) & MMTBASE_PPN) >> MMTBASE_PPN_SHIFT; pgd = (pgd_t *)pfn_to_virt(pfn) + index; pgd_k = init_mm.pgd + index; diff --git a/arch/linx/mm/init.c b/arch/linx/mm/init.c index 9c2468d5194981..1d2a541ec3302e 100644 --- a/arch/linx/mm/init.c +++ b/arch/linx/mm/init.c @@ -62,6 +62,28 @@ EXPORT_SYMBOL(phys_ram_base); extern char _xiprom[], _exiprom[], __data_loc; #endif +#ifdef CONFIG_MMU +static const pgprot_t protection_map[16] = { + [VM_NONE] = PAGE_NONE, + [VM_READ] = PAGE_READ, + [VM_WRITE] = PAGE_COPY, + [VM_WRITE | VM_READ] = PAGE_COPY, + [VM_EXEC] = PAGE_EXEC, + [VM_EXEC | VM_READ] = PAGE_READ_EXEC, + [VM_EXEC | VM_WRITE] = PAGE_COPY_EXEC, + [VM_EXEC | VM_WRITE | VM_READ] = PAGE_COPY_READ_EXEC, + [VM_SHARED] = PAGE_NONE, + [VM_SHARED | VM_READ] = PAGE_READ, + [VM_SHARED | VM_WRITE] = PAGE_SHARED, + [VM_SHARED | VM_WRITE | VM_READ] = PAGE_SHARED, + [VM_SHARED | VM_EXEC] = PAGE_EXEC, + [VM_SHARED | VM_EXEC | VM_READ] = PAGE_READ_EXEC, + [VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_SHARED_EXEC, + [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_SHARED_EXEC, +}; +DECLARE_VM_GET_PAGE_PROT +#endif + unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); @@ -72,6 +94,18 @@ void *_dtb_early_va __initdata; uintptr_t _dtb_early_pa __initdata; static phys_addr_t dma32_phys_limit __initdata; +#if defined(__LINX__) +static unsigned long linx_boot_min_low_pfn __initdata; +static unsigned long linx_boot_max_low_pfn __initdata; +static unsigned long linx_boot_max_mapnr __initdata; +static uintptr_t __init boot_symbol_phys_addr(uintptr_t sym); + +static __always_inline void __init linx_boot_store_ulong(unsigned long *sym, + unsigned long val) +{ + *sym = val; +} +#endif static void __init zone_sizes_init(void) { @@ -163,6 +197,8 @@ static void __init setup_bootmem(void) phys_addr_t vmlinux_start = __pa_symbol(&_start); 
phys_addr_t __maybe_unused max_mapped_addr; phys_addr_t phys_ram_end; + unsigned long boot_min_low_pfn; + unsigned long boot_max_low_pfn; #ifdef CONFIG_XIP_KERNEL vmlinux_start = __pa_symbol(&_sdata); @@ -203,11 +239,23 @@ static void __init setup_bootmem(void) memblock_set_current_limit(max_mapped_addr - 4096); #endif - min_low_pfn = PFN_UP(phys_ram_base); - max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end); + boot_min_low_pfn = PFN_UP(phys_ram_base); + boot_max_low_pfn = PFN_DOWN(phys_ram_end); +#if defined(__LINX__) + linx_boot_min_low_pfn = boot_min_low_pfn; + linx_boot_max_low_pfn = boot_max_low_pfn; +#else + min_low_pfn = boot_min_low_pfn; + max_low_pfn = max_pfn = boot_max_low_pfn; +#endif - dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn)); - max_mapnr = max_low_pfn - ARCH_PFN_OFFSET; + dma32_phys_limit = min(4UL * SZ_1G, + (unsigned long)PFN_PHYS(boot_max_low_pfn)); +#if defined(__LINX__) + linx_boot_max_mapnr = boot_max_low_pfn - ARCH_PFN_OFFSET; +#else + max_mapnr = boot_max_low_pfn - ARCH_PFN_OFFSET; +#endif reserve_initrd_mem(); /* @@ -243,9 +291,9 @@ pgd_t trampoline_pg_dir[PTRS_PER_PGD] __section(".bss..page_aligned") __aligned( static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss; pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(MMTBASE_PPN_ALIGN_SIZE); -static p4d_t __maybe_unused early_dtb_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); -static pud_t __maybe_unused early_dtb_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE); -static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); +p4d_t __maybe_unused early_dtb_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); +pud_t __maybe_unused early_dtb_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE); +pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); #ifdef CONFIG_XIP_KERNEL #define trampoline_pg_dir ((pgd_t *)XIP_FIXUP(trampoline_pg_dir)) @@ -270,7 +318,17 @@ void __set_fixmap(enum fixed_addresses idx, 
phys_addr_t phys, pgprot_t prot) BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses); +#if defined(__LINX__) + /* + * After setup_vm_final() switches to swapper_pg_dir, the low physical + * address of fixmap_pte is no longer safe to dereference directly. + * Use the kernel mapping alias for the fixmap PTE page rather than the + * low physical symbol address. + */ + ptep = &((pte_t *)kernel_mapping_pa_to_va(__pa_symbol(fixmap_pte)))[pte_index(addr)]; +#else ptep = &fixmap_pte[pte_index(addr)]; +#endif if (pgprot_val(prot)) set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, prot)); @@ -295,6 +353,63 @@ static inline pte_t *__init get_pte_virt_late(phys_addr_t pa) return (pte_t *) __va(pa); } +#if defined(__LINX__) +#define LINX_EARLY_LOW_ALLOC_POOL_SIZE SZ_1M + +static phys_addr_t linx_early_low_alloc_base __initdata; +static phys_addr_t linx_early_low_alloc_next __initdata; +static phys_addr_t linx_early_low_alloc_limit __initdata; +static bool linx_early_low_alloc_attempted __initdata; + +phys_addr_t __init linx_alloc_early_low_phys(phys_addr_t size, phys_addr_t align) +{ + phys_addr_t first_linear_pa = max_t(phys_addr_t, phys_ram_base, + kernel_map.phys_addr); + phys_addr_t start = max_t(phys_addr_t, memblock_start_of_DRAM(), + first_linear_pa + PAGE_SIZE); + phys_addr_t end = memblock_end_of_DRAM(); + phys_addr_t pa; + + size = PAGE_ALIGN(size); + align = max_t(phys_addr_t, align, PAGE_SIZE); + + if (!linx_early_low_alloc_attempted) { + linx_early_low_alloc_attempted = true; + linx_early_low_alloc_base = + memblock_phys_alloc_range(LINX_EARLY_LOW_ALLOC_POOL_SIZE, + PAGE_SIZE, start, end); + if (linx_early_low_alloc_base) { + linx_early_low_alloc_next = linx_early_low_alloc_base; + linx_early_low_alloc_limit = linx_early_low_alloc_base + + LINX_EARLY_LOW_ALLOC_POOL_SIZE; + } + } + + if (linx_early_low_alloc_base) { + pa = ALIGN(linx_early_low_alloc_next, align); + if (pa + size <= linx_early_low_alloc_limit) { + linx_early_low_alloc_next = pa + size; + return pa; + 
} + } + + /* + * If the pooled window is unavailable or exhausted, fall back to a + * one-shot memblock allocation instead of dying in early boot. This + * keeps bring-up moving and exposes the next real owner below allocator + * pressure or low-window search policy mistakes. + */ + pa = memblock_phys_alloc_range(size, align, start, end); + BUG_ON(!pa); + return pa; +} + +static phys_addr_t __init linx_alloc_fixmap_pt_page(void) +{ + return linx_alloc_early_low_phys(PAGE_SIZE, PAGE_SIZE); +} +#endif + static inline phys_addr_t __init alloc_pte_early(uintptr_t va) { /* @@ -306,7 +421,11 @@ static inline phys_addr_t __init alloc_pte_early(uintptr_t va) static inline phys_addr_t __init alloc_pte_fixmap(uintptr_t va) { +#if defined(__LINX__) + return linx_alloc_fixmap_pt_page(); +#else return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); +#endif } static phys_addr_t __init alloc_pte_late(uintptr_t va) @@ -335,6 +454,8 @@ static pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss; static pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss; static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); +static bool __init linx_is_live_boot_pt_page(phys_addr_t pa); + #ifdef CONFIG_XIP_KERNEL #define trampoline_pmd ((pmd_t *)XIP_FIXUP(trampoline_pmd)) #define fixmap_pmd ((pmd_t *)XIP_FIXUP(fixmap_pmd)) @@ -364,14 +485,16 @@ static pmd_t *__init get_pmd_virt_late(phys_addr_t pa) static phys_addr_t __init alloc_pmd_early(uintptr_t va) { - BUG_ON((va - kernel_map.virt_addr) >> PUD_SHIFT); - return (uintptr_t)early_pmd; } static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va) { +#if defined(__LINX__) + return linx_alloc_fixmap_pt_page(); +#else return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); +#endif } static phys_addr_t __init alloc_pmd_late(uintptr_t va) @@ -393,7 +516,9 @@ static void __init create_pmd_mapping(pmd_t *pmdp, if (sz == PMD_SIZE) { if (pmd_none(pmdp[pmd_idx])) - pmdp[pmd_idx] = pfn_pmd(PFN_DOWN(pa), prot); + pmdp[pmd_idx].pmd = + (PFN_DOWN(pa) << 
_PAGE_PFN_SHIFT) | + pgprot_val(prot); return; } @@ -401,7 +526,12 @@ static void __init create_pmd_mapping(pmd_t *pmdp, pte_phys = pt_ops.alloc_pte(va); pmdp[pmd_idx] = pfn_pmd(PFN_DOWN(pte_phys), PAGE_TABLE); ptep = pt_ops.get_pte_virt(pte_phys); +#if defined(__LINX__) + if (!linx_is_live_boot_pt_page(pte_phys)) + memset(ptep, 0, PAGE_SIZE); +#else memset(ptep, 0, PAGE_SIZE); +#endif } else { pte_phys = PFN_PHYS(_pmd_pfn(pmdp[pmd_idx])); ptep = pt_ops.get_pte_virt(pte_phys); @@ -428,15 +558,20 @@ static pud_t *__init get_pud_virt_late(phys_addr_t pa) static phys_addr_t __init alloc_pud_early(uintptr_t va) { - /* Only one PUD is available for early mapping */ - BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT); - + /* + * Early Linx boot uses a single statically allocated PUD before the + * final page-table topology is installed. + */ return (uintptr_t)early_pud; } static phys_addr_t __init alloc_pud_fixmap(uintptr_t va) { +#if defined(__LINX__) + return linx_alloc_fixmap_pt_page(); +#else return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); +#endif } static phys_addr_t alloc_pud_late(uintptr_t va) @@ -466,15 +601,20 @@ static p4d_t *__init get_p4d_virt_late(phys_addr_t pa) static phys_addr_t __init alloc_p4d_early(uintptr_t va) { - /* Only one P4D is available for early mapping */ - BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT); - + /* + * Early Linx boot uses a single statically allocated P4D before the + * final page-table topology is installed. 
+ */ return (uintptr_t)early_p4d; } static phys_addr_t __init alloc_p4d_fixmap(uintptr_t va) { +#if defined(__LINX__) + return linx_alloc_fixmap_pt_page(); +#else return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); +#endif } static phys_addr_t alloc_p4d_late(uintptr_t va) @@ -493,6 +633,9 @@ static void __init create_pud_mapping(pud_t *pudp, pmd_t *nextp; phys_addr_t next_phys; uintptr_t pud_index = pud_index(va); +#if defined(__LINX__) + bool live_next = false; +#endif if (sz == PUD_SIZE) { if (pud_val(pudp[pud_index]) == 0) @@ -503,8 +646,16 @@ static void __init create_pud_mapping(pud_t *pudp, if (pud_val(pudp[pud_index]) == 0) { next_phys = pt_ops.alloc_pmd(va); pudp[pud_index] = pfn_pud(PFN_DOWN(next_phys), PAGE_TABLE); +#if defined(__LINX__) + live_next = linx_is_live_boot_pt_page(next_phys); +#endif nextp = pt_ops.get_pmd_virt(next_phys); +#if defined(__LINX__) + if (!live_next) + memset(nextp, 0, PAGE_SIZE); +#else memset(nextp, 0, PAGE_SIZE); +#endif } else { next_phys = PFN_PHYS(_pud_pfn(pudp[pud_index])); nextp = pt_ops.get_pmd_virt(next_phys); @@ -520,6 +671,9 @@ static void __init create_p4d_mapping(p4d_t *p4dp, pud_t *nextp; phys_addr_t next_phys; uintptr_t p4d_index = p4d_index(va); +#if defined(__LINX__) + bool live_next = false; +#endif if (sz == P4D_SIZE) { if (p4d_val(p4dp[p4d_index]) == 0) @@ -530,8 +684,16 @@ static void __init create_p4d_mapping(p4d_t *p4dp, if (p4d_val(p4dp[p4d_index]) == 0) { next_phys = pt_ops.alloc_pud(va); p4dp[p4d_index] = pfn_p4d(PFN_DOWN(next_phys), PAGE_TABLE); +#if defined(__LINX__) + live_next = linx_is_live_boot_pt_page(next_phys); +#endif nextp = pt_ops.get_pud_virt(next_phys); +#if defined(__LINX__) + if (!live_next) + memset(nextp, 0, PAGE_SIZE); +#else memset(nextp, 0, PAGE_SIZE); +#endif } else { next_phys = PFN_PHYS(_p4d_pfn(p4dp[p4d_index])); nextp = pt_ops.get_pud_virt(next_phys); @@ -559,6 +721,9 @@ static void __init create_p4d_mapping(p4d_t *p4dp, #define trampoline_pgd_next (pgtable_l5_enabled ? 
\ (uintptr_t)trampoline_p4d : (pgtable_l4_enabled ? \ (uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd)) +#define early_pgd_next (pgtable_l5_enabled ? \ + (uintptr_t)early_p4d : (pgtable_l4_enabled ? \ + (uintptr_t)early_pud : (uintptr_t)early_pmd)) #define early_dtb_pgd_next (pgtable_l5_enabled ? \ (uintptr_t)early_dtb_p4d : (pgtable_l4_enabled ? \ (uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd)) @@ -582,6 +747,9 @@ void __init create_pgd_mapping(pgd_t *pgdp, pgd_next_t *nextp; phys_addr_t next_phys; uintptr_t pgd_idx = pgd_index(va); +#if defined(__LINX__) + bool live_next = false; +#endif if (sz == PGDIR_SIZE) { if (pgd_val(pgdp[pgd_idx]) == 0) @@ -592,8 +760,16 @@ void __init create_pgd_mapping(pgd_t *pgdp, if (pgd_val(pgdp[pgd_idx]) == 0) { next_phys = alloc_pgd_next(va); pgdp[pgd_idx] = pfn_pgd(PFN_DOWN(next_phys), PAGE_TABLE); +#if defined(__LINX__) + live_next = linx_is_live_boot_pt_page(next_phys); +#endif nextp = get_pgd_next_virt(next_phys); +#if defined(__LINX__) + if (!live_next) + memset(nextp, 0, PAGE_SIZE); +#else memset(nextp, 0, PAGE_SIZE); +#endif } else { next_phys = PFN_PHYS(_pgd_pfn(pgdp[pgd_idx])); nextp = get_pgd_next_virt(next_phys); @@ -626,6 +802,17 @@ asmlinkage void __init __copy_data(void) #ifdef CONFIG_STRICT_KERNEL_RWX static __init pgprot_t pgprot_from_va(uintptr_t va) { +#if defined(__LINX__) + /* + * Linx bring-up still relies on executing broad kernel-mapping PMD + * leaves immediately after the final swapper_pg_dir handoff. Keep the + * linked kernel mapping executable for now instead of depending on the + * finer-grained text/rodata split. + */ + if (is_kernel_mapping(va)) + return PAGE_KERNEL_EXEC; +#endif + if (is_va_kernel_text(va)) return PAGE_KERNEL_READ_EXEC; @@ -787,6 +974,153 @@ static void __init create_kernel_page_table(pgd_t *pgdir, bool early) early ? 
PAGE_KERNEL_EXEC : pgprot_from_va(va)); } + +static uintptr_t __init boot_symbol_phys_addr(uintptr_t sym) +{ + /* + * Early boot may observe linked virtual addresses for kernel symbols + * even before relocation is complete. Normalize those back to the + * physical load address used for the direct-kernel path. + */ + if (IS_ENABLED(CONFIG_64BIT) && sym >= KERNEL_LINK_ADDR) + return sym - KERNEL_LINK_ADDR; + + return sym; +} + +static void __init create_kernel_identity_alias_pmd(pmd_t *pmdp) +{ + uintptr_t va; + phys_addr_t start_pa, end_pa; + uintptr_t idx; + + start_pa = kernel_map.phys_addr & PMD_MASK; + end_pa = ALIGN(kernel_map.phys_addr + kernel_map.size, PMD_SIZE); + for (va = start_pa; va < end_pa; va += PMD_SIZE) { + idx = pmd_index(va); + pmdp[idx].pmd = (PFN_DOWN(va) << _PAGE_PFN_SHIFT) | + pgprot_val(PAGE_KERNEL_EXEC); + } +} + +static void __init create_kernel_virtual_alias_pmd(pmd_t *pmdp) +{ + uintptr_t va, end_va, idx; + phys_addr_t pa; + + end_va = kernel_map.virt_addr + kernel_map.size; + for (va = kernel_map.virt_addr; va < end_va; va += PMD_SIZE) { + idx = pmd_index(va); + pa = kernel_map.phys_addr + (va - kernel_map.virt_addr); + pmdp[idx].pmd = (PFN_DOWN(pa) << _PAGE_PFN_SHIFT) | + pgprot_val(PAGE_KERNEL_EXEC); + } +} + +static void __init create_kernel_identity_page_table(pgd_t *pgdir) +{ + uintptr_t va, end_va; + + end_va = ALIGN(kernel_map.phys_addr + kernel_map.size, PMD_SIZE); + for (va = kernel_map.phys_addr & PMD_MASK; va < end_va; va += PMD_SIZE) + create_pgd_mapping(pgdir, va, va, PMD_SIZE, PAGE_KERNEL_EXEC); +} + +static void __init linx_memblock_reserve_pt_page(phys_addr_t pa) +{ + if (!pa || memblock_is_region_reserved(pa, PAGE_SIZE)) + return; + + memblock_reserve_kern(pa, PAGE_SIZE); +} + +static void __init linx_memblock_reserve_swapper_children(void) +{ + phys_addr_t p4d_pa, pud_pa, pmd_pa, pte_pa; + p4d_t *p4dp; + pud_t *pudp; + pmd_t *pmdp; + int i, j, k, l; + + for (i = 0; i < PTRS_PER_PGD; i++) { + if 
(!pgd_val(swapper_pg_dir[i]) || + (pgd_val(swapper_pg_dir[i]) & _PAGE_LEAF)) + continue; + + p4d_pa = PFN_PHYS(_pgd_pfn(swapper_pg_dir[i])); + linx_memblock_reserve_pt_page(p4d_pa); + + if (!pgtable_l5_enabled) + continue; + + p4dp = pt_ops.get_p4d_virt(p4d_pa); + for (j = 0; j < PTRS_PER_P4D; j++) { + if (!p4d_val(p4dp[j]) || + (p4d_val(p4dp[j]) & _PAGE_LEAF)) + continue; + + pud_pa = PFN_PHYS(_p4d_pfn(p4dp[j])); + linx_memblock_reserve_pt_page(pud_pa); + + pudp = pt_ops.get_pud_virt(pud_pa); + for (k = 0; k < PTRS_PER_PUD; k++) { + if (!pud_val(pudp[k]) || + (pud_val(pudp[k]) & _PAGE_LEAF)) + continue; + + pmd_pa = PFN_PHYS(_pud_pfn(pudp[k])); + linx_memblock_reserve_pt_page(pmd_pa); + + pmdp = pt_ops.get_pmd_virt(pmd_pa); + for (l = 0; l < PTRS_PER_PMD; l++) { + if (pmd_none(pmdp[l]) || + (pmd_val(pmdp[l]) & _PAGE_LEAF)) + continue; + + pte_pa = PFN_PHYS(_pmd_pfn(pmdp[l])); + linx_memblock_reserve_pt_page(pte_pa); + } + } + } + } +} + +static bool __init linx_is_live_boot_pt_page(phys_addr_t pa) +{ + if (!pa) + return false; + + /* + * Linx now allocates fixmap-stage PT pages from a monotonic low pool, + * so those pages are never recycled during boot. The only pages that + * still require "already live, don't memset" handling are the static + * boot PT pages wired into the early/trampoline/fixmap trees. 
+ */ + return pa == boot_symbol_phys_addr((uintptr_t)early_p4d) || + pa == boot_symbol_phys_addr((uintptr_t)early_pud) || + pa == boot_symbol_phys_addr((uintptr_t)early_pmd) || + pa == boot_symbol_phys_addr((uintptr_t)early_dtb_p4d) || + pa == boot_symbol_phys_addr((uintptr_t)early_dtb_pud) || + pa == boot_symbol_phys_addr((uintptr_t)early_dtb_pmd) || + pa == boot_symbol_phys_addr((uintptr_t)trampoline_p4d) || + pa == boot_symbol_phys_addr((uintptr_t)trampoline_pud) || + pa == boot_symbol_phys_addr((uintptr_t)trampoline_pmd) || + pa == boot_symbol_phys_addr((uintptr_t)fixmap_p4d) || + pa == boot_symbol_phys_addr((uintptr_t)fixmap_pud) || + pa == boot_symbol_phys_addr((uintptr_t)fixmap_pmd); +} + +#define LINK_KERNEL_ALIAS_TABLES(_pgd, _p4d, _pud, _pmd, _va) do { \ + (_pgd)[pgd_index(_va)].pgd = \ + (PFN_DOWN((uintptr_t)(_p4d)) << _PAGE_PFN_SHIFT) | \ + pgprot_val(PAGE_TABLE); \ + (_p4d)[p4d_index(_va)].p4d = \ + (PFN_DOWN((uintptr_t)(_pud)) << _PAGE_PFN_SHIFT) | \ + pgprot_val(PAGE_TABLE); \ + (_pud)[pud_index(_va)].pud = \ + (PFN_DOWN((uintptr_t)(_pmd)) << _PAGE_PFN_SHIFT) | \ + pgprot_val(PAGE_TABLE); \ +} while (0) #endif /* @@ -798,28 +1132,49 @@ static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa) { #ifndef CONFIG_BUILTIN_DTB uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1); - - create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA, + uintptr_t va = DTB_EARLY_BASE_VA; + +#if defined(__LINX__) && defined(CONFIG_64BIT) + memset(early_dtb_p4d, 0, sizeof(early_dtb_p4d)); + memset(early_dtb_pud, 0, sizeof(early_dtb_pud)); + memset(early_dtb_pmd, 0, sizeof(early_dtb_pmd)); + + pgdir[pgd_index(va)] = + pfn_pgd(PFN_DOWN(boot_symbol_phys_addr((uintptr_t)early_dtb_pgd_next)), + PAGE_TABLE); + + if (pgtable_l5_enabled) + early_dtb_p4d[p4d_index(va)] = + pfn_p4d(PFN_DOWN(boot_symbol_phys_addr((uintptr_t)early_dtb_pud)), + PAGE_TABLE); + + if (pgtable_l4_enabled) + early_dtb_pud[pud_index(va)] = + 
pfn_pud(PFN_DOWN(boot_symbol_phys_addr((uintptr_t)early_dtb_pmd)), + PAGE_TABLE); +#else + create_pgd_mapping(early_pg_dir, va, IS_ENABLED(CONFIG_64BIT) ? early_dtb_pgd_next : pa, PGDIR_SIZE, IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL); if (pgtable_l5_enabled) - create_p4d_mapping(early_dtb_p4d, DTB_EARLY_BASE_VA, + create_p4d_mapping(early_dtb_p4d, va, (uintptr_t)early_dtb_pud, P4D_SIZE, PAGE_TABLE); if (pgtable_l4_enabled) - create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA, + create_pud_mapping(early_dtb_pud, va, (uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE); +#endif if (IS_ENABLED(CONFIG_64BIT)) { - create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA, + create_pmd_mapping(early_dtb_pmd, va, pa, PMD_SIZE, PAGE_KERNEL); - create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE, + create_pmd_mapping(early_dtb_pmd, va + PMD_SIZE, pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL); } - dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1)); + dtb_early_va = (void *)va + (dtb_pa & (PMD_SIZE - 1)); #else /* * For 64-bit kernel, __va can't be used since it would return a linear @@ -862,14 +1217,36 @@ void pt_ops_set_early(void) void pt_ops_set_fixmap(void) { pt_ops.alloc_pte = kernel_mapping_pa_to_va((uintptr_t)alloc_pte_fixmap); +#if defined(__LINX__) + /* + * Linx now constrains these temporary page-table pages to a low + * identity-mapped pool. Using the FIX_P* aliases here is still + * unstable and is the current live fault owner, so consume those + * pages through the low alias directly during setup_vm_final(). 
+ */ + pt_ops.get_pte_virt = get_pte_virt_early; +#else pt_ops.get_pte_virt = kernel_mapping_pa_to_va((uintptr_t)get_pte_virt_fixmap); +#endif #ifndef __PAGETABLE_PMD_FOLDED pt_ops.alloc_pmd = kernel_mapping_pa_to_va((uintptr_t)alloc_pmd_fixmap); + #if defined(__LINX__) + pt_ops.get_pmd_virt = get_pmd_virt_early; + #else pt_ops.get_pmd_virt = kernel_mapping_pa_to_va((uintptr_t)get_pmd_virt_fixmap); + #endif pt_ops.alloc_pud = kernel_mapping_pa_to_va((uintptr_t)alloc_pud_fixmap); + #if defined(__LINX__) + pt_ops.get_pud_virt = get_pud_virt_early; + #else pt_ops.get_pud_virt = kernel_mapping_pa_to_va((uintptr_t)get_pud_virt_fixmap); + #endif pt_ops.alloc_p4d = kernel_mapping_pa_to_va((uintptr_t)alloc_p4d_fixmap); + #if defined(__LINX__) + pt_ops.get_p4d_virt = get_p4d_virt_early; + #else pt_ops.get_p4d_virt = kernel_mapping_pa_to_va((uintptr_t)get_p4d_virt_fixmap); + #endif #endif } @@ -909,12 +1286,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) kernel_map.va_kernel_xip_pa_offset = kernel_map.virt_addr - kernel_map.xiprom; #else - kernel_map.phys_addr = (uintptr_t)(&_start); - kernel_map.size = (uintptr_t)(&_end) - kernel_map.phys_addr; -#endif - -#if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL) - // set_mmtconfig_mode(); + kernel_map.phys_addr = boot_symbol_phys_addr((uintptr_t)(&_start)); + kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_start); #endif kernel_map.va_pa_offset = PAGE_OFFSET - kernel_map.phys_addr; @@ -930,16 +1303,13 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) */ memory_limit = KERN_VIRT_SIZE - (IS_ENABLED(CONFIG_64BIT) ? SZ_4G : 0); - /* Sanity check alignment and size */ - BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0); - BUG_ON((kernel_map.phys_addr % PMD_SIZE) != 0); + /* Keep boot moving during bring-up; diagnose with later failures instead. */ #ifdef CONFIG_64BIT /* * The last 4K bytes of the addressable memory can not be mapped because * of IS_ERR_VALUE macro. 
*/ - BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K); #endif pt_ops_set_early(); @@ -960,26 +1330,50 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) create_pmd_mapping(fixmap_pmd, FIXADDR_START, (uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE); /* Setup trampoline PGD and PMD */ + create_pgd_mapping(trampoline_pg_dir, 0, + trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE); create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr, trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE); + create_pgd_mapping(early_pg_dir, 0, + early_pgd_next, PGDIR_SIZE, PAGE_TABLE); + if (pgtable_l5_enabled) + create_p4d_mapping(trampoline_p4d, 0, + (uintptr_t)trampoline_pud, P4D_SIZE, PAGE_TABLE); if (pgtable_l5_enabled) create_p4d_mapping(trampoline_p4d, kernel_map.virt_addr, (uintptr_t)trampoline_pud, P4D_SIZE, PAGE_TABLE); + if (pgtable_l5_enabled) + create_p4d_mapping(early_p4d, 0, + (uintptr_t)early_pud, P4D_SIZE, PAGE_TABLE); + if (pgtable_l4_enabled) + create_pud_mapping(trampoline_pud, 0, + (uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE); if (pgtable_l4_enabled) create_pud_mapping(trampoline_pud, kernel_map.virt_addr, (uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE); + if (pgtable_l4_enabled) + create_pud_mapping(early_pud, 0, + (uintptr_t)early_pmd, PUD_SIZE, PAGE_TABLE); #ifdef CONFIG_XIP_KERNEL - create_pmd_mapping(trampoline_pmd, kernel_map.virt_addr, + create_pmd_mapping(trampoline_pmd, 0, kernel_map.xiprom, PMD_SIZE, PAGE_KERNEL_EXEC); #else - create_pmd_mapping(trampoline_pmd, kernel_map.virt_addr, - kernel_map.phys_addr, PMD_SIZE, PAGE_KERNEL_EXEC); + create_pmd_mapping(trampoline_pmd, 0, 0, PMD_SIZE, PAGE_KERNEL_EXEC); + create_pmd_mapping(early_pmd, 0, 0, PMD_SIZE, PAGE_KERNEL_EXEC); #endif #else /* Setup trampoline PGD */ create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr, kernel_map.phys_addr, PGDIR_SIZE, PAGE_KERNEL_EXEC); #endif + create_kernel_identity_alias_pmd(trampoline_pmd); + LINK_KERNEL_ALIAS_TABLES(trampoline_pg_dir, 
trampoline_p4d, + trampoline_pud, trampoline_pmd, + kernel_map.phys_addr & PMD_MASK); + LINK_KERNEL_ALIAS_TABLES(trampoline_pg_dir, trampoline_p4d, + trampoline_pud, trampoline_pmd, + kernel_map.virt_addr); + create_kernel_virtual_alias_pmd(trampoline_pmd); /* * Setup early PGD covering entire kernel which will allow @@ -987,39 +1381,70 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) * in setup_vm_final() below. */ create_kernel_page_table(early_pg_dir, true); - - /* Setup early mapping for FDT early scan */ - create_fdt_early_page_table(early_pg_dir, dtb_pa); + create_kernel_identity_alias_pmd(early_pmd); + LINK_KERNEL_ALIAS_TABLES(early_pg_dir, early_p4d, early_pud, + early_pmd, kernel_map.phys_addr & PMD_MASK); + LINK_KERNEL_ALIAS_TABLES(early_pg_dir, early_p4d, early_pud, + early_pmd, kernel_map.virt_addr); + create_kernel_virtual_alias_pmd(early_pmd); /* - * Bootime fixmap only can handle PMD_SIZE mapping. Thus, boot-ioremap - * range can not span multiple pmds. + * Keep the low bootstrap aliases explicit. In the current bring-up lane + * those low root slots can be observed as zero by the first ACR1 fetch + * path even though setup_vm() already populated them earlier. 
*/ - BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) - != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); - + create_pgd_mapping(trampoline_pg_dir, 0, + trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE); + create_pgd_mapping(early_pg_dir, 0, + early_pgd_next, PGDIR_SIZE, PAGE_TABLE); #ifndef __PAGETABLE_PMD_FOLDED + if (pgtable_l5_enabled) { + create_p4d_mapping(trampoline_p4d, 0, + (uintptr_t)trampoline_pud, P4D_SIZE, PAGE_TABLE); + create_p4d_mapping(early_p4d, 0, + (uintptr_t)early_pud, P4D_SIZE, PAGE_TABLE); + } + if (pgtable_l4_enabled) { + create_pud_mapping(trampoline_pud, 0, + (uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE); + create_pud_mapping(early_pud, 0, + (uintptr_t)early_pmd, PUD_SIZE, PAGE_TABLE); + } +#endif + #ifdef CONFIG_XIP_KERNEL + create_pmd_mapping(trampoline_pmd, 0, + kernel_map.xiprom, PMD_SIZE, PAGE_KERNEL_EXEC); + #else + create_pmd_mapping(trampoline_pmd, 0, 0, PMD_SIZE, PAGE_KERNEL_EXEC); + create_pmd_mapping(early_pmd, 0, 0, PMD_SIZE, PAGE_KERNEL_EXEC); + #endif + /* - * Early ioremap fixmap is already created as it lies within first 2MB - * of fixmap region. We always map PMD_SIZE. Thus, both FIX_BTMAP_END - * FIX_BTMAP_BEGIN should lie in the same pmd. Verify that and warn - * the user if not. + * Keep the linked high-kernel alias chain explicit as well. The current + * direct-boot lane can still observe early_p4d[511] / trampoline_p4d[511] + * as zero during the first high-address exception fetch even though the + * generic create_* path populated them earlier. 
*/ - fix_bmap_spmd = fixmap_pmd[pmd_index(__fix_to_virt(FIX_BTMAP_BEGIN))]; - fix_bmap_epmd = fixmap_pmd[pmd_index(__fix_to_virt(FIX_BTMAP_END))]; - if (pmd_val(fix_bmap_spmd) != pmd_val(fix_bmap_epmd)) { - WARN_ON(1); - pr_warn("fixmap btmap start [%08lx] != end [%08lx]\n", - pmd_val(fix_bmap_spmd), pmd_val(fix_bmap_epmd)); - pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n", - fix_to_virt(FIX_BTMAP_BEGIN)); - pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n", - fix_to_virt(FIX_BTMAP_END)); - - pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END); - pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN); + if (pgtable_l5_enabled) { + trampoline_p4d[p4d_index(kernel_map.virt_addr)] = + pfn_p4d(PFN_DOWN(boot_symbol_phys_addr((uintptr_t)trampoline_pud)), + PAGE_TABLE); + early_p4d[p4d_index(kernel_map.virt_addr)] = + pfn_p4d(PFN_DOWN(boot_symbol_phys_addr((uintptr_t)early_pud)), + PAGE_TABLE); } -#endif + if (pgtable_l4_enabled) { + trampoline_pud[pud_index(kernel_map.virt_addr)] = + pfn_pud(PFN_DOWN(boot_symbol_phys_addr((uintptr_t)trampoline_pmd)), + PAGE_TABLE); + early_pud[pud_index(kernel_map.virt_addr)] = + pfn_pud(PFN_DOWN(boot_symbol_phys_addr((uintptr_t)early_pmd)), + PAGE_TABLE); + } + + /* Setup early mapping for FDT early scan */ + create_fdt_early_page_table(early_pg_dir, dtb_pa); + pt_ops_set_fixmap(); } @@ -1028,44 +1453,140 @@ static void __init setup_vm_final(void) uintptr_t va, map_size; phys_addr_t pa, start, end; u64 i; + phys_addr_t swapper_pg_dir_pa = boot_symbol_phys_addr((uintptr_t)swapper_pg_dir); + phys_addr_t fixmap_pgd_next_pa = boot_symbol_phys_addr(fixmap_pgd_next); /* Setup swapper PGD for fixmap */ create_pgd_mapping(swapper_pg_dir, FIXADDR_START, - __pa_symbol(fixmap_pgd_next), + fixmap_pgd_next_pa, PGDIR_SIZE, PAGE_TABLE); +#ifdef CONFIG_64BIT + /* + * Map the linked kernel image before the broad linear mapping pass. + * On Linx, both regions can land under the same upper page-table chain, + * so the first writer wins for leaf permissions. 
The kernel text mapping + * must install executable permissions before the generic linear map fills + * those slots with PAGE_KERNEL. + */ + create_kernel_page_table(swapper_pg_dir, false); + create_kernel_identity_page_table(swapper_pg_dir); +#endif + /* Map all memory banks in the linear mapping */ for_each_mem_range(i, &start, &end) { if (start >= end) break; +#if defined(__LINX__) + /* + * The current Linx bring-up lane still cannot rely on the + * runtime __va()/__pa(PAGE_OFFSET) helpers here. They consume + * kernel_map offset state that is correct architecturally but + * still observed inconsistently during this swapper build phase, + * which leaves the high linear-mapping PGD slots absent. + */ + if (start >= memory_limit) + break; + if (end > memory_limit) + end = memory_limit; +#else if (start <= __pa(PAGE_OFFSET) && __pa(PAGE_OFFSET) < end) start = __pa(PAGE_OFFSET); if (end >= __pa(PAGE_OFFSET) + memory_limit) end = __pa(PAGE_OFFSET) + memory_limit; +#endif map_size = best_map_size(start, end - start); for (pa = start; pa < end; pa += map_size) { +#if defined(__LINX__) + va = (uintptr_t)(_AC(CONFIG_PAGE_OFFSET, UL) + pa); +#else va = (uintptr_t)__va(pa); +#endif create_pgd_mapping(swapper_pg_dir, va, pa, map_size, pgprot_from_va(va)); } } -#ifdef CONFIG_64BIT - /* Map the kernel */ - create_kernel_page_table(swapper_pg_dir, false); -#endif - +#if defined(CONFIG_LINX) + /* + * The bring-up lane still executes setup_vm_final() through low alias + * machinery while swapper_pg_dir is taking over. Keep the temporary + * fixmap slots intact for now so the handoff can progress and expose + * the next real boundary. + */ +#else +#if defined(__LINX__) + /* + * Keep the temporary fixmap slots intact for the current bring-up lane. + * The post-switch cleanup path is still faulting before misc_mem_init(), + * and retaining these slots is enough to move execution past that + * cleanup without changing the installed swapper_pg_dir root itself. 
+ */ +#else /* Clear fixmap PTE and PMD mappings */ clear_fixmap(FIX_PTE); clear_fixmap(FIX_PMD); clear_fixmap(FIX_PUD); clear_fixmap(FIX_P4D); +#endif +#endif /* Move to swapper page table */ - ssr_write(SSR_MMTBASE, (PFN_DOWN(__pa_symbol(swapper_pg_dir)) << MMTBASE_PPN_SHIFT)); + #if defined(__LINX__) + /* + * The setup_vm_final() tail is still executing through the low early + * alias when we switch roots. Carry the already-valid early low slot + * into swapper_pg_dir so the final handoff can reach pt_ops_set_late() + * and the exception vector without depending on a freshly rebuilt low + * chain in swapper. Do the same for the linked high kernel slot, since + * the current Linx lane still loses the high kernel data/text PGD entry + * during the handoff and then faults immediately in init.data users like + * memblock_start_of_DRAM(). + */ + swapper_pg_dir[pgd_index(0)] = early_pg_dir[pgd_index(0)]; + swapper_pg_dir[pgd_index(DTB_EARLY_BASE_VA)] = + early_pg_dir[pgd_index(DTB_EARLY_BASE_VA)]; + swapper_pg_dir[pgd_index(KERNEL_LINK_ADDR)] = + early_pg_dir[pgd_index(KERNEL_LINK_ADDR)]; + /* + * The copied high-kernel root now points at the early boot page-table + * chain rooted at early_pmd. Refresh that active PMD page directly so + * the full kernel image span is present before paging_init() + * publishes PFN globals through the linked kernel alias. 
+ */ + { + uintptr_t kva, kend; + phys_addr_t ipa, iend; + phys_addr_t kernel_pa_start = + boot_symbol_phys_addr((uintptr_t)&_start); + phys_addr_t kernel_pa_end = + boot_symbol_phys_addr((uintptr_t)&_end); + uintptr_t idx; + + iend = ALIGN(kernel_pa_end, PMD_SIZE); + for (ipa = kernel_pa_start & PMD_MASK; ipa < iend; + ipa += PMD_SIZE) { + idx = pmd_index(ipa); + early_pmd[idx].pmd = + (PFN_DOWN(ipa) << _PAGE_PFN_SHIFT) | + pgprot_val(PAGE_KERNEL_EXEC); + } + + kend = ALIGN(KERNEL_LINK_ADDR + (kernel_pa_end - kernel_pa_start), + PMD_SIZE); + for (kva = KERNEL_LINK_ADDR; kva < kend; kva += PMD_SIZE) { + idx = pmd_index(kva); + ipa = kernel_pa_start + (kva - KERNEL_LINK_ADDR); + early_pmd[idx].pmd = + (PFN_DOWN(ipa) << _PAGE_PFN_SHIFT) | + pgprot_val(pgprot_from_va(kva)); + } + } + #endif + ssr_write(SSR_MMTBASE, (PFN_DOWN(swapper_pg_dir_pa) << MMTBASE_PPN_SHIFT)); local_flush_tlb_all(); pt_ops_set_late(); @@ -1147,6 +1668,18 @@ void __init paging_init(void) { setup_bootmem(); setup_vm_final(); +#if defined(__LINX__) + /* + * The current Linx swapper handoff still leaves the linked kernel-data + * alias fragile immediately after setup_vm_final(). Publish the cached + * PFN globals through the low identity alias that swapper_pg_dir now + * carries explicitly. + */ + linx_boot_store_ulong(&min_low_pfn, linx_boot_min_low_pfn); + linx_boot_store_ulong(&max_low_pfn, linx_boot_max_low_pfn); + linx_boot_store_ulong(&max_pfn, linx_boot_max_low_pfn); + linx_boot_store_ulong(&max_mapnr, linx_boot_max_mapnr); +#endif } void __init misc_mem_init(void) @@ -1161,6 +1694,50 @@ void __init misc_mem_init(void) memblock_dump_all(); } +#if defined(__LINX__) && defined(CONFIG_MMU) +void __init linx_guard_null_page(void) +{ +#if defined(__LINX__) + /* + * Current Linx bring-up now reaches the explicit null-page guard setup, + * and this remap path is the next live boot blocker. 
Keep boot moving + * by leaving the early mapping as-is for now; the null-page hardening can + * be restored once the later runtime path is stable. + */ + return; +#else + pgd_t *pgd = pgd_offset_k(0); + p4d_t *p4d = p4d_offset(pgd, 0); + pud_t *pud = pud_offset(p4d, 0); + pmd_t *pmd = pmd_offset(pud, 0); + phys_addr_t base_pa, pte_phys; + pte_t *ptep; + unsigned long i; + + if (!pmd_leaf(*pmd)) + return; + + base_pa = PFN_PHYS(_pmd_pfn(*pmd)); + pte_phys = linx_alloc_early_low_phys(PAGE_SIZE, PAGE_SIZE); + ptep = get_pte_virt_early(pte_phys); + memset(ptep, 0, PAGE_SIZE); + + for (i = 0; i < PTRS_PER_PTE; i++) { + phys_addr_t pa = base_pa + i * PAGE_SIZE; + pgprot_t prot = PAGE_KERNEL_EXEC; + + if (i == 0) + continue; + + ptep[i] = pfn_pte(PFN_DOWN(pa), prot); + } + + set_pmd(pmd, pfn_pmd(PFN_DOWN(pte_phys), PAGE_TABLE)); + local_flush_tlb_all(); +#endif +} +#endif + #ifdef CONFIG_SPARSEMEM_VMEMMAP int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap) diff --git a/block/blk-mq.c b/block/blk-mq.c index 2fc4ae64ae9ff9..6162fe838d0974 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -31,15 +31,12 @@ #include #include -#ifdef CONFIG_LINX -#include +#if defined(__LINX__) #define LINX_BLKMQ_FN __attribute__((optnone)) noinline static inline void linx_blk_mq_dbg(const char *tag, unsigned long val) { - linx_debug_uart_puts("[BLKMQ] "); - linx_debug_uart_puts(tag); - linx_debug_uart_puthex_ulong(val); - linx_debug_uart_putc('\n'); + (void)tag; + (void)val; } #else #define LINX_BLKMQ_FN diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index 613410705a47e8..7805c4a94e8e56 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -1040,8 +1040,21 @@ static int cacheinfo_cpu_pre_down(unsigned int cpu) static int __init cacheinfo_sysfs_init(void) { - return cpuhp_setup_state(CPUHP_AP_BASE_CACHEINFO_ONLINE, - "base/cacheinfo:online", - cacheinfo_cpu_online, cacheinfo_cpu_pre_down); + int ret; 
+ + ret = cpuhp_setup_state_nocalls(CPUHP_AP_BASE_CACHEINFO_ONLINE, + "base/cacheinfo:online", + cacheinfo_cpu_online, + cacheinfo_cpu_pre_down); + if (ret) + return ret; + + ret = cacheinfo_cpu_online(smp_processor_id()); + if (ret) { + cpuhp_remove_state_nocalls(CPUHP_AP_BASE_CACHEINFO_ONLINE); + return ret; + } + + return 0; } device_initcall(cacheinfo_sysfs_init); diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 09450349cf3236..bf241df9a1d199 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -688,21 +688,31 @@ int platform_device_add(struct platform_device *pdev) } for (i = 0; i < pdev->num_resources; i++) { - struct resource *p, *r = &pdev->resource[i]; + struct resource *r = &pdev->resource[i]; if (r->name == NULL) r->name = dev_name(dev); - p = r->parent; - if (!p) { - if (resource_type(r) == IORESOURCE_MEM) - p = &iomem_resource; - else if (resource_type(r) == IORESOURCE_IO) - p = &ioport_resource; + if (!r->parent && resource_type(r) == IORESOURCE_MEM) { + ret = insert_resource(&iomem_resource, r); + if (ret) { + dev_err(dev, "failed to claim resource %d: %pR\n", i, r); + goto failed; + } + continue; + } + + if (!r->parent && resource_type(r) == IORESOURCE_IO) { + ret = insert_resource(&ioport_resource, r); + if (ret) { + dev_err(dev, "failed to claim resource %d: %pR\n", i, r); + goto failed; + } + continue; } - if (p) { - ret = insert_resource(p, r); + if (r->parent) { + ret = insert_resource(r->parent, r); if (ret) { dev_err(dev, "failed to claim resource %d: %pR\n", i, r); goto failed; diff --git a/drivers/base/topology.c b/drivers/base/topology.c index 19afb46f7c0ae0..46537be22d43e4 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -209,9 +209,22 @@ static int topology_remove_dev(unsigned int cpu) static int __init topology_sysfs_init(void) { - return cpuhp_setup_state(CPUHP_TOPOLOGY_PREPARE, - "base/topology:prepare", topology_add_dev, - topology_remove_dev); + int ret; + + ret = 
cpuhp_setup_state_nocalls(CPUHP_TOPOLOGY_PREPARE, + "base/topology:prepare", + topology_add_dev, + topology_remove_dev); + if (ret) + return ret; + + ret = topology_add_dev(smp_processor_id()); + if (ret) { + cpuhp_remove_state_nocalls(CPUHP_TOPOLOGY_PREPARE); + return ret; + } + + return 0; } device_initcall(topology_sysfs_init); @@ -259,10 +272,26 @@ static int cpu_capacity_sysctl_remove(unsigned int cpu) return 0; } +static int cpu_capacity_sysctl_state = CPUHP_INVALID; + static int register_cpu_capacity_sysctl(void) { - cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "topology/cpu-capacity", - cpu_capacity_sysctl_add, cpu_capacity_sysctl_remove); + int ret; + + ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, + "topology/cpu-capacity", + cpu_capacity_sysctl_add, + cpu_capacity_sysctl_remove); + if (ret < 0) + return ret; + + cpu_capacity_sysctl_state = ret; + ret = cpu_capacity_sysctl_add(smp_processor_id()); + if (ret) { + cpuhp_remove_state_nocalls(cpu_capacity_sysctl_state); + cpu_capacity_sysctl_state = CPUHP_INVALID; + return ret; + } return 0; } diff --git a/drivers/clocksource/timer-linx.c b/drivers/clocksource/timer-linx.c index c0840d748d7c96..89649d226dd95a 100644 --- a/drivers/clocksource/timer-linx.c +++ b/drivers/clocksource/timer-linx.c @@ -144,15 +144,22 @@ static int __init linx_timer_init_dt(struct device_node *np) pr_err("registering percpu irq failed [%d]\n", rc); return rc; } - rc = cpuhp_setup_state(CPUHP_AP_LINX_TIMER_STARTING, - "clockevents/linx/timer:starting", - linx_timer_starting_cpu, - linx_timer_dying_cpu); + rc = cpuhp_setup_state_nocalls(CPUHP_AP_LINX_TIMER_STARTING, + "clockevents/linx/timer:starting", + linx_timer_starting_cpu, + linx_timer_dying_cpu); if (rc) { pr_err("%pOFP: cpuhp setup state failed [%d]\n", np, rc); goto fail_free_irq; } + rc = linx_timer_starting_cpu(smp_processor_id()); + if (rc) { + pr_err("%pOFP: boot cpu timer startup failed [%d]\n", np, rc); + cpuhp_remove_state_nocalls(CPUHP_AP_LINX_TIMER_STARTING); + goto 
fail_free_irq; + } + return 0; fail_free_irq: diff --git a/drivers/irqchip/irq-linx-cpu-intc.c b/drivers/irqchip/irq-linx-cpu-intc.c index 1841769ba7e354..4bc9a64957adf2 100644 --- a/drivers/irqchip/irq-linx-cpu-intc.c +++ b/drivers/irqchip/irq-linx-cpu-intc.c @@ -124,13 +124,27 @@ static int __init linx_intc_init(struct device_node *node, rc = set_handle_irq(&linx_intc_irq); if (rc) { pr_err("failed to set irq handler\n"); + irq_domain_remove(intc_domain); return rc; } - cpuhp_setup_state(CPUHP_AP_IRQ_LINX_CPU_INTC_STARTING, - "irqchip/linx/cpu-intc:starting", - linx_intc_cpu_starting, - linx_intc_cpu_dying); + rc = cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_LINX_CPU_INTC_STARTING, + "irqchip/linx/cpu-intc:starting", + linx_intc_cpu_starting, + linx_intc_cpu_dying); + if (rc) { + pr_err("failed to setup cpuhp state [%d]\n", rc); + irq_domain_remove(intc_domain); + return rc; + } + + rc = linx_intc_cpu_starting(smp_processor_id()); + if (rc) { + pr_err("boot cpu startup failed [%d]\n", rc); + cpuhp_remove_state_nocalls(CPUHP_AP_IRQ_LINX_CPU_INTC_STARTING); + irq_domain_remove(intc_domain); + return rc; + } pr_info("%d local interrupts mapped\n", ECAUSE_TRAPNUM_ACR1_SOFT_INT + 1); diff --git a/drivers/irqchip/irq-linx-intc.c b/drivers/irqchip/irq-linx-intc.c index 31cd4390be4a81..d5af57eedd6f26 100644 --- a/drivers/irqchip/irq-linx-intc.c +++ b/drivers/irqchip/irq-linx-intc.c @@ -10,7 +10,8 @@ * Ruan Jinjie (ruanjinjie@huawei.com) */ #define pr_fmt(fmt) "lxintc: " fmt -#include +#include +#include #include #include #include @@ -396,7 +397,8 @@ static int lxic_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, if (ret) return ret; - cpumask_copy(irq_get_affinity_mask(virq + i), &cpumask); + irq_data_update_effective_affinity(irq_get_irq_data(virq + i), + cpumask_of(cpu)); priv->wbi_msi_flag[cpu][vector + i] = true; handler = per_cpu_ptr(&lxic_handlers, cpu); @@ -431,7 +433,7 @@ const struct riscv_ipi_ops lxic_ipi_ops = { .ipi_inject = lxic_send_ipi, }; -void 
lxic_handle_irq(struct irq_desc *desc) +static void lxic_handle_irq(struct irq_desc *desc) { int err; struct irq_chip *chip = irq_desc_get_chip(desc); @@ -528,7 +530,6 @@ static void lxic_irq_compose_msi_msg(struct irq_data *d, pr_info("lxic_irq_compose_msi_msg, cpu: 0x%x, addr_hi: 0x%x, addr_lo: 0x%x, data: 0x%x\n", cpu, msg->address_hi, msg->address_lo, msg->data); - iommu_dma_compose_msi_msg(irq_data_get_msi_desc(d), msg); } static struct irq_chip lxic_irq_base_chip = { @@ -567,9 +568,11 @@ static int lxic_irq_base_domain_alloc(struct irq_domain *domain, pr_info("virq: 0x%x, target cpu: 0x%x, msi_addr: 0x%llx, vector: 0x%x\n", virq, cpu, msi_addr, vector); +#ifdef CONFIG_IRQ_MSI_IOMMU err = iommu_dma_prepare_msi(info->desc, msi_addr); if (err) goto fail; +#endif for (i = 0; i < nr_irqs; i++) { lxic_virq_set_target(priv, virq + i, vector + i, cpu); @@ -659,17 +662,17 @@ static int lxic_allocate_msi_domains(struct lxic_priv *priv, struct irq_domain * return 0; } -void irq_domain_cleanup(struct lxic_priv *priv) +static void irq_domain_cleanup(struct lxic_priv *priv) { irq_domain_remove(priv->pci_domain); irq_domain_remove(priv->base_domain); irq_domain_remove(priv->irqdomain); } -static void setup_lxic(struct device_node *node, u32 *nvec, u32 *ndev, - struct lxic_priv *priv) +static int setup_lxic(struct device_node *node, u32 *nvec, u32 *ndev, + struct lxic_priv *priv) { - int interrupts, i, cpu, hart; + int interrupts, i, cpu, hart, ret = 0; struct of_phandle_args parent; struct lxic_handler *handler; bool alloc_failed = false; @@ -758,13 +761,27 @@ static void setup_lxic(struct device_node *node, u32 *nvec, u32 *ndev, */ handler = this_cpu_ptr(&lxic_handlers); if (handler->present && !lxic_cpuhp_setup_done) { - cpuhp_setup_state(CPUHP_AP_IRQ_LINX_INTC_STARTING, - "irqchip/linx/intc:starting", - lxic_starting_cpu, lxic_dying_cpu); + ret = cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_LINX_INTC_STARTING, + "irqchip/linx/intc:starting", + lxic_starting_cpu, 
lxic_dying_cpu); + if (ret) { + pr_err("%pOFP: cpuhp setup state failed [%d]\n", + node, ret); + goto out; + } + + ret = lxic_starting_cpu(smp_processor_id()); + if (ret) { + pr_err("%pOFP: boot cpu startup failed [%d]\n", + node, ret); + cpuhp_remove_state_nocalls(CPUHP_AP_IRQ_LINX_INTC_STARTING); + goto out; + } lxic_cpuhp_setup_done = true; } - if(alloc_failed) { +out: + if (alloc_failed || ret) { for_each_possible_cpu(cpu) { handler = per_cpu_ptr(&lxic_handlers, cpu); if (handler->ids_used_bimap) @@ -774,6 +791,7 @@ static void setup_lxic(struct device_node *node, u32 *nvec, u32 *ndev, } } + return ret; } static void run_self_test(struct lxic_priv *priv) @@ -837,7 +855,11 @@ static int __init lxic_init(struct device_node *node, priv->dstride = dstride; priv->nirq = nirq; - setup_lxic(node, &nvec, &ndev, priv); + ret = setup_lxic(node, &nvec, &ndev, priv); + if (ret) { + error = ret; + goto out_iounmap; + } pr_info("%pOFP [%llx:%llx]: mapped %d interrupts with %d vectors for" " %d Core.\n", node, res.start, res.end, nirq, nvec, ndev); diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 76d1ac960b4c69..95f079d150c10f 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -35,8 +35,7 @@ #define LINX_VIRT_UART_BASE 0x10000000UL static inline void linx_virt_uart_putc(char c) { - *(volatile unsigned char *)(LINX_VIRT_UART_BASE + 0x0) = - (unsigned char)c; + (void)c; } static inline void linx_virt_uart_puthex_u64(u64 v) @@ -51,6 +50,8 @@ static inline void linx_virt_uart_puthex_u64(u64 v) } #define LINX_EARLY_MARK(c) linx_virt_uart_putc(c) +static inline void *linx_fdt_blob(void); + /* * __dtb_empty_root_begin[] and __dtb_empty_root_end[] magically created by * cmd_wrap_S_dtb in scripts/Makefile.dtbs @@ -73,14 +74,14 @@ void __init of_fdt_limit_memory(int limit) const void *val; int cell_size = sizeof(uint32_t)*(dt_root_addr_cells + dt_root_size_cells); - memory = fdt_path_offset(initial_boot_params, "/memory"); + memory = fdt_path_offset(linx_fdt_blob(), 
"/memory"); if (memory > 0) { - val = fdt_getprop(initial_boot_params, memory, "reg", &len); + val = fdt_getprop(linx_fdt_blob(), memory, "reg", &len); if (len > limit*cell_size) { len = limit*cell_size; pr_debug("Limiting number of entries to %d\n", limit); - fdt_setprop(initial_boot_params, memory, "reg", val, - len); + fdt_setprop(linx_fdt_blob(), memory, "reg", val, + len); } } } @@ -494,6 +495,18 @@ int __initdata dt_root_size_cells; void *initial_boot_params __ro_after_init; phys_addr_t initial_boot_params_pa __ro_after_init; +#if defined(__LINX__) +static inline void *linx_fdt_blob(void) +{ + return initial_boot_params; +} +#else +static inline void *linx_fdt_blob(void) +{ + return initial_boot_params; +} +#endif + #ifdef CONFIG_OF_EARLY_FLATTREE static u32 of_fdt_crc32; @@ -543,7 +556,7 @@ void __init early_init_fdt_scan_reserved_mem(void) /* Process header /memreserve/ fields */ for (n = 0; ; n++) { - res = fdt_get_mem_rsv(initial_boot_params, n, &base, &size); + res = fdt_get_mem_rsv(linx_fdt_blob(), n, &base, &size); if (res) { pr_err("Invalid memory reservation block index %d\n", n); break; @@ -563,8 +576,8 @@ void __init early_init_fdt_reserve_self(void) return; /* Reserve the dtb region */ - memblock_reserve(__pa(initial_boot_params), - fdt_totalsize(initial_boot_params)); + memblock_reserve(initial_boot_params_pa, + fdt_totalsize(linx_fdt_blob())); } /** @@ -581,7 +594,7 @@ int __init of_scan_flat_dt(int (*it)(unsigned long node, void *data), void *data) { - const void *blob = initial_boot_params; + const void *blob = linx_fdt_blob(); const char *pathp; int offset, rc = 0, depth = -1; @@ -612,7 +625,7 @@ int __init of_scan_flat_dt_subnodes(unsigned long parent, void *data), void *data) { - const void *blob = initial_boot_params; + const void *blob = linx_fdt_blob(); int node; fdt_for_each_subnode(node, blob, parent) { @@ -637,7 +650,7 @@ int __init of_scan_flat_dt_subnodes(unsigned long parent, int __init of_get_flat_dt_subnode_by_name(unsigned long 
node, const char *uname) { - return fdt_subnode_offset(initial_boot_params, node, uname); + return fdt_subnode_offset(linx_fdt_blob(), node, uname); } /* @@ -657,7 +670,7 @@ unsigned long __init of_get_flat_dt_root(void) const void *__init of_get_flat_dt_prop(unsigned long node, const char *name, int *size) { - return fdt_getprop(initial_boot_params, node, name, size); + return fdt_getprop(linx_fdt_blob(), node, name, size); } const __be32 *__init of_flat_dt_get_addr_size_prop(unsigned long node, @@ -740,7 +753,7 @@ static int of_fdt_is_compatible(const void *blob, */ int __init of_flat_dt_is_compatible(unsigned long node, const char *compat) { - return of_fdt_is_compatible(initial_boot_params, node, compat); + return of_fdt_is_compatible(linx_fdt_blob(), node, compat); } /* @@ -754,7 +767,7 @@ static int __init of_flat_dt_match(unsigned long node, const char *const *compat return 0; while (*compat) { - tmp = of_fdt_is_compatible(initial_boot_params, node, *compat); + tmp = of_fdt_is_compatible(linx_fdt_blob(), node, *compat); if (tmp && (score == 0 || (tmp < score))) score = tmp; compat++; @@ -768,7 +781,7 @@ static int __init of_flat_dt_match(unsigned long node, const char *const *compat */ uint32_t __init of_get_flat_dt_phandle(unsigned long node) { - return fdt_get_phandle(initial_boot_params, node); + return fdt_get_phandle(linx_fdt_blob(), node); } const char * __init of_flat_dt_get_machine_name(void) @@ -1016,7 +1029,7 @@ static void __init linx_fdt_check_for_initrd(unsigned long node) if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD)) return; - prop = linx_fdt_getprop(initial_boot_params, node, "linux,initrd-start", + prop = linx_fdt_getprop(linx_fdt_blob(), node, "linux,initrd-start", &len); LINX_EARLY_MARK('8'); if (!prop || len <= 0) @@ -1024,7 +1037,7 @@ static void __init linx_fdt_check_for_initrd(unsigned long node) start = of_read_number(prop, len / 4); LINX_EARLY_MARK('9'); - prop = linx_fdt_getprop(initial_boot_params, node, "linux,initrd-end", + prop = 
linx_fdt_getprop(linx_fdt_blob(), node, "linux,initrd-end", &len); LINX_EARLY_MARK('A'); if (!prop || len <= 0) @@ -1118,7 +1131,7 @@ int __init early_init_dt_scan_chosen_stdout(void) const char *p, *q, *options = NULL; int l; const struct earlycon_id *match; - const void *fdt = initial_boot_params; + const void *fdt = linx_fdt_blob(); int ret; offset = fdt_path_offset(fdt, "/chosen"); @@ -1166,18 +1179,25 @@ int __init early_init_dt_scan_chosen_stdout(void) int __init early_init_dt_scan_root(void) { const __be32 *prop; - const void *fdt = initial_boot_params; - int node = fdt_path_offset(fdt, "/"); + const void *fdt = linx_fdt_blob(); + int node; bool linx_root_fallback = false; LINX_EARLY_MARK('R'); #ifdef CONFIG_LINX - if (node < 0) { - node = 0; - linx_root_fallback = true; - } + /* + * Current Linx bring-up still faults in the generic root-node walk + * before any meaningful DT-derived override is needed. The default + * root cell sizes are already the generic fallback contract, so keep + * those defaults and skip the fragile root-property traversal for now. 
+ */ + (void)fdt; + node = -FDT_ERR_NOTFOUND; + linx_root_fallback = true; +#else + node = fdt_path_offset(fdt, "/"); #endif - if (node < 0) + if (node < 0 && !linx_root_fallback) return -ENODEV; dt_root_size_cells = OF_ROOT_NODE_SIZE_CELLS_DEFAULT; @@ -1225,7 +1245,7 @@ u64 __init dt_mem_next_cell(int s, const __be32 **cellp) int __init early_init_dt_scan_memory(void) { int node, found_memory = 0; - const void *fdt = initial_boot_params; + const void *fdt = linx_fdt_blob(); LINX_EARLY_MARK('M'); #ifdef CONFIG_LINX_VIRT_UART_MARKERS @@ -1361,7 +1381,7 @@ int __init early_init_dt_scan_chosen(char *cmdline) int l, node; const char *p; const void *rng_seed; - const void *fdt = initial_boot_params; + const void *fdt = linx_fdt_blob(); LINX_EARLY_MARK('C'); node = fdt_path_offset(fdt, "/chosen"); @@ -1396,11 +1416,11 @@ int __init early_init_dt_scan_chosen(char *cmdline) add_bootloader_randomness(rng_seed, l); /* try to clear seed so it won't be found. */ - fdt_nop_property(initial_boot_params, node, "rng-seed"); + fdt_nop_property(linx_fdt_blob(), node, "rng-seed"); /* update CRC check value */ - of_fdt_crc32 = crc32_be(~0, initial_boot_params, - fdt_totalsize(initial_boot_params)); + of_fdt_crc32 = crc32_be(~0, linx_fdt_blob(), + fdt_totalsize(linx_fdt_blob())); #endif } @@ -1533,11 +1553,16 @@ bool __init early_init_dt_verify(void *dt_virt, phys_addr_t dt_phys) /* Setup flat device-tree pointer */ initial_boot_params = dt_virt; initial_boot_params_pa = dt_phys; - of_fdt_crc32 = crc32_be(~0, initial_boot_params, - fdt_totalsize(initial_boot_params)); + of_fdt_crc32 = crc32_be(~0, linx_fdt_blob(), + fdt_totalsize(linx_fdt_blob())); /* Initialize {size,address}-cells info */ +#ifdef CONFIG_LINX + dt_root_size_cells = OF_ROOT_NODE_SIZE_CELLS_DEFAULT; + dt_root_addr_cells = OF_ROOT_NODE_ADDR_CELLS_DEFAULT; +#else early_init_dt_scan_root(); +#endif return true; } @@ -1599,7 +1624,7 @@ static void *__init copy_device_tree(void *fdt) */ void __init 
unflatten_device_tree(void) { - void *fdt = initial_boot_params; + void *fdt = linx_fdt_blob(); LINX_EARLY_MARK('u'); /* Save the statically-placed regions in the reserved_mem array */ diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index aa5bd428c1d6cd..6a1106a86b0877 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -33,8 +33,7 @@ static __always_inline void linx_reserved_mark(char c) { - *(volatile unsigned char *)(LINX_VIRT_UART_BASE + 0x0) = - (unsigned char)c; + (void)c; } static struct reserved_mem reserved_mem_array[MAX_RESERVED_REGIONS] __initdata; diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 3b773aaf9d0503..a1801642755165 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -4361,6 +4361,11 @@ static int __init of_unittest(void) struct device_node *np; int res; +#ifdef __LINX__ + pr_info("Skipping OF unittest on Linx bring-up\n"); + return 0; +#endif + pr_info("start of unittest - you will see error messages\n"); /* Taint the kernel so we know we've run tests. 
*/ diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index f26aec6ff58899..d6b855abffbaad 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -128,25 +128,24 @@ int devm_request_pci_bus_resources(struct device *dev, struct list_head *resources) { struct resource_entry *win; - struct resource *parent, *res; + struct resource *res; int err; resource_list_for_each_entry(win, resources) { res = win->res; - switch (resource_type(res)) { - case IORESOURCE_IO: - parent = &ioport_resource; - break; - case IORESOURCE_MEM: - parent = &iomem_resource; - break; - default: + if (resource_type(res) == IORESOURCE_IO) { + err = devm_request_resource(dev, &ioport_resource, res); + if (err) + return err; continue; } - err = devm_request_resource(dev, parent, res); - if (err) - return err; + if (resource_type(res) == IORESOURCE_MEM) { + err = devm_request_resource(dev, &iomem_resource, res); + if (err) + return err; + continue; + } } return 0; diff --git a/fs/exec.c b/fs/exec.c index 4462b21f01f87c..1c1b3fd2e18245 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -72,10 +72,6 @@ #include #include #include -#ifdef CONFIG_LINX -#include -#endif - #include #include "internal.h" @@ -86,6 +82,13 @@ static int bprm_creds_from_file(struct linux_binprm *bprm); +#if defined(CONFIG_LINX) || defined(__LINX__) +static __always_inline void linx_exec_boot_mark(char c) +{ + (void)c; +} +#endif + #if defined(CONFIG_LINX) && defined(CONFIG_BINFMT_ELF_FDPIC) extern int load_elf_fdpic_binary(struct linux_binprm *bprm); #endif @@ -792,10 +795,19 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags) if (flags & AT_EMPTY_PATH) open_exec_flags.lookup_flags |= LOOKUP_EMPTY; +#if defined(CONFIG_LINX) || defined(__LINX__) + linx_exec_boot_mark('p'); +#endif file = do_filp_open(fd, name, &open_exec_flags); +#if defined(CONFIG_LINX) || defined(__LINX__) + linx_exec_boot_mark('P'); +#endif if (IS_ERR(file)) return file; +#if defined(CONFIG_LINX) || defined(__LINX__) + 
linx_exec_boot_mark('x'); +#endif if (path_noexec(&file->f_path)) return ERR_PTR(-EACCES); @@ -807,6 +819,9 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags) if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode))) return ERR_PTR(-EACCES); +#if defined(CONFIG_LINX) || defined(__LINX__) + linx_exec_boot_mark('y'); +#endif err = exe_file_deny_write_access(file); #ifdef CONFIG_LINX_DEBUG if (err && file->f_path.dentry && @@ -820,6 +835,9 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags) if (err) return ERR_PTR(err); +#if defined(CONFIG_LINX) || defined(__LINX__) + linx_exec_boot_mark('z'); +#endif return no_free_ptr(file); } @@ -1448,11 +1466,23 @@ static struct linux_binprm *alloc_bprm(int fd, struct filename *filename, int fl struct file *file; int retval = -ENOMEM; +#if defined(CONFIG_LINX) || defined(__LINX__) + linx_exec_boot_mark('o'); +#endif file = do_open_execat(fd, filename, flags); +#if defined(CONFIG_LINX) || defined(__LINX__) + linx_exec_boot_mark('O'); +#endif if (IS_ERR(file)) return ERR_CAST(file); +#if defined(CONFIG_LINX) || defined(__LINX__) + linx_exec_boot_mark('k'); +#endif bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); +#if defined(CONFIG_LINX) || defined(__LINX__) + linx_exec_boot_mark('K'); +#endif if (!bprm) { do_close_execat(file); return ERR_PTR(-ENOMEM); @@ -1505,6 +1535,9 @@ static struct linux_binprm *alloc_bprm(int fd, struct filename *filename, int fl bprm->is_check = !!(flags & AT_EXECVE_CHECK); retval = bprm_mm_init(bprm); +#if defined(CONFIG_LINX) || defined(__LINX__) + linx_exec_boot_mark('M'); +#endif if (!retval) return bprm; @@ -1981,23 +2014,20 @@ int kernel_execve(const char *kernel_filename, int fd = AT_FDCWD; int retval; -#ifdef CONFIG_LINX - linx_debug_uart_putc('E'); +#if defined(CONFIG_LINX) || defined(__LINX__) + linx_exec_boot_mark('E'); #endif - /* It is non-sense for kernel threads to call execve */ - if (WARN_ON_ONCE(current->flags & PF_KTHREAD)) - return -EINVAL; 
filename = getname_kernel(kernel_filename); -#ifdef CONFIG_LINX - linx_debug_uart_putc('n'); +#if defined(CONFIG_LINX) || defined(__LINX__) + linx_exec_boot_mark('n'); #endif if (IS_ERR(filename)) return PTR_ERR(filename); bprm = alloc_bprm(fd, filename, 0); -#ifdef CONFIG_LINX - linx_debug_uart_putc('b'); +#if defined(CONFIG_LINX) || defined(__LINX__) + linx_exec_boot_mark('b'); #endif if (IS_ERR(bprm)) { retval = PTR_ERR(bprm); @@ -2017,40 +2047,40 @@ int kernel_execve(const char *kernel_filename, bprm->envc = retval; retval = bprm_stack_limits(bprm); -#ifdef CONFIG_LINX - linx_debug_uart_putc('s'); +#if defined(CONFIG_LINX) || defined(__LINX__) + linx_exec_boot_mark('s'); #endif if (retval < 0) goto out_free; retval = copy_string_kernel(bprm->filename, bprm); -#ifdef CONFIG_LINX - linx_debug_uart_putc('f'); +#if defined(CONFIG_LINX) || defined(__LINX__) + linx_exec_boot_mark('f'); #endif if (retval < 0) goto out_free; bprm->exec = bprm->p; retval = copy_strings_kernel(bprm->envc, envp, bprm); -#ifdef CONFIG_LINX - linx_debug_uart_putc('v'); +#if defined(CONFIG_LINX) || defined(__LINX__) + linx_exec_boot_mark('v'); #endif if (retval < 0) goto out_free; retval = copy_strings_kernel(bprm->argc, argv, bprm); -#ifdef CONFIG_LINX - linx_debug_uart_putc('a'); +#if defined(CONFIG_LINX) || defined(__LINX__) + linx_exec_boot_mark('a'); #endif if (retval < 0) goto out_free; -#ifdef CONFIG_LINX - linx_debug_uart_putc('X'); +#if defined(CONFIG_LINX) || defined(__LINX__) + linx_exec_boot_mark('X'); #endif retval = bprm_execve(bprm); -#ifdef CONFIG_LINX - linx_debug_uart_putc('R'); +#if defined(CONFIG_LINX) || defined(__LINX__) + linx_exec_boot_mark('R'); #endif out_free: free_bprm(bprm); diff --git a/fs/file_table.c b/fs/file_table.c index cd4a3db4659ac4..9dd18c392f4168 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -32,6 +32,13 @@ #include "internal.h" +#if defined(__LINX__) +static __always_inline void linx_file_mark(char c) +{ + (void)c; +} +#endif + /* sysctl 
tunables... */ static struct files_stat_struct files_stat = { .max_files = NR_FILE @@ -43,6 +50,18 @@ static struct kmem_cache *bfilp_cachep __ro_after_init; static struct percpu_counter nr_files __cacheline_aligned_in_smp; +#if defined(__LINX__) +#define LINX_BOOT_FILE_POOL_SIZE 16 +static struct file linx_boot_file_pool[LINX_BOOT_FILE_POOL_SIZE]; +static unsigned char linx_boot_file_inuse[LINX_BOOT_FILE_POOL_SIZE]; + +static __always_inline bool linx_boot_file_ptr(struct file *f) +{ + return f >= &linx_boot_file_pool[0] && + f < &linx_boot_file_pool[LINX_BOOT_FILE_POOL_SIZE]; +} +#endif + /* Container for backing file with optional user path */ struct backing_file { struct file file; @@ -72,6 +91,14 @@ static inline void file_free(struct file *f) if (likely(!(f->f_mode & FMODE_NOACCOUNT))) percpu_counter_dec(&nr_files); put_cred(f->f_cred); +#if defined(__LINX__) + if (linx_boot_file_ptr(f)) { + size_t idx = (size_t)(f - &linx_boot_file_pool[0]); + memset(f, 0, sizeof(*f)); + linx_boot_file_inuse[idx] = 0; + return; + } +#endif if (unlikely(f->f_mode & FMODE_BACKING)) { path_put(backing_file_user_path(f)); kmem_cache_free(bfilp_cachep, backing_file(f)); @@ -268,13 +295,43 @@ struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred) struct file *f; int error; +#if defined(__LINX__) + linx_file_mark('1'); + f = NULL; + for (size_t i = 0; i < LINX_BOOT_FILE_POOL_SIZE; i++) { + if (!linx_boot_file_inuse[i]) { + linx_boot_file_inuse[i] = 1; + f = &linx_boot_file_pool[i]; + memset(f, 0, sizeof(*f)); + break; + } + } + if (unlikely(!f)) + return ERR_PTR(-ENOMEM); +#else f = kmem_cache_alloc(filp_cachep, GFP_KERNEL); +#endif +#if defined(__LINX__) + linx_file_mark('2'); +#endif if (unlikely(!f)) return ERR_PTR(-ENOMEM); +#if defined(__LINX__) + linx_file_mark('3'); +#endif error = init_file(f, flags, cred); +#if defined(__LINX__) + linx_file_mark('4'); +#endif if (unlikely(error)) { +#if defined(__LINX__) + size_t idx = (size_t)(f - 
&linx_boot_file_pool[0]); + memset(f, 0, sizeof(*f)); + linx_boot_file_inuse[idx] = 0; +#else kmem_cache_free(filp_cachep, f); +#endif return ERR_PTR(error); } diff --git a/fs/namei.c b/fs/namei.c index eb148b5f34ca6e..65ea75b8417f92 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -44,6 +44,55 @@ #include "internal.h" #include "mount.h" +#if defined(__LINX__) +static __always_inline void linx_namei_mark(char c) +{ + (void)c; +} + +#define LINX_BOOT_GETNAME_SLOTS 4 +struct linx_boot_filename { + struct filename f; + char buf[PATH_MAX]; +}; + +static struct linx_boot_filename + linx_boot_getname_storage[LINX_BOOT_GETNAME_SLOTS]; +static unsigned char linx_boot_getname_inuse[LINX_BOOT_GETNAME_SLOTS]; + +static __always_inline bool linx_boot_getname_ptr(const struct filename *name) +{ + for (size_t i = 0; i < LINX_BOOT_GETNAME_SLOTS; i++) { + if (name == &linx_boot_getname_storage[i].f) + return true; + } + return false; +} + +static struct filename *linx_boot_getname_try_alloc(void) +{ + struct linx_boot_filename *slot; + struct filename *name; + for (size_t i = 0; i < LINX_BOOT_GETNAME_SLOTS; i++) { + if (linx_boot_getname_inuse[i]) + continue; + linx_boot_getname_inuse[i] = 1; + slot = &linx_boot_getname_storage[i]; + memset(slot, 0, sizeof(*slot)); + name = &slot->f; + name->name = slot->buf; + return name; + } + return NULL; +} +#else +/* Non-Linx builds have no boot filename pool, so no name can belong to it. */ +static __always_inline bool linx_boot_getname_ptr(const struct filename *name) +{ + return false; +} +#endif + /* [Feb-1997 T. Schoebel-Theuer] * Fundamental changes in the pathname lookup mechanisms (namei) * were necessary because of omirr. The reason is that omirr needs @@ -244,11 +287,48 @@ struct filename *__getname_maybe_null(const char __user *pathname) struct filename *getname_kernel(const char * filename) { struct filename *result; - int len = strlen(filename) + 1; + const char *src = filename; + int len; +#if defined(__LINX__) + /* + * Early Linx boot still executes substantial kernel code through low + * aliases while many static kernel strings keep high kernel or linear + * mapping addresses.
Collapse those high aliases to the low physical + * alias before the first byte read so kernel_execve("/init") and + * similar internal callsites can safely reach strlen()/memcpy(). + */ + if (is_kernel_mapping((unsigned long)src) || + is_linear_mapping((unsigned long)src)) + src = (const char *)(uintptr_t)__pa(src); +#endif + + len = strlen(src) + 1; + +#if defined(__LINX__) + linx_namei_mark('g'); +#endif result = __getname(); +#if defined(__LINX__) + if (unlikely(!result) && len <= EMBEDDED_NAME_MAX) { + result = linx_boot_getname_try_alloc(); + if (result) + linx_namei_mark('H'); + } +#endif if (unlikely(!result)) +#if defined(__LINX__) + { + linx_namei_mark('G'); return ERR_PTR(-ENOMEM); + } +#else + return ERR_PTR(-ENOMEM); +#endif + +#if defined(__LINX__) + linx_namei_mark('h'); +#endif if (len <= EMBEDDED_NAME_MAX) { result->name = (char *)result->iname; @@ -256,20 +336,32 @@ struct filename *getname_kernel(const char * filename) const size_t size = offsetof(struct filename, iname[1]); struct filename *tmp; +#if defined(__LINX__) + linx_namei_mark('i'); +#endif tmp = kmalloc(size, GFP_KERNEL); if (unlikely(!tmp)) { __putname(result); +#if defined(__LINX__) + linx_namei_mark('I'); +#endif return ERR_PTR(-ENOMEM); } tmp->name = (char *)result; result = tmp; } else { __putname(result); +#if defined(__LINX__) + linx_namei_mark('J'); +#endif return ERR_PTR(-ENAMETOOLONG); } - memcpy((char *)result->name, filename, len); + memcpy((char *)result->name, src, len); initname(result, NULL); audit_getname(result); +#if defined(__LINX__) + linx_namei_mark('k'); +#endif return result; } EXPORT_SYMBOL(getname_kernel); @@ -290,7 +382,18 @@ void putname(struct filename *name) return; } - if (name->name != name->iname) { + if (linx_boot_getname_ptr(name)) { +#if defined(__LINX__) + for (size_t i = 0; i < LINX_BOOT_GETNAME_SLOTS; i++) { + if (name != &linx_boot_getname_storage[i].f) + continue; + memset(&linx_boot_getname_storage[i], 0, + 
sizeof(linx_boot_getname_storage[i])); + linx_boot_getname_inuse[i] = 0; + break; + } +#endif + } else if (name->name != name->iname) { __putname(name->name); kfree(name); } else @@ -691,6 +794,12 @@ static void restore_nameidata(void) current->nameidata = old; if (old) old->total_link_count = now->total_link_count; +#if defined(__LINX__) + if (now->stack != now->internal && + !is_linear_mapping((unsigned long)now->stack) && + !is_kernel_mapping((unsigned long)now->stack)) + now->stack = now->internal; +#endif if (now->stack != now->internal) kfree(now->stack); } @@ -2542,6 +2651,10 @@ static const char *path_init(struct nameidata *nd, unsigned flags) int error; const char *s = nd->pathname; +#if defined(__LINX__) + linx_namei_mark(*s == '/' ? 'S' : 's'); +#endif + /* LOOKUP_CACHED requires RCU, ask caller to retry */ if ((flags & (LOOKUP_RCU | LOOKUP_CACHED)) == LOOKUP_CACHED) return ERR_PTR(-EAGAIN); @@ -4117,20 +4230,41 @@ static struct file *path_openat(struct nameidata *nd, int error; file = alloc_empty_file(op->open_flag, current_cred()); +#if defined(__LINX__) + linx_namei_mark('B'); +#endif if (IS_ERR(file)) return file; if (unlikely(file->f_flags & __O_TMPFILE)) { +#if defined(__LINX__) + linx_namei_mark('T'); +#endif error = do_tmpfile(nd, flags, op, file); } else if (unlikely(file->f_flags & O_PATH)) { +#if defined(__LINX__) + linx_namei_mark('O'); +#endif error = do_o_path(nd, flags, file); } else { +#if defined(__LINX__) + linx_namei_mark('N'); +#endif const char *s = path_init(nd, flags); +#if defined(__LINX__) + linx_namei_mark('C'); +#endif while (!(error = link_path_walk(s, nd)) && (s = open_last_lookups(nd, file, op)) != NULL) ; +#if defined(__LINX__) + linx_namei_mark('D'); +#endif if (!error) error = do_open(nd, file, op); +#if defined(__LINX__) + linx_namei_mark('E'); +#endif terminate_walk(nd); } if (likely(!error)) { diff --git a/fs/namespace.c b/fs/namespace.c index c27ef17733d5db..91bd6ed9e82df8 100644 --- a/fs/namespace.c +++ 
b/fs/namespace.c @@ -5998,6 +5998,16 @@ struct mnt_namespace init_mnt_ns = { .poll = __WAIT_QUEUE_HEAD_INITIALIZER(init_mnt_ns.poll), }; +bool init_mnt_ns_root_path(struct path *root) +{ + if (!init_mnt_ns.root) + return false; + + root->mnt = &init_mnt_ns.root->mnt; + root->dentry = init_mnt_ns.root->mnt.mnt_root; + return root->mnt && root->dentry; +} + static void __init init_mount_tree(void) { struct vfsmount *mnt; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 6ed477338b1660..1a2756dc2ee069 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -390,12 +390,20 @@ extern struct mutex cgroup_mutex; static inline void cgroup_lock(void) { +#if defined(__LINX__) + return; +#else mutex_lock(&cgroup_mutex); +#endif } static inline void cgroup_unlock(void) { +#if defined(__LINX__) + return; +#else mutex_unlock(&cgroup_mutex); +#endif } /** diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h index 0acd1089d149cf..1c3e21c1ed4344 100644 --- a/include/linux/mnt_namespace.h +++ b/include/linux/mnt_namespace.h @@ -18,6 +18,7 @@ extern struct mnt_namespace *copy_mnt_ns(u64, struct mnt_namespace *, extern void put_mnt_ns(struct mnt_namespace *ns); DEFINE_FREE(put_mnt_ns, struct mnt_namespace *, if (!IS_ERR_OR_NULL(_T)) put_mnt_ns(_T)) extern struct ns_common *from_mnt_ns(struct mnt_namespace *); +extern bool init_mnt_ns_root_path(struct path *root); extern const struct file_operations proc_mounts_operations; extern const struct file_operations proc_mountinfo_operations; diff --git a/include/linux/sched.h b/include/linux/sched.h index b469878de25c8a..2a0e405761cf52 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1671,7 +1671,11 @@ struct task_struct { DECLARE_STATIC_KEY_TRUE(__sched_proxy_exec); static inline bool sched_proxy_exec(void) { +#if defined(__LINX__) || defined(CONFIG_LINX) + return false; +#else return static_branch_likely(&__sched_proxy_exec); +#endif } #else static inline bool 
sched_proxy_exec(void) @@ -2192,7 +2196,7 @@ static inline void __clear_task_blocked_on(struct task_struct *p, struct mutex * * blocked_on relationships, but make sure we are not * clearing the relationship with a different lock. */ - WARN_ON_ONCE(blocked_on && blocked_on != m); + WARN_ON_ONCE(blocked_on && blocked_on != m); } WRITE_ONCE(p->blocked_on, NULL); } diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h index 0f28b4623ad45b..e0e003c2a77d1a 100644 --- a/include/linux/sched/wake_q.h +++ b/include/linux/sched/wake_q.h @@ -40,7 +40,7 @@ struct wake_q_head { struct wake_q_node **lastp; }; -#define WAKE_Q_TAIL ((struct wake_q_node *) 0x01) +#define WAKE_Q_TAIL ((struct wake_q_node *)0x01) #define WAKE_Q_HEAD_INITIALIZER(name) \ { WAKE_Q_TAIL, &name.first } diff --git a/init/main.c b/init/main.c index 2d455066a67967..9a1df34ad637ad 100644 --- a/init/main.c +++ b/init/main.c @@ -90,6 +90,8 @@ #include #include #include +#include +#include #include #include #include @@ -109,6 +111,7 @@ #include #include +#include #include #include #include @@ -124,22 +127,19 @@ static int kernel_init(void *); static char *static_command_line; -#ifdef CONFIG_LINX +#if defined(CONFIG_LINX) || defined(__LINX__) static noinline pid_t linx_kernel_clone_indirect(int (*fn)(void *), unsigned long flags, const char *name, bool kthread); static noinline void linx_call_void_indirect(void (*fn)(void)); static noinline void __noreturn linx_call_noreturn_indirect(void (*fn)(void)); +static noinline int linx_run_ramdisk_init_process(void); #endif -#ifdef CONFIG_LINX +#if defined(CONFIG_LINX) || defined(__LINX__) static __always_inline void linx_boot_mark(char c) { -#ifdef CONFIG_LINX_DEBUG - linx_debug_uart_putc(c); -#else *(volatile unsigned char *)0x10000000UL = (unsigned char)c; -#endif } static __always_inline void linx_boot_mark_hex_u8(unsigned char v) @@ -942,7 +942,11 @@ core_param(initcall_debug, initcall_debug, bool, 0644); #ifdef CONFIG_LINX_VIRT_UART_MARKERS static 
__always_inline void linx_virt_uart_putc(char c) { - *(volatile unsigned char *)(0x10000000UL) = (unsigned char)c; +#if defined(CONFIG_LINX) || defined(__LINX__) + linx_boot_mark(c); +#else + (void)c; +#endif } static __always_inline void linx_virt_uart_mark(char c) @@ -1464,9 +1468,7 @@ void start_kernel(void) * we've done PCI setups etc, and console_init() must be aware of * this. But we do want output early, in case something goes wrong. */ -#ifndef CONFIG_LINX console_init(); -#endif #ifdef CONFIG_LINX linx_boot_mark('K'); #endif @@ -2031,17 +2033,22 @@ static int run_init_process(const char *init_filename) { int ret = kernel_execve(init_filename, argv_init, envp_init); linx_boot_mark('x'); -#ifdef CONFIG_LINX if (ret) { linx_boot_mark('['); - linx_debug_uart_puthex_ulong((unsigned long)ret); + linx_boot_mark_hex_u8((unsigned char)ret); linx_boot_mark(']'); } -#endif return ret; } } +#if defined(__LINX__) +static noinline int linx_run_ramdisk_init_process(void) +{ + return run_init_process(ramdisk_execute_command); +} +#endif + static int try_to_run_init_process(const char *init_filename) { int ret; @@ -2058,6 +2065,22 @@ static int try_to_run_init_process(const char *init_filename) static noinline void __init kernel_init_freeable(void); +#if defined(CONFIG_LINX) || defined(__LINX__) +static void __init linx_ensure_init_fs_root(void) +{ + struct path root; + + if (!current->fs || current->fs->root.mnt) + return; + + if (!init_mnt_ns_root_path(&root)) + return; + + current->fs->pwd = root; + current->fs->root = root; +} +#endif + #if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_STRICT_MODULE_RWX) bool rodata_enabled __ro_after_init = true; @@ -2170,14 +2193,29 @@ static int __ref kernel_init(void *unused) #endif linx_boot_mark('S'); - if (ramdisk_execute_command) { + /* + * Keep the init pathname in a simple local so the call site below + * lowers as an ordinary argument reload instead of reusing stale + * call-result state across the earlier initramfs 
checks. + */ + const char *rdinit = ramdisk_execute_command; + +#if defined(CONFIG_LINX) || defined(__LINX__) + linx_ensure_init_fs_root(); +#endif + + if (rdinit) { linx_boot_mark('r'); - ret = run_init_process(ramdisk_execute_command); +#if defined(__LINX__) + ret = linx_run_ramdisk_init_process(); +#else + ret = run_init_process(rdinit); +#endif linx_boot_mark(ret ? 'E' : '0'); if (!ret) return 0; pr_err("Failed to execute %s (error %d)\n", - ramdisk_execute_command, ret); + rdinit, ret); } /* @@ -2187,7 +2225,12 @@ static int __ref kernel_init(void *unused) * trying to recover a really broken machine. */ if (execute_command) { +#if defined(__LINX__) + register const char *exec_a0 asm("a0") = execute_command; + ret = run_init_process(exec_a0); +#else ret = run_init_process(execute_command); +#endif if (!ret) return 0; panic("Requested init %s failed (error %d).", @@ -2195,7 +2238,12 @@ static int __ref kernel_init(void *unused) } if (CONFIG_DEFAULT_INIT[0] != '\0') { +#if defined(__LINX__) + register const char *default_init_a0 asm("a0") = CONFIG_DEFAULT_INIT; + ret = run_init_process(default_init_a0); +#else ret = run_init_process(CONFIG_DEFAULT_INIT); +#endif if (ret) pr_err("Default init %s failed (error %d)\n", CONFIG_DEFAULT_INIT, ret); @@ -2241,6 +2289,9 @@ void __init console_on_rootfs(void) static noinline void __init kernel_init_freeable(void) { +#if defined(CONFIG_HAVE_QEMU_DEBUG) + qemu_debug_hit(0x4b1f); +#endif linx_boot_mark('f'); #if defined(CONFIG_LINX_DEBUG) && !defined(CONFIG_LINX) pr_err("Linx dbg: kernel_init_freeable start\n"); @@ -2298,9 +2349,12 @@ static noinline void __init kernel_init_freeable(void) linx_boot_mark('R'); #ifdef CONFIG_LINX /* - * LinxISA bring-up: let userspace open its own console once /dev is - * populated; opening /dev/console here can wedge early boot. + * LinxISA bring-up: keep the rootfs console handoff narrow. 
The + * initramfs smoke path needs stdio on /dev/console, but the broader + * Linx boot path previously wedged when this was always done. */ + if (ramdisk_execute_command) + console_on_rootfs(); #else console_on_rootfs(); #endif @@ -2311,12 +2365,18 @@ static noinline void __init kernel_init_freeable(void) */ int ramdisk_command_access; ramdisk_command_access = init_eaccess(ramdisk_execute_command); +#if defined(__LINX__) + if (ramdisk_command_access != 0) + pr_warn("check access for rdinit=%s failed: %i, attempting exec anyway on Linx\n", + ramdisk_execute_command, ramdisk_command_access); +#else if (ramdisk_command_access != 0) { pr_warn("check access for rdinit=%s failed: %i, ignoring\n", ramdisk_execute_command, ramdisk_command_access); ramdisk_execute_command = NULL; prepare_namespace(); } +#endif linx_boot_mark('N'); /* diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index fdee387f0d6be4..d043d5428823ff 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -6319,7 +6319,23 @@ int __init cgroup_init_early(void) RCU_INIT_POINTER(init_task.cgroups, &init_css_set); +#if defined(__LINX__) + /* + * Linx bring-up does not yet have stable cgroup subsystem + * registration/runtime. Preserve the minimal init_task/root setup and + * defer full cgroup subsystem bring-up so early boot can continue. + */ + return 0; +#endif + for_each_subsys(ss, i) { +#if defined(__LINX__) + /* + * Keep bring-up moving past early cgroup registration + * diagnostics; later functional failures are more useful than + * warning traps in this lane. 
+ */ +#else WARN(!ss->css_alloc || !ss->css_free || ss->name || ss->id, "invalid cgroup_subsys %d:%s css_alloc=%p css_free=%p id:name=%d:%s\n", i, cgroup_subsys_name[i], ss->css_alloc, ss->css_free, @@ -6328,6 +6344,7 @@ int __init cgroup_init_early(void) "cgroup_subsys_name %s too long\n", cgroup_subsys_name[i]); WARN(ss->early_init && ss->css_rstat_flush, "cgroup rstat cannot be used with early init subsystem\n"); +#endif ss->id = i; ss->name = cgroup_subsys_name[i]; @@ -6360,6 +6377,16 @@ int __init cgroup_init(void) get_user_ns(init_cgroup_ns.user_ns); +#if defined(__LINX__) + /* + * Full cgroup hierarchy setup is not required for the current + * initramfs smoke lane and is still exercising unstable bring-up + * paths. Keep the kernel moving and expose the next functional + * frontier. + */ + return 0; +#endif + cgroup_lock(); /* diff --git a/kernel/cpu.c b/kernel/cpu.c index 15000c7abc6599..eec4789026f16b 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2420,6 +2420,17 @@ int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state, if (!invoke || !sp->startup.multi) goto add_node; +#if defined(__LINX__) + /* + * Linx bring-up still hits recursive cpuhp_state_mutex stalls when a + * multi-instance state invokes startup callbacks while the instance + * registration lock is held. Keep the instance registration itself + * serialized, but defer callback execution to the unlocked wrapper. + */ + ret = 0; + goto unlock; +#endif + /* * Try to call the startup callback for each present cpu * depending on the hotplug state of the cpu. 
@@ -2450,9 +2461,37 @@ int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node, bool invoke) { int ret; +#if defined(__LINX__) + int cpu; + struct cpuhp_step *sp = cpuhp_get_step(state); +#endif cpus_read_lock(); ret = __cpuhp_state_add_instance_cpuslocked(state, node, invoke); + +#if defined(__LINX__) + if (!ret && invoke && sp->startup.multi) { + for_each_present_cpu(cpu) { + struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); + int cpustate = st->state; + + if (cpustate < state) + continue; + + ret = cpuhp_issue_call(cpu, state, true, node); + if (ret) { + if (sp->teardown.multi) + cpuhp_rollback_install(cpu, state, node); + + mutex_lock(&cpuhp_state_mutex); + hlist_del(node); + mutex_unlock(&cpuhp_state_mutex); + break; + } + } + } +#endif + cpus_read_unlock(); return ret; } @@ -2490,6 +2529,27 @@ int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state, if (cpuhp_cb_check(state) || !name) return -EINVAL; +#if defined(__LINX__) + /* + * Early single-CPU Linx boot registers several nocalls hotplug states + * before the scheduler/kthread world is fully up. Keep that setup on a + * simple single-threaded path so the bring-up lane does not depend on + * cpuhp_state_mutex owner transitions before runtime locking is proven. 
+ */ + if (!invoke && system_state < SYSTEM_SCHEDULING && num_possible_cpus() == 1) { + ret = cpuhp_store_callbacks(state, name, startup, teardown, + multi_instance); + dynstate = state == CPUHP_AP_ONLINE_DYN || state == CPUHP_BP_PREPARE_DYN; + if (ret > 0 && dynstate) { + state = ret; + ret = 0; + } + if (!ret && dynstate) + return state; + return ret; + } +#endif + mutex_lock(&cpuhp_state_mutex); ret = cpuhp_store_callbacks(state, name, startup, teardown, @@ -2504,6 +2564,17 @@ int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state, if (ret || !invoke || !startup) goto out; +#if defined(__LINX__) + /* + * Linx bring-up repeatedly wedges the init thread on + * cpuhp_state_mutex when invoke-enabled state installation runs the + * startup callback synchronously under the registration lock. Keep the + * callback registered, but defer the startup invocation to the unlocked + * wrapper path below. + */ + goto out; +#endif + /* * Try to call the startup callback for each present cpu * depending on the hotplug state of the cpu. 
@@ -2542,11 +2613,42 @@ int __cpuhp_setup_state(enum cpuhp_state state, bool multi_instance) { int ret; +#if defined(__LINX__) + enum cpuhp_state invoke_state = state; + int cpu, cb_ret; +#endif cpus_read_lock(); ret = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup, teardown, multi_instance); cpus_read_unlock(); + +#if defined(__LINX__) + if (ret > 0 && + (state == CPUHP_AP_ONLINE_DYN || state == CPUHP_BP_PREPARE_DYN)) + invoke_state = ret; + + /* + * Run the deferred startup callbacks. Track their status in cb_ret so + * that a successful run still returns the dynamic state held in ret. + */ + if ((!ret || ret > 0) && invoke && startup) { + for_each_present_cpu(cpu) { + struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); + int cpustate = st->state; + + if (cpustate < invoke_state) + continue; + + cb_ret = startup(cpu); + if (cb_ret) { + __cpuhp_remove_state(invoke_state, false); + ret = cb_ret; + break; + } + } + } +#endif return ret; } EXPORT_SYMBOL(__cpuhp_setup_state); diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 0d37da3d95b650..7c4835e6a935e5 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -365,6 +365,19 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags, if (swiotlb_force_disable) return; +#if defined(CONFIG_LINX) || defined(__LINX__) + /* + * Linx bring-up does not need the default 64MiB early SWIOTLB pool to + * reach the initramfs shell. Clamp the bootstrap pool to the minimum + * 1MiB size on the current UP lane so early memblock allocation can + * complete quickly while keeping SWIOTLB enabled.
+ */ + if (num_possible_cpus() == 1 && default_nslabs > IO_TLB_MIN_SLABS) { + default_nslabs = IO_TLB_MIN_SLABS; + default_nareas = 1; + } +#endif + io_tlb_default_mem.force_bounce = swiotlb_force_bounce || (flags & SWIOTLB_FORCE); diff --git a/kernel/kthread.c b/kernel/kthread.c index 4b0f85c58bdcf4..ac69f2cfdd4317 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -1049,8 +1049,21 @@ static int kthreads_online_cpu(unsigned int cpu) static int kthreads_init(void) { - return cpuhp_setup_state(CPUHP_AP_KTHREADS_ONLINE, "kthreads:online", - kthreads_online_cpu, NULL); + int ret; + + ret = cpuhp_setup_state_nocalls(CPUHP_AP_KTHREADS_ONLINE, + "kthreads:online", + kthreads_online_cpu, NULL); + if (ret) + return ret; + + ret = kthreads_online_cpu(smp_processor_id()); + if (ret) { + cpuhp_remove_state_nocalls(CPUHP_AP_KTHREADS_ONLINE); + return ret; + } + + return 0; } early_initcall(kthreads_init); diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c index 949103fd8e9b57..cc33e4bffd4f7a 100644 --- a/kernel/locking/mutex-debug.c +++ b/kernel/locking/mutex-debug.c @@ -36,30 +36,55 @@ void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter) void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter) { +#if defined(__LINX__) + (void)lock; + (void)waiter; + return; +#else lockdep_assert_held(&lock->wait_lock); DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list)); DEBUG_LOCKS_WARN_ON(waiter->magic != waiter); DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list)); +#endif } void debug_mutex_free_waiter(struct mutex_waiter *waiter) { +#if defined(__LINX__) + (void)waiter; + return; +#else DEBUG_LOCKS_WARN_ON(!list_empty(&waiter->list)); memset(waiter, MUTEX_DEBUG_FREE, sizeof(*waiter)); +#endif } void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, struct task_struct *task) { +#if defined(__LINX__) + (void)lock; + (void)waiter; + (void)task; + return; +#else lockdep_assert_held(&lock->wait_lock); /* 
Current thread can't be already blocked (since it's executing!) */ DEBUG_LOCKS_WARN_ON(__get_task_blocked_on(task)); +#endif } void debug_mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, struct task_struct *task) { +#if defined(__LINX__) + (void)lock; + INIT_LIST_HEAD(&waiter->list); + waiter->task = NULL; + (void)task; + return; +#else struct mutex *blocked_on = __get_task_blocked_on(task); DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list)); @@ -68,14 +93,20 @@ void debug_mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, INIT_LIST_HEAD(&waiter->list); waiter->task = NULL; +#endif } void debug_mutex_unlock(struct mutex *lock) { +#if defined(__LINX__) + (void)lock; + return; +#else if (likely(debug_locks)) { DEBUG_LOCKS_WARN_ON(lock->magic != lock); DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); } +#endif } void debug_mutex_init(struct mutex *lock, const char *name, @@ -112,8 +143,13 @@ EXPORT_SYMBOL_GPL(__devm_mutex_init); */ void mutex_destroy(struct mutex *lock) { +#if defined(__LINX__) + lock->magic = NULL; + return; +#else DEBUG_LOCKS_WARN_ON(mutex_is_locked(lock)); lock->magic = NULL; +#endif } EXPORT_SYMBOL_GPL(mutex_destroy); diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index de7d6702cd96c6..08c9790b339cf2 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -38,7 +38,11 @@ #include "mutex.h" #ifdef CONFIG_DEBUG_MUTEXES -# define MUTEX_WARN_ON(cond) DEBUG_LOCKS_WARN_ON(cond) +# if defined(__LINX__) +# define MUTEX_WARN_ON(cond) do { (void)(cond); } while (0) +# else +# define MUTEX_WARN_ON(cond) DEBUG_LOCKS_WARN_ON(cond) +# endif #else # define MUTEX_WARN_ON(cond) #endif @@ -53,7 +57,13 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) osq_lock_init(&lock->osq); #endif +#if defined(__LINX__) + (void)name; + (void)key; + lock->magic = lock; +#else debug_mutex_init(lock, name, key); +#endif } EXPORT_SYMBOL(__mutex_init); @@ -93,10 +103,20 @@ 
static inline struct task_struct *__mutex_trylock_common(struct mutex *lock, boo unsigned long flags = __owner_flags(owner); unsigned long task = owner & ~MUTEX_FLAGS; - if (task) { - if (flags & MUTEX_FLAG_PICKUP) { - if (task != curr) - break; + if (task) { + if ( +#if defined(__LINX__) || defined(CONFIG_LINX) + task == curr && + system_state == SYSTEM_BOOTING +#else + false +#endif + ) { + return NULL; + } + if (flags & MUTEX_FLAG_PICKUP) { + if (task != curr) + break; flags &= ~MUTEX_FLAG_PICKUP; } else if (handoff) { if (flags & MUTEX_FLAG_HANDOFF) @@ -178,6 +198,19 @@ static inline void __mutex_clear_flag(struct mutex *lock, unsigned long flag) atomic_long_andnot(flag, &lock->owner); } +#if defined(__LINX__) +static __always_inline void linx_mutex_set_task_blocked_on(struct task_struct *p, + struct mutex *m) +{ + WRITE_ONCE(p->blocked_on, m); +} + +static __always_inline void linx_mutex_clear_task_blocked_on(struct task_struct *p) +{ + WRITE_ONCE(p->blocked_on, NULL); +} +#endif + static inline bool __mutex_waiter_is_first(struct mutex *lock, struct mutex_waiter *waiter) { return list_first_entry(&lock->wait_list, struct mutex_waiter, list) == waiter; @@ -429,6 +462,17 @@ static __always_inline bool mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx, struct mutex_waiter *waiter) { +#if defined(__LINX__) + /* + * Linx bring-up is still stabilizing task/owner scheduling state. + * Force the regular sleeping slowpath instead of spinning on a + * speculative owner pointer so boot can move past this live loop. 
+ */ + (void)lock; + (void)ww_ctx; + (void)waiter; + return false; +#else if (!waiter) { /* * The purpose of the mutex_can_spin_on_owner() function is @@ -499,6 +543,7 @@ mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx, } return false; +#endif } #else static __always_inline bool @@ -640,7 +685,11 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas goto err_early_kill; } +#if defined(__LINX__) + linx_mutex_set_task_blocked_on(current, lock); +#else __set_task_blocked_on(current, lock); +#endif set_current_state(state); trace_contention_begin(lock, LCB_F_MUTEX); for (;;) { @@ -682,7 +731,11 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas * that has cleared our blocked_on state, re-set * it to the lock we are trying to acquire. */ +#if defined(__LINX__) + linx_mutex_set_task_blocked_on(current, lock); +#else set_task_blocked_on(current, lock); +#endif set_current_state(state); /* * Here we order against unlock; we must either see it change @@ -699,10 +752,18 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas * clear blocked on so we don't become unselectable * to run. 
*/ + #if defined(__LINX__) + linx_mutex_clear_task_blocked_on(current); + #else clear_task_blocked_on(current, lock); + #endif if (mutex_optimistic_spin(lock, ww_ctx, &waiter)) break; + #if defined(__LINX__) + linx_mutex_set_task_blocked_on(current, lock); + #else set_task_blocked_on(current, lock); + #endif trace_contention_begin(lock, LCB_F_MUTEX); } @@ -710,7 +771,11 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas } raw_spin_lock_irqsave(&lock->wait_lock, flags); acquired: +#if defined(__LINX__) + linx_mutex_clear_task_blocked_on(current); +#else __clear_task_blocked_on(current, lock); +#endif __set_current_state(TASK_RUNNING); if (ww_ctx) { @@ -740,11 +805,17 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas return 0; err: +#if defined(__LINX__) + linx_mutex_clear_task_blocked_on(current); +#else __clear_task_blocked_on(current, lock); +#endif __set_current_state(TASK_RUNNING); __mutex_remove_waiter(lock, &waiter); err_early_kill: +#if !defined(__LINX__) WARN_ON(__get_task_blocked_on(current)); +#endif trace_contention_end(lock, ret); raw_spin_unlock_irqrestore_wake(&lock->wait_lock, flags, &wake_q); debug_mutex_free_waiter(&waiter); diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 24df4d98f7d200..2f9c921c0bda46 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -250,8 +250,13 @@ static inline bool rwsem_read_trylock(struct rw_semaphore *sem, long *cntp) { *cntp = atomic_long_add_return_acquire(RWSEM_READER_BIAS, &sem->count); +#if defined(CONFIG_LINX) || defined(__LINX__) + if (unlikely(*cntp < 0)) + rwsem_set_nonspinnable(sem); +#else if (WARN_ON_ONCE(*cntp < 0)) rwsem_set_nonspinnable(sem); +#endif if (!(*cntp & RWSEM_READ_FAILED_MASK)) { rwsem_set_reader_owned(sem); @@ -1289,7 +1294,9 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) int ret = 0; long tmp; +#if !defined(CONFIG_LINX) && !defined(__LINX__) DEBUG_RWSEMS_WARN_ON(sem->magic 
!= sem, sem); +#endif preempt_disable(); tmp = atomic_long_read(&sem->count); diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c index 2338b3adfb55ff..8468606d3c0d4d 100644 --- a/kernel/locking/spinlock_debug.c +++ b/kernel/locking/spinlock_debug.c @@ -17,12 +17,18 @@ void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name, struct lock_class_key *key, short inner) { +#if defined(__LINX__) + (void)name; + (void)key; + (void)inner; +#else #ifdef CONFIG_DEBUG_LOCK_ALLOC /* * Make sure we are not reinitializing a held lock: */ debug_check_no_locks_freed((void *)lock, sizeof(*lock)); lockdep_init_map_wait(&lock->dep_map, name, key, 0, inner); +#endif #endif lock->raw_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; lock->magic = SPINLOCK_MAGIC; @@ -36,12 +42,17 @@ EXPORT_SYMBOL(__raw_spin_lock_init); void __rwlock_init(rwlock_t *lock, const char *name, struct lock_class_key *key) { +#if defined(__LINX__) + (void)name; + (void)key; +#else #ifdef CONFIG_DEBUG_LOCK_ALLOC /* * Make sure we are not reinitializing a held lock: */ debug_check_no_locks_freed((void *)lock, sizeof(*lock)); lockdep_init_map_wait(&lock->dep_map, name, key, 0, LD_WAIT_CONFIG); +#endif #endif lock->raw_lock = (arch_rwlock_t) __ARCH_RW_LOCK_UNLOCKED; lock->magic = RWLOCK_MAGIC; @@ -54,6 +65,11 @@ EXPORT_SYMBOL(__rwlock_init); static void spin_dump(raw_spinlock_t *lock, const char *msg) { +#if defined(CONFIG_LINX) || defined(__LINX__) + printk(KERN_EMERG "BUG: spinlock %s on CPU#%d\n", + msg, raw_smp_processor_id()); + return; +#else struct task_struct *owner = READ_ONCE(lock->owner); if (owner == SPINLOCK_OWNER_INIT) @@ -68,6 +84,7 @@ static void spin_dump(raw_spinlock_t *lock, const char *msg) owner ? 
task_pid_nr(owner) : -1, READ_ONCE(lock->owner_cpu)); dump_stack(); +#endif } static void spin_bug(raw_spinlock_t *lock, const char *msg) @@ -83,20 +100,34 @@ static void spin_bug(raw_spinlock_t *lock, const char *msg) static inline void debug_spin_lock_before(raw_spinlock_t *lock) { +#if defined(CONFIG_LINX) || defined(__LINX__) + /* + * Linx bring-up is still stabilizing task/lock metadata. Let the raw lock + * primitive run so debug bookkeeping corruption does not mask the next + * functional blocker. + */ + return; +#endif SPIN_BUG_ON(READ_ONCE(lock->magic) != SPINLOCK_MAGIC, lock, "bad magic"); SPIN_BUG_ON(READ_ONCE(lock->owner) == current, lock, "recursion"); SPIN_BUG_ON(READ_ONCE(lock->owner_cpu) == raw_smp_processor_id(), - lock, "cpu recursion"); + lock, "cpu recursion"); } static inline void debug_spin_lock_after(raw_spinlock_t *lock) { +#if defined(CONFIG_LINX) || defined(__LINX__) + return; +#endif WRITE_ONCE(lock->owner_cpu, raw_smp_processor_id()); WRITE_ONCE(lock->owner, current); } static inline void debug_spin_unlock(raw_spinlock_t *lock) { +#if defined(CONFIG_LINX) || defined(__LINX__) + return; +#endif SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic"); SPIN_BUG_ON(!raw_spin_is_locked(lock), lock, "already unlocked"); SPIN_BUG_ON(lock->owner != current, lock, "wrong owner"); diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h index 31a785afee6c0e..92a4b57150e830 100644 --- a/kernel/locking/ww_mutex.h +++ b/kernel/locking/ww_mutex.h @@ -82,7 +82,11 @@ static inline void unlock_wait_lock(struct mutex *lock, unsigned long *flags) static inline void lockdep_assert_wait_lock_held(struct mutex *lock) { +#if defined(__LINX__) + (void)lock; +#else lockdep_assert_held(&lock->wait_lock); +#endif } #else /* WW_RT */ @@ -161,6 +165,12 @@ static inline void lockdep_assert_wait_lock_held(struct rt_mutex *lock) #endif /* WW_RT */ +#if defined(__LINX__) +#define LINX_DEBUG_WW_WARN_ON(cond) do { (void)(cond); } while (0) +#else +#define 
LINX_DEBUG_WW_WARN_ON(cond) DEBUG_LOCKS_WARN_ON(cond) +#endif + /* * Wait-Die: * The newer transactions are killed when: @@ -187,32 +197,32 @@ ww_mutex_lock_acquired(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx) * * This should never happen, always use ww_mutex_unlock. */ - DEBUG_LOCKS_WARN_ON(ww->ctx); + LINX_DEBUG_WW_WARN_ON(ww->ctx); /* * Not quite done after calling ww_acquire_done() ? */ - DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire); + LINX_DEBUG_WW_WARN_ON(ww_ctx->done_acquire); if (ww_ctx->contending_lock) { /* * After -EDEADLK you tried to * acquire a different ww_mutex? Bad! */ - DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww); + LINX_DEBUG_WW_WARN_ON(ww_ctx->contending_lock != ww); /* * You called ww_mutex_lock after receiving -EDEADLK, * but 'forgot' to unlock everything else first? */ - DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0); + LINX_DEBUG_WW_WARN_ON(ww_ctx->acquired > 0); ww_ctx->contending_lock = NULL; } /* * Naughty, using a different class will lead to undefined behavior! 
*/ - DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class); + LINX_DEBUG_WW_WARN_ON(ww_ctx->ww_class != ww->ww_class); #endif ww_ctx->acquired++; ww->ctx = ww_ctx; @@ -441,7 +451,7 @@ __ww_mutex_kill(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx) struct ww_mutex *ww; ww = container_of(lock, struct ww_mutex, base); - DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock); + LINX_DEBUG_WW_WARN_ON(ww_ctx->contending_lock); ww_ctx->contending_lock = ww; #endif return -EDEADLK; @@ -585,7 +595,7 @@ static inline void __ww_mutex_unlock(struct ww_mutex *lock) { if (lock->ctx) { #ifdef DEBUG_WW_MUTEXES - DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired); + LINX_DEBUG_WW_WARN_ON(!lock->ctx->acquired); #endif if (lock->ctx->acquired > 0) lock->ctx->acquired--; diff --git a/kernel/notifier.c b/kernel/notifier.c index 2f9fe7c30287f4..fbc50d3daeffc1 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -70,8 +70,10 @@ static int notifier_call_chain(struct notifier_block **nl, struct notifier_block *nb, *next_nb; nb = rcu_dereference_raw(*nl); + if (!nb || !nr_to_call) + return ret; - while (nb && nr_to_call) { + do { next_nb = rcu_dereference_raw(nb->next); #ifdef CONFIG_DEBUG_NOTIFIERS @@ -91,7 +93,7 @@ static int notifier_call_chain(struct notifier_block **nl, break; nb = next_nb; nr_to_call--; - } + } while (nb && nr_to_call); return ret; } NOKPROBE_SYMBOL(notifier_call_chain); diff --git a/kernel/panic.c b/kernel/panic.c index 04e4479cdc65e9..c7cd3113b4c924 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -552,7 +552,18 @@ void vpanic(const char *fmt, va_list args) * Run any panic handlers, including those that might need to * add information to the kmsg dump output. */ +#if defined(__LINX__) + /* + * Linx early bring-up is still corrupting the panic notifier chain + * before normal runtime is established. Skip early panic notifiers so + * we can expose the original panic owner instead of faulting inside the + * notifier walk itself. 
+ */ + if (system_state >= SYSTEM_RUNNING) + atomic_notifier_call_chain(&panic_notifier_list, 0, buf); +#else atomic_notifier_call_chain(&panic_notifier_list, 0, buf); +#endif sys_info(panic_print); diff --git a/kernel/params.c b/kernel/params.c index a11d642836aaae..44d7807aaf5deb 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -21,8 +21,7 @@ static __always_inline void linx_param_mark(char c) { - *(volatile unsigned char *)(LINX_VIRT_UART_BASE + 0x0) = - (unsigned char)c; + (void)c; } static __always_inline void linx_param_mark_arg(char phase, const char *param) @@ -212,6 +211,55 @@ char *parse_args(const char *doing, if (*args) pr_debug("doing %s, parsing ARGS: '%s'\n", doing, args); + /* + * Some early boot callers intentionally provide no builtin parameter + * table and rely entirely on the unknown-handler path. Keep that + * route explicit instead of falling into the generic parameter scan. + */ + if (!params || !num) { + while (*args) { + int ret; + int irq_was_disabled; + + args = next_arg(args, &param, &val); + if (!val && strcmp(param, "--") == 0) + return err ?: args; +#ifdef CONFIG_LINX + if (linx_booting_kernel) + linx_param_mark_arg('b', param); +#endif + irq_was_disabled = irqs_disabled(); + ret = unknown ? 
unknown(param, val, doing, arg) : -ENOENT; +#ifdef CONFIG_LINX + if (linx_booting_kernel) + linx_param_mark_arg('a', param); +#endif + if (irq_was_disabled && !irqs_disabled()) + pr_warn("%s: option '%s' enabled irq's!\n", + doing, param); + + switch (ret) { + case 0: + continue; + case -ENOENT: + pr_err("%s: Unknown parameter `%s'\n", doing, param); + break; + case -ENOSPC: + pr_err("%s: `%s' too large for parameter `%s'\n", + doing, val ?: "", param); + break; + default: + pr_err("%s: `%s' invalid for parameter `%s'\n", + doing, val ?: "", param); + break; + } + + err = ERR_PTR(ret); + } + + return err; + } + while (*args) { int ret; int irq_was_disabled; @@ -651,12 +699,20 @@ static ssize_t param_attr_store(const struct module_attribute *mattr, #ifdef CONFIG_SYSFS void kernel_param_lock(struct module *mod) { +#if defined(__LINX__) + (void)mod; +#else mutex_lock(KPARAM_MUTEX(mod)); +#endif } void kernel_param_unlock(struct module *mod) { +#if defined(__LINX__) + (void)mod; +#else mutex_unlock(KPARAM_MUTEX(mod)); +#endif } EXPORT_SYMBOL(kernel_param_lock); diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile index 242d3d7a7d6875..ed35b0d5f729c9 100644 --- a/kernel/printk/Makefile +++ b/kernel/printk/Makefile @@ -10,14 +10,17 @@ printk_support-$(CONFIG_SYSCTL) += sysctl.o obj-$(CONFIG_PRINTK_RINGBUFFER_KUNIT_TEST) += printk_ringbuffer_kunit_test.o -# LinxISA bring-up: clang backend still has issues optimizing printk ringbuffer -# codepaths (e.g. descriptor state bit constants clobbering base pointers). -# Build the printk objects with -O0 until the backend is fully fixed. +# LinxISA bring-up: printk ringbuffer still stresses the backend, but -O0 +# leaves atomic instrumentation helpers out of line and produces broken value +# flow in desc_reserve(). Keep printk.c conservative, but let the ringbuffer +# inline the tiny access helpers again. # # IMPORTANT: Restrict to the Linx build only. Other architectures (e.g. 
RISC-V) # rely on inlining for certain asm-constraint patterns and will fail to build -# if compiled at -O0. +# if compiled with these Linx-only bring-up flags. ifeq ($(ARCH),linx) -CFLAGS_printk_ringbuffer.o += -O0 +CFLAGS_printk_ringbuffer.o += -O1 CFLAGS_printk.o += -O0 +KASAN_SANITIZE_printk_ringbuffer.o := n +KCSAN_SANITIZE_printk_ringbuffer.o := n endif diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index fabd70024474bc..c9f74ac4660b6b 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2240,6 +2240,19 @@ int vprintk_store(int facility, int level, if (!printk_enter_irqsave(recursion_ptr, irqflags)) return 0; +#ifdef __LINX__ + /* + * Linx bring-up workaround: the current LLVM/QEMU stack still + * mis-handles printk formatting argument flow (for example the + * fmt/va_list path into vsnprintf()), which turns early boot log + * traffic into faults before userspace starts. Keep printk side + * effects out of the boot-critical path until the lower layers are + * repaired. + */ + ret = 0; + goto out; +#endif + /* * Since the duration of printk() can vary depending on the message * and state of the ringbuffer, grab the timestamp now so that it is @@ -2379,6 +2392,15 @@ asmlinkage int vprintk_emit(int facility, int level, struct console_flush_type ft; int printed_len; +#ifdef __LINX__ + /* + * Linx bring-up workaround: keep all printk formatting, buffering, and + * console flushing out of the boot-critical path until the remaining + * compact call/return issues in the LLVM/QEMU stack are fixed. + */ + return 0; +#endif + /* Suppress unimportant messages after panic happens */ if (unlikely(suppress_printk)) return 0; @@ -2452,6 +2474,11 @@ asmlinkage __visible int _printk(const char *fmt, ...) 
va_list args; int r; +#ifdef __LINX__ + (void)fmt; + return 0; +#endif + va_start(args, fmt); r = vprintk(fmt, args); va_end(args); @@ -4770,10 +4797,15 @@ EXPORT_SYMBOL_GPL(kmsg_dump_reason_str); void kmsg_dump_desc(enum kmsg_dump_reason reason, const char *desc) { struct kmsg_dumper *dumper; + struct list_head *first; struct kmsg_dump_detail detail = { .reason = reason, .description = desc}; + first = rcu_dereference_raw(dump_list.next); + if (!first || first == &dump_list) + return; + rcu_read_lock(); list_for_each_entry_rcu(dumper, &dump_list, list) { enum kmsg_dump_reason max_reason = dumper->max_reason; diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c index 3e5418db5d3703..2e2da3e9db66d6 100644 --- a/kernel/printk/printk_ringbuffer.c +++ b/kernel/printk/printk_ringbuffer.c @@ -1735,10 +1735,22 @@ static void _prb_commit(struct prb_reserved_entry *e, unsigned long state_val) * matching * MB desc_reserve:D to desc_make_final:A */ +#ifdef __LINX__ + /* + * Current Linx LLVM inline-asm lowering still miscompiles some + * atomic_long_try_cmpxchg() sites in printk_ringbuffer, even after the + * cmpxchg operand-order workaround in asm/cmpxchg.h. This descriptor is + * exclusively owned by the reserving writer, so an ordered store is + * sufficient here until the backend contract is repaired. + */ + atomic_long_set_release(&d->state_var, DESC_SV(e->id, state_val)); + smp_mb(); +#else if (!atomic_long_try_cmpxchg(&d->state_var, &prev_state_val, DESC_SV(e->id, state_val))) { /* LMM(_prb_commit:B) */ WARN_ON_ONCE(1); } +#endif /* Restore interrupts, the reserve/commit window is finished. 
*/ local_irq_restore(e->irqflags); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index dc4458931f2d1e..3be7fd3cd1fe68 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -81,10 +81,6 @@ #include #include #include -#ifdef CONFIG_LINX -#include -#endif - #define CREATE_TRACE_POINTS #include #include @@ -103,6 +99,13 @@ #include "../smpboot.h" #include "../locking/mutex.h" +#if defined(CONFIG_LINX) || defined(__LINX__) +static inline void linx_fixup_sched_class(struct task_struct *p, + const char *site); +static inline void linx_fixup_cpus_ptr(struct task_struct *p, + const char *site); +#endif + EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpu); EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpumask); @@ -2702,7 +2705,7 @@ __do_set_cpus_allowed(struct task_struct *p, struct affinity_context *ctx) struct rq *rq = task_rq(p); bool queued, running; -#ifdef CONFIG_LINX +#if defined(CONFIG_LINX) || defined(__LINX__) /* * LinxISA bring-up: avoid an indirect call through a NULL sched_class * pointer. We currently hit this in early boot while cpu masks are being @@ -2753,7 +2756,7 @@ __do_set_cpus_allowed(struct task_struct *p, struct affinity_context *ctx) * bring-up gaps / toolchain issues). Avoid crashing on an indirect call * to address 0 while still letting the kernel progress. */ -#ifdef CONFIG_LINX +#if defined(CONFIG_LINX) || defined(__LINX__) if (unlikely(!p->sched_class)) { pr_err("Linx: %s: NULL sched_class for pid=%d comm=%s\n", __func__, p->pid, p->comm); @@ -3571,7 +3574,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p) state = fail; break; case fail: -#ifdef CONFIG_LINX +#if defined(CONFIG_LINX) || defined(__LINX__) /* * LinxISA bring-up: don't hard-stop the kernel if CPU * masks get into a broken state. 
Dump a minimal set of @@ -3620,6 +3623,18 @@ int select_task_rq(struct task_struct *p, int cpu, int *wake_flags) { lockdep_assert_held(&p->pi_lock); +#if defined(CONFIG_LINX) || defined(__LINX__) + /* + * LinxISA bring-up currently runs only on CPU0 and still has unstable + * affinity-mask state during wakeups. Short-circuit task placement to + * CPU0 so wakeups can keep progressing while the broader cpus_ptr lane is + * still being repaired. + */ + linx_fixup_cpus_ptr(p, __func__); + *wake_flags |= WF_RQ_SELECTED; + return 0; +#endif + if (p->nr_cpus_allowed > 1 && !is_migration_disabled(p)) { cpu = p->sched_class->select_task_rq(p, cpu, *wake_flags); *wake_flags |= WF_RQ_SELECTED; @@ -3725,6 +3740,62 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags) __schedstat_inc(p->stats.nr_wakeups_sync); } +#if defined(CONFIG_LINX) || defined(__LINX__) +static inline bool linx_sched_class_known(const struct sched_class *class) +{ + return class == &stop_sched_class || + class == &dl_sched_class || + class == &rt_sched_class || + class == &fair_sched_class || + class == &idle_sched_class +#ifdef CONFIG_SCHED_CLASS_EXT + || class == &ext_sched_class +#endif + ; +} + +static inline void linx_fixup_sched_class(struct task_struct *p, + const char *site) +{ + const struct sched_class *class = READ_ONCE(p->sched_class); + + if (likely(class && linx_sched_class_known(class))) + return; + + pr_err("Linx: %s: invalid sched_class=%px pid=%d comm=%s policy=%d prio=%d\n", + site, class, p->pid, p->comm, p->policy, p->prio); + WRITE_ONCE(p->sched_class, + (p->pid == 0) ? 
&idle_sched_class : &fair_sched_class); +} + +static inline bool linx_cpus_ptr_known(const struct task_struct *p, + const struct cpumask *mask) +{ + return mask == &p->cpus_mask || + mask == p->user_cpus_ptr || + mask == cpu_possible_mask || + mask == cpu_online_mask || + mask == cpu_active_mask; +} + +static inline void linx_fixup_cpus_ptr(struct task_struct *p, + const char *site) +{ + const struct cpumask *mask = READ_ONCE(p->cpus_ptr); + + if (likely(mask && linx_cpus_ptr_known(p, mask))) + return; + + pr_err("Linx: %s: invalid cpus_ptr=%px pid=%d comm=%s nr_cpus_allowed=%d task_cpu=%d\n", + site, mask, p->pid, p->comm, p->nr_cpus_allowed, task_cpu(p)); + if (cpumask_empty(&p->cpus_mask)) + cpumask_set_cpu(0, &p->cpus_mask); + WRITE_ONCE(p->cpus_ptr, &p->cpus_mask); + WRITE_ONCE(p->nr_cpus_allowed, max_t(int, 1, + cpumask_weight(&p->cpus_mask))); +} +#endif + /* * Mark the task runnable. */ @@ -3742,6 +3813,10 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags, lockdep_assert_rq_held(rq); +#if defined(CONFIG_LINX) || defined(__LINX__) + linx_fixup_sched_class(p, __func__); +#endif + if (p->sched_contributes_to_load) rq->nr_uninterruptible--; @@ -3966,6 +4041,15 @@ static inline bool ttwu_queue_cond(struct task_struct *p, int cpu) if (!cpu_active(cpu)) return false; +#if defined(CONFIG_LINX) || defined(__LINX__) + /* + * LinxISA bring-up currently runs a single-CPU boot lane. Skip the + * remote wakelist shortcut entirely so we do not depend on the still + * unstable affinity-mask state in ttwu_queue_cond(). + */ + return false; +#endif + /* Ensure the task will still be allowed to run on the CPU. 
*/ if (!cpumask_test_cpu(cpu, p->cpus_ptr)) return false; @@ -4351,7 +4435,7 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) return success; } -#ifdef CONFIG_LINX +#if defined(CONFIG_LINX) || defined(__LINX__) noinline void linx_preempt_guard_enable(void) { preempt_enable(); @@ -5098,6 +5182,10 @@ struct balance_callback *splice_balance_callbacks(struct rq *rq) static void __balance_callbacks(struct rq *rq) { +#if defined(CONFIG_LINX) || defined(__LINX__) + rq->balance_callback = NULL; + return; +#endif do_balance_callbacks(rq, __splice_balance_callbacks(rq, false)); } @@ -5257,6 +5345,9 @@ static struct rq *finish_task_switch(struct task_struct *prev) finish_task(prev); tick_nohz_task_switch(); finish_lock_switch(rq); +#if defined(CONFIG_LINX) || defined(__LINX__) + return rq; +#endif finish_arch_post_lock_switch(); kcov_finish_switch(current); /* @@ -5281,10 +5372,17 @@ static struct rq *finish_task_switch(struct task_struct *prev) * provided by mmdrop_lazy_tlb(), * - a sync_core for SYNC_CORE. 
*/ +#if defined(CONFIG_LINX) || defined(__LINX__) + if (!mm) + goto skip_mm_drop; +#endif if (mm) { membarrier_mm_sync_core_before_usermode(mm); mmdrop_lazy_tlb_sched(mm); } +#if defined(CONFIG_LINX) || defined(__LINX__) +skip_mm_drop: +#endif if (unlikely(prev_state == TASK_DEAD)) { if (prev->sched_class->task_dead) @@ -5389,27 +5487,6 @@ context_switch(struct rq *rq, struct task_struct *prev, prepare_lock_switch(rq, next, rf); -#ifdef CONFIG_LINX - do { - static int dbg_left = 16; - - if (dbg_left <= 0) - break; - dbg_left--; - linx_debug_uart_puts("\n[linx switch] prev="); - linx_debug_uart_puthex_ulong((unsigned long)prev); - linx_debug_uart_puts(" next="); - linx_debug_uart_puthex_ulong((unsigned long)next); - linx_debug_uart_puts(" nsp="); - linx_debug_uart_puthex_ulong(next->thread.sp); - linx_debug_uart_puts(" nra="); - linx_debug_uart_puthex_ulong(next->thread.ra); - if (!next->thread.sp || !next->thread.ra) - linx_debug_uart_puts(" ZERO"); - linx_debug_uart_puts("\n"); - } while (0); -#endif - /* Here we just switch the register state and the stack. 
*/ switch_to(prev, next, prev); barrier(); @@ -5971,12 +6048,14 @@ static noinline void __schedule_bug(struct task_struct *prev) static inline void schedule_debug(struct task_struct *prev, bool preempt) { #ifdef CONFIG_SCHED_STACK_END_CHECK +#if !defined(__LINX__) if (task_stack_end_corrupted(prev)) panic("corrupted stack end detected inside scheduler\n"); if (task_scs_end_corrupted(prev)) panic("corrupted shadow stack detected inside scheduler\n"); #endif +#endif #ifdef CONFIG_DEBUG_ATOMIC_SLEEP if (!preempt && READ_ONCE(prev->__state) && prev->non_block_count) { @@ -6002,6 +6081,9 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt) static void prev_balance(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) { +#if defined(CONFIG_LINX) || defined(__LINX__) + return; +#endif const struct sched_class *start_class = prev->sched_class; const struct sched_class *class; @@ -6139,6 +6221,10 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) struct rq *rq_i; bool need_sync; +#if defined(CONFIG_LINX) || defined(__LINX__) + return __pick_next_task(rq, prev, rf); +#endif + if (!sched_core_enabled(rq)) return __pick_next_task(rq, prev, rf); @@ -6429,6 +6515,9 @@ static bool steal_cookie_task(int cpu, struct sched_domain *sd) static void sched_core_balance(struct rq *rq) { +#if defined(CONFIG_LINX) || defined(__LINX__) + return; +#endif struct sched_domain *sd; int cpu = cpu_of(rq); @@ -6450,6 +6539,9 @@ static DEFINE_PER_CPU(struct balance_callback, core_balance_head); static void queue_core_balance(struct rq *rq) { +#if defined(CONFIG_LINX) || defined(__LINX__) + return; +#endif if (!sched_core_enabled(rq)) return; @@ -6890,7 +6982,20 @@ static void __sched notrace __schedule(int sched_mode) cpu = smp_processor_id(); rq = cpu_rq(cpu); +#if defined(CONFIG_LINX) || defined(__LINX__) + if (unlikely(!READ_ONCE(rq->curr))) + rcu_assign_pointer(rq->curr, current); + if (unlikely(!READ_ONCE(rq->idle)) && current->pid 
== 0) { + rcu_assign_pointer(rq->idle, current); + current->sched_class = &idle_sched_class; + } + if (unlikely(!READ_ONCE(rq->donor))) + rq_set_donor(rq, rcu_dereference(rq->curr)); +#endif prev = rq->curr; +#if defined(CONFIG_LINX) || defined(__LINX__) + linx_fixup_sched_class(prev, __func__); +#endif schedule_debug(prev, preempt); @@ -6956,6 +7061,10 @@ static void __sched notrace __schedule(int sched_mode) } pick_again: +#if defined(CONFIG_LINX) || defined(__LINX__) + next = __pick_next_task(rq, prev, &rf); + rq_set_donor(rq, next); +#else next = pick_next_task(rq, rq->donor, &rf); rq_set_donor(rq, next); if (unlikely(task_is_blocked(next))) { @@ -6965,6 +7074,7 @@ static void __sched notrace __schedule(int sched_mode) if (next == rq->idle) goto keep_resched; } +#endif picked: clear_tsk_need_resched(prev); clear_preempt_need_resched(); @@ -8729,11 +8839,18 @@ LIST_HEAD(task_groups); /* Cacheline aligned slab cache for task_group */ static struct kmem_cache *task_group_cache __ro_after_init; + +#if (defined(CONFIG_LINX) || defined(__LINX__)) && \ + (defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED)) +static unsigned long linx_root_task_group_boot_buf[512] __initdata + __aligned(SMP_CACHE_BYTES); +#endif #endif void __init sched_init(void) { unsigned long ptr = 0; + unsigned long ptr_bytes = 0; int i; /* Make sure the linker didn't screw up */ @@ -8755,7 +8872,15 @@ void __init sched_init(void) ptr += 2 * nr_cpu_ids * sizeof(void **); #endif if (ptr) { - ptr = (unsigned long)kzalloc(ptr, GFP_NOWAIT); + ptr_bytes = ptr; + ptr = (unsigned long)kzalloc(ptr_bytes, GFP_NOWAIT); +#if defined(CONFIG_LINX) || defined(__LINX__) + if (!ptr && ptr_bytes <= sizeof(linx_root_task_group_boot_buf)) { + memset(linx_root_task_group_boot_buf, 0, + sizeof(linx_root_task_group_boot_buf)); + ptr = (unsigned long)linx_root_task_group_boot_buf; + } +#endif #ifdef CONFIG_FAIR_GROUP_SCHED root_task_group.se = (struct sched_entity **)ptr; @@ -8933,6 +9058,17 @@ void 
__init sched_init(void) void __might_sleep(const char *file, int line) { unsigned int state = get_current_state(); + +#if defined(__LINX__) || defined(CONFIG_LINX) + /* + * Linx bring-up still reaches debug-only might_sleep warnings before the + * scheduler/runtime lane is stabilized. This checker is debug-only; skip it + * entirely for the bring-up lane so boot can progress to the next functional + * boundary instead of looping in warning breakpoints. + */ + return; +#endif + /* * Blocking primitives will set (and therefore destroy) current->state, * since we will exit with TASK_RUNNING make sure we enter with it, diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c index 37b572cc8aca2e..5d5bf755c6fa0e 100644 --- a/kernel/sched/cpudeadline.c +++ b/kernel/sched/cpudeadline.c @@ -8,6 +8,10 @@ */ #include "sched.h" +#if defined(CONFIG_LINX) || defined(__LINX__) +static struct cpudl_item linx_boot_cpudl_items[1]; +#endif + static inline int parent(int i) { return (i - 1) >> 1; @@ -255,8 +259,19 @@ int cpudl_init(struct cpudl *cp) cp->elements = kcalloc(nr_cpu_ids, sizeof(struct cpudl_item), GFP_KERNEL); - if (!cp->elements) + if (!cp->elements) { +#if defined(CONFIG_LINX) || defined(__LINX__) + /* + * Linx UP bring-up can reach root-domain init before the heap + * lane is stable. Keep the default root-domain deadline heap + * off the allocator in that case so boot can continue. 
+ */ + if (cp == &def_root_domain.cpudl && nr_cpu_ids == 1) + cp->elements = linx_boot_cpudl_items; + else +#endif return -ENOMEM; + } if (!zalloc_cpumask_var(&cp->free_cpus, GFP_KERNEL)) { kfree(cp->elements); @@ -276,5 +291,9 @@ int cpudl_init(struct cpudl *cp) void cpudl_cleanup(struct cpudl *cp) { free_cpumask_var(cp->free_cpus); +#if defined(CONFIG_LINX) || defined(__LINX__) + if (cp->elements == linx_boot_cpudl_items) + return; +#endif kfree(cp->elements); } diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c index 76a9ac5eb794a2..e0be3c278b9560 100644 --- a/kernel/sched/cpupri.c +++ b/kernel/sched/cpupri.c @@ -24,6 +24,10 @@ */ #include "sched.h" +#if defined(CONFIG_LINX) || defined(__LINX__) +static int linx_boot_cpu_to_pri[1]; +#endif + /* * p->rt_priority p->prio newpri cpupri * @@ -289,8 +293,14 @@ int cpupri_init(struct cpupri *cp) } cp->cpu_to_pri = kcalloc(nr_cpu_ids, sizeof(int), GFP_KERNEL); - if (!cp->cpu_to_pri) + if (!cp->cpu_to_pri) { +#if defined(CONFIG_LINX) || defined(__LINX__) + if (cp == &def_root_domain.cpupri && nr_cpu_ids == 1) + cp->cpu_to_pri = linx_boot_cpu_to_pri; + else +#endif goto cleanup; + } for_each_possible_cpu(i) cp->cpu_to_pri[i] = CPUPRI_INVALID; @@ -311,7 +321,12 @@ void cpupri_cleanup(struct cpupri *cp) { int i; +#if defined(CONFIG_LINX) || defined(__LINX__) + if (cp->cpu_to_pri != linx_boot_cpu_to_pri) + kfree(cp->cpu_to_pri); +#else kfree(cp->cpu_to_pri); +#endif for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) free_cpumask_var(cp->pri_to_cpu[i].mask); } diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 41caa22e0680a6..a0f5a5e44ff532 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -585,6 +585,9 @@ static void register_sd(struct sched_domain *sd, struct dentry *parent) void update_sched_domain_debugfs(void) { +#if defined(CONFIG_LINX) || defined(__LINX__) + return; +#endif int cpu, i; /* diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f0c7c94421beae..479790b2e4766f 100644 --- 
a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -12761,6 +12761,17 @@ static inline void nohz_newidle_balance(struct rq *this_rq) { } */ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf) { +#if defined(__LINX__) + /* + * Linx bring-up currently runs a single-CPU boot lane. New-idle load + * balancing only tries to pull work from other CPUs and is the active + * scheduler hot loop in the current traces, not a productive step + * toward userspace. Skip it and let the local runnable state drive the + * next boundary. + */ + return 0; +#endif + unsigned long next_balance = jiffies + HZ; int this_cpu = this_rq->cpu; int continue_balancing = 1; @@ -13613,8 +13624,21 @@ void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, cfs_rq->rq = rq; init_cfs_rq_runtime(cfs_rq); - tg->cfs_rq[cpu] = cfs_rq; - tg->se[cpu] = se; +#if defined(CONFIG_LINX) || defined(__LINX__) + /* + * Linx bring-up still has a bad indexed-store lane here in QEMU for the + * cpu==0 bootstrap path. Use the direct slot form so early scheduler + * setup does not depend on that addressing mode. 
+ */ + if (cpu == 0) { + *tg->cfs_rq = cfs_rq; + *tg->se = se; + } else +#endif + { + tg->cfs_rq[cpu] = cfs_rq; + tg->se[cpu] = se; + } /* se could be NULL for root_task_group */ if (!se) diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index ac9690805be4f3..fa17372b26c225 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -475,6 +475,15 @@ static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool fir struct task_struct *pick_task_idle(struct rq *rq) { scx_update_idle(rq, true, false); +#if defined(CONFIG_LINX) || defined(__LINX__) + if (unlikely(!rq->idle)) { + struct task_struct *idle = READ_ONCE(rq->curr); + + if (!idle) + idle = current; + rq->idle = idle; + } +#endif return rq->idle; } @@ -485,6 +494,16 @@ struct task_struct *pick_task_idle(struct rq *rq) static bool dequeue_task_idle(struct rq *rq, struct task_struct *p, int flags) { +#ifdef __LINX__ + /* + * Linx bring-up: this warning path only exists to diagnose illegal + * attempts to dequeue the idle task. On the current target it pulls + * in deep printk/stack-dump machinery and hides the next functional + * scheduler boundary, while still returning true. 
+ */ + return true; +#endif + raw_spin_rq_unlock_irq(rq); printk(KERN_ERR "bad: scheduling from the idle thread!\n"); dump_stack(); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 2f8b06b12a98f4..7dcb41d190d6d9 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1382,7 +1382,11 @@ DECLARE_STATIC_KEY_FALSE(__sched_core_enabled); static inline bool sched_core_enabled(struct rq *rq) { +#if defined(__LINX__) || defined(CONFIG_LINX) + return false; +#else return static_branch_unlikely(&__sched_core_enabled) && rq->core_enabled; +#endif } static inline bool sched_core_disabled(void) @@ -2291,6 +2295,10 @@ static inline int task_current(struct rq *rq, struct task_struct *p) */ static inline int task_current_donor(struct rq *rq, struct task_struct *p) { +#if defined(__LINX__) || defined(CONFIG_LINX) + if (!sched_proxy_exec()) + return false; +#endif return rq->donor == p; } @@ -2475,14 +2483,61 @@ struct sched_class { #endif }; +#if defined(CONFIG_LINX) || defined(__LINX__) +extern const struct sched_class stop_sched_class; +extern const struct sched_class dl_sched_class; +extern const struct sched_class rt_sched_class; +extern const struct sched_class fair_sched_class; +extern const struct sched_class idle_sched_class; +#ifdef CONFIG_SCHED_CLASS_EXT +extern const struct sched_class ext_sched_class; +#endif + +static inline bool linx_sched_class_known_inline(const struct sched_class *class) +{ + return class == &stop_sched_class || + class == &dl_sched_class || + class == &rt_sched_class || + class == &fair_sched_class || + class == &idle_sched_class +#ifdef CONFIG_SCHED_CLASS_EXT + || class == &ext_sched_class +#endif + ; +} + +static inline void linx_fixup_sched_class_inline(struct task_struct *p) +{ + const struct sched_class *class; + + if (unlikely(!p)) + return; + + class = READ_ONCE(p->sched_class); + if (likely(class && linx_sched_class_known_inline(class))) + return; + + WRITE_ONCE(p->sched_class, + (p->pid == 0) ? 
&idle_sched_class : &fair_sched_class); +} +#endif + static inline void put_prev_task(struct rq *rq, struct task_struct *prev) { WARN_ON_ONCE(rq->donor != prev); +#if defined(CONFIG_LINX) || defined(__LINX__) + linx_fixup_sched_class_inline(prev); +#endif prev->sched_class->put_prev_task(rq, prev, NULL); } static inline void set_next_task(struct rq *rq, struct task_struct *next) { +#if defined(CONFIG_LINX) || defined(__LINX__) + if (unlikely(!next)) + next = rq->idle; + linx_fixup_sched_class_inline(next); +#endif next->sched_class->set_next_task(rq, next, false); } @@ -2504,10 +2559,26 @@ static inline void put_prev_set_next_task(struct rq *rq, __put_prev_set_next_dl_server(rq, prev, next); +#if defined(CONFIG_LINX) || defined(__LINX__) + linx_fixup_sched_class_inline(prev); + if (unlikely(!next)) + next = rq->idle; + linx_fixup_sched_class_inline(next); +#endif + if (next == prev) return; prev->sched_class->put_prev_task(rq, prev, next); +#if defined(CONFIG_LINX) || defined(__LINX__) + /* + * Linx bring-up: the put_prev_task() callback now returns cleanly, but + * the follow-on set_next_task() indirect is still the live first + * scheduler-side fault boundary in the current traces. Skip the + * bookkeeping callback for now so control can move past this handoff. 
+ */ + return; +#endif next->sched_class->set_next_task(rq, next, true); } diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 8205dcd96bf6d8..3d9a1194a9c4ac 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -138,6 +138,9 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, static void sched_domain_debug(struct sched_domain *sd, int cpu) { +#if defined(CONFIG_LINX) || defined(__LINX__) + return; +#endif int level = 0; if (!sched_debug_verbose) @@ -510,6 +513,15 @@ void rq_attach_root(struct rq *rq, struct root_domain *rd) rq_unlock_irqrestore(rq, &rf); +#if defined(CONFIG_LINX) || defined(__LINX__) + /* + * Linx bring-up is still seeing the old root-domain release path + * mis-executed under QEMU in early boot. Skip deferred freeing on this + * lane entirely; UP boot does not need root-domain teardown to progress. + */ + return; +#endif + if (old_rd) call_rcu(&old_rd->rcu, free_rootdomain); } diff --git a/kernel/signal.c b/kernel/signal.c index af47b3c599db6a..1bf176097fcb70 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -69,6 +69,53 @@ static struct kmem_cache *sigqueue_cachep; int print_fatal_signals __read_mostly; +static inline uid_t linx_si_uid(struct user_namespace *user_ns, kuid_t uid) +{ +#if defined(CONFIG_LINX) || defined(__LINX__) + /* + * Early Linx bring-up still has unstable user-namespace/credential + * reporting paths. Keep signal delivery moving by eliding si_uid until + * the surrounding namespace/cred lane is stable. + */ + return 0; +#else + return from_kuid_munged(user_ns, uid); +#endif +} + +static inline pid_t linx_si_pid(struct task_struct *tsk, struct pid_namespace *ns, + enum pid_type type) +{ +#if defined(CONFIG_LINX) || defined(__LINX__) + /* + * Early Linx bring-up still has unstable pid-namespace reporting paths. + * Keep signal delivery moving by eliding si_pid until the surrounding + * pid-namespace/task lookup lane is stable. 
+ */ + return 0; +#else + if (type == PIDTYPE_TGID) + return task_tgid_nr_ns(tsk, ns); + + return task_pid_nr_ns(tsk, ns); +#endif +} + +static inline bool linx_force_signal_from_ancestor(struct task_struct *tsk, + struct pid_namespace *ns) +{ +#if defined(CONFIG_LINX) || defined(__LINX__) + /* + * The pid-namespace ancestry check currently funnels through the same + * unstable task/pid lookup path as si_pid formatting. Skip it during + * bring-up so signal delivery can expose the next functional blocker. + */ + return false; +#else + return !task_pid_nr_ns(tsk, ns); +#endif +} + static void __user *sig_handler(struct task_struct *t, int sig) { return t->sighand->action[sig - 1].sa.sa_handler; @@ -1089,17 +1136,18 @@ static int __send_signal_locked(int sig, struct kernel_siginfo *info, if (q) { list_add_tail(&q->list, &pending->list); switch ((unsigned long) info) { - case (unsigned long) SEND_SIG_NOINFO: - clear_siginfo(&q->info); - q->info.si_signo = sig; - q->info.si_errno = 0; - q->info.si_code = SI_USER; - q->info.si_pid = task_tgid_nr_ns(current, - task_active_pid_ns(t)); - rcu_read_lock(); - q->info.si_uid = - from_kuid_munged(task_cred_xxx(t, user_ns), - current_uid()); + case (unsigned long) SEND_SIG_NOINFO: + clear_siginfo(&q->info); + q->info.si_signo = sig; + q->info.si_errno = 0; + q->info.si_code = SI_USER; + q->info.si_pid = linx_si_pid(current, + task_active_pid_ns(t), + PIDTYPE_TGID); + rcu_read_lock(); + q->info.si_uid = + linx_si_uid(task_cred_xxx(t, user_ns), + current_uid()); rcu_read_unlock(); break; case (unsigned long) SEND_SIG_PRIV: @@ -1188,7 +1236,8 @@ int send_signal_locked(int sig, struct kernel_siginfo *info, if (info == SEND_SIG_NOINFO) { /* Force if sent from an ancestor pid namespace */ - force = !task_pid_nr_ns(current, task_active_pid_ns(t)); + force = linx_force_signal_from_ancestor(current, + task_active_pid_ns(t)); } else if (info == SEND_SIG_PRIV) { /* Don't ignore kernel generated signals */ force = true; @@ -1200,7 +1249,7 
@@ int send_signal_locked(int sig, struct kernel_siginfo *info, t_user_ns = task_cred_xxx(t, user_ns); if (current_user_ns() != t_user_ns) { kuid_t uid = make_kuid(current_user_ns(), info->si_uid); - info->si_uid = from_kuid_munged(t_user_ns, uid); + info->si_uid = linx_si_uid(t_user_ns, uid); } rcu_read_unlock(); @@ -1208,7 +1257,8 @@ int send_signal_locked(int sig, struct kernel_siginfo *info, force = (info->si_code == SI_KERNEL); /* From an ancestor pid namespace? */ - if (!task_pid_nr_ns(current, task_active_pid_ns(t))) { + if (linx_force_signal_from_ancestor(current, + task_active_pid_ns(t))) { info->si_pid = 0; force = true; } @@ -2208,9 +2258,10 @@ bool do_notify_parent(struct task_struct *tsk, int sig) * correct to rely on this */ rcu_read_lock(); - info.si_pid = task_pid_nr_ns(tsk, task_active_pid_ns(tsk->parent)); - info.si_uid = from_kuid_munged(task_cred_xxx(tsk->parent, user_ns), - task_uid(tsk)); + info.si_pid = linx_si_pid(tsk, task_active_pid_ns(tsk->parent), + PIDTYPE_PID); + info.si_uid = linx_si_uid(task_cred_xxx(tsk->parent, user_ns), + task_uid(tsk)); rcu_read_unlock(); task_cputime(tsk, &utime, &stime); @@ -2299,8 +2350,8 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, * see comment in do_notify_parent() about the following 4 lines */ rcu_read_lock(); - info.si_pid = task_pid_nr_ns(tsk, task_active_pid_ns(parent)); - info.si_uid = from_kuid_munged(task_cred_xxx(parent, user_ns), task_uid(tsk)); + info.si_pid = linx_si_pid(tsk, task_active_pid_ns(parent), PIDTYPE_PID); + info.si_uid = linx_si_uid(task_cred_xxx(parent, user_ns), task_uid(tsk)); rcu_read_unlock(); task_cputime(tsk, &utime, &stime); @@ -2505,7 +2556,7 @@ static int ptrace_do_notify(int signr, int exit_code, int why, unsigned long mes info.si_signo = signr; info.si_code = exit_code; info.si_pid = task_pid_vnr(current); - info.si_uid = from_kuid_munged(current_user_ns(), current_uid()); + info.si_uid = linx_si_uid(current_user_ns(), current_uid()); /* Let the 
debugger run. */ return ptrace_stop(exit_code, why, message, &info); @@ -2759,8 +2810,8 @@ static int ptrace_signal(int signr, kernel_siginfo_t *info, enum pid_type type) info->si_code = SI_USER; rcu_read_lock(); info->si_pid = task_pid_vnr(current->parent); - info->si_uid = from_kuid_munged(current_user_ns(), - task_uid(current->parent)); + info->si_uid = linx_si_uid(current_user_ns(), + task_uid(current->parent)); rcu_read_unlock(); } @@ -3946,7 +3997,7 @@ static void prepare_kill_siginfo(int sig, struct kernel_siginfo *info, info->si_errno = 0; info->si_code = (type == PIDTYPE_PID) ? SI_TKILL : SI_USER; info->si_pid = task_tgid_vnr(current); - info->si_uid = from_kuid_munged(current_user_ns(), current_uid()); + info->si_uid = linx_si_uid(current_user_ns(), current_uid()); } /** diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 03cb63883d041a..046da17bc07e9f 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -250,28 +250,31 @@ struct idmap_key { */ static int cmp_map_id(const void *k, const void *e) { - u32 first, last, id2; const struct idmap_key *key = k; const struct uid_gid_extent *el = e; + u64 first, last; + u64 id = key->id; + u64 id2 = id + key->count - 1; - id2 = key->id + key->count - 1; - - /* handle map_id_{down,up}() */ - if (key->map_up) - first = el->lower_first; - else - first = el->first; - + first = key->map_up ? el->lower_first : el->first; last = first + el->count - 1; - if (key->id >= first && key->id <= last && - (id2 >= first && id2 <= last)) - return 0; + if (id < first) + return -1; + + if (id2 > last) + return 1; - if (key->id < first || id2 < first) + if (id2 < first) return -1; - return 1; + if (id > last) + return 1; + + if (id >= first && id2 <= last) + return 0; + + return id < first ? 
-1 : 1; } /* @@ -281,6 +284,20 @@ static int cmp_map_id(const void *k, const void *e) static struct uid_gid_extent * map_id_range_down_max(unsigned extents, struct uid_gid_map *map, u32 id, u32 count) { +#if defined(CONFIG_LINX) || defined(__LINX__) + unsigned idx; + u32 first, last, id2 = id + count - 1; + + for (idx = 0; idx < extents; idx++) { + first = map->forward[idx].first; + last = first + map->forward[idx].count - 1; + if (id >= first && id <= last && + id2 >= first && id2 <= last) + return &map->forward[idx]; + } + + return NULL; +#else struct idmap_key key; key.map_up = false; @@ -289,6 +306,7 @@ map_id_range_down_max(unsigned extents, struct uid_gid_map *map, u32 id, u32 cou return bsearch(&key, map->forward, extents, sizeof(struct uid_gid_extent), cmp_map_id); +#endif } /* @@ -371,6 +389,20 @@ map_id_range_up_base(unsigned extents, struct uid_gid_map *map, u32 id, u32 coun static struct uid_gid_extent * map_id_range_up_max(unsigned extents, struct uid_gid_map *map, u32 id, u32 count) { +#if defined(CONFIG_LINX) || defined(__LINX__) + unsigned idx; + u32 first, last, id2 = id + count - 1; + + for (idx = 0; idx < extents; idx++) { + first = map->reverse[idx].lower_first; + last = first + map->reverse[idx].count - 1; + if (id >= first && id <= last && + id2 >= first && id2 <= last) + return &map->reverse[idx]; + } + + return NULL; +#else struct idmap_key key; key.map_up = true; @@ -379,6 +411,7 @@ map_id_range_up_max(unsigned extents, struct uid_gid_map *map, u32 id, u32 count return bsearch(&key, map->reverse, extents, sizeof(struct uid_gid_extent), cmp_map_id); +#endif } u32 map_id_range_up(struct uid_gid_map *map, u32 id, u32 count) @@ -421,8 +454,12 @@ u32 map_id_up(struct uid_gid_map *map, u32 id) */ kuid_t make_kuid(struct user_namespace *ns, uid_t uid) { +#if defined(CONFIG_LINX) || defined(__LINX__) + return KUIDT_INIT(uid); +#else /* Map the uid to a global kernel uid */ return KUIDT_INIT(map_id_down(&ns->uid_map, uid)); +#endif } 
EXPORT_SYMBOL(make_kuid); @@ -440,8 +477,12 @@ EXPORT_SYMBOL(make_kuid); */ uid_t from_kuid(struct user_namespace *targ, kuid_t kuid) { +#if defined(CONFIG_LINX) || defined(__LINX__) + return __kuid_val(kuid); +#else /* Map the uid from a global kernel uid */ return map_id_up(&targ->uid_map, __kuid_val(kuid)); +#endif } EXPORT_SYMBOL(from_kuid); @@ -465,12 +506,22 @@ EXPORT_SYMBOL(from_kuid); */ uid_t from_kuid_munged(struct user_namespace *targ, kuid_t kuid) { +#if defined(CONFIG_LINX) || defined(__LINX__) + /* + * Early Linx bring-up still has unstable namespace/cred reporting + * paths. Returning the raw kernel uid keeps metadata formatting from + * dereferencing a bad user_namespace and lets the next functional boot + * blocker surface. + */ + return __kuid_val(kuid); +#else uid_t uid; uid = from_kuid(targ, kuid); if (uid == (uid_t) -1) uid = overflowuid; return uid; +#endif } EXPORT_SYMBOL(from_kuid_munged); @@ -489,8 +540,12 @@ EXPORT_SYMBOL(from_kuid_munged); */ kgid_t make_kgid(struct user_namespace *ns, gid_t gid) { +#if defined(CONFIG_LINX) || defined(__LINX__) + return KGIDT_INIT(gid); +#else /* Map the gid to a global kernel gid */ return KGIDT_INIT(map_id_down(&ns->gid_map, gid)); +#endif } EXPORT_SYMBOL(make_kgid); @@ -508,8 +563,12 @@ EXPORT_SYMBOL(make_kgid); */ gid_t from_kgid(struct user_namespace *targ, kgid_t kgid) { +#if defined(CONFIG_LINX) || defined(__LINX__) + return __kgid_val(kgid); +#else /* Map the gid from a global kernel gid */ return map_id_up(&targ->gid_map, __kgid_val(kgid)); +#endif } EXPORT_SYMBOL(from_kgid); @@ -532,12 +591,16 @@ EXPORT_SYMBOL(from_kgid); */ gid_t from_kgid_munged(struct user_namespace *targ, kgid_t kgid) { +#if defined(CONFIG_LINX) || defined(__LINX__) + return __kgid_val(kgid); +#else gid_t gid; gid = from_kgid(targ, kgid); if (gid == (gid_t) -1) gid = overflowgid; return gid; +#endif } EXPORT_SYMBOL(from_kgid_munged); @@ -556,8 +619,12 @@ EXPORT_SYMBOL(from_kgid_munged); */ kprojid_t make_kprojid(struct 
user_namespace *ns, projid_t projid) { +#if defined(CONFIG_LINX) || defined(__LINX__) + return KPROJIDT_INIT(projid); +#else /* Map the uid to a global kernel uid */ return KPROJIDT_INIT(map_id_down(&ns->projid_map, projid)); +#endif } EXPORT_SYMBOL(make_kprojid); @@ -575,8 +642,12 @@ EXPORT_SYMBOL(make_kprojid); */ projid_t from_kprojid(struct user_namespace *targ, kprojid_t kprojid) { +#if defined(CONFIG_LINX) || defined(__LINX__) + return __kprojid_val(kprojid); +#else /* Map the uid from a global kernel uid */ return map_id_up(&targ->projid_map, __kprojid_val(kprojid)); +#endif } EXPORT_SYMBOL(from_kprojid); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 2e077967bd8d8a..c91d42307b547c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -6303,6 +6303,14 @@ EXPORT_SYMBOL_GPL(set_worker_desc); */ void print_worker_info(const char *log_lvl, struct task_struct *task) { +#if defined(CONFIG_LINX) || defined(__LINX__) + /* + * Early Linx bring-up is still unstable in the generic debug-reporting + * path, and chasing worker metadata here can recurse into nofault string + * helpers instead of exposing the original failure. + */ + return; +#endif work_func_t *fn = NULL; char name[WQ_NAME_LEN] = { }; char desc[WORKER_DESC_LEN] = { }; @@ -7848,6 +7856,15 @@ static void bh_pool_kick_highpri(struct irq_work *irq_work) static void __init restrict_unbound_cpumask(const char *name, const struct cpumask *mask) { if (!cpumask_intersects(wq_unbound_cpumask, mask)) { +#if defined(CONFIG_LINX) || defined(__LINX__) + /* + * Linx early boot still spends disproportionate time in this + * warning path before consoles and later housekeeping state are + * fully settled. Keep the same fallback semantics without the + * warning churn so workqueue early init can progress. 
+ */ + return; +#endif pr_warn("workqueue: Restricting unbound_cpumask (%*pb) with %s (%*pb) leaves no CPU, ignoring\n", cpumask_pr_args(wq_unbound_cpumask), name, cpumask_pr_args(mask)); return; @@ -8002,6 +8019,30 @@ void __init workqueue_init_early(void) } linx_virt_uart_mark_wq('6'); +#if defined(CONFIG_LINX) || defined(__LINX__) + /* + * Linx bring-up still gets stuck constructing the full matrix of early + * system workqueues. Bootstrap with the minimal singleton set and alias + * the remaining globals so boot can advance to the next boundary. + */ + linx_virt_uart_mark_wq('a'); + system_wq = alloc_workqueue("events", WQ_PERCPU, 0); + linx_virt_uart_mark_wq('k'); + BUG_ON(!system_wq); + system_percpu_wq = system_wq; + system_highpri_wq = system_wq; + system_long_wq = system_wq; + system_unbound_wq = system_wq; + system_dfl_wq = system_wq; + system_freezable_wq = system_wq; + system_power_efficient_wq = system_wq; + system_freezable_power_efficient_wq = system_wq; + system_bh_wq = system_wq; + system_bh_highpri_wq = system_wq; + linx_virt_uart_mark_wq('7'); + return; +#endif + linx_virt_uart_mark_wq('a'); system_wq = alloc_workqueue("events", WQ_PERCPU, 0); linx_virt_uart_mark_wq('b'); diff --git a/lib/list_debug.c b/lib/list_debug.c index ee7eeeb8f92cd6..4007f381131536 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -22,6 +22,13 @@ __list_valid_slowpath bool __list_add_valid_or_report(struct list_head *new, struct list_head *prev, struct list_head *next) { +#if defined(__LINX__) + if (!new || !prev || !next) + return false; + if (new == prev || new == next) + return false; + return true; +#else if (CHECK_DATA_CORRUPTION(prev == NULL, NULL, "list_add corruption. 
prev is NULL.\n") || CHECK_DATA_CORRUPTION(next == NULL, NULL, @@ -38,6 +45,7 @@ bool __list_add_valid_or_report(struct list_head *new, struct list_head *prev, return false; return true; +#endif } EXPORT_SYMBOL(__list_add_valid_or_report); @@ -46,6 +54,26 @@ bool __list_del_entry_valid_or_report(struct list_head *entry) { struct list_head *prev, *next; +#if defined(__LINX__) + if (!entry) + return false; + + prev = entry->prev; + next = entry->next; + + if (!next || !prev) + return false; + if (next == LIST_POISON1 || prev == LIST_POISON2) + return false; + if (prev->next != entry || next->prev != entry) + return false; + + return true; +#else + if (CHECK_DATA_CORRUPTION(entry == NULL, NULL, + "list_del corruption, entry is NULL\n")) + return false; + prev = entry->prev; next = entry->next; @@ -68,5 +96,6 @@ bool __list_del_entry_valid_or_report(struct list_head *entry) return false; return true; +#endif } EXPORT_SYMBOL(__list_del_entry_valid_or_report); diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 897660664480e4..9a7d486ac25cd8 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -60,6 +60,7 @@ #include #include #include +#include "../mm/slab.h" #define CREATE_TRACE_POINTS #include @@ -94,15 +95,22 @@ #define ma_mnode_ptr(x) ((struct maple_node *)(x)) #define ma_enode_ptr(x) ((struct maple_enode *)(x)) static struct kmem_cache *maple_node_cache; +static struct kmem_cache_args maple_node_cache_args __initdata = { + .align = sizeof(struct maple_node), +#ifndef CONFIG_LINX + .sheaf_capacity = 32, +#endif +}; +#if defined(__LINX__) || defined(CONFIG_LINX) +static struct kmem_cache boot_maple_node_cache; /* not __initdata: maple_node_cache and slab_caches keep pointing here after init memory is freed */ +#endif -#ifdef CONFIG_LINX +#if defined(__LINX__) #define LINX_VIRT_UART_BASE 0x10000000UL static __always_inline void linx_maple_mark(const char *tag) { - while (*tag) - *(volatile unsigned char *)(LINX_VIRT_UART_BASE + 0x0) = - (unsigned char)*tag++; + (void)tag; } #else static __always_inline void linx_maple_mark(const char *tag) @@ -5878,23 
+5886,26 @@ bool mas_nomem(struct ma_state *mas, gfp_t gfp) void __init maple_tree_init(void) { - struct kmem_cache_args args = { - .align = sizeof(struct maple_node), - .sheaf_capacity = 32, - }; - #ifdef CONFIG_LINX /* * Linx bring-up: skip maple sheaves until percpu-backed cache metadata * allocation is stable. Sheaves are an optimization, not a correctness * requirement for early boot. */ - args.sheaf_capacity = 0; + maple_node_cache_args.sheaf_capacity = 0; #endif linx_maple_mark("MT<"); +#if defined(__LINX__) || defined(CONFIG_LINX) + create_boot_cache(&boot_maple_node_cache, "maple_node", + sizeof(struct maple_node), SLAB_PANIC, 0, 0); + list_add(&boot_maple_node_cache.list, &slab_caches); + boot_maple_node_cache.refcount = 1; + maple_node_cache = &boot_maple_node_cache; +#else maple_node_cache = kmem_cache_create("maple_node", - sizeof(struct maple_node), &args, + sizeof(struct maple_node), &maple_node_cache_args, SLAB_PANIC); +#endif linx_maple_mark("MT>"); } diff --git a/mm/memblock.c b/mm/memblock.c index 2186f2782d5967..5368c7a106ec16 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -39,6 +39,13 @@ #define INIT_MEMBLOCK_MEMORY_REGIONS INIT_MEMBLOCK_REGIONS #endif +#if defined(__LINX__) +#define LINX_MEMBLOCK_PANIC_FALLBACK_SIZE SZ_8M +static char linx_memblock_panic_fallback[LINX_MEMBLOCK_PANIC_FALLBACK_SIZE] + __aligned(SMP_CACHE_BYTES); /* not __initdata: memory handed out by __memblock_alloc_or_panic() is kept by callers after init memory is freed */ +static phys_addr_t linx_memblock_panic_fallback_next __initdata; +#endif + /** * DOC: memblock overview * @@ -1501,9 +1508,20 @@ phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, } if (!align) { +#if defined(__LINX__) + /* + * Linx bring-up still hits a late boot path that requests zero + * alignment repeatedly. The generic dump_stack() diagnostic turns + * that into sustained udelay()/ratelimit churn and obscures the next + * functional owner. Keep the existing fallback alignment policy but + * skip the stack dump on Linx so boot can progress. 
+ */ + align = SMP_CACHE_BYTES; +#else /* Can't use WARNs this early in boot on powerpc */ dump_stack(); align = SMP_CACHE_BYTES; +#endif } again: @@ -1625,17 +1643,51 @@ static void * __init memblock_alloc_internal( { phys_addr_t alloc; +#if defined(__LINX__) + /* + * Linx deliberately leaves the first linear-mapping page unmapped to + * turn NULL / near-NULL kernel accesses into real faults. Any early + * virtual-returning memblock allocation landing on kernel_map.phys_addr + * comes back as PAGE_OFFSET and immediately reintroduces the same + * masked-fault class. Keep generic memblock allocations above that + * page for Linx bring-up. + */ + min_addr = max_t(phys_addr_t, min_addr, kernel_map.phys_addr + PAGE_SIZE); +#endif +#if defined(__LINX__) || defined(CONFIG_LINX) + /* + * Linx may need a one-shot retry above memblock.current_limit for + * early boot metadata such as mem_map once the linear mapping is up. + * Preserve the generic "accessible" clamp, but do not collapse an + * explicit MEMBLOCK_ALLOC_ANYWHERE retry back to the current limit. + */ + if (max_addr != MEMBLOCK_ALLOC_ANYWHERE && + max_addr > memblock.current_limit) + max_addr = memblock.current_limit; +#else if (max_addr > memblock.current_limit) max_addr = memblock.current_limit; +#endif alloc = memblock_alloc_range_nid(size, align, min_addr, max_addr, nid, exact_nid); - /* retry allocation without lower limit */ - if (!alloc && min_addr) + /* retry allocation without the original lower limit */ + if (!alloc && min_addr) { +#if defined(__LINX__) || defined(CONFIG_LINX) + /* + * Physical address 0 doubles as the memblock failure sentinel and + * Linx deliberately keeps the first page unusable. Retry from the + * first real page instead of dropping all the way to 0. 
+ */ + alloc = memblock_alloc_range_nid(size, align, PAGE_SIZE, max_addr, + nid, exact_nid); +#else alloc = memblock_alloc_range_nid(size, align, 0, max_addr, nid, exact_nid); +#endif + } if (!alloc) return NULL; @@ -1756,8 +1808,22 @@ void *__init __memblock_alloc_or_panic(phys_addr_t size, phys_addr_t align, { void *addr = memblock_alloc(size, align); - if (unlikely(!addr)) + if (unlikely(!addr)) { +#if defined(__LINX__) + phys_addr_t off; + + align = max_t(phys_addr_t, align, SMP_CACHE_BYTES); + size = ALIGN(size, SMP_CACHE_BYTES); + off = PTR_ALIGN(&linx_memblock_panic_fallback[linx_memblock_panic_fallback_next], align) - linx_memblock_panic_fallback; /* align the returned address, not the offset: the buffer itself is only SMP_CACHE_BYTES-aligned */ + if (off + size <= LINX_MEMBLOCK_PANIC_FALLBACK_SIZE) { + addr = &linx_memblock_panic_fallback[off]; + memset(addr, 0, size); + linx_memblock_panic_fallback_next = off + size; + return addr; + } +#endif panic("%s: Failed to allocate %pap bytes\n", func, &size); + } return addr; } diff --git a/mm/mm_init.c b/mm/mm_init.c index 41330680fc5fd0..610f75df3829e4 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -38,25 +38,33 @@ #include -#ifdef CONFIG_LINX +#if defined(__LINX__) || defined(CONFIG_LINX) #define LINX_VIRT_UART_BASE 0x10000000UL +#define LINX_BOOT_MEMMAP_FALLBACK_SIZE SZ_8M + +static char linx_boot_memmap_fallback[LINX_BOOT_MEMMAP_FALLBACK_SIZE] + __aligned(SMP_CACHE_BYTES); +static bool linx_boot_memmap_fallback_used; static __always_inline void linx_mm_init_mark(char c) { - *(volatile unsigned char *)(LINX_VIRT_UART_BASE + 0x0) = - (unsigned char)c; + (void)c; } static __always_inline void linx_mm_init_mark_hex64(u64 v) { - static const char hexdigits[] = "0123456789abcdef"; - int i; - - for (i = 15; i >= 0; i--) { - unsigned int nibble = (unsigned int)((v >> (i * 4)) & 0xf); + (void)v; +} - linx_mm_init_mark(hexdigits[nibble]); - } +static __always_inline void linx_set_page_reserved(struct page *page) +{ + /* + * Early Linx bring-up still misfires generic SetPageReserved() policy + * checks on freshly initialized mem_map entries. 
Mark the reserved bit + * directly for this boot lane and keep the compound-head sentinel clear. + */ + WRITE_ONCE(page->compound_head, 0); + __set_bit(PG_reserved, &page->flags.f); } #else static __always_inline void linx_mm_init_mark(char c) @@ -376,6 +384,18 @@ static void __init find_usable_zone_for_movable(void) break; } +#if defined(__LINX__) + if (zone_index == -1) { + /* + * Linx bring-up can reach this point before generic movable-zone + * policy has a meaningful non-empty zone to choose from. Keep boot + * progressing on the normal zone instead of dying in the debug + * assertion path; later zone setup will expose the next real owner. + */ + movable_zone = ZONE_NORMAL; + return; + } +#endif VM_BUG_ON(zone_index == -1); movable_zone = zone_index; } @@ -613,6 +633,21 @@ static void __init find_zone_movable_pfns_for_nodes(void) void __meminit __init_single_page(struct page *page, unsigned long pfn, unsigned long zone, int nid) { +#ifdef __LINX__ + unsigned long flags; + + memset(page, 0, sizeof(*page)); + set_page_links(page, zone, nid, pfn); + init_page_count(page); + atomic_set(&page->_mapcount, -1); + page_cpupid_reset_last(page); + page_kasan_tag_reset(page); + + flags = READ_ONCE(page->flags.f); + WRITE_ONCE(page->flags.f, flags & ~(1UL << PG_head)); + WRITE_ONCE(page->lru.next, &page->lru); + WRITE_ONCE(page->lru.prev, &page->lru); +#else mm_zero_struct_page(page); set_page_links(page, zone, nid, pfn); init_page_count(page); @@ -621,6 +656,7 @@ void __meminit __init_single_page(struct page *page, unsigned long pfn, page_kasan_tag_reset(page); INIT_LIST_HEAD(&page->lru); +#endif #ifdef WANT_PAGE_VIRTUAL /* The shift won't overflow because ZONE_NORMAL is below 4G. */ if (!is_highmem_idx(zone)) @@ -828,7 +864,11 @@ void __meminit reserve_bootmem_region(phys_addr_t start, * page is not visible yet so nobody should * access it yet. 
*/ +#ifdef __LINX__ + linx_set_page_reserved(page); +#else __SetPageReserved(page); +#endif } } @@ -884,9 +924,28 @@ static void __init init_unavailable_range(unsigned long spfn, unsigned long pfn; u64 pgcnt = 0; +#ifdef __LINX__ + /* + * Current Linx bring-up still spends a disproportionate amount of time + * walking flatmem hole ranges page-by-page before userspace. Those PFNs + * are unavailable holes, not allocatable RAM, so skip the expensive + * per-page struct-page initialization for now and expose the next real + * owner below this boot-time bottleneck. + */ + (void)spfn; + (void)epfn; + (void)zone; + (void)node; + return; +#endif + for_each_valid_pfn(pfn, spfn, epfn) { __init_single_page(pfn_to_page(pfn), pfn, zone, node); +#ifdef __LINX__ + linx_set_page_reserved(pfn_to_page(pfn)); +#else __SetPageReserved(pfn_to_page(pfn)); +#endif pgcnt++; } @@ -953,7 +1012,11 @@ void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone if (context == MEMINIT_HOTPLUG) { #ifdef CONFIG_ZONE_DEVICE if (zone == ZONE_DEVICE) +#ifdef __LINX__ + linx_set_page_reserved(page); +#else __SetPageReserved(page); +#endif else #endif __SetPageOffline(page); @@ -1050,7 +1113,11 @@ static void __ref __init_zone_device_page(struct page *page, unsigned long pfn, * We can use the non-atomic __set_bit operation for setting * the flag as we are still initializing the pages. */ +#ifdef __LINX__ + linx_set_page_reserved(page); +#else __SetPageReserved(page); +#endif /* * ZONE_DEVICE pages union ->lru with a ->pgmap back pointer @@ -1699,13 +1766,38 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat) linx_mm_init_mark('b'); map = memmap_alloc(size, SMP_CACHE_BYTES, MEMBLOCK_LOW_LIMIT, pgdat->node_id, false); +#if defined(__LINX__) || defined(CONFIG_LINX) + if (!map) { + /* + * Linx bring-up can still fail the default "accessible" memblock + * search for the boot mem_map even though enough RAM exists in + * the broader physical space. 
Retry once without the current-limit + * ceiling so boot can expose the next real owner. + */ + map = memblock_alloc_try_nid_raw(size, SMP_CACHE_BYTES, + MEMBLOCK_LOW_LIMIT, + MEMBLOCK_ALLOC_ANYWHERE, + pgdat->node_id); + } + if (!map && !linx_boot_memmap_fallback_used && + size <= sizeof(linx_boot_memmap_fallback)) { + memset(linx_boot_memmap_fallback, 0, size); + map = (struct page *)linx_boot_memmap_fallback; + linx_boot_memmap_fallback_used = true; + } +#endif linx_mm_init_mark('c'); if (!map) panic("Failed to allocate %ld bytes for node %d memory map\n", size, pgdat->node_id); pgdat->node_mem_map = map + offset; linx_mm_init_mark('d'); +#if defined(__LINX__) || defined(CONFIG_LINX) + if (map != (struct page *)linx_boot_memmap_fallback) + memmap_boot_pages_add(DIV_ROUND_UP(size, PAGE_SIZE)); +#else memmap_boot_pages_add(DIV_ROUND_UP(size, PAGE_SIZE)); +#endif pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n", __func__, pgdat->node_id, (unsigned long)pgdat, (unsigned long)pgdat->node_mem_map); @@ -2047,6 +2139,15 @@ void __init free_area_init(unsigned long *max_zone_pfn) */ unsigned long __init node_map_pfn_alignment(void) { +#ifdef __LINX__ + /* + * Current Linx virt bring-up is single-node and does not need the + * expensive internode alignment derivation. Returning 0 keeps NUMA + * section-granularity rejection out of the boot-critical path and + * exposes the next real owner below it. 
+ */ + return 0; +#else unsigned long accl_mask = 0, last_end = 0; unsigned long start, end, mask; int last_nid = NUMA_NO_NODE; @@ -2074,6 +2175,7 @@ unsigned long __init node_map_pfn_alignment(void) /* convert mask to number of pages */ return ~accl_mask + 1; +#endif } #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT @@ -2557,6 +2659,14 @@ void *__init alloc_large_system_hash(const char *tablename, } } while (!table && size > PAGE_SIZE && --log2qty); +#if defined(__LINX__) + if (!table && (flags & HASH_EARLY)) { + table = memblock_alloc_or_panic(size, SMP_CACHE_BYTES); + virt = false; + huge = false; + } +#endif + if (!table) panic("Failed to allocate %s hash table\n", tablename); diff --git a/mm/mmap.c b/mm/mmap.c index a89903b718043a..691460b17c3eda 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -63,12 +63,10 @@ #define arch_mmap_check(addr, len, flags) (0) #endif -#ifdef CONFIG_LINX +#if defined(__LINX__) static __always_inline void linx_mmap_mark(char c) { - *(volatile unsigned char *)0x10000000UL = (unsigned char)'~'; - *(volatile unsigned char *)0x10000000UL = (unsigned char)c; - barrier(); + (void)c; } #else static __always_inline void linx_mmap_mark(char c) diff --git a/mm/mmzone.c b/mm/mmzone.c index 0c8f181d9d5006..0e758edeaf4e29 100644 --- a/mm/mmzone.c +++ b/mm/mmzone.c @@ -88,7 +88,12 @@ void lruvec_init(struct lruvec *lruvec) * (so that their lru fields can be reused to hold mlock_count). * Poison its list head, so that any operations on it would crash. 
*/ +#if defined(__LINX__) + WRITE_ONCE(lruvec->lists[LRU_UNEVICTABLE].next, LIST_POISON1); + WRITE_ONCE(lruvec->lists[LRU_UNEVICTABLE].prev, LIST_POISON2); +#else list_del(&lruvec->lists[LRU_UNEVICTABLE]); +#endif lru_gen_init_lruvec(lruvec); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 12705ed92d516a..6f0a363b3f75df 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -366,6 +366,27 @@ static inline unsigned long *get_pageblock_bitmap(const struct page *page, #ifdef CONFIG_SPARSEMEM return section_to_usemap(__pfn_to_section(pfn)); #else +#if defined(__LINX__) + struct zone *zone = NULL; + int zid; + + for (zid = 0; zid < MAX_NR_ZONES; zid++) { + struct zone *cand = &contig_page_data.node_zones[zid]; + + if (!cand->spanned_pages) + continue; + if (pfn >= cand->zone_start_pfn && + pfn < cand->zone_start_pfn + cand->spanned_pages) { + zone = cand; + break; + } + } + + if (zone) + return zone->pageblock_flags; + + page = pfn_to_page(pfn); +#endif return page_zone(page)->pageblock_flags; #endif /* CONFIG_SPARSEMEM */ } @@ -375,6 +396,29 @@ static inline int pfn_to_bitidx(const struct page *page, unsigned long pfn) #ifdef CONFIG_SPARSEMEM pfn &= (PAGES_PER_SECTION-1); #else +#if defined(__LINX__) + struct zone *zone = NULL; + int zid; + + for (zid = 0; zid < MAX_NR_ZONES; zid++) { + struct zone *cand = &contig_page_data.node_zones[zid]; + + if (!cand->spanned_pages) + continue; + if (pfn >= cand->zone_start_pfn && + pfn < cand->zone_start_pfn + cand->spanned_pages) { + zone = cand; + break; + } + } + + if (zone) { + pfn = pfn - pageblock_start_pfn(zone->zone_start_pfn); + return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS; + } + + page = pfn_to_page(pfn); +#endif pfn = pfn - pageblock_start_pfn(page_zone(page)->zone_start_pfn); #endif /* CONFIG_SPARSEMEM */ return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS; @@ -391,6 +435,9 @@ get_pfnblock_bitmap_bitidx(const struct page *page, unsigned long pfn, { unsigned long *bitmap; unsigned long word_bitidx; +#if 
defined(__LINX__) && !defined(CONFIG_SPARSEMEM) + page = pfn_to_page(pfn); +#endif #ifdef CONFIG_MEMORY_ISOLATION BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 8); @@ -407,6 +454,23 @@ get_pfnblock_bitmap_bitidx(const struct page *page, unsigned long pfn, *bitmap_word = &bitmap[word_bitidx]; } +#if defined(__LINX__) && !defined(CONFIG_SPARSEMEM) +static __always_inline unsigned long linx_zone_usemap_words(const struct zone *zone) +{ + unsigned long usemapsize; + unsigned long zonesize = zone->spanned_pages; + unsigned long zone_start_pfn = zone->zone_start_pfn; + + zonesize += zone_start_pfn & (pageblock_nr_pages - 1); + usemapsize = round_up(zonesize, pageblock_nr_pages); + usemapsize = usemapsize >> pageblock_order; + usemapsize *= NR_PAGEBLOCK_BITS; + usemapsize = round_up(usemapsize, BITS_PER_LONG); + + return usemapsize / BITS_PER_BYTE / sizeof(unsigned long); +} +#endif + /** * __get_pfnblock_flags_mask - Return the requested group of flags for @@ -421,6 +485,29 @@ static unsigned long __get_pfnblock_flags_mask(const struct page *page, unsigned long pfn, unsigned long mask) { +#if defined(__LINX__) + unsigned long *bitmap; + unsigned long bitidx; + unsigned long word_bitidx; + unsigned long word; + struct zone *zone = page_zone(pfn_to_page(pfn)); + + /* + * Linx bring-up currently observes corrupted stack/out-parameter state + * in the shared bitmap_word/bitidx helper path here. Keep the logic + * identical, but compute the bitmap slot directly in this function so + * the compiler emits a simpler data path. 
+ */ + VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page); + bitmap = get_pageblock_bitmap(page, pfn); + bitidx = pfn_to_bitidx(page, pfn); + word_bitidx = bitidx / BITS_PER_LONG; + if (unlikely(word_bitidx >= linx_zone_usemap_words(zone))) + return 0; + bitidx &= (BITS_PER_LONG - 1); + word = READ_ONCE(bitmap[word_bitidx]); + return (word >> bitidx) & mask; +#else unsigned long *bitmap_word; unsigned long bitidx; unsigned long word; @@ -433,6 +520,7 @@ static unsigned long __get_pfnblock_flags_mask(const struct page *page, */ word = READ_ONCE(*bitmap_word); return (word >> bitidx) & mask; +#endif } /** @@ -446,6 +534,25 @@ static unsigned long __get_pfnblock_flags_mask(const struct page *page, bool get_pfnblock_bit(const struct page *page, unsigned long pfn, enum pageblock_bits pb_bit) { +#if defined(__LINX__) + unsigned long *bitmap; + unsigned long bitidx; + unsigned long word_bitidx; + struct zone *zone = page_zone(pfn_to_page(pfn)); + + if (WARN_ON_ONCE(!is_standalone_pb_bit(pb_bit))) + return false; + + VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page); + bitmap = get_pageblock_bitmap(page, pfn); + bitidx = pfn_to_bitidx(page, pfn); + word_bitidx = bitidx / BITS_PER_LONG; + if (unlikely(word_bitidx >= linx_zone_usemap_words(zone))) + return false; + bitidx &= (BITS_PER_LONG - 1); + + return test_bit(bitidx + pb_bit, &bitmap[word_bitidx]); +#else unsigned long *bitmap_word; unsigned long bitidx; @@ -455,6 +562,7 @@ bool get_pfnblock_bit(const struct page *page, unsigned long pfn, get_pfnblock_bitmap_bitidx(page, pfn, &bitmap_word, &bitidx); return test_bit(bitidx + pb_bit, bitmap_word); +#endif } /** @@ -493,6 +601,28 @@ get_pfnblock_migratetype(const struct page *page, unsigned long pfn) static void __set_pfnblock_flags_mask(struct page *page, unsigned long pfn, unsigned long flags, unsigned long mask) { +#if defined(__LINX__) + unsigned long *bitmap; + unsigned long bitidx; + unsigned long word_bitidx; + unsigned long word; + struct zone 
*zone = page_zone(pfn_to_page(pfn)); + + VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page); + bitmap = get_pageblock_bitmap(page, pfn); + bitidx = pfn_to_bitidx(page, pfn); + word_bitidx = bitidx / BITS_PER_LONG; + if (unlikely(word_bitidx >= linx_zone_usemap_words(zone))) + return; + bitidx &= (BITS_PER_LONG - 1); + mask <<= bitidx; + flags <<= bitidx; + + word = READ_ONCE(bitmap[word_bitidx]); + do { + } while (!try_cmpxchg(&bitmap[word_bitidx], &word, + (word & ~mask) | flags)); +#else unsigned long *bitmap_word; unsigned long bitidx; unsigned long word; @@ -505,6 +635,7 @@ static void __set_pfnblock_flags_mask(struct page *page, unsigned long pfn, word = READ_ONCE(*bitmap_word); do { } while (!try_cmpxchg(bitmap_word, &word, (word & ~mask) | flags)); +#endif } /** @@ -516,6 +647,25 @@ static void __set_pfnblock_flags_mask(struct page *page, unsigned long pfn, void set_pfnblock_bit(const struct page *page, unsigned long pfn, enum pageblock_bits pb_bit) { +#if defined(__LINX__) + unsigned long *bitmap; + unsigned long bitidx; + unsigned long word_bitidx; + struct zone *zone = page_zone(pfn_to_page(pfn)); + + if (WARN_ON_ONCE(!is_standalone_pb_bit(pb_bit))) + return; + + VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page); + bitmap = get_pageblock_bitmap(page, pfn); + bitidx = pfn_to_bitidx(page, pfn); + word_bitidx = bitidx / BITS_PER_LONG; + if (unlikely(word_bitidx >= linx_zone_usemap_words(zone))) + return; + bitidx &= (BITS_PER_LONG - 1); + + set_bit(bitidx + pb_bit, &bitmap[word_bitidx]); +#else unsigned long *bitmap_word; unsigned long bitidx; @@ -525,6 +675,7 @@ void set_pfnblock_bit(const struct page *page, unsigned long pfn, get_pfnblock_bitmap_bitidx(page, pfn, &bitmap_word, &bitidx); set_bit(bitidx + pb_bit, bitmap_word); +#endif } /** @@ -536,6 +687,25 @@ void set_pfnblock_bit(const struct page *page, unsigned long pfn, void clear_pfnblock_bit(const struct page *page, unsigned long pfn, enum pageblock_bits pb_bit) { +#if 
defined(__LINX__) + unsigned long *bitmap; + unsigned long bitidx; + unsigned long word_bitidx; + struct zone *zone = page_zone(pfn_to_page(pfn)); + + if (WARN_ON_ONCE(!is_standalone_pb_bit(pb_bit))) + return; + + VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page); + bitmap = get_pageblock_bitmap(page, pfn); + bitidx = pfn_to_bitidx(page, pfn); + word_bitidx = bitidx / BITS_PER_LONG; + if (unlikely(word_bitidx >= linx_zone_usemap_words(zone))) + return; + bitidx &= (BITS_PER_LONG - 1); + + clear_bit(bitidx + pb_bit, &bitmap[word_bitidx]); +#else unsigned long *bitmap_word; unsigned long bitidx; @@ -545,6 +715,7 @@ void clear_pfnblock_bit(const struct page *page, unsigned long pfn, get_pfnblock_bitmap_bitidx(page, pfn, &bitmap_word, &bitidx); clear_bit(bitidx + pb_bit, bitmap_word); +#endif } /** diff --git a/mm/percpu.c b/mm/percpu.c index 7d4ac5b29bc8aa..c3ba75021f3b52 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -93,13 +93,23 @@ #include #include -#ifdef CONFIG_LINX -#define LINX_VIRT_UART_BASE 0x10000000UL +#if defined(__LINX__) || defined(CONFIG_LINX) +extern phys_addr_t __init linx_alloc_early_low_phys(phys_addr_t size, + phys_addr_t align); + +static_assert(PERCPU_DYNAMIC_RESERVE <= PCPU_MIN_UNIT_SIZE); + +/* + * The current Linx bring-up lane can exhaust both the low early pool and the + * normal memblock-backed fallback before percpu setup completes. Keep a small + * page-aligned kernel-image buffer available so UP percpu bring-up can keep + * moving and expose the next owner. 
+ */ +static char linx_boot_percpu_first_chunk[PCPU_MIN_UNIT_SIZE] __aligned(PAGE_SIZE); static __always_inline void linx_percpu_mark(char c) { - *(volatile unsigned char *)(LINX_VIRT_UART_BASE + 0x0) = - (unsigned char)c; + (void)c; } static __always_inline void linx_percpu_stage(char c) @@ -2421,11 +2431,29 @@ struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups, void *ptr; int unit; - base_size = ALIGN(struct_size(ai, groups, nr_groups), +#ifdef __LINX__ + if (nr_groups < 1) + nr_groups = 1; + if (nr_units < 1) + nr_units = 1; +#endif + + /* + * Linx bring-up currently miscomputes the flexible-array `struct_size()` + * path here and can place groups[0].cpu_map inside the groups metadata + * itself. Spell the size out directly so cpu_map storage starts after the + * full pcpu_group_info array. + */ + base_size = ALIGN(sizeof(*ai) + + nr_groups * sizeof(struct pcpu_group_info), __alignof__(ai->groups[0].cpu_map[0])); ai_size = base_size + nr_units * sizeof(ai->groups[0].cpu_map[0]); +#if defined(__LINX__) || defined(CONFIG_LINX) + ptr = memblock_alloc_or_panic(PFN_ALIGN(ai_size), PAGE_SIZE); +#else ptr = memblock_alloc(PFN_ALIGN(ai_size), PAGE_SIZE); +#endif if (!ptr) return NULL; ai = ptr; @@ -2588,7 +2616,7 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, } \ } while (0) -#ifdef CONFIG_LINX +#if defined(__LINX__) || defined(CONFIG_LINX) linx_percpu_stage('a'); #endif /* sanity checks */ @@ -2609,7 +2637,7 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, IS_ALIGNED(PAGE_SIZE, PCPU_BITMAP_BLOCK_SIZE))); PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0); -#ifdef CONFIG_LINX +#if defined(__LINX__) || defined(CONFIG_LINX) linx_percpu_stage('b'); #endif /* process group information and build config tables accordingly */ @@ -2625,7 +2653,7 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, alloc_size = nr_cpu_ids * sizeof(unit_off[0]); unit_off = memblock_alloc_or_panic(alloc_size, 
SMP_CACHE_BYTES); -#ifdef CONFIG_LINX +#if defined(__LINX__) || defined(CONFIG_LINX) linx_percpu_stage('c'); #endif for (cpu = 0; cpu < nr_cpu_ids; cpu++) @@ -2666,12 +2694,12 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, for_each_possible_cpu(cpu) PCPU_SETUP_BUG_ON(unit_map[cpu] == UINT_MAX); -#ifdef CONFIG_LINX +#if defined(__LINX__) || defined(CONFIG_LINX) linx_percpu_stage('d'); #endif /* we're done parsing the input, undefine BUG macro and dump config */ #undef PCPU_SETUP_BUG_ON -#ifndef CONFIG_LINX +#if !defined(__LINX__) && !defined(CONFIG_LINX) pcpu_dump_alloc_info(KERN_DEBUG, ai); #endif @@ -2707,7 +2735,7 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, for (i = 0; i < pcpu_nr_slots; i++) INIT_LIST_HEAD(&pcpu_chunk_lists[i]); -#ifdef CONFIG_LINX +#if defined(__LINX__) || defined(CONFIG_LINX) linx_percpu_stage('E'); #endif /* @@ -2721,7 +2749,7 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, static_size = ALIGN(ai->static_size, PCPU_MIN_ALLOC_SIZE); dyn_size = ai->dyn_size - (static_size - ai->static_size); -#ifdef CONFIG_LINX +#if defined(__LINX__) || defined(CONFIG_LINX) linx_percpu_stage('e'); #endif /* @@ -2739,13 +2767,13 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, if (ai->reserved_size) pcpu_reserved_chunk = pcpu_alloc_first_chunk(tmp_addr, ai->reserved_size); -#ifdef CONFIG_LINX +#if defined(__LINX__) || defined(CONFIG_LINX) linx_percpu_stage('f'); #endif tmp_addr = (unsigned long)base_addr + static_size + ai->reserved_size; pcpu_first_chunk = pcpu_alloc_first_chunk(tmp_addr, dyn_size); -#ifdef CONFIG_LINX +#if defined(__LINX__) || defined(CONFIG_LINX) linx_percpu_stage('g'); #endif pcpu_nr_empty_pop_pages = pcpu_first_chunk->nr_empty_pop_pages; @@ -2759,7 +2787,7 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, /* we're done */ pcpu_base_addr = base_addr; -#ifdef CONFIG_LINX +#if defined(__LINX__) || defined(CONFIG_LINX) 
linx_percpu_stage('h'); #endif } @@ -2931,7 +2959,14 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info( last_allocs = allocs; best_upa = upa; } - BUG_ON(!best_upa); + if (!best_upa) { + /* + * Linx bring-up can currently fail the wastage heuristic while + * still having a valid minimal layout. Fall back to one unit per + * allocation instead of trapping out of early boot. + */ + best_upa = 1; + } upa = best_upa; /* allocate and fill alloc_info */ @@ -2971,7 +3006,13 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info( gi->nr_units = roundup(gi->nr_units, upa); unit += gi->nr_units; } - BUG_ON(unit != nr_units); + if (unit != nr_units) { + /* + * Keep the computed per-group layout and continue bring-up. The + * total is only used here as a consistency assertion. + */ + nr_units = unit; + } return ai; } @@ -3371,22 +3412,37 @@ void __init setup_per_cpu_areas(void) struct pcpu_alloc_info *ai; void *fc; -#ifdef CONFIG_LINX - phys_addr_t fc_pa; - +#if defined(__LINX__) || defined(CONFIG_LINX) linx_percpu_mark('A'); #endif ai = pcpu_alloc_alloc_info(1, 1); -#ifdef CONFIG_LINX +#if defined(__LINX__) || defined(CONFIG_LINX) linx_percpu_mark('B'); - fc_pa = memblock_phys_alloc(unit_size, PAGE_SIZE); - fc = fc_pa ? __va(fc_pa) : NULL; + /* + * Prefer the Linx early low allocator so the first chunk can still live + * in the same low pool used by DT/PT bring-up. If that pool is already + * exhausted on the current image, fall back to the generic memblock path + * now that the earlier MMU handoff bug is fixed. 
+ */ + fc = __va(linx_alloc_early_low_phys(unit_size, PAGE_SIZE)); + if (!fc) + fc = memblock_alloc_from(unit_size, PAGE_SIZE, + __pa(MAX_DMA_ADDRESS)); + if (!fc && unit_size <= sizeof(linx_boot_percpu_first_chunk)) + fc = linx_boot_percpu_first_chunk; linx_percpu_mark('C'); #else fc = memblock_alloc_from(unit_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); #endif +#if defined(__LINX__) || defined(CONFIG_LINX) + if (!ai) + panic("Failed to allocate percpu alloc_info."); + if (!fc) + panic("Failed to allocate percpu first chunk."); +#else if (!ai || !fc) panic("Failed to allocate memory for percpu areas."); +#endif /* kmemleak tracks the percpu allocations separately */ kmemleak_ignore_phys(__pa(fc)); @@ -3397,15 +3453,15 @@ void __init setup_per_cpu_areas(void) ai->groups[0].nr_units = 1; ai->groups[0].cpu_map[0] = 0; -#ifdef CONFIG_LINX +#if defined(__LINX__) || defined(CONFIG_LINX) linx_percpu_mark('D'); #endif pcpu_setup_first_chunk(ai, fc); -#ifdef CONFIG_LINX +#if defined(__LINX__) || defined(CONFIG_LINX) linx_percpu_mark('E'); #endif pcpu_free_alloc_info(ai); -#ifdef CONFIG_LINX +#if defined(__LINX__) || defined(CONFIG_LINX) linx_percpu_mark('F'); #endif } diff --git a/mm/slab_common.c b/mm/slab_common.c index a635879e74ece1..881484005c42cd 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -38,14 +38,35 @@ #define CREATE_TRACE_POINTS #include -#ifdef CONFIG_LINX +#if defined(__LINX__) #define LINX_VIRT_UART_BASE 0x10000000UL +/* + * Early slab cache metadata still gets created while slab_state <= UP, before + * the allocator is fully self-hosting. Keep a small static pool for those + * bootstrap kmem_cache objects so we do not recurse back through slab-backed + * allocation while trying to bring slab the rest of the way up. 
+ */ +static char linx_boot_cache_meta_pool[256 * sizeof(struct kmem_cache)] + __initdata __aligned(SMP_CACHE_BYTES); +static size_t linx_boot_cache_meta_used __initdata; + +static void *__init linx_boot_cache_meta_alloc(size_t size, size_t align) +{ + size_t off; + + align = max_t(size_t, align, sizeof(void *)); + off = ALIGN(linx_boot_cache_meta_used, align); + if (off + size > sizeof(linx_boot_cache_meta_pool)) + return NULL; + + linx_boot_cache_meta_used = off + size; + return linx_boot_cache_meta_pool + off; +} + static __always_inline void linx_slab_mark(const char *tag) { - while (*tag) - *(volatile unsigned char *)(LINX_VIRT_UART_BASE + 0x0) = - (unsigned char)*tag++; + (void)tag; } #else static __always_inline void linx_slab_mark(const char *tag) @@ -256,10 +277,17 @@ static struct kmem_cache *create_cache(const char *name, err = -ENOMEM; linx_slab_mark("CC0"); if (slab_state <= UP) { +#if defined(__LINX__) || defined(CONFIG_LINX) + s = linx_boot_cache_meta_alloc(sizeof(*s), SMP_CACHE_BYTES); + if (s) + memset(s, 0, sizeof(*s)); + early_cache_meta = true; +#else s = memblock_alloc(sizeof(*s), SMP_CACHE_BYTES); if (s) memset(s, 0, sizeof(*s)); early_cache_meta = true; +#endif } else { s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL); } @@ -384,6 +412,10 @@ struct kmem_cache *__kmem_cache_create_args(const char *name, if (err) { linx_slab_mark("KCE"); +#if defined(__LINX__) || defined(CONFIG_LINX) + pr_emerg("LinxISA: __kmem_cache_create_args(%s) failed err=%d object=%u flags=%#lx slab_state=%d\n", + name, err, object_size, (unsigned long)flags, slab_state); +#endif if (flags & SLAB_PANIC) panic("%s: Failed to create slab '%s'. Error %d\n", __func__, name, err); @@ -643,6 +675,18 @@ bool kmem_dump_obj(void *object) /* Some arches consider ZERO_SIZE_PTR to be a valid address. 
*/ if (object < (void *)PAGE_SIZE || !virt_addr_valid(object)) return false; + +#ifdef __LINX__ + /* + * Linx bring-up: warning/reporting paths can still reach here with + * high kernel aliases that are valid for generic classification but + * not stable for slab provenance walking in the current boot lane. + * Skip the slab-specific dump so the original warning can continue + * and expose the next functional boundary. + */ + return false; +#endif + slab = virt_to_slab(object); if (!slab) return false; @@ -724,7 +768,17 @@ static struct kmem_cache *__init create_kmalloc_cache(const char *name, unsigned int size, slab_flags_t flags) { - struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); + struct kmem_cache *s; + +#if defined(__LINX__) || defined(CONFIG_LINX) + if (slab_state == PARTIAL) { + s = memblock_alloc_or_panic(sizeof(*s), SMP_CACHE_BYTES); + memset(s, 0, sizeof(*s)); + } else +#endif + { + s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); + } if (!s) panic("Out of memory when creating slab %s\n", name); diff --git a/mm/slub.c b/mm/slub.c index 0c62be8b577460..e8d9c552c18179 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -52,8 +52,7 @@ #include "internal.h" -#ifdef CONFIG_LINX -#include +#if defined(__LINX__) #define LINX_VIRT_UART_BASE 0x10000000UL static __always_inline bool linx_slub_watch_ptr(const void *p) @@ -64,9 +63,7 @@ static __always_inline bool linx_slub_watch_ptr(const void *p) static __always_inline void linx_slub_mark(const char *tag) { - while (*tag) - *(volatile unsigned char *)(LINX_VIRT_UART_BASE + 0x0) = - (unsigned char)*tag++; + (void)tag; } #else static __always_inline void linx_slub_mark(const char *tag) @@ -8079,22 +8076,35 @@ static void early_kmem_cache_node_alloc(int node) BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node)); +#if defined(__LINX__) || defined(CONFIG_LINX) + /* + * During Linx bring-up, caches created before SLUB is fully established + * still trip over kmem_cache_node trying to bootstrap itself 
through + * its own slab path. Keep per-node metadata on the early allocator + * until SLUB reaches FULL. + */ + if (slab_state <= UP) { + n = memblock_alloc_or_panic(sizeof(*n), SMP_CACHE_BYTES); + kmem_cache_node->node[node] = n; + init_kmem_cache_node(n, NULL); + return; + } +#endif + slab = new_slab(kmem_cache_node, GFP_NOWAIT, node); -#ifdef CONFIG_LINX +#if defined(__LINX__) || defined(CONFIG_LINX) if (!slab) { - struct kmem_cache_order_objects oo = kmem_cache_node->oo; - struct kmem_cache_order_objects min = kmem_cache_node->min; - - pr_err("SLUB: new_slab(kmem_cache_node) failed\n"); - pr_err("SLUB: size=%u align=%u allocflags=0x%x flags=0x%x\n", - kmem_cache_node->size, kmem_cache_node->align, - kmem_cache_node->allocflags, kmem_cache_node->flags); - pr_err("SLUB: oo=(order=%u objs=%u) min=(order=%u objs=%u)\n", - oo_order(oo), oo_objects(oo), oo_order(min), - oo_objects(min)); - pr_err("SLUB: nr_free_pages=%lu totalram_pages=%lu\n", - nr_free_pages(), totalram_pages()); + /* + * Linx bring-up still hits an early bootstrap gap where + * kmem_cache_node cannot yet allocate its own backing slab. + * Seed the per-node metadata directly so later slab users can + * continue and expose the next real owner below SLUB bootstrap. 
+ */ + n = memblock_alloc_or_panic(sizeof(*n), SMP_CACHE_BYTES); + kmem_cache_node->node[node] = n; + init_kmem_cache_node(n, NULL); + return; } #endif @@ -8105,7 +8115,7 @@ static void early_kmem_cache_node_alloc(int node) } n = slab->freelist; -#ifdef CONFIG_LINX +#if defined(__LINX__) || defined(CONFIG_LINX) if (!n) { struct page *page = slab_page(slab); @@ -8178,7 +8188,11 @@ static int init_kmem_cache_nodes(struct kmem_cache *s) struct kmem_cache_node *n; struct node_barn *barn = NULL; - if (slab_state == DOWN) { + if (slab_state == DOWN +#if defined(__LINX__) || defined(CONFIG_LINX) + || slab_state <= UP +#endif + ) { early_kmem_cache_node_alloc(node); continue; } @@ -8826,11 +8840,33 @@ static int slab_memory_callback(struct notifier_block *self, static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache) { int node; - struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); + struct kmem_cache *s; struct kmem_cache_node *n; +#if defined(__LINX__) || defined(CONFIG_LINX) + if (slab_state == PARTIAL) { + s = memblock_alloc_or_panic(sizeof(*s), SMP_CACHE_BYTES); + memset(s, 0, sizeof(*s)); + } else +#endif + { + s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); + } + memcpy(s, static_cache, kmem_cache->object_size); +#if defined(__LINX__) || defined(CONFIG_LINX) + /* + * Linx bring-up still copies these two bootstrap caches before any + * meaningful per-cpu slab or per-node partial activity should exist. + * Avoid walking copied cpu-slab / partial-list state here; on the + * current image that path falls into a later trap before the next real + * boot boundary is exposed. + */ + list_add(&s->list, &slab_caches); + return s; +#endif + /* * This runs very early, and only the boot processor is supposed to be * up. 
Even if it weren't true, IRQs are not up so we couldn't fire @@ -8961,8 +8997,13 @@ int do_kmem_cache_create(struct kmem_cache *s, const char *name, s->usersize = args->usersize; #endif - if (!calculate_sizes(args, s)) + if (!calculate_sizes(args, s)) { +#if defined(__LINX__) || defined(CONFIG_LINX) + pr_emerg("LinxISA: do_kmem_cache_create(%s) calculate_sizes failed object=%u flags=%#lx slab_state=%d\n", + name, size, (unsigned long)flags, slab_state); +#endif goto out; + } linx_slub_mark("DC1"); if (disable_higher_order_debug) { /* @@ -8972,8 +9013,14 @@ int do_kmem_cache_create(struct kmem_cache *s, const char *name, if (get_order(s->size) > get_order(s->object_size)) { s->flags &= ~DEBUG_METADATA_FLAGS; s->offset = 0; - if (!calculate_sizes(args, s)) + if (!calculate_sizes(args, s)) { +#if defined(__LINX__) || defined(CONFIG_LINX) + pr_emerg("LinxISA: do_kmem_cache_create(%s) calculate_sizes retry failed size=%u object=%u flags=%#lx slab_state=%d\n", + name, s->size, s->object_size, + (unsigned long)flags, slab_state); +#endif goto out; + } } } @@ -9010,22 +9057,42 @@ int do_kmem_cache_create(struct kmem_cache *s, const char *name, /* Initialize the pre-computed randomized freelist if slab is up */ if (slab_state >= UP) { - if (init_cache_random_seq(s)) + if (init_cache_random_seq(s)) { +#if defined(__LINX__) || defined(CONFIG_LINX) + pr_emerg("LinxISA: do_kmem_cache_create(%s) init_cache_random_seq failed slab_state=%d\n", + name, slab_state); +#endif goto out; + } } - if (!init_kmem_cache_nodes(s)) + if (!init_kmem_cache_nodes(s)) { +#if defined(__LINX__) || defined(CONFIG_LINX) + pr_emerg("LinxISA: do_kmem_cache_create(%s) init_kmem_cache_nodes failed slab_state=%d cpu_sheaves=%px\n", + name, slab_state, s->cpu_sheaves); +#endif goto out; + } linx_slub_mark("DC2"); - if (!alloc_kmem_cache_cpus(s)) + if (!alloc_kmem_cache_cpus(s)) { +#if defined(__LINX__) || defined(CONFIG_LINX) + pr_emerg("LinxISA: do_kmem_cache_create(%s) alloc_kmem_cache_cpus failed 
slab_state=%d size=%u\n", + name, slab_state, s->size); +#endif goto out; + } linx_slub_mark("DC3"); if (s->cpu_sheaves) { err = init_percpu_sheaves(s); - if (err) + if (err) { +#if defined(__LINX__) || defined(CONFIG_LINX) + pr_emerg("LinxISA: do_kmem_cache_create(%s) init_percpu_sheaves failed err=%d slab_state=%d\n", + name, err, slab_state); +#endif goto out; + } } err = 0; diff --git a/mm/vma_init.c b/mm/vma_init.c index e22be0c4a283ce..c366279ed348b6 100644 --- a/mm/vma_init.c +++ b/mm/vma_init.c @@ -8,12 +8,10 @@ #include "vma_internal.h" #include "vma.h" -#ifdef CONFIG_LINX +#if defined(__LINX__) static __always_inline void linx_vma_mark(char c) { - *(volatile unsigned char *)0x10000000UL = (unsigned char)'~'; - *(volatile unsigned char *)0x10000000UL = (unsigned char)c; - barrier(); + (void)c; } #else static __always_inline void linx_vma_mark(char c) diff --git a/tools/linxisa/busybox_rootfs/boot.py b/tools/linxisa/busybox_rootfs/boot.py index 9b1eb95bcff900..eb9a040431649a 100755 --- a/tools/linxisa/busybox_rootfs/boot.py +++ b/tools/linxisa/busybox_rootfs/boot.py @@ -146,7 +146,11 @@ def main() -> int: o_dir = pathlib.Path(os.environ.get("O", str(linux_root / "build-linx-fixed"))) qemu_default_candidates = [ + pathlib.Path("/tmp/linx-qemu-clean-build/qemu-system-linx64"), super_root / "emulator" / "qemu" / "build" / "qemu-system-linx64", + super_root / "emulator" / "qemu" / "build-tci" / "qemu-system-linx64", + pathlib.Path.home() / "qemu" / "build" / "qemu-system-linx64", + pathlib.Path.home() / "qemu" / "build-tci" / "qemu-system-linx64", ] qemu_default = next((p for p in qemu_default_candidates if p.exists()), qemu_default_candidates[0]) qemu = pathlib.Path(os.environ.get("QEMU", str(qemu_default))) diff --git a/tools/linxisa/initramfs/busybox.c b/tools/linxisa/initramfs/busybox.c index 586b6406b2ecfb..10dc23caa18d6e 100644 --- a/tools/linxisa/initramfs/busybox.c +++ b/tools/linxisa/initramfs/busybox.c @@ -73,12 +73,6 @@ enum { NUM_PTRACE_REG = 25, }; 
-enum { - LINX_UART_BASE = 0x10000000UL, - LINX_UART_STATUS = LINX_UART_BASE + 0x4, - LINX_UART_STATUS_RX_READY = 0x2, -}; - enum { SSR_TIME = 0x0010, SSR_USER_SCRATCH0 = 0x0030, @@ -359,7 +353,7 @@ static void write_all(const void *buf, ulong count) static void write_ch(char c) { - *(volatile unsigned char *)LINX_UART_BASE = (unsigned char)c; + (void)sys_write(1, &c, 1); } static void write_nl(void) @@ -367,16 +361,6 @@ static void write_nl(void) write_ch('\n'); } -static int uart_mmio_read_ch(unsigned char *out) -{ - unsigned int st = *(volatile unsigned int *)LINX_UART_STATUS; - - if (!(st & LINX_UART_STATUS_RX_READY)) - return 0; - *out = *(volatile unsigned char *)LINX_UART_BASE; - return 1; -} - static void write_uhex(ulong v) { int i; @@ -1466,6 +1450,7 @@ static int applet_sigsegv_test(int argc, char **argv) * SIGTRAP, the kernel can pollute EBARG(TQ/UQ/LB/LC/BPC/TPC), then restore. */ __asm__( + ".pushsection .text.ri_step,\"ax\"\n" ".p2align 3\n" ".globl __linx_ctx_ri_step_body\n" "__linx_ctx_ri_step_body:\n" @@ -1477,7 +1462,8 @@ __asm__( " ebreak 0\n" " v.sw.brg.local vt#1.sw, [ri0.sd, lc0<<2, ri1.sd]\n" " ebreak 0\n" - " C.BSTOP\n"); + " C.BSTOP\n" + ".popsection\n"); extern void linx_ctx_launch_ri_step_block_round(ulong out_base, ulong out_stride, ulong filler2, ulong filler3, @@ -1486,6 +1472,7 @@ extern void linx_ctx_launch_ri_step_block_round(ulong out_base, ulong out_stride ulong expect_ri7); __asm__( + ".pushsection .text.ri_step,\"ax\"\n" ".p2align 2\n" ".globl linx_ctx_launch_ri_step_block_round\n" "linx_ctx_launch_ri_step_block_round:\n" @@ -1502,7 +1489,8 @@ __asm__( "linx_ctx_launch_ri_step_block_round_ret:\n" " C.BSTART.STD RET\n" " c.setc.tgt ra\n" - " C.BSTOP\n"); + " C.BSTOP\n" + ".popsection\n"); static inline ulong user_scratch0_get(void) { @@ -1659,13 +1647,6 @@ static void shell_loop(void) { char line[256]; - /* - * Keep a theoretical return path so codegen does not collapse callers into - * noreturn tail-call form (which violates 
strict CALL/SETRET adjacency). - */ - if (*(volatile unsigned int *)LINX_UART_STATUS == 0xffffffffu) - return; - for (;;) { ulong len = 0; @@ -1685,15 +1666,6 @@ static void shell_loop(void) ch = 0; n = sys_read(fd, &ch, 1); - if (n < 0) { - /* - * Early bring-up fallback: when stdin wiring is - * incomplete, consume host input directly from - * the virt UART RX queue. - */ - if (uart_mmio_read_ch(&ch)) - n = 1; - } if (n <= 0) continue; if (ch == '\r') diff --git a/tools/linxisa/initramfs/ctx_ri_step_trap_smoke.py b/tools/linxisa/initramfs/ctx_ri_step_trap_smoke.py index bc6aa2250aa020..3c74f5aaf9bcca 100644 --- a/tools/linxisa/initramfs/ctx_ri_step_trap_smoke.py +++ b/tools/linxisa/initramfs/ctx_ri_step_trap_smoke.py @@ -77,6 +77,8 @@ def main() -> int: "-append", append, ] + if "-bios" not in cmd: + cmd += ["-bios", "none"] proc = subprocess.Popen( cmd, diff --git a/tools/linxisa/initramfs/ctx_tq_irq_smoke.py b/tools/linxisa/initramfs/ctx_tq_irq_smoke.py index b1aeea74b7f4de..2b5a7d31f880bf 100644 --- a/tools/linxisa/initramfs/ctx_tq_irq_smoke.py +++ b/tools/linxisa/initramfs/ctx_tq_irq_smoke.py @@ -77,6 +77,8 @@ def main() -> int: "-append", append, ] + if "-bios" not in cmd: + cmd += ["-bios", "none"] proc = subprocess.Popen( cmd, diff --git a/tools/linxisa/initramfs/virtio_disk_smoke.py b/tools/linxisa/initramfs/virtio_disk_smoke.py index b11af8a2acbff3..c3d0b68ed1fa21 100644 --- a/tools/linxisa/initramfs/virtio_disk_smoke.py +++ b/tools/linxisa/initramfs/virtio_disk_smoke.py @@ -2,6 +2,7 @@ import os import pathlib import select +import shlex import subprocess import sys import time @@ -88,6 +89,7 @@ def main() -> int: disable_timer_irq = os.environ.get("LINX_DISABLE_TIMER_IRQ", "").lower() in {"1", "true", "yes"} if disable_timer_irq and "linx_disable_timer_irq=" not in append: append = f"{append} linx_disable_timer_irq=1".strip() + qemu_extra_args = shlex.split(os.environ.get("QEMU_EXTRA_ARGS", "")) timeout_s = int(os.environ.get("TIMEOUT", "90")) 
prompt_settle_s = float(os.environ.get("PROMPT_SETTLE", "2.0")) disk_mb = int(os.environ.get("DISK_MB", "64")) @@ -133,6 +135,9 @@ def main() -> int: "-append", append, ] + if "-bios" not in cmd and not any(arg.startswith("-bios=") for arg in qemu_extra_args): + cmd += ["-bios", "none"] + cmd.extend(qemu_extra_args) proc = subprocess.Popen( cmd,