From 5b10966a8d8e210d59db36424df40e5de9b50bda Mon Sep 17 00:00:00 2001 From: Denis Kotov Date: Fri, 16 Sep 2022 19:21:40 +0300 Subject: [PATCH 01/20] Initial implementation for StackAllocator --- .../src/nodes/kernels/stack_allocator.hpp | 283 ++++++++++++++++++ 1 file changed, 283 insertions(+) create mode 100644 src/plugins/intel_cpu/src/nodes/kernels/stack_allocator.hpp diff --git a/src/plugins/intel_cpu/src/nodes/kernels/stack_allocator.hpp b/src/plugins/intel_cpu/src/nodes/kernels/stack_allocator.hpp new file mode 100644 index 00000000000000..3195441ef9c650 --- /dev/null +++ b/src/plugins/intel_cpu/src/nodes/kernels/stack_allocator.hpp @@ -0,0 +1,283 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +namespace ov { +namespace intel_cpu { + +namespace x64 = dnnl::impl::cpu::x64; + +class StackAllocator final { +public: + using Ptr = std::shared_ptr; + + class Address; + + StackAllocator(x64::jit_generator& code_gen) + : StackAllocator{code_gen, code_gen.rbp} { + } + + StackAllocator(x64::jit_generator& code_gen, + const Xbyak::Reg& bp) + : code_generator{code_gen} + , base_pointer{bp} { + checkUnique(true); + code_gen.mov(base_pointer, code_gen.rsp); + } + + ~StackAllocator() { + release(); + checkUnique(false); + } + + void release() { + current_offset = {}; + commit(); + } + + void commit() { + if (current_offset > offset) { + code_generator.sub(code_generator.rsp, current_offset - offset); + offset = current_offset; + } else if (offset > current_offset) { + code_generator.add(code_generator.rsp, offset - current_offset); + offset = current_offset; + } + } + + friend void stack_mov(Address& addr, const Xbyak::Xmm& vmm); + friend void stack_mov(Address& addr, const Xbyak::Reg& reg); + friend void stack_mov(const Xbyak::Xmm& vmm, const Address& addr); + friend void stack_mov(const Xbyak::Reg& reg, const Address& addr); + +private: + struct Allocation { + using Ptr = std::shared_ptr; + + Allocation(const Xbyak::Address& address, + const size_t offset, + const size_t size) + : address(address) + , offset(offset) + , size(size) {} + + bool is_used = true; + Xbyak::Address address; + size_t offset{}; + size_t size{}; + }; + + Allocation::Ptr allocate(const size_t alloc_size) { + std::vector free_allocations{}; + for (const auto& alloc : allocations) { + if (!alloc->is_used && alloc_size <= alloc->size) { + free_allocations.push_back(alloc); + } + } + if (!free_allocations.empty()) { + std::sort(free_allocations.begin(), free_allocations.end(), + [](const Allocation::Ptr& alloc0, const Allocation::Ptr& alloc1) { + return alloc0->size < alloc1->size; + }); + const auto alloc = free_allocations.front(); + alloc->is_used = true; + return alloc; + } else { + current_offset += alloc_size; + Xbyak::Address addr = code_generator.ptr[base_pointer - current_offset]; + const auto alloc = std::make_shared(addr, current_offset, alloc_size); + allocations.push_back(alloc); + return alloc; + } + } + + void deallocate() { + while (!allocations.empty()) { + const auto& last = allocations.back(); + if (last->is_used) { + break; + } + current_offset -= last->size; + allocations.pop_back(); + } + } + + void checkUnique(bool isCtor) { + static thread_local bool isCreated = false; + if (isCtor) { + if (isCreated) { + IE_THROW() << "There should be only one instance of StackAllocator per thread !!"; + } + isCreated = true; + } else { + isCreated = false; + } + } + + x64::jit_generator& code_generator; + const Xbyak::Reg& base_pointer; + + size_t offset{}; + size_t current_offset{}; + std::vector allocations{}; +}; + +class StackAllocator::Address final { +public: + Address() = default; + + Address(StackAllocator::Ptr stack_allocator, + const size_t alloc_size) + : stack_allocator_{stack_allocator} + , allocation_{stack_allocator_->allocate(alloc_size)} { + } + + ~Address() { + release(); + } + + Address(Address&& addr) noexcept { + this->operator=(std::move(addr)); + } + + Address& operator=(Address&& addr) noexcept { + release(); + stack_allocator_ = std::move(addr.stack_allocator_); + allocation_ = std::move(addr.allocation_); + return *this; + } + + void release() { + if (allocation_) { + allocation_->is_used = false; + } + if (stack_allocator_) { + stack_allocator_->deallocate(); + } + allocation_ = {}; + stack_allocator_ = {}; + } + + operator Xbyak::Address&() { + ensureValid(); + stack_allocator_->commit(); + return allocation_->address; + } + + operator const Xbyak::Address&() const { + ensureValid(); + stack_allocator_->commit(); + return allocation_->address; + } + + Address& operator=(const Xbyak::Xmm& vmm) { + stack_mov(*this, vmm); + return *this; + } + + Address& operator=(const Xbyak::Reg& reg) { + stack_mov(*this, reg); + return *this; + } + + friend void ::ov::intel_cpu::stack_mov(Address& addr, const Xbyak::Xmm& vmm); + friend void ::ov::intel_cpu::stack_mov(Address& addr, const Xbyak::Reg& reg); + friend void ::ov::intel_cpu::stack_mov(const Xbyak::Xmm& vmm, const Address& addr); + friend void ::ov::intel_cpu::stack_mov(const Xbyak::Reg& reg, const Address& addr); + +private: + void ensureSize(const Xbyak::Reg& reg) const { + ensureValid(); + const size_t reg_size = reg.getBit() / 8; + if (reg_size > allocation_->size) { + IE_THROW() << "reg size is bigger than space allocated in StackAllocator !!"; + } + } + + void ensureValid() const { + if (!stack_allocator_ || !allocation_) { + IE_THROW() << "StackAllocator::Address is either not initialized or released !!"; + } + } + + x64::jit_generator& generator() const { + return stack_allocator_->code_generator; + } + + StackAllocator::Ptr stack_allocator_; + Allocation::Ptr allocation_; +}; + +inline +void stack_mov(StackAllocator::Address& addr, const Xbyak::Xmm& vmm) { + addr.ensureSize(vmm); + x64::jit_generator& generator = addr.generator(); + if (vmm.isXMM()) { + generator.uni_vmovdqu(addr.allocation_->address, Xbyak::Xmm{vmm.getIdx()}); + } else if (vmm.isYMM()) { + generator.uni_vmovdqu(addr.allocation_->address, Xbyak::Ymm{vmm.getIdx()}); + } else if (vmm.isZMM()) { + generator.uni_vmovdqu(addr.allocation_->address, Xbyak::Zmm{vmm.getIdx()}); + } else { + IE_THROW() << "Unknown simd register !!"; + } +} + +inline +void stack_mov(StackAllocator::Address& addr, const Xbyak::Reg& reg) { + addr.ensureSize(reg); + x64::jit_generator& generator = addr.generator(); + if (reg.isREG(8)) { + generator.mov(addr.allocation_->address, Xbyak::Reg8{reg.getIdx()}); + } else if (reg.isREG(16)) { + generator.mov(addr.allocation_->address, Xbyak::Reg16{reg.getIdx()}); + } else if (reg.isREG(32)) { + generator.mov(addr.allocation_->address, Xbyak::Reg32{reg.getIdx()}); + } else if (reg.isREG(64)) { + generator.mov(addr.allocation_->address, Xbyak::Reg64{reg.getIdx()}); + } else { + IE_THROW() << "Unknown general purpose register !!"; + } +} + +inline +void stack_mov(const Xbyak::Xmm& vmm, const StackAllocator::Address& addr) { + addr.ensureSize(vmm); + x64::jit_generator& generator = addr.generator(); + if (vmm.isXMM()) { + generator.uni_vmovdqu(Xbyak::Xmm{vmm.getIdx()}, addr.allocation_->address); + } else if (vmm.isYMM()) { + generator.uni_vmovdqu(Xbyak::Ymm{vmm.getIdx()}, addr.allocation_->address); + } else if (vmm.isZMM()) { + generator.uni_vmovdqu(Xbyak::Zmm{vmm.getIdx()}, addr.allocation_->address); + } else { + IE_THROW() << "Unknown simd register !!"; + } +} + +inline +void stack_mov(const Xbyak::Reg& reg, const StackAllocator::Address& addr) { + addr.ensureSize(reg); + x64::jit_generator& generator = addr.generator(); + if (reg.isREG(8)) { + generator.mov(Xbyak::Reg8{reg.getIdx()}, addr.allocation_->address); + } else if (reg.isREG(16)) { + generator.mov(Xbyak::Reg16{reg.getIdx()}, addr.allocation_->address); + } else if (reg.isREG(32)) { + generator.mov(Xbyak::Reg32{reg.getIdx()}, addr.allocation_->address); + } else if (reg.isREG(64)) { + generator.mov(Xbyak::Reg64{reg.getIdx()}, addr.allocation_->address); + } else { + IE_THROW() << "Unknown general purpose register !!"; + } +} + +} // namespace intel_cpu +} // namespace ov From f5d563c268395bd274209ef448b34536117e6560 Mon Sep 17 00:00:00 2001 From: Denis Kotov Date: Sat, 17 Sep 2022 14:57:09 +0300 Subject: [PATCH 02/20] Created StackAllocatorTest --- .../unit/nodes/kernels/stack_allocator.cpp | 171 ++++++++++++++++++ 1 file changed, 171 insertions(+) create mode 100644 src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp diff --git a/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp b/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp new file mode 100644 index 00000000000000..389d5ca88751d8 --- /dev/null +++ b/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp @@ -0,0 +1,171 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include + +#include + +using namespace ov::intel_cpu; +using namespace InferenceEngine; + +class StackAllocatorTest : public ::testing::Test, public x64::jit_generator { +protected: + DECLARE_CPU_JIT_AUX_FUNCTIONS(StackAllocatorTest) + + void SetUp() override { + } + + void TearDown() override { + } + + template + F create_kernel() { + const status_t code = jit_generator::create_kernel(); + if (code != dnnl::impl::status::success) { + IE_THROW() << "Could not create kernel. Error code: " << std::to_string(code) << ". " + << "Xbyak error code: " << Xbyak::ConvertErrorToString(Xbyak::GetError()); + } + return reinterpret_cast(jit_ker()); + } + + void generate() override { + this->preamble(); + stack_allocator_ = std::make_shared(*this); + kernel_(); + stack_allocator_->commit(); + stack_allocator_.reset(); + this->postamble(); + } + + std::function kernel_; + std::shared_ptr stack_allocator_; +}; + +TEST_F(StackAllocatorTest, ValueEqual) { + kernel_ = [this]() { + Xbyak::Label l_equal; + StackAllocator::Address reg_100_addr{stack_allocator_, sizeof(int32_t)}; + mov(rbx.cvt32(), 100); + stack_mov(reg_100_addr, rbx.cvt32()); + mov(rax, 1); + cmp(rbx.cvt32(), reg_100_addr); + je(l_equal); + mov(rax, 0); + L(l_equal); + }; + auto f = create_kernel(); + int r = f(); + EXPECT_EQ(r, 1); +} + +TEST_F(StackAllocatorTest, ValueNotEqual) { + kernel_ = [this]() { + Xbyak::Label l_equal; + StackAllocator::Address reg_100_addr{stack_allocator_, sizeof(int32_t)}; + mov(rbx.cvt32(), 100); + stack_mov(reg_100_addr, rbx.cvt32()); + mov(rcx.cvt32(), reg_100_addr); + mov(rbx.cvt32(), 201); + mov(rax, 1); + cmp(rbx.cvt32(), rcx.cvt32()); + je(l_equal); + mov(rax, 0); + L(l_equal); + }; + auto f = create_kernel(); + int r = f(); + EXPECT_EQ(r, 0); +} + +TEST_F(StackAllocatorTest, AddressCheck) { + kernel_ = [this]() { + Xbyak::Label l_equal; + StackAllocator::Address reg_0_5_addr{stack_allocator_, sizeof(int32_t)}; + EXPECT_EQ(static_cast(reg_0_5_addr), ptr[rbp - sizeof(int32_t)]); + }; + create_kernel(); +} + +TEST_F(StackAllocatorTest, LoopSuccess) { + kernel_ = [this]() { + Xbyak::Label l_equal; + Xbyak::Label l_loop; + Xbyak::Label l_end; + xor_(rcx, rcx); + StackAllocator::Address reg_100_addr{stack_allocator_, sizeof(int32_t)}; + stack_allocator_->commit(); + L(l_loop); + { + cmp(rcx, 10); + je(l_end); + mov(rbx.cvt32(), 100); + stack_mov(reg_100_addr, rbx.cvt32()); + mov(rdx.cvt32(), reg_100_addr); + mov(rbx.cvt32(), 201); + mov(rax, 1); + add(rcx, 1); + cmp(rbx.cvt32(), rdx.cvt32()); + jne(l_equal); + { + mov(rax, 0); + jmp(l_loop); + } + L(l_equal); + { + jmp(l_loop); + } + } + L(l_end); + reg_100_addr.release(); + }; + auto f = create_kernel(); + const int r = f(); + EXPECT_EQ(r, 1); +} + +TEST_F(StackAllocatorTest, LoopFailed) { + kernel_ = [this]() { + Xbyak::Label l_equal; + Xbyak::Label l_loop; + Xbyak::Label l_end; + xor_(rcx, rcx); + + StackAllocator::Address reg_temp0_addr{stack_allocator_, sizeof(float)}; + StackAllocator::Address reg_temp1_addr{stack_allocator_, sizeof(int32_t)}; + StackAllocator::Address reg_100_addr{stack_allocator_, sizeof(int32_t)}; + StackAllocator::Address reg_200_addr{stack_allocator_, sizeof(int32_t)}; + stack_allocator_->commit(); + L(l_loop); + { + cmp(rcx, 10); + je(l_end); + mov(rbx.cvt32(), 100); + stack_mov(reg_100_addr, rbx.cvt32()); + mov(rbx.cvt32(), 200); + reg_200_addr.release(); + // NOTE: During implicit conversion to Xbyak::Address& will be thrown the exception + stack_mov(reg_200_addr, rbx.cvt32()); + mov(rdx.cvt32(), reg_100_addr); + mov(rbx.cvt32(), 201); + cmp(rbx.cvt32(), rdx.cvt32()); + mov(rax, 1); + add(rcx, 1); + jne(l_equal); + { + reg_temp1_addr.release(); + mov(rax, 0); + jmp(l_loop); + } + L(l_equal); + { + jmp(l_loop); + } + } + L(l_end); + }; + EXPECT_ANY_THROW(create_kernel()); +} From 6a6aab327dd9fc3ad184433aaa7fc15fa935fa11 Mon Sep 17 00:00:00 2001 From: Denis Kotov Date: Thu, 22 Sep 2022 20:39:42 +0300 Subject: [PATCH 03/20] Added StackAllocator::Reg<> for simplifing saving registers on stack --- .../src/nodes/kernels/stack_allocator.hpp | 37 ++++++++++++++++--- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/kernels/stack_allocator.hpp b/src/plugins/intel_cpu/src/nodes/kernels/stack_allocator.hpp index 3195441ef9c650..83904a98b0e1de 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/stack_allocator.hpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/stack_allocator.hpp @@ -19,6 +19,8 @@ class StackAllocator final { using Ptr = std::shared_ptr; class Address; + template + class Reg; StackAllocator(x64::jit_generator& code_gen) : StackAllocator{code_gen, code_gen.rbp} { @@ -129,17 +131,17 @@ class StackAllocator final { std::vector allocations{}; }; -class StackAllocator::Address final { +class StackAllocator::Address { public: Address() = default; Address(StackAllocator::Ptr stack_allocator, - const size_t alloc_size) + const size_t alloc_size) : stack_allocator_{stack_allocator} , allocation_{stack_allocator_->allocate(alloc_size)} { } - ~Address() { + virtual ~Address() { release(); } @@ -177,12 +179,12 @@ class StackAllocator::Address final { return allocation_->address; } - Address& operator=(const Xbyak::Xmm& vmm) { + virtual Address& operator=(const Xbyak::Xmm& vmm) { stack_mov(*this, vmm); return *this; } - Address& operator=(const Xbyak::Reg& reg) { + virtual Address& operator=(const Xbyak::Reg& reg) { stack_mov(*this, reg); return *this; } @@ -215,6 +217,31 @@ class StackAllocator::Address final { Allocation::Ptr allocation_; }; +template +class StackAllocator::Reg : public StackAllocator::Address { +public: + static_assert(std::is_base_of::value, "TReg should be a Xbyak::Reg based !!"); + + Reg() = default; + + Reg(StackAllocator::Ptr stack_allocator) + : Address{stack_allocator, TReg{}.getBit() / sizeof(uint8_t)} { + } + + Reg(Reg&& addr) noexcept = default; + Reg& operator=(Reg&& addr) noexcept = default; + + Reg& operator=(const Xbyak::Xmm& vmm) override { + Address::operator=(vmm); + return *this; + } + + Reg& operator=(const Xbyak::Reg& reg) override { + Address::operator=(reg); + return *this; + } +}; + inline void stack_mov(StackAllocator::Address& addr, const Xbyak::Xmm& vmm) { addr.ensureSize(vmm); From a6c148d44ad538ea79a010ed2bbeed9f8cb59ae3 Mon Sep 17 00:00:00 2001 From: Denis Kotov Date: Thu, 22 Sep 2022 20:54:48 +0300 Subject: [PATCH 04/20] Added tests for new StackAllocator::Reg<> class --- .../unit/nodes/kernels/stack_allocator.cpp | 40 ++++++++++++++++++- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp b/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp index 389d5ca88751d8..b5777ab910ff90 100644 --- a/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp +++ b/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp @@ -45,7 +45,7 @@ class StackAllocatorTest : public ::testing::Test, public x64::jit_generator { std::shared_ptr stack_allocator_; }; -TEST_F(StackAllocatorTest, ValueEqual) { +TEST_F(StackAllocatorTest, Address_ValueEqual) { kernel_ = [this]() { Xbyak::Label l_equal; StackAllocator::Address reg_100_addr{stack_allocator_, sizeof(int32_t)}; @@ -62,7 +62,24 @@ TEST_F(StackAllocatorTest, ValueEqual) { EXPECT_EQ(r, 1); } -TEST_F(StackAllocatorTest, ValueNotEqual) { +TEST_F(StackAllocatorTest, Reg32_ValueEqual) { + kernel_ = [this]() { + Xbyak::Label l_equal; + StackAllocator::Reg reg_100_addr{stack_allocator_}; + mov(rbx.cvt32(), 100); + stack_mov(reg_100_addr, rbx.cvt32()); + mov(rax, 1); + cmp(rbx.cvt32(), reg_100_addr); + je(l_equal); + mov(rax, 0); + L(l_equal); + }; + auto f = create_kernel(); + int r = f(); + EXPECT_EQ(r, 1); +} + +TEST_F(StackAllocatorTest, Address_ValueNotEqual) { kernel_ = [this]() { Xbyak::Label l_equal; StackAllocator::Address reg_100_addr{stack_allocator_, sizeof(int32_t)}; @@ -81,6 +98,25 @@ TEST_F(StackAllocatorTest, ValueNotEqual) { EXPECT_EQ(r, 0); } +TEST_F(StackAllocatorTest, Reg32_ValueNotEqual) { + kernel_ = [this]() { + Xbyak::Label l_equal; + StackAllocator::Reg reg_100_addr{stack_allocator_}; + mov(rbx.cvt32(), 100); + stack_mov(reg_100_addr, rbx.cvt32()); + mov(rcx.cvt32(), reg_100_addr); + mov(rbx.cvt32(), 201); + mov(rax, 1); + cmp(rbx.cvt32(), rcx.cvt32()); + je(l_equal); + mov(rax, 0); + L(l_equal); + }; + auto f = create_kernel(); + int r = f(); + EXPECT_EQ(r, 0); +} + TEST_F(StackAllocatorTest, AddressCheck) { kernel_ = [this]() { Xbyak::Label l_equal; From d4e4ce63dab3598a2535b4fcd1d9f9439423f47d Mon Sep 17 00:00:00 2001 From: Denis Kotov Date: Fri, 7 Oct 2022 15:01:55 +0300 Subject: [PATCH 05/20] Added alignment for StackAllocator --- .../src/nodes/kernels/stack_allocator.hpp | 119 +++++++++++++++--- 1 file changed, 103 insertions(+), 16 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/kernels/stack_allocator.hpp b/src/plugins/intel_cpu/src/nodes/kernels/stack_allocator.hpp index 83904a98b0e1de..864ca1b82e76a0 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/stack_allocator.hpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/stack_allocator.hpp @@ -26,16 +26,28 @@ class StackAllocator final { : StackAllocator{code_gen, code_gen.rbp} { } + StackAllocator(x64::jit_generator& code_gen, const Xbyak::Reg& bp) + : StackAllocator{code_gen, bp, 1} { + } + + StackAllocator(x64::jit_generator& code_gen, const size_t alignment) + : StackAllocator{code_gen, code_gen.rbp, alignment} { + } + StackAllocator(x64::jit_generator& code_gen, - const Xbyak::Reg& bp) + const Xbyak::Reg& bp, + const size_t alignment) : code_generator{code_gen} - , base_pointer{bp} { + , base_pointer{Xbyak::Reg64{bp.getIdx()}} + , alignment{alignment} { checkUnique(true); - code_gen.mov(base_pointer, code_gen.rsp); + alignStack(true); + code_generator.mov(base_pointer, code_generator.rsp); } ~StackAllocator() { release(); + alignStack(false); checkUnique(false); } @@ -76,25 +88,76 @@ class StackAllocator final { size_t size{}; }; - Allocation::Ptr allocate(const size_t alloc_size) { + void alignStack(bool isCtor) { + if (1 != alignment) { + constexpr size_t kReg64Size = 0x08; + if (isCtor) { + Xbyak::Label l_stack_aligned; + const Xbyak::Reg64 reg_base_stack_offset{base_pointer.getIdx()}; + code_generator.mov(reg_base_stack_offset, static_cast(kReg64Size)); + + const Xbyak::Reg64 reg_base_addr{Xbyak::Operand::RAX}; + const Xbyak::Reg64 reg_reminder{Xbyak::Operand::RDX}; + const Xbyak::Reg64 reg_alignment{Xbyak::Operand::RCX}; + + code_generator.push(reg_base_addr); + code_generator.push(reg_reminder); + code_generator.push(reg_alignment); + + code_generator.xor_(reg_reminder, reg_reminder); + + code_generator.mov(reg_base_addr, code_generator.rsp); + code_generator.add(reg_base_addr, 3 * kReg64Size - kReg64Size); + code_generator.mov(reg_alignment, alignment); + code_generator.idiv(reg_alignment); + code_generator.cmp(reg_reminder, static_cast(0x00)); + code_generator.je(l_stack_aligned); + code_generator.add(reg_base_stack_offset, reg_reminder); + code_generator.L(l_stack_aligned); + + code_generator.pop(reg_alignment); + code_generator.pop(reg_reminder); + code_generator.pop(reg_base_addr); + + code_generator.sub(code_generator.rsp, reg_base_stack_offset); + code_generator.mov(code_generator.ptr[code_generator.rsp], reg_base_stack_offset); + } else { + code_generator.add(code_generator.rsp, code_generator.ptr[code_generator.rsp]); + } + } + } + + Allocation::Ptr allocate(const size_t alloc_size, const size_t requested_alignment) { + if (alignment % requested_alignment != 0) { + IE_THROW() << "Requested alignment should have 0 reminder of alignment % align !!"; + } + std::vector free_allocations{}; for (const auto& alloc : allocations) { - if (!alloc->is_used && alloc_size <= alloc->size) { + if (!alloc->is_used && + alloc_size <= alloc->size && + (alloc->offset % requested_alignment) == 0) { free_allocations.push_back(alloc); } } + std::sort(free_allocations.begin(), free_allocations.end(), + [](const Allocation::Ptr& alloc0, const Allocation::Ptr& alloc1) { + return alloc0->size < alloc1->size; + }); if (!free_allocations.empty()) { - std::sort(free_allocations.begin(), free_allocations.end(), - [](const Allocation::Ptr& alloc0, const Allocation::Ptr& alloc1) { - return alloc0->size < alloc1->size; - }); const auto alloc = free_allocations.front(); alloc->is_used = true; return alloc; } else { - current_offset += alloc_size; + size_t alloc_offset = 0; + if (requested_alignment > 1) { + alloc_offset = (requested_alignment - ((current_offset+alloc_size) % requested_alignment)); + } + + const size_t aligned_alloc_size = alloc_offset + alloc_size; + current_offset += aligned_alloc_size; Xbyak::Address addr = code_generator.ptr[base_pointer - current_offset]; - const auto alloc = std::make_shared(addr, current_offset, alloc_size); + const auto alloc = std::make_shared(addr, current_offset, aligned_alloc_size); allocations.push_back(alloc); return alloc; } @@ -124,21 +187,28 @@ class StackAllocator final { } x64::jit_generator& code_generator; - const Xbyak::Reg& base_pointer; + const Xbyak::Reg base_pointer; size_t offset{}; size_t current_offset{}; + size_t alignment{}; std::vector allocations{}; }; +void stack_mov(StackAllocator::Address& addr, const Xbyak::Xmm& vmm); +void stack_mov(StackAllocator::Address& addr, const Xbyak::Reg& reg); +void stack_mov(const Xbyak::Xmm& vmm, const StackAllocator::Address& addr); +void stack_mov(const Xbyak::Reg& reg, const StackAllocator::Address& addr); + class StackAllocator::Address { public: Address() = default; Address(StackAllocator::Ptr stack_allocator, - const size_t alloc_size) + const size_t alloc_size, + const size_t requested_alignment = 1) : stack_allocator_{stack_allocator} - , allocation_{stack_allocator_->allocate(alloc_size)} { + , allocation_{stack_allocator_->allocate(alloc_size, requested_alignment)} { } virtual ~Address() { @@ -204,11 +274,15 @@ class StackAllocator::Address { } void ensureValid() const { - if (!stack_allocator_ || !allocation_) { + if (!isInitialized()) { IE_THROW() << "StackAllocator::Address is either not initialized or released !!"; } } + bool isInitialized() const { + return stack_allocator_ && allocation_; + } + x64::jit_generator& generator() const { return stack_allocator_->code_generator; } @@ -225,7 +299,7 @@ class StackAllocator::Reg : public StackAllocator::Address { Reg() = default; Reg(StackAllocator::Ptr stack_allocator) - : Address{stack_allocator, TReg{}.getBit() / sizeof(uint8_t)} { + : Address{stack_allocator, TReg{}.getBit() / 8, getAlignment()} { } Reg(Reg&& addr) noexcept = default; @@ -240,6 +314,19 @@ class StackAllocator::Reg : public StackAllocator::Address { Address::operator=(reg); return *this; } + +private: + static size_t getAlignment() { + if (std::is_same::value) { + return x64::cpu_isa_traits::vlen; + } else if (std::is_same::value) { + return x64::cpu_isa_traits::vlen; + } else if (std::is_same::value) { + return x64::cpu_isa_traits::vlen; + } else { + return 1; + } + } }; inline From 092b73956fa6642e7ff9fba4a8135f0f2654fe4c Mon Sep 17 00:00:00 2001 From: Denis Kotov Date: Fri, 7 Oct 2022 15:16:14 +0300 Subject: [PATCH 06/20] Added saving data from stack to register --- .../intel_cpu/src/nodes/kernels/registers_pool.hpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/src/nodes/kernels/registers_pool.hpp b/src/plugins/intel_cpu/src/nodes/kernels/registers_pool.hpp index ed12fa71182bfb..cfa930dfbd613f 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/registers_pool.hpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/registers_pool.hpp @@ -9,6 +9,7 @@ #include "ie_common.h" #include "utils/cpu_utils.hpp" #include +#include "stack_allocator.hpp" namespace ov { namespace intel_cpu { @@ -49,16 +50,22 @@ class RegistersPool { Reg(const RegistersPool::Ptr& regPool) { initialize(regPool); } Reg(const RegistersPool::Ptr& regPool, int requestedIdx) { initialize(regPool, requestedIdx); } ~Reg() { release(); } + Reg(Reg&& other) noexcept { + this->operator=(std::move(other)); + } Reg& operator=(Reg&& other) noexcept { release(); reg = other.reg; regPool = std::move(other.regPool); return *this; } - Reg(Reg&& other) noexcept : reg(other.reg), regPool(std::move(other.regPool)) {} operator TReg&() { ensureValid(); return reg; } operator const TReg&() const { ensureValid(); return reg; } operator Xbyak::RegExp() const { ensureValid(); return reg; } + Reg& operator=(const StackAllocator::Address& addr) { + stack_mov(*this, addr); + return *this; + } int getIdx() const { ensureValid(); return reg.getIdx(); } friend Xbyak::RegExp operator+(const Reg& lhs, const Xbyak::RegExp& rhs) { lhs.ensureValid(); From 3b1267868f2873a4c841fd1cd89290e0e01c99f3 Mon Sep 17 00:00:00 2001 From: Denis Kotov Date: Sat, 8 Oct 2022 13:56:02 +0300 Subject: [PATCH 07/20] Update tests for StackAllocator --- .../unit/nodes/kernels/stack_allocator.cpp | 88 ++++++++++++++++++- 1 file changed, 86 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp b/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp index b5777ab910ff90..5e45d58915d506 100644 --- a/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp +++ b/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp @@ -2,15 +2,18 @@ // SPDX-License-Identifier: Apache-2.0 // -#include #include +#include #include #include #include using namespace ov::intel_cpu; -using namespace InferenceEngine; + +constexpr int x64::cpu_isa_traits::vlen; +constexpr int x64::cpu_isa_traits::vlen; +constexpr int x64::cpu_isa_traits::vlen; class StackAllocatorTest : public ::testing::Test, public x64::jit_generator { protected: @@ -45,6 +48,32 @@ class StackAllocatorTest : public ::testing::Test, public x64::jit_generator { std::shared_ptr stack_allocator_; }; +class AlignedStackAllocatorTest : public StackAllocatorTest { +public: + DECLARE_CPU_JIT_AUX_FUNCTIONS(AlignedStackAllocatorTest) + + void SetUp() override { + } + + void TearDown() override { + } + + void generate() override { + this->preamble(); +// if (x64::mayiuse(x64::avx512_core)) { +// stack_allocator_ = std::make_shared(*this, x64::cpu_isa_traits::vlen); +// } else if (x64::mayiuse(x64::avx2)) { +// stack_allocator_ = std::make_shared(*this, x64::cpu_isa_traits::vlen); +// } else { + stack_allocator_ = std::make_shared(*this, x64::cpu_isa_traits::vlen); +// } + kernel_(); + stack_allocator_->commit(); + stack_allocator_.reset(); + this->postamble(); + } +}; + TEST_F(StackAllocatorTest, Address_ValueEqual) { kernel_ = [this]() { Xbyak::Label l_equal; @@ -205,3 +234,58 @@ TEST_F(StackAllocatorTest, LoopFailed) { }; EXPECT_ANY_THROW(create_kernel()); } + +TEST_F(StackAllocatorTest, AddressCheckAlignmentFailed) { + kernel_ = [this]() { + Xbyak::Label l_equal; + StackAllocator::Address reg_0_5_addr{stack_allocator_, sizeof(int8_t)}; + StackAllocator::Address reg_0_11_addr{stack_allocator_, 16}; + stack_allocator_->commit(); + addps(xmm0, reg_0_11_addr); + }; + auto f = create_kernel(); + ASSERT_DEATH({ + f(); + }, ""); +} + +TEST_F(AlignedStackAllocatorTest, AddressCheckAlignmentSuccess) { + kernel_ = [this]() { + Xbyak::Label l_equal; + StackAllocator::Address reg_0_5_addr{stack_allocator_, sizeof(int8_t)}; + StackAllocator::Address reg_0_11_addr{stack_allocator_, 16, 16}; + stack_allocator_->commit(); + addps(xmm0, reg_0_11_addr); + }; + auto f = create_kernel(); + f(); +} + +TEST_F(AlignedStackAllocatorTest, AddressCheckAlignmentRegSuccess) { + kernel_ = [this]() { + Xbyak::Label l_equal; + StackAllocator::Address reg_0_5_addr{stack_allocator_, sizeof(int8_t)}; + StackAllocator::Reg reg_0_11_addr{stack_allocator_}; + stack_allocator_->commit(); + addps(xmm0, reg_0_11_addr); + }; + auto f = create_kernel(); + f(); +} + +TEST_F(AlignedStackAllocatorTest, AddressCheckAlignmentReuseAddressSuccess) { + kernel_ = [this]() { + Xbyak::Label l_equal; + StackAllocator::Address reg_0_5_addr{stack_allocator_, sizeof(int8_t)}; + StackAllocator::Reg reg_0_11_addr{stack_allocator_}; + StackAllocator::Address reg_0_7_addr{stack_allocator_, sizeof(int8_t)}; + stack_allocator_->commit(); + addps(xmm0, reg_0_11_addr); + reg_0_11_addr.release(); + StackAllocator::Reg reg_0_12_addr{stack_allocator_}; + stack_allocator_->commit(); + addps(xmm0, reg_0_12_addr); + }; + auto f = create_kernel(); + f(); +} From 937de8602d25a0e63367a298147860fe46bab23f Mon Sep 17 00:00:00 2001 From: Denis Kotov Date: Sun, 9 Oct 2022 13:00:14 +0300 Subject: [PATCH 08/20] Added Transaction class --- .../src/nodes/kernels/stack_allocator.hpp | 127 ++++++++++++++---- 1 file changed, 100 insertions(+), 27 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/kernels/stack_allocator.hpp b/src/plugins/intel_cpu/src/nodes/kernels/stack_allocator.hpp index 864ca1b82e76a0..e29d9847b6504c 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/stack_allocator.hpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/stack_allocator.hpp @@ -18,6 +18,7 @@ class StackAllocator final { public: using Ptr = std::shared_ptr; + class Transaction; class Address; template class Reg; @@ -56,16 +57,6 @@ class StackAllocator final { commit(); } - void commit() { - if (current_offset > offset) { - code_generator.sub(code_generator.rsp, current_offset - offset); - offset = current_offset; - } else if (offset > current_offset) { - code_generator.add(code_generator.rsp, offset - current_offset); - offset = current_offset; - } - } - friend void stack_mov(Address& addr, const Xbyak::Xmm& vmm); friend void stack_mov(Address& addr, const Xbyak::Reg& reg); friend void stack_mov(const Xbyak::Xmm& vmm, const Address& addr); @@ -83,6 +74,7 @@ class StackAllocator final { , size(size) {} bool is_used = true; + bool is_transaction = false; Xbyak::Address address; size_t offset{}; size_t size{}; @@ -127,7 +119,9 @@ class StackAllocator final { } } - Allocation::Ptr allocate(const size_t alloc_size, const size_t requested_alignment) { + Allocation::Ptr allocate(const size_t alloc_size, + const size_t requested_alignment, + const bool is_transaction = false) { if (alignment % requested_alignment != 0) { IE_THROW() << "Requested alignment should have 0 reminder of alignment % align !!"; } @@ -147,6 +141,7 @@ class StackAllocator final { if (!free_allocations.empty()) { const auto alloc = free_allocations.front(); alloc->is_used = true; + alloc->is_transaction = is_transaction; return alloc; } else { size_t alloc_offset = 0; @@ -158,6 +153,7 @@ class StackAllocator final { current_offset += aligned_alloc_size; Xbyak::Address addr = code_generator.ptr[base_pointer - current_offset]; const auto alloc = std::make_shared(addr, current_offset, aligned_alloc_size); + alloc->is_transaction = is_transaction; allocations.push_back(alloc); return alloc; } @@ -186,9 +182,24 @@ class StackAllocator final { } } + void commit() { + if (current_offset > offset) { + code_generator.sub(code_generator.rsp, current_offset - offset); + offset = current_offset; + } else if (offset > current_offset) { + code_generator.add(code_generator.rsp, offset - current_offset); + offset = current_offset; + } + for (auto& alloc : allocations) { + alloc->is_transaction = false; + } + is_transaction_= false; + } + x64::jit_generator& code_generator; const Xbyak::Reg base_pointer; + bool is_transaction_{}; size_t offset{}; size_t current_offset{}; size_t alignment{}; @@ -200,52 +211,109 @@ void stack_mov(StackAllocator::Address& addr, const Xbyak::Reg& reg); void stack_mov(const Xbyak::Xmm& vmm, const StackAllocator::Address& addr); void stack_mov(const Xbyak::Reg& reg, const StackAllocator::Address& addr); +class StackAllocator::Transaction { +public: + friend class StackAllocator::Address; + + Transaction(StackAllocator::Ptr stack_allocator) + : stack_allocator_{stack_allocator} { + checkUnique(true); + } + + ~Transaction() { + checkUnique(false); + commit(); + } + + void begin() { + stack_allocator_->is_transaction_ = true; + } + + void commit() { + stack_allocator_->is_transaction_ = false; + stack_allocator_->commit(); + } + +private: + void checkUnique(bool isCtor) { + static thread_local bool isCreated = false; + if (isCtor) { + if (isCreated) { + IE_THROW() << "There should be only one instance of Transaction per thread !!"; + } + isCreated = true; + } else { + isCreated = false; + } + } + + StackAllocator::Ptr stack_allocator_; +}; + class StackAllocator::Address { public: Address() = default; + Address(Transaction& transaction, + const size_t alloc_size, + const size_t requested_alignment = 1) + : transaction_{&transaction} + , stack_allocator_{transaction.stack_allocator_} + , allocation_{stack_allocator_->allocate(alloc_size, requested_alignment, true)} { + transaction.begin(); + } + Address(StackAllocator::Ptr stack_allocator, const size_t alloc_size, const size_t requested_alignment = 1) : stack_allocator_{stack_allocator} , allocation_{stack_allocator_->allocate(alloc_size, requested_alignment)} { + if (stack_allocator_->is_transaction_) { + IE_THROW() << "Cannot allocate Address out of transaction. Please, finish first transaction !!"; + } + stack_allocator_->commit(); } virtual ~Address() { - release(); + if (transaction_) { + release(*transaction_); + } else { + release(); + stack_allocator_->commit(); + } } - Address(Address&& addr) noexcept { - this->operator=(std::move(addr)); - } + Address(Address&& addr) noexcept = delete; + Address& operator=(Address&& addr) noexcept = delete; - Address& operator=(Address&& addr) noexcept { - release(); - stack_allocator_ = std::move(addr.stack_allocator_); - allocation_ = std::move(addr.allocation_); - return *this; + void release(Transaction& transaction) { + if (allocation_ && stack_allocator_) { + transaction.begin(); + allocation_->is_used = false; + stack_allocator_->deallocate(); + } + allocation_ = {}; } void release() { - if (allocation_) { + if (allocation_ && stack_allocator_) { + if (stack_allocator_->is_transaction_) { + IE_THROW() << "Cannot release Address out of transaction. Please, finish first transaction !!"; + } allocation_->is_used = false; - } - if (stack_allocator_) { stack_allocator_->deallocate(); + stack_allocator_->commit(); } allocation_ = {}; - stack_allocator_ = {}; } operator Xbyak::Address&() { ensureValid(); - stack_allocator_->commit(); return allocation_->address; } operator const Xbyak::Address&() const { ensureValid(); - stack_allocator_->commit(); return allocation_->address; } @@ -280,13 +348,14 @@ class StackAllocator::Address { } bool isInitialized() const { - return stack_allocator_ && allocation_; + return stack_allocator_ && allocation_ && !allocation_->is_transaction; } x64::jit_generator& generator() const { return stack_allocator_->code_generator; } + Transaction* transaction_{}; StackAllocator::Ptr stack_allocator_; Allocation::Ptr allocation_; }; @@ -298,6 +367,10 @@ class StackAllocator::Reg : public StackAllocator::Address { Reg() = default; + Reg(StackAllocator::Transaction& transaction) + : Address{transaction, TReg{}.getBit() / 8, getAlignment()} { + } + Reg(StackAllocator::Ptr stack_allocator) : Address{stack_allocator, TReg{}.getBit() / 8, getAlignment()} { } From a0ec185487c6f475d6b3b343cf14f87fd45ebc08 Mon Sep 17 00:00:00 2001 From: Denis Kotov Date: Sun, 9 Oct 2022 13:04:55 +0300 Subject: [PATCH 09/20] Updated stack_allocator test for Transaction --- .../unit/nodes/kernels/stack_allocator.cpp | 122 +++++++++++++++--- 1 file changed, 101 insertions(+), 21 deletions(-) diff --git a/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp b/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp index 5e45d58915d506..52aab49dc297e7 100644 --- a/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp +++ b/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp @@ -39,7 +39,7 @@ class StackAllocatorTest : public ::testing::Test, public x64::jit_generator { this->preamble(); stack_allocator_ = std::make_shared(*this); kernel_(); - stack_allocator_->commit(); + stack_allocator_->release(); stack_allocator_.reset(); this->postamble(); } @@ -68,7 +68,7 @@ class AlignedStackAllocatorTest : public StackAllocatorTest { stack_allocator_ = std::make_shared(*this, x64::cpu_isa_traits::vlen); // } kernel_(); - stack_allocator_->commit(); + stack_allocator_->release(); stack_allocator_.reset(); this->postamble(); } @@ -162,7 +162,6 @@ TEST_F(StackAllocatorTest, LoopSuccess) { Xbyak::Label l_end; xor_(rcx, rcx); StackAllocator::Address reg_100_addr{stack_allocator_, sizeof(int32_t)}; - stack_allocator_->commit(); L(l_loop); { cmp(rcx, 10); @@ -199,11 +198,12 @@ TEST_F(StackAllocatorTest, LoopFailed) { Xbyak::Label l_end; xor_(rcx, rcx); - StackAllocator::Address reg_temp0_addr{stack_allocator_, sizeof(float)}; - StackAllocator::Address reg_temp1_addr{stack_allocator_, sizeof(int32_t)}; - StackAllocator::Address reg_100_addr{stack_allocator_, sizeof(int32_t)}; - StackAllocator::Address reg_200_addr{stack_allocator_, sizeof(int32_t)}; - stack_allocator_->commit(); + StackAllocator::Transaction transaction{stack_allocator_}; + StackAllocator::Address reg_temp0_addr{transaction, sizeof(float)}; + StackAllocator::Address reg_temp1_addr{transaction, sizeof(int32_t)}; + StackAllocator::Address reg_100_addr{transaction, sizeof(int32_t)}; + StackAllocator::Address reg_200_addr{transaction, sizeof(int32_t)}; + transaction.commit(); L(l_loop); { cmp(rcx, 10); @@ -238,9 +238,10 @@ TEST_F(StackAllocatorTest, LoopFailed) { TEST_F(StackAllocatorTest, AddressCheckAlignmentFailed) { kernel_ = [this]() { Xbyak::Label l_equal; - StackAllocator::Address reg_0_5_addr{stack_allocator_, sizeof(int8_t)}; - StackAllocator::Address reg_0_11_addr{stack_allocator_, 16}; - stack_allocator_->commit(); + StackAllocator::Transaction transaction{stack_allocator_}; + StackAllocator::Address reg_0_5_addr{transaction, sizeof(int8_t)}; + StackAllocator::Address reg_0_11_addr{transaction, 16}; + transaction.commit(); addps(xmm0, reg_0_11_addr); }; auto f = create_kernel(); @@ -252,9 +253,10 @@ TEST_F(StackAllocatorTest, AddressCheckAlignmentFailed) { TEST_F(AlignedStackAllocatorTest, AddressCheckAlignmentSuccess) { kernel_ = [this]() { Xbyak::Label l_equal; - StackAllocator::Address reg_0_5_addr{stack_allocator_, sizeof(int8_t)}; - StackAllocator::Address reg_0_11_addr{stack_allocator_, 16, 16}; - stack_allocator_->commit(); + StackAllocator::Transaction transaction{stack_allocator_}; + StackAllocator::Address reg_0_5_addr{transaction, sizeof(int8_t)}; + StackAllocator::Address reg_0_11_addr{transaction, 16, 16}; + transaction.commit(); addps(xmm0, reg_0_11_addr); }; auto f = create_kernel(); @@ -264,9 +266,10 @@ TEST_F(AlignedStackAllocatorTest, AddressCheckAlignmentSuccess) { TEST_F(AlignedStackAllocatorTest, AddressCheckAlignmentRegSuccess) { kernel_ = [this]() { Xbyak::Label l_equal; + StackAllocator::Transaction transaction{stack_allocator_}; StackAllocator::Address reg_0_5_addr{stack_allocator_, sizeof(int8_t)}; StackAllocator::Reg reg_0_11_addr{stack_allocator_}; - stack_allocator_->commit(); + transaction.commit(); addps(xmm0, reg_0_11_addr); }; auto f = create_kernel(); @@ -276,16 +279,93 @@ TEST_F(AlignedStackAllocatorTest, AddressCheckAlignmentRegSuccess) { TEST_F(AlignedStackAllocatorTest, AddressCheckAlignmentReuseAddressSuccess) { kernel_ = [this]() { Xbyak::Label l_equal; - StackAllocator::Address reg_0_5_addr{stack_allocator_, sizeof(int8_t)}; - StackAllocator::Reg reg_0_11_addr{stack_allocator_}; - StackAllocator::Address reg_0_7_addr{stack_allocator_, sizeof(int8_t)}; - stack_allocator_->commit(); + StackAllocator::Transaction transaction{stack_allocator_}; + StackAllocator::Address reg_0_5_addr{transaction, sizeof(int8_t)}; + StackAllocator::Reg reg_0_11_addr{transaction}; + StackAllocator::Address reg_0_7_addr{transaction, sizeof(int8_t)}; + transaction.commit(); + addps(xmm0, reg_0_11_addr); + reg_0_11_addr.release(); + StackAllocator::Reg reg_0_12_addr{transaction}; + transaction.commit(); + addps(xmm0, reg_0_12_addr); + }; + auto f = create_kernel(); + f(); +} + +TEST_F(AlignedStackAllocatorTest, AddressCheckAlignmentReuseAddressSuccess2) { + kernel_ = [this]() { + Xbyak::Label l_equal; + StackAllocator::Transaction transaction{stack_allocator_}; + StackAllocator::Address reg_0_5_addr{transaction, sizeof(int8_t)}; + StackAllocator::Reg reg_0_11_addr{transaction}; + StackAllocator::Address reg_0_7_addr{transaction, sizeof(int8_t)}; + StackAllocator::Reg reg_0_122_addr{stack_allocator_}; + transaction.commit(); + addps(xmm0, reg_0_11_addr); + reg_0_11_addr.release(); + StackAllocator::Reg reg_0_12_addr{transaction}; + transaction.commit(); + addps(xmm0, reg_0_12_addr); + }; + + EXPECT_ANY_THROW(create_kernel()); +} + +TEST_F(AlignedStackAllocatorTest, AddressCheckAlignmentReuseAddressSuccess3) { + kernel_ = [this]() { + Xbyak::Label l_equal; + StackAllocator::Transaction transaction{stack_allocator_}; + StackAllocator::Address reg_0_5_addr{transaction, sizeof(int8_t)}; + StackAllocator::Reg reg_0_11_addr{transaction}; + StackAllocator::Address reg_0_7_addr{transaction, sizeof(int8_t)}; addps(xmm0, reg_0_11_addr); + transaction.commit(); reg_0_11_addr.release(); - StackAllocator::Reg reg_0_12_addr{stack_allocator_}; - stack_allocator_->commit(); + StackAllocator::Reg reg_0_12_addr{transaction}; + transaction.commit(); + addps(xmm0, reg_0_12_addr); + }; + + EXPECT_ANY_THROW(create_kernel()); +} + +TEST_F(AlignedStackAllocatorTest, AddressCheckAlignmentReuseAddressSuccess4) { + kernel_ = [this]() { + Xbyak::Label l_equal; + StackAllocator::Transaction transaction{stack_allocator_}; + StackAllocator::Address reg_0_5_addr{transaction, sizeof(int8_t)}; + StackAllocator::Reg reg_0_11_addr{transaction}; + StackAllocator::Address reg_0_7_addr{transaction, sizeof(int8_t)}; + transaction.commit(); + addps(xmm0, reg_0_11_addr); + reg_0_11_addr.release(transaction); + reg_0_7_addr.release(transaction); + StackAllocator::Reg reg_0_12_addr{transaction}; + transaction.commit(); addps(xmm0, reg_0_12_addr); }; + auto f = create_kernel(); f(); } + +TEST_F(AlignedStackAllocatorTest, AddressCheckAlignmentReuseAddressSuccess5) { + kernel_ = [this]() { + Xbyak::Label l_equal; + StackAllocator::Transaction transaction{stack_allocator_}; + StackAllocator::Address reg_0_5_addr{transaction, sizeof(int8_t)}; + StackAllocator::Reg reg_0_11_addr{transaction}; + StackAllocator::Address reg_0_7_addr{transaction, sizeof(int8_t)}; + transaction.commit(); + addps(xmm0, reg_0_11_addr); + reg_0_11_addr.release(transaction); + reg_0_7_addr.release(); + StackAllocator::Reg reg_0_12_addr{transaction}; + transaction.commit(); + addps(xmm0, reg_0_12_addr); + }; + + EXPECT_ANY_THROW(create_kernel()); +} From 28c56ad3cc153a85d1fb0c65a8545be624dc26bf Mon Sep 17 00:00:00 2001 From: Denis Kotov Date: Sun, 9 Oct 2022 17:25:58 +0300 Subject: [PATCH 10/20] Updated tests for StackAllocator for Transaction --- .../unit/nodes/kernels/stack_allocator.cpp | 195 +++++++++--------- 1 file changed, 103 insertions(+), 92 deletions(-) diff --git a/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp b/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp index 52aab49dc297e7..4c0d030d209296 100644 --- a/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp +++ b/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp @@ -19,6 +19,14 @@ class StackAllocatorTest : public ::testing::Test, public x64::jit_generator { protected: DECLARE_CPU_JIT_AUX_FUNCTIONS(StackAllocatorTest) + StackAllocatorTest() + : StackAllocatorTest(x64::isa_all) { + } + + explicit StackAllocatorTest(x64::cpu_isa_t max_cpu_isa) + : x64::jit_generator(jit_name(), nullptr, 256 * 1024, true, max_cpu_isa) { + } + void SetUp() override { } @@ -48,32 +56,6 @@ class StackAllocatorTest : public ::testing::Test, public x64::jit_generator { std::shared_ptr stack_allocator_; }; -class AlignedStackAllocatorTest : public StackAllocatorTest { -public: - DECLARE_CPU_JIT_AUX_FUNCTIONS(AlignedStackAllocatorTest) - - void SetUp() override { - } - - void TearDown() override { - } - - void generate() override { - this->preamble(); -// if (x64::mayiuse(x64::avx512_core)) { -// stack_allocator_ = std::make_shared(*this, x64::cpu_isa_traits::vlen); -// } else if (x64::mayiuse(x64::avx2)) { -// stack_allocator_ = std::make_shared(*this, x64::cpu_isa_traits::vlen); -// } else { - stack_allocator_ = std::make_shared(*this, x64::cpu_isa_traits::vlen); -// } - kernel_(); - stack_allocator_->release(); - stack_allocator_.reset(); - this->postamble(); - } -}; - TEST_F(StackAllocatorTest, Address_ValueEqual) { kernel_ = [this]() { Xbyak::Label l_equal; @@ -149,8 +131,8 @@ TEST_F(StackAllocatorTest, Reg32_ValueNotEqual) { TEST_F(StackAllocatorTest, AddressCheck) { kernel_ = [this]() { Xbyak::Label l_equal; - StackAllocator::Address reg_0_5_addr{stack_allocator_, sizeof(int32_t)}; - EXPECT_EQ(static_cast(reg_0_5_addr), ptr[rbp - sizeof(int32_t)]); + StackAllocator::Address dword_addr{stack_allocator_, sizeof(int32_t)}; + EXPECT_EQ(static_cast(dword_addr), ptr[rbp - sizeof(int32_t)]); }; create_kernel(); } @@ -239,10 +221,10 @@ TEST_F(StackAllocatorTest, AddressCheckAlignmentFailed) { kernel_ = [this]() { Xbyak::Label l_equal; StackAllocator::Transaction transaction{stack_allocator_}; - StackAllocator::Address reg_0_5_addr{transaction, sizeof(int8_t)}; - StackAllocator::Address reg_0_11_addr{transaction, 16}; + StackAllocator::Address byte_addr{transaction, sizeof(int8_t)}; + StackAllocator::Address xmm0_addr{transaction, 16}; transaction.commit(); - addps(xmm0, reg_0_11_addr); + addps(xmm0, xmm0_addr); }; auto f = create_kernel(); ASSERT_DEATH({ @@ -250,122 +232,151 @@ TEST_F(StackAllocatorTest, AddressCheckAlignmentFailed) { }, ""); } -TEST_F(AlignedStackAllocatorTest, AddressCheckAlignmentSuccess) { - kernel_ = [this]() { +template +class AlignedStackAllocatorTest : public StackAllocatorTest { +public: + DECLARE_CPU_JIT_AUX_FUNCTIONS(AlignedStackAllocatorTest) + + AlignedStackAllocatorTest() + : StackAllocatorTest(T::isa) { + } + + void SetUp() override { + } + + void TearDown() override { + } + + void generate() override { + this->preamble(); + stack_allocator_ = std::make_shared(*this, x64::cpu_isa_traits::vlen); + kernel_(); + stack_allocator_->release(); + stack_allocator_.reset(); + this->postamble(); + } +}; + +template +struct IsaParam { static constexpr x64::cpu_isa_t isa = Isa; }; + +using dasdasd = ::testing::Types, IsaParam, IsaParam>; +TYPED_TEST_SUITE(AlignedStackAllocatorTest, dasdasd); + +TYPED_TEST(AlignedStackAllocatorTest, AddressCheckAlignmentSuccess) { + this->kernel_ = [this]() { Xbyak::Label l_equal; - StackAllocator::Transaction transaction{stack_allocator_}; - StackAllocator::Address reg_0_5_addr{transaction, sizeof(int8_t)}; - StackAllocator::Address reg_0_11_addr{transaction, 16, 16}; + StackAllocator::Transaction transaction{this->stack_allocator_}; + StackAllocator::Address byte_addr{transaction, sizeof(int8_t)}; + StackAllocator::Address xmm0_addr{transaction, 16, 16}; transaction.commit(); - addps(xmm0, reg_0_11_addr); + this->addps(this->xmm0, xmm0_addr); }; - auto f = create_kernel(); + auto f = this->template create_kernel(); f(); } -TEST_F(AlignedStackAllocatorTest, AddressCheckAlignmentRegSuccess) { - kernel_ = [this]() { +TYPED_TEST(AlignedStackAllocatorTest, AddressCheckAlignmentRegSuccess) { + this->kernel_ = [this]() { Xbyak::Label l_equal; - StackAllocator::Transaction transaction{stack_allocator_}; - StackAllocator::Address reg_0_5_addr{stack_allocator_, sizeof(int8_t)}; - StackAllocator::Reg reg_0_11_addr{stack_allocator_}; - transaction.commit(); - addps(xmm0, reg_0_11_addr); + StackAllocator::Address byte_addr{this->stack_allocator_, sizeof(int8_t)}; + StackAllocator::Reg xmm0_addr{this->stack_allocator_}; + this->addps(this->xmm0, xmm0_addr); }; - auto f = create_kernel(); + auto f = this->template create_kernel(); f(); } -TEST_F(AlignedStackAllocatorTest, AddressCheckAlignmentReuseAddressSuccess) { - kernel_ = [this]() { +TYPED_TEST(AlignedStackAllocatorTest, AddressCheckAlignmentReuseAddressSuccess) { + this->kernel_ = [this]() { Xbyak::Label l_equal; - StackAllocator::Transaction transaction{stack_allocator_}; - StackAllocator::Address reg_0_5_addr{transaction, sizeof(int8_t)}; - StackAllocator::Reg reg_0_11_addr{transaction}; + StackAllocator::Transaction transaction{this->stack_allocator_}; + StackAllocator::Address byte_addr{transaction, sizeof(int8_t)}; + StackAllocator::Reg xmm0_addr{transaction}; StackAllocator::Address reg_0_7_addr{transaction, sizeof(int8_t)}; transaction.commit(); - addps(xmm0, reg_0_11_addr); - reg_0_11_addr.release(); + this->addps(this->xmm0, xmm0_addr); + xmm0_addr.release(); StackAllocator::Reg reg_0_12_addr{transaction}; transaction.commit(); - addps(xmm0, reg_0_12_addr); + this->addps(this->xmm0, reg_0_12_addr); }; - auto f = create_kernel(); + auto f = this->template create_kernel(); f(); } -TEST_F(AlignedStackAllocatorTest, AddressCheckAlignmentReuseAddressSuccess2) { - kernel_ = [this]() { +TYPED_TEST(AlignedStackAllocatorTest, AddressCheckAlignmentReuseAddressSuccess2) { + this->kernel_ = [this]() { Xbyak::Label l_equal; - StackAllocator::Transaction transaction{stack_allocator_}; - StackAllocator::Address reg_0_5_addr{transaction, sizeof(int8_t)}; - StackAllocator::Reg reg_0_11_addr{transaction}; + StackAllocator::Transaction transaction{this->stack_allocator_}; + StackAllocator::Address byte_addr{transaction, sizeof(int8_t)}; + StackAllocator::Reg xmm0_addr{transaction}; StackAllocator::Address reg_0_7_addr{transaction, sizeof(int8_t)}; - StackAllocator::Reg reg_0_122_addr{stack_allocator_}; + StackAllocator::Reg reg_0_122_addr{this->stack_allocator_}; transaction.commit(); - addps(xmm0, reg_0_11_addr); - reg_0_11_addr.release(); + this->addps(this->xmm0, xmm0_addr); + xmm0_addr.release(); StackAllocator::Reg reg_0_12_addr{transaction}; transaction.commit(); - addps(xmm0, reg_0_12_addr); + this->addps(this->xmm0, reg_0_12_addr); }; - EXPECT_ANY_THROW(create_kernel()); + EXPECT_ANY_THROW(this->template create_kernel()); } -TEST_F(AlignedStackAllocatorTest, AddressCheckAlignmentReuseAddressSuccess3) { - kernel_ = [this]() { +TYPED_TEST(AlignedStackAllocatorTest, AddressCheckAlignmentReuseAddressSuccess3) { + this->kernel_ = [this]() { Xbyak::Label l_equal; - StackAllocator::Transaction transaction{stack_allocator_}; - StackAllocator::Address reg_0_5_addr{transaction, sizeof(int8_t)}; - StackAllocator::Reg reg_0_11_addr{transaction}; + StackAllocator::Transaction transaction{this->stack_allocator_}; + StackAllocator::Address byte_addr{transaction, sizeof(int8_t)}; + StackAllocator::Reg xmm0_addr{transaction}; StackAllocator::Address reg_0_7_addr{transaction, sizeof(int8_t)}; - addps(xmm0, reg_0_11_addr); + this->addps(this->xmm0, xmm0_addr); transaction.commit(); - reg_0_11_addr.release(); + xmm0_addr.release(); StackAllocator::Reg reg_0_12_addr{transaction}; transaction.commit(); - addps(xmm0, reg_0_12_addr); + this->addps(this->xmm0, reg_0_12_addr); }; - EXPECT_ANY_THROW(create_kernel()); + EXPECT_ANY_THROW(this->template create_kernel()); } -TEST_F(AlignedStackAllocatorTest, AddressCheckAlignmentReuseAddressSuccess4) { - kernel_ = [this]() { +TYPED_TEST(AlignedStackAllocatorTest, AddressCheckAlignmentReuseAddressSuccess4) { + this->kernel_ = [this]() { Xbyak::Label l_equal; - StackAllocator::Transaction transaction{stack_allocator_}; - StackAllocator::Address reg_0_5_addr{transaction, sizeof(int8_t)}; - StackAllocator::Reg reg_0_11_addr{transaction}; + StackAllocator::Transaction transaction{this->stack_allocator_}; + StackAllocator::Address byte_addr{transaction, sizeof(int8_t)}; + StackAllocator::Reg xmm0_addr{transaction}; StackAllocator::Address reg_0_7_addr{transaction, sizeof(int8_t)}; transaction.commit(); - addps(xmm0, reg_0_11_addr); - reg_0_11_addr.release(transaction); + this->addps(this->xmm0, xmm0_addr); + xmm0_addr.release(transaction); reg_0_7_addr.release(transaction); StackAllocator::Reg reg_0_12_addr{transaction}; transaction.commit(); - addps(xmm0, reg_0_12_addr); + this->addps(this->xmm0, reg_0_12_addr); }; - auto f = create_kernel(); + auto f = this->template create_kernel(); f(); } -TEST_F(AlignedStackAllocatorTest, AddressCheckAlignmentReuseAddressSuccess5) { - kernel_ = [this]() { +TYPED_TEST(AlignedStackAllocatorTest, AddressCheckAlignmentReuseAddressSuccess5) { + this->kernel_ = [this]() { Xbyak::Label l_equal; - StackAllocator::Transaction transaction{stack_allocator_}; - StackAllocator::Address reg_0_5_addr{transaction, sizeof(int8_t)}; - StackAllocator::Reg reg_0_11_addr{transaction}; + StackAllocator::Transaction transaction{this->stack_allocator_}; + StackAllocator::Address byte_addr{transaction, sizeof(int8_t)}; + StackAllocator::Reg xmm0_addr{transaction}; StackAllocator::Address reg_0_7_addr{transaction, sizeof(int8_t)}; transaction.commit(); - addps(xmm0, reg_0_11_addr); - reg_0_11_addr.release(transaction); + this->uni_vaddps(this->xmm0, this->xmm0, xmm0_addr); + xmm0_addr.release(transaction); reg_0_7_addr.release(); StackAllocator::Reg reg_0_12_addr{transaction}; transaction.commit(); - addps(xmm0, reg_0_12_addr); + this->addps(this->xmm0, reg_0_12_addr); }; - EXPECT_ANY_THROW(create_kernel()); + EXPECT_ANY_THROW(this->template create_kernel()); } From e70e1a7f572e07f0fe43f426d1792faa96bcbd99 Mon Sep 17 00:00:00 2001 From: Denis Kotov Date: Sun, 9 Oct 2022 23:15:08 +0300 Subject: [PATCH 11/20] Made StackAllocator::Address not copiable and movable --- .../src/nodes/kernels/stack_allocator.hpp | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/kernels/stack_allocator.hpp b/src/plugins/intel_cpu/src/nodes/kernels/stack_allocator.hpp index e29d9847b6504c..f0a8b3d19d63a2 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/stack_allocator.hpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/stack_allocator.hpp @@ -182,6 +182,14 @@ class StackAllocator final { } } + bool isTransaction() const { + return is_transaction_; + } + + void begin() { + is_transaction_ = true; + } + void commit() { if (current_offset > offset) { code_generator.sub(code_generator.rsp, current_offset - offset); @@ -190,10 +198,10 @@ class StackAllocator final { code_generator.add(code_generator.rsp, offset - current_offset); offset = current_offset; } + is_transaction_ = false; for (auto& alloc : allocations) { alloc->is_transaction = false; } - is_transaction_= false; } x64::jit_generator& code_generator; @@ -226,11 +234,10 @@ class StackAllocator::Transaction { } void begin() { - stack_allocator_->is_transaction_ = true; + stack_allocator_->begin(); } void commit() { - stack_allocator_->is_transaction_ = false; stack_allocator_->commit(); } @@ -252,8 +259,6 @@ class StackAllocator::Transaction { class StackAllocator::Address { public: - Address() = default; - Address(Transaction& transaction, const size_t alloc_size, const size_t requested_alignment = 1) @@ -268,12 +273,17 @@ class StackAllocator::Address { const size_t requested_alignment = 1) : stack_allocator_{stack_allocator} , allocation_{stack_allocator_->allocate(alloc_size, requested_alignment)} { - if (stack_allocator_->is_transaction_) { + if (stack_allocator_->isTransaction()) { IE_THROW() << "Cannot allocate Address out of transaction. Please, finish first transaction !!"; } stack_allocator_->commit(); } + Address(const Address& addr) = delete; + Address& operator=(const Address& addr) = delete; + Address(Address&& addr) noexcept = delete; + Address& operator=(Address&& addr) noexcept = delete; + virtual ~Address() { if (transaction_) { release(*transaction_); @@ -283,9 +293,6 @@ class StackAllocator::Address { } } - Address(Address&& addr) noexcept = delete; - Address& operator=(Address&& addr) noexcept = delete; - void release(Transaction& transaction) { if (allocation_ && stack_allocator_) { transaction.begin(); @@ -297,7 +304,7 @@ class StackAllocator::Address { void release() { if (allocation_ && stack_allocator_) { - if (stack_allocator_->is_transaction_) { + if (stack_allocator_->isTransaction()) { IE_THROW() << "Cannot release Address out of transaction. Please, finish first transaction !!"; } allocation_->is_used = false; @@ -365,8 +372,6 @@ class StackAllocator::Reg : public StackAllocator::Address { public: static_assert(std::is_base_of::value, "TReg should be a Xbyak::Reg based !!"); - Reg() = default; - Reg(StackAllocator::Transaction& transaction) : Address{transaction, TReg{}.getBit() / 8, getAlignment()} { } @@ -375,9 +380,6 @@ class StackAllocator::Reg : public StackAllocator::Address { : Address{stack_allocator, TReg{}.getBit() / 8, getAlignment()} { } - Reg(Reg&& addr) noexcept = default; - Reg& operator=(Reg&& addr) noexcept = default; - Reg& operator=(const Xbyak::Xmm& vmm) override { Address::operator=(vmm); return *this; From 7bf863c9456d8ff7c349c03e3d2f8b2bf90ffd40 Mon Sep 17 00:00:00 2001 From: Denis Kotov Date: Sun, 9 Oct 2022 23:31:53 +0300 Subject: [PATCH 12/20] Changed names of variables in tests for StackAllocator --- .../unit/nodes/kernels/stack_allocator.cpp | 52 +++++++++---------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp b/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp index 4c0d030d209296..f801ea652a33d7 100644 --- a/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp +++ b/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp @@ -221,7 +221,7 @@ TEST_F(StackAllocatorTest, AddressCheckAlignmentFailed) { kernel_ = [this]() { Xbyak::Label l_equal; StackAllocator::Transaction transaction{stack_allocator_}; - StackAllocator::Address byte_addr{transaction, sizeof(int8_t)}; + StackAllocator::Address byte0_addr{transaction, sizeof(int8_t)}; StackAllocator::Address xmm0_addr{transaction, 16}; transaction.commit(); addps(xmm0, xmm0_addr); @@ -267,7 +267,7 @@ TYPED_TEST(AlignedStackAllocatorTest, AddressCheckAlignmentSuccess) { this->kernel_ = [this]() { Xbyak::Label l_equal; StackAllocator::Transaction transaction{this->stack_allocator_}; - StackAllocator::Address byte_addr{transaction, sizeof(int8_t)}; + StackAllocator::Address byte0_addr{transaction, sizeof(int8_t)}; StackAllocator::Address xmm0_addr{transaction, 16, 16}; transaction.commit(); this->addps(this->xmm0, xmm0_addr); @@ -279,7 +279,7 @@ TYPED_TEST(AlignedStackAllocatorTest, AddressCheckAlignmentSuccess) { TYPED_TEST(AlignedStackAllocatorTest, AddressCheckAlignmentRegSuccess) { this->kernel_ = [this]() { Xbyak::Label l_equal; - StackAllocator::Address byte_addr{this->stack_allocator_, sizeof(int8_t)}; + StackAllocator::Address byte0_addr{this->stack_allocator_, sizeof(int8_t)}; StackAllocator::Reg xmm0_addr{this->stack_allocator_}; this->addps(this->xmm0, xmm0_addr); }; @@ -291,15 +291,15 @@ TYPED_TEST(AlignedStackAllocatorTest, AddressCheckAlignmentReuseAddressSuccess) this->kernel_ = [this]() { Xbyak::Label l_equal; StackAllocator::Transaction transaction{this->stack_allocator_}; - StackAllocator::Address byte_addr{transaction, sizeof(int8_t)}; + StackAllocator::Address byte0_addr{transaction, sizeof(int8_t)}; StackAllocator::Reg xmm0_addr{transaction}; - StackAllocator::Address reg_0_7_addr{transaction, sizeof(int8_t)}; + StackAllocator::Address byte1_addr{transaction, sizeof(int8_t)}; transaction.commit(); this->addps(this->xmm0, xmm0_addr); xmm0_addr.release(); - StackAllocator::Reg reg_0_12_addr{transaction}; + StackAllocator::Reg xmm1_addr{transaction}; transaction.commit(); - this->addps(this->xmm0, reg_0_12_addr); + this->addps(this->xmm0, xmm1_addr); }; auto f = this->template create_kernel(); f(); @@ -309,16 +309,16 @@ TYPED_TEST(AlignedStackAllocatorTest, AddressCheckAlignmentReuseAddressSuccess2) this->kernel_ = [this]() { Xbyak::Label l_equal; StackAllocator::Transaction transaction{this->stack_allocator_}; - StackAllocator::Address byte_addr{transaction, sizeof(int8_t)}; + StackAllocator::Address byte0_addr{transaction, sizeof(int8_t)}; StackAllocator::Reg xmm0_addr{transaction}; - StackAllocator::Address reg_0_7_addr{transaction, sizeof(int8_t)}; - StackAllocator::Reg reg_0_122_addr{this->stack_allocator_}; + StackAllocator::Address byte1_addr{transaction, sizeof(int8_t)}; + StackAllocator::Reg xmm1_addr{this->stack_allocator_}; transaction.commit(); this->addps(this->xmm0, xmm0_addr); xmm0_addr.release(); - StackAllocator::Reg reg_0_12_addr{transaction}; + StackAllocator::Reg xmm2_addr{transaction}; transaction.commit(); - this->addps(this->xmm0, reg_0_12_addr); + this->addps(this->xmm0, xmm2_addr); }; EXPECT_ANY_THROW(this->template create_kernel()); @@ -328,15 +328,15 @@ TYPED_TEST(AlignedStackAllocatorTest, AddressCheckAlignmentReuseAddressSuccess3) this->kernel_ = [this]() { Xbyak::Label l_equal; StackAllocator::Transaction transaction{this->stack_allocator_}; - StackAllocator::Address byte_addr{transaction, sizeof(int8_t)}; + StackAllocator::Address byte0_addr{transaction, sizeof(int8_t)}; StackAllocator::Reg xmm0_addr{transaction}; - StackAllocator::Address reg_0_7_addr{transaction, sizeof(int8_t)}; + StackAllocator::Address byte1_addr{transaction, sizeof(int8_t)}; this->addps(this->xmm0, xmm0_addr); transaction.commit(); xmm0_addr.release(); - StackAllocator::Reg reg_0_12_addr{transaction}; + StackAllocator::Reg xmm1_addr{transaction}; transaction.commit(); - this->addps(this->xmm0, reg_0_12_addr); + this->addps(this->xmm0, xmm1_addr); }; EXPECT_ANY_THROW(this->template create_kernel()); @@ -346,16 +346,16 @@ TYPED_TEST(AlignedStackAllocatorTest, AddressCheckAlignmentReuseAddressSuccess4) this->kernel_ = [this]() { Xbyak::Label l_equal; StackAllocator::Transaction transaction{this->stack_allocator_}; - StackAllocator::Address byte_addr{transaction, sizeof(int8_t)}; + StackAllocator::Address byte0_addr{transaction, sizeof(int8_t)}; StackAllocator::Reg xmm0_addr{transaction}; - StackAllocator::Address reg_0_7_addr{transaction, sizeof(int8_t)}; + StackAllocator::Address byte1_addr{transaction, sizeof(int8_t)}; transaction.commit(); this->addps(this->xmm0, xmm0_addr); xmm0_addr.release(transaction); - reg_0_7_addr.release(transaction); - StackAllocator::Reg reg_0_12_addr{transaction}; + byte1_addr.release(transaction); + StackAllocator::Reg xmm1_addr{transaction}; transaction.commit(); - this->addps(this->xmm0, reg_0_12_addr); + this->addps(this->xmm0, xmm1_addr); }; auto f = this->template create_kernel(); @@ -366,16 +366,16 @@ TYPED_TEST(AlignedStackAllocatorTest, AddressCheckAlignmentReuseAddressSuccess5) this->kernel_ = [this]() { Xbyak::Label l_equal; StackAllocator::Transaction transaction{this->stack_allocator_}; - StackAllocator::Address byte_addr{transaction, sizeof(int8_t)}; + StackAllocator::Address byte0_addr{transaction, sizeof(int8_t)}; StackAllocator::Reg xmm0_addr{transaction}; - StackAllocator::Address reg_0_7_addr{transaction, sizeof(int8_t)}; + StackAllocator::Address byte1_addr{transaction, sizeof(int8_t)}; transaction.commit(); this->uni_vaddps(this->xmm0, this->xmm0, xmm0_addr); xmm0_addr.release(transaction); - reg_0_7_addr.release(); - StackAllocator::Reg reg_0_12_addr{transaction}; + byte1_addr.release(); + StackAllocator::Reg xmm1_addr{transaction}; transaction.commit(); - this->addps(this->xmm0, reg_0_12_addr); + this->addps(this->xmm0, xmm1_addr); }; EXPECT_ANY_THROW(this->template create_kernel()); From 5104f839ce770d937a3a5e602fbbea3d6b222175 Mon Sep 17 00:00:00 2001 From: Denis Kotov Date: Sun, 9 Oct 2022 23:35:18 +0300 Subject: [PATCH 13/20] Fixed name of types for IsaParam --- .../intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp b/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp index f801ea652a33d7..f6de3ab78f0d40 100644 --- a/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp +++ b/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp @@ -260,8 +260,8 @@ class AlignedStackAllocatorTest : public StackAllocatorTest { template struct IsaParam { static constexpr x64::cpu_isa_t isa = Isa; }; -using dasdasd = ::testing::Types, IsaParam, IsaParam>; -TYPED_TEST_SUITE(AlignedStackAllocatorTest, dasdasd); +using IsaParamTypes = ::testing::Types, IsaParam, IsaParam>; +TYPED_TEST_SUITE(AlignedStackAllocatorTest, IsaParamTypes); TYPED_TEST(AlignedStackAllocatorTest, AddressCheckAlignmentSuccess) { this->kernel_ = [this]() { From 126f609390e07b2394fabb07d3f425c516e9004a Mon Sep 17 00:00:00 2001 From: Denis Kotov Date: Mon, 10 Oct 2022 13:10:51 +0300 Subject: [PATCH 14/20] Added tests for StackAllocator::Reg and StackAllocator::Reg --- .../unit/nodes/kernels/stack_allocator.cpp | 124 ++++++++++++++++-- 1 file changed, 115 insertions(+), 9 deletions(-) diff --git a/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp b/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp index f6de3ab78f0d40..9e5af041aec5a5 100644 --- a/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp +++ b/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp @@ -128,7 +128,7 @@ TEST_F(StackAllocatorTest, Reg32_ValueNotEqual) { EXPECT_EQ(r, 0); } -TEST_F(StackAllocatorTest, AddressCheck) { +TEST_F(StackAllocatorTest, Address_Check) { kernel_ = [this]() { Xbyak::Label l_equal; StackAllocator::Address dword_addr{stack_allocator_, sizeof(int32_t)}; @@ -217,7 +217,7 @@ TEST_F(StackAllocatorTest, LoopFailed) { EXPECT_ANY_THROW(create_kernel()); } -TEST_F(StackAllocatorTest, AddressCheckAlignmentFailed) { +TEST_F(StackAllocatorTest, Address_CheckAlignmentFailed) { kernel_ = [this]() { Xbyak::Label l_equal; StackAllocator::Transaction transaction{stack_allocator_}; @@ -263,7 +263,7 @@ struct IsaParam { static constexpr x64::cpu_isa_t isa = Isa; }; using IsaParamTypes = ::testing::Types, IsaParam, IsaParam>; TYPED_TEST_SUITE(AlignedStackAllocatorTest, IsaParamTypes); -TYPED_TEST(AlignedStackAllocatorTest, AddressCheckAlignmentSuccess) { +TYPED_TEST(AlignedStackAllocatorTest, Address_CheckAlignmentSuccess) { this->kernel_ = [this]() { Xbyak::Label l_equal; StackAllocator::Transaction transaction{this->stack_allocator_}; @@ -276,7 +276,7 @@ TYPED_TEST(AlignedStackAllocatorTest, AddressCheckAlignmentSuccess) { f(); } -TYPED_TEST(AlignedStackAllocatorTest, AddressCheckAlignmentRegSuccess) { +TYPED_TEST(AlignedStackAllocatorTest, Address_CheckAlignmentRegSuccess) { this->kernel_ = [this]() { Xbyak::Label l_equal; StackAllocator::Address byte0_addr{this->stack_allocator_, sizeof(int8_t)}; @@ -287,7 +287,7 @@ TYPED_TEST(AlignedStackAllocatorTest, AddressCheckAlignmentRegSuccess) { f(); } -TYPED_TEST(AlignedStackAllocatorTest, AddressCheckAlignmentReuseAddressSuccess) { +TYPED_TEST(AlignedStackAllocatorTest, Address_CheckAlignmentReuseAddressSuccess) { this->kernel_ = [this]() { Xbyak::Label l_equal; StackAllocator::Transaction transaction{this->stack_allocator_}; @@ -305,7 +305,7 @@ TYPED_TEST(AlignedStackAllocatorTest, AddressCheckAlignmentReuseAddressSuccess) f(); } -TYPED_TEST(AlignedStackAllocatorTest, AddressCheckAlignmentReuseAddressSuccess2) { +TYPED_TEST(AlignedStackAllocatorTest, Address_CheckAlignmentReuseAddressSuccess2) { this->kernel_ = [this]() { Xbyak::Label l_equal; StackAllocator::Transaction transaction{this->stack_allocator_}; @@ -324,7 +324,7 @@ TYPED_TEST(AlignedStackAllocatorTest, AddressCheckAlignmentReuseAddressSuccess2) EXPECT_ANY_THROW(this->template create_kernel()); } -TYPED_TEST(AlignedStackAllocatorTest, AddressCheckAlignmentReuseAddressSuccess3) { +TYPED_TEST(AlignedStackAllocatorTest, Address_CheckAlignmentReuseAddressSuccess3) { this->kernel_ = [this]() { Xbyak::Label l_equal; StackAllocator::Transaction transaction{this->stack_allocator_}; @@ -342,7 +342,7 @@ TYPED_TEST(AlignedStackAllocatorTest, AddressCheckAlignmentReuseAddressSuccess3) EXPECT_ANY_THROW(this->template create_kernel()); } -TYPED_TEST(AlignedStackAllocatorTest, AddressCheckAlignmentReuseAddressSuccess4) { +TYPED_TEST(AlignedStackAllocatorTest, Address_CheckAlignmentReuseAddressSuccess4) { this->kernel_ = [this]() { Xbyak::Label l_equal; StackAllocator::Transaction transaction{this->stack_allocator_}; @@ -362,7 +362,7 @@ TYPED_TEST(AlignedStackAllocatorTest, AddressCheckAlignmentReuseAddressSuccess4) f(); } -TYPED_TEST(AlignedStackAllocatorTest, AddressCheckAlignmentReuseAddressSuccess5) { +TYPED_TEST(AlignedStackAllocatorTest, Address_CheckAlignmentReuseAddressSuccess5) { this->kernel_ = [this]() { Xbyak::Label l_equal; StackAllocator::Transaction transaction{this->stack_allocator_}; @@ -380,3 +380,109 @@ TYPED_TEST(AlignedStackAllocatorTest, AddressCheckAlignmentReuseAddressSuccess5) EXPECT_ANY_THROW(this->template create_kernel()); } + +TYPED_TEST(AlignedStackAllocatorTest, Xmm_ValueEqual) { + static const uint32_t data[4] = {1024, 2135, 3246, 4357}; + this->kernel_ = [this]() { + Xbyak::Label l_not_equal; + Xbyak::Xmm vmm0{0}; + Xbyak::Xmm vmm1{1}; + StackAllocator::Reg value_on_stack{this->stack_allocator_}; + this->mov(this->rbx, reinterpret_cast(data)); + this->uni_vmovups(vmm0, this->ptr[this->rbx]); + value_on_stack = vmm0; + this->uni_vpxor(vmm1, vmm1, vmm1); + this->uni_vmovups(vmm1, value_on_stack); + this->mov(this->rax, 0); + this->uni_vpcmpeqd(vmm0, vmm0, vmm1); + this->uni_vtestps(vmm0, vmm0); + this->jz(l_not_equal); + this->mov(this->rax, 1); + this->L(l_not_equal); + }; + auto f = this->template create_kernel(); + int r = f(); + EXPECT_EQ(r, 1); +} + +TYPED_TEST(AlignedStackAllocatorTest, Xmm_ValueNotEqual) { + static const uint32_t data[4] = {1024, 2135, 3246, 4357}; + this->kernel_ = [this]() { + Xbyak::Label l_not_equal; + Xbyak::Xmm vmm0{0}; + Xbyak::Xmm vmm1{1}; + StackAllocator::Reg value_on_stack{this->stack_allocator_}; + this->mov(this->rbx, reinterpret_cast(data)); + this->uni_vmovups(vmm0, this->ptr[this->rbx]); + value_on_stack = vmm0; + this->uni_vpxor(vmm1, vmm1, vmm1); + this->uni_vmovups(vmm1, value_on_stack); + this->uni_vpxor(vmm0, vmm0, vmm0); + this->mov(this->rax, 0); + this->uni_vpcmpeqd(vmm0, vmm0, vmm1); + this->uni_vtestps(vmm0, vmm0); + this->jz(l_not_equal); + this->mov(this->rax, 1); + this->L(l_not_equal); + }; + auto f = this->template create_kernel(); + int r = f(); + EXPECT_EQ(r, 0); +} + +TYPED_TEST(AlignedStackAllocatorTest, Ymm_ValueEqual) { + if (TypeParam::isa != x64::avx2) { + GTEST_SKIP() << "Skipping test for isa = " << static_cast(TypeParam::isa); + } + static const uint32_t data[8] = {1024, 2135, 3246, 4357, + 2124, 3235, 4346, 5457}; + this->kernel_ = [this]() { + Xbyak::Label l_not_equal; + Xbyak::Ymm vmm0{0}; + Xbyak::Ymm vmm1{1}; + StackAllocator::Reg value_on_stack{this->stack_allocator_}; + this->mov(this->rbx, reinterpret_cast(data)); + this->uni_vmovups(vmm0, this->ptr[this->rbx]); + value_on_stack = vmm0; + this->uni_vpxor(vmm1, vmm1, vmm1); + this->uni_vmovups(vmm1, value_on_stack); + this->mov(this->rax, 0); + this->uni_vpcmpeqd(vmm0, vmm0, vmm1); + this->uni_vtestps(vmm0, vmm0); + this->jz(l_not_equal); + this->mov(this->rax, 1); + this->L(l_not_equal); + }; + auto f = this->template create_kernel(); + int r = f(); + EXPECT_EQ(r, 1); +} + +TYPED_TEST(AlignedStackAllocatorTest, Ymm_ValueNotEqual) { + if (TypeParam::isa != x64::avx2) { + GTEST_SKIP() << "Skipping test for isa = " << static_cast(TypeParam::isa); + } + static const uint32_t data[8] = {1024, 2135, 3246, 4357, + 2124, 3235, 4346, 5457}; + this->kernel_ = [this]() { + Xbyak::Label l_not_equal; + Xbyak::Ymm vmm0{0}; + Xbyak::Ymm vmm1{1}; + StackAllocator::Reg value_on_stack{this->stack_allocator_}; + this->mov(this->rbx, reinterpret_cast(data)); + this->uni_vmovups(vmm0, this->ptr[this->rbx]); + value_on_stack = vmm0; + this->uni_vpxor(vmm1, vmm1, vmm1); + this->uni_vmovups(vmm1, value_on_stack); + this->uni_vpxor(vmm0, vmm0, vmm0); + this->mov(this->rax, 0); + this->uni_vpcmpeqd(vmm0, vmm0, vmm1); + this->uni_vtestps(vmm0, vmm0); + this->jz(l_not_equal); + this->mov(this->rax, 1); + this->L(l_not_equal); + }; + auto f = this->template create_kernel(); + int r = f(); + EXPECT_EQ(r, 0); +} From 905c8e07783c6189e12c84a6e405190ad0332135 Mon Sep 17 00:00:00 2001 From: Denis Kotov Date: Mon, 10 Oct 2022 13:20:13 +0300 Subject: [PATCH 15/20] Renamed some tests to describe what it is doing --- .../unit/nodes/kernels/stack_allocator.cpp | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp b/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp index 9e5af041aec5a5..6d031b74ed6c87 100644 --- a/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp +++ b/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp @@ -56,7 +56,7 @@ class StackAllocatorTest : public ::testing::Test, public x64::jit_generator { std::shared_ptr stack_allocator_; }; -TEST_F(StackAllocatorTest, Address_ValueEqual) { +TEST_F(StackAllocatorTest, Address_Value_Equal) { kernel_ = [this]() { Xbyak::Label l_equal; StackAllocator::Address reg_100_addr{stack_allocator_, sizeof(int32_t)}; @@ -73,7 +73,7 @@ TEST_F(StackAllocatorTest, Address_ValueEqual) { EXPECT_EQ(r, 1); } -TEST_F(StackAllocatorTest, Reg32_ValueEqual) { +TEST_F(StackAllocatorTest, Reg32_Value_Equal) { kernel_ = [this]() { Xbyak::Label l_equal; StackAllocator::Reg reg_100_addr{stack_allocator_}; @@ -90,7 +90,7 @@ TEST_F(StackAllocatorTest, Reg32_ValueEqual) { EXPECT_EQ(r, 1); } -TEST_F(StackAllocatorTest, Address_ValueNotEqual) { +TEST_F(StackAllocatorTest, Address_Value_NotEqual) { kernel_ = [this]() { Xbyak::Label l_equal; StackAllocator::Address reg_100_addr{stack_allocator_, sizeof(int32_t)}; @@ -109,7 +109,7 @@ TEST_F(StackAllocatorTest, Address_ValueNotEqual) { EXPECT_EQ(r, 0); } -TEST_F(StackAllocatorTest, Reg32_ValueNotEqual) { +TEST_F(StackAllocatorTest, Reg32_Value_NotEqual) { kernel_ = [this]() { Xbyak::Label l_equal; StackAllocator::Reg reg_100_addr{stack_allocator_}; @@ -128,7 +128,7 @@ TEST_F(StackAllocatorTest, Reg32_ValueNotEqual) { EXPECT_EQ(r, 0); } -TEST_F(StackAllocatorTest, Address_Check) { +TEST_F(StackAllocatorTest, Address_PtrCheck_Success) { kernel_ = [this]() { Xbyak::Label l_equal; StackAllocator::Address dword_addr{stack_allocator_, sizeof(int32_t)}; @@ -137,7 +137,7 @@ TEST_F(StackAllocatorTest, Address_Check) { create_kernel(); } -TEST_F(StackAllocatorTest, LoopSuccess) { +TEST_F(StackAllocatorTest, Loop_Success) { kernel_ = [this]() { Xbyak::Label l_equal; Xbyak::Label l_loop; @@ -173,7 +173,7 @@ TEST_F(StackAllocatorTest, LoopSuccess) { EXPECT_EQ(r, 1); } -TEST_F(StackAllocatorTest, LoopFailed) { +TEST_F(StackAllocatorTest, Loop_Fail) { kernel_ = [this]() { Xbyak::Label l_equal; Xbyak::Label l_loop; @@ -217,7 +217,7 @@ TEST_F(StackAllocatorTest, LoopFailed) { EXPECT_ANY_THROW(create_kernel()); } -TEST_F(StackAllocatorTest, Address_CheckAlignmentFailed) { +TEST_F(StackAllocatorTest, Address_CheckAlignment_Fail) { kernel_ = [this]() { Xbyak::Label l_equal; StackAllocator::Transaction transaction{stack_allocator_}; @@ -263,7 +263,7 @@ struct IsaParam { static constexpr x64::cpu_isa_t isa = Isa; }; using IsaParamTypes = ::testing::Types, IsaParam, IsaParam>; TYPED_TEST_SUITE(AlignedStackAllocatorTest, IsaParamTypes); -TYPED_TEST(AlignedStackAllocatorTest, Address_CheckAlignmentSuccess) { +TYPED_TEST(AlignedStackAllocatorTest, Address_CheckAlignment_Success) { this->kernel_ = [this]() { Xbyak::Label l_equal; StackAllocator::Transaction transaction{this->stack_allocator_}; @@ -276,7 +276,7 @@ TYPED_TEST(AlignedStackAllocatorTest, Address_CheckAlignmentSuccess) { f(); } -TYPED_TEST(AlignedStackAllocatorTest, Address_CheckAlignmentRegSuccess) { +TYPED_TEST(AlignedStackAllocatorTest, Reg_CheckAlignment_Success) { this->kernel_ = [this]() { Xbyak::Label l_equal; StackAllocator::Address byte0_addr{this->stack_allocator_, sizeof(int8_t)}; @@ -287,7 +287,7 @@ TYPED_TEST(AlignedStackAllocatorTest, Address_CheckAlignmentRegSuccess) { f(); } -TYPED_TEST(AlignedStackAllocatorTest, Address_CheckAlignmentReuseAddressSuccess) { +TYPED_TEST(AlignedStackAllocatorTest, Address_Reuse_Success) { this->kernel_ = [this]() { Xbyak::Label l_equal; StackAllocator::Transaction transaction{this->stack_allocator_}; @@ -305,7 +305,7 @@ TYPED_TEST(AlignedStackAllocatorTest, Address_CheckAlignmentReuseAddressSuccess) f(); } -TYPED_TEST(AlignedStackAllocatorTest, Address_CheckAlignmentReuseAddressSuccess2) { +TYPED_TEST(AlignedStackAllocatorTest, Transaction_CheckAllocation_Fail) { this->kernel_ = [this]() { Xbyak::Label l_equal; StackAllocator::Transaction transaction{this->stack_allocator_}; @@ -324,7 +324,7 @@ TYPED_TEST(AlignedStackAllocatorTest, Address_CheckAlignmentReuseAddressSuccess2 EXPECT_ANY_THROW(this->template create_kernel()); } -TYPED_TEST(AlignedStackAllocatorTest, Address_CheckAlignmentReuseAddressSuccess3) { +TYPED_TEST(AlignedStackAllocatorTest, Transaction_UseAddressBeforeCommit_Fail) { this->kernel_ = [this]() { Xbyak::Label l_equal; StackAllocator::Transaction transaction{this->stack_allocator_}; @@ -342,7 +342,7 @@ TYPED_TEST(AlignedStackAllocatorTest, Address_CheckAlignmentReuseAddressSuccess3 EXPECT_ANY_THROW(this->template create_kernel()); } -TYPED_TEST(AlignedStackAllocatorTest, Address_CheckAlignmentReuseAddressSuccess4) { +TYPED_TEST(AlignedStackAllocatorTest, Transaction_UseAddressAfterCommit_Success) { this->kernel_ = [this]() { Xbyak::Label l_equal; StackAllocator::Transaction transaction{this->stack_allocator_}; @@ -362,7 +362,7 @@ TYPED_TEST(AlignedStackAllocatorTest, Address_CheckAlignmentReuseAddressSuccess4 f(); } -TYPED_TEST(AlignedStackAllocatorTest, Address_CheckAlignmentReuseAddressSuccess5) { +TYPED_TEST(AlignedStackAllocatorTest, Transaction_ReleaseAddressBeforeCommit_Fail) { this->kernel_ = [this]() { Xbyak::Label l_equal; StackAllocator::Transaction transaction{this->stack_allocator_}; @@ -381,7 +381,7 @@ TYPED_TEST(AlignedStackAllocatorTest, Address_CheckAlignmentReuseAddressSuccess5 EXPECT_ANY_THROW(this->template create_kernel()); } -TYPED_TEST(AlignedStackAllocatorTest, Xmm_ValueEqual) { +TYPED_TEST(AlignedStackAllocatorTest, Xmm_Value_Equal) { static const uint32_t data[4] = {1024, 2135, 3246, 4357}; this->kernel_ = [this]() { Xbyak::Label l_not_equal; @@ -405,7 +405,7 @@ TYPED_TEST(AlignedStackAllocatorTest, Xmm_ValueEqual) { EXPECT_EQ(r, 1); } -TYPED_TEST(AlignedStackAllocatorTest, Xmm_ValueNotEqual) { +TYPED_TEST(AlignedStackAllocatorTest, Xmm_Value_NotEqual) { static const uint32_t data[4] = {1024, 2135, 3246, 4357}; this->kernel_ = [this]() { Xbyak::Label l_not_equal; @@ -430,7 +430,7 @@ TYPED_TEST(AlignedStackAllocatorTest, Xmm_ValueNotEqual) { EXPECT_EQ(r, 0); } -TYPED_TEST(AlignedStackAllocatorTest, Ymm_ValueEqual) { +TYPED_TEST(AlignedStackAllocatorTest, Ymm_Value_Equal) { if (TypeParam::isa != x64::avx2) { GTEST_SKIP() << "Skipping test for isa = " << static_cast(TypeParam::isa); } @@ -458,7 +458,7 @@ TYPED_TEST(AlignedStackAllocatorTest, Ymm_ValueEqual) { EXPECT_EQ(r, 1); } -TYPED_TEST(AlignedStackAllocatorTest, Ymm_ValueNotEqual) { +TYPED_TEST(AlignedStackAllocatorTest, Ymm_Value_NotEqual) { if (TypeParam::isa != x64::avx2) { GTEST_SKIP() << "Skipping test for isa = " << static_cast(TypeParam::isa); } From e95f4e5cb4e4f4b7424e8a30e20510848a569bb1 Mon Sep 17 00:00:00 2001 From: Denis Kotov Date: Mon, 10 Oct 2022 13:51:04 +0300 Subject: [PATCH 16/20] Pass StackAllocator to Transaction, Address and Reg<> by reference --- .../src/nodes/kernels/stack_allocator.hpp | 42 ++++++++--------- .../unit/nodes/kernels/stack_allocator.cpp | 46 ++++++++++--------- 2 files changed, 45 insertions(+), 43 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/kernels/stack_allocator.hpp b/src/plugins/intel_cpu/src/nodes/kernels/stack_allocator.hpp index f0a8b3d19d63a2..30191f50a9f6d4 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/stack_allocator.hpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/stack_allocator.hpp @@ -16,8 +16,6 @@ namespace x64 = dnnl::impl::cpu::x64; class StackAllocator final { public: - using Ptr = std::shared_ptr; - class Transaction; class Address; template @@ -223,7 +221,7 @@ class StackAllocator::Transaction { public: friend class StackAllocator::Address; - Transaction(StackAllocator::Ptr stack_allocator) + Transaction(StackAllocator& stack_allocator) : stack_allocator_{stack_allocator} { checkUnique(true); } @@ -234,11 +232,11 @@ class StackAllocator::Transaction { } void begin() { - stack_allocator_->begin(); + stack_allocator_.begin(); } void commit() { - stack_allocator_->commit(); + stack_allocator_.commit(); } private: @@ -254,7 +252,7 @@ class StackAllocator::Transaction { } } - StackAllocator::Ptr stack_allocator_; + StackAllocator& stack_allocator_; }; class StackAllocator::Address { @@ -264,19 +262,19 @@ class StackAllocator::Address { const size_t requested_alignment = 1) : transaction_{&transaction} , stack_allocator_{transaction.stack_allocator_} - , allocation_{stack_allocator_->allocate(alloc_size, requested_alignment, true)} { + , allocation_{stack_allocator_.allocate(alloc_size, requested_alignment, true)} { transaction.begin(); } - Address(StackAllocator::Ptr stack_allocator, + Address(StackAllocator& stack_allocator, const size_t alloc_size, const size_t requested_alignment = 1) : stack_allocator_{stack_allocator} - , allocation_{stack_allocator_->allocate(alloc_size, requested_alignment)} { - if (stack_allocator_->isTransaction()) { + , allocation_{stack_allocator_.allocate(alloc_size, requested_alignment)} { + if (stack_allocator_.isTransaction()) { IE_THROW() << "Cannot allocate Address out of transaction. Please, finish first transaction !!"; } - stack_allocator_->commit(); + stack_allocator_.commit(); } Address(const Address& addr) = delete; @@ -289,27 +287,27 @@ class StackAllocator::Address { release(*transaction_); } else { release(); - stack_allocator_->commit(); + stack_allocator_.commit(); } } void release(Transaction& transaction) { - if (allocation_ && stack_allocator_) { + if (allocation_) { transaction.begin(); allocation_->is_used = false; - stack_allocator_->deallocate(); + stack_allocator_.deallocate(); } allocation_ = {}; } void release() { - if (allocation_ && stack_allocator_) { - if (stack_allocator_->isTransaction()) { + if (allocation_) { + if (stack_allocator_.isTransaction()) { IE_THROW() << "Cannot release Address out of transaction. Please, finish first transaction !!"; } allocation_->is_used = false; - stack_allocator_->deallocate(); - stack_allocator_->commit(); + stack_allocator_.deallocate(); + stack_allocator_.commit(); } allocation_ = {}; } @@ -355,15 +353,15 @@ class StackAllocator::Address { } bool isInitialized() const { - return stack_allocator_ && allocation_ && !allocation_->is_transaction; + return allocation_ && !allocation_->is_transaction; } x64::jit_generator& generator() const { - return stack_allocator_->code_generator; + return stack_allocator_.code_generator; } Transaction* transaction_{}; - StackAllocator::Ptr stack_allocator_; + StackAllocator& stack_allocator_; Allocation::Ptr allocation_; }; @@ -376,7 +374,7 @@ class StackAllocator::Reg : public StackAllocator::Address { : Address{transaction, TReg{}.getBit() / 8, getAlignment()} { } - Reg(StackAllocator::Ptr stack_allocator) + Reg(StackAllocator& stack_allocator) : Address{stack_allocator, TReg{}.getBit() / 8, getAlignment()} { } diff --git a/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp b/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp index 6d031b74ed6c87..1793be57de1b3d 100644 --- a/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp +++ b/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp @@ -52,6 +52,10 @@ class StackAllocatorTest : public ::testing::Test, public x64::jit_generator { this->postamble(); } + StackAllocator& stack_allocator() { + return *stack_allocator_; + } + std::function kernel_; std::shared_ptr stack_allocator_; }; @@ -59,7 +63,7 @@ class StackAllocatorTest : public ::testing::Test, public x64::jit_generator { TEST_F(StackAllocatorTest, Address_Value_Equal) { kernel_ = [this]() { Xbyak::Label l_equal; - StackAllocator::Address reg_100_addr{stack_allocator_, sizeof(int32_t)}; + StackAllocator::Address reg_100_addr{stack_allocator(), sizeof(int32_t)}; mov(rbx.cvt32(), 100); stack_mov(reg_100_addr, rbx.cvt32()); mov(rax, 1); @@ -76,7 +80,7 @@ TEST_F(StackAllocatorTest, Address_Value_Equal) { TEST_F(StackAllocatorTest, Reg32_Value_Equal) { kernel_ = [this]() { Xbyak::Label l_equal; - StackAllocator::Reg reg_100_addr{stack_allocator_}; + StackAllocator::Reg reg_100_addr{stack_allocator()}; mov(rbx.cvt32(), 100); stack_mov(reg_100_addr, rbx.cvt32()); mov(rax, 1); @@ -93,7 +97,7 @@ TEST_F(StackAllocatorTest, Reg32_Value_Equal) { TEST_F(StackAllocatorTest, Address_Value_NotEqual) { kernel_ = [this]() { Xbyak::Label l_equal; - StackAllocator::Address reg_100_addr{stack_allocator_, sizeof(int32_t)}; + StackAllocator::Address reg_100_addr{stack_allocator(), sizeof(int32_t)}; mov(rbx.cvt32(), 100); stack_mov(reg_100_addr, rbx.cvt32()); mov(rcx.cvt32(), reg_100_addr); @@ -112,7 +116,7 @@ TEST_F(StackAllocatorTest, Address_Value_NotEqual) { TEST_F(StackAllocatorTest, Reg32_Value_NotEqual) { kernel_ = [this]() { Xbyak::Label l_equal; - StackAllocator::Reg reg_100_addr{stack_allocator_}; + StackAllocator::Reg reg_100_addr{stack_allocator()}; mov(rbx.cvt32(), 100); stack_mov(reg_100_addr, rbx.cvt32()); mov(rcx.cvt32(), reg_100_addr); @@ -131,7 +135,7 @@ TEST_F(StackAllocatorTest, Reg32_Value_NotEqual) { TEST_F(StackAllocatorTest, Address_PtrCheck_Success) { kernel_ = [this]() { Xbyak::Label l_equal; - StackAllocator::Address dword_addr{stack_allocator_, sizeof(int32_t)}; + StackAllocator::Address dword_addr{stack_allocator(), sizeof(int32_t)}; EXPECT_EQ(static_cast(dword_addr), ptr[rbp - sizeof(int32_t)]); }; create_kernel(); @@ -143,7 +147,7 @@ TEST_F(StackAllocatorTest, Loop_Success) { Xbyak::Label l_loop; Xbyak::Label l_end; xor_(rcx, rcx); - StackAllocator::Address reg_100_addr{stack_allocator_, sizeof(int32_t)}; + StackAllocator::Address reg_100_addr{stack_allocator(), sizeof(int32_t)}; L(l_loop); { cmp(rcx, 10); @@ -180,7 +184,7 @@ TEST_F(StackAllocatorTest, Loop_Fail) { Xbyak::Label l_end; xor_(rcx, rcx); - StackAllocator::Transaction transaction{stack_allocator_}; + StackAllocator::Transaction transaction{stack_allocator()}; StackAllocator::Address reg_temp0_addr{transaction, sizeof(float)}; StackAllocator::Address reg_temp1_addr{transaction, sizeof(int32_t)}; StackAllocator::Address reg_100_addr{transaction, sizeof(int32_t)}; @@ -220,7 +224,7 @@ TEST_F(StackAllocatorTest, Loop_Fail) { TEST_F(StackAllocatorTest, Address_CheckAlignment_Fail) { kernel_ = [this]() { Xbyak::Label l_equal; - StackAllocator::Transaction transaction{stack_allocator_}; + StackAllocator::Transaction transaction{stack_allocator()}; StackAllocator::Address byte0_addr{transaction, sizeof(int8_t)}; StackAllocator::Address xmm0_addr{transaction, 16}; transaction.commit(); @@ -266,7 +270,7 @@ TYPED_TEST_SUITE(AlignedStackAllocatorTest, IsaParamTypes); TYPED_TEST(AlignedStackAllocatorTest, Address_CheckAlignment_Success) { this->kernel_ = [this]() { Xbyak::Label l_equal; - StackAllocator::Transaction transaction{this->stack_allocator_}; + StackAllocator::Transaction transaction{this->stack_allocator()}; StackAllocator::Address byte0_addr{transaction, sizeof(int8_t)}; StackAllocator::Address xmm0_addr{transaction, 16, 16}; transaction.commit(); @@ -279,8 +283,8 @@ TYPED_TEST(AlignedStackAllocatorTest, Address_CheckAlignment_Success) { TYPED_TEST(AlignedStackAllocatorTest, Reg_CheckAlignment_Success) { this->kernel_ = [this]() { Xbyak::Label l_equal; - StackAllocator::Address byte0_addr{this->stack_allocator_, sizeof(int8_t)}; - StackAllocator::Reg xmm0_addr{this->stack_allocator_}; + StackAllocator::Address byte0_addr{this->stack_allocator(), sizeof(int8_t)}; + StackAllocator::Reg xmm0_addr{this->stack_allocator()}; this->addps(this->xmm0, xmm0_addr); }; auto f = this->template create_kernel(); @@ -290,7 +294,7 @@ TYPED_TEST(AlignedStackAllocatorTest, Reg_CheckAlignment_Success) { TYPED_TEST(AlignedStackAllocatorTest, Address_Reuse_Success) { this->kernel_ = [this]() { Xbyak::Label l_equal; - StackAllocator::Transaction transaction{this->stack_allocator_}; + StackAllocator::Transaction transaction{this->stack_allocator()}; StackAllocator::Address byte0_addr{transaction, sizeof(int8_t)}; StackAllocator::Reg xmm0_addr{transaction}; StackAllocator::Address byte1_addr{transaction, sizeof(int8_t)}; @@ -308,11 +312,11 @@ TYPED_TEST(AlignedStackAllocatorTest, Address_Reuse_Success) { TYPED_TEST(AlignedStackAllocatorTest, Transaction_CheckAllocation_Fail) { this->kernel_ = [this]() { Xbyak::Label l_equal; - StackAllocator::Transaction transaction{this->stack_allocator_}; + StackAllocator::Transaction transaction{this->stack_allocator()}; StackAllocator::Address byte0_addr{transaction, sizeof(int8_t)}; StackAllocator::Reg xmm0_addr{transaction}; StackAllocator::Address byte1_addr{transaction, sizeof(int8_t)}; - StackAllocator::Reg xmm1_addr{this->stack_allocator_}; + StackAllocator::Reg xmm1_addr{this->stack_allocator()}; transaction.commit(); this->addps(this->xmm0, xmm0_addr); xmm0_addr.release(); @@ -327,7 +331,7 @@ TYPED_TEST(AlignedStackAllocatorTest, Transaction_CheckAllocation_Fail) { TYPED_TEST(AlignedStackAllocatorTest, Transaction_UseAddressBeforeCommit_Fail) { this->kernel_ = [this]() { Xbyak::Label l_equal; - StackAllocator::Transaction transaction{this->stack_allocator_}; + StackAllocator::Transaction transaction{this->stack_allocator()}; StackAllocator::Address byte0_addr{transaction, sizeof(int8_t)}; StackAllocator::Reg xmm0_addr{transaction}; StackAllocator::Address byte1_addr{transaction, sizeof(int8_t)}; @@ -345,7 +349,7 @@ TYPED_TEST(AlignedStackAllocatorTest, Transaction_UseAddressBeforeCommit_Fail) { TYPED_TEST(AlignedStackAllocatorTest, Transaction_UseAddressAfterCommit_Success) { this->kernel_ = [this]() { Xbyak::Label l_equal; - StackAllocator::Transaction transaction{this->stack_allocator_}; + StackAllocator::Transaction transaction{this->stack_allocator()}; StackAllocator::Address byte0_addr{transaction, sizeof(int8_t)}; StackAllocator::Reg xmm0_addr{transaction}; StackAllocator::Address byte1_addr{transaction, sizeof(int8_t)}; @@ -365,7 +369,7 @@ TYPED_TEST(AlignedStackAllocatorTest, Transaction_UseAddressAfterCommit_Success) TYPED_TEST(AlignedStackAllocatorTest, Transaction_ReleaseAddressBeforeCommit_Fail) { this->kernel_ = [this]() { Xbyak::Label l_equal; - StackAllocator::Transaction transaction{this->stack_allocator_}; + StackAllocator::Transaction transaction{this->stack_allocator()}; StackAllocator::Address byte0_addr{transaction, sizeof(int8_t)}; StackAllocator::Reg xmm0_addr{transaction}; StackAllocator::Address byte1_addr{transaction, sizeof(int8_t)}; @@ -387,7 +391,7 @@ TYPED_TEST(AlignedStackAllocatorTest, Xmm_Value_Equal) { Xbyak::Label l_not_equal; Xbyak::Xmm vmm0{0}; Xbyak::Xmm vmm1{1}; - StackAllocator::Reg value_on_stack{this->stack_allocator_}; + StackAllocator::Reg value_on_stack{this->stack_allocator()}; this->mov(this->rbx, reinterpret_cast(data)); this->uni_vmovups(vmm0, this->ptr[this->rbx]); value_on_stack = vmm0; @@ -411,7 +415,7 @@ TYPED_TEST(AlignedStackAllocatorTest, Xmm_Value_NotEqual) { Xbyak::Label l_not_equal; Xbyak::Xmm vmm0{0}; Xbyak::Xmm vmm1{1}; - StackAllocator::Reg value_on_stack{this->stack_allocator_}; + StackAllocator::Reg value_on_stack{this->stack_allocator()}; this->mov(this->rbx, reinterpret_cast(data)); this->uni_vmovups(vmm0, this->ptr[this->rbx]); value_on_stack = vmm0; @@ -440,7 +444,7 @@ TYPED_TEST(AlignedStackAllocatorTest, Ymm_Value_Equal) { Xbyak::Label l_not_equal; Xbyak::Ymm vmm0{0}; Xbyak::Ymm vmm1{1}; - StackAllocator::Reg value_on_stack{this->stack_allocator_}; + StackAllocator::Reg value_on_stack{this->stack_allocator()}; this->mov(this->rbx, reinterpret_cast(data)); this->uni_vmovups(vmm0, this->ptr[this->rbx]); value_on_stack = vmm0; @@ -468,7 +472,7 @@ TYPED_TEST(AlignedStackAllocatorTest, Ymm_Value_NotEqual) { Xbyak::Label l_not_equal; Xbyak::Ymm vmm0{0}; Xbyak::Ymm vmm1{1}; - StackAllocator::Reg value_on_stack{this->stack_allocator_}; + StackAllocator::Reg value_on_stack{this->stack_allocator()}; this->mov(this->rbx, reinterpret_cast(data)); this->uni_vmovups(vmm0, this->ptr[this->rbx]); value_on_stack = vmm0; From 4de9d1a6264b6401c2216c21765313a1e2064cd6 Mon Sep 17 00:00:00 2001 From: Denis Kotov Date: Thu, 13 Oct 2022 16:47:39 +0300 Subject: [PATCH 17/20] Made StackAllocator::Address isInitialized() public and added explicit operator bool() --- .../intel_cpu/src/nodes/kernels/stack_allocator.hpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/kernels/stack_allocator.hpp b/src/plugins/intel_cpu/src/nodes/kernels/stack_allocator.hpp index 30191f50a9f6d4..98139307d594b5 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/stack_allocator.hpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/stack_allocator.hpp @@ -332,6 +332,14 @@ class StackAllocator::Address { return *this; } + explicit operator bool() const { + return isInitialized(); + } + + bool isInitialized() const { + return allocation_ && !allocation_->is_transaction; + } + friend void ::ov::intel_cpu::stack_mov(Address& addr, const Xbyak::Xmm& vmm); friend void ::ov::intel_cpu::stack_mov(Address& addr, const Xbyak::Reg& reg); friend void ::ov::intel_cpu::stack_mov(const Xbyak::Xmm& vmm, const Address& addr); @@ -352,10 +360,6 @@ class StackAllocator::Address { } } - bool isInitialized() const { - return allocation_ && !allocation_->is_transaction; - } - x64::jit_generator& generator() const { return stack_allocator_.code_generator; } From ca20c1352d8824935ed04ada70f390ce87611600 Mon Sep 17 00:00:00 2001 From: Denis Kotov Date: Thu, 13 Oct 2022 19:27:22 +0300 Subject: [PATCH 18/20] Fix CI issue with redefinition of struct IsaParam --- .../tests/unit/nodes/kernels/stack_allocator.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp b/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp index 1793be57de1b3d..1b97e5660e1f7b 100644 --- a/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp +++ b/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp @@ -2,7 +2,12 @@ // SPDX-License-Identifier: Apache-2.0 // +#if __GNUC__ >= 5 +// Disable -Wsuggest-override warnings in gtest +#pragma GCC diagnostic ignored "-Wsuggest-override" +#endif #include + #include #include #include @@ -262,10 +267,14 @@ class AlignedStackAllocatorTest : public StackAllocatorTest { }; template -struct IsaParam { static constexpr x64::cpu_isa_t isa = Isa; }; +struct StackAllocatorTestIsaParam { static constexpr x64::cpu_isa_t isa = Isa; }; + +using StackAllocatorTestIsaParamTypes = ::testing::Types< + StackAllocatorTestIsaParam, + StackAllocatorTestIsaParam, + StackAllocatorTestIsaParam>; -using IsaParamTypes = ::testing::Types, IsaParam, IsaParam>; -TYPED_TEST_SUITE(AlignedStackAllocatorTest, IsaParamTypes); +TYPED_TEST_SUITE(AlignedStackAllocatorTest, StackAllocatorTestIsaParamTypes); TYPED_TEST(AlignedStackAllocatorTest, Address_CheckAlignment_Success) { this->kernel_ = [this]() { From cc891cbad83160b87bd53770c42def49b555177d Mon Sep 17 00:00:00 2001 From: Denis Kotov Date: Tue, 1 Nov 2022 17:39:55 +0200 Subject: [PATCH 19/20] Remove using using namespace dnnl::impl::cpu; and changed it to alias --- src/plugins/intel_cpu/src/nodes/kernels/registers_pool.hpp | 2 +- .../intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/kernels/registers_pool.hpp b/src/plugins/intel_cpu/src/nodes/kernels/registers_pool.hpp index cfa930dfbd613f..9d087b2a4e5314 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/registers_pool.hpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/registers_pool.hpp @@ -14,7 +14,7 @@ namespace ov { namespace intel_cpu { -using namespace dnnl::impl::cpu; +namespace x64 = dnnl::impl::cpu::x64; /** * The RegistersPool is the base class for the IsaRegistersPool template: diff --git a/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp b/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp index 1b97e5660e1f7b..cf152b85d98b1e 100644 --- a/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp +++ b/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp @@ -40,7 +40,7 @@ class StackAllocatorTest : public ::testing::Test, public x64::jit_generator { template F create_kernel() { - const status_t code = jit_generator::create_kernel(); + const dnnl::impl::status_t code = jit_generator::create_kernel(); if (code != dnnl::impl::status::success) { IE_THROW() << "Could not create kernel. Error code: " << std::to_string(code) << ". " << "Xbyak error code: " << Xbyak::ConvertErrorToString(Xbyak::GetError()); From 99650b45f2914eec3a0fd93d1a3bb97e4760d13b Mon Sep 17 00:00:00 2001 From: Denis Kotov Date: Wed, 2 Nov 2022 12:33:27 +0200 Subject: [PATCH 20/20] Fix macos compile-time issues --- .../intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp b/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp index cf152b85d98b1e..e4963ddef9a534 100644 --- a/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp +++ b/src/plugins/intel_cpu/tests/unit/nodes/kernels/stack_allocator.cpp @@ -272,7 +272,7 @@ struct StackAllocatorTestIsaParam { static constexpr x64::cpu_isa_t isa = Isa; } using StackAllocatorTestIsaParamTypes = ::testing::Types< StackAllocatorTestIsaParam, StackAllocatorTestIsaParam, - StackAllocatorTestIsaParam>; + StackAllocatorTestIsaParam >; TYPED_TEST_SUITE(AlignedStackAllocatorTest, StackAllocatorTestIsaParamTypes);