Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
caba51c
Minimal reworked impl with IR test passing
galderz Jan 8, 2026
f339cdf
Make it a template framework test
galderz Jan 8, 2026
e22aacd
Remove unnecessary information
galderz Jan 8, 2026
8e26f00
Support non-power-of-2 chains
galderz Jan 9, 2026
cf1e875
Remove is_counted()
galderz Jan 9, 2026
d00cadd
Test intermediate use of value
galderz Jan 9, 2026
21a2fc8
Extend impl to other AddNodes, MinL for now
galderz Jan 12, 2026
2080acc
Test MinL
galderz Jan 12, 2026
66c7718
Refactor AddNode construction to build_add
galderz Jan 12, 2026
6cc392c
Expand to Min/Max integer
galderz Jan 12, 2026
9d0a0b7
Not all AddNodes can reassociate so stick to Min/Max for now
galderz Jan 12, 2026
959fc49
Expand to Min/Max Float
galderz Jan 12, 2026
fd56d4c
Expand to Min/Max for doubles
galderz Jan 12, 2026
538ac8a
Expand to Float16
galderz Jan 12, 2026
f7ca35c
Add IR expectations for Float16
galderz Jan 13, 2026
2c085de
Comment/revert Float16 changes, has a different IR shape
galderz Jan 13, 2026
26f9f81
Add support for AddI/AddL
galderz Jan 15, 2026
f0118c5
Test with AddL, commented test for AddI
galderz Jan 15, 2026
b2b8ae2
Phi with more than one output not enough, new approach needed
galderz Jan 19, 2026
7b336c2
Deal with Phi with more than 1 chain, try each
galderz Jan 20, 2026
a02478c
Use auxiliary methods throughout that don't expose Add nodes
galderz Jan 20, 2026
5bed557
Test passing for AddI
galderz Jan 20, 2026
510223e
Auxiliary test, to be removed in the end
galderz Jan 20, 2026
69ff537
Add support for OrL reassociation
galderz Jan 20, 2026
f873057
Add support for OrI reassociation
galderz Jan 20, 2026
c70e5b4
Add support for XOrI and XOrL
galderz Jan 20, 2026
7ec2b93
Remove auxiliary tests
galderz Jan 20, 2026
7e77b03
Wrap new functionality in UseNewCode and rename test
galderz Jan 21, 2026
09b1ceb
Adjust test description
galderz Jan 21, 2026
7d40ff3
Separate edge case scenarios that apply to all to a separate test
galderz Jan 21, 2026
3f04d07
Update test description
galderz Jan 21, 2026
040fe47
Chain has to be of same original opcode, fixes sum + max mixed
galderz Jan 21, 2026
648f1b6
Check against given opcode when reassociating
galderz Jan 21, 2026
3da5478
Remove UseNewCode protection
galderz Jan 21, 2026
56395e6
Add missing Verify.java @compile
galderz Jan 22, 2026
91a6967
Limit to Long Min/Max and add some documentation
galderz Jan 29, 2026
7f4dbe3
Use MinMaxNode::build_min_max_long instead of roll own
galderz Feb 6, 2026
3b6d371
Use unique_out instead of hand rolled loop
galderz Feb 6, 2026
d7cf51f
Avoid work list by using an iterator that allows deletes
galderz Feb 6, 2026
66f3cd7
Copy node notes to new nodes
galderz Feb 6, 2026
6ce4dfc
Revert "Copy node notes to new nodes"
galderz Mar 3, 2026
4f38d49
Revert "Avoid work list by using an iterator that allows deletes"
galderz Mar 3, 2026
6f6aeda
Revert "Use unique_out instead of hand rolled loop"
galderz Mar 3, 2026
f4413ef
Improved: Use unique_out instead of hand rolled loop
galderz Mar 3, 2026
f2975ba
Improved avoid work list with iterator allowing deletes
galderz Mar 3, 2026
bf4c27e
Copy nodes to new nodes
galderz Mar 3, 2026
82678b5
Combine templated and IR tests into a single class
galderz Mar 3, 2026
78df7c4
Refactor reassociation to loopopts and encapsulate
galderz Mar 3, 2026
1dfc62b
Minor adjustments
galderz Mar 3, 2026
6eaa040
Small refactorings after PR review
galderz Mar 3, 2026
2332973
Add some documentation
galderz Mar 4, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/hotspot/share/opto/loopnode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5353,6 +5353,10 @@ void PhaseIdealLoop::build_and_optimize() {
}
C->set_major_progress();
}

if (!C->major_progress()) {
reassociate_reduction_chains();
}
}

#ifndef PRODUCT
Expand Down
2 changes: 2 additions & 0 deletions src/hotspot/share/opto/loopnode.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1544,6 +1544,8 @@ class PhaseIdealLoop : public PhaseTransform {
void eliminate_useless_zero_trip_guard();
void eliminate_useless_multiversion_if();

void reassociate_reduction_chains();

public:
// Change the control input of expensive nodes to allow commoning by
// IGVN when it is guaranteed to not result in a more frequent
Expand Down
128 changes: 128 additions & 0 deletions src/hotspot/share/opto/loopopts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4508,6 +4508,134 @@ bool PhaseIdealLoop::duplicate_loop_backedge(IdealLoopTree *loop, Node_List &old
return true;
}

// Reassociates latency-bound reduction loop chains for long Min/Max that have a shape like this:
// OP(A, OP(B, OP(C, Phi)))
// To become the following by shifting the Phi node to the front and shifting the rest of inputs:
// OP(Phi, OP(A, OP(B, C)))
// This transformation reduces latency thanks to an increase in CPU-level parallel processing.
// This increased parallelism can produce register pressure as a side effect.
// This is why the optimization currently only applies to specific AddNode subclasses
// that can particularly suffer in certain scenarios, e.g. long Min/Max.
// Any attempt to expand this to other AddNode types should take this into consideration.
class ReassociateReductionChains : public StackObj {
public:
  // loop:  the loop whose head Phis are inspected for reduction chains.
  // phase: the loop optimization phase used to query control membership
  //        and to register the nodes created by the reassociation.
  ReassociateReductionChains(IdealLoopTree* loop, PhaseIdealLoop* phase) : _loop(loop), _phase(phase) {
  }

  // Walks every Phi that is a use of the loop head and, for each use of that
  // Phi, tries to reassociate the MinL/MaxL chain starting at that use.
  void reassociate_chains() {
    Node* loop_head = _loop->head();
    for (DUIterator_Fast imax, i = loop_head->fast_outs(imax); i < imax; i++) {
      Node* loop_head_use = loop_head->fast_out(i);
      if (loop_head_use->is_Phi()) {
        PhiNode* phi = loop_head_use->as_Phi();
        // A successful reassociation creates a new use of the Phi and replaces
        // the old chain, so the Phi's out edges change while iterating here.
        for (DUIterator j = phi->outs(); phi->has_out(j); j++) {
          Node* n = phi->out(j);
          if (try_reassociate_chain(n, phi)) {
            // Step back so that the current slot is examined again after the
            // old chain (presumably including n) went away.
            --j;
          }
        }
      }
    }
  }

private:
  IdealLoopTree* _loop;
  PhaseIdealLoop* _phase;

  // Only long Min/Max nodes are candidates for reassociation.
  static bool is_associative(Node* n) {
    return n->Opcode() == Op_MinL || n->Opcode() == Op_MaxL;
  }

  // Tries to reassociate the chain that starts at n, a direct use of phi.
  // The chain is followed upwards through single-use nodes with the same
  // opcode as n. Returns true if the chain was rebuilt and its head replaced,
  // false if nothing was changed.
  bool try_reassociate_chain(Node* n, PhiNode* phi) {
    if (!is_associative(n)) {
      return false;
    }

    Node* chain_head = nullptr;
    Node* current = n;
    int opcode = current->Opcode();

    int chain_length = 1;
    while (current != nullptr) {
      if (current->outcnt() != 1) {
        // A value with several (or no) uses ends the chain
        break;
      }

      Node* use = nullptr;
      Node* out = current->unique_out();
      if (out->Opcode() == opcode) {
        use = out;
      }

      if (use != nullptr) {
        if (!_phase->ctrl_is_member(_loop, use)) {
          // Only interested in commutative add nodes that are in use in the loop
          return false;
        }
        if (use->in(1)->Opcode() == opcode && use->in(2)->Opcode() == opcode) {
          // A chain to reassociate cannot be constructed
          // when the chain can have multiple paths
          return false;
        }
        if (use->in(1) == phi || use->in(2) == phi) {
          // do_reassociate_chain() stops descending at the first node that has
          // the Phi as a direct input. That node has to be n: if a node higher
          // up in the chain also consumes the Phi, the rebuilt chain would
          // still reference the sub-chain below it, n would stay a use of the
          // Phi, and the caller would re-visit n and find the same chain again.
          return false;
        }

        chain_length++;
        chain_head = use;
      }

      current = use;
    }

    if (chain_length < 2) {
      // Only reassociate long enough chains
      return false;
    }

    // Rebuild the chain without the Phi, then put the Phi at the very front.
    Node* reassociated = do_reassociate_chain(chain_head, opcode, phi);

    Node* new_chain_head = MinMaxNode::build_min_max_long(&_phase->igvn(), phi, reassociated, opcode == Op_MaxL);
    _phase->register_new_node(new_chain_head, _loop->head());
    _phase->C->copy_node_notes_to(new_chain_head, chain_head);
    _phase->igvn().replace_node(chain_head, new_chain_head);

    return true;
  }

  // Recursively rebuilds the chain rooted at node with the Phi taken out.
  // At the node that consumes the Phi directly, the other input is returned
  // as is. For every node above it, a new node of the same kind is built from
  // the rebuilt sub-chain and the node's other input.
  Node* do_reassociate_chain(Node* node, int opcode, PhiNode* phi) {
    if (phi == node->in(1)) {
      return node->in(2);
    }

    if (phi == node->in(2)) {
      return node->in(1);
    }

    // Exactly one input is expected to continue the chain (the caller rejects
    // nodes where both inputs have the chain opcode); descend into that one.
    Node* left;
    Node* right;
    if (node->in(1)->Opcode() == opcode) {
      left = do_reassociate_chain(node->in(1), opcode, phi);
      right = node->in(2);
    } else {
      left = node->in(1);
      right = do_reassociate_chain(node->in(2), opcode, phi);
    }

    Node* reassoc = MinMaxNode::build_min_max_long(&_phase->igvn(), left, right, opcode == Op_MaxL);
    _phase->register_new_node(reassoc, _loop->head());
    _phase->C->copy_node_notes_to(reassoc, node);
    return reassoc;
  }
};

void PhaseIdealLoop::reassociate_reduction_chains() {
for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
IdealLoopTree* loop = iter.current();
if (loop->is_innermost()) {
ReassociateReductionChains rrc(loop, this);
rrc.reassociate_chains();
}
}
}

// AutoVectorize the loop: replace scalar ops with vector ops.
PhaseIdealLoop::AutoVectorizeStatus
PhaseIdealLoop::auto_vectorize(IdealLoopTree* lpt, VSharedData &vshared) {
Expand Down
Loading