From 2351eed9b7347d19198778ad1a6e59e5cf00a35c Mon Sep 17 00:00:00 2001 From: Badre BSAILA <54767641+pedrobsaila@users.noreply.github.com> Date: Sun, 11 Feb 2024 19:02:58 +0000 Subject: [PATCH 01/27] first draft --- PimpMyApp/PimpMyApp.csproj | 10 +++ PimpMyApp/Program.cs | 17 +++++ src/coreclr/jit/optimizebools.cpp | 120 ++++++++++++++++++++++++++++++ 3 files changed, 147 insertions(+) create mode 100644 PimpMyApp/PimpMyApp.csproj create mode 100644 PimpMyApp/Program.cs diff --git a/PimpMyApp/PimpMyApp.csproj b/PimpMyApp/PimpMyApp.csproj new file mode 100644 index 00000000000000..3477df059b152e --- /dev/null +++ b/PimpMyApp/PimpMyApp.csproj @@ -0,0 +1,10 @@ + + + + Exe + net9.0 + enable + enable + + + \ No newline at end of file diff --git a/PimpMyApp/Program.cs b/PimpMyApp/Program.cs new file mode 100644 index 00000000000000..947955bef1dafa --- /dev/null +++ b/PimpMyApp/Program.cs @@ -0,0 +1,17 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Runtime.CompilerServices; + +namespace PimpMyApp; + +public class Program +{ + public static int Main() + { + return Bambala(23, 45); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static int Bambala(int x, int y) => (x | 5) | (y | 3); +} diff --git a/src/coreclr/jit/optimizebools.cpp b/src/coreclr/jit/optimizebools.cpp index 5b609c0b4fb090..9be7eeece05ab8 100644 --- a/src/coreclr/jit/optimizebools.cpp +++ b/src/coreclr/jit/optimizebools.cpp @@ -29,6 +29,15 @@ struct OptTestInfo bool isBool; // If the compTree is boolean expression }; +struct IntBoolOpDsc +{ + GenTree** lclVarArr; + int32_t lclVarArrLength; + ssize_t* ctsArray; + int32_t ctsArrayLength; + bool onlyIntVarCtsOrOp; +}; + //----------------------------------------------------------------------------- // OptBoolsDsc: Descriptor used for Boolean Optimization // @@ -1891,6 +1900,117 @@ PhaseStatus Compiler::optOptimizeBools() // We're only interested in conditional jumps here + if (b1->KindIs(BBJ_RETURN)) + { + Statement* b2 = b1->firstStmt(); + if (b2 != nullptr) + { + GenTree* b3 = b2->GetRootNode(); + if (b3 != nullptr && b3->OperIs(GT_RETURN) && b3->TypeIs(TYP_INT)) + { + IntBoolOpDsc intBoolOpDsc; + intBoolOpDsc.ctsArray = nullptr; + intBoolOpDsc.ctsArrayLength = 0; + intBoolOpDsc.lclVarArr = nullptr; + intBoolOpDsc.lclVarArrLength = 0; + intBoolOpDsc.onlyIntVarCtsOrOp = true; + + GenTree* b4 = b3->gtPrev; + while (b4 != nullptr) + { + if (!b4->OperIs(GT_OR, GT_LCL_VAR, GT_CNS_INT) || !b4->TypeIs(TYP_INT)) + { + intBoolOpDsc.onlyIntVarCtsOrOp = false; + break; + } + + switch (b4->gtOper) + { + case GT_LCL_VAR: + { + intBoolOpDsc.lclVarArrLength++; + if (intBoolOpDsc.lclVarArr == nullptr) + { + intBoolOpDsc.lclVarArr = reinterpret_cast(malloc(sizeof(GenTree*) * intBoolOpDsc.lclVarArrLength)); + } + else + { + intBoolOpDsc.lclVarArr = reinterpret_cast( + realloc(intBoolOpDsc.lclVarArr, sizeof(GenTree*) * intBoolOpDsc.lclVarArrLength)); + } + + intBoolOpDsc.lclVarArr[intBoolOpDsc.lclVarArrLength - 1] = b4; + break; + } + case GT_CNS_INT: + { + intBoolOpDsc.ctsArrayLength++; + if (intBoolOpDsc.ctsArray == nullptr) + { + intBoolOpDsc.ctsArray = reinterpret_cast(malloc(sizeof(ssize_t) * intBoolOpDsc.ctsArrayLength)); + } + else + { + intBoolOpDsc.ctsArray = reinterpret_cast(realloc(intBoolOpDsc.ctsArray, sizeof(ssize_t) * intBoolOpDsc.ctsArrayLength)); + } + ssize_t constant = b4->AsIntConCommon()->IconValue(); + intBoolOpDsc.ctsArray[intBoolOpDsc.ctsArrayLength - 1] = constant; + break; + } + default: + { + break; + } + } + + b4 = b4->gtPrev; + } + + if (intBoolOpDsc.onlyIntVarCtsOrOp && intBoolOpDsc.ctsArrayLength >= 2 + && intBoolOpDsc.lclVarArrLength >= 2) + { + GenTreeOp* intVarTree = this->gtNewOperNode(GT_OR, TYP_INT, + intBoolOpDsc.lclVarArr[0], intBoolOpDsc.lclVarArr[1]); + intVarTree->gtPrev = intBoolOpDsc.lclVarArr[0]; + intVarTree->gtNext = intBoolOpDsc.lclVarArr[1]; + intBoolOpDsc.lclVarArr[1]->gtPrev = intVarTree; + intBoolOpDsc.lclVarArr[0]->gtPrev = nullptr; + intBoolOpDsc.lclVarArr[0]->gtNext = intVarTree; + for (int i = 2; i < intBoolOpDsc.lclVarArrLength; i++) + { + GenTreeOp* newIntVarTree = this->gtNewOperNode(GT_OR, TYP_INT, intVarTree, intBoolOpDsc.lclVarArr[i]); + newIntVarTree->gtPrev = intVarTree; + intVarTree->gtPrev = intBoolOpDsc.lclVarArr[i]; + } + + size_t optimizedCst = 0; + for (int i = 0; i < intBoolOpDsc.ctsArrayLength; i++) + { + optimizedCst = optimizedCst | intBoolOpDsc.ctsArray[i]; + } + + GenTreeIntCon* optimizedCstTree = this->gtNewIconNode(optimizedCst, TYP_INT); + optimizedCstTree->gtPrev = nullptr; + GenTreeOp* optimizedTree = this->gtNewOperNode(GT_OR, TYP_INT, intVarTree, optimizedCstTree); + optimizedTree->gtPrev = intVarTree; + intVarTree->gtPrev = optimizedCstTree; + b3->gtPrev = optimizedTree; + JITDUMP("lets the fire begin"); + } + + if (intBoolOpDsc.ctsArray != nullptr) + { + free(intBoolOpDsc.ctsArray); + } + + if (intBoolOpDsc.lclVarArr != nullptr) + { + free(intBoolOpDsc.lclVarArr); + } + } + } + } + if (!b1->KindIs(BBJ_COND)) { continue; From d7c81407456d19f9a8dfa3d6480ae6c8f37ee87f Mon Sep 17 00:00:00 2001 From: pedrobsaila Date: Sat, 17 Feb 2024 14:42:48 +0100 Subject: [PATCH 02/27] fix assert errors --- PimpMyApp/Program.cs | 5 ++++- src/coreclr/jit/optimizebools.cpp | 33 ++++++++++++++++++++----------- 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/PimpMyApp/Program.cs b/PimpMyApp/Program.cs index 947955bef1dafa..2b614553cc828b 100644 --- a/PimpMyApp/Program.cs +++ b/PimpMyApp/Program.cs @@ -9,9 +9,12 @@ public class Program { public static int Main() { - return Bambala(23, 45); + return Bambala2(23, 45, 66, 27); } [MethodImpl(MethodImplOptions.NoInlining)] private static int Bambala(int x, int y) => (x | 5) | (y | 3); + + [MethodImpl(MethodImplOptions.NoInlining)] + private static int Bambala2(int u, int x, int y, int z) => (u | 2) | (x | 5) | (y | 3) | (z | 6); } diff --git a/src/coreclr/jit/optimizebools.cpp b/src/coreclr/jit/optimizebools.cpp index 9be7eeece05ab8..1b9cf9a62a760a 100644 --- a/src/coreclr/jit/optimizebools.cpp +++ b/src/coreclr/jit/optimizebools.cpp @@ -1971,16 +1971,21 @@ PhaseStatus Compiler::optOptimizeBools() { GenTreeOp* intVarTree = this->gtNewOperNode(GT_OR, TYP_INT, intBoolOpDsc.lclVarArr[0], intBoolOpDsc.lclVarArr[1]); - intVarTree->gtPrev = intBoolOpDsc.lclVarArr[0]; - intVarTree->gtNext = intBoolOpDsc.lclVarArr[1]; - intBoolOpDsc.lclVarArr[1]->gtPrev = intVarTree; + intVarTree->gtPrev = intBoolOpDsc.lclVarArr[1]; + intBoolOpDsc.lclVarArr[1]->gtNext = intVarTree; + intBoolOpDsc.lclVarArr[1]->gtPrev = intBoolOpDsc.lclVarArr[0]; + intBoolOpDsc.lclVarArr[0]->gtNext = intBoolOpDsc.lclVarArr[1]; intBoolOpDsc.lclVarArr[0]->gtPrev = nullptr; - intBoolOpDsc.lclVarArr[0]->gtNext = intVarTree; + GenTree* temp = intVarTree; + for (int i = 2; i < intBoolOpDsc.lclVarArrLength; i++) { - GenTreeOp* newIntVarTree = this->gtNewOperNode(GT_OR, TYP_INT, intVarTree, intBoolOpDsc.lclVarArr[i]); - newIntVarTree->gtPrev = intVarTree; - intVarTree->gtPrev = intBoolOpDsc.lclVarArr[i]; + GenTreeOp* newIntVarTree = this->gtNewOperNode(GT_OR, TYP_INT, temp, intBoolOpDsc.lclVarArr[i]); + newIntVarTree->gtPrev = intBoolOpDsc.lclVarArr[i]; + intBoolOpDsc.lclVarArr[i]->gtNext = newIntVarTree; + intBoolOpDsc.lclVarArr[i]->gtPrev = temp; + temp->gtNext = intBoolOpDsc.lclVarArr[i]; + temp = newIntVarTree; } size_t optimizedCst = 0; @@ -1990,11 +1995,17 @@ PhaseStatus Compiler::optOptimizeBools() } GenTreeIntCon* optimizedCstTree = this->gtNewIconNode(optimizedCst, TYP_INT); - optimizedCstTree->gtPrev = nullptr; - GenTreeOp* optimizedTree = this->gtNewOperNode(GT_OR, TYP_INT, intVarTree, optimizedCstTree); - optimizedTree->gtPrev = intVarTree; - intVarTree->gtPrev = optimizedCstTree; + GenTreeOp* optimizedTree = this->gtNewOperNode(GT_OR, TYP_INT, temp, optimizedCstTree); + optimizedTree->gtPrev = optimizedCstTree; + optimizedCstTree->gtNext = optimizedTree; + optimizedCstTree->gtPrev = temp; + temp->gtNext = optimizedCstTree; b3->gtPrev = optimizedTree; + b3->AsOp()->gtOp1 = optimizedTree; + optimizedTree->gtNext = b3; + b2->SetTreeList(intBoolOpDsc.lclVarArr[0]); + b2->SetTreeListEnd(optimizedCstTree); + numReturn++; JITDUMP("lets the fire begin"); } From b4dc366290dec096aeaf2b559e542c1ee199ada0 Mon Sep 17 00:00:00 2001 From: pedrobsaila Date: Sat, 17 Feb 2024 22:16:36 +0100 Subject: [PATCH 03/27] handle the case where the block to optimize in the middle --- PimpMyApp/Program.cs | 8 +- src/coreclr/jit/optimizebools.cpp | 269 ++++++++++++++++++------------ 2 files changed, 170 insertions(+), 107 deletions(-) diff --git a/PimpMyApp/Program.cs b/PimpMyApp/Program.cs index 2b614553cc828b..6e09c8f86a26da 100644 --- a/PimpMyApp/Program.cs +++ b/PimpMyApp/Program.cs @@ -9,12 +9,12 @@ public class Program { public static int Main() { - return Bambala2(23, 45, 66, 27); + return Bambala(6, 23, 45, 66, 27); } - [MethodImpl(MethodImplOptions.NoInlining)] - private static int Bambala(int x, int y) => (x | 5) | (y | 3); + // [MethodImpl(MethodImplOptions.NoInlining)] + // private static int Bambala(int x, int y) => (x | 5) | (y | 3); [MethodImpl(MethodImplOptions.NoInlining)] - private static int Bambala2(int u, int x, int y, int z) => (u | 2) | (x | 5) | (y | 3) | (z | 6); + private static int Bambala(int p, int u, int x, int y, int z) => p * ((u | 2) | (x | 5) | (y | 3) | (z | 6)); } diff --git a/src/coreclr/jit/optimizebools.cpp b/src/coreclr/jit/optimizebools.cpp index 1b9cf9a62a760a..158a8678cd1521 100644 --- a/src/coreclr/jit/optimizebools.cpp +++ b/src/coreclr/jit/optimizebools.cpp @@ -35,7 +35,8 @@ struct IntBoolOpDsc int32_t lclVarArrLength; ssize_t* ctsArray; int32_t ctsArrayLength; - bool onlyIntVarCtsOrOp; + GenTree* start; + GenTree* end; }; //----------------------------------------------------------------------------- @@ -1743,6 +1744,151 @@ GenTree* OptBoolsDsc::optIsBoolComp(OptTestInfo* pOptTest) return opr1; } +IntBoolOpDsc GetNextIntBoolOpToOptimize(GenTree* b3) +{ + IntBoolOpDsc intBoolOpDsc; + intBoolOpDsc.ctsArray = nullptr; + intBoolOpDsc.ctsArrayLength = 0; + intBoolOpDsc.lclVarArr = nullptr; + intBoolOpDsc.lclVarArrLength = 0; + intBoolOpDsc.start = nullptr; + intBoolOpDsc.end = nullptr; + + if (b3 == nullptr) + { + return intBoolOpDsc; + } + + GenTree* b4 = b3->gtPrev; + while (b4 != nullptr) + { + if (!b4->OperIs(GT_OR, GT_LCL_VAR, GT_CNS_INT) || !b4->TypeIs(TYP_INT)) + { + if (intBoolOpDsc.start == nullptr) + { + if (b4->OperIsUnary()) + { + b4 = b4->gtPrev; + } + else + { + b4 = b4->gtPrev->gtPrev; + } + continue; + } + + if (intBoolOpDsc.ctsArrayLength >= 2 && intBoolOpDsc.lclVarArrLength >= 2) + { + intBoolOpDsc.end = b4; + return intBoolOpDsc; + } + else + { + intBoolOpDsc.start = nullptr; + intBoolOpDsc.end = nullptr; + free(intBoolOpDsc.ctsArray); + intBoolOpDsc.ctsArrayLength = 0; + intBoolOpDsc.ctsArray = nullptr; + free(intBoolOpDsc.lclVarArr); + intBoolOpDsc.lclVarArrLength = 0; + intBoolOpDsc.lclVarArr = nullptr; + b4 = b4->gtPrev; + continue; + } + } + + if (intBoolOpDsc.start == nullptr) + { + intBoolOpDsc.start = b4->gtNext; + } + + switch (b4->gtOper) + { + case GT_LCL_VAR: + { + intBoolOpDsc.lclVarArrLength++; + if (intBoolOpDsc.lclVarArr == nullptr) + { + intBoolOpDsc.lclVarArr = reinterpret_cast(malloc(sizeof(GenTree*) * intBoolOpDsc.lclVarArrLength)); + } + else + { + intBoolOpDsc.lclVarArr = reinterpret_cast( + realloc(intBoolOpDsc.lclVarArr, sizeof(GenTree*) * intBoolOpDsc.lclVarArrLength)); + } + + intBoolOpDsc.lclVarArr[intBoolOpDsc.lclVarArrLength - 1] = b4; + break; + } + case GT_CNS_INT: + { + intBoolOpDsc.ctsArrayLength++; + if (intBoolOpDsc.ctsArray == nullptr) + { + intBoolOpDsc.ctsArray = reinterpret_cast(malloc(sizeof(ssize_t) * intBoolOpDsc.ctsArrayLength)); + } + else + { + intBoolOpDsc.ctsArray = reinterpret_cast(realloc(intBoolOpDsc.ctsArray, sizeof(ssize_t) * intBoolOpDsc.ctsArrayLength)); + } + ssize_t constant = b4->AsIntConCommon()->IconValue(); + intBoolOpDsc.ctsArray[intBoolOpDsc.ctsArrayLength - 1] = constant; + break; + } + default: + { + break; + } + } + + b4 = b4->gtPrev; + } + + return intBoolOpDsc; +} + +void OptimizeIntBoolOp(Compiler* compiler, IntBoolOpDsc intBoolOpDsc) +{ + GenTreeOp* intVarTree = compiler->gtNewOperNode(GT_OR, TYP_INT, + intBoolOpDsc.lclVarArr[0], intBoolOpDsc.lclVarArr[1]); + intVarTree->gtPrev = intBoolOpDsc.lclVarArr[1]; + intBoolOpDsc.lclVarArr[1]->gtNext = intVarTree; + intBoolOpDsc.lclVarArr[1]->gtPrev = intBoolOpDsc.lclVarArr[0]; + intBoolOpDsc.lclVarArr[0]->gtNext = intBoolOpDsc.lclVarArr[1]; + intBoolOpDsc.lclVarArr[0]->gtPrev = intBoolOpDsc.end; + GenTree* tempIntVatTree = intVarTree; + + for (int i = 2; i < intBoolOpDsc.lclVarArrLength; i++) + { + GenTreeOp* newIntVarTree = compiler->gtNewOperNode(GT_OR, TYP_INT, tempIntVatTree, intBoolOpDsc.lclVarArr[i]); + newIntVarTree->gtPrev = intBoolOpDsc.lclVarArr[i]; + intBoolOpDsc.lclVarArr[i]->gtNext = newIntVarTree; + intBoolOpDsc.lclVarArr[i]->gtPrev = tempIntVatTree; + tempIntVatTree->gtNext = intBoolOpDsc.lclVarArr[i]; + tempIntVatTree = newIntVarTree; + } + + size_t optimizedCst = 0; + for (int i = 0; i < intBoolOpDsc.ctsArrayLength; i++) + { + optimizedCst = optimizedCst | intBoolOpDsc.ctsArray[i]; + } + + GenTreeIntCon* optimizedCstTree = compiler->gtNewIconNode(optimizedCst, TYP_INT); + GenTreeOp* optimizedTree = compiler->gtNewOperNode(GT_OR, TYP_INT, tempIntVatTree, optimizedCstTree); + optimizedTree->gtPrev = optimizedCstTree; + optimizedCstTree->gtNext = optimizedTree; + optimizedCstTree->gtPrev = tempIntVatTree; + tempIntVatTree->gtNext = optimizedCstTree; + intBoolOpDsc.start->gtPrev = optimizedTree; + optimizedTree->gtNext = intBoolOpDsc.start; + + if (intBoolOpDsc.start->OperIs(GT_RETURN)) + { + intBoolOpDsc.start->AsOp()->gtOp1 = optimizedTree; + } +} + //----------------------------------------------------------------------------- // optOptimizeBools: Folds boolean conditionals for GT_JTRUE/GT_RETURN nodes // @@ -1900,6 +2046,11 @@ PhaseStatus Compiler::optOptimizeBools() // We're only interested in conditional jumps here + if (!b1->KindIs(BBJ_COND, BBJ_RETURN)) + { + continue; + } + if (b1->KindIs(BBJ_RETURN)) { Statement* b2 = b1->firstStmt(); @@ -1908,122 +2059,34 @@ PhaseStatus Compiler::optOptimizeBools() GenTree* b3 = b2->GetRootNode(); if (b3 != nullptr && b3->OperIs(GT_RETURN) && b3->TypeIs(TYP_INT)) { - IntBoolOpDsc intBoolOpDsc; - intBoolOpDsc.ctsArray = nullptr; - intBoolOpDsc.ctsArrayLength = 0; - intBoolOpDsc.lclVarArr = nullptr; - intBoolOpDsc.lclVarArrLength = 0; - intBoolOpDsc.onlyIntVarCtsOrOp = true; - - GenTree* b4 = b3->gtPrev; - while (b4 != nullptr) + IntBoolOpDsc intBoolOpDsc = GetNextIntBoolOpToOptimize(b3); + while (intBoolOpDsc.ctsArrayLength >= 2 + && intBoolOpDsc.lclVarArrLength >= 2) { - if (!b4->OperIs(GT_OR, GT_LCL_VAR, GT_CNS_INT) || !b4->TypeIs(TYP_INT)) - { - intBoolOpDsc.onlyIntVarCtsOrOp = false; - break; - } + OptimizeIntBoolOp(this, intBoolOpDsc); - switch (b4->gtOper) + if (intBoolOpDsc.end == nullptr) { - case GT_LCL_VAR: - { - intBoolOpDsc.lclVarArrLength++; - if (intBoolOpDsc.lclVarArr == nullptr) - { - intBoolOpDsc.lclVarArr = reinterpret_cast(malloc(sizeof(GenTree*) * intBoolOpDsc.lclVarArrLength)); - } - else - { - intBoolOpDsc.lclVarArr = reinterpret_cast( - realloc(intBoolOpDsc.lclVarArr, sizeof(GenTree*) * intBoolOpDsc.lclVarArrLength)); - } - - intBoolOpDsc.lclVarArr[intBoolOpDsc.lclVarArrLength - 1] = b4; - break; - } - case GT_CNS_INT: - { - intBoolOpDsc.ctsArrayLength++; - if (intBoolOpDsc.ctsArray == nullptr) - { - intBoolOpDsc.ctsArray = reinterpret_cast(malloc(sizeof(ssize_t) * intBoolOpDsc.ctsArrayLength)); - } - else - { - intBoolOpDsc.ctsArray = reinterpret_cast(realloc(intBoolOpDsc.ctsArray, sizeof(ssize_t) * intBoolOpDsc.ctsArrayLength)); - } - ssize_t constant = b4->AsIntConCommon()->IconValue(); - intBoolOpDsc.ctsArray[intBoolOpDsc.ctsArrayLength - 1] = constant; - break; - } - default: - { - break; - } + b2->SetTreeList(intBoolOpDsc.lclVarArr[0]); } - b4 = b4->gtPrev; - } - - if (intBoolOpDsc.onlyIntVarCtsOrOp && intBoolOpDsc.ctsArrayLength >= 2 - && intBoolOpDsc.lclVarArrLength >= 2) - { - GenTreeOp* intVarTree = this->gtNewOperNode(GT_OR, TYP_INT, - intBoolOpDsc.lclVarArr[0], intBoolOpDsc.lclVarArr[1]); - intVarTree->gtPrev = intBoolOpDsc.lclVarArr[1]; - intBoolOpDsc.lclVarArr[1]->gtNext = intVarTree; - intBoolOpDsc.lclVarArr[1]->gtPrev = intBoolOpDsc.lclVarArr[0]; - intBoolOpDsc.lclVarArr[0]->gtNext = intBoolOpDsc.lclVarArr[1]; - intBoolOpDsc.lclVarArr[0]->gtPrev = nullptr; - GenTree* temp = intVarTree; - - for (int i = 2; i < intBoolOpDsc.lclVarArrLength; i++) + if (intBoolOpDsc.ctsArray != nullptr) { - GenTreeOp* newIntVarTree = this->gtNewOperNode(GT_OR, TYP_INT, temp, intBoolOpDsc.lclVarArr[i]); - newIntVarTree->gtPrev = intBoolOpDsc.lclVarArr[i]; - intBoolOpDsc.lclVarArr[i]->gtNext = newIntVarTree; - intBoolOpDsc.lclVarArr[i]->gtPrev = temp; - temp->gtNext = intBoolOpDsc.lclVarArr[i]; - temp = newIntVarTree; + free(intBoolOpDsc.ctsArray); + intBoolOpDsc.ctsArray = nullptr; } - size_t optimizedCst = 0; - for (int i = 0; i < intBoolOpDsc.ctsArrayLength; i++) + if (intBoolOpDsc.lclVarArr != nullptr) { - optimizedCst = optimizedCst | intBoolOpDsc.ctsArray[i]; + free(intBoolOpDsc.lclVarArr); + intBoolOpDsc.lclVarArr = nullptr; } - - GenTreeIntCon* optimizedCstTree = this->gtNewIconNode(optimizedCst, TYP_INT); - GenTreeOp* optimizedTree = this->gtNewOperNode(GT_OR, TYP_INT, temp, optimizedCstTree); - optimizedTree->gtPrev = optimizedCstTree; - optimizedCstTree->gtNext = optimizedTree; - optimizedCstTree->gtPrev = temp; - temp->gtNext = optimizedCstTree; - b3->gtPrev = optimizedTree; - b3->AsOp()->gtOp1 = optimizedTree; - optimizedTree->gtNext = b3; - b2->SetTreeList(intBoolOpDsc.lclVarArr[0]); - b2->SetTreeListEnd(optimizedCstTree); + numReturn++; - JITDUMP("lets the fire begin"); - } - - if (intBoolOpDsc.ctsArray != nullptr) - { - free(intBoolOpDsc.ctsArray); - } - - if (intBoolOpDsc.lclVarArr != nullptr) - { - free(intBoolOpDsc.lclVarArr); + intBoolOpDsc = GetNextIntBoolOpToOptimize(intBoolOpDsc.end); } } } - } - - if (!b1->KindIs(BBJ_COND)) - { continue; } From f3221cda75774743ca8d5b2411d4c36bfe05d233 Mon Sep 17 00:00:00 2001 From: pedrobsaila Date: Sun, 18 Feb 2024 14:41:17 +0100 Subject: [PATCH 04/27] fix validation issue when block to optimize is last and refactors --- PimpMyApp/Program.cs | 7 +- src/coreclr/jit/optimizebools.cpp | 110 +++++++++++++++++------------- 2 files changed, 68 insertions(+), 49 deletions(-) diff --git a/PimpMyApp/Program.cs b/PimpMyApp/Program.cs index 6e09c8f86a26da..435e7cacd6d852 100644 --- a/PimpMyApp/Program.cs +++ b/PimpMyApp/Program.cs @@ -9,12 +9,13 @@ public class Program { public static int Main() { - return Bambala(6, 23, 45, 66, 27); + // return Bambala(6, 23); + return (int)Bambala(2, 6, 23, 45, 66, 2); } // [MethodImpl(MethodImplOptions.NoInlining)] - // private static int Bambala(int x, int y) => (x | 5) | (y | 3); + // private static int Bambala(int x, int y) => x | y | 5; [MethodImpl(MethodImplOptions.NoInlining)] - private static int Bambala(int p, int u, int x, int y, int z) => p * ((u | 2) | (x | 5) | (y | 3) | (z | 6)); + private static long Bambala(int m, int p, int u, int x, int y, int z) => m * System.Math.BigMul(((u | 2) | (x | 5) | (y | 3) | (z | 6)), p); } diff --git a/src/coreclr/jit/optimizebools.cpp b/src/coreclr/jit/optimizebools.cpp index 158a8678cd1521..5870f613542c7c 100644 --- a/src/coreclr/jit/optimizebools.cpp +++ b/src/coreclr/jit/optimizebools.cpp @@ -1744,6 +1744,28 @@ GenTree* OptBoolsDsc::optIsBoolComp(OptTestInfo* pOptTest) return opr1; } +void CleanIntBoolOpDsc(IntBoolOpDsc* intBoolOpDsc) +{ + intBoolOpDsc->start = nullptr; + intBoolOpDsc->end = nullptr; + + if (intBoolOpDsc->ctsArray != nullptr) + { + free(intBoolOpDsc->ctsArray); + intBoolOpDsc->ctsArray = nullptr; + } + + intBoolOpDsc->ctsArrayLength = 0; + + if (intBoolOpDsc->lclVarArr != nullptr) + { + free(intBoolOpDsc->lclVarArr); + intBoolOpDsc->lclVarArr = nullptr; + } + + intBoolOpDsc->lclVarArrLength = 0; +} + IntBoolOpDsc GetNextIntBoolOpToOptimize(GenTree* b3) { IntBoolOpDsc intBoolOpDsc; @@ -1766,13 +1788,13 @@ IntBoolOpDsc GetNextIntBoolOpToOptimize(GenTree* b3) { if (intBoolOpDsc.start == nullptr) { - if (b4->OperIsUnary()) + if (b4->OperIsBinary()) { - b4 = b4->gtPrev; + b4 = b4->gtPrev->gtPrev; } else { - b4 = b4->gtPrev->gtPrev; + b4 = b4->gtPrev; } continue; } @@ -1784,14 +1806,7 @@ IntBoolOpDsc GetNextIntBoolOpToOptimize(GenTree* b3) } else { - intBoolOpDsc.start = nullptr; - intBoolOpDsc.end = nullptr; - free(intBoolOpDsc.ctsArray); - intBoolOpDsc.ctsArrayLength = 0; - intBoolOpDsc.ctsArray = nullptr; - free(intBoolOpDsc.lclVarArr); - intBoolOpDsc.lclVarArrLength = 0; - intBoolOpDsc.lclVarArr = nullptr; + CleanIntBoolOpDsc(&intBoolOpDsc); b4 = b4->gtPrev; continue; } @@ -1883,10 +1898,46 @@ void OptimizeIntBoolOp(Compiler* compiler, IntBoolOpDsc intBoolOpDsc) intBoolOpDsc.start->gtPrev = optimizedTree; optimizedTree->gtNext = intBoolOpDsc.start; - if (intBoolOpDsc.start->OperIs(GT_RETURN)) + if (intBoolOpDsc.start->OperIsUnary()) { intBoolOpDsc.start->AsOp()->gtOp1 = optimizedTree; } + else if (intBoolOpDsc.start->gtNext != nullptr && intBoolOpDsc.start->gtNext->OperIsBinary()) + { + intBoolOpDsc.start->gtNext->AsOp()->gtOp1 = optimizedTree; + } +} + +unsigned int TryOptimizeIntBoolOp(Compiler* compiler, BasicBlock* b1) +{ + unsigned int result = 0; + Statement* b2 = b1->firstStmt(); + if (b2 != nullptr) + { + GenTree* b3 = b2->GetRootNode(); + if (b3 != nullptr && b3->OperIs(GT_RETURN)) + { + IntBoolOpDsc intBoolOpDsc = GetNextIntBoolOpToOptimize(b3); + while (intBoolOpDsc.ctsArrayLength >= 2 + && intBoolOpDsc.lclVarArrLength >= 2) + { + OptimizeIntBoolOp(compiler, intBoolOpDsc); + + if (intBoolOpDsc.end == nullptr) + { + b2->SetTreeList(intBoolOpDsc.lclVarArr[0]); + } + + CleanIntBoolOpDsc(&intBoolOpDsc); + result++; + intBoolOpDsc = GetNextIntBoolOpToOptimize(intBoolOpDsc.end); + } + + CleanIntBoolOpDsc(&intBoolOpDsc); + } + } + + return result; } //----------------------------------------------------------------------------- @@ -2053,40 +2104,7 @@ PhaseStatus Compiler::optOptimizeBools() if (b1->KindIs(BBJ_RETURN)) { - Statement* b2 = b1->firstStmt(); - if (b2 != nullptr) - { - GenTree* b3 = b2->GetRootNode(); - if (b3 != nullptr && b3->OperIs(GT_RETURN) && b3->TypeIs(TYP_INT)) - { - IntBoolOpDsc intBoolOpDsc = GetNextIntBoolOpToOptimize(b3); - while (intBoolOpDsc.ctsArrayLength >= 2 - && intBoolOpDsc.lclVarArrLength >= 2) - { - OptimizeIntBoolOp(this, intBoolOpDsc); - - if (intBoolOpDsc.end == nullptr) - { - b2->SetTreeList(intBoolOpDsc.lclVarArr[0]); - } - - if (intBoolOpDsc.ctsArray != nullptr) - { - free(intBoolOpDsc.ctsArray); - intBoolOpDsc.ctsArray = nullptr; - } - - if (intBoolOpDsc.lclVarArr != nullptr) - { - free(intBoolOpDsc.lclVarArr); - intBoolOpDsc.lclVarArr = nullptr; - } - - numReturn++; - intBoolOpDsc = GetNextIntBoolOpToOptimize(intBoolOpDsc.end); - } - } - } + numReturn += TryOptimizeIntBoolOp(this, b1); continue; } From 05ea7a7158a18bebe216823b6bf5bc6d9e358b1f Mon Sep 17 00:00:00 2001 From: pedrobsaila Date: Wed, 21 Feb 2024 21:37:04 +0100 Subject: [PATCH 05/27] limit reach of optimization --- PimpMyApp/Program.cs | 8 ++++++-- src/coreclr/jit/optimizebools.cpp | 30 ++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/PimpMyApp/Program.cs b/PimpMyApp/Program.cs index 435e7cacd6d852..b4d9528b7e83c1 100644 --- a/PimpMyApp/Program.cs +++ b/PimpMyApp/Program.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Runtime.CompilerServices; +using System; namespace PimpMyApp; @@ -9,7 +10,7 @@ public class Program { public static int Main() { - // return Bambala(6, 23); + // return (int)Bambala(6, 23, 40); return (int)Bambala(2, 6, 23, 45, 66, 2); } @@ -17,5 +18,8 @@ public static int Main() // private static int Bambala(int x, int y) => x | y | 5; [MethodImpl(MethodImplOptions.NoInlining)] - private static long Bambala(int m, int p, int u, int x, int y, int z) => m * System.Math.BigMul(((u | 2) | (x | 5) | (y | 3) | (z | 6)), p); + private static long Bambala(long m, int p, int u, int x, int y, int z) => m * System.Math.BigMul(((u | 2) | (x | 5) | (y | 3) | (z | 6)), p); + + // [MethodImpl(MethodImplOptions.NoInlining)] + // private static long Bambala(int m, int p, int c) => Math.Clamp(m, p, c); } diff --git a/src/coreclr/jit/optimizebools.cpp b/src/coreclr/jit/optimizebools.cpp index 5870f613542c7c..1a881bf7452701 100644 --- a/src/coreclr/jit/optimizebools.cpp +++ b/src/coreclr/jit/optimizebools.cpp @@ -1775,6 +1775,7 @@ IntBoolOpDsc GetNextIntBoolOpToOptimize(GenTree* b3) intBoolOpDsc.lclVarArrLength = 0; intBoolOpDsc.start = nullptr; intBoolOpDsc.end = nullptr; + int orOpCount = 0; if (b3 == nullptr) { @@ -1821,6 +1822,18 @@ IntBoolOpDsc GetNextIntBoolOpToOptimize(GenTree* b3) { case GT_LCL_VAR: { + if (orOpCount <= 0) + { + if (intBoolOpDsc.ctsArrayLength >= 2 && intBoolOpDsc.lclVarArrLength >= 2) + { + intBoolOpDsc.end = b4; + return intBoolOpDsc; + } + + CleanIntBoolOpDsc(&intBoolOpDsc); + break; + } + intBoolOpDsc.lclVarArrLength++; if (intBoolOpDsc.lclVarArr == nullptr) { @@ -1833,10 +1846,23 @@ IntBoolOpDsc GetNextIntBoolOpToOptimize(GenTree* b3) } intBoolOpDsc.lclVarArr[intBoolOpDsc.lclVarArrLength - 1] = b4; + orOpCount--; break; } case GT_CNS_INT: { + if (orOpCount <= 0) + { + if (intBoolOpDsc.ctsArrayLength >= 2 && intBoolOpDsc.lclVarArrLength >= 2) + { + intBoolOpDsc.end = b4; + return intBoolOpDsc; + } + + CleanIntBoolOpDsc(&intBoolOpDsc); + break; + } + intBoolOpDsc.ctsArrayLength++; if (intBoolOpDsc.ctsArray == nullptr) { @@ -1848,8 +1874,12 @@ IntBoolOpDsc GetNextIntBoolOpToOptimize(GenTree* b3) } ssize_t constant = b4->AsIntConCommon()->IconValue(); intBoolOpDsc.ctsArray[intBoolOpDsc.ctsArrayLength - 1] = constant; + orOpCount--; break; } + case GT_OR: + orOpCount += 2; + break; default: { break; From b534f72805e23f480a7c41701ec6fd8f8715bbb5 Mon Sep 17 00:00:00 2001 From: pedrobsaila Date: Sun, 25 Feb 2024 16:22:30 +0100 Subject: [PATCH 06/27] fix the issue with expressions inside call param --- src/coreclr/jit/optimizebools.cpp | 97 ++++++++++++++++++++++--------- 1 file changed, 69 insertions(+), 28 deletions(-) diff --git a/src/coreclr/jit/optimizebools.cpp b/src/coreclr/jit/optimizebools.cpp index 1a881bf7452701..304e611a103ae0 100644 --- a/src/coreclr/jit/optimizebools.cpp +++ b/src/coreclr/jit/optimizebools.cpp @@ -1809,6 +1809,7 @@ IntBoolOpDsc GetNextIntBoolOpToOptimize(GenTree* b3) { CleanIntBoolOpDsc(&intBoolOpDsc); b4 = b4->gtPrev; + orOpCount = 0; continue; } } @@ -1844,7 +1845,7 @@ IntBoolOpDsc GetNextIntBoolOpToOptimize(GenTree* b3) intBoolOpDsc.lclVarArr = reinterpret_cast( realloc(intBoolOpDsc.lclVarArr, sizeof(GenTree*) * intBoolOpDsc.lclVarArrLength)); } - + intBoolOpDsc.lclVarArr[intBoolOpDsc.lclVarArrLength - 1] = b4; orOpCount--; break; @@ -1858,7 +1859,7 @@ IntBoolOpDsc GetNextIntBoolOpToOptimize(GenTree* b3) intBoolOpDsc.end = b4; return intBoolOpDsc; } - + CleanIntBoolOpDsc(&intBoolOpDsc); break; } @@ -1878,7 +1879,16 @@ IntBoolOpDsc GetNextIntBoolOpToOptimize(GenTree* b3) break; } case GT_OR: - orOpCount += 2; + if (b4->gtPrev != nullptr && b4->gtPrev->OperIs(GT_CNS_INT, GT_LCL_VAR)) + { + orOpCount++; + + if (b4->gtPrev->gtPrev != nullptr && b4->gtPrev->gtPrev->OperIs(GT_CNS_INT, GT_LCL_VAR)) + { + orOpCount++; + } + } + break; default: { @@ -1892,31 +1902,31 @@ IntBoolOpDsc GetNextIntBoolOpToOptimize(GenTree* b3) return intBoolOpDsc; } -void OptimizeIntBoolOp(Compiler* compiler, IntBoolOpDsc intBoolOpDsc) +void OptimizeIntBoolOp(Compiler* compiler, IntBoolOpDsc* intBoolOpDsc) { - GenTreeOp* intVarTree = compiler->gtNewOperNode(GT_OR, TYP_INT, - intBoolOpDsc.lclVarArr[0], intBoolOpDsc.lclVarArr[1]); - intVarTree->gtPrev = intBoolOpDsc.lclVarArr[1]; - intBoolOpDsc.lclVarArr[1]->gtNext = intVarTree; - intBoolOpDsc.lclVarArr[1]->gtPrev = intBoolOpDsc.lclVarArr[0]; - intBoolOpDsc.lclVarArr[0]->gtNext = intBoolOpDsc.lclVarArr[1]; - intBoolOpDsc.lclVarArr[0]->gtPrev = intBoolOpDsc.end; + GenTreeOp* intVarTree = compiler->gtNewOperNode(GT_OR, TYP_INT, + intBoolOpDsc->lclVarArr[0], intBoolOpDsc->lclVarArr[1]); + intVarTree->gtPrev = intBoolOpDsc->lclVarArr[1]; + intBoolOpDsc->lclVarArr[1]->gtNext = intVarTree; + intBoolOpDsc->lclVarArr[1]->gtPrev = intBoolOpDsc->lclVarArr[0]; + intBoolOpDsc->lclVarArr[0]->gtNext = intBoolOpDsc->lclVarArr[1]; + intBoolOpDsc->lclVarArr[0]->gtPrev = intBoolOpDsc->end; GenTree* tempIntVatTree = intVarTree; - - for (int i = 2; i < intBoolOpDsc.lclVarArrLength; i++) - { - GenTreeOp* newIntVarTree = compiler->gtNewOperNode(GT_OR, TYP_INT, tempIntVatTree, intBoolOpDsc.lclVarArr[i]); - newIntVarTree->gtPrev = intBoolOpDsc.lclVarArr[i]; - intBoolOpDsc.lclVarArr[i]->gtNext = newIntVarTree; - intBoolOpDsc.lclVarArr[i]->gtPrev = tempIntVatTree; - tempIntVatTree->gtNext = intBoolOpDsc.lclVarArr[i]; + + for (int i = 2; i < intBoolOpDsc->lclVarArrLength; i++) + { + GenTreeOp* newIntVarTree = compiler->gtNewOperNode(GT_OR, TYP_INT, tempIntVatTree, intBoolOpDsc->lclVarArr[i]); + newIntVarTree->gtPrev = intBoolOpDsc->lclVarArr[i]; + intBoolOpDsc->lclVarArr[i]->gtNext = newIntVarTree; + intBoolOpDsc->lclVarArr[i]->gtPrev = tempIntVatTree; + tempIntVatTree->gtNext = intBoolOpDsc->lclVarArr[i]; tempIntVatTree = newIntVarTree; } size_t optimizedCst = 0; - for (int i = 0; i < intBoolOpDsc.ctsArrayLength; i++) + for (int i = 0; i < intBoolOpDsc->ctsArrayLength; i++) { - optimizedCst = optimizedCst | intBoolOpDsc.ctsArray[i]; + optimizedCst = optimizedCst | intBoolOpDsc->ctsArray[i]; } GenTreeIntCon* optimizedCstTree = compiler->gtNewIconNode(optimizedCst, TYP_INT); @@ -1925,16 +1935,47 @@ void OptimizeIntBoolOp(Compiler* compiler, IntBoolOpDsc intBoolOpDsc) optimizedCstTree->gtNext = optimizedTree; optimizedCstTree->gtPrev = tempIntVatTree; tempIntVatTree->gtNext = optimizedCstTree; - intBoolOpDsc.start->gtPrev = optimizedTree; - optimizedTree->gtNext = intBoolOpDsc.start; + intBoolOpDsc->start->gtPrev = optimizedTree; + optimizedTree->gtNext = intBoolOpDsc->start; - if (intBoolOpDsc.start->OperIsUnary()) + if (intBoolOpDsc->start->OperIsUnary()) { - intBoolOpDsc.start->AsOp()->gtOp1 = optimizedTree; + intBoolOpDsc->start->AsOp()->gtOp1 = optimizedTree; } - else if (intBoolOpDsc.start->gtNext != nullptr && intBoolOpDsc.start->gtNext->OperIsBinary()) + else if (intBoolOpDsc->start->gtNext != nullptr) { - intBoolOpDsc.start->gtNext->AsOp()->gtOp1 = optimizedTree; + if (intBoolOpDsc->start->gtNext->OperIsBinary()) + { + intBoolOpDsc->start->gtNext->AsOp()->gtOp1 = optimizedTree; + } + else if (intBoolOpDsc->start->gtNext->OperIs(GT_CALL)) + { + GenTreeCall* call = intBoolOpDsc->start->gtNext->AsCall(); + IteratorPair args = call->gtArgs.Args(); + CallArgs::ArgIterator nextArg = args.begin(); + CallArg* nextCallArg = nextArg.GetArg(); + + if (nextCallArg->GetNode()->gtNext == intBoolOpDsc->start) + { + nextCallArg->SetLateNode(optimizedTree); + } + else + { + nextArg = nextArg.operator++(); + nextCallArg = nextArg.GetArg(); + while (nextCallArg != nullptr) + { + if (nextCallArg->GetNode()->gtNext == intBoolOpDsc->start) + { + nextCallArg->SetLateNode(optimizedTree); + break; + } + + nextArg = nextArg.operator++(); + nextCallArg = nextArg.GetArg(); + } + } + } } } @@ -1951,7 +1992,7 @@ unsigned int TryOptimizeIntBoolOp(Compiler* compiler, BasicBlock* b1) while (intBoolOpDsc.ctsArrayLength >= 2 && intBoolOpDsc.lclVarArrLength >= 2) { - OptimizeIntBoolOp(compiler, intBoolOpDsc); + OptimizeIntBoolOp(compiler, &intBoolOpDsc); if (intBoolOpDsc.end == nullptr) { From c5acc44c2ac3b539e320a4dd6d9972999f4a4267 Mon Sep 17 00:00:00 2001 From: pedrobsaila Date: Fri, 1 Mar 2024 00:23:21 +0100 Subject: [PATCH 07/27] handle cases for end not null and start is binary --- PimpMyApp/Program.cs | 17 ++-- src/coreclr/jit/compiler.h | 3 +- src/coreclr/jit/optimizebools.cpp | 130 ++++++++++++++++++------------ 3 files changed, 90 insertions(+), 60 deletions(-) diff --git a/PimpMyApp/Program.cs b/PimpMyApp/Program.cs index b4d9528b7e83c1..996a38f3b206ca 100644 --- a/PimpMyApp/Program.cs +++ b/PimpMyApp/Program.cs @@ -10,16 +10,19 @@ public class Program { public static int Main() { - // return (int)Bambala(6, 23, 40); - return (int)Bambala(2, 6, 23, 45, 66, 2); + Console.WriteLine("Hi"); + int result = Bambala1(6, 23); + result += Bambala2(6, 23, 45, 66, 2); + long result2 = Bambala3(6, 23, 45, 66, 2, 2); + return result + (int)result2; } - // [MethodImpl(MethodImplOptions.NoInlining)] - // private static int Bambala(int x, int y) => x | y | 5; + [MethodImpl(MethodImplOptions.NoInlining)] + private static int Bambala1(int x, int y) => (x | 3) | (y | 5); [MethodImpl(MethodImplOptions.NoInlining)] - private static long Bambala(long m, int p, int u, int x, int y, int z) => m * System.Math.BigMul(((u | 2) | (x | 5) | (y | 3) | (z | 6)), p); + private static int Bambala2(int p, int u, int x, int y, int z) => ((u | 2) | (x | 5) | (y | 3) | (z | 6)) * p + ((x | 6) | (u | 7)); - // [MethodImpl(MethodImplOptions.NoInlining)] - // private static long Bambala(int m, int p, int c) => Math.Clamp(m, p, c); + [MethodImpl(MethodImplOptions.NoInlining)] + private static long Bambala3(long m, int p, int u, int x, int y, int z) => m * System.Math.BigMul(((u | 2) | (x | 5) | (y | 3) | (z | 6)), p); } diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 048d6eced96d02..19233507de2f0e 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -83,6 +83,7 @@ class SpanningTreeVisitor; // defined in fgprofile.cpp class CSE_DataFlow; // defined in optcse.cpp struct CSEdsc; // defined in optcse.h class CSE_HeuristicCommon; // defined in optcse.h +struct IntBoolOpDsc; // defined in optimizer.cpp class OptBoolsDsc; // defined in optimizer.cpp struct RelopImplicationInfo; // defined in redundantbranchopts.cpp struct JumpThreadInfo; // defined in redundantbranchopts.cpp @@ -5898,7 +5899,7 @@ class Compiler void fgReplaceEhfSuccessor(BasicBlock* block, BasicBlock* oldSucc, BasicBlock* newSucc); void fgRemoveEhfSuccessor(BasicBlock* block, const unsigned succIndex); - + void fgRemoveEhfSuccessor(FlowEdge* succEdge); void fgReplaceJumpTarget(BasicBlock* block, BasicBlock* oldTarget, BasicBlock* newTarget); diff --git a/src/coreclr/jit/optimizebools.cpp b/src/coreclr/jit/optimizebools.cpp index 304e611a103ae0..2a8fb7bc023db0 100644 --- a/src/coreclr/jit/optimizebools.cpp +++ b/src/coreclr/jit/optimizebools.cpp @@ -1746,6 +1746,11 @@ GenTree* OptBoolsDsc::optIsBoolComp(OptTestInfo* pOptTest) void CleanIntBoolOpDsc(IntBoolOpDsc* intBoolOpDsc) { + if (intBoolOpDsc == nullptr) + { + return; + } + intBoolOpDsc->start = nullptr; intBoolOpDsc->end = nullptr; @@ -1766,57 +1771,50 @@ void CleanIntBoolOpDsc(IntBoolOpDsc* intBoolOpDsc) intBoolOpDsc->lclVarArrLength = 0; } -IntBoolOpDsc GetNextIntBoolOpToOptimize(GenTree* b3) +IntBoolOpDsc* GetNextIntBoolOpToOptimize(GenTree* b3) { - IntBoolOpDsc intBoolOpDsc; - intBoolOpDsc.ctsArray = nullptr; - intBoolOpDsc.ctsArrayLength = 0; - intBoolOpDsc.lclVarArr = nullptr; - intBoolOpDsc.lclVarArrLength = 0; - intBoolOpDsc.start = nullptr; - intBoolOpDsc.end = nullptr; - int orOpCount = 0; - if (b3 == nullptr) { - return intBoolOpDsc; + return nullptr; } + IntBoolOpDsc* intBoolOpDsc = reinterpret_cast(malloc(sizeof(IntBoolOpDsc))); + intBoolOpDsc->ctsArray = nullptr; + intBoolOpDsc->ctsArrayLength = 0; + intBoolOpDsc->lclVarArr = nullptr; + intBoolOpDsc->lclVarArrLength = 0; + intBoolOpDsc->start = nullptr; + intBoolOpDsc->end = nullptr; + int orOpCount = 0; + GenTree* b4 = b3->gtPrev; while (b4 != nullptr) { if (!b4->OperIs(GT_OR, GT_LCL_VAR, GT_CNS_INT) || !b4->TypeIs(TYP_INT)) { - if (intBoolOpDsc.start == nullptr) + if (intBoolOpDsc->start == nullptr) { - if (b4->OperIsBinary()) - { - b4 = b4->gtPrev->gtPrev; - } - else - { - b4 = b4->gtPrev; - } + b4 = b4->gtPrev; continue; } - if (intBoolOpDsc.ctsArrayLength >= 2 && intBoolOpDsc.lclVarArrLength >= 2) + if (intBoolOpDsc->ctsArrayLength >= 2 && intBoolOpDsc->lclVarArrLength >= 2) { - intBoolOpDsc.end = b4; + intBoolOpDsc->end = b4; return intBoolOpDsc; } else { - CleanIntBoolOpDsc(&intBoolOpDsc); + CleanIntBoolOpDsc(intBoolOpDsc); b4 = b4->gtPrev; orOpCount = 0; continue; } } - if (intBoolOpDsc.start == nullptr) + if (intBoolOpDsc->start == nullptr) { - intBoolOpDsc.start = b4->gtNext; + intBoolOpDsc->start = b4->gtNext; } switch (b4->gtOper) @@ -1825,28 +1823,28 @@ IntBoolOpDsc GetNextIntBoolOpToOptimize(GenTree* b3) { if (orOpCount <= 0) { - if (intBoolOpDsc.ctsArrayLength >= 2 && intBoolOpDsc.lclVarArrLength >= 2) + if (intBoolOpDsc->ctsArrayLength >= 2 && intBoolOpDsc->lclVarArrLength >= 2) { - intBoolOpDsc.end = b4; + intBoolOpDsc->end = b4; return intBoolOpDsc; } - CleanIntBoolOpDsc(&intBoolOpDsc); + CleanIntBoolOpDsc(intBoolOpDsc); break; } - intBoolOpDsc.lclVarArrLength++; - if (intBoolOpDsc.lclVarArr == nullptr) + intBoolOpDsc->lclVarArrLength++; + if (intBoolOpDsc->lclVarArr == nullptr) { - intBoolOpDsc.lclVarArr = reinterpret_cast(malloc(sizeof(GenTree*) * intBoolOpDsc.lclVarArrLength)); + intBoolOpDsc->lclVarArr = reinterpret_cast(malloc(sizeof(GenTree*) * intBoolOpDsc->lclVarArrLength)); } else { - intBoolOpDsc.lclVarArr = reinterpret_cast( - realloc(intBoolOpDsc.lclVarArr, sizeof(GenTree*) * intBoolOpDsc.lclVarArrLength)); + intBoolOpDsc->lclVarArr = reinterpret_cast( + realloc(intBoolOpDsc->lclVarArr, sizeof(GenTree*) * intBoolOpDsc->lclVarArrLength)); } - intBoolOpDsc.lclVarArr[intBoolOpDsc.lclVarArrLength - 1] = b4; + intBoolOpDsc->lclVarArr[intBoolOpDsc->lclVarArrLength - 1] = b4; orOpCount--; break; } @@ -1854,27 +1852,27 @@ IntBoolOpDsc GetNextIntBoolOpToOptimize(GenTree* b3) { if (orOpCount <= 0) { - if (intBoolOpDsc.ctsArrayLength >= 2 && intBoolOpDsc.lclVarArrLength >= 2) + if (intBoolOpDsc->ctsArrayLength >= 2 && intBoolOpDsc->lclVarArrLength >= 2) { - intBoolOpDsc.end = b4; + intBoolOpDsc->end = b4; return intBoolOpDsc; } - CleanIntBoolOpDsc(&intBoolOpDsc); + CleanIntBoolOpDsc(intBoolOpDsc); break; } - intBoolOpDsc.ctsArrayLength++; - if (intBoolOpDsc.ctsArray == nullptr) + intBoolOpDsc->ctsArrayLength++; + if (intBoolOpDsc->ctsArray == nullptr) { - intBoolOpDsc.ctsArray = reinterpret_cast(malloc(sizeof(ssize_t) * intBoolOpDsc.ctsArrayLength)); + intBoolOpDsc->ctsArray = reinterpret_cast(malloc(sizeof(ssize_t) * intBoolOpDsc->ctsArrayLength)); } else { - intBoolOpDsc.ctsArray = reinterpret_cast(realloc(intBoolOpDsc.ctsArray, sizeof(ssize_t) * intBoolOpDsc.ctsArrayLength)); + intBoolOpDsc->ctsArray = reinterpret_cast(realloc(intBoolOpDsc->ctsArray, sizeof(ssize_t) * intBoolOpDsc->ctsArrayLength)); } ssize_t constant = b4->AsIntConCommon()->IconValue(); - intBoolOpDsc.ctsArray[intBoolOpDsc.ctsArrayLength - 1] = constant; + intBoolOpDsc->ctsArray[intBoolOpDsc->ctsArrayLength - 1] = constant; orOpCount--; break; } @@ -1911,6 +1909,12 @@ void OptimizeIntBoolOp(Compiler* compiler, IntBoolOpDsc* intBoolOpDsc) intBoolOpDsc->lclVarArr[1]->gtPrev = intBoolOpDsc->lclVarArr[0]; intBoolOpDsc->lclVarArr[0]->gtNext = intBoolOpDsc->lclVarArr[1]; intBoolOpDsc->lclVarArr[0]->gtPrev = intBoolOpDsc->end; + + if (intBoolOpDsc->end != nullptr) + { + intBoolOpDsc->end->gtNext = intBoolOpDsc->lclVarArr[0]; + } + GenTree* tempIntVatTree = intVarTree; for (int i = 2; i < intBoolOpDsc->lclVarArrLength; i++) @@ -1942,6 +1946,10 @@ void OptimizeIntBoolOp(Compiler* compiler, IntBoolOpDsc* intBoolOpDsc) { intBoolOpDsc->start->AsOp()->gtOp1 = optimizedTree; } + else if (intBoolOpDsc->start->OperIsBinary()) + { + intBoolOpDsc->start->AsOp()->gtOp2 = optimizedTree; + } else if (intBoolOpDsc->start->gtNext != nullptr) { if (intBoolOpDsc->start->gtNext->OperIsBinary()) @@ -1988,23 +1996,28 @@ unsigned int TryOptimizeIntBoolOp(Compiler* compiler, BasicBlock* b1) GenTree* b3 = b2->GetRootNode(); if (b3 != nullptr && b3->OperIs(GT_RETURN)) { - IntBoolOpDsc intBoolOpDsc = GetNextIntBoolOpToOptimize(b3); - while (intBoolOpDsc.ctsArrayLength >= 2 - && intBoolOpDsc.lclVarArrLength >= 2) + IntBoolOpDsc* intBoolOpDsc = GetNextIntBoolOpToOptimize(b3); + + if (intBoolOpDsc == nullptr) { - OptimizeIntBoolOp(compiler, &intBoolOpDsc); + return 0; + } + + if (intBoolOpDsc->ctsArrayLength >= 2 + && intBoolOpDsc->lclVarArrLength >= 2) + { + OptimizeIntBoolOp(compiler, intBoolOpDsc); - if (intBoolOpDsc.end == nullptr) + if (intBoolOpDsc->end == nullptr) { - b2->SetTreeList(intBoolOpDsc.lclVarArr[0]); + b2->SetTreeList(intBoolOpDsc->lclVarArr[0]); } - CleanIntBoolOpDsc(&intBoolOpDsc); result++; - intBoolOpDsc = GetNextIntBoolOpToOptimize(intBoolOpDsc.end); } - CleanIntBoolOpDsc(&intBoolOpDsc); + CleanIntBoolOpDsc(intBoolOpDsc); + free(intBoolOpDsc); } } @@ -2134,6 +2147,14 @@ unsigned int TryOptimizeIntBoolOp(Compiler* compiler, BasicBlock* b1) // +--* LCL_VAR int V00 arg0 // \--* CNS_INT int 0 // +// Case 16: ((x | 5) | (y | 2)) => ((x | y) | 7) +// * RETURN int $VN.Void +// \--* OR int +// +--* OR int +// | +--* LCL_VAR int V01 arg1 u:1 (last use) $81 +// | \--* LCL_VAR int V00 arg0 u:1 (last use) $80 +// \--* CNS_INT int 7 +// // Patterns that are not optimized include (x == 1 && y == 1), (x == 1 || y == 1), // (x == 0 || y == 0) because currently their comptree is not marked as boolean expression. // When m_foldOp == GT_AND or m_cmpOp == GT_NE, both compTrees must be boolean expression @@ -2175,7 +2196,12 @@ PhaseStatus Compiler::optOptimizeBools() if (b1->KindIs(BBJ_RETURN)) { - numReturn += TryOptimizeIntBoolOp(this, b1); + if (TryOptimizeIntBoolOp(this, b1) > 0) + { + numReturn++; + retry = true; + } + continue; } From 1333ac695478b428c3db117a9cd8500888e7b26d Mon Sep 17 00:00:00 2001 From: pedrobsaila Date: Fri, 1 Mar 2024 00:42:08 +0100 Subject: [PATCH 08/27] add comments --- src/coreclr/jit/optimizebools.cpp | 45 +++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/optimizebools.cpp b/src/coreclr/jit/optimizebools.cpp index 2a8fb7bc023db0..f2e13e8a479a34 100644 --- a/src/coreclr/jit/optimizebools.cpp +++ b/src/coreclr/jit/optimizebools.cpp @@ -1744,7 +1744,13 @@ GenTree* OptBoolsDsc::optIsBoolComp(OptTestInfo* pOptTest) return opr1; } -void CleanIntBoolOpDsc(IntBoolOpDsc* intBoolOpDsc) +//----------------------------------------------------------------------------- +// ReinitIntBoolOpDsc: Procedure that reinitialize IntBoolOpDsc reference +// +// Arguments: +// intBoolOpDsc the reference for INT OR operations to be folded +// +void ReinitIntBoolOpDsc(IntBoolOpDsc* intBoolOpDsc) { if (intBoolOpDsc == nullptr) { @@ -1771,6 +1777,17 @@ void CleanIntBoolOpDsc(IntBoolOpDsc* intBoolOpDsc) intBoolOpDsc->lclVarArrLength = 0; } +//----------------------------------------------------------------------------- +// GetNextIntBoolOpToOptimize: Function used for searching constant INT OR operation that can be folded +// +// Arguments: +// b3 the tree to inspect +// +// Return: +// On success, return the start and end offset of code to optimize and the variables and constants to be folded. +// +// Notes: +// We look for consecutive blocks that have GT_OR, GT_LCL_VAR, GT_CNS_INT nodes. IntBoolOpDsc* GetNextIntBoolOpToOptimize(GenTree* b3) { if (b3 == nullptr) @@ -1805,7 +1822,7 @@ IntBoolOpDsc* GetNextIntBoolOpToOptimize(GenTree* b3) } else { - CleanIntBoolOpDsc(intBoolOpDsc); + ReinitIntBoolOpDsc(intBoolOpDsc); b4 = b4->gtPrev; orOpCount = 0; continue; @@ -1829,7 +1846,7 @@ IntBoolOpDsc* GetNextIntBoolOpToOptimize(GenTree* b3) return intBoolOpDsc; } - CleanIntBoolOpDsc(intBoolOpDsc); + ReinitIntBoolOpDsc(intBoolOpDsc); break; } @@ -1858,7 +1875,7 @@ IntBoolOpDsc* GetNextIntBoolOpToOptimize(GenTree* b3) return intBoolOpDsc; } - CleanIntBoolOpDsc(intBoolOpDsc); + ReinitIntBoolOpDsc(intBoolOpDsc); break; } @@ -1900,6 +1917,15 @@ IntBoolOpDsc* GetNextIntBoolOpToOptimize(GenTree* b3) return intBoolOpDsc; } +//----------------------------------------------------------------------------- +// OptimizeIntBoolOp: Procedure that fold constant INT OR operations +// +// Arguments: +// compiler compiler reference +// intBoolOpDsc the reference for INT OR operations to be folded +// +// Notes: +// We recreate nodes so as to eliminate excessive constants void OptimizeIntBoolOp(Compiler* compiler, IntBoolOpDsc* intBoolOpDsc) { GenTreeOp* intVarTree = compiler->gtNewOperNode(GT_OR, TYP_INT, @@ -1987,6 +2013,15 @@ void OptimizeIntBoolOp(Compiler* compiler, IntBoolOpDsc* intBoolOpDsc) } } +//----------------------------------------------------------------------------- +// OptimizeIntBoolOp: Procedure that looks for constant INT OR operations to fold and fold them +// +// Arguments: +// compiler compiler reference +// b1 the block code to inspect for operations to fold +// +// Return: +// 1 if a block was folded, 0 if not unsigned int TryOptimizeIntBoolOp(Compiler* compiler, BasicBlock* b1) { unsigned int result = 0; @@ -2016,7 +2051,7 @@ unsigned int TryOptimizeIntBoolOp(Compiler* compiler, BasicBlock* b1) result++; } - CleanIntBoolOpDsc(intBoolOpDsc); + ReinitIntBoolOpDsc(intBoolOpDsc); free(intBoolOpDsc); } } From ada9f29f8d1d52bbb32bd4d3dd71ca62aafecc9d Mon Sep 17 00:00:00 2001 From: pedrobsaila Date: Fri, 1 Mar 2024 01:34:25 +0100 Subject: [PATCH 09/27] add tests --- PimpMyApp/Program.cs | 18 +++++++++++++ .../JIT/opt/OptimizeBools/optboolsreturn.cs | 25 +++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/PimpMyApp/Program.cs b/PimpMyApp/Program.cs index 996a38f3b206ca..9b136bcf24e766 100644 --- a/PimpMyApp/Program.cs +++ b/PimpMyApp/Program.cs @@ -14,6 +14,12 @@ public static int Main() int result = Bambala1(6, 23); result += Bambala2(6, 23, 45, 66, 2); long result2 = Bambala3(6, 23, 45, 66, 2, 2); + + if (Or10Or5(14, 23) != Or15(14, 23) || Or10Or5(78, 11) != Or15(78, 11)) + { + Console.WriteLine("Oups"); + } + return result + (int)result2; } @@ -25,4 +31,16 @@ public static int Main() [MethodImpl(MethodImplOptions.NoInlining)] private static long Bambala3(long m, int p, int u, int x, int y, int z) => m * System.Math.BigMul(((u | 2) | (x | 5) | (y | 3) | (z | 6)), p); + + [MethodImpl(MethodImplOptions.NoInlining)] + private static int Or10Or5(int x, int y) + { + return (x | 10) | (y | 5); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static int Or15(int x, int y) + { + return (x | y) | 15; + } } diff --git a/src/tests/JIT/opt/OptimizeBools/optboolsreturn.cs b/src/tests/JIT/opt/OptimizeBools/optboolsreturn.cs index 93dd6a0991b89d..8775c79599482c 100644 --- a/src/tests/JIT/opt/OptimizeBools/optboolsreturn.cs +++ b/src/tests/JIT/opt/OptimizeBools/optboolsreturn.cs @@ -206,6 +206,19 @@ private static bool LessThanZeroBis(int x) { return x <= 0 && x != 0; } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static int Or10Or5(int x, int y) + { + return (x | 10) | (y | 5); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static int Or15(int x, int y) + { + return (x | y) | 15; + } + [MethodImpl(MethodImplOptions.NoInlining)] private static bool AreBothGreatThanZero(int x, int y) { @@ -1116,6 +1129,18 @@ public static int TestEntryPoint() return 101; } + if (Or10Or5(14, 23) != Or15(14, 23)) + { + Console.WriteLine("CBoolTest:Or10Or5(14, 23) failed"); + return 101; + } + + if (Or10Or5(78, 11) != Or15(78, 11)) + { + Console.WriteLine("CBoolTest:Or10Or5(78, 11) failed"); + return 101; + } + Console.WriteLine("PASSED"); return 100; } From f278ba06951618ade236c0b8da48d77f48a2dc79 Mon Sep 17 00:00:00 2001 From: pedrobsaila Date: Fri, 1 Mar 2024 01:44:25 +0100 Subject: [PATCH 10/27] format --- src/coreclr/jit/optimizebools.cpp | 84 ++++++++++++++++--------------- 1 file changed, 43 insertions(+), 41 deletions(-) diff --git a/src/coreclr/jit/optimizebools.cpp b/src/coreclr/jit/optimizebools.cpp index f2e13e8a479a34..2f18f69dcd40fc 100644 --- a/src/coreclr/jit/optimizebools.cpp +++ b/src/coreclr/jit/optimizebools.cpp @@ -31,12 +31,12 @@ struct OptTestInfo struct IntBoolOpDsc { - GenTree** lclVarArr; - int32_t lclVarArrLength; - ssize_t* ctsArray; - int32_t ctsArrayLength; - GenTree* start; - GenTree* end; + GenTree** lclVarArr; + int32_t lclVarArrLength; + ssize_t* ctsArray; + int32_t ctsArrayLength; + GenTree* start; + GenTree* end; }; //----------------------------------------------------------------------------- @@ -1758,7 +1758,7 @@ void ReinitIntBoolOpDsc(IntBoolOpDsc* intBoolOpDsc) } intBoolOpDsc->start = nullptr; - intBoolOpDsc->end = nullptr; + intBoolOpDsc->end = nullptr; if (intBoolOpDsc->ctsArray != nullptr) { @@ -1795,14 +1795,14 @@ IntBoolOpDsc* GetNextIntBoolOpToOptimize(GenTree* b3) return nullptr; } - IntBoolOpDsc* intBoolOpDsc = reinterpret_cast(malloc(sizeof(IntBoolOpDsc))); - intBoolOpDsc->ctsArray = nullptr; - intBoolOpDsc->ctsArrayLength = 0; - intBoolOpDsc->lclVarArr = nullptr; + IntBoolOpDsc* intBoolOpDsc = reinterpret_cast(malloc(sizeof(IntBoolOpDsc))); + intBoolOpDsc->ctsArray = nullptr; + intBoolOpDsc->ctsArrayLength = 0; + intBoolOpDsc->lclVarArr = nullptr; intBoolOpDsc->lclVarArrLength = 0; - intBoolOpDsc->start = nullptr; - intBoolOpDsc->end = nullptr; - int orOpCount = 0; + intBoolOpDsc->start = nullptr; + intBoolOpDsc->end = nullptr; + int orOpCount = 0; GenTree* b4 = b3->gtPrev; while (b4 != nullptr) @@ -1823,7 +1823,7 @@ IntBoolOpDsc* GetNextIntBoolOpToOptimize(GenTree* b3) else { ReinitIntBoolOpDsc(intBoolOpDsc); - b4 = b4->gtPrev; + b4 = b4->gtPrev; orOpCount = 0; continue; } @@ -1853,7 +1853,8 @@ IntBoolOpDsc* GetNextIntBoolOpToOptimize(GenTree* b3) intBoolOpDsc->lclVarArrLength++; if (intBoolOpDsc->lclVarArr == nullptr) { - intBoolOpDsc->lclVarArr = reinterpret_cast(malloc(sizeof(GenTree*) * intBoolOpDsc->lclVarArrLength)); + intBoolOpDsc->lclVarArr = + reinterpret_cast(malloc(sizeof(GenTree*) * intBoolOpDsc->lclVarArrLength)); } else { @@ -1882,13 +1883,15 @@ IntBoolOpDsc* GetNextIntBoolOpToOptimize(GenTree* b3) intBoolOpDsc->ctsArrayLength++; if (intBoolOpDsc->ctsArray == nullptr) { - intBoolOpDsc->ctsArray = reinterpret_cast(malloc(sizeof(ssize_t) * intBoolOpDsc->ctsArrayLength)); + intBoolOpDsc->ctsArray = + reinterpret_cast(malloc(sizeof(ssize_t) * intBoolOpDsc->ctsArrayLength)); } else { - intBoolOpDsc->ctsArray = reinterpret_cast(realloc(intBoolOpDsc->ctsArray, sizeof(ssize_t) * intBoolOpDsc->ctsArrayLength)); + intBoolOpDsc->ctsArray = reinterpret_cast( + realloc(intBoolOpDsc->ctsArray, sizeof(ssize_t) * intBoolOpDsc->ctsArrayLength)); } - ssize_t constant = b4->AsIntConCommon()->IconValue(); + ssize_t constant = b4->AsIntConCommon()->IconValue(); intBoolOpDsc->ctsArray[intBoolOpDsc->ctsArrayLength - 1] = constant; orOpCount--; break; @@ -1928,9 +1931,9 @@ IntBoolOpDsc* GetNextIntBoolOpToOptimize(GenTree* b3) // We recreate nodes so as to eliminate excessive constants void OptimizeIntBoolOp(Compiler* compiler, IntBoolOpDsc* intBoolOpDsc) { - GenTreeOp* intVarTree = compiler->gtNewOperNode(GT_OR, TYP_INT, - intBoolOpDsc->lclVarArr[0], intBoolOpDsc->lclVarArr[1]); - intVarTree->gtPrev = intBoolOpDsc->lclVarArr[1]; + GenTreeOp* intVarTree = + compiler->gtNewOperNode(GT_OR, TYP_INT, intBoolOpDsc->lclVarArr[0], intBoolOpDsc->lclVarArr[1]); + intVarTree->gtPrev = intBoolOpDsc->lclVarArr[1]; intBoolOpDsc->lclVarArr[1]->gtNext = intVarTree; intBoolOpDsc->lclVarArr[1]->gtPrev = intBoolOpDsc->lclVarArr[0]; intBoolOpDsc->lclVarArr[0]->gtNext = intBoolOpDsc->lclVarArr[1]; @@ -1946,11 +1949,11 @@ void OptimizeIntBoolOp(Compiler* compiler, IntBoolOpDsc* intBoolOpDsc) for (int i = 2; i < intBoolOpDsc->lclVarArrLength; i++) { GenTreeOp* newIntVarTree = compiler->gtNewOperNode(GT_OR, TYP_INT, tempIntVatTree, intBoolOpDsc->lclVarArr[i]); - newIntVarTree->gtPrev = intBoolOpDsc->lclVarArr[i]; + newIntVarTree->gtPrev = intBoolOpDsc->lclVarArr[i]; intBoolOpDsc->lclVarArr[i]->gtNext = newIntVarTree; intBoolOpDsc->lclVarArr[i]->gtPrev = tempIntVatTree; - tempIntVatTree->gtNext = intBoolOpDsc->lclVarArr[i]; - tempIntVatTree = newIntVarTree; + tempIntVatTree->gtNext = intBoolOpDsc->lclVarArr[i]; + tempIntVatTree = newIntVarTree; } size_t optimizedCst = 0; @@ -1960,13 +1963,13 @@ void OptimizeIntBoolOp(Compiler* compiler, IntBoolOpDsc* intBoolOpDsc) } GenTreeIntCon* optimizedCstTree = compiler->gtNewIconNode(optimizedCst, TYP_INT); - GenTreeOp* optimizedTree = compiler->gtNewOperNode(GT_OR, TYP_INT, tempIntVatTree, optimizedCstTree); - optimizedTree->gtPrev = optimizedCstTree; - optimizedCstTree->gtNext = optimizedTree; - optimizedCstTree->gtPrev = tempIntVatTree; - tempIntVatTree->gtNext = optimizedCstTree; - intBoolOpDsc->start->gtPrev = optimizedTree; - optimizedTree->gtNext = intBoolOpDsc->start; + GenTreeOp* optimizedTree = compiler->gtNewOperNode(GT_OR, TYP_INT, tempIntVatTree, optimizedCstTree); + optimizedTree->gtPrev = optimizedCstTree; + optimizedCstTree->gtNext = optimizedTree; + optimizedCstTree->gtPrev = tempIntVatTree; + tempIntVatTree->gtNext = optimizedCstTree; + intBoolOpDsc->start->gtPrev = optimizedTree; + optimizedTree->gtNext = intBoolOpDsc->start; if (intBoolOpDsc->start->OperIsUnary()) { @@ -1984,10 +1987,10 @@ void OptimizeIntBoolOp(Compiler* compiler, IntBoolOpDsc* intBoolOpDsc) } else if (intBoolOpDsc->start->gtNext->OperIs(GT_CALL)) { - GenTreeCall* call = intBoolOpDsc->start->gtNext->AsCall(); - IteratorPair args = call->gtArgs.Args(); - CallArgs::ArgIterator nextArg = args.begin(); - CallArg* nextCallArg = nextArg.GetArg(); + GenTreeCall* call = intBoolOpDsc->start->gtNext->AsCall(); + IteratorPair args = call->gtArgs.Args(); + CallArgs::ArgIterator nextArg = args.begin(); + CallArg* nextCallArg = nextArg.GetArg(); if (nextCallArg->GetNode()->gtNext == intBoolOpDsc->start) { @@ -1995,7 +1998,7 @@ void OptimizeIntBoolOp(Compiler* compiler, IntBoolOpDsc* intBoolOpDsc) } else { - nextArg = nextArg.operator++(); + nextArg = nextArg.operator++(); nextCallArg = nextArg.GetArg(); while (nextCallArg != nullptr) { @@ -2005,7 +2008,7 @@ void OptimizeIntBoolOp(Compiler* compiler, IntBoolOpDsc* intBoolOpDsc) break; } - nextArg = nextArg.operator++(); + nextArg = nextArg.operator++(); nextCallArg = nextArg.GetArg(); } } @@ -2025,7 +2028,7 @@ void OptimizeIntBoolOp(Compiler* compiler, IntBoolOpDsc* intBoolOpDsc) unsigned int TryOptimizeIntBoolOp(Compiler* compiler, BasicBlock* b1) { unsigned int result = 0; - Statement* b2 = b1->firstStmt(); + Statement* b2 = b1->firstStmt(); if (b2 != nullptr) { GenTree* b3 = b2->GetRootNode(); @@ -2038,8 +2041,7 @@ unsigned int TryOptimizeIntBoolOp(Compiler* compiler, BasicBlock* b1) return 0; } - if (intBoolOpDsc->ctsArrayLength >= 2 - && intBoolOpDsc->lclVarArrLength >= 2) + if (intBoolOpDsc->ctsArrayLength >= 2 && intBoolOpDsc->lclVarArrLength >= 2) { OptimizeIntBoolOp(compiler, intBoolOpDsc); From 5a877b98feed22ea2bf0124c6a7716eab135cfc4 Mon Sep 17 00:00:00 2001 From: pedrobsaila Date: Fri, 1 Mar 2024 01:45:08 +0100 Subject: [PATCH 11/27] delete tests folder --- PimpMyApp/PimpMyApp.csproj | 10 --------- PimpMyApp/Program.cs | 46 -------------------------------------- 2 files changed, 56 deletions(-) delete mode 100644 PimpMyApp/PimpMyApp.csproj delete mode 100644 PimpMyApp/Program.cs diff --git a/PimpMyApp/PimpMyApp.csproj b/PimpMyApp/PimpMyApp.csproj deleted file mode 100644 index 3477df059b152e..00000000000000 --- a/PimpMyApp/PimpMyApp.csproj +++ /dev/null @@ -1,10 +0,0 @@ - - - - Exe - net9.0 - enable - enable - - - \ No newline at end of file diff --git a/PimpMyApp/Program.cs b/PimpMyApp/Program.cs deleted file mode 100644 index 9b136bcf24e766..00000000000000 --- a/PimpMyApp/Program.cs +++ /dev/null @@ -1,46 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -using System.Runtime.CompilerServices; -using System; - -namespace PimpMyApp; - -public class Program -{ - public static int Main() - { - Console.WriteLine("Hi"); - int result = Bambala1(6, 23); - result += Bambala2(6, 23, 45, 66, 2); - long result2 = Bambala3(6, 23, 45, 66, 2, 2); - - if (Or10Or5(14, 23) != Or15(14, 23) || Or10Or5(78, 11) != Or15(78, 11)) - { - Console.WriteLine("Oups"); - } - - return result + (int)result2; - } - - [MethodImpl(MethodImplOptions.NoInlining)] - private static int Bambala1(int x, int y) => (x | 3) | (y | 5); - - [MethodImpl(MethodImplOptions.NoInlining)] - private static int Bambala2(int p, int u, int x, int y, int z) => ((u | 2) | (x | 5) | (y | 3) | (z | 6)) * p + ((x | 6) | (u | 7)); - - [MethodImpl(MethodImplOptions.NoInlining)] - private static long Bambala3(long m, int p, int u, int x, int y, int z) => m * System.Math.BigMul(((u | 2) | (x | 5) | (y | 3) | (z | 6)), p); - - [MethodImpl(MethodImplOptions.NoInlining)] - private static int Or10Or5(int x, int y) - { - return (x | 10) | (y | 5); - } - - [MethodImpl(MethodImplOptions.NoInlining)] - private static int Or15(int x, int y) - { - return (x | y) | 15; - } -} From 02e748e4cfe794ea41e53cc0e174bf2743b03194 Mon Sep 17 00:00:00 2001 From: pedrobsaila Date: Sun, 3 Mar 2024 19:43:22 +0100 Subject: [PATCH 12/27] fix the case where the second param is to opt --- src/coreclr/jit/optimizebools.cpp | 83 +++++++++++++++++++------------ 1 file changed, 51 insertions(+), 32 deletions(-) diff --git a/src/coreclr/jit/optimizebools.cpp b/src/coreclr/jit/optimizebools.cpp index 2f18f69dcd40fc..7547763a836e64 100644 --- a/src/coreclr/jit/optimizebools.cpp +++ b/src/coreclr/jit/optimizebools.cpp @@ -1802,9 +1802,15 @@ IntBoolOpDsc* GetNextIntBoolOpToOptimize(GenTree* b3) intBoolOpDsc->lclVarArrLength = 0; intBoolOpDsc->start = nullptr; intBoolOpDsc->end = nullptr; - int orOpCount = 0; + int orOpCount = 1; GenTree* b4 = b3->gtPrev; + + while (b4 != nullptr && !b4->OperIs(GT_OR)) + { + b4 = b4->gtPrev; + } + while (b4 != nullptr) { if (!b4->OperIs(GT_OR, GT_LCL_VAR, GT_CNS_INT) || !b4->TypeIs(TYP_INT)) @@ -1897,16 +1903,20 @@ IntBoolOpDsc* GetNextIntBoolOpToOptimize(GenTree* b3) break; } case GT_OR: - if (b4->gtPrev != nullptr && b4->gtPrev->OperIs(GT_CNS_INT, GT_LCL_VAR)) + if (orOpCount <= 0) { - orOpCount++; - - if (b4->gtPrev->gtPrev != nullptr && b4->gtPrev->gtPrev->OperIs(GT_CNS_INT, GT_LCL_VAR)) + if (intBoolOpDsc->ctsArrayLength >= 2 && intBoolOpDsc->lclVarArrLength >= 2) { - orOpCount++; + intBoolOpDsc->end = b4; + return intBoolOpDsc; } + + ReinitIntBoolOpDsc(intBoolOpDsc); + orOpCount = 1; + intBoolOpDsc->start = b4->gtNext; } + orOpCount++; break; default: { @@ -1979,45 +1989,54 @@ void OptimizeIntBoolOp(Compiler* compiler, IntBoolOpDsc* intBoolOpDsc) { intBoolOpDsc->start->AsOp()->gtOp2 = optimizedTree; } - else if (intBoolOpDsc->start->gtNext != nullptr) + else if (intBoolOpDsc->start->gtNext != nullptr && intBoolOpDsc->start->gtNext->OperIsBinary()) { - if (intBoolOpDsc->start->gtNext->OperIsBinary()) - { - intBoolOpDsc->start->gtNext->AsOp()->gtOp1 = optimizedTree; - } - else if (intBoolOpDsc->start->gtNext->OperIs(GT_CALL)) + intBoolOpDsc->start->gtNext->AsOp()->gtOp1 = optimizedTree; + } + else + { + GenTree* functionCallCandidate = intBoolOpDsc->start; + bool parameterAssigningDone = false; + while (functionCallCandidate != nullptr && !parameterAssigningDone) { - GenTreeCall* call = intBoolOpDsc->start->gtNext->AsCall(); - IteratorPair args = call->gtArgs.Args(); - CallArgs::ArgIterator nextArg = args.begin(); - CallArg* nextCallArg = nextArg.GetArg(); - - if (nextCallArg->GetNode()->gtNext == intBoolOpDsc->start) - { - nextCallArg->SetLateNode(optimizedTree); - } - else + if (functionCallCandidate->OperIs(GT_CALL)) { - nextArg = nextArg.operator++(); - nextCallArg = nextArg.GetArg(); - while (nextCallArg != nullptr) - { - if (nextCallArg->GetNode()->gtNext == intBoolOpDsc->start) - { - nextCallArg->SetLateNode(optimizedTree); - break; - } + GenTreeCall* call = functionCallCandidate->AsCall(); + IteratorPair args = call->gtArgs.Args(); + CallArgs::ArgIterator nextArg = args.begin(); + CallArg* nextCallArg = nextArg.GetArg(); + if (nextCallArg->GetNode()->gtNext == intBoolOpDsc->start) + { + nextCallArg->SetLateNode(optimizedTree); + parameterAssigningDone = true; + } + else + { nextArg = nextArg.operator++(); nextCallArg = nextArg.GetArg(); + while (nextCallArg != nullptr) + { + if (nextCallArg->GetNode()->gtNext == intBoolOpDsc->start) + { + nextCallArg->SetLateNode(optimizedTree); + parameterAssigningDone = true; + break; + } + + nextArg = nextArg.operator++(); + nextCallArg = nextArg.GetArg(); + } } } + + functionCallCandidate = functionCallCandidate->gtNext; } } } //----------------------------------------------------------------------------- -// OptimizeIntBoolOp: Procedure that looks for constant INT OR operations to fold and fold them +// TryOptimizeIntBoolOp: Procedure that looks for constant INT OR operations to fold and if found, folds them // // Arguments: // compiler compiler reference From 47543c0cadd53740ceeffaf27e8f19b0e54fc9b5 Mon Sep 17 00:00:00 2001 From: pedrobsaila Date: Sun, 10 Mar 2024 18:58:40 +0100 Subject: [PATCH 13/27] refactor GetNextIntBoolOpToOptimize --- src/coreclr/jit/optimizebools.cpp | 170 ++++++++++++++++-------------- 1 file changed, 89 insertions(+), 81 deletions(-) diff --git a/src/coreclr/jit/optimizebools.cpp b/src/coreclr/jit/optimizebools.cpp index 7547763a836e64..cf31915eabb50d 100644 --- a/src/coreclr/jit/optimizebools.cpp +++ b/src/coreclr/jit/optimizebools.cpp @@ -1777,6 +1777,25 @@ void ReinitIntBoolOpDsc(IntBoolOpDsc* intBoolOpDsc) intBoolOpDsc->lclVarArrLength = 0; } +//----------------------------------------------------------------------------- +// GetNextOrOp: Function used for searching the next GT_OR node +// +// Arguments: +// b3 the tree to inspect +// +// Return: +// On success, return the next GT_OR node or nullptr if it fails +// +GenTree* GetNextOrOp(GenTree* b4) +{ + while (b4 != nullptr && !b4->OperIs(GT_OR)) + { + b4 = b4->gtPrev; + } + + return b4; +} + //----------------------------------------------------------------------------- // GetNextIntBoolOpToOptimize: Function used for searching constant INT OR operation that can be folded // @@ -1802,25 +1821,20 @@ IntBoolOpDsc* GetNextIntBoolOpToOptimize(GenTree* b3) intBoolOpDsc->lclVarArrLength = 0; intBoolOpDsc->start = nullptr; intBoolOpDsc->end = nullptr; - int orOpCount = 1; + int orOpCount = 2; - GenTree* b4 = b3->gtPrev; + GenTree* b4 = GetNextOrOp(b3->gtPrev); - while (b4 != nullptr && !b4->OperIs(GT_OR)) + if (b4 != nullptr) { - b4 = b4->gtPrev; + intBoolOpDsc->start = b4->gtNext; + b4 = b4->gtPrev; } while (b4 != nullptr) { if (!b4->OperIs(GT_OR, GT_LCL_VAR, GT_CNS_INT) || !b4->TypeIs(TYP_INT)) { - if (intBoolOpDsc->start == nullptr) - { - b4 = b4->gtPrev; - continue; - } - if (intBoolOpDsc->ctsArrayLength >= 2 && intBoolOpDsc->lclVarArrLength >= 2) { intBoolOpDsc->end = b4; @@ -1829,98 +1843,92 @@ IntBoolOpDsc* GetNextIntBoolOpToOptimize(GenTree* b3) else { ReinitIntBoolOpDsc(intBoolOpDsc); - b4 = b4->gtPrev; - orOpCount = 0; - continue; - } - } + b4 = GetNextOrOp(b4); - if (intBoolOpDsc->start == nullptr) - { - intBoolOpDsc->start = b4->gtNext; + if (b4 != nullptr) + { + orOpCount = 2; + intBoolOpDsc->start = b4->gtNext; + } + else + { + break; + } + } } - switch (b4->gtOper) + if (orOpCount <= 0) { - case GT_LCL_VAR: + if (intBoolOpDsc->ctsArrayLength >= 2 && intBoolOpDsc->lclVarArrLength >= 2) { - if (orOpCount <= 0) - { - if (intBoolOpDsc->ctsArrayLength >= 2 && intBoolOpDsc->lclVarArrLength >= 2) - { - intBoolOpDsc->end = b4; - return intBoolOpDsc; - } + intBoolOpDsc->end = b4; + return intBoolOpDsc; + } - ReinitIntBoolOpDsc(intBoolOpDsc); - break; - } + ReinitIntBoolOpDsc(intBoolOpDsc); - intBoolOpDsc->lclVarArrLength++; - if (intBoolOpDsc->lclVarArr == nullptr) - { - intBoolOpDsc->lclVarArr = - reinterpret_cast(malloc(sizeof(GenTree*) * intBoolOpDsc->lclVarArrLength)); - } - else - { - intBoolOpDsc->lclVarArr = reinterpret_cast( - realloc(intBoolOpDsc->lclVarArr, sizeof(GenTree*) * intBoolOpDsc->lclVarArrLength)); - } + if (!b4->OperIs(GT_OR)) + { + b4 = GetNextOrOp(b4); + } - intBoolOpDsc->lclVarArr[intBoolOpDsc->lclVarArrLength - 1] = b4; - orOpCount--; + if (b4 != nullptr) + { + orOpCount = 2; + intBoolOpDsc->start = b4->gtNext; + } + else + { break; } - case GT_CNS_INT: + } + else + { + switch (b4->gtOper) { - if (orOpCount <= 0) + case GT_LCL_VAR: { - if (intBoolOpDsc->ctsArrayLength >= 2 && intBoolOpDsc->lclVarArrLength >= 2) + intBoolOpDsc->lclVarArrLength++; + if (intBoolOpDsc->lclVarArr == nullptr) { - intBoolOpDsc->end = b4; - return intBoolOpDsc; + intBoolOpDsc->lclVarArr = + reinterpret_cast(malloc(sizeof(GenTree*) * intBoolOpDsc->lclVarArrLength)); + } + else + { + intBoolOpDsc->lclVarArr = reinterpret_cast( + realloc(intBoolOpDsc->lclVarArr, sizeof(GenTree*) * intBoolOpDsc->lclVarArrLength)); } - ReinitIntBoolOpDsc(intBoolOpDsc); + intBoolOpDsc->lclVarArr[intBoolOpDsc->lclVarArrLength - 1] = b4; + orOpCount--; break; } - - intBoolOpDsc->ctsArrayLength++; - if (intBoolOpDsc->ctsArray == nullptr) - { - intBoolOpDsc->ctsArray = - reinterpret_cast(malloc(sizeof(ssize_t) * intBoolOpDsc->ctsArrayLength)); - } - else - { - intBoolOpDsc->ctsArray = reinterpret_cast( - realloc(intBoolOpDsc->ctsArray, sizeof(ssize_t) * intBoolOpDsc->ctsArrayLength)); - } - ssize_t constant = b4->AsIntConCommon()->IconValue(); - intBoolOpDsc->ctsArray[intBoolOpDsc->ctsArrayLength - 1] = constant; - orOpCount--; - break; - } - case GT_OR: - if (orOpCount <= 0) + case GT_CNS_INT: { - if (intBoolOpDsc->ctsArrayLength >= 2 && intBoolOpDsc->lclVarArrLength >= 2) + intBoolOpDsc->ctsArrayLength++; + if (intBoolOpDsc->ctsArray == nullptr) { - intBoolOpDsc->end = b4; - return intBoolOpDsc; + intBoolOpDsc->ctsArray = + reinterpret_cast(malloc(sizeof(ssize_t) * intBoolOpDsc->ctsArrayLength)); } - - ReinitIntBoolOpDsc(intBoolOpDsc); - orOpCount = 1; - intBoolOpDsc->start = b4->gtNext; + else + { + intBoolOpDsc->ctsArray = reinterpret_cast( + realloc(intBoolOpDsc->ctsArray, sizeof(ssize_t) * intBoolOpDsc->ctsArrayLength)); + } + ssize_t constant = b4->AsIntConCommon()->IconValue(); + intBoolOpDsc->ctsArray[intBoolOpDsc->ctsArrayLength - 1] = constant; + orOpCount--; + break; + } + case GT_OR: + orOpCount++; + break; + default: + { + break; } - - orOpCount++; - break; - default: - { - break; } } From 0c71f6782c87caa575df462c6cdc287114eab045 Mon Sep 17 00:00:00 2001 From: pedrobsaila Date: Sat, 29 Mar 2025 12:50:14 +0100 Subject: [PATCH 14/27] fix merge issue 1 --- src/coreclr/jit/compiler.h | 1 + src/coreclr/jit/optimizebools.cpp | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 4571efd26adb0b..6b8596da9e87fc 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -83,6 +83,7 @@ class SpanningTreeVisitor; // defined in fgprofile.cpp class CSE_DataFlow; // defined in optcse.cpp struct CSEdsc; // defined in optcse.h class CSE_HeuristicCommon; // defined in optcse.h +struct IntBoolOpDsc; // defined in optimizer.cpp class OptBoolsDsc; // defined in optimizer.cpp struct JumpThreadInfo; // defined in redundantbranchopts.cpp class ProfileSynthesis; // defined in profilesynthesis.h diff --git a/src/coreclr/jit/optimizebools.cpp b/src/coreclr/jit/optimizebools.cpp index 9d16bd18892748..197f62ab264407 100644 --- a/src/coreclr/jit/optimizebools.cpp +++ b/src/coreclr/jit/optimizebools.cpp @@ -1970,7 +1970,6 @@ PhaseStatus Compiler::optOptimizeBools() { if (TryOptimizeIntBoolOp(this, b1) > 0) { - numReturn++; retry = true; } From 9cb0eeb8435d6dc32fd3d73b648b0db7f890ae9f Mon Sep 17 00:00:00 2001 From: pedrobsaila Date: Sat, 29 Mar 2025 22:32:50 +0100 Subject: [PATCH 15/27] address reviewer remark --- src/coreclr/jit/compiler.h | 3 +- src/coreclr/jit/optimizebools.cpp | 184 ++++++++++++++++++------------ 2 files changed, 112 insertions(+), 75 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 6b8596da9e87fc..73e47693fe9ec8 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -83,7 +83,7 @@ class SpanningTreeVisitor; // defined in fgprofile.cpp class CSE_DataFlow; // defined in optcse.cpp struct CSEdsc; // defined in optcse.h class CSE_HeuristicCommon; // defined in optcse.h -struct IntBoolOpDsc; // defined in optimizer.cpp +class IntBoolOpDsc; // defined in optimizer.cpp class OptBoolsDsc; // defined in optimizer.cpp struct JumpThreadInfo; // defined in redundantbranchopts.cpp class ProfileSynthesis; // defined in profilesynthesis.h @@ -5386,6 +5386,7 @@ class Compiler FoldResult fgFoldConditional(BasicBlock* block); bool fgFoldCondToReturnBlock(BasicBlock* block); + unsigned int TryOptimizeIntBoolOp(BasicBlock* block); struct MorphUnreachableInfo { diff --git a/src/coreclr/jit/optimizebools.cpp b/src/coreclr/jit/optimizebools.cpp index 197f62ab264407..179e4998b28f5f 100644 --- a/src/coreclr/jit/optimizebools.cpp +++ b/src/coreclr/jit/optimizebools.cpp @@ -57,14 +57,23 @@ struct OptTestInfo } }; -struct IntBoolOpDsc +class IntBoolOpDsc { +private: GenTree** lclVarArr; int32_t lclVarArrLength; ssize_t* ctsArray; int32_t ctsArrayLength; GenTree* start; GenTree* end; + Compiler* m_comp; + +public: + static IntBoolOpDsc* GetNextIntBoolOp(GenTree* b3, Compiler* comp); + bool TryOptimize(); + void Reinit(); + bool EndIsNull(); + GenTree* GetLclVarArrayFirst(); }; //----------------------------------------------------------------------------- @@ -1449,36 +1458,28 @@ GenTree* OptBoolsDsc::optIsBoolComp(OptTestInfo* pOptTest) } //----------------------------------------------------------------------------- -// ReinitIntBoolOpDsc: Procedure that reinitialize IntBoolOpDsc reference +// Reinit: Procedure that reinitialize IntBoolOpDsc reference // -// Arguments: -// intBoolOpDsc the reference for INT OR operations to be folded -// -void ReinitIntBoolOpDsc(IntBoolOpDsc* intBoolOpDsc) +void IntBoolOpDsc::Reinit() { - if (intBoolOpDsc == nullptr) - { - return; - } - - intBoolOpDsc->start = nullptr; - intBoolOpDsc->end = nullptr; + start = nullptr; + end = nullptr; - if (intBoolOpDsc->ctsArray != nullptr) + if (ctsArray != nullptr) { - free(intBoolOpDsc->ctsArray); - intBoolOpDsc->ctsArray = nullptr; + free(ctsArray); + ctsArray = nullptr; } - intBoolOpDsc->ctsArrayLength = 0; + ctsArrayLength = 0; - if (intBoolOpDsc->lclVarArr != nullptr) + if (lclVarArr != nullptr) { - free(intBoolOpDsc->lclVarArr); - intBoolOpDsc->lclVarArr = nullptr; + free(lclVarArr); + lclVarArr = nullptr; } - intBoolOpDsc->lclVarArrLength = 0; + lclVarArrLength = 0; } //----------------------------------------------------------------------------- @@ -1501,17 +1502,18 @@ GenTree* GetNextOrOp(GenTree* b4) } //----------------------------------------------------------------------------- -// GetNextIntBoolOpToOptimize: Function used for searching constant INT OR operation that can be folded +// GetNextIntBoolOp: Function used for searching constant INT OR operation that can be folded // // Arguments: // b3 the tree to inspect +// compiler compiler reference // // Return: // On success, return the start and end offset of code to optimize and the variables and constants to be folded. // // Notes: // We look for consecutive blocks that have GT_OR, GT_LCL_VAR, GT_CNS_INT nodes. -IntBoolOpDsc* GetNextIntBoolOpToOptimize(GenTree* b3) +IntBoolOpDsc* IntBoolOpDsc::GetNextIntBoolOp(GenTree* b3, Compiler* comp) { if (b3 == nullptr) { @@ -1525,6 +1527,7 @@ IntBoolOpDsc* GetNextIntBoolOpToOptimize(GenTree* b3) intBoolOpDsc->lclVarArrLength = 0; intBoolOpDsc->start = nullptr; intBoolOpDsc->end = nullptr; + intBoolOpDsc->m_comp = comp; int orOpCount = 2; GenTree* b4 = GetNextOrOp(b3->gtPrev); @@ -1546,7 +1549,7 @@ IntBoolOpDsc* GetNextIntBoolOpToOptimize(GenTree* b3) } else { - ReinitIntBoolOpDsc(intBoolOpDsc); + intBoolOpDsc->Reinit(); b4 = GetNextOrOp(b4); if (b4 != nullptr) @@ -1569,7 +1572,7 @@ IntBoolOpDsc* GetNextIntBoolOpToOptimize(GenTree* b3) return intBoolOpDsc; } - ReinitIntBoolOpDsc(intBoolOpDsc); + intBoolOpDsc->Reinit(); if (!b4->OperIs(GT_OR)) { @@ -1643,71 +1646,78 @@ IntBoolOpDsc* GetNextIntBoolOpToOptimize(GenTree* b3) } //----------------------------------------------------------------------------- -// OptimizeIntBoolOp: Procedure that fold constant INT OR operations +// TryOptimize: Procedure that fold constant INT OR operations // // Arguments: // compiler compiler reference // intBoolOpDsc the reference for INT OR operations to be folded // +// Return: +// True if it could optimize the operation and false elsewhere +// // Notes: // We recreate nodes so as to eliminate excessive constants -void OptimizeIntBoolOp(Compiler* compiler, IntBoolOpDsc* intBoolOpDsc) +bool IntBoolOpDsc::TryOptimize() { - GenTreeOp* intVarTree = - compiler->gtNewOperNode(GT_OR, TYP_INT, intBoolOpDsc->lclVarArr[0], intBoolOpDsc->lclVarArr[1]); - intVarTree->gtPrev = intBoolOpDsc->lclVarArr[1]; - intBoolOpDsc->lclVarArr[1]->gtNext = intVarTree; - intBoolOpDsc->lclVarArr[1]->gtPrev = intBoolOpDsc->lclVarArr[0]; - intBoolOpDsc->lclVarArr[0]->gtNext = intBoolOpDsc->lclVarArr[1]; - intBoolOpDsc->lclVarArr[0]->gtPrev = intBoolOpDsc->end; + if (ctsArrayLength < 2 || lclVarArrLength < 2) + { + return false; + } + + GenTreeOp* intVarTree = m_comp->gtNewOperNode(GT_OR, TYP_INT, lclVarArr[0], lclVarArr[1]); + intVarTree->gtPrev = lclVarArr[1]; + lclVarArr[1]->gtNext = intVarTree; + lclVarArr[1]->gtPrev = lclVarArr[0]; + lclVarArr[0]->gtNext = lclVarArr[1]; + lclVarArr[0]->gtPrev = end; - if (intBoolOpDsc->end != nullptr) + if (end != nullptr) { - intBoolOpDsc->end->gtNext = intBoolOpDsc->lclVarArr[0]; + end->gtNext = lclVarArr[0]; } GenTree* tempIntVatTree = intVarTree; - for (int i = 2; i < intBoolOpDsc->lclVarArrLength; i++) + for (int i = 2; i < lclVarArrLength; i++) { - GenTreeOp* newIntVarTree = compiler->gtNewOperNode(GT_OR, TYP_INT, tempIntVatTree, intBoolOpDsc->lclVarArr[i]); - newIntVarTree->gtPrev = intBoolOpDsc->lclVarArr[i]; - intBoolOpDsc->lclVarArr[i]->gtNext = newIntVarTree; - intBoolOpDsc->lclVarArr[i]->gtPrev = tempIntVatTree; - tempIntVatTree->gtNext = intBoolOpDsc->lclVarArr[i]; - tempIntVatTree = newIntVarTree; + GenTreeOp* newIntVarTree = m_comp->gtNewOperNode(GT_OR, TYP_INT, tempIntVatTree, lclVarArr[i]); + newIntVarTree->gtPrev = lclVarArr[i]; + lclVarArr[i]->gtNext = newIntVarTree; + lclVarArr[i]->gtPrev = tempIntVatTree; + tempIntVatTree->gtNext = lclVarArr[i]; + tempIntVatTree = newIntVarTree; } size_t optimizedCst = 0; - for (int i = 0; i < intBoolOpDsc->ctsArrayLength; i++) + for (int i = 0; i < ctsArrayLength; i++) { - optimizedCst = optimizedCst | intBoolOpDsc->ctsArray[i]; + optimizedCst = optimizedCst | ctsArray[i]; } - GenTreeIntCon* optimizedCstTree = compiler->gtNewIconNode(optimizedCst, TYP_INT); - GenTreeOp* optimizedTree = compiler->gtNewOperNode(GT_OR, TYP_INT, tempIntVatTree, optimizedCstTree); + GenTreeIntCon* optimizedCstTree = m_comp->gtNewIconNode(optimizedCst, TYP_INT); + GenTreeOp* optimizedTree = m_comp->gtNewOperNode(GT_OR, TYP_INT, tempIntVatTree, optimizedCstTree); optimizedTree->gtPrev = optimizedCstTree; optimizedCstTree->gtNext = optimizedTree; optimizedCstTree->gtPrev = tempIntVatTree; tempIntVatTree->gtNext = optimizedCstTree; - intBoolOpDsc->start->gtPrev = optimizedTree; - optimizedTree->gtNext = intBoolOpDsc->start; + start->gtPrev = optimizedTree; + optimizedTree->gtNext = start; - if (intBoolOpDsc->start->OperIsUnary()) + if (start->OperIsUnary()) { - intBoolOpDsc->start->AsOp()->gtOp1 = optimizedTree; + start->AsOp()->gtOp1 = optimizedTree; } - else if (intBoolOpDsc->start->OperIsBinary()) + else if (start->OperIsBinary()) { - intBoolOpDsc->start->AsOp()->gtOp2 = optimizedTree; + start->AsOp()->gtOp2 = optimizedTree; } - else if (intBoolOpDsc->start->gtNext != nullptr && intBoolOpDsc->start->gtNext->OperIsBinary()) + else if (start->gtNext != nullptr && start->gtNext->OperIsBinary()) { - intBoolOpDsc->start->gtNext->AsOp()->gtOp1 = optimizedTree; + start->gtNext->AsOp()->gtOp1 = optimizedTree; } else { - GenTree* functionCallCandidate = intBoolOpDsc->start; + GenTree* functionCallCandidate = start; bool parameterAssigningDone = false; while (functionCallCandidate != nullptr && !parameterAssigningDone) { @@ -1718,7 +1728,7 @@ void OptimizeIntBoolOp(Compiler* compiler, IntBoolOpDsc* intBoolOpDsc) CallArgs::ArgIterator nextArg = args.begin(); CallArg* nextCallArg = nextArg.GetArg(); - if (nextCallArg->GetNode()->gtNext == intBoolOpDsc->start) + if (nextCallArg->GetNode()->gtNext == start) { nextCallArg->SetLateNode(optimizedTree); parameterAssigningDone = true; @@ -1729,7 +1739,7 @@ void OptimizeIntBoolOp(Compiler* compiler, IntBoolOpDsc* intBoolOpDsc) nextCallArg = nextArg.GetArg(); while (nextCallArg != nullptr) { - if (nextCallArg->GetNode()->gtNext == intBoolOpDsc->start) + if (nextCallArg->GetNode()->gtNext == start) { nextCallArg->SetLateNode(optimizedTree); parameterAssigningDone = true; @@ -1745,6 +1755,33 @@ void OptimizeIntBoolOp(Compiler* compiler, IntBoolOpDsc* intBoolOpDsc) functionCallCandidate = functionCallCandidate->gtNext; } } + + return true; +} + +//----------------------------------------------------------------------------- +// EndIsNull: Function that checks whether the end of operation is null +// +// Return: +// true if end is null and false elsewhere +bool IntBoolOpDsc::EndIsNull() +{ + return end == nullptr; +} + +//----------------------------------------------------------------------------- +// GetLclVarArrayFirst: Function that returns the first lcl var of operation if it exists +// +// Return: +// first lcl var of operation if it exists and nullptr elsewhere +GenTree* IntBoolOpDsc::GetLclVarArrayFirst() +{ + if (lclVarArrLength > 0 && lclVarArr != nullptr) + { + return lclVarArr[0]; + } + + return nullptr; } //----------------------------------------------------------------------------- @@ -1756,7 +1793,7 @@ void OptimizeIntBoolOp(Compiler* compiler, IntBoolOpDsc* intBoolOpDsc) // // Return: // 1 if a block was folded, 0 if not -unsigned int TryOptimizeIntBoolOp(Compiler* compiler, BasicBlock* b1) +unsigned int Compiler::TryOptimizeIntBoolOp(BasicBlock* b1) { unsigned int result = 0; Statement* b2 = b1->firstStmt(); @@ -1765,26 +1802,24 @@ unsigned int TryOptimizeIntBoolOp(Compiler* compiler, BasicBlock* b1) GenTree* b3 = b2->GetRootNode(); if (b3 != nullptr && b3->OperIs(GT_RETURN)) { - IntBoolOpDsc* intBoolOpDsc = GetNextIntBoolOpToOptimize(b3); + IntBoolOpDsc* intBoolOpDsc = IntBoolOpDsc::GetNextIntBoolOp(b3, this); if (intBoolOpDsc == nullptr) { return 0; } - if (intBoolOpDsc->ctsArrayLength >= 2 && intBoolOpDsc->lclVarArrLength >= 2) + if (intBoolOpDsc->TryOptimize()) { - OptimizeIntBoolOp(compiler, intBoolOpDsc); - - if (intBoolOpDsc->end == nullptr) + if (intBoolOpDsc->EndIsNull()) { - b2->SetTreeList(intBoolOpDsc->lclVarArr[0]); + b2->SetTreeList(intBoolOpDsc->GetLclVarArrayFirst()); } result++; } - ReinitIntBoolOpDsc(intBoolOpDsc); + intBoolOpDsc->Reinit(); free(intBoolOpDsc); } } @@ -1959,23 +1994,24 @@ PhaseStatus Compiler::optOptimizeBools() numCond++; } - // We're only interested in conditional jumps here - - if (!b1->KindIs(BBJ_COND, BBJ_RETURN)) - { - continue; - } - if (b1->KindIs(BBJ_RETURN)) { - if (TryOptimizeIntBoolOp(this, b1) > 0) + if (TryOptimizeIntBoolOp(b1) > 0) { + numCond++; retry = true; } continue; } + // We're only interested in conditional jumps here + + if (!b1->KindIs(BBJ_COND)) + { + continue; + } + // If there is no next block, we're done BasicBlock* b2 = b1->GetFalseTarget(); From ad9f590a243d90ee53bb7c73f52f1f9c697498e0 Mon Sep 17 00:00:00 2001 From: pedrobsaila Date: Sun, 30 Mar 2025 10:27:19 +0200 Subject: [PATCH 16/27] some refactos --- src/coreclr/jit/optimizebools.cpp | 175 ++++++++++++++++++------------ 1 file changed, 104 insertions(+), 71 deletions(-) diff --git a/src/coreclr/jit/optimizebools.cpp b/src/coreclr/jit/optimizebools.cpp index 179e4998b28f5f..124e2acc8e7015 100644 --- a/src/coreclr/jit/optimizebools.cpp +++ b/src/coreclr/jit/optimizebools.cpp @@ -59,21 +59,41 @@ struct OptTestInfo class IntBoolOpDsc { +private: + IntBoolOpDsc(Compiler* comp) + { + ctsArray = nullptr; + ctsArrayLength = 0; + ctsArrayCapacity = 0; + lclVarArr = nullptr; + lclVarArrLength = 0; + lclVarArrCapacity = 0; + start = nullptr; + end = nullptr; + m_comp = comp; + } + private: GenTree** lclVarArr; int32_t lclVarArrLength; + int32_t lclVarArrCapacity; ssize_t* ctsArray; int32_t ctsArrayLength; + int32_t ctsArrayCapacity; GenTree* start; GenTree* end; Compiler* m_comp; public: - static IntBoolOpDsc* GetNextIntBoolOp(GenTree* b3, Compiler* comp); - bool TryOptimize(); - void Reinit(); - bool EndIsNull(); - GenTree* GetLclVarArrayFirst(); + static IntBoolOpDsc GetNextIntBoolOp(GenTree* b3, Compiler* comp); + bool TryOptimize(); + void Reinit(); + bool EndIsNull(); + GenTree* GetLclVarArrayFirst(); + +private: + void AppendToLclVarArray(GenTree* b3); + void AppendToCtsArray(ssize_t b3); }; //----------------------------------------------------------------------------- @@ -1471,7 +1491,8 @@ void IntBoolOpDsc::Reinit() ctsArray = nullptr; } - ctsArrayLength = 0; + ctsArrayLength = 0; + ctsArrayCapacity = 0; if (lclVarArr != nullptr) { @@ -1479,7 +1500,8 @@ void IntBoolOpDsc::Reinit() lclVarArr = nullptr; } - lclVarArrLength = 0; + lclVarArrLength = 0; + lclVarArrCapacity = 0; } //----------------------------------------------------------------------------- @@ -1513,49 +1535,37 @@ GenTree* GetNextOrOp(GenTree* b4) // // Notes: // We look for consecutive blocks that have GT_OR, GT_LCL_VAR, GT_CNS_INT nodes. -IntBoolOpDsc* IntBoolOpDsc::GetNextIntBoolOp(GenTree* b3, Compiler* comp) +IntBoolOpDsc IntBoolOpDsc::GetNextIntBoolOp(GenTree* b3, Compiler* comp) { - if (b3 == nullptr) - { - return nullptr; - } - - IntBoolOpDsc* intBoolOpDsc = reinterpret_cast(malloc(sizeof(IntBoolOpDsc))); - intBoolOpDsc->ctsArray = nullptr; - intBoolOpDsc->ctsArrayLength = 0; - intBoolOpDsc->lclVarArr = nullptr; - intBoolOpDsc->lclVarArrLength = 0; - intBoolOpDsc->start = nullptr; - intBoolOpDsc->end = nullptr; - intBoolOpDsc->m_comp = comp; - int orOpCount = 2; + IntBoolOpDsc intBoolOpDsc = IntBoolOpDsc(comp); + int orOpCount = 2; GenTree* b4 = GetNextOrOp(b3->gtPrev); if (b4 != nullptr) { - intBoolOpDsc->start = b4->gtNext; - b4 = b4->gtPrev; + intBoolOpDsc.start = b4->gtNext; + b4 = b4->gtPrev; } while (b4 != nullptr) { if (!b4->OperIs(GT_OR, GT_LCL_VAR, GT_CNS_INT) || !b4->TypeIs(TYP_INT)) { - if (intBoolOpDsc->ctsArrayLength >= 2 && intBoolOpDsc->lclVarArrLength >= 2) + if (intBoolOpDsc.ctsArrayLength >= 2 && intBoolOpDsc.lclVarArrLength >= 2) { - intBoolOpDsc->end = b4; + intBoolOpDsc.end = b4; return intBoolOpDsc; } else { - intBoolOpDsc->Reinit(); + intBoolOpDsc.Reinit(); b4 = GetNextOrOp(b4); if (b4 != nullptr) { - orOpCount = 2; - intBoolOpDsc->start = b4->gtNext; + orOpCount = 2; + intBoolOpDsc.start = b4->gtNext; } else { @@ -1566,13 +1576,13 @@ IntBoolOpDsc* IntBoolOpDsc::GetNextIntBoolOp(GenTree* b3, Compiler* comp) if (orOpCount <= 0) { - if (intBoolOpDsc->ctsArrayLength >= 2 && intBoolOpDsc->lclVarArrLength >= 2) + if (intBoolOpDsc.ctsArrayLength >= 2 && intBoolOpDsc.lclVarArrLength >= 2) { - intBoolOpDsc->end = b4; + intBoolOpDsc.end = b4; return intBoolOpDsc; } - intBoolOpDsc->Reinit(); + intBoolOpDsc.Reinit(); if (!b4->OperIs(GT_OR)) { @@ -1581,8 +1591,8 @@ IntBoolOpDsc* IntBoolOpDsc::GetNextIntBoolOp(GenTree* b3, Compiler* comp) if (b4 != nullptr) { - orOpCount = 2; - intBoolOpDsc->start = b4->gtNext; + orOpCount = 2; + intBoolOpDsc.start = b4->gtNext; } else { @@ -1595,37 +1605,14 @@ IntBoolOpDsc* IntBoolOpDsc::GetNextIntBoolOp(GenTree* b3, Compiler* comp) { case GT_LCL_VAR: { - intBoolOpDsc->lclVarArrLength++; - if (intBoolOpDsc->lclVarArr == nullptr) - { - intBoolOpDsc->lclVarArr = - reinterpret_cast(malloc(sizeof(GenTree*) * intBoolOpDsc->lclVarArrLength)); - } - else - { - intBoolOpDsc->lclVarArr = reinterpret_cast( - realloc(intBoolOpDsc->lclVarArr, sizeof(GenTree*) * intBoolOpDsc->lclVarArrLength)); - } - - intBoolOpDsc->lclVarArr[intBoolOpDsc->lclVarArrLength - 1] = b4; + intBoolOpDsc.AppendToLclVarArray(b4); orOpCount--; break; } case GT_CNS_INT: { - intBoolOpDsc->ctsArrayLength++; - if (intBoolOpDsc->ctsArray == nullptr) - { - intBoolOpDsc->ctsArray = - reinterpret_cast(malloc(sizeof(ssize_t) * intBoolOpDsc->ctsArrayLength)); - } - else - { - intBoolOpDsc->ctsArray = reinterpret_cast( - realloc(intBoolOpDsc->ctsArray, sizeof(ssize_t) * intBoolOpDsc->ctsArrayLength)); - } - ssize_t constant = b4->AsIntConCommon()->IconValue(); - intBoolOpDsc->ctsArray[intBoolOpDsc->ctsArrayLength - 1] = constant; + ssize_t constant = b4->AsIntConCommon()->IconValue(); + intBoolOpDsc.AppendToCtsArray(constant); orOpCount--; break; } @@ -1784,6 +1771,58 @@ GenTree* IntBoolOpDsc::GetLclVarArrayFirst() return nullptr; } +//----------------------------------------------------------------------------- +// AppendToLclVarArray: Append the lcl var tree to lcl var arrays +// +// Arguments: +// tree lcl var tree +// +void IntBoolOpDsc::AppendToLclVarArray(GenTree* tree) +{ + if (lclVarArrLength == lclVarArrCapacity) + { + if (lclVarArrCapacity == 0) + { + lclVarArrCapacity = 4; + lclVarArr = reinterpret_cast(malloc(sizeof(GenTree*) * lclVarArrCapacity)); + } + else + { + lclVarArrCapacity = lclVarArrCapacity * 2; + lclVarArr = reinterpret_cast(realloc(lclVarArr, sizeof(GenTree*) * lclVarArrCapacity)); + } + } + + lclVarArrLength++; + lclVarArr[lclVarArrLength - 1] = tree; +} + +//----------------------------------------------------------------------------- +// AppendToCtsArray: Append the constant to constant arrays +// +// Arguments: +// cts constant value +// +void IntBoolOpDsc::AppendToCtsArray(ssize_t cts) +{ + if (ctsArrayLength == ctsArrayCapacity) + { + if (ctsArrayCapacity == 0) + { + ctsArrayCapacity = 4; + ctsArray = reinterpret_cast(malloc(sizeof(ssize_t) * ctsArrayCapacity)); + } + else + { + ctsArrayCapacity = ctsArrayCapacity * 2; + ctsArray = reinterpret_cast(realloc(ctsArray, sizeof(ssize_t) * ctsArrayCapacity)); + } + } + + ctsArrayLength++; + ctsArray[ctsArrayLength - 1] = cts; +} + //----------------------------------------------------------------------------- // TryOptimizeIntBoolOp: Procedure that looks for constant INT OR operations to fold and if found, folds them // @@ -1802,25 +1841,19 @@ unsigned int Compiler::TryOptimizeIntBoolOp(BasicBlock* b1) GenTree* b3 = b2->GetRootNode(); if (b3 != nullptr && b3->OperIs(GT_RETURN)) { - IntBoolOpDsc* intBoolOpDsc = IntBoolOpDsc::GetNextIntBoolOp(b3, this); - - if (intBoolOpDsc == nullptr) - { - return 0; - } + IntBoolOpDsc intBoolOpDsc = IntBoolOpDsc::GetNextIntBoolOp(b3, this); - if (intBoolOpDsc->TryOptimize()) + if (intBoolOpDsc.TryOptimize()) { - if (intBoolOpDsc->EndIsNull()) + if (intBoolOpDsc.EndIsNull()) { - b2->SetTreeList(intBoolOpDsc->GetLclVarArrayFirst()); + b2->SetTreeList(intBoolOpDsc.GetLclVarArrayFirst()); } result++; } - intBoolOpDsc->Reinit(); - free(intBoolOpDsc); + intBoolOpDsc.Reinit(); } } From 0adbb6e75694fb586bc479e4d39fadb4cdd3f7cc Mon Sep 17 00:00:00 2001 From: pedrobsaila Date: Sun, 30 Mar 2025 12:14:43 +0200 Subject: [PATCH 17/27] avoid freeing array aggressively --- src/coreclr/jit/optimizebools.cpp | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/optimizebools.cpp b/src/coreclr/jit/optimizebools.cpp index 124e2acc8e7015..4b475b31ae7897 100644 --- a/src/coreclr/jit/optimizebools.cpp +++ b/src/coreclr/jit/optimizebools.cpp @@ -87,11 +87,12 @@ class IntBoolOpDsc public: static IntBoolOpDsc GetNextIntBoolOp(GenTree* b3, Compiler* comp); bool TryOptimize(); - void Reinit(); bool EndIsNull(); GenTree* GetLclVarArrayFirst(); + void Free(); private: + void Reinit(); void AppendToLclVarArray(GenTree* b3); void AppendToCtsArray(ssize_t b3); }; @@ -1481,6 +1482,17 @@ GenTree* OptBoolsDsc::optIsBoolComp(OptTestInfo* pOptTest) // Reinit: Procedure that reinitialize IntBoolOpDsc reference // void IntBoolOpDsc::Reinit() +{ + start = nullptr; + end = nullptr; + ctsArrayLength = 0; + lclVarArrLength = 0; +} + +//----------------------------------------------------------------------------- +// Free: Procedure that frees IntBoolOpDsc reference +// +void IntBoolOpDsc::Free() { start = nullptr; end = nullptr; @@ -1633,7 +1645,7 @@ IntBoolOpDsc IntBoolOpDsc::GetNextIntBoolOp(GenTree* b3, Compiler* comp) } //----------------------------------------------------------------------------- -// TryOptimize: Procedure that fold constant INT OR operations +// TryOptimize: Function that fold constant INT OR operations // // Arguments: // compiler compiler reference @@ -1772,7 +1784,7 @@ GenTree* IntBoolOpDsc::GetLclVarArrayFirst() } //----------------------------------------------------------------------------- -// AppendToLclVarArray: Append the lcl var tree to lcl var arrays +// AppendToLclVarArray: Procedure that append the lcl var tree to lcl var arrays // // Arguments: // tree lcl var tree @@ -1853,7 +1865,7 @@ unsigned int Compiler::TryOptimizeIntBoolOp(BasicBlock* b1) result++; } - intBoolOpDsc.Reinit(); + intBoolOpDsc.Free(); } } From b4e1440d20094899e22cea42a1acdb4df0f543e5 Mon Sep 17 00:00:00 2001 From: pedrobsaila Date: Sun, 30 Mar 2025 19:07:56 +0200 Subject: [PATCH 18/27] fix wrong comments --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/optimizebools.cpp | 19 +++++++------------ 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 73e47693fe9ec8..012779e0f80a7c 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5386,7 +5386,7 @@ class Compiler FoldResult fgFoldConditional(BasicBlock* block); bool fgFoldCondToReturnBlock(BasicBlock* block); - unsigned int TryOptimizeIntBoolOp(BasicBlock* block); + bool TryOptimizeIntBoolOp(BasicBlock* block); struct MorphUnreachableInfo { diff --git a/src/coreclr/jit/optimizebools.cpp b/src/coreclr/jit/optimizebools.cpp index 4b475b31ae7897..8aad9bbb7e73b7 100644 --- a/src/coreclr/jit/optimizebools.cpp +++ b/src/coreclr/jit/optimizebools.cpp @@ -1647,10 +1647,6 @@ IntBoolOpDsc IntBoolOpDsc::GetNextIntBoolOp(GenTree* b3, Compiler* comp) //----------------------------------------------------------------------------- // TryOptimize: Function that fold constant INT OR operations // -// Arguments: -// compiler compiler reference -// intBoolOpDsc the reference for INT OR operations to be folded -// // Return: // True if it could optimize the operation and false elsewhere // @@ -1810,7 +1806,7 @@ void IntBoolOpDsc::AppendToLclVarArray(GenTree* tree) } //----------------------------------------------------------------------------- -// AppendToCtsArray: Append the constant to constant arrays +// AppendToCtsArray: Procedure that appends the constant to constant arrays // // Arguments: // cts constant value @@ -1839,14 +1835,13 @@ void IntBoolOpDsc::AppendToCtsArray(ssize_t cts) // TryOptimizeIntBoolOp: Procedure that looks for constant INT OR operations to fold and if found, folds them // // Arguments: -// compiler compiler reference // b1 the block code to inspect for operations to fold // // Return: -// 1 if a block was folded, 0 if not -unsigned int Compiler::TryOptimizeIntBoolOp(BasicBlock* b1) +// True if the block was folded and false elsewhere +bool Compiler::TryOptimizeIntBoolOp(BasicBlock* b1) { - unsigned int result = 0; + bool folded = false; Statement* b2 = b1->firstStmt(); if (b2 != nullptr) { @@ -1862,14 +1857,14 @@ unsigned int Compiler::TryOptimizeIntBoolOp(BasicBlock* b1) b2->SetTreeList(intBoolOpDsc.GetLclVarArrayFirst()); } - result++; + folded = true; } intBoolOpDsc.Free(); } } - return result; + return folded; } //----------------------------------------------------------------------------- @@ -2041,7 +2036,7 @@ PhaseStatus Compiler::optOptimizeBools() if (b1->KindIs(BBJ_RETURN)) { - if (TryOptimizeIntBoolOp(b1) > 0) + if (TryOptimizeIntBoolOp(b1)) { numCond++; retry = true; From b4b50d28c6d6b81bf2ef955fe94a7ca72bac2f4d Mon Sep 17 00:00:00 2001 From: pedrobsaila Date: Sun, 30 Mar 2025 19:09:25 +0200 Subject: [PATCH 19/27] format code with jitutils --- src/coreclr/jit/optimizebools.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/optimizebools.cpp b/src/coreclr/jit/optimizebools.cpp index 8aad9bbb7e73b7..c264782923da63 100644 --- a/src/coreclr/jit/optimizebools.cpp +++ b/src/coreclr/jit/optimizebools.cpp @@ -1841,8 +1841,8 @@ void IntBoolOpDsc::AppendToCtsArray(ssize_t cts) // True if the block was folded and false elsewhere bool Compiler::TryOptimizeIntBoolOp(BasicBlock* b1) { - bool folded = false; - Statement* b2 = b1->firstStmt(); + bool folded = false; + Statement* b2 = b1->firstStmt(); if (b2 != nullptr) { GenTree* b3 = b2->GetRootNode(); From d2ec3520e7c28886f5f2c27f70df10bdf23afc40 Mon Sep 17 00:00:00 2001 From: pedrobsaila Date: Tue, 1 Apr 2025 01:39:47 +0200 Subject: [PATCH 20/27] fix reviewer second round remarks --- src/coreclr/jit/optimizebools.cpp | 169 ++++++++---------------------- 1 file changed, 42 insertions(+), 127 deletions(-) diff --git a/src/coreclr/jit/optimizebools.cpp b/src/coreclr/jit/optimizebools.cpp index c264782923da63..46dfdc3626e64f 100644 --- a/src/coreclr/jit/optimizebools.cpp +++ b/src/coreclr/jit/optimizebools.cpp @@ -61,40 +61,27 @@ class IntBoolOpDsc { private: IntBoolOpDsc(Compiler* comp) + : m_comp(comp) + , ctsArray(comp->getAllocator(CMK_ArrayStack)) + , lclVarArr(comp->getAllocator(CMK_ArrayStack)) + , start(nullptr) + , end(nullptr) { - ctsArray = nullptr; - ctsArrayLength = 0; - ctsArrayCapacity = 0; - lclVarArr = nullptr; - lclVarArrLength = 0; - lclVarArrCapacity = 0; - start = nullptr; - end = nullptr; - m_comp = comp; } private: - GenTree** lclVarArr; - int32_t lclVarArrLength; - int32_t lclVarArrCapacity; - ssize_t* ctsArray; - int32_t ctsArrayLength; - int32_t ctsArrayCapacity; - GenTree* start; - GenTree* end; - Compiler* m_comp; + Compiler* m_comp; + ArrayStack ctsArray; + ArrayStack lclVarArr; + GenTree* start; + GenTree* end; public: static IntBoolOpDsc GetNextIntBoolOp(GenTree* b3, Compiler* comp); bool TryOptimize(); bool EndIsNull(); GenTree* GetLclVarArrayFirst(); - void Free(); - -private: - void Reinit(); - void AppendToLclVarArray(GenTree* b3); - void AppendToCtsArray(ssize_t b3); + void Reinit(); }; //----------------------------------------------------------------------------- @@ -1479,41 +1466,14 @@ GenTree* OptBoolsDsc::optIsBoolComp(OptTestInfo* pOptTest) } //----------------------------------------------------------------------------- -// Reinit: Procedure that reinitialize IntBoolOpDsc reference +// Reinit: Procedure that reinitializes IntBoolOpDsc reference // void IntBoolOpDsc::Reinit() -{ - start = nullptr; - end = nullptr; - ctsArrayLength = 0; - lclVarArrLength = 0; -} - -//----------------------------------------------------------------------------- -// Free: Procedure that frees IntBoolOpDsc reference -// -void IntBoolOpDsc::Free() { start = nullptr; end = nullptr; - - if (ctsArray != nullptr) - { - free(ctsArray); - ctsArray = nullptr; - } - - ctsArrayLength = 0; - ctsArrayCapacity = 0; - - if (lclVarArr != nullptr) - { - free(lclVarArr); - lclVarArr = nullptr; - } - - lclVarArrLength = 0; - lclVarArrCapacity = 0; + ctsArray.Reset(); + lclVarArr.Reset(); } //----------------------------------------------------------------------------- @@ -1564,7 +1524,7 @@ IntBoolOpDsc IntBoolOpDsc::GetNextIntBoolOp(GenTree* b3, Compiler* comp) { if (!b4->OperIs(GT_OR, GT_LCL_VAR, GT_CNS_INT) || !b4->TypeIs(TYP_INT)) { - if (intBoolOpDsc.ctsArrayLength >= 2 && intBoolOpDsc.lclVarArrLength >= 2) + if (intBoolOpDsc.ctsArray.Height() >= 2 && intBoolOpDsc.lclVarArr.Height() >= 2) { intBoolOpDsc.end = b4; return intBoolOpDsc; @@ -1588,7 +1548,7 @@ IntBoolOpDsc IntBoolOpDsc::GetNextIntBoolOp(GenTree* b3, Compiler* comp) if (orOpCount <= 0) { - if (intBoolOpDsc.ctsArrayLength >= 2 && intBoolOpDsc.lclVarArrLength >= 2) + if (intBoolOpDsc.ctsArray.Height() >= 2 && intBoolOpDsc.lclVarArr.Height() >= 2) { intBoolOpDsc.end = b4; return intBoolOpDsc; @@ -1617,14 +1577,14 @@ IntBoolOpDsc IntBoolOpDsc::GetNextIntBoolOp(GenTree* b3, Compiler* comp) { case GT_LCL_VAR: { - intBoolOpDsc.AppendToLclVarArray(b4); + intBoolOpDsc.lclVarArr.Push(b4); orOpCount--; break; } case GT_CNS_INT: { ssize_t constant = b4->AsIntConCommon()->IconValue(); - intBoolOpDsc.AppendToCtsArray(constant); + intBoolOpDsc.ctsArray.Push(constant); orOpCount--; break; } @@ -1645,7 +1605,7 @@ IntBoolOpDsc IntBoolOpDsc::GetNextIntBoolOp(GenTree* b3, Compiler* comp) } //----------------------------------------------------------------------------- -// TryOptimize: Function that fold constant INT OR operations +// TryOptimize: Function that folds constant INT OR operations // // Return: // True if it could optimize the operation and false elsewhere @@ -1654,39 +1614,46 @@ IntBoolOpDsc IntBoolOpDsc::GetNextIntBoolOp(GenTree* b3, Compiler* comp) // We recreate nodes so as to eliminate excessive constants bool IntBoolOpDsc::TryOptimize() { + int ctsArrayLength = ctsArray.Height(); + int lclVarArrLength = lclVarArr.Height(); + if (ctsArrayLength < 2 || lclVarArrLength < 2) { return false; } - GenTreeOp* intVarTree = m_comp->gtNewOperNode(GT_OR, TYP_INT, lclVarArr[0], lclVarArr[1]); - intVarTree->gtPrev = lclVarArr[1]; - lclVarArr[1]->gtNext = intVarTree; - lclVarArr[1]->gtPrev = lclVarArr[0]; - lclVarArr[0]->gtNext = lclVarArr[1]; - lclVarArr[0]->gtPrev = end; + GenTree* firstLclVar = lclVarArr.Bottom(0); + GenTree* secondLclVar = lclVarArr.Bottom(1); + GenTreeOp* intVarTree = m_comp->gtNewOperNode(GT_OR, TYP_INT, firstLclVar, secondLclVar); + intVarTree->gtPrev = secondLclVar; + secondLclVar->gtNext = intVarTree; + secondLclVar->gtPrev = firstLclVar; + firstLclVar->gtNext = secondLclVar; + firstLclVar->gtPrev = end; if (end != nullptr) { - end->gtNext = lclVarArr[0]; + end->gtNext = firstLclVar; } GenTree* tempIntVatTree = intVarTree; for (int i = 2; i < lclVarArrLength; i++) { - GenTreeOp* newIntVarTree = m_comp->gtNewOperNode(GT_OR, TYP_INT, tempIntVatTree, lclVarArr[i]); - newIntVarTree->gtPrev = lclVarArr[i]; - lclVarArr[i]->gtNext = newIntVarTree; - lclVarArr[i]->gtPrev = tempIntVatTree; - tempIntVatTree->gtNext = lclVarArr[i]; + GenTree* ithLclVar = lclVarArr.Bottom(i); + GenTreeOp* newIntVarTree = m_comp->gtNewOperNode(GT_OR, TYP_INT, tempIntVatTree, ithLclVar); + newIntVarTree->gtPrev = ithLclVar; + ithLclVar->gtNext = newIntVarTree; + ithLclVar->gtPrev = tempIntVatTree; + tempIntVatTree->gtNext = ithLclVar; tempIntVatTree = newIntVarTree; } size_t optimizedCst = 0; for (int i = 0; i < ctsArrayLength; i++) { - optimizedCst = optimizedCst | ctsArray[i]; + size_t ithCts = ctsArray.Bottom(i); + optimizedCst = optimizedCst | ithCts; } GenTreeIntCon* optimizedCstTree = m_comp->gtNewIconNode(optimizedCst, TYP_INT); @@ -1771,66 +1738,14 @@ bool IntBoolOpDsc::EndIsNull() // first lcl var of operation if it exists and nullptr elsewhere GenTree* IntBoolOpDsc::GetLclVarArrayFirst() { - if (lclVarArrLength > 0 && lclVarArr != nullptr) + if (lclVarArr.Height() > 0) { - return lclVarArr[0]; + return lclVarArr.Bottom(0); } return nullptr; } -//----------------------------------------------------------------------------- -// AppendToLclVarArray: Procedure that append the lcl var tree to lcl var arrays -// -// Arguments: -// tree lcl var tree -// -void IntBoolOpDsc::AppendToLclVarArray(GenTree* tree) -{ - if (lclVarArrLength == lclVarArrCapacity) - { - if (lclVarArrCapacity == 0) - { - lclVarArrCapacity = 4; - lclVarArr = reinterpret_cast(malloc(sizeof(GenTree*) * lclVarArrCapacity)); - } - else - { - lclVarArrCapacity = lclVarArrCapacity * 2; - lclVarArr = reinterpret_cast(realloc(lclVarArr, sizeof(GenTree*) * lclVarArrCapacity)); - } - } - - lclVarArrLength++; - lclVarArr[lclVarArrLength - 1] = tree; -} - -//----------------------------------------------------------------------------- -// AppendToCtsArray: Procedure that appends the constant to constant arrays -// -// Arguments: -// cts constant value -// -void IntBoolOpDsc::AppendToCtsArray(ssize_t cts) -{ - if (ctsArrayLength == ctsArrayCapacity) - { - if (ctsArrayCapacity == 0) - { - ctsArrayCapacity = 4; - ctsArray = reinterpret_cast(malloc(sizeof(ssize_t) * ctsArrayCapacity)); - } - else - { - ctsArrayCapacity = ctsArrayCapacity * 2; - ctsArray = reinterpret_cast(realloc(ctsArray, sizeof(ssize_t) * ctsArrayCapacity)); - } - } - - ctsArrayLength++; - ctsArray[ctsArrayLength - 1] = cts; -} - //----------------------------------------------------------------------------- // TryOptimizeIntBoolOp: Procedure that looks for constant INT OR operations to fold and if found, folds them // @@ -1860,7 +1775,7 @@ bool Compiler::TryOptimizeIntBoolOp(BasicBlock* b1) folded = true; } - intBoolOpDsc.Free(); + intBoolOpDsc.Reinit(); } } From c6f075b5777634d82c3fbbe6232f904cfa2c4e86 Mon Sep 17 00:00:00 2001 From: pedrobsaila Date: Sat, 12 Apr 2025 22:38:39 +0200 Subject: [PATCH 21/27] handle long --- src/coreclr/jit/optimizebools.cpp | 10 ++++---- .../JIT/opt/OptimizeBools/optboolsreturn.cs | 24 +++++++++++++++++++ 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/optimizebools.cpp b/src/coreclr/jit/optimizebools.cpp index a9e68b8ee91d66..5e8b0d7fcec6b8 100644 --- a/src/coreclr/jit/optimizebools.cpp +++ b/src/coreclr/jit/optimizebools.cpp @@ -1522,7 +1522,7 @@ IntBoolOpDsc IntBoolOpDsc::GetNextIntBoolOp(GenTree* b3, Compiler* comp) while (b4 != nullptr) { - if (!b4->OperIs(GT_OR, GT_LCL_VAR, GT_CNS_INT) || !b4->TypeIs(TYP_INT)) + if (!b4->OperIs(GT_OR, GT_LCL_VAR, GT_CNS_INT) || !b4->TypeIs(TYP_INT, TYP_LONG)) { if (intBoolOpDsc.ctsArray.Height() >= 2 && intBoolOpDsc.lclVarArr.Height() >= 2) { @@ -1624,7 +1624,7 @@ bool IntBoolOpDsc::TryOptimize() GenTree* firstLclVar = lclVarArr.Bottom(0); GenTree* secondLclVar = lclVarArr.Bottom(1); - GenTreeOp* intVarTree = m_comp->gtNewOperNode(GT_OR, TYP_INT, firstLclVar, secondLclVar); + GenTreeOp* intVarTree = m_comp->gtNewOperNode(GT_OR, firstLclVar->gtType == TYP_INT && secondLclVar->gtType == TYP_INT ? TYP_INT : TYP_LONG, firstLclVar, secondLclVar); intVarTree->gtPrev = secondLclVar; secondLclVar->gtNext = intVarTree; secondLclVar->gtPrev = firstLclVar; @@ -1641,7 +1641,7 @@ bool IntBoolOpDsc::TryOptimize() for (int i = 2; i < lclVarArrLength; i++) { GenTree* ithLclVar = lclVarArr.Bottom(i); - GenTreeOp* newIntVarTree = m_comp->gtNewOperNode(GT_OR, TYP_INT, tempIntVatTree, ithLclVar); + GenTreeOp* newIntVarTree = m_comp->gtNewOperNode(GT_OR, tempIntVatTree->gtType == TYP_INT && ithLclVar->gtType == TYP_INT ? TYP_INT : TYP_LONG, tempIntVatTree, ithLclVar); newIntVarTree->gtPrev = ithLclVar; ithLclVar->gtNext = newIntVarTree; ithLclVar->gtPrev = tempIntVatTree; @@ -1656,8 +1656,8 @@ bool IntBoolOpDsc::TryOptimize() optimizedCst = optimizedCst | ithCts; } - GenTreeIntCon* optimizedCstTree = m_comp->gtNewIconNode(optimizedCst, TYP_INT); - GenTreeOp* optimizedTree = m_comp->gtNewOperNode(GT_OR, TYP_INT, tempIntVatTree, optimizedCstTree); + GenTreeIntCon* optimizedCstTree = m_comp->gtNewIconNode(optimizedCst, optimizedCst <= INT_MAX && optimizedCst >= INT_MIN ? TYP_INT : TYP_LONG); + GenTreeOp* optimizedTree = m_comp->gtNewOperNode(GT_OR, tempIntVatTree->gtType == TYP_INT && optimizedCstTree->gtType == TYP_INT ? TYP_INT : TYP_LONG, tempIntVatTree, optimizedCstTree); optimizedTree->gtPrev = optimizedCstTree; optimizedCstTree->gtNext = optimizedTree; optimizedCstTree->gtPrev = tempIntVatTree; diff --git a/src/tests/JIT/opt/OptimizeBools/optboolsreturn.cs b/src/tests/JIT/opt/OptimizeBools/optboolsreturn.cs index 8775c79599482c..2f2091ebdb475f 100644 --- a/src/tests/JIT/opt/OptimizeBools/optboolsreturn.cs +++ b/src/tests/JIT/opt/OptimizeBools/optboolsreturn.cs @@ -219,6 +219,18 @@ private static int Or15(int x, int y) return (x | y) | 15; } + [MethodImpl(MethodImplOptions.NoInlining)] + private static long LongOr10Or5(long x, long y) + { + return (x | 10) | (y | 5); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static long LongOr15(long x, long y) + { + return (x | y) | 15; + } + [MethodImpl(MethodImplOptions.NoInlining)] private static bool AreBothGreatThanZero(int x, int y) { @@ -1141,6 +1153,18 @@ public static int TestEntryPoint() return 101; } + if (LongOr10Or5(14, 23) != LongOr15(14, 23)) + { + Console.WriteLine("CBoolTest:LongOr10Or5(14, 23) failed"); + return 101; + } + + if (LongOr10Or5(78, 11) != LongOr15(78, 11)) + { + Console.WriteLine("CBoolTest:LongOr10Or5(78, 11) failed"); + return 101; + } + Console.WriteLine("PASSED"); return 100; } From 998ede28a00706e7d05a6c160844c65550a4dbc7 Mon Sep 17 00:00:00 2001 From: pedrobsaila Date: Sat, 12 Apr 2025 22:43:09 +0200 Subject: [PATCH 22/27] fix file formatting --- src/coreclr/jit/optimizebools.cpp | 54 +++++++++++++++++++------------ 1 file changed, 33 insertions(+), 21 deletions(-) diff --git a/src/coreclr/jit/optimizebools.cpp b/src/coreclr/jit/optimizebools.cpp index 5e8b0d7fcec6b8..01ff9cea6b74fa 100644 --- a/src/coreclr/jit/optimizebools.cpp +++ b/src/coreclr/jit/optimizebools.cpp @@ -1624,12 +1624,15 @@ bool IntBoolOpDsc::TryOptimize() GenTree* firstLclVar = lclVarArr.Bottom(0); GenTree* secondLclVar = lclVarArr.Bottom(1); - GenTreeOp* intVarTree = m_comp->gtNewOperNode(GT_OR, firstLclVar->gtType == TYP_INT && secondLclVar->gtType == TYP_INT ? TYP_INT : TYP_LONG, firstLclVar, secondLclVar); - intVarTree->gtPrev = secondLclVar; - secondLclVar->gtNext = intVarTree; - secondLclVar->gtPrev = firstLclVar; - firstLclVar->gtNext = secondLclVar; - firstLclVar->gtPrev = end; + GenTreeOp* intVarTree = + m_comp->gtNewOperNode(GT_OR, + firstLclVar->gtType == TYP_INT && secondLclVar->gtType == TYP_INT ? TYP_INT : TYP_LONG, + firstLclVar, secondLclVar); + intVarTree->gtPrev = secondLclVar; + secondLclVar->gtNext = intVarTree; + secondLclVar->gtPrev = firstLclVar; + firstLclVar->gtNext = secondLclVar; + firstLclVar->gtPrev = end; if (end != nullptr) { @@ -1640,13 +1643,17 @@ bool IntBoolOpDsc::TryOptimize() for (int i = 2; i < lclVarArrLength; i++) { - GenTree* ithLclVar = lclVarArr.Bottom(i); - GenTreeOp* newIntVarTree = m_comp->gtNewOperNode(GT_OR, tempIntVatTree->gtType == TYP_INT && ithLclVar->gtType == TYP_INT ? TYP_INT : TYP_LONG, tempIntVatTree, ithLclVar); - newIntVarTree->gtPrev = ithLclVar; - ithLclVar->gtNext = newIntVarTree; - ithLclVar->gtPrev = tempIntVatTree; - tempIntVatTree->gtNext = ithLclVar; - tempIntVatTree = newIntVarTree; + GenTree* ithLclVar = lclVarArr.Bottom(i); + GenTreeOp* newIntVarTree = + m_comp->gtNewOperNode(GT_OR, + tempIntVatTree->gtType == TYP_INT && ithLclVar->gtType == TYP_INT ? TYP_INT + : TYP_LONG, + tempIntVatTree, ithLclVar); + newIntVarTree->gtPrev = ithLclVar; + ithLclVar->gtNext = newIntVarTree; + ithLclVar->gtPrev = tempIntVatTree; + tempIntVatTree->gtNext = ithLclVar; + tempIntVatTree = newIntVarTree; } size_t optimizedCst = 0; @@ -1656,14 +1663,19 @@ bool IntBoolOpDsc::TryOptimize() optimizedCst = optimizedCst | ithCts; } - GenTreeIntCon* optimizedCstTree = m_comp->gtNewIconNode(optimizedCst, optimizedCst <= INT_MAX && optimizedCst >= INT_MIN ? TYP_INT : TYP_LONG); - GenTreeOp* optimizedTree = m_comp->gtNewOperNode(GT_OR, tempIntVatTree->gtType == TYP_INT && optimizedCstTree->gtType == TYP_INT ? TYP_INT : TYP_LONG, tempIntVatTree, optimizedCstTree); - optimizedTree->gtPrev = optimizedCstTree; - optimizedCstTree->gtNext = optimizedTree; - optimizedCstTree->gtPrev = tempIntVatTree; - tempIntVatTree->gtNext = optimizedCstTree; - start->gtPrev = optimizedTree; - optimizedTree->gtNext = start; + GenTreeIntCon* optimizedCstTree = + m_comp->gtNewIconNode(optimizedCst, optimizedCst <= INT_MAX && optimizedCst >= INT_MIN ? TYP_INT : TYP_LONG); + GenTreeOp* optimizedTree = + m_comp->gtNewOperNode(GT_OR, + tempIntVatTree->gtType == TYP_INT && optimizedCstTree->gtType == TYP_INT ? TYP_INT + : TYP_LONG, + tempIntVatTree, optimizedCstTree); + optimizedTree->gtPrev = optimizedCstTree; + optimizedCstTree->gtNext = optimizedTree; + optimizedCstTree->gtPrev = tempIntVatTree; + tempIntVatTree->gtNext = optimizedCstTree; + start->gtPrev = optimizedTree; + optimizedTree->gtNext = start; if (start->OperIsUnary()) { From e4e35bfc77610bddbdfb1303a6619453eeafab02 Mon Sep 17 00:00:00 2001 From: pedrobsaila Date: Sat, 12 Apr 2025 22:55:14 +0200 Subject: [PATCH 23/27] fix formatting in israarm64 --- src/coreclr/jit/lsraarm64.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 54e5057420aa72..e79748f8555673 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1481,14 +1481,16 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou { srcCount += BuildContainedCselUses(containedCselOp, delayFreeOp, candidates); } - else if ((intrin.category == HW_Category_SIMDByIndexedElement) && (genTypeSize(intrin.baseType) == 2) && !HWIntrinsicInfo::HasImmediateOperand(intrin.id)) + else if ((intrin.category == HW_Category_SIMDByIndexedElement) && (genTypeSize(intrin.baseType) == 2) && + !HWIntrinsicInfo::HasImmediateOperand(intrin.id)) { - // Some "Advanced SIMD scalar x indexed element" and "Advanced SIMD vector x indexed element" instructions (e.g. - // "MLA (by element)") have encoding that restricts what registers that can be used for the indexed element when - // the element size is H (i.e. 2 bytes). + // Some "Advanced SIMD scalar x indexed element" and "Advanced SIMD vector x indexed element" instructions + // (e.g. "MLA (by element)") have encoding that restricts what registers that can be used for the indexed + // element when the element size is H (i.e. 2 bytes). if (((opNum == 2) || (opNum == 3))) { - // For those intrinsics, just force the delay-free registers, so they do not conflict with the definition. + // For those intrinsics, just force the delay-free registers, so they do not conflict with the + // definition. srcCount += BuildDelayFreeUses(operand, nullptr, candidates); } else From 34eccbe4b9bdfbfce3560d6dd14d52555068d23f Mon Sep 17 00:00:00 2001 From: pedrobsaila Date: Sun, 13 Apr 2025 00:03:47 +0200 Subject: [PATCH 24/27] handle short and byte types --- src/coreclr/jit/optimizebools.cpp | 5 +- .../JIT/opt/OptimizeBools/optboolsreturn.cs | 48 +++++++++++++++++++ 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/optimizebools.cpp b/src/coreclr/jit/optimizebools.cpp index 01ff9cea6b74fa..a1aa626185e77d 100644 --- a/src/coreclr/jit/optimizebools.cpp +++ b/src/coreclr/jit/optimizebools.cpp @@ -1522,7 +1522,7 @@ IntBoolOpDsc IntBoolOpDsc::GetNextIntBoolOp(GenTree* b3, Compiler* comp) while (b4 != nullptr) { - if (!b4->OperIs(GT_OR, GT_LCL_VAR, GT_CNS_INT) || !b4->TypeIs(TYP_INT, TYP_LONG)) + if (!b4->OperIs(GT_OR, GT_LCL_VAR, GT_CNS_INT, GT_CAST) || !b4->TypeIs(TYP_INT, TYP_LONG)) { if (intBoolOpDsc.ctsArray.Height() >= 2 && intBoolOpDsc.lclVarArr.Height() >= 2) { @@ -1664,7 +1664,8 @@ bool IntBoolOpDsc::TryOptimize() } GenTreeIntCon* optimizedCstTree = - m_comp->gtNewIconNode(optimizedCst, optimizedCst <= INT_MAX && optimizedCst >= INT_MIN ? TYP_INT : TYP_LONG); + m_comp->gtNewIconNode(optimizedCst, + optimizedCst <= INT32_MAX && optimizedCst >= INT32_MIN ? TYP_INT : TYP_LONG); GenTreeOp* optimizedTree = m_comp->gtNewOperNode(GT_OR, tempIntVatTree->gtType == TYP_INT && optimizedCstTree->gtType == TYP_INT ? TYP_INT diff --git a/src/tests/JIT/opt/OptimizeBools/optboolsreturn.cs b/src/tests/JIT/opt/OptimizeBools/optboolsreturn.cs index 2f2091ebdb475f..4151ea8fdcf049 100644 --- a/src/tests/JIT/opt/OptimizeBools/optboolsreturn.cs +++ b/src/tests/JIT/opt/OptimizeBools/optboolsreturn.cs @@ -231,6 +231,30 @@ private static long LongOr15(long x, long y) return (x | y) | 15; } + [MethodImpl(MethodImplOptions.NoInlining)] + private static int ByteOr10Or5(byte x, byte y) + { + return (x | 10) | (y | 5); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static int ByteOr15(byte x, byte y) + { + return (x | y) | 15; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static int ShortOr10Or5(short x, short y) + { + return (x | 10) | (y | 5); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static int ShortOr15(short x, short y) + { + return (x | y) | 15; + } + [MethodImpl(MethodImplOptions.NoInlining)] private static bool AreBothGreatThanZero(int x, int y) { @@ -1165,6 +1189,30 @@ public static int TestEntryPoint() return 101; } + if (ByteOr10Or5(14, 23) != ByteOr15(14, 23)) + { + Console.WriteLine("CBoolTest:ByteOr10Or5(14, 23) failed"); + return 101; + } + + if (ByteOr10Or5(78, 11) != ByteOr15(78, 11)) + { + Console.WriteLine("CBoolTest:ByteOr10Or5(78, 11) failed"); + return 101; + } + + if (ShortOr10Or5(14, 23) != ShortOr15(14, 23)) + { + Console.WriteLine("CBoolTest:ShortOr10Or5(14, 23) failed"); + return 101; + } + + if (ShortOr10Or5(78, 11) != ShortOr15(78, 11)) + { + Console.WriteLine("CBoolTest:ShortOr10Or5(78, 11) failed"); + return 101; + } + Console.WriteLine("PASSED"); return 100; } From 7053af7d032b146eb3c18fb88441f53193193f9c Mon Sep 17 00:00:00 2001 From: pedrobsaila Date: Sun, 13 Apr 2025 17:43:26 +0200 Subject: [PATCH 25/27] use movzx for short and bytes --- src/coreclr/jit/optimizebools.cpp | 114 +++++++++++++++++++++++------- 1 file changed, 89 insertions(+), 25 deletions(-) diff --git a/src/coreclr/jit/optimizebools.cpp b/src/coreclr/jit/optimizebools.cpp index a1aa626185e77d..a925d9cf1db612 100644 --- a/src/coreclr/jit/optimizebools.cpp +++ b/src/coreclr/jit/optimizebools.cpp @@ -70,11 +70,11 @@ class IntBoolOpDsc } private: - Compiler* m_comp; - ArrayStack ctsArray; - ArrayStack lclVarArr; - GenTree* start; - GenTree* end; + Compiler* m_comp; + ArrayStack ctsArray; + ArrayStack lclVarArr; + GenTree* start; + GenTree* end; public: static IntBoolOpDsc GetNextIntBoolOp(GenTree* b3, Compiler* comp); @@ -1522,7 +1522,10 @@ IntBoolOpDsc IntBoolOpDsc::GetNextIntBoolOp(GenTree* b3, Compiler* comp) while (b4 != nullptr) { - if (!b4->OperIs(GT_OR, GT_LCL_VAR, GT_CNS_INT, GT_CAST) || !b4->TypeIs(TYP_INT, TYP_LONG)) + if (!b4->OperIs(GT_OR, GT_LCL_VAR, GT_CNS_INT, GT_CNS_LNG, GT_CAST) || + (b4->OperIs(GT_CAST) && + (!b4->AsCast()->CastOp()->OperIs(GT_LCL_VAR) || !b4->AsCast()->CastOp()->TypeIs(TYP_INT, TYP_LONG))) || + !b4->TypeIs(TYP_INT, TYP_LONG)) { if (intBoolOpDsc.ctsArray.Height() >= 2 && intBoolOpDsc.lclVarArr.Height() >= 2) { @@ -1582,15 +1585,21 @@ IntBoolOpDsc IntBoolOpDsc::GetNextIntBoolOp(GenTree* b3, Compiler* comp) break; } case GT_CNS_INT: + case GT_CNS_LNG: { - ssize_t constant = b4->AsIntConCommon()->IconValue(); - intBoolOpDsc.ctsArray.Push(constant); + intBoolOpDsc.ctsArray.Push(b4->AsIntConCommon()); orOpCount--; break; } case GT_OR: orOpCount++; break; + case GT_CAST: + { + intBoolOpDsc.lclVarArr.Push(b4); + b4 = b4->gtPrev; + break; + } default: { break; @@ -1630,13 +1639,41 @@ bool IntBoolOpDsc::TryOptimize() firstLclVar, secondLclVar); intVarTree->gtPrev = secondLclVar; secondLclVar->gtNext = intVarTree; - secondLclVar->gtPrev = firstLclVar; - firstLclVar->gtNext = secondLclVar; - firstLclVar->gtPrev = end; + + if (secondLclVar->OperIs(GT_LCL_VAR)) + { + secondLclVar->gtPrev = firstLclVar; + firstLclVar->gtNext = secondLclVar; + } + else + { + assert(secondLclVar->OperIs(GT_CAST)); + GenTree* secondCastLclVar = secondLclVar->AsCast()->CastOp(); + secondLclVar->gtPrev = secondCastLclVar; + secondCastLclVar->gtNext = secondLclVar; + secondCastLclVar->gtPrev = firstLclVar; + firstLclVar->gtNext = secondCastLclVar; + } + + GenTree* lastLclVarLink = nullptr; + if (firstLclVar->OperIs(GT_LCL_VAR)) + { + firstLclVar->gtPrev = end; + lastLclVarLink = firstLclVar; + } + else + { + assert(firstLclVar->OperIs(GT_CAST)); + GenTree* firstCastLclVar = firstLclVar->AsCast()->CastOp(); + firstLclVar->gtPrev = firstCastLclVar; + firstCastLclVar->gtNext = firstLclVar; + firstCastLclVar->gtPrev = end; + lastLclVarLink = firstCastLclVar; + } if (end != nullptr) { - end->gtNext = firstLclVar; + end->gtNext = lastLclVarLink; } GenTree* tempIntVatTree = intVarTree; @@ -1649,24 +1686,43 @@ bool IntBoolOpDsc::TryOptimize() tempIntVatTree->gtType == TYP_INT && ithLclVar->gtType == TYP_INT ? TYP_INT : TYP_LONG, tempIntVatTree, ithLclVar); - newIntVarTree->gtPrev = ithLclVar; - ithLclVar->gtNext = newIntVarTree; - ithLclVar->gtPrev = tempIntVatTree; - tempIntVatTree->gtNext = ithLclVar; - tempIntVatTree = newIntVarTree; + newIntVarTree->gtPrev = ithLclVar; + ithLclVar->gtNext = newIntVarTree; + + if (ithLclVar->OperIs(GT_LCL_VAR)) + { + ithLclVar->gtPrev = tempIntVatTree; + tempIntVatTree->gtNext = ithLclVar; + } + else + { + assert(ithLclVar->OperIs(GT_CAST)); + GenTree* ithCastLclVar = ithLclVar->AsCast()->CastOp(); + ithLclVar->gtPrev = ithCastLclVar; + ithCastLclVar->gtNext = ithLclVar; + ithCastLclVar->gtPrev = tempIntVatTree; + tempIntVatTree->gtNext = ithCastLclVar; + } + + tempIntVatTree = newIntVarTree; } - size_t optimizedCst = 0; + size_t optimizedCst = 0; + var_types optimizedCstType = TYP_INT; for (int i = 0; i < ctsArrayLength; i++) { - size_t ithCts = ctsArray.Bottom(i); - optimizedCst = optimizedCst | ithCts; + GenTreeIntConCommon* ithCts = ctsArray.Bottom(i); + + if (optimizedCstType == TYP_INT && ithCts->gtType == TYP_LONG) + { + optimizedCstType = TYP_LONG; + } + + optimizedCst = optimizedCst | ithCts->IconValue(); } - GenTreeIntCon* optimizedCstTree = - m_comp->gtNewIconNode(optimizedCst, - optimizedCst <= INT32_MAX && optimizedCst >= INT32_MIN ? TYP_INT : TYP_LONG); - GenTreeOp* optimizedTree = + GenTreeIntCon* optimizedCstTree = m_comp->gtNewIconNode(optimizedCst, optimizedCstType); + GenTreeOp* optimizedTree = m_comp->gtNewOperNode(GT_OR, tempIntVatTree->gtType == TYP_INT && optimizedCstTree->gtType == TYP_INT ? TYP_INT : TYP_LONG, @@ -1753,7 +1809,15 @@ GenTree* IntBoolOpDsc::GetLclVarArrayFirst() { if (lclVarArr.Height() > 0) { - return lclVarArr.Bottom(0); + GenTree* firstLclVar = lclVarArr.Bottom(0); + + if (firstLclVar->OperIs(GT_LCL_VAR)) + { + return firstLclVar; + } + + assert(firstLclVar->OperIs(GT_CAST)); + return firstLclVar->AsCast()->CastOp(); } return nullptr; From a7e8886c2500c3f103338c379d4aec28f1eb608b Mon Sep 17 00:00:00 2001 From: pedrobsaila Date: Mon, 14 Apr 2025 01:55:37 +0200 Subject: [PATCH 26/27] fix build error --- src/coreclr/jit/optimizebools.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/optimizebools.cpp b/src/coreclr/jit/optimizebools.cpp index a925d9cf1db612..f3cc2a4de51e92 100644 --- a/src/coreclr/jit/optimizebools.cpp +++ b/src/coreclr/jit/optimizebools.cpp @@ -1707,7 +1707,7 @@ bool IntBoolOpDsc::TryOptimize() tempIntVatTree = newIntVarTree; } - size_t optimizedCst = 0; + INT64 optimizedCst = 0; var_types optimizedCstType = TYP_INT; for (int i = 0; i < ctsArrayLength; i++) { @@ -1718,11 +1718,13 @@ bool IntBoolOpDsc::TryOptimize() optimizedCstType = TYP_LONG; } - optimizedCst = optimizedCst | ithCts->IconValue(); + optimizedCst = optimizedCst | ithCts->IntegralValue(); } - GenTreeIntCon* optimizedCstTree = m_comp->gtNewIconNode(optimizedCst, optimizedCstType); - GenTreeOp* optimizedTree = + GenTree* optimizedCstTree = optimizedCstType == TYP_INT + ? m_comp->gtNewIconNode((ssize_t)optimizedCst, optimizedCstType) + : m_comp->gtNewLconNode(optimizedCst); + GenTreeOp* optimizedTree = m_comp->gtNewOperNode(GT_OR, tempIntVatTree->gtType == TYP_INT && optimizedCstTree->gtType == TYP_INT ? TYP_INT : TYP_LONG, From 2c16636f949e2230fb29b14e7837559c8286102f Mon Sep 17 00:00:00 2001 From: pedrobsaila Date: Sun, 18 May 2025 15:33:28 +0200 Subject: [PATCH 27/27] fix formatting issue --- src/coreclr/jit/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 4a64ee1cf5b019..a6b7a2ee05268e 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -83,7 +83,7 @@ class SpanningTreeVisitor; // defined in fgprofile.cpp class CSE_DataFlow; // defined in optcse.cpp struct CSEdsc; // defined in optcse.h class CSE_HeuristicCommon; // defined in optcse.h -class IntBoolOpDsc; // defined in optimizer.cpp +class IntBoolOpDsc; // defined in optimizer.cpp class OptBoolsDsc; // defined in optimizer.cpp struct JumpThreadInfo; // defined in redundantbranchopts.cpp class ProfileSynthesis; // defined in profilesynthesis.h