From fe46cdbf016f83c6612d2615d36815ac6d81dd85 Mon Sep 17 00:00:00 2001
From: ShangkunLI
Date: Thu, 12 Jun 2025 21:17:06 +0800
Subject: [PATCH 01/13] add integrated operations for memory access and loop
 control

---
 CMakeLists.txt                           |  8 +--
 include/NeuraDialect/NeuraOps.td         | 75 ++++++++++++++++++++++++
 test/affine2neura/gpt2-node27/compile.sh |  3 +
 test/affine2neura/gpt2-node27/node27.cpp | 14 +++++
 test/affine2neura/gpt2-node30/compile.sh |  3 +
 test/affine2neura/gpt2-node30/node30.cpp | 15 +++++
 6 files changed, 114 insertions(+), 4 deletions(-)
 create mode 100755 test/affine2neura/gpt2-node27/compile.sh
 create mode 100644 test/affine2neura/gpt2-node27/node27.cpp
 create mode 100755 test/affine2neura/gpt2-node30/compile.sh
 create mode 100644 test/affine2neura/gpt2-node30/node30.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9a83b5b7..adaf6c9b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,10 +10,10 @@ set(CMAKE_CXX_STANDARD_REQUIRED YES)
 
 add_compile_options(-g)
 
-# set(MLIR_DIR /home/lucas/llvm-project/build/lib/cmake/mlir)
-# set(LLVM_DIR /home/lucas/llvm-project/build/lib/cmake/llvm)
-# set(MLIR_SRC_DIR /home/lucas/llvm-project/mlir)
-# set(MLIR_BINARY_DIR /home/lucas/llvm-project/build)
+set(MLIR_DIR /home/lucas/llvm-project/build/lib/cmake/mlir)
+set(LLVM_DIR /home/lucas/llvm-project/build/lib/cmake/llvm)
+set(MLIR_SRC_DIR /home/lucas/llvm-project/mlir)
+set(MLIR_BINARY_DIR /home/lucas/llvm-project/build)
 
 message(STATUS "Using MLIRConfig.cmake in: ${MLIR_DIR}")
 message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}")
diff --git a/include/NeuraDialect/NeuraOps.td b/include/NeuraDialect/NeuraOps.td
index 01e54159..0b191516 100644
--- a/include/NeuraDialect/NeuraOps.td
+++ b/include/NeuraDialect/NeuraOps.td
@@ -101,6 +101,34 @@ def Neura_StoreOp : Op {
   // let assemblyFormat = "$value `,` $addr `,` $predicate attr-dict";
 }
 
+// Defines a load operation with integrated address calculation.
+def Neura_LoadIndexedOp : Op {
+  let summary = "Load with integrated address calculation for multi-dimensional arrays";
+  let description = [{
+    Calculates the address from the base address and indices, then loads the
+    value at the calculated address.
+    Example:
+      %value = neura.load_indexed memref<?x?xf32> %base [%arg1, %arg2] : f32
+  }];
+  let arguments = (ins Arg:$base, Variadic:$indices, Optional:$predicate);
+  let results = (outs AnyType:$result);
+  let assemblyFormat = "type($base) $base `[` $indices `]` ($predicate^ `:` type($predicate))? attr-dict `:` type($result)";
+}
+
+// Defines a store operation with integrated address calculation.
+def Neura_StoreIndexedOp : Op {
+  let summary = "Store with integrated address calculation for multi-dimensional arrays";
+  let description = [{
+    Calculates the address from the base address and indices, then stores the
+    value at the calculated address.
+    Example:
+      neura.store_indexed %value to memref<?x?xf32> %base [%arg1, %arg2] : f32
+  }];
+  let arguments = (ins AnyType:$value, Arg:$base, Variadic:$indices, Optional:$predicate);
+  let results = (outs);
+  let assemblyFormat = "$value `to` type($base) $base `[` $indices `]` ($predicate^ `:` type($predicate))? attr-dict `:` type($value)";
+}
+
 // Defines a pointer computation operation.
 def Neura_GEP : Op {
   let summary = "Pointer computation using offset indices";
@@ -253,3 +281,50 @@ def Neura_ReserveOp : Op {
   let results = (outs AnyType:$result);
   let assemblyFormat = "attr-dict `:` type($result)";
 }
+
+// ----------------------------------------------------
+// Defines loop-related operations.
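+
+// For illustration, the two memory ops above compose as follows once the
+// lowering is in place (a sketch matching the assembly formats declared
+// above; %src/%dst are memref values and %i/%j are loop indices):
+//   %v = neura.load_indexed memref<4x16xf32> %src [%i, %j] : f32
+//   neura.store_indexed %v to memref<4x16xf32> %dst [%i, %j] : f32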
+
+// Loop iteration operation for index increment and comparison.
+def Neura_LoopIterOp : Op {
+  let summary = "CGRA-optimized loop iteration operation";
+  let description = [{
+    Takes the current loop index, a step value, and an upper bound as inputs.
+    Outputs the next loop index and a boolean condition indicating whether
+    the loop should continue.
+
+    Example:
+      %next_index, %continue = neura.loop_iter current_index: 0, step: 1, bound: 10 : index i1}];
+
+  let arguments = (ins Index:$current_index,
+                       Index:$step,
+                       Index:$bound,
+                       Optional:$loop_type, // 0: <, 1: <=, 2: >, 3: >=
+                       Optional:$predicate);
+  let results = (outs Index:$next_index, I1:$continue_condition);
+  let assemblyFormat = "`current_index` `:` $current_index `,` `step` `:` $step `,` `bound` `:` $bound `:` type($bound) ($loop_type^ `:` type($loop_type))? ($predicate^ `:` type($predicate))? attr-dict `:` type($next_index) type($continue_condition)";
+}
+
+// Loop control operation that integrates loop iteration and control flow.
+def Neura_LoopControlOp : Op {
+  let summary = "Integrated loop control operation for simple loops";
+  let description = [{
+    This operation is an integrated loop control operation that combines
+    loop iteration and control flow. It performs four main actions:
+    1. Calculates the next iteration's index: `next_index = current_index + step`
+    2. Checks whether the loop should continue based on the current index and bound.
+    3. If the loop should continue, branches to the loop body and yields the related values.
+    4. Otherwise, exits the loop.
+  }];
+  let arguments = (ins Index:$current_index, // Current loop index
+                       Index:$step,
+                       Index:$bound,
+                       DefaultValuedAttr:$loop_type, // Loop type: "lt", "le", "gt", "ge", "eq", "ne"
+                       Variadic:$passthrough_args // Additional arguments to pass through to the successors
+  );
+  let results = (outs);
+  let successors = (successor
+    AnySuccessor:$body, // loop body successor
+    AnySuccessor:$exit  // exit successor
+  );
+  let assemblyFormat = "`current_index` `:` $current_index `,` `step` `:` $step `,` `bound` `:` $bound `,` `loop_type` `:` $loop_type (`passthrough` `(` $passthrough_args^ `:` type($passthrough_args) `)`)? `then` $body `else` $exit attr-dict";
+}
\ No newline at end of file
diff --git a/test/affine2neura/gpt2-node27/compile.sh b/test/affine2neura/gpt2-node27/compile.sh
new file mode 100755
index 00000000..e1c6c965
--- /dev/null
+++ b/test/affine2neura/gpt2-node27/compile.sh
@@ -0,0 +1,3 @@
+/home/lucas/Project/NeuraCompiler/thirdparty/Polygeist/build/bin/cgeist ./node27.cpp -S --raise-scf-to-affine -o ./node27.mlir
+/home/lucas/Project/NeuraCompiler/thirdparty/Polygeist/build/bin/polygeist-opt ./node27.mlir --affine-loop-unroll="unroll-factor=2" -o ./node27_unroll.mlir
+# /home/lucas/Project/NeuraCompiler/thirdparty/Polygeist/build/bin/polygeist-opt ./node27_unroll.mlir --affine-loop-tile="tile-size=2" -o ./node27_tile.mlir
\ No newline at end of file
diff --git a/test/affine2neura/gpt2-node27/node27.cpp b/test/affine2neura/gpt2-node27/node27.cpp
new file mode 100644
index 00000000..3bcf72c2
--- /dev/null
+++ b/test/affine2neura/gpt2-node27/node27.cpp
@@ -0,0 +1,14 @@
+float input[1][16][4][16];
+float output[1][4][16][16];
+
+int main() {
+  for (int arg2 = 0; arg2 < 1; arg2++) {
+    for (int arg3 = 0; arg3 < 16; arg3++) {
+      for (int arg4 = 0; arg4 < 4; arg4 += 1) {
+        for (int arg5 = 0; arg5 < 16; arg5 += 1) {
+          output[arg2][arg3][arg4][arg5] = input[arg2][arg4][arg3][arg5];
+        }
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/test/affine2neura/gpt2-node30/compile.sh b/test/affine2neura/gpt2-node30/compile.sh
new file mode 100755
index 00000000..28b23b23
--- /dev/null
+++ b/test/affine2neura/gpt2-node30/compile.sh
@@ -0,0 +1,3 @@
+/home/lucas/Project/NeuraCompiler/thirdparty/Polygeist/build/bin/cgeist ./node30.cpp -S --raise-scf-to-affine -o ./node30.mlir
+/home/lucas/Project/NeuraCompiler/thirdparty/Polygeist/build/bin/polygeist-opt ./node30.mlir --affine-loop-unroll="unroll-factor=2" -o ./node30_unroll.mlir
+# /home/lucas/Project/NeuraCompiler/thirdparty/Polygeist/build/bin/polygeist-opt ./node30_unroll.mlir --affine-loop-tile="tile-size=2" -o ./node30_tile.mlir
\ No newline at end of file
diff --git a/test/affine2neura/gpt2-node30/node30.cpp b/test/affine2neura/gpt2-node30/node30.cpp
new file mode 100644
index 00000000..01177f33
--- /dev/null
+++ b/test/affine2neura/gpt2-node30/node30.cpp
@@ -0,0 +1,15 @@
+float A[1][4][16][64];
+// float B=20.0;
+float C[1][4][16][64];
+
+int main() {
+  for (int arg2 = 0; arg2 < 1; arg2++) {
+    for (int arg3 = 0; arg3 < 4; arg3++) {
+      for (int arg4 = 0; arg4 < 16; arg4++) {
+        for (int arg5 = 0; arg5 < 64; arg5++) {
+          C[arg2][arg3][arg4][arg5] = A[arg2][arg3][arg4][arg5] * 10;
+        }
+      }
+    }
+  }
+}
\ No newline at end of file

From 93955fa0d463fc7964c2a99421a673b513bb25ce Mon Sep 17 00:00:00 2001
From: ShangkunLI
Date: Fri, 13 Jun 2025 16:45:19 +0800
Subject: [PATCH 02/13] convert affine load/store/for into neura dialect

---
 include/Conversion/ConversionPasses.h               |   1 +
 include/Conversion/ConversionPasses.td              |   6 +
 include/NeuraDialect/NeuraOps.td                    |   2 +-
 .../AffineToNeura/AffineToNeuraPass.cpp             | 317 ++++++++++++++++++
 lib/Conversion/AffineToNeura/CMakeLists.txt         |  15 +
 .../ArithToNeura/ArithToNeuraPass.cpp               |   2 +-
 lib/Conversion/CMakeLists.txt                       |   1 +
 .../LlvmToNeura/LlvmToNeuraPass.cpp                 |   2 +-
 test/affine2neura/gpt2-node11/compile.sh            |   3 +
 test/affine2neura/gpt2-node11/node11.cpp            |  12 +
 test/affine2neura/simpleloop/compile.sh             |   3 +
 test/affine2neura/simpleloop/simple.cpp             |  12 +
 tools/mlir-neura-opt/mlir-neura-opt.cpp             |   3 +
 13 files changed, 376 insertions(+), 3 deletions(-)
 create mode 100644 lib/Conversion/AffineToNeura/AffineToNeuraPass.cpp
 create mode 100644 lib/Conversion/AffineToNeura/CMakeLists.txt
 create mode 100755 test/affine2neura/gpt2-node11/compile.sh
 create mode 100644 test/affine2neura/gpt2-node11/node11.cpp
 create mode 100755 test/affine2neura/simpleloop/compile.sh
 create mode 100644 test/affine2neura/simpleloop/simple.cpp

diff --git a/include/Conversion/ConversionPasses.h b/include/Conversion/ConversionPasses.h
index 2477bb3d..36e5db18 100644
--- a/include/Conversion/ConversionPasses.h
+++ b/include/Conversion/ConversionPasses.h
@@ -19,6 +19,7 @@ namespace mlir {
 // Conversion passes.
 std::unique_ptr<Pass> createLowerArithToNeuraPass();
 std::unique_ptr<Pass> createLowerLlvmToNeuraPass();
+std::unique_ptr<Pass> createLowerAffineToNeuraPass();
 
 #define GEN_PASS_REGISTRATION
 #include "Conversion/ConversionPasses.h.inc"
diff --git a/include/Conversion/ConversionPasses.td b/include/Conversion/ConversionPasses.td
index 7fca77bb..77ee2ef1 100644
--- a/include/Conversion/ConversionPasses.td
+++ b/include/Conversion/ConversionPasses.td
@@ -20,4 +20,10 @@ def LowerLlvmToNeura : Pass<"lower-llvm-to-neura", "ModuleOp">{
   let constructor = "mlir::createLowerLlvmToNeuraPass()";
 }
 
+def LowerAffineToNeura : Pass<"lower-affine-to-neura", "FuncOp">{
+  let summary = "Lower affine to Neura dialect";
+  let description = [{Lower affine dialect operations to Neura dialect operations.}];
+  let constructor = "mlir::createLowerAffineToNeuraPass()";
+}
+
 #endif // CONVERSION_PASSES_TD
\ No newline at end of file
diff --git a/include/NeuraDialect/NeuraOps.td b/include/NeuraDialect/NeuraOps.td
index 0b191516..bfba1d4f 100644
--- a/include/NeuraDialect/NeuraOps.td
+++ b/include/NeuraDialect/NeuraOps.td
@@ -11,7 +11,7 @@ def Neura_ConstantOp : Op {
     OptionalAttr:$predicate // Add optional predicate attribute
   );
   let results = (outs AnyType:$result);
-  // let assemblyFormat = "attr-dict `:` type($result)";
+  let assemblyFormat = "attr-dict `:` type($result)";
 }
 
 // Defines an addition operation.
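(With the assembly format enabled above, neura.constant prints in the short
custom form rather than the generic one — a sketch, assuming an integer
`value` attribute:
    %c10 = neura.constant {value = 10 : i32} : i32
instead of
    %c10 = "neura.constant"() {value = 10 : i32} : () -> i32)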
diff --git a/lib/Conversion/AffineToNeura/AffineToNeuraPass.cpp b/lib/Conversion/AffineToNeura/AffineToNeuraPass.cpp
new file mode 100644
index 00000000..e080da9e
--- /dev/null
+++ b/lib/Conversion/AffineToNeura/AffineToNeuraPass.cpp
@@ -0,0 +1,317 @@
+#include "Conversion/ConversionPasses.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/IRMapping.h"
+#include "mlir/IR/MLIRContext.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/IR/Region.h"
+#include "mlir/IR/ValueRange.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Support/LLVM.h"
+#include "mlir/Support/LogicalResult.h"
+#include "mlir/Transforms/DialectConversion.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+
+#include "NeuraDialect/NeuraDialect.h"
+#include "NeuraDialect/NeuraOps.h"
+#include "mlir/Transforms/RegionUtils.h"
+#include "llvm/Support/raw_ostream.h"
+#include <memory>
+
+using namespace mlir;
+using namespace mlir::neura;
+using namespace mlir::func;
+
+#define GEN_PASS_DEF_LOWERAFFINETONEURA
+#include "Conversion/ConversionPasses.h.inc"
+
+namespace {
+struct AffineLoadLowering : public OpRewritePattern<affine::AffineLoadOp> {
+  using OpRewritePattern<affine::AffineLoadOp>::OpRewritePattern;
+  LogicalResult matchAndRewrite(affine::AffineLoadOp loadOp,
+                                PatternRewriter &rewriter) const override {
+    auto loc = loadOp.getLoc();
+    auto memref = loadOp.getMemref();
+    AffineMap map = loadOp.getAffineMap();
+    ValueRange mapOperands = loadOp.getMapOperands();
+    // Get the indices for the load operation.
+    SmallVector<Value> newIndices;
+    newIndices.reserve(map.getNumResults());
+    llvm::errs() << "Lowering affine load operation: " << loadOp << "\n";
+    llvm::errs() << "Number of results in affine map: " << map.getNumResults()
+                 << "\n";
+    for (auto expr : map.getResults()) {
+      llvm::errs() << "Map expr: " << expr << "\n";
+    }
+
+    for (AffineExpr expr : map.getResults()) {
+      if (auto constExpr = expr.dyn_cast<AffineConstantExpr>()) {
+        auto indexType = rewriter.getIndexType();
+        auto valueAttr =
+            rewriter.getIntegerAttr(indexType, constExpr.getValue());
+        newIndices.push_back(rewriter.create<neura::ConstantOp>(
+            loc, indexType, valueAttr, nullptr));
+      } else if (auto dimExpr = expr.dyn_cast<AffineDimExpr>()) {
+        if (dimExpr.getPosition() >= map.getNumDims() ||
+            dimExpr.getPosition() >=
+                mapOperands
+                    .size()) { // Check against mapOperands size for safety
+          return loadOp.emitError(
+              "affine map dimension out of bounds for map operands");
+        }
+        newIndices.push_back(mapOperands[dimExpr.getPosition()]);
+      } else if (auto symExpr = expr.dyn_cast<AffineSymbolExpr>()) {
+        unsigned symbolOperandIndex = map.getNumDims() + symExpr.getPosition();
+        if (symbolOperandIndex >= mapOperands.size()) {
+          return loadOp.emitError(
+              "affine map symbol out of bounds for map operands");
+        }
+        newIndices.push_back(mapOperands[symbolOperandIndex]);
+      } else {
+        // For more complex affine expressions (e.g., d0 + c1),
+        // materialize the result using affine.apply.
+        // neura.load_indexed expects individual index values.
+        // This is a temporary workaround for complex expressions.
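+        // For example, a map result like d0 * 16 + d1 is split into its own
+        // single-result map and materialized as (a sketch; %i and %j stand
+        // for the map operands):
+        //   %idx = affine.apply affine_map<(d0, d1) -> (d0 * 16 + d1)>(%i, %j)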
+ llvm::errs() << "Complex affine expression: " << expr << "\n"; + AffineMap singleResultMap = AffineMap::get( + map.getNumDims(), map.getNumSymbols(), expr, rewriter.getContext()); + Value complexIndex = rewriter.create( + loc, singleResultMap, mapOperands); + newIndices.push_back(complexIndex); + } + } + + auto memRefType = memref.getType().dyn_cast(); + if (!memRefType) { + return loadOp.emitError("base of load is not a MemRefType"); + } + if (newIndices.size() != static_cast(memRefType.getRank())) { + return loadOp.emitError("number of indices from affine map (") + << newIndices.size() << ") does not match memref rank (" + << memRefType.getRank() << ")"; + } + + // Create the neura.load_indexed operation + auto newLoadOp = rewriter.create( + loc, loadOp.getType(), memref, ValueRange{newIndices}, nullptr); + + rewriter.replaceOp(loadOp, newLoadOp.getResult()); + return success(); + } +}; + +struct AffineStoreLowering : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + LogicalResult matchAndRewrite(affine::AffineStoreOp storeOp, + PatternRewriter &rewriter) const override { + auto loc = storeOp.getLoc(); + auto memref = storeOp.getMemref(); + auto value = storeOp.getValueToStore(); + AffineMap map = storeOp.getAffineMap(); + ValueRange mapOperands = storeOp.getMapOperands(); + + SmallVector newIndices; + newIndices.reserve(map.getNumResults()); + + for (AffineExpr expr : map.getResults()) { + if (auto constExpr = expr.dyn_cast()) { + auto indexType = rewriter.getIndexType(); + auto valueAttr = + rewriter.getIntegerAttr(indexType, constExpr.getValue()); + newIndices.push_back(rewriter.create( + loc, indexType, valueAttr, nullptr)); + } else if (auto dimExpr = expr.dyn_cast()) { + if (dimExpr.getPosition() >= map.getNumDims() || + dimExpr.getPosition() >= mapOperands.size()) { + return storeOp.emitError( + "affine map dimension out of bounds for map operands"); + } + newIndices.push_back(mapOperands[dimExpr.getPosition()]); + } else if (auto symExpr = expr.dyn_cast()) { + unsigned symbolOperandIndex = map.getNumDims() + symExpr.getPosition(); + if (symbolOperandIndex >= mapOperands.size()) { + return storeOp.emitError( + "affine map symbol out of bounds for map operands"); + } + newIndices.push_back(mapOperands[symbolOperandIndex]); + } else { + // For more complex affine expressions, materialize the result using + // affine.apply. This is a temporary workaround for complex expressions. + AffineMap singleResultMap = AffineMap::get( + map.getNumDims(), map.getNumSymbols(), expr, rewriter.getContext()); + Value complexIndex = rewriter.create( + loc, singleResultMap, mapOperands); + newIndices.push_back(complexIndex); + } + } + + auto memRefType = memref.getType().dyn_cast(); + if (!memRefType) { + return storeOp.emitError("base of store is not a MemRefType"); + } + if (newIndices.size() != static_cast(memRefType.getRank())) { + return storeOp.emitError("number of indices from affine map (") + << newIndices.size() << ") does not match memref rank (" + << memRefType.getRank() << ")"; + } + + rewriter.create(loc, value, memref, + ValueRange{newIndices}, nullptr); + rewriter.eraseOp(storeOp); + return success(); + } +}; + +struct AffineForLowering : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(affine::AffineForOp forOp, + PatternRewriter &rewriter) const override { + auto loc = forOp.getLoc(); + auto indexType = rewriter.getIndexType(); + + // 1. 
Extract loop parameters (lower bound, upper bound, step) + Value lowerBoundVal; + if (forOp.hasConstantLowerBound()) { + int lowerBoundConstant = forOp.getConstantLowerBound(); + auto lowerBoundAttr = + rewriter.getIntegerAttr(indexType, lowerBoundConstant); + lowerBoundVal = rewriter.create( + loc, indexType, lowerBoundAttr, nullptr); + } else { + // If the lower bound is not constant, we need to use affine.apply + // This is a temporary workaround for non-constant lower bounds. + llvm::errs() << "Using affine.apply for unconstant lower bound\n"; + affine::AffineBound lowerBound = forOp.getLowerBound(); + AffineMap lowerBoundMap = lowerBound.getMap(); + ValueRange lowerBoundOperands = forOp.getLowerBoundOperands(); + lowerBoundVal = rewriter.create( + loc, lowerBoundMap, lowerBoundOperands); + } + + Value upperBoundVal; + if (forOp.hasConstantUpperBound()) { + int upperBoundConstant = forOp.getConstantUpperBound(); + auto upperBoundAttr = + rewriter.getIntegerAttr(indexType, upperBoundConstant); + upperBoundVal = rewriter.create( + loc, indexType, upperBoundAttr, nullptr); + } else { + // For non-constant upper bounds, we also use affine.apply + llvm::errs() << "Using affine.apply for unconstant upper bound\n"; + affine::AffineBound upperBound = forOp.getUpperBound(); + AffineMap upperBoundMap = upperBound.getMap(); + ValueRange upperBoundOperands = forOp.getUpperBoundOperands(); + upperBoundVal = rewriter.create( + loc, upperBoundMap, upperBoundOperands); + } + + auto stepAttr = rewriter.getIntegerAttr(indexType, forOp.getStep()); + Value stepVal = + rewriter.create(loc, indexType, stepAttr, nullptr); + llvm::errs() << "lower bound: " << lowerBoundVal + << ", upper bound: " << upperBoundVal << ", step: " << stepVal + << "\n"; + + // 2. Block structure + Block *originBlock = rewriter.getInsertionBlock(); + auto originPoint = rewriter.getInsertionPoint(); + Region *parentRegion = originBlock->getParent(); + + Block *headerBlock = rewriter.createBlock( + parentRegion, std::next(Region::iterator(originBlock)), {indexType}, + {loc}); + Block *bodyBlock = rewriter.createBlock( + parentRegion, std::next(Region::iterator(headerBlock)), {indexType}, + {loc}); + Block *exitBlock = rewriter.createBlock( + parentRegion, std::next(Region::iterator(bodyBlock))); + Block *continueBlock = rewriter.splitBlock(originBlock, originPoint); + + // 3. origin -> header + rewriter.setInsertionPointToEnd(originBlock); + rewriter.create(loc, ValueRange{lowerBoundVal}, headerBlock); + + // 4. header: loop_control + rewriter.setInsertionPointToEnd(headerBlock); + rewriter.create( + loc, + headerBlock->getArgument(0), // current index + stepVal, upperBoundVal, rewriter.getStringAttr("lt"), + ValueRange{}, // passthrough + bodyBlock, exitBlock); + + // 5. body: clone forOp body, mapping index + rewriter.setInsertionPointToStart(bodyBlock); + Value currentIndex = bodyBlock->getArgument(0); + if (!forOp.getRegion().empty()) { + Block &sourceBlock = forOp.getRegion().front(); + IRMapping mapping; + mapping.map(sourceBlock.getArgument(0), currentIndex); + for (auto &op : llvm::make_range(sourceBlock.begin(), + std::prev(sourceBlock.end()))) { + Operation *clonedOp = rewriter.clone(op, mapping); + for (unsigned i = 0; i < op.getNumResults(); ++i) + mapping.map(op.getResult(i), clonedOp->getResult(i)); + } + } + + // 6. body 结尾跳 header,传当前 index + rewriter.setInsertionPointToEnd(bodyBlock); + rewriter.create(loc, ValueRange{currentIndex}, headerBlock); + + // 7. 
exit 跳 continue + rewriter.setInsertionPointToEnd(exitBlock); + rewriter.create(loc, ValueRange{}, continueBlock); + + // 8. 移除原 affine.for + rewriter.eraseOp(forOp); + + return success(); + } +}; + +struct LowerAffineToNeuraPass + : public PassWrapper> { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(LowerAffineToNeuraPass) + + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + } + + StringRef getArgument() const override { return "lower-affine-to-neura"; } + StringRef getDescription() const override { + return "Lower affine operations to Neura dialect operations"; + } + + void runOnOperation() override { + FuncOp funcOp = getOperation(); + MLIRContext *context = funcOp.getContext(); + + // ConversionTarget target(*context); + // target.addIllegalOp(); + // target.addLegalDialect(); + + RewritePatternSet patterns(context); + patterns.add( + context); + + if (failed(applyPatternsAndFoldGreedily(getOperation(), + std::move(patterns)))) { + funcOp.emitError("Failed to lower affine operations to Neura dialect"); + signalPassFailure(); + } + } +}; +} // namespace + +std::unique_ptr mlir::createLowerAffineToNeuraPass() { + return std::make_unique(); +} \ No newline at end of file diff --git a/lib/Conversion/AffineToNeura/CMakeLists.txt b/lib/Conversion/AffineToNeura/CMakeLists.txt new file mode 100644 index 00000000..fc71ff70 --- /dev/null +++ b/lib/Conversion/AffineToNeura/CMakeLists.txt @@ -0,0 +1,15 @@ +include_directories(${CMAKE_CURRENT_BINARY_DIR}) + +add_mlir_conversion_library(MLIRNeuraAffineToNeuraPass + AffineToNeuraPass.cpp + + DEPENDS + MLIRConversionIncGen + + LINK_LIBS PUBLIC + MLIRIR + MLIRPass + MLIRSupport + MLIRTransforms + # MLIRNeura +) \ No newline at end of file diff --git a/lib/Conversion/ArithToNeura/ArithToNeuraPass.cpp b/lib/Conversion/ArithToNeura/ArithToNeuraPass.cpp index ab952519..af926302 100644 --- a/lib/Conversion/ArithToNeura/ArithToNeuraPass.cpp +++ b/lib/Conversion/ArithToNeura/ArithToNeuraPass.cpp @@ -24,7 +24,7 @@ using namespace mlir::func; using namespace mlir::neura; #define GEN_PASS_DEF_LOWERARITHTONEURA -#include "NeuraDialect/NeuraPasses.h.inc" +#include "Conversion/ConversionPasses.h.inc" namespace{ diff --git a/lib/Conversion/CMakeLists.txt b/lib/Conversion/CMakeLists.txt index 1dbce29f..2a33d1e2 100644 --- a/lib/Conversion/CMakeLists.txt +++ b/lib/Conversion/CMakeLists.txt @@ -2,6 +2,7 @@ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) add_subdirectory(ArithToNeura) add_subdirectory(LlvmToNeura) +add_subdirectory(AffineToNeura) # add_mlir_library( # MLIRNeuraConversion diff --git a/lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp b/lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp index c9c2fe23..39d72b39 100644 --- a/lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp +++ b/lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp @@ -25,7 +25,7 @@ using namespace mlir; using namespace mlir::neura; #define GEN_PASS_DEF_LOWERLLVMTONEURA -#include "NeuraDialect/NeuraPasses.h.inc" +#include "Conversion/ConversionPasses.h.inc" namespace { diff --git a/test/affine2neura/gpt2-node11/compile.sh b/test/affine2neura/gpt2-node11/compile.sh new file mode 100755 index 00000000..4d3eeaee --- /dev/null +++ b/test/affine2neura/gpt2-node11/compile.sh @@ -0,0 +1,3 @@ +/home/lucas/Project/NeuraCompiler/thirdparty/Polygeist/build/bin/cgeist ./node11.cpp -S --raise-scf-to-affine -o ./node11.mlir +/home/lucas/Project/NeuraCompiler/thirdparty/Polygeist/build/bin/polygeist-opt ./node11.mlir --affine-loop-unroll="unroll-factor=2" -o 
./node11_unroll.mlir +# /home/lucas/Project/NeuraCompiler/thirdparty/Polygeist/build/bin/polygeist-opt ./node27_unroll.mlir --affine-loop-tile="tile-size=2" -o ./node27_tile.mlir \ No newline at end of file diff --git a/test/affine2neura/gpt2-node11/node11.cpp b/test/affine2neura/gpt2-node11/node11.cpp new file mode 100644 index 00000000..45e4262c --- /dev/null +++ b/test/affine2neura/gpt2-node11/node11.cpp @@ -0,0 +1,12 @@ +float input[1][16][64]; +float output[1][16]; + +int main() { + for (int arg2 = 0; arg2 < 1; arg2++) { + for (int arg3 = 0; arg3 < 16; arg3++) { + for (int arg4 = 0; arg4 < 64; arg4+=1) { + output[arg2][arg3] += input[arg2][arg3][arg4]; + } + } + } +} \ No newline at end of file diff --git a/test/affine2neura/simpleloop/compile.sh b/test/affine2neura/simpleloop/compile.sh new file mode 100755 index 00000000..f19caf0e --- /dev/null +++ b/test/affine2neura/simpleloop/compile.sh @@ -0,0 +1,3 @@ +/home/lucas/Project/NeuraCompiler/thirdparty/Polygeist/build/bin/cgeist ./simple.cpp -S --raise-scf-to-affine -o ./simple.mlir +/home/lucas/Project/NeuraCompiler/thirdparty/Polygeist/build/bin/polygeist-opt ./simple.mlir --affine-loop-unroll="unroll-factor=2" -o ./simple_unroll.mlir +# /home/lucas/Project/NeuraCompiler/thirdparty/Polygeist/build/bin/polygeist-opt ./node27_unroll.mlir --affine-loop-tile="tile-size=2" -o ./node27_tile.mlir \ No newline at end of file diff --git a/test/affine2neura/simpleloop/simple.cpp b/test/affine2neura/simpleloop/simple.cpp new file mode 100644 index 00000000..6078f497 --- /dev/null +++ b/test/affine2neura/simpleloop/simple.cpp @@ -0,0 +1,12 @@ +float A[100]; +float C[100]; + +int main() { + const int size = 100; + for (int i = 0; i < size; ++i) { + float loaded_value = A[i]; // Instruction 1: Load value from A + float multiplied_value = loaded_value * 10.0f; // Instruction 2: Multiply the value + C[i] = multiplied_value; // Instruction 3: Store result into C + } + return 0; +} diff --git a/tools/mlir-neura-opt/mlir-neura-opt.cpp b/tools/mlir-neura-opt/mlir-neura-opt.cpp index d21664fb..5453a29d 100644 --- a/tools/mlir-neura-opt/mlir-neura-opt.cpp +++ b/tools/mlir-neura-opt/mlir-neura-opt.cpp @@ -3,6 +3,7 @@ #include "mlir/Dialect/DLTI/DLTI.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/InitAllDialects.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/InitAllPasses.h" #include "mlir/Support/FileUtilities.h" #include "mlir/Support/LogicalResult.h" @@ -20,6 +21,8 @@ int main(int argc, char **argv) { registry.insert(); registry.insert(); registry.insert(); + registry.insert(); + registry.insert(); mlir::neura::registerPasses(); mlir::registerPasses(); From 7e165475eee7982a204439828de473d3947c23e1 Mon Sep 17 00:00:00 2001 From: ShangkunLI Date: Fri, 13 Jun 2025 18:55:43 +0800 Subject: [PATCH 03/13] lower affine.apply to neura.add --- include/NeuraDialect/NeuraOps.td | 5 +- .../AffineToNeura/AffineToNeuraPass.cpp | 54 +++++++++++++++---- 2 files changed, 48 insertions(+), 11 deletions(-) diff --git a/include/NeuraDialect/NeuraOps.td b/include/NeuraDialect/NeuraOps.td index bfba1d4f..1f76a981 100644 --- a/include/NeuraDialect/NeuraOps.td +++ b/include/NeuraDialect/NeuraOps.td @@ -1,6 +1,7 @@ // NeuraOps.td - Custom operation definitions. include "NeuraDialect/NeuraDialect.td" +include "mlir/IR/CommonTypeConstraints.td" // ---------------------------------------------------- // Defines basic scalar operations. 
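(For context: the AffineApplyLowering pattern added in this patch rewrites a
single-result map of the form d0 + cst into a constant plus an add.
Schematically, assuming the custom constant syntax enabled in the previous
patch,
    %j = affine.apply affine_map<(d0) -> (d0 + 2)>(%i)
becomes
    %c2 = neura.constant {value = 2 : index} : index
    %j  = neura.add %i, %c2 : index
though neura.add has no declared custom format yet, so it would actually
print in the generic form.)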
@@ -18,8 +19,8 @@ def Neura_ConstantOp : Op {
 def Neura_AddOp : Op {
   let summary = "Integer addition operation";
   let opName = "add";
-  let arguments = (ins AnyInteger:$lhs, AnyInteger:$rhs, Optional:$predicate);
-  let results = (outs AnyInteger:$result);
+  let arguments = (ins SignlessIntegerLike:$lhs, SignlessIntegerLike:$rhs, Optional:$predicate);
+  let results = (outs SignlessIntegerLike:$result);
   // let assemblyFormat = "$lhs `,` $rhs `,` $predicate attr-dict `:` type($result)";
   let traits = [SameOperandsAndResultElementType];
 }
diff --git a/lib/Conversion/AffineToNeura/AffineToNeuraPass.cpp b/lib/Conversion/AffineToNeura/AffineToNeuraPass.cpp
index e080da9e..51cbfe12 100644
--- a/lib/Conversion/AffineToNeura/AffineToNeuraPass.cpp
+++ b/lib/Conversion/AffineToNeura/AffineToNeuraPass.cpp
@@ -3,6 +3,7 @@
 #include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/IRMapping.h"
@@ -275,6 +276,47 @@ struct AffineForLowering : public OpRewritePattern<affine::AffineForOp> {
   }
 };
 
+struct AffineApplyLowering : public OpRewritePattern<affine::AffineApplyOp> {
+  using OpRewritePattern<affine::AffineApplyOp>::OpRewritePattern;
+  LogicalResult matchAndRewrite(affine::AffineApplyOp applyOp,
+                                PatternRewriter &rewriter) const override {
+    AffineMap map = applyOp.getAffineMap();
+    ValueRange operands = applyOp.getMapOperands();
+    auto loc = applyOp.getLoc();
+
+    if (map.getNumResults() != 1) {
+      return applyOp.emitError("AffineApplyOp must have a single result");
+    }
+
+    AffineExpr expr = map.getResult(0);
+    // d0 + cst
+    if (auto binExpr = expr.dyn_cast<AffineBinaryOpExpr>()) {
+      if (binExpr.getKind() == AffineExprKind::Add) {
+        if (auto dim = binExpr.getLHS().dyn_cast<AffineDimExpr>()) {
+          if (auto cst = binExpr.getRHS().dyn_cast<AffineConstantExpr>()) {
+            auto cstVal = rewriter.create<neura::ConstantOp>(
+                loc, rewriter.getIndexType(),
+                rewriter.getIntegerAttr(rewriter.getIndexType(),
+                                        cst.getValue()),
+                nullptr);
+            auto addOp = rewriter.create<neura::AddOp>(
+                loc, cstVal.getType(), operands[dim.getPosition()], cstVal,
+                nullptr);
+            rewriter.replaceOp(applyOp, addOp.getResult());
+            return success();
+          }
+        }
+      }
+    }
+
+    // You can add more cases here for different affine expressions
+    // For now, we will just emit an error for unsupported expressions.
+    return applyOp.emitError("Unsupported affine expression in AffineApplyOp: ")
+           << expr
+           << ". Only simple affine expressions like d0 + cst are supported.";
+  }
+};
+
 struct LowerAffineToNeuraPass
     : public PassWrapper<LowerAffineToNeuraPass, OperationPass<FuncOp>> {
   MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(LowerAffineToNeuraPass)
@@ -293,18 +335,12 @@ struct LowerAffineToNeuraPass
     FuncOp funcOp = getOperation();
     MLIRContext *context = funcOp.getContext();
 
-    // ConversionTarget target(*context);
-    // target.addIllegalOp();
-    // target.addLegalDialect();
-
-    RewritePatternSet patterns(context);
-    patterns.add<AffineLoadLowering, AffineStoreLowering, AffineForLowering>(
-        context);
+    RewritePatternSet patterns(context);
+    patterns.add<AffineLoadLowering, AffineStoreLowering, AffineForLowering,
+                 AffineApplyLowering>(context);
 
     if (failed(applyPatternsAndFoldGreedily(getOperation(),
-                                            std::move(patterns)))) {
+                                           std::move(patterns)))) {
       funcOp.emitError("Failed to lower affine operations to Neura dialect");
       signalPassFailure();
     }

From ae6134aeaa507977279754a7e65b88786355fc1d Mon Sep 17 00:00:00 2001
From: ShangkunLI
Date: Fri, 13 Jun 2025 19:10:43 +0800
Subject: [PATCH 04/13] fix bugs

---
 CMakeLists.txt | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index adaf6c9b..9a83b5b7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,10 +10,10 @@ set(CMAKE_CXX_STANDARD_REQUIRED YES)
 
 add_compile_options(-g)
 
-set(MLIR_DIR /home/lucas/llvm-project/build/lib/cmake/mlir)
-set(LLVM_DIR /home/lucas/llvm-project/build/lib/cmake/llvm)
-set(MLIR_SRC_DIR /home/lucas/llvm-project/mlir)
-set(MLIR_BINARY_DIR /home/lucas/llvm-project/build)
+# set(MLIR_DIR /home/lucas/llvm-project/build/lib/cmake/mlir)
+# set(LLVM_DIR /home/lucas/llvm-project/build/lib/cmake/llvm)
+# set(MLIR_SRC_DIR /home/lucas/llvm-project/mlir)
+# set(MLIR_BINARY_DIR /home/lucas/llvm-project/build)
 
 message(STATUS "Using MLIRConfig.cmake in: ${MLIR_DIR}")
 message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}")

From 8c3c6e5943710a6470317b18cd80ae6bcdb51669 Mon Sep 17 00:00:00 2001
From: ShangkunLI
Date: Fri, 13 Jun 2025 21:37:34 +0800
Subject: [PATCH 05/13] fix dyn_cast bugs

---
 CMakeLists.txt                                     |  8 ++--
 include/CMakeLists.txt                             |  3 +-
 include/Compiler/CMakeLists.txt                    |  3 ++
 include/Compiler/CompilerPasses.h                  | 29 +++++++++++++++
 include/Compiler/CompilerPasses.td                 | 17 +++++++++
 include/NeuraDialect/NeuraPasses.h                 |  4 ++
 lib/CMakeLists.txt                                 |  3 +-
 .../AffineToNeura/AffineToNeuraPass.cpp            | 37 ++++++++++++-------
 tools/neura-compiler/neura-compiler.cpp            |  0
 9 files changed, 84 insertions(+), 20 deletions(-)
 create mode 100644 include/Compiler/CMakeLists.txt
 create mode 100644 include/Compiler/CompilerPasses.h
 create mode 100644 include/Compiler/CompilerPasses.td
 create mode 100644 tools/neura-compiler/neura-compiler.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9a83b5b7..adaf6c9b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,10 +10,10 @@ set(CMAKE_CXX_STANDARD_REQUIRED YES)
 
 add_compile_options(-g)
 
-# set(MLIR_DIR /home/lucas/llvm-project/build/lib/cmake/mlir)
-# set(LLVM_DIR /home/lucas/llvm-project/build/lib/cmake/llvm)
-# set(MLIR_SRC_DIR /home/lucas/llvm-project/mlir)
-# set(MLIR_BINARY_DIR /home/lucas/llvm-project/build)
+set(MLIR_DIR /home/lucas/llvm-project/build/lib/cmake/mlir)
+set(LLVM_DIR /home/lucas/llvm-project/build/lib/cmake/llvm)
+set(MLIR_SRC_DIR /home/lucas/llvm-project/mlir)
+set(MLIR_BINARY_DIR /home/lucas/llvm-project/build)
 
 message(STATUS "Using MLIRConfig.cmake in: ${MLIR_DIR}")
 message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}")
diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt
index 599a4181..7ed6674c 100644
--- a/include/CMakeLists.txt
+++ b/include/CMakeLists.txt
@@ -1,2 +1,3 @@
 add_subdirectory(NeuraDialect)
-add_subdirectory(Conversion)
\ No newline at end of file
+add_subdirectory(Conversion)
+# add_subdirectory(Compiler)
\ No newline at end of file
diff --git a/include/Compiler/CMakeLists.txt b/include/Compiler/CMakeLists.txt
new file mode 100644
index 00000000..61aad97c
--- /dev/null
+++ b/include/Compiler/CMakeLists.txt
@@ -0,0 +1,3 @@
+set(LLVM_TARGET_DEFINITIONS CompilerPasses.td)
+mlir_tablegen(CompilerPasses.h.inc --gen-pass-decls)
+add_public_tablegen_target(MLIRCompilerPassesIncGen)
\ No newline at end of file
diff --git a/include/Compiler/CompilerPasses.h b/include/Compiler/CompilerPasses.h
new file mode 100644
index 00000000..36e5db18
--- /dev/null
+++ b/include/Compiler/CompilerPasses.h
@@ -0,0 +1,29 @@
+// ConversionPasses.h - Header file for conversion passes
+
+#ifndef CONVERSION_PASSES_H
+#define CONVERSION_PASSES_H
+
+#include "NeuraDialect/NeuraDialect.h"
+#include "NeuraDialect/NeuraOps.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Pass/PassManager.h"
+#include "mlir/Pass/PassRegistry.h"
+#include <memory>
+
+namespace mlir {
+
+// Passes defined in GraphPasses.td.
+#define GEN_PASS_DECL
+#include "Conversion/ConversionPasses.h.inc"
+
+// Conversion passes.
+std::unique_ptr<Pass> createLowerArithToNeuraPass();
+std::unique_ptr<Pass> createLowerLlvmToNeuraPass();
+std::unique_ptr<Pass> createLowerAffineToNeuraPass();
+
+#define GEN_PASS_REGISTRATION
+#include "Conversion/ConversionPasses.h.inc"
+
+} // namespace mlir
+
+#endif // CONVERSION_PASSES_H
\ No newline at end of file
diff --git a/include/Compiler/CompilerPasses.td b/include/Compiler/CompilerPasses.td
new file mode 100644
index 00000000..ae1ad574
--- /dev/null
+++ b/include/Compiler/CompilerPasses.td
@@ -0,0 +1,17 @@
+// CompilerPasses.td - Passes for neura compiler
+
+#ifndef COMPILER_PASSES_TD
+#define COMPILER_PASSES_TD
+
+include "mlir/Pass/PassBase.td"
+
+//=========================================================//
+// Passes for the CGRA Mapping
+//=========================================================//
+def GenerateDFG: Pass<"generate-dfg", "ModuleOp">{
+  let summary = "Generates a Data Flow Graph (DFG) for the Neura dialect";
+  let description = [{This pass generates a DFG from the Neura dialect operations.}];
+  let constructor = "neura::createGenerateDFGPass()";
+}
+
+#endif // COMPILER_PASSES_TD
\ No newline at end of file
diff --git a/include/NeuraDialect/NeuraPasses.h b/include/NeuraDialect/NeuraPasses.h
index 9cdeef7f..baa5c6f8 100644
--- a/include/NeuraDialect/NeuraPasses.h
+++ b/include/NeuraDialect/NeuraPasses.h
@@ -16,6 +16,7 @@ namespace neura {
 // Passes defined in GraphPasses.td
 #define GEN_PASS_DECL
 #include "NeuraDialect/NeuraPasses.h.inc"
+// Passes used for neura optimization and transformation
 std::unique_ptr<Pass> createInsertDataMovPass();
 std::unique_ptr<Pass> createInsertCtrlMovPass();
 std::unique_ptr<Pass> createFusePatternsPass();
@@ -23,6 +24,9 @@ std::unique_ptr<Pass> createAssignAcceleratorPass();
 std::unique_ptr<Pass> createTransformCtrlToDataFlowPass();
 std::unique_ptr<Pass> createLeveragePredicatedValuePass();
 
+// Passes used for neura compiler
+std::unique_ptr<Pass> createGenerateDFGPass();
+
 #define GEN_PASS_REGISTRATION
 #include "NeuraDialect/NeuraPasses.h.inc"
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
index 599a4181..7ed6674c 100644
--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@@ -1,2 +1,3 @@
 add_subdirectory(NeuraDialect)
-add_subdirectory(Conversion)
\ No newline at end of file
+add_subdirectory(Conversion)
+# add_subdirectory(Compiler)
\ No newline at end of file
diff --git a/lib/Conversion/AffineToNeura/AffineToNeuraPass.cpp b/lib/Conversion/AffineToNeura/AffineToNeuraPass.cpp
index 51cbfe12..cd5085f7 100644
--- a/lib/Conversion/AffineToNeura/AffineToNeuraPass.cpp
+++ b/lib/Conversion/AffineToNeura/AffineToNeuraPass.cpp
@@ -50,13 +50,15 @@ struct AffineLoadLowering : public OpRewritePattern<affine::AffineLoadOp> {
     }
 
     for (AffineExpr expr : map.getResults()) {
-      if (auto constExpr = expr.dyn_cast<AffineConstantExpr>()) {
+      if (expr.isa<AffineConstantExpr>()) {
+        auto constExpr = expr.cast<AffineConstantExpr>();
         auto indexType = rewriter.getIndexType();
         auto valueAttr =
             rewriter.getIntegerAttr(indexType, constExpr.getValue());
         newIndices.push_back(rewriter.create<neura::ConstantOp>(
             loc, indexType, valueAttr, nullptr));
-      } else if (auto dimExpr = expr.dyn_cast<AffineDimExpr>()) {
+      } else if (expr.isa<AffineDimExpr>()) {
+        auto dimExpr = expr.cast<AffineDimExpr>();
         if (dimExpr.getPosition() >= map.getNumDims() ||
             dimExpr.getPosition() >=
                 mapOperands
                     .size()) { // Check against mapOperands size for safety
@@ -65,7 +67,8 @@ struct AffineLoadLowering : public OpRewritePattern<affine::AffineLoadOp> {
           return loadOp.emitError(
               "affine map dimension out of bounds for map operands");
         }
         newIndices.push_back(mapOperands[dimExpr.getPosition()]);
-      } else if (auto symExpr = expr.dyn_cast<AffineSymbolExpr>()) {
+      } else if (expr.isa<AffineSymbolExpr>()) {
+        auto symExpr = expr.cast<AffineSymbolExpr>();
         unsigned symbolOperandIndex = map.getNumDims() + symExpr.getPosition();
         if (symbolOperandIndex >= mapOperands.size()) {
           return loadOp.emitError(
@@ -86,7 +89,7 @@ struct AffineLoadLowering : public OpRewritePattern<affine::AffineLoadOp> {
       }
     }
 
-    auto memRefType = memref.getType().dyn_cast<MemRefType>();
+    auto memRefType = memref.getType().cast<MemRefType>();
     if (!memRefType) {
       return loadOp.emitError("base of load is not a MemRefType");
     }
@@ -119,20 +122,23 @@ struct AffineStoreLowering : public OpRewritePattern<affine::AffineStoreOp> {
     newIndices.reserve(map.getNumResults());
 
     for (AffineExpr expr : map.getResults()) {
-      if (auto constExpr = expr.dyn_cast<AffineConstantExpr>()) {
+      if (expr.isa<AffineConstantExpr>()) {
+        auto constExpr = expr.cast<AffineConstantExpr>();
         auto indexType = rewriter.getIndexType();
         auto valueAttr =
             rewriter.getIntegerAttr(indexType, constExpr.getValue());
         newIndices.push_back(rewriter.create<neura::ConstantOp>(
             loc, indexType, valueAttr, nullptr));
-      } else if (auto dimExpr = expr.dyn_cast<AffineDimExpr>()) {
+      } else if (expr.isa<AffineDimExpr>()) {
+        auto dimExpr = expr.cast<AffineDimExpr>();
         if (dimExpr.getPosition() >= map.getNumDims() ||
             dimExpr.getPosition() >= mapOperands.size()) {
           return storeOp.emitError(
               "affine map dimension out of bounds for map operands");
         }
         newIndices.push_back(mapOperands[dimExpr.getPosition()]);
-      } else if (auto symExpr = expr.dyn_cast<AffineSymbolExpr>()) {
+      } else if (expr.isa<AffineSymbolExpr>()) {
+        auto symExpr = expr.cast<AffineSymbolExpr>();
         unsigned symbolOperandIndex = map.getNumDims() + symExpr.getPosition();
         if (symbolOperandIndex >= mapOperands.size()) {
           return storeOp.emitError(
@@ -150,7 +156,7 @@ struct AffineStoreLowering : public OpRewritePattern<affine::AffineStoreOp> {
       }
     }
 
-    auto memRefType = memref.getType().dyn_cast<MemRefType>();
+    auto memRefType = memref.getType().cast<MemRefType>();
     if (!memRefType) {
       return storeOp.emitError("base of store is not a MemRefType");
     }
@@ -290,10 +296,13 @@ struct AffineApplyLowering : public OpRewritePattern<affine::AffineApplyOp> {
 
     AffineExpr expr = map.getResult(0);
     // d0 + cst
-    if (auto binExpr = expr.dyn_cast<AffineBinaryOpExpr>()) {
+    if (expr.isa<AffineBinaryOpExpr>()) {
+      auto binExpr = expr.cast<AffineBinaryOpExpr>();
       if (binExpr.getKind() == AffineExprKind::Add) {
-        if (auto dim = binExpr.getLHS().dyn_cast<AffineDimExpr>()) {
-          if (auto cst = binExpr.getRHS().dyn_cast<AffineConstantExpr>()) {
+        if (binExpr.getLHS().isa<AffineDimExpr>()) {
+          auto dim = binExpr.getLHS().cast<AffineDimExpr>();
+          if (binExpr.getRHS().isa<AffineConstantExpr>()) {
+            auto cst = binExpr.getRHS().cast<AffineConstantExpr>();
             auto cstVal = rewriter.create<neura::ConstantOp>(
                 loc, rewriter.getIndexType(),
                 rewriter.getIntegerAttr(rewriter.getIndexType(),
@@ -311,9 +320,9 @@ struct AffineApplyLowering : public OpRewritePattern<affine::AffineApplyOp> {
 
     // You can add more cases here for different affine expressions
     // For now, we will just emit an error for unsupported expressions.
-    return applyOp.emitError("Unsupported affine expression in AffineApplyOp: ")
-           << expr
-           << ". Only simple affine expressions like d0 + cst are supported.";
+    return applyOp.emitError(
+               "Unsupported complex affine expression in AffineApplyOp.\n")
+           << "Only simple affine expressions like d0 + cst are supported.\n";
   }
 };
diff --git a/tools/neura-compiler/neura-compiler.cpp b/tools/neura-compiler/neura-compiler.cpp
new file mode 100644
index 00000000..e69de29b

From 0c2406436d571d43ae1e1be8cbaa53d505159d5c Mon Sep 17 00:00:00 2001
From: ShangkunLI
Date: Fri, 13 Jun 2025 23:30:49 +0800
Subject: [PATCH 06/13] set correct MLIR path in CMakeLists

---
 CMakeLists.txt                     | 8 ++++----
 include/NeuraDialect/NeuraPasses.h | 2 ++
 lib/NeuraDialect/CMakeLists.txt    | 1 +
 lib/NeuraDialect/NeuraPasses.cpp   | 3 +++
 4 files changed, 10 insertions(+), 4 deletions(-)
 create mode 100644 lib/NeuraDialect/NeuraPasses.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index adaf6c9b..9a83b5b7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,10 +10,10 @@ set(CMAKE_CXX_STANDARD_REQUIRED YES)
 
 add_compile_options(-g)
 
-set(MLIR_DIR /home/lucas/llvm-project/build/lib/cmake/mlir)
-set(LLVM_DIR /home/lucas/llvm-project/build/lib/cmake/llvm)
-set(MLIR_SRC_DIR /home/lucas/llvm-project/mlir)
-set(MLIR_BINARY_DIR /home/lucas/llvm-project/build)
+# set(MLIR_DIR /home/lucas/llvm-project/build/lib/cmake/mlir)
+# set(LLVM_DIR /home/lucas/llvm-project/build/lib/cmake/llvm)
+# set(MLIR_SRC_DIR /home/lucas/llvm-project/mlir)
+# set(MLIR_BINARY_DIR /home/lucas/llvm-project/build)
 
 message(STATUS "Using MLIRConfig.cmake in: ${MLIR_DIR}")
 message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}")
diff --git a/include/NeuraDialect/NeuraPasses.h b/include/NeuraDialect/NeuraPasses.h
index baa5c6f8..27f039ae 100644
--- a/include/NeuraDialect/NeuraPasses.h
+++ b/include/NeuraDialect/NeuraPasses.h
@@ -13,6 +13,8 @@ namespace mlir {
 
 namespace neura {
 
+
+
 // Passes defined in GraphPasses.td
 #define GEN_PASS_DECL
 #include "NeuraDialect/NeuraPasses.h.inc"
diff --git a/lib/NeuraDialect/CMakeLists.txt b/lib/NeuraDialect/CMakeLists.txt
index 50532491..34f19ac4 100644
--- a/lib/NeuraDialect/CMakeLists.txt
+++ b/lib/NeuraDialect/CMakeLists.txt
@@ -18,6 +18,7 @@ add_public_tablegen_target(MLIRNeuraDialectIncGen)
 add_mlir_dialect_library(MLIRNeura
   Neura.cpp
   NeuraTypes.cpp
+  NeuraPasses.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${PROJECT_SOURCE_DIR}/include/NeuraDialect
diff --git a/lib/NeuraDialect/NeuraPasses.cpp b/lib/NeuraDialect/NeuraPasses.cpp
new file mode 100644
index 00000000..72b3a6a6
--- /dev/null
+++ b/lib/NeuraDialect/NeuraPasses.cpp
@@ -0,0 +1,3 @@
+#include "mlir/Pass/PassManager.h"
+#include "mlir/Transforms/Passes.h"
+

From 2f34550f7a2cec9ba67938a8db2701e30ab0d7c4 Mon Sep 17 00:00:00 2001
From: ShangkunLI
Date: Sat, 14 Jun 2025 10:34:25 +0800
Subject: [PATCH 07/13] change the target of arith2neura and affine2neura

---
 include/Conversion/ConversionPasses.td              | 4 ++--
 lib/Conversion/AffineToNeura/AffineToNeuraPass.cpp  | 8 ++++----
 lib/Conversion/ArithToNeura/ArithToNeuraPass.cpp    | 2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/Conversion/ConversionPasses.td b/include/Conversion/ConversionPasses.td
index 77ee2ef1..cd8eb5ab 100644
--- a/include/Conversion/ConversionPasses.td
+++ b/include/Conversion/ConversionPasses.td
@@ -8,7 +8,7 @@ include "mlir/Pass/PassBase.td"
 
 //=========================================================//
 // Conversion passes
//=========================================================// -def LowerArithToNeura : Pass<"lower-arith-to-neura", "FuncOp">{ +def LowerArithToNeura : Pass<"lower-arith-to-neura", "ModuleOp">{ let summary = "Lower arith to Neura dialect"; let description = [{Lower arith dialect operations to Neura dialect operations.}]; let constructor = "mlir::createLowerArithToNeuraPass()"; @@ -20,7 +20,7 @@ def LowerLlvmToNeura : Pass<"lower-llvm-to-neura", "ModuleOp">{ let constructor = "mlir::createLowerLlvmToNeuraPass()"; } -def LowerAffineToNeura : Pass<"lower-affine-to-neura", "FuncOp">{ +def LowerAffineToNeura : Pass<"lower-affine-to-neura", "ModuleOp">{ let summary = "Lower affine to Neura dialect"; let description = [{Lower affine dialect operations to Neura dialect operations.}]; let constructor = "mlir::createLowerAffineToNeuraPass()"; diff --git a/lib/Conversion/AffineToNeura/AffineToNeuraPass.cpp b/lib/Conversion/AffineToNeura/AffineToNeuraPass.cpp index cd5085f7..de7e8d19 100644 --- a/lib/Conversion/AffineToNeura/AffineToNeuraPass.cpp +++ b/lib/Conversion/AffineToNeura/AffineToNeuraPass.cpp @@ -327,7 +327,7 @@ struct AffineApplyLowering : public OpRewritePattern { }; struct LowerAffineToNeuraPass - : public PassWrapper> { + : public PassWrapper> { MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(LowerAffineToNeuraPass) void getDependentDialects(DialectRegistry ®istry) const override { @@ -341,8 +341,8 @@ struct LowerAffineToNeuraPass } void runOnOperation() override { - FuncOp funcOp = getOperation(); - MLIRContext *context = funcOp.getContext(); + ModuleOp moduleOp = getOperation(); + MLIRContext *context = moduleOp.getContext(); RewritePatternSet patterns(context); patterns.add { }; struct LowerArithToNeuraPass - : public PassWrapper> { + : public PassWrapper> { MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(LowerArithToNeuraPass) From 5bc48842fe13e40a0fda5f78e0ee488e313f92c2 Mon Sep 17 00:00:00 2001 From: ShangkunLI Date: Sat, 14 Jun 2025 11:02:48 +0800 Subject: [PATCH 08/13] change the operand type in neura.add --- include/NeuraDialect/NeuraOps.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/NeuraDialect/NeuraOps.td b/include/NeuraDialect/NeuraOps.td index 1f76a981..2c1b3235 100644 --- a/include/NeuraDialect/NeuraOps.td +++ b/include/NeuraDialect/NeuraOps.td @@ -19,8 +19,8 @@ def Neura_ConstantOp : Op { def Neura_AddOp : Op { let summary = "Integer addition operation"; let opName = "add"; - let arguments = (ins SignlessIntegerLike:$lhs, SignlessIntegerLike:$rhs, Optional:$predicate); - let results = (outs SignlessIntegerLike:$result); + let arguments = (ins AnyType:$lhs, AnyType:$rhs, Optional:$predicate); + let results = (outs AnyType:$result); // let assemblyFormat = "$lhs `,` $rhs `,` $predicate attr-dict `:` type($result)"; let traits = [SameOperandsAndResultElementType]; } From c0a861c7f8babf26fc0867ad0612bcafff242ca4 Mon Sep 17 00:00:00 2001 From: ShangkunLI Date: Sat, 14 Jun 2025 12:56:55 +0800 Subject: [PATCH 09/13] change the assembly format in neura.loop_control --- CMakeLists.txt | 8 ++--- include/CMakeLists.txt | 3 +- include/Compiler/CMakeLists.txt | 3 -- include/Compiler/CompilerPasses.h | 29 ----------------- include/Compiler/CompilerPasses.td | 17 ---------- include/NeuraDialect/CMakeLists.txt | 12 +++---- include/NeuraDialect/NeuraOps.td | 4 +-- include/NeuraDialect/NeuraPasses.h | 4 +-- include/NeuraDialect/NeuraPasses.td | 21 ++++++++---- .../AffineToNeura/AffineToNeuraPass.cpp | 6 +++- lib/NeuraDialect/CMakeLists.txt | 32 
++++++++++--------- lib/NeuraDialect/NeuraPasses.cpp | 24 ++++++++++++++ tools/CMakeLists.txt | 3 +- tools/neura-compiler/CMakeLists.txt | 18 +++++++++++ tools/neura-compiler/neura-compiler.cpp | 32 +++++++++++++++++++ 15 files changed, 128 insertions(+), 88 deletions(-) delete mode 100644 include/Compiler/CMakeLists.txt delete mode 100644 include/Compiler/CompilerPasses.h delete mode 100644 include/Compiler/CompilerPasses.td create mode 100644 tools/neura-compiler/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 9a83b5b7..adaf6c9b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,10 +10,10 @@ set(CMAKE_CXX_STANDARD_REQUIRED YES) add_compile_options(-g) -# set(MLIR_DIR /home/lucas/llvm-project/build/lib/cmake/mlir) -# set(LLVM_DIR /home/lucas/llvm-project/build/lib/cmake/llvm) -# set(MLIR_SRC_DIR /home/lucas/llvm-project/mlir) -# set(MLIR_BINARY_DIR /home/lucas/llvm-project/build) +set(MLIR_DIR /home/lucas/llvm-project/build/lib/cmake/mlir) +set(LLVM_DIR /home/lucas/llvm-project/build/lib/cmake/llvm) +set(MLIR_SRC_DIR /home/lucas/llvm-project/mlir) +set(MLIR_BINARY_DIR /home/lucas/llvm-project/build) message(STATUS "Using MLIRConfig.cmake in: ${MLIR_DIR}") message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt index 7ed6674c..599a4181 100644 --- a/include/CMakeLists.txt +++ b/include/CMakeLists.txt @@ -1,3 +1,2 @@ add_subdirectory(NeuraDialect) -add_subdirectory(Conversion) -# add_subdirectory(Compiler) \ No newline at end of file +add_subdirectory(Conversion) \ No newline at end of file diff --git a/include/Compiler/CMakeLists.txt b/include/Compiler/CMakeLists.txt deleted file mode 100644 index 61aad97c..00000000 --- a/include/Compiler/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS CompilerPasses.td) -mlir_tablegen(CompilerPasses.h.inc --gen-pass-decls) -add_public_tablegen_target(MLIRCompilerPassesIncGen) \ No newline at end of file diff --git a/include/Compiler/CompilerPasses.h b/include/Compiler/CompilerPasses.h deleted file mode 100644 index 36e5db18..00000000 --- a/include/Compiler/CompilerPasses.h +++ /dev/null @@ -1,29 +0,0 @@ -// ConversionPasses.h - Header file for conversion passes - -#ifndef CONVERSION_PASSES_H -#define CONVERSION_PASSES_H - -#include "NeuraDialect/NeuraDialect.h" -#include "NeuraDialect/NeuraOps.h" -#include "mlir/Pass/Pass.h" -#include "mlir/Pass/PassManager.h" -#include "mlir/Pass/PassRegistry.h" -#include - -namespace mlir { - -// Passes defined in GraphPasses.td. -#define GEN_PASS_DECL -#include "Conversion/ConversionPasses.h.inc" - -// Conversion passes. 
-std::unique_ptr createLowerArithToNeuraPass(); -std::unique_ptr createLowerLlvmToNeuraPass(); -std::unique_ptr createLowerAffineToNeuraPass(); - -#define GEN_PASS_REGISTRATION -#include "Conversion/ConversionPasses.h.inc" - -} // namespace mlir - -#endif // CONVERSION_PASSES_H \ No newline at end of file diff --git a/include/Compiler/CompilerPasses.td b/include/Compiler/CompilerPasses.td deleted file mode 100644 index ae1ad574..00000000 --- a/include/Compiler/CompilerPasses.td +++ /dev/null @@ -1,17 +0,0 @@ -// CompilerPasses.td - Passes for neura compiler - -#ifndef COMPILER_PASSES_TD -#define COMPILER_PASSES_TD - -include "mlir/Pass/PassBase.td" - -//=========================================================// -// Passes for the CGRA Mapping -//=========================================================// -def GenerateDFG: Pass<"generate-dfg", "ModuleOp">{ - let summary = "Generates a Data Flow Graph (DFG) for the Neura dialect"; - let description = [{This pass generates a DFG from the Neura dialect operations.}]; - let constructor = "neura::createGenerateDFGPass()"; -} - -#endif // COMPILER_PASSES_TD \ No newline at end of file diff --git a/include/NeuraDialect/CMakeLists.txt b/include/NeuraDialect/CMakeLists.txt index 1c9b30b5..96d06740 100644 --- a/include/NeuraDialect/CMakeLists.txt +++ b/include/NeuraDialect/CMakeLists.txt @@ -1,10 +1,10 @@ # Set TableGen include paths -set(MLIR_TABLEGEN_INCLUDES - ${PROJECT_SOURCE_DIR}/include - ${PROJECT_SOURCE_DIR}/include/NeuraDialect - ${CMAKE_CURRENT_BINARY_DIR}/include/NeuraDialect - ${MLIR_MAIN_INCLUDE_DIR} - ${MLIR_INCLUDE_DIR}) +# set(MLIR_TABLEGEN_INCLUDES +# ${PROJECT_SOURCE_DIR}/include +# ${PROJECT_SOURCE_DIR}/include/NeuraDialect +# ${CMAKE_CURRENT_BINARY_DIR}/include/NeuraDialect +# ${MLIR_MAIN_INCLUDE_DIR} +# ${MLIR_INCLUDE_DIR}) add_mlir_dialect(Neura neura) diff --git a/include/NeuraDialect/NeuraOps.td b/include/NeuraDialect/NeuraOps.td index 2c1b3235..43ded351 100644 --- a/include/NeuraDialect/NeuraOps.td +++ b/include/NeuraDialect/NeuraOps.td @@ -320,12 +320,12 @@ def Neura_LoopControlOp: Op{ Index:$step, Index:$bound, DefaultValuedAttr:$loop_type, // Loop type: "lt", "le", "gt", "ge", "eq", "ne" - Variadic:$passthrough_args // Additional arguments to pass through to the successors + Variadic:$body_args // Additional arguments to pass through to the successors ); let results = (outs); let successors = (successor AnySuccessor:$body, // loop body successors AnySuccessor:$exit // exit successors ); - let assemblyFormat = "`current_index` `:` $current_index `,` `step` `:` $step `,` `bound` `:` $bound `,` `loop_type` `:` $loop_type (`passthrough` `(` $passthrough_args^ `:` type($passthrough_args) `)`)? `then` $body `else` $exit attr-dict"; + let assemblyFormat = "`current_index` `:` $current_index `,` `step` `:` $step `,` `bound` `:` $bound `,` `loop_type` `:` $loop_type `then` $body(`(`$body_args^ `:` type($body_args)`)`)? 
`else` $exit attr-dict"; } \ No newline at end of file diff --git a/include/NeuraDialect/NeuraPasses.h b/include/NeuraDialect/NeuraPasses.h index 27f039ae..ff168337 100644 --- a/include/NeuraDialect/NeuraPasses.h +++ b/include/NeuraDialect/NeuraPasses.h @@ -13,7 +13,7 @@ namespace mlir { namespace neura { - +void registerNeuraLegalizePassPipeline(); // Passes defined in GraphPasses.td #define GEN_PASS_DECL @@ -27,7 +27,7 @@ std::unique_ptr createTransformCtrlToDataFlowPass(); std::unique_ptr createLeveragePredicatedValuePass(); // Passes used for neura compiler -std::unique_ptr createGenerateDFGPass(); +// std::unique_ptr createGenerateDFGPass(); #define GEN_PASS_REGISTRATION #include "NeuraDialect/NeuraPasses.h.inc" diff --git a/include/NeuraDialect/NeuraPasses.td b/include/NeuraDialect/NeuraPasses.td index f4ea76a7..6615c2e4 100644 --- a/include/NeuraDialect/NeuraPasses.td +++ b/include/NeuraDialect/NeuraPasses.td @@ -36,18 +36,27 @@ def InsertCtrlMov : Pass<"insert-ctrl-mov", "ModuleOp"> { def TransformCtrlToDataFlow : Pass<"transform-ctrl-to-data-flow", "ModuleOp"> { let summary = "Inserts ctrl move operations in the Neura dialect"; - let description = - [{Transform ctrl to predicate-based data flow.}]; + let description = [{Transform ctrl to predicate - based data flow.}]; let constructor = "neura::createTransformCtrlToDataFlowPass()"; } def LeveragePredicatedValue : Pass<"leverage-predicated-value", "ModuleOp"> { let summary = "Convert values to predicated values in Neura dialect"; - let description = [{ - This pass converts regular values to predicated values in Neura dialect operations. - Each value is wrapped in a predicated value type with a default true predicate. - }]; + let description = [{This pass converts regular values to predicated values in + Neura dialect operations + .Each value is wrapped in a predicated value type + with a default true predicate.}]; let constructor = "neura::createLeveragePredicatedValuePass()"; } +//=========================================================// +// Passes for the CGRA Mapping +//=========================================================// +// def GenerateDFG : Pass<"generate-dfg", "ModuleOp"> { +// let summary = "Generates a Data Flow Graph (DFG) for the Neura dialect"; +// let description = +// [{This pass generates a DFG from the Neura dialect operations.}]; +// let constructor = "neura::createGenerateDFGPass()"; +// } + #endif // NEURA_PASSES_TD \ No newline at end of file diff --git a/lib/Conversion/AffineToNeura/AffineToNeuraPass.cpp b/lib/Conversion/AffineToNeura/AffineToNeuraPass.cpp index de7e8d19..e5402667 100644 --- a/lib/Conversion/AffineToNeura/AffineToNeuraPass.cpp +++ b/lib/Conversion/AffineToNeura/AffineToNeuraPass.cpp @@ -245,11 +245,15 @@ struct AffineForLowering : public OpRewritePattern { // 4. header: loop_control rewriter.setInsertionPointToEnd(headerBlock); + SmallVector bodyArgs; + bodyArgs.push_back(headerBlock->getArgument(0)); // current index + // You can add more arguments if needed + rewriter.create( loc, headerBlock->getArgument(0), // current index stepVal, upperBoundVal, rewriter.getStringAttr("lt"), - ValueRange{}, // passthrough + bodyArgs, // passthrough bodyBlock, exitBlock); // 5. 
body: clone forOp body, mapping index diff --git a/lib/NeuraDialect/CMakeLists.txt b/lib/NeuraDialect/CMakeLists.txt index 34f19ac4..d9a626b6 100644 --- a/lib/NeuraDialect/CMakeLists.txt +++ b/lib/NeuraDialect/CMakeLists.txt @@ -1,18 +1,18 @@ -# Set include paths for TableGen -set(MLIR_TABLEGEN_INCLUDES - "-I${PROJECT_SOURCE_DIR}/include" - "-I${PROJECT_SOURCE_DIR}/include/NeuraDialect" - "-I${CMAKE_CURRENT_BINARY_DIR}/include/NeuraDialect") +# # Set include paths for TableGen +# set(MLIR_TABLEGEN_INCLUDES +# "-I${PROJECT_SOURCE_DIR}/include" +# "-I${PROJECT_SOURCE_DIR}/include/NeuraDialect" +# "-I${CMAKE_CURRENT_BINARY_DIR}/include/NeuraDialect") -# Generate TableGen files -set(LLVM_TARGET_DEFINITIONS ${PROJECT_SOURCE_DIR}/include/NeuraDialect/Neura.td) -mlir_tablegen(Neura.h.inc -gen-op-decls ${MLIR_TABLEGEN_INCLUDES}) -mlir_tablegen(Neura.cpp.inc -gen-op-defs ${MLIR_TABLEGEN_INCLUDES}) -mlir_tablegen(NeuraDialect.h.inc -gen-dialect-decls ${MLIR_TABLEGEN_INCLUDES}) -mlir_tablegen(NeuraDialect.cpp.inc -gen-dialect-defs ${MLIR_TABLEGEN_INCLUDES}) -mlir_tablegen(NeuraTypes.h.inc -gen-typedef-decls ${MLIR_TABLEGEN_INCLUDES}) -mlir_tablegen(NeuraTypes.cpp.inc -gen-typedef-defs ${MLIR_TABLEGEN_INCLUDES}) -add_public_tablegen_target(MLIRNeuraDialectIncGen) +# # Generate TableGen files +# set(LLVM_TARGET_DEFINITIONS ${PROJECT_SOURCE_DIR}/include/NeuraDialect/Neura.td) +# mlir_tablegen(Neura.h.inc -gen-op-decls ${MLIR_TABLEGEN_INCLUDES}) +# mlir_tablegen(Neura.cpp.inc -gen-op-defs ${MLIR_TABLEGEN_INCLUDES}) +# mlir_tablegen(NeuraDialect.h.inc -gen-dialect-decls ${MLIR_TABLEGEN_INCLUDES}) +# mlir_tablegen(NeuraDialect.cpp.inc -gen-dialect-defs ${MLIR_TABLEGEN_INCLUDES}) +# mlir_tablegen(NeuraTypes.h.inc -gen-typedef-decls ${MLIR_TABLEGEN_INCLUDES}) +# mlir_tablegen(NeuraTypes.cpp.inc -gen-typedef-defs ${MLIR_TABLEGEN_INCLUDES}) +# add_public_tablegen_target(MLIRNeuraDialectIncGen) # Add the dialect library add_mlir_dialect_library(MLIRNeura @@ -24,7 +24,9 @@ add_mlir_dialect_library(MLIRNeura ${PROJECT_SOURCE_DIR}/include/NeuraDialect DEPENDS - MLIRNeuraDialectIncGen + MLIRNeuraIncGen + MLIRNeuraTransformsIncGen + MLIRConversionIncGen LINK_LIBS PUBLIC MLIRIR diff --git a/lib/NeuraDialect/NeuraPasses.cpp b/lib/NeuraDialect/NeuraPasses.cpp index 72b3a6a6..11b92b13 100644 --- a/lib/NeuraDialect/NeuraPasses.cpp +++ b/lib/NeuraDialect/NeuraPasses.cpp @@ -1,3 +1,27 @@ #include "mlir/Pass/PassManager.h" +#include "mlir/Pass/PassRegistry.h" #include "mlir/Transforms/Passes.h" +#include "NeuraDialect/NeuraDialect.h" +#include "NeuraDialect/NeuraOps.h" +#include "NeuraDialect/NeuraPasses.h" +#include "NeuraDialect/NeuraTypes.h" +#include "Conversion/ConversionPasses.h" + +// This pass pipeline can convert all the other dialects into the Neura dialect +void mlir::neura::registerNeuraLegalizePassPipeline() { + PassPipelineRegistration<>("neura-legalize", + "Legalize operations to Neura dialect", + [](OpPassManager &pm) { + // Convert all the other dialects into the Neura dialect + pm.addPass(mlir::createLowerAffineToNeuraPass()); + pm.addPass(mlir::createLowerArithToNeuraPass()); + pm.addPass(mlir::createLowerLlvmToNeuraPass()); + + // Insert data and control movement operations + // pm.addPass(mlir::neura::createLeveragePredicatedValuePass()); + // pm.addPass(mlir::neura::createInsertDataMovPass()); + // pm.addPass(mlir::neura::createInsertCtrlMovPass()); + // pm.addPass(mlir::neura::createTransformCtrlToDataFlowPass()); + }); +} \ No newline at end of file diff --git a/tools/CMakeLists.txt 
b/tools/CMakeLists.txt index 2f980553..8390f87c 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -1,2 +1,3 @@ add_subdirectory(mlir-neura-opt) -add_subdirectory(neura-interpreter) \ No newline at end of file +add_subdirectory(neura-interpreter) +add_subdirectory(neura-compiler) \ No newline at end of file diff --git a/tools/neura-compiler/CMakeLists.txt b/tools/neura-compiler/CMakeLists.txt new file mode 100644 index 00000000..69e78747 --- /dev/null +++ b/tools/neura-compiler/CMakeLists.txt @@ -0,0 +1,18 @@ +add_executable(neura-compiler neura-compiler.cpp) +get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) +get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS) +set(LIBS + ${dialect_libs} + ${conversion_libs} + MLIRNeuraTransforms + MLIRConversion + MLIRNeura + MLIRTransforms + MLIROptLib + MLIRPass + MLIRIR + MLIRParser + MLIRSupport + ) + +target_link_libraries(neura-compiler PRIVATE ${LIBS}) \ No newline at end of file diff --git a/tools/neura-compiler/neura-compiler.cpp b/tools/neura-compiler/neura-compiler.cpp index e69de29b..8180709e 100644 --- a/tools/neura-compiler/neura-compiler.cpp +++ b/tools/neura-compiler/neura-compiler.cpp @@ -0,0 +1,32 @@ +// neura-compiler.cpp + +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/DLTI/DLTI.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/InitAllDialects.h" +#include "mlir/InitAllPasses.h" +#include "mlir/Support/FileUtilities.h" +#include "mlir/Support/LogicalResult.h" +#include "mlir/Tools/mlir-opt/MlirOptMain.h" + +#include "Conversion/ConversionPasses.h" +#include "NeuraDialect/NeuraDialect.h" +#include "NeuraDialect/NeuraPasses.h" + +int main(int argc, char **argv) { + // Registers MLIR dialects. + mlir::DialectRegistry registry; + registry.insert(); + registry.insert(); + registry.insert(); + registry.insert(); + registry.insert(); + registry.insert(); + registry.insert(); + + mlir::neura::registerNeuraLegalizePassPipeline(); + + // Runs the MLIR optimizer. 
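// For context, the `neura-legalize` pipeline registered in NeuraPasses.cpp
// above is what this driver exposes. A minimal usage sketch (the flag name
// comes from that PassPipelineRegistration; the invocation shape itself is
// an assumption, not taken from this patch):
//
//   ./bin/neura-compiler input.mlir --neura-legalize -o output.mlir
//
// which runs lower-affine-to-neura, lower-arith-to-neura, and
// lower-llvm-to-neura back to back in a single invocation.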
+ return mlir::asMainReturnCode( + mlir::MlirOptMain(argc, argv, "Neura Dialect Optimizer", registry)); +} \ No newline at end of file From 083aea8e0ad9fab3be31f3dd855e11a26a6f7bdf Mon Sep 17 00:00:00 2001 From: ShangkunLI Date: Sat, 14 Jun 2025 12:57:17 +0800 Subject: [PATCH 10/13] change the assembly format in neura.loop_control --- CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index adaf6c9b..9a83b5b7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,10 +10,10 @@ set(CMAKE_CXX_STANDARD_REQUIRED YES) add_compile_options(-g) -set(MLIR_DIR /home/lucas/llvm-project/build/lib/cmake/mlir) -set(LLVM_DIR /home/lucas/llvm-project/build/lib/cmake/llvm) -set(MLIR_SRC_DIR /home/lucas/llvm-project/mlir) -set(MLIR_BINARY_DIR /home/lucas/llvm-project/build) +# set(MLIR_DIR /home/lucas/llvm-project/build/lib/cmake/mlir) +# set(LLVM_DIR /home/lucas/llvm-project/build/lib/cmake/llvm) +# set(MLIR_SRC_DIR /home/lucas/llvm-project/mlir) +# set(MLIR_BINARY_DIR /home/lucas/llvm-project/build) message(STATUS "Using MLIRConfig.cmake in: ${MLIR_DIR}") message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") From e917f0a6093d1ad2af86f72470ce4f4cdab7fcf8 Mon Sep 17 00:00:00 2001 From: ShangkunLI Date: Tue, 17 Jun 2025 11:59:53 +0800 Subject: [PATCH 11/13] support lowering affine to neura & add some tests --- .github/workflows/main.yml | 2 +- include/NeuraDialect/NeuraOps.td | 2 +- include/NeuraDialect/NeuraPasses.td | 10 - .../AffineToNeura/AffineToNeuraPass.cpp | 534 +++++++++--------- test/.lit_test_times.txt | 4 + test/affine2neura/deep-nested/deep_nested.cpp | 31 + .../affine2neura/deep-nested/deep_nested.mlir | 47 ++ .../deep-nested/deep_nested_neura.mlir | 125 ++++ test/affine2neura/gpt2-node11/node11.cpp | 6 +- test/affine2neura/gpt2-node11/node11.mlir | 31 + .../gpt2-node11/node11_neura.mlir | 40 ++ test/affine2neura/gpt2-node27/compile.sh | 2 +- test/affine2neura/gpt2-node27/node27.cpp | 2 +- test/affine2neura/gpt2-node27/node27.mlir | 30 + .../gpt2-node27/node27_neura.mlir | 48 ++ .../gpt2-node27/node27_unroll.mlir | 23 + test/affine2neura/gpt2-node30/node30.cpp | 2 +- test/affine2neura/gpt2-node30/node30.mlir | 33 ++ .../gpt2-node30/node30_neura.mlir | 50 ++ .../gpt2-node30/node30_unroll.mlir | 26 + test/affine2neura/simpleloop/compile.sh | 3 - test/affine2neura/simpleloop/simple.cpp | 12 - 22 files changed, 774 insertions(+), 289 deletions(-) create mode 100644 test/.lit_test_times.txt create mode 100644 test/affine2neura/deep-nested/deep_nested.cpp create mode 100644 test/affine2neura/deep-nested/deep_nested.mlir create mode 100644 test/affine2neura/deep-nested/deep_nested_neura.mlir create mode 100644 test/affine2neura/gpt2-node11/node11.mlir create mode 100644 test/affine2neura/gpt2-node11/node11_neura.mlir create mode 100644 test/affine2neura/gpt2-node27/node27.mlir create mode 100644 test/affine2neura/gpt2-node27/node27_neura.mlir create mode 100644 test/affine2neura/gpt2-node27/node27_unroll.mlir create mode 100644 test/affine2neura/gpt2-node30/node30.mlir create mode 100644 test/affine2neura/gpt2-node30/node30_neura.mlir create mode 100644 test/affine2neura/gpt2-node30/node30_unroll.mlir delete mode 100755 test/affine2neura/simpleloop/compile.sh delete mode 100644 test/affine2neura/simpleloop/simple.cpp diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 181c399d..98b116d3 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -40,7 +40,7 @@ jobs: run: | git clone
https://github.com/llvm/llvm-project.git cd llvm-project - git checkout cd70802 + git checkout 6146a88 mkdir build && cd build cmake -G Ninja ../llvm \ -DLLVM_ENABLE_PROJECTS="mlir" \ diff --git a/include/NeuraDialect/NeuraOps.td b/include/NeuraDialect/NeuraOps.td index 43ded351..686f8dd1 100644 --- a/include/NeuraDialect/NeuraOps.td +++ b/include/NeuraDialect/NeuraOps.td @@ -1,7 +1,6 @@ // NeuraOps.td - Custom operation definitions. include "NeuraDialect/NeuraDialect.td" -include "mlir/IR/CommonTypeConstraints.td" // ---------------------------------------------------- // Defines basic scalar operations. @@ -287,6 +286,7 @@ def Neura_ReserveOp : Op { // Defines loop related operations. // Loop iteration operation for index increament and compare +// TODO: Add support for more complex loop structures using LoopIterOp def Neura_LoopIterOp : Op { let summary = "CGRA-optimized loop iteration operation"; let description = [{ diff --git a/include/NeuraDialect/NeuraPasses.td b/include/NeuraDialect/NeuraPasses.td index 6615c2e4..b488924e 100644 --- a/include/NeuraDialect/NeuraPasses.td +++ b/include/NeuraDialect/NeuraPasses.td @@ -49,14 +49,4 @@ def LeveragePredicatedValue : Pass<"leverage-predicated-value", "ModuleOp"> { let constructor = "neura::createLeveragePredicatedValuePass()"; } -//=========================================================// -// Passes for the CGRA Mapping -//=========================================================// -// def GenerateDFG : Pass<"generate-dfg", "ModuleOp"> { -// let summary = "Generates a Data Flow Graph (DFG) for the Neura dialect"; -// let description = -// [{This pass generates a DFG from the Neura dialect operations.}]; -// let constructor = "neura::createGenerateDFGPass()"; -// } - #endif // NEURA_PASSES_TD \ No newline at end of file diff --git a/lib/Conversion/AffineToNeura/AffineToNeuraPass.cpp b/lib/Conversion/AffineToNeura/AffineToNeuraPass.cpp index e5402667..9cf65348 100644 --- a/lib/Conversion/AffineToNeura/AffineToNeuraPass.cpp +++ b/lib/Conversion/AffineToNeura/AffineToNeuraPass.cpp @@ -1,3 +1,4 @@ +#include "Common/AcceleratorAttrs.h" #include "Conversion/ConversionPasses.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Arith/IR/Arith.h" @@ -5,12 +6,15 @@ #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/IR/AffineExpr.h" #include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/IRMapping.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/PatternMatch.h" #include "mlir/IR/Region.h" #include "mlir/IR/ValueRange.h" +#include "mlir/IR/Visitors.h" #include "mlir/Pass/Pass.h" #include "mlir/Support/LLVM.h" #include "mlir/Support/LogicalResult.h" @@ -20,6 +24,7 @@ #include "NeuraDialect/NeuraDialect.h" #include "NeuraDialect/NeuraOps.h" #include "mlir/Transforms/RegionUtils.h" +#include "llvm/Support/LogicalResult.h" #include "llvm/Support/raw_ostream.h" #include @@ -31,291 +36,155 @@ using namespace mlir::func; #include "Conversion/ConversionPasses.h.inc" namespace { +LogicalResult convertAffineMapToIndices(AffineMap map, ValueRange map_operands, + Location loc, PatternRewriter &rewriter, + SmallVector &new_indices) { + new_indices.clear(); + new_indices.reserve(map.getNumResults()); + for (AffineExpr expr : map.getResults()) { + if (AffineConstantExpr const_expr = dyn_cast(expr)) { + IndexType index_type = rewriter.getIndexType(); + IntegerAttr value_attr = + rewriter.getIntegerAttr(index_type,
const_expr.getValue()); + new_indices.push_back(rewriter.create( + loc, index_type, value_attr, nullptr)); // nullptr is for predicated bit + } else if (AffineDimExpr dim_expr = dyn_cast(expr)) { + if (dim_expr.getPosition() >= map.getNumDims() || + dim_expr.getPosition() >= + map_operands + .size()) { // Check against mapOperands size for safety + return failure(); + } + new_indices.push_back(map_operands[dim_expr.getPosition()]); + } else if (AffineSymbolExpr sym_expr = dyn_cast(expr)) { + unsigned symbol_operand_index = map.getNumDims() + sym_expr.getPosition(); + if (symbol_operand_index >= map_operands.size()) { + return failure(); + } + new_indices.push_back(map_operands[symbol_operand_index]); + } else { + // For more complex affine expressions (e.g., d0 + c1), + // materialize the result using affine.apply. + // This is a temporary workaround for complex expressions. + // TODO: Handle more complex expressions. + llvm::errs() << "[affine2neura] Complex affine expression: " << expr + << "\n"; + AffineMap single_result_map = AffineMap::get( + map.getNumDims(), map.getNumSymbols(), expr, rewriter.getContext()); + Value complexIndex = rewriter.create( + loc, single_result_map, map_operands); + new_indices.push_back(complexIndex); + } + } + return success(); +} + struct AffineLoadLowering : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(affine::AffineLoadOp loadOp, + LogicalResult matchAndRewrite(affine::AffineLoadOp load_op, PatternRewriter &rewriter) const override { - auto loc = loadOp.getLoc(); - auto memref = loadOp.getMemref(); - AffineMap map = loadOp.getAffineMap(); - ValueRange mapOperands = loadOp.getMapOperands(); - // Get the indices for the load operation - SmallVector newIndices; - newIndices.reserve(map.getNumResults()); - llvm::errs() << "Lowering affine load operation: " << loadOp << "\n"; - llvm::errs() << "Number of results in affine map: " << map.getNumResults() - << "\n"; - for (auto expr : map.getResults()) { - llvm::errs() << "Map expr: " << expr << "\n"; + Location loc = load_op.getLoc(); + auto memref = load_op.getMemref(); + AffineMap map = load_op.getAffineMap(); + ValueRange map_operands = load_op.getMapOperands(); + // Gets the indices for the load operation + SmallVector new_indices; + if (failed(convertAffineMapToIndices(map, map_operands, loc, rewriter, + new_indices))) { + return load_op.emitError( + "[affine2neura] Failed to convert affine map to indices"); } - for (AffineExpr expr : map.getResults()) { - if (expr.isa()) { - auto constExpr = expr.cast(); - auto indexType = rewriter.getIndexType(); - auto valueAttr = - rewriter.getIntegerAttr(indexType, constExpr.getValue()); - newIndices.push_back(rewriter.create( - loc, indexType, valueAttr, nullptr)); - } else if (expr.isa()) { - auto dimExpr = expr.cast(); - if (dimExpr.getPosition() >= map.getNumDims() || - dimExpr.getPosition() >= - mapOperands - .size()) { // Check against mapOperands size for safety - return loadOp.emitError( - "affine map dimension out of bounds for map operands"); - } - newIndices.push_back(mapOperands[dimExpr.getPosition()]); - } else if (expr.isa()) { - auto symExpr = expr.cast(); - unsigned symbolOperandIndex = map.getNumDims() + symExpr.getPosition(); - if (symbolOperandIndex >= mapOperands.size()) { - return loadOp.emitError( - "affine map symbol out of bounds for map operands"); - } - newIndices.push_back(mapOperands[symbolOperandIndex]); - } else { - // For more complex affine expressions (e.g., d0 + c1), - // 
materialize the result using affine.apply. - // neura.load_indexed expects individual index values. - // This is a temporary workaround for complex expressions. - llvm::errs() << "Complex affine expression: " << expr << "\n"; - AffineMap singleResultMap = AffineMap::get( - map.getNumDims(), map.getNumSymbols(), expr, rewriter.getContext()); - Value complexIndex = rewriter.create( - loc, singleResultMap, mapOperands); - newIndices.push_back(complexIndex); - } - } - - auto memRefType = memref.getType().cast(); - if (!memRefType) { - return loadOp.emitError("base of load is not a MemRefType"); + MemRefType memref_type = dyn_cast(memref.getType()); + if (!memref_type) { + return load_op.emitError( + "[affine2neura] Base of load is not a MemRefType"); } - if (newIndices.size() != static_cast(memRefType.getRank())) { - return loadOp.emitError("number of indices from affine map (") - << newIndices.size() << ") does not match memref rank (" - << memRefType.getRank() << ")"; + if (new_indices.size() != static_cast(memref_type.getRank())) { + return load_op.emitError( + "[affine2neura] Number of indices from affine map (") + << new_indices.size() << ") does not match memref rank (" + << memref_type.getRank() << ")"; } // Create the neura.load_indexed operation - auto newLoadOp = rewriter.create( - loc, loadOp.getType(), memref, ValueRange{newIndices}, nullptr); + LoadIndexedOp new_load_op = rewriter.create( + loc, load_op.getType(), memref, ValueRange{new_indices}, nullptr); // nullptr is for predicated bit - rewriter.replaceOp(loadOp, newLoadOp.getResult()); + rewriter.replaceOp(load_op, new_load_op.getResult()); return success(); } }; struct AffineStoreLowering : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(affine::AffineStoreOp storeOp, + LogicalResult matchAndRewrite(affine::AffineStoreOp store_op, PatternRewriter &rewriter) const override { - auto loc = storeOp.getLoc(); - auto memref = storeOp.getMemref(); - auto value = storeOp.getValueToStore(); - AffineMap map = storeOp.getAffineMap(); - ValueRange mapOperands = storeOp.getMapOperands(); - - SmallVector newIndices; - newIndices.reserve(map.getNumResults()); - - for (AffineExpr expr : map.getResults()) { - if (expr.isa()) { - auto constExpr = expr.cast(); - auto indexType = rewriter.getIndexType(); - auto valueAttr = - rewriter.getIntegerAttr(indexType, constExpr.getValue()); - newIndices.push_back(rewriter.create( - loc, indexType, valueAttr, nullptr)); - } else if (expr.isa()) { - auto dimExpr = expr.cast(); - if (dimExpr.getPosition() >= map.getNumDims() || - dimExpr.getPosition() >= mapOperands.size()) { - return storeOp.emitError( - "affine map dimension out of bounds for map operands"); - } - newIndices.push_back(mapOperands[dimExpr.getPosition()]); - } else if (expr.isa()) { - auto symExpr = expr.cast(); - unsigned symbolOperandIndex = map.getNumDims() + symExpr.getPosition(); - if (symbolOperandIndex >= mapOperands.size()) { - return storeOp.emitError( - "affine map symbol out of bounds for map operands"); - } - newIndices.push_back(mapOperands[symbolOperandIndex]); - } else { - // For more complex affine expressions, materialize the result using - // affine.apply. This is a temporary workaround for complex expressions. 
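// As a concrete illustration of this workaround (the SSA names here are
// hypothetical), a store through the composite map (d0) -> (d0 + 1) is
// expected to be materialized as:
//
//   %idx = affine.apply affine_map<(d0) -> (d0 + 1)>(%i)
//   neura.store_indexed %val to memref<16xf32> %base[%idx] : f32
//
// so store_indexed still receives one plain index value per memref
// dimension; the same (d0 + 1) map appears in the unrolled tests
// (node27_unroll.mlir, node30_unroll.mlir) further down.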
- AffineMap singleResultMap = AffineMap::get( - map.getNumDims(), map.getNumSymbols(), expr, rewriter.getContext()); - Value complexIndex = rewriter.create( - loc, singleResultMap, mapOperands); - newIndices.push_back(complexIndex); - } + Location loc = store_op.getLoc(); + auto memref = store_op.getMemref(); + Value value = store_op.getValueToStore(); + AffineMap map = store_op.getAffineMap(); + ValueRange mapOperands = store_op.getMapOperands(); + + SmallVector newIndices; + if (failed(convertAffineMapToIndices(map, mapOperands, loc, rewriter, + newIndices))) { + return store_op.emitError( + "[affine2neura] Failed to convert affine map to indices"); } - auto memRefType = memref.getType().cast(); + MemRefType memRefType = dyn_cast(memref.getType()); if (!memRefType) { - return storeOp.emitError("base of store is not a MemRefType"); + return store_op.emitError( + "[affine2neura] Base of store is not a MemRefType"); } if (newIndices.size() != static_cast(memRefType.getRank())) { - return storeOp.emitError("number of indices from affine map (") + return store_op.emitError( + "[affine2neura] Number of indices from affine map (") << newIndices.size() << ") does not match memref rank (" << memRefType.getRank() << ")"; } rewriter.create(loc, value, memref, - ValueRange{newIndices}, nullptr); - rewriter.eraseOp(storeOp); - return success(); - } -}; - -struct AffineForLowering : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(affine::AffineForOp forOp, - PatternRewriter &rewriter) const override { - auto loc = forOp.getLoc(); - auto indexType = rewriter.getIndexType(); - - // 1. Extract loop parameters (lower bound, upper bound, step) - Value lowerBoundVal; - if (forOp.hasConstantLowerBound()) { - int lowerBoundConstant = forOp.getConstantLowerBound(); - auto lowerBoundAttr = - rewriter.getIntegerAttr(indexType, lowerBoundConstant); - lowerBoundVal = rewriter.create( - loc, indexType, lowerBoundAttr, nullptr); - } else { - // If the lower bound is not constant, we need to use affine.apply - // This is a temporary workaround for non-constant lower bounds. - llvm::errs() << "Using affine.apply for unconstant lower bound\n"; - affine::AffineBound lowerBound = forOp.getLowerBound(); - AffineMap lowerBoundMap = lowerBound.getMap(); - ValueRange lowerBoundOperands = forOp.getLowerBoundOperands(); - lowerBoundVal = rewriter.create( - loc, lowerBoundMap, lowerBoundOperands); - } - - Value upperBoundVal; - if (forOp.hasConstantUpperBound()) { - int upperBoundConstant = forOp.getConstantUpperBound(); - auto upperBoundAttr = - rewriter.getIntegerAttr(indexType, upperBoundConstant); - upperBoundVal = rewriter.create( - loc, indexType, upperBoundAttr, nullptr); - } else { - // For non-constant upper bounds, we also use affine.apply - llvm::errs() << "Using affine.apply for unconstant upper bound\n"; - affine::AffineBound upperBound = forOp.getUpperBound(); - AffineMap upperBoundMap = upperBound.getMap(); - ValueRange upperBoundOperands = forOp.getUpperBoundOperands(); - upperBoundVal = rewriter.create( - loc, upperBoundMap, upperBoundOperands); - } - - auto stepAttr = rewriter.getIntegerAttr(indexType, forOp.getStep()); - Value stepVal = - rewriter.create(loc, indexType, stepAttr, nullptr); - llvm::errs() << "lower bound: " << lowerBoundVal - << ", upper bound: " << upperBoundVal << ", step: " << stepVal - << "\n"; - - // 2. 
Block structure - Block *originBlock = rewriter.getInsertionBlock(); - auto originPoint = rewriter.getInsertionPoint(); - Region *parentRegion = originBlock->getParent(); - - Block *headerBlock = rewriter.createBlock( - parentRegion, std::next(Region::iterator(originBlock)), {indexType}, - {loc}); - Block *bodyBlock = rewriter.createBlock( - parentRegion, std::next(Region::iterator(headerBlock)), {indexType}, - {loc}); - Block *exitBlock = rewriter.createBlock( - parentRegion, std::next(Region::iterator(bodyBlock))); - Block *continueBlock = rewriter.splitBlock(originBlock, originPoint); - - // 3. origin -> header - rewriter.setInsertionPointToEnd(originBlock); - rewriter.create(loc, ValueRange{lowerBoundVal}, headerBlock); - - // 4. header: loop_control - rewriter.setInsertionPointToEnd(headerBlock); - SmallVector bodyArgs; - bodyArgs.push_back(headerBlock->getArgument(0)); // current index - // You can add more arguments if needed - - rewriter.create( - loc, - headerBlock->getArgument(0), // current index - stepVal, upperBoundVal, rewriter.getStringAttr("lt"), - bodyArgs, // passthrough - bodyBlock, exitBlock); - - // 5. body: clone forOp body, mapping index - rewriter.setInsertionPointToStart(bodyBlock); - Value currentIndex = bodyBlock->getArgument(0); - if (!forOp.getRegion().empty()) { - Block &sourceBlock = forOp.getRegion().front(); - IRMapping mapping; - mapping.map(sourceBlock.getArgument(0), currentIndex); - for (auto &op : llvm::make_range(sourceBlock.begin(), - std::prev(sourceBlock.end()))) { - Operation *clonedOp = rewriter.clone(op, mapping); - for (unsigned i = 0; i < op.getNumResults(); ++i) - mapping.map(op.getResult(i), clonedOp->getResult(i)); - } - } - - // 6. End of body branches back to header, passing the current index - rewriter.setInsertionPointToEnd(bodyBlock); - rewriter.create(loc, ValueRange{currentIndex}, headerBlock); - - // 7. exit branches to continue - rewriter.setInsertionPointToEnd(exitBlock); - rewriter.create(loc, ValueRange{}, continueBlock); - - // 8. Erase the original affine.for - rewriter.eraseOp(forOp); - - return success(); - } -}; - struct AffineApplyLowering : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(affine::AffineApplyOp applyOp, + LogicalResult matchAndRewrite(affine::AffineApplyOp apply_op, PatternRewriter &rewriter) const override { - AffineMap map = applyOp.getAffineMap(); - ValueRange operands = applyOp.getMapOperands(); - auto loc = applyOp.getLoc(); + AffineMap map = apply_op.getAffineMap(); + ValueRange operands = apply_op.getMapOperands(); + Location loc = apply_op.getLoc(); if (map.getNumResults() != 1) { - return applyOp.emitError("AffineApplyOp must have a single result"); + return apply_op.emitError( + "[affine2neura] AffineApplyOp must have a single result"); } AffineExpr expr = map.getResult(0); - // d0 + cst - if (expr.isa()) { - auto binExpr = expr.cast(); - if (binExpr.getKind() == AffineExprKind::Add) { - if (binExpr.getLHS().isa()) { - auto dim = binExpr.getLHS().cast(); - if (binExpr.getRHS().isa()) { - auto cst = binExpr.getRHS().cast(); - auto cstVal = rewriter.create( + // Handle simple affine expressions like d0 + cst + // TODO: Handle more complex expressions + if (isa(expr)) { + AffineBinaryOpExpr bin_expr = dyn_cast(expr); + if (bin_expr.getKind() == AffineExprKind::Add) { + if (isa(bin_expr.getLHS())) { + AffineDimExpr dim = dyn_cast(bin_expr.getLHS()); + if (isa(bin_expr.getRHS())) { + AffineConstantExpr cst = + dyn_cast(bin_expr.getRHS()); + neura::ConstantOp cstVal = rewriter.create( loc, rewriter.getIndexType(), rewriter.getIntegerAttr(rewriter.getIndexType(), cst.getValue()), - nullptr); - auto addOp = rewriter.create( + nullptr); // nullptr is for predicated bit + neura::AddOp addOp = rewriter.create( loc, cstVal.getType(), operands[dim.getPosition()], cstVal, - nullptr); - rewriter.replaceOp(applyOp, addOp.getResult()); + nullptr); // nullptr is for predicated bit + rewriter.replaceOp(apply_op, addOp.getResult()); return success(); } } @@ -324,12 +193,143 @@ struct AffineApplyLowering : public OpRewritePattern { // You can add more cases here for different affine expressions // For now, we will just emit an error for unsupported expressions.
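// Illustrative examples (not taken from the test suite) of what this
// pattern accepts and rejects:
//
//   accepted: %r = affine.apply affine_map<(d0) -> (d0 + 1)>(%i)
//             (lowered to a neura::ConstantOp holding 1 plus a neura::AddOp)
//   rejected: %r = affine.apply affine_map<(d0, d1) -> (d0 * 16 + d1)>(%i, %j)
//
// Any map whose single result is not of the form `dim + constant` falls
// through to the emitError path right below.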
- return applyOp.emitError( - "Unsupported complex affine expression in AffineApplyOp.\n") + return apply_op.emitError("[affine2neura] Unsupported complex affine " + "expression in AffineApplyOp.\n") << "Only simple affine expressions like d0 + cst are supported.\n"; } }; +LogicalResult lowerAffineFor(affine::AffineForOp for_op, OpBuilder &builder, + IRMapping &value_mapping) { + llvm::errs() << "[affine2neura] Lowering AffineForOp: " << for_op << "\n"; + Location loc = for_op.getLoc(); + IndexType index_type = builder.getIndexType(); + + // 1 Extracts loop parameters (lower bound, upper bound, step) + Value lower_bound_val; + if (for_op.hasConstantLowerBound()) { + int64_t lower_bound_constant = for_op.getConstantLowerBound(); + lower_bound_val = builder.create( + loc, index_type, builder.getIndexAttr(lower_bound_constant), nullptr); // nullptr is for predicated bit + } else { + // If the lower bound is not constant, we need to use affine.apply + affine::AffineBound lower_bound = for_op.getLowerBound(); + AffineMap lower_bound_map = lower_bound.getMap(); + ValueRange lower_bound_operands = for_op.getLowerBoundOperands(); + lower_bound_val = builder.create( + loc, lower_bound_map, lower_bound_operands); + } + + Value upper_bound_val; + if (for_op.hasConstantUpperBound()) { + int64_t upper_bound_constant = for_op.getConstantUpperBound(); + upper_bound_val = builder.create( + loc, index_type, builder.getIndexAttr(upper_bound_constant), nullptr); // nullptr is for predicated bit + } else { + // For non-constant upper bounds, we also use affine.apply + affine::AffineBound upper_bound = for_op.getUpperBound(); + AffineMap upper_bound_map = upper_bound.getMap(); + ValueRange upper_bound_operands = for_op.getUpperBoundOperands(); + upper_bound_val = builder.create( + loc, upper_bound_map, upper_bound_operands); + } + + Value step_val = builder.create( + loc, index_type, builder.getIndexAttr(for_op.getStepAsInt()), nullptr); // nullptr is for predicated bit + + // 2 Creates the block structure + Block *origin_block = builder.getInsertionBlock(); + auto origin_point = builder.getInsertionPoint(); + Region *parent_region = origin_block->getParent(); + + // 2.1 Creates the header block + Block *header_block = builder.createBlock( + parent_region, std::next(Region::iterator(origin_block)), {index_type}, + {loc}); + // 2.2 Creates the body block + Block *body_block = builder.createBlock( + parent_region, std::next(Region::iterator(header_block)), {index_type}, + {loc}); + // 2.3 Creates the exit block + Block *exit_block = builder.createBlock( + parent_region, std::next(Region::iterator(body_block))); + // 2.4 Creates the continue block + Block *continue_block = origin_block->splitBlock(origin_point); + + // 3 Connects the blocks + // 3.1 Connects origin_block -> header_block + builder.setInsertionPointToEnd(origin_block); + builder.create(loc, ValueRange{lower_bound_val}, header_block); + + // 3.2 Connects header_block -> body_block + builder.setInsertionPointToEnd(header_block); + SmallVector body_args; + body_args.push_back(header_block->getArgument(0)); // current index + builder.create( + loc, header_block->getArgument(0), step_val, upper_bound_val, + builder.getStringAttr("lt"), body_args, body_block, exit_block); + + // 3.3 Clones the body of the original affine.for operation + // Assumes the body of the affine.for operation is a single block, + // so nested affine.for operations must be handled in order + // (from outermost to innermost). + builder.setInsertionPointToStart(body_block); + Value current_index = body_block->getArgument(0); + if (!for_op.getRegion().empty()) { + Block &source_block = for_op.getRegion().front(); + IRMapping mapping; + mapping.map(source_block.getArgument(0), current_index); + for (Operation &op : llvm::make_range(source_block.begin(), + std::prev(source_block.end()))) { + Operation *cloned_op = builder.clone(op, mapping); + for (unsigned i = 0; i < op.getNumResults(); ++i) + mapping.map(op.getResult(i), cloned_op->getResult(i)); + } + } + + // 3.4 Connects body_block -> header_block + builder.setInsertionPointToEnd(body_block); + builder.create(loc, ValueRange{current_index}, header_block); + + // 3.5 Connects exit_block -> continue_block + builder.setInsertionPointToEnd(exit_block); + builder.create(loc, ValueRange{}, continue_block); + + builder.setInsertionPointToStart(continue_block); + + for_op.erase(); + + return success(); +} + +affine::AffineForOp findOuterMostAffineFor(func::FuncOp &func_op) { + // Finds the outermost affine.for operation + affine::AffineForOp top_for_op = nullptr; + func_op.walk([&](affine::AffineForOp for_op) { + // Checks if this for_op has any AffineForOp parent + Operation *parent_op = for_op->getParentOp(); + bool has_affine_for_parent = false; + + while (parent_op) { + if (isa(parent_op)) { + has_affine_for_parent = true; + break; + } + parent_op = parent_op->getParentOp(); + } + + // If it has no AffineForOp parent, it's a top-level loop + if (!has_affine_for_parent) { + top_for_op = for_op; // Store the found operation + return WalkResult::interrupt(); // Stop walking + } + + return WalkResult::advance(); // Continue walking + }); + + return top_for_op; // Return the found operation +} + struct LowerAffineToNeuraPass : public PassWrapper> { MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(LowerAffineToNeuraPass) @@ -345,18 +345,40 @@ struct LowerAffineToNeuraPass } void runOnOperation() override { - ModuleOp moduleOp = getOperation(); - MLIRContext *context = moduleOp.getContext(); - - RewritePatternSet patterns(context); - patterns.add(context); - - if (failed(applyPatternsAndFoldGreedily(getOperation(), - std::move(patterns)))) { - moduleOp.emitError("Failed to lower affine operations to Neura dialect"); - signalPassFailure(); - } + ModuleOp module_op = getOperation(); + MLIRContext *context = module_op.getContext(); + IRMapping mapping; + module_op.walk( + [&](func::FuncOp func_op) { + if (func_op->hasAttr(mlir::accel::kAcceleratorAttr)) { + auto target = func_op->getAttrOfType( + mlir::accel::kAcceleratorAttr); + if (target && target.getValue() == mlir::accel::kNeuraTarget) { + while (affine::AffineForOp outer_for_op = + findOuterMostAffineFor(func_op)) { + llvm::errs() + << "[affine2neura] Found outermost affine.for operation: " + << outer_for_op << "\n"; + OpBuilder builder(outer_for_op); + if (failed(lowerAffineFor(outer_for_op, builder, mapping))) { + outer_for_op.emitError("[affine2neura] Failed to lower " + "outermost affine.for operation"); + signalPassFailure(); + } + } + + RewritePatternSet patterns(context); + patterns.add(context); + + if (failed(applyPatternsGreedily(func_op.getOperation(), + std::move(patterns)))) { + func_op.emitError("[affine2neura] Failed to lower affine " + "operations to Neura dialect"); + signalPassFailure(); + } + } + } + }); } }; } // namespace diff --git a/test/.lit_test_times.txt b/test/.lit_test_times.txt new file mode 100644 index 00000000..961067b1 --- /dev/null +++
b/test/.lit_test_times.txt @@ -0,0 +1,4 @@ +7.853746e-03 affine2neura/gpt2-node27/node27.mlir +1.136017e-02 affine2neura/deep-nested/deep_nested.mlir +7.997274e-03 affine2neura/gpt2-node11/node11.mlir +7.548809e-03 affine2neura/gpt2-node30/node30.mlir diff --git a/test/affine2neura/deep-nested/deep_nested.cpp b/test/affine2neura/deep-nested/deep_nested.cpp new file mode 100644 index 00000000..405e6c5b --- /dev/null +++ b/test/affine2neura/deep-nested/deep_nested.cpp @@ -0,0 +1,31 @@ +int input_data[3][3][3]; +int output_data[3][3][3]; +float weights[3]; + +int deep_nested() { + // 10 nested loops + for (int i0 = 0; i0 < 3; i0++) { + for (int i1 = 0; i1 < 3; i1++) { + for (int i2 = 0; i2 < 3; i2++) { + for (int i3 = 0; i3 < 3; i3++) { + for (int i4 = 0; i4 < 3; i4++) { + for (int i5 = 0; i5 < 3; i5++) { + for (int i6 = 0; i6 < 3; i6++) { + for (int i7 = 0; i7 < 3; i7++) { + for (int i8 = 0; i8 < 3; i8++) { + for (int i9 = 0; i9 < 3; i9++) { + // Assuming some operation on input_data + output_data[i0][i1][i2] += + input_data[i0][i1][i2]; + } + } + } + } + } + } + } + } + } + } + return 0; +} diff --git a/test/affine2neura/deep-nested/deep_nested.mlir b/test/affine2neura/deep-nested/deep_nested.mlir new file mode 100644 index 00000000..cb5c5db9 --- /dev/null +++ b/test/affine2neura/deep-nested/deep_nested.mlir @@ -0,0 +1,47 @@ +// Check that the affine loop nest is correctly transformed to neura.loop_control +// RUN: mlir-neura-opt %s --assign-accelerator --lower-affine-to-neura | FileCheck %s +module attributes {} { + memref.global @input_data : memref<3x3x3xi32> = uninitialized + memref.global @output_data : memref<3x3x3xi32> = uninitialized + func.func @_Z11deep_nestedv() -> i32 attributes {llvm.linkage = #llvm.linkage} { + %c0_i32 = arith.constant 0 : i32 + %0 = memref.get_global @output_data : memref<3x3x3xi32> + %1 = memref.get_global @input_data : memref<3x3x3xi32> + affine.for %arg0 = 0 to 3 { + affine.for %arg1 = 0 to 3 { + affine.for %arg2 = 0 to 3 { + affine.for %arg3 = 0 to 3 { + affine.for %arg4 = 0 to 3 { + affine.for %arg5 = 0 to 3 { + affine.for %arg6 = 0 to 3 { + affine.for %arg7 = 0 to 3 { + %2 = affine.load %1[%arg0, %arg1, %arg2] : memref<3x3x3xi32> + affine.for %arg8 = 0 to 3 { + affine.for %arg9 = 0 to 3 { + %3 = affine.load %0[%arg0, %arg1, %arg2] : memref<3x3x3xi32> + %4 = arith.addi %3, %2 : i32 + affine.store %4, %0[%arg0, %arg1, %arg2] : memref<3x3x3xi32> + } + } + } + } + } + } + } + } + } + } + return %c0_i32 : i32 + } +} + +// Verify function signature is preserved +// CHECK-LABEL: func.func @_Z11deep_nestedv() -> i32 + +// Verify all affine operations are eliminated +// CHECK-NOT: affine.for +// CHECK-NOT: affine.load +// CHECK-NOT: affine.store +// CHECK-NOT: affine.apply + +// CHECK-COUNT-10: neura.loop_control diff --git a/test/affine2neura/deep-nested/deep_nested_neura.mlir b/test/affine2neura/deep-nested/deep_nested_neura.mlir new file mode 100644 index 00000000..368b6dc5 --- /dev/null +++ b/test/affine2neura/deep-nested/deep_nested_neura.mlir @@ -0,0 +1,125 @@ +module { + memref.global @input_data : memref<3x3x3xi32> = uninitialized + memref.global @output_data : memref<3x3x3xi32> = uninitialized + func.func @_Z11deep_nestedv() -> i32 attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { + %c0_i32 = arith.constant 0 : i32 + %0 = memref.get_global @output_data : memref<3x3x3xi32> + %1 = memref.get_global @input_data : memref<3x3x3xi32> + %2 = neura.constant {value = 0 : index} : index + %3 = neura.constant {value = 3 : index} : index + %4 = 
neura.constant {value = 1 : index} : index + neura.br %2 : index to ^bb2 + ^bb1: // pred: ^bb40 + return %c0_i32 : i32 + ^bb2(%5: index): // 2 preds: ^bb0, ^bb4 + neura.loop_control current_index : %5, step : %4, bound : %3, loop_type : "lt" then ^bb3(%5 : index) else ^bb40 + ^bb3(%6: index): // pred: ^bb2 + %7 = neura.constant {value = 0 : index} : index + %8 = neura.constant {value = 3 : index} : index + %9 = neura.constant {value = 1 : index} : index + neura.br %7 : index to ^bb5 + ^bb4: // pred: ^bb39 + neura.br %6 : index to ^bb2 + ^bb5(%10: index): // 2 preds: ^bb3, ^bb7 + neura.loop_control current_index : %10, step : %9, bound : %8, loop_type : "lt" then ^bb6(%10 : index) else ^bb39 + ^bb6(%11: index): // pred: ^bb5 + %12 = neura.constant {value = 0 : index} : index + %13 = neura.constant {value = 3 : index} : index + %14 = neura.constant {value = 1 : index} : index + neura.br %12 : index to ^bb8 + ^bb7: // pred: ^bb38 + neura.br %11 : index to ^bb5 + ^bb8(%15: index): // 2 preds: ^bb6, ^bb10 + neura.loop_control current_index : %15, step : %14, bound : %13, loop_type : "lt" then ^bb9(%15 : index) else ^bb38 + ^bb9(%16: index): // pred: ^bb8 + %17 = neura.constant {value = 0 : index} : index + %18 = neura.constant {value = 3 : index} : index + %19 = neura.constant {value = 1 : index} : index + neura.br %17 : index to ^bb11 + ^bb10: // pred: ^bb37 + neura.br %16 : index to ^bb8 + ^bb11(%20: index): // 2 preds: ^bb9, ^bb13 + neura.loop_control current_index : %20, step : %19, bound : %18, loop_type : "lt" then ^bb12(%20 : index) else ^bb37 + ^bb12(%21: index): // pred: ^bb11 + %22 = neura.constant {value = 0 : index} : index + %23 = neura.constant {value = 3 : index} : index + %24 = neura.constant {value = 1 : index} : index + neura.br %22 : index to ^bb14 + ^bb13: // pred: ^bb36 + neura.br %21 : index to ^bb11 + ^bb14(%25: index): // 2 preds: ^bb12, ^bb16 + neura.loop_control current_index : %25, step : %24, bound : %23, loop_type : "lt" then ^bb15(%25 : index) else ^bb36 + ^bb15(%26: index): // pred: ^bb14 + %27 = neura.constant {value = 0 : index} : index + %28 = neura.constant {value = 3 : index} : index + %29 = neura.constant {value = 1 : index} : index + neura.br %27 : index to ^bb17 + ^bb16: // pred: ^bb35 + neura.br %26 : index to ^bb14 + ^bb17(%30: index): // 2 preds: ^bb15, ^bb19 + neura.loop_control current_index : %30, step : %29, bound : %28, loop_type : "lt" then ^bb18(%30 : index) else ^bb35 + ^bb18(%31: index): // pred: ^bb17 + %32 = neura.constant {value = 0 : index} : index + %33 = neura.constant {value = 3 : index} : index + %34 = neura.constant {value = 1 : index} : index + neura.br %32 : index to ^bb20 + ^bb19: // pred: ^bb34 + neura.br %31 : index to ^bb17 + ^bb20(%35: index): // 2 preds: ^bb18, ^bb22 + neura.loop_control current_index : %35, step : %34, bound : %33, loop_type : "lt" then ^bb21(%35 : index) else ^bb34 + ^bb21(%36: index): // pred: ^bb20 + %37 = neura.constant {value = 0 : index} : index + %38 = neura.constant {value = 3 : index} : index + %39 = neura.constant {value = 1 : index} : index + neura.br %37 : index to ^bb23 + ^bb22: // pred: ^bb33 + neura.br %36 : index to ^bb20 + ^bb23(%40: index): // 2 preds: ^bb21, ^bb25 + neura.loop_control current_index : %40, step : %39, bound : %38, loop_type : "lt" then ^bb24(%40 : index) else ^bb33 + ^bb24(%41: index): // pred: ^bb23 + %42 = neura.load_indexed memref<3x3x3xi32> %1[%6, %11, %16] : i32 + %43 = neura.constant {value = 0 : index} : index + %44 = neura.constant {value = 3 : index} : index + %45 = 
neura.constant {value = 1 : index} : index + neura.br %43 : index to ^bb26 + ^bb25: // pred: ^bb32 + neura.br %41 : index to ^bb23 + ^bb26(%46: index): // 2 preds: ^bb24, ^bb28 + neura.loop_control current_index : %46, step : %45, bound : %44, loop_type : "lt" then ^bb27(%46 : index) else ^bb32 + ^bb27(%47: index): // pred: ^bb26 + %48 = neura.constant {value = 0 : index} : index + %49 = neura.constant {value = 3 : index} : index + %50 = neura.constant {value = 1 : index} : index + neura.br %48 : index to ^bb29 + ^bb28: // pred: ^bb31 + neura.br %47 : index to ^bb26 + ^bb29(%51: index): // 2 preds: ^bb27, ^bb30 + neura.loop_control current_index : %51, step : %50, bound : %49, loop_type : "lt" then ^bb30(%51 : index) else ^bb31 + ^bb30(%52: index): // pred: ^bb29 + %53 = neura.load_indexed memref<3x3x3xi32> %0[%6, %11, %16] : i32 + %54 = arith.addi %53, %42 : i32 + neura.store_indexed %54 to memref<3x3x3xi32> %0[%6, %11, %16] : i32 + neura.br %52 : index to ^bb29 + ^bb31: // pred: ^bb29 + neura.br : to ^bb28 + ^bb32: // pred: ^bb26 + neura.br : to ^bb25 + ^bb33: // pred: ^bb23 + neura.br : to ^bb22 + ^bb34: // pred: ^bb20 + neura.br : to ^bb19 + ^bb35: // pred: ^bb17 + neura.br : to ^bb16 + ^bb36: // pred: ^bb14 + neura.br : to ^bb13 + ^bb37: // pred: ^bb11 + neura.br : to ^bb10 + ^bb38: // pred: ^bb8 + neura.br : to ^bb7 + ^bb39: // pred: ^bb5 + neura.br : to ^bb4 + ^bb40: // pred: ^bb2 + neura.br : to ^bb1 + } +} + diff --git a/test/affine2neura/gpt2-node11/node11.cpp b/test/affine2neura/gpt2-node11/node11.cpp index 45e4262c..fdd7519f 100644 --- a/test/affine2neura/gpt2-node11/node11.cpp +++ b/test/affine2neura/gpt2-node11/node11.cpp @@ -1,12 +1,12 @@ float input[1][16][64]; float output[1][16]; -int main() { +int node11() { for (int arg2 = 0; arg2 < 1; arg2++) { for (int arg3 = 0; arg3 < 16; arg3++) { - for (int arg4 = 0; arg4 < 64; arg4+=1) { + for (int arg4 = 0; arg4 < 64; arg4+=1) output[arg2][arg3] += input[arg2][arg3][arg4]; - } } } + return 0; } \ No newline at end of file diff --git a/test/affine2neura/gpt2-node11/node11.mlir b/test/affine2neura/gpt2-node11/node11.mlir new file mode 100644 index 00000000..795bb45f --- /dev/null +++ b/test/affine2neura/gpt2-node11/node11.mlir @@ -0,0 +1,31 @@ +// Check that the affine loop nest is correctly transformed to neura.loop_control +// RUN: mlir-neura-opt %s --assign-accelerator --lower-affine-to-neura | FileCheck %s +module attributes {} { + memref.global @input : memref<1x16x64xf32> = uninitialized + memref.global @output : memref<1x16xf32> = uninitialized + func.func @_Z6node11v() -> i32 attributes {llvm.linkage = #llvm.linkage} { + %c0_i32 = arith.constant 0 : i32 + %0 = memref.get_global @output : memref<1x16xf32> + %1 = memref.get_global @input : memref<1x16x64xf32> + affine.for %arg0 = 0 to 16 { + affine.for %arg1 = 0 to 64 { + %2 = affine.load %1[0, %arg0, %arg1] : memref<1x16x64xf32> + %3 = affine.load %0[0, %arg0] : memref<1x16xf32> + %4 = arith.addf %3, %2 : f32 + affine.store %4, %0[0, %arg0] : memref<1x16xf32> + } + } + return %c0_i32 : i32 + } +} + +// Verify function signature is preserved +// CHECK-LABEL: func.func @_Z6node11v() -> i32 + +// Verify all affine operations are eliminated +// CHECK-NOT: affine.for +// CHECK-NOT: affine.load +// CHECK-NOT: affine.store +// CHECK-NOT: affine.apply + +// CHECK-COUNT-2: neura.loop_control diff --git a/test/affine2neura/gpt2-node11/node11_neura.mlir b/test/affine2neura/gpt2-node11/node11_neura.mlir new file mode 100644 index 00000000..ccc214c7 --- /dev/null +++ 
b/test/affine2neura/gpt2-node11/node11_neura.mlir @@ -0,0 +1,40 @@ +module { + memref.global @input : memref<1x16x64xf32> = uninitialized + memref.global @output : memref<1x16xf32> = uninitialized + func.func @_Z6node11v() -> i32 attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { + %c0_i32 = arith.constant 0 : i32 + %0 = memref.get_global @output : memref<1x16xf32> + %1 = memref.get_global @input : memref<1x16x64xf32> + %2 = neura.constant {value = 0 : index} : index + %3 = neura.constant {value = 16 : index} : index + %4 = neura.constant {value = 1 : index} : index + neura.br %2 : index to ^bb2 + ^bb1: // pred: ^bb8 + return %c0_i32 : i32 + ^bb2(%5: index): // 2 preds: ^bb0, ^bb4 + neura.loop_control current_index : %5, step : %4, bound : %3, loop_type : "lt" then ^bb3(%5 : index) else ^bb8 + ^bb3(%6: index): // pred: ^bb2 + %7 = neura.constant {value = 0 : index} : index + %8 = neura.constant {value = 64 : index} : index + %9 = neura.constant {value = 1 : index} : index + neura.br %7 : index to ^bb5 + ^bb4: // pred: ^bb7 + neura.br %6 : index to ^bb2 + ^bb5(%10: index): // 2 preds: ^bb3, ^bb6 + neura.loop_control current_index : %10, step : %9, bound : %8, loop_type : "lt" then ^bb6(%10 : index) else ^bb7 + ^bb6(%11: index): // pred: ^bb5 + %12 = neura.constant {value = 0 : index} : index + %13 = neura.load_indexed memref<1x16x64xf32> %1[%12, %6, %11] : f32 + %14 = neura.constant {value = 0 : index} : index + %15 = neura.load_indexed memref<1x16xf32> %0[%14, %6] : f32 + %16 = arith.addf %15, %13 : f32 + %17 = neura.constant {value = 0 : index} : index + neura.store_indexed %16 to memref<1x16xf32> %0[%17, %6] : f32 + neura.br %11 : index to ^bb5 + ^bb7: // pred: ^bb5 + neura.br : to ^bb4 + ^bb8: // pred: ^bb2 + neura.br : to ^bb1 + } +} + diff --git a/test/affine2neura/gpt2-node27/compile.sh b/test/affine2neura/gpt2-node27/compile.sh index e1c6c965..bc268f1a 100755 --- a/test/affine2neura/gpt2-node27/compile.sh +++ b/test/affine2neura/gpt2-node27/compile.sh @@ -1,3 +1,3 @@ -/home/lucas/Project/NeuraCompiler/thirdparty/Polygeist/build/bin/cgeist ./node27_unroll.cpp -S --raise-scf-to-affine -o ./node27.mlir +/home/lucas/Project/NeuraCompiler/thirdparty/Polygeist/build/bin/cgeist ./node27.cpp -S --raise-scf-to-affine -o ./node27.mlir /home/lucas/Project/NeuraCompiler/thirdparty/Polygeist/build/bin/polygeist-opt ./node27.mlir --affine-loop-unroll="unroll-factor=2" -o ./node27_unroll.mlir # /home/lucas/Project/NeuraCompiler/thirdparty/Polygeist/build/bin/polygeist-opt ./node27_unroll.mlir --affine-loop-tile="tile-size=2" -o ./node27_tile.mlir \ No newline at end of file diff --git a/test/affine2neura/gpt2-node27/node27.cpp b/test/affine2neura/gpt2-node27/node27.cpp index 3bcf72c2..456aaabd 100644 --- a/test/affine2neura/gpt2-node27/node27.cpp +++ b/test/affine2neura/gpt2-node27/node27.cpp @@ -1,7 +1,7 @@ float input[1][16][4][16]; float output[1][4][16][16]; -int main() { +int node27() { for (int arg2 = 0; arg2 < 1; arg2++) { for (int arg3 = 0; arg3 < 16; arg3++) { for (int arg4 = 0; arg4 < 4; arg4 += 1) { diff --git a/test/affine2neura/gpt2-node27/node27.mlir b/test/affine2neura/gpt2-node27/node27.mlir new file mode 100644 index 00000000..3bc78ff5 --- /dev/null +++ b/test/affine2neura/gpt2-node27/node27.mlir @@ -0,0 +1,30 @@ +// Check that the affine loop nest is correctly transformed to neura.loop_control +// RUN: mlir-neura-opt %s --assign-accelerator --lower-affine-to-neura | FileCheck %s +module attributes {} { + memref.global @input : memref<1x16x4x16xf32> = uninitialized 
+ memref.global @output : memref<1x4x16x16xf32> = uninitialized + func.func @_Z6node27v() -> i32 attributes {llvm.linkage = #llvm.linkage} { + %0 = llvm.mlir.undef : i32 + %1 = memref.get_global @output : memref<1x4x16x16xf32> + %2 = memref.get_global @input : memref<1x16x4x16xf32> + affine.for %arg0 = 0 to 16 { + affine.for %arg1 = 0 to 4 { + affine.for %arg2 = 0 to 16 { + %3 = affine.load %2[0, %arg1, %arg0, %arg2] : memref<1x16x4x16xf32> + affine.store %3, %1[0, %arg0, %arg1, %arg2] : memref<1x4x16x16xf32> + } + } + } + return %0 : i32 + } +} +// Verify function signature is preserved +// CHECK-LABEL: func.func @_Z6node27v() -> i32 + +// Verify all affine operations are eliminated +// CHECK-NOT: affine.for +// CHECK-NOT: affine.load +// CHECK-NOT: affine.store +// CHECK-NOT: affine.apply + +// CHECK-COUNT-3: neura.loop_control diff --git a/test/affine2neura/gpt2-node27/node27_neura.mlir b/test/affine2neura/gpt2-node27/node27_neura.mlir new file mode 100644 index 00000000..8680f78c --- /dev/null +++ b/test/affine2neura/gpt2-node27/node27_neura.mlir @@ -0,0 +1,48 @@ +module { + memref.global @input : memref<1x16x4x16xf32> = uninitialized + memref.global @output : memref<1x4x16x16xf32> = uninitialized + func.func @_Z6node27v() -> i32 attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { + %0 = llvm.mlir.undef : i32 + %1 = memref.get_global @output : memref<1x4x16x16xf32> + %2 = memref.get_global @input : memref<1x16x4x16xf32> + %3 = neura.constant {value = 0 : index} : index + %4 = neura.constant {value = 16 : index} : index + %5 = neura.constant {value = 1 : index} : index + neura.br %3 : index to ^bb2 + ^bb1: // pred: ^bb12 + return %0 : i32 + ^bb2(%6: index): // 2 preds: ^bb0, ^bb4 + neura.loop_control current_index : %6, step : %5, bound : %4, loop_type : "lt" then ^bb3(%6 : index) else ^bb12 + ^bb3(%7: index): // pred: ^bb2 + %8 = neura.constant {value = 0 : index} : index + %9 = neura.constant {value = 4 : index} : index + %10 = neura.constant {value = 1 : index} : index + neura.br %8 : index to ^bb5 + ^bb4: // pred: ^bb11 + neura.br %7 : index to ^bb2 + ^bb5(%11: index): // 2 preds: ^bb3, ^bb7 + neura.loop_control current_index : %11, step : %10, bound : %9, loop_type : "lt" then ^bb6(%11 : index) else ^bb11 + ^bb6(%12: index): // pred: ^bb5 + %13 = neura.constant {value = 0 : index} : index + %14 = neura.constant {value = 16 : index} : index + %15 = neura.constant {value = 1 : index} : index + neura.br %13 : index to ^bb8 + ^bb7: // pred: ^bb10 + neura.br %12 : index to ^bb5 + ^bb8(%16: index): // 2 preds: ^bb6, ^bb9 + neura.loop_control current_index : %16, step : %15, bound : %14, loop_type : "lt" then ^bb9(%16 : index) else ^bb10 + ^bb9(%17: index): // pred: ^bb8 + %18 = neura.constant {value = 0 : index} : index + %19 = neura.load_indexed memref<1x16x4x16xf32> %2[%18, %12, %7, %17] : f32 + %20 = neura.constant {value = 0 : index} : index + neura.store_indexed %19 to memref<1x4x16x16xf32> %1[%20, %7, %12, %17] : f32 + neura.br %17 : index to ^bb8 + ^bb10: // pred: ^bb8 + neura.br : to ^bb7 + ^bb11: // pred: ^bb5 + neura.br : to ^bb4 + ^bb12: // pred: ^bb2 + neura.br : to ^bb1 + } +} + diff --git a/test/affine2neura/gpt2-node27/node27_unroll.mlir b/test/affine2neura/gpt2-node27/node27_unroll.mlir new file mode 100644 index 00000000..7708b308 --- /dev/null +++ b/test/affine2neura/gpt2-node27/node27_unroll.mlir @@ -0,0 +1,23 @@ +#map = affine_map<(d0) -> (d0 + 1)> +module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : 
vector<2xi32>>, #dlti.dl_entry : vector<4xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry, dense<32> : vector<4xi32>>, #dlti.dl_entry, dense<64> : vector<4xi32>>, #dlti.dl_entry, dense<32> : vector<4xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry<"dlti.stack_alignment", 128 : i32>, #dlti.dl_entry<"dlti.endianness", "little">>, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-linux-gnu", "polygeist.target-cpu" = "x86-64", "polygeist.target-features" = "+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87", "polygeist.tune-cpu" = "generic"} {
+  memref.global @input : memref<1x16x4x16xf32> = uninitialized
+  memref.global @output : memref<1x4x16x16xf32> = uninitialized
+  func.func @_Z6node27v() -> i32 attributes {llvm.linkage = #llvm.linkage} {
+    %0 = llvm.mlir.undef : i32
+    %1 = memref.get_global @output : memref<1x4x16x16xf32>
+    %2 = memref.get_global @input : memref<1x16x4x16xf32>
+    affine.for %arg0 = 0 to 16 {
+      affine.for %arg1 = 0 to 4 {
+        affine.for %arg2 = 0 to 16 step 2 {
+          %3 = affine.load %2[0, %arg1, %arg0, %arg2] : memref<1x16x4x16xf32>
+          affine.store %3, %1[0, %arg0, %arg1, %arg2] : memref<1x4x16x16xf32>
+          %4 = affine.apply #map(%arg2)
+          %5 = affine.load %2[0, %arg1, %arg0, %4] : memref<1x16x4x16xf32>
+          affine.store %5, %1[0, %arg0, %arg1, %4] : memref<1x4x16x16xf32>
+        }
+      }
+    }
+    return %0 : i32
+  }
+}
+
diff --git a/test/affine2neura/gpt2-node30/node30.cpp b/test/affine2neura/gpt2-node30/node30.cpp
index 01177f33..596450f8 100644
--- a/test/affine2neura/gpt2-node30/node30.cpp
+++ b/test/affine2neura/gpt2-node30/node30.cpp
@@ -2,7 +2,7 @@
 float A[1][4][16][64];
 // float B=20.0;
 float C[1][4][16][64];
-int main() {
+int node30() {
   for (int arg2 = 0; arg2 < 1; arg2++) {
     for (int arg3 = 0; arg3 < 4; arg3++) {
       for (int arg4 = 0; arg4 < 16; arg4++) {
diff --git a/test/affine2neura/gpt2-node30/node30.mlir b/test/affine2neura/gpt2-node30/node30.mlir
new file mode 100644
index 00000000..9d3b77d0
--- /dev/null
+++ b/test/affine2neura/gpt2-node30/node30.mlir
@@ -0,0 +1,33 @@
+// Check that the affine loop nest is correctly transformed to neura.loop_control
+// RUN: mlir-neura-opt %s --assign-accelerator --lower-affine-to-neura | FileCheck %s
+module attributes {} {
+  memref.global @A : memref<1x4x16x64xf32> = uninitialized
+  memref.global @C : memref<1x4x16x64xf32> = uninitialized
+  func.func @_Z6node30v() -> i32 attributes {llvm.linkage = #llvm.linkage} {
+    %cst = arith.constant 1.000000e+01 : f32
+    %0 = llvm.mlir.undef : i32
+    %1 = memref.get_global @C : memref<1x4x16x64xf32>
+    %2 = memref.get_global @A : memref<1x4x16x64xf32>
+    affine.for %arg0 = 0 to 4 {
+      affine.for %arg1 = 0 to 16 {
+        affine.for %arg2 = 0 to 64 {
+          %3 = affine.load %2[0, %arg0, %arg1, %arg2] : memref<1x4x16x64xf32>
+          %4 = arith.mulf %3, %cst : f32
+          affine.store %4, %1[0, %arg0, %arg1, %arg2] : memref<1x4x16x64xf32>
+        }
+      }
+    }
+    return %0 : i32
+  }
+}
+
+// Verify function signature is preserved
+// CHECK-LABEL: func.func @_Z6node30v() -> i32
+
+// Verify all affine operations are eliminated
+// CHECK-NOT: affine.for
+// CHECK-NOT: affine.load
+// CHECK-NOT: affine.store
+// CHECK-NOT: affine.apply
+
+// CHECK-COUNT-3: neura.loop_control
diff --git a/test/affine2neura/gpt2-node30/node30_neura.mlir b/test/affine2neura/gpt2-node30/node30_neura.mlir
new file mode 100644
index 00000000..7a3c641d
--- /dev/null
+++ b/test/affine2neura/gpt2-node30/node30_neura.mlir
@@ -0,0 +1,50 @@
+module {
+  memref.global @A : memref<1x4x16x64xf32> = uninitialized
+  memref.global @C : memref<1x4x16x64xf32> = uninitialized
+  func.func @_Z6node30v() -> i32 attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} {
+    %cst = arith.constant 1.000000e+01 : f32
+    %0 = llvm.mlir.undef : i32
+    %1 = memref.get_global @C : memref<1x4x16x64xf32>
+    %2 = memref.get_global @A : memref<1x4x16x64xf32>
+    %3 = neura.constant {value = 0 : index} : index
+    %4 = neura.constant {value = 4 : index} : index
+    %5 = neura.constant {value = 1 : index} : index
+    neura.br %3 : index to ^bb2
+  ^bb1:  // pred: ^bb12
+    return %0 : i32
+  ^bb2(%6: index):  // 2 preds: ^bb0, ^bb4
+    neura.loop_control current_index : %6, step : %5, bound : %4, loop_type : "lt" then ^bb3(%6 : index) else ^bb12
+  ^bb3(%7: index):  // pred: ^bb2
+    %8 = neura.constant {value = 0 : index} : index
+    %9 = neura.constant {value = 16 : index} : index
+    %10 = neura.constant {value = 1 : index} : index
+    neura.br %8 : index to ^bb5
+  ^bb4:  // pred: ^bb11
+    neura.br %7 : index to ^bb2
+  ^bb5(%11: index):  // 2 preds: ^bb3, ^bb7
+    neura.loop_control current_index : %11, step : %10, bound : %9, loop_type : "lt" then ^bb6(%11 : index) else ^bb11
+  ^bb6(%12: index):  // pred: ^bb5
+    %13 = neura.constant {value = 0 : index} : index
+    %14 = neura.constant {value = 64 : index} : index
+    %15 = neura.constant {value = 1 : index} : index
+    neura.br %13 : index to ^bb8
+  ^bb7:  // pred: ^bb10
+    neura.br %12 : index to ^bb5
+  ^bb8(%16: index):  // 2 preds: ^bb6, ^bb9
+    neura.loop_control current_index : %16, step : %15, bound : %14, loop_type : "lt" then ^bb9(%16 : index) else ^bb10
+  ^bb9(%17: index):  // pred: ^bb8
+    %18 = neura.constant {value = 0 : index} : index
+    %19 = neura.load_indexed memref<1x4x16x64xf32> %2[%18, %7, %12, %17] : f32
+    %20 = arith.mulf %19, %cst : f32
+    %21 = neura.constant {value = 0 : index} : index
+    neura.store_indexed %20 to memref<1x4x16x64xf32> %1[%21, %7, %12, %17] : f32
+    neura.br %17 : index to ^bb8
+  ^bb10:  // pred: ^bb8
+    neura.br : to ^bb7
+  ^bb11:  // pred: ^bb5
+    neura.br : to ^bb4
+  ^bb12:  // pred: ^bb2
+    neura.br : to ^bb1
+  }
+}
+
diff --git a/test/affine2neura/gpt2-node30/node30_unroll.mlir b/test/affine2neura/gpt2-node30/node30_unroll.mlir
new file mode 100644
index 00000000..e55fe54e
--- /dev/null
+++ b/test/affine2neura/gpt2-node30/node30_unroll.mlir
@@ -0,0 +1,26 @@
+#map = affine_map<(d0) -> (d0 + 1)>
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry, dense<64> : vector<4xi32>>, #dlti.dl_entry, dense<32> : vector<4xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry, dense<32> : vector<4xi32>>, #dlti.dl_entry : vector<4xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry<"dlti.stack_alignment", 128 : i32>, #dlti.dl_entry<"dlti.endianness", "little">>, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-linux-gnu", "polygeist.target-cpu" = "x86-64", "polygeist.target-features" = "+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87", "polygeist.tune-cpu" = "generic"} {
+  memref.global @A : memref<1x4x16x64xf32> = uninitialized
+  memref.global @C : memref<1x4x16x64xf32> = uninitialized
+  func.func @_Z6node30v() -> i32 attributes {llvm.linkage = #llvm.linkage} {
+    %cst = arith.constant 1.000000e+01 : f32
+    %0 = llvm.mlir.undef : i32
+    %1 = memref.get_global @C : memref<1x4x16x64xf32>
+    %2 = memref.get_global @A : memref<1x4x16x64xf32>
+    affine.for %arg0 = 0 to 4 {
+      affine.for %arg1 = 0 to 16 {
+        affine.for %arg2 = 0 to 64 step 2 {
+          %3 = affine.load %2[0, %arg0, %arg1, %arg2] : memref<1x4x16x64xf32>
+          %4 = arith.mulf %3, %cst : f32
+          affine.store %4, %1[0, %arg0, %arg1, %arg2] : memref<1x4x16x64xf32>
+          %5 = affine.apply #map(%arg2)
+          %6 = affine.load %2[0, %arg0, %arg1, %5] : memref<1x4x16x64xf32>
+          %7 = arith.mulf %6, %cst : f32
+          affine.store %7, %1[0, %arg0, %arg1, %5] : memref<1x4x16x64xf32>
+        }
+      }
+    }
+    return %0 : i32
+  }
+}
+
diff --git a/test/affine2neura/simpleloop/compile.sh b/test/affine2neura/simpleloop/compile.sh
deleted file mode 100755
index f19caf0e..00000000
--- a/test/affine2neura/simpleloop/compile.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-/home/lucas/Project/NeuraCompiler/thirdparty/Polygeist/build/bin/cgeist ./simple.cpp -S --raise-scf-to-affine -o ./simple.mlir
-/home/lucas/Project/NeuraCompiler/thirdparty/Polygeist/build/bin/polygeist-opt ./simple.mlir --affine-loop-unroll="unroll-factor=2" -o ./simple_unroll.mlir
-# /home/lucas/Project/NeuraCompiler/thirdparty/Polygeist/build/bin/polygeist-opt ./node27_unroll.mlir --affine-loop-tile="tile-size=2" -o ./node27_tile.mlir
\ No newline at end of file
diff --git a/test/affine2neura/simpleloop/simple.cpp b/test/affine2neura/simpleloop/simple.cpp
deleted file mode 100644
index 6078f497..00000000
--- a/test/affine2neura/simpleloop/simple.cpp
+++ /dev/null
@@ -1,12 +0,0 @@
-float A[100];
-float C[100];
-
-int main() {
-  const int size = 100;
-  for (int i = 0; i < size; ++i) {
-    float loaded_value = A[i]; // Instruction 1: Load value from A
-    float multiplied_value = loaded_value * 10.0f; // Instruction 2: Multiply the value
-    C[i] = multiplied_value; // Instruction 3: Store result into C
-  }
-  return 0;
-}

From 6594f4a4723eb6c5f70e5df5dd1827984b61fc5a Mon Sep 17 00:00:00 2001
From: ShangkunLI
Date: Tue, 17 Jun 2025 14:56:41 +0800
Subject: [PATCH 12/13] [fix] solve conflicts with main

---
 include/NeuraDialect/NeuraOps.td | 2 +-
 test/.lit_test_times.txt         | 7 +++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/include/NeuraDialect/NeuraOps.td b/include/NeuraDialect/NeuraOps.td
index dc76c021..48dd0b54 100644
--- a/include/NeuraDialect/NeuraOps.td
+++ b/include/NeuraDialect/NeuraOps.td
@@ -11,7 +11,7 @@ def Neura_ConstantOp : Op {
     OptionalAttr:$predicate // Add optional predicate attribute
   );
   let results = (outs AnyType:$result);
-  let assemblyFormat = "attr-dict `:` type($result)";
+  // let assemblyFormat = "attr-dict `:` type($result)";
 }
 
 // Defines an addition operation.
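Note: with the declarative assemblyFormat commented out above, neura.constant is presumably printed and parsed by a custom printer defined elsewhere on main (this hunk only resolves the merge conflict; the .td file does not show the replacement). The textual form the tests in this series rely on is unchanged, e.g. this representative line from the lowered output:

    %3 = neura.constant {value = 0 : index} : index

i.e. the attribute dictionary carrying the constant value, followed by the result type, which is exactly what the old format string produced.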
diff --git a/test/.lit_test_times.txt b/test/.lit_test_times.txt
index 961067b1..9a9da3f0 100644
--- a/test/.lit_test_times.txt
+++ b/test/.lit_test_times.txt
@@ -1,4 +1,7 @@
 7.853746e-03 affine2neura/gpt2-node27/node27.mlir
-1.136017e-02 affine2neura/deep-nested/deep_nested.mlir
-7.997274e-03 affine2neura/gpt2-node11/node11.mlir
+9.844303e-03 affine2neura/deep-nested/deep_nested.mlir
+6.515980e-03 affine2neura/gpt2-node11/node11.mlir
 7.548809e-03 affine2neura/gpt2-node30/node30.mlir
+9.920120e-03 neura/ctrl/branch.mlir
+1.126695e-02 neura/ctrl/branch_no_arg.mlir
+9.951830e-03 neura/ctrl/branch_for.mlir

From 9d4ee2553974131824cc5678b5758e6d797a3635 Mon Sep 17 00:00:00 2001
From: ShangkunLI
Date: Tue, 17 Jun 2025 16:37:12 +0800
Subject: [PATCH 13/13] [test] add test

---
 test/.lit_test_times.txt                      |  33 ++++-
 .../deep-nested/deep_nested_neura.mlir        | 125 ------------------
 .../gpt2-node11/node11_neura.mlir             |  40 ------
 .../gpt2-node27/node27_neura.mlir             |  48 -------
 .../gpt2-node27/node27_unroll.mlir            |  23 ----
 .../gpt2-node30/node30_neura.mlir             |  50 -------
 .../gpt2-node30/node30_unroll.mlir            |  26 ----
 7 files changed, 26 insertions(+), 319 deletions(-)
 delete mode 100644 test/affine2neura/deep-nested/deep_nested_neura.mlir
 delete mode 100644 test/affine2neura/gpt2-node11/node11_neura.mlir
 delete mode 100644 test/affine2neura/gpt2-node27/node27_neura.mlir
 delete mode 100644 test/affine2neura/gpt2-node27/node27_unroll.mlir
 delete mode 100644 test/affine2neura/gpt2-node30/node30_neura.mlir
 delete mode 100644 test/affine2neura/gpt2-node30/node30_unroll.mlir

diff --git a/test/.lit_test_times.txt b/test/.lit_test_times.txt
index 9a9da3f0..c0405ae5 100644
--- a/test/.lit_test_times.txt
+++ b/test/.lit_test_times.txt
@@ -1,7 +1,26 @@
-7.853746e-03 affine2neura/gpt2-node27/node27.mlir
-9.844303e-03 affine2neura/deep-nested/deep_nested.mlir
-6.515980e-03 affine2neura/gpt2-node11/node11.mlir
-7.548809e-03 affine2neura/gpt2-node30/node30.mlir
-9.920120e-03 neura/ctrl/branch.mlir
-1.126695e-02 neura/ctrl/branch_no_arg.mlir
-9.951830e-03 neura/ctrl/branch_for.mlir
+2.734089e-02 affine2neura/gpt2-node27/node27.mlir
+1.068902e-02 affine2neura/deep-nested/deep_nested.mlir
+2.698708e-02 affine2neura/gpt2-node11/node11.mlir
+2.851033e-02 affine2neura/gpt2-node30/node30.mlir
+3.188467e-02 neura/ctrl/branch.mlir
+2.987862e-02 neura/ctrl/branch_no_arg.mlir
+1.032019e-02 neura/ctrl/branch_for.mlir
+-5.869865e-04 affine2neura/deep-nested/deep_nested_neura.mlir
+-5.869865e-04 affine2neura/gpt2-node11/node11_neura.mlir
+-3.650188e-04 affine2neura/gpt2-node27/node27_neura.mlir
+-6.232262e-04 affine2neura/gpt2-node27/node27_unroll.mlir
+-3.950596e-04 affine2neura/gpt2-node30/node30_neura.mlir
+-4.494190e-04 affine2neura/gpt2-node30/node30_unroll.mlir
+6.077766e-03 arith2neura/add.mlir
+2.200377e-01 c2llvm2mlir/test.mlir
+5.845070e-03 neura/arith_add.mlir
+5.631447e-03 neura/fadd_fadd.mlir
+9.507132e-02 neura/for_loop/test.mlir
+2.653909e-02 neura/interpreter/add.mlir
+5.800486e-03 neura/interpreter/interpreter.mlir
+1.472716e-01 neura/interpreter/lower_and_interpret.mlir
+1.522479e-01 neura/interpreter/lower_and_interpret_subf.mlir
+2.643609e-02 neura/interpreter/predicated_data.mlir
+2.739096e-02 neura/llvm_add.mlir
+2.676344e-02 neura/llvm_sub.mlir
+2.563691e-02 test.mlir
diff --git a/test/affine2neura/deep-nested/deep_nested_neura.mlir b/test/affine2neura/deep-nested/deep_nested_neura.mlir
deleted file mode 100644
index 368b6dc5..00000000
--- a/test/affine2neura/deep-nested/deep_nested_neura.mlir
+++ /dev/null
@@ -1,125 +0,0 @@
-module {
-  memref.global @input_data : memref<3x3x3xi32> = uninitialized
-  memref.global @output_data : memref<3x3x3xi32> = uninitialized
-  func.func @_Z11deep_nestedv() -> i32 attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} {
-    %c0_i32 = arith.constant 0 : i32
-    %0 = memref.get_global @output_data : memref<3x3x3xi32>
-    %1 = memref.get_global @input_data : memref<3x3x3xi32>
-    %2 = neura.constant {value = 0 : index} : index
-    %3 = neura.constant {value = 3 : index} : index
-    %4 = neura.constant {value = 1 : index} : index
-    neura.br %2 : index to ^bb2
-  ^bb1:  // pred: ^bb40
-    return %c0_i32 : i32
-  ^bb2(%5: index):  // 2 preds: ^bb0, ^bb4
-    neura.loop_control current_index : %5, step : %4, bound : %3, loop_type : "lt" then ^bb3(%5 : index) else ^bb40
-  ^bb3(%6: index):  // pred: ^bb2
-    %7 = neura.constant {value = 0 : index} : index
-    %8 = neura.constant {value = 3 : index} : index
-    %9 = neura.constant {value = 1 : index} : index
-    neura.br %7 : index to ^bb5
-  ^bb4:  // pred: ^bb39
-    neura.br %6 : index to ^bb2
-  ^bb5(%10: index):  // 2 preds: ^bb3, ^bb7
-    neura.loop_control current_index : %10, step : %9, bound : %8, loop_type : "lt" then ^bb6(%10 : index) else ^bb39
-  ^bb6(%11: index):  // pred: ^bb5
-    %12 = neura.constant {value = 0 : index} : index
-    %13 = neura.constant {value = 3 : index} : index
-    %14 = neura.constant {value = 1 : index} : index
-    neura.br %12 : index to ^bb8
-  ^bb7:  // pred: ^bb38
-    neura.br %11 : index to ^bb5
-  ^bb8(%15: index):  // 2 preds: ^bb6, ^bb10
-    neura.loop_control current_index : %15, step : %14, bound : %13, loop_type : "lt" then ^bb9(%15 : index) else ^bb38
-  ^bb9(%16: index):  // pred: ^bb8
-    %17 = neura.constant {value = 0 : index} : index
-    %18 = neura.constant {value = 3 : index} : index
-    %19 = neura.constant {value = 1 : index} : index
-    neura.br %17 : index to ^bb11
-  ^bb10:  // pred: ^bb37
-    neura.br %16 : index to ^bb8
-  ^bb11(%20: index):  // 2 preds: ^bb9, ^bb13
-    neura.loop_control current_index : %20, step : %19, bound : %18, loop_type : "lt" then ^bb12(%20 : index) else ^bb37
-  ^bb12(%21: index):  // pred: ^bb11
-    %22 = neura.constant {value = 0 : index} : index
-    %23 = neura.constant {value = 3 : index} : index
-    %24 = neura.constant {value = 1 : index} : index
-    neura.br %22 : index to ^bb14
-  ^bb13:  // pred: ^bb36
-    neura.br %21 : index to ^bb11
-  ^bb14(%25: index):  // 2 preds: ^bb12, ^bb16
-    neura.loop_control current_index : %25, step : %24, bound : %23, loop_type : "lt" then ^bb15(%25 : index) else ^bb36
-  ^bb15(%26: index):  // pred: ^bb14
-    %27 = neura.constant {value = 0 : index} : index
-    %28 = neura.constant {value = 3 : index} : index
-    %29 = neura.constant {value = 1 : index} : index
-    neura.br %27 : index to ^bb17
-  ^bb16:  // pred: ^bb35
-    neura.br %26 : index to ^bb14
-  ^bb17(%30: index):  // 2 preds: ^bb15, ^bb19
-    neura.loop_control current_index : %30, step : %29, bound : %28, loop_type : "lt" then ^bb18(%30 : index) else ^bb35
-  ^bb18(%31: index):  // pred: ^bb17
-    %32 = neura.constant {value = 0 : index} : index
-    %33 = neura.constant {value = 3 : index} : index
-    %34 = neura.constant {value = 1 : index} : index
-    neura.br %32 : index to ^bb20
-  ^bb19:  // pred: ^bb34
-    neura.br %31 : index to ^bb17
-  ^bb20(%35: index):  // 2 preds: ^bb18, ^bb22
-    neura.loop_control current_index : %35, step : %34, bound : %33, loop_type : "lt" then ^bb21(%35 : index) else ^bb34
-  ^bb21(%36: index):  // pred: ^bb20
-    %37 = neura.constant {value = 0 : index} : index
-    %38 = neura.constant {value = 3 : index} : index
-    %39 = neura.constant {value = 1 : index} : index
-    neura.br %37 : index to ^bb23
-  ^bb22:  // pred: ^bb33
-    neura.br %36 : index to ^bb20
-  ^bb23(%40: index):  // 2 preds: ^bb21, ^bb25
-    neura.loop_control current_index : %40, step : %39, bound : %38, loop_type : "lt" then ^bb24(%40 : index) else ^bb33
-  ^bb24(%41: index):  // pred: ^bb23
-    %42 = neura.load_indexed memref<3x3x3xi32> %1[%6, %11, %16] : i32
-    %43 = neura.constant {value = 0 : index} : index
-    %44 = neura.constant {value = 3 : index} : index
-    %45 = neura.constant {value = 1 : index} : index
-    neura.br %43 : index to ^bb26
-  ^bb25:  // pred: ^bb32
-    neura.br %41 : index to ^bb23
-  ^bb26(%46: index):  // 2 preds: ^bb24, ^bb28
-    neura.loop_control current_index : %46, step : %45, bound : %44, loop_type : "lt" then ^bb27(%46 : index) else ^bb32
-  ^bb27(%47: index):  // pred: ^bb26
-    %48 = neura.constant {value = 0 : index} : index
-    %49 = neura.constant {value = 3 : index} : index
-    %50 = neura.constant {value = 1 : index} : index
-    neura.br %48 : index to ^bb29
-  ^bb28:  // pred: ^bb31
-    neura.br %47 : index to ^bb26
-  ^bb29(%51: index):  // 2 preds: ^bb27, ^bb30
-    neura.loop_control current_index : %51, step : %50, bound : %49, loop_type : "lt" then ^bb30(%51 : index) else ^bb31
-  ^bb30(%52: index):  // pred: ^bb29
-    %53 = neura.load_indexed memref<3x3x3xi32> %0[%6, %11, %16] : i32
-    %54 = arith.addi %53, %42 : i32
-    neura.store_indexed %54 to memref<3x3x3xi32> %0[%6, %11, %16] : i32
-    neura.br %52 : index to ^bb29
-  ^bb31:  // pred: ^bb29
-    neura.br : to ^bb28
-  ^bb32:  // pred: ^bb26
-    neura.br : to ^bb25
-  ^bb33:  // pred: ^bb23
-    neura.br : to ^bb22
-  ^bb34:  // pred: ^bb20
-    neura.br : to ^bb19
-  ^bb35:  // pred: ^bb17
-    neura.br : to ^bb16
-  ^bb36:  // pred: ^bb14
-    neura.br : to ^bb13
-  ^bb37:  // pred: ^bb11
-    neura.br : to ^bb10
-  ^bb38:  // pred: ^bb8
-    neura.br : to ^bb7
-  ^bb39:  // pred: ^bb5
-    neura.br : to ^bb4
-  ^bb40:  // pred: ^bb2
-    neura.br : to ^bb1
-  }
-}
-
diff --git a/test/affine2neura/gpt2-node11/node11_neura.mlir b/test/affine2neura/gpt2-node11/node11_neura.mlir
deleted file mode 100644
index ccc214c7..00000000
--- a/test/affine2neura/gpt2-node11/node11_neura.mlir
+++ /dev/null
@@ -1,40 +0,0 @@
-module {
-  memref.global @input : memref<1x16x64xf32> = uninitialized
-  memref.global @output : memref<1x16xf32> = uninitialized
-  func.func @_Z6node11v() -> i32 attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} {
-    %c0_i32 = arith.constant 0 : i32
-    %0 = memref.get_global @output : memref<1x16xf32>
-    %1 = memref.get_global @input : memref<1x16x64xf32>
-    %2 = neura.constant {value = 0 : index} : index
-    %3 = neura.constant {value = 16 : index} : index
-    %4 = neura.constant {value = 1 : index} : index
-    neura.br %2 : index to ^bb2
-  ^bb1:  // pred: ^bb8
-    return %c0_i32 : i32
-  ^bb2(%5: index):  // 2 preds: ^bb0, ^bb4
-    neura.loop_control current_index : %5, step : %4, bound : %3, loop_type : "lt" then ^bb3(%5 : index) else ^bb8
-  ^bb3(%6: index):  // pred: ^bb2
-    %7 = neura.constant {value = 0 : index} : index
-    %8 = neura.constant {value = 64 : index} : index
-    %9 = neura.constant {value = 1 : index} : index
-    neura.br %7 : index to ^bb5
-  ^bb4:  // pred: ^bb7
-    neura.br %6 : index to ^bb2
-  ^bb5(%10: index):  // 2 preds: ^bb3, ^bb6
-    neura.loop_control current_index : %10, step : %9, bound : %8, loop_type : "lt" then ^bb6(%10 : index) else ^bb7
-  ^bb6(%11: index):  // pred: ^bb5
-    %12 = neura.constant {value = 0 : index} : index
-    %13 = neura.load_indexed memref<1x16x64xf32> %1[%12, %6, %11] : f32
-    %14 = neura.constant {value = 0 : index} : index
-    %15 = neura.load_indexed memref<1x16xf32> %0[%14, %6] : f32
-    %16 = arith.addf %15, %13 : f32
-    %17 = neura.constant {value = 0 : index} : index
-    neura.store_indexed %16 to memref<1x16xf32> %0[%17, %6] : f32
-    neura.br %11 : index to ^bb5
-  ^bb7:  // pred: ^bb5
-    neura.br : to ^bb4
-  ^bb8:  // pred: ^bb2
-    neura.br : to ^bb1
-  }
-}
-
diff --git a/test/affine2neura/gpt2-node27/node27_neura.mlir b/test/affine2neura/gpt2-node27/node27_neura.mlir
deleted file mode 100644
index 8680f78c..00000000
--- a/test/affine2neura/gpt2-node27/node27_neura.mlir
+++ /dev/null
@@ -1,48 +0,0 @@
-module {
-  memref.global @input : memref<1x16x4x16xf32> = uninitialized
-  memref.global @output : memref<1x4x16x16xf32> = uninitialized
-  func.func @_Z6node27v() -> i32 attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} {
-    %0 = llvm.mlir.undef : i32
-    %1 = memref.get_global @output : memref<1x4x16x16xf32>
-    %2 = memref.get_global @input : memref<1x16x4x16xf32>
-    %3 = neura.constant {value = 0 : index} : index
-    %4 = neura.constant {value = 16 : index} : index
-    %5 = neura.constant {value = 1 : index} : index
-    neura.br %3 : index to ^bb2
-  ^bb1:  // pred: ^bb12
-    return %0 : i32
-  ^bb2(%6: index):  // 2 preds: ^bb0, ^bb4
-    neura.loop_control current_index : %6, step : %5, bound : %4, loop_type : "lt" then ^bb3(%6 : index) else ^bb12
-  ^bb3(%7: index):  // pred: ^bb2
-    %8 = neura.constant {value = 0 : index} : index
-    %9 = neura.constant {value = 4 : index} : index
-    %10 = neura.constant {value = 1 : index} : index
-    neura.br %8 : index to ^bb5
-  ^bb4:  // pred: ^bb11
-    neura.br %7 : index to ^bb2
-  ^bb5(%11: index):  // 2 preds: ^bb3, ^bb7
-    neura.loop_control current_index : %11, step : %10, bound : %9, loop_type : "lt" then ^bb6(%11 : index) else ^bb11
-  ^bb6(%12: index):  // pred: ^bb5
-    %13 = neura.constant {value = 0 : index} : index
-    %14 = neura.constant {value = 16 : index} : index
-    %15 = neura.constant {value = 1 : index} : index
-    neura.br %13 : index to ^bb8
-  ^bb7:  // pred: ^bb10
-    neura.br %12 : index to ^bb5
-  ^bb8(%16: index):  // 2 preds: ^bb6, ^bb9
-    neura.loop_control current_index : %16, step : %15, bound : %14, loop_type : "lt" then ^bb9(%16 : index) else ^bb10
-  ^bb9(%17: index):  // pred: ^bb8
-    %18 = neura.constant {value = 0 : index} : index
-    %19 = neura.load_indexed memref<1x16x4x16xf32> %2[%18, %12, %7, %17] : f32
-    %20 = neura.constant {value = 0 : index} : index
-    neura.store_indexed %19 to memref<1x4x16x16xf32> %1[%20, %7, %12, %17] : f32
-    neura.br %17 : index to ^bb8
-  ^bb10:  // pred: ^bb8
-    neura.br : to ^bb7
-  ^bb11:  // pred: ^bb5
-    neura.br : to ^bb4
-  ^bb12:  // pred: ^bb2
-    neura.br : to ^bb1
-  }
-}
-
diff --git a/test/affine2neura/gpt2-node27/node27_unroll.mlir b/test/affine2neura/gpt2-node27/node27_unroll.mlir
deleted file mode 100644
index 7708b308..00000000
--- a/test/affine2neura/gpt2-node27/node27_unroll.mlir
+++ /dev/null
@@ -1,23 +0,0 @@
-#map = affine_map<(d0) -> (d0 + 1)>
-module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<4xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry, dense<32> : vector<4xi32>>, #dlti.dl_entry, dense<64> : vector<4xi32>>, #dlti.dl_entry, dense<32> : vector<4xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry<"dlti.stack_alignment", 128 : i32>, #dlti.dl_entry<"dlti.endianness", "little">>, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-linux-gnu", "polygeist.target-cpu" = "x86-64", "polygeist.target-features" = "+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87", "polygeist.tune-cpu" = "generic"} {
-  memref.global @input : memref<1x16x4x16xf32> = uninitialized
-  memref.global @output : memref<1x4x16x16xf32> = uninitialized
-  func.func @_Z6node27v() -> i32 attributes {llvm.linkage = #llvm.linkage} {
-    %0 = llvm.mlir.undef : i32
-    %1 = memref.get_global @output : memref<1x4x16x16xf32>
-    %2 = memref.get_global @input : memref<1x16x4x16xf32>
-    affine.for %arg0 = 0 to 16 {
-      affine.for %arg1 = 0 to 4 {
-        affine.for %arg2 = 0 to 16 step 2 {
-          %3 = affine.load %2[0, %arg1, %arg0, %arg2] : memref<1x16x4x16xf32>
-          affine.store %3, %1[0, %arg0, %arg1, %arg2] : memref<1x4x16x16xf32>
-          %4 = affine.apply #map(%arg2)
-          %5 = affine.load %2[0, %arg1, %arg0, %4] : memref<1x16x4x16xf32>
-          affine.store %5, %1[0, %arg0, %arg1, %4] : memref<1x4x16x16xf32>
-        }
-      }
-    }
-    return %0 : i32
-  }
-}
-
diff --git a/test/affine2neura/gpt2-node30/node30_neura.mlir b/test/affine2neura/gpt2-node30/node30_neura.mlir
deleted file mode 100644
index 7a3c641d..00000000
--- a/test/affine2neura/gpt2-node30/node30_neura.mlir
+++ /dev/null
@@ -1,50 +0,0 @@
-module {
-  memref.global @A : memref<1x4x16x64xf32> = uninitialized
-  memref.global @C : memref<1x4x16x64xf32> = uninitialized
-  func.func @_Z6node30v() -> i32 attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} {
-    %cst = arith.constant 1.000000e+01 : f32
-    %0 = llvm.mlir.undef : i32
-    %1 = memref.get_global @C : memref<1x4x16x64xf32>
-    %2 = memref.get_global @A : memref<1x4x16x64xf32>
-    %3 = neura.constant {value = 0 : index} : index
-    %4 = neura.constant {value = 4 : index} : index
-    %5 = neura.constant {value = 1 : index} : index
-    neura.br %3 : index to ^bb2
-  ^bb1:  // pred: ^bb12
-    return %0 : i32
-  ^bb2(%6: index):  // 2 preds: ^bb0, ^bb4
-    neura.loop_control current_index : %6, step : %5, bound : %4, loop_type : "lt" then ^bb3(%6 : index) else ^bb12
-  ^bb3(%7: index):  // pred: ^bb2
-    %8 = neura.constant {value = 0 : index} : index
-    %9 = neura.constant {value = 16 : index} : index
-    %10 = neura.constant {value = 1 : index} : index
-    neura.br %8 : index to ^bb5
-  ^bb4:  // pred: ^bb11
-    neura.br %7 : index to ^bb2
-  ^bb5(%11: index):  // 2 preds: ^bb3, ^bb7
-    neura.loop_control current_index : %11, step : %10, bound : %9, loop_type : "lt" then ^bb6(%11 : index) else ^bb11
-  ^bb6(%12: index):  // pred: ^bb5
-    %13 = neura.constant {value = 0 : index} : index
-    %14 = neura.constant {value = 64 : index} : index
-    %15 = neura.constant {value = 1 : index} : index
-    neura.br %13 : index to ^bb8
-  ^bb7:  // pred: ^bb10
-    neura.br %12 : index to ^bb5
-  ^bb8(%16: index):  // 2 preds: ^bb6, ^bb9
-    neura.loop_control current_index : %16, step : %15, bound : %14, loop_type : "lt" then ^bb9(%16 : index) else ^bb10
-  ^bb9(%17: index):  // pred: ^bb8
-    %18 = neura.constant {value = 0 : index} : index
-    %19 = neura.load_indexed memref<1x4x16x64xf32> %2[%18, %7, %12, %17] : f32
-    %20 = arith.mulf %19, %cst : f32
-    %21 = neura.constant {value = 0 : index} : index
-    neura.store_indexed %20 to memref<1x4x16x64xf32> %1[%21, %7, %12, %17] : f32
-    neura.br %17 : index to ^bb8
-  ^bb10:  // pred: ^bb8
-    neura.br : to ^bb7
-  ^bb11:  // pred: ^bb5
-    neura.br : to ^bb4
-  ^bb12:  // pred: ^bb2
-    neura.br : to ^bb1
-  }
-}
-
diff --git a/test/affine2neura/gpt2-node30/node30_unroll.mlir b/test/affine2neura/gpt2-node30/node30_unroll.mlir
deleted file mode 100644
index e55fe54e..00000000
--- a/test/affine2neura/gpt2-node30/node30_unroll.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-#map = affine_map<(d0) -> (d0 + 1)>
-module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry, dense<64> : vector<4xi32>>, #dlti.dl_entry, dense<32> : vector<4xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry, dense<32> : vector<4xi32>>, #dlti.dl_entry : vector<4xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry<"dlti.stack_alignment", 128 : i32>, #dlti.dl_entry<"dlti.endianness", "little">>, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-linux-gnu", "polygeist.target-cpu" = "x86-64", "polygeist.target-features" = "+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87", "polygeist.tune-cpu" = "generic"} {
-  memref.global @A : memref<1x4x16x64xf32> = uninitialized
-  memref.global @C : memref<1x4x16x64xf32> = uninitialized
-  func.func @_Z6node30v() -> i32 attributes {llvm.linkage = #llvm.linkage} {
-    %cst = arith.constant 1.000000e+01 : f32
-    %0 = llvm.mlir.undef : i32
-    %1 = memref.get_global @C : memref<1x4x16x64xf32>
-    %2 = memref.get_global @A : memref<1x4x16x64xf32>
-    affine.for %arg0 = 0 to 4 {
-      affine.for %arg1 = 0 to 16 {
-        affine.for %arg2 = 0 to 64 step 2 {
-          %3 = affine.load %2[0, %arg0, %arg1, %arg2] : memref<1x4x16x64xf32>
-          %4 = arith.mulf %3, %cst : f32
-          affine.store %4, %1[0, %arg0, %arg1, %arg2] : memref<1x4x16x64xf32>
-          %5 = affine.apply #map(%arg2)
-          %6 = affine.load %2[0, %arg0, %arg1, %5] : memref<1x4x16x64xf32>
-          %7 = arith.mulf %6, %cst : f32
-          affine.store %7, %1[0, %arg0, %arg1, %5] : memref<1x4x16x64xf32>
-        }
-      }
-    }
-    return %0 : i32
-  }
-}
-
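The *_neura.mlir golden files removed above are superseded by the FileCheck assertions kept in node30.mlir: instead of comparing byte-for-byte output, the test now checks that every affine op is eliminated and that three neura.loop_control ops appear, one per loop in the nest. For reference, a minimal 1-D sketch of the lowered shape those files captured, written against the deleted simpleloop kernel (C[i] = A[i] * 10.0f); %A, %C, %cst, the trip count, and the block names are illustrative, assuming the same --lower-affine-to-neura lowering shown in the deleted files:

    %lo   = neura.constant {value = 0 : index} : index    // lower bound
    %n    = neura.constant {value = 100 : index} : index  // loop bound
    %step = neura.constant {value = 1 : index} : index    // step
    neura.br %lo : index to ^header
  ^header(%i: index):  // one loop_control per loop-nest level
    neura.loop_control current_index : %i, step : %step, bound : %n, loop_type : "lt" then ^body(%i : index) else ^exit
  ^body(%iv: index):
    %v = neura.load_indexed memref<100xf32> %A[%iv] : f32   // fused address computation + load
    %m = arith.mulf %v, %cst : f32
    neura.store_indexed %m to memref<100xf32> %C[%iv] : f32 // fused address computation + store
    neura.br %iv : index to ^header
  ^exit:
    // falls through to the code after the loop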