From 4ee67386cb4dbc2335f617e7d1dd059d6aae04e2 Mon Sep 17 00:00:00 2001
From: Shiran Guo
Date: Sun, 28 Dec 2025 11:46:25 +0800
Subject: [PATCH 1/2] Fixes ReturnOp exit predicate logic and addresses issue #209

---
 .../TransformCtrlToDataFlowPass.cpp           |  59 ++
 .../bert/bert_node1/bert_node1.mlir           |  70 +--
 .../bert/bert_node28/bert_node28.mlir         | 101 ++--
 .../complex_nested/complex_nested.mlir        |  12 +-
 .../non_perfect_nested.mlir                   |  14 +-
 .../perfect_nested/perfect_nested.mlir        | 137 ++++-
 .../simple_loop/simple_loop.mlir              |   7 +-
 test/e2e/bicg/bicg_kernel.mlir                |  66 ++-
 test/e2e/histogram/histogram_kernel.mlir      | 200 +++----
 test/e2e/relu/relu_kernel.mlir                | 201 +++----
 test/neura/for_loop/kernel_test.mlir          | 128 ++++-
 test/neura/for_loop/relu_test.mlir            | 525 ++++++++++++++++--
 test/neura/fusion/test.mlir                   |  56 +-
 .../steer_ctrl/loop_without_return_value.mlir |  56 +-
 14 files changed, 1179 insertions(+), 453 deletions(-)

diff --git a/lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp b/lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp
index 02d0e8df..55f2db67 100644
--- a/lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp
+++ b/lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp
@@ -602,6 +602,62 @@ void transformControlFlowToDataFlow(Region &region, ControlFlowInfo &ctrl_info,
   }
 }
 
+// Injects an exit predicate into the void ReturnOp found in the entry block.
+// Value-returning ReturnOps are left untouched.
+// NOTE: `ctrl_info` is not read yet; the predicate is a constant true.
+void injectExitPredicateForReturn(Region &region, ControlFlowInfo &ctrl_info,
+                                  OpBuilder &builder) {
+  // Guards against bodiless regions: front() requires at least one block.
+  if (region.empty()) {
+    return;
+  }
+
+  // Finds the first ReturnOp in the entry block.
+  neura::ReturnOp return_op = nullptr;
+  for (Operation &op : region.front()) {
+    if (auto candidate = dyn_cast<neura::ReturnOp>(op)) {
+      return_op = candidate;
+      llvm::errs() << "[ctrl2data] ReturnOp found: " << *candidate << "\n";
+      break;
+    }
+  }
+
+  if (!return_op) {
+    return; // No ReturnOp to process.
+  }
+
+  // Only void returns receive an exit predicate; a ReturnOp that already
+  // carries operands (value return) is skipped.
+  if (return_op.getNumOperands() > 0) {
+    llvm::errs() << "[ctrl2data] ReturnOp has return value, skipping exit "
+                    "predicate injection.\n";
+    return;
+  }
+
+  // Computes exit predicate: uses a constant true predicate for now, so the
+  // return does not yet depend on any loop-exit condition.
+  llvm::errs() << "[ctrl2data] Injecting exit predicate for void ReturnOp.\n";
+
+  // All new ops are created immediately before the old ReturnOp.
+  builder.setInsertionPoint(return_op);
+  Location loc = return_op.getLoc();
+
+  // Creates a constant true predicate and grants it once.
+  auto i1_type = builder.getI1Type();
+  auto pred_type =
+      neura::PredicatedValue::get(builder.getContext(), i1_type, i1_type);
+  Value true_constant = builder.create<neura::ConstantOp>(
+      loc, pred_type, builder.getIntegerAttr(i1_type, 1));
+  Value granted_true =
+      builder.create<neura::GrantOnceOp>(loc, pred_type, true_constant);
+
+  // Replaces the old ReturnOp with one that carries the exit predicate.
+  builder.create<neura::ReturnOp>(loc, ValueRange{granted_true});
+  return_op.erase();
+
+  llvm::errs() << "[ctrl2data] Injected exit predicate for ReturnOp.\n";
+}
+
 // Converts phi operations with reserve operands to phi_start operations.
 void convertPhiToPhiStart(Region &region, OpBuilder &builder) {
   llvm::errs() << "[ctrl2data] Converting phi operations to phi_start...\n";
@@ -697,6 +753,9 @@ struct TransformCtrlToDataFlowPass
       buildControlFlowInfo(*region, ctrlInfo, domInfo);
       transformControlFlowToDataFlow(*region, ctrlInfo, domInfo, builder);
 
+      // Injects the exit predicate for void returns.
+      injectExitPredicateForReturn(*region, ctrlInfo, builder);
+
       // Converts phi operations to phi_start operations.
convertPhiToPhiStart(*region, builder); }); diff --git a/test/affine2neura/bert/bert_node1/bert_node1.mlir b/test/affine2neura/bert/bert_node1/bert_node1.mlir index dedb0c14..c3ebacee 100644 --- a/test/affine2neura/bert/bert_node1/bert_node1.mlir +++ b/test/affine2neura/bert/bert_node1/bert_node1.mlir @@ -31,38 +31,41 @@ module attributes {} { } } -// CHECK: func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura"} { -// CHECK-NEXT: %0 = "neura.constant"() <{value = 1 : index}> : () -> index -// CHECK-NEXT: %1 = "neura.constant"() <{value = 128 : index}> : () -> index -// CHECK-NEXT: %2 = "neura.constant"() <{value = 0 : index}> : () -> index -// CHECK-NEXT: %3 = "neura.cast"(%2) <{cast_type = "index_to_int"}> : (index) -> i64 -// CHECK-NEXT: neura.br %3 : i64 to ^bb1 -// CHECK-NEXT: ^bb1(%4: i64): // 2 preds: ^bb0, ^bb5 -// CHECK-NEXT: %5 = "neura.cast"(%4) <{cast_type = "int_to_index"}> : (i64) -> index -// CHECK-NEXT: %6 = "neura.icmp"(%5, %1) <{cmpType = "slt"}> : (index, index) -> i1 -// CHECK-NEXT: neura.cond_br %6 : i1 then to ^bb2 else to ^bb6 -// CHECK-NEXT: ^bb2: // pred: ^bb1 -// CHECK-NEXT: %7 = "neura.cast"(%2) <{cast_type = "index_to_int"}> : (index) -> i64 -// CHECK-NEXT: neura.br %7 : i64 to ^bb3 -// CHECK-NEXT: ^bb3(%8: i64): // 2 preds: ^bb2, ^bb4 -// CHECK-NEXT: %9 = "neura.cast"(%8) <{cast_type = "int_to_index"}> : (i64) -> index -// CHECK-NEXT: %10 = "neura.icmp"(%9, %1) <{cmpType = "slt"}> : (index, index) -> i1 -// CHECK-NEXT: neura.cond_br %10 : i1 then to ^bb4 else to ^bb5 -// CHECK-NEXT: ^bb4: // pred: ^bb3 -// CHECK-NEXT: %11 = neura.load_indexed %arg0[%2, %2, %2, %2, %2, %9 : index, index, index, index, index, index] memref : i8 -// CHECK-NEXT: neura.store_indexed %11 to %arg1[%2, %2, %5, %2, %2, %9 : index, index, index, index, index, index] memref : i8 -// CHECK-NEXT: %12 = "neura.add"(%9, %0) : (index, index) -> index -// CHECK-NEXT: %13 = "neura.cast"(%12) <{cast_type = 
"index_to_int"}> : (index) -> i64 -// CHECK-NEXT: neura.br %13 : i64 to ^bb3 -// CHECK-NEXT: ^bb5: // pred: ^bb3 -// CHECK-NEXT: %14 = "neura.add"(%5, %0) : (index, index) -> index -// CHECK-NEXT: %15 = "neura.cast"(%14) <{cast_type = "index_to_int"}> : (index) -> i64 -// CHECK-NEXT: neura.br %15 : i64 to ^bb1 -// CHECK-NEXT: ^bb6: // pred: ^bb1 -// CHECK-NEXT: "neura.return"() : () -> () +// CHECK: module { +// CHECK-NEXT: func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura"} { +// CHECK-NEXT: %0 = "neura.constant"() <{value = 1 : index}> : () -> index +// CHECK-NEXT: %1 = "neura.constant"() <{value = 128 : index}> : () -> index +// CHECK-NEXT: %2 = "neura.constant"() <{value = 0 : index}> : () -> index +// CHECK-NEXT: %3 = "neura.cast"(%2) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %3 : i64 to ^bb1 +// CHECK-NEXT: ^bb1(%4: i64): // 2 preds: ^bb0, ^bb5 +// CHECK-NEXT: %5 = "neura.cast"(%4) <{cast_type = "int_to_index"}> : (i64) -> index +// CHECK-NEXT: %6 = "neura.icmp"(%5, %1) <{cmpType = "slt"}> : (index, index) -> i1 +// CHECK-NEXT: neura.cond_br %6 : i1 then to ^bb2 else to ^bb6 +// CHECK-NEXT: ^bb2: // pred: ^bb1 +// CHECK-NEXT: %7 = "neura.cast"(%2) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %7 : i64 to ^bb3 +// CHECK-NEXT: ^bb3(%8: i64): // 2 preds: ^bb2, ^bb4 +// CHECK-NEXT: %9 = "neura.cast"(%8) <{cast_type = "int_to_index"}> : (i64) -> index +// CHECK-NEXT: %10 = "neura.icmp"(%9, %1) <{cmpType = "slt"}> : (index, index) -> i1 +// CHECK-NEXT: neura.cond_br %10 : i1 then to ^bb4 else to ^bb5 +// CHECK-NEXT: ^bb4: // pred: ^bb3 +// CHECK-NEXT: %11 = neura.load_indexed %arg0[%2, %2, %2, %2, %2, %9 : index, index, index, index, index, index] memref : i8 +// CHECK-NEXT: neura.store_indexed %11 to %arg1[%2, %2, %5, %2, %2, %9 : index, index, index, index, index, index] memref : i8 +// CHECK-NEXT: %12 = "neura.add"(%9, %0) : (index, 
index) -> index +// CHECK-NEXT: %13 = "neura.cast"(%12) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %13 : i64 to ^bb3 +// CHECK-NEXT: ^bb5: // pred: ^bb3 +// CHECK-NEXT: %14 = "neura.add"(%5, %0) : (index, index) -> index +// CHECK-NEXT: %15 = "neura.cast"(%14) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %15 : i64 to ^bb1 +// CHECK-NEXT: ^bb6: // pred: ^bb1 +// CHECK-NEXT: "neura.return"() : () -> () +// CHECK-NEXT: } // CHECK-NEXT: } -// CTRL2DATA: func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", dataflow_mode = "predicate"} { +// CTRL2DATA: module { +// CTRL2DATA-NEXT: func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", dataflow_mode = "predicate"} { // CTRL2DATA-NEXT: %0 = "neura.constant"() <{value = "%arg0"}> : () -> !neura.data, i1> // CTRL2DATA-NEXT: %1 = "neura.grant_once"(%0) : (!neura.data, i1>) -> !neura.data, i1> // CTRL2DATA-NEXT: %2 = "neura.constant"() <{value = "%arg1"}> : () -> !neura.data, i1> @@ -138,5 +141,8 @@ module attributes {} { // CTRL2DATA-NEXT: neura.ctrl_mov %47 -> %33 : !neura.data, i1> !neura.data, i1> // CTRL2DATA-NEXT: neura.ctrl_mov %48 -> %31 : !neura.data !neura.data // CTRL2DATA-NEXT: neura.ctrl_mov %49 -> %29 : !neura.data !neura.data -// CTRL2DATA-NEXT: "neura.return"() : () -> () -// CTRL2DATA-NEXT: } \ No newline at end of file +// CTRL2DATA-NEXT: %61 = "neura.constant"() <{value = true}> : () -> !neura.data +// CTRL2DATA-NEXT: %62 = "neura.grant_once"(%61) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: "neura.return"(%62) : (!neura.data) -> () +// CTRL2DATA-NEXT: } +// CTRL2DATA-NEXT: } \ No newline at end of file diff --git a/test/affine2neura/bert/bert_node28/bert_node28.mlir b/test/affine2neura/bert/bert_node28/bert_node28.mlir index bff2def5..c30cfb0a 100644 --- a/test/affine2neura/bert/bert_node28/bert_node28.mlir +++ 
b/test/affine2neura/bert/bert_node28/bert_node28.mlir @@ -36,53 +36,57 @@ module attributes {} { return } } -// CHECK: func.func @_Z11bert_node28PA128_A768_KfPA768_S0_PA128_A768_f(%arg0: memref, %arg1: memref, %arg2: memref) attributes {accelerator = "neura"} { -// CHECK-NEXT: %0 = "neura.constant"() <{value = 768 : index}> : () -> index -// CHECK-NEXT: %1 = "neura.constant"() <{value = 1 : index}> : () -> index -// CHECK-NEXT: %2 = "neura.constant"() <{value = 128 : index}> : () -> index -// CHECK-NEXT: %3 = "neura.constant"() <{value = 0 : index}> : () -> index -// CHECK-NEXT: %4 = "neura.cast"(%3) <{cast_type = "index_to_int"}> : (index) -> i64 -// CHECK-NEXT: neura.br %4 : i64 to ^bb1 -// CHECK-NEXT: ^bb1(%5: i64): // 2 preds: ^bb0, ^bb8 -// CHECK-NEXT: %6 = "neura.cast"(%5) <{cast_type = "int_to_index"}> : (i64) -> index -// CHECK-NEXT: %7 = "neura.icmp"(%6, %2) <{cmpType = "slt"}> : (index, index) -> i1 -// CHECK-NEXT: neura.cond_br %7 : i1 then to ^bb2 else to ^bb9 -// CHECK-NEXT: ^bb2: // pred: ^bb1 -// CHECK-NEXT: %8 = "neura.cast"(%3) <{cast_type = "index_to_int"}> : (index) -> i64 -// CHECK-NEXT: neura.br %8 : i64 to ^bb3 -// CHECK-NEXT: ^bb3(%9: i64): // 2 preds: ^bb2, ^bb7 -// CHECK-NEXT: %10 = "neura.cast"(%9) <{cast_type = "int_to_index"}> : (i64) -> index -// CHECK-NEXT: %11 = "neura.icmp"(%10, %0) <{cmpType = "slt"}> : (index, index) -> i1 -// CHECK-NEXT: neura.cond_br %11 : i1 then to ^bb4 else to ^bb8 -// CHECK-NEXT: ^bb4: // pred: ^bb3 -// CHECK-NEXT: %12 = "neura.cast"(%3) <{cast_type = "index_to_int"}> : (index) -> i64 -// CHECK-NEXT: neura.br %12 : i64 to ^bb5 -// CHECK-NEXT: ^bb5(%13: i64): // 2 preds: ^bb4, ^bb6 -// CHECK-NEXT: %14 = "neura.cast"(%13) <{cast_type = "int_to_index"}> : (i64) -> index -// CHECK-NEXT: %15 = "neura.icmp"(%14, %0) <{cmpType = "slt"}> : (index, index) -> i1 -// CHECK-NEXT: neura.cond_br %15 : i1 then to ^bb6 else to ^bb7 -// CHECK-NEXT: ^bb6: // pred: ^bb5 -// CHECK-NEXT: %16 = neura.load_indexed %arg0[%3, %6, %14 
: index, index, index] memref : f32 -// CHECK-NEXT: %17 = neura.load_indexed %arg1[%3, %14, %10 : index, index, index] memref : f32 -// CHECK-NEXT: %18 = neura.load_indexed %arg2[%3, %6, %10 : index, index, index] memref : f32 -// CHECK-NEXT: %19 = "neura.fmul"(%16, %17) : (f32, f32) -> f32 -// CHECK-NEXT: %20 = "neura.fadd"(%18, %19) : (f32, f32) -> f32 -// CHECK-NEXT: neura.store_indexed %20 to %arg2[%3, %6, %10 : index, index, index] memref : f32 -// CHECK-NEXT: %21 = "neura.add"(%14, %1) : (index, index) -> index -// CHECK-NEXT: %22 = "neura.cast"(%21) <{cast_type = "index_to_int"}> : (index) -> i64 -// CHECK-NEXT: neura.br %22 : i64 to ^bb5 -// CHECK-NEXT: ^bb7: // pred: ^bb5 -// CHECK-NEXT: %23 = "neura.add"(%10, %1) : (index, index) -> index -// CHECK-NEXT: %24 = "neura.cast"(%23) <{cast_type = "index_to_int"}> : (index) -> i64 -// CHECK-NEXT: neura.br %24 : i64 to ^bb3 -// CHECK-NEXT: ^bb8: // pred: ^bb3 -// CHECK-NEXT: %25 = "neura.add"(%6, %1) : (index, index) -> index -// CHECK-NEXT: %26 = "neura.cast"(%25) <{cast_type = "index_to_int"}> : (index) -> i64 -// CHECK-NEXT: neura.br %26 : i64 to ^bb1 -// CHECK-NEXT: ^bb9: // pred: ^bb1 -// CHECK-NEXT: "neura.return"() : () -> () +// CHECK: module { +// CHECK-NEXT: func.func @_Z11bert_node28PA128_A768_KfPA768_S0_PA128_A768_f(%arg0: memref, %arg1: memref, %arg2: memref) attributes {accelerator = "neura"} { +// CHECK-NEXT: %0 = "neura.constant"() <{value = 768 : index}> : () -> index +// CHECK-NEXT: %1 = "neura.constant"() <{value = 1 : index}> : () -> index +// CHECK-NEXT: %2 = "neura.constant"() <{value = 128 : index}> : () -> index +// CHECK-NEXT: %3 = "neura.constant"() <{value = 0 : index}> : () -> index +// CHECK-NEXT: %4 = "neura.cast"(%3) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %4 : i64 to ^bb1 +// CHECK-NEXT: ^bb1(%5: i64): // 2 preds: ^bb0, ^bb8 +// CHECK-NEXT: %6 = "neura.cast"(%5) <{cast_type = "int_to_index"}> : (i64) -> index +// CHECK-NEXT: %7 = "neura.icmp"(%6, 
%2) <{cmpType = "slt"}> : (index, index) -> i1 +// CHECK-NEXT: neura.cond_br %7 : i1 then to ^bb2 else to ^bb9 +// CHECK-NEXT: ^bb2: // pred: ^bb1 +// CHECK-NEXT: %8 = "neura.cast"(%3) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %8 : i64 to ^bb3 +// CHECK-NEXT: ^bb3(%9: i64): // 2 preds: ^bb2, ^bb7 +// CHECK-NEXT: %10 = "neura.cast"(%9) <{cast_type = "int_to_index"}> : (i64) -> index +// CHECK-NEXT: %11 = "neura.icmp"(%10, %0) <{cmpType = "slt"}> : (index, index) -> i1 +// CHECK-NEXT: neura.cond_br %11 : i1 then to ^bb4 else to ^bb8 +// CHECK-NEXT: ^bb4: // pred: ^bb3 +// CHECK-NEXT: %12 = "neura.cast"(%3) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %12 : i64 to ^bb5 +// CHECK-NEXT: ^bb5(%13: i64): // 2 preds: ^bb4, ^bb6 +// CHECK-NEXT: %14 = "neura.cast"(%13) <{cast_type = "int_to_index"}> : (i64) -> index +// CHECK-NEXT: %15 = "neura.icmp"(%14, %0) <{cmpType = "slt"}> : (index, index) -> i1 +// CHECK-NEXT: neura.cond_br %15 : i1 then to ^bb6 else to ^bb7 +// CHECK-NEXT: ^bb6: // pred: ^bb5 +// CHECK-NEXT: %16 = neura.load_indexed %arg0[%3, %6, %14 : index, index, index] memref : f32 +// CHECK-NEXT: %17 = neura.load_indexed %arg1[%3, %14, %10 : index, index, index] memref : f32 +// CHECK-NEXT: %18 = neura.load_indexed %arg2[%3, %6, %10 : index, index, index] memref : f32 +// CHECK-NEXT: %19 = "neura.fmul"(%16, %17) : (f32, f32) -> f32 +// CHECK-NEXT: %20 = "neura.fadd"(%18, %19) : (f32, f32) -> f32 +// CHECK-NEXT: neura.store_indexed %20 to %arg2[%3, %6, %10 : index, index, index] memref : f32 +// CHECK-NEXT: %21 = "neura.add"(%14, %1) : (index, index) -> index +// CHECK-NEXT: %22 = "neura.cast"(%21) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %22 : i64 to ^bb5 +// CHECK-NEXT: ^bb7: // pred: ^bb5 +// CHECK-NEXT: %23 = "neura.add"(%10, %1) : (index, index) -> index +// CHECK-NEXT: %24 = "neura.cast"(%23) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %24 
: i64 to ^bb3 +// CHECK-NEXT: ^bb8: // pred: ^bb3 +// CHECK-NEXT: %25 = "neura.add"(%6, %1) : (index, index) -> index +// CHECK-NEXT: %26 = "neura.cast"(%25) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %26 : i64 to ^bb1 +// CHECK-NEXT: ^bb9: // pred: ^bb1 +// CHECK-NEXT: "neura.return"() : () -> () +// CHECK-NEXT: } +// CHECK-NEXT: } -// CTRL2DATA: func.func @_Z11bert_node28PA128_A768_KfPA768_S0_PA128_A768_f(%arg0: memref, %arg1: memref, %arg2: memref) attributes {accelerator = "neura", dataflow_mode = "predicate"} { +// CTRL2DATA: module { +// CTRL2DATA-NEXT: func.func @_Z11bert_node28PA128_A768_KfPA768_S0_PA128_A768_f(%arg0: memref, %arg1: memref, %arg2: memref) attributes {accelerator = "neura", dataflow_mode = "predicate"} { // CTRL2DATA-NEXT: %0 = "neura.constant"() <{value = "%arg0"}> : () -> !neura.data, i1> // CTRL2DATA-NEXT: %1 = "neura.grant_once"(%0) : (!neura.data, i1>) -> !neura.data, i1> // CTRL2DATA-NEXT: %2 = "neura.constant"() <{value = "%arg1"}> : () -> !neura.data, i1> @@ -236,5 +240,8 @@ module attributes {} { // CTRL2DATA-NEXT: neura.ctrl_mov %104 -> %81 : !neura.data, i1> !neura.data, i1> // CTRL2DATA-NEXT: neura.ctrl_mov %105 -> %79 : !neura.data !neura.data // CTRL2DATA-NEXT: neura.ctrl_mov %107 -> %77 : !neura.data !neura.data -// CTRL2DATA-NEXT: "neura.return"() : () -> () +// CTRL2DATA-NEXT: %125 = "neura.constant"() <{value = true}> : () -> !neura.data +// CTRL2DATA-NEXT: %126 = "neura.grant_once"(%125) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: "neura.return"(%126) : (!neura.data) -> () // CTRL2DATA-NEXT: } +// CTRL2DATA-NEXT: } diff --git a/test/controflow_fuse/complex_nested/complex_nested.mlir b/test/controflow_fuse/complex_nested/complex_nested.mlir index e8ecf0ae..0af4b086 100644 --- a/test/controflow_fuse/complex_nested/complex_nested.mlir +++ b/test/controflow_fuse/complex_nested/complex_nested.mlir @@ -66,7 +66,8 @@ module attributes {} { } } -// CHECK: func.func 
@_Z14complex_nestedPA32_A32_iPS_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { +// CHECK: module { +// CHECK-NEXT: func.func @_Z14complex_nestedPA32_A32_iPS_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { // CHECK-NEXT: %0 = "neura.constant"() <{value = 1 : index}> : () -> index // CHECK-NEXT: %1 = "neura.constant"() <{value = 32 : index}> : () -> index // CHECK-NEXT: %2 = "neura.constant"() <{value = 128 : i32}> : () -> i32 @@ -175,8 +176,10 @@ module attributes {} { // CHECK-NEXT: ^bb23: // pred: ^bb1 // CHECK-NEXT: "neura.return"() : () -> () // CHECK-NEXT: } +// CHECK-NEXT: } -// CTRL2DATA: func.func @_Z14complex_nestedPA32_A32_iPS_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", dataflow_mode = "predicate", llvm.linkage = #llvm.linkage} { +// CTRL2DATA: module { +// CTRL2DATA-NEXT: func.func @_Z14complex_nestedPA32_A32_iPS_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", dataflow_mode = "predicate", llvm.linkage = #llvm.linkage} { // CTRL2DATA-NEXT: %0 = "neura.constant"() <{value = "%arg0"}> : () -> !neura.data, i1> // CTRL2DATA-NEXT: %1 = "neura.grant_once"(%0) : (!neura.data, i1>) -> !neura.data, i1> // CTRL2DATA-NEXT: %2 = "neura.constant"() <{value = "%arg1"}> : () -> !neura.data, i1> @@ -668,5 +671,8 @@ module attributes {} { // CTRL2DATA-NEXT: neura.ctrl_mov %384 -> %354 : !neura.data !neura.data // CTRL2DATA-NEXT: neura.ctrl_mov %385 -> %352 : !neura.data !neura.data // CTRL2DATA-NEXT: neura.ctrl_mov %386 -> %350 : !neura.data !neura.data -// CTRL2DATA-NEXT: "neura.return"() : () -> () +// CTRL2DATA-NEXT: %404 = "neura.constant"() <{value = true}> : () -> !neura.data +// CTRL2DATA-NEXT: %405 = "neura.grant_once"(%404) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: "neura.return"(%405) : (!neura.data) -> () // CTRL2DATA-NEXT: } +// CTRL2DATA-NEXT: } diff --git 
a/test/controflow_fuse/non_perfect_nested/non_perfect_nested.mlir b/test/controflow_fuse/non_perfect_nested/non_perfect_nested.mlir index 760ae0ce..5bb764d6 100644 --- a/test/controflow_fuse/non_perfect_nested/non_perfect_nested.mlir +++ b/test/controflow_fuse/non_perfect_nested/non_perfect_nested.mlir @@ -66,7 +66,8 @@ module attributes {} { } } -// CHECK: func.func @_Z29non_perfect_extra_computationPA128_iS0_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { +// CHECK: module { +// CHECK-NEXT: func.func @_Z29non_perfect_extra_computationPA128_iS0_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { // CHECK-NEXT: %0 = "neura.constant"() <{value = 4 : index}> : () -> index // CHECK-NEXT: %1 = "neura.constant"() <{value = 3 : index}> : () -> index // CHECK-NEXT: %2 = "neura.constant"() <{value = 2 : index}> : () -> index @@ -137,9 +138,11 @@ module attributes {} { // CHECK-NEXT: ^bb10: // pred: ^bb1 // CHECK-NEXT: "neura.return"() : () -> () // CHECK-NEXT: } +// CHECK-NEXT: } -// CTRL2DATA: func.func @_Z29non_perfect_extra_computationPA128_iS0_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", dataflow_mode = "predicate", llvm.linkage = #llvm.linkage} { +// CTRL2DATA: module { +// CTRL2DATA-NEXT: func.func @_Z29non_perfect_extra_computationPA128_iS0_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", dataflow_mode = "predicate", llvm.linkage = #llvm.linkage} { // CTRL2DATA-NEXT: %0 = "neura.constant"() <{value = "%arg0"}> : () -> !neura.data, i1> // CTRL2DATA-NEXT: %1 = "neura.grant_once"(%0) : (!neura.data, i1>) -> !neura.data, i1> // CTRL2DATA-NEXT: %2 = "neura.constant"() <{value = "%arg1"}> : () -> !neura.data, i1> @@ -429,5 +432,8 @@ module attributes {} { // CTRL2DATA-NEXT: neura.ctrl_mov %146 -> %89 : !neura.data !neura.data // CTRL2DATA-NEXT: neura.ctrl_mov %147 -> %87 : !neura.data !neura.data // CTRL2DATA-NEXT: neura.ctrl_mov %148 -> 
%85 : !neura.data !neura.data -// CTRL2DATA-NEXT: "neura.return"() : () -> () -// CTRL2DATA-NEXT: } \ No newline at end of file +// CTRL2DATA-NEXT: %246 = "neura.constant"() <{value = true}> : () -> !neura.data +// CTRL2DATA-NEXT: %247 = "neura.grant_once"(%246) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: "neura.return"(%247) : (!neura.data) -> () +// CTRL2DATA-NEXT: } +// CTRL2DATA-NEXT: } \ No newline at end of file diff --git a/test/controflow_fuse/perfect_nested/perfect_nested.mlir b/test/controflow_fuse/perfect_nested/perfect_nested.mlir index 22c6f982..520f2f82 100644 --- a/test/controflow_fuse/perfect_nested/perfect_nested.mlir +++ b/test/controflow_fuse/perfect_nested/perfect_nested.mlir @@ -60,48 +60,51 @@ module attributes {} { } -// CHECK: func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { -// CHECK-NEXT: %0 = "neura.constant"() <{value = 1 : index}> : () -> index -// CHECK-NEXT: %1 = "neura.constant"() <{value = 128 : index}> : () -> index -// CHECK-NEXT: %2 = "neura.constant"() <{value = 0 : index}> : () -> index -// CHECK-NEXT: %3 = "neura.cast"(%2) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK: module { +// CHECK-NEXT: func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { +// CHECK-NEXT: %0 = "neura.constant"() <{value = 1 : index}> {{.*}}: () -> index +// CHECK-NEXT: %1 = "neura.constant"() <{value = 128 : index}> {{.*}}: () -> index +// CHECK-NEXT: %2 = "neura.constant"() <{value = 0 : index}> {{.*}}: () -> index +// CHECK-NEXT: %3 = "neura.cast"(%2) <{cast_type = "index_to_int"}> {{.*}}: (index) -> i64 // CHECK-NEXT: neura.br %3 : i64 to ^bb1 // CHECK-NEXT: ^bb1(%4: i64): // 2 preds: ^bb0, ^bb5 -// CHECK-NEXT: %5 = "neura.cast"(%4) <{cast_type = "int_to_index"}> : (i64) -> index -// CHECK-NEXT: %6 = "neura.icmp"(%5, %1) <{cmpType 
= "slt"}> : (index, index) -> i1 +// CHECK-NEXT: %5 = "neura.cast"(%4) <{cast_type = "int_to_index"}> {{.*}}: (i64) -> index +// CHECK-NEXT: %6 = "neura.icmp"(%5, %1) <{cmpType = "slt"}> {{.*}}: (index, index) -> i1 // CHECK-NEXT: neura.cond_br %6 : i1 then to ^bb2 else to ^bb6 // CHECK-NEXT: ^bb2: // pred: ^bb1 -// CHECK-NEXT: %7 = "neura.cast"(%2) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: %7 = "neura.cast"(%2) <{cast_type = "index_to_int"}> {{.*}}: (index) -> i64 // CHECK-NEXT: neura.br %7 : i64 to ^bb3 // CHECK-NEXT: ^bb3(%8: i64): // 2 preds: ^bb2, ^bb4 -// CHECK-NEXT: %9 = "neura.cast"(%8) <{cast_type = "int_to_index"}> : (i64) -> index -// CHECK-NEXT: %10 = "neura.icmp"(%9, %1) <{cmpType = "slt"}> : (index, index) -> i1 +// CHECK-NEXT: %9 = "neura.cast"(%8) <{cast_type = "int_to_index"}> {{.*}}: (i64) -> index +// CHECK-NEXT: %10 = "neura.icmp"(%9, %1) <{cmpType = "slt"}> {{.*}}: (index, index) -> i1 // CHECK-NEXT: neura.cond_br %10 : i1 then to ^bb4 else to ^bb5 // CHECK-NEXT: ^bb4: // pred: ^bb3 // CHECK-NEXT: %11 = neura.load_indexed %arg0[%2, %2, %2, %2, %2, %9 : index, index, index, index, index, index] memref : i8 // CHECK-NEXT: neura.store_indexed %11 to %arg1[%2, %2, %5, %2, %2, %9 : index, index, index, index, index, index] memref : i8 // CHECK-NEXT: %12 = "neura.add"(%9, %0) : (index, index) -> index -// CHECK-NEXT: %13 = "neura.cast"(%12) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: %13 = "neura.cast"(%12) <{cast_type = "index_to_int"}> {{.*}}: (index) -> i64 // CHECK-NEXT: neura.br %13 : i64 to ^bb3 // CHECK-NEXT: ^bb5: // pred: ^bb3 // CHECK-NEXT: %14 = "neura.add"(%5, %0) : (index, index) -> index -// CHECK-NEXT: %15 = "neura.cast"(%14) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: %15 = "neura.cast"(%14) <{cast_type = "index_to_int"}> {{.*}}: (index) -> i64 // CHECK-NEXT: neura.br %15 : i64 to ^bb1 // CHECK-NEXT: ^bb6: // pred: ^bb1 // CHECK-NEXT: "neura.return"() : () -> () // 
CHECK-NEXT: } -// CAST: func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { -// CAST-NEXT: %0 = "neura.constant"() <{value = 1 : i64}> : () -> i64 -// CAST-NEXT: %1 = "neura.constant"() <{value = 128 : i64}> : () -> i64 -// CAST-NEXT: %2 = "neura.constant"() <{value = 0 : i64}> : () -> i64 +// CHECK-NEXT: } +// CAST: module { +// CAST-NEXT: func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { +// CAST-NEXT: %0 = "neura.constant"() <{value = 1 : i64}> {{.*}}: () -> i64 +// CAST-NEXT: %1 = "neura.constant"() <{value = 128 : i64}> {{.*}}: () -> i64 +// CAST-NEXT: %2 = "neura.constant"() <{value = 0 : i64}> {{.*}}: () -> i64 // CAST-NEXT: neura.br %2 : i64 to ^bb1 // CAST-NEXT: ^bb1(%3: i64): // 2 preds: ^bb0, ^bb5 -// CAST-NEXT: %4 = "neura.icmp"(%3, %1) <{cmpType = "slt"}> : (i64, i64) -> i1 +// CAST-NEXT: %4 = "neura.icmp"(%3, %1) <{cmpType = "slt"}> {{.*}}: (i64, i64) -> i1 // CAST-NEXT: neura.cond_br %4 : i1 then to ^bb2 else to ^bb6 // CAST-NEXT: ^bb2: // pred: ^bb1 // CAST-NEXT: neura.br %2 : i64 to ^bb3 // CAST-NEXT: ^bb3(%5: i64): // 2 preds: ^bb2, ^bb4 -// CAST-NEXT: %6 = "neura.icmp"(%5, %1) <{cmpType = "slt"}> : (i64, i64) -> i1 +// CAST-NEXT: %6 = "neura.icmp"(%5, %1) <{cmpType = "slt"}> {{.*}}: (i64, i64) -> i1 // CAST-NEXT: neura.cond_br %6 : i1 then to ^bb4 else to ^bb5 // CAST-NEXT: ^bb4: // pred: ^bb3 // CAST-NEXT: %7 = neura.load_indexed %arg0[%2, %2, %2, %2, %2, %5 : i64, i64, i64, i64, i64, i64] memref : i8 @@ -114,18 +117,20 @@ module attributes {} { // CAST-NEXT: ^bb6: // pred: ^bb1 // CAST-NEXT: "neura.return"() : () -> () // CAST-NEXT: } +// CAST-NEXT: } -// CTRL2DATA: func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", dataflow_mode = "predicate", llvm.linkage = 
#llvm.linkage} { -// CTRL2DATA-NEXT: %0 = "neura.constant"() <{value = "%arg0"}> : () -> !neura.data, i1> +// CTRL2DATA: module { +// CTRL2DATA-NEXT: func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", dataflow_mode = "predicate", llvm.linkage = #llvm.linkage} { +// CTRL2DATA-NEXT: %0 = "neura.constant"() <{value = "%arg0"}> {{.*}}: () -> !neura.data, i1> // CTRL2DATA-NEXT: %1 = "neura.grant_once"(%0) : (!neura.data, i1>) -> !neura.data, i1> -// CTRL2DATA-NEXT: %2 = "neura.constant"() <{value = "%arg1"}> : () -> !neura.data, i1> +// CTRL2DATA-NEXT: %2 = "neura.constant"() <{value = "%arg1"}> {{.*}}: () -> !neura.data, i1> // CTRL2DATA-NEXT: %3 = "neura.grant_once"(%2) : (!neura.data, i1>) -> !neura.data, i1> -// CTRL2DATA-NEXT: %4 = "neura.constant"() <{value = 1 : i64}> : () -> !neura.data +// CTRL2DATA-NEXT: %4 = "neura.constant"() <{value = 1 : i64}> {{.*}}: () -> !neura.data // CTRL2DATA-NEXT: %5 = "neura.grant_once"(%4) : (!neura.data) -> !neura.data -// CTRL2DATA-NEXT: %6 = "neura.constant"() <{value = 128 : i64}> : () -> !neura.data +// CTRL2DATA-NEXT: %6 = "neura.constant"() <{value = 128 : i64}> {{.*}}: () -> !neura.data // CTRL2DATA-NEXT: %7 = "neura.grant_once"(%6) : (!neura.data) -> !neura.data -// CTRL2DATA-NEXT: %8 = "neura.constant"() <{value = 0 : i64}> : () -> !neura.data +// CTRL2DATA-NEXT: %8 = "neura.constant"() <{value = 0 : i64}> {{.*}}: () -> !neura.data // CTRL2DATA-NEXT: %9 = "neura.grant_once"(%8) : (!neura.data) -> !neura.data // CTRL2DATA-NEXT: %10 = neura.reserve : !neura.data // CTRL2DATA-NEXT: %11 = neura.phi_start %5, %10 : !neura.data, !neura.data -> !neura.data @@ -139,7 +144,7 @@ module attributes {} { // CTRL2DATA-NEXT: %19 = neura.phi_start %7, %18 : !neura.data, !neura.data -> !neura.data // CTRL2DATA-NEXT: %20 = neura.reserve : !neura.data // CTRL2DATA-NEXT: %21 = neura.phi_start %9, %20 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %22 = 
"neura.icmp"(%21, %19) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %22 = "neura.icmp"(%21, %19) <{cmpType = "slt"}> {{.*}}: (!neura.data, !neura.data) -> !neura.data // CTRL2DATA-NEXT: %23 = neura.grant_predicate %17, %22 : !neura.data, !neura.data -> !neura.data // CTRL2DATA-NEXT: %24 = neura.grant_predicate %19, %22 : !neura.data, !neura.data -> !neura.data // CTRL2DATA-NEXT: %25 = neura.grant_predicate %15, %22 : !neura.data, i1>, !neura.data -> !neura.data, i1> @@ -160,7 +165,7 @@ module attributes {} { // CTRL2DATA-NEXT: %40 = neura.phi_start %24, %39 : !neura.data, !neura.data -> !neura.data // CTRL2DATA-NEXT: %41 = neura.reserve : !neura.data // CTRL2DATA-NEXT: %42 = neura.phi_start %23, %41 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %43 = "neura.icmp"(%42, %40) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %43 = "neura.icmp"(%42, %40) <{cmpType = "slt"}> {{.*}}: (!neura.data, !neura.data) -> !neura.data // CTRL2DATA-NEXT: %44 = neura.grant_predicate %38, %43 : !neura.data, i1>, !neura.data -> !neura.data, i1> // CTRL2DATA-NEXT: %45 = neura.grant_predicate %36, %43 : !neura.data, !neura.data -> !neura.data // CTRL2DATA-NEXT: %46 = neura.grant_predicate %42, %43 : !neura.data, !neura.data -> !neura.data @@ -192,7 +197,83 @@ module attributes {} { // CTRL2DATA-NEXT: neura.ctrl_mov %47 -> %33 : !neura.data, i1> !neura.data, i1> // CTRL2DATA-NEXT: neura.ctrl_mov %48 -> %31 : !neura.data !neura.data // CTRL2DATA-NEXT: neura.ctrl_mov %49 -> %29 : !neura.data !neura.data -// CTRL2DATA-NEXT: "neura.return"() : () -> () +// CTRL2DATA-NEXT: %61 = "neura.constant"() <{value = true}> {{.*}}: () -> !neura.data +// CTRL2DATA-NEXT: %62 = "neura.grant_once"(%61) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: "neura.return"(%62) : (!neura.data) -> () // CTRL2DATA-NEXT: } +// CTRL2DATA-NEXT: } -// MAPPING: func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref, 
%arg1: memref) attributes {accelerator = "neura", dataflow_mode = "predicate", llvm.linkage = #llvm.linkage, mapping_info = {compiled_ii = 10 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 8 : i32, res_mii = 2 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}} { \ No newline at end of file +// MAPPING: module { +// MAPPING-NEXT: func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref, %arg1: memref) attributes {{.*}} { +// MAPPING-NEXT: %0 = "neura.grant_once"() <{constant_value = 0 : i64}> {{.*}}: () -> !neura.data +// MAPPING-NEXT: %1 = neura.reserve {{.*}}: !neura.data +// MAPPING-NEXT: %2 = "neura.data_mov"(%0) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %3 = neura.phi_start %2, %1 {{.*}}: !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %4 = neura.reserve {{.*}}: !neura.data +// MAPPING-NEXT: %5 = "neura.data_mov"(%0) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %6 = neura.phi_start %5, %4 {{.*}}: !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %7 = "neura.data_mov"(%6) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %8 = "neura.icmp"(%7) <{cmpType = "slt"}> {{.*}}rhs_value = 128 : i64{{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %9 = "neura.data_mov"(%3) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %10 = "neura.data_mov"(%8) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %11 = neura.grant_predicate %9, %10 {{.*}}: !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %12 = "neura.data_mov"(%6) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %13 = "neura.data_mov"(%8) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %14 = neura.grant_predicate %12, %13 {{.*}}: !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %15 = neura.reserve {{.*}}: !neura.data +// MAPPING-NEXT: %16 = "neura.data_mov"(%14) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %17 = neura.phi_start %16, %15 {{.*}}: !neura.data, !neura.data -> 
!neura.data +// MAPPING-NEXT: %18 = neura.reserve {{.*}}: !neura.data +// MAPPING-NEXT: %19 = "neura.data_mov"(%11) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %20 = neura.phi_start %19, %18 {{.*}}: !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %21 = neura.reserve {{.*}}: !neura.data +// MAPPING-NEXT: %22 = "neura.data_mov"(%11) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %23 = neura.phi_start %22, %21 {{.*}}: !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %24 = "neura.data_mov"(%23) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %25 = "neura.icmp"(%24) <{cmpType = "slt"}> {{.*}}rhs_value = 128 : i64{{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %26 = "neura.data_mov"(%20) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %27 = "neura.data_mov"(%25) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %28 = neura.grant_predicate %26, %27 {{.*}}: !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %29 = "neura.data_mov"(%23) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %30 = "neura.data_mov"(%25) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %31 = neura.grant_predicate %29, %30 {{.*}}: !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %32 = "neura.data_mov"(%17) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %33 = "neura.data_mov"(%25) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %34 = neura.grant_predicate %32, %33 {{.*}}: !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %35 = "neura.data_mov"(%25) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %36 = "neura.not"(%35) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %37 = "neura.data_mov"(%17) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %38 = "neura.data_mov"(%36) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %39 = neura.grant_predicate %37, %38 {{.*}}: !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %40 = "neura.data_mov"(%20) {{.*}}: 
(!neura.data) -> !neura.data +// MAPPING-NEXT: %41 = "neura.data_mov"(%36) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %42 = neura.grant_predicate %40, %41 {{.*}}: !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %43 = "neura.data_mov"(%39) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %44 = "neura.add"(%43) {{.*}}rhs_value = 1 : i64{{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: neura.ctrl_mov %44 -> %4 {{.*}}: !neura.data !neura.data +// MAPPING-NEXT: neura.ctrl_mov %42 -> %1 {{.*}}: !neura.data !neura.data +// MAPPING-NEXT: %45 = "neura.data_mov"(%28) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %46 = "neura.data_mov"(%28) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %47 = "neura.data_mov"(%28) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %48 = "neura.data_mov"(%28) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %49 = "neura.data_mov"(%28) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %50 = "neura.data_mov"(%31) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %51 = neura.load_indexed [%45, %46, %47, %48, %49, %50 : !neura.data, !neura.data, !neura.data, !neura.data, !neura.data, !neura.data] {{.*}}lhs_value = "%arg0"{{.*}}: !neura.data +// MAPPING-NEXT: %52 = "neura.data_mov"(%51) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %53 = "neura.data_mov"(%28) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %54 = "neura.data_mov"(%28) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %55 = "neura.data_mov"(%34) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %56 = "neura.data_mov"(%28) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %57 = "neura.data_mov"(%28) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %58 = "neura.data_mov"(%31) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: neura.store_indexed %52 to [%53, %54, %55, %56, %57, %58 : !neura.data, !neura.data, !neura.data, !neura.data, !neura.data, !neura.data] {{.*}}rhs_value 
= "%arg1"{{.*}}: !neura.data +// MAPPING-NEXT: %59 = "neura.data_mov"(%31) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: %60 = "neura.add"(%59) {{.*}}rhs_value = 1 : i64{{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: neura.ctrl_mov %60 -> %21 {{.*}}: !neura.data !neura.data +// MAPPING-NEXT: neura.ctrl_mov %28 -> %18 {{.*}}: !neura.data !neura.data +// MAPPING-NEXT: neura.ctrl_mov %34 -> %15 {{.*}}: !neura.data !neura.data +// MAPPING-NEXT: %61 = "neura.grant_once"() <{constant_value = true}> {{.*}}: () -> !neura.data +// MAPPING-NEXT: %62 = "neura.data_mov"(%61) {{.*}}: (!neura.data) -> !neura.data +// MAPPING-NEXT: "neura.return"(%62) {{.*}}: (!neura.data) -> () +// MAPPING-NEXT: } +// MAPPING-NEXT: } \ No newline at end of file diff --git a/test/controflow_fuse/simple_loop/simple_loop.mlir b/test/controflow_fuse/simple_loop/simple_loop.mlir index 7d191587..9ec56e8b 100644 --- a/test/controflow_fuse/simple_loop/simple_loop.mlir +++ b/test/controflow_fuse/simple_loop/simple_loop.mlir @@ -182,7 +182,9 @@ module attributes {} { // CTRL2DATA-NEXT: neura.ctrl_mov %32 -> %18 : !neura.data !neura.data // CTRL2DATA-NEXT: neura.ctrl_mov %33 -> %16 : !neura.data, i1> !neura.data, i1> // CTRL2DATA-NEXT: neura.ctrl_mov %34 -> %14 : !neura.data !neura.data -// CTRL2DATA-NEXT: "neura.return"() : () -> () +// CTRL2DATA-NEXT: %40 = "neura.constant"() <{value = true}> : () -> !neura.data +// CTRL2DATA-NEXT: %41 = "neura.grant_once"(%40) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: "neura.return"(%41) : (!neura.data) -> () // CTRL2DATA-NEXT: } @@ -193,7 +195,8 @@ module attributes {} { // FUSE-NEXT: %2 = "neura.mul"(%1) {rhs_value = 2 : i32} : (!neura.data) -> !neura.data // FUSE-NEXT: %3 = "neura.add"(%2) {rhs_value = 1 : i32} : (!neura.data) -> !neura.data // FUSE-NEXT: neura.store_indexed %3 to [%nextindex : !neura.data] {rhs_value = "%arg1"} : !neura.data -// FUSE-NEXT: "neura.return"() : () -> () +// FUSE-NEXT: %4 = "neura.grant_once"() <{constant_value 
= true}> : () -> !neura.data +// FUSE-NEXT: "neura.return"(%4) : (!neura.data) -> () // FUSE-NEXT: } // FUSE-MAPPING: func.func @_Z11simple_loopPiS_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", dataflow_mode = "predicate", llvm.linkage = #llvm.linkage, mapping_info = {compiled_ii = 1 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 1 : i32, res_mii = 1 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}} { \ No newline at end of file diff --git a/test/e2e/bicg/bicg_kernel.mlir b/test/e2e/bicg/bicg_kernel.mlir index fe2b0f9b..cb3556c4 100644 --- a/test/e2e/bicg/bicg_kernel.mlir +++ b/test/e2e/bicg/bicg_kernel.mlir @@ -189,7 +189,7 @@ // YAML: - index_per_ii: 1 // YAML: operations: // YAML: - opcode: "ICMP_SGT" -// YAML: id: 24 +// YAML: id: 25 // YAML: time_step: 1 // YAML: invalid_iterations: 0 // YAML: src_operands: @@ -213,7 +213,59 @@ // YAML: - operand: "$1" // YAML: color: "RED" // YAML: - operand: "$3" - +// YAML: color: "RED" +// YAML: - index_per_ii: 3 +// YAML: operations: +// YAML: - opcode: "GRANT_ONCE" +// YAML: id: 28 +// YAML: time_step: 3 +// YAML: invalid_iterations: 0 +// YAML: src_operands: +// YAML: - operand: "$0" +// YAML: color: "RED" +// YAML: dst_operands: +// YAML: - operand: "EAST" +// YAML: color: "RED" +// YAML: - operand: "NORTH" +// YAML: color: "RED" +// YAML: - operand: "$0" +// YAML: color: "RED" +// YAML: - operand: "$2" +// YAML: color: "RED" +// YAML: - index_per_ii: 4 +// YAML: operations: +// YAML: - opcode: "GRANT_PREDICATE" +// YAML: id: 37 +// YAML: time_step: 4 +// YAML: invalid_iterations: 0 +// YAML: src_operands: +// YAML: - operand: "$1" +// YAML: color: "RED" +// YAML: - operand: "$0" +// YAML: color: "RED" +// YAML: dst_operands: +// YAML: - operand: "$0" +// YAML: color: "RED" +// YAML: - operand: "$1" +// YAML: color: "RED" +// YAML: - index_per_ii: 5 +// YAML: operations: +// YAML: - opcode: "ICMP_SGT" +// YAML: id: 54 +// YAML: time_step: 5 +// YAML: 
invalid_iterations: 0 +// YAML: src_operands: +// YAML: - operand: "$0" +// YAML: color: "RED" +// YAML: - operand: "#0" +// YAML: color: "RED" +// YAML: dst_operands: +// YAML: - operand: "$0" +// YAML: color: "RED" +// YAML: - operand: "EAST" +// YAML: color: "RED" +// YAML: - operand: "NORTH" +// YAML: color: "RED" // ASM: # Compiled II: 11 // ASM: PE(0,0): @@ -236,9 +288,6 @@ // ASM-NEXT: ICMP_SGT, [$0], [#0] -> [$0], [EAST, RED], [NORTH, RED] (t=5, inv_iters=0) // ASM-NEXT: } (idx_per_ii=5) // ASM-NEXT: { -// ASM-NEXT: GRANT_PREDICATE, [NORTH, RED], [EAST, RED] -> [NORTH, RED] (t=17, inv_iters=1) -// ASM-NEXT: } (idx_per_ii=6) -// ASM-NEXT: { // ASM-NEXT: GRANT_PREDICATE, [$1], [$0] -> [EAST, RED] (t=7, inv_iters=0) // ASM-NEXT: } (idx_per_ii=7) // ASM-NEXT: { @@ -269,11 +318,11 @@ // ASM-NEXT: { // ASM-NEXT: DATA_MOV, [WEST, RED] -> [NORTH, RED] (t=4, inv_iters=0) // ASM-NEXT: GRANT_PREDICATE, [$4], [$2] -> [$1] (t=15, inv_iters=1) +// ASM-NEXT: DATA_MOV, [EAST, RED] -> [$0] (t=15, inv_iters=1) // ASM-NEXT: } (idx_per_ii=4) // ASM-NEXT: { // ASM-NEXT: DATA_MOV, [WEST, RED] -> [EAST, RED] (t=5, inv_iters=0) -// ASM-NEXT: GRANT_PREDICATE, [$5], [NORTH, RED] -> [$2] (t=16, inv_iters=1) -// ASM-NEXT: DATA_MOV, [EAST, RED] -> [WEST, RED] (t=16, inv_iters=1) +// ASM-NEXT: GRANT_PREDICATE, [$5], [$0] -> [$2] (t=16, inv_iters=1) // ASM-NEXT: } (idx_per_ii=5) // ASM-NEXT: { // ASM-NEXT: DATA_MOV, [WEST, RED] -> [NORTH, RED] (t=6, inv_iters=0) @@ -291,7 +340,6 @@ // ASM-NEXT: DATA_MOV, [WEST, RED] -> [$1] (t=10, inv_iters=0) // ASM-NEXT: } (idx_per_ii=10) - // RUN: mlir-neura-opt %t-kernel.mlir \ // RUN: --assign-accelerator \ // RUN: --lower-llvm-to-neura \ @@ -306,4 +354,4 @@ // RUN: dot -Tjson bicg_kernel.dot -o bicg_kernel.json // RUN: FileCheck %s --input-file=bicg_kernel.dot -check-prefix=DOT -// DOT: digraph G { +// DOT: digraph G { \ No newline at end of file diff --git a/test/e2e/histogram/histogram_kernel.mlir b/test/e2e/histogram/histogram_kernel.mlir index 
a381e8e0..35975374 100644 --- a/test/e2e/histogram/histogram_kernel.mlir +++ b/test/e2e/histogram/histogram_kernel.mlir @@ -23,129 +23,88 @@ // RUN: FileCheck %s --input-file=tmp-generated-instructions.asm --check-prefix=ASM -// MAPPING: func.func -// MAPPING-SAME: compiled_ii = 5 -// MAPPING-SAME: mapping_mode = "spatial-temporal" -// MAPPING-SAME: mapping_strategy = "heuristic" -// MAPPING-SAME: rec_mii = 5 -// MAPPING-SAME: res_mii = 2 -// MAPPING-SAME: x_tiles = 4 -// MAPPING-SAME: y_tiles = 4 -// MAPPING-NEXT: %0 = "neura.grant_once"() <{constant_value = 0 : i64}> {dfg_id = 0 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 0 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 0 : i32, x = 3 : i32, y = 2 : i32}]} : () -> !neura.data -// MAPPING-NEXT: %1 = neura.reserve {dfg_id = 1 : i32} : !neura.data -// MAPPING-NEXT: %2 = "neura.data_mov"(%0) {dfg_id = 3 : i32, mapping_locs = [{id = 352 : i32, index_per_ii = 0 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 0 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %3 = neura.phi_start %2, %1 {dfg_id = 4 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 1 : i32, x = 3 : i32, y = 2 : i32}]} : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: %4 = "neura.data_mov"(%3) {dfg_id = 6 : i32, mapping_locs = [{id = 35 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 1 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %5 = "neura.gep"(%4) <{operandSegmentSizes = array}> {dfg_id = 8 : i32, lhs_value = "%arg0", mapping_locs = [{id = 10 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 2 : i32, x = 2 : i32, y = 2 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %6 = "neura.data_mov"(%5) {dfg_id = 11 : i32, mapping_locs = [{id = 320 : i32, index_per_ii = 2 : i32, 
invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 2 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %7 = "neura.load"(%6) {dfg_id = 13 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 3 : i32, x = 2 : i32, y = 2 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %8 = "neura.data_mov"(%7) {dfg_id = 15 : i32, mapping_locs = [{id = 32 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 3 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %9 = "neura.mul"(%8) {dfg_id = 17 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 4 : i32, x = 3 : i32, y = 2 : i32}], rhs_value = 5 : i32} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %10 = "neura.data_mov"(%9) {dfg_id = 19 : i32, mapping_locs = [{id = 37 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 4 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %11 = "neura.add"(%10) {dfg_id = 21 : i32, mapping_locs = [{id = 15 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 5 : i32, x = 3 : i32, y = 3 : i32}], rhs_value = -5 : i32} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %12 = "neura.data_mov"(%11) {dfg_id = 23 : i32, mapping_locs = [{id = 480 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 5 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %13 = "neura.div"(%12) {dfg_id = 24 : i32, mapping_locs = [{id = 15 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 6 : i32, x = 3 : i32, y = 3 : i32}], rhs_value = 18 : i32} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %14 = "neura.data_mov"(%13) {dfg_id = 25 : i32, mapping_locs = [{id = 480 : i32, 
index_per_ii = 1 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 6 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %15 = neura.sext %14 {dfg_id = 26 : i32, mapping_locs = [{id = 15 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 7 : i32, x = 3 : i32, y = 3 : i32}]} : !neura.data -> !neura.data -// MAPPING-NEXT: %16 = "neura.data_mov"(%15) {dfg_id = 27 : i32, mapping_locs = [{id = 480 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 7 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %17 = "neura.gep"(%16) <{operandSegmentSizes = array}> {dfg_id = 28 : i32, lhs_value = "%arg1", mapping_locs = [{id = 15 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 8 : i32, x = 3 : i32, y = 3 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %18 = "neura.data_mov"(%17) {dfg_id = 30 : i32, mapping_locs = [{id = 480 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 8 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %19 = "neura.load"(%18) {dfg_id = 31 : i32, mapping_locs = [{id = 15 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 9 : i32, x = 3 : i32, y = 3 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %20 = "neura.data_mov"(%19) {dfg_id = 32 : i32, mapping_locs = [{id = 46 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 9 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %21 = "neura.add"(%20) {dfg_id = 33 : i32, mapping_locs = [{id = 14 : i32, index_per_ii = 0 : i32, invalid_iterations = 2 : i32, resource = "tile", time_step = 10 : i32, x = 2 : i32, y = 3 : i32}], rhs_value = 1 : i32} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %22 = 
"neura.data_mov"(%21) {dfg_id = 34 : i32, mapping_locs = [{id = 448 : i32, index_per_ii = 0 : i32, invalid_iterations = 2 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 10 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %23 = "neura.data_mov"(%17) {dfg_id = 29 : i32, mapping_locs = [{id = 46 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 8 : i32}, {id = 449 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 9 : i32}, {id = 449 : i32, index_per_ii = 0 : i32, invalid_iterations = 2 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 10 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: "neura.store"(%22, %23) {dfg_id = 35 : i32, mapping_locs = [{id = 14 : i32, index_per_ii = 1 : i32, invalid_iterations = 2 : i32, resource = "tile", time_step = 11 : i32, x = 2 : i32, y = 3 : i32}]} : (!neura.data, !neura.data) -> () -// MAPPING-NEXT: %24 = "neura.data_mov"(%3) {dfg_id = 5 : i32, mapping_locs = [{id = 352 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 1 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %25 = "neura.add"(%24) {dfg_id = 7 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 2 : i32, x = 3 : i32, y = 2 : i32}], rhs_value = 1 : i64} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %26 = "neura.data_mov"(%25) {dfg_id = 10 : i32, mapping_locs = [{id = 352 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 2 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %27 = "neura.icmp"(%26) <{cmpType = "eq"}> {dfg_id = 12 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 
3 : i32, x = 3 : i32, y = 2 : i32}], rhs_value = 20 : i64} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %28 = "neura.data_mov"(%27) {dfg_id = 14 : i32, mapping_locs = [{id = 35 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 3 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %29 = "neura.not"(%28) {dfg_id = 16 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 4 : i32, x = 2 : i32, y = 2 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %30 = "neura.data_mov"(%25) {dfg_id = 9 : i32, mapping_locs = [{id = 35 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 2 : i32}, {id = 320 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 3 : i32}, {id = 320 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 4 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %31 = "neura.data_mov"(%29) {dfg_id = 18 : i32, mapping_locs = [{id = 321 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 4 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %32 = neura.grant_predicate %30, %31 {dfg_id = 20 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 5 : i32, x = 2 : i32, y = 2 : i32}]} : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: neura.ctrl_mov %32 -> %1 {dfg_id = 22 : i32, mapping_locs = [{id = 32 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 5 : i32}]} : !neura.data !neura.data -// MAPPING-NEXT: "neura.return"() {dfg_id = 2 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 1 : i32, invalid_iterations = 2 : i32, resource = "tile", time_step 
= 11 : i32, x = 2 : i32, y = 2 : i32}]} : () -> () +// MAPPING: module attributes {{.*}} +// MAPPING-NEXT: func.func @_Z6kernelPiS_(%arg0: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg1: !llvm.ptr {llvm.nocapture, llvm.noundef}) -> !llvm.void attributes {CConv = #llvm.cconv, accelerator = "neura", dataflow_mode = "predicate", linkage = #llvm.linkage, mapping_info = {compiled_ii = 5 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 5 : i32, res_mii = 2 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}, memory_effects = #llvm.memory_effects, no_unwind, passthrough = ["mustprogress", "nofree", "norecurse", "nosync", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic", unnamed_addr = 1 : i64, visibility_ = 0 : i64} { +// MAPPING-NEXT: %0 = "neura.grant_once"() <{constant_value = 0 : i64}> {dfg_id = 0 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 0 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 0 : i32, x = 3 : i32, y = 2 : i32}]} : () -> !neura.data +// MAPPING-NEXT: %1 = neura.reserve {dfg_id = 1 : i32} : !neura.data +// MAPPING-NEXT: %2 = "neura.data_mov"(%0) {dfg_id = 3 : i32, mapping_locs = [{id = 352 : i32, index_per_ii = 0 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 0 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %3 = neura.phi_start %2, %1 {dfg_id = 5 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 1 : i32, x = 3 : i32, y = 2 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %4 = "neura.data_mov"(%3) {dfg_id = 8 : i32, mapping_locs = [{id = 35 : i32, index_per_ii = 1 : i32, 
invalid_iterations = 0 : i32, resource = "link", time_step = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %5 = "neura.gep"(%4) <{operandSegmentSizes = array}> {dfg_id = 10 : i32, lhs_value = "%arg0", mapping_locs = [{id = 10 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 2 : i32, x = 2 : i32, y = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %6 = "neura.data_mov"(%5) {dfg_id = 13 : i32, mapping_locs = [{id = 320 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %7 = "neura.load"(%6) {dfg_id = 15 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 3 : i32, x = 2 : i32, y = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %8 = "neura.data_mov"(%7) {dfg_id = 17 : i32, mapping_locs = [{id = 32 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 3 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %9 = "neura.mul"(%8) {dfg_id = 19 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 4 : i32, x = 3 : i32, y = 2 : i32}], rhs_value = 5 : i32} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %10 = "neura.data_mov"(%9) {dfg_id = 21 : i32, mapping_locs = [{id = 37 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 4 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %11 = "neura.add"(%10) {dfg_id = 23 : i32, mapping_locs = [{id = 15 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 5 : i32, x = 3 : i32, y = 3 : i32}], rhs_value = -5 : i32} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %12 = "neura.data_mov"(%11) {dfg_id = 25 : i32, mapping_locs = [{id = 480 : i32, index_per_ii = 
0 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 5 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %13 = "neura.div"(%12) {dfg_id = 26 : i32, mapping_locs = [{id = 15 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 6 : i32, x = 3 : i32, y = 3 : i32}], rhs_value = 18 : i32} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %14 = "neura.data_mov"(%13) {dfg_id = 27 : i32, mapping_locs = [{id = 480 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 6 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %15 = neura.sext %14 {dfg_id = 28 : i32, mapping_locs = [{id = 15 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 7 : i32, x = 3 : i32, y = 3 : i32}]} : !neura.data -> !neura.data +// MAPPING-NEXT: %16 = "neura.data_mov"(%15) {dfg_id = 29 : i32, mapping_locs = [{id = 480 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 7 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %17 = "neura.gep"(%16) <{operandSegmentSizes = array}> {dfg_id = 30 : i32, lhs_value = "%arg1", mapping_locs = [{id = 15 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 8 : i32, x = 3 : i32, y = 3 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %18 = "neura.data_mov"(%17) {dfg_id = 32 : i32, mapping_locs = [{id = 480 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 8 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %19 = "neura.load"(%18) {dfg_id = 33 : i32, mapping_locs = [{id = 15 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 9 : i32, x = 3 : i32, y = 3 : i32}]} : (!neura.data) -> !neura.data +// 
MAPPING-NEXT: %20 = "neura.data_mov"(%19) {dfg_id = 34 : i32, mapping_locs = [{id = 46 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 9 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %21 = "neura.add"(%20) {dfg_id = 35 : i32, mapping_locs = [{id = 14 : i32, index_per_ii = 0 : i32, invalid_iterations = 2 : i32, resource = "tile", time_step = 10 : i32, x = 2 : i32, y = 3 : i32}], rhs_value = 1 : i32} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %22 = "neura.data_mov"(%21) {dfg_id = 36 : i32, mapping_locs = [{id = 448 : i32, index_per_ii = 0 : i32, invalid_iterations = 2 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 10 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %23 = "neura.data_mov"(%17) {dfg_id = 31 : i32, mapping_locs = [{id = 46 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 8 : i32}, {id = 449 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 9 : i32}, {id = 449 : i32, index_per_ii = 0 : i32, invalid_iterations = 2 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 10 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: "neura.store"(%22, %23) {dfg_id = 37 : i32, mapping_locs = [{id = 14 : i32, index_per_ii = 1 : i32, invalid_iterations = 2 : i32, resource = "tile", time_step = 11 : i32, x = 2 : i32, y = 3 : i32}]} : (!neura.data, !neura.data) -> () +// MAPPING-NEXT: %24 = "neura.data_mov"(%3) {dfg_id = 7 : i32, mapping_locs = [{id = 352 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %25 = "neura.add"(%24) {dfg_id = 9 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 2 : i32, x = 3 : i32, y = 2 : i32}], 
rhs_value = 1 : i64} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %26 = "neura.data_mov"(%25) {dfg_id = 12 : i32, mapping_locs = [{id = 352 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %27 = "neura.icmp"(%26) <{cmpType = "eq"}> {dfg_id = 14 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 3 : i32, x = 3 : i32, y = 2 : i32}], rhs_value = 20 : i64} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %28 = "neura.data_mov"(%27) {dfg_id = 16 : i32, mapping_locs = [{id = 35 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 3 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %29 = "neura.not"(%28) {dfg_id = 18 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 4 : i32, x = 2 : i32, y = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %30 = "neura.data_mov"(%25) {dfg_id = 11 : i32, mapping_locs = [{id = 35 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 2 : i32}, {id = 320 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 3 : i32}, {id = 320 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 4 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %31 = "neura.data_mov"(%29) {dfg_id = 20 : i32, mapping_locs = [{id = 321 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 4 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %32 = neura.grant_predicate %30, %31 {dfg_id = 22 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 0 : i32, 
invalid_iterations = 1 : i32, resource = "tile", time_step = 5 : i32, x = 2 : i32, y = 2 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: neura.ctrl_mov %32 -> %1 {dfg_id = 24 : i32, mapping_locs = [{id = 32 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 5 : i32}]} : !neura.data !neura.data +// MAPPING-NEXT: %33 = "neura.grant_once"() <{constant_value = true}> {dfg_id = 2 : i32, mapping_locs = [{id = 2 : i32, index_per_ii = 0 : i32, invalid_iterations = 2 : i32, resource = "tile", time_step = 10 : i32, x = 2 : i32, y = 0 : i32}]} : () -> !neura.data +// MAPPING-NEXT: %34 = "neura.data_mov"(%33) {dfg_id = 4 : i32, mapping_locs = [{id = 64 : i32, index_per_ii = 0 : i32, invalid_iterations = 2 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 10 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: "neura.return"(%34) {dfg_id = 6 : i32, mapping_locs = [{id = 2 : i32, index_per_ii = 1 : i32, invalid_iterations = 2 : i32, resource = "tile", time_step = 11 : i32, x = 2 : i32, y = 0 : i32}]} : (!neura.data) -> () +// MAPPING-NEXT: } +// MAPPING-NEXT: } -// YAML: array_config: -// YAML-NEXT: columns: 4 -// YAML-NEXT: rows: 4 -// YAML-NEXT: compiled_ii: 5 -// YAML-NEXT: cores: -// YAML-NEXT: - column: 2 -// YAML-NEXT: row: 2 -// YAML-NEXT: core_id: "10" -// YAML-NEXT: entries: -// YAML-NEXT: - entry_id: "entry0" -// YAML-NEXT: instructions: -// YAML-NEXT: - index_per_ii: 0 -// YAML-NEXT: operations: -// YAML-NEXT: - opcode: "GRANT_PREDICATE" -// YAML-NEXT: id: 20 -// YAML-NEXT: time_step: 5 -// YAML-NEXT: invalid_iterations: 1 -// YAML-NEXT: src_operands: -// YAML-NEXT: - operand: "$0" -// YAML-NEXT: color: "RED" -// YAML-NEXT: - operand: "$1" -// YAML-NEXT: color: "RED" -// YAML-NEXT: dst_operands: -// YAML-NEXT: - operand: "EAST" -// YAML-NEXT: color: "RED" -// YAML-NEXT: - index_per_ii: 1 -// YAML-NEXT: operations: -// YAML-NEXT: - opcode: "RETURN" -// YAML-NEXT: id: 2 -// 
YAML-NEXT: time_step: 11 -// YAML-NEXT: invalid_iterations: 2 -// YAML-NEXT: - index_per_ii: 2 -// YAML-NEXT: operations: -// YAML-NEXT: - opcode: "GEP" -// YAML-NEXT: id: 8 -// YAML-NEXT: time_step: 2 -// YAML-NEXT: invalid_iterations: 0 -// YAML-NEXT: src_operands: -// YAML-NEXT: - operand: "EAST" -// YAML-NEXT: color: "RED" -// YAML-NEXT: dst_operands: -// YAML-NEXT: - operand: "$0" -// YAML-NEXT: color: "RED" +// YAML: array_config: +// YAML-NEXT: columns: 4 +// YAML-NEXT: rows: 4 +// YAML-NEXT: compiled_ii: 5 +// YAML-NEXT: cores: +// YAML-NEXT: - column: 2 +// YAML-NEXT: row: 0 +// YAML-NEXT: core_id: "2" +// YAML-NEXT: entries: +// YAML-NEXT: - entry_id: "entry0" +// YAML-NEXT: instructions: +// YAML-NEXT: - index_per_ii: 0 +// YAML-NEXT: operations: +// YAML-NEXT: - opcode: "GRANT_ONCE" +// YAML-NEXT: id: 2 +// YAML-NEXT: time_step: 10 +// YAML-NEXT: invalid_iterations: 2 +// YAML-NEXT: src_operands: +// YAML-NEXT: - operand: "#-1" +// YAML-NEXT: color: "RED" // ASM: # Compiled II: 5 -// ASM: PE(2,2): +// ASM: PE(2,0): // ASM-NEXT: { -// ASM-NEXT: GRANT_PREDICATE, [$0], [$1] -> [EAST, RED] (t=5, inv_iters=1) +// ASM-NEXT: GRANT_ONCE, [#-1] -> [$0] (t=10, inv_iters=2) // ASM-NEXT: } (idx_per_ii=0) // ASM-NEXT: { -// ASM-NEXT: RETURN (t=11, inv_iters=2) +// ASM-NEXT: RETURN, [$0] (t=11, inv_iters=2) // ASM-NEXT: } (idx_per_ii=1) +// ASM: PE(2,2): +// ASM-NEXT: { +// ASM-NEXT: GRANT_PREDICATE, [$0], [$1] -> [EAST, RED] (t=5, inv_iters=1) +// ASM-NEXT: } (idx_per_ii=0) // ASM-NEXT: { // ASM-NEXT: GEP, [EAST, RED] -> [$0] (t=2, inv_iters=0) // ASM-NEXT: } (idx_per_ii=2) // ASM-NEXT: { // ASM-NEXT: LOAD, [$0] -> [EAST, RED] (t=3, inv_iters=0) // ASM-NEXT: DATA_MOV, [EAST, RED] -> [$0] (t=3, inv_iters=0) -// ASM-NEXT: } (idx_per_ii=3) -// ASM-NEXT: { -// ASM-NEXT: NOT, [EAST, RED] -> [$1] (t=4, inv_iters=0) -// ASM-NEXT: } (idx_per_ii=4) -// ASM: PE(3,2): -// ASM-NEXT: { -// ASM-NEXT: GRANT_ONCE, [#0] -> [$0] (t=0, inv_iters=0) -// ASM-NEXT: } (idx_per_ii=0) -// 
ASM-NEXT: { -// ASM-NEXT: PHI_START, [$0], [WEST, RED] -> [WEST, RED], [$0] (t=1, inv_iters=0) -// ASM-NEXT: } (idx_per_ii=1) -// ASM-NEXT: { -// ASM-NEXT: ADD, [$0], [#1] -> [$0], [WEST, RED] (t=2, inv_iters=0) -// ASM-NEXT: } (idx_per_ii=2) -// ASM-NEXT: { -// ASM-NEXT: ICMP_EQ, [$0], [#20] -> [WEST, RED] (t=3, inv_iters=0) -// ASM-NEXT: } (idx_per_ii=3) -// ASM-NEXT: { -// ASM-NEXT: MUL, [WEST, RED], [#5] -> [NORTH, RED] (t=4, inv_iters=0) -// ASM-NEXT: } (idx_per_ii=4) // RUN: mlir-neura-opt %t-kernel.mlir \ // RUN: --assign-accelerator \ @@ -161,4 +120,23 @@ // RUN: dot -Tjson histogram_kernel.dot -o histogram_kernel.json // RUN: FileCheck %s --input-file=histogram_kernel.dot -check-prefix=DOT -// DOT: digraph G { +// DOT: digraph G { +// DOT-NEXT: compound = true; +// DOT-NEXT: subgraph cluster_1 { +// DOT-NEXT: v2 [label = " ", shape = plain]; +// DOT-NEXT: label = "builtin.module {{.*}}"; +// DOT-NEXT: subgraph cluster_3 { +// DOT-NEXT: v4 [label = " ", shape = plain]; +// DOT-NEXT: label = ""; +// DOT-NEXT: subgraph cluster_5 { +// DOT-NEXT: v6 [label = " ", shape = plain]; +// DOT-NEXT: label = "func.func : ()\n\nCConv: #llvm.cconv\naccelerator: \"neura\"\narg_attrs: [{llvm.nocapture, ll...\ndataflow_mode: \"predicate\"\nfunction_type: (!llvm.ptr, !llvm.pt...\nlinkage: #llvm.linkage", shape = ellipse, style = filled]; diff --git a/test/e2e/relu/relu_kernel.mlir b/test/e2e/relu/relu_kernel.mlir index 45d2248b..f8e391f3 100644 --- a/test/e2e/relu/relu_kernel.mlir +++ b/test/e2e/relu/relu_kernel.mlir @@ -30,56 +30,60 @@ // // Check the mapped MLIR contains proper structure and neura operations. 
// RUN: FileCheck %s --input-file=%t-mapping.mlir -check-prefix=MAPPING -// MAPPING: func.func @kernel(%arg0: i32 {llvm.noundef}, %arg1: i32 {llvm.noundef}, %arg2: i32 {llvm.noundef}, %arg3: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.writeonly}, %arg4: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg5: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readnone}) -> !llvm.void attributes {CConv = #llvm.cconv, accelerator = "neura", dataflow_mode = "predicate", linkage = #llvm.linkage, mapping_info = {compiled_ii = 5 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 5 : i32, res_mii = 2 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}, memory_effects = #llvm.memory_effects, no_unwind, passthrough = ["nofree", "norecurse", "nosync", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic", unnamed_addr = 1 : i64, visibility_ = 0 : i64} { -// MAPPING-NEXT: %0 = "neura.grant_once"() <{constant_value = 0 : i32}> {dfg_id = 0 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 0 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 0 : i32, x = 3 : i32, y = 2 : i32}]} : () -> !neura.data -// MAPPING-NEXT: %1 = neura.reserve {dfg_id = 1 : i32} : !neura.data -// MAPPING-NEXT: %2 = "neura.data_mov"(%0) {dfg_id = 4 : i32, mapping_locs = [{id = 35 : i32, index_per_ii = 0 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 0 : i32}, {id = 31 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 1 : i32}, {id = 288 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 2 : i32}, {id = 288 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, 
per_tile_register_id = 0 : i32, resource = "register", time_step = 3 : i32}, {id = 288 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 4 : i32}, {id = 288 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 5 : i32}, {id = 288 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 6 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %3 = neura.phi_start %2, %1 {dfg_id = 6 : i32, mapping_locs = [{id = 9 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 7 : i32, x = 1 : i32, y = 2 : i32}]} : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: %4 = neura.reserve {dfg_id = 2 : i32} : !neura.data -// MAPPING-NEXT: %5 = "neura.data_mov"(%0) {dfg_id = 5 : i32, mapping_locs = [{id = 352 : i32, index_per_ii = 0 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 0 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %6 = neura.phi_start %5, %4 {dfg_id = 7 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 1 : i32, x = 3 : i32, y = 2 : i32}]} : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: %7 = "neura.data_mov"(%6) {dfg_id = 11 : i32, mapping_locs = [{id = 35 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 1 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %8 = "neura.cast"(%7) <{cast_type = "trunc"}> {dfg_id = 13 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 2 : i32, x = 2 : i32, y = 2 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %9 = "neura.data_mov"(%8) {dfg_id = 17 : i32, mapping_locs = [{id = 321 : i32, 
index_per_ii = 2 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 2 : i32}, {id = 321 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 3 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %10 = "neura.div"(%9) {dfg_id = 20 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 4 : i32, x = 2 : i32, y = 2 : i32}], rhs_value = 70 : i16} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %11 = "neura.data_mov"(%8) {dfg_id = 16 : i32, mapping_locs = [{id = 320 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 2 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %12 = "neura.rem"(%11) {dfg_id = 19 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 3 : i32, x = 2 : i32, y = 2 : i32}], rhs_value = 70 : i16} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %13 = "neura.data_mov"(%10) {dfg_id = 23 : i32, mapping_locs = [{id = 321 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 4 : i32}, {id = 321 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 5 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %14 = neura.zext %13 {dfg_id = 26 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 6 : i32, x = 2 : i32, y = 2 : i32}]} : !neura.data -> !neura.data -// MAPPING-NEXT: %15 = "neura.data_mov"(%12) {dfg_id = 22 : i32, mapping_locs = [{id = 320 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 3 : i32}, {id = 320 
: i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 4 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %16 = neura.zext %15 {dfg_id = 25 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 5 : i32, x = 2 : i32, y = 2 : i32}]} : !neura.data -> !neura.data -// MAPPING-NEXT: %17 = "neura.data_mov"(%14) {dfg_id = 32 : i32, mapping_locs = [{id = 33 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 6 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %18 = "neura.data_mov"(%16) {dfg_id = 30 : i32, mapping_locs = [{id = 33 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 5 : i32}, {id = 192 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 6 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %19 = "neura.gep"(%17, %18) <{operandSegmentSizes = array}> {dfg_id = 36 : i32, lhs_value = "%arg4", mapping_locs = [{id = 6 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 7 : i32, x = 2 : i32, y = 1 : i32}]} : (!neura.data, !neura.data) -> !neura.data -// MAPPING-NEXT: %20 = "neura.data_mov"(%19) {dfg_id = 40 : i32, mapping_locs = [{id = 192 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 7 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %21 = "neura.load"(%20) {dfg_id = 41 : i32, mapping_locs = [{id = 6 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 8 : i32, x = 2 : i32, y = 1 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %22 = "neura.data_mov"(%21) {dfg_id = 43 : i32, mapping_locs = [{id = 192 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, 
per_tile_register_id = 0 : i32, resource = "register", time_step = 8 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %23 = "neura.icmp"(%22) <{cmpType = "sge"}> {dfg_id = 44 : i32, mapping_locs = [{id = 6 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 9 : i32, x = 2 : i32, y = 1 : i32}], rhs_value = 0 : i32} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %24 = "neura.data_mov"(%23) {dfg_id = 45 : i32, mapping_locs = [{id = 192 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 9 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %25 = "neura.data_mov"(%21) {dfg_id = 42 : i32, mapping_locs = [{id = 193 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 8 : i32}, {id = 193 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 9 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %26 = "neura.data_mov"(%3) {dfg_id = 9 : i32, mapping_locs = [{id = 28 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 7 : i32}, {id = 33 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 8 : i32}, {id = 194 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 9 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %27 = "neura.sel"(%24, %25, %26) {dfg_id = 46 : i32, mapping_locs = [{id = 6 : i32, index_per_ii = 0 : i32, invalid_iterations = 2 : i32, resource = "tile", time_step = 10 : i32, x = 2 : i32, y = 1 : i32}]} : (!neura.data, !neura.data, !neura.data) -> !neura.data -// MAPPING-NEXT: %28 = "neura.data_mov"(%14) {dfg_id = 31 : i32, mapping_locs = [{id = 34 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, resource = 
"link", time_step = 6 : i32}, {id = 448 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 7 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %29 = "neura.data_mov"(%16) {dfg_id = 29 : i32, mapping_locs = [{id = 34 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 5 : i32}, {id = 449 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 6 : i32}, {id = 449 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 7 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %30 = "neura.gep"(%28, %29) <{operandSegmentSizes = array}> {dfg_id = 35 : i32, lhs_value = "%arg3", mapping_locs = [{id = 14 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 8 : i32, x = 2 : i32, y = 3 : i32}]} : (!neura.data, !neura.data) -> !neura.data -// MAPPING-NEXT: %31 = "neura.data_mov"(%27) {dfg_id = 47 : i32, mapping_locs = [{id = 192 : i32, index_per_ii = 0 : i32, invalid_iterations = 2 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 10 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %32 = "neura.data_mov"(%30) {dfg_id = 39 : i32, mapping_locs = [{id = 45 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 8 : i32}, {id = 33 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 9 : i32}, {id = 193 : i32, index_per_ii = 0 : i32, invalid_iterations = 2 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 10 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: "neura.store"(%31, %32) {dfg_id = 48 : i32, mapping_locs = [{id = 6 : i32, index_per_ii = 1 : i32, invalid_iterations = 2 : i32, resource = "tile", time_step = 11 : i32, x = 2 : i32, y 
= 1 : i32}]} : (!neura.data, !neura.data) -> () -// MAPPING-NEXT: %33 = "neura.data_mov"(%6) {dfg_id = 10 : i32, mapping_locs = [{id = 352 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 1 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %34 = "neura.add"(%33) {dfg_id = 12 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 2 : i32, x = 3 : i32, y = 2 : i32}], rhs_value = 1 : i32} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %35 = "neura.data_mov"(%34) {dfg_id = 15 : i32, mapping_locs = [{id = 352 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 2 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %36 = "neura.icmp"(%35) <{cmpType = "eq"}> {dfg_id = 18 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 3 : i32, x = 3 : i32, y = 2 : i32}], rhs_value = 4200 : i32} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %37 = "neura.data_mov"(%36) {dfg_id = 21 : i32, mapping_locs = [{id = 352 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 3 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %38 = "neura.not"(%37) {dfg_id = 24 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 4 : i32, x = 3 : i32, y = 2 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %39 = "neura.data_mov"(%34) {dfg_id = 14 : i32, mapping_locs = [{id = 36 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 2 : i32}, {id = 224 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 3 : i32}, {id = 224 : i32, 
index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 4 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %40 = "neura.data_mov"(%38) {dfg_id = 28 : i32, mapping_locs = [{id = 36 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 4 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %41 = neura.grant_predicate %39, %40 {dfg_id = 34 : i32, mapping_locs = [{id = 7 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 5 : i32, x = 3 : i32, y = 1 : i32}]} : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: neura.ctrl_mov %41 -> %4 {dfg_id = 38 : i32, mapping_locs = [{id = 23 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 5 : i32}]} : !neura.data !neura.data -// MAPPING-NEXT: %42 = "neura.data_mov"(%3) {dfg_id = 8 : i32, mapping_locs = [{id = 289 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 7 : i32}, {id = 289 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 8 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %43 = "neura.data_mov"(%38) {dfg_id = 27 : i32, mapping_locs = [{id = 35 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 4 : i32}, {id = 31 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 5 : i32}, {id = 290 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 6 : i32}, {id = 290 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 7 : i32}, {id = 290 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 2 : i32, resource = 
"register", time_step = 8 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %44 = neura.grant_predicate %42, %43 {dfg_id = 33 : i32, mapping_locs = [{id = 9 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 9 : i32, x = 1 : i32, y = 2 : i32}]} : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: neura.ctrl_mov %44 -> %1 {dfg_id = 37 : i32, mapping_locs = [{id = 289 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 9 : i32}, {id = 289 : i32, index_per_ii = 0 : i32, invalid_iterations = 2 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 10 : i32}, {id = 289 : i32, index_per_ii = 1 : i32, invalid_iterations = 2 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 11 : i32}]} : !neura.data !neura.data -// MAPPING-NEXT: "neura.return"() {dfg_id = 3 : i32, mapping_locs = [{id = 2 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 9 : i32, x = 2 : i32, y = 0 : i32}]} : () -> () +// MAPPING: module attributes {{.*}} +// MAPPING-NEXT: func.func @kernel(%arg0: i32 {llvm.noundef}, %arg1: i32 {llvm.noundef}, %arg2: i32 {llvm.noundef}, %arg3: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.writeonly}, %arg4: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg5: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readnone}) -> !llvm.void attributes {CConv = #llvm.cconv, accelerator = "neura", dataflow_mode = "predicate", linkage = #llvm.linkage, mapping_info = {compiled_ii = 5 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 5 : i32, res_mii = 2 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}, memory_effects = #llvm.memory_effects, no_unwind, passthrough = ["nofree", "norecurse", "nosync", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", 
"x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic", unnamed_addr = 1 : i64, visibility_ = 0 : i64} { +// MAPPING-NEXT: %0 = "neura.grant_once"() <{constant_value = 0 : i32}> {dfg_id = 0 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 0 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 0 : i32, x = 3 : i32, y = 2 : i32}]} : () -> !neura.data +// MAPPING-NEXT: %1 = neura.reserve {dfg_id = 1 : i32} : !neura.data +// MAPPING-NEXT: %2 = "neura.data_mov"(%0) {dfg_id = 4 : i32, mapping_locs = [{id = 35 : i32, index_per_ii = 0 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 0 : i32}, {id = 31 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 1 : i32}, {id = 288 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 2 : i32}, {id = 288 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 3 : i32}, {id = 288 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 4 : i32}, {id = 288 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 5 : i32}, {id = 288 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 6 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %3 = neura.phi_start %2, %1 {dfg_id = 7 : i32, mapping_locs = [{id = 9 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 7 : i32, x = 1 : i32, y = 2 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %4 = neura.reserve {dfg_id = 2 : i32} : !neura.data +// MAPPING-NEXT: %5 = "neura.data_mov"(%0) {dfg_id 
= 5 : i32, mapping_locs = [{id = 352 : i32, index_per_ii = 0 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 0 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %6 = neura.phi_start %5, %4 {dfg_id = 8 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 1 : i32, x = 3 : i32, y = 2 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %7 = "neura.data_mov"(%6) {dfg_id = 13 : i32, mapping_locs = [{id = 35 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %8 = "neura.cast"(%7) <{cast_type = "trunc"}> {dfg_id = 15 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 2 : i32, x = 2 : i32, y = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %9 = "neura.data_mov"(%8) {dfg_id = 19 : i32, mapping_locs = [{id = 321 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 2 : i32}, {id = 321 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 3 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %10 = "neura.div"(%9) {dfg_id = 22 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 4 : i32, x = 2 : i32, y = 2 : i32}], rhs_value = 70 : i16} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %11 = "neura.data_mov"(%8) {dfg_id = 18 : i32, mapping_locs = [{id = 320 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %12 = "neura.rem"(%11) {dfg_id = 21 : i32, mapping_locs = [{id = 10 : i32, 
index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 3 : i32, x = 2 : i32, y = 2 : i32}], rhs_value = 70 : i16} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %13 = "neura.data_mov"(%10) {dfg_id = 25 : i32, mapping_locs = [{id = 321 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 4 : i32}, {id = 321 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 5 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %14 = neura.zext %13 {dfg_id = 28 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 6 : i32, x = 2 : i32, y = 2 : i32}]} : !neura.data -> !neura.data +// MAPPING-NEXT: %15 = "neura.data_mov"(%12) {dfg_id = 24 : i32, mapping_locs = [{id = 320 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 3 : i32}, {id = 320 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 4 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %16 = neura.zext %15 {dfg_id = 27 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 5 : i32, x = 2 : i32, y = 2 : i32}]} : !neura.data -> !neura.data +// MAPPING-NEXT: %17 = "neura.data_mov"(%14) {dfg_id = 34 : i32, mapping_locs = [{id = 33 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 6 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %18 = "neura.data_mov"(%16) {dfg_id = 32 : i32, mapping_locs = [{id = 33 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 5 : i32}, {id = 192 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, 
resource = "register", time_step = 6 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %19 = "neura.gep"(%17, %18) <{operandSegmentSizes = array}> {dfg_id = 38 : i32, lhs_value = "%arg4", mapping_locs = [{id = 6 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 7 : i32, x = 2 : i32, y = 1 : i32}]} : (!neura.data, !neura.data) -> !neura.data +// MAPPING-NEXT: %20 = "neura.data_mov"(%19) {dfg_id = 42 : i32, mapping_locs = [{id = 192 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 7 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %21 = "neura.load"(%20) {dfg_id = 43 : i32, mapping_locs = [{id = 6 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 8 : i32, x = 2 : i32, y = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %22 = "neura.data_mov"(%21) {dfg_id = 45 : i32, mapping_locs = [{id = 192 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 8 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %23 = "neura.icmp"(%22) <{cmpType = "sge"}> {dfg_id = 46 : i32, mapping_locs = [{id = 6 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 9 : i32, x = 2 : i32, y = 1 : i32}], rhs_value = 0 : i32} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %24 = "neura.data_mov"(%23) {dfg_id = 47 : i32, mapping_locs = [{id = 192 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 9 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %25 = "neura.data_mov"(%21) {dfg_id = 44 : i32, mapping_locs = [{id = 193 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 8 : i32}, {id = 193 : i32, index_per_ii = 4 : i32, 
invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 9 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %26 = "neura.data_mov"(%3) {dfg_id = 11 : i32, mapping_locs = [{id = 28 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 7 : i32}, {id = 33 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 8 : i32}, {id = 194 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 9 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %27 = "neura.sel"(%24, %25, %26) {dfg_id = 48 : i32, mapping_locs = [{id = 6 : i32, index_per_ii = 0 : i32, invalid_iterations = 2 : i32, resource = "tile", time_step = 10 : i32, x = 2 : i32, y = 1 : i32}]} : (!neura.data, !neura.data, !neura.data) -> !neura.data +// MAPPING-NEXT: %28 = "neura.data_mov"(%14) {dfg_id = 33 : i32, mapping_locs = [{id = 34 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 6 : i32}, {id = 448 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 7 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %29 = "neura.data_mov"(%16) {dfg_id = 31 : i32, mapping_locs = [{id = 34 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 5 : i32}, {id = 449 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 6 : i32}, {id = 449 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 7 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %30 = "neura.gep"(%28, %29) <{operandSegmentSizes = array}> {dfg_id = 37 : i32, lhs_value = "%arg3", mapping_locs = [{id = 14 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : 
i32, resource = "tile", time_step = 8 : i32, x = 2 : i32, y = 3 : i32}]} : (!neura.data, !neura.data) -> !neura.data +// MAPPING-NEXT: %31 = "neura.data_mov"(%27) {dfg_id = 49 : i32, mapping_locs = [{id = 192 : i32, index_per_ii = 0 : i32, invalid_iterations = 2 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 10 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %32 = "neura.data_mov"(%30) {dfg_id = 41 : i32, mapping_locs = [{id = 45 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 8 : i32}, {id = 33 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 9 : i32}, {id = 193 : i32, index_per_ii = 0 : i32, invalid_iterations = 2 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 10 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: "neura.store"(%31, %32) {dfg_id = 50 : i32, mapping_locs = [{id = 6 : i32, index_per_ii = 1 : i32, invalid_iterations = 2 : i32, resource = "tile", time_step = 11 : i32, x = 2 : i32, y = 1 : i32}]} : (!neura.data, !neura.data) -> () +// MAPPING-NEXT: %33 = "neura.data_mov"(%6) {dfg_id = 12 : i32, mapping_locs = [{id = 352 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %34 = "neura.add"(%33) {dfg_id = 14 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 2 : i32, x = 3 : i32, y = 2 : i32}], rhs_value = 1 : i32} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %35 = "neura.data_mov"(%34) {dfg_id = 17 : i32, mapping_locs = [{id = 352 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %36 = "neura.icmp"(%35) <{cmpType = "eq"}> {dfg_id = 20 : i32, mapping_locs 
= [{id = 11 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 3 : i32, x = 3 : i32, y = 2 : i32}], rhs_value = 4200 : i32} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %37 = "neura.data_mov"(%36) {dfg_id = 23 : i32, mapping_locs = [{id = 352 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 3 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %38 = "neura.not"(%37) {dfg_id = 26 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 4 : i32, x = 3 : i32, y = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %39 = "neura.data_mov"(%34) {dfg_id = 16 : i32, mapping_locs = [{id = 36 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 2 : i32}, {id = 224 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 3 : i32}, {id = 224 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 4 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %40 = "neura.data_mov"(%38) {dfg_id = 30 : i32, mapping_locs = [{id = 36 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 4 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %41 = neura.grant_predicate %39, %40 {dfg_id = 36 : i32, mapping_locs = [{id = 7 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 5 : i32, x = 3 : i32, y = 1 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: neura.ctrl_mov %41 -> %4 {dfg_id = 40 : i32, mapping_locs = [{id = 23 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 5 : i32}]} : !neura.data !neura.data +// MAPPING-NEXT: %42 = "neura.data_mov"(%3) {dfg_id = 10 : 
i32, mapping_locs = [{id = 289 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 7 : i32}, {id = 289 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 8 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %43 = "neura.data_mov"(%38) {dfg_id = 29 : i32, mapping_locs = [{id = 35 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 4 : i32}, {id = 31 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 5 : i32}, {id = 290 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 6 : i32}, {id = 290 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 7 : i32}, {id = 290 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 8 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %44 = neura.grant_predicate %42, %43 {dfg_id = 35 : i32, mapping_locs = [{id = 9 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 9 : i32, x = 1 : i32, y = 2 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: neura.ctrl_mov %44 -> %1 {dfg_id = 39 : i32, mapping_locs = [{id = 289 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 9 : i32}, {id = 289 : i32, index_per_ii = 0 : i32, invalid_iterations = 2 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 10 : i32}, {id = 289 : i32, index_per_ii = 1 : i32, invalid_iterations = 2 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 11 : i32}]} : !neura.data !neura.data +// MAPPING-NEXT: %45 = "neura.grant_once"() 
<{constant_value = true}> {dfg_id = 3 : i32, mapping_locs = [{id = 12 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 8 : i32, x = 0 : i32, y = 3 : i32}]} : () -> !neura.data +// MAPPING-NEXT: %46 = "neura.data_mov"(%45) {dfg_id = 6 : i32, mapping_locs = [{id = 384 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 8 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: "neura.return"(%46) {dfg_id = 9 : i32, mapping_locs = [{id = 12 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 9 : i32, x = 0 : i32, y = 3 : i32}]} : (!neura.data) -> () +// MAPPING-NEXT: } // MAPPING-NEXT: } @@ -88,80 +92,31 @@ // YAML-NEXT: rows: 4 // YAML-NEXT: compiled_ii: 5 // YAML-NEXT: cores: -// YAML-NEXT: - column: 2 -// YAML-NEXT: row: 0 -// YAML-NEXT: core_id: "2" +// YAML-NEXT: - column: {{.*}} +// YAML-NEXT: row: {{.*}} +// YAML-NEXT: core_id: "{{.*}}" // YAML-NEXT: entries: // YAML-NEXT: - entry_id: "entry0" -// YAML-NEXT: instructions: -// YAML-NEXT: - index_per_ii: 4 -// YAML-NEXT: operations: -// YAML-NEXT: - opcode: "RETURN" -// YAML-NEXT: id: 3 -// YAML-NEXT: time_step: 9 -// YAML-NEXT: invalid_iterations: 1 -// YAML-NEXT: - column: 2 -// YAML-NEXT: row: 1 -// YAML-NEXT: core_id: "6" -// YAML-NEXT: entries: -// YAML-NEXT: - entry_id: "entry0" -// YAML-NEXT: instructions: -// YAML-NEXT: - index_per_ii: 0 -// YAML-NEXT: operations: -// YAML-NEXT: - opcode: "SEL" -// YAML-NEXT: id: 46 -// YAML-NEXT: time_step: 10 -// YAML-NEXT: invalid_iterations: 2 -// YAML-NEXT: src_operands: -// YAML-NEXT: - operand: "$0" -// YAML-NEXT: color: "RED" -// YAML-NEXT: - operand: "$1" -// YAML-NEXT: color: "RED" -// YAML-NEXT: - operand: "$2" -// YAML-NEXT: color: "RED" -// YAML-NEXT: dst_operands: -// YAML-NEXT: - operand: "$0" -// YAML-NEXT: color: "RED" -// YAML-NEXT: - opcode: "DATA_MOV" -// YAML-NEXT: id: 39 -// YAML-NEXT: 
time_step: 10 -// YAML-NEXT: invalid_iterations: 2 -// YAML-NEXT: src_operands: -// YAML-NEXT: - operand: "NORTH" -// YAML-NEXT: color: "RED" -// YAML-NEXT: dst_operands: -// YAML-NEXT: - operand: "$1" -// YAML-NEXT: color: "RED" -// ASM: # Compiled II: 5 -// ASM: PE(2,0): -// ASM-NEXT: { -// ASM-NEXT: RETURN (t=9, inv_iters=1) -// ASM-NEXT: } (idx_per_ii=4) -// ASM: PE(2,1): -// ASM-NEXT: { -// ASM-NEXT: SEL, [$0], [$1], [$2] -> [$0] (t=10, inv_iters=2) -// ASM-NEXT: DATA_MOV, [NORTH, RED] -> [$1] (t=10, inv_iters=2) -// ASM-NEXT: } (idx_per_ii=0) -// ASM-NEXT: { -// ASM-NEXT: DATA_MOV, [NORTH, RED] -> [$0] (t=6, inv_iters=1) -// ASM-NEXT: STORE, [$0], [$1] (t=11, inv_iters=2) -// ASM-NEXT: } (idx_per_ii=1) -// ASM-NEXT: { -// ASM-NEXT: GEP, [NORTH, RED], [$0] -> [$0] (t=7, inv_iters=1) -// ASM-NEXT: } (idx_per_ii=2) -// ASM-NEXT: { -// ASM-NEXT: LOAD, [$0] -> [$0], [$1] (t=8, inv_iters=1) -// ASM-NEXT: } (idx_per_ii=3) -// ASM-NEXT: { -// ASM-NEXT: ICMP_SGE, [$0], [#0] -> [$0] (t=9, inv_iters=1) -// ASM-NEXT: DATA_MOV, [NORTH, RED] -> [$2] (t=9, inv_iters=1) -// ASM-NEXT: } (idx_per_ii=4) -// ASM: PE(3,1): -// ASM-NEXT: { -// ASM-NEXT: GRANT_PREDICATE, [$0], [NORTH, RED] -> [NORTH, RED] (t=5, inv_iters=1) -// ASM-NEXT: } (idx_per_ii=0) -// ASM-NEXT: { -// ASM-NEXT: DATA_MOV, [NORTH, RED] -> [$0] (t=3, inv_iters=0) -// ASM-NEXT: } (idx_per_ii=3) \ No newline at end of file +// ASM: # Compiled II: {{.*}} +// ASM: PE({{.*}}): +// ASM: { +// ASM: SEL, [$0], [$1], [$2] -> [$0] (t={{.*}}, inv_iters={{.*}}) +// ASM: DATA_MOV, [{{.*}}] -> [$1] (t={{.*}}, inv_iters={{.*}}) +// ASM: } (idx_per_ii={{.*}}) +// ASM: { +// ASM: DATA_MOV, [{{.*}}] -> [$0] (t={{.*}}, inv_iters={{.*}}) +// ASM: STORE, [$0], [$1] (t={{.*}}, inv_iters={{.*}}) +// ASM: } (idx_per_ii={{.*}}) +// ASM: PE({{.*}}): +// ASM: { +// ASM: GRANT_PREDICATE, [{{.*}}] (t={{.*}}, inv_iters={{.*}}) +// ASM: } (idx_per_ii={{.*}}) +// ASM: PE({{.*}}): +// ASM: { +// ASM: GRANT_ONCE, [{{.*}}] -> [$0] (t={{.*}}, 
inv_iters={{.*}}) +// ASM: } (idx_per_ii={{.*}}) +// ASM: { +// ASM: RETURN, [$0] (t={{.*}}, inv_iters={{.*}}) +// ASM: } (idx_per_ii={{.*}}) diff --git a/test/neura/for_loop/kernel_test.mlir b/test/neura/for_loop/kernel_test.mlir index 51a15b07..004c3bf7 100644 --- a/test/neura/for_loop/kernel_test.mlir +++ b/test/neura/for_loop/kernel_test.mlir @@ -35,8 +35,39 @@ // RUN: --insert-data-mov \ // RUN: | FileCheck %s --check-prefix=CHECK-MOV -// CHECK: func.func @_Z6kernelPfS_S_ -// CHECK: accelerator = "neura" +// CHECK: module attributes {{.*}} +// CHECK-NEXT: llvm.mlir.global external local_unnamed_addr @input(dense<1.000000e+00> : tensor<32xf32>) {addr_space = 0 : i32, alignment = 16 : i64, dso_local} : !llvm.array<32 x f32> +// CHECK-NEXT: llvm.mlir.global external local_unnamed_addr @output(dense<0.000000e+00> : tensor<32xf32>) {addr_space = 0 : i32, alignment = 16 : i64, dso_local} : !llvm.array<32 x f32> +// CHECK-NEXT: llvm.mlir.global external local_unnamed_addr @coefficients(dense<[2.500000e-01, 1.500000e+00, 3.750000e+00, -2.250000e+00, 5.000000e-01, 7.500000e-01, -3.000000e+00, 1.250000e+00, 2.500000e-01, 1.500000e+00, 3.750000e+00, -2.250000e+00, 5.000000e-01, 7.500000e-01, -3.000000e+00, 1.250000e+00, 2.500000e-01, 1.500000e+00, 3.750000e+00, -2.250000e+00, 5.000000e-01, 7.500000e-01, -3.000000e+00, 1.250000e+00, 2.500000e-01, 1.500000e+00, 3.750000e+00, -2.250000e+00, 5.000000e-01, 7.500000e-01, -3.000000e+00, 1.250000e+00]> : tensor<32xf32>) {addr_space = 0 : i32, alignment = 16 : i64, dso_local} : !llvm.array<32 x f32> +// CHECK-NEXT: llvm.mlir.global private unnamed_addr constant @".str"("output: %f\0A\00") {addr_space = 0 : i32, alignment = 1 : i64, dso_local} +// CHECK-NEXT: llvm.func local_unnamed_addr @main() -> (i32 {llvm.noundef}) attributes {no_unwind, passthrough = ["mustprogress", "nofree", "norecurse", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", 
"x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic"} { +// CHECK-NEXT: %0 = llvm.mlir.addressof @".str" : !llvm.ptr +// CHECK-NEXT: %1 = llvm.mlir.addressof @coefficients : !llvm.ptr +// CHECK-NEXT: %2 = llvm.mlir.addressof @input : !llvm.ptr +// CHECK-NEXT: %3 = llvm.mlir.addressof @output : !llvm.ptr +// CHECK-NEXT: %4 = "neura.constant"() <{value = 0 : i64}> : () -> i64 +// CHECK-NEXT: %5 = "neura.constant"() <{value = 1 : i64}> : () -> i64 +// CHECK-NEXT: %6 = "neura.constant"() <{value = 32 : i64}> : () -> i64 +// CHECK-NEXT: %7 = "neura.constant"() <{value = 0 : i32}> : () -> i32 +// CHECK-NEXT: %8 = "neura.load"(%3) : (!llvm.ptr) -> f32 +// CHECK-NEXT: neura.br %4, %8 : i64, f32 to ^bb1 +// CHECK-NEXT: ^bb1(%9: i64, %10: f32): // 2 preds: ^bb0, ^bb1 +// CHECK-NEXT: %11 = "neura.gep"(%2, %9) <{operandSegmentSizes = array}> : (!llvm.ptr, i64) -> !llvm.ptr +// CHECK-NEXT: %12 = "neura.load"(%11) : (!llvm.ptr) -> f32 +// CHECK-NEXT: %13 = "neura.gep"(%1, %9) <{operandSegmentSizes = array}> : (!llvm.ptr, i64) -> !llvm.ptr +// CHECK-NEXT: %14 = "neura.load"(%13) : (!llvm.ptr) -> f32 +// CHECK-NEXT: %15 = "neura.fmul"(%12, %14) : (f32, f32) -> f32 +// CHECK-NEXT: %16 = "neura.fadd"(%10, %15) : (f32, f32) -> f32 +// CHECK-NEXT: %17 = "neura.add"(%9, %5) : (i64, i64) -> i64 +// CHECK-NEXT: %18 = "neura.icmp"(%17, %6) <{cmpType = "eq"}> : (i64, i64) -> i1 +// CHECK-NEXT: neura.cond_br %18 : i1 then to ^bb2 else %17, %16 : i64, f32 to ^bb1 +// CHECK-NEXT: ^bb2: // pred: ^bb1 +// CHECK-NEXT: "neura.store"(%16, %3) : (f32, !llvm.ptr) -> () +// CHECK-NEXT: %19 = llvm.fpext %16 : f32 to f64 +// CHECK-NEXT: %20 = llvm.call tail @printf(%0, %19) vararg(!llvm.func) {no_unwind} : (!llvm.ptr, f64) -> i32 +// CHECK-NEXT: "neura.return"(%7) : (i32) -> () +// CHECK-NEXT: } +// CHECK-NEXT: func.func @_Z6kernelPfS_S_(%arg0: !llvm.ptr {llvm.nocapture, llvm.noundef, 
llvm.readonly}, %arg1: !llvm.ptr {llvm.nocapture, llvm.noundef}, %arg2: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}) -> !llvm.void attributes {CConv = #llvm.cconv, accelerator = "neura", linkage = #llvm.linkage, memory_effects = #llvm.memory_effects, no_unwind, passthrough = ["mustprogress", "nofree", "norecurse", "nosync", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic", unnamed_addr = 1 : i64, visibility_ = 0 : i64} { // CHECK-NEXT: %0 = "neura.constant"() <{value = "%arg0"}> : () -> !neura.data // CHECK-NEXT: %1 = "neura.constant"() <{value = "%arg1"}> : () -> !neura.data // CHECK-NEXT: %2 = "neura.constant"() <{value = "%arg2"}> : () -> !neura.data @@ -59,10 +90,38 @@ // CHECK-NEXT: ^bb2: // pred: ^bb1 // CHECK-NEXT: "neura.return"() : () -> () // CHECK-NEXT: } +// CHECK-NEXT: llvm.func local_unnamed_addr @printf(!llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, ...) -> (i32 {llvm.noundef}) attributes {no_unwind, passthrough = ["nofree", ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic"} +// CHECK-NEXT: } // Verifies the neura ops are generated. And fusion happens. 
-// CHECK-FUSED: func.func @_Z6kernelPfS_S_ -// CHECK-FUSED-SAME: accelerator = "neura" +// CHECK-FUSED: module attributes {{.*}} +// CHECK-FUSED-NEXT: llvm.mlir.global external local_unnamed_addr @input(dense<1.000000e+00> : tensor<32xf32>) {addr_space = 0 : i32, alignment = 16 : i64, dso_local} : !llvm.array<32 x f32> +// CHECK-FUSED-NEXT: llvm.mlir.global external local_unnamed_addr @output(dense<0.000000e+00> : tensor<32xf32>) {addr_space = 0 : i32, alignment = 16 : i64, dso_local} : !llvm.array<32 x f32> +// CHECK-FUSED-NEXT: llvm.mlir.global external local_unnamed_addr @coefficients(dense<[2.500000e-01, 1.500000e+00, 3.750000e+00, -2.250000e+00, 5.000000e-01, 7.500000e-01, -3.000000e+00, 1.250000e+00, 2.500000e-01, 1.500000e+00, 3.750000e+00, -2.250000e+00, 5.000000e-01, 7.500000e-01, -3.000000e+00, 1.250000e+00, 2.500000e-01, 1.500000e+00, 3.750000e+00, -2.250000e+00, 5.000000e-01, 7.500000e-01, -3.000000e+00, 1.250000e+00, 2.500000e-01, 1.500000e+00, 3.750000e+00, -2.250000e+00, 5.000000e-01, 7.500000e-01, -3.000000e+00, 1.250000e+00]> : tensor<32xf32>) {addr_space = 0 : i32, alignment = 16 : i64, dso_local} : !llvm.array<32 x f32> +// CHECK-FUSED-NEXT: llvm.mlir.global private unnamed_addr constant @".str"("output: %f\0A\00") {addr_space = 0 : i32, alignment = 1 : i64, dso_local} +// CHECK-FUSED-NEXT: llvm.func local_unnamed_addr @main() -> (i32 {llvm.noundef}) attributes {no_unwind, passthrough = ["mustprogress", "nofree", "norecurse", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic"} { +// CHECK-FUSED-NEXT: %0 = llvm.mlir.addressof @".str" : !llvm.ptr +// CHECK-FUSED-NEXT: %1 = llvm.mlir.addressof @coefficients : !llvm.ptr +// CHECK-FUSED-NEXT: %2 = llvm.mlir.addressof @input : !llvm.ptr +// CHECK-FUSED-NEXT: %3 = 
llvm.mlir.addressof @output : !llvm.ptr +// CHECK-FUSED-NEXT: %4 = "neura.constant"() <{value = 0 : i64}> : () -> i64 +// CHECK-FUSED-NEXT: %5 = "neura.constant"() <{value = 0 : i32}> : () -> i32 +// CHECK-FUSED-NEXT: %6 = "neura.load"(%3) : (!llvm.ptr) -> f32 +// CHECK-FUSED-NEXT: neura.br %4, %6 : i64, f32 to ^bb1 +// CHECK-FUSED-NEXT: ^bb1(%7: i64, %8: f32): // 2 preds: ^bb0, ^bb1 +// CHECK-FUSED-NEXT: %9 = neura.load_indexed %2[%7 : i64] !llvm.ptr : f32 +// CHECK-FUSED-NEXT: %10 = neura.load_indexed %1[%7 : i64] !llvm.ptr : f32 +// CHECK-FUSED-NEXT: %11 = "neura.fmul_fadd"(%9, %10, %8) : (f32, f32, f32) -> f32 +// CHECK-FUSED-NEXT: %12 = "neura.add"(%7) {rhs_value = 1 : i64} : (i64) -> i64 +// CHECK-FUSED-NEXT: %13 = "neura.icmp"(%12) <{cmpType = "eq"}> {rhs_value = 32 : i64} : (i64) -> i1 +// CHECK-FUSED-NEXT: neura.cond_br %13 : i1 then to ^bb2 else %12, %11 : i64, f32 to ^bb1 +// CHECK-FUSED-NEXT: ^bb2: // pred: ^bb1 +// CHECK-FUSED-NEXT: "neura.store"(%11, %3) : (f32, !llvm.ptr) -> () +// CHECK-FUSED-NEXT: %14 = llvm.fpext %11 : f32 to f64 +// CHECK-FUSED-NEXT: %15 = llvm.call tail @printf(%0, %14) vararg(!llvm.func) {no_unwind} : (!llvm.ptr, f64) -> i32 +// CHECK-FUSED-NEXT: "neura.return"(%5) : (i32) -> () +// CHECK-FUSED-NEXT: } +// CHECK-FUSED-NEXT: func.func @_Z6kernelPfS_S_(%arg0: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg1: !llvm.ptr {llvm.nocapture, llvm.noundef}, %arg2: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}) -> !llvm.void attributes {CConv = #llvm.cconv, accelerator = "neura", dataflow_mode = "predicate", linkage = #llvm.linkage, memory_effects = #llvm.memory_effects, no_unwind, passthrough = ["mustprogress", "nofree", "norecurse", "nosync", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, 
tune_cpu = "generic", unnamed_addr = 1 : i64, visibility_ = 0 : i64} { // CHECK-FUSED-NEXT: %0 = "neura.grant_once"() <{constant_value = "%arg0"}> : () -> !neura.data // CHECK-FUSED-NEXT: %1 = "neura.grant_once"() <{constant_value = "%arg1"}> : () -> !neura.data // CHECK-FUSED-NEXT: %2 = "neura.constant"() <{value = "%arg1"}> : () -> !neura.data @@ -107,11 +166,58 @@ // CHECK-FUSED-NEXT: neura.ctrl_mov %34 -> %11 : !neura.data !neura.data // CHECK-FUSED-NEXT: %35 = neura.grant_predicate %10, %28 : !neura.data, !neura.data -> !neura.data // CHECK-FUSED-NEXT: neura.ctrl_mov %35 -> %9 : !neura.data !neura.data -// CHECK-FUSED-NEXT: "neura.return"() : () -> () +// CHECK-FUSED-NEXT: %36 = "neura.grant_once"() <{constant_value = true}> : () -> !neura.data +// CHECK-FUSED-NEXT: "neura.return"(%36) : (!neura.data) -> () // CHECK-FUSED-NEXT: } +// CHECK-FUSED-NEXT: llvm.func local_unnamed_addr @printf(!llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, ...) -> (i32 {llvm.noundef}) attributes {no_unwind, passthrough = ["nofree", ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic"} +// CHECK-FUSED-NEXT: } -// CHECK-MOV: func.func @_Z6kernelPfS_S_ -// CHECK-MOV-SAME: accelerator = "neura" +// CHECK-MOV: module attributes {{.*}} +// CHECK-MOV-NEXT: llvm.mlir.global external local_unnamed_addr @input(dense<1.000000e+00> : tensor<32xf32>) {addr_space = 0 : i32, alignment = 16 : i64, dso_local} : !llvm.array<32 x f32> +// CHECK-MOV-NEXT: llvm.mlir.global external local_unnamed_addr @output(dense<0.000000e+00> : tensor<32xf32>) {addr_space = 0 : i32, alignment = 16 : i64, dso_local} : !llvm.array<32 x f32> +// CHECK-MOV-NEXT: llvm.mlir.global external local_unnamed_addr @coefficients(dense<[2.500000e-01, 1.500000e+00, 3.750000e+00, -2.250000e+00, 5.000000e-01, 7.500000e-01, -3.000000e+00, 
1.250000e+00, 2.500000e-01, 1.500000e+00, 3.750000e+00, -2.250000e+00, 5.000000e-01, 7.500000e-01, -3.000000e+00, 1.250000e+00, 2.500000e-01, 1.500000e+00, 3.750000e+00, -2.250000e+00, 5.000000e-01, 7.500000e-01, -3.000000e+00, 1.250000e+00, 2.500000e-01, 1.500000e+00, 3.750000e+00, -2.250000e+00, 5.000000e-01, 7.500000e-01, -3.000000e+00, 1.250000e+00]> : tensor<32xf32>) {addr_space = 0 : i32, alignment = 16 : i64, dso_local} : !llvm.array<32 x f32> +// CHECK-MOV-NEXT: llvm.mlir.global private unnamed_addr constant @".str"("output: %f\0A\00") {addr_space = 0 : i32, alignment = 1 : i64, dso_local} +// CHECK-MOV-NEXT: llvm.func local_unnamed_addr @main() -> (i32 {llvm.noundef}) attributes {no_unwind, passthrough = ["mustprogress", "nofree", "norecurse", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic"} { +// CHECK-MOV-NEXT: %0 = llvm.mlir.addressof @".str" : !llvm.ptr +// CHECK-MOV-NEXT: %1 = llvm.mlir.addressof @coefficients : !llvm.ptr +// CHECK-MOV-NEXT: %2 = llvm.mlir.addressof @input : !llvm.ptr +// CHECK-MOV-NEXT: %3 = llvm.mlir.addressof @output : !llvm.ptr +// CHECK-MOV-NEXT: %4 = "neura.constant"() <{value = 0 : i64}> : () -> i64 +// CHECK-MOV-NEXT: %5 = "neura.constant"() <{value = 0 : i32}> : () -> i32 +// CHECK-MOV-NEXT: %6 = "neura.data_mov"(%3) : (!llvm.ptr) -> !llvm.ptr +// CHECK-MOV-NEXT: %7 = "neura.load"(%6) : (!llvm.ptr) -> f32 +// CHECK-MOV-NEXT: %8 = "neura.data_mov"(%4) : (i64) -> i64 +// CHECK-MOV-NEXT: %9 = "neura.data_mov"(%7) : (f32) -> f32 +// CHECK-MOV-NEXT: neura.br %8, %9 : i64, f32 to ^bb1 +// CHECK-MOV-NEXT: ^bb1(%10: i64, %11: f32): // 2 preds: ^bb0, ^bb1 +// CHECK-MOV-NEXT: %12 = "neura.data_mov"(%2) : (!llvm.ptr) -> !llvm.ptr +// CHECK-MOV-NEXT: %13 = "neura.data_mov"(%10) : (i64) -> i64 +// 
CHECK-MOV-NEXT: %14 = neura.load_indexed %12[%13 : i64] !llvm.ptr : f32 +// CHECK-MOV-NEXT: %15 = "neura.data_mov"(%1) : (!llvm.ptr) -> !llvm.ptr +// CHECK-MOV-NEXT: %16 = "neura.data_mov"(%10) : (i64) -> i64 +// CHECK-MOV-NEXT: %17 = neura.load_indexed %15[%16 : i64] !llvm.ptr : f32 +// CHECK-MOV-NEXT: %18 = "neura.data_mov"(%14) : (f32) -> f32 +// CHECK-MOV-NEXT: %19 = "neura.data_mov"(%17) : (f32) -> f32 +// CHECK-MOV-NEXT: %20 = "neura.data_mov"(%11) : (f32) -> f32 +// CHECK-MOV-NEXT: %21 = "neura.fmul_fadd"(%18, %19, %20) : (f32, f32, f32) -> f32 +// CHECK-MOV-NEXT: %22 = "neura.data_mov"(%10) : (i64) -> i64 +// CHECK-MOV-NEXT: %23 = "neura.add"(%22) {rhs_value = 1 : i64} : (i64) -> i64 +// CHECK-MOV-NEXT: %24 = "neura.data_mov"(%23) : (i64) -> i64 +// CHECK-MOV-NEXT: %25 = "neura.icmp"(%24) <{cmpType = "eq"}> {rhs_value = 32 : i64} : (i64) -> i1 +// CHECK-MOV-NEXT: %26 = "neura.data_mov"(%25) : (i1) -> i1 +// CHECK-MOV-NEXT: %27 = "neura.data_mov"(%23) : (i64) -> i64 +// CHECK-MOV-NEXT: %28 = "neura.data_mov"(%21) : (f32) -> f32 +// CHECK-MOV-NEXT: neura.cond_br %26 : i1 then to ^bb2 else %27, %28 : i64, f32 to ^bb1 +// CHECK-MOV-NEXT: ^bb2: // pred: ^bb1 +// CHECK-MOV-NEXT: %29 = "neura.data_mov"(%21) : (f32) -> f32 +// CHECK-MOV-NEXT: %30 = "neura.data_mov"(%3) : (!llvm.ptr) -> !llvm.ptr +// CHECK-MOV-NEXT: "neura.store"(%29, %30) : (f32, !llvm.ptr) -> () +// CHECK-MOV-NEXT: %31 = llvm.fpext %21 : f32 to f64 +// CHECK-MOV-NEXT: %32 = llvm.call tail @printf(%0, %31) vararg(!llvm.func) {no_unwind} : (!llvm.ptr, f64) -> i32 +// CHECK-MOV-NEXT: %33 = "neura.data_mov"(%5) : (i32) -> i32 +// CHECK-MOV-NEXT: "neura.return"(%33) : (i32) -> () +// CHECK-MOV-NEXT: } +// CHECK-MOV-NEXT: func.func @_Z6kernelPfS_S_(%arg0: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg1: !llvm.ptr {llvm.nocapture, llvm.noundef}, %arg2: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}) -> !llvm.void attributes {CConv = #llvm.cconv, accelerator = "neura", 
dataflow_mode = "predicate", linkage = #llvm.linkage, memory_effects = #llvm.memory_effects, no_unwind, passthrough = ["mustprogress", "nofree", "norecurse", "nosync", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic", unnamed_addr = 1 : i64, visibility_ = 0 : i64} { // CHECK-MOV-NEXT: %0 = "neura.grant_once"() <{constant_value = "%arg0"}> : () -> !neura.data // CHECK-MOV-NEXT: %1 = "neura.grant_once"() <{constant_value = "%arg1"}> : () -> !neura.data // CHECK-MOV-NEXT: %2 = "neura.constant"() <{value = "%arg1"}> : () -> !neura.data @@ -193,5 +299,9 @@ // CHECK-MOV-NEXT: %71 = "neura.data_mov"(%51) : (!neura.data) -> !neura.data // CHECK-MOV-NEXT: %72 = neura.grant_predicate %70, %71 : !neura.data, !neura.data -> !neura.data // CHECK-MOV-NEXT: neura.ctrl_mov %72 -> %11 : !neura.data !neura.data -// CHECK-MOV-NEXT: "neura.return"() : () -> () -// CHECK-MOV-NEXT: } \ No newline at end of file +// CHECK-MOV-NEXT: %73 = "neura.grant_once"() <{constant_value = true}> : () -> !neura.data +// CHECK-MOV-NEXT: %74 = "neura.data_mov"(%73) : (!neura.data) -> !neura.data +// CHECK-MOV-NEXT: "neura.return"(%74) : (!neura.data) -> () +// CHECK-MOV-NEXT: } +// CHECK-MOV-NEXT: llvm.func local_unnamed_addr @printf(!llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, ...) 
-> (i32 {llvm.noundef}) attributes {no_unwind, passthrough = ["nofree", ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic"} +// CHECK-MOV-NEXT: } \ No newline at end of file diff --git a/test/neura/for_loop/relu_test.mlir b/test/neura/for_loop/relu_test.mlir index 951f89c4..2c00cb6a 100644 --- a/test/neura/for_loop/relu_test.mlir +++ b/test/neura/for_loop/relu_test.mlir @@ -31,38 +31,123 @@ // RUN: --map-to-accelerator="mapping-strategy=heuristic backtrack-config=customized" \ // RUN: | FileCheck %s --check-prefix=MAPPING -// CHECK: func.func @_Z6kernelPiS_ -// CHECK-SAME: accelerator = "neura" -// CHECK-NEXT: %0 = "neura.constant"() <{value = "%arg0"}> : () -> !llvm.ptr -// CHECK-NEXT: %1 = "neura.constant"() <{value = "%arg1"}> : () -> !llvm.ptr -// CHECK-NEXT: %2 = "neura.constant"() <{value = 0 : i64}> : () -> i64 -// CHECK-NEXT: %3 = "neura.constant"() <{value = 0 : i32}> : () -> i32 -// CHECK-NEXT: %4 = "neura.constant"() <{value = 1 : i64}> : () -> i64 -// CHECK-NEXT: %5 = "neura.constant"() <{value = 32 : i64}> : () -> i64 -// CHECK-NEXT: neura.br %2, %0, %3, %1, %4, %5 : i64, !llvm.ptr, i32, !llvm.ptr, i64, i64 to ^bb2 -// CHECK-NEXT: ^bb1: // pred: ^bb4 -// CHECK-NEXT: "neura.return"() : () -> () -// CHECK-NEXT: ^bb2(%6: i64, %7: !llvm.ptr, %8: i32, %9: !llvm.ptr, %10: i64, %11: i64): // 2 preds: ^bb0, ^bb4 -// CHECK-NEXT: %12 = "neura.gep"(%7, %6) <{operandSegmentSizes = array}> : (!llvm.ptr, i64) -> !llvm.ptr -// CHECK-NEXT: %13 = "neura.load"(%12) : (!llvm.ptr) -> i32 -// CHECK-NEXT: %14 = "neura.icmp"(%13, %8) <{cmpType = "sgt"}> : (i32, i32) -> i1 -// CHECK-NEXT: neura.cond_br %14 : i1 then %9, %6, %13, %10, %11, %7, %8 : !llvm.ptr, i64, i32, i64, i64, !llvm.ptr, i32 to ^bb3 else %10, %11, %7, %8, %9 : i64, i64, !llvm.ptr, i32, !llvm.ptr to ^bb4 -// CHECK-NEXT: 
^bb3(%15: !llvm.ptr, %16: i64, %17: i32, %18: i64, %19: i64, %20: !llvm.ptr, %21: i32): // pred: ^bb2 -// CHECK-NEXT: %22 = "neura.gep"(%15, %16) <{operandSegmentSizes = array}> : (!llvm.ptr, i64) -> !llvm.ptr -// CHECK-NEXT: %23 = "neura.load"(%22) : (!llvm.ptr) -> i32 -// CHECK-NEXT: %24 = "neura.add"(%23, %17) : (i32, i32) -> i32 -// CHECK-NEXT: "neura.store"(%24, %22) : (i32, !llvm.ptr) -> () -// CHECK-NEXT: neura.br %18, %19, %20, %21, %15 : i64, i64, !llvm.ptr, i32, !llvm.ptr to ^bb4 -// CHECK-NEXT: ^bb4(%25: i64, %26: i64, %27: !llvm.ptr, %28: i32, %29: !llvm.ptr): // 2 preds: ^bb2, ^bb3 -// CHECK-NEXT: %30 = "neura.add"(%6, %25) : (i64, i64) -> i64 -// CHECK-NEXT: %31 = "neura.icmp"(%30, %26) <{cmpType = "eq"}> : (i64, i64) -> i1 -// CHECK-NEXT: neura.cond_br %31 : i1 then to ^bb1 else %30, %27, %28, %29, %25, %26 : i64, !llvm.ptr, i32, !llvm.ptr, i64, i64 to ^bb2 +// CHECK: module attributes {{.*}} +// CHECK-NEXT: llvm.mlir.global external local_unnamed_addr @input(dense<[1, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16, -17, 18, -19, 20, -21, 22, -23, 24, -25, 26, -27, 28, -29, 30, -31]> : tensor<32xi32>) {addr_space = 0 : i32, alignment = 16 : i64, dso_local} : !llvm.array<32 x i32> +// CHECK-NEXT: llvm.mlir.global external local_unnamed_addr @output(dense<0> : tensor<32xi32>) {addr_space = 0 : i32, alignment = 16 : i64, dso_local} : !llvm.array<32 x i32> +// CHECK-NEXT: llvm.mlir.global private unnamed_addr constant @".str"("output[%d] = %d\0A\00") {addr_space = 0 : i32, alignment = 1 : i64, dso_local} +// CHECK-NEXT: llvm.func local_unnamed_addr @main() -> (i32 {llvm.noundef}) attributes {no_unwind, passthrough = ["mustprogress", "nofree", "norecurse", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = 
"generic"} { +// CHECK-NEXT: %0 = llvm.mlir.addressof @".str" : !llvm.ptr +// CHECK-NEXT: %1 = llvm.mlir.addressof @input : !llvm.ptr +// CHECK-NEXT: %2 = llvm.mlir.addressof @output : !llvm.ptr +// CHECK-NEXT: %3 = "neura.constant"() <{value = 0 : i8}> : () -> i8 +// CHECK-NEXT: %4 = "neura.constant"() <{value = 128 : i64}> : () -> i64 +// CHECK-NEXT: %5 = "neura.constant"() <{value = 0 : i64}> : () -> i64 +// CHECK-NEXT: %6 = "neura.constant"() <{value = 0 : i32}> : () -> i32 +// CHECK-NEXT: %7 = "neura.constant"() <{value = 1 : i64}> : () -> i64 +// CHECK-NEXT: %8 = "neura.constant"() <{value = 32 : i64}> : () -> i64 +// CHECK-NEXT: "neura.memset"(%2, %3, %4) <{is_volatile = false}> : (!llvm.ptr, i8, i64) -> () +// CHECK-NEXT: neura.br %5 : i64 to ^bb1 +// CHECK-NEXT: ^bb1(%9: i64): // 2 preds: ^bb0, ^bb3 +// CHECK-NEXT: %10 = "neura.gep"(%1, %9) <{operandSegmentSizes = array}> : (!llvm.ptr, i64) -> !llvm.ptr +// CHECK-NEXT: %11 = "neura.load"(%10) : (!llvm.ptr) -> i32 +// CHECK-NEXT: %12 = "neura.icmp"(%11, %6) <{cmpType = "sgt"}> : (i32, i32) -> i1 +// CHECK-NEXT: neura.cond_br %12 : i1 then to ^bb2 else to ^bb3 +// CHECK-NEXT: ^bb2: // pred: ^bb1 +// CHECK-NEXT: %13 = "neura.gep"(%2, %9) <{operandSegmentSizes = array}> : (!llvm.ptr, i64) -> !llvm.ptr +// CHECK-NEXT: %14 = "neura.load"(%13) : (!llvm.ptr) -> i32 +// CHECK-NEXT: %15 = "neura.add"(%14, %11) : (i32, i32) -> i32 +// CHECK-NEXT: "neura.store"(%15, %13) : (i32, !llvm.ptr) -> () +// CHECK-NEXT: neura.br to ^bb3 +// CHECK-NEXT: ^bb3: // 2 preds: ^bb1, ^bb2 +// CHECK-NEXT: %16 = "neura.add"(%9, %7) : (i64, i64) -> i64 +// CHECK-NEXT: %17 = "neura.icmp"(%16, %8) <{cmpType = "eq"}> : (i64, i64) -> i1 +// CHECK-NEXT: neura.cond_br %17 : i1 then %5 : i64 to ^bb5 else %16 : i64 to ^bb1 +// CHECK-NEXT: ^bb4: // pred: ^bb5 +// CHECK-NEXT: "neura.return"(%6) : (i32) -> () +// CHECK-NEXT: ^bb5(%18: i64): // 2 preds: ^bb3, ^bb5 +// CHECK-NEXT: %19 = "neura.constant"() <{value = 0 : i32}> : () -> index +// 
CHECK-NEXT: %20 = "neura.gep"(%2, %19, %18) <{operandSegmentSizes = array}> : (!llvm.ptr, index, i64) -> !llvm.ptr +// CHECK-NEXT: %21 = "neura.load"(%20) : (!llvm.ptr) -> i32 +// CHECK-NEXT: %22 = "neura.cast"(%18) <{cast_type = "trunc"}> : (i64) -> i32 +// CHECK-NEXT: %23 = llvm.call tail @printf(%0, %22, %21) vararg(!llvm.func) {no_unwind} : (!llvm.ptr, i32, i32) -> i32 +// CHECK-NEXT: %24 = "neura.add"(%18, %7) : (i64, i64) -> i64 +// CHECK-NEXT: %25 = "neura.icmp"(%24, %8) <{cmpType = "eq"}> : (i64, i64) -> i1 +// CHECK-NEXT: neura.cond_br %25 : i1 then to ^bb4 else %24 : i64 to ^bb5 +// CHECK-NEXT: } +// CHECK-NEXT: func.func @_Z6kernelPiS_(%arg0: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg1: !llvm.ptr {llvm.nocapture, llvm.noundef}) -> !llvm.void attributes {CConv = #llvm.cconv, accelerator = "neura", linkage = #llvm.linkage, memory_effects = #llvm.memory_effects, no_unwind, passthrough = ["mustprogress", "nofree", "norecurse", "nosync", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic", unnamed_addr = 1 : i64, visibility_ = 0 : i64} { +// CHECK-NEXT: %0 = "neura.constant"() <{value = "%arg0"}> : () -> !llvm.ptr +// CHECK-NEXT: %1 = "neura.constant"() <{value = "%arg1"}> : () -> !llvm.ptr +// CHECK-NEXT: %2 = "neura.constant"() <{value = 0 : i64}> : () -> i64 +// CHECK-NEXT: %3 = "neura.constant"() <{value = 0 : i32}> : () -> i32 +// CHECK-NEXT: %4 = "neura.constant"() <{value = 1 : i64}> : () -> i64 +// CHECK-NEXT: %5 = "neura.constant"() <{value = 32 : i64}> : () -> i64 +// CHECK-NEXT: neura.br %2, %0, %3, %1, %4, %5 : i64, !llvm.ptr, i32, !llvm.ptr, i64, i64 to ^bb2 +// CHECK-NEXT: ^bb1: // pred: ^bb4 +// CHECK-NEXT: "neura.return"() : () -> () +// CHECK-NEXT: ^bb2(%6: i64, %7: !llvm.ptr, %8: i32, 
%9: !llvm.ptr, %10: i64, %11: i64): // 2 preds: ^bb0, ^bb4 +// CHECK-NEXT: %12 = "neura.gep"(%7, %6) <{operandSegmentSizes = array}> : (!llvm.ptr, i64) -> !llvm.ptr +// CHECK-NEXT: %13 = "neura.load"(%12) : (!llvm.ptr) -> i32 +// CHECK-NEXT: %14 = "neura.icmp"(%13, %8) <{cmpType = "sgt"}> : (i32, i32) -> i1 +// CHECK-NEXT: neura.cond_br %14 : i1 then %9, %6, %13, %10, %11, %7, %8 : !llvm.ptr, i64, i32, i64, i64, !llvm.ptr, i32 to ^bb3 else %10, %11, %7, %8, %9 : i64, i64, !llvm.ptr, i32, !llvm.ptr to ^bb4 +// CHECK-NEXT: ^bb3(%15: !llvm.ptr, %16: i64, %17: i32, %18: i64, %19: i64, %20: !llvm.ptr, %21: i32): // pred: ^bb2 +// CHECK-NEXT: %22 = "neura.gep"(%15, %16) <{operandSegmentSizes = array}> : (!llvm.ptr, i64) -> !llvm.ptr +// CHECK-NEXT: %23 = "neura.load"(%22) : (!llvm.ptr) -> i32 +// CHECK-NEXT: %24 = "neura.add"(%23, %17) : (i32, i32) -> i32 +// CHECK-NEXT: "neura.store"(%24, %22) : (i32, !llvm.ptr) -> () +// CHECK-NEXT: neura.br %18, %19, %20, %21, %15 : i64, i64, !llvm.ptr, i32, !llvm.ptr to ^bb4 +// CHECK-NEXT: ^bb4(%25: i64, %26: i64, %27: !llvm.ptr, %28: i32, %29: !llvm.ptr): // 2 preds: ^bb2, ^bb3 +// CHECK-NEXT: %30 = "neura.add"(%6, %25) : (i64, i64) -> i64 +// CHECK-NEXT: %31 = "neura.icmp"(%30, %26) <{cmpType = "eq"}> : (i64, i64) -> i1 +// CHECK-NEXT: neura.cond_br %31 : i1 then to ^bb1 else %30, %27, %28, %29, %25, %26 : i64, !llvm.ptr, i32, !llvm.ptr, i64, i64 to ^bb2 +// CHECK-NEXT: } +// CHECK-NEXT: llvm.func local_unnamed_addr @printf(!llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, ...) 
-> (i32 {llvm.noundef}) attributes {no_unwind, passthrough = ["nofree", ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic"} // CHECK-NEXT: } -// CTRL2DATA: func.func @_Z6kernelPiS_ -// CTRL2DATA-SAME: accelerator = "neura" -// CTRL2DATA-SAME: dataflow_mode = "predicate" +// CTRL2DATA: module attributes {{.*}} +// CTRL2DATA-NEXT: llvm.mlir.global external local_unnamed_addr @input(dense<[1, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16, -17, 18, -19, 20, -21, 22, -23, 24, -25, 26, -27, 28, -29, 30, -31]> : tensor<32xi32>) {addr_space = 0 : i32, alignment = 16 : i64, dso_local} : !llvm.array<32 x i32> +// CTRL2DATA-NEXT: llvm.mlir.global external local_unnamed_addr @output(dense<0> : tensor<32xi32>) {addr_space = 0 : i32, alignment = 16 : i64, dso_local} : !llvm.array<32 x i32> +// CTRL2DATA-NEXT: llvm.mlir.global private unnamed_addr constant @".str"("output[%d] = %d\0A\00") {addr_space = 0 : i32, alignment = 1 : i64, dso_local} +// CTRL2DATA-NEXT: llvm.func local_unnamed_addr @main() -> (i32 {llvm.noundef}) attributes {no_unwind, passthrough = ["mustprogress", "nofree", "norecurse", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic"} { +// CTRL2DATA-NEXT: %0 = llvm.mlir.addressof @".str" : !llvm.ptr +// CTRL2DATA-NEXT: %1 = llvm.mlir.addressof @input : !llvm.ptr +// CTRL2DATA-NEXT: %2 = llvm.mlir.addressof @output : !llvm.ptr +// CTRL2DATA-NEXT: %3 = "neura.constant"() <{value = 0 : i8}> : () -> i8 +// CTRL2DATA-NEXT: %4 = "neura.constant"() <{value = 128 : i64}> : () -> i64 +// CTRL2DATA-NEXT: %5 = "neura.constant"() <{value 
= 0 : i64}> : () -> i64 +// CTRL2DATA-NEXT: %6 = "neura.constant"() <{value = 0 : i32}> : () -> i32 +// CTRL2DATA-NEXT: %7 = "neura.constant"() <{value = 1 : i64}> : () -> i64 +// CTRL2DATA-NEXT: %8 = "neura.constant"() <{value = 32 : i64}> : () -> i64 +// CTRL2DATA-NEXT: "neura.memset"(%2, %3, %4) <{is_volatile = false}> : (!llvm.ptr, i8, i64) -> () +// CTRL2DATA-NEXT: neura.br %5 : i64 to ^bb1 +// CTRL2DATA-NEXT: ^bb1(%9: i64): // 2 preds: ^bb0, ^bb3 +// CTRL2DATA-NEXT: %10 = "neura.gep"(%1, %9) <{operandSegmentSizes = array}> : (!llvm.ptr, i64) -> !llvm.ptr +// CTRL2DATA-NEXT: %11 = "neura.load"(%10) : (!llvm.ptr) -> i32 +// CTRL2DATA-NEXT: %12 = "neura.icmp"(%11, %6) <{cmpType = "sgt"}> : (i32, i32) -> i1 +// CTRL2DATA-NEXT: neura.cond_br %12 : i1 then to ^bb2 else to ^bb3 +// CTRL2DATA-NEXT: ^bb2: // pred: ^bb1 +// CTRL2DATA-NEXT: %13 = "neura.gep"(%2, %9) <{operandSegmentSizes = array}> : (!llvm.ptr, i64) -> !llvm.ptr +// CTRL2DATA-NEXT: %14 = "neura.load"(%13) : (!llvm.ptr) -> i32 +// CTRL2DATA-NEXT: %15 = "neura.add"(%14, %11) : (i32, i32) -> i32 +// CTRL2DATA-NEXT: "neura.store"(%15, %13) : (i32, !llvm.ptr) -> () +// CTRL2DATA-NEXT: neura.br to ^bb3 +// CTRL2DATA-NEXT: ^bb3: // 2 preds: ^bb1, ^bb2 +// CTRL2DATA-NEXT: %16 = "neura.add"(%9, %7) : (i64, i64) -> i64 +// CTRL2DATA-NEXT: %17 = "neura.icmp"(%16, %8) <{cmpType = "eq"}> : (i64, i64) -> i1 +// CTRL2DATA-NEXT: neura.cond_br %17 : i1 then %5 : i64 to ^bb5 else %16 : i64 to ^bb1 +// CTRL2DATA-NEXT: ^bb4: // pred: ^bb5 +// CTRL2DATA-NEXT: "neura.return"(%6) : (i32) -> () +// CTRL2DATA-NEXT: ^bb5(%18: i64): // 2 preds: ^bb3, ^bb5 +// CTRL2DATA-NEXT: %19 = "neura.constant"() <{value = 0 : i32}> : () -> index +// CTRL2DATA-NEXT: %20 = "neura.gep"(%2, %19, %18) <{operandSegmentSizes = array}> : (!llvm.ptr, index, i64) -> !llvm.ptr +// CTRL2DATA-NEXT: %21 = "neura.load"(%20) : (!llvm.ptr) -> i32 +// CTRL2DATA-NEXT: %22 = "neura.cast"(%18) <{cast_type = "trunc"}> : (i64) -> i32 +// CTRL2DATA-NEXT: %23 = 
llvm.call tail @printf(%0, %22, %21) vararg(!llvm.func) {no_unwind} : (!llvm.ptr, i32, i32) -> i32 +// CTRL2DATA-NEXT: %24 = "neura.add"(%18, %7) : (i64, i64) -> i64 +// CTRL2DATA-NEXT: %25 = "neura.icmp"(%24, %8) <{cmpType = "eq"}> : (i64, i64) -> i1 +// CTRL2DATA-NEXT: neura.cond_br %25 : i1 then to ^bb4 else %24 : i64 to ^bb5 +// CTRL2DATA-NEXT: } +// CTRL2DATA-NEXT: func.func @_Z6kernelPiS_(%arg0: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg1: !llvm.ptr {llvm.nocapture, llvm.noundef}) -> !llvm.void attributes {CConv = #llvm.cconv, accelerator = "neura", dataflow_mode = "predicate", linkage = #llvm.linkage, memory_effects = #llvm.memory_effects, no_unwind, passthrough = ["mustprogress", "nofree", "norecurse", "nosync", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic", unnamed_addr = 1 : i64, visibility_ = 0 : i64} { // CTRL2DATA-NEXT: %0 = "neura.constant"() <{value = "%arg0"}> : () -> !neura.data // CTRL2DATA-NEXT: %1 = "neura.grant_once"(%0) : (!neura.data) -> !neura.data // CTRL2DATA-NEXT: %2 = "neura.constant"() <{value = "%arg1"}> : () -> !neura.data @@ -127,8 +212,386 @@ // CTRL2DATA-NEXT: neura.ctrl_mov %55 -> %14 : !neura.data !neura.data // CTRL2DATA-NEXT: %56 = neura.grant_predicate %46, %50 : !neura.data, !neura.data -> !neura.data // CTRL2DATA-NEXT: neura.ctrl_mov %56 -> %12 : !neura.data !neura.data -// CTRL2DATA-NEXT: "neura.return"() : () -> () +// CTRL2DATA-NEXT: %57 = "neura.constant"() <{value = true}> : () -> !neura.data +// CTRL2DATA-NEXT: %58 = "neura.grant_once"(%57) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: "neura.return"(%58) : (!neura.data) -> () // CTRL2DATA-NEXT: } +// CTRL2DATA-NEXT: llvm.func local_unnamed_addr @printf(!llvm.ptr {llvm.nocapture, llvm.noundef, 
llvm.readonly}, ...) -> (i32 {llvm.noundef}) attributes {no_unwind, passthrough = ["nofree", ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic"} +// CTRL2DATA-NEXT: } -// MAPPING: func.func @_Z6kernelPiS_(%arg0: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg1: !llvm.ptr {llvm.nocapture, llvm.noundef}) -> !llvm.void attributes {CConv = #llvm.cconv, accelerator = "neura", dataflow_mode = "predicate", linkage = #llvm.linkage, mapping_info = {compiled_ii = 5 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 5 : i32, res_mii = 1 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}, memory_effects = #llvm.memory_effects, no_unwind, passthrough = ["mustprogress", "nofree", "norecurse", "nosync", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic", unnamed_addr = 1 : i64, visibility_ = 0 : i64} { \ No newline at end of file +// MAPPING: [DEBUG] Recurrence cycle (length 3): +// MAPPING-NEXT: %1 = neura.reserve : !neura.data +// MAPPING-NEXT: %3 = neura.phi_start %2, %1 : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %25 = "neura.data_mov"(%3) : (!neura.data) -> !neura.data +// MAPPING-NEXT: %26 = "neura.add"(%25) {rhs_value = 1 : i64} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %31 = "neura.data_mov"(%26) : (!neura.data) -> !neura.data +// MAPPING-NEXT: %33 = neura.grant_predicate %31, %32 : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: neura.ctrl_mov %33 -> %1 : !neura.data !neura.data +// MAPPING-NEXT: [DEBUG] Recurrence cycle (length 5): +// MAPPING-NEXT: %1 = neura.reserve : 
!neura.data +// MAPPING-NEXT: %3 = neura.phi_start %2, %1 : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %25 = "neura.data_mov"(%3) : (!neura.data) -> !neura.data +// MAPPING-NEXT: %26 = "neura.add"(%25) {rhs_value = 1 : i64} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %27 = "neura.data_mov"(%26) : (!neura.data) -> !neura.data +// MAPPING-NEXT: %28 = "neura.icmp"(%27) <{cmpType = "eq"}> {rhs_value = 32 : i64} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %29 = "neura.data_mov"(%28) : (!neura.data) -> !neura.data +// MAPPING-NEXT: %30 = "neura.not"(%29) : (!neura.data) -> !neura.data +// MAPPING-NEXT: %32 = "neura.data_mov"(%30) : (!neura.data) -> !neura.data +// MAPPING-NEXT: %33 = neura.grant_predicate %31, %32 : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: neura.ctrl_mov %33 -> %1 : !neura.data !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Longest recurrence cycle (length 5): +// MAPPING-NEXT: %1 = neura.reserve : !neura.data +// MAPPING-NEXT: %3 = neura.phi_start %2, %1 : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %25 = "neura.data_mov"(%3) : (!neura.data) -> !neura.data +// MAPPING-NEXT: %26 = "neura.add"(%25) {rhs_value = 1 : i64} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %27 = "neura.data_mov"(%26) : (!neura.data) -> !neura.data +// MAPPING-NEXT: %28 = "neura.icmp"(%27) <{cmpType = "eq"}> {rhs_value = 32 : i64} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %29 = "neura.data_mov"(%28) : (!neura.data) -> !neura.data +// MAPPING-NEXT: %30 = "neura.not"(%29) : (!neura.data) -> !neura.data +// MAPPING-NEXT: %32 = "neura.data_mov"(%30) : (!neura.data) -> !neura.data +// MAPPING-NEXT: %33 = neura.grant_predicate %31, %32 : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: neura.ctrl_mov %33 -> %1 : !neura.data !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %0 = "neura.grant_once"() <{constant_value = 0 : i64}> : () -> !neura.data +// MAPPING-NEXT: 
[MapToAcceleratorPass] Topologically sorted op: %1 = neura.reserve : !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %34 = "neura.grant_once"() <{constant_value = true}> : () -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %2 = "neura.data_mov"(%0) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %35 = "neura.data_mov"(%34) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %3 = neura.phi_start %2, %1 : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: "neura.return"(%35) : (!neura.data) -> () +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %25 = "neura.data_mov"(%3) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %10 = "neura.data_mov"(%3) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %4 = "neura.data_mov"(%3) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %26 = "neura.add"(%25) {rhs_value = 1 : i64} : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %5 = "neura.gep"(%4) <{operandSegmentSizes = array}> {lhs_value = "%arg0"} : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %31 = "neura.data_mov"(%26) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %27 = "neura.data_mov"(%26) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %6 = "neura.data_mov"(%5) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %28 = "neura.icmp"(%27) <{cmpType = "eq"}> {rhs_value = 32 : i64} : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: 
%7 = "neura.load"(%6) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %29 = "neura.data_mov"(%28) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %13 = "neura.data_mov"(%7) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %8 = "neura.data_mov"(%7) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %30 = "neura.not"(%29) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %9 = "neura.icmp"(%8) <{cmpType = "sgt"}> {rhs_value = 0 : i32} : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %32 = "neura.data_mov"(%30) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %14 = "neura.data_mov"(%9) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %11 = "neura.data_mov"(%9) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %33 = neura.grant_predicate %31, %32 : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %15 = neura.grant_predicate %13, %14 : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %12 = neura.grant_predicate %10, %11 : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: neura.ctrl_mov %33 -> %1 : !neura.data !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %21 = "neura.data_mov"(%15) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %16 = "neura.data_mov"(%12) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %17 = "neura.gep"(%16) <{operandSegmentSizes = 
array}> {lhs_value = "%arg1"} : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %24 = "neura.data_mov"(%17) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %18 = "neura.data_mov"(%17) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %19 = "neura.load"(%18) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %20 = "neura.data_mov"(%19) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %22 = "neura.add"(%20, %21) : (!neura.data, !neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %23 = "neura.data_mov"(%22) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: "neura.store"(%23, %24) : (!neura.data, !neura.data) -> () +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP Bucket Level 0: 3 ops +// MAPPING-NEXT: %0 = "neura.grant_once"() <{constant_value = 0 : i64}> : () -> !neura.data +// MAPPING-NEXT: %1 = neura.reserve : !neura.data +// MAPPING-NEXT: %2 = "neura.data_mov"(%0) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP Bucket Level 1: 3 ops +// MAPPING-NEXT: %3 = neura.phi_start %2, %1 : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %25 = "neura.data_mov"(%3) : (!neura.data) -> !neura.data +// MAPPING-NEXT: %4 = "neura.data_mov"(%3) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP Bucket Level 2: 5 ops +// MAPPING-NEXT: %26 = "neura.add"(%25) {rhs_value = 1 : i64} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %5 = "neura.gep"(%4) <{operandSegmentSizes = array}> {lhs_value = "%arg0"} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %31 = "neura.data_mov"(%26) : (!neura.data) -> !neura.data +// MAPPING-NEXT: %27 = "neura.data_mov"(%26) : (!neura.data) -> !neura.data +// MAPPING-NEXT: %6 
= "neura.data_mov"(%5) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP Bucket Level 3: 4 ops +// MAPPING-NEXT: %28 = "neura.icmp"(%27) <{cmpType = "eq"}> {rhs_value = 32 : i64} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %7 = "neura.load"(%6) : (!neura.data) -> !neura.data +// MAPPING-NEXT: %29 = "neura.data_mov"(%28) : (!neura.data) -> !neura.data +// MAPPING-NEXT: %8 = "neura.data_mov"(%7) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP Bucket Level 4: 5 ops +// MAPPING-NEXT: %10 = "neura.data_mov"(%3) : (!neura.data) -> !neura.data +// MAPPING-NEXT: %30 = "neura.not"(%29) : (!neura.data) -> !neura.data +// MAPPING-NEXT: %9 = "neura.icmp"(%8) <{cmpType = "sgt"}> {rhs_value = 0 : i32} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %32 = "neura.data_mov"(%30) : (!neura.data) -> !neura.data +// MAPPING-NEXT: %11 = "neura.data_mov"(%9) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP Bucket Level 5: 4 ops +// MAPPING-NEXT: %33 = neura.grant_predicate %31, %32 : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %12 = neura.grant_predicate %10, %11 : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: neura.ctrl_mov %33 -> %1 : !neura.data !neura.data +// MAPPING-NEXT: %16 = "neura.data_mov"(%12) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP Bucket Level 6: 4 ops +// MAPPING-NEXT: %13 = "neura.data_mov"(%7) : (!neura.data) -> !neura.data +// MAPPING-NEXT: %14 = "neura.data_mov"(%9) : (!neura.data) -> !neura.data +// MAPPING-NEXT: %17 = "neura.gep"(%16) <{operandSegmentSizes = array}> {lhs_value = "%arg1"} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %18 = "neura.data_mov"(%17) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP Bucket Level 7: 4 ops +// MAPPING-NEXT: %15 = neura.grant_predicate %13, %14 : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %21 = "neura.data_mov"(%15) : (!neura.data) 
-> !neura.data +// MAPPING-NEXT: %19 = "neura.load"(%18) : (!neura.data) -> !neura.data +// MAPPING-NEXT: %20 = "neura.data_mov"(%19) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP Bucket Level 8: 5 ops +// MAPPING-NEXT: %34 = "neura.grant_once"() <{constant_value = true}> : () -> !neura.data +// MAPPING-NEXT: %35 = "neura.data_mov"(%34) : (!neura.data) -> !neura.data +// MAPPING-NEXT: %24 = "neura.data_mov"(%17) : (!neura.data) -> !neura.data +// MAPPING-NEXT: %22 = "neura.add"(%20, %21) : (!neura.data, !neura.data) -> !neura.data +// MAPPING-NEXT: %23 = "neura.data_mov"(%22) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP Bucket Level 9: 2 ops +// MAPPING-NEXT: "neura.return"(%35) : (!neura.data) -> () +// MAPPING-NEXT: "neura.store"(%23, %24) : (!neura.data, !neura.data) -> () +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %0 = "neura.grant_once"() <{constant_value = 0 : i64}> : () -> !neura.data (ALAP level: 0) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %1 = neura.reserve : !neura.data (ALAP level: 0) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %2 = "neura.data_mov"(%0) : (!neura.data) -> !neura.data (ALAP level: 0) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %3 = neura.phi_start %2, %1 : !neura.data, !neura.data -> !neura.data (ALAP level: 1) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %25 = "neura.data_mov"(%3) : (!neura.data) -> !neura.data (ALAP level: 1) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %4 = "neura.data_mov"(%3) : (!neura.data) -> !neura.data (ALAP level: 1) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %26 = "neura.add"(%25) {rhs_value = 1 : i64} : (!neura.data) -> !neura.data (ALAP level: 2) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %5 = "neura.gep"(%4) <{operandSegmentSizes = array}> {lhs_value = "%arg0"} : (!neura.data) -> !neura.data (ALAP level: 2) +// MAPPING-NEXT: 
[MapToAcceleratorPass] ALAP sorted op: %31 = "neura.data_mov"(%26) : (!neura.data) -> !neura.data (ALAP level: 2) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %27 = "neura.data_mov"(%26) : (!neura.data) -> !neura.data (ALAP level: 2) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %6 = "neura.data_mov"(%5) : (!neura.data) -> !neura.data (ALAP level: 2) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %28 = "neura.icmp"(%27) <{cmpType = "eq"}> {rhs_value = 32 : i64} : (!neura.data) -> !neura.data (ALAP level: 3) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %7 = "neura.load"(%6) : (!neura.data) -> !neura.data (ALAP level: 3) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %29 = "neura.data_mov"(%28) : (!neura.data) -> !neura.data (ALAP level: 3) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %8 = "neura.data_mov"(%7) : (!neura.data) -> !neura.data (ALAP level: 3) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %10 = "neura.data_mov"(%3) : (!neura.data) -> !neura.data (ALAP level: 4) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %30 = "neura.not"(%29) : (!neura.data) -> !neura.data (ALAP level: 4) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %9 = "neura.icmp"(%8) <{cmpType = "sgt"}> {rhs_value = 0 : i32} : (!neura.data) -> !neura.data (ALAP level: 4) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %32 = "neura.data_mov"(%30) : (!neura.data) -> !neura.data (ALAP level: 4) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %11 = "neura.data_mov"(%9) : (!neura.data) -> !neura.data (ALAP level: 4) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %33 = neura.grant_predicate %31, %32 : !neura.data, !neura.data -> !neura.data (ALAP level: 5) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %12 = neura.grant_predicate %10, %11 : !neura.data, !neura.data -> !neura.data (ALAP level: 5) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: neura.ctrl_mov 
%33 -> %1 : !neura.data !neura.data (ALAP level: 5) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %16 = "neura.data_mov"(%12) : (!neura.data) -> !neura.data (ALAP level: 5) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %13 = "neura.data_mov"(%7) : (!neura.data) -> !neura.data (ALAP level: 6) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %14 = "neura.data_mov"(%9) : (!neura.data) -> !neura.data (ALAP level: 6) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %17 = "neura.gep"(%16) <{operandSegmentSizes = array}> {lhs_value = "%arg1"} : (!neura.data) -> !neura.data (ALAP level: 6) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %18 = "neura.data_mov"(%17) : (!neura.data) -> !neura.data (ALAP level: 6) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %15 = neura.grant_predicate %13, %14 : !neura.data, !neura.data -> !neura.data (ALAP level: 7) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %21 = "neura.data_mov"(%15) : (!neura.data) -> !neura.data (ALAP level: 7) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %19 = "neura.load"(%18) : (!neura.data) -> !neura.data (ALAP level: 7) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %20 = "neura.data_mov"(%19) : (!neura.data) -> !neura.data (ALAP level: 7) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %34 = "neura.grant_once"() <{constant_value = true}> : () -> !neura.data (ALAP level: 8) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %35 = "neura.data_mov"(%34) : (!neura.data) -> !neura.data (ALAP level: 8) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %24 = "neura.data_mov"(%17) : (!neura.data) -> !neura.data (ALAP level: 8) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %22 = "neura.add"(%20, %21) : (!neura.data, !neura.data) -> !neura.data (ALAP level: 8) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %23 = "neura.data_mov"(%22) : (!neura.data) -> !neura.data (ALAP level: 8) +// 
MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: "neura.return"(%35) : (!neura.data) -> () (ALAP level: 9) +// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: "neura.store"(%23, %24) : (!neura.data, !neura.data) -> () (ALAP level: 9) +// MAPPING-NEXT: --------------------------------------------------------- +// MAPPING-NEXT: [HeuristicMapping] Starting mapping with 39 operations. +// MAPPING-NEXT: Configuration: MAX Backtrack Depth = 3, MAX Candidate Locations = 5 +// MAPPING-NEXT: [HeuristicMapping] Filtered 22 non-materialized operations, 17 operations require physical mapping. +// MAPPING-NEXT: [HeuristicMapping] Materialized operations list: +// MAPPING-NEXT: 0 %0 = "neura.grant_once"() <{constant_value = 0 : i64}> : () -> !neura.data (level: 0) +// MAPPING-NEXT: 1 %3 = neura.phi_start %2, %1 : !neura.data, !neura.data -> !neura.data (level: 1) +// MAPPING-NEXT: 2 %26 = "neura.add"(%25) {rhs_value = 1 : i64} : (!neura.data) -> !neura.data (level: 2) +// MAPPING-NEXT: 3 %5 = "neura.gep"(%4) <{operandSegmentSizes = array}> {lhs_value = "%arg0"} : (!neura.data) -> !neura.data (level: 2) +// MAPPING-NEXT: 4 %28 = "neura.icmp"(%27) <{cmpType = "eq"}> {rhs_value = 32 : i64} : (!neura.data) -> !neura.data (level: 3) +// MAPPING-NEXT: 5 %7 = "neura.load"(%6) : (!neura.data) -> !neura.data (level: 3) +// MAPPING-NEXT: 6 %30 = "neura.not"(%29) : (!neura.data) -> !neura.data (level: 4) +// MAPPING-NEXT: 7 %9 = "neura.icmp"(%8) <{cmpType = "sgt"}> {rhs_value = 0 : i32} : (!neura.data) -> !neura.data (level: 4) +// MAPPING-NEXT: 8 %33 = neura.grant_predicate %31, %32 : !neura.data, !neura.data -> !neura.data (level: 5) +// MAPPING-NEXT: 9 %12 = neura.grant_predicate %10, %11 : !neura.data, !neura.data -> !neura.data (level: 5) +// MAPPING-NEXT: 10 %17 = "neura.gep"(%16) <{operandSegmentSizes = array}> {lhs_value = "%arg1"} : (!neura.data) -> !neura.data (level: 6) +// MAPPING-NEXT: 11 %15 = neura.grant_predicate %13, %14 : !neura.data, !neura.data -> !neura.data 
(level: 7) +// MAPPING-NEXT: 12 %19 = "neura.load"(%18) : (!neura.data) -> !neura.data (level: 7) +// MAPPING-NEXT: 13 %34 = "neura.grant_once"() <{constant_value = true}> : () -> !neura.data (level: 8) +// MAPPING-NEXT: 14 %22 = "neura.add"(%20, %21) : (!neura.data, !neura.data) -> !neura.data (level: 8) +// MAPPING-NEXT: 15 "neura.return"(%35) : (!neura.data) -> () (level: 9) +// MAPPING-NEXT: 16 "neura.store"(%23, %24) : (!neura.data, !neura.data) -> () (level: 9) +// MAPPING-NEXT: [HeuristicMapping] Found 80 candidate locations for operation: %0 = "neura.grant_once"() <{constant_value = 0 : i64}> : () -> !neura.data +// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/5 at tile#11 @t=0 +// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation %0 = "neura.grant_once"() <{constant_value = 0 : i64}> : () -> !neura.data +// MAPPING-NEXT: [HeuristicMapping] Found 54 candidate locations for operation: %3 = neura.phi_start %2, %1 : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/5 at tile#11 @t=1 +// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#11 @t=0 to Tile#11 @t=1 +// MAPPING-NEXT: [tryRouteDataMove] Successfully routed on same tile using Register #704 +// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation %3 = neura.phi_start %2, %1 : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: [HeuristicMapping] Found 53 candidate locations for operation: %26 = "neura.add"(%25) {rhs_value = 1 : i64} : (!neura.data) -> !neura.data +// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/5 at tile#11 @t=2 +// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#11 @t=1 to Tile#11 @t=2 +// MAPPING-NEXT: [tryRouteDataMove] Successfully routed on same tile using Register #704 +// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation %26 = "neura.add"(%25) {rhs_value = 1 : i64} : (!neura.data) -> !neura.data +// MAPPING-NEXT: [HeuristicMapping] Found 52 candidate locations for operation: 
%5 = "neura.gep"(%4) <{operandSegmentSizes = array}> {lhs_value = "%arg0"} : (!neura.data) -> !neura.data +// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/5 at tile#10 @t=2 +// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#11 @t=1 to Tile#10 @t=2 +// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation %5 = "neura.gep"(%4) <{operandSegmentSizes = array}> {lhs_value = "%arg0"} : (!neura.data) -> !neura.data +// MAPPING-NEXT: [HeuristicMapping] Found 51 candidate locations for operation: %28 = "neura.icmp"(%27) <{cmpType = "eq"}> {rhs_value = 32 : i64} : (!neura.data) -> !neura.data +// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/5 at tile#11 @t=3 +// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#11 @t=2 to Tile#11 @t=3 +// MAPPING-NEXT: [tryRouteDataMove] Successfully routed on same tile using Register #704 +// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation %28 = "neura.icmp"(%27) <{cmpType = "eq"}> {rhs_value = 32 : i64} : (!neura.data) -> !neura.data +// MAPPING-NEXT: [HeuristicMapping] Found 58 candidate locations for operation: %7 = "neura.load"(%6) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/5 at tile#10 @t=3 +// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#10 @t=2 to Tile#10 @t=3 +// MAPPING-NEXT: [tryRouteDataMove] Successfully routed on same tile using Register #640 +// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation %7 = "neura.load"(%6) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [HeuristicMapping] Found 49 candidate locations for operation: %30 = "neura.not"(%29) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/5 at tile#10 @t=4 +// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#11 @t=3 to Tile#10 @t=4 +// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation %30 = "neura.not"(%29) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [HeuristicMapping] Found 56 candidate locations for 
operation: %9 = "neura.icmp"(%8) <{cmpType = "sgt"}> {rhs_value = 0 : i32} : (!neura.data) -> !neura.data +// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/5 at tile#11 @t=4 +// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#10 @t=3 to Tile#11 @t=4 +// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation %9 = "neura.icmp"(%8) <{cmpType = "sgt"}> {rhs_value = 0 : i32} : (!neura.data) -> !neura.data +// MAPPING-NEXT: [HeuristicMapping] Found 1 candidate locations for operation: %33 = neura.grant_predicate %31, %32 : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/1 at tile#10 @t=5 +// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#11 @t=2 to Tile#10 @t=5 +// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#10 @t=4 to Tile#10 @t=5 +// MAPPING-NEXT: [tryRouteDataMove] Successfully routed on same tile using Register #641 +// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#10 @t=5 to Tile#11 @t=6 +// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation %33 = neura.grant_predicate %31, %32 : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: [HeuristicMapping] Found 46 candidate locations for operation: %12 = neura.grant_predicate %10, %11 : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/5 at tile#7 @t=5 +// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#11 @t=1 to Tile#7 @t=5 +// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#11 @t=4 to Tile#7 @t=5 +// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation %12 = neura.grant_predicate %10, %11 : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: [HeuristicMapping] Found 45 candidate locations for operation: %17 = "neura.gep"(%16) <{operandSegmentSizes = array}> {lhs_value = "%arg1"} : (!neura.data) -> !neura.data +// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/5 at tile#7 @t=6 +// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#7 @t=5 to Tile#7 
@t=6 +// MAPPING-NEXT: [tryRouteDataMove] Successfully routed on same tile using Register #448 +// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation %17 = "neura.gep"(%16) <{operandSegmentSizes = array}> {lhs_value = "%arg1"} : (!neura.data) -> !neura.data +// MAPPING-NEXT: [HeuristicMapping] Found 64 candidate locations for operation: %15 = neura.grant_predicate %13, %14 : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/5 at tile#15 @t=7 +// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#10 @t=3 to Tile#15 @t=7 +// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#11 @t=4 to Tile#15 @t=7 +// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation %15 = neura.grant_predicate %13, %14 : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: [HeuristicMapping] Found 45 candidate locations for operation: %19 = "neura.load"(%18) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/5 at tile#7 @t=7 +// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#7 @t=6 to Tile#7 @t=7 +// MAPPING-NEXT: [tryRouteDataMove] Successfully routed on same tile using Register #448 +// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation %19 = "neura.load"(%18) : (!neura.data) -> !neura.data +// MAPPING-NEXT: [HeuristicMapping] Found 67 candidate locations for operation: %34 = "neura.grant_once"() <{constant_value = true}> : () -> !neura.data +// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/5 at tile#7 @t=8 +// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation %34 = "neura.grant_once"() <{constant_value = true}> : () -> !neura.data +// MAPPING-NEXT: [HeuristicMapping] Found 28 candidate locations for operation: %22 = "neura.add"(%20, %21) : (!neura.data, !neura.data) -> !neura.data +// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/5 at tile#7 @t=9 +// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#7 @t=7 to Tile#7 @t=9 +// MAPPING-NEXT: 
[tryRouteDataMove] Successfully routed on same tile using Register #449 +// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#15 @t=7 to Tile#7 @t=9 +// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation %22 = "neura.add"(%20, %21) : (!neura.data, !neura.data) -> !neura.data +// MAPPING-NEXT: [HeuristicMapping] Found 40 candidate locations for operation: "neura.return"(%35) : (!neura.data) -> () +// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/5 at tile#3 @t=9 +// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#7 @t=8 to Tile#3 @t=9 +// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation "neura.return"(%35) : (!neura.data) -> () +// MAPPING-NEXT: [HeuristicMapping] Found 40 candidate locations for operation: "neura.store"(%23, %24) : (!neura.data, !neura.data) -> () +// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/5 at tile#6 @t=10 +// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#7 @t=9 to Tile#6 @t=10 +// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#7 @t=6 to Tile#6 @t=10 +// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation "neura.store"(%23, %24) : (!neura.data, !neura.data) -> () +// MAPPING-NEXT: [HeuristicMapping] Successfully mapped all 17 operations. 
+// MAPPING-NEXT: module attributes {{.*}} +// MAPPING-NEXT: llvm.mlir.global external local_unnamed_addr @input(dense<[1, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16, -17, 18, -19, 20, -21, 22, -23, 24, -25, 26, -27, 28, -29, 30, -31]> : tensor<32xi32>) {addr_space = 0 : i32, alignment = 16 : i64, dso_local} : !llvm.array<32 x i32> +// MAPPING-NEXT: llvm.mlir.global external local_unnamed_addr @output(dense<0> : tensor<32xi32>) {addr_space = 0 : i32, alignment = 16 : i64, dso_local} : !llvm.array<32 x i32> +// MAPPING-NEXT: llvm.mlir.global private unnamed_addr constant @".str"("output[%d] = %d\0A\00") {addr_space = 0 : i32, alignment = 1 : i64, dso_local} +// MAPPING-NEXT: llvm.func local_unnamed_addr @main() -> (i32 {llvm.noundef}) attributes {no_unwind, passthrough = ["mustprogress", "nofree", "norecurse", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic"} { +// MAPPING-NEXT: %0 = llvm.mlir.addressof @".str" : !llvm.ptr +// MAPPING-NEXT: %1 = llvm.mlir.addressof @input : !llvm.ptr +// MAPPING-NEXT: %2 = llvm.mlir.addressof @output : !llvm.ptr +// MAPPING-NEXT: %3 = "neura.constant"() <{value = 0 : i8}> : () -> i8 +// MAPPING-NEXT: %4 = "neura.constant"() <{value = 128 : i64}> : () -> i64 +// MAPPING-NEXT: %5 = "neura.constant"() <{value = 0 : i64}> : () -> i64 +// MAPPING-NEXT: %6 = "neura.constant"() <{value = 0 : i32}> : () -> i32 +// MAPPING-NEXT: %7 = "neura.data_mov"(%2) : (!llvm.ptr) -> !llvm.ptr +// MAPPING-NEXT: %8 = "neura.data_mov"(%3) : (i8) -> i8 +// MAPPING-NEXT: %9 = "neura.data_mov"(%4) : (i64) -> i64 +// MAPPING-NEXT: "neura.memset"(%7, %8, %9) <{is_volatile = false}> : (!llvm.ptr, i8, i64) -> () +// MAPPING-NEXT: %10 = "neura.data_mov"(%5) : (i64) -> i64 +// MAPPING-NEXT: neura.br %10 : 
i64 to ^bb1 +// MAPPING-NEXT: ^bb1(%11: i64): // 2 preds: ^bb0, ^bb3 +// MAPPING-NEXT: %12 = "neura.data_mov"(%1) : (!llvm.ptr) -> !llvm.ptr +// MAPPING-NEXT: %13 = "neura.data_mov"(%11) : (i64) -> i64 +// MAPPING-NEXT: %14 = "neura.gep"(%12, %13) <{operandSegmentSizes = array}> : (!llvm.ptr, i64) -> !llvm.ptr +// MAPPING-NEXT: %15 = "neura.data_mov"(%14) : (!llvm.ptr) -> !llvm.ptr +// MAPPING-NEXT: %16 = "neura.load"(%15) : (!llvm.ptr) -> i32 +// MAPPING-NEXT: %17 = "neura.data_mov"(%16) : (i32) -> i32 +// MAPPING-NEXT: %18 = "neura.icmp"(%17) <{cmpType = "sgt"}> {rhs_value = 0 : i32} : (i32) -> i1 +// MAPPING-NEXT: %19 = "neura.data_mov"(%18) : (i1) -> i1 +// MAPPING-NEXT: neura.cond_br %19 : i1 then to ^bb2 else to ^bb3 +// MAPPING-NEXT: ^bb2: // pred: ^bb1 +// MAPPING-NEXT: %20 = "neura.data_mov"(%2) : (!llvm.ptr) -> !llvm.ptr +// MAPPING-NEXT: %21 = "neura.data_mov"(%11) : (i64) -> i64 +// MAPPING-NEXT: %22 = "neura.gep"(%20, %21) <{operandSegmentSizes = array}> : (!llvm.ptr, i64) -> !llvm.ptr +// MAPPING-NEXT: %23 = "neura.data_mov"(%22) : (!llvm.ptr) -> !llvm.ptr +// MAPPING-NEXT: %24 = "neura.load"(%23) : (!llvm.ptr) -> i32 +// MAPPING-NEXT: %25 = "neura.data_mov"(%24) : (i32) -> i32 +// MAPPING-NEXT: %26 = "neura.data_mov"(%16) : (i32) -> i32 +// MAPPING-NEXT: %27 = "neura.add"(%25, %26) : (i32, i32) -> i32 +// MAPPING-NEXT: %28 = "neura.data_mov"(%27) : (i32) -> i32 +// MAPPING-NEXT: %29 = "neura.data_mov"(%22) : (!llvm.ptr) -> !llvm.ptr +// MAPPING-NEXT: "neura.store"(%28, %29) : (i32, !llvm.ptr) -> () +// MAPPING-NEXT: neura.br to ^bb3 +// MAPPING-NEXT: ^bb3: // 2 preds: ^bb1, ^bb2 +// MAPPING-NEXT: %30 = "neura.data_mov"(%11) : (i64) -> i64 +// MAPPING-NEXT: %31 = "neura.add"(%30) {rhs_value = 1 : i64} : (i64) -> i64 +// MAPPING-NEXT: %32 = "neura.data_mov"(%31) : (i64) -> i64 +// MAPPING-NEXT: %33 = "neura.icmp"(%32) <{cmpType = "eq"}> {rhs_value = 32 : i64} : (i64) -> i1 +// MAPPING-NEXT: %34 = "neura.data_mov"(%33) : (i1) -> i1 +// MAPPING-NEXT: %35 
= "neura.data_mov"(%5) : (i64) -> i64 +// MAPPING-NEXT: %36 = "neura.data_mov"(%31) : (i64) -> i64 +// MAPPING-NEXT: neura.cond_br %34 : i1 then %35 : i64 to ^bb5 else %36 : i64 to ^bb1 +// MAPPING-NEXT: ^bb4: // pred: ^bb5 +// MAPPING-NEXT: %37 = "neura.data_mov"(%6) : (i32) -> i32 +// MAPPING-NEXT: "neura.return"(%37) : (i32) -> () +// MAPPING-NEXT: ^bb5(%38: i64): // 2 preds: ^bb3, ^bb5 +// MAPPING-NEXT: %39 = "neura.data_mov"(%2) : (!llvm.ptr) -> !llvm.ptr +// MAPPING-NEXT: %40 = "neura.data_mov"(%38) : (i64) -> i64 +// MAPPING-NEXT: %41 = "neura.gep"(%39, %40) <{operandSegmentSizes = array}> {operand_1_value = 0 : i32} : (!llvm.ptr, i64) -> !llvm.ptr +// MAPPING-NEXT: %42 = "neura.data_mov"(%41) : (!llvm.ptr) -> !llvm.ptr +// MAPPING-NEXT: %43 = "neura.load"(%42) : (!llvm.ptr) -> i32 +// MAPPING-NEXT: %44 = "neura.data_mov"(%38) : (i64) -> i64 +// MAPPING-NEXT: %45 = "neura.cast"(%44) <{cast_type = "trunc"}> : (i64) -> i32 +// MAPPING-NEXT: %46 = llvm.call tail @printf(%0, %45, %43) vararg(!llvm.func) {no_unwind} : (!llvm.ptr, i32, i32) -> i32 +// MAPPING-NEXT: %47 = "neura.data_mov"(%38) : (i64) -> i64 +// MAPPING-NEXT: %48 = "neura.add"(%47) {rhs_value = 1 : i64} : (i64) -> i64 +// MAPPING-NEXT: %49 = "neura.data_mov"(%48) : (i64) -> i64 +// MAPPING-NEXT: %50 = "neura.icmp"(%49) <{cmpType = "eq"}> {rhs_value = 32 : i64} : (i64) -> i1 +// MAPPING-NEXT: %51 = "neura.data_mov"(%50) : (i1) -> i1 +// MAPPING-NEXT: %52 = "neura.data_mov"(%48) : (i64) -> i64 +// MAPPING-NEXT: neura.cond_br %51 : i1 then to ^bb4 else %52 : i64 to ^bb5 +// MAPPING-NEXT: } +// MAPPING-NEXT: func.func @_Z6kernelPiS_(%arg0: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg1: !llvm.ptr {llvm.nocapture, llvm.noundef}) -> !llvm.void attributes {CConv = #llvm.cconv, accelerator = "neura", dataflow_mode = "predicate", linkage = #llvm.linkage, mapping_info = {compiled_ii = 5 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 5 : i32, res_mii = 2 : 
i32, x_tiles = 4 : i32, y_tiles = 4 : i32}, memory_effects = #llvm.memory_effects, no_unwind, passthrough = ["mustprogress", "nofree", "norecurse", "nosync", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic", unnamed_addr = 1 : i64, visibility_ = 0 : i64} { +// MAPPING-NEXT: %0 = "neura.grant_once"() <{constant_value = 0 : i64}> {dfg_id = 0 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 0 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 0 : i32, x = 3 : i32, y = 2 : i32}]} : () -> !neura.data +// MAPPING-NEXT: %1 = neura.reserve {dfg_id = 1 : i32} : !neura.data +// MAPPING-NEXT: %2 = "neura.data_mov"(%0) {dfg_id = 3 : i32, mapping_locs = [{id = 704 : i32, index_per_ii = 0 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 0 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %3 = neura.phi_start %2, %1 {dfg_id = 5 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 1 : i32, x = 3 : i32, y = 2 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %4 = "neura.data_mov"(%3) {dfg_id = 9 : i32, mapping_locs = [{id = 35 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %5 = "neura.gep"(%4) <{operandSegmentSizes = array}> {dfg_id = 11 : i32, lhs_value = "%arg0", mapping_locs = [{id = 10 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 2 : i32, x = 2 : i32, y = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %6 = "neura.data_mov"(%5) {dfg_id = 14 : i32, mapping_locs = [{id = 640 : i32, index_per_ii = 2 : i32, 
invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %7 = "neura.load"(%6) {dfg_id = 16 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 3 : i32, x = 2 : i32, y = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %8 = "neura.data_mov"(%7) {dfg_id = 19 : i32, mapping_locs = [{id = 32 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 3 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %9 = "neura.icmp"(%8) <{cmpType = "sgt"}> {dfg_id = 21 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 4 : i32, x = 3 : i32, y = 2 : i32}], rhs_value = 0 : i32} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %10 = "neura.data_mov"(%3) {dfg_id = 8 : i32, mapping_locs = [{id = 36 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 1 : i32}, {id = 448 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 2 : i32}, {id = 448 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 3 : i32}, {id = 448 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 4 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %11 = "neura.data_mov"(%9) {dfg_id = 24 : i32, mapping_locs = [{id = 36 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 4 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %12 = neura.grant_predicate %10, %11 {dfg_id = 27 : i32, mapping_locs = [{id = 7 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 5 : i32, x = 3 : i32, y 
= 1 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %13 = "neura.data_mov"(%7) {dfg_id = 18 : i32, mapping_locs = [{id = 34 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 3 : i32}, {id = 44 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 4 : i32}, {id = 960 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 5 : i32}, {id = 960 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 6 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %14 = "neura.data_mov"(%9) {dfg_id = 23 : i32, mapping_locs = [{id = 37 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 4 : i32}, {id = 961 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 5 : i32}, {id = 961 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 6 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %15 = neura.grant_predicate %13, %14 {dfg_id = 26 : i32, mapping_locs = [{id = 15 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 7 : i32, x = 3 : i32, y = 3 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %16 = "neura.data_mov"(%12) {dfg_id = 30 : i32, mapping_locs = [{id = 448 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 5 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %17 = "neura.gep"(%16) <{operandSegmentSizes = array}> {dfg_id = 31 : i32, lhs_value = "%arg1", mapping_locs = [{id = 7 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 6 : i32, x = 3 : i32, y = 1 : 
i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %18 = "neura.data_mov"(%17) {dfg_id = 33 : i32, mapping_locs = [{id = 448 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 6 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %19 = "neura.load"(%18) {dfg_id = 34 : i32, mapping_locs = [{id = 7 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 7 : i32, x = 3 : i32, y = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %20 = "neura.data_mov"(%19) {dfg_id = 35 : i32, mapping_locs = [{id = 449 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 7 : i32}, {id = 449 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 8 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %21 = "neura.data_mov"(%15) {dfg_id = 29 : i32, mapping_locs = [{id = 47 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 7 : i32}, {id = 36 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 8 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %22 = "neura.add"(%20, %21) {dfg_id = 36 : i32, mapping_locs = [{id = 7 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 9 : i32, x = 3 : i32, y = 1 : i32}]} : (!neura.data, !neura.data) -> !neura.data +// MAPPING-NEXT: %23 = "neura.data_mov"(%22) {dfg_id = 37 : i32, mapping_locs = [{id = 21 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 9 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %24 = "neura.data_mov"(%17) {dfg_id = 32 : i32, mapping_locs = [{id = 21 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 6 : i32}, {id = 384 : i32, 
index_per_ii = 2 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 7 : i32}, {id = 384 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 8 : i32}, {id = 384 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 9 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: "neura.store"(%23, %24) {dfg_id = 38 : i32, mapping_locs = [{id = 6 : i32, index_per_ii = 0 : i32, invalid_iterations = 2 : i32, resource = "tile", time_step = 10 : i32, x = 2 : i32, y = 1 : i32}]} : (!neura.data, !neura.data) -> () +// MAPPING-NEXT: %25 = "neura.data_mov"(%3) {dfg_id = 7 : i32, mapping_locs = [{id = 704 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %26 = "neura.add"(%25) {dfg_id = 10 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 2 : i32, x = 3 : i32, y = 2 : i32}], rhs_value = 1 : i64} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %27 = "neura.data_mov"(%26) {dfg_id = 13 : i32, mapping_locs = [{id = 704 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %28 = "neura.icmp"(%27) <{cmpType = "eq"}> {dfg_id = 15 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 3 : i32, x = 3 : i32, y = 2 : i32}], rhs_value = 32 : i64} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %29 = "neura.data_mov"(%28) {dfg_id = 17 : i32, mapping_locs = [{id = 35 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 3 : i32}]} : (!neura.data) -> 
!neura.data +// MAPPING-NEXT: %30 = "neura.not"(%29) {dfg_id = 20 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 4 : i32, x = 2 : i32, y = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %31 = "neura.data_mov"(%26) {dfg_id = 12 : i32, mapping_locs = [{id = 35 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 2 : i32}, {id = 640 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 3 : i32}, {id = 640 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 4 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %32 = "neura.data_mov"(%30) {dfg_id = 22 : i32, mapping_locs = [{id = 641 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 4 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %33 = neura.grant_predicate %31, %32 {dfg_id = 25 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 5 : i32, x = 2 : i32, y = 2 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: neura.ctrl_mov %33 -> %1 {dfg_id = 28 : i32, mapping_locs = [{id = 32 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 5 : i32}]} : !neura.data !neura.data +// MAPPING-NEXT: %34 = "neura.grant_once"() <{constant_value = true}> {dfg_id = 2 : i32, mapping_locs = [{id = 7 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 8 : i32, x = 3 : i32, y = 1 : i32}]} : () -> !neura.data +// MAPPING-NEXT: %35 = "neura.data_mov"(%34) {dfg_id = 4 : i32, mapping_locs = [{id = 22 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 8 : i32}]} : 
(!neura.data) -> !neura.data +// MAPPING-NEXT: "neura.return"(%35) {dfg_id = 6 : i32, mapping_locs = [{id = 3 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 9 : i32, x = 3 : i32, y = 0 : i32}]} : (!neura.data) -> () +// MAPPING-NEXT: } +// MAPPING-NEXT: llvm.func local_unnamed_addr @printf(!llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, ...) -> (i32 {llvm.noundef}) attributes {no_unwind, passthrough = ["nofree", ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic"} +// MAPPING-NEXT: } \ No newline at end of file diff --git a/test/neura/fusion/test.mlir b/test/neura/fusion/test.mlir index f3b1dc32..68abb040 100644 --- a/test/neura/fusion/test.mlir +++ b/test/neura/fusion/test.mlir @@ -46,27 +46,27 @@ // RUN: --iter-merge-pattern="min-support=3 max-iter=4" %t-kernel.mlir \ // RUN: | FileCheck %s --check-prefix=CHECK-ITER-MERGE-PATTERN -// CHECK-ITER-MERGE-PATTERN: %11:2 = "neura.fused_op"(%10) <{frequency = 4 : i64, pattern_id = 9 : i64, pattern_name = "grant_once->phi_start"}> ({ +// CHECK-ITER-MERGE-PATTERN: %11:2 = "neura.fused_op"(%10) <{frequency = 4 : i64, pattern_id = 9 : i64, pattern_name = "grant_once->phi_start"}> ({ // CHECK-ITER-MERGE-PATTERN-NEXT: ^bb0(%arg5: !neura.data): -// CHECK-ITER-MERGE-PATTERN-NEXT: %61 = "neura.grant_once"() <{constant_value = 0 : i64}> : () -> !neura.data -// CHECK-ITER-MERGE-PATTERN-NEXT: %62 = neura.phi_start %61, %arg5 : !neura.data, !neura.data -> !neura.data -// CHECK-ITER-MERGE-PATTERN-NEXT: neura.yield %61, %62 : !neura.data, !neura.data +// CHECK-ITER-MERGE-PATTERN-NEXT: %62 = "neura.grant_once"() <{constant_value = 0 : i64}> : () -> !neura.data +// CHECK-ITER-MERGE-PATTERN-NEXT: %63 = neura.phi_start %62, %arg5 : !neura.data, !neura.data -> !neura.data +// 
CHECK-ITER-MERGE-PATTERN-NEXT: neura.yield %62, %63 : !neura.data, !neura.data // CHECK-ITER-MERGE-PATTERN-NEXT: }) : (!neura.data) -> (!neura.data, !neura.data) // CHECK-ITER-MERGE-PATTERN: %15:3 = "neura.fused_op"(%11#0, %14, %4, %13) <{frequency = 3 : i64, pattern_id = 6 : i64, pattern_name = "phi_start->fused_op:phi_start->fused_op:gep->load"}> ({ -// CHECK-ITER-MERGE-PATTERN-NEXT: ^bb0(%arg5: !neura.data, %arg6: !neura.data, %arg7: !neura.data, %arg8: !neura.data): -// CHECK-ITER-MERGE-PATTERN-NEXT: %61 = neura.phi_start %arg5, %arg6 : !neura.data, !neura.data -> !neura.data -// CHECK-ITER-MERGE-PATTERN-NEXT: %62 = neura.phi_start %arg7, %arg8 : !neura.data, !neura.data -> !neura.data -// CHECK-ITER-MERGE-PATTERN-NEXT: %63 = "neura.gep"(%62, %61) <{operandSegmentSizes = array}> : (!neura.data, !neura.data) -> !neura.data -// CHECK-ITER-MERGE-PATTERN-NEXT: %64 = "neura.load"(%63) : (!neura.data) -> !neura.data -// CHECK-ITER-MERGE-PATTERN-NEXT: neura.yield %61, %62, %64 : !neura.data, !neura.data, !neura.data -// CHECK-ITER-MERGE-PATTERN-NEXT: }) : (!neura.data, !neura.data, !neura.data, !neura.data) -> (!neura.data, !neura.data, !neura.data) - // CHECK-ITER-MERGE-PATTERN: %16:3 = "neura.fused_op"(%2, %12, %15#0) <{frequency = 8 : i64, pattern_id = 10 : i64, pattern_name = "phi_start->fused_op:gep->load"}> ({ - // CHECK-ITER-MERGE-PATTERN-NEXT: ^bb0(%arg5: !neura.data, %arg6: !neura.data, %arg7: !neura.data): - // CHECK-ITER-MERGE-PATTERN-NEXT: %61 = neura.phi_start %arg5, %arg6 : !neura.data, !neura.data -> !neura.data - // CHECK-ITER-MERGE-PATTERN-NEXT: %62 = "neura.gep"(%61, %arg7) <{operandSegmentSizes = array}> : (!neura.data, !neura.data) -> !neura.data - // CHECK-ITER-MERGE-PATTERN-NEXT: %63 = "neura.load"(%62) : (!neura.data) -> !neura.data - // CHECK-ITER-MERGE-PATTERN-NEXT: neura.yield %61, %62, %63 : !neura.data, !neura.data, !neura.data - // CHECK-ITER-MERGE-PATTERN-NEXT: }) : (!neura.data, !neura.data, !neura.data) -> (!neura.data, !neura.data, 
!neura.data) +// CHECK-ITER-MERGE-PATTERN-NEXT: ^bb0(%arg5: !neura.data, %arg6: !neura.data, %arg7: !neura.data, %arg8: !neura.data): +// CHECK-ITER-MERGE-PATTERN-NEXT: %62 = neura.phi_start %arg5, %arg6 : !neura.data, !neura.data -> !neura.data +// CHECK-ITER-MERGE-PATTERN-NEXT: %63 = neura.phi_start %arg7, %arg8 : !neura.data, !neura.data -> !neura.data +// CHECK-ITER-MERGE-PATTERN-NEXT: %64 = "neura.gep"(%63, %62) <{operandSegmentSizes = array}> : (!neura.data, !neura.data) -> !neura.data +// CHECK-ITER-MERGE-PATTERN-NEXT: %65 = "neura.load"(%64) : (!neura.data) -> !neura.data +// CHECK-ITER-MERGE-PATTERN-NEXT: neura.yield %62, %63, %65 : !neura.data, !neura.data, !neura.data +// CHECK-ITER-MERGE-PATTERN-NEXT: }) : (!neura.data, !neura.data, !neura.data, !neura.data) -> (!neura.data, !neura.data, !neura.data) +// CHECK-ITER-MERGE-PATTERN: %16:3 = "neura.fused_op"(%2, %12, %15#0) <{frequency = 8 : i64, pattern_id = 10 : i64, pattern_name = "phi_start->fused_op:gep->load"}> ({ +// CHECK-ITER-MERGE-PATTERN-NEXT: ^bb0(%arg5: !neura.data, %arg6: !neura.data, %arg7: !neura.data): +// CHECK-ITER-MERGE-PATTERN-NEXT: %62 = neura.phi_start %arg5, %arg6 : !neura.data, !neura.data -> !neura.data +// CHECK-ITER-MERGE-PATTERN-NEXT: %63 = "neura.gep"(%62, %arg7) <{operandSegmentSizes = array}> : (!neura.data, !neura.data) -> !neura.data +// CHECK-ITER-MERGE-PATTERN-NEXT: %64 = "neura.load"(%63) : (!neura.data) -> !neura.data +// CHECK-ITER-MERGE-PATTERN-NEXT: neura.yield %62, %63, %64 : !neura.data, !neura.data, !neura.data +// CHECK-ITER-MERGE-PATTERN-NEXT: }) : (!neura.data, !neura.data, !neura.data) -> (!neura.data, !neura.data, !neura.data) // RUN: mlir-neura-opt --architecture-spec=%S/../../arch_spec/architecture.yaml --verify-each=true --mlir-print-ir-after-failure \ // RUN: --assign-accelerator \ @@ -80,17 +80,17 @@ // RUN: --fold-constant \ // RUN: --init-pattern %t-kernel.mlir | FileCheck %s --check-prefix=CHECK-INIT-PATTERN -// CHECK-INIT-PATTERN: %21:2 = 
"neura.fused_op"(%16, %20) <{frequency = 6 : i64, pattern_id = 2 : i64, pattern_name = "gep->load"}> ({ +// CHECK-INIT-PATTERN: %43:2 = "neura.fused_op"(%38, %30, %42) <{frequency = 6 : i64, pattern_id = 2 : i64, pattern_name = "gep->load"}> ({ +// CHECK-INIT-PATTERN-NEXT: ^bb0(%arg5: !neura.data, %arg6: !neura.data, %arg7: !neura.data): +// CHECK-INIT-PATTERN-NEXT: %75 = "neura.gep"(%arg5, %arg6, %arg7) <{operandSegmentSizes = array}> : (!neura.data, !neura.data, !neura.data) -> !neura.data +// CHECK-INIT-PATTERN-NEXT: %76 = "neura.load"(%75) : (!neura.data) -> !neura.data +// CHECK-INIT-PATTERN-NEXT: neura.yield %75, %76 : !neura.data, !neura.data +// CHECK-INIT-PATTERN-NEXT: }) : (!neura.data, !neura.data, !neura.data) -> (!neura.data, !neura.data) +// CHECK-INIT-PATTERN-NEXT: %44 = "neura.fused_op"(%36, %42) <{frequency = 6 : i64, pattern_id = 2 : i64, pattern_name = "gep->load"}> ({ // CHECK-INIT-PATTERN-NEXT: ^bb0(%arg5: !neura.data, %arg6: !neura.data): -// CHECK-INIT-PATTERN-NEXT: %74 = "neura.gep"(%arg5, %arg6) <{operandSegmentSizes = array}> : (!neura.data, !neura.data) -> !neura.data -// CHECK-INIT-PATTERN-NEXT: %75 = "neura.load"(%74) : (!neura.data) -> !neura.data -// CHECK-INIT-PATTERN-NEXT: neura.yield %74, %75 : !neura.data, !neura.data -// CHECK-INIT-PATTERN-NEXT: }) : (!neura.data, !neura.data) -> (!neura.data, !neura.data) -// CHECK-INIT-PATTERN-NEXT: %22 = "neura.fused_op"(%18, %20) <{frequency = 6 : i64, pattern_id = 2 : i64, pattern_name = "gep->load"}> ({ -// CHECK-INIT-PATTERN-NEXT: ^bb0(%arg5: !neura.data, %arg6: !neura.data): -// CHECK-INIT-PATTERN-NEXT: %74 = "neura.gep"(%arg5, %arg6) <{operandSegmentSizes = array}> : (!neura.data, !neura.data) -> !neura.data -// CHECK-INIT-PATTERN-NEXT: %75 = "neura.load"(%74) : (!neura.data) -> !neura.data -// CHECK-INIT-PATTERN-NEXT: neura.yield %75 : !neura.data +// CHECK-INIT-PATTERN-NEXT: %75 = "neura.gep"(%arg5, %arg6) <{operandSegmentSizes = array}> : (!neura.data, !neura.data) -> !neura.data +// 
CHECK-INIT-PATTERN-NEXT: %76 = "neura.load"(%75) : (!neura.data) -> !neura.data +// CHECK-INIT-PATTERN-NEXT: neura.yield %76 : !neura.data // CHECK-INIT-PATTERN-NEXT: }) : (!neura.data, !neura.data) -> !neura.data // RUN: mlir-neura-opt --architecture-spec=%S/../../arch_spec/architecture.yaml --verify-each=true --mlir-print-ir-after-failure \ diff --git a/test/neura/steer_ctrl/loop_without_return_value.mlir b/test/neura/steer_ctrl/loop_without_return_value.mlir index b20ceb53..1a2caf02 100644 --- a/test/neura/steer_ctrl/loop_without_return_value.mlir +++ b/test/neura/steer_ctrl/loop_without_return_value.mlir @@ -28,29 +28,33 @@ module attributes {} { } } -// CHECK: func.func @_Z11simple_loopPiS_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", dataflow_mode = "steering", llvm.linkage = #llvm.linkage} { -// CHECK-NEXT: %0 = neura.reserve : i64 -// CHECK-NEXT: %1 = neura.reserve : i1 -// CHECK-NEXT: %2 = "neura.constant"() <{value = "%arg0"}> : () -> memref -// CHECK-NEXT: %3 = "neura.constant"() <{value = "%arg1"}> : () -> memref -// CHECK-NEXT: %4 = "neura.constant"() <{value = 1 : i64}> : () -> i64 -// CHECK-NEXT: %5 = "neura.constant"() <{value = 128 : i64}> : () -> i64 -// CHECK-NEXT: %6 = "neura.constant"() <{value = 1 : i32}> : () -> i32 -// CHECK-NEXT: %7 = "neura.constant"() <{value = 2 : i32}> : () -> i32 -// CHECK-NEXT: %8 = "neura.constant"() <{value = 0 : i64}> : () -> i64 -// CHECK-NEXT: %9 = neura.invariant %4, %1 : i64, i1 -> i64 -// CHECK-NEXT: %10 = neura.invariant %3, %1 : memref, i1 -> memref -// CHECK-NEXT: %11 = neura.invariant %6, %1 : i32, i1 -> i32 -// CHECK-NEXT: %12 = neura.invariant %7, %1 : i32, i1 -> i32 -// CHECK-NEXT: %13 = neura.invariant %2, %1 : memref, i1 -> memref -// CHECK-NEXT: %14 = neura.invariant %5, %1 : i64, i1 -> i64 -// CHECK-NEXT: %15 = neura.carry %8, %1, %0 : i64, i1, i64 -> i64 -// CHECK-NEXT: %16 = "neura.icmp"(%15, %14) <{cmpType = "slt"}> : (i64, i64) -> i1 -// CHECK-NEXT: neura.ctrl_mov %16 -> %1 : 
i1 i1 -// CHECK-NEXT: %17 = neura.load_indexed %13[%15 : i64] memref : i32 -// CHECK-NEXT: %18 = "neura.mul"(%17, %12) : (i32, i32) -> i32 -// CHECK-NEXT: %19 = "neura.add"(%18, %11) : (i32, i32) -> i32 -// CHECK-NEXT: neura.store_indexed %19 to %10[%15 : i64] memref : i32 -// CHECK-NEXT: %20 = "neura.add"(%15, %9) : (i64, i64) -> i64 -// CHECK-NEXT: neura.ctrl_mov %20 -> %0 : i64 i64 -// CHECK-NEXT: "neura.return"() : () -> () +// CHECK: module { +// CHECK-NEXT: func.func @_Z11simple_loopPiS_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", dataflow_mode = "steering", llvm.linkage = #llvm.linkage} { +// CHECK-NEXT: %0 = neura.reserve : i64 +// CHECK-NEXT: %1 = neura.reserve : i1 +// CHECK-NEXT: %2 = "neura.constant"() <{value = "%arg0"}> : () -> memref +// CHECK-NEXT: %3 = "neura.constant"() <{value = "%arg1"}> : () -> memref +// CHECK-NEXT: %4 = "neura.constant"() <{value = 1 : i64}> : () -> i64 +// CHECK-NEXT: %5 = "neura.constant"() <{value = 128 : i64}> : () -> i64 +// CHECK-NEXT: %6 = "neura.constant"() <{value = 1 : i32}> : () -> i32 +// CHECK-NEXT: %7 = "neura.constant"() <{value = 2 : i32}> : () -> i32 +// CHECK-NEXT: %8 = "neura.constant"() <{value = 0 : i64}> : () -> i64 +// CHECK-NEXT: %9 = neura.invariant %4, %1 : i64, i1 -> i64 +// CHECK-NEXT: %10 = neura.invariant %3, %1 : memref, i1 -> memref +// CHECK-NEXT: %11 = neura.invariant %6, %1 : i32, i1 -> i32 +// CHECK-NEXT: %12 = neura.invariant %7, %1 : i32, i1 -> i32 +// CHECK-NEXT: %13 = neura.invariant %2, %1 : memref, i1 -> memref +// CHECK-NEXT: %14 = neura.invariant %5, %1 : i64, i1 -> i64 +// CHECK-NEXT: %15 = neura.carry %8, %1, %0 : i64, i1, i64 -> i64 +// CHECK-NEXT: %16 = "neura.icmp"(%15, %14) <{cmpType = "slt"}> : (i64, i64) -> i1 +// CHECK-NEXT: neura.ctrl_mov %16 -> %1 : i1 i1 +// CHECK-NEXT: %17 = neura.load_indexed %13[%15 : i64] memref : i32 +// CHECK-NEXT: %18 = "neura.mul"(%17, %12) : (i32, i32) -> i32 +// CHECK-NEXT: %19 = "neura.add"(%18, %11) : (i32, i32) -> i32 
+// CHECK-NEXT: neura.store_indexed %19 to %10[%15 : i64] memref : i32 +// CHECK-NEXT: %20 = "neura.add"(%15, %9) : (i64, i64) -> i64 +// CHECK-NEXT: neura.ctrl_mov %20 -> %0 : i64 i64 +// CHECK-NEXT: %21 = "neura.constant"() <{value = true}> : () -> i1 +// CHECK-NEXT: "neura.return"(%21) : (i1) -> () +// CHECK-NEXT: } +// CHECK-NEXT: } From fb09c7cf099a8586afcf5e9231f7fcb357d36f05 Mon Sep 17 00:00:00 2001 From: Shiran Guo Date: Sun, 28 Dec 2025 12:05:46 +0800 Subject: [PATCH 2/2] Fix ReturnOp exit predicate logic and clean up Neura test outputs (addresses issue #209) --- .../TransformCtrlToDataFlowPass.cpp | 6 +- test/neura/for_loop/relu_test.mlir | 376 +----------------- 2 files changed, 4 insertions(+), 378 deletions(-) diff --git a/lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp b/lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp index 55f2db67..2289e981 100644 --- a/lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp +++ b/lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp @@ -608,10 +608,10 @@ void injectExitPredicateForReturn(Region ®ion, ControlFlowInfo &ctrl_info, OpBuilder &builder) { Block *entry_block = ®ion.front(); - // Find the ReturnOp + // Finds the ReturnOp. neura::ReturnOp return_op = nullptr; for (Operation &op : *entry_block) { - if (auto rt =dyn_cast(op)) { + if (auto rt = dyn_cast(op)) { return_op = rt; llvm::errs() << "[ctrl2data] ReturnOp found: " << *rt << "\n"; break; @@ -753,7 +753,7 @@ struct TransformCtrlToDataFlowPass buildControlFlowInfo(*region, ctrlInfo, domInfo); transformControlFlowToDataFlow(*region, ctrlInfo, domInfo, builder); - // Inject exit predicate for void returns + // Injects exit predicate for void returns. injectExitPredicateForReturn(*region, ctrlInfo, builder); // Converts phi operations to phi_start operations. 
diff --git a/test/neura/for_loop/relu_test.mlir b/test/neura/for_loop/relu_test.mlir index 2c00cb6a..3fc3c520 100644 --- a/test/neura/for_loop/relu_test.mlir +++ b/test/neura/for_loop/relu_test.mlir @@ -220,378 +220,4 @@ // CTRL2DATA-NEXT: } -// MAPPING: [DEBUG] Recurrence cycle (length 3): -// MAPPING-NEXT: %1 = neura.reserve : !neura.data -// MAPPING-NEXT: %3 = neura.phi_start %2, %1 : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: %25 = "neura.data_mov"(%3) : (!neura.data) -> !neura.data -// MAPPING-NEXT: %26 = "neura.add"(%25) {rhs_value = 1 : i64} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %31 = "neura.data_mov"(%26) : (!neura.data) -> !neura.data -// MAPPING-NEXT: %33 = neura.grant_predicate %31, %32 : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: neura.ctrl_mov %33 -> %1 : !neura.data !neura.data -// MAPPING-NEXT: [DEBUG] Recurrence cycle (length 5): -// MAPPING-NEXT: %1 = neura.reserve : !neura.data -// MAPPING-NEXT: %3 = neura.phi_start %2, %1 : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: %25 = "neura.data_mov"(%3) : (!neura.data) -> !neura.data -// MAPPING-NEXT: %26 = "neura.add"(%25) {rhs_value = 1 : i64} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %27 = "neura.data_mov"(%26) : (!neura.data) -> !neura.data -// MAPPING-NEXT: %28 = "neura.icmp"(%27) <{cmpType = "eq"}> {rhs_value = 32 : i64} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %29 = "neura.data_mov"(%28) : (!neura.data) -> !neura.data -// MAPPING-NEXT: %30 = "neura.not"(%29) : (!neura.data) -> !neura.data -// MAPPING-NEXT: %32 = "neura.data_mov"(%30) : (!neura.data) -> !neura.data -// MAPPING-NEXT: %33 = neura.grant_predicate %31, %32 : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: neura.ctrl_mov %33 -> %1 : !neura.data !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Longest recurrence cycle (length 5): -// MAPPING-NEXT: %1 = neura.reserve : !neura.data -// MAPPING-NEXT: %3 = neura.phi_start %2, %1 : !neura.data, !neura.data -> 
!neura.data -// MAPPING-NEXT: %25 = "neura.data_mov"(%3) : (!neura.data) -> !neura.data -// MAPPING-NEXT: %26 = "neura.add"(%25) {rhs_value = 1 : i64} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %27 = "neura.data_mov"(%26) : (!neura.data) -> !neura.data -// MAPPING-NEXT: %28 = "neura.icmp"(%27) <{cmpType = "eq"}> {rhs_value = 32 : i64} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %29 = "neura.data_mov"(%28) : (!neura.data) -> !neura.data -// MAPPING-NEXT: %30 = "neura.not"(%29) : (!neura.data) -> !neura.data -// MAPPING-NEXT: %32 = "neura.data_mov"(%30) : (!neura.data) -> !neura.data -// MAPPING-NEXT: %33 = neura.grant_predicate %31, %32 : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: neura.ctrl_mov %33 -> %1 : !neura.data !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %0 = "neura.grant_once"() <{constant_value = 0 : i64}> : () -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %1 = neura.reserve : !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %34 = "neura.grant_once"() <{constant_value = true}> : () -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %2 = "neura.data_mov"(%0) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %35 = "neura.data_mov"(%34) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %3 = neura.phi_start %2, %1 : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: "neura.return"(%35) : (!neura.data) -> () -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %25 = "neura.data_mov"(%3) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %10 = "neura.data_mov"(%3) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %4 = "neura.data_mov"(%3) : (!neura.data) -> 
!neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %26 = "neura.add"(%25) {rhs_value = 1 : i64} : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %5 = "neura.gep"(%4) <{operandSegmentSizes = array}> {lhs_value = "%arg0"} : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %31 = "neura.data_mov"(%26) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %27 = "neura.data_mov"(%26) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %6 = "neura.data_mov"(%5) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %28 = "neura.icmp"(%27) <{cmpType = "eq"}> {rhs_value = 32 : i64} : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %7 = "neura.load"(%6) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %29 = "neura.data_mov"(%28) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %13 = "neura.data_mov"(%7) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %8 = "neura.data_mov"(%7) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %30 = "neura.not"(%29) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %9 = "neura.icmp"(%8) <{cmpType = "sgt"}> {rhs_value = 0 : i32} : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %32 = "neura.data_mov"(%30) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %14 = "neura.data_mov"(%9) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %11 = "neura.data_mov"(%9) : (!neura.data) -> 
!neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %33 = neura.grant_predicate %31, %32 : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %15 = neura.grant_predicate %13, %14 : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %12 = neura.grant_predicate %10, %11 : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: neura.ctrl_mov %33 -> %1 : !neura.data !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %21 = "neura.data_mov"(%15) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %16 = "neura.data_mov"(%12) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %17 = "neura.gep"(%16) <{operandSegmentSizes = array}> {lhs_value = "%arg1"} : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %24 = "neura.data_mov"(%17) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %18 = "neura.data_mov"(%17) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %19 = "neura.load"(%18) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %20 = "neura.data_mov"(%19) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %22 = "neura.add"(%20, %21) : (!neura.data, !neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: %23 = "neura.data_mov"(%22) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] Topologically sorted op: "neura.store"(%23, %24) : (!neura.data, !neura.data) -> () -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP Bucket Level 0: 3 ops -// MAPPING-NEXT: %0 = "neura.grant_once"() 
<{constant_value = 0 : i64}> : () -> !neura.data -// MAPPING-NEXT: %1 = neura.reserve : !neura.data -// MAPPING-NEXT: %2 = "neura.data_mov"(%0) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP Bucket Level 1: 3 ops -// MAPPING-NEXT: %3 = neura.phi_start %2, %1 : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: %25 = "neura.data_mov"(%3) : (!neura.data) -> !neura.data -// MAPPING-NEXT: %4 = "neura.data_mov"(%3) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP Bucket Level 2: 5 ops -// MAPPING-NEXT: %26 = "neura.add"(%25) {rhs_value = 1 : i64} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %5 = "neura.gep"(%4) <{operandSegmentSizes = array}> {lhs_value = "%arg0"} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %31 = "neura.data_mov"(%26) : (!neura.data) -> !neura.data -// MAPPING-NEXT: %27 = "neura.data_mov"(%26) : (!neura.data) -> !neura.data -// MAPPING-NEXT: %6 = "neura.data_mov"(%5) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP Bucket Level 3: 4 ops -// MAPPING-NEXT: %28 = "neura.icmp"(%27) <{cmpType = "eq"}> {rhs_value = 32 : i64} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %7 = "neura.load"(%6) : (!neura.data) -> !neura.data -// MAPPING-NEXT: %29 = "neura.data_mov"(%28) : (!neura.data) -> !neura.data -// MAPPING-NEXT: %8 = "neura.data_mov"(%7) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP Bucket Level 4: 5 ops -// MAPPING-NEXT: %10 = "neura.data_mov"(%3) : (!neura.data) -> !neura.data -// MAPPING-NEXT: %30 = "neura.not"(%29) : (!neura.data) -> !neura.data -// MAPPING-NEXT: %9 = "neura.icmp"(%8) <{cmpType = "sgt"}> {rhs_value = 0 : i32} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %32 = "neura.data_mov"(%30) : (!neura.data) -> !neura.data -// MAPPING-NEXT: %11 = "neura.data_mov"(%9) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP Bucket Level 5: 4 ops -// MAPPING-NEXT: %33 = neura.grant_predicate 
%31, %32 : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: %12 = neura.grant_predicate %10, %11 : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: neura.ctrl_mov %33 -> %1 : !neura.data !neura.data -// MAPPING-NEXT: %16 = "neura.data_mov"(%12) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP Bucket Level 6: 4 ops -// MAPPING-NEXT: %13 = "neura.data_mov"(%7) : (!neura.data) -> !neura.data -// MAPPING-NEXT: %14 = "neura.data_mov"(%9) : (!neura.data) -> !neura.data -// MAPPING-NEXT: %17 = "neura.gep"(%16) <{operandSegmentSizes = array}> {lhs_value = "%arg1"} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %18 = "neura.data_mov"(%17) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP Bucket Level 7: 4 ops -// MAPPING-NEXT: %15 = neura.grant_predicate %13, %14 : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: %21 = "neura.data_mov"(%15) : (!neura.data) -> !neura.data -// MAPPING-NEXT: %19 = "neura.load"(%18) : (!neura.data) -> !neura.data -// MAPPING-NEXT: %20 = "neura.data_mov"(%19) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP Bucket Level 8: 5 ops -// MAPPING-NEXT: %34 = "neura.grant_once"() <{constant_value = true}> : () -> !neura.data -// MAPPING-NEXT: %35 = "neura.data_mov"(%34) : (!neura.data) -> !neura.data -// MAPPING-NEXT: %24 = "neura.data_mov"(%17) : (!neura.data) -> !neura.data -// MAPPING-NEXT: %22 = "neura.add"(%20, %21) : (!neura.data, !neura.data) -> !neura.data -// MAPPING-NEXT: %23 = "neura.data_mov"(%22) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP Bucket Level 9: 2 ops -// MAPPING-NEXT: "neura.return"(%35) : (!neura.data) -> () -// MAPPING-NEXT: "neura.store"(%23, %24) : (!neura.data, !neura.data) -> () -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %0 = "neura.grant_once"() <{constant_value = 0 : i64}> : () -> !neura.data (ALAP level: 0) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: 
%1 = neura.reserve : !neura.data (ALAP level: 0) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %2 = "neura.data_mov"(%0) : (!neura.data) -> !neura.data (ALAP level: 0) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %3 = neura.phi_start %2, %1 : !neura.data, !neura.data -> !neura.data (ALAP level: 1) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %25 = "neura.data_mov"(%3) : (!neura.data) -> !neura.data (ALAP level: 1) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %4 = "neura.data_mov"(%3) : (!neura.data) -> !neura.data (ALAP level: 1) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %26 = "neura.add"(%25) {rhs_value = 1 : i64} : (!neura.data) -> !neura.data (ALAP level: 2) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %5 = "neura.gep"(%4) <{operandSegmentSizes = array}> {lhs_value = "%arg0"} : (!neura.data) -> !neura.data (ALAP level: 2) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %31 = "neura.data_mov"(%26) : (!neura.data) -> !neura.data (ALAP level: 2) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %27 = "neura.data_mov"(%26) : (!neura.data) -> !neura.data (ALAP level: 2) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %6 = "neura.data_mov"(%5) : (!neura.data) -> !neura.data (ALAP level: 2) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %28 = "neura.icmp"(%27) <{cmpType = "eq"}> {rhs_value = 32 : i64} : (!neura.data) -> !neura.data (ALAP level: 3) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %7 = "neura.load"(%6) : (!neura.data) -> !neura.data (ALAP level: 3) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %29 = "neura.data_mov"(%28) : (!neura.data) -> !neura.data (ALAP level: 3) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %8 = "neura.data_mov"(%7) : (!neura.data) -> !neura.data (ALAP level: 3) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %10 = "neura.data_mov"(%3) : (!neura.data) -> !neura.data (ALAP level: 4) -// 
MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %30 = "neura.not"(%29) : (!neura.data) -> !neura.data (ALAP level: 4) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %9 = "neura.icmp"(%8) <{cmpType = "sgt"}> {rhs_value = 0 : i32} : (!neura.data) -> !neura.data (ALAP level: 4) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %32 = "neura.data_mov"(%30) : (!neura.data) -> !neura.data (ALAP level: 4) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %11 = "neura.data_mov"(%9) : (!neura.data) -> !neura.data (ALAP level: 4) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %33 = neura.grant_predicate %31, %32 : !neura.data, !neura.data -> !neura.data (ALAP level: 5) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %12 = neura.grant_predicate %10, %11 : !neura.data, !neura.data -> !neura.data (ALAP level: 5) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: neura.ctrl_mov %33 -> %1 : !neura.data !neura.data (ALAP level: 5) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %16 = "neura.data_mov"(%12) : (!neura.data) -> !neura.data (ALAP level: 5) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %13 = "neura.data_mov"(%7) : (!neura.data) -> !neura.data (ALAP level: 6) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %14 = "neura.data_mov"(%9) : (!neura.data) -> !neura.data (ALAP level: 6) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %17 = "neura.gep"(%16) <{operandSegmentSizes = array}> {lhs_value = "%arg1"} : (!neura.data) -> !neura.data (ALAP level: 6) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %18 = "neura.data_mov"(%17) : (!neura.data) -> !neura.data (ALAP level: 6) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %15 = neura.grant_predicate %13, %14 : !neura.data, !neura.data -> !neura.data (ALAP level: 7) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %21 = "neura.data_mov"(%15) : (!neura.data) -> !neura.data (ALAP level: 7) -// MAPPING-NEXT: 
[MapToAcceleratorPass] ALAP sorted op: %19 = "neura.load"(%18) : (!neura.data) -> !neura.data (ALAP level: 7) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %20 = "neura.data_mov"(%19) : (!neura.data) -> !neura.data (ALAP level: 7) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %34 = "neura.grant_once"() <{constant_value = true}> : () -> !neura.data (ALAP level: 8) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %35 = "neura.data_mov"(%34) : (!neura.data) -> !neura.data (ALAP level: 8) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %24 = "neura.data_mov"(%17) : (!neura.data) -> !neura.data (ALAP level: 8) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %22 = "neura.add"(%20, %21) : (!neura.data, !neura.data) -> !neura.data (ALAP level: 8) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: %23 = "neura.data_mov"(%22) : (!neura.data) -> !neura.data (ALAP level: 8) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: "neura.return"(%35) : (!neura.data) -> () (ALAP level: 9) -// MAPPING-NEXT: [MapToAcceleratorPass] ALAP sorted op: "neura.store"(%23, %24) : (!neura.data, !neura.data) -> () (ALAP level: 9) -// MAPPING-NEXT: --------------------------------------------------------- -// MAPPING-NEXT: [HeuristicMapping] Starting mapping with 39 operations. -// MAPPING-NEXT: Configuration: MAX Backtrack Depth = 3, MAX Candidate Locations = 5 -// MAPPING-NEXT: [HeuristicMapping] Filtered 22 non-materialized operations, 17 operations require physical mapping. 
-// MAPPING-NEXT: [HeuristicMapping] Materialized operations list: -// MAPPING-NEXT: 0 %0 = "neura.grant_once"() <{constant_value = 0 : i64}> : () -> !neura.data (level: 0) -// MAPPING-NEXT: 1 %3 = neura.phi_start %2, %1 : !neura.data, !neura.data -> !neura.data (level: 1) -// MAPPING-NEXT: 2 %26 = "neura.add"(%25) {rhs_value = 1 : i64} : (!neura.data) -> !neura.data (level: 2) -// MAPPING-NEXT: 3 %5 = "neura.gep"(%4) <{operandSegmentSizes = array}> {lhs_value = "%arg0"} : (!neura.data) -> !neura.data (level: 2) -// MAPPING-NEXT: 4 %28 = "neura.icmp"(%27) <{cmpType = "eq"}> {rhs_value = 32 : i64} : (!neura.data) -> !neura.data (level: 3) -// MAPPING-NEXT: 5 %7 = "neura.load"(%6) : (!neura.data) -> !neura.data (level: 3) -// MAPPING-NEXT: 6 %30 = "neura.not"(%29) : (!neura.data) -> !neura.data (level: 4) -// MAPPING-NEXT: 7 %9 = "neura.icmp"(%8) <{cmpType = "sgt"}> {rhs_value = 0 : i32} : (!neura.data) -> !neura.data (level: 4) -// MAPPING-NEXT: 8 %33 = neura.grant_predicate %31, %32 : !neura.data, !neura.data -> !neura.data (level: 5) -// MAPPING-NEXT: 9 %12 = neura.grant_predicate %10, %11 : !neura.data, !neura.data -> !neura.data (level: 5) -// MAPPING-NEXT: 10 %17 = "neura.gep"(%16) <{operandSegmentSizes = array}> {lhs_value = "%arg1"} : (!neura.data) -> !neura.data (level: 6) -// MAPPING-NEXT: 11 %15 = neura.grant_predicate %13, %14 : !neura.data, !neura.data -> !neura.data (level: 7) -// MAPPING-NEXT: 12 %19 = "neura.load"(%18) : (!neura.data) -> !neura.data (level: 7) -// MAPPING-NEXT: 13 %34 = "neura.grant_once"() <{constant_value = true}> : () -> !neura.data (level: 8) -// MAPPING-NEXT: 14 %22 = "neura.add"(%20, %21) : (!neura.data, !neura.data) -> !neura.data (level: 8) -// MAPPING-NEXT: 15 "neura.return"(%35) : (!neura.data) -> () (level: 9) -// MAPPING-NEXT: 16 "neura.store"(%23, %24) : (!neura.data, !neura.data) -> () (level: 9) -// MAPPING-NEXT: [HeuristicMapping] Found 80 candidate locations for operation: %0 = "neura.grant_once"() <{constant_value = 
0 : i64}> : () -> !neura.data -// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/5 at tile#11 @t=0 -// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation %0 = "neura.grant_once"() <{constant_value = 0 : i64}> : () -> !neura.data -// MAPPING-NEXT: [HeuristicMapping] Found 54 candidate locations for operation: %3 = neura.phi_start %2, %1 : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/5 at tile#11 @t=1 -// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#11 @t=0 to Tile#11 @t=1 -// MAPPING-NEXT: [tryRouteDataMove] Successfully routed on same tile using Register #704 -// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation %3 = neura.phi_start %2, %1 : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: [HeuristicMapping] Found 53 candidate locations for operation: %26 = "neura.add"(%25) {rhs_value = 1 : i64} : (!neura.data) -> !neura.data -// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/5 at tile#11 @t=2 -// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#11 @t=1 to Tile#11 @t=2 -// MAPPING-NEXT: [tryRouteDataMove] Successfully routed on same tile using Register #704 -// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation %26 = "neura.add"(%25) {rhs_value = 1 : i64} : (!neura.data) -> !neura.data -// MAPPING-NEXT: [HeuristicMapping] Found 52 candidate locations for operation: %5 = "neura.gep"(%4) <{operandSegmentSizes = array}> {lhs_value = "%arg0"} : (!neura.data) -> !neura.data -// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/5 at tile#10 @t=2 -// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#11 @t=1 to Tile#10 @t=2 -// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation %5 = "neura.gep"(%4) <{operandSegmentSizes = array}> {lhs_value = "%arg0"} : (!neura.data) -> !neura.data -// MAPPING-NEXT: [HeuristicMapping] Found 51 candidate locations for operation: %28 = "neura.icmp"(%27) <{cmpType = "eq"}> {rhs_value = 32 : i64} : (!neura.data) -> 
!neura.data -// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/5 at tile#11 @t=3 -// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#11 @t=2 to Tile#11 @t=3 -// MAPPING-NEXT: [tryRouteDataMove] Successfully routed on same tile using Register #704 -// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation %28 = "neura.icmp"(%27) <{cmpType = "eq"}> {rhs_value = 32 : i64} : (!neura.data) -> !neura.data -// MAPPING-NEXT: [HeuristicMapping] Found 58 candidate locations for operation: %7 = "neura.load"(%6) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/5 at tile#10 @t=3 -// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#10 @t=2 to Tile#10 @t=3 -// MAPPING-NEXT: [tryRouteDataMove] Successfully routed on same tile using Register #640 -// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation %7 = "neura.load"(%6) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [HeuristicMapping] Found 49 candidate locations for operation: %30 = "neura.not"(%29) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/5 at tile#10 @t=4 -// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#11 @t=3 to Tile#10 @t=4 -// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation %30 = "neura.not"(%29) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [HeuristicMapping] Found 56 candidate locations for operation: %9 = "neura.icmp"(%8) <{cmpType = "sgt"}> {rhs_value = 0 : i32} : (!neura.data) -> !neura.data -// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/5 at tile#11 @t=4 -// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#10 @t=3 to Tile#11 @t=4 -// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation %9 = "neura.icmp"(%8) <{cmpType = "sgt"}> {rhs_value = 0 : i32} : (!neura.data) -> !neura.data -// MAPPING-NEXT: [HeuristicMapping] Found 1 candidate locations for operation: %33 = neura.grant_predicate %31, %32 : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: 
[HeuristicMapping] Trying candidate 1/1 at tile#10 @t=5 -// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#11 @t=2 to Tile#10 @t=5 -// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#10 @t=4 to Tile#10 @t=5 -// MAPPING-NEXT: [tryRouteDataMove] Successfully routed on same tile using Register #641 -// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#10 @t=5 to Tile#11 @t=6 -// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation %33 = neura.grant_predicate %31, %32 : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: [HeuristicMapping] Found 46 candidate locations for operation: %12 = neura.grant_predicate %10, %11 : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/5 at tile#7 @t=5 -// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#11 @t=1 to Tile#7 @t=5 -// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#11 @t=4 to Tile#7 @t=5 -// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation %12 = neura.grant_predicate %10, %11 : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: [HeuristicMapping] Found 45 candidate locations for operation: %17 = "neura.gep"(%16) <{operandSegmentSizes = array}> {lhs_value = "%arg1"} : (!neura.data) -> !neura.data -// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/5 at tile#7 @t=6 -// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#7 @t=5 to Tile#7 @t=6 -// MAPPING-NEXT: [tryRouteDataMove] Successfully routed on same tile using Register #448 -// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation %17 = "neura.gep"(%16) <{operandSegmentSizes = array}> {lhs_value = "%arg1"} : (!neura.data) -> !neura.data -// MAPPING-NEXT: [HeuristicMapping] Found 64 candidate locations for operation: %15 = neura.grant_predicate %13, %14 : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/5 at tile#15 @t=7 -// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#10 @t=3 to Tile#15 @t=7 -// MAPPING-NEXT: 
[tryRouteDataMove] Routing from Tile#11 @t=4 to Tile#15 @t=7 -// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation %15 = neura.grant_predicate %13, %14 : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: [HeuristicMapping] Found 45 candidate locations for operation: %19 = "neura.load"(%18) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/5 at tile#7 @t=7 -// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#7 @t=6 to Tile#7 @t=7 -// MAPPING-NEXT: [tryRouteDataMove] Successfully routed on same tile using Register #448 -// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation %19 = "neura.load"(%18) : (!neura.data) -> !neura.data -// MAPPING-NEXT: [HeuristicMapping] Found 67 candidate locations for operation: %34 = "neura.grant_once"() <{constant_value = true}> : () -> !neura.data -// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/5 at tile#7 @t=8 -// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation %34 = "neura.grant_once"() <{constant_value = true}> : () -> !neura.data -// MAPPING-NEXT: [HeuristicMapping] Found 28 candidate locations for operation: %22 = "neura.add"(%20, %21) : (!neura.data, !neura.data) -> !neura.data -// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/5 at tile#7 @t=9 -// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#7 @t=7 to Tile#7 @t=9 -// MAPPING-NEXT: [tryRouteDataMove] Successfully routed on same tile using Register #449 -// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#15 @t=7 to Tile#7 @t=9 -// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation %22 = "neura.add"(%20, %21) : (!neura.data, !neura.data) -> !neura.data -// MAPPING-NEXT: [HeuristicMapping] Found 40 candidate locations for operation: "neura.return"(%35) : (!neura.data) -> () -// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/5 at tile#3 @t=9 -// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#7 @t=8 to Tile#3 @t=9 -// MAPPING-NEXT: [HeuristicMapping] 
Successfully mapped operation "neura.return"(%35) : (!neura.data) -> () -// MAPPING-NEXT: [HeuristicMapping] Found 40 candidate locations for operation: "neura.store"(%23, %24) : (!neura.data, !neura.data) -> () -// MAPPING-NEXT: [HeuristicMapping] Trying candidate 1/5 at tile#6 @t=10 -// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#7 @t=9 to Tile#6 @t=10 -// MAPPING-NEXT: [tryRouteDataMove] Routing from Tile#7 @t=6 to Tile#6 @t=10 -// MAPPING-NEXT: [HeuristicMapping] Successfully mapped operation "neura.store"(%23, %24) : (!neura.data, !neura.data) -> () -// MAPPING-NEXT: [HeuristicMapping] Successfully mapped all 17 operations. -// MAPPING-NEXT: module attributes {{.*}} -// MAPPING-NEXT: llvm.mlir.global external local_unnamed_addr @input(dense<[1, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16, -17, 18, -19, 20, -21, 22, -23, 24, -25, 26, -27, 28, -29, 30, -31]> : tensor<32xi32>) {addr_space = 0 : i32, alignment = 16 : i64, dso_local} : !llvm.array<32 x i32> -// MAPPING-NEXT: llvm.mlir.global external local_unnamed_addr @output(dense<0> : tensor<32xi32>) {addr_space = 0 : i32, alignment = 16 : i64, dso_local} : !llvm.array<32 x i32> -// MAPPING-NEXT: llvm.mlir.global private unnamed_addr constant @".str"("output[%d] = %d\0A\00") {addr_space = 0 : i32, alignment = 1 : i64, dso_local} -// MAPPING-NEXT: llvm.func local_unnamed_addr @main() -> (i32 {llvm.noundef}) attributes {no_unwind, passthrough = ["mustprogress", "nofree", "norecurse", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic"} { -// MAPPING-NEXT: %0 = llvm.mlir.addressof @".str" : !llvm.ptr -// MAPPING-NEXT: %1 = llvm.mlir.addressof @input : !llvm.ptr -// MAPPING-NEXT: %2 = llvm.mlir.addressof @output : !llvm.ptr -// MAPPING-NEXT: %3 = 
"neura.constant"() <{value = 0 : i8}> : () -> i8 -// MAPPING-NEXT: %4 = "neura.constant"() <{value = 128 : i64}> : () -> i64 -// MAPPING-NEXT: %5 = "neura.constant"() <{value = 0 : i64}> : () -> i64 -// MAPPING-NEXT: %6 = "neura.constant"() <{value = 0 : i32}> : () -> i32 -// MAPPING-NEXT: %7 = "neura.data_mov"(%2) : (!llvm.ptr) -> !llvm.ptr -// MAPPING-NEXT: %8 = "neura.data_mov"(%3) : (i8) -> i8 -// MAPPING-NEXT: %9 = "neura.data_mov"(%4) : (i64) -> i64 -// MAPPING-NEXT: "neura.memset"(%7, %8, %9) <{is_volatile = false}> : (!llvm.ptr, i8, i64) -> () -// MAPPING-NEXT: %10 = "neura.data_mov"(%5) : (i64) -> i64 -// MAPPING-NEXT: neura.br %10 : i64 to ^bb1 -// MAPPING-NEXT: ^bb1(%11: i64): // 2 preds: ^bb0, ^bb3 -// MAPPING-NEXT: %12 = "neura.data_mov"(%1) : (!llvm.ptr) -> !llvm.ptr -// MAPPING-NEXT: %13 = "neura.data_mov"(%11) : (i64) -> i64 -// MAPPING-NEXT: %14 = "neura.gep"(%12, %13) <{operandSegmentSizes = array}> : (!llvm.ptr, i64) -> !llvm.ptr -// MAPPING-NEXT: %15 = "neura.data_mov"(%14) : (!llvm.ptr) -> !llvm.ptr -// MAPPING-NEXT: %16 = "neura.load"(%15) : (!llvm.ptr) -> i32 -// MAPPING-NEXT: %17 = "neura.data_mov"(%16) : (i32) -> i32 -// MAPPING-NEXT: %18 = "neura.icmp"(%17) <{cmpType = "sgt"}> {rhs_value = 0 : i32} : (i32) -> i1 -// MAPPING-NEXT: %19 = "neura.data_mov"(%18) : (i1) -> i1 -// MAPPING-NEXT: neura.cond_br %19 : i1 then to ^bb2 else to ^bb3 -// MAPPING-NEXT: ^bb2: // pred: ^bb1 -// MAPPING-NEXT: %20 = "neura.data_mov"(%2) : (!llvm.ptr) -> !llvm.ptr -// MAPPING-NEXT: %21 = "neura.data_mov"(%11) : (i64) -> i64 -// MAPPING-NEXT: %22 = "neura.gep"(%20, %21) <{operandSegmentSizes = array}> : (!llvm.ptr, i64) -> !llvm.ptr -// MAPPING-NEXT: %23 = "neura.data_mov"(%22) : (!llvm.ptr) -> !llvm.ptr -// MAPPING-NEXT: %24 = "neura.load"(%23) : (!llvm.ptr) -> i32 -// MAPPING-NEXT: %25 = "neura.data_mov"(%24) : (i32) -> i32 -// MAPPING-NEXT: %26 = "neura.data_mov"(%16) : (i32) -> i32 -// MAPPING-NEXT: %27 = "neura.add"(%25, %26) : (i32, i32) -> i32 -// 
MAPPING-NEXT: %28 = "neura.data_mov"(%27) : (i32) -> i32 -// MAPPING-NEXT: %29 = "neura.data_mov"(%22) : (!llvm.ptr) -> !llvm.ptr -// MAPPING-NEXT: "neura.store"(%28, %29) : (i32, !llvm.ptr) -> () -// MAPPING-NEXT: neura.br to ^bb3 -// MAPPING-NEXT: ^bb3: // 2 preds: ^bb1, ^bb2 -// MAPPING-NEXT: %30 = "neura.data_mov"(%11) : (i64) -> i64 -// MAPPING-NEXT: %31 = "neura.add"(%30) {rhs_value = 1 : i64} : (i64) -> i64 -// MAPPING-NEXT: %32 = "neura.data_mov"(%31) : (i64) -> i64 -// MAPPING-NEXT: %33 = "neura.icmp"(%32) <{cmpType = "eq"}> {rhs_value = 32 : i64} : (i64) -> i1 -// MAPPING-NEXT: %34 = "neura.data_mov"(%33) : (i1) -> i1 -// MAPPING-NEXT: %35 = "neura.data_mov"(%5) : (i64) -> i64 -// MAPPING-NEXT: %36 = "neura.data_mov"(%31) : (i64) -> i64 -// MAPPING-NEXT: neura.cond_br %34 : i1 then %35 : i64 to ^bb5 else %36 : i64 to ^bb1 -// MAPPING-NEXT: ^bb4: // pred: ^bb5 -// MAPPING-NEXT: %37 = "neura.data_mov"(%6) : (i32) -> i32 -// MAPPING-NEXT: "neura.return"(%37) : (i32) -> () -// MAPPING-NEXT: ^bb5(%38: i64): // 2 preds: ^bb3, ^bb5 -// MAPPING-NEXT: %39 = "neura.data_mov"(%2) : (!llvm.ptr) -> !llvm.ptr -// MAPPING-NEXT: %40 = "neura.data_mov"(%38) : (i64) -> i64 -// MAPPING-NEXT: %41 = "neura.gep"(%39, %40) <{operandSegmentSizes = array}> {operand_1_value = 0 : i32} : (!llvm.ptr, i64) -> !llvm.ptr -// MAPPING-NEXT: %42 = "neura.data_mov"(%41) : (!llvm.ptr) -> !llvm.ptr -// MAPPING-NEXT: %43 = "neura.load"(%42) : (!llvm.ptr) -> i32 -// MAPPING-NEXT: %44 = "neura.data_mov"(%38) : (i64) -> i64 -// MAPPING-NEXT: %45 = "neura.cast"(%44) <{cast_type = "trunc"}> : (i64) -> i32 -// MAPPING-NEXT: %46 = llvm.call tail @printf(%0, %45, %43) vararg(!llvm.func) {no_unwind} : (!llvm.ptr, i32, i32) -> i32 -// MAPPING-NEXT: %47 = "neura.data_mov"(%38) : (i64) -> i64 -// MAPPING-NEXT: %48 = "neura.add"(%47) {rhs_value = 1 : i64} : (i64) -> i64 -// MAPPING-NEXT: %49 = "neura.data_mov"(%48) : (i64) -> i64 -// MAPPING-NEXT: %50 = "neura.icmp"(%49) <{cmpType = "eq"}> {rhs_value = 32 
: i64} : (i64) -> i1 -// MAPPING-NEXT: %51 = "neura.data_mov"(%50) : (i1) -> i1 -// MAPPING-NEXT: %52 = "neura.data_mov"(%48) : (i64) -> i64 -// MAPPING-NEXT: neura.cond_br %51 : i1 then to ^bb4 else %52 : i64 to ^bb5 -// MAPPING-NEXT: } -// MAPPING-NEXT: func.func @_Z6kernelPiS_(%arg0: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg1: !llvm.ptr {llvm.nocapture, llvm.noundef}) -> !llvm.void attributes {CConv = #llvm.cconv, accelerator = "neura", dataflow_mode = "predicate", linkage = #llvm.linkage, mapping_info = {compiled_ii = 5 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 5 : i32, res_mii = 2 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}, memory_effects = #llvm.memory_effects, no_unwind, passthrough = ["mustprogress", "nofree", "norecurse", "nosync", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic", unnamed_addr = 1 : i64, visibility_ = 0 : i64} { -// MAPPING-NEXT: %0 = "neura.grant_once"() <{constant_value = 0 : i64}> {dfg_id = 0 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 0 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 0 : i32, x = 3 : i32, y = 2 : i32}]} : () -> !neura.data -// MAPPING-NEXT: %1 = neura.reserve {dfg_id = 1 : i32} : !neura.data -// MAPPING-NEXT: %2 = "neura.data_mov"(%0) {dfg_id = 3 : i32, mapping_locs = [{id = 704 : i32, index_per_ii = 0 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 0 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %3 = neura.phi_start %2, %1 {dfg_id = 5 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 1 : i32, x = 3 : i32, y = 2 : i32}]} : !neura.data, !neura.data 
-> !neura.data -// MAPPING-NEXT: %4 = "neura.data_mov"(%3) {dfg_id = 9 : i32, mapping_locs = [{id = 35 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 1 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %5 = "neura.gep"(%4) <{operandSegmentSizes = array}> {dfg_id = 11 : i32, lhs_value = "%arg0", mapping_locs = [{id = 10 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 2 : i32, x = 2 : i32, y = 2 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %6 = "neura.data_mov"(%5) {dfg_id = 14 : i32, mapping_locs = [{id = 640 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 2 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %7 = "neura.load"(%6) {dfg_id = 16 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 3 : i32, x = 2 : i32, y = 2 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %8 = "neura.data_mov"(%7) {dfg_id = 19 : i32, mapping_locs = [{id = 32 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 3 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %9 = "neura.icmp"(%8) <{cmpType = "sgt"}> {dfg_id = 21 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 4 : i32, x = 3 : i32, y = 2 : i32}], rhs_value = 0 : i32} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %10 = "neura.data_mov"(%3) {dfg_id = 8 : i32, mapping_locs = [{id = 36 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 1 : i32}, {id = 448 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 2 : i32}, {id = 448 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource 
= "register", time_step = 3 : i32}, {id = 448 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 4 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %11 = "neura.data_mov"(%9) {dfg_id = 24 : i32, mapping_locs = [{id = 36 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 4 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %12 = neura.grant_predicate %10, %11 {dfg_id = 27 : i32, mapping_locs = [{id = 7 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 5 : i32, x = 3 : i32, y = 1 : i32}]} : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: %13 = "neura.data_mov"(%7) {dfg_id = 18 : i32, mapping_locs = [{id = 34 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 3 : i32}, {id = 44 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 4 : i32}, {id = 960 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 5 : i32}, {id = 960 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 6 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %14 = "neura.data_mov"(%9) {dfg_id = 23 : i32, mapping_locs = [{id = 37 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 4 : i32}, {id = 961 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 5 : i32}, {id = 961 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 6 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %15 = neura.grant_predicate %13, %14 {dfg_id = 26 : i32, mapping_locs = [{id = 15 : i32, index_per_ii = 2 : i32, 
invalid_iterations = 1 : i32, resource = "tile", time_step = 7 : i32, x = 3 : i32, y = 3 : i32}]} : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: %16 = "neura.data_mov"(%12) {dfg_id = 30 : i32, mapping_locs = [{id = 448 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 5 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %17 = "neura.gep"(%16) <{operandSegmentSizes = array}> {dfg_id = 31 : i32, lhs_value = "%arg1", mapping_locs = [{id = 7 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 6 : i32, x = 3 : i32, y = 1 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %18 = "neura.data_mov"(%17) {dfg_id = 33 : i32, mapping_locs = [{id = 448 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 6 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %19 = "neura.load"(%18) {dfg_id = 34 : i32, mapping_locs = [{id = 7 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 7 : i32, x = 3 : i32, y = 1 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %20 = "neura.data_mov"(%19) {dfg_id = 35 : i32, mapping_locs = [{id = 449 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 7 : i32}, {id = 449 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 8 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %21 = "neura.data_mov"(%15) {dfg_id = 29 : i32, mapping_locs = [{id = 47 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 7 : i32}, {id = 36 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 8 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %22 = "neura.add"(%20, %21) 
{dfg_id = 36 : i32, mapping_locs = [{id = 7 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 9 : i32, x = 3 : i32, y = 1 : i32}]} : (!neura.data, !neura.data) -> !neura.data -// MAPPING-NEXT: %23 = "neura.data_mov"(%22) {dfg_id = 37 : i32, mapping_locs = [{id = 21 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 9 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %24 = "neura.data_mov"(%17) {dfg_id = 32 : i32, mapping_locs = [{id = 21 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 6 : i32}, {id = 384 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 7 : i32}, {id = 384 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 8 : i32}, {id = 384 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 9 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: "neura.store"(%23, %24) {dfg_id = 38 : i32, mapping_locs = [{id = 6 : i32, index_per_ii = 0 : i32, invalid_iterations = 2 : i32, resource = "tile", time_step = 10 : i32, x = 2 : i32, y = 1 : i32}]} : (!neura.data, !neura.data) -> () -// MAPPING-NEXT: %25 = "neura.data_mov"(%3) {dfg_id = 7 : i32, mapping_locs = [{id = 704 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 1 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %26 = "neura.add"(%25) {dfg_id = 10 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 2 : i32, x = 3 : i32, y = 2 : i32}], rhs_value = 1 : i64} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %27 = "neura.data_mov"(%26) {dfg_id = 13 : i32, mapping_locs = [{id = 704 : i32, 
index_per_ii = 2 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 2 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %28 = "neura.icmp"(%27) <{cmpType = "eq"}> {dfg_id = 15 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 3 : i32, x = 3 : i32, y = 2 : i32}], rhs_value = 32 : i64} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %29 = "neura.data_mov"(%28) {dfg_id = 17 : i32, mapping_locs = [{id = 35 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 3 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %30 = "neura.not"(%29) {dfg_id = 20 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 4 : i32, x = 2 : i32, y = 2 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %31 = "neura.data_mov"(%26) {dfg_id = 12 : i32, mapping_locs = [{id = 35 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 2 : i32}, {id = 640 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 3 : i32}, {id = 640 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 4 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %32 = "neura.data_mov"(%30) {dfg_id = 22 : i32, mapping_locs = [{id = 641 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 4 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %33 = neura.grant_predicate %31, %32 {dfg_id = 25 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 5 : i32, x = 2 : i32, y = 2 : i32}]} : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: 
neura.ctrl_mov %33 -> %1 {dfg_id = 28 : i32, mapping_locs = [{id = 32 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 5 : i32}]} : !neura.data !neura.data -// MAPPING-NEXT: %34 = "neura.grant_once"() <{constant_value = true}> {dfg_id = 2 : i32, mapping_locs = [{id = 7 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 8 : i32, x = 3 : i32, y = 1 : i32}]} : () -> !neura.data -// MAPPING-NEXT: %35 = "neura.data_mov"(%34) {dfg_id = 4 : i32, mapping_locs = [{id = 22 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 8 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: "neura.return"(%35) {dfg_id = 6 : i32, mapping_locs = [{id = 3 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 9 : i32, x = 3 : i32, y = 0 : i32}]} : (!neura.data) -> () -// MAPPING-NEXT: } -// MAPPING-NEXT: llvm.func local_unnamed_addr @printf(!llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, ...) 
-> (i32 {llvm.noundef}) attributes {no_unwind, passthrough = ["nofree", ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic"} -// MAPPING-NEXT: } \ No newline at end of file +// MAPPING: func.func @_Z6kernelPiS_(%arg0: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg1: !llvm.ptr {llvm.nocapture, llvm.noundef}) -> !llvm.void attributes {CConv = #llvm.cconv, accelerator = "neura", dataflow_mode = "predicate", linkage = #llvm.linkage, mapping_info = {compiled_ii = 5 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 5 : i32, res_mii = 2 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}, memory_effects = #llvm.memory_effects, no_unwind, passthrough = ["mustprogress", "nofree", "norecurse", "nosync", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic", unnamed_addr = 1 : i64, visibility_ = 0 : i64} { \ No newline at end of file