Skip to content
Closed
80 changes: 80 additions & 0 deletions include/Conversion/AffineToNeura/LoopNestAnalysis.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
//===- LoopNestAnalysis.h - Analyze affine loop nests ----------*- C++ -*-===//
//
// Loop nest analysis for affine loops.
//
// Features:
// 1. Build loop hierarchy tree (parent-child relationships, nesting depth)
// 2. Identify perfect vs imperfect nesting
// 3. Support valid signal reuse optimization for nested loops
//
//===----------------------------------------------------------------------===//
#ifndef CONVERSION_AFFINE_TO_NEURA_LOOP_NEST_ANALYSIS_H
#define CONVERSION_AFFINE_TO_NEURA_LOOP_NEST_ANALYSIS_H

#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/Operation.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include <memory>

namespace mlir {
namespace neura {

/// Loop information structure - Stores all analysis information for a single
/// loop in the nest tree built by LoopNestAnalysis.
struct LoopInfo {
  affine::AffineForOp loop;                  // The loop operation itself.
  LoopInfo *parent = nullptr;                // Parent loop (nullptr if top-level).
  llvm::SmallVector<LoopInfo *, 4> children; // Child loops list (non-owning).
  unsigned depth = 0;                        // Nesting depth (0 = top-level).
  bool isPerfectNest = true;                 // Whether it is a perfect nest.

  // Operations list for imperfect nesting.
  llvm::SmallVector<Operation *, 4> operationsBeforeChild; // Operations before child loops.
  llvm::SmallVector<Operation *, 4> operationsAfterChild;  // Operations after child loops.

  // Marked explicit so an AffineForOp cannot be silently converted into a
  // LoopInfo; construction sites (e.g. std::make_unique<LoopInfo>(loop))
  // are unaffected.
  explicit LoopInfo(affine::AffineForOp loop) : loop(loop) {}
};

/// Loop nest analysis class.
///
/// Purpose: Provides loop hierarchy information for the AffineToNeura pass to
/// support optimization decisions (e.g. reusing a parent loop's valid signal).
///
/// Lifetime: every LoopInfo* returned by the query interfaces points into
/// storage owned by this analysis object (see allLoops below) and is
/// invalidated when the analysis is destroyed.
///
/// Usage example:
/// LoopNestAnalysis analysis(func_op);
/// analysis.dump(); // Prints analysis results.
/// LoopInfo *info = analysis.getLoopInfo(loop);
/// if (info && info->parent) {
/// // This is a nested loop, can reuse parent's valid signal.
/// }
class LoopNestAnalysis {
public:
/// Constructor - Performs loop nest analysis on the given function
/// (builds the nest tree and the per-loop perfect-nest information).
explicit LoopNestAnalysis(func::FuncOp func);

/// Query interfaces. Lookups are keyed on the loop's underlying Operation*
/// (see loopMap below).
LoopInfo *getLoopInfo(affine::AffineForOp loop) const; // Gets loop information (presumably nullptr for an unanalyzed loop — confirm in the implementation).
llvm::ArrayRef<LoopInfo *> getTopLevelLoops() const { return topLevelLoops; } // Gets loops without a parent.
llvm::ArrayRef<std::unique_ptr<LoopInfo>> getAllLoops() const { return allLoops; } // Gets every analyzed loop.
bool isPerfectNest(affine::AffineForOp loop) const; // Checks if perfect nest.
LoopInfo *getParentLoop(affine::AffineForOp loop) const; // Gets parent loop.
llvm::ArrayRef<LoopInfo *> getChildLoops(affine::AffineForOp loop) const; // Gets child loops.

/// Debug interface - Prints analysis results.
void dump() const;

private:
/// Internal analysis methods.
void buildLoopNestTree(func::FuncOp func); // Builds loop hierarchy tree.
void analyzePerfectNests(); // Analyzes perfect nest characteristics.

/// Data members.
llvm::DenseMap<Operation *, LoopInfo *> loopMap; // Fast Operation* -> LoopInfo lookup table (non-owning values).
llvm::SmallVector<std::unique_ptr<LoopInfo>, 8> allLoops; // Owning storage for every LoopInfo node.
llvm::SmallVector<LoopInfo *, 4> topLevelLoops; // Non-owning pointers to the top-level loops.
};

} // namespace neura
} // namespace mlir

#endif
1 change: 1 addition & 0 deletions include/Conversion/ConversionPasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ std::unique_ptr<mlir::Pass> createLowerArithToNeuraPass();
std::unique_ptr<mlir::Pass> createLowerLlvmToNeuraPass();
std::unique_ptr<mlir::Pass> createLowerMemRefToNeuraPass();
std::unique_ptr<mlir::Pass> createLowerBuiltinToNeuraPass();
std::unique_ptr<mlir::Pass> createLowerAffineToNeuraPass();

#define GEN_PASS_REGISTRATION
#include "Conversion/ConversionPasses.h.inc"
Expand Down
12 changes: 12 additions & 0 deletions include/Conversion/ConversionPasses.td
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,16 @@ def LowerBuiltinToNeura : Pass<"lower-builtin-to-neura", "ModuleOp">{
let constructor = "mlir::createLowerBuiltinToNeuraPass()";
}

// Pass definition: lowers perfectly nested affine.for loops directly into the
// Neura dialect. Anchored on func::FuncOp, so the pass runs (and can be
// scheduled) per function rather than per module, unlike the ModuleOp-anchored
// passes above.
def LowerAffineToNeura : Pass<"lower-affine-to-neura", "func::FuncOp">{
let summary = "Lower Affine perfect nested loops to Neura loop_control operations";
let description = [{
Converts perfectly nested affine.for loops directly to Neura dialect using
loop_control operations, avoiding the need to flatten to LLVM IR first.
This preserves loop structure information for better optimization on
dataflow architectures.
}];
let constructor = "mlir::createLowerAffineToNeuraPass()";
// Both dialects must be loaded before the pass creates their operations.
let dependentDialects = ["mlir::neura::NeuraDialect", "mlir::affine::AffineDialect"];
}

#endif // CONVERSION_PASSES_TD
4 changes: 3 additions & 1 deletion include/NeuraDialect/Architecture/Architecture.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,9 @@ enum OperationKind {
// Loop control operations.
ILoopControl = 34,
// Constant operations.
IConstant = 35
IConstant = 35,
// Steering control fused operations.
ICarryInvariant = 36, IConditionalSelect = 37, IInvariantGroup = 38
};

//===----------------------------------------------------------------------===//
Expand Down
4 changes: 4 additions & 0 deletions include/NeuraDialect/Mapping/mapping_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ OperationKind getOperationKindFromMlirOp(Operation *op);
// Returns true if the operation does not need CGRA tile placement.
bool is_non_materialized(Operation *op);

// Returns true if the operation is a steering-mode operation that doesn't
// require DataMovOp wrapping (e.g., constants, carry, invariant, etc.).
bool is_steering_unwrapped_op(Operation *op);

// Returns true if the operation is a materialized reserve user, i.e.,
// phi, invariant, carry.
bool isMaterializedReserveUser(Operation *op);
Expand Down
129 changes: 129 additions & 0 deletions include/NeuraDialect/NeuraOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -657,4 +657,133 @@ def Neura_InvariantOp : Op<NeuraDialect, "invariant">{
let arguments = (ins AnyType:$initial, AnyType:$condition);
let results = (outs AnyType:$result);
let assemblyFormat = "$initial `,` $condition attr-dict `:` type($initial) `,` type($condition) `->` type($result)";
}

// ============================================================================
// FUSED OPERATIONS FOR RECMII OPTIMIZATION
// ============================================================================

// Defines the carry_invariant fused operation.
// NOTE(review): in the Semantics section below, the second branch ("Else if
// outer_condition is false ... outer loop active") reads as if the condition
// sense is inverted relative to the first branch — confirm against the
// lowering/fusion pass that produces this op before relying on the prose.
def Neura_CarryInvariantOp : Op<NeuraDialect, "carry_invariant">{
let summary = "Fused carry and invariant operation for nested loops.";
let description = [{
Combines carry and invariant operations into a single operation to reduce RecMII.
This is optimized for nested loop patterns where an inner loop's carry result
is used as an invariant in the outer loop.

Semantics:
- If inner_condition is false (first inner iteration): return initial value
- Else if outer_condition is false (outer loop active, inner loop invariant):
return initial value from inner carry
- Else: return carried value

Replaces the pattern:
%carry_result = neura.carry %init, %inner_cond, %carried
%inv_result = neura.invariant %carry_result, %outer_cond

With:
%result = neura.carry_invariant %init, %inner_cond, %outer_cond, %carried

RecMII Impact: Reduces 2 operations to 1 operation (-50% on critical path)

Example:
%out = neura.carry_invariant %init, %inner_cond, %outer_cond, %carried
: i64, i1, i1, i64 -> i64
}];

// Operand order matches the textual examples above:
// initial, inner_condition, outer_condition, carried.
let arguments = (ins
AnyType:$initial,
AnyType:$inner_condition,
AnyType:$outer_condition,
AnyType:$carried
);
let results = (outs AnyType:$result);

let assemblyFormat = [{
$initial `,` $inner_condition `,` $outer_condition `,` $carried attr-dict
`:` type($initial) `,` type($inner_condition) `,` type($outer_condition) `,`
type($carried) `->` type($result)
}];
}

// Defines the conditional_select fused operation (mnemonic: "cond_select").
def Neura_ConditionalSelectOp : Op<NeuraDialect, "cond_select">{
let summary = "Fused comparison and conditional selection operation.";
let description = [{
Combines comparison (icmp) and conditional selection (false_steer) into a
single atomic operation to reduce RecMII.

Semantics:
- Performs comparison: result = (lhs <predicate> rhs)
- If result is false: return value
- If result is true: return default value (typically from hardware)

Replaces the pattern:
%cond = neura.icmp %lhs, %rhs <{cmpType = "slt"}>
%result = neura.false_steer %value, %cond

With:
%result = neura.cond_select %lhs, %rhs, %value <{predicate = "slt"}>

RecMII Impact: Reduces 2 operations to 1 operation (-50% on critical path)

Supported predicates: "eq", "ne", "slt", "sle", "sgt", "sge", "ult", "ule", "ugt", "uge"

Example:
%out = neura.cond_select %a, %b, %val <{predicate = "slt"}>
: i64, i64, i64 -> i64
}];

// The $predicate attribute has no dedicated slot in the custom assembly
// format below; it is parsed/printed through `attr-dict`, which is why the
// examples spell it as `<{predicate = "slt"}>`.
// NOTE(review): StrAttr accepts any string — the "Supported predicates" list
// in the description is not enforced here; confirm a verifier checks it.
let arguments = (ins
AnyType:$lhs,
AnyType:$rhs,
AnyType:$value,
StrAttr:$predicate
);
let results = (outs AnyType:$result);

let assemblyFormat = [{
$lhs `,` $rhs `,` $value attr-dict `:` type($lhs) `,` type($rhs) `,`
type($value) `->` type($result)
}];
}

// Defines the invariant_group batch operation.
def Neura_InvariantGroupOp : Op<NeuraDialect, "invariant_group">{
let summary = "Batch invariant extraction for multiple values.";
let description = [{
Extracts multiple invariants with the same condition in a single operation.
This is optimized for nested loops where many values need to be marked as
invariant with respect to the outer loop.

Hardware can optimize this by:
- Sharing condition checking logic
- Parallel invariant extraction
- Reduced control overhead

Replaces multiple individual invariant operations:
%inv1 = neura.invariant %val1, %cond
%inv2 = neura.invariant %val2, %cond
%inv3 = neura.invariant %val3, %cond

With a single batch operation:
%inv1, %inv2, %inv3 = neura.invariant_group %val1, %val2, %val3, %cond

ResMII Impact: Reduces N operations to 1 operation (improves resource utilization)

Example:
%out1, %out2, %out3 = neura.invariant_group %in1, %in2, %in3, %cond
: i64, i64, i64, i1 -> i64, i64, i64
}];

// NOTE(review): nothing in this def constrains the $outputs count (or their
// element types) to match $inputs, as the examples imply; if that invariant
// is required, it needs a custom verifier — confirm one exists elsewhere.
let arguments = (ins
Variadic<AnyType>:$inputs,
AnyType:$condition
);
let results = (outs Variadic<AnyType>:$outputs);

let assemblyFormat = [{
$inputs `,` $condition attr-dict `:` type($inputs) `,` type($condition)
`->` type($outputs)
}];
}
1 change: 1 addition & 0 deletions include/NeuraDialect/NeuraPasses.td
Original file line number Diff line number Diff line change
Expand Up @@ -134,4 +134,5 @@ def RemovePredicatedType : Pass<"remove-predicated-type", "ModuleOp"> {
}];
let constructor = "neura::createRemovePredicatedTypePass()";
}

#endif // NEURA_PASSES_TD
Loading
Loading