diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 181c399d..98b116d3 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -40,7 +40,7 @@ jobs: run: | git clone https://github.com/llvm/llvm-project.git cd llvm-project - git checkout cd70802 + git checkout 6146a88 mkdir build && cd build cmake -G Ninja ../llvm \ -DLLVM_ENABLE_PROJECTS="mlir" \ diff --git a/include/Conversion/ConversionPasses.h b/include/Conversion/ConversionPasses.h index 2477bb3d..36e5db18 100644 --- a/include/Conversion/ConversionPasses.h +++ b/include/Conversion/ConversionPasses.h @@ -19,6 +19,7 @@ namespace mlir { // Conversion passes. std::unique_ptr createLowerArithToNeuraPass(); std::unique_ptr createLowerLlvmToNeuraPass(); +std::unique_ptr createLowerAffineToNeuraPass(); #define GEN_PASS_REGISTRATION #include "Conversion/ConversionPasses.h.inc" diff --git a/include/Conversion/ConversionPasses.td b/include/Conversion/ConversionPasses.td index 7fca77bb..cd8eb5ab 100644 --- a/include/Conversion/ConversionPasses.td +++ b/include/Conversion/ConversionPasses.td @@ -8,7 +8,7 @@ include "mlir/Pass/PassBase.td" //=========================================================// // Conversion passes //=========================================================// -def LowerArithToNeura : Pass<"lower-arith-to-neura", "FuncOp">{ +def LowerArithToNeura : Pass<"lower-arith-to-neura", "ModuleOp">{ let summary = "Lower arith to Neura dialect"; let description = [{Lower arith dialect operations to Neura dialect operations.}]; let constructor = "mlir::createLowerArithToNeuraPass()"; @@ -20,4 +20,10 @@ def LowerLlvmToNeura : Pass<"lower-llvm-to-neura", "ModuleOp">{ let constructor = "mlir::createLowerLlvmToNeuraPass()"; } +def LowerAffineToNeura : Pass<"lower-affine-to-neura", "ModuleOp">{ + let summary = "Lower affine to Neura dialect"; + let description = [{Lower affine dialect operations to Neura dialect operations.}]; + let constructor = 
"mlir::createLowerAffineToNeuraPass()"; +} + #endif // CONVERSION_PASSES_TD \ No newline at end of file diff --git a/include/NeuraDialect/CMakeLists.txt b/include/NeuraDialect/CMakeLists.txt index 1c9b30b5..96d06740 100644 --- a/include/NeuraDialect/CMakeLists.txt +++ b/include/NeuraDialect/CMakeLists.txt @@ -1,10 +1,10 @@ # Set TableGen include paths -set(MLIR_TABLEGEN_INCLUDES - ${PROJECT_SOURCE_DIR}/include - ${PROJECT_SOURCE_DIR}/include/NeuraDialect - ${CMAKE_CURRENT_BINARY_DIR}/include/NeuraDialect - ${MLIR_MAIN_INCLUDE_DIR} - ${MLIR_INCLUDE_DIR}) +# set(MLIR_TABLEGEN_INCLUDES +# ${PROJECT_SOURCE_DIR}/include +# ${PROJECT_SOURCE_DIR}/include/NeuraDialect +# ${CMAKE_CURRENT_BINARY_DIR}/include/NeuraDialect +# ${MLIR_MAIN_INCLUDE_DIR} +# ${MLIR_INCLUDE_DIR}) add_mlir_dialect(Neura neura) diff --git a/include/NeuraDialect/NeuraOps.td b/include/NeuraDialect/NeuraOps.td index 628190c4..48dd0b54 100644 --- a/include/NeuraDialect/NeuraOps.td +++ b/include/NeuraDialect/NeuraOps.td @@ -101,6 +101,34 @@ def Neura_StoreOp : Op { // let assemblyFormat = "$value `,` $addr `,` $predicate attr-dict"; } +// Defines a load operation with integrated address calculation. +def Neura_LoadIndexedOp: Op{ + let summary = "Load with integrated address calculation for multi-dimensional arrays"; + let description = [{ + Calculates the address using the base address and indices. + Load the value at the calculated address. + Example: + %value = neura.load_indexed %base [%arg1, %arg2] : f32 + }]; + let arguments = (ins Arg:$base, Variadic:$indices, Optional:$predicate); + let results = (outs AnyType:$result); + let assemblyFormat = "type($base) $base `[` $indices `]` ($predicate^ `:` type($predicate))? attr-dict `:` type($result)"; +} + +//Defines a store operation with integrated address calculation. 
+def Neura_StoreIndexedOp: Op { + let summary = "Store with integrated address calculation for multi-dimensional arrays"; + let description = [{ + Calculates the address using the base address and indices. + Store the value at the calculated address. + Example: + neura.store_indexed %value, %base [%arg1, %arg2] : f32 + }]; + let arguments = (ins AnyType:$value, Arg:$base, Variadic:$indices, Optional:$predicate); + let results = (outs); + let assemblyFormat = "$value `to` type($base) $base `[` $indices `]` ($predicate^ `:` type($predicate))? attr-dict `:` type($value)"; +} + // Defines a pointer computation operation. def Neura_GEP : Op { let summary = "Pointer computation using offset indices"; @@ -253,3 +281,51 @@ def Neura_ReserveOp : Op { let results = (outs AnyType:$result); let assemblyFormat = "attr-dict `:` type($result)"; } + +// ---------------------------------------------------- +// Defines loop related operations. + +// Loop iteration operation for index increament and compare +// TODO: Add support for more complex loop structures using LoopInterOp +def Neura_LoopIterOp : Op { + let summary = "CGRA-optimized loop iteration operation"; + let description = [{ + Takes the current loop index, a step value, and an upper bound as the inputs. + Outputs the next loop index and a boolean condition indicating whether the loop should continue. + + Example: + %next_index, %continue = neura.loop_control current_index: 0, step: 1, bound: 10 : i32 i1}]; + + let arguments = (ins Index: $current_index, + Index:$step, + Index:$bound, + Optional:$loop_type, // 0: <, 1: <=, 2: >, 3: >= + Optional:$predicate); + let results = (outs Index:$next_index, I1:$continue_condition); + let assemblyFormat = "`current_index` `:` $current_index `,` `step` `:` $step `,` `bound` `:` $bound `:` type($bound) ($loop_type^ `:` type($loop_type))? ($predicate^ `:` type($predicate))? 
attr-dict `:` type($next_index) type($continue_condition)"; +} + +// Loop control operation that integrates loop iteration and control flow. +def Neura_LoopControlOp: Op{ + let summary = "Intergrated loop control operation for simple loops"; + let description = [{ + This operation is an integrated loop control operation that combines the loop iteration and control flow. + It has three main actions: + 1. Calculates the next iteration's index: `next_index = current_index + step` + 2. Checks if the loop should continue based on the current index and bound. + 3. If the loop should continue, it branches to the loop body, and yields related values. + 4. Otherwise, it exits the loop. + }]; + let arguments = (ins Index:$current_index, // Current loop index + Index:$step, + Index:$bound, + DefaultValuedAttr:$loop_type, // Loop type: "lt", "le", "gt", "ge", "eq", "ne" + Variadic:$body_args // Additional arguments to pass through to the successors + ); + let results = (outs); + let successors = (successor + AnySuccessor:$body, // loop body successors + AnySuccessor:$exit // exit successors + ); + let assemblyFormat = "`current_index` `:` $current_index `,` `step` `:` $step `,` `bound` `:` $bound `,` `loop_type` `:` $loop_type `then` $body(`(`$body_args^ `:` type($body_args)`)`)? 
`else` $exit attr-dict"; +} \ No newline at end of file diff --git a/include/NeuraDialect/NeuraPasses.h b/include/NeuraDialect/NeuraPasses.h index 9cdeef7f..ff168337 100644 --- a/include/NeuraDialect/NeuraPasses.h +++ b/include/NeuraDialect/NeuraPasses.h @@ -13,9 +13,12 @@ namespace mlir { namespace neura { +void registerNeuraLegalizePassPipeline(); + // Passes defined in GraphPasses.td #define GEN_PASS_DECL #include "NeuraDialect/NeuraPasses.h.inc" +// Passes used for neura optimization and transformation std::unique_ptr createInsertDataMovPass(); std::unique_ptr createInsertCtrlMovPass(); std::unique_ptr createFusePatternsPass(); @@ -23,6 +26,9 @@ std::unique_ptr createAssignAcceleratorPass(); std::unique_ptr createTransformCtrlToDataFlowPass(); std::unique_ptr createLeveragePredicatedValuePass(); +// Passes used for neura compiler +// std::unique_ptr createGenerateDFGPass(); + #define GEN_PASS_REGISTRATION #include "NeuraDialect/NeuraPasses.h.inc" diff --git a/include/NeuraDialect/NeuraPasses.td b/include/NeuraDialect/NeuraPasses.td index f4ea76a7..b488924e 100644 --- a/include/NeuraDialect/NeuraPasses.td +++ b/include/NeuraDialect/NeuraPasses.td @@ -36,17 +36,16 @@ def InsertCtrlMov : Pass<"insert-ctrl-mov", "ModuleOp"> { def TransformCtrlToDataFlow : Pass<"transform-ctrl-to-data-flow", "ModuleOp"> { let summary = "Inserts ctrl move operations in the Neura dialect"; - let description = - [{Transform ctrl to predicate-based data flow.}]; + let description = [{Transform ctrl to predicate - based data flow.}]; let constructor = "neura::createTransformCtrlToDataFlowPass()"; } def LeveragePredicatedValue : Pass<"leverage-predicated-value", "ModuleOp"> { let summary = "Convert values to predicated values in Neura dialect"; - let description = [{ - This pass converts regular values to predicated values in Neura dialect operations. - Each value is wrapped in a predicated value type with a default true predicate. 
- }]; + let description = [{This pass converts regular values to predicated values in + Neura dialect operations + .Each value is wrapped in a predicated value type + with a default true predicate.}]; let constructor = "neura::createLeveragePredicatedValuePass()"; } diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 599a4181..7ed6674c 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -1,2 +1,3 @@ add_subdirectory(NeuraDialect) -add_subdirectory(Conversion) \ No newline at end of file +add_subdirectory(Conversion) +# add_subdirectory(Compiler) \ No newline at end of file diff --git a/lib/Conversion/AffineToNeura/AffineToNeuraPass.cpp b/lib/Conversion/AffineToNeura/AffineToNeuraPass.cpp new file mode 100644 index 00000000..9cf65348 --- /dev/null +++ b/lib/Conversion/AffineToNeura/AffineToNeuraPass.cpp @@ -0,0 +1,388 @@ +#include "Common/AcceleratorAttrs.h" +#include "Conversion/ConversionPasses.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/IR/AffineExpr.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/IRMapping.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/Region.h" +#include "mlir/IR/ValueRange.h" +#include "mlir/IR/Visitors.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Support/LogicalResult.h" +#include "mlir/Transforms/DialectConversion.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" + +#include "NeuraDialect/NeuraDialect.h" +#include "NeuraDialect/NeuraOps.h" +#include "mlir/Transforms/RegionUtils.h" +#include "llvm/Support/LogicalResult.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace mlir; +using namespace mlir::neura; +using namespace mlir::func; + +#define 
GEN_PASS_DEF_LOWERAFFINETONEURA +#include "Conversion/ConversionPasses.h.inc" + +namespace { +LogicalResult convertAffineMapToIndices(AffineMap map, ValueRange map_operands, + Location loc, PatternRewriter &rewriter, + SmallVector &new_indices) { + new_indices.clear(); + new_indices.reserve(map.getNumResults()); + for (AffineExpr expr : map.getResults()) { + if (AffineConstantExpr const_expr = dyn_cast(expr)) { + IndexType index_type = rewriter.getIndexType(); + IntegerAttr value_attr = + rewriter.getIntegerAttr(index_type, const_expr.getValue()); + new_indices.push_back(rewriter.create( + loc, index_type, value_attr, nullptr)); // nullptr is for predicated bit + } else if (AffineDimExpr dim_expr = dyn_cast(expr)) { + if (dim_expr.getPosition() >= map.getNumDims() || + dim_expr.getPosition() >= + map_operands + .size()) { // Check against mapOperands size for safety + return failure(); + } + new_indices.push_back(map_operands[dim_expr.getPosition()]); + } else if (AffineSymbolExpr sym_expr = dyn_cast(expr)) { + unsigned symbol_operand_index = map.getNumDims() + sym_expr.getPosition(); + if (symbol_operand_index >= map_operands.size()) { + return failure(); + } + new_indices.push_back(map_operands[symbol_operand_index]); + } else { + // For more complex affine expressions (e.g., d0 + c1), + // materialize the result using affine.apply. + // This is a temporary workaround for complex expressions. + // TODO: Handle more complex expressions. 
+ llvm::errs() << "[affine2neura] Complex affine expression: " << expr + << "\n"; + AffineMap single_result_map = AffineMap::get( + map.getNumDims(), map.getNumSymbols(), expr, rewriter.getContext()); + Value complexIndex = rewriter.create( + loc, single_result_map, map_operands); + new_indices.push_back(complexIndex); + } + } + return success(); +} + +struct AffineLoadLowering : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + LogicalResult matchAndRewrite(affine::AffineLoadOp load_op, + PatternRewriter &rewriter) const override { + Location loc = load_op.getLoc(); + auto memref = load_op.getMemref(); + AffineMap map = load_op.getAffineMap(); + ValueRange map_operands = load_op.getMapOperands(); + // Gets the indices for the load operation + SmallVector new_indices; + if (failed(convertAffineMapToIndices(map, map_operands, loc, rewriter, + new_indices))) { + return load_op.emitError( + "[affine2neura] Failed to convert affine map to indices"); + } + + MemRefType memref_type = dyn_cast(memref.getType()); + if (!memref_type) { + return load_op.emitError( + "[affine2neura] Base of load is not a MemRefType"); + } + if (new_indices.size() != static_cast(memref_type.getRank())) { + return load_op.emitError( + "[affine2neura] Number of indices from affine map (") + << new_indices.size() << ") does not match memref rank (" + << memref_type.getRank() << ")"; + } + + // Create the neura.load_indexed operation + LoadIndexedOp new_load_op = rewriter.create( + loc, load_op.getType(), memref, ValueRange{new_indices}, nullptr); // nullptr is for predicated bit + + rewriter.replaceOp(load_op, new_load_op.getResult()); + return success(); + } +}; + +struct AffineStoreLowering : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + LogicalResult matchAndRewrite(affine::AffineStoreOp store_op, + PatternRewriter &rewriter) const override { + Location loc = store_op.getLoc(); + auto memref = store_op.getMemref(); + Value value = 
store_op.getValueToStore(); + AffineMap map = store_op.getAffineMap(); + ValueRange mapOperands = store_op.getMapOperands(); + + SmallVector newIndices; + if (failed(convertAffineMapToIndices(map, mapOperands, loc, rewriter, + newIndices))) { + return store_op.emitError( + "[affine2neura] Failed to convert affine map to indices"); + } + + MemRefType memRefType = dyn_cast(memref.getType()); + if (!memRefType) { + return store_op.emitError( + "[affine2neura] Base of store is not a MemRefType"); + } + if (newIndices.size() != static_cast(memRefType.getRank())) { + return store_op.emitError( + "[affine2neura] Number of indices from affine map (") + << newIndices.size() << ") does not match memref rank (" + << memRefType.getRank() << ")"; + } + + rewriter.create(loc, value, memref, + ValueRange{newIndices}, nullptr); // nullptr is for predicated bit + rewriter.eraseOp(store_op); + return success(); + } +}; + +struct AffineApplyLowering : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + LogicalResult matchAndRewrite(affine::AffineApplyOp apply_op, + PatternRewriter &rewriter) const override { + AffineMap map = apply_op.getAffineMap(); + ValueRange operands = apply_op.getMapOperands(); + Location loc = apply_op.getLoc(); + + if (map.getNumResults() != 1) { + return apply_op.emitError( + "[affine2neura] AffineApplyOp must have a single result"); + } + + AffineExpr expr = map.getResult(0); + // Handle simple affine expressions like d0 + cst + // TODO: Handle more complex expressions + if (isa(expr)) { + AffineBinaryOpExpr bin_expr = dyn_cast(expr); + if (bin_expr.getKind() == AffineExprKind::Add) { + if (isa(bin_expr.getLHS())) { + AffineDimExpr dim = dyn_cast(bin_expr.getLHS()); + if (isa(bin_expr.getRHS())) { + AffineConstantExpr cst = + dyn_cast(bin_expr.getRHS()); + neura::ConstantOp cstVal = rewriter.create( + loc, rewriter.getIndexType(), + rewriter.getIntegerAttr(rewriter.getIndexType(), + cst.getValue()), + nullptr); // nullptr is for 
predicated bit + neura::AddOp addOp = rewriter.create( + loc, cstVal.getType(), operands[dim.getPosition()], cstVal, + nullptr); // nullptr is for predicated bit + rewriter.replaceOp(apply_op, addOp.getResult()); + return success(); + } + } + } + } + + // You can add more cases here for different affine expressions + // For now, we will just emit an error for unsupported expressions. + return apply_op.emitError("[affine2neura] Unsupported complex affine " + "expression in AffineApplyOp.\n") + << "Only simple affine expressions like d0 + cst are supported.\n"; + } +}; + +LogicalResult lowerAffineFor(affine::AffineForOp for_op, OpBuilder &builder, + IRMapping &value_mapping) { + llvm::errs() << "[affine2neura] Lowering AffineForOp: " << for_op << "\n"; + Location loc = for_op.getLoc(); + IndexType index_type = builder.getIndexType(); + + // 1 Extract1 loop parameters (lower bound, upper bound, step) + Value lower_bound_val; + if (for_op.hasConstantLowerBound()) { + int64_t lower_bound_constant = for_op.getConstantLowerBound(); + lower_bound_val = builder.create( + loc, index_type, builder.getIndexAttr(lower_bound_constant), nullptr); // nullptr is for predicated bit + } else { + // If the lower bound is not constant, we need to use affine.apply + affine::AffineBound lower_bound = for_op.getLowerBound(); + AffineMap lower_bound_map = lower_bound.getMap(); + ValueRange lower_bound_operands = for_op.getLowerBoundOperands(); + lower_bound_val = builder.create( + loc, lower_bound_map, lower_bound_operands); + } + + Value upper_bound_val; + if (for_op.hasConstantUpperBound()) { + int64_t upper_bound_constant = for_op.getConstantUpperBound(); + upper_bound_val = builder.create( + loc, index_type, builder.getIndexAttr(upper_bound_constant), nullptr); // nullptr is for predicated bit + } else { + // For non-constant upper bounds, we also use affine.apply + affine::AffineBound upper_bound = for_op.getUpperBound(); + AffineMap upper_bound_map = upper_bound.getMap(); + 
ValueRange upper_bound_operands = for_op.getUpperBoundOperands(); + upper_bound_val = builder.create( + loc, upper_bound_map, upper_bound_operands); + } + + Value step_val = builder.create( + loc, index_type, builder.getIndexAttr(for_op.getStepAsInt()), nullptr); // nullptr is for predicated bit + + // 2 Creates the block structure + Block *origin_block = builder.getInsertionBlock(); + auto origin_point = builder.getInsertionPoint(); + Region *parent_region = origin_block->getParent(); + + // 2.1 Creates the header block + Block *header_block = builder.createBlock( + parent_region, std::next(Region::iterator(origin_block)), {index_type}, + {loc}); + // 2.2 Creates the body block + Block *body_block = builder.createBlock( + parent_region, std::next(Region::iterator(header_block)), {index_type}, + {loc}); + // 2.3 Creates the exit block + Block *exit_block = builder.createBlock( + parent_region, std::next(Region::iterator(body_block))); + // 2.4 Creates the continue block + Block *continue_block = origin_block->splitBlock(origin_point); + + // 3 Connects the blocks + // 3.1 Connects origin_block -> header_block + builder.setInsertionPointToEnd(origin_block); + builder.create(loc, ValueRange{lower_bound_val}, header_block); + + // 3.2 Connects header_block -> body_block + builder.setInsertionPointToEnd(header_block); + SmallVector body_args; + body_args.push_back(header_block->getArgument(0)); // current index + builder.create( + loc, header_block->getArgument(0), step_val, upper_bound_val, + builder.getStringAttr("lt"), body_args, body_block, exit_block); + + // 3.3 Clones the body of the original affine.for operation + // Assumes the body of the affine.for operation is a single block + // So we need to guarantee the sequence of handling the nested affine.for + // operations is correct. 
(From outermost to innermost) + builder.setInsertionPointToStart(body_block); + Value current_index = body_block->getArgument(0); + if (!for_op.getRegion().empty()) { + Block &source_block = for_op.getRegion().front(); + IRMapping mapping; + mapping.map(source_block.getArgument(0), current_index); + for (Operation &op : llvm::make_range(source_block.begin(), + std::prev(source_block.end()))) { + Operation *cloned_op = builder.clone(op, mapping); + for (unsigned i = 0; i < op.getNumResults(); ++i) + mapping.map(op.getResult(i), cloned_op->getResult(i)); + } + } + + // 3.4 Connects body_block -> header_block + builder.setInsertionPointToEnd(body_block); + builder.create(loc, ValueRange{current_index}, header_block); + + // 3.5 Connects exit_block -> continue_block + builder.setInsertionPointToEnd(exit_block); + builder.create(loc, ValueRange{}, continue_block); + + builder.setInsertionPointToStart(continue_block); + + for_op.erase(); + + return success(); +} + +affine::AffineForOp findOuterMostAffineFor(func::FuncOp &func_op) { + // Find the outermost affine.for operation + affine::AffineForOp top_for_op = nullptr; + func_op.walk([&](affine::AffineForOp for_op) { + // Checks if this for_op has any AffineForOp parent + Operation *parent_op = for_op->getParentOp(); + bool has_affine_for_parent = false; + + while (parent_op) { + if (isa(parent_op)) { + has_affine_for_parent = true; + break; + } + parent_op = parent_op->getParentOp(); + } + + // If it has no AffineForOp parent, it's a Ftop-level loop + if (!has_affine_for_parent) { + top_for_op = for_op; // Store the found operation + return WalkResult::interrupt(); // Stop walking + } + + return WalkResult::advance(); // Continue walking + }); + + return top_for_op; // Return the found operation +} + +struct LowerAffineToNeuraPass + : public PassWrapper> { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(LowerAffineToNeuraPass) + + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + } 
+ + StringRef getArgument() const override { return "lower-affine-to-neura"; } + StringRef getDescription() const override { + return "Lower affine operations to Neura dialect operations"; + } + + void runOnOperation() override { + ModuleOp module_op = getOperation(); + MLIRContext *context = module_op.getContext(); + IRMapping mapping; + module_op.walk( + [&](func::FuncOp func_op) { + if (func_op->hasAttr(mlir::accel::kAcceleratorAttr)) { + auto target = func_op->getAttrOfType( + mlir::accel::kAcceleratorAttr); + if (target && target.getValue() == mlir::accel::kNeuraTarget) { + while (affine::AffineForOp outer_for_op = + findOuterMostAffineFor(func_op)) { + llvm::errs() + << "[affine2neura] Find outermost affine.for operation: " + << outer_for_op << "\n"; + OpBuilder builder(outer_for_op); + if (failed(lowerAffineFor(outer_for_op, builder, mapping))) { + outer_for_op.emitError("[affine2neura] Failed to lower " + "outermost affine.for operation"); + signalPassFailure(); + } + } + + RewritePatternSet patterns(context); + patterns.add(context); + + if (failed(applyPatternsGreedily(func_op.getOperation(), + std::move(patterns)))) { + func_op.emitError("[affine2neura] Failed to lower affine " + "operations to Neura dialect"); + signalPassFailure(); + } + } + } + }); + } +}; +} // namespace + +std::unique_ptr mlir::createLowerAffineToNeuraPass() { + return std::make_unique(); +} \ No newline at end of file diff --git a/lib/Conversion/AffineToNeura/CMakeLists.txt b/lib/Conversion/AffineToNeura/CMakeLists.txt new file mode 100644 index 00000000..fc71ff70 --- /dev/null +++ b/lib/Conversion/AffineToNeura/CMakeLists.txt @@ -0,0 +1,15 @@ +include_directories(${CMAKE_CURRENT_BINARY_DIR}) + +add_mlir_conversion_library(MLIRNeuraAffineToNeuraPass + AffineToNeuraPass.cpp + + DEPENDS + MLIRConversionIncGen + + LINK_LIBS PUBLIC + MLIRIR + MLIRPass + MLIRSupport + MLIRTransforms + # MLIRNeura +) \ No newline at end of file diff --git 
a/lib/Conversion/ArithToNeura/ArithToNeuraPass.cpp b/lib/Conversion/ArithToNeura/ArithToNeuraPass.cpp index ab952519..ee844d9d 100644 --- a/lib/Conversion/ArithToNeura/ArithToNeuraPass.cpp +++ b/lib/Conversion/ArithToNeura/ArithToNeuraPass.cpp @@ -24,7 +24,7 @@ using namespace mlir::func; using namespace mlir::neura; #define GEN_PASS_DEF_LOWERARITHTONEURA -#include "NeuraDialect/NeuraPasses.h.inc" +#include "Conversion/ConversionPasses.h.inc" namespace{ @@ -44,7 +44,7 @@ struct ArithFAddToNeuraFAdd : public OpRewritePattern { }; struct LowerArithToNeuraPass - : public PassWrapper> { + : public PassWrapper> { MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(LowerArithToNeuraPass) diff --git a/lib/Conversion/CMakeLists.txt b/lib/Conversion/CMakeLists.txt index 1dbce29f..2a33d1e2 100644 --- a/lib/Conversion/CMakeLists.txt +++ b/lib/Conversion/CMakeLists.txt @@ -2,6 +2,7 @@ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) add_subdirectory(ArithToNeura) add_subdirectory(LlvmToNeura) +add_subdirectory(AffineToNeura) # add_mlir_library( # MLIRNeuraConversion diff --git a/lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp b/lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp index c9c2fe23..39d72b39 100644 --- a/lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp +++ b/lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp @@ -25,7 +25,7 @@ using namespace mlir; using namespace mlir::neura; #define GEN_PASS_DEF_LOWERLLVMTONEURA -#include "NeuraDialect/NeuraPasses.h.inc" +#include "Conversion/ConversionPasses.h.inc" namespace { diff --git a/lib/NeuraDialect/CMakeLists.txt b/lib/NeuraDialect/CMakeLists.txt index 50532491..d9a626b6 100644 --- a/lib/NeuraDialect/CMakeLists.txt +++ b/lib/NeuraDialect/CMakeLists.txt @@ -1,29 +1,32 @@ -# Set include paths for TableGen -set(MLIR_TABLEGEN_INCLUDES - "-I${PROJECT_SOURCE_DIR}/include" - "-I${PROJECT_SOURCE_DIR}/include/NeuraDialect" - "-I${CMAKE_CURRENT_BINARY_DIR}/include/NeuraDialect") +# # Set include paths for TableGen +# 
set(MLIR_TABLEGEN_INCLUDES +# "-I${PROJECT_SOURCE_DIR}/include" +# "-I${PROJECT_SOURCE_DIR}/include/NeuraDialect" +# "-I${CMAKE_CURRENT_BINARY_DIR}/include/NeuraDialect") -# Generate TableGen files -set(LLVM_TARGET_DEFINITIONS ${PROJECT_SOURCE_DIR}/include/NeuraDialect/Neura.td) -mlir_tablegen(Neura.h.inc -gen-op-decls ${MLIR_TABLEGEN_INCLUDES}) -mlir_tablegen(Neura.cpp.inc -gen-op-defs ${MLIR_TABLEGEN_INCLUDES}) -mlir_tablegen(NeuraDialect.h.inc -gen-dialect-decls ${MLIR_TABLEGEN_INCLUDES}) -mlir_tablegen(NeuraDialect.cpp.inc -gen-dialect-defs ${MLIR_TABLEGEN_INCLUDES}) -mlir_tablegen(NeuraTypes.h.inc -gen-typedef-decls ${MLIR_TABLEGEN_INCLUDES}) -mlir_tablegen(NeuraTypes.cpp.inc -gen-typedef-defs ${MLIR_TABLEGEN_INCLUDES}) -add_public_tablegen_target(MLIRNeuraDialectIncGen) +# # Generate TableGen files +# set(LLVM_TARGET_DEFINITIONS ${PROJECT_SOURCE_DIR}/include/NeuraDialect/Neura.td) +# mlir_tablegen(Neura.h.inc -gen-op-decls ${MLIR_TABLEGEN_INCLUDES}) +# mlir_tablegen(Neura.cpp.inc -gen-op-defs ${MLIR_TABLEGEN_INCLUDES}) +# mlir_tablegen(NeuraDialect.h.inc -gen-dialect-decls ${MLIR_TABLEGEN_INCLUDES}) +# mlir_tablegen(NeuraDialect.cpp.inc -gen-dialect-defs ${MLIR_TABLEGEN_INCLUDES}) +# mlir_tablegen(NeuraTypes.h.inc -gen-typedef-decls ${MLIR_TABLEGEN_INCLUDES}) +# mlir_tablegen(NeuraTypes.cpp.inc -gen-typedef-defs ${MLIR_TABLEGEN_INCLUDES}) +# add_public_tablegen_target(MLIRNeuraDialectIncGen) # Add the dialect library add_mlir_dialect_library(MLIRNeura Neura.cpp NeuraTypes.cpp + NeuraPasses.cpp ADDITIONAL_HEADER_DIRS ${PROJECT_SOURCE_DIR}/include/NeuraDialect DEPENDS - MLIRNeuraDialectIncGen + MLIRNeuraIncGen + MLIRNeuraTransformsIncGen + MLIRConversionIncGen LINK_LIBS PUBLIC MLIRIR diff --git a/lib/NeuraDialect/NeuraPasses.cpp b/lib/NeuraDialect/NeuraPasses.cpp new file mode 100644 index 00000000..11b92b13 --- /dev/null +++ b/lib/NeuraDialect/NeuraPasses.cpp @@ -0,0 +1,27 @@ +#include "mlir/Pass/PassManager.h" +#include "mlir/Pass/PassRegistry.h" +#include 
"mlir/Transforms/Passes.h" + +#include "NeuraDialect/NeuraDialect.h" +#include "NeuraDialect/NeuraOps.h" +#include "NeuraDialect/NeuraPasses.h" +#include "NeuraDialect/NeuraTypes.h" +#include "Conversion/ConversionPasses.h" + +// This pass pipeline can convert all the other dialects into the Neura dialect +void mlir::neura::registerNeuraLegalizePassPipeline() { + PassPipelineRegistration<>("neura-legalize", + "Legalize operations to Neura dialect", + [](OpPassManager &pm) { + // Convert all the other dialects into the Neura dialect + pm.addPass(mlir::createLowerAffineToNeuraPass()); + pm.addPass(mlir::createLowerArithToNeuraPass()); + pm.addPass(mlir::createLowerLlvmToNeuraPass()); + + // Insert data and control movement operations + // pm.addPass(mlir::neura::createLeveragePredicatedValuePass()); + // pm.addPass(mlir::neura::createInsertDataMovPass()); + // pm.addPass(mlir::neura::createInsertCtrlMovPass()); + // pm.addPass(mlir::neura::createTransformCtrlToDataFlowPass()); + }); +} \ No newline at end of file diff --git a/test/.lit_test_times.txt b/test/.lit_test_times.txt new file mode 100644 index 00000000..c0405ae5 --- /dev/null +++ b/test/.lit_test_times.txt @@ -0,0 +1,26 @@ +2.734089e-02 affine2neura/gpt2-node27/node27.mlir +1.068902e-02 affine2neura/deep-nested/deep_nested.mlir +2.698708e-02 affine2neura/gpt2-node11/node11.mlir +2.851033e-02 affine2neura/gpt2-node30/node30.mlir +3.188467e-02 neura/ctrl/branch.mlir +2.987862e-02 neura/ctrl/branch_no_arg.mlir +1.032019e-02 neura/ctrl/branch_for.mlir +-5.869865e-04 affine2neura/deep-nested/deep_nested_neura.mlir +-5.869865e-04 affine2neura/gpt2-node11/node11_neura.mlir +-3.650188e-04 affine2neura/gpt2-node27/node27_neura.mlir +-6.232262e-04 affine2neura/gpt2-node27/node27_unroll.mlir +-3.950596e-04 affine2neura/gpt2-node30/node30_neura.mlir +-4.494190e-04 affine2neura/gpt2-node30/node30_unroll.mlir +6.077766e-03 arith2neura/add.mlir +2.200377e-01 c2llvm2mlir/test.mlir +5.845070e-03 neura/arith_add.mlir 
+5.631447e-03 neura/fadd_fadd.mlir +9.507132e-02 neura/for_loop/test.mlir +2.653909e-02 neura/interpreter/add.mlir +5.800486e-03 neura/interpreter/interpreter.mlir +1.472716e-01 neura/interpreter/lower_and_interpret.mlir +1.522479e-01 neura/interpreter/lower_and_interpret_subf.mlir +2.643609e-02 neura/interpreter/predicated_data.mlir +2.739096e-02 neura/llvm_add.mlir +2.676344e-02 neura/llvm_sub.mlir +2.563691e-02 test.mlir diff --git a/test/affine2neura/deep-nested/deep_nested.cpp b/test/affine2neura/deep-nested/deep_nested.cpp new file mode 100644 index 00000000..405e6c5b --- /dev/null +++ b/test/affine2neura/deep-nested/deep_nested.cpp @@ -0,0 +1,31 @@ +int input_data[3][3][3]; +int output_data[3][3][3]; +float weights[3]; + +int deep_nested() { + // 10 nested loops + for (int i0 = 0; i0 < 3; i0++) { + for (int i1 = 0; i1 < 3; i1++) { + for (int i2 = 0; i2 < 3; i2++) { + for (int i3 = 0; i3 < 3; i3++) { + for (int i4 = 0; i4 < 3; i4++) { + for (int i5 = 0; i5 < 3; i5++) { + for (int i6 = 0; i6 < 3; i6++) { + for (int i7 = 0; i7 < 3; i7++) { + for (int i8 = 0; i8 < 3; i8++) { + for (int i9 = 0; i9 < 3; i9++) { + // Assuming some operation on input_data + output_data[i0][i1][i2] += + input_data[i0][i1][i2]; + } + } + } + } + } + } + } + } + } + } + return 0; +} diff --git a/test/affine2neura/deep-nested/deep_nested.mlir b/test/affine2neura/deep-nested/deep_nested.mlir new file mode 100644 index 00000000..cb5c5db9 --- /dev/null +++ b/test/affine2neura/deep-nested/deep_nested.mlir @@ -0,0 +1,47 @@ +// Check that the affine loop nest is correctly transformed to neura.loop_control +// RUN: mlir-neura-opt %s --assign-accelerator --lower-affine-to-neura | FileCheck %s +module attributes {} { + memref.global @input_data : memref<3x3x3xi32> = uninitialized + memref.global @output_data : memref<3x3x3xi32> = uninitialized + func.func @_Z11deep_nestedv() -> i32 attributes {llvm.linkage = #llvm.linkage} { + %c0_i32 = arith.constant 0 : i32 + %0 = memref.get_global 
@output_data : memref<3x3x3xi32> + %1 = memref.get_global @input_data : memref<3x3x3xi32> + affine.for %arg0 = 0 to 3 { + affine.for %arg1 = 0 to 3 { + affine.for %arg2 = 0 to 3 { + affine.for %arg3 = 0 to 3 { + affine.for %arg4 = 0 to 3 { + affine.for %arg5 = 0 to 3 { + affine.for %arg6 = 0 to 3 { + affine.for %arg7 = 0 to 3 { + %2 = affine.load %1[%arg0, %arg1, %arg2] : memref<3x3x3xi32> + affine.for %arg8 = 0 to 3 { + affine.for %arg9 = 0 to 3 { + %3 = affine.load %0[%arg0, %arg1, %arg2] : memref<3x3x3xi32> + %4 = arith.addi %3, %2 : i32 + affine.store %4, %0[%arg0, %arg1, %arg2] : memref<3x3x3xi32> + } + } + } + } + } + } + } + } + } + } + return %c0_i32 : i32 + } +} + +// Verify function signature is preserved +// CHECK-LABEL: func.func @_Z11deep_nestedv() -> i32 + +// Verify all affine operations are eliminated +// CHECK-NOT: affine.for +// CHECK-NOT: affine.load +// CHECK-NOT: affine.store +// CHECK-NOT: affine.apply + +// CHECK-COUNT-10: neura.loop_control diff --git a/test/affine2neura/gpt2-node11/compile.sh b/test/affine2neura/gpt2-node11/compile.sh new file mode 100755 index 00000000..4d3eeaee --- /dev/null +++ b/test/affine2neura/gpt2-node11/compile.sh @@ -0,0 +1,3 @@ +/home/lucas/Project/NeuraCompiler/thirdparty/Polygeist/build/bin/cgeist ./node11.cpp -S --raise-scf-to-affine -o ./node11.mlir +/home/lucas/Project/NeuraCompiler/thirdparty/Polygeist/build/bin/polygeist-opt ./node11.mlir --affine-loop-unroll="unroll-factor=2" -o ./node11_unroll.mlir +# /home/lucas/Project/NeuraCompiler/thirdparty/Polygeist/build/bin/polygeist-opt ./node27_unroll.mlir --affine-loop-tile="tile-size=2" -o ./node27_tile.mlir \ No newline at end of file diff --git a/test/affine2neura/gpt2-node11/node11.cpp b/test/affine2neura/gpt2-node11/node11.cpp new file mode 100644 index 00000000..fdd7519f --- /dev/null +++ b/test/affine2neura/gpt2-node11/node11.cpp @@ -0,0 +1,12 @@ +float input[1][16][64]; +float output[1][16]; + +int node11() { + for (int arg2 = 0; arg2 < 1; arg2++) { + 
for (int arg3 = 0; arg3 < 16; arg3++) { + for (int arg4 = 0; arg4 < 64; arg4+=1) + output[arg2][arg3] += input[arg2][arg3][arg4]; + } + } + return 0; +} \ No newline at end of file diff --git a/test/affine2neura/gpt2-node11/node11.mlir b/test/affine2neura/gpt2-node11/node11.mlir new file mode 100644 index 00000000..795bb45f --- /dev/null +++ b/test/affine2neura/gpt2-node11/node11.mlir @@ -0,0 +1,31 @@ +// Check that the affine loop nest is correctly transformed to neura.loop_control +// RUN: mlir-neura-opt %s --assign-accelerator --lower-affine-to-neura | FileCheck %s +module attributes {} { + memref.global @input : memref<1x16x64xf32> = uninitialized + memref.global @output : memref<1x16xf32> = uninitialized + func.func @_Z6node11v() -> i32 attributes {llvm.linkage = #llvm.linkage} { + %c0_i32 = arith.constant 0 : i32 + %0 = memref.get_global @output : memref<1x16xf32> + %1 = memref.get_global @input : memref<1x16x64xf32> + affine.for %arg0 = 0 to 16 { + affine.for %arg1 = 0 to 64 { + %2 = affine.load %1[0, %arg0, %arg1] : memref<1x16x64xf32> + %3 = affine.load %0[0, %arg0] : memref<1x16xf32> + %4 = arith.addf %3, %2 : f32 + affine.store %4, %0[0, %arg0] : memref<1x16xf32> + } + } + return %c0_i32 : i32 + } +} + +// Verify function signature is preserved +// CHECK-LABEL: func.func @_Z6node11v() -> i32 + +// Verify all affine operations are eliminated +// CHECK-NOT: affine.for +// CHECK-NOT: affine.load +// CHECK-NOT: affine.store +// CHECK-NOT: affine.apply + +// CHECK-COUNT-2: neura.loop_control diff --git a/test/affine2neura/gpt2-node27/compile.sh b/test/affine2neura/gpt2-node27/compile.sh new file mode 100755 index 00000000..bc268f1a --- /dev/null +++ b/test/affine2neura/gpt2-node27/compile.sh @@ -0,0 +1,3 @@ +/home/lucas/Project/NeuraCompiler/thirdparty/Polygeist/build/bin/cgeist ./node27.cpp -S --raise-scf-to-affine -o ./node27.mlir +/home/lucas/Project/NeuraCompiler/thirdparty/Polygeist/build/bin/polygeist-opt ./node27.mlir 
--affine-loop-unroll="unroll-factor=2" -o ./node27_unroll.mlir +# /home/lucas/Project/NeuraCompiler/thirdparty/Polygeist/build/bin/polygeist-opt ./node27_unroll.mlir --affine-loop-tile="tile-size=2" -o ./node27_tile.mlir \ No newline at end of file diff --git a/test/affine2neura/gpt2-node27/node27.cpp b/test/affine2neura/gpt2-node27/node27.cpp new file mode 100644 index 00000000..456aaabd --- /dev/null +++ b/test/affine2neura/gpt2-node27/node27.cpp @@ -0,0 +1,15 @@ +float input[1][16][4][16]; +float output[1][4][16][16]; + +int node27() { + for (int arg2 = 0; arg2 < 1; arg2++) { + for (int arg3 = 0; arg3 < 16; arg3++) { + for (int arg4 = 0; arg4 < 4; arg4 += 1) { + for (int arg5 = 0; arg5 < 16; arg5 += 1) { + output[arg2][arg3][arg4][arg5] = input[arg2][arg4][arg3][arg5]; + } + } + } + } + return 0; +} \ No newline at end of file diff --git a/test/affine2neura/gpt2-node27/node27.mlir b/test/affine2neura/gpt2-node27/node27.mlir new file mode 100644 index 00000000..3bc78ff5 --- /dev/null +++ b/test/affine2neura/gpt2-node27/node27.mlir @@ -0,0 +1,30 @@ +// Check that the affine loop nest is correctly transformed to neura.loop_control +// RUN: mlir-neura-opt %s --assign-accelerator --lower-affine-to-neura | FileCheck %s +module attributes {} { + memref.global @input : memref<1x16x4x16xf32> = uninitialized + memref.global @output : memref<1x4x16x16xf32> = uninitialized + func.func @_Z6node27v() -> i32 attributes {llvm.linkage = #llvm.linkage} { + %0 = llvm.mlir.undef : i32 + %1 = memref.get_global @output : memref<1x4x16x16xf32> + %2 = memref.get_global @input : memref<1x16x4x16xf32> + affine.for %arg0 = 0 to 16 { + affine.for %arg1 = 0 to 4 { + affine.for %arg2 = 0 to 16 { + %3 = affine.load %2[0, %arg1, %arg0, %arg2] : memref<1x16x4x16xf32> + affine.store %3, %1[0, %arg0, %arg1, %arg2] : memref<1x4x16x16xf32> + } + } + } + return %0 : i32 + } +} +// Verify function signature is preserved +// CHECK-LABEL: func.func @_Z6node27v() -> i32 + +// Verify all affine operations are
eliminated +// CHECK-NOT: affine.for +// CHECK-NOT: affine.load +// CHECK-NOT: affine.store +// CHECK-NOT: affine.apply + +// CHECK-COUNT-3: neura.loop_control diff --git a/test/affine2neura/gpt2-node30/compile.sh b/test/affine2neura/gpt2-node30/compile.sh new file mode 100755 index 00000000..28b23b23 --- /dev/null +++ b/test/affine2neura/gpt2-node30/compile.sh @@ -0,0 +1,3 @@ +/home/lucas/Project/NeuraCompiler/thirdparty/Polygeist/build/bin/cgeist ./node30.cpp -S --raise-scf-to-affine -o ./node30.mlir +/home/lucas/Project/NeuraCompiler/thirdparty/Polygeist/build/bin/polygeist-opt ./node30.mlir --affine-loop-unroll="unroll-factor=2" -o ./node30_unroll.mlir +# /home/lucas/Project/NeuraCompiler/thirdparty/Polygeist/build/bin/polygeist-opt ./node30_unroll.mlir --affine-loop-tile="tile-size=2" -o ./node30_tile.mlir \ No newline at end of file diff --git a/test/affine2neura/gpt2-node30/node30.cpp b/test/affine2neura/gpt2-node30/node30.cpp new file mode 100644 index 00000000..596450f8 --- /dev/null +++ b/test/affine2neura/gpt2-node30/node30.cpp @@ -0,0 +1,16 @@ +float A[1][4][16][64]; +// float B=20.0; +float C[1][4][16][64]; + +int node30() { + for (int arg2 = 0; arg2 < 1; arg2++) { + for (int arg3 = 0; arg3 < 4; arg3++) { + for (int arg4 = 0; arg4 < 16; arg4++) { + for (int arg5 = 0; arg5 < 64; arg5++) { + C[arg2][arg3][arg4][arg5] = A[arg2][arg3][arg4][arg5] * 10; + } + } + } + } + return 0; +} \ No newline at end of file diff --git a/test/affine2neura/gpt2-node30/node30.mlir b/test/affine2neura/gpt2-node30/node30.mlir new file mode 100644 index 00000000..9d3b77d0 --- /dev/null +++ b/test/affine2neura/gpt2-node30/node30.mlir @@ -0,0 +1,33 @@ +// Check that the affine loop nest is correctly transformed to neura.loop_control +// RUN: mlir-neura-opt %s --assign-accelerator --lower-affine-to-neura | FileCheck %s +module attributes {} { + memref.global @A : memref<1x4x16x64xf32> = uninitialized + memref.global @C : memref<1x4x16x64xf32> = uninitialized + func.func @_Z6node30v() -> i32
attributes {llvm.linkage = #llvm.linkage} { + %cst = arith.constant 1.000000e+01 : f32 + %0 = llvm.mlir.undef : i32 + %1 = memref.get_global @C : memref<1x4x16x64xf32> + %2 = memref.get_global @A : memref<1x4x16x64xf32> + affine.for %arg0 = 0 to 4 { + affine.for %arg1 = 0 to 16 { + affine.for %arg2 = 0 to 64 { + %3 = affine.load %2[0, %arg0, %arg1, %arg2] : memref<1x4x16x64xf32> + %4 = arith.mulf %3, %cst : f32 + affine.store %4, %1[0, %arg0, %arg1, %arg2] : memref<1x4x16x64xf32> + } + } + } + return %0 : i32 + } +} + +// Verify function signature is preserved +// CHECK-LABEL: func.func @_Z6node30v() -> i32 + +// Verify all affine operations are eliminated +// CHECK-NOT: affine.for +// CHECK-NOT: affine.load +// CHECK-NOT: affine.store +// CHECK-NOT: affine.apply + +// CHECK-COUNT-3: neura.loop_control diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 2f980553..8390f87c 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -1,2 +1,3 @@ add_subdirectory(mlir-neura-opt) -add_subdirectory(neura-interpreter) \ No newline at end of file +add_subdirectory(neura-interpreter) +add_subdirectory(neura-compiler) \ No newline at end of file diff --git a/tools/mlir-neura-opt/mlir-neura-opt.cpp b/tools/mlir-neura-opt/mlir-neura-opt.cpp index d21664fb..5453a29d 100644 --- a/tools/mlir-neura-opt/mlir-neura-opt.cpp +++ b/tools/mlir-neura-opt/mlir-neura-opt.cpp @@ -3,6 +3,7 @@ #include "mlir/Dialect/DLTI/DLTI.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/InitAllDialects.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/InitAllPasses.h" #include "mlir/Support/FileUtilities.h" #include "mlir/Support/LogicalResult.h" @@ -20,6 +21,8 @@ int main(int argc, char **argv) { registry.insert(); registry.insert(); registry.insert(); + registry.insert(); + registry.insert(); mlir::neura::registerPasses(); mlir::registerPasses(); diff --git a/tools/neura-compiler/CMakeLists.txt b/tools/neura-compiler/CMakeLists.txt new file mode 100644 index 
00000000..69e78747 --- /dev/null +++ b/tools/neura-compiler/CMakeLists.txt @@ -0,0 +1,18 @@ +add_executable(neura-compiler neura-compiler.cpp) +get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) +get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS) +set(LIBS + ${dialect_libs} + ${conversion_libs} + MLIRNeuraTransforms + MLIRConversion + MLIRNeura + MLIRTransforms + MLIROptLib + MLIRPass + MLIRIR + MLIRParser + MLIRSupport + ) + +target_link_libraries(neura-compiler PRIVATE ${LIBS}) \ No newline at end of file diff --git a/tools/neura-compiler/neura-compiler.cpp b/tools/neura-compiler/neura-compiler.cpp new file mode 100644 index 00000000..8180709e --- /dev/null +++ b/tools/neura-compiler/neura-compiler.cpp @@ -0,0 +1,32 @@ +// neura-compiler.cpp + +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/DLTI/DLTI.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/InitAllDialects.h" +#include "mlir/InitAllPasses.h" +#include "mlir/Support/FileUtilities.h" +#include "mlir/Support/LogicalResult.h" +#include "mlir/Tools/mlir-opt/MlirOptMain.h" + +#include "Conversion/ConversionPasses.h" +#include "NeuraDialect/NeuraDialect.h" +#include "NeuraDialect/NeuraPasses.h" + +int main(int argc, char **argv) { + // Registers MLIR dialects. + mlir::DialectRegistry registry; + registry.insert(); + registry.insert(); + registry.insert(); + registry.insert(); + registry.insert(); + registry.insert(); + registry.insert(); + + mlir::neura::registerNeuraLegalizePassPipeline(); + + // Runs the MLIR optimizer. + return mlir::asMainReturnCode( + mlir::MlirOptMain(argc, argv, "Neura Dialect Optimizer", registry)); +} \ No newline at end of file