-
Notifications
You must be signed in to change notification settings - Fork 14
Optimize spatial-temporal heuristic and reduce II for multiple kernels #222
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
c53e1d6
408e2b1
b800ea7
960a652
bb69d05
a995662
d12394d
83b017a
45944ff
72257d9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,6 +20,10 @@ static const int AWARD_BACKWARD_PROXIMITY_SCALE = 1; | |
| static const int AWARD_BASE_MULTIPLIER = 1; | ||
| static const int AWARD_CRITICAL_BONUS_DIV = 1; | ||
|
|
||
| // Congestion penalty coefficients (tunable). | ||
| static const int STRONG_CONGESTION_PENALTY = 60; // used for high fan-in ops (>=3) | ||
| static const int WEAK_CONGESTION_PENALTY = 15; // used for low fan-in ops | ||
|
|
||
| namespace mlir { | ||
| namespace neura { | ||
| OperationKind getOperationKindFromMlirOp(Operation *op) { | ||
|
|
@@ -394,12 +398,53 @@ mlir::neura::getOpsInAlapLevels(const std::vector<Operation *> &sorted_ops, | |
| } | ||
|
|
||
| std::vector<std::pair<Operation *, int>> mlir::neura::flatten_level_buckets( | ||
| const std::vector<std::vector<Operation *>> &level_buckets) { | ||
| const std::vector<std::vector<Operation *>> &level_buckets, | ||
| const std::set<Operation *> &critical_ops) { | ||
| std::vector<std::pair<Operation *, int>> result; | ||
|
|
||
| for (int level = 0; level < static_cast<int>(level_buckets.size()); ++level) { | ||
| for (Operation *op : level_buckets[level]) { | ||
| result.emplace_back(op, level); | ||
| // Collects ops with their current index to ensure stable sorting. | ||
| std::vector<std::pair<Operation *, int>> ops_with_index; | ||
| for (int i = 0; i < (int)level_buckets[level].size(); ++i) { | ||
| ops_with_index.push_back({level_buckets[level][i], i}); | ||
| } | ||
|
|
||
| // Sorts with criticality as PRIMARY criterion within the same ALAP level. | ||
| // This addresses tancheng's feedback: critical ops should map before | ||
| // high-degree non-critical ops in the same level. | ||
| std::sort(ops_with_index.begin(), ops_with_index.end(), | ||
| [&critical_ops](const std::pair<Operation *, int> &a_pair, | ||
| const std::pair<Operation *, int> &b_pair) { | ||
| Operation *a = a_pair.first; | ||
| Operation *b = b_pair.first; | ||
|
|
||
| bool a_is_critical = critical_ops.count(a) > 0; | ||
| bool b_is_critical = critical_ops.count(b) > 0; | ||
|
|
||
| // Priority 1: Critical ops come first (within same ALAP level). | ||
| if (a_is_critical != b_is_critical) | ||
| return a_is_critical > b_is_critical; | ||
|
|
||
| // Priority 2: Degree (connectivity) - higher degree first. | ||
| int degree_a = a->getNumOperands(); | ||
| int degree_b = b->getNumOperands(); | ||
| for (Value res : a->getResults()) { | ||
| degree_a += std::distance(res.getUsers().begin(), | ||
| res.getUsers().end()); | ||
| } | ||
| for (Value res : b->getResults()) { | ||
| degree_b += std::distance(res.getUsers().begin(), | ||
| res.getUsers().end()); | ||
| } | ||
| if (degree_a != degree_b) | ||
| return degree_a > degree_b; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Shouldn't we also prioritize the ops on the critical path (even if they have a lower degree)?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Operations on the critical path already receive the highest priority. In
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I mean if one bucket level has both critical operation (
This comment was marked as resolved.
Sorry, something went wrong.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You mean you implemented what I suggested, and it leads to a worse II?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I see the problem. The previous mapping was unstable, so the reduced II was a coincidence. I am trying to solve the problem.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done~ |
||
|
|
||
| // Priority 3: Original index (stability tie-breaker). | ||
| return a_pair.second < b_pair.second; | ||
| }); | ||
|
|
||
| for (const auto &p : ops_with_index) { | ||
| result.emplace_back(p.first, level); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -628,7 +673,7 @@ bool mlir::neura::tryRouteDataMove(Operation *mov_op, MappingLoc src_loc, | |
| continue; | ||
| } | ||
|
|
||
| // Explores two routing options from current tile: | ||
| int next_time = current_state.current_time + 1; | ||
|
|
||
| // Option 1: Moves to adjacent tile through link. | ||
| for (Link *out_link : current_state.current_tile->getOutLinks()) { | ||
|
|
@@ -904,6 +949,10 @@ mlir::neura::calculateAward(Operation *op, std::set<Operation *> &critical_ops, | |
| std::vector<MappingLoc> backward_users_locs; | ||
| for (Operation *user : backward_users) { | ||
| std::vector<MappingLoc> user_locs = mapping_state.getAllLocsOfOp(user); | ||
| if (user_locs.empty()) { | ||
| // llvm::errs() << "[Warning] No locations found for backward user " << *user << "\n"; | ||
| continue; | ||
| } | ||
| assert(!user_locs.empty() && "No locations found for backward user"); | ||
|
|
||
| MappingLoc backward_user_loc = user_locs.back(); | ||
|
|
@@ -972,7 +1021,39 @@ mlir::neura::calculateAward(Operation *op, std::set<Operation *> &critical_ops, | |
| if (meet_producer_constraint && meet_backward_user_constraint) { | ||
| // Earlier time steps get higher scores. | ||
| int time_bonus = latest_end_time_step - t; | ||
| int total_award = tile_award + time_bonus; | ||
|
|
||
| // === Balanced Link congestion penalty === | ||
| // A conservative penalty to guide the mapper away from hotspots | ||
| // without being too restrictive for small IIs. | ||
| int total_in = tile->getInLinks().size(); | ||
| int total_out = tile->getOutLinks().size(); | ||
| int occupied_in = 0; | ||
| int occupied_out = 0; | ||
|
|
||
| for (auto *link : tile->getInLinks()) { | ||
| if (!mapping_state.isAvailableAcrossTime({link, t})) | ||
| occupied_in++; | ||
| } | ||
| for (auto *link : tile->getOutLinks()) { | ||
| if (!mapping_state.isAvailableAcrossTime({link, t})) | ||
| occupied_out++; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto |
||
| } | ||
|
|
||
| float in_ratio = (total_in > 0) ? (float)occupied_in / total_in : 0; | ||
| float out_ratio = (total_out > 0) ? (float)occupied_out / total_out : 0; | ||
|
|
||
| // Adaptive penalty strategy: | ||
| // - Use very strong penalty (60) only for high fan-in ops (>= 3 producers) | ||
| // - Use weak penalty (15) for low fan-in ops | ||
| // This optimizes fuse-pattern (II=11 target) without breaking iter-merge | ||
| int base_penalty_coeff = (producers.size() >= 3) | ||
| ? STRONG_CONGESTION_PENALTY | ||
| : WEAK_CONGESTION_PENALTY; | ||
|
|
||
| int congestion_penalty = static_cast<int>(in_ratio * in_ratio * base_penalty_coeff) + | ||
| static_cast<int>(out_ratio * out_ratio * base_penalty_coeff); | ||
|
|
||
| int total_award = tile_award + time_bonus - congestion_penalty; | ||
| updateAward(locs_with_award, tile_loc_candidate, total_award); | ||
| } | ||
| } | ||
|
|
@@ -983,11 +1064,17 @@ mlir::neura::calculateAward(Operation *op, std::set<Operation *> &critical_ops, | |
| std::vector<std::pair<MappingLoc, int>> locs_award_vec( | ||
| locs_with_award.begin(), locs_with_award.end()); | ||
|
|
||
| // Sorts by award (descending). | ||
| // Sorts by award (descending). Use stable sort/tie-breaker logic | ||
| // to minimize noise in mapping results. | ||
| std::sort( | ||
| locs_award_vec.begin(), locs_award_vec.end(), | ||
| [](const std::pair<MappingLoc, int> &a, | ||
| const std::pair<MappingLoc, int> &b) { return a.second > b.second; }); | ||
| const std::pair<MappingLoc, int> &b) { | ||
| if (a.second != b.second) | ||
| return a.second > b.second; | ||
| // Tie-breaker: earlier time step first. | ||
| return a.first.time_step < b.first.time_step; | ||
| }); | ||
| // TODO: Needs to handle tie case and prioritize lower resource utilization, | ||
| // however, compiled II becomes worse after adding this tie-breaker: | ||
| // https://github.com/coredac/dataflow/issues/59. | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.