11from __future__ import annotations
22
33import logging
4+ from typing import NamedTuple
45
56from packaging .version import InvalidVersion
67from packaging .version import parse as parse_version
1011 ComparisonResults ,
1112 DiffItem ,
1213 DiffType ,
14+ FileAnalysis ,
15+ FileInfo ,
1316 SizeAnalysisResults ,
1417 SizeMetricDiffItem ,
1518 TreemapElement ,
@@ -42,6 +45,11 @@ def compare_size_analysis(
4245
4346 diff_items = []
4447
48+ head_renamed_paths , base_renamed_paths = _find_renamed_paths (
49+ head_size_analysis_results .file_analysis ,
50+ base_size_analysis_results .file_analysis ,
51+ )
52+
4553 if not skip_diff_item_comparison :
4654 for path in sorted (all_paths ):
4755 head_elements = head_files .get (path , [])
@@ -77,6 +85,11 @@ def compare_size_analysis(
7785 # Process unmatched head elements (added)
7886 for head_element in unmatched_head :
7987 head_size = head_element .size
88+
89+ # Skip if this is a renamed file (same hash exists in base at different path)
90+ if path in head_renamed_paths :
91+ continue
92+
8093 if head_size == 0 :
8194 continue
8295
@@ -94,6 +107,11 @@ def compare_size_analysis(
94107 # Process unmatched base elements (removed)
95108 for base_element in unmatched_base :
96109 base_size = base_element .size
110+
111+ # Skip if this is a renamed file (same hash exists in head at different path)
112+ if path in base_renamed_paths :
113+ continue
114+
97115 if base_size == 0 :
98116 continue
99117
@@ -173,13 +191,20 @@ def _should_skip_diff_item_comparison(
173191 return has_mismatched_major or has_mismatched_minor
174192
175193
class MatchedElements(NamedTuple):
    """Outcome of pairing up head and base treemap elements.

    Being a ``NamedTuple``, it still unpacks positionally as
    ``(matched_pairs, unmatched_head, unmatched_base)``.
    """

    # Elements that were paired with a counterpart on the other side
    # (presumably ordered as (head, base) -- confirm against _match_elements).
    matched_pairs: list[tuple[TreemapElement, TreemapElement]]
    # Head elements for which no base counterpart was found.
    unmatched_head: list[TreemapElement]
    # Base elements for which no head counterpart was found.
    unmatched_base: list[TreemapElement]
200+
201+
176202def _match_elements (
177203 head_elements : list [TreemapElement ], base_elements : list [TreemapElement ]
178- ) -> tuple [ list [ tuple [ TreemapElement , TreemapElement ]], list [ TreemapElement ], list [ TreemapElement ]] :
204+ ) -> MatchedElements :
179205 """
180206 Intelligently match elements from head and base when there are duplicates.
181207 For example, in iOS processing multiple images can map to the same file name.
182- Returns: (matched_pairs, unmatched_head, unmatched_base)
183208
184209 Matching strategy:
185210 1. First, match by exact name and size
@@ -224,7 +249,7 @@ def _match_elements(
224249 elem for idx , elem in enumerate (base_elements ) if idx not in matched_base_indices
225250 ]
226251
227- return matched_pairs , unmatched_head , unmatched_base
252+ return MatchedElements ( matched_pairs , unmatched_head , unmatched_base )
228253
229254
230255def _flatten_leaf_nodes (
@@ -248,3 +273,66 @@ def _flatten_leaf_nodes(
248273 items [child_path ].extend (child_elements )
249274
250275 return items
276+
277+
def _find_renamed_paths(
    head_file_analysis: FileAnalysis | None,
    base_file_analysis: FileAnalysis | None,
) -> tuple[set[str], set[str]]:
    """Return the head and base paths that look like renames of each other.

    A rename is a content hash that appears at a path present only in head
    and also at a path present only in base. When the two sides have a
    different number of such paths for one hash (e.g. a file was renamed and
    also duplicated), only ``min(head, base)`` paths per side are treated as
    renames; the surplus stays a genuine addition or removal.

    Returns:
        ``(head_renamed_paths, base_renamed_paths)``.
    """
    head_index = _build_hash_to_paths(head_file_analysis)
    base_index = _build_hash_to_paths(base_file_analysis)

    renamed_in_head: set[str] = set()
    renamed_in_base: set[str] = set()

    for file_hash, head_paths in head_index.items():
        base_paths = base_index.get(file_hash, set())

        # Paths shared by both sides are unchanged files, not renames.
        appeared = head_paths - base_paths
        vanished = base_paths - head_paths
        if not appeared or not vanished:
            continue

        # Pair off as many paths as both sides can supply; sorting keeps the
        # choice deterministic. E.g. 1 base path + 3 head paths = 1 rename
        # + 2 additions.
        pair_count = min(len(appeared), len(vanished))
        renamed_in_head.update(sorted(appeared)[:pair_count])
        renamed_in_base.update(sorted(vanished)[:pair_count])

    return renamed_in_head, renamed_in_base
309+
310+
def _build_hash_to_paths(file_analysis: FileAnalysis | None) -> dict[str, set[str]]:
    """Map each file hash to the set of paths it occurs at.

    Returns an empty dict when ``file_analysis`` is falsy (e.g. ``None``).
    """
    paths_by_hash: dict[str, set[str]] = {}
    if file_analysis:
        # Each top-level entry is walked recursively; the helper fills the
        # mapping in place.
        for entry in file_analysis.items:
            _collect_file_hashes(entry, paths_by_hash)
    return paths_by_hash
319+
320+
321+ def _collect_file_hashes (
322+ file_info : FileInfo ,
323+ hash_to_paths : dict [str , set [str ]],
324+ parent_path : str = "" ,
325+ ) -> None :
326+ if parent_path and not file_info .path .startswith (f"{ parent_path } /" ):
327+ full_path = f"{ parent_path } /{ file_info .path } "
328+ else :
329+ full_path = file_info .path
330+
331+ if not file_info .children :
332+ if file_info .hash not in hash_to_paths :
333+ hash_to_paths [file_info .hash ] = set ()
334+ hash_to_paths [file_info .hash ].add (full_path )
335+ else :
336+ # Asset catalogs can have children
337+ for child in file_info .children :
338+ _collect_file_hashes (child , hash_to_paths , full_path )
0 commit comments