164 changes: 160 additions & 4 deletions src/stream_decrypt.rs
@@ -10,7 +10,7 @@

use crate::{
decrypt::decrypt_chunk, get_root_data_map_parallel, utils::extract_hashes,
utils::get_chunk_index, ChunkInfo, DataMap, Result,
ChunkInfo, DataMap, Result,
};
use bytes::Bytes;
use std::ops::Range;
@@ -179,9 +179,10 @@ where
return Ok(Bytes::new());
}

// Calculate which chunks we need
let start_chunk = get_chunk_index(file_size, start);
let end_chunk = get_chunk_index(file_size, end_pos.saturating_sub(1));
// Calculate which chunks we need using actual chunk sizes from data map
// This avoids issues with different MAX_CHUNK_SIZE schemes
let start_chunk = self.get_chunk_index_from_infos(start);
let end_chunk = self.get_chunk_index_from_infos(end_pos.saturating_sub(1));

// Collect the chunk hashes we need
let mut required_hashes = Vec::new();
@@ -235,6 +236,34 @@ where
.filter(|info| info.index < chunk_index)
.fold(0, |acc, chunk| acc + chunk.src_size)
}

/// Calculate chunk index from position based on actual chunk sizes in chunk_infos.
/// This avoids issues when the input datamap was generated using different MAX_CHUNK_SIZE schemes.
///
/// # Arguments
/// * `position` - Byte position within the file
///
/// # Returns
/// * `usize` - The chunk index that contains the given position
fn get_chunk_index_from_infos(&self, position: usize) -> usize {
let mut accumulated_size = 0;

for chunk_info in &self.chunk_infos {
// Check if position falls within this chunk
if position >= accumulated_size && position < accumulated_size + chunk_info.src_size {
return chunk_info.index;
}
accumulated_size += chunk_info.src_size;
}

// If position is beyond all chunks, return the last chunk index
// This handles the case where position == file_size
if let Some(last_chunk) = self.chunk_infos.last() {
last_chunk.index
} else {
0 // Fallback for empty chunk_infos (shouldn't happen in practice)
}
}
}
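
For readers skimming the diff: a minimal standalone sketch of the scan above, with a simplified (index, src_size) pair standing in for ChunkInfo. Names here are illustrative only — not part of this patch or the crate's API.

/// Walk (index, src_size) pairs, accumulating sizes until the chunk
/// containing `position` is found; clamp to the last index otherwise.
fn chunk_index_for(position: usize, chunks: &[(usize, usize)]) -> usize {
    let mut accumulated = 0;
    for &(index, src_size) in chunks {
        if position < accumulated + src_size {
            return index; // position falls inside this chunk
        }
        accumulated += src_size;
    }
    // Position beyond all chunks (e.g. position == file_size):
    // return the last chunk index, or 0 for an empty list.
    chunks.last().map(|&(index, _)| index).unwrap_or(0)
}

fn main() {
    // Three chunks of sizes 4, 4 and 5 bytes (file size 13)
    let chunks = [(0, 4), (1, 4), (2, 5)];
    assert_eq!(chunk_index_for(0, &chunks), 0);
    assert_eq!(chunk_index_for(4, &chunks), 1); // first byte of chunk 1
    assert_eq!(chunk_index_for(12, &chunks), 2); // last byte of chunk 2
    assert_eq!(chunk_index_for(13, &chunks), 2); // position == file_size clamps
}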

impl<F> Iterator for DecryptionStream<F>
@@ -925,4 +954,131 @@ mod tests {

Ok(())
}

#[test]
fn test_chunk_boundary_underflow_reproduction() -> Result<()> {
// This test reproduces the reported scenario:
// file_size: 16405289714, start_position: 4194304u64
// We expect start_chunk_pos <= start_position, and if smaller,
// the difference should be less than 1MB (1024*1024)

let file_size = 16404310194u64 as usize; // ~15.3 GiB
let start_position = 4194304u64 as usize; // 4 MiB into the file

// Simulate a different MAX_CHUNK_SIZE scheme
let max_chunk_size = crate::MAX_CHUNK_SIZE * 2;

println!("Testing with file_size: {}, start_position: {}", file_size, start_position);

// Build a mock data map to drive get_chunk_index_from_infos, avoiding any
// dependency on the MAX_CHUNK_SIZE utility functions. Its chunk infos carry
// realistic src_size values that match how the encryption algorithm would
// chunk a file of this size.

let num_chunks = crate::utils::get_num_chunks_with_variable_max(file_size, max_chunk_size);
println!("Total number of chunks: {}", num_chunks);

let mut chunk_infos = Vec::new();
let mut accumulated_size = 0;

// Generate chunk infos with sizes that match the actual chunking algorithm
for chunk_index in 0..num_chunks {
let chunk_size = crate::utils::get_chunk_size_with_variable_max(file_size, chunk_index, max_chunk_size);

// Create a ChunkInfo with dummy hashes; only src_size and index matter here
let chunk_info = ChunkInfo {
index: chunk_index,
dst_hash: XorName::from_content(&[chunk_index as u8]), // Dummy hash
src_hash: XorName::from_content(&[(chunk_index + 1) as u8]), // Dummy hash
src_size: chunk_size,
};

chunk_infos.push(chunk_info);
accumulated_size += chunk_size;
}

// Verify the total size matches
assert_eq!(accumulated_size, file_size, "Mock data map total size should match file size");

// Create a mock DecryptionStream to test get_chunk_start_position
let data_map = DataMap::new(chunk_infos);

// Create a dummy get_chunk_parallel function (won't be used in this test)
let get_chunk_parallel = |_hashes: &[(usize, XorName)]| -> Result<Vec<(usize, Bytes)>> {
Ok(Vec::new())
};

// Create a mock DecryptionStream
let mock_stream = DecryptionStream {
chunk_infos: data_map.infos(),
src_hashes: vec![XorName::from_content(&[0u8]); num_chunks], // Dummy hashes
get_chunk_parallel,
current_batch_start: 0,
current_batch_chunks: Vec::new(),
current_batch_index: 0,
};

// Use the new get_chunk_index_from_infos method instead of the utility function
let start_chunk_index = mock_stream.get_chunk_index_from_infos(start_position);
println!("Calculated start_chunk_index using get_chunk_index_from_infos: {}", start_chunk_index);

// Test get_chunk_start_position
let start_chunk_pos = mock_stream.get_chunk_start_position(start_chunk_index);

println!("start_chunk_pos: {}", start_chunk_pos);
println!("start_position: {}", start_position);

// Verify our expectations
if start_chunk_pos <= start_position {
println!("✓ start_chunk_pos <= start_position (as expected)");

if start_chunk_pos < start_position {
let diff = start_position - start_chunk_pos;
println!("Difference: {}", diff);

// The difference should be less than 1MB (1024*1024 = 1048576)
assert!(
diff < 1024 * 1024,
"Difference {} should be less than 1MB (1048576)",
diff
);
println!("✓ Difference {} is less than 1MB", diff);
} else {
println!("start_chunk_pos exactly equals start_position");
}
} else {
// This is the problematic case that causes underflow
let would_underflow = start_chunk_pos - start_position;
panic!(
"❌ start_chunk_pos ({}) > start_position ({}) by {}, this would cause underflow!",
start_chunk_pos, start_position, would_underflow
);
}

// Additional verification: calculate what the internal_offset would be
let internal_offset = start_position - start_chunk_pos;
println!("Calculated internal_offset: {}", internal_offset);

// Verify this is reasonable (should be less than chunk size)
// Get chunk size from the actual data map instead of utility function
let chunk_size = mock_stream.chunk_infos
.iter()
.find(|info| info.index == start_chunk_index)
.map(|info| info.src_size)
.unwrap_or(0);
println!("Chunk {} size: {}", start_chunk_index, chunk_size);

assert!(
internal_offset < chunk_size,
"internal_offset {} should be less than chunk size {}",
internal_offset,
chunk_size
);

println!("✓ Test passed: No underflow condition detected");
Ok(())
}
}
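
The motivation for the data-map-driven lookup, as a standalone sketch: when a data map was produced under a doubled max chunk size, an index derived from the compile-time constant points at the wrong chunk. The constants and values below are local to the example (assumed, not the crate's), and remainder handling is omitted.

fn main() {
    const MAX_CHUNK_SIZE: usize = 1024 * 1024; // assumed 1 MiB for illustration
    let position = 3 * MAX_CHUNK_SIZE; // byte offset 3 MiB into a 10 MiB file

    // Index derived from the compile-time max chunk size:
    let fixed_index = position / MAX_CHUNK_SIZE; // = 3

    // Index derived from the data map's actual chunk sizes, here produced
    // under a doubled max chunk size (five 2 MiB chunks):
    let actual_sizes = [2 * MAX_CHUNK_SIZE; 5];
    let mut accumulated = 0;
    let mut actual_index = actual_sizes.len() - 1;
    for (i, size) in actual_sizes.iter().enumerate() {
        if position < accumulated + size {
            actual_index = i;
            break;
        }
        accumulated += size;
    }

    // The two schemes disagree: chunk 3 versus chunk 1.
    assert_eq!(fixed_index, 3);
    assert_eq!(actual_index, 1);
}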
36 changes: 25 additions & 11 deletions src/utils.rs
@@ -59,49 +59,63 @@ pub(crate) fn get_pki(

// Returns the number of chunks according to file size.
pub(crate) fn get_num_chunks(file_size: usize) -> usize {
get_num_chunks_with_variable_max(file_size, crate::MAX_CHUNK_SIZE)
}

// Returns the number of chunks according to file size and a caller-supplied max chunk size.
pub(crate) fn get_num_chunks_with_variable_max(file_size: usize, max_chunk_size: usize) -> usize {
if file_size < (3 * crate::MIN_CHUNK_SIZE) {
return 0;
}
if file_size < (3 * crate::MAX_CHUNK_SIZE) {
if file_size < (3 * max_chunk_size) {
return 3;
}
if file_size % crate::MAX_CHUNK_SIZE == 0 {
file_size / crate::MAX_CHUNK_SIZE
if file_size % max_chunk_size == 0 {
file_size / max_chunk_size
} else {
(file_size / crate::MAX_CHUNK_SIZE) + 1
(file_size / max_chunk_size) + 1
}
}

// Returns the size of a chunk according to file size.
pub(crate) fn get_chunk_size(file_size: usize, chunk_index: usize) -> usize {
get_chunk_size_with_variable_max(file_size, chunk_index, crate::MAX_CHUNK_SIZE)
}

// Returns the size of a chunk according to file size and a caller-supplied max chunk size.
pub(crate) fn get_chunk_size_with_variable_max(
file_size: usize,
chunk_index: usize,
max_chunk_size: usize,
) -> usize {
if file_size < 3 * crate::MIN_CHUNK_SIZE {
return 0;
}
if file_size < 3 * crate::MAX_CHUNK_SIZE {
if file_size < 3 * max_chunk_size {
if chunk_index < 2 {
return file_size / 3;
} else {
// When file_size % 3 > 0, the third (last) chunk includes the remainder
return file_size - (2 * (file_size / 3));
}
}
let total_chunks = get_num_chunks(file_size);
let total_chunks = get_num_chunks_with_variable_max(file_size, max_chunk_size);
if chunk_index < total_chunks - 2 {
return crate::MAX_CHUNK_SIZE;
return max_chunk_size;
}
let remainder = file_size % crate::MAX_CHUNK_SIZE;
let remainder = file_size % max_chunk_size;
let penultimate = (total_chunks - 2) == chunk_index;
if remainder == 0 {
return crate::MAX_CHUNK_SIZE;
return max_chunk_size;
}
if remainder < crate::MIN_CHUNK_SIZE {
if penultimate {
crate::MAX_CHUNK_SIZE - crate::MIN_CHUNK_SIZE
max_chunk_size - crate::MIN_CHUNK_SIZE
} else {
crate::MIN_CHUNK_SIZE + remainder
}
} else if penultimate {
crate::MAX_CHUNK_SIZE
max_chunk_size
} else {
remainder
}
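
To make the remainder rules above concrete, here is a local reimplementation with assumed constants (not the crate's values) and the tiny-file guards omitted; it shows how the last two chunks are rebalanced when the remainder is smaller than MIN_CHUNK_SIZE.

const MIN_CHUNK_SIZE: usize = 1024; // assumption for illustration
const MAX_CHUNK_SIZE: usize = 1 << 20; // assumption: 1 MiB

fn chunk_size(file_size: usize, index: usize) -> usize {
    let total = file_size.div_ceil(MAX_CHUNK_SIZE);
    let remainder = file_size % MAX_CHUNK_SIZE;
    if index < total - 2 || remainder == 0 {
        return MAX_CHUNK_SIZE;
    }
    let penultimate = index == total - 2;
    if remainder < MIN_CHUNK_SIZE {
        // Rebalance so the final chunk stays >= MIN_CHUNK_SIZE
        if penultimate {
            MAX_CHUNK_SIZE - MIN_CHUNK_SIZE
        } else {
            MIN_CHUNK_SIZE + remainder
        }
    } else if penultimate {
        MAX_CHUNK_SIZE
    } else {
        remainder
    }
}

fn main() {
    // 4 MiB + 100 bytes: the remainder (100) is < MIN_CHUNK_SIZE (1024),
    // so the penultimate chunk shrinks and the last chunk absorbs the rest.
    let file_size = 4 * MAX_CHUNK_SIZE + 100;
    let total = file_size.div_ceil(MAX_CHUNK_SIZE); // 5 chunks
    let sizes: Vec<usize> = (0..total).map(|i| chunk_size(file_size, i)).collect();
    assert_eq!(sizes, [1048576, 1048576, 1048576, 1047552, 1124]);
    assert_eq!(sizes.iter().sum::<usize>(), file_size);
}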