164 changes: 160 additions & 4 deletions src/stream_decrypt.rs
@@ -10,7 +10,7 @@

use crate::{
decrypt::decrypt_chunk, get_root_data_map_parallel, utils::extract_hashes,
utils::get_chunk_index, ChunkInfo, DataMap, Result,
ChunkInfo, DataMap, Result,
};
use bytes::Bytes;
use std::ops::Range;
@@ -179,9 +179,10 @@ where
return Ok(Bytes::new());
}

// Calculate which chunks we need
let start_chunk = get_chunk_index(file_size, start);
let end_chunk = get_chunk_index(file_size, end_pos.saturating_sub(1));
// Calculate which chunks we need using actual chunk sizes from data map
// This avoids issues with different MAX_CHUNK_SIZE schemes
let start_chunk = self.get_chunk_index_from_infos(start);
let end_chunk = self.get_chunk_index_from_infos(end_pos.saturating_sub(1));

// Collect the chunk hashes we need
let mut required_hashes = Vec::new();
@@ -235,6 +236,34 @@ where
.filter(|info| info.index < chunk_index)
.fold(0, |acc, chunk| acc + chunk.src_size)
}

/// Calculate chunk index from position based on actual chunk sizes in chunk_infos.
/// This avoids issues when the input datamap was generated using different MAX_CHUNK_SIZE schemes.
///
/// # Arguments
/// * `position` - Byte position within the file
///
/// # Returns
/// * `usize` - The chunk index that contains the given position
fn get_chunk_index_from_infos(&self, position: usize) -> usize {
let mut accumulated_size = 0;

for chunk_info in &self.chunk_infos {
// Check if position falls within this chunk
if position >= accumulated_size && position < accumulated_size + chunk_info.src_size {
return chunk_info.index;
}
accumulated_size += chunk_info.src_size;
}

// If position is beyond all chunks, return the last chunk index
// This handles the case where position == file_size
if let Some(last_chunk) = self.chunk_infos.last() {
last_chunk.index
} else {
0 // Fallback for empty chunk_infos (shouldn't happen in practice)
}
}
}
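
For readers skimming the diff: a minimal standalone sketch of the scan above, with a simplified (index, src_size) pair standing in for ChunkInfo. Names here are illustrative only — not part of this patch or the crate's API.

/// Walk (index, src_size) pairs, accumulating sizes until the chunk
/// containing `position` is found; clamp to the last index otherwise.
fn chunk_index_for(position: usize, chunks: &[(usize, usize)]) -> usize {
    let mut accumulated = 0;
    for &(index, src_size) in chunks {
        if position < accumulated + src_size {
            return index; // position falls inside this chunk
        }
        accumulated += src_size;
    }
    // Position beyond all chunks (e.g. position == file_size):
    // return the last chunk index, or 0 for an empty list.
    chunks.last().map(|&(index, _)| index).unwrap_or(0)
}

fn main() {
    // Three chunks of sizes 4, 4 and 5 bytes (file size 13)
    let chunks = [(0, 4), (1, 4), (2, 5)];
    assert_eq!(chunk_index_for(0, &chunks), 0);
    assert_eq!(chunk_index_for(4, &chunks), 1); // first byte of chunk 1
    assert_eq!(chunk_index_for(12, &chunks), 2); // last byte of chunk 2
    assert_eq!(chunk_index_for(13, &chunks), 2); // position == file_size clamps
}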

impl<F> Iterator for DecryptionStream<F>
@@ -925,4 +954,131 @@ mod tests {

Ok(())
}

#[test]
fn test_chunk_boundary_underflow_reproduction() -> Result<()> {
// This test reproduces the reported scenario:
// file_size: 16405289714, start_position: 4194304u64
// We expect start_chunk_pos <= start_position, and if smaller,
// the difference should be less than 1MB (1024*1024)

let file_size = 16404310194u64 as usize; // ~15.3 GiB
let start_position = 4194304u64 as usize; // 4 MiB into the file

// Simulate a different MAX_CHUNK_SIZE scheme
let max_chunk_size = crate::MAX_CHUNK_SIZE * 2;

println!("Testing with file_size: {}, start_position: {}", file_size, start_position);

// Build a mock data map to drive get_chunk_index_from_infos, avoiding any
// dependency on the MAX_CHUNK_SIZE utility functions. Its chunk infos carry
// realistic src_size values that match how the encryption algorithm would
// chunk a file of this size.

let num_chunks = crate::utils::get_num_chunks_with_variable_max(file_size, max_chunk_size);
println!("Total number of chunks: {}", num_chunks);

let mut chunk_infos = Vec::new();
let mut accumulated_size = 0;

// Generate chunk infos with sizes that match the actual chunking algorithm
for chunk_index in 0..num_chunks {
let chunk_size = crate::utils::get_chunk_size_with_variable_max(file_size, chunk_index, max_chunk_size);

// Create a ChunkInfo with dummy hashes; only src_size and index matter here
let chunk_info = ChunkInfo {
index: chunk_index,
dst_hash: XorName::from_content(&[chunk_index as u8]), // Dummy hash
src_hash: XorName::from_content(&[(chunk_index + 1) as u8]), // Dummy hash
src_size: chunk_size,
};

chunk_infos.push(chunk_info);
accumulated_size += chunk_size;
}

// Verify the total size matches
assert_eq!(accumulated_size, file_size, "Mock data map total size should match file size");

// Create a mock DecryptionStream to test get_chunk_start_position
let data_map = DataMap::new(chunk_infos);

// Create a dummy get_chunk_parallel function (won't be used in this test)
let get_chunk_parallel = |_hashes: &[(usize, XorName)]| -> Result<Vec<(usize, Bytes)>> {
Ok(Vec::new())
};

// Create a mock DecryptionStream
let mock_stream = DecryptionStream {
chunk_infos: data_map.infos(),
src_hashes: vec![XorName::from_content(&[0u8]); num_chunks], // Dummy hashes
get_chunk_parallel,
current_batch_start: 0,
current_batch_chunks: Vec::new(),
current_batch_index: 0,
};

// Use the new get_chunk_index_from_infos method instead of the utility function
let start_chunk_index = mock_stream.get_chunk_index_from_infos(start_position);
println!("Calculated start_chunk_index using get_chunk_index_from_infos: {}", start_chunk_index);

// Test get_chunk_start_position
let start_chunk_pos = mock_stream.get_chunk_start_position(start_chunk_index);

println!("start_chunk_pos: {}", start_chunk_pos);
println!("start_position: {}", start_position);

// Verify our expectations
if start_chunk_pos <= start_position {
println!("✓ start_chunk_pos <= start_position (as expected)");

if start_chunk_pos < start_position {
let diff = start_position - start_chunk_pos;
println!("Difference: {}", diff);

// The difference should be less than 1MB (1024*1024 = 1048576)
assert!(
diff < 1024 * 1024,
"Difference {} should be less than 1MB (1048576)",
diff
);
println!("✓ Difference {} is less than 1MB", diff);
} else {
println!("start_chunk_pos exactly equals start_position");
}
} else {
// This is the problematic case that causes underflow
let would_underflow = start_chunk_pos - start_position;
panic!(
"❌ start_chunk_pos ({}) > start_position ({}) by {}, this would cause underflow!",
start_chunk_pos, start_position, would_underflow
);
}

// Additional verification: calculate what the internal_offset would be
let internal_offset = start_position - start_chunk_pos;
println!("Calculated internal_offset: {}", internal_offset);

// Verify this is reasonable (should be less than chunk size)
// Get chunk size from the actual data map instead of utility function
let chunk_size = mock_stream.chunk_infos
.iter()
.find(|info| info.index == start_chunk_index)
.map(|info| info.src_size)
.unwrap_or(0);
println!("Chunk {} size: {}", start_chunk_index, chunk_size);

assert!(
internal_offset < chunk_size,
"internal_offset {} should be less than chunk size {}",
internal_offset,
chunk_size
);

println!("✓ Test passed: No underflow condition detected");
Ok(())
}
}
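
The motivation for the data-map-driven lookup, as a standalone sketch: when a data map was produced under a doubled max chunk size, an index derived from the compile-time constant points at the wrong chunk. The constants and values below are local to the example (assumed, not the crate's), and remainder handling is omitted.

fn main() {
    const MAX_CHUNK_SIZE: usize = 1024 * 1024; // assumed 1 MiB for illustration
    let position = 3 * MAX_CHUNK_SIZE; // byte offset 3 MiB into a 10 MiB file

    // Index derived from the compile-time max chunk size:
    let fixed_index = position / MAX_CHUNK_SIZE; // = 3

    // Index derived from the data map's actual chunk sizes, here produced
    // under a doubled max chunk size (five 2 MiB chunks):
    let actual_sizes = [2 * MAX_CHUNK_SIZE; 5];
    let mut accumulated = 0;
    let mut actual_index = actual_sizes.len() - 1;
    for (i, size) in actual_sizes.iter().enumerate() {
        if position < accumulated + size {
            actual_index = i;
            break;
        }
        accumulated += size;
    }

    // The two schemes disagree: chunk 3 versus chunk 1.
    assert_eq!(fixed_index, 3);
    assert_eq!(actual_index, 1);
}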
36 changes: 25 additions & 11 deletions src/utils.rs
@@ -59,49 +59,63 @@ pub(crate) fn get_pki(

// Returns the number of chunks according to file size.
pub(crate) fn get_num_chunks(file_size: usize) -> usize {
get_num_chunks_with_variable_max(file_size, crate::MAX_CHUNK_SIZE)
}

// Returns the number of chunks according to file size and a caller-supplied max chunk size.
pub(crate) fn get_num_chunks_with_variable_max(file_size: usize, max_chunk_size: usize) -> usize {
if file_size < (3 * crate::MIN_CHUNK_SIZE) {
return 0;
}
if file_size < (3 * crate::MAX_CHUNK_SIZE) {
if file_size < (3 * max_chunk_size) {
return 3;
}
if file_size % crate::MAX_CHUNK_SIZE == 0 {
file_size / crate::MAX_CHUNK_SIZE
if file_size % max_chunk_size == 0 {
file_size / max_chunk_size
} else {
(file_size / crate::MAX_CHUNK_SIZE) + 1
(file_size / max_chunk_size) + 1
}
}

// Returns the size of a chunk according to file size.
pub(crate) fn get_chunk_size(file_size: usize, chunk_index: usize) -> usize {
get_chunk_size_with_variable_max(file_size, chunk_index, crate::MAX_CHUNK_SIZE)
}

// Returns the size of a chunk according to file size and a caller-supplied max chunk size.
pub(crate) fn get_chunk_size_with_variable_max(
file_size: usize,
chunk_index: usize,
max_chunk_size: usize,
) -> usize {
if file_size < 3 * crate::MIN_CHUNK_SIZE {
return 0;
}
if file_size < 3 * crate::MAX_CHUNK_SIZE {
if file_size < 3 * max_chunk_size {
if chunk_index < 2 {
return file_size / 3;
} else {
// When file_size % 3 > 0, the third (last) chunk includes the remainder
return file_size - (2 * (file_size / 3));
}
}
let total_chunks = get_num_chunks(file_size);
let total_chunks = get_num_chunks_with_variable_max(file_size, max_chunk_size);
if chunk_index < total_chunks - 2 {
return crate::MAX_CHUNK_SIZE;
return max_chunk_size;
}
let remainder = file_size % crate::MAX_CHUNK_SIZE;
let remainder = file_size % max_chunk_size;
let penultimate = (total_chunks - 2) == chunk_index;
if remainder == 0 {
return crate::MAX_CHUNK_SIZE;
return max_chunk_size;
}
if remainder < crate::MIN_CHUNK_SIZE {
if penultimate {
crate::MAX_CHUNK_SIZE - crate::MIN_CHUNK_SIZE
max_chunk_size - crate::MIN_CHUNK_SIZE
} else {
crate::MIN_CHUNK_SIZE + remainder
}
} else if penultimate {
crate::MAX_CHUNK_SIZE
max_chunk_size
} else {
remainder
}
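
To make the remainder rules above concrete, here is a local reimplementation with assumed constants (not the crate's values) and the tiny-file guards omitted; it shows how the last two chunks are rebalanced when the remainder is smaller than MIN_CHUNK_SIZE.

const MIN_CHUNK_SIZE: usize = 1024; // assumption for illustration
const MAX_CHUNK_SIZE: usize = 1 << 20; // assumption: 1 MiB

fn chunk_size(file_size: usize, index: usize) -> usize {
    let total = file_size.div_ceil(MAX_CHUNK_SIZE);
    let remainder = file_size % MAX_CHUNK_SIZE;
    if index < total - 2 || remainder == 0 {
        return MAX_CHUNK_SIZE;
    }
    let penultimate = index == total - 2;
    if remainder < MIN_CHUNK_SIZE {
        // Rebalance so the final chunk stays >= MIN_CHUNK_SIZE
        if penultimate {
            MAX_CHUNK_SIZE - MIN_CHUNK_SIZE
        } else {
            MIN_CHUNK_SIZE + remainder
        }
    } else if penultimate {
        MAX_CHUNK_SIZE
    } else {
        remainder
    }
}

fn main() {
    // 4 MiB + 100 bytes: the remainder (100) is < MIN_CHUNK_SIZE (1024),
    // so the penultimate chunk shrinks and the last chunk absorbs the rest.
    let file_size = 4 * MAX_CHUNK_SIZE + 100;
    let total = file_size.div_ceil(MAX_CHUNK_SIZE); // 5 chunks
    let sizes: Vec<usize> = (0..total).map(|i| chunk_size(file_size, i)).collect();
    assert_eq!(sizes, [1048576, 1048576, 1048576, 1047552, 1124]);
    assert_eq!(sizes.iter().sum::<usize>(), file_size);
}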