diff --git a/src/scope/core/pipelines/process.py b/src/scope/core/pipelines/process.py
index 8704c2626..2f8e15231 100644
--- a/src/scope/core/pipelines/process.py
+++ b/src/scope/core/pipelines/process.py
@@ -16,8 +16,8 @@ def preprocess_chunk(
     frames = []
     for frame in chunk:
-        # Move to pipeline device
-        frame = frame.to(device=device, dtype=dtype)
+        # Move to pipeline device first (likely as uint8), then convert dtype on device
+        frame = frame.to(device=device).to(dtype=dtype)
         frame = rearrange(frame, "T H W C -> T C H W")
 
         _, _, H, W = frame.shape
 
diff --git a/src/scope/server/frame_processor.py b/src/scope/server/frame_processor.py
index b6e18bbb6..ca58d0c24 100644
--- a/src/scope/server/frame_processor.py
+++ b/src/scope/server/frame_processor.py
@@ -836,15 +836,14 @@ def prepare_chunk(self, chunk_size: int) -> list[torch.Tensor]:
         for _ in range(last_idx + 1):
             self.frame_buffer.popleft()
 
-        # Convert VideoFrames to tensors
+        # Convert VideoFrames to tensors (keep as uint8, GPU will handle dtype conversion)
         tensor_frames = []
        for video_frame in video_frames:
-            # Convert VideoFrame into (1, H, W, C) tensor on cpu
+            # Convert VideoFrame into (1, H, W, C) uint8 tensor on cpu
             # The T=1 dimension is expected by preprocess_chunk which rearranges T H W C -> T C H W
-            tensor = (
-                torch.from_numpy(video_frame.to_ndarray(format="rgb24"))
-                .float()
-                .unsqueeze(0)
+            # Note: keep uint8 here and let the pipeline's preprocess_chunk convert to the target dtype on the GPU
+            tensor = torch.from_numpy(video_frame.to_ndarray(format="rgb24")).unsqueeze(
+                0
             )
             tensor_frames.append(tensor)
 
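
Note (reviewer sketch, not part of the patch): the point of both hunks is that the host-to-device copy now moves uint8 frames (1 byte per element) and the dtype conversion happens on the GPU, instead of inflating each frame to float32 (4 bytes per element) on the CPU before the copy. A minimal illustration of the two paths, assuming a CUDA device is available; the 512x512 frame size is hypothetical:

    import torch

    # A single (T=1, H, W, C) RGB frame, as produced by to_ndarray(format="rgb24").
    frame = torch.randint(0, 256, (1, 512, 512, 3), dtype=torch.uint8)

    # Old path: convert on the CPU, then copy ~3 MB of float32 over PCIe.
    old = frame.float().to(device="cuda")

    # New path: copy ~0.75 MB of uint8, then convert on the device.
    # (preprocess_chunk would pass the pipeline dtype, e.g. float16, here.)
    new = frame.to(device="cuda").to(dtype=torch.float32)

    assert torch.equal(old, new)  # both paths yield identical values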