diff --git a/examples/time_in_zone/scripts/draw_zones.py b/examples/time_in_zone/scripts/draw_zones.py index 74508ccd1..37d5149b8 100644 --- a/examples/time_in_zone/scripts/draw_zones.py +++ b/examples/time_in_zone/scripts/draw_zones.py @@ -30,11 +30,46 @@ def resolve_source(source_path: str) -> np.ndarray | None: image = cv2.imread(source_path) if image is not None: - return image + return resize_to_fit_screen(image) frame_generator = sv.get_video_frames_generator(source_path=source_path) frame = next(frame_generator) - return frame + return resize_to_fit_screen(frame) + + +def resize_to_fit_screen( + image: np.ndarray, max_width: int = 1200, max_height: int = 800 +) -> np.ndarray: + """ + Resize image to fit screen while maintaining aspect ratio. + + Args: + image: Input image + max_width: Maximum width for display + max_height: Maximum height for display + + Returns: + Resized image + """ + height, width = image.shape[:2] + + # Calculate scaling factor + scale_w = max_width / width + scale_h = max_height / height + scale = min(scale_w, scale_h, 1.0) # Don't upscale if image is smaller + + if scale < 1.0: + new_width = int(width * scale) + new_height = int(height * scale) + resized = cv2.resize( + image, (new_width, new_height), interpolation=cv2.INTER_AREA + ) + print( + f"Video resolution resized from {width}x{height} -> {new_width}x{new_height}" + ) + return resized + + return image def mouse_event(event: int, x: int, y: int, flags: int, param: Any) -> None: @@ -119,8 +154,46 @@ def redraw_polygons(image: np.ndarray) -> None: ) -def save_polygons_to_json(polygons, target_path): +def convert_coordinates_to_original(polygons, original_size, display_size): + """ + Convert coordinates from display size back to original video size. + + Args: + polygons: List of polygons with display coordinates + original_size: (width, height) of original video + display_size: (width, height) of display window + + Returns: + List of polygons with original coordinates + """ + orig_w, orig_h = original_size + disp_w, disp_h = display_size + + scale_x = orig_w / disp_w + scale_y = orig_h / disp_h + + converted_polygons = [] + for polygon in polygons: + if polygon: # Skip empty polygons + converted_polygon = [] + for x, y in polygon: + orig_x = int(x * scale_x) + orig_y = int(y * scale_y) + converted_polygon.append([orig_x, orig_y]) + converted_polygons.append(converted_polygon) + + return converted_polygons + + +def save_polygons_to_json(polygons, target_path, original_size=None, display_size=None): data_to_save = polygons if polygons[-1] else polygons[:-1] + + # Convert coordinates back to original size if needed + if original_size and display_size: + data_to_save = convert_coordinates_to_original( + data_to_save, original_size, display_size + ) + with open(target_path, "w") as f: json.dump(data_to_save, f) @@ -132,6 +205,18 @@ def main(source_path: str, zone_configuration_path: str) -> None: print("Failed to load source image.") return + # Get original video dimensions + cap = cv2.VideoCapture(source_path) + original_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + original_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + cap.release() + + # Get display dimensions + display_height, display_width = original_image.shape[:2] + + print(f"Original video size: {original_width}x{original_height}") + print(f"Display size: {display_width}x{display_height}") + image = original_image.copy() cv2.imshow(WINDOW_NAME, image) cv2.setMouseCallback(WINDOW_NAME, mouse_event, image) @@ -144,8 +229,14 @@ def main(source_path: str, zone_configuration_path: str) -> None: POLYGONS[-1] = [] current_mouse_position = None elif key == KEY_SAVE: - save_polygons_to_json(POLYGONS, zone_configuration_path) + save_polygons_to_json( + POLYGONS, + zone_configuration_path, + (original_width, original_height), + (display_width, display_height), + ) print(f"Polygons saved to {zone_configuration_path}") + print("Coordinates converted to original video size.") break redraw(image, original_image) if key == KEY_QUIT: diff --git a/examples/time_in_zone/ultralytics_file_example.py b/examples/time_in_zone/ultralytics_file_example.py index 1b83108df..7ee450928 100644 --- a/examples/time_in_zone/ultralytics_file_example.py +++ b/examples/time_in_zone/ultralytics_file_example.py @@ -23,6 +23,7 @@ def main( confidence: float, iou: float, classes: list[int], + output_video_path: str = None, ) -> None: model = YOLO(weights) tracker = sv.ByteTrack(minimum_matching_threshold=0.5) @@ -39,6 +40,32 @@ def main( ] timers = [FPSBasedTimer(video_info.fps) for _ in zones] + # Video writer setup + video_writer = None + if output_video_path: + # Use Twitter-compatible codec - try H264 first, then XVID + try: + fourcc = cv2.VideoWriter_fourcc(*"H264") + video_writer = cv2.VideoWriter( + output_video_path, + fourcc, + video_info.fps, + (video_info.width, video_info.height), + ) + print(f"Video output being saved: {output_video_path}") + print("Using Twitter-compatible H.264 codec") + except: + # H264 desteklenmiyorsa XVID kullan + fourcc = cv2.VideoWriter_fourcc(*"XVID") + video_writer = cv2.VideoWriter( + output_video_path, + fourcc, + video_info.fps, + (video_info.width, video_info.height), + ) + print(f"Video output being saved: {output_video_path}") + print("Using XVID codec (convert with FFmpeg for Twitter)") + for frame in frames_generator: results = model(frame, verbose=False, device=device, conf=confidence)[0] detections = sv.Detections.from_ultralytics(results) @@ -74,8 +101,19 @@ def main( ) cv2.imshow("Processed Video", annotated_frame) + + # Save frame to output video if writer is available + if video_writer is not None: + video_writer.write(annotated_frame) + if cv2.waitKey(1) & 0xFF == ord("q"): break + + # Cleanup + if video_writer is not None: + video_writer.release() + print(f"Video successfully saved: {output_video_path}") + cv2.destroyAllWindows() @@ -126,6 +164,12 @@ def main( default=[], help="List of class IDs to track. If empty, all classes are tracked.", ) + parser.add_argument( + "--output_video_path", + type=str, + default=None, + help="Path to save the output video. If not provided, video will only be displayed.", + ) args = parser.parse_args() main( @@ -136,4 +180,5 @@ def main( confidence=args.confidence_threshold, iou=args.iou_threshold, classes=args.classes, + output_video_path=args.output_video_path, )