Fix mask polygon rescaling for letterboxed inputs in RF-DETR instance segmentation post-processing

PawelPeczek-Roboflow · PawelPeczek-Roboflow · commit 1a77ca61773a · 2025-10-02T11:28:16.000+02:00
diff --git a/inference/core/version.py b/inference/core/version.py
@@ -1,4 +1,4 @@
-__version__ = "0.57.4"
+__version__ = "0.58.0"
 
 
 if __name__ == "__main__":
diff --git a/inference/models/perception_encoder/vision_encoder/tokenizer.py b/inference/models/perception_encoder/vision_encoder/tokenizer.py
@@ -1,4 +1,4 @@
-""" CLIP tokenizer
+"""CLIP tokenizer
 
 Copied from https://github.com/openai/CLIP. Originally MIT License, Copyright (c) 2021 OpenAI.
 """
diff --git a/inference/models/rfdetr/__init__.py b/inference/models/rfdetr/__init__.py
@@ -1,3 +1,6 @@
-from inference.models.rfdetr.rfdetr import RFDETRObjectDetection
-from inference.models.rfdetr.rfdetr import RFDETRInstanceSegmentation
+from inference.models.rfdetr.rfdetr import (
+    RFDETRInstanceSegmentation,
+    RFDETRObjectDetection,
+)
+
 __all__ = ["RFDETRObjectDetection", "RFDETRInstanceSegmentation"]
diff --git a/inference/models/rfdetr/rfdetr.py b/inference/models/rfdetr/rfdetr.py
@@ -717,13 +717,21 @@ def make_response(
             masks = [pred[7] for pred in predictions[image_ind]]
             orig_h, orig_w = img_dims[image_ind]
             prediction_h, prediction_w = self.mask_shape[0], self.mask_shape[1]
+
             mask_preds = []
             for mask in masks:
                 points = mask2poly(mask.astype(np.uint8))
                 new_points = []
                 for point in points:
-                    new_x = point[0] * (orig_w / prediction_w)
-                    new_y = point[1] * (orig_h / prediction_h)
+                    if self.resize_method == "Stretch to":
+                        new_x = point[0] * (orig_w / prediction_w)
+                        new_y = point[1] * (orig_h / prediction_h)
+                    else:
+                        scale = max(orig_w / prediction_w, orig_h / prediction_h)
+                        pad_x = (orig_w - prediction_w * scale) / 2
+                        pad_y = (orig_h - prediction_h * scale) / 2
+                        new_x = point[0] * scale + pad_x
+                        new_y = point[1] * scale + pad_y
                     new_points.append(np.array([new_x, new_y]))
                 mask_preds.append(new_points)
             batch_mask_preds.append(mask_preds)

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-__version__ = "0.57.4"`
	`1`	`+__version__ = "0.58.0"`
`2`	`2`
`3`	`3`
`4`	`4`	`if __name__ == "__main__":`
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-""" CLIP tokenizer`
	`1`	`+"""CLIP tokenizer`
`2`	`2`
`3`	`3`	`Copied from https://github.com/openai/CLIP. Originally MIT License, Copyright (c) 2021 OpenAI.`
`4`	`4`	`"""`