huggingface
diff --git a/‎data/__init__.py‎
Lines changed: 2 additions & 1 deletion b/‎data/__init__.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎data/config.py‎
Lines changed: 101 additions & 0 deletions b/‎data/config.py‎
Lines changed: 101 additions & 0 deletions
diff --git a/‎data/constants.py‎
Lines changed: 7 additions & 0 deletions b/‎data/constants.py‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎data/loader.py‎
Lines changed: 1 addition & 3 deletions b/‎data/loader.py‎
Lines changed: 1 addition & 3 deletions
diff --git a/‎data/random_erasing.py‎
Lines changed: 31 additions & 88 deletions b/‎data/random_erasing.py‎
Lines changed: 31 additions & 88 deletions
@@ -1,4 +1,5 @@
+from data.constants import *
+from data.config import resolve_data_config
 from data.dataset import Dataset
 from data.transforms import *
 from data.loader import create_loader
-from data.random_erasing import RandomErasingTorch, RandomErasingNumpy
 
@@ -0,0 +1,101 @@
+from data.constants import *
+
+
+def resolve_data_config(model, args, default_cfg=None, verbose=True):
+    """Resolve the data pre-processing configuration for a model + dataset.
+
+    For each setting the precedence is: explicit value in `args`, then the
+    matching `default_cfg` entry, then a built-in fallback. The crop percentage
+    has no `args` override and only consults `default_cfg`.
+
+    Args:
+        model: model object; its `default_cfg` attribute, when present, is used
+            if no (or an empty) `default_cfg` is passed in.
+        args: object exposing `img_size`, `interpolation`, `mean`, `std` and
+            `model` (the model name) attributes.
+        default_cfg: optional dict of defaults with any of the keys
+            'input_size', 'interpolation', 'mean', 'std', 'crop_pct'.
+        verbose: when True, print the resolved configuration.
+
+    Returns:
+        dict with keys 'input_size', 'interpolation', 'mean', 'std', 'crop_pct'.
+
+    Raises:
+        AssertionError: if `args.img_size` is set but not an int, or if
+            `args.mean` / `args.std` have a length other than 1 or `in_chans`.
+    """
+    new_config = {}
+    # A None default avoids sharing one mutable dict between calls; a missing or
+    # None model.default_cfg degrades to an empty dict so the lookups below work.
+    if not default_cfg:
+        default_cfg = getattr(model, 'default_cfg', None) or {}
+
+    # Resolve input/image size
+    # FIXME grayscale/chans arg to use different # channels?
+    in_chans = 3
+    input_size = (in_chans, 224, 224)
+    if args.img_size is not None:
+        # FIXME support passing img_size as tuple, non-square
+        assert isinstance(args.img_size, int), 'img_size must be an int'
+        input_size = (in_chans, args.img_size, args.img_size)
+    elif 'input_size' in default_cfg:
+        input_size = default_cfg['input_size']
+    new_config['input_size'] = input_size
+
+    # resolve interpolation method
+    new_config['interpolation'] = 'bilinear'
+    if args.interpolation:
+        new_config['interpolation'] = args.interpolation
+    elif 'interpolation' in default_cfg:
+        new_config['interpolation'] = default_cfg['interpolation']
+
+    # resolve dataset + model mean for normalization
+    new_config['mean'] = get_mean_by_model(args.model)
+    if args.mean is not None:
+        mean = tuple(args.mean)
+        if len(mean) == 1:
+            # a single value is broadcast to every channel
+            mean = mean * in_chans
+        else:
+            assert len(mean) == in_chans, 'mean must have 1 or in_chans values'
+        new_config['mean'] = mean
+    elif 'mean' in default_cfg:
+        new_config['mean'] = default_cfg['mean']
+
+    # resolve dataset + model std deviation for normalization
+    new_config['std'] = get_std_by_model(args.model)
+    if args.std is not None:
+        std = tuple(args.std)
+        if len(std) == 1:
+            # a single value is broadcast to every channel
+            std = std * in_chans
+        else:
+            assert len(std) == in_chans, 'std must have 1 or in_chans values'
+        new_config['std'] = std
+    elif 'std' in default_cfg:
+        new_config['std'] = default_cfg['std']
+
+    # resolve default crop percentage
+    new_config['crop_pct'] = DEFAULT_CROP_PCT
+    if 'crop_pct' in default_cfg:
+        new_config['crop_pct'] = default_cfg['crop_pct']
+
+    if verbose:
+        print('Data processing configuration for current model + dataset:')
+        for n, v in new_config.items():
+            print('\t%s: %s' % (n, str(v)))
+
+    return new_config
+
+
+def get_mean_by_name(name):
+    """Return the normalization mean tuple for a named preset.
+
+    'dpn' selects the DPN mean, 'inception' or 'le' the Inception mean, and any
+    other name falls back to the ImageNet default mean.
+    """
+    if name == 'dpn':
+        return IMAGENET_DPN_MEAN
+    if name in ('inception', 'le'):
+        return IMAGENET_INCEPTION_MEAN
+    return IMAGENET_DEFAULT_MEAN
+
+
+def get_std_by_name(name):
+    """Return the normalization std tuple for a named preset.
+
+    'dpn' selects the DPN std, 'inception' or 'le' the Inception std, and any
+    other name falls back to the ImageNet default std.
+    """
+    if name == 'dpn':
+        return IMAGENET_DPN_STD
+    if name in ('inception', 'le'):
+        return IMAGENET_INCEPTION_STD
+    return IMAGENET_DEFAULT_STD
+
+
+def get_mean_by_model(model_name):
+    """Return the normalization mean tuple matching a model name.
+
+    Matching is case-insensitive and substring based: names containing 'dpn'
+    get the DPN mean, names containing 'ception' or 'nasnet' get the Inception
+    mean, and every other name gets the ImageNet default mean.
+    """
+    model_name = model_name.lower()
+    if 'dpn' in model_name:
+        # must be the DPN *mean*; returning the DPN std here would normalize
+        # DPN inputs with the wrong offset
+        return IMAGENET_DPN_MEAN
+    elif 'ception' in model_name or 'nasnet' in model_name:
+        return IMAGENET_INCEPTION_MEAN
+    else:
+        return IMAGENET_DEFAULT_MEAN
+
+
+def get_std_by_model(model_name):
+    """Return the normalization std tuple matching a model name.
+
+    Matching is case-insensitive and substring based: names containing 'dpn'
+    get the DPN std, names containing 'ception' or 'nasnet' get the Inception
+    std, and every other name gets the ImageNet default std.
+    """
+    model_name = model_name.lower()
+    if 'dpn' in model_name:
+        # DPN has its own std constant (also used by get_std_by_name('dpn'));
+        # the default std would make this branch indistinguishable from the fallback
+        return IMAGENET_DPN_STD
+    elif 'ception' in model_name or 'nasnet' in model_name:
+        return IMAGENET_INCEPTION_STD
+    else:
+        return IMAGENET_DEFAULT_STD
@@ -0,0 +1,7 @@
+# Fallback value for the 'crop_pct' data config entry.
+DEFAULT_CROP_PCT = 0.875
+
+# Per-channel (3 values) normalization statistics.
+IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
+IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)
+IMAGENET_INCEPTION_MEAN = (0.5,) * 3
+IMAGENET_INCEPTION_STD = (0.5,) * 3
+# DPN statistics are written as 0-255 quantities divided down by 255.
+IMAGENET_DPN_MEAN = tuple(c / 255 for c in (124, 117, 104))
+IMAGENET_DPN_STD = (1 / (.0167 * 255),) * 3
@@ -1,6 +1,4 @@
-import torch
 import torch.utils.data
-from data.random_erasing import RandomErasingTorch
 from data.transforms import *
 from data.distributed_sampler import OrderedDistributedSampler
 
@@ -27,7 +25,7 @@ def __init__(self,
         self.mean = torch.tensor([x * 255 for x in mean]).cuda().view(1, 3, 1, 1)
         self.std = torch.tensor([x * 255 for x in std]).cuda().view(1, 3, 1, 1)
         if rand_erase_prob > 0.:
-            self.random_erasing = RandomErasingTorch(
+            self.random_erasing = RandomErasing(
                 probability=rand_erase_prob, per_pixel=rand_erase_pp)
         else:
             self.random_erasing = None
 
@@ -2,125 +2,68 @@
 
 import random
 import math
-import numpy as np
 import torch
 
 
-class RandomErasingNumpy:
+def _get_patch(per_pixel, rand_color, patch_size, dtype=torch.float32, device='cuda'):
+    """Build the fill tensor written into an erased region.
+
+    per_pixel: tensor of shape `patch_size` filled via normal_() (wins over rand_color).
+    rand_color: tensor of shape (patch_size[0], 1, 1) filled via normal_().
+    neither: zeros of shape (patch_size[0], 1, 1).
+    """
+    if not (per_pixel or rand_color):
+        return torch.zeros((patch_size[0], 1, 1), dtype=dtype, device=device)
+    # one value per pixel, or one value per leading-dim entry broadcast over the region
+    shape = patch_size if per_pixel else (patch_size[0], 1, 1)
+    return torch.empty(shape, dtype=dtype, device=device).normal_()
+
+
+class RandomErasing:
     """ Randomly selects a rectangle region in an image and erases its pixels.
         'Random Erasing Data Augmentation' by Zhong et al.
         See https://arxiv.org/pdf/1708.04896.pdf
 
-        This 'Numpy' variant of RandomErasing is intended to be applied on a per
-        image basis after transforming the image to uint8 numpy array in
-        range 0-255 prior to tensor conversion and normalization
+        This variant of RandomErasing is intended to be applied to either a batch
+        or single image tensor after it has been normalized by dataset mean and std.
+        The input tensor is modified in place.
     Args:
          probability: The probability that the Random Erasing operation will be performed.
          sl: Minimum proportion of erased area against input image.
          sh: Maximum proportion of erased area against input image.
-         r1: Minimum aspect ratio of erased area.
-         mean: Erasing value.
+         min_aspect: Minimum aspect ratio of erased area; the maximum is 1 / min_aspect.
+         per_pixel: fill the erased region with an independent normal_() sample per pixel,
+            takes precedence over rand_color
+         rand_color: fill the erased region with one normal_() sample per channel;
+            the region is filled with 0 if neither this nor per_pixel is set
+         device: device on which the fill patch is created, 'cuda' by default
     """
 
     def __init__(
             self,
             probability=0.5, sl=0.02, sh=1/3, min_aspect=0.3,
-            per_pixel=False, rand_color=False,
-            pl=0, ph=255, mean=[255 * 0.485, 255 * 0.456, 255 * 0.406],
-            out_type=np.uint8):
+            per_pixel=False, rand_color=False, device='cuda'):
         self.probability = probability
-        if not per_pixel and not rand_color:
-            self.mean = np.array(mean).round().astype(out_type)
-        else:
-            self.mean = None
         self.sl = sl
         self.sh = sh
         self.min_aspect = min_aspect
-        self.pl = pl
-        self.ph = ph
+        # NOTE(review): the '[pl, ph]' remarks on the next two lines are stale; pl/ph
+        # no longer exist and the fill values now come from _get_patch.
         self.per_pixel = per_pixel  # per pixel random, bounded by [pl, ph]
         self.rand_color = rand_color  # per block random, bounded by [pl, ph]
-        self.out_type = out_type
+        self.device = device
 
-    def __call__(self, img):
+    def _erase(self, img, chan, img_h, img_w, dtype):
+        """ Erase one random rectangle of a single image tensor in place.
+
+        Nothing happens when random.random() exceeds self.probability. Otherwise up
+        to 100 rectangles are sampled and the first one with w < img_w and h < img_h
+        is overwritten with a patch from _get_patch; if none fits, img is unchanged.
+        """
         if random.random() > self.probability:
-            return img
-
-        chan, img_h, img_w = img.shape
+            return
         area = img_h * img_w
         for attempt in range(100):
             target_area = random.uniform(self.sl, self.sh) * area
             aspect_ratio = random.uniform(self.min_aspect, 1 / self.min_aspect)
-
             h = int(round(math.sqrt(target_area * aspect_ratio)))
             w = int(round(math.sqrt(target_area / aspect_ratio)))
-            if self.rand_color:
-                c = np.random.randint(self.pl, self.ph + 1, (chan,), self.out_type)
-            elif not self.per_pixel:
-                c = self.mean[:chan]
             if w < img_w and h < img_h:
                 top = random.randint(0, img_h - h)
                 left = random.randint(0, img_w - w)
-                if self.per_pixel:
-                    img[:, top:top + h, left:left + w] = np.random.randint(
-                        self.pl, self.ph + 1, (chan, h, w), self.out_type)
-                else:
-                    img[:, top:top + h, left:left + w] = c
-                return img
-
-        return img
-
-
-class RandomErasingTorch:
-    """ Randomly selects a rectangle region in an image and erases its pixels.
-        'Random Erasing Data Augmentation' by Zhong et al.
-        See https://arxiv.org/pdf/1708.04896.pdf
+                img[:, top:top + h, left:left + w] = _get_patch(
+                    self.per_pixel, self.rand_color, (chan, h, w), dtype=dtype, device=self.device)
+                break
 
-        This 'Torch' variant of RandomErasing is intended to be applied to a full batch
-        tensor after it has been normalized by dataset mean and std.
-    Args:
-         probability: The probability that the Random Erasing operation will be performed.
-         sl: Minimum proportion of erased area against input image.
-         sh: Maximum proportion of erased area against input image.
-         r1: Minimum aspect ratio of erased area.
-    """
-
-    def __init__(
-            self,
-            probability=0.5, sl=0.02, sh=1/3, min_aspect=0.3,
-            per_pixel=False, rand_color=False):
-        self.probability = probability
-        self.sl = sl
-        self.sh = sh
-        self.min_aspect = min_aspect
-        self.per_pixel = per_pixel  # per pixel random, bounded by [pl, ph]
-        self.rand_color = rand_color  # per block random, bounded by [pl, ph]
-
-    def __call__(self, batch):
-        batch_size, chan, img_h, img_w = batch.size()
-        area = img_h * img_w
-        for i in range(batch_size):
-            if random.random() > self.probability:
-                continue
-            img = batch[i]
-            for attempt in range(100):
-                target_area = random.uniform(self.sl, self.sh) * area
-                aspect_ratio = random.uniform(self.min_aspect, 1 / self.min_aspect)
-
-                h = int(round(math.sqrt(target_area * aspect_ratio)))
-                w = int(round(math.sqrt(target_area / aspect_ratio)))
-                if self.rand_color:
-                    c = torch.empty((chan, 1, 1), dtype=batch.dtype).normal_().cuda()
-                elif not self.per_pixel:
-                    c = torch.zeros((chan, 1, 1), dtype=batch.dtype).cuda()
-                if w < img_w and h < img_h:
-                    top = random.randint(0, img_h - h)
-                    left = random.randint(0, img_w - w)
-                    if self.per_pixel:
-                        img[:, top:top + h, left:left + w] = torch.empty(
-                            (chan, h, w), dtype=batch.dtype).normal_().cuda()
-                    else:
-                        img[:, top:top + h, left:left + w] = c
-                    break
-
-        return batch
+    def __call__(self, input):
+        """ Apply random erasing to `input` in place and return it.
+
+        A tensor whose size() has 3 entries is treated as one image; anything else is
+        unpacked as (batch_size, chan, img_h, img_w) and each image is erased
+        independently, with its own probability draw.
+        """
+        if len(input.size()) == 3:
+            self._erase(input, *input.size(), input.dtype)
+        else:
+            batch_size, chan, img_h, img_w = input.size()
+            for i in range(batch_size):
+                self._erase(input[i], chan, img_h, img_w, input.dtype)
+        return input