
Commit 02822c9

- added plotly as a requirement
- changed the wandb logits histogram to a plotly plot
- added MDN and made changes to prediction layer

1 parent e37a1bd commit 02822c9

File tree

7 files changed (+108,780, -82 lines)


examples/regression_with_MDN.ipynb

Lines changed: 108484 additions & 0 deletions
Large diffs are not rendered by default.

examples/to_test_regression.py

Lines changed: 9 additions & 9 deletions
@@ -54,14 +54,14 @@
     normalize_continuous_features=True,
 )
 
-mdn_config = MixtureDensityHeadConfig(num_gaussian=2)
-model_config = NODEMDNConfig(
-    task="regression",
-    # initialization="blah",
-    mdn_config = mdn_config
-)
-# model_config.validate()
-# model_config = NodeConfig(task="regression", depth=2, embed_categorical=False)
+# mdn_config = MixtureDensityHeadConfig(num_gaussian=2)
+# model_config = NODEMDNConfig(
+#     task="regression",
+#     # initialization="blah",
+#     mdn_config = mdn_config
+# )
+# # model_config.validate()
+model_config = NodeConfig(task="regression", depth=2, embed_categorical=False)
 trainer_config = TrainerConfig(checkpoints=None, max_epochs=5, gpus=1, profiler=None)
 # experiment_config = ExperimentConfig(
 #     project_name="DeepGMM_test",
@@ -84,6 +84,6 @@
 result = tabular_model.evaluate(test)
 # print(result)
 # # print(result[0]['train_loss'])
-pred_df = tabular_model.predict(test, quantiles=[0.25])
+pred_df = tabular_model.predict(test, quantiles=[0.25], ret_logits=True)
 print(pred_df.head())
 # pred_df.to_csv("output/temp2.csv")
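
For context, a minimal sketch of how the MDN path this script toggles would look when enabled, reconstructed from the commented-out lines in the diff above. Here `tabular_model` and `test` are the model and dataframe already set up earlier in the script, and the exact argument names are assumptions taken from this diff rather than a documented API:

# Sketch only: mirrors the commented-out configuration above.
# `tabular_model` and `test` come from the surrounding script and are not defined here.
from pytorch_tabular.models.mixture_density.config import (
    MixtureDensityHeadConfig,
    NODEMDNConfig,
)

mdn_config = MixtureDensityHeadConfig(num_gaussian=2)  # two-component mixture head
model_config = NODEMDNConfig(task="regression", mdn_config=mdn_config)

# New in this commit: ret_logits=True also returns the raw model outputs
# alongside the point and quantile predictions.
pred_df = tabular_model.predict(test, quantiles=[0.25], ret_logits=True)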

pytorch_tabular/models/base_model.py

Lines changed: 55 additions & 14 deletions
@@ -10,8 +10,11 @@
 import torch
 import torch.nn as nn
 from omegaconf import DictConfig
+
 try:
     import wandb
+    import plotly.graph_objects as go
+
     WANDB_INSTALLED = True
 except ImportError:
     WANDB_INSTALLED = False
@@ -101,14 +104,21 @@ def calculate_loss(self, y, y_hat, tag):
 
     def calculate_metrics(self, y, y_hat, tag):
         metrics = []
-        for metric, metric_str, metric_params in zip(self.metrics, self.hparams.metrics, self.hparams.metrics_params):
+        for metric, metric_str, metric_params in zip(
+            self.metrics, self.hparams.metrics, self.hparams.metrics_params
+        ):
             if (self.hparams.task == "regression") and (self.hparams.output_dim > 1):
                 _metrics = []
                 for i in range(self.hparams.output_dim):
-                    if metric.__name__==pl.metrics.functional.mean_squared_log_error.__name__:
+                    if (
+                        metric.__name__
+                        == pl.metrics.functional.mean_squared_log_error.__name__
+                    ):
                         # MSLE should only be used in strictly positive targets. It is undefined otherwise
                         _metric = metric(
-                            torch.clamp(y_hat[:, i], min=0), torch.clamp(y[:, i], min=0), **metric_params
+                            torch.clamp(y_hat[:, i], min=0),
+                            torch.clamp(y[:, i], min=0),
+                            **metric_params,
                         )
                     else:
                         _metric = metric(y_hat[:, i], y[:, i], **metric_params)
@@ -139,33 +149,37 @@ def calculate_metrics(self, y, y_hat, tag):
     def forward(self, x: Dict):
         pass
 
-    def predict(self, x: Dict):
-        return self.forward(x).get("logits")
+    def predict(self, x: Dict, ret_model_output: bool = False):
+        ret_value = self.forward(x)
+        if ret_model_output:
+            return ret_value.get("logits"), ret_value
+        else:
+            return ret_value.get("logits")
 
     def training_step(self, batch, batch_idx):
         y = batch["target"]
-        y_hat = self(batch)['logits']
+        y_hat = self(batch)["logits"]
         loss = self.calculate_loss(y, y_hat, tag="train")
         _ = self.calculate_metrics(y, y_hat, tag="train")
         return loss
 
     def validation_step(self, batch, batch_idx):
         y = batch["target"]
-        y_hat = self(batch)['logits']
+        y_hat = self(batch)["logits"]
         _ = self.calculate_loss(y, y_hat, tag="valid")
         _ = self.calculate_metrics(y, y_hat, tag="valid")
         return y_hat, y
 
     def test_step(self, batch, batch_idx):
         y = batch["target"]
-        y_hat = self(batch)['logits']
+        y_hat = self(batch)["logits"]
         _ = self.calculate_loss(y, y_hat, tag="test")
         _ = self.calculate_metrics(y, y_hat, tag="test")
         return y_hat, y
 
     def configure_optimizers(self):
         if self.custom_optimizer is None:
-            #Loading from the config
+            # Loading from the config
             try:
                 self._optimizer = getattr(torch.optim, self.hparams.optimizer)
                 opt = self._optimizer(
@@ -179,7 +193,7 @@ def configure_optimizers(self):
                 )
                 raise e
         else:
-            #Loading from custom fit arguments
+            # Loading from custom fit arguments
             self._optimizer = self.custom_optimizer
 
         opt = self._optimizer(
@@ -215,15 +229,42 @@ def configure_optimizers(self):
         else:
             return opt
 
+    def create_plotly_histogram(self, arr, name, bin_dict=None):
+        fig = go.Figure()
+        for i in range(arr.shape[-1]):
+            fig.add_trace(
+                go.Histogram(
+                    x=arr[:, i],
+                    histnorm="probability",
+                    name=f"{name}_{i}",
+                    xbins=bin_dict,  # dict(start=0.0, end=1.0, size=0.1), # bins used for histogram
+                )
+            )
+        # Overlay both histograms
+        fig.update_layout(
+            barmode="overlay",
+            legend=dict(
+                orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1
+            ),
+        )
+        # Reduce opacity to see both histograms
+        fig.update_traces(opacity=0.5)
+        return fig
+
     def validation_epoch_end(self, outputs) -> None:
-        do_log_logits = self.hparams.log_logits and self.hparams.log_target == "wandb" and WANDB_INSTALLED
+        do_log_logits = (
+            self.hparams.log_logits
+            and self.hparams.log_target == "wandb"
+            and WANDB_INSTALLED
+        )
         if do_log_logits:
             logits = [output[0] for output in outputs]
-            flattened_logits = torch.flatten(torch.cat(logits))
+            logits = torch.cat(logits).detach().cpu()
+            fig = self.create_plotly_histogram(logits.unsqueeze(1), "logits")
             wandb.log(
                 {
-                    "valid_logits": wandb.Histogram(flattened_logits.to("cpu")),
+                    "valid_logits": fig,
                     "global_step": self.global_step,
                 },
-                commit=False
+                commit=False,
             )
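
The gist of this change is that validation logits are now logged to W&B as an overlaid Plotly histogram figure instead of a wandb.Histogram. A self-contained sketch of that pattern outside the model class, assuming plotly and wandb are installed; the plotly_histogram helper name is illustrative and not part of the library:

# Standalone sketch of the logging pattern used by create_plotly_histogram /
# validation_epoch_end above; not part of the library itself.
import numpy as np
import plotly.graph_objects as go
import wandb


def plotly_histogram(arr: np.ndarray, name: str, bin_dict=None) -> go.Figure:
    fig = go.Figure()
    # One trace per column, e.g. per target dimension or mixture component
    for i in range(arr.shape[-1]):
        fig.add_trace(
            go.Histogram(x=arr[:, i], histnorm="probability", name=f"{name}_{i}", xbins=bin_dict)
        )
    fig.update_layout(barmode="overlay")  # overlay traces instead of stacking them
    fig.update_traces(opacity=0.5)        # keep overlapping traces visible
    return fig


# Example: log a (N, 1) array of validation logits as a figure
logits = np.random.randn(1024, 1)  # stand-in for torch.cat(logits).detach().cpu().numpy()
wandb.init(project="demo", mode="offline")  # offline run keeps the sketch self-contained
wandb.log({"valid_logits": plotly_histogram(logits, "logits"), "global_step": 0}, commit=False)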

pytorch_tabular/models/mixture_density/config.py

Lines changed: 66 additions & 5 deletions
@@ -18,7 +18,21 @@ class MixtureDensityHeadConfig:
     Args:
         num_gaussian (int): Number of Gaussian Distributions in the mixture model. Defaults to 1
         n_samples (int): Number of samples to draw from the posterior to get prediction. Defaults to 100
-        central_tendency (str): Which measure to use to get the point prediction. Choices are 'mean', 'median'. Defaults to `mean`
+        central_tendency (str): Which measure to use to get the point prediction.
+            Choices are 'mean', 'median'. Defaults to `mean`
+        sigma_bias_flag (bool): Whether to have a bias term in the sigma layer. Defaults to False
+        mu_bias_init (Optional[List]): To initialize the bias parameter of the mu layer to predefined cluster centers.
+            Should be a list with the same length as number of gaussians in the mixture model.
+            It is highly recommended to set the parameter to combat mode collapse. Defaults to None
+        weight_regularization (Optional[int]): Whether to apply L1 or L2 Norm to the MDN layers.
+            It is highly recommended to use this to avoid mode collapse. Choices are [1,2]. Defaults to L2
+        lambda_sigma (Optional[float]): The regularization constant for weight regularization of sigma layer. Defaults to 0.1
+        lambda_pi (Optional[float]): The regularization constant for weight regularization of pi layer. Defaults to 0.1
+        lambda_mu (Optional[float]): The regularization constant for weight regularization of mu layer. Defaults to 0.1
+        speedup_training (bool): Turning on this parameter does away with sampling during training which speeds up training,
+            but also doesn't give you visibility on train metrics. Defaults to False
+        log_debug_plot (bool): Turning on this parameter plots histograms of the mu, sigma, and pi layers in addition to the logits
+            (if log_logits is turned on in the experiment config). Defaults to False
 
     """
 
@@ -28,6 +42,45 @@ class MixtureDensityHeadConfig:
             "help": "Number of Gaussian Distributions in the mixture model. Defaults to 1",
         },
     )
+    sigma_bias_flag: bool = field(
+        default=False,
+        metadata={
+            "help": "Whether to have a bias term in the sigma layer. Defaults to False",
+        },
+    )
+    mu_bias_init: Optional[List] = field(
+        default=None,
+        metadata={
+            "help": "To initialize the bias parameter of the mu layer to predefined cluster centers. Should be a list with the same length as number of gaussians in the mixture model. It is highly recommended to set the parameter to combat mode collapse. Defaults to None",
+        },
+    )
+
+    weight_regularization: Optional[int] = field(
+        default=2,
+        metadata={
+            "help": "Whether to apply L1 or L2 Norm to the MDN layers. Defaults to L2",
+            "choices": [1, 2],
+        },
+    )
+
+    lambda_sigma: Optional[float] = field(
+        default=0.1,
+        metadata={
+            "help": "The regularization constant for weight regularization of sigma layer. Defaults to 0.1",
+        },
+    )
+    lambda_pi: Optional[float] = field(
+        default=0.1,
+        metadata={
+            "help": "The regularization constant for weight regularization of pi layer. Defaults to 0.1",
+        },
+    )
+    lambda_mu: Optional[float] = field(
+        default=0,
+        metadata={
+            "help": "The regularization constant for weight regularization of mu layer. Defaults to 0",
+        },
+    )
     n_samples: int = field(
         default=100,
         metadata={
@@ -41,10 +94,16 @@ class MixtureDensityHeadConfig:
             "choices": ["mean", "median"],
         },
    )
-    fast_training: bool = field(
+    speedup_training: bool = field(
+        default=False,
+        metadata={
+            "help": "Turning on this parameter does away with sampling during training which speeds up training, but also doesn't give you visibility on train metrics. Defaults to False",
+        },
+    )
+    log_debug_plot: bool = field(
         default=False,
         metadata={
-            "help": "Turning onthis parameter does away with sampling during training which speeds up training, but also doesn't give you visibility on training metrics. Defaults to True",
+            "help": "Turning on this parameter plots histograms of the mu, sigma, and pi layers in addition to the logits (if log_logits is turned on in the experiment config). Defaults to False",
         },
     )
     _module_src: str = field(default="mixture_density")
@@ -87,7 +146,8 @@ class CategoryEmbeddingMDNConfig(CategoryEmbeddingModelConfig):
     """
 
     mdn_config: MixtureDensityHeadConfig = field(
-        default=None, metadata={"help": "The config for defining the Mixed Density Network Head"}
+        default=None,
+        metadata={"help": "The config for defining the Mixed Density Network Head"},
     )
     _module_src: str = field(default="mixture_density")
     _model_name: str = field(default="CategoryEmbeddingMDN")
@@ -159,7 +219,8 @@ class NODEMDNConfig(NodeConfig):
     """
 
     mdn_config: MixtureDensityHeadConfig = field(
-        default=None, metadata={"help": "The config for defining the Mixed Density Network Head"}
+        default=None,
+        metadata={"help": "The config for defining the Mixed Density Network Head"},
     )
     _module_src: str = field(default="mixture_density")
     _model_name: str = field(default="NODEMDN")
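
Taken together, the new fields give the mixture density head explicit bias, initialization, and regularization controls. A minimal construction sketch using only the field names defined in the dataclass above; the specific values are illustrative assumptions, not recommended defaults:

# Illustrative construction of the expanded config; field names come from the
# dataclass in this diff, the values are assumptions for demonstration only.
from pytorch_tabular.models.mixture_density.config import MixtureDensityHeadConfig

mdn_head = MixtureDensityHeadConfig(
    num_gaussian=3,                  # three-component mixture
    sigma_bias_flag=False,           # no bias term in the sigma layer
    mu_bias_init=[0.0, 5.0, 10.0],   # one initial center per Gaussian, to combat mode collapse
    weight_regularization=2,         # L2 penalty on the MDN layers
    lambda_sigma=0.1,                # regularization constants for the sigma / pi / mu layers
    lambda_pi=0.1,
    lambda_mu=0.1,
    speedup_training=False,          # keep sampling during training so train metrics stay visible
    log_debug_plot=True,             # also log mu/sigma/pi histograms when log_logits is enabled
)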
