Commit 1fa5376

Added NGM agreement.
1 parent dbfdcf5 commit 1fa5376

2 files changed (+104, -32 lines)

neural_structured_learning/research/gam/trainer/trainer_agreement.py

Lines changed: 68 additions & 0 deletions
@@ -1119,3 +1119,71 @@ def predict_label_by_agreement(self, indices, num_neighbors=100,
     acc /= len(indices)
     logging.info('Majority vote accuracy: %.2f.', acc)
     return acc
+
+
+class TrainerAgreementAlwaysAgree(object):
+  """Trainer for an agreement model that always predicts that samples agree.
+
+  The goal of this class is to simulate the behavior of the Neural Graph
+  Machines model, which assumes that two nodes connected by an edge in the
+  graph always have the same label.
+  """
+
+  def __init__(self, data, **unused_kwargs):
+    self.data = data
+    self.vars_to_save = []
+
+  def train(self, *unused_args, **unused_kwargs):
+    logging.info('Using NGM, agreement always returns 1. No need to train...')
+
+  def predict(self, unused_session, unused_src_features, unused_tgt_features,
+              src_indices, tgt_indices):
+    """Predicts agreement for the provided pairs of samples.
+
+    This function has several unused arguments in order to conform to the
+    interface of the TrainerAgreement class.
+
+    Arguments:
+      unused_session: A TensorFlow session in which to run the model.
+      unused_src_features: An array of shape (num_samples, num_features)
+        containing the features of the first element of each pair.
+      unused_tgt_features: An array of shape (num_samples, num_features)
+        containing the features of the second element of each pair.
+      src_indices: An array of integers containing the index in self.data of
+        each sample in src_features.
+      tgt_indices: An array of integers containing the index in self.data of
+        each sample in tgt_features.
+
+    Returns:
+      An array containing the predicted agreement value for each pair of
+      provided samples.
+    """
+    num_samples = src_indices.shape[0]
+    return np.ones((num_samples,), dtype=np.float32)
+
+  def create_agreement_prediction(self, src_indices, *unused_args,
+                                  **unused_kwargs):
+    """Creates the agreement prediction TensorFlow subgraph.
+
+    This function is the equivalent of `create_agreement_prediction` in
+    TrainerAgreement, but here we always predict 1.0.
+
+    Arguments:
+      src_indices: A Tensor or placeholder of shape (batch_size,) containing
+        the indices of the samples that are the sources of the edges.
+      unused_args: Other unused arguments, allowed in order to keep a common
+        interface with TrainerAgreement.
+      unused_kwargs: Other unused keyword arguments, allowed in order to keep
+        a common interface with TrainerAgreement.
+    Returns:
+      predictions: None, because this model does not compute logits, but we
+        still return a value in order to keep the same outputs as
+        TrainerAgreement.
+      normalized_predictions: A Tensor of shape (batch_size,) containing the
+        agreement prediction probabilities (always 1.0 for this model).
+      variables: An empty dictionary, because this model has no trainable
+        variables.
+      reg_params: An empty dictionary of variables used in the regularization
+        weight decay term, because this model has no regularization variables.
+    """
+    return None, tf.ones((tf.shape(src_indices)[0],), tf.float32), {}, {}
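
The following is a minimal usage sketch, not part of the commit, illustrating the behavior of the new class: predict() ignores the session and feature arguments and returns 1.0 for every pair, which is what lets it emulate the Neural Graph Machines assumption. The data=None argument is a placeholder for illustration only; in the co-training loop the trainer receives a CotrainDataset.

import numpy as np

from gam.trainer.trainer_agreement import TrainerAgreementAlwaysAgree

# Any dataset object works here; the class stores it but never uses it when
# predicting. None is passed purely for illustration.
trainer = TrainerAgreementAlwaysAgree(data=None)

src_indices = np.array([0, 1, 2])
tgt_indices = np.array([7, 8, 9])

# Session and features are ignored, so None is passed for them as well.
agreement = trainer.predict(None, None, None, src_indices, tgt_indices)
print(agreement)  # [1. 1. 1.] -- every pair is predicted to agree.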

neural_structured_learning/research/gam/trainer/trainer_cotrain.py

Lines changed: 36 additions & 32 deletions
@@ -32,6 +32,7 @@
 
 from gam.data.dataset import CotrainDataset
 from gam.trainer.trainer_agreement import TrainerAgreement
+from gam.trainer.trainer_agreement import TrainerAgreementAlwaysAgree
 from gam.trainer.trainer_agreement import TrainerPerfectAgreement
 from gam.trainer.trainer_base import Trainer
 from gam.trainer.trainer_classification import TrainerClassification
@@ -443,38 +444,41 @@ def train(self, data, **kwargs):
       trainer_agr = TrainerPerfectAgreement(data=data)
     else:
       with tf.variable_scope('AgreementModel'):
-        trainer_agr = TrainerAgreement(
-            model=self.model_agr,
-            data=data,
-            optimizer=self.optimizer,
-            gradient_clip=self.gradient_clip,
-            min_num_iter=self.min_num_iter_agr,
-            max_num_iter=self.max_num_iter_agr,
-            num_iter_after_best_val=self.num_iter_after_best_val_agr,
-            max_num_iter_cotrain=self.max_num_iter_cotrain,
-            num_warm_up_iter=self.num_warm_up_iter_agr,
-            warm_start=self.warm_start_agr,
-            batch_size=self.batch_size_agr,
-            enable_summaries=self.enable_summaries_per_model,
-            summary_step=self.summary_step_agr,
-            summary_dir=self.summary_dir,
-            logging_step=self.logging_step_agr,
-            eval_step=self.eval_step_agr,
-            abs_loss_chg_tol=self.abs_loss_chg_tol,
-            rel_loss_chg_tol=self.rel_loss_chg_tol,
-            loss_chg_iter_below_tol=self.loss_chg_iter_below_tol,
-            checkpoints_dir=self.checkpoints_dir,
-            weight_decay=self.weight_decay_agr,
-            weight_decay_schedule=self.weight_decay_schedule_agr,
-            agree_by_default=False,
-            percent_val=self.ratio_valid_agr,
-            max_num_samples_val=self.max_samples_valid_agr,
-            seed=self.seed,
-            lr_decay_rate=self.lr_decay_rate_agr,
-            lr_decay_steps=self.lr_decay_steps_agr,
-            lr_initial=self.learning_rate_agr,
-            use_graph=self.use_graph,
-            add_negative_edges=self.add_negative_edges_agr)
+        if self.always_agree:
+          trainer_agr = TrainerAgreementAlwaysAgree(data=data)
+        else:
+          trainer_agr = TrainerAgreement(
+              model=self.model_agr,
+              data=data,
+              optimizer=self.optimizer,
+              gradient_clip=self.gradient_clip,
+              min_num_iter=self.min_num_iter_agr,
+              max_num_iter=self.max_num_iter_agr,
+              num_iter_after_best_val=self.num_iter_after_best_val_agr,
+              max_num_iter_cotrain=self.max_num_iter_cotrain,
+              num_warm_up_iter=self.num_warm_up_iter_agr,
+              warm_start=self.warm_start_agr,
+              batch_size=self.batch_size_agr,
+              enable_summaries=self.enable_summaries_per_model,
+              summary_step=self.summary_step_agr,
+              summary_dir=self.summary_dir,
+              logging_step=self.logging_step_agr,
+              eval_step=self.eval_step_agr,
+              abs_loss_chg_tol=self.abs_loss_chg_tol,
+              rel_loss_chg_tol=self.rel_loss_chg_tol,
+              loss_chg_iter_below_tol=self.loss_chg_iter_below_tol,
+              checkpoints_dir=self.checkpoints_dir,
+              weight_decay=self.weight_decay_agr,
+              weight_decay_schedule=self.weight_decay_schedule_agr,
+              agree_by_default=False,
+              percent_val=self.ratio_valid_agr,
+              max_num_samples_val=self.max_samples_valid_agr,
+              seed=self.seed,
+              lr_decay_rate=self.lr_decay_rate_agr,
+              lr_decay_steps=self.lr_decay_steps_agr,
+              lr_initial=self.learning_rate_agr,
+              use_graph=self.use_graph,
+              add_negative_edges=self.add_negative_edges_agr)
 
     if self.use_perfect_cls:
       # A perfect classification model used for debugging purposes.
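
For orientation, here is a condensed sketch of the new selection logic (condensed from the hunk above; the full hyperparameter list is elided, and it is assumed that self.always_agree is a boolean attribute already available on the co-training trainer):

with tf.variable_scope('AgreementModel'):
  if self.always_agree:
    # NGM-style behavior: every connected pair is assumed to agree.
    trainer_agr = TrainerAgreementAlwaysAgree(data=data)
  else:
    # Original behavior: train a full agreement model.
    trainer_agr = TrainerAgreement(
        model=self.model_agr,
        data=data,
        # ... remaining hyperparameters unchanged from the previous version ...
        add_negative_edges=self.add_negative_edges_agr)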
