@@ -69,14 +69,13 @@ class TrainerClassification(Trainer):
     summary_step: Integer representing the summary step size.
     summary_dir: String representing the path to a directory where to save the
       variable summaries.
-    logging_step: Integer representing the number of iterations after which
-      we log the loss of the model.
+    logging_step: Integer representing the number of iterations after which we
+      log the loss of the model.
     eval_step: Integer representing the number of iterations after which we
       evaluate the model.
-    warm_start: Whether the model parameters are initialized at their
-      best value in the previous cotrain iteration. If False, they are
-      reinitialized.
-    gradient_clip=None,
+    warm_start: Whether the model parameters are initialized at their best value
+      in the previous cotrain iteration. If False, they are reinitialized.
+    gradient_clip: Float norm used to clip gradients, or None for no clipping.
     abs_loss_chg_tol: A float representing the absolute tolerance for checking
       if the training loss has converged. If the difference between the current
       loss and previous loss is less than `abs_loss_chg_tol`, we count this
@@ -89,19 +88,19 @@ class TrainerClassification(Trainer):
       iterations that pass the convergence criteria before stopping training.
     checkpoints_dir: Path to the folder where to store TensorFlow model
       checkpoints.
-    weight_decay: Weight for the weight decay term in the classification
-      model loss.
+    weight_decay: Weight for the weight decay term in the classification model
+      loss.
     weight_decay_schedule: Schedule for adjusting the classification weight
       decay after every cotrain iteration.
     penalize_neg_agr: Whether to not only encourage agreement between samples
       that the agreement model believes should have the same label, but also
       penalize agreement when two samples agree even though the agreement model
       predicts they should disagree.
-    use_l2_clssif: Whether to use L2 loss for classification, as opposed to the
-      whichever loss is specified in the provided model_cls.
     first_iter_original: A boolean specifying whether the first cotrain
       iteration trains the original classification model (with no agreement
       term).
+    use_l2_clssif: Whether to use L2 loss for classification, as opposed to
+      whichever loss is specified in the provided model_cls.
     seed: Seed used by all the random number generators in this class.
     use_graph: Boolean specifying whether the agreement loss is applied to graph
       edges, as opposed to random pairs of samples.
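
Taken together, these attributes configure a loss of the form: supervised
classification loss, plus the agreement regularizer, plus a weight decay
penalty. A minimal sketch of that composition with made-up stand-in tensors
(the real trainer's exact weighting may differ):

    import tensorflow as tf

    # Hypothetical stand-ins for tensors built later in this diff.
    loss_supervised = tf.constant(0.9)   # supervised cross-entropy / L2 loss
    loss_agr = tf.constant(0.2)          # agreement regularization loss
    reg_params = {'w': tf.ones([3, 3])}  # parameters subject to weight decay
    weight_decay_var = tf.constant(1e-4)

    # Schematic total: supervised + agreement + weight decay.
    loss_weight_decay = weight_decay_var * tf.add_n(
        [tf.nn.l2_loss(v) for v in reg_params.values()])
    loss_op = loss_supervised + loss_agr + loss_weight_decay
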
@@ -162,8 +161,9 @@ def __init__(self,
     self.gradient_clip = gradient_clip
     self.logging_step = logging_step
     self.eval_step = eval_step
-    self.checkpoint_path = (os.path.join(checkpoints_dir, 'classif_best.ckpt')
-                            if checkpoints_dir is not None else None)
+    self.checkpoint_path = (
+        os.path.join(checkpoints_dir, 'classif_best.ckpt')
+        if checkpoints_dir is not None else None)
     self.weight_decay_initial = weight_decay
     self.weight_decay_schedule = weight_decay_schedule
     self.num_pairs_reg = num_pairs_reg
@@ -186,11 +186,11 @@ def __init__(self,
     # First obtain the features shape from the dataset, and append a batch_size
     # dimension to it (i.e., `None` to allow for variable batch size).
     features_shape = [None] + list(data.features_shape)
-    input_features = tf.placeholder(tf.float32, shape=features_shape,
-                                    name='input_features')
+    input_features = tf.placeholder(
+        tf.float32, shape=features_shape, name='input_features')
     input_labels = tf.placeholder(tf.int64, shape=(None,), name='input_labels')
-    one_hot_labels = tf.one_hot(input_labels, data.num_classes,
-                                name='input_labels_one_hot')
+    one_hot_labels = tf.one_hot(
+        input_labels, data.num_classes, name='input_labels_one_hot')
     # Create a placeholder specifying if this is train time.
     is_train = tf.placeholder_with_default(False, shape=[], name='is_train')
 
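
In TF1-style graphs like the one above, these placeholders are bound at run
time through a feed_dict, and is_train falls back to False when it is not fed.
A self-contained toy version (the feature and class counts are made up):

    import numpy as np
    import tensorflow as tf

    features_shape = [None, 4]  # variable batch size, 4 features
    input_features = tf.placeholder(
        tf.float32, shape=features_shape, name='input_features')
    input_labels = tf.placeholder(tf.int64, shape=(None,), name='input_labels')
    one_hot_labels = tf.one_hot(input_labels, 3, name='input_labels_one_hot')
    is_train = tf.placeholder_with_default(False, shape=[], name='is_train')

    with tf.Session() as sess:
      # `is_train` is not fed, so it takes its default value, False.
      print(sess.run(one_hot_labels,
                     feed_dict={input_features: np.zeros((2, 4), np.float32),
                                input_labels: [0, 2]}))
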
@@ -201,8 +201,8 @@ def __init__(self,
     self.variables = variables
     self.reg_params = reg_params
     predictions, variables, reg_params = (
-        self.model.get_predictions_and_params(encoding=encoding,
-                                              is_train=is_train))
+        self.model.get_predictions_and_params(
+            encoding=encoding, is_train=is_train))
     self.variables.update(variables)
     self.reg_params.update(reg_params)
     normalized_predictions = self.model.normalize_predictions(predictions)
@@ -221,9 +221,10 @@ def __init__(self,
       loss_supervised = tf.reduce_sum(loss_supervised, axis=-1)
       loss_supervised = tf.reduce_mean(loss_supervised)
     else:
-      loss_supervised = self.model.get_loss(predictions=predictions,
-                                            targets=one_hot_labels,
-                                            weight_decay=None)
+      loss_supervised = self.model.get_loss(
+          predictions=predictions,
+          targets=one_hot_labels,
+          weight_decay=None)
 
     # Agreement regularization loss.
     loss_agr = self._get_agreement_reg_loss(data, is_train, features_shape)
@@ -280,8 +281,9 @@ def __init__(self,
       gradients, _ = tf.clip_by_global_norm(gradients, self.gradient_clip)
     grads_and_vars = tuple(zip(gradients, variab))
     with tf.control_dependencies(
-        tf.get_collection(tf.GraphKeys.UPDATE_OPS,
-                          scope=tf.get_default_graph().get_name_scope())):
+        tf.get_collection(
+            tf.GraphKeys.UPDATE_OPS,
+            scope=tf.get_default_graph().get_name_scope())):
       train_op = self.optimizer.apply_gradients(
           grads_and_vars, global_step=self.global_step)
 
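
The hunk above is the standard TF1 recipe: clip gradients by global norm, then
apply them under a control dependency on the scope's UPDATE_OPS so that pending
ops such as batch-norm moving-average updates run with every training step. A
self-contained sketch of the same pattern (the toy loss and learning rate are
assumptions):

    import tensorflow as tf

    x = tf.get_variable('x', shape=[], initializer=tf.ones_initializer())
    loss = tf.square(x - 3.0)  # toy quadratic loss
    optimizer = tf.train.GradientDescentOptimizer(0.1)
    global_step = tf.train.get_or_create_global_step()

    gradients, variables = zip(*optimizer.compute_gradients(loss))
    gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
    # Run pending UPDATE_OPS (e.g., batch-norm statistics) before stepping.
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
      train_op = optimizer.apply_gradients(
          list(zip(gradients, variables)), global_step=global_step)
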
@@ -332,7 +334,7 @@ def _create_weight_decay_var(self, weight_decay_initial,
     if weight_decay_schedule is None:
       if weight_decay_initial is not None:
         weight_decay_var = tf.constant(
-          weight_decay_initial, dtype=tf.float32, name='weight_decay')
+            weight_decay_initial, dtype=tf.float32, name='weight_decay')
       else:
         weight_decay_var = None
     elif weight_decay_schedule == 'linear':
@@ -406,32 +408,28 @@ def _get_agreement_reg_loss(self, data, is_train, features_shape):
 
     with tf.variable_scope('predictions', reuse=True):
       encoding, _, _ = self.model.get_encoding_and_params(
-          inputs=features_ll_right, is_train=is_train,
-          update_batch_stats=False)
+          inputs=features_ll_right, is_train=is_train, update_batch_stats=False)
       predictions_ll_right, _, _ = self.model.get_predictions_and_params(
           encoding=encoding, is_train=is_train)
       predictions_ll_right = self.model.normalize_predictions(
           predictions_ll_right)
 
       encoding, _, _ = self.model.get_encoding_and_params(
-          inputs=features_lu_right, is_train=is_train,
-          update_batch_stats=False)
+          inputs=features_lu_right, is_train=is_train, update_batch_stats=False)
       predictions_lu_right, _, _ = self.model.get_predictions_and_params(
           encoding=encoding, is_train=is_train)
       predictions_lu_right = self.model.normalize_predictions(
           predictions_lu_right)
 
       encoding, _, _ = self.model.get_encoding_and_params(
-          inputs=features_uu_left, is_train=is_train,
-          update_batch_stats=False)
+          inputs=features_uu_left, is_train=is_train, update_batch_stats=False)
       predictions_uu_left, _, _ = self.model.get_predictions_and_params(
           encoding=encoding, is_train=is_train)
       predictions_uu_left = self.model.normalize_predictions(
           predictions_uu_left)
 
       encoding, _, _ = self.model.get_encoding_and_params(
-          inputs=features_uu_right, is_train=is_train,
-          update_batch_stats=False)
+          inputs=features_uu_right, is_train=is_train, update_batch_stats=False)
       predictions_uu_right, _, _ = self.model.get_predictions_and_params(
           encoding=encoding, is_train=is_train)
       predictions_uu_right = self.model.normalize_predictions(
@@ -442,8 +440,8 @@ def _get_agreement_reg_loss(self, data, is_train, features_shape):
     # Stop gradients need to be added here. Also, the case where there are no
     # more uu or lu edges at the end of training (so the shapes don't match)
     # needs fixing.
-    left = tf.concat(
-        (labels_ll_left, labels_lu_left, predictions_uu_left), axis=0)
+    left = tf.concat((labels_ll_left, labels_lu_left, predictions_uu_left),
+                     axis=0)
     right = tf.concat(
         (predictions_ll_right, predictions_lu_right, predictions_uu_right),
         axis=0)
@@ -455,12 +453,16 @@ def _get_agreement_reg_loss(self, data, is_train, features_shape):
     agreement_ll = tf.cast(
         tf.equal(labels_ll_left_idx, labels_ll_right_idx), dtype=tf.float32)
     _, agreement_lu, _, _ = self.trainer_agr.create_agreement_prediction(
-        src_features=features_lu_left, tgt_features=features_lu_right,
-        is_train=is_train, src_indices=indices_lu_left,
+        src_features=features_lu_left,
+        tgt_features=features_lu_right,
+        is_train=is_train,
+        src_indices=indices_lu_left,
         tgt_indices=indices_lu_right)
     _, agreement_uu, _, _ = self.trainer_agr.create_agreement_prediction(
-        src_features=features_uu_left, tgt_features=features_uu_right,
-        is_train=is_train, src_indices=indices_uu_left,
+        src_features=features_uu_left,
+        tgt_features=features_uu_right,
+        is_train=is_train,
+        src_indices=indices_uu_left,
         tgt_indices=indices_uu_right)
     agreement = tf.concat((agreement_ll, agreement_lu, agreement_uu), axis=0)
     if self.penalize_neg_agr:
@@ -476,10 +478,10 @@ def _get_agreement_reg_loss(self, data, is_train, features_shape):
     num_ll = tf.shape(predictions_ll_right)[0]
     num_lu = tf.shape(predictions_lu_right)[0]
     num_uu = tf.shape(predictions_uu_left)[0]
-    weights = tf.concat((self.reg_weight_ll * tf.ones(num_ll,),
-                         self.reg_weight_lu * tf.ones(num_lu,),
-                         self.reg_weight_uu * tf.ones(num_uu,)),
-                        axis=0)
+    weights = tf.concat(
+        (self.reg_weight_ll * tf.ones(num_ll,), self.reg_weight_lu *
+         tf.ones(num_lu,), self.reg_weight_uu * tf.ones(num_uu,)),
+        axis=0)
 
     # Scale each distance by its agreement weight and regularization weight.
     loss = tf.reduce_mean(dists * weights * agreement)
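
Numerically, the regularizer computed above is a weighted mean: each pairwise
prediction distance is scaled by the agreement score for that pair and by a
per-pair-type weight (LL, LU or UU). A NumPy sketch with made-up values:

    import numpy as np

    # Prediction distances for 2 LL, 2 LU and 2 UU pairs (made-up values).
    dists = np.array([0.1, 0.8, 0.3, 0.5, 0.9, 0.2])
    # Per-pair-type regularization weights, concatenated as in the code above.
    weights = np.concatenate([1.0 * np.ones(2),   # reg_weight_ll
                              0.5 * np.ones(2),   # reg_weight_lu
                              0.5 * np.ones(2)])  # reg_weight_uu
    # Agreement scores in [0, 1]; high means the pair should share a label.
    agreement = np.array([1.0, 1.0, 0.7, 0.2, 0.9, 0.1])

    loss = np.mean(dists * weights * agreement)  # scalar regularization loss
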
@@ -511,8 +513,9 @@ def _construct_feed_dict(self,
     input_indices = next(data_iterator)
     # Select the labels. Use the true, correct labels at test time, and the
     # self-labeled ones at train time.
-    labels = (self.data.get_original_labels(input_indices) if split == 'test'
-              else self.data.get_labels(input_indices))
+    labels = (
+        self.data.get_original_labels(input_indices)
+        if split == 'test' else self.data.get_labels(input_indices))
     feed_dict = {
         self.input_features: self.data.get_features(input_indices),
         self.input_labels: labels,
@@ -586,8 +589,8 @@ def _select_from_pool(indices):
     while True:
       indices_src, features_src, labels_src = _select_from_pool(src_indices)
       indices_tgt, features_tgt, labels_tgt = _select_from_pool(tgt_indices)
-      yield (indices_src, indices_tgt, features_src, features_tgt,
-             labels_src, labels_tgt)
+      yield (indices_src, indices_tgt, features_src, features_tgt, labels_src,
+             labels_tgt)
 
   def edge_iterator(self, data, batch_size, labeling):
     """An iterator over graph edges.
@@ -679,6 +682,7 @@ def train(self, data, session=None, **kwargs):
       data: A CotrainDataset object.
      session: A TensorFlow session or None.
       **kwargs: Other keyword arguments.
+
     Returns:
       best_test_acc: A float representing the test accuracy at the iteration
         where the validation accuracy is maximum.
@@ -742,11 +746,11 @@ def train(self, data, session=None, **kwargs):
     checkpoint_saved = False
     while not has_converged:
       feed_dict = self._construct_feed_dict(
-        data_iterator=data_iterator_train,
-        split='train',
-        pair_ll_iterator=pair_ll_iterator,
-        pair_lu_iterator=pair_lu_iterator,
-        pair_uu_iterator=pair_uu_iterator)
+          data_iterator=data_iterator_train,
+          split='train',
+          pair_ll_iterator=pair_ll_iterator,
+          pair_lu_iterator=pair_lu_iterator,
+          pair_uu_iterator=pair_uu_iterator)
       if self.enable_summaries and step % self.summary_step == 0:
         loss_val, summary, iter_cls_total, _ = session.run(
             [self.loss_op, self.summary_op, self.iter_cls_total, self.train_op],
@@ -813,8 +817,10 @@ def predict(self, session, indices, is_train):
       input_features = self.data.get_features(batch_indices)
       batch_predictions = session.run(
          self.normalized_predictions,
-          feed_dict={self.input_features: input_features,
-                     self.is_train: is_train})
+          feed_dict={
+              self.input_features: input_features,
+              self.is_train: is_train
+          })
       predictions.append(batch_predictions)
       idx_start = idx_end
     if not predictions: