Skip to content

Commit bc911d6

Browse files
refactor: update tensorflow test prediction scripts to accept optional url and headers arguments to be able to test the model deployed in kserve
1 parent 933100b commit bc911d6

File tree

2 files changed

+35
-15
lines changed

2 files changed

+35
-15
lines changed

deployment/tensorflow/README.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,5 +24,16 @@ docker run -it --rm \
2424
Run the test predictions script to check that your model is working correctly with TensorFlow Serving:
2525

2626
```bash
27-
python test_predictions.py 360950
27+
python test_predictions.py -r 360950
28+
```
29+
30+
## Test predictions on a different server
31+
32+
If using `minikube` to deploy the TensorFlow model using TensorFlow Serving behind KServe, you can use the same script to test predictions:
33+
34+
```bash
35+
python test_predictions.py \
36+
-r 360950 \
37+
-u "http://$(minikube ip):$(kubectl get svc istio-ingressgateway --namespace istio-system -o jsonpath='{.spec.ports[?(@.name=="http2")].nodePort}')/v1/models/tensorflow-example:predict" \
38+
-H "Host=$(kubectl get inferenceservice tensorflow-example --namespace default -o jsonpath='{.status.url}' | cut -d "/" -f 3)"
2839
```
Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,47 @@
1-
import sys
1+
import argparse
22

33
import numpy as np
44
import requests
55
import matplotlib.pyplot as plt
66

77

def inference_over_http(data: np.ndarray, url: str | None = None, headers: dict | None = None) -> dict:
    """Send *data* to a TensorFlow Serving-compatible REST prediction endpoint.

    Args:
        data: Array of model inputs; serialized row-wise via ``tolist()`` into
            the TF Serving ``{"instances": [...]}`` request body.
        url: Prediction endpoint. Defaults to the local TF Serving container
            (``http://localhost:8501/v1/models/my_model:predict``).
        headers: Optional extra HTTP headers (e.g. a ``Host`` header when
            routing through a KServe/Istio ingress). The caller's dict is
            never modified.

    Returns:
        The decoded JSON response body as a dict.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
    """
    if url is None:
        url = "http://localhost:8501/v1/models/my_model:predict"
    # Work on a copy so adding Content-Type below does not mutate the
    # caller-supplied dict (a surprising side effect in the original).
    headers = dict(headers) if headers else {}
    headers["Content-Type"] = "application/json"
    body = {"instances": data.tolist()}
    response = requests.post(url, json=body, headers=headers)
    response.raise_for_status()
    return response.json()
1419

1520

1621
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Process run number, optional URL, and optional headers.")
    # required=True: without it, omitting -r leaves run_number as None and the
    # sign check below raises TypeError instead of a clear argparse usage error.
    parser.add_argument("-r", "--run_number", type=int, required=True,
                        help="The run number (required integer argument).")
    parser.add_argument("-u", "--url", type=str, help="Optional URL.")
    parser.add_argument("-H", "--headers", type=str, nargs='*',
                        help="Optional headers in key=value format.")
    args = parser.parse_args()

    if args.run_number < 0:
        quit("ERROR: The provided run number must be a non-negative integer.")
    if args.headers:
        # Turn ["Host=example.com", ...] into a dict. Split on the first '='
        # only, so header values may themselves contain '=' characters.
        args.headers = dict(header.split("=", 1) for header in args.headers)
2332

2433
data_arr = np.load("../../data/data_unfiltered.npy")
2534
run_arr = np.load("../../data/runs_unfiltered.npy")
2635
unique_runs = np.unique(run_arr)
2736

28-
if run_number not in unique_runs:
37+
if args.run_number not in unique_runs:
2938
quit(f"ERROR: The specified run number is not present in the sample data. Please, choose one of the following: {unique_runs.tolist()}")
3039

3140
# Collect the data to send to the model
32-
target_data = data_arr[run_arr == run_number]
41+
target_data = data_arr[run_arr == args.run_number]
3342

3443
# Do the inference
35-
predictions = inference_over_http(target_data)
44+
predictions = inference_over_http(target_data, args.url, args.headers)
3645

3746
# Parse the output predictions
3847
reconstructed_data = []
@@ -44,7 +53,7 @@ def inference_over_http(data: np.array):
4453
# Plot
4554
avg_mse = np.array(avg_mse)
4655
plt.figure(figsize=(15, 5))
47-
plt.plot(range(avg_mse.shape[0]), avg_mse, label=f"MSE {run_number}")
56+
plt.plot(range(avg_mse.shape[0]), avg_mse, label=f"MSE {args.run_number}")
4857
plt.legend()
4958
plt.show()
5059
plt.close()

0 commit comments

Comments
 (0)