Skip to content

Commit 898b0ee

Browse files
gustavocidornelas authored and whoseoyster committed
Completes OPEN-5004 Create default timestamps and prediction ids when not provided
1 parent d0c01d3 commit 898b0ee

File tree

3 files changed

+27
-12
lines changed

3 files changed

+27
-12
lines changed

openlayer/__init__.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
import time
2929
import uuid
3030
import warnings
31-
from typing import Dict, Optional
31+
from typing import Any, Dict, Optional, Tuple
3232

3333
import pandas as pd
3434
import yaml
@@ -1841,10 +1841,33 @@ def publish_batch_data(
18411841
{"task_type": task_type.value, **batch_config}
18421842
)
18431843

1844+
# Add default columns if not present
1845+
columns_to_add = {"timestampColumnName", "inferenceIdColumnName"}
1846+
for column in columns_to_add:
1847+
if batch_data.get(column) is None:
1848+
batch_data, batch_df = self._add_default_column(
1849+
config=batch_data, df=batch_df, column_name=column
1850+
)
1851+
18441852
# TODO: Make POST request to upload batch
18451853
print("Publishing batch of data...")
18461854
print(batch_data)
18471855

1856+
def _add_default_column(
    self, config: Dict[str, Any], df: pd.DataFrame, column_name: str
) -> Tuple[Dict[str, Any], pd.DataFrame]:
    """Adds the default column specified by ``column_name`` to the dataset config
    and dataframe.

    A new dataframe column with a randomly suffixed name is created, filled with
    default values, and registered in ``config`` under ``column_name``.

    Parameters
    ----------
    config : Dict[str, Any]
        Dataset config. Modified in place: ``config[column_name]`` is set to the
        name of the generated dataframe column.
    df : pd.DataFrame
        Dataframe that receives the generated column. Modified in place.
    column_name : str
        Config key to fill in. Supported values:

        - ``"timestampColumnName"``: every row gets the current Unix time, in
          whole seconds, taken once for the entire dataframe.
        - ``"inferenceIdColumnName"``: every row gets its own random UUID string.

    Returns
    -------
    Tuple[Dict[str, Any], pd.DataFrame]
        The same ``config`` and ``df`` objects that were passed in.

    Raises
    ------
    ValueError
        If ``column_name`` is not one of the supported values. Nothing is
        modified in that case.
    """
    # Random suffix keeps the generated column from clashing with user columns.
    # uuid4 is used instead of uuid1 because uuid1 embeds the host's network
    # address and a timestamp, which would leak into the published data.
    suffix = uuid.uuid4().hex[:8]

    if column_name == "timestampColumnName":
        generated_name = f"timestamp_{suffix}"
        # Scalar assignment broadcasts the same timestamp to every row.
        df[generated_name] = int(time.time())
    elif column_name == "inferenceIdColumnName":
        generated_name = f"inference_id_{suffix}"
        df[generated_name] = [str(uuid.uuid4()) for _ in range(len(df))]
    else:
        # Fail loudly rather than return an unchanged config that the caller
        # would assume now contains the default column.
        raise ValueError(
            f"Cannot add a default column for `{column_name}`. Supported values"
            " are `timestampColumnName` and `inferenceIdColumnName`."
        )

    config[column_name] = generated_name
    return config, df
1870+
18481871
def publish_ground_truths(
18491872
self,
18501873
inference_pipeline_id: str,

openlayer/schemas.py

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -107,17 +107,8 @@ class BaseDatasetSchema(ma.Schema):
107107
@ma.validates_schema
108108
def validates_production_data_schema(self, data, **kwargs):
109109
"""Checks if `inferenceIdColumnName` and `timestampsColumnName` are
110-
specified for production data."""
111-
if data["label"] == DatasetType.Production.value:
112-
if data["inferenceIdColumnName"] is None:
113-
raise ma.ValidationError(
114-
"`inferenceIdColumnName` must be specified for production data."
115-
)
116-
if data["timestampColumnName"] is None:
117-
raise ma.ValidationError(
118-
"`timestampColumnName` must be specified for production data."
119-
)
120-
else:
110+
specified for non-production data."""
111+
if data["label"] != DatasetType.Production.value:
121112
if data["inferenceIdColumnName"] is not None:
122113
raise ma.ValidationError(
123114
"`inferenceIdColumnName` can only be specified for production data,"

openlayer/validators/dataset_validators.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import marshmallow as ma
1010
import pandas as pd
1111
import yaml
12+
1213
from .. import constants, schemas, tasks
1314
from .base_validator import BaseValidator
1415

0 commit comments

Comments
 (0)