
Commit 3d14fb6

gustavocidornelas authored and whoseoyster committed

Allow dict configs for monitoring methods

1 parent 4f32d3f

File tree

1 file changed (+83, -9 lines)

openlayer/__init__.py

Lines changed: 83 additions & 9 deletions
@@ -1395,6 +1395,7 @@ def create_inference_pipeline(
         description: Optional[str] = None,
         reference_df: Optional[pd.DataFrame] = None,
         reference_dataset_file_path: Optional[str] = None,
+        reference_dataset_config: Optional[Dict[str, any]] = None,
         reference_dataset_config_file_path: Optional[str] = None,
     ) -> InferencePipeline:
         """Creates an inference pipeline in an Openlayer project.
@@ -1413,6 +1414,24 @@ def create_inference_pipeline(
         description : str, optional
             Inference pipeline description. If not specified, the description will be
             set to ``"Monitoring production data."``.
+        reference_df : pd.DataFrame, optional
+            Dataframe containing your reference dataset. It is optional to provide the
+            reference dataframe during the creation of the inference pipeline. If you
+            wish, you can add it later with the :obj:`upload_reference_dataframe` or
+            :obj:`upload_reference_dataset` methods. Not needed if
+            ``reference_dataset_file_path`` is provided.
+        reference_dataset_file_path : str, optional
+            Path to the reference dataset CSV file. It is optional to provide the
+            reference dataset file path during the creation of the inference pipeline.
+            If you wish, you can add it later with the :obj:`upload_reference_dataframe`
+            or :obj:`upload_reference_dataset` methods. Not needed if ``reference_df``
+            is provided.
+        reference_dataset_config : Dict[str, any], optional
+            Dictionary containing the reference dataset configuration. This is not
+            needed if ``reference_dataset_config_file_path`` is provided.
+        reference_dataset_config_file_path : str, optional
+            Path to the reference dataset configuration YAML file. This is not needed
+            if ``reference_dataset_config`` is provided.

         Returns
         -------
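
With this hunk, a reference dataset config can now be supplied inline as a dict when the pipeline is created, instead of only via a YAML file. A minimal sketch of the new call, assuming an existing `project` object exposing this method and a tabular classification task; the config keys, column names, and pipeline name below are illustrative, not taken from this commit:

>>> reference_dataset_config = {  # illustrative keys; see the dataset-config docs
...     "classNames": ["retained", "churned"],
...     "labelColumnName": "churned",
...     "featureNames": ["age", "balance"],
... }
>>> inference_pipeline = project.create_inference_pipeline(
...     name="production-monitor",
...     reference_df=reference_df,
...     reference_dataset_config=reference_dataset_config,
... )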
@@ -1603,7 +1622,8 @@ def upload_reference_dataset(
         inference_pipeline_id: str,
         task_type: TaskType,
         file_path: str,
-        dataset_config_file_path: str,
+        dataset_config: Optional[Dict[str, any]] = None,
+        dataset_config_file_path: Optional[str] = None,
     ) -> None:
         r"""Uploads a reference dataset saved as a csv file to an inference pipeline.

@@ -1618,8 +1638,19 @@ def upload_reference_dataset(
         ----------
         file_path : str
             Path to the csv file containing the reference dataset.
+        dataset_config : Dict[str, any], optional
+            Dictionary containing the dataset configuration. This is not needed if
+            ``dataset_config_file_path`` is provided.
+
+            .. admonition:: What's in the dataset config?
+
+                The dataset configuration depends on the :obj:`TaskType`.
+                Refer to the `documentation <https://docs.openlayer.com/docs/tabular-classification-dataset-config>`_
+                for examples.
+
         dataset_config_file_path : str
-            Path to the dataset configuration YAML file.
+            Path to the dataset configuration YAML file. This is not needed if
+            ``dataset_config`` is provided.

         .. admonition:: What's in the dataset config file?

@@ -1687,10 +1718,17 @@ def upload_reference_dataset(
         ...     dataset_config_file_path='/path/to/dataset_config.yaml',
         ... )
         """
+        if dataset_config is None and dataset_config_file_path is None:
+            raise ValueError(
+                "Either `dataset_config` or `dataset_config_file_path` must be"
+                " provided."
+            )
+
         # Validate dataset
         dataset_validator = dataset_validators.get_validator(
             task_type=task_type,
             dataset_config_file_path=dataset_config_file_path,
+            dataset_config=dataset_config,
             dataset_file_path=file_path,
         )
         failed_validations = dataset_validator.validate()
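
The new guard makes the dict/file choice explicit: omitting both arguments now fails fast with a ValueError instead of erroring later inside YAML loading. Expected behavior, sketched with illustrative object names:

>>> client.upload_reference_dataset(
...     inference_pipeline_id=inference_pipeline.id,
...     task_type=TaskType.TabularClassification,
...     file_path='/path/to/reference_dataset.csv',
... )
Traceback (most recent call last):
  ...
ValueError: Either `dataset_config` or `dataset_config_file_path` must be provided.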
@@ -1702,7 +1740,8 @@ def upload_reference_dataset(
             ) from None

         # Load dataset config and augment with defaults
-        dataset_config = utils.read_yaml(dataset_config_file_path)
+        if dataset_config_file_path is not None:
+            dataset_config = utils.read_yaml(dataset_config_file_path)
         dataset_data = DatasetSchema().load(
             {"task_type": task_type.value, **dataset_config}
         )
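
Together, these hunks let callers skip the YAML file entirely. Note the precedence the code establishes: if ``dataset_config_file_path`` is given, the YAML is read and overwrites any passed dict; otherwise the dict is used as-is. A hedged sketch of the dict-based call, with illustrative config keys (consult the linked dataset-config documentation for the exact schema per task type):

>>> dataset_config = {  # illustrative keys
...     "classNames": ["retained", "churned"],
...     "labelColumnName": "churned",
... }
>>> client.upload_reference_dataset(
...     inference_pipeline_id=inference_pipeline.id,
...     task_type=TaskType.TabularClassification,
...     file_path='/path/to/reference_dataset.csv',
...     dataset_config=dataset_config,
... )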
@@ -1731,7 +1770,8 @@ def upload_reference_dataframe(
         inference_pipeline_id: str,
         task_type: TaskType,
         dataset_df: pd.DataFrame,
-        dataset_config_file_path: str,
+        dataset_config: Optional[Dict[str, any]] = None,
+        dataset_config_file_path: Optional[str] = None,
     ) -> None:
         r"""Uploads a reference dataset (a pandas dataframe) to an inference pipeline.

@@ -1746,8 +1786,19 @@ def upload_reference_dataframe(
         ----------
         dataset_df : pd.DataFrame
             Dataframe containing the reference dataset.
+        dataset_config : Dict[str, any], optional
+            Dictionary containing the dataset configuration. This is not needed if
+            ``dataset_config_file_path`` is provided.
+
+            .. admonition:: What's in the dataset config?
+
+                The dataset configuration depends on the :obj:`TaskType`.
+                Refer to the `documentation <https://docs.openlayer.com/docs/tabular-classification-dataset-config>`_
+                for examples.
+
         dataset_config_file_path : str
-            Path to the dataset configuration YAML file.
+            Path to the dataset configuration YAML file. This is not needed if
+            ``dataset_config`` is provided.

         .. admonition:: What's in the dataset config file?

@@ -1828,6 +1879,7 @@ def upload_reference_dataframe(
         return self.upload_reference_dataset(
             file_path=file_path,
             inference_pipeline_id=inference_pipeline_id,
+            dataset_config=dataset_config,
             dataset_config_file_path=dataset_config_file_path,
             task_type=task_type,
         )
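
Since ``upload_reference_dataframe`` delegates to ``upload_reference_dataset``, forwarding the new ``dataset_config`` alongside the file path, the dict takes the exact same code path as above. Illustrative sketch, reusing the dict from the previous example:

>>> client.upload_reference_dataframe(
...     inference_pipeline_id=inference_pipeline.id,
...     task_type=TaskType.TabularClassification,
...     dataset_df=reference_df,
...     dataset_config=dataset_config,
... )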
@@ -1837,16 +1889,29 @@ def publish_batch_data(
         inference_pipeline_id: str,
         task_type: TaskType,
         batch_df: pd.DataFrame,
-        batch_config_file_path: str,
+        batch_config: Optional[Dict[str, any]] = None,
+        batch_config_file_path: Optional[str] = None,
     ) -> None:
         """Publishes a batch of production data to the Openlayer platform.

         Parameters
         ----------
         batch_df : pd.DataFrame
             Dataframe containing the batch of production data.
+        batch_config : Dict[str, any], optional
+            Dictionary containing the batch configuration. This is not needed if
+            ``batch_config_file_path`` is provided.
+
+            .. admonition:: What's in the config?
+
+                The configuration for a batch of data depends on the :obj:`TaskType`.
+                Refer to the `documentation <https://docs.openlayer.com/docs/tabular-classification-dataset-config>`_
+                for examples of dataset configuration files. These configurations are
+                the same for development and batches of production data.
+
         batch_config_file_path : str
-            Path to the configuration YAML file.
+            Path to the configuration YAML file. This is not needed if
+            ``batch_config`` is provided.

         .. admonition:: What's in the config file?

@@ -1889,17 +1954,26 @@ def publish_batch_data(
         ...     batch_config_file_path='/path/to/batch_config.yaml',
         ... )
         """
-        if not os.path.exists(batch_config_file_path):
+        if batch_config is None and batch_config_file_path is None:
+            raise ValueError(
+                "Either `batch_config` or `batch_config_file_path` must be" " provided."
+            )
+        if batch_config_file_path is not None and not os.path.exists(
+            batch_config_file_path
+        ):
             raise exceptions.OpenlayerValidationError(
                 f"Batch config file path {batch_config_file_path} does not exist."
             ) from None
-        batch_config = utils.read_yaml(batch_config_file_path)
+        elif batch_config_file_path is not None:
+            batch_config = utils.read_yaml(batch_config_file_path)
+
         batch_config["label"] = "production"

         # Validate batch of data
         batch_validator = dataset_validators.get_validator(
             task_type=task_type,
             dataset_config=batch_config,
+            dataset_config_file_path=batch_config_file_path,
             dataset_df=batch_df,
         )
         failed_validations = batch_validator.validate()
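
``publish_batch_data`` gets the same treatment, with one wrinkle visible above: whichever source the config comes from, the method then forces ``batch_config["label"] = "production"``, so callers never set the label themselves. Illustrative sketch; the config keys (including ``timestampColumnName``) are assumptions to check against the docs, not taken from this commit:

>>> batch_config = {
...     "classNames": ["retained", "churned"],
...     "labelColumnName": "churned",
...     "timestampColumnName": "timestamp",  # assumed key; verify in the docs
... }
>>> client.publish_batch_data(
...     inference_pipeline_id=inference_pipeline.id,
...     task_type=TaskType.TabularClassification,
...     batch_df=batch_df,
...     batch_config=batch_config,
... )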
