@@ -1395,6 +1395,7 @@ def create_inference_pipeline(
13951395 description : Optional [str ] = None ,
13961396 reference_df : Optional [pd .DataFrame ] = None ,
13971397 reference_dataset_file_path : Optional [str ] = None ,
1398+ reference_dataset_config : Optional [Dict [str , any ]] = None ,
13981399 reference_dataset_config_file_path : Optional [str ] = None ,
13991400 ) -> InferencePipeline :
14001401 """Creates an inference pipeline in an Openlayer project.
@@ -1413,6 +1414,24 @@ def create_inference_pipeline(
14131414 description : str, optional
14141415 Inference pipeline description. If not specified, the description will be
14151416 set to ``"Monitoring production data."``.
1417+ reference_df : pd.DataFrame, optional
1418+ Dataframe containing your reference dataset. It is optional to provide the
1419+ reference dataframe during the creation of the inference pipeline. If you
1420+ wish, you can add it later with the :obj:`upload_reference_dataframe` or
1421+ :obj:`upload_reference_dataset` methods. Not needed if
1422+ ``reference_dataset_file_path`` is provided.
1423+ reference_dataset_file_path : str, optional
1424+ Path to the reference dataset CSV file. It is optional to provide the
1425+ reference dataset file path during the creation of the inference pipeline.
1426+ If you wish, you can add it later with the :obj:`upload_reference_dataframe`
1427+ or :obj:`upload_reference_dataset` methods. Not needed if ``reference_df``
1428+ is provided.
1429+ reference_dataset_config : Dict[str, any], optional
1430+ Dictionary containing the reference dataset configuration. This is not
1431+ needed if ``reference_dataset_config_file_path`` is provided.
1432+ reference_dataset_config_file_path : str, optional
1433+ Path to the reference dataset configuration YAML file. This is not needed
1434+ if ``reference_dataset_config`` is provided.
14161435
14171436 Returns
14181437 -------
@@ -1603,7 +1622,8 @@ def upload_reference_dataset(
16031622 inference_pipeline_id : str ,
16041623 task_type : TaskType ,
16051624 file_path : str ,
1606- dataset_config_file_path : str ,
1625+ dataset_config : Optional [Dict [str , any ]] = None ,
1626+ dataset_config_file_path : Optional [str ] = None ,
16071627 ) -> None :
16081628 r"""Uploads a reference dataset saved as a csv file to an inference pipeline.
16091629
@@ -1618,8 +1638,19 @@ def upload_reference_dataset(
16181638 ----------
16191639 file_path : str
16201640 Path to the csv file containing the reference dataset.
1641+ dataset_config : Dict[str, any], optional
1642+ Dictionary containing the dataset configuration. This is not needed if
1643+ ``dataset_config_file_path`` is provided.
1644+
1645+ .. admonition:: What's in the dataset config?
1646+
1647+ The dataset configuration depends on the :obj:`TaskType`.
1648+ Refer to the `documentation <https://docs.openlayer.com/docs/tabular-classification-dataset-config>`_
1649+ for examples.
1650+
16211651 dataset_config_file_path : str
1622- Path to the dataset configuration YAML file.
1652+ Path to the dataset configuration YAML file. This is not needed if
1653+ ``dataset_config`` is provided.
16231654
16241655 .. admonition:: What's in the dataset config file?
16251656
@@ -1687,10 +1718,17 @@ def upload_reference_dataset(
16871718 ... dataset_config_file_path='/path/to/dataset_config.yaml',
16881719 ... )
16891720 """
1721+ if dataset_config is None and dataset_config_file_path is None :
1722+ raise ValueError (
1723+ "Either `dataset_config` or `dataset_config_file_path` must be"
1724+ " provided."
1725+ )
1726+
16901727 # Validate dataset
16911728 dataset_validator = dataset_validators .get_validator (
16921729 task_type = task_type ,
16931730 dataset_config_file_path = dataset_config_file_path ,
1731+ dataset_config = dataset_config ,
16941732 dataset_file_path = file_path ,
16951733 )
16961734 failed_validations = dataset_validator .validate ()
@@ -1702,7 +1740,8 @@ def upload_reference_dataset(
17021740 ) from None
17031741
17041742 # Load dataset config and augment with defaults
1705- dataset_config = utils .read_yaml (dataset_config_file_path )
1743+ if dataset_config_file_path is not None :
1744+ dataset_config = utils .read_yaml (dataset_config_file_path )
17061745 dataset_data = DatasetSchema ().load (
17071746 {"task_type" : task_type .value , ** dataset_config }
17081747 )
@@ -1731,7 +1770,8 @@ def upload_reference_dataframe(
17311770 inference_pipeline_id : str ,
17321771 task_type : TaskType ,
17331772 dataset_df : pd .DataFrame ,
1734- dataset_config_file_path : str ,
1773+ dataset_config : Optional [Dict [str , any ]] = None ,
1774+ dataset_config_file_path : Optional [str ] = None ,
17351775 ) -> None :
17361776 r"""Uploads a reference dataset (a pandas dataframe) to an inference pipeline.
17371777
@@ -1746,8 +1786,19 @@ def upload_reference_dataframe(
17461786 ----------
17471787 dataset_df : pd.DataFrame
17481788 Dataframe containing the reference dataset.
1789+ dataset_config : Dict[str, any], optional
1790+ Dictionary containing the dataset configuration. This is not needed if
1791+ ``dataset_config_file_path`` is provided.
1792+
1793+ .. admonition:: What's in the dataset config?
1794+
1795+ The dataset configuration depends on the :obj:`TaskType`.
1796+ Refer to the `documentation <https://docs.openlayer.com/docs/tabular-classification-dataset-config>`_
1797+ for examples.
1798+
17491799 dataset_config_file_path : str
1750- Path to the dataset configuration YAML file.
1800+ Path to the dataset configuration YAML file. This is not needed if
1801+ ``dataset_config`` is provided.
17511802
17521803 .. admonition:: What's in the dataset config file?
17531804
@@ -1828,6 +1879,7 @@ def upload_reference_dataframe(
18281879 return self .upload_reference_dataset (
18291880 file_path = file_path ,
18301881 inference_pipeline_id = inference_pipeline_id ,
1882+ dataset_config = dataset_config ,
18311883 dataset_config_file_path = dataset_config_file_path ,
18321884 task_type = task_type ,
18331885 )
@@ -1837,16 +1889,29 @@ def publish_batch_data(
18371889 inference_pipeline_id : str ,
18381890 task_type : TaskType ,
18391891 batch_df : pd .DataFrame ,
1840- batch_config_file_path : str ,
1892+ batch_config : Optional [Dict [str , any ]] = None ,
1893+ batch_config_file_path : Optional [str ] = None ,
18411894 ) -> None :
18421895 """Publishes a batch of production data to the Openlayer platform.
18431896
18441897 Parameters
18451898 ----------
18461899 batch_df : pd.DataFrame
18471900 Dataframe containing the batch of production data.
1901+ batch_config : Dict[str, any], optional
1902+ Dictionary containing the batch configuration. This is not needed if
1903+ ``batch_config_file_path`` is provided.
1904+
1905+ .. admonition:: What's in the config?
1906+
1907+ The configuration for a batch of data depends on the :obj:`TaskType`.
1908+ Refer to the `documentation <https://docs.openlayer.com/docs/tabular-classification-dataset-config>`_
1909+ for examples of dataset configuration files. These configurations are
1910+ the same for development and batches of production data.
1911+
18481912 batch_config_file_path : str
1849- Path to the configuration YAML file.
1913+ Path to the configuration YAML file. This is not needed if
1914+ ``batch_config`` is provided.
18501915
18511916 .. admonition:: What's in the config file?
18521917
@@ -1889,17 +1954,26 @@ def publish_batch_data(
18891954 ... batch_config_file_path='/path/to/batch_config.yaml',
18901955 ... )
18911956 """
1892- if not os .path .exists (batch_config_file_path ):
1957+ if batch_config is None and batch_config_file_path is None :
1958+ raise ValueError (
1959+ "Either `batch_config` or `batch_config_file_path` must be" " provided."
1960+ )
1961+ if batch_config_file_path is not None and not os .path .exists (
1962+ batch_config_file_path
1963+ ):
18931964 raise exceptions .OpenlayerValidationError (
18941965 f"Batch config file path { batch_config_file_path } does not exist."
18951966 ) from None
1896- batch_config = utils .read_yaml (batch_config_file_path )
1967+ elif batch_config_file_path is not None :
1968+ batch_config = utils .read_yaml (batch_config_file_path )
1969+
18971970 batch_config ["label" ] = "production"
18981971
18991972 # Validate batch of data
19001973 batch_validator = dataset_validators .get_validator (
19011974 task_type = task_type ,
19021975 dataset_config = batch_config ,
1976+ dataset_config_file_path = batch_config_file_path ,
19031977 dataset_df = batch_df ,
19041978 )
19051979 failed_validations = batch_validator .validate ()
0 commit comments