Skip to content

Commit 97b5f8d

Browse files
author
AWS
committed
AWS Clean Rooms ML Update: AWS Clean Rooms ML now supports privacy-enhancing synthetic dataset generation for custom ML training.
1 parent 45f89dd commit 97b5f8d

File tree

2 files changed

+172
-0
lines changed

2 files changed

+172
-0
lines changed
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"type": "feature",
3+
"category": "AWS Clean Rooms ML",
4+
"contributor": "",
5+
"description": "AWS Clean Rooms ML now supports privacy-enhancing synthetic dataset generation for custom ML training."
6+
}

services/cleanroomsml/src/main/resources/codegen-resources/service-2.json

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1879,6 +1879,22 @@
18791879
},
18801880
"documentation":"<p>Provides summary information about a trained model in a collaboration.</p>"
18811881
},
1882+
"ColumnClassificationDetails":{
1883+
"type":"structure",
1884+
"required":["columnMapping"],
1885+
"members":{
1886+
"columnMapping":{
1887+
"shape":"ColumnMappingList",
1888+
"documentation":"<p>A mapping that defines the classification of data columns for synthetic data generation and specifies how each column should be handled during the privacy-preserving data synthesis process.</p>"
1889+
}
1890+
},
1891+
"documentation":"<p>Contains classification information for data columns, including mappings that specify how columns should be handled during synthetic data generation and privacy analysis.</p>"
1892+
},
1893+
"ColumnMappingList":{
1894+
"type":"list",
1895+
"member":{"shape":"SyntheticDataColumnProperties"},
1896+
"min":5
1897+
},
18821898
"ColumnName":{
18831899
"type":"string",
18841900
"max":128,
@@ -2579,6 +2595,17 @@
25792595
},
25802596
"documentation":"<p>The configuration for defining custom patterns to be redacted from logs and error messages. This is for the CUSTOM config under entitiesToRedact. CustomEntityConfig and entitiesToRedact must either both be present or both be absent.</p>"
25812597
},
2598+
"DataPrivacyScores":{
2599+
"type":"structure",
2600+
"required":["membershipInferenceAttackScores"],
2601+
"members":{
2602+
"membershipInferenceAttackScores":{
2603+
"shape":"MembershipInferenceAttackScoreList",
2604+
"documentation":"<p>Scores that evaluate the vulnerability of the synthetic data to membership inference attacks, which attempt to determine whether a specific individual was a member of the original dataset.</p>"
2605+
}
2606+
},
2607+
"documentation":"<p>Privacy evaluation scores that measure the privacy characteristics of the generated synthetic data, including assessments of potential privacy risks such as membership inference attacks.</p>"
2608+
},
25822609
"DataSource":{
25832610
"type":"structure",
25842611
"required":["glueDataSource"],
@@ -3141,6 +3168,10 @@
31413168
"shape":"ResourceDescription",
31423169
"documentation":"<p>The description of the ML input channel.</p>"
31433170
},
3171+
"syntheticDataConfiguration":{
3172+
"shape":"SyntheticDataConfiguration",
3173+
"documentation":"<p>The synthetic data configuration for this ML input channel, including parameters for generating privacy-preserving synthetic data and evaluation scores for measuring the privacy of the generated data.</p>"
3174+
},
31443175
"createTime":{
31453176
"shape":"SyntheticTimestamp_date_time",
31463177
"documentation":"<p>The time at which the ML input channel was created.</p>"
@@ -3665,6 +3696,10 @@
36653696
"shape":"ResourceDescription",
36663697
"documentation":"<p>The description of the ML input channel.</p>"
36673698
},
3699+
"syntheticDataConfiguration":{
3700+
"shape":"SyntheticDataConfiguration",
3701+
"documentation":"<p>The synthetic data configuration for this ML input channel, including parameters for generating privacy-preserving synthetic data and evaluation scores for measuring the privacy of the generated data.</p>"
3702+
},
36683703
"createTime":{
36693704
"shape":"SyntheticTimestamp_date_time",
36703705
"documentation":"<p>The time at which the ML input channel was created.</p>"
@@ -5387,12 +5422,81 @@
53875422
},
53885423
"documentation":"<p>Configuration information about how the exported model artifacts are stored.</p>"
53895424
},
5425+
"MLSyntheticDataParameters":{
5426+
"type":"structure",
5427+
"required":[
5428+
"epsilon",
5429+
"maxMembershipInferenceAttackScore",
5430+
"columnClassification"
5431+
],
5432+
"members":{
5433+
"epsilon":{
5434+
"shape":"MLSyntheticDataParametersEpsilonDouble",
5435+
"documentation":"<p>The epsilon value for differential privacy, which controls the privacy-utility tradeoff in synthetic data generation. Lower values provide stronger privacy guarantees but may reduce data utility.</p>"
5436+
},
5437+
"maxMembershipInferenceAttackScore":{
5438+
"shape":"MLSyntheticDataParametersMaxMembershipInferenceAttackScoreDouble",
5439+
"documentation":"<p>The maximum acceptable score for membership inference attack vulnerability. Synthetic data generation fails if the score for the resulting data exceeds this threshold.</p>"
5440+
},
5441+
"columnClassification":{
5442+
"shape":"ColumnClassificationDetails",
5443+
"documentation":"<p>Classification details for data columns that specify how each column should be treated during synthetic data generation.</p>"
5444+
}
5445+
},
5446+
"documentation":"<p>Parameters that control the generation of synthetic data for custom model training, including privacy settings and column classification details.</p>"
5447+
},
5448+
"MLSyntheticDataParametersEpsilonDouble":{
5449+
"type":"double",
5450+
"box":true,
5451+
"max":10,
5452+
"min":0.0001
5453+
},
5454+
"MLSyntheticDataParametersMaxMembershipInferenceAttackScoreDouble":{
5455+
"type":"double",
5456+
"box":true,
5457+
"max":1,
5458+
"min":0.5
5459+
},
53905460
"MaxResults":{
53915461
"type":"integer",
53925462
"box":true,
53935463
"max":100,
53945464
"min":1
53955465
},
5466+
"MembershipInferenceAttackScore":{
5467+
"type":"structure",
5468+
"required":[
5469+
"attackVersion",
5470+
"score"
5471+
],
5472+
"members":{
5473+
"attackVersion":{
5474+
"shape":"MembershipInferenceAttackVersion",
5475+
"documentation":"<p>The version of the membership inference attack, which consists of the attack type and its version number, used to generate this privacy score.</p>"
5476+
},
5477+
"score":{
5478+
"shape":"MembershipInferenceAttackScoreScoreDouble",
5479+
"documentation":"<p>The numerical score representing the vulnerability to membership inference attacks.</p>"
5480+
}
5481+
},
5482+
"documentation":"<p>A score that measures the vulnerability of synthetic data to membership inference attacks and provides both the numerical score and the version of the attack methodology used for evaluation.</p>"
5483+
},
5484+
"MembershipInferenceAttackScoreList":{
5485+
"type":"list",
5486+
"member":{"shape":"MembershipInferenceAttackScore"},
5487+
"max":1,
5488+
"min":1
5489+
},
5490+
"MembershipInferenceAttackScoreScoreDouble":{
5491+
"type":"double",
5492+
"box":true,
5493+
"max":1.0,
5494+
"min":0.0
5495+
},
5496+
"MembershipInferenceAttackVersion":{
5497+
"type":"string",
5498+
"enum":["DISTANCE_TO_CLOSEST_RECORD_V1"]
5499+
},
53965500
"MetricDefinition":{
53975501
"type":"structure",
53985502
"required":[
@@ -6054,6 +6158,68 @@
60546158
"min":1
60556159
},
60566160
"String":{"type":"string"},
6161+
"SyntheticDataColumnName":{
6162+
"type":"string",
6163+
"max":128,
6164+
"min":0,
6165+
"pattern":"[a-z0-9_](([a-z0-9_]+-)*([a-z0-9_]+))?"
6166+
},
6167+
"SyntheticDataColumnProperties":{
6168+
"type":"structure",
6169+
"required":[
6170+
"columnName",
6171+
"columnType",
6172+
"isPredictiveValue"
6173+
],
6174+
"members":{
6175+
"columnName":{
6176+
"shape":"SyntheticDataColumnName",
6177+
"documentation":"<p>The name of the data column as it appears in the dataset.</p>"
6178+
},
6179+
"columnType":{
6180+
"shape":"SyntheticDataColumnType",
6181+
"documentation":"<p>The data type of the column, which determines how the synthetic data generation algorithm processes and synthesizes values for this column.</p>"
6182+
},
6183+
"isPredictiveValue":{
6184+
"shape":"Boolean",
6185+
"documentation":"<p>Indicates if this column contains predictive values that should be treated as target variables in machine learning models. This affects how the synthetic data generation preserves statistical relationships.</p>"
6186+
}
6187+
},
6188+
"documentation":"<p>Properties that define how a specific data column should be handled during synthetic data generation, including its name, type, and role in predictive modeling.</p>"
6189+
},
6190+
"SyntheticDataColumnType":{
6191+
"type":"string",
6192+
"enum":[
6193+
"CATEGORICAL",
6194+
"NUMERICAL"
6195+
]
6196+
},
6197+
"SyntheticDataConfiguration":{
6198+
"type":"structure",
6199+
"required":["syntheticDataParameters"],
6200+
"members":{
6201+
"syntheticDataParameters":{
6202+
"shape":"MLSyntheticDataParameters",
6203+
"documentation":"<p>The parameters that control how synthetic data is generated, including privacy settings, column classifications, and other configuration options that affect the data synthesis process.</p>"
6204+
},
6205+
"syntheticDataEvaluationScores":{
6206+
"shape":"SyntheticDataEvaluationScores",
6207+
"documentation":"<p>Evaluation scores that assess the quality and privacy characteristics of the generated synthetic data, providing metrics on data utility and privacy preservation.</p>"
6208+
}
6209+
},
6210+
"documentation":"<p>Configuration settings for synthetic data generation, including the parameters that control data synthesis and the evaluation scores that measure the quality and privacy characteristics of the generated synthetic data.</p>"
6211+
},
6212+
"SyntheticDataEvaluationScores":{
6213+
"type":"structure",
6214+
"required":["dataPrivacyScores"],
6215+
"members":{
6216+
"dataPrivacyScores":{
6217+
"shape":"DataPrivacyScores",
6218+
"documentation":"<p>Privacy-specific evaluation scores that measure how well the synthetic data protects individual privacy, including assessments of potential privacy risks such as membership inference attacks.</p>"
6219+
}
6220+
},
6221+
"documentation":"<p>Comprehensive evaluation metrics for synthetic data that assess both the utility of the generated data for machine learning tasks and its privacy preservation characteristics.</p>"
6222+
},
60576223
"SyntheticTimestamp_date_time":{
60586224
"type":"timestamp",
60596225
"timestampFormat":"iso8601"

0 commit comments

Comments
 (0)