-
Notifications
You must be signed in to change notification settings - Fork 25
Add group-wise inference risks #53
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,16 +14,16 @@ | |
|
|
||
|
|
||
| def _run_attack( | ||
| target: pd.DataFrame, | ||
| syn: pd.DataFrame, | ||
| n_attacks: int, | ||
| aux_cols: list[str], | ||
| secret: str, | ||
| n_jobs: int, | ||
| naive: bool, | ||
| regression: Optional[bool], | ||
| inference_model: Optional[InferencePredictor], | ||
| ) -> int: | ||
| target: pd.DataFrame, | ||
| syn: pd.DataFrame, | ||
| n_attacks: int, | ||
| aux_cols: list[str], | ||
| secret: str, | ||
| n_jobs: int, | ||
| naive: bool, | ||
| regression: Optional[bool], | ||
| inference_model: Optional[InferencePredictor], | ||
| ) -> tuple[int, pd.Series]: | ||
| if regression is None: | ||
| regression = pd.api.types.is_numeric_dtype(target[secret]) | ||
|
|
||
|
|
@@ -35,9 +35,11 @@ def _run_attack( | |
| # Instantiate the default KNN model if no other model is passed through `inference_model`. | ||
| if inference_model is None: | ||
| inference_model = KNNInferencePredictor(data=syn, columns=aux_cols, target_col=secret, n_jobs=n_jobs) | ||
|
|
||
| guesses = inference_model.predict(targets) | ||
| guesses = guesses.reindex_like(targets) | ||
|
|
||
| return evaluate_inference_guesses(guesses=guesses, secrets=targets[secret], regression=regression).sum() | ||
| return evaluate_inference_guesses(guesses=guesses, secrets=targets[secret], regression=regression).sum(), guesses | ||
|
|
||
|
|
||
| def evaluate_inference_guesses( | ||
|
|
@@ -72,6 +74,9 @@ def evaluate_inference_guesses( | |
| Array of boolean values indicating the correctness of each guess. | ||
|
|
||
| """ | ||
| if not guesses.index.equals(secrets.index): | ||
| raise RuntimeError("The predictions indices do not match the target indices. Check your inference model.") | ||
|
|
||
| guesses_np = guesses.to_numpy() | ||
| secrets_np = secrets.to_numpy() | ||
|
|
||
|
|
@@ -152,7 +157,7 @@ def __init__( | |
| syn: pd.DataFrame, | ||
| aux_cols: list[str], | ||
| secret: str, | ||
| regression: Optional[bool] = None, | ||
| regression: bool = False, | ||
| n_attacks: int = 500, | ||
| control: Optional[pd.DataFrame] = None, | ||
| inference_model: Optional[InferencePredictor] = None | ||
|
|
@@ -180,7 +185,7 @@ def __init__( | |
| self._aux_cols = aux_cols | ||
| self._evaluated = False | ||
|
|
||
| def _attack(self, target: pd.DataFrame, naive: bool, n_jobs: int, n_attacks: int) -> int: | ||
| def _attack(self, target: pd.DataFrame, naive: bool, n_jobs: int, n_attacks: int) -> tuple[int, pd.Series]: | ||
| return _run_attack( | ||
| target=target, | ||
| syn=self._syn, | ||
|
|
@@ -207,14 +212,17 @@ def evaluate(self, n_jobs: int = -2) -> "InferenceEvaluator": | |
| The evaluated ``InferenceEvaluator`` object. | ||
|
|
||
| """ | ||
| self._n_baseline = self._attack(target=self._ori, naive=True, n_jobs=n_jobs, | ||
| n_attacks=self._n_attacks_baseline) | ||
| self._n_success = self._attack(target=self._ori, naive=False, n_jobs=n_jobs, | ||
| n_attacks=self._n_attacks_ori) | ||
| self._n_control = ( | ||
| None if self._control is None else self._attack(target=self._control, naive=False, n_jobs=n_jobs, | ||
| n_attacks=self._n_attacks_control) | ||
| ) | ||
| self._n_baseline, self._guesses_baseline = self._attack( | ||
| target=self._ori, naive=True, n_jobs=n_jobs, n_attacks=self._n_attacks_baseline | ||
| ) | ||
| self._n_success, self._guesses_success = self._attack( | ||
| target=self._ori, naive=False, n_jobs=n_jobs, n_attacks=self._n_attacks_ori | ||
| ) | ||
| self._n_control, self._guesses_control = ( | ||
| (None, None) | ||
| if self._control is None | ||
| else self._attack(target=self._control, naive=False, n_jobs=n_jobs, n_attacks=self._n_attacks_control) | ||
| ) | ||
|
|
||
| self._evaluated = True | ||
| return self | ||
|
|
@@ -269,3 +277,67 @@ def risk(self, confidence_level: float = 0.95, baseline: bool = False) -> Privac | |
| """ | ||
| results = self.results(confidence_level=confidence_level) | ||
| return results.risk(baseline=baseline) | ||
|
|
||
| def risk_for_groups(self, confidence_level: float = 0.95) -> dict[str, EvaluationResults]: | ||
| """Compute the inference risk for each group of targets with the same value of the secret attribute. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| confidence_level : float, default is 0.95 | ||
| Confidence level for the error bound calculation. | ||
|
|
||
| Returns | ||
| ------- | ||
| dict[str, EvaluationResults] | ||
| The group as a key, and for every group the corresponding results (EvaluationResults). | ||
| Call ``risk()`` on a group's results to obtain the PrivacyRisk if needed. | ||
|
Comment on lines
+291
to
+293
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why returning both the
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. otherwise, you can mirror the non-grouped case and have two separate methods, one called
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You are right. I removed the risk for now. In our use case, it was almost always relevant to know the attack success rate. I'm returning the EvaluationResults, and if necessary one can call the risk() afterwards |
||
|
|
||
| """ | ||
| if not self._evaluated: | ||
| raise RuntimeError("The inference evaluator wasn't evaluated yet. Please, run `evaluate()` first.") | ||
|
|
||
| all_results = {} | ||
|
|
||
| # For every unique group in `self._secret` | ||
| for group, data_ori in self._ori.groupby(self._secret): | ||
| # Get the targets for the current group | ||
| common_indices = data_ori.index.intersection(self._guesses_success.index) | ||
| # Get the guesses for the current group | ||
| target_group = data_ori.loc[common_indices] | ||
| n_attacks_ori = len(target_group) | ||
|
|
||
| # Count the number of success attacks | ||
| n_success = evaluate_inference_guesses( | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. re the other comment: here you are not checking for consistent indexes. That would come for free if it's moved in |
||
| guesses=self._guesses_success.loc[common_indices], | ||
| secrets=target_group[self._secret], | ||
| regression=self._regression, | ||
| ).sum() | ||
|
|
||
| if self._control is not None: | ||
| # Get the targets for the current control group | ||
| data_control = self._control[self._control[self._secret] == group] | ||
| n_attacks_control = len(data_control) | ||
|
|
||
| # Get the guesses for the current control group | ||
| common_indices = data_control.index.intersection(self._guesses_control.index) | ||
|
|
||
| # Count the number of success control attacks | ||
| n_control = evaluate_inference_guesses( | ||
| guesses=self._guesses_control.loc[common_indices], | ||
| secrets=data_control[self._secret], | ||
| regression=self._regression, | ||
| ).sum() | ||
| else: | ||
| n_control = None | ||
| n_attacks_control = -1 | ||
|
|
||
| # Recreate the EvaluationResults for the current group | ||
| all_results[group] = EvaluationResults( | ||
| n_attacks=(n_attacks_ori, self._n_attacks_baseline, n_attacks_control), | ||
| n_success=n_success, | ||
| n_baseline=self._n_baseline, # The baseline risk should be the same independent of the group | ||
| n_control=n_control, | ||
| confidence_level=confidence_level, | ||
| ) | ||
|
|
||
| return all_results | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
💯