-
Notifications
You must be signed in to change notification settings - Fork 89
[SVCS-334] Sentry captures dataverse API tokens on certain error types #313
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
TomBaxter
wants to merge
4
commits into
CenterForOpenScience:develop
from
TomBaxter:feature/SVCS-334
Closed
Changes from all commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,191 @@ | ||
| from unittest import mock | ||
|
|
||
| import pytest | ||
|
|
||
| from waterbutler.server.sanitize import WBSanitizer | ||
|
|
||
|
|
||
@pytest.fixture
def sanitizer():
    """Provide a fresh ``WBSanitizer`` instance for each test."""
    instance = WBSanitizer()
    return instance
|
|
||
|
|
||
class TestWBSanitizer:
    """Behavioural tests for ``WBSanitizer.sanitize``.

    The sanitize function rewrites some of the strings, lists and dictionaries
    passed to it, so each input shape is exercised explicitly.
    """

    # The fixed-length mask the sanitizer is expected to substitute for secrets.
    MASK = '*' * 8

    def test_no_sanitization(self, sanitizer):
        """A harmless key/value pair passes through untouched."""
        assert sanitizer.sanitize('thing', 'ghost science') == 'ghost science'

    def test_fields_sanitized(self, sanitizer):
        """Every key listed in ``FIELDS`` has its value masked."""
        fields = sanitizer.FIELDS
        for field in fields:
            assert sanitizer.sanitize(field, 'free speech') == self.MASK

    def test_value_is_none(self, sanitizer):
        """A ``None`` value stays ``None``."""
        assert sanitizer.sanitize('great hair', None) is None

    def test_key_is_none(self, sanitizer):
        """A ``None`` key does not cause an innocuous value to be masked."""
        # Compare by equality: identity (`is`) against a string literal only
        # passes by accident of interning and warns on modern Pythons.
        assert sanitizer.sanitize(None, 'best day ever') == 'best day ever'

    def test_sanitize_credit_card(self, sanitizer):
        """Values that look like credit card numbers are masked."""
        assert sanitizer.sanitize('credit', '424242424242424') == self.MASK
        # This string is not censored since it is longer than what the
        # sanitizer considers to be a credit card number.
        assert sanitizer.sanitize('credit', '4242424242424243333333') != self.MASK

    def test_none_key_is_sanitized(self, sanitizer):
        """Credit-card-like values are masked even when the key is ``None``."""
        assert sanitizer.sanitize(None, '424242424242424') == self.MASK
        # This string is not censored since it is longer than what the
        # sanitizer considers to be a credit card number.
        assert sanitizer.sanitize(None, '4242424242424243333333') != self.MASK

    def test_dataverse_secret(self, sanitizer):
        """Dataverse-formatted tokens are masked wherever they appear in a string."""
        # Named oddly because if you call it `dv_secret` it will get sanitized
        # by a different part of the sanitizer (the key-name check).
        dv_value = 'aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc'
        assert sanitizer.sanitize('dv_value', dv_value) == self.MASK

        dv_value = 'random characters and other things aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc'
        expected = 'random characters and other things ' + self.MASK
        assert sanitizer.sanitize('dv_value', dv_value) == expected

    def test_bytes(self, sanitizer):
        """Keys supplied as ``bytes`` are matched the same way as ``str`` keys."""
        assert sanitizer.sanitize(b'key', 'bossy yogurt') == self.MASK
        assert sanitizer.sanitize(b'should_be_safe', 'snow science') == 'snow science'

    def test_sanitize_dictionary(self, sanitizer):
        """Dictionary values are sanitized according to their own keys."""
        value_dict = {
            'great_entry': 'very much not a secret or credit card'
        }

        result = sanitizer.sanitize('value_dict', value_dict)
        assert result == {
            'great_entry': 'very much not a secret or credit card'
        }

        sanitize_dict = {
            'key': 'secret',
            'okay_value': 'bears are awesome'
        }
        result = sanitizer.sanitize('sanitize_dict', sanitize_dict)

        assert result == {
            'key': self.MASK,
            'okay_value': 'bears are awesome'
        }

    def test_nested_dictionary(self, sanitizer):
        """Nested dictionaries are walked; a sensitive key masks its whole subtree."""
        value_dict = {
            'value': {
                'other': 'words',
                'key': 'this will be censored',
                'secret': {
                    'secret': {
                        'secret': 'pie is great'
                    }
                },
                'new': 'best'
            }
        }

        result = sanitizer.sanitize('value_dict', value_dict)
        assert result == {
            'value': {
                'other': 'words',
                'key': self.MASK,
                'secret': self.MASK,
                'new': 'best'
            }
        }

    def test_nested_dictionary_with_list(self, sanitizer):
        """Lists of dictionaries nested inside dictionaries are sanitized too."""
        value_dict = {
            'value': {
                'other': 'words',
                'key': 'this will be censored',
                'secret': {
                    'value': ['bunch', 'of', 'semi', 'random', 'beige', 'run']
                },
                'not_hidden': {
                    'list_of_dict': [
                        {'value': 'value'},
                        {'key': 'secret'}
                    ]
                },
                'new': 'best'
            }
        }
        result = sanitizer.sanitize('value_dict', value_dict)
        assert result == {
            'value': {
                'other': 'words',
                'key': self.MASK,
                'secret': self.MASK,
                'not_hidden': {
                    'list_of_dict': [
                        {'value': 'value'},
                        {'key': self.MASK}
                    ]
                },
                'new': 'best'
            }
        }

    def test_sanitize_list(self, sanitizer):
        """List items are sanitized by value; a bare string 'key' is not a secret."""
        value_list = [
            'blarg',
            '10',
            'key',
            'aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc'
        ]

        result = sanitizer.sanitize('value_list', value_list)

        assert result == [
            'blarg',
            '10',
            'key',
            self.MASK
        ]

    def test_sanitize_nested_lists(self, sanitizer):
        """Arbitrarily deep list nesting, mixed with dictionaries, is handled."""
        value_list = [
            [
                'blarg',
                '10',
                'key',
                'aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc'
            ],
            'blarg',
            'aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc',
            [[[[[[[
                ['check out this level of nested'], 'aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc'
            ]]]]]]],
            {
                'key': 'red leaves',
                'secret': [[[[[[[[]]]]]]]]
            }
        ]

        result = sanitizer.sanitize('value_list', value_list)

        assert result == [
            [
                'blarg',
                '10',
                'key',
                self.MASK
            ],
            'blarg',
            self.MASK,
            [[[[[[[['check out this level of nested'], self.MASK]]]]]]],
            {
                'key': self.MASK,
                'secret': self.MASK
            }
        ]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,44 @@ | ||
| import re | ||
|
|
||
| from raven.processors import SanitizePasswordsProcessor | ||
|
|
||
|
|
||
class WBSanitizer(SanitizePasswordsProcessor):
    """Mask secrets in data sent to Sentry.

    Uses the parent class to asterisk out things that look like passwords,
    credit card numbers, and API keys in frames, http, and basic extra data.

    In addition, asterisks out Dataverse-formatted OAuth tokens, and walks
    nested dictionaries and lists so secrets buried inside them are masked too.
    """

    # Should specifically match Dataverse secrets. Key format checked on demo and on Harvard
    DATAVERSE_SECRET_RE = re.compile(
        r'[A-Za-z0-9]{8}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{12}'
    )

    def __init__(self, client=None):
        """Set up the processor and extend the set of sensitive key names.

        :param client: the raven client that owns this processor. raven passes
            it positionally when it instantiates processors; it defaults to
            ``None`` so the sanitizer can also be built standalone (e.g. in tests).
        """
        super().__init__(client)
        # As of raven version 6.4 this attribute name has been changed from FIELDS to KEYS.
        # Will need to be updated when we upgrade.
        self.FIELDS = self.FIELDS.union(['key', 'token', 'refresh_token'])

    def sanitize(self, key, value):
        """Extend the ``sanitize`` method of ``SanitizePasswordsProcessor``.

        The parent implementation runs first. Its result is then processed
        according to its type:

        * ``dict`` -- every entry is sanitized recursively under its own key.
        * ``list`` -- every item is sanitized recursively under the list's key.
        * ``str`` -- every Dataverse-formatted token in it is replaced by the mask.

        Containers are rebuilt rather than modified, so the caller's ``value``
        is never mutated.

        :param key: name associated with ``value``; may be ``None`` or ``bytes``
        :param value: the data to sanitize
        :return: the sanitized copy of ``value``
        """
        value = super().sanitize(key, value)

        if isinstance(value, dict):
            # Build a new dict instead of assigning into the caller's object.
            return {name: self.sanitize(name, item) for name, item in value.items()}

        if isinstance(value, list):
            return [self.sanitize(key, item) for item in value]

        # Check for Dataverse secrets
        if isinstance(value, str):
            return self.DATAVERSE_SECRET_RE.sub(self.MASK, value)

        return value
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@felliott If you prefer, you can update the style during merge. Thanks.