-
Notifications
You must be signed in to change notification settings - Fork 89
[SVCS-334]Adding custom sentry sanitizer #297
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,191 @@ | ||
| from unittest import mock | ||
|
|
||
| import pytest | ||
|
|
||
| from waterbutler.server.sanitize import WBSanitizer | ||
|
|
||
|
|
||
| @pytest.fixture | ||
| def sanitizer(): | ||
| return WBSanitizer(mock.Mock()) | ||
|
|
||
|
|
||
| class TestWBSanitizer: | ||
| # The sanitize function changes some strings and dictionaries | ||
| # you put into it, so you need to explicitly test most things | ||
|
|
||
| MASK = '*' * 8 | ||
|
|
||
| def test_no_sanitization(self, sanitizer): | ||
| assert sanitizer.sanitize('thing', 'ghost science') == 'ghost science' | ||
|
|
||
| def test_fields_sanitized(self, sanitizer): | ||
| fields = sanitizer.FIELDS | ||
| for field in fields: | ||
| assert sanitizer.sanitize(field, 'free speech') == self.MASK | ||
|
|
||
| def test_value_is_none(self, sanitizer): | ||
| assert sanitizer.sanitize('great hair', None) is None | ||
|
|
||
| def test_key_is_none(self, sanitizer): | ||
| assert sanitizer.sanitize(None, 'best day ever') is 'best day ever' | ||
|
|
||
| def test_sanitize_credit_card(self, sanitizer): | ||
| assert sanitizer.sanitize('credit', '424242424242424') == self.MASK | ||
| # This string is not censored since it is out of the range of what it considers | ||
| # to be a credit card | ||
| assert sanitizer.sanitize('credit', '4242424242424243333333') != self.MASK | ||
|
|
||
| def test_none_key_is_sanitized(self, sanitizer): | ||
| assert sanitizer.sanitize(None, '424242424242424') == self.MASK | ||
| # This string is not censored since it is out of the range of what it considers | ||
| # to be a credit card | ||
| assert sanitizer.sanitize(None, '4242424242424243333333') != self.MASK | ||
|
|
||
| def test_dataverse_secret(self, sanitizer): | ||
|
|
||
| # Named oddly because if you call it `dv_secret` it will get sanitized by a different | ||
| # part of the sanitizer | ||
| dv_value = 'aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc' | ||
| assert sanitizer.sanitize('dv_value', dv_value) == self.MASK | ||
|
|
||
| dv_value = 'random characters and other things aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc' | ||
| expected = 'random characters and other things ' + self.MASK | ||
| assert sanitizer.sanitize('dv_value', dv_value) == expected | ||
|
|
||
| def test_bytes(self, sanitizer): | ||
| assert sanitizer.sanitize(b'key', 'bossy yogurt') == self.MASK | ||
| assert sanitizer.sanitize(b'should_be_safe', 'snow science') == 'snow science' | ||
|
|
||
| def test_sanitize_dictionary(self, sanitizer): | ||
| value_dict = { | ||
| 'great_entry': 'very much not a secret or credit card' | ||
| } | ||
|
|
||
| result = sanitizer.sanitize('value_dict', value_dict) | ||
| assert result == { | ||
| 'great_entry': 'very much not a secret or credit card' | ||
| } | ||
|
|
||
| sanitize_dict = { | ||
| 'key': 'secret', | ||
| 'okay_value': 'bears are awesome' | ||
| } | ||
| result = sanitizer.sanitize('sanitize_dict', sanitize_dict) | ||
|
|
||
| assert result == { | ||
| 'key': self.MASK, | ||
| 'okay_value': 'bears are awesome' | ||
| } | ||
|
|
||
| def test_nested_dictionary(self, sanitizer): | ||
| value_dict = { | ||
| 'value': { | ||
| 'other': 'words', | ||
| 'key': 'this will be censored', | ||
| 'secret': { | ||
| 'secret': { | ||
| 'secret': 'pie is great' | ||
| } | ||
| }, | ||
| 'new': 'best' | ||
| } | ||
| } | ||
|
|
||
| result = sanitizer.sanitize('value_dict', value_dict) | ||
| assert result == { | ||
| 'value': { | ||
| 'other': 'words', | ||
| 'key': self.MASK, | ||
| 'secret': self.MASK, | ||
| 'new': 'best' | ||
| } | ||
| } | ||
|
|
||
| def test_nested_dictionary_with_list(self, sanitizer): | ||
| value_dict = { | ||
| 'value': { | ||
| 'other': 'words', | ||
| 'key': 'this will be censored', | ||
| 'secret': { | ||
| 'value': ['bunch', 'of', 'semi', 'random', 'beige', 'run'] | ||
|
|
||
| }, | ||
| 'not_hidden': { | ||
| 'list_of_dict': [ | ||
| {'value': 'value'}, | ||
| {'key': 'secret'} | ||
| ] | ||
| }, | ||
| 'new': 'best' | ||
| } | ||
| } | ||
| result = sanitizer.sanitize('value_dict', value_dict) | ||
| assert result == { | ||
| 'value': { | ||
| 'other': 'words', | ||
| 'key': self.MASK, | ||
| 'secret': self.MASK, | ||
| 'not_hidden': { | ||
| 'list_of_dict': [ | ||
| {'value': 'value'}, | ||
| {'key': self.MASK} | ||
| ] | ||
| }, | ||
| 'new': 'best' | ||
| } | ||
| } | ||
|
|
||
| def test_sanitize_list(self, sanitizer): | ||
| value_list = [ | ||
| 'blarg', | ||
| '10', | ||
| 'key', | ||
| 'aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc' | ||
| ] | ||
|
|
||
| result = sanitizer.sanitize('value_list', value_list) | ||
|
|
||
| assert result == [ | ||
| 'blarg', | ||
| '10', | ||
| 'key', | ||
| self.MASK | ||
| ] | ||
|
|
||
| def test_sanitize_nested_lists(self, sanitizer): | ||
| value_list = [ | ||
| [ | ||
| 'blarg', | ||
| '10', | ||
| 'key', | ||
| 'aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc' | ||
| ], | ||
| 'blarg', | ||
| 'aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc', | ||
| [[[[[[[ | ||
| ['check out this level of nested'], 'aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc' | ||
| ]]]]]]], | ||
| { | ||
| 'key': 'red leaves', | ||
| 'secret': [[[[[[[[]]]]]]]] | ||
| } | ||
| ] | ||
|
|
||
| result = sanitizer.sanitize('value_list', value_list) | ||
|
|
||
| assert result == [ | ||
| [ | ||
| 'blarg', | ||
| '10', | ||
| 'key', | ||
| self.MASK | ||
| ], | ||
| 'blarg', | ||
| self.MASK, | ||
| [[[[[[[['check out this level of nested'], self.MASK]]]]]]], | ||
| { | ||
| 'key': self.MASK, | ||
| 'secret': self.MASK | ||
| } | ||
| ] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,80 @@ | ||
| import re | ||
|
|
||
| from raven.processors import SanitizePasswordsProcessor | ||
|
|
||
|
|
||
| class WBSanitizer(SanitizePasswordsProcessor): | ||
| """Asterisk out things that look like passwords, keys, etc.""" | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Make better use of parent class. |
||
|
|
||
| # Store mask as a fixed length for security | ||
| MASK = '*' * 8 | ||
|
|
||
| # Token and key added from original. Key is used by Dataverse | ||
| FIELDS = frozenset([ | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For phase 2 consideration: double check the list.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @cslzchen Can you please clarify this comment?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @TomBaxter My suggestion is check if there are other possible keys that may store sensitive or secret information. Here is the default keys from FIELDS = frozenset([
'password', 'secret', 'passwd', 'authorization', 'api_key', 'apikey',
'pw', 'cred'
])We don't have
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Understood. I plan on making better use of inheritance so that we don't miss any defaults in the parent. I'll also check the sentry data for Dataverse, for other possible things to sanitize. I'll definitely add |
||
| 'password', | ||
| 'secret', | ||
| 'passwd', | ||
| 'authorization', | ||
| 'api_key', | ||
| 'apikey', | ||
| 'sentry_dsn', | ||
| 'access_token', | ||
| 'key', | ||
| 'token', | ||
| ]) | ||
|
|
||
| # Credit card regex left intact from original processor | ||
| # While we should never have credit card information, its still best to perform the check | ||
| # and keep old functionality | ||
| VALUES_RE = re.compile(r'^(?:\d[ -]*?){13,16}$') | ||
|
|
||
| # Should specifically match Dataverse secrets. Key format checked on demo and on Harvard | ||
| DATAVERSE_SECRET_RE = re.compile(r'[A-Za-z0-9]{8}-[A-Za-z0-9]{4}-[A-Za-z0-9]' | ||
| '{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{12}') | ||
|
|
||
| def sanitize(self, key, value): | ||
| """Overload the sanitize function of the `SanitizePasswordsProcessor'.""" | ||
|
|
||
| if value is None: | ||
| return | ||
|
|
||
| # Part of the original method. Looks for credit cards to sanitize | ||
| if isinstance(value, str) and self.VALUES_RE.search(value): | ||
| return self.MASK | ||
|
|
||
| if isinstance(value, dict): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Need recursion for
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Complete, for lists as well |
||
| for item in value: | ||
| value[item] = self.sanitize(item, value[item]) | ||
|
|
||
| if isinstance(value, list): | ||
| new_list = [] | ||
| for item in value: | ||
| new_list.append(self.sanitize(key, item)) | ||
|
|
||
| value = new_list | ||
|
|
||
| # Check for Dataverse secrets | ||
| if isinstance(value, str): | ||
| matches = self.DATAVERSE_SECRET_RE.findall(value) | ||
| for match in matches: | ||
| value = value.replace(match, self.MASK) | ||
|
|
||
| # key can be a NoneType | ||
| # This sould be after the regex checks incase a `None` key is a token | ||
| if not key: | ||
| return value | ||
|
|
||
| # Just in case we have bytes here, we want to turn them into text | ||
| # properly without failing so we can perform our check. | ||
| if isinstance(key, bytes): | ||
| # May want a try/except block around this, but for now it should be okay | ||
| key = key.decode('utf-8', 'replace') | ||
| else: | ||
| key = str(key) | ||
|
|
||
| key = key.lower() | ||
| for field in self.FIELDS: | ||
| if field in key: | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Check key directly from self.FIELDS |
||
| return self.MASK | ||
|
|
||
| return value | ||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Need to update this after bumpingUpdate: ignore this commentsetuptoolsto37.0.0.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@cslzchen Can you please clarify this comment?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@TomBaxter Thanks for catching this. I probably was thinking about MFR. No need to change.