diff --git a/tests/server/test_sanitize.py b/tests/server/test_sanitize.py
new file mode 100644
index 000000000..e7a2fda23
--- /dev/null
+++ b/tests/server/test_sanitize.py
@@ -0,0 +1,191 @@
+from unittest import mock
+
+import pytest
+
+from waterbutler.server.sanitize import WBSanitizer
+
+
+@pytest.fixture
+def sanitizer():
+    return WBSanitizer()
+
+
+class TestWBSanitizer:
+    # The sanitize function transforms some of the strings and dictionaries passed
+    # to it, so most behaviours need to be tested explicitly
+
+    MASK = '*' * 8
+
+    def test_no_sanitization(self, sanitizer):
+        assert sanitizer.sanitize('thing', 'ghost science') == 'ghost science'
+
+    def test_fields_sanitized(self, sanitizer):
+        fields = sanitizer.FIELDS
+        for field in fields:
+            assert sanitizer.sanitize(field, 'free speech') == self.MASK
+
+    def test_value_is_none(self, sanitizer):
+        assert sanitizer.sanitize('great hair', None) is None
+
+    def test_key_is_none(self, sanitizer):
+        assert sanitizer.sanitize(None, 'best day ever') == 'best day ever'
+
+    def test_sanitize_credit_card(self, sanitizer):
+        assert sanitizer.sanitize('credit', '424242424242424') == self.MASK
+        # This string is not censored since it is out of the range of what it considers
+        # to be a credit card
+        assert sanitizer.sanitize('credit', '4242424242424243333333') != self.MASK
+
+    def test_none_key_is_sanitized(self, sanitizer):
+        assert sanitizer.sanitize(None, '424242424242424') == self.MASK
+        # This string is not censored since it is out of the range of what it considers
+        # to be a credit card
+        assert sanitizer.sanitize(None, '4242424242424243333333') != self.MASK
+
+    def test_dataverse_secret(self, sanitizer):
+
+        # Named oddly because if you call it `dv_secret` it will get sanitized by a different
+        # part of the sanitizer
+        dv_value = 'aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc'
+        assert sanitizer.sanitize('dv_value', dv_value) == self.MASK
+
+        dv_value = 'random characters and other things aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc'
+        expected = 'random characters and other things ' + self.MASK
+        assert sanitizer.sanitize('dv_value', dv_value) == expected
+
+    def test_bytes(self, sanitizer):
+        assert sanitizer.sanitize(b'key', 'bossy yogurt') == self.MASK
+        assert sanitizer.sanitize(b'should_be_safe', 'snow science') == 'snow science'
+
+    def test_sanitize_dictionary(self, sanitizer):
+        value_dict = {
+            'great_entry': 'very much not a secret or credit card'
+        }
+
+        result = sanitizer.sanitize('value_dict', value_dict)
+        assert result == {
+            'great_entry': 'very much not a secret or credit card'
+        }
+
+        sanitize_dict = {
+            'key': 'secret',
+            'okay_value': 'bears are awesome'
+        }
+        result = sanitizer.sanitize('sanitize_dict', sanitize_dict)
+
+        assert result == {
+            'key': self.MASK,
+            'okay_value': 'bears are awesome'
+        }
+
+    def test_nested_dictionary(self, sanitizer):
+        value_dict = {
+            'value': {
+                'other': 'words',
+                'key': 'this will be censored',
+                'secret': {
+                    'secret': {
+                        'secret': 'pie is great'
+                    }
+                },
+                'new': 'best'
+            }
+        }
+
+        result = sanitizer.sanitize('value_dict', value_dict)
+        assert result == {
+            'value': {
+                'other': 'words',
+                'key': self.MASK,
+                'secret': self.MASK,
+                'new': 'best'
+            }
+        }
+
+    def test_nested_dictionary_with_list(self, sanitizer):
+        value_dict = {
+            'value': {
+                'other': 'words',
+                'key': 'this will be censored',
+                'secret': {
+                    'value': ['bunch', 'of', 'semi', 'random', 'beige', 'run']
+
+                },
+                'not_hidden': {
+                    'list_of_dict': [
+                        {'value': 'value'},
+                        {'key': 'secret'}
+                    ]
+                },
+                'new': 'best'
+            }
+        }
+        result = sanitizer.sanitize('value_dict', value_dict)
+        assert result == {
+            'value': {
+                'other': 'words',
+                'key': self.MASK,
+                'secret': self.MASK,
+                'not_hidden': {
+                    'list_of_dict': [
+                        {'value': 'value'},
+                        {'key': self.MASK}
+                    ]
+                },
+                'new': 'best'
+            }
+        }
+
+    def test_sanitize_list(self, sanitizer):
+        value_list = [
+            'blarg',
+            '10',
+            'key',
+            'aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc'
+        ]
+
+        result = sanitizer.sanitize('value_list', value_list)
+
+        assert result == [
+            'blarg',
+            '10',
+            'key',
+            self.MASK
+        ]
+
+    def test_sanitize_nested_lists(self, sanitizer):
+        value_list = [
+            [
+                'blarg',
+                '10',
+                'key',
+                'aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc'
+            ],
+            'blarg',
+            'aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc',
+            [[[[[[[
+                ['check out this level of nested'], 'aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc'
+            ]]]]]]],
+            {
+                'key': 'red leaves',
+                'secret': [[[[[[[[]]]]]]]]
+            }
+        ]
+
+        result = sanitizer.sanitize('value_list', value_list)
+
+        assert result == [
+            [
+                'blarg',
+                '10',
+                'key',
+                self.MASK
+            ],
+            'blarg',
+            self.MASK,
+            [[[[[[[['check out this level of nested'], self.MASK]]]]]]],
+            {
+                'key': self.MASK,
+                'secret': self.MASK
+            }
+        ]
diff --git a/waterbutler/server/app.py b/waterbutler/server/app.py
index db87fd22a..fb131e355 100644
--- a/waterbutler/server/app.py
+++ b/waterbutler/server/app.py
@@ -44,7 +44,8 @@ def make_app(debug):
         [(r'/status', handlers.StatusHandler)],
         debug=debug,
     )
-    app.sentry_client = AsyncSentryClient(settings.SENTRY_DSN, release=__version__)
+    app.sentry_client = AsyncSentryClient(settings.SENTRY_DSN, release=__version__,
+                                          processors=('waterbutler.server.sanitize.WBSanitizer',))
     return app
 
 
diff --git a/waterbutler/server/sanitize.py b/waterbutler/server/sanitize.py
new file mode 100644
index 000000000..d2bacf1ca
--- /dev/null
+++ b/waterbutler/server/sanitize.py
@@ -0,0 +1,45 @@
+import re
+
+from raven.processors import SanitizePasswordsProcessor
+
+
+class WBSanitizer(SanitizePasswordsProcessor):
+    """
+    Use parent class to asterisk out things that look like passwords, credit card numbers,
+    and API keys in frames, http, and basic extra data.
+
+    In addition, asterisk out Dataverse formatted OAuth tokens.
+    """
+
+    # Should specifically match Dataverse secrets. Key format checked on demo and on Harvard
+    DATAVERSE_SECRET_RE = re.compile(r'[A-Za-z0-9]{8}-[A-Za-z0-9]{4}-[A-Za-z0-9]'
+                                     r'{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{12}')
+
+    def __init__(self, client=None):
+        # raven builds each configured processor by calling it with the client, so that
+        # argument has to be accepted. It is optional so a sanitizer can be built standalone.
+        super().__init__(client)
+        # As of raven version 6.4 this attribute name has been changed from FIELDS to KEYS.
+        # Will need to be updated when we upgrade.
+        self.FIELDS = self.FIELDS.union(['key', 'token', 'refresh_token'])
+
+    def sanitize(self, key, value):
+        """Mask sensitive data in ``value``, recursing into dicts and lists.
+
+        The parent ``sanitize`` is applied first. Dict entries are then sanitized under their
+        own keys, list items under the key of the list, and any Dataverse-style secret found
+        in a string is replaced with the mask. Dicts and lists are rebuilt, not edited in place.
+        """
+        value = super().sanitize(key, value)
+
+        if isinstance(value, dict):
+            return {name: self.sanitize(name, item) for name, item in value.items()}
+
+        if isinstance(value, list):
+            return [self.sanitize(key, item) for item in value]
+
+        # Check for Dataverse secrets
+        if isinstance(value, str):
+            return self.DATAVERSE_SECRET_RE.sub(self.MASK, value)
+
+        return value