Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
191 changes: 191 additions & 0 deletions tests/server/test_sanitize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
from unittest import mock

import pytest

from waterbutler.server.sanitize import WBSanitizer


@pytest.fixture
def sanitizer():
    """Provide a freshly constructed ``WBSanitizer`` for each test."""
    instance = WBSanitizer()
    return instance


class TestWBSanitizer:
    """Exercise ``WBSanitizer.sanitize`` on strings, bytes keys, dicts and lists."""

    # The sanitize function changes some strings and dictionaries
    # you put into it, so you need to explicitly test most things

    # The value each test expects in place of censored data.
    MASK = '*' * 8

    def test_no_sanitization(self, sanitizer):
        """A harmless key/value pair is returned unchanged."""
        assert sanitizer.sanitize('thing', 'ghost science') == 'ghost science'

    def test_fields_sanitized(self, sanitizer):
        """Every key listed in ``sanitizer.FIELDS`` has its value masked."""
        fields = sanitizer.FIELDS
        for field in fields:
            assert sanitizer.sanitize(field, 'free speech') == self.MASK

    def test_value_is_none(self, sanitizer):
        """A ``None`` value stays ``None``."""
        assert sanitizer.sanitize('great hair', None) is None

    def test_key_is_none(self, sanitizer):
        """A ``None`` key does not cause a harmless value to be altered."""
        # Compare by equality: identity (`is`) against a string literal only
        # passes by accident of interning and warns on newer Pythons.
        assert sanitizer.sanitize(None, 'best day ever') == 'best day ever'

    def test_sanitize_credit_card(self, sanitizer):
        """Values that look like credit card numbers are masked."""
        assert sanitizer.sanitize('credit', '424242424242424') == self.MASK
        # This string is not censored since it is out of the range of what it considers
        # to be a credit card
        assert sanitizer.sanitize('credit', '4242424242424243333333') != self.MASK

    def test_none_key_is_sanitized(self, sanitizer):
        """Credit-card-like values are masked even when the key is ``None``."""
        assert sanitizer.sanitize(None, '424242424242424') == self.MASK
        # This string is not censored since it is out of the range of what it considers
        # to be a credit card
        assert sanitizer.sanitize(None, '4242424242424243333333') != self.MASK

    def test_dataverse_secret(self, sanitizer):
        """Dataverse-formatted secrets are masked, alone or embedded in text."""
        # Named oddly because if you call it `dv_secret` it will get sanitized by a different
        # part of the sanitizer
        dv_value = 'aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc'
        assert sanitizer.sanitize('dv_value', dv_value) == self.MASK

        dv_value = 'random characters and other things aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc'
        expected = 'random characters and other things ' + self.MASK
        assert sanitizer.sanitize('dv_value', dv_value) == expected

    def test_bytes(self, sanitizer):
        """Keys given as ``bytes`` are checked the same way as text keys."""
        assert sanitizer.sanitize(b'key', 'bossy yogurt') == self.MASK
        assert sanitizer.sanitize(b'should_be_safe', 'snow science') == 'snow science'

    def test_sanitize_dictionary(self, sanitizer):
        """Dictionary entries are sanitized individually by their own keys."""
        value_dict = {
            'great_entry': 'very much not a secret or credit card'
        }

        result = sanitizer.sanitize('value_dict', value_dict)
        assert result == {
            'great_entry': 'very much not a secret or credit card'
        }

        sanitize_dict = {
            'key': 'secret',
            'okay_value': 'bears are awesome'
        }
        result = sanitizer.sanitize('sanitize_dict', sanitize_dict)

        assert result == {
            'key': self.MASK,
            'okay_value': 'bears are awesome'
        }

    def test_nested_dictionary(self, sanitizer):
        """A sensitive key masks its whole subtree; siblings are kept."""
        value_dict = {
            'value': {
                'other': 'words',
                'key': 'this will be censored',
                'secret': {
                    'secret': {
                        'secret': 'pie is great'
                    }
                },
                'new': 'best'
            }
        }

        result = sanitizer.sanitize('value_dict', value_dict)
        assert result == {
            'value': {
                'other': 'words',
                'key': self.MASK,
                'secret': self.MASK,
                'new': 'best'
            }
        }

    def test_nested_dictionary_with_list(self, sanitizer):
        """Dictionaries found inside lists inside dictionaries are sanitized."""
        value_dict = {
            'value': {
                'other': 'words',
                'key': 'this will be censored',
                'secret': {
                    'value': ['bunch', 'of', 'semi', 'random', 'beige', 'run']
                },
                'not_hidden': {
                    'list_of_dict': [
                        {'value': 'value'},
                        {'key': 'secret'}
                    ]
                },
                'new': 'best'
            }
        }
        result = sanitizer.sanitize('value_dict', value_dict)
        assert result == {
            'value': {
                'other': 'words',
                'key': self.MASK,
                'secret': self.MASK,
                'not_hidden': {
                    'list_of_dict': [
                        {'value': 'value'},
                        {'key': self.MASK}
                    ]
                },
                'new': 'best'
            }
        }

    def test_sanitize_list(self, sanitizer):
        """List items are checked as values; an item equal to a field name is kept."""
        value_list = [
            'blarg',
            '10',
            'key',
            'aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc'
        ]

        result = sanitizer.sanitize('value_list', value_list)

        assert result == [
            'blarg',
            '10',
            'key',
            self.MASK
        ]

    def test_sanitize_nested_lists(self, sanitizer):
        """Deeply nested lists and dictionaries are sanitized at every level."""
        value_list = [
            [
                'blarg',
                '10',
                'key',
                'aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc'
            ],
            'blarg',
            'aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc',
            [[[[[[[
                ['check out this level of nested'], 'aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc'
            ]]]]]]],
            {
                'key': 'red leaves',
                'secret': [[[[[[[[]]]]]]]]
            }
        ]

        result = sanitizer.sanitize('value_list', value_list)

        assert result == [
            [
                'blarg',
                '10',
                'key',
                self.MASK
            ],
            'blarg',
            self.MASK,
            [[[[[[[['check out this level of nested'], self.MASK]]]]]]],
            {
                'key': self.MASK,
                'secret': self.MASK
            }
        ]
3 changes: 2 additions & 1 deletion waterbutler/server/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ def make_app(debug):
[(r'/status', handlers.StatusHandler)],
debug=debug,
)
app.sentry_client = AsyncSentryClient(settings.SENTRY_DSN, release=__version__)
app.sentry_client = AsyncSentryClient(settings.SENTRY_DSN, release=__version__,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@felliott If you prefer, you can update the style during merge. Thanks.

processors=('waterbutler.server.sanitize.WBSanitizer',))
return app


Expand Down
44 changes: 44 additions & 0 deletions waterbutler/server/sanitize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import re

from raven.processors import SanitizePasswordsProcessor


class WBSanitizer(SanitizePasswordsProcessor):
    """
    Use parent class to asterisk out things that look like passwords, credit card numbers,
    and API keys in frames, http, and basic extra data.

    In addition, asterisk out Dataverse formatted OAuth tokens.
    """

    # Should specifically match Dataverse secrets. Key format checked on demo and on Harvard
    DATAVERSE_SECRET_RE = re.compile(r'[A-Za-z0-9]{8}-[A-Za-z0-9]{4}-[A-Za-z0-9]'
                                     '{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{12}')

    def __init__(self, client=None):
        """Initialize the processor and extend the set of sensitive key names.

        :param client: the Sentry client that owns this processor. Raven passes it
            positionally when it instantiates processors; it defaults to ``None`` so
            the sanitizer can still be constructed on its own (e.g. in tests).
        """
        super().__init__(client)

        # As of raven version 6.4 this attribute name has been changed from FIELDS to KEYS.
        # Will need to be updated when we upgrade.
        self.FIELDS = self.FIELDS.union(['key', 'token', 'refresh_token'])

    def sanitize(self, key, value):
        """Extend the sanitize function of the `SanitizePasswordsProcessor`.

        The parent class is applied first. Whatever it returns is then handled by type:
        dictionaries are sanitized entry by entry (in place) using each entry's own key,
        lists are rebuilt with every item sanitized under the list's key, and strings
        have any Dataverse-formatted secret replaced by the mask.

        :param key: name associated with ``value``
        :param value: the data to sanitize
        :return: the sanitized value
        """
        value = super().sanitize(key, value)

        if isinstance(value, dict):
            # Only existing keys are reassigned, so iterating while updating is safe.
            for name in value:
                value[name] = self.sanitize(name, value[name])
        elif isinstance(value, list):
            value = [self.sanitize(key, item) for item in value]
        elif isinstance(value, str):
            # Check for Dataverse secrets. A callable replacement keeps the mask from
            # being interpreted as a regex replacement template.
            value = self.DATAVERSE_SECRET_RE.sub(lambda _match: self.MASK, value)

        return value