From b9e96e86ba4dbbe0a5265a44503faa5604d50395 Mon Sep 17 00:00:00 2001 From: kchason Date: Wed, 22 Dec 2021 13:08:30 -0500 Subject: [PATCH 01/35] Initial conversion of py functions --- .github/workflows/ci.yml | 14 +++++------ .gitignore | 4 +++ README.md | 1 - sqlite_dissect/carving/signature.py | 32 ++++++++++++------------ sqlite_dissect/carving/utilities.py | 4 +-- sqlite_dissect/constants.py | 6 ++--- sqlite_dissect/export/sqlite_export.py | 10 ++++---- sqlite_dissect/file/database/header.py | 4 +-- sqlite_dissect/file/file_handle.py | 3 ++- sqlite_dissect/file/schema/master.py | 4 +-- sqlite_dissect/file/utilities.py | 4 +-- sqlite_dissect/file/version.py | 2 +- sqlite_dissect/file/wal/commit_record.py | 4 +-- sqlite_dissect/file/wal/wal.py | 4 +-- sqlite_dissect/interface.py | 2 +- sqlite_dissect/output.py | 14 +++++------ sqlite_dissect/utilities.py | 8 +++--- sqlite_dissect/version_history.py | 14 +++++------ 18 files changed, 68 insertions(+), 66 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a7c3a10..2cc2a4e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,8 +8,10 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - # Once we move to Python 3.x, this can be tested against multiple Python versions. eg. [3.6, 3.7, 3.8, 3.9] - python-version: [ 2.7 ] + # This allows the pipeline to be run against multiple Python versions. eg. [3.6, 3.7, 3.8, 3.9]. This results + # in linting and unit tests running for all listed versions as well as the creation of packages and wheels on + # creation of a tag in Git. + python-version: [ 3.6, 3.7, 3.8, 3.9, "3.10" ] steps: # Get the code from the repository to be packaged @@ -42,16 +44,12 @@ jobs: - name: Test with pytest run: pytest - # Set the environment variable for the Git tag to pass along to the build - # process in the next step - - name: Set Environment - if: startsWith(github.ref, 'refs/tags') - run: echo "RELEASE_VERSION=${GITHUB_REF#refs/*/}" >> $TAG_VERSION - # Build the binary wheel as well as the source tar - name: Build Objects if: startsWith(github.ref, 'refs/tags') run: python setup.py sdist bdist_wheel + env: + TAG_VERSION: "${GITHUB_REF#refs/*/}" # Ensure the objects were packaged correctly and there wasn't an issue with # the compilation or packaging process. diff --git a/.gitignore b/.gitignore index 406bae4..1dbc3c7 100644 --- a/.gitignore +++ b/.gitignore @@ -24,4 +24,8 @@ # Other /output /log + +# PyTest and Coverage /.pytest_cache +/.coverage +/htmlcov diff --git a/README.md b/README.md index 5958dc0..9a8f4f7 100644 --- a/README.md +++ b/README.md @@ -252,6 +252,5 @@ TODO: - [ ] Add additional logging messages to the master schema entries skipped in signature generation. - [ ] Integrate in the SQLite Forensic Corpus into tests. - [ ] Look into updating terminology for versioning to timelining. -- [ ] Update code for compatibility with Python 3. - [ ] Create PyUnit tests. - [ ] Create a GUI. diff --git a/sqlite_dissect/carving/signature.py b/sqlite_dissect/carving/signature.py index 812b449..fc69382 100644 --- a/sqlite_dissect/carving/signature.py +++ b/sqlite_dissect/carving/signature.py @@ -258,7 +258,7 @@ def __init__(self, version_history, master_schema_entry, version_number=None, en """ # Iterate through each of the records - for cell_md5_hex_digest, record in records.iteritems(): + for cell_md5_hex_digest, record in records.items(): """ @@ -294,7 +294,7 @@ def __init__(self, version_history, master_schema_entry, version_number=None, en total_table_row_signature_count = 0 # Iterate through the table row signatures and set the total rows and increment the count - for serial_type_signature, table_row_signature in self.table_row_signatures.iteritems(): + for serial_type_signature, table_row_signature in self.table_row_signatures.items(): table_row_signature.number_of_rows = self.unique_records total_table_row_signature_count += table_row_signature.count @@ -422,7 +422,7 @@ def __init__(self, version_history, master_schema_entry, version_number=None, en table_row_columns = {} # Iterate through the table row signatures and create the table row columns dictionary - for table_row_md5_hex_digest, table_row_signature in self.table_row_signatures.iteritems(): + for table_row_md5_hex_digest, table_row_signature in self.table_row_signatures.items(): # Iterate through all of the column signatures in the current table row signature for column_index in range(len(table_row_signature.column_signatures)): @@ -434,7 +434,7 @@ def __init__(self, version_history, master_schema_entry, version_number=None, en table_row_columns[column_index] = [table_row_signature.column_signatures[column_index]] # Iterate through the table row columns and create the table column signatures - for table_row_column_index, table_row_column_serial_type_array in table_row_columns.iteritems(): + for table_row_column_index, table_row_column_serial_type_array in table_row_columns.items(): column_name = column_definitions[table_row_column_index].column_name self.table_column_signatures.append(TableColumnSignature(table_row_column_index, column_name, table_row_column_serial_type_array)) @@ -541,7 +541,7 @@ def stringify(self, padding="", print_table_row_signatures=True, print_schema_co signature_string = signature_string.format(schema_column_signature.stringify("\t")) string += signature_string if print_table_row_signatures: - for table_row_md5_hex_digest, table_row_signature in self.table_row_signatures.iteritems(): + for table_row_md5_hex_digest, table_row_signature in self.table_row_signatures.items(): signature_string = "\n" + padding + "Table Row Signature:\n{}" signature_string = signature_string.format(table_row_signature.stringify("\t", print_column_signatures)) string += signature_string @@ -913,7 +913,7 @@ def __init__(self, index, name, column_signatures): self._logger.error(log_message) raise SignatureError(log_message) - for column_signature_index, column_signature in self.column_signatures.iteritems(): + for column_signature_index, column_signature in self.column_signatures.items(): column_signature.number_of_rows = self.count def __repr__(self): @@ -936,14 +936,14 @@ def stringify(self, padding="", print_column_signatures=True): self.simplified_signature, len(self.column_signatures)) if print_column_signatures: - for column_signature_index, column_signature in self.column_signatures.iteritems(): + for column_signature_index, column_signature in self.column_signatures.items(): string += "\n" + padding + "Column Signature:\n{}".format(column_signature.stringify(padding + "\t")) return string @property def focused_probabilistic_signature(self): focused_signatures = [] - for column_signature_index, column_signature in self.column_signatures.iteritems(): + for column_signature_index, column_signature in self.column_signatures.items(): if isinstance(column_signature, ColumnVariableLengthSignature): for serial_type in column_signature.variable_length_serial_types: serial_type_probability = column_signature.get_variable_length_serial_type_probability(serial_type) @@ -961,7 +961,7 @@ def focused_probabilistic_signature(self): @property def focused_signature(self): focused_signatures = [] - for column_signature_index, column_signature in self.column_signatures.iteritems(): + for column_signature_index, column_signature in self.column_signatures.items(): if isinstance(column_signature, ColumnVariableLengthSignature): focused_signatures.extend(column_signature.variable_length_serial_types.keys()) elif isinstance(column_signature, ColumnFixedLengthSignature): @@ -977,14 +977,14 @@ def focused_signature(self): @property def simplified_probabilistic_signature(self): simplified_signatures = [] - for column_signature_index, column_signature in self.column_signatures.iteritems(): + for column_signature_index, column_signature in self.column_signatures.items(): simplified_signatures.append((column_signature.serial_type, column_signature.probability)) return sorted(simplified_signatures, key=lambda x: x[0]) @property def simplified_signature(self): simplified_signatures = [] - for column_signature_index, column_signature in self.column_signatures.iteritems(): + for column_signature_index, column_signature in self.column_signatures.items(): simplified_signatures.append(column_signature.serial_type) return sorted(simplified_signatures, key=int) @@ -1119,14 +1119,14 @@ def stringify(self, padding="", print_column_signatures=True): self.simplified_signature, len(self.column_signatures)) if print_column_signatures: - for column_signature_index, column_signature in self.column_signatures.iteritems(): + for column_signature_index, column_signature in self.column_signatures.items(): string += "\n" + padding + "Column Signature:\n{}".format(column_signature.stringify(padding + "\t")) return string @property def focused_signature(self): focused_signatures = [] - for column_signature_index, column_signature in self.column_signatures.iteritems(): + for column_signature_index, column_signature in self.column_signatures.items(): if isinstance(column_signature, ColumnVariableLengthSignature): focused_signatures.append(sorted(column_signature.variable_length_serial_types.keys(), key=int)) elif isinstance(column_signature, ColumnFixedLengthSignature): @@ -1165,7 +1165,7 @@ def number_of_rows(self, number_of_rows): self._number_of_rows = number_of_rows - for column_signature_index, column_signature in self.column_signatures.iteritems(): + for column_signature_index, column_signature in self.column_signatures.items(): column_signature.number_of_rows = number_of_rows @property @@ -1188,7 +1188,7 @@ def probability(self): @property def simplified_signature(self): simplified_signatures = [] - for column_signature_index, column_signature in self.column_signatures.iteritems(): + for column_signature_index, column_signature in self.column_signatures.items(): simplified_signatures.append([column_signature.serial_type]) return simplified_signatures @@ -1519,7 +1519,7 @@ def update(self, serial_type, count=None, variable_length_serial_types=None): self.count += count - for variable_length_serial_type, variable_length_serial_type_count in variable_length_serial_types.iteritems(): + for variable_length_serial_type, variable_length_serial_type_count in variable_length_serial_types.items(): if variable_length_serial_type in self.variable_length_serial_types: self.variable_length_serial_types[variable_length_serial_type] += variable_length_serial_type_count else: diff --git a/sqlite_dissect/carving/utilities.py b/sqlite_dissect/carving/utilities.py index 78a3481..486ed1a 100644 --- a/sqlite_dissect/carving/utilities.py +++ b/sqlite_dissect/carving/utilities.py @@ -376,9 +376,9 @@ def get_content_size(serial_type): elif serial_type >= 12 and serial_type % 2 == 0: return (serial_type - 12) / 2 - # A string in the database encoding and is (N-13)/2 bytes in length. The nul terminator is omitted + # A string in the database encoding and is (N-13)/2 bytes in length. The null terminator is omitted elif serial_type >= 13 and serial_type % 2 == 1: - return (serial_type - 13) / 2 + return int((serial_type - 13) / 2) else: log_message = "Invalid serial type: {}." diff --git a/sqlite_dissect/constants.py b/sqlite_dissect/constants.py index e6f4a57..065e059 100644 --- a/sqlite_dissect/constants.py +++ b/sqlite_dissect/constants.py @@ -1,4 +1,4 @@ -from collections import MutableMapping +from collections.abc import MutableMapping from logging import getLogger from re import compile from sys import maxunicode @@ -79,7 +79,7 @@ def __len__(self): LOCK_BYTE_PAGE_END_OFFSET = 1073742336 SQLITE_DATABASE_HEADER_LENGTH = 100 -MAGIC_HEADER_STRING = "SQLite format 3\000" +MAGIC_HEADER_STRING = b'SQLite format 3\000' MAGIC_HEADER_STRING_ENCODING = UTF_8 MAXIMUM_PAGE_SIZE_INDICATOR = 1 MINIMUM_PAGE_SIZE_LIMIT = 512 @@ -284,5 +284,5 @@ def __len__(self): (0xDFFFE, 0xDFFFF), (0xEFFFE, 0xEFFFF), (0xFFFFE, 0xFFFFF), (0x10FFFE, 0x10FFFF)]) -_illegal_xml_ranges = ["%s-%s" % (unichr(low), unichr(high)) for (low, high) in _illegal_xml_characters] +_illegal_xml_ranges = ["%s-%s" % (chr(low), chr(high)) for (low, high) in _illegal_xml_characters] ILLEGAL_XML_CHARACTER_PATTERN = compile(u'[%s]' % u''.join(_illegal_xml_ranges)) diff --git a/sqlite_dissect/export/sqlite_export.py b/sqlite_dissect/export/sqlite_export.py index 3d120f5..e069ae4 100644 --- a/sqlite_dissect/export/sqlite_export.py +++ b/sqlite_dissect/export/sqlite_export.py @@ -298,14 +298,14 @@ def _write_cells(connection, table_name, column_count, file_type, algorithm internal to SQLite to slightly change. Despite this, we make the following modifications in order to best ensure data integrity when writing the data back to the SQLite file: 1.) If the value is a bytearray, the value is interpreted as a blob object. In order to write this - back correctly, we set it to buffer(value) in order to write it back to the SQLite database as + back correctly, we set it to memoryview(value) in order to write it back to the SQLite database as a blob object. Before we write it back, we make sure that the object does not have text affinity, or if it does we decode it in the database text encoding before writing it. 2.) If the value is a string, we encode it using UTF-8. If this fails, that means it had characters not supported by the unicode encoding which caused it to fail. Since we are writing back carved records that may have invalid characters in strings due to parts being overwritten or false positives, this can occur a lot. Therefore, if the unicode encoding fails, we do the same - as above for blob objects and create a buffer(value) blob object and write that back to the + as above for blob objects and create a memoryview(value) blob object and write that back to the database in order to maintain the original data. Therefore, in some tables, depending on the data parsed or strings retrieved may be stored in either a string (text) or blob storage class. 3.) If the value does not fall in one of the above use cases, we leave it as is and write it back to the @@ -354,13 +354,13 @@ def _write_cells(connection, table_name, column_count, file_type, if text_affinity: value = value.decode(database_text_encoding, "replace") else: - value = buffer(value) + value = memoryview(value) elif isinstance(value, str): try: if text_affinity: value = value.decode(database_text_encoding, "replace") else: - value = buffer(value) + value = memoryview(value) except UnicodeDecodeError: """ @@ -374,7 +374,7 @@ def _write_cells(connection, table_name, column_count, file_type, """ - value = buffer(value) + value = memoryview(value) cell_record_column_values.append(value) diff --git a/sqlite_dissect/file/database/header.py b/sqlite_dissect/file/database/header.py index 33bd680..1d645e9 100644 --- a/sqlite_dissect/file/database/header.py +++ b/sqlite_dissect/file/database/header.py @@ -66,7 +66,7 @@ def __init__(self, database_header_byte_array): logger.error("Failed to retrieve the magic header.") raise - if self.magic_header_string != MAGIC_HEADER_STRING.decode(MAGIC_HEADER_STRING_ENCODING): + if self.magic_header_string != MAGIC_HEADER_STRING: log_message = "The magic header string is invalid." logger.error(log_message) raise HeaderParsingError(log_message) @@ -258,7 +258,7 @@ def __init__(self, database_header_byte_array): self.reserved_for_expansion = database_header_byte_array[72:92] pattern = compile(RESERVED_FOR_EXPANSION_REGEX) - reserved_for_expansion_hex = hexlify(self.reserved_for_expansion) + reserved_for_expansion_hex = hexlify(self.reserved_for_expansion).decode() if not pattern.match(reserved_for_expansion_hex): log_message = "Header space reserved for expansion is not zero: {}.".format(reserved_for_expansion_hex) logger.error(log_message) diff --git a/sqlite_dissect/file/file_handle.py b/sqlite_dissect/file/file_handle.py index 7791ceb..ec45984 100644 --- a/sqlite_dissect/file/file_handle.py +++ b/sqlite_dissect/file/file_handle.py @@ -68,7 +68,8 @@ def __init__(self, file_type, file_identifier, database_text_encoding=None, file self.file_externally_controlled = False self._database_text_encoding = database_text_encoding - if isinstance(file_identifier, basestring): + xbasestring = (str, bytes) + if isinstance(file_identifier, xbasestring): """ diff --git a/sqlite_dissect/file/schema/master.py b/sqlite_dissect/file/schema/master.py index 8b9e5d0..9708749 100644 --- a/sqlite_dissect/file/schema/master.py +++ b/sqlite_dissect/file/schema/master.py @@ -422,7 +422,7 @@ def _parse_table_interior(b_tree_table_interior_page, database_text_encoding): logger.error(log_message) raise MasterSchemaParsingError(log_message) - for row_type, row_type_data in returned_master_schema_entry_data.iteritems(): + for row_type, row_type_data in returned_master_schema_entry_data.items(): if row_type in master_schema_entry_data: master_schema_entry_data[row_type].extend(row_type_data) else: @@ -629,7 +629,7 @@ def stringify(self, padding="", print_record_columns=True): for comment in self.comments: string += "\n" + padding + "Comment: {}".format(comment) if print_record_columns: - for index, record_column in self.record_columns.iteritems(): + for index, record_column in self.record_columns.items(): string += "\n" \ + padding + "Record Column {}:\n{}:".format(index, record_column.stringify(padding + "\t")) return string diff --git a/sqlite_dissect/file/utilities.py b/sqlite_dissect/file/utilities.py index 5c38524..bb01999 100644 --- a/sqlite_dissect/file/utilities.py +++ b/sqlite_dissect/file/utilities.py @@ -13,8 +13,8 @@ def validate_page_version_history(version_history): - for version_number, version in version_history.versions.iteritems(): - for page_number, page in version.pages.iteritems(): + for version_number, version in version_history.versions.items(): + for page_number, page in version.pages.items(): if page.page_version_number != version.page_version_index[page.number]: return False if page.version_number != version.version_number: diff --git a/sqlite_dissect/file/version.py b/sqlite_dissect/file/version.py index 684caf3..6a01b79 100644 --- a/sqlite_dissect/file/version.py +++ b/sqlite_dissect/file/version.py @@ -182,7 +182,7 @@ def stringify(self, padding="", print_pages=True, print_schema=True): self.pointer_map_pages_modified, self.updated_b_tree_page_numbers) if print_pages: - for page in self.pages.itervalues(): + for page in self.pages.values(): string += "\n" + padding + "Page:\n{}".format(page.stringify(padding + "\t")) if print_schema: string += "\n" \ diff --git a/sqlite_dissect/file/wal/commit_record.py b/sqlite_dissect/file/wal/commit_record.py index b3f1051..2b0a531 100644 --- a/sqlite_dissect/file/wal/commit_record.py +++ b/sqlite_dissect/file/wal/commit_record.py @@ -100,7 +100,7 @@ def __init__(self, version_number, database, write_ahead_log, frames, page_frame self._database = database - for page_version_number in page_version_index.itervalues(): + for page_version_number in page_version_index.values(): if page_version_number >= version_number: log_message = "Page version number: {} is greater than the commit record specified version: {}." log_message = log_message.format(page_version_number, version_number) @@ -648,7 +648,7 @@ def stringify(self, padding="", print_pages=True, print_schema=True, print_frame string += "\n" + padding + "Database Header Differences:" # Parse the database header differences - for field, difference in self.database_header_differences.iteritems(): + for field, difference in self.database_header_differences.items(): difference_string = "\n" + padding + "\t" + "Field: {} changed from previous Value: {} to new Value: {}" string += difference_string.format(field, difference[0], difference[1]) diff --git a/sqlite_dissect/file/wal/wal.py b/sqlite_dissect/file/wal/wal.py index 80a0158..77e705b 100644 --- a/sqlite_dissect/file/wal/wal.py +++ b/sqlite_dissect/file/wal/wal.py @@ -232,9 +232,9 @@ def stringify(self, padding="", print_frames=True): self.invalid_frame_indices, self.last_frame_commit_record.frame_index + 1) if print_frames: - for frame in self.frames.itervalues(): + for frame in self.frames.values(): string += "\n" + padding + "Frame:\n{}".format(frame.stringify(padding + "\t")) if print_frames and self.invalid_frames: - for invalid_frame in self.invalid_frames.itervalues(): + for invalid_frame in self.invalid_frames.values(): string += "\n" + padding + "Invalid Frame:\n{}".format(invalid_frame.stringify(padding + "\t")) return string diff --git a/sqlite_dissect/interface.py b/sqlite_dissect/interface.py index 00ee48e..6fcb752 100644 --- a/sqlite_dissect/interface.py +++ b/sqlite_dissect/interface.py @@ -130,7 +130,7 @@ def carve_table(table_name, signature, version): b_tree_pages = get_pages_from_b_tree_page(version.get_b_tree_root_page(master_schema_entry.root_page_number)) b_tree_pages = {b_tree_page.number: b_tree_page for b_tree_page in b_tree_pages} carved_cells = [] - for page_number, page in b_tree_pages.iteritems(): + for page_number, page in b_tree_pages.items(): # For carving freeblocks make sure the page is a b-tree page and not overflow if isinstance(page, BTreePage): carvings = SignatureCarver.carve_freeblocks(version, CELL_SOURCE.B_TREE, page.freeblocks, signature) diff --git a/sqlite_dissect/output.py b/sqlite_dissect/output.py index 0aa8f06..1cdd7f7 100644 --- a/sqlite_dissect/output.py +++ b/sqlite_dissect/output.py @@ -39,7 +39,7 @@ def get_page_breakdown(pages): page_breakdown = {} for page_type in PAGE_TYPE: page_breakdown[page_type] = [] - for page_number, page in pages.iteritems(): + for page_number, page in pages.items(): page_breakdown[page.page_type].append(page_number) return page_breakdown @@ -202,14 +202,14 @@ def stringify_master_schema_versions(version_history): master_schema_entries = {} - for version_number, version in version_history.versions.iteritems(): + for version_number, version in version_history.versions.items(): if version.master_schema_modified: modified_master_schema_entries = dict(map(lambda x: [x.md5_hash_identifier, x], version.master_schema.master_schema_entries)) - for md5_hash_identifier, master_schema_entry in modified_master_schema_entries.iteritems(): + for md5_hash_identifier, master_schema_entry in modified_master_schema_entries.items(): if md5_hash_identifier not in master_schema_entries: @@ -237,7 +237,7 @@ def stringify_master_schema_versions(version_history): master_schema_entries[md5_hash_identifier] = master_schema_entry - for md5_hash_identifier, master_schema_entry in master_schema_entries.iteritems(): + for md5_hash_identifier, master_schema_entry in master_schema_entries.items(): if md5_hash_identifier not in modified_master_schema_entries: @@ -261,7 +261,7 @@ def stringify_page_history(version_history, padding=""): def stringify_page_information(version, padding=""): string = padding + "Page Breakdown:" - for page_type, page_array in get_page_breakdown(version.pages).iteritems(): + for page_type, page_array in get_page_breakdown(version.pages).items(): page_array_length = len(page_array) string += "\n" + padding + "\t" + "{}: {} Page Numbers: {}" string = string.format(page_type, page_array_length, page_array) @@ -313,7 +313,7 @@ def stringify_page_structure(version, padding=""): def stringify_unallocated_space(version, padding="", include_empty_space=True): string = "" calculated_total_fragmented_bytes = 0 - for page_number, page in version.pages.iteritems(): + for page_number, page in version.pages.items(): unallocated_content = page.unallocated_content if len(unallocated_content): @@ -358,7 +358,7 @@ def stringify_version_pages(version, padding=""): version.updated_page_numbers) page_versions = {} - for page_number, page_version_number in version.page_version_index.iteritems(): + for page_number, page_version_number in version.page_version_index.items(): if page_version_number in page_versions: page_versions[page_version_number] = page_versions[page_version_number] + ", " + str(page_number) else: diff --git a/sqlite_dissect/utilities.py b/sqlite_dissect/utilities.py index 5bdfce9..6ed2b65 100644 --- a/sqlite_dissect/utilities.py +++ b/sqlite_dissect/utilities.py @@ -47,7 +47,7 @@ def decode_varint(byte_array, offset=0): unsigned_integer_value = 0 varint_relative_offset = 0 - for x in xrange(1, 10): + for x in range(1, 10): varint_byte = ord(byte_array[offset + varint_relative_offset:offset + varint_relative_offset + 1]) varint_relative_offset += 1 @@ -90,7 +90,7 @@ def encode_varint(value): byte_array.insert(0, pack("B", byte)) value >>= 8 - for _ in xrange(8): + for _ in range(8): byte_array.insert(0, pack("B", (value & 0x7f) | 0x80)) value >>= 7 @@ -152,7 +152,7 @@ def get_record_content(serial_type, record_body, offset=0): # Big-endian 24-bit twos-complement integer elif serial_type == 3: content_size = 3 - value_byte_array = '\0' + record_body[offset:offset + content_size] + value_byte_array = b'\0' + record_body[offset:offset + content_size] value = unpack(b">I", value_byte_array)[0] if value & 0x800000: value -= 0x1000000 @@ -165,7 +165,7 @@ def get_record_content(serial_type, record_body, offset=0): # Big-endian 48-bit twos-complement integer elif serial_type == 5: content_size = 6 - value_byte_array = '\0' + '\0' + record_body[offset:offset + content_size] + value_byte_array = b'\0' + b'\0' + record_body[offset:offset + content_size] value = unpack(b">Q", value_byte_array)[0] if value & 0x800000000000: value -= 0x1000000000000 diff --git a/sqlite_dissect/version_history.py b/sqlite_dissect/version_history.py index 4d3336f..d1038fe 100644 --- a/sqlite_dissect/version_history.py +++ b/sqlite_dissect/version_history.py @@ -417,7 +417,7 @@ def stringify(self, padding="", print_cells=True): self._current_b_tree_page_numbers, self._carve_freelist_pages) if print_cells: - for current_cell in self._current_cells.itervalues(): + for current_cell in self._current_cells.values(): string += "\n" + padding + "Cell:\n{}".format(current_cell.stringify(padding + "\t")) return string @@ -501,7 +501,7 @@ def next(self): deleted_cells = {} # Iterate through the current cells - for current_cell_md5, current_cell in self._current_cells.iteritems(): + for current_cell_md5, current_cell in self._current_cells.items(): # Remove the cell from the added cells if it was already pre-existing if current_cell_md5 in added_cells: @@ -694,7 +694,7 @@ def next(self): # Initialize the carved cells carved_cells = [] - for freelist_page_number, freelist_page in updated_freelist_pages.iteritems(): + for freelist_page_number, freelist_page in updated_freelist_pages.items(): # Carve unallocated space carvings = SignatureCarver.carve_unallocated_space(version, CELL_SOURCE.FREELIST, @@ -798,13 +798,13 @@ def stringify(self, padding="", print_cells=True): self.freelist_pages_carved, self.updated_freelist_page_numbers) if print_cells: - for added_cell in self.added_cells.itervalues(): + for added_cell in self.added_cells.values(): string += "\n" + padding + "Added Cell:\n{}".format(added_cell.stringify(padding + "\t")) - for deleted_cell in self.deleted_cells.itervalues(): + for deleted_cell in self.deleted_cells.values(): string += "\n" + padding + "Deleted Cell:\n{}".format(deleted_cell.stringify(padding + "\t")) - for updated_cell in self.updated_cells.itervalues(): + for updated_cell in self.updated_cells.values(): string += "\n" + padding + "Updated Cell:\n{}".format(updated_cell.stringify(padding + "\t")) - for carved_cell in self.carved_cells.itervalues(): + for carved_cell in self.carved_cells.values(): string += "\n" + padding + "Carved Cell:\n{}".format(carved_cell.stringify(padding + "\t")) return string From 344f0e8b708b9a3342cbe538d6ee4fe8373f9101 Mon Sep 17 00:00:00 2001 From: kchason Date: Wed, 22 Dec 2021 14:20:17 -0500 Subject: [PATCH 02/35] Initial running capability --- main.py | 2 +- sqlite_dissect/file/database/page.py | 8 ++++---- sqlite_dissect/output.py | 5 +++-- sqlite_dissect/utilities.py | 13 ++++++++++++- sqlite_dissect/version_history.py | 5 ++++- 5 files changed, 24 insertions(+), 9 deletions(-) diff --git a/main.py b/main.py index f6c3d91..1ab18df 100644 --- a/main.py +++ b/main.py @@ -108,7 +108,7 @@ def main(args): # Setup the export type export_types = [EXPORT_TYPES.TEXT] if args.export and len(export_types) > 0: - export_types = map(str.upper, args.export) + export_types = list(map(str.upper, args.export)) # Setup the strict format checking strict_format_checking = True diff --git a/sqlite_dissect/file/database/page.py b/sqlite_dissect/file/database/page.py index 9cafa35..a246fe9 100644 --- a/sqlite_dissect/file/database/page.py +++ b/sqlite_dissect/file/database/page.py @@ -582,10 +582,10 @@ def __init__(self, version_interface, number, header_class_name, cell_class_name page = self._version_interface.get_page_data(self.number) self.page_type = None - self.hex_type = page[0] + self.hex_type = chr(page[0]).encode() if self.hex_type == MASTER_PAGE_HEX_ID: - master_page_hex_type = page[SQLITE_DATABASE_HEADER_LENGTH] + master_page_hex_type = chr(page[SQLITE_DATABASE_HEADER_LENGTH]).encode() if master_page_hex_type == TABLE_INTERIOR_PAGE_HEX_ID: self.page_type = PAGE_TYPE.B_TREE_TABLE_INTERIOR elif master_page_hex_type == TABLE_LEAF_PAGE_HEX_ID: @@ -593,7 +593,7 @@ def __init__(self, version_interface, number, header_class_name, cell_class_name else: log_message = "Page hex type for master page is: {} and not a table interior or table leaf page as " \ "expected in b-tree page: {} in page version: {} for version: {}." - log_message = log_message.format(hexlify(master_page_hex_type), self.number, + log_message = log_message.format(hex(master_page_hex_type), self.number, self.page_version_number, self.version_number) self._logger.error(log_message) raise BTreePageParsingError(log_message) @@ -609,7 +609,7 @@ def __init__(self, version_interface, number, header_class_name, cell_class_name else: log_message = "Page hex type: {} is not a valid b-tree page type for b-tree page: {} in page version: {} " \ "for version: {}." - log_message = log_message.format(hexlify(self.hex_type), self.number, self.page_version_number, + log_message = log_message.format(hex(self.hex_type), self.number, self.page_version_number, self.version_number) self._logger.error(log_message) raise BTreePageParsingError(log_message) diff --git a/sqlite_dissect/output.py b/sqlite_dissect/output.py index 1cdd7f7..f83ddb7 100644 --- a/sqlite_dissect/output.py +++ b/sqlite_dissect/output.py @@ -9,7 +9,7 @@ from sqlite_dissect.file.database.page import TableInteriorPage from sqlite_dissect.file.database.page import TableLeafPage from sqlite_dissect.exception import OutputError -from sqlite_dissect.utilities import has_content +from sqlite_dissect.utilities import has_content, decode_str """ @@ -146,7 +146,8 @@ def stringify_cell_record(cell, database_text_encoding, page_type): column_values.append(str(value)) else: column_values.append("NULL") - content = "(" + ", ".join(column_values) + ")" + + content = "(" + ", ".join(list(map(decode_str, column_values))) + ")" return "#{}: {}".format(cell.row_id, content) elif page_type == PAGE_TYPE.B_TREE_INDEX_LEAF: diff --git a/sqlite_dissect/utilities.py b/sqlite_dissect/utilities.py index 6ed2b65..3733f7d 100644 --- a/sqlite_dissect/utilities.py +++ b/sqlite_dissect/utilities.py @@ -129,6 +129,9 @@ def get_class_instance(class_name): def get_md5_hash(string): md5_hash = md5() + # Ensure the string is properly encoded as a binary string + if isinstance(string, str): + string = string.encode() md5_hash.update(string) return md5_hash.hexdigest().upper() @@ -201,7 +204,7 @@ def get_record_content(serial_type, record_body, offset=0): # A string in the database encoding and is (N-13)/2 bytes in length. The nul terminator is omitted elif serial_type >= 13 and serial_type % 2 == 1: - content_size = (serial_type - 13) / 2 + content_size = int((serial_type - 13) / 2) value = record_body[offset:offset + content_size] else: @@ -227,3 +230,11 @@ def has_content(byte_array): if pattern.match(hexlify(byte_array)): return False return True + + +def decode_str(string): + """Python compatibility for auto-detecting encoded strings and decoding them""" + if isinstance(string, bytes): + return string.decode() + else: + return string diff --git a/sqlite_dissect/version_history.py b/sqlite_dissect/version_history.py index d1038fe..0a59c78 100644 --- a/sqlite_dissect/version_history.py +++ b/sqlite_dissect/version_history.py @@ -190,7 +190,7 @@ def __init__(self, database, write_ahead_log=None): log_message = "Version (commit record): {} has additional frames beyond the last commit frame found " \ "in the write ahead log and erroneous use cases may occur when parsing." log_message = log_message.format(commit_record_number) - logger.warn(log_message) + logger.warning(log_message) warn(log_message, RuntimeWarning) # Set the number of versions @@ -399,6 +399,9 @@ def __iter__(self): def __repr__(self): return self.__str__().encode("hex") + def __next__(self): + return self.next() + def __str__(self): return sub("\t", "", sub("\n", " ", self.stringify())) From 0a0077ce59e1b6337dd8c8dca00e73636206a095 Mon Sep 17 00:00:00 2001 From: kchason Date: Wed, 22 Dec 2021 15:27:51 -0500 Subject: [PATCH 03/35] Fix cast to int --- sqlite_dissect/utilities.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlite_dissect/utilities.py b/sqlite_dissect/utilities.py index 3733f7d..62f2330 100644 --- a/sqlite_dissect/utilities.py +++ b/sqlite_dissect/utilities.py @@ -199,7 +199,7 @@ def get_record_content(serial_type, record_body, offset=0): # A BLOB that is (N-12)/2 bytes in length elif serial_type >= 12 and serial_type % 2 == 0: - content_size = (serial_type - 12) / 2 + content_size = int((serial_type - 12) / 2) value = record_body[offset:offset + content_size] # A string in the database encoding and is (N-13)/2 bytes in length. The nul terminator is omitted From 724c227076252f64239b0b6984990f5418d4033a Mon Sep 17 00:00:00 2001 From: kchason Date: Mon, 10 Jan 2022 09:36:58 -0500 Subject: [PATCH 04/35] Switch to f-strings --- main.py | 108 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/main.py b/main.py index 1ab18df..5540193 100644 --- a/main.py +++ b/main.py @@ -75,8 +75,8 @@ def main(args): basicConfig(level=logging_level, format=logging_format, datefmt=logging_data_format, filename=args.log_file) logger = getLogger(LOGGER_NAME) - logger.debug("Setup logging using the log level: {}.".format(logging_level)) - logger.info("Using options: {}".format(args)) + logger.debug(f"Setup logging using the log level: {logging_level}.") + logger.info(f"Using options: {args}") if args.warnings: @@ -128,15 +128,15 @@ def main(args): output_directory = None if args.directory: if not exists(args.directory): - raise SqliteError("Unable to find output directory: {}.".format(args.directory)) + raise SqliteError(f"Unable to find output directory: {args.directory}.") output_directory = args.directory - logger.debug("Determined export type to be {} with file prefix: {} and output directory: {}" - .format(', '.join(export_types), file_prefix, output_directory)) + logger.debug( + f"Determined export type to be {export_types} with file prefix: {file_prefix} and output directory: {output_directory}") # Obtain the SQLite file if not exists(args.sqlite_file): - raise SqliteError("Unable to find SQLite file: {}.".format(args.sqlite_file)) + raise SqliteError(f"Unable to find SQLite file: {args.sqlite_file}.") """ @@ -161,11 +161,11 @@ def main(args): if not args.no_journal: if args.wal: if not exists(args.wal): - raise SqliteError("Unable to find wal file: {}.".format(args.wal)) + raise SqliteError(f"Unable to find wal file: {args.wal}.") wal_file_name = args.wal elif args.rollback_journal: if not exists(args.rollback_journal): - raise SqliteError("Unable to find rollback journal file: {}.".format(args.rollback_journal)) + raise SqliteError(f"Unable to find rollback journal file: {args.rollback_journal}.") rollback_journal_file_name = args.rollback_journal else: if exists(args.sqlite_file + WAL_FILE_POSTFIX): @@ -204,11 +204,10 @@ def main(args): """ - raise SqliteError("Found a zero length SQLite file with a wal file: {}. Unable to parse.".format(args.wal)) + raise SqliteError(f"Found a zero length SQLite file with a wal file: {args.wal}. Unable to parse.") elif zero_length_wal_file: - print("File: {} with wal file: {} has no content. Nothing to parse." - .format(args.sqlite_file, wal_file_name)) + print(f"File: {args.sqlite_file} with wal file: {wal_file_name} has no content. Nothing to parse.") exit(0) elif rollback_journal_file_name and not zero_length_rollback_journal_file: @@ -220,12 +219,13 @@ def main(args): """ - raise SqliteError("Found a zero length SQLite file with a rollback journal file: {}. Unable to parse." - .format(args.rollback_journal)) + raise SqliteError( + f"Found a zero length SQLite file with a rollback journal file: {args.rollback_journal}. " + f"Unable to parse.") elif zero_length_rollback_journal_file: - print("File: {} with rollback journal file: {} has no content. Nothing to parse." - .format(args.sqlite_file, rollback_journal_file_name)) + print("File: {args.sqlite_file} with rollback journal file: {rollback_journal_file_name} has no content. " + "Nothing to parse.") exit(0) else: @@ -248,11 +248,11 @@ def main(args): """ - raise SqliteError("Found both a rollback journal: {} and wal file: {}. Only one journal file should exist. " - "Unable to parse.".format(args.rollback_journal, args.wal)) + raise SqliteError(f"Found both a rollback journal: {args.rollback_journal} and wal file: {args.wal}. " + f"Only one journal file should exist. Unable to parse.") # Print a message parsing is starting and log the start time for reporting at the end on amount of time to run - print("\nParsing: {}...".format(args.sqlite_file)) + print(f"\nParsing: {args.sqlite_file}...") start_time = time() # Create the database and wal/rollback journal file (if existent) @@ -272,13 +272,13 @@ def main(args): # Check if the master schema was asked for if args.schema: # print the master schema of the database - print("\nDatabase Master Schema:\n{}".format(stringify_master_schema_version(database))) + print(f"\nDatabase Master Schema:\n{stringify_master_schema_version(database)}") print("Continuing to parse...") # Check if the schema history was asked for if args.schema_history: # print the master schema version history - print("\nVersion History of Master Schemas:\n{}".format(stringify_master_schema_versions(version_history))) + print(f"\nVersion History of Master Schemas:\n{stringify_master_schema_versions(version_history)}") print("Continuing to parse...") # Get the signature options @@ -303,8 +303,8 @@ def main(args): if rollback_journal_exempted_tables and specified_tables_to_carve: for table in rollback_journal_exempted_tables: if table in specified_tables_to_carve: - print("Table: {} found in both exempted and specified tables. Please update the arguments correctly." - .format(table)) + print(f"Table: {table} found in both exempted and specified tables. Please update the arguments " + f"correctly.") exit(0) # See if we need to generate signatures @@ -340,22 +340,22 @@ def main(args): if isinstance(master_schema_entry, OrdinaryTableRow): if master_schema_entry.without_row_id: - log_message = "A `without row_id` table was found: {} and will not have a signature generated " \ - "for carving since it is not supported yet.".format(master_schema_entry.table_name) + log_message = f"A `without row_id` table was found: {master_schema_entry.table_name} and will not" \ + " have a signature generated for carving since it is not supported yet." logger.info(log_message) continue if master_schema_entry.internal_schema_object: - log_message = "A `internal schema` table was found: {} and will not have a signature generated " \ - "for carving since it is not supported yet.".format(master_schema_entry.table_name) + log_message = f"A `internal schema` table was found: {master_schema_entry.table_name} and will " \ + f"not have a signature generated for carving since it is not supported yet." logger.info(log_message) continue signatures[master_schema_entry.name] = Signature(version_history, master_schema_entry) if print_signatures: - print("\nSignature:\n{}".format(signatures[master_schema_entry.name] - .stringify("\t", False, False, False))) + printable_signature = signatures[master_schema_entry.name].stringify("\t", False, False, False) + print(f"\nSignature:\n{printable_signature}") """ @@ -395,32 +395,33 @@ def main(args): # The export type was not found (this should not occur due to the checking of argparse) if not exported: - raise SqliteError("Invalid option for export type: {}.".format(', '.join(export_types))) + raise SqliteError(f"Invalid option for export type: {(', '.join(export_types))}.") # Carve the rollback journal if found and carving is not specified if rollback_journal_file and not carve: - print("Rollback journal file found: {}. Rollback journal file parsing is under development and " - "currently only supports carving. Please rerun with the --carve option for this output.") + print(f"Rollback journal file found: {rollback_journal_file}. Rollback journal file parsing is under " + f"development and currently only supports carving. Please rerun with the --carve option for this output.") # Carve the rollback journal if found and carving is specified if rollback_journal_file and carve: if not output_directory: - print("Rollback journal file found: {}. Rollback journal file carving is under development and " - "currently only outputs to CSV. Due to this, the output directory needs to be specified. Please" - "rerun with a output directory specified in order for this to complete.") + print(f"Rollback journal file found: {rollback_journal_file}. Rollback journal file carving is under " + f"development and currently only outputs to CSV. Due to this, the output directory needs to be " + f"specified. Please rerun with a output directory specified in order for this to complete.") else: - print("Carving rollback journal file: {}. Rollback journal file carving is under development and " - "currently only outputs to CSV. Any export type specified will be overridden for this.") + print(f"Carving rollback journal file: {rollback_journal_file}. Rollback journal file carving is under " + f"development and currently only outputs to CSV. Any export type specified will be overridden for " + f"this.") carve_rollback_journal(output_directory, rollback_journal_file, rollback_journal_file_name, specified_tables_to_carve, rollback_journal_exempted_tables, version_history, signatures, logger) - print("Finished in {} seconds.".format(round(time() - start_time, 2))) + print(f"Finished in {round(time() - start_time, 2)} seconds.") def print_text(output_directory, file_prefix, carve, carve_freelists, specified_tables_to_carve, @@ -431,8 +432,8 @@ def print_text(output_directory, file_prefix, carve, carve_freelists, specified_ text_file_name = file_prefix + file_postfix # Export all index and table histories to a text file while supplying signature to carve with - print("\nExporting history as text to {}{}{}...".format(output_directory, sep, text_file_name)) - logger.debug("Exporting history as text to {}{}{}.".format(output_directory, sep, text_file_name)) + print(f"\nExporting history as text to {output_directory}{sep}{text_file_name}...") + logger.debug(f"Exporting history as text to {output_directory}{sep}{text_file_name}.") with CommitTextExporter(output_directory, text_file_name) as commit_text_exporter: @@ -453,10 +454,10 @@ def print_text(output_directory, file_prefix, carve, carve_freelists, specified_ if not signature and master_schema_entry.row_type is MASTER_SCHEMA_ROW_TYPE.TABLE \ and not master_schema_entry.without_row_id \ and not master_schema_entry.internal_schema_object: - print("Unable to find signature for: {}. This table will not be carved." - .format(master_schema_entry.name)) - logger.error("Unable to find signature for: {}. This table will not be carved." - .format(master_schema_entry.name)) + print(f"Unable to find signature for: {master_schema_entry.name}. This table will not be " + f"carved.") + logger.error(f"Unable to find signature for: {master_schema_entry.name}. This table will " + f"not be carved.") if signature: version_history_parser = VersionHistoryParser(version_history, master_schema_entry, None, None, @@ -473,7 +474,7 @@ def print_text(output_directory, file_prefix, carve, carve_freelists, specified_ else: # Export all index and table histories to csv files while supplying signature to carve with - logger.debug("Exporting history to {} as text.".format("console")) + logger.debug("Exporting history to console as text.") for master_schema_entry in version_history.versions[BASE_VERSION_NUMBER].master_schema.master_schema_entries: @@ -490,10 +491,10 @@ def print_text(output_directory, file_prefix, carve, carve_freelists, specified_ if not signature and master_schema_entry.row_type is MASTER_SCHEMA_ROW_TYPE.TABLE \ and not master_schema_entry.without_row_id \ and not master_schema_entry.internal_schema_object: - print("Unable to find signature for: {}. This table will not be carved." - .format(master_schema_entry.name)) - logger.error("Unable to find signature for: {}. This table will not be carved." - .format(master_schema_entry.name)) + print(f"Unable to find signature for: {master_schema_entry.name}. This table will not be " + f"carved.") + logger.error(f"Unable to find signature for: {master_schema_entry.name}. This table will not " + f"be carved.") if signature: version_history_parser = VersionHistoryParser(version_history, master_schema_entry, None, None, @@ -511,8 +512,8 @@ def print_text(output_directory, file_prefix, carve, carve_freelists, specified_ def print_csv(output_directory, file_prefix, carve, carve_freelists, specified_tables_to_carve, version_history, signatures, logger): # Export all index and table histories to csv files while supplying signature to carve with - print("\nExporting history as CSV to {}...".format(output_directory)) - logger.debug("Exporting history to {} as CSV.".format(output_directory)) + print(f"\nExporting history as CSV to {output_directory}...") + logger.debug(f"Exporting history to {output_directory} as CSV.") commit_csv_exporter = CommitCsvExporter(output_directory, file_prefix) @@ -531,10 +532,9 @@ def print_csv(output_directory, file_prefix, carve, carve_freelists, specified_t if not signature and master_schema_entry.row_type is MASTER_SCHEMA_ROW_TYPE.TABLE \ and not master_schema_entry.without_row_id \ and not master_schema_entry.internal_schema_object: - print("Unable to find signature for: {}. This table will not be carved." - .format(master_schema_entry.name)) - logger.error("Unable to find signature for: {}. This table will not be carved." - .format(master_schema_entry.name)) + print(f"Unable to find signature for: {master_schema_entry.name}. This table will not be carved.") + logger.error(f"Unable to find signature for: {master_schema_entry.name}. This table will not be " + f"carved.") if signature: version_history_parser = VersionHistoryParser(version_history, master_schema_entry, None, None, From bb34dad28e49627ad50bb75a8bf6883bc8efd4bd Mon Sep 17 00:00:00 2001 From: kchason Date: Sat, 15 Jan 2022 17:26:14 -0500 Subject: [PATCH 05/35] Fix regex for tests --- sqlite_dissect/constants.py | 2 +- sqlite_dissect/tests/utilities_test.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sqlite_dissect/constants.py b/sqlite_dissect/constants.py index 065e059..bd5f5d3 100644 --- a/sqlite_dissect/constants.py +++ b/sqlite_dissect/constants.py @@ -150,7 +150,7 @@ def __len__(self): TEXT_SIGNATURE_IDENTIFIER = -2 ZERO_BYTE = b'\x00' -ALL_ZEROS_REGEX = "^0*$" +ALL_ZEROS_REGEX = b"^0*$" SQLITE_MASTER_SCHEMA_ROOT_PAGE = 1 MASTER_SCHEMA_COLUMN = Enum({"TYPE": 0, "NAME": 1, "TABLE_NAME": 2, "ROOT_PAGE": 3, "SQL": 4}) diff --git a/sqlite_dissect/tests/utilities_test.py b/sqlite_dissect/tests/utilities_test.py index 6e89f01..6bf4f5d 100644 --- a/sqlite_dissect/tests/utilities_test.py +++ b/sqlite_dissect/tests/utilities_test.py @@ -84,20 +84,20 @@ def test_get_record_content(self): # Test when serial_type is >= 12 and even result = get_record_content(12, test_string_array, 0) self.assertEqual(0, result[0]) - self.assertEqual('', result[1]) + self.assertEqual(b'', result[1]) result = get_record_content(24, test_string_array, 0) self.assertEqual(6, result[0]) - self.assertEqual('this i', result[1]) + self.assertEqual(b'this i', result[1]) # Test when serial_type is >= 13 and odd result = get_record_content(13, test_string_array, 0) self.assertEqual(0, result[0]) - self.assertEqual('', result[1]) + self.assertEqual(b'', result[1]) result = get_record_content(25, test_string_array, 0) self.assertEqual(6, result[0]) - self.assertEqual('this i', result[1]) + self.assertEqual(b'this i', result[1]) # Test that the proper exception is thrown when the input is invalid cases = [10, 11] From 302292d7741f8542823fb9a564a2232eddd74ae5 Mon Sep 17 00:00:00 2001 From: kchason Date: Sat, 15 Jan 2022 17:44:07 -0500 Subject: [PATCH 06/35] Update hashing algorithm for py3 --- sqlite_dissect/export/case_export.py | 26 ++++++++++++++++++++---- sqlite_dissect/tests/case_export_test.py | 15 ++++++++++---- 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/sqlite_dissect/export/case_export.py b/sqlite_dissect/export/case_export.py index 9a6c61a..b196b4b 100644 --- a/sqlite_dissect/export/case_export.py +++ b/sqlite_dissect/export/case_export.py @@ -12,6 +12,24 @@ """ +def hash_file(file_path: str, hash_algo=hashlib.sha256()) -> str: + """ + Generates a hash of a file by chunking it and utilizing the Python hashlib library. + """ + # Ensure the file path exists + if not path.exists(file_path): + raise FileNotFoundError(f'The file path {file_path} is not valid, the file does not exist') + + with open(file_path, 'rb') as file: + while True: + # Reading is buffered, so we can read smaller chunks. + chunk = file.read(hash_algo.block_size) + if not chunk: + break + hash_algo.update(chunk) + return hash_algo.hexdigest() + + class CaseExporter(object): # Define the formatted logger that is provided by the main.py execution path logger = None @@ -134,7 +152,7 @@ def add_observable_file(self, filepath, filetype=None): }, "uco-types:hashValue": { "@type": "xsd:hexBinary", - "@value": hashlib.md5(filepath).hexdigest() + "@value": hash_file(filepath, hashlib.md5()) } }, { @@ -145,7 +163,7 @@ def add_observable_file(self, filepath, filetype=None): }, "uco-types:hashValue": { "@type": "xsd:hexBinary", - "@value": hashlib.sha1(filepath).hexdigest() + "@value": hash_file(filepath, hashlib.sha1()) } }, { @@ -156,7 +174,7 @@ def add_observable_file(self, filepath, filetype=None): }, "uco-types:hashValue": { "@type": "xsd:hexBinary", - "@value": hashlib.sha256(filepath).hexdigest() + "@value": hash_file(filepath, hashlib.sha256()) } }, { @@ -167,7 +185,7 @@ def add_observable_file(self, filepath, filetype=None): }, "uco-types:hashValue": { "@type": "xsd:hexBinary", - "@value": hashlib.sha512(filepath).hexdigest() + "@value": hash_file(filepath, hashlib.sha512()) } } ] diff --git a/sqlite_dissect/tests/case_export_test.py b/sqlite_dissect/tests/case_export_test.py index 9bf0c5c..57a0eb6 100644 --- a/sqlite_dissect/tests/case_export_test.py +++ b/sqlite_dissect/tests/case_export_test.py @@ -3,6 +3,7 @@ from main import main from sqlite_dissect.utilities import DotDict +from os.path import abspath, join, realpath, dirname class TestCASEExport(unittest.TestCase): @@ -11,12 +12,18 @@ class TestCASEExport(unittest.TestCase): """ def test_case_output(self): + # Get the full path to avoid any nested issues + base_path = abspath(join(dirname(realpath(__file__)), '..', '..')) + input_path = join(base_path, 'test_files', 'chinook.db') + output_path = join(base_path, 'output') + case_path = join(output_path, 'case.json') + # Build the arguments for the testing args = { 'log_level': 'debug', 'export': ['case'], - 'directory': 'output', - 'sqlite_file': 'test_files/chinook.db' + 'directory': output_path, + 'sqlite_file': input_path } # Convert the dictionary to a dot-accessible object for the main parsing @@ -26,5 +33,5 @@ def test_case_output(self): main(args) # Ensure the case.json file exists - self.assertTrue(os.path.exists('output/case.json')) - self.assertTrue(os.path.isfile('output/case.json')) + self.assertTrue(os.path.exists(case_path)) + self.assertTrue(os.path.isfile(case_path)) From ba90a72e8789a30082162725c3c6a0c8e6f70b6b Mon Sep 17 00:00:00 2001 From: kchason Date: Sat, 15 Jan 2022 17:55:32 -0500 Subject: [PATCH 07/35] Update type hinting for CASE export --- _version.py | 4 +-- setup.py | 10 ++++-- sqlite_dissect/export/case_export.py | 47 +++++++++++++++------------- 3 files changed, 34 insertions(+), 27 deletions(-) diff --git a/_version.py b/_version.py index a3fed4c..f91e0e2 100644 --- a/_version.py +++ b/_version.py @@ -3,8 +3,8 @@ _version.py -This script identifies the version of the sqlite dissect library. +This script identifies the version of the SQLite Dissect library. """ -__version__ = "0.0.6" +__version__ = "0.2.0" diff --git a/setup.py b/setup.py index d315434..33c6331 100644 --- a/setup.py +++ b/setup.py @@ -44,11 +44,15 @@ "sqlite_dissect.carving", "sqlite_dissect.export"], classifiers=[ - "Programming Language :: Python :: 2", - "Programming Language :: Python :: 2.7" + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.6" + "Programming Language :: Python :: 3.7" + "Programming Language :: Python :: 3.8" + "Programming Language :: Python :: 3.9" + "Programming Language :: Python :: 3.10" ], install_requires=[ - "openpyxl==2.6.4" + "openpyxl==3.0.9" ], zip_safe=False ) diff --git a/sqlite_dissect/export/case_export.py b/sqlite_dissect/export/case_export.py index b196b4b..d672440 100644 --- a/sqlite_dissect/export/case_export.py +++ b/sqlite_dissect/export/case_export.py @@ -3,6 +3,7 @@ import uuid from datetime import datetime from os import path +from typing import Optional from _version import __version__ @@ -30,15 +31,25 @@ def hash_file(file_path: str, hash_algo=hashlib.sha256()) -> str: return hash_algo.hexdigest() +def guid_list_to_objects(guids: list) -> list: + """ + Converts a list of string GUIDs to the object notation with an ID prefix + """ + if guids is None: + return [] + else: + return list(map(lambda g: {"@id": g}, guids)) + + class CaseExporter(object): # Define the formatted logger that is provided by the main.py execution path logger = None - result_guids = [] + result_guids: list = [] # Defines the initial structure for the CASE export. This will be supplemented with various methods that get called # from the main.py execution path. - case = { + case: dict = { '@context': { "@vocab": "http://example.org/ontology/local#", "case-investigation": "https://ontology.caseontology.org/case/investigation/", @@ -64,7 +75,7 @@ class CaseExporter(object): def __init__(self, logger): self.logger = logger - def register_options(self, options): + def register_options(self, options: list): """ Adds the command line options provided as the configuration values provided and outputting them in the schema defined in the uco-tool namespace. @@ -93,7 +104,7 @@ def register_options(self, options): # Add the configuration facet to the in progress CASE object self.case['@graph'][0]['uco-core:hasFacet'] = configuration - def add_observable_file(self, filepath, filetype=None): + def add_observable_file(self, filepath: str, filetype: str = None) -> Optional[str]: """ Adds the file specified in the provided filepath as an ObservableObject in the CASE export. This method handles calculation of filesize, extension, MD5 hash, SHA1 hash, and other metadata expected in the Observable TTL spec. @@ -196,8 +207,9 @@ def add_observable_file(self, filepath, filetype=None): return guid else: self.logger.critical('Attempting to add invalid filepath to CASE Observable export: {}'.format(filepath)) + return None - def link_observable_relationship(self, source_guid, target_guid, relationship): + def link_observable_relationship(self, source_guid: str, target_guid: str, relationship: str) -> None: self.case['@graph'].append({ "@id": ("kb:export-artifact-relationship-" + str(uuid.uuid4())), "@type": "uco-observable:ObservableRelationship", @@ -214,7 +226,7 @@ def link_observable_relationship(self, source_guid, target_guid, relationship): "uco-core:isDirectional": True }) - def add_export_artifacts(self, export_paths=None): + def add_export_artifacts(self, export_paths: list = None): """ Loops through the list of provided export artifact paths and adds them as observables and links them to the original observable artifact @@ -228,7 +240,7 @@ def add_export_artifacts(self, export_paths=None): # Add the export result GUID to the list to be extracted self.result_guids.append(export_guid) - def generate_provenance_record(self, description, guids): + def generate_provenance_record(self, description: str, guids: list) -> Optional[str]: """ Generates a provenance record for the tool and returns the GUID for the new object """ @@ -242,14 +254,14 @@ def generate_provenance_record(self, description, guids): "@id": guid, "@type": "case-investigation:ProvenanceRecord", "uco-core:description": description, - "uco-core:object": self.guid_list_to_objects(guids) + "uco-core:object": guid_list_to_objects(guids) } self.case['@graph'].append(record) return guid else: return None - def generate_header(self): + def generate_header(self) -> str: """ Generates the header for the tool and returns the GUID for the ObservableRelationships """ @@ -270,7 +282,7 @@ def generate_header(self): return guid - def generate_investigation_action(self, source_guids): + def generate_investigation_action(self, source_guids: list): """ Builds the investigative action object as defined in the CASE ontology. This also takes in the start and end datetimes from the analysis. @@ -303,13 +315,13 @@ def generate_investigation_action(self, source_guids): # Loop through and add the results to the ActionReferencesFacet action_facet = { "@type": "uco-action:ActionReferencesFacet", - "uco-action:object": self.guid_list_to_objects(source_guids), - "uco-action:result": self.guid_list_to_objects(self.result_guids) + "uco-action:object": guid_list_to_objects(source_guids), + "uco-action:result": guid_list_to_objects(self.result_guids) } action["uco-core:hasFacet"].append(action_facet) self.case['@graph'].append(action) - def export_case_file(self, export_path='output/case.json'): + def export_case_file(self, export_path: str = 'output/case.json'): """ Exports the built CASE object to the path specified in the export_path parameter. """ @@ -318,12 +330,3 @@ def export_case_file(self, export_path='output/case.json'): with open(export_path, 'w') as f: json.dump(self.case, f, ensure_ascii=False, indent=4) self.logger.info('CASE formatted file has been exported to {}'.format(export_path)) - - def guid_list_to_objects(self, guids): - """ - Converts a list of string GUIDs to the object notation with an ID prefix - """ - if guids is None: - return [] - else: - return list(map(lambda g: {"@id": g}, guids)) From 85072e9deab6dfc8a5f7a39bbe52c645db3f741c Mon Sep 17 00:00:00 2001 From: kchason Date: Sat, 15 Jan 2022 18:01:05 -0500 Subject: [PATCH 08/35] Update for CASE test, add instrument to investigative action --- main.py | 4 ++-- sqlite_dissect/export/case_export.py | 3 ++- sqlite_dissect/tests/case_export_test.py | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/main.py b/main.py index 96f51e1..a15c89b 100644 --- a/main.py +++ b/main.py @@ -406,7 +406,7 @@ def main(args): exported = True # Add the header and get the GUID for the tool for future linking - case.generate_header() + tool_guid = case.generate_header() # Add the runtime arguments to the CASE output case.register_options(args) @@ -427,7 +427,7 @@ def main(args): case.end_datetime = datetime.now() # Trigger the generation of the investigative action since the start and end time have now been set - case.generate_investigation_action(source_guids) + case.generate_investigation_action(source_guids, tool_guid) # Export the output to a JSON file case.export_case_file(path.join(args.directory, 'case.json')) diff --git a/sqlite_dissect/export/case_export.py b/sqlite_dissect/export/case_export.py index d672440..763292a 100644 --- a/sqlite_dissect/export/case_export.py +++ b/sqlite_dissect/export/case_export.py @@ -282,7 +282,7 @@ def generate_header(self) -> str: return guid - def generate_investigation_action(self, source_guids: list): + def generate_investigation_action(self, source_guids: list, tool_guid: str): """ Builds the investigative action object as defined in the CASE ontology. This also takes in the start and end datetimes from the analysis. @@ -315,6 +315,7 @@ def generate_investigation_action(self, source_guids: list): # Loop through and add the results to the ActionReferencesFacet action_facet = { "@type": "uco-action:ActionReferencesFacet", + "uco-action:instrument": tool_guid, "uco-action:object": guid_list_to_objects(source_guids), "uco-action:result": guid_list_to_objects(self.result_guids) } diff --git a/sqlite_dissect/tests/case_export_test.py b/sqlite_dissect/tests/case_export_test.py index 57a0eb6..8725963 100644 --- a/sqlite_dissect/tests/case_export_test.py +++ b/sqlite_dissect/tests/case_export_test.py @@ -21,7 +21,7 @@ def test_case_output(self): # Build the arguments for the testing args = { 'log_level': 'debug', - 'export': ['case'], + 'export': ['case', 'text'], 'directory': output_path, 'sqlite_file': input_path } From 1150e75ff763f66186750827185fd9ff2fa7ad1c Mon Sep 17 00:00:00 2001 From: kchason Date: Sat, 15 Jan 2022 18:07:57 -0500 Subject: [PATCH 09/35] Properly set regex escape sequences --- sqlite_dissect/file/schema/master.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/sqlite_dissect/file/schema/master.py b/sqlite_dissect/file/schema/master.py index 9708749..a7fcebe 100644 --- a/sqlite_dissect/file/schema/master.py +++ b/sqlite_dissect/file/schema/master.py @@ -56,7 +56,6 @@ class MasterSchema(object): - MasterSchemaEntryData = namedtuple("MasterSchemaEntryData", "record_columns row_type sql b_tree_table_leaf_page_number cell") @@ -470,7 +469,6 @@ def _parse_table_leaf(b_tree_table_leaf_page, database_text_encoding): class MasterSchemaRow(object): - __metaclass__ = ABCMeta @abstractmethod @@ -677,7 +675,7 @@ def _get_master_schema_row_name_and_remaining_sql(row_type, name, sql, remaining if remaining_sql_command[0] == "[": # The table name or index name is surrounded by brackets - match_object = match("^\[(.*?)\]", remaining_sql_command) + match_object = match(r"^\[(.*?)\]", remaining_sql_command) if not match_object: log_message = "No bracket match found for {} name in sql for {} row name: {} and sql: {}." @@ -784,7 +782,6 @@ def _get_master_schema_row_name_and_remaining_sql(row_type, name, sql, remaining # Check to make sure the full comment indicators were found for "--" and "/*" if (character == '-' and remaining_sql_command[index + 1] != '-') or \ (character == '/' and remaining_sql_command[index + 1] != '*'): - log_message = "Comment indicator '{}' found followed by an invalid secondary comment " \ "indicator: {} found in {} name in sql for {} row name: {} and sql: {}." log_message = log_message.format(character, remaining_sql_command[index + 1], @@ -1417,7 +1414,7 @@ class for parsing. This was decided to be the best way to associate comments ba Note: When the check is done on the definition, we check the next character is not one of the allowed characters in a column name to make sure the constraint preface is not the beginning of a longer column name where it is not actually a constraint preface - (example: primaryEmail). The "\w" regular expression when no LOCALE and UNICODE flags + (example: primaryEmail). The r'\w' regular expression when no LOCALE and UNICODE flags are set will be equivalent to the set: [a-zA-Z0-9_]. """ @@ -1426,7 +1423,7 @@ class for parsing. This was decided to be the best way to associate comments ba if definition[:len(table_constraint_preface)].upper() == table_constraint_preface: if not (len(table_constraint_preface) + 1 <= len(definition) - and match("\w", definition[len(table_constraint_preface)])): + and match(r"\w", definition[len(table_constraint_preface)])): # We have found a table constraint here and make sure this is not the first definition if definition_index == 0: @@ -2297,7 +2294,6 @@ class ViewRow(MasterSchemaRow): def __init__(self, version_interface, b_tree_table_leaf_page_number, b_tree_table_leaf_cell, record_columns, tables): - super(ViewRow, self).__init__(version_interface, b_tree_table_leaf_page_number, b_tree_table_leaf_cell, record_columns) @@ -2314,7 +2310,6 @@ class TriggerRow(MasterSchemaRow): def __init__(self, version_interface, b_tree_table_leaf_page_number, b_tree_table_leaf_cell, record_columns, tables, views): - super(TriggerRow, self).__init__(version_interface, b_tree_table_leaf_page_number, b_tree_table_leaf_cell, record_columns) From 6e3404f4cec351a8b485da3b7ba4092f608ea4cd Mon Sep 17 00:00:00 2001 From: kchason Date: Sat, 15 Jan 2022 20:01:19 -0500 Subject: [PATCH 10/35] Fix instrument linking for CASE --- sqlite_dissect/export/case_export.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlite_dissect/export/case_export.py b/sqlite_dissect/export/case_export.py index 763292a..d23a4bd 100644 --- a/sqlite_dissect/export/case_export.py +++ b/sqlite_dissect/export/case_export.py @@ -315,7 +315,7 @@ def generate_investigation_action(self, source_guids: list, tool_guid: str): # Loop through and add the results to the ActionReferencesFacet action_facet = { "@type": "uco-action:ActionReferencesFacet", - "uco-action:instrument": tool_guid, + "uco-action:instrument": guid_list_to_objects([tool_guid]), "uco-action:object": guid_list_to_objects(source_guids), "uco-action:result": guid_list_to_objects(self.result_guids) } From df02d75efdbceee6274e986e99f3c0da4fac7047 Mon Sep 17 00:00:00 2001 From: kchason Date: Tue, 1 Mar 2022 15:39:27 -0500 Subject: [PATCH 11/35] Add attribution links --- docs/source/index.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/source/index.rst b/docs/source/index.rst index d0111d3..39c5601 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -7,6 +7,10 @@ the associated journal files. If they are not in the same directory as the speci their location will need to be specified in the command. SQLite carving will not be done by default. See the full command line options to enable carving. +SQLite Dissect is primarily developed and maintained by `Department of Defense Cyber Crime Center (DC3) `_ +with support from open-source contributors. The tool is hosted on GitHub at +`https://github.com/Defense-Cyber-Crime-Center/sqlite-dissect `_. + .. toctree:: :maxdepth: 2 From 5750f8d4df412f6b80572fb5bd23367156295979 Mon Sep 17 00:00:00 2001 From: kchason Date: Tue, 1 Mar 2022 15:41:22 -0500 Subject: [PATCH 12/35] Bump version --- _version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_version.py b/_version.py index 8ab63c0..46f3a6b 100644 --- a/_version.py +++ b/_version.py @@ -4,4 +4,4 @@ This script identifies the version of the SQLite Dissect library. """ -__version__ = "0.2.0" +__version__ = "1.0.0" From 4ef3b6456ca4e3d0ab550e3d7068e1ccb2c98d66 Mon Sep 17 00:00:00 2001 From: kchason Date: Tue, 1 Mar 2022 18:39:33 -0500 Subject: [PATCH 13/35] Update documentation references --- docs/source/index.rst | 6 ++++-- docs/source/sqlite_dissect/getting_started.rst | 3 +++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 39c5601..5d29efb 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -8,8 +8,10 @@ their location will need to be specified in the command. SQLite carving will not command line options to enable carving. SQLite Dissect is primarily developed and maintained by `Department of Defense Cyber Crime Center (DC3) `_ -with support from open-source contributors. The tool is hosted on GitHub at -`https://github.com/Defense-Cyber-Crime-Center/sqlite-dissect `_. +with support from open-source contributors. The tool is hosted on `GitHub `_. + +All released versions of the tool can be downloaded from the `GitHub releases page `_. + .. toctree:: :maxdepth: 2 diff --git a/docs/source/sqlite_dissect/getting_started.rst b/docs/source/sqlite_dissect/getting_started.rst index 3034fa2..d9dad8a 100644 --- a/docs/source/sqlite_dissect/getting_started.rst +++ b/docs/source/sqlite_dissect/getting_started.rst @@ -6,5 +6,8 @@ System Requirements SQLite Dissect depends on Python 3.6+, with automated tests run against versions 3.6, 3.7, 3.8, 3.9, and 3.10. It has been tested on Windows, OSX, and Linux (Ubuntu) platforms. +SQLite Dissect versions up to and including 0.1.0 support Python 2.7 and can be downloaded from the +`GitHub releases page `_. + To try to limit the need for dependencies, only one package is required for SQLite Dissect, which is the `openpxl `_ package that is used for exporting the results into Excel format. From 409b4029d68018b84395f78b2eac19a7f59ede3b Mon Sep 17 00:00:00 2001 From: kchason Date: Wed, 23 Mar 2022 12:23:53 -0400 Subject: [PATCH 14/35] Fix initial running capability --- main.py | 19 ++++++++----------- sqlite_dissect/utilities.py | 3 +-- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/main.py b/main.py index cf5b858..ccc8949 100644 --- a/main.py +++ b/main.py @@ -1,6 +1,5 @@ import uuid import warnings -from argparse import ArgumentParser from logging import CRITICAL from logging import DEBUG from logging import ERROR @@ -9,7 +8,7 @@ from logging import basicConfig from logging import getLogger from os import path -from os.path import basename +from os.path import basename, abspath from os.path import join from os.path import exists from os.path import getsize @@ -17,7 +16,6 @@ from os.path import sep from time import time from warnings import warn -from _version import __version__ from sqlite_dissect.carving.rollback_journal_carver import RollBackJournalCarver from sqlite_dissect.carving.signature import Signature from sqlite_dissect.constants import BASE_VERSION_NUMBER @@ -43,7 +41,6 @@ from sqlite_dissect.version_history import VersionHistory from sqlite_dissect.version_history import VersionHistoryParser from datetime import datetime -import sys """ @@ -160,8 +157,8 @@ def main(arguments, sqlite_file_path, export_sub_paths=False): f"Determined export type to be {export_types} with file prefix: {file_prefix} and output directory: {output_directory}") # Obtain the SQLite file - if not exists(arguments.sqlite_file): - raise SqliteError(f"Unable to find SQLite file: {args.sqlite_file}.") + if not exists(sqlite_file_path): + raise SqliteError(f"Unable to find SQLite file: {sqlite_file_path}.") """ @@ -232,7 +229,7 @@ def main(arguments, sqlite_file_path, export_sub_paths=False): raise SqliteError(f"Found a zero length SQLite file with a wal file: {arguments.wal}. Unable to parse.") elif zero_length_wal_file: - print(f"File: {arguments.sqlite_file} with wal file: {wal_file_name} has no content. Nothing to parse.") + print(f"File: {sqlite_file_path} with wal file: {wal_file_name} has no content. Nothing to parse.") exit(0) elif rollback_journal_file_name and not zero_length_rollback_journal_file: @@ -250,7 +247,7 @@ def main(arguments, sqlite_file_path, export_sub_paths=False): elif zero_length_rollback_journal_file: print( - f"File: {arguments.sqlite_file} with rollback journal file: {rollback_journal_file_name} has no content. " + f"File: {sqlite_file_path} with rollback journal file: {rollback_journal_file_name} has no content. " f"Nothing to parse.") exit(0) @@ -279,7 +276,7 @@ def main(arguments, sqlite_file_path, export_sub_paths=False): f"Only one journal file should exist. Unable to parse.") # Print a message parsing is starting and log the start time for reporting at the end on amount of time to run - print(f"\nParsing: {arguments.sqlite_file}...") + print(f"\nParsing: {sqlite_file_path}...") start_time = time() # Create the database and wal/rollback journal file (if existent) @@ -438,7 +435,7 @@ def main(arguments, sqlite_file_path, export_sub_paths=False): case.register_options(arguments) # Add the SQLite/DB file to the CASE output - source_guids = [case.add_observable_file(normpath(arguments.sqlite_file), 'sqlite-file')] + source_guids = [case.add_observable_file(normpath(sqlite_file_path), 'sqlite-file')] # Add the WAL and journal files to the output if they exist if wal_file_name: @@ -778,7 +775,7 @@ def carve_rollback_journal(output_directory, rollback_journal_file, rollback_jou # Determine if a directory has been passed instead of a file, in which case, find all args = parse_args() if args.sqlite_path is not None: - sqlite_files = get_sqlite_files(args.sqlite_path) + sqlite_files = get_sqlite_files(abspath(args.sqlite_path)) # Ensure there is at least one SQLite file if len(sqlite_files) > 0: for sqlite_file in sqlite_files: diff --git a/sqlite_dissect/utilities.py b/sqlite_dissect/utilities.py index 52392e0..16bd625 100644 --- a/sqlite_dissect/utilities.py +++ b/sqlite_dissect/utilities.py @@ -272,8 +272,7 @@ def is_sqlite_file(path: str) -> bool: with open(path, "rb") as sqlite: header = sqlite.read(SQLITE_DATABASE_HEADER_LENGTH) header_magic = header[0:16] - magic = MAGIC_HEADER_STRING.decode(MAGIC_HEADER_STRING_ENCODING) - return header_magic == magic + return header_magic == MAGIC_HEADER_STRING except IOError as e: logging.error("Invalid SQLite file found: {}".format(e)) From 0b1181b26dfc6782decfdb81e7290e2a12efa1b2 Mon Sep 17 00:00:00 2001 From: kchason Date: Wed, 23 Mar 2022 14:02:29 -0400 Subject: [PATCH 15/35] Initial fix of string updates --- sqlite_dissect/tests/carving_utilities_test.py | 2 +- sqlite_dissect/tests/nist_assertions.py | 5 ++--- sqlite_dissect/tests/nist_test.py | 18 +++++++----------- sqlite_dissect/tests/utilities.py | 1 - sqlite_dissect/utilities.py | 9 ++++----- 5 files changed, 14 insertions(+), 21 deletions(-) diff --git a/sqlite_dissect/tests/carving_utilities_test.py b/sqlite_dissect/tests/carving_utilities_test.py index d87f6ac..bc4bce1 100644 --- a/sqlite_dissect/tests/carving_utilities_test.py +++ b/sqlite_dissect/tests/carving_utilities_test.py @@ -83,7 +83,7 @@ def test_generate_regex_for_simplified_serial_type(): # hardcoded values for -2 and -1 # hex string for 0-9 # CarvingError for anything else - assert generate_regex_for_simplified_serial_type(4) == "\x04" + assert generate_regex_for_simplified_serial_type(4) == b"\x04" with pytest.raises(CarvingError): generate_regex_for_simplified_serial_type(-10) diff --git a/sqlite_dissect/tests/nist_assertions.py b/sqlite_dissect/tests/nist_assertions.py index f383951..6023ccc 100644 --- a/sqlite_dissect/tests/nist_assertions.py +++ b/sqlite_dissect/tests/nist_assertions.py @@ -1,5 +1,4 @@ from os.path import exists, basename -from hashlib import md5 def row_equals(row1, row2): @@ -73,7 +72,7 @@ def assert_correct_num_pages(reported_num, correct_num): def assert_correct_encoding(reported_enc, correct_enc): assert reported_enc.upper() == correct_enc.upper(), "The program reports and incorrect database text encoding!\n" \ "Correct encoding: %s\nReported encoding: %s" % ( - correct_enc, reported_enc) + correct_enc, reported_enc) # SFT-CA-08 @@ -127,7 +126,7 @@ def assert_correct_rows(reported_rows, correct_rows): # SFT-CA-13 def assert_correct_source(reported_source, accepted_sources, element): assert reported_source in accepted_sources, "The program reports an invalid file source!\n Element: %s\n" \ - "Reported source: %s" % (element, reported_source) + "Reported source: %s" % (element, reported_source) # NIST SFT-AO: diff --git a/sqlite_dissect/tests/nist_test.py b/sqlite_dissect/tests/nist_test.py index 859eea7..31b2ac1 100644 --- a/sqlite_dissect/tests/nist_test.py +++ b/sqlite_dissect/tests/nist_test.py @@ -1,13 +1,13 @@ import sqlite3 -import nist_assertions +import sqlite_dissect.tests.nist_assertions from hashlib import md5 from main import main import io import sys -import os import pytest from sqlite_dissect.constants import FILE_TYPE -from sqlite_dissect.tests.utilities import db_file, parse_csv +from sqlite_dissect.tests import nist_assertions +from sqlite_dissect.tests.utilities import parse_csv from sqlite_dissect.utilities import get_sqlite_files, parse_args @@ -41,6 +41,7 @@ def test_header_reporting(db_file): reported_num_pages = None reported_encoding = None for line in parser_output.getvalue().splitlines(): + line = str(line) if "FILE FORMAT WRITE VERSION" in line.upper(): reported_journal_mode_write = line.split(': ')[1].strip() elif "FILE FORMAT READ VERSION" in line.upper(): @@ -91,6 +92,7 @@ def test_schema_reporting(db_file): current_table = None row_count = 0 for line in parser_output.getvalue().splitlines(): + line = str(line) if "Master schema entry: " in line and "row type: table" in line: current_table = line[line.find("Master schema entry: "):line.find("row type: ")].split(': ')[1].strip() reported_tables.append(current_table) @@ -153,8 +155,7 @@ def test_row_recovery(db_file, tmp_path): parser_output = io.BytesIO() sys.stdout = parser_output - args = parse_args([db_filepath, '-c', - '-e', 'csv', '--directory', str(tmp_path)]) + args = parse_args([db_filepath, '-c', '-e', 'csv', '--directory', str(tmp_path)]) sqlite_files = get_sqlite_files(args.sqlite_path) main(args, sqlite_files[0], len(sqlite_files) > 1) @@ -185,8 +186,8 @@ def test_metadata_reporting(db_file): main(args, sqlite_files[0], len(sqlite_files) > 1) current_table = None - log_lines = '' for line in parser_output.getvalue().splitlines(): + line = str(line) if "Master schema entry: " in line and "row type: table" in line: current_table = line[line.find("Master schema entry: "):line.find("row type: ")].split(': ')[1].strip() @@ -198,11 +199,6 @@ def test_metadata_reporting(db_file): elif line == '-' * 15: current_table = None - # Logging for debugging purposes: - # with open(os.path.join(os.path.split(__file__)[0], 'log_files', db_file[0].name + '.log'), 'w') as log_file: - # log_file.write("Recovered table rows:\n") - # log_file.write(log_lines) - hash_after_parsing = get_md5_hash(db_filepath) nist_assertions.assert_md5_equals(hash_before_parsing, hash_after_parsing, db_file[0].name) diff --git a/sqlite_dissect/tests/utilities.py b/sqlite_dissect/tests/utilities.py index 4cc2004..aeb41b8 100644 --- a/sqlite_dissect/tests/utilities.py +++ b/sqlite_dissect/tests/utilities.py @@ -1,4 +1,3 @@ -import os import pytest import sqlite3 import random diff --git a/sqlite_dissect/utilities.py b/sqlite_dissect/utilities.py index 16bd625..a7e5c3d 100644 --- a/sqlite_dissect/utilities.py +++ b/sqlite_dissect/utilities.py @@ -108,17 +108,16 @@ def encode_varint(value): if value & 0xff000000 << 32: byte = value & 0xff - byte_array.insert(0, pack("B", byte)) + byte_array.insert(0, byte) value >>= 8 for _ in range(8): - byte_array.insert(0, pack("B", (value & 0x7f) | 0x80)) + byte_array.insert(0, ((value & 0x7f) | 0x80)) value >>= 7 else: - while value: - byte_array.insert(0, pack("B", (value & 0x7f) | 0x80)) + byte_array.insert(0, ((value & 0x7f) | 0x80)) value >>= 7 if len(byte_array) >= 9: @@ -128,7 +127,7 @@ def encode_varint(value): getLogger(LOGGER_NAME).error(log_message) raise InvalidVarIntError(log_message) - byte_array[-1] &= 0x7f + byte_array = byte_array[:-1] + pack("B", (byte_array[-1] & 0x7f)) return byte_array From f46e468c534ef6233de66de799479d21fcdd9627 Mon Sep 17 00:00:00 2001 From: kchason Date: Fri, 25 Mar 2022 09:15:20 -0400 Subject: [PATCH 16/35] Additional test updates --- sqlite_dissect/carving/utilities.py | 4 +-- sqlite_dissect/file/wal/commit_record.py | 2 +- sqlite_dissect/file/wal/wal.py | 2 +- .../tests/carving_utilities_test.py | 27 +------------------ sqlite_dissect/tests/file_utilities_test.py | 4 +-- sqlite_dissect/tests/nist_test.py | 8 +++--- 6 files changed, 10 insertions(+), 37 deletions(-) diff --git a/sqlite_dissect/carving/utilities.py b/sqlite_dissect/carving/utilities.py index 486ed1a..0c67b42 100644 --- a/sqlite_dissect/carving/utilities.py +++ b/sqlite_dissect/carving/utilities.py @@ -26,10 +26,9 @@ """ -def decode_varint_in_reverse(byte_array, offset, max_varint_length=9): +def decode_varint_in_reverse(byte_array: bytearray, offset: int, max_varint_length=9): """ - This function will move backwards through a byte array trying to decode a varint in reverse. A InvalidVarIntError will be raised if a varint is not found by this algorithm used in this function. The calling logic should check for this case in case it is encountered which is likely in the context of carving. @@ -57,7 +56,6 @@ def decode_varint_in_reverse(byte_array, offset, max_varint_length=9): algorithm in this function. This error is not logged as an error but rather a debug statement since it is very likely to occur during carving and should be handled appropriately. - """ if offset > len(byte_array): diff --git a/sqlite_dissect/file/wal/commit_record.py b/sqlite_dissect/file/wal/commit_record.py index 2b0a531..d72c591 100644 --- a/sqlite_dissect/file/wal/commit_record.py +++ b/sqlite_dissect/file/wal/commit_record.py @@ -181,7 +181,7 @@ def __init__(self, version_number, database, write_ahead_log, frames, page_frame self.frames[frame.header.page_number] = frame # Set the updated page numbers derived from this commit records frame keys - self.updated_page_numbers = copy(self.frames.keys()) + self.updated_page_numbers = copy(list(self.frames.keys())) log_message = "Commit Record Version: {} has the updated page numbers: {}." log_message = log_message.format(self.version_number, self.updated_page_numbers) diff --git a/sqlite_dissect/file/wal/wal.py b/sqlite_dissect/file/wal/wal.py index 77e705b..14c75ae 100644 --- a/sqlite_dissect/file/wal/wal.py +++ b/sqlite_dissect/file/wal/wal.py @@ -44,7 +44,7 @@ def __init__(self, file_identifier, store_in_memory=False, file_size=None, stric frame_size = (WAL_FRAME_HEADER_LENGTH + self.file_handle.header.page_size) - self.number_of_frames = (self.file_handle.file_size - WAL_HEADER_LENGTH) / frame_size + self.number_of_frames = int((self.file_handle.file_size - WAL_HEADER_LENGTH) / frame_size) valid_frame_array = [] invalid_frame_array = [] diff --git a/sqlite_dissect/tests/carving_utilities_test.py b/sqlite_dissect/tests/carving_utilities_test.py index bc4bce1..c893411 100644 --- a/sqlite_dissect/tests/carving_utilities_test.py +++ b/sqlite_dissect/tests/carving_utilities_test.py @@ -1,5 +1,4 @@ import pytest -from os.path import abspath, dirname, join from sqlite_dissect.carving.utilities import decode_varint_in_reverse, generate_regex_for_simplified_serial_type, \ calculate_body_content_size, calculate_serial_type_definition_content_length_min_max, \ @@ -8,31 +7,7 @@ from sqlite_dissect.constants import BLOB_SIGNATURE_IDENTIFIER, TEXT_SIGNATURE_IDENTIFIER from sqlite_dissect.exception import CarvingError from sqlite_dissect.utilities import encode_varint -from sqlite_dissect.file.database.database import Database -from sqlite_dissect.version_history import VersionHistory -from sqlite_dissect.file.wal.wal import WriteAheadLog -from sqlite_dissect.constants import MASTER_SCHEMA_ROW_TYPE -from sqlite_dissect.carving.signature import Signature - -''' -def encode_varint(value): - bit_length = len(bin(value).strip('0b')) - encoded_string = bytearray() - if value >> 64: - return None - elif value >> 56: - encoded_string.insert(0, value & 0xFF) - value >>= 8 - else: - encoded_string.insert(0, value & 0x7F) - value >>= 7 - while value != 0: - byte_to_insert = (value & 0x7F) | 0x80 - encoded_string.insert(0, byte_to_insert) - value >>= 7 - - return encoded_string -''' + varint_tuples = [ (0x10, encode_varint(0x10)), diff --git a/sqlite_dissect/tests/file_utilities_test.py b/sqlite_dissect/tests/file_utilities_test.py index 760ed39..5f0556f 100644 --- a/sqlite_dissect/tests/file_utilities_test.py +++ b/sqlite_dissect/tests/file_utilities_test.py @@ -32,8 +32,8 @@ def test_validate_page_version_history(change, expected_result): else: modified = False - for version_number, version in version_history.versions.iteritems(): - for page_number, page in version.pages.iteritems(): + for version_number, version in version_history.versions.items(): + for page_number, page in version.pages.items(): # modifies first page version number if change == 1: page.page_version_number += 1 diff --git a/sqlite_dissect/tests/nist_test.py b/sqlite_dissect/tests/nist_test.py index 31b2ac1..db1e8ec 100644 --- a/sqlite_dissect/tests/nist_test.py +++ b/sqlite_dissect/tests/nist_test.py @@ -7,7 +7,7 @@ import pytest from sqlite_dissect.constants import FILE_TYPE from sqlite_dissect.tests import nist_assertions -from sqlite_dissect.tests.utilities import parse_csv +from sqlite_dissect.tests.utilities import db_file, parse_csv from sqlite_dissect.utilities import get_sqlite_files, parse_args @@ -84,7 +84,7 @@ def test_schema_reporting(db_file): sys.stdout = parser_output args = parse_args([db_filepath]) sqlite_files = get_sqlite_files(args.sqlite_path) - main(args, sqlite_files[0], len(sqlite_files) > 1) + main(args, str(sqlite_files[0]), len(sqlite_files) > 1) reported_tables = [] reported_columns = {} @@ -157,7 +157,7 @@ def test_row_recovery(db_file, tmp_path): sys.stdout = parser_output args = parse_args([db_filepath, '-c', '-e', 'csv', '--directory', str(tmp_path)]) sqlite_files = get_sqlite_files(args.sqlite_path) - main(args, sqlite_files[0], len(sqlite_files) > 1) + main(args, str(sqlite_files[0]), len(sqlite_files) > 1) recovered_rows = [] @@ -183,7 +183,7 @@ def test_metadata_reporting(db_file): sys.stdout = parser_output args = parse_args([db_filepath, '-c']) sqlite_files = get_sqlite_files(args.sqlite_path) - main(args, sqlite_files[0], len(sqlite_files) > 1) + main(args, str(sqlite_files[0]), len(sqlite_files) > 1) current_table = None for line in parser_output.getvalue().splitlines(): From 8aebdb21dc932722198560a88463a2591944625a Mon Sep 17 00:00:00 2001 From: kchason Date: Fri, 25 Mar 2022 12:01:19 -0400 Subject: [PATCH 17/35] Switch to logging, fix warn -> warning --- main.py | 60 ++++++++++-------------- sqlite_dissect/carving/carver.py | 8 ++-- sqlite_dissect/carving/signature.py | 4 +- sqlite_dissect/constants.py | 4 +- sqlite_dissect/export/csv_export.py | 4 +- sqlite_dissect/export/sqlite_export.py | 6 +-- sqlite_dissect/export/text_export.py | 11 ++--- sqlite_dissect/export/xlsx_export.py | 4 +- sqlite_dissect/file/database/database.py | 4 +- sqlite_dissect/file/database/header.py | 2 +- sqlite_dissect/file/file_handle.py | 2 +- sqlite_dissect/file/journal/header.py | 6 +-- sqlite_dissect/file/schema/master.py | 10 ++-- sqlite_dissect/file/version_parser.py | 10 ++-- sqlite_dissect/file/wal/commit_record.py | 6 +-- sqlite_dissect/file/wal/header.py | 2 +- sqlite_dissect/file/wal/wal.py | 2 +- sqlite_dissect/tests/utilities.py | 2 +- sqlite_dissect/version_history.py | 2 +- 19 files changed, 69 insertions(+), 80 deletions(-) diff --git a/main.py b/main.py index ccc8949..fa181d0 100644 --- a/main.py +++ b/main.py @@ -1,3 +1,4 @@ +import logging import uuid import warnings from logging import CRITICAL @@ -229,7 +230,7 @@ def main(arguments, sqlite_file_path, export_sub_paths=False): raise SqliteError(f"Found a zero length SQLite file with a wal file: {arguments.wal}. Unable to parse.") elif zero_length_wal_file: - print(f"File: {sqlite_file_path} with wal file: {wal_file_name} has no content. Nothing to parse.") + logger.error(f"File: {sqlite_file_path} with wal file: {wal_file_name} has no content. Nothing to parse.") exit(0) elif rollback_journal_file_name and not zero_length_rollback_journal_file: @@ -246,13 +247,13 @@ def main(arguments, sqlite_file_path, export_sub_paths=False): f"Unable to parse.") elif zero_length_rollback_journal_file: - print( + logger.error( f"File: {sqlite_file_path} with rollback journal file: {rollback_journal_file_name} has no content. " f"Nothing to parse.") exit(0) else: - print("File: {} has no content. Nothing to parse.".format(sqlite_file_path)) + logger.error("File: {} has no content. Nothing to parse.".format(sqlite_file_path)) exit(0) # Make sure that both of the journal files are not found @@ -276,7 +277,7 @@ def main(arguments, sqlite_file_path, export_sub_paths=False): f"Only one journal file should exist. Unable to parse.") # Print a message parsing is starting and log the start time for reporting at the end on amount of time to run - print(f"\nParsing: {sqlite_file_path}...") + logger.info(f"\nParsing: {sqlite_file_path}...") start_time = time() # Create the database and wal/rollback journal file (if existent) @@ -296,20 +297,21 @@ def main(arguments, sqlite_file_path, export_sub_paths=False): # Check if the header info was asked for if arguments.header: # Print the header info of the database - print("\nDatabase header information:\n{}".format(database.database_header.stringify(padding="\t"))) - print("Continuing to parse...") + str_header = database.database_header.stringify(padding="\t") + logger.debug(f"\nDatabase header information:\n{str_header}") + logger.debug("Continuing to parse...") # Check if the master schema was asked for if arguments.schema: # print the master schema of the database - print(f"\nDatabase Master Schema:\n{stringify_master_schema_version(database)}") - print("Continuing to parse...") + logger.debug(f"\nDatabase Master Schema:\n{stringify_master_schema_version(database)}") + logger.debug("Continuing to parse...") # Check if the schema history was asked for if arguments.schema_history: # print the master schema version history - print(f"\nVersion History of Master Schemas:\n{stringify_master_schema_versions(version_history)}") - print("Continuing to parse...") + logger.debug(f"\nVersion History of Master Schemas:\n{stringify_master_schema_versions(version_history)}") + logger.debug("Continuing to parse...") # Get the signature options print_signatures = arguments.signatures @@ -322,7 +324,7 @@ def main(arguments, sqlite_file_path, export_sub_paths=False): if not carve and carve_freelists: log_message = "The carve option was not set but the carve_freelists option was. Disabling carve_freelists. " \ "Please specify the carve option to enable." - logger.warn(log_message) + logger.warning(log_message) warn(log_message, RuntimeWarning) # Specific tables to be carved @@ -333,8 +335,8 @@ def main(arguments, sqlite_file_path, export_sub_paths=False): if rollback_journal_exempted_tables and specified_tables_to_carve: for table in rollback_journal_exempted_tables: if table in specified_tables_to_carve: - print(f"Table: {table} found in both exempted and specified tables. Please update the arguments " - f"correctly.") + logger.error(f"Table: {table} found in both exempted and specified tables. Please update the " + f"arguments correctly.") exit(0) # See if we need to generate signatures @@ -385,7 +387,7 @@ def main(arguments, sqlite_file_path, export_sub_paths=False): if print_signatures: printable_signature = signatures[master_schema_entry.name].stringify("\t", False, False, False) - print(f"\nSignature:\n{printable_signature}") + logger.debug(f"\nSignature:\n{printable_signature}") """ @@ -461,7 +463,7 @@ def main(arguments, sqlite_file_path, export_sub_paths=False): # Carve the rollback journal if found and carving is not specified if rollback_journal_file and not carve: - print(f"Rollback journal file found: {rollback_journal_file}. Rollback journal file parsing is under " + logger.warning(f"Rollback journal file found: {rollback_journal_file}. Rollback journal file parsing is under " f"development and currently only supports carving. Please rerun with the --carve option for this output.") # Carve the rollback journal if found and carving is specified @@ -469,21 +471,22 @@ def main(arguments, sqlite_file_path, export_sub_paths=False): if not output_directory: - print(f"Rollback journal file found: {rollback_journal_file}. Rollback journal file carving is under " - f"development and currently only outputs to CSV. Due to this, the output directory needs to be " - f"specified. Please rerun with a output directory specified in order for this to complete.") + logger.error(f"Rollback journal file found: {rollback_journal_file}. Rollback journal file carving is " + f"under development and currently only outputs to CSV. Due to this, the output directory " + f"needs to be specified. Please rerun with a output directory specified in order for this to " + f"complete.") else: - print(f"Carving rollback journal file: {rollback_journal_file}. Rollback journal file carving is under " - f"development and currently only outputs to CSV. Any export type specified will be overridden for " - f"this.") + logger.error(f"Carving rollback journal file: {rollback_journal_file}. Rollback journal file carving is " + f"under development and currently only outputs to CSV. Any export type specified will be " + f"overridden for this.") carve_rollback_journal(output_directory, rollback_journal_file, rollback_journal_file_name, specified_tables_to_carve, rollback_journal_exempted_tables, version_history, signatures, logger) - print(f"Finished in {round(time() - start_time, 2)} seconds.") + logger.info(f"Finished in {round(time() - start_time, 2)} seconds.") def print_text(output_directory, file_prefix, carve, carve_freelists, specified_tables_to_carve, @@ -588,8 +591,7 @@ def print_text(output_directory, file_prefix, carve, carve_freelists, specified_ def print_csv(output_directory, file_prefix, carve, carve_freelists, specified_tables_to_carve, version_history, signatures, logger): # Export all index and table histories to csv files while supplying signature to carve with - print(f"\nExporting history as CSV to {output_directory}...") - logger.debug(f"Exporting history to {output_directory} as CSV.") + logger.info(f"Exporting history to {output_directory} as CSV.") commit_csv_exporter = CommitCsvExporter(output_directory, file_prefix) @@ -608,7 +610,6 @@ def print_csv(output_directory, file_prefix, carve, carve_freelists, specified_t if not signature and master_schema_entry.row_type is MASTER_SCHEMA_ROW_TYPE.TABLE \ and not master_schema_entry.without_row_id \ and not master_schema_entry.internal_schema_object: - print(f"Unable to find signature for: {master_schema_entry.name}. This table will not be carved.") logger.error(f"Unable to find signature for: {master_schema_entry.name}. This table will not be " f"carved.") @@ -630,7 +631,6 @@ def print_sqlite(output_directory, file_prefix, carve, carve_freelists, file_postfix = "-sqlite-dissect.db3" sqlite_file_name = file_prefix + file_postfix - print("\nExporting history as SQLite to {}{}{}...".format(output_directory, sep, sqlite_file_name)) logger.debug("Exporting history as SQLite to {}{}{}.".format(output_directory, sep, sqlite_file_name)) with CommitSqliteExporter(output_directory, sqlite_file_name) as commit_sqlite_exporter: @@ -650,8 +650,6 @@ def print_sqlite(output_directory, file_prefix, carve, carve_freelists, if not signature and master_schema_entry.row_type is MASTER_SCHEMA_ROW_TYPE.TABLE \ and not master_schema_entry.without_row_id \ and not master_schema_entry.internal_schema_object: - print("Unable to find signature for: {}. This table will not be carved." - .format(master_schema_entry.name)) logger.error("Unable to find signature for: {}. This table will not be carved." .format(master_schema_entry.name)) @@ -673,7 +671,6 @@ def print_xlsx(output_directory, file_prefix, carve, carve_freelists, specified_ xlsx_file_name = file_prefix + file_postfix # Export all index and table histories to a xlsx workbook while supplying signature to carve with - print("\nExporting history as XLSX to {}{}{}...".format(output_directory, sep, xlsx_file_name)) logger.debug("Exporting history as XLSX to {}{}{}.".format(output_directory, sep, xlsx_file_name)) with CommitXlsxExporter(output_directory, xlsx_file_name) as commit_xlsx_exporter: @@ -693,8 +690,6 @@ def print_xlsx(output_directory, file_prefix, carve, carve_freelists, specified_ if not signature and master_schema_entry.row_type is MASTER_SCHEMA_ROW_TYPE.TABLE \ and not master_schema_entry.without_row_id \ and not master_schema_entry.internal_schema_object: - print("Unable to find signature for: {}. This table will not be carved." - .format(master_schema_entry.name)) logger.error("Unable to find signature for: {}. This table will not be carved." .format(master_schema_entry.name)) @@ -725,7 +720,6 @@ def carve_rollback_journal(output_directory, rollback_journal_file, rollback_jou """ csv_prefix_rollback_journal_file_name = basename(normpath(rollback_journal_file_name)) - print("Exporting rollback journal carvings as CSV to {}...".format(output_directory)) logger.debug("Exporting rollback journal carvings as csv to output directory: {}.".format(output_directory)) commit_csv_exporter = CommitCsvExporter(output_directory, csv_prefix_rollback_journal_file_name) @@ -765,8 +759,6 @@ def carve_rollback_journal(output_directory, rollback_journal_file, rollback_jou commit_csv_exporter.write_commit(master_schema_entry, commit) else: - print("Unable to find signature for: {}. This table will not be carved from the rollback journal." - .format(master_schema_entry.name)) logger.error("Unable to find signature for: {}. This table will not be carved from the " "rollback journal.".format(master_schema_entry.name)) diff --git a/sqlite_dissect/carving/carver.py b/sqlite_dissect/carving/carver.py index 14ef80b..73d82bf 100644 --- a/sqlite_dissect/carving/carver.py +++ b/sqlite_dissect/carving/carver.py @@ -83,7 +83,7 @@ def carve_freeblocks(version, source, freeblocks, signature): "carving freeblocks with signatures: {}. Signatures starting with variable length serial " \ "types are not fully implemented and may result in carving false positives." log_message = log_message.format(first_column_serial_types, simplified_signature) - logger.warn(log_message) + logger.warning(log_message) warn(log_message, RuntimeWarning) # Retrieve and compile the serial type definition signature pattern @@ -186,7 +186,7 @@ def carve_freeblocks(version, source, freeblocks, signature): serial_type_definition_start_offset, serial_type_definition_end_offset, cutoff_offset, number_of_columns, signature.name, signature.table_name) - logger.warn(log_message) + logger.warning(log_message) warn(log_message, RuntimeWarning) # Return the cells carved from the freeblocks @@ -406,7 +406,7 @@ def carve_unallocated_space(version, source, page_number, unallocated_space_star serial_type_definition_start_offset, serial_type_definition_end_offset, cutoff_offset, number_of_columns, signature.name, signature.table_name) - logger.warn(log_message) + logger.warning(log_message) warn(log_message, RuntimeWarning) """ @@ -586,7 +586,7 @@ def carve_unallocated_space(version, source, page_number, unallocated_space_star partial_serial_type_definition_end_offset, partial_cutoff_offset, number_of_columns, signature.name, signature.table_name) - logger.warn(log_message) + logger.warning(log_message) warn(log_message, RuntimeWarning) # Return the cells carved from the freeblocks diff --git a/sqlite_dissect/carving/signature.py b/sqlite_dissect/carving/signature.py index fc69382..a36143f 100644 --- a/sqlite_dissect/carving/signature.py +++ b/sqlite_dissect/carving/signature.py @@ -84,7 +84,7 @@ def __init__(self, version_history, master_schema_entry, version_number=None, en log_message = log_message.format(master_schema_entry.root_page_number, master_schema_entry.row_type, master_schema_entry.name, master_schema_entry.table_name, master_schema_entry.sql) - logger.warn(log_message) + logger.warning(log_message) warn(log_message, RuntimeWarning) if master_schema_entry.internal_schema_object: @@ -94,7 +94,7 @@ def __init__(self, version_history, master_schema_entry, version_number=None, en log_message = log_message.format(master_schema_entry.root_page_number, master_schema_entry.row_type, master_schema_entry.name, master_schema_entry.table_name, master_schema_entry.sql) - logger.warn(log_message) + logger.warning(log_message) warn(log_message, RuntimeWarning) log_message = "Creating signature for master schema entry with name: {} table name: {} row type: {} and " \ diff --git a/sqlite_dissect/constants.py b/sqlite_dissect/constants.py index ae5a967..3391a40 100644 --- a/sqlite_dissect/constants.py +++ b/sqlite_dissect/constants.py @@ -240,8 +240,8 @@ def __len__(self): ROLLBACK_JOURNAL_ALL_CONTENT_UNTIL_END_OF_FILE = -1 ROLLBACK_JOURNAL_POSTFIX = "-journal" ROLLBACK_JOURNAL_HEADER_LENGTH = 28 -ROLLBACK_JOURNAL_HEADER_HEX_STRING = 'd9d505f920a163d7' -ROLLBACK_JOURNAL_HEADER_ALL_CONTENT = 'ffffffff' +ROLLBACK_JOURNAL_HEADER_HEX_STRING = b'\xd9\xd5\x05\xf9\x20\xa1\x63\xd7' +ROLLBACK_JOURNAL_HEADER_ALL_CONTENT = b'\xff\xff\xff\xff' BASE_VERSION_NUMBER = 0 COMMIT_RECORD_BASE_VERSION_NUMBER = BASE_VERSION_NUMBER + 1 diff --git a/sqlite_dissect/export/csv_export.py b/sqlite_dissect/export/csv_export.py index dc82fe2..075137f 100644 --- a/sqlite_dissect/export/csv_export.py +++ b/sqlite_dissect/export/csv_export.py @@ -119,7 +119,7 @@ def write_version(csv_file_name, export_directory, version, master_schema_entry_ log_message = log_message.format(master_schema_entry.row_type, master_schema_entry.name, master_schema_entry.table_name, master_schema_entry.sql) - logger.warn(log_message) + logger.warning(log_message) raise ExportError(log_message) @staticmethod @@ -563,7 +563,7 @@ def write_commit(self, master_schema_entry, commit): log_message = "Invalid commit page type: {} found for csv export on master " \ "schema entry name: {} while writing to csv file name: {}." log_message = log_message.format(commit.page_type, commit.name, csv_file_name) - logger.warn(log_message) + logger.warning(log_message) raise ExportError(log_message) @staticmethod diff --git a/sqlite_dissect/export/sqlite_export.py b/sqlite_dissect/export/sqlite_export.py index e069ae4..9bb7d55 100644 --- a/sqlite_dissect/export/sqlite_export.py +++ b/sqlite_dissect/export/sqlite_export.py @@ -168,7 +168,7 @@ def write_commit(self, master_schema_entry, commit): "found for sqlite export on master schema entry name: {} page type: {} " \ "while writing to sqlite file name: {}." log_message = log_message.format(len(cells), commit.name, commit.page_type, self._sqlite_file_name) - logger.warn(log_message) + logger.warning(log_message) raise ExportError(log_message) number_of_columns = len(cells[0].payload.record_columns) @@ -207,7 +207,7 @@ def write_commit(self, master_schema_entry, commit): log_message = "Invalid commit page type: {} found for sqlite export on master " \ "schema entry name: {} while writing to sqlite file name: {}." log_message = log_message.format(commit.page_type, commit.name, self._sqlite_file_name) - logger.warn(log_message) + logger.warning(log_message) raise ExportError(log_message) create_table_statement = "CREATE TABLE {} ({})" @@ -266,7 +266,7 @@ def write_commit(self, master_schema_entry, commit): log_message = "Invalid commit page type: {} found for sqlite export on master " \ "schema entry name: {} while writing to sqlite file name: {}." log_message = log_message.format(commit.page_type, commit.name, self._sqlite_file_name) - logger.warn(log_message) + logger.warning(log_message) raise ExportError(log_message) """ diff --git a/sqlite_dissect/export/text_export.py b/sqlite_dissect/export/text_export.py index ba53927..025fcdd 100644 --- a/sqlite_dissect/export/text_export.py +++ b/sqlite_dissect/export/text_export.py @@ -25,10 +25,8 @@ class CommitConsoleExporter(object): @staticmethod def write_header(master_schema_entry, page_type): - header = "\nMaster schema entry: {} row type: {} on page type: {} with sql: {}." - header = header.format(master_schema_entry.name, master_schema_entry.row_type, - page_type, master_schema_entry.sql) - print(header) + print(f"\nMaster schema entry: {master_schema_entry.name} row type: {master_schema_entry.row_type} on page " + f"type: {page_type} with sql: {master_schema_entry.sql}.".encode()) @staticmethod def write_commit(commit): @@ -87,7 +85,7 @@ def write_commit(commit): log_message = "Invalid commit page type: {} found for text export on master " \ "schema entry name: {} while writing to sqlite file name: {}." log_message = log_message.format(commit.page_type, commit.name) - logger.warn(log_message) + logger.warning(log_message) raise ExportError(log_message) @staticmethod @@ -144,7 +142,6 @@ def __enter__(self): # Check if the file exists and if it does rename it if exists(self._text_file_name): - # Generate a uuid to append to the file name new_file_name_for_existing_file = self._text_file_name + "-" + str(uuid4()) @@ -224,7 +221,7 @@ def write_commit(self, commit): log_message = "Invalid commit page type: {} found for text export on master " \ "schema entry name: {}." log_message = log_message.format(commit.page_type, commit.name, self._text_file_name) - logger.warn(log_message) + logger.warning(log_message) raise ExportError(log_message) @staticmethod diff --git a/sqlite_dissect/export/xlsx_export.py b/sqlite_dissect/export/xlsx_export.py index d8c9c8f..c05ee62 100644 --- a/sqlite_dissect/export/xlsx_export.py +++ b/sqlite_dissect/export/xlsx_export.py @@ -147,7 +147,7 @@ def write_commit(self, master_schema_entry, commit): "which is greater than the 31 allowed characters while writing to xlsx file name: {}." log_message = log_message.format(commit.name, commit.page_type, len(commit.name), self._xlsx_file_name) - logger.warn(log_message) + logger.warning(log_message) raise ExportError(log_message) sheet = self._sheets[sheet_name] if sheet_name in self._sheets else None @@ -229,7 +229,7 @@ def write_commit(self, master_schema_entry, commit): log_message = "Invalid commit page type: {} found for xlsx export on master " \ "schema entry name: {} while writing to xlsx file name: {}." log_message = log_message.format(commit.page_type, commit.name, self._xlsx_file_name) - logger.warn(log_message) + logger.warning(log_message) raise ExportError(log_message) @staticmethod diff --git a/sqlite_dissect/file/database/database.py b/sqlite_dissect/file/database/database.py index f440394..a32c043 100644 --- a/sqlite_dissect/file/database/database.py +++ b/sqlite_dissect/file/database/database.py @@ -121,7 +121,7 @@ def __init__(self, file_identifier, store_in_memory=False, file_size=None, stric self.database_header.file_change_counter, self.database_header.sqlite_version_number, self.database_size_in_pages) - self._logger.warn(log_message) + self._logger.warning(log_message) warn(log_message, RuntimeWarning) # The database header size in pages is set and the version valid for number does equals the change counter @@ -157,7 +157,7 @@ def __init__(self, file_identifier, store_in_memory=False, file_size=None, stric "pages will remain unchanged but possibly erroneous use cases may occur when parsing." log_message = log_message.format(self.version_number, self.database_header.database_size_in_pages, calculated_size_in_pages, self.database_header.sqlite_version_number) - self._logger.warn(log_message) + self._logger.warning(log_message) warn(log_message, RuntimeWarning) else: diff --git a/sqlite_dissect/file/database/header.py b/sqlite_dissect/file/database/header.py index fbe8cd7..e37a407 100644 --- a/sqlite_dissect/file/database/header.py +++ b/sqlite_dissect/file/database/header.py @@ -211,7 +211,7 @@ def __init__(self, database_header_byte_array): """ log_message = "Schema format number and database text encoding are 0 indicating no schema or data." - logger.warn(log_message) + logger.warning(log_message) warn(log_message, RuntimeWarning) else: diff --git a/sqlite_dissect/file/file_handle.py b/sqlite_dissect/file/file_handle.py index ec45984..d94c671 100644 --- a/sqlite_dissect/file/file_handle.py +++ b/sqlite_dissect/file/file_handle.py @@ -222,7 +222,7 @@ def close(self): if self.file_externally_controlled: log_message = "Ignored request to close externally controlled file." - self._logger.warn(log_message) + self._logger.warning(log_message) warn(log_message, RuntimeWarning) else: diff --git a/sqlite_dissect/file/journal/header.py b/sqlite_dissect/file/journal/header.py index e093c8b..8ce0a51 100644 --- a/sqlite_dissect/file/journal/header.py +++ b/sqlite_dissect/file/journal/header.py @@ -40,7 +40,7 @@ def __init__(self, rollback_journal_header_byte_array): self.header_string = rollback_journal_header_byte_array[0:8] - if self.header_string != ROLLBACK_JOURNAL_HEADER_HEX_STRING.decode("hex"): + if self.header_string != ROLLBACK_JOURNAL_HEADER_HEX_STRING: """ @@ -52,12 +52,12 @@ def __init__(self, rollback_journal_header_byte_array): """ log_message = "The header string is invalid." - logger.warn(log_message) + logger.warning(log_message) warn(log_message, RuntimeWarning) self.page_count = unpack(b">I", rollback_journal_header_byte_array[8:12])[0] - if rollback_journal_header_byte_array[8:12] == ROLLBACK_JOURNAL_HEADER_ALL_CONTENT.decode("hex"): + if rollback_journal_header_byte_array[8:12] == ROLLBACK_JOURNAL_HEADER_ALL_CONTENT: self.page_count = ROLLBACK_JOURNAL_ALL_CONTENT_UNTIL_END_OF_FILE self.random_nonce_for_checksum = unpack(b">I", rollback_journal_header_byte_array[12:16])[0] diff --git a/sqlite_dissect/file/schema/master.py b/sqlite_dissect/file/schema/master.py index a7fcebe..aa0a6f8 100644 --- a/sqlite_dissect/file/schema/master.py +++ b/sqlite_dissect/file/schema/master.py @@ -1622,7 +1622,7 @@ class for parsing. This was decided to be the best way to associate comments ba log_message = "A table specified without a row id was found in table row with name: {} and sql: {}. " \ "This use case is not fully implemented." log_message = log_message.format(self.name, self.sql) - logger.warn(log_message) + logger.warning(log_message) warn(log_message, RuntimeWarning) def stringify(self, padding="", print_record_columns=True, @@ -1858,7 +1858,7 @@ def __init__(self, version, b_tree_table_leaf_page_number, b_tree_table_leaf_cel log_message = "Virtual table name: {} was found with module name: {} and sql: {}. Virtual table modules are " \ "not fully implemented." log_message = log_message.format(self.name, self.module_name, self.sql) - logger.warn(log_message) + logger.warning(log_message) warn(log_message, RuntimeWarning) """ @@ -1989,7 +1989,7 @@ def __init__(self, version_interface, b_tree_table_leaf_page_number, if table_row.without_row_id: log_message = "Index row with name: {} and table name: {} was found to rely on a table without a row id." log_message = log_message.format(self.name, self.table_name) - logger.warn(log_message) + logger.warning(log_message) warn(log_message, RuntimeWarning) """ @@ -2034,7 +2034,7 @@ def __init__(self, version_interface, b_tree_table_leaf_page_number, log_message = "A index internal schema object found in index row with name: {} " \ "and sql: {}. This is not fully implemented and may cause issues with index pages." log_message = log_message.format(self.name, self.sql) - logger.warn(log_message) + logger.warning(log_message) warn(log_message, RuntimeWarning) else: @@ -2275,7 +2275,7 @@ def __init__(self, version_interface, b_tree_table_leaf_page_number, log_message = "A index specified as a partial index was found in index row with name: {} " \ "and sql: {}. This use case is not fully implemented." log_message = log_message.format(self.name, self.sql) - logger.warn(log_message) + logger.warning(log_message) warn(log_message, RuntimeWarning) def stringify(self, padding="", print_record_columns=True): diff --git a/sqlite_dissect/file/version_parser.py b/sqlite_dissect/file/version_parser.py index c2e23ae..0b22dc3 100644 --- a/sqlite_dissect/file/version_parser.py +++ b/sqlite_dissect/file/version_parser.py @@ -139,7 +139,7 @@ def __init__(self, version_history, master_schema_entry, version_number=None, en log_message = log_message.format(master_schema_entry.row_type, master_schema_entry.root_page_number, master_schema_entry.name, master_schema_entry.table_name, master_schema_entry.sql) - logger.warn(log_message) + logger.warning(log_message) warn(log_message, RuntimeWarning) # Set the page type and update it as appropriate @@ -151,7 +151,7 @@ def __init__(self, version_history, master_schema_entry, version_number=None, en log_message = log_message.format(master_schema_entry.root_page_number, master_schema_entry.row_type, master_schema_entry.name, master_schema_entry.table_name, master_schema_entry.sql) - logger.warn(log_message) + logger.warning(log_message) warn(log_message, RuntimeWarning) elif isinstance(master_schema_entry, OrdinaryTableRow) and master_schema_entry.without_row_id: @@ -161,7 +161,7 @@ def __init__(self, version_history, master_schema_entry, version_number=None, en log_message = log_message.format(master_schema_entry.root_page_number, master_schema_entry.row_type, master_schema_entry.name, master_schema_entry.table_name, master_schema_entry.sql) - logger.warn(log_message) + logger.warning(log_message) warn(log_message, RuntimeWarning) self.page_type = PAGE_TYPE.B_TREE_INDEX_LEAF @@ -277,7 +277,7 @@ def __init__(self, version_history, master_schema_entry, version_number=None, en log_message = log_message.format(version_number, self.name, self.table_name, self.row_type, self.sql, self.parser_starting_version_number, self.parser_ending_version_number) - logger.warn(log_message) + logger.warning(log_message) warn(log_message, RuntimeWarning) if starting_version_number is None and ending_version_number is None: @@ -287,7 +287,7 @@ def __init__(self, version_history, master_schema_entry, version_number=None, en log_message = log_message.format(self.parser_starting_version_number, self.parser_ending_version_number, self.name, self.table_name, self.row_type, self.sql) - logger.warn(log_message) + logger.warning(log_message) warn(log_message, RuntimeWarning) self.parser_starting_version_number = starting_version_number diff --git a/sqlite_dissect/file/wal/commit_record.py b/sqlite_dissect/file/wal/commit_record.py index d72c591..5b2c813 100644 --- a/sqlite_dissect/file/wal/commit_record.py +++ b/sqlite_dissect/file/wal/commit_record.py @@ -226,7 +226,7 @@ def __init__(self, version_number, database, write_ahead_log, frames, page_frame "when parsing." log_message = log_message.format(len(self.page_version_index), self.database_size_in_pages, self.version_number, self.page_version_index) - self._logger.warn(log_message) + self._logger.warning(log_message) warn(log_message, RuntimeWarning) """ @@ -332,7 +332,7 @@ def __init__(self, version_number, database, write_ahead_log, frames, page_frame log_message = "The sqlite database root page was found in version: {} in the updated pages: {} when " \ "both the database header and the root b-tree page were not modified." log_message = log_message.format(self.version_number, self.updated_page_numbers) - self._logger.warn(log_message) + self._logger.warning(log_message) warn(log_message, RuntimeWarning) if not self.master_schema_modified: @@ -416,7 +416,7 @@ def __init__(self, version_number, database, write_ahead_log, frames, page_frame "committed page size is {}. Possibly erroneous use cases may occur when parsing." log_message = log_message.format(self.version_number, last_database_header.database_size_in_pages, self.committed_page_size) - self._logger.warn(log_message) + self._logger.warning(log_message) warn(log_message, RuntimeWarning) if self.master_schema_modified: diff --git a/sqlite_dissect/file/wal/header.py b/sqlite_dissect/file/wal/header.py index 7acdd34..fcaff21 100644 --- a/sqlite_dissect/file/wal/header.py +++ b/sqlite_dissect/file/wal/header.py @@ -66,7 +66,7 @@ def __init__(self, wal_header_byte_array): if self.checkpoint_sequence_number != 0: log_message = "Checkpoint sequence number is {} instead of 0 and may cause inconsistencies in wal parsing." log_message = log_message.format(self.checkpoint_sequence_number) - logger.warn(log_message) + logger.warning(log_message) warn(log_message, RuntimeWarning) self.salt_1 = unpack(b">I", wal_header_byte_array[16:20])[0] diff --git a/sqlite_dissect/file/wal/wal.py b/sqlite_dissect/file/wal/wal.py index 14c75ae..20897d0 100644 --- a/sqlite_dissect/file/wal/wal.py +++ b/sqlite_dissect/file/wal/wal.py @@ -175,7 +175,7 @@ def __init__(self, file_identifier, store_in_memory=False, file_size=None, stric log_message = "The wal file contains {} invalid frames. Invalid frames are currently skipped and not " \ "implemented which may cause loss in possible carved data at this time until implemented." log_message = log_message.format(len(self.invalid_frames)) - logger.warn(log_message) + logger.warning(log_message) warn(log_message, RuntimeWarning) self.last_frame_commit_record = None diff --git a/sqlite_dissect/tests/utilities.py b/sqlite_dissect/tests/utilities.py index aeb41b8..e65e0fc 100644 --- a/sqlite_dissect/tests/utilities.py +++ b/sqlite_dissect/tests/utilities.py @@ -168,7 +168,7 @@ def db_file(request, tmp_path): modified_rows.append(cursor.fetchone()) update_statement = generate_update_statement(request.param['table_name'], request.param['columns']) - cursor.executemany(update_statement, row_values) + cursor.executemany(update_statement, row_list) db.commit() if request.param['delete'] > 0: diff --git a/sqlite_dissect/version_history.py b/sqlite_dissect/version_history.py index 0a59c78..63e016e 100644 --- a/sqlite_dissect/version_history.py +++ b/sqlite_dissect/version_history.py @@ -283,7 +283,7 @@ def __init__(self, version_history, master_schema_entry, log_message = log_message.format(self.name, self.table_name, self.row_type, self.sql, self.parser_starting_version_number, self.parser_ending_version_number, MASTER_SCHEMA_ROW_TYPE.TABLE, signature.row_type) - logger.warn(log_message) + logger.warning(log_message) warn(log_message, RuntimeWarning) # Set the signature From 77633b52067cfb4ad9a066edf885ee212ad51ea5 Mon Sep 17 00:00:00 2001 From: kchason Date: Fri, 25 Mar 2022 12:23:06 -0400 Subject: [PATCH 18/35] Remove hex formatting call --- sqlite_dissect/carving/signature.py | 8 ++++---- sqlite_dissect/export/text_export.py | 2 +- sqlite_dissect/file/database/header.py | 2 +- sqlite_dissect/file/database/page.py | 6 +++--- sqlite_dissect/file/database/payload.py | 4 ++-- sqlite_dissect/file/file_handle.py | 2 +- sqlite_dissect/file/header.py | 2 +- sqlite_dissect/file/journal/header.py | 2 +- sqlite_dissect/file/journal/jounal.py | 2 +- sqlite_dissect/file/schema/column.py | 4 ++-- sqlite_dissect/file/schema/master.py | 4 ++-- sqlite_dissect/file/schema/table.py | 2 +- sqlite_dissect/file/version.py | 2 +- sqlite_dissect/file/version_parser.py | 2 +- sqlite_dissect/file/wal/frame.py | 2 +- sqlite_dissect/file/wal/header.py | 2 +- sqlite_dissect/file/wal/wal.py | 2 +- sqlite_dissect/file/wal_index/header.py | 2 +- sqlite_dissect/file/wal_index/wal_index.py | 2 +- sqlite_dissect/version_history.py | 6 +++--- 20 files changed, 30 insertions(+), 30 deletions(-) diff --git a/sqlite_dissect/carving/signature.py b/sqlite_dissect/carving/signature.py index a36143f..28ac058 100644 --- a/sqlite_dissect/carving/signature.py +++ b/sqlite_dissect/carving/signature.py @@ -820,7 +820,7 @@ def __init__(self, column_definition): raise SignatureError(log_message) def __repr__(self): - return self.__str__().encode("hex") + return self.__str__() def __str__(self): return sub("\t", "", sub("\n", " ", self.stringify())) @@ -917,7 +917,7 @@ def __init__(self, index, name, column_signatures): column_signature.number_of_rows = self.count def __repr__(self): - return self.__str__().encode("hex") + return self.__str__() def __str__(self): return sub("\t", "", sub("\n", " ", self.stringify())) @@ -1098,7 +1098,7 @@ def __init__(self, column_definitions, record): raise SignatureError(log_message) def __repr__(self): - return self.__str__().encode("hex") + return self.__str__() def __str__(self): return sub("\t", "", sub("\n", " ", self.stringify())) @@ -1311,7 +1311,7 @@ def __init__(self, index, name, serial_type, count=1): raise ValueError(log_message) def __repr__(self): - return self.__str__().encode("hex") + return self.__str__() def __str__(self): return sub("\t", "", sub("\n", " ", self.stringify())) diff --git a/sqlite_dissect/export/text_export.py b/sqlite_dissect/export/text_export.py index 025fcdd..bd842dd 100644 --- a/sqlite_dissect/export/text_export.py +++ b/sqlite_dissect/export/text_export.py @@ -26,7 +26,7 @@ class CommitConsoleExporter(object): @staticmethod def write_header(master_schema_entry, page_type): print(f"\nMaster schema entry: {master_schema_entry.name} row type: {master_schema_entry.row_type} on page " - f"type: {page_type} with sql: {master_schema_entry.sql}.".encode()) + f"type: {page_type} with sql: {page_type}.") @staticmethod def write_commit(commit): diff --git a/sqlite_dissect/file/database/header.py b/sqlite_dissect/file/database/header.py index e37a407..c617c61 100644 --- a/sqlite_dissect/file/database/header.py +++ b/sqlite_dissect/file/database/header.py @@ -356,7 +356,7 @@ def __init__(self, page, header_length): self.md5_hex_digest = get_md5_hash(page[self.offset:self.header_length]) def __repr__(self): - return self.__str__().encode("hex") + return self.__str__() def __str__(self): return sub("\t", "", sub("\n", " ", self.stringify())) diff --git a/sqlite_dissect/file/database/page.py b/sqlite_dissect/file/database/page.py index a246fe9..d296502 100644 --- a/sqlite_dissect/file/database/page.py +++ b/sqlite_dissect/file/database/page.py @@ -112,7 +112,7 @@ def __init__(self, version_interface, number): self.unallocated_space_end_offset = None def __repr__(self): - return self.__str__().encode("hex") + return self.__str__() def __str__(self): return sub("\t", "", sub("\n", " ", self.stringify())) @@ -551,7 +551,7 @@ def __init__(self, index, offset, page_number, page_type, parent_page_number, md self.md5_hex_digest = md5_hex_digest def __repr__(self): - return self.__str__().encode("hex") + return self.__str__() def __str__(self): return sub("\t", "", sub("\n", " ", self.stringify())) @@ -907,7 +907,7 @@ def __init__(self, version_interface, page_version_number, file_offset, page_num self.md5_hex_digest = None def __repr__(self): - return self.__str__().encode("hex") + return self.__str__() def __str__(self): return sub("\t", "", sub("\n", " ", self.stringify())) diff --git a/sqlite_dissect/file/database/payload.py b/sqlite_dissect/file/database/payload.py index 4a71e65..afbf003 100644 --- a/sqlite_dissect/file/database/payload.py +++ b/sqlite_dissect/file/database/payload.py @@ -51,7 +51,7 @@ def __init__(self): self.serial_type_signature = "" def __repr__(self): - return self.__str__().encode("hex") + return self.__str__() def __str__(self): return sub("\t", "", sub("\n", " ", self.stringify())) @@ -201,7 +201,7 @@ def __init__(self, index, serial_type, serial_type_varint_length, content_size, self.md5_hex_digest = md5_hex_digest def __repr__(self): - return self.__str__().encode("hex") + return self.__str__() def __str__(self): return sub("\t", "", sub("\n", " ", self.stringify())) diff --git a/sqlite_dissect/file/file_handle.py b/sqlite_dissect/file/file_handle.py index d94c671..879b197 100644 --- a/sqlite_dissect/file/file_handle.py +++ b/sqlite_dissect/file/file_handle.py @@ -179,7 +179,7 @@ def __init__(self, file_type, file_identifier, database_text_encoding=None, file raise ValueError(log_message) def __repr__(self): - return self.__str__().encode("hex") + return self.__str__() def __str__(self): return sub("\t", "", sub("\n", " ", self.stringify())) diff --git a/sqlite_dissect/file/header.py b/sqlite_dissect/file/header.py index e68471e..08f4d48 100644 --- a/sqlite_dissect/file/header.py +++ b/sqlite_dissect/file/header.py @@ -30,7 +30,7 @@ def __init__(self): self.md5_hex_digest = None def __repr__(self): - return self.__str__().encode("hex") + return self.__str__() def __str__(self): return sub("\t", "", sub("\n", " ", self.stringify())) diff --git a/sqlite_dissect/file/journal/header.py b/sqlite_dissect/file/journal/header.py index 8ce0a51..79254db 100644 --- a/sqlite_dissect/file/journal/header.py +++ b/sqlite_dissect/file/journal/header.py @@ -89,7 +89,7 @@ def __init__(self): pass def __repr__(self): - return self.__str__().encode("hex") + return self.__str__() def __str__(self): return sub("\t", "", sub("\n", " ", self.stringify())) diff --git a/sqlite_dissect/file/journal/jounal.py b/sqlite_dissect/file/journal/jounal.py index 97aa12a..f071de9 100644 --- a/sqlite_dissect/file/journal/jounal.py +++ b/sqlite_dissect/file/journal/jounal.py @@ -21,7 +21,7 @@ def __init__(self, file_identifier, file_size=None): self.file_handle = FileHandle(FILE_TYPE.ROLLBACK_JOURNAL, file_identifier, file_size=file_size) def __repr__(self): - return self.__str__().encode("hex") + return self.__str__() def __str__(self): return sub("\t", "", sub("\n", " ", self.stringify())) diff --git a/sqlite_dissect/file/schema/column.py b/sqlite_dissect/file/schema/column.py index 99f4ac7..456cf1f 100644 --- a/sqlite_dissect/file/schema/column.py +++ b/sqlite_dissect/file/schema/column.py @@ -558,7 +558,7 @@ def _is_column_constraint_preface(segment): return False def __repr__(self): - return self.__str__().encode("hex") + return self.__str__() def __str__(self): return sub("\t", "", sub("\n", " ", self.stringify())) @@ -593,7 +593,7 @@ def __init__(self, index, constraint): self.constraint = constraint def __repr__(self): - return self.__str__().encode("hex") + return self.__str__() def __str__(self): return sub("\t", "", sub("\n", " ", self.stringify())) diff --git a/sqlite_dissect/file/schema/master.py b/sqlite_dissect/file/schema/master.py index aa0a6f8..38c7030 100644 --- a/sqlite_dissect/file/schema/master.py +++ b/sqlite_dissect/file/schema/master.py @@ -290,7 +290,7 @@ def __init__(self, version_interface, root_page): self.master_schema_page_numbers = [master_schema_page.number for master_schema_page in self.master_schema_pages] def __repr__(self): - return self.__str__().encode("hex") + return self.__str__() def __str__(self): return sub("\t", "", sub("\n", " ", self.stringify())) @@ -592,7 +592,7 @@ def __init__(self, version_interface, b_tree_table_leaf_page_number, b_tree_tabl self.md5_hash_identifier = get_md5_hash(master_schema_entry_identifier_string) def __repr__(self): - return self.__str__().encode("hex") + return self.__str__() def __str__(self): return sub("\t", "", sub("\n", " ", self.stringify())) diff --git a/sqlite_dissect/file/schema/table.py b/sqlite_dissect/file/schema/table.py index ba5b1c1..293e86f 100644 --- a/sqlite_dissect/file/schema/table.py +++ b/sqlite_dissect/file/schema/table.py @@ -34,7 +34,7 @@ def __init__(self, index, constraint, comments=None): self.comments = [comment.strip() for comment in comments] if comments else [] def __repr__(self): - return self.__str__().encode("hex") + return self.__str__() def __str__(self): return sub("\t", "", sub("\n", " ", self.stringify())) diff --git a/sqlite_dissect/file/version.py b/sqlite_dissect/file/version.py index 6a01b79..48a3e69 100644 --- a/sqlite_dissect/file/version.py +++ b/sqlite_dissect/file/version.py @@ -139,7 +139,7 @@ def __init__(self, file_handle, version_number, store_in_memory, strict_format_c self.updated_b_tree_page_numbers = None def __repr__(self): - return self.__str__().encode("hex") + return self.__str__() def __str__(self): return sub("\t", "", sub("\n", " ", self.stringify())) diff --git a/sqlite_dissect/file/version_parser.py b/sqlite_dissect/file/version_parser.py index 0b22dc3..1aaffdc 100644 --- a/sqlite_dissect/file/version_parser.py +++ b/sqlite_dissect/file/version_parser.py @@ -307,7 +307,7 @@ def __init__(self, version_history, master_schema_entry, version_number=None, en """ def __repr__(self): - return self.__str__().encode("hex") + return self.__str__() def __str__(self): return sub("\t", "", sub("\n", " ", self.stringify())) diff --git a/sqlite_dissect/file/wal/frame.py b/sqlite_dissect/file/wal/frame.py index 77acbdd..6cde9d9 100644 --- a/sqlite_dissect/file/wal/frame.py +++ b/sqlite_dissect/file/wal/frame.py @@ -74,7 +74,7 @@ def __init__(self, file_handle, frame_index, commit_record_number): self.contains_sqlite_database_header = True def __repr__(self): - return self.__str__().encode("hex") + return self.__str__() def __str__(self): return sub("\t", "", sub("\n", " ", self.stringify())) diff --git a/sqlite_dissect/file/wal/header.py b/sqlite_dissect/file/wal/header.py index fcaff21..3e26c52 100644 --- a/sqlite_dissect/file/wal/header.py +++ b/sqlite_dissect/file/wal/header.py @@ -119,7 +119,7 @@ def __init__(self, wal_frame_header_byte_array): self.md5_hex_digest = get_md5_hash(wal_frame_header_byte_array) def __repr__(self): - return self.__str__().encode("hex") + return self.__str__() def __str__(self): return sub("\t", "", sub("\n", " ", self.stringify())) diff --git a/sqlite_dissect/file/wal/wal.py b/sqlite_dissect/file/wal/wal.py index 20897d0..e1a1a1f 100644 --- a/sqlite_dissect/file/wal/wal.py +++ b/sqlite_dissect/file/wal/wal.py @@ -212,7 +212,7 @@ def __init__(self, file_identifier, store_in_memory=False, file_size=None, stric raise NotImplementedError(log_message) def __repr__(self): - return self.__str__().encode("hex") + return self.__str__() def __str__(self): return sub("\t", "", sub("\n", " ", self.stringify())) diff --git a/sqlite_dissect/file/wal_index/header.py b/sqlite_dissect/file/wal_index/header.py index a7e3eee..f091eea 100644 --- a/sqlite_dissect/file/wal_index/header.py +++ b/sqlite_dissect/file/wal_index/header.py @@ -235,7 +235,7 @@ def __init__(self, wal_index_checkpoint_info_byte_array, endianness): self.md5_hex_digest = get_md5_hash(wal_index_checkpoint_info_byte_array) def __repr__(self): - return self.__str__().encode("hex") + return self.__str__() def __str__(self): return sub("\t", "", sub("\n", " ", self.stringify())) diff --git a/sqlite_dissect/file/wal_index/wal_index.py b/sqlite_dissect/file/wal_index/wal_index.py index 66642de..046817b 100644 --- a/sqlite_dissect/file/wal_index/wal_index.py +++ b/sqlite_dissect/file/wal_index/wal_index.py @@ -55,7 +55,7 @@ def __init__(self, file_name, file_size=None): logger.debug("Number of entries found: {}.".format(number_found)) def __repr__(self): - return self.__str__().encode("hex") + return self.__str__() def __str__(self): return sub("\t", "", sub("\n", " ", self.stringify())) diff --git a/sqlite_dissect/version_history.py b/sqlite_dissect/version_history.py index 63e016e..98ab560 100644 --- a/sqlite_dissect/version_history.py +++ b/sqlite_dissect/version_history.py @@ -197,7 +197,7 @@ def __init__(self, database, write_ahead_log=None): self.number_of_versions = len(self.versions) def __repr__(self): - return self.__str__().encode("hex") + return self.__str__() def __str__(self): return sub("\t", "", sub("\n", " ", self.stringify())) @@ -397,7 +397,7 @@ def __iter__(self): return self def __repr__(self): - return self.__str__().encode("hex") + return self.__str__() def __next__(self): return self.next() @@ -776,7 +776,7 @@ def __init__(self, name, file_type, version_number, database_text_encoding, page self.carved_cells = {} def __repr__(self): - return self.__str__().encode("hex") + return self.__str__() def __str__(self): return sub("\t", "", sub("\n", " ", self.stringify())) From 08e1e5729a1d04d61587ecf2637e0be5248eb294 Mon Sep 17 00:00:00 2001 From: kchason Date: Tue, 29 Mar 2022 09:10:48 -0400 Subject: [PATCH 19/35] Cast int for `range()` calls --- sqlite_dissect/constants.py | 2 +- sqlite_dissect/file/database/database.py | 2 +- sqlite_dissect/file/database/page.py | 6 +++--- sqlite_dissect/file/version.py | 2 +- sqlite_dissect/file/wal/wal.py | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/sqlite_dissect/constants.py b/sqlite_dissect/constants.py index 3391a40..c01a521 100644 --- a/sqlite_dissect/constants.py +++ b/sqlite_dissect/constants.py @@ -274,7 +274,7 @@ def __len__(self): """ Defines the list of common SQLite3 file extensions for initial identification of files to dissect for the bulk processing. """ -SQLITE_FILE_EXTENSIONS = [".db", ".db3", ".sqlite", ".sqlite3"] +SQLITE_FILE_EXTENSIONS = (".db", ".db3", ".sqlite", ".sqlite3") """ Below we instantiate and compile a regular expression to check xml illegal characters: diff --git a/sqlite_dissect/file/database/database.py b/sqlite_dissect/file/database/database.py index a32c043..c6e4be8 100644 --- a/sqlite_dissect/file/database/database.py +++ b/sqlite_dissect/file/database/database.py @@ -174,7 +174,7 @@ def __init__(self, file_identifier, store_in_memory=False, file_size=None, stric """ - self.updated_page_numbers = [page_index + 1 for page_index in range(self.database_size_in_pages)] + self.updated_page_numbers = [page_index + 1 for page_index in range(int(self.database_size_in_pages))] self.page_version_index = dict(map(lambda x: [x, self.version_number], self.updated_page_numbers)) self._logger.debug("Updated page numbers initialized as: {} in version: {}.".format(self.updated_page_numbers, diff --git a/sqlite_dissect/file/database/page.py b/sqlite_dissect/file/database/page.py index d296502..e0e2fa1 100644 --- a/sqlite_dissect/file/database/page.py +++ b/sqlite_dissect/file/database/page.py @@ -302,7 +302,7 @@ def __init__(self, version_interface, number, parent_freelist_trunk_page_number, FREELIST_HEADER_LENGTH])[0] self.freelist_leaf_page_numbers = [] self.freelist_leaf_pages = [] - for index in range(self.number_of_leaf_page_pointers): + for index in range(int(self.number_of_leaf_page_pointers)): start_offset = index * FREELIST_LEAF_PAGE_NUMBER_LENGTH + FREELIST_HEADER_LENGTH end_offset = start_offset + FREELIST_LEAF_PAGE_NUMBER_LENGTH freelist_leaf_page_number = unpack(b">I", page[start_offset:end_offset])[0] @@ -424,7 +424,7 @@ def __init__(self, version_interface, number, number_of_entries): self.md5_hex_digest = get_md5_hash(page) self.pointer_map_entries = [] - for index in range(self.number_of_entries): + for index in range(int(self.number_of_entries)): offset = index * POINTER_MAP_ENTRY_LENGTH @@ -657,7 +657,7 @@ def __init__(self, version_interface, number, header_class_name, cell_class_name self.cells = [] self.calculated_cell_total_byte_size = 0 - for cell_index in range(self.header.number_of_cells_on_page): + for cell_index in range(int(self.header.number_of_cells_on_page)): cell_start_offset = cell_pointer_array_offset + cell_index * CELL_POINTER_BYTE_LENGTH cell_end_offset = cell_start_offset + CELL_POINTER_BYTE_LENGTH cell_offset = unpack(b">H", page[cell_start_offset:cell_end_offset])[0] diff --git a/sqlite_dissect/file/version.py b/sqlite_dissect/file/version.py index 48a3e69..f369b78 100644 --- a/sqlite_dissect/file/version.py +++ b/sqlite_dissect/file/version.py @@ -268,7 +268,7 @@ def pages(self): self._logger.error(log_message) raise VersionParsingError(log_message) - for page_number in [page_index + 1 for page_index in range(self.database_size_in_pages)]: + for page_number in [page_index + 1 for page_index in range(int(self.database_size_in_pages))]: if page_number not in pages: log_message = "Page number: {} was not found in the pages: {} for version: {}." log_message = log_message.format(page_number, pages.keys(), self.version_number) diff --git a/sqlite_dissect/file/wal/wal.py b/sqlite_dissect/file/wal/wal.py index e1a1a1f..8184d7c 100644 --- a/sqlite_dissect/file/wal/wal.py +++ b/sqlite_dissect/file/wal/wal.py @@ -78,7 +78,7 @@ def __init__(self, file_identifier, store_in_memory=False, file_size=None, stric # Initialize the dictionary self.invalid_frame_indices = {} - for frame_index in range(self.number_of_frames): + for frame_index in range(int(self.number_of_frames)): frame = WriteAheadLogFrame(self.file_handle, frame_index, commit_record_number) From 8675c816b88039f2abfa0061c7acef54b8f46b95 Mon Sep 17 00:00:00 2001 From: kchason Date: Tue, 29 Mar 2022 15:49:33 -0400 Subject: [PATCH 20/35] Resolve string regex parsing for utility functions --- sqlite_dissect/carving/utilities.py | 2 +- sqlite_dissect/tests/carving_utilities_test.py | 2 +- sqlite_dissect/tests/nist_test.py | 12 +++--------- 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/sqlite_dissect/carving/utilities.py b/sqlite_dissect/carving/utilities.py index 0c67b42..7cc2c7e 100644 --- a/sqlite_dissect/carving/utilities.py +++ b/sqlite_dissect/carving/utilities.py @@ -250,7 +250,7 @@ def generate_signature_regex(signature, skip_first_serial_type=False): elif column_serial_type == -2: text_regex = generate_regex_for_simplified_serial_type(column_serial_type) else: - basic_serial_type_regex += generate_regex_for_simplified_serial_type(column_serial_type) + basic_serial_type_regex += generate_regex_for_simplified_serial_type(column_serial_type).decode() if blob_regex or text_regex: diff --git a/sqlite_dissect/tests/carving_utilities_test.py b/sqlite_dissect/tests/carving_utilities_test.py index c893411..69462a1 100644 --- a/sqlite_dissect/tests/carving_utilities_test.py +++ b/sqlite_dissect/tests/carving_utilities_test.py @@ -25,7 +25,7 @@ @pytest.mark.parametrize('value, encoded_value', varint_tuples) def test_decode_varint_in_reverse(value, encoded_value): with pytest.raises(ValueError): - decode_varint_in_reverse(bytearray('0'*9), 11) + decode_varint_in_reverse(bytearray(b'0'*9), 11) assert decode_varint_in_reverse(encoded_value, len(encoded_value))[0] == value diff --git a/sqlite_dissect/tests/nist_test.py b/sqlite_dissect/tests/nist_test.py index db1e8ec..2487613 100644 --- a/sqlite_dissect/tests/nist_test.py +++ b/sqlite_dissect/tests/nist_test.py @@ -29,8 +29,6 @@ def test_header_reporting(db_file): db_filepath = str(db_file[0].resolve()) hash_before_parsing = get_md5_hash(db_filepath) - parser_output = io.BytesIO() - sys.stdout = parser_output args = parse_args([db_filepath, '--header']) sqlite_files = get_sqlite_files(args.sqlite_path) main(args, sqlite_files[0], len(sqlite_files) > 1) @@ -40,7 +38,7 @@ def test_header_reporting(db_file): reported_journal_mode_write = None reported_num_pages = None reported_encoding = None - for line in parser_output.getvalue().splitlines(): + for line in sys.stdout.read().splitlines(): line = str(line) if "FILE FORMAT WRITE VERSION" in line.upper(): reported_journal_mode_write = line.split(': ')[1].strip() @@ -80,8 +78,6 @@ def test_schema_reporting(db_file): db_filepath = str(db_file[0].resolve()) hash_before_parsing = get_md5_hash(db_filepath) - parser_output = io.BytesIO() - sys.stdout = parser_output args = parse_args([db_filepath]) sqlite_files = get_sqlite_files(args.sqlite_path) main(args, str(sqlite_files[0]), len(sqlite_files) > 1) @@ -91,7 +87,7 @@ def test_schema_reporting(db_file): reported_num_rows = {} current_table = None row_count = 0 - for line in parser_output.getvalue().splitlines(): + for line in sys.stdout.read().splitlines(): line = str(line) if "Master schema entry: " in line and "row type: table" in line: current_table = line[line.find("Master schema entry: "):line.find("row type: ")].split(': ')[1].strip() @@ -179,14 +175,12 @@ def test_metadata_reporting(db_file): db_filepath = str(db_file[0].resolve()) hash_before_parsing = get_md5_hash(db_filepath) - parser_output = io.BytesIO() - sys.stdout = parser_output args = parse_args([db_filepath, '-c']) sqlite_files = get_sqlite_files(args.sqlite_path) main(args, str(sqlite_files[0]), len(sqlite_files) > 1) current_table = None - for line in parser_output.getvalue().splitlines(): + for line in sys.stdout.read().splitlines(): line = str(line) if "Master schema entry: " in line and "row type: table" in line: current_table = line[line.find("Master schema entry: "):line.find("row type: ")].split(': ')[1].strip() From bdde91e6d81ca3a766f98a0fa7c47928c956a4e2 Mon Sep 17 00:00:00 2001 From: kchason Date: Mon, 11 Apr 2022 17:04:14 -0400 Subject: [PATCH 21/35] Switch `sub` to `str.replace` --- sqlite_dissect/carving/signature.py | 8 ++++---- sqlite_dissect/file/database/header.py | 2 +- sqlite_dissect/file/database/page.py | 6 +++--- sqlite_dissect/file/database/payload.py | 4 ++-- sqlite_dissect/file/file_handle.py | 2 +- sqlite_dissect/file/header.py | 2 +- sqlite_dissect/file/journal/header.py | 2 +- sqlite_dissect/file/journal/jounal.py | 2 +- sqlite_dissect/file/schema/column.py | 4 ++-- sqlite_dissect/file/schema/master.py | 4 ++-- sqlite_dissect/file/schema/table.py | 2 +- sqlite_dissect/file/version.py | 2 +- sqlite_dissect/file/version_parser.py | 2 +- sqlite_dissect/file/wal/frame.py | 2 +- sqlite_dissect/file/wal/header.py | 2 +- sqlite_dissect/file/wal/wal.py | 2 +- sqlite_dissect/file/wal_index/header.py | 2 +- sqlite_dissect/file/wal_index/wal_index.py | 2 +- sqlite_dissect/version_history.py | 6 +++--- 19 files changed, 29 insertions(+), 29 deletions(-) diff --git a/sqlite_dissect/carving/signature.py b/sqlite_dissect/carving/signature.py index 28ac058..31f3610 100644 --- a/sqlite_dissect/carving/signature.py +++ b/sqlite_dissect/carving/signature.py @@ -823,7 +823,7 @@ def __repr__(self): return self.__str__() def __str__(self): - return sub("\t", "", sub("\n", " ", self.stringify())) + return self.stringify().replace('\t', '').replace('\n', ' ') def stringify(self, padding=""): string = padding + "Derived Data Type Name: {}\n" \ @@ -920,7 +920,7 @@ def __repr__(self): return self.__str__() def __str__(self): - return sub("\t", "", sub("\n", " ", self.stringify())) + return self.stringify().replace('\t', '').replace('\n', ' ') def stringify(self, padding="", print_column_signatures=True): string = padding + "Index: {}\n" \ @@ -1101,7 +1101,7 @@ def __repr__(self): return self.__str__() def __str__(self): - return sub("\t", "", sub("\n", " ", self.stringify())) + return self.stringify().replace('\t', '').replace('\n', ' ') def stringify(self, padding="", print_column_signatures=True): string = padding + "Record Serial Type Signature: {}\n" \ @@ -1314,7 +1314,7 @@ def __repr__(self): return self.__str__() def __str__(self): - return sub("\t", "", sub("\n", " ", self.stringify())) + return self.stringify().replace('\t', '').replace('\n', ' ') def stringify(self, padding=""): string = padding + "Index: {}\n" \ diff --git a/sqlite_dissect/file/database/header.py b/sqlite_dissect/file/database/header.py index c617c61..30bfd74 100644 --- a/sqlite_dissect/file/database/header.py +++ b/sqlite_dissect/file/database/header.py @@ -359,7 +359,7 @@ def __repr__(self): return self.__str__() def __str__(self): - return sub("\t", "", sub("\n", " ", self.stringify())) + return self.stringify().replace('\t', '').replace('\n', ' ') def stringify(self, padding=""): string = padding + "Contains SQLite Database Header: {}\n" \ diff --git a/sqlite_dissect/file/database/page.py b/sqlite_dissect/file/database/page.py index e0e2fa1..1e00bad 100644 --- a/sqlite_dissect/file/database/page.py +++ b/sqlite_dissect/file/database/page.py @@ -115,7 +115,7 @@ def __repr__(self): return self.__str__() def __str__(self): - return sub("\t", "", sub("\n", " ", self.stringify())) + return self.stringify().replace('\t', '').replace('\n', ' ') def stringify(self, padding=""): string = padding + "Version Number: {}\n" \ @@ -554,7 +554,7 @@ def __repr__(self): return self.__str__() def __str__(self): - return sub("\t", "", sub("\n", " ", self.stringify())) + return self.stringify().replace('\t', '').replace('\n', ' ') def stringify(self, padding=""): string = padding + "Index: {}\n" \ @@ -910,7 +910,7 @@ def __repr__(self): return self.__str__() def __str__(self): - return sub("\t", "", sub("\n", " ", self.stringify())) + return self.stringify().replace('\t', '').replace('\n', ' ') def stringify(self, padding=""): string = padding + "Version Number: {}\n" \ diff --git a/sqlite_dissect/file/database/payload.py b/sqlite_dissect/file/database/payload.py index afbf003..5f53606 100644 --- a/sqlite_dissect/file/database/payload.py +++ b/sqlite_dissect/file/database/payload.py @@ -54,7 +54,7 @@ def __repr__(self): return self.__str__() def __str__(self): - return sub("\t", "", sub("\n", " ", self.stringify())) + return self.stringify().replace('\t', '').replace('\n', ' ') def stringify(self, padding="", print_record_columns=True): string = padding + "Start Offset: {}\n" \ @@ -204,7 +204,7 @@ def __repr__(self): return self.__str__() def __str__(self): - return sub("\t", "", sub("\n", " ", self.stringify())) + return self.stringify().replace('\t', '').replace('\n', ' ') def stringify(self, padding=""): string = padding + "Index: {}\n" \ diff --git a/sqlite_dissect/file/file_handle.py b/sqlite_dissect/file/file_handle.py index 879b197..dc3299f 100644 --- a/sqlite_dissect/file/file_handle.py +++ b/sqlite_dissect/file/file_handle.py @@ -182,7 +182,7 @@ def __repr__(self): return self.__str__() def __str__(self): - return sub("\t", "", sub("\n", " ", self.stringify())) + return self.stringify().replace('\t', '').replace('\n', ' ') def stringify(self, padding="", print_header=True): string = padding + "File Type: {}\n" \ diff --git a/sqlite_dissect/file/header.py b/sqlite_dissect/file/header.py index 08f4d48..d8f907b 100644 --- a/sqlite_dissect/file/header.py +++ b/sqlite_dissect/file/header.py @@ -33,7 +33,7 @@ def __repr__(self): return self.__str__() def __str__(self): - return sub("\t", "", sub("\n", " ", self.stringify())) + return self.stringify().replace('\t', '').replace('\n', ' ') @abstractmethod def stringify(self, padding=""): diff --git a/sqlite_dissect/file/journal/header.py b/sqlite_dissect/file/journal/header.py index 79254db..053c2b1 100644 --- a/sqlite_dissect/file/journal/header.py +++ b/sqlite_dissect/file/journal/header.py @@ -92,7 +92,7 @@ def __repr__(self): return self.__str__() def __str__(self): - return sub("\t", "", sub("\n", " ", self.stringify())) + return self.stringify().replace('\t', '').replace('\n', ' ') def stringify(self, padding=""): pass diff --git a/sqlite_dissect/file/journal/jounal.py b/sqlite_dissect/file/journal/jounal.py index f071de9..5c86008 100644 --- a/sqlite_dissect/file/journal/jounal.py +++ b/sqlite_dissect/file/journal/jounal.py @@ -24,7 +24,7 @@ def __repr__(self): return self.__str__() def __str__(self): - return sub("\t", "", sub("\n", " ", self.stringify())) + return self.stringify().replace('\t', '').replace('\n', ' ') def stringify(self, padding=""): string = padding + "File Handle:\n{}" diff --git a/sqlite_dissect/file/schema/column.py b/sqlite_dissect/file/schema/column.py index 456cf1f..a93041a 100644 --- a/sqlite_dissect/file/schema/column.py +++ b/sqlite_dissect/file/schema/column.py @@ -561,7 +561,7 @@ def __repr__(self): return self.__str__() def __str__(self): - return sub("\t", "", sub("\n", " ", self.stringify())) + return self.stringify().replace('\t', '').replace('\n', ' ') def stringify(self, padding="", print_column_constraints=True): string = padding + "Column Text: {}\n" \ @@ -596,7 +596,7 @@ def __repr__(self): return self.__str__() def __str__(self): - return sub("\t", "", sub("\n", " ", self.stringify())) + return self.stringify().replace('\t', '').replace('\n', ' ') def stringify(self, padding=""): string = padding + "Index: {}\n" \ diff --git a/sqlite_dissect/file/schema/master.py b/sqlite_dissect/file/schema/master.py index 38c7030..e8a2036 100644 --- a/sqlite_dissect/file/schema/master.py +++ b/sqlite_dissect/file/schema/master.py @@ -293,7 +293,7 @@ def __repr__(self): return self.__str__() def __str__(self): - return sub("\t", "", sub("\n", " ", self.stringify())) + return self.stringify().replace('\t', '').replace('\n', ' ') def stringify(self, padding="", print_master_schema_root_page=True, print_master_schema_entries=True, print_b_tree_root_pages=True): @@ -595,7 +595,7 @@ def __repr__(self): return self.__str__() def __str__(self): - return sub("\t", "", sub("\n", " ", self.stringify())) + return self.stringify().replace('\t', '').replace('\n', ' ') def stringify(self, padding="", print_record_columns=True): string = padding + "Version Number: {}\n" \ diff --git a/sqlite_dissect/file/schema/table.py b/sqlite_dissect/file/schema/table.py index 293e86f..82a3c21 100644 --- a/sqlite_dissect/file/schema/table.py +++ b/sqlite_dissect/file/schema/table.py @@ -37,7 +37,7 @@ def __repr__(self): return self.__str__() def __str__(self): - return sub("\t", "", sub("\n", " ", self.stringify())) + return self.stringify().replace('\t', '').replace('\n', ' ') def stringify(self, padding=""): string = padding + "Index: {}\n" \ diff --git a/sqlite_dissect/file/version.py b/sqlite_dissect/file/version.py index f369b78..c2b7347 100644 --- a/sqlite_dissect/file/version.py +++ b/sqlite_dissect/file/version.py @@ -142,7 +142,7 @@ def __repr__(self): return self.__str__() def __str__(self): - return sub("\t", "", sub("\n", " ", self.stringify())) + return self.stringify().replace('\t', '').replace('\n', ' ') def stringify(self, padding="", print_pages=True, print_schema=True): string = padding + "File Type: {}\n" \ diff --git a/sqlite_dissect/file/version_parser.py b/sqlite_dissect/file/version_parser.py index 1aaffdc..919dcdb 100644 --- a/sqlite_dissect/file/version_parser.py +++ b/sqlite_dissect/file/version_parser.py @@ -310,7 +310,7 @@ def __repr__(self): return self.__str__() def __str__(self): - return sub("\t", "", sub("\n", " ", self.stringify())) + return self.stringify().replace('\t', '').replace('\n', ' ') def stringify(self, padding=""): string = padding + "Row Type: {}\n" \ diff --git a/sqlite_dissect/file/wal/frame.py b/sqlite_dissect/file/wal/frame.py index 6cde9d9..70a4b06 100644 --- a/sqlite_dissect/file/wal/frame.py +++ b/sqlite_dissect/file/wal/frame.py @@ -77,7 +77,7 @@ def __repr__(self): return self.__str__() def __str__(self): - return sub("\t", "", sub("\n", " ", self.stringify())) + return self.stringify().replace('\t', '').replace('\n', ' ') def stringify(self, padding=""): string = padding + "Frame Index: {}\n" \ diff --git a/sqlite_dissect/file/wal/header.py b/sqlite_dissect/file/wal/header.py index 3e26c52..61acf27 100644 --- a/sqlite_dissect/file/wal/header.py +++ b/sqlite_dissect/file/wal/header.py @@ -122,7 +122,7 @@ def __repr__(self): return self.__str__() def __str__(self): - return sub("\t", "", sub("\n", " ", self.stringify())) + return self.stringify().replace('\t', '').replace('\n', ' ') def stringify(self, padding=""): string = padding + "Page Number: {}\n" \ diff --git a/sqlite_dissect/file/wal/wal.py b/sqlite_dissect/file/wal/wal.py index 8184d7c..8507309 100644 --- a/sqlite_dissect/file/wal/wal.py +++ b/sqlite_dissect/file/wal/wal.py @@ -215,7 +215,7 @@ def __repr__(self): return self.__str__() def __str__(self): - return sub("\t", "", sub("\n", " ", self.stringify())) + return self.stringify().replace('\t', '').replace('\n', ' ') def stringify(self, padding="", print_frames=True): string = padding + "File Handle:\n{}" diff --git a/sqlite_dissect/file/wal_index/header.py b/sqlite_dissect/file/wal_index/header.py index f091eea..abf2068 100644 --- a/sqlite_dissect/file/wal_index/header.py +++ b/sqlite_dissect/file/wal_index/header.py @@ -238,7 +238,7 @@ def __repr__(self): return self.__str__() def __str__(self): - return sub("\t", "", sub("\n", " ", self.stringify())) + return self.stringify().replace('\t', '').replace('\n', ' ') def stringify(self, padding=""): string = padding + "Endianness: {}\n" \ diff --git a/sqlite_dissect/file/wal_index/wal_index.py b/sqlite_dissect/file/wal_index/wal_index.py index 046817b..93d7638 100644 --- a/sqlite_dissect/file/wal_index/wal_index.py +++ b/sqlite_dissect/file/wal_index/wal_index.py @@ -58,7 +58,7 @@ def __repr__(self): return self.__str__() def __str__(self): - return sub("\t", "", sub("\n", " ", self.stringify())) + return self.stringify().replace('\t', '').replace('\n', ' ') def stringify(self, padding=""): string = padding + "File Handle:\n{}" diff --git a/sqlite_dissect/version_history.py b/sqlite_dissect/version_history.py index 98ab560..eee15e3 100644 --- a/sqlite_dissect/version_history.py +++ b/sqlite_dissect/version_history.py @@ -200,7 +200,7 @@ def __repr__(self): return self.__str__() def __str__(self): - return sub("\t", "", sub("\n", " ", self.stringify())) + return self.stringify().replace('\t', '').replace('\n', ' ') def stringify(self, padding="", print_versions=True): string = "File Type: {}" @@ -403,7 +403,7 @@ def __next__(self): return self.next() def __str__(self): - return sub("\t", "", sub("\n", " ", self.stringify())) + return self.stringify().replace('\t', '').replace('\n', ' ') def stringify(self, padding="", print_cells=True): string = padding + "Page Type: {}\n" \ @@ -779,7 +779,7 @@ def __repr__(self): return self.__str__() def __str__(self): - return sub("\t", "", sub("\n", " ", self.stringify())) + return self.stringify().replace('\t', '').replace('\n', ' ') def stringify(self, padding="", print_cells=True): string = padding + "Version Number: {}\n" \ From 7836776b1065a89ac3d3c407cb4d8c2305a681cd Mon Sep 17 00:00:00 2001 From: kchason Date: Tue, 12 Apr 2022 11:00:43 -0400 Subject: [PATCH 22/35] Remove excess imports and sub -> replace --- sqlite_dissect/carving/signature.py | 1 - sqlite_dissect/export/csv_export.py | 29 ++++++++-------------- sqlite_dissect/export/sqlite_export.py | 9 +++---- sqlite_dissect/file/database/header.py | 2 -- sqlite_dissect/file/database/page.py | 3 +-- sqlite_dissect/file/database/payload.py | 1 - sqlite_dissect/file/file_handle.py | 1 - sqlite_dissect/file/header.py | 1 - sqlite_dissect/file/journal/header.py | 8 ------ sqlite_dissect/file/journal/jounal.py | 1 - sqlite_dissect/file/schema/column.py | 2 +- sqlite_dissect/file/schema/master.py | 2 +- sqlite_dissect/file/schema/table.py | 1 - sqlite_dissect/file/version.py | 1 - sqlite_dissect/file/version_parser.py | 1 - sqlite_dissect/file/wal/frame.py | 1 - sqlite_dissect/file/wal/header.py | 1 - sqlite_dissect/file/wal/wal.py | 1 - sqlite_dissect/file/wal_index/header.py | 1 - sqlite_dissect/file/wal_index/wal_index.py | 1 - sqlite_dissect/version_history.py | 1 - 21 files changed, 16 insertions(+), 53 deletions(-) diff --git a/sqlite_dissect/carving/signature.py b/sqlite_dissect/carving/signature.py index 31f3610..316b06b 100644 --- a/sqlite_dissect/carving/signature.py +++ b/sqlite_dissect/carving/signature.py @@ -2,7 +2,6 @@ from abc import abstractmethod from copy import copy from logging import getLogger -from re import sub from warnings import warn from sqlite_dissect.carving.utilities import get_content_size from sqlite_dissect.constants import LOGGER_NAME diff --git a/sqlite_dissect/export/csv_export.py b/sqlite_dissect/export/csv_export.py index 075137f..0a91665 100644 --- a/sqlite_dissect/export/csv_export.py +++ b/sqlite_dissect/export/csv_export.py @@ -6,7 +6,6 @@ from os.path import basename from os.path import normpath from os.path import sep -from re import sub from sqlite_dissect.constants import ILLEGAL_XML_CHARACTER_PATTERN from sqlite_dissect.constants import LOGGER_NAME from sqlite_dissect.constants import MASTER_SCHEMA_ROW_TYPE @@ -52,7 +51,7 @@ def write_version(csv_file_name, export_directory, version, master_schema_entry_ if master_schema_entry.root_page_number: fixed_file_name = basename(normpath(csv_file_name)) - fixed_master_schema_name = sub(" ", "_", master_schema_entry.name) + fixed_master_schema_name = master_schema_entry.name.replace(" ", "_") csv_file_name = export_directory + sep + fixed_file_name + "-" + fixed_master_schema_name + ".csv" logger.info("Writing CSV file: {}.".format(csv_file_name)) @@ -231,15 +230,13 @@ def _write_b_tree_index_leaf_records(csv_writer, version, master_schema_entry, b serial_type = record_column.serial_type text_affinity = True if serial_type >= 13 and serial_type % 2 == 1 else False value = record_column.value - if value is None: - pass - elif isinstance(value, (bytearray, str)): + if isinstance(value, (bytearray, str)): value = value.decode(version.database_text_encoding, "replace") if text_affinity else str(value) try: value.encode(UTF_8) except UnicodeDecodeError: value = value.decode(UTF_8, "replace") - value = ILLEGAL_XML_CHARACTER_PATTERN.sub(" ", value) + value = ILLEGAL_XML_CHARACTER_PATTERN.replace(" ", value) if value.startswith("="): value = ' ' + value cell_record_column_values.append(value) @@ -370,15 +367,13 @@ def _write_b_tree_table_leaf_records(csv_writer, version, master_schema_entry, b serial_type = record_column.serial_type text_affinity = True if serial_type >= 13 and serial_type % 2 == 1 else False value = record_column.value - if value is None: - pass - elif isinstance(value, (bytearray, str)): + if isinstance(value, (bytearray, str)): value = value.decode(version.database_text_encoding, "replace") if text_affinity else str(value) try: value = value.encode(UTF_8) except UnicodeDecodeError: value = value.decode(UTF_8, "replace").encode(UTF_8) - value = ILLEGAL_XML_CHARACTER_PATTERN.sub(" ", value) + value = ILLEGAL_XML_CHARACTER_PATTERN.replace(" ", value) if value.startswith("="): value = ' ' + value value = str(value) @@ -416,15 +411,13 @@ def _write_b_tree_table_master_schema_carved_records(csv_writer, version, carved serial_type = record_column.serial_type text_affinity = True if serial_type >= 13 and serial_type % 2 == 1 else False value = record_column.value - if value is None: - pass - elif isinstance(value, (bytearray, str)): + if isinstance(value, (bytearray, str)): value = value.decode(version.database_text_encoding, "replace") if text_affinity else str(value) try: value = value.encode(UTF_8) except UnicodeDecodeError: value = value.decode(UTF_8, "replace").encode(UTF_8) - value = ILLEGAL_XML_CHARACTER_PATTERN.sub(" ", value) + value = ILLEGAL_XML_CHARACTER_PATTERN.replace(" ", value) if value.startswith("="): value = ' ' + value value = str(value) @@ -483,7 +476,7 @@ def write_commit(self, master_schema_entry, commit): if not csv_file_name: mode = "wb" - commit_name = sub(" ", "_", commit.name) + commit_name = commit.name.replace(" ", "_") csv_file_name = os.path.join(self._export_directory, (self._file_name_prefix + "-" + commit_name + ".csv")) self._csv_file_names[commit.name] = csv_file_name write_headers = True @@ -649,15 +642,13 @@ def _write_cells(csv_writer, file_type, database_text_encoding, page_type, cells serial_type = record_column.serial_type text_affinity = True if serial_type >= 13 and serial_type % 2 == 1 else False value = record_column.value - if value is None: - pass - elif isinstance(value, (bytearray, str)): + if isinstance(value, (bytearray, str)): value = value.decode(database_text_encoding, "replace") if text_affinity else str(value) try: value = value.encode(UTF_8) except UnicodeDecodeError: value = value.decode(UTF_8, "replace").encode(UTF_8) - value = ILLEGAL_XML_CHARACTER_PATTERN.sub(" ", value) + value = ILLEGAL_XML_CHARACTER_PATTERN.replace(" ", value) if value.startswith("="): value = ' ' + value value = str(value) diff --git a/sqlite_dissect/export/sqlite_export.py b/sqlite_dissect/export/sqlite_export.py index 9bb7d55..304ced7 100644 --- a/sqlite_dissect/export/sqlite_export.py +++ b/sqlite_dissect/export/sqlite_export.py @@ -2,7 +2,6 @@ from os import rename from os.path import exists from os.path import sep -from re import sub from sqlite3 import connect from sqlite3 import sqlite_version from sqlite3 import version @@ -177,7 +176,7 @@ def write_commit(self, master_schema_entry, commit): index_column_headers.append("Column {}".format(i)) column_headers.extend(index_column_headers) - column_headers = [sub(" ", "_", column_header).lower() for column_header in column_headers] + column_headers = [column_header.replace(" ", "_").lower() for column_header in column_headers] elif commit.page_type == PAGE_TYPE.B_TREE_TABLE_LEAF: @@ -194,7 +193,7 @@ def write_commit(self, master_schema_entry, commit): updated_column_headers = [] for column_header in column_headers: - updated_column_header_name = "sd_" + sub(" ", "_", column_header).lower() + updated_column_header_name = "sd_" + column_header.replace(" ", "_").lower() while updated_column_header_name in column_definitions: updated_column_header_name = "sd_" + updated_column_header_name updated_column_headers.append(updated_column_header_name) @@ -348,9 +347,7 @@ def _write_cells(connection, table_name, column_count, file_type, text_affinity = True if serial_type >= 13 and serial_type % 2 == 1 else False value = record_column.value - if value is None: - pass - elif isinstance(value, bytearray): + if isinstance(value, bytearray): if text_affinity: value = value.decode(database_text_encoding, "replace") else: diff --git a/sqlite_dissect/file/database/header.py b/sqlite_dissect/file/database/header.py index 30bfd74..d0c7277 100644 --- a/sqlite_dissect/file/database/header.py +++ b/sqlite_dissect/file/database/header.py @@ -2,7 +2,6 @@ from binascii import hexlify from logging import getLogger from re import compile -from re import sub from struct import error from struct import unpack from warnings import warn @@ -12,7 +11,6 @@ from sqlite_dissect.constants import LEAF_PAYLOAD_FRACTION from sqlite_dissect.constants import LOGGER_NAME from sqlite_dissect.constants import MAGIC_HEADER_STRING -from sqlite_dissect.constants import MAGIC_HEADER_STRING_ENCODING from sqlite_dissect.constants import MASTER_PAGE_HEX_ID from sqlite_dissect.constants import MAXIMUM_EMBEDDED_PAYLOAD_FRACTION from sqlite_dissect.constants import MAXIMUM_PAGE_SIZE diff --git a/sqlite_dissect/file/database/page.py b/sqlite_dissect/file/database/page.py index 1e00bad..efad687 100644 --- a/sqlite_dissect/file/database/page.py +++ b/sqlite_dissect/file/database/page.py @@ -1,7 +1,6 @@ from abc import ABCMeta from binascii import hexlify from logging import getLogger -from re import sub from struct import unpack from warnings import warn from sqlite_dissect.constants import CELL_LOCATION @@ -593,7 +592,7 @@ def __init__(self, version_interface, number, header_class_name, cell_class_name else: log_message = "Page hex type for master page is: {} and not a table interior or table leaf page as " \ "expected in b-tree page: {} in page version: {} for version: {}." - log_message = log_message.format(hex(master_page_hex_type), self.number, + log_message = log_message.format(master_page_hex_type, self.number, self.page_version_number, self.version_number) self._logger.error(log_message) raise BTreePageParsingError(log_message) diff --git a/sqlite_dissect/file/database/payload.py b/sqlite_dissect/file/database/payload.py index 5f53606..290690f 100644 --- a/sqlite_dissect/file/database/payload.py +++ b/sqlite_dissect/file/database/payload.py @@ -1,7 +1,6 @@ from abc import ABCMeta from binascii import hexlify from logging import getLogger -from re import sub from sqlite_dissect.constants import LOGGER_NAME from sqlite_dissect.exception import RecordParsingError from sqlite_dissect.utilities import decode_varint diff --git a/sqlite_dissect/file/file_handle.py b/sqlite_dissect/file/file_handle.py index dc3299f..da7407e 100644 --- a/sqlite_dissect/file/file_handle.py +++ b/sqlite_dissect/file/file_handle.py @@ -1,6 +1,5 @@ import os from logging import getLogger -from re import sub from warnings import warn from sqlite_dissect.constants import FILE_TYPE from sqlite_dissect.constants import LOCK_BYTE_PAGE_START_OFFSET diff --git a/sqlite_dissect/file/header.py b/sqlite_dissect/file/header.py index d8f907b..e1a0b08 100644 --- a/sqlite_dissect/file/header.py +++ b/sqlite_dissect/file/header.py @@ -1,7 +1,6 @@ from abc import ABCMeta from abc import abstractmethod from logging import getLogger -from re import sub from sqlite_dissect.constants import LOGGER_NAME """ diff --git a/sqlite_dissect/file/journal/header.py b/sqlite_dissect/file/journal/header.py index 053c2b1..8019d37 100644 --- a/sqlite_dissect/file/journal/header.py +++ b/sqlite_dissect/file/journal/header.py @@ -1,7 +1,6 @@ from binascii import hexlify from logging import getLogger from struct import unpack -from re import sub from warnings import warn from sqlite_dissect.constants import LOGGER_NAME from sqlite_dissect.constants import ROLLBACK_JOURNAL_ALL_CONTENT_UNTIL_END_OF_FILE @@ -41,7 +40,6 @@ def __init__(self, rollback_journal_header_byte_array): self.header_string = rollback_journal_header_byte_array[0:8] if self.header_string != ROLLBACK_JOURNAL_HEADER_HEX_STRING: - """ Instead of throwing an error here, a warning is thrown instead. This is due to the fact that the header @@ -85,14 +83,8 @@ def stringify(self, padding=""): class RollbackJournalPageRecordHeader(object): - def __init__(self): - pass - def __repr__(self): return self.__str__() def __str__(self): return self.stringify().replace('\t', '').replace('\n', ' ') - - def stringify(self, padding=""): - pass diff --git a/sqlite_dissect/file/journal/jounal.py b/sqlite_dissect/file/journal/jounal.py index 5c86008..3535b39 100644 --- a/sqlite_dissect/file/journal/jounal.py +++ b/sqlite_dissect/file/journal/jounal.py @@ -1,4 +1,3 @@ -from re import sub from sqlite_dissect.constants import FILE_TYPE from sqlite_dissect.file.file_handle import FileHandle diff --git a/sqlite_dissect/file/schema/column.py b/sqlite_dissect/file/schema/column.py index a93041a..f73f2c3 100644 --- a/sqlite_dissect/file/schema/column.py +++ b/sqlite_dissect/file/schema/column.py @@ -430,7 +430,7 @@ def _get_data_type(derived_data_type): derived_data_type = sub("\(.*\)$", "", derived_data_type) # Replace spaces with underscores - derived_data_type = sub(" ", "_", derived_data_type) + derived_data_type = derived_data_type.replace(" ", "_") for data_type in DATA_TYPE: diff --git a/sqlite_dissect/file/schema/master.py b/sqlite_dissect/file/schema/master.py index e8a2036..828dba3 100644 --- a/sqlite_dissect/file/schema/master.py +++ b/sqlite_dissect/file/schema/master.py @@ -587,7 +587,7 @@ def __init__(self, version_interface, b_tree_table_leaf_page_number, b_tree_tabl """ - master_schema_entry_identifier_string = "{}{}{}{}".format(self.row_id, self.row_type, self.name, + master_schema_entry_identifier_string = "{}{}{}{}{}".format(self.row_id, self.row_type, self.name, self.table_name, self.sql) self.md5_hash_identifier = get_md5_hash(master_schema_entry_identifier_string) diff --git a/sqlite_dissect/file/schema/table.py b/sqlite_dissect/file/schema/table.py index 82a3c21..7cd5199 100644 --- a/sqlite_dissect/file/schema/table.py +++ b/sqlite_dissect/file/schema/table.py @@ -1,5 +1,4 @@ from logging import getLogger -from re import sub from sqlite_dissect.constants import LOGGER_NAME from sqlite_dissect.exception import MasterSchemaRowParsingError diff --git a/sqlite_dissect/file/version.py b/sqlite_dissect/file/version.py index c2b7347..03733d5 100644 --- a/sqlite_dissect/file/version.py +++ b/sqlite_dissect/file/version.py @@ -2,7 +2,6 @@ from abc import abstractmethod from binascii import hexlify from logging import getLogger -from re import sub from sqlite_dissect.constants import INDEX_INTERIOR_PAGE_HEX_ID from sqlite_dissect.constants import INDEX_LEAF_PAGE_HEX_ID from sqlite_dissect.constants import LOGGER_NAME diff --git a/sqlite_dissect/file/version_parser.py b/sqlite_dissect/file/version_parser.py index 919dcdb..eabdbda 100644 --- a/sqlite_dissect/file/version_parser.py +++ b/sqlite_dissect/file/version_parser.py @@ -1,6 +1,5 @@ from abc import ABCMeta from logging import getLogger -from re import sub from warnings import warn from sqlite_dissect.constants import BASE_VERSION_NUMBER from sqlite_dissect.constants import LOGGER_NAME diff --git a/sqlite_dissect/file/wal/frame.py b/sqlite_dissect/file/wal/frame.py index 70a4b06..abc5a2a 100644 --- a/sqlite_dissect/file/wal/frame.py +++ b/sqlite_dissect/file/wal/frame.py @@ -1,6 +1,5 @@ from binascii import hexlify from logging import getLogger -from re import sub from sqlite_dissect.constants import FILE_TYPE from sqlite_dissect.constants import LOGGER_NAME from sqlite_dissect.constants import MASTER_PAGE_HEX_ID diff --git a/sqlite_dissect/file/wal/header.py b/sqlite_dissect/file/wal/header.py index 61acf27..22a6df4 100644 --- a/sqlite_dissect/file/wal/header.py +++ b/sqlite_dissect/file/wal/header.py @@ -1,5 +1,4 @@ from logging import getLogger -from re import sub from struct import unpack from warnings import warn from sqlite_dissect.constants import LOGGER_NAME diff --git a/sqlite_dissect/file/wal/wal.py b/sqlite_dissect/file/wal/wal.py index 8507309..511a336 100644 --- a/sqlite_dissect/file/wal/wal.py +++ b/sqlite_dissect/file/wal/wal.py @@ -1,5 +1,4 @@ from logging import getLogger -from re import sub from warnings import warn from sqlite_dissect.constants import FILE_TYPE from sqlite_dissect.constants import LOGGER_NAME diff --git a/sqlite_dissect/file/wal_index/header.py b/sqlite_dissect/file/wal_index/header.py index abf2068..fb75f25 100644 --- a/sqlite_dissect/file/wal_index/header.py +++ b/sqlite_dissect/file/wal_index/header.py @@ -1,6 +1,5 @@ from binascii import hexlify from logging import getLogger -from re import sub from struct import unpack from sqlite_dissect.constants import ENDIANNESS from sqlite_dissect.constants import LOGGER_NAME diff --git a/sqlite_dissect/file/wal_index/wal_index.py b/sqlite_dissect/file/wal_index/wal_index.py index 93d7638..e5e7767 100644 --- a/sqlite_dissect/file/wal_index/wal_index.py +++ b/sqlite_dissect/file/wal_index/wal_index.py @@ -1,5 +1,4 @@ from logging import getLogger -from re import sub from struct import unpack from sqlite_dissect.constants import FILE_TYPE from sqlite_dissect.constants import LOGGER_NAME diff --git a/sqlite_dissect/version_history.py b/sqlite_dissect/version_history.py index eee15e3..68623c0 100644 --- a/sqlite_dissect/version_history.py +++ b/sqlite_dissect/version_history.py @@ -1,5 +1,4 @@ from logging import getLogger -from re import sub from warnings import warn from sqlite_dissect.carving.carver import SignatureCarver from sqlite_dissect.constants import BASE_VERSION_NUMBER From afa4853e08ac7a0869dd03a5c02eaad6e767817a Mon Sep 17 00:00:00 2001 From: kchason Date: Wed, 20 Apr 2022 19:14:07 -0400 Subject: [PATCH 23/35] Update from entrypoint logic change --- sqlite_dissect/entrypoint.py | 172 +++++++++++++++++------------------ 1 file changed, 81 insertions(+), 91 deletions(-) diff --git a/sqlite_dissect/entrypoint.py b/sqlite_dissect/entrypoint.py index 8faaa75..aa44f03 100644 --- a/sqlite_dissect/entrypoint.py +++ b/sqlite_dissect/entrypoint.py @@ -1,3 +1,4 @@ +import logging import uuid import warnings from logging import CRITICAL @@ -8,7 +9,7 @@ from logging import basicConfig from logging import getLogger from os import path -from os.path import basename +from os.path import basename, abspath from os.path import join from os.path import exists from os.path import getsize @@ -51,7 +52,7 @@ """ -def main(arguments, sqlite_file_path, export_sub_paths=False): +def main(arguments, sqlite_file_path: str, export_sub_paths=False): """ The primary entrypoint for the SQLite Dissect carving utility. @@ -89,8 +90,8 @@ def main(arguments, sqlite_file_path, export_sub_paths=False): filename=arguments.log_file) logger = getLogger(LOGGER_NAME) - logger.debug("Setup logging using the log level: {}.".format(logging_level)) - logger.info("Using options: {}".format(arguments)) + logger.debug(f"Setup logging using the log level: {logging_level}.") + logger.info(f"Using options: {arguments}") case = CaseExporter(logger) case.start_datetime = datetime.now() @@ -125,7 +126,7 @@ def main(arguments, sqlite_file_path, export_sub_paths=False): # Setup the export type export_types = [EXPORT_TYPES.TEXT] if arguments.export and len(export_types) > 0: - export_types = map(str.upper, arguments.export) + export_types = list(map(str.upper, arguments.export)) # Setup the strict format checking strict_format_checking = True @@ -145,7 +146,7 @@ def main(arguments, sqlite_file_path, export_sub_paths=False): output_directory = None if arguments.directory: if not exists(arguments.directory): - raise SqliteError("Unable to find output directory: {}.".format(arguments.directory)) + raise SqliteError(f"Unable to find output directory: {args.directory}.") output_directory = arguments.directory # Determine if there are sub-paths being configured for exports if export_sub_paths: @@ -156,23 +157,23 @@ def main(arguments, sqlite_file_path, export_sub_paths=False): else: raise IOError("Unable to create the new sub-directory: {}", join(output_directory, subpath)) - logger.debug("Determined export type to be {} with file prefix: {} and output directory: {}" - .format(', '.join(export_types), file_prefix, output_directory)) + logger.debug( + f"Determined export type to be {export_types} with file prefix: {file_prefix} and output directory: {output_directory}") # Obtain the SQLite file if not exists(sqlite_file_path): - raise SqliteError("Unable to find SQLite file: {}.".format(sqlite_file_path)) + raise SqliteError(f"Unable to find SQLite file: {sqlite_file_path}.") """ - + If the file is a zero length file, we set a flag indicating it and check to make sure there are no associated wal or journal files before just exiting out stating that the file was empty. If a (non-zero length) wal or journal file is found, an exception will be thrown. However, if the no-journal option is specified, the journal files will not be checked, and the program will exit. - + Note: It is currently believed that there cannot be a zero length SQLite database file with a wal or journal file. That is why an exception is thrown here but needs to be investigated to make sure. - + """ # See if the SQLite file is zero-length @@ -186,12 +187,12 @@ def main(arguments, sqlite_file_path, export_sub_paths=False): if not arguments.no_journal: if arguments.wal: if not exists(arguments.wal): - raise SqliteError("Unable to find wal file: {}.".format(arguments.wal)) + raise SqliteError(f"Unable to find wal file: {arguments.wal}.") wal_file_name = arguments.wal elif arguments.rollback_journal: if not exists(arguments.rollback_journal): - raise SqliteError("Unable to find rollback journal file: {}.".format(arguments.rollback_journal)) - rollback_journal_file_name = arguments.rollback_journal + raise SqliteError(f"Unable to find rollback journal file: {arguments.rollback_journal}.") + rollback_journal_file_name = args.rollback_journal else: if exists(sqlite_file_path + WAL_FILE_POSTFIX): wal_file_name = sqlite_file_path + WAL_FILE_POSTFIX @@ -221,64 +222,65 @@ def main(arguments, sqlite_file_path, export_sub_paths=False): if wal_file_name and not zero_length_wal_file: """ - + Here we throw an exception if we find a wal file with content with no content in the original SQLite file. It is not certain this use case can occur and investigation needs to be done to make certain. There have been scenarios where there will be a database header with no schema or content in a database file with a WAL file that has all the schema entries and content but this is handled differently. - + """ - raise SqliteError( - "Found a zero length SQLite file with a wal file: {}. Unable to parse.".format(arguments.wal)) + raise SqliteError(f"Found a zero length SQLite file with a wal file: {arguments.wal}. Unable to parse.") elif zero_length_wal_file: - print("File: {} with wal file: {} has no content. Nothing to parse." - .format(sqlite_file_path, wal_file_name)) + logger.error(f"File: {sqlite_file_path} with wal file: {wal_file_name} has no content. Nothing to parse.") exit(0) elif rollback_journal_file_name and not zero_length_rollback_journal_file: """ - + Here we will only have a rollback journal file. Currently, since we need to have the database file to parse signatures from, we cannot solely carve on the journal file alone. - + """ - raise SqliteError("Found a zero length SQLite file with a rollback journal file: {}. Unable to parse." - .format(arguments.rollback_journal)) + raise SqliteError( + f"Found a zero length SQLite file with a rollback journal file: {arguments.rollback_journal}. " + f"Unable to parse.") elif zero_length_rollback_journal_file: - print("File: {} with rollback journal file: {} has no content. Nothing to parse." - .format(sqlite_file_path, rollback_journal_file_name)) + logger.error( + f"File: {sqlite_file_path} with rollback journal file: {rollback_journal_file_name} has no content. " + f"Nothing to parse.") exit(0) else: - print("File: {} has no content. Nothing to parse.".format(sqlite_file_path)) + logger.error("File: {} has no content. Nothing to parse.".format(sqlite_file_path)) exit(0) # Make sure that both of the journal files are not found if rollback_journal_file_name and wal_file_name: """ - + Since the arguments have you specify the journal file in a way that you can only set the wal or rollback journal file name, this case can only occur from finding both of the files on the file system for both wal and rollback journal when there is no journal options specified. Since the SQLite database cannot be set to use both wal and journal files in the same running, we determine this to be an error and throw and exception up. - + There may be a case where the mode was changed at some point and there is a single SQLite file with one or more journal files in combination of rollback journal and WAL files. More research would have to take place in this scenario and also take into the account of this actually occurring since in most cases it is set statically by the application SQLite database owner. - + """ - raise SqliteError("Found both a rollback journal: {} and wal file: {}. Only one journal file should exist. " - "Unable to parse.".format(arguments.rollback_journal, arguments.wal)) + raise SqliteError( + f"Found both a rollback journal: {arguments.rollback_journal} and wal file: {arguments.wal}. " + f"Only one journal file should exist. Unable to parse.") # Print a message parsing is starting and log the start time for reporting at the end on amount of time to run - print("\nParsing: {}...".format(sqlite_file_path)) + logger.info(f"\nParsing: {sqlite_file_path}...") start_time = time() # Create the database and wal/rollback journal file (if existent) @@ -298,20 +300,21 @@ def main(arguments, sqlite_file_path, export_sub_paths=False): # Check if the header info was asked for if arguments.header: # Print the header info of the database - print("\nDatabase header information:\n{}".format(database.database_header.stringify(padding="\t"))) - print("Continuing to parse...") + str_header = database.database_header.stringify(padding="\t") + logger.debug(f"\nDatabase header information:\n{str_header}") + logger.debug("Continuing to parse...") # Check if the master schema was asked for if arguments.schema: # print the master schema of the database - print("\nDatabase Master Schema:\n{}".format(stringify_master_schema_version(database))) - print("Continuing to parse...") + logger.debug(f"\nDatabase Master Schema:\n{stringify_master_schema_version(database)}") + logger.debug("Continuing to parse...") # Check if the schema history was asked for if arguments.schema_history: # print the master schema version history - print("\nVersion History of Master Schemas:\n{}".format(stringify_master_schema_versions(version_history))) - print("Continuing to parse...") + logger.debug(f"\nVersion History of Master Schemas:\n{stringify_master_schema_versions(version_history)}") + logger.debug("Continuing to parse...") # Get the signature options print_signatures = arguments.signatures @@ -324,7 +327,7 @@ def main(arguments, sqlite_file_path, export_sub_paths=False): if not carve and carve_freelists: log_message = "The carve option was not set but the carve_freelists option was. Disabling carve_freelists. " \ "Please specify the carve option to enable." - logger.warn(log_message) + logger.warning(log_message) warn(log_message, RuntimeWarning) # Specific tables to be carved @@ -335,8 +338,8 @@ def main(arguments, sqlite_file_path, export_sub_paths=False): if rollback_journal_exempted_tables and specified_tables_to_carve: for table in rollback_journal_exempted_tables: if table in specified_tables_to_carve: - print("Table: {} found in both exempted and specified tables. Please update the arguments correctly." - .format(table)) + logger.error(f"Table: {table} found in both exempted and specified tables. Please update the " + f"arguments correctly.") exit(0) # See if we need to generate signatures @@ -362,7 +365,7 @@ def main(arguments, sqlite_file_path, export_sub_paths=False): Note: This is not allowing "without rowid" or virtual tables until further testing is done. (Virtual tables tend to have a root page number of 0 with no data stored in the main table. Further investigation is needed.) - + Note: Table internal schema objects will not be accounted for. These are tables that start with "sqlite_" and are used for internal use to SQLite itself. These have never known to produce any forensic pertinent data. @@ -372,25 +375,25 @@ def main(arguments, sqlite_file_path, export_sub_paths=False): if isinstance(master_schema_entry, OrdinaryTableRow): if master_schema_entry.without_row_id: - log_message = "A `without row_id` table was found: {} and will not have a signature generated " \ - "for carving since it is not supported yet.".format(master_schema_entry.table_name) + log_message = f"A `without row_id` table was found: {master_schema_entry.table_name} and will not" \ + " have a signature generated for carving since it is not supported yet." logger.info(log_message) continue if master_schema_entry.internal_schema_object: - log_message = "A `internal schema` table was found: {} and will not have a signature generated " \ - "for carving since it is not supported yet.".format(master_schema_entry.table_name) + log_message = f"A `internal schema` table was found: {master_schema_entry.table_name} and will " \ + f"not have a signature generated for carving since it is not supported yet." logger.info(log_message) continue signatures[master_schema_entry.name] = Signature(version_history, master_schema_entry) if print_signatures: - print("\nSignature:\n{}".format(signatures[master_schema_entry.name] - .stringify("\t", False, False, False))) + printable_signature = signatures[master_schema_entry.name].stringify("\t", False, False, False) + logger.debug(f"\nSignature:\n{printable_signature}") """ - + Note: Master schema entries (schema) are all pulled from the base version (the SQLite database file). Currently, the master schema entries are taken from the base version. Even though schema additions are handled in the WAL file for existing tables, tables added in the WAL have not been accounted for yet. @@ -459,32 +462,35 @@ def main(arguments, sqlite_file_path, export_sub_paths=False): # The export type was not found (this should not occur due to the checking of argparse) if not exported: - raise SqliteError("Invalid option for export type: {}.".format(', '.join(export_types))) + raise SqliteError(f"Invalid option for export type: {(', '.join(export_types))}.") # Carve the rollback journal if found and carving is not specified if rollback_journal_file and not carve: - print("Rollback journal file found: {}. Rollback journal file parsing is under development and " - "currently only supports carving. Please rerun with the --carve option for this output.") + logger.warning(f"Rollback journal file found: {rollback_journal_file}. Rollback journal file parsing is under " + f"development and currently only supports carving. Please rerun with the --carve option for this" + f" output.") # Carve the rollback journal if found and carving is specified if rollback_journal_file and carve: if not output_directory: - print("Rollback journal file found: {}. Rollback journal file carving is under development and " - "currently only outputs to CSV. Due to this, the output directory needs to be specified. Please" - "rerun with a output directory specified in order for this to complete.") + logger.error(f"Rollback journal file found: {rollback_journal_file}. Rollback journal file carving is " + f"under development and currently only outputs to CSV. Due to this, the output directory " + f"needs to be specified. Please rerun with a output directory specified in order for this to " + f"complete.") else: - print("Carving rollback journal file: {}. Rollback journal file carving is under development and " - "currently only outputs to CSV. Any export type specified will be overridden for this.") + logger.error(f"Carving rollback journal file: {rollback_journal_file}. Rollback journal file carving is " + f"under development and currently only outputs to CSV. Any export type specified will be " + f"overridden for this.") carve_rollback_journal(output_directory, rollback_journal_file, rollback_journal_file_name, specified_tables_to_carve, rollback_journal_exempted_tables, version_history, signatures, logger) - print("Finished in {} seconds.".format(round(time() - start_time, 2))) + logger.info(f"Finished in {round(time() - start_time, 2)} seconds.") def print_text(output_directory, file_prefix, carve, carve_freelists, specified_tables_to_carve, @@ -501,8 +507,8 @@ def print_text(output_directory, file_prefix, carve, carve_freelists, specified_ text_file_name = file_prefix + file_postfix # Export all index and table histories to a text file while supplying signature to carve with - print("\nExporting history as text to {}{}{}...".format(output_directory, sep, text_file_name)) - logger.debug("Exporting history as text to {}{}{}.".format(output_directory, sep, text_file_name)) + print(f"\nExporting history as text to {output_directory}{sep}{text_file_name}...") + logger.debug(f"Exporting history as text to {output_directory}{sep}{text_file_name}.") with CommitTextExporter(output_directory, text_file_name) as commit_text_exporter: @@ -523,10 +529,10 @@ def print_text(output_directory, file_prefix, carve, carve_freelists, specified_ if not signature and master_schema_entry.row_type is MASTER_SCHEMA_ROW_TYPE.TABLE \ and not master_schema_entry.without_row_id \ and not master_schema_entry.internal_schema_object: - print("Unable to find signature for: {}. This table will not be carved." - .format(master_schema_entry.name)) - logger.error("Unable to find signature for: {}. This table will not be carved." - .format(master_schema_entry.name)) + print(f"Unable to find signature for: {master_schema_entry.name}. This table will not be " + f"carved.") + logger.error(f"Unable to find signature for: {master_schema_entry.name}. This table will " + f"not be carved.") if signature: version_history_parser = VersionHistoryParser(version_history, master_schema_entry, None, None, @@ -547,7 +553,7 @@ def print_text(output_directory, file_prefix, carve, carve_freelists, specified_ else: # Export all index and table histories to csv files while supplying signature to carve with - logger.debug("Exporting history to {} as text.".format("console")) + logger.debug("Exporting history to console as text.") for master_schema_entry in version_history.versions[BASE_VERSION_NUMBER].master_schema.master_schema_entries: @@ -564,10 +570,10 @@ def print_text(output_directory, file_prefix, carve, carve_freelists, specified_ if not signature and master_schema_entry.row_type is MASTER_SCHEMA_ROW_TYPE.TABLE \ and not master_schema_entry.without_row_id \ and not master_schema_entry.internal_schema_object: - print("Unable to find signature for: {}. This table will not be carved." - .format(master_schema_entry.name)) - logger.error("Unable to find signature for: {}. This table will not be carved." - .format(master_schema_entry.name)) + print(f"Unable to find signature for: {master_schema_entry.name}. This table will not be " + f"carved.") + logger.error(f"Unable to find signature for: {master_schema_entry.name}. This table will not " + f"be carved.") if signature: version_history_parser = VersionHistoryParser(version_history, master_schema_entry, None, None, @@ -589,8 +595,7 @@ def print_text(output_directory, file_prefix, carve, carve_freelists, specified_ def print_csv(output_directory, file_prefix, carve, carve_freelists, specified_tables_to_carve, version_history, signatures, logger): # Export all index and table histories to csv files while supplying signature to carve with - print("\nExporting history as CSV to {}...".format(output_directory)) - logger.debug("Exporting history to {} as CSV.".format(output_directory)) + logger.info(f"Exporting history to {output_directory} as CSV.") commit_csv_exporter = CommitCsvExporter(output_directory, file_prefix) @@ -609,10 +614,8 @@ def print_csv(output_directory, file_prefix, carve, carve_freelists, specified_t if not signature and master_schema_entry.row_type is MASTER_SCHEMA_ROW_TYPE.TABLE \ and not master_schema_entry.without_row_id \ and not master_schema_entry.internal_schema_object: - print("Unable to find signature for: {}. This table will not be carved." - .format(master_schema_entry.name)) - logger.error("Unable to find signature for: {}. This table will not be carved." - .format(master_schema_entry.name)) + logger.error(f"Unable to find signature for: {master_schema_entry.name}. This table will not be " + f"carved.") if signature: version_history_parser = VersionHistoryParser(version_history, master_schema_entry, None, None, @@ -632,7 +635,6 @@ def print_sqlite(output_directory, file_prefix, carve, carve_freelists, file_postfix = "-sqlite-dissect.db3" sqlite_file_name = file_prefix + file_postfix - print("\nExporting history as SQLite to {}{}{}...".format(output_directory, sep, sqlite_file_name)) logger.debug("Exporting history as SQLite to {}{}{}.".format(output_directory, sep, sqlite_file_name)) with CommitSqliteExporter(output_directory, sqlite_file_name) as commit_sqlite_exporter: @@ -652,8 +654,6 @@ def print_sqlite(output_directory, file_prefix, carve, carve_freelists, if not signature and master_schema_entry.row_type is MASTER_SCHEMA_ROW_TYPE.TABLE \ and not master_schema_entry.without_row_id \ and not master_schema_entry.internal_schema_object: - print("Unable to find signature for: {}. This table will not be carved." - .format(master_schema_entry.name)) logger.error("Unable to find signature for: {}. This table will not be carved." .format(master_schema_entry.name)) @@ -675,7 +675,6 @@ def print_xlsx(output_directory, file_prefix, carve, carve_freelists, specified_ xlsx_file_name = file_prefix + file_postfix # Export all index and table histories to a xlsx workbook while supplying signature to carve with - print("\nExporting history as XLSX to {}{}{}...".format(output_directory, sep, xlsx_file_name)) logger.debug("Exporting history as XLSX to {}{}{}.".format(output_directory, sep, xlsx_file_name)) with CommitXlsxExporter(output_directory, xlsx_file_name) as commit_xlsx_exporter: @@ -695,8 +694,6 @@ def print_xlsx(output_directory, file_prefix, carve, carve_freelists, specified_ if not signature and master_schema_entry.row_type is MASTER_SCHEMA_ROW_TYPE.TABLE \ and not master_schema_entry.without_row_id \ and not master_schema_entry.internal_schema_object: - print("Unable to find signature for: {}. This table will not be carved." - .format(master_schema_entry.name)) logger.error("Unable to find signature for: {}. This table will not be carved." .format(master_schema_entry.name)) @@ -727,7 +724,6 @@ def carve_rollback_journal(output_directory, rollback_journal_file, rollback_jou """ csv_prefix_rollback_journal_file_name = basename(normpath(rollback_journal_file_name)) - print("Exporting rollback journal carvings as CSV to {}...".format(output_directory)) logger.debug("Exporting rollback journal carvings as csv to output directory: {}.".format(output_directory)) commit_csv_exporter = CommitCsvExporter(output_directory, csv_prefix_rollback_journal_file_name) @@ -767,21 +763,15 @@ def carve_rollback_journal(output_directory, rollback_journal_file, rollback_jou commit_csv_exporter.write_commit(master_schema_entry, commit) else: - print("Unable to find signature for: {}. This table will not be carved from the rollback journal." - .format(master_schema_entry.name)) logger.error("Unable to find signature for: {}. This table will not be carved from the " "rollback journal.".format(master_schema_entry.name)) def cli(): - """ - Serves as a primary CLI entrypoint to parse the arguments from the CLI and call the main() function to parse the - arguments and process the SQLite files into the specified outputs. - """ # Determine if a directory has been passed instead of a file, in which case, find all args = parse_args() if args.sqlite_path is not None: - sqlite_files = get_sqlite_files(args.sqlite_path) + sqlite_files = get_sqlite_files(abspath(args.sqlite_path)) # Ensure there is at least one SQLite file if len(sqlite_files) > 0: for sqlite_file in sqlite_files: From 7c2323630efda5bf6fdff4023d4bf89faf705292 Mon Sep 17 00:00:00 2001 From: kchason Date: Tue, 26 Apr 2022 08:52:14 -0400 Subject: [PATCH 24/35] Clean up output error handling and try-decode the fields --- sqlite_dissect/output.py | 3 +++ sqlite_dissect/tests/nist_assertions.py | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/sqlite_dissect/output.py b/sqlite_dissect/output.py index f83ddb7..bbaef35 100644 --- a/sqlite_dissect/output.py +++ b/sqlite_dissect/output.py @@ -163,6 +163,9 @@ def stringify_cell_record(cell, database_text_encoding, page_type): column_values.append(str(value)) else: column_values.append("NULL") + + # Convert any binary strings into regular strings else it can't be joined properly + column_values = [decode_str(s) for s in column_values] content = "(" + ", ".join(column_values) + ")" return content diff --git a/sqlite_dissect/tests/nist_assertions.py b/sqlite_dissect/tests/nist_assertions.py index 6023ccc..7e8fafa 100644 --- a/sqlite_dissect/tests/nist_assertions.py +++ b/sqlite_dissect/tests/nist_assertions.py @@ -39,9 +39,9 @@ def assert_file_exists(file_path): # SFT-CA-03 def assert_correct_page_size(reported_size, correct_size): - assert reported_size == correct_size, "The program reports an incorrect page size!\nCorrect page size: %d\n" \ - "Reported page size: %d" % (correct_size, reported_size) - + assert reported_size == correct_size, "The program reports an incorrect page size!\n" \ + f"Correct page size: {correct_size}\n" \ + f"Reported page size: {reported_size}" # SFT-CA-04 # SFT-CA-05 From b4a2e0bea8a0924b696d373ac64e7a56740e34a2 Mon Sep 17 00:00:00 2001 From: kchason Date: Tue, 26 Apr 2022 08:58:29 -0400 Subject: [PATCH 25/35] Fix conflicting linting --- sqlite_dissect/entrypoint.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sqlite_dissect/entrypoint.py b/sqlite_dissect/entrypoint.py index aa44f03..050ac66 100644 --- a/sqlite_dissect/entrypoint.py +++ b/sqlite_dissect/entrypoint.py @@ -146,7 +146,7 @@ def main(arguments, sqlite_file_path: str, export_sub_paths=False): output_directory = None if arguments.directory: if not exists(arguments.directory): - raise SqliteError(f"Unable to find output directory: {args.directory}.") + raise SqliteError(f"Unable to find output directory: {arguments.directory}.") output_directory = arguments.directory # Determine if there are sub-paths being configured for exports if export_sub_paths: @@ -192,7 +192,7 @@ def main(arguments, sqlite_file_path: str, export_sub_paths=False): elif arguments.rollback_journal: if not exists(arguments.rollback_journal): raise SqliteError(f"Unable to find rollback journal file: {arguments.rollback_journal}.") - rollback_journal_file_name = args.rollback_journal + rollback_journal_file_name = arguments.rollback_journal else: if exists(sqlite_file_path + WAL_FILE_POSTFIX): wal_file_name = sqlite_file_path + WAL_FILE_POSTFIX From 482f47d5d1e9e98e6bfca426b3df6411680ef966 Mon Sep 17 00:00:00 2001 From: kchason Date: Tue, 26 Apr 2022 09:12:38 -0400 Subject: [PATCH 26/35] Clean up import statements --- sqlite_dissect/carving/carver.py | 8 ++------ sqlite_dissect/entrypoint.py | 1 - sqlite_dissect/output.py | 11 +++-------- sqlite_dissect/tests/nist_test.py | 6 +++--- sqlite_dissect/tests/utilities.py | 2 +- sqlite_dissect/utilities.py | 9 ++------- 6 files changed, 11 insertions(+), 26 deletions(-) diff --git a/sqlite_dissect/carving/carver.py b/sqlite_dissect/carving/carver.py index 73d82bf..2a845a4 100644 --- a/sqlite_dissect/carving/carver.py +++ b/sqlite_dissect/carving/carver.py @@ -3,12 +3,8 @@ from warnings import warn from sqlite_dissect.carving.carved_cell import CarvedBTreeCell from sqlite_dissect.carving.utilities import generate_signature_regex -from sqlite_dissect.constants import BLOB_SIGNATURE_IDENTIFIER -from sqlite_dissect.constants import CELL_LOCATION -from sqlite_dissect.constants import LOGGER_NAME -from sqlite_dissect.constants import TEXT_SIGNATURE_IDENTIFIER -from sqlite_dissect.exception import CarvingError -from sqlite_dissect.exception import CellCarvingError +from sqlite_dissect.constants import BLOB_SIGNATURE_IDENTIFIER, CELL_LOCATION, LOGGER_NAME, TEXT_SIGNATURE_IDENTIFIER +from sqlite_dissect.exception import CarvingError, CellCarvingError """ diff --git a/sqlite_dissect/entrypoint.py b/sqlite_dissect/entrypoint.py index 050ac66..9cd484e 100644 --- a/sqlite_dissect/entrypoint.py +++ b/sqlite_dissect/entrypoint.py @@ -1,4 +1,3 @@ -import logging import uuid import warnings from logging import CRITICAL diff --git a/sqlite_dissect/output.py b/sqlite_dissect/output.py index bbaef35..23b6920 100644 --- a/sqlite_dissect/output.py +++ b/sqlite_dissect/output.py @@ -1,13 +1,8 @@ from binascii import hexlify from logging import getLogger -from sqlite_dissect.constants import LOGGER_NAME -from sqlite_dissect.constants import PAGE_TYPE -from sqlite_dissect.constants import UTF_8 -from sqlite_dissect.file.database.page import BTreePage -from sqlite_dissect.file.database.page import IndexInteriorPage -from sqlite_dissect.file.database.page import IndexLeafPage -from sqlite_dissect.file.database.page import TableInteriorPage -from sqlite_dissect.file.database.page import TableLeafPage +from sqlite_dissect.constants import LOGGER_NAME, PAGE_TYPE, UTF_8 +from sqlite_dissect.file.database.page import BTreePage, IndexInteriorPage, IndexLeafPage, TableInteriorPage, \ + TableLeafPage from sqlite_dissect.exception import OutputError from sqlite_dissect.utilities import has_content, decode_str diff --git a/sqlite_dissect/tests/nist_test.py b/sqlite_dissect/tests/nist_test.py index 3630011..501ceb9 100644 --- a/sqlite_dissect/tests/nist_test.py +++ b/sqlite_dissect/tests/nist_test.py @@ -1,10 +1,10 @@ import sqlite3 -import sqlite_dissect.tests.nist_assertions -from hashlib import md5 -from sqlite_dissect.entrypoint import main import io import sys import pytest + +from hashlib import md5 +from sqlite_dissect.entrypoint import main from sqlite_dissect.constants import FILE_TYPE from sqlite_dissect.tests import nist_assertions from sqlite_dissect.tests.utilities import db_file, parse_csv diff --git a/sqlite_dissect/tests/utilities.py b/sqlite_dissect/tests/utilities.py index e65e0fc..df25498 100644 --- a/sqlite_dissect/tests/utilities.py +++ b/sqlite_dissect/tests/utilities.py @@ -2,8 +2,8 @@ import sqlite3 import random import string -from collections import OrderedDict import uuid +from collections import OrderedDict default_columns = OrderedDict( diff --git a/sqlite_dissect/utilities.py b/sqlite_dissect/utilities.py index 353e53b..f287ae7 100644 --- a/sqlite_dissect/utilities.py +++ b/sqlite_dissect/utilities.py @@ -4,16 +4,11 @@ from hashlib import md5 from logging import getLogger from re import compile -from struct import pack -from struct import unpack +from struct import pack, unpack from os import walk, makedirs, path from os.path import exists, isdir, join from sqlite_dissect.constants import ALL_ZEROS_REGEX, SQLITE_DATABASE_HEADER_LENGTH, MAGIC_HEADER_STRING, \ - MAGIC_HEADER_STRING_ENCODING, SQLITE_FILE_EXTENSIONS -from sqlite_dissect.constants import LOGGER_NAME -from sqlite_dissect.constants import OVERFLOW_HEADER_LENGTH -from sqlite_dissect.constants import BLOB_SIGNATURE_IDENTIFIER -from sqlite_dissect.constants import TEXT_SIGNATURE_IDENTIFIER + SQLITE_FILE_EXTENSIONS, LOGGER_NAME, OVERFLOW_HEADER_LENGTH, BLOB_SIGNATURE_IDENTIFIER, TEXT_SIGNATURE_IDENTIFIER from sqlite_dissect.exception import InvalidVarIntError from sqlite_dissect._version import __version__ from configargparse import ArgParser From 7aad721883f7742e7c64a3c65aa4e9afbbfcb83b Mon Sep 17 00:00:00 2001 From: kchason Date: Tue, 26 Apr 2022 13:01:23 -0400 Subject: [PATCH 27/35] Switch stdio redirects --- sqlite_dissect/entrypoint.py | 18 +-- sqlite_dissect/tests/nist_test.py | 184 +++++++++++++++--------------- 2 files changed, 99 insertions(+), 103 deletions(-) diff --git a/sqlite_dissect/entrypoint.py b/sqlite_dissect/entrypoint.py index 9cd484e..7fa6488 100644 --- a/sqlite_dissect/entrypoint.py +++ b/sqlite_dissect/entrypoint.py @@ -1,19 +1,8 @@ import uuid import warnings -from logging import CRITICAL -from logging import DEBUG -from logging import ERROR -from logging import INFO -from logging import WARNING -from logging import basicConfig -from logging import getLogger +from logging import CRITICAL, DEBUG, ERROR, INFO, WARNING, basicConfig, getLogger from os import path -from os.path import basename, abspath -from os.path import join -from os.path import exists -from os.path import getsize -from os.path import normpath -from os.path import sep +from os.path import basename, abspath, join, exists, getsize, normpath, sep from time import time from warnings import warn from sqlite_dissect.carving.rollback_journal_carver import RollBackJournalCarver @@ -301,6 +290,9 @@ def main(arguments, sqlite_file_path: str, export_sub_paths=False): # Print the header info of the database str_header = database.database_header.stringify(padding="\t") logger.debug(f"\nDatabase header information:\n{str_header}") + # Print to stdout if the "text" output option was selected + if not export_types or EXPORT_TYPES.TEXT in export_types: + print(f"\nDatabase header information:\n{str_header}") logger.debug("Continuing to parse...") # Check if the master schema was asked for diff --git a/sqlite_dissect/tests/nist_test.py b/sqlite_dissect/tests/nist_test.py index 501ceb9..02b6d64 100644 --- a/sqlite_dissect/tests/nist_test.py +++ b/sqlite_dissect/tests/nist_test.py @@ -3,7 +3,9 @@ import sys import pytest +from contextlib import redirect_stdout from hashlib import md5 +from io import StringIO from sqlite_dissect.entrypoint import main from sqlite_dissect.constants import FILE_TYPE from sqlite_dissect.tests import nist_assertions @@ -31,43 +33,45 @@ def test_header_reporting(db_file): args = parse_args([db_filepath, '--header']) sqlite_files = get_sqlite_files(args.sqlite_path) - main(args, sqlite_files[0], len(sqlite_files) > 1) - reported_page_size = None - reported_journal_mode_read = None - reported_journal_mode_write = None - reported_num_pages = None - reported_encoding = None - for line in sys.stdout.read().splitlines(): - line = str(line) - if "FILE FORMAT WRITE VERSION" in line.upper(): - reported_journal_mode_write = line.split(': ')[1].strip() - elif "FILE FORMAT READ VERSION" in line.upper(): - reported_journal_mode_read = line.split(': ')[1].strip() - elif "PAGE SIZE" in line.upper(): - reported_page_size = int(line.split(': ')[1].strip()) - elif "DATABASE SIZE IN PAGES" in line.upper(): - reported_num_pages = int(line.split(': ')[1].strip()) - elif "DATABASE TEXT ENCODING" in line.upper(): - reported_encoding = line.split(': ')[1].strip() - - actual_database = sqlite3.connect(db_filepath) - db_cursor = actual_database.cursor() - - actual_page_size = fetch_pragma(db_cursor, 'page_size') - actual_journal_mode = fetch_pragma(db_cursor, 'journal_mode') - actual_num_pages = fetch_pragma(db_cursor, 'page_count') - actual_encoding = fetch_pragma(db_cursor, 'encoding') - - hash_after_parsing = get_md5_hash(db_filepath) - - nist_assertions.assert_md5_equals(hash_before_parsing, hash_after_parsing, db_file[0].name) - nist_assertions.assert_file_exists(db_filepath) - nist_assertions.assert_correct_page_size(reported_page_size, actual_page_size) - nist_assertions.assert_correct_journal_mode(reported_journal_mode_read, actual_journal_mode, 'r') - nist_assertions.assert_correct_journal_mode(reported_journal_mode_write, actual_journal_mode, 'w') - nist_assertions.assert_correct_num_pages(reported_num_pages, actual_num_pages) - nist_assertions.assert_correct_encoding(reported_encoding, actual_encoding) + with redirect_stdout(io.StringIO()) as output: + main(args, sqlite_files[0], len(sqlite_files) > 1) + + reported_page_size = None + reported_journal_mode_read = None + reported_journal_mode_write = None + reported_num_pages = None + reported_encoding = None + + for line in output.getvalue().splitlines(): + if "FILE FORMAT WRITE VERSION" in line.upper(): + reported_journal_mode_write = line.split(': ')[1].strip() + elif "FILE FORMAT READ VERSION" in line.upper(): + reported_journal_mode_read = line.split(': ')[1].strip() + elif "PAGE SIZE" in line.upper(): + reported_page_size = int(line.split(': ')[1].strip()) + elif "DATABASE SIZE IN PAGES" in line.upper(): + reported_num_pages = int(line.split(': ')[1].strip()) + elif "DATABASE TEXT ENCODING" in line.upper(): + reported_encoding = line.split(': ')[1].strip() + + actual_database = sqlite3.connect(db_filepath) + db_cursor = actual_database.cursor() + + actual_page_size = fetch_pragma(db_cursor, 'page_size') + actual_journal_mode = fetch_pragma(db_cursor, 'journal_mode') + actual_num_pages = fetch_pragma(db_cursor, 'page_count') + actual_encoding = fetch_pragma(db_cursor, 'encoding') + + hash_after_parsing = get_md5_hash(db_filepath) + + nist_assertions.assert_md5_equals(hash_before_parsing, hash_after_parsing, db_file[0].name) + nist_assertions.assert_file_exists(db_filepath) + nist_assertions.assert_correct_page_size(reported_page_size, actual_page_size) + nist_assertions.assert_correct_journal_mode(reported_journal_mode_read, actual_journal_mode, 'r') + nist_assertions.assert_correct_journal_mode(reported_journal_mode_write, actual_journal_mode, 'w') + nist_assertions.assert_correct_num_pages(reported_num_pages, actual_num_pages) + nist_assertions.assert_correct_encoding(reported_encoding, actual_encoding) # SFT-02 @@ -80,60 +84,60 @@ def test_schema_reporting(db_file): args = parse_args([db_filepath]) sqlite_files = get_sqlite_files(args.sqlite_path) - main(args, str(sqlite_files[0]), len(sqlite_files) > 1) - - reported_tables = [] - reported_columns = {} - reported_num_rows = {} - current_table = None - row_count = 0 - for line in sys.stdout.read().splitlines(): - line = str(line) - if "Master schema entry: " in line and "row type: table" in line: - current_table = line[line.find("Master schema entry: "):line.find("row type: ")].split(': ')[1].strip() - reported_tables.append(current_table) - reported_columns[current_table] = [] - - create_statement = line[line.find("sql: "):].split(': ')[1].strip() - columns = create_statement[create_statement.find("(") + 1:create_statement.find(")")].split(',') - for column in columns: - reported_columns[current_table].append(column.strip().split()[0]) - - elif "File Type: " in line and current_table: - row_count += 1 - - elif line == '-' * 15: - reported_num_rows[current_table] = row_count - current_table = None - row_count = 0 - - actual_database = sqlite3.connect(db_filepath) - db_cursor = actual_database.cursor() - db_cursor.execute("SELECT tbl_name, sql FROM sqlite_master WHERE type='table'") - - actual_tables = [] - actual_columns = {} - actual_num_rows = {} - for table in db_cursor.fetchall(): - actual_tables.append(table[0]) - actual_columns[table[0]] = [] - - columns = table[1][table[1].find("(")+1:table[1].find(")")] - for column in columns.split(","): - actual_columns[table[0]].append(column.strip().split()[0]) - - db_cursor.execute("SELECT COUNT(*) FROM %s" % table[0]) - actual_num_rows[table[0]] = int(db_cursor.fetchone()[0]) - - hash_after_parsing = get_md5_hash(db_filepath) - - nist_assertions.assert_md5_equals(hash_before_parsing, hash_after_parsing, db_file[0].name) - nist_assertions.assert_file_exists(db_filepath) - nist_assertions.assert_correct_tables(reported_tables, actual_tables) - - for table in reported_columns: - nist_assertions.assert_correct_columns(reported_columns[table], actual_columns[table], table) - nist_assertions.assert_correct_num_pages(reported_num_rows[table], actual_num_rows[table]) + with redirect_stdout(io.StringIO()) as output: + main(args, str(sqlite_files[0]), len(sqlite_files) > 1) + + reported_tables = [] + reported_columns = {} + reported_num_rows = {} + current_table = None + row_count = 0 + for line in output.getvalue().splitlines(): + if "Master schema entry: " in line and "row type: table" in line: + current_table = line[line.find("Master schema entry: "):line.find("row type: ")].split(': ')[1].strip() + reported_tables.append(current_table) + reported_columns[current_table] = [] + + create_statement = line[line.find("sql: "):].split(': ')[1].strip() + columns = create_statement[create_statement.find("(") + 1:create_statement.find(")")].split(',') + for column in columns: + reported_columns[current_table].append(column.strip().split()[0]) + + elif "File Type: " in line and current_table: + row_count += 1 + + elif line == '-' * 15: + reported_num_rows[current_table] = row_count + current_table = None + row_count = 0 + + actual_database = sqlite3.connect(db_filepath) + db_cursor = actual_database.cursor() + db_cursor.execute("SELECT tbl_name, sql FROM sqlite_master WHERE type='table'") + + actual_tables = [] + actual_columns = {} + actual_num_rows = {} + for table in db_cursor.fetchall(): + actual_tables.append(table[0]) + actual_columns[table[0]] = [] + + columns = table[1][table[1].find("(")+1:table[1].find(")")] + for column in columns.split(","): + actual_columns[table[0]].append(column.strip().split()[0]) + + db_cursor.execute("SELECT COUNT(*) FROM %s" % table[0]) + actual_num_rows[table[0]] = int(db_cursor.fetchone()[0]) + + hash_after_parsing = get_md5_hash(db_filepath) + + nist_assertions.assert_md5_equals(hash_before_parsing, hash_after_parsing, db_file[0].name) + nist_assertions.assert_file_exists(db_filepath) + nist_assertions.assert_correct_tables(reported_tables, actual_tables) + + for table in reported_columns: + nist_assertions.assert_correct_columns(reported_columns[table], actual_columns[table], table) + nist_assertions.assert_correct_num_pages(reported_num_rows[table], actual_num_rows[table]) # SFT-03 From ff474ddc02e213aa4bfb2dd74fbceb0075cbddd0 Mon Sep 17 00:00:00 2001 From: kchason Date: Tue, 26 Apr 2022 15:53:54 -0400 Subject: [PATCH 28/35] Bump docs version --- docs/source/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 582f618..cc063e1 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -22,7 +22,7 @@ author = 'Department of Defense Cyber Crime Center (DC3)' # The full version, including alpha/beta/rc tags -release = '0.0.6' +release = '1.0.0' # -- General configuration --------------------------------------------------- master_doc = 'index' From 5b26e0bc49b532f6b0f8b13e8e49e429ac81f019 Mon Sep 17 00:00:00 2001 From: kchason Date: Wed, 11 May 2022 21:35:21 -0400 Subject: [PATCH 29/35] Update strings for tests --- sqlite_dissect/tests/utilities.py | 68 ++++++++++++++++++------------- 1 file changed, 39 insertions(+), 29 deletions(-) diff --git a/sqlite_dissect/tests/utilities.py b/sqlite_dissect/tests/utilities.py index e82fd0c..047cd30 100644 --- a/sqlite_dissect/tests/utilities.py +++ b/sqlite_dissect/tests/utilities.py @@ -1,6 +1,5 @@ import pytest import hashlib -import os import sqlite3 import random import string @@ -8,15 +7,19 @@ import uuid from collections import OrderedDict -def strip_one(string, pattern): - return re.sub(pattern + '$', "", re.sub('^' + pattern, "", string)) -def find_breakpoints(input_string, quote_chars = ["'", '"'], delim = ','): +def strip_one(s: str, pattern: str): + return re.sub(pattern + '$', "", re.sub('^' + pattern, "", s)) + + +def find_breakpoints(input_string, quote_chars=None, delim=','): + if quote_chars is None: + quote_chars = ["'", '"'] + breakpoints = [] in_quotes = None is_encapsulated = False - last_char = None for index, character in enumerate(input_string): if in_quotes: if character == in_quotes: @@ -38,19 +41,21 @@ def find_breakpoints(input_string, quote_chars = ["'", '"'], delim = ','): return breakpoints + def parse_rows(row_string): commas = find_breakpoints(row_string) row_dict = {} - row_list = [row_string[i:j].strip() for i,j in zip([0] + [index + 1 for index in commas], commas + [None])] + row_list = [row_string[i:j].strip() for i, j in zip([0] + [index + 1 for index in commas], commas + [None])] for row in row_list: spaces = find_breakpoints(row, delim=' ') - row_dict[strip_one(row[ : spaces[0]], '[\'"]').lstrip('[ ').rstrip('] ')] = row[spaces[0] : ].strip() + row_dict[strip_one(row[: spaces[0]], '[\'"]').lstrip('[ ').rstrip('] ')] = row[spaces[0]:].strip() return row_dict -def get_index_of_closing_parenthesis(string, opening_parenthesis_offset=0): + +def get_index_of_closing_parenthesis(s: str, opening_parenthesis_offset=0): in_quotes = None in_block_comment = False in_line_comment = False @@ -61,11 +66,11 @@ def get_index_of_closing_parenthesis(string, opening_parenthesis_offset=0): line_comment_chars = '--' line_comment_term = '\n' - for index, character in enumerate(string[opening_parenthesis_offset : ]): + for index, character in enumerate(s[opening_parenthesis_offset:]): if in_quotes and character == in_quotes: in_quotes = None - elif in_block_comment and character == block_comment_term[0] and string[index : index + 2] == block_comment_term: + elif in_block_comment and character == block_comment_term[0] and s[index: index + 2] == block_comment_term: in_block_comment = False elif in_line_comment and character == line_comment_term: @@ -75,10 +80,10 @@ def get_index_of_closing_parenthesis(string, opening_parenthesis_offset=0): if character in quote_chars: in_quotes = character - elif character == block_comment_chars[0] and string[index : index + 2] == block_comment_chars: + elif character == block_comment_chars[0] and s[index: index + 2] == block_comment_chars: in_block_comment = True - elif character == line_comment_chars[0] and string[index : index + 2] == line_comment_chars: + elif character == line_comment_chars[0] and s[index: index + 2] == line_comment_chars: in_line_comment = True elif character == ')': @@ -88,13 +93,16 @@ def get_index_of_closing_parenthesis(string, opening_parenthesis_offset=0): def parse_schema(stdout): tables = {} + next_parenthesis = 0 + closing_parenthesis = 0 + while stdout: # Find the next table entry - stdout = stdout[stdout.find("Type: table") : ] - table_name = stdout[stdout.find("Table Name:") + 11 : stdout.find("SQL:")].strip() + stdout = stdout[stdout.find("Type: table"):] + table_name = stdout[stdout.find("Table Name:") + 11: stdout.find("SQL:")].strip() if table_name: - stdout = stdout[stdout.find("SQL:") + 4 : ] + stdout = stdout[stdout.find("SQL:") + 4:] closing_parenthesis_found = False in_quotes = False @@ -111,26 +119,27 @@ def parse_schema(stdout): index += 1 # Fetches lines with columns in them - schema_statement = stdout[next_parenthesis + 1 : closing_parenthesis].strip() + schema_statement = stdout[next_parenthesis + 1: closing_parenthesis].strip() tables[table_name] = parse_rows(schema_statement) - stdout = stdout[closing_parenthesis + 1 : ] + stdout = stdout[closing_parenthesis + 1:] return tables -def get_md5_hash(string): - return hashlib.md5(string).hexdigest().upper() +def get_md5_hash(s: str): + return hashlib.md5(s).hexdigest().upper() -def replace_bytes(byte_array, replacement, index): + +def replace_bytes(byte_array, replacement, index: int): return byte_array[:index] + replacement + byte_array[index + len(replacement):] -def decode_varint(byte_array, offset=0): +def decode_varint(byte_array, offset: int = 0): unsigned_integer_value = 0 varint_relative_offset = 0 - for x in xrange(1, 10): + for x in range(1, 10): varint_byte = ord(byte_array[offset + varint_relative_offset:offset + varint_relative_offset + 1]) varint_relative_offset += 1 @@ -153,6 +162,7 @@ def decode_varint(byte_array, offset=0): return signed_integer_value, varint_relative_offset + default_columns = OrderedDict( [ ('name', 'TEXT NOT NULL'), @@ -311,7 +321,7 @@ def db_file(request, tmp_path): row_values = [row[1:] for row in generate_rows(request.param['modify'], request.param['columns'])] map(lambda row_values, id_for_mod: row_values.append(id_for_mod), row_values, id_for_mod) for row_id in id_for_mod: - cursor.execute("SELECT * FROM testing WHERE id=?", (row_id, )) + cursor.execute("SELECT * FROM testing WHERE id=?", (row_id,)) modified_rows.append(cursor.fetchone()) update_statement = generate_update_statement(request.param['table_name'], request.param['columns']) @@ -320,7 +330,7 @@ def db_file(request, tmp_path): if request.param['delete'] > 0: for row_id in id_for_del: - cursor.execute("SELECT * FROM testing WHERE id=?", (row_id, )) + cursor.execute("SELECT * FROM testing WHERE id=?", (row_id,)) deleted_rows.append(cursor.fetchone()) cursor.executemany("DELETE FROM testing WHERE id=?", [[row_id] for row_id in id_for_del]) @@ -330,24 +340,24 @@ def db_file(request, tmp_path): db.close() yield db_filepath, modified_rows + deleted_rows + # Parses CSV file returned by sqlite_dissect operations and returns rows found that match the given operations. -def parse_csv(filepath, operations, first_key = 'id'): +def parse_csv(filepath, operations, first_key='id'): accepted_sources = ["ROLLBACK_JOURNAL", "DATABASE", "WAL"] with open(filepath, 'r') as csv_file: key_line = csv_file.readline().strip() commas = find_breakpoints(key_line) - keys = [strip_one(key_line[i:j], "['\"]") for i,j in zip([0] + [index + 1 for index in commas], commas + [None])] + keys = [strip_one(key_line[i:j], "['\"]") for i, j in + zip([0] + [index + 1 for index in commas], commas + [None])] op_index = keys.index("Operation") first_index = keys.index(first_key) rows = [] for line in csv_file: line_list = map(lambda data: data.strip('"'), line.strip().split(',')) - + if line_list[0] in accepted_sources and line_list[op_index] in operations: rows.append(tuple(line_list[first_index:])) return tuple(rows) - - From 0fd2a67d0365c9722ba342576c85b65a175dbd1f Mon Sep 17 00:00:00 2001 From: kchason Date: Wed, 31 Aug 2022 08:53:48 -0400 Subject: [PATCH 30/35] Additional test cleanup --- sqlite_dissect/output.py | 6 +++--- sqlite_dissect/tests/test_headers.py | 4 ++-- sqlite_dissect/tests/test_payload.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sqlite_dissect/output.py b/sqlite_dissect/output.py index 23b6920..9b542c1 100644 --- a/sqlite_dissect/output.py +++ b/sqlite_dissect/output.py @@ -59,17 +59,17 @@ def get_pointer_map_entries_breakdown(version): last_page_number = pointer_map_page.number + 1 last_entry = None for entry in pointer_map_page.pointer_map_entries: - if hexlify(last_type_seen) != hexlify(entry.page_type): + if hexlify(str.encode(last_type_seen)) != hexlify(str.encode(entry.page_type)): pages = entry.page_number - last_page_number breakdown = (pointer_map_page.number, last_page_number, entry.page_number - 1, - pages, hexlify(last_entry.page_type)) + pages, hexlify(str.encode(last_entry.page_type))) pointer_map_entries_breakdown.append(breakdown) last_page_number = entry.page_number last_type_seen = entry.page_type last_entry = entry pages = last_entry.page_number - last_page_number + 1 breakdown = (pointer_map_page.number, last_page_number, last_entry.page_number, - pages, hexlify(last_entry.page_type)) + pages, hexlify(str.encode(last_entry.page_type))) pointer_map_entries_breakdown.append(breakdown) return pointer_map_entries_breakdown diff --git a/sqlite_dissect/tests/test_headers.py b/sqlite_dissect/tests/test_headers.py index 8fd8e37..0e21aba 100644 --- a/sqlite_dissect/tests/test_headers.py +++ b/sqlite_dissect/tests/test_headers.py @@ -313,8 +313,8 @@ def test_journal_header_init(rollback_journal_header_byte_array, expected_value) assert journal_header.header_string == rollback_journal_header_byte_array[0:8] assert journal_header.page_count == ( - ROLLBACK_JOURNAL_ALL_CONTENT_UNTIL_END_OF_FILE \ - if rollback_journal_header_byte_array[8:12] == ROLLBACK_JOURNAL_HEADER_ALL_CONTENT.decode("hex") \ + ROLLBACK_JOURNAL_ALL_CONTENT_UNTIL_END_OF_FILE + if rollback_journal_header_byte_array[8:12] == ROLLBACK_JOURNAL_HEADER_ALL_CONTENT.decode("hex") else unpack(b">I", rollback_journal_header_byte_array[8:12])[0] ) diff --git a/sqlite_dissect/tests/test_payload.py b/sqlite_dissect/tests/test_payload.py index 554d502..c603485 100644 --- a/sqlite_dissect/tests/test_payload.py +++ b/sqlite_dissect/tests/test_payload.py @@ -53,7 +53,7 @@ def test_record_init(page, payload_offset, payload_byte_size, bytes_on_first_pag current_header_offset = decode_varint(page, payload_offset)[1] num_columns = 0 - serial_type_signature = b"" + serial_type_signature = "" while current_header_offset < decode_varint(page, payload_offset)[0]: serial_type, serial_type_varint_length = decode_varint(total_record_content, current_header_offset) serial_type_signature += str(get_serial_type_signature(serial_type)) From 6fe04e0cc22659f9061ff56607cdcf13bb328e0e Mon Sep 17 00:00:00 2001 From: kchason Date: Thu, 1 Sep 2022 12:23:53 -0400 Subject: [PATCH 31/35] Test updates for python 3 --- sqlite_dissect/carving/utilities.py | 22 +++++++++++----------- sqlite_dissect/tests/output_test.py | 12 ++++++------ sqlite_dissect/tests/test_headers.py | 2 +- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/sqlite_dissect/carving/utilities.py b/sqlite_dissect/carving/utilities.py index 7cc2c7e..98865b1 100644 --- a/sqlite_dissect/carving/utilities.py +++ b/sqlite_dissect/carving/utilities.py @@ -215,7 +215,7 @@ def generate_signature_regex(signature, skip_first_serial_type=False): """ - regex = "" + regex = b"" if skip_first_serial_type: signature = signature[1:] @@ -240,9 +240,9 @@ def generate_signature_regex(signature, skip_first_serial_type=False): """ - basic_serial_type_regex = "" - blob_regex = "" - text_regex = "" + basic_serial_type_regex = b"" + blob_regex = b"" + text_regex = b"" for column_serial_type in column_serial_type_array: if column_serial_type == -1: @@ -255,7 +255,7 @@ def generate_signature_regex(signature, skip_first_serial_type=False): if blob_regex or text_regex: if basic_serial_type_regex: - basic_serial_type_regex = "[{}]".format(basic_serial_type_regex) + basic_serial_type_regex = b"[%b]" % basic_serial_type_regex if blob_regex and not text_regex: @@ -267,7 +267,7 @@ def generate_signature_regex(signature, skip_first_serial_type=False): getLogger(LOGGER_NAME).error(log_message) raise CarvingError(log_message) - regex += "(?:{}|{})".format(basic_serial_type_regex, blob_regex) + regex += b"(?:%b|%b)" % (basic_serial_type_regex, blob_regex) elif not blob_regex and text_regex: @@ -280,15 +280,15 @@ def generate_signature_regex(signature, skip_first_serial_type=False): getLogger(LOGGER_NAME).error(log_message) raise CarvingError(log_message) - regex += "(?:{}|{})".format(basic_serial_type_regex, text_regex) + regex += b"(?:%b|%b)" % (basic_serial_type_regex, text_regex) elif blob_regex and text_regex: - var_length_regex = blob_regex + "|" + text_regex + var_length_regex = blob_regex + b"|" + text_regex if basic_serial_type_regex: - regex += "(?:{}|{})".format(basic_serial_type_regex, var_length_regex) + regex += b"(?:%b|%b)" % (basic_serial_type_regex, var_length_regex) else: - regex += "(?:{})".format(var_length_regex) + regex += b"(?:%b)" % var_length_regex else: log_message = "No appropriate regular expressions were found for basic serial type, blob, or " \ @@ -315,7 +315,7 @@ def generate_signature_regex(signature, skip_first_serial_type=False): getLogger(LOGGER_NAME).error(log_message) raise CarvingError(log_message) - regex += "[{}]".format(basic_serial_type_regex) + regex += b"[%b]" % basic_serial_type_regex else: diff --git a/sqlite_dissect/tests/output_test.py b/sqlite_dissect/tests/output_test.py index df1e442..b09f0e8 100644 --- a/sqlite_dissect/tests/output_test.py +++ b/sqlite_dissect/tests/output_test.py @@ -69,10 +69,10 @@ def test_get_pointer_map_entries_breakdown(): MockPointerMapPage([MockPage("LOCK_BYTE", 0), MockPage("FREELIST_TRUNK", 1), MockPage("FREELIST_LEAF", 2)], 0, 0), MockPointerMapPage([MockPage("LOCK_BYTE", 3), MockPage("FREELIST_TRUNK", 4), MockPage("FREELIST_LEAF", 5)], 1, 0) ], 0)) == [ - (0, 1, 0, 0, '4c4f434b5f42595445'), - (0, 1, 1, 1, '465245454c4953545f5452554e4b'), - (0, 2, 2, 1, '465245454c4953545f4c454146'), - (1, 2, 3, 2, '4c4f434b5f42595445'), - (1, 4, 4, 1, '465245454c4953545f5452554e4b'), - (1, 5, 5, 1, '465245454c4953545f4c454146') + (0, 1, 0, 0, b'4c4f434b5f42595445'), + (0, 1, 1, 1, b'465245454c4953545f5452554e4b'), + (0, 2, 2, 1, b'465245454c4953545f4c454146'), + (1, 2, 3, 2, b'4c4f434b5f42595445'), + (1, 4, 4, 1, b'465245454c4953545f5452554e4b'), + (1, 5, 5, 1, b'465245454c4953545f4c454146') ] \ No newline at end of file diff --git a/sqlite_dissect/tests/test_headers.py b/sqlite_dissect/tests/test_headers.py index 0e21aba..4c08c85 100644 --- a/sqlite_dissect/tests/test_headers.py +++ b/sqlite_dissect/tests/test_headers.py @@ -314,7 +314,7 @@ def test_journal_header_init(rollback_journal_header_byte_array, expected_value) assert journal_header.page_count == ( ROLLBACK_JOURNAL_ALL_CONTENT_UNTIL_END_OF_FILE - if rollback_journal_header_byte_array[8:12] == ROLLBACK_JOURNAL_HEADER_ALL_CONTENT.decode("hex") + if rollback_journal_header_byte_array[8:12] == ROLLBACK_JOURNAL_HEADER_ALL_CONTENT else unpack(b">I", rollback_journal_header_byte_array[8:12])[0] ) From f8909de146c72c47dec460d02af8b401bb18a08b Mon Sep 17 00:00:00 2001 From: Minoru Kobayashi Date: Mon, 19 Jun 2023 16:24:46 +0900 Subject: [PATCH 32/35] Fixed an import error of collections module --- sqlite_dissect/constants.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sqlite_dissect/constants.py b/sqlite_dissect/constants.py index c01a521..eac1940 100644 --- a/sqlite_dissect/constants.py +++ b/sqlite_dissect/constants.py @@ -1,4 +1,9 @@ -from collections.abc import MutableMapping +# https://docs.python.org/3.9/library/collections.html +try: + from collections.abc import MutableMapping +except ImportError: + from collections import MutableMapping + from logging import getLogger from re import compile from sys import maxunicode From 816f78ef53a9ba457a1c37f6d22f21ed06acb7bd Mon Sep 17 00:00:00 2001 From: Minoru Kobayashi Date: Tue, 20 Jun 2023 14:58:23 +0900 Subject: [PATCH 33/35] Fixed several errors --- sqlite_dissect/carving/carved_cell.py | 35 ++++++++++++--------------- sqlite_dissect/carving/utilities.py | 22 ++++++++--------- 2 files changed, 25 insertions(+), 32 deletions(-) diff --git a/sqlite_dissect/carving/carved_cell.py b/sqlite_dissect/carving/carved_cell.py index 3cbb263..b4f0ae3 100644 --- a/sqlite_dissect/carving/carved_cell.py +++ b/sqlite_dissect/carving/carved_cell.py @@ -1,23 +1,18 @@ from struct import unpack from warnings import warn -from sqlite_dissect.carving.utilities import calculate_body_content_size -from sqlite_dissect.carving.utilities import calculate_serial_type_definition_content_length_min_max -from sqlite_dissect.carving.utilities import decode_varint_in_reverse -from sqlite_dissect.carving.utilities import get_content_size -from sqlite_dissect.constants import BLOB_SIGNATURE_IDENTIFIER -from sqlite_dissect.constants import CELL_LOCATION -from sqlite_dissect.constants import FILE_TYPE -from sqlite_dissect.constants import TEXT_SIGNATURE_IDENTIFIER -from sqlite_dissect.exception import CellCarvingError -from sqlite_dissect.exception import InvalidVarIntError + +from sqlite_dissect.carving.utilities import ( + calculate_body_content_size, + calculate_serial_type_definition_content_length_min_max, + decode_varint_in_reverse, get_content_size) +from sqlite_dissect.constants import (BLOB_SIGNATURE_IDENTIFIER, CELL_LOCATION, + FILE_TYPE, TEXT_SIGNATURE_IDENTIFIER) +from sqlite_dissect.exception import CellCarvingError, InvalidVarIntError from sqlite_dissect.file.database.page import BTreeCell -from sqlite_dissect.file.database.payload import Payload -from sqlite_dissect.file.database.payload import RecordColumn -from sqlite_dissect.utilities import decode_varint -from sqlite_dissect.utilities import encode_varint -from sqlite_dissect.utilities import get_md5_hash -from sqlite_dissect.utilities import get_record_content -from sqlite_dissect.utilities import get_serial_type_signature +from sqlite_dissect.file.database.payload import Payload, RecordColumn +from sqlite_dissect.utilities import (decode_varint, encode_varint, + get_md5_hash, get_record_content, + get_serial_type_signature) """ @@ -221,7 +216,7 @@ def __init__(self, location, data, serial_type_definition_start_offset, serial_t self.truncated_beginning = False self.truncated_ending = False - record_column_md5_hash_strings = [""] * self.number_of_columns + record_column_md5_hash_strings = [b""] * self.number_of_columns column_index = 0 body_byte_size = 0 @@ -273,7 +268,7 @@ def __init__(self, location, data, serial_type_definition_start_offset, serial_t self.serial_type_signature += str(get_serial_type_signature(first_serial_type)) - record_column_md5_hash_strings[column_index] = "" + record_column_md5_hash_strings[column_index] = b"" self.serial_type_definition_size += first_serial_type_varint_length @@ -439,7 +434,7 @@ def __init__(self, location, data, serial_type_definition_start_offset, serial_t self.serial_type_signature += str(get_serial_type_signature(first_serial_type)) - record_column_md5_hash_strings[column_index] = "" + record_column_md5_hash_strings[column_index] = b"" self.serial_type_definition_size += first_serial_type_varint_length diff --git a/sqlite_dissect/carving/utilities.py b/sqlite_dissect/carving/utilities.py index 98865b1..4ab2688 100644 --- a/sqlite_dissect/carving/utilities.py +++ b/sqlite_dissect/carving/utilities.py @@ -1,11 +1,9 @@ -from binascii import hexlify -from binascii import unhexlify +from binascii import hexlify, unhexlify from logging import getLogger -from sqlite_dissect.constants import BLOB_SIGNATURE_IDENTIFIER -from sqlite_dissect.constants import LOGGER_NAME -from sqlite_dissect.constants import TEXT_SIGNATURE_IDENTIFIER -from sqlite_dissect.exception import CarvingError -from sqlite_dissect.exception import InvalidVarIntError + +from sqlite_dissect.constants import (BLOB_SIGNATURE_IDENTIFIER, LOGGER_NAME, + TEXT_SIGNATURE_IDENTIFIER) +from sqlite_dissect.exception import CarvingError, InvalidVarIntError from sqlite_dissect.utilities import decode_varint """ @@ -178,9 +176,9 @@ def generate_regex_for_simplified_serial_type(simplified_serial_type): """ if simplified_serial_type == -2: - return "(?:[\x0C-\x7F]|[\x80-\xFF]{1,7}[\x00-\x7F])" + return b"(?:[\x0C-\x7F]|[\x80-\xFF]{1,7}[\x00-\x7F])" elif simplified_serial_type == -1: - return "(?:[\x0D-\x7F]|[\x80-\xFF]{1,7}[\x00-\x7F])" + return b"(?:[\x0D-\x7F]|[\x80-\xFF]{1,7}[\x00-\x7F])" elif 0 <= simplified_serial_type <= 9: return unhexlify("0{}".format(simplified_serial_type)) else: @@ -250,7 +248,7 @@ def generate_signature_regex(signature, skip_first_serial_type=False): elif column_serial_type == -2: text_regex = generate_regex_for_simplified_serial_type(column_serial_type) else: - basic_serial_type_regex += generate_regex_for_simplified_serial_type(column_serial_type).decode() + basic_serial_type_regex += generate_regex_for_simplified_serial_type(column_serial_type) if blob_regex or text_regex: @@ -372,11 +370,11 @@ def get_content_size(serial_type): # A BLOB that is (N-12)/2 bytes in length elif serial_type >= 12 and serial_type % 2 == 0: - return (serial_type - 12) / 2 + return (serial_type - 12) // 2 # A string in the database encoding and is (N-13)/2 bytes in length. The null terminator is omitted elif serial_type >= 13 and serial_type % 2 == 1: - return int((serial_type - 13) / 2) + return int((serial_type - 13) // 2) else: log_message = "Invalid serial type: {}." From 51d0904fe0fce00266ea7d405a728bfc6826f566 Mon Sep 17 00:00:00 2001 From: Minoru Kobayashi Date: Mon, 26 Jun 2023 16:30:50 +0900 Subject: [PATCH 34/35] Fixed some errors --- sqlite_dissect/file/database/database.py | 18 ++-- sqlite_dissect/file/database/page.py | 98 ++++++++++------------ sqlite_dissect/file/wal_index/wal_index.py | 10 +-- sqlite_dissect/utilities.py | 23 +++-- 4 files changed, 74 insertions(+), 75 deletions(-) diff --git a/sqlite_dissect/file/database/database.py b/sqlite_dissect/file/database/database.py index c6e4be8..6249a07 100644 --- a/sqlite_dissect/file/database/database.py +++ b/sqlite_dissect/file/database/database.py @@ -1,11 +1,11 @@ from copy import copy from warnings import warn -from sqlite_dissect.constants import BASE_VERSION_NUMBER -from sqlite_dissect.constants import FILE_TYPE -from sqlite_dissect.constants import FIRST_FREELIST_TRUNK_PAGE_INDEX -from sqlite_dissect.constants import FIRST_FREELIST_TRUNK_PARENT_PAGE_NUMBER -from sqlite_dissect.constants import SQLITE_3_7_0_VERSION_NUMBER -from sqlite_dissect.constants import SQLITE_MASTER_SCHEMA_ROOT_PAGE + +from sqlite_dissect.constants import (BASE_VERSION_NUMBER, FILE_TYPE, + FIRST_FREELIST_TRUNK_PAGE_INDEX, + FIRST_FREELIST_TRUNK_PARENT_PAGE_NUMBER, + SQLITE_3_7_0_VERSION_NUMBER, + SQLITE_MASTER_SCHEMA_ROOT_PAGE) from sqlite_dissect.exception import DatabaseParsingError from sqlite_dissect.file.database.page import FreelistTrunkPage from sqlite_dissect.file.database.utilities import create_pointer_map_pages @@ -98,7 +98,7 @@ def __init__(self, file_identifier, store_in_memory=False, file_size=None, stric raise DatabaseParsingError(log_message) # Calculate the number of pages from the file size and page size - self.database_size_in_pages = self.file_handle.file_size / self.page_size + self.database_size_in_pages = self.file_handle.file_size // self.page_size # The database header size in pages is set and the version valid for number does not equal the change counter elif self.database_header.version_valid_for_number != self.database_header.file_change_counter: @@ -111,7 +111,7 @@ def __init__(self, file_identifier, store_in_memory=False, file_size=None, stric """ # Calculate the number of pages from the file size and page size - self.database_size_in_pages = self.file_handle.file_size / self.page_size + self.database_size_in_pages = self.file_handle.file_size // self.page_size log_message = "Database header for version: {} specifies a database size in pages of {} but version " \ "valid for number: {} does not equal the file change counter: {} for sqlite " \ @@ -145,7 +145,7 @@ def __init__(self, file_identifier, store_in_memory=False, file_size=None, stric """ - calculated_size_in_pages = self.file_handle.file_size / self.page_size + calculated_size_in_pages = self.file_handle.file_size // self.page_size if self.database_header.database_size_in_pages != calculated_size_in_pages: diff --git a/sqlite_dissect/file/database/page.py b/sqlite_dissect/file/database/page.py index efad687..d915e62 100644 --- a/sqlite_dissect/file/database/page.py +++ b/sqlite_dissect/file/database/page.py @@ -3,54 +3,46 @@ from logging import getLogger from struct import unpack from warnings import warn -from sqlite_dissect.constants import CELL_LOCATION -from sqlite_dissect.constants import CELL_MODULE -from sqlite_dissect.constants import CELL_POINTER_BYTE_LENGTH -from sqlite_dissect.constants import CELL_SOURCE -from sqlite_dissect.constants import FIRST_OVERFLOW_PAGE_INDEX -from sqlite_dissect.constants import FIRST_OVERFLOW_PAGE_NUMBER_LENGTH -from sqlite_dissect.constants import FIRST_OVERFLOW_PARENT_PAGE_NUMBER -from sqlite_dissect.constants import FREEBLOCK_BYTE_LENGTH -from sqlite_dissect.constants import FREELIST_HEADER_LENGTH -from sqlite_dissect.constants import FREELIST_LEAF_PAGE_NUMBER_LENGTH -from sqlite_dissect.constants import FREELIST_NEXT_TRUNK_PAGE_LENGTH -from sqlite_dissect.constants import INDEX_INTERIOR_CELL_CLASS -from sqlite_dissect.constants import INDEX_INTERIOR_PAGE_HEX_ID -from sqlite_dissect.constants import INDEX_LEAF_CELL_CLASS -from sqlite_dissect.constants import INDEX_LEAF_PAGE_HEX_ID -from sqlite_dissect.constants import INTERIOR_PAGE_HEADER_CLASS -from sqlite_dissect.constants import LEAF_PAGE_HEADER_CLASS -from sqlite_dissect.constants import LEFT_CHILD_POINTER_BYTE_LENGTH -from sqlite_dissect.constants import LOGGER_NAME -from sqlite_dissect.constants import MASTER_PAGE_HEX_ID -from sqlite_dissect.constants import NEXT_FREEBLOCK_OFFSET_LENGTH -from sqlite_dissect.constants import OVERFLOW_HEADER_LENGTH -from sqlite_dissect.constants import PAGE_FRAGMENT_LIMIT -from sqlite_dissect.constants import PAGE_HEADER_MODULE -from sqlite_dissect.constants import PAGE_TYPE -from sqlite_dissect.constants import PAGE_TYPE_LENGTH -from sqlite_dissect.constants import POINTER_MAP_B_TREE_NON_ROOT_PAGE_TYPE -from sqlite_dissect.constants import POINTER_MAP_B_TREE_ROOT_PAGE_TYPE -from sqlite_dissect.constants import POINTER_MAP_ENTRY_LENGTH -from sqlite_dissect.constants import POINTER_MAP_FREELIST_PAGE_TYPE -from sqlite_dissect.constants import POINTER_MAP_OVERFLOW_FIRST_PAGE_TYPE -from sqlite_dissect.constants import POINTER_MAP_OVERFLOW_FOLLOWING_PAGE_TYPE -from sqlite_dissect.constants import POINTER_MAP_PAGE_TYPES -from sqlite_dissect.constants import SQLITE_DATABASE_HEADER_LENGTH -from sqlite_dissect.constants import SQLITE_MASTER_SCHEMA_ROOT_PAGE -from sqlite_dissect.constants import TABLE_INTERIOR_CELL_CLASS -from sqlite_dissect.constants import TABLE_INTERIOR_PAGE_HEX_ID -from sqlite_dissect.constants import TABLE_LEAF_CELL_CLASS -from sqlite_dissect.constants import TABLE_LEAF_PAGE_HEX_ID -from sqlite_dissect.constants import ZERO_BYTE -from sqlite_dissect.exception import BTreePageParsingError -from sqlite_dissect.exception import CellParsingError -from sqlite_dissect.exception import PageParsingError -from sqlite_dissect.file.database.payload import decode_varint -from sqlite_dissect.file.database.payload import Record -from sqlite_dissect.utilities import calculate_expected_overflow -from sqlite_dissect.utilities import get_class_instance -from sqlite_dissect.utilities import get_md5_hash + +from sqlite_dissect.constants import (CELL_LOCATION, CELL_MODULE, + CELL_POINTER_BYTE_LENGTH, CELL_SOURCE, + FIRST_OVERFLOW_PAGE_INDEX, + FIRST_OVERFLOW_PAGE_NUMBER_LENGTH, + FIRST_OVERFLOW_PARENT_PAGE_NUMBER, + FREEBLOCK_BYTE_LENGTH, + FREELIST_HEADER_LENGTH, + FREELIST_LEAF_PAGE_NUMBER_LENGTH, + FREELIST_NEXT_TRUNK_PAGE_LENGTH, + INDEX_INTERIOR_CELL_CLASS, + INDEX_INTERIOR_PAGE_HEX_ID, + INDEX_LEAF_CELL_CLASS, + INDEX_LEAF_PAGE_HEX_ID, + INTERIOR_PAGE_HEADER_CLASS, + LEAF_PAGE_HEADER_CLASS, + LEFT_CHILD_POINTER_BYTE_LENGTH, + LOGGER_NAME, MASTER_PAGE_HEX_ID, + NEXT_FREEBLOCK_OFFSET_LENGTH, + OVERFLOW_HEADER_LENGTH, + PAGE_FRAGMENT_LIMIT, PAGE_HEADER_MODULE, + PAGE_TYPE, PAGE_TYPE_LENGTH, + POINTER_MAP_B_TREE_NON_ROOT_PAGE_TYPE, + POINTER_MAP_B_TREE_ROOT_PAGE_TYPE, + POINTER_MAP_ENTRY_LENGTH, + POINTER_MAP_FREELIST_PAGE_TYPE, + POINTER_MAP_OVERFLOW_FIRST_PAGE_TYPE, + POINTER_MAP_OVERFLOW_FOLLOWING_PAGE_TYPE, + POINTER_MAP_PAGE_TYPES, + SQLITE_DATABASE_HEADER_LENGTH, + SQLITE_MASTER_SCHEMA_ROOT_PAGE, + TABLE_INTERIOR_CELL_CLASS, + TABLE_INTERIOR_PAGE_HEX_ID, + TABLE_LEAF_CELL_CLASS, + TABLE_LEAF_PAGE_HEX_ID, ZERO_BYTE) +from sqlite_dissect.exception import (BTreePageParsingError, CellParsingError, + PageParsingError) +from sqlite_dissect.file.database.payload import Record, decode_varint +from sqlite_dissect.utilities import (calculate_expected_overflow, + get_class_instance, get_md5_hash) """ @@ -1057,7 +1049,7 @@ def __init__(self, version_interface, page_version_number, file_offset, page_num self.bytes_on_first_page = p if p > u - 35: - m = (((u - 12) * 32) / 255) - 23 + m = (((u - 12) * 32) // 255) - 23 self.bytes_on_first_page = m + ((p - m) % (u - 4)) if self.bytes_on_first_page > u - 35: self.bytes_on_first_page = m @@ -1239,7 +1231,7 @@ def __init__(self, version_interface, page_version_number, file_offset, page_num u = self._page_size p = self.payload_byte_size - x = (((u - 12) * 64) / 255) - 23 + x = (((u - 12) * 64) // 255) - 23 """ @@ -1273,7 +1265,7 @@ def __init__(self, version_interface, page_version_number, file_offset, page_num self.bytes_on_first_page = p if p > x: - m = (((u - 12) * 32) / 255) - 23 + m = (((u - 12) * 32) // 255) - 23 self.bytes_on_first_page = m + ((p - m) % (u - 4)) if self.bytes_on_first_page > x: self.bytes_on_first_page = m @@ -1482,7 +1474,7 @@ def __init__(self, version_interface, page_version_number, file_offset, page_num u = self._page_size p = self.payload_byte_size - x = (((u - 12) * 64) / 255) - 23 + x = (((u - 12) * 64) // 255) - 23 """ @@ -1516,7 +1508,7 @@ def __init__(self, version_interface, page_version_number, file_offset, page_num self.bytes_on_first_page = p if p > x: - m = (((u - 12) * 32) / 255) - 23 + m = (((u - 12) * 32) // 255) - 23 self.bytes_on_first_page = m + ((p - m) % (u - 4)) if self.bytes_on_first_page > x: self.bytes_on_first_page = m diff --git a/sqlite_dissect/file/wal_index/wal_index.py b/sqlite_dissect/file/wal_index/wal_index.py index e5e7767..c5cf578 100644 --- a/sqlite_dissect/file/wal_index/wal_index.py +++ b/sqlite_dissect/file/wal_index/wal_index.py @@ -1,8 +1,8 @@ from logging import getLogger from struct import unpack -from sqlite_dissect.constants import FILE_TYPE -from sqlite_dissect.constants import LOGGER_NAME -from sqlite_dissect.constants import WAL_INDEX_HEADER_LENGTH + +from sqlite_dissect.constants import (FILE_TYPE, LOGGER_NAME, + WAL_INDEX_HEADER_LENGTH) from sqlite_dissect.file.file_handle import FileHandle """ @@ -28,7 +28,7 @@ def __init__(self, file_name, file_size=None): zero = False start = WAL_INDEX_HEADER_LENGTH while not zero: - i = (start - WAL_INDEX_HEADER_LENGTH) / 4 + i = (start - WAL_INDEX_HEADER_LENGTH) // 4 data = unpack(b"= 12 and serial_type % 2 == 0: - content_size = int((serial_type - 12) / 2) + content_size = int((serial_type - 12) // 2) value = record_body[offset:offset + content_size] # A string in the database encoding and is (N-13)/2 bytes in length. The nul terminator is omitted elif serial_type >= 13 and serial_type % 2 == 1: - content_size = int((serial_type - 13) / 2) + content_size = int((serial_type - 13) // 2) value = record_body[offset:offset + content_size] else: From 236f5f6dab3cf87439c8bb22ad55555595cdf25e Mon Sep 17 00:00:00 2001 From: Keith Chason Date: Mon, 28 Aug 2023 16:09:15 +0000 Subject: [PATCH 35/35] Switch 3.6 to 3.11 --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9c78ece..889c469 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,7 +11,7 @@ jobs: # This allows the pipeline to be run against multiple Python versions. eg. [3.6, 3.7, 3.8, 3.9]. This results # in linting and unit tests running for all listed versions as well as the creation of packages and wheels on # creation of a tag in Git. - python-version: [ 3.6, 3.7, 3.8, 3.9, "3.10" ] + python-version: [ 3.7, 3.8, 3.9, "3.10", "3.11" ] steps: # Get the code from the repository to be packaged