diff --git a/tests/providers/googledrive/test_metadata.py b/tests/providers/googledrive/test_metadata.py index f425504ea..aedfb31d3 100644 --- a/tests/providers/googledrive/test_metadata.py +++ b/tests/providers/googledrive/test_metadata.py @@ -44,6 +44,7 @@ def test_file_metadata_drive(self, basepath, root_provider_fixtures): assert parsed.materialized_path == str(path) assert parsed.is_google_doc is False assert parsed.export_name == item['title'] + assert parsed.alt_export_name == 'PART_1420130849837.pdf' def test_file_metadata_drive_slashes(self, basepath, root_provider_fixtures): item = root_provider_fixtures['file_forward_slash'] @@ -66,6 +67,7 @@ def test_file_metadata_drive_slashes(self, basepath, root_provider_fixtures): assert parsed.materialized_path == str(path) assert parsed.is_google_doc is False assert parsed.export_name == item['title'] + assert parsed.alt_export_name == 'PART_1420130849837.pdf' def test_file_metadata_docs(self, basepath, root_provider_fixtures): item = root_provider_fixtures['docs_file_metadata'] @@ -80,6 +82,7 @@ def test_file_metadata_docs(self, basepath, root_provider_fixtures): } assert parsed.is_google_doc is True assert parsed.export_name == item['title'] + '.docx' + assert parsed.alt_export_name == 'version-test.pdf' def test_folder_metadata(self, root_provider_fixtures): item = root_provider_fixtures['folder_metadata'] diff --git a/tests/providers/googledrive/test_provider.py b/tests/providers/googledrive/test_provider.py index 81624f5a5..0e94a0a9e 100644 --- a/tests/providers/googledrive/test_provider.py +++ b/tests/providers/googledrive/test_provider.py @@ -588,6 +588,7 @@ class TestDownload: ds.DRIVE_IGNORE_VERSION) GDOC_EXPORT_MIME_TYPE = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' + GDOC_ALT_EXPORT_MIME_TYPE = 'application/pdf' @pytest.mark.asyncio @pytest.mark.aiohttpretty @@ -619,6 +620,36 @@ async def test_download_editable_gdoc_no_revision(self, provider, sharing_fixtur assert aiohttpretty.has_call(method='GET', uri=revisions_url) assert aiohttpretty.has_call(method='GET', uri=download_file_url) + @pytest.mark.asyncio + @pytest.mark.aiohttpretty + async def test_download_editable_gdoc_as_mfr(self, provider, sharing_fixtures): + metadata_body = sharing_fixtures['editable_gdoc']['metadata'] + path = GoogleDrivePath( + '/sharing/editable_gdoc', + _ids=['1', '2', metadata_body['id']] + ) + + metadata_query = provider._build_query(path.identifier) + metadata_url = provider.build_url('files', path.identifier) + aiohttpretty.register_json_uri('GET', metadata_url, body=metadata_body) + + revisions_body = sharing_fixtures['editable_gdoc']['revisions'] + revisions_url = provider.build_url('files', metadata_body['id'], 'revisions') + aiohttpretty.register_json_uri('GET', revisions_url, body=revisions_body) + + file_content = b'we love you conrad' + download_file_url = metadata_body['exportLinks'][self.GDOC_ALT_EXPORT_MIME_TYPE] + aiohttpretty.register_uri('GET', download_file_url, body=file_content, auto_length=True) + + result = await provider.download(path, mfr='true') + assert result.name == 'editable_gdoc.pdf' + + content = await result.read() + assert content == file_content + assert aiohttpretty.has_call(method='GET', uri=metadata_url) + assert aiohttpretty.has_call(method='GET', uri=revisions_url) + assert aiohttpretty.has_call(method='GET', uri=download_file_url) + @pytest.mark.asyncio @pytest.mark.aiohttpretty async def test_download_editable_gdoc_good_revision(self, provider, sharing_fixtures): @@ -1577,6 +1608,19 @@ async def test_intra_copy_file(self, provider, root_provider_fixtures): class TestOperationsOrMisc: + def test_misc_utils(self): + metadata = { + 'mimeType': 'application/vnd.google-apps.drawing', + 'exportLinks': { + 'image/jpeg': 'badurl.osf.899' + } + } + ext = drive_utils.get_alt_download_extension(metadata) + link = drive_utils.get_alt_export_link(metadata) + + assert ext == '.jpg' + assert link == 'badurl.osf.899' + @pytest.mark.asyncio @pytest.mark.aiohttpretty async def test_can_duplicate_names(self, provider): diff --git a/waterbutler/providers/googledrive/metadata.py b/waterbutler/providers/googledrive/metadata.py index 5e4fbafa8..e4b5931f7 100644 --- a/waterbutler/providers/googledrive/metadata.py +++ b/waterbutler/providers/googledrive/metadata.py @@ -136,6 +136,14 @@ def export_name(self): title += ext return title + @property + def alt_export_name(self): + title = self._file_title + if self.is_google_doc: + ext = utils.get_alt_download_extension(self.raw) + title += ext + return title + @property def _file_title(self): return self.raw['title'] diff --git a/waterbutler/providers/googledrive/provider.py b/waterbutler/providers/googledrive/provider.py index 73712d601..68444647f 100644 --- a/waterbutler/providers/googledrive/provider.py +++ b/waterbutler/providers/googledrive/provider.py @@ -234,9 +234,18 @@ async def download(self, # type: ignore metadata = await self.metadata(path, revision=revision) + if kwargs.get('mfr', None) and kwargs['mfr'].lower() == 'true': + download_url = drive_utils.get_alt_export_link(metadata.raw) # type: ignore + export_name = metadata.alt_export_name + else: + + # TODO figure out metadata.raw.get('downloadUrl') + download_url = metadata.raw.get('downloadUrl') or drive_utils.get_export_link(metadata.raw) # type: ignore + export_name = metadata.export_name # type: ignore + download_resp = await self.make_request( 'GET', - metadata.raw.get('downloadUrl') or drive_utils.get_export_link(metadata.raw), # type: ignore + download_url, range=range, expects=(200, 206), throws=exceptions.DownloadError, @@ -251,7 +260,7 @@ async def download(self, # type: ignore if download_resp.headers.get('Content-Type'): # TODO: Add these properties to base class officially, instead of as one-off stream.content_type = download_resp.headers['Content-Type'] # type: ignore - stream.name = metadata.export_name # type: ignore + stream.name = export_name # type: ignore return stream async def upload(self, stream, path: wb_path.WaterButlerPath, *args, **kwargs) \ diff --git a/waterbutler/providers/googledrive/utils.py b/waterbutler/providers/googledrive/utils.py index 6f5b1be98..4ba6cc178 100644 --- a/waterbutler/providers/googledrive/utils.py +++ b/waterbutler/providers/googledrive/utils.py @@ -4,6 +4,8 @@ 'ext': '.gdoc', 'download_ext': '.docx', 'type': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + 'alt_download_ext': '.pdf', + 'alt_type': 'application/pdf', }, { 'mime_type': 'application/vnd.google-apps.drawing', @@ -37,28 +39,42 @@ def is_docs_file(metadata): def get_mimetype_from_ext(ext): - for format in DOCS_FORMATS: - if format['ext'] == ext: - return format['mime_type'] + for format_type in DOCS_FORMATS: + if format_type['ext'] == ext: + return format_type['mime_type'] def get_format(metadata): - for format in DOCS_FORMATS: - if format['mime_type'] == metadata['mimeType']: - return format + for format_type in DOCS_FORMATS: + if format_type['mime_type'] == metadata['mimeType']: + return format_type return DOCS_DEFAULT_FORMAT def get_extension(metadata): - format = get_format(metadata) - return format['ext'] + format_type = get_format(metadata) + return format_type['ext'] def get_download_extension(metadata): - format = get_format(metadata) - return format['download_ext'] + format_type = get_format(metadata) + return format_type['download_ext'] + + +def get_alt_download_extension(metadata): + format_type = get_format(metadata) + return format_type.get('alt_download_ext', None) or format_type['download_ext'] + + +def get_alt_export_link(metadata): + format_type = get_format(metadata) + export_links = metadata['exportLinks'] + if format_type.get('alt_type'): + return export_links.get(format_type['alt_type']) + else: + return export_links[format_type['type']] def get_export_link(metadata): - format = get_format(metadata) - return metadata['exportLinks'][format['type']] + format_type = get_format(metadata) + return metadata['exportLinks'][format_type['type']] diff --git a/waterbutler/server/api/v1/provider/metadata.py b/waterbutler/server/api/v1/provider/metadata.py index c99fb9fa0..ed1cb39e2 100644 --- a/waterbutler/server/api/v1/provider/metadata.py +++ b/waterbutler/server/api/v1/provider/metadata.py @@ -71,6 +71,7 @@ async def download_file(self): range=request_range, accept_url='direct' not in self.request.query_arguments, mode=self.get_query_argument('mode', default=None), + mfr=self.get_query_argument('mfr', default=None) ) if isinstance(stream, str):