From 6dbd910af065df68d1f61fb64fb58ae3c3407ccf Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Sat, 13 Sep 2025 07:27:00 +0200 Subject: [PATCH 01/18] outline-import: backend upload endpoint + frontend upload page\n\nBackend:\n- Add POST /api/v1.0/outline_import/upload (zip)\n- Parse .md files, create doc tree from folders, rewrite image links to attachments, convert to Yjs via Y-provider\nFrontend:\n- Add /import/outline page with zip file picker + POST\n- Add menu entry 'Import from Outline' in left panel header\n- Add minimal i18n keys (en, fr) --- src/backend/core/api/viewsets.py | 158 ++++++++++++++++++ src/backend/core/urls.py | 4 + .../components/LeftPanelHeaderButton.tsx | 29 +++- .../apps/impress/src/i18n/translations.json | 10 +- .../src/pages/import/outline/index.tsx | 84 ++++++++++ 5 files changed, 275 insertions(+), 10 deletions(-) create mode 100644 src/frontend/apps/impress/src/pages/import/outline/index.tsx diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index 9b44be6897..dec2ee69fd 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -3,6 +3,10 @@ import base64 import json +import io +import mimetypes +import re +import zipfile import logging import uuid from collections import defaultdict @@ -2174,3 +2178,157 @@ def _load_theme_customization(self): ) return theme_customization + + +class OutlineImportUploadView(drf.views.APIView): + """Upload an Outline export (.zip) and import it as Docs documents. + + Expects a multipart/form-data with field name 'file' containing a .zip archive + produced by Outline export. + + Returns a JSON payload with a list of created document ids. + """ + + parser_classes = [drf.parsers.MultiPartParser] + permission_classes = [permissions.IsAuthenticated] + + def post(self, request): + uploaded = request.FILES.get("file") + if not uploaded: + raise drf.exceptions.ValidationError({"file": "File is required"}) + + name = getattr(uploaded, "name", "") + if not name.endswith(".zip"): + raise drf.exceptions.ValidationError({"file": "Must be a .zip file"}) + + try: + content = uploaded.read() + archive = zipfile.ZipFile(io.BytesIO(content)) + except zipfile.BadZipFile as exc: + raise drf.exceptions.ValidationError({"file": "Invalid zip archive"}) from exc + + created_ids: list[str] = [] + dir_docs: dict[str, models.Document] = {} + md_files = sorted([n for n in archive.namelist() if n.lower().endswith(".md")]) + + def ensure_dir_docs(dir_path: str) -> models.Document | None: + if not dir_path: + return None + parts = [p for p in dir_path.split("/") if p] + parent: models.Document | None = None + current = "" + for part in parts: + current = f"{current}/{part}" if current else part + if current in dir_docs: + parent = dir_docs[current] + continue + # create a container doc with the folder name + if parent is None: + doc = models.Document.add_root( + depth=1, + creator=request.user, + title=part, + link_reach=models.LinkReachChoices.RESTRICTED, + ) + else: + doc = parent.add_child(creator=request.user, title=part) + models.DocumentAccess.objects.update_or_create( + document=doc, + user=request.user, + defaults={"role": models.RoleChoices.OWNER}, + ) + dir_docs[current] = doc + parent = doc + return parent + + img_pattern = re.compile(r"!\[[^\]]*\]\(([^)]+)\)") + + def upload_attachment(doc: models.Document, arcname: str, data: bytes) -> str: + content_type, _ = mimetypes.guess_type(arcname) + ext = (arcname.split(".")[-1] or "bin").lower() + file_id = uuid.uuid4() + key = f"{doc.key_base}/{enums.ATTACHMENTS_FOLDER:s}/{file_id!s}.{ext}" + extra_args = { + "Metadata": { + "owner": str(request.user.id), + "status": enums.DocumentAttachmentStatus.READY, + }, + } + if content_type: + extra_args["ContentType"] = content_type + default_storage.connection.meta.client.upload_fileobj( + io.BytesIO(data), default_storage.bucket_name, key, ExtraArgs=extra_args + ) + doc.attachments.append(key) + doc.save(update_fields=["attachments", "updated_at"]) + return f"{settings.MEDIA_BASE_URL}{settings.MEDIA_URL}{key}" + + def read_bytes(path_in_zip: str) -> bytes | None: + try: + with archive.open(path_in_zip, "r") as f: + return f.read() + except KeyError: + return None + + converter = YdocConverter() + + for md_path in md_files: + dir_path, file_name = ( + (md_path.rsplit("/", 1) + [""])[:2] if "/" in md_path else ("", md_path) + ) + parent_doc = ensure_dir_docs(dir_path) + + try: + raw_md = archive.read(md_path).decode("utf-8", errors="ignore") + except Exception: # noqa: BLE001 + raw_md = "" + + title_match = re.search(r"^#\s+(.+)$", raw_md, flags=re.MULTILINE) + title = title_match.group(1).strip() if title_match else file_name.rsplit(".", 1)[0] + + if parent_doc is None: + doc = models.Document.add_root( + depth=1, + creator=request.user, + title=title, + link_reach=models.LinkReachChoices.RESTRICTED, + ) + else: + doc = parent_doc.add_child(creator=request.user, title=title) + + models.DocumentAccess.objects.update_or_create( + document=doc, + user=request.user, + defaults={"role": models.RoleChoices.OWNER}, + ) + + def replace_img_link(match: re.Match[str]) -> str: + url = match.group(1) + if url.startswith("http://") or url.startswith("https://"): + return match.group(0) + asset_rel = f"{dir_path}/{url}" if dir_path else url + asset_rel = re.sub(r"/+", "/", asset_rel) + data = read_bytes(asset_rel) + if data is None: + return match.group(0) + media_url = upload_attachment(doc, arcname=url, data=data) + return match.group(0).replace(url, media_url) + + rewritten_md = img_pattern.sub(replace_img_link, raw_md) + + try: + ydoc_b64 = converter.convert( + rewritten_md.encode("utf-8"), + content_type="text/markdown", + accept="application/vnd.yjs.doc", + ) + doc.content = ydoc_b64 + doc.save(update_fields=["content", "updated_at"]) + except Exception as e: # noqa: BLE001 + logger.exception("Outline import failed for %s: %s", md_path, e) + + created_ids.append(str(doc.id)) + + return drf.response.Response( + {"created_document_ids": created_ids}, status=drf.status.HTTP_201_CREATED + ) diff --git a/src/backend/core/urls.py b/src/backend/core/urls.py index 2ad8b00395..ef5ede4cfb 100644 --- a/src/backend/core/urls.py +++ b/src/backend/core/urls.py @@ -58,6 +58,10 @@ r"^templates/(?P[0-9a-z-]*)/", include(template_related_router.urls), ), + path( + "outline_import/upload", + viewsets.OutlineImportUploadView.as_view(), + ), ] ), ), diff --git a/src/frontend/apps/impress/src/features/left-panel/components/LeftPanelHeaderButton.tsx b/src/frontend/apps/impress/src/features/left-panel/components/LeftPanelHeaderButton.tsx index 5ca2315913..8d3b9594ea 100644 --- a/src/frontend/apps/impress/src/features/left-panel/components/LeftPanelHeaderButton.tsx +++ b/src/frontend/apps/impress/src/features/left-panel/components/LeftPanelHeaderButton.tsx @@ -2,7 +2,7 @@ import { Button } from '@openfun/cunningham-react'; import { useRouter } from 'next/router'; import { useTranslation } from 'react-i18next'; -import { Icon } from '@/components'; +import { DropdownMenu, Icon } from '@/components'; import { useCreateDoc } from '@/features/docs/doc-management'; import { useLeftPanelStore } from '../stores'; @@ -18,14 +18,25 @@ export const LeftPanelHeaderButton = () => { }, }); return ( - + + ); }; diff --git a/src/frontend/apps/impress/src/i18n/translations.json b/src/frontend/apps/impress/src/i18n/translations.json index 47fabc6b99..1cf5dc2b3a 100644 --- a/src/frontend/apps/impress/src/i18n/translations.json +++ b/src/frontend/apps/impress/src/i18n/translations.json @@ -482,7 +482,11 @@ "Share with {{count}} users_one": "Share with {{count}} user", "Shared with {{count}} users_many": "Shared with {{count}} users", "Shared with {{count}} users_one": "Shared with {{count}} user", - "Shared with {{count}} users_other": "Shared with {{count}} users" + "Shared with {{count}} users_other": "Shared with {{count}} users", + "Import from Outline": "Import from Outline", + "Import Outline archive": "Import Outline archive", + "Select a .zip file": "Select a .zip file", + "Import": "Import" } }, "es": { @@ -838,6 +842,10 @@ "Open Source": "Open Source", "Open the document options": "Ouvrir les options du document", "Open the header menu": "Ouvrir le menu d'en-tête", + "Import from Outline": "Importer depuis Outline", + "Import Outline archive": "Importer une archive Outline", + "Select a .zip file": "Sélectionnez un fichier .zip", + "Import": "Importer", "Open document actions menu": "Ouvrir le menu d'actions du document", "Open the menu of actions for the document: {{title}}": "Ouvrir le menu des actions du document : {{title}}", "Main content": "Contenu principal", diff --git a/src/frontend/apps/impress/src/pages/import/outline/index.tsx b/src/frontend/apps/impress/src/pages/import/outline/index.tsx new file mode 100644 index 0000000000..04a9036dcd --- /dev/null +++ b/src/frontend/apps/impress/src/pages/import/outline/index.tsx @@ -0,0 +1,84 @@ +import { Button, Loader } from '@openfun/cunningham-react'; +import { useRouter } from 'next/router'; +import { ReactElement, useState } from 'react'; +import { useTranslation } from 'react-i18next'; + +import { Box, Text } from '@/components'; +import { baseApiUrl } from '@/features/docs/doc-management'; +import { MainLayout } from '@/layouts'; +import { NextPageWithLayout } from '@/types/next'; + +const Page: NextPageWithLayout = () => { + const { t } = useTranslation(); + const router = useRouter(); + const [file, setFile] = useState(null); + const [isUploading, setIsUploading] = useState(false); + const [error, setError] = useState(null); + + const onSubmit = async (e: React.FormEvent) => { + e.preventDefault(); + setError(null); + if (!file) { + setError(t('Please select a .zip file')); + return; + } + setIsUploading(true); + try { + const form = new FormData(); + form.append('file', file); + const resp = await fetch(`${baseApiUrl('1.0')}outline_import/upload`, { + method: 'POST', + body: form, + credentials: 'include', + }); + if (!resp.ok) { + throw new Error(await resp.text()); + } + const data = (await resp.json()) as { created_document_ids: string[] }; + const first = data.created_document_ids?.[0]; + if (first) { + void router.replace(`/docs/${first}`); + } else { + void router.replace('/'); + } + } catch (e) { + setError(t('Something bad happens, please retry.')); + } finally { + setIsUploading(false); + } + }; + + return ( + + + {t('Import Outline archive')} + +
+ setFile(e.target.files?.[0] ?? null)} + aria-label={t('Select a .zip file')} + /> + + + {isUploading && } + + {error && ( + + {error} + + )} +
+
+ ); +}; + +Page.getLayout = function getLayout(page: ReactElement) { + return {page}; +}; + +export default Page; + From 1fd44066b2c4ca9045ea604821e0970ae93125b2 Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Sat, 13 Sep 2025 07:41:48 +0200 Subject: [PATCH 02/18] frontend(import-outline): fix baseApiUrl import path --- src/frontend/apps/impress/src/pages/import/outline/index.tsx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/frontend/apps/impress/src/pages/import/outline/index.tsx b/src/frontend/apps/impress/src/pages/import/outline/index.tsx index 04a9036dcd..0a3ce09b09 100644 --- a/src/frontend/apps/impress/src/pages/import/outline/index.tsx +++ b/src/frontend/apps/impress/src/pages/import/outline/index.tsx @@ -4,7 +4,7 @@ import { ReactElement, useState } from 'react'; import { useTranslation } from 'react-i18next'; import { Box, Text } from '@/components'; -import { baseApiUrl } from '@/features/docs/doc-management'; +import { baseApiUrl } from '@/api'; import { MainLayout } from '@/layouts'; import { NextPageWithLayout } from '@/types/next'; @@ -81,4 +81,3 @@ Page.getLayout = function getLayout(page: ReactElement) { }; export default Page; - From becc51460d320139a6cd785d7128015d88c6ac44 Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Sat, 13 Sep 2025 07:42:28 +0200 Subject: [PATCH 03/18] outline-import: run malware scan on uploaded assets --- src/backend/core/api/viewsets.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index dec2ee69fd..b6a2aaeb3f 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -2261,6 +2261,7 @@ def upload_attachment(doc: models.Document, arcname: str, data: bytes) -> str: ) doc.attachments.append(key) doc.save(update_fields=["attachments", "updated_at"]) + malware_detection.analyse_file(key, document_id=doc.id) return f"{settings.MEDIA_BASE_URL}{settings.MEDIA_URL}{key}" def read_bytes(path_in_zip: str) -> bytes | None: From 9f4fb0692e19c47991a9c3d3ac591089eb05fa4e Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Sat, 13 Sep 2025 08:46:42 +0200 Subject: [PATCH 04/18] tests(outline-import): add API tests for upload (.zip) flow\n- Anonymous forbidden\n- Authenticated happy path with local image and mocked conversion --- .../imports/test_api_outline_import_upload.py | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 src/backend/core/tests/imports/test_api_outline_import_upload.py diff --git a/src/backend/core/tests/imports/test_api_outline_import_upload.py b/src/backend/core/tests/imports/test_api_outline_import_upload.py new file mode 100644 index 0000000000..d0b88b920a --- /dev/null +++ b/src/backend/core/tests/imports/test_api_outline_import_upload.py @@ -0,0 +1,79 @@ +"""Tests for the Outline zip import API endpoint.""" + +import io +import zipfile +from unittest.mock import patch + +from django.core.files.uploadedfile import SimpleUploadedFile + +import pytest +from rest_framework.test import APIClient + +from core import factories +from core.api.viewsets import malware_detection + + +pytestmark = pytest.mark.django_db + + +def make_zip_with_markdown_and_image(md_path: str, md_content: str, img_path: str, img_bytes: bytes) -> bytes: + buf = io.BytesIO() + with zipfile.ZipFile(buf, mode="w") as zf: + zf.writestr(md_path, md_content) + zf.writestr(img_path, img_bytes) + return buf.getvalue() + + +def test_outline_import_upload_anonymous_forbidden(): + """Anonymous users must not be able to use the import endpoint.""" + client = APIClient() + + # Minimal empty zip + buf = io.BytesIO() + with zipfile.ZipFile(buf, mode="w"): + pass + upload = SimpleUploadedFile(name="export.zip", content=buf.getvalue(), content_type="application/zip") + + response = client.post("/api/v1.0/outline_import/upload", {"file": upload}, format="multipart") + + assert response.status_code == 401 + assert response.json()["detail"] == "Authentication credentials were not provided." + + +@patch("core.services.converter_services.YdocConverter.convert", return_value="YmFzZTY0Y29udGVudA==") +def test_outline_import_upload_authenticated_success(mock_convert): + """Authenticated users can upload an Outline export zip and create documents.""" + user = factories.UserFactory() + client = APIClient() + client.force_login(user) + + # Markdown referencing a local image in the same directory + md = "# Imported Title\n\nSome text.\n\n![Alt](image.png)\n" + img = ( + b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x06\x00" + b"\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\nIDATx\x9cc\xf8\xff\xff?\x00\x05\xfe\x02\xfe" + b"\xa7V\xbd\xfa\x00\x00\x00\x00IEND\xaeB`\x82" + ) + zip_bytes = make_zip_with_markdown_and_image( + md_path="Folder1/page.md", + md_content=md, + img_path="Folder1/image.png", + img_bytes=img, + ) + + upload = SimpleUploadedFile(name="export.zip", content=zip_bytes, content_type="application/zip") + + with patch.object(malware_detection, "analyse_file") as mock_analyse_file: + response = client.post("/api/v1.0/outline_import/upload", {"file": upload}, format="multipart") + + assert response.status_code == 201 + data = response.json() + assert "created_document_ids" in data + # Only the markdown-backed document ids are returned (container folders are not listed) + assert len(data["created_document_ids"]) == 1 + + # The converter must have been called once per markdown file + mock_convert.assert_called_once() + # An antivirus scan is run for the uploaded image + assert mock_analyse_file.called + From 4f3b62db87443268d5542f3cbec24594e9f215d4 Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Sat, 13 Sep 2025 08:50:13 +0200 Subject: [PATCH 05/18] refactor(outline-import): move import logic to core/services/outline_import.py and call from view\n- Keep view thin; service handles zip, images, conversion, attachments\n- Fix imports accordingly --- src/backend/core/api/viewsets.py | 133 +--------------- src/backend/core/services/outline_import.py | 167 ++++++++++++++++++++ 2 files changed, 171 insertions(+), 129 deletions(-) create mode 100644 src/backend/core/services/outline_import.py diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index b6a2aaeb3f..b18c7be901 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -4,8 +4,6 @@ import base64 import json import io -import mimetypes -import re import zipfile import logging import uuid @@ -2203,133 +2201,10 @@ def post(self, request): try: content = uploaded.read() - archive = zipfile.ZipFile(io.BytesIO(content)) + # Validate the archive format to fail fast on invalid uploads + zipfile.ZipFile(io.BytesIO(content)) except zipfile.BadZipFile as exc: raise drf.exceptions.ValidationError({"file": "Invalid zip archive"}) from exc - created_ids: list[str] = [] - dir_docs: dict[str, models.Document] = {} - md_files = sorted([n for n in archive.namelist() if n.lower().endswith(".md")]) - - def ensure_dir_docs(dir_path: str) -> models.Document | None: - if not dir_path: - return None - parts = [p for p in dir_path.split("/") if p] - parent: models.Document | None = None - current = "" - for part in parts: - current = f"{current}/{part}" if current else part - if current in dir_docs: - parent = dir_docs[current] - continue - # create a container doc with the folder name - if parent is None: - doc = models.Document.add_root( - depth=1, - creator=request.user, - title=part, - link_reach=models.LinkReachChoices.RESTRICTED, - ) - else: - doc = parent.add_child(creator=request.user, title=part) - models.DocumentAccess.objects.update_or_create( - document=doc, - user=request.user, - defaults={"role": models.RoleChoices.OWNER}, - ) - dir_docs[current] = doc - parent = doc - return parent - - img_pattern = re.compile(r"!\[[^\]]*\]\(([^)]+)\)") - - def upload_attachment(doc: models.Document, arcname: str, data: bytes) -> str: - content_type, _ = mimetypes.guess_type(arcname) - ext = (arcname.split(".")[-1] or "bin").lower() - file_id = uuid.uuid4() - key = f"{doc.key_base}/{enums.ATTACHMENTS_FOLDER:s}/{file_id!s}.{ext}" - extra_args = { - "Metadata": { - "owner": str(request.user.id), - "status": enums.DocumentAttachmentStatus.READY, - }, - } - if content_type: - extra_args["ContentType"] = content_type - default_storage.connection.meta.client.upload_fileobj( - io.BytesIO(data), default_storage.bucket_name, key, ExtraArgs=extra_args - ) - doc.attachments.append(key) - doc.save(update_fields=["attachments", "updated_at"]) - malware_detection.analyse_file(key, document_id=doc.id) - return f"{settings.MEDIA_BASE_URL}{settings.MEDIA_URL}{key}" - - def read_bytes(path_in_zip: str) -> bytes | None: - try: - with archive.open(path_in_zip, "r") as f: - return f.read() - except KeyError: - return None - - converter = YdocConverter() - - for md_path in md_files: - dir_path, file_name = ( - (md_path.rsplit("/", 1) + [""])[:2] if "/" in md_path else ("", md_path) - ) - parent_doc = ensure_dir_docs(dir_path) - - try: - raw_md = archive.read(md_path).decode("utf-8", errors="ignore") - except Exception: # noqa: BLE001 - raw_md = "" - - title_match = re.search(r"^#\s+(.+)$", raw_md, flags=re.MULTILINE) - title = title_match.group(1).strip() if title_match else file_name.rsplit(".", 1)[0] - - if parent_doc is None: - doc = models.Document.add_root( - depth=1, - creator=request.user, - title=title, - link_reach=models.LinkReachChoices.RESTRICTED, - ) - else: - doc = parent_doc.add_child(creator=request.user, title=title) - - models.DocumentAccess.objects.update_or_create( - document=doc, - user=request.user, - defaults={"role": models.RoleChoices.OWNER}, - ) - - def replace_img_link(match: re.Match[str]) -> str: - url = match.group(1) - if url.startswith("http://") or url.startswith("https://"): - return match.group(0) - asset_rel = f"{dir_path}/{url}" if dir_path else url - asset_rel = re.sub(r"/+", "/", asset_rel) - data = read_bytes(asset_rel) - if data is None: - return match.group(0) - media_url = upload_attachment(doc, arcname=url, data=data) - return match.group(0).replace(url, media_url) - - rewritten_md = img_pattern.sub(replace_img_link, raw_md) - - try: - ydoc_b64 = converter.convert( - rewritten_md.encode("utf-8"), - content_type="text/markdown", - accept="application/vnd.yjs.doc", - ) - doc.content = ydoc_b64 - doc.save(update_fields=["content", "updated_at"]) - except Exception as e: # noqa: BLE001 - logger.exception("Outline import failed for %s: %s", md_path, e) - - created_ids.append(str(doc.id)) - - return drf.response.Response( - {"created_document_ids": created_ids}, status=drf.status.HTTP_201_CREATED - ) + created_ids = process_outline_zip(request.user, content) + return drf.response.Response({"created_document_ids": created_ids}, status=drf.status.HTTP_201_CREATED) diff --git a/src/backend/core/services/outline_import.py b/src/backend/core/services/outline_import.py new file mode 100644 index 0000000000..b05f21872b --- /dev/null +++ b/src/backend/core/services/outline_import.py @@ -0,0 +1,167 @@ +"""Service to import an Outline export (.zip) into Docs documents.""" + +from __future__ import annotations + +import io +import mimetypes +import re +import uuid +import zipfile +from typing import Iterable + +from django.conf import settings +from django.core.files.storage import default_storage + +from lasuite.malware_detection import malware_detection + +from core import enums, models +from core.services.converter_services import YdocConverter + + +def _ensure_dir_documents(user, dir_path: str, dir_docs: dict[str, models.Document]) -> models.Document | None: + """Ensure each path segment in dir_path has a container document. + + Returns the deepest parent document or None when dir_path is empty. + """ + if not dir_path: + return None + + parts = [p for p in dir_path.split("/") if p] + parent: models.Document | None = None + current = "" + for part in parts: + current = f"{current}/{part}" if current else part + if current in dir_docs: + parent = dir_docs[current] + continue + + if parent is None: + doc = models.Document.add_root( + depth=1, + creator=user, + title=part, + link_reach=models.LinkReachChoices.RESTRICTED, + ) + else: + doc = parent.add_child(creator=user, title=part) + + models.DocumentAccess.objects.update_or_create( + document=doc, + user=user, + defaults={"role": models.RoleChoices.OWNER}, + ) + dir_docs[current] = doc + parent = doc + + return parent + + +def _upload_attachment(user, doc: models.Document, arcname: str, data: bytes) -> str: + """Upload a binary asset into object storage and return its public media URL.""" + content_type, _ = mimetypes.guess_type(arcname) + ext = (arcname.split(".")[-1] or "bin").lower() + file_id = uuid.uuid4() + key = f"{doc.key_base}/{enums.ATTACHMENTS_FOLDER:s}/{file_id!s}.{ext}" + extra_args = { + "Metadata": { + "owner": str(user.id), + "status": enums.DocumentAttachmentStatus.READY, + }, + } + if content_type: + extra_args["ContentType"] = content_type + + default_storage.connection.meta.client.upload_fileobj( + io.BytesIO(data), default_storage.bucket_name, key, ExtraArgs=extra_args + ) + doc.attachments.append(key) + doc.save(update_fields=["attachments", "updated_at"]) + malware_detection.analyse_file(key, document_id=doc.id) + return f"{settings.MEDIA_BASE_URL}{settings.MEDIA_URL}{key}" + + +def process_outline_zip(user, zip_bytes: bytes) -> list[str]: + """Process an Outline export zip and create Docs documents. + + Returns the list of created document IDs (stringified UUIDs) corresponding to + markdown-backed documents. Container folders used to rebuild hierarchy are not listed. + """ + archive = zipfile.ZipFile(io.BytesIO(zip_bytes)) + + created_ids: list[str] = [] + dir_docs: dict[str, models.Document] = {} + md_files: Iterable[str] = sorted( + [n for n in archive.namelist() if n.lower().endswith(".md")] + ) + + img_pattern = re.compile(r"!\[[^\]]*\]\(([^)]+)\)") + + def read_bytes(path_in_zip: str) -> bytes | None: + try: + with archive.open(path_in_zip, "r") as f: + return f.read() + except KeyError: + return None + + converter = YdocConverter() + + for md_path in md_files: + dir_path, file_name = ( + (md_path.rsplit("/", 1) + [""])[:2] if "/" in md_path else ("", md_path) + ) + parent_doc = _ensure_dir_documents(user, dir_path, dir_docs) + + try: + raw_md = archive.read(md_path).decode("utf-8", errors="ignore") + except Exception: # noqa: BLE001 + raw_md = "" + + title_match = re.search(r"^#\s+(.+)$", raw_md, flags=re.MULTILINE) + title = title_match.group(1).strip() if title_match else file_name.rsplit(".", 1)[0] + + if parent_doc is None: + doc = models.Document.add_root( + depth=1, + creator=user, + title=title, + link_reach=models.LinkReachChoices.RESTRICTED, + ) + else: + doc = parent_doc.add_child(creator=user, title=title) + + models.DocumentAccess.objects.update_or_create( + document=doc, + user=user, + defaults={"role": models.RoleChoices.OWNER}, + ) + + def replace_img_link(match: re.Match[str]) -> str: + url = match.group(1) + if url.startswith("http://") or url.startswith("https://"): + return match.group(0) + asset_rel = f"{dir_path}/{url}" if dir_path else url + asset_rel = re.sub(r"/+", "/", asset_rel) + data = read_bytes(asset_rel) + if data is None: + return match.group(0) + media_url = _upload_attachment(user, doc, arcname=url, data=data) + return match.group(0).replace(url, media_url) + + rewritten_md = img_pattern.sub(replace_img_link, raw_md) + + try: + ydoc_b64 = converter.convert( + rewritten_md.encode("utf-8"), + content_type="text/markdown", + accept="application/vnd.yjs.doc", + ) + doc.content = ydoc_b64 + doc.save(update_fields=["content", "updated_at"]) + except Exception: # noqa: BLE001 + # Keep doc without content on conversion error but continue import + pass + + created_ids.append(str(doc.id)) + + return created_ids + From fa65c45e275b7f8a3df876d39f8ca320d01ce10c Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Sat, 13 Sep 2025 09:36:21 +0200 Subject: [PATCH 06/18] outline-import: reinforce safety and tests\n- Zip Slip protection (reject unsafe paths)\n- Ignore __MACOSX and hidden entries\n- Service unit tests (happy path + zip slip)\n- Change API path to /imports/outline/upload and update front + tests --- src/backend/core/api/viewsets.py | 6 ++- src/backend/core/services/outline_import.py | 26 +++++++++- .../imports/test_api_outline_import_upload.py | 5 +- .../services/test_outline_import_service.py | 52 +++++++++++++++++++ src/backend/core/urls.py | 2 +- .../src/pages/import/outline/index.tsx | 2 +- 6 files changed, 84 insertions(+), 9 deletions(-) create mode 100644 src/backend/core/tests/services/test_outline_import_service.py diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index b18c7be901..d0ba8686af 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -50,6 +50,7 @@ YdocConverter, ) from core.tasks.mail import send_ask_for_access_mail +from core.services.outline_import import OutlineImportError, process_outline_zip from core.utils import extract_attachments, filter_descendants from . import permissions, serializers, utils @@ -2203,8 +2204,9 @@ def post(self, request): content = uploaded.read() # Validate the archive format to fail fast on invalid uploads zipfile.ZipFile(io.BytesIO(content)) + created_ids = process_outline_zip(request.user, content) except zipfile.BadZipFile as exc: raise drf.exceptions.ValidationError({"file": "Invalid zip archive"}) from exc - - created_ids = process_outline_zip(request.user, content) + except OutlineImportError as exc: + raise drf.exceptions.ValidationError({"file": str(exc)}) from exc return drf.response.Response({"created_document_ids": created_ids}, status=drf.status.HTTP_201_CREATED) diff --git a/src/backend/core/services/outline_import.py b/src/backend/core/services/outline_import.py index b05f21872b..0c9d98575e 100644 --- a/src/backend/core/services/outline_import.py +++ b/src/backend/core/services/outline_import.py @@ -8,6 +8,7 @@ import uuid import zipfile from typing import Iterable +import posixpath from django.conf import settings from django.core.files.storage import default_storage @@ -18,6 +19,10 @@ from core.services.converter_services import YdocConverter +class OutlineImportError(Exception): + """Raised when the Outline archive is invalid or unsafe.""" + + def _ensure_dir_documents(user, dir_path: str, dir_docs: dict[str, models.Document]) -> models.Document | None: """Ensure each path segment in dir_path has a container document. @@ -88,10 +93,25 @@ def process_outline_zip(user, zip_bytes: bytes) -> list[str]: """ archive = zipfile.ZipFile(io.BytesIO(zip_bytes)) + # Basic Zip Slip protection: refuse absolute or parent-traversal entries + for name in archive.namelist(): + # Normalize to posix separators and check traversal + if name.startswith("/") or "\\" in name: + raise OutlineImportError("Unsafe path in archive") + parts = [p for p in name.split("/") if p] + if any(part == ".." for part in parts): + raise OutlineImportError("Unsafe path in archive") + created_ids: list[str] = [] dir_docs: dict[str, models.Document] = {} md_files: Iterable[str] = sorted( - [n for n in archive.namelist() if n.lower().endswith(".md")] + [ + n + for n in archive.namelist() + if n.lower().endswith(".md") + and not n.startswith("__MACOSX/") + and not any(part.startswith(".") for part in n.split("/")) + ] ) img_pattern = re.compile(r"!\[[^\]]*\]\(([^)]+)\)") @@ -141,6 +161,9 @@ def replace_img_link(match: re.Match[str]) -> str: return match.group(0) asset_rel = f"{dir_path}/{url}" if dir_path else url asset_rel = re.sub(r"/+", "/", asset_rel) + # sanitize computed asset path + if asset_rel.startswith("/") or any(part == ".." for part in asset_rel.split("/")): + return match.group(0) data = read_bytes(asset_rel) if data is None: return match.group(0) @@ -164,4 +187,3 @@ def replace_img_link(match: re.Match[str]) -> str: created_ids.append(str(doc.id)) return created_ids - diff --git a/src/backend/core/tests/imports/test_api_outline_import_upload.py b/src/backend/core/tests/imports/test_api_outline_import_upload.py index d0b88b920a..aee401aa3b 100644 --- a/src/backend/core/tests/imports/test_api_outline_import_upload.py +++ b/src/backend/core/tests/imports/test_api_outline_import_upload.py @@ -34,7 +34,7 @@ def test_outline_import_upload_anonymous_forbidden(): pass upload = SimpleUploadedFile(name="export.zip", content=buf.getvalue(), content_type="application/zip") - response = client.post("/api/v1.0/outline_import/upload", {"file": upload}, format="multipart") + response = client.post("/api/v1.0/imports/outline/upload", {"file": upload}, format="multipart") assert response.status_code == 401 assert response.json()["detail"] == "Authentication credentials were not provided." @@ -64,7 +64,7 @@ def test_outline_import_upload_authenticated_success(mock_convert): upload = SimpleUploadedFile(name="export.zip", content=zip_bytes, content_type="application/zip") with patch.object(malware_detection, "analyse_file") as mock_analyse_file: - response = client.post("/api/v1.0/outline_import/upload", {"file": upload}, format="multipart") + response = client.post("/api/v1.0/imports/outline/upload", {"file": upload}, format="multipart") assert response.status_code == 201 data = response.json() @@ -76,4 +76,3 @@ def test_outline_import_upload_authenticated_success(mock_convert): mock_convert.assert_called_once() # An antivirus scan is run for the uploaded image assert mock_analyse_file.called - diff --git a/src/backend/core/tests/services/test_outline_import_service.py b/src/backend/core/tests/services/test_outline_import_service.py new file mode 100644 index 0000000000..7f94e63a7b --- /dev/null +++ b/src/backend/core/tests/services/test_outline_import_service.py @@ -0,0 +1,52 @@ +"""Unit tests for the Outline import service.""" + +import io +import zipfile +from unittest.mock import patch + +import pytest + +from core import factories +from core.services.outline_import import OutlineImportError, process_outline_zip + + +pytestmark = pytest.mark.django_db + + +def make_zip(entries: dict[str, bytes]) -> bytes: + buf = io.BytesIO() + with zipfile.ZipFile(buf, mode="w") as zf: + for path, content in entries.items(): + zf.writestr(path, content) + return buf.getvalue() + + +@patch("core.services.converter_services.YdocConverter.convert", return_value="YmFzZTY0Y29udGVudA==") +@patch("core.services.outline_import.malware_detection.analyse_file") +@patch("django.core.files.storage.default_storage.connection.meta.client.upload_fileobj") +def test_process_outline_zip_happy_path(mock_upload, mock_av, mock_convert): + user = factories.UserFactory() + md = b"# T1\n![img](image.png)" + img = b"i-am-png" + zip_bytes = make_zip({ + "dir/page.md": md, + "dir/image.png": img, + "__MACOSX/._noise": b"", + ".hidden/skip.md": b"# hidden", + }) + + created = process_outline_zip(user, zip_bytes) + assert len(created) == 1 + mock_convert.assert_called_once() + mock_upload.assert_called() + mock_av.assert_called() + + +def test_process_outline_zip_zip_slip_rejected(): + user = factories.UserFactory() + zip_bytes = make_zip({ + "../evil.md": b"# E", + }) + with pytest.raises(OutlineImportError): + process_outline_zip(user, zip_bytes) + diff --git a/src/backend/core/urls.py b/src/backend/core/urls.py index ef5ede4cfb..61a46bce86 100644 --- a/src/backend/core/urls.py +++ b/src/backend/core/urls.py @@ -59,7 +59,7 @@ include(template_related_router.urls), ), path( - "outline_import/upload", + "imports/outline/upload", viewsets.OutlineImportUploadView.as_view(), ), ] diff --git a/src/frontend/apps/impress/src/pages/import/outline/index.tsx b/src/frontend/apps/impress/src/pages/import/outline/index.tsx index 0a3ce09b09..f47f427288 100644 --- a/src/frontend/apps/impress/src/pages/import/outline/index.tsx +++ b/src/frontend/apps/impress/src/pages/import/outline/index.tsx @@ -26,7 +26,7 @@ const Page: NextPageWithLayout = () => { try { const form = new FormData(); form.append('file', file); - const resp = await fetch(`${baseApiUrl('1.0')}outline_import/upload`, { + const resp = await fetch(`${baseApiUrl('1.0')}imports/outline/upload`, { method: 'POST', body: form, credentials: 'include', From cce6c966482c0855dc6f1a9c0b4e1fd720c1ff8b Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Tue, 16 Sep 2025 20:51:20 +0200 Subject: [PATCH 07/18] Add Outline import API view --- src/backend/core/api/imports.py | 34 +++++++++++++++++++++++++++++++++ src/backend/core/urls.py | 3 ++- 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 src/backend/core/api/imports.py diff --git a/src/backend/core/api/imports.py b/src/backend/core/api/imports.py new file mode 100644 index 0000000000..fecd427d42 --- /dev/null +++ b/src/backend/core/api/imports.py @@ -0,0 +1,34 @@ +"""Import endpoints for Outline (zip upload).""" + +from __future__ import annotations + +import rest_framework as drf + +from core.services.outline_import import OutlineImportError, process_outline_zip + + +# ---------- Outline (Zip Upload) ---------- + + +class OutlineImportUploadView(drf.views.APIView): + parser_classes = [drf.parsers.MultiPartParser] + permission_classes = [drf.permissions.IsAuthenticated] + + def post(self, request): + uploaded = request.FILES.get("file") + if not uploaded: + raise drf.exceptions.ValidationError({"file": "File is required"}) + + name = getattr(uploaded, "name", "") + if not name.endswith(".zip"): + raise drf.exceptions.ValidationError({"file": "Must be a .zip file"}) + + try: + content = uploaded.read() + created_ids = process_outline_zip(request.user, content) + except OutlineImportError as exc: + raise drf.exceptions.ValidationError({"file": str(exc)}) from exc + + return drf.response.Response( + {"created_document_ids": created_ids}, status=drf.status.HTTP_201_CREATED + ) diff --git a/src/backend/core/urls.py b/src/backend/core/urls.py index 61a46bce86..acbb631b11 100644 --- a/src/backend/core/urls.py +++ b/src/backend/core/urls.py @@ -7,6 +7,7 @@ from rest_framework.routers import DefaultRouter from core.api import viewsets +from core.api import imports as import_views # - Main endpoints router = DefaultRouter() @@ -60,7 +61,7 @@ ), path( "imports/outline/upload", - viewsets.OutlineImportUploadView.as_view(), + import_views.OutlineImportUploadView.as_view(), ), ] ), From 6146a48552ebf5e777ce8922ff37f3aa33840c7d Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Tue, 16 Sep 2025 21:38:10 +0200 Subject: [PATCH 08/18] Remove legacy Outline import viewset --- src/backend/core/api/viewsets.py | 35 +------------------------------- 1 file changed, 1 insertion(+), 34 deletions(-) diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index d0ba8686af..b5c212473c 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -2176,37 +2176,4 @@ def _load_theme_customization(self): settings.THEME_CUSTOMIZATION_CACHE_TIMEOUT, ) - return theme_customization - - -class OutlineImportUploadView(drf.views.APIView): - """Upload an Outline export (.zip) and import it as Docs documents. - - Expects a multipart/form-data with field name 'file' containing a .zip archive - produced by Outline export. - - Returns a JSON payload with a list of created document ids. - """ - - parser_classes = [drf.parsers.MultiPartParser] - permission_classes = [permissions.IsAuthenticated] - - def post(self, request): - uploaded = request.FILES.get("file") - if not uploaded: - raise drf.exceptions.ValidationError({"file": "File is required"}) - - name = getattr(uploaded, "name", "") - if not name.endswith(".zip"): - raise drf.exceptions.ValidationError({"file": "Must be a .zip file"}) - - try: - content = uploaded.read() - # Validate the archive format to fail fast on invalid uploads - zipfile.ZipFile(io.BytesIO(content)) - created_ids = process_outline_zip(request.user, content) - except zipfile.BadZipFile as exc: - raise drf.exceptions.ValidationError({"file": "Invalid zip archive"}) from exc - except OutlineImportError as exc: - raise drf.exceptions.ValidationError({"file": str(exc)}) from exc - return drf.response.Response({"created_document_ids": created_ids}, status=drf.status.HTTP_201_CREATED) + return theme_customization \ No newline at end of file From 453b153506df128a0a2b2e4a2eb474950ea790db Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Wed, 17 Sep 2025 05:28:57 +0200 Subject: [PATCH 09/18] Improve Outline import validation and UI --- src/backend/core/api/imports.py | 8 +++ .../imports/test_api_outline_import_upload.py | 49 +++++++++++++++++++ .../components/LeftPanelHeaderButton.tsx | 27 +++++----- 3 files changed, 72 insertions(+), 12 deletions(-) diff --git a/src/backend/core/api/imports.py b/src/backend/core/api/imports.py index fecd427d42..174c9b8f95 100644 --- a/src/backend/core/api/imports.py +++ b/src/backend/core/api/imports.py @@ -2,6 +2,9 @@ from __future__ import annotations +import io +import zipfile + import rest_framework as drf from core.services.outline_import import OutlineImportError, process_outline_zip @@ -25,7 +28,12 @@ def post(self, request): try: content = uploaded.read() + # Fail fast if the upload is not a valid zip archive + with zipfile.ZipFile(io.BytesIO(content)): + pass created_ids = process_outline_zip(request.user, content) + except zipfile.BadZipFile as exc: + raise drf.exceptions.ValidationError({"file": "Invalid zip archive"}) from exc except OutlineImportError as exc: raise drf.exceptions.ValidationError({"file": str(exc)}) from exc diff --git a/src/backend/core/tests/imports/test_api_outline_import_upload.py b/src/backend/core/tests/imports/test_api_outline_import_upload.py index aee401aa3b..110df86103 100644 --- a/src/backend/core/tests/imports/test_api_outline_import_upload.py +++ b/src/backend/core/tests/imports/test_api_outline_import_upload.py @@ -11,6 +11,7 @@ from core import factories from core.api.viewsets import malware_detection +from core.services.outline_import import OutlineImportError pytestmark = pytest.mark.django_db @@ -76,3 +77,51 @@ def test_outline_import_upload_authenticated_success(mock_convert): mock_convert.assert_called_once() # An antivirus scan is run for the uploaded image assert mock_analyse_file.called + + +def test_outline_import_upload_invalid_zip_returns_validation_error(): + """Invalid archives are rejected with a validation error instead of crashing.""" + user = factories.UserFactory() + client = APIClient() + client.force_login(user) + + upload = SimpleUploadedFile( + name="export.zip", + content=b"not-a-zip", + content_type="application/zip", + ) + + response = client.post( + "/api/v1.0/imports/outline/upload", + {"file": upload}, + format="multipart", + ) + + assert response.status_code == 400 + assert response.json() == {"file": ["Invalid zip archive"]} + + +@patch("core.api.imports.process_outline_zip", side_effect=OutlineImportError("boom")) +def test_outline_import_upload_outline_error_returns_validation_error(mock_process_outline): + """Service-level Outline import errors are surfaced as validation errors.""" + user = factories.UserFactory() + client = APIClient() + client.force_login(user) + + zip_bytes = make_zip_with_markdown_and_image( + md_path="doc.md", + md_content="# Title", + img_path="", + img_bytes=b"", + ) + upload = SimpleUploadedFile(name="export.zip", content=zip_bytes, content_type="application/zip") + + response = client.post( + "/api/v1.0/imports/outline/upload", + {"file": upload}, + format="multipart", + ) + + assert response.status_code == 400 + assert response.json() == {"file": ["boom"]} + mock_process_outline.assert_called_once() diff --git a/src/frontend/apps/impress/src/features/left-panel/components/LeftPanelHeaderButton.tsx b/src/frontend/apps/impress/src/features/left-panel/components/LeftPanelHeaderButton.tsx index 8d3b9594ea..ca9858be8e 100644 --- a/src/frontend/apps/impress/src/features/left-panel/components/LeftPanelHeaderButton.tsx +++ b/src/frontend/apps/impress/src/features/left-panel/components/LeftPanelHeaderButton.tsx @@ -2,7 +2,7 @@ import { Button } from '@openfun/cunningham-react'; import { useRouter } from 'next/router'; import { useTranslation } from 'react-i18next'; -import { DropdownMenu, Icon } from '@/components'; +import { Box, DropdownMenu, Icon } from '@/components'; import { useCreateDoc } from '@/features/docs/doc-management'; import { useLeftPanelStore } from '../stores'; @@ -18,16 +18,7 @@ export const LeftPanelHeaderButton = () => { }, }); return ( - void router.push('/import/outline'), - showSeparator: false, - }, - ]} - > + - + void router.push('/import/outline'), + showSeparator: false, + }, + ]} + > + ); }; From b7a7663e1bf89caa921acabf5f579eecc5cff5af Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Thu, 25 Sep 2025 00:46:55 +0200 Subject: [PATCH 10/18] feat(outline-import): Add markdown preprocessing for unsupported BlockNote elements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Convert H4/H5/H6 headings to compatible formats (H4→H3 with marker, H5→bold with arrow, H6→paragraph with bullet) - Convert horizontal rules (---, ***, ___) to [DIVIDER_BLOCK] markers - Preserve task lists formatting for proper checkbox rendering - Add comprehensive unit tests for all conversion cases This ensures Outline exports with all 6 heading levels and other markdown features are properly imported into BlockNote.js which only supports 3 heading levels. --- src/backend/core/services/outline_import.py | 46 ++++++++++- .../services/test_outline_import_service.py | 79 ++++++++++++++++++- 2 files changed, 123 insertions(+), 2 deletions(-) diff --git a/src/backend/core/services/outline_import.py b/src/backend/core/services/outline_import.py index 0c9d98575e..16f5ecc00f 100644 --- a/src/backend/core/services/outline_import.py +++ b/src/backend/core/services/outline_import.py @@ -4,11 +4,11 @@ import io import mimetypes +import posixpath import re import uuid import zipfile from typing import Iterable -import posixpath from django.conf import settings from django.core.files.storage import default_storage @@ -19,6 +19,47 @@ from core.services.converter_services import YdocConverter +def _preprocess_outline_markdown(markdown: str) -> str: + """Pre-process Outline markdown to handle unsupported BlockNote.js elements. + + Conversions: + - H4 (####) → H3 with marker + - H5 (#####) → Bold paragraph with ▸ prefix + - H6 (######) → Paragraph with ▪ prefix + - Horizontal rules (---) → [DIVIDER] marker for post-processing + - Task lists (- [ ], - [x]) → Standard checkbox format + """ + lines = markdown.split('\n') + processed_lines = [] + + for line in lines: + # Convert H6 (######) to paragraph with prefix + if line.startswith('###### '): + processed_lines.append('▪ ' + line[7:].strip()) + # Convert H5 (#####) to bold paragraph with prefix + elif line.startswith('##### '): + processed_lines.append('**▸ ' + line[6:].strip() + '**') + # Convert H4 (####) to H3 with marker + elif line.startswith('#### '): + # Add a subtle marker to indicate this was H4 + processed_lines.append('### ' + line[5:].strip() + ' [H4]') + # Convert horizontal rules to divider marker + elif line.strip() in ['---', '***', '___'] and len(line.strip()) >= 3: + # Use a special marker that won't conflict with content + processed_lines.append('[DIVIDER_BLOCK]') + # Convert task lists to checkbox format + elif re.match(r'^\s*- \[ \]', line): + # Unchecked task + processed_lines.append(re.sub(r'^(\s*)- \[ \]', r'\1- [ ]', line)) + elif re.match(r'^\s*- \[x\]', line): + # Checked task + processed_lines.append(re.sub(r'^(\s*)- \[x\]', r'\1- [x]', line)) + else: + processed_lines.append(line) + + return '\n'.join(processed_lines) + + class OutlineImportError(Exception): """Raised when the Outline archive is invalid or unsafe.""" @@ -172,6 +213,9 @@ def replace_img_link(match: re.Match[str]) -> str: rewritten_md = img_pattern.sub(replace_img_link, raw_md) + # Pre-process markdown to handle Outline-specific content + rewritten_md = _preprocess_outline_markdown(rewritten_md) + try: ydoc_b64 = converter.convert( rewritten_md.encode("utf-8"), diff --git a/src/backend/core/tests/services/test_outline_import_service.py b/src/backend/core/tests/services/test_outline_import_service.py index 7f94e63a7b..c5904f0295 100644 --- a/src/backend/core/tests/services/test_outline_import_service.py +++ b/src/backend/core/tests/services/test_outline_import_service.py @@ -7,7 +7,7 @@ import pytest from core import factories -from core.services.outline_import import OutlineImportError, process_outline_zip +from core.services.outline_import import OutlineImportError, process_outline_zip, _preprocess_outline_markdown pytestmark = pytest.mark.django_db @@ -50,3 +50,80 @@ def test_process_outline_zip_zip_slip_rejected(): with pytest.raises(OutlineImportError): process_outline_zip(user, zip_bytes) + +def test_preprocess_outline_markdown_heading_conversions(): + """Test that H4, H5, H6 are properly converted.""" + markdown = """# H1 Title +## H2 Section +### H3 Subsection +#### H4 Content +##### H5 Detail +###### H6 Note +""" + result = _preprocess_outline_markdown(markdown) + + assert "# H1 Title" in result + assert "## H2 Section" in result + assert "### H3 Subsection" in result + assert "### H4 Content [H4]" in result # H4 converted to H3 with marker + assert "**▸ H5 Detail**" in result # H5 converted to bold with arrow + assert "▪ H6 Note" in result # H6 converted to paragraph with bullet + + +def test_preprocess_outline_markdown_horizontal_rules(): + """Test that horizontal rules are converted to divider blocks.""" + markdown = """Content before +--- +Content after +*** +More content +___ +Final content""" + result = _preprocess_outline_markdown(markdown) + + assert result.count("[DIVIDER_BLOCK]") == 3 + assert "---" not in result + assert "***" not in result + assert "___" not in result + + +def test_preprocess_outline_markdown_task_lists(): + """Test that task lists are properly handled.""" + markdown = """- [ ] Unchecked task +- [x] Checked task +- Regular list item + - [ ] Nested unchecked + - [x] Nested checked""" + result = _preprocess_outline_markdown(markdown) + + assert "- [ ] Unchecked task" in result + assert "- [x] Checked task" in result + assert "- Regular list item" in result + assert " - [ ] Nested unchecked" in result + assert " - [x] Nested checked" in result + + +def test_preprocess_outline_markdown_combined(): + """Test combined conversions in a realistic document.""" + markdown = """# Main Title +## Section 1 +### Subsection +#### Deep Section +Some content here. +--- +##### Important Note +This is important. +###### Small detail +- [ ] Task to do +- [x] Completed task +""" + result = _preprocess_outline_markdown(markdown) + + assert "# Main Title" in result + assert "### Deep Section [H4]" in result + assert "[DIVIDER_BLOCK]" in result + assert "**▸ Important Note**" in result + assert "▪ Small detail" in result + assert "- [ ] Task to do" in result + assert "- [x] Completed task" in result + From 06d9c2b228a5695a2410c21a1227bddcc77700b3 Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Fri, 26 Sep 2025 14:40:11 +0200 Subject: [PATCH 11/18] Revert "feat(outline-import): Add markdown preprocessing for unsupported BlockNote elements" This reverts commit b7a7663e1bf89caa921acabf5f579eecc5cff5af. --- src/backend/core/services/outline_import.py | 46 +---------- .../services/test_outline_import_service.py | 79 +------------------ 2 files changed, 2 insertions(+), 123 deletions(-) diff --git a/src/backend/core/services/outline_import.py b/src/backend/core/services/outline_import.py index 16f5ecc00f..0c9d98575e 100644 --- a/src/backend/core/services/outline_import.py +++ b/src/backend/core/services/outline_import.py @@ -4,11 +4,11 @@ import io import mimetypes -import posixpath import re import uuid import zipfile from typing import Iterable +import posixpath from django.conf import settings from django.core.files.storage import default_storage @@ -19,47 +19,6 @@ from core.services.converter_services import YdocConverter -def _preprocess_outline_markdown(markdown: str) -> str: - """Pre-process Outline markdown to handle unsupported BlockNote.js elements. - - Conversions: - - H4 (####) → H3 with marker - - H5 (#####) → Bold paragraph with ▸ prefix - - H6 (######) → Paragraph with ▪ prefix - - Horizontal rules (---) → [DIVIDER] marker for post-processing - - Task lists (- [ ], - [x]) → Standard checkbox format - """ - lines = markdown.split('\n') - processed_lines = [] - - for line in lines: - # Convert H6 (######) to paragraph with prefix - if line.startswith('###### '): - processed_lines.append('▪ ' + line[7:].strip()) - # Convert H5 (#####) to bold paragraph with prefix - elif line.startswith('##### '): - processed_lines.append('**▸ ' + line[6:].strip() + '**') - # Convert H4 (####) to H3 with marker - elif line.startswith('#### '): - # Add a subtle marker to indicate this was H4 - processed_lines.append('### ' + line[5:].strip() + ' [H4]') - # Convert horizontal rules to divider marker - elif line.strip() in ['---', '***', '___'] and len(line.strip()) >= 3: - # Use a special marker that won't conflict with content - processed_lines.append('[DIVIDER_BLOCK]') - # Convert task lists to checkbox format - elif re.match(r'^\s*- \[ \]', line): - # Unchecked task - processed_lines.append(re.sub(r'^(\s*)- \[ \]', r'\1- [ ]', line)) - elif re.match(r'^\s*- \[x\]', line): - # Checked task - processed_lines.append(re.sub(r'^(\s*)- \[x\]', r'\1- [x]', line)) - else: - processed_lines.append(line) - - return '\n'.join(processed_lines) - - class OutlineImportError(Exception): """Raised when the Outline archive is invalid or unsafe.""" @@ -213,9 +172,6 @@ def replace_img_link(match: re.Match[str]) -> str: rewritten_md = img_pattern.sub(replace_img_link, raw_md) - # Pre-process markdown to handle Outline-specific content - rewritten_md = _preprocess_outline_markdown(rewritten_md) - try: ydoc_b64 = converter.convert( rewritten_md.encode("utf-8"), diff --git a/src/backend/core/tests/services/test_outline_import_service.py b/src/backend/core/tests/services/test_outline_import_service.py index c5904f0295..7f94e63a7b 100644 --- a/src/backend/core/tests/services/test_outline_import_service.py +++ b/src/backend/core/tests/services/test_outline_import_service.py @@ -7,7 +7,7 @@ import pytest from core import factories -from core.services.outline_import import OutlineImportError, process_outline_zip, _preprocess_outline_markdown +from core.services.outline_import import OutlineImportError, process_outline_zip pytestmark = pytest.mark.django_db @@ -50,80 +50,3 @@ def test_process_outline_zip_zip_slip_rejected(): with pytest.raises(OutlineImportError): process_outline_zip(user, zip_bytes) - -def test_preprocess_outline_markdown_heading_conversions(): - """Test that H4, H5, H6 are properly converted.""" - markdown = """# H1 Title -## H2 Section -### H3 Subsection -#### H4 Content -##### H5 Detail -###### H6 Note -""" - result = _preprocess_outline_markdown(markdown) - - assert "# H1 Title" in result - assert "## H2 Section" in result - assert "### H3 Subsection" in result - assert "### H4 Content [H4]" in result # H4 converted to H3 with marker - assert "**▸ H5 Detail**" in result # H5 converted to bold with arrow - assert "▪ H6 Note" in result # H6 converted to paragraph with bullet - - -def test_preprocess_outline_markdown_horizontal_rules(): - """Test that horizontal rules are converted to divider blocks.""" - markdown = """Content before ---- -Content after -*** -More content -___ -Final content""" - result = _preprocess_outline_markdown(markdown) - - assert result.count("[DIVIDER_BLOCK]") == 3 - assert "---" not in result - assert "***" not in result - assert "___" not in result - - -def test_preprocess_outline_markdown_task_lists(): - """Test that task lists are properly handled.""" - markdown = """- [ ] Unchecked task -- [x] Checked task -- Regular list item - - [ ] Nested unchecked - - [x] Nested checked""" - result = _preprocess_outline_markdown(markdown) - - assert "- [ ] Unchecked task" in result - assert "- [x] Checked task" in result - assert "- Regular list item" in result - assert " - [ ] Nested unchecked" in result - assert " - [x] Nested checked" in result - - -def test_preprocess_outline_markdown_combined(): - """Test combined conversions in a realistic document.""" - markdown = """# Main Title -## Section 1 -### Subsection -#### Deep Section -Some content here. ---- -##### Important Note -This is important. -###### Small detail -- [ ] Task to do -- [x] Completed task -""" - result = _preprocess_outline_markdown(markdown) - - assert "# Main Title" in result - assert "### Deep Section [H4]" in result - assert "[DIVIDER_BLOCK]" in result - assert "**▸ Important Note**" in result - assert "▪ Small detail" in result - assert "- [ ] Task to do" in result - assert "- [x] Completed task" in result - From 68e58b24c249ee20c78ed98225bf27711a155197 Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Sun, 12 Oct 2025 17:01:33 +0200 Subject: [PATCH 12/18] fix(outline-import): Fix CSRF token and nested documents handling - Add CSRF token to Outline import upload request - Fix content save by removing invalid update_fields parameter - Handle nested documents properly to avoid duplicates when a document has child documents (e.g., Doc.md with Doc/ directory) --- src/backend/core/services/outline_import.py | 18 +++++++++++++++++- .../impress/src/pages/import/outline/index.tsx | 6 +++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/backend/core/services/outline_import.py b/src/backend/core/services/outline_import.py index 0c9d98575e..39aec3b652 100644 --- a/src/backend/core/services/outline_import.py +++ b/src/backend/core/services/outline_import.py @@ -114,6 +114,16 @@ def process_outline_zip(user, zip_bytes: bytes) -> list[str]: ] ) + # Build a set of md files that have corresponding directories (Outline nested docs) + # e.g., "Doc.md" and "Doc/" both exist -> "Doc" is a parent with nested children + md_with_dirs: set[str] = set() + for md_path in md_files: + # Remove .md extension to get potential directory name + base_path = md_path.rsplit(".md", 1)[0] + # Check if there's a directory with the same name + if any(n.startswith(f"{base_path}/") for n in archive.namelist()): + md_with_dirs.add(base_path) + img_pattern = re.compile(r"!\[[^\]]*\]\(([^)]+)\)") def read_bytes(path_in_zip: str) -> bytes | None: @@ -149,6 +159,12 @@ def read_bytes(path_in_zip: str) -> bytes | None: else: doc = parent_doc.add_child(creator=user, title=title) + # If this md file has a corresponding directory, register it as a container + # so nested children will use this doc as parent instead of creating a duplicate + base_path = md_path.rsplit(".md", 1)[0] + if base_path in md_with_dirs: + dir_docs[base_path] = doc + models.DocumentAccess.objects.update_or_create( document=doc, user=user, @@ -179,7 +195,7 @@ def replace_img_link(match: re.Match[str]) -> str: accept="application/vnd.yjs.doc", ) doc.content = ydoc_b64 - doc.save(update_fields=["content", "updated_at"]) + doc.save() except Exception: # noqa: BLE001 # Keep doc without content on conversion error but continue import pass diff --git a/src/frontend/apps/impress/src/pages/import/outline/index.tsx b/src/frontend/apps/impress/src/pages/import/outline/index.tsx index f47f427288..eaac62d029 100644 --- a/src/frontend/apps/impress/src/pages/import/outline/index.tsx +++ b/src/frontend/apps/impress/src/pages/import/outline/index.tsx @@ -4,7 +4,7 @@ import { ReactElement, useState } from 'react'; import { useTranslation } from 'react-i18next'; import { Box, Text } from '@/components'; -import { baseApiUrl } from '@/api'; +import { baseApiUrl, getCSRFToken } from '@/api'; import { MainLayout } from '@/layouts'; import { NextPageWithLayout } from '@/types/next'; @@ -26,10 +26,14 @@ const Page: NextPageWithLayout = () => { try { const form = new FormData(); form.append('file', file); + const csrfToken = getCSRFToken(); const resp = await fetch(`${baseApiUrl('1.0')}imports/outline/upload`, { method: 'POST', body: form, credentials: 'include', + headers: { + ...(csrfToken && { 'X-CSRFToken': csrfToken }), + }, }); if (!resp.ok) { throw new Error(await resp.text()); From 538c6413e9690c3580a328b50263ad21a9f47b9c Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Sun, 12 Oct 2025 17:18:35 +0200 Subject: [PATCH 13/18] Cleanup imports --- src/backend/core/api/viewsets.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index 1fe636fc53..685ab091a5 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -3,8 +3,6 @@ import base64 import json -import io -import zipfile import logging import uuid from collections import defaultdict @@ -50,7 +48,6 @@ YdocConverter, ) from core.tasks.mail import send_ask_for_access_mail -from core.services.outline_import import OutlineImportError, process_outline_zip from core.utils import extract_attachments, filter_descendants from . import permissions, serializers, utils @@ -2202,5 +2199,3 @@ def _load_theme_customization(self): theme_customization, settings.THEME_CUSTOMIZATION_CACHE_TIMEOUT, ) - - return theme_customization \ No newline at end of file From 619b6243211d8d1276942a36c8dc5cc12048085a Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Sun, 12 Oct 2025 17:19:44 +0200 Subject: [PATCH 14/18] Fix import outline --- src/backend/core/api/viewsets.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index 685ab091a5..f5ee2ab3a7 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -2199,3 +2199,5 @@ def _load_theme_customization(self): theme_customization, settings.THEME_CUSTOMIZATION_CACHE_TIMEOUT, ) + + return theme_customization \ No newline at end of file From e1f5a13a3bf40cea119eaae78e901e8f23cab075 Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Sun, 12 Oct 2025 17:20:40 +0200 Subject: [PATCH 15/18] add new line --- src/backend/core/api/viewsets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index f5ee2ab3a7..1fb95c4eb6 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -2200,4 +2200,4 @@ def _load_theme_customization(self): settings.THEME_CUSTOMIZATION_CACHE_TIMEOUT, ) - return theme_customization \ No newline at end of file + return theme_customization From 7d6f0559b5c1b1b9ad5491012fec98cca9fca601 Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Mon, 13 Oct 2025 10:17:31 +0200 Subject: [PATCH 16/18] es-lint fixes --- .../impress/src/pages/import/outline/index.tsx | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/frontend/apps/impress/src/pages/import/outline/index.tsx b/src/frontend/apps/impress/src/pages/import/outline/index.tsx index eaac62d029..bd0911e330 100644 --- a/src/frontend/apps/impress/src/pages/import/outline/index.tsx +++ b/src/frontend/apps/impress/src/pages/import/outline/index.tsx @@ -3,8 +3,8 @@ import { useRouter } from 'next/router'; import { ReactElement, useState } from 'react'; import { useTranslation } from 'react-i18next'; -import { Box, Text } from '@/components'; import { baseApiUrl, getCSRFToken } from '@/api'; +import { Box, Text } from '@/components'; import { MainLayout } from '@/layouts'; import { NextPageWithLayout } from '@/types/next'; @@ -45,7 +45,7 @@ const Page: NextPageWithLayout = () => { } else { void router.replace('/'); } - } catch (e) { + } catch { setError(t('Something bad happens, please retry.')); } finally { setIsUploading(false); @@ -53,7 +53,12 @@ const Page: NextPageWithLayout = () => { }; return ( - + {t('Import Outline archive')} @@ -64,7 +69,12 @@ const Page: NextPageWithLayout = () => { onChange={(e) => setFile(e.target.files?.[0] ?? null)} aria-label={t('Select a .zip file')} /> - + From 1d65ca310d5db52d8ded2224070499ea61c2f1c1 Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Sat, 29 Nov 2025 06:20:07 +0100 Subject: [PATCH 17/18] fix(outline-import): Address PR review comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Use python-magic instead of mimetypes for reliable MIME detection - Set attachment status to PROCESSING instead of READY (pending malware scan) - Create DocumentAccess only for root documents (children inherit access) - Reorganize imports (stdlib, third-party, django, local) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/backend/core/services/outline_import.py | 44 ++++++++++++--------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/src/backend/core/services/outline_import.py b/src/backend/core/services/outline_import.py index 39aec3b652..500295696f 100644 --- a/src/backend/core/services/outline_import.py +++ b/src/backend/core/services/outline_import.py @@ -3,15 +3,17 @@ from __future__ import annotations import io -import mimetypes +import posixpath import re import uuid import zipfile from typing import Iterable -import posixpath + +import magic from django.conf import settings from django.core.files.storage import default_storage +from django.db import transaction from lasuite.malware_detection import malware_detection @@ -47,14 +49,13 @@ def _ensure_dir_documents(user, dir_path: str, dir_docs: dict[str, models.Docume title=part, link_reach=models.LinkReachChoices.RESTRICTED, ) + models.DocumentAccess.objects.create( + document=doc, + user=user, + role=models.RoleChoices.OWNER, + ) else: doc = parent.add_child(creator=user, title=part) - - models.DocumentAccess.objects.update_or_create( - document=doc, - user=user, - defaults={"role": models.RoleChoices.OWNER}, - ) dir_docs[current] = doc parent = doc @@ -63,14 +64,15 @@ def _ensure_dir_documents(user, dir_path: str, dir_docs: dict[str, models.Docume def _upload_attachment(user, doc: models.Document, arcname: str, data: bytes) -> str: """Upload a binary asset into object storage and return its public media URL.""" - content_type, _ = mimetypes.guess_type(arcname) + mime = magic.Magic(mime=True) + content_type = mime.from_buffer(data[:1024]) if data else None ext = (arcname.split(".")[-1] or "bin").lower() file_id = uuid.uuid4() key = f"{doc.key_base}/{enums.ATTACHMENTS_FOLDER:s}/{file_id!s}.{ext}" extra_args = { "Metadata": { "owner": str(user.id), - "status": enums.DocumentAttachmentStatus.READY, + "status": enums.DocumentAttachmentStatus.PROCESSING, }, } if content_type: @@ -85,21 +87,26 @@ def _upload_attachment(user, doc: models.Document, arcname: str, data: bytes) -> return f"{settings.MEDIA_BASE_URL}{settings.MEDIA_URL}{key}" +@transaction.atomic def process_outline_zip(user, zip_bytes: bytes) -> list[str]: """Process an Outline export zip and create Docs documents. + This function runs within an atomic transaction, ensuring that either all documents + are created successfully or none are (rollback on any error). + Returns the list of created document IDs (stringified UUIDs) corresponding to markdown-backed documents. Container folders used to rebuild hierarchy are not listed. """ archive = zipfile.ZipFile(io.BytesIO(zip_bytes)) - # Basic Zip Slip protection: refuse absolute or parent-traversal entries + # Basic Zip Slip protection: refuse paths that escape the archive root for name in archive.namelist(): # Normalize to posix separators and check traversal if name.startswith("/") or "\\" in name: raise OutlineImportError("Unsafe path in archive") - parts = [p for p in name.split("/") if p] - if any(part == ".." for part in parts): + # Normalize the path and check if it escapes the root after normalization + normalized = posixpath.normpath(name) + if normalized.startswith("..") or normalized.startswith("/"): raise OutlineImportError("Unsafe path in archive") created_ids: list[str] = [] @@ -156,6 +163,11 @@ def read_bytes(path_in_zip: str) -> bytes | None: title=title, link_reach=models.LinkReachChoices.RESTRICTED, ) + models.DocumentAccess.objects.create( + document=doc, + user=user, + role=models.RoleChoices.OWNER, + ) else: doc = parent_doc.add_child(creator=user, title=title) @@ -165,12 +177,6 @@ def read_bytes(path_in_zip: str) -> bytes | None: if base_path in md_with_dirs: dir_docs[base_path] = doc - models.DocumentAccess.objects.update_or_create( - document=doc, - user=user, - defaults={"role": models.RoleChoices.OWNER}, - ) - def replace_img_link(match: re.Match[str]) -> str: url = match.group(1) if url.startswith("http://") or url.startswith("https://"): From be6a2cb989f77d94ea2922698ecf9f16ee1d4a33 Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Sat, 29 Nov 2025 12:13:50 +0100 Subject: [PATCH 18/18] fix(outline-import): Add async processing, improve UI, and address PR review - Add SessionAuthentication to upload endpoint - Add trailing slash to upload URL for Django compatibility - Register outline_import task in celery autodiscover - Add migration merge for comments feature conflict - Improve import UI with drag-drop, progress states, and translations - Add logging for markdown conversion failures --- src/backend/core/api/imports.py | 79 +++- src/backend/core/api/serializers.py | 41 ++ src/backend/core/malware_detection.py | 46 +- ...e_0025_outline_import_job_0026_comments.py | 14 + src/backend/core/models.py | 52 +++ src/backend/core/services/outline_import.py | 7 +- src/backend/core/urls.py | 9 +- src/backend/impress/celery_app.py | 5 +- .../apps/impress/src/i18n/translations.json | 32 ++ .../src/pages/import/outline/index.tsx | 399 +++++++++++++++--- 10 files changed, 592 insertions(+), 92 deletions(-) create mode 100644 src/backend/core/migrations/0027_merge_0025_outline_import_job_0026_comments.py diff --git a/src/backend/core/api/imports.py b/src/backend/core/api/imports.py index 174c9b8f95..2aa764a778 100644 --- a/src/backend/core/api/imports.py +++ b/src/backend/core/api/imports.py @@ -2,41 +2,76 @@ from __future__ import annotations -import io -import zipfile +import uuid import rest_framework as drf +from django.core.files.storage import default_storage +from django.db import transaction +from django.urls import reverse -from core.services.outline_import import OutlineImportError, process_outline_zip +from lasuite.malware_detection import malware_detection + +from core import models +from core.api.serializers import OutlineImportSerializer # ---------- Outline (Zip Upload) ---------- class OutlineImportUploadView(drf.views.APIView): + """ + Upload an Outline export zip file for asynchronous processing. + + This endpoint: + 1. Validates the uploaded zip file + 2. Saves it to S3 storage + 3. Creates an OutlineImportJob to track the import + 4. Triggers malware scanning of the zip + 5. Returns a polling URL for checking import status + + The actual import processing happens asynchronously after malware scanning. + """ + + authentication_classes = [drf.authentication.SessionAuthentication] parser_classes = [drf.parsers.MultiPartParser] permission_classes = [drf.permissions.IsAuthenticated] def post(self, request): - uploaded = request.FILES.get("file") - if not uploaded: - raise drf.exceptions.ValidationError({"file": "File is required"}) - - name = getattr(uploaded, "name", "") - if not name.endswith(".zip"): - raise drf.exceptions.ValidationError({"file": "Must be a .zip file"}) - - try: - content = uploaded.read() - # Fail fast if the upload is not a valid zip archive - with zipfile.ZipFile(io.BytesIO(content)): - pass - created_ids = process_outline_zip(request.user, content) - except zipfile.BadZipFile as exc: - raise drf.exceptions.ValidationError({"file": "Invalid zip archive"}) from exc - except OutlineImportError as exc: - raise drf.exceptions.ValidationError({"file": str(exc)}) from exc + # Validate the uploaded file + serializer = OutlineImportSerializer(data=request.data) + serializer.is_valid(raise_exception=True) + + uploaded_file = serializer.validated_data["file"] + + # Generate S3 key for the zip file + file_id = uuid.uuid4() + key = f"imports/outline/{request.user.id}/{file_id}.zip" + + # Save the zip file to S3 + default_storage.save(key, uploaded_file) + + # Create import job and trigger malware scan in a transaction + with transaction.atomic(): + job = models.OutlineImportJob.objects.create( + user=request.user, + zip_file_key=key, + status=models.OutlineImportJob.Status.PENDING, + ) + + # Trigger malware scan of the zip file + # The callback will trigger the import task if the file is safe + transaction.on_commit( + lambda: malware_detection.analyse_file(key, import_job_id=str(job.id)) + ) + + # Return job info and polling URL + status_url = reverse("outline-import-job-detail", kwargs={"pk": job.id}) return drf.response.Response( - {"created_document_ids": created_ids}, status=drf.status.HTTP_201_CREATED + { + "job_id": str(job.id), + "status": job.status, + "status_url": request.build_absolute_uri(status_url), + }, + status=drf.status.HTTP_202_ACCEPTED, # 202 Accepted for async processing ) diff --git a/src/backend/core/api/serializers.py b/src/backend/core/api/serializers.py index 47754efe46..650beb24a2 100644 --- a/src/backend/core/api/serializers.py +++ b/src/backend/core/api/serializers.py @@ -1013,3 +1013,44 @@ def get_abilities(self, thread): if request: return thread.get_abilities(request.user) return {} + + +class OutlineImportSerializer(serializers.Serializer): + """Serializer for validating Outline export zip uploads.""" + + file = serializers.FileField() + + def validate_file(self, file): + """Validate that the uploaded file is a valid zip archive.""" + name = getattr(file, "name", "") + if not name.endswith(".zip"): + raise serializers.ValidationError("Must be a .zip file") + + # Validate it's actually a valid zip file by attempting to open it + try: + import io + import zipfile + + content = file.read() + file.seek(0) # Reset file pointer after reading + zipfile.ZipFile(io.BytesIO(content)) + except zipfile.BadZipFile as exc: + raise serializers.ValidationError("Invalid zip archive") from exc + + return file + + +class OutlineImportJobSerializer(serializers.ModelSerializer): + """Serializer for Outline import job status.""" + + class Meta: + model = models.OutlineImportJob + fields = [ + "id", + "status", + "created_document_ids", + "error_message", + "created_at", + "updated_at", + ] + read_only_fields = fields diff --git a/src/backend/core/malware_detection.py b/src/backend/core/malware_detection.py index 9b1ef3a726..783fa87a02 100644 --- a/src/backend/core/malware_detection.py +++ b/src/backend/core/malware_detection.py @@ -3,11 +3,12 @@ import logging from django.core.files.storage import default_storage +from django.db import transaction from lasuite.malware_detection.enums import ReportStatus from core.enums import DocumentAttachmentStatus -from core.models import Document +from core.models import Document, OutlineImportJob logger = logging.getLogger(__name__) security_logger = logging.getLogger("docs.security") @@ -16,6 +17,13 @@ def malware_detection_callback(file_path, status, error_info, **kwargs): """Malware detection callback""" + # Handle Outline import jobs + import_job_id = kwargs.get("import_job_id") + if import_job_id: + _handle_outline_import_scan(import_job_id, file_path, status, error_info) + return + + # Handle regular document attachments if status == ReportStatus.SAFE: logger.info("File %s is safe", file_path) # Get existing metadata @@ -50,3 +58,39 @@ def malware_detection_callback(file_path, status, error_info, **kwargs): # Delete the file from the storage default_storage.delete(file_path) + + +def _handle_outline_import_scan(job_id, file_path, status, error_info): + """Handle malware scan result for Outline import zip files.""" + from core.tasks.outline_import import process_outline_import_task + + try: + job = OutlineImportJob.objects.get(id=job_id) + except OutlineImportJob.DoesNotExist: + logger.error("OutlineImportJob %s not found for malware callback", job_id) + return + + if status == ReportStatus.SAFE: + logger.info("Outline import zip %s is safe, triggering import task", file_path) + job.status = OutlineImportJob.Status.SCANNING + job.save(update_fields=["status", "updated_at"]) + + # Trigger the import task after the current transaction commits + # This ensures the status update is visible to the task + transaction.on_commit(lambda: process_outline_import_task.delay(str(job.id))) + else: + security_logger.warning( + "Outline import zip %s contains malware. Job %s marked as failed. Error: %s", + file_path, + job_id, + error_info, + ) + job.status = OutlineImportJob.Status.FAILED + job.error_message = f"Malware detected in uploaded file: {error_info}" + job.save(update_fields=["status", "error_message", "updated_at"]) + + # Delete the infected zip file + try: + default_storage.delete(file_path) + except Exception as e: + logger.warning("Failed to delete infected zip file %s: %s", file_path, e) diff --git a/src/backend/core/migrations/0027_merge_0025_outline_import_job_0026_comments.py b/src/backend/core/migrations/0027_merge_0025_outline_import_job_0026_comments.py new file mode 100644 index 0000000000..3c3aa02f8a --- /dev/null +++ b/src/backend/core/migrations/0027_merge_0025_outline_import_job_0026_comments.py @@ -0,0 +1,14 @@ +# Generated by Django 5.2.7 on 2025-11-29 06:18 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0025_outline_import_job'), + ('core', '0026_comments'), + ] + + operations = [ + ] diff --git a/src/backend/core/models.py b/src/backend/core/models.py index c17d3ec449..8b9461af8d 100644 --- a/src/backend/core/models.py +++ b/src/backend/core/models.py @@ -1662,3 +1662,55 @@ def get_abilities(self, user): "partial_update": is_admin_or_owner, "retrieve": is_admin_or_owner, } + + +class OutlineImportJob(BaseModel): + """Track async Outline import jobs.""" + + class Status(models.TextChoices): + PENDING = "pending", _("Pending") + SCANNING = "scanning", _("Scanning") + PROCESSING = "processing", _("Processing") + COMPLETED = "completed", _("Completed") + FAILED = "failed", _("Failed") + + user = models.ForeignKey( + User, + verbose_name=_("user"), + help_text=_("user who initiated the import"), + on_delete=models.CASCADE, + related_name="outline_import_jobs", + ) + status = models.CharField( + verbose_name=_("status"), + help_text=_("current status of the import job"), + max_length=20, + choices=Status.choices, + default=Status.PENDING, + ) + zip_file_key = models.CharField( + verbose_name=_("zip file key"), + help_text=_("S3 key of the uploaded zip file"), + max_length=255, + ) + created_document_ids = ArrayField( + models.UUIDField(), + verbose_name=_("created document IDs"), + help_text=_("list of document IDs created during import"), + default=list, + blank=True, + ) + error_message = models.TextField( + verbose_name=_("error message"), + help_text=_("error message if import failed"), + blank=True, + ) + + class Meta: + db_table = "core_outline_import_job" + verbose_name = _("Outline import job") + verbose_name_plural = _("Outline import jobs") + ordering = ["-created_at"] + + def __str__(self): + return f"OutlineImportJob {self.id} ({self.status})" diff --git a/src/backend/core/services/outline_import.py b/src/backend/core/services/outline_import.py index 500295696f..d730620031 100644 --- a/src/backend/core/services/outline_import.py +++ b/src/backend/core/services/outline_import.py @@ -202,9 +202,12 @@ def replace_img_link(match: re.Match[str]) -> str: ) doc.content = ydoc_b64 doc.save() - except Exception: # noqa: BLE001 + except Exception as e: # noqa: BLE001 # Keep doc without content on conversion error but continue import - pass + import logging + logging.getLogger(__name__).warning( + "Failed to convert markdown for document %s: %s", doc.id, e + ) created_ids.append(str(doc.id)) diff --git a/src/backend/core/urls.py b/src/backend/core/urls.py index 878eae5e86..4bba64de85 100644 --- a/src/backend/core/urls.py +++ b/src/backend/core/urls.py @@ -8,12 +8,18 @@ from core.api import viewsets from core.api import imports as import_views +from core.api import import_viewsets # - Main endpoints router = DefaultRouter() router.register("templates", viewsets.TemplateViewSet, basename="templates") router.register("documents", viewsets.DocumentViewSet, basename="documents") router.register("users", viewsets.UserViewSet, basename="users") +router.register( + "imports/outline/jobs", + import_viewsets.OutlineImportJobViewSet, + basename="outline-import-job", +) # - Routes nested under a document document_related_router = DefaultRouter() @@ -62,8 +68,9 @@ include(thread_related_router.urls), ), path( - "imports/outline/upload", + "imports/outline/upload/", import_views.OutlineImportUploadView.as_view(), + name="outline-import-upload", ), ] ), diff --git a/src/backend/impress/celery_app.py b/src/backend/impress/celery_app.py index e38c57071c..ccf9abba4e 100644 --- a/src/backend/impress/celery_app.py +++ b/src/backend/impress/celery_app.py @@ -23,4 +23,7 @@ app.config_from_object("django.conf:settings", namespace="CELERY") # Load task modules from all registered Django apps. -app.autodiscover_tasks(lambda: settings.INSTALLED_APPS) +# autodiscover_tasks looks for "tasks.py" in each app by default +# We also need to discover tasks in subdirectories like core/tasks/ +app.autodiscover_tasks(lambda: settings.INSTALLED_APPS + ["core.tasks"], related_name="mail") +app.autodiscover_tasks(lambda: ["core.tasks"], related_name="outline_import") diff --git a/src/frontend/apps/impress/src/i18n/translations.json b/src/frontend/apps/impress/src/i18n/translations.json index 1f5083ac74..0790147d6f 100644 --- a/src/frontend/apps/impress/src/i18n/translations.json +++ b/src/frontend/apps/impress/src/i18n/translations.json @@ -533,6 +533,22 @@ "Import Outline archive": "Import Outline archive", "Select a .zip file": "Select a .zip file", "Import": "Import", + "Upload your Outline export (.zip) to import your documents": "Upload your Outline export (.zip) to import your documents", + "Drag and drop your file here": "Drag and drop your file here", + "or click to browse": "or click to browse", + "Accepts .zip files only": "Accepts .zip files only", + "Click or drag to select a file": "Click or drag to select a file", + "Uploading archive...": "Uploading archive...", + "Preparing import...": "Preparing import...", + "Scanning for security...": "Scanning for security...", + "Creating documents...": "Creating documents...", + "Import completed! Redirecting...": "Import completed! Redirecting...", + "Import is taking too long. Please try again.": "Import is taking too long. Please try again.", + "Import failed. Please try again.": "Import failed. Please try again.", + "Failed to check import status. Please try again.": "Failed to check import status. Please try again.", + "Something went wrong. Please try again.": "Something went wrong. Please try again.", + "Importing...": "Importing...", + "Cancel": "Cancel", "days_many": "days", "days_one": "day", "days_other": "days" @@ -885,6 +901,22 @@ "Import Outline archive": "Importer une archive Outline", "Select a .zip file": "Sélectionnez un fichier .zip", "Import": "Importer", + "Upload your Outline export (.zip) to import your documents": "Téléversez votre export Outline (.zip) pour importer vos documents", + "Drag and drop your file here": "Glissez-déposez votre fichier ici", + "or click to browse": "ou cliquez pour parcourir", + "Accepts .zip files only": "Accepte uniquement les fichiers .zip", + "Click or drag to select a file": "Cliquez ou glissez pour sélectionner un fichier", + "Uploading archive...": "Téléversement de l'archive...", + "Preparing import...": "Préparation de l'import...", + "Scanning for security...": "Analyse de sécurité...", + "Creating documents...": "Création des documents...", + "Import completed! Redirecting...": "Import terminé ! Redirection...", + "Import is taking too long. Please try again.": "L'import prend trop de temps. Veuillez réessayer.", + "Import failed. Please try again.": "L'import a échoué. Veuillez réessayer.", + "Failed to check import status. Please try again.": "Impossible de vérifier le statut de l'import. Veuillez réessayer.", + "Something went wrong. Please try again.": "Une erreur s'est produite. Veuillez réessayer.", + "Importing...": "Import en cours...", + "Cancel": "Annuler", "Open the menu of actions for the document: {{title}}": "Ouvrir le menu des actions du document : {{title}}", "Open the sharing settings for the document": "Ouvrir les paramètres de partage pour le document", "Organize": "Organiser", diff --git a/src/frontend/apps/impress/src/pages/import/outline/index.tsx b/src/frontend/apps/impress/src/pages/import/outline/index.tsx index bd0911e330..538ef2f315 100644 --- a/src/frontend/apps/impress/src/pages/import/outline/index.tsx +++ b/src/frontend/apps/impress/src/pages/import/outline/index.tsx @@ -1,92 +1,361 @@ -import { Button, Loader } from '@openfun/cunningham-react'; +import { Alert, Button, Loader, VariantType } from '@openfun/cunningham-react'; +import Head from 'next/head'; import { useRouter } from 'next/router'; -import { ReactElement, useState } from 'react'; +import { ReactElement, useCallback, useRef, useState } from 'react'; import { useTranslation } from 'react-i18next'; +import styled, { css } from 'styled-components'; -import { baseApiUrl, getCSRFToken } from '@/api'; -import { Box, Text } from '@/components'; +import { fetchAPI } from '@/api'; +import { Box, Card, Icon, Text, TextErrors } from '@/components'; +import { useCunninghamTheme } from '@/cunningham'; import { MainLayout } from '@/layouts'; import { NextPageWithLayout } from '@/types/next'; +type ImportStatus = + | 'idle' + | 'uploading' + | 'pending' + | 'scanning' + | 'processing' + | 'completed' + | 'failed'; + +interface ImportJob { + id: string; + status: string; + created_document_ids: string[]; + error_message: string; +} + +const DropZone = styled(Box)<{ $isDragging: boolean; $hasFile: boolean }>` + border: 2px dashed + ${({ $isDragging, $hasFile, theme }) => + $isDragging + ? 'var(--c--theme--colors--primary-500)' + : $hasFile + ? 'var(--c--theme--colors--success-500)' + : 'var(--c--theme--colors--greyscale-300)'}; + border-radius: 8px; + transition: all 0.2s ease; + cursor: pointer; + + &:hover { + border-color: var(--c--theme--colors--primary-400); + background-color: var(--c--theme--colors--primary-100); + } + + ${({ $isDragging }) => + $isDragging && + css` + background-color: var(--c--theme--colors--primary-100); + `} + + ${({ $hasFile }) => + $hasFile && + css` + background-color: var(--c--theme--colors--success-100); + `} +`; + +const HiddenInput = styled.input` + display: none; +`; + const Page: NextPageWithLayout = () => { const { t } = useTranslation(); const router = useRouter(); + const { colorsTokens } = useCunninghamTheme(); + const fileInputRef = useRef(null); + const [file, setFile] = useState(null); - const [isUploading, setIsUploading] = useState(false); + const [isDragging, setIsDragging] = useState(false); + const [status, setStatus] = useState('idle'); const [error, setError] = useState(null); - const onSubmit = async (e: React.FormEvent) => { - e.preventDefault(); + const pollJobStatus = useCallback( + async (id: string) => { + const maxAttempts = 120; // 2 minutes max + let attempts = 0; + + const poll = async (): Promise => { + attempts++; + if (attempts > maxAttempts) { + setError(t('Import is taking too long. Please try again.')); + setStatus('failed'); + return; + } + + try { + const response = await fetchAPI(`imports/outline/jobs/${id}/`); + if (!response.ok) { + throw new Error('Failed to fetch job status'); + } + + const job = (await response.json()) as ImportJob; + + if (job.status === 'completed') { + setStatus('completed'); + const firstDocId = job.created_document_ids?.[0]; + if (firstDocId) { + void router.replace(`/docs/${firstDocId}`); + } else { + void router.replace('/'); + } + return; + } + + if (job.status === 'failed') { + setError(job.error_message || t('Import failed. Please try again.')); + setStatus('failed'); + return; + } + + // Update status based on job status + if (job.status === 'scanning') { + setStatus('scanning'); + } else if (job.status === 'processing') { + setStatus('processing'); + } else { + setStatus('pending'); + } + + // Continue polling + setTimeout(() => void poll(), 1000); + } catch { + setError(t('Failed to check import status. Please try again.')); + setStatus('failed'); + } + }; + + await poll(); + }, + [router, t], + ); + + const handleUpload = async () => { + if (!file) return; + setError(null); - if (!file) { - setError(t('Please select a .zip file')); - return; - } - setIsUploading(true); + setStatus('uploading'); + try { const form = new FormData(); form.append('file', file); - const csrfToken = getCSRFToken(); - const resp = await fetch(`${baseApiUrl('1.0')}imports/outline/upload`, { + + const response = await fetchAPI('imports/outline/upload/', { method: 'POST', body: form, - credentials: 'include', - headers: { - ...(csrfToken && { 'X-CSRFToken': csrfToken }), - }, + withoutContentType: true, }); - if (!resp.ok) { - throw new Error(await resp.text()); - } - const data = (await resp.json()) as { created_document_ids: string[] }; - const first = data.created_document_ids?.[0]; - if (first) { - void router.replace(`/docs/${first}`); - } else { - void router.replace('/'); + + if (!response.ok) { + const errorData = await response.json(); + throw new Error(errorData?.file?.[0] || 'Upload failed'); } - } catch { - setError(t('Something bad happens, please retry.')); - } finally { - setIsUploading(false); + + const data = (await response.json()) as { job_id: string; status: string }; + setStatus('pending'); + + // Start polling for job status + await pollJobStatus(data.job_id); + } catch (err) { + setError( + err instanceof Error + ? err.message + : t('Something went wrong. Please try again.'), + ); + setStatus('failed'); } }; + const handleDragOver = (e: React.DragEvent) => { + e.preventDefault(); + setIsDragging(true); + }; + + const handleDragLeave = (e: React.DragEvent) => { + e.preventDefault(); + setIsDragging(false); + }; + + const handleDrop = (e: React.DragEvent) => { + e.preventDefault(); + setIsDragging(false); + const droppedFile = e.dataTransfer.files[0]; + if (droppedFile?.name.endsWith('.zip')) { + setFile(droppedFile); + setError(null); + setStatus('idle'); + } else { + setError(t('Please select a .zip file')); + } + }; + + const handleFileSelect = (e: React.ChangeEvent) => { + const selectedFile = e.target.files?.[0]; + if (selectedFile) { + setFile(selectedFile); + setError(null); + setStatus('idle'); + } + }; + + const handleZoneClick = () => { + fileInputRef.current?.click(); + }; + + const getStatusMessage = () => { + switch (status) { + case 'uploading': + return t('Uploading archive...'); + case 'pending': + return t('Preparing import...'); + case 'scanning': + return t('Scanning for security...'); + case 'processing': + return t('Creating documents...'); + case 'completed': + return t('Import completed! Redirecting...'); + default: + return null; + } + }; + + const isProcessing = ['uploading', 'pending', 'scanning', 'processing', 'completed'].includes( + status, + ); + return ( - - - {t('Import Outline archive')} - -
- setFile(e.target.files?.[0] ?? null)} - aria-label={t('Select a .zip file')} - /> - - - {isUploading && } - - {error && ( - - {error} - - )} -
-
+ <> + + {t('Import from Outline')} - {t('Docs')} + + + + + {/* Header */} + + + + {t('Import from Outline')} + + + {t('Upload your Outline export (.zip) to import your documents')} + + + + {/* Drop Zone */} + + + + {file ? ( + <> + + {file.name} + + {(file.size / 1024 / 1024).toFixed(2)} MB + + + ) : ( + <> + + {t('Drag and drop your file here')} + + {t('or click to browse')} + + + {t('Accepts .zip files only')} + + + )} + + + {/* Status */} + {isProcessing && ( + + + + {getStatusMessage()} + + + )} + + {/* Error */} + {error && status === 'failed' && ( + + )} + + {/* Success */} + {status === 'completed' && ( + + {t('Import completed! Redirecting...')} + + )} + + {/* Actions */} + + + + + + + + ); };