Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ and this project adheres to

## [Unreleased]

### Added

- ✨ Import of documents #7765

### Changed

- ♿(frontend) improve accessibility:
Expand Down
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ logs: ## display app-dev logs (follow mode)
.PHONY: logs

run-backend: ## Start only the backend application and all needed services
@$(COMPOSE) up --force-recreate -d docspec
@$(COMPOSE) up --force-recreate -d celery-dev
@$(COMPOSE) up --force-recreate -d y-provider-development
@$(COMPOSE) up --force-recreate -d nginx
Expand Down
5 changes: 5 additions & 0 deletions compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -217,3 +217,8 @@ services:
kc_postgresql:
condition: service_healthy
restart: true

docspec:
image: ghcr.io/docspecio/api:2.4.4
ports:
- "4000:4000"
1 change: 1 addition & 0 deletions docs/env.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ These are the environment variables you can set for the `impress-backend` contai
| USER_OIDC_ESSENTIAL_CLAIMS | Essential claims in OIDC token | [] |
| Y_PROVIDER_API_BASE_URL | Y Provider url | |
| Y_PROVIDER_API_KEY | Y provider API key | |
| DOCSPEC_API_URL | URL to endpoint of DocSpec conversion API | |


## impress-frontend image
Expand Down
4 changes: 3 additions & 1 deletion env.d/development/common
Original file line number Diff line number Diff line change
Expand Up @@ -67,5 +67,7 @@ DJANGO_SERVER_TO_SERVER_API_TOKENS=server-api-token
Y_PROVIDER_API_BASE_URL=http://y-provider-development:4444/api/
Y_PROVIDER_API_KEY=yprovider-api-key

DOCSPEC_API_URL=http://docspec:4000/conversion

# Theme customization
THEME_CUSTOMIZATION_CACHE_TIMEOUT=15
THEME_CUSTOMIZATION_CACHE_TIMEOUT=15
2 changes: 1 addition & 1 deletion env.d/development/common.e2e
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ Y_PROVIDER_API_BASE_URL=http://y-provider:4444/api/

# Throttle
API_DOCUMENT_THROTTLE_RATE=1000/min
API_CONFIG_THROTTLE_RATE=1000/min
API_CONFIG_THROTTLE_RATE=1000/min
9 changes: 7 additions & 2 deletions src/backend/core/api/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@
from rest_framework import serializers

from core import choices, enums, models, utils, validators
from core.services import mime_types
from core.services.ai_services import AI_ACTIONS
from core.services.converter_services import (
ConversionError,
YdocConverter,
Converter,
)


Expand Down Expand Up @@ -188,6 +189,7 @@ class DocumentSerializer(ListDocumentSerializer):

content = serializers.CharField(required=False)
websocket = serializers.BooleanField(required=False, write_only=True)
file = serializers.FileField(required=False, write_only=True, allow_null=True)

class Meta:
model = models.Document
Expand All @@ -204,6 +206,7 @@ class Meta:
"deleted_at",
"depth",
"excerpt",
"file",
"is_favorite",
"link_role",
"link_reach",
Expand Down Expand Up @@ -461,7 +464,9 @@ def create(self, validated_data):
language = user.language or language

try:
document_content = YdocConverter().convert(validated_data["content"])
document_content = Converter().convert(
validated_data["content"], mime_types.MARKDOWN, mime_types.YJS
)
except ConversionError as err:
raise serializers.ValidationError(
{"content": ["Could not convert content"]}
Expand Down
40 changes: 32 additions & 8 deletions src/backend/core/api/viewsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,18 @@
from rest_framework.permissions import AllowAny

from core import authentication, choices, enums, models
from core.services import mime_types
from core.services.ai_services import AIService
from core.services.collaboration_services import CollaborationService
from core.services.converter_services import (
ServiceUnavailableError as YProviderServiceUnavailableError,
ConversionError,
Converter,
)
from core.services.converter_services import (
ValidationError as YProviderValidationError,
ServiceUnavailableError as YProviderServiceUnavailableError,
)
from core.services.converter_services import (
YdocConverter,
ValidationError as YProviderValidationError,
)
from core.tasks.mail import send_ask_for_access_mail
from core.utils import extract_attachments, filter_descendants
Expand Down Expand Up @@ -504,6 +506,28 @@ def perform_create(self, serializer):
"IN SHARE ROW EXCLUSIVE MODE;"
)

# Remove file from validated_data as it's not a model field
# Process it if present
uploaded_file = serializer.validated_data.pop("file", None)

# If a file is uploaded, convert it to Yjs format and set as content
if uploaded_file:
try:
file_content = uploaded_file.read()

converter = Converter()
converted_content = converter.convert(
file_content,
content_type=uploaded_file.content_type,
accept=mime_types.YJS,
)
serializer.validated_data["content"] = converted_content
serializer.validated_data["title"] = uploaded_file.name
except ConversionError as err:
raise drf.exceptions.ValidationError(
{"file": ["Could not convert file content"]}
) from err

obj = models.Document.add_root(
creator=self.request.user,
**serializer.validated_data,
Expand Down Expand Up @@ -1603,14 +1627,14 @@ def content(self, request, pk=None):
if base64_content is not None:
# Convert using the y-provider service
try:
yprovider = YdocConverter()
yprovider = Converter()
result = yprovider.convert(
base64.b64decode(base64_content),
"application/vnd.yjs.doc",
mime_types.YJS,
{
"markdown": "text/markdown",
"html": "text/html",
"json": "application/json",
"markdown": mime_types.MARKDOWN,
"html": mime_types.HTML,
"json": mime_types.JSON,
}[content_format],
)
content = result
Expand Down
81 changes: 73 additions & 8 deletions src/backend/core/services/converter_services.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
"""Y-Provider API services."""

import typing
from base64 import b64encode

from django.conf import settings

import requests

from core.services import mime_types


class ConversionError(Exception):
"""Base exception for conversion-related errors."""
Expand All @@ -19,8 +22,72 @@ class ServiceUnavailableError(ConversionError):
"""Raised when the conversion service is unavailable."""


class ConverterProtocol(typing.Protocol):
    """Structural interface implemented by every format converter."""

    def convert(self, text, content_type, accept):
        """Convert ``text`` from ``content_type`` into the ``accept`` format."""
        ...


class Converter:
    """Dispatch a conversion request to the specialized converter that handles it."""

    docspec: ConverterProtocol
    ydoc: ConverterProtocol

    def __init__(self):
        self.docspec = DocSpecConverter()
        self.ydoc = YdocConverter()

    def convert(self, data, content_type, accept):
        """Convert ``data`` from ``content_type`` to ``accept`` using external microservices.

        DOCX to YJS is a two-step conversion: DocSpec first produces BlockNote
        content, which is then fed back into this method to obtain YJS. Every
        other combination is delegated unchanged to the YDoc converter.
        """
        needs_docspec = content_type == mime_types.DOCX and accept == mime_types.YJS
        if not needs_docspec:
            return self.ydoc.convert(data, content_type, accept)

        # Step 1: DOCX -> BlockNote through the DocSpec service.
        blocknote_content = self.docspec.convert(
            data, mime_types.DOCX, mime_types.BLOCKNOTE
        )
        # Step 2: BlockNote -> YJS; re-entering convert() routes this to the
        # YDoc converter because the content type is no longer DOCX.
        return self.convert(blocknote_content, mime_types.BLOCKNOTE, mime_types.YJS)


class DocSpecConverter:
"""Service class for DocSpec conversion-related operations."""

def _request(self, url, data, content_type):
"""Make a request to the DocSpec API."""

response = requests.post(
url,
headers={"Accept": mime_types.BLOCKNOTE},
files={"file": ("document.docx", data, content_type)},
timeout=settings.CONVERSION_API_TIMEOUT,
verify=settings.CONVERSION_API_SECURE,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see this setting defined in the settings.py module.
I think in the Production class it should be set to True without the ability to modify it (don't use django configuration in this case)

)
response.raise_for_status()
return response

def convert(self, data, content_type, accept):
    """Convert a DOCX document to BlockNote through the DocSpec API.

    Only the DOCX -> BlockNote pair is accepted. Returns the ``content`` of
    the DocSpec HTTP response.

    Raises:
        ValidationError: if ``data`` is empty or the requested pair of
            formats is not DOCX -> BlockNote.
        ServiceUnavailableError: if the request to DocSpec fails.
    """
    if not data:
        raise ValidationError("Input data cannot be empty")

    is_supported = (
        content_type == mime_types.DOCX and accept == mime_types.BLOCKNOTE
    )
    if not is_supported:
        raise ValidationError(
            f"Conversion from {content_type} to {accept} is not supported."
        )

    try:
        docspec_response = self._request(
            settings.DOCSPEC_API_URL, data, content_type
        )
        return docspec_response.content
    except requests.RequestException as err:
        # Expose transport and HTTP-status failures as a conversion error.
        raise ServiceUnavailableError(
            "Failed to connect to DocSpec conversion service",
        ) from err


class YdocConverter:
"""Service class for conversion-related operations."""
"""Service class for YDoc conversion-related operations."""

@property
def auth_header(self):
Expand All @@ -44,9 +111,7 @@ def _request(self, url, data, content_type, accept):
response.raise_for_status()
return response

def convert(
self, text, content_type="text/markdown", accept="application/vnd.yjs.doc"
):
def convert(self, text, content_type=mime_types.MARKDOWN, accept=mime_types.YJS):
"""Convert a Markdown text into our internal format using an external microservice."""

if not text:
Expand All @@ -59,14 +124,14 @@ def convert(
content_type,
accept,
)
if accept == "application/vnd.yjs.doc":
if accept == mime_types.YJS:
return b64encode(response.content).decode("utf-8")
if accept in {"text/markdown", "text/html"}:
if accept in {mime_types.MARKDOWN, "text/html"}:
return response.text
if accept == "application/json":
if accept == mime_types.JSON:
return response.json()
raise ValidationError("Unsupported format")
except requests.RequestException as err:
raise ServiceUnavailableError(
"Failed to connect to conversion service",
f"Failed to connect to YDoc conversion service {content_type}, {accept}",
) from err
8 changes: 8 additions & 0 deletions src/backend/core/services/mime_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""MIME type constants for document conversion."""

# BlockNote content; the format requested from the DocSpec API.
BLOCKNOTE = "application/vnd.blocknote+json"
# Yjs document; the format uploaded or submitted content is converted to.
YJS = "application/vnd.yjs.doc"
# Markdown text.
MARKDOWN = "text/markdown"
# Generic JSON.
JSON = "application/json"
# Microsoft Word (.docx) document; the only input DocSpecConverter accepts.
DOCX = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
# HTML text.
HTML = "text/html"
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from core import factories
from core.api.serializers import ServerCreateDocumentSerializer
from core.models import Document, Invitation, User
from core.services import mime_types
from core.services.converter_services import ConversionError, YdocConverter

pytestmark = pytest.mark.django_db
Expand Down Expand Up @@ -191,7 +192,9 @@ def test_api_documents_create_for_owner_existing(mock_convert_md):

assert response.status_code == 201

mock_convert_md.assert_called_once_with("Document content")
mock_convert_md.assert_called_once_with(
"Document content", mime_types.MARKDOWN, mime_types.YJS
)

document = Document.objects.get()
assert response.json() == {"id": str(document.id)}
Expand Down Expand Up @@ -236,7 +239,9 @@ def test_api_documents_create_for_owner_new_user(mock_convert_md):

assert response.status_code == 201

mock_convert_md.assert_called_once_with("Document content")
mock_convert_md.assert_called_once_with(
"Document content", mime_types.MARKDOWN, mime_types.YJS
)

document = Document.objects.get()
assert response.json() == {"id": str(document.id)}
Expand Down Expand Up @@ -297,7 +302,9 @@ def test_api_documents_create_for_owner_existing_user_email_no_sub_with_fallback

assert response.status_code == 201

mock_convert_md.assert_called_once_with("Document content")
mock_convert_md.assert_called_once_with(
"Document content", mime_types.MARKDOWN, mime_types.YJS
)

document = Document.objects.get()
assert response.json() == {"id": str(document.id)}
Expand Down Expand Up @@ -393,7 +400,9 @@ def test_api_documents_create_for_owner_new_user_no_sub_no_fallback_allow_duplic
HTTP_AUTHORIZATION="Bearer DummyToken",
)
assert response.status_code == 201
mock_convert_md.assert_called_once_with("Document content")
mock_convert_md.assert_called_once_with(
"Document content", mime_types.MARKDOWN, mime_types.YJS
)

document = Document.objects.get()
assert response.json() == {"id": str(document.id)}
Expand Down Expand Up @@ -474,7 +483,9 @@ def test_api_documents_create_for_owner_with_default_language(
)
assert response.status_code == 201

mock_convert_md.assert_called_once_with("Document content")
mock_convert_md.assert_called_once_with(
"Document content", mime_types.MARKDOWN, mime_types.YJS
)
assert mock_send.call_args[0][3] == "de-de"


Expand All @@ -501,7 +512,9 @@ def test_api_documents_create_for_owner_with_custom_language(mock_convert_md):

assert response.status_code == 201

mock_convert_md.assert_called_once_with("Document content")
mock_convert_md.assert_called_once_with(
"Document content", mime_types.MARKDOWN, mime_types.YJS
)

assert len(mail.outbox) == 1
email = mail.outbox[0]
Expand Down Expand Up @@ -537,7 +550,9 @@ def test_api_documents_create_for_owner_with_custom_subject_and_message(

assert response.status_code == 201

mock_convert_md.assert_called_once_with("Document content")
mock_convert_md.assert_called_once_with(
"Document content", mime_types.MARKDOWN, mime_types.YJS
)

assert len(mail.outbox) == 1
email = mail.outbox[0]
Expand Down Expand Up @@ -571,7 +586,9 @@ def test_api_documents_create_for_owner_with_converter_exception(
format="json",
HTTP_AUTHORIZATION="Bearer DummyToken",
)
mock_convert_md.assert_called_once_with("Document content")
mock_convert_md.assert_called_once_with(
"Document content", mime_types.MARKDOWN, mime_types.YJS
)

assert response.status_code == 400
assert response.json() == {"content": ["Could not convert content"]}
Expand Down
Loading
Loading