diff --git a/CHANGELOG.md b/CHANGELOG.md
index d78a063ae9..41fd811d7f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,10 @@ and this project adheres to
 
 - ✨(backend) allow to create a new user in a marketing system
 
+### Changed
+
+- 🚸(backend) use unaccented full name for user search #1637
+
 ## [4.1.0] - 2025-12-09
 
 ### Added
diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py
index 1c1b9ef50a..7594770bdd 100644
--- a/src/backend/core/api/viewsets.py
+++ b/src/backend/core/api/viewsets.py
@@ -1,4 +1,5 @@
 """API endpoints"""
+
 # pylint: disable=too-many-lines
 
 import base64
@@ -18,7 +19,7 @@
 from django.db import connection, transaction
 from django.db import models as db
 from django.db.models.expressions import RawSQL
-from django.db.models.functions import Left, Length
+from django.db.models.functions import Greatest, Left, Length
 from django.http import Http404, StreamingHttpResponse
 from django.urls import reverse
 from django.utils import timezone
@@ -37,6 +38,7 @@
 from rest_framework.permissions import AllowAny
 
 from core import authentication, choices, enums, models
+from core.api.filters import remove_accents
 from core.services.ai_services import AIService
 from core.services.collaboration_services import CollaborationService
 from core.services.converter_services import (
@@ -188,13 +190,15 @@ def get_queryset(self):
             queryset = queryset.exclude(documentaccess__document_id=document_id)
 
         filter_data = filterset.form.cleaned_data
-        query = filter_data["q"]
+        query = remove_accents(filter_data["q"])
 
         # For emails, match emails by Levenstein distance to prevent typing errors
         if "@" in query:
             return (
                 queryset.annotate(
-                    distance=RawSQL("levenshtein(email::text, %s::text)", (query,))
+                    distance=RawSQL(
+                        "levenshtein(unaccent(email::text), %s::text)", (query,)
+                    )
                 )
                 .filter(distance__lte=3)
                 .order_by("distance", "email")[: settings.API_USERS_LIST_LIMIT]
@@ -203,11 +207,19 @@ def get_queryset(self):
         # Use trigram similarity for non-email-like queries
-        # For performance reasons we filter first by similarity, which relies on an
-        # index, then only calculate precise similarity scores for sorting purposes
+        # The query was unaccented above, so unaccent the stored values too
+        # (f_unaccent comes from migration 0027) and keep the best of both scores.
+        # NOTE: similarity() is evaluated per row; only the pg_trgm operators
+        # (e.g. %) can be served by the trigram indexes.
+        unaccented_email = db.Func("email", function="f_unaccent")
+        unaccented_name = db.Func("full_name", function="f_unaccent")
+
         return (
-            queryset.filter(email__trigram_word_similar=query)
-            .annotate(similarity=TrigramSimilarity("email", query))
+            queryset.annotate(
+                sim_email=TrigramSimilarity(unaccented_email, query),
+                sim_name=TrigramSimilarity(unaccented_name, query),
+            )
+            .annotate(similarity=Greatest("sim_email", "sim_name"))
             .filter(similarity__gt=0.2)
             .order_by("-similarity", "email")[: settings.API_USERS_LIST_LIMIT]
         )
 
     @drf.decorators.action(
diff --git a/src/backend/core/migrations/0027_auto_20251120_0956.py b/src/backend/core/migrations/0027_auto_20251120_0956.py
new file mode 100644
index 0000000000..fe795ff5f2
--- /dev/null
+++ b/src/backend/core/migrations/0027_auto_20251120_0956.py
@@ -0,0 +1,45 @@
+# Generated by Django 5.2.8 on 2025-11-20 09:56
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+    """Create immutable unaccent helpers and unaccented trigram indexes on users."""
+
+    dependencies = [
+        ("core", "0026_comments"),
+    ]
+
+    # Index expressions may only call IMMUTABLE functions, hence the wrappers
+    # declared below around the unaccent dictionary function.
+    operations = [
+        migrations.RunSQL(
+            sql="""
+            CREATE OR REPLACE FUNCTION public.immutable_unaccent(regdictionary, text)
+              RETURNS text
+              LANGUAGE c IMMUTABLE PARALLEL SAFE STRICT AS
+              '$libdir/unaccent', 'unaccent_dict';
+
+            CREATE OR REPLACE FUNCTION public.f_unaccent(text)
+              RETURNS text
+              LANGUAGE sql IMMUTABLE PARALLEL SAFE STRICT
+              RETURN public.immutable_unaccent(regdictionary 'public.unaccent', $1);
+
+            CREATE INDEX IF NOT EXISTS user_email_unaccent_trgm_idx
+                ON impress_user
+                USING gin (f_unaccent(email) gin_trgm_ops);
+
+            CREATE INDEX IF NOT EXISTS user_full_name_unaccent_trgm_idx
+                ON impress_user
+                USING gin (f_unaccent(full_name) gin_trgm_ops);
+            """,
+            # Undo everything the forward SQL created: the indexes first (they
+            # depend on f_unaccent), then the wrapper, then the C-level function.
+            reverse_sql="""
+            DROP INDEX IF EXISTS user_email_unaccent_trgm_idx;
+            DROP INDEX IF EXISTS user_full_name_unaccent_trgm_idx;
+            DROP FUNCTION IF EXISTS public.f_unaccent(text);
+            DROP FUNCTION IF EXISTS public.immutable_unaccent(regdictionary, text);
+            """,
+        ),
+    ]
diff --git a/src/backend/core/tests/test_api_users.py b/src/backend/core/tests/test_api_users.py
index a0a4355280..926e731bd4 100644
--- a/src/backend/core/tests/test_api_users.py
+++ b/src/backend/core/tests/test_api_users.py
@@ -76,6 +76,131 @@ def test_api_users_list_query_email():
     assert user_ids == []
 
 
+def test_api_users_list_query_email_with_internationalized_domain_names():
+    """
+    Authenticated users should be able to list users and filter by email.
+    It should work even if the email address contains an internationalized domain name.
+    """
+    user = factories.UserFactory()
+
+    client = APIClient()
+    client.force_login(user)
+
+    jean = factories.UserFactory(email="jean.martin@éducation.fr")
+    marie = factories.UserFactory(email="marie.durand@education.fr")
+    kurokawa = factories.UserFactory(email="contact@黒川.日本")
+
+    response = client.get("/api/v1.0/users/?q=jean.martin@education.fr")
+    assert response.status_code == 200
+    user_ids = [user["id"] for user in response.json()]
+    assert user_ids == [str(jean.id)]
+
+    response = client.get("/api/v1.0/users/?q=jean.martin@éducation.fr")
+    assert response.status_code == 200
+    user_ids = [user["id"] for user in response.json()]
+    assert user_ids == [str(jean.id)]
+
+    response = client.get("/api/v1.0/users/?q=marie.durand@education.fr")
+    assert response.status_code == 200
+    user_ids = [user["id"] for user in response.json()]
+    assert user_ids == [str(marie.id)]
+
+    response = client.get("/api/v1.0/users/?q=marie.durand@éducation.fr")
+    assert response.status_code == 200
+    user_ids = [user["id"] for user in response.json()]
+    assert user_ids == [str(marie.id)]
+
+    response = client.get("/api/v1.0/users/?q=contact@黒川.日本")
+    assert response.status_code == 200
+    user_ids = [user["id"] for user in response.json()]
+    assert user_ids == [str(kurokawa.id)]
+
+
+def test_api_users_list_query_full_name():
+    """
+    Authenticated users should be able to list users and filter by full name.
+    Only results with a Trigram similarity greater than 0.2 with the query should be returned.
+    """
+    user = factories.UserFactory()
+
+    client = APIClient()
+    client.force_login(user)
+
+    dave = factories.UserFactory(email="contact@work.com", full_name="David Bowman")
+
+    response = client.get(
+        "/api/v1.0/users/?q=David",
+    )
+    assert response.status_code == 200
+    user_ids = [user["id"] for user in response.json()]
+    assert user_ids == [str(dave.id)]
+
+    response = client.get("/api/v1.0/users/?q=Bowman")
+    assert response.status_code == 200
+    user_ids = [user["id"] for user in response.json()]
+    assert user_ids == [str(dave.id)]
+
+    response = client.get("/api/v1.0/users/?q=bowman")
+    assert response.status_code == 200
+    user_ids = [user["id"] for user in response.json()]
+    assert user_ids == [str(dave.id)]
+
+    response = client.get("/api/v1.0/users/?q=BOWMAN")
+    assert response.status_code == 200
+    user_ids = [user["id"] for user in response.json()]
+    assert user_ids == [str(dave.id)]
+
+    response = client.get("/api/v1.0/users/?q=BoWmAn")
+    assert response.status_code == 200
+    user_ids = [user["id"] for user in response.json()]
+    assert user_ids == [str(dave.id)]
+
+    response = client.get("/api/v1.0/users/?q=Bovin")
+    assert response.status_code == 200
+    user_ids = [user["id"] for user in response.json()]
+    assert user_ids == []
+
+
+def test_api_users_list_query_accented_full_name():
+    """
+    Authenticated users should be able to list users and filter by full name with accents.
+    Only results with a Trigram similarity greater than 0.2 with the query should be returned.
+    """
+    user = factories.UserFactory()
+
+    client = APIClient()
+    client.force_login(user)
+
+    fred = factories.UserFactory(
+        email="contact@work.com", full_name="Frédérique Lefèvre"
+    )
+
+    response = client.get("/api/v1.0/users/?q=Frédérique")
+    assert response.status_code == 200
+    user_ids = [user["id"] for user in response.json()]
+    assert user_ids == [str(fred.id)]
+
+    response = client.get("/api/v1.0/users/?q=Frederique")
+    assert response.status_code == 200
+    user_ids = [user["id"] for user in response.json()]
+    assert user_ids == [str(fred.id)]
+
+    response = client.get("/api/v1.0/users/?q=Lefèvre")
+    assert response.status_code == 200
+    user_ids = [user["id"] for user in response.json()]
+    assert user_ids == [str(fred.id)]
+
+    response = client.get("/api/v1.0/users/?q=Lefevre")
+    assert response.status_code == 200
+    user_ids = [user["id"] for user in response.json()]
+    assert user_ids == [str(fred.id)]
+
+    response = client.get("/api/v1.0/users/?q=François Lorfebvre")
+    assert response.status_code == 200
+    users = [user["full_name"] for user in response.json()]
+    assert users == []
+
+
 def test_api_users_list_limit(settings):
     """
     Authenticated users should be able to list users and the number of results
diff --git a/src/helm/env.d/dev/values.impress.yaml.gotmpl b/src/helm/env.d/dev/values.impress.yaml.gotmpl
index f8b6a6e501..129a4b89ba 100644
--- a/src/helm/env.d/dev/values.impress.yaml.gotmpl
+++ b/src/helm/env.d/dev/values.impress.yaml.gotmpl
@@ -41,7 +41,7 @@ backend:
     OIDC_RP_CLIENT_ID: docs
     OIDC_RP_CLIENT_SECRET: ThisIsAnExampleKeyForDevPurposeOnly
     OIDC_RP_SIGN_ALGO: RS256
-    OIDC_RP_SCOPES: "openid email"
+    OIDC_RP_SCOPES: "openid email given_name usual_name"
     LOGIN_REDIRECT_URL: https://docs.127.0.0.1.nip.io
     LOGIN_REDIRECT_URL_FAILURE: https://docs.127.0.0.1.nip.io
     LOGOUT_REDIRECT_URL: https://docs.127.0.0.1.nip.io
diff --git a/src/helm/env.d/feature/values.impress.yaml.gotmpl b/src/helm/env.d/feature/values.impress.yaml.gotmpl
index c6c7fe8cf3..050c35ece2 100644
--- a/src/helm/env.d/feature/values.impress.yaml.gotmpl
+++ b/src/helm/env.d/feature/values.impress.yaml.gotmpl
@@ -42,7 +42,7 @@ backend:
     OIDC_RP_CLIENT_ID: docs
     OIDC_RP_CLIENT_SECRET: ThisIsAnExampleKeyForDevPurposeOnly
     OIDC_RP_SIGN_ALGO: RS256
-    OIDC_RP_SCOPES: "openid email"
+    OIDC_RP_SCOPES: "openid email given_name usual_name"
     LOGIN_REDIRECT_URL: https://{{ .Values.feature }}-docs.{{ .Values.domain }}
     LOGIN_REDIRECT_URL_FAILURE: https://{{ .Values.feature }}-docs.{{ .Values.domain }}
     LOGOUT_REDIRECT_URL: https://{{ .Values.feature }}-docs.{{ .Values.domain }}