From 0aadc06fc515b86f2ebd317bab62dd84ab395cad Mon Sep 17 00:00:00 2001
From: Anton Krytskyi
Date: Wed, 3 Dec 2025 14:48:47 +0200
Subject: [PATCH] add author ordering

Emit DataCite creators in OSF contributor order, taken from the
OSF.order of each PROV.qualifiedAttribution, instead of iterating an
unordered set. Creators are de-duplicated while keeping their order,
and attributions without an integer order sort last.
---
 .../datacite/datacite_tree_walker.py          | 65 +++++++++++++++----
 tests/identifiers/test_datacite.py            | 36 ++++++++++
 2 files changed, 88 insertions(+), 13 deletions(-)

diff --git a/osf/metadata/serializers/datacite/datacite_tree_walker.py b/osf/metadata/serializers/datacite/datacite_tree_walker.py
index f878152256e..5f0a283d450 100644
--- a/osf/metadata/serializers/datacite/datacite_tree_walker.py
+++ b/osf/metadata/serializers/datacite/datacite_tree_walker.py
@@ -17,6 +17,7 @@
     FOAF,
     ORCID,
     OSF,
+    PROV,
     ROR,
     SKOS,
     DATACITE,
@@ -128,23 +129,61 @@ def _visit_identifier(self, parent_el, *, doi_override=None):
         })
 
+    def _gather_ordered_agent_iris(self, focus_iri):
+        '''list the agents attributed on focus_iri, sorted by `OSF.order`
+
+        attributions lacking an integer order sort after the ordered ones;
+        attributions lacking a `PROV.agent` are skipped
+        '''
+        ordered_agents = []
+        # materialized up front, as the nested basket lookups below happen
+        # mid-loop (NOTE(review): confirm whether lazy iteration is safe)
+        attribution_refs = list(self.basket[focus_iri:PROV.qualifiedAttribution])
+        for attribution_ref in attribution_refs:
+            agent_iri = next(iter(self.basket[attribution_ref:PROV.agent]), None)
+            if agent_iri is None:
+                continue
+            order_val = next(iter(self.basket[attribution_ref:OSF.order]), None)
+            try:
+                order_index = int(order_val)
+            except (TypeError, ValueError):
+                # If there is no usable order, shove it to the end
+                order_index = float('inf')
+            ordered_agents.append((order_index, agent_iri))
+        # sort on the order alone; equal orders keep their gathered sequence
+        ordered_agents.sort(key=lambda pair: pair[0])
+        return [agent_iri for _, agent_iri in ordered_agents]
+
     def _visit_creators(self, parent_el, focus_iri):
-        creator_iris = set(self.basket[focus_iri:DCTERMS.creator])
-        if (not creator_iris) and ((focus_iri, RDF.type, OSF.File) in self.basket):
-            creator_iris.update(self.basket[focus_iri:OSF.hasFileVersion / DCTERMS.creator])
-        if not creator_iris:
-            creator_iris.update(self.basket[focus_iri:OSF.isContainedBy / DCTERMS.creator])
-        if not creator_iris:
-            creator_iris.update(self.basket[focus_iri:DCTERMS.isPartOf / DCTERMS.creator])
-        if not creator_iris:
-            creator_iris.update(self.basket[focus_iri:DCTERMS.contributor])
-        if not creator_iris:
-            creator_iris.update(self.basket[focus_iri:OSF.isContainedBy / DCTERMS.contributor])
+        '''add a `creators` element for the creators of focus_iri
+
+        agents of qualified attributions are the first choice, in `OSF.order`;
+        otherwise falls back to creators, then contributors, gathered from
+        the focus and its related resources
+
+        raises ValueError if no creators or contributors are found
+        '''
+        creator_iris = self._gather_ordered_agent_iris(focus_iri)
+        # Fallbacks when there is no explicit OSF ordering
         if not creator_iris:
-            creator_iris.update(self.basket[focus_iri:DCTERMS.isPartOf / DCTERMS.contributor])
+            creator_iris = list(self.basket[focus_iri:DCTERMS.creator])
+            if (not creator_iris) and ((focus_iri, RDF.type, OSF.File) in self.basket):
+                creator_iris.extend(self.basket[focus_iri:OSF.hasFileVersion / DCTERMS.creator])
+            if not creator_iris:
+                creator_iris.extend(self.basket[focus_iri:OSF.isContainedBy / DCTERMS.creator])
+            if not creator_iris:
+                creator_iris.extend(self.basket[focus_iri:DCTERMS.isPartOf / DCTERMS.creator])
+            if not creator_iris:
+                creator_iris.extend(self.basket[focus_iri:DCTERMS.contributor])
+            if not creator_iris:
+                creator_iris.extend(self.basket[focus_iri:OSF.isContainedBy / DCTERMS.contributor])
+            if not creator_iris:
+                creator_iris.extend(self.basket[focus_iri:DCTERMS.isPartOf / DCTERMS.contributor])
+        # this used to be a set: drop repeats, keeping first-seen order
+        creator_iris = list(dict.fromkeys(creator_iris))
         if not creator_iris:
             raise ValueError(f'gathered no creators or contributors around {focus_iri}')
         creators_el = self.visit(parent_el, 'creators', is_list=True)
-        for creator_iri in creator_iris:  # TODO: "priority order"
+        for creator_iri in creator_iris:
             creator_el = self.visit(creators_el, 'creator')
             for name in self.basket[creator_iri:FOAF.name]:
                 self.visit(creator_el, 'creatorName', text=name, attrib={
diff --git a/tests/identifiers/test_datacite.py b/tests/identifiers/test_datacite.py
index 2bcd1605cf7..a0335bf5db3 100644
--- a/tests/identifiers/test_datacite.py
+++ b/tests/identifiers/test_datacite.py
@@ -129,6 +129,42 @@ def test_datacite_build_metadata_for_dataarchive_registration(self, registration
         assert resource_type.text == 'Pre-registration'
         assert resource_type.attrib['resourceTypeGeneral'] == 'Dataset'
 
+    def test_datacite_creators_follow_osf_contributor_order(self, datacite_client):
+        registration = RegistrationFactory(is_public=True)
+        first = registration.creator
+        second = AuthUserFactory()
+        third = AuthUserFactory()
+        # note: `third` is added before `second`
+        registration.add_contributor(third, visible=True)
+        registration.add_contributor(second, visible=True)
+        registration.save()
+
+        def _visible_contributor_names():
+            return [u.fullname for u in registration.visible_contributors]
+
+        def _xml_creator_names():
+            # creatorName texts, in document order, from freshly built metadata
+            metadata_xml = datacite_client.build_metadata(registration)
+            parser = lxml.etree.XMLParser(ns_clean=True, recover=True, encoding='utf-8')
+            root = lxml.etree.fromstring(metadata_xml, parser=parser)
+            creators_el = root.find('{%s}creators' % schema40.ns[None])
+            return [
+                creator_el.find('{%s}creatorName' % schema40.ns[None]).text
+                for creator_el in creators_el.findall('{%s}creator' % schema40.ns[None])
+            ]
+
+        correct_order = [first.fullname, third.fullname, second.fullname]
+        assert _visible_contributor_names() == correct_order
+        assert _xml_creator_names() == correct_order
+
+        # moving the first contributor to the end should reorder the creators
+        registration.move_contributor(first, auth=Auth(first), index=2, save=True)
+        registration.refresh_from_db()
+
+        new_correct_order = [third.fullname, second.fullname, first.fullname]
+        assert _visible_contributor_names() == new_correct_order
+        assert _xml_creator_names() == new_correct_order
+
     def test_datacite_format_contributors(self, datacite_client):
         visible_contrib = AuthUserFactory()
         visible_contrib2 = AuthUserFactory()