"""Analyse ``*RefStructure`` complex types in the NeTEx XSD tree.

``main`` reports every ``*RefStructure`` complexType that sits on the type
chain of a ``*Ref`` element deriving from ``VersionOfObjectRefStructure`` but
does not itself contain an ``xsd:extension``.  The remaining helpers can turn
such extensions into restrictions; that rewriting step is not invoked by
``main``.
"""
import copy
from pathlib import Path

from lxml import etree

XSD_ROOT = Path("xsd")
XSD_NS = {"xsd": "http://www.w3.org/2001/XMLSchema"}


def _name_suffix_xpath(tag: str, suffix: str) -> str:
    """Return an XPath 1.0 query for ``xsd:<tag>`` nodes whose @name ends with *suffix*.

    XPath 1.0 has no ``ends-with()``, hence the ``substring`` arithmetic.
    """
    return (
        f"//xsd:{tag}[substring(@name, string-length(@name) - "
        f"string-length('{suffix}') +1) = '{suffix}']"
    )


def get_version_of_object_ref_attrs(xsd_file: Path):
    """Return deep copies of the element children of VersionOfObjectRefStructure's extension.

    Args:
        xsd_file: Schema file that declares ``VersionOfObjectRefStructure``.

    Returns:
        A list with a copy of every child element (attributes and the like) of
        ``simpleContent/extension``; comments and processing instructions are
        skipped because their ``tag`` is not a string.

    Raises:
        ValueError: If the complexType or its extension is not present.
    """
    tree = etree.parse(str(xsd_file))
    ext = tree.xpath(
        "//xsd:complexType[@name='VersionOfObjectRefStructure']/xsd:simpleContent/xsd:extension",
        namespaces=XSD_NS,
    )
    if not ext:
        raise ValueError("VersionOfObjectRefStructure niet gevonden of geen extension")
    return [copy.deepcopy(c) for c in ext[0] if isinstance(c.tag, str)]


def collect_ref_elements(root_dir: Path):
    """Yield ``(xsd_file, element_name)`` for every xsd:element whose name ends with ``Ref``.

    Only files whose path contains ``xsd/netex`` are inspected.
    """
    for f in root_dir.rglob("*.xsd"):
        # as_posix() keeps the substring test working with Windows separators.
        if 'xsd/netex' not in f.as_posix():
            continue
        tree = etree.parse(str(f))
        for elem in tree.xpath(_name_suffix_xpath("element", "Ref"), namespaces=XSD_NS):
            yield (f, elem.get("name"))


def collect_ref_complextypes(root_dir: Path):
    """Yield ``(xsd_file, type_name)`` for every complexType whose name ends with ``RefStructure``."""
    for f in root_dir.rglob("*.xsd"):
        tree = etree.parse(str(f))
        for ctype in tree.xpath(_name_suffix_xpath("complexType", "RefStructure"), namespaces=XSD_NS):
            yield (f, ctype.get("name"))


def add_missing_attrs(restriction_elem, version_attrs):
    """Append to *restriction_elem* each xsd:attribute of *version_attrs* it does not have yet.

    Attributes are matched by their ``name``; appended nodes are deep copies so
    the entries of *version_attrs* can be reused for other types.
    """
    existing_names = {a.get("name") for a in restriction_elem.findall("xsd:attribute", XSD_NS)}
    for attr in version_attrs:
        if attr.tag.endswith("attribute") and attr.get("name") not in existing_names:
            restriction_elem.append(copy.deepcopy(attr))


def ensure_nameofrefclass(restriction_elem, type_name: str):
    """Add a ``nameOfRefClass`` attribute to *restriction_elem* unless one already exists.

    The class name is *type_name* without its trailing ``RefStructure``
    (``ScheduledStopPointRefStructure`` -> ``ScheduledStopPoint``); it is used
    both as the default value and inside the ``NameOfClass<...>`` type name.
    """
    if restriction_elem.xpath("xsd:attribute[@name='nameOfRefClass']", namespaces=XSD_NS):
        return
    # removesuffix only strips the trailing marker; str.replace would also
    # remove a "RefStructure" that occurs earlier in the name.
    base = type_name.removesuffix("RefStructure")
    attr = etree.Element(f"{{{XSD_NS['xsd']}}}attribute")
    attr.set("name", "nameOfRefClass")
    attr.set("type", f"NameOfClass{base}")
    attr.set("default", base)
    restriction_elem.append(attr)


def process_refstructure_complex_types(xsd_file, version_attrs, valid_ref_classes):
    """Replace extension with restriction for the RefStructures named in *valid_ref_classes*.

    For every matching complexType that has an ``xsd:extension`` and an
    ``xsd:simpleContent`` child, the content of ``simpleContent`` is replaced
    by an ``xsd:restriction`` with the same base, the extension's children,
    any missing *version_attrs* and a ``nameOfRefClass`` attribute.

    Returns:
        True if the file was modified (and rewritten in place), else False.
    """
    tree = etree.parse(str(xsd_file))
    modified = False

    for ctype in tree.xpath(_name_suffix_xpath("complexType", "RefStructure"), namespaces=XSD_NS):
        name = ctype.get("name")
        if name not in valid_ref_classes:
            continue  # skip unrelated RefStructures

        ext = ctype.xpath(".//xsd:extension", namespaces=XSD_NS)
        simple_content = ctype.find("xsd:simpleContent", XSD_NS)
        if not ext or simple_content is None:
            continue
        ext_elem = ext[0]

        restriction = etree.Element(f"{{{XSD_NS['xsd']}}}restriction", base=ext_elem.get("base"))

        # Carry over what the current extension declares.
        for child in ext_elem:
            restriction.append(copy.deepcopy(child))

        # Add every attribute of VersionOfObjectRefStructure that is still missing.
        add_missing_attrs(restriction, version_attrs)

        # Make sure nameOfRefClass is present.
        ensure_nameofrefclass(restriction, name)

        # Swap the content of simpleContent for the new restriction.
        for child in list(simple_content):
            simple_content.remove(child)
        simple_content.append(restriction)
        modified = True

    if modified:
        tree.write(str(xsd_file), encoding="utf-8", xml_declaration=True, pretty_print=True)
    return modified


def has_extension(xsd_file: Path, type_name: str) -> str | None:
    """Return the base of the first xsd:extension inside complexType *type_name*.

    Returns None when the type is absent or contains no extension.
    """
    tree = etree.parse(str(xsd_file))
    # Pass the name as an XPath variable so quotes in it cannot break the query.
    ext = tree.xpath(
        "//xsd:complexType[@name=$type_name]//xsd:extension",
        namespaces=XSD_NS,
        type_name=type_name,
    )
    if ext:
        return ext[0].get("base")
    return None


def main():
    """Print the relevant ``*RefStructure`` types that do not use an extension."""
    # Result is not used below; the call still fails early when the base
    # structure cannot be found.
    version_attrs = get_version_of_object_ref_attrs(
        XSD_ROOT / "netex_framework/netex_responsibility/netex_relationship_support.xsd"
    )

    # The analyzer decides which RefStructures inherit from VersionOfObjectRefStructure.
    from dependencygraph import XSDDependencyAnalyzer

    ref_elements = set(collect_ref_elements(XSD_ROOT))

    analyzer = XSDDependencyAnalyzer()
    analyzer.parse_schemas([XSD_ROOT])
    analyzer.build_dependency_graph()

    all_complex_types = set()
    for _xsd_file, name in ref_elements:
        type_chain = analyzer._get_type_chain(name)
        if "VersionOfObjectRefStructure" in type_chain:
            all_complex_types.update(type_chain)

    for xsd_file, name in collect_ref_complextypes(XSD_ROOT):
        if name in all_complex_types and not has_extension(xsd_file, name):
            print(name)

    # NOTE: rewriting the files with process_refstructure_complex_types()
    # (using version_attrs and the collected type names) is not enabled here.


if __name__ == "__main__":
    main()
"""
XSD Schema Dependency Analyzer
Builds an inverted dependency graph showing which elements depend on a given element
through type hierarchy (extension/restriction relationships).
"""

import json
import xml.etree.ElementTree as ET
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Optional, Set, Tuple


class XSDDependencyAnalyzer:
    """Analyzes XSD schemas to build inverted dependency graphs based on type hierarchies.

    Names are handled without their namespace prefix, so identically named
    components from different namespaces are treated as one.
    """

    XSD_NS = "http://www.w3.org/2001/XMLSchema"

    # Lexical forms of xsd:boolean that mean "true".
    _XSD_TRUE = ("true", "1")

    def __init__(self):
        # Element name -> its type name (or synthetic "<name>_InlineType")
        self.element_to_type = {}

        # Type name -> its base type name (for extensions/restrictions)
        self.type_hierarchy = {}

        # Element name -> set of elements that depend on it (through type hierarchy)
        self.element_dependencies = defaultdict(set)

        # For debugging: type name -> {'base': ..., 'derivation': ...}
        self.type_info = {}

        # Names of elements declared abstract
        self.abstract_elements = set()

    def parse_schemas(self, schema_paths: List[Path]) -> None:
        """Parse every given XSD file, and every ``*.xsd`` below each given directory."""
        for path in schema_paths:
            if path.is_file():
                self._parse_schema_file(path)
            elif path.is_dir():
                # Sorted so that "last definition wins" does not depend on
                # the directory listing order.
                for xsd_file in sorted(path.rglob("*.xsd")):
                    self._parse_schema_file(xsd_file)

    def _parse_schema_file(self, filepath: Path) -> None:
        """Parse a single XSD schema file; unreadable files are reported and skipped."""
        try:
            root = ET.parse(filepath).getroot()
        except (ET.ParseError, OSError, ValueError) as e:
            print(f"Error parsing {filepath}: {e}")
            return

        # Types first, then the elements that refer to them.
        self._extract_types(root)
        self._extract_elements(root)

    def _find_derivation(self, type_node: ET.Element) -> Tuple[Optional[str], Optional[str]]:
        """Return ``(base, derivation)`` declared by a complexType node itself.

        Only ``complexContent``/``simpleContent`` directly below *type_node*
        are inspected; a descendant search would also hit the derivations of
        anonymous types nested in child elements.
        """
        ns = self.XSD_NS
        for content in ("complexContent", "simpleContent"):
            for derivation in ("extension", "restriction"):
                node = type_node.find(f"{{{ns}}}{content}/{{{ns}}}{derivation}")
                if node is not None:
                    return self._strip_ns(node.get('base', '')), derivation
        return None, None

    def _record_derivation(self, name: str, base_type: str, derivation: Optional[str]) -> None:
        """Store that type *name* derives from *base_type*."""
        self.type_hierarchy[name] = base_type
        self.type_info[name] = {'base': base_type, 'derivation': derivation}

    def _extract_types(self, root: ET.Element) -> None:
        """Extract all named type definitions and their base types."""
        ns = self.XSD_NS

        for ctype in root.findall(f".//{{{ns}}}complexType"):
            name = ctype.get('name')
            if not name:
                continue
            base_type, derivation = self._find_derivation(ctype)
            if base_type:
                self._record_derivation(name, base_type, derivation)

        for stype in root.findall(f".//{{{ns}}}simpleType"):
            name = stype.get('name')
            if not name:
                continue
            restriction = stype.find(f"{{{ns}}}restriction")
            if restriction is None:
                continue
            base_type = self._strip_ns(restriction.get('base', ''))
            # A restriction without @base carries no hierarchy information.
            if base_type:
                self._record_derivation(name, base_type, 'restriction')

    def _extract_elements(self, root: ET.Element) -> None:
        """Extract all named element declarations and map them to their types."""
        ns = self.XSD_NS

        for elem in root.findall(f".//{{{ns}}}element"):
            name = elem.get('name')
            if not name:
                continue

            # @abstract is a string; "false" must not count as abstract.
            if elem.get('abstract', 'false').strip() in self._XSD_TRUE:
                self.abstract_elements.add(name)

            type_ref = elem.get('type')
            if type_ref:
                self.element_to_type[name] = self._strip_ns(type_ref)
                continue

            # No @type: look at an inline complex type that derives from a base.
            complex_type = elem.find(f"{{{ns}}}complexType")
            if complex_type is None:
                continue
            base_type, derivation = self._find_derivation(complex_type)
            if base_type:
                # Synthetic type name for the anonymous inline type.
                inline_type_name = f"{name}_InlineType"
                self.element_to_type[name] = inline_type_name
                self._record_derivation(inline_type_name, base_type, derivation)

    def _strip_ns(self, qname: str) -> str:
        """Strip namespace prefix from a qualified name."""
        if ':' in qname:
            return qname.split(':', 1)[1]
        return qname

    def build_dependency_graph(self) -> None:
        """Build the inverted dependency graph, including transitive relationships.

        Element B directly depends on element A when one of B's
        element-specific types has one of A's element-specific types as its
        immediate base.
        """
        # Computed once per element instead of once per element pair.
        specific = {
            name: self._get_element_specific_types(name)
            for name in self.element_to_type
        }

        # Type name -> elements for which that type is element-specific.
        owners = defaultdict(set)
        for name, types in specific.items():
            for type_name in types:
                owners[type_name].add(name)

        direct_deps = defaultdict(set)
        for derived_elem, types in specific.items():
            for type_name in types:
                base = self.type_hierarchy.get(type_name)
                if not base:
                    continue
                for base_elem in owners.get(base, ()):
                    if base_elem != derived_elem:
                        direct_deps[base_elem].add(derived_elem)

        # If B depends on A, and C depends on B, then C depends on A.
        self.element_dependencies = self._compute_transitive_closure(direct_deps)

    def _compute_transitive_closure(self, direct_deps: Dict[str, Set[str]]) -> Dict[str, Set[str]]:
        """Compute transitive closure of dependencies (the input is left untouched)."""
        result = defaultdict(set)
        for elem, deps in direct_deps.items():
            result[elem] = set(deps)

        # Repeat until a full sweep adds nothing new.
        changed = True
        while changed:
            changed = False
            for elem in list(result):
                reachable = result[elem]
                extra = set()
                for dep in reachable:
                    if dep in result:
                        extra |= result[dep]
                extra -= reachable
                if extra:
                    reachable |= extra
                    changed = True

        return result

    def _get_element_specific_types(self, elem_name: str) -> Set[str]:
        """Get the leading types of the element's chain whose name contains the element name.

        For example "ScheduledStopPoint_VersionStructure" is specific to
        "ScheduledStopPoint".  The walk stops at the first type that does not
        contain the element name, or when a type repeats (cyclic hierarchy).
        """
        specific_types = set()
        current_type = self.element_to_type.get(elem_name)
        while current_type and current_type not in specific_types and elem_name in current_type:
            specific_types.add(current_type)
            current_type = self.type_hierarchy.get(current_type)
        return specific_types

    def _get_type_chain(self, elem_name: str) -> List[str]:
        """Get the type hierarchy chain for an element (most specific to most general).

        Each type appears once; the walk ends when a type repeats, so a cyclic
        hierarchy cannot loop forever.
        """
        chain = []
        seen = set()
        current_type = self.element_to_type.get(elem_name)
        while current_type and current_type not in seen:
            seen.add(current_type)
            chain.append(current_type)
            current_type = self.type_hierarchy.get(current_type)
        return chain

    def _depends_on(self, derived_chain: List[str], base_chain: List[str]) -> bool:
        """Check if derived_chain reaches a type of base_chain above its own first type."""
        if not base_chain or not derived_chain:
            return False
        # Index 0 is the derived element's own type; a shared type further up
        # means the derived chain extends/restricts through it.
        return any(
            base_type in derived_chain and derived_chain.index(base_type) > 0
            for base_type in base_chain
        )

    def get_dependents(self, element_name: str) -> Set[str]:
        """Get all elements that depend on the given element."""
        return self.element_dependencies.get(element_name, set())

    def _first_link(self, derived_types: Set[str], base_types: Set[str]) -> Optional[Tuple[str, str]]:
        """Return ``(derived_type, base)`` for the first type whose immediate base is in *base_types*."""
        for derived_type in derived_types:
            base = self.type_hierarchy.get(derived_type)
            if base and base in base_types:
                return derived_type, base
        return None

    def get_dependency_path(self, base_elem: str, derived_elem: str) -> str:
        """Describe how *derived_elem* depends on *base_elem* (direct, or via one intermediate)."""
        base_specific_types = self._get_element_specific_types(base_elem)
        derived_specific_types = self._get_element_specific_types(derived_elem)

        if not base_specific_types or not derived_specific_types:
            return "No element-specific type information"

        link = self._first_link(derived_specific_types, base_specific_types)
        if link is not None:
            derived_type, base_of_derived = link
            derivation = self.type_info.get(derived_type, {}).get('derivation', 'unknown')
            return f"{derived_type} --[{derivation}]--> {base_of_derived} (direct)"

        # Look for one element that derived extends and that itself extends base.
        for intermediate in self.element_to_type:
            if intermediate in (base_elem, derived_elem):
                continue
            intermediate_specific = self._get_element_specific_types(intermediate)
            if (
                self._first_link(derived_specific_types, intermediate_specific) is not None
                and self._first_link(intermediate_specific, base_specific_types) is not None
            ):
                return f"via {intermediate} (transitive)"

        return "No connection found"

    def print_all_dependencies(self) -> None:
        """Print dependency information for all elements that have dependents."""
        print("\n" + "="*80)
        print("INVERTED DEPENDENCY GRAPH")
        print("="*80)

        for elem_name in sorted(self.element_to_type):
            dependents = self.get_dependents(elem_name)
            if not dependents:
                continue
            print(f"\n{elem_name}:")
            print(f" Type chain: {' → '.join(self._get_type_chain(elem_name))}")
            print(f" Depended on by:")
            for dep in sorted(dependents):
                path = self.get_dependency_path(elem_name, dep)
                print(f" - {dep}")
                print(f" via: {path}")

    def print_dependencies(self, element_name: str) -> None:
        """Print dependency information for a specific element."""
        if element_name not in self.element_to_type:
            print(f"Element '{element_name}' not found in schemas.")
            return

        print(f"\n{'='*80}")
        print(f"Dependencies for: {element_name}")
        print(f"{'='*80}")

        type_chain = self._get_type_chain(element_name)
        print(f"\nType chain: {' → '.join(type_chain)}")

        # Show which types are considered "specific" to this element
        specific_types = self._get_element_specific_types(element_name)
        if specific_types:
            print(f"\nElement-specific types: {', '.join(sorted(specific_types))}")

        dependents = self.get_dependents(element_name)
        if dependents:
            print(f"\nElements that depend on '{element_name}':")
            for dep in sorted(dependents):
                path = self.get_dependency_path(element_name, dep)
                print(f" - {dep}")
                print(f" via: {path}")
        else:
            print(f"\nNo elements depend on '{element_name}'")
            print("(Only showing dependencies on element-specific types, not shared ancestor types)")

    def export_to_json(self, output_path: Path) -> None:
        """Export the dependency graph (all elements, with type chains) to JSON."""
        result = {}
        for elem_name in sorted(self.element_to_type):
            result[elem_name] = {
                'type_chain': self._get_type_chain(elem_name),
                'depended_on_by': [
                    {
                        'element': dep,
                        'via_type_chain': self.get_dependency_path(elem_name, dep)
                    }
                    for dep in sorted(self.get_dependents(elem_name))
                ]
            }

        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(result, f, indent=2)

        print(f"\n{'='*80}")
        print(f"Exported dependency graph to: {output_path}")
        print(f"{'='*80}")

    def get_simple_graph(self, root_type: Optional[str] = None) -> Dict[str, List[str]]:
        """
        Get a simple dictionary mapping element names to lists of dependent elements.

        Args:
            root_type: If specified, only include elements whose type chain ends with this type.
                For example, 'EntityStructure' to filter entities.

        Returns:
            Dict mapping element_name -> [sorted list of dependent element names]
        """
        result = {}

        for elem_name in sorted(self.element_to_type):
            if root_type:
                type_chain = self._get_type_chain(elem_name)
                if not type_chain or type_chain[-1] != root_type:
                    continue
            result[elem_name] = sorted(self.get_dependents(elem_name))

        return result

    def export_simple_graph(self, output_path: Path, root_type: Optional[str] = None) -> None:
        """
        Export a simple dependency graph as a clean dictionary.

        Args:
            output_path: Path to save the JSON file
            root_type: If specified, only include elements whose type chain ends with this type
        """
        graph = self.get_simple_graph(root_type)

        with open(output_path, 'w', encoding='utf-8') as f:
            # Sorted so that repeated exports produce identical files.
            json.dump({'graph': graph, 'abstract': sorted(self.abstract_elements)}, f, indent=2)

        filter_msg = f" (filtered to {root_type})" if root_type else ""
        print(f"\n{'='*80}")
        print(f"Exported simple dependency graph to: {output_path}{filter_msg}")
        print(f"Total elements: {len(graph)}")
        print(f"{'='*80}")


def main():
    """Command-line entry point: parse schemas, build the graph and export it."""
    import sys

    if len(sys.argv) < 2:
        print("Usage: python dependencygraph.py <schema_path> [element_name]")
        print(" schema_path: Path to XSD file or directory")
        print(" element_name: (optional) Specific element to analyze")
        print("\nIf no element_name is provided, only the simple graph is exported")
        sys.exit(1)

    schema_path = Path(sys.argv[1])
    element_name = sys.argv[2] if len(sys.argv) > 2 else None

    analyzer = XSDDependencyAnalyzer()

    # parse_schemas accepts both a single file and a directory.
    print(f"Parsing schemas from: {schema_path}")
    analyzer.parse_schemas([schema_path])

    print(f"Found {len(analyzer.element_to_type)} elements")
    print(f"Found {len(analyzer.type_hierarchy)} type relationships")

    print("Building dependency graph...")
    analyzer.build_dependency_graph()

    if element_name:
        analyzer.print_dependencies(element_name)

    # Export simple graph (all elements)
    simple_path = Path("xsd_simple_graph.json")
    analyzer.export_simple_graph(simple_path)

    # Show example of using the simple graph
    print("\nExample: Simple graph structure")
    print("="*80)
    graph = analyzer.get_simple_graph(root_type="EntityStructure")

    # Show the first three elements that actually have dependents.
    shown = 0
    for elem, deps in graph.items():
        if not deps:
            continue
        print(f"graph['{elem}'] = {deps}")
        shown += 1
        if shown >= 3:
            break

    # Type chains of a few NeTEx elements, printed for inspection.
    for sample in ('ServiceLink', 'TimingLink', 'StandardFareTable',
                   'ParkingTariff', 'ScheduledStopPointRef'):
        print(analyzer._get_type_chain(sample))


if __name__ == "__main__":
    main()
"""XML Schema XPath selector cleaning.

Executed via ``python scripts/xsd_selector_clear.py xsd``.

Filters ``NeTEx_publication.xsd`` for unknown elements used in the
``xsd:selector`` of ``xsd:key``, ``xsd:keyref`` and ``xsd:unique``: elements
that are not declared in any schema below the given directory cannot occur in
a document, so selector branches naming them are dropped.  Rewriting the
selector also normalises the spacing around ``|``.  The file is rewritten in
place.
"""
import json
import sys
from pathlib import Path

from lxml import etree

# --- CONFIG ---
NS = {"xsd": "http://www.w3.org/2001/XMLSchema"}
if len(sys.argv) < 2:
    # Explicit message instead of an IndexError traceback.
    sys.exit("Usage: python scripts/xsd_selector_clear.py <xsd_dir>")
BASE_DIR = Path(sys.argv[1])  # directory holding all XSD files
INPUT_XSD = OUTPUT_XSD = BASE_DIR / "NeTEx_publication.xsd"  # file that is rewritten
REPORT_JSON = "report.json"

# --- 1. Collect the names of all non-abstract elements from every XSD (recursive, nested included) ---
all_elements = set()
for xsd_file in BASE_DIR.rglob("*.xsd"):
    schema_root = etree.parse(str(xsd_file)).getroot()
    for elem in schema_root.findall(".//xsd:element", namespaces=NS):
        if elem.get("abstract") == "true":
            continue  # abstract elements never occur in a document
        name = elem.get("name")
        if name:
            all_elements.add(name)

print(f"Totale elementen gevonden (excl. abstract): {len(all_elements)}")

# --- 2. Open the target file ---
tree = etree.parse(str(INPUT_XSD))
root = tree.getroot()

# --- 3. Report of everything that gets removed ---
report = {"removed_selectors": [], "removed_elements": [], "removed_comments": []}


# --- 4. Filter the xpath of a constraint's selector ---
def filter_selector(node):
    """Drop selector branches of *node* that name an element not in ``all_elements``.

    The selector's ``xpath`` is always rewritten as the kept branches joined
    by ``" | "``.  Removed branches are recorded in ``report``.

    Returns:
        True if at least one branch is left; False if none is left or the
        node has no selector / no xpath.
    """
    selector = node.find("xsd:selector", namespaces=NS)
    if selector is None:
        return False  # no selector
    xpath_expr = selector.get("xpath")
    if not xpath_expr:
        return False

    kept_exprs = []
    removed_exprs = []
    for expr in (part.strip() for part in xpath_expr.split("|")):
        # Strip the leading .// and the namespace prefix.
        local_name = expr.replace(".//", "").split(":")[-1]
        if local_name in all_elements:
            kept_exprs.append(expr)
        else:
            removed_exprs.append(expr)

    if removed_exprs:
        report["removed_selectors"].append({
            "parent_name": node.get("name"),
            "removed_expressions": removed_exprs
        })
    selector.set("xpath", " | ".join(kept_exprs))  # always updated
    print(f"[DEBUG] Node: {node.tag}, name: {node.get('name')}, kept expressions: {kept_exprs}")
    return bool(kept_exprs)


# --- 5. Helper: remove all comments directly above a node ---
def remove_comments_above(node):
    """Remove the run of comment siblings immediately preceding *node* and log them in ``report``."""
    parent = node.getparent()
    removed_comments = []
    prev = node.getprevious()
    while prev is not None and isinstance(prev, etree._Comment):
        removed_comments.append(prev.text)
        parent.remove(prev)
        prev = node.getprevious()  # re-read after the removal
    if removed_comments:
        report["removed_comments"].append({
            "parent_name": node.get("name"),
            "removed_comments": removed_comments
        })
        print(f"[DEBUG] Removed comment(s) above '{node.get('name')}': {removed_comments}")


# --- 6. First pass: filter the selectors of key, unique and keyref ---
for tag in ["key", "unique"]:
    for node in root.findall(f".//xsd:{tag}", namespaces=NS):
        if filter_selector(node):
            continue
        remove_comments_above(node)
        report["removed_elements"].append({
            "type": tag,
            "name": node.get("name"),
            "reason": "all selector expressions removed"
        })
        print(f"[DEBUG] Removing {tag} '{node.get('name')}' (no expressions left)")
        node.getparent().remove(node)

for kr in root.findall(".//xsd:keyref", namespaces=NS):
    has_expr = filter_selector(kr)
    if has_expr and kr.get("refer") is not None:
        continue
    remove_comments_above(kr)
    report["removed_elements"].append({
        "type": "keyref",
        "name": kr.get("name"),
        "reason": "all selector expressions removed or no refer"
    })
    print(f"[DEBUG] Removing keyref '{kr.get('name')}' (no expressions or no refer)")
    kr.getparent().remove(kr)

# --- 7. Second pass: remove keys that no keyref refers to any more ---
# Collected once; no keyref is removed during this pass.
referenced = {kr.get("refer") for kr in root.findall(".//xsd:keyref", namespaces=NS)}
for k in root.findall(".//xsd:key", namespaces=NS):
    kname = k.get("name")  # compared verbatim, with and without the netex: prefix
    if kname in referenced or f"netex:{kname}" in referenced:
        continue
    remove_comments_above(k)
    report["removed_elements"].append({
        "type": "key",
        "name": kname,
        "reason": "no keyrefs reference this key anymore"
    })
    print(f"[DEBUG] Removing key '{kname}' (no keyrefs reference it)")
    k.getparent().remove(k)

# --- Third pass: remove keyrefs whose referenced key has been removed ---
existing_keys = {k.get("name") for k in root.findall(".//xsd:key", namespaces=NS)}

for kr in root.findall(".//xsd:keyref", namespaces=NS):
    refer = kr.get("refer")
    if not refer:
        continue
    local_refer = refer.split(":")[-1]  # simple prefix strip
    if local_refer in existing_keys:
        continue
    remove_comments_above(kr)
    report["removed_elements"].append({
        "type": "keyref",
        "name": kr.get("name"),
        "reason": f"referenced key '{refer}' was removed"
    })
    print(f"[DEBUG] Removing keyref '{kr.get('name')}' (referenced key '{refer}' removed)")
    kr.getparent().remove(kr)

# --- 8. Write the XSD file back ---
tree.write(str(OUTPUT_XSD), encoding="utf-8", xml_declaration=True, pretty_print=True)

# --- 9. JSON report ---
# NOTE: ``report`` is collected but not written to REPORT_JSON; that step is disabled.
- + @@ -1147,17 +1108,6 @@ - - - - - - - - - - - @@ -1168,17 +1118,6 @@ - - - - - - - - - - - @@ -1189,17 +1128,6 @@ - - - - - - - - - - - @@ -1207,7 +1135,7 @@ - + @@ -1223,7 +1151,7 @@ - + @@ -1570,17 +1498,6 @@ - - - - - - - - - - - @@ -1688,17 +1605,6 @@ - - - - - - - - - - - @@ -1709,17 +1615,6 @@ - - - - - - - - - - - @@ -1817,7 +1712,7 @@ - + @@ -1838,7 +1733,7 @@ - + @@ -1985,7 +1880,7 @@ - + @@ -2006,7 +1901,7 @@ - + @@ -2017,7 +1912,7 @@ - + @@ -2028,7 +1923,7 @@ - + @@ -2037,27 +1932,6 @@ - - - - - Every [TypeOfFarePaymentMethod Id + Version] must be unique within document. - - - - - - - - - - - - - - - - @@ -2331,27 +2205,6 @@ - - - - - Every [QueuingEquipment Id + Version] must be unique within document. - - - - - - - - - - - - - - - - @@ -2383,17 +2236,6 @@ - - - - - - - - - - - @@ -3725,27 +3567,6 @@ - - - - - Every [TypeOfRollingStockItem Id + Version] must be unique within document. - - - - - - - - - - - - - - - - @@ -4085,12 +3906,12 @@ - + - + @@ -4100,7 +3921,7 @@ Every [PassengerEntrance Id + Version] must be unique within document. - + @@ -4148,7 +3969,7 @@ - + @@ -4220,27 +4041,6 @@ - - - - - Every [DeckCapacity Id + Version] must be unique within document. - - - - - - - - - - - - - - - - @@ -4314,17 +4114,6 @@ - - - - - - - - - - - @@ -4526,12 +4315,12 @@ - + - + @@ -4751,7 +4540,7 @@ Every [Train Id + Version] must be unique within document. - + @@ -4793,18 +4582,18 @@ Every [TrainElementType Id + Version] must be unique within document. - + - + - + @@ -4897,12 +4686,12 @@ - + - + @@ -5132,7 +4921,7 @@ - + @@ -5237,7 +5026,7 @@ - + @@ -5340,17 +5129,6 @@ - - - - - - - - - - - @@ -5489,7 +5267,7 @@ - + @@ -5529,17 +5307,6 @@ - - - - - - - - - - - @@ -5708,27 +5475,6 @@ - - - - - Every [ParkingAreaCapacityAssignment Id + Version + order] must be unique within document. - - - - - - - - - - - - - - - - @@ -6092,12 +5838,12 @@ - + - + @@ -6107,7 +5853,7 @@ Every [PathJunction Id + Version] must be unique within document. 
- + @@ -6118,7 +5864,7 @@ - + @@ -6134,7 +5880,7 @@ - + @@ -6174,17 +5920,6 @@ - - - - - - - - - - - @@ -6237,17 +5972,6 @@ - - - - - - - - - - - @@ -6280,17 +6004,6 @@ - - - - - - - - - - - @@ -6322,17 +6035,6 @@ - - - - - - - - - - - @@ -6364,17 +6066,6 @@ - - - - - - - - - - - @@ -6395,7 +6086,7 @@ - + @@ -6496,7 +6187,7 @@ - + @@ -6735,7 +6426,7 @@ - + @@ -7033,17 +6724,6 @@ - - - - - - - - - - - @@ -7065,17 +6745,6 @@ - - - - - - - - - - - @@ -7188,27 +6857,6 @@ - - - - - Every [PointInTimingPattern Id + Version] must be unique within document. - - - - - - - - - - - - - - - - @@ -7222,7 +6870,7 @@ - + @@ -7279,18 +6927,18 @@ Every [VehicleMeetingPoint Id + Version] must be unique within document. - + - + - + @@ -7420,27 +7068,6 @@ - - - - - Every [VehicleTypeZoneRestrictionZone Id + Version] must be unique within document. - - - - - - - - - - - - - - - - @@ -7536,17 +7163,6 @@ - - - - - - - - - - - @@ -7830,17 +7446,6 @@ - - - - - - - - - - - @@ -7979,12 +7584,12 @@ - + - + @@ -8061,17 +7666,6 @@ - - - - - - - - - - - @@ -8217,12 +7811,12 @@ - + - + @@ -8259,7 +7853,7 @@ - + @@ -8446,27 +8040,6 @@ - - - - - - - - - - - - - - - - Every [JourneyFrequencyGroup Id + Version] must be unique within document. - - - - - @@ -8551,17 +8124,6 @@ - - - - - - - - - - - @@ -8595,7 +8157,7 @@ - + @@ -9064,27 +8626,6 @@ - - - - - Every [AccountableElementPart Id + Version] must be unique within document. - - - - - - - - - - - - - - - - @@ -9228,7 +8769,7 @@ - + @@ -9249,7 +8790,7 @@ - + @@ -9539,27 +9080,6 @@ - - - - - Every [FareStructure Id + Version] must be unique within document. 
- - - - - - - - - - - - - - - - @@ -9591,17 +9111,6 @@ - - - - - - - - - - - @@ -10217,12 +9726,12 @@ - + - + @@ -10392,7 +9901,7 @@ - + @@ -10535,7 +10044,7 @@ - + @@ -10915,7 +10424,7 @@ - + @@ -10998,17 +10507,6 @@ - - - - - - - - - - - @@ -11019,17 +10517,6 @@ - - - - - - - - - - - @@ -11040,17 +10527,6 @@ - - - - - - - - - - - @@ -12016,27 +11492,6 @@ - - - - - Every [FareContractEntry Id + Version] must be unique within document. - - - - - - - - - - - - - - - -