From 5ea23403c93e6d91698be18e4ea4aa2a7728df6a Mon Sep 17 00:00:00 2001 From: Bonggo Pras Date: Wed, 24 Dec 2025 13:57:23 +0700 Subject: [PATCH 1/4] perf: parse_xml + body mutation optimization --- docxtpl/template.py | 265 ++++++++++++++++++++++++++++---------------- 1 file changed, 167 insertions(+), 98 deletions(-) diff --git a/docxtpl/template.py b/docxtpl/template.py index f20280a..b90c6fc 100644 --- a/docxtpl/template.py +++ b/docxtpl/template.py @@ -19,6 +19,41 @@ from jinja2 import Environment, Template, meta from jinja2.exceptions import TemplateError + +def _create_optimized_env(**kwargs): + """Create an optimized Jinja2 environment for better performance. + + Optimizations applied: + - auto_reload=False: Skip checking if template source changed + - cache_size=400: Larger template cache for repeated renders + - enable_async=False: Disable async support (not needed, adds overhead) + """ + return Environment( + auto_reload=False, # Disable template auto-reload (faster) + cache_size=400, # Increase template cache size + enable_async=False, # Disable async (not needed, reduces overhead) + **kwargs + ) + + +# Module-level cached environments (created once, reused across all instances) +_CACHED_ENV = None +_CACHED_ENV_AUTOESCAPE = None + + +def _get_cached_env(autoescape=False): + """Get or create a cached Jinja2 environment for performance.""" + global _CACHED_ENV, _CACHED_ENV_AUTOESCAPE + + if autoescape: + if _CACHED_ENV_AUTOESCAPE is None: + _CACHED_ENV_AUTOESCAPE = _create_optimized_env(autoescape=True) + return _CACHED_ENV_AUTOESCAPE + else: + if _CACHED_ENV is None: + _CACHED_ENV = _create_optimized_env(autoescape=False) + return _CACHED_ENV + try: from html import escape # noqa: F401 except ImportError: @@ -43,6 +78,60 @@ class DocxTemplate(object): "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer" ) + # Pre-compiled regex patterns for patch_xml() optimization + # These are compiled once at class load time, not on every render + _RE_JINJA_OPEN = re.compile( + r"(?<={)(<[^>]*>)+(?=[\{%\#])|(?<=[%\}#])(<[^>]*>)+(?=\})", + re.DOTALL + ) + _RE_JINJA_CONTENT = re.compile( + r"{%(?:(?!%}).)*|{#(?:(?!#}).)*|{{(?:(?!}}).)*", + re.DOTALL + ) + _RE_COLSPAN = re.compile( + r"(](?:(?!]).)*){%\s*colspan\s+([^%]*)\s*%}(.*?)", + re.DOTALL + ) + _RE_CELLBG = re.compile( + r"(](?:(?!]).)*){%\s*cellbg\s+([^%]*)\s*%}(.*?)", + re.DOTALL + ) + _RE_SPACE_PRESERVE = re.compile( + r"((?:(?!).)*)({{.*?}}|{%.*?%})", + re.DOTALL + ) + _RE_SPACE_PRESERVE_R = re.compile( + r"({{r\s.*?}}|{%r\s.*?%})", + re.DOTALL + ) + _RE_MERGE_PREV = re.compile(r"(?:(?!).)*?{%-", re.DOTALL) + _RE_MERGE_NEXT = re.compile(r"-%}(?:(?!]|{%|{{).)*?]*?>", re.DOTALL) + _RE_VMERGE = re.compile( + r"](?:(?!]).)*?{%\s*vm\s*%}.*?]", + re.DOTALL + ) + _RE_HMERGE = re.compile( + r"](?:(?!]).)*?{%\s*hm\s*%}.*?]", + re.DOTALL + ) + _RE_CLEAN_TAGS = re.compile(r"(?<=\{[\{%])(.*?)(?=[\}%]})") + _RE_PARAGRAPH_NEWLINE = re.compile(r"])") + _RE_PARAGRAPH_REMOVE_NEWLINE = re.compile(r"\n])") + _RE_STRIPTAGS = re.compile(r".*?(|]*>)", re.DOTALL) + _RE_COLSPAN_EMPTY = re.compile(r"](?:(?!]).)*.*?", re.DOTALL) + _RE_GRIDSPAN = re.compile(r"") + _RE_TCPR = re.compile(r"(]*>)") + _RE_SHD = re.compile(r"") + _RE_RESOLVE_PARAGRAPH = re.compile(r"]*)?>.*?", re.DOTALL) + _RE_RESOLVE_RUN = re.compile(r"]*)?>.*?", re.DOTALL) + _RE_RESOLVE_TEXT = re.compile(r"]*)?>.*?", re.DOTALL) + _RE_RUN_PROPS = re.compile(r".*?") + _RE_PARA_PROPS = re.compile(r".*?") + + # Cached Jinja2 environment for performance (created once, reused) + _cached_jinja_env = None + _cached_jinja_env_autoescape = None # For autoescape=True variant + def __init__(self, template_file: Union[IO[bytes], str, PathLike]) -> None: self.template_file = template_file self.reset_replacements() @@ -88,94 +177,63 @@ def patch_xml(self, src_xml): unescape html entities, etc...""" # replace {{ by {{ ( works with {{ }} {% and %} {# and #}) - src_xml = re.sub( - r"(?<={)(<[^>]*>)+(?=[\{%\#])|(?<=[%\}\#])(<[^>]*>)+(?=\})", - "", - src_xml, - flags=re.DOTALL, - ) + # OPTIMIZED: Using pre-compiled pattern + src_xml = self._RE_JINJA_OPEN.sub("", src_xml) # replace {{jinja2 stuff}} by {{jinja2 stuff}} # same thing with {% ... %} and {# #} # "jinja2 stuff" could a variable, a 'if' etc... anything jinja2 will understand def striptags(m): - return re.sub( - ".*?(|]*>)", "", m.group(0), flags=re.DOTALL - ) + # OPTIMIZED: Using pre-compiled pattern + return self._RE_STRIPTAGS.sub("", m.group(0)) - src_xml = re.sub( - r"{%(?:(?!%}).)*|{#(?:(?!#}).)*|{{(?:(?!}}).)*", - striptags, - src_xml, - flags=re.DOTALL, - ) + # OPTIMIZED: Using pre-compiled pattern + src_xml = self._RE_JINJA_CONTENT.sub(striptags, src_xml) # manage table cell colspan def colspan(m): cell_xml = m.group(1) + m.group(3) - cell_xml = re.sub( - r"](?:(?!]).)*.*?", - "", - cell_xml, - flags=re.DOTALL, - ) - cell_xml = re.sub(r"", "", cell_xml, count=1) - return re.sub( - r"(]*>)", + # OPTIMIZED: Using pre-compiled pattern + cell_xml = self._RE_COLSPAN_EMPTY.sub("", cell_xml) + cell_xml = self._RE_GRIDSPAN.sub("", cell_xml, count=1) + return self._RE_TCPR.sub( r'\1' % m.group(2), cell_xml, ) - src_xml = re.sub( - r"(](?:(?!]).)*){%\s*colspan\s+([^%]*)\s*%}(.*?)", - colspan, - src_xml, - flags=re.DOTALL, - ) + # OPTIMIZED: Using pre-compiled pattern + src_xml = self._RE_COLSPAN.sub(colspan, src_xml) # manage table cell background color def cellbg(m): cell_xml = m.group(1) + m.group(3) - cell_xml = re.sub( - r"](?:(?!]).)*.*?", - "", - cell_xml, - flags=re.DOTALL, - ) - cell_xml = re.sub(r"", "", cell_xml, count=1) - return re.sub( - r"(]*>)", + # OPTIMIZED: Using pre-compiled pattern + cell_xml = self._RE_COLSPAN_EMPTY.sub("", cell_xml) + cell_xml = self._RE_SHD.sub("", cell_xml, count=1) + return self._RE_TCPR.sub( r'\1' % m.group(2), cell_xml, ) - src_xml = re.sub( - r"(](?:(?!]).)*){%\s*cellbg\s+([^%]*)\s*%}(.*?)", - cellbg, - src_xml, - flags=re.DOTALL, - ) + # OPTIMIZED: Using pre-compiled pattern + src_xml = self._RE_CELLBG.sub(cellbg, src_xml) # ensure space preservation - src_xml = re.sub( - r"((?:(?!).)*)({{.*?}}|{%.*?%})", + # OPTIMIZED: Using pre-compiled patterns + src_xml = self._RE_SPACE_PRESERVE.sub( r'\1\2', src_xml, - flags=re.DOTALL, ) - src_xml = re.sub( - r"({{r\s.*?}}|{%r\s.*?%})", + src_xml = self._RE_SPACE_PRESERVE_R.sub( r'\1', src_xml, - flags=re.DOTALL, ) # {%- will merge with previous paragraph text - src_xml = re.sub(r"(?:(?!).)*?{%-", "{%", src_xml, flags=re.DOTALL) + # OPTIMIZED: Using pre-compiled pattern + src_xml = self._RE_MERGE_PREV.sub("{%", src_xml) # -%} will merge with next paragraph text - src_xml = re.sub( - r"-%}(?:(?!]|{%|{{).)*?]*?>", "%}", src_xml, flags=re.DOTALL - ) + src_xml = self._RE_MERGE_NEXT.sub("%}", src_xml) for y in ["tr", "tc", "p", "r"]: # replace into xml code the row/paragraph/run containing @@ -220,12 +278,8 @@ def v_merge(m1): flags=re.DOTALL, ) - src_xml = re.sub( - r"](?:(?!]).)*?{%\s*vm\s*%}.*?]", - v_merge_tc, - src_xml, - flags=re.DOTALL, - ) + # OPTIMIZED: Using pre-compiled pattern + src_xml = self._RE_VMERGE.sub(v_merge_tc, src_xml) # Use ``{% hm %}`` to make table cell become horizontally merged within # a ``{% for %}``. @@ -279,12 +333,8 @@ def without_gridspan(m2): # Discard every other cell generated in loop. return "{% if loop.first %}" + xml + "{% endif %}" - src_xml = re.sub( - r"](?:(?!]).)*?{%\s*hm\s*%}.*?]", - h_merge_tc, - src_xml, - flags=re.DOTALL, - ) + # OPTIMIZED: Using pre-compiled pattern + src_xml = self._RE_HMERGE.sub(h_merge_tc, src_xml) def clean_tags(m): return ( @@ -298,18 +348,20 @@ def clean_tags(m): .replace("’", "'") ) - src_xml = re.sub(r"(?<=\{[\{%])(.*?)(?=[\}%]})", clean_tags, src_xml) + # OPTIMIZED: Using pre-compiled pattern + src_xml = self._RE_CLEAN_TAGS.sub(clean_tags, src_xml) return src_xml def render_xml_part(self, src_xml, part, context, jinja_env=None): - src_xml = re.sub(r"])", r"\n])", r" None: + # OPTIMIZED: Use cached environment if jinja_env is None: - jinja_env = Environment() + jinja_env = _get_cached_env() for section in self.docx.sections: for part in section.part.package.parts: @@ -403,30 +458,29 @@ def resolve_text(run_properties, paragraph_properties, m): return xml def resolve_run(paragraph_properties, m): - run_properties = re.search(r".*?", m.group(0)) + # OPTIMIZED: Using pre-compiled pattern + run_properties = self._RE_RUN_PROPS.search(m.group(0)) run_properties = run_properties.group(0) if run_properties else "" - return re.sub( - r"]*)?>.*?", + # OPTIMIZED: Using pre-compiled pattern + return self._RE_RESOLVE_TEXT.sub( lambda x: resolve_text(run_properties, paragraph_properties, x), m.group(0), - flags=re.DOTALL, ) def resolve_paragraph(m): - paragraph_properties = re.search(r".*?", m.group(0)) + # OPTIMIZED: Using pre-compiled pattern + paragraph_properties = self._RE_PARA_PROPS.search(m.group(0)) paragraph_properties = ( paragraph_properties.group(0) if paragraph_properties else "" ) - return re.sub( - r"]*)?>.*?", + # OPTIMIZED: Using pre-compiled pattern + return self._RE_RESOLVE_RUN.sub( lambda x: resolve_run(paragraph_properties, x), m.group(0), - flags=re.DOTALL, ) - xml = re.sub( - r"]*)?>.*?", resolve_paragraph, xml, flags=re.DOTALL - ) + # OPTIMIZED: Using pre-compiled pattern + xml = self._RE_RESOLVE_PARAGRAPH.sub(resolve_paragraph, xml) return xml @@ -437,9 +491,21 @@ def build_xml(self, context, jinja_env=None): return xml def map_tree(self, tree): - root = self.docx._element - body = root.body - root.replace(body, tree) + """Replace body content with rendered tree. + + OPTIMIZED: Instead of replacing the entire element (which + triggers expensive reconciliation), we now mutate the body's children + directly. This is much cheaper for large trees. + """ + body = self.docx._element.body + + # Remove all existing children from body + for child in list(body): + body.remove(child) + + # Append all children from the new tree + for child in list(tree): + body.append(child) def get_headers_footers(self, uri): for relKey, val in self.docx._part.rels.items(): @@ -479,11 +545,11 @@ def render( # init template working attributes self.render_init() - if autoescape: - if not jinja_env: - jinja_env = Environment(autoescape=autoescape) - else: - jinja_env.autoescape = autoescape + # OPTIMIZED: Use cached environment by default (avoids overhead of creating new env) + if not jinja_env: + jinja_env = _get_cached_env(autoescape=autoescape) + elif autoescape: + jinja_env.autoescape = autoescape # Body xml_src = self.build_xml(context, jinja_env) @@ -517,8 +583,10 @@ def render( # using of TC tag in for cycle can cause that count of columns does not # correspond to real count of columns in row. This function is able to fix it. def fix_tables(self, xml): - parser = etree.XMLParser(recover=True) - tree = etree.fromstring(xml, parser=parser) + # OPTIMIZED: Use parse_xml from docx.opc.oxml instead of etree.fromstring + # This ensures same document model and element classes, minimizing + # reconciliation cost when the tree is later used with map_tree() + tree = parse_xml(xml) # get namespace ns = "{" + tree.nsmap["w"] + "}" # walk trough xml and find table @@ -913,7 +981,8 @@ def get_undeclared_template_variables( if jinja_env: env = jinja_env else: - env = Environment() + # OPTIMIZED: Use cached environment + env = _get_cached_env() parse_content = env.parse(xml) all_variables = meta.find_undeclared_variables(parse_content) From 2dd1a2955e674cdc67e922aeb8bc8257c0c7ff8c Mon Sep 17 00:00:00 2001 From: Bonggo Pras Date: Thu, 8 Jan 2026 17:18:39 +0700 Subject: [PATCH 2/4] Fix poetry configuration - add required fields --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 476fc0e..28533c4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,10 @@ repository = "https://github.com/elapouya/python-docx-template.git" document = "https://docxtpl.readthedocs.org" [tool.poetry] +name = "docxtpl" version = "0.0.0" +description = "Python docx template engine" +authors = ["Eric Lapouyade "] [tool.poetry.requires-plugins] poetry-dynamic-versioning = { version = ">=1.0.0,<2.0.0", extras = ["plugin"] } From ec0b7e1e3858aa73be11ecdc30ef38f817b53589 Mon Sep 17 00:00:00 2001 From: Bonggo Pras Date: Thu, 8 Jan 2026 20:27:21 +0700 Subject: [PATCH 3/4] fix: improve XML handling and cleanup code - Add try/except fallback with recover=True for malformed XML in fix_tables() - Use OxmlElement with qn() instead of etree.SubElement for new grid columns - Remove unused _cached_jinja_env variables - Clean up redundant comments --- docxtpl/template.py | 129 +++++++++++++++++++++----------------------- 1 file changed, 62 insertions(+), 67 deletions(-) diff --git a/docxtpl/template.py b/docxtpl/template.py index b90c6fc..272a5fb 100644 --- a/docxtpl/template.py +++ b/docxtpl/template.py @@ -15,6 +15,8 @@ from docx.opc.oxml import parse_xml from docx.opc.part import XmlPart import docx.oxml.ns +from docx.oxml import OxmlElement +from docx.oxml.ns import qn from docx.opc.constants import RELATIONSHIP_TYPE as REL_TYPE from jinja2 import Environment, Template, meta from jinja2.exceptions import TemplateError @@ -128,10 +130,6 @@ class DocxTemplate(object): _RE_RUN_PROPS = re.compile(r".*?") _RE_PARA_PROPS = re.compile(r".*?") - # Cached Jinja2 environment for performance (created once, reused) - _cached_jinja_env = None - _cached_jinja_env_autoescape = None # For autoescape=True variant - def __init__(self, template_file: Union[IO[bytes], str, PathLike]) -> None: self.template_file = template_file self.reset_replacements() @@ -177,23 +175,19 @@ def patch_xml(self, src_xml): unescape html entities, etc...""" # replace {{ by {{ ( works with {{ }} {% and %} {# and #}) - # OPTIMIZED: Using pre-compiled pattern src_xml = self._RE_JINJA_OPEN.sub("", src_xml) # replace {{jinja2 stuff}} by {{jinja2 stuff}} # same thing with {% ... %} and {# #} # "jinja2 stuff" could a variable, a 'if' etc... anything jinja2 will understand def striptags(m): - # OPTIMIZED: Using pre-compiled pattern return self._RE_STRIPTAGS.sub("", m.group(0)) - # OPTIMIZED: Using pre-compiled pattern src_xml = self._RE_JINJA_CONTENT.sub(striptags, src_xml) # manage table cell colspan def colspan(m): cell_xml = m.group(1) + m.group(3) - # OPTIMIZED: Using pre-compiled pattern cell_xml = self._RE_COLSPAN_EMPTY.sub("", cell_xml) cell_xml = self._RE_GRIDSPAN.sub("", cell_xml, count=1) return self._RE_TCPR.sub( @@ -201,13 +195,11 @@ def colspan(m): cell_xml, ) - # OPTIMIZED: Using pre-compiled pattern src_xml = self._RE_COLSPAN.sub(colspan, src_xml) # manage table cell background color def cellbg(m): cell_xml = m.group(1) + m.group(3) - # OPTIMIZED: Using pre-compiled pattern cell_xml = self._RE_COLSPAN_EMPTY.sub("", cell_xml) cell_xml = self._RE_SHD.sub("", cell_xml, count=1) return self._RE_TCPR.sub( @@ -215,11 +207,9 @@ def cellbg(m): cell_xml, ) - # OPTIMIZED: Using pre-compiled pattern src_xml = self._RE_CELLBG.sub(cellbg, src_xml) # ensure space preservation - # OPTIMIZED: Using pre-compiled patterns src_xml = self._RE_SPACE_PRESERVE.sub( r'\1\2', src_xml, @@ -230,7 +220,6 @@ def cellbg(m): ) # {%- will merge with previous paragraph text - # OPTIMIZED: Using pre-compiled pattern src_xml = self._RE_MERGE_PREV.sub("{%", src_xml) # -%} will merge with next paragraph text src_xml = self._RE_MERGE_NEXT.sub("%}", src_xml) @@ -278,7 +267,6 @@ def v_merge(m1): flags=re.DOTALL, ) - # OPTIMIZED: Using pre-compiled pattern src_xml = self._RE_VMERGE.sub(v_merge_tc, src_xml) # Use ``{% hm %}`` to make table cell become horizontally merged within @@ -333,7 +321,6 @@ def without_gridspan(m2): # Discard every other cell generated in loop. return "{% if loop.first %}" + xml + "{% endif %}" - # OPTIMIZED: Using pre-compiled pattern src_xml = self._RE_HMERGE.sub(h_merge_tc, src_xml) def clean_tags(m): @@ -348,17 +335,14 @@ def clean_tags(m): .replace("’", "'") ) - # OPTIMIZED: Using pre-compiled pattern src_xml = self._RE_CLEAN_TAGS.sub(clean_tags, src_xml) return src_xml def render_xml_part(self, src_xml, part, context, jinja_env=None): - # OPTIMIZED: Using pre-compiled pattern src_xml = self._RE_PARAGRAPH_NEWLINE.sub(r"\n None: - # OPTIMIZED: Use cached environment if jinja_env is None: jinja_env = _get_cached_env() @@ -458,28 +439,23 @@ def resolve_text(run_properties, paragraph_properties, m): return xml def resolve_run(paragraph_properties, m): - # OPTIMIZED: Using pre-compiled pattern run_properties = self._RE_RUN_PROPS.search(m.group(0)) run_properties = run_properties.group(0) if run_properties else "" - # OPTIMIZED: Using pre-compiled pattern return self._RE_RESOLVE_TEXT.sub( lambda x: resolve_text(run_properties, paragraph_properties, x), m.group(0), ) def resolve_paragraph(m): - # OPTIMIZED: Using pre-compiled pattern paragraph_properties = self._RE_PARA_PROPS.search(m.group(0)) paragraph_properties = ( paragraph_properties.group(0) if paragraph_properties else "" ) - # OPTIMIZED: Using pre-compiled pattern return self._RE_RESOLVE_RUN.sub( lambda x: resolve_run(paragraph_properties, x), m.group(0), ) - # OPTIMIZED: Using pre-compiled pattern xml = self._RE_RESOLVE_PARAGRAPH.sub(resolve_paragraph, xml) return xml @@ -524,10 +500,17 @@ def get_headers_footers_encoding(self, xml): def build_headers_footers_xml(self, context, uri, jinja_env=None): for relKey, part in self.get_headers_footers(uri): xml = self.get_part_xml(part) - encoding = self.get_headers_footers_encoding(xml) - xml = self.patch_xml(xml) - xml = self.render_xml_part(xml, part, context, jinja_env) - yield relKey, xml.encode(encoding) + + # Skip rendering if no Jinja tags present + # Headers/footers are often static, so this avoids caching/parsing overhead + if self._RE_JINJA_OPEN.search(xml) or self._RE_JINJA_CONTENT.search(xml): + encoding = self.get_headers_footers_encoding(xml) + xml = self.patch_xml(xml) + xml = self.render_xml_part(xml, part, context, jinja_env) + yield relKey, xml.encode(encoding) + else: + encoding = self.get_headers_footers_encoding(xml) + yield relKey, xml.encode(encoding) def map_headers_footers_xml(self, relKey, xml): part = self.docx._part.rels[relKey].target_part @@ -545,7 +528,7 @@ def render( # init template working attributes self.render_init() - # OPTIMIZED: Use cached environment by default (avoids overhead of creating new env) + # Use cached environment by default if not jinja_env: jinja_env = _get_cached_env(autoescape=autoescape) elif autoescape: @@ -581,24 +564,53 @@ def render( self.is_rendered = True # using of TC tag in for cycle can cause that count of columns does not - # correspond to real count of columns in row. This function is able to fix it. + # correspond to real count of columns in row. def fix_tables(self, xml): - # OPTIMIZED: Use parse_xml from docx.opc.oxml instead of etree.fromstring - # This ensures same document model and element classes, minimizing - # reconciliation cost when the tree is later used with map_tree() - tree = parse_xml(xml) + # Use parse_xml with safe fallback for malformed XML + try: + tree = parse_xml(xml) + except Exception: + # Fallback to permissive parser for malformed XML + parser = etree.XMLParser(recover=True) + tree = etree.fromstring(xml, parser=parser) # get namespace ns = "{" + tree.nsmap["w"] + "}" # walk trough xml and find table for t in tree.iter(ns + "tbl"): tblGrid = t.find(ns + "tblGrid") + if tblGrid is None: + continue + columns = tblGrid.findall(ns + "gridCol") - to_add = 0 - # walk trough all rows and try to find if there is higher cell count + columns_len = len(columns) + + # Single pass row analysis with both counters + # Original logic uses raw count for ADD, effective count for REMOVE + max_raw_cells = 0 # For ADD decision (raw tc count) + max_effective_cells = 0 # For REMOVE decision (with gridSpan) + for r in t.iter(ns + "tr"): cells = r.findall(ns + "tc") - if (len(columns) + to_add) < len(cells): - to_add = len(cells) - len(columns) + raw_count = len(cells) + effective_count = 0 + + for cell in cells: + tc_pr = cell.find(ns + "tcPr") + if tc_pr is not None: + grid_span = tc_pr.find(ns + "gridSpan") + if grid_span is not None: + effective_count += int(grid_span.get(ns + "val")) + continue + effective_count += 1 + + if raw_count > max_raw_cells: + max_raw_cells = raw_count + if effective_count > max_effective_cells: + max_effective_cells = effective_count + + # ADD columns based on RAW cell count (original behavior) + to_add = max_raw_cells - columns_len if max_raw_cells > columns_len else 0 + # is necessary to add columns? if to_add > 0: # at first, calculate width of table according to columns @@ -620,34 +632,16 @@ def fix_tables(self, xml): int(float(c.get(ns + "w")) * new_average / old_average) ), ) - # add new columns + # add new columns using OxmlElement for proper python-docx compatibility for i in range(to_add): - etree.SubElement( - tblGrid, ns + "gridCol", {ns + "w": str(int(new_average))} - ) + new_col = OxmlElement('w:gridCol') + new_col.set(qn('w:w'), str(int(new_average))) + tblGrid.append(new_col) - # Refetch columns after columns addition. + # REMOVE columns based on EFFECTIVE cell count (original behavior) columns = tblGrid.findall(ns + "gridCol") columns_len = len(columns) - - cells_len_max = 0 - - def get_cell_len(total, cell): - tc_pr = cell.find(ns + "tcPr") - grid_span = None if tc_pr is None else tc_pr.find(ns + "gridSpan") - - if grid_span is not None: - return total + int(grid_span.get(ns + "val")) - - return total + 1 - - # Calculate max of table cells to compare with `gridCol`. - for r in t.iter(ns + "tr"): - cells = r.findall(ns + "tc") - cells_len = functools.reduce(get_cell_len, cells, 0) - cells_len_max = max(cells_len_max, cells_len) - - to_remove = columns_len - cells_len_max + to_remove = columns_len - max_effective_cells if columns_len > max_effective_cells else 0 # If after the loop, there're less columns, than # originally was, remove extra `gridCol` declarations. @@ -676,8 +670,10 @@ def get_cell_len(total, cell): return tree def fix_docpr_ids(self, tree): - # some Ids may have some collisions : so renumbering all of them : - for elt in tree.xpath("//wp:docPr", namespaces=docx.oxml.ns.nsmap): + wp_ns = docx.oxml.ns.nsmap['wp'] + tag = "{%s}docPr" % wp_ns + + for elt in tree.iter(tag): self.docx_ids_index += 1 elt.attrib["id"] = str(self.docx_ids_index) @@ -981,7 +977,6 @@ def get_undeclared_template_variables( if jinja_env: env = jinja_env else: - # OPTIMIZED: Use cached environment env = _get_cached_env() parse_content = env.parse(xml) From e455da743210f75de24640cb4d93582d48b52b07 Mon Sep 17 00:00:00 2001 From: Jack Byrne <46843566+JackByrne@users.noreply.github.com> Date: Fri, 9 Jan 2026 15:35:29 +0000 Subject: [PATCH 4/4] Small comment clean-up --- docxtpl/template.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docxtpl/template.py b/docxtpl/template.py index 272a5fb..a757037 100644 --- a/docxtpl/template.py +++ b/docxtpl/template.py @@ -469,7 +469,7 @@ def build_xml(self, context, jinja_env=None): def map_tree(self, tree): """Replace body content with rendered tree. - OPTIMIZED: Instead of replacing the entire element (which + Instead of replacing the entire element with replace() (which triggers expensive reconciliation), we now mutate the body's children directly. This is much cheaper for large trees. """ @@ -568,9 +568,9 @@ def render( def fix_tables(self, xml): # Use parse_xml with safe fallback for malformed XML try: - tree = parse_xml(xml) + tree = parse_xml(xml) # parse_xml() is significantly faster except Exception: - # Fallback to permissive parser for malformed XML + # Fallback to permissive parser in the event of malformed XML parser = etree.XMLParser(recover=True) tree = etree.fromstring(xml, parser=parser) # get namespace @@ -585,7 +585,6 @@ def fix_tables(self, xml): columns_len = len(columns) # Single pass row analysis with both counters - # Original logic uses raw count for ADD, effective count for REMOVE max_raw_cells = 0 # For ADD decision (raw tc count) max_effective_cells = 0 # For REMOVE decision (with gridSpan) @@ -670,6 +669,7 @@ def fix_tables(self, xml): return tree def fix_docpr_ids(self, tree): + # Some Ids may have some collisions : so renumbering all of them wp_ns = docx.oxml.ns.nsmap['wp'] tag = "{%s}docPr" % wp_ns