diff --git a/pydocx/export/base.py b/pydocx/export/base.py index 9ca2afef..2d531b61 100644 --- a/pydocx/export/base.py +++ b/pydocx/export/base.py @@ -341,8 +341,10 @@ def export_run(self, run): # TODO squash multiple sequential text nodes into one? results = self.yield_nested(run.children, self.export_node) + if run.effective_properties: results = self.export_run_apply_properties(run, results) + return results def get_run_styles_to_apply(self, run): diff --git a/pydocx/export/html.py b/pydocx/export/html.py index fd8ebce1..827bac00 100644 --- a/pydocx/export/html.py +++ b/pydocx/export/html.py @@ -101,6 +101,7 @@ def __init__( allow_self_closing=False, closed=False, allow_whitespace=False, + custom_text=None, **attrs ): self.tag = tag @@ -108,6 +109,7 @@ def __init__( self.attrs = attrs self.closed = closed self.allow_whitespace = allow_whitespace + self.custom_text = custom_text def apply(self, results, allow_empty=True): if not allow_empty: @@ -116,12 +118,15 @@ def apply(self, results, allow_empty=True): return sequence = [[self]] + + if self.custom_text: + sequence.append([self.custom_text]) + if results is not None: sequence.append(results) if not self.allow_self_closing: sequence.append([self.close()]) - results = chain(*sequence) for result in results: @@ -178,6 +183,9 @@ def style(self): styles = { 'body': { 'margin': '0px auto', + }, + 'p': { + 'margin': '0' } } @@ -248,17 +256,15 @@ def export_footnote(self, footnote): return tag.apply(results, allow_empty=False) def get_paragraph_tag(self, paragraph): + if paragraph.is_empty: + return HtmlTag('p', custom_text=' ') + heading_style = paragraph.heading_style if heading_style: tag = self.get_heading_tag(paragraph) if tag: return tag - if self.in_table_cell: - return - if paragraph.has_structured_document_parent(): - return - if isinstance(paragraph.parent, NumberingItem): - return + return HtmlTag('p') def get_heading_tag(self, paragraph): @@ -277,10 +283,6 @@ def get_heading_tag(self, paragraph): def export_paragraph(self, paragraph): results = super(PyDocXHTMLExporter, self).export_paragraph(paragraph) - results = is_not_empty_and_not_only_whitespace(results) - if results is None: - return - tag = self.get_paragraph_tag(paragraph) if tag: results = tag.apply(results) @@ -585,9 +587,10 @@ def export_table_cell(self, table_cell): tag = HtmlTag('td', **attrs) numbering_spans = self.yield_numbering_spans(table_cell.children) - results = self.yield_nested_with_line_breaks_between_paragraphs( + + results = self.yield_nested( numbering_spans, - self.export_node, + self.export_node ) if tag: results = tag.apply(results) @@ -733,10 +736,8 @@ def export_numbering_span(self, numbering_span): return tag.apply(results) def export_numbering_item(self, numbering_item): - results = self.yield_nested_with_line_breaks_between_paragraphs( - numbering_item.children, - self.export_node, - ) + results = super(PyDocXHTMLExporter, self).export_numbering_item(numbering_item) + tag = HtmlTag('li') return tag.apply(results) diff --git a/pydocx/openxml/wordprocessing/paragraph.py b/pydocx/openxml/wordprocessing/paragraph.py index fe5443e3..3d3cee4a 100644 --- a/pydocx/openxml/wordprocessing/paragraph.py +++ b/pydocx/openxml/wordprocessing/paragraph.py @@ -6,17 +6,17 @@ ) from pydocx.models import XmlModel, XmlCollection, XmlChild +from pydocx.openxml.wordprocessing.bookmark import Bookmark +from pydocx.openxml.wordprocessing.deleted_run import DeletedRun from pydocx.openxml.wordprocessing.hyperlink import Hyperlink +from pydocx.openxml.wordprocessing.inserted_run import InsertedRun from pydocx.openxml.wordprocessing.paragraph_properties import ParagraphProperties # noqa from pydocx.openxml.wordprocessing.run import Run -from pydocx.openxml.wordprocessing.tab_char import TabChar -from pydocx.openxml.wordprocessing.text import Text -from pydocx.openxml.wordprocessing.smart_tag_run import SmartTagRun -from pydocx.openxml.wordprocessing.inserted_run import InsertedRun -from pydocx.openxml.wordprocessing.deleted_run import DeletedRun from pydocx.openxml.wordprocessing.sdt_run import SdtRun from pydocx.openxml.wordprocessing.simple_field import SimpleField -from pydocx.openxml.wordprocessing.bookmark import Bookmark +from pydocx.openxml.wordprocessing.smart_tag_run import SmartTagRun +from pydocx.openxml.wordprocessing.tab_char import TabChar +from pydocx.openxml.wordprocessing.text import Text class Paragraph(XmlModel): @@ -39,6 +39,20 @@ def __init__(self, **kwargs): super(Paragraph, self).__init__(**kwargs) self._effective_properties = None + @property + def is_empty(self): + if not self.children: + return True + + # we may have cases when a paragraph has a Bookmark with name '_GoBack' + # and we should treat it as empty paragraph + if len(self.children) == 1 and \ + isinstance(self.children[0], Bookmark) and \ + self.children[0].name in ('_GoBack',): + return True + + return False + @property def effective_properties(self): # TODO need to calculate effective properties like Run diff --git a/pydocx/test/testcases.py b/pydocx/test/testcases.py index 50e144c1..c10a24eb 100644 --- a/pydocx/test/testcases.py +++ b/pydocx/test/testcases.py @@ -50,6 +50,7 @@ '.pydocx-tab {display:inline-block;width:4em}' '.pydocx-underline {text-decoration:underline}' 'body {margin:0px auto;width:51.00em}' + 'p {margin:0}' '' )