|
1 | 1 | import os |
2 | 2 | import io |
3 | 3 | from pprint import pprint |
4 | | -import pymupdf |
| 4 | +import textwrap |
5 | 5 | import pickle |
6 | 6 |
|
| 7 | +import pymupdf |
| 8 | + |
7 | 9 | scriptdir = os.path.abspath(os.path.dirname(__file__)) |
8 | 10 | filename = os.path.join(scriptdir, "resources", "chinese-tables.pdf") |
9 | 11 | pickle_file = os.path.join(scriptdir, "resources", "chinese-tables.pickle") |
@@ -294,15 +296,29 @@ def test_markdown(): |
294 | 296 | doc = pymupdf.open(filename) |
295 | 297 | page = doc[0] |
296 | 298 | tab = page.find_tables(strategy="lines_strict")[0] |
297 | | - text = ( |
298 | | - "|Header1|Header2|Header3|\n" |
299 | | - "|---|---|---|\n" |
300 | | - "|Col11<br>Col12|Col21<br>Col22|Col31<br>Col32<br>Col33|\n" |
301 | | - "|Col13|Col23|Col34<br>Col35|\n" |
302 | | - "|Col14|Col24|Col36|\n" |
303 | | - "|Col15|Col25<br>Col26||\n\n" |
304 | | - ) |
305 | | - assert tab.to_markdown() == text |
| 299 | + if pymupdf.mupdf_version_tuple < (1, 27): |
| 300 | + md_expected = textwrap.dedent(''' |
| 301 | + |Header1|Header2|Header3| |
| 302 | + |---|---|---| |
| 303 | + |Col11<br>Col12|~~Col21~~<br>~~Col22~~|Col31<br>Col32<br>Col33| |
| 304 | + |Col13|~~Col23~~|Col34<br>Col35| |
| 305 | + |Col14|~~Col24~~|Col36| |
| 306 | + |Col15|~~Col25~~<br>~~Col26~~|| |
| 307 | + |
| 308 | + ''').lstrip() |
| 309 | + else: |
| 310 | + md_expected = ( |
| 311 | + "|Header1|Header2|Header3|\n" |
| 312 | + "|---|---|---|\n" |
| 313 | + "|Col11<br>Col12|Col21<br>Col22|Col31<br>Col32<br>Col33|\n" |
| 314 | + "|Col13|Col23|Col34<br>Col35|\n" |
| 315 | + "|Col14|Col24|Col36|\n" |
| 316 | + "|Col15|Col25<br>Col26||\n\n" |
| 317 | + ) |
| 318 | + |
| 319 | + |
| 320 | + md = tab.to_markdown() |
| 321 | + assert md == md_expected, f'Incorrect md:\n{textwrap.indent(md, " ")}' |
306 | 322 |
|
307 | 323 |
|
308 | 324 | def test_paths_param(): |
|
0 commit comments