Skip to content

Commit 8901a52

Browse files
committed
‼️ BREAKING: Align table parsing with GFM spec
Alter the code so that parsing respects the GFM tables extension (https://github.github.com/gfm/#tables-extension-):

- the table rule no longer deals with backticks at all
- `|` is considered escaped if and only if there is a `\` character immediately before it
- the number of elements in the first row (thead) must now match the second row (aligns) exactly
- no `tbody` is emitted if it would be empty

Implements: markdown-it/markdown-it@3021a52
1 parent 35b1f39 commit 8901a52

File tree

3 files changed

+309
-72
lines changed

3 files changed

+309
-72
lines changed

markdown_it/rules_block/table.py

Lines changed: 54 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# GFM table, non-standard
1+
# GFM table, https://github.github.com/gfm/#tables-extension-
22
import re
33

44
from .state_block import StateBlock
@@ -21,48 +21,35 @@ def escapedSplit(string):
2121
result = []
2222
pos = 0
2323
max = len(string)
24-
escapes = 0
24+
isEscaped = False
2525
lastPos = 0
26-
backTicked = False
27-
lastBackTick = 0
26+
current = ""
2827
ch = charCodeAt(string, pos)
2928

3029
while pos < max:
31-
if ch == 0x60: # /* ` */
32-
if backTicked:
33-
# make \` close code sequence, but not open it;
34-
# the reason is: `\` is correct code block
35-
backTicked = False
36-
lastBackTick = pos
37-
elif escapes % 2 == 0:
38-
backTicked = True
39-
lastBackTick = pos
40-
# /* | */
41-
elif ch == 0x7C and (escapes % 2 == 0) and not backTicked:
42-
result.append(string[lastPos:pos])
43-
lastPos = pos + 1
44-
45-
if ch == 0x5C: # /* \ */
46-
escapes += 1
47-
else:
48-
escapes = 0
30+
if ch == 0x7C: # /* | */
31+
if not isEscaped:
32+
# pipe separating cells, '|'
33+
result.append(current + string[lastPos:pos])
34+
current = ""
35+
lastPos = pos + 1
36+
else:
37+
# escaped pipe, '\|'
38+
current += string[lastPos : pos - 1]
39+
lastPos = pos
4940

41+
isEscaped = ch == 0x5C # /* \ */
5042
pos += 1
5143

52-
# If there was an un-closed backtick, go back to just after
53-
# the last backtick, but as if it was a normal character
54-
if pos == max and backTicked:
55-
backTicked = False
56-
pos = lastBackTick + 1
57-
5844
ch = charCodeAt(string, pos)
5945

60-
result.append(string[lastPos:])
46+
result.append(current + string[lastPos:])
6147

6248
return result
6349

6450

6551
def table(state: StateBlock, startLine: int, endLine: int, silent: bool):
52+
tbodyLines = None
6653

6754
# should have at least two lines
6855
if startLine + 2 > endLine:
@@ -129,17 +116,28 @@ def table(state: StateBlock, startLine: int, endLine: int, silent: bool):
129116
return False
130117
if state.sCount[startLine] - state.blkIndent >= 4:
131118
return False
132-
columns = escapedSplit(enclosingPipesRe.sub("", lineText))
119+
columns = escapedSplit(lineText)
120+
if columns and columns[0] == "":
121+
columns.pop(0)
122+
if columns and columns[-1] == "":
123+
columns.pop()
133124

134125
# header row will define an amount of columns in the entire table,
135-
# and align row shouldn't be smaller than that (the rest of the rows can)
126+
# and align row should be exactly the same (the rest of the rows can differ)
136127
columnCount = len(columns)
137-
if columnCount > len(aligns):
128+
if columnCount != len(aligns):
138129
return False
139130

140131
if silent:
141132
return True
142133

134+
oldParentType = state.parentType
135+
state.parentType = "table"
136+
137+
# use 'blockquote' lists for termination because it's
138+
# the most similar to tables
139+
terminatorRules = state.md.block.ruler.getRules("blockquote")
140+
143141
token = state.push("table_open", "table", 1)
144142
token.map = tableLines = [startLine, 0]
145143

@@ -166,20 +164,33 @@ def table(state: StateBlock, startLine: int, endLine: int, silent: bool):
166164
token = state.push("tr_close", "tr", -1)
167165
token = state.push("thead_close", "thead", -1)
168166

169-
token = state.push("tbody_open", "tbody", 1)
170-
token.map = tbodyLines = [startLine + 2, 0]
171-
172167
nextLine = startLine + 2
173168
while nextLine < endLine:
174169
if state.sCount[nextLine] < state.blkIndent:
175170
break
176171

172+
terminate = False
173+
for i in range(len(terminatorRules)):
174+
if terminatorRules[i](state, nextLine, endLine, True):
175+
terminate = True
176+
break
177+
178+
if terminate:
179+
break
177180
lineText = getLine(state, nextLine).strip()
178-
if "|" not in lineText:
181+
if not lineText:
179182
break
180183
if state.sCount[nextLine] - state.blkIndent >= 4:
181184
break
182-
columns = escapedSplit(enclosingPipesRe.sub("", lineText))
185+
columns = escapedSplit(lineText)
186+
if columns and columns[0] == "":
187+
columns.pop(0)
188+
if columns and columns[-1] == "":
189+
columns.pop()
190+
191+
if nextLine == startLine + 2:
192+
token = state.push("tbody_open", "tbody", 1)
193+
token.map = tbodyLines = [startLine + 2, 0]
183194

184195
token = state.push("tr_open", "tr", 1)
185196
token.map = [nextLine, nextLine + 1]
@@ -205,9 +216,13 @@ def table(state: StateBlock, startLine: int, endLine: int, silent: bool):
205216

206217
nextLine += 1
207218

208-
token = state.push("tbody_close", "tbody", -1)
219+
if tbodyLines:
220+
token = state.push("tbody_close", "tbody", -1)
221+
tbodyLines[1] = nextLine
222+
209223
token = state.push("table_close", "table", -1)
210224

211-
tableLines[1] = tbodyLines[1] = nextLine
225+
tableLines[1] = nextLine
226+
state.parentType = oldParentType
212227
state.line = nextLine
213228
return True

0 commit comments

Comments
 (0)