Skip to content

Commit 4378da2

Browse files
committed
fix: use proper YAML parsing in inject script instead of regex
This fixes issues with injecting minified code when YAML values contain escaped quotes or special characters. The script now parses and serializes the front matter with the yaml library instead of relying on fragile regex patterns.
1 parent 3ed6e18 commit 4378da2

File tree

1 file changed

+29
-84
lines changed

1 file changed

+29
-84
lines changed

.verify-helper/scripts/inject_minified_docs.py

Lines changed: 29 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,20 @@
11
#!/usr/bin/env python3
22
"""
33
Inject minified code into documentation markdown files.
4-
This script reads minified versions from .competitive-verifier/minified/ and
5-
adds minifiedCode fields to the documentation markdown files.
4+
This script reads minified versions from cp-algo/min/ and cp-algo/min-bundled/
5+
and adds minifiedCode and minifiedBundledCode fields to the documentation markdown files.
66
"""
77

88
import os
99
import sys
1010
from pathlib import Path
1111
import re
1212
import json
13-
14-
15-
def get_minified_code(file_path, minified_dir):
16-
"""Get minified code for a given file path."""
17-
minified_file = minified_dir / file_path
18-
if minified_file.exists():
19-
try:
20-
with open(minified_file, 'r', encoding='utf-8') as f:
21-
code = f.read()
22-
# Escape for YAML
23-
code = code.replace('\\', '\\\\')
24-
code = code.replace('"', '\\"')
25-
code = code.replace('\n', '\\n')
26-
return code
27-
except Exception as e:
28-
print(f"Error reading {minified_file}: {e}", file=sys.stderr)
29-
return None
13+
import yaml
3014

3115

3216
def inject_minified_to_markdown(markdown_file, minified_code=None, minified_bundled_code=None):
33-
"""Inject minified code into markdown file's front matter."""
17+
"""Inject minified code into markdown file's front matter using proper YAML parsing."""
3418
try:
3519
with open(markdown_file, 'r', encoding='utf-8') as f:
3620
content = f.read()
@@ -39,76 +23,45 @@ def inject_minified_to_markdown(markdown_file, minified_code=None, minified_bund
3923
if not content.startswith('---'):
4024
return False
4125

42-
# Split front matter and content
26+
# Split front matter and content at first --- and second ---
4327
parts = content.split('---', 2)
4428
if len(parts) < 3:
4529
return False
4630

47-
front_matter = parts[1]
31+
front_matter_str = parts[1]
4832
body = parts[2]
4933

34+
# Parse YAML front matter
35+
try:
36+
front_matter = yaml.safe_load(front_matter_str)
37+
except Exception as e:
38+
print(f"Error parsing YAML for {markdown_file}: {e}", file=sys.stderr)
39+
return False
40+
41+
if not isinstance(front_matter, dict):
42+
return False
43+
5044
updated = False
5145

52-
# Handle minifiedCode
46+
# Add or update minifiedCode
5347
if minified_code:
54-
if 'minifiedCode:' in front_matter:
55-
# Replace existing minifiedCode
56-
front_matter = re.sub(
57-
r' minifiedCode: ".*?"(?=\n [a-zA-Z_]|\n$)',
58-
f' minifiedCode: "{minified_code}"',
59-
front_matter,
60-
flags=re.DOTALL
61-
)
62-
else:
63-
# Add minifiedCode after bundledCode if it exists
64-
if 'bundledCode:' in front_matter:
65-
front_matter = re.sub(
66-
r'( bundledCode: ".*?")(\n [a-zA-Z_]|\n$)',
67-
rf'\1\n minifiedCode: "{minified_code}"\2',
68-
front_matter,
69-
flags=re.DOTALL
70-
)
71-
else:
72-
# Add at the end of front matter
73-
front_matter = front_matter.rstrip() + f'\n minifiedCode: "{minified_code}"'
48+
front_matter['minifiedCode'] = minified_code
7449
updated = True
7550

76-
# Handle minifiedBundledCode
51+
# Add or update minifiedBundledCode
7752
if minified_bundled_code:
78-
if 'minifiedBundledCode:' in front_matter:
79-
# Replace existing minifiedBundledCode
80-
front_matter = re.sub(
81-
r' minifiedBundledCode: ".*?"(?=\n [a-zA-Z_]|\n$)',
82-
f' minifiedBundledCode: "{minified_bundled_code}"',
83-
front_matter,
84-
flags=re.DOTALL
85-
)
86-
else:
87-
# Add minifiedBundledCode after minifiedCode if it exists
88-
if 'minifiedCode:' in front_matter:
89-
front_matter = re.sub(
90-
r'( minifiedCode: ".*?")(\n [a-zA-Z_]|\n$)',
91-
rf'\1\n minifiedBundledCode: "{minified_bundled_code}"\2',
92-
front_matter,
93-
flags=re.DOTALL
94-
)
95-
elif 'bundledCode:' in front_matter:
96-
front_matter = re.sub(
97-
r'( bundledCode: ".*?")(\n [a-zA-Z_]|\n$)',
98-
rf'\1\n minifiedBundledCode: "{minified_bundled_code}"\2',
99-
front_matter,
100-
flags=re.DOTALL
101-
)
102-
else:
103-
# Add at the end of front matter
104-
front_matter = front_matter.rstrip() + f'\n minifiedBundledCode: "{minified_bundled_code}"'
53+
front_matter['minifiedBundledCode'] = minified_bundled_code
10554
updated = True
10655

10756
if not updated:
10857
return False
10958

59+
# Re-serialize YAML front matter
60+
# Use default_flow_style=False to keep lists as blocks, allow_unicode=True for special chars
61+
new_front_matter_str = yaml.dump(front_matter, default_flow_style=False, allow_unicode=True)
62+
11063
# Write updated content
111-
new_content = f'---{front_matter}---{body}'
64+
new_content = f'---{new_front_matter_str}---{body}'
11265
with open(markdown_file, 'w', encoding='utf-8') as f:
11366
f.write(new_content)
11467

@@ -157,25 +110,17 @@ def main():
157110
minified_file = minified_dir / f"{path_without_ext}.{ext}"
158111
if minified_file.exists():
159112
with open(minified_file, 'r', encoding='utf-8') as f:
160-
code = f.read()
161-
# Escape for YAML
162-
code = code.replace('\\', '\\\\')
163-
code = code.replace('"', '\\"')
164-
code = code.replace('\n', '\\n')
165-
minified_code = code
113+
minified_code = f.read()
114+
break
166115

167116
# Try to find corresponding minified bundled file
168117
for ext in possible_extensions:
169118
if minified_bundled_code is None and minified_bundled_dir.exists():
170119
minified_bundled_file = minified_bundled_dir / f"{path_without_ext}.{ext}"
171120
if minified_bundled_file.exists():
172121
with open(minified_bundled_file, 'r', encoding='utf-8') as f:
173-
code = f.read()
174-
# Escape for YAML
175-
code = code.replace('\\', '\\\\')
176-
code = code.replace('"', '\\"')
177-
code = code.replace('\n', '\\n')
178-
minified_bundled_code = code
122+
minified_bundled_code = f.read()
123+
break
179124

180125
# Only inject if we found at least one minified version
181126
if (minified_code or minified_bundled_code) and inject_minified_to_markdown(md_file, minified_code, minified_bundled_code):

0 commit comments

Comments
 (0)