|
1 | | -from typing import Any, Dict, Iterable, Sequence, Tuple |
2 | | -from urllib.parse import urljoin |
3 | | -import docutils.nodes as nodes |
4 | 1 | import string |
5 | | -from html.parser import HTMLParser |
6 | | -import sphinx |
7 | | -from sphinx.application import Sphinx |
8 | | - |
9 | | -DEFAULT_DESCRIPTION_LENGTH = 200 |
10 | | - |
11 | | - |
12 | | -class HTMLTextParser(HTMLParser): |
13 | | - """ |
14 | | - Parse HTML into text |
15 | | - """ |
16 | | - |
17 | | - def __init__(self): |
18 | | - super().__init__() |
19 | | - # All text found |
20 | | - self.text = "" |
21 | | - # Only text outside of html tags |
22 | | - self.text_outside_tags = "" |
23 | | - self.level = 0 |
| 2 | +from typing import Iterable |
24 | 3 |
|
25 | | - def handle_starttag(self, tag, attrs) -> None: |
26 | | - self.level += 1 |
27 | | - |
28 | | - def handle_endtag(self, tag) -> None: |
29 | | - self.level -= 1 |
30 | | - |
31 | | - def handle_data(self, data) -> None: |
32 | | - self.text += data |
33 | | - if self.level == 0: |
34 | | - self.text_outside_tags += data |
| 4 | +import docutils.nodes as nodes |
35 | 5 |
|
36 | 6 |
|
37 | | -class OGMetadataCreatorVisitor(nodes.NodeVisitor): |
| 7 | +class DescriptionParser(nodes.NodeVisitor): |
38 | 8 | """ |
39 | 9 | Finds the title and creates a description from a doctree |
40 | 10 | """ |
@@ -145,96 +115,13 @@ def dispatch_departure(self, node: nodes.Element) -> None: |
145 | 115 | self.stop = True |
146 | 116 |
|
147 | 117 |
|
148 | | -def make_tag(property: str, content: str) -> str: |
149 | | - return f'<meta property="{property}" content="{content}" />\n ' |
150 | | - |
151 | | - |
152 | | -def get_tags( |
153 | | - context: Dict[str, Any], doctree: nodes.document, config: Dict[str, Any] |
154 | | -) -> str: |
155 | | - |
156 | | - # Set length of description |
157 | | - try: |
158 | | - desc_len = int(config["ogp_description_length"]) |
159 | | - except ValueError: |
160 | | - desc_len = DEFAULT_DESCRIPTION_LENGTH |
161 | | - |
162 | | - # Get the title and parse any html in it |
163 | | - htp = HTMLTextParser() |
164 | | - htp.feed(context["title"]) |
165 | | - htp.close() |
| 118 | +def get_description( |
| 119 | + doctree: nodes.document, |
| 120 | + description_length: int, |
| 121 | + known_titles: Iterable[str] = None, |
| 122 | + document: nodes.document = None, |
| 123 | +): |
166 | 124 |
|
167 | | - # Parse/walk doctree for metadata (tag/description) |
168 | | - mcv = OGMetadataCreatorVisitor(desc_len, [htp.text, htp.text_outside_tags]) |
| 125 | + mcv = DescriptionParser(description_length, known_titles, document) |
169 | 126 | doctree.walkabout(mcv) |
170 | | - |
171 | | - tags = "\n " |
172 | | - |
173 | | - # title tag |
174 | | - tags += make_tag("og:title", htp.text) |
175 | | - |
176 | | - # type tag |
177 | | - tags += make_tag("og:type", config["ogp_type"]) |
178 | | - |
179 | | - # url tag |
180 | | - # Get the URL of the specific page |
181 | | - page_url = urljoin( |
182 | | - config["ogp_site_url"], context["pagename"] + context["file_suffix"] |
183 | | - ) |
184 | | - tags += make_tag("og:url", page_url) |
185 | | - |
186 | | - # site name tag |
187 | | - site_name = config["ogp_site_name"] |
188 | | - if site_name: |
189 | | - tags += make_tag("og:site_name", site_name) |
190 | | - |
191 | | - # description tag |
192 | | - tags += make_tag("og:description", mcv.description) |
193 | | - |
194 | | - # image tag |
195 | | - # Get the image from the config |
196 | | - image_url = config["ogp_image"] |
197 | | - if image_url: |
198 | | - tags += make_tag("og:image", image_url) |
199 | | - |
200 | | - # Add image alt text (either provided by config or from site_name) |
201 | | - ogp_image_alt = config["ogp_image_alt"] |
202 | | - if isinstance(ogp_image_alt, str): |
203 | | - tags += make_tag("og:image:alt", ogp_image_alt) |
204 | | - elif ogp_image_alt and site_name: |
205 | | - tags += make_tag("og:image:alt", site_name) |
206 | | - elif ogp_image_alt and htp.text: |
207 | | - tags += make_tag("og:image:alt", htp.text) |
208 | | - |
209 | | - # custom tags |
210 | | - tags += "\n".join(config["ogp_custom_meta_tags"]) |
211 | | - |
212 | | - return tags |
213 | | - |
214 | | - |
215 | | -def html_page_context( |
216 | | - app: Sphinx, |
217 | | - pagename: str, |
218 | | - templatename: str, |
219 | | - context: Dict[str, Any], |
220 | | - doctree: nodes.document, |
221 | | -) -> None: |
222 | | - if doctree: |
223 | | - context["metatags"] += get_tags(context, doctree, app.config) |
224 | | - |
225 | | - |
226 | | -def setup(app: Sphinx) -> Dict[str, Any]: |
227 | | - app.add_config_value("ogp_site_url", None, "html") |
228 | | - app.add_config_value("ogp_description_length", DEFAULT_DESCRIPTION_LENGTH, "html") |
229 | | - app.add_config_value("ogp_image", None, "html") |
230 | | - app.add_config_value("ogp_image_alt", True, "html") |
231 | | - app.add_config_value("ogp_type", "website", "html") |
232 | | - app.add_config_value("ogp_site_name", None, "html") |
233 | | - app.add_config_value("ogp_custom_meta_tags", [], "html") |
234 | | - |
235 | | - app.connect("html-page-context", html_page_context) |
236 | | - |
237 | | - return { |
238 | | - "parallel_read_safe": True, |
239 | | - "parallel_write_safe": True, |
240 | | - } |
| 127 | + return mcv.description |
0 commit comments