diff --git a/README.md b/README.md index a4809c2..b424f19 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,9 @@ classes. It then maps the classes to a set of image processing instructions, computes new images, and modifies HTML code according to the instructions. +It can optionally scan the metadata of your content to update image URLs +there. + ### Define Transformations The first step in using this module is to define some image @@ -517,6 +520,60 @@ IMAGE_PROCESS_CLASS_PREFIX = "custom-prefix-" IMAGE_PROCESS_ADD_CLASS = False ``` +#### Converting Image Paths to URLs in Metadata + +If you want *Image Process* to process images in the metadata +of your content (for example, in the `og_image` field used by the `seo` and `pelican-open_graph` plugins), +you can set the `IMAGE_PROCESS_METADATA` setting to a dictionary mapping +metadata field names to transformation names. The transformation must be defined +in the `IMAGE_PROCESS` setting as usual, and it must be +an image replacement transformation (i.e., of type `image`). +For example: + +```python +# pelicanconf.py + +IMAGE_PROCESS_METADATA = { + "og_image": "og-image-transform", +} + +IMAGE_PROCESS = { + 'og-image-transform': {"type": "image", + "ops": ["scale_in 800 640 True"], + }, + # ... possibly other transformations ... +} +``` + +*Image Process* will look for the specified +metadata fields in your content and will apply the specified transformation +to the image path found in the metadata value. + +It is possible to override the transformation applied to a specific instance of a metadata field by prefixing +the metadata value with `{transformation-name}`, where `transformation-name` is the name +of a transformation in the `IMAGE_PROCESS` dictionary. 
For example, if you have defined +`IMAGE_PROCESS_METADATA` as above, you can override the transformation for a specific article +by setting its `og_image` metadata value to `{some-special-transformation}/path/to/image.jpg`, +where `some-special-transformation` is a transformation defined in the `IMAGE_PROCESS` +dictionary. Here is an example article using this feature: + +```markdown +# Example article +Title: Example Article +Date: 2024-06-01 +og_image: {some-special-transformation}/images/special-image.jpg + +This article uses a special image for Open Graph. +``` + +If you only want to process metadata fields for some articles, you can set the transformation to `None` +in `IMAGE_PROCESS_METADATA` and add a `{transformation-name}` prefix to the metadata value of +selected articles. + +*Image Process* will update the metadata field to the URL of the transformed image. +The original metadata values (with any `{transformation-name}` prefix removed) are saved in the `image_process_original_metadata` dictionary +of the content object, so that you can access them later if needed. + ## Known Issues * Pillow, when resizing animated GIF files, [does not return an animated file](https://github.com/pelican-plugins/image-process/issues/11). 
diff --git a/RELEASE.md b/RELEASE.md new file mode 100644 index 0000000..a461a08 --- /dev/null +++ b/RELEASE.md @@ -0,0 +1,3 @@ +Release type: minor + +- Process images in content metadata via new `IMAGE_PROCESS_METADATA` setting diff --git a/pelican/plugins/image_process/image_process.py b/pelican/plugins/image_process/image_process.py index 520e727..644fc5e 100644 --- a/pelican/plugins/image_process/image_process.py +++ b/pelican/plugins/image_process/image_process.py @@ -18,7 +18,7 @@ import subprocess import sys import urllib -from urllib.parse import unquote, urlparse +from urllib.parse import unquote, urljoin, urlparse from urllib.request import pathname2url, url2pathname from bs4 import BeautifulSoup @@ -389,15 +389,21 @@ def harvest_images_in_fragment(fragment, settings): return str(soup) -def compute_paths(img, settings, derivative): +def compute_paths(image_url, settings, derivative): + # Backwards compatibility: accept either a string (image_url) or + # a dict (img with "src" key) + if isinstance(image_url, dict): + image_url = image_url.get("src", "") + logger.warning(f"{LOG_PREFIX} Deprecated use of dict for image_url.") + process_dir = settings["IMAGE_PROCESS_DIR"] - img_src = urlparse(img["src"]) + img_src = urlparse(image_url) img_src_path = url2pathname(img_src.path.lstrip("/")) _img_src_dirname, filename = os.path.split(img_src_path) derivative_path = os.path.join(process_dir, derivative) # urljoin truncates leading ../ elements base_url = posixpath.join( - posixpath.dirname(img["src"]), pathname2url(str(derivative_path)) + posixpath.dirname(image_url), pathname2url(str(derivative_path)) ) PELICAN_V4 = 4 @@ -439,7 +445,7 @@ def compute_paths(img, settings, derivative): def process_img_tag(img, settings, derivative): - path = compute_paths(img, settings, derivative) + path = compute_paths(img["src"], settings, derivative) process = settings["IMAGE_PROCESS"][derivative] img["src"] = posixpath.join(path.base_url, path.filename) @@ -465,7 +471,7 @@ 
def format_srcset_element(path, condition): def build_srcset(img, settings, derivative): - path = compute_paths(img, settings, derivative) + path = compute_paths(img["src"], settings, derivative) process = settings["IMAGE_PROCESS"][derivative] default = process["default"] @@ -768,6 +774,74 @@ def process_image(image, settings): return i.width, i.height +def process_metadata(generator, metadata): + set_default_settings(generator.context) + metadata_to_process = generator.context.get("IMAGE_PROCESS_METADATA", {}).keys() + site_url = generator.context.get("SITEURL", "") + + original_values = {} + + for key, value in metadata.items(): + if isinstance(value, str) and key in metadata_to_process: + derivative = generator.context["IMAGE_PROCESS_METADATA"][key] + # If value starts with {some-other-derivative}, override derivative + if value.startswith("{") and "}" in value: + end_brace = value.index("}") + derivative = value[1:end_brace] + value = value[end_brace + 1 :].lstrip() # noqa: PLW2901 + + if derivative is None: + continue + + # Ignore Pelican special linking directives to avoid conflicts. + # Extracted from Pelican function _link_replacer() in contents.py + special_file_locations = { + "filename", + "attach", + "static", + "category", + "tag", + "author", + "index", + } + if derivative in special_file_locations: + logger.warning( + f"{LOG_PREFIX} Skipping metadata key '{key}' " + f"because it uses Pelican linking directive '{derivative}'." + ) + continue + + try: + process = generator.context["IMAGE_PROCESS"][derivative] + except KeyError as e: + raise RuntimeError(f"Derivative {derivative} undefined.") from e + + if not ( + isinstance(process, list) + or (isinstance(process, dict) and process["type"] == "image") + ): + raise RuntimeError( + f'IMAGE_PROCESS_METADATA "{key}" must reference a transformation ' + 'of type "image".' 
+ ) + + path = compute_paths(value, generator.context, derivative) + + original_values[key] = value + metadata[key] = urljoin( + site_url, posixpath.join(path.base_url, path.filename) + ) + destination = os.path.join(str(path.base_path), path.filename) + + if not isinstance(process, list): + process = process["ops"] + + process_image((path.source, destination, process), generator.context) + + if original_values: + metadata["image_process_original_metadata"] = original_values + + def dump_config(pelican): set_default_settings(pelican.settings) @@ -779,6 +853,7 @@ def dump_config(pelican): def register(): + signals.article_generator_context.connect(process_metadata) signals.content_written.connect(harvest_images) signals.feed_written.connect(harvest_feed_images) signals.finalized.connect(dump_config) diff --git a/pelican/plugins/image_process/test_image_process.py b/pelican/plugins/image_process/test_image_process.py index fb67f47..cfc2868 100644 --- a/pelican/plugins/image_process/test_image_process.py +++ b/pelican/plugins/image_process/test_image_process.py @@ -14,6 +14,7 @@ compute_paths, harvest_images_in_fragment, process_image, + process_metadata, set_default_settings, try_open_image, ) @@ -90,7 +91,7 @@ def get_settings(**kwargs): "OUTPUT_PATH": "output", "static_content": {}, "filenames": {}, - "SITEURL": "//", + "SITEURL": "https://www.example.com", "IMAGE_PROCESS": SINGLE_TRANSFORMS, } settings = DEFAULT_CONFIG.copy() @@ -836,9 +837,9 @@ def test_try_open_image(): assert not try_open_image(TEST_DATA.joinpath("folded_puzzle.png")) assert not try_open_image(TEST_DATA.joinpath("minimal.svg")) - img = {"src": "https://upload.wikimedia.org/wikipedia/commons/3/34/Exemple.png"} + img_path = "https://upload.wikimedia.org/wikipedia/commons/3/34/Exemple.png" settings = get_settings(IMAGE_PROCESS_DIR="derivatives") - path = compute_paths(img, settings, derivative="thumb") + path = compute_paths(img_path, settings, derivative="thumb") with 
pytest.raises(FileNotFoundError): assert not try_open_image(path.source) @@ -901,6 +902,93 @@ def test_class_settings(mocker, orig_tag, new_tag, setting_overrides): assert harvest_images_in_fragment(orig_tag, settings) == new_tag +@pytest.mark.parametrize( + "orig_metadata, new_metadata, setting_overrides, should_process, transform_id, " + "expected_output_path", + [ + ( + {"title": "Test Article"}, + {"title": "Test Article"}, + {"IMAGE_PROCESS_METADATA": {"og_image": "crop"}}, + False, + None, + None, + ), + ( + {"og_image": "/photos/test-image.jpg"}, + { + "og_image": "https://www.example.com/photos/derivatives/crop/test-image.jpg", + "image_process_original_metadata": { + "og_image": "/photos/test-image.jpg" + }, + }, + {"IMAGE_PROCESS_METADATA": {"og_image": "crop"}}, + True, + "crop", + "photos/derivatives/crop/test-image.jpg", + ), + ( + {"og_image": "{resize}/photos/test-image.jpg"}, + { + "og_image": "https://www.example.com/photos/derivatives/resize/test-image.jpg", + "image_process_original_metadata": { + "og_image": "/photos/test-image.jpg" + }, + }, + {"IMAGE_PROCESS_METADATA": {"og_image": "crop"}}, + True, + "resize", + "photos/derivatives/resize/test-image.jpg", + ), + # Ignore Pelican special linking directives like {static} and {attach}. + ( + {"og_image": "{static}/photos/test-image.jpg"}, + {"og_image": "{static}/photos/test-image.jpg"}, + {"IMAGE_PROCESS_METADATA": {"og_image": "crop"}}, + False, + None, + None, + ), + ], +) +def test_process_metadata_image( # noqa: PLR0913 + mocker, + orig_metadata, + new_metadata, + setting_overrides, + should_process, + transform_id, + expected_output_path, +): + # Silence image transforms. 
+ process = mocker.patch("pelican.plugins.image_process.image_process.process_image") + + settings = get_settings(**setting_overrides) + + fake_generator = mocker.MagicMock() + fake_generator.context = settings + processed_metadata = orig_metadata.copy() + process_metadata(fake_generator, processed_metadata) + + assert processed_metadata == new_metadata + + if should_process: + path = orig_metadata["og_image"] + if path.startswith("{") and "}" in path: + path = path.split("}", 1)[1].lstrip() + + process.assert_called_once_with( + ( + os.path.join(settings["PATH"], path[1:]), + os.path.join(settings["OUTPUT_PATH"], expected_output_path), + SINGLE_TRANSFORMS[transform_id], + ), + settings, + ) + + assert processed_metadata["image_process_original_metadata"]["og_image"] == path + + def generate_test_images(): settings = get_settings() image_count = 0