diff --git a/Makefile b/Makefile index 591fd5b6387b..95ab88e62616 100644 --- a/Makefile +++ b/Makefile @@ -53,7 +53,7 @@ repo-consistency: python utils/check_doctest_list.py python utils/update_metadata.py --check-only python utils/check_docstrings.py - python utils/add_dates.py + python utils/add_dates.py --check-only # this target runs checks on all files @@ -93,6 +93,7 @@ fix-copies: python utils/check_pipeline_typing.py --fix_and_overwrite python utils/check_doctest_list.py --fix_and_overwrite python utils/check_docstrings.py --fix_and_overwrite + python utils/add_dates.py # Run tests for the library diff --git a/docs/source/en/model_doc/afmoe.md b/docs/source/en/model_doc/afmoe.md index 4297701742aa..ecd0c726326e 100644 --- a/docs/source/en/model_doc/afmoe.md +++ b/docs/source/en/model_doc/afmoe.md @@ -13,7 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> -*This model was released on {release_date} and added to Hugging Face Transformers on 2025-11-18.* +*This model was released on {release_date} and added to Hugging Face Transformers on 2025-11-29.*
diff --git a/docs/source/en/model_doc/fast_vlm.md b/docs/source/en/model_doc/fast_vlm.md index 25cbe3bff126..82ad34a6a8be 100644 --- a/docs/source/en/model_doc/fast_vlm.md +++ b/docs/source/en/model_doc/fast_vlm.md @@ -14,7 +14,7 @@ rendered properly in your Markdown viewer. --> -*This model was released on 2025-05-06 and added to Hugging Face Transformers on 2025-10-07.* +*This model was released on 2025-05-06 and added to Hugging Face Transformers on 2025-12-02.* # FastVLM diff --git a/docs/source/en/model_doc/glm46v.md b/docs/source/en/model_doc/glm46v.md index 6e099d7aaef6..64666cea7588 100644 --- a/docs/source/en/model_doc/glm46v.md +++ b/docs/source/en/model_doc/glm46v.md @@ -1,3 +1,20 @@ + +*This model was released on {release_date} and added to Hugging Face Transformers on 2025-11-15.* + # GLM-4.6V ## Glm46VConfig diff --git a/docs/source/en/model_doc/ministral3.md b/docs/source/en/model_doc/ministral3.md index 90d4cf41573e..26e07250be15 100644 --- a/docs/source/en/model_doc/ministral3.md +++ b/docs/source/en/model_doc/ministral3.md @@ -16,6 +16,7 @@ limitations under the License. ⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be rendered properly in your Markdown viewer. --> +*This model was released on {release_date} and added to Hugging Face Transformers on 2025-12-01.* # Ministral3 diff --git a/docs/source/en/model_doc/nanochat.md b/docs/source/en/model_doc/nanochat.md index a951af07c3ca..5b0b0a3234d6 100644 --- a/docs/source/en/model_doc/nanochat.md +++ b/docs/source/en/model_doc/nanochat.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on {release_date} and added to Hugging Face Transformers on 2025-11-27.* # NanoChat diff --git a/docs/source/en/model_doc/t5gemma2.md b/docs/source/en/model_doc/t5gemma2.md index 7cf306069a7f..5393187343ce 100644 --- a/docs/source/en/model_doc/t5gemma2.md +++ b/docs/source/en/model_doc/t5gemma2.md @@ -14,6 +14,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on {release_date} and added to Hugging Face Transformers on 2025-12-01.*
PyTorch diff --git a/utils/add_dates.py b/utils/add_dates.py index 6719beae4b63..4b9c3e7514ba 100644 --- a/utils/add_dates.py +++ b/utils/add_dates.py @@ -2,7 +2,7 @@ import os import re import subprocess -from datetime import date +from datetime import date, datetime from huggingface_hub import paper_info @@ -176,14 +176,82 @@ def replace_paper_links(file_path: str) -> bool: return False -def insert_dates(model_card_list: list[str]): - """Insert release and commit dates into model cards""" +def _normalize_model_card_name(model_card: str) -> str: + """Ensure model card has .md extension""" + return model_card if model_card.endswith(".md") else f"{model_card}.md" + + +def _should_skip_model_card(model_card: str) -> bool: + """Check if model card should be skipped""" + return model_card in ("auto.md", "timm_wrapper.md") + + +def _read_model_card_content(model_card: str) -> str: + """Read and return the content of a model card""" + file_path = os.path.join(DOCS_PATH, model_card) + with open(file_path, "r", encoding="utf-8") as f: + return f.read() + + +def _get_dates_pattern_match(content: str): + """Search for the dates pattern in content and return match object""" + pattern = r"\n\*This model was released on (.*) and added to Hugging Face Transformers on (\d{4}-\d{2}-\d{2})\.\*" + return re.search(pattern, content) + + +def _dates_differ_significantly(date1: str, date2: str) -> bool: + """Check if two dates differ by more than 1 day""" + try: + d1 = datetime.strptime(date1, "%Y-%m-%d") + d2 = datetime.strptime(date2, "%Y-%m-%d") + return abs((d1 - d2).days) > 1 + except Exception: + return True # If dates can't be parsed, consider them different + + +def check_missing_dates(model_card_list: list[str]) -> list[str]: + """Check which model cards are missing release dates and return their names""" + missing_dates = [] for model_card in model_card_list: - if not model_card.endswith(".md"): - model_card = f"{model_card}.md" + model_card = _normalize_model_card_name(model_card) + if _should_skip_model_card(model_card): + continue - if model_card == "auto.md" or model_card == "timm_wrapper.md": + content = _read_model_card_content(model_card) + if not _get_dates_pattern_match(content): + missing_dates.append(model_card) + + return missing_dates + + +def check_incorrect_dates(model_card_list: list[str]) -> list[str]: + """Check which model cards have incorrect HF commit dates and return their names""" + incorrect_dates = [] + + for model_card in model_card_list: + model_card = _normalize_model_card_name(model_card) + if _should_skip_model_card(model_card): + continue + + content = _read_model_card_content(model_card) + match = _get_dates_pattern_match(content) + + if match: + existing_hf_date = match.group(2) + actual_hf_date = get_first_commit_date(model_name=model_card) + + if _dates_differ_significantly(existing_hf_date, actual_hf_date): + incorrect_dates.append(model_card) + + return incorrect_dates + + +def insert_dates(model_card_list: list[str]): + """Insert or update release and commit dates in model cards""" + for model_card in model_card_list: + model_card = _normalize_model_card_name(model_card) + if _should_skip_model_card(model_card): continue file_path = os.path.join(DOCS_PATH, model_card) @@ -193,55 +261,46 @@ def insert_dates(model_card_list: list[str]): if links_replaced: print(f"Updated paper links in {model_card}") - pattern = ( - r"\n\*This model was released on (.*) and added to Hugging Face Transformers on (\d{4}-\d{2}-\d{2})\.\*" - ) + # Read content and ensure copyright disclaimer exists + content = _read_model_card_content(model_card) + markers = list(re.finditer(r"-->", content)) - # Check if the copyright disclaimer sections exists, if not, add one with 2025 - with open(file_path, "r", encoding="utf-8") as f: - content = f.read() - markers = list(re.finditer(r"-->", content)) # Dates info is placed right below this marker if len(markers) == 0: print(f"No marker found in {model_card}. Adding copyright disclaimer to the top.") - - # Add copyright disclaimer to the very top of the file content = COPYRIGHT_DISCLAIMER + "\n\n" + content with open(file_path, "w", encoding="utf-8") as f: f.write(content) markers = list(re.finditer(r"-->", content)) + # Get dates hf_commit_date = get_first_commit_date(model_name=model_card) - paper_link = get_paper_link(model_card=model_card, path=file_path) - release_date = "" - if not (paper_link == "No_paper" or paper_link == "blog"): - release_date = get_release_date(paper_link) - else: + + if paper_link in ("No_paper", "blog"): release_date = r"{release_date}" + else: + release_date = get_release_date(paper_link) - match = re.search(pattern, content) + match = _get_dates_pattern_match(content) - # If the dates info line already exists, preserve the existing release date unless it's a placeholder, and update the HF commit date if needed + # Update or insert the dates line if match: - existing_release_date = match.group(1) # The release date part - existing_hf_date = match.group(2) # The existing HF date part - release_date = ( - release_date - if (existing_release_date == r"{release_date}" or existing_release_date == "None") - else existing_release_date - ) + # Preserve existing release date unless it's a placeholder + existing_release_date = match.group(1) + existing_hf_date = match.group(2) + + if existing_release_date not in (r"{release_date}", "None"): + release_date = existing_release_date + if existing_hf_date != hf_commit_date or existing_release_date != release_date: - old_line = match.group(0) # Full matched line + old_line = match.group(0) new_line = f"\n*This model was released on {release_date} and added to Hugging Face Transformers on {hf_commit_date}.*" - content = content.replace(old_line, new_line) with open(file_path, "w", encoding="utf-8") as f: f.write(content) - - # If the dates info line does not exist, add it else: + # Insert new dates line after copyright marker insert_index = markers[0].end() - date_info = f"\n*This model was released on {release_date} and added to Hugging Face Transformers on {hf_commit_date}.*" content = content[:insert_index] + date_info + content[insert_index:] with open(file_path, "w", encoding="utf-8") as f: @@ -262,19 +321,41 @@ def get_all_model_cards(): return sorted(model_cards) -def main(all=False, auto=True, models=None): +def main(all=False, models=None, check_only=False): + if check_only: + # Check all model cards for missing dates + all_model_cards = get_all_model_cards() + print(f"Checking all {len(all_model_cards)} model cards for missing dates...") + missing_dates = check_missing_dates(all_model_cards) + + # Check modified model cards for incorrect dates + modified_cards = get_modified_cards() + print(f"Checking {len(modified_cards)} modified model cards for incorrect dates...") + incorrect_dates = check_incorrect_dates(modified_cards) + + if missing_dates or incorrect_dates: + problematic_cards = missing_dates + incorrect_dates + model_names = [card.replace(".md", "") for card in problematic_cards] + raise ValueError( + f"Missing or incorrect dates in the following model cards: {' '.join(problematic_cards)}\n" + f"Run `python utils/add_dates.py --models {' '.join(model_names)}` to fix them." + ) + print("All dates are present and correct!") + return + + # Determine which model cards to process if all: model_cards = get_all_model_cards() print(f"Processing all {len(model_cards)} model cards from docs directory") - elif auto: + elif models: + model_cards = models + print(f"Processing specified model cards: {model_cards}") + else: model_cards = get_modified_cards() if not model_cards: print("No modified model cards found.") return print(f"Processing modified model cards: {model_cards}") - else: - model_cards = models - print(f"Processing specified model cards: {model_cards}") insert_dates(model_cards) @@ -282,13 +363,10 @@ def main(all=False, auto=True, models=None): if __name__ == "__main__": parser = argparse.ArgumentParser(description="Add release and commit dates to model cards") group = parser.add_mutually_exclusive_group(required=False) - group.add_argument( - "--auto", action="store_true", help="Automatically process modified model cards from git status" - ) group.add_argument("--models", nargs="+", help="Specify model cards to process (without .md extension)") group.add_argument("--all", action="store_true", help="Process all model cards in the docs directory") + group.add_argument("--check-only", action="store_true", help="Check if the dates are already present") - parser.set_defaults(auto=True) args = parser.parse_args() - main(args.all, args.auto, args.models) + main(args.all, args.models, args.check_only)