
diff --git a/utils/add_dates.py b/utils/add_dates.py
index 6719beae4b63..4b9c3e7514ba 100644
--- a/utils/add_dates.py
+++ b/utils/add_dates.py
@@ -2,7 +2,7 @@
import os
import re
import subprocess
-from datetime import date
+from datetime import date, datetime
from huggingface_hub import paper_info
@@ -176,14 +176,82 @@ def replace_paper_links(file_path: str) -> bool:
return False
-def insert_dates(model_card_list: list[str]):
- """Insert release and commit dates into model cards"""
+def _normalize_model_card_name(model_card: str) -> str:
+ """Ensure model card has .md extension"""
+ return model_card if model_card.endswith(".md") else f"{model_card}.md"
+
+
+def _should_skip_model_card(model_card: str) -> bool:
+ """Check if model card should be skipped"""
+ return model_card in ("auto.md", "timm_wrapper.md")
+
+
+def _read_model_card_content(model_card: str) -> str:
+ """Read and return the content of a model card"""
+ file_path = os.path.join(DOCS_PATH, model_card)
+ with open(file_path, "r", encoding="utf-8") as f:
+ return f.read()
+
+
+def _get_dates_pattern_match(content: str):
+ """Search content for the dates line and return the match object, or None if it is absent"""
+ pattern = r"\n\*This model was released on (.*) and added to Hugging Face Transformers on (\d{4}-\d{2}-\d{2})\.\*"
+ return re.search(pattern, content)
+
+
+def _dates_differ_significantly(date1: str, date2: str) -> bool:
+ """Check if two dates differ by more than 1 day"""
+ try:
+ d1 = datetime.strptime(date1, "%Y-%m-%d")
+ d2 = datetime.strptime(date2, "%Y-%m-%d")
+ return abs((d1 - d2).days) > 1
+ except Exception:
+ return True # If dates can't be parsed, consider them different
+
+
+def check_missing_dates(model_card_list: list[str]) -> list[str]:
+ """Check which model cards lack the release/commit dates line and return their names"""
+ missing_dates = []
for model_card in model_card_list:
- if not model_card.endswith(".md"):
- model_card = f"{model_card}.md"
+ model_card = _normalize_model_card_name(model_card)
+ if _should_skip_model_card(model_card):
+ continue
- if model_card == "auto.md" or model_card == "timm_wrapper.md":
+ content = _read_model_card_content(model_card)
+ if not _get_dates_pattern_match(content):
+ missing_dates.append(model_card)
+
+ return missing_dates
+
+
+def check_incorrect_dates(model_card_list: list[str]) -> list[str]:
+ """Check which model cards have an HF commit date more than 1 day off the first-commit date and return their names"""
+ incorrect_dates = []
+
+ for model_card in model_card_list:
+ model_card = _normalize_model_card_name(model_card)
+ if _should_skip_model_card(model_card):
+ continue
+
+ content = _read_model_card_content(model_card)
+ match = _get_dates_pattern_match(content)
+
+ if match:
+ existing_hf_date = match.group(2)
+ actual_hf_date = get_first_commit_date(model_name=model_card)
+
+ if _dates_differ_significantly(existing_hf_date, actual_hf_date):
+ incorrect_dates.append(model_card)
+
+ return incorrect_dates
+
+
+def insert_dates(model_card_list: list[str]):
+ """Insert or update release and commit dates in model cards"""
+ for model_card in model_card_list:
+ model_card = _normalize_model_card_name(model_card)
+ if _should_skip_model_card(model_card):
continue
file_path = os.path.join(DOCS_PATH, model_card)
@@ -193,55 +261,46 @@ def insert_dates(model_card_list: list[str]):
if links_replaced:
print(f"Updated paper links in {model_card}")
- pattern = (
- r"\n\*This model was released on (.*) and added to Hugging Face Transformers on (\d{4}-\d{2}-\d{2})\.\*"
- )
+ # Read content and ensure copyright disclaimer exists
+ content = _read_model_card_content(model_card)
+ markers = list(re.finditer(r"-->", content))
- # Check if the copyright disclaimer sections exists, if not, add one with 2025
- with open(file_path, "r", encoding="utf-8") as f:
- content = f.read()
- markers = list(re.finditer(r"-->", content)) # Dates info is placed right below this marker
if len(markers) == 0:
print(f"No marker found in {model_card}. Adding copyright disclaimer to the top.")
-
- # Add copyright disclaimer to the very top of the file
content = COPYRIGHT_DISCLAIMER + "\n\n" + content
with open(file_path, "w", encoding="utf-8") as f:
f.write(content)
markers = list(re.finditer(r"-->", content))
+ # Get the HF first-commit date and the release date (placeholder when there is no paper link)
hf_commit_date = get_first_commit_date(model_name=model_card)
-
paper_link = get_paper_link(model_card=model_card, path=file_path)
- release_date = ""
- if not (paper_link == "No_paper" or paper_link == "blog"):
- release_date = get_release_date(paper_link)
- else:
+
+ if paper_link in ("No_paper", "blog"):
release_date = r"{release_date}"
+ else:
+ release_date = get_release_date(paper_link)
- match = re.search(pattern, content)
+ match = _get_dates_pattern_match(content)
- # If the dates info line already exists, preserve the existing release date unless it's a placeholder, and update the HF commit date if needed
+ # Update or insert the dates line
if match:
- existing_release_date = match.group(1) # The release date part
- existing_hf_date = match.group(2) # The existing HF date part
- release_date = (
- release_date
- if (existing_release_date == r"{release_date}" or existing_release_date == "None")
- else existing_release_date
- )
+ # Preserve existing release date unless it's a placeholder
+ existing_release_date = match.group(1)
+ existing_hf_date = match.group(2)
+
+ if existing_release_date not in (r"{release_date}", "None"):
+ release_date = existing_release_date
+
if existing_hf_date != hf_commit_date or existing_release_date != release_date:
- old_line = match.group(0) # Full matched line
+ old_line = match.group(0)
new_line = f"\n*This model was released on {release_date} and added to Hugging Face Transformers on {hf_commit_date}.*"
-
content = content.replace(old_line, new_line)
with open(file_path, "w", encoding="utf-8") as f:
f.write(content)
-
- # If the dates info line does not exist, add it
else:
+ # Insert new dates line after copyright marker
insert_index = markers[0].end()
-
date_info = f"\n*This model was released on {release_date} and added to Hugging Face Transformers on {hf_commit_date}.*"
content = content[:insert_index] + date_info + content[insert_index:]
with open(file_path, "w", encoding="utf-8") as f:
@@ -262,19 +321,41 @@ def get_all_model_cards():
return sorted(model_cards)
-def main(all=False, auto=True, models=None):
+def main(all=False, models=None, check_only=False):
+ if check_only:
+ # Check all model cards for missing dates
+ all_model_cards = get_all_model_cards()
+ print(f"Checking all {len(all_model_cards)} model cards for missing dates...")
+ missing_dates = check_missing_dates(all_model_cards)
+
+ # Check modified model cards for incorrect dates
+ modified_cards = get_modified_cards()
+ print(f"Checking {len(modified_cards)} modified model cards for incorrect dates...")
+ incorrect_dates = check_incorrect_dates(modified_cards)
+
+ if missing_dates or incorrect_dates:
+ problematic_cards = missing_dates + incorrect_dates
+ model_names = [card.replace(".md", "") for card in problematic_cards]
+ raise ValueError(
+ f"Missing or incorrect dates in the following model cards: {' '.join(problematic_cards)}\n"
+ f"Run `python utils/add_dates.py --models {' '.join(model_names)}` to fix them."
+ )
+ print("All dates are present and correct!")
+ return
+
+ # Determine which model cards to process
if all:
model_cards = get_all_model_cards()
print(f"Processing all {len(model_cards)} model cards from docs directory")
- elif auto:
+ elif models:
+ model_cards = models
+ print(f"Processing specified model cards: {model_cards}")
+ else:
model_cards = get_modified_cards()
if not model_cards:
print("No modified model cards found.")
return
print(f"Processing modified model cards: {model_cards}")
- else:
- model_cards = models
- print(f"Processing specified model cards: {model_cards}")
insert_dates(model_cards)
@@ -282,13 +363,10 @@ def main(all=False, auto=True, models=None):
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Add release and commit dates to model cards")
group = parser.add_mutually_exclusive_group(required=False)
- group.add_argument(
- "--auto", action="store_true", help="Automatically process modified model cards from git status"
- )
group.add_argument("--models", nargs="+", help="Specify model cards to process (without .md extension)")
group.add_argument("--all", action="store_true", help="Process all model cards in the docs directory")
+ group.add_argument("--check-only", action="store_true", help="Check if the dates are already present")
- parser.set_defaults(auto=True)
args = parser.parse_args()
- main(args.all, args.auto, args.models)
+ main(args.all, args.models, args.check_only)