[codespell]
# Skip these files and directories
skip = .git,*.png,*.jpg,*.jpeg,*.gif,*.svg,*.ico,*.woff,*.woff2,*.ttf,*.eot,*.min.js,*.min.css,themes,node_modules,public,resources,static/admin,*.lock,package-lock.json,.hugo_build.lock,go.sum,*.json,*.css,*.scss,*.toml,content/glossary/german,content/glossary/portuguese,content/glossary/arabic,content/glossary/spanish,content/glossary/french,data,*.pdf

# Ignore these words (project-specific terms and proper nouns)
ignore-words = .codespell-ignore.txt

# Check file names as well (boolean flag: an empty value enables it)
check-filenames =

# Check hidden files (boolean flag: an empty value enables it)
check-hidden =

# Ignore complete URLs and e-mail addresses. Text matched by this regex is
# excluded from checking, so each alternative must consume the whole
# URL/address rather than just its "https://" or "www." prefix. There must be
# no empty alternative and no literal "|" inside the TLD character class.
ignore-regex = (https?://\S+|www\.\S+|[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,})
-0,0 +1,56 @@ +name: Spell Check + +# ======================= +# Automated Spell Checking +# ======================= +# Purpose: Checks for spelling errors in pull requests using codespell +# Triggers: PR opened, synchronized, or reopened +# Reports: Comments on PR with potential typos and suggestions + +on: + pull_request: + types: [opened, synchronize, reopened] + +permissions: + contents: read + issues: write + pull-requests: write + +jobs: + spell-check: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + + - name: Install codespell + run: | + python -m pip install --upgrade pip + pip install codespell + + - name: Run Spell Check Script + id: spell_check + run: | + python scripts/spell_check/check_spelling.py + + - name: Find Comment + uses: peter-evans/find-comment@v3 + id: fc + with: + issue-number: ${{ github.event.pull_request.number }} + comment-author: 'github-actions[bot]' + body-includes: Spell Check Results + + - name: Create or update comment + uses: peter-evans/create-or-update-comment@v4 + with: + comment-id: ${{ steps.fc.outputs.comment-id }} + issue-number: ${{ github.event.pull_request.number }} + body: ${{ steps.spell_check.outputs.comment }} + edit-mode: replace diff --git a/scripts/spell_check/README.md b/scripts/spell_check/README.md new file mode 100644 index 00000000000..c8fe44b5fa1 --- /dev/null +++ b/scripts/spell_check/README.md @@ -0,0 +1,94 @@ +# FORRT Spell Check + +This directory contains the automated spell-checking system for the FORRT repository. 
+ +## Components + +### check_spelling.py +Python script that: +- Runs codespell on the repository +- Parses the results +- Formats them as a GitHub comment for PRs + +### Configuration Files (in repository root) + +#### .codespellrc +Configuration file for codespell that: +- Specifies which files/directories to skip +- Configures checking options +- References the ignore words list + +#### .codespell-ignore.txt +List of words to ignore during spell checking: +- Project-specific terms (FORRT, preregistration, etc.) +- Proper names (authors, organizations) +- Technical terms +- Acceptable spelling variations (e.g., British English "behaviour", "colour" are acceptable alongside American English variants) + +## Usage + +### Running Locally + +To run spell check locally: + +```bash +# Install codespell +pip install codespell + +# Run the spell check +python scripts/spell_check/check_spelling.py +``` + +Or run codespell directly: + +```bash +codespell --config .codespellrc +``` + +### Adding Words to Whitelist + +If codespell flags a word that is correct (e.g., a person's name, technical term, or intentional spelling): + +1. Add the word to `.codespell-ignore.txt` +2. One word per line +3. Add a comment above the word explaining why it's whitelisted (optional but recommended) +4. Commit the change + +Example: +``` +# Author names +Kathawalla +Gilad +``` + +### GitHub Actions Workflow + +The spell check runs automatically on pull requests via the `.github/workflows/spell-check.yaml` workflow. It: + +1. Triggers on PR open, synchronize, or reopen +2. Installs codespell +3. Runs the spell check script +4. Posts/updates a comment on the PR with results + +## False Positives + +If you encounter false positives: + +1. **For legitimate terms**: Add to `.codespell-ignore.txt` +2. **For file types**: Add the extension to the `skip` list in `.codespellrc` +3. 
def _find_repo_root():
    """Return the directory codespell should be run from.

    Resolution order:
    1. ``GITHUB_WORKSPACE`` (set by GitHub Actions to the checkout directory).
    2. The nearest ancestor of this script that contains ``.codespellrc``.
    3. The current working directory, as a last resort.
    """
    workspace = os.environ.get('GITHUB_WORKSPACE')
    if workspace and Path(workspace).is_dir():
        return Path(workspace)

    for candidate in Path(__file__).resolve().parents:
        if (candidate / '.codespellrc').is_file():
            return candidate

    return Path.cwd()


def run_codespell():
    """Run codespell from the repository root and capture its output.

    Returns:
        A ``(stdout, returncode)`` tuple from the codespell process.

    Exits the interpreter with status 1 if codespell is not installed or
    cannot be started.
    """
    # Check specific paths only, to avoid themes and other large directories.
    # Focus on content, scripts, and GitHub workflows.
    paths = ['content', 'scripts', '.github', 'CONTRIBUTING.md', 'README.md']

    try:
        result = subprocess.run(
            ['codespell', '--config', '.codespellrc'] + paths,
            # Resolved dynamically so the script also works outside the CI
            # runner (a hard-coded runner path made local runs fail).
            cwd=str(_find_repo_root()),
            capture_output=True,
            text=True,
        )
    except FileNotFoundError:
        print("Error: codespell is not installed.", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"Error running codespell: {e}", file=sys.stderr)
        sys.exit(1)

    # Surface codespell's own diagnostics (e.g. config errors) instead of
    # silently discarding them; stdout stays reserved for findings.
    if result.stderr:
        print(result.stderr, file=sys.stderr, end='')

    return result.stdout, result.returncode


def parse_codespell_output(output):
    """Parse codespell output into a list of finding dictionaries.

    Two line formats are recognised:

    * ``filename:line: TYPO ==> SUGGESTION`` for a typo inside a file
    * ``filename: TYPO ==> SUGGESTION`` for a typo in a file *name*
      (emitted because ``check-filenames`` is enabled in ``.codespellrc``)

    Args:
        output: Raw text written by codespell to stdout.

    Returns:
        A list of dicts with the string keys ``'file'``, ``'line'`` and
        ``'message'``. Filename findings use ``'-'`` as the line.
    """
    typos = []

    for raw_line in output.splitlines():
        line = raw_line.strip()
        if not line:
            continue

        filepath, has_colon, rest = line.partition(':')
        if not has_colon:
            continue

        middle, has_second, tail = rest.partition(':')
        line_is_numbered = bool(has_second) and middle.strip().isdigit()

        if ' ==> ' in rest and not line_is_numbered:
            # Filename finding: there is no line number to report.
            line_num, message = '-', rest
        elif has_second:
            line_num, message = middle, tail
        else:
            continue

        typos.append({
            'file': filepath.strip(),
            'line': line_num.strip(),
            'message': message.strip(),
        })

    return typos


def format_comment(typos):
    """Format findings as the Markdown body of a GitHub PR comment.

    Args:
        typos: List of dicts as produced by ``parse_codespell_output``.

    Returns:
        The comment body. Both the "passed" and the "issues found" variants
        contain the text ``Spell Check Results``, which the workflow's
        find-comment step searches for, so one comment is updated in place
        instead of a new one being added on every run.
    """
    if not typos:
        return (
            "## ✅ Spell Check Passed\n\n"
            "No spelling issues found in this PR! 🎉\n\n"
            # Hidden marker matched by `body-includes` in the workflow.
            "<!-- Spell Check Results -->"
        )

    # Group findings by file, preserving their order within each file.
    typos_by_file = {}
    for typo in typos:
        typos_by_file.setdefault(typo['file'], []).append(typo)

    parts = [
        "## 📝 Spell Check Results\n\n",
        f"Found {len(typos)} potential spelling issue(s) in this PR:\n\n",
    ]

    for file, file_typos in sorted(typos_by_file.items()):
        parts.append(f"### 📄 `{file}`\n\n")
        parts.append("| Line | Issue |\n")
        parts.append("|------|-------|\n")
        for typo in file_typos:
            # Escape pipes so a message cannot break the Markdown table.
            message = typo['message'].replace('|', '\\|')
            parts.append(f"| {typo['line']} | {message} |\n")
        parts.append("\n")

    parts.extend([
        "---\n\n",
        "### ℹ️ How to address these issues:\n\n",
        "1. **Fix the typo**: If it's a genuine typo, please correct it.\n",
        "2. **Add to whitelist**: If it's a valid word (e.g., a name, technical term), add it to `.codespell-ignore.txt`\n",
        "3. **False positive**: If this is a false positive, please report it in the PR comments.\n\n",
        "🤖 This check was performed by [codespell](https://github.com/codespell-project/codespell)",
    ])

    return "".join(parts)