Skip to content

Commit d0e25fe

Browse files
Merge branch 'main' into batch_spec_dec
2 parents 576b1be + 20890e3 commit d0e25fe

File tree

1,447 files changed

+42857
-69313
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

1,447 files changed

+42857
-69313
lines changed

.circleci/create_circleci_config.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,14 @@ def job_name(self):
318318
parallelism=6,
319319
)
320320

321+
training_ci_job = CircleCIJob(
322+
"training_ci",
323+
additional_env={"RUN_TRAINING_TESTS": True},
324+
docker_image=[{"image": "huggingface/transformers-torch-light"}],
325+
install_steps=["uv pip install ."],
326+
marker="is_training_test",
327+
parallelism=6,
328+
)
321329

322330
# We also include a `dummy.py` file in the files to be doc-tested to prevent edge case failure. Otherwise, the pytest
323331
# hangs forever during test collection while showing `collecting 0 items / 21 errors`. (To see this, we have to remove
@@ -348,7 +356,8 @@ def job_name(self):
348356
PIPELINE_TESTS = [pipelines_torch_job]
349357
REPO_UTIL_TESTS = [repo_utils_job]
350358
DOC_TESTS = [doc_test_job]
351-
ALL_TESTS = REGULAR_TESTS + EXAMPLES_TESTS + PIPELINE_TESTS + REPO_UTIL_TESTS + DOC_TESTS + [custom_tokenizers_job] + [exotic_models_job] # fmt: skip
359+
TRAINING_CI_TESTS = [training_ci_job]
360+
ALL_TESTS = REGULAR_TESTS + EXAMPLES_TESTS + PIPELINE_TESTS + REPO_UTIL_TESTS + DOC_TESTS + [custom_tokenizers_job] + [exotic_models_job] + TRAINING_CI_TESTS # fmt: skip
352361

353362

354363
def create_circleci_config(folder=None):
Lines changed: 314 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,314 @@
1+
name: PR Repo. Consistency Bot
2+
3+
on:
4+
issue_comment:
5+
types:
6+
- created
7+
branches-ignore:
8+
- main
9+
concurrency:
10+
group: ${{ github.workflow }}-${{ github.event.issue.number }}-${{ startsWith(github.event.comment.body, '@bot /repo') }}
11+
cancel-in-progress: true
12+
permissions: read-all
13+
14+
15+
jobs:
16+
get-pr-number:
17+
name: Get PR number
18+
if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1", "SunMarc", "eustlb", "MekkCyber", "vasqu", "ivarflakstad", "stevhliu", "ebezzam", "remi-or", "itazap", "3outeille"]'), github.actor) && startsWith(github.event.comment.body, '@bot /repo') }}
19+
uses: ./.github/workflows/get-pr-number.yml
20+
21+
get-pr-info:
22+
name: Get PR commit SHA
23+
needs: get-pr-number
24+
if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}}
25+
uses: ./.github/workflows/get-pr-info.yml
26+
with:
27+
pr_number: ${{ needs.get-pr-number.outputs.PR_NUMBER }}
28+
29+
check-timestamps:
30+
name: Check timestamps (security check)
31+
runs-on: ubuntu-22.04
32+
needs: get-pr-info
33+
outputs:
34+
VERIFIED_PR_HEAD_SHA: ${{ needs.get-pr-info.outputs.PR_HEAD_SHA }}
35+
steps:
36+
- name: Verify `merge_commit` timestamp is older than the issue comment timestamp
37+
env:
38+
COMMENT_DATE: ${{ github.event.comment.created_at }}
39+
PR_MERGE_COMMIT_TIMESTAMP: ${{ needs.get-pr-info.outputs.PR_MERGE_COMMIT_TIMESTAMP }}
40+
run: |
41+
COMMENT_TIMESTAMP=$(date -d "${COMMENT_DATE}" +"%s")
42+
echo "COMMENT_DATE: $COMMENT_DATE"
43+
echo "COMMENT_TIMESTAMP: $COMMENT_TIMESTAMP"
44+
if [ $COMMENT_TIMESTAMP -le $PR_MERGE_COMMIT_TIMESTAMP ]; then
45+
echo "Last commit on the pull request is newer than the issue comment triggering this run! Abort!";
46+
exit -1;
47+
fi
48+
49+
init_comment_with_url:
50+
name: Init Comment on PR
51+
runs-on: ubuntu-22.04
52+
needs: [get-pr-number, check-timestamps]
53+
outputs:
54+
comment_id: ${{ steps.init_comment.outputs.comment_id }}
55+
permissions:
56+
pull-requests: write
57+
steps:
58+
- name: Delete existing bot comment if it exists
59+
env:
60+
PR_NUMBER: ${{ needs.get-pr-number.outputs.PR_NUMBER }}
61+
uses: actions/github-script@v6
62+
with:
63+
script: |
64+
const PR_NUMBER = parseInt(process.env.PR_NUMBER, 10);
65+
66+
// Get all comments on the PR
67+
const { data: comments } = await github.rest.issues.listComments({
68+
owner: context.repo.owner,
69+
repo: context.repo.repo,
70+
issue_number: PR_NUMBER
71+
});
72+
73+
// Find existing bot comments that start with "Repo. Consistency"
74+
const existingComments = comments.filter(comment =>
75+
comment.user.login === 'github-actions[bot]' &&
76+
comment.body.startsWith('Repo. Consistency')
77+
);
78+
79+
if (existingComments.length > 0) {
80+
// Get the most recent comment
81+
const mostRecentComment = existingComments
82+
.sort((a, b) => new Date(b.created_at) - new Date(a.created_at))[0];
83+
84+
console.log(`Deleting most recent comment #${mostRecentComment.id}`);
85+
await github.rest.issues.deleteComment({
86+
owner: context.repo.owner,
87+
repo: context.repo.repo,
88+
comment_id: mostRecentComment.id
89+
});
90+
}
91+
92+
- name: Comment on PR with workflow run link
93+
id: init_comment
94+
env:
95+
PR_NUMBER: ${{ needs.get-pr-number.outputs.PR_NUMBER }}
96+
uses: actions/github-script@v6
97+
with:
98+
script: |
99+
const PR_NUMBER = parseInt(process.env.PR_NUMBER, 10);
100+
const runUrl = `${process.env.GITHUB_SERVER_URL}/${process.env.GITHUB_REPOSITORY}/actions/runs/${process.env.GITHUB_RUN_ID}`
101+
102+
const { data: botComment } = await github.rest.issues.createComment({
103+
owner: context.repo.owner,
104+
repo: context.repo.repo,
105+
issue_number: PR_NUMBER,
106+
body: `Repo. Consistency fix is beginning .... [View the workflow run here](${runUrl}).`
107+
});
108+
core.setOutput('comment_id', botComment.id);
109+
110+
run-repo-consistency-checks:
111+
runs-on: ubuntu-22.04
112+
needs: [get-pr-info, check-timestamps, init_comment_with_url]
113+
outputs:
114+
changes_detected: ${{ steps.run_checks.outputs.changes_detected }}
115+
steps:
116+
# Checkout the trusted base repository (main branch) - this is safe
117+
- name: Checkout base repository
118+
uses: actions/checkout@v4
119+
with:
120+
ref: main
121+
122+
- name: Set up Python
123+
uses: actions/setup-python@v4
124+
with:
125+
python-version: "3.10"
126+
127+
- name: Install dependencies from trusted main branch
128+
run: |
129+
python -m pip install --upgrade pip
130+
pip install -e ".[quality]"
131+
pip install --no-cache-dir --upgrade 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
132+
133+
- name: Fetch and checkout PR code manually
134+
env:
135+
PR_HEAD_REPO_FULL_NAME: ${{ needs.get-pr-info.outputs.PR_HEAD_REPO_FULL_NAME }}
136+
PR_HEAD_SHA: ${{ needs.check-timestamps.outputs.VERIFIED_PR_HEAD_SHA }}
137+
run: |
138+
# Create separate directory for PR code
139+
mkdir -p pr-repo
140+
cd pr-repo
141+
142+
# Initialize git and fetch only the specific commit
143+
git init
144+
git remote add pr-origin https://github.com/${PR_HEAD_REPO_FULL_NAME}.git
145+
git fetch --depth=1 pr-origin ${PR_HEAD_SHA}
146+
git checkout ${PR_HEAD_SHA}
147+
148+
- name: Run checks with trusted script
149+
id: run_checks
150+
run: |
151+
# Copy trusted script to PR directory
152+
cp utils/check_copies.py pr-repo/utils/check_copies.py
153+
154+
# Run the trusted script in PR directory
155+
cd pr-repo
156+
python utils/check_copies.py --fix_and_overwrite
157+
158+
# Check if there are changes
159+
if [ -n "$(git status --porcelain)" ]; then
160+
echo "changes_detected=true" >> $GITHUB_OUTPUT
161+
else
162+
echo "changes_detected=false" >> $GITHUB_OUTPUT
163+
fi
164+
165+
- name: Save modified files
166+
if: steps.run_checks.outputs.changes_detected == 'true'
167+
run: |
168+
cd pr-repo
169+
mkdir -p ../artifact-staging
170+
git diff --name-only > ../artifact-staging/modified-files.txt
171+
# Copy each modified file
172+
while IFS= read -r file; do
173+
mkdir -p "../artifact-staging/pr-repo/$(dirname "$file")"
174+
cp "$file" "../artifact-staging/pr-repo/$file"
175+
done < ../artifact-staging/modified-files.txt
176+
177+
- name: Upload modified files
178+
if: steps.run_checks.outputs.changes_detected == 'true'
179+
uses: actions/upload-artifact@v4
180+
with:
181+
name: modified-files
182+
path: artifact-staging/
183+
184+
commit-and-comment:
185+
runs-on: ubuntu-22.04
186+
needs: [get-pr-number, get-pr-info, check-timestamps, init_comment_with_url, run-repo-consistency-checks]
187+
if: always()
188+
permissions:
189+
pull-requests: write
190+
steps:
191+
- name: Download modified files
192+
if: needs.run-repo-consistency-checks.outputs.changes_detected == 'true'
193+
uses: actions/download-artifact@v4
194+
with:
195+
name: modified-files
196+
197+
- name: Push changes via GitHub API (no checkout)
198+
if: needs.run-repo-consistency-checks.outputs.changes_detected == 'true'
199+
uses: actions/github-script@v6
200+
env:
201+
PR_HEAD_REF: ${{ needs.get-pr-info.outputs.PR_HEAD_REF }}
202+
PR_HEAD_SHA: ${{ needs.check-timestamps.outputs.VERIFIED_PR_HEAD_SHA }}
203+
PR_HEAD_REPO_OWNER: ${{ needs.get-pr-info.outputs.PR_HEAD_REPO_OWNER }}
204+
PR_HEAD_REPO_NAME: ${{ needs.get-pr-info.outputs.PR_HEAD_REPO_NAME }}
205+
with:
206+
github-token: ${{ secrets.HF_STYLE_BOT_ACTION }}
207+
script: |
208+
const fs = require('fs');
209+
const path = require('path');
210+
211+
const owner = process.env.PR_HEAD_REPO_OWNER;
212+
const repo = process.env.PR_HEAD_REPO_NAME;
213+
const baseSha = process.env.PR_HEAD_SHA;
214+
const branch = process.env.PR_HEAD_REF;
215+
216+
console.log(`Creating commit on ${owner}/${repo} branch ${branch} from ${baseSha}`);
217+
218+
// Read list of modified files
219+
const modifiedFiles = fs.readFileSync('modified-files.txt', 'utf8')
220+
.trim()
221+
.split('\n')
222+
.filter(f => f.length > 0);
223+
224+
console.log(`Modified files: ${modifiedFiles.join(', ')}`);
225+
226+
// Get the base commit to retrieve its tree SHA (metadata only, no checkout)
227+
const { data: baseCommit } = await github.rest.git.getCommit({
228+
owner,
229+
repo,
230+
commit_sha: baseSha
231+
});
232+
233+
console.log(`Base tree SHA: ${baseCommit.tree.sha}`);
234+
235+
// Create blobs for each modified file
236+
const tree = [];
237+
for (const file of modifiedFiles) {
238+
const filePath = path.join('pr-repo', file);
239+
const content = fs.readFileSync(filePath, 'utf8');
240+
241+
console.log(`Creating blob for ${file}`);
242+
const { data: blob } = await github.rest.git.createBlob({
243+
owner,
244+
repo,
245+
content: content,
246+
encoding: 'utf-8'
247+
});
248+
249+
tree.push({
250+
path: file,
251+
mode: '100644',
252+
type: 'blob',
253+
sha: blob.sha
254+
});
255+
}
256+
257+
// Create new tree based on the base tree
258+
console.log(`Creating tree with ${tree.length} modified files`);
259+
const { data: newTree } = await github.rest.git.createTree({
260+
owner,
261+
repo,
262+
base_tree: baseCommit.tree.sha,
263+
tree: tree
264+
});
265+
266+
// Create commit
267+
console.log(`Creating commit`);
268+
const { data: newCommit } = await github.rest.git.createCommit({
269+
owner,
270+
repo,
271+
message: 'Apply repo. consistency fixes',
272+
tree: newTree.sha,
273+
parents: [baseSha]
274+
});
275+
276+
console.log(`Created commit: ${newCommit.sha}`);
277+
278+
// Update branch ref
279+
console.log(`Updating ref heads/${branch} to ${newCommit.sha}`);
280+
await github.rest.git.updateRef({
281+
owner,
282+
repo,
283+
ref: `heads/${branch}`,
284+
sha: newCommit.sha
285+
});
286+
287+
console.log(`Successfully pushed commit to ${branch}`);
288+
289+
- name: Prepare final comment message
290+
id: prepare_final_comment
291+
if: needs.init_comment_with_url.result == 'success'
292+
env:
293+
CHANGES_DETECTED: ${{ needs.run-repo-consistency-checks.outputs.changes_detected }}
294+
run: |
295+
if [ "$CHANGES_DETECTED" = 'true' ]; then
296+
echo "final_comment=Repo. Consistency bot fixed some files and pushed the changes." >> $GITHUB_OUTPUT
297+
else
298+
echo "final_comment=Repo. Consistency fix runs successfully without any file modified." >> $GITHUB_OUTPUT
299+
fi
300+
301+
- name: Comment on PR
302+
if: needs.init_comment_with_url.result == 'success'
303+
uses: actions/github-script@v6
304+
env:
305+
PR_NUMBER: ${{ needs.get-pr-number.outputs.PR_NUMBER }}
306+
with:
307+
script: |
308+
const PR_NUMBER = parseInt(process.env.PR_NUMBER, 10);
309+
await github.rest.issues.updateComment({
310+
owner: context.repo.owner,
311+
repo: context.repo.repo,
312+
comment_id: ${{ needs.init_comment_with_url.outputs.comment_id }},
313+
body: `${{ steps.prepare_final_comment.outputs.final_comment }}`
314+
});

.github/workflows/pr_build_doc_with_comment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ jobs:
110110
env:
111111
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
112112
GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
113-
STATUS_OK: ${{ contains(fromJSON('["skipped", "success"]'), needs.create_run.result) }}
113+
STATUS_OK: ${{ contains(fromJSON('["skipped", "success"]'), needs.build-doc.result) }}
114114
steps:
115115
- name: Get `build-doc` job status
116116
run: |

.github/workflows/self-comment-ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ env:
2727
jobs:
2828
get-pr-number:
2929
name: Get PR number
30-
if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1", "SunMarc", "eustlb", "MekkCyber", "vasqu", "ivarflakstad", "stevhliu", "ebezzam", "remi-or", "itazap"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }}
30+
if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1", "SunMarc", "eustlb", "MekkCyber", "vasqu", "ivarflakstad", "stevhliu", "ebezzam", "remi-or", "itazap", "3outeille"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }}
3131
uses: ./.github/workflows/get-pr-number.yml
3232

3333
get-pr-info:

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,3 +175,4 @@ tags
175175

176176
# Cursor IDE files
177177
.cursor/
178+
test-results/

0 commit comments

Comments
 (0)