|
3 | 3 | import pytest |
4 | 4 | import numpy as np |
5 | 5 | import asyncio |
| 6 | +from time import perf_counter |
6 | 7 | from tokenizers import AddedToken, Encoding, Tokenizer |
7 | 8 | from tokenizers.implementations import BertWordPieceTokenizer |
8 | 9 | from tokenizers.models import BPE, Model, Unigram |
@@ -341,7 +342,7 @@ def test_padding(self): |
341 | 342 |
|
342 | 343 | # Can pad to the longest in a batch |
343 | 344 | output = tokenizer.encode_batch(["my name", "my name is john"]) |
344 | | - assert all([len(encoding) == 4 for encoding in output]) |
| 345 | + assert all(len(encoding) == 4 for encoding in output) |
345 | 346 |
|
346 | 347 | # Can pad to the specified length otherwise |
347 | 348 | tokenizer.enable_padding(length=4) |
@@ -950,21 +951,21 @@ async def encode_async(_): |
950 | 951 | # Measure sync performance with pre-initialized executor |
951 | 952 | # Warm up |
952 | 953 | await asyncio.gather(*[encode_sync_with_executor(i) for i in range(10)]) |
953 | | - time.sleep(0.03) |
| 954 | + await asyncio.sleep(0.03) |
954 | 955 | # Actual measurement |
955 | | - start = time.perf_counter() |
| 956 | + start = perf_counter() |
956 | 957 | await asyncio.gather(*[encode_sync_with_executor(i) for i in range(n_tasks)]) |
957 | | - sync_time = time.perf_counter() - start |
| 958 | + sync_time = perf_counter() - start |
958 | 959 |
|
959 | 960 | # Measure async performance |
960 | 961 | # Warm up |
961 | 962 | await asyncio.gather(*[encode_async(i) for i in range(10)]) |
962 | 963 |
|
963 | 964 | # Actual measurement |
964 | | - time.sleep(0.03) |
965 | | - start = time.perf_counter() |
| 965 | + await asyncio.sleep(0.03) |
| 966 | + start = perf_counter() |
966 | 967 | await asyncio.gather(*[encode_async(i) for i in range(n_tasks)]) |
967 | | - async_time = time.perf_counter() - start |
| 968 | + async_time = perf_counter() - start |
968 | 969 |
|
969 | 970 | # Log times |
970 | 971 | print(f"sync vs async processing times: {sync_time:.4f}s vs {async_time:.4f}s for {n_tasks} tasks") |
|
0 commit comments