
Conversation

@Byczong (Contributor) commented Feb 5, 2025

Story details: https://app.shortcut.com/ps-web3/story/26105

todo:

  • implement downloader (a hedged sketch follows this list)
  • add S3 env vars to .env.example; make the bucket public
  • use `python -m nameai.download` in CI/CD and deployment scripts
  • improve performance tests
  • add load tests
  • make tokenization quality reports
  • limit data (?)
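
A minimal sketch of what the `python -m nameai.download` entry point might look like, assuming unsigned reads from a public S3 bucket; the bucket name, env var, file list, and destination paths below are illustrative assumptions, not the actual implementation:

```python
# nameai/download/__main__.py (hypothetical module layout)
import os

import boto3
from botocore import UNSIGNED
from botocore.config import Config

# Assumed env var; the real name would come from .env.example.
BUCKET = os.environ.get('NAMEAI_S3_BUCKET', 'nameai-data')
FILES = ['person_names_quality.json']  # placeholder artifact list


def main() -> None:
    # Unsigned client: works against a public bucket without AWS credentials.
    s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))
    os.makedirs('data', exist_ok=True)
    for key in FILES:
        s3.download_file(BUCKET, key, os.path.join('data', key))
        print(f'downloaded s3://{BUCKET}/{key} -> data/{key}')


if __name__ == '__main__':
    main()
```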

changeset-bot commented Feb 5, 2025

⚠️ No Changeset found

Latest commit: 622caa2

Merging this PR will not cause a version bump for any packages. If these changes should not result in a new version, you're good to go. If these changes should result in a version bump, you need to add a changeset.

This PR includes no changesets

When changesets are added to this PR, you'll see the packages that this PR includes changesets for and the associated semver types.


vercel bot commented Feb 5, 2025

The latest updates on your projects.

| Name | Status | Updated (UTC) |
| --- | --- | --- |
| namegraph.dev | ✅ Ready | Feb 19, 2025 1:22pm |
| namehashlabs.org | ✅ Ready | Feb 19, 2025 1:22pm |
| namekit.io | ✅ Ready | Feb 19, 2025 1:22pm |

4 Skipped Deployments

| Name | Status | Updated (UTC) |
| --- | --- | --- |
| examples.nameguard.io | ⬜️ Skipped | Feb 19, 2025 1:22pm |
| nameai.io | ⬜️ Skipped | Feb 19, 2025 1:22pm |
| nameguard.io | ⬜️ Skipped | Feb 19, 2025 1:22pm |
| storybook.namekit.io | ⬜️ Skipped | Feb 19, 2025 1:22pm |

Comment on lines +249 to +331
```python
# Note: this diff excerpt starts at line 249 of the file; `math`,
# `init_person_name_tokenizer`, and other module-level imports are
# defined earlier in the file and not shown here.

def test_person_name_tokenizer_simple_names():
    """Verify tokenization of clear person names."""
    with init_person_name_tokenizer([]) as tokenizer:
        from nameai.data import get_resource_path
        import json

        with open(get_resource_path('tests/person_names_quality.json')) as f:
            quality_tests = json.load(f)

        failures = []
        for input_label, expected_tokens in quality_tests['simple_names'].items():
            tokenized_labels = list(tokenizer.tokenize_with_scores(input_label))
            expected_tuple = tuple(expected_tokens)
            found = False
            for tokens, score in tokenized_labels:
                if tokens == expected_tuple:
                    found = True
                    assert score > -float('inf'), f'Expected valid score for {input_label}'
                    break
            if not found:
                failures.append(f'Failed to find expected tokenization for {input_label}')

        if failures:
            print('\n=== PersonNameTokenizer Quality Test Failures [simple_names] ===')
            for failure in failures:
                print(failure)
            # len(quality_tests['simple_names']) counts the test cases in this
            # category; len(quality_tests) would only count the top-level categories.
            print(f'\nTotal failures: {len(failures)} out of {len(quality_tests["simple_names"])} test cases')
            assert False, 'Some tokenization quality tests failed. See above for details.'


def test_person_name_tokenizer_ambiguous_names():
    """Verify handling of ambiguous inputs that could be names."""
    with init_person_name_tokenizer([]) as tokenizer:
        from nameai.data import get_resource_path
        import json

        with open(get_resource_path('tests/person_names_quality.json')) as f:
            quality_tests = json.load(f)

        failures = []
        for input_label, interpretation2expected_tokens in quality_tests['ambiguous_names'].items():
            tokenized_labels = list(tokenizer.tokenize_with_scores(input_label))
            if interpretation2expected_tokens['person_name'] is not None:
                person_name_tokens = tuple(interpretation2expected_tokens['person_name'])
                found = False
                for tokens, score in tokenized_labels:
                    if tokens == person_name_tokens:
                        found = True
                        assert score > -float('inf'), f'Expected valid score for {input_label}'
                        break
                if not found:
                    failures.append(f'Failed to find person name tokenization for {input_label}')

        if failures:
            print('\n=== PersonNameTokenizer Quality Test Failures [ambiguous_names] ===')
            for failure in failures:
                print(failure)
            print(f'\nTotal failures: {len(failures)} out of {len(quality_tests["ambiguous_names"])} test cases')
            assert False, 'Some tokenization quality tests failed. See above for details.'


def test_person_name_tokenizer_non_names_low_scores():
    """Verify that non-name inputs get low (< 1e-10) probability scores."""
    with init_person_name_tokenizer([]) as tokenizer:
        from nameai.data import get_resource_path
        import json

        with open(get_resource_path('tests/person_names_quality.json')) as f:
            quality_tests = json.load(f)

        failures = []
        for input_label in quality_tests['non_names'].keys():
            tokenized_labels = list(tokenizer.tokenize_with_scores(input_label))
            for tokens, log_prob in tokenized_labels:
                if log_prob >= math.log(1e-10):
                    failures.append(f'Expected very low score for non-name {input_label}, got {log_prob}')

        if failures:
            print('\n=== PersonNameTokenizer Quality Test Failures [non_names] ===')
            for failure in failures:
                print(failure)
            print(f'\nTotal failures: {len(failures)} out of {len(quality_tests["non_names"])} test cases')
            assert False, 'Some tokenization quality tests failed. See above for details.'
```
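
As context for the `math.log(1e-10)` comparison in the last test: the tokenizer scores are log-probabilities, so the cutoff is compared in log space, which avoids floating-point underflow for very unlikely tokenizations. A minimal illustration, with hypothetical values:

```python
import math

threshold = math.log(1e-10)  # ≈ -23.03
log_prob = math.log(1e-12)   # hypothetical score of a non-name tokenization
assert log_prob < threshold  # would pass the non-name check above
```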
Collaborator:

Are these tests simply adding a probability score check compared to those from test_nlp_inspector.py?

Contributor Author (@Byczong):
In test_tokenizer.py, the tokenizers are tested separately (AllTokenizer and PersonNamesTokenizer).
In test_nlp_inspector.py, the tokenizations come from both tokenizers (the merging is done in NLPInspector).

So these tests are for different levels of the tokenization pipeline.
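
For readers outside the repo, a hypothetical sketch of the two levels described above; apart from `tokenize_with_scores`, the class and method names are assumptions for illustration:

```python
class NLPInspector:
    """Merges candidate tokenizations from both tokenizers (sketch)."""

    def __init__(self, all_tokenizer, person_name_tokenizer):
        self.tokenizers = [all_tokenizer, person_name_tokenizer]

    def tokenizations(self, label):
        # Collect (tokens, log_prob) candidates from every tokenizer,
        # then return them best-first by log-probability.
        candidates = []
        for tokenizer in self.tokenizers:
            candidates.extend(tokenizer.tokenize_with_scores(label))
        return sorted(candidates, key=lambda pair: pair[1], reverse=True)
```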

@Goader (Collaborator) commented Feb 21, 2025

It all seems good to me. One thing that is bothering me is maintaining two separate implementations of the same functionality. Could we substitute this functionality in NameGraph with the implementation from here? @djstrong
