diff --git a/.github/workflows/nameai-python-unit-tests.yml b/.github/workflows/nameai-python-unit-tests.yml
index 55ae711e2..c1e24f9cb 100644
--- a/.github/workflows/nameai-python-unit-tests.yml
+++ b/.github/workflows/nameai-python-unit-tests.yml
@@ -37,7 +37,11 @@ jobs:
 
       - name: Install dependencies
         working-directory: ./apps/api.nameai.io
-        run: poetry install --extras "lambda"
+        run: poetry install --extras "lambda" --with dev
+
+      - name: Download required data files
+        working-directory: ./apps/api.nameai.io
+        run: poetry run python -m nameai.download
 
       - name: Run tests
         working-directory: ./apps/api.nameai.io
diff --git a/apps/api.nameai.io/.env.example b/apps/api.nameai.io/.env.example
index 17f52c28c..2545a203f 100644
--- a/apps/api.nameai.io/.env.example
+++ b/apps/api.nameai.io/.env.example
@@ -18,4 +18,4 @@ ALCHEMY_URI_SEPOLIA=https://eth-sepolia.g.alchemy.com/v2/[YOUR_ALCHEMY_API_KEY]
 # - https://discuss.ens.domains/t/ens-subgraph-migration-to-the-decentralised-version/19183
 # - https://thegraph.com/explorer/subgraphs/5XqPmWe6gjyrJtFn9cLy237i4cWw2j9HcUJEXsP5qGtH?view=Query&chain=arbitrum-one
 ENS_SUBGRAPH_URL_MAINNET=https://api.thegraph.com/subgraphs/name/ensdomains/ens
-ENS_SUBGRAPH_URL_SEPOLIA=https://api.studio.thegraph.com/query/49574/enssepolia/version/latest
\ No newline at end of file
+ENS_SUBGRAPH_URL_SEPOLIA=https://api.studio.thegraph.com/query/49574/enssepolia/version/latest
diff --git a/apps/api.nameai.io/Dockerfile b/apps/api.nameai.io/Dockerfile
index 87affa174..2fef20918 100644
--- a/apps/api.nameai.io/Dockerfile
+++ b/apps/api.nameai.io/Dockerfile
@@ -4,4 +4,5 @@ RUN yum install gcc -y
 COPY pyproject.toml poetry.lock LICENSE README.md ./
 COPY nameai ./nameai/
 RUN pip install --no-cache-dir .[lambda]
+RUN python -m nameai.download
 CMD [ "nameai.root_api.handler" ]
\ No newline at end of file
diff --git a/apps/api.nameai.io/nameai/all_tokenizer.py b/apps/api.nameai.io/nameai/all_tokenizer.py
index 1d801d3e2..f17f3f900 100644
--- a/apps/api.nameai.io/nameai/all_tokenizer.py
+++ b/apps/api.nameai.io/nameai/all_tokenizer.py
@@ -69,19 +69,25 @@ def dfs(self, index, result, gap_before=False):
 
 
 class AllTokenizer:
-    """Return all tokenizations. It is a generator."""
+    """
+    General-purpose tokenizer that finds all possible word combinations in text.
+
+    Uses an Aho-Corasick automaton with multiple dictionaries to identify
+    valid words. Can produce tokenizations with gaps.
+    Yields tokenizations as tuples of tokens.
+    """
 
     def __init__(self, config):
         self.config = config
-        self.skip_non_words = config.tokenization.skip_non_words
-        self.with_gaps = config.tokenization.with_gaps
+        self.skip_non_words = config.tokenization.all_tokenizer.skip_non_words
+        self.with_gaps = config.tokenization.all_tokenizer.with_gaps
 
     @static_property
     def automaton(self):
         automaton = ahocorasick.Automaton()
 
         should_be_tokenized = set()
-        with open(get_resource_path(self.config.tokenization.should_be_tokenized), encoding='utf-8') as f:
+        with open(get_resource_path(self.config.tokenization.all_tokenizer.should_be_tokenized), encoding='utf-8') as f:
             for line in f:
                 word = line.strip().lower()
                 should_be_tokenized.add(word)
diff --git a/apps/api.nameai.io/nameai/config/prod_config.yaml b/apps/api.nameai.io/nameai/config/prod_config.yaml
index 69a5f8fc9..aa10871ff 100644
--- a/apps/api.nameai.io/nameai/config/prod_config.yaml
+++ b/apps/api.nameai.io/nameai/config/prod_config.yaml
@@ -4,12 +4,26 @@ tokenization:
   dictionary: words.txt
   custom_dictionary: custom_dictionary.txt
   domain_specific_dictionary: domain_specific_dictionary.txt
-  should_be_tokenized: should_be_tokenized.txt
-  skip_non_words: false
-  with_gaps: true
+  all_tokenizer:
+    should_be_tokenized: should_be_tokenized.txt
+    skip_non_words: false
+    with_gaps: true
+  person_names:
+    first_names: pn_firstnames.json
+    last_names: pn_lastnames.json
+    other: pn_other.json
+    country_stats: pn_country_stats.json
+    country_bonus: 100
 ngrams:
   unigrams: unigram_freq.csv
   bigrams: bigram_freq.csv
   custom_dictionary: custom_dictionary.txt
   domain_specific_dictionary: domain_specific_dictionary.txt
   custom_token_frequency: 500000
+s3_resources:
+  bucket: prod-name-generator-namegeneratori-inputss3bucket-c26jqo3twfxy
+  person_names:
+    first_names_key: person_names_firstnames.json
+    last_names_key: person_names_lastnames.json
+    other_key: person_names_other.json
+    country_stats_key: person_names_country_stats.json
diff --git a/apps/api.nameai.io/nameai/data/tests/person_names_quality.json b/apps/api.nameai.io/nameai/data/tests/person_names_quality.json
new file mode 100644
index 000000000..d0479ec9e
--- /dev/null
+++ b/apps/api.nameai.io/nameai/data/tests/person_names_quality.json
@@ -0,0 +1,55 @@
+{
+    "simple_names": {
+        "piotrwiśniewski": ["piotr", "wiśniewski"],
+        "camilleclaudel": ["camille", "claudel"],
+        "johnnydepp": ["johnny", "depp"],
+        "giancarloesposito": ["giancarlo", "esposito"],
+        "maríagarcía": ["maría", "garcía"],
+        "viktororbán": ["viktor", "orbán"],
+        "sørenkierkegaard": ["søren", "kierkegaard"],
+        "oceanneguérin": ["oceanne", "guérin"]
+    },
+    "ambiguous_names": {
+        "dragonfernandez": {
+            "person_name": ["dragon", "fernandez"],
+            "words": ["dragon", "fern", "a", "ndez"]
+        },
+        "wolfsmith": {
+            "person_name": ["wolf", "smith"],
+            "words": ["wolf", "smith"]
+        },
+        "blacksmith": {
+            "person_name": null,
+            "words": ["black", "smith"]
+        },
+        "charleswood": {
+            "person_name": ["charles", "wood"],
+            "words": ["char", "les", "wood"]
+        },
+        "maylin": {
+            "person_name": ["may", "lin"],
+            "words": ["may", "lin"]
+        },
+        "natalieportman": {
+            "person_name": ["natalie", "portman"],
+            "words": ["nat", "alie", "port", "man"]
+        },
+        "sunyoung": {
+            "person_name": ["sunyoung"],
+            "words": ["suny", "oung"]
+        },
+        "annalísa": {
+            "person_name": ["anna", "lísa"],
+            "words": ["ann", "alísa"]
+        }
+    },
+    "non_names": {
+        "dragonfernouch": ["dragon", "fern", "ouch"],
+        "webmaster": ["webmaster"],
+        "quantumleap": ["quantum", "leap"],
+        "neuralnet": ["neural", "net"],
+        "deepmatrix": ["deep", "matrix"],
+        "cloudsync": ["cloud", "sync"],
+        "byteflow": ["byte", "flow"]
+    }
+}
\ No newline at end of file
diff --git a/apps/api.nameai.io/nameai/download.py b/apps/api.nameai.io/nameai/download.py
new file mode 100644
index 000000000..5f7a01f6d
--- /dev/null
+++ b/apps/api.nameai.io/nameai/download.py
@@ -0,0 +1,53 @@
+import boto3
+import botocore
+from omegaconf import DictConfig
+import hydra
+import os
+
+from nameai.data import get_resource_path
+
+
+class S3Downloader:  # downloads objects from one S3 bucket through a lazily created boto3 client
+    def __init__(self, bucket: str) -> None:
+        self.s3_client = None  # created on first use by get_client()
+        self.bucket = bucket
+        self.region_name = 'us-east-1'
+
+    def get_client(self):
+        if self.s3_client is None:  # build the client once, then reuse it
+            self.s3_client = boto3.client(
+                's3', region_name=self.region_name, config=botocore.config.Config(signature_version=botocore.UNSIGNED)
+            )  # signature_version=UNSIGNED: requests are not signed (no AWS credentials needed)
+        return self.s3_client
+
+    def download_file(self, key: str, local_path: str, overwrite: bool = False) -> None:
+        if os.path.exists(local_path) and not overwrite:  # keep an existing local file unless overwrite=True
+            return
+        self.get_client().download_file(self.bucket, key, local_path)
+
+
+def download_files(config: DictConfig) -> None:
+    """Download the person-name data files listed in `config.s3_resources` to their local resource paths."""
+    downloader = S3Downloader(config.s3_resources.bucket)
+    files_to_download = [  # (S3 object key, local resource file name) pairs
+        (config.s3_resources.person_names.first_names_key, config.tokenization.person_names.first_names),
+        (config.s3_resources.person_names.last_names_key, config.tokenization.person_names.last_names),
+        (config.s3_resources.person_names.other_key, config.tokenization.person_names.other),
+        (config.s3_resources.person_names.country_stats_key, config.tokenization.person_names.country_stats),
+    ]
+
+    for s3_key, local_path in files_to_download:
+        downloader.download_file(
+            key=s3_key,
+            local_path=get_resource_path(local_path),
+            overwrite=True,  # always replace any existing local copy
+        )
+
+
+@hydra.main(config_path='./config', config_name='prod_config', version_base=None)
+def download_files_main(config: DictConfig) -> None:  # CLI entry point: hydra supplies ./config/prod_config
+    download_files(config)
+
+
+if __name__ == '__main__':
+    download_files_main()
diff --git a/apps/api.nameai.io/nameai/ngrams.py b/apps/api.nameai.io/nameai/ngrams.py
index 42124dfbb..c1503e2b3 100644
--- a/apps/api.nameai.io/nameai/ngrams.py
+++ b/apps/api.nameai.io/nameai/ngrams.py
@@ -82,13 +82,13 @@ def all_unigrams_count(self) -> int:
     def all_bigrams_count(self) -> int:
         return self._bigrams_and_count[1]
 
-    def unigram_count(self, word: str) -> int:
+    def unigram_count(self, word: str) -> int | float:
         return self.unigrams.get(word, self.oov_count(word))
 
     def bigram_count(self, word: str) -> Optional[int]:
         return self.bigrams.get(word, None)
 
-    def oov_count(self, word: str) -> int:
+    def oov_count(self, word: str) -> float:
         return (1 / 100) ** (len(word))
 
     def word_probability(self, word: str) -> float:
diff --git a/apps/api.nameai.io/nameai/nlp_inspector.py b/apps/api.nameai.io/nameai/nlp_inspector.py
index 7167c9dad..56094beec 100644
--- a/apps/api.nameai.io/nameai/nlp_inspector.py
+++ b/apps/api.nameai.io/nameai/nlp_inspector.py
@@ -10,6 +10,7 @@
 )
 from nameai.all_tokenizer import AllTokenizer
 from nameai.ngrams import Ngrams
+from nameai.person_names import PersonNameTokenizer
 
 
 def init_inspector():
@@ -49,6 +50,7 @@ class NLPInspector:
     def __init__(self, config):
         self.inspector = init_inspector()
         self.tokenizer = AllTokenizer(config)
+        self.person_names_tokenizer = PersonNameTokenizer(config)
         self.ngrams = Ngrams(config)
 
     def nlp_analyse_label(self, label: str) -> NLPLabelAnalysis:
@@ -92,28 +94,52 @@ def base_analyse_label(self, label: str):
         return self.inspector.analyse_label(label, simple_confusables=True)
 
     def tokenize(self, label: str, tokenizations_limit: int) -> tuple[list[dict], bool]:
-        tokenizeds_iterator = self.tokenizer.tokenize(label)
+        """
+        Tokenize text using both person name and general-purpose tokenizers.
+
+        Combines results from PersonNameTokenizer (with name-specific probabilities)
+        and AllTokenizer (with ngram-based probabilities).
+        Returns tokenizations sorted by probability.
+        """
+        all_tokenizer_iterator = self.tokenizer.tokenize(label)
+        person_names_iterator = self.person_names_tokenizer.tokenize_with_scores(label)
+
         tokenizeds = []
         partial_tokenization = False
         try:
             used = set()
             i = 0
-            for tokenized in tokenizeds_iterator:
+
+            # first add person name tokenizations with their original scores
+            for tokenized, log_prob in person_names_iterator:
+                if tokenized not in used:
+                    if i == tokenizations_limit:
+                        partial_tokenization = True
+                        break
+                    used.add(tokenized)
+                    i += 1
+                    tokenizeds.append({'tokens': tokenized, 'log_probability': log_prob, 'source': 'person_names'})
+
+            # then add regular tokenizations
+            for tokenized in all_tokenizer_iterator:
                 if tokenized not in used:
                     if i == tokenizations_limit:
                         partial_tokenization = True
                         break
                     used.add(tokenized)
                     i += 1
-                    tokenizeds.append(tokenized)
+                    # for AllTokenizer tokenizations, use ngrams probability
+                    tokenizeds.append(
+                        {
+                            'tokens': tokenized,
+                            'log_probability': self.ngrams.sequence_log_probability(tokenized),
+                            'source': 'ngrams',
+                        }
+                    )
+
         except RecursionError:
             partial_tokenization = True
 
-        tokenizeds = [
-            {'tokens': tokenized, 'log_probability': self.ngrams.sequence_log_probability(tokenized)}
-            for tokenized in tokenizeds
-        ]
-
         for tokenized in tokenizeds:
             tokenized['tokens'] = tuple(uniq_gaps(tokenized['tokens']))
             tokenized['probability'] = math.exp(tokenized['log_probability'])
diff --git a/apps/api.nameai.io/nameai/person_names.py b/apps/api.nameai.io/nameai/person_names.py
new file mode 100644
index 000000000..e51548811
--- /dev/null
+++ b/apps/api.nameai.io/nameai/person_names.py
@@ -0,0 +1,272 @@
+import collections
+import copy
+import json
+import math
+from typing import Iterator, Optional
+from omegaconf import DictConfig
+
+from nameai.data import get_resource_path
+
+
+class PersonNames:
+    """
+    Analyzes and scores potential person name interpretations in text.
+
+    Uses statistical data about first names, last names, and their frequency per country to evaluate
+    different possible interpretations of a text string as a person's name. Scoring is weighted by
+    country-specific internet user statistics to reflect real-world name likelihood.
+    """
+
+    def __init__(self, config: DictConfig):
+        pn_config = config.tokenization.person_names
+        data = []  # parsed JSON, in the order: first names, last names, other, country stats
+        for name in (pn_config.first_names, pn_config.last_names, pn_config.other, pn_config.country_stats):
+            with open(get_resource_path(name), encoding='utf-8') as f:  # names are non-ASCII; always close the handle
+                data.append(json.load(f))
+        self.firstnames, self.lastnames, other, self.country_stats = data
+        self.countries: dict[str, int] = other['all']
+        self.firstname_initials: dict[str, dict[str, int]] = other['firstname_initials']
+        self.lastname_initials: dict[str, dict[str, int]] = other['lastname_initials']
+        self.all_internet_users: int = sum(x[0] for x in self.country_stats.values())
+        self.all_population: int = sum(x[1] for x in self.country_stats.values())
+        self.country_bonus = pn_config.country_bonus
+        self.allow_cross_country = False
+
+    def print_missing_countries(self):
+        for country, stats in sorted(self.country_stats.items(), key=lambda x: x[1][0], reverse=True):
+            if country not in self.countries:
+                print('X', country, stats)
+            else:
+                print(country, stats)
+
+    def get_population(self, country: str) -> Optional[int]:
+        try:
+            return self.country_stats[country][1]
+        except Exception:
+            return None
+
+    def get_internet_users(self, country: str) -> Optional[int]:
+        try:
+            return self.country_stats[country][0]
+        except Exception:
+            return None
+
+    def get_internet_users_weight(self, country: str) -> Optional[float]:
+        try:
+            return self.country_stats[country][0] / self.all_internet_users
+        except Exception:
+            return None
+
+    def single_name(self, name: str, name_stats: dict[str, dict[str, int]]) -> dict:
+        name_prob = {
+            country: sum(gender_counts.values()) / self.countries[country] * self.get_internet_users_weight(country)
+            for country, gender_counts in name_stats.items()
+        }
+
+        genders = {}
+        for country, gender_counts in name_stats.items():
+            m = gender_counts.get('M', 1)
+            f = gender_counts.get('F', 1)
+            genders[country] = {'M': m / (m + f), 'F': f / (m + f)}
+
+        interpretation = {}
+        interpretation['names'] = [name_stats]
+        interpretation['prob'] = name_prob
+        interpretation['tokenization'] = (name,)
+        interpretation['genders'] = genders
+        return interpretation
+
+    def name_with_initial(
+        self,
+        name: str,
+        initial: str,
+        name_stats: dict[str, dict[str, int]],
+        initial_firstname: bool,
+        initial_first: bool,
+    ) -> dict:
+        name_prob = {
+            country: sum(gender_counts.values())
+            / self.countries[country]
+            * (
+                self.firstname_initials[country].get(initial, 1)
+                if initial_firstname
+                else self.lastname_initials[country].get(initial, 1)
+            )
+            / self.countries[country]
+            * self.get_internet_users_weight(country)
+            for country, gender_counts in name_stats.items()
+        }
+
+        genders = {}
+        for country, gender_counts in name_stats.items():
+            m = gender_counts.get('M', 1)
+            f = gender_counts.get('F', 1)
+            genders[country] = {'M': m / (m + f), 'F': f / (m + f)}
+
+        interpretation = {}
+        if initial_first:
+            interpretation['tokenization'] = (initial, name)
+        else:
+            interpretation['tokenization'] = (name, initial)
+
+        interpretation['names'] = [name_stats]
+        interpretation['prob'] = name_prob
+        interpretation['genders'] = genders
+        return interpretation
+
+    def two_names(
+        self, name1: str, name2: str, name1_stats: dict[str, dict[str, int]], name2_stats: dict[str, dict[str, int]]
+    ) -> dict:
+        """Interpret `name1` + `name2` as a two-part name; return per-country probabilities and gender splits."""
+        name1_prob = {
+            country: sum(gender_counts.values()) / self.countries[country]
+            for country, gender_counts in name1_stats.items()
+        }
+        name2_prob = {
+            country: sum(gender_counts.values()) / self.countries[country]
+            for country, gender_counts in name2_stats.items()
+        }
+        interpretation = {'names': [name1_stats, name2_stats], 'tokenization': (name1, name2)}
+
+        probs = collections.defaultdict(list)  # country -> probabilities of the names found in that country
+        probs2 = {}
+        genders = {}
+        for name_prob in [name1_prob, name2_prob]:
+            for country, prob in name_prob.items():
+                probs[country].append(prob)
+        for country, country_probs in probs.items():  # distinct name: do not rebind the dict being iterated
+            if len(country_probs) == 1:  # only one of the two names occurs in this country
+                if not self.allow_cross_country:
+                    continue
+                country_probs.append(1 / self.countries[country])  # stand-in probability for the missing name
+            probs2[country] = math.prod(country_probs)
+            probs2[country] *= self.get_internet_users_weight(country)
+
+            # missing gender counts default to 1
+            m = name1_stats.get(country, {}).get('M', 1) * name2_stats.get(country, {}).get('M', 1)
+            f = name1_stats.get(country, {}).get('F', 1) * name2_stats.get(country, {}).get('F', 1)
+            genders[country] = {'M': m / (m + f), 'F': f / (m + f)}
+        interpretation['prob'] = probs2
+        interpretation['genders'] = genders
+
+        return interpretation
+
+    def anal(self, input_name: str) -> list[dict]:
+        interpretations = []
+        # only one name
+        name_stats = copy.copy(self.firstnames.get(input_name, None))
+        if name_stats:
+            interpretation = self.single_name(input_name, name_stats)
+            interpretation['type'] = 'first'
+            interpretations.append(interpretation)
+
+        name_stats = copy.copy(self.lastnames.get(input_name, None))
+        if name_stats:
+            interpretation = self.single_name(input_name, name_stats)
+            interpretation['type'] = 'last'
+            interpretations.append(interpretation)
+
+        # one name with initial
+        for name, initial, initial_first in [
+            (input_name[1:], input_name[:1], True),
+            (input_name[:-1], input_name[-1:], False),
+        ]:
+            if not initial or not name:
+                continue
+            name_stats = copy.copy(self.firstnames.get(name, None))
+            if name_stats:
+                interpretation = self.name_with_initial(
+                    name, initial, name_stats, initial_firstname=False, initial_first=initial_first
+                )
+                interpretation['type'] = 'first with initial'
+                interpretations.append(interpretation)
+
+            name_stats = copy.copy(self.lastnames.get(name, None))
+            if name_stats:
+                interpretation = self.name_with_initial(
+                    name, initial, name_stats, initial_firstname=True, initial_first=initial_first
+                )
+                interpretation['type'] = 'last with initial'
+                interpretations.append(interpretation)
+
+        # two names
+        for i in range(1, len(input_name)):
+            name1 = input_name[:i]
+            name2 = input_name[i:]
+            name1_result = copy.copy(self.firstnames.get(name1, None))
+            name2_result = copy.copy(self.lastnames.get(name2, None))
+            if name1_result and name2_result:
+                interpretation = self.two_names(name1, name2, name1_result, name2_result)
+                interpretation['type'] = 'first last'
+                interpretations.append(interpretation)
+
+            name1_result = copy.copy(self.lastnames.get(name1, None))
+            name2_result = copy.copy(self.firstnames.get(name2, None))
+            if name1_result and name2_result:
+                interpretation = self.two_names(name1, name2, name1_result, name2_result)
+                interpretation['type'] = 'last first'
+                interpretations.append(interpretation)
+
+        return interpretations
+
+    def tokenize(
+        self, input_name: str, user_country: str | None = None, topn: int = 1
+    ) -> list[tuple[float, str, tuple[str, ...], str, dict[str, float] | None]]:
+        """Return the `topn` best (prob, country, tokenization, type, genders) interpretations of `input_name`."""
+        all_interpretations = self.score(input_name, user_country)
+        return all_interpretations[:topn]
+
+    def score(
+        self, input_name: str, user_country: str | None = None
+    ) -> list[tuple[float, str, tuple[str, ...], str, dict[str, float] | None]]:
+        """Return all (prob, country, tokenization, type, genders) interpretations, sorted best first."""
+        interpretations = self.anal(input_name)
+
+        all_interpretations = []
+        for r in interpretations:
+            if user_country in r['prob']:  # boost the caller's own country by the configured bonus factor
+                r['prob'][user_country] = r['prob'][user_country] * self.country_bonus
+
+            for country, prob in r['prob'].items():  # flatten: one result tuple per (interpretation, country)
+                all_interpretations.append(
+                    (prob, country, r['tokenization'], r['type'], r['genders'].get(country, None))
+                )
+
+        return sorted(all_interpretations, reverse=True)
+
+
+class PersonNameTokenizer:
+    """
+    Specialized tokenizer for identifying person names in text.
+
+    Uses statistical name data and filtering to identify valid name tokens.
+    Yields tokenizations as tuples of tokens paired with their log probability.
+    """
+
+    def __init__(self, config: DictConfig):
+        super().__init__()
+        self.pn = PersonNames(config)
+        self.should_be_tokenized = set()  # lowercased words; tokenizations containing any of them are rejected
+        with open(get_resource_path(config.tokenization.all_tokenizer.should_be_tokenized), encoding='utf-8') as f:
+            for line in f:
+                word = line.strip().lower()
+                self.should_be_tokenized.add(word)
+
+    def _get_scores(self, label: str) -> list[tuple[float, str, tuple[str, ...], str, dict[str, float] | None]]:
+        """Return the scored interpretations of `label`, as produced by `PersonNames.score`."""
+        return self.pn.score(label)
+
+    def tokenize_with_scores(self, label: str) -> Iterator[tuple[tuple[str, ...], float]]:
+        """
+        Tokenize a label into possible person name interpretations with their scores.
+        Returns an iterator of unique (tokenization, log_probability) pairs; a zero probability maps to -inf.
+        """
+        seen = set()
+        for prob, country, tokenization, type_, genders in self._get_scores(label):
+            if (  # keep only unseen tokenizations with no single-letter token and no should_be_tokenized token
+                tokenization not in seen
+                and all(len(t) > 1 for t in tokenization)
+                and not any(t.lower() in self.should_be_tokenized for t in tokenization)
+            ):
+                seen.add(tokenization)
+                yield tokenization, math.log(prob) if prob > 0 else -float('inf')
diff --git a/apps/api.nameai.io/poetry.lock b/apps/api.nameai.io/poetry.lock
index 1baf8d433..d4ca322e9 100644
--- a/apps/api.nameai.io/poetry.lock
+++ b/apps/api.nameai.io/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
 
 [[package]]
 name = "aiohappyeyeballs"
@@ -327,6 +327,44 @@ files = [
     {file = "bitarray-2.9.2.tar.gz", hash = "sha256:a8f286a51a32323715d77755ed959f94bef13972e9a2fe71b609e40e6d27957e"},
 ]
 
+[[package]]
+name = "boto3"
+version = "1.36.14"
+description = "The AWS SDK for Python"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "boto3-1.36.14-py3-none-any.whl", hash = "sha256:e2dab15944c3f517c88850d60b07f2f6fd3bc69aa51c47670e4f45d62a8c41fd"},
+    {file = "boto3-1.36.14.tar.gz", hash = "sha256:4b0b8dd593b95f32a5a761dee65094423fbd06a4ad09f26b2e6c80493139569f"},
+]
+
+[package.dependencies]
+botocore = ">=1.36.14,<1.37.0"
+jmespath = ">=0.7.1,<2.0.0"
+s3transfer = ">=0.11.0,<0.12.0"
+
+[package.extras]
+crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]
+
+[[package]]
+name = "botocore"
+version = "1.36.14"
+description = "Low-level, data-driven core of boto 3."
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "botocore-1.36.14-py3-none-any.whl", hash = "sha256:546d0c071e9c8aeaca399d71bec414abe6434460f7d6640cbd92d4b1c3eb443e"},
+    {file = "botocore-1.36.14.tar.gz", hash = "sha256:53feff270078c23ba852fb2638fde6c5f74084cfc019dd5433e865cd04065c60"},
+]
+
+[package.dependencies]
+jmespath = ">=0.7.1,<2.0.0"
+python-dateutil = ">=2.1,<3.0.0"
+urllib3 = {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""}
+
+[package.extras]
+crt = ["awscrt (==0.23.8)"]
+
 [[package]]
 name = "certifi"
 version = "2024.8.30"
@@ -1324,6 +1362,17 @@ files = [
     {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
 ]
 
+[[package]]
+name = "jmespath"
+version = "1.0.1"
+description = "JSON Matching Expressions"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"},
+    {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"},
+]
+
 [[package]]
 name = "jsonschema"
 version = "4.23.0"
@@ -2087,6 +2136,20 @@ files = [
 [package.dependencies]
 pytest = ">=3.1"
 
+[[package]]
+name = "python-dateutil"
+version = "2.9.0.post0"
+description = "Extensions to the standard Python datetime module"
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
+files = [
+    {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"},
+    {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"},
+]
+
+[package.dependencies]
+six = ">=1.5"
+
 [[package]]
 name = "python-dotenv"
 version = "1.0.1"
@@ -2495,6 +2558,23 @@ files = [
     {file = "ruff-0.6.7.tar.gz", hash = "sha256:44e52129d82266fa59b587e2cd74def5637b730a69c4542525dfdecfaae38bd5"},
 ]
 
+[[package]]
+name = "s3transfer"
+version = "0.11.2"
+description = "An Amazon S3 Transfer Manager"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "s3transfer-0.11.2-py3-none-any.whl", hash = "sha256:be6ecb39fadd986ef1701097771f87e4d2f821f27f6071c872143884d2950fbc"},
+    {file = "s3transfer-0.11.2.tar.gz", hash = "sha256:3b39185cb72f5acc77db1a58b6e25b977f28d20496b6e58d6813d75f464d632f"},
+]
+
+[package.dependencies]
+botocore = ">=1.36.0,<2.0a.0"
+
+[package.extras]
+crt = ["botocore[crt] (>=1.36.0,<2.0a.0)"]
+
 [[package]]
 name = "setuptools"
 version = "75.1.0"
@@ -2515,6 +2595,17 @@ enabler = ["pytest-enabler (>=2.2)"]
 test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"]
 type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.11.*)", "pytest-mypy"]
 
+[[package]]
+name = "six"
+version = "1.17.0"
+description = "Python 2 and 3 compatibility utilities"
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
+files = [
+    {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"},
+    {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"},
+]
+
 [[package]]
 name = "sniffio"
 version = "1.3.1"
@@ -3010,4 +3101,4 @@ lambda = ["mangum"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.10,<3.13"
-content-hash = "453b73989a1dc02fa3b1a79b727b2f3c0e2a7049dc2435391d0ed6000cc7717c"
+content-hash = "0ae1b34f136e9fad1241d06fa24f68aa3238121a865ca39427ffeed18d037e59"
diff --git a/apps/api.nameai.io/pyproject.toml b/apps/api.nameai.io/pyproject.toml
index 059df002c..73dc3da1e 100644
--- a/apps/api.nameai.io/pyproject.toml
+++ b/apps/api.nameai.io/pyproject.toml
@@ -21,6 +21,7 @@ httpx = "^0.25.0"
 python-dotenv = "^1.0.0"
 pyahocorasick = "^2.0.0"
 setuptools = "^75.1.0"
+boto3 = "^1.36.14"
 
 
 [tool.poetry.extras]
diff --git a/apps/api.nameai.io/start-local.sh b/apps/api.nameai.io/start-local.sh
index d93497840..61f70255c 100644
--- a/apps/api.nameai.io/start-local.sh
+++ b/apps/api.nameai.io/start-local.sh
@@ -3,4 +3,5 @@ pip install --upgrade pip
 pip install poetry
 pip install uvicorn
 pip install .[lambda]
+python -m nameai.download
 uvicorn nameai.root_api:app
\ No newline at end of file
diff --git a/apps/api.nameai.io/tests/load_tests/README.md b/apps/api.nameai.io/tests/load_tests/README.md
new file mode 100644
index 000000000..5901884f0
--- /dev/null
+++ b/apps/api.nameai.io/tests/load_tests/README.md
@@ -0,0 +1,57 @@
+# Load Tests for NameAI API
+
+This directory contains load testing scripts for the NameAI API using [Locust](https://locust.io/).
+
+## Start NameAI API
+
+In one terminal, start the NameAI API:
+
+```bash
+poetry run uvicorn nameai.nameai_api:app
+```
+
+## Install locust
+
+In another terminal, activate the poetry environment and install locust:
+
+```bash
+poetry run pip install locust
+```
+
+## Run tests
+
+Navigate to the `load_tests` directory and use one of the following options:
+
+### Tests in Web UI
+
+Start the load test with:
+```bash
+poetry run locust -f performance.py
+```
+Then open http://localhost:8089 in your browser to:
+- Configure number of users
+- Set spawn rate
+- Start/stop tests
+- View real-time metrics and charts
+
+### Headless tests
+
+You can run headless tests with these parameters:
+```bash
+poetry run locust -f performance.py --headless -u 100 -r 10 --run-time 1m -H "http://localhost:8000" --only-summary
+```
+
+This will:
+- Run with 100 users
+- Spawn 10 users per second
+- Run for 1 minute
+- Print only the final summary (`--only-summary`); add `--html report.html` to also generate an HTML report
+
+
+### Test latency for different numbers of users
+
+```bash
+poetry run bash run_load_tests.sh
+```
+
+This will run the test with different numbers of users and save the results in `latency_results.csv`.
diff --git a/apps/api.nameai.io/tests/load_tests/latency_results.csv b/apps/api.nameai.io/tests/load_tests/latency_results.csv
new file mode 100644
index 000000000..5c09f8dd5
--- /dev/null
+++ b/apps/api.nameai.io/tests/load_tests/latency_results.csv
@@ -0,0 +1,6 @@
+users,requests,failures,mean_latency,median_latency,p95_latency
+16,939,0,11.60211740468521,7,13
+32,1919,0,12.957146057850554,6,16
+64,3778,0,25.72315333006236,7,64
+128,7360,0,59.24790962773564,18,150
+256,10123,0,493.8000638233782,440.0,880
diff --git a/apps/api.nameai.io/tests/load_tests/performance.py b/apps/api.nameai.io/tests/load_tests/performance.py
new file mode 100644
index 000000000..9026e28a6
--- /dev/null
+++ b/apps/api.nameai.io/tests/load_tests/performance.py
@@ -0,0 +1,71 @@
+import random
+
+from locust import HttpUser, task, between
+
+
+input_labels = [
+    'giancarloesposito',
+    'piotrwiśniewski',
+    'dragonfernandez',
+    'wolfsmith',
+    'mrscopcake',
+    'likemrscopcake',
+    'cryptocurrency',
+    'blockchain',
+    'yerbamate',
+    'javascript',
+    'superduper',
+    'ucberkeley',
+    'moshpit',
+    'coffeebean',
+    'laptoplaptop',
+    'americanairlines',
+    'usarmy',
+    'greenriver',
+    'counterstrike',
+    'rocknroll',
+    'sanfrancisco',
+    'ilikeyourcat',
+    'catlikeiyour',
+    'xchange',
+    'bball',
+    'nft',
+    'sdfbgfdbgjkdfjgdfhjfgdjfgdsjh',
+    '[003fda97309fd6aa9d7753dcffa37da8bb964d0fb99eba99d0770e76fc5bac91]',
+    'lapśtop',
+    'łcatł',
+    'laptop',
+    'toplap',
+    'repeatable',
+    'bothering',
+    'rakuten',
+    'livecam',
+    'miinibaashkiminasiganibiitoosijiganibadagwiingweshiganibakwezhigan',
+    'yorknewŁyork123',
+    'counterstrike',
+    'avadakedavra',
+    'lumosreparo',
+    'americanairlines',
+    'greenriver',
+    'uc',
+    'us',
+    'nft',
+]
+
+
+class NameAIUser(HttpUser):
+    wait_time = between(0.2, 1.6)
+
+    @task(1)
+    def inspect_label_get(self):
+        self.client.get(f'/inspect-label/{random.choice(input_labels)}')
+
+    @task(1)
+    def inspect_label_post(self):
+        self.client.post('/inspect-label', json={'label': random.choice(input_labels)})
+
+    @task(1)
+    def inspect_name(self):
+        self.client.post(
+            '/inspect-name', json={'name': f'{random.choice(input_labels)}.eth', 'network_name': 'mainnet'}
+        )
diff --git a/apps/api.nameai.io/tests/load_tests/run_load_tests.sh b/apps/api.nameai.io/tests/load_tests/run_load_tests.sh
new file mode 100755
index 000000000..c392d88ad
--- /dev/null
+++ b/apps/api.nameai.io/tests/load_tests/run_load_tests.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+# Runs performance.py headless at several user counts and collects one latency summary row per run.
+user_counts=(16 32 64 128 256)
+output_file="latency_results.csv"
+echo "users,requests,failures,mean_latency,median_latency,p95_latency" > "$output_file"
+
+for n_users in "${user_counts[@]}"
+do
+    echo "Running test with $n_users users..."
+
+    # run locust with specified number of users (--spawn-rate is set to n_users/10 for gradual ramp-up)
+    locust -f performance.py \
+           --headless \
+           --users "$n_users" \
+           --spawn-rate $((n_users / 10)) \
+           --run-time 1m \
+           --host "http://localhost:8000" \
+           --only-summary \
+           --csv="stats_$n_users"
+
+    # extract metrics from the csv file ("Aggregated" row)
+    stats=$(tail -n 1 "stats_${n_users}_stats.csv")
+
+    # columns: $3 requests, $4 failures, $6 mean, $5 median, $17 95th percentile ($16 is the 90th)
+    echo "$stats" | awk -F',' -v users="$n_users" '{print users "," $3 "," $4 "," $6 "," $5 "," $17}' >> "$output_file"
+
+    # clean up all temporary files
+    rm -f "stats_${n_users}_stats.csv" \
+          "stats_${n_users}_stats_history.csv" \
+          "stats_${n_users}_failures.csv" \
+          "stats_${n_users}_exceptions.csv"
+
+    # wait between tests to let system stabilize
+    sleep 5
+done
+
+echo "Testing complete. Results saved to $output_file"
diff --git a/apps/api.nameai.io/tests/test_nameai.py b/apps/api.nameai.io/tests/test_nameai.py
index d195477c5..8492cfefe 100644
--- a/apps/api.nameai.io/tests/test_nameai.py
+++ b/apps/api.nameai.io/tests/test_nameai.py
@@ -18,11 +18,11 @@ def nameai():
 def test_normalized(nameai: 'NameAI'):
     result = nameai.inspect_label('nick')
     assert abs(result.nameai.purity_score - 0.9976234705882353) < 0.0001, result.nameai.purity_score
-    assert abs(result.nameai.sort_score - 0.9354685918689098) < 0.0001, result.nameai.sort_score
+    assert abs(result.nameai.sort_score - 0.93694557738369) < 0.0001, result.nameai.sort_score
     assert result.nameai.analysis.status == 'normalized'
-    assert abs(result.nameai.analysis.probability - 0.0000317942695746393) < 0.0001, result.nameai.analysis.probability
+    assert abs(result.nameai.analysis.probability - 0.00019752378433969656) < 0.0001, result.nameai.analysis.probability
     assert (
-        abs(result.nameai.analysis.log_probability - -10.356224486471852) < 0.0001
+        abs(result.nameai.analysis.log_probability - -8.529651553837413) < 0.0001
     ), result.nameai.analysis.log_probability
     assert result.nameai.analysis.word_count == 1
     assert result.nameguard.rating.name == 'WARN'
@@ -38,13 +38,13 @@ def test_name(nameai: 'NameAI'):
     result = nameai.inspect_name('nick')
     assert result.nameai.analysis.inspection.label == 'nick'
     assert abs(result.nameai.purity_score - 0.9976234705882353) < 0.0001, result.nameai.purity_score
-    assert abs(result.nameai.sort_score - 0.9354685918689098) < 0.0001, result.nameai.sort_score
+    assert abs(result.nameai.sort_score - 0.93694557738369) < 0.0001, result.nameai.sort_score
     assert result.nameai.analysis.status == 'normalized'
 
     result = nameai.inspect_name('nick.eth')
     assert result.nameai.analysis.inspection.label == 'nick'
     assert abs(result.nameai.purity_score - 0.9976234705882353) < 0.0001, result.nameai.purity_score
-    assert abs(result.nameai.sort_score - 0.9354685918689098) < 0.0001, result.nameai.sort_score
+    assert abs(result.nameai.sort_score - 0.93694557738369) < 0.0001, result.nameai.sort_score
     assert result.nameai.analysis.status == 'normalized'
 
     result = nameai.inspect_name('nick.eth.eth')
diff --git a/apps/api.nameai.io/tests/test_nlp_inspector.py b/apps/api.nameai.io/tests/test_nlp_inspector.py
index 6bc0eddce..360a56353 100644
--- a/apps/api.nameai.io/tests/test_nlp_inspector.py
+++ b/apps/api.nameai.io/tests/test_nlp_inspector.py
@@ -97,3 +97,130 @@ def test_inspector_word_count(nlp_inspector: 'NLPInspector'):
 
     result = nlp_inspector.nlp_analyse_label('toplap')
     assert result.word_count == 2
+
+
+def test_inspector_simple_names(nlp_inspector: 'NLPInspector'):
+    """Check that the top tokenization of each simple name is the expected 'person_names' split."""
+    from nameai.data import get_resource_path
+    import json
+
+    with open(get_resource_path('tests/person_names_quality.json')) as f:
+        quality_tests = json.load(f)
+
+    failures = []
+    for input_text, expected_tokens in quality_tests['simple_names'].items():
+        tokenizations, _ = nlp_inspector.tokenize(input_text, 1000)
+        expected_tokens = tuple(expected_tokens)
+        if tokenizations[0]['tokens'] != expected_tokens or tokenizations[0]['source'] != 'person_names':
+            failures.append(
+                f"\nInput: '{input_text}'\nExpected: {expected_tokens} (person_names)\n"
+                f"Got: {tokenizations[0]['tokens']} ({tokenizations[0]['source']})"
+            )
+
+    if failures:
+        print('\n=== Simple Names Test Failures ===')
+        for failure in failures:
+            print(failure)
+        print(f'\nTotal failures: {len(failures)} out of {len(quality_tests["simple_names"])} test cases')
+        assert False, 'Some simple name tests failed. See above for details.'
+
+
+def test_inspector_ambiguous_names(nlp_inspector: 'NLPInspector'):
+    """Check ambiguous labels: person-name split ranked first when expected, otherwise 'ngrams' first."""
+    from nameai.data import get_resource_path
+    import json
+
+    with open(get_resource_path('tests/person_names_quality.json')) as f:
+        quality_tests = json.load(f)
+
+    failures = []
+    for input_text, interpretation2expected_tokens in quality_tests['ambiguous_names'].items():
+        tokenizations, _ = nlp_inspector.tokenize(input_text, 1000)
+        if interpretation2expected_tokens['person_name'] is not None:
+            expected_tokens = tuple(interpretation2expected_tokens['person_name'])
+            if tokenizations[0]['tokens'] != expected_tokens or tokenizations[0]['source'] != 'person_names':
+                failures.append(
+                    f"\nInput: '{input_text}'\nExpected: {expected_tokens} (person_names)\n"
+                    f"Got: {tokenizations[0]['tokens']} ({tokenizations[0]['source']})"
+                )
+        else:
+            if tokenizations[0]['source'] != 'ngrams':
+                failures.append(
+                    f"\nInput: '{input_text}'\nExpected ngrams source\n" f"Got: {tokenizations[0]['source']}"
+                )
+            expected_words = tuple(interpretation2expected_tokens['words'])
+            found_words = False
+            for tokenization in tokenizations:
+                if tokenization['tokens'] == expected_words:
+                    found_words = True
+                    break
+            if not found_words:
+                failures.append(
+                    f"\nInput: '{input_text}'\nExpected words tokenization: {expected_words}\n"
+                    f"Got tokenizations: {[t['tokens'] for t in tokenizations[:5]]}"
+                )
+
+    if failures:
+        print('\n=== Ambiguous Names Test Failures ===')
+        for failure in failures:
+            print(failure)
+        print(f'\nTotal failures: {len(failures)} out of {len(quality_tests["ambiguous_names"])} test cases')
+        assert False, 'Some ambiguous name tests failed. See above for details.'
+
+
+def test_inspector_non_names(nlp_inspector: 'NLPInspector'):
+    """Check that the top tokenization of each non-name is the expected 'ngrams' split."""
+    from nameai.data import get_resource_path
+    import json
+
+    with open(get_resource_path('tests/person_names_quality.json')) as f:
+        quality_tests = json.load(f)
+
+    failures = []
+    for input_text, expected_tokens in quality_tests['non_names'].items():
+        tokenizations, _ = nlp_inspector.tokenize(input_text, 1000)
+        expected_tuple = tuple(expected_tokens)
+        if tokenizations[0]['tokens'] != expected_tuple or tokenizations[0]['source'] != 'ngrams':
+            failures.append(
+                f"\nInput: '{input_text}'\nExpected: {expected_tokens} (ngrams)\n"
+                f"Got: {tokenizations[0]['tokens']} ({tokenizations[0]['source']})"
+            )
+
+    if failures:
+        print('\n=== Non-Names Test Failures ===')
+        for failure in failures:
+            print(failure)
+        print(f'\nTotal failures: {len(failures)} out of {len(quality_tests["non_names"])} test cases')
+        assert False, 'Some non-name tests failed. See above for details.'
+
+
+def test_inspector_tokenization_quality(nlp_inspector: 'NLPInspector'):
+    """Check the expected split is among the tokenizations for each tokenization_quality.json case."""
+    from nameai.data import get_resource_path
+    import json
+
+    # Load tokenization quality test cases (mapping of input label -> expected token list)
+    with open(get_resource_path('tests/tokenization_quality.json')) as f:
+        quality_tests = json.load(f)
+
+    failures = []
+    for input_text, expected_tokens in quality_tests.items():
+        tokenizations, _ = nlp_inspector.tokenize(input_text, 1000)
+        expected_tuple = tuple(expected_tokens)
+        found = False
+        for tokenization in tokenizations:
+            if tokenization['tokens'] == expected_tuple:
+                found = True
+                break
+        if not found:
+            failures.append(
+                f"\nInput: '{input_text}'\nExpected: {expected_tokens}\n"
+                f"Got: {[t['tokens'] for t in tokenizations[:5]]}"
+            )
+
+    if failures:
+        print('\n=== Combined Tokenization Quality Test Failures ===')
+        for failure in failures:
+            print(failure)
+        print(f'\nTotal failures: {len(failures)} out of {len(quality_tests)} test cases')
+        assert False, 'Some combined tokenization quality tests failed. See above for details.'
diff --git a/apps/api.nameai.io/tests/test_tokenizer.py b/apps/api.nameai.io/tests/test_tokenizer.py
index d7ca1de41..8de8957ef 100644
--- a/apps/api.nameai.io/tests/test_tokenizer.py
+++ b/apps/api.nameai.io/tests/test_tokenizer.py
@@ -3,12 +3,13 @@
 import pytest
 from pytest import mark
 from hydra import initialize_config_module, compose
+import math
 
 from mocked_static_property import mock_static_property
 
 
 @contextmanager
-def init_tokenizer(overrides):
+def init_all_tokenizer(overrides):
     with mock_static_property():
         from nameai.all_tokenizer import AllTokenizer
 
@@ -18,14 +19,24 @@ def init_tokenizer(overrides):
             yield tokenizer
 
 
+@contextmanager
+def init_person_name_tokenizer(overrides):
+    from nameai.person_names import PersonNameTokenizer
+
+    with initialize_config_module(version_base=None, config_module='nameai.config'):
+        config = compose(config_name='prod_config', overrides=overrides)
+        tokenizer = PersonNameTokenizer(config)
+        yield tokenizer
+
+
 @mark.parametrize(
     'overrides',
     [
-        (['tokenization.skip_non_words=false', 'tokenization.with_gaps=false']),
+        (['tokenization.all_tokenizer.skip_non_words=false', 'tokenization.all_tokenizer.with_gaps=false']),
     ],
 )
 def test_all_tokenizer_skip_one_letter_words(overrides: List[str]):
-    with init_tokenizer(overrides) as tokenizer:
+    with init_all_tokenizer(overrides) as tokenizer:
         tokenized_labels = list(tokenizer.tokenize('yorknewŁyork123'))
 
         assert (
@@ -51,11 +62,11 @@ def test_all_tokenizer_skip_one_letter_words(overrides: List[str]):
 @mark.parametrize(
     'overrides',
     [
-        (['tokenization.skip_non_words=true']),
+        (['tokenization.all_tokenizer.skip_non_words=true']),
     ],
 )
 def test_all_tokenizer_skip_non_words(overrides: List[str]):
-    with init_tokenizer(overrides) as tokenizer:
+    with init_all_tokenizer(overrides) as tokenizer:
         tokenized_labels = list(tokenizer.tokenize('yorknewŁyork123'))  # 0 tokenizations
         assert list(tokenized_labels) == []
 
@@ -71,11 +82,11 @@ def test_all_tokenizer_skip_non_words(overrides: List[str]):
 @mark.parametrize(
     'overrides',
     [
-        (['tokenization.skip_non_words=true']),
+        (['tokenization.all_tokenizer.skip_non_words=true']),
     ],
 )
 def test_all_tokenizer_skip_one_letter_words_and_non_words_no_ias(overrides: List[str]):
-    with init_tokenizer(overrides) as tokenizer:
+    with init_all_tokenizer(overrides) as tokenizer:
         tokenized_labels = list(tokenizer.tokenize('laptop'))
         assert ('laptop',) in tokenized_labels
         assert (
@@ -100,11 +111,11 @@ def test_all_tokenizer_skip_one_letter_words_and_non_words_no_ias(overrides: Lis
 @mark.parametrize(
     'overrides',
     [
-        (['tokenization.skip_non_words=false', 'tokenization.with_gaps=true']),
+        (['tokenization.all_tokenizer.skip_non_words=false', 'tokenization.all_tokenizer.with_gaps=true']),
     ],
 )
 def test_all_tokenizer_skip_one_letter_words_and_non_words_no_ias_with_gaps(overrides: List[str]):
-    with init_tokenizer(overrides) as tokenizer:
+    with init_all_tokenizer(overrides) as tokenizer:
         tokenized_labels = list(tokenizer.tokenize('lapŁtop'))
 
         assert (
@@ -122,22 +133,22 @@ def test_all_tokenizer_skip_one_letter_words_and_non_words_no_ias_with_gaps(over
 @mark.parametrize(
     'overrides',
     [
-        (['tokenization.skip_non_words=false', 'tokenization.with_gaps=true']),
+        (['tokenization.all_tokenizer.skip_non_words=false', 'tokenization.all_tokenizer.with_gaps=true']),
     ],
 )
 def test_all_tokenizer_time(overrides):
-    with init_tokenizer(overrides) as tokenizer:
+    with init_all_tokenizer(overrides) as tokenizer:
         next(tokenizer.tokenize('miinibaashkiminasiganibiitoosijiganibadagwiingweshiganibakwezhigan'))
 
 
 @mark.parametrize(
     'overrides',
     [
-        (['tokenization.skip_non_words=false', 'tokenization.with_gaps=true']),
+        (['tokenization.all_tokenizer.skip_non_words=false', 'tokenization.all_tokenizer.with_gaps=true']),
     ],
 )
 def test_all_tokenizer_skip_one_letter_words_and_non_words_no_ias_with_gaps23(overrides: List[str]):
-    with init_tokenizer(overrides) as tokenizer:
+    with init_all_tokenizer(overrides) as tokenizer:
         tokenized_labels = list(tokenizer.tokenize('laptop😀ą'))
         print(tokenized_labels)
         assert ('laptop', '') in tokenized_labels
@@ -150,7 +161,7 @@ def test_all_tokenizer_skip_one_letter_words_and_non_words_no_ias_with_gaps23(ov
 
 @pytest.mark.execution_timeout(10)
 def test_all_tokenizer_reccurence():
-    with init_tokenizer([]) as tokenizer:
+    with init_all_tokenizer([]) as tokenizer:
         next(tokenizer.tokenize('test' * 900))
 
         with pytest.raises(RecursionError):
@@ -161,11 +172,11 @@ def test_all_tokenizer_reccurence():
 @mark.parametrize(
     'overrides',
     [
-        (['tokenization.skip_non_words=false', 'tokenization.with_gaps=true']),
+        (['tokenization.all_tokenizer.skip_non_words=false', 'tokenization.all_tokenizer.with_gaps=true']),
     ],
 )
 def test_all_tokenizer_reccurence2(overrides):
-    with init_tokenizer(overrides) as tokenizer:
+    with init_all_tokenizer(overrides) as tokenizer:
         tokenized = tokenizer.tokenize('i' * 4 * 950)
         next(tokenized)
         with pytest.raises(RecursionError):
@@ -174,7 +185,7 @@ def test_all_tokenizer_reccurence2(overrides):
 
 
 def test_all_tokenizer_custom_dict():
-    with init_tokenizer([]) as tokenizer:
+    with init_all_tokenizer([]) as tokenizer:
         tokenized_labels = list(tokenizer.tokenize('nfttop'))
         assert (
             'nft',
@@ -187,7 +198,7 @@ def test_all_tokenizer_custom_dict():
         tokenized_labels = list(tokenizer.tokenize('york'))
         assert ('york',) in tokenized_labels
 
-    with init_tokenizer(['tokenization.custom_dictionary=tests/empty.txt']) as tokenizer:
+    with init_all_tokenizer(['tokenization.custom_dictionary=tests/empty.txt']) as tokenizer:
         tokenized_labels = list(tokenizer.tokenize('nfttop'))
         assert (
             'nft',
@@ -202,7 +213,7 @@ def test_all_tokenizer_custom_dict():
 
 
 def test_all_tokenizer_quality():
-    with init_tokenizer([]) as tokenizer:
+    with init_all_tokenizer([]) as tokenizer:
         from nameai.data import get_resource_path
 
         for multiword in open(get_resource_path('should_be_tokenized.txt')):
@@ -212,7 +223,7 @@ def test_all_tokenizer_quality():
 
 
 def test_all_tokenizer_quality2():
-    with init_tokenizer([]) as tokenizer:
+    with init_all_tokenizer([]) as tokenizer:
         from nameai.data import get_resource_path
         import json
 
@@ -228,8 +239,136 @@ def test_all_tokenizer_quality2():
                 failures.append(f"\nInput: '{input_text}'\nExpected: {expected_tokens}\nGot: {tokenized_labels}")
 
         if failures:
-            print('\n=== Tokenization Quality Test Failures ===')
+            print('\n=== AllTokenizer Quality Test Failures ===')
             for failure in failures:
                 print(failure)
             print(f'\nTotal failures: {len(failures)} out of {len(quality_tests)} test cases')
             assert False, 'Some tokenization quality tests failed. See above for details.'
+
+
+def test_person_name_tokenizer_simple_names():
+    """Verify every `simple_names` label yields its expected tokenization with a finite score."""
+    with init_person_name_tokenizer([]) as tokenizer:
+        from nameai.data import get_resource_path
+        import json
+
+        with open(get_resource_path('tests/person_names_quality.json')) as f:
+            quality_tests = json.load(f)
+
+        failures = []
+        for input_label, expected_tokens in quality_tests['simple_names'].items():
+            tokenized_labels = list(tokenizer.tokenize_with_scores(input_label))
+            expected_tuple = tuple(expected_tokens)
+            found = False
+            for tokens, score in tokenized_labels:
+                if tokens == expected_tuple:
+                    found = True
+                    assert score > -float('inf'), f'Expected valid score for {input_label}'
+                    break
+            if not found:
+                failures.append(f'Failed to find expected tokenization for {input_label}')
+
+        if failures:
+            print('\n=== PersonNameTokenizer Quality Test Failures [simple_names] ===')
+            for failure in failures:
+                print(failure)
+            print(f'\nTotal failures: {len(failures)} out of {len(quality_tests["simple_names"])} test cases')
+            assert False, 'Some tokenization quality tests failed. See above for details.'
+
+
+def test_person_name_tokenizer_ambiguous_names():
+    """Verify labels with a `person_name` interpretation yield that tokenization with a finite score."""
+    with init_person_name_tokenizer([]) as tokenizer:
+        from nameai.data import get_resource_path
+        import json
+
+        with open(get_resource_path('tests/person_names_quality.json')) as f:
+            quality_tests = json.load(f)
+
+        failures = []
+        for input_label, interpretation2expected_tokens in quality_tests['ambiguous_names'].items():
+            tokenized_labels = list(tokenizer.tokenize_with_scores(input_label))
+            if interpretation2expected_tokens['person_name'] is not None:
+                person_name_tokens = tuple(interpretation2expected_tokens['person_name'])
+                found = False
+                for tokens, score in tokenized_labels:
+                    if tokens == person_name_tokens:
+                        found = True
+                        assert score > -float('inf'), f'Expected valid score for {input_label}'
+                        break
+                if not found:
+                    failures.append(f'Failed to find person name tokenization for {input_label}')
+
+        if failures:
+            print('\n=== PersonNameTokenizer Quality Test Failures [ambiguous_names] ===')
+            for failure in failures:
+                print(failure)
+            print(f'\nTotal failures: {len(failures)} out of {len(quality_tests["ambiguous_names"])} test cases')
+            assert False, 'Some tokenization quality tests failed. See above for details.'
+
+
+def test_person_name_tokenizer_non_names_low_scores():
+    """Verify that non-name inputs get low (< 1e-10) probability scores."""
+    with init_person_name_tokenizer([]) as tokenizer:
+        from nameai.data import get_resource_path
+        import json
+
+        with open(get_resource_path('tests/person_names_quality.json')) as f:
+            quality_tests = json.load(f)
+
+        failures = []
+        for input_label in quality_tests['non_names']:
+            tokenized_labels = list(tokenizer.tokenize_with_scores(input_label))
+            for _, log_prob in tokenized_labels:
+                if log_prob >= math.log(1e-10):
+                    failures.append(f'Expected very low score for non-name {input_label}, got {log_prob}')
+
+        if failures:
+            print('\n=== PersonNameTokenizer Quality Test Failures [non_names] ===')
+            for failure in failures:
+                print(failure)
+            print(f'\nTotal failures: {len(failures)} out of {len(quality_tests["non_names"])} test cases')
+            assert False, 'Some tokenization quality tests failed. See above for details.'
+
+
+def test_person_name_tokenizer_probability_ranges():
+    """
+    Verify probability scoring across input categories.
+
+    Tests probability ranges for:
+    1. Clear names: high scores (> log(1e-8))
+    2. Ambiguous cases: medium scores (log(1e-12) to log(1e-8))
+    3. Non-names: very low scores (< log(1e-15))
+    """
+    with init_person_name_tokenizer([]) as tokenizer:
+        # test clear person names
+        tokenizations = list(tokenizer.tokenize_with_scores('giancarloesposito'))
+        assert any(
+            score > math.log(1e-8) for _, score in tokenizations
+        ), 'Clear person name should have high probability'
+
+        tokenizations = list(tokenizer.tokenize_with_scores('piotrwiśniewski'))
+        assert any(
+            score > math.log(1e-8) for _, score in tokenizations
+        ), 'Clear person name should have high probability'
+
+        # test ambiguous cases
+        tokenizations = list(tokenizer.tokenize_with_scores('dragonfernandez'))
+        assert any(
+            math.log(1e-12) < score < math.log(1e-8) for _, score in tokenizations
+        ), 'Ambiguous case should have medium probability'
+
+        tokenizations = list(tokenizer.tokenize_with_scores('wolfsmith'))
+        assert any(
+            math.log(1e-12) < score < math.log(1e-8) for _, score in tokenizations
+        ), 'Ambiguous case should have medium probability'
+
+        # test non-names
+        tokenizations = list(tokenizer.tokenize_with_scores('cryptocurrency'))
+        assert all(score < math.log(1e-15) for _, score in tokenizations), 'Non-name should have very low probability'
+
+        tokenizations = list(tokenizer.tokenize_with_scores('blockchain'))
+        assert all(score < math.log(1e-15) for _, score in tokenizations), 'Non-name should have very low probability'
+
+        tokenizations = list(tokenizer.tokenize_with_scores('yerbamate'))
+        assert all(score < math.log(1e-15) for _, score in tokenizations), 'Non-name should have very low probability'