feat: add model_type parameter to translate_text()

daniel-jones-dev · JanEbbing · commit dde23849ab93 · 2024-11-15T14:07:05.000Z
diff --git a/README.md b/README.md
@@ -99,7 +99,9 @@ There are additional optional arguments to control translation, see
 corresponding to your input text(s). `TextResult` has the following properties:
 - `text` is the translated text,
 - `detected_source_lang` is the detected source language code,
-- `billed_characters` is the number of characters billed for the translation. 
+- `billed_characters` is the number of characters billed for the translation,
+- `model_type_used` indicates the translation model used, but is `None` unless
+  the `model_type` option is specified.
 
 ```python
 # Translate text into a target language, in this case, French:
@@ -162,6 +164,14 @@ arguments are:
   translated itself. Characters in the `context` parameter are not counted toward billing.
   See the [API documentation][api-docs-context-param] for more information and 
   example usage.
+- `model_type`: specifies the type of translation model to use, options are:
+  - `'quality_optimized'` (`ModelType.QUALITY_OPTIMIZED`): use a translation
+    model that maximizes translation quality, at the cost of response time. 
+    This option may be unavailable for some language pairs.
+  - `'prefer_quality_optimized'` (`ModelType.PREFER_QUALITY_OPTIMIZED`): use
+    the highest-quality translation model available for the given language pair.
+  - `'latency_optimized'` (`ModelType.LATENCY_OPTIMIZED`): use a translation
+    model that minimizes response time, at the cost of translation quality.
 - `tag_handling`: type of tags to parse before translation, options are `'html'`
   and `'xml'`.
 
diff --git a/deepl/__init__.py b/deepl/__init__.py
@@ -25,6 +25,7 @@
     Formality,
     GlossaryInfo,
     Language,
+    ModelType,
     SplitSentences,
     TextResult,
     Translator,
diff --git a/deepl/__main__.py b/deepl/__main__.py
@@ -79,6 +79,7 @@ def action_text(
     translator: deepl.Translator,
     show_detected_source: bool = False,
     show_billed_characters: Optional[bool] = None,
+    show_model_type_used: Optional[bool] = None,
     **kwargs,
 ):
     """Action function for the text command."""
@@ -93,9 +94,17 @@ def action_text(
             text_value = (
                 "unknown"
                 if output.billed_characters is None
-                else output.billed_characters
+                else str(output.billed_characters)
             )
             print(f"Billed characters: {text_value}")
+        if show_model_type_used:
+            text_value = (
+                "unknown"
+                if output.model_type_used is None
+                else output.model_type_used
+            )
+            print(f"Model type used: {text_value}")
+
         print(output.text)
 
 
@@ -325,6 +334,19 @@ def add_common_arguments(subparser: argparse.ArgumentParser):
         action="store_true",
         help="print billed characters for each text",
     )
+    parser_text.add_argument(
+        "--show-model-type-used",
+        dest="show_model_type_used",
+        action="store_true",
+        help="print the model type used for each text",
+    )
+    parser_text.add_argument(
+        "--model-type",
+        type=str,
+        choices=[enum.value for enum in deepl.ModelType],
+        default=None,
+        help="control model used for translation, see API for information",
+    )
     parser_text.add_argument(
         "text",
         nargs="+",
diff --git a/deepl/api_data.py b/deepl/api_data.py
@@ -17,10 +17,12 @@ def __init__(
         text: str,
         detected_source_lang: str,
         billed_characters: int,
+        model_type_used: Optional[str] = None,
     ):
         self.text = text
         self.detected_source_lang = detected_source_lang
         self.billed_characters = billed_characters
+        self.model_type_used = model_type_used
 
     def __str__(self):
         return self.text
@@ -421,3 +423,18 @@ class SplitSentences(Enum):
 
     def __str__(self):
         return self.value
+
+
+class ModelType(Enum):
+    """Options for model_type parameter.
+
+    Sets whether the translation engine should use a newer model type that
+    offers higher quality translations at the cost of translation time.
+    """
+
+    QUALITY_OPTIMIZED = "quality_optimized"
+    LATENCY_OPTIMIZED = "latency_optimized"
+    PREFER_QUALITY_OPTIMIZED = "prefer_quality_optimized"
+
+    def __str__(self):
+        return self.value
diff --git a/deepl/translator.py b/deepl/translator.py
@@ -8,6 +8,7 @@
     Formality,
     GlossaryInfo,
     GlossaryLanguagePair,
+    ModelType,
     Language,
     SplitSentences,
     TextResult,
@@ -347,6 +348,7 @@ def translate_text(
         non_splitting_tags: Union[str, List[str], None] = None,
         splitting_tags: Union[str, List[str], None] = None,
         ignore_tags: Union[str, List[str], None] = None,
+        model_type: Union[str, ModelType, None] = None,
     ) -> Union[TextResult, List[TextResult]]:
         """Translate text(s) into the target language.
 
@@ -387,6 +389,8 @@ def translate_text(
         :param ignore_tags: (Optional) XML tags containing text that should not
             be translated.
         :type ignore_tags: List of XML tags or comma-separated-list of tags.
+        :param model_type: (Optional) Controls whether the translation engine
+            should use a potentially slower model to achieve higher quality.
         :return: List of TextResult objects containing results, unless input
             text was one string, then a single TextResult object is returned.
         """
@@ -425,6 +429,8 @@ def translate_text(
             request_data["tag_handling"] = tag_handling
         if outline_detection is not None:
             request_data["outline_detection"] = bool(outline_detection)
+        if model_type is not None:
+            request_data["model_type"] = str(model_type)
 
         def join_tags(tag_argument: Union[str, Iterable[str]]) -> List[str]:
             if isinstance(tag_argument, str):
@@ -462,8 +468,14 @@ def join_tags(tag_argument: Union[str, Iterable[str]]) -> List[str]:
                 else ""
             )
             billed_characters = int(translation.get("billed_characters"))
+            model_type_used = translation.get("model_type_used")
             output.append(
-                TextResult(text, detected_source_language, billed_characters)
+                TextResult(
+                    text,
+                    detected_source_language,
+                    billed_characters,
+                    model_type_used,
+                )
             )
 
         return output if multi_input else output[0]
diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -129,11 +129,14 @@ def test_languages(runner):
 
 def test_text(runner):
     result = runner.invoke(
-        main_function, 'text --to DE "proton beam" --show-detected-source'
+        main_function,
+        'text --to DE "proton beam" --show-detected-source '
+        "--show-model-type-used --model-type quality_optimized",
     )
     assert result.exit_code == 0, f"exit: {result.exit_code}\n {result.output}"
     assert example_text["DE"] in result.output
     assert "Detected source" in result.output
+    assert "Model type used: quality_optimized" in result.output
 
     # Test text options
     extra_options = [
@@ -155,6 +158,10 @@ def test_text(runner):
             "--non-splitting-tags a,b --non-splitting-tags c",
             "'non_splitting_tags': ['a', 'b', 'c']",
         ),
+        (
+            "--model-type quality_optimized",
+            "'model_type': 'quality_optimized'",
+        ),
     ]
     for args, search_str in extra_options:
         result = runner.invoke(
diff --git a/tests/test_translate_text.py b/tests/test_translate_text.py
@@ -17,6 +17,24 @@ def test_single_text(translator):
     assert result.billed_characters == len(example_text["EN"])
 
 
+@pytest.mark.parametrize(
+    "model_type",
+    [model_type for model_type in deepl.ModelType],
+)
+def test_model_type(translator, model_type):
+    result = translator.translate_text(
+        example_text["EN"], target_lang="DE", model_type=model_type
+    )
+    # TODO: use `removeprefix()` when we only support py3.9+
+    expected_model_type = str(model_type)
+    prefix_to_remove = "prefer_"
+    if expected_model_type.startswith(prefix_to_remove):
+        expected_model_type = expected_model_type[
+            len(prefix_to_remove) :  # noqa: E203
+        ]
+    assert expected_model_type == result.model_type_used
+
+
 def test_string_list(translator):
     texts = [example_text["FR"], example_text["EN"]]
     result = translator.translate_text(texts, target_lang="DE")