diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8db4dae --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.pyc +microsofttranslator.egg-info diff --git a/CHANGELOG b/CHANGELOG index d508575..17121d6 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,21 @@ Changelog ========= +Version 0.9 +----------- + +* Added support for version 3 of the API (Thanks to @newearthmartin) +* Removed legacy code from version 1 and 2 +* This release is not backwards compatible: + - Methods now take an array of texts instead of a single text + - API calls return more information in JSON format + +Version 0.8 +----------- + +* Added support for version 2 of the API (Thanks to @flyinactor91) +* Added new way of instantiating Translator using only Azure Translator Key (for V2) + Version 0.7 ----------- * Add support for language detection and finding supported languages (Thanks @@ -13,7 +28,7 @@ Version 0.6 Version 0.4 ----------- * Updated to use the Oauth based token issued by Bing - * This release is not backward compatibleas the class signature has changed + * This release is not backward compatible as the class signature has changed Version 0.3 ----------- diff --git a/README.rst b/README.rst index bb852ef..d541c8e 100644 --- a/README.rst +++ b/README.rst @@ -1,7 +1,7 @@ -Microsoft Translator V2 -- Python API +Microsoft Translator V3 -- Python API ===================================== -:Version: 0.7 +:Version: 0.9 :Web: http://fulfil.io/ :keywords: Microsoft Translator :copyright: Fulfil.IO, Openlabs Technologies & Consulting (P) LTD @@ -21,27 +21,11 @@ or application, or those desiring to communicate with people of a different language group.
-Example Usage: -:: - - >>> from microsofttranslator import Translator - >>> translator = Translator('', '') - >>> print translator.translate("Hello", "pt") - "Olá" - -Registering your application ----------------------------- +Create your Azure translation key +--------------------------------- -To register your application with Azure DataMarket, -visit https://datamarket.azure.com/developer/applications/ using the -LiveID credentials from step 1, and click on “Register”. In the -“Register your application” dialog box, you can define your own -Client ID and Name. The redirect URI is not used for the Microsoft -Translator API. However, the redirect URI field is a mandatory field, -and you must provide a URI to obtain the access code. A description is -optional. - -Take a note of the client ID and the client secret value. +To sign up for Translator Text API, please follow instructions here +https://docs.microsoft.com/en-us/azure/cognitive-services/translator/translator-text-how-to-signup Installing ---------- @@ -54,25 +38,30 @@ Installing Features -------- + Translation +++++++++++ :: >>> from microsofttranslator import Translator - >>> translator = Translator('', '') - >>> print translator.translate("Hello", "pt") - "Olá" + >>> translator = Translator('') + >>> print translator.translate(['hello'], 'es') + [['Hola']] -Translate multiple words at once -++++++++++++++++++++++++++++++++ + +Translate multiple phrases and multiple languages at once ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ :: >>> from microsofttranslator import Translator - >>> translator = Translator('', '') - >>> translator.translate_array(['apple', 'orange'], 'pt') - [{u'TranslatedText': u'Apple', u'From': u'en', u'OriginalTextSentenceLengths': [5], u'TranslatedTextSentenceLengths': [5]}, {u'TranslatedText': u'laranja', u'From': u'en', u'OriginalTextSentenceLengths': [6], u'TranslatedTextSentenceLengths': [7]}] + >>> translator = Translator('') + >>> print 
translator.translate(['hello', 'good bye'], 'de,it') + [ + ['Hallo', 'Ciao'], + ['Auf Wiedersehen', 'Arrivederci'] + ] Get supported languages +++++++++++++++++++++++ @@ -80,9 +69,17 @@ Get supported languages :: >>> from microsofttranslator import Translator - >>> translator = Translator('', '') + >>> translator = Translator('') >>> print translator.get_languages() - [u'ar', u'bg', u'ca', u'zh-CHS', u'zh-CHT', u'cs', u'da', u'nl', u'en', u'et', u'fi', u'fr', u'de', u'el', u'ht', u'he', u'hi', u'mww', u'hu', u'id', u'it', u'ja', u'tlh', u'tlh-Qaak', u'ko', u'lv', u'lt', u'ms', u'mt', u'no', u'fa', u'pl', u'pt', u'ro', u'ru', u'sk', u'sl', u'es', u'sv', u'th', u'tr', u'uk', u'ur', u'vi', u'cy'] + { + ... + 'en': {'nativeName': 'English', 'name': 'English', 'dir': 'ltr'}, + 'es': {'nativeName': 'Espa\xf1ol', 'name': 'Spanish', 'dir': 'ltr'}, + 'et': {'nativeName': 'Eesti', 'name': 'Estonian', 'dir': 'ltr'}, + 'fa': {'nativeName': 'Persian', 'name': 'Persian', 'dir': 'rtl'}, + 'fi': {'nativeName': 'Suomi', 'name': 'Finnish', 'dir': 'ltr'}, + ... 
+ } Detect Language +++++++++++++++ @@ -90,9 +87,19 @@ Detect Language :: >>> from microsofttranslator import Translator - >>> translator = Translator('', '') - >>> translator.detect_language('hello') - u'en' + >>> translator = Translator('') + >>> translator.detect_language(['how are you?']) + [{ + 'language': 'en', + 'score': 1.0, + 'isTranslationSupported': True, + 'isTransliterationSupported': False, + 'alternatives': [ + {'score': 1.0, 'isTranslationSupported': True, 'isTransliterationSupported': False, 'language': 'ro'}, + {'score': 1.0, 'isTranslationSupported': True, 'isTransliterationSupported': False, 'language': 'fil'} + ] + }] + Bugs and Development on Github diff --git a/__init__.py b/__init__.py index 50ef508..dc811cc 100644 --- a/__init__.py +++ b/__init__.py @@ -1,14 +1,11 @@ # -*- coding: utf-8 -*- """ __init__ - A translator using the micrsoft translation engine documented here: - - http://msdn.microsoft.com/en-us/library/ff512419.aspx - + https://docs.microsoft.com/en-us/azure/cognitive-services/translator/ """ -__all__ = ['Translator', 'TranslateApiException'] +__all__ = ['Translator', 'TranslatorException'] try: import simplejson as json @@ -16,236 +13,29 @@ import json import requests -import six -import warnings import logging from datetime import datetime, timedelta -class ArgumentOutOfRangeException(Exception): - def __init__(self, message): - self.message = message.replace('ArgumentOutOfRangeException: ', '') - super(ArgumentOutOfRangeException, self).__init__(self.message) - - -class TranslateApiException(Exception): - def __init__(self, message, *args): - self.message = message.replace('TranslateApiException: ', '') - super(TranslateApiException, self).__init__(self.message, *args) - - -class Translator(object): - """Implements AJAX API for the Microsoft Translator service - - :param app_id: A string containing the Bing AppID.
(Deprecated) - """ - - base_url = "http://api.microsofttranslator.com/V2/Ajax.svc" - - def __init__( - self, client_id, client_secret, - scope="http://api.microsofttranslator.com", - grant_type="client_credentials", app_id=None, debug=False): - """ - - - :param client_id: The client ID that you specified when you registered - your application with Azure DataMarket. - :param client_secret: The client secret value that you obtained when - you registered your application with Azure - DataMarket. - :param scope: Defaults to http://api.microsofttranslator.com - ;param grant_type: Defaults to "client_credentials" - :param app_id: Deprecated - :param debug: If true, the logging level will be set to debug - - .. versionchanged: 0.4 - Bing AppID mechanism is deprecated and is no longer supported. - See: http://msdn.microsoft.com/en-us/library/hh454950 - """ - if app_id is not None: - warnings.warn("""app_id is deprected since v0.4. - See: http://msdn.microsoft.com/en-us/library/hh454950 - """, DeprecationWarning, stacklevel=2) - - self.client_id = client_id - self.client_secret = client_secret - self.scope = scope - self.grant_type = grant_type - self.access_token = None - self.debug = debug - self.logger = logging.getLogger("microsofttranslator") - if self.debug: - self.logger.setLevel(level=logging.DEBUG) - - def get_access_token(self): - """Bing AppID mechanism is deprecated and is no longer supported. - As mentioned above, you must obtain an access token to use the - Microsoft Translator API. The access token is more secure, OAuth - standard compliant, and more flexible. Users who are using Bing AppID - are strongly recommended to get an access token as soon as possible. - - .. note:: - The value of access token can be used for subsequent calls to the - Microsoft Translator API. The access token expires after 10 - minutes. It is always better to check elapsed time between time at - which token issued and current time. 
If elapsed time exceeds 10 - minute time period renew access token by following obtaining - access token procedure. - - :return: The access token to be used with subsequent requests - """ - args = { - 'client_id': self.client_id, - 'client_secret': self.client_secret, - 'scope': self.scope, - 'grant_type': self.grant_type - } - response = requests.post( - 'https://datamarket.accesscontrol.windows.net/v2/OAuth2-13', - data=args - ).json() - - self.logger.debug(response) - - if "error" in response: - raise TranslateApiException( - response.get('error_description', 'No Error Description'), - response.get('error', 'Unknown Error') - ) - return response['access_token'] - - def call(self, path, params): - """Calls the given path with the params urlencoded - - :param path: The path of the API call being made - :param params: The parameters dictionary - """ - if not self.access_token: - self.access_token = self.get_access_token() - - resp = requests.get( - "/".join([self.base_url, path]), - params=params, - headers={'Authorization': 'Bearer %s' % self.access_token} - ) - resp.encoding = 'UTF-8-sig' - rv = resp.json() - - if isinstance(rv, six.string_types) and \ - rv.startswith("ArgumentOutOfRangeException"): - raise ArgumentOutOfRangeException(rv) - - if isinstance(rv, six.string_types) and \ - rv.startswith("TranslateApiException"): - raise TranslateApiException(rv) - - if isinstance(rv, six.string_types) and \ - rv.startswith(("ArgumentException: " - "The incoming token has expired")): - self.access_token = None - return self.call(path, params) - return rv - - def translate( - self, text, to_lang, from_lang=None, - content_type='text/plain', category='general'): - """Translates a text string from one language to another. - - :param text: A string representing the text to translate. - :param to_lang: A string representing the language code to - translate the text into. - :param from_lang: A string representing the language code of the - translation text. 
If left None the response will include the - result of language auto-detection. (Default: None) - :param content_type: The format of the text being translated. - The supported formats are "text/plain" and "text/html". Any HTML - needs to be well-formed. - :param category: The category of the text to translate. The only - supported category is "general". - """ - params = { - 'text': text.encode('utf8'), - 'to': to_lang, - 'contentType': content_type, - 'category': category, - } - if from_lang is not None: - params['from'] = from_lang - return self.call("Translate", params) - - def translate_array(self, texts, to_lang, from_lang=None, **options): - """Translates an array of text strings from one language to another. - - :param texts: A list containing texts for translation. - :param to_lang: A string representing the language code to - translate the text into. - :param from_lang: A string representing the language code of the - translation text. If left None the response will include the - result of language auto-detection. (Default: None) - :param options: A TranslateOptions element containing the values below. - They are all optional and default to the most common settings. - - Category: A string containing the category (domain) of the - translation. Defaults to "general". - ContentType: The format of the text being translated. The - supported formats are "text/plain" and "text/html". Any - HTML needs to be well-formed. - Uri: A string containing the content location of this - translation. - User: A string used to track the originator of the submission. - State: User state to help correlate request and response. The - same contents will be returned in the response. 
- """ - options = { - 'Category': "general", - 'Contenttype': "text/plain", - 'Uri': '', - 'User': 'default', - 'State': '' - }.update(options) - params = { - 'texts': json.dumps(texts), - 'to': to_lang, - 'options': json.dumps(options), - } - if from_lang is not None: - params['from'] = from_lang - - return self.call("TranslateArray", params) - - def get_languages(self): - """Fetches the languages supported by Microsoft Translator - Returns list of languages - """ - return self.call('GetLanguagesForTranslate', '') - - def detect_language(self, text): - """Detects language of given string - Returns two letter language - Example : fr - """ - params = { - 'text': text.encode('utf8') - } - return self.call('Detect', params) +logger = logging.getLogger('microsofttranslator') class AzureAuthToken: - """Class to make sure that .value is always a valid 10-min auth token""" + """ Class to make sure that .value is always a valid 10-min auth token """ _token = None last_fetched = None - def __init__(self, api_key: str): + def __init__(self, api_key): self.azure_api_key = api_key @property def value(self): - """The value of the current auth token""" + """ The value of the current auth token """ if self._token is None or self.outdated: self.update() return self._token @property def outdated(self): - """Returns True if a new token value must be fetched""" + """ Returns True if a new token value must be fetched """ return self.last_fetched is None or \ datetime.utcnow() > self.last_fetched+timedelta(minutes=9) @@ -256,49 +46,98 @@ def update(self): self._token = resp.text self.last_fetched = datetime.utcnow() -class TranslatorCS(Translator): - """TranslatorCS is designed to be used with Azure Cognitive Services""" +class TranslatorException(Exception): + def __init__(self, code, message, *args): + self.code = code + self.message = message + super(TranslatorException, self).__init__('%d-%s' % (self.code, self.message), *args) - def __init__(self, client_key, debug=False): - 
"""""" - self.auth_token = AzureAuthToken(client_key) - self.debug = debug - self.logger = logging.getLogger("microsofttranslator") - if self.debug: - self.logger.setLevel(level=logging.DEBUG) +class Translator(object): + """ Implements the Azure Cognitive Services - Translator REST API """ + + base_url = 'https://api.cognitive.microsofttranslator.com' - def get_access_token(self): - raise AttributeError( - "'TranslatorCS' object has no attribute 'get_access_token'" - ) + def __init__(self, client_key): + self.auth_token = AzureAuthToken(client_key) - def call(self, path, params): - """Calls the given path with the params urlencoded + def call(self, path, params, json=None): + """ + Calls the given path with the params urlencoded. + Will be POST if json is defined, otherwise a GET. :param path: The path of the API call being made - :param params: The parameters dictionary + :param params: The parameters dictionary for the query string + :param json: JSON data for POST body. """ - - resp = requests.get( - "/".join([self.base_url, path]), - params=params, - headers={'Authorization': 'Bearer %s' % self.auth_token.value} - ) + params = params.copy() + params.update({'api-version': '3.0'}) + url = self.base_url + '/' + path + + headers = {'Authorization': 'Bearer %s' % self.auth_token.value} + if json: + query_params = map(lambda e: '%s=%s' % e, params.items()) + url += '?' 
+ '&'.join(query_params) + resp = requests.post(url, json=json, headers=headers) + else: + resp = requests.get(url, params=params, headers=headers) resp.encoding = 'UTF-8-sig' rv = resp.json() - if isinstance(rv, six.string_types) and \ - rv.startswith("ArgumentOutOfRangeException"): - raise ArgumentOutOfRangeException(rv) + if 'error' in rv: + error = rv['error'] + raise TranslatorException(error['code'], error['message']) - if isinstance(rv, six.string_types) and \ - rv.startswith("TranslateApiException"): - raise TranslateApiException(rv) - - #In theroy, this should never be called, but just in case... - if isinstance(rv, six.string_types) and \ - rv.startswith(("ArgumentException: " - "The incoming token has expired")): - self.auth_token.last_fetched = None - return self.call(path, params) return rv + + @staticmethod + def texts_as_json(texts): + return [{'Text': text} for text in texts] + + def get_languages(self): + """ + Fetches the languages supported by Microsoft Translator + Returns a dict of language details keyed by language code + """ + return self.call('languages', {})['translation'] + + def translate( + self, texts, + to_lang, from_lang=None, + text_type='plain', category='general'): + """ + Translates one or more text strings from one language to another. + + :param texts: + A string array representing the texts to translate. + :param to_lang: + A string representing the language code to translate the text into. + Can be many languages separated by comma. + :param from_lang: + A string representing the language code of the translation text. + If left None the response will include + the result of language auto-detection. (Default: None) + :param text_type: + The format of the text being translated. + The supported formats are "plain" and "html". + Any HTML needs to be well-formed. + :param category: + The category of the text to translate. + The only supported category is "general".
+ """ + params = { + 'to': to_lang, + 'textType': text_type, + 'category': category, + } + if from_lang: params['from'] = from_lang + translated = self.call('translate', params, json=Translator.texts_as_json(texts)) + translated = [[inner['text'] for inner in outer['translations']] for outer in translated] + return translated + + def detect_language(self, texts): + """ + Detects the language of each of the given texts + Returns a list with one detection result (dict) per text + :param texts: A string array representing the texts to detect language. + """ + return self.call('detect', {}, json=Translator.texts_as_json(texts)) diff --git a/setup.py b/setup.py index 37ff647..1f5306d 100644 --- a/setup.py +++ b/setup.py @@ -14,51 +14,50 @@ >>> from microsofttranslator import Translator >>> translator = Translator('') - >>> print translator.translate("Hello", "pt") - "Olá" + >>> print translator.translate(['Hello'], 'pt') + [['Olá']] The documentation for the service can be obtained here: - http://msdn.microsoft.com/en-us/library/ff512423.aspx + https://docs.microsoft.com/en-us/azure/cognitive-services/translator/ The project is hosted on GitHub where your could fork the project or report issues. Visit https://github.com/fulfilio/Microsoft-Translator-Python-API - """ + import codecs from setuptools import setup - setup( - name="microsofttranslator", - version="0.8", + name='microsofttranslator', + version='0.9', packages=[ 'microsofttranslator', ], package_dir={ 'microsofttranslator': '.'
}, - author="Fulfil.IO Inc., Openlabs Technologies & Consulting (P) Limited", - author_email="info@fulfil.io", - description="Microsoft Translator V2 - Python API", + author='Fulfil.IO Inc., Openlabs Technologies & Consulting (P) Limited', + author_email='info@fulfil.io', + description='Microsoft Translator V3 - Python API', long_description=codecs.open( 'README.rst', encoding='UTF-8' ).read(), - license="BSD", - keywords="translation microsoft", - url="https://www.fulfil.io/", + license='BSD', + keywords='translation microsoft', + url='https://www.fulfil.io/', include_package_data=True, classifiers=[ - "Development Status :: 5 - Production/Stable", - "Intended Audience :: Developers", - "License :: OSI Approved :: BSD License", - "Natural Language :: English", - "Operating System :: OS Independent", - "Topic :: Software Development :: Internationalization", - "Topic :: Utilities", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 2", + 'Development Status :: 5 - Production/Stable', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: BSD License', + 'Natural Language :: English', + 'Operating System :: OS Independent', + 'Topic :: Software Development :: Internationalization', + 'Topic :: Utilities', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 2', ], - test_suite="microsofttranslator.test.test_all", + test_suite='microsofttranslator.test.test_all', install_requires=[ 'requests >= 1.2.3', 'six', diff --git a/test.py b/test.py index c900296..b42dd4c 100644 --- a/test.py +++ b/test.py @@ -1,47 +1,45 @@ # -*- coding: utf-8 -*- """ test - Test the translator - """ import os import unittest -from microsofttranslator import Translator, TranslateApiException +from . 
import Translator, TranslatorException -client_id = os.environ['CLIENT_ID'] -client_secret = os.environ['CLIENT_SECRET'] - -default_languages = [u'en', u'fr', u'de'] +azure_translator_key = os.environ['AZURE_TRANSLATOR_KEY'] +default_languages = ['en', 'fr', 'de'] class TestTranslator(unittest.TestCase): - def test_translate(self): - client = Translator(client_id, client_secret, debug=True) - self.assertEqual( - client.translate("hello", "pt"), u'Ol\xe1' - ) - - def test_translate_array(self): - client = Translator(client_id, client_secret, debug=True) - self.assertEqual(client.translate("hello", "pt"), u'Ol\xe1') - - def test_invalid_client_id(self): - client = Translator("foo", "bar") - with self.assertRaises(TranslateApiException): - client.translate("hello", "pt") + client = Translator(azure_translator_key) + translated = client.translate(['hello', 'how are you?'], 'pt,fr') + self.assertEqual(translated, [ + [u'Ol\xe1', u'Bonjour'], + [u'Como est\xe1?', u'Comment vas-tu?'], + ]) + + def test_invalid_translator_key(self): + client = Translator('invalid_translator_key') + with self.assertRaises(TranslatorException): + client.translate(['hello'], 'pt') + + def test_invalid_language(self): + client = Translator(azure_translator_key) + with self.assertRaises(TranslatorException): + client.translate(['hello'], 'abcd') def test_get_languages(self): - client = Translator(client_id, client_secret, debug=True) + client = Translator(azure_translator_key) languages = client.get_languages() - self.assertEqual(type(languages), list) - self.assertTrue(set(default_languages).issubset(set(languages))) + for language in default_languages: + self.assertIn(language, languages) def test_detect_language(self): - client = Translator(client_id, client_secret, debug=True) - self.assertEqual(client.detect_language('hello'), u'en') - + client = Translator(azure_translator_key) + detected_language = client.detect_language(['how are you?'])[0] + 
self.assertEqual(detected_language['language'], 'en') def test_all(): loader = unittest.TestLoader()