From cf2a0a8db12e8cf83c30a05b0ac337645547733a Mon Sep 17 00:00:00 2001 From: Abdellahitech Date: Fri, 30 Aug 2024 01:40:12 +0200 Subject: [PATCH 1/5] feat: add profile matching & grading and job matching --- hrflow/job/__init__.py | 2 + hrflow/job/matching.py | 107 ++++++++++++++++++++++++++++++++++++ hrflow/profile/__init__.py | 4 ++ hrflow/profile/grading.py | 68 +++++++++++++++++++++++ hrflow/profile/matching.py | 108 +++++++++++++++++++++++++++++++++++++ 5 files changed, 289 insertions(+) create mode 100644 hrflow/job/matching.py create mode 100644 hrflow/profile/grading.py create mode 100644 hrflow/profile/matching.py diff --git a/hrflow/job/__init__.py b/hrflow/job/__init__.py index b2f80d3..75f792f 100644 --- a/hrflow/job/__init__.py +++ b/hrflow/job/__init__.py @@ -1,5 +1,6 @@ from .asking import JobAsking from .embedding import JobEmbedding +from .matching import JobMatching from .parsing import JobParsing from .reasoning import JobReasoning from .scoring import JobScoring @@ -17,3 +18,4 @@ def __init__(self, client): self.scoring = JobScoring(self.client) self.reasoning = JobReasoning(self.client) self.storing = JobStoring(self.client) + self.matching = JobMatching(self.client) diff --git a/hrflow/job/matching.py b/hrflow/job/matching.py new file mode 100644 index 0000000..bf697ce --- /dev/null +++ b/hrflow/job/matching.py @@ -0,0 +1,107 @@ +import json + +from ..core.rate_limit import rate_limiter +from ..core.validation import ( + KEY_REGEX, + ORDER_BY_VALUES, + SORT_BY_VALUES, + validate_key, + validate_limit, + validate_page, + validate_provider_keys, + validate_reference, + validate_response, + validate_value, +) + + +class JobMatching: + def __init__(self, api): + """Init.""" + self.client = api + + @rate_limiter + def list( + self, + board_key, + job_key=None, + job_reference=None, + board_keys=None, + page=1, + limit=30, + sort_by="created_at", + order_by=None, + created_at_min=None, + created_at_max=None, + **kwargs, + ): + """ + Retrieve the matching information. + + Args: + job_key: + job_reference: + board_key: + baord_keys: + limit: (default to 30) + number of fetched jobs/page + page: REQUIRED default to 1 + number of the page associated to the pagination + sort_by: + order_by: + created_at_min: + The minimum date of creation of the targeted Jobs. + Format : "YYYY-MM-DD". + created_at_max: + The maximum date of creation of the targeted Jobs. + Format : "YYYY-MM-DD". + Returns + Applies the params to filter on Jobs in the targeted Boards and + returns the response from the endpoint. + Response examples : + - Success response : + { + "code": 200, # response code + "message": "Job Matching results", # response message + "meta" : {'page': 1, # current page + 'maxPage': 5, # max page in the paginated response + 'count': 2, # number of jobs in the current page + 'total': 10}, # total number of jobs retrieved + "data": { # list of jobs objects + "predictions":[ + [] + ] + "jobs":[ + { + "key": "xxx", + "reference": "xxx", + ... + }, + ... + ] + } + } + - Error response : (if the board_key is not valid) + { + "code": 400, + "message": "Invalid parameters. Unable to find object: source" + } + + """ + + query_params = { + "board_key": validate_key("Board", board_key, regex=KEY_REGEX), + "job_key": validate_key("Key", job_key, regex=KEY_REGEX), + "job_reference": validate_reference(job_reference), + "board_keys": json.dumps(validate_provider_keys(board_keys)), + "limit": validate_limit(limit), + "page": validate_page(page), + "sort_by": validate_value(sort_by, SORT_BY_VALUES, "sort by"), + "order_by": validate_value(order_by, ORDER_BY_VALUES, "order by"), + "created_at_min": created_at_min, # TODO validate dates format + "created_at_max": created_at_max, # TODO validate dates format + } + + params = {**query_params, **kwargs} + response = self.client.get("jobs/matching", params) + return validate_response(response) diff --git a/hrflow/profile/__init__.py b/hrflow/profile/__init__.py index 44a8dec..1f5016b 100644 --- a/hrflow/profile/__init__.py +++ b/hrflow/profile/__init__.py @@ -3,6 +3,8 @@ from .asking import ProfileAsking from .attachment import ProfileAttachments from .embedding import ProfileEmbedding +from .grading import ProfileGrading +from .matching import ProfileMatching from .parsing import ProfileParsing from .reasoning import ProfileReasoning from .revealing import ProfileRevealing @@ -35,3 +37,5 @@ def __init__(self, client): self.searching = ProfileSearching(self.client) self.reasoning = ProfileReasoning(self.client) self.unfolding = ProfileUnfolding(self.client) + self.matching = ProfileMatching(self.client) + self.grading = ProfileGrading(self.client) diff --git a/hrflow/profile/grading.py b/hrflow/profile/grading.py new file mode 100644 index 0000000..d1fe079 --- /dev/null +++ b/hrflow/profile/grading.py @@ -0,0 +1,68 @@ +import json +import typing as t + +from ..core.rate_limit import rate_limiter +from ..core.validation import ( + KEY_REGEX, + ORDER_BY_VALUES, + SORT_BY_VALUES, + validate_key, + validate_limit, + validate_page, + validate_provider_keys, + validate_reference, + validate_response, + validate_value, +) + + +class ProfileGrading: + def __init__(self, api): + """Init.""" + self.client = api + + @rate_limiter + def get( + self, + algorithm_key: t.Literal[ + "1d07451c0f33091869bd3c6d336dfa4e5c63af74", + "daaa0f61b72a68b985f31d123ad45b361adc91e4", + ], + source_key, + board_key, + profile_key=None, + profile_reference=None, + job_key=None, + job_reference=None, + ): + """ + 💾 Grade a Profile indexed in a Source for a Job + (https://api.hrflow.ai/v1/profile/grading). + + Args: + source_key: + The key of the Source where the profile is indexed. + key: + The Profile unique identifier. + reference: + The Profile reference chosen by the customer. + job_key: + job_reference: + board_key: + + Returns + Get information + + """ + query_params = { + "algorithm_key": algorithm_key, + "source_key": validate_key("Source", source_key, regex=KEY_REGEX), + "profile_key": validate_key("Key", profile_key, regex=KEY_REGEX), + "profile_reference": validate_reference(profile_reference), + "board_key": validate_key("Board", board_key, regex=KEY_REGEX), + "job_key": validate_key("Key", job_key, regex=KEY_REGEX), + "job_reference": validate_reference(job_reference), + } + + response = self.client.get("profile/grading", query_params) + return validate_response(response) diff --git a/hrflow/profile/matching.py b/hrflow/profile/matching.py new file mode 100644 index 0000000..87821fe --- /dev/null +++ b/hrflow/profile/matching.py @@ -0,0 +1,108 @@ +import json + +from ..core.rate_limit import rate_limiter +from ..core.validation import ( + KEY_REGEX, + ORDER_BY_VALUES, + SORT_BY_VALUES, + validate_key, + validate_limit, + validate_page, + validate_provider_keys, + validate_reference, + validate_response, + validate_value, +) + + +class ProfileMatching: + def __init__(self, api): + """Init.""" + self.client = api + + @rate_limiter + def list( + self, + source_key, + profile_key=None, + profile_reference=None, + source_keys=None, + page=1, + limit=30, + sort_by="created_at", + order_by=None, + created_at_min=None, + created_at_max=None, + **kwargs, + ): + """ + Retrieve the matching information. + + Args: + profile_key: + profile_reference: + source_key: + source_keys: + source_keys + limit: (default to 30) + number of fetched profiles/page + page: REQUIRED default to 1 + number of the page associated to the pagination + sort_by: + order_by: + created_at_min: + The minimum date of creation of the targeted Profiles. + Format : "YYYY-MM-DD". + created_at_max: + The maximum date of creation of the targeted Profiles. + Format : "YYYY-MM-DD". + Returns + Applies the params to filter on Profiles in the targeted Sources and + returns the response from the endpoint. + Response examples : + - Success response : + { + "code": 200, # response code + "message": "Profile Matching results", # response message + "meta" : {'page': 1, # current page + 'maxPage': 5, # max page in the paginated response + 'count': 2, # number of profiles in the current page + 'total': 10}, # total number of profiles retrieved + "data": { # list of profiles objects + "predictions":[ + [] + ] + "profiles":[ + { + "key": "xxx", + "reference": "xxx", + ... + }, + ... + ] + } + } + - Error response : (if the source_key is not valid) + { + "code": 400, + "message": "Invalid parameters. Unable to find object: source" + } + + """ + + query_params = { + "source_key": validate_key("Source", source_key, regex=KEY_REGEX), + "profile_key": validate_key("Key", profile_key, regex=KEY_REGEX), + "profile_reference": validate_reference(profile_reference), + "source_keys": json.dumps(validate_provider_keys(source_keys)), + "limit": validate_limit(limit), + "page": validate_page(page), + "sort_by": validate_value(sort_by, SORT_BY_VALUES, "sort by"), + "order_by": validate_value(order_by, ORDER_BY_VALUES, "order by"), + "created_at_min": created_at_min, # TODO validate dates format + "created_at_max": created_at_max, # TODO validate dates format + } + + params = {**query_params, **kwargs} + response = self.client.get("profiles/matching", params) + return validate_response(response) From 48704503601fda306d28d1d9d0892de7bd4c6370 Mon Sep 17 00:00:00 2001 From: Abdellahitech Date: Fri, 30 Aug 2024 12:05:06 +0200 Subject: [PATCH 2/5] fix: formatting after review --- hrflow/job/matching.py | 80 ++++++++++++++++++++----------------- hrflow/profile/grading.py | 42 +++++++++++--------- hrflow/profile/matching.py | 81 ++++++++++++++++++++------------------ 3 files changed, 109 insertions(+), 94 deletions(-) diff --git a/hrflow/job/matching.py b/hrflow/job/matching.py index bf697ce..429aefb 100644 --- a/hrflow/job/matching.py +++ b/hrflow/job/matching.py @@ -36,57 +36,63 @@ def list( **kwargs, ): """ - Retrieve the matching information. + 💾 Match Jobs indexed in Boards to a Job + (https://api.hrflow.ai/v1/jobs/matching). Args: - job_key: - job_reference: board_key: - baord_keys: - limit: (default to 30) - number of fetched jobs/page - page: REQUIRED default to 1 - number of the page associated to the pagination - sort_by: - order_by: - created_at_min: - The minimum date of creation of the targeted Jobs. - Format : "YYYY-MM-DD". - created_at_max: - The maximum date of creation of the targeted Jobs. - Format : "YYYY-MM-DD". - Returns - Applies the params to filter on Jobs in the targeted Boards and - returns the response from the endpoint. - Response examples : - - Success response : + The key of the Board in which the job is indexed. + job_key: + The key of a specific job to macth with. + job_reference: + The reference of a specific job to macth with. + board_keys: + A list of keys for multiple Boards of profiles to be matched with the specific job. + Example : ["xxx", "yyy", "zzz"] + limit: (default to 30) + number of fetched jobs/page + page: REQUIRED default to 1 + number of the page associated to the pagination + sort_by: + order_by: + created_at_min: + The minimum date of creation of the targeted Jobs. + Format: "YYYY-MM-DD". + created_at_max: + The maximum date of creation of the targeted Jobs. + Format: "YYYY-MM-DD". + + Returns: + Match the job identified by job_key or job_reference + and board_key with all jobs in the boards identified by keys in board_keys list. + Response examples: + - Success response: { "code": 200, # response code "message": "Job Matching results", # response message - "meta" : {'page': 1, # current page - 'maxPage': 5, # max page in the paginated response - 'count': 2, # number of jobs in the current page - 'total': 10}, # total number of jobs retrieved - "data": { # list of jobs objects - "predictions":[ - [] - ] - "jobs":[ - { - "key": "xxx", - "reference": "xxx", + "meta" : { + 'page': 1, # current page + 'maxPage': 5, # max page in the paginated response + 'count': 2, # number of jobs in the current page + 'total': 10 # total number of jobs retrieved + }, + "data": { # list of jobs objects + "predictions": [[]], + "jobs": [ + { + "key": "xxx", + "reference": "xxx", + ... + }, ... - }, - ... ] } } - - Error response : (if the board_key is not valid) + - Error response: (if the board_key is not valid) { "code": 400, "message": "Invalid parameters. Unable to find object: source" } - """ query_params = { diff --git a/hrflow/profile/grading.py b/hrflow/profile/grading.py index d1fe079..a55b973 100644 --- a/hrflow/profile/grading.py +++ b/hrflow/profile/grading.py @@ -18,40 +18,44 @@ class ProfileGrading: def __init__(self, api): - """Init.""" + """Initialize the ProfileGrading class with the provided API client.""" self.client = api @rate_limiter def get( self, - algorithm_key: t.Literal[ - "1d07451c0f33091869bd3c6d336dfa4e5c63af74", - "daaa0f61b72a68b985f31d123ad45b361adc91e4", - ], - source_key, - board_key, - profile_key=None, - profile_reference=None, - job_key=None, - job_reference=None, + algorithm_key: str, + source_key: str, + board_key: str, + profile_key: t.Optional[str] = None, + profile_reference: t.Optional[str] = None, + job_key: t.Optional[str] = None, + job_reference: t.Optional[str] = None, ): """ 💾 Grade a Profile indexed in a Source for a Job (https://api.hrflow.ai/v1/profile/grading). Args: + algorithm_key: + The key of the grading algorithm to use. + Refer to the documentation: https://developers.hrflow.ai/reference/grade-a-profile-indexed-in-a-source-for-a-job + for all possible values. source_key: - The key of the Source where the profile is indexed. - key: - The Profile unique identifier. - reference: - The Profile reference chosen by the customer. + The key of the Source where the profile to grade is indexed. + board_key: + The key of the Board where the job to grade to is indexed. + profile_key: + (Optional) The Profile unique identifier. + profile_reference: + (Optional) The Profile reference chosen by the customer. job_key: + (Optional) The Job unique identifier. job_reference: - board_key: + (Optional) The Job reference chosen by the customer. - Returns - Get information + Returns: + The grading information for the profile, based on the specified job. """ query_params = { diff --git a/hrflow/profile/matching.py b/hrflow/profile/matching.py index 87821fe..66ab3fc 100644 --- a/hrflow/profile/matching.py +++ b/hrflow/profile/matching.py @@ -17,7 +17,7 @@ class ProfileMatching: def __init__(self, api): - """Init.""" + """Initialize the ProfileMatching class with the provided API client.""" self.client = api @rate_limiter @@ -36,58 +36,63 @@ def list( **kwargs, ): """ - Retrieve the matching information. + 💾 Match Profils indexed in Sources to a Profile + (https://api.hrflow.ai/v1/profils/matching). Args: - profile_key: - profile_reference: source_key: - source_keys: - source_keys + The key of the Source in which the profile is indexed. + profile_key: (Optional) + The key of a specific profile to macth with. + profile_reference: (Optional) + The reference of a specific profile to macth with. + source_keys: (Optional) + A list of keys for multiple Sources of profiles to be matched with the profile. + page: (default to 1) + The page number for pagination. limit: (default to 30) - number of fetched profiles/page - page: REQUIRED default to 1 - number of the page associated to the pagination - sort_by: - order_by: - created_at_min: - The minimum date of creation of the targeted Profiles. - Format : "YYYY-MM-DD". - created_at_max: - The maximum date of creation of the targeted Profiles. - Format : "YYYY-MM-DD". - Returns - Applies the params to filter on Profiles in the targeted Sources and - returns the response from the endpoint. - Response examples : - - Success response : + Number of profiles to fetch per page. + sort_by: (default to "created_at") + The field to sort by. + order_by: (Optional) + The order of sorting, either 'asc' or 'desc'. + created_at_min: (Optional) + The minimum creation date of the profiles in format "YYYY-MM-DD". + created_at_max: (Optional) + The maximum creation date of the profiles in format "YYYY-MM-DD". + + Returns: + Match the profile identified by profile_key or profile_reference + and board_key with all profiles in the sources identified by keys in source_keys list. + + Response examples: + - Success response: { "code": 200, # response code "message": "Profile Matching results", # response message - "meta" : {'page': 1, # current page - 'maxPage': 5, # max page in the paginated response - 'count': 2, # number of profiles in the current page - 'total': 10}, # total number of profiles retrieved - "data": { # list of profiles objects - "predictions":[ - [] - ] - "profiles":[ - { - "key": "xxx", - "reference": "xxx", + "meta": { + 'page': 1, # current page + 'maxPage': 5, # max page in the paginated response + 'count': 2, # number of profiles in the current page + 'total': 10 # total number of profiles retrieved + }, + "data": { # list of profile objects + "predictions": [[]], + "profiles": [ + { + "key": "xxx", + "reference": "xxx", + ... + }, ... - }, - ... ] } } - - Error response : (if the source_key is not valid) + - Error response: (if the source_key is not valid) { "code": 400, "message": "Invalid parameters. Unable to find object: source" } - """ query_params = { From 17061ea3162a905495b01f68c83d1d43b9d87d3d Mon Sep 17 00:00:00 2001 From: Abdellahitech Date: Fri, 30 Aug 2024 13:00:59 +0200 Subject: [PATCH 3/5] fix: typo in matching profile --- hrflow/profile/matching.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hrflow/profile/matching.py b/hrflow/profile/matching.py index 66ab3fc..d4acd84 100644 --- a/hrflow/profile/matching.py +++ b/hrflow/profile/matching.py @@ -63,7 +63,7 @@ def list( Returns: Match the profile identified by profile_key or profile_reference - and board_key with all profiles in the sources identified by keys in source_keys list. + and source_key with all profiles in the sources identified by keys in source_keys list. Response examples: - Success response: From 875a822b2984f162d1a7d25e0d6b744dac22be69 Mon Sep 17 00:00:00 2001 From: Abdellahitech Date: Tue, 29 Oct 2024 01:56:05 +0100 Subject: [PATCH 4/5] feat : new scoring evaluation notebook --- .../colab/scoring_evaluation_notebook.ipynb | 387 ++++++++++++++++++ 1 file changed, 387 insertions(+) create mode 100644 examples/colab/scoring_evaluation_notebook.ipynb diff --git a/examples/colab/scoring_evaluation_notebook.ipynb b/examples/colab/scoring_evaluation_notebook.ipynb new file mode 100644 index 0000000..660203e --- /dev/null +++ b/examples/colab/scoring_evaluation_notebook.ipynb @@ -0,0 +1,387 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Overview\n", + "This notebook is made to evaluate the scoring of profiles regarding a specific job or jobs regarding specific profile\n", + "This notebook will be structured as follow:\n", + "* General functions:\n", + " * Get all items\n", + " * Get scoring results\n", + " * Tag item\n", + "* Score profiles for a specific job\n", + "* Score jobs for a specific profile" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "import pandas as pd\n", + "import json\n", + "import os\n", + "from datetime import datetime\n", + "from tqdm import tqdm\n", + "from hrflow import Hrflow\n", + "from dotenv import load_dotenv" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "load_dotenv()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "API_SECRET = os.getenv(\"API_SECRET\")\n", + "API_USER = os.getenv(\"API_USER\")\n", + "ALGORITHM = os.getenv(\"ALGORITHM\")\n", + "BOARD_KEY = os.getenv(\"BOARD_KEY\")\n", + "BOARD_KEYS = os.getenv(\"BOARD_KEYS\")\n", + "SOURCE_KEY = os.getenv(\"SOURCE_KEY\")\n", + "SOURCE_KEYS = os.getenv(\"SOURCE_KEYS\")\n", + "OUTPUT_FILE = os.getenv(\"OUTPUT_FILE\")\n", + "LIMIT_SCORING = \"32\"\n", + "LIMIT_SEARCHING = \"10000\"\n", + "ALGORITHM_FAMILY = \"tagger-rome4-family\"\n", + "ALGORITHM_SUBFAMILY = \"tagger-rome4-subfamily\"\n", + "ALGORITHM_CATEGORY = \"tagger-rome4-category\"\n", + "ALGORITHM_JOB_TITLE = \"tagger-rome4-jobtitle\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "SOURCE_KEYS=json.loads(SOURCE_KEYS)\n", + "BOARD_KEYS=json.loads(BOARD_KEYS)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client = Hrflow(api_secret=API_SECRET,api_user=API_USER)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## Function to score item based on source or board of items\n", + "def get_scoring_items(client, item, source_keys=None, board_keys=None):\n", + " if source_keys:\n", + " response_scoring = client.profile.scoring.list(\n", + " job_key=item[\"key\"],\n", + " board_key=BOARD_KEY,\n", + " source_keys=source_keys,\n", + " limit=LIMIT_SCORING,\n", + " agent_key=ALGORITHM\n", + " )\n", + " else:\n", + " assert board_keys is not None\n", + " response_scoring = client.job.scoring.list(\n", + " profile_key=item[\"key\"],\n", + " source_key=SOURCE_KEY,\n", + " board_keys=board_keys,\n", + " limit=LIMIT_SCORING,\n", + " agent_key=ALGORITHM\n", + " )\n", + " \n", + " if response_scoring[\"code\"] != 200:\n", + " print(\"error while returning scoring:\", response_scoring)\n", + " return\n", + " \n", + " scored_items = response_scoring[\"data\"][\"profiles\"] if source_keys else response_scoring[\"data\"][\"jobs\"]\n", + "\n", + " scores = [prediction[1] for prediction in response_scoring[\"data\"][\"predictions\"]]\n", + " \n", + " return item, scored_items, scores\n", + "\n", + "# get items is sources or boards\n", + "def get_items_searching(\n", + " client,source_keys=None,board_keys=None\n", + "):\n", + " if source_keys:\n", + " response_searching = client.profile.searching.list(\n", + " source_keys=source_keys,\n", + " limit=LIMIT_SEARCHING,\n", + " order_by=\"desc\"\n", + " )\n", + " else:\n", + " assert board_keys is not None\n", + " response_searching = client.job.searching.list(\n", + " board_keys=board_keys,\n", + " limit=LIMIT_SEARCHING,\n", + " order_by=\"desc\"\n", + " )\n", + " \n", + " if response_searching[\"code\"] != 200:\n", + " print(\"error while returning searching:\", response_searching)\n", + " return\n", + " \n", + " searched_items = response_searching[\"data\"][\"profiles\"] if source_keys else response_searching[\"data\"][\"jobs\"]\n", + " return searched_items\n", + "\n", + "## function to tag items\n", + "def tagger_romev4(text, algorithm):\n", + " url = \"https://api.hrflow.ai/v1/text/tagging\"\n", + "\n", + " payload = {\n", + " \"algorithm_key\": algorithm,\n", + " \"texts\": [text],\n", + " \"top_n\": 1,\n", + " }\n", + " headers = {\n", + " \"accept\": \"application/json\",\n", + " \"content-type\": \"application/json\",\n", + " \"X-API-KEY\": API_SECRET,\n", + " \"X-USER-EMAIL\": API_USER,\n", + " }\n", + "\n", + " response = requests.post(url, json=payload, headers=headers)\n", + " if response.status_code != 200:\n", + " print(f\"HTTP error: {response.text}\")\n", + " return None\n", + "\n", + " response_data = response.json()\n", + " \n", + " data = response_data.get(\"data\")\n", + " if data and isinstance(data[0], dict):\n", + " tags = data[0].get(\"tags\")\n", + " if tags and isinstance(tags, list):\n", + " return tags[0] if tags else None\n", + " \n", + " return None\n", + "\n", + "def format_date(date_str: str) -> str:\n", + " if date_str:\n", + " return datetime.strptime(date_str, \"%Y-%m-%dT%H:%M:%S+0000\").strftime(\"%Y-%m-%d\")\n", + " return None\n", + "\n", + "def categorize_scores(scores):\n", + " star_count = {\n", + " \"5 stars\": sum(0.8 <= score <= 1 for score in scores),\n", + " \"4 stars\": sum(0.6 <= score < 0.8 for score in scores),\n", + " \"3 stars\": sum(0.4 <= score < 0.6 for score in scores),\n", + " \"2 stars\": sum(0.2 <= score < 0.4 for score in scores),\n", + " \"1 star\": sum(0 <= score < 0.2 for score in scores),\n", + " }\n", + " return star_count\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Score profiles for a specific job" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "jobs = get_items_searching(client,board_keys=BOARD_KEYS)\n", + "for job in tqdm(jobs):\n", + " sections_desc = \"\\n\".join([section[\"description\"] for section in job[\"sections\"]])\n", + " \n", + " tags_data = {\n", + " \"family\": tagger_romev4(sections_desc, ALGORITHM_FAMILY),\n", + " \"subfamily\": tagger_romev4(sections_desc, ALGORITHM_SUBFAMILY),\n", + " \"category\": tagger_romev4(sections_desc, ALGORITHM_CATEGORY),\n", + " \"job_title\": tagger_romev4(sections_desc, ALGORITHM_JOB_TITLE),\n", + " }\n", + "\n", + " job[\"tags\"].extend(\n", + " [{\"name\": f\"hrflow_tag_romev4_{key}\", \"value\": value} for key, value in tags_data.items() if value]\n", + " )\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "scoring_job_result = []\n", + "for job in tqdm(jobs):\n", + " scoring_result = get_scoring_items(client,job,source_keys=SOURCE_KEYS)\n", + " scoring_job_result.append(scoring_result)\n", + "\n", + "rows = []\n", + "for job, _, scores in scoring_job_result:\n", + " star_count = categorize_scores(scores)\n", + " tags = job[\"tags\"]\n", + "\n", + " sections_desc = \"\\n\".join([section[\"description\"] for section in job[\"sections\"]])\n", + " len_offres = len(sections_desc)\n", + " \n", + " family = None\n", + " subfamily = None\n", + " category = None\n", + " job_title = None\n", + " \n", + " for tag in tags:\n", + " if tag[\"name\"] == \"hrflow_tag_romev4_family\":\n", + " family = tag[\"value\"]\n", + " if tag[\"name\"] == \"hrflow_tag_romev4_subfamily\":\n", + " subfamily = tag[\"value\"]\n", + " if tag[\"name\"] == \"hrflow_tag_romev4_category\":\n", + " category = tag[\"value\"]\n", + " if tag[\"name\"] == \"hrflow_tag_romev4_job_title\":\n", + " job_title = tag[\"value\"]\n", + " \n", + " rows.append({\n", + " \"Nom\": job[\"name\"],\n", + " \"Reference\": job[\"reference\"],\n", + " \"Date de création\" : format_date(job[\"created_at\"]),\n", + " \"Localisation\" : job[\"location\"][\"text\"],\n", + " \"Nombre de caractères de l'offre\": len_offres ,\n", + " \"Nombre de profils ayant 5 étoiles\": star_count[\"5 stars\"],\n", + " \"Nombre de profils ayant 4 étoiles\": star_count[\"4 stars\"],\n", + " \"Nombre de profils ayant 3 étoiles\": star_count[\"3 stars\"],\n", + " \"Nombre de profils ayant 2 étoiles\": star_count[\"2 stars\"],\n", + " \"Nombre de profils ayant 1 étoiles\": star_count[\"1 star\"],\n", + " \"Tagger romev4 family\": family,\n", + " \"Tagger romev4 subfamily\": subfamily,\n", + " \"Tagger romev4 category\": category,\n", + " \"Tagger romev4 job title\": job_title,\n", + " })\n", + "\n", + "df = pd.DataFrame(rows)\n", + "\n", + "\n", + "df.to_excel(OUTPUT_FILE, index=False)\n", + "\n", + "print(f\"Excel file '{OUTPUT_FILE}' generated successfully.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Score jobs for a specific profile" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "profiles = get_items_searching(client,source_keys=SOURCE_KEYS)\n", + "for profile in tqdm(profiles):\n", + " tags_data = {\n", + " \"family\": tagger_romev4(profile[\"text\"], ALGORITHM_FAMILY),\n", + " \"subfamily\": tagger_romev4(profile[\"text\"], ALGORITHM_SUBFAMILY),\n", + " \"category\": tagger_romev4(profile[\"text\"], ALGORITHM_CATEGORY),\n", + " \"job_title\": tagger_romev4(profile[\"text\"], ALGORITHM_JOB_TITLE),\n", + " }\n", + "\n", + " profile[\"tags\"].extend(\n", + " [{\"name\": f\"hrflow_tag_romev4_{key}\", \"value\": value} for key, value in tags_data.items() if value]\n", + " )\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "scoring_profile_result = []\n", + "for profile in tqdm(profiles):\n", + " scoring_result = get_scoring_items(client,profile,board_keys=BOARD_KEYS)\n", + " scoring_profile_result.append(scoring_result)\n", + "\n", + "rows = []\n", + "for profile, _, scores in scoring_profile_result:\n", + " star_count = categorize_scores(scores)\n", + " tags = profile[\"tags\"]\n", + "\n", + " family = None\n", + " subfamily = None\n", + " category = None\n", + " job_title = None\n", + " \n", + " for tag in tags:\n", + " if tag[\"name\"] == \"hrflow_tag_romev4_family\":\n", + " family = tag[\"value\"]\n", + " if tag[\"name\"] == \"hrflow_tag_romev4_subfamily\":\n", + " subfamily = tag[\"value\"]\n", + " if tag[\"name\"] == \"hrflow_tag_romev4_category\":\n", + " category = tag[\"value\"]\n", + " if tag[\"name\"] == \"hrflow_tag_romev4_job_title\":\n", + " job_title = tag[\"value\"]\n", + " \n", + " rows.append({\n", + " \"Nom\": profile[\"name\"],\n", + " \"Reference\": profile[\"reference\"],\n", + " \"Date de reception\" : format_date(profile[\"created_at\"]),\n", + " \"Localisation\" : profile[\"location\"][\"text\"],\n", + " \"Nombre de caractères de l'offre\": len_offres ,\n", + " \"Nombre de jobs ayant 5 étoiles\": star_count[\"5 stars\"],\n", + " \"Nombre de jobs ayant 4 étoiles\": star_count[\"4 stars\"],\n", + " \"Nombre de jobs ayant 3 étoiles\": star_count[\"3 stars\"],\n", + " \"Nombre de jobs ayant 2 étoiles\": star_count[\"2 stars\"],\n", + " \"Nombre de jobs ayant 1 étoiles\": star_count[\"1 star\"],\n", + " \"Tagger romev4 family\": family,\n", + " \"Tagger romev4 subfamily\": subfamily,\n", + " \"Tagger romev4 category\": category,\n", + " \"Tagger romev4 job title\": job_title,\n", + " })\n", + "\n", + "df = pd.DataFrame(rows)\n", + "\n", + "\n", + "df.to_excel(OUTPUT_FILE, index=False)\n", + "\n", + "print(f\"Excel file '{OUTPUT_FILE}' generated successfully.\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "customers_env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 3c2752ce6c40df7d9bc840f21ff1f31c4f622afa Mon Sep 17 00:00:00 2001 From: Abdellahitech Date: Tue, 29 Oct 2024 01:58:50 +0100 Subject: [PATCH 5/5] fix : wrong branch --- .../colab/scoring_evaluation_notebook.ipynb | 387 ------------------ 1 file changed, 387 deletions(-) delete mode 100644 examples/colab/scoring_evaluation_notebook.ipynb diff --git a/examples/colab/scoring_evaluation_notebook.ipynb b/examples/colab/scoring_evaluation_notebook.ipynb deleted file mode 100644 index 660203e..0000000 --- a/examples/colab/scoring_evaluation_notebook.ipynb +++ /dev/null @@ -1,387 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Overview\n", - "This notebook is made to evaluate the scoring of profiles regarding a specific job or jobs regarding specific profile\n", - "This notebook will be structured as follow:\n", - "* General functions:\n", - " * Get all items\n", - " * Get scoring results\n", - " * Tag item\n", - "* Score profiles for a specific job\n", - "* Score jobs for a specific profile" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import requests\n", - "import pandas as pd\n", - "import json\n", - "import os\n", - "from datetime import datetime\n", - "from tqdm import tqdm\n", - "from hrflow import Hrflow\n", - "from dotenv import load_dotenv" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "load_dotenv()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "API_SECRET = os.getenv(\"API_SECRET\")\n", - "API_USER = os.getenv(\"API_USER\")\n", - "ALGORITHM = os.getenv(\"ALGORITHM\")\n", - "BOARD_KEY = os.getenv(\"BOARD_KEY\")\n", - "BOARD_KEYS = os.getenv(\"BOARD_KEYS\")\n", - "SOURCE_KEY = os.getenv(\"SOURCE_KEY\")\n", - "SOURCE_KEYS = os.getenv(\"SOURCE_KEYS\")\n", - "OUTPUT_FILE = os.getenv(\"OUTPUT_FILE\")\n", - "LIMIT_SCORING = \"32\"\n", - "LIMIT_SEARCHING = \"10000\"\n", - "ALGORITHM_FAMILY = \"tagger-rome4-family\"\n", - "ALGORITHM_SUBFAMILY = \"tagger-rome4-subfamily\"\n", - "ALGORITHM_CATEGORY = \"tagger-rome4-category\"\n", - "ALGORITHM_JOB_TITLE = \"tagger-rome4-jobtitle\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "SOURCE_KEYS=json.loads(SOURCE_KEYS)\n", - "BOARD_KEYS=json.loads(BOARD_KEYS)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "client = Hrflow(api_secret=API_SECRET,api_user=API_USER)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "## Function to score item based on source or board of items\n", - "def get_scoring_items(client, item, source_keys=None, board_keys=None):\n", - " if source_keys:\n", - " response_scoring = client.profile.scoring.list(\n", - " job_key=item[\"key\"],\n", - " board_key=BOARD_KEY,\n", - " source_keys=source_keys,\n", - " limit=LIMIT_SCORING,\n", - " agent_key=ALGORITHM\n", - " )\n", - " else:\n", - " assert board_keys is not None\n", - " response_scoring = client.job.scoring.list(\n", - " profile_key=item[\"key\"],\n", - " source_key=SOURCE_KEY,\n", - " board_keys=board_keys,\n", - " limit=LIMIT_SCORING,\n", - " agent_key=ALGORITHM\n", - " )\n", - " \n", - " if response_scoring[\"code\"] != 200:\n", - " print(\"error while returning scoring:\", response_scoring)\n", - " return\n", - " \n", - " scored_items = response_scoring[\"data\"][\"profiles\"] if source_keys else response_scoring[\"data\"][\"jobs\"]\n", - "\n", - " scores = [prediction[1] for prediction in response_scoring[\"data\"][\"predictions\"]]\n", - " \n", - " return item, scored_items, scores\n", - "\n", - "# get items is sources or boards\n", - "def get_items_searching(\n", - " client,source_keys=None,board_keys=None\n", - "):\n", - " if source_keys:\n", - " response_searching = client.profile.searching.list(\n", - " source_keys=source_keys,\n", - " limit=LIMIT_SEARCHING,\n", - " order_by=\"desc\"\n", - " )\n", - " else:\n", - " assert board_keys is not None\n", - " response_searching = client.job.searching.list(\n", - " board_keys=board_keys,\n", - " limit=LIMIT_SEARCHING,\n", - " order_by=\"desc\"\n", - " )\n", - " \n", - " if response_searching[\"code\"] != 200:\n", - " print(\"error while returning searching:\", response_searching)\n", - " return\n", - " \n", - " searched_items = response_searching[\"data\"][\"profiles\"] if source_keys else response_searching[\"data\"][\"jobs\"]\n", - " return searched_items\n", - "\n", - "## function to tag items\n", - "def tagger_romev4(text, algorithm):\n", - " url = \"https://api.hrflow.ai/v1/text/tagging\"\n", - "\n", - " payload = {\n", - " \"algorithm_key\": algorithm,\n", - " \"texts\": [text],\n", - " \"top_n\": 1,\n", - " }\n", - " headers = {\n", - " \"accept\": \"application/json\",\n", - " \"content-type\": \"application/json\",\n", - " \"X-API-KEY\": API_SECRET,\n", - " \"X-USER-EMAIL\": API_USER,\n", - " }\n", - "\n", - " response = requests.post(url, json=payload, headers=headers)\n", - " if response.status_code != 200:\n", - " print(f\"HTTP error: {response.text}\")\n", - " return None\n", - "\n", - " response_data = response.json()\n", - " \n", - " data = response_data.get(\"data\")\n", - " if data and isinstance(data[0], dict):\n", - " tags = data[0].get(\"tags\")\n", - " if tags and isinstance(tags, list):\n", - " return tags[0] if tags else None\n", - " \n", - " return None\n", - "\n", - "def format_date(date_str: str) -> str:\n", - " if date_str:\n", - " return datetime.strptime(date_str, \"%Y-%m-%dT%H:%M:%S+0000\").strftime(\"%Y-%m-%d\")\n", - " return None\n", - "\n", - "def categorize_scores(scores):\n", - " star_count = {\n", - " \"5 stars\": sum(0.8 <= score <= 1 for score in scores),\n", - " \"4 stars\": sum(0.6 <= score < 0.8 for score in scores),\n", - " \"3 stars\": sum(0.4 <= score < 0.6 for score in scores),\n", - " \"2 stars\": sum(0.2 <= score < 0.4 for score in scores),\n", - " \"1 star\": sum(0 <= score < 0.2 for score in scores),\n", - " }\n", - " return star_count\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Score profiles for a specific job" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "jobs = get_items_searching(client,board_keys=BOARD_KEYS)\n", - "for job in tqdm(jobs):\n", - " sections_desc = \"\\n\".join([section[\"description\"] for section in job[\"sections\"]])\n", - " \n", - " tags_data = {\n", - " \"family\": tagger_romev4(sections_desc, ALGORITHM_FAMILY),\n", - " \"subfamily\": tagger_romev4(sections_desc, ALGORITHM_SUBFAMILY),\n", - " \"category\": tagger_romev4(sections_desc, ALGORITHM_CATEGORY),\n", - " \"job_title\": tagger_romev4(sections_desc, ALGORITHM_JOB_TITLE),\n", - " }\n", - "\n", - " job[\"tags\"].extend(\n", - " [{\"name\": f\"hrflow_tag_romev4_{key}\", \"value\": value} for key, value in tags_data.items() if value]\n", - " )\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "scoring_job_result = []\n", - "for job in tqdm(jobs):\n", - " scoring_result = get_scoring_items(client,job,source_keys=SOURCE_KEYS)\n", - " scoring_job_result.append(scoring_result)\n", - "\n", - "rows = []\n", - "for job, _, scores in scoring_job_result:\n", - " star_count = categorize_scores(scores)\n", - " tags = job[\"tags\"]\n", - "\n", - " sections_desc = \"\\n\".join([section[\"description\"] for section in job[\"sections\"]])\n", - " len_offres = len(sections_desc)\n", - " \n", - " family = None\n", - " subfamily = None\n", - " category = None\n", - " job_title = None\n", - " \n", - " for tag in tags:\n", - " if tag[\"name\"] == \"hrflow_tag_romev4_family\":\n", - " family = tag[\"value\"]\n", - " if tag[\"name\"] == \"hrflow_tag_romev4_subfamily\":\n", - " subfamily = tag[\"value\"]\n", - " if tag[\"name\"] == \"hrflow_tag_romev4_category\":\n", - " category = tag[\"value\"]\n", - " if tag[\"name\"] == \"hrflow_tag_romev4_job_title\":\n", - " job_title = tag[\"value\"]\n", - " \n", - " rows.append({\n", - " \"Nom\": job[\"name\"],\n", - " \"Reference\": job[\"reference\"],\n", - " \"Date de création\" : format_date(job[\"created_at\"]),\n", - " \"Localisation\" : job[\"location\"][\"text\"],\n", - " \"Nombre de caractères de l'offre\": len_offres ,\n", - " \"Nombre de profils ayant 5 étoiles\": star_count[\"5 stars\"],\n", - " \"Nombre de profils ayant 4 étoiles\": star_count[\"4 stars\"],\n", - " \"Nombre de profils ayant 3 étoiles\": star_count[\"3 stars\"],\n", - " \"Nombre de profils ayant 2 étoiles\": star_count[\"2 stars\"],\n", - " \"Nombre de profils ayant 1 étoiles\": star_count[\"1 star\"],\n", - " \"Tagger romev4 family\": family,\n", - " \"Tagger romev4 subfamily\": subfamily,\n", - " \"Tagger romev4 category\": category,\n", - " \"Tagger romev4 job title\": job_title,\n", - " })\n", - "\n", - "df = pd.DataFrame(rows)\n", - "\n", - "\n", - "df.to_excel(OUTPUT_FILE, index=False)\n", - "\n", - "print(f\"Excel file '{OUTPUT_FILE}' generated successfully.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Score jobs for a specific profile" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "profiles = get_items_searching(client,source_keys=SOURCE_KEYS)\n", - "for profile in tqdm(profiles):\n", - " tags_data = {\n", - " \"family\": tagger_romev4(profile[\"text\"], ALGORITHM_FAMILY),\n", - " \"subfamily\": tagger_romev4(profile[\"text\"], ALGORITHM_SUBFAMILY),\n", - " \"category\": tagger_romev4(profile[\"text\"], ALGORITHM_CATEGORY),\n", - " \"job_title\": tagger_romev4(profile[\"text\"], ALGORITHM_JOB_TITLE),\n", - " }\n", - "\n", - " profile[\"tags\"].extend(\n", - " [{\"name\": f\"hrflow_tag_romev4_{key}\", \"value\": value} for key, value in tags_data.items() if value]\n", - " )\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "scoring_profile_result = []\n", - "for profile in tqdm(profiles):\n", - " scoring_result = get_scoring_items(client,profile,board_keys=BOARD_KEYS)\n", - " scoring_profile_result.append(scoring_result)\n", - "\n", - "rows = []\n", - "for profile, _, scores in scoring_profile_result:\n", - " star_count = categorize_scores(scores)\n", - " tags = profile[\"tags\"]\n", - "\n", - " family = None\n", - " subfamily = None\n", - " category = None\n", - " job_title = None\n", - " \n", - " for tag in tags:\n", - " if tag[\"name\"] == \"hrflow_tag_romev4_family\":\n", - " family = tag[\"value\"]\n", - " if tag[\"name\"] == \"hrflow_tag_romev4_subfamily\":\n", - " subfamily = tag[\"value\"]\n", - " if tag[\"name\"] == \"hrflow_tag_romev4_category\":\n", - " category = tag[\"value\"]\n", - " if tag[\"name\"] == \"hrflow_tag_romev4_job_title\":\n", - " job_title = tag[\"value\"]\n", - " \n", - " rows.append({\n", - " \"Nom\": profile[\"name\"],\n", - " \"Reference\": profile[\"reference\"],\n", - " \"Date de reception\" : format_date(profile[\"created_at\"]),\n", - " \"Localisation\" : profile[\"location\"][\"text\"],\n", - " \"Nombre de caractères de l'offre\": len_offres ,\n", - " \"Nombre de jobs ayant 5 étoiles\": star_count[\"5 stars\"],\n", - " \"Nombre de jobs ayant 4 étoiles\": star_count[\"4 stars\"],\n", - " \"Nombre de jobs ayant 3 étoiles\": star_count[\"3 stars\"],\n", - " \"Nombre de jobs ayant 2 étoiles\": star_count[\"2 stars\"],\n", - " \"Nombre de jobs ayant 1 étoiles\": star_count[\"1 star\"],\n", - " \"Tagger romev4 family\": family,\n", - " \"Tagger romev4 subfamily\": subfamily,\n", - " \"Tagger romev4 category\": category,\n", - " \"Tagger romev4 job title\": job_title,\n", - " })\n", - "\n", - "df = pd.DataFrame(rows)\n", - "\n", - "\n", - "df.to_excel(OUTPUT_FILE, index=False)\n", - "\n", - "print(f\"Excel file '{OUTPUT_FILE}' generated successfully.\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "customers_env", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.4" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}