Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@

gspread==6.1.2
hubspot-api-client==8.2.1
notion-client==2.2.1
numpy==1.26.4
pandas==1.3.4
pyodbc==5.1.0
pytest==8.3.2
python-slugify==8.0.4
requests==2.32.3


Empty file.
88 changes: 88 additions & 0 deletions wherescape/connectors/gsheet/create_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import logging
from datetime import datetime, UTC

from ...helper_functions import create_column_names, create_display_names, prepare_metadata_query
from ...wherescape import WhereScape
from .gsheets_wrapper import Gsheet
from .gsheets_parsing import parse_gspread_arguments


def python_gsheet_create_metadata():
    """
    Function that creates a load table in Wherescape based on the data
    in a provided Google sheet file.

    Steps:
    - look up the sheet URL (lt_file_path) and the argument string
      (lt_file_name) stored in ws_load_tab for the current load table;
    - parse the argument string and open the sheet through Gsheet;
    - derive column names, display names, types, source columns and
      comments from the header row;
    - build the metadata SQL with prepare_metadata_query and push it
      with push_to_meta.
    """
    start_time = datetime.now(tz=UTC)
    # Initialize Wherescape
    logging.info("Connecting to WhereScape")
    wherescape_instance = WhereScape()
    logging.info(
        "Start time: %s for gsheet_create_metadata."
        % start_time.strftime("%Y-%m-%d %H:%M:%S")
    )

    load_table_name = wherescape_instance.table
    url = wherescape_instance.query_meta(
        "select lt_file_path from ws_load_tab where lt_table_name = ?",
        [load_table_name],
    )[0][0]
    workbook_details = wherescape_instance.query_meta(
        "select lt_file_name from ws_load_tab where lt_table_name = ?",
        [load_table_name],
    )[0][0]
    logging.info(f"Metadata. URL: {url} ; Details : {workbook_details}")

    args = parse_gspread_arguments(workbook_details)
    if args.debug:
        logging.warning("Debug mode on -> do not use for production.")

    gsheet = Gsheet(args, url)

    header_row = gsheet.get_header()
    column_types = gsheet.get_column_types()
    title = gsheet.get_worksheet().title
    lt_obj_key = wherescape_instance.object_key

    display_names = create_display_names(header_row)
    column_names = create_column_names(header_row)
    source_columns, comments = set_source_columns_and_comments(header_row)

    sql = prepare_metadata_query(
        lt_obj_key=lt_obj_key,
        src_table_name=title,
        columns=column_names,
        display_names=display_names,
        types=column_types,
        comments=comments,
        source_columns=source_columns,
    )
    logging.info(f"Stored details for {len(header_row)} columns")

    wherescape_instance.push_to_meta(sql)
    logging.info("--> Metadata updated. Table can be created.")

    end_time = datetime.now(tz=UTC)
    logging.info("End time: %s" % end_time.strftime("%Y-%m-%d %H:%M:%S"))
    # timedelta.seconds only holds the sub-day remainder; total_seconds()
    # is the full duration. int() keeps the whole-second log format.
    elapsed_seconds = int((end_time - start_time).total_seconds())
    logging.info("Time elapsed: %s seconds" % elapsed_seconds)


def set_source_columns_and_comments(header_row: list):
    """
    Derive the source column names and comment texts for the metadata.

    Params:
    - header_row (list): header values.

    Returns:
    - list: source_column values (header values without trailing whitespace).
    - list: comment values (the same values cut to 1023 characters, with
      every single quote doubled).
    """
    source_columns = [header.rstrip() for header in header_row]
    # Cut to length first, then double the quotes, so the limit applies to
    # the unescaped text (presumably the text ends up in a SQL literal).
    comments = [name[:1023].replace("'", "''") for name in source_columns]
    return source_columns, comments
81 changes: 81 additions & 0 deletions wherescape/connectors/gsheet/gsheets_parsing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import argparse
import logging
import shlex

from gspread.utils import a1_range_to_grid_range


def parse_gspread_arguments(argument: str) -> argparse.Namespace:
    """
    Converts an argument string into args object.

    The string is split shell-style (shlex) and parsed with the parser
    from create_parser(). Range values are upper-cased. Questionable
    option combinations are logged but do not stop processing.

    Parameters:
    - argument (str): arguments for the parser collected in a string.
      None is treated as an empty string.

    Returns
    - args (Namespace): object with all arguments provided stored within.

    Raises
    - ValueError: when the argument string cannot be parsed.
    """
    if not argument:
        logging.info("No arguments provided. Using defaults.")
        # shlex.split(None) reads from stdin on Python < 3.12; make sure
        # it always gets a string.
        argument = ""

    argument_list = shlex.split(argument)

    parser = create_parser()

    try:
        args = parser.parse_args(argument_list)
    except SystemExit as ex:
        logging.warning("There might be a mistake with the arguments. Ensure it's all correct.")
        logging.error(ex)
        # Without a parsed namespace there is nothing to continue with;
        # fail with a clear error instead of an UnboundLocalError below.
        raise ValueError(f"Could not parse gsheet arguments: {argument!r}") from ex

    if args.range:
        args.range = args.range.upper()
    if args.header_range:
        args.header_range = args.header_range.upper()

    logging.info(
        f"workbook_name: {args.workbook_name}, sheet: {args.sheet}, range: {args.range}, hr: {args.header_range}, no_header: {str(args.no_header)}, debug: {args.debug}"
    )

    if args.header_range and args.no_header:
        logging.error(
            "You cannot specify both a header_range and --no_header in the object source File Name."
        )
    if args.header_range and not args.range:
        logging.error(
            "A --header_range can not be specified without specifying a --range."
        )

    if args.header_range and args.range:
        # Compare the first row of both ranges; a differing start row is
        # only reported as a warning.
        row_index_header_range = a1_range_to_grid_range(args.header_range).get(
            "startRowIndex"
        )
        row_index_range = a1_range_to_grid_range(args.range).get("startRowIndex")
        if row_index_header_range != row_index_range:
            logging.warning(
                "If both a range and a header_range are specified, they should overlap."
            )
    return args


def create_parser():
    """
    Method to create parser with arguments for workbook_details.

    Arguments defined:
    - workbook_name (positional, optional): name of the workbook; None
      when omitted.
    - --sheet: name of the sheet in the workbook.
    - --range: cell range to retrieve.
    - --header_range: cell range to be used as header.
    - --no_header: flag, set when the table has no header.
    - -d / --debug: flag, print debug messages.

    Return:
    - parser containing possible args.
    """
    parser = argparse.ArgumentParser()
    # argparse ignores `default` on a positional unless nargs is "?" or "*".
    # nargs="?" lets an empty argument string parse, giving workbook_name=None.
    parser.add_argument(
        "workbook_name",
        nargs="?",
        default=None,
        help="Name of the Google Sheet/ workbook",
    )  # positional argument
    parser.add_argument("--sheet", help="Name of the sheet in the workbook")
    parser.add_argument("--range", help="Cell range to retrieve")
    parser.add_argument("--header_range", help="Cell range to be used as header")
    parser.add_argument(
        "--no_header", action="store_true", help="Specify if table has no header"
    )
    parser.add_argument(
        "-d", "--debug", action="store_true", help="Print debug messages"
    )
    return parser
Loading