|
| 1 | +import functions_framework |
| 2 | +import os |
| 3 | +import re |
| 4 | +import json |
| 5 | +import hashlib |
| 6 | +import csv |
| 7 | +import json |
| 8 | +import threading |
| 9 | +import requests |
| 10 | +import pandas as pd |
| 11 | +import threading |
| 12 | +from io import StringIO |
| 13 | +from google.cloud import storage |
| 14 | + |
| 15 | + |
| 16 | + |
def _aggregate_counts(rows):
    """Group ``(word, filename, count)`` rows into a nested dictionary.

    Args:
        rows: Iterable of 3-item sequences ``(word, filename, count)``.

    Returns:
        A dict mapping each word to a dict which maps each filename to the
        list of counts seen for that (word, filename) pair, in input order.

    Raises:
        ValueError: If a row does not contain exactly three items.
    """
    aggregated = {}
    for word, filename, count in rows:
        aggregated.setdefault(word, {}).setdefault(filename, []).append(count)
    return aggregated


@functions_framework.http
def create_json_file(request):
    """HTTP entry point: convert one mapper's CSV output into a JSON file.

    Reads ``mapper{mapperid}/mapper{mapperid}.csv`` from the ``mapper_bucket``
    bucket, groups its rows by word and filename, and uploads the result as
    ``mapper{mapperid}/mapper{mapperid}.json`` to the same bucket.

    Args:
        request: The incoming HTTP request. Its JSON body must be an object
            containing a ``"mapperid"`` entry.

    Returns:
        A confirmation string on success, or a ``(message, 400)`` tuple when
        the request body is missing, is not a JSON object, or lacks
        ``"mapperid"``.
    """
    # get_json(silent=True) yields None for a missing/invalid JSON body, so
    # validate before indexing instead of failing with an opaque 500.
    request_json = request.get_json(silent=True)
    if not isinstance(request_json, dict) or "mapperid" not in request_json:
        return "Bad request: JSON body with 'mapperid' is required", 400
    mapperid = request_json["mapperid"]

    # NOTE(review): credentials are loaded from a key file bundled with the
    # function source; consider the runtime's default credentials instead.
    client = storage.Client.from_service_account_json('piyush-chaudhari-fall2023-9ae1ed20a7f3.json')
    mapper_bucket_name = "mapper_bucket"
    mapper_bucket = client.get_bucket(mapper_bucket_name)

    folder_name = f"mapper{mapperid}"  # mapper folder name
    file_name = f"mapper{mapperid}.csv"  # individual mapper csv file
    csv_blob = mapper_bucket.blob(f"{folder_name}/{file_name}")

    # Download the CSV as text and parse it. keep_default_na=False stops
    # pandas from turning ordinary tokens such as "null", "nan" or "NA" into
    # float NaN, which would otherwise corrupt the word/filename keys.
    content_text = csv_blob.download_as_text()
    df = pd.read_csv(StringIO(content_text), keep_default_na=False)

    # itertuples(name=None) streams plain tuples without building a Series
    # per row; each row is expected to be (word, filename, count).
    aggregated_dict = _aggregate_counts(df.itertuples(index=False, name=None))

    json_file_path = f'mapper{mapperid}.json'

    # Upload the aggregated data next to the source CSV.
    json_blob = mapper_bucket.blob(f"{folder_name}/{json_file_path}")
    json_string = json.dumps(aggregated_dict, indent=4)
    json_blob.upload_from_string(json_string, content_type="application/json")

    print(f"create_json_file OK - {mapperid}")
    return f"create_json_file OK - {mapperid}"
| 68 | + |
| 69 | + |
| 70 | + |
| 71 | + |
| 72 | + |
0 commit comments