Skip to content

Commit b05a0b6

Browse files
committed
Main Mapreduce program - release - v1.0
1 parent bba3b08 commit b05a0b6

File tree

6 files changed

+248
-0
lines changed

6 files changed

+248
-0
lines changed

main/.DS_Store

6 KB
Binary file not shown.

main/Dockerfile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
FROM python:3.8
EXPOSE 8080
WORKDIR /app
# Install dependencies before copying the application source so that this
# layer is reused from the build cache when only the source files change.
COPY requirements.txt ./
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -r requirements.txt
COPY . ./
# Serve the Streamlit app on all interfaces on the exposed port.
ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=8080", "--server.address=0.0.0.0"]

main/app.py

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
import streamlit as st
2+
import re
3+
import json
4+
import json
5+
import pandas as pd
6+
from google.cloud import storage
7+
import pandas as pd
8+
import time
9+
from datetime import datetime, timezone
10+
11+
12+
# Import CSS
# NOTE(review): this only emits a <link> tag into the page; whether the browser
# can actually fetch "style.css" depends on how the app serves static files --
# confirm that the stylesheet is really being applied.
_CSS_LINK_TAG = '<link rel="stylesheet" href="style.css">'
st.markdown(_CSS_LINK_TAG, unsafe_allow_html=True)
14+
15+
def is_file_updated_recently(threshold_seconds):
    """Report whether ``final_results.json`` was updated within a time window.

    Fetches the object's metadata from the ``eccmr_final_result_bucket``
    bucket and compares its ``updated`` timestamp with the current UTC time.

    Args:
        threshold_seconds: Maximum age, in seconds, for the file to count as
            recently updated.

    Returns:
        True if the file exists, carries an ``updated`` timestamp, and that
        timestamp is at most ``threshold_seconds`` old; False otherwise.
    """
    client = storage.Client.from_service_account_json('piyush-chaudhari-fall2023-9ae1ed20a7f3.json')
    eccmr_final_result_bucket = client.get_bucket('eccmr_final_result_bucket')
    file_path = "final_results.json"

    # get_blob() loads the metadata in a single request and returns None when
    # the object is missing, which avoids a separate exists() + reload() pair
    # (and the window in which the object could vanish between the two calls).
    blob = eccmr_final_result_bucket.get_blob(file_path)
    if blob is None:
        print(f"File '{file_path}' does not exist.")
        return False

    if blob.updated is None:
        print(f"File '{file_path}' has no 'updated' timestamp.")
        return False

    # Only attach UTC when the timestamp is naive; overwriting the tzinfo of
    # an already-aware datetime would silently shift the instant it denotes.
    updated_time = blob.updated
    if updated_time.tzinfo is None:
        updated_time = updated_time.replace(tzinfo=timezone.utc)

    current_time = datetime.now(timezone.utc)
    age_seconds = (current_time - updated_time).total_seconds()
    return age_seconds <= threshold_seconds
41+
42+
43+
def is_valid_input(text):
    """Return True if ``text`` is a non-empty string with no whitespace.

    Args:
        text: Candidate search term.

    Returns:
        True when ``text`` is a ``str`` made up solely of one or more
        non-whitespace characters; False otherwise (including for ``None``
        or other non-string values).
    """
    if not isinstance(text, str):
        return False
    # fullmatch() must consume the entire string. The earlier ``^...$`` form
    # with re.match() let a trailing newline through, because ``$`` also
    # matches just before a final "\n".
    return re.fullmatch(r'\S+', text) is not None
45+
46+
def find_results(input_text):
    """Look up a word in the inverted index stored in ``final_results.json``.

    The input is validated, lower-cased and stripped of every character that
    is not an ASCII letter, digit or whitespace, then used as a key into the
    JSON object downloaded from the ``eccmr_final_result_bucket`` bucket.

    Args:
        input_text: The raw word entered by the user.

    Returns:
        A DataFrame with columns ``Document`` and ``Count``, sorted by
        ``Count`` in descending order and re-indexed from 0, or None when the
        input is invalid, the result file is missing, or the word is not a
        key of the index.
    """
    if not is_valid_input(input_text):
        st.warning("Invalid input! Please avoid spaces and newline characters.")
        return None

    # preprocess the input word
    input_text = input_text.strip().lower()
    input_text = re.sub(r'[^a-zA-Z0-9\s]', '', input_text)
    print("processed input text:", input_text)
    if not input_text:
        # Nothing searchable is left (e.g. the input was only punctuation),
        # so skip the download entirely.
        return None

    client = storage.Client.from_service_account_json('piyush-chaudhari-fall2023-9ae1ed20a7f3.json')
    eccmr_final_result_bucket = client.get_bucket('eccmr_final_result_bucket')

    # get_blob() returns None for a missing object, replacing the separate
    # exists() round trip that preceded the download.
    blob = eccmr_final_result_bucket.get_blob("final_results.json")
    if blob is None:
        print("result file does not exists.")
        return None

    # Download the content of the file as text
    json_object = json.loads(blob.download_as_text())

    # NOTE(review): assumes the index maps word -> {document: count}; confirm
    # against the job that writes final_results.json.
    postings = json_object.get(input_text)
    if postings is None:
        return None

    df = pd.DataFrame(list(postings.items()), columns=['Document', 'Count'])
    # Highest counts first, with a fresh 0-based index.
    return df.sort_values(by='Count', ascending=False).reset_index(drop=True)
80+
81+
def save_uploaded_file(uploaded_file):
    """Upload a user-provided file to ``dataset/`` in ``eccrm_dataset_bucket``.

    Args:
        uploaded_file: The object returned by ``st.file_uploader`` (it must
            provide ``name`` and ``getvalue()``), or None.

    Returns:
        The string ``"File uploaded successfully."`` once the upload call
        returns, or None when ``uploaded_file`` is None.
    """
    if uploaded_file is None:
        return None

    # uploading file in eccrm_dataset_bucket
    client = storage.Client.from_service_account_json('piyush-chaudhari-fall2023-9ae1ed20a7f3.json')
    eccrm_dataset_bucket = client.get_bucket('eccrm_dataset_bucket')

    # Keep only the last path component of the client-supplied name.
    file_name = uploaded_file.name.split("/")[-1]
    destination_blob_name = f"dataset/{file_name}"

    # Upload straight from memory. Writing a local copy named after the upload
    # first could overwrite a same-named file in the working directory (for
    # example requirements.txt) and left the copy behind afterwards.
    blob = eccrm_dataset_bucket.blob(destination_blob_name)
    blob.upload_from_string(uploaded_file.getvalue())

    return "File uploaded successfully."
103+
104+
def main():
    """Render the Streamlit UI: a word-lookup tab and a file-upload tab.

    The "Find Occurances" tab validates the entered word and shows the
    DataFrame returned by ``find_results``. The "Upload File" tab sends the
    chosen ``.txt`` file through ``save_uploaded_file``, polls
    ``is_file_updated_recently`` until the result file has been refreshed
    (or a timeout elapses), then counts down and reruns the script.
    """
    st.title("Map Reduce Assignment (Engineering Cloud Computing)")
    tabs = ["Find Occurances", "Upload File"]
    choice = st.sidebar.selectbox("Select Tab:", tabs)

    if choice == tabs[0]:
        st.header(tabs[0])

        # Tab 1 Content
        input_text = st.text_area("Enter Text:", placeholder="Enter the word ...", help="Enter word that you want to search in corpus and its occurances in each document")

        if st.button("Find"):
            if is_valid_input(input_text):
                with st.spinner("Finding results..."):
                    result_df = find_results(input_text)
                if result_df is not None:
                    st.dataframe(result_df, use_container_width=True)
                else:
                    st.info("Word not present in corpus.")
            else:
                st.warning("Invalid input! Please avoid spaces and newline characters.")

    elif choice == tabs[1]:
        st.header(tabs[1])

        # Tab 2 Content
        uploaded_file = st.file_uploader("Choose a .txt file:", type=["txt"])
        if uploaded_file is not None and st.button("Upload"):
            with st.spinner("Uploading file..."):
                upload_result = save_uploaded_file(uploaded_file)
            st.success(upload_result)

            loop_interval = 1
            threshold_seconds = 20
            # Upper bound on the wait so that a failed or stalled indexing run
            # cannot block this session forever.
            # NOTE(review): 600 s is a guess -- tune to the real job duration.
            max_wait_seconds = 600
            deadline = time.monotonic() + max_wait_seconds
            index_updated = False

            with st.spinner("Updating the inverted index..."):
                while time.monotonic() < deadline:
                    if is_file_updated_recently(threshold_seconds):
                        index_updated = True
                        break
                    time.sleep(loop_interval)

            if index_updated:
                st.success("Indexing complete. Thank you for waiting.")
                # Display countdown message and refresh the page
                refresh_in = 6
                countdown_placeholder = st.empty()
                for i in range(refresh_in, 0, -1):
                    countdown_placeholder.text(f"Refreshing the page in {i} seconds.")
                    time.sleep(1)
                st.experimental_rerun()
            else:
                st.error("Timed out waiting for the inverted index to update. Please try again later.")

    # Adding the "Made with love (emoji) Piyush Chaudhari" message at the bottom
    st.markdown(
        '<div style="position: fixed; bottom: 10px; left: 50%; transform: translateX(-50%);">'
        'Made with ❤️ Piyush Rajendra Chaudhari</div>',
        unsafe_allow_html=True
    )


if __name__ == "__main__":
    main()
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"type": "service_account",
3+
"project_id": "piyush-chaudhari-fall2023",
4+
"private_key_id": "",
5+
"private_key": "",
6+
"client_email": "googlecloudstorage@piyush-chaudhari-fall2023.iam.gserviceaccount.com",
7+
"client_id": "",
8+
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
9+
"token_uri": "https://oauth2.googleapis.com/token",
10+
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
11+
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/googlecloudstorage%40piyush-chaudhari-fall2023.iam.gserviceaccount.com",
12+
"universe_domain": "googleapis.com"
13+
}

main/requirements.txt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
cachetools==5.3.2
2+
certifi==2023.11.17
3+
charset-normalizer==3.3.2
4+
google-api-core==2.15.0
5+
google-auth==2.25.2
6+
google-cloud-core==2.4.1
7+
google-cloud-storage==2.13.0
8+
google-crc32c==1.5.0
9+
google-resumable-media==2.6.0
10+
googleapis-common-protos==1.62.0
11+
idna==3.6
12+
protobuf==4.25.1
13+
pyasn1==0.5.1
14+
pyasn1-modules==0.3.0
15+
requests==2.31.0
16+
rsa==4.9
17+
urllib3==2.1.0
18+
streamlit==1.29.0

main/style.css

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
body {
2+
background-color: #f4f7fa;
3+
color: #333;
4+
}
5+
6+
.sidebar .css-vfskoc {
7+
background-color: #007bff;
8+
}
9+
10+
.st-d0 .st-cg .st-bp {
11+
background-color: #ffffff;
12+
border: 1px solid #c8d1e0;
13+
border-radius: 5px;
14+
padding: 15px;
15+
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
16+
}
17+
18+
.st-cc {
19+
color: #007bff;
20+
}
21+
22+
.st-cg button {
23+
background-color: #007bff;
24+
color: #ffffff;
25+
}
26+
27+
.st-ct {
28+
color: #007bff;
29+
}
30+
31+
.st-ct a {
32+
color: #007bff;
33+
}
34+
35+
.st-d0 .st-cg .st-bp button {
36+
margin-top: 10px;
37+
}
38+
39+
.st-bb {
40+
position: fixed;
41+
top: 50%;
42+
left: 50%;
43+
transform: translate(-50%, -50%);
44+
z-index: 1000;
45+
background: rgba(255, 255, 255, 0.8);
46+
border-radius: 10px;
47+
padding: 20px;
48+
text-align: center;
49+
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
50+
}

0 commit comments

Comments
 (0)