diff --git a/learnify/.gitignore b/learnify/.gitignore new file mode 100644 index 000000000..87e40fd90 --- /dev/null +++ b/learnify/.gitignore @@ -0,0 +1,3 @@ +*.pyc +keys.py +__pycache__/ \ No newline at end of file diff --git a/learnify/Images/abstraction_levels.png b/learnify/Images/abstraction_levels.png new file mode 100644 index 000000000..3efef8d7e Binary files /dev/null and b/learnify/Images/abstraction_levels.png differ diff --git a/learnify/Images/educai-logo.png b/learnify/Images/educai-logo.png new file mode 100644 index 000000000..0ef50ef77 Binary files /dev/null and b/learnify/Images/educai-logo.png differ diff --git a/learnify/Images/embedding.jpg b/learnify/Images/embedding.jpg new file mode 100644 index 000000000..6a5d586a0 Binary files /dev/null and b/learnify/Images/embedding.jpg differ diff --git a/learnify/Images/hackatonai_logo.png b/learnify/Images/hackatonai_logo.png new file mode 100644 index 000000000..a217cb17f Binary files /dev/null and b/learnify/Images/hackatonai_logo.png differ diff --git a/learnify/Images/qa_chain_pipeline.jpeg b/learnify/Images/qa_chain_pipeline.jpeg new file mode 100644 index 000000000..301099c49 Binary files /dev/null and b/learnify/Images/qa_chain_pipeline.jpeg differ diff --git a/learnify/README.md b/learnify/README.md new file mode 100644 index 000000000..147c8850c --- /dev/null +++ b/learnify/README.md @@ -0,0 +1,38 @@ +# Learnify + +## Abstract + +### We wrote a research paper about our approach [Research paper](data/LearnifyPaper-1.pdf) + +### The pitch [Pitch](data/pitch.pdf) +The lack of personalization in education hampers struggling students’ ability to address learning gaps, contributing to burnout, high dropout +rates, and absenteeism. 
This issue arises from misguided approaches to personalization that focus on poor learning proxies, misdiagnosis of
causes-and-effects and an unhealthy emphasis on multi-modal course content, as current approaches to Intelligent Tutoring Systems (ITS)
often emphasize domain-specific expertise (e.g., math or coding), which neglects the core bottleneck in education:
+
+ 1- lack of a good student
+model
+
+ 2- lack of a learning framework to utilize the student model.
+
+Our solution addresses this by combining both elements: democratizing access to an accurate student model and applying Bloom Taxonomy as a
+learning framework to utilize it. This contrarian approach personalizes the student profile rather than the domain, leveraging Bloom Taxonomy
+to build student-centric ITS. Key to our innovation is delegating student-model expertise to large language models (LLMs) while leaving
+domain expertise to students and teachers, thus ensuring both personalization and autonomy. This avoids issues like LLM hallucinations in
+domain-specific contexts which can worsen student performance and enhances accuracy in student modeling. Unlike BloomBERT, Khanmigo,
+and TahseenAI, which face limitations from narrow focuses or flawed assumptions about learning, our solution utilizes learning’s first principles
+to efficiently exploit student models. By focusing on the foundational student model, we aim to address global educational challenges, including
+those highlighted by Morocco’s low PISA ranking, and to extend EdTech beyond K-12 and language learning markets.
+
+### Keywords
+Bloom Taxonomy, Bloom’s 2 sigma problem, Mastery Learning, VARK model, Hypercorrection Effect, Cognitive Tutors, Intelligent Tutoring
+System, Cognitivism, Connectivism, Large Language Models, Artificial Intelligence
+
+
+## Instructions:
+
+1. Create a new Python environment
+2. Install requirements `pip install -r requirements.txt`
+3. 
create a keys.py file inside `src` folder and put your OpenAI API key in it `key = "sk-proj xxxxx ...."` +4. launch the app with `streamlit run app.py` +5. Find examples in `data/biology` or `data/physics` as inputs to the system \ No newline at end of file diff --git a/learnify/data/LearnifyPaper-1.pdf b/learnify/data/LearnifyPaper-1.pdf new file mode 100644 index 000000000..6dd6ba254 Binary files /dev/null and b/learnify/data/LearnifyPaper-1.pdf differ diff --git a/learnify/data/biology/genetique_question.txt b/learnify/data/biology/genetique_question.txt new file mode 100644 index 000000000..7121a3cdf --- /dev/null +++ b/learnify/data/biology/genetique_question.txt @@ -0,0 +1,4 @@ +Exercice 1 : Cellule et Structures Cellulaires + + Qu'est-ce qu'une cellule eucaryote ? Donnez une définition concise. + Expliquez la différence entre le réticulum endoplasmique lisse et le réticulum endoplasmique rugueux avec un exemple pour chacun. diff --git a/learnify/data/biology/student_1.txt b/learnify/data/biology/student_1.txt new file mode 100644 index 000000000..e396512c4 --- /dev/null +++ b/learnify/data/biology/student_1.txt @@ -0,0 +1,47 @@ +Remember + +Question: List the main characteristics of a eukaryotic cell. Answer: + + Defined nucleus enclosed by a nuclear envelope. + Presence of membrane-bound organelles like mitochondria and the endoplasmic reticulum. + Larger size compared to prokaryotic cells. + Complex cytoskeletal structure. + +Understand + +Question: Explain the difference between a eukaryotic cell and a prokaryotic cell. Answer: + + Eukaryotic cells have a well-defined nucleus containing the cell’s DNA, whereas prokaryotic cells do not have a nucleus; their DNA floats freely in the cell. + Eukaryotes have complex organelles including mitochondria, whereas prokaryotes do not have membrane-bound organelles. + + +Apply + +Question: How would you classify different types of cells based on their organelles? 
Answer: + + Muscle Cells: Contain numerous mitochondria for energy during contraction. + Nerve Cells: Extensive network of smooth endoplasmic reticulum for neurotransmitter production. + Plant Cells: Have chloroplasts for photosynthesis and large vacuoles for storage. + + +Analyze + +Question: Compare the structure of a plant cell and an animal cell. Answer: + + Plant cells have a cell wall for structure and rigidity, animal cells do not. + Plant cells contain chloroplasts for photosynthesis, animal cells lack chloroplasts. + Animal cells have centrioles, which are typically absent in plant cells. + +Evaluate + +Question: Judge the importance of cell specialization in multicellular organisms. Answer: + + Cell specialization allows multicellular organisms to be more efficient as different cells perform different functions, leading to higher overall functionality and survival. + +Create + +Question: Develop a model illustrating the functions of different organelles within a eukaryotic cell. Answer: + + No response given. (Student 1 struggles with creation tasks. + + \ No newline at end of file diff --git a/learnify/data/biology/student_2.txt b/learnify/data/biology/student_2.txt new file mode 100644 index 000000000..33dd87ec2 --- /dev/null +++ b/learnify/data/biology/student_2.txt @@ -0,0 +1,39 @@ +Original Question 1: Qu'est-ce qu'une cellule eucaryote ? Donnez une définition concise. +Remember + +Question: List the main characteristics of a eukaryotic cell. Answer: + + Nucleus containing DNA. + Membrane-bound organelles. + Larger and more complex than prokaryotes. + +Understand + +Question: Explain the difference between a eukaryotic cell and a prokaryotic cell. Answer: + + Eukaryotic cells have a nucleus; prokaryotic cells do not. + Eukaryotic cells have organelles that prokaryotic cells lack. + +Apply + +Question: How would you classify different types of cells based on their organelles? Answer: + + Could not accurately classify the types of cells. 
+ +Analyze + +Question: Compare the structure of a plant cell and an animal cell. Answer: + + Could not accurately describe the differences, only noted that plant cells have chloroplasts and animal cells do not. + +Evaluate + +Question: Judge the importance of cell specialization in multicellular organisms. Answer: + + Gave a vague answer, noting only that specialization "might be useful" without providing substantial reasoning or examples. + +Create + +Question: Develop a model illustrating the functions of different organelles within a eukaryotic cell. Answer: + + No response given. (Student 2 also struggles with creative tasks.) \ No newline at end of file diff --git a/learnify/data/physics/exo.txt b/learnify/data/physics/exo.txt new file mode 100644 index 000000000..3131a2042 --- /dev/null +++ b/learnify/data/physics/exo.txt @@ -0,0 +1,9 @@ +Exercice 1 : Définition et Types d'Ondes + + Qu'est-ce qu'une onde mécanique ? Donnez une définition concise. + Expliquez la différence entre une onde transversale et une onde longitudinale avec un exemple pour chacune. + +Exercice 2 : Propagation et Vitesse + + Comment la vitesse de propagation d'une onde est-elle définie dans un milieu ? Utilisez la formule donnée dans vos notes. + Quel effet l’élasticité d’un milieu a-t-elle sur la vitesse de propagation d’une onde ? Donnez un exemple. 
diff --git a/learnify/data/pitch.pdf b/learnify/data/pitch.pdf new file mode 100644 index 000000000..5a722517c Binary files /dev/null and b/learnify/data/pitch.pdf differ diff --git a/learnify/requirements.txt b/learnify/requirements.txt new file mode 100644 index 000000000..5c361e70b --- /dev/null +++ b/learnify/requirements.txt @@ -0,0 +1,9 @@ +tiktoken==0.4.0 +openai==0.27.8 +langchain==0.0.247 +chromadb==0.4.3 +fastapi==0.99.1 +uvicorn==0.23.1 +streamlit +fpdf +matplotlib \ No newline at end of file diff --git a/learnify/src/agents.py b/learnify/src/agents.py new file mode 100644 index 000000000..f47211d00 --- /dev/null +++ b/learnify/src/agents.py @@ -0,0 +1,150 @@ +# agents.py + +# Bloom Taxonomy Creator Agent Prompt +TAXONOMY_AGENT_PROMPT = """ +You are an educational design assistant specializing in Bloom's Taxonomy. Your task is to transform a set of input questions into questions aligned with each level of Bloom's Taxonomy: Remember, Understand, Apply, Analyze, Evaluate, and Create. + +For each input question, generate corresponding questions at each taxonomy level, ensuring they are relevant to the original question's topic. + +Your output **must** be a well-formatted JSON object **only**. Do not include any explanations, code blocks, or additional text. The JSON should have a single key "Topic Questions" mapping to an array of question objects. Each question object must include the following keys: + +- "Original Question" +- "Remember" +- "Understand" +- "Apply" +- "Analyze" +- "Evaluate" +- "Create" + +Ensure the JSON is valid and free from any formatting issues. 
+ +**Example Output:** + +{ + "Topic Questions": [ + { + "Original Question": "What is photosynthesis?", + "Remember": "Define photosynthesis.", + "Understand": "Explain how photosynthesis works.", + "Apply": "Describe how photosynthesis affects plant growth.", + "Analyze": "Compare photosynthesis and cellular respiration.", + "Evaluate": "Assess the importance of photosynthesis in ecosystems.", + "Create": "Design an experiment to measure the rate of photosynthesis." + } + ] +} + +**Input Questions:** + +{{context}} +""" + +# Scoring Agent Prompt +SCORING_AGENT_PROMPT = """ +You are an educational evaluator specializing in Bloom's Taxonomy assessments. + +Given the following JSON data containing students' answers to questions at each level of Bloom's Taxonomy, evaluate each answer and assign a score. + +Each score should be between 0 and 5, where 0 indicates no understanding and 5 indicates excellent understanding. + +Add a new field called "score" to each Sub-Question. + +Ensure the output is valid JSON and follows the same structure as the input, but with the "score" field added to each Sub-Question. + +**Example Input:** + +{ + "Bloom Taxonomy": { + "Remember": [ + { + "Original Question": "What is photosynthesis?", + "Sub-Question": { + "Question": "Define photosynthesis.", + "Answer": "It's how plants make food." + } + } + ] + } +} + +**Example Output:** + +{ + "Bloom Taxonomy": { + "Remember": [ + { + "Original Question": "What is photosynthesis?", + "Sub-Question": { + "Question": "Define photosynthesis.", + "Answer": "It's how plants make food.", + "score": 4 + } + } + ] + } +} + +**Input JSON:** + +{input_json} +""" + +# Metacognitive Recommendation Agent Prompt for Teacher +METACOGNITION_AGENT_PROMPT_TEACHER = """ +You are an educational coach specializing in metacognitive strategies aligned with Bloom's Taxonomy, addressing teachers. 
+ +Given the following JSON data containing the weighted average scores for each level of Bloom's Taxonomy, analyze the scores and recommend metacognitive strategies that the teacher can use to help the student improve in all areas. + +Prioritize the Bloom's Taxonomy levels with the lowest weighted average scores, using the Leitner system to address and emphasize these areas. However, also provide encouraging feedback and suggestions for the areas where the student is performing well, guiding the teacher on how to support the student's strengths. + +For each taxonomy level: + +- Summarize the student's current performance. +- Provide metacognitive strategies and scaffolding techniques that the teacher can implement to help the student enhance their learning in that area. + +Ensure the output is a well-structured, clear set of recommendations without any additional explanations or irrelevant content. + +**Input JSON:** + +{input_json} +""" + +METACOGNITION_AGENT_PROMPT = """ +You are an educational coach specializing in metacognitive strategies aligned with Bloom's Taxonomy, addressing teachers. + +Given the following data containing the scores for each subquestion and taxonomy level, analyze the scores and recommend metacognitive strategies that the teacher can use to help the student improve in all areas. + +For each taxonomy level: + +- Summarize the student's current performance. +- Provide metacognitive strategies and scaffolding techniques that the teacher can implement to help the student enhance their learning in that area. + +Ensure the output is a well-structured, clear set of recommendations without any additional explanations or irrelevant content. + +**Input Data:** + +{input_json} +""" + +# Metacognitive Recommendation Agent Prompt for Student +METACOGNITION_AGENT_PROMPT_STUDENT = """ +You are an educational coach specializing in metacognitive strategies aligned with Bloom's Taxonomy, addressing students. 
+ +Given the following JSON data containing the weighted average scores for each level of Bloom's Taxonomy, analyze the scores and provide metacognitive strategies to help the student improve in all areas. + +Prioritize the Bloom's Taxonomy levels with the lowest weighted average scores, using the Leitner system to address and emphasize these areas. However, also provide encouraging feedback and suggestions for the areas where you are performing well, motivating you to continue your great work and strive for even better understanding. + +For each taxonomy level: + +- Acknowledge your current performance with positive and motivating language. +- Provide first-principles-based metacognitive strategies that you can use to enhance your learning in that area. + +Ensure the output is friendly, motivational, and free from technical jargon. + +**Input JSON:** + +{input_json} +""" + +# Threshold for generating recommendations +THRESHOLD = 3.0 # Adjust this value as needed diff --git a/learnify/src/app.py b/learnify/src/app.py new file mode 100644 index 000000000..238c1d2a5 --- /dev/null +++ b/learnify/src/app.py @@ -0,0 +1,585 @@ +import streamlit as st +import tempfile +import os +import json +from dotenv import load_dotenv +import keys # Ensure this module contains your OpenAI API key as `key` +import llm # Ensure this module contains the QueryRunner class +import datetime # For timestamping saved files +import agents # Import your prompts from agents.py + +# Load environment variables if needed +load_dotenv() + +# Set OpenAI API Key +os.environ["OPENAI_API_KEY"] = keys.key + +# Import prompts from agents.py +TAXONOMY_AGENT_PROMPT = agents.TAXONOMY_AGENT_PROMPT +SCORING_AGENT_PROMPT = agents.SCORING_AGENT_PROMPT +METACOGNITION_AGENT_PROMPT = agents.METACOGNITION_AGENT_PROMPT + +# ===================================== +# UI Configuration Variables +# ===================================== + +# Images +LOGO_IMAGE_LEFT = "../Images/hackatonai_logo.png" +LOGO_IMAGE_RIGHT = 
"../Images/educai-logo.png" + +# Texts +APP_TITLE = "Learnify" +ANSWERS_SUBMITTED_MESSAGE = "✅ Your answers have been submitted and saved!" +STUDENT_SCORED_MESSAGE = "✅ Your performance has been evaluated!" +METACOGNITIVE_SUCCESS_MESSAGE = "✅ Personalized recommendations generated successfully!" + +# Taxonomy Level Descriptions +tax_lev_dic = { + "Remember": "🔍 **Recall what you've learned**", + "Understand": "💡 **Make sense of the idea**", + "Apply": "🔧 **Put your knowledge into action**", + "Analyze": "🕵️ **Break it down and explore the details**", + "Evaluate": "⚖️ **Make a judgment or decide what's best**", + "Create": "🛠️ **Build something new from what you've learned**" +} + +# Apply custom CSS for styling +st.markdown(""" + +""", unsafe_allow_html=True) + +# ===================================== +# Initialize Session State for storing answers and data +# ===================================== +if 'file_content' not in st.session_state: + st.session_state['file_content'] = None +if 'answers' not in st.session_state: + st.session_state['answers'] = {} +if 'transformed_questions' not in st.session_state: + st.session_state['transformed_questions'] = None +if 'restructured_data' not in st.session_state: + st.session_state['restructured_data'] = None +if 'restructured_filename' not in st.session_state: + st.session_state['restructured_filename'] = None +if 'original_filename' not in st.session_state: + st.session_state['original_filename'] = None +if 'answers_submitted' not in st.session_state: + st.session_state['answers_submitted'] = False +if 'scored_data' not in st.session_state: + st.session_state['scored_data'] = None +if 'scored_filename' not in st.session_state: + st.session_state['scored_filename'] = None +if 'weights' not in st.session_state: + st.session_state['weights'] = {} +if 'recommendations' not in st.session_state: + st.session_state['recommendations'] = None +if 'taxonomy_evaluation' not in st.session_state: + 
st.session_state['taxonomy_evaluation'] = None # New entry for Taxonomy-Based Evaluation +if 'selected_language' not in st.session_state: + st.session_state['selected_language'] = 'English' # Default language + + +# Define Taxonomy Levels +taxonomy_levels = ["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"] + +# Streamlit App Title and Logos +def display_header(): + col1, col2 = st.columns([1, 4]) + with col1: + st.image(LOGO_IMAGE_RIGHT, width=100) # Adjust the width as needed + with col2: + st.markdown(""" +
Tailored Learning, Empowering Success.
+ """, unsafe_allow_html=True) + +st.sidebar.markdown(""" +## Explore +Navigate through the sections to experience personalized learning. +""") + +# Upload File and Read Content +def upload_file(): + # Display an engaging message + st.markdown(""" + ### 🌟 **Elevate Your Learning Experience!** 🌟 + Ready to grow your skills? Upload your questions—it's simple, engaging, and rewarding. Start now and see your progress unfold! + """) + + # File uploader with friendly label and emoji + uploaded_file = st.file_uploader("📁 **Drag and drop a `.txt` file here, or click to select one**", type=["txt"]) + + if uploaded_file is not None: + st.markdown(f"**📄 Uploaded File:** {uploaded_file.name}") + try: + file_content = uploaded_file.read().decode("utf-8") + with st.expander("🔍 View Uploaded File Content"): + st.text(file_content) + st.session_state['file_content'] = file_content # Store file content in session state + return True + except Exception as e: + st.error(f"❌ An error occurred while processing the file: {e}") + return False + else: + st.info("👉 Please upload a `.txt` file to get started.") + return False + +# Run LLM Query for Taxonomy Agent +def run_llm_query(TAXONOMY_AGENT_PROMPT): + MODEL_NAME = "gpt-3.5-turbo" # or "gpt-4", etc. 
+ file_content = st.session_state['file_content'] + with tempfile.NamedTemporaryFile(delete=False, mode='w', encoding='utf-8', suffix=".txt") as tmp_file: + tmp_file.write(file_content) + temp_file_path = tmp_file.name + + try: + bloom_taxonomy_agent = llm.QueryRunner(document_path=temp_file_path, model_name=MODEL_NAME) + bloom_taxonomy = bloom_taxonomy_agent.run_query(TAXONOMY_AGENT_PROMPT) + result_str = bloom_taxonomy.get('result', '').strip() + + if result_str: + # Remove code block markers if present + if result_str.startswith("```json") and result_str.endswith("```"): + result_str = result_str[7:-3].strip() + elif result_str.startswith("```") and result_str.endswith("```"): + result_str = result_str[3:-3].strip() + + # Try parsing the JSON + try: + json_response = json.loads(result_str) + st.session_state['transformed_questions'] = json_response.get("Topic Questions", []) + return True + except json.JSONDecodeError: + st.error("An error occurred while processing your questions. Please try again.") + return False + else: + st.error("No response from the language model.") + return False + except Exception: + st.error("An unexpected error occurred. 
Please try again.") + return False + finally: + # Clean up the temporary file + if os.path.exists(temp_file_path): + os.unlink(temp_file_path) +tax_lev_dic = { + "Remember": "🔍 **Recall what you've learned**", + "Understand": "💡 **Make sense of the idea**", + "Apply": "🔧 **Put your knowledge into action**", + "Analyze": "🕵️ **Break it down and explore the details**", + "Evaluate": "⚖️ **Make a judgment or decide what's best**", + "Create": "🛠️ **Build something new from what you've learned**" +} +# Display Questions and Collect Answers +def display_questions_and_collect_answers(): + transformed_questions = st.session_state['transformed_questions'] + if transformed_questions: + st.subheader("📚 **Answer the following questions:**") + with st.form(key='answer_form'): + for idx, question_set in enumerate(transformed_questions): + original_question = question_set.get("Original Question", f"Question {idx+1}") + st.markdown(f"**📝 Original Question {idx+1}:** {original_question}") + + for level in taxonomy_levels: + taxonomy_question = question_set.get(level, "N/A") + if taxonomy_question == "N/A": + continue # Skip if the taxonomy question is not available + answer_field = f"{level} Answer_{idx}" + st.markdown(f"### {level}") + st.write(f"**Question:** {taxonomy_question}") + default_value = st.session_state['answers'].get(answer_field, "") + answer = st.text_area( + label=f"Your Answer for {level}:", + value=default_value, + key=answer_field, + height=100 + ) + st.session_state['answers'][answer_field] = answer + st.markdown("---") + submit_button = st.form_submit_button(label='🚀 Submit Your Answers') + if submit_button: + st.session_state['answers_submitted'] = True + st.success("🎉 Your answers have been submitted! Great job! 
🎉") + # Collect all answers from session state + student_answers = {"Topic Questions": []} + + for idx, question_set in enumerate(transformed_questions): + original_question = question_set.get("Original Question", f"Question {idx+1}") + answer_set = { + "Original Question": original_question, + "Sub-Questions": {} + } + + for level in taxonomy_levels: + taxonomy_question = question_set.get(level, "N/A") + answer_field = f"{level} Answer_{idx}" # Unique key per question and level + student_answer = st.session_state['answers'].get(answer_field, "") + + answer_set["Sub-Questions"][level] = { + "Question": taxonomy_question, + "Answer": student_answer + } + + student_answers["Topic Questions"].append(answer_set) + + # Save the student answers to session state + st.session_state['student_answers'] = student_answers + + # Timestamp for unique filename + timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") + answers_file_path = f"student_answers_{timestamp}.json" + + try: + # Save the JSON file locally + with open(answers_file_path, 'w', encoding='utf-8') as f: + json.dump(student_answers, f, ensure_ascii=False, indent=4) + # Provide a download button for the JSON file + json_str = json.dumps(student_answers, ensure_ascii=False, indent=4) + st.download_button( + label="📥 Download Your Answers", + data=json_str, + file_name=answers_file_path, + mime='application/json' + ) + except Exception: + st.error("Failed to save your answers.") + return True + else: + st.error("No transformed questions to display.") + return False + +# Restructure JSON as per the new format +def restructure_json(): + # Use the student_answers from session state + student_answers = st.session_state.get('student_answers') + if student_answers: + restructured_data = {"Bloom Taxonomy": {level: [] for level in taxonomy_levels}} + for question_set in student_answers["Topic Questions"]: + original_question = question_set.get("Original Question") + sub_questions = question_set.get("Sub-Questions", 
{}) + for level in taxonomy_levels: + sub_question = sub_questions.get(level) + if sub_question: + restructured_data["Bloom Taxonomy"][level].append({ + "Original Question": original_question, + "Sub-Question": sub_question + }) + st.session_state['restructured_data'] = restructured_data + return restructured_data + + +# Save JSON File Locally +def save_json_file(data, filename_prefix): + timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") + filename = f"{filename_prefix}_{timestamp}.json" + try: + with open(filename, 'w', encoding='utf-8') as f: + json.dump(data, f, ensure_ascii=False, indent=4) + return filename + except Exception: + st.error("Failed to save your data.") + return None + +# Run LLM Query for Scoring Agent +def run_scoring_agent(): + MODEL_NAME = "gpt-3.5-turbo" # or "gpt-4", etc. + + restructured_data = st.session_state['restructured_data'] + # Convert restructured_data to JSON string + input_json = json.dumps(restructured_data, ensure_ascii=False, indent=4) + + # Prepare the prompt with the input JSON + scoring_prompt = SCORING_AGENT_PROMPT.replace("{input_json}", input_json) + + # Write the input JSON to a temporary file + with tempfile.NamedTemporaryFile(delete=False, mode='w', encoding='utf-8', suffix=".json") as tmp_file: + tmp_file.write(input_json) + temp_file_path = tmp_file.name + + try: + scoring_agent = llm.QueryRunner(document_path=temp_file_path, model_name=MODEL_NAME) + scoring_response = scoring_agent.run_query(scoring_prompt) + result_str = scoring_response.get('result', '').strip() + + if result_str: + # Remove code block markers if present + if result_str.startswith("```json") and result_str.endswith("```"): + result_str = result_str[7:-3].strip() + elif result_str.startswith("```") and result_str.endswith("```"): + result_str = result_str[3:-3].strip() + + # Try parsing the JSON + try: + scored_data = json.loads(result_str) + st.session_state['scored_data'] = scored_data # Store in session state + return True + except 
json.JSONDecodeError: + st.error("An error occurred while evaluating your performance.") + return False + else: + st.error("No response from the language model.") + return False + + except Exception: + st.error("An unexpected error occurred during evaluation.") + return False + + finally: + # Clean up the temporary file + if os.path.exists(temp_file_path): + os.unlink(temp_file_path) + +# Calculate Taxonomy-Based Evaluation and Store JSON +def calculate_taxonomy_evaluation(): + scored_data = st.session_state['scored_data'] + taxonomy_evaluation = {"Bloom Taxonomy": {}} + + for level in taxonomy_levels: + if level in scored_data["Bloom Taxonomy"]: + total_score = 0 + count = 0 + for sub_question in scored_data["Bloom Taxonomy"][level]: + score = sub_question["Sub-Question"].get("score", 0) + total_score += score + count += 1 + average_score = total_score / count if count > 0 else 0 + weight = st.session_state['weights'].get(level, 0) + weighted_average = average_score * weight + taxonomy_evaluation["Bloom Taxonomy"][level] = { + "average_score": average_score, + "weight": weight, + "weighted_average": weighted_average + } + else: + taxonomy_evaluation["Bloom Taxonomy"][level] = { + "average_score": 0, + "weight": st.session_state['weights'].get(level, 0), + "weighted_average": 0 + } + + st.session_state['taxonomy_evaluation'] = taxonomy_evaluation + return taxonomy_evaluation + +# Run LLM Query for Metacognitive Recommendation Agent +def run_metacognition_agent(): + MODEL_NAME = "gpt-3.5-turbo" # or "gpt-4", etc. 
+ + # Retrieve the Taxonomy-Based Evaluation from session state + taxonomy_evaluation = st.session_state.get('taxonomy_evaluation') + + if not taxonomy_evaluation: + st.error("Evaluation data is missing.") + return False + + # Convert the taxonomy_evaluation to a JSON string + input_json_str = json.dumps(taxonomy_evaluation, ensure_ascii=False, indent=4) + + # Prepare the prompt with the input JSON + metacognition_prompt = METACOGNITION_AGENT_PROMPT.replace("{input_json}", input_json_str) + + # Write the prompt to a temporary file + with tempfile.NamedTemporaryFile(delete=False, mode='w', encoding='utf-8', suffix=".txt") as tmp_file: + tmp_file.write(metacognition_prompt) + temp_file_path = tmp_file.name + + try: + # Initialize QueryRunner with document_path and model_name + metacognition_agent = llm.QueryRunner(document_path=temp_file_path, model_name=MODEL_NAME) + + # Run the query + metacognition_response = metacognition_agent.run_query(metacognition_prompt) + result_str = metacognition_response.get('result', '').strip() + + if result_str: + # Store the recommendations + st.session_state['recommendations'] = result_str + return True + else: + st.error("No response from the language model.") + return False + + except Exception: + st.error("An unexpected error occurred while generating recommendations.") + return False + + finally: + # Clean up the temporary file + if os.path.exists(temp_file_path): + os.unlink(temp_file_path) + +# Display Taxonomy-Based Evaluation +def display_taxonomy_based_evaluation(): + scored_data = st.session_state['scored_data'] + levels_present = [level for level in taxonomy_levels if level in scored_data["Bloom Taxonomy"]] + num_questions = max(len(scored_data["Bloom Taxonomy"][level]) for level in levels_present) if levels_present else 0 + + st.markdown("### Your Learning Progress Overview") + total_weighted_score = 0.0 + taxonomy_evaluation = {"Bloom Taxonomy": {}} + + for level in taxonomy_levels: + if level in scored_data["Bloom 
Taxonomy"]: + total_score = 0 + level_questions = scored_data["Bloom Taxonomy"][level] + for sub_question in level_questions: + score = sub_question["Sub-Question"].get("score", 0) + total_score += score + average_score = total_score / len(level_questions) if level_questions else 0 + weight = st.session_state['weights'].get(level, 0) + weighted_average = average_score * weight + total_weighted_score += weighted_average + + taxonomy_evaluation["Bloom Taxonomy"][level] = { + "average_score": average_score, + "weight": weight, + "weighted_average": weighted_average + } + + # Display the score using custom progress bars + st.write(f"**{level}**") + st.progress(average_score / 5.0) + st.write(f"Average Score: {average_score:.2f}/5") + st.write(f"Weighted Score: {weighted_average:.2f}") + st.markdown("---") + else: + st.warning(f"No data for level '{level}'.") + + st.write(f"**Total Weighted Average Score: {total_weighted_score:.2f} out of 5.00**") + + # Store the taxonomy evaluation for metacognition + st.session_state['taxonomy_evaluation'] = taxonomy_evaluation + +# Display Metacognitive Recommendations +def display_metacognitive_recommendations(): + recommendations = st.session_state['recommendations'] + st.markdown("### Metacognitive Recommendations") + st.write(recommendations) + + # Optionally, save the recommendations to a file + timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") + filename = f"metacognitive_recommendations_{timestamp}.txt" + with open(filename, 'w', encoding='utf-8') as f: + f.write(recommendations) + + # Download button for recommendations + st.download_button( + label="📥 Download Recommendations", + data=recommendations, + file_name=filename, + mime='text/plain' + ) + + +def main(): + display_header() + # Language Selection + st.sidebar.subheader("Select Language") + language_options = ["English", "French", "Spanish", "German", "Chinese"] + selected_language = st.sidebar.selectbox( + "Choose your language", + language_options, + 
index=language_options.index(st.session_state.get('selected_language', 'English')) + ) + st.session_state['selected_language'] = selected_language + + # Step 1: File upload + if st.session_state['file_content'] is None: + if not upload_file(): + return # Stop execution until file is uploaded + + # Step 2: Run LLM Query + if st.session_state['transformed_questions'] is None: + if not run_llm_query(TAXONOMY_AGENT_PROMPT,): + return # Stop execution if there's an error + + # Step 3: Display Questions and Collect Answers + if not st.session_state['answers_submitted']: + if not display_questions_and_collect_answers(): + return # Wait until answers are submitted + + # Step 4: Restructure Data and Save Files + if st.session_state['restructured_data'] is None: + restructure_json() + + if st.session_state['restructured_filename'] is None: + restructured_filename = save_json_file(st.session_state['restructured_data'], "structured_data") + st.session_state['restructured_filename'] = restructured_filename + + if st.session_state['original_filename'] is None: + original_filename = save_json_file({"Topic Questions": st.session_state['transformed_questions']}, "original_questions") + st.session_state['original_filename'] = original_filename + + + # Display success message + #st.success(ANSWERS_SUBMITTED_MESSAGE) + + # Step 5: Scoring + st.markdown("### Reflect on Your Learning Journey!") + score_button = st.button('🎯 Score My Performance 🎯') + if score_button: + if run_scoring_agent(): + scored_filename = save_json_file(st.session_state['scored_data'], "student_score") + st.session_state['scored_filename'] = scored_filename + st.success(STUDENT_SCORED_MESSAGE) + # Calculate taxonomy evaluation + calculate_taxonomy_evaluation() + + if st.session_state.get('scored_data'): + # Display Scored Data + display_taxonomy_based_evaluation() + + # Step 6: Metacognitive Recommendations + st.markdown("### Get Personalized Recommendations") + recommend_button = st.button('✨ Get 
Recommendations') + if recommend_button: + if run_metacognition_agent(): + st.success(METACOGNITIVE_SUCCESS_MESSAGE) + + if st.session_state.get('recommendations'): + display_metacognitive_recommendations() + + # Sidebar Configuration for Weights + st.sidebar.header("Configuration") + st.sidebar.subheader("Set Importance Levels") + default_weights = [1/6]*6 # Default equal weights if none provided + if not st.session_state['weights']: + st.session_state['weights'] = dict(zip(taxonomy_levels, default_weights)) + + weights_input = {} + total_weight = 0.0 + for level in taxonomy_levels: + weight = st.sidebar.number_input(f"Importance of {level}", min_value=0.0, max_value=1.0, value=st.session_state['weights'][level], step=0.05) + weights_input[level] = weight + total_weight += weight + + # Normalize weights if total_weight != 1 + if total_weight != 1.0 and total_weight > 0: + st.sidebar.warning("Weights do not sum to 1. They will be normalized automatically.") + for level in taxonomy_levels: + st.session_state['weights'][level] = weights_input[level] / total_weight + elif total_weight == 0: + st.sidebar.error("Total weight cannot be zero. 
Resetting to default values.") + st.session_state['weights'] = dict(zip(taxonomy_levels, default_weights)) + else: + st.sidebar.success("Weights are set.") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/learnify/src/config.py b/learnify/src/config.py new file mode 100644 index 000000000..1f1467570 --- /dev/null +++ b/learnify/src/config.py @@ -0,0 +1,12 @@ + +# Document-related parameters +DOCUMENT_PATH = '../data/exos.txt' +ENCODING = "utf-8" +MAX_TOKENS = 500 + +# Model-related parameters +MODEL_NAME = "gpt-3.5-turbo" +EMBEDDING_TYPE = "cl100k_base" + +# Search and retrieval-related parameters +TOP_N_CHUNKS = 3 \ No newline at end of file diff --git a/learnify/src/llm.py b/learnify/src/llm.py new file mode 100644 index 000000000..be95b42e1 --- /dev/null +++ b/learnify/src/llm.py @@ -0,0 +1,81 @@ +import os +import sys +import keys +import tokenization +import config + +from langchain.document_loaders import TextLoader +from langchain.indexes import VectorstoreIndexCreator +from langchain.embeddings import OpenAIEmbeddings +from langchain.vectorstores import Chroma +from langchain.chat_models import ChatOpenAI +from langchain.chains import RetrievalQA + + +class DocumentManager: + #### --------- Handles loading and chunking of text ------### + + def __init__(self, filename, encoding=config.EMBEDDING_TYPE): + self.filename = filename + self.tokenizer = tokenization.TextTokenizer(encoding) + self.text = None + self.chunks = None + + + def load_document(self): + #### ------- Loads the document from file -------### + self.text = self.tokenizer.read_file(self.filename) + + def split_text(self, max_tokens=config.MAX_TOKENS): + #### ------- Splits the text into chunks -------### + self.chunks = self.tokenizer.creat_chunks(self.text, max_tokens) + + +class ChunkStore: + def __init__(self, chunks): + self.chunks = chunks + self.vectorestore = None + + def store_chunks(self): + #### ------- stores the text chunks in a vector database 
-------### + texts = [chunk for chunk in self.chunks] + self.vectorstore = Chroma.from_texts(texts=texts, embedding=OpenAIEmbeddings()) + + def retrieve_top_n_chunks(self, question, n=3): + #### ------- Retrieves the top n relevant chunks for a given question -------### + important_chunks = self.vectorstore.similarity_search(question) + return important_chunks[:n] + + def get_retriever(self): + return self.vectorstore.as_retriever() + + +class QueryRunner: + def __init__(self, document_path, model_name=config.MODEL_NAME): + self.document_path = document_path + self.model_name = model_name + + def run_query(self, query): + document_manager = DocumentManager(self.document_path) + document_manager.load_document() + document_manager.split_text() + + chunk_store = ChunkStore(document_manager.chunks) + chunk_store.store_chunks() + + chunk_store.retrieve_top_n_chunks(query) + + llm = ChatOpenAI(model_name=self.model_name, temperature=0) + retriever = chunk_store.vectorstore.as_retriever() + qa_chain = RetrievalQA.from_chain_type(llm, retriever=retriever) + response = qa_chain({"query": query}) + + return response + +if __name__ == "__main__": + os.environ["OPENAI_API_KEY"] = keys.key + + query = sys.argv[1] + query_runner = QueryRunner(config.DOCUMENT_PATH) + result = query_runner.run_query(query) + print(result) diff --git a/learnify/src/main.py b/learnify/src/main.py new file mode 100644 index 000000000..289e666fb --- /dev/null +++ b/learnify/src/main.py @@ -0,0 +1,33 @@ + +import config +import llm +import keys +import os + +import sys +import os + +sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "../.."))) + + +app = FastAPI() +os.environ["OPENAI_API_KEY"] = keys.key + +#### --------- Mounting static files to be served at the "/static" endpoint ------------------ #### +app.mount("/static", StaticFiles(directory="static"), name="static") + +@app.get("/") +async def read_root(): + #### ---------- Serving the static index.html file when the root ("/") is 
accessed -------------- #### + return FileResponse('static/index.html') + +@app.get("/query") +async def get_query_response(query: str = Query(..., description="Enter your query here")): + #### ------Creating a QueryRunner object with the document path and model name --------------#### + query_runner = llm.QueryRunner(document_path = config.DOCUMENT_PATH ,model_name=config.MODEL_NAME) + + #### ------ Running the query and getting the response ------------------#### + response = query_runner.run_query(query) + + #### -------- Returning the response as a JSON object -------- #### + return {"response": response} diff --git a/learnify/src/tokenization.py b/learnify/src/tokenization.py new file mode 100644 index 000000000..8836f4f4f --- /dev/null +++ b/learnify/src/tokenization.py @@ -0,0 +1,47 @@ +import openai +import tiktoken +import os +import config + + +## ------------------Chunking the Document --------------### + +class TextTokenizer: + def __init__(self, encoding=config.EMBEDDING_TYPE): + self.encoding = encoding + self.tt_encoding = tiktoken.get_encoding(encoding) + + def read_file(self,fname): + with open(fname, 'r', encoding=config.ENCODING) as f: + file_text = f.read() + return file_text + + def count_tokens(self, text): + tokens = self.tt_encoding.encode(text) + return len(tokens) + + def creat_chunks(self, text, max_tokens): + chunks = [] + current_chunk = "" + current_chunk_tokens = 0 + + sentences = text.split(".") + for sentence in sentences: + sentence_tokens = self.count_tokens(sentence) + + # Si l'ajout de la phrase donne as exces de tokens ----> on la rajoute + if current_chunk_tokens + sentence_tokens <= max_tokens: + current_chunk += sentence + "." + current_chunk_tokens += sentence_tokens + + # Sinon on cree un noveau chunk avec la phrase en cours + else: + chunks.append(current_chunk) + current_chunk = sentence + "." 
+ current_chunk_tokens = sentence_tokens + + if current_chunk: + chunks.append(current_chunk) + + return chunks + diff --git a/learnify/static/index.html b/learnify/static/index.html new file mode 100644 index 000000000..66447d420 --- /dev/null +++ b/learnify/static/index.html @@ -0,0 +1,20 @@ + + + + + + +