From 3e8d3f7ab3c7241f3e56691467545cf78899ab23 Mon Sep 17 00:00:00 2001 From: Jack Date: Mon, 6 Oct 2025 20:52:40 -0400 Subject: [PATCH 1/3] Added password generator script and README --- Python/README.md | 154 +++++++++++++++++++++++++++ Python/password_generator.py | 200 +++++++++++++++++++++++++++++++++++ 2 files changed, 354 insertions(+) create mode 100644 Python/README.md create mode 100644 Python/password_generator.py diff --git a/Python/README.md b/Python/README.md new file mode 100644 index 0000000..9cfaa50 --- /dev/null +++ b/Python/README.md @@ -0,0 +1,154 @@ +# Password Generator + +A secure and customizable password generator written in Python. This tool allows you to create strong passwords with various character types and provides strength analysis for generated passwords. + +## Features + +- 🔒 **Cryptographically Secure**: Uses Python's `secrets` module for secure random generation +- ⚙️ **Customizable**: Choose password length and character types +- 📊 **Strength Analysis**: Get detailed feedback on password strength +- 🔢 **Batch Generation**: Generate multiple passwords at once +- 🖥️ **Dual Interface**: Both interactive and command-line modes +- 🎯 **Smart Defaults**: Ensures at least one character from each selected type + +## Installation + +### Prerequisites +- Python 3.6 or higher + +### Setup +1. **Clone or download** the script to your local machine +2. 
**Make executable** (optional): + ```bash + chmod +x password_generator.py + ``` + +## Usage + +### Interactive Mode (Recommended for beginners) + +Run the script without arguments to enter interactive mode: + +```bash +python3 password_generator.py +``` + +or if executable: + +```bash +./password_generator.py +``` + +You'll be prompted for: +- Password length (default: 12) +- Character types to include (lowercase, uppercase, digits, symbols) +- Number of passwords to generate (default: 1) + +### Command Line Mode + +For quick password generation or scripting: + +#### Basic Usage +```bash +# Generate one strong password with the default settings spelled out (12 characters, all types) +python3 password_generator.py -l 12 + +# Generate a 16-character password +python3 password_generator.py -l 16 + +# Generate 5 passwords +python3 password_generator.py -n 5 +``` + +#### Advanced Options +```bash +# Generate password without symbols +python3 password_generator.py --no-symbols + +# Generate numeric PIN (6 digits) +python3 password_generator.py -l 6 --no-lower --no-upper --no-symbols + +# Generate letters-only password +python3 password_generator.py --no-digits --no-symbols + +# Force interactive mode +python3 password_generator.py --interactive +``` + +### Command Line Arguments + +| Argument | Short | Description | Default | +|----------|-------|-------------|---------| +| `--length` | `-l` | Password length | 12 | +| `--number` | `-n` | Number of passwords to generate | 1 | +| `--no-lower` | | Exclude lowercase letters | Included | +| `--no-upper` | | Exclude uppercase letters | Included | +| `--no-digits` | | Exclude digits | Included | +| `--no-symbols` | | Exclude symbols | Included | +| `--interactive` | | Force interactive mode | Auto-detect | + +## Examples + +### Strong Password +```bash +python3 password_generator.py -l 16 +# Example output: Xk8#pL$2*mQ9!vR@ +``` + +### Multiple Passwords +```bash +python3 password_generator.py -n 3 -l 10 +``` +Output: +``` +Generated 3 passwords: 
#!/usr/bin/python3
"""Command-line and interactive generator for cryptographically secure passwords."""

import random  # NOTE: unused; every random choice below deliberately goes through `secrets`
import string
import secrets
import argparse
import sys
from typing import List, Dict, Optional


class PasswordGenerator:
    """Generate passwords from selectable character classes and report their strength."""

    def __init__(self):
        # Maps each requirement keyword accepted by generate_password() to its alphabet.
        self.character_sets = {
            'lowercase': string.ascii_lowercase,
            'uppercase': string.ascii_uppercase,
            'digits': string.digits,
            'symbols': string.punctuation
        }

    def generate_password(self, length: int = 12, **requirements) -> str:
        """
        Generate a password with specified requirements

        Args:
            length: Length of the password
            requirements: Boolean flags for character types
                - lowercase: Include lowercase letters
                - uppercase: Include uppercase letters
                - digits: Include digits
                - symbols: Include symbols
                When no flag is passed at all, every character type is
                enabled. Flags that are passed are honoured as given, so
                explicitly disabling every type is an error rather than a
                silent fallback to "everything". Unknown keywords are ignored.

        Returns:
            Generated password string containing at least one character from
            each enabled type.

        Raises:
            ValueError: If no character type is enabled, or if ``length`` is
                smaller than the number of enabled types.
        """
        # Only fall back to the defaults when the caller expressed no
        # preference. Testing `any(requirements.values())` here would also
        # override an explicit "all False" selection and make the
        # "At least one character type" error below unreachable.
        if not requirements:
            requirements = {char_type: True for char_type in self.character_sets}

        # Build character pool based on requirements
        character_pool = ""
        required_chars = []

        for char_type, include in requirements.items():
            if include and char_type in self.character_sets:
                chars = self.character_sets[char_type]
                character_pool += chars
                # Add at least one character from each required type
                required_chars.append(secrets.choice(chars))

        if not character_pool:
            raise ValueError("At least one character type must be selected")

        # Calculate remaining characters needed
        remaining_length = length - len(required_chars)
        if remaining_length < 0:
            raise ValueError(f"Password length too short for required character types. Minimum: {len(required_chars)}")

        # Generate remaining characters
        additional_chars = [secrets.choice(character_pool) for _ in range(remaining_length)]

        # Shuffle with the OS CSPRNG so the guaranteed characters do not
        # always occupy the leading positions.
        all_chars = required_chars + additional_chars
        secrets.SystemRandom().shuffle(all_chars)

        return ''.join(all_chars)

    def generate_multiple_passwords(self, count: int = 5, **kwargs) -> List[str]:
        """Generate ``count`` passwords, forwarding ``kwargs`` to generate_password()."""
        return [self.generate_password(**kwargs) for _ in range(count)]

    def check_password_strength(self, password: str) -> Dict[str, bool]:
        """Return six boolean criteria: four character-class checks and two length checks."""
        return {
            'has_lowercase': any(c.islower() for c in password),
            'has_uppercase': any(c.isupper() for c in password),
            'has_digit': any(c.isdigit() for c in password),
            'has_symbol': any(c in string.punctuation for c in password),
            'min_length': len(password) >= 8,
            'good_length': len(password) >= 12
        }


def _ask_yes_no(prompt: str) -> bool:
    """Return False only for an explicit 'n'/'no' answer; anything else (including Enter) is yes."""
    return input(prompt).strip().lower() not in ('n', 'no')


def get_user_preferences():
    """Prompt on stdin for the settings and return ``(length, requirements, count)``."""
    print("=== Password Generator ===")

    # Get password length
    while True:
        try:
            length = int(input("Enter password length (default 12): ") or "12")
            if length < 4:
                print("Password length must be at least 4 characters.")
                continue
            break
        except ValueError:
            print("Please enter a valid number.")

    # Get character type preferences
    print("\nSelect character types to include:")
    requirements = {
        'lowercase': _ask_yes_no("Include lowercase letters? (y/n, default y): "),
        'uppercase': _ask_yes_no("Include uppercase letters? (y/n, default y): "),
        'digits': _ask_yes_no("Include digits? (y/n, default y): "),
        'symbols': _ask_yes_no("Include symbols? (y/n, default y): ")
    }

    # Get number of passwords to generate
    while True:
        try:
            count = int(input("\nHow many passwords to generate? (default 1): ") or "1")
            if count < 1:
                print("Please enter at least 1.")
                continue
            break
        except ValueError:
            print("Please enter a valid number.")

    return length, requirements, count


def display_password_strength(password: str, generator: PasswordGenerator):
    """Print the password, its per-criterion analysis and an overall rating."""
    strength = generator.check_password_strength(password)

    print(f"\nPassword: {password}")
    print("Strength Analysis:")
    print(f"Length: {len(password)} characters")
    print(f"Lowercase letters: {'Yes' if strength['has_lowercase'] else 'No'}")
    print(f"Uppercase letters: {'Yes' if strength['has_uppercase'] else 'No'}")
    print(f"Digits: {'Yes' if strength['has_digit'] else 'No'}")
    print(f"Symbols: {'Yes' if strength['has_symbol'] else 'No'}")

    # Overall strength rating: number of True criteria out of the six returned.
    criteria_met = sum(strength.values())
    if criteria_met >= 6:
        rating = "Strong"
    elif criteria_met >= 4:
        rating = "Good"
    else:
        rating = "Weak"

    print(f"Overall Rating: {rating}")


def main(argv: Optional[List[str]] = None) -> int:
    """Run the command line interface.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]``.

    Returns:
        0 when passwords were generated, 1 when generation failed.
    """
    parser = argparse.ArgumentParser(description='Generate secure passwords')
    parser.add_argument('-l', '--length', type=int, default=12, help='Password length')
    parser.add_argument('-n', '--number', type=int, default=1, help='Number of passwords')
    parser.add_argument('--no-lower', action='store_true', help='Exclude lowercase letters')
    parser.add_argument('--no-upper', action='store_true', help='Exclude uppercase letters')
    parser.add_argument('--no-digits', action='store_true', help='Exclude digits')
    parser.add_argument('--no-symbols', action='store_true', help='Exclude symbols')
    parser.add_argument('--interactive', action='store_true', help='Use interactive mode')

    cli_args = sys.argv[1:] if argv is None else list(argv)
    args = parser.parse_args(cli_args)

    if args.number < 1:
        parser.error('--number must be at least 1')

    generator = PasswordGenerator()

    # "No arguments" has to be detected from the raw argument list: the parsed
    # namespace always holds the truthy defaults length=12 and number=1, so
    # `not any(vars(args).values())` can never be true. Prompting is limited
    # to a terminal so piped/scripted runs keep producing a default password
    # instead of failing on input().
    stdin_is_tty = sys.stdin is not None and sys.stdin.isatty()
    if args.interactive or (not cli_args and stdin_is_tty):
        # Interactive mode
        length, requirements, count = get_user_preferences()
    else:
        # Command line mode
        length = args.length
        count = args.number
        requirements = {
            'lowercase': not args.no_lower,
            'uppercase': not args.no_upper,
            'digits': not args.no_digits,
            'symbols': not args.no_symbols
        }

    try:
        # Generate passwords
        if count == 1:
            password = generator.generate_password(length=length, **requirements)
            display_password_strength(password, generator)
        else:
            passwords = generator.generate_multiple_passwords(count=count, length=length, **requirements)
            print(f"\nGenerated {count} passwords:")
            for i, password in enumerate(passwords, 1):
                print(f"{i:2d}. {password}")

            # Show strength for first password as example
            if passwords:
                display_password_strength(passwords[0], generator)

    except ValueError as e:
        print(f"Error: {e}")
        return 1
    except Exception as e:
        # Top-level boundary: report and signal failure through the exit code.
        print(f"Unexpected error: {e}")
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main())
- -## Features - -- 🔒 **Cryptographically Secure**: Uses Python's `secrets` module for secure random generation -- ⚙️ **Customizable**: Choose password length and character types -- 📊 **Strength Analysis**: Get detailed feedback on password strength -- 🔢 **Batch Generation**: Generate multiple passwords at once -- 🖥️ **Dual Interface**: Both interactive and command-line modes -- 🎯 **Smart Defaults**: Ensures at least one character from each selected type - -## Installation - -### Prerequisites -- Python 3.6 or higher - -### Setup -1. **Clone or download** the script to your local machine -2. **Make executable** (optional): - ```bash - chmod +x password_generator.py - ``` - -## Usage - -### Interactive Mode (Recommended for beginners) - -Run the script without arguments to enter interactive mode: - -```bash -python3 password_generator.py -``` - -or if executable: - -```bash -./password_generator.py -``` - -You'll be prompted for: -- Password length (default: 12) -- Character types to include (lowercase, uppercase, digits, symbols) -- Number of passwords to generate (default: 1) - -### Command Line Mode - -For quick password generation or scripting: - -#### Basic Usage -```bash -# Generate one strong password with default settings (12 characters, all types) -python3 password_generator.py - -# Generate a 16-character password -python3 password_generator.py -l 16 - -# Generate 5 passwords -python3 password_generator.py -n 5 -``` - -#### Advanced Options -```bash -# Generate password without symbols -python3 password_generator.py --no-symbols - -# Generate numeric PIN (6 digits) -python3 password_generator.py -l 6 --no-lower --no-upper --no-symbols - -# Generate letters-only password -python3 password_generator.py --no-digits --no-symbols - -# Force interactive mode -python3 password_generator.py --interactive -``` - -### Command Line Arguments - -| Argument | Short | Description | Default | -|----------|-------|-------------|---------| -| `--length` | `-l` | Password 
length | 12 | -| `--number` | `-n` | Number of passwords to generate | 1 | -| `--no-lower` | | Exclude lowercase letters | Included | -| `--no-upper` | | Exclude uppercase letters | Included | -| `--no-digits` | | Exclude digits | Included | -| `--no-symbols` | | Exclude symbols | Included | -| `--interactive` | | Force interactive mode | Auto-detect | - -## Examples - -### Strong Password -```bash -python3 password_generator.py -l 16 -# Example output: Xk8#pL$2*mQ9!vR@ -``` - -### Multiple Passwords -```bash -python3 password_generator.py -n 3 -l 10 -``` -Output: -``` -Generated 3 passwords: - 1. aB3$fG8!kL - 2. pQ9@mN2#rT - 3. xY7!vW4$zU -``` - -### Application-Specific Passwords -```bash -# API Key style (letters and digits only) -python3 password_generator.py -l 32 --no-symbols - -# PIN code -python3 password_generator.py -l 6 --no-lower --no-upper --no-symbols - -# Memorable password (letters only) -python3 password_generator.py -l 14 --no-digits --no-symbols -``` - -## Password Strength Ratings - -The generator provides strength analysis based on: -- **Length**: Minimum 8 characters, good at 12+ -- **Character diversity**: Lowercase, uppercase, digits, symbols -- **Overall rating**: - - **Strong**: Meets 6+ criteria - - **Good**: Meets 4-5 criteria - - **Weak**: Meets 3 or fewer criteria - -## Security Notes - -- ✅ Uses cryptographically secure random number generation -- ✅ Ensures minimum character requirements are met -- ✅ Properly shuffles final passwords -- ✅ No passwords are stored or transmitted -- ⚠️ Generated passwords are displayed in terminal - clear your history if needed - -## Troubleshooting - -### Common Issues - -**"Password length too short" error** -- Solution: Increase password length or reduce character type requirements - -**No characters selected** -- Solution: Enable at least one character type (lowercase, uppercase, digits, or symbols) - -**Permission denied** -- Solution: Make script executable with `chmod +x password_generator.py` - 
-### Requirements -- Python 3.6+ (for `secrets` module and type hints) diff --git a/Python/password_generator.py b/Python/password_generator.py deleted file mode 100644 index 797db9a..0000000 --- a/Python/password_generator.py +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/python3 - -import random -import string -import secrets -import argparse -from typing import List, Dict - - -class PasswordGenerator: - def __init__(self): - self.character_sets = { - 'lowercase': string.ascii_lowercase, - 'uppercase': string.ascii_uppercase, - 'digits': string.digits, - 'symbols': string.punctuation - } - - def generate_password(self, length: int = 12, **requirements) -> str: - """ - Generate a password with specified requirements - - Args: - length: Length of the password - requirements: Boolean flags for character types - - lowercase: Include lowercase letters - - uppercase: Include uppercase letters - - digits: Include digits - - symbols: Include symbols - - Returns: - Generated password string - """ - # Default requirements if none specified - if not any(requirements.values()): - requirements = { - 'lowercase': True, - 'uppercase': True, - 'digits': True, - 'symbols': True - } - - # Build character pool based on requirements - character_pool = "" - required_chars = [] - - for char_type, include in requirements.items(): - if include and char_type in self.character_sets: - chars = self.character_sets[char_type] - character_pool += chars - # Add at least one character from each required type - required_chars.append(secrets.choice(chars)) - - if not character_pool: - raise ValueError("At least one character type must be selected") - - # Calculate remaining characters needed - remaining_length = length - len(required_chars) - if remaining_length < 0: - raise ValueError(f"Password length too short for required character types. 
Minimum: {len(required_chars)}") - - # Generate remaining characters - additional_chars = [secrets.choice(character_pool) for _ in range(remaining_length)] - - # Combine and shuffle - all_chars = required_chars + additional_chars - secrets.SystemRandom().shuffle(all_chars) - - return ''.join(all_chars) - - def generate_multiple_passwords(self, count: int = 5, **kwargs) -> List[str]: - """Generate multiple passwords with the same settings""" - return [self.generate_password(**kwargs) for _ in range(count)] - - def check_password_strength(self, password: str) -> Dict[str, bool]: - """Check the strength of a password""" - return { - 'has_lowercase': any(c.islower() for c in password), - 'has_uppercase': any(c.isupper() for c in password), - 'has_digit': any(c.isdigit() for c in password), - 'has_symbol': any(c in string.punctuation for c in password), - 'min_length': len(password) >= 8, - 'good_length': len(password) >= 12 - } - - -def get_user_preferences(): - """Get password generation preferences from user input""" - print("=== Password Generator ===") - - # Get password length - while True: - try: - length = int(input("Enter password length (default 12): ") or "12") - if length < 4: - print("Password length must be at least 4 characters.") - continue - break - except ValueError: - print("Please enter a valid number.") - - # Get character type preferences - print("\nSelect character types to include:") - requirements = { - 'lowercase': input("Include lowercase letters? (y/n, default y): ").lower() != 'n', - 'uppercase': input("Include uppercase letters? (y/n, default y): ").lower() != 'n', - 'digits': input("Include digits? (y/n, default y): ").lower() != 'n', - 'symbols': input("Include symbols? (y/n, default y): ").lower() != 'n' - } - - # Get number of passwords to generate - while True: - try: - count = int(input("\nHow many passwords to generate? 
(default 1): ") or "1") - if count < 1: - print("Please enter at least 1.") - continue - break - except ValueError: - print("Please enter a valid number.") - - return length, requirements, count - - -def display_password_strength(password: str, generator: PasswordGenerator): - """Display strength analysis for a password""" - strength = generator.check_password_strength(password) - - print(f"\nPassword: {password}") - print("Strength Analysis:") - print(f"Length: {len(password)} characters") - print(f"Lowercase letters: {'Yes' if strength['has_lowercase'] else 'No'}") - print(f"Uppercase letters: {'Yes' if strength['has_uppercase'] else 'No'}") - print(f"Digits: {'Yes' if strength['has_digit'] else 'No'}") - print(f"Symbols: {'Yes' if strength['has_symbol'] else 'No'}") - - # Overall strength rating - criteria_met = sum(strength.values()) - if criteria_met >= 6: - rating = "Strong" - elif criteria_met >= 4: - rating = "Good" - else: - rating = "Weak" - - print(f"Overall Rating: {rating}") - - -def main(): - """Main function with command line interface""" - parser = argparse.ArgumentParser(description='Generate secure passwords') - parser.add_argument('-l', '--length', type=int, default=12, help='Password length') - parser.add_argument('-n', '--number', type=int, default=1, help='Number of passwords') - parser.add_argument('--no-lower', action='store_true', help='Exclude lowercase letters') - parser.add_argument('--no-upper', action='store_true', help='Exclude uppercase letters') - parser.add_argument('--no-digits', action='store_true', help='Exclude digits') - parser.add_argument('--no-symbols', action='store_true', help='Exclude symbols') - parser.add_argument('--interactive', action='store_true', help='Use interactive mode') - - args = parser.parse_args() - - generator = PasswordGenerator() - - if args.interactive or not any(vars(args).values()): - # Interactive mode - length, requirements, count = get_user_preferences() - else: - # Command line mode - length = 
args.length - count = args.number - requirements = { - 'lowercase': not args.no_lower, - 'uppercase': not args.no_upper, - 'digits': not args.no_digits, - 'symbols': not args.no_symbols - } - - try: - # Generate passwords - if count == 1: - password = generator.generate_password(length=length, **requirements) - display_password_strength(password, generator) - else: - passwords = generator.generate_multiple_passwords(count=count, length=length, **requirements) - print(f"\nGenerated {count} passwords:") - for i, password in enumerate(passwords, 1): - print(f"{i:2d}. {password}") - - # Show strength for first password as example - if passwords: - display_password_strength(passwords[0], generator) - - except ValueError as e: - print(f"Error: {e}") - except Exception as e: - print(f"Unexpected error: {e}") - - -if __name__ == "__main__": - main() From 7223af099fa994ac5a8d6afbabf887ed3c1550d4 Mon Sep 17 00:00:00 2001 From: Jack Date: Fri, 31 Oct 2025 22:31:59 -0400 Subject: [PATCH 3/3] Web Scraper for Articles --- Python/web_scraper/README.md | 121 ++++++++++++++++++++++++++++++ Python/web_scraper/web_scraper.py | 115 ++++++++++++++++++++++++++++ 2 files changed, 236 insertions(+) create mode 100644 Python/web_scraper/README.md create mode 100644 Python/web_scraper/web_scraper.py diff --git a/Python/web_scraper/README.md b/Python/web_scraper/README.md new file mode 100644 index 0000000..0525c2b --- /dev/null +++ b/Python/web_scraper/README.md @@ -0,0 +1,121 @@ +# Web Scraper + +A Python command-line tool for scraping news articles from websites using the `newspaper3k` library. The tool can extract individual articles or all articles from a news website and export them to JSON or CSV format. 
+ +## Features + +- **Single Article Scraping**: Extract content from a specific article URL +- **Bulk Article Scraping**: Scrape all articles linked from a news website homepage +- **Multiple Export Formats**: Export data as JSON or CSV +- **Custom File Names**: Specify custom output file names +- **Article Metadata**: Extract title, authors, publication date, content, and URL + +## Installation + +1. Ensure you have Python 3.6+ installed +2. Install the required dependencies: + +```bash +pip install newspaper3k +``` + +## Usage + +### Basic Single Article Scraping + +```bash +python web_scraper.py "https://example.com/news-article" +``` + +This will create a `news.json` file with the scraped article data. + +### Scrape All Articles from a News Site + +```bash +python web_scraper.py "https://example-news.com" --all-articles +``` + +### Export to CSV Format + +```bash +python web_scraper.py "https://example.com/article" --csv-format +``` + +### Custom Output File Name + +```bash +python web_scraper.py "https://example.com/article" --file my_articles +``` + +### Combine Options + +```bash +# Scrape all articles and export as CSV with custom filename +python web_scraper.py "https://example-news.com" -a -csv -f my_data +``` + +## Command Line Arguments + +| Argument | Short | Description | Default | +|----------|-------|-------------|---------| +| `url` | - | URL of the webpage to scrape (required) | - | +| `--file` | `-f` | Custom output filename | `news` | +| `--csv-format` | `-csv` | Export to CSV instead of JSON | `False` | +| `--all-articles` | `-a` | Scrape all articles from the site | `False` | + +## Output Format + +### JSON Output +```json +[ + { + "title": "Article Title", + "authors": ["Author One", "Author Two"], + "publish_date": "2023-10-15 14:30:00", + "text": "Full article content...", + "url": "https://example.com/article" + } +] +``` + +### CSV Output +The CSV file will contain columns for: +- `title` +- `authors` (as a string representation of the 
list) +- `publish_date` +- `text` +- `url` + +## Examples + +1. **Scrape a single article to JSON:** + ```bash + python web_scraper.py "https://www.bbc.com/news/world-us-canada-12345678" + ``` + +2. **Scrape all articles from CNN and export as CSV:** + ```bash + python web_scraper.py "https://www.cnn.com" -a -csv -f cnn_articles + ``` + +3. **Scrape with custom JSON filename:** + ```bash + python web_scraper.py "https://example.com/article" -f my_article_data + ``` + +## Notes + +- The tool uses the `newspaper3k` library which may not work with all websites, especially those with heavy JavaScript rendering or anti-scraping measures +- Some news sites may block automated scraping attempts +- The quality of extracted content depends on the website's structure and the `newspaper3k` library's parsing capabilities +- For sites with many articles, using `--all-articles` may take considerable time + +## Error Handling + +- If scraping fails, the tool will display an error message +- Empty results will be indicated with appropriate messages +- Network issues and parsing errors are caught and reported + +## License + +This tool is provided for educational and personal use. Please respect website terms of service and robots.txt files when scraping. 
#!/usr/bin/python3
"""Scrape news articles with the newspaper3k library and export them to JSON or CSV."""

import csv
import argparse
import json
import sys
from datetime import datetime

try:
    import newspaper
except ImportError:
    # Defer the failure to the first scrape so `--help` and the export helpers
    # still work, and the user gets an actionable message instead of a traceback.
    newspaper = None


def _require_newspaper():
    """Return the imported ``newspaper`` module, raising if it is not installed."""
    if newspaper is None:
        raise RuntimeError('newspaper3k is not installed; run "pip install newspaper3k"')
    return newspaper


class WebScraper:
    """Download articles from a URL and write them to a JSON or CSV file."""

    # Column order of the CSV export; also the keys of every article summary.
    FIELDNAMES = ['title', 'authors', 'publish_date', 'text', 'url']

    def __init__(self, url, file_name='news', export_format='json'):
        """Store the target URL and derive the output file name.

        Args:
            url: Article URL, or site URL when scraping all articles.
            file_name: Output path; the format's extension is appended
                unless the name already ends with it.
            export_format: Either 'json' or 'csv'.

        Raises:
            ValueError: If ``export_format`` is not 'json' or 'csv'.
        """
        self.url = url

        if export_format not in ['json', 'csv']:
            raise ValueError('Export format must be either json or csv.')

        self.export_format = export_format

        extension = '.' + export_format
        self.FILE_NAME = file_name if file_name.endswith(extension) else file_name + extension

    def export_to_JSON(self, articles):
        """Write ``articles`` (an iterable of summary dicts) to FILE_NAME as a JSON array."""
        # Explicit UTF-8 keeps the output independent of the platform locale;
        # ensure_ascii=False leaves article text readable instead of \uXXXX escapes.
        with open(self.FILE_NAME, 'w', encoding='utf-8') as f:
            json.dump(list(articles), f, indent=2, ensure_ascii=False)

    def export_to_CSV(self, articles):
        """Write ``articles`` to FILE_NAME as CSV with a header row.

        The ``authors`` list is written as its string representation.
        """
        # Without an explicit encoding, non-ASCII article text can raise
        # UnicodeEncodeError on platforms whose default encoding is not UTF-8.
        with open(self.FILE_NAME, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=self.FIELDNAMES)
            writer.writeheader()
            writer.writerows(articles)

    def get_one_article(self, url=None):
        """Download and parse one article.

        Args:
            url: Article URL; defaults to the URL given to the constructor.

        Returns:
            A dict with the keys title, authors, publish_date, text and url,
            or None when scraping failed (the error is printed).
        """
        target_url = url or self.url
        try:
            article = _require_newspaper().Article(target_url)
            article.download()
            article.parse()
            summary = {
                'title': article.title or "No title found",
                'authors': article.authors or ["Unknown author"],
                'publish_date': article.publish_date.strftime('%Y-%m-%d %H:%M:%S') if article.publish_date else None,
                'text': article.text or "No content found",
                'url': target_url
            }
            return summary

        except Exception as e:
            # Best effort by design: one failing article must not abort a bulk run.
            print(f'Error scraping {target_url}: {e}')
            return None

    def get_all_articles(self):
        """Scrape every article that ``newspaper.build`` lists for the configured URL.

        Returns:
            A list of summary dicts; articles that failed are skipped. An
            empty list is returned when building the source itself fails.
        """
        try:
            paper = _require_newspaper().build(self.url, memoize_articles=False)
            candidates = (self.get_one_article(art.url) for art in paper.articles)
            return [summary for summary in candidates if summary]

        except Exception as e:
            print(f'Error building newspaper from {self.url}: {e}')
            return []


def main(argv=None):
    """Parse the command line, scrape, and export.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]``.

    Returns:
        0 when at least one article was exported, 1 otherwise.
    """
    parser = argparse.ArgumentParser(description='Web Scraper for News')
    parser.add_argument('url', help='URL of the webpage to scrape')
    parser.add_argument('--file', '-f', default='news',
                        help='Custom output file (default: news.json or news.csv)')
    parser.add_argument('--csv-format', '-csv', action='store_true',
                        help='Export to CSV format instead of JSON format')
    parser.add_argument('--all-articles', '-a', action='store_true',
                        help='Get all articles linked to URL instead of only the article from the URL itself')

    args = parser.parse_args(argv)

    export_format = 'csv' if args.csv_format else 'json'

    try:
        web_scraper = WebScraper(
            url=args.url,
            file_name=args.file,
            export_format=export_format
        )

        if args.all_articles:
            articles = web_scraper.get_all_articles()
        else:
            single_article = web_scraper.get_one_article()
            articles = [single_article] if single_article else []

        if not articles:
            print('No articles found to export.')
            return 1

        if export_format == 'json':
            web_scraper.export_to_JSON(articles)
        else:
            web_scraper.export_to_CSV(articles)

        article_count = len(articles)
        noun = 'article' if article_count == 1 else 'articles'
        print(f'Successfully exported {article_count} {noun} to {web_scraper.FILE_NAME}')
        return 0

    except Exception as e:
        # Top-level boundary: report and signal failure through the exit code.
        print(f'Error: {e}')
        return 1


if __name__ == '__main__':
    sys.exit(main())