Updated

saisandeepramavath · saisandeepramavath · commit 67c5f1cc0826 · 2025-10-27T20:09:31.000-04:00
diff --git a/pandas/tests/mocking/test_database_io.py b/pandas/tests/mocking/test_database_io.py
@@ -0,0 +1,147 @@
+"""
+Unit Testing II - Mocking & Stubbing: Database I/O Operations
+Nithikesh Reddy
+"""
+import pytest
+import pandas as pd
+import numpy as np
+
+
+class TestDatabaseIOMocking:
+    """Test database I/O operations using mocks (FR-5)"""
+    
+    def test_read_sql_basic(self, monkeypatch):
+        """
+        Test a basic SQL read with ``pd.read_sql`` replaced by a stub
+        
+        Test Oracle (FR-5): Reading a SQL query that returns 100 rows and 3 columns
+        should create a DataFrame with 100 rows and 3 columns
+        
+        Rationale: no database server is needed because ``pd.read_sql`` itself is
+        stubbed; NOTE(review): pandas' real SQL reader is therefore not exercised
+        """
+        # Setup: Canned 100x3 frame the stub returns in place of a database result
+        expected_data = pd.DataFrame({
+            'id': range(100),
+            'name': [f'user_{i}' for i in range(100)],
+            'value': np.random.rand(100)
+        })
+        
+        def mock_read_sql(query, con, **kwargs):
+            return expected_data
+        
+        # Apply mock: monkeypatch undoes the replacement when the test finishes
+        monkeypatch.setattr(pd, 'read_sql', mock_read_sql)
+        
+        # Execute: con=None is acceptable because the stub never uses the connection
+        result = pd.read_sql("SELECT * FROM users", con=None)
+        
+        # Verify Test Oracle: Shape is (100, 3)
+        assert result.shape == (100, 3), f"Expected (100, 3), got {result.shape}"
+        assert list(result.columns) == ['id', 'name', 'value']
+        assert len(result) == 100
+    
+    def test_read_sql_empty_result(self, monkeypatch):
+        """
+        Test a SQL read whose stubbed result set is empty
+        
+        Rationale: Empty query results are common; the test checks that the
+        column-only frame returned by the stub reaches the caller unchanged
+        """
+        # Setup: Zero-row frame that still carries the three expected columns
+        empty_data = pd.DataFrame(columns=['id', 'name', 'value'])
+        
+        def mock_read_sql(query, con, **kwargs):
+            return empty_data
+        
+        monkeypatch.setattr(pd, 'read_sql', mock_read_sql)
+        
+        # Execute: Calls the stub installed above, not a real database
+        result = pd.read_sql("SELECT * FROM empty_table", con=None)
+        
+        # Verify: Empty DataFrame with correct columns
+        assert len(result) == 0
+        assert list(result.columns) == ['id', 'name', 'value']
+        assert isinstance(result, pd.DataFrame)
+    
+    def test_read_sql_with_parameters(self, monkeypatch):
+        """
+        Test that query parameters are forwarded to the stubbed ``pd.read_sql``
+        
+        Rationale: the stub returns the matching row only when ``params`` has
+        user_id == 5, so the assertions prove the params dict reached the call
+        """
+        # Setup: Single row the stub returns when the expected parameter arrives
+        filtered_data = pd.DataFrame({
+            'id': [5],
+            'name': ['user_5'],
+            'value': [0.5]
+        })
+        
+        def mock_read_sql(query, con, params=None, **kwargs):
+            if params and params.get('user_id') == 5:
+                return filtered_data
+            return pd.DataFrame()
+        
+        monkeypatch.setattr(pd, 'read_sql', mock_read_sql)
+        
+        # Execute: Parameterized query
+        result = pd.read_sql(
+            "SELECT * FROM users WHERE id = :user_id",
+            con=None,
+            params={'user_id': 5}
+        )
+        
+        # Verify: One row with id 5; a missing or wrong param would give an empty frame
+        assert len(result) == 1
+        assert result['id'].iloc[0] == 5
+        
+    def test_read_sql_dtype_handling(self, monkeypatch):
+        """
+        Test the column dtypes of a frame returned by the stubbed ``pd.read_sql``
+        
+        Test Oracle (FR-5): SQL INTEGER should convert to int64, VARCHAR to string,
+        DECIMAL to float64 in the resulting DataFrame
+        
+        Rationale: checks the dtypes of the stubbed frame; no SQL type mapping runs
+        """
+        # Setup: Stub frame built from a dict with one int, one str, one float column
+        typed_data = pd.DataFrame({
+            'int_col': [1, 2, 3],
+            'str_col': ['a', 'b', 'c'],
+            'float_col': [1.1, 2.2, 3.3]
+        })
+        # Explicitly cast so the expected dtypes are stated rather than inferred
+        typed_data['int_col'] = typed_data['int_col'].astype('int64')
+        typed_data['float_col'] = typed_data['float_col'].astype('float64')
+        
+        def mock_read_sql(query, con, **kwargs):
+            return typed_data
+        
+        monkeypatch.setattr(pd, 'read_sql', mock_read_sql)
+        
+        # Execute: Calls the stub installed above, not a real database
+        result = pd.read_sql("SELECT * FROM typed_table", con=None)
+        
+        # Verify Test Oracle: Correct data types
+        assert result['int_col'].dtype == np.int64
+        # Accept object or a string dtype: pandas 3.0 may infer a string dtype here
+        assert result['str_col'].dtype in [object, 'string', pd.StringDtype()]
+        assert result['float_col'].dtype == np.float64
+        
+    def test_read_sql_connection_error_handling(self, monkeypatch):
+        """
+        Test that a connection failure raised by the stub reaches the caller
+        
+        Rationale: Connection failures are common in production; the stub raises
+        ConnectionError and the test checks it propagates with its message
+        """
+        # Setup: Stub that always raises instead of returning data
+        def mock_read_sql(query, con, **kwargs):
+            raise ConnectionError("Unable to connect to database")
+        
+        monkeypatch.setattr(pd, 'read_sql', mock_read_sql)
+        
+        # Execute & Verify: match= is a regex searched within the error message
+        with pytest.raises(ConnectionError, match="Unable to connect"):
+            pd.read_sql("SELECT * FROM users", con=None)
diff --git a/pandas/tests/mocking/test_datetime.py b/pandas/tests/mocking/test_datetime.py
@@ -1,12 +1,9 @@
 """
 Unit Testing II - Mocking & Stubbing: DateTime Operations
-Student: Malikarjuna
-Requirement: FR-6 - Intelligent time-series functionality (resampling, rolling, frequency conversion)
+Malikarjuna
+Requirement: Intelligent time-series functionality (resampling, rolling, frequency conversion)
 
-This module tests pandas time-series operations using mocks to control
-time-dependent behavior and avoid relying on system clock.
 
-Following pandas test conventions: using pytest-style tests with monkeypatch.
 """
 
 import pytest
@@ -22,8 +19,7 @@ def test_timestamp_now_mocked(self, monkeypatch):
         """
         Test current timestamp creation with controlled time
         
-        Rationale: System clock is non-deterministic; mocking ensures
-        reproducible test results
+        
         """
         # Setup: Fix current time to specific moment
         fixed_time = pd.Timestamp('2024-01-15 12:00:00')
@@ -46,11 +42,10 @@ def test_date_range_generation(self, monkeypatch):
         """
         Test date range generation for time-series
         
-        Test Oracle (FR-6): Creating a date range for 365 days at daily frequency
+        Test Oracle: Creating a date range for 365 days at daily frequency
         should produce exactly 365 timestamps
         
-        Rationale: Date range generation can be tested without waiting for
-        actual date calculations
+        
         """
         # Setup: Mock date range
         expected_dates = pd.date_range('2023-01-01', periods=365, freq='D')
@@ -76,8 +71,7 @@ def test_time_series_resampling(self, monkeypatch):
         """
         Test time-series resampling operation (FR-6)
         
-        Rationale: Resampling is core time-series operation; mocking allows
-        testing without actual aggregation computation
+        
         """
         # Setup: Create time-series data
         dates = pd.date_range('2023-01-01', periods=100, freq='h')
@@ -112,11 +106,10 @@ def test_rolling_window_operations(self, monkeypatch):
         """
         Test rolling window calculations (FR-6)
         
-        Test Oracle (FR-6): Rolling mean with window=7 on 30-day data should
+        Test Oracle: Rolling mean with window=7 on 30-day data should
         produce 30 values with first 6 as NaN
         
-        Rationale: Rolling operations are computationally intensive; mocking
-        tests logic without actual window calculations
+       
         """
         # Setup: Time-series data
         dates = pd.date_range('2023-01-01', periods=30, freq='D')
@@ -152,8 +145,7 @@ def test_datetime_parsing_with_format(self, monkeypatch):
         """
         Test datetime string parsing with custom format
         
-        Rationale: Datetime parsing depends on locale/timezone; mocking
-        ensures consistent parsing behavior
+    
         """
         # Setup: Mock parsing of custom date format
         date_strings = ['2023-01-15', '2023-02-20', '2023-03-25']
diff --git a/pandas/tests/mocking/test_filesystem_io.py b/pandas/tests/mocking/test_filesystem_io.py
@@ -1,13 +1,9 @@
 """
 Unit Testing II - Mocking & Stubbing: File System I/O Operations
-Student: Sandeep
-Requirement: FR-5 - Loading data from flat files (CSV, Excel, HDF5)
+Sandeep
+Requirement: Loading data from flat files (CSV, Excel, HDF5)
 
-This module tests pandas file I/O functionality using mocks to avoid
-requiring actual file system operations. Tests verify pandas correctly handles
-file parsing without creating real files.
 
-Following pandas test conventions: using pytest-style tests with monkeypatch.
 """
 
 import pytest
@@ -25,8 +21,7 @@ def test_read_csv_basic(self, monkeypatch):
         Test Oracle (FR-5): Reading a CSV file containing 100 rows and 5 columns
         should create a DataFrame with 100 rows and 5 columns
         
-        Rationale: File I/O is slow; mocking allows testing CSV parsing logic
-        without actual file creation
+    
         """
         # Setup: Mock CSV data (100 rows, 5 columns)
         expected_data = pd.DataFrame({
@@ -53,8 +48,7 @@ def test_read_csv_with_delimiter(self, monkeypatch):
         """
         Test CSV read with custom delimiter (tab-separated, pipe-separated)
         
-        Rationale: Delimited files come in various formats; verify pandas
-        handles custom delimiters correctly
+    
         """
         # Setup: Mock TSV data
         tsv_data = pd.DataFrame({
@@ -81,8 +75,7 @@ def test_read_excel_basic(self, monkeypatch):
         """
         Test Excel file read operation
         
-        Rationale: Excel files require xlrd/openpyxl; mocking avoids
-        dependency on external libraries
+    
         """
         # Setup: Mock Excel data
         excel_data = pd.DataFrame({
@@ -110,8 +103,7 @@ def test_read_hdf_basic(self, monkeypatch):
         
         Test Oracle (NFR-3): System should load data using ultrafast HDF5 format
         
-        Rationale: HDF5 format is for high-performance storage; verify
-        pandas handles HDF5 correctly without requiring pytables
+
         """
         # Setup: Mock HDF5 data
         hdf_data = pd.DataFrame({
@@ -137,8 +129,6 @@ def test_csv_file_not_found_handling(self, monkeypatch):
         """
         Test error handling when CSV file doesn't exist
         
-        Rationale: File not found is common error; pandas should handle
-        with clear error message
         """
         # Setup: Mock to raise FileNotFoundError
         def mock_read_csv(filepath, **kwargs):