|
| 1 | +""" |
| 2 | +System-level black-box tests for pandas end-to-end workflows. |
| 3 | +
|
| 4 | +These tests validate complete user workflows through public APIs only, |
| 5 | +treating the system as a black box without referencing internal implementation. |
| 6 | +
|
| 7 | +Test Categories: |
| 8 | +Data Loading and Export Workflow (Sandeep Ramavath) |
| 9 | +""" |
| 10 | +import os |
| 11 | +import tempfile |
| 12 | +import numpy as np |
| 13 | +import pandas as pd |
| 14 | +import pytest |
| 15 | + |
| 16 | + |
class TestDataIOWorkflow:
    """
    System tests by Sandeep Ramavath.

    Validates end-to-end data import/export workflows using only the
    public pandas API (``DataFrame``, ``to_csv``, ``read_csv``).
    """

    def test_csv_roundtrip_workflow(self, tmp_path):
        """
        Test Case: CSV Data Import-Export Workflow

        Pre-conditions:
        - Temporary directory available for file operations
          (``tmp_path`` must support ``/`` and ``.exists()``, e.g. a
          ``pathlib.Path``)
        - pandas library installed and functional

        Test Steps:
        1. Create DataFrame with mixed data types using public API
        2. Export DataFrame to CSV file
        3. Import CSV file back into new DataFrame
        4. Verify data integrity and type preservation

        Expected Results:
        - CSV file created successfully
        - Data round-trips without loss
        - Numeric, string, boolean, and datetime types preserved
        - All values (including dates) match original dataset
        """
        # Step 1: Create DataFrame with mixed types through public API
        original_data = pd.DataFrame({
            'id': [1, 2, 3, 4, 5],
            'name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
            'score': [95.5, 87.3, 92.1, 88.7, 91.4],
            'date': pd.date_range('2024-01-01', periods=5),
            'active': [True, False, True, True, False],
        })

        # Step 2: Export to CSV using public API
        csv_path = tmp_path / "test_data.csv"
        original_data.to_csv(csv_path, index=False)

        # Verify file exists
        assert csv_path.exists(), "CSV file should be created"

        # Step 3: Import from CSV using public API
        loaded_data = pd.read_csv(csv_path, parse_dates=['date'])

        # Step 4: Verify data integrity
        assert len(loaded_data) == 5, "Should load 5 rows"
        assert list(loaded_data.columns) == ['id', 'name', 'score', 'date', 'active']

        # Compare every column (dates included) against the frame that was
        # written, so the expectations cannot drift from the fixture above.
        for column in original_data.columns:
            assert loaded_data[column].tolist() == original_data[column].tolist(), (
                f"Column {column!r} should round-trip without loss"
            )

        # Verify type preservation for each kind of column, not just dates.
        assert pd.api.types.is_integer_dtype(loaded_data['id'])
        assert pd.api.types.is_float_dtype(loaded_data['score'])
        assert pd.api.types.is_bool_dtype(loaded_data['active'])
        assert all(isinstance(value, str) for value in loaded_data['name'])

        # Verify datetime parsing
        assert pd.api.types.is_datetime64_any_dtype(loaded_data['date'])
| 72 | + |
| 73 | + |
0 commit comments