Changes from all commits · 30 commits
ac5d1d3
added courseProjectCode and courseProjectDocs folder
saisandeepramavath Sep 18, 2025
603f06f
Added project proposal and quality metrics
saisandeepramavath Sep 23, 2025
89b7e90
Baseline builds & test
saisandeepramavath Oct 7, 2025
2362942
Extended test cases
saisandeepramavath Oct 7, 2025
70b1db9
Update course details in README for SWEN 777
saisandeepramavath Oct 22, 2025
942d94e
Remove success criteria and next steps from README
saisandeepramavath Oct 22, 2025
b3dc9d7
Merge branch 'main' into sandeeep
saisandeepramavath Oct 22, 2025
5a84dbb
Mock & Stubbing done
saisandeepramavath Oct 27, 2025
67c5f1c
Updated
saisandeepramavath Oct 28, 2025
5cb6057
Updated
saisandeepramavath Oct 28, 2025
c0c12fb
Mutation testing done
saisandeepramavath Nov 3, 2025
6b3e10f
found one code smell and fixed that
nithikeshreddy Nov 9, 2025
4d89314
Fixed another code smell and added report.md
saisandeepramavath Nov 10, 2025
c953dd2
fixed one code smell and updated report.md
MALLI7622 Nov 10, 2025
cb40e31
Merge pull request #4 from saisandeepramavath/nithikesh
saisandeepramavath Nov 20, 2025
1d4cd7f
Added integration testcase
Nov 25, 2025
a3e00de
Added README and report
Nov 26, 2025
e4e2d78
Added integration tests
nithikeshreddy Nov 26, 2025
141ecb5
Updated README and report
nithikeshreddy Nov 26, 2025
4551d14
Added integration tests
MALLI7622 Dec 1, 2025
ac63a5a
Updated report
MALLI7622 Dec 1, 2025
e11f306
Updated README
MALLI7622 Dec 1, 2025
332e06f
Updated README
MALLI7622 Dec 1, 2025
635eba3
Merge pull request #5 from saisandeepramavath/nithikesh
saisandeepramavath Dec 1, 2025
3fd39c0
Added system testing Data Loading and Export Workflow
Dec 1, 2025
9694b3a
Added system test case Data Cleaning and Transformation Workflow
nithikeshreddy Dec 1, 2025
607d0b4
y
nithikeshreddy Dec 4, 2025
ed79a74
Added system test Aggregation and Analysis Workflow
MALLI7622 Dec 4, 2025
937ff36
Added README and report
MALLI7622 Dec 4, 2025
bc9c446
Merge branch 'main' into nithikesh
MALLI7622 Dec 4, 2025
4 changes: 4 additions & 0 deletions .gitignore
@@ -142,3 +142,7 @@ doc/source/savefig/
# Pyodide/WASM related files #
##############################
/.pyodide-xbuildenv-*



venv
224 changes: 224 additions & 0 deletions courseProjectCode/Metrics/metrics_collector.py
@@ -0,0 +1,224 @@


import ast
import json
import os
import re
from typing import Dict, List, Tuple


ROOT_DIR = os.getcwd()

SKIP_DIRS = {
"node_modules",
"courseProjectDocs",
"courseProjectCode",
".git",
"__pycache__",
}

SOURCE_EXTENSIONS = {".py"}


def count_python_functions(file_content: str) -> Tuple[int, List[Tuple[int, int]]]:
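    """Count Python functions via the AST; return (count, [(start, end)] line spans)."""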
try:
tree = ast.parse(file_content)
except SyntaxError:
return 0, []

function_spans = []
for node in ast.walk(tree):
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
# end_lineno is available in Python 3.8+
start_line = getattr(node, "lineno", None)
end_line = getattr(node, "end_lineno", None)
if start_line is not None and end_line is not None:
function_spans.append((start_line, end_line))
return len(function_spans), function_spans


def count_js_functions(file_content: str) -> Tuple[int, List[Tuple[int, int]]]:
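    """Heuristically count JavaScript functions by scanning lines for 'function' or '=>'."""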
lines = file_content.splitlines()
count = 0
spans = []
for idx, line in enumerate(lines, start=1):
stripped = line.strip()
if stripped.startswith("//") or stripped.startswith("/*"):
continue
if re.search(r"\bfunction\b", stripped) or re.search(r"=>", stripped):
count += 1
spans.append((idx, idx))
return count, spans


def approximate_cyclomatic_complexity(lines: List[str]) -> int:
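    """Approximate cyclomatic complexity: 1 plus one per line containing a decision keyword."""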
complexity = 1 # Base complexity
decision_keywords = [
"if ", "for ", "while ", "case ", "switch ", "catch ", "&&", "||", "?",
"elif ", "except ",
]
for line in lines:
stripped = line.strip()
if not stripped or stripped.startswith("#") or stripped.startswith("//"):
continue
for keyword in decision_keywords:
if keyword in stripped:
complexity += 1
break
return complexity


def analyse_file(filepath: str) -> Dict[str, object]:
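    """Collect line, comment, function, complexity and test metrics for one file."""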
try:
with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
content = f.read()
except (OSError, UnicodeDecodeError):
return {}

lines = content.splitlines()
code_lines = 0
comment_lines = 0
    in_block_comment = False
    in_docstring = False

    for line in lines:
        stripped = line.strip()
        if not stripped:
            continue
        if in_docstring:
            # Inside a multi-line docstring: count as comment, not code.
            comment_lines += 1
            if '"""' in stripped or "'''" in stripped:
                in_docstring = False
            continue
        if in_block_comment:
            comment_lines += 1
            if "*/" in stripped:
                in_block_comment = False
            continue
        if stripped.startswith("/*"):
            comment_lines += 1
            if "*/" not in stripped:
                in_block_comment = True
            continue
        if stripped.startswith("#") or stripped.startswith("//"):
            comment_lines += 1
            continue
        if stripped.startswith('"""') or stripped.startswith("'''"):
            # Docstrings count as comments; keep tracking when the closing
            # quotes are not on the same line, so the docstring body is not
            # counted as code.
            comment_lines += 1
            quote = stripped[:3]
            if stripped.count(quote) == 1:
                in_docstring = True
            continue
        code_lines += 1

ext = os.path.splitext(filepath)[1]
functions_count = 0
function_spans: List[Tuple[int, int]] = []
if ext == ".py":
functions_count, function_spans = count_python_functions(content)
elif ext == ".js":
functions_count, function_spans = count_js_functions(content)

total_function_lines = 0
for start, end in function_spans:
if end >= start:
total_function_lines += end - start + 1
average_function_length = (
(total_function_lines / functions_count) if functions_count > 0 else 0
)

complexity = approximate_cyclomatic_complexity(lines)

parts = filepath.lower().split(os.sep)
is_test_file = any(
part.startswith("test") for part in parts if part not in {"", "."}
)

test_functions_count = 0
if is_test_file:
if ext == ".py":
try:
tree = ast.parse(content)
except SyntaxError:
tree = None
if tree is not None:
for node in ast.walk(tree):
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
if node.name.startswith("test"):
test_functions_count += 1
elif ext == ".js":
test_functions_count = len(re.findall(r"\b(it|describe)\s*\(", content))

return {
"file": filepath,
"lines_of_code": code_lines,
"comment_lines": comment_lines,
"comment_ratio": (comment_lines / code_lines) if code_lines > 0 else 0,
"functions": functions_count,
"average_function_length": average_function_length,
"cyclomatic_complexity": complexity,
"is_test_file": is_test_file,
"test_functions": test_functions_count,
}


def walk_repository(root_dir: str) -> List[Dict[str, object]]:
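    """Walk root_dir, skipping SKIP_DIRS, and analyse every source file found."""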
results = []
for dirpath, dirnames, filenames in os.walk(root_dir):
# Remove skipped directories from traversal
dirnames[:] = [d for d in dirnames if d not in SKIP_DIRS]
for filename in filenames:
ext = os.path.splitext(filename)[1]
if ext in SOURCE_EXTENSIONS:
filepath = os.path.join(dirpath, filename)
metrics = analyse_file(filepath)
if metrics:
results.append(metrics)
return results


def aggregate_metrics(results: List[Dict[str, object]]) -> Dict[str, object]:
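    """Roll per-file metrics up into repository-wide totals and ratios."""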

total_code_lines = sum(item["lines_of_code"] for item in results)
total_comment_lines = sum(item["comment_lines"] for item in results)
total_functions = sum(item["functions"] for item in results)
total_complexity = sum(item["cyclomatic_complexity"] for item in results)
total_files = len(results)

total_function_lines = sum(
item["average_function_length"] * item["functions"] for item in results
)
average_function_length = (
total_function_lines / total_functions if total_functions > 0 else 0
)
comment_ratio = (
(total_comment_lines / total_code_lines) if total_code_lines > 0 else 0
)

test_files = [item for item in results if item["is_test_file"]]
total_test_files = len(test_files)
total_test_lines = sum(item["lines_of_code"] for item in test_files)
total_test_functions = sum(item["test_functions"] for item in test_files)
test_ratio = (
(total_test_lines / total_code_lines) if total_code_lines > 0 else 0
)

aggregated = {
"total_files": total_files,
"total_code_lines": total_code_lines,
"total_comment_lines": total_comment_lines,
"comment_ratio": comment_ratio,
"total_functions": total_functions,
"average_function_length": average_function_length,
"total_cyclomatic_complexity": total_complexity,
"total_test_files": total_test_files,
"total_test_lines": total_test_lines,
"total_test_functions": total_test_functions,
"test_ratio": test_ratio,
}
return aggregated


def main() -> None:
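    """Print the full metrics report (per-file details plus summary) as JSON."""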
results = walk_repository(ROOT_DIR)
aggregated = aggregate_metrics(results)
report = {
"files": results,
"summary": aggregated,
}
print(json.dumps(report, indent=2))


if __name__ == "__main__":
main()
140 changes: 140 additions & 0 deletions courseProjectCode/project-proposal.md
@@ -0,0 +1,140 @@
# Project Proposal

## Project Overview

Our course project aims to build a lightweight data analysis library that
mimics essential features of the pandas ecosystem. The library will
provide tabular data structures (similar to DataFrame and Series) and
support common operations needed by scientists and engineers working
with structured data. Major functional goals include:

- **Handling missing data:** The system should represent missing values as
  `NaN`, `NA` or `NaT` and propagate them through computations. This
  capability simplifies data cleaning and statistical analysis by
  preventing silent errors (software.com).

- **Size mutability:** Users should be able to insert or delete columns
and rows in data structures. Dynamic resizing is central to
interactive analysis workflows where the shape of a table evolves as
  new information becomes available (raw.githubusercontent.com).

- **Automatic and explicit data alignment:** When performing
arithmetic or merging operations, the system will align data on
labels or allow users to opt out of alignment entirely. Proper
alignment prevents accidental mismatches and promotes reproducible
  results (raw.githubusercontent.com).

- **Flexible group-by operations:** The library should implement
split–apply–combine patterns for aggregation, transformation, and
filtering so that users can summarise data by categories with a
  single fluent expression (raw.githubusercontent.com).

- **Robust I/O tooling:** Data structures must load from and save to
common file formats (CSV, Excel) and efficiently persist to
  high-performance formats such as HDF5 (raw.githubusercontent.com).

- **Time-series functionality:** Operations like date-range generation,
frequency conversion, moving-window statistics and date shifting will
be built in so that time-indexed data can be analysed without
  external libraries (raw.githubusercontent.com).

In addition to these functional requirements, the project emphasises
non-functional qualities such as performance, flexibility and
expressive APIs. The goal is to provide an intuitive open-source tool
that researchers can use to analyse data without sacrificing speed or
power (raw.githubusercontent.com).
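
As a rough illustration of the intended feel, here is a minimal sketch of
the semantics these goals imply, written in plain Python (the structures
below are illustrative stand-ins, not the project's final API):

```python
import math

# A toy "DataFrame" as a dict of equal-length columns; None marks missing data.
table = {"city": ["NYC", "LA", "NYC"], "temp": [21.0, None, 18.5]}

# Missing-data handling: aggregations skip missing values instead of
# silently producing wrong totals.
valid = [v for v in table["temp"] if v is not None and not math.isnan(v)]
print(sum(valid) / len(valid))  # mean over non-missing values -> 19.75

# Split-apply-combine: group temperatures by city, then aggregate per group.
groups = {}
for city, temp in zip(table["city"], table["temp"]):
    if temp is not None:
        groups.setdefault(city, []).append(temp)
print({c: sum(vs) / len(vs) for c, vs in groups.items()})  # {'NYC': 19.75}
```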

---

## Key Quality Metrics

To ensure that the implementation is maintainable and testable, we will
track several quality metrics throughout the project lifecycle. The
metrics were selected based on guidance from software engineering
literature and industry best practices.

### Maintainability metrics

- **Maintainability index (MI):** Visual Studio defines an index from
0 to 100 that summarises the ease of maintaining a piece of code.
Higher values indicate more maintainable code, with scores above
  20 considered “good,” 10–19 “moderate,” and below 10 “poor”
  (learn.microsoft.com).
MI combines several measurements such as cyclomatic complexity,
depth of inheritance and class coupling. Although we do not
compute MI directly, we monitor its constituent components to track
trends over time.

- **Cyclomatic complexity:** This measures the number of linearly
  independent paths through a program. Each decision point (e.g.,
  `if`, `for`, `while`) adds one to the count. Higher complexity
  indicates more potential execution paths and requires more tests to
  achieve full coverage (learn.microsoft.com). Our metrics script
  approximates cyclomatic complexity by scanning for decision
  keywords, providing a reproducible indicator of structural
  complexity (see the worked example after this list).

- **Comment-to-code ratio:** The number of comment lines divided by
  the number of executable lines (software.com). Comments
  capture design assumptions, edge cases and rationale that are not
  obvious from code alone. A moderate ratio improves maintainability
  by aiding knowledge transfer and reducing ramp-up time for new
  contributors (software.com). However, excessively high
  ratios can reflect commented-out code or verbose documentation,
  so the ratio should be interpreted in context (software.com).

- **Average function length:** Smaller functions tend to perform a
single task, are easier to understand and thus easier to modify.
The metrics script measures the average number of code lines per
function. Keeping this metric low encourages modular design and
aligns with the Single Responsibility Principle.

- **Class coupling and depth of inheritance:** Although our project
uses primarily functions and data structures, we will monitor
coupling and inheritance depth where applicable. Visual Studio’s
guidance notes that high class coupling and deep inheritance trees
  decrease maintainability (learn.microsoft.com). We will
minimise dependencies between modules and favour composition over
inheritance to keep these metrics low.
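
To make the complexity approximation concrete, here is a small worked
example of the keyword-scan rule used by `metrics_collector.py` (a
minimal sketch; the real script applies the same rule to whole files):

```python
# Base complexity is 1; each line containing a decision keyword adds 1.
DECISION_KEYWORDS = ["if ", "for ", "while ", "case ", "switch ",
                     "catch ", "&&", "||", "?", "elif ", "except "]

snippet = '''
def clamp(x, lo, hi):
    if x < lo:
        return lo
    elif x > hi:
        return hi
    return x
'''

complexity = 1
for line in snippet.splitlines():
    stripped = line.strip()
    if stripped and not stripped.startswith("#"):
        if any(kw in stripped for kw in DECISION_KEYWORDS):
            complexity += 1

print(complexity)  # 3: the `if` line and the `elif` line each add one path
```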

### Testability metrics

- **Test coverage:** Atlassian describes code coverage as a measure
of how much of the code base is exercised by tests and notes
several metrics: function, statement, branch, condition and line
  coverage (atlassian.com). Although a high coverage
percentage does not guarantee good tests, it reveals which parts of
the system remain untested and helps to prioritise additional
testing efforts. Since we cannot run external coverage tools in
this environment, our metrics script approximates test effort by
reporting the ratio of lines in test files to total lines of code
and counting the number of test functions. Increasing the
test-to-code ratio over time should correlate with improved
coverage.

- **Number of test cases:** We treat each `test_*` function in
  Python and each `describe`/`it` call in JavaScript as an individual
  test case. Tracking the number of test cases encourages
developers to write focused, granular tests and highlights
subsystems that may need additional verification.

- **Complexity vs. tests:** Cyclomatic complexity informs us how
many test cases are theoretically required to exercise all
  execution paths (learn.microsoft.com). By comparing the number
of test cases to the aggregate complexity of the code base, we can
judge whether testing is keeping pace with growing code
intricacy. If complexity rises faster than test counts, there may
be untested paths that warrant attention.

---

## Using the metrics

The `metrics_collector.py` script in `courseProjectCode/Metrics/`
implements the measurements described above. Running the script
generates a JSON report containing per-file metrics and a summary.
These metrics will form the basis of our quality dashboard and guide
refactoring and testing priorities throughout the project. By
monitoring comment ratios, function lengths, complexity and test
ratios, we can make data-driven decisions to keep the code base
maintainable and to ensure that behaviour is thoroughly validated.
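
For example, the report can be generated and its headline numbers read
back like this (a sketch run from the repository root; the keys match
the `summary` dictionary built in `metrics_collector.py`):

```python
import json
import subprocess

# Run the collector and parse the JSON report it prints to stdout.
raw = subprocess.run(
    ["python", "courseProjectCode/Metrics/metrics_collector.py"],
    capture_output=True, text=True, check=True,
).stdout
summary = json.loads(raw)["summary"]

print(f"comment ratio:   {summary['comment_ratio']:.2f}")
print(f"test/code ratio: {summary['test_ratio']:.2f}")
print(f"total functions: {summary['total_functions']}")
```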
Binary file added courseProjectDocs/Setup/Coverage report.pdf