Skip to content

Commit 3c08a24

Browse files
parse dataframe
1 parent 6024221 commit 3c08a24

File tree

11 files changed

+166
-3
lines changed

11 files changed

+166
-3
lines changed

Pipfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ verify_ssl = true
66
[packages]
77
loguru = "==0.6.*"
88
pandas = "==1.5.*"
9+
scikit-learn = "*"
910

1011
[dev-packages]
1112
black = "==23.*"

Pipfile.lock

Lines changed: 73 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

python/src/lazylearn/errors/__init__.py

Whitespace-only changes.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
class DataSourceError(Exception):
    """Signals that the argument supplied as data source is incompatible."""
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from pipeline.pipeline import IngestionPipeline, PipelineStep
2+
3+
4+
class Ingestion:
    """Runs the ingestion pipeline over a raw data source."""

    def __init__(self):
        pass

    def run(self, data):
        """
        Build an IngestionPipeline for the given data, run its steps
        and return the pipeline's response.

        :param data: raw data source, stored on the pipeline as
            ``raw_data``
        :return: the value of ``IngestionPipeline.response()``
        """
        pipeline = IngestionPipeline()
        pipeline.raw_data = data

        # The parser step reads its input from ``pipeline.raw_data``.
        # DataSourceParser defines no ``__init__``, so handing ``data``
        # to its constructor would raise a TypeError.
        # NOTE(review): DataSourceParser and ColumnInterpreter are not
        # covered by the visible import line of this module - confirm
        # they are brought into scope.
        pipeline.add(DataSourceParser())

        pipeline.add(ColumnInterpreter())

        pipeline.run()

        return pipeline.response()

python/src/lazylearn/ingestion/ingestion_pipeline_steps/__init__.py

Whitespace-only changes.
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
from errors.errors import DataSourceError
2+
from pandas import DataFrame
3+
from pipeline.pipeline import IngestionPipeline, PipelineStep
4+
5+
6+
class DataSourceParser(PipelineStep):
    """Pipeline step that turns the pipeline's raw data into a DataFrame."""

    def apply(self, pipeline: IngestionPipeline):
        """
        This method is responsible for parsing the raw data
        source from its parent pipeline into a DataFrame
        object.

        :param pipeline: parent IngestionPipeline
        :return: None; the parsed frame is stored on ``pipeline.df``
        :raises DataSourceError: if the raw data is missing or is not
            a pandas DataFrame
        """
        raw_data = pipeline.raw_data

        # Explicit check rather than ``assert``: assert statements are
        # removed when Python runs with -O, so they cannot be relied on
        # for input validation.
        if raw_data is None:
            raise DataSourceError("No data source was provided")

        if not isinstance(raw_data, DataFrame):
            raise DataSourceError(
                f"Unsupported data source type: {type(raw_data).__name__}"
            )

        # Already a DataFrame: use it as-is (no copy is made).
        pipeline.df = raw_data

python/src/lazylearn/ingestion/ingestion_pipeline_steps/interpreter_step.py

Whitespace-only changes.

python/src/lazylearn/lazylearn.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
class LazyLearner:
    """Top-level object holding a single project."""

    def __init__(self):
        # No project exists until one is created.
        self.project = None

    def create_project(self, data, target, task="infer"):
        """
        Placeholder for project creation; currently does nothing.

        :param data: data source for the project (unused so far)
        :param target: target of the project (unused so far)
        :param task: task identifier, defaults to "infer" (unused so far)
        :return: None
        """
        # TODO: ingest data
        return None
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
from typing import List
2+
3+
from models.models import Dataset
4+
from pandas import DataFrame
5+
6+
7+
class Pipeline:
    """Ordered collection of steps that are applied to the pipeline itself."""

    def __init__(self):
        # Flipped to True once ``run`` has applied every step.
        self._has_run: bool = False
        # Steps in the order they were added.
        self._steps: List["PipelineStep"] = []

    def add(self, pipeline_step):
        """
        Append a step to the end of the pipeline.

        :param pipeline_step: object exposing ``apply(pipeline)``
        :return: None
        """
        self._steps.append(pipeline_step)

    def run(self):
        """
        Apply every registered step, in insertion order, passing this
        pipeline to each one, then mark the pipeline as run.

        :return: None
        """
        # Plain loop: the steps are called for their side effects only,
        # so there is no point in collecting their return values.
        for step in self._steps:
            step.apply(self)
        self._has_run = True
18+
19+
20+
class PipelineStep:
    """Base type for pipeline steps; its ``apply`` does nothing."""

    def apply(self, pipeline: Pipeline):
        """
        No-op hook intended to be overridden by concrete steps.

        :param pipeline: the pipeline the step is applied to
        :return: None
        """
        return None
23+
24+
25+
class IngestionPipeline(Pipeline):
    """Pipeline carrying a raw data source and the DataFrame parsed from it."""

    def __init__(self):
        super().__init__()
        # Raw data source; empty until a caller assigns it.
        self.raw_data = None
        # Parsed frame; stays None until a step assigns it.
        self.df: DataFrame = None

    def response(self):
        """
        Return the result of the pipeline.

        NOTE(review): this hands back the ``Dataset`` class itself, not
        an instance - presumably a placeholder; confirm.

        :return: the ``Dataset`` class
        """
        result = Dataset
        return result

0 commit comments

Comments
 (0)