Skip to content

Commit fa91ee1

Browse files
author
Guillaume Lemaitre
committed
Finish the combine method
1 parent 6a3c5de commit fa91ee1

File tree

10 files changed

+34
-148
lines changed

10 files changed

+34
-148
lines changed

imblearn/combine/smote_enn.py

Lines changed: 10 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,11 @@ class SMOTEENN(SamplerMixin):
2222
number of samples in the minority class over the number of
2323
samples in the majority class.
2424
25-
random_state : int or None, optional (default=None)
26-
Seed for random number generation.
25+
random_state : int, RandomState instance or None, optional (default=None)
26+
If int, random_state is the seed used by the random number generator;
27+
If RandomState instance, random_state is the random number generator;
28+
If None, the random number generator is the RandomState instance used
29+
by np.random.
2730
2831
verbose : bool, optional (default=True)
2932
Whether or not to print information about the processing.
@@ -60,15 +63,6 @@ class SMOTEENN(SamplerMixin):
6063
6164
Attributes
6265
----------
63-
ratio : str or float
64-
If 'auto', the ratio will be defined automatically to balance
65-
the dataset. Otherwise, the ratio is defined as the
66-
number of samples in the minority class over the the number of
67-
samples in the majority class.
68-
69-
random_state : int or None
70-
Seed for random number generation.
71-
7266
min_c_ : str or int
7367
The identifier of the minority class.
7468
@@ -100,75 +94,21 @@ def __init__(self, ratio='auto', random_state=None, verbose=True,
10094
k=5, m=10, out_step=0.5, kind_smote='regular',
10195
size_ngh=3, kind_enn='all', n_jobs=-1, **kwargs):
10296

103-
"""Initialise the SMOTE ENN object.
104-
105-
Parameters
106-
----------
107-
ratio : str or float, optional (default='auto')
108-
If 'auto', the ratio will be defined automatically to balance
109-
the dataset. Otherwise, the ratio is defined as the
110-
number of samples in the minority class over the the number of
111-
samples in the majority class.
112-
113-
random_state : int or None, optional (default=None)
114-
Seed for random number generation.
115-
116-
verbose : bool, optional (default=True)
117-
Whether or not to print information about the processing.
118-
119-
k : int, optional (default=5)
120-
Number of nearest neighbours to used to construct synthetic
121-
samples.
122-
123-
m : int, optional (default=10)
124-
Number of nearest neighbours to use to determine if a minority
125-
sample is in danger.
126-
127-
out_step : float, optional (default=0.5)
128-
Step size when extrapolating.
129-
130-
kind_smote : str, optional (default='regular')
131-
The type of SMOTE algorithm to use one of the following
132-
options: 'regular', 'borderline1', 'borderline2', 'svm'.
133-
134-
size_ngh : int, optional (default=3)
135-
Size of the neighbourhood to consider to compute the average
136-
distance to the minority point samples.
137-
138-
kind_sel : str, optional (default='all')
139-
Strategy to use in order to exclude samples.
140-
141-
- If 'all', all neighbours will have to agree with the samples of
142-
interest to not be excluded.
143-
- If 'mode', the majority vote of the neighbours will be used in
144-
order to exclude a sample.
145-
146-
n_jobs : int, optional (default=-1)
147-
The number of threads to open if possible.
148-
149-
Returns
150-
-------
151-
None
152-
153-
"""
154-
super(SMOTEENN, self).__init__(ratio=ratio, random_state=random_state,
97+
super(SMOTEENN, self).__init__(ratio=ratio,
15598
verbose=verbose)
156-
99+
self.random_state = random_state
157100
self.k = k
158101
self.m = m
159102
self.out_step = out_step
160103
self.kind_smote = kind_smote
104+
self.size_ngh = size_ngh
105+
self.kind_enn = kind_enn
161106
self.n_jobs = n_jobs
162107
self.kwargs = kwargs
163-
164108
self.sm = SMOTE(ratio=self.ratio, random_state=self.random_state,
165109
verbose=self.verbose, k=self.k, m=self.m,
166110
out_step=self.out_step, kind=self.kind_smote,
167111
n_jobs=self.n_jobs, **self.kwargs)
168-
169-
self.size_ngh = size_ngh
170-
self.kind_enn = kind_enn
171-
172112
self.enn = EditedNearestNeighbours(random_state=self.random_state,
173113
verbose=self.verbose,
174114
size_ngh=self.size_ngh,
@@ -192,8 +132,6 @@ def fit(self, X, y):
192132
Return self.
193133
194134
"""
195-
# Check the consistency of X and y
196-
X, y = check_X_y(X, y)
197135

198136
super(SMOTEENN, self).fit(X, y)
199137

@@ -202,7 +140,7 @@ def fit(self, X, y):
202140

203141
return self
204142

205-
def sample(self, X, y):
143+
def _sample(self, X, y):
206144
"""Resample the dataset.
207145
208146
Parameters
@@ -222,10 +160,6 @@ def sample(self, X, y):
222160
The corresponding label of `X_resampled`
223161
224162
"""
225-
# Check the consistency of X and y
226-
X, y = check_X_y(X, y)
227-
228-
super(SMOTEENN, self).sample(X, y)
229163

230164
# Transform using SMOTE
231165
X, y = self.sm.sample(X, y)

imblearn/combine/smote_tomek.py

Lines changed: 8 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,11 @@ class SMOTETomek(SamplerMixin):
2323
number of samples in the minority class over the number of
2424
samples in the majority class.
2525
26-
random_state : int or None, optional (default=None)
27-
Seed for random number generation.
26+
random_state : int, RandomState instance or None, optional (default=None)
27+
If int, random_state is the seed used by the random number generator;
28+
If RandomState instance, random_state is the random number generator;
29+
If None, the random number generator is the RandomState instance used
30+
by np.random.
2831
2932
verbose : bool, optional (default=True)
3033
Whether or not to print information about the processing.
@@ -61,15 +64,6 @@ class SMOTETomek(SamplerMixin):
6164
6265
Attributes
6366
----------
64-
ratio : str or float
65-
If 'auto', the ratio will be defined automatically to balance
66-
the dataset. Otherwise, the ratio is defined as the
67-
number of samples in the minority class over the the number of
68-
samples in the majority class.
69-
70-
random_state : int or None
71-
Seed for random number generation.
72-
7367
min_c_ : str or int
7468
The identifier of the minority class.
7569
@@ -99,62 +93,18 @@ class SMOTETomek(SamplerMixin):
9993
def __init__(self, ratio='auto', random_state=None, verbose=True,
10094
k=5, m=10, out_step=0.5, kind_smote='regular',
10195
n_jobs=-1, **kwargs):
102-
103-
"""Initialise the SMOTE Tomek links object.
104-
105-
Parameters
106-
----------
107-
ratio : str or float, optional (default='auto')
108-
If 'auto', the ratio will be defined automatically to balance
109-
the dataset. Otherwise, the ratio is defined as the
110-
number of samples in the minority class over the the number of
111-
samples in the majority class.
112-
113-
random_state : int or None, optional (default=None)
114-
Seed for random number generation.
115-
116-
verbose : bool, optional (default=True)
117-
Whether or not to print information about the processing.
118-
119-
k : int, optional (default=5)
120-
Number of nearest neighbours to used to construct synthetic
121-
samples.
122-
123-
m : int, optional (default=10)
124-
Number of nearest neighbours to use to determine if a minority
125-
sample is in danger.
126-
127-
out_step : float, optional (default=0.5)
128-
Step size when extrapolating.
129-
130-
kind_smote : str, optional (default='regular')
131-
The type of SMOTE algorithm to use one of the following
132-
options: 'regular', 'borderline1', 'borderline2', 'svm'.
133-
134-
n_jobs : int, optional (default=-1)
135-
Number of threads to run the algorithm when it is possible.
136-
137-
Returns
138-
-------
139-
None
140-
141-
"""
142-
super(SMOTETomek, self).__init__(ratio=ratio,
143-
random_state=random_state,
144-
verbose=verbose)
145-
96+
super(SMOTETomek, self).__init__(ratio=ratio, verbose=verbose)
97+
self.random_state = random_state
14698
self.k = k
14799
self.m = m
148100
self.out_step = out_step
149101
self.kind_smote = kind_smote
150102
self.n_jobs = n_jobs
151103
self.kwargs = kwargs
152-
153104
self.sm = SMOTE(ratio=self.ratio, random_state=self.random_state,
154105
verbose=self.verbose, k=self.k, m=self.m,
155106
out_step=self.out_step, kind=self.kind_smote,
156107
n_jobs=self.n_jobs, **self.kwargs)
157-
158108
self.tomek = TomekLinks(random_state=self.random_state,
159109
verbose=self.verbose)
160110

@@ -175,8 +125,6 @@ def fit(self, X, y):
175125
Return self.
176126
177127
"""
178-
# Check the consistency of X and y
179-
X, y = check_X_y(X, y)
180128

181129
super(SMOTETomek, self).fit(X, y)
182130

@@ -185,7 +133,7 @@ def fit(self, X, y):
185133

186134
return self
187135

188-
def sample(self, X, y):
136+
def _sample(self, X, y):
189137
"""Resample the dataset.
190138
191139
Parameters
@@ -205,10 +153,6 @@ def sample(self, X, y):
205153
The corresponding label of `X_resampled`
206154
207155
"""
208-
# Check the consistency of X and y
209-
X, y = check_X_y(X, y)
210-
211-
super(SMOTETomek, self).sample(X, y)
212156

213157
# Transform using SMOTE
214158
X, y = self.sm.sample(X, y)
960 Bytes
Binary file not shown.
480 Bytes
Binary file not shown.
48 Bytes
Binary file not shown.
24 Bytes
Binary file not shown.
0 Bytes
Binary file not shown.
0 Bytes
Binary file not shown.

imblearn/combine/tests/test_smote_enn.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,19 +33,23 @@ def test_senn_bad_ratio():
3333

3434
# Define a negative ratio
3535
ratio = -1.0
36-
assert_raises(ValueError, SMOTEENN, ratio=ratio)
36+
smote = SMOTEENN(ratio=ratio)
37+
assert_raises(ValueError, smote.fit, X, Y)
3738

3839
# Define a ratio greater than 1
3940
ratio = 100.0
40-
assert_raises(ValueError, SMOTEENN, ratio=ratio)
41+
smote = SMOTEENN(ratio=ratio)
42+
assert_raises(ValueError, smote.fit, X, Y)
4143

4244
# Define ratio as an unknown string
4345
ratio = 'rnd'
44-
assert_raises(ValueError, SMOTEENN, ratio=ratio)
46+
smote = SMOTEENN(ratio=ratio)
47+
assert_raises(ValueError, smote.fit, X, Y)
4548

4649
# Define ratio as a list which is not supported
4750
ratio = [.5, .5]
48-
assert_raises(ValueError, SMOTEENN, ratio=ratio)
51+
smote = SMOTEENN(ratio=ratio)
52+
assert_raises(ValueError, smote.fit, X, Y)
4953

5054

5155
def test_smote_fit_single_class():

imblearn/combine/tests/test_smote_tomek.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,19 +33,23 @@ def test_smote_bad_ratio():
3333

3434
# Define a negative ratio
3535
ratio = -1.0
36-
assert_raises(ValueError, SMOTETomek, ratio=ratio)
36+
smote = SMOTETomek(ratio=ratio)
37+
assert_raises(ValueError, smote.fit, X, Y)
3738

3839
# Define a ratio greater than 1
3940
ratio = 100.0
40-
assert_raises(ValueError, SMOTETomek, ratio=ratio)
41+
smote = SMOTETomek(ratio=ratio)
42+
assert_raises(ValueError, smote.fit, X, Y)
4143

4244
# Define ratio as an unknown string
4345
ratio = 'rnd'
44-
assert_raises(ValueError, SMOTETomek, ratio=ratio)
46+
smote = SMOTETomek(ratio=ratio)
47+
assert_raises(ValueError, smote.fit, X, Y)
4548

4649
# Define ratio as a list which is not supported
4750
ratio = [.5, .5]
48-
assert_raises(ValueError, SMOTETomek, ratio=ratio)
51+
smote = SMOTETomek(ratio=ratio)
52+
assert_raises(ValueError, smote.fit, X, Y)
4953

5054

5155
def test_smote_fit_single_class():

0 commit comments

Comments
 (0)