Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 80 additions & 0 deletions toad/scorecard.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,87 @@ def after_export(self, card, to_frame = False, to_json = None, to_csv = None, **

return card

def card2pmml(self, pmml: str = 'scorecard.pmml', debug: bool = False):
"""export a scorecard to pmml

Args:
pmml (str): io to write pmml file.
debug (bool): If true, print information about the conversion process.
"""
from sklearn_pandas import DataFrameMapper
from sklearn.linear_model import LinearRegression
from sklearn2pmml import sklearn2pmml, PMMLPipeline
from sklearn2pmml.preprocessing import LookupTransformer, ExpressionTransformer

mapper = []
samples = {}
for var, rule in self.rules.items():
end_string = ''
expression_string = ''
total_bins = len(rule['bins'])
if isinstance(rule['bins'][0], (np.ndarray, list)):
default_value = 0.
mapping = {}
for bins, score in zip(rule['bins'], rule['scores'].tolist()):
for _bin in bins:
if _bin == 'nan':
default_value = float(score)

mapping[_bin] = float(score)

mapper.append((
[var],
LookupTransformer(mapping=mapping, default_value=default_value),
))
samples[var] = [list(mapping.keys())[i] for i in np.random.randint(0, len(mapping), 20)]
else:
has_empty = len(rule['bins']) > 0 and pd.isnull(rule['bins'][-1])

if has_empty:
score_empty = rule['scores'][-1]
total_bins -= 1
iter = enumerate(zip(rule['bins'][:-1], rule['scores'][:-1]), start=1)
else:
iter = enumerate(zip(rule['bins'], rule['scores']), start=1)

if has_empty:
expression_string += f'{score_empty} if pandas.isnull(X[0])'

for i, (bin_var, score) in iter:
if i == 1 and not has_empty:
expression_string += f'{score} if X[0] < {bin_var}'
elif i == total_bins:
expression_string += f' else {score}'
else:
expression_string += f' else ({score} if X[0] < {bin_var}'
end_string += ')'

expression_string += end_string

mapper.append((
[var],
ExpressionTransformer(expression_string),
))
samples[var] = np.random.random(20) * 100

scorecard_mapper = DataFrameMapper(mapper, df_out=True)

pipeline = PMMLPipeline([
('preprocessing', scorecard_mapper),
('scorecard', LinearRegression(fit_intercept=False)),
])

pipeline.named_steps['scorecard'].fit(
pd.DataFrame(
np.random.randint(0, 100, (100, len(scorecard_mapper.features))),
columns=[m[0][0] for m in scorecard_mapper.features]
),
pd.Series(np.random.randint(0, 2, 100), name='score')
)

pipeline.named_steps['scorecard'].coef_ = np.ones(len(scorecard_mapper.features))

sklearn2pmml(pipeline, pmml, with_repr=True, debug=debug)

def _generate_testing_frame(self, maps, size = 'max', mishap = True, gap = 1e-2):
"""
Expand Down
33 changes: 32 additions & 1 deletion toad/scorecard_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def test_woe_to_score():
score = np.sum(score, axis=1)
assert score[404] == TEST_SCORE


def test_bin_to_score():
score = card.bin_to_score(bins)
assert score[404] == TEST_SCORE
Expand Down Expand Up @@ -263,3 +263,34 @@ def test_predict_dict():
proba = card.predict(df.iloc[404].to_dict())
assert proba == TEST_SCORE


def test_export_pmml_from_scorecard():
card = ScoreCard(
combiner = combiner,
transer = woe_transer,
)
card.load(card_config)
with pytest.raises(Exception) as e:
# will raise an exception when export a card to pmml
card.card2pmml()

assert e.type != RuntimeError


def test_export_pmml_from_scorecard_load():
card = ScoreCard().load(card_config)
with pytest.raises(Exception) as e:
# will raise an exception when export a card to pmml
card.card2pmml()

assert e.type != RuntimeError


def test_predict_from_pmml():
with pytest.raises(Exception) as e:
# will raise an exception when load a pmml to card
from pypmml import Model
model = Model.fromFile("scorecard.pmml")

assert e.type != ModuleNotFoundError
assert model.predict(df).values[200, 0] == 600