From 011e3dd1b14da7d079c41bdd05b78ba4de43b797 Mon Sep 17 00:00:00 2001 From: itlubber <1830611168@qq.com> Date: Thu, 4 May 2023 22:26:54 +0800 Subject: [PATCH 1/5] add card2pmml method --- toad/scorecard.py | 82 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/toad/scorecard.py b/toad/scorecard.py index 8573347..d2c90b5 100644 --- a/toad/scorecard.py +++ b/toad/scorecard.py @@ -376,7 +376,89 @@ def after_export(self, card, to_frame = False, to_json = None, to_csv = None, ** return card + def card2pmml(self, pmml: str = 'scorecard.pmml', debug: bool = False): + """export a scorecard to pmml + Args: + pmml (str): io to write pmml file. + debug (bool): If true, print information about the conversion process. + """ + from sklearn_pandas import DataFrameMapper + from sklearn.linear_model import LinearRegression + from sklearn2pmml import sklearn2pmml, PMMLPipeline + from sklearn2pmml.preprocessing import LookupTransformer, ExpressionTransformer + + mapper = [] + samples = {} + for var, rule in self.rules.items(): + end_string = '' + expression_string = '' + total_bins = len(rule['bins']) + if isinstance(rule['bins'][0], (np.ndarray, list)): + default_value = 0. + mapping = {} + for bins, score in zip(rule['bins'], rule['scores'].tolist()): + for _bin in bins: + if _bin == 'nan': + default_value = score + + mapping[_bin] = score + + mapper.append(( + [var], + LookupTransformer(mapping=mapping, default_value=default_value), + )) + samples[var] = [list(mapping.keys())[i] for i in np.random.randint(0, len(mapping), 20)] + else: + has_empty = len(rule['bins']) > 0 and pd.isnull(rule['bins'][-1]) + + if has_empty: + score_empty = rule['scores'][-1] + total_bins -= 1 + iter = enumerate(zip(rule['bins'][:-1], rule['scores'][:-1]), start=1) + else: + score_empty = 0 + iter = enumerate(zip(rule['bins'], rule['scores']), start=1) + + for i, (bin_var, score) in iter: + if i == 1: + expression_string += f'{score} if X[0] < {bin_var}' + elif i == total_bins: + expression_string += f' else {score}' + else: + expression_string += f' else ({score} if X[0] < {bin_var}' + end_string += ')' + + if has_empty: + expression_string += f' else ({score_empty} if pandas.isnull(X[0])' + end_string += ')' + + expression_string += end_string + + mapper.append(( + [var], + ExpressionTransformer(expression_string), + )) + samples[var] = np.random.random(20) * 100 + + scorecard_mapper = DataFrameMapper(mapper, df_out=True) + + pipeline = PMMLPipeline([ + ('preprocessing', scorecard_mapper), + ('scorecard', LinearRegression(fit_intercept=False)), + ]) + + pipeline.named_steps['scorecard'].fit( + pd.DataFrame( + np.random.randint(0, 100, (100, len(scorecard_mapper.features))), + columns=[m[0][0] for m in scorecard_mapper.features] + ), + pd.Series(np.random.randint(0, 2, 100), name='score') + ) + + pipeline.named_steps['scorecard'].coef_ = np.ones(len(scorecard_mapper.features)) + + sklearn2pmml(pipeline, pmml, with_repr=True, debug=debug) def _generate_testing_frame(self, maps, size = 'max', mishap = True, gap = 1e-2): """ From 05c62eefe9c6c36903dfcdf747086fde1d44186a Mon Sep 17 00:00:00 2001 From: itlubber <39397838+itlubber@users.noreply.github.com> Date: Sun, 7 May 2023 17:27:37 +0800 Subject: [PATCH 2/5] fix card2pmml nan bins --- toad/scorecard.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/toad/scorecard.py b/toad/scorecard.py index d2c90b5..4571614 100644 --- a/toad/scorecard.py +++ b/toad/scorecard.py @@ -400,9 +400,9 @@ def card2pmml(self, pmml: str = 'scorecard.pmml', debug: bool = False): for bins, score in zip(rule['bins'], rule['scores'].tolist()): for _bin in bins: if _bin == 'nan': - default_value = score + default_value = float(score) - mapping[_bin] = score + mapping[_bin] = float(score) mapper.append(( [var], @@ -417,22 +417,20 @@ def card2pmml(self, pmml: str = 'scorecard.pmml', debug: bool = False): total_bins -= 1 iter = enumerate(zip(rule['bins'][:-1], rule['scores'][:-1]), start=1) else: - score_empty = 0 iter = enumerate(zip(rule['bins'], rule['scores']), start=1) - + + if has_empty: + expression_string += f'{score_empty} if pandas.isnull(X[0])' + for i, (bin_var, score) in iter: - if i == 1: + if i == 1 and not has_empty: expression_string += f'{score} if X[0] < {bin_var}' elif i == total_bins: expression_string += f' else {score}' else: expression_string += f' else ({score} if X[0] < {bin_var}' end_string += ')' - - if has_empty: - expression_string += f' else ({score_empty} if pandas.isnull(X[0])' - end_string += ')' - + expression_string += end_string mapper.append(( @@ -447,7 +445,7 @@ def card2pmml(self, pmml: str = 'scorecard.pmml', debug: bool = False): ('preprocessing', scorecard_mapper), ('scorecard', LinearRegression(fit_intercept=False)), ]) - + pipeline.named_steps['scorecard'].fit( pd.DataFrame( np.random.randint(0, 100, (100, len(scorecard_mapper.features))), From 45276702b5b820d0b5d4b69a4301a182ca1e9bb0 Mon Sep 17 00:00:00 2001 From: itlubber <39397838+itlubber@users.noreply.github.com> Date: Sun, 7 May 2023 17:28:55 +0800 Subject: [PATCH 3/5] add card2pmml unit test case --- toad/scorecard_test.py | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/toad/scorecard_test.py b/toad/scorecard_test.py index 18e852d..688d1b8 100644 --- a/toad/scorecard_test.py +++ b/toad/scorecard_test.py @@ -122,7 +122,26 @@ def test_woe_to_score(): score = np.sum(score, axis=1) assert score[404] == TEST_SCORE +def test_export_pmml_from_scorecard(): + card = ScoreCard( + combiner = combiner, + transer = woe_transer, + ) + card.load(card_config) + with pytest.raises(Exception) as e: + # will raise an exception when export a card to pmml + card.card2pmml() + + assert e.type != RuntimeError + +def test_export_pmml_from_scorecard_load(): + card = ScoreCard().load(card_config) + with pytest.raises(Exception) as e: + # will raise an exception when export a card to pmml + card.card2pmml() + + assert e.type != RuntimeError def test_bin_to_score(): score = card.bin_to_score(bins) assert score[404] == TEST_SCORE @@ -262,4 +281,25 @@ def test_predict_dict(): """ a test for scalar inference time cost """ proba = card.predict(df.iloc[404].to_dict()) assert proba == TEST_SCORE + + +def test_export_pmml_from_scorecard(): + card = ScoreCard( + combiner = combiner, + transer = woe_transer, + ) + card.load(card_config) + with pytest.raises(Exception) as e: + # will raise an exception when export a card to pmml + card.card2pmml() + + assert e.type != RuntimeError + + +def test_export_pmml_from_scorecard_load(): + card = ScoreCard().load(card_config) + with pytest.raises(Exception) as e: + # will raise an exception when export a card to pmml + card.card2pmml() + assert e.type != RuntimeError From 76494a6a5291528070e02d0187b39af6d26588c5 Mon Sep 17 00:00:00 2001 From: itlubber <39397838+itlubber@users.noreply.github.com> Date: Sun, 7 May 2023 17:42:46 +0800 Subject: [PATCH 4/5] update unit test case --- toad/scorecard_test.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/toad/scorecard_test.py b/toad/scorecard_test.py index 688d1b8..5a72528 100644 --- a/toad/scorecard_test.py +++ b/toad/scorecard_test.py @@ -122,26 +122,7 @@ def test_woe_to_score(): score = np.sum(score, axis=1) assert score[404] == TEST_SCORE -def test_export_pmml_from_scorecard(): - card = ScoreCard( - combiner = combiner, - transer = woe_transer, - ) - card.load(card_config) - with pytest.raises(Exception) as e: - # will raise an exception when export a card to pmml - card.card2pmml() - assert e.type != RuntimeError - - -def test_export_pmml_from_scorecard_load(): - card = ScoreCard().load(card_config) - with pytest.raises(Exception) as e: - # will raise an exception when export a card to pmml - card.card2pmml() - - assert e.type != RuntimeError def test_bin_to_score(): score = card.bin_to_score(bins) assert score[404] == TEST_SCORE From 576eafe2a6688b5445924693dbb4387d1bf99b58 Mon Sep 17 00:00:00 2001 From: itlubber <39397838+itlubber@users.noreply.github.com> Date: Sun, 7 May 2023 18:06:24 +0800 Subject: [PATCH 5/5] update unit testing --- toad/scorecard_test.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/toad/scorecard_test.py b/toad/scorecard_test.py index 5a72528..c360c11 100644 --- a/toad/scorecard_test.py +++ b/toad/scorecard_test.py @@ -262,8 +262,8 @@ def test_predict_dict(): """ a test for scalar inference time cost """ proba = card.predict(df.iloc[404].to_dict()) assert proba == TEST_SCORE - - + + def test_export_pmml_from_scorecard(): card = ScoreCard( combiner = combiner, @@ -284,3 +284,13 @@ def test_export_pmml_from_scorecard_load(): card.card2pmml() assert e.type != RuntimeError + + +def test_predict_from_pmml(): + with pytest.raises(Exception) as e: + # will raise an exception when load a pmml to card + from pypmml import Model + model = Model.fromFile("scorecard.pmml") + + assert e.type != ModuleNotFoundError + assert model.predict(df).values[200, 0] == 600