Skip to content

Commit 8a93047

Browse files
gustavocidornelaswhoseoyster
authored andcommitted
Updated churn notebook
1 parent 5accfb5 commit 8a93047

File tree

1 file changed

+150
-47
lines changed

1 file changed

+150
-47
lines changed

examples/tabular-classification/churn-classifier/churn-classifier-sklearn.ipynb

Lines changed: 150 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": 12,
5+
"execution_count": 1,
66
"id": "33179b0c",
77
"metadata": {},
88
"outputs": [],
@@ -28,7 +28,7 @@
2828
},
2929
{
3030
"cell_type": "code",
31-
"execution_count": 13,
31+
"execution_count": 2,
3232
"id": "40472b51",
3333
"metadata": {},
3434
"outputs": [],
@@ -38,7 +38,7 @@
3838
},
3939
{
4040
"cell_type": "code",
41-
"execution_count": 14,
41+
"execution_count": 3,
4242
"id": "679e0b36",
4343
"metadata": {},
4444
"outputs": [
@@ -254,7 +254,7 @@
254254
"[10000 rows x 10 columns]"
255255
]
256256
},
257-
"execution_count": 14,
257+
"execution_count": 3,
258258
"metadata": {},
259259
"output_type": "execute_result"
260260
}
@@ -275,29 +275,7 @@
275275
},
276276
{
277277
"cell_type": "code",
278-
"execution_count": 15,
279-
"id": "d0e728a9",
280-
"metadata": {},
281-
"outputs": [
282-
{
283-
"data": {
284-
"text/plain": [
285-
"{'Gender': ['Female', 'Male'], 'Geography': ['France', 'Spain', 'Germany']}"
286-
]
287-
},
288-
"execution_count": 15,
289-
"metadata": {},
290-
"output_type": "execute_result"
291-
}
292-
],
293-
"source": [
294-
"categorical_map = {feature: list(X[feature].unique()) for feature in [\"Gender\", \"Geography\"]}\n",
295-
"categorical_map"
296-
]
297-
},
298-
{
299-
"cell_type": "code",
300-
"execution_count": 16,
278+
"execution_count": 4,
301279
"id": "708ade4c",
302280
"metadata": {},
303281
"outputs": [],
@@ -316,7 +294,7 @@
316294
},
317295
{
318296
"cell_type": "code",
319-
"execution_count": 17,
297+
"execution_count": 5,
320298
"id": "e0a1b4b0",
321299
"metadata": {},
322300
"outputs": [],
@@ -336,7 +314,7 @@
336314
},
337315
{
338316
"cell_type": "code",
339-
"execution_count": 18,
317+
"execution_count": 6,
340318
"id": "248556af",
341319
"metadata": {},
342320
"outputs": [
@@ -348,6 +326,16 @@
348326
"encoding Gender\n"
349327
]
350328
},
329+
{
330+
"name": "stderr",
331+
"output_type": "stream",
332+
"text": [
333+
"/Users/gustavocid/miniconda3/envs/unbox-examples/lib/python3.8/site-packages/sklearn/utils/deprecation.py:87: FutureWarning: Function get_feature_names is deprecated; get_feature_names is deprecated in 1.0 and will be removed in 1.2. Please use get_feature_names_out instead.\n",
334+
" warnings.warn(msg, category=FutureWarning)\n",
335+
"/Users/gustavocid/miniconda3/envs/unbox-examples/lib/python3.8/site-packages/sklearn/utils/deprecation.py:87: FutureWarning: Function get_feature_names is deprecated; get_feature_names is deprecated in 1.0 and will be removed in 1.2. Please use get_feature_names_out instead.\n",
336+
" warnings.warn(msg, category=FutureWarning)\n"
337+
]
338+
},
351339
{
352340
"data": {
353341
"text/html": [
@@ -609,7 +597,7 @@
609597
"[10000 rows x 13 columns]"
610598
]
611599
},
612-
"execution_count": 18,
600+
"execution_count": 6,
613601
"metadata": {},
614602
"output_type": "execute_result"
615603
}
@@ -623,7 +611,7 @@
623611
},
624612
{
625613
"cell_type": "code",
626-
"execution_count": 19,
614+
"execution_count": 7,
627615
"id": "b76d541a",
628616
"metadata": {},
629617
"outputs": [
@@ -636,6 +624,20 @@
636624
"encoding Geography\n",
637625
"encoding Gender\n"
638626
]
627+
},
628+
{
629+
"name": "stderr",
630+
"output_type": "stream",
631+
"text": [
632+
"/Users/gustavocid/miniconda3/envs/unbox-examples/lib/python3.8/site-packages/sklearn/utils/deprecation.py:87: FutureWarning: Function get_feature_names is deprecated; get_feature_names is deprecated in 1.0 and will be removed in 1.2. Please use get_feature_names_out instead.\n",
633+
" warnings.warn(msg, category=FutureWarning)\n",
634+
"/Users/gustavocid/miniconda3/envs/unbox-examples/lib/python3.8/site-packages/sklearn/utils/deprecation.py:87: FutureWarning: Function get_feature_names is deprecated; get_feature_names is deprecated in 1.0 and will be removed in 1.2. Please use get_feature_names_out instead.\n",
635+
" warnings.warn(msg, category=FutureWarning)\n",
636+
"/Users/gustavocid/miniconda3/envs/unbox-examples/lib/python3.8/site-packages/sklearn/utils/deprecation.py:87: FutureWarning: Function get_feature_names is deprecated; get_feature_names is deprecated in 1.0 and will be removed in 1.2. Please use get_feature_names_out instead.\n",
637+
" warnings.warn(msg, category=FutureWarning)\n",
638+
"/Users/gustavocid/miniconda3/envs/unbox-examples/lib/python3.8/site-packages/sklearn/utils/deprecation.py:87: FutureWarning: Function get_feature_names is deprecated; get_feature_names is deprecated in 1.0 and will be removed in 1.2. Please use get_feature_names_out instead.\n",
639+
" warnings.warn(msg, category=FutureWarning)\n"
640+
]
639641
}
640642
],
641643
"source": [
@@ -654,17 +656,20 @@
654656
},
655657
{
656658
"cell_type": "code",
657-
"execution_count": 27,
659+
"execution_count": 8,
658660
"id": "ee882b61",
659661
"metadata": {},
660662
"outputs": [
661663
{
662664
"data": {
665+
"text/html": [
666+
"<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LogisticRegression(random_state=1300)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LogisticRegression</label><div class=\"sk-toggleable__content\"><pre>LogisticRegression(random_state=1300)</pre></div></div></div></div></div>"
667+
],
663668
"text/plain": [
664669
"LogisticRegression(random_state=1300)"
665670
]
666671
},
667-
"execution_count": 27,
672+
"execution_count": 8,
668673
"metadata": {},
669674
"output_type": "execute_result"
670675
}
@@ -676,7 +681,7 @@
676681
},
677682
{
678683
"cell_type": "code",
679-
"execution_count": 28,
684+
"execution_count": 9,
680685
"id": "a4f603d9",
681686
"metadata": {},
682687
"outputs": [
@@ -702,7 +707,7 @@
702707
},
703708
{
704709
"cell_type": "code",
705-
"execution_count": 29,
710+
"execution_count": 10,
706711
"id": "5d0e0298",
707712
"metadata": {},
708713
"outputs": [],
@@ -721,7 +726,7 @@
721726
},
722727
{
723728
"cell_type": "code",
724-
"execution_count": null,
729+
"execution_count": 11,
725730
"id": "82a38cd9",
726731
"metadata": {},
727732
"outputs": [],
@@ -742,7 +747,7 @@
742747
},
743748
{
744749
"cell_type": "code",
745-
"execution_count": 31,
750+
"execution_count": 12,
746751
"id": "d233987d",
747752
"metadata": {},
748753
"outputs": [],
@@ -757,7 +762,7 @@
757762
},
758763
{
759764
"cell_type": "code",
760-
"execution_count": 33,
765+
"execution_count": 13,
761766
"id": "674147b8",
762767
"metadata": {},
763768
"outputs": [
@@ -769,6 +774,18 @@
769774
"encoding Gender\n"
770775
]
771776
},
777+
{
778+
"name": "stderr",
779+
"output_type": "stream",
780+
"text": [
781+
"/Users/gustavocid/miniconda3/envs/unbox-examples/lib/python3.8/site-packages/sklearn/utils/deprecation.py:87: FutureWarning: Function get_feature_names is deprecated; get_feature_names is deprecated in 1.0 and will be removed in 1.2. Please use get_feature_names_out instead.\n",
782+
" warnings.warn(msg, category=FutureWarning)\n",
783+
"/Users/gustavocid/miniconda3/envs/unbox-examples/lib/python3.8/site-packages/sklearn/utils/deprecation.py:87: FutureWarning: Function get_feature_names is deprecated; get_feature_names is deprecated in 1.0 and will be removed in 1.2. Please use get_feature_names_out instead.\n",
784+
" warnings.warn(msg, category=FutureWarning)\n",
785+
"/Users/gustavocid/miniconda3/envs/unbox-examples/lib/python3.8/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n",
786+
" warnings.warn(\n"
787+
]
788+
},
772789
{
773790
"data": {
774791
"text/plain": [
@@ -777,7 +794,7 @@
777794
" [0.81455616, 0.18544384]])"
778795
]
779796
},
780-
"execution_count": 33,
797+
"execution_count": 13,
781798
"metadata": {},
782799
"output_type": "execute_result"
783800
}
@@ -788,7 +805,7 @@
788805
},
789806
{
790807
"cell_type": "code",
791-
"execution_count": null,
808+
"execution_count": 14,
792809
"id": "ebb1171a",
793810
"metadata": {},
794811
"outputs": [],
@@ -800,10 +817,46 @@
800817
},
801818
{
802819
"cell_type": "code",
803-
"execution_count": null,
820+
"execution_count": 15,
804821
"id": "230f1aa6",
805822
"metadata": {},
806-
"outputs": [],
823+
"outputs": [
824+
{
825+
"data": {
826+
"text/plain": [
827+
"{'classNameCounts': None,\n",
828+
" 'classNames': ['Retained', 'Exited'],\n",
829+
" 'columnNames': None,\n",
830+
" 'columnSettings': None,\n",
831+
" 'dateCreated': '2022-07-06T21:07:36.320577Z',\n",
832+
" 'description': 'this is my churn dataset',\n",
833+
" 'featureNames': ['CreditScore',\n",
834+
" 'Geography',\n",
835+
" 'Gender',\n",
836+
" 'Age',\n",
837+
" 'Tenure',\n",
838+
" 'Balance',\n",
839+
" 'NumOfProducts',\n",
840+
" 'HasCrCard',\n",
841+
" 'IsActiveMember',\n",
842+
" 'EstimatedSalary'],\n",
843+
" 'id': '4b4e4370-7766-4b42-a467-508ffbec219b',\n",
844+
" 'inProgressNotifications': ['dataset_4b4e4370-7766-4b42-a467-508ffbec219b_process_uploaded_dataset'],\n",
845+
" 'labelColumnIndex': None,\n",
846+
" 'language': 'en',\n",
847+
" 'modelCount': 0,\n",
848+
" 'name': 'Churn Validation',\n",
849+
" 'projects': [],\n",
850+
" 'rowCount': 0,\n",
851+
" 'tagCount': 0,\n",
852+
" 'taskType': 'tabular-classification'}"
853+
]
854+
},
855+
"execution_count": 15,
856+
"metadata": {},
857+
"output_type": "execute_result"
858+
}
859+
],
807860
"source": [
808861
"from unboxapi.tasks import TaskType\n",
809862
"\n",
@@ -815,17 +868,59 @@
815868
" description='this is my churn dataset',\n",
816869
" task_type=TaskType.TabularClassification,\n",
817870
" feature_names=feature_names,\n",
818-
" categorical_features_map=categorical_map,\n",
871+
" categorical_feature_names=[\"Gender\", \"Geography\"],\n",
819872
")\n",
820873
"dataset.to_dict()"
821874
]
822875
},
823876
{
824877
"cell_type": "code",
825-
"execution_count": null,
878+
"execution_count": 16,
826879
"id": "ace580e8",
827880
"metadata": {},
828-
"outputs": [],
881+
"outputs": [
882+
{
883+
"name": "stdout",
884+
"output_type": "stream",
885+
"text": [
886+
"Bundling model and artifacts...\n",
887+
"Uploading model to Unbox...\n"
888+
]
889+
},
890+
{
891+
"data": {
892+
"text/plain": [
893+
"{'classNames': ['Retained', 'Exited'],\n",
894+
" 'datasetCount': 0,\n",
895+
" 'dateCreated': '2022-07-06T21:07:38.167799Z',\n",
896+
" 'description': 'this is my churn classification model',\n",
897+
" 'featureNames': ['CreditScore',\n",
898+
" 'Geography',\n",
899+
" 'Gender',\n",
900+
" 'Age',\n",
901+
" 'Tenure',\n",
902+
" 'Balance',\n",
903+
" 'NumOfProducts',\n",
904+
" 'HasCrCard',\n",
905+
" 'IsActiveMember',\n",
906+
" 'EstimatedSalary'],\n",
907+
" 'featureSettings': None,\n",
908+
" 'id': 'f5785966-6df1-43a5-8721-ac81a2682e06',\n",
909+
" 'inProgressNotifications': ['model_f5785966-6df1-43a5-8721-ac81a2682e06_process_uploaded_model'],\n",
910+
" 'inferenceRunCount': 0,\n",
911+
" 'modelApi': None,\n",
912+
" 'name': 'Churn Classifier',\n",
913+
" 'projects': [],\n",
914+
" 'runReportCount': 0,\n",
915+
" 'taskType': 'tabular-classification',\n",
916+
" 'type': 'sklearn'}"
917+
]
918+
},
919+
"execution_count": 16,
920+
"metadata": {},
921+
"output_type": "execute_result"
922+
}
923+
],
829924
"source": [
830925
"model = client.add_model(\n",
831926
" function=predict_proba, \n",
@@ -838,13 +933,21 @@
838933
" feature_names=feature_names,\n",
839934
" train_sample_df=x_train[:3000],\n",
840935
" train_sample_label_column_name='churn',\n",
841-
" categorical_features_map=categorical_map,\n",
936+
" categorical_feature_names=[\"Gender\", \"Geography\"],\n",
842937
" col_names=feature_names,\n",
843938
" one_hot_encoder=data_encode_one_hot,\n",
844939
" encoders=encoders,\n",
845940
")\n",
846941
"model.to_dict()"
847942
]
943+
},
944+
{
945+
"cell_type": "code",
946+
"execution_count": null,
947+
"id": "74cae32f",
948+
"metadata": {},
949+
"outputs": [],
950+
"source": []
848951
}
849952
],
850953
"metadata": {
@@ -863,7 +966,7 @@
863966
"name": "python",
864967
"nbconvert_exporter": "python",
865968
"pygments_lexer": "ipython3",
866-
"version": "3.8.10"
969+
"version": "3.8.13"
867970
}
868971
},
869972
"nbformat": 4,

0 commit comments

Comments
 (0)