|
21 | 21 | }, |
22 | 22 | { |
23 | 23 | "cell_type": "code", |
24 | | - "execution_count": 22, |
| 24 | + "execution_count": 1, |
25 | 25 | "id": "56758c0a", |
26 | 26 | "metadata": {}, |
27 | 27 | "outputs": [], |
|
55 | 55 | }, |
56 | 56 | { |
57 | 57 | "cell_type": "code", |
58 | | - "execution_count": 1, |
| 58 | + "execution_count": 3, |
59 | 59 | "id": "4f69dcb3", |
60 | 60 | "metadata": {}, |
61 | 61 | "outputs": [], |
|
80 | 80 | }, |
81 | 81 | { |
82 | 82 | "cell_type": "code", |
83 | | - "execution_count": 2, |
| 83 | + "execution_count": 4, |
84 | 84 | "id": "2ed8bf11", |
85 | 85 | "metadata": {}, |
86 | 86 | "outputs": [], |
|
91 | 91 | }, |
92 | 92 | { |
93 | 93 | "cell_type": "code", |
94 | | - "execution_count": 3, |
| 94 | + "execution_count": 5, |
95 | 95 | "id": "ac811397", |
96 | 96 | "metadata": {}, |
97 | 97 | "outputs": [ |
|
245 | 245 | "4 0 " |
246 | 246 | ] |
247 | 247 | }, |
248 | | - "execution_count": 3, |
| 248 | + "execution_count": 5, |
249 | 249 | "metadata": {}, |
250 | 250 | "output_type": "execute_result" |
251 | 251 | } |
|
268 | 268 | }, |
269 | 269 | { |
270 | 270 | "cell_type": "code", |
271 | | - "execution_count": 4, |
| 271 | + "execution_count": 6, |
272 | 272 | "id": "0ccaafae", |
273 | 273 | "metadata": {}, |
274 | 274 | "outputs": [], |
|
292 | 292 | }, |
293 | 293 | { |
294 | 294 | "cell_type": "code", |
295 | | - "execution_count": 5, |
| 295 | + "execution_count": 7, |
296 | 296 | "id": "29e71531", |
297 | 297 | "metadata": {}, |
298 | 298 | "outputs": [], |
|
311 | 311 | }, |
312 | 312 | { |
313 | 313 | "cell_type": "code", |
314 | | - "execution_count": 6, |
| 314 | + "execution_count": 8, |
315 | 315 | "id": "3680efe3", |
316 | 316 | "metadata": {}, |
317 | 317 | "outputs": [], |
|
330 | 330 | }, |
331 | 331 | { |
332 | 332 | "cell_type": "code", |
333 | | - "execution_count": 7, |
| 333 | + "execution_count": 9, |
334 | 334 | "id": "0fcfef49", |
335 | 335 | "metadata": {}, |
336 | 336 | "outputs": [], |
|
341 | 341 | }, |
342 | 342 | { |
343 | 343 | "cell_type": "code", |
344 | | - "execution_count": 8, |
| 344 | + "execution_count": 10, |
345 | 345 | "id": "53491eab", |
346 | 346 | "metadata": {}, |
347 | 347 | "outputs": [ |
|
374 | 374 | }, |
375 | 375 | { |
376 | 376 | "cell_type": "code", |
377 | | - "execution_count": 9, |
| 377 | + "execution_count": 11, |
378 | 378 | "id": "a981bc4b", |
379 | 379 | "metadata": {}, |
380 | 380 | "outputs": [ |
|
384 | 384 | "GradientBoostingClassifier(random_state=42)" |
385 | 385 | ] |
386 | 386 | }, |
387 | | - "execution_count": 9, |
| 387 | + "execution_count": 11, |
388 | 388 | "metadata": {}, |
389 | 389 | "output_type": "execute_result" |
390 | 390 | } |
|
396 | 396 | }, |
397 | 397 | { |
398 | 398 | "cell_type": "code", |
399 | | - "execution_count": 11, |
| 399 | + "execution_count": 12, |
400 | 400 | "id": "ba829dcd", |
401 | 401 | "metadata": {}, |
402 | 402 | "outputs": [ |
|
425 | 425 | "id": "eb702d1f", |
426 | 426 | "metadata": {}, |
427 | 427 | "source": [ |
428 | | - "## 5. Unbox part -- have fun creating the next few cells!\n", |
| 428 | + "## 5. Unbox part!\n", |
429 | 429 | "\n", |
430 | 430 | "Now it's up to you! We will just compute a few important variables and concatenate the x and y, because Unbox expects a single dataframe with features and labels for the upload. \n", |
431 | 431 | "\n", |
432 | | - "Head back to the tutorial to see how you need to fill out the next few cells." |
| 432 | + "Head back to the tutorial for an explanation of the next few cells." |
433 | 433 | ] |
434 | 434 | }, |
435 | 435 | { |
436 | 436 | "cell_type": "code", |
437 | | - "execution_count": 12, |
| 437 | + "execution_count": 13, |
438 | 438 | "id": "b1682ee4", |
439 | 439 | "metadata": {}, |
440 | 440 | "outputs": [], |
|
446 | 446 | }, |
447 | 447 | { |
448 | 448 | "cell_type": "code", |
449 | | - "execution_count": 13, |
| 449 | + "execution_count": 14, |
450 | 450 | "id": "d480f0c3", |
451 | 451 | "metadata": {}, |
452 | 452 | "outputs": [], |
|
461 | 461 | "id": "65964db9", |
462 | 462 | "metadata": {}, |
463 | 463 | "outputs": [], |
| 464 | + "source": [ |
| 465 | + "# instantiating the client\n", |
| 466 | + "import unboxapi\n", |
| 467 | + "\n", |
| 468 | + "client = unboxapi.UnboxClient('YOUR_API_KEY_HERE')" |
| 469 | + ] |
| 470 | + }, |
| 471 | + { |
| 472 | + "cell_type": "code", |
| 473 | + "execution_count": null, |
| 474 | + "id": "2dee6250", |
| 475 | + "metadata": {}, |
| 476 | + "outputs": [], |
| 477 | + "source": [ |
| 478 | + "# creating the project\n", |
| 479 | + "from unboxapi.tasks import TaskType\n", |
| 480 | + "\n", |
| 481 | + "project = client.create_project(name=\"Churn prediction\",\n", |
| 482 | + " task_type=TaskType.TabularClassification,\n", |
| 483 | + " description=\"Evaluation of ML approaches to predict churn\")" |
| 484 | + ] |
| 485 | + }, |
| 486 | + { |
| 487 | + "cell_type": "code", |
| 488 | + "execution_count": null, |
| 489 | + "id": "d0c680e8", |
| 490 | + "metadata": {}, |
| 491 | + "outputs": [], |
| 492 | + "source": [ |
| 493 | + "# uploading the dataset to the project\n", |
| 494 | + "dataset = project.add_dataframe(\n", |
| 495 | + " df=validation_set, \n", |
| 496 | + " commit_message='churn validation set for October',\n", |
| 497 | + " class_names=class_names, \n", |
| 498 | + " label_column_name='Exited', \n", |
| 499 | + " feature_names=feature_names, \n", |
| 500 | + " categorical_feature_names=categorical_feature_names, \n", |
| 501 | + ")" |
| 502 | + ] |
| 503 | + }, |
| 504 | + { |
| 505 | + "cell_type": "code", |
| 506 | + "execution_count": null, |
| 507 | + "id": "5b55095c", |
| 508 | + "metadata": {}, |
| 509 | + "outputs": [], |
| 510 | + "source": [ |
| 511 | + "# defining the model's predict probability function\n", |
| 512 | + "def predict_proba(model, input_features: np.ndarray, col_names: list, one_hot_encoder, encoders):\n", |
| 513 | + " # Pre-processing the categorical features\n", |
| 514 | + " df = pd.DataFrame(input_features, columns=col_names)\n", |
| 515 | + " encoded_df = one_hot_encoder(df, encoders)\n", |
| 516 | + " \n", |
| 517 | + " # Getting the model's predictions\n", |
| 518 | + " preds = model.predict_proba(encoded_df.to_numpy())\n", |
| 519 | + " \n", |
| 520 | + " return preds" |
| 521 | + ] |
| 522 | + }, |
| 523 | + { |
| 524 | + "cell_type": "code", |
| 525 | + "execution_count": null, |
| 526 | + "id": "d22d5cef", |
| 527 | + "metadata": {}, |
| 528 | + "outputs": [], |
| 529 | + "source": [ |
| 530 | + "# uploading the model to the project\n", |
| 531 | + "from unboxapi.models import ModelType\n", |
| 532 | + "\n", |
| 533 | + "model = project.add_model(\n", |
| 534 | + " function=predict_proba, \n", |
| 535 | + " model=sklearn_model,\n", |
| 536 | + " model_type=ModelType.sklearn,\n", |
| 537 | + " class_names=class_names,\n", |
| 538 | + " name='Churn Classifier',\n", |
| 539 | + " commit_message='this is my churn classification model',\n", |
| 540 | + " feature_names=feature_names,\n", |
| 541 | + " train_sample_df=training_set[:3000],\n", |
| 542 | + " train_sample_label_column_name='Exited',\n", |
| 543 | + " categorical_feature_names=categorical_feature_names,\n", |
| 544 | + " requirements_txt_file='requirements.txt',\n", |
| 545 | + " col_names=feature_names,\n", |
| 546 | + " one_hot_encoder=data_encode_one_hot,\n", |
| 547 | + " encoders=encoders,\n", |
| 548 | + ")" |
| 549 | + ] |
| 550 | + }, |
| 551 | + { |
| 552 | + "cell_type": "code", |
| 553 | + "execution_count": null, |
| 554 | + "id": "c9a29256", |
| 555 | + "metadata": {}, |
| 556 | + "outputs": [], |
464 | 557 | "source": [] |
465 | 558 | } |
466 | 559 | ], |
|
0 commit comments