22import os
33import random
44import shutil
5- import signal
6- import subprocess
75from typing import Dict , List , Optional , Type , Union
8- import sys
9- from uuid import uuid4
6+ import subprocess
107
118import fsspec
12- import mlflow
139import psutil
1410from jupyter_core .paths import jupyter_data_dir
1511from jupyter_server .transutils import _i18n
5046 create_output_filename ,
5147)
5248
53- MLFLOW_SERVER_HOST = "127.0.0.1"
54- MLFLOW_SERVER_PORT = "5000"
55- MLFLOW_SERVER_URI = f"http://{ MLFLOW_SERVER_HOST } :{ MLFLOW_SERVER_PORT } "
56-
5749
5850class BaseScheduler (LoggingConfigurable ):
5951 """Base class for schedulers. A default implementation
@@ -409,31 +401,20 @@ class Scheduler(BaseScheduler):
409401 task_runner = Instance (allow_none = True , klass = "jupyter_scheduler.task_runner.BaseTaskRunner" )
410402
def start_mlflow_server(self):
    """Launch an MLflow tracking server as a child process.

    Runs ``mlflow server`` with ``./mlruns`` as the backend store and
    ``./mlartifacts`` as the default artifact root, listening on port 5000.
    No ``cwd`` is passed, so the relative paths resolve against the working
    directory the child inherits from this process.

    Returns:
        subprocess.Popen: Handle to the spawned server process. Callers
        that ignore the return value keep working; callers that keep it
        can poll, terminate, or wait on the server instead of leaving it
        unmanaged.

    Raises:
        OSError: If the ``mlflow`` executable cannot be started (for
            example ``FileNotFoundError`` when it is not on ``PATH``).
    """
    command = [
        "mlflow",
        "server",
        "--backend-store-uri",
        "./mlruns",
        "--default-artifact-root",
        "./mlartifacts",
        # NOTE(review): 0.0.0.0 listens on every network interface.
        # Presumably intended for access from outside the host; confirm,
        # and prefer 127.0.0.1 if only local access is needed.
        "--host",
        "0.0.0.0",
        "--port",
        "5000",
    ]
    # Hand the Popen object back rather than dropping it: a discarded handle
    # for a still-running child cannot be terminated or reaped later.
    return subprocess.Popen(command)
423- mlflow .set_tracking_uri (MLFLOW_SERVER_URI )
424- return mlflow_process
425-
426- def stop_mlflow_server (self ):
427- if self .mlflow_process is not None :
428- os .killpg (os .getpgid (self .mlflow_process .pid ), signal .SIGTERM )
429- self .mlflow_process .wait ()
430- self .mlflow_process = None
431- print ("MLFlow server stopped" )
432-
433- def mlflow_signal_handler (self , signum , frame ):
434- print ("Shutting down MLFlow server" )
435- self .stop_mlflow_server ()
436- sys .exit (0 )
437418
438419 def __init__ (
439420 self ,
@@ -450,9 +431,7 @@ def __init__(
450431 if self .task_runner_class :
451432 self .task_runner = self .task_runner_class (scheduler = self , config = config )
452433
453- self .mlflow_process = self .start_mlflow_server ()
454- signal .signal (signal .SIGINT , self .mlflow_signal_handler )
455- signal .signal (signal .SIGTERM , self .mlflow_signal_handler )
434+ self .start_mlflow_server ()
456435
457436 @property
458437 def db_session (self ):
@@ -502,21 +481,6 @@ def create_job(self, model: CreateJob) -> str:
502481 if not model .output_formats :
503482 model .output_formats = []
504483
505- mlflow_client = mlflow .MlflowClient ()
506-
507- if model .job_definition_id and model .mlflow_experiment_id :
508- experiment_id = model .mlflow_experiment_id
509- else :
510- experiment_id = mlflow_client .create_experiment (f"{ model .input_filename } -{ uuid4 ()} " )
511- model .mlflow_experiment_id = experiment_id
512- input_file_path = os .path .join (self .root_dir , model .input_uri )
513- mlflow .log_artifact (input_file_path , "input" )
514-
515- mlflow_run = mlflow_client .create_run (
516- experiment_id = experiment_id , run_name = f"{ model .input_filename } -{ uuid4 ()} "
517- )
518- model .mlflow_run_id = mlflow_run .info .run_id
519-
520484 job = Job (** model .dict (exclude_none = True , exclude = {"input_uri" }))
521485
522486 session .add (job )
@@ -664,12 +628,6 @@ def create_job_definition(self, model: CreateJobDefinition) -> str:
664628 if not self .file_exists (model .input_uri ):
665629 raise InputUriError (model .input_uri )
666630
667- mlflow_client = mlflow .MlflowClient ()
668- experiment_id = mlflow_client .create_experiment (f"{ model .input_filename } -{ uuid4 ()} " )
669- model .mlflow_experiment_id = experiment_id
670- input_file_path = os .path .join (self .root_dir , model .input_uri )
671- mlflow .log_artifact (input_file_path , "input" )
672-
673631 job_definition = JobDefinition (** model .dict (exclude_none = True , exclude = {"input_uri" }))
674632 session .add (job_definition )
675633 session .commit ()
0 commit comments