77#include < cpp-httplib/httplib.h> // TODO: remove this once we use HTTP client from download.h
88#include < sheredom/subprocess.h>
99
10- #include < cstdio>
1110#include < functional>
1211#include < algorithm>
1312#include < thread>
@@ -78,87 +77,57 @@ static std::filesystem::path get_server_exec_path() {
7877
// Description of one model discovered on disk by list_local_models().
// Kept as a plain aggregate: it is brace-initialized in field order.
struct local_model {
    // Identifier of the model: the subdirectory name, or the file name
    // without its ".gguf" extension for a file placed directly in the models dir.
    std::string name;
    // Path of the model file (the first shard when the model is split).
    std::string path;
    // Path of the accompanying mmproj file; empty when there is none.
    std::string path_mmproj;
};
8583
// Returns a copy of `name` in which every path separator ('/' or '\')
// has been replaced by an underscore; all other characters are kept as-is.
static std::string sanitize_model_name(const std::string & name) {
    std::string result;
    result.reserve(name.size());
    for (const char ch : name) {
        const bool is_separator = (ch == '/' || ch == '\\');
        result.push_back(is_separator ? '_' : ch);
    }
    return result;
}
92-
9384static std::vector<local_model> list_local_models (const std::string & dir) {
9485 if (!std::filesystem::exists (dir) || !std::filesystem::is_directory (dir)) {
9586 throw std::runtime_error (string_format (" error: '%s' does not exist or is not a directory\n " , dir.c_str ()));
9687 }
9788
9889 std::vector<local_model> models;
99- std::function<void (const std::string &, const std::string &)> scan_subdir =
100- [&](const std::string & subdir_path, const std::string & name) {
101- auto files = fs_list (subdir_path, true ); // Need directories for recursion
102- common_file_info model_file;
103- common_file_info first_shard_file;
104- common_file_info mmproj_file;
105-
106- for (const auto & file : files) {
107- if (file.is_dir ) {
108- const std::string child_name = name.empty () ? file.name : name + " /" + file.name ;
109- scan_subdir (file.path , child_name);
110- continue ;
111- }
112-
113- if (string_ends_with (file.name , " .gguf" )) {
114- if (file.name .find (" mmproj" ) != std::string::npos) {
115- mmproj_file = file;
116- } else if (file.name .find (" -00001-of-" ) != std::string::npos) {
117- first_shard_file = file;
118- } else {
119- model_file = file;
120- }
121- }
122- }
123-
124- // Convert absolute paths to relative
125- std::string model_path = first_shard_file.path .empty () ? model_file.path : first_shard_file.path ;
126- if (!model_path.empty ()) {
127- std::error_code ec;
128- auto rel_path = std::filesystem::relative (model_path, dir, ec);
129- if (!ec) {
130- model_path = rel_path.generic_string ();
131- }
132- }
133-
134- std::string mmproj_path = mmproj_file.path ;
135- if (!mmproj_path.empty ()) {
136- std::error_code ec;
137- auto rel_path = std::filesystem::relative (mmproj_path, dir, ec);
138- if (!ec) {
139- mmproj_path = rel_path.generic_string ();
90+ auto scan_subdir = [&models](const std::string & subdir_path, const std::string & name) {
91+ auto files = fs_list (subdir_path, false );
92+ common_file_info model_file;
93+ common_file_info first_shard_file;
94+ common_file_info mmproj_file;
95+ for (const auto & file : files) {
96+ if (string_ends_with (file.name , " .gguf" )) {
97+ if (file.name .find (" mmproj" ) != std::string::npos) {
98+ mmproj_file = file;
99+ } else if (file.name .find (" -00001-of-" ) != std::string::npos) {
100+ first_shard_file = file;
101+ } else {
102+ model_file = file;
140103 }
141104 }
105+ }
106+ // single file model
107+ local_model model{
108+ /* name */ name,
109+ /* path */ first_shard_file.path .empty () ? model_file.path : first_shard_file.path ,
110+ /* path_mmproj */ mmproj_file.path // can be empty
111+ };
112+ if (!model.path .empty ()) {
113+ models.push_back (model);
114+ }
115+ };
142116
117+ auto files = fs_list (dir, true );
118+ for (const auto & file : files) {
119+ if (file.is_dir ) {
120+ scan_subdir (file.path , file.name );
121+ } else if (string_ends_with (file.name , " .gguf" )) {
122+ // single file model
123+ std::string name = file.name ;
124+ string_replace_all (name, " .gguf" , " " );
143125 local_model model{
144- /* name */ name,
145- /* display_name */ sanitize_model_name (name),
146- /* path */ model_path,
147- /* path_mmproj */ mmproj_path // can be empty
126+ /* name */ name,
127+ /* path */ file.path ,
128+ /* path_mmproj */ " "
148129 };
149- if (!model.path .empty ()) {
150- models.push_back (model);
151- }
152- };
153-
154- scan_subdir (dir, " " );
155-
156- // when scanning the root, the name is empty, so adjust names for models directly under models_dir
157- for (auto & model : models) {
158- if (model.name .empty () && !model.path .empty ()) {
159- model.name = std::filesystem::path (model.path ).filename ().string ();
160- string_replace_all (model.name , " .gguf" , " " );
161- model.display_name = sanitize_model_name (model.name );
130+ models.push_back (model);
162131 }
163132 }
164133 return models;
@@ -169,8 +138,8 @@ static std::vector<local_model> list_local_models(const std::string & dir) {
169138//
170139
171140
172- server_presets::server_presets (int argc, char ** argv, common_params & base_params, const std::string & presets_path, const std::string & models_dir )
173- : ctx_params(common_params_parser_init(base_params, LLAMA_EXAMPLE_SERVER)), models_dir(models_dir) {
141+ server_presets::server_presets (int argc, char ** argv, common_params & base_params, const std::string & presets_path)
142+ : ctx_params(common_params_parser_init(base_params, LLAMA_EXAMPLE_SERVER)) {
174143 if (!presets_path.empty ()) {
175144 presets = common_presets_load (presets_path, ctx_params);
176145 SRV_INF (" Loaded %zu presets from %s\n " , presets.size (), presets_path.c_str ());
@@ -185,7 +154,6 @@ server_presets::server_presets(int argc, char ** argv, common_params & base_para
185154 if (env == " LLAMA_ARG_PORT" ||
186155 env == " LLAMA_ARG_HOST" ||
187156 env == " LLAMA_ARG_ALIAS" ||
188- env == " LLAMA_ARG_MODELS_PRESET" ||
189157 env == " LLAMA_ARG_API_KEY" ||
190158 env == " LLAMA_ARG_MODELS_DIR" ||
191159 env == " LLAMA_ARG_MODELS_MAX" ||
@@ -240,17 +208,9 @@ void server_presets::render_args(server_model_meta & meta) {
240208 if (meta.in_cache ) {
241209 preset.options [control_args[" LLAMA_ARG_HF_REPO" ]] = meta.name ;
242210 } else {
243- std::string model_path = meta.path ;
244- if (!models_dir.empty () && !std::filesystem::path (model_path).is_absolute ()) {
245- model_path = models_dir + " /" + model_path;
246- }
247- preset.options [control_args[" LLAMA_ARG_MODEL" ]] = model_path;
211+ preset.options [control_args[" LLAMA_ARG_MODEL" ]] = meta.path ;
248212 if (!meta.path_mmproj .empty ()) {
249- std::string mmproj_path = meta.path_mmproj ;
250- if (!models_dir.empty () && !std::filesystem::path (mmproj_path).is_absolute ()) {
251- mmproj_path = models_dir + " /" + mmproj_path;
252- }
253- preset.options [control_args[" LLAMA_ARG_MMPROJ" ]] = mmproj_path;
213+ preset.options [control_args[" LLAMA_ARG_MMPROJ" ]] = meta.path_mmproj ;
254214 }
255215 }
256216 meta.args = preset.to_args ();
@@ -264,21 +224,20 @@ server_models::server_models(
264224 const common_params & params,
265225 int argc,
266226 char ** argv,
267- char ** envp) : base_params(params), presets(argc, argv, base_params, params.models_preset, params.models_dir ) {
227+ char ** envp) : base_params(params), presets(argc, argv, base_params, params.models_preset) {
268228 for (int i = 0 ; i < argc; i++) {
269229 base_args.push_back (std::string (argv[i]));
270230 }
271231 for (char ** env = envp; *env != nullptr ; env++) {
272232 base_env.push_back (std::string (*env));
273233 }
274234 GGML_ASSERT (!base_args.empty ());
275- // Save binary path before base_args is modified by presets parsing
235+ // set binary path
276236 try {
277- server_binary_path = get_server_exec_path ().string ();
237+ base_args[ 0 ] = get_server_exec_path ().string ();
278238 } catch (const std::exception & e) {
279239 LOG_WRN (" failed to get server executable path: %s\n " , e.what ());
280- LOG_WRN (" using original argv[0] as fallback: %s\n " , argv[0 ]);
281- server_binary_path = std::string (argv[0 ]);
240+ LOG_WRN (" using original argv[0] as fallback: %s\n " , base_args[0 ].c_str ());
282241 }
283242 load_models ();
284243}
@@ -348,18 +307,13 @@ void server_models::load_models() {
348307 if (!base_params.models_dir .empty ()) {
349308 auto local_models = list_local_models (base_params.models_dir );
350309 for (const auto & model : local_models) {
351- const std::string name = model.display_name ;
352- if (mapping.find (name) != mapping.end ()) {
310+ if (mapping.find (model.name ) != mapping.end ()) {
353311 // already exists in cached models, skip
354312 continue ;
355313 }
356- auto preset = presets.get_preset (name);
357- if (preset.name .empty () && name != model.name ) {
358- preset = presets.get_preset (model.name );
359- }
360314 server_model_meta meta{
361- /* preset */ preset ,
362- /* name */ name,
315+ /* preset */ presets. get_preset (model. name ) ,
316+ /* name */ model. name ,
363317 /* path */ model.path ,
364318 /* path_mmproj */ model.path_mmproj ,
365319 /* in_cache */ false ,
@@ -552,15 +506,11 @@ void server_models::load(const std::string & name) {
552506 throw std::runtime_error (" failed to get a port number" );
553507 }
554508
555- presets.render_args (inst.meta );
556-
557509 inst.subproc = std::make_shared<subprocess_s>();
558510 {
559511 SRV_INF (" spawning server instance with name=%s on port %d\n " , inst.meta .name .c_str (), inst.meta .port );
560512
561513 std::vector<std::string> child_args = inst.meta .args ; // copy
562- // Insert binary path as argv[0]
563- child_args.insert (child_args.begin (), server_binary_path);
564514 std::vector<std::string> child_env = base_env; // copy
565515 child_env.push_back (" LLAMA_SERVER_ROUTER_PORT=" + std::to_string (base_params.port ));
566516
0 commit comments