Skip to content

Commit 0074098

Browse files
committed
Revert "server: fix router model discovery and child process spawning"
This reverts commit e3832b4.
1 parent e3832b4 commit 0074098

File tree

2 files changed

+48
-100
lines changed

2 files changed

+48
-100
lines changed

tools/server/server-models.cpp

Lines changed: 47 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
#include <cpp-httplib/httplib.h> // TODO: remove this once we use HTTP client from download.h
88
#include <sheredom/subprocess.h>
99

10-
#include <cstdio>
1110
#include <functional>
1211
#include <algorithm>
1312
#include <thread>
@@ -78,87 +77,57 @@ static std::filesystem::path get_server_exec_path() {
7877

7978
struct local_model {
8079
std::string name;
81-
std::string display_name;
8280
std::string path;
8381
std::string path_mmproj;
8482
};
8583

86-
static std::string sanitize_model_name(const std::string & name) {
87-
std::string sanitized = name;
88-
string_replace_all(sanitized, "/", "_");
89-
string_replace_all(sanitized, "\\", "_");
90-
return sanitized;
91-
}
92-
9384
static std::vector<local_model> list_local_models(const std::string & dir) {
9485
if (!std::filesystem::exists(dir) || !std::filesystem::is_directory(dir)) {
9586
throw std::runtime_error(string_format("error: '%s' does not exist or is not a directory\n", dir.c_str()));
9687
}
9788

9889
std::vector<local_model> models;
99-
std::function<void(const std::string &, const std::string &)> scan_subdir =
100-
[&](const std::string & subdir_path, const std::string & name) {
101-
auto files = fs_list(subdir_path, true); // Need directories for recursion
102-
common_file_info model_file;
103-
common_file_info first_shard_file;
104-
common_file_info mmproj_file;
105-
106-
for (const auto & file : files) {
107-
if (file.is_dir) {
108-
const std::string child_name = name.empty() ? file.name : name + "/" + file.name;
109-
scan_subdir(file.path, child_name);
110-
continue;
111-
}
112-
113-
if (string_ends_with(file.name, ".gguf")) {
114-
if (file.name.find("mmproj") != std::string::npos) {
115-
mmproj_file = file;
116-
} else if (file.name.find("-00001-of-") != std::string::npos) {
117-
first_shard_file = file;
118-
} else {
119-
model_file = file;
120-
}
121-
}
122-
}
123-
124-
// Convert absolute paths to relative
125-
std::string model_path = first_shard_file.path.empty() ? model_file.path : first_shard_file.path;
126-
if (!model_path.empty()) {
127-
std::error_code ec;
128-
auto rel_path = std::filesystem::relative(model_path, dir, ec);
129-
if (!ec) {
130-
model_path = rel_path.generic_string();
131-
}
132-
}
133-
134-
std::string mmproj_path = mmproj_file.path;
135-
if (!mmproj_path.empty()) {
136-
std::error_code ec;
137-
auto rel_path = std::filesystem::relative(mmproj_path, dir, ec);
138-
if (!ec) {
139-
mmproj_path = rel_path.generic_string();
90+
auto scan_subdir = [&models](const std::string & subdir_path, const std::string & name) {
91+
auto files = fs_list(subdir_path, false);
92+
common_file_info model_file;
93+
common_file_info first_shard_file;
94+
common_file_info mmproj_file;
95+
for (const auto & file : files) {
96+
if (string_ends_with(file.name, ".gguf")) {
97+
if (file.name.find("mmproj") != std::string::npos) {
98+
mmproj_file = file;
99+
} else if (file.name.find("-00001-of-") != std::string::npos) {
100+
first_shard_file = file;
101+
} else {
102+
model_file = file;
140103
}
141104
}
105+
}
106+
// single file model
107+
local_model model{
108+
/* name */ name,
109+
/* path */ first_shard_file.path.empty() ? model_file.path : first_shard_file.path,
110+
/* path_mmproj */ mmproj_file.path // can be empty
111+
};
112+
if (!model.path.empty()) {
113+
models.push_back(model);
114+
}
115+
};
142116

117+
auto files = fs_list(dir, true);
118+
for (const auto & file : files) {
119+
if (file.is_dir) {
120+
scan_subdir(file.path, file.name);
121+
} else if (string_ends_with(file.name, ".gguf")) {
122+
// single file model
123+
std::string name = file.name;
124+
string_replace_all(name, ".gguf", "");
143125
local_model model{
144-
/* name */ name,
145-
/* display_name */ sanitize_model_name(name),
146-
/* path */ model_path,
147-
/* path_mmproj */ mmproj_path // can be empty
126+
/* name */ name,
127+
/* path */ file.path,
128+
/* path_mmproj */ ""
148129
};
149-
if (!model.path.empty()) {
150-
models.push_back(model);
151-
}
152-
};
153-
154-
scan_subdir(dir, "");
155-
156-
// when scanning the root, the name is empty, so adjust names for models directly under models_dir
157-
for (auto & model : models) {
158-
if (model.name.empty() && !model.path.empty()) {
159-
model.name = std::filesystem::path(model.path).filename().string();
160-
string_replace_all(model.name, ".gguf", "");
161-
model.display_name = sanitize_model_name(model.name);
130+
models.push_back(model);
162131
}
163132
}
164133
return models;
@@ -169,8 +138,8 @@ static std::vector<local_model> list_local_models(const std::string & dir) {
169138
//
170139

171140

172-
server_presets::server_presets(int argc, char ** argv, common_params & base_params, const std::string & presets_path, const std::string & models_dir)
173-
: ctx_params(common_params_parser_init(base_params, LLAMA_EXAMPLE_SERVER)), models_dir(models_dir) {
141+
server_presets::server_presets(int argc, char ** argv, common_params & base_params, const std::string & presets_path)
142+
: ctx_params(common_params_parser_init(base_params, LLAMA_EXAMPLE_SERVER)) {
174143
if (!presets_path.empty()) {
175144
presets = common_presets_load(presets_path, ctx_params);
176145
SRV_INF("Loaded %zu presets from %s\n", presets.size(), presets_path.c_str());
@@ -185,7 +154,6 @@ server_presets::server_presets(int argc, char ** argv, common_params & base_para
185154
if (env == "LLAMA_ARG_PORT" ||
186155
env == "LLAMA_ARG_HOST" ||
187156
env == "LLAMA_ARG_ALIAS" ||
188-
env == "LLAMA_ARG_MODELS_PRESET" ||
189157
env == "LLAMA_ARG_API_KEY" ||
190158
env == "LLAMA_ARG_MODELS_DIR" ||
191159
env == "LLAMA_ARG_MODELS_MAX" ||
@@ -240,17 +208,9 @@ void server_presets::render_args(server_model_meta & meta) {
240208
if (meta.in_cache) {
241209
preset.options[control_args["LLAMA_ARG_HF_REPO"]] = meta.name;
242210
} else {
243-
std::string model_path = meta.path;
244-
if (!models_dir.empty() && !std::filesystem::path(model_path).is_absolute()) {
245-
model_path = models_dir + "/" + model_path;
246-
}
247-
preset.options[control_args["LLAMA_ARG_MODEL"]] = model_path;
211+
preset.options[control_args["LLAMA_ARG_MODEL"]] = meta.path;
248212
if (!meta.path_mmproj.empty()) {
249-
std::string mmproj_path = meta.path_mmproj;
250-
if (!models_dir.empty() && !std::filesystem::path(mmproj_path).is_absolute()) {
251-
mmproj_path = models_dir + "/" + mmproj_path;
252-
}
253-
preset.options[control_args["LLAMA_ARG_MMPROJ"]] = mmproj_path;
213+
preset.options[control_args["LLAMA_ARG_MMPROJ"]] = meta.path_mmproj;
254214
}
255215
}
256216
meta.args = preset.to_args();
@@ -264,21 +224,20 @@ server_models::server_models(
264224
const common_params & params,
265225
int argc,
266226
char ** argv,
267-
char ** envp) : base_params(params), presets(argc, argv, base_params, params.models_preset, params.models_dir) {
227+
char ** envp) : base_params(params), presets(argc, argv, base_params, params.models_preset) {
268228
for (int i = 0; i < argc; i++) {
269229
base_args.push_back(std::string(argv[i]));
270230
}
271231
for (char ** env = envp; *env != nullptr; env++) {
272232
base_env.push_back(std::string(*env));
273233
}
274234
GGML_ASSERT(!base_args.empty());
275-
// Save binary path before base_args is modified by presets parsing
235+
// set binary path
276236
try {
277-
server_binary_path = get_server_exec_path().string();
237+
base_args[0] = get_server_exec_path().string();
278238
} catch (const std::exception & e) {
279239
LOG_WRN("failed to get server executable path: %s\n", e.what());
280-
LOG_WRN("using original argv[0] as fallback: %s\n", argv[0]);
281-
server_binary_path = std::string(argv[0]);
240+
LOG_WRN("using original argv[0] as fallback: %s\n", base_args[0].c_str());
282241
}
283242
load_models();
284243
}
@@ -348,18 +307,13 @@ void server_models::load_models() {
348307
if (!base_params.models_dir.empty()) {
349308
auto local_models = list_local_models(base_params.models_dir);
350309
for (const auto & model : local_models) {
351-
const std::string name = model.display_name;
352-
if (mapping.find(name) != mapping.end()) {
310+
if (mapping.find(model.name) != mapping.end()) {
353311
// already exists in cached models, skip
354312
continue;
355313
}
356-
auto preset = presets.get_preset(name);
357-
if (preset.name.empty() && name != model.name) {
358-
preset = presets.get_preset(model.name);
359-
}
360314
server_model_meta meta{
361-
/* preset */ preset,
362-
/* name */ name,
315+
/* preset */ presets.get_preset(model.name),
316+
/* name */ model.name,
363317
/* path */ model.path,
364318
/* path_mmproj */ model.path_mmproj,
365319
/* in_cache */ false,
@@ -552,15 +506,11 @@ void server_models::load(const std::string & name) {
552506
throw std::runtime_error("failed to get a port number");
553507
}
554508

555-
presets.render_args(inst.meta);
556-
557509
inst.subproc = std::make_shared<subprocess_s>();
558510
{
559511
SRV_INF("spawning server instance with name=%s on port %d\n", inst.meta.name.c_str(), inst.meta.port);
560512

561513
std::vector<std::string> child_args = inst.meta.args; // copy
562-
// Insert binary path as argv[0]
563-
child_args.insert(child_args.begin(), server_binary_path);
564514
std::vector<std::string> child_env = base_env; // copy
565515
child_env.push_back("LLAMA_SERVER_ROUTER_PORT=" + std::to_string(base_params.port));
566516

tools/server/server-models.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,9 +75,8 @@ struct server_presets {
7575
common_params_context ctx_params;
7676
std::map<common_arg, std::string> base_args;
7777
std::map<std::string, common_arg> control_args; // args reserved for server control
78-
std::string models_dir;
7978

80-
server_presets(int argc, char ** argv, common_params & base_params, const std::string & presets_path, const std::string & models_dir);
79+
// presets_path: path to the presets file; the constructor loads presets from it only when it is non-empty
server_presets(int argc, char ** argv, common_params & base_params, const std::string & presets_path);
8180
common_preset get_preset(const std::string & name);
8281
void render_args(server_model_meta & meta);
8382
};
@@ -100,7 +99,6 @@ struct server_models {
10099
common_params base_params;
101100
std::vector<std::string> base_args;
102101
std::vector<std::string> base_env;
103-
std::string server_binary_path;
104102

105103
server_presets presets;
106104

0 commit comments

Comments
 (0)