Refactor ModelLoader: move loadModel() from LlamaApp to ModelLoader class

orionpapadakis · orionpapadakis · commit a0dafe03bc2a · 2025-09-18T20:49:35.000+03:00
diff --git a/src/main/java/org/beehive/gpullama3/LlamaApp.java b/src/main/java/org/beehive/gpullama3/LlamaApp.java
@@ -2,56 +2,20 @@
 
 import org.beehive.gpullama3.aot.AOT;
 import org.beehive.gpullama3.auxiliary.LastRunMetrics;
-import org.beehive.gpullama3.core.model.tensor.FloatTensor;
-import org.beehive.gpullama3.inference.sampler.CategoricalSampler;
 import org.beehive.gpullama3.inference.sampler.Sampler;
-import org.beehive.gpullama3.inference.sampler.ToppSampler;
 import org.beehive.gpullama3.model.Model;
 import org.beehive.gpullama3.model.loader.ModelLoader;
-import org.beehive.gpullama3.tornadovm.FloatArrayUtils;
-import uk.ac.manchester.tornado.api.types.arrays.FloatArray;
 
 import java.io.IOException;
-import java.util.random.RandomGenerator;
-import java.util.random.RandomGeneratorFactory;
 
 import static org.beehive.gpullama3.inference.sampler.Sampler.createSampler;
+import static org.beehive.gpullama3.model.loader.ModelLoader.loadModel;
+
 public class LlamaApp {
     // Configuration flags for hardware acceleration and optimizations
     public static final boolean USE_VECTOR_API = Boolean.parseBoolean(System.getProperty("llama.VectorAPI", "true"));   // Enable Java Vector API for CPU acceleration
-    public static final boolean USE_AOT = Boolean.parseBoolean(System.getProperty("llama.AOT", "false"));               // Use Ahead-of-Time compilation
     public static final boolean SHOW_PERF_INTERACTIVE = Boolean.parseBoolean(System.getProperty("llama.ShowPerfInteractive", "true")); // Show performance metrics in interactive mode
 
-
-    /**
-     * Loads the language model based on the given options.
-     * <p>
-     * If Ahead-of-Time (AOT) mode is enabled, attempts to use a pre-loaded compiled model. Otherwise, loads the model from the specified path using the model loader.
-     * </p>
-     *
-     * @param options
-     *         the parsed CLI options containing model path and max token limit
-     * @return the loaded {@link Model} instance
-     * @throws IOException
-     *         if the model fails to load
-     * @throws IllegalStateException
-     *         if AOT loading is enabled but the preloaded model is unavailable
-     */
-    private static Model loadModel(Options options) throws IOException {
-        if (USE_AOT) {
-            Model model = AOT.tryUsePreLoaded(options.modelPath(), options.maxTokens());
-            if (model == null) {
-                throw new IllegalStateException("Failed to load precompiled AOT model.");
-            }
-            return model;
-        }
-        return ModelLoader.loadModel(options.modelPath(), options.maxTokens(), true, options.useTornadovm());
-    }
-
-    private static Sampler createSampler(Model model, Options options) {
-        return selectSampler(model.configuration().vocabularySize(), options.temperature(), options.topp(), options.seed());
-    }
-
     private static void runSingleInstruction(Model model, Sampler sampler, Options options) {
         String response = model.runInstructOnce(sampler, options);
         System.out.println(response);
diff --git a/src/main/java/org/beehive/gpullama3/inference/sampler/Sampler.java b/src/main/java/org/beehive/gpullama3/inference/sampler/Sampler.java
@@ -103,7 +103,7 @@ static Sampler selectSampler(int vocabularySize, float temperature, float topp,
         return sampler;
     }
 
-    static Sampler createSampler(Model model, Options options) {
+    public static Sampler createSampler(Model model, Options options) {
         return selectSampler(model.configuration().vocabularySize(), options.temperature(), options.topp(), options.seed());
     }
 
diff --git a/src/main/java/org/beehive/gpullama3/model/loader/ModelLoader.java b/src/main/java/org/beehive/gpullama3/model/loader/ModelLoader.java
@@ -1,6 +1,7 @@
 package org.beehive.gpullama3.model.loader;
 
 import org.beehive.gpullama3.Options;
+import org.beehive.gpullama3.aot.AOT;
 import org.beehive.gpullama3.core.model.GGMLType;
 import org.beehive.gpullama3.core.model.GGUF;
 import org.beehive.gpullama3.core.model.tensor.ArrayFloatTensor;
@@ -35,6 +36,8 @@
 
 public abstract class ModelLoader {
 
+    public static final boolean USE_AOT = Boolean.parseBoolean(System.getProperty("llama.AOT", "false"));               // Use Ahead-of-Time compilation
+
     protected FileChannel fileChannel;
     protected GGUF gguf;
     protected int contextLength;
@@ -76,6 +79,31 @@ private static ModelType detectModelType(Map<String, Object> metadata) {
         return ModelType.UNKNOWN;
     }
 
+    /**
+     * Loads the language model described by the given options.
+     * <p>
+     * When {@link #USE_AOT} is set, returns the model supplied by {@code AOT.tryUsePreLoaded}; otherwise delegates to {@link #loadModel(Path, int, boolean, boolean)} with weight loading enabled.
+     * </p>
+     *
+     * @param options
+     *         the parsed CLI options; the model path, max token count (passed on as the context length) and TornadoVM flag are read from it
+     * @return the loaded {@link Model} instance
+     * @throws IOException
+     *         if loading the model fails with an I/O error
+     * @throws IllegalStateException
+     *         if {@link #USE_AOT} is set but {@code AOT.tryUsePreLoaded} returns {@code null}
+     */
+    public static Model loadModel(Options options) throws IOException {
+        if (USE_AOT) {
+            Model model = AOT.tryUsePreLoaded(options.modelPath(), options.maxTokens());
+            if (model == null) {
+                throw new IllegalStateException("Failed to load precompiled AOT model.");
+            }
+            return model;
+        }
+        return ModelLoader.loadModel(options.modelPath(), options.maxTokens(), true, options.useTornadovm()); // true = loadWeights
+    }
+
     public static Model loadModel(Path ggufPath, int contextLength, boolean loadWeights, boolean useTornadovm) throws IOException {
         // initial load of metadata from gguf file
         GGUF gguf = GGUF.loadModel(ggufPath);

Original file line number	Diff line number	Diff line change
`@@ -103,7 +103,7 @@ static Sampler selectSampler(int vocabularySize, float temperature, float topp,`
`103`	`103`	`return sampler;`
`104`	`104`	`}`
`105`	`105`
`106`		`- static Sampler createSampler(Model model, Options options) {`
	`106`	`+ public static Sampler createSampler(Model model, Options options) {`
`107`	`107`	`return selectSampler(model.configuration().vocabularySize(), options.temperature(), options.topp(), options.seed());`
`108`	`108`	`}`
`109`	`109`