Stream text with GPT-4, transcribe and translate audio with Whisper, or create images with DALL·E.

- [Extra Headers per Client](#extra-headers-per-client)
- [Verbose Logging](#verbose-logging)
- [Azure](#azure)
- [Ollama](#ollama)
- [Counting Tokens](#counting-tokens)
- [Models](#models)
- [Examples](#examples)

#### Azure

To use the [Azure OpenAI Service](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/) API, you can configure the gem with your Azure credentials and endpoint.

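A configuration sketch, assuming the gem's standard `OpenAI.configure` block (the environment variable names and the `api_version` here are example values and may need updating for your deployment):

```ruby
OpenAI.configure do |config|
  config.access_token = ENV.fetch("AZURE_OPENAI_API_KEY") # Example variable name.
  config.uri_base = ENV.fetch("AZURE_OPENAI_URI")         # Example variable name.
  config.api_type = :azure
  config.api_version = "2023-03-15-preview" # Example version; check the Azure docs for current values.
end
```
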
where `AZURE_OPENAI_URI` is e.g. `https://custom-domain.openai.azure.com/openai/deployments/gpt-35-turbo`

#### Ollama

Ollama allows you to run open-source LLMs, such as Llama 3, locally. It [offers chat compatibility](https://github.com/ollama/ollama/blob/main/docs/openai.md) with the OpenAI API.
You can download Ollama [here](https://ollama.com/). On macOS you can install and run Ollama like this:
```bash
brew install ollama
ollama serve
ollama pull llama3:latest # In a new terminal tab.
```
Create a client using your Ollama server and the pulled model, and stream a conversation for free. A minimal sketch, assuming Ollama is serving on its default port, 11434:

```ruby
client = OpenAI::Client.new(uri_base: "http://localhost:11434")

client.chat(
  parameters: {
    model: "llama3", # Required: the model pulled above.
    messages: [{ role: "user", content: "Hello!" }], # Required.
    temperature: 0.7,
    stream: proc do |chunk, _bytesize|
      print chunk.dig("choices", 0, "delta", "content")
    end
  }
)
# => Hi! It's nice to meet you. Is there something I can help you with, or would you like to chat?
```

### Counting Tokens
OpenAI parses prompt text into [tokens](https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them), which are words or portions of words. (These tokens are unrelated to your API access_token.) Counting tokens can help you estimate your [costs](https://openai.com/pricing). It can also help you ensure your prompt text size is within the max-token limits of your model's context window, and choose an appropriate [`max_tokens`](https://platform.openai.com/docs/api-reference/chat/create#chat/create-max_tokens) completion parameter so your response will fit as well.
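
One way to count tokens before sending a prompt, as a sketch using the third-party [tiktoken_ruby](https://rubygems.org/gems/tiktoken_ruby) gem (an assumption of this example, not a dependency of this library):

```ruby
require "tiktoken_ruby"

# Pick the encoding that matches your target model.
enc = Tiktoken.encoding_for_model("gpt-4")

prompt = "OpenAI parses prompt text into tokens, which are words or portions of words."
tokens = enc.encode(prompt)

puts tokens.length # Number of tokens the prompt will consume.
```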