Skip to content

Commit d410df3

Browse files
authored
FEATURE: support GPT-5 (#34168)
Also adds mappings for Claude on AWS Bedrock. For GPT-5 specifically, this change ensures the developer message is used, performs the required max_tokens → max_completion_tokens remapping, and passes reasoning effort along to the API.
1 parent b0b7e83 commit d410df3

File tree

7 files changed

+75
-31
lines changed

7 files changed

+75
-31
lines changed

plugins/discourse-ai/config/locales/client.en.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -580,7 +580,7 @@ en:
580580

581581
model_description:
582582
none: "General settings that work for most language models"
583-
anthropic-claude-opus-4-0: "Anthropic's most intelligent model"
583+
anthropic-claude-opus-4-1: "Anthropic's most intelligent model"
584584
anthropic-claude-sonnet-4-0: "Optimal balance of speed and cost"
585585
anthropic-claude-3-7-sonnet-latest: "Optimal balance of speed and cost (previous generation)"
586586
anthropic-claude-3-5-haiku-latest: "Fast and cost-effective"
@@ -590,9 +590,9 @@ en:
590590
google-gemini-2-0-flash-lite: "Cost efficient and low latency model"
591591
open_ai-o3: "Open AI's most capable reasoning model"
592592
open_ai-o4-mini: "Advanced Cost-efficient reasoning model"
593-
open_ai-gpt-4-1: "Open AI's flagship model. It is well suited for problem solving across domains"
594-
open_ai-gpt-4-1-mini: "Provides a balance between intelligence, speed, and cost that makes it an attractive model for many use cases."
595-
open_ai-gpt-4-1-nano: "Fastest, most cost-effective GPT-4.1 model."
593+
open_ai-gpt-5: "Open AI's flagship model. It is well suited for problem solving across domains"
594+
open_ai-gpt-5-mini: "Provides a balance between intelligence, speed, and cost that makes it an attractive model for many use cases."
595+
open_ai-gpt-5-nano: "Fastest, most cost-effective GPT-5 model."
596596
samba_nova-Meta-Llama-3-1-8B-Instruct: "Efficient lightweight multilingual model"
597597
samba_nova-Meta-Llama-3-3-70B-Instruct: "Powerful multipurpose model"
598598
mistral-mistral-large-latest: "Mistral's most powerful model"

plugins/discourse-ai/lib/completions/dialects/chat_gpt.rb

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,10 @@ def tools_dialect
6969
# developer messages are preferred on recent reasoning models
7070
def supports_developer_messages?
7171
!legacy_reasoning_model? && llm_model.provider == "open_ai" &&
72-
(llm_model.name.start_with?("o1") || llm_model.name.start_with?("o3"))
72+
(
73+
llm_model.name.start_with?("o1") || llm_model.name.start_with?("o3") ||
74+
llm_model.name.start_with?("gpt-5")
75+
)
7376
end
7477

7578
def legacy_reasoning_model?

plugins/discourse-ai/lib/completions/endpoints/aws_bedrock.rb

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,20 +70,28 @@ def bedrock_model_id
7070
case llm_model.name
7171
when "claude-2"
7272
"anthropic.claude-v2:1"
73-
when "claude-3-haiku"
73+
when "claude-3-haiku", "claude-3-haiku-20240307"
7474
"anthropic.claude-3-haiku-20240307-v1:0"
7575
when "claude-3-sonnet"
7676
"anthropic.claude-3-sonnet-20240229-v1:0"
7777
when "claude-instant-1"
7878
"anthropic.claude-instant-v1"
7979
when "claude-3-opus"
8080
"anthropic.claude-3-opus-20240229-v1:0"
81-
when "claude-3-5-sonnet"
81+
when "claude-3-5-sonnet", "claude-3-5-sonnet-20241022", "claude-3-5-sonnet-latest"
8282
"anthropic.claude-3-5-sonnet-20241022-v2:0"
83-
when "claude-3-5-haiku"
83+
when "claude-3-5-sonnet-20240620"
84+
"anthropic.claude-3-5-sonnet-20240620-v1:0"
85+
when "claude-3-5-haiku", "claude-3-5-haiku-20241022", "claude-3-5-haiku-latest"
8486
"anthropic.claude-3-5-haiku-20241022-v1:0"
85-
when "claude-3-7-sonnet"
87+
when "claude-3-7-sonnet", "claude-3-7-sonnet-20250219", "claude-3-7-sonnet-latest"
8688
"anthropic.claude-3-7-sonnet-20250219-v1:0"
89+
when "claude-opus-4-1", "claude-opus-4-1-20250805"
90+
"anthropic.claude-opus-4-1-20250805-v1:0"
91+
when "claude-opus-4", "claude-opus-4-20250514"
92+
"anthropic.claude-opus-4-20250514-v1:0"
93+
when "claude-sonnet-4", "claude-sonnet-4-20250514"
94+
"anthropic.claude-sonnet-4-20250514-v1:0"
8795
else
8896
llm_model.name
8997
end

plugins/discourse-ai/lib/completions/endpoints/open_ai.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ def normalize_model_params(model_params)
1414
# max_tokens is deprecated however we still need to support it
1515
# on older OpenAI models and older Azure models, so we will only normalize
1616
# if our model name starts with o (to denote all the reasoning models)
17-
if llm_model.name.starts_with?("o")
17+
if llm_model.name.starts_with?("o") || llm_model.name.starts_with?("gpt-5")
1818
max_tokens = model_params.delete(:max_tokens)
1919
model_params[:max_completion_tokens] = max_tokens if max_tokens
2020
end

plugins/discourse-ai/lib/completions/llm.rb

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,9 @@ def presets
5151
output_cost: 4,
5252
},
5353
{
54-
name: "claude-opus-4-0",
54+
name: "claude-opus-4-1",
5555
tokens: 200_000,
56-
display_name: "Claude 4 Opus",
56+
display_name: "Claude 4.1 Opus",
5757
input_cost: 15,
5858
cached_input_cost: 1.50,
5959
output_cost: 75,
@@ -126,27 +126,27 @@ def presets
126126
output_cost: 8,
127127
},
128128
{
129-
name: "gpt-4.1",
130-
tokens: 800_000,
131-
display_name: "GPT-4.1",
132-
input_cost: 2,
133-
cached_input_cost: 0.5,
134-
output_cost: 8,
129+
name: "gpt-5",
130+
tokens: 400_000,
131+
display_name: "GPT-5",
132+
input_cost: 1.25,
133+
cached_input_cost: 0.125,
134+
output_cost: 10,
135135
},
136136
{
137-
name: "gpt-4.1-mini",
138-
tokens: 800_000,
139-
display_name: "GPT-4.1 Mini",
140-
input_cost: 0.40,
141-
cached_input_cost: 0.10,
142-
output_cost: 1.60,
137+
name: "gpt-5-mini",
138+
tokens: 400_000,
139+
display_name: "GPT-5 Mini",
140+
input_cost: 0.25,
141+
cached_input_cost: 0.025,
142+
output_cost: 2.0,
143143
},
144144
{
145-
name: "gpt-4.1-nano",
146-
tokens: 800_000,
145+
name: "gpt-5-nano",
146+
tokens: 400_000,
147147
display_name: "GPT-5 Nano"
148-
input_cost: 0.10,
149-
cached_input_cost: 0.025,
148+
input_cost: 0.05,
149+
cached_input_cost: 0.005,
150150
output_cost: 0.40,
151151
},
152152
],

plugins/discourse-ai/spec/lib/completions/endpoints/open_ai_spec.rb

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -738,6 +738,39 @@ def request_body(prompt, stream: false, tool_call: false)
738738
end
739739
end
740740

741+
it "supports gpt-5, remaps max_tokens, passes reasoning effort, and uses developer message" do
742+
model.update!(name: "gpt-5")
743+
744+
prompt =
745+
DiscourseAi::Completions::Prompt.new(
746+
"You are a bot",
747+
messages: [type: :user, content: "hello"],
748+
)
749+
dialect = compliance.dialect(prompt: prompt)
750+
751+
body_parsed = nil
752+
stub_request(:post, "https://api.openai.com/v1/chat/completions").with(
753+
body:
754+
proc do |body|
755+
body_parsed = JSON.parse(body)
756+
true
757+
end,
758+
).to_return(status: 200, body: { choices: [{ message: { content: "ok" } }] }.to_json)
759+
760+
endpoint.perform_completion!(
761+
dialect,
762+
user,
763+
{ max_tokens: 321, reasoning: { effort: "low" } },
764+
)
765+
766+
expect(body_parsed["model"]).to eq("gpt-5")
767+
expect(body_parsed["max_completion_tokens"]).to eq(321)
768+
expect(body_parsed["max_tokens"]).to be_nil
769+
expect(body_parsed["reasoning"]).to eq({ "effort" => "low" })
770+
expect(body_parsed["messages"].first["role"]).to eq("developer")
771+
expect(body_parsed["messages"].first["content"]).to eq("You are a bot")
772+
end
773+
741774
context "with tools" do
742775
it "returns a function invocation" do
743776
compliance.streaming_mode_tools(open_ai_mock)

plugins/discourse-ai/spec/system/llms/ai_llm_spec.rb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
it "correctly sets defaults" do
1616
visit "/admin/plugins/discourse-ai/ai-llms"
1717

18-
find("[data-llm-id='anthropic-claude-opus-4-0'] button").click()
18+
find("[data-llm-id='anthropic-claude-opus-4-1'] button").click()
1919
form.field("api_key").fill_in("abcd")
2020
form.field("enabled_chat_bot").toggle
2121
form.submit
@@ -27,9 +27,9 @@
2727
expect(llm.api_key).to eq("abcd")
2828

2929
preset = DiscourseAi::Completions::Llm.presets.find { |p| p[:id] == "anthropic" }
30-
model_preset = preset[:models].find { |m| m[:name] == "claude-opus-4-0" }
30+
model_preset = preset[:models].find { |m| m[:name] == "claude-opus-4-1" }
3131

32-
expect(llm.name).to eq("claude-opus-4-0")
32+
expect(llm.name).to eq("claude-opus-4-1")
3333
expect(llm.url).to eq(preset[:endpoint])
3434
expect(llm.tokenizer).to eq(preset[:tokenizer].to_s)
3535
expect(llm.max_prompt_tokens.to_i).to eq(model_preset[:tokens])

0 commit comments

Comments
 (0)