From 015e14fed7ab7135ede8ebc8695c6e4b285732cd Mon Sep 17 00:00:00 2001 From: Sam Saffron Date: Fri, 8 Aug 2025 16:57:30 +1000 Subject: [PATCH 1/2] FEATURE: support GPT-5 Also adds mappings for Claude on AWS - ensures developer message is used - ensures we do the max token remapping needed - ensures reasoning effort is passed along --- .../discourse-ai/config/locales/client.en.yml | 8 ++--- .../lib/completions/dialects/chat_gpt.rb | 5 ++- .../lib/completions/endpoints/aws_bedrock.rb | 16 ++++++--- .../lib/completions/endpoints/open_ai.rb | 2 +- plugins/discourse-ai/lib/completions/llm.rb | 36 +++++++++---------- .../lib/completions/endpoints/open_ai_spec.rb | 33 +++++++++++++++++ 6 files changed, 72 insertions(+), 28 deletions(-) diff --git a/plugins/discourse-ai/config/locales/client.en.yml b/plugins/discourse-ai/config/locales/client.en.yml index 9e45c76d29de9..d0adfa464b1aa 100644 --- a/plugins/discourse-ai/config/locales/client.en.yml +++ b/plugins/discourse-ai/config/locales/client.en.yml @@ -580,7 +580,7 @@ en: model_description: none: "General settings that work for most language models" - anthropic-claude-opus-4-0: "Anthropic's most intelligent model" + anthropic-claude-opus-4-1: "Anthropic's most intelligent model" anthropic-claude-sonnet-4-0: "Optimal balance of speed and cost" anthropic-claude-3-7-sonnet-latest: "Optimal balance of speed and cost (previous generation)" anthropic-claude-3-5-haiku-latest: "Fast and cost-effective" @@ -590,9 +590,9 @@ en: google-gemini-2-0-flash-lite: "Cost efficient and low latency model" open_ai-o3: "Open AI's most capable reasoning model" open_ai-o4-mini: "Advanced Cost-efficient reasoning model" - open_ai-gpt-4-1: "Open AI's flagship model. It is well suited for problem solving across domains" - open_ai-gpt-4-1-mini: "Provides a balance between intelligence, speed, and cost that makes it an attractive model for many use cases." - open_ai-gpt-4-1-nano: "Fastest, most cost-effective GPT-4.1 model." 
+ open_ai-gpt-5: "Open AI's flagship model. It is well suited for problem solving across domains" + open_ai-gpt-5-mini: "Provides a balance between intelligence, speed, and cost that makes it an attractive model for many use cases." + open_ai-gpt-5-nano: "Fastest, most cost-effective GPT-5 model." samba_nova-Meta-Llama-3-1-8B-Instruct: "Efficient lightweight multilingual model" samba_nova-Meta-Llama-3-3-70B-Instruct": "Powerful multipurpose model" mistral-mistral-large-latest: "Mistral's most powerful model" diff --git a/plugins/discourse-ai/lib/completions/dialects/chat_gpt.rb b/plugins/discourse-ai/lib/completions/dialects/chat_gpt.rb index 3e29dcdddb1c0..a02b03a8de8a8 100644 --- a/plugins/discourse-ai/lib/completions/dialects/chat_gpt.rb +++ b/plugins/discourse-ai/lib/completions/dialects/chat_gpt.rb @@ -69,7 +69,10 @@ def tools_dialect # developer messages are preferred on recent reasoning models def supports_developer_messages? !legacy_reasoning_model? && llm_model.provider == "open_ai" && - (llm_model.name.start_with?("o1") || llm_model.name.start_with?("o3")) + ( + llm_model.name.start_with?("o1") || llm_model.name.start_with?("o3") || + llm_model.name.start_with?("gpt-5") + ) end def legacy_reasoning_model? 
diff --git a/plugins/discourse-ai/lib/completions/endpoints/aws_bedrock.rb b/plugins/discourse-ai/lib/completions/endpoints/aws_bedrock.rb index f1344f3a55963..4db8619daa7c9 100644 --- a/plugins/discourse-ai/lib/completions/endpoints/aws_bedrock.rb +++ b/plugins/discourse-ai/lib/completions/endpoints/aws_bedrock.rb @@ -70,7 +70,7 @@ def bedrock_model_id case llm_model.name when "claude-2" "anthropic.claude-v2:1" - when "claude-3-haiku" + when "claude-3-haiku", "claude-3-haiku-20240307" "anthropic.claude-3-haiku-20240307-v1:0" when "claude-3-sonnet" "anthropic.claude-3-sonnet-20240229-v1:0" @@ -78,12 +78,20 @@ def bedrock_model_id "anthropic.claude-instant-v1" when "claude-3-opus" "anthropic.claude-3-opus-20240229-v1:0" - when "claude-3-5-sonnet" + when "claude-3-5-sonnet", "claude-3-5-sonnet-20241022", "claude-3-5-sonnet-latest" "anthropic.claude-3-5-sonnet-20241022-v2:0" - when "claude-3-5-haiku" + when "claude-3-5-sonnet-20240620" + "anthropic.claude-3-5-sonnet-20240620-v1:0" + when "claude-3-5-haiku", "claude-3-5-haiku-20241022", "claude-3-5-haiku-latest" "anthropic.claude-3-5-haiku-20241022-v1:0" - when "claude-3-7-sonnet" + when "claude-3-7-sonnet", "claude-3-7-sonnet-20250219", "claude-3-7-sonnet-latest" "anthropic.claude-3-7-sonnet-20250219-v1:0" + when "claude-opus-4-1", "claude-opus-4-1-20250805" + "anthropic.claude-opus-4-1-20250805-v1:0" + when "claude-opus-4", "claude-opus-4-20250514" + "anthropic.claude-opus-4-20250514-v1:0" + when "claude-sonnet-4", "claude-sonnet-4-20250514" + "anthropic.claude-sonnet-4-20250514-v1:0" else llm_model.name end diff --git a/plugins/discourse-ai/lib/completions/endpoints/open_ai.rb b/plugins/discourse-ai/lib/completions/endpoints/open_ai.rb index 36e49a4a8390b..6d90bc26193c9 100644 --- a/plugins/discourse-ai/lib/completions/endpoints/open_ai.rb +++ b/plugins/discourse-ai/lib/completions/endpoints/open_ai.rb @@ -14,7 +14,7 @@ def normalize_model_params(model_params) # max_tokens is deprecated however we still need to 
support it # on older OpenAI models and older Azure models, so we will only normalize # if our model name starts with o (to denote all the reasoning models) - if llm_model.name.starts_with?("o") + if llm_model.name.starts_with?("o") || llm_model.name.starts_with?("gpt-5") max_tokens = model_params.delete(:max_tokens) model_params[:max_completion_tokens] = max_tokens if max_tokens end diff --git a/plugins/discourse-ai/lib/completions/llm.rb b/plugins/discourse-ai/lib/completions/llm.rb index 663c6e68c98eb..f4f8348f07d17 100644 --- a/plugins/discourse-ai/lib/completions/llm.rb +++ b/plugins/discourse-ai/lib/completions/llm.rb @@ -51,9 +51,9 @@ def presets output_cost: 4, }, { - name: "claude-opus-4-0", + name: "claude-opus-4-1", tokens: 200_000, - display_name: "Claude 4 Opus", + display_name: "Claude 4.1 Opus", input_cost: 15, cached_input_cost: 1.50, output_cost: 75, @@ -126,27 +126,27 @@ def presets output_cost: 8, }, { - name: "gpt-4.1", - tokens: 800_000, - display_name: "GPT-4.1", - input_cost: 2, - cached_input_cost: 0.5, - output_cost: 8, + name: "gpt-5", + tokens: 400_000, + display_name: "GPT-5", + input_cost: 1.25, + cached_input_cost: 0.125, + output_cost: 10, }, { - name: "gpt-4.1-mini", - tokens: 800_000, - display_name: "GPT-4.1 Mini", - input_cost: 0.40, - cached_input_cost: 0.10, - output_cost: 1.60, + name: "gpt-5-mini", + tokens: 400_000, + display_name: "GPT-5 Mini", + input_cost: 0.25, + cached_input_cost: 0.025, + output_cost: 2.0, }, { - name: "gpt-4.1-nano", - tokens: 800_000, + name: "gpt-5-nano", + tokens: 400_000, - display_name: "GPT-4.1 Nano", + display_name: "GPT-5 Nano", - input_cost: 0.10, - cached_input_cost: 0.025, + input_cost: 0.05, + cached_input_cost: 0.005, output_cost: 0.40, }, ], diff --git a/plugins/discourse-ai/spec/lib/completions/endpoints/open_ai_spec.rb b/plugins/discourse-ai/spec/lib/completions/endpoints/open_ai_spec.rb index 3f160415f990e..d0dc3705bdf31 100644 --- a/plugins/discourse-ai/spec/lib/completions/endpoints/open_ai_spec.rb +++ 
b/plugins/discourse-ai/spec/lib/completions/endpoints/open_ai_spec.rb @@ -738,6 +738,39 @@ def request_body(prompt, stream: false, tool_call: false) end end + it "supports gpt-5, remaps max_tokens, passes reasoning effort, and uses developer message" do + model.update!(name: "gpt-5") + + prompt = + DiscourseAi::Completions::Prompt.new( + "You are a bot", + messages: [type: :user, content: "hello"], + ) + dialect = compliance.dialect(prompt: prompt) + + body_parsed = nil + stub_request(:post, "https://api.openai.com/v1/chat/completions").with( + body: + proc do |body| + body_parsed = JSON.parse(body) + true + end, + ).to_return(status: 200, body: { choices: [{ message: { content: "ok" } }] }.to_json) + + endpoint.perform_completion!( + dialect, + user, + { max_tokens: 321, reasoning: { effort: "low" } }, + ) + + expect(body_parsed["model"]).to eq("gpt-5") + expect(body_parsed["max_completion_tokens"]).to eq(321) + expect(body_parsed["max_tokens"]).to be_nil + expect(body_parsed["reasoning"]).to eq({ "effort" => "low" }) + expect(body_parsed["messages"].first["role"]).to eq("developer") + expect(body_parsed["messages"].first["content"]).to eq("You are a bot") + end + context "with tools" do it "returns a function invocation" do compliance.streaming_mode_tools(open_ai_mock) From ca8928d3d0c0013d65a71d85145b29ed6da63ce8 Mon Sep 17 00:00:00 2001 From: Sam Saffron Date: Fri, 8 Aug 2025 17:12:17 +1000 Subject: [PATCH 2/2] fix spec --- plugins/discourse-ai/spec/system/llms/ai_llm_spec.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/plugins/discourse-ai/spec/system/llms/ai_llm_spec.rb b/plugins/discourse-ai/spec/system/llms/ai_llm_spec.rb index 964421168dcda..8614be50baa3c 100644 --- a/plugins/discourse-ai/spec/system/llms/ai_llm_spec.rb +++ b/plugins/discourse-ai/spec/system/llms/ai_llm_spec.rb @@ -15,7 +15,7 @@ it "correctly sets defaults" do visit "/admin/plugins/discourse-ai/ai-llms" - find("[data-llm-id='anthropic-claude-opus-4-0'] 
button").click() + find("[data-llm-id='anthropic-claude-opus-4-1'] button").click() form.field("api_key").fill_in("abcd") form.field("enabled_chat_bot").toggle form.submit @@ -27,9 +27,9 @@ expect(llm.api_key).to eq("abcd") preset = DiscourseAi::Completions::Llm.presets.find { |p| p[:id] == "anthropic" } - model_preset = preset[:models].find { |m| m[:name] == "claude-opus-4-0" } + model_preset = preset[:models].find { |m| m[:name] == "claude-opus-4-1" } - expect(llm.name).to eq("claude-opus-4-0") + expect(llm.name).to eq("claude-opus-4-1") expect(llm.url).to eq(preset[:endpoint]) expect(llm.tokenizer).to eq(preset[:tokenizer].to_s) expect(llm.max_prompt_tokens.to_i).to eq(model_preset[:tokens])