From fb0d3646877514606a07173846fa197ebbbefc27 Mon Sep 17 00:00:00 2001 From: Rafael Silva Date: Thu, 8 May 2025 18:26:37 -0300 Subject: [PATCH 01/11] FEATURE: add inferred concepts system This commit adds a new inferred concepts system that: - Creates a model for storing concept labels that can be applied to topics - Provides AI personas for finding new concepts and matching existing ones - Adds jobs for generating concepts from popular topics - Includes a scheduled job that automatically processes engaging topics --- app/jobs/regular/apply_inferred_concepts.rb | 47 ++++++++ .../regular/generate_inferred_concepts.rb | 47 ++++++++ .../generate_concepts_from_popular_topics.rb | 38 ++++++ app/models/inferred_concept.rb | 21 ++++ .../inferred_concept_serializer.rb | 5 + config/settings.yml | 25 ++++ ...08182047_create_inferred_concepts_table.rb | 11 ++ ...8183456_create_topics_inferred_concepts.rb | 15 +++ lib/inferred_concepts/applier.rb | 112 ++++++++++++++++++ lib/inferred_concepts/finder.rb | 91 ++++++++++++++ lib/inferred_concepts/manager.rb | 94 +++++++++++++++ lib/personas/concept_finder.rb | 35 ++++++ lib/personas/concept_matcher.rb | 36 ++++++ lib/personas/persona.rb | 2 + lib/topic_extensions.rb | 2 + 15 files changed, 581 insertions(+) create mode 100644 app/jobs/regular/apply_inferred_concepts.rb create mode 100644 app/jobs/regular/generate_inferred_concepts.rb create mode 100644 app/jobs/scheduled/generate_concepts_from_popular_topics.rb create mode 100644 app/models/inferred_concept.rb create mode 100644 app/serializers/inferred_concept_serializer.rb create mode 100644 db/migrate/20250508182047_create_inferred_concepts_table.rb create mode 100644 db/migrate/20250508183456_create_topics_inferred_concepts.rb create mode 100644 lib/inferred_concepts/applier.rb create mode 100644 lib/inferred_concepts/finder.rb create mode 100644 lib/inferred_concepts/manager.rb create mode 100644 lib/personas/concept_finder.rb create mode 100644 
lib/personas/concept_matcher.rb diff --git a/app/jobs/regular/apply_inferred_concepts.rb b/app/jobs/regular/apply_inferred_concepts.rb new file mode 100644 index 000000000..916c3f0cd --- /dev/null +++ b/app/jobs/regular/apply_inferred_concepts.rb @@ -0,0 +1,47 @@ +# frozen_string_literal: true + +module Jobs + class ApplyInferredConcepts < ::Jobs::Base + sidekiq_options queue: 'low' + + # Process a batch of topics to apply existing concepts to them + # + # @param args [Hash] Contains job arguments + # @option args [Array] :topic_ids Required - List of topic IDs to process + # @option args [Integer] :batch_size (100) Number of topics to process in each batch + def execute(args = {}) + return if args[:topic_ids].blank? + + # Process topics in smaller batches to avoid memory issues + batch_size = args[:batch_size] || 100 + + # Get the list of topic IDs + topic_ids = args[:topic_ids] + + # Process topics in batches + topic_ids.each_slice(batch_size) do |batch_topic_ids| + process_batch(batch_topic_ids) + end + end + + private + + def process_batch(topic_ids) + topics = Topic.where(id: topic_ids) + + topics.each do |topic| + begin + process_topic(topic) + rescue => e + Rails.logger.error("Error applying concepts to topic #{topic.id}: #{e.message}\n#{e.backtrace.join("\n")}") + end + end + end + + def process_topic(topic) + # Match topic against existing concepts and apply them + # Pass the topic object directly + DiscourseAi::InferredConcepts::Manager.match_topic_to_concepts(topic) + end + end +end \ No newline at end of file diff --git a/app/jobs/regular/generate_inferred_concepts.rb b/app/jobs/regular/generate_inferred_concepts.rb new file mode 100644 index 000000000..d0b73b893 --- /dev/null +++ b/app/jobs/regular/generate_inferred_concepts.rb @@ -0,0 +1,47 @@ +# frozen_string_literal: true + +module Jobs + class GenerateInferredConcepts < ::Jobs::Base + sidekiq_options queue: 'low' + + # Process a batch of topics to generate new concepts (without applying them to 
topics) + # + # @param args [Hash] Contains job arguments + # @option args [Array] :topic_ids Required - List of topic IDs to process + # @option args [Integer] :batch_size (100) Number of topics to process in each batch + def execute(args = {}) + return if args[:topic_ids].blank? + + # Process topics in smaller batches to avoid memory issues + batch_size = args[:batch_size] || 100 + + # Get the list of topic IDs + topic_ids = args[:topic_ids] + + # Process topics in batches + topic_ids.each_slice(batch_size) do |batch_topic_ids| + process_batch(batch_topic_ids) + end + end + + private + + def process_batch(topic_ids) + topics = Topic.where(id: topic_ids) + + topics.each do |topic| + begin + process_topic(topic) + rescue => e + Rails.logger.error("Error generating concepts from topic #{topic.id}: #{e.message}\n#{e.backtrace.join("\n")}") + end + end + end + + def process_topic(topic) + # Use the Manager method that handles both identifying and creating concepts + # Pass the topic object directly + DiscourseAi::InferredConcepts::Manager.generate_concepts_from_topic(topic) + end + end +end \ No newline at end of file diff --git a/app/jobs/scheduled/generate_concepts_from_popular_topics.rb b/app/jobs/scheduled/generate_concepts_from_popular_topics.rb new file mode 100644 index 000000000..fe009a1cd --- /dev/null +++ b/app/jobs/scheduled/generate_concepts_from_popular_topics.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +module Jobs + class GenerateConceptsFromPopularTopics < ::Jobs::Scheduled + every 1.day + + # This job runs daily and generates new concepts from popular topics + # It selects topics based on engagement metrics and generates concepts from their content + def execute(args = {}) + # Find candidate topics that are popular and don't have concepts yet + candidates = DiscourseAi::InferredConcepts::Manager.find_candidate_topics( + limit: SiteSetting.inferred_concepts_daily_topics_limit || 20, + min_posts: SiteSetting.inferred_concepts_min_posts || 5, + 
min_likes: SiteSetting.inferred_concepts_min_likes || 10, + min_views: SiteSetting.inferred_concepts_min_views || 100, + created_after: SiteSetting.inferred_concepts_lookback_days.days.ago + ) + + return if candidates.blank? + + # Process the candidate topics in batches using the regular job + Jobs.enqueue( + :generate_inferred_concepts, + topic_ids: candidates.map(&:id), + batch_size: 10 + ) + + # Schedule a follow-up job to apply the concepts to topics + # This runs after a delay to ensure concepts have been generated + Jobs.enqueue_in( + 1.hour, + :apply_inferred_concepts, + topic_ids: candidates.map(&:id), + batch_size: 10 + ) + end + end +end \ No newline at end of file diff --git a/app/models/inferred_concept.rb b/app/models/inferred_concept.rb new file mode 100644 index 000000000..a5b8d8773 --- /dev/null +++ b/app/models/inferred_concept.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +class InferredConcept < ActiveRecord::Base + has_and_belongs_to_many :topics + + validates :name, presence: true, uniqueness: true +end + +# == Schema Information +# +# Table name: inferred_concepts +# +# id :bigint not null, primary key +# name :string not null +# created_at :datetime not null +# updated_at :datetime not null +# +# Indexes +# +# index_inferred_concepts_on_name (name) UNIQUE +# \ No newline at end of file diff --git a/app/serializers/inferred_concept_serializer.rb b/app/serializers/inferred_concept_serializer.rb new file mode 100644 index 000000000..265fe858c --- /dev/null +++ b/app/serializers/inferred_concept_serializer.rb @@ -0,0 +1,5 @@ +# frozen_string_literal: true + +class InferredConceptSerializer < ApplicationSerializer + attributes :id, :name, :created_at, :updated_at +end \ No newline at end of file diff --git a/config/settings.yml b/config/settings.yml index c2b62d599..2f166a3e0 100644 --- a/config/settings.yml +++ b/config/settings.yml @@ -401,3 +401,28 @@ discourse_ai: allow_any: false enum: "DiscourseAi::Configuration::LlmEnumerator" 
validator: "DiscourseAi::Configuration::LlmValidator" + + inferred_concepts_enabled: + default: false + client: true + description: "Enable the inferred concepts system that automatically generates and applies concepts to topics" + inferred_concepts_daily_topics_limit: + default: 20 + client: false + description: "Maximum number of topics to process each day for concept generation" + inferred_concepts_min_posts: + default: 5 + client: false + description: "Minimum number of posts a topic must have to be considered for concept generation" + inferred_concepts_min_likes: + default: 10 + client: false + description: "Minimum number of likes a topic must have to be considered for concept generation" + inferred_concepts_min_views: + default: 100 + client: false + description: "Minimum number of views a topic must have to be considered for concept generation" + inferred_concepts_lookback_days: + default: 30 + client: false + description: "Only consider topics created within this many days for concept generation" diff --git a/db/migrate/20250508182047_create_inferred_concepts_table.rb b/db/migrate/20250508182047_create_inferred_concepts_table.rb new file mode 100644 index 000000000..6686c040d --- /dev/null +++ b/db/migrate/20250508182047_create_inferred_concepts_table.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true +class CreateInferredConceptsTable < ActiveRecord::Migration[7.2] + def change + create_table :inferred_concepts do |t| + t.string :name, null: false + t.timestamps + end + + add_index :inferred_concepts, :name, unique: true + end +end diff --git a/db/migrate/20250508183456_create_topics_inferred_concepts.rb b/db/migrate/20250508183456_create_topics_inferred_concepts.rb new file mode 100644 index 000000000..6066bfbbd --- /dev/null +++ b/db/migrate/20250508183456_create_topics_inferred_concepts.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +class CreateTopicsInferredConcepts < ActiveRecord::Migration[7.0] + def change + create_table 
:topics_inferred_concepts do |t| + t.integer :topic_id, null: false + t.integer :inferred_concept_id, null: false + t.timestamps + end + + add_index :topics_inferred_concepts, [:topic_id, :inferred_concept_id], unique: true, name: 'idx_unique_topic_inferred_concept' + add_index :topics_inferred_concepts, :topic_id + add_index :topics_inferred_concepts, :inferred_concept_id + end +end \ No newline at end of file diff --git a/lib/inferred_concepts/applier.rb b/lib/inferred_concepts/applier.rb new file mode 100644 index 000000000..9426ec0a3 --- /dev/null +++ b/lib/inferred_concepts/applier.rb @@ -0,0 +1,112 @@ +# frozen_string_literal: true + +module DiscourseAi + module InferredConcepts + class Applier + # Associates the provided concepts with a topic + # topic: a Topic instance + # concepts: an array of InferredConcept instances + def self.apply_to_topic(topic, concepts) + return if topic.blank? || concepts.blank? + + concepts.each do |concept| + # Use the join table to associate the concept with the topic + # Avoid duplicates by using find_or_create_by + ActiveRecord::Base.connection.execute(<<~SQL) + INSERT INTO topics_inferred_concepts (topic_id, inferred_concept_id, created_at, updated_at) + VALUES (#{topic.id}, #{concept.id}, NOW(), NOW()) + ON CONFLICT (topic_id, inferred_concept_id) DO NOTHING + SQL + end + end + + # Extracts content from a topic for concept analysis + # Returns a string with the topic title and first few posts + def self.topic_content_for_analysis(topic) + return "" if topic.blank? + + # Combine title and first few posts for analysis + posts = Post.where(topic_id: topic.id).order(:post_number).limit(10) + + content = "Title: #{topic.title}\n\n" + content += posts.map do |p| + "#{p.post_number}) #{p.user.username}: #{p.raw}" + end.join("\n\n") + + content + end + + # Comprehensive method to analyze a topic and apply concepts + def self.analyze_and_apply(topic) + return if topic.blank? 
+ + # Get content to analyze + content = topic_content_for_analysis(topic) + + # Identify concepts + concept_names = Finder.identify_concepts(content) + + # Create or find concepts in the database + concepts = Finder.create_or_find_concepts(concept_names) + + # Apply concepts to the topic + apply_to_topic(topic, concepts) + + concepts + end + + # Match a topic with existing concepts + def self.match_existing_concepts(topic) + return [] if topic.blank? + + # Get content to analyze + content = topic_content_for_analysis(topic) + + # Get all existing concepts + existing_concepts = InferredConcept.all.pluck(:name) + return [] if existing_concepts.empty? + + # Use the ConceptMatcher persona to match concepts + matched_concept_names = match_concepts_to_content(content, existing_concepts) + + # Find concepts in the database + matched_concepts = InferredConcept.where(name: matched_concept_names) + + # Apply concepts to the topic + apply_to_topic(topic, matched_concepts) + + matched_concepts + end + + # Use ConceptMatcher persona to match content against provided concepts + def self.match_concepts_to_content(content, concept_list) + return [] if content.blank? || concept_list.blank? + + # Prepare user message with content and concept list + user_message = <<~MESSAGE + Content to analyze: + #{content} + + Available concepts to match: + #{concept_list.join(", ")} + MESSAGE + + # Use the ConceptMatcher persona to match concepts + llm = DiscourseAi::Completions::Llm.default_llm + persona = DiscourseAi::Personas::ConceptMatcher.new + context = DiscourseAi::Personas::BotContext.new( + messages: [{ type: :user, content: user_message }], + user: Discourse.system_user + ) + + prompt = persona.craft_prompt(context) + response = llm.completion(prompt, extract_json: true) + + return [] unless response.success? 
+ + matching_concepts = response.parsed_output["matching_concepts"] + matching_concepts || [] + end + end + end +end \ No newline at end of file diff --git a/lib/inferred_concepts/finder.rb b/lib/inferred_concepts/finder.rb new file mode 100644 index 000000000..3e870cf16 --- /dev/null +++ b/lib/inferred_concepts/finder.rb @@ -0,0 +1,91 @@ +# frozen_string_literal: true + +module DiscourseAi + module InferredConcepts + class Finder + # Identifies potential concepts from provided content + # Returns an array of concept names (strings) + def self.identify_concepts(content) + return [] if content.blank? + + # Use the ConceptFinder persona to identify concepts + llm = DiscourseAi::Completions::Llm.default_llm + persona = DiscourseAi::Personas::ConceptFinder.new + context = DiscourseAi::Personas::BotContext.new( + messages: [{ type: :user, content: content }], + user: Discourse.system_user + ) + + prompt = persona.craft_prompt(context) + response = llm.completion(prompt, extract_json: true) + + return [] unless response.success? + + concepts = response.parsed_output["concepts"] + concepts || [] + end + + # Creates or finds concepts in the database from provided names + # Returns an array of InferredConcept instances + def self.create_or_find_concepts(concept_names) + return [] if concept_names.blank? 
+ + concept_names.map do |name| + InferredConcept.find_or_create_by(name: name) + end + end + + # Finds candidate topics to use for concept generation + # + # @param limit [Integer] Maximum number of topics to return + # @param min_posts [Integer] Minimum number of posts in topic + # @param min_likes [Integer] Minimum number of likes across all posts + # @param min_views [Integer] Minimum number of views + # @param exclude_topic_ids [Array] Topic IDs to exclude + # @param category_ids [Array] Only include topics from these categories (optional) + # @param created_after [DateTime] Only include topics created after this time (optional) + # @return [Array] Array of Topic objects that are good candidates + def self.find_candidate_topics( + limit: 100, + min_posts: 5, + min_likes: 10, + min_views: 100, + exclude_topic_ids: [], + category_ids: nil, + created_after: 30.days.ago + ) + query = Topic.where( + "topics.posts_count >= ? AND topics.views >= ? AND topics.like_count >= ?", + min_posts, + min_views, + min_likes + ) + + # Apply additional filters + query = query.where("topics.id NOT IN (?)", exclude_topic_ids) if exclude_topic_ids.present? + query = query.where("topics.category_id IN (?)", category_ids) if category_ids.present? + query = query.where("topics.created_at >= ?", created_after) if created_after.present? 
+ + # Exclude PM topics (if they exist in Discourse) + query = query.where(archetype: Topic.public_archetype) + + # Exclude topics that already have concepts + topics_with_concepts = <<~SQL + SELECT DISTINCT topic_id + FROM topics_inferred_concepts + SQL + + query = query.where("topics.id NOT IN (#{topics_with_concepts})") + + # Score and order topics by engagement (combination of views, likes, and posts) + query = query.select( + "topics.*, + (topics.like_count * 2 + topics.posts_count * 3 + topics.views * 0.1) AS engagement_score" + ).order("engagement_score DESC") + + # Return limited number of topics + query.limit(limit) + end + end + end +end \ No newline at end of file diff --git a/lib/inferred_concepts/manager.rb b/lib/inferred_concepts/manager.rb new file mode 100644 index 000000000..282468919 --- /dev/null +++ b/lib/inferred_concepts/manager.rb @@ -0,0 +1,94 @@ +# frozen_string_literal: true + +module DiscourseAi + module InferredConcepts + class Manager + # Generate new concepts for a topic and apply them + # @param topic [Topic] A Topic instance + # @return [Array] The concepts that were applied + def self.analyze_topic(topic) + return [] if topic.blank? + + Applier.analyze_and_apply(topic) + end + + # Extract new concepts from arbitrary content + # @param content [String] The content to analyze + # @return [Array] The identified concept names + def self.identify_concepts(content) + Finder.identify_concepts(content) + end + + # Identify and create concepts from content without applying them to any topic + # @param content [String] The content to analyze + # @return [Array] The created or found concepts + def self.generate_concepts_from_content(content) + return [] if content.blank? + + # Identify concepts + concept_names = Finder.identify_concepts(content) + return [] if concept_names.blank? 
+ + # Create or find concepts in the database + Finder.create_or_find_concepts(concept_names) + end + + # Generate concepts from a topic's content without applying them to the topic + # @param topic [Topic] A Topic instance + # @return [Array] The created or found concepts + def self.generate_concepts_from_topic(topic) + return [] if topic.blank? + + # Get content to analyze + content = Applier.topic_content_for_analysis(topic) + return [] if content.blank? + + # Generate concepts from the content + generate_concepts_from_content(content) + end + + # Match a topic against existing concepts + # @param topic [Topic] A Topic instance + # @return [Array] The concepts that were applied + def self.match_topic_to_concepts(topic) + return [] if topic.blank? + + Applier.match_existing_concepts(topic) + end + + # Find topics that have a specific concept + # @param concept_name [String] The name of the concept to search for + # @return [Array] Topics that have the specified concept + def self.search_topics_by_concept(concept_name) + concept = ::InferredConcept.find_by(name: concept_name) + return [] unless concept + concept.topics + end + + # Match arbitrary content against existing concepts + # @param content [String] The content to analyze + # @return [Array] Names of matching concepts + def self.match_content_to_concepts(content) + existing_concepts = InferredConcept.all.pluck(:name) + return [] if existing_concepts.empty? 
+ + Applier.match_concepts_to_content(content, existing_concepts) + end + + # Find candidate topics that are good for concept generation + # + # @param opts [Hash] Options to pass to the finder + # @option opts [Integer] :limit (100) Maximum number of topics to return + # @option opts [Integer] :min_posts (5) Minimum number of posts in topic + # @option opts [Integer] :min_likes (10) Minimum number of likes across all posts + # @option opts [Integer] :min_views (100) Minimum number of views + # @option opts [Array] :exclude_topic_ids ([]) Topic IDs to exclude + # @option opts [Array] :category_ids (nil) Only include topics from these categories + # @option opts [DateTime] :created_after (30.days.ago) Only include topics created after this time + # @return [Array] Array of Topic objects that are good candidates + def self.find_candidate_topics(opts = {}) + Finder.find_candidate_topics(opts) + end + end + end +end \ No newline at end of file diff --git a/lib/personas/concept_finder.rb b/lib/personas/concept_finder.rb new file mode 100644 index 000000000..2e0502d04 --- /dev/null +++ b/lib/personas/concept_finder.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +module DiscourseAi + module Personas + class ConceptFinder < Persona + def system_prompt + <<~PROMPT.strip + You are an advanced concept tagging system that identifies key concepts, themes, and topics from provided text. + Your job is to extract meaningful labels that can be used to categorize content. 
+ + Guidelines for generating concepts: + - Extract up to 7 concepts from the provided content + - Concepts should be single words or short phrases (1-3 words maximum) + - Focus on substantive topics, themes, technologies, methodologies, or domains + - Avoid overly general terms like "discussion" or "question" + - Ensure concepts are relevant to the core content + - Do not include proper nouns unless they represent key technologies or methodologies + - Maintain the original language of the text being analyzed + + Format your response as a JSON object with a single key named "concepts", which has an array of concept strings as the value. + Your output should be in the following format: + + {"concepts": ["concept1", "concept2", "concept3"]} + + + Where the concepts are replaced by the actual concepts you've identified. + PROMPT + end + + def response_format + [{ key: "concepts", type: "array" }] + end + end + end +end diff --git a/lib/personas/concept_matcher.rb b/lib/personas/concept_matcher.rb new file mode 100644 index 000000000..ce398bcb9 --- /dev/null +++ b/lib/personas/concept_matcher.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +module DiscourseAi + module Personas + class ConceptMatcher < Persona + def system_prompt + <<~PROMPT.strip + You are an advanced concept matching system that determines which concepts from a provided list are relevant to a piece of content. + Your job is to analyze the content and determine which concepts from the list apply to it. 
+ + Guidelines for matching concepts: + - Only select concepts that are clearly relevant to the content + - The content must substantially discuss or relate to the concept + - Superficial mentions are not enough to consider a concept relevant + - Be precise and selective - don't match concepts that are only tangentially related + - Consider both explicit mentions and implicit discussions of concepts + - Maintain the original language of the text being analyzed + - IMPORTANT: Only select from the exact concepts in the provided list - do not add new concepts + - If no concepts from the list match the content, return an empty array + + Format your response as a JSON object with a single key named "matching_concepts", which has an array of concept strings from the provided list. + Your output should be in the following format: + + {"matching_concepts": ["concept1", "concept3", "concept5"]} + + + Only include concepts from the provided list that match the content. If no concepts match, return an empty array. 
+ PROMPT + end + + def response_format + [{ key: "matching_concepts", type: "array" }] + end + end + end +end \ No newline at end of file diff --git a/lib/personas/persona.rb b/lib/personas/persona.rb index 62426f77d..ba3d3be68 100644 --- a/lib/personas/persona.rb +++ b/lib/personas/persona.rb @@ -52,6 +52,8 @@ def system_personas ShortSummarizer => -12, Designer => -13, ForumResearcher => -14, + ConceptFinder => -15, + ConceptMatcher => -16, } end diff --git a/lib/topic_extensions.rb b/lib/topic_extensions.rb index 7ab36493d..659a33923 100644 --- a/lib/topic_extensions.rb +++ b/lib/topic_extensions.rb @@ -11,6 +11,8 @@ module TopicExtensions -> { where(summary_type: AiSummary.summary_types[:gist]) }, class_name: "AiSummary", as: :target + + has_and_belongs_to_many :inferred_concepts end end end From 5f0d682e69b0f092e78710ee5162382cbd5bb86c Mon Sep 17 00:00:00 2001 From: Rafael Silva Date: Fri, 9 May 2025 15:03:45 -0300 Subject: [PATCH 02/11] FEATURE: Extend inferred concepts to include posts * Adds support for concepts to be inferred from and applied to posts * Replaces daily task with one that handles both topics and posts * Adds database migration for posts_inferred_concepts join table * Updates BotContext to include inferred concepts --- .claude/settings.local.json | 8 + app/jobs/regular/apply_inferred_concepts.rb | 47 ------ .../regular/generate_inferred_concepts.rb | 56 ++++--- .../generate_concepts_from_popular_items.rb | 81 ++++++++++ .../generate_concepts_from_popular_topics.rb | 38 ----- app/models/inferred_concept.rb | 1 + .../ai_inferred_concept_post_serializer.rb | 34 ++++ config/locales/server.en.yml | 6 + config/settings.yml | 8 + ...09000001_create_posts_inferred_concepts.rb | 15 ++ lib/inferred_concepts/applier.rb | 145 +++++++++++++----- lib/inferred_concepts/finder.rb | 76 +++++++-- lib/inferred_concepts/manager.rb | 98 ++++++++++-- lib/personas/bot_context.rb | 21 ++- lib/personas/concept_finder.rb | 12 +- lib/personas/concept_matcher.rb |
13 +- lib/post_extensions.rb | 2 + spec/lib/personas/persona_spec.rb | 4 + 18 files changed, 486 insertions(+), 179 deletions(-) create mode 100644 .claude/settings.local.json delete mode 100644 app/jobs/regular/apply_inferred_concepts.rb create mode 100644 app/jobs/scheduled/generate_concepts_from_popular_items.rb delete mode 100644 app/jobs/scheduled/generate_concepts_from_popular_topics.rb create mode 100644 app/serializers/ai_inferred_concept_post_serializer.rb create mode 100644 db/migrate/20250509000001_create_posts_inferred_concepts.rb diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 000000000..650de3e30 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,8 @@ +{ + "permissions": { + "allow": [ + "Bash(bundle exec rails g migration:*)" + ], + "deny": [] + } +} \ No newline at end of file diff --git a/app/jobs/regular/apply_inferred_concepts.rb b/app/jobs/regular/apply_inferred_concepts.rb deleted file mode 100644 index 916c3f0cd..000000000 --- a/app/jobs/regular/apply_inferred_concepts.rb +++ /dev/null @@ -1,47 +0,0 @@ -# frozen_string_literal: true - -module Jobs - class ApplyInferredConcepts < ::Jobs::Base - sidekiq_options queue: 'low' - - # Process a batch of topics to apply existing concepts to them - # - # @param args [Hash] Contains job arguments - # @option args [Array] :topic_ids Required - List of topic IDs to process - # @option args [Integer] :batch_size (100) Number of topics to process in each batch - def execute(args = {}) - return if args[:topic_ids].blank? 
- - # Process topics in smaller batches to avoid memory issues - batch_size = args[:batch_size] || 100 - - # Get the list of topic IDs - topic_ids = args[:topic_ids] - - # Process topics in batches - topic_ids.each_slice(batch_size) do |batch_topic_ids| - process_batch(batch_topic_ids) - end - end - - private - - def process_batch(topic_ids) - topics = Topic.where(id: topic_ids) - - topics.each do |topic| - begin - process_topic(topic) - rescue => e - Rails.logger.error("Error applying concepts to topic #{topic.id}: #{e.message}\n#{e.backtrace.join("\n")}") - end - end - end - - def process_topic(topic) - # Match topic against existing concepts and apply them - # Pass the topic object directly - DiscourseAi::InferredConcepts::Manager.match_topic_to_concepts(topic) - end - end -end \ No newline at end of file diff --git a/app/jobs/regular/generate_inferred_concepts.rb b/app/jobs/regular/generate_inferred_concepts.rb index d0b73b893..61b1d4be4 100644 --- a/app/jobs/regular/generate_inferred_concepts.rb +++ b/app/jobs/regular/generate_inferred_concepts.rb @@ -4,44 +4,64 @@ module Jobs class GenerateInferredConcepts < ::Jobs::Base sidekiq_options queue: 'low' - # Process a batch of topics to generate new concepts (without applying them to topics) + # Process items to generate new concepts # # @param args [Hash] Contains job arguments - # @option args [Array] :topic_ids Required - List of topic IDs to process - # @option args [Integer] :batch_size (100) Number of topics to process in each batch + # @option args [String] :item_type Required - Type of items to process ('topics' or 'posts') + # @option args [Array] :item_ids Required - List of item IDs to process + # @option args [Integer] :batch_size (100) Number of items to process in each batch + # @option args [Boolean] :match_only (false) Only match against existing concepts without generating new ones def execute(args = {}) - return if args[:topic_ids].blank? + return if args[:item_ids].blank? 
|| args[:item_type].blank? - # Process topics in smaller batches to avoid memory issues + unless ['topics', 'posts'].include?(args[:item_type]) + Rails.logger.error("Invalid item_type for GenerateInferredConcepts: #{args[:item_type]}") + return + end + + # Process items in smaller batches to avoid memory issues batch_size = args[:batch_size] || 100 - # Get the list of topic IDs - topic_ids = args[:topic_ids] + # Get the list of item IDs + item_ids = args[:item_ids] + match_only = args[:match_only] || false - # Process topics in batches - topic_ids.each_slice(batch_size) do |batch_topic_ids| - process_batch(batch_topic_ids) + # Process items in batches + item_ids.each_slice(batch_size) do |batch_item_ids| + process_batch(batch_item_ids, args[:item_type], match_only) end end private - def process_batch(topic_ids) - topics = Topic.where(id: topic_ids) + def process_batch(item_ids, item_type, match_only) + klass = item_type.singularize.classify.constantize + items = klass.where(id: item_ids) - topics.each do |topic| + items.each do |item| begin - process_topic(topic) + process_item(item, item_type, match_only) rescue => e - Rails.logger.error("Error generating concepts from topic #{topic.id}: #{e.message}\n#{e.backtrace.join("\n")}") + Rails.logger.error("Error generating concepts from #{item_type.singularize} #{item.id}: #{e.message}\n#{e.backtrace.join("\n")}") end end end - def process_topic(topic) + def process_item(item, item_type, match_only) # Use the Manager method that handles both identifying and creating concepts - # Pass the topic object directly - DiscourseAi::InferredConcepts::Manager.generate_concepts_from_topic(topic) + if match_only + if item_type == 'topics' + DiscourseAi::InferredConcepts::Manager.match_topic_to_concepts(item) + else # posts + DiscourseAi::InferredConcepts::Manager.match_post_to_concepts(item) + end + else + if item_type == 'topics' + DiscourseAi::InferredConcepts::Manager.analyze_topic(item) + else # posts + 
DiscourseAi::InferredConcepts::Manager.analyze_post(item) + end + end end end end \ No newline at end of file diff --git a/app/jobs/scheduled/generate_concepts_from_popular_items.rb b/app/jobs/scheduled/generate_concepts_from_popular_items.rb new file mode 100644 index 000000000..a9a034935 --- /dev/null +++ b/app/jobs/scheduled/generate_concepts_from_popular_items.rb @@ -0,0 +1,81 @@ +# frozen_string_literal: true + +module Jobs + class GenerateConceptsFromPopularItems < ::Jobs::Scheduled + every 1.day + + # This job runs daily and generates new concepts from popular topics and posts + # It selects items based on engagement metrics and generates concepts from their content + def execute(args = {}) + return unless SiteSetting.inferred_concepts_enabled + + process_popular_topics + process_popular_posts + end + + private + + def process_popular_topics + + # Find candidate topics that are popular and don't have concepts yet + candidates = DiscourseAi::InferredConcepts::Manager.find_candidate_topics( + limit: SiteSetting.inferred_concepts_daily_topics_limit || 20, + min_posts: SiteSetting.inferred_concepts_min_posts || 5, + min_likes: SiteSetting.inferred_concepts_min_likes || 10, + min_views: SiteSetting.inferred_concepts_min_views || 100, + created_after: SiteSetting.inferred_concepts_lookback_days.days.ago + ) + + return if candidates.blank? 
+ + # Process candidate topics - first generate concepts, then match + Jobs.enqueue( + :generate_inferred_concepts, + item_type: 'topics', + item_ids: candidates.map(&:id), + batch_size: 10 + ) + + # Schedule a follow-up job to match existing concepts + Jobs.enqueue_in( + 1.hour, + :generate_inferred_concepts, + item_type: 'topics', + item_ids: candidates.map(&:id), + batch_size: 10, + match_only: true + ) + end + + def process_popular_posts + + # Find candidate posts that are popular and don't have concepts yet + candidates = DiscourseAi::InferredConcepts::Manager.find_candidate_posts( + limit: SiteSetting.inferred_concepts_daily_posts_limit || 30, + min_likes: SiteSetting.inferred_concepts_post_min_likes || 5, + exclude_first_posts: true, + created_after: SiteSetting.inferred_concepts_lookback_days.days.ago + ) + + return if candidates.blank? + + # Process candidate posts - first generate concepts, then match + Jobs.enqueue( + :generate_inferred_concepts, + item_type: 'posts', + item_ids: candidates.map(&:id), + batch_size: 10 + ) + + # Schedule a follow-up job to match against existing concepts + Jobs.enqueue_in( + 1.hour, + :generate_inferred_concepts, + item_type: 'posts', + item_ids: candidates.map(&:id), + batch_size: 10, + match_only: true + ) + end + end +end \ No newline at end of file diff --git a/app/jobs/scheduled/generate_concepts_from_popular_topics.rb b/app/jobs/scheduled/generate_concepts_from_popular_topics.rb deleted file mode 100644 index fe009a1cd..000000000 --- a/app/jobs/scheduled/generate_concepts_from_popular_topics.rb +++ /dev/null @@ -1,38 +0,0 @@ -# frozen_string_literal: true - -module Jobs - class GenerateConceptsFromPopularTopics < ::Jobs::Scheduled - every 1.day - - # This job runs daily and generates new concepts from popular topics - # It selects topics based on engagement metrics and generates concepts from their content - def execute(args = {}) - # Find candidate topics that are popular and don't have concepts yet - candidates = 
DiscourseAi::InferredConcepts::Manager.find_candidate_topics( - limit: SiteSetting.inferred_concepts_daily_topics_limit || 20, - min_posts: SiteSetting.inferred_concepts_min_posts || 5, - min_likes: SiteSetting.inferred_concepts_min_likes || 10, - min_views: SiteSetting.inferred_concepts_min_views || 100, - created_after: SiteSetting.inferred_concepts_lookback_days.days.ago - ) - - return if candidates.blank? - - # Process the candidate topics in batches using the regular job - Jobs.enqueue( - :generate_inferred_concepts, - topic_ids: candidates.map(&:id), - batch_size: 10 - ) - - # Schedule a follow-up job to apply the concepts to topics - # This runs after a delay to ensure concepts have been generated - Jobs.enqueue_in( - 1.hour, - :apply_inferred_concepts, - topic_ids: candidates.map(&:id), - batch_size: 10 - ) - end - end -end \ No newline at end of file diff --git a/app/models/inferred_concept.rb b/app/models/inferred_concept.rb index a5b8d8773..0248277fa 100644 --- a/app/models/inferred_concept.rb +++ b/app/models/inferred_concept.rb @@ -2,6 +2,7 @@ class InferredConcept < ActiveRecord::Base has_and_belongs_to_many :topics + has_and_belongs_to_many :posts validates :name, presence: true, uniqueness: true end diff --git a/app/serializers/ai_inferred_concept_post_serializer.rb b/app/serializers/ai_inferred_concept_post_serializer.rb new file mode 100644 index 000000000..d4bfcd628 --- /dev/null +++ b/app/serializers/ai_inferred_concept_post_serializer.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +class AiInferredConceptPostSerializer < ApplicationSerializer + attributes :id, + :post_number, + :topic_id, + :topic_title, + :username, + :avatar_template, + :created_at, + :updated_at, + :excerpt, + :truncated, + :inferred_concepts + + def avatar_template + User.avatar_template(object.username, object.uploaded_avatar_id) + end + + def excerpt + Post.excerpt(object.cooked) + end + + def truncated + object.cooked.length > SiteSetting.post_excerpt_maxlength + 
end + + def inferred_concepts + ActiveModel::ArraySerializer.new( + object.inferred_concepts, + each_serializer: InferredConceptSerializer + ) + end +end \ No newline at end of file diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml index 3e4c10642..390da18e8 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -326,6 +326,12 @@ en: short_summarizer: name: "Summarizer (short form)" description: "Default persona used to power AI short summaries for topic lists' items" + concept_finder: + name: "Concept Finder" + description: "AI Bot specialized in identifying concepts and themes in content" + concept_matcher: + name: "Concept Matcher" + description: "AI Bot specialized in matching content against existing concepts" topic_not_found: "Summary unavailable, topic not found!" summarizing: "Summarizing topic" searching: "Searching for: '%{query}'" diff --git a/config/settings.yml b/config/settings.yml index 2f166a3e0..92371470e 100644 --- a/config/settings.yml +++ b/config/settings.yml @@ -426,3 +426,11 @@ discourse_ai: default: 30 client: false description: "Only consider topics created within this many days for concept generation" + inferred_concepts_daily_posts_limit: + default: 30 + client: false + description: "Maximum number of posts to process each day for concept generation" + inferred_concepts_post_min_likes: + default: 5 + client: false + description: "Minimum number of likes a post must have to be considered for concept generation" diff --git a/db/migrate/20250509000001_create_posts_inferred_concepts.rb b/db/migrate/20250509000001_create_posts_inferred_concepts.rb new file mode 100644 index 000000000..258d0f144 --- /dev/null +++ b/db/migrate/20250509000001_create_posts_inferred_concepts.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +class CreatePostsInferredConcepts < ActiveRecord::Migration[7.0] + def change + create_table :posts_inferred_concepts do |t| + t.integer :post_id, null: false + t.integer 
:inferred_concept_id, null: false + t.timestamps + end + + add_index :posts_inferred_concepts, [:post_id, :inferred_concept_id], unique: true, name: 'idx_unique_post_inferred_concept' + add_index :posts_inferred_concepts, :post_id + add_index :posts_inferred_concepts, :inferred_concept_id + end +end \ No newline at end of file diff --git a/lib/inferred_concepts/applier.rb b/lib/inferred_concepts/applier.rb index 9426ec0a3..fa393e75a 100644 --- a/lib/inferred_concepts/applier.rb +++ b/lib/inferred_concepts/applier.rb @@ -8,7 +8,7 @@ class Applier # concepts: an array of InferredConcept instances def self.apply_to_topic(topic, concepts) return if topic.blank? || concepts.blank? - + concepts.each do |concept| # Use the join table to associate the concept with the topic # Avoid duplicates by using find_or_create_by @@ -19,94 +19,163 @@ def self.apply_to_topic(topic, concepts) SQL end end - + + # Associates the provided concepts with a post + # post: a Post instance + # concepts: an array of InferredConcept instances + def self.apply_to_post(post, concepts) + return if post.blank? || concepts.blank? + + concepts.each do |concept| + # Use the join table to associate the concept with the post + # Avoid duplicates by using find_or_create_by + ActiveRecord::Base.connection.execute(<<~SQL) + INSERT INTO posts_inferred_concepts (post_id, inferred_concept_id, created_at, updated_at) + VALUES (#{post.id}, #{concept.id}, NOW(), NOW()) + ON CONFLICT (post_id, inferred_concept_id) DO NOTHING + SQL + end + end + # Extracts content from a topic for concept analysis # Returns a string with the topic title and first few posts def self.topic_content_for_analysis(topic) return "" if topic.blank? 
- + # Combine title and first few posts for analysis posts = Post.where(topic_id: topic.id).order(:post_number).limit(10) - + content = "Title: #{topic.title}\n\n" - content += posts.map do |p| - "#{p.post_number}) #{p.user.username}: #{p.raw}" - end.join("\n\n") - + content += posts.map { |p| "#{p.post_number}) #{p.user.username}: #{p.raw}" }.join("\n\n") + + content + end + + # Extracts content from a post for concept analysis + # Returns a string with the post content + def self.post_content_for_analysis(post) + return "" if post.blank? + + # Get the topic title for context + topic_title = post.topic&.title || "" + + content = "Topic: #{topic_title}\n\n" + content += "Post by #{post.user.username}:\n#{post.raw}" + content end - + # Comprehensive method to analyze a topic and apply concepts def self.analyze_and_apply(topic) return if topic.blank? - + # Get content to analyze content = topic_content_for_analysis(topic) - + # Identify concepts concept_names = Finder.identify_concepts(content) - + # Create or find concepts in the database concepts = Finder.create_or_find_concepts(concept_names) - + # Apply concepts to the topic apply_to_topic(topic, concepts) - + + concepts + end + + # Comprehensive method to analyze a post and apply concepts + def self.analyze_and_apply_post(post) + return if post.blank? + + # Get content to analyze + content = post_content_for_analysis(post) + + # Identify concepts + concept_names = Finder.identify_concepts(content) + + # Create or find concepts in the database + concepts = Finder.create_or_find_concepts(concept_names) + + # Apply concepts to the post + apply_to_post(post, concepts) + concepts end - + # Match a topic with existing concepts def self.match_existing_concepts(topic) return [] if topic.blank? 
- + # Get content to analyze content = topic_content_for_analysis(topic) - + # Get all existing concepts - existing_concepts = InferredConcept.all.pluck(:name) + existing_concepts = DiscourseAi::InferredConcepts::Manager.list_concepts return [] if existing_concepts.empty? - + # Use the ConceptMatcher persona to match concepts matched_concept_names = match_concepts_to_content(content, existing_concepts) - + # Find concepts in the database matched_concepts = InferredConcept.where(name: matched_concept_names) - + # Apply concepts to the topic apply_to_topic(topic, matched_concepts) - + + matched_concepts + end + + # Match a post with existing concepts + def self.match_existing_concepts_for_post(post) + return [] if post.blank? + + # Get content to analyze + content = post_content_for_analysis(post) + + # Get all existing concepts + existing_concepts = DiscourseAi::InferredConcepts::Manager.list_concepts + return [] if existing_concepts.empty? + + # Use the ConceptMatcher persona to match concepts + matched_concept_names = match_concepts_to_content(content, existing_concepts) + + # Find concepts in the database + matched_concepts = InferredConcept.where(name: matched_concept_names) + + # Apply concepts to the post + apply_to_post(post, matched_concepts) + matched_concepts end - + # Use ConceptMatcher persona to match content against provided concepts def self.match_concepts_to_content(content, concept_list) return [] if content.blank? || concept_list.blank? 
- - # Prepare user message with content and concept list + + # Prepare user message with only the content user_message = <<~MESSAGE Content to analyze: #{content} - - Available concepts to match: - #{concept_list.join(", ")} MESSAGE - + # Use the ConceptMatcher persona to match concepts llm = DiscourseAi::Completions::Llm.default_llm - persona = DiscourseAi::Personas::ConceptMatcher.new - context = DiscourseAi::Personas::BotContext.new( - messages: [{ type: :user, content: user_message }], - user: Discourse.system_user - ) - + persona = DiscourseAi::Personas::ConceptMatcher.new(concept_list: concept_list) + context = + DiscourseAi::Personas::BotContext.new( + messages: [{ type: :user, content: user_message }], + user: Discourse.system_user, + ) + prompt = persona.craft_prompt(context) response = llm.completion(prompt, extract_json: true) - + return [] unless response.success? - + matching_concepts = response.parsed_output["matching_concepts"] matching_concepts || [] end end end -end \ No newline at end of file +end diff --git a/lib/inferred_concepts/finder.rb b/lib/inferred_concepts/finder.rb index 3e870cf16..56ddcbdc6 100644 --- a/lib/inferred_concepts/finder.rb +++ b/lib/inferred_concepts/finder.rb @@ -36,7 +36,7 @@ def self.create_or_find_concepts(concept_names) end # Finds candidate topics to use for concept generation - # + # # @param limit [Integer] Maximum number of topics to return # @param min_posts [Integer] Minimum number of posts in topic # @param min_likes [Integer] Minimum number of likes across all posts @@ -46,46 +46,92 @@ def self.create_or_find_concepts(concept_names) # @param created_after [DateTime] Only include topics created after this time (optional) # @return [Array] Array of Topic objects that are good candidates def self.find_candidate_topics( - limit: 100, - min_posts: 5, - min_likes: 10, - min_views: 100, + limit: 100, + min_posts: 5, + min_likes: 10, + min_views: 100, exclude_topic_ids: [], category_ids: nil, created_after: 30.days.ago 
) query = Topic.where( "topics.posts_count >= ? AND topics.views >= ? AND topics.like_count >= ?", - min_posts, - min_views, + min_posts, + min_views, min_likes ) - + # Apply additional filters query = query.where("topics.id NOT IN (?)", exclude_topic_ids) if exclude_topic_ids.present? query = query.where("topics.category_id IN (?)", category_ids) if category_ids.present? query = query.where("topics.created_at >= ?", created_after) if created_after.present? - + # Exclude PM topics (if they exist in Discourse) query = query.where(archetype: Topic.public_archetype) - + # Exclude topics that already have concepts topics_with_concepts = <<~SQL - SELECT DISTINCT topic_id + SELECT DISTINCT topic_id FROM topics_inferred_concepts SQL - + query = query.where("topics.id NOT IN (#{topics_with_concepts})") - + # Score and order topics by engagement (combination of views, likes, and posts) query = query.select( - "topics.*, + "topics.*, (topics.like_count * 2 + topics.posts_count * 3 + topics.views * 0.1) AS engagement_score" ).order("engagement_score DESC") - + # Return limited number of topics query.limit(limit) end + + # Find candidate posts that are good for concept generation + # + # @param limit [Integer] Maximum number of posts to return + # @param min_likes [Integer] Minimum number of likes + # @param exclude_first_posts [Boolean] Exclude first posts in topics + # @param exclude_post_ids [Array] Post IDs to exclude + # @param category_ids [Array] Only include posts from topics in these categories + # @param created_after [DateTime] Only include posts created after this time + # @return [Array] Array of Post objects that are good candidates + def self.find_candidate_posts( + limit: 100, + min_likes: 5, + exclude_first_posts: true, + exclude_post_ids: [], + category_ids: nil, + created_after: 30.days.ago + ) + query = Post.where("posts.like_count >= ?", min_likes) + + # Exclude first posts if specified + query = query.where("posts.post_number > 1") if exclude_first_posts 
+ + # Apply additional filters + query = query.where("posts.id NOT IN (?)", exclude_post_ids) if exclude_post_ids.present? + query = query.where("posts.created_at >= ?", created_after) if created_after.present? + + # Filter by category if specified + if category_ids.present? + query = query.joins(:topic).where("topics.category_id IN (?)", category_ids) + end + + # Exclude posts that already have concepts + posts_with_concepts = <<~SQL + SELECT DISTINCT post_id + FROM posts_inferred_concepts + SQL + + query = query.where("posts.id NOT IN (#{posts_with_concepts})") + + # Order by engagement (likes) + query = query.order(like_count: :desc) + + # Return limited number of posts + query.limit(limit) + end end end end \ No newline at end of file diff --git a/lib/inferred_concepts/manager.rb b/lib/inferred_concepts/manager.rb index 282468919..3037cd153 100644 --- a/lib/inferred_concepts/manager.rb +++ b/lib/inferred_concepts/manager.rb @@ -3,59 +3,102 @@ module DiscourseAi module InferredConcepts class Manager + # Get a list of existing concepts + # @param limit [Integer, nil] Optional maximum number of concepts to return + # @return [Array] Array of InferredConcept objects + def self.list_concepts(limit: nil) + query = InferredConcept.all.order("name ASC") + + # Apply limit if provided + query = query.limit(limit) if limit.present? + + query.pluck(:name) + end # Generate new concepts for a topic and apply them # @param topic [Topic] A Topic instance # @return [Array] The concepts that were applied def self.analyze_topic(topic) return [] if topic.blank? - + Applier.analyze_and_apply(topic) end - + + # Generate new concepts for a post and apply them + # @param post [Post] A Post instance + # @return [Array] The concepts that were applied + def self.analyze_post(post) + return [] if post.blank? 
+ + Applier.analyze_and_apply_post(post) + end + # Extract new concepts from arbitrary content # @param content [String] The content to analyze # @return [Array] The identified concept names def self.identify_concepts(content) Finder.identify_concepts(content) end - + # Identify and create concepts from content without applying them to any topic # @param content [String] The content to analyze # @return [Array] The created or found concepts def self.generate_concepts_from_content(content) return [] if content.blank? - + # Identify concepts concept_names = Finder.identify_concepts(content) return [] if concept_names.blank? - + # Create or find concepts in the database Finder.create_or_find_concepts(concept_names) end - + # Generate concepts from a topic's content without applying them to the topic # @param topic [Topic] A Topic instance # @return [Array] The created or found concepts def self.generate_concepts_from_topic(topic) return [] if topic.blank? - + # Get content to analyze content = Applier.topic_content_for_analysis(topic) return [] if content.blank? - + + # Generate concepts from the content + generate_concepts_from_content(content) + end + + # Generate concepts from a post's content without applying them to the post + # @param post [Post] A Post instance + # @return [Array] The created or found concepts + def self.generate_concepts_from_post(post) + return [] if post.blank? + + # Get content to analyze + content = Applier.post_content_for_analysis(post) + return [] if content.blank? + # Generate concepts from the content generate_concepts_from_content(content) end - + # Match a topic against existing concepts # @param topic [Topic] A Topic instance # @return [Array] The concepts that were applied def self.match_topic_to_concepts(topic) return [] if topic.blank? 
- + Applier.match_existing_concepts(topic) end - + + # Match a post against existing concepts + # @param post [Post] A Post instance + # @return [Array] The concepts that were applied + def self.match_post_to_concepts(post) + return [] if post.blank? + + Applier.match_existing_concepts_for_post(post) + end + # Find topics that have a specific concept # @param concept_name [String] The name of the concept to search for # @return [Array] Topics that have the specified concept @@ -64,19 +107,28 @@ def self.search_topics_by_concept(concept_name) return [] unless concept concept.topics end - + + # Find posts that have a specific concept + # @param concept_name [String] The name of the concept to search for + # @return [Array] Posts that have the specified concept + def self.search_posts_by_concept(concept_name) + concept = ::InferredConcept.find_by(name: concept_name) + return [] unless concept + concept.posts + end + # Match arbitrary content against existing concepts # @param content [String] The content to analyze # @return [Array] Names of matching concepts def self.match_content_to_concepts(content) existing_concepts = InferredConcept.all.pluck(:name) return [] if existing_concepts.empty? 
- + Applier.match_concepts_to_content(content, existing_concepts) end - + # Find candidate topics that are good for concept generation - # + # # @param opts [Hash] Options to pass to the finder # @option opts [Integer] :limit (100) Maximum number of topics to return # @option opts [Integer] :min_posts (5) Minimum number of posts in topic @@ -89,6 +141,20 @@ def self.match_content_to_concepts(content) def self.find_candidate_topics(opts = {}) Finder.find_candidate_topics(opts) end + + # Find candidate posts that are good for concept generation + # @param opts [Hash] Options to pass to the finder + # @return [Array] Array of Post objects that are good candidates + def self.find_candidate_posts(opts = {}) + Finder.find_candidate_posts( + limit: opts[:limit], + min_likes: opts[:min_likes], + exclude_first_posts: opts[:exclude_first_posts], + exclude_post_ids: opts[:exclude_post_ids], + category_ids: opts[:category_ids], + created_after: opts[:created_after], + ) + end end end -end \ No newline at end of file +end diff --git a/lib/personas/bot_context.rb b/lib/personas/bot_context.rb index 69d86669a..83220a520 100644 --- a/lib/personas/bot_context.rb +++ b/lib/personas/bot_context.rb @@ -17,7 +17,11 @@ class BotContext :context_post_ids, :feature_name, :resource_url, +<<<<<<< HEAD :cancel_manager +======= + :inferred_concepts +>>>>>>> 44391e27 (FEATURE: Extend inferred concepts to include posts) def initialize( post: nil, @@ -35,7 +39,11 @@ def initialize( context_post_ids: nil, feature_name: "bot", resource_url: nil, +<<<<<<< HEAD cancel_manager: nil +======= + inferred_concepts: [] +>>>>>>> 44391e27 (FEATURE: Extend inferred concepts to include posts) ) @participants = participants @user = user @@ -54,7 +62,7 @@ def initialize( @resource_url = resource_url @feature_name = feature_name - @resource_url = resource_url + @inferred_concepts = inferred_concepts @cancel_manager = cancel_manager @@ -68,7 +76,15 @@ def initialize( end # these are strings that can be safely 
interpolated into templates - TEMPLATE_PARAMS = %w[time site_url site_title site_description participants resource_url] + TEMPLATE_PARAMS = %w[ + time + site_url + site_title + site_description + participants + resource_url + inferred_concepts + ] def lookup_template_param(key) public_send(key.to_sym) if TEMPLATE_PARAMS.include?(key) @@ -114,6 +130,7 @@ def to_json skip_tool_details: @skip_tool_details, feature_name: @feature_name, resource_url: @resource_url, + inferred_concepts: @inferred_concepts, } end end diff --git a/lib/personas/concept_finder.rb b/lib/personas/concept_finder.rb index 2e0502d04..a713e8b86 100644 --- a/lib/personas/concept_finder.rb +++ b/lib/personas/concept_finder.rb @@ -4,6 +4,16 @@ module DiscourseAi module Personas class ConceptFinder < Persona def system_prompt + existing_concepts = DiscourseAi::InferredConcepts::Manager.list_concepts(limit: 100) + existing_concepts_text = "" + + existing_concepts_text = <<~CONCEPTS if existing_concepts.present? + The following concepts already exist in the system: + #{existing_concepts.join(", ")} + + You can reuse these existing concepts if they apply to the content, or suggest new concepts. + CONCEPTS + <<~PROMPT.strip You are an advanced concept tagging system that identifies key concepts, themes, and topics from provided text. Your job is to extract meaningful labels that can be used to categorize content. @@ -16,7 +26,7 @@ def system_prompt - Ensure concepts are relevant to the core content - Do not include proper nouns unless they represent key technologies or methodologies - Maintain the original language of the text being analyzed - + #{existing_concepts_text} Format your response as a JSON object with a single key named "concepts", which has an array of concept strings as the value. 
Your output should be in the following format: diff --git a/lib/personas/concept_matcher.rb b/lib/personas/concept_matcher.rb index ce398bcb9..bea17e096 100644 --- a/lib/personas/concept_matcher.rb +++ b/lib/personas/concept_matcher.rb @@ -7,7 +7,8 @@ def system_prompt <<~PROMPT.strip You are an advanced concept matching system that determines which concepts from a provided list are relevant to a piece of content. Your job is to analyze the content and determine which concepts from the list apply to it. - + + #{concepts_text} Guidelines for matching concepts: - Only select concepts that are clearly relevant to the content - The content must substantially discuss or relate to the concept @@ -17,13 +18,17 @@ def system_prompt - Maintain the original language of the text being analyzed - IMPORTANT: Only select from the exact concepts in the provided list - do not add new concepts - If no concepts from the list match the content, return an empty array - + + The list of available concepts is: + + {inferred_concepts} + Format your response as a JSON object with a single key named "matching_concepts", which has an array of concept strings from the provided list. Your output should be in the following format: {"matching_concepts": ["concept1", "concept3", "concept5"]} - + Only include concepts from the provided list that match the content. If no concepts match, return an empty array. 
PROMPT end @@ -33,4 +38,4 @@ def response_format end end end -end \ No newline at end of file +end diff --git a/lib/post_extensions.rb b/lib/post_extensions.rb index 04a28a156..3a06495f6 100644 --- a/lib/post_extensions.rb +++ b/lib/post_extensions.rb @@ -11,6 +11,8 @@ module PostExtensions -> { where(classification_type: "sentiment") }, class_name: "ClassificationResult", as: :target + + has_and_belongs_to_many :inferred_concepts end end end diff --git a/spec/lib/personas/persona_spec.rb b/spec/lib/personas/persona_spec.rb index d3e905680..22670f155 100644 --- a/spec/lib/personas/persona_spec.rb +++ b/spec/lib/personas/persona_spec.rb @@ -17,6 +17,7 @@ def system_prompt {participants} {time} {resource_url} + {inferred_concepts} PROMPT end end @@ -38,6 +39,7 @@ def system_prompt end let(:resource_url) { "https://path-to-resource" } + let(:inferred_concepts) { %w[bulbassaur charmander squirtle] } let(:context) do DiscourseAi::Personas::BotContext.new( @@ -47,6 +49,7 @@ def system_prompt time: Time.zone.now, participants: topic_with_users.allowed_users.map(&:username).join(", "), resource_url: resource_url, + inferred_concepts: inferred_concepts, ) end @@ -66,6 +69,7 @@ def system_prompt expect(system_message).to include("joe, jane") expect(system_message).to include(Time.zone.now.to_s) expect(system_message).to include(resource_url) + expect(system_message).to include(inferred_concepts) tools = rendered.tools From f5259479847f01a5798cdf19070730a7c3dfa507 Mon Sep 17 00:00:00 2001 From: Rafael Silva Date: Fri, 9 May 2025 15:06:27 -0300 Subject: [PATCH 03/11] lint --- .../regular/generate_inferred_concepts.rb | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/app/jobs/regular/generate_inferred_concepts.rb b/app/jobs/regular/generate_inferred_concepts.rb index 61b1d4be4..f570e8eaf 100644 --- a/app/jobs/regular/generate_inferred_concepts.rb +++ b/app/jobs/regular/generate_inferred_concepts.rb @@ -2,7 +2,7 @@ module Jobs class 
GenerateInferredConcepts < ::Jobs::Base - sidekiq_options queue: 'low' + sidekiq_options queue: "low" # Process items to generate new concepts # @@ -13,50 +13,52 @@ class GenerateInferredConcepts < ::Jobs::Base # @option args [Boolean] :match_only (false) Only match against existing concepts without generating new ones def execute(args = {}) return if args[:item_ids].blank? || args[:item_type].blank? - - unless ['topics', 'posts'].include?(args[:item_type]) + + unless %w[topics posts].include?(args[:item_type]) Rails.logger.error("Invalid item_type for GenerateInferredConcepts: #{args[:item_type]}") return end - + # Process items in smaller batches to avoid memory issues batch_size = args[:batch_size] || 100 - + # Get the list of item IDs item_ids = args[:item_ids] match_only = args[:match_only] || false - + # Process items in batches item_ids.each_slice(batch_size) do |batch_item_ids| process_batch(batch_item_ids, args[:item_type], match_only) end end - + private - + def process_batch(item_ids, item_type, match_only) klass = item_type.singularize.classify.constantize items = klass.where(id: item_ids) - + items.each do |item| begin process_item(item, item_type, match_only) rescue => e - Rails.logger.error("Error generating concepts from #{item_type.singularize} #{item.id}: #{e.message}\n#{e.backtrace.join("\n")}") + Rails.logger.error( + "Error generating concepts from #{item_type.singularize} #{item.id}: #{e.message}\n#{e.backtrace.join("\n")}", + ) end end end - + def process_item(item, item_type, match_only) # Use the Manager method that handles both identifying and creating concepts if match_only - if item_type == 'topics' + if item_type == "topics" DiscourseAi::InferredConcepts::Manager.match_topic_to_concepts(item) else # posts DiscourseAi::InferredConcepts::Manager.match_post_to_concepts(item) end else - if item_type == 'topics' + if item_type == "topics" DiscourseAi::InferredConcepts::Manager.analyze_topic(item) else # posts 
DiscourseAi::InferredConcepts::Manager.analyze_post(item) @@ -64,4 +66,4 @@ def process_item(item, item_type, match_only) end end end -end \ No newline at end of file +end From a8644fadf7d4441ca58851e3ae59a04290506f68 Mon Sep 17 00:00:00 2001 From: Rafael Silva Date: Fri, 9 May 2025 15:18:05 -0300 Subject: [PATCH 04/11] small fixes --- app/jobs/regular/generate_inferred_concepts.rb | 2 +- lib/personas/concept_matcher.rb | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/app/jobs/regular/generate_inferred_concepts.rb b/app/jobs/regular/generate_inferred_concepts.rb index f570e8eaf..06b0a7500 100644 --- a/app/jobs/regular/generate_inferred_concepts.rb +++ b/app/jobs/regular/generate_inferred_concepts.rb @@ -14,7 +14,7 @@ class GenerateInferredConcepts < ::Jobs::Base def execute(args = {}) return if args[:item_ids].blank? || args[:item_type].blank? - unless %w[topics posts].include?(args[:item_type]) + if %w[topics posts].exclude?(args[:item_type]) Rails.logger.error("Invalid item_type for GenerateInferredConcepts: #{args[:item_type]}") return end diff --git a/lib/personas/concept_matcher.rb b/lib/personas/concept_matcher.rb index bea17e096..5099196b8 100644 --- a/lib/personas/concept_matcher.rb +++ b/lib/personas/concept_matcher.rb @@ -8,7 +8,6 @@ def system_prompt You are an advanced concept matching system that determines which concepts from a provided list are relevant to a piece of content. Your job is to analyze the content and determine which concepts from the list apply to it. 
- #{concepts_text} Guidelines for matching concepts: - Only select concepts that are clearly relevant to the content - The content must substantially discuss or relate to the concept @@ -20,7 +19,6 @@ def system_prompt - If no concepts from the list match the content, return an empty array The list of available concepts is: - {inferred_concepts} Format your response as a JSON object with a single key named "matching_concepts", which has an array of concept strings from the provided list. From 4280255b288faefa0835eee9a41bff9c494b333b Mon Sep 17 00:00:00 2001 From: Rafael Silva Date: Fri, 9 May 2025 15:43:17 -0300 Subject: [PATCH 05/11] tests --- spec/lib/personas/persona_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/lib/personas/persona_spec.rb b/spec/lib/personas/persona_spec.rb index 22670f155..fe310ef87 100644 --- a/spec/lib/personas/persona_spec.rb +++ b/spec/lib/personas/persona_spec.rb @@ -39,7 +39,7 @@ def system_prompt end let(:resource_url) { "https://path-to-resource" } - let(:inferred_concepts) { %w[bulbassaur charmander squirtle] } + let(:inferred_concepts) { %w[bulbassaur charmander squirtle].join(", ") } let(:context) do DiscourseAi::Personas::BotContext.new( From 5b353bd87de102d7a5c37e121e9183858f2c0932 Mon Sep 17 00:00:00 2001 From: Rafael Silva Date: Fri, 9 May 2025 16:18:06 -0300 Subject: [PATCH 06/11] pass context --- lib/inferred_concepts/applier.rb | 6 ++---- lib/inferred_concepts/finder.rb | 19 ++++++++++--------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/lib/inferred_concepts/applier.rb b/lib/inferred_concepts/applier.rb index fa393e75a..9976f2474 100644 --- a/lib/inferred_concepts/applier.rb +++ b/lib/inferred_concepts/applier.rb @@ -154,10 +154,7 @@ def self.match_concepts_to_content(content, concept_list) return [] if content.blank? || concept_list.blank? 
# Prepare user message with only the content - user_message = <<~MESSAGE - Content to analyze: - #{content} - MESSAGE + user_message = content # Use the ConceptMatcher persona to match concepts llm = DiscourseAi::Completions::Llm.default_llm @@ -166,6 +163,7 @@ def self.match_concepts_to_content(content, concept_list) DiscourseAi::Personas::BotContext.new( messages: [{ type: :user, content: user_message }], user: Discourse.system_user, + inferred_concepts: DiscourseAi::InferredConcepts::Manager.list_concepts, ) prompt = persona.craft_prompt(context) diff --git a/lib/inferred_concepts/finder.rb b/lib/inferred_concepts/finder.rb index 56ddcbdc6..eb215bfc6 100644 --- a/lib/inferred_concepts/finder.rb +++ b/lib/inferred_concepts/finder.rb @@ -4,7 +4,7 @@ module DiscourseAi module InferredConcepts class Finder # Identifies potential concepts from provided content - # Returns an array of concept names (strings) + # Returns an array of concept names (strings) def self.identify_concepts(content) return [] if content.blank? @@ -13,28 +13,29 @@ def self.identify_concepts(content) persona = DiscourseAi::Personas::ConceptFinder.new context = DiscourseAi::Personas::BotContext.new( messages: [{ type: :user, content: content }], - user: Discourse.system_user + user: Discourse.system_user. + inferred_concepts: DiscourseAi::InferredConcepts::Manager.list_concepts, ) - + prompt = persona.craft_prompt(context) response = llm.completion(prompt, extract_json: true) - + return [] unless response.success? - + concepts = response.parsed_output["concepts"] concepts || [] end - + # Creates or finds concepts in the database from provided names # Returns an array of InferredConcept instances def self.create_or_find_concepts(concept_names) return [] if concept_names.blank? 
- + concept_names.map do |name| InferredConcept.find_or_create_by(name: name) end end - + # Finds candidate topics to use for concept generation # # @param limit [Integer] Maximum number of topics to return @@ -134,4 +135,4 @@ def self.find_candidate_posts( end end end -end \ No newline at end of file +end From 1c4806beb68a8e23d1bf03b5805873b0079abba9 Mon Sep 17 00:00:00 2001 From: Rafael Silva Date: Fri, 9 May 2025 16:24:20 -0300 Subject: [PATCH 07/11] dots are not commas --- lib/inferred_concepts/finder.rb | 35 +++++++++++++++++---------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/lib/inferred_concepts/finder.rb b/lib/inferred_concepts/finder.rb index eb215bfc6..37e2c625e 100644 --- a/lib/inferred_concepts/finder.rb +++ b/lib/inferred_concepts/finder.rb @@ -11,11 +11,12 @@ def self.identify_concepts(content) # Use the ConceptFinder persona to identify concepts llm = DiscourseAi::Completions::Llm.default_llm persona = DiscourseAi::Personas::ConceptFinder.new - context = DiscourseAi::Personas::BotContext.new( - messages: [{ type: :user, content: content }], - user: Discourse.system_user. + context = + DiscourseAi::Personas::BotContext.new( + messages: [{ type: :user, content: content }], + user: Discourse.system_user, inferred_concepts: DiscourseAi::InferredConcepts::Manager.list_concepts, - ) + ) prompt = persona.craft_prompt(context) response = llm.completion(prompt, extract_json: true) @@ -31,9 +32,7 @@ def self.identify_concepts(content) def self.create_or_find_concepts(concept_names) return [] if concept_names.blank? - concept_names.map do |name| - InferredConcept.find_or_create_by(name: name) - end + concept_names.map { |name| InferredConcept.find_or_create_by(name: name) } end # Finds candidate topics to use for concept generation @@ -55,12 +54,13 @@ def self.find_candidate_topics( category_ids: nil, created_after: 30.days.ago ) - query = Topic.where( - "topics.posts_count >= ? AND topics.views >= ? 
AND topics.like_count >= ?", - min_posts, - min_views, - min_likes - ) + query = + Topic.where( + "topics.posts_count >= ? AND topics.views >= ? AND topics.like_count >= ?", + min_posts, + min_views, + min_likes, + ) # Apply additional filters query = query.where("topics.id NOT IN (?)", exclude_topic_ids) if exclude_topic_ids.present? @@ -79,10 +79,11 @@ def self.find_candidate_topics( query = query.where("topics.id NOT IN (#{topics_with_concepts})") # Score and order topics by engagement (combination of views, likes, and posts) - query = query.select( - "topics.*, - (topics.like_count * 2 + topics.posts_count * 3 + topics.views * 0.1) AS engagement_score" - ).order("engagement_score DESC") + query = + query.select( + "topics.*, + (topics.like_count * 2 + topics.posts_count * 3 + topics.views * 0.1) AS engagement_score", + ).order("engagement_score DESC") # Return limited number of topics query.limit(limit) From 0664ec512bfbb35e3c9752cbfe805811fdd3a4e2 Mon Sep 17 00:00:00 2001 From: Rafael Silva Date: Thu, 15 May 2025 16:38:45 -0300 Subject: [PATCH 08/11] Dedup concepts --- .../generate_concepts_from_popular_items.rb | 92 ++++++++++--------- config/settings.yml | 23 +++-- ...8183456_create_topics_inferred_concepts.rb | 6 +- ...09000001_create_posts_inferred_concepts.rb | 6 +- lib/inferred_concepts/finder.rb | 46 ++++++++-- lib/inferred_concepts/manager.rb | 61 +++++++++++- lib/personas/concept_deduplicator.rb | 49 ++++++++++ lib/personas/persona.rb | 1 + 8 files changed, 216 insertions(+), 68 deletions(-) create mode 100644 lib/personas/concept_deduplicator.rb diff --git a/app/jobs/scheduled/generate_concepts_from_popular_items.rb b/app/jobs/scheduled/generate_concepts_from_popular_items.rb index a9a034935..74f294846 100644 --- a/app/jobs/scheduled/generate_concepts_from_popular_items.rb +++ b/app/jobs/scheduled/generate_concepts_from_popular_items.rb @@ -6,76 +6,80 @@ class GenerateConceptsFromPopularItems < ::Jobs::Scheduled # This job runs daily and generates 
new concepts from popular topics and posts # It selects items based on engagement metrics and generates concepts from their content - def execute(args = {}) + def execute(_args) return unless SiteSetting.inferred_concepts_enabled process_popular_topics process_popular_posts end - + private - - def process_popular_topics + def process_popular_topics # Find candidate topics that are popular and don't have concepts yet - candidates = DiscourseAi::InferredConcepts::Manager.find_candidate_topics( - limit: SiteSetting.inferred_concepts_daily_topics_limit || 20, - min_posts: SiteSetting.inferred_concepts_min_posts || 5, - min_likes: SiteSetting.inferred_concepts_min_likes || 10, - min_views: SiteSetting.inferred_concepts_min_views || 100, - created_after: SiteSetting.inferred_concepts_lookback_days.days.ago - ) + candidates = + DiscourseAi::InferredConcepts::Manager.find_candidate_topics( + limit: SiteSetting.inferred_concepts_daily_topics_limit || 20, + min_posts: SiteSetting.inferred_concepts_min_posts || 5, + min_likes: SiteSetting.inferred_concepts_min_likes || 10, + min_views: SiteSetting.inferred_concepts_min_views || 100, + created_after: SiteSetting.inferred_concepts_lookback_days.days.ago, + ) return if candidates.blank? 
- + # Process candidate topics - first generate concepts, then match Jobs.enqueue( :generate_inferred_concepts, - item_type: 'topics', - item_ids: candidates.map(&:id), - batch_size: 10 - ) - - # Schedule a follow-up job to match existing concepts - Jobs.enqueue_in( - 1.hour, - :generate_inferred_concepts, - item_type: 'topics', + item_type: "topics", item_ids: candidates.map(&:id), batch_size: 10, - match_only: true ) + + if SiteSetting.inferred_concepts_background_match + # Schedule a follow-up job to match existing concepts + Jobs.enqueue_in( + 1.hour, + :generate_inferred_concepts, + item_type: "topics", + item_ids: candidates.map(&:id), + batch_size: 10, + match_only: true, + ) + end end - - def process_popular_posts + def process_popular_posts # Find candidate posts that are popular and don't have concepts yet - candidates = DiscourseAi::InferredConcepts::Manager.find_candidate_posts( - limit: SiteSetting.inferred_concepts_daily_posts_limit || 30, - min_likes: SiteSetting.inferred_concepts_post_min_likes || 5, - exclude_first_posts: true, - created_after: SiteSetting.inferred_concepts_lookback_days.days.ago - ) + candidates = + DiscourseAi::InferredConcepts::Manager.find_candidate_posts( + limit: SiteSetting.inferred_concepts_daily_posts_limit || 30, + min_likes: SiteSetting.inferred_concepts_post_min_likes || 5, + exclude_first_posts: true, + created_after: SiteSetting.inferred_concepts_lookback_days.days.ago, + ) return if candidates.blank? 
- + # Process candidate posts - first generate concepts, then match Jobs.enqueue( :generate_inferred_concepts, - item_type: 'posts', - item_ids: candidates.map(&:id), - batch_size: 10 - ) - - # Schedule a follow-up job to match against existing concepts - Jobs.enqueue_in( - 1.hour, - :generate_inferred_concepts, - item_type: 'posts', + item_type: "posts", item_ids: candidates.map(&:id), batch_size: 10, - match_only: true ) + + if SiteSetting.inferred_concepts_background_match + # Schedule a follow-up job to match against existing concepts + Jobs.enqueue_in( + 1.hour, + :generate_inferred_concepts, + item_type: "posts", + item_ids: candidates.map(&:id), + batch_size: 10, + match_only: true, + ) + end end end -end \ No newline at end of file +end diff --git a/config/settings.yml b/config/settings.yml index 92371470e..4c10e45d6 100644 --- a/config/settings.yml +++ b/config/settings.yml @@ -405,32 +405,39 @@ discourse_ai: inferred_concepts_enabled: default: false client: true - description: "Enable the inferred concepts system that automatically generates and applies concepts to topics" + inferred_concepts_background_match: + default: false + client: false inferred_concepts_daily_topics_limit: default: 20 client: false - description: "Maximum number of topics to process each day for concept generation" inferred_concepts_min_posts: default: 5 client: false - description: "Minimum number of posts a topic must have to be considered for concept generation" inferred_concepts_min_likes: default: 10 client: false - description: "Minimum number of likes a topic must have to be considered for concept generation" inferred_concepts_min_views: default: 100 client: false - description: "Minimum number of views a topic must have to be considered for concept generation" inferred_concepts_lookback_days: default: 30 client: false - description: "Only consider topics created within this many days for concept generation" inferred_concepts_daily_posts_limit: default: 30 client: false - 
description: "Maximum number of posts to process each day for concept generation" inferred_concepts_post_min_likes: default: 5 client: false - description: "Minimum number of likes a post must have to be considered for concept generation" + inferred_concepts_generate_persona: + default: "-15" + type: enum + enum: "DiscourseAi::Configuration::PersonaEnumerator" + inferred_concepts_match_persona: + default: "-16" + type: enum + enum: "DiscourseAi::Configuration::PersonaEnumerator" + inferred_concepts_deduplicate_persona: + default: "-17" + type: enum + enum: "DiscourseAi::Configuration::PersonaEnumerator" diff --git a/db/migrate/20250508183456_create_topics_inferred_concepts.rb b/db/migrate/20250508183456_create_topics_inferred_concepts.rb index 6066bfbbd..8ecbcd970 100644 --- a/db/migrate/20250508183456_create_topics_inferred_concepts.rb +++ b/db/migrate/20250508183456_create_topics_inferred_concepts.rb @@ -2,14 +2,12 @@ class CreateTopicsInferredConcepts < ActiveRecord::Migration[7.0] def change - create_table :topics_inferred_concepts do |t| + create_table :topics_inferred_concepts, primary_key: %i[topic_id inferred_concept_id] do |t| t.integer :topic_id, null: false t.integer :inferred_concept_id, null: false t.timestamps end - add_index :topics_inferred_concepts, [:topic_id, :inferred_concept_id], unique: true, name: 'idx_unique_topic_inferred_concept' - add_index :topics_inferred_concepts, :topic_id add_index :topics_inferred_concepts, :inferred_concept_id end -end \ No newline at end of file +end diff --git a/db/migrate/20250509000001_create_posts_inferred_concepts.rb b/db/migrate/20250509000001_create_posts_inferred_concepts.rb index 258d0f144..518d643d4 100644 --- a/db/migrate/20250509000001_create_posts_inferred_concepts.rb +++ b/db/migrate/20250509000001_create_posts_inferred_concepts.rb @@ -2,14 +2,12 @@ class CreatePostsInferredConcepts < ActiveRecord::Migration[7.0] def change - create_table :posts_inferred_concepts do |t| + create_table 
:posts_inferred_concepts, primary_key: %i[post_id inferred_concept_id] do |t| t.integer :post_id, null: false t.integer :inferred_concept_id, null: false t.timestamps end - add_index :posts_inferred_concepts, [:post_id, :inferred_concept_id], unique: true, name: 'idx_unique_post_inferred_concept' - add_index :posts_inferred_concepts, :post_id add_index :posts_inferred_concepts, :inferred_concept_id end -end \ No newline at end of file +end diff --git a/lib/inferred_concepts/finder.rb b/lib/inferred_concepts/finder.rb index 37e2c625e..38d9e367d 100644 --- a/lib/inferred_concepts/finder.rb +++ b/lib/inferred_concepts/finder.rb @@ -9,8 +9,13 @@ def self.identify_concepts(content) return [] if content.blank? # Use the ConceptFinder persona to identify concepts - llm = DiscourseAi::Completions::Llm.default_llm - persona = DiscourseAi::Personas::ConceptFinder.new + persona = + AiPersona + .all_personas(enabled_only: false) + .find { |persona| persona.id == SiteSetting.inferred_concepts_generate_persona.to_i } + .new + + llm = LlmModel.find(persona.class.default_llm_id) context = DiscourseAi::Personas::BotContext.new( messages: [{ type: :user, content: content }], @@ -18,12 +23,11 @@ def self.identify_concepts(content) inferred_concepts: DiscourseAi::InferredConcepts::Manager.list_concepts, ) - prompt = persona.craft_prompt(context) - response = llm.completion(prompt, extract_json: true) + bot = DiscourseAi::Personas::Bot.as(Discourse.system_user, persona: persona, model: llm) - return [] unless response.success? + response = bot.reply(context) - concepts = response.parsed_output["concepts"] + concepts = JSON.parse(response[0][0]).dig("concepts") concepts || [] end @@ -68,7 +72,7 @@ def self.find_candidate_topics( query = query.where("topics.created_at >= ?", created_after) if created_after.present? 
# Exclude PM topics (if they exist in Discourse) - query = query.where(archetype: Topic.public_archetype) + query = query.where(archetype: Archetype.default) # Exclude topics that already have concepts topics_with_concepts = <<~SQL @@ -134,6 +138,34 @@ def self.find_candidate_posts( # Return limited number of posts query.limit(limit) end + + # Deduplicate and standardize a list of concepts + # @param concept_names [Array] List of concept names to deduplicate + # @return [Hash] Hash with deduplicated concepts and mapping + def self.deduplicate_concepts(concept_names) + return { deduplicated_concepts: [], mapping: {} } if concept_names.blank? + + # Use the ConceptDeduplicator persona to deduplicate concepts + persona = + AiPersona + .all_personas(enabled_only: false) + .find { |persona| persona.id == SiteSetting.inferred_concepts_deduplicate_persona.to_i } + .new + + llm = LlmModel.find(persona.class.default_llm_id) + + # Create the input for the deduplicator + input = { type: :user, content: concept_names.join(", ") } + + context = + DiscourseAi::Personas::BotContext.new(messages: [input], user: Discourse.system_user) + + bot = DiscourseAi::Personas::Bot.as(Discourse.system_user, persona: persona, model: llm) + + response = bot.reply(context) + + concepts = JSON.parse(response[0][0]).dig("streamlined_tags") + end end end end diff --git a/lib/inferred_concepts/manager.rb b/lib/inferred_concepts/manager.rb index 3037cd153..a543d4c39 100644 --- a/lib/inferred_concepts/manager.rb +++ b/lib/inferred_concepts/manager.rb @@ -14,6 +14,65 @@ def self.list_concepts(limit: nil) query.pluck(:name) end + + # Deduplicate concepts in batches by letter + # This method will: + # 1. Group concepts by first letter + # 2. Process each letter group separately through the deduplicator + # 3. 
Do a final pass with all deduplicated concepts + # @return [Hash] Statistics about the deduplication process + def self.deduplicate_concepts_by_letter(per_letter_batch: 50, full_pass_batch: 150) + # Get all concepts + all_concepts = list_concepts + return if all_concepts.empty? + + letter_groups = Hash.new { |h, k| h[k] = [] } + + # Group concepts by first letter + all_concepts.each do |concept| + first_char = concept[0]&.upcase + + if first_char && first_char.match?(/[A-Z]/) + letter_groups[first_char] << concept + else + # Non-alphabetic or empty concepts go in a special group + letter_groups["#"] << concept + end + end + + # Process each letter group + letter_deduplicated_concepts = [] + + letter_groups.each do |letter, concepts| + next if concepts.empty? + + batches = concepts.each_slice(per_letter_batch).to_a + + batches.each do |batch| + result = Finder.deduplicate_concepts(batch) + letter_deduplicated_concepts.concat(result) + end + end + + # Final pass with all deduplicated concepts + if letter_deduplicated_concepts.present? + final_result = [] + + batches = letter_deduplicated_concepts.each_slice(full_pass_batch).to_a + batches.each do |batch| + dedups = Finder.deduplicate_concepts(batch) + final_result.concat(dedups) + end + + # Remove duplicates + final_result.uniq! 
+ + # Apply the deduplicated concepts + InferredConcept.destroy_all + InferredConcept.insert_all(final_result.map { { name: it } }) + end + end + # Generate new concepts for a topic and apply them # @param topic [Topic] A Topic instance # @return [Array] The concepts that were applied @@ -139,7 +198,7 @@ def self.match_content_to_concepts(content) # @option opts [DateTime] :created_after (30.days.ago) Only include topics created after this time # @return [Array] Array of Topic objects that are good candidates def self.find_candidate_topics(opts = {}) - Finder.find_candidate_topics(opts) + Finder.find_candidate_topics(**opts) end # Find candidate posts that are good for concept generation diff --git a/lib/personas/concept_deduplicator.rb b/lib/personas/concept_deduplicator.rb new file mode 100644 index 000000000..d5a5048d5 --- /dev/null +++ b/lib/personas/concept_deduplicator.rb @@ -0,0 +1,49 @@ +# frozen_string_literal: true + +module DiscourseAi + module Personas + class ConceptDeduplicator < Persona + def system_prompt + <<~PROMPT.strip + You will be given a list of machine-generated tags. + Your task is to streamline this list by merging entries who are similar or related. + + Please follow these steps to create a streamlined list of tags: + + 1. Review the entire list of tags carefully. + 2. Identify and remove any exact duplicates. + 3. Look for tags that are too specific or niche, and consider removing them or replacing them with more general terms. + 4. If there are multiple tags that convey similar concepts, choose the best one and remove the others, or add a new one that covers the missing aspect. + 5. Ensure that the remaining tags are relevant and useful for describing the content. + + When deciding which tags are "best", consider the following criteria: + - Relevance: How well does the tag describe the core content or theme? + - Generality: Is the tag specific enough to be useful, but not so specific that it's unlikely to be searched for? 
+ - Clarity: Is the tag easy to understand and free from ambiguity? + - Popularity: Would this tag likely be used by people searching for this type of content? + + Example Input: + AI Bias, AI Bots, AI Ethics, AI Helper, AI Integration, AI Moderation, AI Search, AI-Driven Moderation, AI-Generated Post Illustrations, AJAX Events, AJAX Requests, AMA Events, API, API Access, API Authentication, API Automation, API Call, API Changes, API Compliance, API Configuration, API Costs, API Documentation, API Endpoint, API Endpoints, API Functions, API Integration, API Key, API Keys, API Limitation, API Limitations, API Permissions, API Rate Limiting, API Request, API Request Optimization, API Requests, API Security, API Suspension, API Token, API Tokens, API Translation, API Versioning, API configuration, API endpoint, API key, APIs, APK, APT Package Manager, ARIA, ARIA Tags, ARM Architecture, ARM-based, AWS, AWS Lightsail, AWS RDS, AWS S3, AWS Translate, AWS costs, AWS t2.micro, Abbreviation Expansion, Abbreviations + + Example Output: + AI, AJAX, API, APK, APT Package Manager, ARIA, ARM Architecture, AWS, Abbreviations + + Please provide your streamlined list of tags within key. + + Remember, the goal is to create a more focused and effective set of tags while maintaining the essence of the original list. 
+ + Your output should be in the following format: + + { + "streamlined_tags": ["tag1", "tag3"] + } + + PROMPT + end + + def response_format + [{ key: "streamlined_tags", type: "array" }] + end + end + end +end diff --git a/lib/personas/persona.rb b/lib/personas/persona.rb index ba3d3be68..002e8f4e1 100644 --- a/lib/personas/persona.rb +++ b/lib/personas/persona.rb @@ -54,6 +54,7 @@ def system_personas ForumResearcher => -14, ConceptFinder => -15, ConceptMatcher => -16, + ConceptDeduplicator => -17, } end From 1db2ae8dd7327d3be3afa54d4fb66690bc846c6a Mon Sep 17 00:00:00 2001 From: Rafael Silva Date: Fri, 16 May 2025 14:06:20 -0300 Subject: [PATCH 09/11] Working deduplication --- ...8183456_create_inferred_concepts_topics.rb | 11 +++++ ...8183456_create_topics_inferred_concepts.rb | 13 ------ ...09000001_create_inferred_concepts_posts.rb | 11 +++++ ...09000001_create_posts_inferred_concepts.rb | 13 ------ lib/inferred_concepts/applier.rb | 40 +------------------ lib/inferred_concepts/manager.rb | 2 +- 6 files changed, 24 insertions(+), 66 deletions(-) create mode 100644 db/migrate/20250508183456_create_inferred_concepts_topics.rb delete mode 100644 db/migrate/20250508183456_create_topics_inferred_concepts.rb create mode 100644 db/migrate/20250509000001_create_inferred_concepts_posts.rb delete mode 100644 db/migrate/20250509000001_create_posts_inferred_concepts.rb diff --git a/db/migrate/20250508183456_create_inferred_concepts_topics.rb b/db/migrate/20250508183456_create_inferred_concepts_topics.rb new file mode 100644 index 000000000..71a75570d --- /dev/null +++ b/db/migrate/20250508183456_create_inferred_concepts_topics.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +class CreateInferredConceptsTopics < ActiveRecord::Migration[7.0] + def change + create_table :inferred_concepts_topics, id: false do |t| + t.belongs_to :inferred_concept + t.belongs_to :topic + t.timestamps + end + end +end diff --git 
a/db/migrate/20250508183456_create_topics_inferred_concepts.rb b/db/migrate/20250508183456_create_topics_inferred_concepts.rb deleted file mode 100644 index 8ecbcd970..000000000 --- a/db/migrate/20250508183456_create_topics_inferred_concepts.rb +++ /dev/null @@ -1,13 +0,0 @@ -# frozen_string_literal: true - -class CreateTopicsInferredConcepts < ActiveRecord::Migration[7.0] - def change - create_table :topics_inferred_concepts, primary_key: %i[topic_id inferred_concept_id] do |t| - t.integer :topic_id, null: false - t.integer :inferred_concept_id, null: false - t.timestamps - end - - add_index :topics_inferred_concepts, :inferred_concept_id - end -end diff --git a/db/migrate/20250509000001_create_inferred_concepts_posts.rb b/db/migrate/20250509000001_create_inferred_concepts_posts.rb new file mode 100644 index 000000000..2c17c441c --- /dev/null +++ b/db/migrate/20250509000001_create_inferred_concepts_posts.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +class CreateInferredConceptsPosts < ActiveRecord::Migration[7.0] + def change + create_table :inferred_concepts_posts, id: false do |t| + t.belongs_to :inferred_concept + t.belongs_to :post + t.timestamps + end + end +end diff --git a/db/migrate/20250509000001_create_posts_inferred_concepts.rb b/db/migrate/20250509000001_create_posts_inferred_concepts.rb deleted file mode 100644 index 518d643d4..000000000 --- a/db/migrate/20250509000001_create_posts_inferred_concepts.rb +++ /dev/null @@ -1,13 +0,0 @@ -# frozen_string_literal: true - -class CreatePostsInferredConcepts < ActiveRecord::Migration[7.0] - def change - create_table :posts_inferred_concepts, primary_key: %i[post_id inferred_concept_id] do |t| - t.integer :post_id, null: false - t.integer :inferred_concept_id, null: false - t.timestamps - end - - add_index :posts_inferred_concepts, :inferred_concept_id - end -end diff --git a/lib/inferred_concepts/applier.rb b/lib/inferred_concepts/applier.rb index 9976f2474..ace1bd746 100644 --- 
a/lib/inferred_concepts/applier.rb +++ b/lib/inferred_concepts/applier.rb @@ -65,44 +65,6 @@ def self.post_content_for_analysis(post) content end - # Comprehensive method to analyze a topic and apply concepts - def self.analyze_and_apply(topic) - return if topic.blank? - - # Get content to analyze - content = topic_content_for_analysis(topic) - - # Identify concepts - concept_names = Finder.identify_concepts(content) - - # Create or find concepts in the database - concepts = Finder.create_or_find_concepts(concept_names) - - # Apply concepts to the topic - apply_to_topic(topic, concepts) - - concepts - end - - # Comprehensive method to analyze a post and apply concepts - def self.analyze_and_apply_post(post) - return if post.blank? - - # Get content to analyze - content = post_content_for_analysis(post) - - # Identify concepts - concept_names = Finder.identify_concepts(content) - - # Create or find concepts in the database - concepts = Finder.create_or_find_concepts(concept_names) - - # Apply concepts to the post - apply_to_post(post, concepts) - - concepts - end - # Match a topic with existing concepts def self.match_existing_concepts(topic) return [] if topic.blank? @@ -158,7 +120,7 @@ def self.match_concepts_to_content(content, concept_list) # Use the ConceptMatcher persona to match concepts llm = DiscourseAi::Completions::Llm.default_llm - persona = DiscourseAi::Personas::ConceptMatcher.new(concept_list: concept_list) + persona = DiscourseAi::Personas::ConceptMatcher.new context = DiscourseAi::Personas::BotContext.new( messages: [{ type: :user, content: user_message }], diff --git a/lib/inferred_concepts/manager.rb b/lib/inferred_concepts/manager.rb index a543d4c39..9af0bf193 100644 --- a/lib/inferred_concepts/manager.rb +++ b/lib/inferred_concepts/manager.rb @@ -68,7 +68,7 @@ def self.deduplicate_concepts_by_letter(per_letter_batch: 50, full_pass_batch: 1 final_result.uniq! 
# Apply the deduplicated concepts - InferredConcept.destroy_all + InferredConcept.where.not(name: final_result).destroy_all InferredConcept.insert_all(final_result.map { { name: it } }) end end From 4ad887c8353b2c426bf60536343b5a67684852a0 Mon Sep 17 00:00:00 2001 From: Rafael Silva Date: Fri, 16 May 2025 15:18:33 -0300 Subject: [PATCH 10/11] cleaning up --- lib/inferred_concepts/applier.rb | 47 ++++++++++++++------------------ lib/inferred_concepts/manager.rb | 27 +----------------- 2 files changed, 22 insertions(+), 52 deletions(-) diff --git a/lib/inferred_concepts/applier.rb b/lib/inferred_concepts/applier.rb index ace1bd746..1a2f4c4ac 100644 --- a/lib/inferred_concepts/applier.rb +++ b/lib/inferred_concepts/applier.rb @@ -9,15 +9,7 @@ class Applier def self.apply_to_topic(topic, concepts) return if topic.blank? || concepts.blank? - concepts.each do |concept| - # Use the join table to associate the concept with the topic - # Avoid duplicates by using find_or_create_by - ActiveRecord::Base.connection.execute(<<~SQL) - INSERT INTO topics_inferred_concepts (topic_id, inferred_concept_id, created_at, updated_at) - VALUES (#{topic.id}, #{concept.id}, NOW(), NOW()) - ON CONFLICT (topic_id, inferred_concept_id) DO NOTHING - SQL - end + topic.inferred_concepts << concepts end # Associates the provided concepts with a post @@ -26,15 +18,7 @@ def self.apply_to_topic(topic, concepts) def self.apply_to_post(post, concepts) return if post.blank? || concepts.blank? 
- concepts.each do |concept| - # Use the join table to associate the concept with the post - # Avoid duplicates by using find_or_create_by - ActiveRecord::Base.connection.execute(<<~SQL) - INSERT INTO posts_inferred_concepts (post_id, inferred_concept_id, created_at, updated_at) - VALUES (#{post.id}, #{concept.id}, NOW(), NOW()) - ON CONFLICT (post_id, inferred_concept_id) DO NOTHING - SQL - end + post.inferred_concepts << concepts end # Extracts content from a topic for concept analysis @@ -119,21 +103,32 @@ def self.match_concepts_to_content(content, concept_list) user_message = content # Use the ConceptMatcher persona to match concepts - llm = DiscourseAi::Completions::Llm.default_llm - persona = DiscourseAi::Personas::ConceptMatcher.new + + persona = + AiPersona + .all_personas(enabled_only: false) + .find { |persona| persona.id == SiteSetting.inferred_concepts_match_persona.to_i } + .new + + llm = LlmModel.find(persona.class.default_llm_id) + + input = { type: :user, content: content } + context = DiscourseAi::Personas::BotContext.new( - messages: [{ type: :user, content: user_message }], + messages: [input], user: Discourse.system_user, - inferred_concepts: DiscourseAi::InferredConcepts::Manager.list_concepts, + inferred_concepts: concept_list, ) - prompt = persona.craft_prompt(context) - response = llm.completion(prompt, extract_json: true) + bot = DiscourseAi::Personas::Bot.as(Discourse.system_user, persona: persona, model: llm) + + response = bot.reply(context) + + debugger - return [] unless response.success? 
+ matching_concepts = JSON.parse(response[0][0]).dig("matching_concepts") - matching_concepts = response.parsed_output["matching_concepts"] matching_concepts || [] end end diff --git a/lib/inferred_concepts/manager.rb b/lib/inferred_concepts/manager.rb index 9af0bf193..f1e1e061a 100644 --- a/lib/inferred_concepts/manager.rb +++ b/lib/inferred_concepts/manager.rb @@ -73,24 +73,6 @@ def self.deduplicate_concepts_by_letter(per_letter_batch: 50, full_pass_batch: 1 end end - # Generate new concepts for a topic and apply them - # @param topic [Topic] A Topic instance - # @return [Array] The concepts that were applied - def self.analyze_topic(topic) - return [] if topic.blank? - - Applier.analyze_and_apply(topic) - end - - # Generate new concepts for a post and apply them - # @param post [Post] A Post instance - # @return [Array] The concepts that were applied - def self.analyze_post(post) - return [] if post.blank? - - Applier.analyze_and_apply_post(post) - end - # Extract new concepts from arbitrary content # @param content [String] The content to analyze # @return [Array] The identified concept names @@ -205,14 +187,7 @@ def self.find_candidate_topics(opts = {}) # @param opts [Hash] Options to pass to the finder # @return [Array] Array of Post objects that are good candidates def self.find_candidate_posts(opts = {}) - Finder.find_candidate_posts( - limit: opts[:limit], - min_likes: opts[:min_likes], - exclude_first_posts: opts[:exclude_first_posts], - exclude_post_ids: opts[:exclude_post_ids], - category_ids: opts[:category_ids], - created_after: opts[:created_after], - ) + Finder.find_candidate_posts(**opts) end end end From 74d47a97c6e87a7bd681f416a3db635eebf91710 Mon Sep 17 00:00:00 2001 From: Rafael Silva Date: Fri, 16 May 2025 15:42:42 -0300 Subject: [PATCH 11/11] post rebase fixes --- lib/inferred_concepts/applier.rb | 2 -- lib/personas/bot.rb | 10 ++++++---- lib/personas/bot_context.rb | 10 ++-------- lib/personas/concept_deduplicator.rb | 6 +++++- 
lib/personas/concept_finder.rb | 6 +++++- lib/personas/concept_matcher.rb | 6 +++++- 6 files changed, 23 insertions(+), 17 deletions(-) diff --git a/lib/inferred_concepts/applier.rb b/lib/inferred_concepts/applier.rb index 1a2f4c4ac..c73bb4c85 100644 --- a/lib/inferred_concepts/applier.rb +++ b/lib/inferred_concepts/applier.rb @@ -125,8 +125,6 @@ def self.match_concepts_to_content(content, concept_list) response = bot.reply(context) - debugger - matching_concepts = JSON.parse(response[0][0]).dig("matching_concepts") matching_concepts || [] diff --git a/lib/personas/bot.rb b/lib/personas/bot.rb index b6e852c51..0dd726df6 100644 --- a/lib/personas/bot.rb +++ b/lib/personas/bot.rb @@ -152,10 +152,12 @@ def reply(context, llm_args: {}, &update_blk) raw_context << partial current_thinking << partial end - elsif partial.is_a?(DiscourseAi::Completions::StructuredOutput) - update_blk.call(partial, nil, :structured_output) - else - update_blk.call(partial) + elsif update_blk.present? + if partial.is_a?(DiscourseAi::Completions::StructuredOutput) + update_blk.call(partial, nil, :structured_output) + else + update_blk.call(partial) + end end end end diff --git a/lib/personas/bot_context.rb b/lib/personas/bot_context.rb index 83220a520..8ee814041 100644 --- a/lib/personas/bot_context.rb +++ b/lib/personas/bot_context.rb @@ -17,11 +17,8 @@ class BotContext :context_post_ids, :feature_name, :resource_url, -<<<<<<< HEAD - :cancel_manager -======= + :cancel_manager, :inferred_concepts ->>>>>>> 44391e27 (FEATURE: Extend inferred concepts to include posts) def initialize( post: nil, @@ -39,11 +36,8 @@ def initialize( context_post_ids: nil, feature_name: "bot", resource_url: nil, -<<<<<<< HEAD - cancel_manager: nil -======= + cancel_manager: nil, inferred_concepts: [] ->>>>>>> 44391e27 (FEATURE: Extend inferred concepts to include posts) ) @participants = participants @user = user diff --git a/lib/personas/concept_deduplicator.rb b/lib/personas/concept_deduplicator.rb index 
d5a5048d5..4c6902f13 100644 --- a/lib/personas/concept_deduplicator.rb +++ b/lib/personas/concept_deduplicator.rb @@ -3,6 +3,10 @@ module DiscourseAi module Personas class ConceptDeduplicator < Persona + def self.default_enabled + false + end + def system_prompt <<~PROMPT.strip You will be given a list of machine-generated tags. @@ -42,7 +46,7 @@ def system_prompt end def response_format - [{ key: "streamlined_tags", type: "array" }] + [{ "key" => "streamlined_tags", "type" => "array" }] end end end diff --git a/lib/personas/concept_finder.rb b/lib/personas/concept_finder.rb index a713e8b86..912e42fe1 100644 --- a/lib/personas/concept_finder.rb +++ b/lib/personas/concept_finder.rb @@ -3,6 +3,10 @@ module DiscourseAi module Personas class ConceptFinder < Persona + def self.default_enabled + false + end + def system_prompt existing_concepts = DiscourseAi::InferredConcepts::Manager.list_concepts(limit: 100) existing_concepts_text = "" @@ -38,7 +42,7 @@ def system_prompt end def response_format - [{ key: "concepts", type: "array" }] + [{ "key" => "concepts", "type" => "array" }] end end end diff --git a/lib/personas/concept_matcher.rb b/lib/personas/concept_matcher.rb index 5099196b8..8cdcdb0fa 100644 --- a/lib/personas/concept_matcher.rb +++ b/lib/personas/concept_matcher.rb @@ -3,6 +3,10 @@ module DiscourseAi module Personas class ConceptMatcher < Persona + def self.default_enabled + false + end + def system_prompt <<~PROMPT.strip You are an advanced concept matching system that determines which concepts from a provided list are relevant to a piece of content. @@ -32,7 +36,7 @@ def system_prompt end def response_format - [{ key: "matching_concepts", type: "array" }] + [{ "key" => "matching_concepts", "type" => "array" }] end end end