Skip to content

Add category moderation groups and more attributes to generic importer #32561

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
34 changes: 31 additions & 3 deletions script/bulk_import/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def initialize
@html_entities = HTMLEntities.new
@encoding = CHARSET_MAP[charset]
@bbcode_to_md = true if use_bbcode_to_md?
@assign_plugin_enabled = defined?(::DiscourseAssign)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unused variable?


@markdown =
Redcarpet::Markdown.new(
Expand Down Expand Up @@ -499,6 +500,9 @@ def discourse_reaction_id_from_original_id(id)
id
name
full_name
public_admission
public_exit
allow_membership_requests
title
bio_raw
bio_cooked
Expand All @@ -509,6 +513,7 @@ def discourse_reaction_id_from_original_id(id)
created_at
updated_at
]
GROUP_COLUMNS << :assignable_level if defined?(::DiscourseAssign)

USER_COLUMNS = %i[
id
Expand Down Expand Up @@ -615,7 +620,7 @@ def discourse_reaction_id_from_original_id(id)

USER_FOLLOWER_COLUMNS = %i[user_id follower_id level created_at updated_at]

GROUP_USER_COLUMNS = %i[group_id user_id created_at updated_at]
GROUP_USER_COLUMNS = %i[group_id user_id owner created_at updated_at]

USER_CUSTOM_FIELD_COLUMNS = %i[user_id name value created_at updated_at]

Expand Down Expand Up @@ -645,7 +650,6 @@ def discourse_reaction_id_from_original_id(id)
description
position
parent_category_id
read_restricted
uploaded_logo_id
created_at
updated_at
Expand All @@ -659,6 +663,8 @@ def discourse_reaction_id_from_original_id(id)

CATEGORY_USER_COLUMNS = %i[category_id user_id notification_level last_seen_at]

CATEGORY_MODERATION_GROUP_COLUMNS = %i[category_id group_id created_at updated_at]

TOPIC_COLUMNS = %i[
id
archetype
Expand Down Expand Up @@ -797,6 +803,14 @@ def discourse_reaction_id_from_original_id(id)
updated_at
multiple_grant
query
allow_title
icon
listable
target_posts
enabled
auto_revoke
trigger
show_posts
]

USER_BADGE_COLUMNS = %i[badge_id user_id granted_at granted_by_id seq post_id created_at]
Expand Down Expand Up @@ -1041,6 +1055,10 @@ def create_category_users(rows, &block)
create_records(rows, "category_user", CATEGORY_USER_COLUMNS, &block)
end

def create_category_moderation_groups(rows, &block)
create_records(rows, "category_moderation_group", CATEGORY_MODERATION_GROUP_COLUMNS, &block)
end

def create_topics(rows, &block)
create_records(rows, "topic", TOPIC_COLUMNS, &block)
end
Expand Down Expand Up @@ -1225,6 +1243,10 @@ def process_group(group)

group[:created_at] ||= NOW
group[:updated_at] ||= group[:created_at]
# Default assignable_level if not provided by the source data.
# The 'assignable_level' attribute itself is only included in the import
# if the DiscourseAssign plugin is active (see GROUP_COLUMNS).
group[:assignable_level] ||= Group::ALIAS_LEVELS[:nobody]
group
end

Expand Down Expand Up @@ -1405,6 +1427,7 @@ def process_user_associated_account(account)
end

def process_group_user(group_user)
group_user[:owner] ||= false
group_user[:created_at] = NOW
group_user[:updated_at] = NOW
group_user
Expand Down Expand Up @@ -1440,7 +1463,6 @@ def process_category(category)
category[:slug] ||= Slug.for(name_lower, "") # TODO Ensure that slug doesn't exist yet
category[:description] = (category[:description] || "").scrub.strip.presence
category[:user_id] ||= Discourse::SYSTEM_USER_ID
category[:read_restricted] = false if category[:read_restricted].nil?
category[:created_at] ||= NOW
category[:updated_at] ||= category[:created_at]

Expand All @@ -1467,6 +1489,12 @@ def process_category_group(category_group)
category_group
end

def process_category_moderation_group(category_moderation_group)
category_moderation_group[:created_at] ||= NOW
category_moderation_group[:updated_at] ||= NOW
category_moderation_group
end

def process_category_tag_group(category_tag_group)
category_tag_group[:created_at] = NOW
category_tag_group[:updated_at] = NOW
Expand Down
143 changes: 132 additions & 11 deletions script/bulk_import/generic_bulk.rb
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ def execute
import_category_tag_groups
import_category_permissions
import_category_users
import_category_moderation_groups
update_category_read_restricted

import_topics
import_posts
Expand Down Expand Up @@ -218,18 +220,18 @@ def import_categories
WITH
RECURSIVE
tree AS (
SELECT c.id, c.parent_category_id, c.name, c.description, c.color, c.text_color, c.read_restricted,
SELECT c.id, c.parent_category_id, c.name, c.description, c.color, c.text_color,
c.slug, c.existing_id, c.position, c.logo_upload_id, 0 AS level
FROM categories c
WHERE c.parent_category_id IS NULL
UNION ALL
SELECT c.id, c.parent_category_id, c.name, c.description, c.color, c.text_color, c.read_restricted,
SELECT c.id, c.parent_category_id, c.name, c.description, c.color, c.text_color,
c.slug, c.existing_id, c.position, c.logo_upload_id, tree.level + 1 AS level
FROM categories c,
tree
WHERE c.parent_category_id = tree.id
)
SELECT id, parent_category_id, name, description, color, text_color, read_restricted, slug, existing_id, logo_upload_id,
SELECT id, parent_category_id, name, description, color, text_color, slug, existing_id, logo_upload_id,
COALESCE(position,
ROW_NUMBER() OVER (PARTITION BY parent_category_id ORDER BY parent_category_id NULLS FIRST, name)) AS position
FROM tree
Expand All @@ -247,7 +249,6 @@ def import_categories
parent_category_id:
row["parent_category_id"] ? category_id_from_imported_id(row["parent_category_id"]) : nil,
slug: row["slug"],
read_restricted: row["read_restricted"],
uploaded_logo_id:
row["logo_upload_id"] ? upload_id_from_original_id(row["logo_upload_id"]) : nil,
}
Expand Down Expand Up @@ -360,6 +361,32 @@ def import_category_permissions
permissions.close
end

def import_category_moderation_groups
puts "", "Importing category moderation groups..."

moderation_groups = query(<<~SQL)
SELECT c.id AS category_id,
m.value AS group_id
FROM categories c,
JSON_EACH(c.moderation_group_ids) m
ORDER BY c.id, m.value
SQL

existing_moderation_groups = CategoryModerationGroup.pluck(:category_id, :group_id).to_set

create_category_moderation_groups(moderation_groups) do |row|
category_id = category_id_from_imported_id(row["category_id"])
group_id = group_id_from_imported_id(row["group_id"])

next unless category_id && group_id
next unless existing_moderation_groups.add?([category_id, group_id])

{ category_id: category_id, group_id: group_id }
end

moderation_groups.close
end

def import_category_users
puts "", "Importing category users..."

Expand Down Expand Up @@ -387,6 +414,44 @@ def import_category_users
category_users.close
end

def update_category_read_restricted
puts "", "Updating category read_restricted flags..."
start_time = Time.now
processed_count = 0
updated_count = 0
skipped_count = 0

Category
.includes(category_groups: :group)
.find_each do |category|
processed_count += 1

permissions = {}
category.category_groups.each do |category_group|
group = category_group.group
permissions[category_group.group_name] = category_group.permission_type if group.present?
end

expected_read_restricted =
if permissions.empty?
false
else
Category.resolve_permissions(permissions).first
end

current_read_restricted = category.read_restricted

if current_read_restricted != expected_read_restricted
category.update_column(:read_restricted, expected_read_restricted)
updated_count += 1
else
skipped_count += 1
end
end

puts " Update took #{(Time.now - start_time).to_i} seconds. Processed: #{processed_count}, Updated: #{updated_count}, Skipped: #{skipped_count}."
end

def import_groups
puts "", "Importing groups..."

Expand All @@ -399,15 +464,20 @@ def import_groups
create_groups(groups) do |row|
next if group_id_from_imported_id(row["id"]).present?

{
group_data = {
imported_id: row["id"],
name: row["name"],
full_name: row["full_name"],
public_admission: row["public_admission"] || false,
public_exit: row["public_exit"] || false,
allow_membership_requests: row["allow_membership_requests"] || false,
visibility_level: row["visibility_level"],
members_visibility_level: row["members_visibility_level"],
mentionable_level: row["mentionable_level"],
messageable_level: row["messageable_level"],
}
group_data[:assignable_level] = row["assignable_level"]
group_data
Comment on lines +467 to +480
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
group_data = {
imported_id: row["id"],
name: row["name"],
full_name: row["full_name"],
public_admission: row["public_admission"] || false,
public_exit: row["public_exit"] || false,
allow_membership_requests: row["allow_membership_requests"] || false,
visibility_level: row["visibility_level"],
members_visibility_level: row["members_visibility_level"],
mentionable_level: row["mentionable_level"],
messageable_level: row["messageable_level"],
}
group_data[:assignable_level] = row["assignable_level"]
group_data
{
imported_id: row["id"],
name: row["name"],
full_name: row["full_name"],
public_admission: row["public_admission"] || false,
public_exit: row["public_exit"] || false,
allow_membership_requests: row["allow_membership_requests"] || false,
visibility_level: row["visibility_level"],
members_visibility_level: row["members_visibility_level"],
mentionable_level: row["mentionable_level"],
messageable_level: row["messageable_level"],
assignable_level: row["assignable_level"]
}

end

groups.close
Expand All @@ -427,9 +497,11 @@ def import_group_members
create_group_users(group_members) do |row|
group_id = group_id_from_imported_id(row["group_id"])
user_id = user_id_from_imported_id(row["user_id"])

next if user_id.nil?
next if existing_group_user_ids.include?([group_id, user_id])

{ group_id: group_id, user_id: user_id }
{ group_id: group_id, user_id: user_id, owner: row["owner"] }
end

group_members.close
Expand Down Expand Up @@ -476,8 +548,10 @@ def import_users
moderator: row["moderator"],
suspended_at: suspended_at,
suspended_till: suspended_till,
trust_level: row["trust_level"],
registration_ip_address: row["registration_ip_address"],
date_of_birth: to_date(row["date_of_birth"]),
primary_group_id: group_id_from_imported_id(row["primary_group_id"]),
}
end

Expand Down Expand Up @@ -562,6 +636,9 @@ def import_user_options
email_level: row["email_level"],
email_messages_level: row["email_messages_level"],
email_digests: row["email_digests"],
hide_profile_and_presence: false,
hide_profile: false,
hide_presence: false,
Comment on lines +639 to +641
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why always false? If this is just about setting defaults, then the USER_OPTION_DEFAULTS hash in BulkImport is the right way to do it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please see discussion with @s3lase here - #32561 (comment)

}
end

Expand Down Expand Up @@ -1953,11 +2030,28 @@ def import_tags
)
@tag_mapping[row["id"]] = tag.id

if row["tag_group_id"]
TagGroupMembership.find_or_create_by!(
tag_id: tag.id,
tag_group_id: @tag_group_mapping[row["tag_group_id"]],
)
intermediate_group_ids = []
if row["tag_group_ids"] && !row["tag_group_ids"].empty?
intermediate_group_ids = JSON.parse(row["tag_group_ids"])
elsif row["tag_group_id"] && !row["tag_group_id"].empty?
# Support old single tag_group_id
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see a tag_group_id attribute in the current IntermediateDB anymore. We should only support the lasted version, so reduce complexity. So, unless I'm missing something, we should remove this and make stuff less complicated.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey @gschlager, I'm getting mixed messages between you and @s3lase. See his previous message - #32561 (comment)

Here's the tag_group_id and 021-tags.sql in intermediate.db.

intermediate_group_ids = [row["tag_group_id"]]
end

if intermediate_group_ids.any?
intermediate_group_ids.each do |intermediate_group_id|
intermediate_group_id = intermediate_group_id.to_i if intermediate_group_id.is_a?(String)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can do this unconditionally:

Suggested change
intermediate_group_id = intermediate_group_id.to_i if intermediate_group_id.is_a?(String)
intermediate_group_id = intermediate_group_id.to_i

But is it really needed? Do we store numbers as JSON strings for some reason?

discourse_tag_group_id = @tag_group_mapping[intermediate_group_id]

if discourse_tag_group_id
TagGroupMembership.find_or_create_by!(
tag_id: tag.id,
tag_group_id: discourse_tag_group_id,
)
else
puts "Warning: Intermediate tag group ID #{intermediate_group_id} from row not found in @tag_group_mapping for tag '#{tag.name}'"
end
end
end
end

Expand Down Expand Up @@ -2396,6 +2490,15 @@ def import_badges
image_upload_id:
row["image_upload_id"] ? upload_id_from_original_id(row["image_upload_id"]) : nil,
query: row["query"],
multiple_grant: to_boolean(row["multiple_grant"]),
allow_title: to_boolean(row["allow_title"]),
icon: row["icon"],
listable: to_boolean(row["listable"]),
target_posts: to_boolean(row["target_posts"]),
enabled: to_boolean(row["enabled"]),
auto_revoke: to_boolean(row["auto_revoke"]),
trigger: row["trigger"],
show_posts: to_boolean(row["show_posts"]),
}
end

Expand Down Expand Up @@ -2426,6 +2529,24 @@ def import_user_badges
end

user_badges.close

puts "", "Updating badge grant counts..."
start_time = Time.now

DB.exec(<<~SQL)
WITH
grants AS (
SELECT badge_id, COUNT(*) AS grant_count FROM user_badges GROUP BY badge_id
)

UPDATE badges
Comment on lines +2541 to +2542
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
UPDATE badges
UPDATE badges

SET grant_count = grants.grant_count
FROM grants
WHERE badges.id = grants.badge_id
AND badges.grant_count <> grants.grant_count
SQL

puts " Update took #{(Time.now - start_time).to_i} seconds."
end

def import_anniversary_user_badges
Expand Down
Loading