@@ -31,15 +31,14 @@ def execute(args)
31
31
. where ( archetype : Archetype . default )
32
32
. where ( deleted_at : nil )
33
33
. order ( "topics.bumped_at DESC" )
34
- . limit ( limit - rebaked )
35
34
36
- rebaked += populate_topic_embeddings ( vector_rep , topics )
35
+ rebaked += populate_topic_embeddings ( vector_rep , topics . limit ( limit - rebaked ) )
37
36
38
37
return if rebaked >= limit
39
38
40
39
# Then, we'll try to backfill embeddings for topics that have outdated
41
40
# embeddings, be it model or strategy version
42
- relation = topics . where ( <<~SQL )
41
+ relation = topics . where ( <<~SQL ) . limit ( limit - rebaked )
43
42
#{ table_name } .model_version < #{ vector_rep . version }
44
43
OR
45
44
#{ table_name } .strategy_version < #{ strategy . version }
@@ -65,20 +64,22 @@ def execute(args)
65
64
66
65
# Now for posts
67
66
table_name = vector_rep . post_table_name
67
+ posts_batch_size = 1000
68
68
69
69
posts =
70
70
Post
71
71
. joins ( "LEFT JOIN #{ table_name } ON #{ table_name } .post_id = posts.id" )
72
72
. where ( deleted_at : nil )
73
73
. where ( post_type : Post . types [ :regular ] )
74
- . limit ( limit - rebaked )
75
74
76
75
# First, we'll try to backfill embeddings for posts that have none
77
76
posts
78
77
. where ( "#{ table_name } .post_id IS NULL" )
79
- . find_in_batches do |batch |
80
- vector_rep . gen_bulk_reprensentations ( batch )
81
- rebaked += batch . size
78
+ . limit ( limit - rebaked )
79
+ . pluck ( :id )
80
+ . each_slice ( posts_batch_size ) do |batch |
81
+ vector_rep . gen_bulk_reprensentations ( Post . where ( id : batch ) )
82
+ rebaked += batch . length
82
83
end
83
84
84
85
return if rebaked >= limit
@@ -91,28 +92,26 @@ def execute(args)
91
92
OR
92
93
#{ table_name } .strategy_version < #{ strategy . version }
93
94
SQL
94
- . find_in_batches do |batch |
95
- vector_rep . gen_bulk_reprensentations ( batch )
96
- rebaked += batch . size
95
+ . limit ( limit - rebaked )
96
+ . pluck ( :id )
97
+ . each_slice ( posts_batch_size ) do |batch |
98
+ vector_rep . gen_bulk_reprensentations ( Post . where ( id : batch ) )
99
+ rebaked += batch . length
97
100
end
98
101
99
102
return if rebaked >= limit
100
103
101
104
# Finally, we'll try to backfill embeddings for posts that have outdated
102
105
# embeddings due to edits. Here we only do 10% of the limit
103
- posts_batch_size = 1000
104
-
105
- outdated_post_ids =
106
- posts
107
- . where ( "#{ table_name } .updated_at < ?" , 7 . days . ago )
108
- . order ( "random()" )
109
- . limit ( ( limit - rebaked ) / 10 )
110
- . pluck ( :id )
111
-
112
- outdated_post_ids . each_slice ( posts_batch_size ) do |batch |
113
- vector_rep . gen_bulk_reprensentations ( Post . where ( id : batch ) )
114
- rebaked += batch . length
115
- end
106
+ posts
107
+ . where ( "#{ table_name } .updated_at < ?" , 7 . days . ago )
108
+ . order ( "random()" )
109
+ . limit ( ( limit - rebaked ) / 10 )
110
+ . pluck ( :id )
111
+ . each_slice ( posts_batch_size ) do |batch |
112
+ vector_rep . gen_bulk_reprensentations ( Post . where ( id : batch ) )
113
+ rebaked += batch . length
114
+ end
116
115
117
116
rebaked
118
117
end
0 commit comments