Skip to content

Commit 2d2a3bc

Browse files
committed
Added the SPAMINATOR!
1 parent 6cc9c4e commit 2d2a3bc

File tree

4 files changed

+111
-67
lines changed

4 files changed

+111
-67
lines changed

app/controllers/protips_controller.rb

Lines changed: 6 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -195,34 +195,13 @@ def etag_key_for_protip
195195
end
196196

197197
def spam?
198-
is_spam = false
199-
if smyte_spam?
200-
is_spam = true
201-
logger.info "[SMYTE-SPAM BLOCK] \"#{@protip.title}\""
198+
flags = Spaminator.new.protip_flags(@protip)
199+
if flags.any?
200+
logger.info "[SPAM BLOCK] \"#{@protip.title}\" #{flags.inspect}"
201+
true
202202
else
203-
logger.info "[SMYTE-SPAM ALLOW] \"#{@protip.title}\""
203+
logger.info "[SPAM ALLOW] \"#{@protip.title}\""
204+
false
204205
end
205-
206-
if @protip.looks_spammy?
207-
is_spam = true
208-
logger.info "[CW-SPAM BLOCK] \"#{@protip.title}\""
209-
else
210-
logger.info "[CW-SPAM ALLOW] \"#{@protip.title}\""
211-
end
212-
213-
is_spam
214-
end
215-
216-
def smyte_spam?
217-
return false if ENV['SMYTE_URL'].nil?
218-
data = {
219-
actor: serialize(current_user, CurrentUserSerializer),
220-
protip: serialize(@protip).except("spam_detected_at", "bad_content")
221-
}
222-
Smyte.new.spam?(
223-
'post_protip',
224-
data,
225-
request
226-
)
227206
end
228207
end

app/lib/spaminator.rb

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# Matches either an empty line or a line that is entirely an http/https URL
# (host labels separated by "-" or ".", a 2-5 letter TLD, optional digits and
# path). Case-insensitive.
# NOTE(review): `^`/`$` anchor per line, not per string; use `\A`/`\z` if this
# is ever used to validate a whole value.
# NOTE(review): nothing in the visible Spaminator class references this
# constant -- confirm whether it is used elsewhere before relying on it.
URLS = /(^$)|(^(http|https):\/\/[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}(([0-9]{1,5})?\/.*)?$)/ix
2+
3+
# Heuristic spam detector for protips and user profiles.
#
# Each predicate is a plain-text heuristic. +protip_flags+ and +user_flags+
# run the heuristics and return the names of the ones that fired; an empty
# array means nothing looked spammy.
class Spaminator
  # Link-shortener hosts seen in spam. Dots are escaped so only the literal
  # host names match (an unescaped "." would match any character).
  BAD_LINK_HOSTS = /shurll\.com|shorl\.com/i
  SUPPORT_WORDS = /customer|support|phonenumber|phonesupport/i
  DOWNLOAD_TITLE = /serial key|free download/i
  DOWNLOAD_WORDS = /download|crack|serial|torrent/i
  # `^` is deliberately a line anchor: the text handed in is title, body and
  # tags joined by newlines, so the body starts on its own line.
  KNOWN_SPAM_PREFIX = /^\[\!\[Foo\]/
  HTTP_URL = /\Ahttps?:/i

  # @param text [String] combined text to inspect
  # @param urls [Array<String>] unused; kept for interface compatibility
  # @return [Boolean] true when blacklisted shortener hosts occur more than once
  def bad_links?(text, urls)
    text.scan(BAD_LINK_HOSTS).size > 1
  end

  # @param text [String] combined text to inspect
  # @return [Boolean] true when any line starts with the known spam prefix
  def recognized_format?(text)
    !(text =~ KNOWN_SPAM_PREFIX).nil?
  end

  # @param text [String] combined text to inspect
  # @return [Boolean] true when support-scam vocabulary occurs more than 10 times
  def customer_support?(text)
    text.scan(SUPPORT_WORDS).size > 10
  end

  # @param text [String] combined text to inspect
  # @param urls [Array<String>] unused; kept for interface compatibility
  # @param title [String, nil] protip title or username; nil is treated as ""
  # @return [Boolean] true when the title advertises a download/serial key or
  #   the text repeats download vocabulary more than 10 times
  def download_links?(text, urls, title)
    title_hit = !(title.to_s =~ DOWNLOAD_TITLE).nil?
    title_hit || text.scan(DOWNLOAD_WORDS).size > 10
  end

  # @param text [String] unused; kept for interface compatibility
  # @param urls [Array<String>] unused; kept for interface compatibility
  # @param title [String, nil] value to inspect; nil is treated as ""
  # @return [Boolean] true when the title contains more than two spaces
  def many_spaces?(text, urls, title)
    title.to_s.count(' ') > 2
  end

  # @param text [String] combined text the URLs were extracted from
  # @param urls [Array<String>] URLs found in +text+
  # @return [Boolean] true when URLs make up more than half of the text
  def mostly_url?(text, urls)
    return false if text.empty? # avoid a 0/0.0 (NaN) ratio

    urls.join.size / text.size.to_f > 0.5
  end

  # @param text [String] combined text to inspect
  # @return [Boolean] true when more than 10% of the characters are periods
  def weird_characters?(text)
    return false if text.empty? # avoid a 0/0.0 (NaN) ratio

    text.count('.') / text.size.to_f > 0.10
  end

  # Runs every protip heuristic.
  #
  # @param protip [#title, #body, #tags, #user] protip-like object
  # @return [Array<String>] names of the heuristics that fired, in check order
  def protip_flags(protip)
    text = [protip.title, protip.body, protip.tags].flatten.join("\n")
    urls = extract_urls(text)
    author = protip.user

    flags = []
    flags << 'bad_user' if author && author.bad_user
    flags.concat(content_flags(text, urls, protip.title))
  end

  # Runs every user-profile heuristic.
  #
  # @param user [#title, #username, #about] user-like object
  # @return [Array<String>] names of the heuristics that fired, in check order
  def user_flags(user)
    text = [user.title, user.username, user.about].flatten.join("\n")
    urls = extract_urls(text)

    content_flags(text, urls, user.username, check_spaces: true)
  end

  private

  # Pulls http/https URLs out of +text+. URI.extract without a filter also
  # returns bare "word:" tokens, which would inflate the mostly_url ratio.
  def extract_urls(text)
    URI.extract(text).compact.select { |candidate| candidate =~ HTTP_URL }
  end

  # Content checks shared by protips and users, in the order flags are
  # reported. +check_spaces+ enables the username-only many_spaces check.
  def content_flags(text, urls, title, check_spaces: false)
    flags = []
    flags << 'bad_links' if bad_links?(text, urls)
    flags << 'customer_support' if customer_support?(text)
    flags << 'download_spam' if download_links?(text, urls, title)
    flags << 'recognized_format' if recognized_format?(text)
    flags << 'many_spaces' if check_spaces && many_spaces?(text, urls, title)
    flags << 'mostly_url' if mostly_url?(text, urls)
    flags << 'weird_characters' if weird_characters?(text)
    flags
  end
end
66+

app/services/smyte.rb

Lines changed: 0 additions & 40 deletions
This file was deleted.

lib/tasks/spam.rake

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
namespace :spam do
  # Scans protips and users created in the last 7 days that are not already
  # flagged, prints every item a Spaminator heuristic fires on together with
  # the flags, marks it as bad, and finally lists everything that passed.
  desc 'Flag spammy protips and users created in the last 7 days'
  task :sweep => :environment do
    spaminator = Spaminator.new
    good = []

    protips = Protip.where('created_at > ?', 7.days.ago).where(bad_content: false)
    protips.each do |protip|
      flags = spaminator.protip_flags(protip)
      if flags.any?
        # body.to_s keeps the sweep running when a protip has no body,
        # mirroring the nil guard used for user.about below.
        puts "#{protip.id} – #{protip.title}#{protip.body.to_s[0..100].gsub("\n", '')}"
        puts flags.inspect
        puts

        protip.bad_content = true
        protip.save
        # Assigning bad_user on the association and saving only the protip
        # does not persist the user; flag the author the same way the user
        # loop below does.
        protip.user.bad_user!
      else
        good << protip
      end
    end

    users = User.where('created_at > ?', 7.days.ago).where(bad_user: false)
    users.each do |user|
      flags = spaminator.user_flags(user)
      if flags.any?
        puts "#{user.id} – #{user.username}#{user.about.to_s[0..100].gsub("\n", '')}"
        puts flags.inspect
        puts

        user.bad_user!
      else
        good << user
      end
    end

    puts "Good"
    good.each do |record|
      puts "#{record.class}:#{record.id}#{record.try(:username) || record.title}"
    end
  end
end

0 commit comments

Comments
 (0)