Skip to content

Commit 2dcd3c4

Browse files
committed
你再爬试试看😠
1 parent eff1ba2 commit 2dcd3c4

File tree

2 files changed

+96
-29
lines changed

2 files changed

+96
-29
lines changed

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,5 @@ redis==3.5.3
66
apscheduler==3.6.3
77
pymongo==3.11.2
88
tornado==6.0.4
9-
cryptography==3.4.1
9+
cryptography==3.4.1
10+
redis==3.5.3

web/server.py

Lines changed: 94 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import contextlib
1212
import logging
1313

14+
import redis
1415
import pymongo
1516
from http import HTTPStatus
1617
from concurrent.futures import ThreadPoolExecutor
@@ -36,6 +37,14 @@ def __del__(self):
3637
self.client.close()
3738

3839

40+
class Redis:
    """Small holder for a redis-py client, exposed as the ``r`` attribute.

    Args:
        host: Redis server hostname. Defaults to ``"redis"``.
        db: Redis logical database index. Defaults to ``2``.
    """

    def __init__(self, host="redis", db=2):
        # decode_responses=True makes the client return ``str`` instead of ``bytes``.
        self.r = redis.StrictRedis(host=host, decode_responses=True, db=db)

    def __del__(self):
        # ``r`` is missing when __init__ raised before the assignment; __del__ still
        # runs in that case, so guard the lookup instead of raising AttributeError.
        client = getattr(self, "r", None)
        if client is not None:
            client.close()
46+
47+
3948
class BaseHandler(web.RequestHandler):
4049
mongo = Mongo()
4150

@@ -51,47 +60,104 @@ def get(self):
5160
self.write(html)
5261

5362

54-
class AntiCrawler:
    """Decide whether a request should be rejected and keep a per-IP miss counter.

    A request is rejected when its headers fail verification (``header_check``)
    or when the caller's IP has exceeded the miss threshold (``ban_check``).
    Misses are recorded with ``imprisonment``.

    Args:
        instance: the request handler being checked. It must provide
            ``request.headers``, ``request.uri``, ``request.remote_ip`` and
            ``get_query_argument``.
    """

    # An IP is banned once its stored counter is greater than this value.
    BAN_THRESHOLD = 10
    # TTL written with the counter is (count - BAN_THRESHOLD) * BAN_STEP_SECONDS.
    BAN_STEP_SECONDS = 3600
    # One client reused by every instance, created on first use, so that a new
    # client is not constructed for each request.
    _shared_redis = None

    def __init__(self, instance):
        self.tornado = instance
        if AntiCrawler._shared_redis is None:
            AntiCrawler._shared_redis = Redis()
        self.redis = AntiCrawler._shared_redis

    def execute(self) -> bool:
        """Return True when the request must be rejected, False otherwise."""
        # Short-circuit: the Redis lookup is skipped when the headers already fail.
        return bool(self.header_check() or self.ban_check())

    def header_check(self) -> bool:
        """Return True when the Referer / ``ne1`` headers do not match the request.

        The ``ne1`` header is decrypted with the ``id`` query argument as the
        passphrase and must equal the request URI; the Referer must contain the id.
        """
        request = self.tornado.request
        cypher_text = request.headers.get("ne1", "")
        referer = request.headers.get("Referer")
        param = self.tornado.get_query_argument("id")
        uri = request.uri
        logging.info("Verifying: Referer:[%s] ct:[%s], uri:[%s], id:[%s]", referer, cypher_text, uri, param)

        if referer is None or param not in referer:
            return True

        try:
            result = decrypt(cypher_text, param).decode('u8')
        except Exception:
            # Any decrypt/decode failure is treated as a failed verification.
            logging.error("Decrypt failed")
            result = ""

        return result != uri

    def ban_check(self) -> bool:
        """Return True when the caller's stored miss counter exceeds BAN_THRESHOLD."""
        str_count = self.redis.r.get(self.get_real_ip())
        return bool(str_count) and int(str_count) > self.BAN_THRESHOLD

    def imprisonment(self, ip):
        """Record one more miss for ``ip`` and, past the threshold, set the ban TTL.

        Args:
            ip: key under which the counter is stored.
        """
        con = self.redis
        # INCR is not used because the expiry has to be written with the value.
        # A single GET replaces the exists()+get() pair: the key can expire between
        # those two calls, which made int() receive None.
        # NOTE(review): GET followed by SET is still not atomic across concurrent
        # requests from the same IP — confirm whether lost updates matter here.
        previous = con.r.get(ip)
        count = int(previous) + 1 if previous else 1

        # Ban rule: (count - BAN_THRESHOLD) * BAN_STEP_SECONDS seconds.
        # Up to the threshold the key is written without an expiry.
        if count > self.BAN_THRESHOLD:
            ex = (count - self.BAN_THRESHOLD) * self.BAN_STEP_SECONDS
        else:
            ex = None
        con.r.set(ip, count, ex)

    def get_real_ip(self):
        """Return the ``X-Real-IP`` header if set, otherwise the socket's remote IP."""
        # NOTE(review): X-Real-IP is taken at face value; presumably a reverse proxy
        # sets it — confirm, since a client-supplied value would dodge the counter.
        x_real = self.tornado.request.headers.get("X-Real-IP")
        remote_ip = self.tornado.request.remote_ip
        logging.warning("X-Real-IP:%s, Remote-IP:%s", x_real, remote_ip)
        return x_real or remote_ip
73124

74125

75126
class ResourceHandler(BaseHandler):
76127
executor = ThreadPoolExecutor(50)
77128

78129
@run_on_executor()
def get_resource_data(self):
    """Fetch the resource document named by the ``id`` query argument.

    Returns:
        The matching document (without ``_id``) after incrementing its view
        counter, or ``{}`` when the request is rejected (status 403) or no
        document matches (status 404, and the caller's IP is recorded as a miss).
    """
    guard = AntiCrawler(self)
    if guard.execute():
        headers = self.request.headers
        logging.info("%s@%s make you happy:-(", headers.get("user-agent"),
                     headers.get("X-Real-IP")
                     )
        self.set_status(HTTPStatus.FORBIDDEN)
        return {}

    resource_id = self.get_query_argument("id")
    # Numeric ids are matched as integers; anything else is matched as given.
    with contextlib.suppress(ValueError):
        resource_id = int(resource_id)

    document = self.mongo.db["yyets"].find_one_and_update(
        {"data.info.id": resource_id},
        {'$inc': {'data.info.views': 1}},
        {'_id': False})

    if document:
        MetricsHandler.add("resource")
        return document

    # Nothing matched: count this miss against the caller before answering 404.
    offender = guard.get_real_ip()
    guard.imprisonment(offender)
    self.set_status(404)
    return {}
96162

97163
@run_on_executor()

0 commit comments

Comments
 (0)