-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathservice.py
490 lines (438 loc) · 21.4 KB
/
service.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
import errno
import os
import json
import typing
import asyncio
from collections import defaultdict
from elasticsearch import AsyncElasticsearch, NotFoundError
from elasticsearch.helpers import async_streaming_bulk
from hub.schema.result import Censor
from hub.service import BlockchainReaderService
from hub.common import IndexVersionMismatch, ALL_FIELDS, INDEX_DEFAULT_SETTINGS, expand_query
from hub.db.revertable import RevertableOp
from hub.db.common import TrendingNotification, DB_PREFIXES, ResolveResult
from hub.notifier_protocol import ElasticNotifierProtocol
from hub.elastic_sync.fast_ar_trending import FAST_AR_TRENDING_SCRIPT
from hub.elastic_sync.db import ElasticSyncDB
if typing.TYPE_CHECKING:
from hub.elastic_sync.env import ElasticEnv
class ElasticSyncService(BlockchainReaderService):
VERSION = 1
def __init__(self, env: 'ElasticEnv'):
super().__init__(env, 'lbry-elastic-writer', thread_workers=1, thread_prefix='lbry-elastic-writer')
self.env = env
# self._refresh_interval = 0.1
self._task = None
self.index = self.env.es_index_prefix + 'claims'
self._elastic_host = env.elastic_host
self._elastic_port = env.elastic_port
self.sync_timeout = 1800
self.sync_client = None
self._es_info_path = os.path.join(env.db_dir, 'es_info')
self._last_wrote_height = 0
self._last_wrote_block_hash = None
self._touched_claims = set()
self._deleted_claims = set()
self._removed_during_undo = set()
self._trending = defaultdict(list)
self._advanced = True
self.synchronized = asyncio.Event()
self._listeners: typing.List[ElasticNotifierProtocol] = []
self._force_reindex = False
def open_db(self):
env = self.env
self.db = ElasticSyncDB(
env.coin, env.db_dir, self.secondary_name, -1, env.reorg_limit,
env.cache_all_tx_hashes, blocking_channel_ids=env.blocking_channel_ids,
filtering_channel_ids=env.filtering_channel_ids, executor=self._executor,
index_address_status=env.index_address_status
)
async def run_es_notifier(self, synchronized: asyncio.Event):
started = False
while not started:
try:
server = await asyncio.get_event_loop().create_server(
lambda: ElasticNotifierProtocol(self._listeners),
self.env.elastic_notifier_host,
self.env.elastic_notifier_port
)
started = True
except Exception as e:
if not isinstance(e, asyncio.CancelledError):
self.log.error(f'ES notifier server failed to listen on '
f'{self.env.elastic_notifier_host}:'
f'{self.env.elastic_notifier_port:d} : {e!r}')
if isinstance(e, OSError) and e.errno is errno.EADDRINUSE:
await asyncio.sleep(3)
continue
raise
self.log.info("ES notifier server listening on TCP %s:%i", self.env.elastic_notifier_host,
self.env.elastic_notifier_port)
synchronized.set()
async with server:
await server.serve_forever()
def notify_es_notification_listeners(self, height: int, block_hash: bytes):
for p in self._listeners:
p.send_height(height, block_hash)
self.log.info("notify listener %i", height)
def _read_es_height(self):
info = {}
if os.path.exists(self._es_info_path):
with open(self._es_info_path, 'r') as f:
try:
info.update(json.loads(f.read()))
except json.decoder.JSONDecodeError:
self.log.warning('failed to parse es sync status file')
self._last_wrote_height = int(info.get('height', 0))
self._last_wrote_block_hash = info.get('block_hash', None)
async def read_es_height(self):
await asyncio.get_event_loop().run_in_executor(self._executor, self._read_es_height)
def write_es_height(self, height: int, block_hash: str):
with open(self._es_info_path, 'w') as f:
f.write(json.dumps({'height': height, 'block_hash': block_hash}, indent=2))
self._last_wrote_height = height
self._last_wrote_block_hash = block_hash
async def get_index_version(self) -> int:
try:
template = await self.sync_client.indices.get_template(self.index)
return template[self.index]['version']
except NotFoundError:
return 0
async def set_index_version(self, version):
await self.sync_client.indices.put_template(
self.index, body={'version': version, 'index_patterns': ['ignored']}, ignore=400
)
async def start_index(self) -> bool:
if self.sync_client:
return False
hosts = [{'host': self._elastic_host, 'port': self._elastic_port}]
self.sync_client = AsyncElasticsearch(hosts, timeout=self.sync_timeout)
while True:
try:
await self.sync_client.cluster.health(wait_for_status='yellow')
self.log.info("ES is ready to connect to")
break
except ConnectionError:
self.log.warning("Failed to connect to Elasticsearch. Waiting for it!")
await asyncio.sleep(1)
index_version = await self.get_index_version()
res = await self.sync_client.indices.create(self.index, INDEX_DEFAULT_SETTINGS, ignore=400)
acked = res.get('acknowledged', False)
if acked:
await self.set_index_version(self.VERSION)
return True
elif index_version != self.VERSION:
self.log.error("es search index has an incompatible version: %s vs %s", index_version, self.VERSION)
raise IndexVersionMismatch(index_version, self.VERSION)
else:
await self.sync_client.indices.refresh(self.index)
return False
async def stop_index(self, delete=False):
if self.sync_client:
if delete:
await self.delete_index()
await self.sync_client.close()
self.sync_client = None
async def delete_index(self):
if self.sync_client:
return await self.sync_client.indices.delete(self.index, ignore_unavailable=True)
def update_filter_query(self, censor_type, blockdict, channels=False):
blockdict = {blocked.hex(): blocker.hex() for blocked, blocker in blockdict.items()}
if channels:
update = expand_query(channel_id__in=list(blockdict.keys()), censor_type=f"<{censor_type}")
else:
update = expand_query(claim_id__in=list(blockdict.keys()), censor_type=f"<{censor_type}")
key = 'channel_id' if channels else 'claim_id'
update['script'] = {
"source": f"ctx._source.censor_type={censor_type}; "
f"ctx._source.censoring_channel_id=params[ctx._source.{key}];",
"lang": "painless",
"params": blockdict
}
return update
async def apply_filters(self, blocked_streams, blocked_channels, filtered_streams, filtered_channels):
only_channels = lambda x: {k: chan for k, (chan, repost) in x.items()}
async def batched_update_filter(items: typing.Dict[bytes, bytes], channel: bool, censor_type: int):
batches = [{}]
for k, v in items.items():
if len(batches[-1]) == 2000:
batches.append({})
batches[-1][k] = v
for batch in batches:
if batch:
await self.sync_client.update_by_query(
self.index, body=self.update_filter_query(censor_type, only_channels(batch)), slices=4)
if channel:
await self.sync_client.update_by_query(
self.index, body=self.update_filter_query(censor_type, only_channels(batch), True),
slices=4)
await self.sync_client.indices.refresh(self.index)
if filtered_streams:
await batched_update_filter(filtered_streams, False, Censor.SEARCH)
if filtered_channels:
await batched_update_filter(filtered_channels, True, Censor.SEARCH)
if blocked_streams:
await batched_update_filter(blocked_streams, False, Censor.RESOLVE)
if blocked_channels:
await batched_update_filter(blocked_channels, True, Censor.RESOLVE)
@staticmethod
def _upsert_claim_query(index, claim):
return {
'doc': {key: value for key, value in claim.items() if key in ALL_FIELDS},
'_id': claim['claim_id'],
'_index': index,
'_op_type': 'update',
'doc_as_upsert': True
}
@staticmethod
def _delete_claim_query(index, claim_hash: bytes):
return {
'_index': index,
'_op_type': 'delete',
'_id': claim_hash.hex()
}
@staticmethod
def _update_trending_query(index, claim_hash, notifications):
return {
'_id': claim_hash.hex(),
'_index': index,
'_op_type': 'update',
'script': {
'lang': 'painless',
'source': FAST_AR_TRENDING_SCRIPT,
'params': {'src': {
'changes': [
{
'height': notification.height,
'prev_amount': notification.prev_amount / 1E8,
'new_amount': notification.new_amount / 1E8,
} for notification in notifications
]
}}
},
}
async def _claim_producer(self):
for deleted in self._deleted_claims:
yield self._delete_claim_query(self.index, deleted)
touched_claims = list(self._touched_claims)
for idx in range(0, len(touched_claims), 1000):
batch = touched_claims[idx:idx+1000]
claims = {}
total_extras = {}
async for claim_hash, claim, extras in self.db._prepare_resolve_results(batch, include_extra=False,
apply_blocking=False,
apply_filtering=False):
if not claim:
self.log.warning("cannot sync claim %s", (claim_hash or b'').hex())
continue
claims[claim_hash] = claim
total_extras[claim_hash] = claim
total_extras.update(extras)
async for claim in self.db.prepare_claim_metadata_batch(claims, total_extras):
if claim:
yield self._upsert_claim_query(self.index, claim)
for claim_hash, notifications in self._trending.items():
yield self._update_trending_query(self.index, claim_hash, notifications)
def advance(self, height: int):
super().advance(height)
touched_or_deleted = self.db.prefix_db.touched_or_deleted.get(height)
for k, v in self.db.prefix_db.trending_notification.iterate((height,)):
self._trending[k.claim_hash].append(TrendingNotification(k.height, v.previous_amount, v.new_amount))
if touched_or_deleted:
readded_after_reorg = self._removed_during_undo.intersection(touched_or_deleted.touched_claims)
self._deleted_claims.difference_update(readded_after_reorg)
self._touched_claims.update(touched_or_deleted.touched_claims)
self._deleted_claims.update(touched_or_deleted.deleted_claims)
self._touched_claims.difference_update(self._deleted_claims)
for to_del in touched_or_deleted.deleted_claims:
if to_del in self._trending:
self._trending.pop(to_del)
self._advanced = True
def unwind(self):
self.db.block_timestamp_cache.clear()
reverted_block_hash = self.db.block_hashes[-1]
super().unwind()
packed = self.db.prefix_db.undo.get(len(self.db.tx_counts), reverted_block_hash)
touched_or_deleted = None
claims_to_delete = []
# find and apply the touched_or_deleted items in the undos for the reverted blocks
assert packed, f'missing undo information for block {len(self.db.tx_counts)}'
while packed:
op, packed = RevertableOp.unpack(packed)
if op.is_delete and op.key.startswith(DB_PREFIXES.touched_or_deleted.value):
assert touched_or_deleted is None, 'only should have one match'
touched_or_deleted = self.db.prefix_db.touched_or_deleted.unpack_value(op.value)
elif op.is_delete and op.key.startswith(DB_PREFIXES.claim_to_txo.value):
v = self.db.prefix_db.claim_to_txo.unpack_value(op.value)
if v.root_tx_num == v.tx_num and v.root_tx_num > self.db.tx_counts[-1]:
claims_to_delete.append(self.db.prefix_db.claim_to_txo.unpack_key(op.key).claim_hash)
if touched_or_deleted:
self._touched_claims.update(set(touched_or_deleted.deleted_claims).union(
touched_or_deleted.touched_claims.difference(set(claims_to_delete))))
self._deleted_claims.update(claims_to_delete)
self._removed_during_undo.update(claims_to_delete)
self._advanced = True
self.log.warning("delete %i claim and upsert %i from reorg", len(self._deleted_claims), len(self._touched_claims))
async def poll_for_changes(self):
await super().poll_for_changes()
cnt = 0
success = 0
if self._advanced:
if self._touched_claims or self._deleted_claims or self._trending:
async for ok, item in async_streaming_bulk(
self.sync_client, self._claim_producer(),
raise_on_error=False):
cnt += 1
if not ok:
self.log.warning("indexing failed for an item: %s", item)
else:
success += 1
await self.sync_client.indices.refresh(self.index)
await self.apply_filters(
self.db.blocked_streams, self.db.blocked_channels, self.db.filtered_streams,
self.db.filtered_channels
)
self.write_es_height(self.db.db_height, self.db.db_tip[::-1].hex())
self.log.info("Indexing block %i done. %i/%i successful", self._last_wrote_height, success, cnt)
self._touched_claims.clear()
self._deleted_claims.clear()
self._removed_during_undo.clear()
self._trending.clear()
self._advanced = False
self.synchronized.set()
self.notify_es_notification_listeners(self._last_wrote_height, self.db.db_tip)
@property
def last_synced_height(self) -> int:
return self._last_wrote_height
async def catch_up(self):
last_state = self.db.prefix_db.db_state.get()
db_height = last_state.height
if last_state and self._last_wrote_height and db_height > self._last_wrote_height:
self.log.warning(
"syncing ES from block %i to rocksdb height of %i (%i blocks to sync)",
self._last_wrote_height, last_state.height, last_state.height - self._last_wrote_height
)
for _ in range(self._last_wrote_height + 1, last_state.height + 1):
super().unwind()
for height in range(self._last_wrote_height + 1, last_state.height + 1):
self.advance(height)
else:
return
success = 0
cnt = 0
if self._touched_claims or self._deleted_claims or self._trending:
async for ok, item in async_streaming_bulk(
self.sync_client, self._claim_producer(),
raise_on_error=False):
cnt += 1
if not ok:
self.log.warning("indexing failed for an item: %s", item)
else:
success += 1
await self.sync_client.indices.refresh(self.index)
await self.apply_filters(
self.db.blocked_streams, self.db.blocked_channels, self.db.filtered_streams,
self.db.filtered_channels
)
self.write_es_height(db_height, last_state.tip[::-1].hex())
self._touched_claims.clear()
self._deleted_claims.clear()
self._removed_during_undo.clear()
self._trending.clear()
self._advanced = False
self.notify_es_notification_listeners(self._last_wrote_height, last_state.tip)
self.log.info("Indexing block %i done. %i/%i successful", self._last_wrote_height, success, cnt)
async def reindex(self, force=False):
if force or self._last_wrote_height == 0 and self.db.db_height > 0:
if self._last_wrote_height == 0:
self.log.info("running initial ES indexing of rocksdb at block height %i", self.db.db_height)
else:
self.log.info("reindex (last wrote: %i, db height: %i)", self._last_wrote_height, self.db.db_height)
await self._reindex()
async def block_bulk_sync_on_writer_catchup(self):
def _check_if_catching_up():
self.db.prefix_db.try_catch_up_with_primary()
state = self.db.prefix_db.db_state.get()
return state.catching_up
loop = asyncio.get_event_loop()
catching_up = True
while catching_up:
catching_up = await loop.run_in_executor(self._executor, _check_if_catching_up)
if catching_up:
await asyncio.sleep(1)
else:
return
def _iter_start_tasks(self):
yield self.block_bulk_sync_on_writer_catchup()
yield self.read_es_height()
yield self.start_index()
yield self.start_cancellable(self.run_es_notifier)
yield self.reindex(force=self._force_reindex)
yield self.catch_up()
self.block_count_metric.set(self.last_state.height)
yield self.start_prometheus()
yield self.start_cancellable(self.refresh_blocks_forever)
def _iter_stop_tasks(self):
yield self._stop_cancellable_tasks()
yield self.stop_index()
def run(self, reindex=False):
self._force_reindex = reindex
return super().run()
async def start(self, reindex=False):
self._force_reindex = reindex
try:
return await super().start()
finally:
self._force_reindex = False
async def _reindex(self):
async with self.lock:
self.log.info("reindexing %i claims (estimate)", self.db.prefix_db.claim_to_txo.estimate_num_keys())
await self.delete_index()
res = await self.sync_client.indices.create(self.index, INDEX_DEFAULT_SETTINGS, ignore=400)
acked = res.get('acknowledged', False)
if acked:
await self.set_index_version(self.VERSION)
await self.sync_client.indices.refresh(self.index)
self.write_es_height(0, self.env.coin.GENESIS_HASH)
await self._sync_all_claims()
await self.sync_client.indices.refresh(self.index)
self.write_es_height(self.db.db_height, self.db.db_tip[::-1].hex())
self.notify_es_notification_listeners(self.db.db_height, self.db.db_tip)
self.log.info("finished reindexing")
async def _sync_all_claims(self, batch_size=100000):
async def all_claims_producer():
current_height = self.db.db_height
async for claim in self.db.all_claims_producer(batch_size=batch_size):
yield self._upsert_claim_query(self.index, claim)
self.log.info("applying trending")
for batch_height in range(0, current_height, 10000):
notifications = defaultdict(list)
for k, v in self.db.prefix_db.trending_notification.iterate(start=(batch_height,), stop=(batch_height+10000,)):
notifications[k.claim_hash].append(TrendingNotification(k.height, v.previous_amount, v.new_amount))
async for (k,), v in self.db.prefix_db.claim_to_txo.multi_get_async_gen(
self._executor, [(claim_hash,) for claim_hash in notifications]):
if not v:
notifications.pop(k)
for claim_hash, trending in notifications.items():
yield self._update_trending_query(self.index, claim_hash, trending)
self._trending.clear()
cnt = 0
success = 0
producer = all_claims_producer()
finished = False
try:
async for ok, item in async_streaming_bulk(self.sync_client, producer, raise_on_error=False):
cnt += 1
if not ok:
self.log.warning("indexing failed for an item: %s", item)
else:
success += 1
if cnt % batch_size == 0:
self.log.info(f"indexed {success}/{cnt} claims")
finished = True
await self.sync_client.indices.refresh(self.index)
self.log.info("indexed %i/%i claims", success, cnt)
finally:
if not finished:
await producer.aclose()
self.shutdown_event.set()