Skip to content
This repository was archived by the owner on Jan 23, 2024. It is now read-only.

feat: improve firebase error handling #53

Merged
merged 6 commits into from
Aug 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 53 additions & 36 deletions src/googleclouddebugger/firebase_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,15 @@
"""Communicates with Firebase RTDB backend."""

from collections import deque
import copy
import hashlib
import json
import os
import platform
import requests
import socket
import sys
import threading
import time
import traceback

import firebase_admin
Expand Down Expand Up @@ -114,6 +115,7 @@ def __init__(self):

# Delay before retrying failed request.
self.register_backoff = backoff.Backoff() # Register debuggee.
self.subscribe_backoff = backoff.Backoff() # Subscribe to updates.
self.update_backoff = backoff.Backoff() # Update breakpoint.

# Maximum number of times that the message is re-transmitted before it
Expand Down Expand Up @@ -279,13 +281,25 @@ def _MainThreadProc(self):
self._breakpoint_subscription.
"""
# Note: if self._credentials is None, default app credentials will be used.
# TODO: Error handling.
firebase_admin.initialize_app(self._credentials,
{'databaseURL': self._database_url})
try:
firebase_admin.initialize_app(self._credentials,
{'databaseURL': self._database_url})
except ValueError:
native.LogWarning(
f'Failed to initialize firebase: {traceback.format_exc()}')
native.LogError('Failed to start debugger agent. Giving up.')
return

self._RegisterDebuggee()
registration_required, delay = True, 0
while registration_required:
time.sleep(delay)
registration_required, delay = self._RegisterDebuggee()
self.registration_complete.set()
self._SubscribeToBreakpoints()

subscription_required, delay = True, 0
while subscription_required:
time.sleep(delay)
subscription_required, delay = self._SubscribeToBreakpoints()
self.subscription_complete.set()

def _TransmissionThreadProc(self):
Expand All @@ -310,26 +324,29 @@ def _RegisterDebuggee(self):
Returns:
(registration_required, delay) tuple
"""
debuggee = None
try:
debuggee = self._GetDebuggee()
self._debuggee_id = debuggee['id']

try:
debuggee_path = f'cdbg/debuggees/{self._debuggee_id}'
native.LogInfo(
f'registering at {self._database_url}, path: {debuggee_path}')
firebase_admin.db.reference(debuggee_path).set(debuggee)
native.LogInfo(
f'Debuggee registered successfully, ID: {self._debuggee_id}')
self.register_backoff.Succeeded()
return (False, 0) # Proceed immediately to subscribing to breakpoints.
except BaseException:
native.LogInfo(f'Failed to register debuggee: {traceback.format_exc()}')
except BaseException:
native.LogWarning('Debuggee information not available: ' +
traceback.format_exc())
native.LogWarning(
f'Debuggee information not available: {traceback.format_exc()}')
return (True, self.register_backoff.Failed())

return (True, self.register_backoff.Failed())
try:
debuggee_path = f'cdbg/debuggees/{self._debuggee_id}'
native.LogInfo(
f'registering at {self._database_url}, path: {debuggee_path}')
firebase_admin.db.reference(debuggee_path).set(debuggee)
native.LogInfo(
f'Debuggee registered successfully, ID: {self._debuggee_id}')
self.register_backoff.Succeeded()
return (False, 0) # Proceed immediately to subscribing to breakpoints.
except BaseException:
# There is no significant benefit to handing different exceptions
# in different ways; we will log and retry regardless.
native.LogInfo(f'Failed to register debuggee: {traceback.format_exc()}')
return (True, self.register_backoff.Failed())

def _SubscribeToBreakpoints(self):
# Kill any previous subscriptions first.
Expand All @@ -340,7 +357,13 @@ def _SubscribeToBreakpoints(self):
path = f'cdbg/breakpoints/{self._debuggee_id}/active'
native.LogInfo(f'Subscribing to breakpoint updates at {path}')
ref = firebase_admin.db.reference(path)
self._breakpoint_subscription = ref.listen(self._ActiveBreakpointCallback)
try:
self._breakpoint_subscription = ref.listen(self._ActiveBreakpointCallback)
return (False, 0)
except firebase_admin.exceptions.FirebaseError:
native.LogInfo(
f'Failed to subscribe to breakpoints: {traceback.format_exc()}')
return (True, self.subscribe_backoff.Failed())

def _ActiveBreakpointCallback(self, event):
if event.event_type == 'put':
Expand Down Expand Up @@ -410,7 +433,7 @@ def _TransmitBreakpointUpdates(self):
try:
# Something has changed on the breakpoint.
# It should be going from active to final, but let's make sure.
if not breakpoint_data['isFinalState']:
if not breakpoint_data.get('isFinalState', False):
raise BaseException(
f'Unexpected breakpoint update requested: {breakpoint_data}')

Expand All @@ -428,6 +451,7 @@ def _TransmitBreakpointUpdates(self):
f'cdbg/breakpoints/{self._debuggee_id}/active/{bp_id}')
bp_ref.delete()

summary_data = breakpoint_data
# Save snapshot data for snapshots only.
if is_snapshot:
# Note that there may not be snapshot data.
Expand All @@ -436,14 +460,15 @@ def _TransmitBreakpointUpdates(self):
bp_ref.set(breakpoint_data)

# Now strip potential snapshot data.
breakpoint_data.pop('evaluatedExpressions', None)
breakpoint_data.pop('stackFrames', None)
breakpoint_data.pop('variableTable', None)
summary_data = copy.deepcopy(breakpoint_data)
summary_data.pop('evaluatedExpressions', None)
summary_data.pop('stackFrames', None)
summary_data.pop('variableTable', None)

# Then add it to the list of final breakpoints.
bp_ref = firebase_admin.db.reference(
f'cdbg/breakpoints/{self._debuggee_id}/final/{bp_id}')
bp_ref.set(breakpoint_data)
bp_ref.set(summary_data)

native.LogInfo(f'Breakpoint {bp_id} update transmitted successfully')

Expand All @@ -460,15 +485,7 @@ def _TransmitBreakpointUpdates(self):
# This is very common if multiple instances are sending final update
# simultaneously.
native.LogInfo(f'{err}, breakpoint: {bp_id}')
except socket.error as err:
if retry_count < self.max_transmit_attempts - 1:
native.LogInfo(f'Socket error {err.errno} while sending breakpoint '
f'{bp_id} update: {traceback.format_exc()}')
retry_list.append((breakpoint_data, retry_count + 1))
else:
native.LogWarning(f'Breakpoint {bp_id} retry count exceeded maximum')
# Socket errors shouldn't persist like this; reconnect.
#reconnect = True

except BaseException:
native.LogWarning(f'Fatal error sending breakpoint {bp_id} update: '
f'{traceback.format_exc()}')
Expand Down
Loading