# Senior RF Engineer Interview Questions (LTE/5G Optimization & Python)
## Python Skills Assessment for RF Optimization
### 1. Data Processing Basics
Question: How would you read a large CSV file containing KPI
data from NetAct and calculate the average RSRP for a
specific sector?
Answer:
```python
import pandas as pd
def average_sector_rsrp(csv_path, sector_id, chunk_size=100000):
    """Return the mean RSRP of one sector from a large KPI CSV export.

    The file is streamed in chunks so it never has to fit in memory. A running
    sum and sample count are accumulated instead of averaging per-chunk means:
    a mean of means would weight a chunk holding one matching row the same as
    a chunk holding thousands.

    :param csv_path: path of a CSV file with 'SectorID' and 'RSRP' columns
    :param sector_id: value to match in the 'SectorID' column
    :param chunk_size: number of rows read per chunk
    :return: mean of the non-missing RSRP values of the sector, or NaN when
        the sector has no valid sample (0 would look like a real reading)
    """
    rsrp_sum = 0.0
    sample_count = 0
    # Only the two columns that are actually used are parsed.
    for chunk in pd.read_csv(csv_path, usecols=['SectorID', 'RSRP'],
                             chunksize=chunk_size):
        rsrp = chunk.loc[chunk['SectorID'] == sector_id, 'RSRP'].dropna()
        rsrp_sum += float(rsrp.sum())
        sample_count += len(rsrp)
    if sample_count == 0:
        return float('nan')
    return rsrp_sum / sample_count


if __name__ == '__main__':
    average_rsrp = average_sector_rsrp('netact_kpi_data.csv', 'SECTOR123')
    print(f"Average RSRP for Sector123: {average_rsrp} dBm")
```
### 2. KPI Analysis
Question: Given a DataFrame with hourly KPI data including
RSRP, SINR, and throughput, how would you identify cells
with degradation over time?
Answer:
```python
def identify_degrading_cells(df, kpi='Throughput', threshold=0.2,
                             higher_is_better=True):
    """Return the cells whose KPI worsened in a large share of hourly steps.

    The KPI is averaged per ('CellID', 'Hour'), pivoted to one row per cell,
    and differenced hour over hour. A cell's degradation is the fraction of
    its valid hour-to-hour steps in which the KPI got worse.

    :param df: DataFrame with 'CellID', 'Hour' and the ``kpi`` column
    :param kpi: name of the KPI column to analyse
    :param threshold: a cell is returned when its degradation fraction is
        strictly greater than this value
    :param higher_is_better: True when a falling value is a degradation
        (e.g. throughput); False when a rising value is one (e.g. a drop
        rate)
    :return: list of CellID values, in index order
    """
    hourly_kpi = df.groupby(['CellID', 'Hour'])[kpi].mean().unstack()
    hour_to_hour = hourly_kpi.diff(axis=1)
    worsened = hour_to_hour.lt(0) if higher_is_better else hour_to_hour.gt(0)
    # The first hour (and any step next to a missing hour) has no difference;
    # leave those out of the denominator so they do not count as "no
    # degradation" and dilute the fraction.
    valid_steps = hour_to_hour.notna().sum(axis=1)
    degradation = worsened.sum(axis=1) / valid_steps.where(valid_steps > 0)
    # Cells without any valid step yield NaN and are never selected.
    return degradation[degradation > threshold].index.tolist()
# Usage example: `kpi_df` is assumed to be a DataFrame defined elsewhere that
# has the 'CellID', 'Hour' and 'SINR' columns the function reads.
degrading_cells = identify_degrading_cells(kpi_df, kpi='SINR',
threshold=0.3)
```
### 3. Visualization
Question: How would you visualize the relationship between
RSRP and throughput from NetAct data?
Answer:
```python
import matplotlib.pyplot as plt
import seaborn as sns
def plot_rsrp_vs_throughput(df):
    """Show a scatter plot of throughput against RSRP with a trend line.

    :param df: DataFrame with 'RSRP' and 'Throughput' columns
    :return: None; the figure is displayed with ``plt.show()``
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.scatterplot(data=df, x='RSRP', y='Throughput', alpha=0.3, ax=ax)
    # Regression line only (scatter=False), drawn on the same axes.
    sns.regplot(data=df, x='RSRP', y='Throughput', scatter=False,
                color='red', ax=ax)
    # Title and labels are set last: regplot relabels the axes with the bare
    # column names, which would otherwise drop the units.
    ax.set_title('RSRP vs Throughput Relationship')
    ax.set_xlabel('RSRP (dBm)')
    ax.set_ylabel('Throughput (Mbps)')
    ax.grid(True)
    plt.show()
# Usage: `netact_data` is assumed to be a DataFrame defined elsewhere that has
# the 'RSRP' and 'Throughput' columns the function plots.
plot_rsrp_vs_throughput(netact_data)
```
### 4. Data Cleaning
Question: NetAct data often contains missing or outlier
values. How would you handle these in your analysis?
Answer:
```python
def clean_netact_data(df, rsrp_range=(-140, -50), sinr_range=(-10, 40),
                      iqr_factor=1.5):
    """Return a cleaned copy of KPI data; the input frame is not modified.

    Steps, in order:
    1. fill missing values forward, then backward (for leading gaps);
    2. drop rows whose RSRP or SINR lies outside the accepted range;
    3. for 'RSRP', 'SINR' and 'Throughput' in turn, drop rows outside
       ``[Q1 - iqr_factor * IQR, Q3 + iqr_factor * IQR]``, with quartiles
       computed on the rows that survived the previous steps.

    :param df: DataFrame with 'RSRP', 'SINR' and 'Throughput' columns
    :param rsrp_range: inclusive (min, max) accepted RSRP
    :param sinr_range: inclusive (min, max) accepted SINR
    :param iqr_factor: multiplier of the inter-quartile range for the fences
    :return: new DataFrame holding the remaining rows
    """
    # ffill()/bfill() return new frames, so the caller's data stays intact
    # (fillna(method=...) is deprecated in recent pandas).
    cleaned = df.ffill().bfill()

    # Remove values outside the accepted ranges
    cleaned = cleaned[cleaned['RSRP'].between(*rsrp_range)]
    cleaned = cleaned[cleaned['SINR'].between(*sinr_range)]

    # Remove statistical outliers using IQR fences
    for col in ('RSRP', 'SINR', 'Throughput'):
        q1 = cleaned[col].quantile(0.25)
        q3 = cleaned[col].quantile(0.75)
        margin = iqr_factor * (q3 - q1)
        is_outlier = (cleaned[col] < q1 - margin) | (cleaned[col] > q3 + margin)
        cleaned = cleaned[~is_outlier]
    return cleaned
```
### 5. Automation
Question: How would you automate a daily report generation
for top N worst performing cells based on multiple KPIs?
Answer:
```python
def generate_daily_report(df, top_n=10,
                          output_file='daily_report.html'):
    """Write an HTML report of the worst cells and return them.

    Each row gets a composite score (higher is worse) that is the weighted
    sum of: DropRate (0.4), 1 - HandoverSuccess (0.3), throughput shortfall
    relative to the best row (0.2) and SINR shortfall relative to the best
    row (0.1). The caller's frame is not modified.

    NOTE(review): the weights presumably expect DropRate and HandoverSuccess
    as fractions in 0..1 - confirm against the data source.

    :param df: DataFrame with 'DropRate', 'HandoverSuccess', 'Throughput'
        and 'SINR' columns
    :param top_n: number of highest-scoring rows to report
    :param output_file: path of the HTML file to write
    :return: DataFrame of the ``top_n`` rows with the added 'Score' column
    """
    kpis = ['DropRate', 'HandoverSuccess', 'Throughput', 'SINR']
    score = (
        df['DropRate'] * 0.4
        + (1 - df['HandoverSuccess']) * 0.3
        + (1 - df['Throughput'] / df['Throughput'].max()) * 0.2
        + (1 - df['SINR'] / df['SINR'].max()) * 0.1
    )
    # assign() returns a new frame instead of adding 'Score' to the input.
    scored = df.assign(Score=score)

    # Get worst performers
    worst_cells = scored.nlargest(top_n, 'Score')

    # Generate HTML report
    report = f"""
<html>
<head><title>Daily KPI Report</title></head>
<body>
<h1>Top {top_n} Worst Performing Cells</h1>
{worst_cells.to_html()}
<h2>Key Metrics Distribution</h2>
{scored[kpis].describe().to_html()}
</body>
</html>
"""
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(report)
    return worst_cells
```
## RF Optimization Specific Questions
### 6. KPI Interpretation
Question: How would you correlate poor throughput KPI with
physical layer measurements like RSRP, SINR, and BLER?
Answer:
```python
def analyze_throughput_issues(df, *, plot=True):
    """Correlate throughput with RSRP, SINR and BLER and tag low-throughput rows.

    Rows with Throughput < 5 receive an 'IssueType':
    'Interference' (SINR < 0), 'Coverage' (RSRP < -110) or 'Retransmissions'
    (BLER > 10). When several conditions hold, the one applied last wins, so
    the precedence is Retransmissions > Coverage > Interference. All other
    rows are tagged 'Other'.

    :param df: DataFrame with 'Throughput', 'RSRP', 'SINR' and 'BLER' columns
    :param plot: when True, draw a seaborn pair plot of the four columns;
        pass False for headless/batch use
    :return: tuple (annotated copy of ``df`` with 'IssueType', correlation
        matrix of the four columns)
    """
    metrics = ['Throughput', 'RSRP', 'SINR', 'BLER']

    # Create correlation matrix
    corr_matrix = df[metrics].corr()

    # Plot relationships
    if plot:
        sns.pairplot(df[metrics])

    # Categorize issues on a copy so the caller's frame is left untouched
    tagged = df.copy()
    low_throughput = tagged['Throughput'] < 5
    tagged['IssueType'] = 'Other'
    tagged.loc[(tagged['SINR'] < 0) & low_throughput, 'IssueType'] = 'Interference'
    tagged.loc[(tagged['RSRP'] < -110) & low_throughput, 'IssueType'] = 'Coverage'
    tagged.loc[(tagged['BLER'] > 10) & low_throughput, 'IssueType'] = 'Retransmissions'
    return tagged, corr_matrix
```
### 7. Geo-Data Analysis
Question: How would you analyze geographically correlated
KPI issues using Python?
Answer:
```python
import geopandas as gpd
from shapely.geometry import Point
def analyze_geo_kpi(df, clusters_path='network_clusters.shp'):
    """Aggregate KPIs per geographic cluster and plot throughput on a map.

    :param df: DataFrame with 'Longitude', 'Latitude', 'RSRP', 'SINR',
        'Throughput' and 'DropRate' columns
    :param clusters_path: path of the cluster polygon file; the layer must
        provide a 'ClusterID' attribute
    :return: DataFrame indexed by 'ClusterID' with the mean of each KPI
    """
    # Convert to GeoDataFrame.
    # NOTE(review): coordinates are assumed to be WGS84 degrees (EPSG:4326) -
    # confirm against the export.
    geometry = [Point(xy) for xy in zip(df['Longitude'], df['Latitude'])]
    geo_df = gpd.GeoDataFrame(df, geometry=geometry, crs='EPSG:4326')

    # Load the cluster polygons
    clusters = gpd.read_file(clusters_path)

    # Both layers must share a CRS for the join and the overlay plot to line up.
    if clusters.crs is not None and clusters.crs != geo_df.crs:
        geo_df = geo_df.to_crs(clusters.crs)

    # Spatial join to assign cells to clusters ('predicate' replaced the
    # removed 'op' keyword)
    geo_df = gpd.sjoin(geo_df, clusters, how='left', predicate='within')

    # Analyze KPI by cluster
    cluster_kpi = geo_df.groupby('ClusterID').agg({
        'RSRP': 'mean',
        'SINR': 'mean',
        'Throughput': 'mean',
        'DropRate': 'mean',
    })

    # Visualize: cluster outlines with cells coloured by throughput
    fig, ax = plt.subplots(figsize=(12, 8))
    base = clusters.plot(ax=ax, color='white', edgecolor='black')
    geo_df.plot(ax=base, column='Throughput', legend=True,
                markersize=50, cmap='RdYlGn', alpha=0.7)
    plt.title('Throughput by Geographic Cluster')
    return cluster_kpi
```
### 8. Time Series Analysis
Question: How would you identify periodic patterns in KPI
data (daily/weekly variations)?
Answer:
```python
from statsmodels.tsa.seasonal import seasonal_decompose
def analyze_kpi_trends(df, kpi='DropRate'):
    """Decompose an hourly KPI series and summarise its periodic patterns.

    The KPI is resampled to hourly means, gaps are filled, and the series is
    split into trend, seasonal and residual parts with a 24-sample period.
    The decomposition is plotted on a new figure. The caller's frame is not
    modified.

    :param df: DataFrame with a 'Timestamp' column and the ``kpi`` column
    :param kpi: name of the KPI column to analyse
    :return: dict with
        'peak_hours' - the 3 hours of day with the highest mean KPI,
        'weekly_pattern' - mean KPI per day of week (0 = Monday),
        'trend' - last minus first value of the trend component
    """
    # Parse timestamps on a derived frame rather than overwriting the
    # caller's column.
    timestamps = pd.to_datetime(df['Timestamp'])
    ts = (
        df.assign(Timestamp=timestamps)
        .set_index('Timestamp')[kpi]
        .resample('h')
        .mean()
    )

    # Decompose time series; bfill covers gaps at the very start, which
    # forward filling alone would leave as NaN.
    filled = ts.ffill().bfill()
    result = seasonal_decompose(filled, model='additive', period=24)

    # Plot components
    result.plot()
    plt.suptitle(f'{kpi} Time Series Decomposition')
    plt.tight_layout()

    # Return insights
    trend = result.trend.dropna()
    insights = {
        'peak_hours':
            ts.groupby(ts.index.hour).mean().nlargest(3).index.tolist(),
        'weekly_pattern':
            ts.groupby(ts.index.dayofweek).mean().to_dict(),
        'trend': trend.iloc[-1] - trend.iloc[0],
    }
    return insights
```
# Senior RF Engineer (LTE/5G Optimization) - Python Skills Assessment
## Interview Questions on Python for RF Optimization
### Basic Python Skills
Q1: How would you import and clean KPI data from a NetAct
CSV export in Python?
Answer:
```python
import pandas as pd
def clean_kpi_export(df, min_valid_fraction=0.7):
    """Return a cleaned copy of a KPI export; the input frame is not modified.

    Steps: drop rows with too many missing values, convert text columns that
    hold only numbers to a numeric dtype, forward-fill the remaining gaps and
    remove duplicate rows.

    :param df: raw DataFrame as loaded from the CSV export
    :param min_valid_fraction: minimum share of non-missing columns a row
        needs to be kept (0.7 drops rows with more than 30% missing values)
    :return: cleaned DataFrame
    """
    import math

    # dropna() needs a whole number of required non-missing values; the small
    # epsilon guards against float round-up (e.g. 14.000000000000002 -> 15).
    min_valid = math.ceil(len(df.columns) * min_valid_fraction - 1e-9)
    cleaned = df.dropna(thresh=min_valid).copy()

    # Convert a text column only when every non-missing value parses as a
    # number; coercing blindly would turn identifier columns into NaN.
    for name in cleaned.columns:
        column = cleaned[name]
        is_text = (pd.api.types.is_object_dtype(column)
                   or pd.api.types.is_string_dtype(column))
        if not is_text:
            continue
        converted = pd.to_numeric(column, errors='coerce')
        if converted.notna().sum() == column.notna().sum():
            cleaned[name] = converted

    # Forward fill missing values, then remove duplicates
    return cleaned.ffill().drop_duplicates()


if __name__ == '__main__':
    # Load data from CSV and clean it
    df = clean_kpi_export(pd.read_csv('netact_kpi_export.csv'))
```
Q2: Write a Python function to calculate the 95th percentile
of RSRP values from a dataset.
Answer:
```python
def calculate_rsrp_95th_percentile(data):
    """Return the 95th percentile of the 'RSRP' values in ``data``.

    Missing (NaN) samples are ignored; a single NaN would otherwise make the
    whole result NaN.

    :param data: mapping or DataFrame whose 'RSRP' entry holds numeric values
    :return: the 95th percentile as a float, or NaN when there is no valid
        sample
    """
    import numpy as np

    rsrp = np.asarray(data['RSRP'], dtype=float)
    valid = rsrp[~np.isnan(rsrp)]
    if valid.size == 0:
        return float('nan')
    return float(np.percentile(valid, 95))
```
### Data Analysis Skills
Q3: How would you identify cells with poor performance
(e.g., high drop call rate) using Python?
Answer:
```python
def identify_poor_performing_cells(df, kpi_thresholds):
    """
    Identify rows whose KPI value exceeds its threshold.

    A row is reported once per KPI it violates, with the KPI name in the
    added 'Issue' column. The comparison is ``value > threshold``, so this
    suits KPIs where a higher value is worse (e.g. a drop call rate).
    Thresholds for KPIs that are not columns of ``df`` are ignored.

    :param df: DataFrame with KPI data
    :param kpi_thresholds: dict of {'KPI_name': threshold_value}
    :return: DataFrame with the problematic rows, sorted in descending order
        by the first threshold KPI present in ``df``; an empty frame with the
        input columns plus 'Issue' when no threshold applies
    """
    flagged = []
    for kpi, threshold in kpi_thresholds.items():
        if kpi not in df.columns:
            continue
        problematic = df.loc[df[kpi] > threshold].copy()
        problematic['Issue'] = kpi
        flagged.append(problematic)

    if not flagged:
        # Nothing to check: keep the result shape predictable for callers.
        empty = df.iloc[:0].copy()
        empty['Issue'] = pd.Series(dtype='object')
        return empty

    # One concat at the end instead of growing a frame inside the loop.
    problematic_cells = pd.concat(flagged)
    sort_key = next(kpi for kpi in kpi_thresholds if kpi in df.columns)
    return problematic_cells.sort_values(by=sort_key, ascending=False)
```
Q4: How would you visualize the relationship between RSRP
and throughput using Python?
Answer:
```python
import matplotlib.pyplot as plt
import seaborn as sns
def plot_rsrp_vs_throughput(df):
    """Show an RSRP/throughput scatter plot, then print their correlation.

    :param df: DataFrame with 'RSRP' and 'Throughput' columns
    :return: None
    """
    # Draw on an explicit axes of a fresh 10x6 figure
    plt.figure(figsize=(10, 6))
    axes = plt.gca()
    sns.scatterplot(x='RSRP', y='Throughput', data=df, alpha=0.5, ax=axes)
    axes.set_title('RSRP vs Throughput Relationship')
    axes.set_xlabel('RSRP (dBm)')
    axes.set_ylabel('Throughput (Mbps)')
    axes.grid(True)
    plt.show()

    # Pearson correlation between the two columns, reported after the plot
    rsrp, throughput = df['RSRP'], df['Throughput']
    correlation = rsrp.corr(throughput)
    print(f"Pearson correlation coefficient: {correlation:.2f}")
```
### Advanced Optimization Skills
Q5: Write a Python script to automate the identification of
coverage and capacity issues from NetAct data.
Answer:
```python
def analyze_network_health(df, thresholds=None):
    """Summarise coverage, capacity and quality problems per cell.

    :param df: DataFrame with 'CellID', 'RSRP', 'SINR' and
        'PRB_Utilization' columns
    :param thresholds: optional dict overriding any of the default limits
        listed below
    :return: dict with, for each category, the number of distinct affected
        cells and the array of their CellIDs:
        'poor_coverage_*' (RSRP below limit), 'congested_*' (PRB utilization
        above limit), 'poor_quality_*' (SINR below limit)
    """
    # Default limits (example values)
    limits = {
        'RSRP': -110,           # dBm
        'SINR': 0,              # dB
        'DropCallRate': 0.05,   # 5%
        'Throughput': 5,        # Mbps
        'Latency': 100,         # ms
        'PRB_Utilization': 80,  # %, above this a cell counts as congested
    }
    if thresholds:
        limits.update(thresholds)

    analysis_results = {}

    # Coverage analysis. Counts are distinct cells, not rows: a cell with
    # several bad samples must not be counted several times.
    poor_coverage = df.loc[df['RSRP'] < limits['RSRP'], 'CellID'].unique()
    analysis_results['poor_coverage_cells'] = len(poor_coverage)
    analysis_results['poor_coverage_list'] = poor_coverage

    # Capacity analysis
    congested = df.loc[
        df['PRB_Utilization'] > limits['PRB_Utilization'], 'CellID'
    ].unique()
    analysis_results['congested_cells'] = len(congested)
    analysis_results['congested_cell_list'] = congested

    # Quality analysis
    poor_quality = df.loc[df['SINR'] < limits['SINR'], 'CellID'].unique()
    analysis_results['poor_quality_cells'] = len(poor_quality)
    analysis_results['poor_quality_list'] = poor_quality
    return analysis_results
```
Q6: How would you implement a trend analysis for KPIs over
time using Python?
Answer:
```python
def analyze_kpi_trends(df, kpi_list,
                       time_column='Timestamp', *, plot=True):
    """Return weekly means of the given KPIs and optionally plot them.

    The caller's frame is left untouched, so the function can be called
    repeatedly on the same data.

    :param df: DataFrame holding ``time_column`` and the KPI columns
    :param kpi_list: list of KPI column names to aggregate
    :param time_column: name of the column with timestamps (anything
        ``pd.to_datetime`` can parse)
    :param plot: when True, draw one line per KPI and show the figure; pass
        False for headless/batch use
    :return: DataFrame of weekly means, indexed by week
    """
    # Index a derived frame by the parsed timestamps instead of converting
    # and re-indexing the input in place.
    indexed = df.set_index(pd.to_datetime(df[time_column]))

    # Resample data weekly
    weekly_trends = indexed[kpi_list].resample('W').mean()

    # Plot trends
    if plot:
        plt.figure(figsize=(12, 8))
        for kpi in kpi_list:
            weekly_trends[kpi].plot(label=kpi)
        plt.title('Weekly KPI Trends')
        plt.ylabel('KPI Values')
        plt.xlabel('Date')
        plt.legend()
        plt.grid(True)
        plt.show()
    return weekly_trends
```
### Practical Scenario Questions
Q7: You notice sudden spikes in handover failure rates. How
would you investigate this using Python?
Answer:
```python
def investigate_handover_failures(df):
    """Find hours with abnormal handover failure rates and what correlates.

    Handover attempts and failures are summed per hour; an hour is a spike
    when its failure rate exceeds the mean by more than three standard
    deviations. All rows falling inside spike hours are then correlated
    against 'HandoverFailures'. The caller's frame is not modified.

    :param df: DataFrame with 'Timestamp', 'HandoverAttempts' and
        'HandoverFailures' columns, plus any other KPI columns
    :return: dict with
        'spike_times' - hourly rows (sums and 'HOFailureRate') of the spikes,
        'correlation_analysis' - correlation of each numeric column with
        'HandoverFailures' inside spike hours, sorted descending
    """
    # Parse timestamps on a derived frame
    data = df.assign(Timestamp=pd.to_datetime(df['Timestamp']))

    # Calculate hourly handover failure rates
    hourly_ho = data.resample('h', on='Timestamp').agg({
        'HandoverAttempts': 'sum',
        'HandoverFailures': 'sum',
    })
    attempts = hourly_ho['HandoverAttempts']
    # Hours without attempts have no defined rate; keep them as NaN so they
    # do not distort the mean and standard deviation.
    hourly_ho['HOFailureRate'] = (
        hourly_ho['HandoverFailures'] / attempts.where(attempts > 0)
    )

    # Identify spikes (3 standard deviations above the mean)
    mean = hourly_ho['HOFailureRate'].mean()
    std = hourly_ho['HOFailureRate'].std()
    spikes = hourly_ho[hourly_ho['HOFailureRate'] > mean + 3 * std]

    # Correlate with other KPIs during spike times. Spike labels are hour
    # starts, so rows are matched by their floored hour; comparing raw
    # timestamps would only catch rows stamped exactly on the hour.
    in_spike_hour = data['Timestamp'].dt.floor('h').isin(spikes.index)
    correlated_kpis = (
        data[in_spike_hour].corr(numeric_only=True)['HandoverFailures']
    )
    return {
        'spike_times': spikes,
        'correlation_analysis':
            correlated_kpis.sort_values(ascending=False),
    }
```
Q8: How would you automate the generation of a daily
optimization report from NetAct data?
Answer:
```python
def generate_daily_report(df, report_date, *, output_dir='.',
                          save_plot=True):
    """Build, save and return the KPI summary of one day.

    Writes ``daily_report_<YYYY-MM-DD>.json`` and, when ``save_plot`` is
    True and the day has data, ``rsrp_distribution.png`` into ``output_dir``.

    :param df: DataFrame with 'Timestamp', 'CellID', 'RSRP', 'SINR', 'Drops'
        and 'Attempts' columns
    :param report_date: day to report; anything ``pd.Timestamp`` accepts
        (datetime, date, ISO string)
    :param output_dir: directory receiving the output files (created if
        missing)
    :param save_plot: when True, save an RSRP histogram; pass False for
        headless use
    :return: dict with the report date, number of cells, mean RSRP and SINR,
        overall call drop rate and the 10 cells with the most drops.
        Undefined statistics (no data, zero attempts) are None so the file
        stays valid JSON.
    """
    import json
    import math
    from pathlib import Path

    def _number_or_none(value):
        # NaN/inf are not valid JSON; report them as null instead.
        value = float(value)
        return value if math.isfinite(value) else None

    # Filter data for the report date (timestamps are parsed on the fly, the
    # caller's frame is not modified)
    report_day = pd.Timestamp(report_date).date()
    timestamps = pd.to_datetime(df['Timestamp'])
    daily_data = df[timestamps.dt.date == report_day]

    # Calculate summary statistics
    attempts = daily_data['Attempts'].sum()
    drops = daily_data['Drops'].sum()
    report = {
        'report_date': report_day.strftime('%Y-%m-%d'),
        'total_cells': int(daily_data['CellID'].nunique()),
        'avg_rsrp': _number_or_none(daily_data['RSRP'].mean()),
        'avg_sinr': _number_or_none(daily_data['SINR'].mean()),
        'call_drop_rate':
            _number_or_none(drops / attempts) if attempts else None,
        'top_10_problem_cells':
            daily_data.groupby('CellID')['Drops'].sum().nlargest(10).to_dict(),
    }

    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    # Generate visualizations
    if save_plot and not daily_data.empty:
        fig = plt.figure(figsize=(10, 5))
        daily_data['RSRP'].plot(kind='hist', bins=30)
        plt.title('RSRP Distribution')
        plt.savefig(out_dir / 'rsrp_distribution.png')
        # Close the figure so repeated daily runs do not accumulate figures.
        plt.close(fig)

    # Save report to file
    report_path = out_dir / f'daily_report_{report_day}.json'
    with open(report_path, 'w', encoding='utf-8') as f:
        json.dump(report, f, indent=4)
    return report
```
These questions test both fundamental Python skills and the
candidate's ability to apply them to real-world RF
optimization scenarios using data from NetAct. The answers
demonstrate practical approaches to common optimization
tasks.