To improve the multi-view deep learning approach with YOLO models,
we can implement more sophisticated methods for training, combining
predictions, and utilizing the strengths of each model.
Proposed modifications and enhancements:
1. Weighted Averaging for Combining Predictions:
Instead of simple averaging, we can use weighted averaging where the weights
are based on the confidence scores of the predictions.
2. Non-Maximum Suppression (NMS):
Applying NMS can help in reducing overlapping bounding boxes from different
views.
3. Feature Fusion:
Instead of just combining the final predictions, you can also fuse intermediate
features from different models before making the final prediction.
4. Training with a Combined Dataset:
Train each model not only on its specific view but also include some data from
other views to improve generalization.
5. Post-processing with Ensemble Techniques:
Use ensemble techniques like stacking to learn how to best combine the
predictions from different models.
Code
#yolo.py
from ultralytics import YOLO
# Load multiple models for different views
model_view1 = YOLO("yolov8n.yaml")  # model for view 1
model_view2 = YOLO("yolov8l.yaml")  # model for view 2
model_view3 = YOLO("yolov8x.yaml")  # model for view 3

# Train the models
# NOTE(review): every view is trained on the same "config1.yaml" for a single
# epoch -- confirm whether per-view dataset configs / more epochs were intended.
model_view1.train(data="config1.yaml", epochs=1)  # train the model for view 1
model_view2.train(data="config1.yaml", epochs=1)  # train the model for view 2
model_view3.train(data="config1.yaml", epochs=1)  # train the model for view 3
#Detect.py
import cv2
import numpy as np
from ultralytics import YOLO
from scipy.optimize import linear_sum_assignment
def load_models(model_paths):
    """
    Load YOLO models from the provided paths.

    Args:
        model_paths (list): List of paths to YOLO model weights.

    Returns:
        list: One ``YOLO`` instance per entry of ``model_paths``, in the
            same order. An empty input yields an empty list.
    """
    return [YOLO(model_path) for model_path in model_paths]
def open_cameras(camera_urls):
    """
    Open video capture streams for the provided camera URLs.

    String sources are stripped of surrounding whitespace first, because a
    comma-separated list typed by a user usually carries spaces after the
    commas. Non-string sources are passed through unchanged.

    This function does not check that the streams actually opened; callers
    are expected to test ``cap.isOpened()`` on the returned objects.

    Args:
        camera_urls (list): List of IP webcam URLs.

    Returns:
        list: List of video capture objects, in the same order as
            ``camera_urls``.
    """
    captures = []
    for url in camera_urls:
        source = url.strip() if isinstance(url, str) else url
        captures.append(cv2.VideoCapture(source))
    return captures
def compute_iou(box1, boxes):
    """
    Compute Intersection over Union (IoU) between a single box and
    multiple boxes.

    Boxes are given as corner coordinates ``[x1, y1, x2, y2]``.

    Args:
        box1 (array): Coordinates of the first box.
        boxes (array): Coordinates of the other boxes, one per row. A single
            1-D box is also accepted and treated as one row.

    Returns:
        array: IoU values for the box compared to the other boxes (one value
            per row of ``boxes``). Pairs whose union area is not positive
            (e.g. two zero-area boxes) get an IoU of 0 instead of NaN.
    """
    box1 = np.asarray(box1, dtype=float)
    boxes = np.atleast_2d(np.asarray(boxes, dtype=float))
    if boxes.size == 0:
        # Nothing to compare against.
        return np.zeros(0, dtype=float)

    # Corners of the intersection rectangle.
    x1 = np.maximum(box1[0], boxes[:, 0])
    y1 = np.maximum(box1[1], boxes[:, 1])
    x2 = np.minimum(box1[2], boxes[:, 2])
    y2 = np.minimum(box1[3], boxes[:, 3])

    # Clamp at zero so disjoint boxes contribute no intersection area.
    inter_area = np.maximum(0, x2 - x1) * np.maximum(0, y2 - y1)
    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    union_area = box1_area + boxes_area - inter_area

    # Divide only where the union is positive; elsewhere the IoU stays 0,
    # which avoids 0/0 -> NaN and the accompanying RuntimeWarning.
    iou = np.zeros_like(union_area)
    np.divide(inter_area, union_area, out=iou, where=union_area > 0)
    return iou
def nms(predictions, iou_threshold=0.5):
    """
    Perform Non-Maximum Suppression (NMS) on the predictions.

    Predictions are visited from highest to lowest score. Each kept
    prediction suppresses every remaining prediction whose IoU with it is
    greater than ``iou_threshold``. Labels are not consulted, so suppression
    is class-agnostic.

    Args:
        predictions (list): List of prediction dictionaries with
            'boxes' and 'scores'.
        iou_threshold (float): IoU threshold for NMS.

    Returns:
        list: List of filtered predictions after NMS, ordered by descending
            score.
    """
    if len(predictions) == 0:
        return []

    boxes = np.array([pred['boxes'] for pred in predictions])
    scores = np.array([pred['scores'] for pred in predictions])

    # Candidate indices, best score first.
    order = np.argsort(scores)[::-1]
    keep_indices = []
    while len(order) > 0:
        best = order[0]
        keep_indices.append(best)
        if len(order) == 1:
            break
        rest = order[1:]
        ious = compute_iou(boxes[best], boxes[rest])
        # Only candidates that do not overlap the kept box too much survive.
        order = rest[ious <= iou_threshold]
    return [predictions[i] for i in keep_indices]
def combine_predictions(predictions_list, weights=None):
    """
    Combine predictions from multiple views using weighted averaging.

    The i-th prediction of every view is merged into one prediction: boxes
    and scores are weight-averaged, and the label is taken from the view
    with the highest score. Views are paired by position with ``zip``, so
    the result has as many entries as the view with the fewest predictions;
    surplus predictions in other views are dropped.

    NOTE(review): positional pairing assumes the i-th detection of each view
    refers to the same object -- confirm this holds for the caller's data.

    Args:
        predictions_list (list): List of predictions from multiple
            views; each view is a list of dictionaries with 'boxes',
            'scores' and 'labels'.
        weights (list): List of weights for averaging predictions, one per
            view. If omitted, every view is weighted equally.

    Returns:
        list: List of combined predictions after averaging and NMS.

    Raises:
        ValueError: If the number of weights differs from the number of
            views.
    """
    num_views = len(predictions_list)
    if num_views == 0:
        return []

    if weights is None:
        weights = [1.0 / num_views] * num_views
    elif len(weights) != num_views:
        # Fail fast with a clear message instead of an opaque np.average
        # error that only appears once every view has a detection.
        raise ValueError(
            f"Expected {num_views} weights (one per view), got {len(weights)}"
        )

    combined_result = []
    for preds in zip(*predictions_list):
        combined_boxes = np.average(
            [pred['boxes'] for pred in preds], axis=0, weights=weights
        )
        combined_scores = np.average(
            [pred['scores'] for pred in preds], axis=0, weights=weights
        )
        # The most confident view decides the class label.
        combined_labels = max(preds, key=lambda p: p['scores'])['labels']
        combined_result.append({
            'boxes': combined_boxes,
            'scores': combined_scores,
            'labels': combined_labels,
        })

    if not combined_result:
        return []
    return nms(combined_result)
def track_objects(detections, prev_detections, iou_threshold=0.3):
    """
    Track objects across frames using detected boxes and IoU matching.

    An IoU matrix between current and previous boxes is built and a
    one-to-one assignment maximising total IoU is found with
    ``linear_sum_assignment``. Assigned pairs below ``iou_threshold`` are
    discarded.

    Args:
        detections (list): Current frame detections (dictionaries with a
            'boxes' entry).
        prev_detections (list): Previous frame detections (same format).
        iou_threshold (float): IoU threshold for matching detections.

    Returns:
        list: List of ``(current_index, previous_index)`` tuples for matched
            objects. Empty if either input is empty.
    """
    if len(detections) == 0 or len(prev_detections) == 0:
        return []

    prev_boxes = np.array([prev_det['boxes'] for prev_det in prev_detections])
    iou_matrix = np.zeros(
        (len(detections), len(prev_detections)), dtype=np.float32
    )
    for i, det in enumerate(detections):
        # One vectorised call fills the whole row; this also avoids assigning
        # a 1-element array into a scalar cell (deprecated in NumPy >= 1.25).
        iou_matrix[i] = compute_iou(det['boxes'], prev_boxes)

    # linear_sum_assignment rejects NaN entries; treat them as "no overlap".
    iou_matrix = np.nan_to_num(iou_matrix)

    # Negate because linear_sum_assignment minimises cost.
    row_ind, col_ind = linear_sum_assignment(-iou_matrix)
    return [
        (int(i), int(j))
        for i, j in zip(row_ind, col_ind)
        if iou_matrix[i, j] >= iou_threshold
    ]
def _results_to_dicts(result):
    """Convert one detection result into a list of prediction dictionaries.

    Assumes ``result`` is an ultralytics ``Results`` object whose ``boxes``
    attribute exposes ``xyxy``, ``conf`` and ``cls`` (see the Ultralytics
    documentation) -- TODO confirm against the installed version.
    """
    boxes = result.boxes
    if boxes is None:
        return []
    boxes = boxes.cpu().numpy()
    return [
        {'boxes': box, 'scores': float(score), 'labels': int(label)}
        for box, score, label in zip(boxes.xyxy, boxes.conf, boxes.cls)
    ]


def _read_frames(caps):
    """Read one frame from every capture; return None if any read fails."""
    frames = []
    for cap in caps:
        ret, frame = cap.read()
        if not ret:
            return None
        frames.append(frame)
    return frames


def _draw_predictions(frame, predictions):
    """Return a copy of ``frame`` with each prediction's box and caption drawn."""
    annotated = frame.copy()
    for pred in predictions:
        x1, y1, x2, y2 = (int(round(float(v))) for v in pred['boxes'])
        cv2.rectangle(annotated, (x1, y1), (x2, y2), (0, 255, 0), 2)
        caption = f"{pred['labels']} {float(pred['scores']):.2f}"
        cv2.putText(annotated, caption, (x1, max(y1 - 5, 0)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
    return annotated


def main():
    """Run multi-view YOLO detection on IP webcam streams until 'q' is pressed.

    Prompts for comma-separated stream URLs, loads one model per view, and
    then repeatedly: reads a frame from every stream, runs the view's model,
    combines the per-view predictions, matches them against the previous
    frame's detections, and displays the annotated frames. Captures and
    windows are released on every exit path.
    """
    # Get IP webcam URLs from the user
    raw_urls = input(
        "Enter the IP webcam URLs separated by commas (e.g., "
        "http://<IP_ADDRESS1>:<PORT>/video,http://<IP_ADDRESS2>:<PORT>/video): "
    )
    # Users commonly type spaces after commas; drop them and empty entries.
    ip_webcam_urls = [url.strip() for url in raw_urls.split(',') if url.strip()]

    # Paths to the YOLO models for each view
    model_paths = [
        r"C:\Users\SRIKANTH\PycharmProjects\yolov8\runs\detect\train2\weights\best.pt",
        r"C:\Users\SRIKANTH\PycharmProjects\yolov8\runs\detect\train3\weights\best.pt",
        # Add paths to other models as needed
    ]

    # Load YOLO models
    models = load_models(model_paths)

    # Open connections to the IP webcams
    caps = open_cameras(ip_webcam_urls)

    try:
        # Check if all video streams opened successfully
        if not caps or not all(cap.isOpened() for cap in caps):
            print("Error: Could not open one or more video streams")
            return
        print("Successfully opened video streams")

        # Each view needs its own model; zip() would otherwise silently
        # ignore the surplus cameras or models.
        if len(caps) != len(models):
            print(f"Error: {len(caps)} video streams were given but "
                  f"{len(models)} models are configured")
            return

        # Weights for combining the views. Adjust weights as needed; if the
        # list does not match the number of models, weight all views equally.
        weights = [0.3, 0.4, 0.3]
        if len(weights) != len(models):
            weights = [1.0 / len(models)] * len(models)

        # Store previous detections for tracking
        prev_detections = []

        while True:
            frames = _read_frames(caps)
            if frames is None:
                # Stop as soon as any stream fails, so a partial frame set
                # is never paired with the wrong models.
                print("Error: Failed to capture image from one of the streams")
                break

            # Perform YOLO detection on each frame
            predictions = [
                _results_to_dicts(model(frame, save=False)[0])
                for model, frame in zip(models, frames)
            ]

            # Combine predictions from all views using weighted averaging
            combined_predictions = combine_predictions(predictions, weights)

            # Track objects across consecutive frames
            # NOTE(review): `matches` is computed but not consumed yet.
            matches = track_objects(combined_predictions, prev_detections)
            prev_detections = combined_predictions

            # Plot the combined results on the frames (one window per view)
            for view_index, frame in enumerate(frames, start=1):
                cv2.imshow(f'YOLO Detection - view {view_index}',
                           _draw_predictions(frame, combined_predictions))

            # Exit the loop if 'q' is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the video capture objects and close display windows
        for cap in caps:
            cap.release()
        cv2.destroyAllWindows()
# Run the detection loop only when this file is executed as a script.
if __name__ == "__main__":
    main()