| """ Example using TF Lite to detect objects with the Raspberry USB camera.
Hardware: - Pi 3b+ - usb camera
Software - python 3.7.3 - tflite runtime 2.1 - opencv
Dataset - coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.zip """

import re
import time

import cv2
import numpy as np
from tflite_runtime.interpreter import Interpreter

args_camera_width = 640
args_camera_height = 480
args_model = 'detect.tflite'
args_labels = 'coco_labels.txt'
args_threshold = 0.4


def load_labels(path):
    """Loads the labels file. Supports files with or without index numbers."""
    with open(path, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    labels = {}
    for row_number, content in enumerate(lines):
        pair = re.split(r'[:\s]+', content.strip(), maxsplit=1)
        if len(pair) == 2 and pair[0].strip().isdigit():
            # Indexed line such as "0  person": use the explicit class id.
            labels[int(pair[0])] = pair[1].strip()
        else:
            # Plain label: fall back to the row number as the class id.
            labels[row_number] = pair[0].strip()
    return labels
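
# Example: both of these label-file layouts yield {0: 'person', 1: 'bicycle'}:
#   "0  person\n1  bicycle"   (indexed)
#   "person\nbicycle"         (plain; the row number becomes the id)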


def detect_objects(interpreter, image, threshold):
    """Runs one inference pass and returns detections above the threshold."""
    interpreter.set_tensor(input_details[0]['index'], image)
    interpreter.invoke()

    # SSD MobileNet outputs: bounding boxes, class indices, confidence scores.
    boxes = np.squeeze(interpreter.get_tensor(output_details[0]['index']))
    classes = np.squeeze(interpreter.get_tensor(output_details[1]['index'])).astype(np.int32)
    scores = np.squeeze(interpreter.get_tensor(output_details[2]['index']))

    results = []
    for i, score in enumerate(scores):
        if score >= threshold:
            results.append({'box': boxes[i], 'class_id': classes[i], 'score': score})
    return results
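
# Each returned detection is a plain dict, for example (values illustrative):
#   {'box': array([ymin, xmin, ymax, xmax]), 'class_id': 0, 'score': 0.72}
# where the box coordinates are normalised to the [0, 1] range.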


def annotate_objects(image, results):
    """Draws a labelled bounding box for each detection."""
    for rst in results:
        ymin, xmin, ymax, xmax = rst['box']
        name = labels_dict[rst['class_id']]
        score = rst['score']

        # Scale the normalised coordinates back to the camera resolution.
        xmin = int(xmin * args_camera_width)
        xmax = int(xmax * args_camera_width)
        ymin = int(ymin * args_camera_height)
        ymax = int(ymax * args_camera_height)
        cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 255, 0))

        txt = f'{name} {score:.2%}'
        cv2.putText(image, txt, (xmin, ymin), 0, 1, (255, 255, 255), 2)


labels_dict = load_labels(args_labels)
print('labels_dict: \n ', labels_dict)

interpreter = Interpreter(args_model)
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
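
# For the quantised SSD MobileNet model named in the docstring, the input
# tensor is expected to be a [1, 300, 300, 3] uint8 array; if you swap in a
# different model, check input_details[0]['shape'] and ['dtype'] first.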

camera = cv2.VideoCapture(0)
# OpenCV property 3 is frame width and 4 is frame height; use the named
# constants so the width and height values are not swapped.
camera.set(cv2.CAP_PROP_FRAME_WIDTH, args_camera_width)
camera.set(cv2.CAP_PROP_FRAME_HEIGHT, args_camera_height)

frame_rate_calc = 1.0
freq = cv2.getTickFrequency()

while True:
    t1 = cv2.getTickCount()

    ret, frame = camera.read()
    if not ret:
        break

    # The quantised model expects a 300x300 uint8 RGB image; OpenCV captures
    # in BGR, so convert before resizing.
    input_image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    input_image = cv2.resize(input_image, (300, 300))
    input_image = np.expand_dims(input_image, axis=0).astype(np.uint8)

    results = detect_objects(interpreter, input_image, args_threshold)
    print(f'--- {time.strftime("%Y-%m-%d %H:%M:%S")} ---')
    for rst in results:
        name = labels_dict[rst['class_id']]
        print(f"* {name} : {rst['score']:.2%} @ {rst['box']}")
    annotate_objects(frame, results)

    txt = f'FPS: {frame_rate_calc:.2f}'
    cv2.putText(frame, txt, (20, 30), 0, 1, (0, 255, 255), 2)

    cv2.imshow('Object detect', frame)

    t2 = cv2.getTickCount()
    frame_rate_calc = freq / (t2 - t1)

    # Press 'q' to quit; otherwise the cleanup below is never reached.
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

camera.release()
cv2.destroyAllWindows()
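
# To run: place detect.tflite and coco_labels.txt (from the dataset archive
# named in the docstring) next to this script, then launch it with python3.
# Press 'q' in the preview window to stop.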