Functional style:
The "auto_size" in the code was created by myself because adjusting the detection parameters based on the frame size is the most efficient method. However, during actual usage, it was discovered that the size must be a multiple of 128 in order for it to work properly. Therefore, it was written in this way.
YOLOv4 Tiny vs YOLOv5 N: YOLOv4 Tiny is much faster, so you can use YOLOv4 Tiny in Pydroid 3. https://youtu.be/FuDwIU9Ux2k
import time
from fastapi import FastAPI, Response, Request
from fastapi.responses import StreamingResponse, HTMLResponse, FileResponse
import cv2
import os, urllib
import numpy as np
import threading
app = FastAPI(debug=True)
# Shared state used by the streaming endpoint and the background detection thread
TempFrame = None
frame = None  # latest frame captured from the camera
FrameRate = 24
camera = None
class_dict = {
"People": {"person", "face","upper body","lower body","eye"},
"Objects": {"backpack", "umbrella", "handbag", "tie", "suitcase", "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "hair drier", "toothbrush"},
"Animals": {"bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "teddy bear"},
"Vehicles": {"bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat"},
"Facilities": {"traffic light", "fire hydrant", "stop sign", "parking meter"},
"Sports": {"frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket"},
"Food": {"bowl",'banana', "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake"},
"Utensils": {"fork", "knife", "spoon", "bottle", "wine glass", "cup"}
}
class_names = ['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
colors = {"People": (0, 255, 255), "Objects": (255, 0, 0), "Animals": (0, 255, 0), "Vehicles": (128, 0, 128), "Food": (0, 165, 255), "Sports": (200, 200, 0), "Facilities": (156, 156, 0), "Utensils": (255, 128, 64)}
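# Load the YOLOv4-tiny network (Darknet .cfg/.weights) with OpenCV's DNN module; the paths point to Pydroid 3's shared storage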
model = cv2.dnn.readNetFromDarknet('/sdcard/Documents/Pydroid3/yolov4-tiny.cfg', '/sdcard/Documents/Pydroid3/yolov4-tiny.weights')
model.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
model.setPreferableTarget(cv2.dnn.DNN_TARGET_OPENCL)
layer_names = model.getLayerNames()
try:
    # Older OpenCV builds return each output-layer index wrapped in a 1-element array
    output_layers = [layer_names[i[0] - 1] for i in model.getUnconnectedOutLayers()]
except (IndexError, TypeError):
    # Newer builds return plain integer indices
    output_layers = [layer_names[i - 1] for i in model.getUnconnectedOutLayers()]
temp_cases=None
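# Run one forward pass on a frame and return a list of (category, class name, confidence, colour, box) tuples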
def detect_frame(frame, model=model, output_layers=output_layers, class_dict=class_dict, colors=colors, size=(416, 416)):
t0=time.time()
blob = cv2.dnn.blobFromImage(frame, 1.0/255,size, (0, 0, 0), swapRB=True, crop=False)
model.setInput(blob)
layer_outputs = model.forward(output_layers)
boxes = []
confidences = []
class_ids = []
cases = []
for output in layer_outputs:
for detection in output:
scores = detection[5:]
class_id = np.argmax(scores)
confidence = scores[class_id]
if confidence > 0.4:
center_x = int(detection[0] * frame.shape[1])
center_y = int(detection[1] * frame.shape[0])
width = int(detection[2] * frame.shape[1])
height = int(detection[3] * frame.shape[0])
left = int(center_x - width / 2)
top = int(center_y - height / 2)
boxes.append([left, top, width, height])
confidences.append(float(confidence))
class_ids.append(class_id)
indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
for i in indices:
        try:
            i = i[0]  # older OpenCV wraps each index in a 1-element array
        except (IndexError, TypeError):
            pass      # newer OpenCV already returns plain indices
box = boxes[i]
x = box[0]
y = box[1]
w = box[2]
h = box[3]
class_name = class_names[class_ids[i]]
color = (255, 255, 255)
class0 = None
        # Look up the coarse category and its colour for this class name
        for key, names in class_dict.items():
            if class_name in names:
                color = colors.get(key, color)
                class0 = key
                break
cases.append((class0, class_name, confidences[i], color, box))
t=time.time()-t0
print(f"Object Detection {t} secs.")
return cases
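# Draw a labelled bounding box on the frame for each detection returned by detect_frame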
def draw_frame(frame, cases):
for case in cases:
class0, class_name, confidence, color, box = case
x, y, w, h = box
label = f"{class_name}: {confidence:.2f}"
if class0:
label = f"{class0}-{label}"
cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
cv2.rectangle(frame, (x, y - 20), (x + w, y), (255, 255, 255), -1)
cv2.putText(frame, label, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)
return frame
# Choose a network input size that is a multiple of 128, based on the frame size
def auto_size(frame, fast=False):
    h1, w1 = frame.shape[:2]  # frame.shape is (height, width, channels)
    if fast:
        w1 = w1 / 2.2
        h1 = h1 / 2.2
    rW = max(int(w1 / 128), 2)
    rH = max(int(h1 / 128), 2)
    size_d = (rW * 128, rH * 128)  # (width, height), as expected by blobFromImage
    return size_d
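# Helpers for converting between encoded image data and OpenCV frames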
def file_to_frame(resp):
img_bytes = np.asarray(bytearray(resp.read()), dtype=np.uint8)
frame= cv2.imdecode(img_bytes, cv2.IMREAD_COLOR)
return frame
def bytes_to_frame(bytes):
img_bytes = np.asarray(bytearray(bytes), dtype=np.uint8)
frame= cv2.imdecode(img_bytes, cv2.IMREAD_COLOR)
return frame
def frame_to_bytes(frame, format="png"):
    # Encode the frame in the requested format and return the raw bytes
    is_success, buffer = cv2.imencode(f".{format}", frame)
    if is_success:
        return buffer.tobytes()
    else:
        return None
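# Decode image bytes, run detection, draw the results and re-encode the image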
def draw_bytes(bytes, format="png", size_d=None):
frame = bytes_to_frame(bytes)
if not size_d:
size_d = auto_size(frame)
cases = detect_frame(frame, size=size_d)
drawn_frame = draw_frame(frame, cases)
return frame_to_bytes(drawn_frame, format)
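# Detect and draw objects in an image file; return the encoded image as bytes or save it to a path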
def drawing_image(path, output="bytes", format="png",size_d=None):
# Read the input image
frame = cv2.imread(path)
if not size_d:
size_d=auto_size(frame)
# Detect objects in the image
cases = detect_frame(frame,size=size_d)
# Draw the detected objects on the image
drawn_frame = draw_frame(frame, cases)
if output == "bytes":
# Save the image as bytes
is_success, buffer = cv2.imencode(f".{format}", drawn_frame)
if is_success:
return np.array(buffer).tobytes()
else:
return None
else:
# Save the image to the specified output path
cv2.imwrite(output, drawn_frame)
return f"Image saved to {output}"
def detect_frame_thread(t=0.2):
    global temp_cases, detection_time, frame
    while True:
        # Run detection only when a frame is available and at least t seconds have passed
        if frame is not None and (time.time() - detection_time) > t:
            cases = detect_frame(frame, model, output_layers, class_dict, colors, size=auto_size(frame, fast=True))
            temp_cases = cases
            detection_time = time.time()
        else:
            time.sleep(0.01)  # avoid busy-waiting between detections
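# Frame generator for the multipart stream: grab camera frames, overlay the most recent detections and yield encoded chunks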
def generate_frames():
global TempFrame, camera, frame, temp_cases, detection_time
detection_time = time.time()
    detection_thread = threading.Thread(target=detect_frame_thread, daemon=True)  # daemon thread so it stops with the server
detection_thread.start()
while True:
success, frame = camera.read()
if not success:
#yield (b'--frame\r\n' b'Content-Type: image/png\r\n\r\n' + TempFrame + b'\r\n')
time.sleep(1.0 / FrameRate)
else:
if temp_cases and (time.time()-detection_time)<1.5:
frame = draw_frame(frame, temp_cases)
_, frame_encoded = cv2.imencode(".png", frame)
frame_bytes = frame_encoded.tobytes()
yield (b'--frame\r\n' b'Content-Type: image/png\r\n\r\n' + frame_bytes + b'\r\n')
#time.sleep(1.0 / FrameRate)
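# Open the camera when the server starts and release it on shutdown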
@app.on_event("startup")
def startup_event():
global camera
camera = cv2.VideoCapture(0)
camera.set(cv2.CAP_PROP_FPS, FrameRate)
@app.on_event("shutdown")
def shutdown_event():
global camera
if camera:
camera.release()
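# Single-image test endpoint; expects a test.jpg in the working directory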
@app.get('/test')
def testD():
imP="test.jpg"
b=drawing_image(imP)
    return Response(b, media_type="image/png")
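# Live detection stream consumed by the <img> tag on the /multi page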
@app.get('/video_feed')
def video_feed():
return StreamingResponse(generate_frames(), media_type="multipart/x-mixed-replace;boundary=frame")
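# Minimal HTML page that embeds the live video stream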
@app.get("/multi")
def video():
return HTMLResponse("""
<!DOCTYPE html>
<html>
<head>
<meta name="viewport" content="initial-scale=1.0, width=device-width">
<style>
table td {
border: 1px solid black;
border-radius: 6px;
padding :6px;
}
</style>
<title>Multi Detection</title>
</head>
<body>
<h1>Multi Decetion</h1>
<img width="90%" src="/video_feed"></img>
</body>
</html>
""")
if __name__ == "__main__":
import uvicorn
import webbrowser
host = "localhost"
port = 8080
    import platform
    uri = f"http://{host}:{port}/multi"
    pv = platform.python_version()
    if pv.startswith("3.9.7"):
        # Pydroid 3 ships Python 3.9.7, so open the page with the system browser
        webbrowser.open(uri)
    else:
        # Otherwise assume Termux and use termux-open
        os.system(f"termux-open {uri}")
uvicorn.run(app, host=host, port=port)