Translate

OpenCV object detection + FastAPI

This OpenCV object detection + FastAPI demo is the final version for now, although it may be revised later if necessary. After a series of tests and improvements, YOLOv4 Tiny was chosen because it gives smoother and better performance. Detection runs every 0.3 seconds in a background thread, and detections are grouped into higher-level categories, with each label drawn on a filled background so that the text is not obscured by the image behind it. The web page can be refreshed and keeps working normally, and the code has been restructured in a functional style to make future modification, reuse, debugging, and loading of other models easier. https://youtube.com/shorts/TVjwkNIMg1U?feature=share

Functional style :
The "auto_size" function in the code is my own addition, because scaling the detection input size to the frame size proved to be the most efficient approach. In practice, however, detection only worked properly when the size was a multiple of 128, which is why the function rounds the size to multiples of 128.
YOLOv4 Tiny vs. YOLOv5 N: YOLOv4 Tiny is much faster, and it can be used in Pydroid 3. https://youtu.be/FuDwIU9Ux2k

import time
from fastapi import FastAPI, Response, Request
from fastapi.responses import StreamingResponse, HTMLResponse,FileResponse
import cv2
import os,urllib
import numpy as np
import threading



# FastAPI application object; the route and lifecycle decorators below attach to it.
app = FastAPI(debug=True)

# Not assigned anywhere in this file; only named in generate_frames' global
# statement and in a commented-out line there.
TempFrame = None
# Frame rate requested from the camera at startup; also sets the retry delay
# (1 / FrameRate seconds) after a failed camera read.
FrameRate = 24
# Capture device: opened in startup_event, released in shutdown_event.
camera = None

# Maps a display category to the set of class labels that belong to it.
# detect_frame looks up each detected label here to choose the category prefix
# and box color.
# NOTE(review): "bench", "apple" and "sandwich" appear in class_names but in no
# category, so detect_frame leaves them with the default white color and no
# category. "face", "upper body", "lower body" and "eye" are listed here but
# have no entry in class_names.
class_dict = {
        "People": {"person", "face","upper body","lower body","eye"},
        "Objects": {"backpack", "umbrella", "handbag", "tie", "suitcase", "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "hair drier", "toothbrush"},
        "Animals": {"bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "teddy bear"},
        "Vehicles": {"bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat"},
        "Facilities": {"traffic light", "fire hydrant", "stop sign", "parking meter"},
        "Sports": {"frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket"},
        "Food": {"bowl",'banana', "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake"},
        "Utensils": {"fork", "knife", "spoon", "bottle", "wine glass", "cup"}
}

# Class labels indexed by the class id that detect_frame takes from the
# network's score vector (class_names[class_id]).
class_names = ['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
# Box color per category, passed to cv2.rectangle by draw_frame.
# NOTE(review): presumably in OpenCV's BGR channel order — confirm.
colors = {"People": (0, 255, 255), "Objects": (255, 0, 0), "Animals": (0, 255, 0), "Vehicles": (128, 0, 128), "Food": (0, 165, 255),"Sports":(200,200,0),"Facilities":(156,156,0),"Utensils":(255,128,64)}

# Load the YOLOv4-Tiny Darknet network from the Pydroid3 documents folder and
# select the OpenCV backend with the OpenCL target.
model = cv2.dnn.readNetFromDarknet('/sdcard/Documents/Pydroid3/yolov4-tiny.cfg', '/sdcard/Documents/Pydroid3/yolov4-tiny.weights')
model.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
model.setPreferableTarget(cv2.dnn.DNN_TARGET_OPENCL)
layer_names = model.getLayerNames()
# getUnconnectedOutLayers() returns 1-based layer indices, either as a flat
# array or as an N x 1 array depending on the OpenCV version. Flattening handles
# both shapes without relying on a bare ``except`` to tell them apart.
output_layers = [
    layer_names[int(layer_index) - 1]
    for layer_index in np.asarray(model.getUnconnectedOutLayers()).flatten()
]

# Most recent detection results; written by detect_frame_thread and read by
# generate_frames.
temp_cases = None

def detect_frame(frame, model=model, output_layers=output_layers, class_dict=class_dict, colors=colors,size=(416,416)):
    """Run the detection network on one frame and return the kept detections.

    Args:
        frame: Image array; ``frame.shape[0]`` and ``frame.shape[1]`` are used
            as its height and width when scaling box coordinates.
        model: Network object providing ``setInput`` and ``forward``.
        output_layers: Names of the layers whose outputs are read.
        class_dict: Mapping of category name to a collection of class labels.
        colors: Mapping of category name to the color used for that category.
        size: Input size handed to ``cv2.dnn.blobFromImage``.

    Returns:
        A list of ``(category, class_name, confidence, color, box)`` tuples,
        one per detection that survives non-maximum suppression. ``box`` is
        ``[left, top, width, height]`` in frame pixels. ``category`` is
        ``None`` and ``color`` is white when the label is in no category.
    """
    t0 = time.time()
    blob = cv2.dnn.blobFromImage(frame, 1.0 / 255, size, (0, 0, 0), swapRB=True, crop=False)
    model.setInput(blob)
    layer_outputs = model.forward(output_layers)

    frame_height, frame_width = frame.shape[:2]
    boxes = []
    confidences = []
    class_ids = []

    # Pass 1: collect every candidate whose best class score clears 0.4.
    for output in layer_outputs:
        for detection in output:
            scores = detection[5:]
            class_id = int(np.argmax(scores))
            confidence = scores[class_id]
            if not confidence > 0.4:
                continue
            # Coordinates are scaled by the frame size; the first two values
            # are treated as the box center.
            center_x = int(detection[0] * frame_width)
            center_y = int(detection[1] * frame_height)
            width = int(detection[2] * frame_width)
            height = int(detection[3] * frame_height)
            left = int(center_x - width / 2)
            top = int(center_y - height / 2)
            boxes.append([left, top, width, height])
            confidences.append(float(confidence))
            class_ids.append(class_id)

    # Pass 2: run non-maximum suppression once over all candidates. Running it
    # inside the loop above repeated the work for every candidate and appended
    # the same surviving box to the result many times.
    cases = []
    if boxes:
        kept = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
        # Indices come back flat or as N x 1 depending on the OpenCV version.
        for index in np.asarray(kept).flatten():
            index = int(index)
            class_name = class_names[class_ids[index]]
            color = (255, 255, 255)
            class0 = None
            for key in class_dict:
                # A category with no configured color is ignored; if several
                # categories contain the label, the last one wins.
                if class_name in class_dict[key] and key in colors:
                    color = colors[key]
                    class0 = key
            cases.append((class0, class_name, confidences[index], color, boxes[index]))

    t = time.time() - t0
    print(f"Object Detection {t} secs.")
    return cases

def draw_frame(frame, cases):
    """Draw a box and a caption on ``frame`` for every detection in ``cases``.

    Each case is a ``(category, class_name, confidence, color, box)`` tuple
    with ``box`` given as ``(x, y, w, h)``. The frame is modified in place and
    also returned.
    """
    white = (255, 255, 255)
    black = (0, 0, 0)
    for category, name, score, box_color, (left, top, box_w, box_h) in cases:
        caption = f"{name}: {score:.2f}"
        if category:
            caption = f"{category}-{caption}"
        right = left + box_w
        # Outline of the detected object.
        cv2.rectangle(frame, (left, top), (right, top + box_h), box_color, 2)
        # Filled white strip above the box, behind the caption text.
        cv2.rectangle(frame, (left, top - 20), (right, top), white, -1)
        cv2.putText(frame, caption, (left, top - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, black, 2)
    return frame


def auto_size(frame,fast=False):
    """Choose a network input size (width, height) from the frame dimensions.

    Each dimension is rounded down to a multiple of 128, with a minimum of
    256. With ``fast=True`` both dimensions are first divided by 2.2, which
    gives a smaller input size.

    Args:
        frame: Object with a ``shape`` whose first two entries are
            (height, width), as for an image array.
        fast: Shrink the frame dimensions by 2.2 before rounding.

    Returns:
        ``(width, height)`` tuple of ints, the order used for the ``size``
        argument of ``cv2.dnn.blobFromImage``.
    """
    # shape is (rows, cols, ...), i.e. height first. Unpacking it as
    # (width, height) transposed the returned size.
    height, width = frame.shape[:2]
    if fast:
        width = width / 2.2
        height = height / 2.2
    cols = max(int(width / 128), 2)
    rows = max(int(height / 128), 2)
    return (int(cols * 128), int(rows * 128))
    
def file_to_frame(resp):
    """Read all data from ``resp`` and decode it as a color image.

    ``resp`` only needs a ``read()`` method. The result is whatever
    ``cv2.imdecode`` returns for the data.
    """
    raw = resp.read()
    encoded = np.asarray(bytearray(raw), dtype=np.uint8)
    return cv2.imdecode(encoded, cv2.IMREAD_COLOR)

def bytes_to_frame(bytes):
    """Decode encoded image data (a bytes-like object) as a color image.

    The result is whatever ``cv2.imdecode`` returns for the data.
    """
    encoded = np.asarray(bytearray(bytes), dtype=np.uint8)
    return cv2.imdecode(encoded, cv2.IMREAD_COLOR)

def frame_to_bytes(frame, format=""):
    """Encode ``frame`` into an image file held in memory.

    Args:
        frame: Image to encode.
        format: File extension without the dot (for example ``"png"``). An
            empty value falls back to ``"png"``, the default used by the other
            helpers in this file; otherwise the extension passed to the
            encoder would be just ``"."``, which names no image format.

    Returns:
        The encoded image as ``bytes``, or ``None`` if encoding was not
        successful.
    """
    extension = format if format else "png"
    is_success, buffer = cv2.imencode(f".{extension}", frame)
    if not is_success:
        return None
    return np.asarray(buffer).tobytes()


def draw_bytes(bytes, format="png", size_d=None):
    """Decode an encoded image, annotate its detections, and re-encode it.

    Args:
        bytes: Encoded image data.
        format: Output file extension without the dot.
        size_d: Detection input size; derived with ``auto_size`` when falsy.

    Returns:
        Whatever ``frame_to_bytes`` returns for the annotated frame.
    """
    image = bytes_to_frame(bytes)
    detect_size = size_d if size_d else auto_size(image)
    detections = detect_frame(image, size=detect_size)
    annotated = draw_frame(image, detections)
    return frame_to_bytes(annotated, format)
    

def drawing_image(path, output="bytes", format="png",size_d=None):
    """Detect objects in an image file and return or save the annotated image.

    Args:
        path: Path of the image file to read.
        output: ``"bytes"`` to get the encoded image back; any other value is
            used as the path to write the annotated image to.
        format: File extension (without the dot) used when ``output`` is
            ``"bytes"``.
        size_d: Detection input size; derived with ``auto_size`` when falsy.

    Returns:
        The encoded image bytes (``None`` if encoding was not successful) when
        ``output == "bytes"``, otherwise the message ``"Image saved to ..."``.

    Raises:
        OSError: If the input image could not be read, or the output image
            could not be written.
    """
    frame = cv2.imread(path)
    # cv2.imread gives None instead of raising when it cannot load the file.
    # Fail here with a clear message rather than later on ``frame.shape``.
    if frame is None:
        raise OSError(f"Could not read image: {path!r}")
    if not size_d:
        size_d = auto_size(frame)

    cases = detect_frame(frame, size=size_d)
    drawn_frame = draw_frame(frame, cases)

    if output == "bytes":
        # Same encoding path as draw_bytes.
        return frame_to_bytes(drawn_frame, format)

    # cv2.imwrite reports failure through its return value.
    if not cv2.imwrite(output, drawn_frame):
        raise OSError(f"Could not write image: {output!r}")
    return f"Image saved to {output}"


def detect_frame_thread(t=0.2):
    """Run detection on the latest camera frame in an endless loop.

    Intended as a thread target: the loop never returns. Once more than ``t``
    seconds have passed since ``detection_time``, the current global ``frame``
    is run through ``detect_frame``, the result is stored in ``temp_cases``
    and ``detection_time`` is updated.

    Args:
        t: Minimum number of seconds between the end of one detection and the
            start of the next.
    """
    global temp_cases, detection_time, frame
    idle_poll = 0.01
    while True:
        elapsed = time.time() - detection_time
        if not elapsed > t:
            # Sleep until the interval is over instead of spinning on the clock.
            time.sleep(max(t - elapsed, idle_poll))
            continue

        # Take one snapshot of the shared frame so the size calculation and
        # the detection use the same image. The global may still be undefined,
        # or None after a failed camera read.
        current = globals().get("frame")
        if current is None:
            time.sleep(idle_poll)
            continue

        cases = detect_frame(current, model, output_layers, class_dict, colors,size=auto_size(current,fast=True))
        temp_cases = cases
        detection_time = time.time()

def generate_frames():
    """Yield camera frames as parts of a multipart PNG stream.

    Ensures the background detection thread is running, then loops forever:
    reads a frame from ``camera``, overlays the latest detections when they
    are less than 1.5 seconds old, encodes the frame as PNG and yields it
    framed with the ``--frame`` boundary.

    Yields:
        ``bytes`` for one multipart chunk per successfully read frame.
    """
    global TempFrame, camera, frame, temp_cases, detection_time
    detection_time = time.time()

    # Reuse the running detection thread across requests. Starting a new
    # never-ending thread on every page load stacked up workers that all drive
    # the same network. daemon=True keeps the worker from blocking interpreter
    # shutdown.
    worker = getattr(generate_frames, "_detection_thread", None)
    if worker is None or not worker.is_alive():
        worker = threading.Thread(target=detect_frame_thread, daemon=True)
        generate_frames._detection_thread = worker
        worker.start()

    while True:
        success, frame = camera.read()
        if not success:
            time.sleep(1.0 / FrameRate)
            continue

        shown = frame
        cases = temp_cases
        if cases and (time.time() - detection_time) < 1.5:
            # Draw on a copy: ``frame`` is shared with the detection thread,
            # which would otherwise see the boxes drawn on its input.
            shown = draw_frame(frame.copy(), cases)

        encoded_ok, frame_encoded = cv2.imencode(".png", shown)
        if not encoded_ok:
            continue

        frame_bytes = frame_encoded.tobytes()
        yield (b'--frame\r\n' b'Content-Type: image/png\r\n\r\n' + frame_bytes + b'\r\n')








@app.on_event("startup")
def startup_event():
    """Open capture device 0 and request the configured frame rate."""
    global camera
    capture = cv2.VideoCapture(0)
    camera = capture
    capture.set(cv2.CAP_PROP_FPS, FrameRate)
    
@app.on_event("shutdown")
def shutdown_event():
    """Release the capture device if one was opened."""
    global camera
    if not camera:
        return
    camera.release()
@app.get('/test')
def testD():
    """Run detection on ``test.jpg`` and return the annotated image as PNG."""
    image_path = "test.jpg"
    payload = drawing_image(image_path)
    # drawing_image encodes as PNG by default, so declare that content type
    # explicitly on the response.
    return Response(content=payload, media_type="image/png")


@app.get('/video_feed')
def video_feed():
    """Stream the annotated camera frames as a multipart response."""
    stream = generate_frames()
    return StreamingResponse(
        stream,
        media_type="multipart/x-mixed-replace;boundary=frame",
    )

@app.get("/multi")
def video():
    """Serve the HTML page that embeds the ``/video_feed`` stream."""
    # Fixes the misspelled heading and drops the end tag on <img>, which is a
    # void element and takes none.
    return HTMLResponse("""
    <!DOCTYPE html>
    <html>
    <head>
    <meta name="viewport" content="initial-scale=1.0, width=device-width">
<style>
table td {
  border: 1px solid black;
  border-radius: 6px;
  padding :6px;
}
</style>
        <title>Multi Detection</title>
    </head>
    <body>
        <h1>Multi Detection</h1>
        <img width="90%" src="/video_feed">
    </body>
    </html>
    """)








if __name__ == "__main__":
    # Script entry point: open the demo page, then serve the app with uvicorn.
    import os
    import platform
    import webbrowser

    import uvicorn

    host = "localhost"
    port = 8080
    uri = f"http://{host}:{port}/multi"

    # NOTE(review): the 3.9.7 check looks like a way to tell the author's
    # Pydroid interpreter apart from a Termux one — confirm.
    if platform.python_version().startswith("3.9.7"):
        webbrowser.open(uri)
    else:
        os.system(f"termux-open {uri}")

    uvicorn.run(app, host=host, port=port)

沒有留言:

發佈留言