Home>

I would like to use yolov2 for accurate object detection.
Since I want to detect only car, I am trying to change class_names, but other than the specified one is also detected.
I would like to know if there is any way to do it.

Corresponding source code
from darkflow.net.build import TFNet
import cv2
import numpy as np
options = {"model": "yolov2-tiny-voc.cfg", "load": "yolov2-tiny-voc.weights", "threshold": 0.1, "GPU": 1.0}
tfnet = TFNet (options)
#Start camera
cap = cv2.VideoCapture (r "C: \ Script \ _home_pi_movie__home_pi_movie_cap20201004_1411.mp4")
class_names = ["aeroplane", "bicycle", "bird", "boat", "bottle",
    "bus", "car", "cat", "chair", "cow", "diningtable", "dog",
    "horse", "motorbike", "person", "pottedplant", "sheep", "sofa",
    "train", "tvmonitor"]
width = int (cap.get (3))
height = int (cap.get (4))
fourcc = cv2.VideoWriter_fourcc ('H', '2', '6', '4')
out = cv2.VideoWriter ('tesaaat.mp4', fourcc, 15, (width, height))
num_classes = len (class_names)
class_colors = []
for i in range (0, num_classes):
    hue = 255 * i/num_classes
    col = np.zeros ((1,1,3)). astype ("uint8")
    col [0] [0] [0] = hue
    col [0] [0] [1] = 128
    col [0] [0] [2] = 255
    cvcol = cv2.cvtColor (col, cv2.COLOR_HSV2BGR)
    col = (int (cvcol [0] [0] [0]), int (cvcol [0] [0] [1]), int (cvcol [0] [0] [2]))
    class_colors.append (col)
def main ():
    while (True):
        #Get frame from video stream
        ret, frame = cap.read ()
        result = tfnet.return_predict (frame)
        # cv2.line (frame, (0, pos_linha), (1280, pos_linha), (255,127,0), 1) #line
        for item in result:
            tlx = item ['topleft'] ['x']
            tly = item ['topleft'] ['y']
            brx = item ['bottomright'] ['x']
            bry = item ['bottomright'] ['y']
            label = item ['label']
            conf = item ['confidence']
            if conf>0.5:
                for i in class_names:
                    if label == i and class_names [7] == label:
                        class_num = class_names.index (i)
                        break
                #Create frame
                cv2.rectangle (frame, (tlx, tly), (brx, bry), class_colors [class_num], 2)
                #Create label
                text = label + "" + ('% .2f'% conf)
                cv2.rectangle (frame, (tlx, tly -15), (tlx + 100, tly + 5), class_colors [class_num], -1)
                cv2.putText (frame, text, (tlx, tly), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0), 1)
        # Display
        cv2.imshow ("Show FLAME Image", frame)
        out.write (frame)
        Press # esc to finish.
        k = cv2.waitKey (10);
        if k == ord ('q'): break;
    cap.release ()
    out.release ()
    cv2.destroyAllWindows ()
if __name__ =='__main__':
    main ()
  • Answer # 1

    Corresponding code line 39:

    result = tfnet.return_predict (frame)

    I think it is better to get only the value that detects the car from the results received here.

    variableresult resultIf you tell me the value of, you can write the code, so can you copy the value in the comment?


    And if you want to detect only cars, I think that learning YOLO to detect only cars will improve the accuracy, but is that not the case?

  • Answer # 2

    I don't know what you want to do in the ↓ part, but is it the part you are trying to do your best to make only the car?

                  for i in class_names:
                        if label == i and class_names [7] == label:
                            class_num = class_names.index (i)
                            break


    As temps1101 says, you should limit the number of classes, but if you just move the display like that, it can be deceived like ↓.

              if conf>0.5 and label == "car":
                    #Create frame
                    cv2.rectangle (frame, (tlx, tly), (brx, bry), class_colors [class_num], 2)
                    #Create label
                    text = label + "" + ('% .2f'% conf)
                    cv2.rectangle (frame, (tlx, tly -15), (tlx + 100, tly + 5), class_colors [class_num], -1)
                    cv2.putText (frame, text, (tlx, tly), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0), 1)