Course notes: Object detection on the Raspberry Pi

Preamble

Notes from Object Detection Raspberry Pi using OpenCV Python | 2020

Technology: MobileNet SSD

Required files

  • coco.names
  • ssd_inception_v2_coco_2017_11_17.pbtxt – out of date
  • ssd_mobilenet_v3_large_coco_2020_01_14.pbtxt
  • frozen_inference_graph.pb

Required packages

  • opencv-python, 4.3 or 4.4
  • numpy

Notes

You can use the same project as before (ObjectDetector); however, I made a new project, ObjectDetectorPi.

Make a module: ObjectDetectorModule.py

 

From Setting Camera Parameters in OpenCV/Python

0. CV_CAP_PROP_POS_MSEC Current position of the video file in milliseconds.
1. CV_CAP_PROP_POS_FRAMES 0-based index of the frame to be decoded/captured next.
2. CV_CAP_PROP_POS_AVI_RATIO Relative position of the video file: 0 = start of the film, 1 = end of the film.
3. CV_CAP_PROP_FRAME_WIDTH Width of the frames in the video stream.
4. CV_CAP_PROP_FRAME_HEIGHT Height of the frames in the video stream.
5. CV_CAP_PROP_FPS Frame rate.
6. CV_CAP_PROP_FOURCC 4-character code of codec.
7. CV_CAP_PROP_FRAME_COUNT Number of frames in the video file.
8. CV_CAP_PROP_FORMAT Format of the Mat objects returned by retrieve() .
9. CV_CAP_PROP_MODE Backend-specific value indicating the current capture mode.
10. CV_CAP_PROP_BRIGHTNESS Brightness of the image (only for cameras).
11. CV_CAP_PROP_CONTRAST Contrast of the image (only for cameras).
12. CV_CAP_PROP_SATURATION Saturation of the image (only for cameras).
13. CV_CAP_PROP_HUE Hue of the image (only for cameras).
14. CV_CAP_PROP_GAIN Gain of the image (only for cameras).
15. CV_CAP_PROP_EXPOSURE Exposure (only for cameras).
16. CV_CAP_PROP_CONVERT_RGB Boolean flags indicating whether images should be converted to RGB.
17. CV_CAP_PROP_WHITE_BALANCE Currently unsupported
18. CV_CAP_PROP_RECTIFICATION Rectification flag for stereo cameras (note: only supported by DC1394 v 2.x backend currently)

For OpenCV 4 remove the CV_ prefix.

 

Full code:

# ObjectDetectorModule.py

import cv2

threshold = 0.45  # Threshold to detect object

cap = cv2.VideoCapture(0)

cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
cap.set(cv2.CAP_PROP_BRIGHTNESS, 70)

classNames = []
classFile = 'coco.names'

with open(classFile, 'rt') as f:
    classNames = f.read().rstrip('\n').split('\n')

configPath = 'ssd_mobilenet_v3_large_coco_2020_01_14.pbtxt'
weightsPath = 'frozen_inference_graph.pb'

net = cv2.dnn_DetectionModel(weightsPath, configPath)
net.setInputSize(320, 320)
net.setInputScale(1.0 / 127.5)
net.setInputMean((127.5, 127.5, 127.5))
net.setInputSwapRB(True)

while True:
    success, img = cap.read()

    classIds, confs, bbox = net.detect(img, confThreshold=threshold)
    print(classIds, bbox)

    if len(classIds) != 0:
        for classId, confidence, box in zip(classIds.flatten(), confs.flatten(), bbox):
            cv2.rectangle(img, box, color=(0, 255, 0), thickness=2)
            cv2.putText(img, classNames[classId - 1].upper(), (box[0] + 10, box[1] + 30), cv2.FONT_HERSHEY_COMPLEX, 1,
                        (0, 255, 0), 2)
            cv2.putText(img, str(round(confidence * 100, 2)), (box[0] + 200, box[1] + 30), cv2.FONT_HERSHEY_COMPLEX, 1,
                        (0, 255, 0), 2)

    cv2.imshow("Output", img)
    cv2.waitKey(1)

Note on the Pi, you have to use the complete path for configPath, classFile and weightsPath.
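One way to avoid hard-coding those complete paths (a sketch, not from the course) is to build them relative to the script's own location, so they resolve correctly no matter what the working directory is when the Pi launches the program:

```python
import os

# Build absolute paths from the script's own directory, so the model files
# are found even when the program is started from elsewhere (e.g. autostart).
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

classFile = os.path.join(BASE_DIR, 'coco.names')
configPath = os.path.join(BASE_DIR, 'ssd_mobilenet_v3_large_coco_2020_01_14.pbtxt')
weightsPath = os.path.join(BASE_DIR, 'frozen_inference_graph.pb')
print(weightsPath)
```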

Make a module

# ObjectDetectorModule.py

import cv2

threshold = 0.45  # Threshold to detect object

classNames = []
classFile = 'coco.names'

with open(classFile, 'rt') as f:
    classNames = f.read().rstrip('\n').split('\n')

configPath = 'ssd_mobilenet_v3_large_coco_2020_01_14.pbtxt'
weightsPath = 'frozen_inference_graph.pb'

net = cv2.dnn_DetectionModel(weightsPath, configPath)
net.setInputSize(320, 320)
net.setInputScale(1.0 / 127.5)
net.setInputMean((127.5, 127.5, 127.5))
net.setInputSwapRB(True)


def getObjects(img):
    classIds, confs, bbox = net.detect(img, confThreshold=threshold)
    print(classIds, bbox)

    if len(classIds) != 0:
        for classId, confidence, box in zip(classIds.flatten(), confs.flatten(), bbox):
            cv2.rectangle(img, box, color=(0, 255, 0), thickness=2)
            cv2.putText(img, classNames[classId - 1].upper(), (box[0] + 10, box[1] + 30), cv2.FONT_HERSHEY_COMPLEX, 1,
                        (0, 255, 0), 2)
            cv2.putText(img, str(round(confidence * 100, 2)), (box[0] + 200, box[1] + 30), cv2.FONT_HERSHEY_COMPLEX, 1,
                        (0, 255, 0), 2)
    return img


def main():
    cap = cv2.VideoCapture(0)

    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
    cap.set(cv2.CAP_PROP_BRIGHTNESS, 70)

    while True:
        success, img = cap.read()
        result = getObjects(img)
        cv2.imshow("Output", img)
        cv2.waitKey(1)


if __name__ == '__main__':
    main()

But there is no init()?

Note: although img is returned from getObjects and stored in result, result is never displayed. Nevertheless, img still shows the drawn rectangles. So what is the point of returning img and storing it in result? It is as if the img used in getObjects() is a reference and not a copy.
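It is indeed a reference: OpenCV images are NumPy arrays, which are passed by reference, so the in-place drawing calls inside the function also change the caller's img. A small sketch, using a plain NumPy array as a stand-in for a frame:

```python
import numpy as np

def draw_marker(img):
    # Stand-in for cv2.rectangle/cv2.putText, which also modify img in place
    img[0, 0] = 255
    return img

frame = np.zeros((2, 2), dtype=np.uint8)
result = draw_marker(frame)
print(frame[0, 0])       # 255: the caller's array was modified
print(result is frame)   # True: same object, nothing was copied
# Pass frame.copy() instead if the original must stay untouched.
```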

There may be duplicate detections drawn on the image. Use NMS (non-maximum suppression) to remove them:

classIds, confs, bbox = net.detect(img, confThreshold=threshold, nmsThreshold=0.2)

The lower the NMS threshold, the more aggressively overlapping duplicates are suppressed.

Add the option to draw

def getObjects(img, draw=True):
    # classIds, confs, bbox = net.detect(img, confThreshold=threshold)
    classIds, confs, bbox = net.detect(img, confThreshold=threshold, nmsThreshold=0.2)
    print(classIds, bbox)

    if len(classIds) != 0:
        for classId, confidence, box in zip(classIds.flatten(), confs.flatten(), bbox):
            if draw:
                cv2.rectangle(img, box, color=(0, 255, 0), thickness=2)
                cv2.putText(img, classNames[classId - 1].upper(), (box[0] + 10, box[1] + 30), cv2.FONT_HERSHEY_COMPLEX, 1,
                            (0, 255, 0), 2)
                cv2.putText(img, str(round(confidence * 100, 2)), (box[0] + 200, box[1] + 30), cv2.FONT_HERSHEY_COMPLEX, 1,
                            (0, 255, 0), 2)
    return img

The draw conditional is added inside the loop because, even when not drawing, we still want to collect the bounding box and other info and return it:

def getObjects(img, draw=True):
    # classIds, confs, bbox = net.detect(img, confThreshold=threshold)
    classIds, confs, bbox = net.detect(img, confThreshold=threshold, nmsThreshold=0.2)
    print(classIds, bbox)

    objectInfo = []

    if len(classIds) != 0:
        for classId, confidence, box in zip(classIds.flatten(), confs.flatten(), bbox):
            className = classNames[classId - 1]
            objectInfo.append([box, className])
            if draw:
                cv2.rectangle(img, box, color=(0, 255, 0), thickness=2)
                cv2.putText(img, className.upper(), (box[0] + 10, box[1] + 30), cv2.FONT_HERSHEY_COMPLEX, 1,
                            (0, 255, 0), 2)
                cv2.putText(img, str(round(confidence * 100, 2)), (box[0] + 200, box[1] + 30), cv2.FONT_HERSHEY_COMPLEX, 1,
                            (0, 255, 0), 2)
    return img, objectInfo

and change the call

result, objectInfo = getObjects(img, draw=False)

Maybe we don’t want to detect everything

def getObjects(img, draw=True, objects=[]):
    # classIds, confs, bbox = net.detect(img, confThreshold=threshold)
    classIds, confs, bbox = net.detect(img, confThreshold=threshold, nmsThreshold=0.2)
    print(classIds, bbox)
    if len(objects) == 0:
        objects = classNames
    objectInfo = []

    if len(classIds) != 0:
        for classId, confidence, box in zip(classIds.flatten(), confs.flatten(), bbox):
            className = classNames[classId - 1]
            if className in objects:
                objectInfo.append([box, className])
                if draw:
                    cv2.rectangle(img, box, color=(0, 255, 0), thickness=2)
                    cv2.putText(img, className.upper(), (box[0] + 10, box[1] + 30), cv2.FONT_HERSHEY_COMPLEX, 1,
                                (0, 255, 0), 2)
                    cv2.putText(img, str(round(confidence * 100, 2)), (box[0] + 200, box[1] + 30),
                                cv2.FONT_HERSHEY_COMPLEX, 1,
                                (0, 255, 0), 2)
    return img, objectInfo

and call

result, objectInfo = getObjects(img, objects=['cup'])

Hmmm, it was running, then after a restart, errors:

/Users/macbook/PycharmProjects/ObjectDetectorPi/venv/bin/python /Users/macbook/PycharmProjects/ObjectDetectorPi/ObjectDetectorModule.py
Traceback (most recent call last):
File "/Users/macbook/PycharmProjects/ObjectDetectorPi/ObjectDetectorModule.py", line 31, in <module>
classIds, confs, bbox = net.detect(img, confThreshold=threshold)
cv2.error: OpenCV(4.5.3) /private/var/folders/5z/62x3lj993772jl5n_0j80t8r0000gn/T/pip-install-axehm1j2/opencv-python_9f0a00fb7cc849ae8ff020393160663d/opencv/modules/imgproc/src/resize.cpp:4051: error: (-215:Assertion failed) !ssize.empty() in function 'resize'

I had to create a new Python 3.7 venv and then it started working again!

Full code for module

# ObjectDetectorModule3.py

import cv2

threshold = 0.45  # Threshold to detect object

classNames = []
classFile = 'coco.names'

with open(classFile, 'rt') as f:
    classNames = f.read().rstrip('\n').split('\n')

configPath = 'ssd_mobilenet_v3_large_coco_2020_01_14.pbtxt'
weightsPath = 'frozen_inference_graph.pb'

net = cv2.dnn_DetectionModel(weightsPath, configPath)
net.setInputSize(320, 320)
net.setInputScale(1.0 / 127.5)
net.setInputMean((127.5, 127.5, 127.5))
net.setInputSwapRB(True)


def getObjects(img, confThreshold=0.45, nmsThreshold=0.2,  draw=True, objects=[]):
    # classIds, confs, bbox = net.detect(img, confThreshold=threshold)
    classIds, confs, bbox = net.detect(img, confThreshold=confThreshold, nmsThreshold=nmsThreshold)
    print(classIds, bbox)
    if len(objects) == 0:
        objects = classNames
    objectInfo = []

    if len(classIds) != 0:
        for classId, confidence, box in zip(classIds.flatten(), confs.flatten(), bbox):
            className = classNames[classId - 1]
            if className in objects:
                objectInfo.append([box, className])
                if draw:
                    cv2.rectangle(img, box, color=(0, 255, 0), thickness=2)
                    cv2.putText(img, className.upper(), (box[0] + 10, box[1] + 30), cv2.FONT_HERSHEY_COMPLEX, 1,
                                (0, 255, 0), 2)
                    cv2.putText(img, str(round(confidence * 100, 2)), (box[0] + 200, box[1] + 30),
                                cv2.FONT_HERSHEY_COMPLEX, 1,
                                (0, 255, 0), 2)
    return img, objectInfo


def main():
    cap = cv2.VideoCapture(0)

    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
    cap.set(cv2.CAP_PROP_BRIGHTNESS, 70)

    while True:
        success, img = cap.read()
        # result, objectInfo = getObjects(img)
        result, objectInfo = getObjects(img, draw=False)
        # result, objectInfo = getObjects(img, objects=['cup'])
        # result, objectInfo = getObjects(img, objects=['cup', 'mouse'])
        print(objectInfo)
        cv2.imshow("Output", img)
        cv2.waitKey(1)


if __name__ == '__main__':
    main()

And to use:

# mainModule.py

from ObjectDetectorModule3 import *

cap = cv2.VideoCapture(0)

cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
cap.set(cv2.CAP_PROP_BRIGHTNESS, 70)

while True:
    success, img = cap.read()
    # result, objectInfo = getObjects(img)
    result, objectInfo = getObjects(img, draw=False)
    # result, objectInfo = getObjects(img, objects=['cup'])
    # result, objectInfo = getObjects(img, objects=['cup', 'mouse'])
    print(objectInfo)
    cv2.imshow("Output", img)
    cv2.waitKey(1)

As a class

# ObjectDetector.py
# A class

import cv2


class ObjectDetector:
    def __init__(self):
        configPathFile = 'ssd_mobilenet_v3_large_coco_2020_01_14.pbtxt'
        weightsPathFile = 'frozen_inference_graph.pb'
        classPathFile = 'coco.names'

        # threshold = 0.45  # Threshold to detect object

        self.classNames = []

        with open(classPathFile, 'rt') as f:
            self.classNames = f.read().rstrip('\n').split('\n')

        self.net = cv2.dnn_DetectionModel(weightsPathFile, configPathFile)
        self.net.setInputSize(320, 320)
        self.net.setInputScale(1.0 / 127.5)
        self.net.setInputMean((127.5, 127.5, 127.5))
        self.net.setInputSwapRB(True)

    def getObjects(self, img, confThreshold=0.45, nmsThreshold=0.2, draw=True, objects=[]):
        # classIds, confs, bbox = net.detect(img, confThreshold=threshold)
        classIds, confs, bbox = self.net.detect(img, confThreshold=confThreshold, nmsThreshold=nmsThreshold)
        print(classIds, bbox)
        if len(objects) == 0:
            objects = self.classNames
        objectInfo = []

        if len(classIds) != 0:
            for classId, confidence, box in zip(classIds.flatten(), confs.flatten(), bbox):
                className = self.classNames[classId - 1]
                if className in objects:
                    objectInfo.append([box, className])
                    if draw:
                        cv2.rectangle(img, box, color=(0, 255, 0), thickness=2)
                        cv2.putText(img, className.upper(), (box[0] + 10, box[1] + 30), cv2.FONT_HERSHEY_COMPLEX, 1,
                                    (0, 255, 0), 2)
                        cv2.putText(img, str(round(confidence * 100, 2)), (box[0] + 200, box[1] + 30),
                                    cv2.FONT_HERSHEY_COMPLEX, 1,
                                    (0, 255, 0), 2)
        return img, objectInfo


def main():
    cap = cv2.VideoCapture(0)

    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
    cap.set(cv2.CAP_PROP_BRIGHTNESS, 70)

    objDetector = ObjectDetector()

    while True:
        success, img = cap.read()
        # result, objectInfo = getObjects(img)
        result, objectInfo = objDetector.getObjects(img, draw=False)
        # result, objectInfo = getObjects(img, objects=['cup'])
        # result, objectInfo = getObjects(img, objects=['cup', 'mouse'])
        print(objectInfo)
        cv2.imshow("Output", img)
        cv2.waitKey(1)


if __name__ == '__main__':
    main()

Using it

# mainModuleUsingClass.py

from ObjectDetector import ObjectDetector
import cv2

cap = cv2.VideoCapture(0)

cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
cap.set(cv2.CAP_PROP_BRIGHTNESS, 70)

objDetector = ObjectDetector()

while True:
    success, img = cap.read()
    # result, objectInfo = getObjects(img)
    result, objectInfo = objDetector.getObjects(img, draw=False)
    # result, objectInfo = getObjects(img, objects=['cup'])
    # result, objectInfo = getObjects(img, objects=['cup', 'mouse'])
    print(objectInfo)
    cv2.imshow("Output", img)
    cv2.waitKey(1)

On the Pi

You will need OpenCV 4.3 or 4.4 in order to support DetectionModel().

From How to install OpenCV on Raspberry Pi 4 | Raspberry Pi Tutorials for Beginners (2020), OpenCV 4.1.0

sudo apt-get update && sudo apt-get upgrade && sudo rpi-update
sudo nano /etc/dphys-swapfile
    CONF_SWAPSIZE=2048
sudo apt-get install build-essential cmake pkg-config
sudo apt-get install libjpeg-dev libtiff5-dev libjasper-dev libpng12-dev
sudo apt-get install libavcodec-dev libavformat-dev libswscale-dev libv4l-dev
sudo apt-get install libxvidcore-dev libx264-dev
sudo apt-get install libgtk2.0-dev libgtk-3-dev
sudo apt-get install libatlas-base-dev gfortran
wget -O opencv.zip https://github.com/opencv/opencv/archive/4.1.0.zip
wget -O opencv_contrib.zip https://github.com/opencv/opencv_contrib/archive/4.1.0.zip
unzip opencv.zip
unzip opencv_contrib.zip
sudo pip3 install numpy
cd ~/opencv-4.1.0/
mkdir build
cd build
cmake -D CMAKE_BUILD_TYPE=RELEASE \
      -D CMAKE_INSTALL_PREFIX=/usr/local/ \
      -D INSTALL_PYTHON_EXAMPLES=ON \
      -D OPENCV_EXTRA_MODULES_PATH=~/opencv_contrib-4.1.0/modules \
      -D BUILD_EXAMPLES=ON
make -j4
sudo make install && sudo ldconfig
sudo reboot

OpenCV 4.4.0

sudo apt-get update && sudo apt-get upgrade && sudo rpi-update
sudo nano /etc/dphys-swapfile
    CONF_SWAPSIZE=2048
sudo apt-get install build-essential cmake pkg-config
sudo apt-get install libjpeg-dev libtiff5-dev libjasper-dev libpng12-dev
sudo apt-get install libavcodec-dev libavformat-dev libswscale-dev libv4l-dev
sudo apt-get install libxvidcore-dev libx264-dev
sudo apt-get install libgtk2.0-dev libgtk-3-dev
sudo apt-get install libatlas-base-dev gfortran
wget -O opencv.zip https://github.com/opencv/opencv/archive/4.4.0.zip
wget -O opencv_contrib.zip https://github.com/opencv/opencv_contrib/archive/4.4.0.zip
unzip opencv.zip
unzip opencv_contrib.zip
sudo pip3 install numpy
cd ~/opencv-4.4.0/
mkdir build
cd build
cmake -D CMAKE_BUILD_TYPE=RELEASE \
      -D CMAKE_INSTALL_PREFIX=/usr/local/ \
      -D INSTALL_PYTHON_EXAMPLES=ON \
      -D OPENCV_EXTRA_MODULES_PATH=~/opencv_contrib-4.4.0/modules \
      -D BUILD_EXAMPLES=ON
make -j4
sudo make install && sudo ldconfig
sudo reboot

This is the end, my friend