Skip to content

PyKinect2 Integration wih OpenCV for YOLOv3 throws an error of : Number of input channels should be multiple of 3 but got 4 in function  #104

@Yudh0B

Description

@Yudh0B

Here is the full code of my projects (big thanks to KonstantinosAng for mapper functions and guanming001 for the basic_2d examples)

from asyncore import read
from pykinect2.PyKinectV2 import *
from pykinect2 import PyKinectV2
from pykinect2 import PyKinectRuntime
import numpy as np
import cv2
import mapper
import time

net = cv2.dnn.readNet("yolov3-tiny_training_last.weights", "yolov3-tiny_testing (1).cfg")
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_OPENCL)

classes = []
with open("obj.names", "r") as f:
classes = [line.strip() for line in f.readlines()]
layer_names = net.getLayerNames()
output_layers = [layer_names[i-1] for i in net.getUnconnectedOutLayers()]
colors = np.random.uniform(0, 255, size=(len(classes), 3)) #Generate Random Color
font = cv2.FONT_HERSHEY_SIMPLEX
timeframe = time.time()
frame_id = 0
maskx=0
masky=0
no_maskx=0
no_masky=0

kinect = PyKinectRuntime.PyKinectRuntime(PyKinectV2.FrameSourceTypes_Color |
PyKinectV2.FrameSourceTypes_Depth)
depth_width, depth_height = kinect.depth_frame_desc.Width, kinect.depth_frame_desc.Height # Default: 512, 424
color_width, color_height = kinect.color_frame_desc.Width, kinect.color_frame_desc.Height # Default: 1920, 1080

while True:

##############################
### Get images from kinect ###
##############################
if kinect.has_new_color_frame() and \
   kinect.has_new_depth_frame() :


    depth_frame = kinect.get_last_depth_frame()
    color_frame = kinect.get_last_color_frame()

    depth_img = depth_frame.reshape(((depth_height, depth_width))).astype(np.uint16)
    color_img = color_frame.reshape(((color_height, color_width, 4))).astype(np.uint8)

    color_img_resize = cv2.resize(color_img, (0,0), fx=0.5, fy=0.5) # Resize (1080, 1920, 4) into half (540, 960, 4)
    depth_colormap   = cv2.applyColorMap(cv2.convertScaleAbs(depth_img, alpha=255/1500), cv2.COLORMAP_JET) # Scale to display from 0 mm to 1500 mm

    frame = color_img_resize
    depth = depth_colormap
    frame_id += 1
    color_height, color_width, channels = frame.shape

    #Detecting Object
    blob = cv2.dnn.blobFromImage(frame, 1/255, (320, 320), (0, 0, 0), True, crop=False)
    
    net.setInput(blob)
    outs = net.forward(output_layers)

    class_ids = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0: #Confidence Level -> Accuracy
                # Object detected
                center_x = int(detection[0] * color_width)
                center_y = int(detection[1] * color_height)
                w = int(detection[2] * color_width)
                h = int(detection[3] * color_height)

                # Rectangle coordinates
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)

                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.4)

    
    for i in range(len(boxes)):
        if i in indexes:
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            confidence = confidences[i]
            # color = colors[i]
            color = (255,255,255)
            if label == "mask" :
                no_maskx = 0
                no_masky = 0
                cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
                cv2.putText(frame, label, (x, y+30), font, 1, color, 2)
                cv2.putText(frame, label + " " + str(round(confidence, 2)), (x, y+30), font, 1, color, 2)
                center = ((x+w/2)-(color_width/2), (y+h/2)-(color_height/2))
                maskx = (x+w/2)
                masky = (y+h/2)
                print (maskx,masky)
            if label == "no_mask":
                maskx = 0
                masky = 0
                cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
                cv2.putText(frame, label, (x, y+30), font, 1, color, 2)
                cv2.putText(frame, label + " " + str(round(confidence, 2)), (x, y+30), font, 1, color, 2)
                center = ((x+w/2)-(color_width/2), (y+h/2)-(color_height/2))
                no_maskx = (x+w/2)
                no_masky = (y+h/2)
                #print (no_maskx,no_masky)
                mapped_no_maskx = no_maskx/0.4
                mapped_no_masky = no_masky/0.4
                depth_x, depth_y = mapper.color_point_2_depth_point(kinect, _DepthSpacePoint, kinect._depth_frame_data, [mapped_no_maskx, mapped_no_masky]) # pixel        
    elapsed_time = time.time() - timeframe
    fps = frame_id / elapsed_time
    cv2.putText(frame, str(round(fps,2)), (10, 50), font, 2, (255, 255, 255), 2) #FPS Value
    cv2.putText(frame, "FPS", (220, 50), font, 2, (255, 255, 255), 2) #FPS Label
    cv2.imshow("Image", frame)
    cv2.circle(color_img_resize, (960,540), radius=10, color=(0, 0, 0), thickness=2)
    cv2.circle(depth_colormap, (960,540), radius=10, color=(0, 0, 0), thickness=2)
    cv2.putText(depth, str(round(fps,2)), (10, 50), font, 2, (255, 255, 255), 2) #FPS Value
    cv2.putText(depth, "FPS", (220, 50), font, 2, (255, 255, 255), 2) #FPS Label
    cv2.imshow("dotted Object", depth)
    cv2.imshow('color', frame)
    cv2.imshow('depth', depth)
    depth_x, depth_y = mapper.color_point_2_depth_point(kinect, _DepthSpacePoint, kinect._depth_frame_data, [960,540]) # pixel
    if (int(depth_y ) * 512 + int(depth_x)) < 512 * 424:
        depth_z = float(depth_frame[int(depth_y ) * 512 + int(depth_y )]) # mm
    else:
        # If it exceeds return the last value to catch overflow
        depth_z = float(depth_frame[int((512 * 424) - 1)]) # mm
    print (depth_z)
key = cv2.waitKey(1)
if key == 27:
    break

kinect.close()
cv2.destroyAllWindows

here's the error for the code, i have done YOLOv3 with a webcam before and the data types seems to be the same as the variable color_img_resize, kinda confused here bcs apparently color_img_resize doesnt have the same input channels as the one with the webcam as an input

[ERROR:0@2.014] global D:\a\opencv-python\opencv-python\opencv\modules\dnn\src\net_impl.cpp (1171) cv::dnn::dnn4_v20220524::Net::Impl::getLayerShapesRecursively OPENCV/DNN: [Convolution]:(conv_0): getMemoryShapes() throws exception. inputs=1 outputs=0/1 blobs=1
[ERROR:0@2.032] global D:\a\opencv-python\opencv-python\opencv\modules\dnn\src\net_impl.cpp (1174) cv::dnn::dnn4_v20220524::Net::Impl::getLayerShapesRecursively input[0] = [ 1 4 320 320 ]
[ERROR:0@2.054] global D:\a\opencv-python\opencv-python\opencv\modules\dnn\src\net_impl.cpp (1182) cv::dnn::dnn4_v20220524::Net::Impl::getLayerShapesRecursively blobs[0] = CV_32FC1 [ 16 3 3 3 ]
[ERROR:0@2.069] global D:\a\opencv-python\opencv-python\opencv\modules\dnn\src\net_impl.cpp (1184) cv::dnn::dnn4_v20220524::Net::Impl::getLayerShapesRecursively Exception message: OpenCV(4.6.0) D:\a\opencv-python\opencv-python\opencv\modules\dnn\src\layers\convolution_layer.cpp:405: error: (-2:Unspecified error) Number of input Traceback (most recent call last):
File "cv2--kinect.py", line 62, in
outs = net.forward(output_layers)
cv2.error: OpenCV(4.6.0) D:\a\opencv-python\opencv-python\opencv\modules\dnn\src\layers\convolution_layer.cpp:405: error: (-2:Unspecified error) Number of input channels should be multiple of 3 but got 4 in function 'cv::dnn::ConvolutionLayerImpl::getMemoryShapes'

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions