V4L2.cpp and V4L2.h. This is essentially the minimum API required to connect to a webcam. While working on the Python wrapper I found out that video4linux2 devices can be accessed in three ways: READ, MMAP and STREAM, but only the MMAP method works with my webcams. As it turned out, the example programs that had not worked for me used the READ method.

main_v4l2.cpp
#include "V4L2.h" #include <cstring> #include <iostream> using namespace std; extern "C" { // Specify the video device here V4L2 v4l2("/dev/video0"); unsigned char *rgbFrame; float clamp(float num) { if (num < 0) num = 0; if (num > 255) num = 255; return num; } // Convert between YUV and RGB colorspaces void yuv2rgb(unsigned char y, unsigned char u, unsigned char v, unsigned char &r, unsigned char &g, unsigned char &b) { float C = y - 16; float D = u - 128; float E = v - 128; r = (char)clamp(C + ( 1.402 * E )) ; g = (char)clamp(C - ( 0.344136 * D + 0.714136 * E )) ; b = (char)clamp(C + ( 1.772 * D )) ; } unsigned char *getFrame() { unsigned char *frame = (unsigned char *)v4l2.getFrame(); int i = 0, k = 0; unsigned char Y, U, V, R, G, B; for (i=0;i<640*480*2;i+=4) { Y = frame[i]; U = frame[i+1]; V = frame[i+3]; yuv2rgb(Y, U, V, R, G, B); rgbFrame[k] = R; k++; rgbFrame[k] = G; k++; rgbFrame[k] = B; k++; Y = frame[i+2]; yuv2rgb(Y, U, V, R, G, B); rgbFrame[k] = R; k++; rgbFrame[k] = G; k++; rgbFrame[k] = B; k++; } return rgbFrame; } void stopCapture() { v4l2.freeBuffers(); } // Call this before using the device void openDevice() { // set format struct v4l2_format fmt; CLEAR(fmt); fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; // Adjust resolution fmt.fmt.pix.width = 640; fmt.fmt.pix.height = 480; fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_YUYV; if (!v4l2.set(fmt)) { fprintf(stderr, "device does not support used settings.\n"); } v4l2.initBuffers(); v4l2.startCapture(); rgbFrame = (unsigned char *)malloc(640*480*3); } }
On each getFrame request we read a frame from the webcam, convert it to RGB format and hand the caller a pointer to the converted frame. I also provide a Makefile to quickly compile this library, if you need it.

v4l2.py
```python
from ctypes import *
import Image
import time

lib = cdll.LoadLibrary("linux/libv4l2.so")

class VideoDevice(object):
    def __init__(self):
        lib.openDevice()
        lib.getFrame.restype = c_void_p

    def getImage(self):
        buf = lib.getFrame()
        frame = (c_char * (640*480*3)).from_address(buf)
        img = Image.frombuffer('RGB', (640, 480), frame,
                               'raw', 'RGB', 0, 1)
        return img, time.time()
```
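A quick usage sketch (assuming the library was built into linux/libv4l2.so as above, v4l2.py is on the import path and PIL is installed):

```python
from v4l2 import VideoDevice

cam = VideoDevice()              # opens /dev/video0 and starts capture
img, timestamp = cam.getImage()  # PIL image plus the capture time
img.save("frame.png")            # dump one frame to disk for inspection
```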
Pay attention to the line

frame = (c_char * (640*480*3)).from_address(buf)

If we declared the return type of getFrame() as c_char_p, ctypes would interpret the data as a zero-terminated string: as soon as a zero byte was encountered in the byte stream, reading would stop. The construction used here instead states explicitly how many bytes have to be read. In our case it is always a fixed value, 640 * 480 * 3.
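The difference is easy to see in a small self-contained example (nothing here is specific to our library; create_string_buffer merely stands in for the camera buffer):

```python
from ctypes import *

# Five bytes with a zero in the middle, standing in for raw frame data
buf = create_string_buffer("ab\x00cd", 5)
addr = addressof(buf)

# c_char_p stops at the first zero byte...
as_string = cast(buf, c_char_p).value        # 'ab'

# ...while a fixed-size c_char array reads exactly 5 bytes
as_array = (c_char * 5).from_address(addr)
raw_bytes = as_array.raw                     # 'ab\x00cd'
```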
For Windows, a similar wrapper module lives in the windows folder under the name directx.py.
For tracking I took the lkdemo.py program, supplied with OpenCV, as a basis, and again simplified it for our needs, reworking it into a class:

tracker.py
```python
import cv

class Tracker(object):
    "Simple object tracking class"

    def __init__(self):
        self.grey = None
        self.point = None
        self.WIN_SIZE = 10

    def target(self, x, y):
        "Tell which object to track"
        # It needs to be an array for the optical flow calculation
        self.point = [(x, y)]

    def takeImage(self, img):
        "Loads and processes next frame"
        # Convert it to IPL Image
        frame = cv.CreateImageHeader(img.size, 8, 3)
        cv.SetData(frame, img.tostring())

        if self.grey is None:
            # create the images we need
            self.grey = cv.CreateImage(cv.GetSize(frame), 8, 1)
            self.prev_grey = cv.CreateImage(cv.GetSize(frame), 8, 1)
            self.pyramid = cv.CreateImage(cv.GetSize(frame), 8, 1)
            self.prev_pyramid = cv.CreateImage(cv.GetSize(frame), 8, 1)

        cv.CvtColor(frame, self.grey, cv.CV_BGR2GRAY)

        if self.point:
            # calculate the optical flow
            new_point, status, something = cv.CalcOpticalFlowPyrLK(
                self.prev_grey, self.grey,
                self.prev_pyramid, self.pyramid,
                self.point,
                (self.WIN_SIZE, self.WIN_SIZE), 3,
                (cv.CV_TERMCRIT_ITER | cv.CV_TERMCRIT_EPS, 20, 0.03),
                0)
            # If the point is still alive
            if status[0]:
                self.point = new_point
            else:
                self.point = None

        # swapping
        self.prev_grey, self.grey = self.grey, self.prev_grey
        self.prev_pyramid, self.pyramid = self.pyramid, self.prev_pyramid
```
We point the tracker at an object with the target method. Then we feed it frames one at a time through the takeImage method; it converts each frame into a format OpenCV understands, creates the auxiliary images the algorithm needs, converts the frame from color to grayscale, and then hands everything to the CalcOpticalFlowPyrLK function, which computes the optical flow by the pyramidal Lucas-Kanade method. At the output of this function we get the new coordinates of the point we are tracking. If the point has been lost, status[0] will be zero. Optical flow can be computed for more than one point at a time: run the lkdemo.py program with a webcam and see how well it copes with a large number of points.

Strictly speaking, the code lacks the line cv.CvtColor(frame, frame, cv.CV_BGR2RGB), but most tracking algorithms are completely indifferent to whether your color components are swapped or not, and our example works only with grayscale images anyway. Therefore this line need not be included in the code.
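A minimal sketch of how the class is driven (hypothetical: any frame source with a getImage method, such as the VideoDevice above, will do):

```python
from v4l2 import VideoDevice
from tracker import Tracker

webcam = VideoDevice()
tracker = Tracker()
tracker.target(320, 240)            # start following the image center

for _ in range(100):                # track across a hundred frames
    img, timestamp = webcam.getImage()
    tracker.takeImage(img)
    if tracker.point is None:       # status[0] was zero: point lost
        break
    x, y = tracker.point[0]         # updated coordinates of the object
```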
The distance and speed computation itself is implemented in the Calculator class in distance_measure.py. Everything is tied together by the main program:

main.py
```python
from distance_measure import Calculator
from webcam import WebCam
from tracker import Tracker
from Tkinter import *
import ImageTk as PILImageTk
import time

class GUIFramework(Frame):
    "This is the GUI"

    def __init__(self, master=None):
        Frame.__init__(self, master)
        self.grid(padx=10, pady=10)
        self.distanceLabel = Label(self, text='Distance =')
        self.distanceLabel.grid(row=0, column=0)
        self.speedLabel = Label(self, text='Speed =')
        self.speedLabel.grid(row=0, column=1)
        self.imageLabel = None
        self.cameraImage = None
        self.webcam = WebCam()
        # M = 510, L = 0.5, W = 640
        self.dist_calculator = Calculator(500, 0.5, 640, 1)
        self.tracker = Tracker()
        self.after(100, self.drawImage)

    def updateMeasure(self, x):
        (distance, speed) = self.dist_calculator.calculate(x, time.time())
        self.distanceLabel.config(text='Distance = ' + str(distance))
        # If you want to get km/h instead of m/s just multiply
        # the m/s value by 3.6
        #speed *= 3.6
        self.speedLabel.config(text='Speed = ' + str(speed) + ' m/s')

    def imgClicked(self, event):
        """
        On left mouse button click calculate distance and tell
        tracker which object to track
        """
        self.updateMeasure(event.x)
        self.tracker.target(event.x, event.y)

    def drawImage(self):
        "Load and display the image"
        img, timestamp = self.webcam.getImage()
        # Pass image to tracker
        self.tracker.takeImage(img)

        if self.tracker.point:
            pt = self.tracker.point[0]
            self.updateMeasure(pt[0])
            # Draw rectangle around tracked point
            img.paste((128, 255, 128),
                      (int(pt[0])-2, int(pt[1])-2,
                       int(pt[0])+2, int(pt[1])+2))

        self.cameraImage = PILImageTk.PhotoImage(img)
        if not self.imageLabel:
            self.imageLabel = Label(self, image=self.cameraImage)
            self.imageLabel.bind("<Button-1>", self.imgClicked)
            self.imageLabel.grid(row=1, column=0, columnspan=2)
        else:
            self.imageLabel.config(image=self.cameraImage)
        # 30 FPS refresh rate
        self.after(1000/30, self.drawImage)

if __name__ == '__main__':
    guiFrame = GUIFramework()
    guiFrame.mainloop()
```
The GUI class is derived from Frame, and inside it we use grid to position the other widgets: two text labels and one image. With Tkinter I did not even have to create separate threads for pulling images from the webcam, because there is the after method, which runs a given function after a specified period of time. Both the text and the image are updated via the Label method config. Very simple! A mouse click on the image is bound with the bind method and routed to the imgClicked method.
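The after plus config pattern is worth a tiny standalone illustration (a hypothetical one-second counter, unrelated to the project code):

```python
from Tkinter import Tk, Label

root = Tk()
label = Label(root, text='0')
label.grid()

counter = [0]

def tick():
    counter[0] += 1
    label.config(text=str(counter[0]))  # update the widget in place
    root.after(1000, tick)              # re-schedule ourselves in 1 s

tick()
root.mainloop()
```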
The frame itself is fetched by the self.webcam.getImage function. The webcam module simply loads the appropriate module for working with the webcam, depending on which operating system the program is running under; a possible sketch of it is given below. The program is started with:

python main.py
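The webcam module itself is not listed in the article; a minimal sketch of such a dispatcher might look like this (module and folder names are assumptions based on the paths used above):

```python
# webcam.py -- hypothetical reconstruction of the OS-dispatching module
import sys

if sys.platform.startswith('linux'):
    from v4l2 import VideoDevice      # the ctypes wrapper shown above
else:
    from directx import VideoDevice   # the Windows variant (directx.py)

class WebCam(object):
    "Thin OS-independent facade over the platform video device"
    def __init__(self):
        self.device = VideoDevice()

    def getImage(self):
        # Returns a (PIL image, capture timestamp) pair
        return self.device.getImage()
```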
Source: https://habr.com/ru/post/115661/