东莞网推广网站建设,html底部友情链接代码,网站网站开发违法吗,网址推广软件 在使用opencv中的remap函数时，发现运行时间太长了，如果使用视频流进行重映射时根本不能实时，因此只能加速
1.使用opencv里的cv::cuda::remap函数
cv::cuda::remap函数头文件是 #include <opencv2/cudawarping.hpp>，编译ope…在使用opencv中的remap函数时，发现运行时间太长了，如果使用视频流进行重映射时根本不能实时，因此只能加速
1.使用opencv里的cv::cuda::remap函数
cv::cuda::remap函数头文件是#include opencv2/cudawarping.hpp编译opencv时需要用cuda进行编译 //1.重映射矩阵转成cuda处理的数据格式//map_x,map_y是重映射表数据类型是CV_32FC1cv::cuda::GpuMat m_mapx ::cv::cuda::GpuMat(map_x);cv::cuda::GpuMat m_mapy ::cv::cuda::GpuMat(map_y);//2.原图像转成cuda处理的数据格式cv::cuda::GpuMat src(img);//3.计算结果cv::cuda::GpuMat gpuMat2;cv::cuda::remap(src, gpuMat2, m_mapx, m_mapy, cv::INTER_LINEAR);//4.结果转成Matcv::Mat dstimage; gpuMat2.download(dstimage);示例
#include iostream
#include opencv2/opencv.hpp
#include opencv2/cudawarping.hppusing namespace cv;int main(int argc, char** argv)
{Mat img imread(image.jpg, IMREAD_COLOR);if (img.empty()){std::cout Could not open the input image std::endl;exit(1);}int in_width img.cols;int in_height img.rows;Mat map_x(in_height, in_width, CV_32FC1);Mat map_y(in_height, in_width, CV_32FC1);// 创建重映射映射表for (int y 0; y in_height; y) {for (int x 0; x in_width; x) {map_x.atfloat(y, x) (x 20) / (float)in_width * in_width;map_y.atfloat(y, x) y / (float)in_height * in_height;}}cv::cuda::GpuMat m_mapx ::cv::cuda::GpuMat(map_x);cv::cuda::GpuMat m_mapy ::cv::cuda::GpuMat(map_y);cv::cuda::GpuMat gpuMat1(img);double time0 static_castdouble(cv::getTickCount());//记录起始时间cv::cuda::GpuMat gpuMat2;cv::cuda::remap(gpuMat1, gpuMat2, m_mapx, m_mapy, cv::INTER_LINEAR);cv::Mat GPUimage;gpuMat2.download(GPUimage); time0 ((double)cv::getTickCount() - time0) / cv::getTickFrequency();std::cout GPU运行remap函数的时间为 time0 * 1000 ms std::endl;double time1 static_castdouble(cv::getTickCount());//记录起始时间cv::Mat CPUimage;cv::remap(img, CPUimage, map_x, map_y, cv::INTER_LINEAR);time1 ((double)cv::getTickCount() - time1) / cv::getTickFrequency();std::cout CPU运行remap函数的时间为 time1 * 1000 ms std::endl;return 0;
}经过实际运行在我电脑上速度快了15倍左右
2.在cuda上重写remap函数
这是在csdn上看到的一篇文章上写的代码，在我的实际应用中变换的结果是错误的。由于我实际的应用时，我的图像输入尺寸和输出尺寸是不相同的，因此运行错误，但是在输入输出是相同尺寸时是正确的。因为使用了cv::cuda::remap，我也没修改这个程序。 建立.cu文件，可以生成静态库使用，也可以不生成使用
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <cuda_runtime_api.h>
#include <stdio.h>
#include <math.h>

/*
 * Bilinear remap of a packed, 3-channel, 8-bit image.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per destination pixel
 * (x from the .x dimension, y from the .y dimension). Threads that fall
 * outside dst_width x dst_height exit immediately. No shared memory is used.
 *
 * src            source image, src_width * src_height * 3 bytes, rows packed
 * dst            destination image, dst_width * dst_height * 3 bytes, rows packed
 * map_x, map_y   per-destination-pixel source coordinates,
 *                dst_width * dst_height floats each
 *
 * A destination pixel whose source coordinate lies outside
 * [0, src_width - 1] x [0, src_height - 1] (or is NaN) is written as 0, so
 * every byte of dst is defined after the kernel runs.
 */
__global__ void remap_kernel(const unsigned char* src, int src_width, int src_height,
                             unsigned char* dst, int dst_width, int dst_height,
                             const float* map_x, const float* map_y)
{
    const int x = threadIdx.x + blockIdx.x * blockDim.x;
    const int y = threadIdx.y + blockIdx.y * blockDim.y;
    if (x >= dst_width || y >= dst_height) {
        return;
    }

    // size_t so that large images cannot overflow 32-bit index arithmetic.
    const size_t pixel = (size_t)y * dst_width + x;
    const size_t index = pixel * 3;

    const float src_x = map_x[pixel];
    const float src_y = map_y[pixel];

    // Written as a positive test so that NaN coordinates count as "outside".
    const bool inside = src_x >= 0.0f && src_x <= (float)(src_width - 1) &&
                        src_y >= 0.0f && src_y <= (float)(src_height - 1);
    if (!inside) {
        // Constant (black) border instead of leaving the bytes unwritten.
        dst[index + 0] = 0;
        dst[index + 1] = 0;
        dst[index + 2] = 0;
        return;
    }

    const int x0 = (int)floorf(src_x);
    const int y0 = (int)floorf(src_y);
    // Clamp the right/bottom neighbours so a sample exactly on the last
    // column/row stays in bounds; its weight is 0 there (tx or ty == 0).
    const int x1 = (x0 + 1 < src_width) ? x0 + 1 : src_width - 1;
    const int y1 = (y0 + 1 < src_height) ? y0 + 1 : src_height - 1;
    const float tx = src_x - x0;
    const float ty = src_y - y0;

    const size_t src_index00 = ((size_t)y0 * src_width + x0) * 3;
    const size_t src_index10 = ((size_t)y0 * src_width + x1) * 3;
    const size_t src_index01 = ((size_t)y1 * src_width + x0) * 3;
    const size_t src_index11 = ((size_t)y1 * src_width + x1) * 3;

    for (int i = 0; i < 3; i++) {
        const float value00 = src[src_index00 + i];
        const float value10 = src[src_index10 + i];
        const float value01 = src[src_index01 + i];
        const float value11 = src[src_index11 + i];
        const float value0 = value00 * (1.0f - tx) + value10 * tx;
        const float value1 = value01 * (1.0f - tx) + value11 * tx;
        const float value = value0 * (1.0f - ty) + value1 * ty;
        // Round to nearest; the interpolated value is already within [0, 255].
        dst[index + i] = static_cast<unsigned char>(value + 0.5f);
    }
}

/*
 * Host entry point: remap a packed 3-channel 8-bit host image on the GPU.
 *
 * in             host source image, in_width * in_height * 3 bytes
 * out            host destination buffer, out_width * out_height * 3 bytes
 * map_x, map_y   host lookup tables, out_width * out_height floats each
 *
 * Allocates device buffers, copies the inputs over, launches remap_kernel and
 * copies the result back. On invalid arguments or any CUDA failure a message
 * is printed to stderr and `out` is left unmodified; device buffers are
 * released on every path.
 */
extern "C" void remap_gpu(const unsigned char* in, int in_width, int in_height,
                          unsigned char* out, int out_width, int out_height,
                          const float* map_x, const float* map_y)
{
    if (in == NULL || out == NULL || map_x == NULL || map_y == NULL ||
        in_width <= 0 || in_height <= 0 || out_width <= 0 || out_height <= 0) {
        fprintf(stderr, "remap_gpu: invalid argument\n");
        return;
    }

    // Byte counts in size_t: width * height * 3 can overflow int.
    const size_t in_bytes = (size_t)in_width * in_height * 3;
    const size_t out_bytes = (size_t)out_width * out_height * 3;
    const size_t map_bytes = (size_t)out_width * out_height * sizeof(float);

    unsigned char* d_in = NULL;
    unsigned char* d_out = NULL;
    float* d_map_x = NULL;
    float* d_map_y = NULL;
    cudaError_t err = cudaSuccess;

    // Single-pass block: the first failing call breaks out to the shared
    // cleanup below.
    do {
        if ((err = cudaMalloc((void**)&d_in, in_bytes)) != cudaSuccess) break;
        if ((err = cudaMalloc((void**)&d_out, out_bytes)) != cudaSuccess) break;
        if ((err = cudaMalloc((void**)&d_map_x, map_bytes)) != cudaSuccess) break;
        if ((err = cudaMalloc((void**)&d_map_y, map_bytes)) != cudaSuccess) break;

        if ((err = cudaMemcpy(d_in, in, in_bytes, cudaMemcpyHostToDevice)) != cudaSuccess) break;
        if ((err = cudaMemcpy(d_map_x, map_x, map_bytes, cudaMemcpyHostToDevice)) != cudaSuccess) break;
        if ((err = cudaMemcpy(d_map_y, map_y, map_bytes, cudaMemcpyHostToDevice)) != cudaSuccess) break;

        // 32 x 8 = 256 threads per block; ceil-div so the grid covers the
        // whole destination image.
        const dim3 block(32, 8, 1);
        const dim3 grid((out_width + block.x - 1) / block.x,
                        (out_height + block.y - 1) / block.y, 1);
        remap_kernel<<<grid, block>>>(d_in, in_width, in_height,
                                      d_out, out_width, out_height,
                                      d_map_x, d_map_y);
        // Launch-configuration errors are only visible through cudaGetLastError.
        if ((err = cudaGetLastError()) != cudaSuccess) break;

        // Blocking copy: also reports any fault raised while the kernel ran.
        err = cudaMemcpy(out, d_out, out_bytes, cudaMemcpyDeviceToHost);
    } while (0);

    if (err != cudaSuccess) {
        fprintf(stderr, "remap_gpu: CUDA error: %s\n", cudaGetErrorString(err));
    }

    // cudaFree(NULL) is a no-op, so this is safe after a partial allocation.
    cudaFree(d_in);
    cudaFree(d_out);
    cudaFree(d_map_x);
    cudaFree(d_map_y);
}

重新新建一个.cpp文件
#include iostream
#include opencv2/opencv.hppusing namespace cv;extern C void remap_gpu(const unsigned char* in, int in_width, int in_height,unsigned char* out, int out_width, int out_height,const float* map_x, const float* map_y);int main(int argc, char** argv)
{cv::Mat img imread(image.jpg, IMREAD_COLOR);if (img.empty()) {std::cout Could not open the input image std::endl;exit(1);}int in_width img.cols;int in_height img.rows;cv::Mat map_x(in_height, in_width, CV_32FC1);cv::Mat map_y(in_height, in_width, CV_32FC1);// 创建重映射映射表for (int y 0; y in_height; y) {for (int x 0; x in_width; x) {map_x.atfloat(y, x) (x 20) / (float)in_width * in_width;map_y.atfloat(y, x) y / (float)in_height * in_height;}}double time0 static_castdouble(cv::getTickCount());//记录起始时间cv::Mat CPUimage;remap(img, CPUimage, map_x, map_y, cv::INTER_LINEAR, cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));time0 ((double)cv::getTickCount() - time0) / cv::getTickFrequency();std::cout CPU 运行remap函数时间为 time0 * 1000 ms std::endl;int out_width in_width;int out_height in_height; unsigned char* out (unsigned char*)malloc(out_width * out_height * 3);double time1 static_castdouble(cv::getTickCount());//记录起始时间unsigned char* in (unsigned char*)img.data;remap_gpu(in, in_width, in_height, out, out_width, out_height, (float*)map_x.data, (float*)map_y.data);cv::Mat GPUimage(out_height, out_width, CV_8UC3, out);time1 ((double)cv::getTickCount() - time1) / cv::getTickFrequency();std::cout GPU 运行remap函数时间为 time1 * 1000 ms std::endl;free(out);return 0;
}只运行一帧时cpu上运行的remap较快运行多帧时GPU上运行的remap函数要比CPU上运行快5倍左右
总结
如果自己编译的opencv带cuda，最好还是使用cv::cuda::remap函数，耗时较少