/**
 * Computes a per-channel mean of the SSIM map for two images using GPU
 * operations queued on a gpu::Stream and caller-supplied scratch buffers.
 *
 * @param i1  First host image; uploaded into b.gI1.
 * @param i2  Second host image; uploaded into b.gI2.
 * @param b   Reusable buffer set. Every intermediate result (t1, t2, t3,
 *            vI1, vI2, I1_2, I2_2, I1_I2, mu*, ssim_map, buf) is written
 *            into it, so its previous contents are overwritten.
 * @return    Scalar whose element i is sum(b.ssim_map) / (rows * cols) for
 *            channel i of the input.
 *
 * NOTE(review): the GaussianBlur / subtract / add calls are disabled in this
 * version. As a result b.mu1, b.mu2 and b.sigma12 are read below without
 * being written anywhere in this function, and C1 / C2 are never applied.
 * Confirm whether these calls should be re-enabled (their argument lists
 * depend on the OpenCV version the file is built against).
 */
Scalar getMSSIM_GPU_optimized( const Mat& i1, const Mat& i2, BufferMSSIM& b)
{
    // Constants of the SSIM formula; currently referenced only by the
    // disabled gpu::add calls below.
    const float C1 = 6.5025f, C2 = 58.5225f;

    // ---- Initialisation: upload, convert to float, split into channels ----
    b.gI1.upload(i1);
    b.gI2.upload(i2);

    gpu::Stream stream;

    stream.enqueueConvert(b.gI1, b.t1, CV_32F);
    stream.enqueueConvert(b.gI2, b.t2, CV_32F);

    gpu::split(b.t1, b.vI1, stream);
    gpu::split(b.t2, b.vI2, stream);

    Scalar mssim;

    // Process one channel per iteration; b.vI1[i] / b.vI2[i] are the single
    // channel planes produced by gpu::split above.
    for( int i = 0; i < b.gI1.channels(); ++i )
    {
        gpu::multiply(b.vI2[i], b.vI2[i], b.I2_2, stream);        // I2^2
        gpu::multiply(b.vI1[i], b.vI1[i], b.I1_2, stream);        // I1^2
        gpu::multiply(b.vI1[i], b.vI2[i], b.I1_I2, stream);       // I1 * I2

        //gpu::GaussianBlur(b.vI1[i], b.mu1, Size(11, 11), 1.5, 0, BORDER_DEFAULT, -1, stream);
        //gpu::GaussianBlur(b.vI2[i], b.mu2, Size(11, 11), 1.5, 0, BORDER_DEFAULT, -1, stream);

        gpu::multiply(b.mu1, b.mu1, b.mu1_2, stream);
        gpu::multiply(b.mu2, b.mu2, b.mu2_2, stream);
        gpu::multiply(b.mu1, b.mu2, b.mu1_mu2, stream);

        //gpu::GaussianBlur(b.I1_2, b.sigma1_2, Size(11, 11), 1.5, 0, BORDER_DEFAULT, -1, stream);
        //gpu::subtract(b.sigma1_2, b.mu1_2, b.sigma1_2, stream);
        //b.sigma1_2 -= b.mu1_2;  - This would result in an extra data transfer operation

        //gpu::GaussianBlur(b.I2_2, b.sigma2_2, Size(11, 11), 1.5, 0, BORDER_DEFAULT, -1, stream);
        //gpu::subtract(b.sigma2_2, b.mu2_2, b.sigma2_2, stream);
        //b.sigma2_2 -= b.mu2_2;

        //gpu::GaussianBlur(b.I1_I2, b.sigma12, Size(11, 11), 1.5, 0, BORDER_DEFAULT, -1, stream);
        //gpu::subtract(b.sigma12, b.mu1_mu2, b.sigma12, stream);
        //b.sigma12 -= b.mu1_mu2;

        // Here too it would be an extra data transfer due to the call of operator*(Scalar, Mat).
        gpu::multiply(b.mu1_mu2, 2, b.t1, stream);                // b.t1 = 2 * b.mu1_mu2 + C1;
        //gpu::add(b.t1, C1, b.t1, stream);
        gpu::multiply(b.sigma12, 2, b.t2, stream);                // b.t2 = 2 * b.sigma12 + C2;
        //gpu::add(b.t2, C2, b.t2, stream);

        gpu::multiply(b.t1, b.t2, b.t3, stream);                  // t3 = ((2*mu1_mu2 + C1).*(2*sigma12 + C2))

        //gpu::add(b.mu1_2, b.mu2_2, b.t1, stream);
        //gpu::add(b.t1, C1, b.t1, stream);

        //gpu::add(b.sigma1_2, b.sigma2_2, b.t2, stream);
        //gpu::add(b.t2, C2, b.t2, stream);

        gpu::multiply(b.t1, b.t2, b.t1, stream);                  // t1 = ((mu1_2 + mu2_2 + C1).*(sigma1_2 + sigma2_2 + C2))
        gpu::divide(b.t3, b.t1, b.ssim_map, stream);              // ssim_map = t3./t1;

        // The queued operations must finish before the map is reduced.
        stream.waitForCompletion();

        // Mean of the map for this channel, stored in the matching slot.
        Scalar s = gpu::sum(b.ssim_map, b.buf);
        mssim.val[i] = s.val[0] / (b.ssim_map.rows * b.ssim_map.cols);
    }

    return mssim;
}
// Page text that followed the function on the original line: "两幅一样的图片,对比结果:" (Two identical images; comparison result:)
|