/* ************************** cnv2.c Created on: June 11, 2011 Author: Michael Hirsch ************************** */ #ifndef CNV2_C #define CNV2_C #include #include #include #include #include #include #include #include "cnv2.h" #include "utils.h" cnvmat * cnv2mat(matrix * k, size sz, char cnv_type[5]){ // Allocate parameters of convolution cnvmat * cnvobj = (cnvmat*) malloc(sizeof(cnvmat)); size sfft; size sy; size sf; size sx; size offset; // Check what is f and what is x assuming size(f) < size(x) if ( (sz.m >= k->size.m) && (sz.n >= k->size.n) ) { cnvobj->id = "F"; sf = k->size; sx = sz; } else if ( (sz.m < k->size.m) && (sz.n < k->size.n) ) { cnvobj->id = "X"; sx = k->size; sf = sz; } else { fprintf(stderr, "Size missmatch!\n"); exit(EXIT_FAILURE); } // Check type of convolution if (strncmp(cnv_type, "valid", 5) == 0) { sy.m = sx.m - sf.m + 1; sy.n = sx.n - sf.n + 1; sfft.m = sx.m; sfft.n = sx.n; offset.m = sf.m - 1; offset.n = sf.n - 1; } else if (strncmp(cnv_type, "same", 4) == 0) { sy.m = sx.m; sy.n = sx.n; sfft.m = sx.m + sf.m - 1; sfft.n = sx.n + sf.n - 1; offset.m = floor(0.5 * sf.m); offset.n = floor(0.5 * sf.n); } else if (strncmp(cnv_type, "full", 4) == 0) { sy.m = sx.m + sf.m - 1; sy.n = sx.n + sf.n - 1; sfft.m = sx.m + sf.m - 1; sfft.n = sx.n + sf.n - 1; offset.m = 0; offset.n = 0; fprintf(stderr, "Not yet implemented!\n"); exit(EXIT_FAILURE); } else if (strncmp(cnv_type, "circ", 4) == 0) { sy.m = sx.m; sy.n = sx.n; sfft.m = sx.m; sfft.n = sx.m; offset.m = floor(0.5 * sf.m); offset.n = floor(0.5 * sf.n); fprintf(stderr, "Not yet implemented!\n"); exit(EXIT_FAILURE); } else { fprintf(stderr, "Not a valid convolution type!\n"); exit(EXIT_FAILURE); } // Pad f and x to be sized a power of 2 for efficient comp. of FFT // Not sure if this is particularly clever, padding the array // might take as long as for FFTW3 to compute the FFT // for the time being I keep it as it is as it's essential for CUDA // see http://software.intel.com/en-us/articles/fft-length-and-layout-advisor/ //sfft.m = pow(2, ceil( log(sfft.m) / log(2) )); //sfft.n = pow(2, ceil( log(sfft.n) / log(2) )); // Do in place padding, TODO: set offsets in switch above size offset_tmp; offset_tmp.m = 0; offset_tmp.n = 0; // add additional padding to ensure that FFT(fp) fits in mem space of fp // note that size(FFT(fp)) = (sp.n/2 + 1) * 2 (factor 2 since complex) size sp; sp.m = sfft.m; sp.n = sfft.n + 2; matrix * fp = pad(k, sp, offset_tmp); // Compute FFT, use FFT for real-valued signals // we do in-place transform hence in- and ouput pointer must be equal // need void pointer since input and output are of different type cmatrix * fft = (cmatrix*) fftw_malloc(sizeof(cmatrix)); // for out of place transforms, seems to need the same runtime // y->mat = (cmplx*) fftw_malloc(sfft.m * (sfft.n/2+1)*2 * sizeof(y->mat)); // for in place transforms void * pt_tmp = fp->mat; fft->mat = pt_tmp; const fftw_plan plan_fwd = fftw_plan_dft_r2c_2d(sfft.m, sfft.n, fp->mat, fft->mat, FFTW_ESTIMATE); fftw_execute(plan_fwd); const fftw_plan plan_bwd = fftw_plan_dft_c2r_2d(sfft.m, sfft.n, fft->mat, fp->mat, FFTW_ESTIMATE); // Save fft of padded kernel in cnvmat as well as all other relevant stuff // If possible save bwd plan also in cnvmat /* matrix * ft = (matrix*) malloc(sizeof(matrix)); ft->mat = (real*) malloc(sfft.m * 2*(sfft.n/2 + 1) * sizeof(real)); fftw_plan plan_bwd = fftw_plan_dft_c2r_2d(ft->size.m, ft->size.n, y->mat, ft->mat, FFTW_ESTIMATE); fftw_execute(plan_bwd); fftw_destroy_plan(plan_bwd); */ // Save everything in cnvmat //cnvobj->cnv_type = cnv_type; cnvobj->offset = offset; cnvobj->sx = sx; cnvobj->sy = sy; cnvobj->sf = sf; cnvobj->sz = sz; cnvobj->sfft = sfft; cnvobj->sp = sp; cnvobj->fft = fft; cnvobj->plan_fwd = plan_fwd; cnvobj->plan_bwd = plan_bwd; //printf("ID of cnvmat: %s \n", cnvobj->id); //printf("Type of cnvmat: %s \n", cnvobj->cnv_type); return cnvobj; } matrix * cnv(cnvmat * cnvobj, matrix * z){ // Input is either PSF or image, the output is the convolved image // Do padding and compute fft size offset_tmp; offset_tmp.m = 0; offset_tmp.n = 0; matrix * zp = pad(z, cnvobj->sp, offset_tmp); cmatrix * zp_fft = (cmatrix*) fftw_malloc(sizeof(cmatrix)); void * pt_tmp1 = zp->mat; zp_fft->mat = pt_tmp1; // compute Fourier transform fftw_execute_dft_r2c(cnvobj->plan_fwd, zp->mat, zp_fft->mat); // do modulation in Fourier domain in-place! // Divide by number of FFT elements which is not done by FFTW for (int i=0; i < (cnvobj->sp.m * cnvobj->sp.n/2 ); i++){ zp_fft->mat[i] *= (cnvobj->fft->mat[i] / (cnvobj->sfft.m * cnvobj->sfft.n )); } // compute inverse Fourier transform matrix * yp = (matrix*) fftw_malloc(sizeof(matrix)); void * pt_tmp2 = zp_fft->mat; yp->mat = pt_tmp2; yp->size = cnvobj->sp; fftw_execute_dft_c2r(cnvobj->plan_bwd, zp_fft->mat, yp->mat); // Do cropping matrix * y = crop(yp, cnvobj->sy, cnvobj->offset); return y; } matrix * cnvtp(cnvmat * cnvobj, matrix * z){ // Input is either PSF or image, the output is the convolved image // Do padding and compute fft matrix * zp = pad(z, cnvobj->sp, cnvobj->offset); cmatrix * zp_fft = (cmatrix*) fftw_malloc(sizeof(cmatrix)); void * pt_tmp1 = zp->mat; zp_fft->mat = pt_tmp1; // compute Fourier transform fftw_execute_dft_r2c(cnvobj->plan_fwd, zp->mat, zp_fft->mat); // do modulation in Fourier domain in-place! for (int i=0; i < (cnvobj->sp.m * cnvobj->sp.n/2 ); i++){ zp_fft->mat[i] *= (conj(cnvobj->fft->mat[i]) / (cnvobj->sfft.m*cnvobj->sfft.n )); } // compute inverse Fourier transform matrix * yp = (matrix*) fftw_malloc(sizeof(matrix)); void * pt_tmp2 = zp_fft->mat; yp->mat = pt_tmp2; yp->size = cnvobj->sp; fftw_execute_dft_c2r(cnvobj->plan_bwd, zp_fft->mat, yp->mat); // Do cropping size offset_tmp; offset_tmp.m = 0; offset_tmp.n = 0; matrix * y = crop(yp, cnvobj->sz, offset_tmp); return y; } matrix * pad(matrix * in, size sfft, size offset){ // Allocate memory for padded matrix matrix * out = (matrix*) fftw_malloc(sizeof(matrix)); // Set new size out->size = sfft; // Do copying and padding out->mat = (real*) calloc(sfft.m * sfft.n, sizeof(out->mat)); for (int i=0; i < in->size.m; i++){ for (int j=0; j < in->size.n; j++){ out->mat[IDX(i+offset.m,j+offset.n, out->size.n)] = in->mat[IDX(i,j,in->size.n)]; } } return out; } matrix * crop(matrix * in, size sfft, size offset){ // Allocate memory for padded matrix matrix * out = (matrix*) malloc(sizeof(matrix)); // Set new size out->size = sfft; // Do cropping out->mat = (real*) malloc(sfft.m * sfft.n * sizeof(out->mat)); for (int i=0; i < sfft.m; i++){ for (int j=0; j < sfft.n; j++){ out->mat[IDX(i,j,out->size.n)] = in->mat[IDX(i+offset.m,j+offset.n,in->size.n)]; } } return out; } #endif