I'm trying to implement normalized cross correlation in Halide.
The code below builds, and Halide JIT compilation doesn't throw any errors. However, Halide seems to hang after JIT compilation. No matter how many trace_* calls I put on different Funcs, only one trace ever prints (on Func output):
Begin realization normxcorr.0(0, 2028, 0, 2028)
Produce normxcorr.0(0, 2028, 0, 2028)
Any advice at all would be helpful.
This algorithm is meant to be equivalent to CV_TM_CCOEFF_NORMED in OpenCV, and normxcorr2 in MATLAB:
void normxcorr( Halide::ImageParam input,
Halide::ImageParam kernel,
Halide::Param<pixel_t> kernel_mean,
Halide::Param<pixel_t> kernel_var,
Halide::Func& output )
{
Halide::Var x, y;
Halide::RDom rk( kernel );
// reduction domain for cumulative sums
Halide::RDom ri( 1, input.width() - kernel.width() - 1,
1, input.height() - kernel.height() - 1 );
Halide::Func input_32( "input32" ),
bounded_input( "bounded_input"),
kernel_32( "kernel32" ),
knorm( "knorm" ),
conv( "conv" ),
normxcorr( "normxcorr_internal" ),
sq_sum_x( "sq_sum_x" ),
sq_sum_x_local( "sq_sum_x_local" ),
sq_sum_y( "sq_sum_y" ),
sq_sum_y_local( "sq_sum_y_local" ),
sum_x( "sum_x" ),
sum_x_local( "sum_x_local" ),
sum_y( "sum_y" ),
sum_y_local( "sum_y_local" ),
win_var( "win_var" ),
win_mean( "win_mean" );
Halide::Expr ksize = kernel.width() * kernel.height();
// accessing outside the input image always returns 0
bounded_input( x, y ) = Halide::BoundaryConditions::constant_exterior( input, 0 )( x, y );
// cast to 32-bit to make room for multiplication
input_32( x, y ) = Halide::cast<int32_t>( bounded_input( x, y ) );
kernel_32( x, y ) = Halide::cast<int32_t>( kernel( x, y ) );
// cumulative sum along each row
sum_x( x, y ) = input_32( x, y );
sum_x( ri.x, ri.y ) += sum_x( ri.x - 1, ri.y );
// sum of 1 x W strips
// (W is the width of the kernel)
sum_x_local( x, y ) = sum_x( x + kernel.width() - 1, y );
sum_x_local( x, y ) -= sum_x( x - 1, y );
// cumulative sums of the 1 x W strips along each column
sum_y( x, y ) = sum_x_local( x, y );
sum_y( ri.x, ri.y ) += sum_y( ri.x, ri.y - 1);
// sums up H strips (as above) to get the sum of an H x W rectangle
// (H is the height of the kernel)
sum_y_local( x, y ) = sum_y( x, y + kernel.height() - 1 );
sum_y_local( x, y ) -= sum_y( x, y - 1 );
// same as above, just with squared image values
sq_sum_x( x, y ) = input_32( x, y ) * input_32( x, y );
sq_sum_x( ri.x, ri.y ) += sq_sum_x( ri.x - 1, ri.y );
sq_sum_x_local( x, y ) = sq_sum_x( x + kernel.width() - 1, y );
sq_sum_x_local( x, y ) -= sq_sum_x( x - 1, y );
sq_sum_y( x, y ) = sq_sum_x_local( x, y );
sq_sum_y( ri.x, ri.y ) += sq_sum_y( ri.x, ri.y - 1);
sq_sum_y_local( x, y ) = sq_sum_y( x, y + kernel.height() - 1 );
sq_sum_y_local( x, y ) -= sq_sum_y( x, y - 1 );
// the mean value of each window
win_mean( x, y ) = sum_y_local( x, y ) / ksize;
// the variance of each window
win_var( x, y ) = sq_sum_y_local( x, y ) / ksize;
win_var( x, y) -= win_mean( x, y ) * win_mean( x, y );
// partially normalize the kernel
// (we'll divide by std. dev. at the end)
knorm( x, y ) = kernel_32( x, y ) - kernel_mean;
// convolve kernel and the input
conv( x, y ) = Halide::sum( knorm( rk.x, rk.y ) * input_32( x + rk.x, y + rk.y ) );
// calculate normxcorr, except scaled to 0 to 254 (for an 8-bit image)
normxcorr( x, y ) = conv( x, y ) * 127 / Halide::sqrt( kernel_var * win_var( x, y ) ) + 127;
// after scaling pixel values, it's safe to cast down to 8-bit
output( x, y ) = Halide::cast<pixel_t>( normxcorr( x, y ) );
}
Aucun commentaire:
Enregistrer un commentaire