mirror of
https://github.com/ImageMagick/ImageMagick.git
synced 2026-05-31 11:18:42 +02:00
Removed broken OpenCL ConvolveImage implementation.
This commit is contained in:
@@ -1176,231 +1176,6 @@ OPENCL_ENDIF()
|
||||
|
||||
}
|
||||
)
|
||||
|
||||
/*
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% %
|
||||
% %
|
||||
% %
|
||||
% C o n v o l v e %
|
||||
% %
|
||||
% %
|
||||
% %
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
*/
|
||||
|
||||
STRINGIFY(
|
||||
__kernel
|
||||
void ConvolveOptimized(const __global CLPixelType *input, __global CLPixelType *output,
|
||||
const unsigned int imageWidth, const unsigned int imageHeight,
|
||||
__constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight,
|
||||
const uint matte, const ChannelType channel, __local CLPixelType *pixelLocalCache, __local float* filterCache) {
|
||||
|
||||
int2 blockID;
|
||||
blockID.x = get_global_id(0) / get_local_size(0);
|
||||
blockID.y = get_global_id(1) / get_local_size(1);
|
||||
|
||||
// image area processed by this workgroup
|
||||
int2 imageAreaOrg;
|
||||
imageAreaOrg.x = blockID.x * get_local_size(0);
|
||||
imageAreaOrg.y = blockID.y * get_local_size(1);
|
||||
|
||||
int2 midFilterDimen;
|
||||
midFilterDimen.x = (filterWidth-1)/2;
|
||||
midFilterDimen.y = (filterHeight-1)/2;
|
||||
|
||||
int2 cachedAreaOrg = imageAreaOrg - midFilterDimen;
|
||||
|
||||
// dimension of the local cache
|
||||
int2 cachedAreaDimen;
|
||||
cachedAreaDimen.x = get_local_size(0) + filterWidth - 1;
|
||||
cachedAreaDimen.y = get_local_size(1) + filterHeight - 1;
|
||||
|
||||
// cache the pixels accessed by this workgroup in local memory
|
||||
int localID = get_local_id(1)*get_local_size(0)+get_local_id(0);
|
||||
int cachedAreaNumPixels = cachedAreaDimen.x * cachedAreaDimen.y;
|
||||
int groupSize = get_local_size(0) * get_local_size(1);
|
||||
for (int i = localID; i < cachedAreaNumPixels; i+=groupSize) {
|
||||
|
||||
int2 cachedAreaIndex;
|
||||
cachedAreaIndex.x = i % cachedAreaDimen.x;
|
||||
cachedAreaIndex.y = i / cachedAreaDimen.x;
|
||||
|
||||
int2 imagePixelIndex;
|
||||
imagePixelIndex = cachedAreaOrg + cachedAreaIndex;
|
||||
|
||||
// only support EdgeVirtualPixelMethod through ClampToCanvas
|
||||
// TODO: implement other virtual pixel method
|
||||
imagePixelIndex.x = ClampToCanvas(imagePixelIndex.x, imageWidth);
|
||||
imagePixelIndex.y = ClampToCanvas(imagePixelIndex.y, imageHeight);
|
||||
|
||||
pixelLocalCache[i] = input[imagePixelIndex.y * imageWidth + imagePixelIndex.x];
|
||||
}
|
||||
|
||||
// cache the filter
|
||||
for (int i = localID; i < filterHeight*filterWidth; i+=groupSize) {
|
||||
filterCache[i] = filter[i];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
|
||||
int2 imageIndex;
|
||||
imageIndex.x = imageAreaOrg.x + get_local_id(0);
|
||||
imageIndex.y = imageAreaOrg.y + get_local_id(1);
|
||||
|
||||
// if out-of-range, stops here and quit
|
||||
if (imageIndex.x >= imageWidth
|
||||
|| imageIndex.y >= imageHeight) {
|
||||
return;
|
||||
}
|
||||
|
||||
int filterIndex = 0;
|
||||
float4 sum = (float4)0.0f;
|
||||
float gamma = 0.0f;
|
||||
if (((channel & AlphaChannel) == 0) || (matte == 0)) {
|
||||
int cacheIndexY = get_local_id(1);
|
||||
for (int j = 0; j < filterHeight; j++) {
|
||||
int cacheIndexX = get_local_id(0);
|
||||
for (int i = 0; i < filterWidth; i++) {
|
||||
CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
|
||||
float f = filterCache[filterIndex];
|
||||
|
||||
sum.x += f * p.x;
|
||||
sum.y += f * p.y;
|
||||
sum.z += f * p.z;
|
||||
sum.w += f * p.w;
|
||||
|
||||
gamma += f;
|
||||
filterIndex++;
|
||||
cacheIndexX++;
|
||||
}
|
||||
cacheIndexY++;
|
||||
}
|
||||
}
|
||||
else {
|
||||
int cacheIndexY = get_local_id(1);
|
||||
for (int j = 0; j < filterHeight; j++) {
|
||||
int cacheIndexX = get_local_id(0);
|
||||
for (int i = 0; i < filterWidth; i++) {
|
||||
|
||||
CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
|
||||
float alpha = QuantumScale*p.w;
|
||||
float f = filterCache[filterIndex];
|
||||
float g = alpha * f;
|
||||
|
||||
sum.x += g*p.x;
|
||||
sum.y += g*p.y;
|
||||
sum.z += g*p.z;
|
||||
sum.w += f*p.w;
|
||||
|
||||
gamma += g;
|
||||
filterIndex++;
|
||||
cacheIndexX++;
|
||||
}
|
||||
cacheIndexY++;
|
||||
}
|
||||
gamma = PerceptibleReciprocal(gamma);
|
||||
sum.xyz = gamma*sum.xyz;
|
||||
}
|
||||
CLPixelType outputPixel;
|
||||
outputPixel.x = ClampToQuantum(sum.x);
|
||||
outputPixel.y = ClampToQuantum(sum.y);
|
||||
outputPixel.z = ClampToQuantum(sum.z);
|
||||
outputPixel.w = ((channel & AlphaChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w;
|
||||
|
||||
output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
|
||||
}
|
||||
)
|
||||
|
||||
STRINGIFY(
|
||||
__kernel
|
||||
void Convolve(const __global CLPixelType *input, __global CLPixelType *output,
|
||||
const uint imageWidth, const uint imageHeight,
|
||||
__constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight,
|
||||
const uint matte, const ChannelType channel) {
|
||||
|
||||
int2 imageIndex;
|
||||
imageIndex.x = get_global_id(0);
|
||||
imageIndex.y = get_global_id(1);
|
||||
|
||||
/*
|
||||
unsigned int imageWidth = get_global_size(0);
|
||||
unsigned int imageHeight = get_global_size(1);
|
||||
*/
|
||||
if (imageIndex.x >= imageWidth
|
||||
|| imageIndex.y >= imageHeight)
|
||||
return;
|
||||
|
||||
int2 midFilterDimen;
|
||||
midFilterDimen.x = (filterWidth-1)/2;
|
||||
midFilterDimen.y = (filterHeight-1)/2;
|
||||
|
||||
int filterIndex = 0;
|
||||
float4 sum = (float4)0.0f;
|
||||
float gamma = 0.0f;
|
||||
if (((channel & AlphaChannel) == 0) || (matte == 0)) {
|
||||
for (int j = 0; j < filterHeight; j++) {
|
||||
int2 inputPixelIndex;
|
||||
inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
|
||||
inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
|
||||
for (int i = 0; i < filterWidth; i++) {
|
||||
inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
|
||||
inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
|
||||
|
||||
CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
|
||||
float f = filter[filterIndex];
|
||||
|
||||
sum.x += f * p.x;
|
||||
sum.y += f * p.y;
|
||||
sum.z += f * p.z;
|
||||
sum.w += f * p.w;
|
||||
|
||||
gamma += f;
|
||||
|
||||
filterIndex++;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
||||
for (int j = 0; j < filterHeight; j++) {
|
||||
int2 inputPixelIndex;
|
||||
inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
|
||||
inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
|
||||
for (int i = 0; i < filterWidth; i++) {
|
||||
inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
|
||||
inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
|
||||
|
||||
CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
|
||||
float alpha = QuantumScale*p.w;
|
||||
float f = filter[filterIndex];
|
||||
float g = alpha * f;
|
||||
|
||||
sum.x += g*p.x;
|
||||
sum.y += g*p.y;
|
||||
sum.z += g*p.z;
|
||||
sum.w += f*p.w;
|
||||
|
||||
gamma += g;
|
||||
|
||||
|
||||
filterIndex++;
|
||||
}
|
||||
}
|
||||
gamma = PerceptibleReciprocal(gamma);
|
||||
sum.xyz = gamma*sum.xyz;
|
||||
}
|
||||
|
||||
CLPixelType outputPixel;
|
||||
outputPixel.x = ClampToQuantum(sum.x);
|
||||
outputPixel.y = ClampToQuantum(sum.y);
|
||||
outputPixel.z = ClampToQuantum(sum.z);
|
||||
outputPixel.w = ((channel & AlphaChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w;
|
||||
|
||||
output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
|
||||
}
|
||||
)
|
||||
|
||||
/*
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% %
|
||||
|
||||
@@ -33,7 +33,6 @@ extern "C" {
|
||||
|
||||
extern MagickPrivate Image
|
||||
*AccelerateBlurImage(const Image *,const double,const double,ExceptionInfo *),
|
||||
*AccelerateConvolveImage(const Image *,const KernelInfo *,ExceptionInfo *),
|
||||
*AccelerateDespeckleImage(const Image *,ExceptionInfo *),
|
||||
*AccelerateLocalContrastImage(const Image *,const double,const double,
|
||||
ExceptionInfo *),
|
||||
|
||||
@@ -1288,366 +1288,6 @@ MagickPrivate MagickBooleanType AccelerateContrastStretchImage(
|
||||
return(status);
|
||||
}
|
||||
|
||||
/*
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% %
|
||||
% %
|
||||
% %
|
||||
% A c c e l e r a t e C o n v o l v e I m a g e %
|
||||
% %
|
||||
% %
|
||||
% %
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
*/
|
||||
|
||||
static Image *ComputeConvolveImage(const Image* image,MagickCLEnv clEnv,
|
||||
const KernelInfo *kernel,ExceptionInfo *exception)
|
||||
{
|
||||
CacheView
|
||||
*filteredImage_view,
|
||||
*image_view;
|
||||
|
||||
cl_command_queue
|
||||
queue;
|
||||
|
||||
cl_event
|
||||
event;
|
||||
|
||||
cl_kernel
|
||||
clkernel;
|
||||
|
||||
cl_int
|
||||
clStatus;
|
||||
|
||||
cl_mem
|
||||
convolutionKernel,
|
||||
filteredImageBuffer,
|
||||
imageBuffer;
|
||||
|
||||
cl_mem_flags
|
||||
mem_flags;
|
||||
|
||||
const void
|
||||
*inputPixels;
|
||||
|
||||
float
|
||||
*kernelBufferPtr;
|
||||
|
||||
Image
|
||||
*filteredImage;
|
||||
|
||||
MagickBooleanType
|
||||
outputReady;
|
||||
|
||||
MagickCLDevice
|
||||
device;
|
||||
|
||||
MagickSizeType
|
||||
length;
|
||||
|
||||
size_t
|
||||
global_work_size[3],
|
||||
localGroupSize[3],
|
||||
localMemoryRequirement;
|
||||
|
||||
unsigned
|
||||
kernelSize;
|
||||
|
||||
unsigned int
|
||||
filterHeight,
|
||||
filterWidth,
|
||||
i,
|
||||
imageHeight,
|
||||
imageWidth,
|
||||
matte;
|
||||
|
||||
void
|
||||
*filteredPixels,
|
||||
*hostPtr;
|
||||
|
||||
/* initialize all CL objects to NULL */
|
||||
imageBuffer = NULL;
|
||||
filteredImageBuffer = NULL;
|
||||
convolutionKernel = NULL;
|
||||
clkernel = NULL;
|
||||
queue = NULL;
|
||||
|
||||
filteredImage = NULL;
|
||||
filteredImage_view = NULL;
|
||||
outputReady = MagickFalse;
|
||||
|
||||
device = RequestOpenCLDevice(clEnv);
|
||||
|
||||
image_view=AcquireAuthenticCacheView(image,exception);
|
||||
inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
|
||||
if (inputPixels == (const void *) NULL)
|
||||
{
|
||||
(void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* Create and initialize OpenCL buffers. */
|
||||
|
||||
/* If the host pointer is aligned to the size of CLPixelPacket,
|
||||
then use the host buffer directly from the GPU; otherwise,
|
||||
create a buffer on the GPU and copy the data over */
|
||||
if (ALIGNED(inputPixels,CLPixelPacket))
|
||||
{
|
||||
mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
|
||||
}
|
||||
else
|
||||
{
|
||||
mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
|
||||
}
|
||||
/* create a CL buffer from image pixel buffer */
|
||||
length = image->columns * image->rows;
|
||||
imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
|
||||
if (clStatus != CL_SUCCESS)
|
||||
{
|
||||
(void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
filteredImage = CloneImage(image,0,0,MagickTrue,exception);
|
||||
assert(filteredImage != NULL);
|
||||
if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
|
||||
{
|
||||
(void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
|
||||
goto cleanup;
|
||||
}
|
||||
filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
|
||||
filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
|
||||
if (filteredPixels == (void *) NULL)
|
||||
{
|
||||
(void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (ALIGNED(filteredPixels,CLPixelPacket))
|
||||
{
|
||||
mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
|
||||
hostPtr = filteredPixels;
|
||||
}
|
||||
else
|
||||
{
|
||||
mem_flags = CL_MEM_WRITE_ONLY;
|
||||
hostPtr = NULL;
|
||||
}
|
||||
/* create a CL buffer from image pixel buffer */
|
||||
length = image->columns * image->rows;
|
||||
filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
|
||||
if (clStatus != CL_SUCCESS)
|
||||
{
|
||||
(void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
kernelSize = (unsigned int) (kernel->width * kernel->height);
|
||||
convolutionKernel = clEnv->library->clCreateBuffer(device->context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernelSize * sizeof(float), NULL, &clStatus);
|
||||
if (clStatus != CL_SUCCESS)
|
||||
{
|
||||
(void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
queue = AcquireOpenCLCommandQueue(device);
|
||||
|
||||
kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, convolutionKernel, CL_TRUE, CL_MAP_WRITE, 0, kernelSize * sizeof(float)
|
||||
, 0, NULL, NULL, &clStatus);
|
||||
if (clStatus != CL_SUCCESS)
|
||||
{
|
||||
(void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
|
||||
goto cleanup;
|
||||
}
|
||||
for (i = 0; i < kernelSize; i++)
|
||||
{
|
||||
kernelBufferPtr[i] = (float) kernel->values[i];
|
||||
}
|
||||
clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, convolutionKernel, kernelBufferPtr, 0, NULL, NULL);
|
||||
if (clStatus != CL_SUCCESS)
|
||||
{
|
||||
(void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* Compute the local memory requirement for a 16x16 workgroup.
|
||||
If it's larger than 16k, reduce the workgroup size to 8x8 */
|
||||
localGroupSize[0] = 16;
|
||||
localGroupSize[1] = 16;
|
||||
localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
|
||||
+ kernel->width*kernel->height*sizeof(float);
|
||||
|
||||
if (localMemoryRequirement > device->local_memory_size)
|
||||
{
|
||||
localGroupSize[0] = 8;
|
||||
localGroupSize[1] = 8;
|
||||
localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
|
||||
+ kernel->width*kernel->height*sizeof(float);
|
||||
}
|
||||
if (localMemoryRequirement <= device->local_memory_size)
|
||||
{
|
||||
/* get the OpenCL kernel */
|
||||
clkernel = AcquireOpenCLKernel(device,"ConvolveOptimized");
|
||||
if (clkernel == NULL)
|
||||
{
|
||||
(void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* set the kernel arguments */
|
||||
i = 0;
|
||||
clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
|
||||
clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
|
||||
imageWidth = (unsigned int) image->columns;
|
||||
imageHeight = (unsigned int) image->rows;
|
||||
clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
|
||||
clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
|
||||
clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
|
||||
filterWidth = (unsigned int) kernel->width;
|
||||
filterHeight = (unsigned int) kernel->height;
|
||||
clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
|
||||
clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
|
||||
matte = (image->alpha_trait > CopyPixelTrait)?1:0;
|
||||
clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
|
||||
clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
|
||||
clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, (localGroupSize[0] + kernel->width-1)*(localGroupSize[1] + kernel->height-1)*sizeof(CLPixelPacket),NULL);
|
||||
clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, kernel->width*kernel->height*sizeof(float),NULL);
|
||||
if (clStatus != CL_SUCCESS)
|
||||
{
|
||||
(void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* pad the global size to a multiple of the local work size dimension */
|
||||
global_work_size[0] = ((image->columns + localGroupSize[0] - 1)/localGroupSize[0] ) * localGroupSize[0] ;
|
||||
global_work_size[1] = ((image->rows + localGroupSize[1] - 1)/localGroupSize[1]) * localGroupSize[1];
|
||||
|
||||
/* launch the kernel */
|
||||
clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, &event);
|
||||
if (clStatus != CL_SUCCESS)
|
||||
{
|
||||
(void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
|
||||
goto cleanup;
|
||||
}
|
||||
RecordProfileData(device,clkernel,event);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* get the OpenCL kernel */
|
||||
clkernel = AcquireOpenCLKernel(device,"Convolve");
|
||||
if (clkernel == NULL)
|
||||
{
|
||||
(void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* set the kernel arguments */
|
||||
i = 0;
|
||||
clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
|
||||
clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
|
||||
imageWidth = (unsigned int) image->columns;
|
||||
imageHeight = (unsigned int) image->rows;
|
||||
clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
|
||||
clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
|
||||
clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
|
||||
filterWidth = (unsigned int) kernel->width;
|
||||
filterHeight = (unsigned int) kernel->height;
|
||||
clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
|
||||
clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
|
||||
matte = (image->alpha_trait > CopyPixelTrait)?1:0;
|
||||
clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
|
||||
clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
|
||||
if (clStatus != CL_SUCCESS)
|
||||
{
|
||||
(void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
localGroupSize[0] = 8;
|
||||
localGroupSize[1] = 8;
|
||||
global_work_size[0] = (image->columns + (localGroupSize[0]-1))/localGroupSize[0] * localGroupSize[0];
|
||||
global_work_size[1] = (image->rows + (localGroupSize[1]-1))/localGroupSize[1] * localGroupSize[1];
|
||||
clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, &event);
|
||||
|
||||
if (clStatus != CL_SUCCESS)
|
||||
{
|
||||
(void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
RecordProfileData(device,clkernel,event);
|
||||
|
||||
if (ALIGNED(filteredPixels,CLPixelPacket))
|
||||
{
|
||||
length = image->columns * image->rows;
|
||||
clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
|
||||
}
|
||||
else
|
||||
{
|
||||
length = image->columns * image->rows;
|
||||
clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
|
||||
}
|
||||
if (clStatus != CL_SUCCESS)
|
||||
{
|
||||
(void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
|
||||
|
||||
cleanup:
|
||||
|
||||
image_view=DestroyCacheView(image_view);
|
||||
if (filteredImage_view != NULL)
|
||||
filteredImage_view=DestroyCacheView(filteredImage_view);
|
||||
if (imageBuffer != NULL)
|
||||
clEnv->library->clReleaseMemObject(imageBuffer);
|
||||
if (filteredImageBuffer != NULL)
|
||||
clEnv->library->clReleaseMemObject(filteredImageBuffer);
|
||||
if (convolutionKernel != NULL)
|
||||
clEnv->library->clReleaseMemObject(convolutionKernel);
|
||||
if (clkernel != NULL)
|
||||
ReleaseOpenCLKernel(clkernel);
|
||||
if (queue != NULL)
|
||||
ReleaseOpenCLCommandQueue(device,queue);
|
||||
if (device != NULL)
|
||||
ReleaseOpenCLDevice(device);
|
||||
if (outputReady == MagickFalse)
|
||||
{
|
||||
if (filteredImage != NULL)
|
||||
{
|
||||
DestroyImage(filteredImage);
|
||||
filteredImage = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return(filteredImage);
|
||||
}
|
||||
|
||||
MagickPrivate Image *AccelerateConvolveImage(const Image *image,
|
||||
const KernelInfo *kernel,ExceptionInfo *exception)
|
||||
{
|
||||
/* Temporary disabled due to access violation
|
||||
|
||||
Image
|
||||
*filteredImage;
|
||||
|
||||
assert(image != NULL);
|
||||
assert(kernel != (KernelInfo *) NULL);
|
||||
assert(exception != (ExceptionInfo *) NULL);
|
||||
if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
|
||||
(checkOpenCLEnvironment(exception) == MagickFalse))
|
||||
return((Image *) NULL);
|
||||
|
||||
filteredImage=ComputeConvolveImage(image,kernel,exception);
|
||||
return(filteredImage);
|
||||
*/
|
||||
magick_unreferenced(image);
|
||||
magick_unreferenced(kernel);
|
||||
magick_unreferenced(exception);
|
||||
return((Image *)NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% %
|
||||
|
||||
@@ -1177,12 +1177,6 @@ MagickExport Image *ConvolveImage(const Image *image,
|
||||
Image
|
||||
*convolve_image;
|
||||
|
||||
#if defined(MAGICKCORE_OPENCL_SUPPORT)
|
||||
convolve_image=AccelerateConvolveImage(image,kernel_info,exception);
|
||||
if (convolve_image != (Image *) NULL)
|
||||
return(convolve_image);
|
||||
#endif
|
||||
|
||||
convolve_image=MorphologyImage(image,ConvolveMorphology,1,kernel_info,
|
||||
exception);
|
||||
return(convolve_image);
|
||||
|
||||
Reference in New Issue
Block a user