图像对象的操作,可能让刚接触OpenCL的读者有点无从下手,至少笔者刚接触OpenCL编程时有这样的感觉。在本节中以图像处理中常见算法之一的图像旋转作为例子来具体演示图像对象的操作。
基于最近邻域取值的方法如下图所示。点(x0,y0)与坐标原点连线的角度为a,到坐标原点的距离为r。从(x0,y0)旋转c角度到新位置(x1,y1),旋转前:
x0=rcos(a);y0=rsin(a)
则旋转后(下图)的值:
x1=rcos(c+a)=rcos(c)cos(a)-rsin(c)sin(a)=x0cos(c)-y0sin(c)
y1=rsin(c+a)=rsin(c)cos(a)+rcos(c)sin(a)=x0sin(c)+y0cos(c)
内核用OpenCL图像对象实现,代码如下:
image_rotate.cl文件内容:
// Sampler configuration: unnormalized (pixel) coordinates, clamp addressing
// for out-of-range coordinates, nearest-neighbour filtering.
__constant sampler_t sampler = CLK_ADDRESS_CLAMP |
                               CLK_FILTER_NEAREST |
                               CLK_NORMALIZED_COORDS_FALSE;
/************************
 * Rotate an image about its centre.
 *
 * Each work-item handles one destination pixel (x, y) = (global id 0,
 * global id 1): it rotates that position by `angle` about the image centre
 * and copies the source texel found there into the destination.
 *
 * srcImg: input (original) image object
 * dstImg: output image object receiving the rotated image
 * angle:  rotation angle, in radians
 ************************/
__kernel void image_rotate(__read_only image2d_t srcImg,
                           __write_only image2d_t dstImg,
                           float angle
                           )
{
    // Destination pixel handled by this work-item.
    const int2 dstPos = (int2)((int)get_global_id(0), (int)get_global_id(1));

    // Rotation centre: the middle of the source image (integer division).
    const int2 centre = (int2)(get_image_width(srcImg) / 2,
                               get_image_height(srcImg) / 2);

    // Position relative to the rotation centre.
    const int2 rel = dstPos - centre;

    const float s = sin(angle);
    const float c = cos(angle);

    // Apply the 2D rotation, then shift back into image coordinates.
    const float2 srcPos = (float2)((c * rel.x - s * rel.y) + centre.x,
                                   (s * rel.x + c * rel.y) + centre.y);

    // Fetch the source texel at the rotated position and store it at the
    // destination pixel.
    write_imagef(dstImg, dstPos, read_imagef(srcImg, sampler, srcPos));
}
在内核代码中,sampler_t sampler用于描述如何访问图像。CLK_NORMALIZED_COORDS_FALSE指明使用非规格化坐标。CLK_ADDRESS_CLAMP指定边界之外的值,对于RGB通道设置为0,对于A通道返回0或1(根据图像格式),这也就解释了为什么旋转之后其他区域为黑色。CLK_FILTER_NEAREST返回最接近坐标位置的像素值,而不是线性插值后的像素值。
在主机端读取图像,笔者采用的是FreeImage图像处理库。主机端主要代码如下:
/************************
 * Read an image file with FreeImage and return it as a 2D OpenCL image
 * object (CL_RGBA / CL_UNORM_INT8, read-only for kernels).
 *
 * context:  OpenCL context the image object is created in
 * fileName: path of the image file to load
 * width:    receives the image width in pixels (written only if loading
 *           succeeds)
 * height:   receives the image height in pixels (written only if loading
 *           succeeds)
 *
 * Returns the new image object, or 0 if the file cannot be loaded,
 * converted to 32 bits, or uploaded. The caller owns the returned object.
 ************************/
cl_mem LoadImage(cl_context context, char *fileName, int &width,
                 int &height)
{
    FREE_IMAGE_FORMAT format = FreeImage_GetFileType(fileName, 0);
    FIBITMAP *loaded = FreeImage_Load(format, fileName);
    if (loaded == NULL)
        return 0;

    // Work on a 32-bit copy so every pixel occupies exactly 4 bytes.
    FIBITMAP *image = FreeImage_ConvertTo32Bits(loaded);
    FreeImage_Unload(loaded);
    if (image == NULL)
        return 0;

    width = FreeImage_GetWidth(image);
    height = FreeImage_GetHeight(image);

    cl_image_format clImageFormat;
    clImageFormat.image_channel_order = CL_RGBA;
    clImageFormat.image_channel_data_type = CL_UNORM_INT8;

    cl_image_desc clImageDesc;
    memset(&clImageDesc, 0, sizeof(cl_image_desc));
    clImageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
    clImageDesc.image_width = width;
    clImageDesc.image_height = height;

    // CL_MEM_COPY_HOST_PTR copies the pixels while clCreateImage runs, so the
    // FreeImage bitmap can be handed over directly and released right after;
    // no intermediate heap buffer is needed (the previous one was leaked).
    // NOTE(review): row pitch is left at 0, i.e. rows are assumed to be
    // tightly packed (width * 4 bytes) -- same assumption as before.
    cl_int errNum;
    cl_mem clImage = clCreateImage(context,
                                   CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                   &clImageFormat,
                                   &clImageDesc,
                                   FreeImage_GetBits(image), &errNum);
    FreeImage_Unload(image);

    if (errNum != CL_SUCCESS)
        return 0;
    return clImage;
}
void GPU_Rotate(char *ImageFileName)
{
cl_context context = 0;
cl_command_queue commandQueue = 0;
cl_program program = 0;
cl_device_id device = 0;
cl_kernel kernel = 0;
cl_mem imageObjects[2] = { 0, 0 };
cl_int errNum;
cl_platform_id platform;
errNum = clGetPlatformIDs(1, &platform, NULL);
errNum = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1,
&device, NULL);
context = clCreateContext(NULL, 1, &device, NULL, NULL,
&errNum);
commandQueue = clCreateCommandQueue(context, device, NULL,
&errNum);
//imageObjects[0]:原始图像;imageObjects[1]:原始图像
cl_mem imageObjects[2] = { 0, 0 };
int width, height;
imageObjects[0] = LoadImage(context, ImageFileName, width,
height);
//创建输出图像对象
cl_image_format clImageFormat;
clImageFormat.image_channel_order = CL_RGBA;
clImageFormat.image_channel_data_type = CL_UNORM_INT8;
cl_image_desc desc;
memset(&desc, '\0', sizeof(desc));
desc.image_height = height;
desc.image_width = width;
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
imageObjects[1] = clCreateImage(context,
CL_MEM_WRITE_ONLY,
&clImageFormat,
&desc,
NULL,
&errNum);
program = CreateProgram(context, device, "image_rotate.cl");
kernel = clCreateKernel(program, "image_rotate", &errNum);
errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem),
&imageObjects[0]);
errNum |= clSetKernelArg(kernel, 1, sizeof(cl_mem),
&imageObjects[1]);
float angle = 45 * PI / 180.0f;
errNum |= clSetKernelArg(kernel, 2, sizeof(cl_float), &angle);
size_t globalWorkSize[2] = { width, height };
errNum = clEnqueueNDRangeKernel(commandQueue,
kernel, 2, NULL,
globalWorkSize, NULL,
0, NULL, NULL);
clFinish(commandQueue);
//拷贝旋转后的图像
char *buffer = new char [width * height * 4];
size_t origin[3] = { 0, 0, 0 };
size_t region[3] = { width, height, 1};
errNum = clEnqueueReadImage(commandQueue,
imageObjects[1], CL_TRUE,
origin, region, 0, 0, buffer,
0, NULL, NULL);
SaveImage("gpu_rotate.bmp", buffer, width, height);
Cleanup(context, commandQueue, program, kernel, imageObjects);
……
}
在函数LoadImage()中,使用FreeImage图像处理库读取图像数据,并把图像数据拷贝到新创建的图像对象中,返回这个图像对象。在GPU_Rotate函数中,创建保存旋转图像的图像对象,并把包含原始图像的图像对象、保存旋转图像的图像对象和旋转角度作为内核参数传入。完成计算后,从保存旋转图像的图像对象中拷贝图像数据并保存图像。主机代码省略了部分无关代码。
对于图像对象操作理解有问题的读者,笔者相信,通过阅读上述例子代码,一定可以加深他们对图像对象操作的理解。