OpenCL 使用OpenGL共享的缓存对象介绍了OpenCL与OpenGL共享顶点缓存对象的方法。本节,我们来谈谈OpenGL与OpenCL共享纹理对象的方法。我们先来看一下从OpenGL的纹理对象来创建OpenCL的图像存储器对象的函数接口:
cl_mem clCreateFromGLTexture(cl_context context,
cl_mem_flags flags,
GLenum texture_target, GLint miplevel,
GLuint texture,
cl_int *errcode_ret)
这个函数中的第2个参数flags只能取CL_MEM_READ_ONLY、CL_MEM_WRITE_ONLY或CL_MEM_READ_WRITE三者之一。
第3个参数texture_target表示使用的是哪种类型的纹理。由于种类太多,大家可以看Khronos官方文档。我们一般使用GL_TEXTURE_2D,表示一个二维纹理。当然,这个参数必须与设置纹理时所使用的目标纹理的类型一致。也就是我们在调用glBindTexture等接口时所使用的目标纹理。
第4个参数miplevel指明纹理的细节度。0表示最大细节,即原图像本身。
第5个参数texture就是我们用glGenTextures所生成的纹理对象。我们将通过这个纹理来创建相应的图像类型的OpenCL存储器对象。
如果创建成功,那么此函数将直接返回有效的存储器对象。如果创建失败,则返回空,并且会在errcode_ret参数所指向的变量中给出错误码。
下面我们将针对OS X系统举一个实际的例子,来看看如何从OpenGL纹理对象创建OpenCL图像存储器对象,然后在OpenCL的内核程序中对图像进行处理,并将处理结果直接交给OpenGL的片段着色器使用。在以下代码例子中,我们将把给定的原始纹理图像转为黑白图像。这个变换过程就是在OpenCL的内核程序中完成的。
#import "MyGLView.h"
#define GL_DO_NOT_WARN_IF_MULTI_GL_VERSION_HEADERS_INCLUDED
// Important: <OpenGL/gl3.h> must be included in place of <OpenGL/gl.h>;
// otherwise the VAO entry points misbehave and nothing is drawn correctly.
#import <OpenGL/gl3.h>
// Declares CGLGetCurrentContext(), used to obtain the CGL sharegroup.
#import <OpenGL/OpenGL.h>
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Private instance state of MyGLView, declared in a class extension so it
// stays out of the public header.
@interface MyGLView ()
{
@private
    // GLSL program object created in -loadShaders.
    GLuint mProgram;

    // Vertex array object created in -prepareOpenGL.
    GLuint mVAO;
    // Buffer object holding the quad's vertex positions (attribute 0).
    GLuint mVBOVertices;
    // Buffer object holding the quad's texture coordinates (attribute 1).
    GLuint mVBOTextureCoords;

    // GL texture object; also wrapped as OpenCL image objects.
    GLuint mTexName;
    // Location of the "texSampler" uniform in mProgram.
    GLint mSamplerLocation;

    // Pixel dimensions of the image uploaded to mTexName.
    int mImageWidth;
    int mImageHeight;

    // Value stored by -setTag: and returned by -tag.
    NSInteger mTag;
}
@end
@implementation MyGLView
/**
 * Reads a GLSL source file from disk and compiles it into a shader object.
 *
 * Any compile log reported by the driver is printed to stdout.
 *
 * @param type     Shader kind passed to glCreateShader (e.g. GL_VERTEX_SHADER).
 * @param filename Path of the source file; NULL is treated as a failure.
 * @return The compiled shader name, or 0 if the file could not be read or
 *         the shader failed to compile.
 */
static GLuint CompileShader(GLenum type, const char *filename)
{
    // A missing bundle resource reaches us as NULL; fopen(NULL) is undefined.
    if (filename == NULL)
    {
        printf("Shader file path is NULL!\n");
        return 0;
    }

    FILE *fp = fopen(filename, "r");
    if(fp == NULL)
    {
        printf("File %s cannot be opened!", filename);
        return 0;
    }

    // Measure the file. ftell reports failure as -1, which must not be
    // turned into a huge size_t.
    long fileSize = -1;
    if (fseek(fp, 0, SEEK_END) == 0)
        fileSize = ftell(fp);
    if (fileSize <= 0 || fseek(fp, 0, SEEK_SET) != 0)
    {
        printf("File %s is empty or its size cannot be determined!\n", filename);
        fclose(fp);
        return 0;
    }

    GLchar *sourceBuffer = (GLchar *)malloc((size_t)fileSize);
    if (sourceBuffer == NULL)
    {
        printf("Out of memory while reading %s!\n", filename);
        fclose(fp);
        return 0;
    }

    // Use the number of bytes actually read as the source length, so a short
    // read never hands uninitialised bytes to the compiler.
    const size_t bytesRead = fread(sourceBuffer, 1, (size_t)fileSize, fp);
    const int readFailed = ferror(fp);
    fclose(fp);
    if (readFailed || bytesRead == 0)
    {
        printf("File %s cannot be read!\n", filename);
        free(sourceBuffer);
        return 0;
    }

    GLuint shader = glCreateShader(type);
    if (shader == 0)
    {
        printf("Shader object cannot be created!\n");
        free(sourceBuffer);
        return 0;
    }

    // The buffer is not NUL-terminated, so the length is passed explicitly.
    const GLchar *source = sourceBuffer;
    const GLint sourceLength = (GLint)bytesRead;
    glShaderSource(shader, 1, &source, &sourceLength);
    glCompileShader(shader);
    free(sourceBuffer);

    GLint logLength = 0;
    glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &logLength);
    if (logLength > 0)
    {
        GLchar *log = (GLchar *)malloc((size_t)logLength);
        if (log != NULL)
        {
            glGetShaderInfoLog(shader, logLength, &logLength, log);
            printf("Shader compile log:\n%s\n", log);
            free(log);
        }
    }

    GLint status = 0;
    glGetShaderiv(shader, GL_COMPILE_STATUS, &status);
    if(status == 0)
    {
        glDeleteShader(shader);
        return 0;
    }
    return shader;
}
/**
 * Links a program object and prints the link log, if any, to stdout.
 *
 * @param prog Program object with its shaders already attached.
 * @return true when GL_LINK_STATUS is non-zero after linking, false otherwise.
 */
static bool LinkProgram(GLuint prog)
{
    glLinkProgram(prog);

    GLint logLength = 0;
    glGetProgramiv(prog, GL_INFO_LOG_LENGTH, &logLength);
    if (logLength > 0)
    {
        GLchar *log = (GLchar *)malloc((size_t)logLength);
        if (log != NULL)
        {
            glGetProgramInfoLog(prog, logLength, &logLength, log);
            printf("Program link log:\n%s\n", log);
            free(log);
        }
    }

    GLint status = 0;
    glGetProgramiv(prog, GL_LINK_STATUS, &status);
    if (status == 0)
        return false;
    return true;
}
/**
 * Validates a linked program against the current GL state and prints the
 * validation log, if any, to stdout.
 *
 * @param prog Program object to validate.
 * @return true when GL_VALIDATE_STATUS is non-zero, false otherwise.
 */
static bool ValidateProgram(GLuint prog)
{
    GLint logLength = 0;
    GLint status = 0;

    glValidateProgram(prog);

    glGetProgramiv(prog, GL_INFO_LOG_LENGTH, &logLength);
    if (logLength > 0)
    {
        GLchar *log = (GLchar *)malloc((size_t)logLength);
        if (log != NULL)
        {
            glGetProgramInfoLog(prog, logLength, &logLength, log);
            printf("Program validate log:\n%s\n", log);
            free(log);
        }
    }

    glGetProgramiv(prog, GL_VALIDATE_STATUS, &status);
    if (status == 0)
        return false;
    return true;
}
/**
 * Builds the GLSL program used to draw the textured quad.
 *
 * Compiles "shader.vsh" and "shader.fsh" from the main bundle, binds the
 * attributes "inPos" and "inTexCoords" to locations 0 and 1, links the
 * program into mProgram, looks up the "texSampler" uniform and validates
 * the result.
 *
 * @return TRUE when the program compiled, linked and validated; FALSE
 *         otherwise. mProgram keeps the program object either way.
 */
- (BOOL)loadShaders
{
    // Create the program object.
    mProgram = glCreateProgram();
    if (mProgram == 0)
    {
        NSLog(@"Failed to create program object");
        return FALSE;
    }

    // Attribute locations must be bound before the program is linked.
    glBindAttribLocation(mProgram, 0, "inPos");
    glBindAttribLocation(mProgram, 1, "inTexCoords");

    // Create and compile the vertex shader.
    NSString *vertShaderPathname = [[NSBundle mainBundle]
                                    pathForResource:@"shader"
                                             ofType:@"vsh"];
    GLuint vertShader = CompileShader(GL_VERTEX_SHADER,
                                      [vertShaderPathname UTF8String]);
    if(vertShader == 0)
    {
        NSLog(@"Failed to compile vertex shader");
        return FALSE;
    }

    // Create and compile the fragment shader.
    NSString *fragShaderPathname = [[NSBundle mainBundle]
                                    pathForResource:@"shader"
                                             ofType:@"fsh"];
    GLuint fragShader = CompileShader(GL_FRAGMENT_SHADER,
                                      [fragShaderPathname UTF8String]);
    if(fragShader == 0)
    {
        NSLog(@"Failed to compile fragment shader");
        // Release the vertex shader that was already compiled.
        glDeleteShader(vertShader);
        return FALSE;
    }

    // Attach both stages and link.
    glAttachShader(mProgram, vertShader);
    glAttachShader(mProgram, fragShader);
    const bool linked = LinkProgram(mProgram);

    // The shader objects are no longer needed once linking has been
    // attempted; release them on the success and failure paths alike.
    glDetachShader(mProgram, vertShader);
    glDetachShader(mProgram, fragShader);
    glDeleteShader(vertShader);
    glDeleteShader(fragShader);

    if (!linked)
    {
        NSLog(@"Failed to link program: %d", mProgram);
        return FALSE;
    }

    // Location of the sampler uniform used by the fragment shader.
    mSamplerLocation = glGetUniformLocation(mProgram, "texSampler");

    // Validate the program.
    return ValidateProgram(mProgram);
}
/**
 * Initialises the view and installs a double-buffered, 4x multisampled
 * OpenGL 3.2 Core Profile pixel format and context.
 *
 * This file uses manual reference counting, so the receiver is released
 * explicitly on the failure paths.
 *
 * @param frameRect Frame rectangle forwarded to the superclass initialiser.
 * @return The initialised view, or nil if the superclass initialiser, the
 *         pixel format or the context could not be created.
 */
- (id)initWithFrame:(NSRect)frameRect
{
    self = [super initWithFrame:frameRect];
    if (self == nil)
        return nil;

    const NSOpenGLPixelFormatAttribute attrs[] =
    {
        // Optional: enable double buffering.
        NSOpenGLPFADoubleBuffer,
        // Required to request a specific OpenGL profile...
        NSOpenGLPFAOpenGLProfile,
        // ...namely the OpenGL 3.2 Core Profile.
        NSOpenGLProfileVersion3_2Core,
        // Multisample anti-aliasing with one sample buffer...
        NSOpenGLPFAMultisample,
        NSOpenGLPFASampleBuffers, (NSOpenGLPixelFormatAttribute)1,
        // ...and four samples per pixel.
        NSOpenGLPFASamples, (NSOpenGLPixelFormatAttribute)4,
        // Terminator.
        0
    };

    NSOpenGLPixelFormat *pf = [[NSOpenGLPixelFormat alloc]
                               initWithAttributes:attrs];
    if (pf == nil)
    {
        NSLog(@"No OpenGL pixel format");
        [self release];
        return nil;
    }

    NSOpenGLContext *context = [[NSOpenGLContext alloc]
                                initWithFormat:pf shareContext:nil];
    if (context == nil)
    {
        NSLog(@"No OpenGL context");
        [pf release];
        [self release];
        return nil;
    }

    [self setPixelFormat:pf];
    [pf release];
    [self setOpenGLContext:context];
    [context release];

    return self;
}
// Logs the deallocation and then hands over to the superclass (manual
// reference counting). GL resources are not released here.
- (void)dealloc
{
    NSLog(@"%@", @"MyGLView deallocated!");

    [super dealloc];
}
/**
 * Releases every OpenGL object owned by the view (program, VAO, both VBOs
 * and the texture), then detaches and clears the view's OpenGL context.
 *
 * Each handle is reset to 0 after deletion, so calling this method more
 * than once does not delete stale names.
 */
- (void)destroyBuffers
{
    // GL objects must be deleted while the context that owns them is current.
    [[self openGLContext] makeCurrentContext];

    // Release the program object.
    if(mProgram != 0)
    {
        glDeleteProgram(mProgram);
        mProgram = 0;
    }

    // Release the vertex array object.
    if(mVAO != 0)
    {
        glDeleteVertexArrays(1, &mVAO);
        mVAO = 0;
    }

    // Release the vertex and texture-coordinate buffer objects.
    if(mVBOVertices != 0)
    {
        glDeleteBuffers(1, &mVBOVertices);
        mVBOVertices = 0;
    }
    if(mVBOTextureCoords != 0)
    {
        glDeleteBuffers(1, &mVBOTextureCoords);
        mVBOTextureCoords = 0;
    }

    // Release the texture object.
    if(mTexName != 0)
    {
        glDeleteTextures(1, &mTexName);
        mTexName = 0;
    }

    // Detach the context from the view and drop it.
    [[self openGLContext] clearDrawable];
    [self clearGLContext];
}
// Stores the tag value later returned by -tag.
- (void)setTag:(NSInteger)tag
{
    mTag = tag;
}
// Returns the tag value last stored with -setTag:.
- (NSInteger)tag
{
    return mTag;
}
/**
 * Decodes the first image in the file at `path` into a tightly packed
 * 32-bit-per-pixel buffer (premultiplied alpha first, host byte order),
 * flipped vertically so that row 0 is the bottom of the image.
 *
 * @param pImageSize On success receives the image width and height in
 *                   pixels. Must not be NULL.
 * @param path       File system path of the image.
 * @return A malloc-family buffer of width * height * 4 bytes that the
 *         caller must release with free(), or NULL on any failure (in which
 *         case *pImageSize is left untouched).
 */
- (GLubyte *)getImageData:(CGSize *)pImageSize fromPath:
(NSString *)path
{
    // +fileURLWithPath: raises on nil, and the size is written through
    // pImageSize, so reject both up front.
    if (pImageSize == NULL || path == nil)
    {
        NSLog(@"No image");
        return NULL;
    }

    NSURL *url = [NSURL fileURLWithPath: path];
    CGImageSourceRef src = CGImageSourceCreateWithURL((CFURLRef)url, NULL);
    if (!src)
    {
        NSLog(@"No image");
        return NULL;
    }

    CGImageRef image = CGImageSourceCreateImageAtIndex(src, 0, NULL);
    CFRelease(src);
    if (image == NULL)
    {
        NSLog(@"Image cannot be decoded");
        return NULL;
    }

    const size_t width = CGImageGetWidth(image);
    const size_t height = CGImageGetHeight(image);
    if (width == 0 || height == 0)
    {
        NSLog(@"Image is empty");
        CGImageRelease(image);
        return NULL;
    }

    // calloc guards the size multiplication against overflow and starts from
    // transparent black, so images with alpha are not composited over
    // uninitialised memory.
    GLubyte *imageData = (GLubyte *)calloc(width * height, 4);
    if (imageData == NULL)
    {
        NSLog(@"Out of memory for image data");
        CGImageRelease(image);
        return NULL;
    }

    CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();
    CGContextRef context = CGBitmapContextCreate(imageData, width, height, 8,
                               4 * width, colorSpace,
                               kCGImageAlphaPremultipliedFirst | kCGBitmapByteOrder32Host);
    CGColorSpaceRelease(colorSpace);
    if (context == NULL)
    {
        NSLog(@"Bitmap context cannot be created");
        free(imageData);
        CGImageRelease(image);
        return NULL;
    }

    // Flip vertically while drawing.
    CGContextTranslateCTM(context, 0.0, height);
    CGContextScaleCTM(context, 1.0, -1.0);
    CGContextDrawImage(context, CGRectMake(0, 0, width, height),
                       image);
    CGContextRelease(context);
    CGImageRelease(image);

    *pImageSize = CGSizeMake(width, height);
    return imageData;
}
// Positions of the four quad corners, one (x, y, z, w) tuple per vertex,
// uploaded to the vertex VBO in -prepareOpenGL.
static GLfloat sVertexCoords[] =
{
    -0.8f,  0.6f, 0.0f, 1.0f,   // top-left
    -0.8f, -0.6f, 0.0f, 1.0f,   // bottom-left
     0.8f,  0.6f, 0.0f, 1.0f,   // top-right
     0.8f, -0.6f, 0.0f, 1.0f    // bottom-right
};
// Texture coordinates of the four quad corners, one (s, t) pair per vertex,
// listed in the same order as the vertex positions.
static GLfloat sTextureCoords[] =
{
    0.0f, 1.0f,   // top-left
    0.0f, 0.0f,   // bottom-left
    1.0f, 1.0f,   // top-right
    1.0f, 0.0f    // bottom-right
};
/**
 * One-time OpenGL setup: creates the VAO and the two VBOs for the quad,
 * creates the texture and uploads "image.png" from the main bundle into it,
 * builds the shader program and sets the viewport and clear colour.
 *
 * If the image cannot be loaded the texture is left without storage and
 * mImageWidth / mImageHeight stay 0. If the shaders cannot be built the
 * method returns before the program is put into use.
 */
- (void)prepareOpenGL
{
    [super prepareOpenGL];
    [[self openGLContext] makeCurrentContext];

    // Synchronise buffer swaps with the vertical refresh.
    GLint swapInt = 1;
    [[self openGLContext] setValues:&swapInt forParameter:
     NSOpenGLCPSwapInterval];

    // The OpenGL 3.2 Core Profile requires a vertex array object.
    glGenVertexArrays(1, &mVAO);
    glBindVertexArray(mVAO);

    // Vertex position VBO, bound to attribute 0 (4 floats per vertex).
    glGenBuffers(1, &mVBOVertices);
    glBindBuffer(GL_ARRAY_BUFFER, mVBOVertices);
    glBufferData(GL_ARRAY_BUFFER, sizeof(sVertexCoords),
                 sVertexCoords, GL_STATIC_DRAW);
    glEnableVertexAttribArray(0);
    glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 0,
                          (const GLvoid *)0);

    // Texture coordinate VBO, bound to attribute 1 (2 floats per vertex).
    glGenBuffers(1, &mVBOTextureCoords);
    glBindBuffer(GL_ARRAY_BUFFER, mVBOTextureCoords);
    glBufferData(GL_ARRAY_BUFFER, sizeof(sTextureCoords),
                 sTextureCoords, GL_STATIC_DRAW);
    glEnableVertexAttribArray(1);
    glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 0,
                          (const GLvoid *)0);

    // Unbind the array buffer.
    glBindBuffer(GL_ARRAY_BUFFER, 0);

    // Texture setup. Rows in the source buffer are 4 * width bytes with no
    // padding, so the unpack alignment must be 4; an alignment of 8 would
    // skew every row of an image whose width is odd.
    glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
    glActiveTexture(GL_TEXTURE0);
    glGenTextures(1, &mTexName);
    glBindTexture(GL_TEXTURE_2D, mTexName);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER,
                    GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER,
                    GL_LINEAR);

    // Load the image and upload it to the texture.
    CGSize imageSize = CGSizeZero;
    GLubyte *imageData = NULL;
    NSString *imagePath = [[NSBundle mainBundle]
                           pathForResource:@"image" ofType:@"png"];
    if (imagePath != nil)
        imageData = [self getImageData:&imageSize fromPath:imagePath];

    if (imageData != NULL)
    {
        mImageWidth = imageSize.width;
        mImageHeight = imageSize.height;
        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, mImageWidth,
                     mImageHeight, 0, GL_BGRA,
                     GL_UNSIGNED_INT_8_8_8_8_REV,
                     imageData);
        // glTexImage2D copies the pixels, so the buffer can be released now.
        free(imageData);
    }
    else
    {
        NSLog(@"Texture image cannot be loaded");
        mImageWidth = 0;
        mImageHeight = 0;
    }

    // Load the shaders and build the GL program.
    if(![self loadShaders])
        return;
    glUseProgram(mProgram);

    // Point the sampler at texture unit 0 (GL_TEXTURE0).
    glUniform1i(mSamplerLocation, 0);

    glViewport(0, 0, self.frame.size.width, self.frame.size.height);
    glClearColor(0.5f, 0.5f, 0.5f, 1.0f);
}
/**
 * Converts the contents of mTexName to greyscale in place with OpenCL.
 *
 * The method creates an OpenCL context that shares objects with the current
 * OpenGL context, builds "compute.ocl" from the main bundle, wraps the GL
 * texture as one read-only and one write-only image object, runs the
 * "ImageProcessing" kernel with one work-item per pixel, and releases every
 * OpenCL object before returning. Failures are logged and abort the
 * computation; the texture is then left as it was.
 *
 * Must be called with the view's OpenGL context current.
 */
- (void)doOpenCLComputing
{
    cl_platform_id oclPlatform = NULL;
    cl_device_id oclDevice = NULL;
    // OpenCL context to be created.
    cl_context context = NULL;
    cl_command_queue commandQueue = NULL;
    cl_program oclProgram = NULL;
    cl_kernel kernel = NULL;
    // Image object shared with OpenGL, used as kernel input.
    cl_mem imageMemSrc = NULL;
    // Image object shared with OpenGL, used as kernel output.
    cl_mem imageMemDst = NULL;
    // Sampler used to read the input image.
    cl_sampler sampler = NULL;

#ifdef __APPLE__
    CGLContextObj cgl_context = CGLGetCurrentContext();
    CGLShareGroupObj sharegroup = CGLGetShareGroup(cgl_context);
    gcl_gl_set_sharegroup(sharegroup);
#endif

    do
    {
        // Nothing to process if the texture was never created or filled.
        if(mTexName == 0 || mImageWidth <= 0 || mImageHeight <= 0)
        {
            NSLog(@"No texture image to process!");
            break;
        }

        // Get the OpenCL platform.
        cl_int status = clGetPlatformIDs(1, &oclPlatform, NULL);
        if(status != CL_SUCCESS)
        {
            NSLog(@"OpenCL platform get failed!");
            break;
        }

        // Get a GPU device. Strictly speaking this should be the same
        // device that OpenGL is rendering with.
        status = clGetDeviceIDs(oclPlatform, CL_DEVICE_TYPE_GPU, 1,
                                &oclDevice, NULL);
        if(status != CL_SUCCESS)
        {
            NSLog(@"OpenCL GPU cannot be found!");
            break;
        }

        // Property list that ties the OpenCL context to the GL context.
        cl_context_properties properties[] =
        {
#ifdef WIN32
            CL_GL_CONTEXT_KHR,
            (cl_context_properties)wglGetCurrentContext(),
            CL_WGL_HDC_KHR,
            (cl_context_properties)wglGetCurrentDC(),
#endif
#ifdef __linux__
            CL_GL_CONTEXT_KHR,
            (cl_context_properties)glXGetCurrentContext(),
            CL_GLX_DISPLAY_KHR,
            (cl_context_properties)glXGetCurrentDisplay(),
#endif
#ifdef __APPLE__
            CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE,
            (cl_context_properties)sharegroup,
#endif
            0
        };

        // Create the OpenCL context.
        context = clCreateContext(properties, 1, &oclDevice,
                                  NULL, NULL, &status);
        if(context == NULL || status != CL_SUCCESS)
        {
            NSLog(@"OpenCL context create failed: %d", status);
            break;
        }

        // Create the command queue.
        commandQueue = clCreateCommandQueue(context, oclDevice,
                                            0, &status);
        if(commandQueue == NULL || status != CL_SUCCESS)
        {
            NSLog(@"OpenCL command queue create failed: %d", status);
            break;
        }

        // Load the kernel source. A missing or unreadable file yields NULL,
        // which must not reach strlen().
        NSString *kernelPath = [[NSBundle mainBundle]
                                pathForResource:@"compute" ofType:@"ocl"];
        NSString *kernelText = nil;
        if(kernelPath != nil)
        {
            kernelText = [NSString stringWithContentsOfFile:kernelPath
                                                   encoding:NSUTF8StringEncoding
                                                      error:nil];
        }
        const char *aSource = [kernelText UTF8String];
        if(aSource == NULL)
        {
            NSLog(@"OpenCL kernel source cannot be loaded!");
            break;
        }
        size_t kernelLength = strlen(aSource);
        oclProgram = clCreateProgramWithSource(context, 1, &aSource,
                                               &kernelLength, &status);
        if(oclProgram == NULL || status != CL_SUCCESS)
        {
            NSLog(@"OpenCL program create failed!");
            break;
        }

        // Build the program; on failure print the build log.
        status = clBuildProgram(oclProgram, 1, &oclDevice,
                                NULL, NULL, NULL);
        if(status != CL_SUCCESS)
        {
            printf("Error: Failed to build program executable!\n");
            size_t logSize = 0;
            clGetProgramBuildInfo(oclProgram, oclDevice,
                                  CL_PROGRAM_BUILD_LOG,
                                  0, NULL, &logSize);
            char *buffer = (char *)malloc(logSize + 1);
            if(buffer != NULL)
            {
                if(clGetProgramBuildInfo(oclProgram, oclDevice,
                                         CL_PROGRAM_BUILD_LOG,
                                         logSize, buffer, NULL) == CL_SUCCESS)
                {
                    buffer[logSize] = '\0';
                    printf("%s\n", buffer);
                }
                free(buffer);
            }
            break;
        }

        // Read-only image object sharing the GL texture (kernel input).
        imageMemSrc = clCreateFromGLTexture(context,
                                            CL_MEM_READ_ONLY,
                                            GL_TEXTURE_2D, 0,
                                            mTexName, &status);
        if(imageMemSrc == NULL || status != CL_SUCCESS)
        {
            NSLog(@"OpenCL input image create failed: %d", status);
            break;
        }

        // Write-only image object sharing the same GL texture (kernel output).
        imageMemDst = clCreateFromGLTexture(context,
                                            CL_MEM_WRITE_ONLY,
                                            GL_TEXTURE_2D, 0,
                                            mTexName, &status);
        if(imageMemDst == NULL || status != CL_SUCCESS)
        {
            NSLog(@"OpenCL output image create failed: %d", status);
            break;
        }

        // The kernel reads with integer coordinates. For that case the
        // OpenCL C specification requires unnormalised coordinates and
        // nearest filtering; with linear filtering the values read are
        // undefined.
        sampler = clCreateSampler(context, CL_FALSE,
                                  CL_ADDRESS_CLAMP_TO_EDGE,
                                  CL_FILTER_NEAREST, &status);
        if(sampler == NULL || status != CL_SUCCESS)
        {
            NSLog(@"OpenCL sampler create failed: %d", status);
            break;
        }

        // Create the kernel object.
        kernel = clCreateKernel(oclProgram, "ImageProcessing",
                                &status);
        if(kernel == NULL || status != CL_SUCCESS)
        {
            NSLog(@"OpenCL kernel create failed: %d", status);
            break;
        }

        // Set the kernel arguments.
        status = clSetKernelArg(kernel, 0, sizeof(imageMemDst),
                                &imageMemDst);
        status |= clSetKernelArg(kernel, 1, sizeof(imageMemSrc),
                                 &imageMemSrc);
        status |= clSetKernelArg(kernel, 2, sizeof(cl_sampler),
                                 (void *)&sampler);
        if(status != CL_SUCCESS)
        {
            NSLog(@"Kernel parameters pass failed!");
            break;
        }

        // Shared GL objects must be acquired before OpenCL touches them,
        // and pending GL commands on them must have completed first.
        glFinish();
        cl_mem glObjects[] = { imageMemSrc, imageMemDst };
        status = clEnqueueAcquireGLObjects(commandQueue, 2, glObjects,
                                           0, NULL, NULL);
        if(status != CL_SUCCESS)
        {
            NSLog(@"OpenCL cannot acquire GL objects: %d", status);
            break;
        }

        // One work-item per pixel. The work-group size is left to the
        // runtime (NULL): a hand-picked size must divide the global size
        // and fit the device limit, which width / 64 by height / 64 does
        // not guarantee (it can even be 0 for small images).
        const size_t global_work_size[] = { (size_t)mImageWidth,
                                            (size_t)mImageHeight };
        const cl_int runStatus = clEnqueueNDRangeKernel(commandQueue, kernel,
                                                        2, NULL, global_work_size,
                                                        NULL, 0, NULL,
                                                        NULL);

        // Hand the texture back to OpenGL, then wait so that the processed
        // pixels are in place before the texture is drawn.
        const cl_int releaseStatus = clEnqueueReleaseGLObjects(commandQueue, 2,
                                                               glObjects,
                                                               0, NULL, NULL);
        const cl_int finishStatus = clFinish(commandQueue);
        if(runStatus != CL_SUCCESS || releaseStatus != CL_SUCCESS ||
           finishStatus != CL_SUCCESS)
        {
            NSLog(@"OpenCL kernel run error!");
        }
    }
    while(false);

    // Release every OpenCL object that was created.
    if(imageMemDst != NULL)
        clReleaseMemObject(imageMemDst);
    if(imageMemSrc != NULL)
        clReleaseMemObject(imageMemSrc);
    if(sampler != NULL)
        clReleaseSampler(sampler);
    if(kernel != NULL)
        clReleaseKernel(kernel);
    if(oclProgram != NULL)
        clReleaseProgram(oclProgram);
    if(commandQueue != NULL)
        clReleaseCommandQueue(commandQueue);
    if(context != NULL)
        clReleaseContext(context);
}
/**
 * Redraws the view: runs the OpenCL pass over the texture, clears the
 * colour buffer, draws the quad as a four-vertex triangle strip and
 * presents the result.
 *
 * @param dirtyRect Region that needs redrawing; the whole view is redrawn
 *                  regardless.
 */
- (void)drawRect:(NSRect)dirtyRect
{
    [self doOpenCLComputing];

    glClear(GL_COLOR_BUFFER_BIT);
    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
    glFlush();

    [[self openGLContext] flushBuffer];
}
@end
由于通过OS X的CoreGraphics接口所生成的位图图像在格式上是BGRA模式,所以在调用glTexImage2D时,我们对源图像格式设置的是GL_BGRA,并且像素颜色的数据类型是GL_UNSIGNED_INT_8_8_8_8_REV。而目标纹理的像素颜色格式则为传统的RGBA模式。
我们在创建采样器对象时,纹理坐标不做规格化,这样有利于我们用整型数据来作为纹理的坐标值,可以少一些额外的计算。需要注意的是,按照OpenCL规范,使用整型坐标调用read_imagef时,采样器的过滤模式必须是最近邻模式(CL_FILTER_NEAREST);若使用线性插值模式(CL_FILTER_LINEAR),读到的值是未定义的。另外,这里需要注意的是,在OpenCL中,如果是对一个图像存储器对象进行操作,那么该对象要么是只读的,要么是只写的。也就是说,我们不能同时对一个图像存储器对象设置为可读可写。或者说,对同一个图像存储器对象同时读写是非法的。所以,在本例中,我们定义了两个图像存储器对象,这两个对象都创建于同一个纹理对象。一个作为输入,一个作为输出。对指向同一个纹理对象的两个不同的图像存储器对象进行同时读写是没有问题的。
下面我们来看看OpenCL内核代码:
// Before OpenCL 2.0 an image argument of a kernel can only be qualified
// with __read_only or __write_only; __read_write is not available.
//
// Converts one pixel per work-item to greyscale.
//   imageDst - image that receives the greyscale pixels
//   imageSrc - image that supplies the original pixels
//   sampler  - sampler used to read imageSrc with integer pixel coordinates
__kernel void ImageProcessing(__write_only image2d_t imageDst,
                              __read_only image2d_t imageSrc,
                              sampler_t sampler)
{
    const int x = (int)get_global_id(0);
    const int y = (int)get_global_id(1);

    // Ignore work-items outside the image, in case the global work size
    // has been rounded up beyond the image dimensions.
    const int2 dim = get_image_dim(imageSrc);
    if (x >= dim.x || y >= dim.y)
        return;

    // Luma weights for R, G and B; the alpha component does not contribute.
    const float4 transVector = (float4)(0.299f, 0.587f, 0.114f, 0.0f);

    const float4 color = read_imagef(imageSrc, sampler, (int2)(x, y));
    const float yComp = dot(color, transVector);

    // Write the grey value to all three colour channels, fully opaque.
    write_imagef(imageDst, (int2)(x, y),
                 (float4)(yComp, yComp, yComp, 1.0f));
}
这段代码非常简单,就是将读入的原始图像像素转为其相关的黑白颜色,然后写到相应图像位置。这里要提一点的是,在OpenCL 2.0之前,图像对象类型image2d_t前面只能用__write_only或__read_only来显式修饰,并且我们显式地加上此限定符,使得在调用read_imagef或write_imagef时不会引发歧义。因为在调用read_imagef或write_imagef时,OpenCL编译器会对图像对象类型进行检查。read_imagef的图像对象只能是只读类型或读写类型;write_imagef的图像对象只能是只写类型或读写类型。而在OpenCL 2.0中可以使用read_write属性以非采样形式直接对image2d_t对象进行访问。
下面给出OpenGL顶点着色器代码,也比较简单:
// The OpenGL 3.2 Core Profile requires an explicit version directive.
#version 150 core

in vec4 inPos;
in vec2 inTexCoords;
out vec2 textureCoords;

/*
 * Both matrices below are applied to the vertex as a row vector
 * (vertex * matrix), so translation terms sit in the last row.
 *
 * Model-view (translation by x, y, z):
 *   [ 1 0 0 0 ]
 *   [ 0 1 0 0 ]
 *   [ 0 0 1 0 ]
 *   [ x y z 1 ]
 *
 * Orthographic projection for (l, r, b, t, n, f):
 *   [  2/(r-l)       0             0            0 ]
 *   [  0             2/(t-b)       0            0 ]
 *   [  0             0            -2/(f-n)      0 ]
 *   [ -(r+l)/(r-l)  -(t+b)/(t-b)  -(f+n)/(f-n)  1 ]
 *
 * The mat4 constructor consumes its arguments column by column, so each
 * source line below is one column of the matrix.
 */
void main()
{
    // Translation by (0.0, 0.0, -1.0).
    mat4 modelView = mat4(
        1.0, 0.0, 0.0,  0.0,    // column 0
        0.0, 1.0, 0.0,  0.0,    // column 1
        0.0, 0.0, 1.0, -1.0,    // column 2
        0.0, 0.0, 0.0,  1.0);   // column 3

    // Orthographic projection with l = -1, r = 1, b = -1, t = 1, n = 1, f = 3.
    mat4 projection = mat4(
        1.0, 0.0,  0.0,  0.0,   // column 0
        0.0, 1.0,  0.0,  0.0,   // column 1
        0.0, 0.0, -1.0, -2.0,   // column 2
        0.0, 0.0,  0.0,  1.0);  // column 3

    mat4 combined = modelView * projection;

    textureCoords = inTexCoords;
    gl_Position = inPos * combined;
}
而对于片段着色器程序而言就更简单了。只要直接把读到的纹理像素数据作为输出即可。
// The OpenGL 3.2 Core Profile requires an explicit version directive.
#version 150 core

uniform sampler2D texSampler;

in vec2 textureCoords;
out vec4 myOutput;

// Outputs the texel sampled at the interpolated texture coordinates, unchanged.
void main()
{
    vec4 texel = texture(texSampler, textureCoords);
    myOutput = texel;
}