OpenCL 使用OpenGL共享的缓存对象

我们先介绍一下OpenCL如何通过已有的OpenGL缓存对象来创建与之共享的OpenCL存储器对象。OpenCL标准提供了以下函数API:

cl_mem clCreateFromGLBuffer (cl_context context,
                                  cl_mem_flags flags,
                                  GLuint bufobj, cl_int *errcode_ret)
  • 参数context必须是一个通过从OpenGL上下文或与OpenGL关联的共享组创建得到的OpenCL上下文。
  • 参数f lags用于指明该存储器对象的读写属性,并且只能使用CL_MEM_READ_ONLY、CL_MEM_WRITE_ONLY或CL_MEM_READ_WRITE的其中一个。
  • 参数bufobj就是一个OpenGL缓存对象名。该对象不能是0,而是要通过glGenBuffers这一OpenGL函数API所获得的缓存对象。而在调用此OpenCL函数API之前,应该已经使用了glBufferData这一OpenGL函数API对该缓存对象进行初始化了。

如果相应的OpenCL存储器对象创建成功,则返回有效的cl_mem对象。否则,将返回空,并且我们可以通过errcode_ret参数来查找错误码。
由于现在市面上在Windows系统上介绍OpenCL与OpenGL交互的例子比较多,而最最缺乏的是在OS X环境下OpenCL与OpenGL的交互示例。因此,本专题将针对OS X系统来给出OpenCL与OpenGL的交互代码示例。我们这里先给出CL与GL共享缓存对象的例子。下面提供核心的代码片段。

#import "MyGLView.h"
#define GL_DO_NOT_WARN_IF_MULTI_GL_VERSION_HEADERS_INCLUDED
//这里必须注意!〈gl3.h>头文件必须被包含并取代〈gl.h>,
//否则VAO接口会调用不正常,从而无法正确显示图形!
#import 〈OpenGL/gl3.h>
#ifdef _APPLE_
#include 〈OpenCL/opencl.h>
#else
#include 〈CL/cl.h>
#endif
@interface MyGLView()
{
}
    @private
    GLuint mProgram;
    GLuint mVAO, mVBOVertices, mVBOColors;
    NSInteger mTag;
@end
@implementation MyGLView
static GLuint CompileShader(GLenum type, const char *filename)
{
    FILE *fp = fopen(filename, "r");
    if(fp == NULL)
    {
        printf("File %s cannot be opened!", filename);
        return 0;
}
    fseek(fp, 0, SEEK_END);
    const size_t length = ftell(fp);
    fseek(fp, 0, SEEK_SET);
    GLchar *souceBuffer = malloc(length);
    fread(souceBuffer, 1, length, fp);
    fclose(fp);
    const GLchar *source = souceBuffer;
    GLuint shader = glCreateShader(type);
    glShaderSource(shader, 1, &source, (GLint[]){
                (int)length
                });
    glCompileShader(shader);
    free(souceBuffer);
    GLint logLength;
    glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &logLength);
    if (logLength > 0)
    {
        GLchar *log = malloc(logLength);
        glGetShaderInfoLog(shader, logLength, &logLength, log);
        printf("Shader compile log:\n%s\n", log);
        free(log);
    }
    GLint status;
    glGetShaderiv(shader, GL_COMPILE_STATUS, &status);
    if(status == 0)
    {
        glDeleteShader(shader);
        return 0;
    }
    return shader;
}
static bool LinkProgram(GLuint prog)
{
    glLinkProgram(prog);
    GLint logLength;
    glGetProgramiv(prog, GL_INFO_LOG_LENGTH, &logLength);
    if (logLength > 0)
    {
        GLchar *log = (GLchar *)malloc(logLength);
        glGetProgramInfoLog(prog, logLength, &logLength, log);
        printf("Program link log:\n%s\n", log);
        free(log);
    }
    GLint status;
    glGetProgramiv(prog, GL_LINK_STATUS, &status);
    if (status == 0)
        return false;
}
    return true;
static bool ValidateProgram(GLuint prog)
{
    GLint logLength, status;
    glValidateProgram(prog);
    glGetProgramiv(prog, GL_INFO_LOG_LENGTH, &logLength);
    if (logLength > 0)
    {
        GLchar *log = (GLchar *)malloc(logLength);
        glGetProgramInfoLog(prog, logLength, &logLength, log);
        printf("Program validate log:\n%s\n", log);
        free(log);
    }
    glGetProgramiv(prog, GL_VALIDATE_STATUS, &status);
    if (status == 0)
        return false;
    return true;
- (BOOL)loadShaders
{
    //创建着色器程序对象
    mProgram = glCreateProgram();
    //在做程序连接之前绑定顶点着色器中属性的位置
    glBindAttribLocation(mProgram, 0, "inPos");
    glBindAttribLocation(mProgram, 1, "inColor");
    //创建并编译顶点着色器
    NSString *vertShaderPathname = [[NSBundle mainBundle]
pathForResource:@"shader"
                                          ofType:@"vsh"];
    GLuint vertShader = CompileShader(GL_VERTEX_SHADER,
                                            [vertShaderPathname UTF8String]);
    if(vertShader == 0)
    {
        NSLog(@"Failed to compile vertex shader");
        return NO;
    }
    //创建并编译片段着色器
    NSString *fragShaderPathname = [[NSBundle mainBundle]
                                          pathForResource:@"shader"
                                          ofType:@"fsh"];
    GLuint fragShader = CompileShader(
                                GL_FRAGMENT_SHADER,
                                [fragShaderPathname UTF8String]);
    if(fragShader == 0)
    {
        NSLog(@"Failed to compile fragment shader");
        return NO;
    }
    //将顶点着色器添加到程序中
    glAttachShader(mProgram, vertShader);
    //将片段着色器添加到程序中
    glAttachShader(mProgram, fragShader);
    //连接程序
    if (!LinkProgram(mProgram))
    {
        NSLog(@"Failed to link program: %d", mProgram);
        return NO;
    }
    //这里顶点着色器对象以及片段着色器对象已经没用了,将它们释放
if(vertShader != 0)
    glDeleteShader(vertShader);
    if(fragShader != 0)
        glDeleteShader(fragShader);
    //校验程序
    return ValidateProgram(mProgram);
}
- (id)initWithFrame:(NSRect)frameRect
{
    self = [super initWithFrame:frameRect];
    const NSOpenGLPixelFormatAttribute attrs[] =
    {
        //可选项,表示开启双缓冲
        NSOpenGLPFADoubleBuffer,
        //必须使用这个属性以指定我们将使用OpenGL Core Profile
        NSOpenGLPFAOpenGLProfile,
        //指定使用OpenGL3.2 Core Profile
        NSOpenGLProfileVersion3_2Core,
        //这里使用多重采样反走样处理
        NSOpenGLPFAMultisample,
        NSOpenGLPFASampleBuffers, (NSOpenGLPixelFormatAttribute)1,
        //采用4个样本对应一个像素
        NSOpenGLPFASamples, (NSOpenGLPixelFormatAttribute)4,
        //end
        0
    };
    NSOpenGLPixelFormat *pf = [[NSOpenGLPixelFormat alloc]
                                    initWithAttributes:attrs];
    if (pf == nil)
    {
        NSLog(@"No OpenGL pixel format");
        return nil;
    }
    NSOpenGLContext *context = [[NSOpenGLContext alloc]
                                      initWithFormat:pf shareContext:nil];
                                      [self setPixelFormat:pf];
                                      [pf release];
    [self setOpenGLContext:context];
    [context release];
    return self;
{
- (void)dealloc
}
NSLog(@"MyGLView deallocated!");
    [super dealloc];
{
- (void)destroyBuffers
}
    //释放程序对象
    if(mProgram != 0)
        glDeleteProgram(mProgram);
    //释放VAO对象
    if(mVAO != 0)
        glDeleteVertexArrays(1, &mVAO);
    //释放顶点与颜色VBO
    if(mVBOVertices != 0)
        glDeleteBuffers(1, &mVBOVertices);
    if(mVBOColors != 0)
        glDeleteBuffers(1, &mVBOColors);
    //清除上下文
    [[self openGLContext] clearDrawable];
    [self clearGLContext];
{
- (void)setTag:(NSInteger)tag
}
    mTag = tag;
{
- (NSInteger)tag
}
    return mTag;
}
- (void)prepareOpenGL
{
    [[self openGLContext] makeCurrentContext];
    //用垂直刷新率来同步缓存交换
    GLint swapInt = 1;
    [[self openGLContext] setValues:&swapInt forParameter:
      NSOpenGLCPSwapInterval];
    //在OpenGL 3.2 Core Profile中,必须使用VAO(顶点数组对象)
    glGenVertexArrays(1, &mVAO);
    glBindVertexArray(mVAO);
    //这里要绘制一个圆形,因此需要362个顶点,
    //每个顶点分配4个分量(分别为x, y, z坐标与w)
    //最后扩充到512个顶点,以优化OpenCL的数据处理
    const size_t dataLength = 512 * 4 * sizeof(GLfloat);
    //设置顶点VBO
    glGenBuffers(1, &mVBOVertices);
    glBindBuffer(GL_ARRAY_BUFFER, mVBOVertices);
    //初始化顶点VBO,这里仅分配空间,而不传递任何数据
    glBufferData(GL_ARRAY_BUFFER, dataLength, NULL, GL_STATIC_DRAW);
    GLenum errCode = glGetError();
    if(errCode != GL_NO_ERROR)
        NSLog(@"Buffer data vertices error!");
//将顶点VBO绑定到属性0
    glEnableVertexAttribArray(0);
    glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 0,
                              (const GLvoid *)0);
    //设置颜色VBO
    glGenBuffers(1, &mVBOColors);
    glBindBuffer(GL_ARRAY_BUFFER, mVBOColors);
    //初始化颜色VBO,这里仅分配空间,不传递任何数据
    glBufferData(GL_ARRAY_BUFFER, dataLength, NULL, GL_STATIC_DRAW);
//将颜色VBO绑定到属性1
    glEnableVertexAttribArray(1);
    glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, 0,
                              (const GLvoid *)0);
    //解绑
    glBindBuffer(GL_ARRAY_BUFFER, 0);
    //加载着色器并构建OpenGL程序
    if(![self loadShaders])
         return;
    glUseProgram(mProgram);
    glViewport(0, 0, self.frame.size.width, self.frame.size.height);
    glClearColor(0.5f, 0.5f, 0.5f, 1.0f);
}
- (void)doOpenCLComputing
{
    /** 做OpenCL初始化 */
cl_platform_id oclPlatform = NULL;
    cl_device_id oclDevice = NULL;
    //要被创建的OpenCL上下文对象
    cl_context context = NULL;
    cl_command_queue commandQueue = NULL;
    cl_program oclProgram = NULL;
    cl_kernel kernel = NULL;
    //要被创建的与GL共享的顶点缓存对象
    cl_mem memObjVertices = NULL;
    //要被创建的与GL共享的颜色缓存对象
    cl_mem memObjColors = NULL;
#ifdef _APPLE_
    CGLContextObj cgl_context = CGLGetCurrentContext();
    CGLShareGroupObj sharegroup = CGLGetShareGroup(cgl_context);
    gcl_gl_set_sharegroup(sharegroup);
#endif
    do
    {
        //获得当前OpenCL平台
        cl_int status = clGetPlatformIDs(1, &oclPlatform, NULL);
        if(status != CL_SUCCESS)
        {
              NSLog(@"OpenCL platform get failed!");
              break;
        }
        //获得当前GPU设备。严格地来说,
        //此GPU设备也应该是OpenGL所使用的设备
        status = clGetDeviceIDs(oclPlatform, CL_DEVICE_TYPE_GPU, 1,
                                      &oclDevice, NULL);
        if(status != CL_SUCCESS)
        {
              NSLog(@"OpenCL GPU cannot be found!");
              break;
        }
        //设置用于创建OpenCL上下文的属性列表
        cl_context_properties properties[] =
        {
#ifdef WIN32
              CL_GL_CONTEXT_KHR ,
              (cl_context_properties)wglGetCurrentContext(),
              CL_WGL_HDC_KHR ,
              (cl_context_properties)wglGetCurrentDC(),
#endif
#ifdef _linux_
              CL_GL_CONTEXT_KHR ,
              (cl_context_properties)glXGetCurrentContext(),
              CL_GLX_DISPLAY_KHR ,
              (cl_context_properties)glXGetCurrentDisplay(),
#endif
#ifdef _APPLE_
            CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE,
            (cl_context_properties)sharegroup,
#endif
              0
        };
        //创建OpenCL上下文
        context = clCreateContext(properties, 1, &oclDevice,
                                          NULL, NULL, NULL);
        //创建命令队列
        commandQueue = clCreateCommandQueue(context, oclDevice,
                                                    0, NULL);
        //编译内核程序
        NSString *kernelPath = [[NSBundle mainBundle]
                                    pathForResource:
                                    @"compute" ofType:@"ocl"];
        const char *aSource = [[NSString stringWithContentsOfFile:
                kernelPath encoding:NSUTF8StringEncoding error:nil]
                                      UTF8String];
        size_t kernelLength = strlen(aSource);
        oclProgram = clCreateProgramWithSource(context, 1, &aSource,
                                                      &kernelLength, NULL);
        if(oclProgram == NULL)
      {
           NSLog(@"OpenCL program create failed!");
           break;
}
        //构建程序
        status = clBuildProgram(oclProgram, 1, &oclDevice,
                                    NULL, NULL, NULL);
        if(status != CL_SUCCESS)
        {
            NSLog(@"OpenCL kernel build failed!");
                    break;
        }
        //创建与GL顶点缓存对象共享的存储器对象
        memObjVertices = clCreateFromGLBuffer(context,
                                                      CL_MEM_WRITE_ONLY,
                                                      mVBOVertices,
                                                      &status);
        //创建与GL颜色缓存对象共享的存储器对象
        memObjColors = clCreateFromGLBuffer(context,
                                                  CL_MEM_WRITE_ONLY,
                                                  mVBOColors, &status);
        //创建内核对象
        kernel = clCreateKernel(oclProgram, "GenerateRoundVertices",
                                    NULL);
        //设置内核参数
        status |= clSetKernelArg(kernel, 0, sizeof(memObjVertices),
                                      &memObjVertices);
        status |= clSetKernelArg(kernel, 1, sizeof(memObjColors),
                                      &memObjColors);
        if(status != CL_SUCCESS)
        {
            NSLog(@"Kernel parameters pass failed!");
            break;
        }
        //这里我们总共使用512个工作项
        //由于一共要处理362个顶点,每个顶点对应到一个工作项,
        //而362向上对应的能满足2的N次幂整数就是512
        size_t global_work_size[1] = { 512 };
        size_t groupSize;
        clGetDeviceInfo(oclDevice, CL_DEVICE_MAX_WORK_GROUP_SIZE,
                          sizeof(groupSize), &groupSize, NULL);
        size_t local_work_size[1] = { groupSize };
        //运行内核程序
        status |= clEnqueueNDRangeKernel(commandQueue, kernel, 1,
                                                NULL,global_work_size,
                                                local_work_size, 0, NULL,
                                                NULL);
        //这里直接用clFinish进行同步,确保顶点坐标以及相应的颜色值全都设置好
        clFinish(commandQueue);
        if(status != CL_SUCCESS)
        {
            NSLog(@"OpenCL kernel run error!");
        }
    }
    while(NO);
    //释放OpenCL各种对象
    if(memObjVertices != NULL)
        clReleaseMemObject(memObjVertices);
    if(memObjColors != NULL)
        clReleaseMemObject(memObjColors);
    if(kernel != NULL)
        clReleaseKernel(kernel);
    if(oclProgram != NULL)
        clReleaseProgram(oclProgram);
    if(commandQueue != NULL)
        clReleaseCommandQueue(commandQueue);
    if(context != NULL)
        clReleaseContext(context);
{
- (void)drawRect:(NSRect)dirtyRect
}
    [self doOpenCLComputing];
    glClear(GL_COLOR_BUFFER_BIT);
    glDrawArrays(GL_TRIANGLE_FAN, 0, 362);
    glFlush();
    [[self openGLContext] flushBuffer];
@end

我们这里主要关心的就是prepareOpenGL方法以及doOpenCLComputing方法。这个程序中,我们要绘制一个圆,并且其顶点坐标与颜色坐标都是通过OpenCL内核程序进行设置的。

最后,在OpenGL端使用扇形绘制模式进行绘制。整个程序在执行时,prepareOpenGL方法会先被调用。然后当用户点击draw按钮之后,将会调用doOpenCLComputing方法。大家也能在代码中观察到,虽然我们要处理362个顶点,但是实际分配的工作项总数是512。对于OpenCL数据处理特性而言,如果是最小并行粒度个数的倍数,那么将会充分发挥GPU的计算性能。
以下是OpenGL顶点着色器代码:

//在OpenGL3.2 Core Profile中,版本号必须显式地给出
#version 150 core
in vec4 inPos;
in vec4 inColor;
//flat shade model(默认为smooth)
flat out vec4 myColor;
/** 模型视图变换矩阵 *
* [ 1000
     0100
     0010
     xyz1
 *]
*/
/** 正交投影变换矩阵 *
* [ 2/(r-l)       0              0              0
      0              2/(t-b)       0              0
      0              0              -2/(f-n)      0
      -(r+l)/(r-l) -(t+b)/(t-b) -(f+n)/(f-n) 1
 *]
*/
void main()
{
    //glTranslate(0.0, 0.0, -1.0, 1.0)
    mat4 translateMatrix = mat4(1.0, 0.0, 0.0, 0.0,    //column 0
                                0.0, 1.0, 0.0, 0.0,    //column 1
                                0.0, 0.0, 1.0, -1.0, //column 2
                                0.0, 0.0, 0.0, 1.0     //column 3
                                    );
    //glOrtho(-1.0, 1.0, -1.0, 1.0, 1.0, 3.0)
    mat4 projectionMatrix = mat4(1.0, 0.0, 0.0, 0.0, //column 0
                                 0.0, 1.0, 0.0, 0.0, //column 1
                                 0.0, 0.0, -1.0, -2.0, //column 2
                                 0.0, 0.0, 0.0, 1.0    //colimn 3
                                      );
    gl_Position = inPos * (translateMatrix * projectionMatrix);
    myColor = inColor;
}
下面是OpenGL片段着色器代码:
//在OpenGL 3.2 Core Profile中,版本号必须显式地给出
#version 150 core
//flat shade model(默认为smooth),
//必须与vertex shader所定义的in变量要完全匹配
flat in vec4 myColor;
out vec4 myOutput;
void main()
{
    myOutput = myColor;
}
下面是OpenCL内核程序代码:

_kernel void GenerateRoundVertices(_global float *pVertices,
                                          _global float *pColors)
{
    int index = get_global_id(0);
    float theta = radians((float)(index - 1));
    //设置圆的半径为0.8
    float x = 0.8f * cos(theta);        //设置当前顶点的x坐标
float y = 0.8f * sin(theta);        //设置当前顶点的y坐标
    //第0个工作项设置圆的原点坐标(0, 0)
    if(index == 0)
        x = 0.0f;
    if(index == 0)
        y = 0.0f;
    pVertices[index * 4 + 0] = x;
    pVertices[index * 4 + 1] = y;
    pVertices[index * 4 + 2] = 0.0f;    //z
    pVertices[index * 4 + 3] = 1.0f;    //w
    float r, g, b;
    if(index == 0)
    {
        r = 0.0f;
        g = 0.0f;
        b = 0.0f;
    }
    else if(index 〈= 45)
    {
        r = 0.1f;
        g = 0.9f;
        b = 0.1f;
    }
    else if(index 〈= 90)
    {
        r = 0.1f;
        g = 0.1f;
        b = 0.9f;
    }
    else if(index 〈 180)
    {
        r = 0.9f;
        g = 0.9f;
        b = 0.1f;
    }
    else if(index 〈 270)
    {
        r = 0.9f;
        g = 0.1f;
        b = 0.9f;
    }
    else
    {
        r = 0.1f;
        g = 0.9f;
        b = 0.9f;
    }
    pColors[index * 4 + 0] = r;
    pColors[index * 4 + 1] = g;
    pColors[index * 4 + 2] = b;
    pColors[index * 4 + 3] = 1.0f;
}

大家下载好完整工程之后可以在OS X 10.9或更高版本的Mac上运行查看效果。当然,这段代码也能被很容易地移植到Windows或Linux系统上。

Camera课程

Python教程

Java教程

Web教程

数据库教程

图形图像教程

办公软件教程

Linux教程

计算机教程

大数据教程

开发工具教程