
git-svn-id: http://svn.us.apache.org/repos/asf/tuscany@1428206 13f79535-47bb-0310-9956-ffa450edef68
739 lines
25 KiB
C++
739 lines
25 KiB
C++
/*
|
|
* Licensed to the Apache Software Foundation (ASF) under one
|
|
* or more contributor license agreements. See the NOTICE file
|
|
* distributed with this work for additional information
|
|
* regarding copyright ownership. The ASF licenses this file
|
|
* to you under the Apache License, Version 2.0 (the
|
|
* "License"); you may not use this file except in compliance
|
|
* with the License. You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing,
|
|
* software distributed under the License is distributed on an
|
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
* KIND, either express or implied. See the License for the
|
|
* specific language governing permissions and limitations
|
|
* under the License.
|
|
*/
|
|
|
|
/* $Rev$ $Date$ */
|
|
|
|
#ifndef tuscany_opencl_eval_hpp
|
|
#define tuscany_opencl_eval_hpp
|
|
|
|
/**
|
|
* OpenCL kernel evaluation logic.
|
|
*/
|
|
#ifdef IS_DARWIN
|
|
#include <OpenCL/opencl.h>
|
|
#else
|
|
#include <CL/cl.h>
|
|
#endif
|
|
|
|
#include "list.hpp"
|
|
#include "value.hpp"
|
|
#include "perf.hpp"
|
|
|
|
namespace tuscany {
|
|
namespace opencl {
|
|
|
|
/**
|
|
* Convert an OpenCL error code to a string.
|
|
*/
|
|
const string clError(const cl_int e) {
|
|
ostringstream s;
|
|
s << "error " << e;
|
|
return str(s);
|
|
}
|
|
|
|
/**
|
|
* OpenCL profiling counters.
|
|
*/
|
|
#ifdef WANT_MAINTAINER_OPENCL_PROF
|
|
|
|
cl_ulong memtime = 0;
|
|
cl_ulong kernelqtime = 0;
|
|
cl_ulong kerneltime = 0;
|
|
cl_ulong preptime = 0;
|
|
cl_ulong evaltime = 0;
|
|
|
|
/**
|
|
* Reset the OpenCL profiling counters.
|
|
*/
|
|
const bool resetOpenCLCounters() {
|
|
memtime = kernelqtime = kerneltime = preptime = evaltime = 0;
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Print the OpenCL profiling counters.
|
|
*/
|
|
const bool printOpenCLCounters(const long n) {
|
|
cout << " kernelq " << ((double)kernelqtime / 1000000.0) / (double)n << " ms kernel " << ((double)kerneltime / 1000000.0) / (double)n << " ms memory " << ((double)memtime / 1000000.0) / (double)n << " ms prep " << ((double)preptime / 1000000.0) / (double)n << " ms eval " << ((double)evaltime / 1000000.0) / (double)n << " ms";
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Profile a memory event.
|
|
*/
|
|
const failable<cl_ulong> profileMemEvent(const cl_event evt) {
|
|
cl_ulong start = 0;
|
|
const cl_int serr = clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, NULL);
|
|
if (serr != CL_SUCCESS)
|
|
return mkfailure<cl_ulong>("Couldn't profile memory event start: " + clError(serr));
|
|
cl_ulong end = 0;
|
|
const cl_int eerr = clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL);
|
|
if (eerr != CL_SUCCESS)
|
|
return mkfailure<cl_ulong>("Couldn't profile memory event end: " + clError(eerr));
|
|
const cl_ulong t = end - start;
|
|
memtime += t;
|
|
return t;
|
|
}
|
|
|
|
/**
|
|
* Profile a kernel event.
|
|
*/
|
|
const failable<cl_ulong> profileKernelEvent(const cl_event evt) {
|
|
cl_ulong queued = 0;
|
|
const cl_int qerr = clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &queued, NULL);
|
|
if (qerr != CL_SUCCESS)
|
|
return mkfailure<cl_ulong>("Couldn't profile kernel event queue: " + clError(qerr));
|
|
cl_ulong start = 0;
|
|
const cl_int serr = clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, NULL);
|
|
if (serr != CL_SUCCESS)
|
|
return mkfailure<cl_ulong>("Couldn't profile kernel event start: " + clError(serr));
|
|
cl_ulong end = 0;
|
|
const cl_int eerr = clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL);
|
|
if (eerr != CL_SUCCESS)
|
|
return mkfailure<cl_ulong>("Couldn't profile kernel event end: " + clError(eerr));
|
|
const cl_ulong q = start - queued;
|
|
kernelqtime += q;
|
|
const cl_ulong t = end - start;
|
|
kerneltime += t;
|
|
return t;
|
|
}
|
|
|
|
/**
|
|
* Profile an array of memory events.
|
|
*/
|
|
const failable<cl_ulong> profileMemEvents(const cl_uint n, const cl_event* const evt) {
|
|
if (n == 0)
|
|
return 0;
|
|
const failable<cl_ulong> t = profileMemEvent(*evt);
|
|
if (!hasContent(t))
|
|
return t;
|
|
const failable<cl_ulong> r = profileMemEvents(n - 1, evt + 1);
|
|
if (!hasContent(r))
|
|
return r;
|
|
return content(t) + content(r);
|
|
}
|
|
|
|
#else
|
|
|
|
#define resetOpenCLCounters()
|
|
#define printOpenCLCounters(n)
|
|
|
|
#endif
|
|
|
|
class OpenCLContext;
|
|
class OpenCLProgram;
|
|
class OpenCLKernel;
|
|
class OpenCLBuffer;
|
|
|
|
/**
|
|
* Represent an OpenCL context.
|
|
*/
|
|
class OpenCLContext {
|
|
public:
|
|
#define OPENCL_MAX_DEVICES 64
|
|
|
|
enum DeviceType {
|
|
DEFAULT = 0, CPU = 1, GPU = 2
|
|
};
|
|
|
|
OpenCLContext(const OpenCLContext::DeviceType devtype) : dev(OpenCLContext::DEFAULT), ndevs(0), ctx(0) {
|
|
debug("opencl::OpenCLContext");
|
|
for (int i = 0; i < OPENCL_MAX_DEVICES; i++)
|
|
cq[i] = 0;
|
|
|
|
// Get the available platforms
|
|
cl_uint nplatforms;
|
|
cl_platform_id platforms[16];
|
|
const cl_int gperr = clGetPlatformIDs(16, platforms, &nplatforms);
|
|
if(nplatforms == 0 || gperr != CL_SUCCESS) {
|
|
mkfailure<bool>("Couldn't get OpenCL platforms: " + clError(gperr));
|
|
return;
|
|
}
|
|
for(cl_uint i = 0; i < nplatforms; ++i) {
|
|
char vendor[256];
|
|
const cl_int gverr = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(vendor), vendor, NULL);
|
|
if(gverr != CL_SUCCESS) {
|
|
mkfailure<bool>("Couldn't get OpenCL platform: " + clError(gverr));
|
|
return;
|
|
}
|
|
debug(vendor, "opencl::OpenCLContext::vendor");
|
|
}
|
|
|
|
// Get the available devices of the requested type
|
|
if (devtype == OpenCLContext::DEFAULT || devtype == OpenCLContext::GPU) {
|
|
for(cl_uint i = 0; i < nplatforms; ++i) {
|
|
const cl_int err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, OPENCL_MAX_DEVICES, devid, &ndevs);
|
|
if (err == CL_SUCCESS) {
|
|
debug(ndevs, "opencl::OpenCLContext::gcpus");
|
|
dev = OpenCLContext::GPU;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
if ((devtype == OpenCLContext::DEFAULT && ndevs == 0) || devtype == OpenCLContext::CPU) {
|
|
for(cl_uint i = 0; i < nplatforms; ++i) {
|
|
const cl_int err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_CPU, OPENCL_MAX_DEVICES, devid, &ndevs);
|
|
if (err == CL_SUCCESS) {
|
|
debug(ndevs, "opencl::OpenCLContext::ncpus");
|
|
dev = OpenCLContext::CPU;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
if (ndevs == 0)
|
|
return;
|
|
|
|
// Initialize OpenCL context and command queues
|
|
cl_int ccerr;
|
|
ctx = clCreateContext(0, ndevs, devid, NULL, NULL, &ccerr);
|
|
if(!ctx || ccerr != CL_SUCCESS) {
|
|
mkfailure<bool>("Couldn't create OpenCL context: " + clError(ccerr));
|
|
return;
|
|
}
|
|
|
|
for (cl_uint i = 0; i < ndevs; i++) {
|
|
cl_int cqerr;
|
|
#ifdef WANT_MAINTAINER_OPENCL_PROF
|
|
cq[i] = clCreateCommandQueue(ctx, devid[i], CL_QUEUE_PROFILING_ENABLE, &cqerr);
|
|
#else
|
|
cq[i] = clCreateCommandQueue(ctx, devid[i], 0, &cqerr);
|
|
#endif
|
|
if (!cq[i] || cqerr != CL_SUCCESS) {
|
|
mkfailure<bool>("Couldn't create OpenCL command queue: " + clError(cqerr));
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
OpenCLContext(const OpenCLContext& c) : dev(c.dev), ndevs(c.ndevs), ctx(c.ctx) {
|
|
for (cl_uint i = 0; i < ndevs; i++) {
|
|
devid[i] = c.devid[i];
|
|
cq[i] = c.cq[i];
|
|
if (cq[i] != 0) {
|
|
const cl_int rcqerr = clRetainCommandQueue(cq[i]);
|
|
if (rcqerr != CL_SUCCESS)
|
|
mkfailure<bool>(string("Couldn't retain OpenCL command queue: ") + clError(rcqerr));
|
|
}
|
|
}
|
|
const cl_int rcerr = clRetainContext(ctx);
|
|
if (rcerr != CL_SUCCESS)
|
|
mkfailure<bool>(string("Couldn't retain OpenCL context: ") + clError(rcerr));
|
|
}
|
|
|
|
~OpenCLContext() {
|
|
for (cl_uint i = 0; i < ndevs; i++) {
|
|
if (cq[i] != 0)
|
|
clReleaseCommandQueue(cq[i]);
|
|
}
|
|
if (ctx != 0)
|
|
clReleaseContext(ctx);
|
|
}
|
|
|
|
private:
|
|
OpenCLContext::DeviceType dev;
|
|
cl_uint ndevs;
|
|
cl_device_id devid[OPENCL_MAX_DEVICES];
|
|
cl_context ctx;
|
|
cl_command_queue cq[OPENCL_MAX_DEVICES];
|
|
|
|
friend const cl_uint devices(const OpenCLContext& cl);
|
|
friend const cl_command_queue commandq(const OpenCLContext& cl);
|
|
friend const failable<OpenCLBuffer> readOnlyBuffer(const size_t size, const void* p, const OpenCLContext& cl, cl_command_queue cq);
|
|
friend const failable<OpenCLBuffer> writeOnlyBuffer(const size_t size, const OpenCLContext& cl);
|
|
friend const failable<value> evalKernel(const failable<OpenCLKernel>& kernel, const value& expr, const size_t gwsize, const value::ValueType type, const size_t n, const OpenCLContext& cl);
|
|
friend const failable<OpenCLProgram> readProgram(const string& path, istream& is, const OpenCLContext& cl);
|
|
};
|
|
|
|
/**
|
|
* Return the number of computing devices available in a context.
|
|
*/
|
|
const cl_uint devices(const OpenCLContext& cl) {
|
|
return cl.ndevs;
|
|
}
|
|
|
|
/**
|
|
* Return a command queue from a context.
|
|
*/
|
|
const cl_command_queue commandq(const OpenCLContext& cl) {
|
|
return cl.cq[0];
|
|
}
|
|
|
|
/**
|
|
* Represents an OpenCL program.
|
|
*/
|
|
class OpenCLProgram {
|
|
public:
|
|
OpenCLProgram() : prog(0) {
|
|
}
|
|
|
|
OpenCLProgram(const cl_program prog) : prog(prog) {
|
|
}
|
|
|
|
OpenCLProgram(const OpenCLProgram& c) : prog(c.prog) {
|
|
const cl_int rperr = clRetainProgram(prog);
|
|
if (rperr != CL_SUCCESS)
|
|
mkfailure<bool>(string("Couldn't retain OpenCL program: ") + clError(rperr));
|
|
}
|
|
|
|
~OpenCLProgram() {
|
|
if (!prog)
|
|
return;
|
|
const cl_int rperr = clReleaseProgram(prog);
|
|
if (rperr != CL_SUCCESS)
|
|
mkfailure<bool>(string("Couldn't release OpenCL program: ") + clError(rperr));
|
|
}
|
|
|
|
private:
|
|
const cl_program prog;
|
|
|
|
friend const failable<OpenCLKernel> createKernel(const value& expr, const OpenCLProgram& clprog);
|
|
friend const failable<value> evalKernel(const failable<OpenCLKernel>& kernel, const value& expr, const size_t gwsize, const value::ValueType type, const size_t n, const OpenCLContext& cl);
|
|
};
|
|
|
|
/**
|
|
* Represents an OpenCL kernel.
|
|
*/
|
|
class OpenCLKernel {
|
|
public:
|
|
OpenCLKernel() : k(0) {
|
|
}
|
|
|
|
OpenCLKernel(const cl_kernel k) : k(k) {
|
|
}
|
|
|
|
OpenCLKernel(const OpenCLKernel& c) : k(c.k) {
|
|
const cl_int rkerr = clRetainKernel(k);
|
|
if (rkerr != CL_SUCCESS)
|
|
mkfailure<bool>(string("Couldn't retain OpenCL kernel: ") + clError(rkerr));
|
|
}
|
|
|
|
~OpenCLKernel() {
|
|
if (!k)
|
|
return;
|
|
const cl_int rkerr = clReleaseKernel(k);
|
|
if (rkerr != CL_SUCCESS)
|
|
mkfailure<bool>(string("Couldn't release OpenCL kernel: ") + clError(rkerr));
|
|
}
|
|
|
|
private:
|
|
const cl_kernel k;
|
|
|
|
friend const failable<OpenCLBuffer> valueToKernelArg(const cl_uint i, const size_t size, const void* p, const failable<OpenCLBuffer>& buf, const OpenCLKernel& kernel);
|
|
friend const failable<value> evalKernel(const failable<OpenCLKernel>& kernel, const value& expr, const size_t gwsize, const value::ValueType type, const size_t n, const OpenCLContext& cl);
|
|
};
|
|
|
|
/**
|
|
* Represents an OpenCL buffer.
|
|
*/
|
|
class OpenCLBuffer {
|
|
public:
|
|
OpenCLBuffer() : mem(0), evt(0) {
|
|
}
|
|
|
|
OpenCLBuffer(const cl_mem mem, const cl_event evt) : mem(mem), evt(evt) {
|
|
}
|
|
|
|
OpenCLBuffer(const OpenCLBuffer& c) : mem(c.mem), evt(c.evt) {
|
|
if (mem != 0) {
|
|
const cl_int rmerr = clRetainMemObject(mem);
|
|
if (rmerr != CL_SUCCESS)
|
|
mkfailure<bool>(string("Couldn't retain OpenCL buffer: ") + clError(rmerr));
|
|
}
|
|
if (evt != 0) {
|
|
const cl_int reerr = clRetainEvent(evt);
|
|
if (reerr != CL_SUCCESS)
|
|
mkfailure<bool>(string("Couldn't retain OpenCL event: ") + clError(reerr));
|
|
}
|
|
}
|
|
|
|
~OpenCLBuffer() {
|
|
if (mem != 0) {
|
|
const cl_int rmerr = clReleaseMemObject(mem);
|
|
if (rmerr != CL_SUCCESS)
|
|
mkfailure<bool>(string("Couldn't release OpenCL buffer: ") + clError(rmerr));
|
|
}
|
|
if (evt != 0) {
|
|
const cl_int reerr = clReleaseEvent(evt);
|
|
if (reerr != CL_SUCCESS)
|
|
mkfailure<bool>(string("Couldn't release OpenCL event: ") + clError(reerr));
|
|
}
|
|
}
|
|
|
|
private:
|
|
const cl_mem mem;
|
|
const cl_event evt;
|
|
|
|
friend const cl_uint writeBufferEvents(const list<OpenCLBuffer>& buf, cl_event* evt);
|
|
friend const failable<OpenCLBuffer> valueToKernelArg(const cl_uint i, const size_t size, const void* p, const failable<OpenCLBuffer>& buf, const OpenCLKernel& kernel);
|
|
friend const failable<value> evalKernel(const failable<OpenCLKernel>& kernel, const value& expr, const size_t gwsize, const value::ValueType type, const size_t n, const OpenCLContext& cl);
|
|
};
|
|
|
|
/**
|
|
* Return a read-only memory buffer.
|
|
*/
|
|
const failable<OpenCLBuffer> readOnlyBuffer(const size_t size, const void* const p, const OpenCLContext& cl, const cl_command_queue cq) {
|
|
if (cl.dev == OpenCLContext::CPU) {
|
|
cl_int err;
|
|
const cl_mem buf = clCreateBuffer(cl.ctx, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, size, const_cast<void*>(p), &err);
|
|
if (!buf || err != CL_SUCCESS)
|
|
return mkfailure<OpenCLBuffer>(string("Couldn't map OpenCL host memory: ") + clError(err));
|
|
return OpenCLBuffer(buf, 0);
|
|
}
|
|
cl_int berr;
|
|
const cl_mem buf = clCreateBuffer(cl.ctx, CL_MEM_READ_ONLY, size, NULL, &berr);
|
|
if (!buf || berr != CL_SUCCESS)
|
|
return mkfailure<OpenCLBuffer>(string("Couldn't allocate OpenCL device memory: ") + clError(berr));
|
|
cl_event wevt;
|
|
const cl_int werr = clEnqueueWriteBuffer(cq, buf, CL_FALSE, 0, size, p, 0, NULL, &wevt);
|
|
if (werr != CL_SUCCESS) {
|
|
clReleaseMemObject(buf);
|
|
return mkfailure<OpenCLBuffer>(string("Couldn't enqueue write to device memory: ") + clError(werr));
|
|
}
|
|
return OpenCLBuffer(buf, wevt);
|
|
}
|
|
|
|
/**
|
|
* Fill an array of write events for a given list of buffers.
|
|
*/
|
|
const cl_uint writeBufferEvents(const list<OpenCLBuffer>& buf, cl_event* const evt) {
|
|
if (isNull(buf))
|
|
return 0;
|
|
const cl_event e = car(buf).evt;
|
|
if (e == 0)
|
|
return writeBufferEvents(cdr(buf), evt);
|
|
*evt = e;
|
|
return 1 + writeBufferEvents(cdr(buf), evt + 1);
|
|
}
|
|
|
|
/**
|
|
* Return a write-only memory buffer.
|
|
*/
|
|
const failable<OpenCLBuffer> writeOnlyBuffer(const size_t size, const OpenCLContext& cl) {
|
|
if (cl.dev == OpenCLContext::CPU) {
|
|
cl_int err;
|
|
const cl_mem buf = clCreateBuffer(cl.ctx, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, size, NULL, &err);
|
|
if (!buf || err != CL_SUCCESS)
|
|
return mkfailure<OpenCLBuffer>(string("Couldn't map OpenCL host memory: ") + clError(err));
|
|
return OpenCLBuffer(buf, 0);
|
|
}
|
|
cl_int err;
|
|
const cl_mem buf = clCreateBuffer(cl.ctx, CL_MEM_WRITE_ONLY, size, NULL, &err);
|
|
if (!buf || err != CL_SUCCESS)
|
|
return mkfailure<OpenCLBuffer>(string("Couldn't allocate OpenCL device memory: ") + clError(err));
|
|
return OpenCLBuffer(buf, 0);
|
|
}
|
|
|
|
/**
|
|
* Convert a value to a kernel arg.
|
|
*/
|
|
const failable<OpenCLBuffer> valueToKernelArg(const cl_uint i, const size_t size, const void* const p, const failable<OpenCLBuffer>& buf, const OpenCLKernel& kernel) {
|
|
if (!hasContent(buf))
|
|
return buf;
|
|
if (p != NULL) {
|
|
const cl_int err = clSetKernelArg(kernel.k, (cl_uint)i, size, p);
|
|
if (err != CL_SUCCESS)
|
|
return mkfailure<OpenCLBuffer>(string("Couldn't set OpenCL simple kernel arg: ") + clError(err));
|
|
return buf;
|
|
}
|
|
const OpenCLBuffer b = content(buf);
|
|
const cl_int err = clSetKernelArg(kernel.k, i, sizeof(cl_mem), &b.mem);
|
|
if (err != CL_SUCCESS)
|
|
return mkfailure<OpenCLBuffer>(string("Couldn't set OpenCL buffer kernel arg: ") + clError(err));
|
|
return buf;
|
|
}
|
|
|
|
const failable<OpenCLBuffer> valueToKernelArg(const value& v, const cl_uint i, const OpenCLKernel& kernel, const OpenCLContext& cl, const cl_command_queue cq) {
|
|
switch (type(v)) {
|
|
case value::Symbol: {
|
|
const string s = string("'") + (string)v;
|
|
return valueToKernelArg(i, 0, NULL, readOnlyBuffer(length(s) + 1, c_str(s), cl, cq), kernel);
|
|
}
|
|
case value::String: {
|
|
const string s = (string)v;
|
|
return valueToKernelArg(i, 0, NULL, readOnlyBuffer(length(s) + 1, c_str(s), cl, cq), kernel);
|
|
}
|
|
case value::Number: {
|
|
const cl_float d = (cl_float)((double)v);
|
|
return valueToKernelArg(i, sizeof(cl_float), &d, OpenCLBuffer(), kernel);
|
|
}
|
|
case value::Bool: {
|
|
const cl_int b = (cl_int)((bool)v);
|
|
return valueToKernelArg(i, sizeof(cl_int), &b, OpenCLBuffer(), kernel);
|
|
}
|
|
default: {
|
|
return valueToKernelArg(i, sizeof(cl_mem), NULL, readOnlyBuffer(sizeof(value), &v, cl, cq), kernel);
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Convert a list of values to kernel args.
|
|
*/
|
|
const failable<list<OpenCLBuffer>> valuesToKernelArgsListHelper(const list<value>& v, const cl_uint i, const OpenCLKernel& kernel, const OpenCLContext& cl, const cl_command_queue cq) {
|
|
if (isNull(v))
|
|
return list<OpenCLBuffer>();
|
|
const failable<OpenCLBuffer> a = valueToKernelArg(car(v), i, kernel, cl, cq);
|
|
if (!hasContent(a))
|
|
return mkfailure<list<OpenCLBuffer>>(a);
|
|
const failable<list<OpenCLBuffer>> al = valuesToKernelArgsListHelper(cdr(v), i + 1, kernel, cl, cq);
|
|
if (!hasContent(al))
|
|
return al;
|
|
return cons<OpenCLBuffer>(content(a), content(al));
|
|
}
|
|
|
|
const failable<list<OpenCLBuffer>> valuesToKernelArgs(const list<value>& v, const OpenCLKernel& kernel, const OpenCLContext& cl, const cl_command_queue cq) {
|
|
return valuesToKernelArgsListHelper(v, 0, kernel, cl, cq);
|
|
}
|
|
|
|
/**
|
|
* Convert a kernel result to a value.
|
|
*/
|
|
const value kernelResultToValue(const void* const p, const value::ValueType type) {
|
|
switch(type) {
|
|
case value::Symbol: {
|
|
const char* const s = (const char*)p;
|
|
const size_t l = strlen(s);
|
|
if (l != 0 && *s == '\'')
|
|
return value(s + 1);
|
|
return value(s);
|
|
}
|
|
case value::String: {
|
|
const char* const s = (const char*)p;
|
|
const size_t l = strlen(s);
|
|
if (l != 0 && *s == '\'')
|
|
return value(s + 1);
|
|
return value(string(s, l));
|
|
}
|
|
case value::Number:
|
|
return (double)(*(const cl_float*)p);
|
|
case value::Bool:
|
|
return (bool)(*(const cl_int*)p);
|
|
default:
|
|
return *(const value*)p;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Return the value type corresponding to a C99 type name.
|
|
*/
|
|
const value::ValueType valueType(const string& t) {
|
|
if (t == "float")
|
|
return value::Number;
|
|
if (t == "int")
|
|
return value::Bool;
|
|
if (t == "char")
|
|
return value::String;
|
|
return value::Nil;
|
|
}
|
|
|
|
/**
|
|
* Return the size of a C99 type corresponding to a value type.
|
|
*/
|
|
const size_t valueSize(const value::ValueType type) {
|
|
switch(type) {
|
|
case value::Number:
|
|
return sizeof(cl_float);
|
|
case value::Bool:
|
|
return sizeof(cl_int);
|
|
case value::Symbol:
|
|
return sizeof(cl_char);
|
|
case value::String:
|
|
return sizeof(cl_char);
|
|
default:
|
|
return sizeof(value);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Return the result type of a kernel.
|
|
*/
|
|
class OpenCLResultType {
|
|
public:
|
|
OpenCLResultType(const value::ValueType type, const size_t n, const size_t size) : type(type), n(n), size(size) {}
|
|
const value::ValueType type;
|
|
const size_t n;
|
|
const size_t size;
|
|
};
|
|
|
|
const OpenCLResultType kernelResultType(const string& fn, value::ValueType type, const size_t n) {
|
|
if (type != value::Nil)
|
|
return OpenCLResultType(type, n, valueSize(type));
|
|
const string s = car(tokenize("_", fn));
|
|
const size_t d = find_first_of(s, "0123456789");
|
|
if (d == length(s)) {
|
|
const value::ValueType vt = valueType(s);
|
|
return OpenCLResultType(vt, 1, valueSize(vt));
|
|
}
|
|
const value::ValueType vt = valueType(substr(s, 0, d));
|
|
return OpenCLResultType(vt, atoi(c_str(substr(s, d))), valueSize(vt));
|
|
}
|
|
|
|
/**
|
|
* Create the kernel implementing an expression.
|
|
*/
|
|
const failable<OpenCLKernel> createKernel(const value& expr, const OpenCLProgram& clprog) {
|
|
|
|
// Create an OpenCL kernel for the requested function
|
|
const value fn = car<value>(expr);
|
|
cl_int ckerr;
|
|
const cl_kernel k = clCreateKernel(clprog.prog, c_str(fn), &ckerr);
|
|
if (k == NULL || ckerr != CL_SUCCESS) {
|
|
|
|
// The start, stop, and restart functions are optional
|
|
//if (fn == "start" || fn == "stop")
|
|
//return value(lvvlambda());
|
|
|
|
return mkfailure<OpenCLKernel>(string("Couldn't find function: ") + (string)car<value>(expr) + " : " + clError(ckerr));
|
|
}
|
|
return OpenCLKernel(k);
|
|
}
|
|
|
|
/**
|
|
* Evaluate an expression implemented by a kernel.
|
|
*/
|
|
const failable<value> evalKernel(const failable<OpenCLKernel>& fkernel, const value& expr, const size_t gwsize, const value::ValueType type, const size_t n, const OpenCLContext& cl) {
|
|
|
|
#ifdef WANT_MAINTAINER_OPENCL_PROF
|
|
const cl_uint estart = (cl_uint)timens();
|
|
const cl_uint pstart = estart;
|
|
#endif
|
|
|
|
if (!hasContent(fkernel))
|
|
return mkfailure<value>(fkernel);
|
|
const OpenCLKernel kernel = content(fkernel);
|
|
|
|
// Get a command queue for the specified device type
|
|
const cl_command_queue cq = commandq(cl);
|
|
|
|
// Set the kernel input args
|
|
const failable<list<OpenCLBuffer>> args = valuesToKernelArgs(cdr<value>(expr), kernel, cl, cq);
|
|
if (!hasContent(args)) {
|
|
return mkfailure<value>(args);
|
|
}
|
|
|
|
// Allocate result buffer in device memory
|
|
const value fn = car<value>(expr);
|
|
const OpenCLResultType rtype = kernelResultType(fn, type, n);
|
|
const size_t rsize = rtype.n * rtype.size;
|
|
const failable<OpenCLBuffer> rbuf = writeOnlyBuffer(rsize, cl);
|
|
if (!hasContent(rbuf))
|
|
return mkfailure<value>(rbuf);
|
|
|
|
// Set it as a kernel output arg
|
|
const cl_mem rmem = content(rbuf).mem;
|
|
const failable<OpenCLBuffer> rarg = valueToKernelArg((cl_uint)length(cdr<value>(expr)), sizeof(cl_mem), &rmem, rbuf, kernel);
|
|
if (!hasContent(rarg))
|
|
return mkfailure<value>(rarg);
|
|
|
|
// Enqueue the kernel, to be executed after all the writes complete
|
|
cl_event wevt[32];
|
|
const cl_uint nwevt = writeBufferEvents(content(args), wevt);
|
|
cl_event kevt;
|
|
const cl_int qerr = clEnqueueNDRangeKernel(cq, kernel.k, 1, NULL, &gwsize, NULL, nwevt, nwevt != 0? wevt : NULL, &kevt);
|
|
if (qerr != CL_SUCCESS)
|
|
return mkfailure<value>(string("Couldn't enqueue kernel task: ") + clError(qerr));
|
|
|
|
// Enqueue result buffer read, to be executed after the kernel completes
|
|
char res[rsize];
|
|
cl_event revt;
|
|
const cl_int rerr = clEnqueueReadBuffer(cq, rmem, CL_FALSE, 0, rsize, res, 1, &kevt, &revt);
|
|
if (rerr != CL_SUCCESS) {
|
|
clReleaseEvent(kevt);
|
|
return mkfailure<value>(string("Couldn't read from OpenCL device memory: ") + clError(rerr));
|
|
}
|
|
|
|
#ifdef WANT_MAINTAINER_OPENCL_PROF
|
|
const cl_uint pend = (cl_uint)timens();
|
|
preptime += (pend - pstart);
|
|
#endif
|
|
|
|
// Wait for completion
|
|
const cl_int werr = clWaitForEvents(1, &revt);
|
|
if (werr != CL_SUCCESS) {
|
|
clReleaseEvent(revt);
|
|
clReleaseEvent(kevt);
|
|
return mkfailure<value>(string("Couldn't wait for kernel completion: ") + clError(werr));
|
|
}
|
|
|
|
#ifdef WANT_MAINTAINER_OPENCL_PROF
|
|
profileMemEvents(nwevt, wevt);
|
|
profileKernelEvent(kevt);
|
|
profileMemEvent(revt);
|
|
#endif
|
|
|
|
// Convert the result to a value
|
|
const value v = kernelResultToValue(res, rtype.type);
|
|
|
|
// Release OpenCL resources
|
|
clReleaseEvent(revt);
|
|
clReleaseEvent(kevt);
|
|
|
|
#ifdef WANT_MAINTAINER_OPENCL_PROF
|
|
const cl_uint eend = (cl_uint)timens();
|
|
evaltime += (eend - estart);
|
|
#endif
|
|
|
|
return v;
|
|
}
|
|
|
|
const failable<value> evalKernel(const failable<OpenCLKernel>& kernel, const value& expr, const OpenCLContext& cl) {
|
|
return evalKernel(kernel, expr, 1, value::Nil, 0, cl);
|
|
}
|
|
|
|
/**
|
|
* Read an opencl program from an input stream.
|
|
*/
|
|
const failable<OpenCLProgram> readProgram(const string& path, istream& is, const OpenCLContext& cl) {
|
|
|
|
// Read the program source
|
|
const list<string> ls = streamList(is);
|
|
ostringstream os;
|
|
write(ls, os);
|
|
const char* cs = c_str(str(os));
|
|
|
|
// Create the OpenCL program
|
|
cl_int cperr;
|
|
const cl_program prog = clCreateProgramWithSource(cl.ctx, 1, (const char **)&cs, NULL, &cperr);
|
|
if (!prog || cperr != CL_SUCCESS)
|
|
return mkfailure<OpenCLProgram>(string("Couldn't create OpenCL program from source: ") + path + " : " + clError(cperr));
|
|
|
|
// Built it
|
|
const cl_int bperr = clBuildProgram(prog, 0, NULL, NULL, NULL, NULL);
|
|
if(bperr != CL_SUCCESS) {
|
|
size_t l;
|
|
char b[2048];
|
|
clGetProgramBuildInfo(prog, cl.devid[0], CL_PROGRAM_BUILD_LOG, sizeof(b), b, &l);
|
|
return mkfailure<OpenCLProgram>(string("Couldn't build OpenCL program: ") + path + " : " + clError(bperr) + "\n" + string(b));
|
|
}
|
|
return OpenCLProgram(prog);
|
|
}
|
|
|
|
/**
|
|
* Evaluate an expression against an OpenCL program provided as an input stream.
|
|
*/
|
|
const failable<value> evalKernel(const value& expr, istream& is, const OpenCLContext& cl) {
|
|
failable<OpenCLProgram> clprog = readProgram("program.cl", is, cl);
|
|
if (!hasContent(clprog))
|
|
return mkfailure<value>(clprog);
|
|
return evalKernel(createKernel(expr, content(clprog)), expr, 1, value::Nil, 0, cl);
|
|
}
|
|
|
|
}
|
|
}
|
|
#endif /* tuscany_opencl_eval_hpp */
|