From 88fb903346fd23fb25eb6bbd501632078745c6bc Mon Sep 17 00:00:00 2001 From: jsdelfino Date: Sun, 28 Aug 2011 02:50:09 +0000 Subject: Implement an OpenCL component implementation evaluator. Add OpenCL tests, working on Ubuntu and MacOS with both CPU and NVIDIA and ATI GPUs. git-svn-id: http://svn.us.apache.org/repos/asf/tuscany@1162473 13f79535-47bb-0310-9956-ffa450edef68 --- sca-cpp/trunk/.gitignore | 2 + sca-cpp/trunk/INSTALL | 8 + sca-cpp/trunk/README | 1 + sca-cpp/trunk/configure.ac | 48 ++ sca-cpp/trunk/kernel/perf.hpp | 12 + sca-cpp/trunk/macos/macos-install | 2 +- sca-cpp/trunk/modules/Makefile.am | 2 +- sca-cpp/trunk/modules/opencl/Makefile.am | 57 ++ sca-cpp/trunk/modules/opencl/client-test.cpp | 39 ++ sca-cpp/trunk/modules/opencl/domain-test.composite | 31 + sca-cpp/trunk/modules/opencl/driver.hpp | 62 ++ sca-cpp/trunk/modules/opencl/eval.hpp | 725 +++++++++++++++++++++ sca-cpp/trunk/modules/opencl/opencl-conf | 37 ++ sca-cpp/trunk/modules/opencl/opencl-shell.cpp | 40 ++ sca-cpp/trunk/modules/opencl/opencl-test.cpp | 332 ++++++++++ sca-cpp/trunk/modules/opencl/server-test | 39 ++ sca-cpp/trunk/modules/opencl/server-test.cl | 17 + 17 files changed, 1452 insertions(+), 2 deletions(-) create mode 100644 sca-cpp/trunk/modules/opencl/Makefile.am create mode 100644 sca-cpp/trunk/modules/opencl/client-test.cpp create mode 100644 sca-cpp/trunk/modules/opencl/domain-test.composite create mode 100644 sca-cpp/trunk/modules/opencl/driver.hpp create mode 100644 sca-cpp/trunk/modules/opencl/eval.hpp create mode 100755 sca-cpp/trunk/modules/opencl/opencl-conf create mode 100644 sca-cpp/trunk/modules/opencl/opencl-shell.cpp create mode 100644 sca-cpp/trunk/modules/opencl/opencl-test.cpp create mode 100755 sca-cpp/trunk/modules/opencl/server-test create mode 100644 sca-cpp/trunk/modules/opencl/server-test.cl (limited to 'sca-cpp') diff --git a/sca-cpp/trunk/.gitignore b/sca-cpp/trunk/.gitignore index 792e5b6323..c2b43cc804 100644 --- a/sca-cpp/trunk/.gitignore +++ b/sca-cpp/trunk/.gitignore @@ -141,4 +141,6 @@ value-element modules/edit/apps/*/nuvem modules/edit/apps/*/lib chat-send +opencl-shell +opencl-test diff --git a/sca-cpp/trunk/INSTALL b/sca-cpp/trunk/INSTALL index 5e2cb83eab..ca73ef5a4e 100644 --- a/sca-cpp/trunk/INSTALL +++ b/sca-cpp/trunk/INSTALL @@ -131,6 +131,10 @@ Python: Python 2.6.6 (http://www.python.org/) Google AppEngine 1.4.0 (http://code.google.com/appengine/) +OpenCL: +an OpenCL SDK (http://software.intel.com/en-us/articles/opencl-sdk/, +http://developer.nvidia.com/, http://developer.amd.com/sdks/amdappsdk) + Java: a Java 5+ JDK (http://openjdk.java.net/, http://harmony.apache.org/) @@ -178,6 +182,9 @@ for running with the HTTPD worker or event multi-threaded MPMs): To enable support for Python component implementations: --enable-python +To enable support for OpenCL component implementations: +--enable-opencl + To enable support for Java component implementations: --enable-java @@ -229,6 +236,7 @@ dependencies installed under $HOME: --with-libcloud=$HOME/libcloud-0.3.1-bin \ --enable-threads \ --enable-python --with-python=/usr \ +--enable-opencl --with-opencl-include=/usr/include --with-opencl-lib=/usr/lib \ --enable-gae --with-gae=$HOME/google_appengine \ --enable-java --with-java=/usr/lib/jvm/default-java \ --enable-webservice --with-axis2c=$HOME/axis2c-1.6.0-bin \ diff --git a/sca-cpp/trunk/README b/sca-cpp/trunk/README index 6e7b99c348..b4d257d8cb 100644 --- a/sca-cpp/trunk/README +++ b/sca-cpp/trunk/README @@ -64,6 +64,7 @@ Here's a rough guide to the Tuscany SCA source tree: | | |-- java Support for Java components | | |-- json JSON data encoding | | |-- oauth User signin using OAuth + | | |-- opencl Support for OpenCL components | | |-- openid User signin using OpenID | | |-- python Support for Python components | | |-- rss RSS data encoding diff --git a/sca-cpp/trunk/configure.ac b/sca-cpp/trunk/configure.ac index 28f93fd8c1..66d4e647e1 100644 --- a/sca-cpp/trunk/configure.ac +++ b/sca-cpp/trunk/configure.ac @@ -388,6 +388,53 @@ else AM_CONDITIONAL([WANT_PYTHON], false) fi +# Enable OpenCL support. +AC_MSG_CHECKING([whether to enable OpenCL support]) +AC_ARG_ENABLE(opencl, [AS_HELP_STRING([--enable-opencl], [enable OpenCL support [default=yes]])], +[ case "${enableval}" in + no) + AC_MSG_RESULT(no) + ;; + *) + AC_MSG_RESULT(yes) + want_opencl=true + ;; + esac ], +[ + AC_MSG_RESULT(yes) + want_opencl=true +]) +if test "${want_opencl}" = "true"; then + + # Configure path to OpenCL includes and lib. + AC_MSG_CHECKING([for opencl]) + AC_ARG_WITH([opencl-include], [AC_HELP_STRING([--with-opencl-include=PATH], [path to installed OpenCL 1.1 include dir [default=/usr/include]])], [ + OPENCL_INCLUDE="${withval}" + AC_MSG_RESULT("${withval}") + ], [ + OPENCL_INCLUDE="/usr/include" + AC_MSG_RESULT(/usr/include) + ]) + AC_SUBST(OPENCL_INCLUDE) + AC_ARG_WITH([opencl-lib], [AC_HELP_STRING([--with-opencl-lib=PATH], [path to installed OpenCL 1.1 lib dir [default=/usr/lib]])], [ + OPENCL_LIB="${withval}" + AC_MSG_RESULT("${withval}") + ], [ + OPENCL_LIB="/usr/lib" + AC_MSG_RESULT(/usr/lib) + ]) + AC_SUBST(OPENCL_LIB) + if test "${UNAME}" != "Darwin"; then + LIBS="-L${OPENCL_LIB} ${defaultlibs}" + AC_CHECK_LIB([OpenCL], [clGetDeviceIDs], [], [AC_MSG_ERROR([couldn't find a suitable libOpenCL, use --with-opencl=PATH])]) + fi + AM_CONDITIONAL([WANT_OPENCL], true) + AC_DEFINE([WANT_OPENCL], 1, [enable OpenCL support]) + +else + AM_CONDITIONAL([WANT_OPENCL], false) +fi + # Enable Java support. AC_MSG_CHECKING([whether to enable Java support]) AC_ARG_ENABLE(java, [AS_HELP_STRING([--enable-java], [enable Java support [default=no]])], @@ -929,6 +976,7 @@ AC_CONFIG_FILES([Makefile modules/http/Makefile modules/server/Makefile modules/python/Makefile + modules/opencl/Makefile modules/java/Makefile modules/openid/Makefile modules/oauth/Makefile diff --git a/sca-cpp/trunk/kernel/perf.hpp b/sca-cpp/trunk/kernel/perf.hpp index f5004d015b..82d0ddad03 100644 --- a/sca-cpp/trunk/kernel/perf.hpp +++ b/sca-cpp/trunk/kernel/perf.hpp @@ -64,5 +64,17 @@ const double time(const lambda& f, const long warmup, const long count) return (double)t / (double)count; } +const unsigned long timems() { + struct timeval t; + gettimeofday(&t, NULL); + return (unsigned long)(t.tv_sec * 1000 + t.tv_usec / 1000); +} + +const unsigned long timens() { + struct timeval t; + gettimeofday(&t, NULL); + return (unsigned long)(t.tv_sec * 1000000000 + t.tv_usec * 1000); +} + } #endif /* tuscany_perf_hpp */ diff --git a/sca-cpp/trunk/macos/macos-install b/sca-cpp/trunk/macos/macos-install index 6d7b90cd28..d7b2feb37a 100755 --- a/sca-cpp/trunk/macos/macos-install +++ b/sca-cpp/trunk/macos/macos-install @@ -285,7 +285,7 @@ cd $build git clone git://git.apache.org/tuscany-sca-cpp.git cd tuscany-sca-cpp ./bootstrap -./configure CC=/usr/local/bin/gcc CXX=/usr/local/bin/g++ --prefix=$build/tuscany-sca-cpp-bin --with-curl=$build/curl-7.19.5-bin --with-apr=$build/apr-1.4.x-bin --with-httpd=$build/httpd-2.3.10-bin --with-memcached=$build/memcached-1.4.5-bin --with-tinycdb=$build/tinycdb-bin --with-js-include=$build/js-1.8.5-bin/include/js --with-js-lib=$build/js-1.8.5-bin/lib --with-libcloud=$build/libcloud-0.4.2-bin --enable-threads --enable-python --with-libxml2=$build/libxml2-2.7.7-bin --enable-chat --with-libstrophe=$build/libstrophe-bin --enable-openid --with-mod-auth-openid=$build/mod-auth-openid-bin --enable-oauth --with-liboauth=$build/liboauth-0.9.1-bin --enable-mod-security --with-mod-security=$build/modsecurity-apache-2.6.0-bin +./configure CC=/usr/local/bin/gcc CXX=/usr/local/bin/g++ --prefix=$build/tuscany-sca-cpp-bin --with-curl=$build/curl-7.19.5-bin --with-apr=$build/apr-1.4.x-bin --with-httpd=$build/httpd-2.3.10-bin --with-memcached=$build/memcached-1.4.5-bin --with-tinycdb=$build/tinycdb-bin --with-js-include=$build/js-1.8.5-bin/include/js --with-js-lib=$build/js-1.8.5-bin/lib --with-libcloud=$build/libcloud-0.4.2-bin --enable-threads --enable-python --enable-opencl --with-libxml2=$build/libxml2-2.7.7-bin --enable-chat --with-libstrophe=$build/libstrophe-bin --enable-openid --with-mod-auth-openid=$build/mod-auth-openid-bin --enable-oauth --with-liboauth=$build/liboauth-0.9.1-bin --enable-mod-security --with-mod-security=$build/modsecurity-apache-2.6.0-bin make make install if [ "$?" != "0" ]; then diff --git a/sca-cpp/trunk/modules/Makefile.am b/sca-cpp/trunk/modules/Makefile.am index 278d8b40cc..f4195dc7e8 100644 --- a/sca-cpp/trunk/modules/Makefile.am +++ b/sca-cpp/trunk/modules/Makefile.am @@ -15,5 +15,5 @@ # specific language governing permissions and limitations # under the License. -SUBDIRS = scheme atom rss js json scdl http server python java openid oauth wsgi edit +SUBDIRS = scheme atom rss js json scdl http server python opencl java openid oauth wsgi edit diff --git a/sca-cpp/trunk/modules/opencl/Makefile.am b/sca-cpp/trunk/modules/opencl/Makefile.am new file mode 100644 index 0000000000..95e30d20b1 --- /dev/null +++ b/sca-cpp/trunk/modules/opencl/Makefile.am @@ -0,0 +1,57 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +if WANT_OPENCL + +INCLUDES = -I${OPENCL_INCLUDE} + +incl_HEADERS = *.hpp +incldir = $(prefix)/include/modules/opencl + +dist_mod_SCRIPTS = opencl-conf +moddir = $(prefix)/modules/opencl + +EXTRA_DIST = domain-test.composite server-test.cl + +if DARWIN +OPENCL_FLAGS = -framework OpenCL +else +OPENCL_FLAGS = -L${OPENCL_LIB} -R${OPENCL_LIB} -lOpenCL +endif + +#mod_LTLIBRARIES = libmod_tuscany_opencl.la +#libmod_tuscany_opencl_la_SOURCES = mod-opencl.cpp +#libmod_tuscany_opencl_la_LDFLAGS = -lxml2 -lcurl -lmozjs -framework OpenCL +#noinst_DATA = libmod_tuscany_opencl${libsuffix} +#libmod_tuscany_opencl${libsuffix}: +# ln -s .libs/libmod_tuscany_opencl${libsuffix} + +opencl_test_SOURCES = opencl-test.cpp +opencl_test_LDFLAGS = ${OPENCL_FLAGS} + +opencl_shell_SOURCES = opencl-shell.cpp +opencl_shell_LDFLAGS = ${OPENCL_FLAGS} + +client_test_SOURCES = client-test.cpp +client_test_LDFLAGS = -lxml2 -lcurl -lmozjs + +dist_noinst_SCRIPTS = server-test +noinst_PROGRAMS = opencl-test opencl-shell client-test +TESTS = opencl-test + +endif diff --git a/sca-cpp/trunk/modules/opencl/client-test.cpp b/sca-cpp/trunk/modules/opencl/client-test.cpp new file mode 100644 index 0000000000..7af3cc73d2 --- /dev/null +++ b/sca-cpp/trunk/modules/opencl/client-test.cpp @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* $Rev$ $Date$ */ + +/** + * Test HTTP client functions. + */ + +#include "stream.hpp" +#include "string.hpp" +#include "../server/client-test.hpp" + +int main() { + tuscany::cout << "Testing..." << tuscany::endl; + tuscany::server::testURI = "http://localhost:8090/opencl"; + + tuscany::server::testServer(); + + tuscany::cout << "OK" << tuscany::endl; + + return 0; +} diff --git a/sca-cpp/trunk/modules/opencl/domain-test.composite b/sca-cpp/trunk/modules/opencl/domain-test.composite new file mode 100644 index 0000000000..e69399d581 --- /dev/null +++ b/sca-cpp/trunk/modules/opencl/domain-test.composite @@ -0,0 +1,31 @@ + + + + + + + + + + + + diff --git a/sca-cpp/trunk/modules/opencl/driver.hpp b/sca-cpp/trunk/modules/opencl/driver.hpp new file mode 100644 index 0000000000..b4b6c2845b --- /dev/null +++ b/sca-cpp/trunk/modules/opencl/driver.hpp @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* $Rev$ $Date$ */ + +#ifndef tuscany_opencl_driver_hpp +#define tuscany_opencl_driver_hpp + +/** + * OpenCL evaluator main driver loop. + */ + +#include "string.hpp" +#include "stream.hpp" +#include "monad.hpp" +#include "../scheme/driver.hpp" +#include "eval.hpp" + +namespace tuscany { +namespace opencl { + +const value evalDriverLoop(const OpenCLProgram& clprog, istream& in, ostream& out, const OpenCLContext& cl) { + scheme::promptForInput(scheme::evalInputPrompt, out); + value input = scheme::readValue(in); + if (isNil(input)) + return input; + const failable output = evalKernel(createKernel(input, clprog), input, 1, value::String, 512, cl); + scheme::announceOutput(scheme::evalOutputPrompt, out); + scheme::userPrint(content(output), out); + return evalDriverLoop(clprog, in, out, cl); +} + +const bool evalDriverRun(const char* path, istream& in, ostream& out) { + OpenCLContext cl(OpenCLContext::DEFAULT); + scheme::setupDisplay(out); + ifstream is(path); + failable clprog = readProgram(path, is, cl); + if (!hasContent(clprog)) + return true; + evalDriverLoop(content(clprog), in, out, cl); + return true; +} + +} +} +#endif /* tuscany_opencl_driver_hpp */ diff --git a/sca-cpp/trunk/modules/opencl/eval.hpp b/sca-cpp/trunk/modules/opencl/eval.hpp new file mode 100644 index 0000000000..ef0e028e71 --- /dev/null +++ b/sca-cpp/trunk/modules/opencl/eval.hpp @@ -0,0 +1,725 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* $Rev$ $Date$ */ + +#ifndef tuscany_opencl_eval_hpp +#define tuscany_opencl_eval_hpp + +/** + * OpenCL kernel evaluation logic. + */ +#ifdef IS_DARWIN +#include +#else +#include +#endif + +#include "list.hpp" +#include "value.hpp" +#include "perf.hpp" + +namespace tuscany { +namespace opencl { + +/** + * Convert an OpenCL error code to a string. + */ +const string clError(const cl_int e) { + ostringstream s; + s << "error " << e; + return str(s); +} + +#ifdef WANT_MAINTAINER_MODE + +/** + * OpenCL profiling counters. + */ +cl_ulong memtime = 0; +cl_ulong kernelqtime = 0; +cl_ulong kerneltime = 0; +cl_ulong preptime = 0; +cl_ulong evaltime = 0; + +/** + * Reset the OpenCL profiling counters. + */ +bool resetOpenCLCounters() { + memtime = kernelqtime = kerneltime = preptime = evaltime = 0; + return true; +} + +/** + * Print the OpenCL profiling counters. + */ +bool printOpenCLCounters(const long n) { + cout << " kernelq " << ((double)kernelqtime / 1000000.0) / (double)n << " ms kernel " << ((double)kerneltime / 1000000.0) / (double)n << " ms memory " << ((double)memtime / 1000000.0) / (double)n << " ms prep " << ((double)preptime / 1000000.0) / (double)n << " ms eval " << ((double)evaltime / 1000000.0) / (double)n << " ms"; + return true; +} + +/** + * Profile a memory event. + */ +failable profileMemEvent(const cl_event evt) { + cl_ulong start = 0; + const cl_int serr = clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, NULL); + if (serr != CL_SUCCESS) + return mkfailure("Couldn't profile memory event start: " + clError(serr)); + cl_ulong end = 0; + const cl_int eerr = clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL); + if (eerr != CL_SUCCESS) + return mkfailure("Couldn't profile memory event end: " + clError(eerr)); + const cl_ulong t = end - start; + memtime += t; + return t; +} + +/** + * Profile a kernel event. + */ +failable profileKernelEvent(const cl_event evt) { + cl_ulong queued = 0; + const cl_int qerr = clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &queued, NULL); + if (qerr != CL_SUCCESS) + return mkfailure("Couldn't profile kernel event queue: " + clError(qerr)); + cl_ulong start = 0; + const cl_int serr = clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, NULL); + if (serr != CL_SUCCESS) + return mkfailure("Couldn't profile kernel event start: " + clError(serr)); + cl_ulong end = 0; + const cl_int eerr = clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL); + if (eerr != CL_SUCCESS) + return mkfailure("Couldn't profile kernel event end: " + clError(eerr)); + const cl_ulong q = start - queued; + kernelqtime += q; + const cl_ulong t = end - start; + kerneltime += t; + return t; +} + +/** + * Profile an array of memory events. + */ +failable profileMemEvents(const cl_uint n, const cl_event* evt) { + if (n == 0) + return 0; + const failable t = profileMemEvent(*evt); + if (!hasContent(t)) + return t; + const failable r = profileMemEvents(n - 1, evt + 1); + if (!hasContent(r)) + return r; + return content(t) + content(r); +} + +#else + +#define resetOpenCLCounters() +#define printOpenCLCounters(n) + +#endif + +class OpenCLContext; +class OpenCLProgram; +class OpenCLKernel; +class OpenCLBuffer; + +/** + * Represent an OpenCL context. + */ +class OpenCLContext { +public: +#define OPENCL_MAX_DEVICES 64 + + enum DeviceType { + DEFAULT = 0, CPU = 1, GPU = 2 + }; + + OpenCLContext(const OpenCLContext::DeviceType devtype) : dev(OpenCLContext::DEFAULT), ndevs(0), ctx(0) { + debug("opencl::OpenCLContext"); + for (int i = 0; i < OPENCL_MAX_DEVICES; i++) + cq[i] = 0; + + // Get the available platforms + cl_uint nplatforms; + cl_platform_id platforms[16]; + const cl_int gperr = clGetPlatformIDs(16, platforms, &nplatforms); + if(nplatforms == 0 || gperr != CL_SUCCESS) { + mkfailure("Couldn't get OpenCL platforms: " + clError(gperr)); + return; + } + for(cl_uint i = 0; i < nplatforms; ++i) { + char vendor[256]; + const cl_int gverr = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(vendor), vendor, NULL); + if(gverr != CL_SUCCESS) { + mkfailure("Couldn't get OpenCL platform: " + clError(gverr)); + return; + } + debug(vendor, "opencl::OpenCLContext::vendor"); + } + + // Get the available devices of the requested type + if (devtype == OpenCLContext::DEFAULT || devtype == OpenCLContext::GPU) { + for(cl_uint i = 0; i < nplatforms; ++i) { + const cl_int err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, OPENCL_MAX_DEVICES, devid, &ndevs); + if (err == CL_SUCCESS) { + debug(ndevs, "opencl::OpenCLContext::gcpus"); + dev = OpenCLContext::GPU; + break; + } + } + } + if ((devtype == OpenCLContext::DEFAULT && ndevs == 0) || devtype == OpenCLContext::CPU) { + for(cl_uint i = 0; i < nplatforms; ++i) { + const cl_int err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_CPU, OPENCL_MAX_DEVICES, devid, &ndevs); + if (err == CL_SUCCESS) { + debug(ndevs, "opencl::OpenCLContext::ncpus"); + dev = OpenCLContext::CPU; + break; + } + } + } + if (ndevs == 0) + return; + + // Initialize OpenCL context and command queues + cl_int ccerr; + ctx = clCreateContext(0, ndevs, devid, NULL, NULL, &ccerr); + if(!ctx || ccerr != CL_SUCCESS) { + mkfailure("Couldn't create OpenCL context: " + clError(ccerr)); + return; + } + + for (cl_uint i = 0; i < ndevs; i++) { + cl_int cqerr; +#ifdef WANT_MAINTAINER_MODE + cq[i] = clCreateCommandQueue(ctx, devid[i], CL_QUEUE_PROFILING_ENABLE, &cqerr); +#else + cq[i] = clCreateCommandQueue(ctx, devid[i], 0, &cqerr); +#endif + if (!cq[i] || cqerr != CL_SUCCESS) { + mkfailure("Couldn't create OpenCL command queue: " + clError(cqerr)); + return; + } + } + } + + ~OpenCLContext() { + debug("opencl::~OpenCLContext"); + for (cl_uint i = 0; i < ndevs; i++) { + if (cq[i] != 0) + clReleaseCommandQueue(cq[i]); + } + if (ctx != 0) + clReleaseContext(ctx); + } + +private: + OpenCLContext::DeviceType dev; + cl_uint ndevs; + cl_device_id devid[OPENCL_MAX_DEVICES]; + cl_context ctx; + cl_command_queue cq[OPENCL_MAX_DEVICES]; + + friend const cl_uint devices(const OpenCLContext& cl); + friend const cl_command_queue commandq(const OpenCLContext& cl); + friend const failable readOnlyBuffer(const size_t size, const void* p, const OpenCLContext& cl, cl_command_queue cq); + friend const failable writeOnlyBuffer(const size_t size, const OpenCLContext& cl); + friend const failable evalKernel(const failable& kernel, const value& expr, const size_t gwsize, const value::ValueType type, const size_t n, const OpenCLContext& cl); + friend const failable readProgram(const string& path, istream& is, const OpenCLContext& cl); +}; + +/** + * Return the number of computing devices available in a context. + */ +const cl_uint devices(const OpenCLContext& cl) { + return cl.ndevs; +} + +/** + * Return a command queue from a context. + */ +const cl_command_queue commandq(const OpenCLContext& cl) { + return cl.cq[0]; +} + +/** + * Represents an OpenCL program. + */ +class OpenCLProgram { +public: + OpenCLProgram() : prog(0) { + } + + OpenCLProgram(const cl_program prog) : prog(prog) { + } + + OpenCLProgram(const OpenCLProgram& c) : prog(c.prog) { + const cl_int rperr = clRetainProgram(prog); + if (rperr != CL_SUCCESS) + mkfailure(string("Couldn't retain OpenCL program: ") + clError(rperr)); + } + + ~OpenCLProgram() { + if (!prog) + return; + const cl_int rperr = clReleaseProgram(prog); + if (rperr != CL_SUCCESS) + mkfailure(string("Couldn't release OpenCL program: ") + clError(rperr)); + } + +private: + const cl_program prog; + + friend const failable createKernel(const value& expr, const OpenCLProgram& clprog); + friend const failable evalKernel(const failable& kernel, const value& expr, const size_t gwsize, const value::ValueType type, const size_t n, const OpenCLContext& cl); +}; + +/** + * Represents an OpenCL kernel. + */ +class OpenCLKernel { +public: + OpenCLKernel() : k(0) { + } + + OpenCLKernel(const cl_kernel k) : k(k) { + } + + OpenCLKernel(const OpenCLKernel& c) : k(c.k) { + const cl_int rkerr = clRetainKernel(k); + if (rkerr != CL_SUCCESS) + mkfailure(string("Couldn't retain OpenCL kernel: ") + clError(rkerr)); + } + + ~OpenCLKernel() { + if (!k) + return; + const cl_int rkerr = clReleaseKernel(k); + if (rkerr != CL_SUCCESS) + mkfailure(string("Couldn't release OpenCL kernel: ") + clError(rkerr)); + } + +private: + const cl_kernel k; + + friend const failable valueToKernelArg(const cl_uint i, const size_t size, const void* p, const failable& buf, const OpenCLKernel& kernel); + friend const failable evalKernel(const failable& kernel, const value& expr, const size_t gwsize, const value::ValueType type, const size_t n, const OpenCLContext& cl); +}; + +/** + * Represents an OpenCL buffer. + */ +class OpenCLBuffer { +public: + OpenCLBuffer() : mem(0), evt(0) { + } + + OpenCLBuffer(const cl_mem mem, const cl_event evt) : mem(mem), evt(evt) { + } + + OpenCLBuffer(const OpenCLBuffer& c) : mem(c.mem), evt(c.evt) { + if (mem != 0) { + const cl_int rmerr = clRetainMemObject(mem); + if (rmerr != CL_SUCCESS) + mkfailure(string("Couldn't retain OpenCL buffer: ") + clError(rmerr)); + } + if (evt != 0) { + const cl_int reerr = clRetainEvent(evt); + if (reerr != CL_SUCCESS) + mkfailure(string("Couldn't retain OpenCL event: ") + clError(reerr)); + } + } + + ~OpenCLBuffer() { + if (mem != 0) { + const cl_int rmerr = clReleaseMemObject(mem); + if (rmerr != CL_SUCCESS) + mkfailure(string("Couldn't release OpenCL buffer: ") + clError(rmerr)); + } + if (evt != 0) { + const cl_int reerr = clReleaseEvent(evt); + if (reerr != CL_SUCCESS) + mkfailure(string("Couldn't release OpenCL event: ") + clError(reerr)); + } + } + +private: + const cl_mem mem; + const cl_event evt; + + friend const cl_uint writeBufferEvents(const list& buf, cl_event* evt); + friend const failable valueToKernelArg(const cl_uint i, const size_t size, const void* p, const failable& buf, const OpenCLKernel& kernel); + friend const failable evalKernel(const failable& kernel, const value& expr, const size_t gwsize, const value::ValueType type, const size_t n, const OpenCLContext& cl); +}; + +/** + * Return a read-only memory buffer. + */ +const failable readOnlyBuffer(const size_t size, const void* p, const OpenCLContext& cl, const cl_command_queue cq) { + if (cl.dev == OpenCLContext::CPU) { + cl_int err; + const cl_mem buf = clCreateBuffer(cl.ctx, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, size, const_cast(p), &err); + if (!buf || err != CL_SUCCESS) + return mkfailure(string("Couldn't map OpenCL host memory: ") + clError(err)); + return OpenCLBuffer(buf, 0); + } + cl_int berr; + const cl_mem buf = clCreateBuffer(cl.ctx, CL_MEM_READ_ONLY, size, NULL, &berr); + if (!buf || berr != CL_SUCCESS) + return mkfailure(string("Couldn't allocate OpenCL device memory: ") + clError(berr)); + cl_event wevt; + const cl_int werr = clEnqueueWriteBuffer(cq, buf, CL_FALSE, 0, size, p, 0, NULL, &wevt); + if (werr != CL_SUCCESS) { + clReleaseMemObject(buf); + return mkfailure(string("Couldn't enqueue write to device memory: ") + clError(werr)); + } + return OpenCLBuffer(buf, wevt); +} + +/** + * Fill an array of write events for a given list of buffers. + */ +const cl_uint writeBufferEvents(const list& buf, cl_event* evt) { + if (isNil(buf)) + return 0; + const cl_event e = car(buf).evt; + if (e == 0) + return writeBufferEvents(cdr(buf), evt); + *evt = e; + return 1 + writeBufferEvents(cdr(buf), evt + 1); +} + +/** + * Return a write-only memory buffer. + */ +const failable writeOnlyBuffer(const size_t size, const OpenCLContext& cl) { + if (cl.dev == OpenCLContext::CPU) { + cl_int err; + const cl_mem buf = clCreateBuffer(cl.ctx, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, size, NULL, &err); + if (!buf || err != CL_SUCCESS) + return mkfailure(string("Couldn't map OpenCL host memory: ") + clError(err)); + return OpenCLBuffer(buf, 0); + } + cl_int err; + const cl_mem buf = clCreateBuffer(cl.ctx, CL_MEM_WRITE_ONLY, size, NULL, &err); + if (!buf || err != CL_SUCCESS) + return mkfailure(string("Couldn't allocate OpenCL device memory: ") + clError(err)); + return OpenCLBuffer(buf, 0); +} + +/** + * Convert a value to a kernel arg. + */ +const failable valueToKernelArg(const cl_uint i, const size_t size, const void* p, const failable& buf, const OpenCLKernel& kernel) { + if (!hasContent(buf)) + return buf; + if (p != NULL) { + const cl_int err = clSetKernelArg(kernel.k, (cl_uint)i, size, p); + if (err != CL_SUCCESS) + return mkfailure(string("Couldn't set OpenCL simple kernel arg: ") + clError(err)); + return buf; + } + const OpenCLBuffer b = content(buf); + const cl_int err = clSetKernelArg(kernel.k, i, sizeof(cl_mem), &b.mem); + if (err != CL_SUCCESS) + return mkfailure(string("Couldn't set OpenCL buffer kernel arg: ") + clError(err)); + return buf; +} + +const failable valueToKernelArg(const value& v, const cl_uint i, const OpenCLKernel& kernel, const OpenCLContext& cl, const cl_command_queue cq) { + switch (type(v)) { + case value::Symbol: { + const string s = string("'") + v; + return valueToKernelArg(i, 0, NULL, readOnlyBuffer(length(s) + 1, c_str(s), cl, cq), kernel); + } + case value::String: { + const string s = (string)v; + return valueToKernelArg(i, 0, NULL, readOnlyBuffer(length(s) + 1, c_str(s), cl, cq), kernel); + } + case value::Number: { + const cl_float d = (cl_float)((double)v); + return valueToKernelArg(i, sizeof(cl_float), &d, OpenCLBuffer(), kernel); + } + case value::Bool: { + const cl_int b = (cl_int)((bool)v); + return valueToKernelArg(i, sizeof(cl_int), &b, OpenCLBuffer(), kernel); + } + default: { + return valueToKernelArg(i, sizeof(cl_mem), NULL, readOnlyBuffer(sizeof(value), &v, cl, cq), kernel); + } + } +} + +/** + * Convert a list of values to kernel args. + */ +const failable> valuesToKernelArgsListHelper(const list& v, const cl_uint i, const OpenCLKernel& kernel, const OpenCLContext& cl, const cl_command_queue cq) { + if (isNil(v)) + return list(); + const failable a = valueToKernelArg(car(v), i, kernel, cl, cq); + if (!hasContent(a)) + return mkfailure>(reason(a)); + const failable> al = valuesToKernelArgsListHelper(cdr(v), i + 1, kernel, cl, cq); + if (!hasContent(al)) + return al; + return cons(content(a), content(al)); +} + +const failable> valuesToKernelArgs(const list& v, const OpenCLKernel& kernel, const OpenCLContext& cl, const cl_command_queue cq) { + return valuesToKernelArgsListHelper(v, 0, kernel, cl, cq); +} + +/** + * Convert a kernel result to a value. + */ +const value kernelResultToValue(const void* p, const value::ValueType type) { + switch(type) { + case value::Symbol: { + const char* s = static_cast(p); + const size_t l = strlen(s); + if (l != 0 && *s == '\'') + return value(s + 1); + return value(s); + } + case value::String: { + const char* s = static_cast(p); + const size_t l = strlen(s); + if (l != 0 && *s == '\'') + return value(s + 1); + return value(string(s, l)); + } + case value::Number: + return (double)(*(static_cast(p))); + case value::Bool: + return (bool)(*(static_cast(p))); + default: + return *(static_cast(p)); + } +} + +/** + * Return the value type corresponding to a C99 type name. + */ +const value::ValueType valueType(const string& t) { + if (t == "float") + return value::Number; + if (t == "int") + return value::Bool; + if (t == "char") + return value::String; + return value::Nil; +} + +/** + * Return the size of a C99 type corresponding to a value type. + */ +const size_t valueSize(const value::ValueType type) { + switch(type) { + case value::Number: + return sizeof(cl_float); + case value::Bool: + return sizeof(cl_int); + case value::Symbol: + return sizeof(cl_char); + case value::String: + return sizeof(cl_char); + default: + return sizeof(value); + } +} + +/** + * Return the result type of a kernel. + */ +class OpenCLResultType { +public: + OpenCLResultType(const value::ValueType type, const size_t n, const size_t size) : type(type), n(n), size(size) {} + const value::ValueType type; + const size_t n; + const size_t size; +}; + +const OpenCLResultType kernelResultType(const string& fn, value::ValueType type, const size_t n) { + if (type != value::Nil) + return OpenCLResultType(type, n, valueSize(type)); + const string s = car(tokenize("_", fn)); + const size_t d = find_first_of(s, "0123456789"); + if (d == length(s)) { + const value::ValueType vt = valueType(s); + return OpenCLResultType(vt, 1, valueSize(vt)); + } + const value::ValueType vt = valueType(substr(s, 0, d)); + return OpenCLResultType(vt, atoi(c_str(substr(s, d))), valueSize(vt)); +} + +/** + * Create the kernel implementing an expression. + */ +const failable createKernel(const value& expr, const OpenCLProgram& clprog) { + + // Create an OpenCL kernel for the requested function + const value fn = car(expr); + cl_int ckerr; + const cl_kernel k = clCreateKernel(clprog.prog, c_str(fn), &ckerr); + if (k == NULL || ckerr != CL_SUCCESS) { + + // The start, stop, and restart functions are optional + //if (fn == "start" || fn == "stop") + //return value(lambda&)>()); + + return mkfailure(string("Couldn't find function: ") + car(expr) + " : " + clError(ckerr)); + } + return OpenCLKernel(k); +} + +/** + * Evaluate an expression implemented by a kernel. + */ +const failable evalKernel(const failable& fkernel, const value& expr, const size_t gwsize, const value::ValueType type, const size_t n, const OpenCLContext& cl) { + +#ifdef WANT_MAINTAINER_MODE + const cl_uint estart = (cl_uint)timens(); + const cl_uint pstart = estart; +#endif + + if (!hasContent(fkernel)) + return mkfailure(reason(fkernel)); + const OpenCLKernel kernel = content(fkernel); + + // Get a command queue for the specified device type + const cl_command_queue cq = commandq(cl); + + // Set the kernel input args + const failable> args = valuesToKernelArgs(cdr(expr), kernel, cl, cq); + if (!hasContent(args)) { + return mkfailure(reason(args)); + } + + // Allocate result buffer in device memory + const value fn = car(expr); + const OpenCLResultType rtype = kernelResultType(fn, type, n); + const size_t rsize = rtype.n * rtype.size; + const failable rbuf = writeOnlyBuffer(rsize, cl); + if (!hasContent(rbuf)) + return mkfailure(reason(rbuf)); + + // Set it as a kernel output arg + const cl_mem rmem = content(rbuf).mem; + const failable rarg = valueToKernelArg((cl_uint)length(cdr(expr)), sizeof(cl_mem), &rmem, rbuf, kernel); + if (!hasContent(rarg)) + return mkfailure(reason(rarg)); + + // Enqueue the kernel, to be executed after all the writes complete + cl_event wevt[32]; + const cl_uint nwevt = writeBufferEvents(content(args), wevt); + cl_event kevt; + const cl_int qerr = clEnqueueNDRangeKernel(cq, kernel.k, 1, NULL, &gwsize, NULL, nwevt, nwevt != 0? wevt : NULL, &kevt); + if (qerr != CL_SUCCESS) + return mkfailure(string("Couldn't enqueue kernel task: ") + clError(qerr)); + + // Enqueue result buffer read, to be executed after the kernel completes + char res[rsize]; + cl_event revt; + const cl_int rerr = clEnqueueReadBuffer(cq, rmem, CL_FALSE, 0, rsize, res, 1, &kevt, &revt); + if (rerr != CL_SUCCESS) { + clReleaseEvent(kevt); + return mkfailure(string("Couldn't read from OpenCL device memory: ") + clError(rerr)); + } + +#ifdef WANT_MAINTAINER_MODE + const cl_uint pend = (cl_uint)timens(); + preptime += (pend - pstart); +#endif + + // Wait for completion + const cl_int werr = clWaitForEvents(1, &revt); + if (werr != CL_SUCCESS) { + clReleaseEvent(revt); + clReleaseEvent(kevt); + return mkfailure(string("Couldn't wait for kernel completion: ") + clError(werr)); + } + +#ifdef WANT_MAINTAINER_MODE + profileMemEvents(nwevt, wevt); + profileKernelEvent(kevt); + profileMemEvent(revt); +#endif + + // Convert the result to a value + const value v = kernelResultToValue(res, rtype.type); + + // Release OpenCL resources + clReleaseEvent(revt); + clReleaseEvent(kevt); + +#ifdef WANT_MAINTAINER_MODE + const cl_uint eend = (cl_uint)timens(); + evaltime += (eend - estart); +#endif + + return v; +} + +const failable evalKernel(const failable& kernel, const value& expr, const OpenCLContext& cl) { + return evalKernel(kernel, expr, 1, value::Nil, 0, cl); +} + +/** + * Read an opencl program from an input stream. + */ +const failable readProgram(const string& path, istream& is, const OpenCLContext& cl) { + + // Read the program source + const list ls = streamList(is); + ostringstream os; + write(ls, os); + const char* cs = c_str(str(os)); + + // Create the OpenCL program + cl_int cperr; + const cl_program prog = clCreateProgramWithSource(cl.ctx, 1, (const char **)&cs, NULL, &cperr); + if (!prog || cperr != CL_SUCCESS) + return mkfailure(string("Couldn't create OpenCL program from source: ") + path + " : " + clError(cperr)); + + // Built it + const cl_int bperr = clBuildProgram(prog, 0, NULL, NULL, NULL, NULL); + if(bperr != CL_SUCCESS) { + size_t l; + char b[2048]; + clGetProgramBuildInfo(prog, cl.devid[0], CL_PROGRAM_BUILD_LOG, sizeof(b), b, &l); + return mkfailure(string("Couldn't build OpenCL program: ") + path + " : " + clError(bperr) + "\n" + string(b)); + } + return OpenCLProgram(prog); +} + +/** + * Evaluate an expression against an OpenCL program provided as an input stream. + */ +const failable evalKernel(const value& expr, istream& is, const OpenCLContext& cl) { + failable clprog = readProgram("program.cl", is, cl); + if (!hasContent(clprog)) + return mkfailure(reason(clprog)); + return evalKernel(createKernel(expr, content(clprog)), expr, 1, value::Nil, 0, cl); +} + +} +} +#endif /* tuscany_opencl_eval_hpp */ diff --git a/sca-cpp/trunk/modules/opencl/opencl-conf b/sca-cpp/trunk/modules/opencl/opencl-conf new file mode 100755 index 0000000000..1ba2c336a3 --- /dev/null +++ b/sca-cpp/trunk/modules/opencl/opencl-conf @@ -0,0 +1,37 @@ +#!/bin/sh + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Generate an OpenCL server conf +here=`echo "import os; print os.path.realpath('$0')" | python`; here=`dirname $here` +mkdir -p $1 +root=`echo "import os; print os.path.realpath('$1')" | python` + +uname=`uname -s` +if [ $uname = "Darwin" ]; then + libsuffix=".dylib" +else + libsuffix=".so" +fi + +cat >>$root/conf/modules.conf < +#include "gc.hpp" +#include "stream.hpp" +#include "string.hpp" +#include "driver.hpp" + +int main(const int argc, char** argv) { + tuscany::gc_scoped_pool pool; + if (argc != 2) { + tuscany::cerr << "Usage: opencl-shell " << tuscany::endl; + return 1; + } + tuscany::opencl::evalDriverRun(argv[1], tuscany::cin, tuscany::cout); + return 0; +} diff --git a/sca-cpp/trunk/modules/opencl/opencl-test.cpp b/sca-cpp/trunk/modules/opencl/opencl-test.cpp new file mode 100644 index 0000000000..17bc5ccfa6 --- /dev/null +++ b/sca-cpp/trunk/modules/opencl/opencl-test.cpp @@ -0,0 +1,332 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* $Rev$ $Date$ */ + +/** + * Test OpenCL kernel evaluator. + */ + +#include +#include "stream.hpp" +#include "string.hpp" +#include "driver.hpp" +#include "parallel.hpp" +#include "perf.hpp" + +namespace tuscany { +namespace opencl { + +const string testFloatsCPU = + "kernel void add(const float x, const float y, global float* r) {\n" + " float l_x;\n" + " float l_y;\n" + " l_x = x;\n" + " l_y = y;\n" + " *r = l_x + l_y;\n" + "}\n" + "kernel void float_add(const float x, const float y, global float* r) {\n" + " add(x, y, r);\n" + "}\n"; + +const string testFloatsGPU = + "kernel void add(const float x, const float y, global float* r) {\n" + " local float l_x;\n" + " local float l_y;\n" + " l_x = x;\n" + " l_y = y;\n" + " barrier(CLK_LOCAL_MEM_FENCE);\n" + " *r = l_x + l_y;\n" + "}\n" + "kernel void float_add(const float x, const float y, global float* r) {\n" + " add(x, y, r);\n" + "}\n"; + +const string testBoolsCPU = + "kernel void int_or(const int x, const int y, global int* r) {\n" + " int l_x;\n" + " int l_y;\n" + " l_x = x;\n" + " l_y = y;\n" + " *r = l_x || l_y;\n" + "}\n"; + +const string testBoolsGPU = + "kernel void int_or(const int x, const int y, global int* r) {\n" + " local int l_x;\n" + " local int l_y;\n" + " l_x = x;\n" + " l_y = y;\n" + " barrier(CLK_LOCAL_MEM_FENCE);\n" + " *r = l_x || l_y;\n" + "}\n"; + +const string testCharsCPU = + "kernel void add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n" + " const int ixl = (int)xl;\n" + " const int iyl = (int)yl;\n" + " for (int i = 0; i < ixl; i++)\n" + " r[i] = x[i];\n" + " for (int i = 0; i < iyl; i++)\n" + " r[ixl + i] = y[i];\n" + " r[ixl + iyl] = '\\0';\n" + "}\n" + "kernel void char128_add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n" + " add(xl, x, yl, y, r);\n" + "}\n"; + +const string testCharsCopyGPU = + "kernel void add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n" + " const int ixl = (int)xl;\n" + " const int iyl = (int)yl;\n" + " local char l_x[128];\n" + " local char l_y[128];\n" + " event_t re = async_work_group_copy(l_x, x, ixl, 0);\n" + " async_work_group_copy(l_y, y, iyl, re);\n" + " wait_group_events(1, &re);\n" + " local char l_r[128];\n" + " for (int i = 0; i < ixl; i++)\n" + " l_r[i] = l_x[i];\n" + " for (int i = 0; i < iyl; i++)\n" + " l_r[ixl + i] = l_y[i];\n" + " l_r[ixl + iyl] = '\\0';\n" + " event_t we = async_work_group_copy(r, l_r, ixl + iyl + 1, 0);\n" + " wait_group_events(1, &we);\n" + "}\n" + "kernel void char128(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n" + " add(xl, x, yl, y, r);\n" + "}\n"; + +const string testCharsGPU = + "kernel void add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n" + " const int ixl = (int)xl;\n" + " const int iyl = (int)yl;\n" + " for (int i = 0; i < ixl; i++)\n" + " r[i] = x[i];\n" + " for (int i = 0; i < iyl; i++)\n" + " r[ixl + i] = y[i];\n" + " r[ixl + iyl] = '\\0';\n" + "}\n" + "kernel void char128(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n" + " add(xl, x, yl, y, r);\n" + "}\n"; + +const string testCharsParallelCPU = + "kernel void add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n" + " const int i = get_global_id(0);\n" + " const int ixl = (int)xl;\n" + " const int iyl = (int)yl;\n" + " r[i] = i < ixl? x[i] : i < ixl + iyl? y[i - ixl] : '\\0';\n" + "}\n"; + +const string testCharsParallelCopyGPU = + "kernel void add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n" + " const int i = get_global_id(0);\n" + " const int ixl = (int)xl;\n" + " const int iyl = (int)yl;\n" + " local char l_x[128];\n" + " local char l_y[128];\n" + " event_t re = async_work_group_copy(l_x, x, ixl, 0);\n" + " async_work_group_copy(l_y, y, iyl, re);\n" + " wait_group_events(1, &re);\n" + " local char l_r[128];\n" + " l_r[i] = i < ixl? l_x[i] : i < ixl + iyl? l_y[i - ixl] : '\\0';\n" + " event_t we = async_work_group_copy(r, l_r, ixl + iyl + 1, 0);\n" + " wait_group_events(1, &we);\n" + "}\n"; + +const string testCharsParallelGPU = + "kernel void add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n" + " const int i = get_global_id(0);\n" + " const int ixl = (int)xl;\n" + " const int iyl = (int)yl;\n" + " r[i] = i < ixl? x[i] : i < ixl + iyl? y[i - ixl] : '\\0';\n" + "}\n"; + +bool testTaskParallel(const OpenCLContext::DeviceType dev) { + gc_scoped_pool pool; + OpenCLContext cl(dev); + if (!devices(cl) != 0) + return true; + + { + istringstream is(dev == OpenCLContext::CPU? testFloatsCPU : testFloatsGPU); + failable clprog = readProgram("kernel.cl", is, cl); + assert(hasContent(clprog)); + const value exp = mklist("float_add", 2, 3); + const failable r = evalKernel(createKernel(exp, content(clprog)), exp, cl); + assert(hasContent(r)); + assert(content(r) == value(5)); + } + if (true) return true; + { + istringstream is(dev == OpenCLContext::CPU? testFloatsCPU : testFloatsGPU); + failable clprog = readProgram("kernel.cl", is, cl); + assert(hasContent(clprog)); + const value exp = mklist("add", 2, 3); + const failable r = evalKernel(createKernel(exp, content(clprog)), exp, 1, value::Number, 1, cl); + assert(hasContent(r)); + assert(content(r) == value(5)); + } + { + istringstream is(dev == OpenCLContext::CPU? testBoolsCPU : testBoolsGPU); + failable clprog = readProgram("kernel.cl", is, cl); + assert(hasContent(clprog)); + + const value exp = mklist("int_or", true, false); + const failable r = evalKernel(createKernel(exp, content(clprog)), exp, cl); + assert(hasContent(r)); + assert(content(r) == value(true)); + + const value exp2 = mklist("int_or", false, false); + const failable r2 = evalKernel(createKernel(exp2, content(clprog)), exp2, cl); + assert(hasContent(r2)); + assert(content(r2) == value(false)); + } + { + istringstream is(dev == OpenCLContext::CPU? testCharsCPU : testCharsGPU); + failable clprog = readProgram("kernel.cl", is, cl); + assert(hasContent(clprog)); + + const value exp = mklist("char128", 60, string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello "), 60, string("World World World World World World World World World World ")); + const failable r = evalKernel(createKernel(exp, content(clprog)), exp, cl); + assert(hasContent(r)); + assert(content(r) == value(string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello World World World World World World World World World World "))); + } + { + istringstream is(dev == OpenCLContext::CPU? testCharsCPU : testCharsGPU); + failable clprog = readProgram("kernel.cl", is, cl); + assert(hasContent(clprog)); + + const value exp = mklist("add", 60, string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello "), 60, string("World World World World World World World World World World ")); + const failable r = evalKernel(createKernel(exp, content(clprog)), exp, 1, value::String, 128, cl); + assert(hasContent(r)); + assert(content(r) == value(string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello World World World World World World World World World World "))); + } + + return true; +} + +struct evalTaskParallelLoop { + evalTaskParallelLoop(const OpenCLContext& cl, const OpenCLProgram& clprog) : cl(cl), clprog(clprog), exp(mklist("add", 60, string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello "), 60, string("World World World World World World World World World World "))) { + } + const bool operator()() const { + const failable r = evalKernel(createKernel(exp, clprog), exp, 1, value::String, 128, cl); + assert(hasContent(r)); + assert(content(r) == value(string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello World World World World World World World World World World "))); + return true; + } + + const OpenCLContext& cl; + const OpenCLProgram& clprog; + const value exp; +}; + +const bool testTaskParallelPerf(const OpenCLContext::DeviceType dev, const bool copy) { + gc_scoped_pool pool; + OpenCLContext cl(dev); + if (!devices(cl) != 0) + return true; + + istringstream is(dev == OpenCLContext::CPU? testCharsCPU : copy? testCharsCopyGPU : testCharsGPU); + failable clprog = readProgram("kernel.cl", is, cl); + assert(hasContent(clprog)); + + resetOpenCLCounters(); + const lambda el = evalTaskParallelLoop(cl, content(clprog)); + cout << "OpenCL task-parallel eval " << (dev == OpenCLContext::CPU? "CPU" : "GPU") << (copy? " copy" : "") << " test " << time(el, 5, 500) << " ms"; + printOpenCLCounters(500); + cout << endl; + return true; +} + +bool testDataParallel(const OpenCLContext::DeviceType dev) { + gc_scoped_pool pool; + OpenCLContext cl(dev); + if (!devices(cl) != 0) + return true; + + { + istringstream is(dev == OpenCLContext::CPU? testCharsParallelCPU : testCharsParallelGPU); + failable clprog = readProgram("kernel.cl", is, cl); + assert(hasContent(clprog)); + + const value exp = mklist("add", 6, string("Hello "), 5, string("World")); + const failable r = evalKernel(createKernel(exp, content(clprog)), exp, 121, value::String, 128, cl); + assert(hasContent(r)); + assert(content(r) == value(string("Hello World"))); + } + + return true; +} + +struct evalDataParallelLoop { + evalDataParallelLoop(const OpenCLContext& cl, const OpenCLProgram& clprog) : cl(cl), clprog(clprog), exp(mklist("add", 60, string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello "), 60, string("World World World World World World World World World World "))) { + } + const bool operator()() const { + const failable r = evalKernel(createKernel(exp, clprog), exp, 121, value::String, 128, cl); + assert(hasContent(r)); + assert(content(r) == value(string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello World World World World World World World World World World "))); + return true; + } + + const OpenCLContext& cl; + const OpenCLProgram& clprog; + const value exp; +}; + +const bool testDataParallelPerf(const OpenCLContext::DeviceType dev, const bool copy) { + gc_scoped_pool pool; + OpenCLContext cl(dev); + if (!devices(cl) != 0) + return true; + + istringstream is(dev == OpenCLContext::CPU? testCharsParallelCPU : copy? testCharsParallelCopyGPU : testCharsParallelGPU); + failable clprog = readProgram("kernel.cl", is, cl); + assert(hasContent(clprog)); + + resetOpenCLCounters(); + const lambda el = evalDataParallelLoop(cl, content(clprog)); + cout << "OpenCL data-parallel eval " << (dev == OpenCLContext::CPU? "CPU" : "GPU") << (copy? " copy" : "") << " test " << time(el, 5, 500) << " ms"; + printOpenCLCounters(500); + cout << endl; + return true; +} + +} +} + +int main() { + tuscany::cout << "Testing..." << tuscany::endl; + + tuscany::opencl::testTaskParallel(tuscany::opencl::OpenCLContext::CPU); + tuscany::opencl::testTaskParallelPerf(tuscany::opencl::OpenCLContext::CPU, false); + tuscany::opencl::testDataParallel(tuscany::opencl::OpenCLContext::CPU); + tuscany::opencl::testDataParallelPerf(tuscany::opencl::OpenCLContext::CPU, false); + + tuscany::opencl::testTaskParallel(tuscany::opencl::OpenCLContext::GPU); + tuscany::opencl::testTaskParallelPerf(tuscany::opencl::OpenCLContext::GPU, false); + tuscany::opencl::testTaskParallelPerf(tuscany::opencl::OpenCLContext::GPU, true); + tuscany::opencl::testDataParallel(tuscany::opencl::OpenCLContext::GPU); + tuscany::opencl::testDataParallelPerf(tuscany::opencl::OpenCLContext::GPU, false); + tuscany::opencl::testDataParallelPerf(tuscany::opencl::OpenCLContext::GPU, true); + + tuscany::cout << "OK" << tuscany::endl; + return 0; +} diff --git a/sca-cpp/trunk/modules/opencl/server-test b/sca-cpp/trunk/modules/opencl/server-test new file mode 100755 index 0000000000..e623599b39 --- /dev/null +++ b/sca-cpp/trunk/modules/opencl/server-test @@ -0,0 +1,39 @@ +#!/bin/sh + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Setup +../http/httpd-conf tmp localhost 8090 ../server/htdocs +../server/server-conf tmp +./opencl-conf tmp +cat >>tmp/conf/httpd.conf </dev/null +rc=$? + +# Cleanup +../http/httpd-stop tmp +sleep 2 +exit $rc diff --git a/sca-cpp/trunk/modules/opencl/server-test.cl b/sca-cpp/trunk/modules/opencl/server-test.cl new file mode 100644 index 0000000000..de5c2d1b1e --- /dev/null +++ b/sca-cpp/trunk/modules/opencl/server-test.cl @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + -- cgit v1.2.3