summaryrefslogtreecommitdiffstats
path: root/sca-cpp/branches/lightweight-sca/modules/opencl/opencl-test.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'sca-cpp/branches/lightweight-sca/modules/opencl/opencl-test.cpp')
-rw-r--r--sca-cpp/branches/lightweight-sca/modules/opencl/opencl-test.cpp332
1 files changed, 332 insertions, 0 deletions
diff --git a/sca-cpp/branches/lightweight-sca/modules/opencl/opencl-test.cpp b/sca-cpp/branches/lightweight-sca/modules/opencl/opencl-test.cpp
new file mode 100644
index 0000000000..17bc5ccfa6
--- /dev/null
+++ b/sca-cpp/branches/lightweight-sca/modules/opencl/opencl-test.cpp
@@ -0,0 +1,332 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/* $Rev$ $Date$ */
+
+/**
+ * Test OpenCL kernel evaluator.
+ */
+
+#include <assert.h>
+#include "stream.hpp"
+#include "string.hpp"
+#include "driver.hpp"
+#include "parallel.hpp"
+#include "perf.hpp"
+
+namespace tuscany {
+namespace opencl {
+
+const string testFloatsCPU =
+ "kernel void add(const float x, const float y, global float* r) {\n"
+ " float l_x;\n"
+ " float l_y;\n"
+ " l_x = x;\n"
+ " l_y = y;\n"
+ " *r = l_x + l_y;\n"
+ "}\n"
+ "kernel void float_add(const float x, const float y, global float* r) {\n"
+ " add(x, y, r);\n"
+ "}\n";
+
+const string testFloatsGPU =
+ "kernel void add(const float x, const float y, global float* r) {\n"
+ " local float l_x;\n"
+ " local float l_y;\n"
+ " l_x = x;\n"
+ " l_y = y;\n"
+ " barrier(CLK_LOCAL_MEM_FENCE);\n"
+ " *r = l_x + l_y;\n"
+ "}\n"
+ "kernel void float_add(const float x, const float y, global float* r) {\n"
+ " add(x, y, r);\n"
+ "}\n";
+
+const string testBoolsCPU =
+ "kernel void int_or(const int x, const int y, global int* r) {\n"
+ " int l_x;\n"
+ " int l_y;\n"
+ " l_x = x;\n"
+ " l_y = y;\n"
+ " *r = l_x || l_y;\n"
+ "}\n";
+
+const string testBoolsGPU =
+ "kernel void int_or(const int x, const int y, global int* r) {\n"
+ " local int l_x;\n"
+ " local int l_y;\n"
+ " l_x = x;\n"
+ " l_y = y;\n"
+ " barrier(CLK_LOCAL_MEM_FENCE);\n"
+ " *r = l_x || l_y;\n"
+ "}\n";
+
+const string testCharsCPU =
+ "kernel void add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n"
+ " const int ixl = (int)xl;\n"
+ " const int iyl = (int)yl;\n"
+ " for (int i = 0; i < ixl; i++)\n"
+ " r[i] = x[i];\n"
+ " for (int i = 0; i < iyl; i++)\n"
+ " r[ixl + i] = y[i];\n"
+ " r[ixl + iyl] = '\\0';\n"
+ "}\n"
+ "kernel void char128_add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n"
+ " add(xl, x, yl, y, r);\n"
+ "}\n";
+
+const string testCharsCopyGPU =
+ "kernel void add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n"
+ " const int ixl = (int)xl;\n"
+ " const int iyl = (int)yl;\n"
+ " local char l_x[128];\n"
+ " local char l_y[128];\n"
+ " event_t re = async_work_group_copy(l_x, x, ixl, 0);\n"
+ " async_work_group_copy(l_y, y, iyl, re);\n"
+ " wait_group_events(1, &re);\n"
+ " local char l_r[128];\n"
+ " for (int i = 0; i < ixl; i++)\n"
+ " l_r[i] = l_x[i];\n"
+ " for (int i = 0; i < iyl; i++)\n"
+ " l_r[ixl + i] = l_y[i];\n"
+ " l_r[ixl + iyl] = '\\0';\n"
+ " event_t we = async_work_group_copy(r, l_r, ixl + iyl + 1, 0);\n"
+ " wait_group_events(1, &we);\n"
+ "}\n"
+ "kernel void char128(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n"
+ " add(xl, x, yl, y, r);\n"
+ "}\n";
+
+const string testCharsGPU =
+ "kernel void add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n"
+ " const int ixl = (int)xl;\n"
+ " const int iyl = (int)yl;\n"
+ " for (int i = 0; i < ixl; i++)\n"
+ " r[i] = x[i];\n"
+ " for (int i = 0; i < iyl; i++)\n"
+ " r[ixl + i] = y[i];\n"
+ " r[ixl + iyl] = '\\0';\n"
+ "}\n"
+ "kernel void char128(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n"
+ " add(xl, x, yl, y, r);\n"
+ "}\n";
+
+const string testCharsParallelCPU =
+ "kernel void add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n"
+ " const int i = get_global_id(0);\n"
+ " const int ixl = (int)xl;\n"
+ " const int iyl = (int)yl;\n"
+ " r[i] = i < ixl? x[i] : i < ixl + iyl? y[i - ixl] : '\\0';\n"
+ "}\n";
+
+const string testCharsParallelCopyGPU =
+ "kernel void add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n"
+ " const int i = get_global_id(0);\n"
+ " const int ixl = (int)xl;\n"
+ " const int iyl = (int)yl;\n"
+ " local char l_x[128];\n"
+ " local char l_y[128];\n"
+ " event_t re = async_work_group_copy(l_x, x, ixl, 0);\n"
+ " async_work_group_copy(l_y, y, iyl, re);\n"
+ " wait_group_events(1, &re);\n"
+ " local char l_r[128];\n"
+ " l_r[i] = i < ixl? l_x[i] : i < ixl + iyl? l_y[i - ixl] : '\\0';\n"
+ " event_t we = async_work_group_copy(r, l_r, ixl + iyl + 1, 0);\n"
+ " wait_group_events(1, &we);\n"
+ "}\n";
+
+const string testCharsParallelGPU =
+ "kernel void add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n"
+ " const int i = get_global_id(0);\n"
+ " const int ixl = (int)xl;\n"
+ " const int iyl = (int)yl;\n"
+ " r[i] = i < ixl? x[i] : i < ixl + iyl? y[i - ixl] : '\\0';\n"
+ "}\n";
+
+bool testTaskParallel(const OpenCLContext::DeviceType dev) {
+ gc_scoped_pool pool;
+ OpenCLContext cl(dev);
+ if (!devices(cl) != 0)
+ return true;
+
+ {
+ istringstream is(dev == OpenCLContext::CPU? testFloatsCPU : testFloatsGPU);
+ failable<OpenCLProgram> clprog = readProgram("kernel.cl", is, cl);
+ assert(hasContent(clprog));
+ const value exp = mklist<value>("float_add", 2, 3);
+ const failable<value> r = evalKernel(createKernel(exp, content(clprog)), exp, cl);
+ assert(hasContent(r));
+ assert(content(r) == value(5));
+ }
+ if (true) return true;
+ {
+ istringstream is(dev == OpenCLContext::CPU? testFloatsCPU : testFloatsGPU);
+ failable<OpenCLProgram> clprog = readProgram("kernel.cl", is, cl);
+ assert(hasContent(clprog));
+ const value exp = mklist<value>("add", 2, 3);
+ const failable<value> r = evalKernel(createKernel(exp, content(clprog)), exp, 1, value::Number, 1, cl);
+ assert(hasContent(r));
+ assert(content(r) == value(5));
+ }
+ {
+ istringstream is(dev == OpenCLContext::CPU? testBoolsCPU : testBoolsGPU);
+ failable<OpenCLProgram> clprog = readProgram("kernel.cl", is, cl);
+ assert(hasContent(clprog));
+
+ const value exp = mklist<value>("int_or", true, false);
+ const failable<value> r = evalKernel(createKernel(exp, content(clprog)), exp, cl);
+ assert(hasContent(r));
+ assert(content(r) == value(true));
+
+ const value exp2 = mklist<value>("int_or", false, false);
+ const failable<value> r2 = evalKernel(createKernel(exp2, content(clprog)), exp2, cl);
+ assert(hasContent(r2));
+ assert(content(r2) == value(false));
+ }
+ {
+ istringstream is(dev == OpenCLContext::CPU? testCharsCPU : testCharsGPU);
+ failable<OpenCLProgram> clprog = readProgram("kernel.cl", is, cl);
+ assert(hasContent(clprog));
+
+ const value exp = mklist<value>("char128", 60, string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello "), 60, string("World World World World World World World World World World "));
+ const failable<value> r = evalKernel(createKernel(exp, content(clprog)), exp, cl);
+ assert(hasContent(r));
+ assert(content(r) == value(string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello World World World World World World World World World World ")));
+ }
+ {
+ istringstream is(dev == OpenCLContext::CPU? testCharsCPU : testCharsGPU);
+ failable<OpenCLProgram> clprog = readProgram("kernel.cl", is, cl);
+ assert(hasContent(clprog));
+
+ const value exp = mklist<value>("add", 60, string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello "), 60, string("World World World World World World World World World World "));
+ const failable<value> r = evalKernel(createKernel(exp, content(clprog)), exp, 1, value::String, 128, cl);
+ assert(hasContent(r));
+ assert(content(r) == value(string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello World World World World World World World World World World ")));
+ }
+
+ return true;
+}
+
+struct evalTaskParallelLoop {
+ evalTaskParallelLoop(const OpenCLContext& cl, const OpenCLProgram& clprog) : cl(cl), clprog(clprog), exp(mklist<value>("add", 60, string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello "), 60, string("World World World World World World World World World World "))) {
+ }
+ const bool operator()() const {
+ const failable<value> r = evalKernel(createKernel(exp, clprog), exp, 1, value::String, 128, cl);
+ assert(hasContent(r));
+ assert(content(r) == value(string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello World World World World World World World World World World ")));
+ return true;
+ }
+
+ const OpenCLContext& cl;
+ const OpenCLProgram& clprog;
+ const value exp;
+};
+
+const bool testTaskParallelPerf(const OpenCLContext::DeviceType dev, const bool copy) {
+ gc_scoped_pool pool;
+ OpenCLContext cl(dev);
+ if (!devices(cl) != 0)
+ return true;
+
+ istringstream is(dev == OpenCLContext::CPU? testCharsCPU : copy? testCharsCopyGPU : testCharsGPU);
+ failable<OpenCLProgram> clprog = readProgram("kernel.cl", is, cl);
+ assert(hasContent(clprog));
+
+ resetOpenCLCounters();
+ const lambda<bool()> el = evalTaskParallelLoop(cl, content(clprog));
+ cout << "OpenCL task-parallel eval " << (dev == OpenCLContext::CPU? "CPU" : "GPU") << (copy? " copy" : "") << " test " << time(el, 5, 500) << " ms";
+ printOpenCLCounters(500);
+ cout << endl;
+ return true;
+}
+
+bool testDataParallel(const OpenCLContext::DeviceType dev) {
+ gc_scoped_pool pool;
+ OpenCLContext cl(dev);
+ if (!devices(cl) != 0)
+ return true;
+
+ {
+ istringstream is(dev == OpenCLContext::CPU? testCharsParallelCPU : testCharsParallelGPU);
+ failable<OpenCLProgram> clprog = readProgram("kernel.cl", is, cl);
+ assert(hasContent(clprog));
+
+ const value exp = mklist<value>("add", 6, string("Hello "), 5, string("World"));
+ const failable<value> r = evalKernel(createKernel(exp, content(clprog)), exp, 121, value::String, 128, cl);
+ assert(hasContent(r));
+ assert(content(r) == value(string("Hello World")));
+ }
+
+ return true;
+}
+
+struct evalDataParallelLoop {
+ evalDataParallelLoop(const OpenCLContext& cl, const OpenCLProgram& clprog) : cl(cl), clprog(clprog), exp(mklist<value>("add", 60, string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello "), 60, string("World World World World World World World World World World "))) {
+ }
+ const bool operator()() const {
+ const failable<value> r = evalKernel(createKernel(exp, clprog), exp, 121, value::String, 128, cl);
+ assert(hasContent(r));
+ assert(content(r) == value(string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello World World World World World World World World World World ")));
+ return true;
+ }
+
+ const OpenCLContext& cl;
+ const OpenCLProgram& clprog;
+ const value exp;
+};
+
+const bool testDataParallelPerf(const OpenCLContext::DeviceType dev, const bool copy) {
+ gc_scoped_pool pool;
+ OpenCLContext cl(dev);
+ if (!devices(cl) != 0)
+ return true;
+
+ istringstream is(dev == OpenCLContext::CPU? testCharsParallelCPU : copy? testCharsParallelCopyGPU : testCharsParallelGPU);
+ failable<OpenCLProgram> clprog = readProgram("kernel.cl", is, cl);
+ assert(hasContent(clprog));
+
+ resetOpenCLCounters();
+ const lambda<bool()> el = evalDataParallelLoop(cl, content(clprog));
+ cout << "OpenCL data-parallel eval " << (dev == OpenCLContext::CPU? "CPU" : "GPU") << (copy? " copy" : "") << " test " << time(el, 5, 500) << " ms";
+ printOpenCLCounters(500);
+ cout << endl;
+ return true;
+}
+
+}
+}
+
+int main() {
+ tuscany::cout << "Testing..." << tuscany::endl;
+
+ tuscany::opencl::testTaskParallel(tuscany::opencl::OpenCLContext::CPU);
+ tuscany::opencl::testTaskParallelPerf(tuscany::opencl::OpenCLContext::CPU, false);
+ tuscany::opencl::testDataParallel(tuscany::opencl::OpenCLContext::CPU);
+ tuscany::opencl::testDataParallelPerf(tuscany::opencl::OpenCLContext::CPU, false);
+
+ tuscany::opencl::testTaskParallel(tuscany::opencl::OpenCLContext::GPU);
+ tuscany::opencl::testTaskParallelPerf(tuscany::opencl::OpenCLContext::GPU, false);
+ tuscany::opencl::testTaskParallelPerf(tuscany::opencl::OpenCLContext::GPU, true);
+ tuscany::opencl::testDataParallel(tuscany::opencl::OpenCLContext::GPU);
+ tuscany::opencl::testDataParallelPerf(tuscany::opencl::OpenCLContext::GPU, false);
+ tuscany::opencl::testDataParallelPerf(tuscany::opencl::OpenCLContext::GPU, true);
+
+ tuscany::cout << "OK" << tuscany::endl;
+ return 0;
+}