From c9bfccc35345ce58fb5774d4b0b6a9868b262c0a Mon Sep 17 00:00:00 2001 From: giorgio Date: Wed, 5 Sep 2012 08:31:30 +0000 Subject: git-svn-id: http://svn.us.apache.org/repos/asf/tuscany@1381061 13f79535-47bb-0310-9956-ffa450edef68 --- .../lightweight-sca/modules/opencl/opencl-test.cpp | 332 +++++++++++++++++++++ 1 file changed, 332 insertions(+) create mode 100644 sca-cpp/branches/lightweight-sca/modules/opencl/opencl-test.cpp (limited to 'sca-cpp/branches/lightweight-sca/modules/opencl/opencl-test.cpp') diff --git a/sca-cpp/branches/lightweight-sca/modules/opencl/opencl-test.cpp b/sca-cpp/branches/lightweight-sca/modules/opencl/opencl-test.cpp new file mode 100644 index 0000000000..17bc5ccfa6 --- /dev/null +++ b/sca-cpp/branches/lightweight-sca/modules/opencl/opencl-test.cpp @@ -0,0 +1,332 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* $Rev$ $Date$ */ + +/** + * Test OpenCL kernel evaluator. + */ + +#include +#include "stream.hpp" +#include "string.hpp" +#include "driver.hpp" +#include "parallel.hpp" +#include "perf.hpp" + +namespace tuscany { +namespace opencl { + +const string testFloatsCPU = + "kernel void add(const float x, const float y, global float* r) {\n" + " float l_x;\n" + " float l_y;\n" + " l_x = x;\n" + " l_y = y;\n" + " *r = l_x + l_y;\n" + "}\n" + "kernel void float_add(const float x, const float y, global float* r) {\n" + " add(x, y, r);\n" + "}\n"; + +const string testFloatsGPU = + "kernel void add(const float x, const float y, global float* r) {\n" + " local float l_x;\n" + " local float l_y;\n" + " l_x = x;\n" + " l_y = y;\n" + " barrier(CLK_LOCAL_MEM_FENCE);\n" + " *r = l_x + l_y;\n" + "}\n" + "kernel void float_add(const float x, const float y, global float* r) {\n" + " add(x, y, r);\n" + "}\n"; + +const string testBoolsCPU = + "kernel void int_or(const int x, const int y, global int* r) {\n" + " int l_x;\n" + " int l_y;\n" + " l_x = x;\n" + " l_y = y;\n" + " *r = l_x || l_y;\n" + "}\n"; + +const string testBoolsGPU = + "kernel void int_or(const int x, const int y, global int* r) {\n" + " local int l_x;\n" + " local int l_y;\n" + " l_x = x;\n" + " l_y = y;\n" + " barrier(CLK_LOCAL_MEM_FENCE);\n" + " *r = l_x || l_y;\n" + "}\n"; + +const string testCharsCPU = + "kernel void add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n" + " const int ixl = (int)xl;\n" + " const int iyl = (int)yl;\n" + " for (int i = 0; i < ixl; i++)\n" + " r[i] = x[i];\n" + " for (int i = 0; i < iyl; i++)\n" + " r[ixl + i] = y[i];\n" + " r[ixl + iyl] = '\\0';\n" + "}\n" + "kernel void char128_add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n" + " add(xl, x, yl, y, r);\n" + "}\n"; + +const string testCharsCopyGPU = + "kernel void add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n" + " const int ixl = (int)xl;\n" + " const int iyl = (int)yl;\n" + " local char l_x[128];\n" + " local char l_y[128];\n" + " event_t re = async_work_group_copy(l_x, x, ixl, 0);\n" + " async_work_group_copy(l_y, y, iyl, re);\n" + " wait_group_events(1, &re);\n" + " local char l_r[128];\n" + " for (int i = 0; i < ixl; i++)\n" + " l_r[i] = l_x[i];\n" + " for (int i = 0; i < iyl; i++)\n" + " l_r[ixl + i] = l_y[i];\n" + " l_r[ixl + iyl] = '\\0';\n" + " event_t we = async_work_group_copy(r, l_r, ixl + iyl + 1, 0);\n" + " wait_group_events(1, &we);\n" + "}\n" + "kernel void char128(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n" + " add(xl, x, yl, y, r);\n" + "}\n"; + +const string testCharsGPU = + "kernel void add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n" + " const int ixl = (int)xl;\n" + " const int iyl = (int)yl;\n" + " for (int i = 0; i < ixl; i++)\n" + " r[i] = x[i];\n" + " for (int i = 0; i < iyl; i++)\n" + " r[ixl + i] = y[i];\n" + " r[ixl + iyl] = '\\0';\n" + "}\n" + "kernel void char128(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n" + " add(xl, x, yl, y, r);\n" + "}\n"; + +const string testCharsParallelCPU = + "kernel void add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n" + " const int i = get_global_id(0);\n" + " const int ixl = (int)xl;\n" + " const int iyl = (int)yl;\n" + " r[i] = i < ixl? x[i] : i < ixl + iyl? y[i - ixl] : '\\0';\n" + "}\n"; + +const string testCharsParallelCopyGPU = + "kernel void add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n" + " const int i = get_global_id(0);\n" + " const int ixl = (int)xl;\n" + " const int iyl = (int)yl;\n" + " local char l_x[128];\n" + " local char l_y[128];\n" + " event_t re = async_work_group_copy(l_x, x, ixl, 0);\n" + " async_work_group_copy(l_y, y, iyl, re);\n" + " wait_group_events(1, &re);\n" + " local char l_r[128];\n" + " l_r[i] = i < ixl? l_x[i] : i < ixl + iyl? l_y[i - ixl] : '\\0';\n" + " event_t we = async_work_group_copy(r, l_r, ixl + iyl + 1, 0);\n" + " wait_group_events(1, &we);\n" + "}\n"; + +const string testCharsParallelGPU = + "kernel void add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n" + " const int i = get_global_id(0);\n" + " const int ixl = (int)xl;\n" + " const int iyl = (int)yl;\n" + " r[i] = i < ixl? x[i] : i < ixl + iyl? y[i - ixl] : '\\0';\n" + "}\n"; + +bool testTaskParallel(const OpenCLContext::DeviceType dev) { + gc_scoped_pool pool; + OpenCLContext cl(dev); + if (!devices(cl) != 0) + return true; + + { + istringstream is(dev == OpenCLContext::CPU? testFloatsCPU : testFloatsGPU); + failable clprog = readProgram("kernel.cl", is, cl); + assert(hasContent(clprog)); + const value exp = mklist("float_add", 2, 3); + const failable r = evalKernel(createKernel(exp, content(clprog)), exp, cl); + assert(hasContent(r)); + assert(content(r) == value(5)); + } + if (true) return true; + { + istringstream is(dev == OpenCLContext::CPU? testFloatsCPU : testFloatsGPU); + failable clprog = readProgram("kernel.cl", is, cl); + assert(hasContent(clprog)); + const value exp = mklist("add", 2, 3); + const failable r = evalKernel(createKernel(exp, content(clprog)), exp, 1, value::Number, 1, cl); + assert(hasContent(r)); + assert(content(r) == value(5)); + } + { + istringstream is(dev == OpenCLContext::CPU? testBoolsCPU : testBoolsGPU); + failable clprog = readProgram("kernel.cl", is, cl); + assert(hasContent(clprog)); + + const value exp = mklist("int_or", true, false); + const failable r = evalKernel(createKernel(exp, content(clprog)), exp, cl); + assert(hasContent(r)); + assert(content(r) == value(true)); + + const value exp2 = mklist("int_or", false, false); + const failable r2 = evalKernel(createKernel(exp2, content(clprog)), exp2, cl); + assert(hasContent(r2)); + assert(content(r2) == value(false)); + } + { + istringstream is(dev == OpenCLContext::CPU? testCharsCPU : testCharsGPU); + failable clprog = readProgram("kernel.cl", is, cl); + assert(hasContent(clprog)); + + const value exp = mklist("char128", 60, string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello "), 60, string("World World World World World World World World World World ")); + const failable r = evalKernel(createKernel(exp, content(clprog)), exp, cl); + assert(hasContent(r)); + assert(content(r) == value(string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello World World World World World World World World World World "))); + } + { + istringstream is(dev == OpenCLContext::CPU? testCharsCPU : testCharsGPU); + failable clprog = readProgram("kernel.cl", is, cl); + assert(hasContent(clprog)); + + const value exp = mklist("add", 60, string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello "), 60, string("World World World World World World World World World World ")); + const failable r = evalKernel(createKernel(exp, content(clprog)), exp, 1, value::String, 128, cl); + assert(hasContent(r)); + assert(content(r) == value(string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello World World World World World World World World World World "))); + } + + return true; +} + +struct evalTaskParallelLoop { + evalTaskParallelLoop(const OpenCLContext& cl, const OpenCLProgram& clprog) : cl(cl), clprog(clprog), exp(mklist("add", 60, string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello "), 60, string("World World World World World World World World World World "))) { + } + const bool operator()() const { + const failable r = evalKernel(createKernel(exp, clprog), exp, 1, value::String, 128, cl); + assert(hasContent(r)); + assert(content(r) == value(string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello World World World World World World World World World World "))); + return true; + } + + const OpenCLContext& cl; + const OpenCLProgram& clprog; + const value exp; +}; + +const bool testTaskParallelPerf(const OpenCLContext::DeviceType dev, const bool copy) { + gc_scoped_pool pool; + OpenCLContext cl(dev); + if (!devices(cl) != 0) + return true; + + istringstream is(dev == OpenCLContext::CPU? testCharsCPU : copy? testCharsCopyGPU : testCharsGPU); + failable clprog = readProgram("kernel.cl", is, cl); + assert(hasContent(clprog)); + + resetOpenCLCounters(); + const lambda el = evalTaskParallelLoop(cl, content(clprog)); + cout << "OpenCL task-parallel eval " << (dev == OpenCLContext::CPU? "CPU" : "GPU") << (copy? " copy" : "") << " test " << time(el, 5, 500) << " ms"; + printOpenCLCounters(500); + cout << endl; + return true; +} + +bool testDataParallel(const OpenCLContext::DeviceType dev) { + gc_scoped_pool pool; + OpenCLContext cl(dev); + if (!devices(cl) != 0) + return true; + + { + istringstream is(dev == OpenCLContext::CPU? testCharsParallelCPU : testCharsParallelGPU); + failable clprog = readProgram("kernel.cl", is, cl); + assert(hasContent(clprog)); + + const value exp = mklist("add", 6, string("Hello "), 5, string("World")); + const failable r = evalKernel(createKernel(exp, content(clprog)), exp, 121, value::String, 128, cl); + assert(hasContent(r)); + assert(content(r) == value(string("Hello World"))); + } + + return true; +} + +struct evalDataParallelLoop { + evalDataParallelLoop(const OpenCLContext& cl, const OpenCLProgram& clprog) : cl(cl), clprog(clprog), exp(mklist("add", 60, string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello "), 60, string("World World World World World World World World World World "))) { + } + const bool operator()() const { + const failable r = evalKernel(createKernel(exp, clprog), exp, 121, value::String, 128, cl); + assert(hasContent(r)); + assert(content(r) == value(string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello World World World World World World World World World World "))); + return true; + } + + const OpenCLContext& cl; + const OpenCLProgram& clprog; + const value exp; +}; + +const bool testDataParallelPerf(const OpenCLContext::DeviceType dev, const bool copy) { + gc_scoped_pool pool; + OpenCLContext cl(dev); + if (!devices(cl) != 0) + return true; + + istringstream is(dev == OpenCLContext::CPU? testCharsParallelCPU : copy? testCharsParallelCopyGPU : testCharsParallelGPU); + failable clprog = readProgram("kernel.cl", is, cl); + assert(hasContent(clprog)); + + resetOpenCLCounters(); + const lambda el = evalDataParallelLoop(cl, content(clprog)); + cout << "OpenCL data-parallel eval " << (dev == OpenCLContext::CPU? "CPU" : "GPU") << (copy? " copy" : "") << " test " << time(el, 5, 500) << " ms"; + printOpenCLCounters(500); + cout << endl; + return true; +} + +} +} + +int main() { + tuscany::cout << "Testing..." << tuscany::endl; + + tuscany::opencl::testTaskParallel(tuscany::opencl::OpenCLContext::CPU); + tuscany::opencl::testTaskParallelPerf(tuscany::opencl::OpenCLContext::CPU, false); + tuscany::opencl::testDataParallel(tuscany::opencl::OpenCLContext::CPU); + tuscany::opencl::testDataParallelPerf(tuscany::opencl::OpenCLContext::CPU, false); + + tuscany::opencl::testTaskParallel(tuscany::opencl::OpenCLContext::GPU); + tuscany::opencl::testTaskParallelPerf(tuscany::opencl::OpenCLContext::GPU, false); + tuscany::opencl::testTaskParallelPerf(tuscany::opencl::OpenCLContext::GPU, true); + tuscany::opencl::testDataParallel(tuscany::opencl::OpenCLContext::GPU); + tuscany::opencl::testDataParallelPerf(tuscany::opencl::OpenCLContext::GPU, false); + tuscany::opencl::testDataParallelPerf(tuscany::opencl::OpenCLContext::GPU, true); + + tuscany::cout << "OK" << tuscany::endl; + return 0; +} -- cgit v1.2.3