OpenCL/セルオートマトン
CA1.cl
/* Work-group width in cells; the host program launches the kernel with this same local size. */
#define BLOCK_SIZE 128

/*
 * Next-state lookup table for the automaton.
 * The index is the 3-bit neighbourhood (left * 4 + centre * 2 + right);
 * the entries below give new = left XOR right, i.e. Rule 90.
 */
__constant
const int RULE[8] = {
    0,  /* 000 */
    1,  /* 001 */
    0,  /* 010 */
    1,  /* 011 */
    1,  /* 100 */
    0,  /* 101 */
    1,  /* 110 */
    0   /* 111 */
};
/*
 * Advances a one-dimensional cellular automaton by one generation.
 *
 * Each work-item owns the cell at (global id + 1); index 0 and the index just
 * past the last work-item are padding cells that are read but never written.
 * The work-group first stages its cells plus one halo cell on each side into
 * local memory, then every work-item looks up its next state in RULE.
 *
 * input1 : current generation, at least (global size + 2) ints.
 * output : next generation, same layout as input1.
 * work   : local scratch, at least (local size + 2) ints.
 *
 * The halo offsets are derived from get_local_size(0) instead of a fixed
 * macro, so the kernel stays correct for any work-group size as long as the
 * host sizes `work` accordingly.
 *
 * Because RULE encodes Rule 90, the result is equivalent to
 * input1[index - 1] ^ input1[index + 1] computed straight from global memory;
 * the local-memory staging is kept as the variant under measurement.
 */
__kernel void
runCA90(
        __global const int * input1,
        __global int * output,
        __local int * work)
{
    const int index = (int)get_global_id(0) + 1;
    const int lx = (int)get_local_id(0) + 1;
    const int groupSize = (int)get_local_size(0);

    /* Every work-item stages its own cell. */
    work[lx] = input1[index];

    /* The first work-item of the group also fetches both halo cells. */
    if ( lx == 1 ) {
        work[0] = input1[index - 1];
        work[groupSize + 1] = input1[index + groupSize];
    }

    /* All staged cells must be visible before any neighbour is read. */
    barrier(CLK_LOCAL_MEM_FENCE);

    const int before = (work[lx - 1] * 4) + (work[lx] * 2) + (work[lx + 1]);
    output[index] = RULE[before];
}
LifeGame.cpp
#include "../Common/OclWrap.h"
#include "../Common/SimpleTimer.h"
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <vector>
// Element type of the cell buffers.
typedef int INPUT_TYPE;

// Local work size used when launching the kernel.
static const int BLOCK_SIZE = 128;

// Number of generations to simulate.
static const int MAX_TIMES = 500;
void
runOnHost(
const INPUT_TYPE * input1,
INPUT_TYPE * output,
const size_t NUM)
{
for ( size_t index = 1; index <= NUM; ++ index ) {
output[index] = (input1[index - 1] ^ input1[index + 1]);
}
return;
}
int main(int argc, char * argv[])
{
cl_int err = CL_SUCCESS;
const size_t NUM = (argc >= 2 ? atol(argv[1]) : BLOCK_SIZE * 100000);
const size_t BUFFER_SIZE = (NUM + 2);
std::vector<INPUT_TYPE> vecInput1(BUFFER_SIZE);
std::vector<INPUT_TYPE> vecInput2(BUFFER_SIZE);
std::vector<int> vecOutputCPU(BUFFER_SIZE, 0);
std::vector<INPUT_TYPE> vecOutputGPU(BUFFER_SIZE, 0);
INPUT_TYPE * const input1 = &(vecInput1[0]);
INPUT_TYPE * const input2 = &(vecInput2[0]);
INPUT_TYPE * const output1 = &(vecOutputGPU[0]);
CLWRAP::OclWrap wrapper;
// データの準備。 //
for ( size_t i = 0; i < BUFFER_SIZE; ++ i ) {
input1[i] = 0;
input2[i] = 0;
}
input1[BUFFER_SIZE / 2] = 1;
// Running By GPU //
CLWRAP::SimpleTimer timer;
CLWRAP::SimpleTimer timeGPU;
timer.startTimer();
try {
wrapper.setupInstance(std::cerr);
wrapper.readSourceFile("CA1.cl");
wrapper.buildPrograms(std::cerr);
cl::Kernel kernel(wrapper.getProgram(), "runCA90", &err);
cl::Buffer memInput1 = wrapper.createBuffer(
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(INPUT_TYPE) * BUFFER_SIZE, input1, &err);
cl::Buffer memInput2 = wrapper.createBuffer(
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
sizeof(INPUT_TYPE) * BUFFER_SIZE, input2, &err);
cl::CommandQueue queue = wrapper.createCommandQueue(0, &err);
kernel.setArg(2, sizeof(INPUT_TYPE) * (BLOCK_SIZE + 2) , NULL);
timeGPU.startTimer();
for ( size_t t = 0; t < MAX_TIMES; t += 2 ) {
kernel.setArg(0, memInput1);
kernel.setArg(1, memInput2);
queue.enqueueNDRangeKernel(kernel, cl::NullRange,
cl::NDRange(NUM), cl::NDRange(BLOCK_SIZE), NULL, NULL);
kernel.setArg(0, memInput2);
kernel.setArg(1, memInput1);
queue.enqueueNDRangeKernel(kernel, cl::NullRange,
cl::NDRange(NUM), cl::NDRange(BLOCK_SIZE), NULL, NULL);
}
timeGPU.stopTimer();
queue.enqueueReadBuffer(memInput1, CL_TRUE, 0,
sizeof(INPUT_TYPE) * BUFFER_SIZE, output1, NULL, NULL);
// } catch ( cl::Error &err ) {
// std::cerr << "ERROR : " << err.what() << "(" << err.err() << ")" << std::endl;
} catch ( std::exception & err ) {
std::cerr << "ERROR : " << err.what() << std::endl;
exit( EXIT_FAILURE );
} catch ( ... ) {
std::cerr << "Unknown Exception" << std::endl;
exit( EXIT_FAILURE );
}
timer.stopTimer();
std::cerr << "Run On GPU --- "
<< "CLOCK : " << timer.getClockTime()
<< ", REAL : " << timer.getRealTime()
<< std::endl;
std::cerr << "In Kernel --- "
<< "CLOCK : " << timeGPU.getClockTime()
<< ", REAL : " << timeGPU.getRealTime()
<< std::endl;
//// const size_t SHOW_OFFSET = (BUFFER_SIZE - 30) / 2;
const size_t SHOW_OFFSET = 0;
for ( int i = 0; i < 30; ++ i ) {
std::cout << output1[i + SHOW_OFFSET];
}
std::cout << std::endl;
// Running By CPU //
input2[BUFFER_SIZE / 2] = 1;
timer.startTimer();
for ( size_t t = 0; t < MAX_TIMES; t += 2 ) {
runOnHost(input2, input1, NUM);
runOnHost(input1, input2, NUM);
}
timer.stopTimer();
std::cerr << "Run On CPU --- "
<< "CLOCK : " << timer.getClockTime()
<< ", REAL : " << timer.getRealTime()
<< std::endl;
for ( int i = 0; i < 30; ++ i ) {
std::cout << input1[i + SHOW_OFFSET];
}
std::cout << std::endl;
return ( 0 );
}
リンク
合計: -
今日: -
昨日: -
トップページの合計: -
最終更新:2014年03月08日 03:26