アットウィキロゴ

プログラミング > OpenCL > CA

OpenCL/セルオートマトン


CA1.cl


// Work-group width in cells. The host program launches the kernel with
// this local size and allocates BLOCK_SIZE + 2 ints of local memory.
#define BLOCK_SIZE  128

// Next-state lookup table for the automaton. The index is the 3-bit
// neighbourhood value (left * 4 + centre * 2 + right); the entry is the
// cell's state in the next generation.
__constant
const  int  RULE[8] = {
    0,      //  000
    1,      //  001
    0,      //  010
    1,      //  011
    1,      //  100
    0,      //  101
    1,      //  110
    0       //  111
};

/**
 * Advances a one-dimensional cellular automaton by one generation using
 * the RULE lookup table.
 *
 * Work-item g updates cell g + 1, so the cells handled by the launch are
 * 1 .. global_size; cells 0 and global_size + 1 are only read (they act
 * as the fixed left/right boundary).
 *
 * @param input1  Current generation; must hold global_size + 2 ints.
 * @param output  Receives the next generation for cells 1 .. global_size.
 * @param work    Local scratch buffer; must hold get_local_size(0) + 2
 *                ints (the group's cells plus one halo cell on each side).
 *
 * The tile width is taken from get_local_size(0) rather than a
 * compile-time constant, so the kernel stays correct for any work-group
 * size as long as the host sizes `work` accordingly.
 *
 * For cells that only hold 0 or 1 the table lookup gives the same result
 * as `input1[index - 1] ^ input1[index + 1]`.
 */
__kernel  void
runCA90(
        __global  const  int  *     input1,
        __global  int  *            output,
        __local   int  *            work)
{
    const  int  localSize = (int)get_local_size(0);
    const  int  index     = (int)get_global_id(0) + 1;   // cell in global memory
    const  int  lx        = (int)get_local_id(0)  + 1;   // slot in the local tile

    // Every work-item stages its own cell into the local tile.
    work[lx]    = input1[index];

    // The first and last work-items of the group also fetch the halo
    // cells that belong to the neighbouring groups (or the boundary).
    if ( lx == 1 ) {
        work[0]             = input1[index - 1];
    }
    if ( lx == localSize ) {
        work[localSize + 1] = input1[index + 1];
    }

    // The whole tile must be populated before any neighbour is read.
    barrier(CLK_LOCAL_MEM_FENCE);

    // Encode (left, centre, right) as a 3-bit index into the rule table.
    const  int  before = (work[lx - 1] * 4) + (work[lx] * 2) + (work[lx + 1]);
    output[index]   = RULE[before];
}

LifeGame.cpp

#include    "../Common/OclWrap.h"
#include    "../Common/SimpleTimer.h"
#include    <fstream>
#include    <sstream>
#include    <iostream>

//  Work-items per work-group; also determines the size of the kernel's
//  local scratch buffer (BLOCK_SIZE + 2 elements).
const  int  BLOCK_SIZE  = 128;

//  Number of generations to simulate. The main loops advance two
//  generations per iteration, so this is expected to be even.
const  int  MAX_TIMES   = 500;

//  Element type of one automaton cell.
typedef     int     INPUT_TYPE;

/**
 * Host-side reference step: computes one generation of the automaton.
 *
 * Each cell in 1 .. NUM of `output` becomes the XOR of its left and right
 * neighbours in `input1`. Cells 0 and NUM + 1 of `input1` are read as
 * boundaries; the same cells of `output` are left untouched.
 *
 * @param input1  Current generation (NUM + 2 elements).
 * @param output  Next generation (NUM + 2 elements).
 * @param NUM     Number of interior cells to update.
 */
void
runOnHost(
        const  INPUT_TYPE  *   input1,
        INPUT_TYPE  *          output,
        const  size_t   NUM)
{
    //  `cell` is the zero-based interior offset: its left neighbour sits
    //  at input1[cell], its right neighbour at input1[cell + 2].
    for ( size_t cell = 0; cell < NUM; ++ cell ) {
        const  INPUT_TYPE  left    = input1[cell];
        const  INPUT_TYPE  right   = input1[cell + 2];
        output[cell + 1]    = (left ^ right);
    }
}

/**
 * Simulates MAX_TIMES generations of the automaton, first on an OpenCL
 * device (kernel "runCA90" from CA1.cl) and then on the host, printing
 * the timings to stderr and the first 30 cells of each result to stdout.
 *
 * @param argc  Argument count.
 * @param argv  argv[1], if present, is the number of cells. It must be a
 *              positive multiple of BLOCK_SIZE because the kernel is
 *              launched with a fixed local size of BLOCK_SIZE.
 * @return 0 on success, EXIT_FAILURE on invalid arguments. OpenCL
 *         failures reported as exceptions terminate via exit().
 */
int  main(int argc, char * argv[])
{
    //  NOTE(review): `err` is filled in by the calls below but never
    //  inspected; presumably failures surface as exceptions -- confirm
    //  against OclWrap / the cl:: bindings configuration.
    cl_int          err = CL_SUCCESS;

    //  Validate the requested size before it is used as an NDRange: the
    //  global size has to be divisible by the local size, and a negative
    //  value would wrap to a huge size_t.
    const  long     requested
        = (argc >= 2 ? atol(argv[1]) : BLOCK_SIZE * 100000L);
    if ( (requested <= 0) || ((requested % BLOCK_SIZE) != 0) ) {
        std::cerr   << "ERROR : number of cells must be a positive multiple of "
                    << BLOCK_SIZE << std::endl;
        return ( EXIT_FAILURE );
    }

    const  size_t   NUM = static_cast<size_t>(requested);
    //  One extra boundary cell on each side of the NUM interior cells.
    const  size_t   BUFFER_SIZE = (NUM + 2);

    //  All cells start at zero.
    std::vector<INPUT_TYPE>    vecInput1(BUFFER_SIZE, 0);
    std::vector<INPUT_TYPE>    vecInput2(BUFFER_SIZE, 0);
    std::vector<INPUT_TYPE>    vecOutputGPU(BUFFER_SIZE, 0);

    INPUT_TYPE  *  const   input1  = &(vecInput1[0]);
    INPUT_TYPE  *  const   input2  = &(vecInput2[0]);
    INPUT_TYPE  *  const   output1 = &(vecOutputGPU[0]);

    CLWRAP::OclWrap wrapper;

    //  Prepare the data: a single live cell in the middle.  //
    input1[BUFFER_SIZE / 2] = 1;

    //  Running By GPU  //
    CLWRAP::SimpleTimer  timer;
    CLWRAP::SimpleTimer  timeGPU;

    timer.startTimer();
    try {
        wrapper.setupInstance(std::cerr);
        wrapper.readSourceFile("CA1.cl");
        wrapper.buildPrograms(std::cerr);
        cl::Kernel  kernel(wrapper.getProgram(), "runCA90", &err);

        //  Both buffers are alternately read and written by the kernel
        //  (ping-pong), so both must be created READ_WRITE.
        cl::Buffer  memInput1   = wrapper.createBuffer(
                CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                sizeof(INPUT_TYPE) * BUFFER_SIZE, input1, &err);

        cl::Buffer  memInput2   = wrapper.createBuffer(
                CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                sizeof(INPUT_TYPE) * BUFFER_SIZE, input2, &err);

        cl::CommandQueue    queue   = wrapper.createCommandQueue(0, &err);
        //  Local scratch: one work-group of cells plus two halo cells.
        kernel.setArg(2, sizeof(INPUT_TYPE) * (BLOCK_SIZE + 2) , NULL);

        timeGPU.startTimer();
        //  Two generations per iteration: 1 -> 2, then 2 -> 1, so the
        //  latest generation always ends up in memInput1.
        for ( size_t t = 0; t < MAX_TIMES; t += 2 ) {
            kernel.setArg(0, memInput1);
            kernel.setArg(1, memInput2);
            queue.enqueueNDRangeKernel(kernel, cl::NullRange,
                cl::NDRange(NUM), cl::NDRange(BLOCK_SIZE), NULL, NULL);

            kernel.setArg(0, memInput2);
            kernel.setArg(1, memInput1);
            queue.enqueueNDRangeKernel(kernel, cl::NullRange,
                cl::NDRange(NUM), cl::NDRange(BLOCK_SIZE), NULL, NULL);
        }
        //  Kernel launches are asynchronous; wait for them to complete so
        //  the timer covers execution and not just the enqueue calls.
        queue.finish();
        timeGPU.stopTimer();

        queue.enqueueReadBuffer(memInput1, CL_TRUE, 0,
            sizeof(INPUT_TYPE) * BUFFER_SIZE, output1, NULL, NULL);

    } catch ( const std::exception & ex ) {
        std::cerr   << "ERROR : " << ex.what() << std::endl;
        exit( EXIT_FAILURE );
    } catch ( ... ) {
        std::cerr   << "Unknown Exception" << std::endl;
        exit( EXIT_FAILURE );
    }
    timer.stopTimer();

    std::cerr   << "Run On GPU --- "
                << "CLOCK : "  << timer.getClockTime()
                << ", REAL : " << timer.getRealTime()
                << std::endl;
    std::cerr   << "In Kernel  --- "
                << "CLOCK : "  << timeGPU.getClockTime()
                << ", REAL : " << timeGPU.getRealTime()
                << std::endl;

    //  Use (BUFFER_SIZE - 30) / 2 instead to show the cells around the
    //  centre of the buffer rather than its left edge.
    const  size_t   SHOW_OFFSET = 0;
    for ( int i = 0; i < 30; ++ i ) {
        std::cout   << output1[i + SHOW_OFFSET];
    }
    std::cout   << std::endl;

    //  Running By CPU  //
    //  The host run starts from input2 and ping-pongs 2 -> 1 -> 2, so
    //  after the loop generation MAX_TIMES is in input2 (input1 holds the
    //  generation before it).
    input2[BUFFER_SIZE / 2] = 1;
    timer.startTimer();
    for ( size_t t = 0; t < MAX_TIMES; t += 2 ) {
        runOnHost(input2, input1, NUM);
        runOnHost(input1, input2, NUM);
    }
    timer.stopTimer();

    std::cerr   << "Run On CPU --- "
                << "CLOCK : "  << timer.getClockTime()
                << ", REAL : " << timer.getRealTime()
                << std::endl;
    //  Print the final generation so it corresponds to the GPU output.
    for ( int i = 0; i < 30; ++ i ) {
        std::cout   << input2[i + SHOW_OFFSET];
    }
    std::cout   << std::endl;

    return ( 0 );
}

リンク

トップページ

合計: -
今日: -
昨日: -
トップページの合計: -
最終更新:2014年03月08日 03:26