[CUDA] hello cuda
#include<iostream>
#include<stdio.h>
#include<stdlib.h>
#include<cutil_inline.h>
using namespace std;
#if __DEVICE_EMULATION__
// Device-emulation build of InitCUDA: performs no device query or selection
// and unconditionally reports success.
bool InitCUDA(void)
{
    return true;
}
#else
/**
 * Selects the first CUDA device that reports compute capability major >= 1.
 *
 * Prints the number of devices found, then the name of each device whose
 * properties could be queried, stopping at the first suitable one.
 *
 * @return true if a suitable device was found and cudaSetDevice() succeeded;
 *         false if the device count could not be obtained, no suitable
 *         device exists, or selecting the device failed.
 */
bool InitCUDA(void){
    int count = 0;
    cudaError_t cue = cudaGetDeviceCount(&count);
    if(cue == cudaErrorNoDevice)
    {
        cerr<<"There is no device"<<endl;
        return false;
    }
    // Any other failure (e.g. a driver problem) leaves count unreliable, so
    // report the real cause instead of falling through to the device loop.
    if(cue != cudaSuccess)
    {
        cerr<<"cudaGetDeviceCount failed: "<<cudaGetErrorString(cue)<<endl;
        return false;
    }
    cout<<"i found "<<count<<" devices!"<<endl;

    // i is declared outside the loop because its final value tells us whether
    // the loop stopped early (device found) or ran to completion (none found).
    int i = 0;
    for(; i<count; i++){
        cudaDeviceProp prop;
        if(cudaGetDeviceProperties(&prop, i) == cudaSuccess)
        {
            cout<<"\tdevice name : "<<prop.name<<endl;
            if(prop.major >= 1)
                break;
        }
    }
    if(i == count){
        cerr<<"There is no device supporting CUDA"<<endl;
        return false;
    }

    // Do not claim success unless the device was actually selected.
    cue = cudaSetDevice(i);
    if(cue != cudaSuccess){
        cerr<<"cudaSetDevice("<<i<<") failed: "<<cudaGetErrorString(cue)<<endl;
        return false;
    }
    cout<<"CUDA initialized"<<endl;
    return true;
}
#endif
/**
 * Writes the greeting "Hello CUDA!" into result.
 *
 * Bytes [0, min(num, 12)) receive the greeting; the 12th byte is its
 * terminating NUL. Any further bytes up to num are set to '\0', so the kernel
 * never reads past the end of the local message array.
 *
 * Each thread starts at its flat 1-D global index and advances by the total
 * number of launched threads, so the output is the same for any 1-D launch
 * configuration; with <<<1,1>>> the single thread writes every byte in order.
 *
 * @param result buffer dereferenced on the device; must have room for at
 *               least num bytes.
 * @param num    number of bytes to write; values <= 0 write nothing.
 */
__global__ static void HelloCUDA(char* result, int num){
    char p_HelloCUDA[] = "Hello CUDA!";
    // sizeof counts the terminating NUL: 12 bytes in total.
    const int messageBytes = (int)sizeof(p_HelloCUDA);
    const int stride = gridDim.x * blockDim.x;
    for(int i = blockIdx.x * blockDim.x + threadIdx.x; i < num; i += stride)
        result[i] = (i < messageBytes) ? p_HelloCUDA[i] : '\0';
}
/**
 * Hello CUDA demo: selects a device, lets the HelloCUDA kernel write a
 * greeting into device memory, copies it back to the host and prints it.
 *
 * NOTE(review): cutilSafeCall / cutilCheckMsg come from cutil_inline.h, which
 * is not visible here; they are presumed to report the error and terminate
 * the process on failure - confirm against the SDK header.
 *
 * @return 0 on success, EXIT_FAILURE when InitCUDA() fails.
 */
int main(){
    // A failed initialisation must not look like a successful run to callers.
    if(! InitCUDA())
        return EXIT_FAILURE;

    // Number of greeting characters transferred (terminating NUL excluded).
    const int kMessageLength = 11;

    char* device_result = 0;
    // One extra zero-initialised byte keeps the host string NUL-terminated,
    // since only kMessageLength bytes are copied back.
    char host_result[kMessageLength + 1] = {0};

    // 1. Allocate device memory.
    cutilSafeCall(cudaMalloc((void**)&device_result, sizeof(char)*kMessageLength));

    // 2. Launch the kernel with a single thread.
    HelloCUDA<<<1,1,0>>>(device_result, kMessageLength);
    cutilCheckMsg("Kernel execusion failed\n");
    // Faults raised while the kernel runs only surface at a synchronising
    // call, so the result of the synchronisation is checked as well.
    cutilSafeCall(cudaThreadSynchronize());

    // 3. Copy the result from device memory to the host.
    cutilSafeCall(cudaMemcpy((void*)host_result, (const void*)device_result, (size_t)sizeof(char)*kMessageLength, cudaMemcpyDeviceToHost));
    cout<<endl<<host_result<<endl;

    // 4. Release the device memory.
    cutilSafeCall(cudaFree(device_result));
    cout<<"end the cuda hello.."<<endl;
    return 0;
}
어느 블로그에서 보고 따라 해 본 Hello CUDA 예제입니다.