// NOTE(review): the original #include directives had lost their header names;
// the list below is reconstructed from the symbols this file actually uses.
#include <cstdlib>
#include <cstring>
#include <iostream>

#include <cuda_runtime.h>

using namespace std;

// BUFFERSIZE in KB
//#define BUFFERSIZE 8192

// Reports a failed CUDA runtime call on stderr.
// Returns true when `error` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t error, const char *what, int gpuID)
{
    if (error == cudaSuccess)
        return true;
    cerr << "GPU " << gpuID << ": " << what << " failed: "
         << cudaGetErrorString(error) << endl;
    return false;
}

// Times one blocking host-to-device cudaMemcpy of `bufferSize` bytes from
// `h_buffer` to a freshly allocated buffer on device `gpuID`, and prints the
// elapsed time and the resulting bandwidth.
//
// Returns true when the measurement completed, false when any CUDA call
// failed (the failure has already been reported). Every resource created here
// is released on all paths.
static bool measureHostToDevice(int gpuID, const int *h_buffer, size_t bufferSize)
{
    cudaEvent_t start = NULL;
    cudaEvent_t end = NULL;
    int *d_buffer = NULL;
    float eventEtime = 0.0f;
    bool ok = false;

    if (!cudaOk(cudaSetDevice(gpuID), "cudaSetDevice", gpuID))
        return false;

    if (cudaOk(cudaEventCreate(&start), "cudaEventCreate(start)", gpuID) &&
        cudaOk(cudaEventCreate(&end), "cudaEventCreate(end)", gpuID)) {

        cudaError_t error = cudaMalloc((void **)&d_buffer, bufferSize);
        cout << "From GPU: " << gpuID << " " << cudaGetErrorString(error) << endl;

        // Only copy when the device allocation succeeded; otherwise d_buffer
        // is not a valid destination.
        if (error == cudaSuccess &&
            cudaOk(cudaEventRecord(start, 0), "cudaEventRecord(start)", gpuID)) {

            error = cudaMemcpy(d_buffer, h_buffer, bufferSize, cudaMemcpyHostToDevice);
            cout << "From GPU: " << gpuID << " " << cudaGetErrorString(error) << endl;

            // Wait on the `end` event itself rather than on the whole device,
            // then read the elapsed time (reported by CUDA in milliseconds).
            if (error == cudaSuccess &&
                cudaOk(cudaEventRecord(end, 0), "cudaEventRecord(end)", gpuID) &&
                cudaOk(cudaEventSynchronize(end), "cudaEventSynchronize", gpuID) &&
                cudaOk(cudaEventElapsedTime(&eventEtime, start, end),
                       "cudaEventElapsedTime", gpuID)) {

                cout << "GPU " << gpuID << " copy Host To Device" << endl;
                cout << "cudaMemcpy elapsed time: " << eventEtime << endl;

                if (eventEtime > 0.0f) {
                    float eventBandwidth = (float) bufferSize / (1 << 20); // MB
                    eventBandwidth /= 0.001 * eventEtime;                  // ms -> s, so MB/s
                    cout << "Bandwidth " << eventBandwidth << " MB/s " << endl;
                    ok = true;
                } else {
                    // Avoid dividing by zero and printing an infinite bandwidth.
                    cerr << "GPU " << gpuID
                         << ": elapsed time is zero, bandwidth not computed" << endl;
                }
            }
        }
    }

    if (d_buffer != NULL)
        cudaFree(d_buffer);
    if (end != NULL)
        cudaEventDestroy(end);
    if (start != NULL)
        cudaEventDestroy(start);
    return ok;
}

// Measures host-to-device copy bandwidth for every CUDA device in turn.
//
// The host buffer comes from malloc, so the figure reported is for ordinary
// (non-pinned) host memory. Command-line arguments are ignored.
//
// Returns EXIT_SUCCESS when every device was measured, EXIT_FAILURE when the
// device count could not be read, the host buffer could not be allocated, or
// any per-device measurement failed.
int main (int argc, char *argv[])
{
    (void) argc;
    (void) argv;

    int gpuNumber = 0;
    // 1024000000 ints; the cast keeps the multiplication in size_t.
    const size_t bufferSize = static_cast<size_t>(1024000000) * sizeof(int);

    cudaError_t error = cudaGetDeviceCount(&gpuNumber);
    if (error != cudaSuccess) {
        cerr << "Error with cudaDeviceCount" << ": " << cudaGetErrorString(error) << endl;
        return EXIT_FAILURE;
    }

    cout << "Available GPUs: " << gpuNumber << endl;
    cout << "Data Chunk Size: " << bufferSize << " bytes" << endl;

    int *h_buffer = (int *) malloc(bufferSize);
    if (h_buffer == NULL) {
        cerr << "Unable to allocate " << bufferSize << " bytes of host memory" << endl;
        return EXIT_FAILURE;
    }
    // Give the buffer defined contents before it is copied. This also touches
    // every page up front, which presumably keeps first-touch page-fault cost
    // out of the timed copy on the first device -- TODO confirm on the target OS.
    memset(h_buffer, 0, bufferSize);

    int status = EXIT_SUCCESS;
    for (int gpuID = 0; gpuID < gpuNumber; gpuID++) {
        // A failure on one device is reported, then the remaining devices are
        // still measured.
        if (!measureHostToDevice(gpuID, h_buffer, bufferSize))
            status = EXIT_FAILURE;
    }

    free(h_buffer);
    return status;
}