% Conference paper (CCGrid 2016) on CUDA-kernel-based collective reductions.
% Given names are stored as initials because only initials were available.
% `source` is a non-standard annotation field (ignored by bibliography
% styles); presumably the listing page this entry was collected from.
@inproceedings{chu2016cuda,
  author    = {Chu, C. and Hamidouche, K. and Venkatesh, A. and Awan, A. and Panda, D.},
  title     = {{CUDA} Kernel based Collective Reduction Operations on Large-scale {GPU} Clusters},
  booktitle = {16th {IEEE/ACM} International Symposium on Cluster, Cloud and Grid Computing ({CCGrid}'16)},
  year      = {2016},
  month     = may,
  venue     = {Cartagena, Colombia},
  source    = {http://nowlab.cse.ohio-state.edu/publications/},
}