@comment{
  Conference paper on non-blocking Allreduce with collective offload, 2012.
  Given names are stored as initials because only initials were available.
  The source field is a non-standard annotation that standard styles ignore;
  it presumably records the listing page this record was taken from -- confirm
  before promoting it to a url field.
}
@inproceedings{kandalla2012nonblocking,
  author    = {Kandalla, K. and Yang, U. and Keasler, J. and Kolev, T. and Moody, A. and Subramoni, H. and Tomko, K. and Vienne, J. and Panda, D.},
  title     = {Designing Non-blocking {Allreduce} with Collective Offload on {InfiniBand} Clusters: {A} Case Study with Conjugate Gradient Solvers},
  booktitle = {International Parallel and Distributed Processing Symposium 2012},
  year      = {2012},
  month     = may,
  source    = {http://nowlab.cse.ohio-state.edu/publications/},
}