summaryrefslogtreecommitdiffstats
path: root/perfrefs.bib
diff options
context:
space:
mode:
Diffstat (limited to 'perfrefs.bib')
-rw-r--r--perfrefs.bib99
1 files changed, 99 insertions, 0 deletions
diff --git a/perfrefs.bib b/perfrefs.bib
new file mode 100644
index 0000000..9ece185
--- /dev/null
+++ b/perfrefs.bib
@@ -0,0 +1,99 @@
+@article{mei2017microbench,
+ author = {Mei, Xinxin and Chu, Xiaowen},
+ title = {Dissecting {GPU} Memory Hierarchy Through Microbenchmarking},
+ journal = {IEEE Transactions on Parallel and Distributed Systems},
+ volume = {28},
+ number = {1},
+ pages = {72--86},
+ publisher = {IEEE Press},
+ year = {2017},
+ issn = {1045-9219},
+ doi = {10.1109/TPDS.2016.2549523},
+}
+
+@inproceedings{zhang2017performance,
+ author = {Zhang, Xiuxia and Tan, Guangming and Xue, Shuangbai and Li, Jiajia and Zhou, Keren and Chen, Mingyu},
+ title = {Understanding the {GPU} Microarchitecture to Achieve Bare-Metal Performance Tuning},
+ booktitle = {Proceedings of the 22nd {ACM} {SIGPLAN} Symposium on Principles and Practice of Parallel Programming},
+ series = {PPoPP '17},
+ year = {2017},
+ isbn = {978-1-4503-4493-7},
+ pages = {31--43},
+ doi = {10.1145/3018743.3018755},
+ publisher = {ACM}
+}
+
+@article{lim2017autotuning,
+ author = {Lim, Robert V. and Norris, Boyana and Malony, Allen D.},
+ title = {Autotuning {GPU} Kernels via Static and Predictive Analysis},
+ year = {2017},
+ journal = {CoRR},
+ volume = {abs/1701.08547},
+ url = {http://arxiv.org/abs/1701.08547}
+}
+
+@phdthesis{volkov2016thesis,
+ author = {Volkov, Vasily},
+ title = {Understanding Latency Hiding on {GPUs}},
+ school = {EECS Department, University of California, Berkeley},
+ number = {UCB/EECS-2016-143},
+ year = {2016},
+ url = {http://www2.eecs.berkeley.edu/Pubs/TechRpts/2016/EECS-2016-143.html}
+}
+
+@inproceedings{mei2014,
+ author = {Mei, Xinxin and Zhao, Kaiyong and Liu, Chengjian and Chu, Xiaowen},
+ title = {Benchmarking the Memory Hierarchy of Modern {GPUs}},
+ booktitle = {Network and Parallel Computing: 11th {IFIP} {WG} 10.3 International Conference},
+ year = {2014},
+ publisher = {Springer Berlin Heidelberg},
+ pages = {144--156},
+ isbn = {978-3-662-44917-2},
+ doi = {10.1007/978-3-662-44917-2_13},
+}
+
+
+@article{zhang2014performance,
+ author = {Zhang, Ying and Peng, Lu and Li, Bin and Peir, Jih-Kwon and Chen, Jianmin},
+ title = {Performance and Power Comparisons Between {Nvidia} and {ATI} {GPUs}},
+ journal = {International Journal of Computer Science \& Information Technology},
+ volume = {6},
+ number = {6},
+ year = {2014}
+}
+
+@inproceedings{zhang2011ati,
+ author = {Zhang, Ying and Hu, Yue and Li, Bin and Peng, Lu},
+ title = {Performance and Power Analysis of {ATI} {GPU}: A Statistical Approach},
+ booktitle = {6th {IEEE} International Conference on Networking, Architecture and Storage ({NAS})},
+ pages = {149--158},
+ year = {2011}
+}
+
+@inproceedings{volkov2010occupation,
+ author = {Volkov, Vasily},
+ title = {Better Performance at Lower Occupancy},
+ booktitle = {Proceedings of the {GPU} Technology Conference ({GTC})},
+ volume = {10},
+ pages = {16},
+ year = {2010}
+}
+
+@inproceedings{konstantinidis2016gpumembench,
+ author = {Konstantinidis, Elias and Cotronis, Yiannis},
+ title = {A Quantitative Performance Evaluation of Fast on-Chip Memories of {GPUs}},
+ booktitle = {24th Euromicro International Conference on Parallel, Distributed, and Network-Based Processing (PDP)},
+ pages = {448--455},
+ year = {2016},
+ doi = {10.1109/PDP.2016.56}
+}
+
+@article{konstantinidis2017mixbench,
+ author = {Konstantinidis, Elias and Cotronis, Yiannis},
+ title = {A Quantitative Roofline Model for {GPU} Kernel Performance Estimation Using Micro-Benchmarks and Hardware Metric Profiling},
+ journal = {Journal of Parallel and Distributed Computing},
+ volume = {107},
+ pages = {37--56},
+ year = {2017},
+ doi = {10.1016/j.jpdc.2017.04.002}
+}