1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
|
% Journal article (volume 28, issue 1). The journal name is stored in full so
% the style decides on abbreviation, and the acronym in the title is braced so
% sentence-casing styles keep it uppercase.
@article{mei2017microbench,
  author    = {Mei, Xinxin and Chu, Xiaowen},
  title     = {Dissecting {GPU} Memory Hierarchy Through Microbenchmarking},
  journal   = {{IEEE} Transactions on Parallel and Distributed Systems},
  volume    = {28},
  number    = {1},
  pages     = {72--86},
  publisher = {IEEE Press},
  year      = {2017},
  issn      = {1045-9219},
  doi       = {10.1109/TPDS.2016.2549523},
}
% Conference paper in the PPoPP '17 proceedings. The ordinal is written "22nd"
% and acronyms are braced so styles that recase text keep them intact.
@inproceedings{zhang2017performance,
  author    = {Zhang, Xiuxia and Tan, Guangming and Xue, Shuangbai and Li, Jiajia and Zhou, Keren and Chen, Mingyu},
  title     = {Understanding the {GPU} Microarchitecture to Achieve Bare-Metal Performance Tuning},
  booktitle = {Proceedings of the 22nd {ACM} {SIGPLAN} Symposium on Principles and Practice of Parallel Programming},
  series    = {{PPoPP} '17},
  pages     = {31--43},
  publisher = {ACM},
  year      = {2017},
  isbn      = {978-1-4503-4493-7},
  doi       = {10.1145/3018743.3018755},
}
% arXiv preprint catalogued under CoRR. The arXiv identifier from the URL is
% also stored in the eprint fields so eprint-aware styles can render and link
% it; journal and volume are kept so classic article styles still have their
% required fields.
@article{lim2017autotuning,
  author        = {Lim, Robert V. and Norris, Boyana and Malony, Allen D.},
  title         = {Autotuning {GPU} Kernels via Static and Predictive Analysis},
  journal       = {CoRR},
  volume        = {abs/1701.08547},
  year          = {2017},
  eprint        = {1701.08547},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1701.08547},
}
% PhD thesis; the URL and the identifier in `number` point at the department's
% technical-report listing. The acronym in the title is braced to survive
% sentence casing.
% NOTE(review): standard thesis styles may not print `number` -- if the report
% identifier must appear in the bibliography, confirm how the target style
% handles it.
@phdthesis{volkov2016thesis,
  author = {Volkov, Vasily},
  title  = {Understanding Latency Hiding on {GPUs}},
  school = {EECS Department, University of California, Berkeley},
  number = {UCB/EECS-2016-143},
  year   = {2016},
  url    = {http://www2.eecs.berkeley.edu/Pubs/TechRpts/2016/EECS-2016-143.html},
}
% Paper in a conference proceedings volume. Typed as inproceedings because
% `title` holds the paper title and `booktitle` the volume title; under the
% classic inbook type `title` is read as the book title and `booktitle` is
% not used. Acronyms are braced to survive recasing.
@inproceedings{mei2014,
  author    = {Mei, Xinxin and Zhao, Kaiyong and Liu, Chengjian and Chu, Xiaowen},
  title     = {Benchmarking the Memory Hierarchy of Modern {GPUs}},
  booktitle = {Network and Parallel Computing: 11th {IFIP} {WG} 10.3 International Conference},
  pages     = {144--156},
  publisher = {Springer Berlin Heidelberg},
  year      = {2014},
  isbn      = {978-3-662-44917-2},
  doi       = {10.1007/978-3-662-44917-2_13},
}
% Journal article (volume 6, issue 6). Vendor names and the acronym in the
% title are braced so sentence-casing styles do not lowercase them.
% NOTE(review): no page range or DOI is recorded for this entry -- add them
% when known.
@article{zhang2014performance,
  author  = {Zhang, Ying and Peng, Lu and Li, Bin and Peir, Jih-Kwon and Chen, Jianmin},
  title   = {Performance and Power Comparisons Between {Nvidia} and {ATI} {GPUs}},
  journal = {International Journal of Computer Science \& Information Technology},
  volume  = {6},
  number  = {6},
  year    = {2014},
}
% Conference paper. The title is stored in Title Case with acronyms braced
% (styles can downcase but cannot restore capitals), and the proceedings name
% is written in natural word order instead of the inverted "..., Conference on"
% export form.
@inproceedings{zhang2011ati,
  author    = {Zhang, Ying and Hu, Yue and Li, Bin and Peng, Lu},
  title     = {Performance and Power Analysis of {ATI} {GPU}: A Statistical Approach},
  booktitle = {6th {IEEE} International Conference on Networking, Architecture and Storage ({NAS})},
  pages     = {149--158},
  year      = {2011},
}
% Conference talk. Title and proceedings name are stored in Title Case with
% acronyms braced so they survive style recasing.
% NOTE(review): `volume = 10` and the single-number `pages = 16` look like
% citation-export artifacts rather than real bibliographic data -- verify
% against the source before relying on them; they are kept unchanged here.
@inproceedings{volkov2010occupation,
  author    = {Volkov, Vasily},
  title     = {Better Performance at Lower Occupancy},
  booktitle = {Proceedings of the {GPU} Technology Conference ({GTC})},
  volume    = {10},
  pages     = {16},
  year      = {2010},
}
% Conference paper. Given names are stored in full (the style abbreviates
% them if needed), the page range uses the en-dash form `--`, and acronyms
% are braced against recasing.
@inproceedings{konstantinidis2016gpumembench,
  author    = {Konstantinidis, Elias and Cotronis, Yiannis},
  title     = {A Quantitative Performance Evaluation of Fast On-Chip Memories of {GPUs}},
  booktitle = {24th Euromicro International Conference on Parallel, Distributed, and Network-Based Processing ({PDP})},
  pages     = {448--455},
  year      = {2016},
  doi       = {10.1109/PDP.2016.56},
}
% Journal article (volume 107). Names use the unambiguous "Last, First" form,
% the title is stored in Title Case with the acronym braced, and the page
% range uses `--` without surrounding spaces.
@article{konstantinidis2017mixbench,
  author  = {Konstantinidis, Elias and Cotronis, Yiannis},
  title   = {A Quantitative Roofline Model for {GPU} Kernel Performance Estimation Using Micro-Benchmarks and Hardware Metric Profiling},
  journal = {Journal of Parallel and Distributed Computing},
  volume  = {107},
  pages   = {37--56},
  year    = {2017},
  doi     = {10.1016/j.jpdc.2017.04.002},
}
|