by Tianchu Ji, Niranjan Balasubramanian, Michael Ferdman, Peter Milder
Reference:
Enabling Efficient SpMM for Sparse Attention on GEMM-Optimized Hardware with Block Aggregation. Tianchu Ji, Niranjan Balasubramanian, Michael Ferdman, Peter Milder. In Proceedings of the 2026 ACM/SIGDA International Symposium on Field Programmable Gate Arrays (FPGA '26), Association for Computing Machinery, 2026.
BibTeX Entry:
@comment{Acronyms in title and booktitle are brace-protected so that bibliography styles which sentence-case these fields keep their capitalisation.}
@inproceedings{ferdman-fpga-26-enabling-efficient-spmm-for-sparse-attention-on-gemm-optimized-hardware-with-block-aggregation,
  author    = {Ji, Tianchu and Balasubramanian, Niranjan and Ferdman, Michael and Milder, Peter},
  title     = {Enabling Efficient {SpMM} for Sparse Attention on {GEMM}-Optimized Hardware with Block Aggregation},
  booktitle = {Proceedings of the 2026 {ACM/SIGDA} International Symposium on Field Programmable Gate Arrays ({FPGA} '26)},
  year      = {2026},
  isbn      = {9798400720796},
  address   = {Monterey, CA, USA},
  publisher = {Association for Computing Machinery},
  numpages  = {12},
  keywords  = {sparse-dense matrix multiplication, self-attention, sparse attention, Tensor Block},
  doi       = {10.1145/3748173.3779187},
}