
Commit c3ae7cd

Update julialab.bib
adding new publications
1 parent 338b9ad commit c3ae7cd

File tree

1 file changed: +39 -0 lines changed


_assets/julialab.bib

Lines changed: 39 additions & 0 deletions
@@ -10,6 +10,45 @@
 % url={url to the journal/conference page of your article},
 %
 
+@misc{ringoot2025gpuresidentmemoryawarealgorithmaccelerating,
+  title={A GPU-resident Memory-Aware Algorithm for Accelerating Bidiagonalization of Banded Matrices},
+  author={Evelyne Ringoot and Rabab Alomairy and Alan Edelman},
+  year={2025},
+  eprint={2510.12705},
+  archivePrefix={arXiv},
+  primaryClass={cs.DC},
+  url={https://arxiv.org/abs/2510.12705},
+}
+@misc{ringoot2025performantunified,
+  title={Performant Unified GPU Kernels for Portable Singular Value Computation Across Hardware and Precision},
+  author={Evelyne Ringoot and Rabab Alomairy and Valentin Churavy and Alan Edelman},
+  year={2025},
+  eprint={2508.06339},
+  archivePrefix={arXiv},
+  primaryClass={cs.DC},
+  url={https://arxiv.org/abs/2508.06339},
+}
+@InProceedings{10.1007/978-3-031-97196-9_13,
+  author="Carric, Vicki
+    and Onyango, Maxwell
+    and Alomairy, Rabab
+    and Ringoot, Evelyne
+    and Schloss, James
+    and Edelman, Alan",
+  editor="Diehl, Patrick
+    and Cao, Qinglei
+    and Herault, Thomas
+    and Bosilca, George",
+  title="Toward Portable GPU Performance: Julia Recursive Implementation of TRMM and TRSM",
+  booktitle="Asynchronous Many-Task Systems and Applications",
+  year="2026",
+  publisher="Springer Nature Switzerland",
+  address="Cham",
+  pages="154--164",
+  abstract="This paper presents a performant and portable recursive implementation of triangular matrix-matrix multiplication (TRMM) and triangular solve (TRSM) operations in Julia for GPUs, which form the backbone of many other linear algebra algorithms. This work is based on an existing recursive implementation for TRMM and TRSM, which restructures the operations to include general matrix-matrix multiplication (GEMM) calls, facilitating better utilization of the GPU memory hierarchy and reducing latency overhead. The unified implementation in Julia harnesses the language's multiple-dispatch and metaprogramming capabilities through the existing GPUArrays and KernelAbstractions frameworks, enabling performant hardware-agnostic execution across different GPU architectures. By supporting a consistent API, this implementation allows users to seamlessly switch between different GPU backends. The recursive hardware-agnostic implementation we present achieves performance comparable to vendor-optimized (cuBLAS/rocBLAS) libraries for larger matrix sizes and provides such methods for the first time to Apple Silicon hardware with only a few hundred lines of code, demonstrating the power of unified implementations.",
+  isbn="978-3-031-97196-9"
+}
+
 
 @incollection{abdelrehim_active_nodate,
   title = {Active {Learning} {Enhanced} {Surrogate} {Modeling} of {Jet} {Engines} in {JuliaSim}},
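The TRMM/TRSM entry above describes restructuring triangular operations so that most of the work becomes GEMM calls, which use the GPU memory hierarchy more effectively. As a rough illustration of that recursive idea only, here is a minimal NumPy/SciPy sketch of a blocked lower-triangular solve; it is not the paper's Julia/KernelAbstractions implementation, and the function name recursive_trsm and the block-size cutoff are hypothetical.

import numpy as np
from scipy.linalg import solve_triangular

def recursive_trsm(L, B, block=64):
    """Solve L X = B for X, with L lower triangular.

    Below the cutoff, fall back to a direct triangular solve; above it,
    split L into [[L11, 0], [L21, L22]] so the off-diagonal update is a
    plain matrix-matrix multiply (GEMM).
    """
    n = L.shape[0]
    if n <= block:
        return solve_triangular(L, B, lower=True)
    k = n // 2
    L11, L21, L22 = L[:k, :k], L[k:, :k], L[k:, k:]
    X1 = recursive_trsm(L11, B[:k], block)   # solve the top diagonal block
    B2 = B[k:] - L21 @ X1                    # GEMM-shaped update
    X2 = recursive_trsm(L22, B2, block)      # solve the bottom diagonal block
    return np.vstack([X1, X2])

# Quick check on a well-conditioned random system
rng = np.random.default_rng(0)
L = np.tril(rng.standard_normal((256, 256))) + 256 * np.eye(256)
B = rng.standard_normal((256, 8))
print(np.allclose(L @ recursive_trsm(L, B), B))

The split confines the inherently sequential triangular work to small diagonal blocks, while the large off-diagonal updates become single matrix multiplies, the kind of work GEMM kernels handle best, which is consistent with the restructuring the abstract describes.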

0 commit comments
