Skip to content

Commit 688ac74

Browse files
committed
Elaborate documentation.
1 parent d4fba33 commit 688ac74

File tree

2 files changed

+41
-10
lines changed

2 files changed

+41
-10
lines changed

src/backends/onnxruntime.c

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "onnxruntime_c_api.h"
1010

1111
// Use as a wrapper for ORT api call. If ORT api hasn't returned null, it has failed.
12+
// A label "error" must exist in every function that uses this macro.
1213
#define ONNX_VALIDATE_STATUS(x) \
1314
if ((status = (x)) != NULL) \
1415
goto error;
@@ -36,12 +37,19 @@ const OrtMemoryInfo *AllocatorInfo(const OrtAllocator *allocator) {
3637
void *AllocatorAlloc(OrtAllocator *ptr, size_t size) {
3738

3839
(void)ptr;
40+
// Allocate an additional 63 bytes to ensure that we can return an address which is
41+
// 64-byte aligned, plus additional space the size of a pointer to store
42+
// the address that RedisModule_Alloc returns.
3943
int offset = 63 + sizeof(void *);
4044
void *p1 = (void *)RedisModule_Alloc(size + offset);
4145
size_t allocated_size = RedisModule_MallocSize(p1);
46+
// Update the total number of bytes that onnx is using and the number of accesses
47+
// that onnx made to the allocator.
4248
atomic_fetch_add(&OnnxMemory, allocated_size);
4349
atomic_fetch_add(&OnnxMemoryAccessCounter, 1);
44-
void **p2 = (void **)(((uintptr_t)(p1) + offset) & (~63));
50+
// This operation guarantees that p2 is the largest 64-byte-aligned address not exceeding (p1 + offset).
51+
void **p2 = (void **)(((size_t)(p1) + offset) & (~63));
52+
// This stores the address p1 right before p2 (so we can retrieve it when we free).
4553
p2[-1] = p1;
4654
return p2;
4755
}
@@ -51,8 +59,12 @@ void AllocatorFree(OrtAllocator *ptr, void *p) {
5159
if (p == NULL) {
5260
return;
5361
}
62+
// Retrieve the address that we originally received from RedisModule_Alloc
63+
// (this is the address that we need to send to RedisModule_Free).
5464
void *p1 = ((void **)p)[-1];
5565
size_t allocated_size = RedisModule_MallocSize(p1);
66+
// Update the total number of bytes that onnx is using and the number of accesses
67+
// that onnx made to the allocator.
5668
atomic_fetch_sub(&OnnxMemory, allocated_size);
5769
atomic_fetch_add(&OnnxMemoryAccessCounter, 1);
5870
return RedisModule_Free(p1);
@@ -321,6 +333,10 @@ RAI_Model *RAI_ModelCreateORT(RAI_Backend backend, const char *devicestr, RAI_Mo
321333
OrtSession *session = NULL;
322334
OrtStatus *status = NULL;
323335

336+
// In the first time we set a model for onnx, we create an environment and register
337+
// an allocator to it that uses Redis allocator. This allocator is going to be used for
338+
// allocating buffers when creating and running models that run on CPU, and for allocations of
339+
// the model's input and output names (for both models that run on CPU and GPU).
324340
if (env == NULL) {
325341
ONNX_VALIDATE_STATUS(ort->CreateEnv(ORT_LOGGING_LEVEL_WARNING, "test", &env))
326342
ONNX_VALIDATE_STATUS(ort->CreateCustomDeviceAllocator(
@@ -343,6 +359,9 @@ RAI_Model *RAI_ModelCreateORT(RAI_Backend backend, const char *devicestr, RAI_Mo
343359
ort->SetIntraOpNumThreads(session_options, (int)opts.backends_intra_op_parallelism))
344360
ONNX_VALIDATE_STATUS(
345361
ort->SetInterOpNumThreads(session_options, (int)opts.backends_inter_op_parallelism))
362+
363+
// If the model is set for GPU, this will set CUDA provider for the session,
364+
// so that onnx will use its own allocator for CUDA (not Redis allocator)
346365
if (!setDeviceId(devicestr, session_options, error)) {
347366
ort->ReleaseSessionOptions(session_options);
348367
return NULL;

tests/flow/tests_onnx.py

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -467,23 +467,29 @@ def test_onnx_use_custom_allocator(env):
467467
for k in con.execute_command("INFO MODULES").decode().split("#")[4].split()[1:]}
468468
env.assertEqual(int(ai_memory_config["ai_onnxruntime_memory"]), 0)
469469

470-
# Expect using the allocator during model set for allocating the model, its input name and output name.
470+
# Expect using the allocator during model set for allocating the model, its input name and output name:
471+
# overall 3 allocations. The model raw size is 130B, and the names are 2B each. In practice we allocate
472+
# more than 134B as Redis allocator will use additional memory for its internal management and for the
473+
# 64-Byte alignment. When the test runs with valgrind, redis will use malloc for the allocations
474+
# (hence will not use additional memory).
471475
ret = con.execute_command('AI.MODELSET', 'm{1}', 'ONNX', 'CPU', 'BLOB', model_pb)
472476
env.assertEqual(ret, b'OK')
473477
ai_memory_config = {k.split(":")[0]: k.split(":")[1]
474478
for k in con.execute_command("INFO MODULES").decode().split("#")[4].split()[1:]}
475-
env.assertTrue(int(ai_memory_config["ai_onnxruntime_memory"]) > 100)
479+
480+
# Expect using at least 130+63+(size of an address) + 2*(2+63+(size of an address)) bytes.
481+
env.assertTrue(int(ai_memory_config["ai_onnxruntime_memory"]) > 334)
476482
env.assertEqual(int(ai_memory_config["ai_onnxruntime_memory_access_num"]), 3)
477483

478-
# Expect using the allocator free function when releasing the model.
484+
# Expect using the allocator free function when releasing the model and input and output names.
479485
con.execute_command('AI.MODELDEL', 'm{1}')
480486
env.assertFalse(con.execute_command('EXISTS', 'm{1}'))
481487
ai_memory_config = {k.split(":")[0]: k.split(":")[1]
482488
for k in con.execute_command("INFO MODULES").decode().split("#")[4].split()[1:]}
483489
env.assertEqual(int(ai_memory_config["ai_onnxruntime_memory"]), 0)
484490
env.assertEqual(int(ai_memory_config["ai_onnxruntime_memory_access_num"]), 6)
485491

486-
# test allocator in model run op
492+
# test the use of Redis allocator in model run op.
487493
model_filename = os.path.join(test_data_path, 'mnist.onnx')
488494
sample_filename = os.path.join(test_data_path, 'one.raw')
489495

@@ -496,7 +502,8 @@ def test_onnx_use_custom_allocator(env):
496502
env.assertEqual(ret, b'OK')
497503
con.execute_command('AI.TENSORSET', 'a{1}', 'FLOAT', 1, 1, 28, 28, 'BLOB', sample_raw)
498504

499-
# Expect 16 allocator's access from onnx during the run.
505+
# Expect 16 allocator's access from onnx during the run (in addition to the allocations that were made while
506+
# creating the model).
500507
ai_memory_config = {k.split(":")[0]: k.split(":")[1]
501508
for k in con.execute_command("INFO MODULES").decode().split("#")[4].split()[1:]}
502509
allocator_access_num_before = ai_memory_config["ai_onnxruntime_memory_access_num"]
@@ -528,15 +535,20 @@ def test_onnx_use_custom_allocator_with_GPU(env):
528535
for k in con.execute_command("INFO MODULES").decode().split("#")[4].split()[1:]}
529536
env.assertEqual(int(ai_memory_config["ai_onnxruntime_memory"]), 0)
530537

531-
# Create the same model, once for CPU and once for GPU.
532-
# Expect using the allocator during model set for allocating the model, its input name and output name in CPU,
533-
# but for GPU, expect using the allocator only for allocating input and output names.
538+
# Expect using the allocator during model set for allocating the model, its input name and output name:
539+
# overall 3 allocations. The model raw size is 130B, and the names are 2B each. In practice we allocate
540+
# more than 134B as Redis allocator will use additional memory for its internal management and for the
541+
# 64-Byte alignment. When the test runs with valgrind, redis will use malloc for the allocations.
534542
ret = con.execute_command('AI.MODELSET', 'm_gpu{1}', 'ONNX', DEVICE, 'BLOB', model_pb)
535543
env.assertEqual(ret, b'OK')
544+
545+
# but for GPU, expect using the allocator only for allocating input and output names (not the model itself).
536546
ret = con.execute_command('AI.MODELSET', 'm_cpu{1}', 'ONNX', 'CPU', 'BLOB', model_pb)
537547
env.assertEqual(ret, b'OK')
538548
ai_memory_config = {k.split(":")[0]: k.split(":")[1]
539549
for k in con.execute_command("INFO MODULES").decode().split("#")[4].split()[1:]}
540-
env.assertTrue(int(ai_memory_config["ai_onnxruntime_memory"]) > 100)
550+
551+
# Expect using at least 130+63+(size of an address) + 4*(2+63+(size of an address)) bytes.
552+
env.assertTrue(int(ai_memory_config["ai_onnxruntime_memory"]) > 472)
541553
env.assertTrue(int(ai_memory_config["ai_onnxruntime_memory"]) < 705)
542554
env.assertEqual(int(ai_memory_config["ai_onnxruntime_memory_access_num"]), 5)

0 commit comments

Comments
 (0)