Skip to content

Commit 688ac74

Browse files
committed
Elaborate documentation.
1 parent d4fba33 commit 688ac74

File tree

2 files changed

+41
-10
lines changed

2 files changed

+41
-10
lines changed

src/backends/onnxruntime.c

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "onnxruntime_c_api.h"
1010

1111
// Use as a wrapper for ORT api call. If ORT api hasn't returned null, it has failed.
12+
// A label "error" must exist in every function that uses this macro.
1213
#define ONNX_VALIDATE_STATUS(x) \
1314
if ((status = (x)) != NULL) \
1415
goto error;
@@ -36,12 +37,19 @@ const OrtMemoryInfo *AllocatorInfo(const OrtAllocator *allocator) {
3637
void *AllocatorAlloc(OrtAllocator *ptr, size_t size) {
3738

3839
(void)ptr;
40+
// Allocate an additional 63 bytes to ensure that we can return an address which is
41+
// 64-byte aligned, plus additional space the size of a pointer to store
42+
// the address that RedisModule_Alloc returns.
3943
int offset = 63 + sizeof(void *);
4044
void *p1 = (void *)RedisModule_Alloc(size + offset);
4145
size_t allocated_size = RedisModule_MallocSize(p1);
46+
// Update the total number of bytes that onnx is using and the number of accesses
47+
// that onnx made to the allocator.
4248
atomic_fetch_add(&OnnxMemory, allocated_size);
4349
atomic_fetch_add(&OnnxMemoryAccessCounter, 1);
44-
void **p2 = (void **)(((uintptr_t)(p1) + offset) & (~63));
50+
// This operation guarantees that p2 is the largest 64-byte-aligned address not exceeding (p1 + offset).
51+
void **p2 = (void **)(((size_t)(p1) + offset) & (~63));
52+
// This stores the address p1 right before p2 (so we can retrieve it when we free).
4553
p2[-1] = p1;
4654
return p2;
4755
}
@@ -51,8 +59,12 @@ void AllocatorFree(OrtAllocator *ptr, void *p) {
5159
if (p == NULL) {
5260
return;
5361
}
62+
// Retrieve the address that we originally received from RedisModule_Alloc
63+
// (this is the address that we need to send to RedisModule_Free).
5464
void *p1 = ((void **)p)[-1];
5565
size_t allocated_size = RedisModule_MallocSize(p1);
66+
// Update the total number of bytes that onnx is using and the number of accesses
67+
// that onnx made to the allocator.
5668
atomic_fetch_sub(&OnnxMemory, allocated_size);
5769
atomic_fetch_add(&OnnxMemoryAccessCounter, 1);
5870
return RedisModule_Free(p1);
@@ -321,6 +333,10 @@ RAI_Model *RAI_ModelCreateORT(RAI_Backend backend, const char *devicestr, RAI_Mo
321333
OrtSession *session = NULL;
322334
OrtStatus *status = NULL;
323335

336+
// In the first time we set a model for onnx, we create an environment and register
337+
// an allocator to it that uses Redis allocator. This allocator is going to be used for
338+
// allocating buffers when creating and running models that run on CPU, and for allocations of
339+
// the model's input and output names (for both models that run on CPU and GPU).
324340
if (env == NULL) {
325341
ONNX_VALIDATE_STATUS(ort->CreateEnv(ORT_LOGGING_LEVEL_WARNING, "test", &env))
326342
ONNX_VALIDATE_STATUS(ort->CreateCustomDeviceAllocator(
@@ -343,6 +359,9 @@ RAI_Model *RAI_ModelCreateORT(RAI_Backend backend, const char *devicestr, RAI_Mo
343359
ort->SetIntraOpNumThreads(session_options, (int)opts.backends_intra_op_parallelism))
344360
ONNX_VALIDATE_STATUS(
345361
ort->SetInterOpNumThreads(session_options, (int)opts.backends_inter_op_parallelism))
362+
363+
// If the model is set for GPU, this will set CUDA provider for the session,
364+
// so that onnx will use its own allocator for CUDA (not Redis allocator)
346365
if (!setDeviceId(devicestr, session_options, error)) {
347366
ort->ReleaseSessionOptions(session_options);
348367
return NULL;

tests/flow/tests_onnx.py

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -467,23 +467,29 @@ def test_onnx_use_custom_allocator(env):
467467
for k in con.execute_command("INFO MODULES").decode().split("#")[4].split()[1:]}
468468
env.assertEqual(int(ai_memory_config["ai_onnxruntime_memory"]), 0)
469469

470-
# Expect using the allocator during model set for allocating the model, its input name and output name.
470+
# Expect using the allocator during model set for allocating the model, its input name and output name:
471+
# overall 3 allocations. The model raw size is 130B, and the names are 2B each. In practice we allocate
472+
# more than 134B as Redis allocator will use additional memory for its internal management and for the
473+
# 64-Byte alignment. When the test runs with valgrind, redis will use malloc for the allocations
474+
# (hence will not use additional memory).
471475
ret = con.execute_command('AI.MODELSET', 'm{1}', 'ONNX', 'CPU', 'BLOB', model_pb)
472476
env.assertEqual(ret, b'OK')
473477
ai_memory_config = {k.split(":")[0]: k.split(":")[1]
474478
for k in con.execute_command("INFO MODULES").decode().split("#")[4].split()[1:]}
475-
env.assertTrue(int(ai_memory_config["ai_onnxruntime_memory"]) > 100)
479+
480+
# Expect using at least 130+63+(size of an address) + 2*(2+63+(size of an address)) bytes.
481+
env.assertTrue(int(ai_memory_config["ai_onnxruntime_memory"]) > 334)
476482
env.assertEqual(int(ai_memory_config["ai_onnxruntime_memory_access_num"]), 3)
477483

478-
# Expect using the allocator free function when releasing the model.
484+
# Expect using the allocator free function when releasing the model and input and output names.
479485
con.execute_command('AI.MODELDEL', 'm{1}')
480486
env.assertFalse(con.execute_command('EXISTS', 'm{1}'))
481487
ai_memory_config = {k.split(":")[0]: k.split(":")[1]
482488
for k in con.execute_command("INFO MODULES").decode().split("#")[4].split()[1:]}
483489
env.assertEqual(int(ai_memory_config["ai_onnxruntime_memory"]), 0)
484490
env.assertEqual(int(ai_memory_config["ai_onnxruntime_memory_access_num"]), 6)
485491

486-
# test allocator in model run op
492+
# test the use of Redis allocator in model run op.
487493
model_filename = os.path.join(test_data_path, 'mnist.onnx')
488494
sample_filename = os.path.join(test_data_path, 'one.raw')
489495

@@ -496,7 +502,8 @@ def test_onnx_use_custom_allocator(env):
496502
env.assertEqual(ret, b'OK')
497503
con.execute_command('AI.TENSORSET', 'a{1}', 'FLOAT', 1, 1, 28, 28, 'BLOB', sample_raw)
498504

499-
# Expect 16 allocator's access from onnx during the run.
505+
# Expect 16 allocator's access from onnx during the run (in addition to the allocations that were made while
506+
# creating the model).
500507
ai_memory_config = {k.split(":")[0]: k.split(":")[1]
501508
for k in con.execute_command("INFO MODULES").decode().split("#")[4].split()[1:]}
502509
allocator_access_num_before = ai_memory_config["ai_onnxruntime_memory_access_num"]
@@ -528,15 +535,20 @@ def test_onnx_use_custom_allocator_with_GPU(env):
528535
for k in con.execute_command("INFO MODULES").decode().split("#")[4].split()[1:]}
529536
env.assertEqual(int(ai_memory_config["ai_onnxruntime_memory"]), 0)
530537

531-
# Create the same model, once for CPU and once for GPU.
532-
# Expect using the allocator during model set for allocating the model, its input name and output name in CPU,
533-
# but for GPU, expect using the allocator only for allocating input and output names.
538+
# Expect using the allocator during model set for allocating the model, its input name and output name:
539+
# overall 3 allocations. The model raw size is 130B, and the names are 2B each. In practice we allocate
540+
# more than 134B as Redis allocator will use additional memory for its internal management and for the
541+
# 64-Byte alignment. When the test runs with valgrind, redis will use malloc for the allocations.
534542
ret = con.execute_command('AI.MODELSET', 'm_gpu{1}', 'ONNX', DEVICE, 'BLOB', model_pb)
535543
env.assertEqual(ret, b'OK')
544+
545+
# but for GPU, expect using the allocator only for allocating input and output names (not the model itself).
536546
ret = con.execute_command('AI.MODELSET', 'm_cpu{1}', 'ONNX', 'CPU', 'BLOB', model_pb)
537547
env.assertEqual(ret, b'OK')
538548
ai_memory_config = {k.split(":")[0]: k.split(":")[1]
539549
for k in con.execute_command("INFO MODULES").decode().split("#")[4].split()[1:]}
540-
env.assertTrue(int(ai_memory_config["ai_onnxruntime_memory"]) > 100)
550+
551+
# Expect using at least 130+63+(size of an address) + 4*(2+63+(size of an address)) bytes.
552+
env.assertTrue(int(ai_memory_config["ai_onnxruntime_memory"]) > 472)
541553
env.assertTrue(int(ai_memory_config["ai_onnxruntime_memory"]) < 705)
542554
env.assertEqual(int(ai_memory_config["ai_onnxruntime_memory_access_num"]), 5)

0 commit comments

Comments
 (0)