import os
import sys
import time
import shutil
import argparse
import signal
import threading

import numpy as np
from redis import RedisError
from RLTest import Env
from includes import *

terminate_flag = 0
parent_pid = os.getpid()


# This handler captures a user SIGINT (such as keyboard Ctrl-C). Since we are using multi-processing,
# it is inherited by all the running processes. Note that every process will receive the signal,
# as all of them belong to the same process group.
def handler(signum, frame):
    global terminate_flag
    terminate_flag = 1
    if os.getpid() == parent_pid:  # print it only once
        print("\nReceived user interrupt. Shutting down...")


def _exit():
    # remove the logs that were auto-generated by redis
    shutil.rmtree('logs', ignore_errors=True)
    sys.exit(1)


def run_benchmark(env, num_runs_mnist, num_runs_inception, num_runs_bert, num_parallel_clients):
    global terminate_flag
    con = get_connection(env, '{1}')

    print("Loading ONNX models...")
    model_pb = load_file_content('mnist.onnx')
    sample_raw = load_file_content('one.raw')
    inception_pb = load_file_content('inception-v2-9.onnx')
    _, _, _, _, img = load_mobilenet_v2_test_data()
    bert_pb = load_file_content('bert-base-cased.onnx')
    bert_in_data = np.random.randint(-2, 1, size=(10, 100), dtype=np.int64)

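    # Store many copies of the mnist and inception models; the ONNX backend's total
    # memory consumption is reported below via the backends_info section.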
    for i in range(50):
        if terminate_flag == 1:
            _exit()
        ret = con.execute_command('AI.MODELSTORE', 'mnist{1}'+str(i), 'ONNX', DEVICE, 'BLOB', model_pb)
        env.assertEqual(ret, b'OK')
    con.execute_command('AI.TENSORSET', 'mnist_in{1}', 'FLOAT', 1, 1, 28, 28, 'BLOB', sample_raw)

    for i in range(20):
        if terminate_flag == 1:
            _exit()
        ret = con.execute_command('AI.MODELSTORE', 'inception{1}'+str(i), 'ONNX', DEVICE, 'BLOB', inception_pb)
        env.assertEqual(ret, b'OK')

    backends_info = get_info_section(con, 'backends_info')
    print(f'Done. ONNX memory consumption is: {backends_info["ai_onnxruntime_memory"]} bytes')

    ret = con.execute_command('AI.TENSORSET', 'inception_in{1}', 'FLOAT', 1, 3, 224, 224, 'BLOB', img.tobytes())
    env.assertEqual(ret, b'OK')
    ret = con.execute_command('AI.MODELSTORE', 'bert{1}', 'ONNX', DEVICE, 'BLOB', bert_pb)
    env.assertEqual(ret, b'OK')
    ret = con.execute_command('AI.TENSORSET', 'bert_in{1}', 'INT64', 10, 100, 'BLOB', bert_in_data.tobytes())
    env.assertEqual(ret, b'OK')

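    # Each client issues `num_runs` consecutive AI.MODELEXECUTE calls. The bert model takes three
    # INT64 inputs (token ids, attention mask and token-type ids, assuming the standard
    # bert-base-cased ONNX export); the same random tensor is reused for all three.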
    def run_parallel_onnx_sessions(con, model, input_key, num_runs):
        for _ in range(num_runs):
            if terminate_flag == 1:
                return
            # If the user terminates the benchmark, redis-server will receive the termination signal
            # as well, and a RedisError exception will be thrown (and caught).
            try:
                if model == 'bert{1}':
                    ret = con.execute_command('AI.MODELEXECUTE', model, 'INPUTS', 3, input_key, input_key, input_key,
                                              'OUTPUTS', 2, 'res{1}', 'res2{1}')
                else:
                    ret = con.execute_command('AI.MODELEXECUTE', model, 'INPUTS', 1, input_key, 'OUTPUTS', 1, 'res{1}')
                env.assertEqual(ret, b'OK')
            except RedisError:
                return

    def run_mnist():
        run_test_multiproc(env, '{1}', num_parallel_clients, run_parallel_onnx_sessions,
                           ('mnist{1}0', 'mnist_in{1}', num_runs_mnist))

    def run_bert():
        run_test_multiproc(env, '{1}', num_parallel_clients, run_parallel_onnx_sessions,
                           ('bert{1}', 'bert_in{1}', num_runs_bert))

    # run only mnist
    mnist_total_requests_count = num_runs_mnist*num_parallel_clients
    print(f'\nRunning {num_runs_mnist} consecutive executions of mnist from {num_parallel_clients} parallel clients...')
    start_time = time.time()
    run_test_multiproc(env, '{1}', num_parallel_clients, run_parallel_onnx_sessions,
                       ('mnist{1}0', 'mnist_in{1}', num_runs_mnist))
    if terminate_flag == 1:
        _exit()
    print(f'Done. Total execution time for {mnist_total_requests_count} requests: {time.time()-start_time} seconds')
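    # Field 11 of the AI.INFO reply holds the model's accumulated execution time
    # (in microseconds, hence the division by 1000000 below).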
    mnist_time = con.execute_command('AI.INFO', 'mnist{1}0')[11]
    print("Average serving time per mnist run session is: {} seconds"
          .format(float(mnist_time)/1000000/mnist_total_requests_count))

    # run only inception
    inception_total_requests_count = num_runs_inception*num_parallel_clients
    print(f'\nRunning {num_runs_inception} consecutive executions of inception from {num_parallel_clients} parallel clients...')
    start_time = time.time()
    run_test_multiproc(env, '{1}', num_parallel_clients, run_parallel_onnx_sessions,
                       ('inception{1}0', 'inception_in{1}', num_runs_inception))
    if terminate_flag == 1:
        _exit()
    print(f'Done. Total execution time for {inception_total_requests_count} requests: {time.time()-start_time} seconds')
    inception_time = con.execute_command('AI.INFO', 'inception{1}0')[11]
    print("Average serving time per inception run session is: {} seconds"
          .format(float(inception_time)/1000000/inception_total_requests_count))

    # run only bert
    bert_total_requests_count = num_runs_bert*num_parallel_clients
    print(f'\nRunning {num_runs_bert} consecutive executions of bert from {num_parallel_clients} parallel clients...')
    start_time = time.time()
    run_test_multiproc(env, '{1}', num_parallel_clients, run_parallel_onnx_sessions, ('bert{1}', 'bert_in{1}', num_runs_bert))
    if terminate_flag == 1:
        _exit()
    print(f'Done. Total execution time for {bert_total_requests_count} requests: {time.time()-start_time} seconds')
    bert_time = con.execute_command('AI.INFO', 'bert{1}')[11]
    print("Average serving time per bert run session is: {} seconds"
          .format(float(bert_time)/1000000/bert_total_requests_count))

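    # Reset the accumulated run statistics, so that the combined run below is measured from scratch.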
    con.execute_command('AI.INFO', 'mnist{1}0', 'RESETSTAT')
    con.execute_command('AI.INFO', 'inception{1}0', 'RESETSTAT')
    con.execute_command('AI.INFO', 'bert{1}', 'RESETSTAT')

    # run all 3 models in parallel
    total_requests_count = mnist_total_requests_count+inception_total_requests_count+bert_total_requests_count
    print(f'\nRunning requests for all 3 models from {3*num_parallel_clients} parallel clients...')
    start_time = time.time()
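    # run_test_multiproc blocks until its clients finish, so the mnist and bert client groups are
    # launched from side threads while the inception group runs on the main thread.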
    t = threading.Thread(target=run_mnist)
    t.start()
    t2 = threading.Thread(target=run_bert)
    t2.start()
    run_test_multiproc(env, '{1}', num_parallel_clients, run_parallel_onnx_sessions,
                       ('inception{1}0', 'inception_in{1}', num_runs_inception))
    t.join()
    t2.join()
    if terminate_flag == 1:
        _exit()
    print(f'Done. Total execution time for {total_requests_count} requests: {time.time()-start_time} seconds')
    mnist_info = con.execute_command('AI.INFO', 'mnist{1}0')[11]
    inception_info = con.execute_command('AI.INFO', 'inception{1}0')[11]
    bert_info = con.execute_command('AI.INFO', 'bert{1}')[11]
    total_time = mnist_info+inception_info+bert_info
    print("Average serving time per run session is: {} seconds"
          .format(float(total_time)/1000000/total_requests_count))


if __name__ == '__main__':

    # set a handler for user interrupt signal
    signal.signal(signal.SIGINT, handler)

    # parse command line arguments
    parser = argparse.ArgumentParser()
    # num_threads is kept as a string, since it is concatenated into moduleArgs below
    parser.add_argument("--num_threads", default='1',
                        help='Number of RedisAI working threads that can execute sessions in parallel')
    parser.add_argument("--num_runs_mnist", type=int, default=500,
                        help='Number of requests per client running mnist sessions')
    parser.add_argument("--num_runs_inception", type=int, default=50,
                        help='Number of requests per client running inception sessions')
    parser.add_argument("--num_runs_bert", type=int, default=5,
                        help='Number of requests per client running bert sessions')
    parser.add_argument("--num_parallel_clients", type=int, default=20,
                        help='Number of parallel clients that send consecutive run requests per model')
    args = parser.parse_args()

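    # A possible invocation (the script name here is an assumption):
    #   python3 onnx_benchmark.py --num_threads 8 --num_runs_mnist 1000 --num_parallel_clients 20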
    print(f'Running ONNX benchmark on RedisAI, using {args.num_threads} working threads')
    env = Env(module='install-cpu/redisai.so',
              moduleArgs='MODEL_EXECUTION_TIMEOUT 50000 THREADS_PER_QUEUE '+args.num_threads, logDir='logs')

    # If the user terminates the benchmark, redis-server will receive the termination signal as well,
    # and a RedisError exception will be thrown (and caught).
    try:
        run_benchmark(env, num_runs_mnist=args.num_runs_mnist, num_runs_inception=args.num_runs_inception,
                      num_runs_bert=args.num_runs_bert, num_parallel_clients=args.num_parallel_clients)
        env.stop()
    except RedisError:
        pass
    finally:
        # remove the logs that were auto-generated by redis
        shutil.rmtree('logs', ignore_errors=True)