Skip to content
Open
42 changes: 42 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,45 @@ _out/
.vscode/vscode-kanban.json
tags
cscope.out

# CMake generated
CMakeFiles/
CMakeCache.txt
Makefile
CPackConfig.cmake
CPackSourceConfig.cmake
CTestTestfile.cmake
DartConfiguration.tcl
Testing/
cmake_install.cmake

# generated
*tests_include-*.cmake
*_tests-*.cmake
dcgmi/dcgmi
dcgmi/testing/dcgmitests
dcgmi/tests/dcgmi_tests
dcgmlib/src/tests/dcgmlibtests
dcgmproftester/dcgmproftester11
hostengine/nv-hostengine
modules/nvswitch/tests/nvswitchtests
nvvs/plugin_src/software/tests/softwaretests
nvvs/src/nvvs
nvvs/src/nvvs.log
nvvs/src/tests/nvvscoretests
common/DcgmBuildInfo.cpp
common/protobuf/dcgm.pb.cc
common/protobuf/dcgm.pb.h
common/tests/commontests
/rt.props
sdk_samples/*_sample
testing/stub/stub_library_test
testing/testdcgmunittests
testing/version.py

# compiled
*.a
*.so
lib*.so.*

dcgm_decode_db.txt
15 changes: 15 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,21 @@ set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH}" "${PROJECT_SOURCE_DIR}/cmake")

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_POSITION_INDEPENDENT_CODE ON CACHE BOOL "Build Position Independent Code")
# Select which CUDA toolkit versions the build targets.
# CUDA 9 and CUDA 10 default to OFF on aarch64 and ON everywhere else;
# CUDA 11 defaults to ON on every architecture. All three are cache
# options, so a user can override them with -DCUDA<N>_ENABLED=ON|OFF.
#
# The processor name is quoted so that an empty CMAKE_SYSTEM_PROCESSOR
# does not collapse the condition into `if( STREQUAL "aarch64")`, which
# is a syntax error.
if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64")
    set(CUDA9_ENABLED OFF CACHE BOOL "Build with CUDA v9")
    set(CUDA10_ENABLED OFF CACHE BOOL "Build with CUDA v10")
else ()
    set(CUDA9_ENABLED ON CACHE BOOL "Build with CUDA v9")
    set(CUDA10_ENABLED ON CACHE BOOL "Build with CUDA v10")
endif ()

set(CUDA11_ENABLED ON CACHE BOOL "Build with CUDA v11")

# Pass the variable names to if() and let it dereference them; expanding
# them with ${} breaks the expression when any of them is set to an
# empty value.
if (NOT (CUDA9_ENABLED OR CUDA10_ENABLED OR CUDA11_ENABLED))
    message(FATAL_ERROR "At least one CUDA version must be used")
endif ()


set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS ON)
Expand Down
49 changes: 26 additions & 23 deletions cmake/FindCuda.cmake
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
include(utils)

set(Cuda9_prefix usr/local/cuda-9.2)
set(Cuda10_prefix usr/local/cuda-10.0)
set(Cuda11_prefix usr/local/cuda-11.0)
# Installation prefixes of the supported CUDA toolkits. Each default is
# the conventional /usr/local/cuda-<version> location placed under
# CMAKE_FIND_ROOT_PATH; being PATH cache entries, they can be overridden
# on the command line (e.g. -DCUDA11_TOOLKIT_PREFIX=/opt/cuda-11.0).
set(CUDA9_TOOLKIT_PREFIX "${CMAKE_FIND_ROOT_PATH}/usr/local/cuda-9.2"
    CACHE PATH "Path to CUDA9 toolkit installation")
set(CUDA10_TOOLKIT_PREFIX "${CMAKE_FIND_ROOT_PATH}/usr/local/cuda-10.0"
    CACHE PATH "Path to CUDA10 toolkit installation")
set(CUDA11_TOOLKIT_PREFIX "${CMAKE_FIND_ROOT_PATH}/usr/local/cuda-11.0"
    CACHE PATH "Path to CUDA11 toolkit installation")

# Hide the prefixes from the default cmake-gui / ccmake view.
mark_as_advanced(CUDA9_TOOLKIT_PREFIX CUDA10_TOOLKIT_PREFIX CUDA11_TOOLKIT_PREFIX)

macro (load_cuda cuda_version)
foreach (prefix ${Cuda${cuda_version}_prefix})
list(APPEND Cuda${cuda_version}_INCLUDE_PATHS "${CMAKE_FIND_ROOT_PATH}/${prefix}/include")
list(APPEND Cuda${cuda_version}_LIB_PATHS "${CMAKE_FIND_ROOT_PATH}/${prefix}/lib" "${CMAKE_FIND_ROOT_PATH}/${prefix}/lib64")
foreach (cudapath ${CUDA${cuda_version}_TOOLKIT_PREFIX})
list(APPEND Cuda${cuda_version}_INCLUDE_PATHS "${cudapath}/include")
list(APPEND Cuda${cuda_version}_LIB_PATHS "${cudapath}/lib" "${cudapath}/lib64")
endforeach ()

# Look for cuda.h only in the include directories collected above.
# NO_DEFAULT_PATH (singular) is the find_path keyword that disables the
# default search locations; the misspelt NO_DEFAULT_PATHS was treated as
# one more entry of PATHS and left the default locations enabled.
find_path(CUDA${cuda_version}_INCLUDE_DIR cuda.h PATHS ${Cuda${cuda_version}_INCLUDE_PATHS} NO_DEFAULT_PATH)
Expand Down Expand Up @@ -37,6 +42,7 @@ macro (load_cuda cuda_version)
set(CUDA${cuda_version}_STATIC_CUBLAS_LIBS ${libcublas${cuda_version}} CACHE STRING "Cuda${cuda_version} static libs")
else ()
set(Cuda${cuda_version}_FOUND FALSE)
unset(CUDA${cuda_version}_INCLUDE_DIR CACHE)
endif ()

if (libcublaslt${cuda_version})
Expand All @@ -48,22 +54,22 @@ macro (load_cuda cuda_version)
endif ()

if (Cuda${cuda_version}_FOUND)
if (NOT Cuda_FIND_QUETLY)
if (NOT Cuda_FIND_QUIETLY)
message(STATUS "Found CUDA ${cuda_version}. CUDA${cuda_version}_INCLUDE_DIR=${CUDA${cuda_version}_INCLUDE_DIR}")
message(${libcublas${cuda_version}})
message(${libcudart${cuda_version}})
message(${libculibos${cuda_version}})
message(${libcublaslt${cuda_version}})
message(${libcuda${cuda_version}})
message("Cublas lib: ${libcublas${cuda_version}}")
message("Cudart lib: ${libcudart${cuda_version}}")
message("Culibos lib: ${libculibos${cuda_version}}")
message("CublasLt lib: ${libcublaslt${cuda_version}}")
message("CUDA lib: ${libcuda${cuda_version}}")
endif ()
else ()
if (Cuda_FIND_REQUIRED)
# Quote and label the include dir: if the variable is unset, the unquoted
# form expands to message() with no arguments, which is itself an error.
message("Include dir: ${CUDA${cuda_version}_INCLUDE_DIR}")
message(${libcublas${cuda_version}})
message(${libcudart${cuda_version}})
message(${libculibos${cuda_version}})
message(${libcublaslt${cuda_version}})
message(${libcuda${cuda_version}})
message("Cublas lib: ${libcublas${cuda_version}}")
message("Cudart lib: ${libcudart${cuda_version}}")
message("Culibos lib: ${libculibos${cuda_version}}")
message("CublasLt lib: ${libcublaslt${cuda_version}}")
message("CUDA lib: ${libcuda${cuda_version}}")
message(FATAL_ERROR "Could NOT find Cuda ${cuda_version}")
endif ()
message(STATUS "Cuda ${cuda_version} NOT found")
Expand All @@ -79,18 +85,15 @@ macro (load_cuda cuda_version)

endmacro()

if (NOT DEFINED CUDA9_INCLUDE_DIR AND NOT ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64")
if (NOT DEFINED CUDA9_INCLUDE_DIR AND ${CUDA9_ENABLED})
load_cuda(9)
endif()

if (NOT DEFINED CUDA10_INCLUDE_DIR AND NOT ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64")
if (NOT DEFINED CUDA10_INCLUDE_DIR AND ${CUDA10_ENABLED})
load_cuda(10)
endif()

if (NOT DEFINED CUDA11_INCLUDE_DIR)
if (NOT DEFINED CUDA11_INCLUDE_DIR AND ${CUDA11_ENABLED})
load_cuda(11)
endif()

unset(Cuda9_prefix)
unset(Cuda10_prefix)
unset(Cuda11_prefix)
2 changes: 1 addition & 1 deletion cmake/FindJsoncpp.cmake
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
find_package(jsoncpp REQUIRED CONFIG)
set(JSONCPP_STATIC_LIBS jsoncpp_lib_static)
set(JSONCPP_STATIC_LIBS jsoncpp)
set(JSONCPP_INCLUDE_PATH $<TARGET_PROPERTY:jsoncpp_lib_static,INTERFACE_INCLUDE_DIRECTORIES>)
# set(Jsoncpp_PATH_PREFIXES /usr/local "${Jsoncpp_ROOT}" "$ENV{HOME}")
# foreach(prefix ${Jsoncpp_PATH_PREFIXES})
Expand Down
99 changes: 52 additions & 47 deletions common/DcgmLogging.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

// This tells plog to store file information in log records
#include "dcgm_errors.h"
#include <plog/Appenders/RollingFileAppender.h>
#include <plog/Init.h>
#include <plog/Record.h>
#define PLOG_CAPTURE_FILE

Expand All @@ -28,6 +30,8 @@
#include <cstdio>
#include <dcgm_structs.h>
#include <dcgm_structs_internal.h>
#include <iomanip>
#include <memory>
#include <mutex>
#include <plog/Appenders/ConsoleAppender.h>
#include <syslog.h>
Expand Down Expand Up @@ -93,7 +97,7 @@ extern HostengineAppender hostengineAppender;
namespace
{
template <class... Args>
void SyslogAdapter(int priority, char const *format, Args &&... args)
void SyslogAdapter(int priority, char const *format, Args &&...args)
{
syslog(priority, format, std::forward<Args>(args)...);
}
Expand All @@ -105,23 +109,23 @@ void SyslogAdapter(int priority, char const *format, Args &&... args)
*/
#define SYSLOG_CRITICAL(...) \
PRINT_CRITICAL(nullptr, __VA_ARGS__); \
SyslogAdapter(LOG_CRIT, __VA_ARGS__);
SyslogAdapter(PLOG_CRIT, __VA_ARGS__);

#define SYSLOG_ERROR(...) \
PRINT_ERROR(nullptr, __VA_ARGS__); \
SyslogAdapter(LOG_ERR, __VA_ARGS__);
SyslogAdapter(PLOG_ERR, __VA_ARGS__);

#define SYSLOG_WARNING(...) \
PRINT_WARNING(nullptr, __VA_ARGS__); \
SyslogAdapter(LOG_WARNING, __VA_ARGS__);
SyslogAdapter(PLOG_WARNING, __VA_ARGS__);

#define SYSLOG_NOTICE(...) \
PRINT_INFO(nullptr, __VA_ARGS__); \
SyslogAdapter(LOG_NOTICE, __VA_ARGS__);
SyslogAdapter(PLOG_NOTICE, __VA_ARGS__);

#define SYSLOG_INFO(...) \
PRINT_INFO(nullptr, __VA_ARGS__); \
SyslogAdapter(LOG_INFO, __VA_ARGS__);
SyslogAdapter(PLOG_INFO, __VA_ARGS__);

#define DCGM_MAX_LOG_ROTATE 5

Expand All @@ -142,45 +146,45 @@ DCGM_CASSERT(DcgmLoggingSeverityInfo == (DcgmLoggingSeverity_t)plog::info, 1);
DCGM_CASSERT(DcgmLoggingSeverityDebug == (DcgmLoggingSeverity_t)plog::debug, 1);
DCGM_CASSERT(DcgmLoggingSeverityVerbose == (DcgmLoggingSeverity_t)plog::verbose, 1);

#define DCGM_LOG_VERBOSE_TO(logger) LOG_(logger, plog::verbose)
#define DCGM_LOG_DEBUG_TO(logger) LOG_(logger, plog::debug)
#define DCGM_LOG_INFO_TO(logger) LOG_(logger, plog::info)
#define DCGM_LOG_WARNING_TO(logger) LOG_(logger, plog::warning)
#define DCGM_LOG_ERROR_TO(logger) LOG_(logger, plog::error)
#define DCGM_LOG_FATAL_TO(logger) LOG_(logger, plog::fatal)

#define DCGM_LOG_VERBOSE LOG_(BASE_LOGGER, plog::verbose)
#define DCGM_LOG_DEBUG LOG_(BASE_LOGGER, plog::debug)
#define DCGM_LOG_INFO LOG_(BASE_LOGGER, plog::info)
#define DCGM_LOG_WARNING LOG_(BASE_LOGGER, plog::warning)
#define DCGM_LOG_ERROR LOG_(BASE_LOGGER, plog::error)
#define DCGM_LOG_FATAL LOG_(BASE_LOGGER, plog::fatal)

#define IF_DCGM_LOG_VERBOSE IF_LOG_(BASE_LOGGER, plog::verbose)
#define IF_DCGM_LOG_DEBUG IF_LOG_(BASE_LOGGER, plog::debug)
#define IF_DCGM_LOG_INFO IF_LOG_(BASE_LOGGER, plog::info)
#define IF_DCGM_LOG_WARNING IF_LOG_(BASE_LOGGER, plog::warning)
#define IF_DCGM_LOG_ERROR IF_LOG_(BASE_LOGGER, plog::error)
#define IF_DCGM_LOG_FATAL IF_LOG_(BASE_LOGGER, plog::fatal)

#define DCGM_LOG_SYSLOG_DEBUG LOG_(SYSLOG_LOGGER, plog::verbose)
#define DCGM_LOG_SYSLOG_INFO LOG_(SYSLOG_LOGGER, plog::debug)
#define DCGM_LOG_SYSLOG_NOTICE LOG_(SYSLOG_LOGGER, plog::info)
#define DCGM_LOG_SYSLOG_WARNING LOG_(SYSLOG_LOGGER, plog::warning)
#define DCGM_LOG_SYSLOG_ERROR LOG_(SYSLOG_LOGGER, plog::error)
#define DCGM_LOG_SYSLOG_CRITICAL LOG_(SYSLOG_LOGGER, plog::fatal)

#define IF_DCGM_LOG_SYSLOG_DEBUG IF_LOG_(SYSLOG_LOGGER, plog::verbose)
#define IF_DCGM_LOG_SYSLOG_INFO IF_LOG_(SYSLOG_LOGGER, plog::debug)
#define IF_DCGM_LOG_SYSLOG_NOTICE IF_LOG_(SYSLOG_LOGGER, plog::info)
#define IF_DCGM_LOG_SYSLOG_WARNING IF_LOG_(SYSLOG_LOGGER, plog::warning)
#define IF_DCGM_LOG_SYSLOG_ERROR IF_LOG_(SYSLOG_LOGGER, plog::error)
#define IF_DCGM_LOG_SYSLOG_CRITICAL IF_LOG_(SYSLOG_LOGGER, plog::fatal)
#define DCGM_LOG_VERBOSE_TO(logger) PLOG_(logger, plog::verbose)
#define DCGM_LOG_DEBUG_TO(logger) PLOG_(logger, plog::debug)
#define DCGM_LOG_INFO_TO(logger) PLOG_(logger, plog::info)
#define DCGM_LOG_WARNING_TO(logger) PLOG_(logger, plog::warning)
#define DCGM_LOG_ERROR_TO(logger) PLOG_(logger, plog::error)
#define DCGM_LOG_FATAL_TO(logger) PLOG_(logger, plog::fatal)

#define DCGM_LOG_VERBOSE PLOG_(BASE_LOGGER, plog::verbose)
#define DCGM_LOG_DEBUG PLOG_(BASE_LOGGER, plog::debug)
#define DCGM_LOG_INFO PLOG_(BASE_LOGGER, plog::info)
#define DCGM_LOG_WARNING PLOG_(BASE_LOGGER, plog::warning)
#define DCGM_LOG_ERROR PLOG_(BASE_LOGGER, plog::error)
#define DCGM_LOG_FATAL PLOG_(BASE_LOGGER, plog::fatal)

#define IF_DCGM_LOG_VERBOSE IF_PLOG_(BASE_LOGGER, plog::verbose)
#define IF_DCGM_LOG_DEBUG IF_PLOG_(BASE_LOGGER, plog::debug)
#define IF_DCGM_LOG_INFO IF_PLOG_(BASE_LOGGER, plog::info)
#define IF_DCGM_LOG_WARNING IF_PLOG_(BASE_LOGGER, plog::warning)
#define IF_DCGM_LOG_ERROR IF_PLOG_(BASE_LOGGER, plog::error)
#define IF_DCGM_LOG_FATAL IF_PLOG_(BASE_LOGGER, plog::fatal)

#define DCGM_LOG_SYSLOG_DEBUG PLOG_(SYSLOG_LOGGER, plog::verbose)
#define DCGM_LOG_SYSLOG_INFO PLOG_(SYSLOG_LOGGER, plog::debug)
#define DCGM_LOG_SYSLOG_NOTICE PLOG_(SYSLOG_LOGGER, plog::info)
#define DCGM_LOG_SYSLOG_WARNING PLOG_(SYSLOG_LOGGER, plog::warning)
#define DCGM_LOG_SYSLOG_ERROR PLOG_(SYSLOG_LOGGER, plog::error)
#define DCGM_LOG_SYSLOG_CRITICAL PLOG_(SYSLOG_LOGGER, plog::fatal)

#define IF_DCGM_LOG_SYSLOG_DEBUG IF_PLOG_(SYSLOG_LOGGER, plog::verbose)
#define IF_DCGM_LOG_SYSLOG_INFO IF_PLOG_(SYSLOG_LOGGER, plog::debug)
#define IF_DCGM_LOG_SYSLOG_NOTICE IF_PLOG_(SYSLOG_LOGGER, plog::info)
#define IF_DCGM_LOG_SYSLOG_WARNING IF_PLOG_(SYSLOG_LOGGER, plog::warning)
#define IF_DCGM_LOG_SYSLOG_ERROR IF_PLOG_(SYSLOG_LOGGER, plog::error)
#define IF_DCGM_LOG_SYSLOG_CRITICAL IF_PLOG_(SYSLOG_LOGGER, plog::fatal)

namespace
{
template <class... Args>
void OldLoggerAdapter(char *outBuffer, size_t bufSize, char const * /*unused*/, char const *format, Args &&... args)
void OldLoggerAdapter(char *outBuffer, size_t bufSize, char const * /*unused*/, char const *format, Args &&...args)
{
snprintf(outBuffer, bufSize, format, std::forward<Args>(args)...);
}
Expand All @@ -194,7 +198,7 @@ void OldLoggerAdapter(char *outBuffer, size_t bufSize, char const * /*unused*/,
#undef PRINT_CRITICAL
#define PRINT_CRITICAL(...) \
{ \
IF_LOG_(BASE_LOGGER, plog::fatal) \
IF_PLOG_(BASE_LOGGER, plog::fatal) \
{ \
char _dcgm_logging_buf[4096]; \
OldLoggerAdapter(_dcgm_logging_buf, sizeof(_dcgm_logging_buf), __VA_ARGS__); \
Expand All @@ -205,7 +209,7 @@ void OldLoggerAdapter(char *outBuffer, size_t bufSize, char const * /*unused*/,
#undef PRINT_ERROR
#define PRINT_ERROR(...) \
{ \
IF_LOG_(BASE_LOGGER, plog::error) \
IF_PLOG_(BASE_LOGGER, plog::error) \
{ \
char _dcgm_logging_buf[4096]; \
OldLoggerAdapter(_dcgm_logging_buf, sizeof(_dcgm_logging_buf), __VA_ARGS__); \
Expand All @@ -216,7 +220,7 @@ void OldLoggerAdapter(char *outBuffer, size_t bufSize, char const * /*unused*/,
#undef PRINT_WARNING
#define PRINT_WARNING(...) \
{ \
IF_LOG_(BASE_LOGGER, plog::warning) \
IF_PLOG_(BASE_LOGGER, plog::warning) \
{ \
char _dcgm_logging_buf[4096]; \
OldLoggerAdapter(_dcgm_logging_buf, sizeof(_dcgm_logging_buf), __VA_ARGS__); \
Expand All @@ -227,7 +231,7 @@ void OldLoggerAdapter(char *outBuffer, size_t bufSize, char const * /*unused*/,
#undef PRINT_INFO
#define PRINT_INFO(...) \
{ \
IF_LOG_(BASE_LOGGER, plog::info) \
IF_PLOG_(BASE_LOGGER, plog::info) \
{ \
char _dcgm_logging_buf[4096]; \
OldLoggerAdapter(_dcgm_logging_buf, sizeof(_dcgm_logging_buf), __VA_ARGS__); \
Expand All @@ -241,7 +245,7 @@ void OldLoggerAdapter(char *outBuffer, size_t bufSize, char const * /*unused*/,
#undef PRINT_DEBUG
#define PRINT_DEBUG(...) \
{ \
IF_LOG_(BASE_LOGGER, plog::debug) \
IF_PLOG_(BASE_LOGGER, plog::debug) \
{ \
char _dcgm_logging_buf[1024]; \
OldLoggerAdapter(_dcgm_logging_buf, sizeof(_dcgm_logging_buf), __VA_ARGS__); \
Expand Down Expand Up @@ -549,7 +553,8 @@ class DcgmLogging
record->func,
record->line,
record->file,
record->object)
record->object,
PLOG_DEFAULT_INSTANCE_ID)
, m_record(record)
, m_time({ record->time.time, record->time.millitm })
, m_tid(record->tid)
Expand Down
2 changes: 1 addition & 1 deletion common/protobuf/DcgmProtobuf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ DcgmProtobuf::~DcgmProtobuf()
*****************************************************************************/
dcgmReturn_t DcgmProtobuf::GetEncodedMessage(std::vector<char> &encodedMessage)
{
encodedMessage.resize(mpProtoMsg->ByteSize());
encodedMessage.resize(mpProtoMsg->ByteSizeLong());
mpProtoMsg->SerializeToArray(encodedMessage.data(), encodedMessage.size());
return DCGM_ST_OK;
}
Expand Down
4 changes: 2 additions & 2 deletions common/transport/DcgmIpc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ dcgmReturn_t DcgmIpc::Init(std::optional<DcgmIpcTcpServerParams_t> tcpParameters
(void)evthread_use_pthreads();

/* Enable libevent logging if we're at debug or higher */
IF_LOG_(BASE_LOGGER, plog::verbose)
IF_PLOG_(BASE_LOGGER, plog::verbose)
{
event_set_log_callback(DcgmIpcEventLogCB);
event_enable_debug_logging(EVENT_DBG_ALL);
Expand Down Expand Up @@ -1201,4 +1201,4 @@ dcgmReturn_t DcgmIpc::CloseConnection(dcgm_connection_id_t connectionId)
return DCGM_ST_OK;
}

/*****************************************************************************/
/*****************************************************************************/
Loading