From 1e04a90864bdc933b850a0435a680c02689d82eb Mon Sep 17 00:00:00 2001 From: Tomas Vrana Date: Mon, 27 Oct 2025 16:43:43 +0000 Subject: [PATCH 1/2] fieldClassifier: optimising module --- .../fieldClassifier/src/fieldClassifier.cpp | 329 +++++++++++------- .../fieldClassifier/src/fieldClassifier.hpp | 27 +- modules/fieldClassifier/src/main.cpp | 6 +- 3 files changed, 232 insertions(+), 130 deletions(-) diff --git a/modules/fieldClassifier/src/fieldClassifier.cpp b/modules/fieldClassifier/src/fieldClassifier.cpp index 3125d90..0df00d6 100644 --- a/modules/fieldClassifier/src/fieldClassifier.cpp +++ b/modules/fieldClassifier/src/fieldClassifier.cpp @@ -211,6 +211,48 @@ void FieldClassifier::getUnirecIdsForDirection(const std::string& prefix, IDMap& idMap.at(fieldName) = idField; } } + +void FieldClassifier::getUnirecIdsForInputFields(std::string& templateStr) +{ + m_inputFieldIds.clear(); + m_inputFieldTypes.clear(); + // split template to vector by commas + std::vector templateFields; + size_t pos = 0; + std::string strCopy = templateStr; + if (templateStr.back() != ',') { + strCopy += ','; // Add a trailing comma to ensure the last field is processed + } + while ((pos = strCopy.find(',')) != std::string::npos) { + // Extract the field + std::string field = strCopy.substr(0, pos); + strCopy.erase(0, pos + 1); + if (!field.empty()) { // ignore empty fields + templateFields.push_back(field); + } + } + for (const auto& field : templateFields) { + // split field by space to get field name + size_t spacePos = field.find(' '); + if (spacePos == std::string::npos) { + throw std::runtime_error( + "FieldClassifier: Invalid Unirec field format in template: " + field); + } + std::string fieldName = field.substr(spacePos + 1); + std::string fieldType = field.substr(0, spacePos); + + auto fieldId = static_cast(ur_get_id_by_name(fieldName.c_str())); + if (fieldId == UR_E_INVALID_NAME) { + throw std::runtime_error("FieldClassifier: Invalid Unirec field name: " + fieldName); + } + m_inputFieldIds.push_back(fieldId); + if (m_UnirecTypeMap.find(fieldType) == m_UnirecTypeMap.end()) { + throw std::runtime_error("FieldClassifier: Invalid Unirec field type: " + fieldType); + } + m_inputFieldTypes.push_back(m_UnirecTypeMap.at(fieldType)); + } +} + void FieldClassifier::getUnirecIds() { // get Unirec field IDs for source and destination IP fields @@ -283,138 +325,175 @@ void FieldClassifier::loadIP( } void FieldClassifier::fillInputFieldsToOutput( std::optional& input, - std::optional& output, - std::string& templateStr) + std::optional& output) { - // split template to vector by commas - std::vector templateFields; - size_t pos = 0; - std::string strCopy = templateStr; - if (templateStr.back() != ',') { - strCopy += ','; // Add a trailing comma to ensure the last field is processed - } - while ((pos = strCopy.find(',')) != std::string::npos) { - // Extract the field - std::string field = strCopy.substr(0, pos); - strCopy.erase(0, pos + 1); - if (!field.empty()) { // ignore empty fields - templateFields.push_back(field); + unsigned long size = m_inputFieldIds.size(); + for (unsigned long i = 0; i < size; i++) { + auto fieldId = m_inputFieldIds[i]; + auto fieldTypeIt = m_inputFieldTypes[i]; + + switch (fieldTypeIt) { + case DataType::STRING: { + auto value = input->getFieldAsType(fieldId); + output->setFieldFromType(value, fieldId); + break; } - } - // for each field, get data from input and set to output - for (const auto& field : templateFields) { - // split field by space to get field name - size_t spacePos = field.find(' '); - if (spacePos == std::string::npos) { - throw std::runtime_error( - "FieldClassifier: Invalid Unirec field format in template: " + field); + case DataType::UINT8: { + auto value = input->getFieldAsType(fieldId); + output->setFieldFromType(value, fieldId); + break; } - std::string fieldName = field.substr(spacePos + 1); - std::string fieldType = field.substr(0, spacePos); - - auto fieldId = static_cast(ur_get_id_by_name(fieldName.c_str())); - if (fieldId == UR_E_INVALID_NAME) { - throw std::runtime_error("FieldClassifier: Invalid Unirec field name: " + fieldName); + case DataType::UINT16: { + auto value = input->getFieldAsType(fieldId); + output->setFieldFromType(value, fieldId); + break; } - try { - if (fieldType == "string") { - auto value = input->getFieldAsType(fieldId); - output->setFieldFromType(value, fieldId); - } else if (fieldType == "uint8") { - auto value = input->getFieldAsType(fieldId); - output->setFieldFromType(value, fieldId); - } else if (fieldType == "uint16") { - auto value = input->getFieldAsType(fieldId); - output->setFieldFromType(value, fieldId); - } else if (fieldType == "uint32") { - auto value = input->getFieldAsType(fieldId); - output->setFieldFromType(value, fieldId); - } else if (fieldType == "uint64") { - auto value = input->getFieldAsType(fieldId); - output->setFieldFromType(value, fieldId); - } else if (fieldType == "ipaddr") { - auto value = input->getFieldAsType(fieldId); - output->setFieldFromType(value, fieldId); - } else if (fieldType == "int8") { - auto value = input->getFieldAsType(fieldId); - output->setFieldFromType(value, fieldId); - } else if (fieldType == "int16") { - auto value = input->getFieldAsType(fieldId); - output->setFieldFromType(value, fieldId); - } else if (fieldType == "int32") { - auto value = input->getFieldAsType(fieldId); - output->setFieldFromType(value, fieldId); - } else if (fieldType == "int64") { - auto value = input->getFieldAsType(fieldId); - output->setFieldFromType(value, fieldId); - } else if (fieldType == "char") { - auto value = input->getFieldAsType(fieldId); - output->setFieldFromType(value, fieldId); - } else if (fieldType == "float") { - auto value = input->getFieldAsType(fieldId); - output->setFieldFromType(value, fieldId); - } else if (fieldType == "double") { - auto value = input->getFieldAsType(fieldId); - output->setFieldFromType(value, fieldId); - } else if (fieldType == "macaddr") { - auto value = input->getFieldAsType(fieldId); - output->setFieldFromType(value, fieldId); - } else if (fieldType == "time") { - auto value = input->getFieldAsType(fieldId); - output->setFieldFromType(value, fieldId); - } else if (fieldType == "bytes") { - Nemea::UnirecArray const arr - = input->getFieldAsUnirecArray(fieldId); - output->setFieldFromUnirecArray(arr, fieldId); - } else if (fieldType == "int8*") { - Nemea::UnirecArray const arr - = input->getFieldAsUnirecArray(fieldId); - output->setFieldFromUnirecArray(arr, fieldId); - } else if (fieldType == "int16*") { - Nemea::UnirecArray const arr - = input->getFieldAsUnirecArray(fieldId); - output->setFieldFromUnirecArray(arr, fieldId); - } else if (fieldType == "int32*") { - Nemea::UnirecArray const arr - = input->getFieldAsUnirecArray(fieldId); - output->setFieldFromUnirecArray(arr, fieldId); - } else if (fieldType == "int64*") { - Nemea::UnirecArray const arr - = input->getFieldAsUnirecArray(fieldId); - output->setFieldFromUnirecArray(arr, fieldId); - } else if (fieldType == "uint8*") { - Nemea::UnirecArray const arr - = input->getFieldAsUnirecArray(fieldId); - output->setFieldFromUnirecArray(arr, fieldId); - } else if (fieldType == "uint16*") { - Nemea::UnirecArray const arr - = input->getFieldAsUnirecArray(fieldId); - output->setFieldFromUnirecArray(arr, fieldId); - } else if (fieldType == "uint32*") { - Nemea::UnirecArray const arr - = input->getFieldAsUnirecArray(fieldId); - output->setFieldFromUnirecArray(arr, fieldId); - } else if (fieldType == "uint64*") { - Nemea::UnirecArray const arr - = input->getFieldAsUnirecArray(fieldId); - output->setFieldFromUnirecArray(arr, fieldId); - } else if (fieldType == "float*") { - Nemea::UnirecArray const arr = input->getFieldAsUnirecArray(fieldId); - output->setFieldFromUnirecArray(arr, fieldId); - } else if (fieldType == "double*") { - Nemea::UnirecArray const arr - = input->getFieldAsUnirecArray(fieldId); - output->setFieldFromUnirecArray(arr, fieldId); - } else { - throw std::runtime_error( - "FieldClassifier: Unsupported Unirec field type in template: " + fieldType); - } + case DataType::UINT32: { + auto value = input->getFieldAsType(fieldId); + output->setFieldFromType(value, fieldId); + break; + } + case DataType::UINT64: { + auto value = input->getFieldAsType(fieldId); + output->setFieldFromType(value, fieldId); + break; + } + case DataType::IPADDR: { + auto value = input->getFieldAsType(fieldId); + output->setFieldFromType(value, fieldId); + break; + } + case DataType::INT8: { + auto value = input->getFieldAsType(fieldId); + output->setFieldFromType(value, fieldId); + break; + } + case DataType::INT16: { + auto value = input->getFieldAsType(fieldId); + output->setFieldFromType(value, fieldId); + break; + } + case DataType::INT32: { + auto value = input->getFieldAsType(fieldId); + output->setFieldFromType(value, fieldId); + break; + } + case DataType::INT64: { + auto value = input->getFieldAsType(fieldId); + output->setFieldFromType(value, fieldId); + break; + } + case DataType::CHAR: { + auto value = input->getFieldAsType(fieldId); + output->setFieldFromType(value, fieldId); + break; + } + case DataType::FLOAT: { + auto value = input->getFieldAsType(fieldId); + output->setFieldFromType(value, fieldId); + break; + } + case DataType::DOUBLE: { + auto value = input->getFieldAsType(fieldId); + output->setFieldFromType(value, fieldId); + break; + } + case DataType::MACADDR: { + auto value = input->getFieldAsType(fieldId); + output->setFieldFromType(value, fieldId); + break; + } + case DataType::TIME: { + auto value = input->getFieldAsType(fieldId); + output->setFieldFromType(value, fieldId); + break; + } + case DataType::BYTES: { + // Nemea::UnirecArray const arr + // = input->getFieldAsUnirecArray(fieldId); + // output->setFieldFromUnirecArray(arr, fieldId); - } catch (const std::exception& ex) { + std::byte value = input->getFieldAsType(fieldId); + output->setFieldFromType(value, fieldId); + + break; + } + case DataType::A_INT8: { + Nemea::UnirecArray const arr = input->getFieldAsUnirecArray(fieldId); + output->setFieldFromUnirecArray(arr, fieldId); + break; + } + case DataType::A_INT16: { + Nemea::UnirecArray const arr = input->getFieldAsUnirecArray(fieldId); + output->setFieldFromUnirecArray(arr, fieldId); + break; + } + case DataType::A_INT32: { + Nemea::UnirecArray const arr = input->getFieldAsUnirecArray(fieldId); + output->setFieldFromUnirecArray(arr, fieldId); + break; + } + case DataType::A_INT64: { + Nemea::UnirecArray const arr = input->getFieldAsUnirecArray(fieldId); + output->setFieldFromUnirecArray(arr, fieldId); + break; + } + case DataType::A_UINT8: { + Nemea::UnirecArray const arr = input->getFieldAsUnirecArray(fieldId); + output->setFieldFromUnirecArray(arr, fieldId); + break; + } + case DataType::A_UINT16: { + Nemea::UnirecArray const arr + = input->getFieldAsUnirecArray(fieldId); + output->setFieldFromUnirecArray(arr, fieldId); + break; + } + case DataType::A_UINT32: { + Nemea::UnirecArray const arr + = input->getFieldAsUnirecArray(fieldId); + output->setFieldFromUnirecArray(arr, fieldId); + break; + } + case DataType::A_UINT64: { + Nemea::UnirecArray const arr + = input->getFieldAsUnirecArray(fieldId); + output->setFieldFromUnirecArray(arr, fieldId); + break; + } + case DataType::A_FLOAT: { + Nemea::UnirecArray const arr = input->getFieldAsUnirecArray(fieldId); + output->setFieldFromUnirecArray(arr, fieldId); + break; + } + case DataType::A_DOUBLE: { + Nemea::UnirecArray const arr = input->getFieldAsUnirecArray(fieldId); + output->setFieldFromUnirecArray(arr, fieldId); + break; + } + case DataType::A_IP: { + Nemea::UnirecArray const arr + = input->getFieldAsUnirecArray(fieldId); + output->setFieldFromUnirecArray(arr, fieldId); + break; + } + case DataType::A_MAC: { + Nemea::UnirecArray const arr + = input->getFieldAsUnirecArray(fieldId); + output->setFieldFromUnirecArray(arr, fieldId); + break; + } + case DataType::A_TIME: { + Nemea::UnirecArray const arr + = input->getFieldAsUnirecArray(fieldId); + output->setFieldFromUnirecArray(arr, fieldId); + break; + } + default: throw std::runtime_error( - std::string("FieldClassifier: Unable to copy field '") + field - + "' from input to output Unirec record: " + ex.what()); + "FieldClassifier: Unsupported Unirec field type for field ID: " + + std::to_string(fieldId)); } } } diff --git a/modules/fieldClassifier/src/fieldClassifier.hpp b/modules/fieldClassifier/src/fieldClassifier.hpp index 7ed2f8d..d072c1a 100644 --- a/modules/fieldClassifier/src/fieldClassifier.hpp +++ b/modules/fieldClassifier/src/fieldClassifier.hpp @@ -76,10 +76,12 @@ class FieldClassifier { */ void handleParams(int argc, char** argv, argparse::ArgumentParser& parser); + void getUnirecIdsForInputFields(std::string& templateStr); + void fillInputFieldsToOutput( std::optional& input, - std::optional& output, - std::string& templateStr); + std::optional& output); + ur_template_t* templateStringAll; private: std::vector m_plugins = g_PLUGINS; @@ -107,6 +109,9 @@ class FieldClassifier { std::string m_templateStr; + std::vector m_inputFieldIds; + std::vector m_inputFieldTypes; + std::string getIPtoString(const Nemea::IpAddress& ipAddr); void loadIP( @@ -146,5 +151,23 @@ class FieldClassifier { void getRequiredFields(const std::string& requiredFields); // ################### + + std::unordered_map m_UnirecTypeMap = { + {"string", DataType::STRING}, {"int8", DataType::INT8}, + {"int16", DataType::INT16}, {"int32", DataType::INT32}, + {"int64", DataType::INT64}, {"uint8", DataType::UINT8}, + {"uint16", DataType::UINT16}, {"uint32", DataType::UINT32}, + {"uint64", DataType::UINT64}, {"char", DataType::CHAR}, + {"float", DataType::FLOAT}, {"double", DataType::DOUBLE}, + {"ipaddr", DataType::IPADDR}, {"macaddr", DataType::MACADDR}, + {"bytes", DataType::BYTES}, {"int8*", DataType::A_INT8}, + {"int16*", DataType::A_INT16}, {"int32*", DataType::A_INT32}, + {"int64*", DataType::A_INT64}, {"uint8*", DataType::A_UINT8}, + {"uint16*", DataType::A_UINT16}, {"uint32*", DataType::A_UINT32}, + {"uint64*", DataType::A_UINT64}, {"float*", DataType::A_FLOAT}, + {"double*", DataType::A_DOUBLE}, {"ipaddr*", DataType::A_IP}, + {"time*", DataType::A_TIME}, {"time", DataType::TIME}, + {"macaddr*", DataType::A_MAC}, + }; }; } // namespace NFieldClassifier diff --git a/modules/fieldClassifier/src/main.cpp b/modules/fieldClassifier/src/main.cpp index 4045d88..dd76c09 100644 --- a/modules/fieldClassifier/src/main.cpp +++ b/modules/fieldClassifier/src/main.cpp @@ -67,10 +67,9 @@ static void processNextRecord( throw std::runtime_error(std::string("Unable to get template from trap input")); } - // convert template to string and append new fileds - std::string stringTemp = static_cast(ur_template_string(templateDef)); + fieldClassifier.templateStringAll = templateDef; - fieldClassifier.fillInputFieldsToOutput(inputUnirecView, unirecRecord, stringTemp); + fieldClassifier.fillInputFieldsToOutput(inputUnirecView, unirecRecord); // populate Unirec record with data from modules try { @@ -113,6 +112,7 @@ static void handleTemplateChange( // convert template to string and append new fileds std::string stringTemp = static_cast(ur_template_string(templateDef)); + fieldClassifier.getUnirecIdsForInputFields(stringTemp); // add finished template stringTemp += templateStr; From 4f3e5c11b2fbb52d8483b70da8b3fb0406f6bfc3 Mon Sep 17 00:00:00 2001 From: Tomas Vrana Date: Wed, 29 Oct 2025 14:45:06 +0100 Subject: [PATCH 2/2] fieldClassifier: bugfix: fixing ipclassifier module --- .../fieldClassifier/src/fieldClassifier.cpp | 4 +-- .../src/plugins/ipClassifier.cpp | 9 +++-- .../fieldClassifier/src/plugins/plugin.hpp | 35 +++++++++---------- 3 files changed, 22 insertions(+), 26 deletions(-) diff --git a/modules/fieldClassifier/src/fieldClassifier.cpp b/modules/fieldClassifier/src/fieldClassifier.cpp index 0df00d6..7846c3a 100644 --- a/modules/fieldClassifier/src/fieldClassifier.cpp +++ b/modules/fieldClassifier/src/fieldClassifier.cpp @@ -414,8 +414,8 @@ void FieldClassifier::fillInputFieldsToOutput( // = input->getFieldAsUnirecArray(fieldId); // output->setFieldFromUnirecArray(arr, fieldId); - std::byte value = input->getFieldAsType(fieldId); - output->setFieldFromType(value, fieldId); + // std::byte value = input->getFieldAsType(fieldId); + // output->setFieldFromType(value, fieldId); break; } diff --git a/modules/fieldClassifier/src/plugins/ipClassifier.cpp b/modules/fieldClassifier/src/plugins/ipClassifier.cpp index 47fc248..c76bc6b 100644 --- a/modules/fieldClassifier/src/plugins/ipClassifier.cpp +++ b/modules/fieldClassifier/src/plugins/ipClassifier.cpp @@ -100,14 +100,13 @@ bool IPClassifier::checkForRule(const uint8_t ipAddr[16], unsigned condition, co } bool IPClassifier::getData(DataMap& dataMap, std::string& ipAddr) { - uint8_t ipHexV4[16]; - uint8_t ipHexV6[16]; + uint8_t ipHex[16]; unsigned condition; bool isIPv4 = false; - if (inet_pton(AF_INET, ipAddr.c_str(), &ipHexV4) == 1) { + if (inet_pton(AF_INET, ipAddr.c_str(), &ipHex) == 1) { condition = 4; isIPv4 = true; - } else if (inet_pton(AF_INET6, ipAddr.c_str(), &ipHexV6) == 1) { + } else if (inet_pton(AF_INET6, ipAddr.c_str(), &ipHex) == 1) { condition = 16; isIPv4 = false; } else { @@ -118,7 +117,7 @@ bool IPClassifier::getData(DataMap& dataMap, std::string& ipAddr) if (rule.isIPv4 != isIPv4) { continue; } - if (checkForRule(ipHexV4, condition, rule)) { + if (checkForRule(ipHex, condition, rule)) { DEBUG_PRINT( 2, "Ip Classifier: Match found for IP: " + ipAddr + " with flags: " + rule.flags); diff --git a/modules/fieldClassifier/src/plugins/plugin.hpp b/modules/fieldClassifier/src/plugins/plugin.hpp index 43e16d0..b18ad67 100644 --- a/modules/fieldClassifier/src/plugins/plugin.hpp +++ b/modules/fieldClassifier/src/plugins/plugin.hpp @@ -11,6 +11,7 @@ #pragma once #include "argparse/argparse.hpp" +#include #include #include #include @@ -26,25 +27,8 @@ namespace NFieldClassifier { #define PREFIX_SRC "SRC_" #define PREFIX_DST "DST_" -// int8 1 8bit singed integer -// int16 2 16bit singed integer -// int32 4 32bit singed integer -// int64 8 64bit singed integer -// uint8 1 8bit unsigned integer -// uint16 2 16bit unsigned integer -// uint32 4 32bit unsigned integer -// uint64 8 64bit unsigned integer -// char 1 A single ASCII character -// float 4 Single precision floating point number (IEEE 754) -// double 8 Double precision floating point number (IEEE 754) -// ipaddr 16 Special type for IPv4/IPv6 addresses, see below for details -// macaddr 6 Special type for MAC address, see below for details -// time 8 Special type for precise timestamps, see below for details -// string - Variable-length array of (mostly) printable characters -// bytes - Variable-length array of bytes (not expected to be printable characters) - /** - * @brief Enum to represent various data types for plugin fields. + * @brief Enum to represent unirec data types. */ enum DataType : uint8_t { INT8, @@ -62,7 +46,20 @@ enum DataType : uint8_t { MACADDR, TIME, STRING, - BYTES + BYTES, + A_INT8, + A_INT16, + A_INT32, + A_INT64, + A_UINT8, + A_UINT16, + A_UINT32, + A_UINT64, + A_FLOAT, + A_DOUBLE, + A_IP, + A_MAC, + A_TIME }; /**