Skip to content

Commit 5d54c5c

Browse files
Refactor yara_detected for better matching and self-extraction (#2759)
* Refactor yara_detected for better matching and self-extraction

  Refactor the yara_detected method to improve the matching logic and encapsulate self-extraction processing.

* Apply suggestions from code review

  Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>

* Update lib/cuckoo/common/abstracts.py

  Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>

---------

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
1 parent 500db67 commit 5d54c5c

File tree

1 file changed

+79
-72
lines changed

1 file changed

+79
-72
lines changed

lib/cuckoo/common/abstracts.py

Lines changed: 79 additions & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -887,82 +887,89 @@ def set_path(self, analysis_path):
887887
CuckooReportError(e)
888888

889889
def yara_detected(self, name):
890-
target = self.results.get("target", {})
891-
if target.get("category") in ("file", "static") and target.get("file"):
890+
name_pattern = re.compile(name, re.I)
891+
892+
def _check_matches(data_block, path, label_override=None):
893+
if not isinstance(data_block, dict):
894+
return
895+
892896
for keyword in ("cape_yara", "yara"):
893-
for yara_block in self.results["target"]["file"].get(keyword, []):
894-
if re.findall(name, yara_block["name"], re.I):
895-
yield "sample", self.results["target"]["file"]["path"], yara_block, self.results["target"]["file"]
896-
897-
if target["file"].get("selfextract"):
898-
for _, toolsblock in target["file"]["selfextract"].items():
899-
for block in toolsblock.get("extracted_files", []):
900-
for keyword in ("cape_yara", "yara"):
901-
for yara_block in block[keyword]:
902-
if re.findall(name, yara_block["name"], re.I):
903-
# we can't use here values from set_path
904-
yield "sample", block["path"], yara_block, block
905-
906-
for block in self.results.get("CAPE", {}).get("payloads", []) or []:
907-
for sub_keyword in ("cape_yara", "yara"):
908-
for yara_block in block.get(sub_keyword, []):
909-
if re.findall(name, yara_block["name"], re.I):
910-
yield sub_keyword, block["path"], yara_block, block
911-
912-
if block.get("selfextract", {}):
913-
for _, toolsblock in block["selfextract"].items():
914-
for subblock in toolsblock.get("extracted_files", []):
915-
for keyword in ("cape_yara", "yara"):
916-
for yara_block in subblock[keyword]:
917-
if re.findall(name, yara_block["name"], re.I):
918-
yield "sample", subblock["path"], yara_block, block
919-
920-
for keyword in ("procdump", "procmemory", "extracted", "dropped"):
921-
if self.results.get(keyword) is not None:
922-
for block in self.results.get(keyword, []):
923-
if not isinstance(block, dict):
924-
continue
925-
for sub_keyword in ("cape_yara", "yara"):
926-
for yara_block in block.get(sub_keyword, []):
927-
if re.findall(name, yara_block["name"], re.I):
928-
path = block["path"] if block.get("path", False) else ""
929-
yield keyword, path, yara_block, block
930-
931-
if keyword == "procmemory":
932-
for pe in block.get("extracted_pe", []) or []:
933-
for sub_keyword in ("cape_yara", "yara"):
934-
for yara_block in pe.get(sub_keyword, []) or []:
935-
if re.findall(name, yara_block["name"], re.I):
936-
yield "extracted_pe", pe["path"], yara_block, block
937-
938-
if block.get("selfextract", {}):
939-
for _, toolsblock in block["selfextract"].items():
940-
for subblock in toolsblock.get("extracted_files", []):
941-
for keyword in ("cape_yara", "yara"):
942-
for yara_block in subblock[keyword]:
943-
if re.findall(name, yara_block["name"], re.I):
944-
yield "sample", subblock["path"], yara_block, block
945-
946-
macro_path = os.path.join(CUCKOO_ROOT, "storage", "analyses", str(self.results["info"]["id"]), "macros")
947-
for macroname in self.results.get("static", {}).get("office", {}).get("Macro", {}).get("info", []) or []:
948-
for yara_block in self.results["static"]["office"]["Macro"]["info"].get("macroname", []) or []:
949-
for sub_block in self.results["static"]["office"]["Macro"]["info"]["macroname"].get(yara_block, []) or []:
950-
if re.findall(name, sub_block["name"], re.I):
951-
yield (
952-
"macro",
953-
os.path.join(macro_path, macroname),
954-
sub_block,
955-
self.results["static"]["office"]["Macro"]["info"],
956-
)
957-
958-
if self.results.get("static", {}).get("office", {}).get("XLMMacroDeobfuscator", False):
959-
for yara_block in self.results["static"]["office"]["XLMMacroDeobfuscator"].get("info", []).get("yara_macro", []) or []:
960-
if re.findall(name, yara_block["name"], re.I):
897+
for yara_block in data_block.get(keyword, []):
898+
if name_pattern.search(yara_block.get("name", "")):
899+
label = label_override if label_override else keyword
900+
yield label, path, yara_block, data_block
901+
902+
def _process_selfextract(parent_block):
903+
selfextract = parent_block.get("selfextract")
904+
if not selfextract:
905+
return
906+
907+
tools_iter = selfextract.values() if isinstance(selfextract, dict) else []
908+
909+
for toolsblock in tools_iter:
910+
for extracted_file in toolsblock.get("extracted_files", []) or []:
911+
yield from _check_matches(
912+
extracted_file,
913+
path=extracted_file.get("path"),
914+
label_override="sample"
915+
)
916+
917+
results = self.results
918+
target = results.get("target", {})
919+
920+
# 1. Procesar Target
921+
if target.get("category") in ("file", "static") and target.get("file"):
922+
file_info = target["file"]
923+
yield from _check_matches(file_info, file_info.get("path"), label_override="sample")
924+
yield from _process_selfextract(file_info)
925+
926+
cape_payloads = results.get("CAPE", {}).get("payloads", []) or []
927+
for block in cape_payloads:
928+
yield from _check_matches(block, block.get("path"))
929+
yield from _process_selfextract(block)
930+
931+
search_keys = ("procdump", "procmemory", "extracted", "dropped")
932+
for keyword in search_keys:
933+
blocks = results.get(keyword, []) or []
934+
if not blocks:
935+
continue
936+
937+
for block in blocks:
938+
if not isinstance(block, dict):
939+
continue
940+
941+
path = block.get("path", "")
942+
yield from _check_matches(block, path, label_override=keyword)
943+
944+
if keyword == "procmemory":
945+
for pe in block.get("extracted_pe", []) or []:
946+
yield from _check_matches(pe, pe.get("path"), label_override="extracted_pe")
947+
948+
yield from _process_selfextract(block)
949+
950+
# ToDo not sure if static still exist
951+
office_info = results.get("static", {}).get("office", {})
952+
macro_info = office_info.get("Macro", {}).get("info", [])
953+
analysis_id = str(results.get("info", {}).get("id", "unknown"))
954+
macro_base_path = os.path.join(CUCKOO_ROOT, "storage", "analyses", analysis_id, "macros")
955+
956+
if macro_info:
957+
if isinstance(macro_info, list):
958+
for item in macro_info:
959+
yield from _check_matches(item, os.path.join(macro_base_path, item.get("name", "macro")), label_override="macro")
960+
elif isinstance(macro_info, dict):
961+
for macroname, macro_data in macro_info.items():
962+
yield from _check_matches(macro_data, os.path.join(macro_base_path, macroname), label_override="macro")
963+
964+
xlm_info = office_info.get("XLMMacroDeobfuscator", {}).get("info", {})
965+
if xlm_info:
966+
for yara_block in xlm_info.get("yara_macro", []) or []:
967+
if name_pattern.search(yara_block.get("name", "")):
961968
yield (
962969
"macro",
963-
os.path.join(macro_path, "xlm_macro"),
970+
os.path.join(macro_base_path, "xlm_macro"),
964971
yara_block,
965-
self.results["static"]["office"]["XLMMacroDeobfuscator"]["info"],
972+
xlm_info
966973
)
967974

968975
def signature_matched(self, signame: str) -> bool:

0 commit comments

Comments
 (0)