Skip to content

Commit d29ca08

Browse files
authored
Allow metadata to contain a list of values (VirusTotal#201)
The `Rules.match` function now accepts an optional `allow_duplicate_metadata` argument (default `False`) that changes the structure of `Match.meta`. By default, `Match.meta` is a dictionary that maps each metadata name to its value; if a metadata name appears more than once in a rule, the last value is used. For example, consider the following rule:

```yara
rule demo {
  meta:
    foo = "foo #1"
    foo = "foo #2"
    bar = "bar"
  condition:
    false
}
```

In that case `Match.meta` would be `{"foo": "foo #2", "bar": "bar"}` by default (`allow_duplicate_metadata=False`), but with `allow_duplicate_metadata=True` it would be `{"foo": ["foo #1", "foo #2"], "bar": ["bar"]}`.
1 parent e14f096 commit d29ca08

File tree

2 files changed

+52
-8
lines changed

2 files changed

+52
-8
lines changed

tests.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -752,6 +752,28 @@ def testScanMeta(self):
752752
self.assertTrue(meta['b'] == 'ñ')
753753
self.assertTrue(meta['c'] == 'ñ')
754754

755+
# This test is similar to testScanMeta, but it checks that duplicated metadata values are exposed in the metadata built
756+
# when a Match object is created (upon request).
757+
def testDuplicateMeta(self):
758+
r = yara.compile(source="""
759+
rule test {
760+
meta:
761+
a = 1
762+
a = 2
763+
b = 3
764+
condition:
765+
true
766+
}
767+
""")
768+
769+
# By default, `meta` should be a plain key/value dict that keeps only the last value declared for each field
770+
meta = r.match(data="dummy")[0].meta
771+
self.assertTrue(meta['a'] == 2 and meta['b'] == 3)
772+
773+
# With `allow_duplicate_metadata=True`, every field should map to a list of all its values, in declaration order
774+
meta = r.match(data="dummy", allow_duplicate_metadata=True)[0].meta
775+
self.assertTrue(meta['a'] == [1, 2] and meta['b'] == [3])
776+
755777
def testFilesize(self):
756778

757779
self.assertTrueRules([

yara-python.c

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -432,6 +432,7 @@ typedef struct _CALLBACK_DATA
432432
PyObject* warnings_callback;
433433
PyObject* console_callback;
434434
int which;
435+
bool allow_duplicate_metadata;
435436

436437
} CALLBACK_DATA;
437438

@@ -885,7 +886,6 @@ static int handle_too_many_matches(
885886
#define CALLBACK_NON_MATCHES 0x02
886887
#define CALLBACK_ALL CALLBACK_MATCHES | CALLBACK_NON_MATCHES
887888

888-
889889
int yara_callback(
890890
YR_SCAN_CONTEXT* context,
891891
int message,
@@ -987,8 +987,24 @@ int yara_callback(
987987
else
988988
object = PY_STRING(meta->string);
989989

990-
PyDict_SetItemString(meta_list, meta->identifier, object);
991-
Py_DECREF(object);
990+
if (((CALLBACK_DATA*) user_data)->allow_duplicate_metadata){
991+
// Check if we already have an array under this key
992+
PyObject* existing_item = PyDict_GetItemString(meta_list, meta->identifier);
993+
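// Append object to the existing list. NOTE(review): PyList_Append adds its own reference and this branch never calls Py_DECREF(object) -- looks like a reference leak; confirm.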
994+
if (existing_item)
995+
PyList_Append(existing_item, object);
996+
else{
997+
// Otherwise, create a new list with object as its first item. NOTE(review): object is not Py_DECREF'ed on this path either -- confirm.
998+
PyObject* new_list = PyList_New(0);
999+
PyList_Append(new_list, object);
1000+
PyDict_SetItemString(meta_list, meta->identifier, new_list);
1001+
Py_DECREF(new_list);
1002+
}
1003+
}
1004+
else{
1005+
PyDict_SetItemString(meta_list, meta->identifier, object);
1006+
Py_DECREF(object);
1007+
}
9921008
}
9931009

9941010
yr_rule_strings_foreach(rule, string)
@@ -1594,8 +1610,9 @@ static PyObject* Rules_next(
15941610
else
15951611
object = PY_STRING(meta->string);
15961612

1597-
PyDict_SetItemString(meta_list, meta->identifier, object);
1598-
Py_DECREF(object);
1613+
PyDict_SetItemString(meta_list, meta->identifier, object);
1614+
Py_DECREF(object);
1615+
15991616
}
16001617

16011618
rule->global = PyBool_FromLong(rules->iter_current_rule->flags & RULE_FLAGS_GLOBAL);
@@ -1623,7 +1640,7 @@ static PyObject* Rules_match(
16231640
"filepath", "pid", "data", "externals",
16241641
"callback", "fast", "timeout", "modules_data",
16251642
"modules_callback", "which_callbacks", "warnings_callback",
1626-
"console_callback", NULL
1643+
"console_callback", "allow_duplicate_metadata", NULL
16271644
};
16281645

16291646
char* filepath = NULL;
@@ -1648,11 +1665,12 @@ static PyObject* Rules_match(
16481665
callback_data.warnings_callback = NULL;
16491666
callback_data.console_callback = NULL;
16501667
callback_data.which = CALLBACK_ALL;
1668+
callback_data.allow_duplicate_metadata = false;
16511669

16521670
if (PyArg_ParseTupleAndKeywords(
16531671
args,
16541672
keywords,
1655-
"|sis*OOOiOOiOO",
1673+
"|sis*OOOiOOiOOb",
16561674
kwlist,
16571675
&filepath,
16581676
&pid,
@@ -1665,7 +1683,8 @@ static PyObject* Rules_match(
16651683
&callback_data.modules_callback,
16661684
&callback_data.which,
16671685
&callback_data.warnings_callback,
1668-
&callback_data.console_callback))
1686+
&callback_data.console_callback,
1687+
&callback_data.allow_duplicate_metadata))
16691688
{
16701689
if (filepath == NULL && data.buf == NULL && pid == -1)
16711690
{
@@ -1729,6 +1748,9 @@ static PyObject* Rules_match(
17291748
}
17301749
}
17311750

1751+
if (callback_data.allow_duplicate_metadata == NULL)
1752+
callback_data.allow_duplicate_metadata = false;
1753+
17321754
if (yr_scanner_create(object->rules, &scanner) != 0)
17331755
{
17341756
return PyErr_Format(

0 commit comments

Comments
 (0)