From 9fcf8fcca58e96f8f641995f717bdc8f2f37477c Mon Sep 17 00:00:00 2001 From: Peace Turnah <125773330+pturnah@users.noreply.github.com> Date: Sat, 6 Apr 2024 20:55:34 +0100 Subject: [PATCH] Update features.py Fixed: ValueError: Samples can not be a single string. The input must be an iterable over iterables of strings. Resolved by replacing: entry_name_hashed = FeatureHasher(50, input_type="string").transform([raw_obj['entry']]).toarray()[0] with: entry_name_hashed = FeatureHasher(50, input_type="string").transform([ [raw_obj['entry']] ]).toarray()[0] at line 192. This way, an iterable over iterables of raw features is passed, as the transform() method requires. --- ember/features.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ember/features.py b/ember/features.py index bbaa138..ef0dae1 100644 --- a/ember/features.py +++ b/ember/features.py @@ -189,7 +189,7 @@ def process_raw_features(self, raw_obj): section_entropy_hashed = FeatureHasher(50, input_type="pair").transform([section_entropy]).toarray()[0] section_vsize = [(s['name'], s['vsize']) for s in sections] section_vsize_hashed = FeatureHasher(50, input_type="pair").transform([section_vsize]).toarray()[0] - entry_name_hashed = FeatureHasher(50, input_type="string").transform([raw_obj['entry']]).toarray()[0] + entry_name_hashed = FeatureHasher(50, input_type="string").transform([ [raw_obj['entry']] ]).toarray()[0] characteristics = [p for s in sections for p in s['props'] if s['name'] == raw_obj['entry']] characteristics_hashed = FeatureHasher(50, input_type="string").transform([characteristics]).toarray()[0]