Skip to content

Commit

Permalink
Merge pull request #347 from deepfence/add-separate-index-for-artifacts
Browse files Browse the repository at this point in the history
Add new index for sbom artifacts
  • Loading branch information
jatin-baweja authored Mar 19, 2022
2 parents 2715d4d + 1c44937 commit c1ba8a3
Show file tree
Hide file tree
Showing 8 changed files with 252 additions and 30 deletions.
9 changes: 8 additions & 1 deletion deepfence_backend/api/common_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
CVE_SCAN_LOGS_INDEX, SCOPE_TOPOLOGY_COUNT, NODE_TYPE_HOST, NODE_TYPE_CONTAINER, NODE_TYPE_POD, ES_MAX_CLAUSE, \
TOPOLOGY_ID_CONTAINER, TOPOLOGY_ID_CONTAINER_IMAGE, TOPOLOGY_ID_HOST, NODE_TYPE_CONTAINER_IMAGE, \
TOPOLOGY_ID_KUBE_SERVICE, NODE_TYPE_KUBE_CLUSTER, ES_TERMS_AGGR_SIZE, \
REGISTRY_IMAGES_CACHE_KEY_PREFIX, NODE_TYPE_KUBE_NAMESPACE, SECRET_SCAN_LOGS_INDEX, SECRET_SCAN_INDEX, SBOM_INDEX
REGISTRY_IMAGES_CACHE_KEY_PREFIX, NODE_TYPE_KUBE_NAMESPACE, SECRET_SCAN_LOGS_INDEX, SECRET_SCAN_INDEX, SBOM_INDEX, \
SBOM_ARTIFACT_INDEX
from utils.scope import fetch_topology_data
from utils.node_helper import determine_node_status
from datetime import datetime, timedelta
Expand Down Expand Up @@ -1003,16 +1004,22 @@ def delete_resources():
**scan_log_filters, "node_id": delete_node_name_chunk, "node_type": NODE_TYPE_HOST})
ESConn.bulk_delete(SBOM_INDEX, {
**scan_log_filters, "node_id": delete_node_name_chunk, "node_type": NODE_TYPE_HOST})
ESConn.bulk_delete(SBOM_ARTIFACT_INDEX, {
**scan_log_filters, "node_id": delete_node_name_chunk, "node_type": NODE_TYPE_HOST})
for delete_node_name_chunk in split_list_into_chunks(image_names_to_delete, ES_MAX_CLAUSE):
ESConn.bulk_delete(CVE_SCAN_LOGS_INDEX, {
**scan_log_filters, "node_id": delete_node_name_chunk, "node_type": NODE_TYPE_CONTAINER_IMAGE})
ESConn.bulk_delete(SBOM_INDEX, {
**scan_log_filters, "node_id": delete_node_name_chunk, "node_type": NODE_TYPE_CONTAINER_IMAGE})
ESConn.bulk_delete(SBOM_ARTIFACT_INDEX, {
**scan_log_filters, "node_id": delete_node_name_chunk, "node_type": NODE_TYPE_CONTAINER_IMAGE})
else:
ESConn.bulk_delete(CVE_SCAN_LOGS_INDEX, scan_log_filters, number,
TIME_UNIT_MAPPING[time_unit])
ESConn.bulk_delete(SBOM_INDEX, scan_log_filters, number,
TIME_UNIT_MAPPING[time_unit])
ESConn.bulk_delete(SBOM_ARTIFACT_INDEX, scan_log_filters, number,
TIME_UNIT_MAPPING[time_unit])
message = "Successfully scheduled deletion of selected vulnerabilities"

elif index_name == SECRET_SCAN_INDEX:
Expand Down
43 changes: 19 additions & 24 deletions deepfence_backend/api/vulnerability_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
NODE_TYPE_HOST, CVE_SCAN_TYPES, NODE_TYPE_CONTAINER_IMAGE, REGISTRY_IMAGES_CACHE_KEY_PREFIX, \
MAX_TOTAL_SEVERITY_SCORE, MAX_TOP_EXPLOITABLE_VULNERABILITIES, REGISTRY_TYPE_GCLOUD, TOPOLOGY_FILTERS_PREFIX, \
NODE_TYPE_REGISTRY_IMAGE, DF_ID_TO_SCOPE_ID_REDIS_KEY_PREFIX, NODE_ACTION_CVE_SCAN_START, ES_MAX_CLAUSE, \
CVE_SCAN_LOGS_INDEX, SBOM_INDEX, SBOM_DEFAULT_FIELDS
CVE_SCAN_LOGS_INDEX, SBOM_INDEX, SBOM_DEFAULT_FIELDS, SBOM_ARTIFACT_INDEX
from utils.decorators import user_permission, non_read_only_user
from models.node_tags import NodeTags
from models.container_image_registry import RegistryCredential
Expand Down Expand Up @@ -1539,6 +1539,7 @@ def list_process_open_files():
else:
lucene_query_string = ""


sbom_aggs = {
"node_type": {
"terms": {
Expand All @@ -1554,36 +1555,29 @@ def list_process_open_files():
"size": ES_TERMS_AGGR_SIZE
},
"aggs": {
"artifacts": {
"nested": {
"path": "artifacts"
"package_name": {
"terms": {
"field": "name.keyword",
"size": ES_TERMS_AGGR_SIZE
},
"aggs": {
"package_name": {
"package_version": {
"terms": {
"field": "artifacts.name.keyword",
"field": "version.keyword",
"size": ES_TERMS_AGGR_SIZE
},
"aggs": {
"package_version": {
"package_type": {
"terms": {
"field": "artifacts.version.keyword",
"field": "language.keyword",
"size": ES_TERMS_AGGR_SIZE
},
"aggs": {
"package_type": {
"package_file": {
"terms": {
"field": "artifacts.language.keyword",
"field": "locations.path.keyword",
"exclude": [""],
"size": ES_TERMS_AGGR_SIZE
},
"aggs": {
"package_file": {
"terms": {
"field": "artifacts.locations.path.keyword",
"exclude": [""],
"size": ES_TERMS_AGGR_SIZE
}
}
}
}
}
Expand All @@ -1598,7 +1592,7 @@ def list_process_open_files():
}
}

sbom_aggs_response = ESConn.aggregation_helper(SBOM_INDEX, {}, sbom_aggs, number,
sbom_aggs_response = ESConn.aggregation_helper(SBOM_ARTIFACT_INDEX, {}, sbom_aggs, number,
TIME_UNIT_MAPPING.get(time_unit), lucene_query_string,
add_masked_filter=False)

Expand All @@ -1620,7 +1614,7 @@ def list_process_open_files():
"node_name": node_name,
}
packages = []
for package_name_aggr in node_name_aggr["artifacts"]["package_name"]["buckets"]:
for package_name_aggr in node_name_aggr["package_name"]["buckets"]:
package_name = package_name_aggr["key"]
for package_version_aggr in package_name_aggr["package_version"]["buckets"]:
package_version = package_version_aggr["key"]
Expand Down Expand Up @@ -1745,10 +1739,11 @@ def get_sbom():
filters = {}
if action == "get":
es_resp = ESConn.search_by_and_clause(
SBOM_INDEX, filters, req_json.get("start_index", 0),
req_json.get("sort_order", "desc"), size=req_json.get("size", 10),
SBOM_ARTIFACT_INDEX, filters, req_json.get("start_index", 0),
req_json.get("sort_order", "desc"), size=req_json.get("size", 10000),
lucene_query_string=lucene_query, _source=SBOM_DEFAULT_FIELDS)
return set_response(data={"rows": es_resp["hits"], "total": es_resp.get("total", {}).get("value", 0)})
hits = map(lambda x: x["_source"], es_resp["hits"])
return set_response(data={"rows": [{"_source": {"artifacts": list(hits)}}], "total": es_resp.get("total", {}).get("value", 0)})
elif action == "download":
es_resp = ESConn.search_by_and_clause(
SBOM_INDEX, filters, req_json.get("start_index", 0),
Expand Down
99 changes: 98 additions & 1 deletion deepfence_backend/dockerify/api/init_es_config.sh
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ add_index() {
"type": "date"
},
"artifacts": {
"type": "nested"
"enabled": false
},
"scan_id": {
"type": "text",
Expand Down Expand Up @@ -199,6 +199,97 @@ add_index() {
}'
echo ""

curl -X PUT "http://${ELASTICSEARCH_HOST}:${ELASTICSEARCH_PORT}/sbom-artifact" -H 'Content-Type: application/json' -d'
{
"mappings": {
"properties": {
"@timestamp": {
"type": "date"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"version": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"language": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"licenses": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"locations" : {
"properties" : {
"path": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"scan_id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"node_id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"node_type": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"time_stamp": {
"type": "long"
}
}
}
}'
echo ""

declare -a index_arr=("report")
for index_name in "${index_arr[@]}"
do
Expand Down Expand Up @@ -262,10 +353,16 @@ add_index() {
done
}


# Runs the reindex_sbom_artifacts.py init script with `python`.
reindex_sbom_artifacts_python_script() {
  python "/app/code/init_scripts/reindex_sbom_artifacts.py"
}

add_template
add_index
add_cve_map_pipeline
add_cve_scan_map_pipeline
add_indexed_default_upsert_script
reindex_sbom_artifacts_python_script
echo ""
echo "custom configuration added successfully"
81 changes: 81 additions & 0 deletions deepfence_backend/init_scripts/reindex_sbom_artifacts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import os
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
import math

# One-off migration script: copies every artifact embedded in the documents of
# the "sbom-cve-scan" index into its own document in the "sbom-artifact" index.
# Scans that already have at least one document in the artifact index are
# skipped, so the script can be re-run safely.

EL_HOST = "http://%s:%s" % (os.environ['ELASTICSEARCH_HOST'], os.environ['ELASTICSEARCH_PORT'])

# Basic-auth credentials are only used when ELASTICSEARCH_USER is set.
http_auth = None
if 'ELASTICSEARCH_USER' in os.environ:
    http_auth = (os.environ['ELASTICSEARCH_USER'],
                 os.environ['ELASTICSEARCH_PASSWORD'])

if http_auth:
    EL_CLIENT = Elasticsearch([EL_HOST], http_auth=http_auth, timeout=300)
else:
    EL_CLIENT = Elasticsearch([EL_HOST], timeout=300)

SBOM_INDEX = "sbom-cve-scan"
SBOM_ARTIFACT_INDEX = "sbom-artifact"
# Number of SBOM documents fetched per search request.
ARRAY_SIZE = 5
# Per-artifact fields copied into each sbom-artifact document, in output order.
ARTIFACT_FIELDS = ("name", "version", "locations", "licenses", "language")


def _needs_reindex(scan_id):
    """Return True when the artifact index holds no document for *scan_id*.

    A response without a total hit count is treated as "do not reindex",
    which matches the original ``.get("value", -1) == 0`` check.
    """
    body = {
        "query": {
            "constant_score": {
                "filter": {
                    "bool": {
                        "must": {
                            "terms": {
                                "scan_id.keyword": [scan_id]
                            }
                        }
                    }
                }
            }
        }
    }
    res = EL_CLIENT.search(index=SBOM_ARTIFACT_INDEX, body=body, size=1)
    return res.get("hits", {}).get("total", {}).get("value", -1) == 0


def _build_bulk_actions(source_doc):
    """Build one bulk "index" action per artifact found in *source_doc*.

    *source_doc* is the ``_source`` of a document from the SBOM index. Scan
    metadata is repeated on every artifact document. Artifact fields that are
    absent are stored as ``None`` instead of raising ``KeyError``, and a
    document with no ``artifacts`` key yields an empty list.
    """
    defaults = {
        "scan_id": source_doc["scan_id"],
        "node_id": source_doc.get("node_id"),
        "node_type": source_doc.get("node_type"),
        "masked": "false",
        "@timestamp": source_doc.get("@timestamp"),
        "time_stamp": source_doc.get("time_stamp"),
    }
    return [
        {
            "_op_type": "index",
            "_index": SBOM_ARTIFACT_INDEX,
            "_source": {
                **defaults,
                **{field: artifact.get(field) for field in ARTIFACT_FIELDS},
            },
        }
        for artifact in source_doc.get("artifacts") or []
    ]


def reindex_sbom_artifacts():
    """Copy artifacts of not-yet-migrated scans into the artifact index.

    Does nothing unless both indices exist. Per-item bulk failures are
    printed and processing continues with the next scan.
    """
    if not (EL_CLIENT.indices.exists(index=SBOM_INDEX)
            and EL_CLIENT.indices.exists(index=SBOM_ARTIFACT_INDEX)):
        return

    sbom_count_array = EL_CLIENT.cat.count(SBOM_INDEX, params={"format": "json"})
    sbom_count = int(sbom_count_array[0]["count"]) if sbom_count_array else 0

    # NOTE(review): from_/size paging is bounded by Elasticsearch's
    # index.max_result_window (10000 by default); confirm the SBOM index
    # cannot grow past that, or switch to scroll/search_after.
    for page in range(math.ceil(sbom_count / ARRAY_SIZE)):
        sbom_docs = EL_CLIENT.search(
            index=SBOM_INDEX, body={"query": {"match_all": {}}},
            from_=page * ARRAY_SIZE, size=ARRAY_SIZE,
            sort="scan_id.keyword:desc",
            _source=["scan_id", "node_id", "node_type",
                     "@timestamp", "time_stamp", "artifacts"])
        for sbom_doc in sbom_docs["hits"]["hits"]:
            source_doc = sbom_doc["_source"]
            if not _needs_reindex(source_doc["scan_id"]):
                continue
            bulk_index_actions = _build_bulk_actions(source_doc)
            if not bulk_index_actions:
                continue
            # bulk() returns (success_count, errors); only the error list says
            # whether something failed. raise_on_error=False makes item
            # failures land in that list instead of raising BulkIndexError.
            _, errors = bulk(EL_CLIENT, bulk_index_actions, raise_on_error=False)
            if errors:
                print("Error while bulk processing artifacts for scan_id: ", source_doc["scan_id"])
                print(errors)


if __name__ == "__main__":
    reindex_sbom_artifacts()


3 changes: 2 additions & 1 deletion deepfence_backend/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,8 @@ class USER_ROLES:
CVE_INDEX = "cve"
CVE_SCAN_LOGS_INDEX = "cve-scan"
SBOM_INDEX = "sbom-cve-scan"
SBOM_DEFAULT_FIELDS = ["artifacts.name", "artifacts.version", "artifacts.licenses"]
SBOM_ARTIFACT_INDEX = "sbom-artifact"
SBOM_DEFAULT_FIELDS = ["name", "version", "licenses", "locations.path"]
SECRET_SCAN_INDEX = "secret-scan"
SECRET_SCAN_LOGS_INDEX = "secret-scan-logs"
REPORT_INDEX = "report"
Expand Down
Loading

0 comments on commit c1ba8a3

Please sign in to comment.