diff --git a/cartography/cli.py b/cartography/cli.py index 2c0f08afe0..eee388895d 100644 --- a/cartography/cli.py +++ b/cartography/cli.py @@ -436,6 +436,22 @@ def _build_parser(self): 'The name of environment variable containing secrets for GSuite authentication.' ), ) + parser.add_argument( + '--activedirectory-name', + type=str, + default=None, + help=( + 'Name of used Active Directory name for labelling.' + ), + ) + parser.add_argument( + '--activedirectory-dirpath', + type=str, + default=None, + help=( + 'Directory path where to find ActiveDirectory data from BloodHound and similar tools.' + ), + ) return parser def main(self, argv: str) -> int: diff --git a/cartography/config.py b/cartography/config.py index a18d5f5ad1..d55b95f93f 100644 --- a/cartography/config.py +++ b/cartography/config.py @@ -133,6 +133,8 @@ def __init__( crowdstrike_api_url=None, gsuite_auth_method=None, gsuite_config=None, + activedirectory_name=None, + activedirectory_dirpath=None, ): self.neo4j_uri = neo4j_uri self.neo4j_user = neo4j_user @@ -176,3 +178,5 @@ def __init__( self.crowdstrike_api_url = crowdstrike_api_url self.gsuite_auth_method = gsuite_auth_method self.gsuite_config = gsuite_config + self.activedirectory_name = activedirectory_name + self.activedirectory_dirpath = activedirectory_dirpath diff --git a/cartography/data/jobs/cleanup/activedirectory_import_cleanup.json b/cartography/data/jobs/cleanup/activedirectory_import_cleanup.json new file mode 100644 index 0000000000..4dd0f63956 --- /dev/null +++ b/cartography/data/jobs/cleanup/activedirectory_import_cleanup.json @@ -0,0 +1,10 @@ +{ + "statements": [ + { + "query": "WITH datetime()-duration('P7D') AS threshold MATCH (h:ActiveDirectoryHost) WHERE h.lastupdated < threshold WITH h LIMIT $LIMIT_SIZE DETACH DELETE (h)", + "iterative": true, + "iterationsize": 100 + } + ], + "name": "cleanup activedirectory" +} diff --git a/cartography/intel/activedirectory/__init__.py b/cartography/intel/activedirectory/__init__.py new 
@timeit
def start_activedirectory_ingestion(
    neo4j_session: neo4j.Session,
    config: Config,
) -> None:
    """
    Perform ingestion of ActiveDirectory data.

    The module does nothing (apart from logging an error) unless both
    ``config.activedirectory_dirpath`` and ``config.activedirectory_name``
    are set. Otherwise it syncs the hosts, runs the cleanup job and records
    the module sync metadata under the configured Active Directory name.

    :param neo4j_session: Neo4J session for database interface
    :param config: A cartography.config object
    :return: None
    """
    if not config.activedirectory_dirpath or not config.activedirectory_name:
        logger.error("activedirectory config not found")
        return

    common_job_parameters = {
        "UPDATE_TAG": config.update_tag,
    }

    # sync_hosts() is annotated to receive the directory path wrapped in a
    # 1-tuple (Tuple[str]); a bare string would be unpacked character by
    # character further down and raise ValueError.
    authorization = (config.activedirectory_dirpath,)
    sync_hosts(
        neo4j_session,
        config.update_tag,
        authorization,
    )
    run_cleanup_job(
        "activedirectory_import_cleanup.json",
        neo4j_session,
        common_job_parameters,
    )

    # activedirectory_name is guaranteed non-empty by the guard above, so it
    # is used directly as the sync-metadata group id.
    merge_module_sync_metadata(
        neo4j_session,
        group_type="activedirectory",
        group_id=config.activedirectory_name,
        synced_type="activedirectory",
        update_tag=config.update_tag,
        stat_handler=stat_handler,
    )
@timeit
def sync_hosts(
    neo4j_session: neo4j.Session,
    update_tag: int,
    authorization: Tuple[str],
) -> None:
    """
    Read the ActiveDirectory hosts and load them into the graph.

    :param neo4j_session: Neo4J session for database interface
    :param update_tag: Update tag written to ``lastupdated`` on every host
    :param authorization: Passed through unchanged to get_activedirectory_hosts()
    :return: None
    """
    activedirectory_hosts_list = get_activedirectory_hosts(authorization)
    if not activedirectory_hosts_list:
        # Nothing to ingest: avoid sending an empty batch to the database.
        return
    # load_host_data() takes the whole list and UNWINDs it in a single query;
    # calling it once per host issued one query per host and made it log the
    # number of keys of a host dict instead of the number of hosts.
    load_host_data(neo4j_session, activedirectory_hosts_list, update_tag)


def load_host_data(
    neo4j_session: neo4j.Session,
    data: List[Dict],
    update_tag: int,
) -> None:
    """
    Transform and load scan information

    Merges one ``ActiveDirectoryHost`` node per entry of ``data``, keyed on
    ``hostname``. ``ad_domain`` and ``firstseen`` are only written when the
    node is created; the remaining properties and ``lastupdated`` are
    refreshed on every run.

    :param neo4j_session: Neo4J session for database interface
    :param data: List of host dicts as returned by get_activedirectory_hosts()
    :param update_tag: Update tag written to ``lastupdated``
    :return: None
    """
    ingestion_cypher_query = """
        UNWIND $Hosts AS host
            MERGE (h:ActiveDirectoryHost{hostname: host.hostname})
            ON CREATE SET h.hostname = host.hostname,
                h.ad_domain = host.ad_domain,
                h.firstseen = timestamp()
            SET h.short_hostname = host.short_hostname,
                h.objectid = host.objectid,
                h.distinguishedname = host.distinguishedname,
                h.unconstraineddelegation = host.unconstraineddelegation,
                h.enabled = host.enabled,
                h.highvalue = host.highvalue,
                h.modified_timestamp = host.modified_timestamp,
                h.lastupdated = $update_tag
    """
    logger.debug("Loading %s activedirectory hosts.", len(data))
    neo4j_session.run(
        ingestion_cypher_query,
        Hosts=data,
        update_tag=update_tag,
    )
def get_activedirectory_hosts(
    authorization: Tuple[str],
) -> List:
    """
    Get ActiveDirectory (Logging) coverage inventory

    Loaded from json files produced by tool like SharpHound, BloodHound.py or RustHound

    Reads ``computers.json`` from the given directory, flattens every entry of
    its ``computers`` list and adds the columns the loader expects
    (``ad_domain``, ``hostname``, ``short_hostname``, ``objectid``,
    ``distinguishedname``, ``highvalue``, ``unconstraineddelegation``,
    ``enabled``). When the file is writable it is renamed to
    ``computers.json.old`` afterwards so the same export is not imported twice.

    :param authorization: Directory holding ``computers.json``, wrapped in a
        1-tuple. A bare path string is accepted as well.
    :return: One flat dict per computer, or an empty list when the file is
        missing or contains no computers.
    """
    # Accept the bare directory path as well as the annotated 1-tuple:
    # unpacking a str would split it into characters and raise ValueError.
    if isinstance(authorization, str):
        activedirectory_dirpath = authorization
    else:
        (activedirectory_dirpath,) = authorization
    activedirectory_filename = os.path.join(activedirectory_dirpath, "computers.json")

    # isfile() also rejects a directory that happens to be named computers.json.
    if not os.path.isfile(activedirectory_filename):
        logger.warning(
            "Directory %s or matching computers.json don't exist",
            activedirectory_dirpath,
        )
        return []

    with open(
        activedirectory_filename,
        encoding="utf-8",
    ) as data_file:
        data = json.load(data_file)

    # An empty list would normalize to a frame without any column and make
    # every column lookup below fail with KeyError.
    computers = data.get("computers") if isinstance(data, dict) else None
    if not computers:
        logger.warning("No data returned")
        return []

    df_computers = pandas.json_normalize(computers, sep="_", max_level=3)
    logger.info("Example df_computers[0]: %s", df_computers.iloc[:1].to_string())

    df_computers["ad_domain"] = df_computers["Properties_domain"]
    df_computers["hostname"] = df_computers["Properties_name"]
    # Lower-case the name and cut everything from the first dot onwards.
    # Assigning the result (instead of an in-place replace on a column
    # selection) guarantees the frame itself is updated.
    df_computers["short_hostname"] = (
        df_computers["hostname"].str.lower().str.replace(r"\..*$", "", regex=True)
    )
    df_computers["objectid"] = df_computers["Properties_objectid"]
    df_computers["distinguishedname"] = df_computers["Properties_distinguishedname"]
    df_computers["highvalue"] = df_computers["Properties_highvalue"]
    df_computers["unconstraineddelegation"] = df_computers[
        "Properties_unconstraineddelegation"
    ]
    df_computers["enabled"] = df_computers["Properties_enabled"]

    # These list-valued columns are not loaded as host properties.
    # errors="ignore" keeps the import working when an export lacks some of them.
    df_computers = df_computers.drop(
        columns=[
            "AllowedToAct",
            "LocalAdmins",
            "PSRemoteUsers",
            "RemoteDesktopUsers",
            "DcomUsers",
            "AllowedToDelegate",
            "Sessions",
            "Aces",
        ],
        errors="ignore",
    )

    logger.info("activedirectoryHosts count final: %s", df_computers.shape[0])

    # Rotate file to avoid importing same twice
    if os.access(activedirectory_filename, os.W_OK):
        logger.info("Moving %s to .old", activedirectory_filename)
        # os.replace overwrites a previous .old file on every platform.
        os.replace(activedirectory_filename, f"{activedirectory_filename}.old")

    # Round-trip through JSON so the records only hold plain Python values.
    flatten_data = json.loads(df_computers.to_json(orient="records"))
    if not flatten_data:
        logger.warning("No data returned")
        return []

    logger.debug("Example: %s", flatten_data[0])
    return flatten_data
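+ +| Field | Description | +|-------|--------------| +| firstseen| Timestamp of when a sync job first discovered this node | +| lastupdated | Timestamp of the last time the node was updated | +| hostname | Computer name as exported by BloodHound (the `name` property) | +| short_hostname | Lower-cased hostname with everything from the first dot onwards removed | +| distinguishedname | BloodHound `distinguishedname` property of the computer | +| enabled | BloodHound `enabled` property of the computer | +| highvalue | BloodHound `highvalue` property of the computer | +| objectid | BloodHound `objectid` property of the computer | +| unconstraineddelegation | BloodHound `unconstraineddelegation` property of the computer | + +### Relationships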