Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Microsoft Active Directory source #1074

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
16 changes: 16 additions & 0 deletions cartography/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,22 @@ def _build_parser(self):
'The name of environment variable containing secrets for GSuite authentication.'
),
)
parser.add_argument(
'--activedirectory-name',
type=str,
default=None,
help=(
'Name of used Active Directory name for labelling.'
),
)
parser.add_argument(
'--activedirectory-dirpath',
type=str,
default=None,
help=(
'Directory path where to find ActiveDirectory data from BloodHound and similar tools.'
),
)
return parser

def main(self, argv: str) -> int:
Expand Down
4 changes: 4 additions & 0 deletions cartography/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,8 @@ def __init__(
crowdstrike_api_url=None,
gsuite_auth_method=None,
gsuite_config=None,
activedirectory_name=None,
activedirectory_dirpath=None,
):
self.neo4j_uri = neo4j_uri
self.neo4j_user = neo4j_user
Expand Down Expand Up @@ -176,3 +178,5 @@ def __init__(
self.crowdstrike_api_url = crowdstrike_api_url
self.gsuite_auth_method = gsuite_auth_method
self.gsuite_config = gsuite_config
self.activedirectory_name = activedirectory_name
self.activedirectory_dirpath = activedirectory_dirpath
10 changes: 10 additions & 0 deletions cartography/data/jobs/cleanup/activedirectory_import_cleanup.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"statements": [
{
"query": "WITH datetime()-duration('P7D') AS threshold MATCH (h:ActiveDirectoryHost) WHERE h.lastupdated < threshold WITH h LIMIT $LIMIT_SIZE DETACH DELETE (h)",
"iterative": true,
"iterationsize": 100
}
],
"name": "cleanup activedirectory"
}
59 changes: 59 additions & 0 deletions cartography/intel/activedirectory/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
"""
cartography/intel/activedirectory
"""
import logging

import neo4j

from cartography.config import Config
from cartography.intel.activedirectory.endpoints import sync_hosts
from cartography.stats import get_stats_client
from cartography.util import merge_module_sync_metadata
from cartography.util import run_cleanup_job
from cartography.util import timeit

logger = logging.getLogger(__name__)
stat_handler = get_stats_client(__name__)


@timeit
def start_activedirectory_ingestion(
    neo4j_session: neo4j.Session,
    config: Config,
) -> None:
    """
    Perform ingestion of ActiveDirectory data.

    Syncs the hosts found under ``config.activedirectory_dirpath``, runs the
    ``activedirectory_import_cleanup.json`` cleanup job and records the module
    sync metadata under ``config.activedirectory_name``. Nothing is done when
    either of those two settings is missing.

    :param neo4j_session: Neo4J session for database interface
    :param config: A cartography.config object
    :return: None
    """
    if not config.activedirectory_dirpath or not config.activedirectory_name:
        logger.error("activedirectory config not found")
        return

    common_job_parameters = {
        "UPDATE_TAG": config.update_tag,
    }

    # sync_hosts() is annotated to take a Tuple[str]; handing over the bare
    # path string would make a one-element tuple unpacking downstream fail.
    authorization = (config.activedirectory_dirpath,)
    sync_hosts(
        neo4j_session,
        config.update_tag,
        authorization,
    )
    run_cleanup_job(
        "activedirectory_import_cleanup.json",
        neo4j_session,
        common_job_parameters,
    )

    # activedirectory_name is guaranteed non-empty by the guard above, so it
    # is used directly as the sync-metadata group id.
    merge_module_sync_metadata(
        neo4j_session,
        group_type="activedirectory",
        group_id=config.activedirectory_name,
        synced_type="activedirectory",
        update_tag=config.update_tag,
        stat_handler=stat_handler,
    )
57 changes: 57 additions & 0 deletions cartography/intel/activedirectory/endpoints.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""
cartography/intel/activedirectory/endpoints
"""
# pylint: disable=missing-function-docstring,too-many-arguments
import logging
from typing import Dict
from typing import List
from typing import Tuple

import neo4j

from .util import get_activedirectory_hosts
from cartography.util import timeit

logger = logging.getLogger(__name__)


@timeit
def sync_hosts(
    neo4j_session: neo4j.Session,
    update_tag: int,
    authorization: Tuple[str],
) -> None:
    """
    Fetch the ActiveDirectory hosts and load them into the graph.

    :param neo4j_session: Neo4J session for database interface
    :param update_tag: Update tag written to ``lastupdated`` on each host
    :param authorization: Passed through to ``get_activedirectory_hosts``
    :return: None
    """
    activedirectory_hosts_list = get_activedirectory_hosts(authorization)
    if not activedirectory_hosts_list:
        # Nothing to ingest; avoid issuing an empty query.
        return

    # load_host_data() takes the whole list and UNWINDs it in a single query,
    # so hand it over in one call rather than one host dict at a time.
    load_host_data(neo4j_session, activedirectory_hosts_list, update_tag)


def load_host_data(
    neo4j_session: neo4j.Session,
    data: List[Dict],
    update_tag: int,
) -> None:
    """
    Upsert ActiveDirectoryHost nodes from the given host records.

    Each record is merged on ``hostname``; ``ad_domain`` and ``firstseen`` are
    only written when the node is created, the remaining properties and
    ``lastupdated`` are refreshed on every run.

    :param neo4j_session: Neo4J session for database interface
    :param data: Host records passed to the query as ``$Hosts``
    :param update_tag: Value stored in ``lastupdated``
    :return: None
    """
    host_count = len(data)
    logger.debug("Loading %s activedirectory hosts.", host_count)

    # The query text is kept exactly as-is; only the way it is handed to the
    # session differs.
    merge_hosts_query = """
UNWIND $Hosts AS host
MERGE (h:ActiveDirectoryHost{hostname: host.hostname})
ON CREATE SET h.hostname = host.hostname,
h.ad_domain = host.ad_domain,
h.firstseen = timestamp()
SET h.short_hostname = host.short_hostname,
h.objectid = host.objectid,
h.distinguishedname = host.distinguishedname,
h.unconstraineddelegation = host.unconstraineddelegation,
h.enabled = host.enabled,
h.highvalue = host.highvalue,
h.modified_timestamp = host.modified_timestamp,
h.lastupdated = $update_tag
"""
    query_parameters = {
        "Hosts": data,
        "update_tag": update_tag,
    }
    neo4j_session.run(merge_hosts_query, **query_parameters)
95 changes: 95 additions & 0 deletions cartography/intel/activedirectory/util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
"""
cartography/intel/activedirectory/util
"""
# pylint: disable=invalid-name,broad-except
import json
import logging
import os
from typing import List
from typing import Tuple
from typing import Union

import pandas

logger = logging.getLogger(__name__)


def get_activedirectory_hosts(
    authorization: Union[str, Tuple[str]],
) -> List:
    """
    Get ActiveDirectory computer inventory.

    Loaded from the ``computers.json`` file produced by tools like SharpHound,
    BloodHound.py or RustHound. Each computer record is flattened and the
    fields used for ingestion (``ad_domain``, ``hostname``, ``short_hostname``,
    ``objectid``, ``distinguishedname``, ``highvalue``,
    ``unconstraineddelegation``, ``enabled``) are derived from its
    ``Properties``. After reading, the file is renamed to
    ``computers.json.old`` so the same export is not imported twice.

    :param authorization: Directory containing ``computers.json``, given
        either as a plain path string or as a one-element tuple holding it.
    :return: List of flattened host dicts, or an empty list when the
        directory/file is missing or the export contains no computers.
    :raises KeyError: if the file has no ``computers`` key or a record set
        lacks one of the required ``Properties`` fields.
    """
    # Accept a bare path as well as the 1-tuple: unpacking a plain string
    # with `(x,) = value` raises ValueError for any path longer than one char.
    if isinstance(authorization, str):
        activedirectory_dirpath = authorization
    else:
        (activedirectory_dirpath,) = authorization
    activedirectory_filename = os.path.join(activedirectory_dirpath, "computers.json")

    if not (
        os.path.isdir(activedirectory_dirpath) and
        os.path.exists(activedirectory_filename)
    ):
        logger.warning(
            "Directory %s or matching computers.json don't exist",
            activedirectory_dirpath,
        )
        return []

    with open(
        activedirectory_filename,
        encoding="utf-8",
    ) as data_file:
        data = json.load(data_file)

    flatten_data: List = []
    computers = data["computers"]
    # An empty export normalizes to a column-less frame, on which the column
    # lookups below would raise; skip straight to rotation in that case.
    if computers:
        df_computers = pandas.json_normalize(computers, sep="_", max_level=3)
        logger.debug("Example df_computers[0]: %s", df_computers.iloc[:1].to_string())

        df_computers["ad_domain"] = df_computers["Properties_domain"]
        df_computers["hostname"] = df_computers["Properties_name"]
        # Short hostname: lower-case, everything from the first dot removed.
        # Assigning the result back avoids an in-place edit on a column
        # selection, which pandas does not guarantee to write through.
        df_computers["short_hostname"] = (
            df_computers["hostname"].str.lower().str.replace(r"\..*$", "", regex=True)
        )
        df_computers["objectid"] = df_computers["Properties_objectid"]
        df_computers["distinguishedname"] = df_computers["Properties_distinguishedname"]
        df_computers["highvalue"] = df_computers["Properties_highvalue"]
        df_computers["unconstraineddelegation"] = df_computers[
            "Properties_unconstraineddelegation"
        ]
        df_computers["enabled"] = df_computers["Properties_enabled"]

        # These relationship columns are not ingested. Not every export is
        # guaranteed to carry all of them, so absent ones are ignored.
        df_computers = df_computers.drop(
            columns=[
                "AllowedToAct",
                "LocalAdmins",
                "PSRemoteUsers",
                "RemoteDesktopUsers",
                "DcomUsers",
                "AllowedToDelegate",
                "Sessions",
                "Aces",
            ],
            errors="ignore",
        )

        logger.info("activedirectoryHosts count final: %s", df_computers.shape[0])
        # Round-trip through JSON to get plain dicts with NaN turned into None.
        flatten_data = json.loads(df_computers.to_json(orient="records"))

    # Rotate file to avoid importing same twice
    if os.access(activedirectory_filename, os.W_OK):
        logger.info("Moving %s to .old", activedirectory_filename)
        # os.replace overwrites a previous .old on every platform, whereas
        # os.rename fails on Windows when the target already exists.
        os.replace(activedirectory_filename, f"{activedirectory_filename}.old")

    if flatten_data:
        logger.debug("Example: %s", flatten_data[0])
        return flatten_data

    logger.warning("No data returned")
    return []
2 changes: 2 additions & 0 deletions cartography/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from neo4j import GraphDatabase
from statsd import StatsClient

import cartography.intel.activedirectory
import cartography.intel.analysis
import cartography.intel.aws
import cartography.intel.azure
Expand All @@ -35,6 +36,7 @@

TOP_LEVEL_MODULES = OrderedDict({ # preserve order so that the default sync always runs `analysis` at the very end
'create-indexes': cartography.intel.create_indexes.run,
'activedirectory': cartography.intel.activedirectory.start_activedirectory_ingestion,
'aws': cartography.intel.aws.start_aws_ingestion,
'azure': cartography.intel.azure.start_azure_ingestion,
'crowdstrike': cartography.intel.crowdstrike.start_crowdstrike_ingestion,
Expand Down
27 changes: 27 additions & 0 deletions docs/root/modules/activedirectory/activedirectory.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Cartography - ActiveDirectory Schema

<!-- START doctoc generated TOC please keep comment here to allow auto update -->
<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
<!-- END doctoc generated TOC please keep comment here to allow auto update -->

## Table of contents

- [ActiveDirectoryHost](#activedirectoryhost)

## ActiveDirectoryHost

Placeholder representation of a single Active Directory computer, as exported in BloodHound format by collectors such as SharpHound, BloodHound.py or RustHound.

| Field | Description |
|-------|--------------|
| firstseen| Timestamp of when a sync job first discovered this node |
| lastupdated | Timestamp of the last time the node was updated |
| hostname | Computer name |
| short_hostname | Lower-cased hostname with everything from the first dot onwards removed |
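| distinguishedname | Distinguished name of the computer object |
| enabled | Whether the computer account is enabled |
| highvalue | Whether the computer is flagged as high value in the export |
| objectid | Object identifier of the computer as exported by the collector |
| unconstraineddelegation | Whether unconstrained delegation is set for the computer |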

### Relationships