Skip to content

Commit

Permalink
[pytx] Remove owner_id from base opinion (#1096)
Browse files Browse the repository at this point in the history
  • Loading branch information
Dcallies authored Aug 5, 2022
1 parent b08372e commit 3900239
Show file tree
Hide file tree
Showing 10 changed files with 373 additions and 112 deletions.
4 changes: 1 addition & 3 deletions python-threatexchange/threatexchange/cli/label_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,7 @@ def execute(self, settings: CLISettings) -> None:
signal_type,
hash_val,
SignalOpinion(
-1, # TODO - remove
SignalOpinionCategory.POSITIVE_CLASS,
self.labels,
True, SignalOpinionCategory.POSITIVE_CLASS, self.labels
),
)
return
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

"""
Wrappers for the json returned by the ThreatExchange API to typed objects.
"""

import collections
import typing as t


Expand Down
32 changes: 24 additions & 8 deletions python-threatexchange/threatexchange/exchanges/fetch_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@ def get_progress_timestamp(self) -> t.Optional[int]:
return None


TFetchCheckpoint = t.TypeVar("TFetchCheckpoint", bound=FetchCheckpointBase)
TFetchCheckpoint = t.TypeVar(
"TFetchCheckpoint", covariant=True, bound=FetchCheckpointBase
)


@dataclass
Expand Down Expand Up @@ -91,21 +93,31 @@ class SignalOpinion:
The metadata of a single signal upload.
Certain APIs won't have any concept of owner, category, or tags,
in which case owner=0, category=TRUE_POSITIVE, tags=[] is reasonable
in which case get_trivial() is a reasonable
default.
Some implementations may extend this to store additional API-specific data
@see threatexchange.fetch_api.SignalExchangeAPI
"""

owner: int
# This opinion was generated by me, rather than an external entity
is_mine: bool
category: SignalOpinionCategory
tags: t.Set[str]

@classmethod
def get_trivial(cls):
return cls(0, SignalOpinionCategory.INVESTIGATION_SEED, [])
return cls(False, SignalOpinionCategory.INVESTIGATION_SEED, [])

def __setstate__(self, d: t.Dict[str, t.Any]) -> None:
    """
    Restore pickled state, upgrading state written by older versions.

    0.99.0 => 1.0.0: the int field 'owner' was replaced by the bool field
    'is_mine'. State that still carries 'owner' has that key dropped and
    is always restored with is_mine=False.
    """
    is_legacy_state = "owner" in d
    if is_legacy_state:
        # Upgrade in place: the dict passed in becomes the instance dict.
        del d["owner"]
        d["is_mine"] = False
    self.__dict__ = d


class AggregateSignalOpinionCategory(IntEnum):
Expand All @@ -115,7 +127,6 @@ class AggregateSignalOpinionCategory(IntEnum):
Keep in Sync with SignalOpinionCategory
"""

# TODO: Move concept of "my" signals into this
NEGATIVE_CLASS = 0
INVESTIGATION_SEED = 1
POSITIVE_CLASS = 2
Expand Down Expand Up @@ -161,7 +172,9 @@ class AggregateSignalOpinion:
tags: t.Set[str]

@classmethod
def from_opinions(cls, opinions: t.List[SignalOpinion]) -> "AggregateSignalOpinion":
def from_opinions(
cls, opinions: t.Sequence[SignalOpinion]
) -> "AggregateSignalOpinion":
assert opinions
return cls(
tags={t for o in opinions for t in o.tags},
Expand All @@ -183,21 +196,24 @@ class FetchedSignalMetadata:
will need to store that here.
"""

def get_as_opinions(self) -> t.List[SignalOpinion]:
def get_as_opinions(self) -> t.Sequence[SignalOpinion]:
return [SignalOpinion.get_trivial()]

def get_as_aggregate_opinion(self) -> AggregateSignalOpinion:
    """Combine all of this record's opinions into one aggregate opinion."""
    opinions = self.get_as_opinions()
    return AggregateSignalOpinion.from_opinions(opinions)

def __str__(self) -> str:
    """
    A human-readable version of the opinion suitable for the terminal.

    Returns the aggregate category name, followed by a space and the
    comma-separated tags when there are any.
    """
    agg = self.get_as_aggregate_opinion()
    if not agg.tags:
        return agg.category.name
    # tags is a set, so its iteration order is arbitrary; sort so the same
    # opinion always prints the same way.
    return f"{agg.category.name} {','.join(sorted(agg.tags))}"


TFetchedSignalMetadata = t.TypeVar(
"TFetchedSignalMetadata", bound=FetchedSignalMetadata
"TFetchedSignalMetadata", covariant=True, bound=FetchedSignalMetadata
)


Expand Down
8 changes: 2 additions & 6 deletions python-threatexchange/threatexchange/exchanges/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,9 @@ class SimpleFetchedSignalMetadata(fetch_state.FetchedSignalMetadata):

opinions: t.List[fetch_state.SignalOpinion] = field(default_factory=list)

def get_as_opinions(self) -> t.List[fetch_state.SignalOpinion]:
def get_as_opinions(self) -> t.Sequence[fetch_state.SignalOpinion]:
return self.opinions

@classmethod
def get_trivial(cls):
    """Build an instance holding only the trivial SignalOpinion."""
    placeholder = fetch_state.SignalOpinion.get_trivial()
    return cls([placeholder])


@dataclass
class _StateTracker:
Expand Down Expand Up @@ -126,7 +122,7 @@ def merge(
return
state.delta = delta

def flush(self):
def flush(self) -> None:
for collab_name, state in self._state.items():
if state.dirty:
assert state.delta is not None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,17 @@ class FBThreatExchangeOpinion(state.SignalOpinion):

REACTION_DESCRIPTOR_ID: t.ClassVar[int] = -1

owner_app_id: int
descriptor_id: t.Optional[int]

def __setstate__(self, d: t.Dict[str, t.Any]) -> None:
    """
    Restore pickled state, upgrading state written by older versions.

    0.99.0 => 1.0.0: the value stored under the legacy 'owner' key is
    copied to 'owner_app_id' before the parent class's __setstate__
    handles the rest of the state.
    """
    try:
        d["owner_app_id"] = d["owner"]
    except KeyError:
        # Already in the current format; nothing to carry over.
        pass
    super().__setstate__(d)


@dataclass
class FBThreatExchangeIndicatorRecord(state.FetchedSignalMetadata):
Expand All @@ -103,7 +112,7 @@ def get_as_opinions( # type: ignore # Why can't mypy tell this is a subclass?

@classmethod
def from_threatexchange_json(
cls, te_json: ThreatUpdateJSON
cls, my_app_id: int, te_json: ThreatUpdateJSON
) -> t.Optional["FBThreatExchangeIndicatorRecord"]:
if te_json.should_delete:
return None
Expand All @@ -130,7 +139,11 @@ def from_threatexchange_json(
category = state.SignalOpinionCategory.NEGATIVE_CLASS

explicit_opinions[owner_id] = FBThreatExchangeOpinion(
owner_id, category, tags, td_id
owner_id == my_app_id,
category,
set(tags),
owner_id,
td_id,
)

for reaction in td_json.get("reactions", []):
Expand All @@ -149,9 +162,10 @@ def from_threatexchange_json(
if owner_id in explicit_opinions:
continue
explicit_opinions[owner_id] = FBThreatExchangeOpinion(
owner_id,
owner_id == my_app_id,
category,
set(),
owner_id,
FBThreatExchangeOpinion.REACTION_DESCRIPTOR_ID,
)

Expand Down Expand Up @@ -276,7 +290,7 @@ def fetch_iter(
updates = {}
for u in batch:
updates[u.threat_type, u.indicator] = _indicator_applies(
u, type_mapping
self.api.app_id, u, type_mapping
)

yield ThreatExchangeDelta(
Expand Down Expand Up @@ -358,6 +372,7 @@ def _merge_record_for_signal_type(


def _indicator_applies(
my_app_id: int,
u: ThreatUpdateJSON,
type_mapping: t.Mapping[
str,
Expand All @@ -370,7 +385,7 @@ def _indicator_applies(
potential_signal_type = type_mapping.get(u.threat_type)
if potential_signal_type is None:
return None
indicator = FBThreatExchangeIndicatorRecord.from_threatexchange_json(u)
indicator = FBThreatExchangeIndicatorRecord.from_threatexchange_json(my_app_id, u)
if indicator is None:
return None
if None in potential_signal_type:
Expand Down
30 changes: 19 additions & 11 deletions python-threatexchange/threatexchange/exchanges/impl/ncmec_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,6 @@ def get_progress_timestamp(self) -> t.Optional[int]:
def from_ncmec_fetch(cls, response: api.GetEntriesResponse) -> "NCMECCheckpoint":
return cls(response.max_timestamp)

def __setstate__(self, d: t.Dict[str, t.Any]) -> None:
    """
    Restore pickled state, upgrading state written by older versions.

    0.99.0 => 1.0.0: field 'max_timestamp' was renamed to
    'get_entries_max_ts'; the old key is moved to the new name.
    """
    try:
        legacy_ts = d.pop("max_timestamp")
    except KeyError:
        # State already uses the current field name.
        pass
    else:
        d["get_entries_max_ts"] = legacy_ts
    self.__dict__ = d


@dataclass
class _NCMECCollabConfigRequiredFields:
Expand All @@ -77,6 +69,19 @@ class NCMECCollabConfig(
)


@dataclass
class NCMECOpinion(state.SignalOpinion):
    """A SignalOpinion that additionally records an `esp_id`."""

    esp_id: int

    def __setstate__(self, d: t.Dict[str, t.Any]) -> None:
        """
        Restore pickled state, upgrading state written by older versions.

        0.99.0 => 1.0.0: the value stored under the legacy 'owner' key is
        copied to 'esp_id' before the parent class's __setstate__ handles
        the rest of the state.
        """
        try:
            d["esp_id"] = d["owner"]
        except KeyError:
            # Already in the current format; nothing to carry over.
            pass
        super().__setstate__(d)


@dataclass
class NCMECSignalMetadata(state.FetchedSignalMetadata):
"""
Expand All @@ -87,10 +92,13 @@ class NCMECSignalMetadata(state.FetchedSignalMetadata):

member_entries: t.Dict[int, t.Set[str]]

def get_as_opinions(self) -> t.List[state.SignalOpinion]:
def get_as_opinions(self) -> t.Sequence[NCMECOpinion]:
return [
state.SignalOpinion(
member_id, state.SignalOpinionCategory.POSITIVE_CLASS, tags
NCMECOpinion(
False, # TODO - get my own esp_id
state.SignalOpinionCategory.POSITIVE_CLASS,
tags,
member_id,
)
for member_id, tags in self.member_entries.items()
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,15 +50,16 @@ def from_stopncii_fetch(
class StopNCIISignalMetadata(state.FetchedSignalMetadata):
feedbacks: t.List[api.StopNCIICSPFeedback]

def get_as_opinions(self) -> t.List[state.SignalOpinion]:
def get_as_opinions(self) -> t.Sequence[state.SignalOpinion]:
# TODO - handle which opinions are mine
opinions = [
state.SignalOpinion(-1, _opinion_mapping(f.feedbackValue), f.tags)
state.SignalOpinion(False, _opinion_mapping(f.feedbackValue), f.tags)
for f in self.feedbacks
]
# implicitly, all records from StopNCII are from user-submitted cases
opinions.append(
state.SignalOpinion(
0, state.SignalOpinionCategory.INVESTIGATION_SEED, set()
False, state.SignalOpinionCategory.INVESTIGATION_SEED, set()
),
)
return opinions
Expand Down
Loading

0 comments on commit 3900239

Please sign in to comment.