Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: stickiness actors query #27349

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
77 changes: 77 additions & 0 deletions frontend/src/queries/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,9 @@
{
"$ref": "#/definitions/FunnelCorrelationActorsQuery"
},
{
"$ref": "#/definitions/StickinessActorsQuery"
},
{
"$ref": "#/definitions/HogQLQuery"
}
Expand Down Expand Up @@ -7779,6 +7782,9 @@
},
{
"$ref": "#/definitions/FunnelCorrelationActorsQuery"
},
{
"$ref": "#/definitions/StickinessActorsQuery"
}
]
}
Expand Down Expand Up @@ -12258,6 +12264,73 @@
"enum": ["strict", "unordered", "ordered"],
"type": "string"
},
"StickinessActorsQuery": {
"additionalProperties": false,
"properties": {
"breakdown": {
"anyOf": [
{
"type": "string"
},
{
"$ref": "#/definitions/BreakdownValueInt"
},
{
"items": {
"type": "string"
},
"type": "array"
}
]
},
"compare": {
"enum": ["current", "previous"],
"type": "string"
},
"day": {
"anyOf": [
{
"type": "string"
},
{
"$ref": "#/definitions/Day"
}
]
},
"includeRecordings": {
"type": "boolean"
},
"interval": {
"description": "An interval selected out of available intervals in source query.",
"type": "integer"
},
"kind": {
"const": "InsightActorsQuery",
"type": "string"
},
"modifiers": {
"$ref": "#/definitions/HogQLQueryModifiers",
"description": "Modifiers used when performing the query"
},
"operator": {
"$ref": "#/definitions/StickinessOperator"
},
"response": {
"$ref": "#/definitions/ActorsQueryResponse"
},
"series": {
"type": "integer"
},
"source": {
"$ref": "#/definitions/InsightQuerySource"
},
"status": {
"type": "string"
}
},
"required": ["kind", "source"],
"type": "object"
},
"StickinessFilter": {
"additionalProperties": false,
"properties": {
Expand Down Expand Up @@ -12356,6 +12429,10 @@
"default": "day",
"description": "Granularity of the response. Can be one of `hour`, `day`, `week` or `month`"
},
"intervalCount": {
"description": "How many intervals comprise a period. Only used for cohorts, otherwise default 1.",
"type": "integer"
},
"kind": {
"const": "StickinessQuery",
"type": "string"
Expand Down
12 changes: 10 additions & 2 deletions frontend/src/queries/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1598,6 +1598,10 @@ export interface StickinessQuery
* @default day
*/
interval?: IntervalType
/**
* How many intervals comprise a period. Only used for cohorts, otherwise default 1.
*/
intervalCount?: integer
/** Events and actions to include */
series: AnyEntityNode[]
/** Properties specific to the stickiness insight */
Expand Down Expand Up @@ -1796,7 +1800,7 @@ export type CachedActorsQueryResponse = CachedQueryResponse<ActorsQueryResponse>

export interface ActorsQuery extends DataNode<ActorsQueryResponse> {
kind: NodeKind.ActorsQuery
source?: InsightActorsQuery | FunnelsActorsQuery | FunnelCorrelationActorsQuery | HogQLQuery
source?: InsightActorsQuery | FunnelsActorsQuery | FunnelCorrelationActorsQuery | StickinessActorsQuery | HogQLQuery
select?: HogQLExpression[]
search?: string
/** Currently only person filters supported. No filters for querying groups. See `filter_conditions()` in actor_strategies.py. */
Expand Down Expand Up @@ -2143,6 +2147,10 @@ export interface InsightActorsQuery<S extends InsightsQueryBase<AnalyticsQueryRe
compare?: 'current' | 'previous'
}

// Actors query for a stickiness insight: inherits every field of InsightActorsQuery and adds an optional `operator`.
// NOTE(review): kept as plain `//` comments because TSDoc in this file appears to be mirrored into schema.json descriptions — confirm before converting to TSDoc.
export interface StickinessActorsQuery extends InsightActorsQuery {
// Optional StickinessOperator; presumably controls how the selected `day` count is compared (exact match vs. a threshold) — TODO confirm against the stickiness runner.
operator?: StickinessOperator
}

export interface FunnelsActorsQuery extends InsightActorsQueryBase {
kind: NodeKind.FunnelsActorsQuery
source: FunnelsQuery
Expand Down Expand Up @@ -2267,7 +2275,7 @@ export type CachedInsightActorsQueryOptionsResponse = CachedQueryResponse<Insigh

// Node of kind `InsightActorsQueryOptions` that wraps one of the supported actors queries as its `source`.
// NOTE(review): kept as plain `//` comments because TSDoc in this file appears to be mirrored into schema.json descriptions — confirm before converting to TSDoc.
export interface InsightActorsQueryOptions extends Node<InsightActorsQueryOptionsResponse> {
    kind: NodeKind.InsightActorsQueryOptions
    // Declared exactly once: the union is the previous set of actors queries widened with StickinessActorsQuery.
    source: InsightActorsQuery | FunnelsActorsQuery | FunnelCorrelationActorsQuery | StickinessActorsQuery
}

export interface DatabaseSchemaSchema {
Expand Down
1 change: 1 addition & 0 deletions posthog/api/test/__snapshots__/test_api_docs.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@
'Warning: enum naming encountered a non-optimally resolvable collision for fields named "kind". The same name has been used for multiple choice sets in multiple components. The collision was resolved with "Kind069Enum". add an entry to ENUM_NAME_OVERRIDES to fix the naming.',
'Warning: enum naming encountered a non-optimally resolvable collision for fields named "kind". The same name has been used for multiple choice sets in multiple components. The collision was resolved with "Kind0ddEnum". add an entry to ENUM_NAME_OVERRIDES to fix the naming.',
'Warning: enum naming encountered a non-optimally resolvable collision for fields named "kind". The same name has been used for multiple choice sets in multiple components. The collision was resolved with "Kind496Enum". add an entry to ENUM_NAME_OVERRIDES to fix the naming.',
'Warning: enum naming encountered a non-optimally resolvable collision for fields named "kind". The same name has been used for multiple choice sets in multiple components. The collision was resolved with "Kind642Enum". add an entry to ENUM_NAME_OVERRIDES to fix the naming.',
'Warning: enum naming encountered a non-optimally resolvable collision for fields named "kind". The same name has been used for multiple choice sets in multiple components. The collision was resolved with "KindCfaEnum". add an entry to ENUM_NAME_OVERRIDES to fix the naming.',
'Warning: enum naming encountered a non-optimally resolvable collision for fields named "type". The same name has been used for multiple choice sets in multiple components. The collision was resolved with "TypeF73Enum". add an entry to ENUM_NAME_OVERRIDES to fix the naming.',
'Warning: operationId "Funnels" has collisions [(\'/api/environments/{project_id}/insights/funnel/\', \'post\'), (\'/api/projects/{project_id}/insights/funnel/\', \'post\')]. resolving with numeral suffixes.',
Expand Down
1 change: 1 addition & 0 deletions posthog/hogql/functions/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,7 @@ def compare_types(arg_types: list[ConstantType], sig_arg_types: tuple[ConstantTy
"toSecond": HogQLFunctionMeta("toSecond", 1, 1),
"toUnixTimestamp": HogQLFunctionMeta("toUnixTimestamp", 1, 2),
"toUnixTimestamp64Milli": HogQLFunctionMeta("toUnixTimestamp64Milli", 1, 1),
"toStartOfInterval": HogQLFunctionMeta("toStartOfInterval", 2, 2),
"toStartOfYear": HogQLFunctionMeta("toStartOfYear", 1, 1),
"toStartOfISOYear": HogQLFunctionMeta("toStartOfISOYear", 1, 1),
"toStartOfQuarter": HogQLFunctionMeta("toStartOfQuarter", 1, 1),
Expand Down
8 changes: 6 additions & 2 deletions posthog/hogql_queries/insights/insight_actors_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
TrendsQuery,
FunnelsQuery,
LifecycleQuery,
StickinessActorsQuery,
)
from posthog.types import InsightActorsQueryNode

Expand Down Expand Up @@ -62,8 +63,11 @@ def to_query(self) -> ast.SelectQuery | ast.SelectSetQuery:
return paths_runner.to_actors_query()
elif isinstance(self.source_runner, StickinessQueryRunner):
stickiness_runner = cast(StickinessQueryRunner, self.source_runner)
query = cast(InsightActorsQuery, self.query)
return stickiness_runner.to_actors_query(interval_num=int(query.day) if query.day is not None else None)
stickiness_actors_query = cast(StickinessActorsQuery, self.query)
return stickiness_runner.to_actors_query(
interval_num=int(stickiness_actors_query.day) if stickiness_actors_query.day is not None else None,
operator=getattr(stickiness_actors_query, "operator", None),
)
elif isinstance(self.source_runner, LifecycleQueryRunner):
lifecycle_runner = cast(LifecycleQueryRunner, self.source_runner)
query = cast(InsightActorsQuery, self.query)
Expand Down
28 changes: 24 additions & 4 deletions posthog/hogql_queries/insights/stickiness_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,27 @@ def _having_clause(self) -> ast.Expr:
value = ast.Constant(value=self.query.stickinessFilter.stickinessCriteria.value)
return parse_expr(f"""count() {get_count_operator(operator)} {{value}}""", {"value": value})

def date_to_start_of_interval_hogql(self, date: ast.Expr) -> ast.Expr:
    """Return the HogQL expression used to bucket ``date`` into stickiness periods.

    Args:
        date: Expression for the timestamp being bucketed.

    Returns:
        * ``intervalCount`` unset: whatever ``query_date_range`` produces for the
          start of the interval containing ``date``.
        * ``intervalCount`` set: an ``age(...)`` expression between ``date`` and
          the end of the query date range, i.e. a count of intervals back from
          the end date rather than a timestamp. For counts other than 1 it is
          divided by ``intervalCount`` and floored, so several intervals share
          one bucket.
    """
    interval_count = self.query.intervalCount
    if interval_count is None:
        # Use the caller's expression; previously this branch ignored ``date``
        # and always bucketed a hard-coded ``e.timestamp`` field.
        return self.query_date_range.date_to_start_of_interval_hogql(date)

    # find the number of intervals back from the end date
    age = parse_expr(
        """age({interval_name}, {from_date}, {to_date})""",
        placeholders={
            "interval_name": ast.Constant(value=self.query_date_range.interval_name),
            "from_date": date,
            "to_date": self.query_date_range.date_to_as_hogql(),
        },
    )
    if interval_count == 1:
        # One interval per period: the raw age already is the bucket index.
        return age

    return parse_expr(
        "floor({age} / {interval_count})",
        placeholders={"age": age, "interval_count": ast.Constant(value=interval_count)},
    )

def _events_query(self, series_with_extra: SeriesWithExtras) -> ast.SelectQuery:
inner_query = parse_select(
"""
Expand All @@ -109,9 +130,7 @@ def _events_query(self, series_with_extra: SeriesWithExtras) -> ast.SelectQuery:
""",
{
"aggregation": self._aggregation_expressions(series_with_extra.series),
"start_of_interval": self.query_date_range.date_to_start_of_interval_hogql(
ast.Field(chain=["e", "timestamp"])
),
"start_of_interval": self.date_to_start_of_interval_hogql(ast.Field(chain=["e", "timestamp"])),
"sample": self._sample_value(),
"where_clause": self.where_clause(series_with_extra),
"having_clause": self._having_clause(),
Expand Down Expand Up @@ -169,7 +188,7 @@ def to_queries(self) -> list[ast.SelectQuery]:
SELECT sum(num_actors) as num_actors, num_intervals
FROM (
SELECT 0 as num_actors, (number + 1) as num_intervals
FROM numbers(dateDiff({interval}, {date_from_start_of_interval}, {date_to_start_of_interval} + {interval_addition}))
FROM numbers(ceil(dateDiff({interval}, {date_from_start_of_interval}, {date_to_start_of_interval} + {interval_addition}) / {intervalCount}))
UNION ALL
{events_query}
)
Expand All @@ -181,6 +200,7 @@ def to_queries(self) -> list[ast.SelectQuery]:
**date_range.to_placeholders(),
"interval_addition": interval_addition,
"events_query": self._events_query(series),
"intervalCount": ast.Constant(value=self.query.intervalCount or 1),
},
)

Expand Down
104 changes: 102 additions & 2 deletions posthog/hogql_queries/insights/test/test_stickiness_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from posthog.clickhouse.client.execute import sync_execute
from posthog.hogql.constants import LimitContext
from posthog.hogql_queries.insights.stickiness_query_runner import StickinessQueryRunner
from posthog.hogql_queries.query_runner import get_query_runner
from posthog.models.action.action import Action
from posthog.models.group.util import create_group
from posthog.models.group_type_mapping import GroupTypeMapping
Expand Down Expand Up @@ -34,6 +35,7 @@
StickinessQuery,
StickinessQueryResponse,
CompareFilter,
StickinessActorsQuery,
)
from posthog.settings import HOGQL_INCREASED_MAX_EXECUTION_TIME
from posthog.test.base import APIBaseTest, _create_event, _create_person, ClickhouseTestMixin
Expand Down Expand Up @@ -196,16 +198,16 @@ def _create_test_events(self):
]
)

def _run_query(
def _get_query(
self,
series: Optional[list[EventsNode | ActionsNode]] = None,
date_from: Optional[str] = None,
date_to: Optional[str] = None,
interval: Optional[IntervalType] = None,
intervalCount: Optional[int] = None,
properties: Optional[StickinessProperties] = None,
filters: Optional[StickinessFilter] = None,
filter_test_accounts: Optional[bool] = False,
limit_context: Optional[LimitContext] = None,
compare_filters: Optional[CompareFilter] = None,
):
query_series: list[EventsNode | ActionsNode] = [EventsNode(event="$pageview")] if series is None else series
Expand All @@ -217,11 +219,16 @@ def _run_query(
series=query_series,
dateRange=DateRange(date_from=query_date_from, date_to=query_date_to),
interval=query_interval,
intervalCount=intervalCount,
properties=properties,
stickinessFilter=filters,
compareFilter=compare_filters,
filterTestAccounts=filter_test_accounts,
)
return query

def _run_query(self, limit_context: Optional[LimitContext] = None, **kwargs):
    """Build a stickiness query from ``kwargs`` and return its calculated response.

    ``limit_context`` goes to the runner; every other keyword argument is
    forwarded unchanged to ``_get_query``.
    """
    stickiness_query = self._get_query(**kwargs)
    runner = StickinessQueryRunner(
        team=self.team,
        query=stickiness_query,
        limit_context=limit_context,
    )
    return runner.calculate()

def test_stickiness_runs(self):
Expand Down Expand Up @@ -347,6 +354,99 @@ def test_interval_day(self):
0,
]

def test_interval_2_day(self):
    """Run the shared fixture events through a day-interval query with ``intervalCount=2``."""
    self._create_test_events()

    first_series = self._run_query(interval=IntervalType.DAY, intervalCount=2).results[0]

    assert first_series["label"] == "$pageview"
    assert first_series["labels"] == ["1 day", "2 days", "3 days", "4 days", "5 days"]
    assert first_series["days"] == [1, 2, 3, 4, 5]
    # Only the last bucket is expected to be non-zero.
    assert first_series["data"] == [0, 0, 0, 0, 2]

def test_interval_2_day_filtering(self):
    """With ``intervalCount=2``, check the aggregated buckets and then the actors behind them."""
    # p1: one $pageview on each of nine consecutive days, grouped below in
    # consecutive-day pairs (labelled "Day N" in the original fixture).
    every_day_person = SeriesTestData(
        distinct_id="p1",
        events=[
            Series(
                event="$pageview",
                timestamps=[
                    # pair 1
                    "2020-01-11T12:00:00Z",
                    "2020-01-12T12:00:00Z",
                    # pair 2
                    "2020-01-13T12:00:00Z",
                    "2020-01-14T12:00:00Z",
                    # pair 3
                    "2020-01-15T12:00:00Z",
                    "2020-01-16T12:00:00Z",
                    # pair 4
                    "2020-01-17T12:00:00Z",
                    "2020-01-18T12:00:00Z",
                    # pair 5 (single event)
                    "2020-01-19T12:00:00Z",
                ],
            ),
        ],
        properties={"$browser": "Chrome", "prop": 10, "bool_field": True, "$group_0": "org:1"},
    )
    # p2: one $pageview on the first day of each of the first three pairs.
    every_other_day_person = SeriesTestData(
        distinct_id="p2",
        events=[
            Series(
                event="$pageview",
                timestamps=[
                    "2020-01-11T12:00:00Z",
                    "2020-01-13T12:00:00Z",
                    "2020-01-15T12:00:00Z",
                ],
            ),
        ],
        properties={"$browser": "Firefox", "prop": 10, "bool_field": False, "$group_0": "org:1"},
    )
    self._create_events([every_day_person, every_other_day_person])

    first_series = self._run_query(interval=IntervalType.DAY, intervalCount=2).results[0]

    assert first_series["label"] == "$pageview"
    assert first_series["labels"] == ["1 day", "2 days", "3 days", "4 days", "5 days"]
    assert first_series["days"] == [1, 2, 3, 4, 5]
    assert first_series["data"] == [0, 0, 1, 0, 1]

    # Test Actors
    stickiness_query = self._get_query(interval=IntervalType.DAY, intervalCount=2)

    exactly_one = get_query_runner(
        query=StickinessActorsQuery(source=stickiness_query, day=1, operator="exact"),
        team=self.team,
    ).calculate()
    assert len(exactly_one.results) == 0

    three_or_more = get_query_runner(
        query=StickinessActorsQuery(source=stickiness_query, day=3, operator="gte"),
        team=self.team,
    ).calculate()
    assert len(three_or_more.results) == 2

def test_interval_week(self):
self._create_test_events()

Expand Down
2 changes: 1 addition & 1 deletion posthog/hogql_queries/query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ def get_query_runner(
limit_context=limit_context,
modifiers=modifiers,
)
if kind == "InsightActorsQuery" or kind == "FunnelsActorsQuery" or kind == "FunnelCorrelationActorsQuery":
if kind in ("InsightActorsQuery", "FunnelsActorsQuery", "FunnelCorrelationActorsQuery", "StickinessActorsQuery"):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could be a set instead of a tuple: {"InsightActorsQuery", "FunnelsActorsQuery", "FunnelCorrelationActorsQuery", "StickinessActorsQuery"}

from .insights.insight_actors_query_runner import InsightActorsQueryRunner

return InsightActorsQueryRunner(
Expand Down
Loading
Loading