Skip to content

Commit

Permalink
add pageviews referrer support
Browse files Browse the repository at this point in the history
  • Loading branch information
negrel committed Jan 24, 2024
1 parent 1d35c2b commit 7f594f7
Show file tree
Hide file tree
Showing 11 changed files with 202 additions and 60 deletions.
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ALTER TABLE prisme.events_pageviews ADD COLUMN referrer_domain String DEFAULT 'direct';
3 changes: 2 additions & 1 deletion internal/embedded/static/m.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
fetch(ps.concat("://", pd, "/api/v1/events/pageviews"), {
method: "POST",
headers: {
"X-Prisme-Referrer": s.concat('//', d, location.pathname)
"X-Prisme-Referrer": s.concat('//', d, location.pathname),
"X-Prisme-Document-Referrer": document.referrer
},
referrerPolicy: "no-referrer-when-downgrade"
});
Expand Down
9 changes: 9 additions & 0 deletions internal/event/domain_name.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,15 @@ package event

import (
"encoding/json"
"errors"

"golang.org/x/net/idna"
)

var (
// errValueIsEmpty is the error returned by ParseDomainName for an empty value.
errValueIsEmpty = errors.New("value is empty")
)

// DomainName defines a valid domain name according to RFC 5891. Domain names are
// stored using their ASCII form.
type DomainName struct {
Expand All @@ -15,6 +20,10 @@ type DomainName struct {
// ParseDomainName parses the given value as a domain name and returns it.
// If the value is considered invalid, an error is returned.
func ParseDomainName(value string) (DomainName, error) {
if value == "" {
return DomainName{}, errValueIsEmpty
}

domain, err := idna.Lookup.ToASCII(value)
if err != nil {
return DomainName{}, err
Expand Down
6 changes: 6 additions & 0 deletions internal/event/domain_name_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ func TestParseDomainName(t *testing.T) {
require.Equal(t, DomainName{}, domainName)
})
}

t.Run("Empty", func(t *testing.T) {
domainName, err := ParseDomainName("")
require.Error(t, err)
require.Equal(t, DomainName{}, domainName)
})
})

t.Run("Valid", func(t *testing.T) {
Expand Down
27 changes: 17 additions & 10 deletions internal/event/pageview.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,30 @@ type OperatingSystem string

// PageView defines a page view event.
type PageView struct {
Timestamp time.Time
DomainName DomainName
PathName string
Client uaparser.Client
Timestamp time.Time
DomainName DomainName
PathName string
Client uaparser.Client
ReferrerDomain ReferrerDomain
}

// NewPageView creates a new PageView event.
func NewPageView(u *url.URL, cli uaparser.Client) (PageView, error) {
domain, err := ParseDomainName(u.Hostname())
func NewPageView(pvUrl *url.URL, cli uaparser.Client, pageReferrer string) (PageView, error) {
domain, err := ParseDomainName(pvUrl.Hostname())
if err != nil {
return PageView{}, err
}

referrerDomain, err := ParseReferrerDomain(pageReferrer)
if err != nil {
return PageView{}, err
}

return PageView{
Timestamp: time.Now(),
DomainName: domain,
PathName: u.Path,
Client: cli,
Timestamp: time.Now(),
DomainName: domain,
PathName: pvUrl.Path,
Client: cli,
ReferrerDomain: referrerDomain,
}, nil
}
40 changes: 40 additions & 0 deletions internal/event/referrer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package event

import (
"fmt"
"net/url"

"golang.org/x/net/idna"
)

// ReferrerDomain defines the domain of an HTTP referrer. A referrer is either
// direct (empty string) or a URL from which the domain is extracted.
type ReferrerDomain struct {
// value is the referrer domain; ParseReferrerDomain leaves it empty for a
// direct source.
value string
}

// ParseReferrerDomain parses the given value as a referrer URL and returns the
// domain it points to, in ASCII form.
//
// An empty value denotes a direct source and yields the zero ReferrerDomain
// with a nil error. Any other value must be accepted by url.ParseRequestURI
// and its hostname must be accepted by the IDNA lookup profile; otherwise the
// zero ReferrerDomain is returned along with an error wrapping the underlying
// cause.
func ParseReferrerDomain(value string) (ReferrerDomain, error) {
	// Direct source.
	if value == "" {
		return ReferrerDomain{}, nil
	}

	u, err := url.ParseRequestURI(value)
	if err != nil {
		return ReferrerDomain{}, fmt.Errorf("invalid referrer: %w", err)
	}

	// Convert the hostname to its ASCII form so that Unicode and ASCII
	// spellings of the same domain are stored identically.
	source, err := idna.Lookup.ToASCII(u.Hostname())
	if err != nil {
		// Wrap exactly like the URL parsing failure so callers get the same
		// "invalid referrer" context whichever step rejected the value.
		return ReferrerDomain{}, fmt.Errorf("invalid referrer: %w", err)
	}

	return ReferrerDomain{value: source}, nil
}

// String implements fmt.Stringer by returning the stored referrer domain,
// which is the empty string for the zero value.
func (r ReferrerDomain) String() string {
	return r.value
}
69 changes: 69 additions & 0 deletions internal/event/referrer_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
package event

import (
"testing"

"github.com/stretchr/testify/require"
)

// TestParseReferrerDomain checks that ParseReferrerDomain rejects invalid
// referrer URLs, extracts the ASCII domain from valid ones and accepts an empty
// referrer as a direct source.
func TestParseReferrerDomain(t *testing.T) {
	// toReferrer builds the referrer URL submitted for each table entry.
	toReferrer := func(host string) string {
		return "http://" + host + "/foo"
	}

	t.Run("Invalid", func(t *testing.T) {
		for _, host := range []string{
			"mydomain*com",
			"123domain!",
			"_invalid-domain.com",
			"space domain.com",
			"my domain .com",
			"domain#invalid.com",
			"-hyphenstart.com",
			"domain_with_underscores-.com",
		} {
			t.Run(host, func(t *testing.T) {
				got, err := ParseReferrerDomain(toReferrer(host))
				require.Error(t, err)
				require.Equal(t, ReferrerDomain{}, got)
			})
		}
	})

	t.Run("Valid", func(t *testing.T) {
		for _, host := range []string{
			"alphabets123.com",
			"my-domain-name.com",
			"1234example.net",
			"tech-geeks.org",
			"secure-site.info",
			"bestblogsite.biz",
			"creative-web.dev",
			"xyz-company.co",
			"e-commerce-site.store",
			"travel-experts.travel",
			"xn--kn8h.to",
		} {
			t.Run(host, func(t *testing.T) {
				got, err := ParseReferrerDomain(toReferrer(host))
				require.NoError(t, err)
				require.NotEqual(t, ReferrerDomain{}, got)
				require.Equal(t, host, got.String())
			})
		}

		// A Unicode host must come back in its ASCII form.
		t.Run("🏹.to", func(t *testing.T) {
			got, err := ParseReferrerDomain("http://🏹.to/")
			require.NoError(t, err)
			require.NotEqual(t, ReferrerDomain{}, got)
			require.Equal(t, "xn--kn8h.to", got.String())
		})

		// An empty referrer is a direct source, not an error.
		t.Run("Direct", func(t *testing.T) {
			got, err := ParseReferrerDomain("")
			require.NoError(t, err)
			require.Equal(t, "", got.String())
		})
	})
}
14 changes: 8 additions & 6 deletions internal/handlers/events_pageviews.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,22 @@ func ProvidePostEventsPageViews(
uaParserService uaparser.Service,
) PostPageViewEvent {
return func(c *fiber.Ctx) error {
referrer := string(peekReferrerHeader(c))

// Parse URI.
uri, err := url.ParseRequestURI(referrer)
// Referrer of the POST request, that is the viewed page.
pageReferrer := string(peekReferrerHeader(c))
pageUrl, err := url.ParseRequestURI(pageReferrer)
if err != nil {
c.Response().SetStatusCode(fiber.StatusBadRequest)
return err
return fmt.Errorf("invalid referrer: %w", err)
}

// Website the viewer comes from.
referrer := string(c.Request().Header.Peek("X-Prisme-Document-Referrer"))

// Parse user agent.
cli := uaParserService.ParseUserAgent(string(c.Request().Header.UserAgent()))

// Create pageview.
pageview, err := event.NewPageView(uri, cli)
pageview, err := event.NewPageView(pageUrl, cli, referrer)
if err != nil {
c.Response().SetStatusCode(fiber.StatusBadRequest)
return fmt.Errorf("invalid pageview event: %w", err)
Expand Down
3 changes: 2 additions & 1 deletion internal/services/eventstore/clickhouse_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ func batchPageViewLoop(logger log.Logger,
if batch == nil {
batch, err = conn.PrepareBatch(
context.Background(),
"INSERT INTO events_pageviews VALUES ($1, $2, $3, $4, $5, $6)",
"INSERT INTO events_pageviews VALUES ($1, $2, $3, $4, $5, $6, $7)",
)
if err != nil {
logger.Err(err).Msg("failed to prepare batch")
Expand All @@ -76,6 +76,7 @@ func batchPageViewLoop(logger log.Logger,
ev.Client.OperatingSystem,
ev.Client.BrowserFamily,
ev.Client.Device,
ev.ReferrerDomain,
)
if err != nil {
logger.Err(err).Msg("failed to append to pageview batch")
Expand Down
90 changes: 48 additions & 42 deletions tests/bun/events/events_pageviews.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,33 +62,22 @@ test('valid URL with registered domain in X-Prisme-Referrer header is accepted',
method: 'POST',
headers: {
'X-Forwarded-For': faker.internet.ip(),
'X-Prisme-Referrer': 'http://mywebsite.localhost/foo?bar=baz#qux'
'X-Prisme-Referrer': 'http://mywebsite.localhost/foo?bar=baz#qux',
'X-Prisme-Document-Referrer': 'https://www.example.com/foo'
}
})
expect(response.status).toBe(200)

// Wait for clickhouse to ingest batch.
Bun.sleepSync(1000)

const client = createClient({
host: 'http://clickhouse.localhost:8123',
username: 'clickhouse',
password: 'password',
database: 'prisme'
})

const rows = await client.query({
query: 'SELECT * FROM prisme.events_pageviews ORDER BY timestamp DESC LIMIT 1;'
})
const data = await rows.json().then((r: any) => r.data[0])
const data = await getLatestPageview()

expect(data).toMatchObject({
timestamp: expect.stringMatching(TIMESTAMP_REGEX),
domain: 'mywebsite.localhost',
path: '/foo',
operating_system: 'Other',
browser_family: 'Other',
device: 'Other'
device: 'Other',
referrer_domain: 'www.example.com'
})
})

Expand All @@ -97,25 +86,13 @@ test('valid URL with registered domain in Referer header is accepted', async ()
method: 'POST',
headers: {
'X-Forwarded-For': faker.internet.ip(),
Referer: 'http://foo.mywebsite.localhost/another/foo?bar=baz#qux'
Referer: 'http://foo.mywebsite.localhost/another/foo?bar=baz#qux',
'X-Prisme-Document-Referrer': 'https://www.example.com/foo'
}
})
expect(response.status).toBe(200)

// Wait for clickhouse to ingest batch.
Bun.sleepSync(1000)

const client = createClient({
host: 'http://clickhouse.localhost:8123',
username: 'clickhouse',
password: 'password',
database: 'prisme'
})

const rows = await client.query({
query: 'SELECT * FROM prisme.events_pageviews ORDER BY timestamp DESC LIMIT 1;'
})
const data = await rows.json().then((r: any) => r.data[0])
const data = await getLatestPageview()

expect(data).toMatchObject({
timestamp: expect.stringMatching(TIMESTAMP_REGEX),
Expand All @@ -133,11 +110,49 @@ test('valid pageview with Windows + Chrome user agent', async () => {
headers: {
'X-Forwarded-For': faker.internet.ip(),
Referer: 'http://foo.mywebsite.localhost/another/foo?bar=baz#qux',
'X-Prisme-Document-Referrer': 'https://www.example.com/foo',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.3'
}
})
expect(response.status).toBe(200)

const data = await getLatestPageview()

expect(data).toMatchObject({
timestamp: expect.stringMatching(TIMESTAMP_REGEX),
domain: 'foo.mywebsite.localhost',
path: '/another/foo',
operating_system: 'Windows',
browser_family: 'Chrome',
device: 'Other',
referrer_domain: 'www.example.com'
})
})

// A pageview posted without the X-Prisme-Document-Referrer header must still
// be accepted, and the stored row is expected to have an empty referrer_domain.
test('valid pageview without X-Prisme-Document-Referrer', async () => {
const response = await fetch(PRISME_PAGEVIEWS_URL, {
method: 'POST',
headers: {
'X-Forwarded-For': faker.internet.ip(),
Referer: 'http://foo.mywebsite.localhost/another/foo?bar=baz#qux'
}
})
expect(response.status).toBe(200)

const data = await getLatestPageview()

expect(data).toMatchObject({
timestamp: expect.stringMatching(TIMESTAMP_REGEX),
domain: 'foo.mywebsite.localhost',
path: '/another/foo',
operating_system: 'Other',
browser_family: 'Other',
device: 'Other',
referrer_domain: ''
})
})

async function getLatestPageview (): Promise<any> {
// Wait for clickhouse to ingest batch.
Bun.sleepSync(1000)

Expand All @@ -151,14 +166,5 @@ test('valid pageview with Windows + Chrome user agent', async () => {
const rows = await client.query({
query: 'SELECT * FROM prisme.events_pageviews ORDER BY timestamp DESC LIMIT 1;'
})
const data = await rows.json().then((r: any) => r.data[0])

expect(data).toMatchObject({
timestamp: expect.stringMatching(TIMESTAMP_REGEX),
domain: 'foo.mywebsite.localhost',
path: '/another/foo',
operating_system: 'Windows',
browser_family: 'Chrome',
device: 'Other'
})
})
return rows.json().then((r: any) => r.data[0])
}

0 comments on commit 7f594f7

Please sign in to comment.