From eeadba567a4b04bc0e6a1e6f1f1d762c2a1ad06f Mon Sep 17 00:00:00 2001 From: Glorfindel Date: Tue, 14 Apr 2020 08:44:30 +0200 Subject: [PATCH] HNQ views analysis --- .gitattributes | 1 + hnq-research/HNQ.backup.REMOVED.git-id | 1 - hnq-research/views-analysis.sql | 70 ++++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 .gitattributes delete mode 100644 hnq-research/HNQ.backup.REMOVED.git-id create mode 100644 hnq-research/views-analysis.sql diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..8b98847 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.backup filter=lfs diff=lfs merge=lfs -text diff --git a/hnq-research/HNQ.backup.REMOVED.git-id b/hnq-research/HNQ.backup.REMOVED.git-id deleted file mode 100644 index 57fddff..0000000 --- a/hnq-research/HNQ.backup.REMOVED.git-id +++ /dev/null @@ -1 +0,0 @@ -c5fd82755a0d2e7503c1f6f3d6c43b8e842422db \ No newline at end of file diff --git a/hnq-research/views-analysis.sql b/hnq-research/views-analysis.sql new file mode 100644 index 0000000..86384c8 --- /dev/null +++ b/hnq-research/views-analysis.sql @@ -0,0 +1,70 @@ +-- HNQ analysis: https://meta.stackexchange.com/q/338009 -- + +-- top sites (# of questions) +WITH questionsBySite AS ( +SELECT site, COUNT(DISTINCT(question)) AS questions + FROM snapshots + WHERE timestamp >= '2019-08-01' + GROUP BY site), +sitesByQuestions AS (SELECT RANK() OVER (ORDER BY questions DESC) AS rank, site, questions, + ROUND(100 * CAST(questions AS NUMERIC) / (SELECT SUM(questions) FROM questionsBySite), 2) AS percentage + FROM questionsBySite) +SELECT * FROM sitesByQuestions + ORDER BY questions DESC; + +-- top sites (# of spots) +WITH spotsBySite AS ( +SELECT site, COUNT(*) AS spots + FROM snapshots + WHERE timestamp >= '2019-08-01' + GROUP BY site), +sitesBySpots AS (SELECT RANK() OVER (ORDER BY spots DESC) AS rank, site, spots, + ROUND(100 * CAST(spots AS NUMERIC) / (SELECT SUM(spots) FROM spotsBySite), 2) AS percentage + FROM spotsBySite) +SELECT * FROM sitesBySpots + ORDER BY spots DESC; + +-- sites with most HNQ views per hour +WITH questionsBySite AS ( +SELECT site, COUNT(DISTINCT(question)) AS questions + FROM snapshots + WHERE timestamp >= '2019-08-01' + GROUP BY site), +sitesByQuestions AS (SELECT RANK() OVER (ORDER BY questions DESC) AS rank, site, questions, + ROUND(100 * CAST(questions AS NUMERIC) / (SELECT SUM(questions) FROM questionsBySite), 2) AS percentage + FROM questionsBySite), +spotsBySite AS ( +SELECT site, COUNT(*) AS spots + FROM snapshots + WHERE timestamp >= '2019-08-01' + GROUP BY site), +sitesBySpots AS (SELECT RANK() OVER (ORDER BY spots DESC) AS rank, site, spots, + ROUND(100 * CAST(spots AS NUMERIC) / (SELECT SUM(spots) FROM spotsBySite), 2) AS percentage + FROM spotsBySite), +temp AS (SELECT s.site, s.question, + MIN(s.views) AS "Views before HNQ", + MIN(s.timestamp) - q.createddate AS "Pre-HNQ duration", + MAX(s.views) - MIN(s.views) AS "Views while HNQ", + MAX(s.timestamp) - MIN(s.timestamp) AS "HNQ duration" + FROM snapshots AS s + INNER JOIN questions AS q ON s.site = q.site AND s.question = q.id + WHERE s.timestamp >= '2019-08-01' + AND s.views IS NOT NULL + GROUP BY s.site, s.question, q.createddate + HAVING MAX(s.views) > MIN(s.views)), +stats AS (SELECT site, + ROUND(AVG("Views before HNQ"), 0) AS "Views before HNQ", + DATE_TRUNC('minute', AVG("Pre-HNQ duration")) AS "Pre-HNQ duration", + ROUND(AVG(3600 * "Views before HNQ" / CAST(EXTRACT(EPOCH FROM "Pre-HNQ duration") AS NUMERIC)), 1) AS "Pre-HNQ views per hour", + ROUND(AVG("Views while HNQ"), 0) AS "Views while HNQ", + DATE_TRUNC('minute', AVG("HNQ duration")) AS "HNQ duration", + ROUND(AVG(3600 * "Views while HNQ" / CAST(EXTRACT(EPOCH FROM "HNQ duration") AS NUMERIC)), 1) AS "HNQ views per hour" + FROM Temp + GROUP BY site) +SELECT stats.*, q.rank, q.percentage, s.rank, s.percentage + FROM stats + INNER JOIN sitesByQuestions AS q ON q.site = stats.site + INNER JOIN sitesBySpots AS s ON s.site = stats.site + ORDER BY 7 DESC + +