From 72cfeb37ef4f7c0bc2dd3e1a0c9ee21b61762b8f Mon Sep 17 00:00:00 2001 From: Kanstantsin Kamkou Date: Fri, 1 Sep 2017 23:44:04 +0200 Subject: [PATCH] query_stars_by_repo is reverted for the task --- gitmostwanted/tasks/repo_most_starred.py | 2 +- gitmostwanted/tasks/repo_stars.py | 15 +++++---------- scripts/repository_mean_reset.py | 24 ++++++++++++++++++++++-- 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/gitmostwanted/tasks/repo_most_starred.py b/gitmostwanted/tasks/repo_most_starred.py index d79a108..064a29b 100644 --- a/gitmostwanted/tasks/repo_most_starred.py +++ b/gitmostwanted/tasks/repo_most_starred.py @@ -8,7 +8,7 @@ from time import sleep -def results_of(j: Job): # @todo #0:15m copy-paste code in multiple tasks +def results_of(j: Job): while not j.complete: app.logger.debug('The job is not complete, waiting...') sleep(10) diff --git a/gitmostwanted/tasks/repo_stars.py b/gitmostwanted/tasks/repo_stars.py index d1409af..3f43ee2 100644 --- a/gitmostwanted/tasks/repo_stars.py +++ b/gitmostwanted/tasks/repo_stars.py @@ -46,20 +46,15 @@ def stars_mature(num_days): # @todo #192:1h move BQ queries to a separate place def query_stars_by_repo(repo_id: int, date_from: datetime, date_to: datetime): query = """ - #standardSQL SELECT - COUNT(1) AS stars, - EXTRACT(YEAR FROM created_at) AS y, - EXTRACT(DAYOFYEAR FROM created_at) AS doy, - EXTRACT(MONTH FROM created_at) AS mon + COUNT(1) AS stars, YEAR(created_at) AS y, DAYOFYEAR(created_at) AS doy, + MONTH(created_at) as mon FROM - `githubarchive.month.*` + TABLE_DATE_RANGE([githubarchive:day.], TIMESTAMP('{date_from}'), TIMESTAMP('{date_to}')) WHERE - (_TABLE_SUFFIX BETWEEN '{date_from}' AND '{date_to}') - AND repo.id = {id} - AND type IN ('WatchEvent', 'ForkEvent') + repo.id = {id} AND type IN ('WatchEvent', 'ForkEvent') GROUP BY y, mon, doy """ return query.format( - id=repo_id, date_from=date_from.strftime('%Y%m'), date_to=date_to.strftime('%Y%m') + id=repo_id, date_from=date_from.strftime('%Y-%m-%d'), date_to=date_to.strftime('%Y-%m-%d') ) diff --git a/scripts/repository_mean_reset.py b/scripts/repository_mean_reset.py index ed0c21b..8312521 100644 --- a/scripts/repository_mean_reset.py +++ b/scripts/repository_mean_reset.py @@ -3,12 +3,11 @@ from gitmostwanted.lib.bigquery.job import Job from gitmostwanted.models.repo import Repo, RepoMean, RepoStars from gitmostwanted.services import bigquery -from gitmostwanted.tasks.repo_stars import query_stars_by_repo from gitmostwanted.tasks.repo_status import last_known_mean, repo_mean from time import sleep -def results_of(j: Job): # @todo #0:15m copy-paste code in multiple tasks +def results_of(j: Job): while not j.complete: app.logger.debug('The job is not complete, waiting...') sleep(10) @@ -60,3 +59,24 @@ def results_of(j: Job): # @todo #0:15m copy-paste code in multiple tasks db.session.commit() app.logger.info('Repository %d has %d days', result.id, cnt) + + +def query_stars_by_repo(repo_id: int, date_from: datetime, date_to: datetime): + query = """ + #standardSQL + SELECT + COUNT(1) AS stars, + EXTRACT(YEAR FROM created_at) AS y, + EXTRACT(DAYOFYEAR FROM created_at) AS doy, + EXTRACT(MONTH FROM created_at) AS mon + FROM + `githubarchive.month.*` + WHERE + (_TABLE_SUFFIX BETWEEN '{date_from}' AND '{date_to}') + AND repo.id = {id} + AND type IN ('WatchEvent', 'ForkEvent') + GROUP BY y, mon, doy + """ + return query.format( + id=repo_id, date_from=date_from.strftime('%Y%m'), date_to=date_to.strftime('%Y%m') + )