Skip to content

Commit

Permalink
Remove tree validations and introduce ParameterizedQuery (getredash#3230
Browse files Browse the repository at this point in the history
)
  • Loading branch information
Omer Lachish authored and arikfr committed Jan 17, 2019
1 parent 823e4cc commit 121a44e
Show file tree
Hide file tree
Showing 10 changed files with 120 additions and 325 deletions.
54 changes: 1 addition & 53 deletions redash/handlers/embed.py
Original file line number Diff line number Diff line change
@@ -1,66 +1,14 @@
from __future__ import absolute_import
import logging
import time

from flask import request

from .authentication import current_org
from flask_login import current_user, login_required
from flask_restful import abort
from redash import models, utils
from redash import models
from redash.handlers import routes
from redash.handlers.base import (get_object_or_404, org_scoped_rule,
record_event)
from redash.utils import find_missing_params
from redash.handlers.static import render_index
from redash.utils import gen_query_hash, mustache_render


#
# Run a parameterized query synchronously and return the result
# DISCLAIMER: Temporary solution to support parameters in queries. Should be
# removed once we refactor the query results API endpoints and handling
# on the client side. Please don't reuse in other API handlers.
#
def run_query_sync(data_source, parameter_values, query_text, max_age=0):
missing_params = find_missing_params(query_text, parameter_values)
if missing_params:
raise Exception('Missing parameter value for: {}'.format(", ".join(missing_params)))

query_text = mustache_render(query_text, parameter_values)

if max_age <= 0:
query_result = None
else:
query_result = models.QueryResult.get_latest(data_source, query_text, max_age)

query_hash = gen_query_hash(query_text)

if query_result:
logging.info("Returning cached result for query %s" % query_hash)
return query_result.data

try:
started_at = time.time()
data, error = data_source.query_runner.run_query(query_text, current_user)

if error:
return None
# update cache
if max_age > 0:
run_time = time.time() - started_at
query_result, updated_query_ids = models.QueryResult.store_result(data_source.org_id, data_source.id,
query_hash, query_text, data,
run_time, utils.utcnow())

models.db.session.commit()
return data
except Exception:
if max_age > 0:
abort(404, message="Unable to get result from the database, and no cached query result found.")
else:
abort(503, message="Unable to get result from the database.")
return None


@routes.route(org_scoped_rule('/embed/query/<query_id>/visualization/<visualization_id>'), methods=['GET'])
Expand Down
58 changes: 16 additions & 42 deletions redash/handlers/query_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,66 +8,42 @@
from redash.handlers.base import BaseResource, get_object_or_404
from redash.permissions import (has_access, not_view_only, require_access,
require_permission, view_only)
from redash.tasks import QueryTask, record_event
from redash.tasks import QueryTask
from redash.tasks.queries import enqueue_query
from redash.utils import (collect_parameters_from_request, find_missing_params, gen_query_hash, json_dumps, utcnow)
from redash.utils.sql_query import SQLInjectionError, SQLQuery
from redash.utils import (collect_parameters_from_request, gen_query_hash, json_dumps, utcnow)
from redash.utils.parameterized_query import ParameterizedQuery


def error_response(message):
return {'job': {'status': 4, 'error': message}}, 400


def apply_parameters(template, parameters, data_source):
query = SQLQuery(template).apply(parameters)

# for now we only log `SQLInjectionError` to detect false positives
try:
text = query.text
except SQLInjectionError:
record_event({
'action': 'sql_injection',
'object_type': 'query',
'query': template,
'parameters': parameters,
'timestamp': time.time(),
'org_id': data_source.org_id
})
except Exception as e:
logging.info(u"Failed applying parameters for query %s: %s", gen_query_hash(query.query), e.message)
finally:
text = query.query

return text


#
# Run a parameterized query synchronously and return the result
# DISCLAIMER: Temporary solution to support parameters in queries. Should be
# removed once we refactor the query results API endpoints and handling
# on the client side. Please don't reuse in other API handlers.
#
def run_query_sync(data_source, parameter_values, query_text, max_age=0):
missing_params = find_missing_params(query_text, parameter_values)
if missing_params:
raise Exception('Missing parameter value for: {}'.format(", ".join(missing_params)))
query = ParameterizedQuery(query_text).apply(parameter_values)

query_text = apply_parameters(query_text, parameter_values, data_source)
if query.missing_params:
raise Exception('Missing parameter value for: {}'.format(", ".join(query.missing_params)))

if max_age <= 0:
query_result = None
else:
query_result = models.QueryResult.get_latest(data_source, query_text, max_age)
query_result = models.QueryResult.get_latest(data_source, query.text, max_age)

query_hash = gen_query_hash(query_text)
query_hash = gen_query_hash(query.text)

if query_result:
logging.info("Returning cached result for query %s" % query_hash)
return query_result

try:
started_at = time.time()
data, error = data_source.query_runner.run_query(query_text, current_user)
data, error = data_source.query_runner.run_query(query.text, current_user)

if error:
logging.info('got bak error')
Expand All @@ -76,9 +52,8 @@ def run_query_sync(data_source, parameter_values, query_text, max_age=0):

run_time = time.time() - started_at
query_result, updated_query_ids = models.QueryResult.store_result(data_source.org_id, data_source,
query_hash, query_text, data,
query_hash, query.text, data,
run_time, utcnow())

models.db.session.commit()
return query_result
except Exception as e:
Expand All @@ -90,10 +65,6 @@ def run_query_sync(data_source, parameter_values, query_text, max_age=0):


def run_query(data_source, parameter_values, query_text, query_id, max_age=0):
missing_params = find_missing_params(query_text, parameter_values)
if missing_params:
return error_response(u'Missing parameter value for: {}'.format(u", ".join(missing_params)))

if data_source.paused:
if data_source.pause_reason:
message = '{} is paused ({}). Please try later.'.format(data_source.name, data_source.pause_reason)
Expand All @@ -102,17 +73,20 @@ def run_query(data_source, parameter_values, query_text, query_id, max_age=0):

return error_response(message)

query_text = apply_parameters(query_text, parameter_values, data_source)
query = ParameterizedQuery(query_text).apply(parameter_values)

if query.missing_params:
return error_response(u'Missing parameter value for: {}'.format(u", ".join(query.missing_params)))

if max_age == 0:
query_result = None
else:
query_result = models.QueryResult.get_latest(data_source, query_text, max_age)
query_result = models.QueryResult.get_latest(data_source, query.text, max_age)

if query_result:
return {'query_result': query_result.to_dict()}
else:
job = enqueue_query(query_text, data_source, current_user.id, metadata={"Username": current_user.email, "Query ID": query_id})
job = enqueue_query(query.text, data_source, current_user.id, metadata={"Username": current_user.email, "Query ID": query_id})
return {'job': job.to_dict()}


Expand Down
3 changes: 3 additions & 0 deletions redash/query_runner/google_spreadsheets.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@ def request(self, *args, **kwargs):


class GoogleSpreadsheet(BaseQueryRunner):
def __init__(self, configuration):
super(GoogleSpreadsheet, self).__init__(configuration)
self.syntax = 'custom'

@classmethod
def annotate_query(cls):
Expand Down
1 change: 1 addition & 0 deletions redash/query_runner/graphite.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def annotate_query(cls):

def __init__(self, configuration):
super(Graphite, self).__init__(configuration)
self.syntax = 'custom'

if "username" in self.configuration and self.configuration["username"]:
self.auth = (self.configuration["username"], self.configuration["password"])
Expand Down
37 changes: 1 addition & 36 deletions redash/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import pystache
import pytz
import simplejson
from funcy import distinct, select_values
from funcy import select_values
from redash import settings
from sqlalchemy.orm.query import Query

Expand Down Expand Up @@ -167,41 +167,6 @@ def writerows(self, rows):
self.writerow(row)


def _collect_key_names(nodes):
keys = []
for node in nodes._parse_tree:
if isinstance(node, pystache.parser._EscapeNode):
keys.append(node.key)
elif isinstance(node, pystache.parser._SectionNode):
keys.append(node.key)
keys.extend(_collect_key_names(node.parsed))

return distinct(keys)


def collect_query_parameters(query):
nodes = pystache.parse(query)
keys = _collect_key_names(nodes)
return keys


def parameter_names(parameter_values):
names = []
for key, value in parameter_values.iteritems():
if isinstance(value, dict):
for inner_key in value.keys():
names.append(u'{}.{}'.format(key, inner_key))
else:
names.append(key)

return names


def find_missing_params(query_text, parameter_values):
query_parameters = set(collect_query_parameters(query_text))
return set(query_parameters) - set(parameter_names(parameter_values))


def collect_parameters_from_request(args):
parameters = {}

Expand Down
54 changes: 54 additions & 0 deletions redash/utils/parameterized_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import pystache
from redash.utils import mustache_render
from funcy import distinct


def _collect_key_names(nodes):
keys = []
for node in nodes._parse_tree:
if isinstance(node, pystache.parser._EscapeNode):
keys.append(node.key)
elif isinstance(node, pystache.parser._SectionNode):
keys.append(node.key)
keys.extend(_collect_key_names(node.parsed))

return distinct(keys)


def _collect_query_parameters(query):
nodes = pystache.parse(query)
keys = _collect_key_names(nodes)
return keys


def _parameter_names(parameter_values):
names = []
for key, value in parameter_values.iteritems():
if isinstance(value, dict):
for inner_key in value.keys():
names.append(u'{}.{}'.format(key, inner_key))
else:
names.append(key)

return names


class ParameterizedQuery(object):
def __init__(self, template):
self.template = template
self.query = template
self.parameters = {}

def apply(self, parameters):
self.parameters.update(parameters)
self.query = mustache_render(self.template, self.parameters)
return self

@property
def missing_params(self):
query_parameters = set(_collect_query_parameters(self.template))
return set(query_parameters) - set(_parameter_names(self.parameters))

@property
def text(self):
return self.query
71 changes: 0 additions & 71 deletions redash/utils/sql_query.py

This file was deleted.

Loading

0 comments on commit 121a44e

Please sign in to comment.