-{% block endcode %}
-
-{% endblock %}
-
-
-
diff --git a/django_urls.py b/django_urls.py
deleted file mode 100755
index 27ea2c0..0000000
--- a/django_urls.py
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright 2008 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from django.conf.urls.defaults import *
-
-urlpatterns = patterns('',
- # Example:
- (r'^account/', include('account.urls')),
- url(r'^_ah/login_required', 'account.views.signin', name="account-signin"),
- (r'^events/', include('events.urls')),
- (r'^sources/', include('sources.urls')),
- (r'^subscriptions/', include('subscriptions.urls')),
- (r'^admin/', include('eventsite.admin.urls')),
- (r'^assets/', include('assets.urls')),
- (r'', include('eventsite.urls')),
-)
diff --git a/handlers.py b/handlers.py
deleted file mode 100755
index 09cf650..0000000
--- a/handlers.py
+++ /dev/null
@@ -1,13 +0,0 @@
-from tipfy import RequestHandler
-from tipfy.ext.jinja2 import render_response
-
-class FrontPageHandler(RequestHandler):
- """A handler that outputs the result of a rendered template."""
- def get(self, **kwargs):
- return render_response('hello.html', message='Hello, Jinja!')
-
-
-class AddEventHandler(RequestHandler):
- """A handler that outputs the result of a rendered template."""
- def get(self, **kwargs):
- return render_response('hello.html', message='Hello, Jinja!')
\ No newline at end of file
diff --git a/apps/__init__.py b/links/__init__.py
similarity index 100%
rename from apps/__init__.py
rename to links/__init__.py
diff --git a/apps/links/forms.py b/links/forms.py
similarity index 100%
rename from apps/links/forms.py
rename to links/forms.py
diff --git a/apps/links/handlers.py b/links/handlers.py
similarity index 100%
rename from apps/links/handlers.py
rename to links/handlers.py
diff --git a/apps/links/models.py b/links/models.py
similarity index 100%
rename from apps/links/models.py
rename to links/models.py
diff --git a/links/urls.py b/links/urls.py
new file mode 100755
index 0000000..1f73a16
--- /dev/null
+++ b/links/urls.py
@@ -0,0 +1,11 @@
+from django.conf.urls.defaults import *
+
+urlpatterns = patterns('links.views',
+
+ url(r'^add/$','add', name="add_link"),
+ url(r'^review/$','review', name="review_links"),
+ url(r'^change/$','add', name="change_link"),
+
+
+)
+
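Note on the new links/urls.py above: it only declares the app-local patterns, so they still have to be pulled in from the project's root URLconf to resolve. A minimal sketch of such an include follows; the '^links/' prefix and the root urls.py entry are assumptions for illustration only, since the root URLconf changes are not part of this hunk.

from django.conf.urls.defaults import *

urlpatterns = patterns('',
    # Hypothetical mount point; the actual prefix used by the project's
    # root urls.py is not shown in this diff.
    (r'^links/', include('links.urls')),
)
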
diff --git a/apps/links/__init__.py b/links/views.py
old mode 100755
new mode 100644
similarity index 100%
rename from apps/links/__init__.py
rename to links/views.py
diff --git a/main.py b/main.py
index 2739e4f..b297297 100755
--- a/main.py
+++ b/main.py
@@ -1,38 +1,47 @@
-# -*- coding: utf-8 -*-
-"""
- main
- ~~~~
+import sys, os
+from google.appengine.ext.webapp import util
- Run Tipfy apps.
- :copyright: 2009 by tipfy.org.
- :license: BSD, see LICENSE for more details.
-"""
+sys.path= [os.path.join(os.path.dirname(__file__), 'shared'), os.path.join(os.path.dirname(__file__), '.')]+sys.path
+
+
+# Django imports and other code go here...
import os
-import sys
+os.environ['DJANGO_SETTINGS_MODULE'] = 'settings'
+from google.appengine.dist import use_library
+use_library('django', '1.2')
-if 'lib' not in sys.path:
- # Add /lib as primary libraries directory, with fallback to /distlib
- # and optionally to distlib loaded using zipimport.
- sys.path[0:0] = ['lib', 'distlib', 'distlib.zip', 'shared']
-
-import config
-import tipfy
+import django.core.handlers, django.core.handlers.wsgi
-# Is this the development server?
-debug = os.environ.get('SERVER_SOFTWARE', '').startswith('Dev')
+from django.conf import settings
+settings.ROOT_URLCONF="urls"
-# Instantiate the application.
-app = tipfy.make_wsgi_app(config=config.config, debug=debug)
-from tipfy.ext.jinja2 import get_jinja2_instance
-env=get_jinja2_instance()
-env.globals['app_version'] = os.environ['CURRENT_VERSION_ID'] or 'dev'
-def main():
- app.run()
+import logging
+import django.core.signals
+import django.dispatch.dispatcher
+import django.db
+
+def log_exception(*args, **kwds):
+ logging.exception('Exception in request:')
+
+# Log errors.
+django.dispatch.Signal.connect(
+ django.core.signals.got_request_exception, log_exception)
+# Unregister the rollback event handler.
+django.dispatch.Signal.disconnect(
+ django.core.signals.got_request_exception,
+ django.db._rollback_on_exception)
+
+
+
+def main():
+ sys.path= [os.path.join(os.path.dirname(__file__), 'shared'), os.path.join(os.path.dirname(__file__), '.')]+sys.path
+ application = django.core.handlers.wsgi.WSGIHandler()
+ util.run_wsgi_app(application)
if __name__ == '__main__':
- main()
+ main()
\ No newline at end of file
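The rewritten main.py above follows the usual Django-on-App-Engine bootstrap: extend sys.path, point DJANGO_SETTINGS_MODULE at the project settings, pin the SDK-bundled Django 1.2 via use_library, replace Django's SQL rollback signal receiver with an exception logger, and hand a WSGIHandler to run_wsgi_app. A condensed sketch of that flow, using the same APIs as the hunk but with the signal hookup written as the equivalent bound-method calls, might look like this:

import logging
import os
import sys

# Put ./shared and the project root on the import path, as the hunk does.
sys.path[0:0] = [os.path.join(os.path.dirname(__file__), 'shared'),
                 os.path.dirname(__file__)]

os.environ['DJANGO_SETTINGS_MODULE'] = 'settings'

# Pin the SDK-bundled Django 1.2 before importing anything from django.
from google.appengine.dist import use_library
use_library('django', '1.2')

from google.appengine.ext.webapp import util
import django.core.handlers.wsgi
import django.core.signals
import django.db
from django.conf import settings

settings.ROOT_URLCONF = 'urls'


def log_exception(*args, **kwds):
    # Send the traceback of any unhandled exception to the request log.
    logging.exception('Exception in request:')

# Equivalent to the unbound Signal.connect/disconnect calls in the hunk:
# log request exceptions, and drop Django's SQL rollback receiver, which a
# datastore-backed app does not need.
django.core.signals.got_request_exception.connect(log_exception)
django.core.signals.got_request_exception.disconnect(
    django.db._rollback_on_exception)


def main():
    util.run_wsgi_app(django.core.handlers.wsgi.WSGIHandler())


if __name__ == '__main__':
    main()
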
diff --git a/mapreduce.yaml b/mapreduce.yaml
deleted file mode 100755
index 4380181..0000000
--- a/mapreduce.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-mapreduce:
-- name: 'migrate.process'
- mapper:
- input_reader: mapreduce.input_readers.DatastoreInputReader
- handler: migrate.process
- params:
- - name: entity_kind
- default: events.models.Event
\ No newline at end of file
diff --git a/mapreduce/__init__.py b/mapreduce/__init__.py
deleted file mode 100755
index de5df1c..0000000
--- a/mapreduce/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2010 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
diff --git a/mapreduce/base_handler.py b/mapreduce/base_handler.py
deleted file mode 100755
index 00b88eb..0000000
--- a/mapreduce/base_handler.py
+++ /dev/null
@@ -1,141 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2010 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Base handler class for all mapreduce handlers.
-"""
-
-
-
-
-import logging
-from mapreduce.lib import simplejson
-
-from google.appengine.ext import webapp
-
-
-class Error(Exception):
- """Base-class for exceptions in this module."""
-
-
-class BadRequestPathError(Error):
- """The request path for the handler is invalid."""
-
-
-class BaseHandler(webapp.RequestHandler):
- """Base class for all mapreduce handlers."""
-
- def base_path(self):
- """Base path for all mapreduce-related urls."""
- path = self.request.path
- return path[:path.rfind("/")]
-
-
-class TaskQueueHandler(BaseHandler):
- """Base class for handlers intended to be run only from the task queue.
-
- Sub-classes should implement the 'handle' method.
- """
-
- def post(self):
- if "X-AppEngine-QueueName" not in self.request.headers:
- logging.error(self.request.headers)
- logging.error("Task queue handler received non-task queue request")
- self.response.set_status(
- 403, message="Task queue handler received non-task queue request")
- return
- self.handle()
-
- def handle(self):
- """To be implemented by subclasses."""
- raise NotImplementedError()
-
- def task_retry_count(self):
- """Number of times this task has been retried."""
- return int(self.request.headers.get("X-AppEngine-TaskRetryCount", 0))
-
-
-class JsonHandler(BaseHandler):
- """Base class for JSON handlers for user interface.
-
- Sub-classes should implement the 'handle' method. They should put their
- response data in the 'self.json_response' dictionary. Any exceptions raised
- by the sub-class implementation will be sent in a JSON response with the
- name of the error_class and the error_message.
- """
-
- def __init__(self):
- """Initializer."""
- super(BaseHandler, self).__init__()
- self.json_response = {}
-
- def base_path(self):
- """Base path for all mapreduce-related urls.
-
- JSON handlers are mapped to /base_path/command/command_name thus they
- require special treatment.
- """
- path = self.request.path
- base_path = path[:path.rfind("/")]
- if not base_path.endswith("/command"):
- raise BadRequestPathError(
- "Json handlers should have /command path prefix")
- return base_path[:base_path.rfind("/")]
-
- def _handle_wrapper(self):
- if self.request.headers.get("X-Requested-With") != "XMLHttpRequest":
- logging.error(self.request.headers)
- logging.error("Got JSON request with no X-Requested-With header")
- self.response.set_status(
- 403, message="Got JSON request with no X-Requested-With header")
- return
-
- self.json_response.clear()
- try:
- self.handle()
- except Exception, e:
- logging.exception("Error in JsonHandler, returning exception.")
- # TODO(user): Include full traceback here for the end-user.
- self.json_response.clear()
- self.json_response["error_class"] = e.__class__.__name__
- self.json_response["error_message"] = str(e)
-
- self.response.headers["Content-Type"] = "text/javascript"
- try:
- output = simplejson.dumps(self.json_response)
- except:
- logging.exception("Could not serialize to JSON")
- self.response.set_status(500, message="Could not serialize to JSON")
- return
- else:
- self.response.out.write(output)
-
- def handle(self):
- """To be implemented by sub-classes."""
- raise NotImplementedError()
-
-
-class PostJsonHandler(JsonHandler):
- """JSON handler that accepts POST requests."""
-
- def post(self):
- self._handle_wrapper()
-
-
-class GetJsonHandler(JsonHandler):
- """JSON handler that accepts GET posts."""
-
- def get(self):
- self._handle_wrapper()
diff --git a/mapreduce/context.py b/mapreduce/context.py
deleted file mode 100755
index 93c1017..0000000
--- a/mapreduce/context.py
+++ /dev/null
@@ -1,305 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2010 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Mapreduce execution context.
-
-Mapreduce context provides handler code with information about
-current mapreduce execution and organizes utility data flow
-from handlers such as counters, log messages, mutation pools.
-"""
-
-
-
-__all__ = ["MAX_ENTITY_COUNT", "MAX_POOL_SIZE", "Context", "MutationPool",
- "Counters", "ItemList", "EntityList", "get", "COUNTER_MAPPER_CALLS",
- "DATASTORE_DEADLINE"]
-
-from google.appengine.api import datastore
-from google.appengine.ext import db
-
-# Maximum pool size in bytes. Pool will be flushed when reaches this amount.
-# We use 950,000 bytes which is slightly less than maximum allowed RPC size of
-# 1M to have some space cushion.
-MAX_POOL_SIZE = 900 * 1000
-
-# Maximum number of items. Pool will be flushed when reaches this amount.
-MAX_ENTITY_COUNT = 500
-
-# Deadline in seconds for mutation pool datastore operations.
-DATASTORE_DEADLINE = 15
-
-# The name of the counter which counts all mapper calls.
-COUNTER_MAPPER_CALLS = "mapper_calls"
-
-
-def _normalize_entity(value):
- """Return an entity from an entity or model instance."""
- # TODO(user): Consider using datastore.NormalizeAndTypeCheck.
- if getattr(value, "_populate_internal_entity", None):
- return value._populate_internal_entity()
- return value
-
-def _normalize_key(value):
- """Return a key from an entity, model instance, key, or key string."""
- if getattr(value, "key", None):
- return value.key()
- elif isinstance(value, basestring):
- return datastore.Key(value)
- else:
- return value
-
-class ItemList(object):
- """Holds list of arbitrary items, and their total size.
-
- Properties:
- items: list of objects.
- length: length of item list.
- size: aggregate item size in bytes.
- """
-
- def __init__(self):
- """Constructor."""
- self.items = []
- self.length = 0
- self.size = 0
-
- def append(self, item, item_size):
- """Add new item to the list.
-
- Args:
- item: an item to add to the list.
- item_size: item size in bytes as int.
- """
- self.items.append(item)
- self.length += 1
- self.size += item_size
-
- def clear(self):
- """Clear item list."""
- self.items = []
- self.length = 0
- self.size = 0
-
- @property
- def entities(self):
- """Return items. For backwards compatability."""
- return self.items
-
-
-# For backwards compatability.
-EntityList = ItemList
-
-
-# TODO(user): mutation pool has no error handling at all. Add some.
-class MutationPool(object):
- """Mutation pool accumulates datastore changes to perform them in batch.
-
- Properties:
- puts: ItemList of entities to put to datastore.
- deletes: ItemList of keys to delete from datastore.
- max_pool_size: maximum single list pool size. List changes will be flushed
- when this size is reached.
- """
-
- def __init__(self,
- max_pool_size=MAX_POOL_SIZE,
- max_entity_count=MAX_ENTITY_COUNT):
- """Constructor.
-
- Args:
- max_pool_size: maximum pools size in bytes before flushing it to db.
- max_entity_count: maximum number of entities before flushing it to db.
- """
- self.max_pool_size = max_pool_size
- self.max_entity_count = max_entity_count
- self.puts = ItemList()
- self.deletes = ItemList()
-
- def put(self, entity):
- """Registers entity to put to datastore.
-
- Args:
- entity: an entity or model instance to put.
- """
- actual_entity = _normalize_entity(entity)
- entity_size = len(actual_entity._ToPb().Encode())
- if (self.puts.length >= self.max_entity_count or
- (self.puts.size + entity_size) > self.max_pool_size):
- self.__flush_puts()
- self.puts.append(actual_entity, entity_size)
-
- def delete(self, entity):
- """Registers entity to delete from datastore.
-
- Args:
- entity: an entity, model instance, or key to delete.
- """
- # This is not very nice: we're calling two protected methods here...
- key = _normalize_key(entity)
- key_size = len(key._ToPb().Encode())
- if (self.deletes.length >= self.max_entity_count or
- (self.deletes.size + key_size) > self.max_pool_size):
- self.__flush_deletes()
- self.deletes.append(key, key_size)
-
- # TODO(user): some kind of error handling/retries is needed here.
- def flush(self):
- """Flush(apply) all changed to datastore."""
- self.__flush_puts()
- self.__flush_deletes()
-
- def __flush_puts(self):
- """Flush all puts to datastore."""
- if self.puts.length:
- datastore.Put(self.puts.items, rpc=self.__create_rpc())
- self.puts.clear()
-
- def __flush_deletes(self):
- """Flush all deletes to datastore."""
- if self.deletes.length:
- datastore.Delete(self.deletes.items, rpc=self.__create_rpc())
- self.deletes.clear()
-
- def __create_rpc(self):
- """Creates correctly configured RPC object for datastore calls.
-
- Returns:
- A UserRPC instance.
- """
- return datastore.CreateRPC(deadline=DATASTORE_DEADLINE)
-
-
-# This doesn't do much yet. In future it will play nicely with checkpoint/error
-# handling system.
-class Counters(object):
- """Regulates access to counters."""
-
- def __init__(self, shard_state):
- """Constructor.
-
- Args:
- shard_state: current mapreduce shard state as model.ShardState.
- """
- self._shard_state = shard_state
-
- def increment(self, counter_name, delta=1):
- """Increment counter value.
-
- Args:
- counter_name: name of the counter as string.
- delta: increment delta as int.
- """
- self._shard_state.counters_map.increment(counter_name, delta)
-
- def flush(self):
- """Flush unsaved counter values."""
- pass
-
-
-class Context(object):
- """MapReduce execution context.
-
- Properties:
- mapreduce_spec: current mapreduce specification as model.MapreduceSpec.
- shard_state: current shard state as model.ShardState.
- mutation_pool: current mutation pool as MutationPool.
- counters: counters object as Counters.
- """
-
- # Current context instance
- _context_instance = None
-
- def __init__(self, mapreduce_spec, shard_state, task_retry_count=0):
- """Constructor.
-
- Args:
- mapreduce_spec: mapreduce specification as model.MapreduceSpec.
- shard_state: shard state as model.ShardState.
- """
- self.mapreduce_spec = mapreduce_spec
- self.shard_state = shard_state
- self.task_retry_count = task_retry_count
-
- if self.mapreduce_spec:
- self.mapreduce_id = self.mapreduce_spec.mapreduce_id
- else:
- # Only in tests
- self.mapreduce_id = None
- if self.shard_state:
- self.shard_id = self.shard_state.get_shard_id()
- else:
- # Only in tests
- self.shard_id = None
-
- self.mutation_pool = MutationPool(
- max_pool_size=(MAX_POOL_SIZE/(2**self.task_retry_count)),
- max_entity_count=(MAX_ENTITY_COUNT/(2**self.task_retry_count)))
- self.counters = Counters(shard_state)
-
- self._pools = {}
- self.register_pool("mutation_pool", self.mutation_pool)
- self.register_pool("counters", self.counters)
-
- def flush(self):
- """Flush all information recorded in context."""
- for pool in self._pools.values():
- pool.flush()
- if self.shard_state:
- self.shard_state.put()
-
- # TODO(user): Add convenience method for mapper params.
-
- # TODO(user): Add fatal error logging method here. Will log the message
- # and set the shard state to failure result status, which the controller
- # callback should pick up and force all shards to terminate.
-
- def register_pool(self, key, pool):
- """Register an arbitrary pool to be flushed together with this context.
-
- Args:
- key: pool key as string.
- pool: a pool instance. Pool should implement flush(self) method.
- """
- self._pools[key] = pool
-
- def get_pool(self, key):
- """Obtains an instance of registered pool.
-
- Args:
- key: pool key as string.
-
- Returns:
- an instance of the pool registered earlier, or None.
- """
- return self._pools.get(key, None)
-
- @classmethod
- def _set(cls, context):
- """Set current context instance.
-
- Args:
- context: new context as Context or None.
- """
- cls._context_instance = context
-
-
-def get():
- """Get current context instance.
-
- Returns:
- current context as Context.
- """
- return Context._context_instance
diff --git a/mapreduce/control.py b/mapreduce/control.py
deleted file mode 100755
index 616d55f..0000000
--- a/mapreduce/control.py
+++ /dev/null
@@ -1,85 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2010 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""API for controlling MapReduce execution outside of MapReduce framework."""
-
-
-
-__all__ = ["start_map"]
-
-# pylint: disable-msg=C6409
-
-
-from mapreduce import handlers
-from mapreduce import model
-
-
-_DEFAULT_SHARD_COUNT = 8
-
-
-def start_map(name,
- handler_spec,
- reader_spec,
- reader_parameters,
- shard_count=_DEFAULT_SHARD_COUNT,
- mapreduce_parameters=None,
- base_path="/mapreduce",
- queue_name="default",
- eta=None,
- countdown=None,
- hooks_class_name=None,
- _app=None,
- transactional=False):
- """Start a new, mapper-only mapreduce.
-
- Args:
- name: mapreduce name. Used only for display purposes.
- handler_spec: fully qualified name of mapper handler function/class to call.
- reader_spec: fully qualified name of mapper reader to use
- reader_parameters: dictionary of parameters to pass to reader. These are
- reader-specific.
- shard_count: number of shards to create.
- mapreduce_parameters: dictionary of mapreduce parameters relevant to the
- whole job.
- base_path: base path of mapreduce library handler specified in app.yaml.
- "/mapreduce" by default.
- queue_name: executor queue name to be used for mapreduce tasks.
- eta: Absolute time when the MR should execute. May not be specified
- if 'countdown' is also supplied. This may be timezone-aware or
- timezone-naive.
- countdown: Time in seconds into the future that this MR should execute.
- Defaults to zero.
- hooks_class_name: fully qualified name of a hooks.Hooks subclass.
- transactional: Specifies if job should be started as a part of already
- opened transaction.
-
- Returns:
- mapreduce id as string.
- """
- mapper_spec = model.MapperSpec(handler_spec, reader_spec, reader_parameters,
- shard_count)
-
- return handlers.StartJobHandler._start_map(
- name,
- mapper_spec,
- mapreduce_parameters or {},
- base_path=base_path,
- queue_name=queue_name,
- eta=eta,
- countdown=countdown,
- hooks_class_name=hooks_class_name,
- _app=_app,
- transactional=transactional)
diff --git a/mapreduce/handlers.py b/mapreduce/handlers.py
deleted file mode 100755
index 99702c9..0000000
--- a/mapreduce/handlers.py
+++ /dev/null
@@ -1,876 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2010 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""Defines executor tasks handlers for MapReduce implementation."""
-
-
-
-# Disable "Invalid method name"
-# pylint: disable-msg=C6409
-
-import datetime
-import logging
-import math
-import os
-from mapreduce.lib import simplejson
-import time
-
-from google.appengine.api import memcache
-from google.appengine.api.labs import taskqueue
-from google.appengine.ext import db
-from mapreduce import base_handler
-from mapreduce import context
-from mapreduce import model
-from mapreduce import quota
-from mapreduce import util
-
-
-# TODO(user): Make this a product of the reader or in quotas.py
-_QUOTA_BATCH_SIZE = 20
-
-# The amount of time to perform scanning in one slice. New slice will be
-# scheduled as soon as current one takes this long.
-_SLICE_DURATION_SEC = 15
-
-# Delay between consecutive controller callback invocations.
-_CONTROLLER_PERIOD_SEC = 2
-
-
-class Error(Exception):
- """Base class for exceptions in this module."""
-
-
-class NotEnoughArgumentsError(Error):
- """Required argument is missing."""
-
-
-class NoDataError(Error):
- """There is no data present for a desired input."""
-
-
-def _run_task_hook(hooks, method, task, queue_name):
- """Invokes hooks.method(task, queue_name).
-
- Args:
- hooks: A hooks.Hooks instance or None.
- method: The name of the method to invoke on the hooks class e.g.
- "enqueue_kickoff_task".
- task: The taskqueue.Task to pass to the hook method.
- queue_name: The name of the queue to pass to the hook method.
-
- Returns:
- True if the hooks.Hooks instance handled the method, False otherwise.
- """
- if hooks is not None:
- try:
- getattr(hooks, method)(task, queue_name)
- except NotImplementedError:
- # Use the default task addition implementation.
- return False
-
- return True
- return False
-
-
-class MapperWorkerCallbackHandler(base_handler.TaskQueueHandler):
- """Callback handler for mapreduce worker task.
-
- Request Parameters:
- mapreduce_spec: MapreduceSpec of the mapreduce serialized to json.
- shard_id: id of the shard.
- slice_id: id of the slice.
- """
-
- def __init__(self, time_function=time.time):
- """Constructor.
-
- Args:
- time_function: time function to use to obtain current time.
- """
- base_handler.TaskQueueHandler.__init__(self)
- self._time = time_function
-
- def handle(self):
- """Handle request."""
- spec = model.MapreduceSpec.from_json_str(
- self.request.get("mapreduce_spec"))
- self._start_time = self._time()
- shard_id = self.shard_id()
-
- # TODO(user): Make this prettier
- logging.debug("post: shard=%s slice=%s headers=%s",
- shard_id, self.slice_id(), self.request.headers)
-
- shard_state, control = db.get([
- model.ShardState.get_key_by_shard_id(shard_id),
- model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
- ])
- if not shard_state:
- # We're letting this task to die. It's up to controller code to
- # reinitialize and restart the task.
- logging.error("State not found for shard ID %r; shutting down",
- shard_id)
- return
-
- if control and control.command == model.MapreduceControl.ABORT:
- logging.info("Abort command received by shard %d of job '%s'",
- shard_state.shard_number, shard_state.mapreduce_id)
- shard_state.active = False
- shard_state.result_status = model.ShardState.RESULT_ABORTED
- shard_state.put()
- model.MapreduceControl.abort(spec.mapreduce_id)
- return
-
- input_reader = self.input_reader(spec.mapper)
-
- if spec.mapper.params.get("enable_quota", True):
- quota_consumer = quota.QuotaConsumer(
- quota.QuotaManager(memcache.Client()),
- shard_id,
- _QUOTA_BATCH_SIZE)
- else:
- quota_consumer = None
-
- ctx = context.Context(spec, shard_state,
- task_retry_count=self.task_retry_count())
- context.Context._set(ctx)
-
- try:
- # consume quota ahead, because we do not want to run a datastore
- # query if there's not enough quota for the shard.
- if not quota_consumer or quota_consumer.check():
- scan_aborted = False
- entity = None
-
- # We shouldn't fetch an entity from the reader if there's not enough
- # quota to process it. Perform all quota checks proactively.
- if not quota_consumer or quota_consumer.consume():
- for entity in input_reader:
- if isinstance(entity, db.Model):
- shard_state.last_work_item = repr(entity.key())
- else:
- shard_state.last_work_item = repr(entity)[:100]
-
- scan_aborted = not self.process_entity(entity, ctx)
-
- # Check if we've got enough quota for the next entity.
- if (quota_consumer and not scan_aborted and
- not quota_consumer.consume()):
- scan_aborted = True
- if scan_aborted:
- break
- else:
- scan_aborted = True
-
-
- if not scan_aborted:
- logging.info("Processing done for shard %d of job '%s'",
- shard_state.shard_number, shard_state.mapreduce_id)
- # We consumed extra quota item at the end of for loop.
- # Just be nice here and give it back :)
- if quota_consumer:
- quota_consumer.put(1)
- shard_state.active = False
- shard_state.result_status = model.ShardState.RESULT_SUCCESS
-
- # TODO(user): Mike said we don't want this happen in case of
- # exception while scanning. Figure out when it's appropriate to skip.
- ctx.flush()
- finally:
- context.Context._set(None)
- if quota_consumer:
- quota_consumer.dispose()
-
- # Rescheduling work should always be the last statement. It shouldn't happen
- # if there were any exceptions in code before it.
- if shard_state.active:
- self.reschedule(spec, input_reader)
-
- def process_entity(self, entity, ctx):
- """Process a single entity.
-
- Call mapper handler on the entity.
-
- Args:
- entity: an entity to process.
- ctx: current execution context.
-
- Returns:
- True if scan should be continued, False if scan should be aborted.
- """
- ctx.counters.increment(context.COUNTER_MAPPER_CALLS)
-
- handler = ctx.mapreduce_spec.mapper.handler
- if util.is_generator_function(handler):
- for result in handler(entity):
- if callable(result):
- result(ctx)
- else:
- try:
- if len(result) == 2:
- logging.error("Collectors not implemented yet")
- else:
- logging.error("Got bad output tuple of length %d", len(result))
- except TypeError:
- logging.error(
- "Handler yielded type %s, expected a callable or a tuple",
- result.__class__.__name__)
- else:
- handler(entity)
-
- if self._time() - self._start_time > _SLICE_DURATION_SEC:
- logging.debug("Spent %s seconds. Rescheduling",
- self._time() - self._start_time)
- return False
- return True
-
- def shard_id(self):
- """Get shard unique identifier of this task from request.
-
- Returns:
- shard identifier as string.
- """
- return str(self.request.get("shard_id"))
-
- def slice_id(self):
- """Get slice unique identifier of this task from request.
-
- Returns:
- slice identifier as int.
- """
- return int(self.request.get("slice_id"))
-
- def input_reader(self, mapper_spec):
- """Get the reader from mapper_spec initialized with the request's state.
-
- Args:
- mapper_spec: a mapper spec containing the immutable mapper state.
-
- Returns:
- An initialized InputReader.
- """
- input_reader_spec_dict = simplejson.loads(
- self.request.get("input_reader_state"))
- return mapper_spec.input_reader_class().from_json(
- input_reader_spec_dict)
-
- @staticmethod
- def worker_parameters(mapreduce_spec,
- shard_id,
- slice_id,
- input_reader):
- """Fill in mapper worker task parameters.
-
- Returned parameters map is to be used as task payload, and it contains
- all the data, required by mapper worker to perform its function.
-
- Args:
- mapreduce_spec: specification of the mapreduce.
- shard_id: id of the shard (part of the whole dataset).
- slice_id: id of the slice (part of the shard).
- input_reader: InputReader containing the remaining inputs for this
- shard.
-
- Returns:
- string->string map of parameters to be used as task payload.
- """
- return {"mapreduce_spec": mapreduce_spec.to_json_str(),
- "shard_id": shard_id,
- "slice_id": str(slice_id),
- "input_reader_state": input_reader.to_json_str()}
-
- @staticmethod
- def get_task_name(shard_id, slice_id):
- """Compute single worker task name.
-
- Args:
- shard_id: id of the shard (part of the whole dataset) as string.
- slice_id: id of the slice (part of the shard) as int.
-
- Returns:
- task name which should be used to process specified shard/slice.
- """
- # Prefix the task name with something unique to this framework's
- # namespace so we don't conflict with user tasks on the queue.
- return "appengine-mrshard-%s-%s" % (shard_id, slice_id)
-
- def reschedule(self, mapreduce_spec, input_reader):
- """Reschedule worker task to continue scanning work.
-
- Args:
- mapreduce_spec: mapreduce specification.
- input_reader: remaining input reader to process.
- """
- MapperWorkerCallbackHandler.schedule_slice(
- self.base_path(), mapreduce_spec, self.shard_id(),
- self.slice_id() + 1, input_reader)
-
- @classmethod
- def schedule_slice(cls,
- base_path,
- mapreduce_spec,
- shard_id,
- slice_id,
- input_reader,
- queue_name=None,
- eta=None,
- countdown=None):
- """Schedule slice scanning by adding it to the task queue.
-
- Args:
- base_path: base_path of mapreduce request handlers as string.
- mapreduce_spec: mapreduce specification as MapreduceSpec.
- shard_id: current shard id as string.
- slice_id: slice id as int.
- input_reader: remaining InputReader for given shard.
- queue_name: Optional queue to run on; uses the current queue of
- execution or the default queue if unspecified.
- eta: Absolute time when the MR should execute. May not be specified
- if 'countdown' is also supplied. This may be timezone-aware or
- timezone-naive.
- countdown: Time in seconds into the future that this MR should execute.
- Defaults to zero.
- """
- task_params = MapperWorkerCallbackHandler.worker_parameters(
- mapreduce_spec, shard_id, slice_id, input_reader)
- task_name = MapperWorkerCallbackHandler.get_task_name(shard_id, slice_id)
- queue_name = os.environ.get("HTTP_X_APPENGINE_QUEUENAME",
- queue_name or "default")
-
- worker_task = taskqueue.Task(url=base_path + "/worker_callback",
- params=task_params,
- name=task_name,
- eta=eta,
- countdown=countdown)
-
- if not _run_task_hook(mapreduce_spec.get_hooks(),
- "enqueue_worker_task",
- worker_task,
- queue_name):
- try:
- worker_task.add(queue_name)
- except (taskqueue.TombstonedTaskError,
- taskqueue.TaskAlreadyExistsError), e:
- logging.warning("Task %r with params %r already exists. %s: %s",
- task_name, task_params, e.__class__, e)
-
-
-class ControllerCallbackHandler(base_handler.TaskQueueHandler):
- """Supervises mapreduce execution.
-
- Is also responsible for gathering execution status from shards together.
-
- This task is "continuously" running by adding itself again to taskqueue if
- mapreduce is still active.
- """
-
- def __init__(self, time_function=time.time):
- """Constructor.
-
- Args:
- time_function: time function to use to obtain current time.
- """
- base_handler.TaskQueueHandler.__init__(self)
- self._time = time_function
-
- def handle(self):
- """Handle request."""
- spec = model.MapreduceSpec.from_json_str(
- self.request.get("mapreduce_spec"))
-
- # TODO(user): Make this logging prettier.
- logging.debug("post: id=%s headers=%s",
- spec.mapreduce_id, self.request.headers)
-
- state, control = db.get([
- model.MapreduceState.get_key_by_job_id(spec.mapreduce_id),
- model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
- ])
- if not state:
- logging.error("State not found for mapreduce_id '%s'; skipping",
- spec.mapreduce_id)
- return
-
- shard_states = model.ShardState.find_by_mapreduce_id(spec.mapreduce_id)
- if state.active and len(shard_states) != spec.mapper.shard_count:
- # Some shards were lost
- logging.error("Incorrect number of shard states: %d vs %d; "
- "aborting job '%s'",
- len(shard_states), spec.mapper.shard_count,
- spec.mapreduce_id)
- state.active = False
- state.result_status = model.MapreduceState.RESULT_FAILED
- model.MapreduceControl.abort(spec.mapreduce_id)
-
- active_shards = [s for s in shard_states if s.active]
- failed_shards = [s for s in shard_states
- if s.result_status == model.ShardState.RESULT_FAILED]
- aborted_shards = [s for s in shard_states
- if s.result_status == model.ShardState.RESULT_ABORTED]
- if state.active:
- state.active = bool(active_shards)
- state.active_shards = len(active_shards)
- state.failed_shards = len(failed_shards)
- state.aborted_shards = len(aborted_shards)
-
- if (not state.active and control and
- control.command == model.MapreduceControl.ABORT):
- # User-initiated abort *after* all shards have completed.
- logging.info("Abort signal received for job '%s'", spec.mapreduce_id)
- state.result_status = model.MapreduceState.RESULT_ABORTED
-
- if not state.active:
- state.active_shards = 0
- if not state.result_status:
- # Set final result status derived from shard states.
- if [s for s in shard_states
- if s.result_status != model.ShardState.RESULT_SUCCESS]:
- state.result_status = model.MapreduceState.RESULT_FAILED
- else:
- state.result_status = model.MapreduceState.RESULT_SUCCESS
- logging.info("Final result for job '%s' is '%s'",
- spec.mapreduce_id, state.result_status)
-
- # We don't need a transaction here, since we change only statistics data,
- # and we don't care if it gets overwritten/slightly inconsistent.
- self.aggregate_state(state, shard_states)
- poll_time = state.last_poll_time
- state.last_poll_time = datetime.datetime.utcfromtimestamp(self._time())
-
- if not state.active:
- # This is the last execution.
- # Enqueue done_callback if needed.
- def put_state(state):
- state.put()
- done_callback = spec.params.get(
- model.MapreduceSpec.PARAM_DONE_CALLBACK)
- if done_callback:
- done_task = taskqueue.Task(
- url=done_callback,
- headers={"Mapreduce-Id": spec.mapreduce_id})
- queue_name = spec.params.get(
- model.MapreduceSpec.PARAM_DONE_CALLBACK_QUEUE,
- "default")
-
- if not _run_task_hook(spec.get_hooks(),
- "enqueue_done_task",
- done_task,
- queue_name):
- done_task.add(queue_name, transactional=True)
- db.run_in_transaction(put_state, state)
- return
- else:
- state.put()
-
- processing_rate = int(spec.mapper.params.get(
- "processing_rate") or model._DEFAULT_PROCESSING_RATE_PER_SEC)
- self.refill_quotas(poll_time, processing_rate, active_shards)
- ControllerCallbackHandler.reschedule(
- self.base_path(), spec, self.serial_id() + 1)
-
- def aggregate_state(self, mapreduce_state, shard_states):
- """Update current mapreduce state by aggregating shard states.
-
- Args:
- mapreduce_state: current mapreduce state as MapreduceState.
- shard_states: all shard states (active and inactive). list of ShardState.
- """
- processed_counts = []
- mapreduce_state.counters_map.clear()
-
- for shard_state in shard_states:
- mapreduce_state.counters_map.add_map(shard_state.counters_map)
- processed_counts.append(shard_state.counters_map.get(
- context.COUNTER_MAPPER_CALLS))
-
- mapreduce_state.set_processed_counts(processed_counts)
-
- def refill_quotas(self,
- last_poll_time,
- processing_rate,
- active_shard_states):
- """Refill quotas for all active shards.
-
- Args:
- last_poll_time: Datetime with the last time the job state was updated.
- processing_rate: How many items to process per second overall.
- active_shard_states: All active shard states, list of ShardState.
- """
- if not active_shard_states:
- return
- quota_manager = quota.QuotaManager(memcache.Client())
-
- current_time = int(self._time())
- last_poll_time = time.mktime(last_poll_time.timetuple())
- total_quota_refill = processing_rate * max(0, current_time - last_poll_time)
- quota_refill = int(math.ceil(
- 1.0 * total_quota_refill / len(active_shard_states)))
-
- if not quota_refill:
- return
-
- # TODO(user): use batch memcache API to refill quota in one API call.
- for shard_state in active_shard_states:
- quota_manager.put(shard_state.shard_id, quota_refill)
-
- def serial_id(self):
- """Get serial unique identifier of this task from request.
-
- Returns:
- serial identifier as int.
- """
- return int(self.request.get("serial_id"))
-
- @staticmethod
- def get_task_name(mapreduce_spec, serial_id):
- """Compute single controller task name.
-
- Args:
- mapreduce_spec: specification of the mapreduce.
- serial_id: id of the invocation as int.
-
- Returns:
- task name which should be used to process specified shard/slice.
- """
- # Prefix the task name with something unique to this framework's
- # namespace so we don't conflict with user tasks on the queue.
- return "appengine-mrcontrol-%s-%s" % (
- mapreduce_spec.mapreduce_id, serial_id)
-
- @staticmethod
- def controller_parameters(mapreduce_spec, serial_id):
- """Fill in controller task parameters.
-
- Returned parameters map is to be used as task payload, and it contains
- all the data, required by controller to perform its function.
-
- Args:
- mapreduce_spec: specification of the mapreduce.
- serial_id: id of the invocation as int.
-
- Returns:
- string->string map of parameters to be used as task payload.
- """
- return {"mapreduce_spec": mapreduce_spec.to_json_str(),
- "serial_id": str(serial_id)}
-
- @classmethod
- def reschedule(cls, base_path, mapreduce_spec, serial_id, queue_name=None):
- """Schedule new update status callback task.
-
- Args:
- base_path: mapreduce handlers url base path as string.
- mapreduce_spec: mapreduce specification as MapreduceSpec.
- serial_id: id of the invocation as int.
- queue_name: The queue to schedule this task on. Will use the current
- queue of execution if not supplied.
- """
- task_name = ControllerCallbackHandler.get_task_name(
- mapreduce_spec, serial_id)
- task_params = ControllerCallbackHandler.controller_parameters(
- mapreduce_spec, serial_id)
- if not queue_name:
- queue_name = os.environ.get("HTTP_X_APPENGINE_QUEUENAME", "default")
-
- controller_callback_task = taskqueue.Task(
- url=base_path + "/controller_callback",
- name=task_name, params=task_params,
- countdown=_CONTROLLER_PERIOD_SEC)
-
- if not _run_task_hook(mapreduce_spec.get_hooks(),
- "enqueue_controller_task",
- controller_callback_task,
- queue_name):
- try:
- controller_callback_task.add(queue_name)
- except (taskqueue.TombstonedTaskError,
- taskqueue.TaskAlreadyExistsError), e:
- logging.warning("Task %r with params %r already exists. %s: %s",
- task_name, task_params, e.__class__, e)
-
-
-class KickOffJobHandler(base_handler.TaskQueueHandler):
- """Taskqueue handler which kicks off a mapreduce processing.
-
- Request Parameters:
- mapreduce_spec: MapreduceSpec of the mapreduce serialized to json.
- input_readers: List of InputReaders objects separated by semi-colons.
- """
-
- def handle(self):
- """Handles kick off request."""
- spec = model.MapreduceSpec.from_json_str(
- self._get_required_param("mapreduce_spec"))
- app_id = self.request.get("app", None)
- queue_name = os.environ.get("HTTP_X_APPENGINE_QUEUENAME", "default")
- mapper_input_reader_class = spec.mapper.input_reader_class()
-
- # StartJobHandler might have already saved the state, but it's OK
- # to override it because we're using the same mapreduce id.
- state = model.MapreduceState.create_new(spec.mapreduce_id)
- state.mapreduce_spec = spec
- state.active = True
- # TODO(user): Initialize UI fields correctly.
- state.char_url = ""
- state.sparkline_url = ""
- if app_id:
- state.app_id = app_id
-
- input_readers = mapper_input_reader_class.split_input(spec.mapper)
- if not input_readers:
- # We don't have any data. Finish map.
- logging.warning("Found no mapper input data to process.")
- state.active = False
- state.active_shards = 0
- state.put()
- return
-
- # Update state and spec with actual shard count.
- spec.mapper.shard_count = len(input_readers)
- state.active_shards = len(input_readers)
- state.mapreduce_spec = spec
- state.put()
-
- KickOffJobHandler._schedule_shards(
- spec, input_readers, queue_name, self.base_path())
-
- ControllerCallbackHandler.reschedule(
- self.base_path(), spec, queue_name=queue_name, serial_id=0)
-
- def _get_required_param(self, param_name):
- """Get a required request parameter.
-
- Args:
- param_name: name of request parameter to fetch.
-
- Returns:
- parameter value
-
- Raises:
- NotEnoughArgumentsError: if parameter is not specified.
- """
- value = self.request.get(param_name)
- if not value:
- raise NotEnoughArgumentsError(param_name + " not specified")
- return value
-
- @classmethod
- def _schedule_shards(cls, spec, input_readers, queue_name, base_path):
- """Prepares shard states and schedules their execution.
-
- Args:
- spec: mapreduce specification as MapreduceSpec.
- input_readers: list of InputReaders describing shard splits.
- queue_name: The queue to run this job on.
- base_path: The base url path of mapreduce callbacks.
- """
- # Note: it's safe to re-attempt this handler because:
- # - shard state has deterministic and unique key.
- # - schedule_slice will fall back gracefully if a task already exists.
- shard_states = []
- for shard_number, input_reader in enumerate(input_readers):
- shard = model.ShardState.create_new(spec.mapreduce_id, shard_number)
- shard.shard_description = str(input_reader)
- shard_states.append(shard)
-
- # Retrievs already existing shards.
- existing_shard_states = db.get(shard.key() for shard in shard_states)
- existing_shard_keys = set(shard.key() for shard in existing_shard_states
- if shard is not None)
-
- # Puts only non-existing shards.
- db.put(shard for shard in shard_states
- if shard.key() not in existing_shard_keys)
-
- for shard_number, input_reader in enumerate(input_readers):
- shard_id = model.ShardState.shard_id_from_number(
- spec.mapreduce_id, shard_number)
- MapperWorkerCallbackHandler.schedule_slice(
- base_path, spec, shard_id, 0, input_reader, queue_name=queue_name)
-
-
-class StartJobHandler(base_handler.PostJsonHandler):
- """Command handler starts a mapreduce job."""
-
- def handle(self):
- """Handles start request."""
- # Mapper spec as form arguments.
- mapreduce_name = self._get_required_param("name")
- mapper_input_reader_spec = self._get_required_param("mapper_input_reader")
- mapper_handler_spec = self._get_required_param("mapper_handler")
- mapper_params = self._get_params(
- "mapper_params_validator", "mapper_params.")
- params = self._get_params(
- "params_validator", "params.")
-
- # Set some mapper param defaults if not present.
- mapper_params["processing_rate"] = int(mapper_params.get(
- "processing_rate") or model._DEFAULT_PROCESSING_RATE_PER_SEC)
- queue_name = mapper_params["queue_name"] = mapper_params.get(
- "queue_name", "default")
-
- # Validate the Mapper spec, handler, and input reader.
- mapper_spec = model.MapperSpec(
- mapper_handler_spec,
- mapper_input_reader_spec,
- mapper_params,
- int(mapper_params.get("shard_count", model._DEFAULT_SHARD_COUNT)))
-
- mapreduce_id = type(self)._start_map(
- mapreduce_name,
- mapper_spec,
- params,
- base_path=self.base_path(),
- queue_name=queue_name,
- _app=mapper_params.get("_app"))
- self.json_response["mapreduce_id"] = mapreduce_id
-
- def _get_params(self, validator_parameter, name_prefix):
- """Retrieves additional user-supplied params for the job and validates them.
-
- Args:
- validator_parameter: name of the request parameter which supplies
- validator for this parameter set.
- name_prefix: common prefix for all parameter names in the request.
-
- Raises:
- Any exception raised by the 'params_validator' request parameter if
- the params fail to validate.
- """
- params_validator = self.request.get(validator_parameter)
-
- user_params = {}
- for key in self.request.arguments():
- if key.startswith(name_prefix):
- values = self.request.get_all(key)
- adjusted_key = key[len(name_prefix):]
- if len(values) == 1:
- user_params[adjusted_key] = values[0]
- else:
- user_params[adjusted_key] = values
-
- if params_validator:
- resolved_validator = util.for_name(params_validator)
- resolved_validator(user_params)
-
- return user_params
-
- def _get_required_param(self, param_name):
- """Get a required request parameter.
-
- Args:
- param_name: name of request parameter to fetch.
-
- Returns:
- parameter value
-
- Raises:
- NotEnoughArgumentsError: if parameter is not specified.
- """
- value = self.request.get(param_name)
- if not value:
- raise NotEnoughArgumentsError(param_name + " not specified")
- return value
-
- @classmethod
- def _start_map(cls, name, mapper_spec,
- mapreduce_params,
- base_path="/mapreduce",
- queue_name="default",
- eta=None,
- countdown=None,
- hooks_class_name=None,
- _app=None,
- transactional=False):
- # Check that handler can be instantiated.
- mapper_spec.get_handler()
-
- # Check that reader can be instantiated and is configured correctly
- mapper_input_reader_class = mapper_spec.input_reader_class()
- mapper_input_reader_class.validate(mapper_spec)
-
- mapreduce_id = model.MapreduceState.new_mapreduce_id()
- mapreduce_spec = model.MapreduceSpec(
- name,
- mapreduce_id,
- mapper_spec.to_json(),
- mapreduce_params,
- hooks_class_name)
-
- kickoff_params = {"mapreduce_spec": mapreduce_spec.to_json_str()}
- if _app:
- kickoff_params["app"] = _app
- kickoff_worker_task = taskqueue.Task(
- url=base_path + "/kickoffjob_callback",
- params=kickoff_params,
- eta=eta, countdown=countdown)
-
- hooks = mapreduce_spec.get_hooks()
-
- def start_mapreduce():
- if not transactional:
- # Save state in datastore so that UI can see it.
- # We can't save state in foreign transaction, but conventional UI
- # doesn't ask for transactional starts anyway.
- state = model.MapreduceState.create_new(mapreduce_spec.mapreduce_id)
- state.mapreduce_spec = mapreduce_spec
- state.active = True
- state.active_shards = mapper_spec.shard_count
- if _app:
- state.app_id = _app
- state.put()
-
- if hooks is not None:
- try:
- hooks.enqueue_kickoff_task(kickoff_worker_task, queue_name)
- except NotImplementedError:
- # Use the default task addition implementation.
- pass
- else:
- return
- kickoff_worker_task.add(queue_name, transactional=True)
-
- if transactional:
- start_mapreduce()
- else:
- db.run_in_transaction(start_mapreduce)
-
- return mapreduce_id
-
-
-class CleanUpJobHandler(base_handler.PostJsonHandler):
- """Command to kick off tasks to clean up a job's data."""
-
- def handle(self):
- mapreduce_id = self.request.get("mapreduce_id")
- db.delete(model.MapreduceControl.get_key_by_job_id(mapreduce_id))
-
- shards = model.ShardState.find_by_mapreduce_id(mapreduce_id)
- db.delete(shards)
-
- db.delete(model.MapreduceState.get_key_by_job_id(mapreduce_id))
-
- self.json_response["status"] = ("Job %s successfully cleaned up." %
- mapreduce_id)
-
-
-class AbortJobHandler(base_handler.PostJsonHandler):
- """Command to abort a running job."""
-
- def handle(self):
- model.MapreduceControl.abort(self.request.get("mapreduce_id"))
- self.json_response["status"] = "Abort signal sent."
diff --git a/mapreduce/hooks.py b/mapreduce/hooks.py
deleted file mode 100755
index 7ab1123..0000000
--- a/mapreduce/hooks.py
+++ /dev/null
@@ -1,93 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2010 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""API allowing control over some mapreduce implementation details."""
-
-
-
-__all__ = ["Hooks"]
-
-
-class Hooks(object):
- """Allows subclasses to control some aspects of mapreduce execution.
-
- control.start_map accepts an optional "hooks" argument that can be passed a
- subclass of this class.
- """
-
- def __init__(self, mapper):
- """Initializes a Hooks class.
-
- Args:
- mapper: The mapreduce.model.MapperSpec for the current mapreduce.
- """
- self.mapper = mapper
-
- def enqueue_worker_task(self, task, queue_name):
- """Enqueues a worker task that is used to run the mapper.
-
- Args:
- task: A taskqueue.Task that must be queued in order for the mapreduce
- mappers to be run.
- queue_name: The queue where the task should be run e.g. "default".
-
- Raises:
- NotImplementedError: to indicate that the default worker queueing strategy
- should be used.
- """
- raise NotImplementedError()
-
- def enqueue_kickoff_task(self, task, queue_name):
- """Enqueues a task that is used to start the mapreduce.
-
- Args:
- task: A taskqueue.Task that must be queued in order for the mapreduce
- to start.
- queue_name: The queue where the task should be run e.g. "default".
-
- Raises:
- NotImplementedError: to indicate that the default mapreduce start strategy
- should be used.
- """
- raise NotImplementedError()
-
- def enqueue_done_task(self, task, queue_name):
- """Enqueues a task that is triggered when the mapreduce completes.
-
- Args:
- task: A taskqueue.Task that must be queued in order for the client to be
- notified when the mapreduce is complete.
- queue_name: The queue where the task should be run e.g. "default".
-
- Raises:
- NotImplementedError: to indicate that the default mapreduce notification
- strategy should be used.
- """
- raise NotImplementedError()
-
- def enqueue_controller_task(self, task, queue_name):
- """Enqueues a task that is used to monitor the mapreduce process.
-
- Args:
- task: A taskqueue.Task that must be queued in order for updates to the
- mapreduce process to be properly tracked.
- queue_name: The queue where the task should be run e.g. "default".
-
- Raises:
- NotImplementedError: to indicate that the default mapreduce tracking
- strategy should be used.
- """
- raise NotImplementedError()
diff --git a/mapreduce/input_readers.py b/mapreduce/input_readers.py
deleted file mode 100755
index 4c09206..0000000
--- a/mapreduce/input_readers.py
+++ /dev/null
@@ -1,1244 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2010 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Defines input readers for MapReduce."""
-
-
-
-# pylint: disable-msg=C6409
-
-import logging
-import math
-import StringIO
-import time
-import zipfile
-
-from google.appengine.api import datastore
-from google.appengine.api import namespace_manager
-# TODO(user): Remove this hack once 1.4.0 is live in production.
-try:
- from google.appengine.datastore import datastore_rpc
-except ImportError:
- datastore_rpc = None
-from mapreduce.lib import blobstore
-from google.appengine.ext import db
-from mapreduce.lib import key_range
-from google.appengine.ext.db import metadata
-from mapreduce import util
-from mapreduce.model import JsonMixin
-
-
-class Error(Exception):
- """Base-class for exceptions in this module."""
-
-
-class BadReaderParamsError(Error):
- """The input parameters to a reader were invalid."""
-
-
-class InputReader(JsonMixin):
- """Abstract base class for input readers.
-
- InputReaders have the following properties:
- * They are created by using the split_input method to generate a set of
- InputReaders from a MapperSpec.
- * They generate inputs to the mapper via the iterator interface.
- * After creation, they can be serialized and resumed using the JsonMixin
- interface.
- * They are cast to string for a user-readable description; it may be
- valuable to implement __str__.
- """
-
- # Mapreduce parameters.
- _APP_PARAM = "_app"
- NAMESPACES_PARAM = "namespaces"
- MAPPER_PARAMS = "mapper_params"
-
- def __iter__(self):
- return self
-
- def next(self):
- """Returns the next input from this input reader as a key, value pair.
-
- Returns:
- The next input from this input reader.
- """
- raise NotImplementedError("next() not implemented in %s" % cls)
-
- @classmethod
- def from_json(cls, input_shard_state):
- """Creates an instance of the InputReader for the given input shard state.
-
- Args:
- input_shard_state: The InputReader state as a dict-like object.
-
- Returns:
- An instance of the InputReader configured using the values of json.
- """
- raise NotImplementedError("from_json() not implemented in %s" % cls)
-
- def to_json(self):
- """Returns an input shard state for the remaining inputs.
-
- Returns:
- A json-izable version of the remaining InputReader.
- """
- raise NotImplementedError("to_json() not implemented in %s" % cls)
-
- @classmethod
- def split_input(cls, mapper_spec):
- """Returns a list of input readers for the input spec.
-
- Args:
- mapper_spec: The MapperSpec for this InputReader.
-
- Returns:
- A list of InputReaders.
- """
- raise NotImplementedError("split_input() not implemented in %s" % cls)
-
- @classmethod
- def validate(cls, mapper_spec):
- """Validates mapper spec and all mapper parameters.
-
- Args:
- mapper_spec: The MapperSpec for this InputReader.
-
- Raises:
- BadReaderParamsError: required parameters are missing or invalid.
- """
- raise NotImplementedError("validate() not implemented in %s" % cls)
-
-
-# TODO(user): Use cursor API as soon as we have it available.
-class DatastoreInputReader(InputReader):
- """Represents a range in query results.
-
- DatastoreInputReader yields model instances from the entities in a given key
- range. Iterating over DatastoreInputReader changes its range past consumed
- entries.
-
- The class shouldn't be instantiated directly. Use the split_input class method
- instead.
- """
-
- # Number of entities to fetch at once while doing scanning.
- _BATCH_SIZE = 50
-
- # Maximum number of shards we'll create.
- _MAX_SHARD_COUNT = 256
-
- # Mapreduce parameters.
- ENTITY_KIND_PARAM = "entity_kind"
- KEYS_ONLY_PARAM = "keys_only"
- BATCH_SIZE_PARAM = "batch_size"
- KEY_RANGE_PARAM = "key_range"
-
- # TODO(user): Add support for arbitrary queries. It's not possible to
- # support them without cursors since right now you can't even serialize query
- # definition.
- def __init__(self, entity_kind, key_ranges, batch_size=_BATCH_SIZE):
- """Create a new DatastoreInputReader object.
-
- This is an internal constructor. Use the split_input class method instead.
-
- Args:
- entity_kind: entity kind as string.
- key_ranges: a sequence of key_range.KeyRange instances to process.
- batch_size: size of read batch as int.
- """
- self._entity_kind = entity_kind
- # Reverse the KeyRanges so they can be processed in order as a stack of
- # work items.
- self._key_ranges = list(reversed(key_ranges))
- self._batch_size = int(batch_size)
-
- def __iter__(self):
- """Create a generator for model instances for entities.
-
- Iterating through entities moves query range past the consumed entities.
-
- Yields:
- next model instance.
- """
- while True:
- if self._current_key_range is None:
- break
-
- while True:
- query = self._current_key_range.make_ascending_query(
- util.for_name(self._entity_kind))
- results = query.fetch(limit=self._batch_size)
-
- if not results:
- self._advance_key_range()
- break
-
- for model_instance in results:
- key = model_instance.key()
-
- self._current_key_range.advance(key)
- yield model_instance
-
- @property
- def _current_key_range(self):
- if self._key_ranges:
- return self._key_ranges[-1]
- else:
- return None
-
- def _advance_key_range(self):
- if self._key_ranges:
- self._key_ranges.pop()
-
- # TODO(user): use query splitting functionality when it becomes available
- # instead.
- @classmethod
- def _split_input_from_namespace(cls, app, namespace, entity_kind_name,
- shard_count):
- """Return KeyRange objects. Helper for _split_input_from_params."""
-
- raw_entity_kind = util.get_short_name(entity_kind_name)
-
- if shard_count == 1:
- # With one shard we don't need to calculate any splitpoints at all.
- return [key_range.KeyRange(namespace=namespace, _app=app)]
-
- # we use datastore.Query instead of ext.db.Query here, because we can't
- # erase ordering on db.Query once we set it.
- ds_query = datastore.Query(kind=raw_entity_kind,
- namespace=namespace,
- _app=app,
- keys_only=True)
- ds_query.Order("__key__")
- first_entity_key_list = ds_query.Get(1)
- if not first_entity_key_list:
- logging.warning("Could not retrieve an entity of type %s.",
- raw_entity_kind)
- return []
- first_entity_key = first_entity_key_list[0]
- ds_query.Order(("__key__", datastore.Query.DESCENDING))
- try:
- last_entity_key, = ds_query.Get(1)
- except db.NeedIndexError, e:
- # TODO(user): Show this error in the worker log, not the app logs.
- logging.warning("Cannot create accurate approximation of keyspace, "
- "guessing instead. Please address this problem: %s", e)
- # TODO(user): Use a key-end hint from the user input parameters
- # in this case, in the event the user has a good way of figuring out
- # the range of the keyspace.
- last_entity_key = key_range.KeyRange.guess_end_key(raw_entity_kind,
- first_entity_key)
- full_keyrange = key_range.KeyRange(
- first_entity_key, last_entity_key, None, True, True,
- namespace=namespace,
- _app=app)
- key_ranges = [full_keyrange]
- number_of_half_splits = int(math.floor(math.log(shard_count, 2)))
- for _ in range(0, number_of_half_splits):
- new_ranges = []
- for r in key_ranges:
- new_ranges += r.split_range(1)
- key_ranges = new_ranges
- return key_ranges
-
- @classmethod
- def _split_input_from_params(cls, app, namespaces, entity_kind_name,
- params, shard_count):
- """Return input reader objects. Helper for split_input."""
- key_ranges = [] # KeyRanges for all namespaces
- for namespace in namespaces:
- key_ranges.extend(
- cls._split_input_from_namespace(app,
- namespace,
- entity_kind_name,
- shard_count))
-
- # Divide the KeyRanges into shard_count shards. The KeyRanges for different
- # namespaces might be very different in size so the assignment of KeyRanges
- # to shards is done round-robin.
- shared_ranges = [[] for _ in range(shard_count)]
- for i, k_range in enumerate(key_ranges):
- shared_ranges[i % shard_count].append(k_range)
- batch_size = int(params.get(cls.BATCH_SIZE_PARAM, cls._BATCH_SIZE))
- return [cls(entity_kind_name, ranges, batch_size)
- for ranges in shared_ranges if ranges]
-
- @classmethod
- def validate(cls, mapper_spec):
- """Validates mapper spec and all mapper parameters.
-
- Args:
- mapper_spec: The MapperSpec for this InputReader.
-
- Raises:
- BadReaderParamsError: required parameters are missing or invalid.
- """
- cls._common_validate(mapper_spec)
- params = mapper_spec.params
- keys_only = util.parse_bool(params.get(cls.KEYS_ONLY_PARAM, False))
- if keys_only:
- raise BadReaderParamsError("The keys_only parameter is obsolete. "
- "Use DatastoreKeyInputReader instead.")
-
- entity_kind_name = params[cls.ENTITY_KIND_PARAM]
- # Fail fast if Model cannot be located.
- try:
- util.for_name(entity_kind_name)
- except ImportError, e:
- raise BadReaderParamsError("Bad entity kind: %s" % e)
-
- @classmethod
- def _common_validate(cls, mapper_spec):
- """Validates mapper spec and all mapper parameters.
-
- Common portion of validate method shared between DatastoreInputReader,
- DatastoreKeyInputReader, and DatastoreEntityInputReader.
-
- Args:
- cls: The class argument from the calling class method.
- mapper_spec: The MapperSpec for this InputReader.
-
- Raises:
- BadReaderParamsError: required parameters are missing or invalid.
- """
- if mapper_spec.input_reader_class() != cls:
- raise BadReaderParamsError("Input reader class mismatch")
- params = mapper_spec.params
- if cls.ENTITY_KIND_PARAM not in params:
- raise BadReaderParamsError("Missing mapper parameter 'entity_kind'")
- if cls.BATCH_SIZE_PARAM in params:
- try:
- batch_size = int(params[cls.BATCH_SIZE_PARAM])
- if batch_size < 1:
- raise BadReaderParamsError("Bad batch size: %s" % batch_size)
- except ValueError, e:
- raise BadReaderParamsError("Bad batch size: %s" % e)
- if cls.NAMESPACES_PARAM in params:
- if isinstance(params[cls.NAMESPACES_PARAM], (str, unicode)):
- pass
- elif isinstance(params[cls.NAMESPACES_PARAM], list):
- for namespace in params[cls.NAMESPACES_PARAM]:
- if not isinstance(namespace, (str, unicode)):
- raise BadReaderParamsError(
- "Bad namespace list: expected a list of strings")
- else:
- raise BadReaderParamsError(
- "Bad namespace list: expected a list of strings")
-
- @classmethod
- def split_input(cls, mapper_spec):
- """Splits query into shards without fetching query results.
-
- Tries as best it can to split the whole query result set into equal
- shards. Because a perfect split is difficult to make, the resulting shards'
- sizes might differ significantly from each other. The actual number of
- shards might also be less than requested (even 1), though it is never
- greater.
-
- The current implementation splits in key-lexicographic order. It requires
- the query not to specify any __key__-based ordering. If an index for a
- query.order('-__key__') query is not present, an inaccurate guess at
- sharding will be made by splitting the full key range.
-
- Args:
- mapper_spec: MapperSpec with params containing 'entity_kind'.
- May have 'namespaces' in the params as either a list of namespace
- strings or a comma-separated list of namespaces. If specified, then the
- input reader will only yield entities in the given namespaces. If
- 'namespaces' is not given then the current namespace will be used. May
- also have 'batch_size' in the params to specify the number of entities
- to process in each batch.
-
- Returns:
- A list of InputReader objects of length <= number_of_shards. These
- may be DatastoreInputReader or DatastoreKeyInputReader objects.
- """
- params = mapper_spec.params
- entity_kind_name = params[cls.ENTITY_KIND_PARAM]
- shard_count = mapper_spec.shard_count
- namespaces = params.get(cls.NAMESPACES_PARAM,
- [namespace_manager.get_namespace()])
- if isinstance(namespaces, (str, unicode)):
- namespaces = namespaces.split(",")
- app = params.get(cls._APP_PARAM)
-
- return cls._split_input_from_params(
- app, namespaces, entity_kind_name, params, shard_count)
-
- def to_json(self):
- """Serializes all the data in this query range into json form.
-
- Returns:
- all the data in json-compatible map.
- """
- json_dict = {self.KEY_RANGE_PARAM: [k.to_json() for k in self._key_ranges],
- self.ENTITY_KIND_PARAM: self._entity_kind,
- self.BATCH_SIZE_PARAM: self._batch_size}
- return json_dict
-
- def __str__(self):
- """Returns the string representation of this DatastoreInputReader."""
- return repr(self._key_ranges)
-
- @classmethod
- def from_json(cls, json):
- """Create new DatastoreInputReader from the json, encoded by to_json.
-
- Args:
- json: json map representation of DatastoreInputReader.
-
- Returns:
- an instance of DatastoreInputReader with all data deserialized from json.
- """
- query_range = cls(
- json[cls.ENTITY_KIND_PARAM],
- [key_range.KeyRange.from_json(k) for k in json[cls.KEY_RANGE_PARAM]],
- json[cls.BATCH_SIZE_PARAM])
- return query_range
-
-
-class DatastoreKeyInputReader(DatastoreInputReader):
- """An input reader which takes a Kind and yields Keys for that kind."""
-
- def __iter__(self):
- """Create a generator for keys in the range.
-
- Iterating through entries moves query range past the consumed entries.
-
- Yields:
- next entry.
- """
- raw_entity_kind = util.get_short_name(self._entity_kind)
- while True:
- if self._current_key_range is None:
- break
-
- while True:
- query = self._current_key_range.make_ascending_datastore_query(
- raw_entity_kind, keys_only=True)
- results = query.Get(limit=self._batch_size)
-
- if not results:
- self._advance_key_range()
- break
-
- for key in results:
- self._current_key_range.advance(key)
- yield key
-
- @classmethod
- def validate(cls, mapper_spec):
- """Validates mapper spec and all mapper parameters.
-
- Args:
- mapper_spec: The MapperSpec for this InputReader.
-
- Raises:
- BadReaderParamsError: required parameters are missing or invalid.
- """
- cls._common_validate(mapper_spec)
-
-
-class DatastoreEntityInputReader(DatastoreInputReader):
- """An input reader which yields low level datastore entities for a kind."""
-
- def __iter__(self):
- """Create a generator for low level entities in the range.
-
- Iterating through entries moves query range past the consumed entries.
-
- Yields:
- next entry.
- """
- raw_entity_kind = util.get_short_name(self._entity_kind)
- while True:
- if self._current_key_range is None:
- break
-
- while True:
- query = self._current_key_range.make_ascending_datastore_query(
- raw_entity_kind)
- results = query.Get(limit=self._batch_size)
-
- if not results:
- self._advance_key_range()
- break
-
- for entity in results:
- self._current_key_range.advance(entity.key())
- yield entity
-
- @classmethod
- def validate(cls, mapper_spec):
- """Validates mapper spec and all mapper parameters.
-
- Args:
- mapper_spec: The MapperSpec for this InputReader.
-
- Raises:
- BadReaderParamsError: required parameters are missing or invalid.
- """
- cls._common_validate(mapper_spec)
-
-
-class BlobstoreLineInputReader(InputReader):
- """Input reader for a newline delimited blob in Blobstore."""
-
- # TODO(user): Should we set this based on MAX_BLOB_FETCH_SIZE?
- _BLOB_BUFFER_SIZE = 64000
-
- # Maximum number of shards to allow.
- _MAX_SHARD_COUNT = 256
-
- # Maximum number of blobs to allow.
- _MAX_BLOB_KEYS_COUNT = 246
-
- # Mapreduce parameters.
- BLOB_KEYS_PARAM = "blob_keys"
-
- # Serialization parameters.
- INITIAL_POSITION_PARAM = "initial_position"
- END_POSITION_PARAM = "end_position"
- BLOB_KEY_PARAM = "blob_key"
-
- def __init__(self, blob_key, start_position, end_position):
- """Initializes this instance with the given blob key and character range.
-
- This BlobstoreLineInputReader will read from the first record starting
- strictly after start_position until the first record ending at or after
- end_position (exclusive). As an exception, if start_position is 0, then
- this InputReader starts reading at the first record.
-
- Args:
- blob_key: the BlobKey that this input reader is processing.
- start_position: the position to start reading at.
- end_position: a position in the last record to read.
- """
- self._blob_key = blob_key
- self._blob_reader = blobstore.BlobReader(blob_key,
- self._BLOB_BUFFER_SIZE,
- start_position)
- self._end_position = end_position
- self._has_iterated = False
- self._read_before_start = bool(start_position)
-
- def next(self):
- """Returns the next input from as an (offset, line) tuple."""
- self._has_iterated = True
-
- if self._read_before_start:
- self._blob_reader.readline()
- self._read_before_start = False
- start_position = self._blob_reader.tell()
-
- if start_position >= self._end_position:
- raise StopIteration()
-
- line = self._blob_reader.readline()
-
- if not line:
- raise StopIteration()
-
- return start_position, line.rstrip("\n")
-
- def to_json(self):
- """Returns an json-compatible input shard spec for remaining inputs."""
- new_pos = self._blob_reader.tell()
- if self._has_iterated:
- new_pos -= 1
- return {self.BLOB_KEY_PARAM: self._blob_key,
- self.INITIAL_POSITION_PARAM: new_pos,
- self.END_POSITION_PARAM: self._end_position}
-
- def __str__(self):
- """Returns the string representation of this BlobstoreLineInputReader."""
- return "blobstore.BlobKey(%r):[%d, %d]" % (
- self._blob_key, self._blob_reader.tell(), self._end_position)
-
- @classmethod
- def from_json(cls, json):
- """Instantiates an instance of this InputReader for the given shard spec."""
- return cls(json[cls.BLOB_KEY_PARAM],
- json[cls.INITIAL_POSITION_PARAM],
- json[cls.END_POSITION_PARAM])
-
- @classmethod
- def validate(cls, mapper_spec):
- """Validates mapper spec and all mapper parameters.
-
- Args:
- mapper_spec: The MapperSpec for this InputReader.
-
- Raises:
- BadReaderParamsError: required parameters are missing or invalid.
- """
- if mapper_spec.input_reader_class() != cls:
- raise BadReaderParamsError("Mapper input reader class mismatch")
- params = mapper_spec.params
- if cls.BLOB_KEYS_PARAM not in params:
- raise BadReaderParamsError("Must specify 'blob_keys' for mapper input")
- blob_keys = params[cls.BLOB_KEYS_PARAM]
- if isinstance(blob_keys, basestring):
- # This is a mechanism to allow multiple blob keys (which do not contain
- # commas) in a single string. It may go away.
- blob_keys = blob_keys.split(",")
- if len(blob_keys) > cls._MAX_BLOB_KEYS_COUNT:
- raise BadReaderParamsError("Too many 'blob_keys' for mapper input")
- if not blob_keys:
- raise BadReaderParamsError("No 'blob_keys' specified for mapper input")
- for blob_key in blob_keys:
- blob_info = blobstore.BlobInfo.get(blobstore.BlobKey(blob_key))
- if not blob_info:
- raise BadReaderParamsError("Could not find blobinfo for key %s" %
- blob_key)
-
- @classmethod
- def split_input(cls, mapper_spec):
- """Returns a list of shard_count input_spec_shards for input_spec.
-
- Args:
- mapper_spec: The mapper specification to split from. Must contain
- 'blob_keys' parameter with one or more blob keys.
-
- Returns:
- A list of BlobstoreInputReaders corresponding to the specified shards.
- """
- params = mapper_spec.params
- blob_keys = params[cls.BLOB_KEYS_PARAM]
- if isinstance(blob_keys, basestring):
- # This is a mechanism to allow multiple blob keys (which do not contain
- # commas) in a single string. It may go away.
- blob_keys = blob_keys.split(",")
-
- blob_sizes = {}
- for blob_key in blob_keys:
- blob_info = blobstore.BlobInfo.get(blobstore.BlobKey(blob_key))
- blob_sizes[blob_key] = blob_info.size
-
- shard_count = min(cls._MAX_SHARD_COUNT, mapper_spec.shard_count)
- shards_per_blob = shard_count // len(blob_keys)
- if shards_per_blob == 0:
- shards_per_blob = 1
-
- chunks = []
- for blob_key, blob_size in blob_sizes.items():
- blob_chunk_size = blob_size // shards_per_blob
- for i in xrange(shards_per_blob - 1):
- chunks.append(BlobstoreLineInputReader.from_json(
- {cls.BLOB_KEY_PARAM: blob_key,
- cls.INITIAL_POSITION_PARAM: blob_chunk_size * i,
- cls.END_POSITION_PARAM: blob_chunk_size * (i + 1)}))
- chunks.append(BlobstoreLineInputReader.from_json(
- {cls.BLOB_KEY_PARAM: blob_key,
- cls.INITIAL_POSITION_PARAM: blob_chunk_size * (shards_per_blob - 1),
- cls.END_POSITION_PARAM: blob_size}))
- return chunks
-
-
-class BlobstoreZipInputReader(InputReader):
- """Input reader for files from a zip archive stored in the Blobstore.
-
- Each instance of the reader will read the TOC from the end of the zip file,
- and then only the contained files that it is responsible for.
- """
-
- # Maximum number of shards to allow.
- _MAX_SHARD_COUNT = 256
-
- # Mapreduce parameters.
- BLOB_KEY_PARAM = "blob_key"
- START_INDEX_PARAM = "start_index"
- END_INDEX_PARAM = "end_index"
-
- def __init__(self, blob_key, start_index, end_index,
- _reader=blobstore.BlobReader):
- """Initializes this instance with the given blob key and file range.
-
- This BlobstoreZipInputReader will read from the file with index start_index
- up to but not including the file with index end_index.
-
- Args:
- blob_key: the BlobKey that this input reader is processing.
- start_index: the index of the first file to read.
- end_index: the index of the first file that will not be read.
- _reader: a callable that returns a file-like object for reading blobs.
- Used for dependency injection.
- """
- self._blob_key = blob_key
- self._start_index = start_index
- self._end_index = end_index
- self._reader = _reader
- self._zip = None
- self._entries = None
-
- def next(self):
- """Returns the next input from this input reader as (ZipInfo, opener) tuple.
-
- Returns:
- The next input from this input reader, in the form of a 2-tuple.
- The first element of the tuple is a zipfile.ZipInfo object.
- The second element of the tuple is a zero-argument function that, when
- called, returns the complete body of the file.
- """
- if not self._zip:
- self._zip = zipfile.ZipFile(self._reader(self._blob_key))
- # Get a list of entries, reversed so we can pop entries off in order
- self._entries = self._zip.infolist()[self._start_index:self._end_index]
- self._entries.reverse()
- if not self._entries:
- raise StopIteration()
- entry = self._entries.pop()
- self._start_index += 1
- return (entry, lambda: self._zip.read(entry.filename))
-
- @classmethod
- def from_json(cls, json):
- """Creates an instance of the InputReader for the given input shard state.
-
- Args:
- json: The InputReader state as a dict-like object.
-
- Returns:
- An instance of the InputReader configured using the values of json.
- """
- return cls(json[cls.BLOB_KEY_PARAM],
- json[cls.START_INDEX_PARAM],
- json[cls.END_INDEX_PARAM])
-
- def to_json(self):
- """Returns an input shard state for the remaining inputs.
-
- Returns:
- A json-izable version of the remaining InputReader.
- """
- return {self.BLOB_KEY_PARAM: self._blob_key,
- self.START_INDEX_PARAM: self._start_index,
- self.END_INDEX_PARAM: self._end_index}
-
- def __str__(self):
- """Returns the string representation of this BlobstoreZipInputReader."""
- return "blobstore.BlobKey(%r):[%d, %d]" % (
- self._blob_key, self._start_index, self._end_index)
-
- @classmethod
- def validate(cls, mapper_spec):
- """Validates mapper spec and all mapper parameters.
-
- Args:
- mapper_spec: The MapperSpec for this InputReader.
-
- Raises:
- BadReaderParamsError: required parameters are missing or invalid.
- """
- if mapper_spec.input_reader_class() != cls:
- raise BadReaderParamsError("Mapper input reader class mismatch")
- params = mapper_spec.params
- if cls.BLOB_KEY_PARAM not in params:
- raise BadReaderParamsError("Must specify 'blob_key' for mapper input")
- blob_key = params[cls.BLOB_KEY_PARAM]
- blob_info = blobstore.BlobInfo.get(blobstore.BlobKey(blob_key))
- if not blob_info:
- raise BadReaderParamsError("Could not find blobinfo for key %s" %
- blob_key)
-
-
- @classmethod
- def split_input(cls, mapper_spec, _reader=blobstore.BlobReader):
- """Returns a list of input shard states for the input spec.
-
- Args:
- mapper_spec: The MapperSpec for this InputReader. Must contain
- 'blob_key' parameter with one blob key.
- _reader: a callable that returns a file-like object for reading blobs.
- Used for dependency injection.
-
- Returns:
- A list of InputReaders spanning files within the zip.
- """
- params = mapper_spec.params
- blob_key = params[cls.BLOB_KEY_PARAM]
- zip_input = zipfile.ZipFile(_reader(blob_key))
- files = zip_input.infolist()
- total_size = sum(x.file_size for x in files)
- num_shards = min(mapper_spec.shard_count, cls._MAX_SHARD_COUNT)
- size_per_shard = total_size // num_shards
-
- # Break the list of files into sublists, each of approximately
- # size_per_shard bytes.
- shard_start_indexes = [0]
- current_shard_size = 0
- for i, fileinfo in enumerate(files):
- current_shard_size += fileinfo.file_size
- if current_shard_size >= size_per_shard:
- shard_start_indexes.append(i + 1)
- current_shard_size = 0
-
- if shard_start_indexes[-1] != len(files):
- shard_start_indexes.append(len(files))
-
- return [cls(blob_key, start_index, end_index, _reader)
- for start_index, end_index
- in zip(shard_start_indexes, shard_start_indexes[1:])]
-
-
-class BlobstoreZipLineInputReader(InputReader):
- """Input reader for newline delimited files in zip archives from Blobstore.
-
- This has the same external interface as the BlobstoreLineInputReader, in that
- it takes a list of blobs as its input and yields lines to the reader.
- However, the blobs themselves are expected to be zip archives of line-delimited
- files rather than the files themselves.
-
- This is useful because line-delimited files often benefit greatly from
- compression.
- """
-
- # Maximum number of shards to allow.
- _MAX_SHARD_COUNT = 256
-
- # Maximum number of blobs to allow.
- _MAX_BLOB_KEYS_COUNT = 246
-
- # Mapreduce parameters.
- BLOB_KEYS_PARAM = "blob_keys"
-
- # Serialization parameters.
- BLOB_KEY_PARAM = "blob_key"
- START_FILE_INDEX_PARAM = "start_file_index"
- END_FILE_INDEX_PARAM = "end_file_index"
- OFFSET_PARAM = "offset"
-
- def __init__(self, blob_key, start_file_index, end_file_index, offset,
- _reader=blobstore.BlobReader):
- """Initializes this instance with the given blob key and file range.
-
- This BlobstoreZipLineInputReader will read from the file with index
- start_file_index up to but not including the file with index end_file_index.
- It will return lines starting at offset within file[start_file_index].
-
- Args:
- blob_key: the BlobKey that this input reader is processing.
- start_file_index: the index of the first file to read within the zip.
- end_file_index: the index of the first file that will not be read.
- offset: the byte offset within blob_key.zip[start_file_index] to start
- reading. The reader will continue to the end of the file.
- _reader: a callable that returns a file-like object for reading blobs.
- Used for dependency injection.
- """
- self._blob_key = blob_key
- self._start_file_index = start_file_index
- self._end_file_index = end_file_index
- self._initial_offset = offset
- self._reader = _reader
- self._zip = None
- self._entries = None
- self._filestream = None
-
- @classmethod
- def validate(cls, mapper_spec):
- """Validates mapper spec and all mapper parameters.
-
- Args:
- mapper_spec: The MapperSpec for this InputReader.
-
- Raises:
- BadReaderParamsError: required parameters are missing or invalid.
- """
- if mapper_spec.input_reader_class() != cls:
- raise BadReaderParamsError("Mapper input reader class mismatch")
- params = mapper_spec.params
- if cls.BLOB_KEYS_PARAM not in params:
- raise BadReaderParamsError("Must specify 'blob_key' for mapper input")
-
- blob_keys = params[cls.BLOB_KEYS_PARAM]
- if isinstance(blob_keys, basestring):
- # This is a mechanism to allow multiple blob keys (which do not contain
- # commas) in a single string. It may go away.
- blob_keys = blob_keys.split(",")
- if len(blob_keys) > cls._MAX_BLOB_KEYS_COUNT:
- raise BadReaderParamsError("Too many 'blob_keys' for mapper input")
- if not blob_keys:
- raise BadReaderParamsError("No 'blob_keys' specified for mapper input")
- for blob_key in blob_keys:
- blob_info = blobstore.BlobInfo.get(blobstore.BlobKey(blob_key))
- if not blob_info:
- raise BadReaderParamsError("Could not find blobinfo for key %s" %
- blob_key)
-
- @classmethod
- def split_input(cls, mapper_spec, _reader=blobstore.BlobReader):
- """Returns a list of input readers for the input spec.
-
- Args:
- mapper_spec: The MapperSpec for this InputReader. Must contain
- 'blob_keys' parameter with one or more blob keys.
- _reader: a callable that returns a file-like object for reading blobs.
- Used for dependency injection.
-
- Returns:
- A list of InputReaders spanning the subfiles within the blobs.
- There will be at least one reader per blob, but it will otherwise
- attempt to keep the expanded size even.
- """
- params = mapper_spec.params
- blob_keys = params[cls.BLOB_KEYS_PARAM]
- if isinstance(blob_keys, basestring):
- # This is a mechanism to allow multiple blob keys (which do not contain
- # commas) in a single string. It may go away.
- blob_keys = blob_keys.split(",")
-
- blob_files = {}
- total_size = 0
- for blob_key in blob_keys:
- zip_input = zipfile.ZipFile(_reader(blob_key))
- blob_files[blob_key] = zip_input.infolist()
- total_size += sum(x.file_size for x in blob_files[blob_key])
-
- shard_count = min(cls._MAX_SHARD_COUNT, mapper_spec.shard_count)
-
- # We can break on both blob key and file-within-zip boundaries.
- # A shard will span at minimum a single blob key, but may only
- # handle a few files within a blob.
-
- size_per_shard = total_size // shard_count
-
- readers = []
- for blob_key in blob_keys:
- files = blob_files[blob_key]
- current_shard_size = 0
- start_file_index = 0
- next_file_index = 0
- for fileinfo in files:
- next_file_index += 1
- current_shard_size += fileinfo.file_size
- if current_shard_size >= size_per_shard:
- readers.append(cls(blob_key, start_file_index, next_file_index, 0,
- _reader))
- current_shard_size = 0
- start_file_index = next_file_index
- if current_shard_size != 0:
- readers.append(cls(blob_key, start_file_index, next_file_index, 0,
- _reader))
-
- return readers
-
- def next(self):
- """Returns the next line from this input reader as (lineinfo, line) tuple.
-
- Returns:
- The next input from this input reader, in the form of a 2-tuple.
- The first element of the tuple describes the source, it is itself
- a tuple (blobkey, filenumber, byteoffset).
- The second element of the tuple is the line found at that offset.
- """
- if not self._filestream:
- if not self._zip:
- self._zip = zipfile.ZipFile(self._reader(self._blob_key))
- # Get a list of entries, reversed so we can pop entries off in order
- self._entries = self._zip.infolist()[self._start_file_index:
- self._end_file_index]
- self._entries.reverse()
- if not self._entries:
- raise StopIteration()
- entry = self._entries.pop()
- value = self._zip.read(entry.filename)
- self._filestream = StringIO.StringIO(value)
- if self._initial_offset:
- self._filestream.seek(self._initial_offset)
- self._filestream.readline()
-
- start_position = self._filestream.tell()
- line = self._filestream.readline()
-
- if not line:
- # Done with this file in the zip. Move on to the next file.
- self._filestream.close()
- self._filestream = None
- self._start_file_index += 1
- self._initial_offset = 0
- return self.next()
-
- return ((self._blob_key, self._start_file_index, start_position),
- line.rstrip("\n"))
-
- def _next_offset(self):
- """Return the offset of the next line to read."""
- if self._filestream:
- offset = self._filestream.tell()
- if offset:
- offset -= 1
- else:
- offset = self._initial_offset
-
- return offset
-
- def to_json(self):
- """Returns an input shard state for the remaining inputs.
-
- Returns:
- A json-izable version of the remaining InputReader.
- """
-
- return {self.BLOB_KEY_PARAM: self._blob_key,
- self.START_FILE_INDEX_PARAM: self._start_file_index,
- self.END_FILE_INDEX_PARAM: self._end_file_index,
- self.OFFSET_PARAM: self._next_offset()}
-
- @classmethod
- def from_json(cls, json, _reader=blobstore.BlobReader):
- """Creates an instance of the InputReader for the given input shard state.
-
- Args:
- json: The InputReader state as a dict-like object.
- _reader: For dependency injection.
-
- Returns:
- An instance of the InputReader configured using the values of json.
- """
- return cls(json[cls.BLOB_KEY_PARAM],
- json[cls.START_FILE_INDEX_PARAM],
- json[cls.END_FILE_INDEX_PARAM],
- json[cls.OFFSET_PARAM],
- _reader)
-
- def __str__(self):
- """Returns the string representation of this reader.
-
- Returns:
- string blobkey:[start file num, end file num]:current offset.
- """
- return "blobstore.BlobKey(%r):[%d, %d]:%d" % (
- self._blob_key, self._start_file_index, self._end_file_index,
- self._next_offset())
-
-
-class ConsistentKeyReader(DatastoreKeyInputReader):
- """A key reader which reads consistent data from datastore.
-
- Datastore might have entities which were written but are not yet visible to
- queries. Typically these entities can only be read inside a transaction
- until they are 'applied'.
-
- This reader reads all keys, even those that are not yet visible. It might
- take significant time to start yielding data because it has to apply all
- modifications created before its start.
- """
- START_TIME_US_PARAM = 'start_time_us'
- UNAPPLIED_LOG_FILTER = '__unapplied_log_timestamp_us__ <'
- DUMMY_KIND = 'DUMMY_KIND'
- DUMMY_ID = 106275677020293L
-
- def __init__(self,
- entity_kind,
- key_range_param,
- batch_size=DatastoreKeyInputReader._BATCH_SIZE,
- start_time_us=None):
- """Constructor.
-
- Args:
- entity_kind: Kind of entity to read as string.
- key_range_param: Key range to scan through as key_range.KeyRange.
- batch_size: Size of single batch read (number of entities).
- start_time_us: Start time of the reader (as given by time.time()
- function). It will apply all unapplied jobs created before it was
- started.
- """
- DatastoreInputReader.__init__(
- self, entity_kind, key_range_param, batch_size)
- self.start_time_us = start_time_us
-
- def __iter__(self):
- """Iterates over the keys in the given KeyRanges.
-
- Yields:
- A db.Key instance for each key in the given key range, starting with
- keys for unapplied jobs.
- """
- while True: # Iterates over each key range.
- if self._current_key_range is None:
- break
-
- # TODO(user): Remove this hack once 1.4.0 is live in production.
- if datastore_rpc:
- self._apply_jobs()
-
- while True: # Iterates over each key in the current key range.
- # Fetches the next batch of the result keys.
- query = self._current_key_range.make_ascending_datastore_query(
- kind=self._entity_kind, keys_only=True)
- keys = query.Get(limit=self._batch_size)
-
- # No results, this shard is complete.
- if not keys:
- self._advance_key_range()
- break
-
- # All good, now we can feed the mapper.
- for key in keys:
- self._current_key_range.advance(key)
- yield key
-
- def _apply_jobs(self):
- """Apply all jobs in current key range."""
- while True:
- # Creates an unapplied query and fetches unapplied jobs in the result
- # range.
- unapplied_query = self._current_key_range.make_ascending_datastore_query(
- kind=None, keys_only=True)
- unapplied_query[
- ConsistentKeyReader.UNAPPLIED_LOG_FILTER] = self.start_time_us
- unapplied_jobs = unapplied_query.Get(limit=self._batch_size)
-
- if not unapplied_jobs:
- return
-
- # There were some unapplied jobs. Roll them forward.
- keys_to_apply = []
- for key in unapplied_jobs:
- # To apply the entity group we need to read something from it.
- # We use dummy kind and id because we don't actually need any data.
- path = key.to_path() + [ConsistentKeyReader.DUMMY_KIND,
- ConsistentKeyReader.DUMMY_ID]
- keys_to_apply.append(
- db.Key.from_path(_app=key.app(), namespace=key.namespace(), *path))
- db.get(keys_to_apply, config=datastore_rpc.Configuration(
- deadline=10,
- read_policy=datastore_rpc.Configuration.APPLY_ALL_JOBS_CONSISTENCY))
-
-
- @classmethod
- def _split_input_from_namespace(cls,
- app,
- namespace,
- entity_kind_name,
- shard_count):
- key_ranges = super(ConsistentKeyReader, cls)._split_input_from_namespace(
- app, namespace, entity_kind_name, shard_count)
-
- # The KeyRanges calculated by the base class may not include keys for
- # entities that have unapplied jobs. So use an open key range for the first
- # and last KeyRanges to ensure that they will be processed.
- if key_ranges:
- key_ranges[0].key_start = None
- key_ranges[0].include_start = False
- key_ranges[-1].key_end = None
- key_ranges[-1].include_end = False
- return key_ranges
-
- @classmethod
- def _split_input_from_params(cls, app, namespaces, entity_kind_name,
- params, shard_count):
- readers = super(ConsistentKeyReader, cls)._split_input_from_params(app,
- namespaces,
- entity_kind_name,
- params,
- shard_count)
-
- # We always produce at least one key range because:
- # a) there might be unapplied entities
- # b) it simplifies mapper code
- if not readers:
- key_ranges = [key_range.KeyRange(namespace=namespace, _app=app)
- for namespace in namespaces]
- readers = [cls(entity_kind_name, key_ranges)]
-
- return readers
-
- @classmethod
- def split_input(cls, mapper_spec):
- """Splits input into key ranges."""
- readers = super(ConsistentKeyReader, cls).split_input(mapper_spec)
-
- start_time_us = mapper_spec.params.get(
- cls.START_TIME_US_PARAM, long(time.time() * 1e6))
- for reader in readers:
- reader.start_time_us = start_time_us
- return readers
-
- def to_json(self):
- """Serializes all the data in this reader into json form.
-
- Returns:
- all the data in json-compatible map.
- """
- json_dict = {self.KEY_RANGE_PARAM: [k.to_json() for k in self._key_ranges],
- self.ENTITY_KIND_PARAM: self._entity_kind,
- self.BATCH_SIZE_PARAM: self._batch_size,
- self.START_TIME_US_PARAM: self.start_time_us}
- return json_dict
-
- @classmethod
- def from_json(cls, json):
- """Create new ConsistentKeyReader from the json, encoded by to_json.
-
- Args:
- json: json map representation of ConsistentKeyReader.
-
- Returns:
- an instance of ConsistentKeyReader with all data deserialized from json.
- """
- query_range = cls(
- json[cls.ENTITY_KIND_PARAM],
- [key_range.KeyRange.from_json(k) for k in json[cls.KEY_RANGE_PARAM]],
- json[cls.BATCH_SIZE_PARAM],
- json[cls.START_TIME_US_PARAM])
- return query_range
-
-
-# TODO(user): This reader always produces only one shard, because
-# namespace entities use a mix of ids/names, and KeyRange-based splitting
-# doesn't work satisfactorily in this case.
-# It's possible to implement splitting functionality specific to this reader
-# instead of reusing the generic one. Meanwhile, 1 shard is enough for our
-# applications.
-class NamespaceInputReader(DatastoreKeyInputReader):
- """An input reader to iterate over namespaces.
-
- This reader yields namespace names as strings.
- It will always produce only one shard.
- """
-
- @classmethod
- def validate(cls, mapper_spec):
- """Validates mapper spec.
-
- Args:
- mapper_spec: The MapperSpec for this InputReader.
-
- Raises:
- BadReaderParamsError: required parameters are missing or invalid.
- """
- mapper_spec.params[cls.ENTITY_KIND_PARAM] = metadata.Namespace.kind()
- mapper_spec.shard_count = 1
- cls._common_validate(mapper_spec)
-
- @classmethod
- def split_input(cls, mapper_spec):
- """Returns a list of input readers for the input spec.
-
- Args:
- mapper_spec: The MapperSpec for this InputReader.
-
- Returns:
- A list of InputReaders.
- """
- mapper_spec.params[cls.ENTITY_KIND_PARAM] = metadata.Namespace.kind()
- mapper_spec.shard_count = 1
- return super(DatastoreKeyInputReader, cls).split_input(mapper_spec)
-
- def __iter__(self):
- for key in DatastoreKeyInputReader.__iter__(self):
- yield metadata.Namespace.key_to_namespace(key)
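The readers deleted above all follow the same InputReader contract: split_input produces one reader per shard, iteration yields the mapper's inputs, and to_json/from_json checkpoint and resume a shard. The sketch below is illustrative only (it is not part of any file in this diff); to stay self-contained it mirrors the contract on a plain class with a params dict and an explicit shard count, whereas a real reader would subclass mapreduce.input_readers.InputReader and receive a MapperSpec.

# Illustrative sketch, not from the deleted files: a reader over an integer
# range that mirrors the InputReader contract shown above. A real reader
# would subclass mapreduce.input_readers.InputReader and take a MapperSpec;
# plain params and an explicit shard_count keep this example standalone.
class NumberRangeInputReader(object):
    """Yields the integers in [start, end); one instance per shard."""

    START_PARAM = "start"
    END_PARAM = "end"

    def __init__(self, start, end):
        self._pos = start
        self._end = end

    def __iter__(self):
        return self

    def next(self):
        if self._pos >= self._end:
            raise StopIteration()
        value = self._pos
        self._pos += 1
        return value

    def to_json(self):
        # Checkpoint only what is needed to resume: the unread range.
        return {self.START_PARAM: self._pos, self.END_PARAM: self._end}

    @classmethod
    def from_json(cls, json):
        return cls(json[cls.START_PARAM], json[cls.END_PARAM])

    @classmethod
    def validate(cls, params):
        if cls.START_PARAM not in params or cls.END_PARAM not in params:
            raise ValueError("Missing 'start' or 'end' parameter")
        if int(params[cls.END_PARAM]) < int(params[cls.START_PARAM]):
            raise ValueError("'end' must be >= 'start'")

    @classmethod
    def split_input(cls, params, shard_count):
        start = int(params[cls.START_PARAM])
        end = int(params[cls.END_PARAM])
        # Ceiling division so we never produce more readers than shards.
        step = max(1, (end - start + shard_count - 1) // shard_count)
        return [cls(s, min(s + step, end)) for s in range(start, end, step)]

# Example: split into shards, checkpoint one shard mid-way, then resume it.
readers = NumberRangeInputReader.split_input({"start": 0, "end": 10}, 3)
first = readers[0]
consumed = [first.next(), first.next()]
resumed = NumberRangeInputReader.from_json(first.to_json())
print consumed, list(resumed)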
diff --git a/mapreduce/lib/__init__.py b/mapreduce/lib/__init__.py
deleted file mode 100755
index 6c49c42..0000000
--- a/mapreduce/lib/__init__.py
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2010 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/mapreduce/lib/blobstore/__init__.py b/mapreduce/lib/blobstore/__init__.py
deleted file mode 100755
index 769e2de..0000000
--- a/mapreduce/lib/blobstore/__init__.py
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2007 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-
-
-
-"""Blobstore API module."""
-
-from blobstore import *
diff --git a/mapreduce/lib/blobstore/blobstore.py b/mapreduce/lib/blobstore/blobstore.py
deleted file mode 100755
index 3b7184c..0000000
--- a/mapreduce/lib/blobstore/blobstore.py
+++ /dev/null
@@ -1,745 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2007 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-
-
-
-"""A Python blobstore API used by app developers.
-
-Contains methods used to interface with the Blobstore API. Includes a
-db.Model-like class representing a reference to a very large BLOB. Imports a
-db.Key-like class representing a blob key.
-"""
-
-
-
-
-
-
-
-import cgi
-import email
-import os
-
-from google.appengine.api import datastore
-from google.appengine.api import datastore_errors
-from google.appengine.api import datastore_types
-from google.appengine.api.blobstore import blobstore
-from google.appengine.ext import db
-
-__all__ = ['BLOB_INFO_KIND',
- 'BLOB_KEY_HEADER',
- 'BLOB_RANGE_HEADER',
- 'BlobFetchSizeTooLargeError',
- 'BlobInfo',
- 'BlobInfoParseError',
- 'BlobKey',
- 'BlobNotFoundError',
- 'BlobReferenceProperty',
- 'BlobReader',
- 'DataIndexOutOfRangeError',
- 'Error',
- 'InternalError',
- 'MAX_BLOB_FETCH_SIZE',
- 'UPLOAD_INFO_CREATION_HEADER',
- 'create_upload_url',
- 'delete',
- 'fetch_data',
- 'get',
- 'parse_blob_info']
-
-Error = blobstore.Error
-InternalError = blobstore.InternalError
-BlobFetchSizeTooLargeError = blobstore.BlobFetchSizeTooLargeError
-BlobNotFoundError = blobstore.BlobNotFoundError
-_CreationFormatError = blobstore._CreationFormatError
-DataIndexOutOfRangeError = blobstore.DataIndexOutOfRangeError
-
-BlobKey = blobstore.BlobKey
-create_upload_url = blobstore.create_upload_url
-delete = blobstore.delete
-
-
-class BlobInfoParseError(Error):
- """CGI parameter does not contain valid BlobInfo record."""
-
-
-BLOB_INFO_KIND = blobstore.BLOB_INFO_KIND
-BLOB_KEY_HEADER = blobstore.BLOB_KEY_HEADER
-BLOB_RANGE_HEADER = blobstore.BLOB_RANGE_HEADER
-MAX_BLOB_FETCH_SIZE = blobstore.MAX_BLOB_FETCH_SIZE
-UPLOAD_INFO_CREATION_HEADER = blobstore.UPLOAD_INFO_CREATION_HEADER
-
-
-
-class _GqlQuery(db.GqlQuery):
- """GqlQuery class that explicitly sets model-class.
-
- This does the same as the original db.GqlQuery class except that it does
- not try to find the model class based on the compiled GQL query. The
- caller instead provides the query with a model class to use for construction.
-
- This class is required for compatibility with the current db.py query
- mechanism but will be removed in the future. DO NOT USE.
- """
-
-
- def __init__(self, query_string, model_class, *args, **kwds):
- """Constructor.
-
- Args:
- query_string: Properly formatted GQL query string.
- model_class: Model class from which entities are constructed.
- *args: Positional arguments used to bind numeric references in the query.
- **kwds: Dictionary-based arguments for named references.
- """
-
-
- from google.appengine.ext import gql
- app = kwds.pop('_app', None)
- self._proto_query = gql.GQL(query_string, _app=app, namespace='')
-
- super(db.GqlQuery, self).__init__(model_class, namespace='')
- self.bind(*args, **kwds)
-
-
-
-
-class BlobInfo(object):
- """Information about blobs in Blobstore.
-
- This is a db.Model-like class that contains information about blobs stored
- by an application. Like db.Model, this class is backed by a Datastore
- entity; however, BlobInfo instances are read-only and have a much more
- limited interface.
-
- Each BlobInfo has a key of type BlobKey associated with it. This key is
- specific to the Blobstore API and is not compatible with db.get. The key
- can be used for quick lookup by passing it to BlobInfo.get. This
- key converts easily to a string, which is web safe and can be embedded
- in URLs.
-
- Properties:
- content_type: Content type of blob.
- creation: Creation date of blob, when it was uploaded.
- filename: Filename user selected from their machine.
- size: Size of uncompressed blob.
-
- All properties are read-only. Attempting to assign a value to a property
- will raise NotImplementedError.
- """
-
- _unindexed_properties = frozenset()
-
- @property
- def content_type(self):
- return self.__get_value('content_type')
-
- @property
- def creation(self):
- return self.__get_value('creation')
-
- @property
- def filename(self):
- return self.__get_value('filename')
-
- @property
- def size(self):
- return self.__get_value('size')
-
- def __init__(self, entity_or_blob_key, _values=None):
- """Constructor for wrapping blobstore entity.
-
- The constructor should not be used outside this package and tests.
-
- Args:
- entity_or_blob_key: Datastore entity or BlobKey that represents the blob.
- _values: Optional mapping of property values, used as the entity when a
- BlobKey is passed as the first argument.
- """
- if isinstance(entity_or_blob_key, datastore.Entity):
- self.__entity = entity_or_blob_key
- self.__key = BlobKey(entity_or_blob_key.key().name())
- elif isinstance(entity_or_blob_key, BlobKey):
- self.__entity = _values
- self.__key = entity_or_blob_key
- else:
- raise TypeError('Must provide Entity or BlobKey')
-
-
-
- @classmethod
- def from_entity(cls, entity):
- """Convert entity to BlobInfo.
-
- This method is required for compatibility with the current db.py query
- mechanism but will be removed in the future. DO NOT USE.
- """
- return BlobInfo(entity)
-
-
-
- @classmethod
- def properties(cls):
- """Set of properties that belong to BlobInfo.
-
- This method is required for compatibility with the current db.py query
- mechanism but will be removed in the future. DO NOT USE.
- """
- return set(('content_type', 'creation', 'filename', 'size'))
-
- def __get_value(self, name):
- """Get a BlobInfo value, loading entity if necessary.
-
- This method allows lazy loading of the underlying datastore entity. It
- should never be invoked directly.
-
- Args:
- name: Name of property to get value for.
-
- Returns:
- Value of BlobInfo property from entity.
- """
- if self.__entity is None:
- self.__entity = datastore.Get(
- datastore_types.Key.from_path(
- self.kind(), str(self.__key), namespace=''))
- try:
- return self.__entity[name]
- except KeyError:
- raise AttributeError(name)
-
-
- def key(self):
- """Get key for blob.
-
- Returns:
- BlobKey instance that identifies this blob.
- """
- return self.__key
-
- def delete(self):
- """Permanently delete blob from Blobstore."""
- delete(self.key())
-
- def open(self, *args, **kwargs):
- """Returns a BlobReader for this blob.
-
- Args:
- *args, **kwargs: Passed to BlobReader constructor.
- Returns:
- A BlobReader instance.
- """
- return BlobReader(self, *args, **kwargs)
-
- @classmethod
- def get(cls, blob_keys):
- """Retrieve BlobInfo by key or list of keys.
-
- Args:
- blob_keys: A key or a list of keys. Keys may be instances of str,
- unicode and BlobKey.
-
- Returns:
- A BlobInfo instance associated with provided key or a list of BlobInfo
- instances if a list of keys was provided. Keys that are not found in
- Blobstore return None as their values.
- """
- blob_keys = cls.__normalize_and_convert_keys(blob_keys)
- try:
- entities = datastore.Get(blob_keys)
- except datastore_errors.EntityNotFoundError:
- return None
- if isinstance(entities, datastore.Entity):
- return BlobInfo(entities)
- else:
- references = []
- for entity in entities:
- if entity is not None:
- references.append(BlobInfo(entity))
- else:
- references.append(None)
- return references
-
- @classmethod
- def all(cls):
- """Get query for all Blobs associated with application.
-
- Returns:
- A db.Query object querying over BlobInfo's datastore kind.
- """
- return db.Query(model_class=cls, namespace='')
-
- @classmethod
- def __factory_for_kind(cls, kind):
- if kind == BLOB_INFO_KIND:
- return BlobInfo
- raise ValueError('Cannot query for kind %s' % kind)
-
- @classmethod
- def gql(cls, query_string, *args, **kwds):
- """Returns a query using GQL query string.
-
- See appengine/ext/gql for more information about GQL.
-
- Args:
- query_string: Properly formatted GQL query string with the
- 'SELECT * FROM ' part omitted
- *args: rest of the positional arguments used to bind numeric references
- in the query.
- **kwds: dictionary-based arguments (for named parameters).
-
- Returns:
- A gql.GqlQuery object querying over BlobInfo's datastore kind.
- """
- return _GqlQuery('SELECT * FROM %s %s'
- % (cls.kind(), query_string),
- cls,
- *args,
- **kwds)
-
-
- @classmethod
- def kind(cls):
- """Get the entity kind for the BlobInfo.
-
- This method is required for compatibility with the current db.py query
- mechanism but will be removed in the future. DO NOT USE.
- """
- return BLOB_INFO_KIND
-
- @classmethod
- def __normalize_and_convert_keys(cls, keys):
- """Normalize and convert all keys to BlobKey type.
-
- This method is based on datastore.NormalizeAndTypeCheck().
-
- Args:
- keys: A single key or a list/tuple of keys. Keys may be a string
- or BlobKey
-
- Returns:
- Single key or list with all strings replaced by BlobKey instances.
- """
- if isinstance(keys, (list, tuple)):
- multiple = True
-
- keys = list(keys)
- else:
- multiple = False
- keys = [keys]
-
- for index, key in enumerate(keys):
- if not isinstance(key, (basestring, BlobKey)):
- raise datastore_errors.BadArgumentError(
- 'Expected str or BlobKey; received %s (a %s)' % (
- key,
- datastore.typename(key)))
- keys[index] = datastore.Key.from_path(cls.kind(), str(key), namespace='')
-
- if multiple:
- return keys
- else:
- return keys[0]
-
-
-def get(blob_key):
- """Get a BlobInfo record from blobstore.
-
- Does the same as BlobInfo.get.
- """
- return BlobInfo.get(blob_key)
-
-
-def parse_blob_info(field_storage):
- """Parse a BlobInfo record from file upload field_storage.
-
- Args:
- field_storage: cgi.FieldStorage that represents uploaded blob.
-
- Returns:
- BlobInfo record as parsed from the field-storage instance.
- None if there was no field_storage.
-
- Raises:
- BlobInfoParseError when provided field_storage does not contain enough
- information to construct a BlobInfo object.
- """
- if field_storage is None:
- return None
-
- field_name = field_storage.name
-
- def get_value(dict, name):
- value = dict.get(name, None)
- if value is None:
- raise BlobInfoParseError(
- 'Field %s has no %s.' % (field_name, name))
- return value
-
- filename = get_value(field_storage.disposition_options, 'filename')
- blob_key = BlobKey(get_value(field_storage.type_options, 'blob-key'))
-
- upload_content = email.message_from_file(field_storage.file)
- content_type = get_value(upload_content, 'content-type')
- size = get_value(upload_content, 'content-length')
- creation_string = get_value(upload_content, UPLOAD_INFO_CREATION_HEADER)
-
- try:
- size = int(size)
- except (TypeError, ValueError):
- raise BlobInfoParseError(
- '%s is not a valid value for %s size.' % (size, field_name))
-
- try:
- creation = blobstore._parse_creation(creation_string, field_name)
- except blobstore._CreationFormatError, err:
- raise BlobInfoParseError(str(err))
-
- return BlobInfo(blob_key,
- {'content_type': content_type,
- 'creation': creation,
- 'filename': filename,
- 'size': size,
- })
-
-
-class BlobReferenceProperty(db.Property):
- """Property compatible with db.Model classes.
-
- Add references to blobs to domain models using BlobReferenceProperty:
-
- class Picture(db.Model):
- title = db.StringProperty()
- image = blobstore.BlobReferenceProperty()
- thumbnail = blobstore.BlobReferenceProperty()
-
- To find the size of a picture using this model:
-
- picture = Picture.get(picture_key)
- print picture.image.size
-
- BlobInfo objects are lazily loaded, so iterating over models for their
- BlobKeys is efficient; the following does not need to hit the
- Datastore for each image key:
-
- list_of_untitled_blobs = []
- for picture in Picture.gql("WHERE title=''"):
- list_of_untitled_blobs.append(picture.image.key())
- """
-
- data_type = BlobInfo
-
- def get_value_for_datastore(self, model_instance):
- """Translate model property to datastore value."""
- blob_info = getattr(model_instance, self.name)
- if blob_info is None:
- return None
- return blob_info.key()
-
- def make_value_from_datastore(self, value):
- """Translate datastore value to BlobInfo."""
- if value is None:
- return None
- return BlobInfo(value)
-
- def validate(self, value):
- """Validate that assigned value is BlobInfo.
-
- Automatically converts from strings and BlobKey instances.
- """
- if isinstance(value, (basestring)):
- value = BlobInfo(BlobKey(value))
- elif isinstance(value, BlobKey):
- value = BlobInfo(value)
- return super(BlobReferenceProperty, self).validate(value)
-
-
-def fetch_data(blob, start_index, end_index):
- """Fetch data for blob.
-
- Fetches a fragment of a blob up to MAX_BLOB_FETCH_SIZE in length. Attempting
- to fetch a fragment that extends beyond the boundaries of the blob will return
- the amount of data from start_index until the end of the blob, which will be
- a smaller size than requested. Requesting a fragment which is entirely
- outside the boundaries of the blob will return an empty string. Attempting
- to fetch a negative index will raise an exception.
-
- Args:
- blob: BlobInfo, BlobKey, str or unicode representation of BlobKey of
- blob to fetch data from.
- start_index: Start index of blob data to fetch. May not be negative.
- end_index: End index (inclusive) of blob data to fetch. Must be
- >= start_index.
-
- Returns:
- str containing partial data of the blob. If the indexes are legal but outside
- the boundaries of the blob, an empty string will be returned.
-
- Raises:
- TypeError if start_index or end_index are not indexes. Also when blob
- is not a string, BlobKey or BlobInfo.
- DataIndexOutOfRangeError when start_index < 0 or end_index < start_index.
- BlobFetchSizeTooLargeError when the requested blob fragment is larger than
- MAX_BLOB_FETCH_SIZE.
- BlobNotFoundError when blob does not exist.
- """
- if isinstance(blob, BlobInfo):
- blob = blob.key()
- return blobstore.fetch_data(blob, start_index, end_index)
-
-
-class BlobReader(object):
- """Provides a read-only file-like interface to a blobstore blob."""
-
- SEEK_SET = 0
- SEEK_CUR = 1
- SEEK_END = 2
-
- def __init__(self, blob, buffer_size=131072, position=0):
- """Constructor.
-
- Args:
- blob: The blob key, blob info, or string blob key to read from.
- buffer_size: The minimum size to fetch chunks of data from blobstore.
- position: The initial position in the file.
- """
- if hasattr(blob, 'key'):
- self.__blob_key = blob.key()
- self.__blob_info = blob
- else:
- self.__blob_key = blob
- self.__blob_info = None
- self.__buffer_size = buffer_size
- self.__buffer = ""
- self.__position = position
- self.__buffer_position = 0
- self.__eof = False
-
- def __iter__(self):
- """Returns a file iterator for this BlobReader."""
- return self
-
- def __getstate__(self):
- """Returns the serialized state for this BlobReader."""
- return (self.__blob_key, self.__buffer_size, self.__position)
-
- def __setstate__(self, state):
- """Restores pickled state for this BlobReader."""
- self.__init__(*state)
-
- def close(self):
- """Close the file.
-
- A closed file cannot be read or written any more. Any operation which
- requires that the file be open will raise a ValueError after the file has
- been closed. Calling close() more than once is allowed.
- """
- self.__blob_key = None
-
- def flush(self):
- raise IOError("BlobReaders are read-only")
-
- def next(self):
- """Returns the next line from the file.
-
- Returns:
- A string, terminated by \n. The last line may not be terminated by \n.
- If EOF is reached, an empty string will be returned.
- """
- line = self.readline()
- if not line:
- raise StopIteration
- return line
-
- def __read_from_buffer(self, size):
- """Reads at most size bytes from the buffer.
-
- Args:
- size: Number of bytes to read, or negative to read the entire buffer.
- Returns:
- Tuple (data, size):
- data: The bytes read from the buffer.
- size: The remaining unread byte count.
- """
-
- if not self.__blob_key:
- raise ValueError("File is closed")
-
- if size < 0:
- end_pos = len(self.__buffer)
- else:
- end_pos = self.__buffer_position + size
- data = self.__buffer[self.__buffer_position:end_pos]
-
-
- data_length = len(data)
- size -= data_length
- self.__position += data_length
- self.__buffer_position += data_length
-
-
- if self.__buffer_position == len(self.__buffer):
- self.__buffer = ""
- self.__buffer_position = 0
-
- return data, size
-
- def __fill_buffer(self, size=0):
- """Fills the internal buffer.
-
- Args:
- size: Number of bytes to read. Will be clamped to
- [self.__buffer_size, MAX_BLOB_FETCH_SIZE].
- """
- read_size = min(max(size, self.__buffer_size), MAX_BLOB_FETCH_SIZE)
-
- self.__buffer = fetch_data(self.__blob_key, self.__position,
- self.__position + read_size - 1)
- self.__buffer_position = 0
- self.__eof = len(self.__buffer) < read_size
-
- def read(self, size=-1):
- """Read at most size bytes from the file.
-
- Fewer bytes are read if the read hits EOF before obtaining size bytes.
- If the size argument is negative or omitted, read all data until EOF is
- reached. The bytes are returned as a string object. An empty string is
- returned when EOF is encountered immediately.
-
- Calling read() without a size specified is likely to be dangerous, as it
- may read excessive amounts of data.
-
- Args:
- size: Optional. The maximum number of bytes to read. When omitted, read()
- returns all remaining data in the file.
-
- Returns:
- The read data, as a string.
- """
- data_list = []
- while True:
- data, size = self.__read_from_buffer(size)
- data_list.append(data)
- if size == 0 or self.__eof:
- return ''.join(data_list)
- self.__fill_buffer(size)
-
- def readline(self, size=-1):
- """Read one entire line from the file.
-
- A trailing newline character is kept in the string (but may be absent when a
- file ends with an incomplete line). If the size argument is present and
- non-negative, it is a maximum byte count (including the trailing newline)
- and an incomplete line may be returned. An empty string is returned only
- when EOF is encountered immediately.
-
- Args:
- size: Optional. The maximum number of bytes to read.
-
- Returns:
- The read data, as a string.
- """
- data_list = []
- while True:
- if size < 0:
- end_pos = len(self.__buffer)
- else:
- end_pos = self.__buffer_position + size
- newline_pos = self.__buffer.find('\n', self.__buffer_position, end_pos)
- if newline_pos != -1:
-
- data_list.append(
- self.__read_from_buffer(newline_pos
- - self.__buffer_position + 1)[0])
- break
- else:
-
- data, size = self.__read_from_buffer(size)
- data_list.append(data)
- if size == 0 or self.__eof:
- break
- self.__fill_buffer()
- return ''.join(data_list)
-
- def readlines(self, sizehint=None):
- """Read until EOF using readline() and return a list of lines thus read.
-
- If the optional sizehint argument is present, instead of reading up to EOF,
- whole lines totalling approximately sizehint bytes (possibly after rounding
- up to an internal buffer size) are read.
-
- Args:
- sizehint: A hint as to the maximum number of bytes to read.
-
- Returns:
- A list of strings, each being a single line from the file.
- """
- lines = []
- while sizehint is None or sizehint > 0:
- line = self.readline()
- if sizehint:
- sizehint -= len(line)
- if not line:
-
- break
- lines.append(line)
- return lines
-
- def seek(self, offset, whence=SEEK_SET):
- """Set the file's current position, like stdio's fseek().
-
- The whence argument is optional and defaults to os.SEEK_SET or 0 (absolute
- file positioning); other values are os.SEEK_CUR or 1 (seek relative to the
- current position) and os.SEEK_END or 2 (seek relative to the file's end).
-
- Args:
- offset: The relative offset to seek to.
- whence: Defines what the offset is relative to. See description for
- details.
- """
- if whence == BlobReader.SEEK_CUR:
- offset = self.__position + offset
- elif whence == BlobReader.SEEK_END:
- offset = self.blob_info.size + offset
- self.__buffer = ""
- self.__buffer_position = 0
- self.__position = offset
- self.__eof = False
-
- def tell(self):
- """Return the file's current position, like stdio's ftell()."""
- return self.__position
-
- def truncate(self, size):
- raise IOError("BlobReaders are read-only")
-
- def write(self, str):
- raise IOError("BlobReaders are read-only")
-
- def writelines(self, sequence):
- raise IOError("BlobReaders are read-only")
-
- @property
- def blob_info(self):
- """Returns the BlobInfo for this file."""
- if not self.__blob_info:
- self.__blob_info = BlobInfo.get(self.__blob_key)
- return self.__blob_info
-
- @property
- def closed(self):
- """Returns True if this file is closed, False otherwise."""
- return self.__blob_key is None
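
For context on the file removed above: this reader matches the file-like interface exposed publicly as google.appengine.ext.blobstore.BlobReader. A minimal usage sketch, assuming that public module and a hypothetical blob_key from an earlier blobstore upload:

    from google.appengine.ext import blobstore

    def head_of_blob(blob_key, max_lines=10):
        reader = blobstore.BlobReader(blob_key)   # file-like, read-only access
        lines = reader.readlines(sizehint=4096)   # whole lines, roughly 4 KB worth
        reader.seek(0)                            # rewind (os.SEEK_SET by default)
        first_bytes = reader.read(64)             # at most 64 bytes
        return lines[:max_lines], first_bytes
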
diff --git a/mapreduce/lib/graphy/README b/mapreduce/lib/graphy/README
deleted file mode 100755
index 39809d8..0000000
--- a/mapreduce/lib/graphy/README
+++ /dev/null
@@ -1,14 +0,0 @@
-Graphy library
-
-The web site is http://code.google.com/p/graphy/
-
-This copy was downloaded from
-http://graphy.googlecode.com/files/graphy_1.0.tar.bz2
-
-Graphy is licensed under the Apache 2.0 open source license.
-
-Local changes:
-
-- Changed imports to make mapreduce library hermetic.
-
-
diff --git a/mapreduce/lib/graphy/__init__.py b/mapreduce/lib/graphy/__init__.py
deleted file mode 100755
index a32fb2d..0000000
--- a/mapreduce/lib/graphy/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/usr/bin/env python
-__version__='1.0'
diff --git a/mapreduce/lib/graphy/backends/__init__.py b/mapreduce/lib/graphy/backends/__init__.py
deleted file mode 100755
index 4265cc3..0000000
--- a/mapreduce/lib/graphy/backends/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-#!/usr/bin/env python
diff --git a/mapreduce/lib/graphy/backends/google_chart_api/__init__.py b/mapreduce/lib/graphy/backends/google_chart_api/__init__.py
deleted file mode 100755
index a1b5c33..0000000
--- a/mapreduce/lib/graphy/backends/google_chart_api/__init__.py
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2008 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Backend which can generate charts using the Google Chart API."""
-
-from mapreduce.lib.graphy import line_chart
-from mapreduce.lib.graphy import bar_chart
-from mapreduce.lib.graphy import pie_chart
-from mapreduce.lib.graphy.backends.google_chart_api import encoders
-
-def _GetChartFactory(chart_class, display_class):
- """Create a factory method for instantiating charts with displays.
-
- Returns a method which, when called, will create & return a chart with
- chart.display already populated.
- """
- def Inner(*args, **kwargs):
- chart = chart_class(*args, **kwargs)
- chart.display = display_class(chart)
- return chart
- return Inner
-
-# These helper methods make it easy to get chart objects with display
-# objects already setup. For example, this:
-# chart = google_chart_api.LineChart()
-# is equivalent to:
-# chart = line_chart.LineChart()
-# chart.display = google_chart_api.LineChartEncoder(chart)
-#
-# (If there's some chart type for which a helper method isn't available, you
-# can always just instantiate the correct encoder manually, like in the 2nd
-# example above).
-# TODO: fix these so they have nice docs in ipython (give them __doc__)
-LineChart = _GetChartFactory(line_chart.LineChart, encoders.LineChartEncoder)
-Sparkline = _GetChartFactory(line_chart.Sparkline, encoders.SparklineEncoder)
-BarChart = _GetChartFactory(bar_chart.BarChart, encoders.BarChartEncoder)
-PieChart = _GetChartFactory(pie_chart.PieChart, encoders.PieChartEncoder)
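
The factory helpers above are the intended entry point for this backend. A minimal sketch of how they were used (module paths as in this tree; the data values are made up):

    from mapreduce.lib.graphy.backends import google_chart_api

    chart = google_chart_api.LineChart([1, 3, 2, 6, 5])   # chart.display is pre-populated
    chart.AddLine([4, 2, 5, 1, 3], label='second series')
    url = chart.display.Url(400, 200)                     # Google Chart API URL for the chart
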
diff --git a/mapreduce/lib/graphy/backends/google_chart_api/encoders.py b/mapreduce/lib/graphy/backends/google_chart_api/encoders.py
deleted file mode 100755
index c27376b..0000000
--- a/mapreduce/lib/graphy/backends/google_chart_api/encoders.py
+++ /dev/null
@@ -1,430 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2008 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Display objects for the different kinds of charts.
-
-Not intended for end users, use the methods in __init__ instead."""
-
-import warnings
-from mapreduce.lib.graphy.backends.google_chart_api import util
-
-
-class BaseChartEncoder(object):
-
- """Base class for encoders which turn chart objects into Google Chart URLs.
-
- Object attributes:
- extra_params: Dict to add/override specific chart params. Of the
- form param:string, passed directly to the Google Chart API.
- For example, 'cht':'lti' becomes ?cht=lti in the URL.
- url_base: The prefix to use for URLs. If you want to point to a different
- server for some reason, you would override this.
- formatters: TODO: Need to explain how these work, and how they are
- different from chart formatters.
- enhanced_encoding: If True, uses enhanced encoding. If
- False, simple encoding is used.
- escape_url: If True, URL will be properly escaped. If False, characters
- like | and , will be unescaped (which makes the URL easier to
- read).
- """
-
- def __init__(self, chart):
- self.extra_params = {} # You can add specific params here.
- self.url_base = 'http://chart.apis.google.com/chart'
- self.formatters = self._GetFormatters()
- self.chart = chart
- self.enhanced_encoding = False
- self.escape_url = True # You can turn off URL escaping for debugging.
- self._width = 0 # These are set when someone calls Url()
- self._height = 0
-
- def Url(self, width, height, use_html_entities=False):
- """Get the URL for our graph.
-
- Args:
- use_html_entities: If True, reserved HTML characters (&, <, >, ") in the
- URL are replaced with HTML entities (&amp;, &lt;, etc.). Default is False.
- """
- self._width = width
- self._height = height
- params = self._Params(self.chart)
- return util.EncodeUrl(self.url_base, params, self.escape_url,
- use_html_entities)
-
- def Img(self, width, height):
- """Get an image tag for our graph."""
- url = self.Url(width, height, use_html_entities=True)
- tag = '<img src="%s" width="%s" height="%s" alt="chart"/>'
- return tag % (url, width, height)
-
- def _GetType(self, chart):
- """Return the correct chart_type param for the chart."""
- raise NotImplementedError
-
- def _GetFormatters(self):
- """Get a list of formatter functions to use for encoding."""
- formatters = [self._GetLegendParams,
- self._GetDataSeriesParams,
- self._GetColors,
- self._GetAxisParams,
- self._GetGridParams,
- self._GetType,
- self._GetExtraParams,
- self._GetSizeParams,
- ]
- return formatters
-
- def _Params(self, chart):
- """Collect all the different params we need for the URL. Collecting
- all params as a dict before converting to a URL makes testing easier.
- """
- chart = chart.GetFormattedChart()
- params = {}
- def Add(new_params):
- params.update(util.ShortenParameterNames(new_params))
-
- for formatter in self.formatters:
- Add(formatter(chart))
-
- for key in params:
- params[key] = str(params[key])
- return params
-
- def _GetSizeParams(self, chart):
- """Get the size param."""
- return {'size': '%sx%s' % (int(self._width), int(self._height))}
-
- def _GetExtraParams(self, chart):
- """Get any extra params (from extra_params)."""
- return self.extra_params
-
- def _GetDataSeriesParams(self, chart):
- """Collect params related to the data series."""
- y_min, y_max = chart.GetDependentAxis().min, chart.GetDependentAxis().max
- series_data = []
- markers = []
- for i, series in enumerate(chart.data):
- data = series.data
- if not data: # Drop empty series.
- continue
- series_data.append(data)
-
- for x, marker in series.markers:
- args = [marker.shape, marker.color, i, x, marker.size]
- markers.append(','.join(str(arg) for arg in args))
-
- encoder = self._GetDataEncoder(chart)
- result = util.EncodeData(chart, series_data, y_min, y_max, encoder)
- result.update(util.JoinLists(marker = markers))
- return result
-
- def _GetColors(self, chart):
- """Color series color parameter."""
- colors = []
- for series in chart.data:
- if not series.data:
- continue
- colors.append(series.style.color)
- return util.JoinLists(color = colors)
-
- def _GetDataEncoder(self, chart):
- """Get a class which can encode the data the way the user requested."""
- if not self.enhanced_encoding:
- return util.SimpleDataEncoder()
- return util.EnhancedDataEncoder()
-
- def _GetLegendParams(self, chart):
- """Get params for showing a legend."""
- if chart._show_legend:
- return util.JoinLists(data_series_label = chart._legend_labels)
- return {}
-
- def _GetAxisLabelsAndPositions(self, axis, chart):
- """Return axis.labels & axis.label_positions."""
- return axis.labels, axis.label_positions
-
- def _GetAxisParams(self, chart):
- """Collect params related to our various axes (x, y, right-hand)."""
- axis_types = []
- axis_ranges = []
- axis_labels = []
- axis_label_positions = []
- axis_label_gridlines = []
- mark_length = max(self._width, self._height)
- for i, axis_pair in enumerate(a for a in chart._GetAxes() if a[1].labels):
- axis_type_code, axis = axis_pair
- axis_types.append(axis_type_code)
- if axis.min is not None or axis.max is not None:
- assert axis.min is not None # Sanity check: both min & max must be set.
- assert axis.max is not None
- axis_ranges.append('%s,%s,%s' % (i, axis.min, axis.max))
-
- labels, positions = self._GetAxisLabelsAndPositions(axis, chart)
- if labels:
- axis_labels.append('%s:' % i)
- axis_labels.extend(labels)
- if positions:
- positions = [i] + list(positions)
- axis_label_positions.append(','.join(str(x) for x in positions))
- if axis.label_gridlines:
- axis_label_gridlines.append("%d,%d" % (i, -mark_length))
-
- return util.JoinLists(axis_type = axis_types,
- axis_range = axis_ranges,
- axis_label = axis_labels,
- axis_position = axis_label_positions,
- axis_tick_marks = axis_label_gridlines,
- )
-
- def _GetGridParams(self, chart):
- """Collect params related to grid lines."""
- x = 0
- y = 0
- if chart.bottom.grid_spacing:
- # min/max must be set for this to make sense.
- assert(chart.bottom.min is not None)
- assert(chart.bottom.max is not None)
- total = float(chart.bottom.max - chart.bottom.min)
- x = 100 * chart.bottom.grid_spacing / total
- if chart.left.grid_spacing:
- # min/max must be set for this to make sense.
- assert(chart.left.min is not None)
- assert(chart.left.max is not None)
- total = float(chart.left.max - chart.left.min)
- y = 100 * chart.left.grid_spacing / total
- if x or y:
- return dict(grid = '%.3g,%.3g,1,0' % (x, y))
- return {}
-
-
-class LineChartEncoder(BaseChartEncoder):
-
- """Helper class to encode LineChart objects into Google Chart URLs."""
-
- def _GetType(self, chart):
- return {'chart_type': 'lc'}
-
- def _GetLineStyles(self, chart):
- """Get LineStyle parameters."""
- styles = []
- for series in chart.data:
- style = series.style
- if style:
- styles.append('%s,%s,%s' % (style.width, style.on, style.off))
- else:
- # If one style is missing, they must all be missing
- # TODO: Add a test for this; throw a more meaningful exception
- assert (not styles)
- return util.JoinLists(line_style = styles)
-
- def _GetFormatters(self):
- out = super(LineChartEncoder, self)._GetFormatters()
- out.insert(-2, self._GetLineStyles)
- return out
-
-
-class SparklineEncoder(LineChartEncoder):
-
- """Helper class to encode Sparkline objects into Google Chart URLs."""
-
- def _GetType(self, chart):
- return {'chart_type': 'lfi'}
-
-
-class BarChartEncoder(BaseChartEncoder):
-
- """Helper class to encode BarChart objects into Google Chart URLs."""
-
- __STYLE_DEPRECATION = ('BarChart.display.style is deprecated.' +
- ' Use BarChart.style, instead.')
-
- def __init__(self, chart, style=None):
- """Construct a new BarChartEncoder.
-
- Args:
- style: DEPRECATED. Set style on the chart object itself.
- """
- super(BarChartEncoder, self).__init__(chart)
- if style is not None:
- warnings.warn(self.__STYLE_DEPRECATION, DeprecationWarning, stacklevel=2)
- chart.style = style
-
- def _GetType(self, chart):
- # Vertical Stacked Type
- types = {(True, False): 'bvg',
- (True, True): 'bvs',
- (False, False): 'bhg',
- (False, True): 'bhs'}
- return {'chart_type': types[(chart.vertical, chart.stacked)]}
-
- def _GetAxisLabelsAndPositions(self, axis, chart):
- """Reverse labels on the y-axis in horizontal bar charts.
- (Otherwise the labels come out backwards from what you would expect)
- """
- if not chart.vertical and axis == chart.left:
- # The left axis of horizontal bar charts needs to have reversed labels
- return reversed(axis.labels), reversed(axis.label_positions)
- return axis.labels, axis.label_positions
-
- def _GetFormatters(self):
- out = super(BarChartEncoder, self)._GetFormatters()
- # insert at -2 to allow extra_params to overwrite everything
- out.insert(-2, self._ZeroPoint)
- out.insert(-2, self._ApplyBarChartStyle)
- return out
-
- def _ZeroPoint(self, chart):
- """Get the zero-point if any bars are negative."""
- # (Maybe) set the zero point.
- min, max = chart.GetDependentAxis().min, chart.GetDependentAxis().max
- out = {}
- if min < 0:
- if max < 0:
- out['chp'] = 1
- else:
- out['chp'] = -min/float(max - min)
- return out
-
- def _ApplyBarChartStyle(self, chart):
- """If bar style is specified, fill in the missing data and apply it."""
- # sanity checks
- if chart.style is None or not chart.data:
- return {}
-
- (bar_thickness, bar_gap, group_gap) = (chart.style.bar_thickness,
- chart.style.bar_gap,
- chart.style.group_gap)
- # Auto-size bar/group gaps
- if bar_gap is None and group_gap is not None:
- bar_gap = max(0, group_gap / 2)
- if not chart.style.use_fractional_gap_spacing:
- bar_gap = int(bar_gap)
- if group_gap is None and bar_gap is not None:
- group_gap = max(0, bar_gap * 2)
-
- # Set bar thickness to auto if it is missing
- if bar_thickness is None:
- if chart.style.use_fractional_gap_spacing:
- bar_thickness = 'r'
- else:
- bar_thickness = 'a'
- else:
- # Convert gap sizes to pixels if needed
- if chart.style.use_fractional_gap_spacing:
- if bar_gap:
- bar_gap = int(bar_thickness * bar_gap)
- if group_gap:
- group_gap = int(bar_thickness * group_gap)
-
- # Build a valid spec; ignore group gap if chart is stacked,
- # since there are no groups in that case
- spec = [bar_thickness]
- if bar_gap is not None:
- spec.append(bar_gap)
- if group_gap is not None and not chart.stacked:
- spec.append(group_gap)
- return util.JoinLists(bar_size = spec)
-
- def __GetStyle(self):
- warnings.warn(self.__STYLE_DEPRECATION, DeprecationWarning, stacklevel=2)
- return self.chart.style
-
- def __SetStyle(self, value):
- warnings.warn(self.__STYLE_DEPRECATION, DeprecationWarning, stacklevel=2)
- self.chart.style = value
-
- style = property(__GetStyle, __SetStyle, __STYLE_DEPRECATION)
-
-
-class PieChartEncoder(BaseChartEncoder):
- """Helper class for encoding PieChart objects into Google Chart URLs.
-
- Object Attributes:
- is3d: if True, draw a 3d pie chart. Default is False.
- """
-
- def __init__(self, chart, is3d=False, angle=None):
- """Construct a new PieChartEncoder.
-
- Args:
- is3d: If True, draw a 3d pie chart. Default is False. If the pie chart
- includes multiple pies, is3d must be set to False.
- angle: Angle of rotation of the pie chart, in radians.
- """
- super(PieChartEncoder, self).__init__(chart)
- self.is3d = is3d
- self.angle = angle
-
- def _GetFormatters(self):
- """Add a formatter for the chart angle."""
- formatters = super(PieChartEncoder, self)._GetFormatters()
- formatters.append(self._GetAngleParams)
- return formatters
-
- def _GetType(self, chart):
- if len(chart.data) > 1:
- if self.is3d:
- warnings.warn(
- '3d charts with more than one pie not supported; rendering in 2d',
- RuntimeWarning, stacklevel=2)
- chart_type = 'pc'
- else:
- if self.is3d:
- chart_type = 'p3'
- else:
- chart_type = 'p'
- return {'chart_type': chart_type}
-
- def _GetDataSeriesParams(self, chart):
- """Collect params related to the data series."""
-
- pie_points = []
- labels = []
- max_val = 1
- for pie in chart.data:
- points = []
- for segment in pie:
- if segment:
- points.append(segment.size)
- max_val = max(max_val, segment.size)
- labels.append(segment.label or '')
- if points:
- pie_points.append(points)
-
- encoder = self._GetDataEncoder(chart)
- result = util.EncodeData(chart, pie_points, 0, max_val, encoder)
- result.update(util.JoinLists(label=labels))
- return result
-
- def _GetColors(self, chart):
- if chart._colors:
- # Colors were overridden by the user
- colors = chart._colors
- else:
- # Build the list of colors from individual segments
- colors = []
- for pie in chart.data:
- for segment in pie:
- if segment and segment.color:
- colors.append(segment.color)
- return util.JoinLists(color = colors)
-
- def _GetAngleParams(self, chart):
- """If the user specified an angle, add it to the params."""
- if self.angle:
- return {'chp' : str(self.angle)}
- return {}
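
The encoder attributes documented above (extra_params, enhanced_encoding, escape_url) are set on chart.display after a chart is created. A hedged sketch; 'chtt' is the raw Chart API title parameter and the data is illustrative:

    from mapreduce.lib.graphy.backends import google_chart_api

    chart = google_chart_api.BarChart([10, 20, 15])
    chart.display.enhanced_encoding = True            # switch to the extended data encoding
    chart.display.extra_params['chtt'] = 'Requests'   # raw param, overrides generated ones
    img_tag = chart.display.Img(300, 150)             # <img> tag with an HTML-escaped URL
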
diff --git a/mapreduce/lib/graphy/backends/google_chart_api/util.py b/mapreduce/lib/graphy/backends/google_chart_api/util.py
deleted file mode 100755
index 6ec63e3..0000000
--- a/mapreduce/lib/graphy/backends/google_chart_api/util.py
+++ /dev/null
@@ -1,231 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2008 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Utility functions for working with the Google Chart API.
-
-Not intended for end users, use the methods in __init__ instead."""
-
-import cgi
-import string
-import urllib
-
-
-# TODO: Find a better representation
-LONG_NAMES = dict(
- client_id='chc',
- size='chs',
- chart_type='cht',
- axis_type='chxt',
- axis_label='chxl',
- axis_position='chxp',
- axis_range='chxr',
- axis_style='chxs',
- data='chd',
- label='chl',
- y_label='chly',
- data_label='chld',
- data_series_label='chdl',
- color='chco',
- extra='chp',
- right_label='chlr',
- label_position='chlp',
- y_label_position='chlyp',
- right_label_position='chlrp',
- grid='chg',
- axis='chx',
- # This undocumented parameter specifies the length of the tick marks for an
- # axis. Negative values will extend tick marks into the main graph area.
- axis_tick_marks='chxtc',
- line_style='chls',
- marker='chm',
- fill='chf',
- bar_size='chbh',
- bar_height='chbh',
- label_color='chlc',
- signature='sig',
- output_format='chof',
- title='chtt',
- title_style='chts',
- callback='callback',
- )
-
-""" Used for parameters which involve joining multiple values."""
-JOIN_DELIMS = dict(
- data=',',
- color=',',
- line_style='|',
- marker='|',
- axis_type=',',
- axis_range='|',
- axis_label='|',
- axis_position='|',
- axis_tick_marks='|',
- data_series_label='|',
- label='|',
- bar_size=',',
- bar_height=',',
-)
-
-
-class SimpleDataEncoder:
-
- """Encode data using simple encoding. Out-of-range data will
- be dropped (encoded as '_').
- """
-
- def __init__(self):
- self.prefix = 's:'
- self.code = string.ascii_uppercase + string.ascii_lowercase + string.digits
- self.min = 0
- self.max = len(self.code) - 1
-
- def Encode(self, data):
- return ''.join(self._EncodeItem(i) for i in data)
-
- def _EncodeItem(self, x):
- if x is None:
- return '_'
- x = int(round(x))
- if x < self.min or x > self.max:
- return '_'
- return self.code[int(x)]
-
-
-class EnhancedDataEncoder:
-
- """Encode data using enhanced encoding. Out-of-range data will
- be dropped (encoded as '_').
- """
-
- def __init__(self):
- self.prefix = 'e:'
- chars = string.ascii_uppercase + string.ascii_lowercase + string.digits \
- + '-.'
- self.code = [x + y for x in chars for y in chars]
- self.min = 0
- self.max = len(self.code) - 1
-
- def Encode(self, data):
- return ''.join(self._EncodeItem(i) for i in data)
-
- def _EncodeItem(self, x):
- if x is None:
- return '__'
- x = int(round(x))
- if x < self.min or x > self.max:
- return '__'
- return self.code[int(x)]
-
-
-def EncodeUrl(base, params, escape_url, use_html_entities):
- """Escape params, combine and append them to base to generate a full URL."""
- real_params = []
- for key, value in params.iteritems():
- if escape_url:
- value = urllib.quote(value)
- if value:
- real_params.append('%s=%s' % (key, value))
- if real_params:
- url = '%s?%s' % (base, '&'.join(real_params))
- else:
- url = base
- if use_html_entities:
- url = cgi.escape(url, quote=True)
- return url
-
-
-def ShortenParameterNames(params):
- """Shorten long parameter names (like size) to short names (like chs)."""
- out = {}
- for name, value in params.iteritems():
- short_name = LONG_NAMES.get(name, name)
- if short_name in out:
- # params can't have duplicate keys, so the caller must have specified
- # a parameter using both long & short names, like
- # {'size': '300x400', 'chs': '800x900'}. We don't know which to use.
- raise KeyError('Both long and short version of parameter %s (%s) '
- 'found. It is unclear which one to use.' % (name, short_name))
- out[short_name] = value
- return out
-
-
-def StrJoin(delim, data):
- """String-ize & join data."""
- return delim.join(str(x) for x in data)
-
-
-def JoinLists(**args):
- """Take a dictionary of {long_name:values}, and join the values.
-
- For each long_name, join the values into a string according to
- JOIN_DELIMS. If values is empty or None, replace with an empty string.
-
- Returns:
- A dictionary of {long_name:joined_value} entries.
- """
- out = {}
- for key, val in args.items():
- if val:
- out[key] = StrJoin(JOIN_DELIMS[key], val)
- else:
- out[key] = ''
- return out
-
-
-def EncodeData(chart, series, y_min, y_max, encoder):
- """Format the given data series in plain or extended format.
-
- Use the chart's encoder to determine the format. The formatted data will
- be scaled to fit within the range of values supported by the chosen
- encoding.
-
- Args:
- chart: The chart.
- series: A list of the data series to format; each list element is
- a list of data points.
- y_min: Minimum data value. May be None if y_max is also None
- y_max: Maximum data value. May be None if y_min is also None
- Returns:
- A dictionary with one key, 'data', whose value is the fully encoded series.
- """
- assert (y_min is None) == (y_max is None)
- if y_min is not None:
- def _ScaleAndEncode(series):
- series = ScaleData(series, y_min, y_max, encoder.min, encoder.max)
- return encoder.Encode(series)
- encoded_series = [_ScaleAndEncode(s) for s in series]
- else:
- encoded_series = [encoder.Encode(s) for s in series]
- result = JoinLists(**{'data': encoded_series})
- result['data'] = encoder.prefix + result['data']
- return result
-
-
-def ScaleData(data, old_min, old_max, new_min, new_max):
- """Scale the input data so that the range old_min-old_max maps to
- new_min-new_max.
- """
- def ScalePoint(x):
- if x is None:
- return None
- return scale * x + translate
-
- if old_min == old_max:
- scale = 1
- else:
- scale = (new_max - new_min) / float(old_max - old_min)
- translate = new_min - scale * old_min
- return map(ScalePoint, data)
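
These helpers can also be exercised directly. A small sketch under Python 2 (values chosen so the scaling is easy to follow):

    from mapreduce.lib.graphy.backends.google_chart_api import util

    scaled = util.ScaleData([0, 5, 10], 0, 10, 0, 61)   # map 0..10 onto the simple-encoding range
    encoded = util.SimpleDataEncoder().Encode(scaled)   # e.g. 'Af9' under Python 2 rounding
    params = util.ShortenParameterNames({'size': '300x200', 'chart_type': 'lc'})
    # params is now {'chs': '300x200', 'cht': 'lc'}
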
diff --git a/mapreduce/lib/graphy/bar_chart.py b/mapreduce/lib/graphy/bar_chart.py
deleted file mode 100755
index 050e4de..0000000
--- a/mapreduce/lib/graphy/bar_chart.py
+++ /dev/null
@@ -1,171 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2008 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Code related to bar charts."""
-
-import copy
-import warnings
-
-from mapreduce.lib.graphy import common
-from mapreduce.lib.graphy import util
-
-
-class BarsStyle(object):
- """Style of a series of bars in a BarChart
-
- Object Attributes:
- color: Hex string, like '00ff00' for green
- """
- def __init__(self, color):
- self.color = color
-
-
-class BarChartStyle(object):
- """Represents the style for bars on a BarChart.
-
- Any of the object attributes may be set to None, in which case the
- value will be auto-calculated.
-
- Object Attributes:
- bar_thickness: The thickness of a bar, in pixels.
- bar_gap: The gap between bars, in pixels, or as a fraction of bar thickness
- if use_fractional_gap_spacing is True.
- group_gap: The gap between groups of bars, in pixels, or as a fraction of
- bar thickness if use_fractional_gap_spacing is True.
- use_fractional_gap_spacing: if True, bar_gap and group_gap specify gap
- sizes as a fraction of bar width. Default is False.
- """
-
- _DEFAULT_GROUP_GAP = 8
- _DEFAULT_BAR_GAP = 4
-
- def __init__(self, bar_thickness=None,
- bar_gap=_DEFAULT_BAR_GAP, group_gap=_DEFAULT_GROUP_GAP,
- use_fractional_gap_spacing=False):
- """Create a new BarChartStyle.
-
- Args:
- bar_thickness: The thickness of a bar, in pixels. Set this to None if
- you want the bar thickness to be auto-calculated (this is the default
- behaviour).
- bar_gap: The gap between bars, in pixels. Default is 4.
- group_gap: The gap between groups of bars, in pixels. Default is 8.
- """
- self.bar_thickness = bar_thickness
- self.bar_gap = bar_gap
- self.group_gap = group_gap
- self.use_fractional_gap_spacing = use_fractional_gap_spacing
-
-
-class BarStyle(BarChartStyle):
-
- def __init__(self, *args, **kwargs):
- warnings.warn('BarStyle is deprecated. Use BarChartStyle.',
- DeprecationWarning, stacklevel=2)
- super(BarStyle, self).__init__(*args, **kwargs)
-
-
-class BarChart(common.BaseChart):
- """Represents a bar chart.
-
- Object attributes:
- vertical: if True, the bars will be vertical. Default is True.
- stacked: if True, the bars will be stacked. Default is False.
- style: The BarChartStyle for all bars on this chart, specifying bar
- thickness and gaps between bars.
- """
-
- def __init__(self, points=None):
- """Constructor for BarChart objects."""
- super(BarChart, self).__init__()
- if points is not None:
- self.AddBars(points)
- self.vertical = True
- self.stacked = False
- self.style = BarChartStyle(None, None, None) # full auto
-
- def AddBars(self, points, label=None, color=None):
- """Add a series of bars to the chart.
-
- points: List of y-values for the bars in this series
- label: Name of the series (used in the legend)
- color: Hex string, like '00ff00' for green
-
- This is a convenience method which constructs & appends the DataSeries for
- you.
- """
- if label is not None and util._IsColor(label):
- warnings.warn('Your code may be broken! '
- 'Label is a hex triplet. Maybe it is a color? The '
- 'old argument order (color before label) is deprecated.',
- DeprecationWarning, stacklevel=2)
- style = BarsStyle(color)
- series = common.DataSeries(points, label=label, style=style)
- self.data.append(series)
- return series
-
- def GetDependentAxes(self):
- """Get the dependent axes, which depend on orientation."""
- if self.vertical:
- return (self._axes[common.AxisPosition.LEFT] +
- self._axes[common.AxisPosition.RIGHT])
- else:
- return (self._axes[common.AxisPosition.TOP] +
- self._axes[common.AxisPosition.BOTTOM])
-
- def GetIndependentAxes(self):
- """Get the independent axes, which depend on orientation."""
- if self.vertical:
- return (self._axes[common.AxisPosition.TOP] +
- self._axes[common.AxisPosition.BOTTOM])
- else:
- return (self._axes[common.AxisPosition.LEFT] +
- self._axes[common.AxisPosition.RIGHT])
-
- def GetDependentAxis(self):
- """Get the main dependent axis, which depends on orientation."""
- if self.vertical:
- return self.left
- else:
- return self.bottom
-
- def GetIndependentAxis(self):
- """Get the main independent axis, which depends on orientation."""
- if self.vertical:
- return self.bottom
- else:
- return self.left
-
- def GetMinMaxValues(self):
- """Get the largest & smallest bar values as (min_value, max_value)."""
- if not self.stacked:
- return super(BarChart, self).GetMinMaxValues()
-
- if not self.data:
- return None, None # No data, nothing to do.
- num_bars = max(len(series.data) for series in self.data)
- positives = [0 for i in xrange(0, num_bars)]
- negatives = list(positives)
- for series in self.data:
- for i, point in enumerate(series.data):
- if point:
- if point > 0:
- positives[i] += point
- else:
- negatives[i] += point
- min_value = min(min(positives), min(negatives))
- max_value = max(max(positives), max(negatives))
- return min_value, max_value
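
Putting the bar-chart pieces together, a hedged sketch of a stacked horizontal chart built from the classes above (data values are made up):

    from mapreduce.lib.graphy import bar_chart
    from mapreduce.lib.graphy.backends import google_chart_api

    chart = google_chart_api.BarChart()
    chart.AddBars([4, 8, 3], label='2009', color='0000ff')
    chart.AddBars([6, 2, 5], label='2010', color='ff0000')
    chart.vertical = False                # horizontal bars
    chart.stacked = True                  # stack the two series
    chart.style = bar_chart.BarChartStyle(bar_thickness=10, bar_gap=2, group_gap=6)
    url = chart.display.Url(320, 200)
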
diff --git a/mapreduce/lib/graphy/common.py b/mapreduce/lib/graphy/common.py
deleted file mode 100755
index 74ed0e3..0000000
--- a/mapreduce/lib/graphy/common.py
+++ /dev/null
@@ -1,412 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2008 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Code common to all chart types."""
-
-import copy
-import warnings
-
-from mapreduce.lib.graphy import formatters
-from mapreduce.lib.graphy import util
-
-
-class Marker(object):
-
- """Represents an abstract marker, without position. You can attach these to
- a DataSeries.
-
- Object attributes:
- shape: One of the shape codes (Marker.arrow, Marker.diamond, etc.)
- color: color (as hex string, f.ex. '0000ff' for blue)
- size: size of the marker
- """
- # TODO: Write an example using markers.
-
- # Shapes:
- arrow = 'a'
- cross = 'c'
- diamond = 'd'
- circle = 'o'
- square = 's'
- x = 'x'
-
- # Note: The Google Chart API also knows some other markers ('v', 'V', 'r',
- # 'b') that I think would fit better into a grid API.
- # TODO: Make such a grid API
-
- def __init__(self, shape, color, size):
- """Construct a Marker. See class docstring for details on args."""
- # TODO: Shapes 'r' and 'b' would be much easier to use if they had a
- # special-purpose API (instead of trying to fake it with markers)
- self.shape = shape
- self.color = color
- self.size = size
-
-
-class _BasicStyle(object):
- """Basic style object. Used internally."""
-
- def __init__(self, color):
- self.color = color
-
-
-class DataSeries(object):
-
- """Represents one data series for a chart (both data & presentation
- information).
-
- Object attributes:
- points: List of numbers representing y-values (x-values are not specified
- because the Google Chart API expects even x-value spacing).
- label: String with the series' label in the legend. The chart will only
- have a legend if at least one series has a label. If some series
- do not have a label then they will have an empty description in
- the legend. This is currently a limitation in the Google Chart
- API.
- style: A chart-type-specific style object. (LineStyle for LineChart,
- BarsStyle for BarChart, etc.)
- markers: List of (x, m) tuples where m is a Marker object and x is the
- x-axis value to place it at.
-
- The "fill" markers ('r' & 'b') are a little weird because they
- aren't a point on a line. For these, you can fake it by
- passing slightly weird data (I'd like a better API for them at
- some point):
- For 'b', you attach the marker to the starting series, and set x
- to the index of the ending line. Size is ignored, I think.
-
- For 'r', you can attach to any line, specify the starting
- y-value for x and the ending y-value for size. Y, in this case,
- is between 0.0 (bottom) and 1.0 (top).
- color: DEPRECATED
- """
-
- # TODO: Should we require the points list to be non-empty ?
- # TODO: Do markers belong here? They are really only used for LineCharts
- def __init__(self, points, label=None, style=None, markers=None, color=None):
- """Construct a DataSeries. See class docstring for details on args."""
- if label is not None and util._IsColor(label):
- warnings.warn('Your code may be broken! Label is a hex triplet. Maybe '
- 'it is a color? The old argument order (color & style '
- 'before label) is deprecated.', DeprecationWarning,
- stacklevel=2)
- if color is not None:
- warnings.warn('Passing color is deprecated. Pass a style object '
- 'instead.', DeprecationWarning, stacklevel=2)
- # Attempt to fix it for them. If they also passed a style, honor it.
- if style is None:
- style = _BasicStyle(color)
- if style is not None and isinstance(style, basestring):
- warnings.warn('Your code is broken! Style is a string, not an object. '
- 'Maybe you are passing a color? Passing color is '
- 'deprecated; pass a style object instead.',
- DeprecationWarning, stacklevel=2)
- if style is None:
- style = _BasicStyle(None)
- self.data = points
- self.style = style
- self.markers = markers or []
- self.label = label
-
- def _GetColor(self):
- warnings.warn('DataSeries.color is deprecated, use '
- 'DataSeries.style.color instead.', DeprecationWarning,
- stacklevel=2)
- return self.style.color
-
- def _SetColor(self, color):
- warnings.warn('DataSeries.color is deprecated, use '
- 'DataSeries.style.color instead.', DeprecationWarning,
- stacklevel=2)
- self.style.color = color
-
- color = property(_GetColor, _SetColor)
-
-
-class AxisPosition(object):
- """Represents all the available axis positions.
-
- The available positions are as follows:
- AxisPosition.TOP
- AxisPosition.BOTTOM
- AxisPosition.LEFT
- AxisPosition.RIGHT
- """
- LEFT = 'y'
- RIGHT = 'r'
- BOTTOM = 'x'
- TOP = 't'
-
-
-class Axis(object):
-
- """Represents one axis.
-
- Object settings:
- min: Minimum value for the bottom or left end of the axis
- max: Max value.
- labels: List of labels to show along the axis.
- label_positions: List of positions to show the labels at. Uses the scale
- set by min & max, so if you set min = 0 and max = 10, then
- label positions [0, 5, 10] would be at the bottom,
- middle, and top of the axis, respectively.
- grid_spacing: Amount of space between gridlines (in min/max scale).
- A value of 0 disables gridlines.
- label_gridlines: If True, draw a line extending from each label
- on the axis all the way across the chart.
- """
-
- def __init__(self, axis_min=None, axis_max=None):
- """Construct a new Axis.
-
- Args:
- axis_min: smallest value on the axis
- axis_max: largest value on the axis
- """
- self.min = axis_min
- self.max = axis_max
- self.labels = []
- self.label_positions = []
- self.grid_spacing = 0
- self.label_gridlines = False
-
-# TODO: Add other chart types. Order of preference:
-# - scatter plots
-# - us/world maps
-
-class BaseChart(object):
- """Base chart object with standard behavior for all other charts.
-
- Object attributes:
- data: List of DataSeries objects. Chart subtypes provide convenience
- functions (like AddLine, AddBars, AddSegment) to add more series
- later.
- left/right/bottom/top: Axis objects for the 4 different axes.
- formatters: A list of callables which will be used to format this chart for
- display. TODO: Need better documentation for how these
- work.
- auto_scale, auto_color, auto_legend:
- These aliases let users access the default formatters without poking
- around in self.formatters. If the user removes them from
- self.formatters then they will no longer be enabled, even though they'll
- still be accessible through the aliases. Similarly, re-assigning the
- aliases has no effect on the contents of self.formatters.
- display: This variable is reserved for backends to populate with a display
- object. The intention is that the display object would be used to
- render this chart. The details of what gets put here depends on
- the specific backend you are using.
- """
-
- # Canonical ordering of position keys
- _POSITION_CODES = 'yrxt'
-
- # TODO: Add more inline args to __init__ (esp. labels).
- # TODO: Support multiple series in the constructor, if given.
- def __init__(self):
- """Construct a BaseChart object."""
- self.data = []
-
- self._axes = {}
- for code in self._POSITION_CODES:
- self._axes[code] = [Axis()]
- self._legend_labels = [] # AutoLegend fills this out
- self._show_legend = False # AutoLegend fills this out
-
- # Aliases for default formatters
- self.auto_color = formatters.AutoColor()
- self.auto_scale = formatters.AutoScale()
- self.auto_legend = formatters.AutoLegend
- self.formatters = [self.auto_color, self.auto_scale, self.auto_legend]
- # display is used to convert the chart into something displayable (like a
- # url or img tag).
- self.display = None
-
- def AddFormatter(self, formatter):
- """Add a new formatter to the chart (convenience method)."""
- self.formatters.append(formatter)
-
- def AddSeries(self, points, color=None, style=None, markers=None,
- label=None):
- """DEPRECATED
-
- Add a new series of data to the chart; return the DataSeries object."""
- warnings.warn('AddSeries is deprecated. Instead, call AddLine for '
- 'LineCharts, AddBars for BarCharts, AddSegment for '
- 'PieCharts ', DeprecationWarning, stacklevel=2)
- series = DataSeries(points, color=color, style=style, markers=markers,
- label=label)
- self.data.append(series)
- return series
-
- def GetDependentAxes(self):
- """Return any dependent axes ('left' and 'right' by default for LineCharts,
- although bar charts would use 'bottom' and 'top').
- """
- return self._axes[AxisPosition.LEFT] + self._axes[AxisPosition.RIGHT]
-
- def GetIndependentAxes(self):
- """Return any independent axes (normally top & bottom, although horizontal
- bar charts use left & right by default).
- """
- return self._axes[AxisPosition.TOP] + self._axes[AxisPosition.BOTTOM]
-
- def GetDependentAxis(self):
- """Return this chart's main dependent axis (often 'left', but
- horizontal bar-charts use 'bottom').
- """
- return self.left
-
- def GetIndependentAxis(self):
- """Return this chart's main independent axis (often 'bottom', but
- horizontal bar-charts use 'left').
- """
- return self.bottom
-
- def _Clone(self):
- """Make a deep copy of this chart.
-
- Formatters & display will be missing from the copy, due to limitations in
- deepcopy.
- """
- orig_values = {}
- # Things which deepcopy will likely choke on if it tries to copy.
- uncopyables = ['formatters', 'display', 'auto_color', 'auto_scale',
- 'auto_legend']
- for name in uncopyables:
- orig_values[name] = getattr(self, name)
- setattr(self, name, None)
- clone = copy.deepcopy(self)
- for name, orig_value in orig_values.iteritems():
- setattr(self, name, orig_value)
- return clone
-
- def GetFormattedChart(self):
- """Get a copy of the chart with formatting applied."""
- # Formatters need to mutate the chart, but we don't want to change it out
- # from under the user. So, we work on a copy of the chart.
- scratchpad = self._Clone()
- for formatter in self.formatters:
- formatter(scratchpad)
- return scratchpad
-
- def GetMinMaxValues(self):
- """Get the largest & smallest values in this chart, returned as
- (min_value, max_value). Takes into account complications like stacked data
- series.
-
- For example, with non-stacked series, a chart with [1, 2, 3] and [4, 5, 6]
- would return (1, 6). If the same chart was stacking the data series, it
- would return (5, 9).
- """
- MinPoint = lambda data: min(x for x in data if x is not None)
- MaxPoint = lambda data: max(x for x in data if x is not None)
- mins = [MinPoint(series.data) for series in self.data if series.data]
- maxes = [MaxPoint(series.data) for series in self.data if series.data]
- if not mins or not maxes:
- return None, None # No data, just bail.
- return min(mins), max(maxes)
-
- def AddAxis(self, position, axis):
- """Add an axis to this chart in the given position.
-
- Args:
- position: an AxisPosition object specifying the axis's position
- axis: The axis to add, an Axis object
- Returns:
- the value of the axis parameter
- """
- self._axes.setdefault(position, []).append(axis)
- return axis
-
- def GetAxis(self, position):
- """Get or create the first available axis in the given position.
-
- This is a helper method for the left, right, top, and bottom properties.
- If the specified axis does not exist, it will be created.
-
- Args:
- position: the position to search for
- Returns:
- The first axis in the given position
- """
- # Not using setdefault here just in case, to avoid calling the Axis()
- # constructor needlessly
- if position in self._axes:
- return self._axes[position][0]
- else:
- axis = Axis()
- self._axes[position] = [axis]
- return axis
-
- def SetAxis(self, position, axis):
- """Set the first axis in the given position to the given value.
-
- This is a helper method for the left, right, top, and bottom properties.
-
- Args:
- position: an AxisPosition object specifying the axis's position
- axis: The axis to set, an Axis object
- Returns:
- the value of the axis parameter
- """
- self._axes.setdefault(position, [None])[0] = axis
- return axis
-
- def _GetAxes(self):
- """Return a generator of (position_code, Axis) tuples for this chart's axes.
-
- The axes will be sorted by position using the canonical ordering sequence,
- _POSITION_CODES.
- """
- for code in self._POSITION_CODES:
- for axis in self._axes.get(code, []):
- yield (code, axis)
-
- def _GetBottom(self):
- return self.GetAxis(AxisPosition.BOTTOM)
-
- def _SetBottom(self, value):
- self.SetAxis(AxisPosition.BOTTOM, value)
-
- bottom = property(_GetBottom, _SetBottom,
- doc="""Get or set the bottom axis""")
-
- def _GetLeft(self):
- return self.GetAxis(AxisPosition.LEFT)
-
- def _SetLeft(self, value):
- self.SetAxis(AxisPosition.LEFT, value)
-
- left = property(_GetLeft, _SetLeft,
- doc="""Get or set the left axis""")
-
- def _GetRight(self):
- return self.GetAxis(AxisPosition.RIGHT)
-
- def _SetRight(self, value):
- self.SetAxis(AxisPosition.RIGHT, value)
-
- right = property(_GetRight, _SetRight,
- doc="""Get or set the right axis""")
-
- def _GetTop(self):
- return self.GetAxis(AxisPosition.TOP)
-
- def _SetTop(self, value):
- self.SetAxis(AxisPosition.TOP, value)
-
- top = property(_GetTop, _SetTop,
- doc="""Get or set the top axis""")
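
The axis plumbing above is what the left/right/bottom/top properties delegate to. A short sketch of configuring axes on a concrete chart (values are illustrative):

    from mapreduce.lib.graphy import common
    from mapreduce.lib.graphy.backends import google_chart_api

    chart = google_chart_api.LineChart([20, 35, 50, 10])
    chart.left.min, chart.left.max = 0, 60
    chart.left.labels = ['0', '30', '60']
    chart.left.label_positions = [0, 30, 60]
    chart.bottom.labels = ['Q1', 'Q2', 'Q3', 'Q4']
    extra = chart.AddAxis(common.AxisPosition.RIGHT, common.Axis(0, 60))  # second dependent axis
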
diff --git a/mapreduce/lib/graphy/formatters.py b/mapreduce/lib/graphy/formatters.py
deleted file mode 100755
index 1e8be20..0000000
--- a/mapreduce/lib/graphy/formatters.py
+++ /dev/null
@@ -1,192 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2008 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""This module contains various formatters which can help format a chart
-object. To use these, add them to your chart's list of formatters. For
-example:
- chart.formatters.append(InlineLegend)
- chart.formatters.append(LabelSeparator(right=8))
-
-Feel free to write your own formatter. Formatters are just callables that
-modify the chart in some (hopefully useful) way. For example, the AutoColor
-formatter makes sure each DataSeries has a color applied to it. The formatter
-should take the chart to format as its only argument.
-
-(The formatters work on a deepcopy of the user's chart, so modifications
-shouldn't leak back into the user's original chart)
-"""
-
-def AutoLegend(chart):
- """Automatically fill out the legend based on series labels. This will only
- fill out the legend if there is at least one series with a label.
- """
- chart._show_legend = False
- labels = []
- for series in chart.data:
- if series.label is None:
- labels.append('')
- else:
- labels.append(series.label)
- chart._show_legend = True
- if chart._show_legend:
- chart._legend_labels = labels
-
-
-class AutoColor(object):
- """Automatically add colors to any series without colors.
-
- Object attributes:
- colors: The list of colors (hex strings) to cycle through. You can modify
- this list if you don't like the default colors.
- """
- def __init__(self):
- # TODO: Add a few more default colors.
- # TODO: Add a default styles too, so if you don't specify color or
- # style, you get a unique set of colors & styles for your data.
- self.colors = ['0000ff', 'ff0000', '00dd00', '000000']
-
- def __call__(self, chart):
- index = -1
- for series in chart.data:
- if series.style.color is None:
- index += 1
- if index >= len(self.colors):
- index = 0
- series.style.color = self.colors[index]
-
-
-class AutoScale(object):
- """If you don't set min/max on the dependent axes, this fills them in
- automatically by calculating min/max dynamically from the data.
-
- You can set just min or just max and this formatter will fill in the other
- value for you automatically. For example, if you only set min then this will
- set max automatically, but leave min untouched.
-
- Charts can have multiple dependent axes (chart.left & chart.right, for
- example.) If you set min/max on some axes but not others, then this formatter
- copies your min/max to the un-set axes. For example, if you set up min/max on
- only the right axis then your values will be automatically copied to the left
- axis. (if you use different min/max values for different axes, the
- precedence is undefined. So don't do that.)
- """
-
- def __init__(self, buffer=0.05):
- """Create a new AutoScale formatter.
-
- Args:
- buffer: percentage of extra space to allocate around the chart's axes.
- """
- self.buffer = buffer
-
- def __call__(self, chart):
- """Format the chart by setting the min/max values on its dependent axis."""
- if not chart.data:
- return # Nothing to do.
- min_value, max_value = chart.GetMinMaxValues()
- if None in (min_value, max_value):
- return # No data. Nothing to do.
-
- # Honor user's choice, if they've picked min/max.
- for axis in chart.GetDependentAxes():
- if axis.min is not None:
- min_value = axis.min
- if axis.max is not None:
- max_value = axis.max
-
- buffer = (max_value - min_value) * self.buffer # Stay away from edge.
-
- for axis in chart.GetDependentAxes():
- if axis.min is None:
- axis.min = min_value - buffer
- if axis.max is None:
- axis.max = max_value + buffer
-
-
-class LabelSeparator(object):
-
- """Adjust the label positions to avoid having them overlap. This happens for
- any axis with minimum_label_spacing set.
- """
-
- def __init__(self, left=None, right=None, bottom=None):
- self.left = left
- self.right = right
- self.bottom = bottom
-
- def __call__(self, chart):
- self.AdjustLabels(chart.left, self.left)
- self.AdjustLabels(chart.right, self.right)
- self.AdjustLabels(chart.bottom, self.bottom)
-
- def AdjustLabels(self, axis, minimum_label_spacing):
- if minimum_label_spacing is None:
- return
- if len(axis.labels) <= 1: # Nothing to adjust
- return
- if axis.max is not None and axis.min is not None:
- # Find the spacing required to fit all labels evenly.
- # Don't try to push them farther apart than that.
- maximum_possible_spacing = (axis.max - axis.min) / (len(axis.labels) - 1)
- if minimum_label_spacing > maximum_possible_spacing:
- minimum_label_spacing = maximum_possible_spacing
-
- labels = [list(x) for x in zip(axis.label_positions, axis.labels)]
- labels = sorted(labels, reverse=True)
-
- # First pass from the top, moving colliding labels downward
- for i in range(1, len(labels)):
- if labels[i - 1][0] - labels[i][0] < minimum_label_spacing:
- new_position = labels[i - 1][0] - minimum_label_spacing
- if axis.min is not None and new_position < axis.min:
- new_position = axis.min
- labels[i][0] = new_position
-
- # Second pass from the bottom, moving colliding labels upward
- for i in range(len(labels) - 2, -1, -1):
- if labels[i][0] - labels[i + 1][0] < minimum_label_spacing:
- new_position = labels[i + 1][0] + minimum_label_spacing
- if axis.max is not None and new_position > axis.max:
- new_position = axis.max
- labels[i][0] = new_position
-
- # Separate positions and labels
- label_positions, labels = zip(*labels)
- axis.labels = labels
- axis.label_positions = label_positions
-
-
-def InlineLegend(chart):
- """Provide a legend for line charts by attaching labels to the right
- end of each line. Suppresses the regular legend.
- """
- show = False
- labels = []
- label_positions = []
- for series in chart.data:
- if series.label is None:
- labels.append('')
- else:
- labels.append(series.label)
- show = True
- label_positions.append(series.data[-1])
-
- if show:
- chart.right.min = chart.left.min
- chart.right.max = chart.left.max
- chart.right.labels = labels
- chart.right.label_positions = label_positions
- chart._show_legend = False # Suppress the regular legend.
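
Formatters are appended to chart.formatters and run on a copy of the chart at render time, as the module docstring above describes. A minimal sketch combining two of them:

    from mapreduce.lib.graphy import formatters
    from mapreduce.lib.graphy.backends import google_chart_api

    chart = google_chart_api.LineChart()
    chart.AddLine([1, 4, 9], label='squares')
    chart.AddLine([1, 8, 27], label='cubes')
    chart.formatters.append(formatters.InlineLegend)             # label each line at its right end
    chart.formatters.append(formatters.LabelSeparator(right=8))  # keep those labels apart
    url = chart.display.Url(300, 150)
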
diff --git a/mapreduce/lib/graphy/line_chart.py b/mapreduce/lib/graphy/line_chart.py
deleted file mode 100755
index 37bf700..0000000
--- a/mapreduce/lib/graphy/line_chart.py
+++ /dev/null
@@ -1,122 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2008 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Code related to line charts."""
-
-import copy
-import warnings
-
-from mapreduce.lib.graphy import common
-
-
-class LineStyle(object):
-
- """Represents the style for a line on a line chart. Also provides some
- convenient presets.
-
- Object attributes (Passed directly to the Google Chart API. Check there for
- details):
- width: Width of the line
- on: Length of a line segment (for dashed/dotted lines)
- off: Length of a break (for dashed/dotted lines)
- color: Color of the line. A hex string, like 'ff0000' for red. Optional,
- AutoColor will fill this in for you automatically if empty.
-
- Some common styles, such as LineStyle.dashed, are available:
- solid
- dashed
- dotted
- thick_solid
- thick_dashed
- thick_dotted
- """
-
- # Widths
- THIN = 1
- THICK = 2
-
- # Patterns
- # ((on, off) tuples, as passed to LineChart.AddLine)
- SOLID = (1, 0)
- DASHED = (8, 4)
- DOTTED = (2, 4)
-
- def __init__(self, width, on, off, color=None):
- """Construct a LineStyle. See class docstring for details on args."""
- self.width = width
- self.on = on
- self.off = off
- self.color = color
-
-
-LineStyle.solid = LineStyle(1, 1, 0)
-LineStyle.dashed = LineStyle(1, 8, 4)
-LineStyle.dotted = LineStyle(1, 2, 4)
-LineStyle.thick_solid = LineStyle(2, 1, 0)
-LineStyle.thick_dashed = LineStyle(2, 8, 4)
-LineStyle.thick_dotted = LineStyle(2, 2, 4)
-
-
-class LineChart(common.BaseChart):
-
- """Represents a line chart."""
-
- def __init__(self, points=None):
- super(LineChart, self).__init__()
- if points is not None:
- self.AddLine(points)
-
- def AddLine(self, points, label=None, color=None,
- pattern=LineStyle.SOLID, width=LineStyle.THIN, markers=None):
- """Add a new line to the chart.
-
- This is a convenience method which constructs the DataSeries and appends it
- for you. It returns the new series.
-
- points: List of equally-spaced y-values for the line
- label: Name of the line (used for the legend)
- color: Hex string, like 'ff0000' for red
- pattern: Tuple for (length of segment, length of gap). i.e.
- LineStyle.DASHED
- width: Width of the line (i.e. LineStyle.THIN)
- markers: List of Marker objects to attach to this line (see DataSeries
- for more info)
- """
- if color is not None and isinstance(color[0], common.Marker):
- warnings.warn('Your code may be broken! '
- 'You passed a list of Markers instead of a color. The '
- 'old argument order (markers before color) is deprecated.',
- DeprecationWarning, stacklevel=2)
- style = LineStyle(width, pattern[0], pattern[1], color=color)
- series = common.DataSeries(points, label=label, style=style,
- markers=markers)
- self.data.append(series)
- return series
-
- def AddSeries(self, points, color=None, style=LineStyle.solid, markers=None,
- label=None):
- """DEPRECATED"""
- warnings.warn('LineChart.AddSeries is deprecated. Call AddLine instead. ',
- DeprecationWarning, stacklevel=2)
- return self.AddLine(points, color=color, width=style.width,
- pattern=(style.on, style.off), markers=markers,
- label=label)
-
-
-class Sparkline(LineChart):
- """Represent a sparkline. These behave like LineCharts,
- mostly, but come without axes.
- """
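
A short sketch of the line-style presets and markers defined above (marker placement uses (x-index, Marker) tuples, per the DataSeries docstring; data is illustrative):

    from mapreduce.lib.graphy import common, line_chart
    from mapreduce.lib.graphy.backends import google_chart_api

    peak = common.Marker(common.Marker.circle, 'ff0000', 10)
    chart = google_chart_api.LineChart()
    chart.AddLine([2, 7, 4, 9, 5],
                  label='latency',
                  pattern=line_chart.LineStyle.DASHED,   # (on, off) segment lengths
                  width=line_chart.LineStyle.THICK,
                  markers=[(3, peak)])                   # circle at the fourth point
    url = chart.display.Url(400, 200)
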
diff --git a/mapreduce/lib/graphy/pie_chart.py b/mapreduce/lib/graphy/pie_chart.py
deleted file mode 100755
index 5ec3418..0000000
--- a/mapreduce/lib/graphy/pie_chart.py
+++ /dev/null
@@ -1,178 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2008 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Code for pie charts."""
-
-import warnings
-
-from mapreduce.lib.graphy import common
-from mapreduce.lib.graphy import util
-
-
-class Segment(common.DataSeries):
- """A single segment of the pie chart.
-
- Object attributes:
- size: relative size of the segment
- label: label of the segment (if any)
- color: color of the segment (if any)
- """
- def __init__(self, size, label=None, color=None):
- if label is not None and util._IsColor(label):
- warnings.warn('Your code may be broken! '
- 'Label looks like a hex triplet; it might be a color. '
- 'The old argument order (color before label) is '
- 'deprecated.',
- DeprecationWarning, stacklevel=2)
- style = common._BasicStyle(color)
- super(Segment, self).__init__([size], label=label, style=style)
- assert size >= 0
-
- def _GetSize(self):
- return self.data[0]
-
- def _SetSize(self, value):
- assert value >= 0
- self.data[0] = value
-
- size = property(_GetSize, _SetSize,
- doc = """The relative size of this pie segment.""")
-
- # Since Segments are so simple, provide color for convenience.
- def _GetColor(self):
- return self.style.color
-
- def _SetColor(self, color):
- self.style.color = color
-
- color = property(_GetColor, _SetColor,
- doc = """The color of this pie segment.""")
-
-
-class PieChart(common.BaseChart):
- """Represents a pie chart.
-
- The pie chart consists of a single "pie" by default, but additional pies
- may be added using the AddPie method. The Google Chart API will display
- the pies as concentric circles, with pie #0 on the inside; other backends
- may display the pies differently.
- """
-
- def __init__(self, points=None, labels=None, colors=None):
- """Constructor for PieChart objects.
-
- Creates a pie chart with a single pie.
-
- Args:
- points: A list of data points for the pie chart;
- i.e., relative sizes of the pie segments
- labels: A list of labels for the pie segments.
- TODO: Allow the user to pass in None as one of
- the labels in order to skip that label.
- colors: A list of colors for the pie segments, as hex strings
-          (f.ex. '0000ff' for blue). If there are fewer colors than pie
- segments, the Google Chart API will attempt to produce a smooth
- color transition between segments by spreading the colors across
- them.
- """
- super(PieChart, self).__init__()
- self.formatters = []
- self._colors = None
- if points:
- self.AddPie(points, labels, colors)
-
- def AddPie(self, points, labels=None, colors=None):
- """Add a whole pie to the chart.
-
- Args:
- points: A list of pie segment sizes
- labels: A list of labels for the pie segments
- colors: A list of colors for the segments. Missing colors will be chosen
- automatically.
-    Returns:
- The index of the newly added pie.
- """
- num_colors = len(colors or [])
- num_labels = len(labels or [])
- pie_index = len(self.data)
- self.data.append([])
- for i, pt in enumerate(points):
- label = None
- if i < num_labels:
- label = labels[i]
- color = None
- if i < num_colors:
- color = colors[i]
- self.AddSegment(pt, label=label, color=color, pie_index=pie_index)
- return pie_index
-
- def AddSegments(self, points, labels, colors):
- """DEPRECATED."""
- warnings.warn('PieChart.AddSegments is deprecated. Call AddPie instead. ',
- DeprecationWarning, stacklevel=2)
- num_colors = len(colors or [])
- for i, pt in enumerate(points):
- assert pt >= 0
- label = labels[i]
- color = None
- if i < num_colors:
- color = colors[i]
- self.AddSegment(pt, label=label, color=color)
-
- def AddSegment(self, size, label=None, color=None, pie_index=0):
- """Add a pie segment to this chart, and return the segment.
-
- size: The size of the segment.
- label: The label for the segment.
- color: The color of the segment, or None to automatically choose the color.
- pie_index: The index of the pie that will receive the new segment.
- By default, the chart has one pie (pie #0); use the AddPie method to
- add more pies.
- """
- if isinstance(size, Segment):
- warnings.warn("AddSegment(segment) is deprecated. Use AddSegment(size, "
- "label, color) instead", DeprecationWarning, stacklevel=2)
- segment = size
- else:
- segment = Segment(size, label=label, color=color)
- assert segment.size >= 0
- if pie_index == 0 and not self.data:
- # Create the default pie
- self.data.append([])
- assert (pie_index >= 0 and pie_index < len(self.data))
- self.data[pie_index].append(segment)
- return segment
-
- def AddSeries(self, points, color=None, style=None, markers=None, label=None):
- """DEPRECATED
-
- Add a new segment to the chart and return it.
-
- The segment must contain exactly one data point; all parameters
- other than color and label are ignored.
- """
- warnings.warn('PieChart.AddSeries is deprecated. Call AddSegment or '
- 'AddSegments instead.', DeprecationWarning)
- return self.AddSegment(Segment(points[0], color=color, label=label))
-
- def SetColors(self, *colors):
- """Change the colors of this chart to the specified list of colors.
-
- Note that this will completely override the individual colors specified
- in the pie segments. Missing colors will be interpolated, so that the
- list of colors covers all segments in all the pies.
- """
- self._colors = colors
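
For reference, the PieChart API deleted above was typically driven as in the sketch below. This is illustrative only: it assumes the graphy package is still importable at this path, and rendering an actual image would additionally require a graphy backend, which is outside this diff.

    from mapreduce.lib.graphy import pie_chart

    chart = pie_chart.PieChart()
    # AddPie creates a new concentric pie and returns its index (0 for the first).
    pie = chart.AddPie([10, 20, 30],
                       labels=['alpha', 'beta', 'gamma'],
                       colors=['ff0000'])  # missing colors are interpolated by the backend
    # Further segments can be appended to an existing pie by index.
    chart.AddSegment(15, label='delta', pie_index=pie)
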
diff --git a/mapreduce/lib/graphy/util.py b/mapreduce/lib/graphy/util.py
deleted file mode 100755
index ca4b7ad..0000000
--- a/mapreduce/lib/graphy/util.py
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/usr/bin/env python
-def _IsColor(color):
- """Try to determine if color is a hex color string.
- Labels that look like hex colors will match too, unfortunately."""
- if not isinstance(color, basestring):
- return False
- color = color.strip('#')
- if len(color) != 3 and len(color) != 6:
- return False
- hex_letters = '0123456789abcdefABCDEF'
- for letter in color:
- if letter not in hex_letters:
- return False
- return True
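
The check above is purely syntactic, so any short run of hex digits passes; this is why pie_chart.Segment only warns (rather than guessing) when a label happens to look like a color. Roughly:

    from mapreduce.lib.graphy import util

    util._IsColor('#ff0000')   # True: six hex digits, leading '#' stripped
    util._IsColor('abc')       # True: a label that merely looks like a color
    util._IsColor('red')       # False: 'r' is not a hex digit
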
diff --git a/mapreduce/lib/key_range/__init__.py b/mapreduce/lib/key_range/__init__.py
deleted file mode 100755
index b62f9af..0000000
--- a/mapreduce/lib/key_range/__init__.py
+++ /dev/null
@@ -1,687 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2007 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-
-
-
-
-
-
-"""Key range representation and splitting."""
-
-
-import os
-
-
-try:
- from mapreduce.lib import simplejson
-except ImportError:
- simplejson = None
-
-from google.appengine.api import datastore
-from google.appengine.api import namespace_manager
-from google.appengine.datastore import datastore_pb
-from google.appengine.ext import db
-
-
-class Error(Exception):
- """Base class for exceptions in this module."""
-
-
-class KeyRangeError(Error):
- """Error while trying to generate a KeyRange."""
-
-
-class SimplejsonUnavailableError(Error):
- """Error while using json functionality whith unavailable simplejson."""
-
-
-class KeyRange(object):
- """Represents a range of keys in the datastore.
-
- A KeyRange object represents a key range
- (key_start, include_start, key_end, include_end)
- and a scan direction (KeyRange.DESC or KeyRange.ASC).
- """
-
-
- DESC = "DESC"
- ASC = "ASC"
-
- def __init__(self,
- key_start=None,
- key_end=None,
- direction=None,
- include_start=True,
- include_end=True,
- namespace=None,
- _app=None):
- """Initialize a KeyRange object.
-
- Args:
- key_start: The starting key for this range.
- key_end: The ending key for this range.
- direction: The direction of the query for this range.
- include_start: Whether the start key should be included in the range.
- include_end: Whether the end key should be included in the range.
- namespace: The namespace for this range. If None then the current
- namespace is used.
- """
-
-
-
-
- if direction is None:
- direction = KeyRange.ASC
- assert direction in (KeyRange.ASC, KeyRange.DESC)
- self.direction = direction
- self.key_start = key_start
- self.key_end = key_end
- self.include_start = include_start
- self.include_end = include_end
- if namespace is not None:
- self.namespace = namespace
- else:
- self.namespace = namespace_manager.get_namespace()
- self._app = _app
-
- def __str__(self):
- if self.include_start:
- left_side = "["
- else:
- left_side = "("
- if self.include_end:
- right_side = "]"
- else:
- right_side = "("
- return "%s%s%r to %r%s" % (self.direction, left_side, self.key_start,
- self.key_end, right_side)
-
- def __repr__(self):
- return ("key_range.KeyRange(key_start=%r,key_end=%r,direction=%r,"
- "include_start=%r,include_end=%r, namespace=%r)") % (
- self.key_start,
- self.key_end,
- self.direction,
- self.include_start,
- self.include_end,
- self.namespace)
-
- def advance(self, key):
- """Updates the start of the range immediately past the specified key.
-
- Args:
- key: A db.Key.
- """
- self.include_start = False
- self.key_start = key
-
- def filter_query(self, query):
- """Add query filter to restrict to this key range.
-
- Args:
- query: A db.Query instance.
-
- Returns:
- The input query restricted to this key range.
- """
- assert isinstance(query, db.Query)
- if self.include_start:
- start_comparator = ">="
- else:
- start_comparator = ">"
- if self.include_end:
- end_comparator = "<="
- else:
- end_comparator = "<"
- if self.key_start:
- query.filter("__key__ %s" % start_comparator, self.key_start)
- if self.key_end:
- query.filter("__key__ %s" % end_comparator, self.key_end)
- return query
-
- def filter_datastore_query(self, query):
- """Add query filter to restrict to this key range.
-
- Args:
- query: A datastore.Query instance.
-
- Returns:
- The input query restricted to this key range.
- """
- assert isinstance(query, datastore.Query)
- if self.include_start:
- start_comparator = ">="
- else:
- start_comparator = ">"
- if self.include_end:
- end_comparator = "<="
- else:
- end_comparator = "<"
- if self.key_start:
- query.update({"__key__ %s" % start_comparator: self.key_start})
- if self.key_end:
- query.update({"__key__ %s" % end_comparator: self.key_end})
- return query
-
- def __get_direction(self, asc, desc):
- """Check that self.direction is in (KeyRange.ASC, KeyRange.DESC).
-
- Args:
- asc: Argument to return if self.direction is KeyRange.ASC
- desc: Argument to return if self.direction is KeyRange.DESC
-
- Returns:
- asc or desc appropriately
-
- Raises:
- KeyRangeError: if self.direction is not in (KeyRange.ASC, KeyRange.DESC).
- """
- if self.direction == KeyRange.ASC:
- return asc
- elif self.direction == KeyRange.DESC:
- return desc
- else:
-      raise KeyRangeError("KeyRange direction unexpected: %s" % self.direction)
-
- def make_directed_query(self, kind_class, keys_only=False):
- """Construct a query for this key range, including the scan direction.
-
- Args:
- kind_class: A kind implementation class.
- keys_only: bool, default False, use keys_only on Query?
-
- Returns:
- A db.Query instance.
-
- Raises:
- KeyRangeError: if self.direction is not in (KeyRange.ASC, KeyRange.DESC).
- """
- assert self._app is None, '_app is not supported for db.Query'
- direction = self.__get_direction("", "-")
- query = db.Query(kind_class, namespace=self.namespace, keys_only=keys_only)
- query.order("%s__key__" % direction)
-
- query = self.filter_query(query)
- return query
-
- def make_directed_datastore_query(self, kind, keys_only=False):
- """Construct a query for this key range, including the scan direction.
-
- Args:
- kind: A string.
- keys_only: bool, default False, use keys_only on Query?
-
- Returns:
- A datastore.Query instance.
-
- Raises:
- KeyRangeError: if self.direction is not in (KeyRange.ASC, KeyRange.DESC).
- """
- direction = self.__get_direction(datastore.Query.ASCENDING,
- datastore.Query.DESCENDING)
- query = datastore.Query(kind, _app=self._app, keys_only=keys_only)
- query.Order(("__key__", direction))
-
- query = self.filter_datastore_query(query)
- return query
-
- def make_ascending_query(self, kind_class, keys_only=False):
- """Construct a query for this key range without setting the scan direction.
-
- Args:
- kind_class: A kind implementation class.
- keys_only: bool, default False, query only for keys.
-
- Returns:
- A db.Query instance.
- """
- assert self._app is None, '_app is not supported for db.Query'
- query = db.Query(kind_class, namespace=self.namespace, keys_only=keys_only)
- query.order("__key__")
-
- query = self.filter_query(query)
- return query
-
- def make_ascending_datastore_query(self, kind, keys_only=False):
- """Construct a query for this key range without setting the scan direction.
-
- Args:
- kind: A string.
- keys_only: bool, default False, use keys_only on Query?
-
- Returns:
- A datastore.Query instance.
- """
- query = datastore.Query(kind,
- namespace=self.namespace,
- _app=self._app,
- keys_only=keys_only)
- query.Order(("__key__", datastore.Query.ASCENDING))
-
- query = self.filter_datastore_query(query)
- return query
-
- def split_range(self, batch_size=0):
- """Split this key range into a list of at most two ranges.
-
- This method attempts to split the key range approximately in half.
- Numeric ranges are split in the middle into two equal ranges and
- string ranges are split lexicographically in the middle. If the
- key range is smaller than batch_size it is left unsplit.
-
- Note that splitting is done without knowledge of the distribution
- of actual entities in the key range, so there is no guarantee (nor
- any particular reason to believe) that the entities of the range
- are evenly split.
-
- Args:
- batch_size: The maximum size of a key range that should not be split.
-
- Returns:
- A list of one or two key ranges covering the same space as this range.
- """
- key_start = self.key_start
- key_end = self.key_end
- include_start = self.include_start
- include_end = self.include_end
-
- key_pairs = []
- if not key_start:
- key_pairs.append((key_start, include_start, key_end, include_end,
- KeyRange.ASC))
- elif not key_end:
- key_pairs.append((key_start, include_start, key_end, include_end,
- KeyRange.DESC))
- else:
- key_split = KeyRange.split_keys(key_start, key_end, batch_size)
- first_include_end = True
-
- if key_split == key_start:
- first_include_end = first_include_end and include_start
-
- key_pairs.append((key_start, include_start,
- key_split, first_include_end,
- KeyRange.DESC))
-
- second_include_end = include_end
-
- if key_split == key_end:
- second_include_end = False
- key_pairs.append((key_split, False,
- key_end, second_include_end,
- KeyRange.ASC))
-
- ranges = [KeyRange(key_start=start,
- include_start=include_start,
- key_end=end,
- include_end=include_end,
- direction=direction,
- namespace=self.namespace,
- _app=self._app)
- for (start, include_start, end, include_end, direction)
- in key_pairs]
-
- return ranges
-
- def __hash__(self):
-    return hash((self.key_start,
-                 self.key_end,
-                 self.direction,
-                 self._app,
-                 self.namespace))
-
- def __cmp__(self, other):
- """Compare two key ranges.
-
-    Key ranges with a value of None for key_start or key_end are always
- considered to have include_start=False or include_end=False, respectively,
- when comparing. Since None indicates an unbounded side of the range,
- the include specifier is meaningless. The ordering generated is total
- but somewhat arbitrary.
-
- Args:
- other: An object to compare to this one.
-
- Returns:
- -1: if this key range is less than other.
- 0: if this key range is equal to other.
- 1: if this key range is greater than other.
- """
- if not isinstance(other, KeyRange):
- return 1
-
- self_list = [self.key_start, self.key_end, self.direction,
- self.include_start, self.include_end, self._app,
- self.namespace]
- if not self.key_start:
- self_list[3] = False
- if not self.key_end:
- self_list[4] = False
-
- other_list = [other.key_start,
- other.key_end,
- other.direction,
- other.include_start,
- other.include_end,
- other._app,
- other.namespace]
- if not other.key_start:
- other_list[3] = False
- if not other.key_end:
- other_list[4] = False
-
- return cmp(self_list, other_list)
-
- @staticmethod
- def bisect_string_range(start, end):
- """Returns a string that is approximately in the middle of the range.
-
- (start, end) is treated as a string range, and it is assumed
- start <= end in the usual lexicographic string ordering. The output key
- mid is guaranteed to satisfy start <= mid <= end.
-
- The method proceeds by comparing initial characters of start and
- end. When the characters are equal, they are appended to the mid
- string. In the first place that the characters differ, the
- difference characters are averaged and this average is appended to
-    the mid string. If averaging resulted in rounding down, an
- additional character is added to the mid string to make up for the
- rounding down. This extra step is necessary for correctness in
- the case that the average of the two characters is equal to the
- character in the start string.
-
- This method makes the assumption that most keys are ascii and it
- attempts to perform splitting within the ascii range when that
- results in a valid split.
-
- Args:
- start: A string.
- end: A string such that start <= end.
-
- Returns:
- A string mid such that start <= mid <= end.
- """
- if start == end:
- return start
- start += "\0"
- end += "\0"
- midpoint = []
-
-
- expected_max = 127
- for i in xrange(min(len(start), len(end))):
- if start[i] == end[i]:
- midpoint.append(start[i])
- else:
- ord_sum = ord(start[i]) + ord(end[i])
- midpoint.append(unichr(ord_sum / 2))
- if ord_sum % 2:
- if len(start) > i + 1:
- ord_start = ord(start[i+1])
- else:
- ord_start = 0
- if ord_start < expected_max:
-
-
- ord_split = (expected_max + ord_start) / 2
- else:
-
- ord_split = (0xFFFF + ord_start) / 2
- midpoint.append(unichr(ord_split))
- break
- return "".join(midpoint)
-
- @staticmethod
- def split_keys(key_start, key_end, batch_size):
- """Return a key that is between key_start and key_end inclusive.
-
- This method compares components of the ancestor paths of key_start
- and key_end. The first place in the path that differs is
- approximately split in half. If the kind components differ, a new
- non-existent kind halfway between the two is used to split the
- space. If the id_or_name components differ, then a new id_or_name
- that is halfway between the two is selected. If the lower
- id_or_name is numeric and the upper id_or_name is a string, then
-    the minimum string key u'\0' is used as the split id_or_name. The
- key that is returned is the shared portion of the ancestor path
- followed by the generated split component.
-
- Args:
- key_start: A db.Key instance for the lower end of a range.
- key_end: A db.Key instance for the upper end of a range.
- batch_size: The maximum size of a range that should not be split.
-
- Returns:
- A db.Key instance, k, such that key_start <= k <= key_end.
- """
- assert key_start.app() == key_end.app()
- assert key_start.namespace() == key_end.namespace()
- path1 = key_start.to_path()
- path2 = key_end.to_path()
- len1 = len(path1)
- len2 = len(path2)
- assert len1 % 2 == 0
- assert len2 % 2 == 0
- out_path = []
- min_path_len = min(len1, len2) / 2
- for i in xrange(min_path_len):
- kind1 = path1[2*i]
- kind2 = path2[2*i]
-
- if kind1 != kind2:
- split_kind = KeyRange.bisect_string_range(kind1, kind2)
- out_path.append(split_kind)
- out_path.append(unichr(0))
- break
-
-
-
-
- last = (len1 == len2 == 2*(i + 1))
-
- id_or_name1 = path1[2*i + 1]
- id_or_name2 = path2[2*i + 1]
- id_or_name_split = KeyRange._split_id_or_name(
- id_or_name1, id_or_name2, batch_size, last)
- if id_or_name1 == id_or_name_split:
- out_path.append(kind1)
- out_path.append(id_or_name1)
- else:
- out_path.append(kind1)
- out_path.append(id_or_name_split)
- break
-
- return db.Key.from_path(
- *out_path,
- **{"_app": key_start.app(), "namespace": key_start.namespace()})
-
- @staticmethod
- def _split_id_or_name(id_or_name1, id_or_name2, batch_size, maintain_batches):
- """Return an id_or_name that is between id_or_name1 an id_or_name2.
-
- Attempts to split the range [id_or_name1, id_or_name2] in half,
- unless maintain_batches is true and the size of the range
- [id_or_name1, id_or_name2] is less than or equal to batch_size.
-
- Args:
- id_or_name1: A number or string or the id_or_name component of a key
- id_or_name2: A number or string or the id_or_name component of a key
- batch_size: The range size that will not be split if maintain_batches
- is true.
- maintain_batches: A boolean for whether to keep small ranges intact.
-
- Returns:
- An id_or_name such that id_or_name1 <= id_or_name <= id_or_name2.
- """
- if (isinstance(id_or_name1, (int, long)) and
- isinstance(id_or_name2, (int, long))):
- if not maintain_batches or id_or_name2 - id_or_name1 > batch_size:
- return (id_or_name1 + id_or_name2) / 2
- else:
- return id_or_name1
- elif (isinstance(id_or_name1, basestring) and
- isinstance(id_or_name2, basestring)):
- return KeyRange.bisect_string_range(id_or_name1, id_or_name2)
- else:
- if (not isinstance(id_or_name1, (int, long)) or
- not isinstance(id_or_name2, basestring)):
- raise KeyRangeError("Wrong key order: %r, %r" %
- (id_or_name1, id_or_name2))
-
- zero_ch = unichr(0)
- if id_or_name2 == zero_ch:
- return (id_or_name1 + 2**63 - 1) / 2
- return zero_ch
-
- @staticmethod
- def guess_end_key(kind,
- key_start,
- probe_count=30,
- split_rate=5):
- """Guess the end of a key range with a binary search of probe queries.
-
- When the 'key_start' parameter has a key hierarchy, this function will
- only determine the key range for keys in a similar hierarchy. That means
- if the keys are in the form:
-
- kind=Foo, name=bar/kind=Stuff, name=meep
-
- only this range will be probed:
-
- kind=Foo, name=*/kind=Stuff, name=*
-
- That means other entities of kind 'Stuff' that are children of another
- parent entity kind will be skipped:
-
- kind=Other, name=cookie/kind=Stuff, name=meep
-
- Args:
- key_start: The starting key of the search range. In most cases this
- should be id = 0 or name = '\0'.
- kind: String name of the entity kind.
- probe_count: Optional, how many probe queries to run.
- split_rate: Exponential rate to use for splitting the range on the
- way down from the full key space. For smaller ranges this should
- be higher so more of the keyspace is skipped on initial descent.
-
- Returns:
- datastore.Key that is guaranteed to be as high or higher than the
- highest key existing for this Kind. Doing a query between 'key_start' and
- this returned Key (inclusive) will contain all entities of this Kind.
- """
- app = key_start.app()
- namespace = key_start.namespace()
-
- full_path = key_start.to_path()
- for index, piece in enumerate(full_path):
- if index % 2 == 0:
-
- continue
- elif isinstance(piece, basestring):
-
- full_path[index] = u"\xffff"
- else:
-
- full_path[index] = 2**63 - 1
-
- key_end = datastore.Key.from_path(*full_path,
- **{"_app": app, "namespace": namespace})
- split_key = key_end
-
- for i in xrange(probe_count):
- for j in xrange(split_rate):
- split_key = KeyRange.split_keys(key_start, split_key, 1)
- results = datastore.Query(
- kind,
- {"__key__ >": split_key},
- namespace=namespace,
- _app=app,
- keys_only=True).Get(1)
- if results:
- if results[0].name() and not key_start.name():
-
-
- return KeyRange.guess_end_key(
- kind, results[0], probe_count - 1, split_rate)
- else:
- split_rate = 1
- key_start = results[0]
- split_key = key_end
- else:
- key_end = split_key
-
- return key_end
-
- def to_json(self):
- """Serialize KeyRange to json.
-
- Returns:
- string with KeyRange json representation.
- """
- if simplejson is None:
- raise SimplejsonUnavailableError(
- "JSON functionality requires simplejson to be available")
-
- def key_to_str(key):
- if key:
- return str(key)
- else:
- return None
-
- obj_dict = {
- "direction": self.direction,
- "key_start": key_to_str(self.key_start),
- "key_end": key_to_str(self.key_end),
- "include_start": self.include_start,
- "include_end": self.include_end,
- "namespace": self.namespace,
- }
- if self._app:
- obj_dict["_app"] = self._app
-
- return simplejson.dumps(obj_dict, sort_keys=True)
-
-
- @staticmethod
- def from_json(json_str):
- """Deserialize KeyRange from its json representation.
-
- Args:
-      json_str: string with json representation created by to_json.
-
- Returns:
- deserialized KeyRange instance.
- """
- if simplejson is None:
- raise SimplejsonUnavailableError(
- "JSON functionality requires simplejson to be available")
-
- def key_from_str(key_str):
- if key_str:
- return db.Key(key_str)
- else:
- return None
-
- json = simplejson.loads(json_str)
- return KeyRange(key_from_str(json["key_start"]),
- key_from_str(json["key_end"]),
- json["direction"],
- json["include_start"],
- json["include_end"],
- json.get("namespace"),
- _app=json.get("_app"))
diff --git a/mapreduce/lib/simplejson/README b/mapreduce/lib/simplejson/README
deleted file mode 100755
index 6284258..0000000
--- a/mapreduce/lib/simplejson/README
+++ /dev/null
@@ -1,13 +0,0 @@
-Simplejson library
-
-The web site is http://undefined.org/python/#simple_json
-
-This copy was downloaded from
-http://pypi.python.org/packages/source/s/simplejson/simplejson-2.0.5.tar.gz
-
-simplejson is licensed under the MIT open source license.
-
-Local changes:
-
-- Changed imports to make mapreduce library hermetic.
-
diff --git a/mapreduce/lib/simplejson/__init__.py b/mapreduce/lib/simplejson/__init__.py
deleted file mode 100755
index 83eec57..0000000
--- a/mapreduce/lib/simplejson/__init__.py
+++ /dev/null
@@ -1,314 +0,0 @@
-#!/usr/bin/env python
-r"""A simple, fast, extensible JSON encoder and decoder
-
-JSON (JavaScript Object Notation) is a subset of
-JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data
-interchange format.
-
-simplejson exposes an API familiar to users of the standard library
-marshal and pickle modules.
-
-Encoding basic Python object hierarchies::
-
- >>> import simplejson
- >>> simplejson.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}])
- '["foo", {"bar": ["baz", null, 1.0, 2]}]'
- >>> print simplejson.dumps("\"foo\bar")
- "\"foo\bar"
- >>> print simplejson.dumps(u'\u1234')
- "\u1234"
- >>> print simplejson.dumps('\\')
- "\\"
- >>> print simplejson.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)
- {"a": 0, "b": 0, "c": 0}
- >>> from StringIO import StringIO
- >>> io = StringIO()
- >>> simplejson.dump(['streaming API'], io)
- >>> io.getvalue()
- '["streaming API"]'
-
-Compact encoding::
-
- >>> import simplejson
- >>> compact = simplejson.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':'))
- >>> # Can't assume dict ordering
- >>> compact in ('[1,2,3,{"4":5,"6":7}]', '[1,2,3,{"6":7,"4":5}]')
- True
-
-Pretty printing (using repr() because of extraneous whitespace in the output)::
-
- >>> import simplejson
- >>> print repr(simplejson.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4))
- '{\n "4": 5, \n "6": 7\n}'
-
-Decoding JSON::
-
- >>> import simplejson
- >>> simplejson.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == ["foo", {"bar":["baz", None, 1.0, 2]}]
- True
- >>> simplejson.loads('"\\"foo\\bar"') == '"foo\x08ar'
- True
- >>> from StringIO import StringIO
- >>> io = StringIO('["streaming API"]')
- >>> simplejson.load(io) == ["streaming API"]
- True
-
-Specializing JSON object decoding::
-
- >>> import simplejson
- >>> def as_complex(dct):
- ... if '__complex__' in dct:
- ... return complex(dct['real'], dct['imag'])
- ... return dct
- ...
- >>> simplejson.loads('{"__complex__": true, "real": 1, "imag": 2}',
- ... object_hook=as_complex)
- (1+2j)
- >>> from decimal import Decimal
- >>> simplejson.loads('1.1', parse_float=Decimal) == Decimal("1.1")
- True
-
-Extending JSONEncoder::
-
- >>> import simplejson
- >>> class ComplexEncoder(simplejson.JSONEncoder):
- ... def default(self, obj):
- ... if isinstance(obj, complex):
- ... return [obj.real, obj.imag]
- ... return simplejson.JSONEncoder.default(self, obj)
- ...
- >>> dumps(2 + 1j, cls=ComplexEncoder)
- '[2.0, 1.0]'
- >>> ComplexEncoder().encode(2 + 1j)
- '[2.0, 1.0]'
- >>> ''.join(ComplexEncoder().iterencode(2 + 1j))
- '[2.0, 1.0]'
-
-
-Using simplejson from the shell to validate and
-pretty-print::
-
- $ echo '{"json":"obj"}' | python -msimplejson.tool
- {
- "json": "obj"
- }
- $ echo '{ 1.2:3.4}' | python -msimplejson.tool
- Expecting property name: line 1 column 2 (char 2)
-"""
-__version__ = '2.0.5'
-__all__ = [
- 'dump', 'dumps', 'load', 'loads',
- 'JSONDecoder', 'JSONEncoder',
-]
-
-from decoder import JSONDecoder
-from encoder import JSONEncoder
-
-_default_encoder = JSONEncoder(
- skipkeys=False,
- ensure_ascii=True,
- check_circular=True,
- allow_nan=True,
- indent=None,
- separators=None,
- encoding='utf-8',
- default=None,
-)
-
-def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
- allow_nan=True, cls=None, indent=None, separators=None,
- encoding='utf-8', default=None, **kw):
- """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
- ``.write()``-supporting file-like object).
-
- If ``skipkeys`` is ``True`` then ``dict`` keys that are not basic types
- (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
- will be skipped instead of raising a ``TypeError``.
-
-    If ``ensure_ascii`` is ``False``, then some chunks written to ``fp``
- may be ``unicode`` instances, subject to normal Python ``str`` to
- ``unicode`` coercion rules. Unless ``fp.write()`` explicitly
- understands ``unicode`` (as in ``codecs.getwriter()``) this is likely
- to cause an error.
-
- If ``check_circular`` is ``False``, then the circular reference check
- for container types will be skipped and a circular reference will
- result in an ``OverflowError`` (or worse).
-
- If ``allow_nan`` is ``False``, then it will be a ``ValueError`` to
- serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
- in strict compliance of the JSON specification, instead of using the
- JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
-
- If ``indent`` is a non-negative integer, then JSON array elements and object
- members will be pretty-printed with that indent level. An indent level
- of 0 will only insert newlines. ``None`` is the most compact representation.
-
- If ``separators`` is an ``(item_separator, dict_separator)`` tuple
- then it will be used instead of the default ``(', ', ': ')`` separators.
- ``(',', ':')`` is the most compact JSON representation.
-
- ``encoding`` is the character encoding for str instances, default is UTF-8.
-
- ``default(obj)`` is a function that should return a serializable version
- of obj or raise TypeError. The default simply raises TypeError.
-
- To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
- ``.default()`` method to serialize additional types), specify it with
- the ``cls`` kwarg.
-
- """
- # cached encoder
- if (skipkeys is False and ensure_ascii is True and
- check_circular is True and allow_nan is True and
- cls is None and indent is None and separators is None and
- encoding == 'utf-8' and default is None and not kw):
- iterable = _default_encoder.iterencode(obj)
- else:
- if cls is None:
- cls = JSONEncoder
- iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii,
- check_circular=check_circular, allow_nan=allow_nan, indent=indent,
- separators=separators, encoding=encoding,
- default=default, **kw).iterencode(obj)
- # could accelerate with writelines in some versions of Python, at
- # a debuggability cost
- for chunk in iterable:
- fp.write(chunk)
-
-
-def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
- allow_nan=True, cls=None, indent=None, separators=None,
- encoding='utf-8', default=None, **kw):
- """Serialize ``obj`` to a JSON formatted ``str``.
-
- If ``skipkeys`` is ``True`` then ``dict`` keys that are not basic types
- (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
- will be skipped instead of raising a ``TypeError``.
-
- If ``ensure_ascii`` is ``False``, then the return value will be a
- ``unicode`` instance subject to normal Python ``str`` to ``unicode``
- coercion rules instead of being escaped to an ASCII ``str``.
-
- If ``check_circular`` is ``False``, then the circular reference check
- for container types will be skipped and a circular reference will
- result in an ``OverflowError`` (or worse).
-
- If ``allow_nan`` is ``False``, then it will be a ``ValueError`` to
- serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in
- strict compliance of the JSON specification, instead of using the
- JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
-
- If ``indent`` is a non-negative integer, then JSON array elements and
- object members will be pretty-printed with that indent level. An indent
- level of 0 will only insert newlines. ``None`` is the most compact
- representation.
-
- If ``separators`` is an ``(item_separator, dict_separator)`` tuple
- then it will be used instead of the default ``(', ', ': ')`` separators.
- ``(',', ':')`` is the most compact JSON representation.
-
- ``encoding`` is the character encoding for str instances, default is UTF-8.
-
- ``default(obj)`` is a function that should return a serializable version
- of obj or raise TypeError. The default simply raises TypeError.
-
- To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
- ``.default()`` method to serialize additional types), specify it with
- the ``cls`` kwarg.
-
- """
- # cached encoder
- if (skipkeys is False and ensure_ascii is True and
- check_circular is True and allow_nan is True and
- cls is None and indent is None and separators is None and
- encoding == 'utf-8' and default is None and not kw):
- return _default_encoder.encode(obj)
- if cls is None:
- cls = JSONEncoder
- return cls(
- skipkeys=skipkeys, ensure_ascii=ensure_ascii,
- check_circular=check_circular, allow_nan=allow_nan, indent=indent,
- separators=separators, encoding=encoding, default=default,
- **kw).encode(obj)
-
-
-_default_decoder = JSONDecoder(encoding=None, object_hook=None)
-
-
-def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
- parse_int=None, parse_constant=None, **kw):
- """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
- a JSON document) to a Python object.
-
- If the contents of ``fp`` is encoded with an ASCII based encoding other
- than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must
- be specified. Encodings that are not ASCII based (such as UCS-2) are
- not allowed, and should be wrapped with
- ``codecs.getreader(fp)(encoding)``, or simply decoded to a ``unicode``
- object and passed to ``loads()``
-
- ``object_hook`` is an optional function that will be called with the
- result of any object literal decode (a ``dict``). The return value of
- ``object_hook`` will be used instead of the ``dict``. This feature
- can be used to implement custom decoders (e.g. JSON-RPC class hinting).
-
- To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
- kwarg.
-
- """
- return loads(fp.read(),
- encoding=encoding, cls=cls, object_hook=object_hook,
- parse_float=parse_float, parse_int=parse_int,
- parse_constant=parse_constant, **kw)
-
-
-def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
- parse_int=None, parse_constant=None, **kw):
- """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON
- document) to a Python object.
-
- If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding
- other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name
- must be specified. Encodings that are not ASCII based (such as UCS-2)
- are not allowed and should be decoded to ``unicode`` first.
-
- ``object_hook`` is an optional function that will be called with the
- result of any object literal decode (a ``dict``). The return value of
- ``object_hook`` will be used instead of the ``dict``. This feature
- can be used to implement custom decoders (e.g. JSON-RPC class hinting).
-
- ``parse_float``, if specified, will be called with the string
- of every JSON float to be decoded. By default this is equivalent to
- float(num_str). This can be used to use another datatype or parser
- for JSON floats (e.g. decimal.Decimal).
-
- ``parse_int``, if specified, will be called with the string
- of every JSON int to be decoded. By default this is equivalent to
- int(num_str). This can be used to use another datatype or parser
- for JSON integers (e.g. float).
-
- ``parse_constant``, if specified, will be called with one of the
- following strings: -Infinity, Infinity, NaN, null, true, false.
- This can be used to raise an exception if invalid JSON numbers
- are encountered.
-
- To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
- kwarg.
-
- """
- if (cls is None and encoding is None and object_hook is None and
- parse_int is None and parse_float is None and
- parse_constant is None and not kw):
- return _default_decoder.decode(s)
- if cls is None:
- cls = JSONDecoder
- if object_hook is not None:
- kw['object_hook'] = object_hook
- if parse_float is not None:
- kw['parse_float'] = parse_float
- if parse_int is not None:
- kw['parse_int'] = parse_int
- if parse_constant is not None:
- kw['parse_constant'] = parse_constant
- return cls(encoding=encoding, **kw).decode(s)
diff --git a/mapreduce/lib/simplejson/decoder.py b/mapreduce/lib/simplejson/decoder.py
deleted file mode 100755
index 6926ec8..0000000
--- a/mapreduce/lib/simplejson/decoder.py
+++ /dev/null
@@ -1,334 +0,0 @@
-#!/usr/bin/env python
-"""Implementation of JSONDecoder
-"""
-import re
-import sys
-import struct
-
-from mapreduce.lib.simplejson.scanner import make_scanner
-try:
- from mapreduce.lib.simplejson._speedups import scanstring as c_scanstring
-except ImportError:
- c_scanstring = None
-
-__all__ = ['JSONDecoder']
-
-FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
-
-def _floatconstants():
- _BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
- if sys.byteorder != 'big':
- _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
- nan, inf = struct.unpack('dd', _BYTES)
- return nan, inf, -inf
-
-NaN, PosInf, NegInf = _floatconstants()
-
-
-def linecol(doc, pos):
- lineno = doc.count('\n', 0, pos) + 1
- if lineno == 1:
- colno = pos
- else:
- colno = pos - doc.rindex('\n', 0, pos)
- return lineno, colno
-
-
-def errmsg(msg, doc, pos, end=None):
- # Note that this function is called from _speedups
- lineno, colno = linecol(doc, pos)
- if end is None:
- return '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos)
- endlineno, endcolno = linecol(doc, end)
- return '%s: line %d column %d - line %d column %d (char %d - %d)' % (
- msg, lineno, colno, endlineno, endcolno, pos, end)
-
-
-_CONSTANTS = {
- '-Infinity': NegInf,
- 'Infinity': PosInf,
- 'NaN': NaN,
-}
-
-STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
-BACKSLASH = {
- '"': u'"', '\\': u'\\', '/': u'/',
- 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
-}
-
-DEFAULT_ENCODING = "utf-8"
-
-def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):
- if encoding is None:
- encoding = DEFAULT_ENCODING
- chunks = []
- _append = chunks.append
- begin = end - 1
- while 1:
- chunk = _m(s, end)
- if chunk is None:
- raise ValueError(
- errmsg("Unterminated string starting at", s, begin))
- end = chunk.end()
- content, terminator = chunk.groups()
- if content:
- if not isinstance(content, unicode):
- content = unicode(content, encoding)
- _append(content)
- if terminator == '"':
- break
- elif terminator != '\\':
- if strict:
- raise ValueError(errmsg("Invalid control character %r at", s, end))
- else:
- _append(terminator)
- continue
- try:
- esc = s[end]
- except IndexError:
- raise ValueError(
- errmsg("Unterminated string starting at", s, begin))
- if esc != 'u':
- try:
- m = _b[esc]
- except KeyError:
- raise ValueError(
- errmsg("Invalid \\escape: %r" % (esc,), s, end))
- end += 1
- else:
- esc = s[end + 1:end + 5]
- next_end = end + 5
- msg = "Invalid \\uXXXX escape"
- try:
- if len(esc) != 4:
- raise ValueError
- uni = int(esc, 16)
- if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
- msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
- if not s[end + 5:end + 7] == '\\u':
- raise ValueError
- esc2 = s[end + 7:end + 11]
- if len(esc2) != 4:
- raise ValueError
- uni2 = int(esc2, 16)
- uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
- next_end += 6
- m = unichr(uni)
- except ValueError:
- raise ValueError(errmsg(msg, s, end))
- end = next_end
- _append(m)
- return u''.join(chunks), end
-
-
-# Use speedup if available
-scanstring = c_scanstring or py_scanstring
-
-WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
-WHITESPACE_STR = ' \t\n\r'
-
-def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
- pairs = {}
- nextchar = s[end:end + 1]
- # Normally we expect nextchar == '"'
- if nextchar != '"':
- if nextchar in _ws:
- end = _w(s, end).end()
- nextchar = s[end:end + 1]
- # Trivial empty object
- if nextchar == '}':
- return pairs, end + 1
- elif nextchar != '"':
- raise ValueError(errmsg("Expecting property name", s, end))
- end += 1
- while True:
- key, end = scanstring(s, end, encoding, strict)
-
- # To skip some function call overhead we optimize the fast paths where
- # the JSON key separator is ": " or just ":".
- if s[end:end + 1] != ':':
- end = _w(s, end).end()
- if s[end:end + 1] != ':':
- raise ValueError(errmsg("Expecting : delimiter", s, end))
-
- end += 1
-
- try:
- if s[end] in _ws:
- end += 1
- if s[end] in _ws:
- end = _w(s, end + 1).end()
- except IndexError:
- pass
-
- try:
- value, end = scan_once(s, end)
- except StopIteration:
- raise ValueError(errmsg("Expecting object", s, end))
- pairs[key] = value
-
- try:
- nextchar = s[end]
- if nextchar in _ws:
- end = _w(s, end + 1).end()
- nextchar = s[end]
- except IndexError:
- nextchar = ''
- end += 1
-
- if nextchar == '}':
- break
- elif nextchar != ',':
- raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
-
- try:
- nextchar = s[end]
- if nextchar in _ws:
- end += 1
- nextchar = s[end]
- if nextchar in _ws:
- end = _w(s, end + 1).end()
- nextchar = s[end]
- except IndexError:
- nextchar = ''
-
- end += 1
- if nextchar != '"':
- raise ValueError(errmsg("Expecting property name", s, end - 1))
-
- if object_hook is not None:
- pairs = object_hook(pairs)
- return pairs, end
-
-def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
- values = []
- nextchar = s[end:end + 1]
- if nextchar in _ws:
- end = _w(s, end + 1).end()
- nextchar = s[end:end + 1]
- # Look-ahead for trivial empty array
- if nextchar == ']':
- return values, end + 1
- _append = values.append
- while True:
- try:
- value, end = scan_once(s, end)
- except StopIteration:
- raise ValueError(errmsg("Expecting object", s, end))
- _append(value)
- nextchar = s[end:end + 1]
- if nextchar in _ws:
- end = _w(s, end + 1).end()
- nextchar = s[end:end + 1]
- end += 1
- if nextchar == ']':
- break
- elif nextchar != ',':
- raise ValueError(errmsg("Expecting , delimiter", s, end))
-
- try:
- if s[end] in _ws:
- end += 1
- if s[end] in _ws:
- end = _w(s, end + 1).end()
- except IndexError:
- pass
-
- return values, end
-
-class JSONDecoder(object):
- """Simple JSON decoder
-
- Performs the following translations in decoding by default:
-
- +---------------+-------------------+
- | JSON | Python |
- +===============+===================+
- | object | dict |
- +---------------+-------------------+
- | array | list |
- +---------------+-------------------+
- | string | unicode |
- +---------------+-------------------+
- | number (int) | int, long |
- +---------------+-------------------+
- | number (real) | float |
- +---------------+-------------------+
- | true | True |
- +---------------+-------------------+
- | false | False |
- +---------------+-------------------+
- | null | None |
- +---------------+-------------------+
-
- It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
- their corresponding ``float`` values, which is outside the JSON spec.
-
- """
-
- def __init__(self, encoding=None, object_hook=None, parse_float=None,
- parse_int=None, parse_constant=None, strict=True):
- """``encoding`` determines the encoding used to interpret any ``str``
- objects decoded by this instance (utf-8 by default). It has no
- effect when decoding ``unicode`` objects.
-
- Note that currently only encodings that are a superset of ASCII work,
- strings of other encodings should be passed in as ``unicode``.
-
- ``object_hook``, if specified, will be called with the result
- of every JSON object decoded and its return value will be used in
- place of the given ``dict``. This can be used to provide custom
- deserializations (e.g. to support JSON-RPC class hinting).
-
- ``parse_float``, if specified, will be called with the string
- of every JSON float to be decoded. By default this is equivalent to
- float(num_str). This can be used to use another datatype or parser
- for JSON floats (e.g. decimal.Decimal).
-
- ``parse_int``, if specified, will be called with the string
- of every JSON int to be decoded. By default this is equivalent to
- int(num_str). This can be used to use another datatype or parser
- for JSON integers (e.g. float).
-
- ``parse_constant``, if specified, will be called with one of the
- following strings: -Infinity, Infinity, NaN.
- This can be used to raise an exception if invalid JSON numbers
- are encountered.
-
- """
- self.encoding = encoding
- self.object_hook = object_hook
- self.parse_float = parse_float or float
- self.parse_int = parse_int or int
- self.parse_constant = parse_constant or _CONSTANTS.__getitem__
- self.strict = strict
- self.parse_object = JSONObject
- self.parse_array = JSONArray
- self.parse_string = scanstring
- self.scan_once = make_scanner(self)
-
- def decode(self, s, _w=WHITESPACE.match):
- """Return the Python representation of ``s`` (a ``str`` or ``unicode``
- instance containing a JSON document)
-
- """
- obj, end = self.raw_decode(s, idx=_w(s, 0).end())
- end = _w(s, end).end()
- if end != len(s):
- raise ValueError(errmsg("Extra data", s, end, len(s)))
- return obj
-
- def raw_decode(self, s, idx=0):
- """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning
- with a JSON document) and return a 2-tuple of the Python
- representation and the index in ``s`` where the document ended.
-
- This can be used to decode a JSON document from a string that may
- have extraneous data at the end.
-
- """
- try:
- obj, end = self.scan_once(s, idx)
- except StopIteration:
- raise ValueError("No JSON object could be decoded")
- return obj, end
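
The decode()/raw_decode() pair above splits the work: raw_decode tolerates trailing data and reports the index where the JSON document ended, and decode() uses that index to reject any extra input. A small illustration, using the module path as bundled here:

    from mapreduce.lib.simplejson.decoder import JSONDecoder

    decoder = JSONDecoder()
    obj, end = decoder.raw_decode('{"a": 1} trailing garbage')
    # obj == {u'a': 1}; end == 8, the index just past the closing brace.
    decoder.decode('{"a": 1} trailing garbage')  # raises ValueError: Extra data
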
diff --git a/mapreduce/lib/simplejson/encoder.py b/mapreduce/lib/simplejson/encoder.py
deleted file mode 100755
index cfec6e6..0000000
--- a/mapreduce/lib/simplejson/encoder.py
+++ /dev/null
@@ -1,434 +0,0 @@
-#!/usr/bin/env python
-"""Implementation of JSONEncoder
-"""
-import re
-
-try:
- from mapreduce.lib.simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii
-except ImportError:
- c_encode_basestring_ascii = None
-try:
- from mapreduce.lib.simplejson._speedups import make_encoder as c_make_encoder
-except ImportError:
- c_make_encoder = None
-
-ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
-ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
-HAS_UTF8 = re.compile(r'[\x80-\xff]')
-ESCAPE_DCT = {
- '\\': '\\\\',
- '"': '\\"',
- '\b': '\\b',
- '\f': '\\f',
- '\n': '\\n',
- '\r': '\\r',
- '\t': '\\t',
-}
-for i in range(0x20):
- ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
-
-# Assume this produces an infinity on all machines (probably not guaranteed)
-INFINITY = float('1e66666')
-FLOAT_REPR = repr
-
-def encode_basestring(s):
- """Return a JSON representation of a Python string
-
- """
- def replace(match):
- return ESCAPE_DCT[match.group(0)]
- return '"' + ESCAPE.sub(replace, s) + '"'
-
-
-def py_encode_basestring_ascii(s):
- if isinstance(s, str) and HAS_UTF8.search(s) is not None:
- s = s.decode('utf-8')
- def replace(match):
- s = match.group(0)
- try:
- return ESCAPE_DCT[s]
- except KeyError:
- n = ord(s)
- if n < 0x10000:
- return '\\u%04x' % (n,)
- else:
- # surrogate pair
- n -= 0x10000
- s1 = 0xd800 | ((n >> 10) & 0x3ff)
- s2 = 0xdc00 | (n & 0x3ff)
- return '\\u%04x\\u%04x' % (s1, s2)
- return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
-
-
-encode_basestring_ascii = c_encode_basestring_ascii or py_encode_basestring_ascii
-
-class JSONEncoder(object):
- """Extensible JSON encoder for Python data structures.
-
- Supports the following objects and types by default:
-
- +-------------------+---------------+
- | Python | JSON |
- +===================+===============+
- | dict | object |
- +-------------------+---------------+
- | list, tuple | array |
- +-------------------+---------------+
- | str, unicode | string |
- +-------------------+---------------+
- | int, long, float | number |
- +-------------------+---------------+
- | True | true |
- +-------------------+---------------+
- | False | false |
- +-------------------+---------------+
- | None | null |
- +-------------------+---------------+
-
- To extend this to recognize other objects, subclass and implement a
- ``.default()`` method with another method that returns a serializable
- object for ``o`` if possible, otherwise it should call the superclass
- implementation (to raise ``TypeError``).
-
- """
- item_separator = ', '
- key_separator = ': '
- def __init__(self, skipkeys=False, ensure_ascii=True,
- check_circular=True, allow_nan=True, sort_keys=False,
- indent=None, separators=None, encoding='utf-8', default=None):
- """Constructor for JSONEncoder, with sensible defaults.
-
- If skipkeys is False, then it is a TypeError to attempt
- encoding of keys that are not str, int, long, float or None. If
- skipkeys is True, such items are simply skipped.
-
- If ensure_ascii is True, the output is guaranteed to be str
- objects with all incoming unicode characters escaped. If
- ensure_ascii is false, the output will be unicode object.
-
- If check_circular is True, then lists, dicts, and custom encoded
- objects will be checked for circular references during encoding to
- prevent an infinite recursion (which would cause an OverflowError).
- Otherwise, no such check takes place.
-
- If allow_nan is True, then NaN, Infinity, and -Infinity will be
- encoded as such. This behavior is not JSON specification compliant,
- but is consistent with most JavaScript based encoders and decoders.
- Otherwise, it will be a ValueError to encode such floats.
-
- If sort_keys is True, then the output of dictionaries will be
- sorted by key; this is useful for regression tests to ensure
- that JSON serializations can be compared on a day-to-day basis.
-
- If indent is a non-negative integer, then JSON array
- elements and object members will be pretty-printed with that
- indent level. An indent level of 0 will only insert newlines.
- None is the most compact representation.
-
- If specified, separators should be a (item_separator, key_separator)
- tuple. The default is (', ', ': '). To get the most compact JSON
- representation you should specify (',', ':') to eliminate whitespace.
-
- If specified, default is a function that gets called for objects
- that can't otherwise be serialized. It should return a JSON encodable
- version of the object or raise a ``TypeError``.
-
- If encoding is not None, then all input strings will be
- transformed into unicode using that encoding prior to JSON-encoding.
- The default is UTF-8.
-
- """
-
- self.skipkeys = skipkeys
- self.ensure_ascii = ensure_ascii
- self.check_circular = check_circular
- self.allow_nan = allow_nan
- self.sort_keys = sort_keys
- self.indent = indent
- if separators is not None:
- self.item_separator, self.key_separator = separators
- if default is not None:
- self.default = default
- self.encoding = encoding
-
- def default(self, o):
- """Implement this method in a subclass such that it returns
- a serializable object for ``o``, or calls the base implementation
- (to raise a ``TypeError``).
-
- For example, to support arbitrary iterators, you could
- implement default like this::
-
- def default(self, o):
- try:
- iterable = iter(o)
- except TypeError:
- pass
- else:
- return list(iterable)
- return JSONEncoder.default(self, o)
-
- """
- raise TypeError("%r is not JSON serializable" % (o,))
-
- def encode(self, o):
- """Return a JSON string representation of a Python data structure.
-
- >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
- '{"foo": ["bar", "baz"]}'
-
- """
- # This is for extremely simple cases and benchmarks.
- if isinstance(o, basestring):
- if isinstance(o, str):
- _encoding = self.encoding
- if (_encoding is not None
- and not (_encoding == 'utf-8')):
- o = o.decode(_encoding)
- if self.ensure_ascii:
- return encode_basestring_ascii(o)
- else:
- return encode_basestring(o)
- # This doesn't pass the iterator directly to ''.join() because the
- # exceptions aren't as detailed. The list call should be roughly
- # equivalent to the PySequence_Fast that ''.join() would do.
- chunks = self.iterencode(o, _one_shot=True)
- if not isinstance(chunks, (list, tuple)):
- chunks = list(chunks)
- return ''.join(chunks)
-
- def iterencode(self, o, _one_shot=False):
- """Encode the given object and yield each string
- representation as available.
-
- For example::
-
- for chunk in JSONEncoder().iterencode(bigobject):
- mysocket.write(chunk)
-
- """
- if self.check_circular:
- markers = {}
- else:
- markers = None
- if self.ensure_ascii:
- _encoder = encode_basestring_ascii
- else:
- _encoder = encode_basestring
- if self.encoding != 'utf-8':
- def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
- if isinstance(o, str):
- o = o.decode(_encoding)
- return _orig_encoder(o)
-
- def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
- # Check for specials. Note that this type of test is processor- and/or
- # platform-specific, so do tests which don't depend on the internals.
-
- if o != o:
- text = 'NaN'
- elif o == _inf:
- text = 'Infinity'
- elif o == _neginf:
- text = '-Infinity'
- else:
- return _repr(o)
-
- if not allow_nan:
- raise ValueError("Out of range float values are not JSON compliant: %r"
- % (o,))
-
- return text
-
-
- if _one_shot and c_make_encoder is not None and not self.indent and not self.sort_keys:
- _iterencode = c_make_encoder(
- markers, self.default, _encoder, self.indent,
- self.key_separator, self.item_separator, self.sort_keys,
- self.skipkeys, self.allow_nan)
- else:
- _iterencode = _make_iterencode(
- markers, self.default, _encoder, self.indent, floatstr,
- self.key_separator, self.item_separator, self.sort_keys,
- self.skipkeys, _one_shot)
- return _iterencode(o, 0)
-
-def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
- ## HACK: hand-optimized bytecode; turn globals into locals
- False=False,
- True=True,
- ValueError=ValueError,
- basestring=basestring,
- dict=dict,
- float=float,
- id=id,
- int=int,
- isinstance=isinstance,
- list=list,
- long=long,
- str=str,
- tuple=tuple,
- ):
-
- def _iterencode_list(lst, _current_indent_level):
- if not lst:
- yield '[]'
- return
- if markers is not None:
- markerid = id(lst)
- if markerid in markers:
- raise ValueError("Circular reference detected")
- markers[markerid] = lst
- buf = '['
- if _indent is not None:
- _current_indent_level += 1
- newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
- separator = _item_separator + newline_indent
- buf += newline_indent
- else:
- newline_indent = None
- separator = _item_separator
- first = True
- for value in lst:
- if first:
- first = False
- else:
- buf = separator
- if isinstance(value, basestring):
- yield buf + _encoder(value)
- elif value is None:
- yield buf + 'null'
- elif value is True:
- yield buf + 'true'
- elif value is False:
- yield buf + 'false'
- elif isinstance(value, (int, long)):
- yield buf + str(value)
- elif isinstance(value, float):
- yield buf + _floatstr(value)
- else:
- yield buf
- if isinstance(value, (list, tuple)):
- chunks = _iterencode_list(value, _current_indent_level)
- elif isinstance(value, dict):
- chunks = _iterencode_dict(value, _current_indent_level)
- else:
- chunks = _iterencode(value, _current_indent_level)
- for chunk in chunks:
- yield chunk
- if newline_indent is not None:
- _current_indent_level -= 1
- yield '\n' + (' ' * (_indent * _current_indent_level))
- yield ']'
- if markers is not None:
- del markers[markerid]
-
- def _iterencode_dict(dct, _current_indent_level):
- if not dct:
- yield '{}'
- return
- if markers is not None:
- markerid = id(dct)
- if markerid in markers:
- raise ValueError("Circular reference detected")
- markers[markerid] = dct
- yield '{'
- if _indent is not None:
- _current_indent_level += 1
- newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
- item_separator = _item_separator + newline_indent
- yield newline_indent
- else:
- newline_indent = None
- item_separator = _item_separator
- first = True
- if _sort_keys:
- items = dct.items()
- items.sort(key=lambda kv: kv[0])
- else:
- items = dct.iteritems()
- for key, value in items:
- if isinstance(key, basestring):
- pass
- # JavaScript is weakly typed for these, so it makes sense to
- # also allow them. Many encoders seem to do something like this.
- elif isinstance(key, float):
- key = _floatstr(key)
- elif isinstance(key, (int, long)):
- key = str(key)
- elif key is True:
- key = 'true'
- elif key is False:
- key = 'false'
- elif key is None:
- key = 'null'
- elif _skipkeys:
- continue
- else:
- raise TypeError("key %r is not a string" % (key,))
- if first:
- first = False
- else:
- yield item_separator
- yield _encoder(key)
- yield _key_separator
- if isinstance(value, basestring):
- yield _encoder(value)
- elif value is None:
- yield 'null'
- elif value is True:
- yield 'true'
- elif value is False:
- yield 'false'
- elif isinstance(value, (int, long)):
- yield str(value)
- elif isinstance(value, float):
- yield _floatstr(value)
- else:
- if isinstance(value, (list, tuple)):
- chunks = _iterencode_list(value, _current_indent_level)
- elif isinstance(value, dict):
- chunks = _iterencode_dict(value, _current_indent_level)
- else:
- chunks = _iterencode(value, _current_indent_level)
- for chunk in chunks:
- yield chunk
- if newline_indent is not None:
- _current_indent_level -= 1
- yield '\n' + (' ' * (_indent * _current_indent_level))
- yield '}'
- if markers is not None:
- del markers[markerid]
-
- def _iterencode(o, _current_indent_level):
- if isinstance(o, basestring):
- yield _encoder(o)
- elif o is None:
- yield 'null'
- elif o is True:
- yield 'true'
- elif o is False:
- yield 'false'
- elif isinstance(o, (int, long)):
- yield str(o)
- elif isinstance(o, float):
- yield _floatstr(o)
- elif isinstance(o, (list, tuple)):
- for chunk in _iterencode_list(o, _current_indent_level):
- yield chunk
- elif isinstance(o, dict):
- for chunk in _iterencode_dict(o, _current_indent_level):
- yield chunk
- else:
- if markers is not None:
- markerid = id(o)
- if markerid in markers:
- raise ValueError("Circular reference detected")
- markers[markerid] = o
- o = _default(o)
- for chunk in _iterencode(o, _current_indent_level):
- yield chunk
- if markers is not None:
- del markers[markerid]
-
- return _iterencode
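
For reference, the floatstr() closure deleted above follows the same NaN/Infinity policy as the standard-library json module; a minimal stdlib-only sketch of that behavior (illustrative, not part of this change):

import json

print(json.dumps([float('nan'), float('inf'), float('-inf')]))
# '[NaN, Infinity, -Infinity]' -- accepted by most parsers, but not strict JSON

try:
    json.dumps(float('nan'), allow_nan=False)
except ValueError as err:
    print(err)  # out-of-range floats are rejected when allow_nan is False
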
diff --git a/mapreduce/lib/simplejson/scanner.py b/mapreduce/lib/simplejson/scanner.py
deleted file mode 100755
index 201cbc5..0000000
--- a/mapreduce/lib/simplejson/scanner.py
+++ /dev/null
@@ -1,66 +0,0 @@
-#!/usr/bin/env python
-"""JSON token scanner
-"""
-import re
-try:
- from mapreduce.lib.simplejson._speedups import make_scanner as c_make_scanner
-except ImportError:
- c_make_scanner = None
-
-__all__ = ['make_scanner']
-
-NUMBER_RE = re.compile(
- r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?',
- (re.VERBOSE | re.MULTILINE | re.DOTALL))
-
-def py_make_scanner(context):
- parse_object = context.parse_object
- parse_array = context.parse_array
- parse_string = context.parse_string
- match_number = NUMBER_RE.match
- encoding = context.encoding
- strict = context.strict
- parse_float = context.parse_float
- parse_int = context.parse_int
- parse_constant = context.parse_constant
- object_hook = context.object_hook
-
- def _scan_once(string, idx):
- try:
- nextchar = string[idx]
- except IndexError:
- raise StopIteration
-
- if nextchar == '"':
- return parse_string(string, idx + 1, encoding, strict)
- elif nextchar == '{':
- return parse_object((string, idx + 1), encoding, strict, _scan_once, object_hook)
- elif nextchar == '[':
- return parse_array((string, idx + 1), _scan_once)
- elif nextchar == 'n' and string[idx:idx + 4] == 'null':
- return None, idx + 4
- elif nextchar == 't' and string[idx:idx + 4] == 'true':
- return True, idx + 4
- elif nextchar == 'f' and string[idx:idx + 5] == 'false':
- return False, idx + 5
-
- m = match_number(string, idx)
- if m is not None:
- integer, frac, exp = m.groups()
- if frac or exp:
- res = parse_float(integer + (frac or '') + (exp or ''))
- else:
- res = parse_int(integer)
- return res, m.end()
- elif nextchar == 'N' and string[idx:idx + 3] == 'NaN':
- return parse_constant('NaN'), idx + 3
- elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity':
- return parse_constant('Infinity'), idx + 8
- elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
- return parse_constant('-Infinity'), idx + 9
- else:
- raise StopIteration
-
- return _scan_once
-
-make_scanner = c_make_scanner or py_make_scanner
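
The deleted scanner dispatches on the first character and falls back to NUMBER_RE for numbers; a minimal sketch of that number handling (the regex is copied from scanner.py above, the rest is illustrative):

import re

NUMBER_RE = re.compile(
    r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?',
    (re.VERBOSE | re.MULTILINE | re.DOTALL))

m = NUMBER_RE.match('-12.5e3,"next"', 0)
integer, frac, exp = m.groups()
print("%s %s %s" % (integer, frac, exp))   # -12 .5 e3
if frac or exp:
    value = float(integer + (frac or '') + (exp or ''))
else:
    value = int(integer)
print("%r, resume at index %d" % (value, m.end()))   # -12500.0, resume at index 7
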
diff --git a/mapreduce/main.py b/mapreduce/main.py
deleted file mode 100755
index c8b0196..0000000
--- a/mapreduce/main.py
+++ /dev/null
@@ -1,96 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2010 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Main module for map-reduce implementation.
-
-This module should be specified as a handler for mapreduce URLs in app.yaml:
-
- handlers:
- - url: /mapreduce(/.*)?
- login: admin
- script: mapreduce/main.py
-"""
-
-import wsgiref.handlers
-
-from google.appengine.ext import webapp
-from mapreduce import handlers
-from mapreduce import status
-from google.appengine.ext.webapp import util
-
-
-STATIC_RE = r".*/([^/]*\.(?:css|js)|status|detail)$"
-
-
-class RedirectHandler(webapp.RequestHandler):
- """Redirects the user back to the status page."""
-
- def get(self):
- new_path = self.request.path
- if not new_path.endswith("/"):
- new_path += "/"
- new_path += "status"
- self.redirect(new_path)
-
-
-def create_handlers_map():
- """Create new handlers map.
-
- Returns:
- list of (regexp, handler) pairs for WSGIApplication constructor.
- """
- return [
- # Task queue handlers.
- (r".*/worker_callback", handlers.MapperWorkerCallbackHandler),
- (r".*/controller_callback", handlers.ControllerCallbackHandler),
- (r".*/kickoffjob_callback", handlers.KickOffJobHandler),
-
- # RPC requests with JSON responses
- # All JSON handlers should have /command/ prefix.
- (r".*/command/start_job", handlers.StartJobHandler),
- (r".*/command/cleanup_job", handlers.CleanUpJobHandler),
- (r".*/command/abort_job", handlers.AbortJobHandler),
- (r".*/command/list_configs", status.ListConfigsHandler),
- (r".*/command/list_jobs", status.ListJobsHandler),
- (r".*/command/get_job_detail", status.GetJobDetailHandler),
-
- # UI static files
- (STATIC_RE, status.ResourceHandler),
-
- # Redirect non-file URLs that do not end in status/detail to status page.
- (r".*", RedirectHandler),
- ]
-
-def create_application():
- """Create new WSGIApplication and register all handlers.
-
- Returns:
- an instance of webapp.WSGIApplication with all mapreduce handlers
- registered.
- """
- return webapp.WSGIApplication(create_handlers_map(),
- debug=True)
-
-
-APP = create_application()
-
-
-def main():
- util.run_wsgi_app(APP)
-
-
-if __name__ == "__main__":
- main()
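
The RedirectHandler removed above only normalizes the request path; a conceptual sketch of that logic as a plain function (illustrative, not the webapp handler itself):

def status_redirect_target(path):
    # Any mapreduce URL that is not a task, command or static path is sent
    # back to the status page.
    if not path.endswith("/"):
        path += "/"
    return path + "status"

assert status_redirect_target("/mapreduce") == "/mapreduce/status"
assert status_redirect_target("/mapreduce/") == "/mapreduce/status"
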
diff --git a/mapreduce/migrate.py b/mapreduce/migrate.py
deleted file mode 100755
index 24312fe..0000000
--- a/mapreduce/migrate.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from mapreduce import operation as op
-import logging
-import appengine_config
-from events.models import Event
-
-def process(entity):
- yield op.db.Put(entity)
- return
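
The migrate.py mapper above yields operations instead of touching the datastore directly; roughly, the framework calls the mapper once per entity and invokes each yielded operation with the current context (a conceptual sketch only, not the actual worker code):

def drive_mapper(mapper, entities, ctx):
    # Conceptual only: the real MapperWorkerCallbackHandler also handles
    # batching, quota and shard state.
    for entity in entities:
        for operation in (mapper(entity) or ()):   # handlers may return nothing
            operation(ctx)   # e.g. op.db.Put(entity) queues a batched re-put
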
diff --git a/mapreduce/model.py b/mapreduce/model.py
deleted file mode 100755
index 3e30fa3..0000000
--- a/mapreduce/model.py
+++ /dev/null
@@ -1,768 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2010 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Model classes which are used to communicate between parts of implementation.
-
-These model classes describe the mapreduce, its current state and
-communication messages. They are either stored in the datastore or
-serialized to/from json and passed around by other means.
-"""
-
-# Disable "Invalid method name"
-# pylint: disable-msg=C6409
-
-
-
-__all__ = ["JsonMixin", "JsonProperty", "MapreduceState", "MapperSpec",
- "MapreduceControl", "MapreduceSpec", "ShardState", "CountersMap"]
-
-import copy
-import datetime
-import logging
-import math
-import random
-from mapreduce.lib import simplejson
-import time
-import types
-
-from google.appengine.api import datastore_errors
-from google.appengine.api import datastore_types
-from google.appengine.ext import db
-from mapreduce import context
-from mapreduce import hooks
-from mapreduce import util
-from mapreduce.lib.graphy.backends import google_chart_api
-
-
-# Default rate of processed entities per second.
-_DEFAULT_PROCESSING_RATE_PER_SEC = 100
-
-# Default number of shards to have.
-_DEFAULT_SHARD_COUNT = 8
-
-
-class JsonMixin(object):
- """Simple, stateless json utilities mixin.
-
- Requires class to implement two methods:
- to_json(self): convert data to json-compatible datastructure (dict,
- list, strings, numbers)
- @classmethod from_json(cls, json): load data from json-compatible structure.
- """
-
- def to_json_str(self):
- """Convert data to json string representation.
-
- Returns:
- json representation as string.
- """
- return simplejson.dumps(self.to_json(), sort_keys=True)
-
- @classmethod
- def from_json_str(cls, json_str):
- """Convert json string representation into class instance.
-
- Args:
- json_str: json representation as string.
-
- Returns:
- New instance of the class with data loaded from json string.
- """
- return cls.from_json(simplejson.loads(json_str))
-
-
-class JsonProperty(db.UnindexedProperty):
- """Property type for storing json representation of data.
-
- Requires data types to implement two methods:
- to_json(self): convert data to json-compatible datastructure (dict,
- list, strings, numbers)
- @classmethod from_json(cls, json): load data from json-compatible structure.
- """
-
- def __init__(self, data_type, default=None, **kwargs):
- """Constructor.
-
- Args:
- data_type: underlying data type as class.
- default: default value for the property. The value is deep copied
-        for each model instance.
- kwargs: remaining arguments.
- """
- kwargs["default"] = default
- super(JsonProperty, self).__init__(**kwargs)
- self.data_type = data_type
-
- def get_value_for_datastore(self, model_instance):
- """Gets value for datastore.
-
- Args:
- model_instance: instance of the model class.
-
- Returns:
- datastore-compatible value.
- """
- value = super(JsonProperty, self).get_value_for_datastore(model_instance)
- if not value:
- return None
- json_value = value.to_json()
- if not json_value:
- return None
- return datastore_types.Text(simplejson.dumps(
- json_value, sort_keys=True))
-
- def make_value_from_datastore(self, value):
- """Convert value from datastore representation.
-
- Args:
- value: datastore value.
-
- Returns:
- value to store in the model.
- """
-
- if value is None:
- return None
- return self.data_type.from_json(simplejson.loads(value))
-
- def validate(self, value):
- """Validate value.
-
- Args:
- value: model value.
-
- Returns:
- Whether the specified value is valid data type value.
-
- Raises:
- BadValueError: when value is not of self.data_type type.
- """
- if value is not None and not isinstance(value, self.data_type):
- raise datastore_errors.BadValueError(
- "Property %s must be convertible to a %s instance (%s)" %
- (self.name, self.data_type, value))
- return super(JsonProperty, self).validate(value)
-
- def empty(self, value):
- """Checks if value is empty.
-
- Args:
- value: model value.
-
- Returns:
-      True if the passed value is empty.
- """
- return not value
-
- def default_value(self):
- """Create default model value.
-
- If default option was specified, then it will be deeply copied.
- None otherwise.
-
- Returns:
- default model value.
- """
- if self.default:
- return copy.deepcopy(self.default)
- else:
- return None
-
-
-
-# Ridiculous future UNIX epoch time, 500 years from now.
-_FUTURE_TIME = 2**34
-
-
-def _get_descending_key(gettime=time.time, getrandint=random.randint):
- """Returns a key name lexically ordered by time descending.
-
- This lets us have a key name for use with Datastore entities which returns
- rows in time descending order when it is scanned in lexically ascending order,
- allowing us to bypass index building for descending indexes.
-
- Args:
- gettime: Used for testing.
- getrandint: Used for testing.
-
- Returns:
- A string with a time descending key.
- """
- now_descending = int((_FUTURE_TIME - gettime()) * 100)
- tie_breaker = getrandint(0, 100)
- return "%d%d" % (now_descending, tie_breaker)
-
-
-class CountersMap(JsonMixin):
- """Maintains map from counter name to counter value.
-
-  The class is used to provide basic arithmetic on counter values (bulk
-  add/remove), increment individual values and store/load data from json.
- """
-
- def __init__(self, initial_map=None):
- """Constructor.
-
- Args:
- initial_map: initial counter values map from counter name (string) to
- counter value (int).
- """
- if initial_map:
- self.counters = initial_map
- else:
- self.counters = {}
-
- def __repr__(self):
- """Compute string representation."""
- return "mapreduce.model.CountersMap(%r)" % self.counters
-
- def get(self, counter_name):
- """Get current counter value.
-
- Args:
- counter_name: counter name as string.
-
- Returns:
- current counter value as int. 0 if counter was not set.
- """
- return self.counters.get(counter_name, 0)
-
- def increment(self, counter_name, delta):
- """Increment counter value.
-
- Args:
- counter_name: counter name as String.
- delta: increment delta as Integer.
-
- Returns:
- new counter value.
- """
- current_value = self.counters.get(counter_name, 0)
- new_value = current_value + delta
- self.counters[counter_name] = new_value
- return new_value
-
- def add_map(self, counters_map):
- """Add all counters from the map.
-
- For each counter in the passed map, adds its value to the counter in this
- map.
-
- Args:
- counters_map: CounterMap instance to add.
- """
- for counter_name in counters_map.counters:
- self.increment(counter_name, counters_map.counters[counter_name])
-
- def sub_map(self, counters_map):
- """Subtracts all counters from the map.
-
-    For each counter in the passed map, subtracts its value from the counter in
- this map.
-
- Args:
- counters_map: CounterMap instance to subtract.
- """
- for counter_name in counters_map.counters:
- self.increment(counter_name, -counters_map.counters[counter_name])
-
- def clear(self):
- """Clear all values."""
- self.counters = {}
-
- def to_json(self):
- """Serializes all the data in this map into json form.
-
- Returns:
- json-compatible data representation.
- """
- return {"counters": self.counters}
-
- @classmethod
- def from_json(cls, json):
- """Create new CountersMap from the json data structure, encoded by to_json.
-
- Args:
-      json: json representation of CountersMap.
-
- Returns:
- an instance of CountersMap with all data deserialized from json.
- """
- counters_map = cls()
- counters_map.counters = json["counters"]
- return counters_map
-
-
-class MapperSpec(JsonMixin):
- """Contains a specification for the mapper phase of the mapreduce.
-
- MapperSpec instance can be changed only during mapreduce starting process,
- and it remains immutable for the rest of mapreduce execution. MapperSpec is
- passed as a payload to all mapreduce tasks in JSON encoding as part of
- MapreduceSpec.
-
- Specifying mapper handlers:
-    * '<module_name>.<class_name>' - __call__ method of class instance will be
-      called
-    * '<module_name>.<function_name>' - function will be called.
-    * '<module_name>.<class_name>.<method_name>' - class will be instantiated
-      and method called.
- """
-
- def __init__(self, handler_spec, input_reader_spec, params, shard_count):
- """Creates a new MapperSpec.
-
- Args:
- handler_spec: handler specification as string (see class doc for
- details).
- input_reader_spec: The class name of the input reader to use.
- params: Dictionary of additional parameters for the mapper.
- shard_count: number of shards to process in parallel.
-
- Properties:
- handler_spec: name of handler class/function to use.
- shard_count: number of shards to process in parallel.
- handler: cached instance of mapper handler as callable.
- input_reader_spec: The class name of the input reader to use.
- params: Dictionary of additional parameters for the mapper.
- """
- self.handler_spec = handler_spec
- self.__handler = None
- self.input_reader_spec = input_reader_spec
- self.shard_count = shard_count
- self.params = params
-
- def get_handler(self):
- """Get mapper handler instance.
-
- Returns:
- cached handler instance as callable.
- """
- if self.__handler is None:
- resolved_spec = util.for_name(self.handler_spec)
- if isinstance(resolved_spec, type):
- # create new instance if this is type
- self.__handler = resolved_spec()
- elif isinstance(resolved_spec, types.MethodType):
- # bind the method
- self.__handler = getattr(resolved_spec.im_class(),
- resolved_spec.__name__)
- else:
- self.__handler = resolved_spec
- return self.__handler
-
- handler = property(get_handler)
-
- def input_reader_class(self):
- """Get input reader class.
-
- Returns:
- input reader class object.
- """
- return util.for_name(self.input_reader_spec)
-
- def to_json(self):
- """Serializes this MapperSpec into a json-izable object."""
- return {
- "mapper_handler_spec": self.handler_spec,
- "mapper_input_reader": self.input_reader_spec,
- "mapper_params": self.params,
- "mapper_shard_count": self.shard_count,
- }
-
- def __str__(self):
- return "MapperSpec(%s, %s, %s, %s)" % (
- self.handler_spec, self.input_reader_spec, self.params,
- self.shard_count)
-
- @classmethod
- def from_json(cls, json):
- """Creates MapperSpec from a dict-like object."""
- return cls(json["mapper_handler_spec"],
- json["mapper_input_reader"],
- json["mapper_params"],
- json["mapper_shard_count"])
-
-
-class MapreduceSpec(JsonMixin):
- """Contains a specification for the whole mapreduce.
-
- MapreduceSpec instance can be changed only during mapreduce starting process,
- and it remains immutable for the rest of mapreduce execution. MapreduceSpec is
- passed as a payload to all mapreduce tasks in json encoding.
- """
-
- # Url to call when mapreduce finishes its execution.
- PARAM_DONE_CALLBACK = "done_callback"
- # Queue to use to call done callback
- PARAM_DONE_CALLBACK_QUEUE = "done_callback_queue"
-
- def __init__(self,
- name,
- mapreduce_id,
- mapper_spec,
- params={},
- hooks_class_name=None):
- """Create new MapreduceSpec.
-
- Args:
- name: The name of this mapreduce job type.
- mapreduce_id: ID of the mapreduce.
- mapper_spec: JSON-encoded string containing a MapperSpec.
- params: dictionary of additional mapreduce parameters.
- hooks_class_name: The fully qualified name of the hooks class to use.
-
- Properties:
- name: The name of this mapreduce job type.
- mapreduce_id: unique id of this mapreduce as string.
- mapper: This MapreduceSpec's instance of MapperSpec.
- params: dictionary of additional mapreduce parameters.
- hooks_class_name: The fully qualified name of the hooks class to use.
- """
- self.name = name
- self.mapreduce_id = mapreduce_id
- self.mapper = MapperSpec.from_json(mapper_spec)
- self.params = params
- self.hooks_class_name = hooks_class_name
- self.__hooks = None
- self.get_hooks() # Fail fast on an invalid hook class.
-
- def get_hooks(self):
- """Returns a hooks.Hooks class or None if no hooks class has been set."""
- if self.__hooks is None and self.hooks_class_name is not None:
- hooks_class = util.for_name(self.hooks_class_name)
- if not isinstance(hooks_class, type):
- raise ValueError("hooks_class_name must refer to a class, got %s" %
- type(hooks_class).__name__)
- if not issubclass(hooks_class, hooks.Hooks):
- raise ValueError(
- "hooks_class_name must refer to a hooks.Hooks subclass")
- self.__hooks = hooks_class(self.mapper)
-
- return self.__hooks
-
- def to_json(self):
- """Serializes all data in this mapreduce spec into json form.
-
- Returns:
- data in json format.
- """
- mapper_spec = self.mapper.to_json()
- return {
- "name": self.name,
- "mapreduce_id": self.mapreduce_id,
- "mapper_spec": mapper_spec,
- "params": self.params,
- "hooks_class_name": self.hooks_class_name,
- }
-
- @classmethod
- def from_json(cls, json):
- """Create new MapreduceSpec from the json, encoded by to_json.
-
- Args:
- json: json representation of MapreduceSpec.
-
- Returns:
- an instance of MapreduceSpec with all data deserialized from json.
- """
- mapreduce_spec = cls(json["name"],
- json["mapreduce_id"],
- json["mapper_spec"],
- json.get("params"),
- json.get("hooks_class_name"))
- return mapreduce_spec
-
-
-class MapreduceState(db.Model):
- """Holds accumulated state of mapreduce execution.
-
- MapreduceState is stored in datastore with a key name equal to the
- mapreduce ID. Only controller tasks can write to MapreduceState.
-
- Properties:
- mapreduce_spec: cached deserialized MapreduceSpec instance. read-only
- active: if we have this mapreduce running right now
- last_poll_time: last time controller job has polled this mapreduce.
- counters_map: shard's counters map as CountersMap. Mirrors
- counters_map_json.
- chart_url: last computed mapreduce status chart url. This chart displays the
- progress of all the shards the best way it can.
- sparkline_url: last computed mapreduce status chart url in small format.
- result_status: If not None, the final status of the job.
- active_shards: How many shards are still processing.
- start_time: When the job started.
- """
-
- RESULT_SUCCESS = "success"
- RESULT_FAILED = "failed"
- RESULT_ABORTED = "aborted"
-
- _RESULTS = frozenset([RESULT_SUCCESS, RESULT_FAILED, RESULT_ABORTED])
-
- # Functional properties.
- mapreduce_spec = JsonProperty(MapreduceSpec, indexed=False)
- active = db.BooleanProperty(default=True, indexed=False)
- last_poll_time = db.DateTimeProperty(required=True)
- counters_map = JsonProperty(CountersMap, default=CountersMap(), indexed=False)
- app_id = db.StringProperty(required=False, indexed=True)
-
- # For UI purposes only.
- chart_url = db.TextProperty(default="")
- sparkline_url = db.TextProperty(default="")
- result_status = db.StringProperty(required=False, choices=_RESULTS)
- active_shards = db.IntegerProperty(default=0, indexed=False)
- failed_shards = db.IntegerProperty(default=0, indexed=False)
- aborted_shards = db.IntegerProperty(default=0, indexed=False)
- start_time = db.DateTimeProperty(auto_now_add=True)
-
- @classmethod
- def kind(cls):
- """Returns entity kind."""
- return "_AE_MR_MapreduceState"
-
- @classmethod
- def get_key_by_job_id(cls, mapreduce_id):
- """Retrieves the Key for a Job.
-
- Args:
- mapreduce_id: The job to retrieve.
-
- Returns:
- Datastore Key that can be used to fetch the MapreduceState.
- """
- return db.Key.from_path(cls.kind(), mapreduce_id)
-
- @classmethod
- def get_by_job_id(cls, mapreduce_id):
- """Retrieves the instance of state for a Job.
-
- Args:
- mapreduce_id: The mapreduce job to retrieve.
-
- Returns:
- instance of MapreduceState for passed id.
- """
- return db.get(cls.get_key_by_job_id(mapreduce_id))
-
- def set_processed_counts(self, shards_processed):
- """Updates a chart url to display processed count for each shard.
-
- Args:
- shards_processed: list of integers with number of processed entities in
- each shard
- """
- chart = google_chart_api.BarChart(shards_processed)
- if self.mapreduce_spec and shards_processed:
- chart.bottom.labels = [
- str(x) for x in xrange(self.mapreduce_spec.mapper.shard_count)]
- chart.left.labels = ['0', str(max(shards_processed))]
- chart.left.min = 0
- self.chart_url = chart.display.Url(300, 200)
-
- def get_processed(self):
- """Number of processed entities.
-
- Returns:
- The total number of processed entities as int.
- """
- return self.counters_map.get(context.COUNTER_MAPPER_CALLS)
-
- processed = property(get_processed)
-
- @staticmethod
- def create_new(mapreduce_id=None,
- gettime=datetime.datetime.now):
- """Create a new MapreduceState.
-
- Args:
- mapreduce_id: Mapreduce id as string.
- gettime: Used for testing.
- """
- if not mapreduce_id:
- mapreduce_id = MapreduceState.new_mapreduce_id()
- state = MapreduceState(key_name=mapreduce_id,
- last_poll_time=gettime())
- state.set_processed_counts([])
- return state
-
- @staticmethod
- def new_mapreduce_id():
- """Generate new mapreduce id."""
- return _get_descending_key()
-
-
-class ShardState(db.Model):
- """Single shard execution state.
-
- The shard state is stored in the datastore and is later aggregated by
-  the controller task. The shard key_name is equal to shard_id.
-
- Properties:
- active: if we have this shard still running as boolean.
- counters_map: shard's counters map as CountersMap. Mirrors
- counters_map_json.
- mapreduce_id: unique id of the mapreduce.
- shard_id: unique id of this shard as string.
- shard_number: ordered number for this shard.
- result_status: If not None, the final status of this shard.
- update_time: The last time this shard state was updated.
- shard_description: A string description of the work this shard will do.
- last_work_item: A string description of the last work item processed.
- """
-
- RESULT_SUCCESS = "success"
- RESULT_FAILED = "failed"
- RESULT_ABORTED = "aborted"
-
- _RESULTS = frozenset([RESULT_SUCCESS, RESULT_FAILED, RESULT_ABORTED])
-
- # Functional properties.
- active = db.BooleanProperty(default=True, indexed=False)
- counters_map = JsonProperty(CountersMap, default=CountersMap(), indexed=False)
- result_status = db.StringProperty(choices=_RESULTS, indexed=False)
-
- # For UI purposes only.
- mapreduce_id = db.StringProperty(required=True)
- update_time = db.DateTimeProperty(auto_now=True, indexed=False)
- shard_description = db.TextProperty(default="")
- last_work_item = db.TextProperty(default="")
-
- def get_shard_number(self):
- """Gets the shard number from the key name."""
- return int(self.key().name().split("-")[-1])
-
- shard_number = property(get_shard_number)
-
- def get_shard_id(self):
- """Returns the shard ID."""
- return self.key().name()
-
- shard_id = property(get_shard_id)
-
- @classmethod
- def kind(cls):
- """Returns entity kind."""
- return "_AE_MR_ShardState"
-
- @classmethod
- def shard_id_from_number(cls, mapreduce_id, shard_number):
- """Get shard id by mapreduce id and shard number.
-
- Args:
- mapreduce_id: mapreduce id as string.
- shard_number: shard number to compute id for as int.
-
- Returns:
- shard id as string.
- """
- return "%s-%d" % (mapreduce_id, shard_number)
-
- @classmethod
- def get_key_by_shard_id(cls, shard_id):
- """Retrieves the Key for this ShardState.
-
- Args:
- shard_id: The shard ID to fetch.
-
- Returns:
-      The Datastore key to use to retrieve this ShardState.
- """
- return db.Key.from_path(cls.kind(), shard_id)
-
- @classmethod
- def get_by_shard_id(cls, shard_id):
- """Get shard state from datastore by shard_id.
-
- Args:
- shard_id: shard id as string.
-
- Returns:
- ShardState for given shard id or None if it's not found.
- """
- return cls.get_by_key_name(shard_id)
-
- @classmethod
- def find_by_mapreduce_id(cls, mapreduce_id):
- """Find all shard states for given mapreduce.
-
- Args:
- mapreduce_id: mapreduce id.
-
- Returns:
- iterable of all ShardState for given mapreduce id.
- """
- return cls.all().filter("mapreduce_id =", mapreduce_id).fetch(99999)
-
- @classmethod
- def create_new(cls, mapreduce_id, shard_number):
- """Create new shard state.
-
- Args:
- mapreduce_id: unique mapreduce id as string.
- shard_number: shard number for which to create shard state.
-
- Returns:
- new instance of ShardState ready to put into datastore.
- """
- shard_id = cls.shard_id_from_number(mapreduce_id, shard_number)
- state = cls(key_name=shard_id,
- mapreduce_id=mapreduce_id)
- return state
-
-
-class MapreduceControl(db.Model):
- """Datastore entity used to control mapreduce job execution.
-
- Only one command may be sent to jobs at a time.
-
- Properties:
- command: The command to send to the job.
- """
-
- ABORT = "abort"
-
- _COMMANDS = frozenset([ABORT])
- _KEY_NAME = "command"
-
- command = db.TextProperty(choices=_COMMANDS, required=True)
-
- @classmethod
- def kind(cls):
- """Returns entity kind."""
- return "_AE_MR_MapreduceControl"
-
- @classmethod
- def get_key_by_job_id(cls, mapreduce_id):
- """Retrieves the Key for a mapreduce ID.
-
- Args:
- mapreduce_id: The job to fetch.
-
- Returns:
- Datastore Key for the command for the given job ID.
- """
- return db.Key.from_path(cls.kind(), "%s:%s" % (mapreduce_id, cls._KEY_NAME))
-
- @classmethod
- def abort(cls, mapreduce_id):
- """Causes a job to abort.
-
- Args:
- mapreduce_id: The job to abort. Not verified as a valid job.
- """
- cls(key_name="%s:%s" % (mapreduce_id, cls._KEY_NAME),
- command=cls.ABORT).put()
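
One detail of the deleted model.py worth calling out is _get_descending_key(): newer timestamps produce lexically smaller key names, so an ascending key scan returns the newest jobs first without building a descending index. A standalone, stdlib-only sketch of that trick:

import random
import time

_FUTURE_TIME = 2 ** 34   # same far-future epoch constant as model.py

def descending_key(gettime=time.time, getrandint=random.randint):
    now_descending = int((_FUTURE_TIME - gettime()) * 100)
    return "%d%d" % (now_descending, getrandint(0, 100))

older = descending_key(gettime=lambda: 1000000000)
newer = descending_key(gettime=lambda: 1000000100)
assert newer < older   # newer timestamp sorts first in an ascending scan
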
diff --git a/mapreduce/operation/__init__.py b/mapreduce/operation/__init__.py
deleted file mode 100755
index f645b87..0000000
--- a/mapreduce/operation/__init__.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2010 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Operations which can be yielded from mappers.
-
-An operation is a callable that takes context.Context as a parameter.
-Operations are called during mapper execution, immediately upon being
-received from the handler function.
-"""
-
-
-
-import db
-import counters
-
-__all__ = ['db', 'counters']
diff --git a/mapreduce/operation/counters.py b/mapreduce/operation/counters.py
deleted file mode 100755
index 9cbfe1c..0000000
--- a/mapreduce/operation/counters.py
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2010 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Counters-related operations."""
-
-
-
-__all__ = ['Increment']
-
-
-class Increment(object):
- """Increment counter operation."""
-
- def __init__(self, counter_name, delta=1):
- """Constructor.
-
- Args:
- counter_name: name of the counter as string
- delta: increment delta as int.
- """
- self.counter_name = counter_name
- self.delta = delta
-
- def __call__(self, context):
- """Execute operation.
-
- Args:
- context: mapreduce context as context.Context.
- """
- context.counters.increment(self.counter_name, self.delta)
diff --git a/mapreduce/operation/db.py b/mapreduce/operation/db.py
deleted file mode 100755
index fbde66f..0000000
--- a/mapreduce/operation/db.py
+++ /dev/null
@@ -1,69 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2010 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""DB-related operations."""
-
-
-
-__all__ = ['Put', 'Delete']
-
-
-# TODO(user): handler function annotation which requests to
-# use db calls directly without batching them/doing async db calls.
-class Put(object):
- """Put entity into datastore via mutation_pool.
-
- See mapreduce.context.MutationPool.
- """
-
- def __init__(self, entity):
- """Constructor.
-
- Args:
- entity: an entity to put.
- """
- self.entity = entity
-
- def __call__(self, context):
- """Perform operation.
-
- Args:
- context: mapreduce context as context.Context.
- """
- context.mutation_pool.put(self.entity)
-
-
-class Delete(object):
- """Delete entity from datastore via mutation_pool.
-
- See mapreduce.context.MutationPool.
- """
-
- def __init__(self, entity):
- """Constructor.
-
- Args:
- entity: a key or model instance to delete.
- """
- self.entity = entity
-
- def __call__(self, context):
- """Perform operation.
-
- Args:
- context: mapreduce context as context.Context.
- """
- context.mutation_pool.delete(self.entity)
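
From the mapper author's side, these operation classes are yielded rather than executed; an illustrative mapper (the entity field and counter name are assumptions for the example, not part of this codebase's API):

from mapreduce import operation as op

def touch_entity(entity):
    entity.needs_review = True               # assumed field, illustration only
    yield op.db.Put(entity)                  # batched write via the mutation pool
    yield op.counters.Increment("entities-touched")
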
diff --git a/mapreduce/quota.py b/mapreduce/quota.py
deleted file mode 100755
index bbdc39a..0000000
--- a/mapreduce/quota.py
+++ /dev/null
@@ -1,185 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2010 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Simple quota system backed by memcache storage."""
-
-
-
-
-# Memcache namespace to use.
-_QUOTA_NAMESPACE = "quota"
-
-# Offset all quota values by this amount since memcache incr/decr
-# operate only with unsigned values.
-_OFFSET = 2**32
-
-
-class QuotaManager(object):
- """Simple quota system manager, backed by memcache storage.
-
- Since memcache storage is not reliable, this quota system is not reliable and
- best effort only.
-
- Quota is managed by buckets. Each bucket contains a 32-bit int value of
-  available quota. Buckets should be refilled manually with the 'put' method.
-
- It is safe to use a single bucket from multiple clients simultaneously.
- """
-
- def __init__(self, memcache_client):
- """Initialize new instance.
-
- Args:
- memcache_client: an instance of memcache client to use.
- """
- self.memcache_client = memcache_client
-
- def put(self, bucket, amount):
- """Put amount into quota bucket.
-
- Args:
- bucket: quota bucket as string.
-      amount: amount to be put into the quota bucket as int.
- """
- self.memcache_client.incr(bucket, delta=amount,
- initial_value=_OFFSET, namespace=_QUOTA_NAMESPACE)
-
- def consume(self, bucket, amount, consume_some=False):
- """Consume amount from quota bucket.
-
- Args:
- bucket: quota bucket as string.
- amount: amount to consume.
- consume_some: specifies behavior in case of not enough quota. If False,
- the method will leave quota intact and return 0. If True, will try to
- consume as much as possible.
-
- Returns:
- Amount of quota consumed.
- """
- new_quota = self.memcache_client.decr(
- bucket, delta=amount, initial_value=_OFFSET, namespace=_QUOTA_NAMESPACE)
-
- if new_quota >= _OFFSET:
- return amount
-
- if consume_some and new_quota is not None and _OFFSET - new_quota < amount:
- # we still can consume some
- self.put(bucket, _OFFSET - new_quota)
- return amount - (_OFFSET - new_quota)
- else:
- self.put(bucket, amount)
- return 0
-
- def get(self, bucket):
- """Get current bucket amount.
-
- Args:
- bucket: quota bucket as string.
-
- Returns:
- current bucket amount as int.
- """
- amount = self.memcache_client.get(bucket, namespace=_QUOTA_NAMESPACE)
- if amount:
- return int(amount) - _OFFSET
- else:
- return 0
-
- def set(self, bucket, amount):
- """Set bucket amount.
-
- Args:
- bucket: quota bucket as string.
- amount: new bucket amount as int.
- """
- self.memcache_client.set(bucket, amount + _OFFSET,
- namespace=_QUOTA_NAMESPACE)
-
-
-class QuotaConsumer(object):
- """Quota consumer wrapper for efficient quota consuming/reclaiming.
-
- Quota is consumed in batches and put back in dispose() method.
-
- WARNING: Always call the dispose() method if you need to keep quota
- consistent.
- """
-
- def __init__(self, quota_manager, bucket, batch_size):
- """Initialize new instance.
-
- Args:
- quota_manager: quota manager to use for quota operations as QuotaManager.
- bucket: quota bucket name as string.
- batch_size: batch size for quota consuming as int.
- """
- self.quota_manager = quota_manager
- self.batch_size = batch_size
- self.bucket = bucket
- self.quota = 0
-
- def consume(self, amount=1):
- """Consume quota.
-
- Args:
- amount: amount of quota to be consumed as int.
-
- Returns:
- True if quota was successfully consumed, False if there's not enough
- quota.
- """
- while self.quota < amount:
- delta = self.quota_manager.consume(self.bucket, self.batch_size,
- consume_some=True)
- if not delta:
- return False
- self.quota += delta
-
- self.quota -= amount
- return True
-
- def put(self, amount=1):
- """Put quota back.
-
- Args:
- amount: amount of quota as int.
- """
- self.quota += amount
-
- def check(self, amount=1):
- """Check that we have enough quota right now.
-
-    This doesn't lock or consume the quota. A subsequent consume might still
-    fail or succeed.
-
- Args:
- amount: amount of quota to check.
-
- Returns:
- True if we have enough quota to consume specified amount right now. False
- otherwise.
- """
- if self.quota >= amount:
- return True
- return self.quota + self.quota_manager.get(self.bucket) >= amount
-
- def dispose(self):
- """Dispose QuotaConsumer and put all actually unconsumed quota back.
-
- This method has to be called for quota consistency!
- """
- self.quota_manager.put(self.bucket, self.quota)
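
The offset arithmetic in the deleted QuotaManager is easy to misread: memcache incr/decr only handle unsigned values, so every bucket stores real_quota + 2**32, and a decr that lands below the offset means the bucket was overdrawn. A tiny standalone sketch of the consume_some path:

_OFFSET = 2 ** 32

stored = _OFFSET + 10           # bucket currently holds 10 units
after_decr = stored - 25        # try to consume 25
if after_decr >= _OFFSET:
    consumed = 25               # plenty of quota, take it all
else:
    shortfall = _OFFSET - after_decr    # 15 units overdrawn
    consumed = 25 - shortfall           # 10, i.e. whatever was available
print(consumed)                         # 10
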
diff --git a/mapreduce/static/base.css b/mapreduce/static/base.css
deleted file mode 100755
index 0fca75b..0000000
--- a/mapreduce/static/base.css
+++ /dev/null
@@ -1,113 +0,0 @@
-html {
- margin: 0;
- padding: 0;
- font-family: Arial, sans-serif;
- font-size: 13px;
-}
-
-body {
- margin: 0;
- padding: 0 3px 3px 3px;
-}
-
-#butter {
- position: absolute;
- left: 40%; /* todo: actually center this */
- width: 200px;
- background-color: #C5D7EF;
- text-align: center;
- padding: 5px;
- border-left: 1px solid #3366CC;
- border-right: 1px solid #3366CC;
- border-bottom: 1px solid #3366CC;
-}
-
-h1 {
- margin-top: 0;
- margin-bottom: 0.4em;
- font-size: 2em;
-}
-h2 {
- margin-top: 1em;
- margin-bottom: 0.4em;
- font-size: 1.2em;
-}
-h3 {
- margin-top: 0;
- margin-bottom: 0.7em;
- font-size: 1.0em;
-}
-
-.status-text {
- text-transform: capitalize;
-}
-
-/* Overview page */
-.editable-input,
-.job-static-param {
- margin: 0.3em;
-}
-
-.editable-input > label:after {
- content: ': ';
-}
-
-#launch-control {
- margin-bottom: 0.5em;
-}
-#launch-container {
- margin-left: 0.5em;
-}
-
-/* Detail page */
-#control {
- float: right;
-}
-
-#detail-graph,
-#aggregated-counters-container,
-#detail-params-container {
- margin-left: 1em;
- float: left;
-}
-
-/* Shared */
-.param-key:after {
- content: ': ';
-}
-.user-param-key:after {
- content: ': ';
-}
-.param-aux:before {
- content: ' ';
-}
-
-.status-table {
- margin: 5px;
- border-collapse: collapse;
- border-width: 0;
- empty-cells: show;
- border-top: 1px solid #C5D7EF;
- border-left: 1px solid #C5D7EF;
- border-right: 1px solid #C5D7EF;
-}
-
-.status-table > thead {
- height: 2em;
-}
-
-.status-table > tfoot {
- height: 1em;
-}
-
-.status-table > thead,
-.status-table > tfoot {
- background-color: #E5ECF9;
-}
-
-.status-table td {
- padding: 4px;
- border-left: 1px solid #C5D7EF;
- border-bottom: 1px solid #C5D7EF;
- border-top: 1px solid #C5D7EF;
-}
diff --git a/mapreduce/static/detail.html b/mapreduce/static/detail.html
deleted file mode 100755
index 25b5d2b..0000000
--- a/mapreduce/static/detail.html
+++ /dev/null
@@ -1,64 +0,0 @@
- [detail.html markup lost in extraction; the deleted page showed "Loading Job Status..." while the mapreduce job detail UI loaded]