diff --git a/app.yaml b/app.yaml
index 51f0925..3608b89 100755
--- a/app.yaml
+++ b/app.yaml
@@ -11,9 +11,6 @@ handlers:
   upload: static/(.*)
   expiration: "24d"
 
-- url: /calendars.*
-  script: main.py
-
 - url: /tasks.*
   script: main.py
   login: admin
@@ -22,15 +19,8 @@ handlers:
   script: main.py
   login: admin
 
-- url: /links.*
-  script: main.py
-
-- url: /mapreduce(/.*)?
-  script: mapreduce_wrapper.py
-  login: admin
-
 - url: .*
-  script: django_app.py
+  script: main.py
 
 builtins:
 - datastore_admin: on
\ No newline at end of file
diff --git a/apps/links/urls.py b/apps/links/urls.py
deleted file mode 100755
index 5df5d26..0000000
--- a/apps/links/urls.py
+++ /dev/null
@@ -1,11 +0,0 @@
-from tipfy import Rule
-
-def get_rules(app):
-    rules = [
-        Rule('/links/add', endpoint="links/add", handler='apps.links.handlers.AddLinkHandler'),
-        Rule('/links/review', endpoint="links/review", handler='apps.links.handlers.ReviewLinksHandler'),
-        Rule('/links/change', endpoint="links/change-status", handler='apps.links.handlers.ChangeLinkStatusHandler'),
-
-    ]
-
-    return rules
\ No newline at end of file
diff --git a/config.py b/config.py
deleted file mode 100755
index c151181..0000000
--- a/config.py
+++ /dev/null
@@ -1,34 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-    config
-    ~~~~~~
-
-    Configuration settings.
-
-    :copyright: 2009 by tipfy.org.
-    :license: BSD, see LICENSE for more details.
-"""
-config = {}
-
-# Configurations for the 'tipfy' module.
-
-
-
-
-config['tipfy'] = {
-    # Enable debugger. It will be loaded only in development.
-    'middleware': [
-        'tipfy.ext.debugger.DebuggerMiddleware',
-    ],
-    'apps_installed': [
-        'apps.calendar',
-        'apps.links',
-    ],
-}
-
-
-
-config['tipfy.ext.session'] = {
-    'secret_key': 'Ross M Karchner: OK Guy',
-}
-
diff --git a/cron.disables b/cron.disables
deleted file mode 100755
index b5f504a..0000000
--- a/cron.disables
+++ /dev/null
@@ -1,3 +0,0 @@
-- description: schedule upcoming newsletters
-  url: /subscriptions/start_schedule_newsletters/
-  schedule: every 12 hours synchronized
diff --git a/django_app.py b/django_app.py
deleted file mode 100755
index d58cafd..0000000
--- a/django_app.py
+++ /dev/null
@@ -1,47 +0,0 @@
-import sys, os
-from google.appengine.ext.webapp import util
-
-
-sys.path= [os.path.join(os.path.dirname(__file__), 'shared'), os.path.join(os.path.dirname(__file__), '.')]+sys.path
-
-
-# Django imports and other code go here...
-import os
-os.environ['DJANGO_SETTINGS_MODULE'] = 'settings'
-from google.appengine.dist import use_library
-use_library('django', '1.2')
-
-
-import django.core.handlers, django.core.handlers.wsgi
-
-from django.conf import settings
-settings.ROOT_URLCONF="django_urls"
-
-
-
-import logging
-import django.core.signals
-import django.dispatch.dispatcher
-import django.db
-
-def log_exception(*args, **kwds):
-    logging.exception('Exception in request:')
-
-# Log errors.
-django.dispatch.Signal.connect(
-    django.core.signals.got_request_exception, log_exception)
-
-# Unregister the rollback event handler.
-django.dispatch.Signal.disconnect(
-    django.core.signals.got_request_exception,
-    django.db._rollback_on_exception)
-
-
-
-def main():
-    sys.path= [os.path.join(os.path.dirname(__file__), 'shared'), os.path.join(os.path.dirname(__file__), '.')]+sys.path
-    application = django.core.handlers.wsgi.WSGIHandler()
-    util.run_wsgi_app(application)
-
-if __name__ == '__main__':
-    main()
\ No newline at end of file
diff --git a/django_templates/base.html b/django_templates/base.html
deleted file mode 100755
index f1e3544..0000000
--- a/django_templates/base.html
+++ /dev/null
@@ -1,165 +0,0 @@
-{% load account_tags %}
-{% load cdn_helper %}
[The remaining 163 deleted lines of this Django template lost their HTML markup during extraction. What survives describes the old site chrome: title/subtitle and opengraph blocks, stylesheet and script includes, an optional Google Analytics snippet, a header with the site logo and navigation links (add an event, event queue, sources, admin-only site/logo/user management, edit your profile, sign out, feedback), an optional "follow @{{ site.twitter }}" notice, a messages list, and the content and endcode template blocks.]
diff --git a/django_urls.py b/django_urls.py
deleted file mode 100755
index 27ea2c0..0000000
--- a/django_urls.py
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright 2008 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from django.conf.urls.defaults import *
-
-urlpatterns = patterns('',
-    # Example:
-    (r'^account/', include('account.urls')),
-    url(r'^_ah/login_required', 'account.views.signin', name="account-signin"),
-    (r'^events/', include('events.urls')),
-    (r'^sources/', include('sources.urls')),
-    (r'^subscriptions/', include('subscriptions.urls')),
-    (r'^admin/', include('eventsite.admin.urls')),
-    (r'^assets/', include('assets.urls')),
-    (r'', include('eventsite.urls')),
-)
diff --git a/handlers.py b/handlers.py
deleted file mode 100755
index 09cf650..0000000
--- a/handlers.py
+++ /dev/null
@@ -1,13 +0,0 @@
-from tipfy import RequestHandler
-from tipfy.ext.jinja2 import render_response
-
-class FrontPageHandler(RequestHandler):
-    """A handler that outputs the result of a rendered template."""
-    def get(self, **kwargs):
-        return render_response('hello.html', message='Hello, Jinja!')
-
-
-class AddEventHandler(RequestHandler):
-    """A handler that outputs the result of a rendered template."""
-    def get(self, **kwargs):
-        return render_response('hello.html', message='Hello, Jinja!')
\ No newline at end of file
diff --git a/apps/__init__.py b/links/__init__.py
similarity index 100%
rename from apps/__init__.py
rename to links/__init__.py
diff --git a/apps/links/forms.py b/links/forms.py
similarity index 100%
rename from apps/links/forms.py
rename to links/forms.py
diff --git a/apps/links/handlers.py b/links/handlers.py
similarity index 100%
rename from apps/links/handlers.py
rename to links/handlers.py
diff --git a/apps/links/models.py b/links/models.py
similarity index 100%
rename from apps/links/models.py
rename to links/models.py
diff --git a/links/urls.py b/links/urls.py
new file mode 100755
index 0000000..1f73a16
--- /dev/null
+++ b/links/urls.py
@@ -0,0 +1,11 @@
+from django.conf.urls.defaults import *
+
+urlpatterns = patterns('links.views',
+
+    url(r'^add/$','add', name="add_link"),
+    url(r'^review/$','review', name="review_links"),
+    url(r'^change/$','add', name="change_link"),
+
+
+)
+
diff --git a/apps/links/__init__.py b/links/views.py
old mode 100755
new mode 100644
similarity index 100%
rename from apps/links/__init__.py
rename to links/views.py
diff --git a/main.py b/main.py
index 2739e4f..b297297 100755
--- a/main.py
+++ b/main.py
@@ -1,38 +1,47 @@
-# -*- coding: utf-8 -*-
-"""
-    main
-    ~~~~
+import sys, os
+from google.appengine.ext.webapp import util
 
-    Run Tipfy apps.
 
-    :copyright: 2009 by tipfy.org.
-    :license: BSD, see LICENSE for more details.
-"""
+sys.path= [os.path.join(os.path.dirname(__file__), 'shared'), os.path.join(os.path.dirname(__file__), '.')]+sys.path
+
+
+# Django imports and other code go here...
 import os
-import sys
+os.environ['DJANGO_SETTINGS_MODULE'] = 'settings'
+from google.appengine.dist import use_library
+use_library('django', '1.2')
 
-if 'lib' not in sys.path:
-    # Add /lib as primary libraries directory, with fallback to /distlib
-    # and optionally to distlib loaded using zipimport.
-    sys.path[0:0] = ['lib', 'distlib', 'distlib.zip', 'shared']
-
-import config
-import tipfy
+import django.core.handlers, django.core.handlers.wsgi
 
-# Is this the development server?
-debug = os.environ.get('SERVER_SOFTWARE', '').startswith('Dev')
+from django.conf import settings
+settings.ROOT_URLCONF="urls"
 
-# Instantiate the application.
-app = tipfy.make_wsgi_app(config=config.config, debug=debug)
-from tipfy.ext.jinja2 import get_jinja2_instance
-env=get_jinja2_instance()
-env.globals['app_version'] = os.environ['CURRENT_VERSION_ID'] or 'dev'
-def main():
-    app.run()
+
+import logging
+import django.core.signals
+import django.dispatch.dispatcher
+import django.db
+
+def log_exception(*args, **kwds):
+    logging.exception('Exception in request:')
+
+# Log errors.
+django.dispatch.Signal.connect(
+    django.core.signals.got_request_exception, log_exception)
+# Unregister the rollback event handler.
+django.dispatch.Signal.disconnect(
+    django.core.signals.got_request_exception,
+    django.db._rollback_on_exception)
+
+
+
+def main():
+    sys.path= [os.path.join(os.path.dirname(__file__), 'shared'), os.path.join(os.path.dirname(__file__), '.')]+sys.path
+    application = django.core.handlers.wsgi.WSGIHandler()
+    util.run_wsgi_app(application)
 
 if __name__ == '__main__':
-    main()
+    main()
\ No newline at end of file
diff --git a/mapreduce.yaml b/mapreduce.yaml
deleted file mode 100755
index 4380181..0000000
--- a/mapreduce.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-mapreduce:
-- name: 'migrate.process'
-  mapper:
-    input_reader: mapreduce.input_readers.DatastoreInputReader
-    handler: migrate.process
-    params:
-    - name: entity_kind
-      default: events.models.Event
\ No newline at end of file
diff --git a/mapreduce/__init__.py b/mapreduce/__init__.py
deleted file mode 100755
index de5df1c..0000000
--- a/mapreduce/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2010 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
diff --git a/mapreduce/base_handler.py b/mapreduce/base_handler.py
deleted file mode 100755
index 00b88eb..0000000
--- a/mapreduce/base_handler.py
+++ /dev/null
@@ -1,141 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2010 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and -# limitations under the License. - -"""Base handler class for all mapreduce handlers. -""" - - - - -import logging -from mapreduce.lib import simplejson - -from google.appengine.ext import webapp - - -class Error(Exception): - """Base-class for exceptions in this module.""" - - -class BadRequestPathError(Error): - """The request path for the handler is invalid.""" - - -class BaseHandler(webapp.RequestHandler): - """Base class for all mapreduce handlers.""" - - def base_path(self): - """Base path for all mapreduce-related urls.""" - path = self.request.path - return path[:path.rfind("/")] - - -class TaskQueueHandler(BaseHandler): - """Base class for handlers intended to be run only from the task queue. - - Sub-classes should implement the 'handle' method. - """ - - def post(self): - if "X-AppEngine-QueueName" not in self.request.headers: - logging.error(self.request.headers) - logging.error("Task queue handler received non-task queue request") - self.response.set_status( - 403, message="Task queue handler received non-task queue request") - return - self.handle() - - def handle(self): - """To be implemented by subclasses.""" - raise NotImplementedError() - - def task_retry_count(self): - """Number of times this task has been retried.""" - return int(self.request.headers.get("X-AppEngine-TaskRetryCount", 0)) - - -class JsonHandler(BaseHandler): - """Base class for JSON handlers for user interface. - - Sub-classes should implement the 'handle' method. They should put their - response data in the 'self.json_response' dictionary. Any exceptions raised - by the sub-class implementation will be sent in a JSON response with the - name of the error_class and the error_message. - """ - - def __init__(self): - """Initializer.""" - super(BaseHandler, self).__init__() - self.json_response = {} - - def base_path(self): - """Base path for all mapreduce-related urls. - - JSON handlers are mapped to /base_path/command/command_name thus they - require special treatment. - """ - path = self.request.path - base_path = path[:path.rfind("/")] - if not base_path.endswith("/command"): - raise BadRequestPathError( - "Json handlers should have /command path prefix") - return base_path[:base_path.rfind("/")] - - def _handle_wrapper(self): - if self.request.headers.get("X-Requested-With") != "XMLHttpRequest": - logging.error(self.request.headers) - logging.error("Got JSON request with no X-Requested-With header") - self.response.set_status( - 403, message="Got JSON request with no X-Requested-With header") - return - - self.json_response.clear() - try: - self.handle() - except Exception, e: - logging.exception("Error in JsonHandler, returning exception.") - # TODO(user): Include full traceback here for the end-user. 
- self.json_response.clear() - self.json_response["error_class"] = e.__class__.__name__ - self.json_response["error_message"] = str(e) - - self.response.headers["Content-Type"] = "text/javascript" - try: - output = simplejson.dumps(self.json_response) - except: - logging.exception("Could not serialize to JSON") - self.response.set_status(500, message="Could not serialize to JSON") - return - else: - self.response.out.write(output) - - def handle(self): - """To be implemented by sub-classes.""" - raise NotImplementedError() - - -class PostJsonHandler(JsonHandler): - """JSON handler that accepts POST requests.""" - - def post(self): - self._handle_wrapper() - - -class GetJsonHandler(JsonHandler): - """JSON handler that accepts GET posts.""" - - def get(self): - self._handle_wrapper() diff --git a/mapreduce/context.py b/mapreduce/context.py deleted file mode 100755 index 93c1017..0000000 --- a/mapreduce/context.py +++ /dev/null @@ -1,305 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2010 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Mapreduce execution context. - -Mapreduce context provides handler code with information about -current mapreduce execution and organizes utility data flow -from handlers such as counters, log messages, mutation pools. -""" - - - -__all__ = ["MAX_ENTITY_COUNT", "MAX_POOL_SIZE", "Context", "MutationPool", - "Counters", "ItemList", "EntityList", "get", "COUNTER_MAPPER_CALLS", - "DATASTORE_DEADLINE"] - -from google.appengine.api import datastore -from google.appengine.ext import db - -# Maximum pool size in bytes. Pool will be flushed when reaches this amount. -# We use 950,000 bytes which is slightly less than maximum allowed RPC size of -# 1M to have some space cushion. -MAX_POOL_SIZE = 900 * 1000 - -# Maximum number of items. Pool will be flushed when reaches this amount. -MAX_ENTITY_COUNT = 500 - -# Deadline in seconds for mutation pool datastore operations. -DATASTORE_DEADLINE = 15 - -# The name of the counter which counts all mapper calls. -COUNTER_MAPPER_CALLS = "mapper_calls" - - -def _normalize_entity(value): - """Return an entity from an entity or model instance.""" - # TODO(user): Consider using datastore.NormalizeAndTypeCheck. - if getattr(value, "_populate_internal_entity", None): - return value._populate_internal_entity() - return value - -def _normalize_key(value): - """Return a key from an entity, model instance, key, or key string.""" - if getattr(value, "key", None): - return value.key() - elif isinstance(value, basestring): - return datastore.Key(value) - else: - return value - -class ItemList(object): - """Holds list of arbitrary items, and their total size. - - Properties: - items: list of objects. - length: length of item list. - size: aggregate item size in bytes. - """ - - def __init__(self): - """Constructor.""" - self.items = [] - self.length = 0 - self.size = 0 - - def append(self, item, item_size): - """Add new item to the list. - - Args: - item: an item to add to the list. - item_size: item size in bytes as int. 
- """ - self.items.append(item) - self.length += 1 - self.size += item_size - - def clear(self): - """Clear item list.""" - self.items = [] - self.length = 0 - self.size = 0 - - @property - def entities(self): - """Return items. For backwards compatability.""" - return self.items - - -# For backwards compatability. -EntityList = ItemList - - -# TODO(user): mutation pool has no error handling at all. Add some. -class MutationPool(object): - """Mutation pool accumulates datastore changes to perform them in batch. - - Properties: - puts: ItemList of entities to put to datastore. - deletes: ItemList of keys to delete from datastore. - max_pool_size: maximum single list pool size. List changes will be flushed - when this size is reached. - """ - - def __init__(self, - max_pool_size=MAX_POOL_SIZE, - max_entity_count=MAX_ENTITY_COUNT): - """Constructor. - - Args: - max_pool_size: maximum pools size in bytes before flushing it to db. - max_entity_count: maximum number of entities before flushing it to db. - """ - self.max_pool_size = max_pool_size - self.max_entity_count = max_entity_count - self.puts = ItemList() - self.deletes = ItemList() - - def put(self, entity): - """Registers entity to put to datastore. - - Args: - entity: an entity or model instance to put. - """ - actual_entity = _normalize_entity(entity) - entity_size = len(actual_entity._ToPb().Encode()) - if (self.puts.length >= self.max_entity_count or - (self.puts.size + entity_size) > self.max_pool_size): - self.__flush_puts() - self.puts.append(actual_entity, entity_size) - - def delete(self, entity): - """Registers entity to delete from datastore. - - Args: - entity: an entity, model instance, or key to delete. - """ - # This is not very nice: we're calling two protected methods here... - key = _normalize_key(entity) - key_size = len(key._ToPb().Encode()) - if (self.deletes.length >= self.max_entity_count or - (self.deletes.size + key_size) > self.max_pool_size): - self.__flush_deletes() - self.deletes.append(key, key_size) - - # TODO(user): some kind of error handling/retries is needed here. - def flush(self): - """Flush(apply) all changed to datastore.""" - self.__flush_puts() - self.__flush_deletes() - - def __flush_puts(self): - """Flush all puts to datastore.""" - if self.puts.length: - datastore.Put(self.puts.items, rpc=self.__create_rpc()) - self.puts.clear() - - def __flush_deletes(self): - """Flush all deletes to datastore.""" - if self.deletes.length: - datastore.Delete(self.deletes.items, rpc=self.__create_rpc()) - self.deletes.clear() - - def __create_rpc(self): - """Creates correctly configured RPC object for datastore calls. - - Returns: - A UserRPC instance. - """ - return datastore.CreateRPC(deadline=DATASTORE_DEADLINE) - - -# This doesn't do much yet. In future it will play nicely with checkpoint/error -# handling system. -class Counters(object): - """Regulates access to counters.""" - - def __init__(self, shard_state): - """Constructor. - - Args: - shard_state: current mapreduce shard state as model.ShardState. - """ - self._shard_state = shard_state - - def increment(self, counter_name, delta=1): - """Increment counter value. - - Args: - counter_name: name of the counter as string. - delta: increment delta as int. - """ - self._shard_state.counters_map.increment(counter_name, delta) - - def flush(self): - """Flush unsaved counter values.""" - pass - - -class Context(object): - """MapReduce execution context. - - Properties: - mapreduce_spec: current mapreduce specification as model.MapreduceSpec. 
- shard_state: current shard state as model.ShardState. - mutation_pool: current mutation pool as MutationPool. - counters: counters object as Counters. - """ - - # Current context instance - _context_instance = None - - def __init__(self, mapreduce_spec, shard_state, task_retry_count=0): - """Constructor. - - Args: - mapreduce_spec: mapreduce specification as model.MapreduceSpec. - shard_state: shard state as model.ShardState. - """ - self.mapreduce_spec = mapreduce_spec - self.shard_state = shard_state - self.task_retry_count = task_retry_count - - if self.mapreduce_spec: - self.mapreduce_id = self.mapreduce_spec.mapreduce_id - else: - # Only in tests - self.mapreduce_id = None - if self.shard_state: - self.shard_id = self.shard_state.get_shard_id() - else: - # Only in tests - self.shard_id = None - - self.mutation_pool = MutationPool( - max_pool_size=(MAX_POOL_SIZE/(2**self.task_retry_count)), - max_entity_count=(MAX_ENTITY_COUNT/(2**self.task_retry_count))) - self.counters = Counters(shard_state) - - self._pools = {} - self.register_pool("mutation_pool", self.mutation_pool) - self.register_pool("counters", self.counters) - - def flush(self): - """Flush all information recorded in context.""" - for pool in self._pools.values(): - pool.flush() - if self.shard_state: - self.shard_state.put() - - # TODO(user): Add convenience method for mapper params. - - # TODO(user): Add fatal error logging method here. Will log the message - # and set the shard state to failure result status, which the controller - # callback should pick up and force all shards to terminate. - - def register_pool(self, key, pool): - """Register an arbitrary pool to be flushed together with this context. - - Args: - key: pool key as string. - pool: a pool instance. Pool should implement flush(self) method. - """ - self._pools[key] = pool - - def get_pool(self, key): - """Obtains an instance of registered pool. - - Args: - key: pool key as string. - - Returns: - an instance of the pool registered earlier, or None. - """ - return self._pools.get(key, None) - - @classmethod - def _set(cls, context): - """Set current context instance. - - Args: - context: new context as Context or None. - """ - cls._context_instance = context - - -def get(): - """Get current context instance. - - Returns: - current context as Context. - """ - return Context._context_instance diff --git a/mapreduce/control.py b/mapreduce/control.py deleted file mode 100755 index 616d55f..0000000 --- a/mapreduce/control.py +++ /dev/null @@ -1,85 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2010 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""API for controlling MapReduce execution outside of MapReduce framework.""" - - - -__all__ = ["start_map"] - -# pylint: disable-msg=C6409 - - -from mapreduce import handlers -from mapreduce import model - - -_DEFAULT_SHARD_COUNT = 8 - - -def start_map(name, - handler_spec, - reader_spec, - reader_parameters, - shard_count=_DEFAULT_SHARD_COUNT, - mapreduce_parameters=None, - base_path="/mapreduce", - queue_name="default", - eta=None, - countdown=None, - hooks_class_name=None, - _app=None, - transactional=False): - """Start a new, mapper-only mapreduce. - - Args: - name: mapreduce name. Used only for display purposes. - handler_spec: fully qualified name of mapper handler function/class to call. - reader_spec: fully qualified name of mapper reader to use - reader_parameters: dictionary of parameters to pass to reader. These are - reader-specific. - shard_count: number of shards to create. - mapreduce_parameters: dictionary of mapreduce parameters relevant to the - whole job. - base_path: base path of mapreduce library handler specified in app.yaml. - "/mapreduce" by default. - queue_name: executor queue name to be used for mapreduce tasks. - eta: Absolute time when the MR should execute. May not be specified - if 'countdown' is also supplied. This may be timezone-aware or - timezone-naive. - countdown: Time in seconds into the future that this MR should execute. - Defaults to zero. - hooks_class_name: fully qualified name of a hooks.Hooks subclass. - transactional: Specifies if job should be started as a part of already - opened transaction. - - Returns: - mapreduce id as string. - """ - mapper_spec = model.MapperSpec(handler_spec, reader_spec, reader_parameters, - shard_count) - - return handlers.StartJobHandler._start_map( - name, - mapper_spec, - mapreduce_parameters or {}, - base_path=base_path, - queue_name=queue_name, - eta=eta, - countdown=countdown, - hooks_class_name=hooks_class_name, - _app=_app, - transactional=transactional) diff --git a/mapreduce/handlers.py b/mapreduce/handlers.py deleted file mode 100755 index 99702c9..0000000 --- a/mapreduce/handlers.py +++ /dev/null @@ -1,876 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2010 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Defines executor tasks handlers for MapReduce implementation.""" - - - -# Disable "Invalid method name" -# pylint: disable-msg=C6409 - -import datetime -import logging -import math -import os -from mapreduce.lib import simplejson -import time - -from google.appengine.api import memcache -from google.appengine.api.labs import taskqueue -from google.appengine.ext import db -from mapreduce import base_handler -from mapreduce import context -from mapreduce import model -from mapreduce import quota -from mapreduce import util - - -# TODO(user): Make this a product of the reader or in quotas.py -_QUOTA_BATCH_SIZE = 20 - -# The amount of time to perform scanning in one slice. New slice will be -# scheduled as soon as current one takes this long. 
-_SLICE_DURATION_SEC = 15 - -# Delay between consecutive controller callback invocations. -_CONTROLLER_PERIOD_SEC = 2 - - -class Error(Exception): - """Base class for exceptions in this module.""" - - -class NotEnoughArgumentsError(Error): - """Required argument is missing.""" - - -class NoDataError(Error): - """There is no data present for a desired input.""" - - -def _run_task_hook(hooks, method, task, queue_name): - """Invokes hooks.method(task, queue_name). - - Args: - hooks: A hooks.Hooks instance or None. - method: The name of the method to invoke on the hooks class e.g. - "enqueue_kickoff_task". - task: The taskqueue.Task to pass to the hook method. - queue_name: The name of the queue to pass to the hook method. - - Returns: - True if the hooks.Hooks instance handled the method, False otherwise. - """ - if hooks is not None: - try: - getattr(hooks, method)(task, queue_name) - except NotImplementedError: - # Use the default task addition implementation. - return False - - return True - return False - - -class MapperWorkerCallbackHandler(base_handler.TaskQueueHandler): - """Callback handler for mapreduce worker task. - - Request Parameters: - mapreduce_spec: MapreduceSpec of the mapreduce serialized to json. - shard_id: id of the shard. - slice_id: id of the slice. - """ - - def __init__(self, time_function=time.time): - """Constructor. - - Args: - time_function: time function to use to obtain current time. - """ - base_handler.TaskQueueHandler.__init__(self) - self._time = time_function - - def handle(self): - """Handle request.""" - spec = model.MapreduceSpec.from_json_str( - self.request.get("mapreduce_spec")) - self._start_time = self._time() - shard_id = self.shard_id() - - # TODO(user): Make this prettier - logging.debug("post: shard=%s slice=%s headers=%s", - shard_id, self.slice_id(), self.request.headers) - - shard_state, control = db.get([ - model.ShardState.get_key_by_shard_id(shard_id), - model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id), - ]) - if not shard_state: - # We're letting this task to die. It's up to controller code to - # reinitialize and restart the task. - logging.error("State not found for shard ID %r; shutting down", - shard_id) - return - - if control and control.command == model.MapreduceControl.ABORT: - logging.info("Abort command received by shard %d of job '%s'", - shard_state.shard_number, shard_state.mapreduce_id) - shard_state.active = False - shard_state.result_status = model.ShardState.RESULT_ABORTED - shard_state.put() - model.MapreduceControl.abort(spec.mapreduce_id) - return - - input_reader = self.input_reader(spec.mapper) - - if spec.mapper.params.get("enable_quota", True): - quota_consumer = quota.QuotaConsumer( - quota.QuotaManager(memcache.Client()), - shard_id, - _QUOTA_BATCH_SIZE) - else: - quota_consumer = None - - ctx = context.Context(spec, shard_state, - task_retry_count=self.task_retry_count()) - context.Context._set(ctx) - - try: - # consume quota ahead, because we do not want to run a datastore - # query if there's not enough quota for the shard. - if not quota_consumer or quota_consumer.check(): - scan_aborted = False - entity = None - - # We shouldn't fetch an entity from the reader if there's not enough - # quota to process it. Perform all quota checks proactively. 
- if not quota_consumer or quota_consumer.consume(): - for entity in input_reader: - if isinstance(entity, db.Model): - shard_state.last_work_item = repr(entity.key()) - else: - shard_state.last_work_item = repr(entity)[:100] - - scan_aborted = not self.process_entity(entity, ctx) - - # Check if we've got enough quota for the next entity. - if (quota_consumer and not scan_aborted and - not quota_consumer.consume()): - scan_aborted = True - if scan_aborted: - break - else: - scan_aborted = True - - - if not scan_aborted: - logging.info("Processing done for shard %d of job '%s'", - shard_state.shard_number, shard_state.mapreduce_id) - # We consumed extra quota item at the end of for loop. - # Just be nice here and give it back :) - if quota_consumer: - quota_consumer.put(1) - shard_state.active = False - shard_state.result_status = model.ShardState.RESULT_SUCCESS - - # TODO(user): Mike said we don't want this happen in case of - # exception while scanning. Figure out when it's appropriate to skip. - ctx.flush() - finally: - context.Context._set(None) - if quota_consumer: - quota_consumer.dispose() - - # Rescheduling work should always be the last statement. It shouldn't happen - # if there were any exceptions in code before it. - if shard_state.active: - self.reschedule(spec, input_reader) - - def process_entity(self, entity, ctx): - """Process a single entity. - - Call mapper handler on the entity. - - Args: - entity: an entity to process. - ctx: current execution context. - - Returns: - True if scan should be continued, False if scan should be aborted. - """ - ctx.counters.increment(context.COUNTER_MAPPER_CALLS) - - handler = ctx.mapreduce_spec.mapper.handler - if util.is_generator_function(handler): - for result in handler(entity): - if callable(result): - result(ctx) - else: - try: - if len(result) == 2: - logging.error("Collectors not implemented yet") - else: - logging.error("Got bad output tuple of length %d", len(result)) - except TypeError: - logging.error( - "Handler yielded type %s, expected a callable or a tuple", - result.__class__.__name__) - else: - handler(entity) - - if self._time() - self._start_time > _SLICE_DURATION_SEC: - logging.debug("Spent %s seconds. Rescheduling", - self._time() - self._start_time) - return False - return True - - def shard_id(self): - """Get shard unique identifier of this task from request. - - Returns: - shard identifier as string. - """ - return str(self.request.get("shard_id")) - - def slice_id(self): - """Get slice unique identifier of this task from request. - - Returns: - slice identifier as int. - """ - return int(self.request.get("slice_id")) - - def input_reader(self, mapper_spec): - """Get the reader from mapper_spec initialized with the request's state. - - Args: - mapper_spec: a mapper spec containing the immutable mapper state. - - Returns: - An initialized InputReader. - """ - input_reader_spec_dict = simplejson.loads( - self.request.get("input_reader_state")) - return mapper_spec.input_reader_class().from_json( - input_reader_spec_dict) - - @staticmethod - def worker_parameters(mapreduce_spec, - shard_id, - slice_id, - input_reader): - """Fill in mapper worker task parameters. - - Returned parameters map is to be used as task payload, and it contains - all the data, required by mapper worker to perform its function. - - Args: - mapreduce_spec: specification of the mapreduce. - shard_id: id of the shard (part of the whole dataset). - slice_id: id of the slice (part of the shard). 
- input_reader: InputReader containing the remaining inputs for this - shard. - - Returns: - string->string map of parameters to be used as task payload. - """ - return {"mapreduce_spec": mapreduce_spec.to_json_str(), - "shard_id": shard_id, - "slice_id": str(slice_id), - "input_reader_state": input_reader.to_json_str()} - - @staticmethod - def get_task_name(shard_id, slice_id): - """Compute single worker task name. - - Args: - shard_id: id of the shard (part of the whole dataset) as string. - slice_id: id of the slice (part of the shard) as int. - - Returns: - task name which should be used to process specified shard/slice. - """ - # Prefix the task name with something unique to this framework's - # namespace so we don't conflict with user tasks on the queue. - return "appengine-mrshard-%s-%s" % (shard_id, slice_id) - - def reschedule(self, mapreduce_spec, input_reader): - """Reschedule worker task to continue scanning work. - - Args: - mapreduce_spec: mapreduce specification. - input_reader: remaining input reader to process. - """ - MapperWorkerCallbackHandler.schedule_slice( - self.base_path(), mapreduce_spec, self.shard_id(), - self.slice_id() + 1, input_reader) - - @classmethod - def schedule_slice(cls, - base_path, - mapreduce_spec, - shard_id, - slice_id, - input_reader, - queue_name=None, - eta=None, - countdown=None): - """Schedule slice scanning by adding it to the task queue. - - Args: - base_path: base_path of mapreduce request handlers as string. - mapreduce_spec: mapreduce specification as MapreduceSpec. - shard_id: current shard id as string. - slice_id: slice id as int. - input_reader: remaining InputReader for given shard. - queue_name: Optional queue to run on; uses the current queue of - execution or the default queue if unspecified. - eta: Absolute time when the MR should execute. May not be specified - if 'countdown' is also supplied. This may be timezone-aware or - timezone-naive. - countdown: Time in seconds into the future that this MR should execute. - Defaults to zero. - """ - task_params = MapperWorkerCallbackHandler.worker_parameters( - mapreduce_spec, shard_id, slice_id, input_reader) - task_name = MapperWorkerCallbackHandler.get_task_name(shard_id, slice_id) - queue_name = os.environ.get("HTTP_X_APPENGINE_QUEUENAME", - queue_name or "default") - - worker_task = taskqueue.Task(url=base_path + "/worker_callback", - params=task_params, - name=task_name, - eta=eta, - countdown=countdown) - - if not _run_task_hook(mapreduce_spec.get_hooks(), - "enqueue_worker_task", - worker_task, - queue_name): - try: - worker_task.add(queue_name) - except (taskqueue.TombstonedTaskError, - taskqueue.TaskAlreadyExistsError), e: - logging.warning("Task %r with params %r already exists. %s: %s", - task_name, task_params, e.__class__, e) - - -class ControllerCallbackHandler(base_handler.TaskQueueHandler): - """Supervises mapreduce execution. - - Is also responsible for gathering execution status from shards together. - - This task is "continuously" running by adding itself again to taskqueue if - mapreduce is still active. - """ - - def __init__(self, time_function=time.time): - """Constructor. - - Args: - time_function: time function to use to obtain current time. - """ - base_handler.TaskQueueHandler.__init__(self) - self._time = time_function - - def handle(self): - """Handle request.""" - spec = model.MapreduceSpec.from_json_str( - self.request.get("mapreduce_spec")) - - # TODO(user): Make this logging prettier. 
- logging.debug("post: id=%s headers=%s", - spec.mapreduce_id, self.request.headers) - - state, control = db.get([ - model.MapreduceState.get_key_by_job_id(spec.mapreduce_id), - model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id), - ]) - if not state: - logging.error("State not found for mapreduce_id '%s'; skipping", - spec.mapreduce_id) - return - - shard_states = model.ShardState.find_by_mapreduce_id(spec.mapreduce_id) - if state.active and len(shard_states) != spec.mapper.shard_count: - # Some shards were lost - logging.error("Incorrect number of shard states: %d vs %d; " - "aborting job '%s'", - len(shard_states), spec.mapper.shard_count, - spec.mapreduce_id) - state.active = False - state.result_status = model.MapreduceState.RESULT_FAILED - model.MapreduceControl.abort(spec.mapreduce_id) - - active_shards = [s for s in shard_states if s.active] - failed_shards = [s for s in shard_states - if s.result_status == model.ShardState.RESULT_FAILED] - aborted_shards = [s for s in shard_states - if s.result_status == model.ShardState.RESULT_ABORTED] - if state.active: - state.active = bool(active_shards) - state.active_shards = len(active_shards) - state.failed_shards = len(failed_shards) - state.aborted_shards = len(aborted_shards) - - if (not state.active and control and - control.command == model.MapreduceControl.ABORT): - # User-initiated abort *after* all shards have completed. - logging.info("Abort signal received for job '%s'", spec.mapreduce_id) - state.result_status = model.MapreduceState.RESULT_ABORTED - - if not state.active: - state.active_shards = 0 - if not state.result_status: - # Set final result status derived from shard states. - if [s for s in shard_states - if s.result_status != model.ShardState.RESULT_SUCCESS]: - state.result_status = model.MapreduceState.RESULT_FAILED - else: - state.result_status = model.MapreduceState.RESULT_SUCCESS - logging.info("Final result for job '%s' is '%s'", - spec.mapreduce_id, state.result_status) - - # We don't need a transaction here, since we change only statistics data, - # and we don't care if it gets overwritten/slightly inconsistent. - self.aggregate_state(state, shard_states) - poll_time = state.last_poll_time - state.last_poll_time = datetime.datetime.utcfromtimestamp(self._time()) - - if not state.active: - # This is the last execution. - # Enqueue done_callback if needed. - def put_state(state): - state.put() - done_callback = spec.params.get( - model.MapreduceSpec.PARAM_DONE_CALLBACK) - if done_callback: - done_task = taskqueue.Task( - url=done_callback, - headers={"Mapreduce-Id": spec.mapreduce_id}) - queue_name = spec.params.get( - model.MapreduceSpec.PARAM_DONE_CALLBACK_QUEUE, - "default") - - if not _run_task_hook(spec.get_hooks(), - "enqueue_done_task", - done_task, - queue_name): - done_task.add(queue_name, transactional=True) - db.run_in_transaction(put_state, state) - return - else: - state.put() - - processing_rate = int(spec.mapper.params.get( - "processing_rate") or model._DEFAULT_PROCESSING_RATE_PER_SEC) - self.refill_quotas(poll_time, processing_rate, active_shards) - ControllerCallbackHandler.reschedule( - self.base_path(), spec, self.serial_id() + 1) - - def aggregate_state(self, mapreduce_state, shard_states): - """Update current mapreduce state by aggregating shard states. - - Args: - mapreduce_state: current mapreduce state as MapreduceState. - shard_states: all shard states (active and inactive). list of ShardState. 
- """ - processed_counts = [] - mapreduce_state.counters_map.clear() - - for shard_state in shard_states: - mapreduce_state.counters_map.add_map(shard_state.counters_map) - processed_counts.append(shard_state.counters_map.get( - context.COUNTER_MAPPER_CALLS)) - - mapreduce_state.set_processed_counts(processed_counts) - - def refill_quotas(self, - last_poll_time, - processing_rate, - active_shard_states): - """Refill quotas for all active shards. - - Args: - last_poll_time: Datetime with the last time the job state was updated. - processing_rate: How many items to process per second overall. - active_shard_states: All active shard states, list of ShardState. - """ - if not active_shard_states: - return - quota_manager = quota.QuotaManager(memcache.Client()) - - current_time = int(self._time()) - last_poll_time = time.mktime(last_poll_time.timetuple()) - total_quota_refill = processing_rate * max(0, current_time - last_poll_time) - quota_refill = int(math.ceil( - 1.0 * total_quota_refill / len(active_shard_states))) - - if not quota_refill: - return - - # TODO(user): use batch memcache API to refill quota in one API call. - for shard_state in active_shard_states: - quota_manager.put(shard_state.shard_id, quota_refill) - - def serial_id(self): - """Get serial unique identifier of this task from request. - - Returns: - serial identifier as int. - """ - return int(self.request.get("serial_id")) - - @staticmethod - def get_task_name(mapreduce_spec, serial_id): - """Compute single controller task name. - - Args: - mapreduce_spec: specification of the mapreduce. - serial_id: id of the invocation as int. - - Returns: - task name which should be used to process specified shard/slice. - """ - # Prefix the task name with something unique to this framework's - # namespace so we don't conflict with user tasks on the queue. - return "appengine-mrcontrol-%s-%s" % ( - mapreduce_spec.mapreduce_id, serial_id) - - @staticmethod - def controller_parameters(mapreduce_spec, serial_id): - """Fill in controller task parameters. - - Returned parameters map is to be used as task payload, and it contains - all the data, required by controller to perform its function. - - Args: - mapreduce_spec: specification of the mapreduce. - serial_id: id of the invocation as int. - - Returns: - string->string map of parameters to be used as task payload. - """ - return {"mapreduce_spec": mapreduce_spec.to_json_str(), - "serial_id": str(serial_id)} - - @classmethod - def reschedule(cls, base_path, mapreduce_spec, serial_id, queue_name=None): - """Schedule new update status callback task. - - Args: - base_path: mapreduce handlers url base path as string. - mapreduce_spec: mapreduce specification as MapreduceSpec. - serial_id: id of the invocation as int. - queue_name: The queue to schedule this task on. Will use the current - queue of execution if not supplied. 
- """ - task_name = ControllerCallbackHandler.get_task_name( - mapreduce_spec, serial_id) - task_params = ControllerCallbackHandler.controller_parameters( - mapreduce_spec, serial_id) - if not queue_name: - queue_name = os.environ.get("HTTP_X_APPENGINE_QUEUENAME", "default") - - controller_callback_task = taskqueue.Task( - url=base_path + "/controller_callback", - name=task_name, params=task_params, - countdown=_CONTROLLER_PERIOD_SEC) - - if not _run_task_hook(mapreduce_spec.get_hooks(), - "enqueue_controller_task", - controller_callback_task, - queue_name): - try: - controller_callback_task.add(queue_name) - except (taskqueue.TombstonedTaskError, - taskqueue.TaskAlreadyExistsError), e: - logging.warning("Task %r with params %r already exists. %s: %s", - task_name, task_params, e.__class__, e) - - -class KickOffJobHandler(base_handler.TaskQueueHandler): - """Taskqueue handler which kicks off a mapreduce processing. - - Request Parameters: - mapreduce_spec: MapreduceSpec of the mapreduce serialized to json. - input_readers: List of InputReaders objects separated by semi-colons. - """ - - def handle(self): - """Handles kick off request.""" - spec = model.MapreduceSpec.from_json_str( - self._get_required_param("mapreduce_spec")) - app_id = self.request.get("app", None) - queue_name = os.environ.get("HTTP_X_APPENGINE_QUEUENAME", "default") - mapper_input_reader_class = spec.mapper.input_reader_class() - - # StartJobHandler might have already saved the state, but it's OK - # to override it because we're using the same mapreduce id. - state = model.MapreduceState.create_new(spec.mapreduce_id) - state.mapreduce_spec = spec - state.active = True - # TODO(user): Initialize UI fields correctly. - state.char_url = "" - state.sparkline_url = "" - if app_id: - state.app_id = app_id - - input_readers = mapper_input_reader_class.split_input(spec.mapper) - if not input_readers: - # We don't have any data. Finish map. - logging.warning("Found no mapper input data to process.") - state.active = False - state.active_shards = 0 - state.put() - return - - # Update state and spec with actual shard count. - spec.mapper.shard_count = len(input_readers) - state.active_shards = len(input_readers) - state.mapreduce_spec = spec - state.put() - - KickOffJobHandler._schedule_shards( - spec, input_readers, queue_name, self.base_path()) - - ControllerCallbackHandler.reschedule( - self.base_path(), spec, queue_name=queue_name, serial_id=0) - - def _get_required_param(self, param_name): - """Get a required request parameter. - - Args: - param_name: name of request parameter to fetch. - - Returns: - parameter value - - Raises: - NotEnoughArgumentsError: if parameter is not specified. - """ - value = self.request.get(param_name) - if not value: - raise NotEnoughArgumentsError(param_name + " not specified") - return value - - @classmethod - def _schedule_shards(cls, spec, input_readers, queue_name, base_path): - """Prepares shard states and schedules their execution. - - Args: - spec: mapreduce specification as MapreduceSpec. - input_readers: list of InputReaders describing shard splits. - queue_name: The queue to run this job on. - base_path: The base url path of mapreduce callbacks. - """ - # Note: it's safe to re-attempt this handler because: - # - shard state has deterministic and unique key. - # - schedule_slice will fall back gracefully if a task already exists. 
- shard_states = [] - for shard_number, input_reader in enumerate(input_readers): - shard = model.ShardState.create_new(spec.mapreduce_id, shard_number) - shard.shard_description = str(input_reader) - shard_states.append(shard) - - # Retrievs already existing shards. - existing_shard_states = db.get(shard.key() for shard in shard_states) - existing_shard_keys = set(shard.key() for shard in existing_shard_states - if shard is not None) - - # Puts only non-existing shards. - db.put(shard for shard in shard_states - if shard.key() not in existing_shard_keys) - - for shard_number, input_reader in enumerate(input_readers): - shard_id = model.ShardState.shard_id_from_number( - spec.mapreduce_id, shard_number) - MapperWorkerCallbackHandler.schedule_slice( - base_path, spec, shard_id, 0, input_reader, queue_name=queue_name) - - -class StartJobHandler(base_handler.PostJsonHandler): - """Command handler starts a mapreduce job.""" - - def handle(self): - """Handles start request.""" - # Mapper spec as form arguments. - mapreduce_name = self._get_required_param("name") - mapper_input_reader_spec = self._get_required_param("mapper_input_reader") - mapper_handler_spec = self._get_required_param("mapper_handler") - mapper_params = self._get_params( - "mapper_params_validator", "mapper_params.") - params = self._get_params( - "params_validator", "params.") - - # Set some mapper param defaults if not present. - mapper_params["processing_rate"] = int(mapper_params.get( - "processing_rate") or model._DEFAULT_PROCESSING_RATE_PER_SEC) - queue_name = mapper_params["queue_name"] = mapper_params.get( - "queue_name", "default") - - # Validate the Mapper spec, handler, and input reader. - mapper_spec = model.MapperSpec( - mapper_handler_spec, - mapper_input_reader_spec, - mapper_params, - int(mapper_params.get("shard_count", model._DEFAULT_SHARD_COUNT))) - - mapreduce_id = type(self)._start_map( - mapreduce_name, - mapper_spec, - params, - base_path=self.base_path(), - queue_name=queue_name, - _app=mapper_params.get("_app")) - self.json_response["mapreduce_id"] = mapreduce_id - - def _get_params(self, validator_parameter, name_prefix): - """Retrieves additional user-supplied params for the job and validates them. - - Args: - validator_parameter: name of the request parameter which supplies - validator for this parameter set. - name_prefix: common prefix for all parameter names in the request. - - Raises: - Any exception raised by the 'params_validator' request parameter if - the params fail to validate. - """ - params_validator = self.request.get(validator_parameter) - - user_params = {} - for key in self.request.arguments(): - if key.startswith(name_prefix): - values = self.request.get_all(key) - adjusted_key = key[len(name_prefix):] - if len(values) == 1: - user_params[adjusted_key] = values[0] - else: - user_params[adjusted_key] = values - - if params_validator: - resolved_validator = util.for_name(params_validator) - resolved_validator(user_params) - - return user_params - - def _get_required_param(self, param_name): - """Get a required request parameter. - - Args: - param_name: name of request parameter to fetch. - - Returns: - parameter value - - Raises: - NotEnoughArgumentsError: if parameter is not specified. 
- """ - value = self.request.get(param_name) - if not value: - raise NotEnoughArgumentsError(param_name + " not specified") - return value - - @classmethod - def _start_map(cls, name, mapper_spec, - mapreduce_params, - base_path="/mapreduce", - queue_name="default", - eta=None, - countdown=None, - hooks_class_name=None, - _app=None, - transactional=False): - # Check that handler can be instantiated. - mapper_spec.get_handler() - - # Check that reader can be instantiated and is configured correctly - mapper_input_reader_class = mapper_spec.input_reader_class() - mapper_input_reader_class.validate(mapper_spec) - - mapreduce_id = model.MapreduceState.new_mapreduce_id() - mapreduce_spec = model.MapreduceSpec( - name, - mapreduce_id, - mapper_spec.to_json(), - mapreduce_params, - hooks_class_name) - - kickoff_params = {"mapreduce_spec": mapreduce_spec.to_json_str()} - if _app: - kickoff_params["app"] = _app - kickoff_worker_task = taskqueue.Task( - url=base_path + "/kickoffjob_callback", - params=kickoff_params, - eta=eta, countdown=countdown) - - hooks = mapreduce_spec.get_hooks() - - def start_mapreduce(): - if not transactional: - # Save state in datastore so that UI can see it. - # We can't save state in foreign transaction, but conventional UI - # doesn't ask for transactional starts anyway. - state = model.MapreduceState.create_new(mapreduce_spec.mapreduce_id) - state.mapreduce_spec = mapreduce_spec - state.active = True - state.active_shards = mapper_spec.shard_count - if _app: - state.app_id = _app - state.put() - - if hooks is not None: - try: - hooks.enqueue_kickoff_task(kickoff_worker_task, queue_name) - except NotImplementedError: - # Use the default task addition implementation. - pass - else: - return - kickoff_worker_task.add(queue_name, transactional=True) - - if transactional: - start_mapreduce() - else: - db.run_in_transaction(start_mapreduce) - - return mapreduce_id - - -class CleanUpJobHandler(base_handler.PostJsonHandler): - """Command to kick off tasks to clean up a job's data.""" - - def handle(self): - mapreduce_id = self.request.get("mapreduce_id") - db.delete(model.MapreduceControl.get_key_by_job_id(mapreduce_id)) - - shards = model.ShardState.find_by_mapreduce_id(mapreduce_id) - db.delete(shards) - - db.delete(model.MapreduceState.get_key_by_job_id(mapreduce_id)) - - self.json_response["status"] = ("Job %s successfully cleaned up." % - mapreduce_id) - - -class AbortJobHandler(base_handler.PostJsonHandler): - """Command to abort a running job.""" - - def handle(self): - model.MapreduceControl.abort(self.request.get("mapreduce_id")) - self.json_response["status"] = "Abort signal sent." diff --git a/mapreduce/hooks.py b/mapreduce/hooks.py deleted file mode 100755 index 7ab1123..0000000 --- a/mapreduce/hooks.py +++ /dev/null @@ -1,93 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2010 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""API allowing control over some mapreduce implementation details.""" - - - -__all__ = ["Hooks"] - - -class Hooks(object): - """Allows subclasses to control some aspects of mapreduce execution. - - control.start_map accepts an optional "hooks" argument that can be passed a - subclass of this class. - """ - - def __init__(self, mapper): - """Initializes a Hooks class. - - Args: - mapper: The mapreduce.model.MapperSpec for the current mapreduce. - """ - self.mapper = mapper - - def enqueue_worker_task(self, task, queue_name): - """Enqueues a worker task that is used to run the mapper. - - Args: - task: A taskqueue.Task that must be queued in order for the mapreduce - mappers to be run. - queue_name: The queue where the task should be run e.g. "default". - - Raises: - NotImplementedError: to indicate that the default worker queueing strategy - should be used. - """ - raise NotImplementedError() - - def enqueue_kickoff_task(self, task, queue_name): - """Enqueues a task that is used to start the mapreduce. - - Args: - task: A taskqueue.Task that must be queued in order for the mapreduce - to start. - queue_name: The queue where the task should be run e.g. "default". - - Raises: - NotImplementedError: to indicate that the default mapreduce start strategy - should be used. - """ - raise NotImplementedError() - - def enqueue_done_task(self, task, queue_name): - """Enqueues a task that is triggered when the mapreduce completes. - - Args: - task: A taskqueue.Task that must be queued in order for the client to be - notified when the mapreduce is complete. - queue_name: The queue where the task should be run e.g. "default". - - Raises: - NotImplementedError: to indicate that the default mapreduce notification - strategy should be used. - """ - raise NotImplementedError() - - def enqueue_controller_task(self, task, queue_name): - """Enqueues a task that is used to monitor the mapreduce process. - - Args: - task: A taskqueue.Task that must be queued in order for updates to the - mapreduce process to be properly tracked. - queue_name: The queue where the task should be run e.g. "default". - - Raises: - NotImplementedError: to indicate that the default mapreduce tracking - strategy should be used. - """ - raise NotImplementedError() diff --git a/mapreduce/input_readers.py b/mapreduce/input_readers.py deleted file mode 100755 index 4c09206..0000000 --- a/mapreduce/input_readers.py +++ /dev/null @@ -1,1244 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2010 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Defines input readers for MapReduce.""" - - - -# pylint: disable-msg=C6409 - -import logging -import math -import StringIO -import time -import zipfile - -from google.appengine.api import datastore -from google.appengine.api import namespace_manager -# TODO(user): Remove this hack once 1.4.0 is live in production. 
-try: - from google.appengine.datastore import datastore_rpc -except ImportError: - datastore_rpc = None -from mapreduce.lib import blobstore -from google.appengine.ext import db -from mapreduce.lib import key_range -from google.appengine.ext.db import metadata -from mapreduce import util -from mapreduce.model import JsonMixin - - -class Error(Exception): - """Base-class for exceptions in this module.""" - - -class BadReaderParamsError(Error): - """The input parameters to a reader were invalid.""" - - -class InputReader(JsonMixin): - """Abstract base class for input readers. - - InputReaders have the following properties: - * They are created by using the split_input method to generate a set of - InputReaders from a MapperSpec. - * They generate inputs to the mapper via the iterator interface. - * After creation, they can be serialized and resumed using the JsonMixin - interface. - * They are cast to string for a user-readable description; it may be - valuable to implement __str__. - """ - - # Mapreduce parameters. - _APP_PARAM = "_app" - NAMESPACES_PARAM = "namespaces" - MAPPER_PARAMS = "mapper_params" - - def __iter__(self): - return self - - def next(self): - """Returns the next input from this input reader as a key, value pair. - - Returns: - The next input from this input reader. - """ - raise NotImplementedError("next() not implemented in %s" % cls) - - @classmethod - def from_json(cls, input_shard_state): - """Creates an instance of the InputReader for the given input shard state. - - Args: - input_shard_state: The InputReader state as a dict-like object. - - Returns: - An instance of the InputReader configured using the values of json. - """ - raise NotImplementedError("from_json() not implemented in %s" % cls) - - def to_json(self): - """Returns an input shard state for the remaining inputs. - - Returns: - A json-izable version of the remaining InputReader. - """ - raise NotImplementedError("to_json() not implemented in %s" % cls) - - @classmethod - def split_input(cls, mapper_spec): - """Returns a list of input readers for the input spec. - - Args: - mapper_spec: The MapperSpec for this InputReader. - - Returns: - A list of InputReaders. - """ - raise NotImplementedError("split_input() not implemented in %s" % cls) - - @classmethod - def validate(cls, mapper_spec): - """Validates mapper spec and all mapper parameters. - - Args: - mapper_spec: The MapperSpec for this InputReader. - - Raises: - BadReaderParamsError: required parameters are missing or invalid. - """ - raise NotImplementedError("validate() not implemented in %s" % cls) - - -# TODO(user): Use cursor API as soon as we have it available. -class DatastoreInputReader(InputReader): - """Represents a range in query results. - - DatastoreInputReader yields model instances from the entities in a given key - range. Iterating over DatastoreInputReader changes its range past consumed - entries. - - The class shouldn't be instantiated directly. Use the split_input class method - instead. - """ - - # Number of entities to fetch at once while doing scanning. - _BATCH_SIZE = 50 - - # Maximum number of shards we'll create. - _MAX_SHARD_COUNT = 256 - - # Mapreduce parameters. - ENTITY_KIND_PARAM = "entity_kind" - KEYS_ONLY_PARAM = "keys_only" - BATCH_SIZE_PARAM = "batch_size" - KEY_RANGE_PARAM = "key_range" - - # TODO(user): Add support for arbitrary queries. It's not possible to - # support them without cursors since right now you can't even serialize query - # definition. 
- def __init__(self, entity_kind, key_ranges, batch_size = _BATCH_SIZE): - """Create new DatastoreInputReader object. - - This is internal constructor. Use split_query instead. - - Args: - entity_kind: entity kind as string. - key_ranges: a sequence of key_range.KeyRange instances to process. - batch_size: size of read batch as int. - """ - self._entity_kind = entity_kind - # Reverse the KeyRanges so they can be processed in order as a stack of - # work items. - self._key_ranges = list(reversed(key_ranges)) - self._batch_size = int(batch_size) - - def __iter__(self): - """Create a generator for model instances for entities. - - Iterating through entities moves query range past the consumed entities. - - Yields: - next model instance. - """ - while True: - if self._current_key_range is None: - break - - while True: - query = self._current_key_range.make_ascending_query( - util.for_name(self._entity_kind)) - results = query.fetch(limit=self._batch_size) - - if not results: - self._advance_key_range() - break - - for model_instance in results: - key = model_instance.key() - - self._current_key_range.advance(key) - yield model_instance - - @property - def _current_key_range(self): - if self._key_ranges: - return self._key_ranges[-1] - else: - return None - - def _advance_key_range(self): - if self._key_ranges: - self._key_ranges.pop() - - # TODO(user): use query splitting functionality when it becomes available - # instead. - @classmethod - def _split_input_from_namespace(cls, app, namespace, entity_kind_name, - shard_count): - """Return KeyRange objects. Helper for _split_input_from_params.""" - - raw_entity_kind = util.get_short_name(entity_kind_name) - - if shard_count == 1: - # With one shard we don't need to calculate any splitpoints at all. - return [key_range.KeyRange(namespace=namespace, _app=app)] - - # we use datastore.Query instead of ext.db.Query here, because we can't - # erase ordering on db.Query once we set it. - ds_query = datastore.Query(kind=raw_entity_kind, - namespace=namespace, - _app=app, - keys_only=True) - ds_query.Order("__key__") - first_entity_key_list = ds_query.Get(1) - if not first_entity_key_list: - logging.warning("Could not retrieve an entity of type %s.", - raw_entity_kind) - return [] - first_entity_key = first_entity_key_list[0] - ds_query.Order(("__key__", datastore.Query.DESCENDING)) - try: - last_entity_key, = ds_query.Get(1) - except db.NeedIndexError, e: - # TODO(user): Show this error in the worker log, not the app logs. - logging.warning("Cannot create accurate approximation of keyspace, " - "guessing instead. Please address this problem: %s", e) - # TODO(user): Use a key-end hint from the user input parameters - # in this case, in the event the user has a good way of figuring out - # the range of the keyspace. - last_entity_key = key_range.KeyRange.guess_end_key(raw_entity_kind, - first_entity_key) - full_keyrange = key_range.KeyRange( - first_entity_key, last_entity_key, None, True, True, - namespace=namespace, - _app=app) - key_ranges = [full_keyrange] - number_of_half_splits = int(math.floor(math.log(shard_count, 2))) - for _ in range(0, number_of_half_splits): - new_ranges = [] - for r in key_ranges: - new_ranges += r.split_range(1) - key_ranges = new_ranges - return key_ranges - - @classmethod - def _split_input_from_params(cls, app, namespaces, entity_kind_name, - params, shard_count): - """Return input reader objects. 
Helper for split_input.""" - key_ranges = [] # KeyRanges for all namespaces - for namespace in namespaces: - key_ranges.extend( - cls._split_input_from_namespace(app, - namespace, - entity_kind_name, - shard_count)) - - # Divide the KeyRanges into shard_count shards. The KeyRanges for different - # namespaces might be very different in size so the assignment of KeyRanges - # to shards is done round-robin. - shared_ranges = [[] for _ in range(shard_count)] - for i, k_range in enumerate(key_ranges): - shared_ranges[i % shard_count].append(k_range) - batch_size = int(params.get(cls.BATCH_SIZE_PARAM, cls._BATCH_SIZE)) - return [cls(entity_kind_name, ranges, batch_size) - for ranges in shared_ranges if ranges] - - @classmethod - def validate(cls, mapper_spec): - """Validates mapper spec and all mapper parameters. - - Args: - mapper_spec: The MapperSpec for this InputReader. - - Raises: - BadReaderParamsError: required parameters are missing or invalid. - """ - cls._common_validate(mapper_spec) - params = mapper_spec.params - keys_only = util.parse_bool(params.get(cls.KEYS_ONLY_PARAM, False)) - if keys_only: - raise BadReaderParamsError("The keys_only parameter is obsolete. " - "Use DatastoreKeyInputReader instead.") - - entity_kind_name = params[cls.ENTITY_KIND_PARAM] - # Fail fast if Model cannot be located. - try: - util.for_name(entity_kind_name) - except ImportError, e: - raise BadReaderParamsError("Bad entity kind: %s" % e) - - @classmethod - def _common_validate(cls, mapper_spec): - """Validates mapper spec and all mapper parameters. - - Common portion of validate method shared between DatastoreInputReader, - DatastoreKeyInputReader, and DatastoreEntityInputReader. - - Args: - cls: The class argument from the calling class method. - mapper_spec: The MapperSpec for this InputReader. - - Raises: - BadReaderParamsError: required parameters are missing or invalid. - """ - if mapper_spec.input_reader_class() != cls: - raise BadReaderParamsError("Input reader class mismatch") - params = mapper_spec.params - if cls.ENTITY_KIND_PARAM not in params: - raise BadReaderParamsError("Missing mapper parameter 'entity_kind'") - if cls.BATCH_SIZE_PARAM in params: - try: - batch_size = int(params[cls.BATCH_SIZE_PARAM]) - if batch_size < 1: - raise BadReaderParamsError("Bad batch size: %s" % batch_size) - except ValueError, e: - raise BadReaderParamsError("Bad batch size: %s" % e) - if cls.NAMESPACES_PARAM in params: - if isinstance(params[cls.NAMESPACES_PARAM], (str, unicode)): - pass - elif isinstance(params[cls.NAMESPACES_PARAM], list): - for namespace in params[cls.NAMESPACES_PARAM]: - if not isinstance(namespace, (str, unicode)): - raise BadReaderParamsError( - "Bad namespace list: expected a list of strings") - else: - raise BadReaderParamsError( - "Bad namespace list: expected a list of strings") - - @classmethod - def split_input(cls, mapper_spec): - """Splits query into shards without fetching query results. - - Tries as best as it can to split the whole query result set into equal - shards. Due to difficulty of making the perfect split, resulting shards' - sizes might differ significantly from each other. The actual number of - shards might also be less then requested (even 1), though it is never - greater. - - Current implementation does key-lexicographic order splitting. It requires - query not to specify any __key__-based ordering. If an index for - query.order('-__key__') query is not present, an inaccurate guess at - sharding will be made by splitting the full key range. 
- - Args: - mapper_spec: MapperSpec with params containing 'entity_kind'. - May have 'namespaces' in the params as either a list of namespace - strings or a comma-seperated list of namespaces. If specified then the - input reader will only yield entities in the given namespaces. If - 'namespaces' is not given then the current namespace will be used. May - also have 'batch_size' in the params to specify the number of entities - to process in each batch. - - Returns: - A list of InputReader objects of length <= number_of_shards. These - may be DatastoreInputReader or DatastoreKeyInputReader objects. - """ - params = mapper_spec.params - entity_kind_name = params[cls.ENTITY_KIND_PARAM] - shard_count = mapper_spec.shard_count - namespaces = params.get(cls.NAMESPACES_PARAM, - [namespace_manager.get_namespace()]) - if isinstance(namespaces, (str, unicode)): - namespaces = namespaces.split(",") - app = params.get(cls._APP_PARAM) - - return cls._split_input_from_params( - app, namespaces, entity_kind_name, params, shard_count) - - def to_json(self): - """Serializes all the data in this query range into json form. - - Returns: - all the data in json-compatible map. - """ - json_dict = {self.KEY_RANGE_PARAM: [k.to_json() for k in self._key_ranges], - self.ENTITY_KIND_PARAM: self._entity_kind, - self.BATCH_SIZE_PARAM: self._batch_size} - return json_dict - - def __str__(self): - """Returns the string representation of this DatastoreInputReader.""" - return repr(self._key_ranges) - - @classmethod - def from_json(cls, json): - """Create new DatastoreInputReader from the json, encoded by to_json. - - Args: - json: json map representation of DatastoreInputReader. - - Returns: - an instance of DatastoreInputReader with all data deserialized from json. - """ - query_range = cls( - json[cls.ENTITY_KIND_PARAM], - [key_range.KeyRange.from_json(k) for k in json[cls.KEY_RANGE_PARAM]], - json[cls.BATCH_SIZE_PARAM]) - return query_range - - -class DatastoreKeyInputReader(DatastoreInputReader): - """An input reader which takes a Kind and yields Keys for that kind.""" - - def __iter__(self): - """Create a generator for keys in the range. - - Iterating through entries moves query range past the consumed entries. - - Yields: - next entry. - """ - raw_entity_kind = util.get_short_name(self._entity_kind) - while True: - if self._current_key_range is None: - break - - while True: - query = self._current_key_range.make_ascending_datastore_query( - raw_entity_kind, keys_only=True) - results = query.Get(limit=self._batch_size) - - if not results: - self._advance_key_range() - break - - for key in results: - self._current_key_range.advance(key) - yield key - - @classmethod - def validate(cls, mapper_spec): - """Validates mapper spec and all mapper parameters. - - Args: - mapper_spec: The MapperSpec for this InputReader. - - Raises: - BadReaderParamsError: required parameters are missing or invalid. - """ - cls._common_validate(mapper_spec) - - -class DatastoreEntityInputReader(DatastoreInputReader): - """An input reader which yields low level datastore entities for a kind.""" - - def __iter__(self): - """Create a generator for low level entities in the range. - - Iterating through entries moves query range past the consumed entries. - - Yields: - next entry. 
- """ - raw_entity_kind = util.get_short_name(self._entity_kind) - while True: - if self._current_key_range is None: - break - - while True: - query = self._current_key_range.make_ascending_datastore_query( - raw_entity_kind) - results = query.Get(limit=self._batch_size) - - if not results: - self._advance_key_range() - break - - for entity in results: - self._current_key_range.advance(entity.key()) - yield entity - - @classmethod - def validate(cls, mapper_spec): - """Validates mapper spec and all mapper parameters. - - Args: - mapper_spec: The MapperSpec for this InputReader. - - Raises: - BadReaderParamsError: required parameters are missing or invalid. - """ - cls._common_validate(mapper_spec) - - -class BlobstoreLineInputReader(InputReader): - """Input reader for a newline delimited blob in Blobstore.""" - - # TODO(user): Should we set this based on MAX_BLOB_FETCH_SIZE? - _BLOB_BUFFER_SIZE = 64000 - - # Maximum number of shards to allow. - _MAX_SHARD_COUNT = 256 - - # Maximum number of blobs to allow. - _MAX_BLOB_KEYS_COUNT = 246 - - # Mapreduce parameters. - BLOB_KEYS_PARAM = "blob_keys" - - # Serialization parmaeters. - INITIAL_POSITION_PARAM = "initial_position" - END_POSITION_PARAM = "end_position" - BLOB_KEY_PARAM = "blob_key" - - def __init__(self, blob_key, start_position, end_position): - """Initializes this instance with the given blob key and character range. - - This BlobstoreInputReader will read from the first record starting after - strictly after start_position until the first record ending at or after - end_position (exclusive). As an exception, if start_position is 0, then - this InputReader starts reading at the first record. - - Args: - blob_key: the BlobKey that this input reader is processing. - start_position: the position to start reading at. - end_position: a position in the last record to read. - """ - self._blob_key = blob_key - self._blob_reader = blobstore.BlobReader(blob_key, - self._BLOB_BUFFER_SIZE, - start_position) - self._end_position = end_position - self._has_iterated = False - self._read_before_start = bool(start_position) - - def next(self): - """Returns the next input from as an (offset, line) tuple.""" - self._has_iterated = True - - if self._read_before_start: - self._blob_reader.readline() - self._read_before_start = False - start_position = self._blob_reader.tell() - - if start_position >= self._end_position: - raise StopIteration() - - line = self._blob_reader.readline() - - if not line: - raise StopIteration() - - return start_position, line.rstrip("\n") - - def to_json(self): - """Returns an json-compatible input shard spec for remaining inputs.""" - new_pos = self._blob_reader.tell() - if self._has_iterated: - new_pos -= 1 - return {self.BLOB_KEY_PARAM: self._blob_key, - self.INITIAL_POSITION_PARAM: new_pos, - self.END_POSITION_PARAM: self._end_position} - - def __str__(self): - """Returns the string representation of this BlobstoreLineInputReader.""" - return "blobstore.BlobKey(%r):[%d, %d]" % ( - self._blob_key, self._blob_reader.tell(), self._end_position) - - @classmethod - def from_json(cls, json): - """Instantiates an instance of this InputReader for the given shard spec.""" - return cls(json[cls.BLOB_KEY_PARAM], - json[cls.INITIAL_POSITION_PARAM], - json[cls.END_POSITION_PARAM]) - - @classmethod - def validate(cls, mapper_spec): - """Validates mapper spec and all mapper parameters. - - Args: - mapper_spec: The MapperSpec for this InputReader. - - Raises: - BadReaderParamsError: required parameters are missing or invalid. 
- """ - if mapper_spec.input_reader_class() != cls: - raise BadReaderParamsError("Mapper input reader class mismatch") - params = mapper_spec.params - if cls.BLOB_KEYS_PARAM not in params: - raise BadReaderParamsError("Must specify 'blob_keys' for mapper input") - blob_keys = params[cls.BLOB_KEYS_PARAM] - if isinstance(blob_keys, basestring): - # This is a mechanism to allow multiple blob keys (which do not contain - # commas) in a single string. It may go away. - blob_keys = blob_keys.split(",") - if len(blob_keys) > cls._MAX_BLOB_KEYS_COUNT: - raise BadReaderParamsError("Too many 'blob_keys' for mapper input") - if not blob_keys: - raise BadReaderParamsError("No 'blob_keys' specified for mapper input") - for blob_key in blob_keys: - blob_info = blobstore.BlobInfo.get(blobstore.BlobKey(blob_key)) - if not blob_info: - raise BadReaderParamsError("Could not find blobinfo for key %s" % - blob_key) - - @classmethod - def split_input(cls, mapper_spec): - """Returns a list of shard_count input_spec_shards for input_spec. - - Args: - mapper_spec: The mapper specification to split from. Must contain - 'blob_keys' parameter with one or more blob keys. - - Returns: - A list of BlobstoreInputReaders corresponding to the specified shards. - """ - params = mapper_spec.params - blob_keys = params[cls.BLOB_KEYS_PARAM] - if isinstance(blob_keys, basestring): - # This is a mechanism to allow multiple blob keys (which do not contain - # commas) in a single string. It may go away. - blob_keys = blob_keys.split(",") - - blob_sizes = {} - for blob_key in blob_keys: - blob_info = blobstore.BlobInfo.get(blobstore.BlobKey(blob_key)) - blob_sizes[blob_key] = blob_info.size - - shard_count = min(cls._MAX_SHARD_COUNT, mapper_spec.shard_count) - shards_per_blob = shard_count // len(blob_keys) - if shards_per_blob == 0: - shards_per_blob = 1 - - chunks = [] - for blob_key, blob_size in blob_sizes.items(): - blob_chunk_size = blob_size // shards_per_blob - for i in xrange(shards_per_blob - 1): - chunks.append(BlobstoreLineInputReader.from_json( - {cls.BLOB_KEY_PARAM: blob_key, - cls.INITIAL_POSITION_PARAM: blob_chunk_size * i, - cls.END_POSITION_PARAM: blob_chunk_size * (i + 1)})) - chunks.append(BlobstoreLineInputReader.from_json( - {cls.BLOB_KEY_PARAM: blob_key, - cls.INITIAL_POSITION_PARAM: blob_chunk_size * (shards_per_blob - 1), - cls.END_POSITION_PARAM: blob_size})) - return chunks - - -class BlobstoreZipInputReader(InputReader): - """Input reader for files from a zip archive stored in the Blobstore. - - Each instance of the reader will read the TOC, from the end of the zip file, - and then only the contained files which it is responsible for. - """ - - # Maximum number of shards to allow. - _MAX_SHARD_COUNT = 256 - - # Mapreduce parameters. - BLOB_KEY_PARAM = "blob_key" - START_INDEX_PARAM = "start_index" - END_INDEX_PARAM = "end_index" - - def __init__(self, blob_key, start_index, end_index, - _reader=blobstore.BlobReader): - """Initializes this instance with the given blob key and file range. - - This BlobstoreZipInputReader will read from the file with index start_index - up to but not including the file with index end_index. - - Args: - blob_key: the BlobKey that this input reader is processing. - start_index: the index of the first file to read. - end_index: the index of the first file that will not be read. - _reader: a callable that returns a file-like object for reading blobs. - Used for dependency injection. 
- """ - self._blob_key = blob_key - self._start_index = start_index - self._end_index = end_index - self._reader = _reader - self._zip = None - self._entries = None - - def next(self): - """Returns the next input from this input reader as (ZipInfo, opener) tuple. - - Returns: - The next input from this input reader, in the form of a 2-tuple. - The first element of the tuple is a zipfile.ZipInfo object. - The second element of the tuple is a zero-argument function that, when - called, returns the complete body of the file. - """ - if not self._zip: - self._zip = zipfile.ZipFile(self._reader(self._blob_key)) - # Get a list of entries, reversed so we can pop entries off in order - self._entries = self._zip.infolist()[self._start_index:self._end_index] - self._entries.reverse() - if not self._entries: - raise StopIteration() - entry = self._entries.pop() - self._start_index += 1 - return (entry, lambda: self._zip.read(entry.filename)) - - @classmethod - def from_json(cls, json): - """Creates an instance of the InputReader for the given input shard state. - - Args: - json: The InputReader state as a dict-like object. - - Returns: - An instance of the InputReader configured using the values of json. - """ - return cls(json[cls.BLOB_KEY_PARAM], - json[cls.START_INDEX_PARAM], - json[cls.END_INDEX_PARAM]) - - def to_json(self): - """Returns an input shard state for the remaining inputs. - - Returns: - A json-izable version of the remaining InputReader. - """ - return {self.BLOB_KEY_PARAM: self._blob_key, - self.START_INDEX_PARAM: self._start_index, - self.END_INDEX_PARAM: self._end_index} - - def __str__(self): - """Returns the string representation of this BlobstoreZipInputReader.""" - return "blobstore.BlobKey(%r):[%d, %d]" % ( - self._blob_key, self._start_index, self._end_index) - - @classmethod - def validate(cls, mapper_spec): - """Validates mapper spec and all mapper parameters. - - Args: - mapper_spec: The MapperSpec for this InputReader. - - Raises: - BadReaderParamsError: required parameters are missing or invalid. - """ - if mapper_spec.input_reader_class() != cls: - raise BadReaderParamsError("Mapper input reader class mismatch") - params = mapper_spec.params - if cls.BLOB_KEY_PARAM not in params: - raise BadReaderParamsError("Must specify 'blob_key' for mapper input") - blob_key = params[cls.BLOB_KEY_PARAM] - blob_info = blobstore.BlobInfo.get(blobstore.BlobKey(blob_key)) - if not blob_info: - raise BadReaderParamsError("Could not find blobinfo for key %s" % - blob_key) - - - @classmethod - def split_input(cls, mapper_spec, _reader=blobstore.BlobReader): - """Returns a list of input shard states for the input spec. - - Args: - mapper_spec: The MapperSpec for this InputReader. Must contain - 'blob_key' parameter with one blob key. - _reader: a callable that returns a file-like object for reading blobs. - Used for dependency injection. - - Returns: - A list of InputReaders spanning files within the zip. - """ - params = mapper_spec.params - blob_key = params[cls.BLOB_KEY_PARAM] - zip_input = zipfile.ZipFile(_reader(blob_key)) - files = zip_input.infolist() - total_size = sum(x.file_size for x in files) - num_shards = min(mapper_spec.shard_count, cls._MAX_SHARD_COUNT) - size_per_shard = total_size // num_shards - - # Break the list of files into sublists, each of approximately - # size_per_shard bytes. 
- shard_start_indexes = [0] - current_shard_size = 0 - for i, fileinfo in enumerate(files): - current_shard_size += fileinfo.file_size - if current_shard_size >= size_per_shard: - shard_start_indexes.append(i + 1) - current_shard_size = 0 - - if shard_start_indexes[-1] != len(files): - shard_start_indexes.append(len(files)) - - return [cls(blob_key, start_index, end_index, _reader) - for start_index, end_index - in zip(shard_start_indexes, shard_start_indexes[1:])] - - -class BlobstoreZipLineInputReader(InputReader): - """Input reader for newline delimited files in zip archives from Blobstore. - - This has the same external interface as the BlobstoreLineInputReader, in that - it takes a list of blobs as its input and yields lines to the reader. - However the blobs themselves are expected to be zip archives of line delimited - files instead of the files themselves. - - This is useful as many line delimited files gain greatly from compression. - """ - - # Maximum number of shards to allow. - _MAX_SHARD_COUNT = 256 - - # Maximum number of blobs to allow. - _MAX_BLOB_KEYS_COUNT = 246 - - # Mapreduce parameters. - BLOB_KEYS_PARAM = "blob_keys" - - # Serialization parameters. - BLOB_KEY_PARAM = "blob_key" - START_FILE_INDEX_PARAM = "start_file_index" - END_FILE_INDEX_PARAM = "end_file_index" - OFFSET_PARAM = "offset" - - def __init__(self, blob_key, start_file_index, end_file_index, offset, - _reader=blobstore.BlobReader): - """Initializes this instance with the given blob key and file range. - - This BlobstoreZipLineInputReader will read from the file with index - start_file_index up to but not including the file with index end_file_index. - It will return lines starting at offset within file[start_file_index] - - Args: - blob_key: the BlobKey that this input reader is processing. - start_file_index: the index of the first file to read within the zip. - end_file_index: the index of the first file that will not be read. - offset: the byte offset within blob_key.zip[start_file_index] to start - reading. The reader will continue to the end of the file. - _reader: a callable that returns a file-like object for reading blobs. - Used for dependency injection. - """ - self._blob_key = blob_key - self._start_file_index = start_file_index - self._end_file_index = end_file_index - self._initial_offset = offset - self._reader = _reader - self._zip = None - self._entries = None - self._filestream = None - - @classmethod - def validate(cls, mapper_spec): - """Validates mapper spec and all mapper parameters. - - Args: - mapper_spec: The MapperSpec for this InputReader. - - Raises: - BadReaderParamsError: required parameters are missing or invalid. - """ - if mapper_spec.input_reader_class() != cls: - raise BadReaderParamsError("Mapper input reader class mismatch") - params = mapper_spec.params - if cls.BLOB_KEYS_PARAM not in params: - raise BadReaderParamsError("Must specify 'blob_key' for mapper input") - - blob_keys = params[cls.BLOB_KEYS_PARAM] - if isinstance(blob_keys, basestring): - # This is a mechanism to allow multiple blob keys (which do not contain - # commas) in a single string. It may go away. 
- blob_keys = blob_keys.split(",") - if len(blob_keys) > cls._MAX_BLOB_KEYS_COUNT: - raise BadReaderParamsError("Too many 'blob_keys' for mapper input") - if not blob_keys: - raise BadReaderParamsError("No 'blob_keys' specified for mapper input") - for blob_key in blob_keys: - blob_info = blobstore.BlobInfo.get(blobstore.BlobKey(blob_key)) - if not blob_info: - raise BadReaderParamsError("Could not find blobinfo for key %s" % - blob_key) - - @classmethod - def split_input(cls, mapper_spec, _reader=blobstore.BlobReader): - """Returns a list of input readers for the input spec. - - Args: - mapper_spec: The MapperSpec for this InputReader. Must contain - 'blob_keys' parameter with one or more blob keys. - _reader: a callable that returns a file-like object for reading blobs. - Used for dependency injection. - - Returns: - A list of InputReaders spanning the subfiles within the blobs. - There will be at least one reader per blob, but it will otherwise - attempt to keep the expanded size even. - """ - params = mapper_spec.params - blob_keys = params[cls.BLOB_KEYS_PARAM] - if isinstance(blob_keys, basestring): - # This is a mechanism to allow multiple blob keys (which do not contain - # commas) in a single string. It may go away. - blob_keys = blob_keys.split(",") - - blob_files = {} - total_size = 0 - for blob_key in blob_keys: - zip_input = zipfile.ZipFile(_reader(blob_key)) - blob_files[blob_key] = zip_input.infolist() - total_size += sum(x.file_size for x in blob_files[blob_key]) - - shard_count = min(cls._MAX_SHARD_COUNT, mapper_spec.shard_count) - - # We can break on both blob key and file-within-zip boundaries. - # A shard will span at minimum a single blob key, but may only - # handle a few files within a blob. - - size_per_shard = total_size // shard_count - - readers = [] - for blob_key in blob_keys: - files = blob_files[blob_key] - current_shard_size = 0 - start_file_index = 0 - next_file_index = 0 - for fileinfo in files: - next_file_index += 1 - current_shard_size += fileinfo.file_size - if current_shard_size >= size_per_shard: - readers.append(cls(blob_key, start_file_index, next_file_index, 0, - _reader)) - current_shard_size = 0 - start_file_index = next_file_index - if current_shard_size != 0: - readers.append(cls(blob_key, start_file_index, next_file_index, 0, - _reader)) - - return readers - - def next(self): - """Returns the next line from this input reader as (lineinfo, line) tuple. - - Returns: - The next input from this input reader, in the form of a 2-tuple. - The first element of the tuple describes the source, it is itself - a tuple (blobkey, filenumber, byteoffset). - The second element of the tuple is the line found at that offset. - """ - if not self._filestream: - if not self._zip: - self._zip = zipfile.ZipFile(self._reader(self._blob_key)) - # Get a list of entries, reversed so we can pop entries off in order - self._entries = self._zip.infolist()[self._start_file_index: - self._end_file_index] - self._entries.reverse() - if not self._entries: - raise StopIteration() - entry = self._entries.pop() - value = self._zip.read(entry.filename) - self._filestream = StringIO.StringIO(value) - if self._initial_offset: - self._filestream.seek(self._initial_offset) - self._filestream.readline() - - start_position = self._filestream.tell() - line = self._filestream.readline() - - if not line: - # Done with this file in the zip. Move on to the next file. 
- self._filestream.close() - self._filestream = None - self._start_file_index += 1 - self._initial_offset = 0 - return self.next() - - return ((self._blob_key, self._start_file_index, start_position), - line.rstrip("\n")) - - def _next_offset(self): - """Return the offset of the next line to read.""" - if self._filestream: - offset = self._filestream.tell() - if offset: - offset -= 1 - else: - offset = self._initial_offset - - return offset - - def to_json(self): - """Returns an input shard state for the remaining inputs. - - Returns: - A json-izable version of the remaining InputReader. - """ - - return {self.BLOB_KEY_PARAM: self._blob_key, - self.START_FILE_INDEX_PARAM: self._start_file_index, - self.END_FILE_INDEX_PARAM: self._end_file_index, - self.OFFSET_PARAM: self._next_offset()} - - @classmethod - def from_json(cls, json, _reader=blobstore.BlobReader): - """Creates an instance of the InputReader for the given input shard state. - - Args: - json: The InputReader state as a dict-like object. - _reader: For dependency injection. - - Returns: - An instance of the InputReader configured using the values of json. - """ - return cls(json[cls.BLOB_KEY_PARAM], - json[cls.START_FILE_INDEX_PARAM], - json[cls.END_FILE_INDEX_PARAM], - json[cls.OFFSET_PARAM], - _reader) - - def __str__(self): - """Returns the string representation of this reader. - - Returns: - string blobkey:[start file num, end file num]:current offset. - """ - return "blobstore.BlobKey(%r):[%d, %d]:%d" % ( - self._blob_key, self._start_file_index, self._end_file_index, - self._next_offset()) - - -class ConsistentKeyReader(DatastoreKeyInputReader): - """A key reader which reads consistent data from datastore. - - Datastore might have entities which were written, but not visible through - queries for some time. Typically these entities can be only read inside - transaction until they are 'applied'. - - This reader reads all keys even if they are not visible. It might take - significant time to start yielding some data because it has to apply all - modifications created before its start. - """ - START_TIME_US_PARAM = 'start_time_us' - UNAPPLIED_LOG_FILTER = '__unapplied_log_timestamp_us__ <' - DUMMY_KIND = 'DUMMY_KIND' - DUMMY_ID = 106275677020293L - - def __init__(self, - entity_kind, - key_range_param, - batch_size=DatastoreKeyInputReader._BATCH_SIZE, - start_time_us=None): - """Constructor. - - Args: - entity_kind: Kind of entity to read as string. - key_range_param: Key range to scan through as key_range.KeyRange. - batch_size: Size of single batch read (number of entities). - start_time_us: Start time of the reader (as given by time.time() - function). It will apply all unapplied jobs created before it was - started. - """ - DatastoreInputReader.__init__( - self, entity_kind, key_range_param, batch_size) - self.start_time_us = start_time_us - - def __iter__(self): - """Iterates over the keys in the given KeyRanges. - - Yields: - A db.Key instance for each key in the given key range, starting with - keys for unapplied jobs. - """ - while True: # Iterates over each key range. - if self._current_key_range is None: - break - - # TODO(user): Remove this hack once 1.4.0 is live in production. - if datastore_rpc: - self._apply_jobs() - - while True: # Iterates over each key in the current key range. - # Fetches the next batch of the result keys. 
- query = self._current_key_range.make_ascending_datastore_query( - kind=self._entity_kind, keys_only=True) - keys = query.Get(limit=self._batch_size) - - # No results, this shard is complete. - if not keys: - self._advance_key_range() - break - - # All good, now we can feed the mapper. - for key in keys: - self._current_key_range.advance(key) - yield key - - def _apply_jobs(self): - """Apply all jobs in current key range.""" - while True: - # Creates an unapplied query and fetches unapplied jobs in the result - # range. - unapplied_query = self._current_key_range.make_ascending_datastore_query( - kind=None, keys_only=True) - unapplied_query[ - ConsistentKeyReader.UNAPPLIED_LOG_FILTER] = self.start_time_us - unapplied_jobs = unapplied_query.Get(limit=self._batch_size) - - if not unapplied_jobs: - return - - # There were some unapplied jobs. Roll them forward. - keys_to_apply = [] - for key in unapplied_jobs: - # To apply the entity group we need to read something from it. - # We use dummy kind and id because we don't actually need any data. - path = key.to_path() + [ConsistentKeyReader.DUMMY_KIND, - ConsistentKeyReader.DUMMY_ID] - keys_to_apply.append( - db.Key.from_path(_app=key.app(), namespace=key.namespace(), *path)) - db.get(keys_to_apply, config=datastore_rpc.Configuration( - deadline=10, - read_policy=datastore_rpc.Configuration.APPLY_ALL_JOBS_CONSISTENCY)) - - - @classmethod - def _split_input_from_namespace(cls, - app, - namespace, - entity_kind_name, - shard_count): - key_ranges = super(ConsistentKeyReader, cls)._split_input_from_namespace( - app, namespace, entity_kind_name, shard_count) - - # The KeyRanges calculated by the base class may not include keys for - # entities that have unapplied jobs. So use an open key range for the first - # and last KeyRanges to ensure that they will be processed. - if key_ranges: - key_ranges[0].key_start = None - key_ranges[0].include_start = False - key_ranges[-1].key_end = None - key_ranges[-1].include_end = False - return key_ranges - - @classmethod - def _split_input_from_params(cls, app, namespaces, entity_kind_name, - params, shard_count): - readers = super(ConsistentKeyReader, cls)._split_input_from_params(app, - namespaces, - entity_kind_name, - params, - shard_count) - - # We always produce at least one key range because: - # a) there might be unapplied entities - # b) it simplifies mapper code - if not readers: - key_ranges = [key_range.KeyRange(namespace=namespace, _app=app) - for namespace in namespaces] - readers = [cls(entity_kind_name, key_ranges)] - - return readers - - @classmethod - def split_input(cls, mapper_spec): - """Splits input into key ranges.""" - readers = super(ConsistentKeyReader, cls).split_input(mapper_spec) - - start_time_us = mapper_spec.params.get( - cls.START_TIME_US_PARAM, long(time.time() * 1e6)) - for reader in readers: - reader.start_time_us = start_time_us - return readers - - def to_json(self): - """Serializes all the data in this reader into json form. - - Returns: - all the data in json-compatible map. - """ - json_dict = {self.KEY_RANGE_PARAM: [k.to_json() for k in self._key_ranges], - self.ENTITY_KIND_PARAM: self._entity_kind, - self.BATCH_SIZE_PARAM: self._batch_size, - self.START_TIME_US_PARAM: self.start_time_us} - return json_dict - - @classmethod - def from_json(cls, json): - """Create new ConsistentKeyReader from the json, encoded by to_json. - - Args: - json: json map representation of ConsistentKeyReader. 
- - Returns: - an instance of ConsistentKeyReader with all data deserialized from json. - """ - query_range = cls( - json[cls.ENTITY_KIND_PARAM], - [key_range.KeyRange.from_json(k) for k in json[cls.KEY_RANGE_PARAM]], - json[cls.BATCH_SIZE_PARAM], - json[cls.START_TIME_US_PARAM]) - return query_range - - -# TODO(user): This reader always produces only one shard, because -# namespace entities use the mix of ids/names, and KeyRange-based splitting -# doesn't work satisfactory in this case. -# It's possible to implement specific splitting functionality for the reader -# instead of reusing generic one. Meanwhile 1 shard is enough for our -# applications. -class NamespaceInputReader(DatastoreKeyInputReader): - """An input reader to iterate over namespaces. - - This reader yields namespace names as string. - It will always produce only one shard. - """ - - @classmethod - def validate(cls, mapper_spec): - """Validates mapper spec. - - Args: - mapper_spec: The MapperSpec for this InputReader. - - Raises: - BadReaderParamsError: required parameters are missing or invalid. - """ - mapper_spec.params[cls.ENTITY_KIND_PARAM] = metadata.Namespace.kind() - mapper_spec.shard_count = 1 - cls._common_validate(mapper_spec) - - @classmethod - def split_input(cls, mapper_spec): - """Returns a list of input readers for the input spec. - - Args: - mapper_spec: The MapperSpec for this InputReader. - - Returns: - A list of InputReaders. - """ - mapper_spec.params[cls.ENTITY_KIND_PARAM] = metadata.Namespace.kind() - mapper_spec.shard_count = 1 - return super(DatastoreKeyInputReader, cls).split_input(mapper_spec) - - def __iter__(self): - for key in DatastoreKeyInputReader.__iter__(self): - yield metadata.Namespace.key_to_namespace(key) diff --git a/mapreduce/lib/__init__.py b/mapreduce/lib/__init__.py deleted file mode 100755 index 6c49c42..0000000 --- a/mapreduce/lib/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2010 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/mapreduce/lib/blobstore/__init__.py b/mapreduce/lib/blobstore/__init__.py deleted file mode 100755 index 769e2de..0000000 --- a/mapreduce/lib/blobstore/__init__.py +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2007 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
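The readers above all implement one small contract defined by the InputReader base class: iterate with next(), checkpoint with to_json()/from_json(), and shard with validate()/split_input(). As a minimal sketch of that contract only (the RangeInputReader name and its "count" parameter are invented here and are not part of the library), a reader over a numeric range could look like this:

from mapreduce import input_readers

class RangeInputReader(input_readers.InputReader):
    """Hypothetical reader yielding the integers in [current, count)."""

    COUNT_PARAM = "count"

    def __init__(self, current, count):
        self._current = current
        self._count = count

    def next(self):
        if self._current >= self._count:
            raise StopIteration()
        value = self._current
        self._current += 1
        return value

    def to_json(self):
        # Checkpoint just enough to resume this shard where it left off.
        return {"current": self._current, self.COUNT_PARAM: self._count}

    @classmethod
    def from_json(cls, json):
        return cls(json["current"], json[cls.COUNT_PARAM])

    @classmethod
    def validate(cls, mapper_spec):
        if cls.COUNT_PARAM not in mapper_spec.params:
            raise input_readers.BadReaderParamsError("Missing 'count' parameter")

    @classmethod
    def split_input(cls, mapper_spec):
        count = int(mapper_spec.params[cls.COUNT_PARAM])
        shards = max(1, mapper_spec.shard_count)
        per_shard = max(1, (count + shards - 1) // shards)  # ceiling division
        return [cls(start, min(start + per_shard, count))
                for start in xrange(0, count, per_shard)]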
-# - - - - -"""Blobstore API module.""" - -from blobstore import * diff --git a/mapreduce/lib/blobstore/blobstore.py b/mapreduce/lib/blobstore/blobstore.py deleted file mode 100755 index 3b7184c..0000000 --- a/mapreduce/lib/blobstore/blobstore.py +++ /dev/null @@ -1,745 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2007 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - - - - -"""A Python blobstore API used by app developers. - -Contains methods used to interface with Blobstore API. Includes db.Model-like -class representing a reference to a very large BLOB. Imports db.Key-like -class representing a blob-key. -""" - - - - - - - -import cgi -import email -import os - -from google.appengine.api import datastore -from google.appengine.api import datastore_errors -from google.appengine.api import datastore_types -from google.appengine.api.blobstore import blobstore -from google.appengine.ext import db - -__all__ = ['BLOB_INFO_KIND', - 'BLOB_KEY_HEADER', - 'BLOB_RANGE_HEADER', - 'BlobFetchSizeTooLargeError', - 'BlobInfo', - 'BlobInfoParseError', - 'BlobKey', - 'BlobNotFoundError', - 'BlobReferenceProperty', - 'BlobReader', - 'DataIndexOutOfRangeError', - 'Error', - 'InternalError', - 'MAX_BLOB_FETCH_SIZE', - 'UPLOAD_INFO_CREATION_HEADER', - 'create_upload_url', - 'delete', - 'fetch_data', - 'get', - 'parse_blob_info'] - -Error = blobstore.Error -InternalError = blobstore.InternalError -BlobFetchSizeTooLargeError = blobstore.BlobFetchSizeTooLargeError -BlobNotFoundError = blobstore.BlobNotFoundError -_CreationFormatError = blobstore._CreationFormatError -DataIndexOutOfRangeError = blobstore.DataIndexOutOfRangeError - -BlobKey = blobstore.BlobKey -create_upload_url = blobstore.create_upload_url -delete = blobstore.delete - - -class BlobInfoParseError(Error): - """CGI parameter does not contain valid BlobInfo record.""" - - -BLOB_INFO_KIND = blobstore.BLOB_INFO_KIND -BLOB_KEY_HEADER = blobstore.BLOB_KEY_HEADER -BLOB_RANGE_HEADER = blobstore.BLOB_RANGE_HEADER -MAX_BLOB_FETCH_SIZE = blobstore.MAX_BLOB_FETCH_SIZE -UPLOAD_INFO_CREATION_HEADER = blobstore.UPLOAD_INFO_CREATION_HEADER - - - -class _GqlQuery(db.GqlQuery): - """GqlQuery class that explicitly sets model-class. - - This does the same as the original db.GqlQuery class except that it does - not try to find the model class based on the compiled GQL query. The - caller instead provides the query with a model class to use for construction. - - This class is required for compatibility with the current db.py query - mechanism but will be removed in the future. DO NOT USE. - """ - - - def __init__(self, query_string, model_class, *args, **kwds): - """Constructor. - - Args: - query_string: Properly formatted GQL query string. - model_class: Model class from which entities are constructed. - *args: Positional arguments used to bind numeric references in the query. - **kwds: Dictionary-based arguments for named references. 
- """ - - - from google.appengine.ext import gql - app = kwds.pop('_app', None) - self._proto_query = gql.GQL(query_string, _app=app, namespace='') - - super(db.GqlQuery, self).__init__(model_class, namespace='') - self.bind(*args, **kwds) - - - - -class BlobInfo(object): - """Information about blobs in Blobstore. - - This is a db.Model-like class that contains information about blobs stored - by an application. Like db.Model, this class is backed by an Datastore - entity, however, BlobInfo instances are read-only and have a much more - limited interface. - - Each BlobInfo has a key of type BlobKey associated with it. This key is - specific to the Blobstore API and is not compatible with db.get. The key - can be used for quick lookup by passing it to BlobInfo.get. This - key converts easily to a string, which is web safe and can be embedded - in URLs. - - Properties: - content_type: Content type of blob. - creation: Creation date of blob, when it was uploaded. - filename: Filename user selected from their machine. - size: Size of uncompressed blob. - - All properties are read-only. Attempting to assign a value to a property - will raise NotImplementedError. - """ - - _unindexed_properties = frozenset() - - @property - def content_type(self): - return self.__get_value('content_type') - - @property - def creation(self): - return self.__get_value('creation') - - @property - def filename(self): - return self.__get_value('filename') - - @property - def size(self): - return self.__get_value('size') - - def __init__(self, entity_or_blob_key, _values=None): - """Constructor for wrapping blobstore entity. - - The constructor should not be used outside this package and tests. - - Args: - entity: Datastore entity that represents the blob reference. - """ - if isinstance(entity_or_blob_key, datastore.Entity): - self.__entity = entity_or_blob_key - self.__key = BlobKey(entity_or_blob_key.key().name()) - elif isinstance(entity_or_blob_key, BlobKey): - self.__entity = _values - self.__key = entity_or_blob_key - else: - TypeError('Must provide Entity or BlobKey') - - - - @classmethod - def from_entity(cls, entity): - """Convert entity to BlobInfo. - - This method is required for compatibility with the current db.py query - mechanism but will be removed in the future. DO NOT USE. - """ - return BlobInfo(entity) - - - - @classmethod - def properties(cls): - """Set of properties that belong to BlobInfo. - - This method is required for compatibility with the current db.py query - mechanism but will be removed in the future. DO NOT USE. - """ - return set(('content_type', 'creation', 'filename', 'size')) - - def __get_value(self, name): - """Get a BlobInfo value, loading entity if necessary. - - This method allows lazy loading of the underlying datastore entity. It - should never be invoked directly. - - Args: - name: Name of property to get value for. - - Returns: - Value of BlobInfo property from entity. - """ - if self.__entity is None: - self.__entity = datastore.Get( - datastore_types.Key.from_path( - self.kind(), str(self.__key), namespace='')) - try: - return self.__entity[name] - except KeyError: - raise AttributeError(name) - - - def key(self): - """Get key for blob. - - Returns: - BlobKey instance that identifies this blob. - """ - return self.__key - - def delete(self): - """Permanently delete blob from Blobstore.""" - delete(self.key()) - - def open(self, *args, **kwargs): - """Returns a BlobReader for this blob. - - Args: - *args, **kwargs: Passed to BlobReader constructor. 
- Returns: - A BlobReader instance. - """ - return BlobReader(self, *args, **kwargs) - - @classmethod - def get(cls, blob_keys): - """Retrieve BlobInfo by key or list of keys. - - Args: - blob_keys: A key or a list of keys. Keys may be instances of str, - unicode and BlobKey. - - Returns: - A BlobInfo instance associated with provided key or a list of BlobInfo - instances if a list of keys was provided. Keys that are not found in - Blobstore return None as their values. - """ - blob_keys = cls.__normalize_and_convert_keys(blob_keys) - try: - entities = datastore.Get(blob_keys) - except datastore_errors.EntityNotFoundError: - return None - if isinstance(entities, datastore.Entity): - return BlobInfo(entities) - else: - references = [] - for entity in entities: - if entity is not None: - references.append(BlobInfo(entity)) - else: - references.append(None) - return references - - @classmethod - def all(cls): - """Get query for all Blobs associated with application. - - Returns: - A db.Query object querying over BlobInfo's datastore kind. - """ - return db.Query(model_class=cls, namespace='') - - @classmethod - def __factory_for_kind(cls, kind): - if kind == BLOB_INFO_KIND: - return BlobInfo - raise ValueError('Cannot query for kind %s' % kind) - - @classmethod - def gql(cls, query_string, *args, **kwds): - """Returns a query using GQL query string. - - See appengine/ext/gql for more information about GQL. - - Args: - query_string: Properly formatted GQL query string with the - 'SELECT * FROM ' part omitted - *args: rest of the positional arguments used to bind numeric references - in the query. - **kwds: dictionary-based arguments (for named parameters). - - Returns: - A gql.GqlQuery object querying over BlobInfo's datastore kind. - """ - return _GqlQuery('SELECT * FROM %s %s' - % (cls.kind(), query_string), - cls, - *args, - **kwds) - - - @classmethod - def kind(self): - """Get the entity kind for the BlobInfo. - - This method is required for compatibility with the current db.py query - mechanism but will be removed in the future. DO NOT USE. - """ - return BLOB_INFO_KIND - - @classmethod - def __normalize_and_convert_keys(cls, keys): - """Normalize and convert all keys to BlobKey type. - - This method is based on datastore.NormalizeAndTypeCheck(). - - Args: - keys: A single key or a list/tuple of keys. Keys may be a string - or BlobKey - - Returns: - Single key or list with all strings replaced by BlobKey instances. - """ - if isinstance(keys, (list, tuple)): - multiple = True - - keys = list(keys) - else: - multiple = False - keys = [keys] - - for index, key in enumerate(keys): - if not isinstance(key, (basestring, BlobKey)): - raise datastore_errors.BadArgumentError( - 'Expected str or BlobKey; received %s (a %s)' % ( - key, - datastore.typename(key))) - keys[index] = datastore.Key.from_path(cls.kind(), str(key), namespace='') - - if multiple: - return keys - else: - return keys[0] - - -def get(blob_key): - """Get a BlobInfo record from blobstore. - - Does the same as BlobInfo.get. - """ - return BlobInfo.get(blob_key) - - -def parse_blob_info(field_storage): - """Parse a BlobInfo record from file upload field_storage. - - Args: - field_storage: cgi.FieldStorage that represents uploaded blob. - - Returns: - BlobInfo record as parsed from the field-storage instance. - None if there was no field_storage. - - Raises: - BlobInfoParseError when provided field_storage does not contain enough - information to construct a BlobInfo object. 
- """ - if field_storage is None: - return None - - field_name = field_storage.name - - def get_value(dict, name): - value = dict.get(name, None) - if value is None: - raise BlobInfoParseError( - 'Field %s has no %s.' % (field_name, name)) - return value - - filename = get_value(field_storage.disposition_options, 'filename') - blob_key = BlobKey(get_value(field_storage.type_options, 'blob-key')) - - upload_content = email.message_from_file(field_storage.file) - content_type = get_value(upload_content, 'content-type') - size = get_value(upload_content, 'content-length') - creation_string = get_value(upload_content, UPLOAD_INFO_CREATION_HEADER) - - try: - size = int(size) - except (TypeError, ValueError): - raise BlobInfoParseError( - '%s is not a valid value for %s size.' % (size, field_name)) - - try: - creation = blobstore._parse_creation(creation_string, field_name) - except blobstore._CreationFormatError, err: - raise BlobInfoParseError(str(err)) - - return BlobInfo(blob_key, - {'content_type': content_type, - 'creation': creation, - 'filename': filename, - 'size': size, - }) - - -class BlobReferenceProperty(db.Property): - """Property compatible with db.Model classes. - - Add references to blobs to domain models using BlobReferenceProperty: - - class Picture(db.Model): - title = db.StringProperty() - image = blobstore.BlobReferenceProperty() - thumbnail = blobstore.BlobReferenceProperty() - - To find the size of a picture using this model: - - picture = Picture.get(picture_key) - print picture.image.size - - BlobInfo objects are lazily loaded so iterating over models with - for BlobKeys is efficient, the following does not need to hit - Datastore for each image key: - - list_of_untitled_blobs = [] - for picture in Picture.gql("WHERE title=''"): - list_of_untitled_blobs.append(picture.image.key()) - """ - - data_type = BlobInfo - - def get_value_for_datastore(self, model_instance): - """Translate model property to datastore value.""" - blob_info = getattr(model_instance, self.name) - if blob_info is None: - return None - return blob_info.key() - - def make_value_from_datastore(self, value): - """Translate datastore value to BlobInfo.""" - if value is None: - return None - return BlobInfo(value) - - def validate(self, value): - """Validate that assigned value is BlobInfo. - - Automatically converts from strings and BlobKey instances. - """ - if isinstance(value, (basestring)): - value = BlobInfo(BlobKey(value)) - elif isinstance(value, BlobKey): - value = BlobInfo(value) - return super(BlobReferenceProperty, self).validate(value) - - -def fetch_data(blob, start_index, end_index): - """Fetch data for blob. - - Fetches a fragment of a blob up to MAX_BLOB_FETCH_SIZE in length. Attempting - to fetch a fragment that extends beyond the boundaries of the blob will return - the amount of data from start_index until the end of the blob, which will be - a smaller size than requested. Requesting a fragment which is entirely - outside the boundaries of the blob will return empty string. Attempting - to fetch a negative index will raise an exception. - - Args: - blob: BlobInfo, BlobKey, str or unicode representation of BlobKey of - blob to fetch data from. - start_index: Start index of blob data to fetch. May not be negative. - end_index: End index (inclusive) of blob data to fetch. Must be - >= start_index. - - Returns: - str containing partial data of blob. If the indexes are legal but outside - the boundaries of the blob, will return empty string. 
- - Raises: - TypeError if start_index or end_index are not indexes. Also when blob - is not a string, BlobKey or BlobInfo. - DataIndexOutOfRangeError when start_index < 0 or end_index < start_index. - BlobFetchSizeTooLargeError when request blob fragment is larger than - MAX_BLOB_FETCH_SIZE. - BlobNotFoundError when blob does not exist. - """ - if isinstance(blob, BlobInfo): - blob = blob.key() - return blobstore.fetch_data(blob, start_index, end_index) - - -class BlobReader(object): - """Provides a read-only file-like interface to a blobstore blob.""" - - SEEK_SET = 0 - SEEK_CUR = 1 - SEEK_END = 2 - - def __init__(self, blob, buffer_size=131072, position=0): - """Constructor. - - Args: - blob: The blob key, blob info, or string blob key to read from. - buffer_size: The minimum size to fetch chunks of data from blobstore. - position: The initial position in the file. - """ - if hasattr(blob, 'key'): - self.__blob_key = blob.key() - self.__blob_info = blob - else: - self.__blob_key = blob - self.__blob_info = None - self.__buffer_size = buffer_size - self.__buffer = "" - self.__position = position - self.__buffer_position = 0 - self.__eof = False - - def __iter__(self): - """Returns a file iterator for this BlobReader.""" - return self - - def __getstate__(self): - """Returns the serialized state for this BlobReader.""" - return (self.__blob_key, self.__buffer_size, self.__position) - - def __setstate__(self, state): - """Restores pickled state for this BlobReader.""" - self.__init__(*state) - - def close(self): - """Close the file. - - A closed file cannot be read or written any more. Any operation which - requires that the file be open will raise a ValueError after the file has - been closed. Calling close() more than once is allowed. - """ - self.__blob_key = None - - def flush(self): - raise IOError("BlobReaders are read-only") - - def next(self): - """Returns the next line from the file. - - Returns: - A string, terminted by \n. The last line may not be terminated by \n. - If EOF is reached, an empty string will be returned. - """ - line = self.readline() - if not line: - raise StopIteration - return line - - def __read_from_buffer(self, size): - """Reads at most size bytes from the buffer. - - Args: - size: Number of bytes to read, or negative to read the entire buffer. - Returns: - Tuple (data, size): - data: The bytes read from the buffer. - size: The remaining unread byte count. - """ - - if not self.__blob_key: - raise ValueError("File is closed") - - if size < 0: - end_pos = len(self.__buffer) - else: - end_pos = self.__buffer_position + size - data = self.__buffer[self.__buffer_position:end_pos] - - - data_length = len(data) - size -= data_length - self.__position += data_length - self.__buffer_position += data_length - - - if self.__buffer_position == len(self.__buffer): - self.__buffer = "" - self.__buffer_position = 0 - - return data, size - - def __fill_buffer(self, size=0): - """Fills the internal buffer. - - Args: - size: Number of bytes to read. Will be clamped to - [self.__buffer_size, MAX_BLOB_FETCH_SIZE]. - """ - read_size = min(max(size, self.__buffer_size), MAX_BLOB_FETCH_SIZE) - - self.__buffer = fetch_data(self.__blob_key, self.__position, - self.__position + read_size - 1) - self.__buffer_position = 0 - self.__eof = len(self.__buffer) < read_size - - def read(self, size=-1): - """Read at most size bytes from the file. - - Fewer bytes are read if the read hits EOF before obtaining size bytes. 
- If the size argument is negative or omitted, read all data until EOF is - reached. The bytes are returned as a string object. An empty string is - returned when EOF is encountered immediately. - - Calling read() without a size specified is likely to be dangerous, as it - may read excessive amounts of data. - - Args: - size: Optional. The maximum number of bytes to read. When omitted, read() - returns all remaining data in the file. - - Returns: - The read data, as a string. - """ - data_list = [] - while True: - data, size = self.__read_from_buffer(size) - data_list.append(data) - if size == 0 or self.__eof: - return ''.join(data_list) - self.__fill_buffer(size) - - def readline(self, size=-1): - """Read one entire line from the file. - - A trailing newline character is kept in the string (but may be absent when a - file ends with an incomplete line). If the size argument is present and - non-negative, it is a maximum byte count (including the trailing newline) - and an incomplete line may be returned. An empty string is returned only - when EOF is encountered immediately. - - Args: - size: Optional. The maximum number of bytes to read. - - Returns: - The read data, as a string. - """ - data_list = [] - while True: - if size < 0: - end_pos = len(self.__buffer) - else: - end_pos = self.__buffer_position + size - newline_pos = self.__buffer.find('\n', self.__buffer_position, end_pos) - if newline_pos != -1: - - data_list.append( - self.__read_from_buffer(newline_pos - - self.__buffer_position + 1)[0]) - break - else: - - data, size = self.__read_from_buffer(size) - data_list.append(data) - if size == 0 or self.__eof: - break - self.__fill_buffer() - return ''.join(data_list) - - def readlines(self, sizehint=None): - """Read until EOF using readline() and return a list of lines thus read. - - If the optional sizehint argument is present, instead of reading up to EOF, - whole lines totalling approximately sizehint bytes (possibly after rounding - up to an internal buffer size) are read. - - Args: - sizehint: A hint as to the maximum number of bytes to read. - - Returns: - A list of strings, each being a single line from the file. - """ - lines = [] - while sizehint is None or sizehint > 0: - line = self.readline() - if sizehint: - sizehint -= len(line) - if not line: - - break - lines.append(line) - return lines - - def seek(self, offset, whence=SEEK_SET): - """Set the file's current position, like stdio's fseek(). - - The whence argument is optional and defaults to os.SEEK_SET or 0 (absolute - file positioning); other values are os.SEEK_CUR or 1 (seek relative to the - current position) and os.SEEK_END or 2 (seek relative to the file's end). - - Args: - offset: The relative offset to seek to. - whence: Defines what the offset is relative to. See description for - details. 
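  A minimal usage sketch (the blob key below is a placeholder) showing an
  end-relative seek followed by a rewind:

    reader = BlobReader('placeholder-blob-key')
    reader.seek(-16, BlobReader.SEEK_END)   # position 16 bytes before the end
    tail = reader.read()                    # read the final 16 bytes
    reader.seek(0)                          # back to the start of the blob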
- """ - if whence == BlobReader.SEEK_CUR: - offset = self.__position + offset - elif whence == BlobReader.SEEK_END: - offset = self.blob_info.size + offset - self.__buffer = "" - self.__buffer_position = 0 - self.__position = offset - self.__eof = False - - def tell(self): - """Return the file's current position, like stdio's ftell().""" - return self.__position - - def truncate(self, size): - raise IOError("BlobReaders are read-only") - - def write(self, str): - raise IOError("BlobReaders are read-only") - - def writelines(self, sequence): - raise IOError("BlobReaders are read-only") - - @property - def blob_info(self): - """Returns the BlobInfo for this file.""" - if not self.__blob_info: - self.__blob_info = BlobInfo.get(self.__blob_key) - return self.__blob_info - - @property - def closed(self): - """Returns True if this file is closed, False otherwise.""" - return self.__blob_key is None diff --git a/mapreduce/lib/graphy/README b/mapreduce/lib/graphy/README deleted file mode 100755 index 39809d8..0000000 --- a/mapreduce/lib/graphy/README +++ /dev/null @@ -1,14 +0,0 @@ -Graphy library - -The web site is http://code.google.com/p/graphy/ - -This copy was downloaded from -http://graphy.googlecode.com/files/graphy_1.0.tar.bz2 - -Graphy is licensed under the Apache 2.0 open source license. - -Local changes: - -- Changed imports to make mapreduce library hermetic. - - diff --git a/mapreduce/lib/graphy/__init__.py b/mapreduce/lib/graphy/__init__.py deleted file mode 100755 index a32fb2d..0000000 --- a/mapreduce/lib/graphy/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -#!/usr/bin/env python -__version__='1.0' diff --git a/mapreduce/lib/graphy/backends/__init__.py b/mapreduce/lib/graphy/backends/__init__.py deleted file mode 100755 index 4265cc3..0000000 --- a/mapreduce/lib/graphy/backends/__init__.py +++ /dev/null @@ -1 +0,0 @@ -#!/usr/bin/env python diff --git a/mapreduce/lib/graphy/backends/google_chart_api/__init__.py b/mapreduce/lib/graphy/backends/google_chart_api/__init__.py deleted file mode 100755 index a1b5c33..0000000 --- a/mapreduce/lib/graphy/backends/google_chart_api/__init__.py +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2008 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Backend which can generate charts using the Google Chart API.""" - -from mapreduce.lib.graphy import line_chart -from mapreduce.lib.graphy import bar_chart -from mapreduce.lib.graphy import pie_chart -from mapreduce.lib.graphy.backends.google_chart_api import encoders - -def _GetChartFactory(chart_class, display_class): - """Create a factory method for instantiating charts with displays. - - Returns a method which, when called, will create & return a chart with - chart.display already populated. - """ - def Inner(*args, **kwargs): - chart = chart_class(*args, **kwargs) - chart.display = display_class(chart) - return chart - return Inner - -# These helper methods make it easy to get chart objects with display -# objects already setup. 
For example, this: -# chart = google_chart_api.LineChart() -# is equivalent to: -# chart = line_chart.LineChart() -# chart.display = google_chart_api.LineChartEncoder() -# -# (If there's some chart type for which a helper method isn't available, you -# can always just instantiate the correct encoder manually, like in the 2nd -# example above). -# TODO: fix these so they have nice docs in ipython (give them __doc__) -LineChart = _GetChartFactory(line_chart.LineChart, encoders.LineChartEncoder) -Sparkline = _GetChartFactory(line_chart.Sparkline, encoders.SparklineEncoder) -BarChart = _GetChartFactory(bar_chart.BarChart, encoders.BarChartEncoder) -PieChart = _GetChartFactory(pie_chart.PieChart, encoders.PieChartEncoder) diff --git a/mapreduce/lib/graphy/backends/google_chart_api/encoders.py b/mapreduce/lib/graphy/backends/google_chart_api/encoders.py deleted file mode 100755 index c27376b..0000000 --- a/mapreduce/lib/graphy/backends/google_chart_api/encoders.py +++ /dev/null @@ -1,430 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2008 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Display objects for the different kinds of charts. - -Not intended for end users, use the methods in __init__ instead.""" - -import warnings -from mapreduce.lib.graphy.backends.google_chart_api import util - - -class BaseChartEncoder(object): - - """Base class for encoders which turn chart objects into Google Chart URLS. - - Object attributes: - extra_params: Dict to add/override specific chart params. Of the - form param:string, passed directly to the Google Chart API. - For example, 'cht':'lti' becomes ?cht=lti in the URL. - url_base: The prefix to use for URLs. If you want to point to a different - server for some reason, you would override this. - formatters: TODO: Need to explain how these work, and how they are - different from chart formatters. - enhanced_encoding: If True, uses enhanced encoding. If - False, simple encoding is used. - escape_url: If True, URL will be properly escaped. If False, characters - like | and , will be unescapped (which makes the URL easier to - read). - """ - - def __init__(self, chart): - self.extra_params = {} # You can add specific params here. - self.url_base = 'http://chart.apis.google.com/chart' - self.formatters = self._GetFormatters() - self.chart = chart - self.enhanced_encoding = False - self.escape_url = True # You can turn off URL escaping for debugging. - self._width = 0 # These are set when someone calls Url() - self._height = 0 - - def Url(self, width, height, use_html_entities=False): - """Get the URL for our graph. - - Args: - use_html_entities: If True, reserved HTML characters (&, <, >, ") in the - URL are replaced with HTML entities (&, <, etc.). Default is False. 
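  A minimal sketch (data values are made up) using the factory helpers from
  this package's __init__, which attach an encoder as chart.display:

    from mapreduce.lib.graphy.backends import google_chart_api
    chart = google_chart_api.LineChart([1, 2, 3, 5, 8])
    url = chart.display.Url(400, 200)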
- """ - self._width = width - self._height = height - params = self._Params(self.chart) - return util.EncodeUrl(self.url_base, params, self.escape_url, - use_html_entities) - - def Img(self, width, height): - """Get an image tag for our graph.""" - url = self.Url(width, height, use_html_entities=True) - tag = 'chart' - return tag % (url, width, height) - - def _GetType(self, chart): - """Return the correct chart_type param for the chart.""" - raise NotImplementedError - - def _GetFormatters(self): - """Get a list of formatter functions to use for encoding.""" - formatters = [self._GetLegendParams, - self._GetDataSeriesParams, - self._GetColors, - self._GetAxisParams, - self._GetGridParams, - self._GetType, - self._GetExtraParams, - self._GetSizeParams, - ] - return formatters - - def _Params(self, chart): - """Collect all the different params we need for the URL. Collecting - all params as a dict before converting to a URL makes testing easier. - """ - chart = chart.GetFormattedChart() - params = {} - def Add(new_params): - params.update(util.ShortenParameterNames(new_params)) - - for formatter in self.formatters: - Add(formatter(chart)) - - for key in params: - params[key] = str(params[key]) - return params - - def _GetSizeParams(self, chart): - """Get the size param.""" - return {'size': '%sx%s' % (int(self._width), int(self._height))} - - def _GetExtraParams(self, chart): - """Get any extra params (from extra_params).""" - return self.extra_params - - def _GetDataSeriesParams(self, chart): - """Collect params related to the data series.""" - y_min, y_max = chart.GetDependentAxis().min, chart.GetDependentAxis().max - series_data = [] - markers = [] - for i, series in enumerate(chart.data): - data = series.data - if not data: # Drop empty series. - continue - series_data.append(data) - - for x, marker in series.markers: - args = [marker.shape, marker.color, i, x, marker.size] - markers.append(','.join(str(arg) for arg in args)) - - encoder = self._GetDataEncoder(chart) - result = util.EncodeData(chart, series_data, y_min, y_max, encoder) - result.update(util.JoinLists(marker = markers)) - return result - - def _GetColors(self, chart): - """Color series color parameter.""" - colors = [] - for series in chart.data: - if not series.data: - continue - colors.append(series.style.color) - return util.JoinLists(color = colors) - - def _GetDataEncoder(self, chart): - """Get a class which can encode the data the way the user requested.""" - if not self.enhanced_encoding: - return util.SimpleDataEncoder() - return util.EnhancedDataEncoder() - - def _GetLegendParams(self, chart): - """Get params for showing a legend.""" - if chart._show_legend: - return util.JoinLists(data_series_label = chart._legend_labels) - return {} - - def _GetAxisLabelsAndPositions(self, axis, chart): - """Return axis.labels & axis.label_positions.""" - return axis.labels, axis.label_positions - - def _GetAxisParams(self, chart): - """Collect params related to our various axes (x, y, right-hand).""" - axis_types = [] - axis_ranges = [] - axis_labels = [] - axis_label_positions = [] - axis_label_gridlines = [] - mark_length = max(self._width, self._height) - for i, axis_pair in enumerate(a for a in chart._GetAxes() if a[1].labels): - axis_type_code, axis = axis_pair - axis_types.append(axis_type_code) - if axis.min is not None or axis.max is not None: - assert axis.min is not None # Sanity check: both min & max must be set. 
- assert axis.max is not None - axis_ranges.append('%s,%s,%s' % (i, axis.min, axis.max)) - - labels, positions = self._GetAxisLabelsAndPositions(axis, chart) - if labels: - axis_labels.append('%s:' % i) - axis_labels.extend(labels) - if positions: - positions = [i] + list(positions) - axis_label_positions.append(','.join(str(x) for x in positions)) - if axis.label_gridlines: - axis_label_gridlines.append("%d,%d" % (i, -mark_length)) - - return util.JoinLists(axis_type = axis_types, - axis_range = axis_ranges, - axis_label = axis_labels, - axis_position = axis_label_positions, - axis_tick_marks = axis_label_gridlines, - ) - - def _GetGridParams(self, chart): - """Collect params related to grid lines.""" - x = 0 - y = 0 - if chart.bottom.grid_spacing: - # min/max must be set for this to make sense. - assert(chart.bottom.min is not None) - assert(chart.bottom.max is not None) - total = float(chart.bottom.max - chart.bottom.min) - x = 100 * chart.bottom.grid_spacing / total - if chart.left.grid_spacing: - # min/max must be set for this to make sense. - assert(chart.left.min is not None) - assert(chart.left.max is not None) - total = float(chart.left.max - chart.left.min) - y = 100 * chart.left.grid_spacing / total - if x or y: - return dict(grid = '%.3g,%.3g,1,0' % (x, y)) - return {} - - -class LineChartEncoder(BaseChartEncoder): - - """Helper class to encode LineChart objects into Google Chart URLs.""" - - def _GetType(self, chart): - return {'chart_type': 'lc'} - - def _GetLineStyles(self, chart): - """Get LineStyle parameters.""" - styles = [] - for series in chart.data: - style = series.style - if style: - styles.append('%s,%s,%s' % (style.width, style.on, style.off)) - else: - # If one style is missing, they must all be missing - # TODO: Add a test for this; throw a more meaningful exception - assert (not styles) - return util.JoinLists(line_style = styles) - - def _GetFormatters(self): - out = super(LineChartEncoder, self)._GetFormatters() - out.insert(-2, self._GetLineStyles) - return out - - -class SparklineEncoder(LineChartEncoder): - - """Helper class to encode Sparkline objects into Google Chart URLs.""" - - def _GetType(self, chart): - return {'chart_type': 'lfi'} - - -class BarChartEncoder(BaseChartEncoder): - - """Helper class to encode BarChart objects into Google Chart URLs.""" - - __STYLE_DEPRECATION = ('BarChart.display.style is deprecated.' + - ' Use BarChart.style, instead.') - - def __init__(self, chart, style=None): - """Construct a new BarChartEncoder. - - Args: - style: DEPRECATED. Set style on the chart object itself. - """ - super(BarChartEncoder, self).__init__(chart) - if style is not None: - warnings.warn(self.__STYLE_DEPRECATION, DeprecationWarning, stacklevel=2) - chart.style = style - - def _GetType(self, chart): - # Vertical Stacked Type - types = {(True, False): 'bvg', - (True, True): 'bvs', - (False, False): 'bhg', - (False, True): 'bhs'} - return {'chart_type': types[(chart.vertical, chart.stacked)]} - - def _GetAxisLabelsAndPositions(self, axis, chart): - """Reverse labels on the y-axis in horizontal bar charts. 
- (Otherwise the labels come out backwards from what you would expect) - """ - if not chart.vertical and axis == chart.left: - # The left axis of horizontal bar charts needs to have reversed labels - return reversed(axis.labels), reversed(axis.label_positions) - return axis.labels, axis.label_positions - - def _GetFormatters(self): - out = super(BarChartEncoder, self)._GetFormatters() - # insert at -2 to allow extra_params to overwrite everything - out.insert(-2, self._ZeroPoint) - out.insert(-2, self._ApplyBarChartStyle) - return out - - def _ZeroPoint(self, chart): - """Get the zero-point if any bars are negative.""" - # (Maybe) set the zero point. - min, max = chart.GetDependentAxis().min, chart.GetDependentAxis().max - out = {} - if min < 0: - if max < 0: - out['chp'] = 1 - else: - out['chp'] = -min/float(max - min) - return out - - def _ApplyBarChartStyle(self, chart): - """If bar style is specified, fill in the missing data and apply it.""" - # sanity checks - if chart.style is None or not chart.data: - return {} - - (bar_thickness, bar_gap, group_gap) = (chart.style.bar_thickness, - chart.style.bar_gap, - chart.style.group_gap) - # Auto-size bar/group gaps - if bar_gap is None and group_gap is not None: - bar_gap = max(0, group_gap / 2) - if not chart.style.use_fractional_gap_spacing: - bar_gap = int(bar_gap) - if group_gap is None and bar_gap is not None: - group_gap = max(0, bar_gap * 2) - - # Set bar thickness to auto if it is missing - if bar_thickness is None: - if chart.style.use_fractional_gap_spacing: - bar_thickness = 'r' - else: - bar_thickness = 'a' - else: - # Convert gap sizes to pixels if needed - if chart.style.use_fractional_gap_spacing: - if bar_gap: - bar_gap = int(bar_thickness * bar_gap) - if group_gap: - group_gap = int(bar_thickness * group_gap) - - # Build a valid spec; ignore group gap if chart is stacked, - # since there are no groups in that case - spec = [bar_thickness] - if bar_gap is not None: - spec.append(bar_gap) - if group_gap is not None and not chart.stacked: - spec.append(group_gap) - return util.JoinLists(bar_size = spec) - - def __GetStyle(self): - warnings.warn(self.__STYLE_DEPRECATION, DeprecationWarning, stacklevel=2) - return self.chart.style - - def __SetStyle(self, value): - warnings.warn(self.__STYLE_DEPRECATION, DeprecationWarning, stacklevel=2) - self.chart.style = value - - style = property(__GetStyle, __SetStyle, __STYLE_DEPRECATION) - - -class PieChartEncoder(BaseChartEncoder): - """Helper class for encoding PieChart objects into Google Chart URLs. - Fuzzy frogs frolic in the forest. - - Object Attributes: - is3d: if True, draw a 3d pie chart. Default is False. - """ - - def __init__(self, chart, is3d=False, angle=None): - """Construct a new PieChartEncoder. - - Args: - is3d: If True, draw a 3d pie chart. Default is False. If the pie chart - includes multiple pies, is3d must be set to False. - angle: Angle of rotation of the pie chart, in radians. 
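  A small illustrative sketch (data and labels are made up):

    from mapreduce.lib.graphy.backends import google_chart_api
    chart = google_chart_api.PieChart([10, 30, 60], labels=['a', 'b', 'c'])
    chart.display.is3d = True
    url = chart.display.Url(320, 240)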
- """ - super(PieChartEncoder, self).__init__(chart) - self.is3d = is3d - self.angle = None - - def _GetFormatters(self): - """Add a formatter for the chart angle.""" - formatters = super(PieChartEncoder, self)._GetFormatters() - formatters.append(self._GetAngleParams) - return formatters - - def _GetType(self, chart): - if len(chart.data) > 1: - if self.is3d: - warnings.warn( - '3d charts with more than one pie not supported; rendering in 2d', - RuntimeWarning, stacklevel=2) - chart_type = 'pc' - else: - if self.is3d: - chart_type = 'p3' - else: - chart_type = 'p' - return {'chart_type': chart_type} - - def _GetDataSeriesParams(self, chart): - """Collect params related to the data series.""" - - pie_points = [] - labels = [] - max_val = 1 - for pie in chart.data: - points = [] - for segment in pie: - if segment: - points.append(segment.size) - max_val = max(max_val, segment.size) - labels.append(segment.label or '') - if points: - pie_points.append(points) - - encoder = self._GetDataEncoder(chart) - result = util.EncodeData(chart, pie_points, 0, max_val, encoder) - result.update(util.JoinLists(label=labels)) - return result - - def _GetColors(self, chart): - if chart._colors: - # Colors were overridden by the user - colors = chart._colors - else: - # Build the list of colors from individual segments - colors = [] - for pie in chart.data: - for segment in pie: - if segment and segment.color: - colors.append(segment.color) - return util.JoinLists(color = colors) - - def _GetAngleParams(self, chart): - """If the user specified an angle, add it to the params.""" - if self.angle: - return {'chp' : str(self.angle)} - return {} diff --git a/mapreduce/lib/graphy/backends/google_chart_api/util.py b/mapreduce/lib/graphy/backends/google_chart_api/util.py deleted file mode 100755 index 6ec63e3..0000000 --- a/mapreduce/lib/graphy/backends/google_chart_api/util.py +++ /dev/null @@ -1,231 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2008 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Utility functions for working with the Google Chart API. - -Not intended for end users, use the methods in __init__ instead.""" - -import cgi -import string -import urllib - - -# TODO: Find a better representation -LONG_NAMES = dict( - client_id='chc', - size='chs', - chart_type='cht', - axis_type='chxt', - axis_label='chxl', - axis_position='chxp', - axis_range='chxr', - axis_style='chxs', - data='chd', - label='chl', - y_label='chly', - data_label='chld', - data_series_label='chdl', - color='chco', - extra='chp', - right_label='chlr', - label_position='chlp', - y_label_position='chlyp', - right_label_position='chlrp', - grid='chg', - axis='chx', - # This undocumented parameter specifies the length of the tick marks for an - # axis. Negative values will extend tick marks into the main graph area. 
- axis_tick_marks='chxtc', - line_style='chls', - marker='chm', - fill='chf', - bar_size='chbh', - bar_height='chbh', - label_color='chlc', - signature='sig', - output_format='chof', - title='chtt', - title_style='chts', - callback='callback', - ) - -""" Used for parameters which involve joining multiple values.""" -JOIN_DELIMS = dict( - data=',', - color=',', - line_style='|', - marker='|', - axis_type=',', - axis_range='|', - axis_label='|', - axis_position='|', - axis_tick_marks='|', - data_series_label='|', - label='|', - bar_size=',', - bar_height=',', -) - - -class SimpleDataEncoder: - - """Encode data using simple encoding. Out-of-range data will - be dropped (encoded as '_'). - """ - - def __init__(self): - self.prefix = 's:' - self.code = string.ascii_uppercase + string.ascii_lowercase + string.digits - self.min = 0 - self.max = len(self.code) - 1 - - def Encode(self, data): - return ''.join(self._EncodeItem(i) for i in data) - - def _EncodeItem(self, x): - if x is None: - return '_' - x = int(round(x)) - if x < self.min or x > self.max: - return '_' - return self.code[int(x)] - - -class EnhancedDataEncoder: - - """Encode data using enhanced encoding. Out-of-range data will - be dropped (encoded as '_'). - """ - - def __init__(self): - self.prefix = 'e:' - chars = string.ascii_uppercase + string.ascii_lowercase + string.digits \ - + '-.' - self.code = [x + y for x in chars for y in chars] - self.min = 0 - self.max = len(self.code) - 1 - - def Encode(self, data): - return ''.join(self._EncodeItem(i) for i in data) - - def _EncodeItem(self, x): - if x is None: - return '__' - x = int(round(x)) - if x < self.min or x > self.max: - return '__' - return self.code[int(x)] - - -def EncodeUrl(base, params, escape_url, use_html_entities): - """Escape params, combine and append them to base to generate a full URL.""" - real_params = [] - for key, value in params.iteritems(): - if escape_url: - value = urllib.quote(value) - if value: - real_params.append('%s=%s' % (key, value)) - if real_params: - url = '%s?%s' % (base, '&'.join(real_params)) - else: - url = base - if use_html_entities: - url = cgi.escape(url, quote=True) - return url - - -def ShortenParameterNames(params): - """Shorten long parameter names (like size) to short names (like chs).""" - out = {} - for name, value in params.iteritems(): - short_name = LONG_NAMES.get(name, name) - if short_name in out: - # params can't have duplicate keys, so the caller must have specified - # a parameter using both long & short names, like - # {'size': '300x400', 'chs': '800x900'}. We don't know which to use. - raise KeyError('Both long and short version of parameter %s (%s) ' - 'found. It is unclear which one to use.' % (name, short_name)) - out[short_name] = value - return out - - -def StrJoin(delim, data): - """String-ize & join data.""" - return delim.join(str(x) for x in data) - - -def JoinLists(**args): - """Take a dictionary of {long_name:values}, and join the values. - - For each long_name, join the values into a string according to - JOIN_DELIMS. If values is empty or None, replace with an empty string. - - Returns: - A dictionary {long_name:joined_value} entries. - """ - out = {} - for key, val in args.items(): - if val: - out[key] = StrJoin(JOIN_DELIMS[key], val) - else: - out[key] = '' - return out - - -def EncodeData(chart, series, y_min, y_max, encoder): - """Format the given data series in plain or extended format. - - Use the chart's encoder to determine the format. 
The formatted data will - be scaled to fit within the range of values supported by the chosen - encoding. - - Args: - chart: The chart. - series: A list of the the data series to format; each list element is - a list of data points. - y_min: Minimum data value. May be None if y_max is also None - y_max: Maximum data value. May be None if y_min is also None - Returns: - A dictionary with one key, 'data', whose value is the fully encoded series. - """ - assert (y_min is None) == (y_max is None) - if y_min is not None: - def _ScaleAndEncode(series): - series = ScaleData(series, y_min, y_max, encoder.min, encoder.max) - return encoder.Encode(series) - encoded_series = [_ScaleAndEncode(s) for s in series] - else: - encoded_series = [encoder.Encode(s) for s in series] - result = JoinLists(**{'data': encoded_series}) - result['data'] = encoder.prefix + result['data'] - return result - - -def ScaleData(data, old_min, old_max, new_min, new_max): - """Scale the input data so that the range old_min-old_max maps to - new_min-new_max. - """ - def ScalePoint(x): - if x is None: - return None - return scale * x + translate - - if old_min == old_max: - scale = 1 - else: - scale = (new_max - new_min) / float(old_max - old_min) - translate = new_min - scale * old_min - return map(ScalePoint, data) diff --git a/mapreduce/lib/graphy/bar_chart.py b/mapreduce/lib/graphy/bar_chart.py deleted file mode 100755 index 050e4de..0000000 --- a/mapreduce/lib/graphy/bar_chart.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2008 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Code related to bar charts.""" - -import copy -import warnings - -from mapreduce.lib.graphy import common -from mapreduce.lib.graphy import util - - -class BarsStyle(object): - """Style of a series of bars in a BarChart - - Object Attributes: - color: Hex string, like '00ff00' for green - """ - def __init__(self, color): - self.color = color - - -class BarChartStyle(object): - """Represents the style for bars on a BarChart. - - Any of the object attributes may be set to None, in which case the - value will be auto-calculated. - - Object Attributes: - bar_thickness: The thickness of a bar, in pixels. - bar_gap: The gap between bars, in pixels, or as a fraction of bar thickness - if use_fractional_gap_spacing is True. - group_gap: The gap between groups of bars, in pixels, or as a fraction of - bar thickness if use_fractional_gap_spacing is True. - use_fractional_gap_spacing: if True, bar_gap and group_gap specify gap - sizes as a fraction of bar width. Default is False. - """ - - _DEFAULT_GROUP_GAP = 8 - _DEFAULT_BAR_GAP = 4 - - def __init__(self, bar_thickness=None, - bar_gap=_DEFAULT_BAR_GAP, group_gap=_DEFAULT_GROUP_GAP, - use_fractional_gap_spacing=False): - """Create a new BarChartStyle. - - Args: - bar_thickness: The thickness of a bar, in pixels. Set this to None if - you want the bar thickness to be auto-calculated (this is the default - behaviour). - bar_gap: The gap between bars, in pixels. 
Default is 4. - group_gap: The gap between groups of bars, in pixels. Default is 8. - """ - self.bar_thickness = bar_thickness - self.bar_gap = bar_gap - self.group_gap = group_gap - self.use_fractional_gap_spacing = use_fractional_gap_spacing - - -class BarStyle(BarChartStyle): - - def __init__(self, *args, **kwargs): - warnings.warn('BarStyle is deprecated. Use BarChartStyle.', - DeprecationWarning, stacklevel=2) - super(BarStyle, self).__init__(*args, **kwargs) - - -class BarChart(common.BaseChart): - """Represents a bar chart. - - Object attributes: - vertical: if True, the bars will be vertical. Default is True. - stacked: if True, the bars will be stacked. Default is False. - style: The BarChartStyle for all bars on this chart, specifying bar - thickness and gaps between bars. - """ - - def __init__(self, points=None): - """Constructor for BarChart objects.""" - super(BarChart, self).__init__() - if points is not None: - self.AddBars(points) - self.vertical = True - self.stacked = False - self.style = BarChartStyle(None, None, None) # full auto - - def AddBars(self, points, label=None, color=None): - """Add a series of bars to the chart. - - points: List of y-values for the bars in this series - label: Name of the series (used in the legend) - color: Hex string, like '00ff00' for green - - This is a convenience method which constructs & appends the DataSeries for - you. - """ - if label is not None and util._IsColor(label): - warnings.warn('Your code may be broken! ' - 'Label is a hex triplet. Maybe it is a color? The ' - 'old argument order (color before label) is deprecated.', - DeprecationWarning, stacklevel=2) - style = BarsStyle(color) - series = common.DataSeries(points, label=label, style=style) - self.data.append(series) - return series - - def GetDependentAxes(self): - """Get the dependendant axes, which depend on orientation.""" - if self.vertical: - return (self._axes[common.AxisPosition.LEFT] + - self._axes[common.AxisPosition.RIGHT]) - else: - return (self._axes[common.AxisPosition.TOP] + - self._axes[common.AxisPosition.BOTTOM]) - - def GetIndependentAxes(self): - """Get the independendant axes, which depend on orientation.""" - if self.vertical: - return (self._axes[common.AxisPosition.TOP] + - self._axes[common.AxisPosition.BOTTOM]) - else: - return (self._axes[common.AxisPosition.LEFT] + - self._axes[common.AxisPosition.RIGHT]) - - def GetDependentAxis(self): - """Get the main dependendant axis, which depends on orientation.""" - if self.vertical: - return self.left - else: - return self.bottom - - def GetIndependentAxis(self): - """Get the main independendant axis, which depends on orientation.""" - if self.vertical: - return self.bottom - else: - return self.left - - def GetMinMaxValues(self): - """Get the largest & smallest bar values as (min_value, max_value).""" - if not self.stacked: - return super(BarChart, self).GetMinMaxValues() - - if not self.data: - return None, None # No data, nothing to do. 
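    # Illustrative example (not from the original source): stacking the series
    # [1, 2] and [3, -5] accumulates positives = [4, 2] and negatives = [0, -5],
    # so this method returns (-5, 4).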
- num_bars = max(len(series.data) for series in self.data) - positives = [0 for i in xrange(0, num_bars)] - negatives = list(positives) - for series in self.data: - for i, point in enumerate(series.data): - if point: - if point > 0: - positives[i] += point - else: - negatives[i] += point - min_value = min(min(positives), min(negatives)) - max_value = max(max(positives), max(negatives)) - return min_value, max_value diff --git a/mapreduce/lib/graphy/common.py b/mapreduce/lib/graphy/common.py deleted file mode 100755 index 74ed0e3..0000000 --- a/mapreduce/lib/graphy/common.py +++ /dev/null @@ -1,412 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2008 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Code common to all chart types.""" - -import copy -import warnings - -from mapreduce.lib.graphy import formatters -from mapreduce.lib.graphy import util - - -class Marker(object): - - """Represents an abstract marker, without position. You can attach these to - a DataSeries. - - Object attributes: - shape: One of the shape codes (Marker.arrow, Marker.diamond, etc.) - color: color (as hex string, f.ex. '0000ff' for blue) - size: size of the marker - """ - # TODO: Write an example using markers. - - # Shapes: - arrow = 'a' - cross = 'c' - diamond = 'd' - circle = 'o' - square = 's' - x = 'x' - - # Note: The Google Chart API also knows some other markers ('v', 'V', 'r', - # 'b') that I think would fit better into a grid API. - # TODO: Make such a grid API - - def __init__(self, shape, color, size): - """Construct a Marker. See class docstring for details on args.""" - # TODO: Shapes 'r' and 'b' would be much easier to use if they had a - # special-purpose API (instead of trying to fake it with markers) - self.shape = shape - self.color = color - self.size = size - - -class _BasicStyle(object): - """Basic style object. Used internally.""" - - def __init__(self, color): - self.color = color - - -class DataSeries(object): - - """Represents one data series for a chart (both data & presentation - information). - - Object attributes: - points: List of numbers representing y-values (x-values are not specified - because the Google Chart API expects even x-value spacing). - label: String with the series' label in the legend. The chart will only - have a legend if at least one series has a label. If some series - do not have a label then they will have an empty description in - the legend. This is currently a limitation in the Google Chart - API. - style: A chart-type-specific style object. (LineStyle for LineChart, - BarsStyle for BarChart, etc.) - markers: List of (x, m) tuples where m is a Marker object and x is the - x-axis value to place it at. - - The "fill" markers ('r' & 'b') are a little weird because they - aren't a point on a line. For these, you can fake it by - passing slightly weird data (I'd like a better API for them at - some point): - For 'b', you attach the marker to the starting series, and set x - to the index of the ending line. Size is ignored, I think. 
- - For 'r', you can attach to any line, specify the starting - y-value for x and the ending y-value for size. Y, in this case, - is becase 0.0 (bottom) and 1.0 (top). - color: DEPRECATED - """ - - # TODO: Should we require the points list to be non-empty ? - # TODO: Do markers belong here? They are really only used for LineCharts - def __init__(self, points, label=None, style=None, markers=None, color=None): - """Construct a DataSeries. See class docstring for details on args.""" - if label is not None and util._IsColor(label): - warnings.warn('Your code may be broken! Label is a hex triplet. Maybe ' - 'it is a color? The old argument order (color & style ' - 'before label) is deprecated.', DeprecationWarning, - stacklevel=2) - if color is not None: - warnings.warn('Passing color is deprecated. Pass a style object ' - 'instead.', DeprecationWarning, stacklevel=2) - # Attempt to fix it for them. If they also passed a style, honor it. - if style is None: - style = _BasicStyle(color) - if style is not None and isinstance(style, basestring): - warnings.warn('Your code is broken! Style is a string, not an object. ' - 'Maybe you are passing a color? Passing color is ' - 'deprecated; pass a style object instead.', - DeprecationWarning, stacklevel=2) - if style is None: - style = _BasicStyle(None) - self.data = points - self.style = style - self.markers = markers or [] - self.label = label - - def _GetColor(self): - warnings.warn('DataSeries.color is deprecated, use ' - 'DataSeries.style.color instead.', DeprecationWarning, - stacklevel=2) - return self.style.color - - def _SetColor(self, color): - warnings.warn('DataSeries.color is deprecated, use ' - 'DataSeries.style.color instead.', DeprecationWarning, - stacklevel=2) - self.style.color = color - - color = property(_GetColor, _SetColor) - - -class AxisPosition(object): - """Represents all the available axis positions. - - The available positions are as follows: - AxisPosition.TOP - AxisPosition.BOTTOM - AxisPosition.LEFT - AxisPosition.RIGHT - """ - LEFT = 'y' - RIGHT = 'r' - BOTTOM = 'x' - TOP = 't' - - -class Axis(object): - - """Represents one axis. - - Object setings: - min: Minimum value for the bottom or left end of the axis - max: Max value. - labels: List of labels to show along the axis. - label_positions: List of positions to show the labels at. Uses the scale - set by min & max, so if you set min = 0 and max = 10, then - label positions [0, 5, 10] would be at the bottom, - middle, and top of the axis, respectively. - grid_spacing: Amount of space between gridlines (in min/max scale). - A value of 0 disables gridlines. - label_gridlines: If True, draw a line extending from each label - on the axis all the way across the chart. - """ - - def __init__(self, axis_min=None, axis_max=None): - """Construct a new Axis. - - Args: - axis_min: smallest value on the axis - axis_max: largest value on the axis - """ - self.min = axis_min - self.max = axis_max - self.labels = [] - self.label_positions = [] - self.grid_spacing = 0 - self.label_gridlines = False - -# TODO: Add other chart types. Order of preference: -# - scatter plots -# - us/world maps - -class BaseChart(object): - """Base chart object with standard behavior for all other charts. - - Object attributes: - data: List of DataSeries objects. Chart subtypes provide convenience - functions (like AddLine, AddBars, AddSegment) to add more series - later. - left/right/bottom/top: Axis objects for the 4 different axes. 
- formatters: A list of callables which will be used to format this chart for - display. TODO: Need better documentation for how these - work. - auto_scale, auto_color, auto_legend: - These aliases let users access the default formatters without poking - around in self.formatters. If the user removes them from - self.formatters then they will no longer be enabled, even though they'll - still be accessible through the aliases. Similarly, re-assigning the - aliases has no effect on the contents of self.formatters. - display: This variable is reserved for backends to populate with a display - object. The intention is that the display object would be used to - render this chart. The details of what gets put here depends on - the specific backend you are using. - """ - - # Canonical ordering of position keys - _POSITION_CODES = 'yrxt' - - # TODO: Add more inline args to __init__ (esp. labels). - # TODO: Support multiple series in the constructor, if given. - def __init__(self): - """Construct a BaseChart object.""" - self.data = [] - - self._axes = {} - for code in self._POSITION_CODES: - self._axes[code] = [Axis()] - self._legend_labels = [] # AutoLegend fills this out - self._show_legend = False # AutoLegend fills this out - - # Aliases for default formatters - self.auto_color = formatters.AutoColor() - self.auto_scale = formatters.AutoScale() - self.auto_legend = formatters.AutoLegend - self.formatters = [self.auto_color, self.auto_scale, self.auto_legend] - # display is used to convert the chart into something displayable (like a - # url or img tag). - self.display = None - - def AddFormatter(self, formatter): - """Add a new formatter to the chart (convenience method).""" - self.formatters.append(formatter) - - def AddSeries(self, points, color=None, style=None, markers=None, - label=None): - """DEPRECATED - - Add a new series of data to the chart; return the DataSeries object.""" - warnings.warn('AddSeries is deprecated. Instead, call AddLine for ' - 'LineCharts, AddBars for BarCharts, AddSegment for ' - 'PieCharts ', DeprecationWarning, stacklevel=2) - series = DataSeries(points, color=color, style=style, markers=markers, - label=label) - self.data.append(series) - return series - - def GetDependentAxes(self): - """Return any dependent axes ('left' and 'right' by default for LineCharts, - although bar charts would use 'bottom' and 'top'). - """ - return self._axes[AxisPosition.LEFT] + self._axes[AxisPosition.RIGHT] - - def GetIndependentAxes(self): - """Return any independent axes (normally top & bottom, although horizontal - bar charts use left & right by default). - """ - return self._axes[AxisPosition.TOP] + self._axes[AxisPosition.BOTTOM] - - def GetDependentAxis(self): - """Return this chart's main dependent axis (often 'left', but - horizontal bar-charts use 'bottom'). - """ - return self.left - - def GetIndependentAxis(self): - """Return this chart's main independent axis (often 'bottom', but - horizontal bar-charts use 'left'). - """ - return self.bottom - - def _Clone(self): - """Make a deep copy this chart. - - Formatters & display will be missing from the copy, due to limitations in - deepcopy. - """ - orig_values = {} - # Things which deepcopy will likely choke on if it tries to copy. 
- uncopyables = ['formatters', 'display', 'auto_color', 'auto_scale', - 'auto_legend'] - for name in uncopyables: - orig_values[name] = getattr(self, name) - setattr(self, name, None) - clone = copy.deepcopy(self) - for name, orig_value in orig_values.iteritems(): - setattr(self, name, orig_value) - return clone - - def GetFormattedChart(self): - """Get a copy of the chart with formatting applied.""" - # Formatters need to mutate the chart, but we don't want to change it out - # from under the user. So, we work on a copy of the chart. - scratchpad = self._Clone() - for formatter in self.formatters: - formatter(scratchpad) - return scratchpad - - def GetMinMaxValues(self): - """Get the largest & smallest values in this chart, returned as - (min_value, max_value). Takes into account complciations like stacked data - series. - - For example, with non-stacked series, a chart with [1, 2, 3] and [4, 5, 6] - would return (1, 6). If the same chart was stacking the data series, it - would return (5, 9). - """ - MinPoint = lambda data: min(x for x in data if x is not None) - MaxPoint = lambda data: max(x for x in data if x is not None) - mins = [MinPoint(series.data) for series in self.data if series.data] - maxes = [MaxPoint(series.data) for series in self.data if series.data] - if not mins or not maxes: - return None, None # No data, just bail. - return min(mins), max(maxes) - - def AddAxis(self, position, axis): - """Add an axis to this chart in the given position. - - Args: - position: an AxisPosition object specifying the axis's position - axis: The axis to add, an Axis object - Returns: - the value of the axis parameter - """ - self._axes.setdefault(position, []).append(axis) - return axis - - def GetAxis(self, position): - """Get or create the first available axis in the given position. - - This is a helper method for the left, right, top, and bottom properties. - If the specified axis does not exist, it will be created. - - Args: - position: the position to search for - Returns: - The first axis in the given position - """ - # Not using setdefault here just in case, to avoid calling the Axis() - # constructor needlessly - if position in self._axes: - return self._axes[position][0] - else: - axis = Axis() - self._axes[position] = [axis] - return axis - - def SetAxis(self, position, axis): - """Set the first axis in the given position to the given value. - - This is a helper method for the left, right, top, and bottom properties. - - Args: - position: an AxisPosition object specifying the axis's position - axis: The axis to set, an Axis object - Returns: - the value of the axis parameter - """ - self._axes.setdefault(position, [None])[0] = axis - return axis - - def _GetAxes(self): - """Return a generator of (position_code, Axis) tuples for this chart's axes. - - The axes will be sorted by position using the canonical ordering sequence, - _POSITION_CODES. 
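  An illustrative sketch of the ordering (all names are from this module):

    chart = BaseChart()
    chart.AddAxis(AxisPosition.LEFT, Axis())
    codes = [code for code, axis in chart._GetAxes()]  # ['y', 'y', 'r', 'x', 't']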
- """ - for code in self._POSITION_CODES: - for axis in self._axes.get(code, []): - yield (code, axis) - - def _GetBottom(self): - return self.GetAxis(AxisPosition.BOTTOM) - - def _SetBottom(self, value): - self.SetAxis(AxisPosition.BOTTOM, value) - - bottom = property(_GetBottom, _SetBottom, - doc="""Get or set the bottom axis""") - - def _GetLeft(self): - return self.GetAxis(AxisPosition.LEFT) - - def _SetLeft(self, value): - self.SetAxis(AxisPosition.LEFT, value) - - left = property(_GetLeft, _SetLeft, - doc="""Get or set the left axis""") - - def _GetRight(self): - return self.GetAxis(AxisPosition.RIGHT) - - def _SetRight(self, value): - self.SetAxis(AxisPosition.RIGHT, value) - - right = property(_GetRight, _SetRight, - doc="""Get or set the right axis""") - - def _GetTop(self): - return self.GetAxis(AxisPosition.TOP) - - def _SetTop(self, value): - self.SetAxis(AxisPosition.TOP, value) - - top = property(_GetTop, _SetTop, - doc="""Get or set the top axis""") diff --git a/mapreduce/lib/graphy/formatters.py b/mapreduce/lib/graphy/formatters.py deleted file mode 100755 index 1e8be20..0000000 --- a/mapreduce/lib/graphy/formatters.py +++ /dev/null @@ -1,192 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2008 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""This module contains various formatters which can help format a chart -object. To use these, add them to your chart's list of formatters. For -example: - chart.formatters.append(InlineLegend) - chart.formatters.append(LabelSeparator(right=8)) - -Feel free to write your own formatter. Formatters are just callables that -modify the chart in some (hopefully useful) way. For example, the AutoColor -formatter makes sure each DataSeries has a color applied to it. The formatter -should take the chart to format as its only argument. - -(The formatters work on a deepcopy of the user's chart, so modifications -shouldn't leak back into the user's original chart) -""" - -def AutoLegend(chart): - """Automatically fill out the legend based on series labels. This will only - fill out the legend if is at least one series with a label. - """ - chart._show_legend = False - labels = [] - for series in chart.data: - if series.label is None: - labels.append('') - else: - labels.append(series.label) - chart._show_legend = True - if chart._show_legend: - chart._legend_labels = labels - - -class AutoColor(object): - """Automatically add colors to any series without colors. - - Object attributes: - colors: The list of colors (hex strings) to cycle through. You can modify - this list if you don't like the default colors. - """ - def __init__(self): - # TODO: Add a few more default colors. - # TODO: Add a default styles too, so if you don't specify color or - # style, you get a unique set of colors & styles for your data. 
- self.colors = ['0000ff', 'ff0000', '00dd00', '000000'] - - def __call__(self, chart): - index = -1 - for series in chart.data: - if series.style.color is None: - index += 1 - if index >= len(self.colors): - index = 0 - series.style.color = self.colors[index] - - -class AutoScale(object): - """If you don't set min/max on the dependent axes, this fills them in - automatically by calculating min/max dynamically from the data. - - You can set just min or just max and this formatter will fill in the other - value for you automatically. For example, if you only set min then this will - set max automatically, but leave min untouched. - - Charts can have multiple dependent axes (chart.left & chart.right, for - example.) If you set min/max on some axes but not others, then this formatter - copies your min/max to the un-set axes. For example, if you set up min/max on - only the right axis then your values will be automatically copied to the left - axis. (if you use different min/max values for different axes, the - precendence is undefined. So don't do that.) - """ - - def __init__(self, buffer=0.05): - """Create a new AutoScale formatter. - - Args: - buffer: percentage of extra space to allocate around the chart's axes. - """ - self.buffer = buffer - - def __call__(self, chart): - """Format the chart by setting the min/max values on its dependent axis.""" - if not chart.data: - return # Nothing to do. - min_value, max_value = chart.GetMinMaxValues() - if None in (min_value, max_value): - return # No data. Nothing to do. - - # Honor user's choice, if they've picked min/max. - for axis in chart.GetDependentAxes(): - if axis.min is not None: - min_value = axis.min - if axis.max is not None: - max_value = axis.max - - buffer = (max_value - min_value) * self.buffer # Stay away from edge. - - for axis in chart.GetDependentAxes(): - if axis.min is None: - axis.min = min_value - buffer - if axis.max is None: - axis.max = max_value + buffer - - -class LabelSeparator(object): - - """Adjust the label positions to avoid having them overlap. This happens for - any axis with minimum_label_spacing set. - """ - - def __init__(self, left=None, right=None, bottom=None): - self.left = left - self.right = right - self.bottom = bottom - - def __call__(self, chart): - self.AdjustLabels(chart.left, self.left) - self.AdjustLabels(chart.right, self.right) - self.AdjustLabels(chart.bottom, self.bottom) - - def AdjustLabels(self, axis, minimum_label_spacing): - if minimum_label_spacing is None: - return - if len(axis.labels) <= 1: # Nothing to adjust - return - if axis.max is not None and axis.min is not None: - # Find the spacing required to fit all labels evenly. - # Don't try to push them farther apart than that. 
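      # Illustrative: with axis.min = 0, axis.max = 10 and 3 labels, labels can
      # be pushed at most (10 - 0) / (3 - 1) = 5 units apart.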
- maximum_possible_spacing = (axis.max - axis.min) / (len(axis.labels) - 1) - if minimum_label_spacing > maximum_possible_spacing: - minimum_label_spacing = maximum_possible_spacing - - labels = [list(x) for x in zip(axis.label_positions, axis.labels)] - labels = sorted(labels, reverse=True) - - # First pass from the top, moving colliding labels downward - for i in range(1, len(labels)): - if labels[i - 1][0] - labels[i][0] < minimum_label_spacing: - new_position = labels[i - 1][0] - minimum_label_spacing - if axis.min is not None and new_position < axis.min: - new_position = axis.min - labels[i][0] = new_position - - # Second pass from the bottom, moving colliding labels upward - for i in range(len(labels) - 2, -1, -1): - if labels[i][0] - labels[i + 1][0] < minimum_label_spacing: - new_position = labels[i + 1][0] + minimum_label_spacing - if axis.max is not None and new_position > axis.max: - new_position = axis.max - labels[i][0] = new_position - - # Separate positions and labels - label_positions, labels = zip(*labels) - axis.labels = labels - axis.label_positions = label_positions - - -def InlineLegend(chart): - """Provide a legend for line charts by attaching labels to the right - end of each line. Supresses the regular legend. - """ - show = False - labels = [] - label_positions = [] - for series in chart.data: - if series.label is None: - labels.append('') - else: - labels.append(series.label) - show = True - label_positions.append(series.data[-1]) - - if show: - chart.right.min = chart.left.min - chart.right.max = chart.left.max - chart.right.labels = labels - chart.right.label_positions = label_positions - chart._show_legend = False # Supress the regular legend. diff --git a/mapreduce/lib/graphy/line_chart.py b/mapreduce/lib/graphy/line_chart.py deleted file mode 100755 index 37bf700..0000000 --- a/mapreduce/lib/graphy/line_chart.py +++ /dev/null @@ -1,122 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2008 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Code related to line charts.""" - -import copy -import warnings - -from mapreduce.lib.graphy import common - - -class LineStyle(object): - - """Represents the style for a line on a line chart. Also provides some - convenient presets. - - Object attributes (Passed directly to the Google Chart API. Check there for - details): - width: Width of the line - on: Length of a line segment (for dashed/dotted lines) - off: Length of a break (for dashed/dotted lines) - color: Color of the line. A hex string, like 'ff0000' for red. Optional, - AutoColor will fill this in for you automatically if empty. - - Some common styles, such as LineStyle.dashed, are available: - solid - dashed - dotted - thick_solid - thick_dashed - thick_dotted - """ - - # Widths - THIN = 1 - THICK = 2 - - # Patterns - # ((on, off) tuples, as passed to LineChart.AddLine) - SOLID = (1, 0) - DASHED = (8, 4) - DOTTED = (2, 4) - - def __init__(self, width, on, off, color=None): - """Construct a LineStyle. 
See class docstring for details on args.""" - self.width = width - self.on = on - self.off = off - self.color = color - - -LineStyle.solid = LineStyle(1, 1, 0) -LineStyle.dashed = LineStyle(1, 8, 4) -LineStyle.dotted = LineStyle(1, 2, 4) -LineStyle.thick_solid = LineStyle(2, 1, 0) -LineStyle.thick_dashed = LineStyle(2, 8, 4) -LineStyle.thick_dotted = LineStyle(2, 2, 4) - - -class LineChart(common.BaseChart): - - """Represents a line chart.""" - - def __init__(self, points=None): - super(LineChart, self).__init__() - if points is not None: - self.AddLine(points) - - def AddLine(self, points, label=None, color=None, - pattern=LineStyle.SOLID, width=LineStyle.THIN, markers=None): - """Add a new line to the chart. - - This is a convenience method which constructs the DataSeries and appends it - for you. It returns the new series. - - points: List of equally-spaced y-values for the line - label: Name of the line (used for the legend) - color: Hex string, like 'ff0000' for red - pattern: Tuple for (length of segment, length of gap). i.e. - LineStyle.DASHED - width: Width of the line (i.e. LineStyle.THIN) - markers: List of Marker objects to attach to this line (see DataSeries - for more info) - """ - if color is not None and isinstance(color[0], common.Marker): - warnings.warn('Your code may be broken! ' - 'You passed a list of Markers instead of a color. The ' - 'old argument order (markers before color) is deprecated.', - DeprecationWarning, stacklevel=2) - style = LineStyle(width, pattern[0], pattern[1], color=color) - series = common.DataSeries(points, label=label, style=style, - markers=markers) - self.data.append(series) - return series - - def AddSeries(self, points, color=None, style=LineStyle.solid, markers=None, - label=None): - """DEPRECATED""" - warnings.warn('LineChart.AddSeries is deprecated. Call AddLine instead. ', - DeprecationWarning, stacklevel=2) - return self.AddLine(points, color=color, width=style.width, - pattern=(style.on, style.off), markers=markers, - label=label) - - -class Sparkline(LineChart): - """Represent a sparkline. These behave like LineCharts, - mostly, but come without axes. - """ diff --git a/mapreduce/lib/graphy/pie_chart.py b/mapreduce/lib/graphy/pie_chart.py deleted file mode 100755 index 5ec3418..0000000 --- a/mapreduce/lib/graphy/pie_chart.py +++ /dev/null @@ -1,178 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2008 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Code for pie charts.""" - -import warnings - -from mapreduce.lib.graphy import common -from mapreduce.lib.graphy import util - - -class Segment(common.DataSeries): - """A single segment of the pie chart. - - Object attributes: - size: relative size of the segment - label: label of the segment (if any) - color: color of the segment (if any) - """ - def __init__(self, size, label=None, color=None): - if label is not None and util._IsColor(label): - warnings.warn('Your code may be broken! ' - 'Label looks like a hex triplet; it might be a color. 
' - 'The old argument order (color before label) is ' - 'deprecated.', - DeprecationWarning, stacklevel=2) - style = common._BasicStyle(color) - super(Segment, self).__init__([size], label=label, style=style) - assert size >= 0 - - def _GetSize(self): - return self.data[0] - - def _SetSize(self, value): - assert value >= 0 - self.data[0] = value - - size = property(_GetSize, _SetSize, - doc = """The relative size of this pie segment.""") - - # Since Segments are so simple, provide color for convenience. - def _GetColor(self): - return self.style.color - - def _SetColor(self, color): - self.style.color = color - - color = property(_GetColor, _SetColor, - doc = """The color of this pie segment.""") - - -class PieChart(common.BaseChart): - """Represents a pie chart. - - The pie chart consists of a single "pie" by default, but additional pies - may be added using the AddPie method. The Google Chart API will display - the pies as concentric circles, with pie #0 on the inside; other backends - may display the pies differently. - """ - - def __init__(self, points=None, labels=None, colors=None): - """Constructor for PieChart objects. - - Creates a pie chart with a single pie. - - Args: - points: A list of data points for the pie chart; - i.e., relative sizes of the pie segments - labels: A list of labels for the pie segments. - TODO: Allow the user to pass in None as one of - the labels in order to skip that label. - colors: A list of colors for the pie segments, as hex strings - (f.ex. '0000ff' for blue). If there are less colors than pie - segments, the Google Chart API will attempt to produce a smooth - color transition between segments by spreading the colors across - them. - """ - super(PieChart, self).__init__() - self.formatters = [] - self._colors = None - if points: - self.AddPie(points, labels, colors) - - def AddPie(self, points, labels=None, colors=None): - """Add a whole pie to the chart. - - Args: - points: A list of pie segment sizes - labels: A list of labels for the pie segments - colors: A list of colors for the segments. Missing colors will be chosen - automatically. - Return: - The index of the newly added pie. - """ - num_colors = len(colors or []) - num_labels = len(labels or []) - pie_index = len(self.data) - self.data.append([]) - for i, pt in enumerate(points): - label = None - if i < num_labels: - label = labels[i] - color = None - if i < num_colors: - color = colors[i] - self.AddSegment(pt, label=label, color=color, pie_index=pie_index) - return pie_index - - def AddSegments(self, points, labels, colors): - """DEPRECATED.""" - warnings.warn('PieChart.AddSegments is deprecated. Call AddPie instead. ', - DeprecationWarning, stacklevel=2) - num_colors = len(colors or []) - for i, pt in enumerate(points): - assert pt >= 0 - label = labels[i] - color = None - if i < num_colors: - color = colors[i] - self.AddSegment(pt, label=label, color=color) - - def AddSegment(self, size, label=None, color=None, pie_index=0): - """Add a pie segment to this chart, and return the segment. - - size: The size of the segment. - label: The label for the segment. - color: The color of the segment, or None to automatically choose the color. - pie_index: The index of the pie that will receive the new segment. - By default, the chart has one pie (pie #0); use the AddPie method to - add more pies. - """ - if isinstance(size, Segment): - warnings.warn("AddSegment(segment) is deprecated. 
Use AddSegment(size, " - "label, color) instead", DeprecationWarning, stacklevel=2) - segment = size - else: - segment = Segment(size, label=label, color=color) - assert segment.size >= 0 - if pie_index == 0 and not self.data: - # Create the default pie - self.data.append([]) - assert (pie_index >= 0 and pie_index < len(self.data)) - self.data[pie_index].append(segment) - return segment - - def AddSeries(self, points, color=None, style=None, markers=None, label=None): - """DEPRECATED - - Add a new segment to the chart and return it. - - The segment must contain exactly one data point; all parameters - other than color and label are ignored. - """ - warnings.warn('PieChart.AddSeries is deprecated. Call AddSegment or ' - 'AddSegments instead.', DeprecationWarning) - return self.AddSegment(Segment(points[0], color=color, label=label)) - - def SetColors(self, *colors): - """Change the colors of this chart to the specified list of colors. - - Note that this will completely override the individual colors specified - in the pie segments. Missing colors will be interpolated, so that the - list of colors covers all segments in all the pies. - """ - self._colors = colors diff --git a/mapreduce/lib/graphy/util.py b/mapreduce/lib/graphy/util.py deleted file mode 100755 index ca4b7ad..0000000 --- a/mapreduce/lib/graphy/util.py +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env python -def _IsColor(color): - """Try to determine if color is a hex color string. - Labels that look like hex colors will match too, unfortunately.""" - if not isinstance(color, basestring): - return False - color = color.strip('#') - if len(color) != 3 and len(color) != 6: - return False - hex_letters = '0123456789abcdefABCDEF' - for letter in color: - if letter not in hex_letters: - return False - return True diff --git a/mapreduce/lib/key_range/__init__.py b/mapreduce/lib/key_range/__init__.py deleted file mode 100755 index b62f9af..0000000 --- a/mapreduce/lib/key_range/__init__.py +++ /dev/null @@ -1,687 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2007 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - - - - - - - -"""Key range representation and splitting.""" - - -import os - - -try: - from mapreduce.lib import simplejson -except ImportError: - simplejson = None - -from google.appengine.api import datastore -from google.appengine.api import namespace_manager -from google.appengine.datastore import datastore_pb -from google.appengine.ext import db - - -class Error(Exception): - """Base class for exceptions in this module.""" - - -class KeyRangeError(Error): - """Error while trying to generate a KeyRange.""" - - -class SimplejsonUnavailableError(Error): - """Error while using json functionality whith unavailable simplejson.""" - - -class KeyRange(object): - """Represents a range of keys in the datastore. - - A KeyRange object represents a key range - (key_start, include_start, key_end, include_end) - and a scan direction (KeyRange.DESC or KeyRange.ASC). 
- """ - - - DESC = "DESC" - ASC = "ASC" - - def __init__(self, - key_start=None, - key_end=None, - direction=None, - include_start=True, - include_end=True, - namespace=None, - _app=None): - """Initialize a KeyRange object. - - Args: - key_start: The starting key for this range. - key_end: The ending key for this range. - direction: The direction of the query for this range. - include_start: Whether the start key should be included in the range. - include_end: Whether the end key should be included in the range. - namespace: The namespace for this range. If None then the current - namespace is used. - """ - - - - - if direction is None: - direction = KeyRange.ASC - assert direction in (KeyRange.ASC, KeyRange.DESC) - self.direction = direction - self.key_start = key_start - self.key_end = key_end - self.include_start = include_start - self.include_end = include_end - if namespace is not None: - self.namespace = namespace - else: - self.namespace = namespace_manager.get_namespace() - self._app = _app - - def __str__(self): - if self.include_start: - left_side = "[" - else: - left_side = "(" - if self.include_end: - right_side = "]" - else: - right_side = "(" - return "%s%s%r to %r%s" % (self.direction, left_side, self.key_start, - self.key_end, right_side) - - def __repr__(self): - return ("key_range.KeyRange(key_start=%r,key_end=%r,direction=%r," - "include_start=%r,include_end=%r, namespace=%r)") % ( - self.key_start, - self.key_end, - self.direction, - self.include_start, - self.include_end, - self.namespace) - - def advance(self, key): - """Updates the start of the range immediately past the specified key. - - Args: - key: A db.Key. - """ - self.include_start = False - self.key_start = key - - def filter_query(self, query): - """Add query filter to restrict to this key range. - - Args: - query: A db.Query instance. - - Returns: - The input query restricted to this key range. - """ - assert isinstance(query, db.Query) - if self.include_start: - start_comparator = ">=" - else: - start_comparator = ">" - if self.include_end: - end_comparator = "<=" - else: - end_comparator = "<" - if self.key_start: - query.filter("__key__ %s" % start_comparator, self.key_start) - if self.key_end: - query.filter("__key__ %s" % end_comparator, self.key_end) - return query - - def filter_datastore_query(self, query): - """Add query filter to restrict to this key range. - - Args: - query: A datastore.Query instance. - - Returns: - The input query restricted to this key range. - """ - assert isinstance(query, datastore.Query) - if self.include_start: - start_comparator = ">=" - else: - start_comparator = ">" - if self.include_end: - end_comparator = "<=" - else: - end_comparator = "<" - if self.key_start: - query.update({"__key__ %s" % start_comparator: self.key_start}) - if self.key_end: - query.update({"__key__ %s" % end_comparator: self.key_end}) - return query - - def __get_direction(self, asc, desc): - """Check that self.direction is in (KeyRange.ASC, KeyRange.DESC). - - Args: - asc: Argument to return if self.direction is KeyRange.ASC - desc: Argument to return if self.direction is KeyRange.DESC - - Returns: - asc or desc appropriately - - Raises: - KeyRangeError: if self.direction is not in (KeyRange.ASC, KeyRange.DESC). 
- """ - if self.direction == KeyRange.ASC: - return asc - elif self.direction == KeyRange.DESC: - return desc - else: - raise KeyRangeError("KeyRange direction unexpected: %s", self.direction) - - def make_directed_query(self, kind_class, keys_only=False): - """Construct a query for this key range, including the scan direction. - - Args: - kind_class: A kind implementation class. - keys_only: bool, default False, use keys_only on Query? - - Returns: - A db.Query instance. - - Raises: - KeyRangeError: if self.direction is not in (KeyRange.ASC, KeyRange.DESC). - """ - assert self._app is None, '_app is not supported for db.Query' - direction = self.__get_direction("", "-") - query = db.Query(kind_class, namespace=self.namespace, keys_only=keys_only) - query.order("%s__key__" % direction) - - query = self.filter_query(query) - return query - - def make_directed_datastore_query(self, kind, keys_only=False): - """Construct a query for this key range, including the scan direction. - - Args: - kind: A string. - keys_only: bool, default False, use keys_only on Query? - - Returns: - A datastore.Query instance. - - Raises: - KeyRangeError: if self.direction is not in (KeyRange.ASC, KeyRange.DESC). - """ - direction = self.__get_direction(datastore.Query.ASCENDING, - datastore.Query.DESCENDING) - query = datastore.Query(kind, _app=self._app, keys_only=keys_only) - query.Order(("__key__", direction)) - - query = self.filter_datastore_query(query) - return query - - def make_ascending_query(self, kind_class, keys_only=False): - """Construct a query for this key range without setting the scan direction. - - Args: - kind_class: A kind implementation class. - keys_only: bool, default False, query only for keys. - - Returns: - A db.Query instance. - """ - assert self._app is None, '_app is not supported for db.Query' - query = db.Query(kind_class, namespace=self.namespace, keys_only=keys_only) - query.order("__key__") - - query = self.filter_query(query) - return query - - def make_ascending_datastore_query(self, kind, keys_only=False): - """Construct a query for this key range without setting the scan direction. - - Args: - kind: A string. - keys_only: bool, default False, use keys_only on Query? - - Returns: - A datastore.Query instance. - """ - query = datastore.Query(kind, - namespace=self.namespace, - _app=self._app, - keys_only=keys_only) - query.Order(("__key__", datastore.Query.ASCENDING)) - - query = self.filter_datastore_query(query) - return query - - def split_range(self, batch_size=0): - """Split this key range into a list of at most two ranges. - - This method attempts to split the key range approximately in half. - Numeric ranges are split in the middle into two equal ranges and - string ranges are split lexicographically in the middle. If the - key range is smaller than batch_size it is left unsplit. - - Note that splitting is done without knowledge of the distribution - of actual entities in the key range, so there is no guarantee (nor - any particular reason to believe) that the entities of the range - are evenly split. - - Args: - batch_size: The maximum size of a key range that should not be split. - - Returns: - A list of one or two key ranges covering the same space as this range. 
- """ - key_start = self.key_start - key_end = self.key_end - include_start = self.include_start - include_end = self.include_end - - key_pairs = [] - if not key_start: - key_pairs.append((key_start, include_start, key_end, include_end, - KeyRange.ASC)) - elif not key_end: - key_pairs.append((key_start, include_start, key_end, include_end, - KeyRange.DESC)) - else: - key_split = KeyRange.split_keys(key_start, key_end, batch_size) - first_include_end = True - - if key_split == key_start: - first_include_end = first_include_end and include_start - - key_pairs.append((key_start, include_start, - key_split, first_include_end, - KeyRange.DESC)) - - second_include_end = include_end - - if key_split == key_end: - second_include_end = False - key_pairs.append((key_split, False, - key_end, second_include_end, - KeyRange.ASC)) - - ranges = [KeyRange(key_start=start, - include_start=include_start, - key_end=end, - include_end=include_end, - direction=direction, - namespace=self.namespace, - _app=self._app) - for (start, include_start, end, include_end, direction) - in key_pairs] - - return ranges - - def __hash__(self): - return hash([self.key_start, - self.key_end, - self.direction, - self._app, - self.namespace]) - - def __cmp__(self, other): - """Compare two key ranges. - - Key ranges with a value of None for key_start or key_end, are always - considered to have include_start=False or include_end=False, respectively, - when comparing. Since None indicates an unbounded side of the range, - the include specifier is meaningless. The ordering generated is total - but somewhat arbitrary. - - Args: - other: An object to compare to this one. - - Returns: - -1: if this key range is less than other. - 0: if this key range is equal to other. - 1: if this key range is greater than other. - """ - if not isinstance(other, KeyRange): - return 1 - - self_list = [self.key_start, self.key_end, self.direction, - self.include_start, self.include_end, self._app, - self.namespace] - if not self.key_start: - self_list[3] = False - if not self.key_end: - self_list[4] = False - - other_list = [other.key_start, - other.key_end, - other.direction, - other.include_start, - other.include_end, - other._app, - other.namespace] - if not other.key_start: - other_list[3] = False - if not other.key_end: - other_list[4] = False - - return cmp(self_list, other_list) - - @staticmethod - def bisect_string_range(start, end): - """Returns a string that is approximately in the middle of the range. - - (start, end) is treated as a string range, and it is assumed - start <= end in the usual lexicographic string ordering. The output key - mid is guaranteed to satisfy start <= mid <= end. - - The method proceeds by comparing initial characters of start and - end. When the characters are equal, they are appended to the mid - string. In the first place that the characters differ, the - difference characters are averaged and this average is appended to - the mid string. If averaging resulted in rounding down, and - additional character is added to the mid string to make up for the - rounding down. This extra step is necessary for correctness in - the case that the average of the two characters is equal to the - character in the start string. - - This method makes the assumption that most keys are ascii and it - attempts to perform splitting within the ascii range when that - results in a valid split. - - Args: - start: A string. - end: A string such that start <= end. - - Returns: - A string mid such that start <= mid <= end. 
- """ - if start == end: - return start - start += "\0" - end += "\0" - midpoint = [] - - - expected_max = 127 - for i in xrange(min(len(start), len(end))): - if start[i] == end[i]: - midpoint.append(start[i]) - else: - ord_sum = ord(start[i]) + ord(end[i]) - midpoint.append(unichr(ord_sum / 2)) - if ord_sum % 2: - if len(start) > i + 1: - ord_start = ord(start[i+1]) - else: - ord_start = 0 - if ord_start < expected_max: - - - ord_split = (expected_max + ord_start) / 2 - else: - - ord_split = (0xFFFF + ord_start) / 2 - midpoint.append(unichr(ord_split)) - break - return "".join(midpoint) - - @staticmethod - def split_keys(key_start, key_end, batch_size): - """Return a key that is between key_start and key_end inclusive. - - This method compares components of the ancestor paths of key_start - and key_end. The first place in the path that differs is - approximately split in half. If the kind components differ, a new - non-existent kind halfway between the two is used to split the - space. If the id_or_name components differ, then a new id_or_name - that is halfway between the two is selected. If the lower - id_or_name is numeric and the upper id_or_name is a string, then - the minumum string key u'\0' is used as the split id_or_name. The - key that is returned is the shared portion of the ancestor path - followed by the generated split component. - - Args: - key_start: A db.Key instance for the lower end of a range. - key_end: A db.Key instance for the upper end of a range. - batch_size: The maximum size of a range that should not be split. - - Returns: - A db.Key instance, k, such that key_start <= k <= key_end. - """ - assert key_start.app() == key_end.app() - assert key_start.namespace() == key_end.namespace() - path1 = key_start.to_path() - path2 = key_end.to_path() - len1 = len(path1) - len2 = len(path2) - assert len1 % 2 == 0 - assert len2 % 2 == 0 - out_path = [] - min_path_len = min(len1, len2) / 2 - for i in xrange(min_path_len): - kind1 = path1[2*i] - kind2 = path2[2*i] - - if kind1 != kind2: - split_kind = KeyRange.bisect_string_range(kind1, kind2) - out_path.append(split_kind) - out_path.append(unichr(0)) - break - - - - - last = (len1 == len2 == 2*(i + 1)) - - id_or_name1 = path1[2*i + 1] - id_or_name2 = path2[2*i + 1] - id_or_name_split = KeyRange._split_id_or_name( - id_or_name1, id_or_name2, batch_size, last) - if id_or_name1 == id_or_name_split: - out_path.append(kind1) - out_path.append(id_or_name1) - else: - out_path.append(kind1) - out_path.append(id_or_name_split) - break - - return db.Key.from_path( - *out_path, - **{"_app": key_start.app(), "namespace": key_start.namespace()}) - - @staticmethod - def _split_id_or_name(id_or_name1, id_or_name2, batch_size, maintain_batches): - """Return an id_or_name that is between id_or_name1 an id_or_name2. - - Attempts to split the range [id_or_name1, id_or_name2] in half, - unless maintain_batches is true and the size of the range - [id_or_name1, id_or_name2] is less than or equal to batch_size. - - Args: - id_or_name1: A number or string or the id_or_name component of a key - id_or_name2: A number or string or the id_or_name component of a key - batch_size: The range size that will not be split if maintain_batches - is true. - maintain_batches: A boolean for whether to keep small ranges intact. - - Returns: - An id_or_name such that id_or_name1 <= id_or_name <= id_or_name2. 
- """ - if (isinstance(id_or_name1, (int, long)) and - isinstance(id_or_name2, (int, long))): - if not maintain_batches or id_or_name2 - id_or_name1 > batch_size: - return (id_or_name1 + id_or_name2) / 2 - else: - return id_or_name1 - elif (isinstance(id_or_name1, basestring) and - isinstance(id_or_name2, basestring)): - return KeyRange.bisect_string_range(id_or_name1, id_or_name2) - else: - if (not isinstance(id_or_name1, (int, long)) or - not isinstance(id_or_name2, basestring)): - raise KeyRangeError("Wrong key order: %r, %r" % - (id_or_name1, id_or_name2)) - - zero_ch = unichr(0) - if id_or_name2 == zero_ch: - return (id_or_name1 + 2**63 - 1) / 2 - return zero_ch - - @staticmethod - def guess_end_key(kind, - key_start, - probe_count=30, - split_rate=5): - """Guess the end of a key range with a binary search of probe queries. - - When the 'key_start' parameter has a key hierarchy, this function will - only determine the key range for keys in a similar hierarchy. That means - if the keys are in the form: - - kind=Foo, name=bar/kind=Stuff, name=meep - - only this range will be probed: - - kind=Foo, name=*/kind=Stuff, name=* - - That means other entities of kind 'Stuff' that are children of another - parent entity kind will be skipped: - - kind=Other, name=cookie/kind=Stuff, name=meep - - Args: - key_start: The starting key of the search range. In most cases this - should be id = 0 or name = '\0'. - kind: String name of the entity kind. - probe_count: Optional, how many probe queries to run. - split_rate: Exponential rate to use for splitting the range on the - way down from the full key space. For smaller ranges this should - be higher so more of the keyspace is skipped on initial descent. - - Returns: - datastore.Key that is guaranteed to be as high or higher than the - highest key existing for this Kind. Doing a query between 'key_start' and - this returned Key (inclusive) will contain all entities of this Kind. - """ - app = key_start.app() - namespace = key_start.namespace() - - full_path = key_start.to_path() - for index, piece in enumerate(full_path): - if index % 2 == 0: - - continue - elif isinstance(piece, basestring): - - full_path[index] = u"\xffff" - else: - - full_path[index] = 2**63 - 1 - - key_end = datastore.Key.from_path(*full_path, - **{"_app": app, "namespace": namespace}) - split_key = key_end - - for i in xrange(probe_count): - for j in xrange(split_rate): - split_key = KeyRange.split_keys(key_start, split_key, 1) - results = datastore.Query( - kind, - {"__key__ >": split_key}, - namespace=namespace, - _app=app, - keys_only=True).Get(1) - if results: - if results[0].name() and not key_start.name(): - - - return KeyRange.guess_end_key( - kind, results[0], probe_count - 1, split_rate) - else: - split_rate = 1 - key_start = results[0] - split_key = key_end - else: - key_end = split_key - - return key_end - - def to_json(self): - """Serialize KeyRange to json. - - Returns: - string with KeyRange json representation. 
- """ - if simplejson is None: - raise SimplejsonUnavailableError( - "JSON functionality requires simplejson to be available") - - def key_to_str(key): - if key: - return str(key) - else: - return None - - obj_dict = { - "direction": self.direction, - "key_start": key_to_str(self.key_start), - "key_end": key_to_str(self.key_end), - "include_start": self.include_start, - "include_end": self.include_end, - "namespace": self.namespace, - } - if self._app: - obj_dict["_app"] = self._app - - return simplejson.dumps(obj_dict, sort_keys=True) - - - @staticmethod - def from_json(json_str): - """Deserialize KeyRange from its json representation. - - Args: - json_str: string with json representation created by key_range_to_json. - - Returns: - deserialized KeyRange instance. - """ - if simplejson is None: - raise SimplejsonUnavailableError( - "JSON functionality requires simplejson to be available") - - def key_from_str(key_str): - if key_str: - return db.Key(key_str) - else: - return None - - json = simplejson.loads(json_str) - return KeyRange(key_from_str(json["key_start"]), - key_from_str(json["key_end"]), - json["direction"], - json["include_start"], - json["include_end"], - json.get("namespace"), - _app=json.get("_app")) diff --git a/mapreduce/lib/simplejson/README b/mapreduce/lib/simplejson/README deleted file mode 100755 index 6284258..0000000 --- a/mapreduce/lib/simplejson/README +++ /dev/null @@ -1,13 +0,0 @@ -Simplejson library - -The web site is http://undefined.org/python/#simple_json - -This copy was downloaded from -http://pypi.python.org/packages/source/s/simplejson/simplejson-2.0.5.tar.gz - -simplejson is licensed under the MIT open source license. - -Local changes: - -- Changed imports to make mapreduce library hermetic. - diff --git a/mapreduce/lib/simplejson/__init__.py b/mapreduce/lib/simplejson/__init__.py deleted file mode 100755 index 83eec57..0000000 --- a/mapreduce/lib/simplejson/__init__.py +++ /dev/null @@ -1,314 +0,0 @@ -#!/usr/bin/env python -r"""A simple, fast, extensible JSON encoder and decoder - -JSON (JavaScript Object Notation) is a subset of -JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data -interchange format. - -simplejson exposes an API familiar to uses of the standard library -marshal and pickle modules. 
- -Encoding basic Python object hierarchies:: - - >>> import simplejson - >>> simplejson.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) - '["foo", {"bar": ["baz", null, 1.0, 2]}]' - >>> print simplejson.dumps("\"foo\bar") - "\"foo\bar" - >>> print simplejson.dumps(u'\u1234') - "\u1234" - >>> print simplejson.dumps('\\') - "\\" - >>> print simplejson.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) - {"a": 0, "b": 0, "c": 0} - >>> from StringIO import StringIO - >>> io = StringIO() - >>> simplejson.dump(['streaming API'], io) - >>> io.getvalue() - '["streaming API"]' - -Compact encoding:: - - >>> import simplejson - >>> compact = simplejson.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':')) - >>> # Can't assume dict ordering - >>> compact in ('[1,2,3,{"4":5,"6":7}]', '[1,2,3,{"6":7,"4":5}]') - True - -Pretty printing (using repr() because of extraneous whitespace in the output):: - - >>> import simplejson - >>> print repr(simplejson.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4)) - '{\n "4": 5, \n "6": 7\n}' - -Decoding JSON:: - - >>> import simplejson - >>> simplejson.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == ["foo", {"bar":["baz", None, 1.0, 2]}] - True - >>> simplejson.loads('"\\"foo\\bar"') == '"foo\x08ar' - True - >>> from StringIO import StringIO - >>> io = StringIO('["streaming API"]') - >>> simplejson.load(io) == ["streaming API"] - True - -Specializing JSON object decoding:: - - >>> import simplejson - >>> def as_complex(dct): - ... if '__complex__' in dct: - ... return complex(dct['real'], dct['imag']) - ... return dct - ... - >>> simplejson.loads('{"__complex__": true, "real": 1, "imag": 2}', - ... object_hook=as_complex) - (1+2j) - >>> from decimal import Decimal - >>> simplejson.loads('1.1', parse_float=Decimal) == Decimal("1.1") - True - -Extending JSONEncoder:: - - >>> import simplejson - >>> class ComplexEncoder(simplejson.JSONEncoder): - ... def default(self, obj): - ... if isinstance(obj, complex): - ... return [obj.real, obj.imag] - ... return simplejson.JSONEncoder.default(self, obj) - ... - >>> dumps(2 + 1j, cls=ComplexEncoder) - '[2.0, 1.0]' - >>> ComplexEncoder().encode(2 + 1j) - '[2.0, 1.0]' - >>> ''.join(ComplexEncoder().iterencode(2 + 1j)) - '[2.0, 1.0]' - - -Using simplejson from the shell to validate and -pretty-print:: - - $ echo '{"json":"obj"}' | python -msimplejson.tool - { - "json": "obj" - } - $ echo '{ 1.2:3.4}' | python -msimplejson.tool - Expecting property name: line 1 column 2 (char 2) -""" -__version__ = '2.0.5' -__all__ = [ - 'dump', 'dumps', 'load', 'loads', - 'JSONDecoder', 'JSONEncoder', -] - -from decoder import JSONDecoder -from encoder import JSONEncoder - -_default_encoder = JSONEncoder( - skipkeys=False, - ensure_ascii=True, - check_circular=True, - allow_nan=True, - indent=None, - separators=None, - encoding='utf-8', - default=None, -) - -def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, - allow_nan=True, cls=None, indent=None, separators=None, - encoding='utf-8', default=None, **kw): - """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a - ``.write()``-supporting file-like object). - - If ``skipkeys`` is ``True`` then ``dict`` keys that are not basic types - (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) - will be skipped instead of raising a ``TypeError``. - - If ``ensure_ascii`` is ``False``, then the some chunks written to ``fp`` - may be ``unicode`` instances, subject to normal Python ``str`` to - ``unicode`` coercion rules. 
Unless ``fp.write()`` explicitly - understands ``unicode`` (as in ``codecs.getwriter()``) this is likely - to cause an error. - - If ``check_circular`` is ``False``, then the circular reference check - for container types will be skipped and a circular reference will - result in an ``OverflowError`` (or worse). - - If ``allow_nan`` is ``False``, then it will be a ``ValueError`` to - serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) - in strict compliance of the JSON specification, instead of using the - JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). - - If ``indent`` is a non-negative integer, then JSON array elements and object - members will be pretty-printed with that indent level. An indent level - of 0 will only insert newlines. ``None`` is the most compact representation. - - If ``separators`` is an ``(item_separator, dict_separator)`` tuple - then it will be used instead of the default ``(', ', ': ')`` separators. - ``(',', ':')`` is the most compact JSON representation. - - ``encoding`` is the character encoding for str instances, default is UTF-8. - - ``default(obj)`` is a function that should return a serializable version - of obj or raise TypeError. The default simply raises TypeError. - - To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the - ``.default()`` method to serialize additional types), specify it with - the ``cls`` kwarg. - - """ - # cached encoder - if (skipkeys is False and ensure_ascii is True and - check_circular is True and allow_nan is True and - cls is None and indent is None and separators is None and - encoding == 'utf-8' and default is None and not kw): - iterable = _default_encoder.iterencode(obj) - else: - if cls is None: - cls = JSONEncoder - iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, - check_circular=check_circular, allow_nan=allow_nan, indent=indent, - separators=separators, encoding=encoding, - default=default, **kw).iterencode(obj) - # could accelerate with writelines in some versions of Python, at - # a debuggability cost - for chunk in iterable: - fp.write(chunk) - - -def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, - allow_nan=True, cls=None, indent=None, separators=None, - encoding='utf-8', default=None, **kw): - """Serialize ``obj`` to a JSON formatted ``str``. - - If ``skipkeys`` is ``True`` then ``dict`` keys that are not basic types - (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) - will be skipped instead of raising a ``TypeError``. - - If ``ensure_ascii`` is ``False``, then the return value will be a - ``unicode`` instance subject to normal Python ``str`` to ``unicode`` - coercion rules instead of being escaped to an ASCII ``str``. - - If ``check_circular`` is ``False``, then the circular reference check - for container types will be skipped and a circular reference will - result in an ``OverflowError`` (or worse). - - If ``allow_nan`` is ``False``, then it will be a ``ValueError`` to - serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in - strict compliance of the JSON specification, instead of using the - JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). - - If ``indent`` is a non-negative integer, then JSON array elements and - object members will be pretty-printed with that indent level. An indent - level of 0 will only insert newlines. ``None`` is the most compact - representation. 
- - If ``separators`` is an ``(item_separator, dict_separator)`` tuple - then it will be used instead of the default ``(', ', ': ')`` separators. - ``(',', ':')`` is the most compact JSON representation. - - ``encoding`` is the character encoding for str instances, default is UTF-8. - - ``default(obj)`` is a function that should return a serializable version - of obj or raise TypeError. The default simply raises TypeError. - - To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the - ``.default()`` method to serialize additional types), specify it with - the ``cls`` kwarg. - - """ - # cached encoder - if (skipkeys is False and ensure_ascii is True and - check_circular is True and allow_nan is True and - cls is None and indent is None and separators is None and - encoding == 'utf-8' and default is None and not kw): - return _default_encoder.encode(obj) - if cls is None: - cls = JSONEncoder - return cls( - skipkeys=skipkeys, ensure_ascii=ensure_ascii, - check_circular=check_circular, allow_nan=allow_nan, indent=indent, - separators=separators, encoding=encoding, default=default, - **kw).encode(obj) - - -_default_decoder = JSONDecoder(encoding=None, object_hook=None) - - -def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, - parse_int=None, parse_constant=None, **kw): - """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing - a JSON document) to a Python object. - - If the contents of ``fp`` is encoded with an ASCII based encoding other - than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must - be specified. Encodings that are not ASCII based (such as UCS-2) are - not allowed, and should be wrapped with - ``codecs.getreader(fp)(encoding)``, or simply decoded to a ``unicode`` - object and passed to ``loads()`` - - ``object_hook`` is an optional function that will be called with the - result of any object literal decode (a ``dict``). The return value of - ``object_hook`` will be used instead of the ``dict``. This feature - can be used to implement custom decoders (e.g. JSON-RPC class hinting). - - To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` - kwarg. - - """ - return loads(fp.read(), - encoding=encoding, cls=cls, object_hook=object_hook, - parse_float=parse_float, parse_int=parse_int, - parse_constant=parse_constant, **kw) - - -def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, - parse_int=None, parse_constant=None, **kw): - """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON - document) to a Python object. - - If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding - other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name - must be specified. Encodings that are not ASCII based (such as UCS-2) - are not allowed and should be decoded to ``unicode`` first. - - ``object_hook`` is an optional function that will be called with the - result of any object literal decode (a ``dict``). The return value of - ``object_hook`` will be used instead of the ``dict``. This feature - can be used to implement custom decoders (e.g. JSON-RPC class hinting). - - ``parse_float``, if specified, will be called with the string - of every JSON float to be decoded. By default this is equivalent to - float(num_str). This can be used to use another datatype or parser - for JSON floats (e.g. decimal.Decimal). - - ``parse_int``, if specified, will be called with the string - of every JSON int to be decoded. 
By default this is equivalent to - int(num_str). This can be used to use another datatype or parser - for JSON integers (e.g. float). - - ``parse_constant``, if specified, will be called with one of the - following strings: -Infinity, Infinity, NaN, null, true, false. - This can be used to raise an exception if invalid JSON numbers - are encountered. - - To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` - kwarg. - - """ - if (cls is None and encoding is None and object_hook is None and - parse_int is None and parse_float is None and - parse_constant is None and not kw): - return _default_decoder.decode(s) - if cls is None: - cls = JSONDecoder - if object_hook is not None: - kw['object_hook'] = object_hook - if parse_float is not None: - kw['parse_float'] = parse_float - if parse_int is not None: - kw['parse_int'] = parse_int - if parse_constant is not None: - kw['parse_constant'] = parse_constant - return cls(encoding=encoding, **kw).decode(s) diff --git a/mapreduce/lib/simplejson/decoder.py b/mapreduce/lib/simplejson/decoder.py deleted file mode 100755 index 6926ec8..0000000 --- a/mapreduce/lib/simplejson/decoder.py +++ /dev/null @@ -1,334 +0,0 @@ -#!/usr/bin/env python -"""Implementation of JSONDecoder -""" -import re -import sys -import struct - -from mapreduce.lib.simplejson.scanner import make_scanner -try: - from mapreduce.lib.simplejson._speedups import scanstring as c_scanstring -except ImportError: - c_scanstring = None - -__all__ = ['JSONDecoder'] - -FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL - -def _floatconstants(): - _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') - if sys.byteorder != 'big': - _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] - nan, inf = struct.unpack('dd', _BYTES) - return nan, inf, -inf - -NaN, PosInf, NegInf = _floatconstants() - - -def linecol(doc, pos): - lineno = doc.count('\n', 0, pos) + 1 - if lineno == 1: - colno = pos - else: - colno = pos - doc.rindex('\n', 0, pos) - return lineno, colno - - -def errmsg(msg, doc, pos, end=None): - # Note that this function is called from _speedups - lineno, colno = linecol(doc, pos) - if end is None: - return '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos) - endlineno, endcolno = linecol(doc, end) - return '%s: line %d column %d - line %d column %d (char %d - %d)' % ( - msg, lineno, colno, endlineno, endcolno, pos, end) - - -_CONSTANTS = { - '-Infinity': NegInf, - 'Infinity': PosInf, - 'NaN': NaN, -} - -STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) -BACKSLASH = { - '"': u'"', '\\': u'\\', '/': u'/', - 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', -} - -DEFAULT_ENCODING = "utf-8" - -def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match): - if encoding is None: - encoding = DEFAULT_ENCODING - chunks = [] - _append = chunks.append - begin = end - 1 - while 1: - chunk = _m(s, end) - if chunk is None: - raise ValueError( - errmsg("Unterminated string starting at", s, begin)) - end = chunk.end() - content, terminator = chunk.groups() - if content: - if not isinstance(content, unicode): - content = unicode(content, encoding) - _append(content) - if terminator == '"': - break - elif terminator != '\\': - if strict: - raise ValueError(errmsg("Invalid control character %r at", s, end)) - else: - _append(terminator) - continue - try: - esc = s[end] - except IndexError: - raise ValueError( - errmsg("Unterminated string starting at", s, begin)) - if esc != 'u': - try: - m = _b[esc] - except KeyError: - raise ValueError( - 
errmsg("Invalid \\escape: %r" % (esc,), s, end)) - end += 1 - else: - esc = s[end + 1:end + 5] - next_end = end + 5 - msg = "Invalid \\uXXXX escape" - try: - if len(esc) != 4: - raise ValueError - uni = int(esc, 16) - if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535: - msg = "Invalid \\uXXXX\\uXXXX surrogate pair" - if not s[end + 5:end + 7] == '\\u': - raise ValueError - esc2 = s[end + 7:end + 11] - if len(esc2) != 4: - raise ValueError - uni2 = int(esc2, 16) - uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) - next_end += 6 - m = unichr(uni) - except ValueError: - raise ValueError(errmsg(msg, s, end)) - end = next_end - _append(m) - return u''.join(chunks), end - - -# Use speedup if available -scanstring = c_scanstring or py_scanstring - -WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS) -WHITESPACE_STR = ' \t\n\r' - -def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR): - pairs = {} - nextchar = s[end:end + 1] - # Normally we expect nextchar == '"' - if nextchar != '"': - if nextchar in _ws: - end = _w(s, end).end() - nextchar = s[end:end + 1] - # Trivial empty object - if nextchar == '}': - return pairs, end + 1 - elif nextchar != '"': - raise ValueError(errmsg("Expecting property name", s, end)) - end += 1 - while True: - key, end = scanstring(s, end, encoding, strict) - - # To skip some function call overhead we optimize the fast paths where - # the JSON key separator is ": " or just ":". - if s[end:end + 1] != ':': - end = _w(s, end).end() - if s[end:end + 1] != ':': - raise ValueError(errmsg("Expecting : delimiter", s, end)) - - end += 1 - - try: - if s[end] in _ws: - end += 1 - if s[end] in _ws: - end = _w(s, end + 1).end() - except IndexError: - pass - - try: - value, end = scan_once(s, end) - except StopIteration: - raise ValueError(errmsg("Expecting object", s, end)) - pairs[key] = value - - try: - nextchar = s[end] - if nextchar in _ws: - end = _w(s, end + 1).end() - nextchar = s[end] - except IndexError: - nextchar = '' - end += 1 - - if nextchar == '}': - break - elif nextchar != ',': - raise ValueError(errmsg("Expecting , delimiter", s, end - 1)) - - try: - nextchar = s[end] - if nextchar in _ws: - end += 1 - nextchar = s[end] - if nextchar in _ws: - end = _w(s, end + 1).end() - nextchar = s[end] - except IndexError: - nextchar = '' - - end += 1 - if nextchar != '"': - raise ValueError(errmsg("Expecting property name", s, end - 1)) - - if object_hook is not None: - pairs = object_hook(pairs) - return pairs, end - -def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): - values = [] - nextchar = s[end:end + 1] - if nextchar in _ws: - end = _w(s, end + 1).end() - nextchar = s[end:end + 1] - # Look-ahead for trivial empty array - if nextchar == ']': - return values, end + 1 - _append = values.append - while True: - try: - value, end = scan_once(s, end) - except StopIteration: - raise ValueError(errmsg("Expecting object", s, end)) - _append(value) - nextchar = s[end:end + 1] - if nextchar in _ws: - end = _w(s, end + 1).end() - nextchar = s[end:end + 1] - end += 1 - if nextchar == ']': - break - elif nextchar != ',': - raise ValueError(errmsg("Expecting , delimiter", s, end)) - - try: - if s[end] in _ws: - end += 1 - if s[end] in _ws: - end = _w(s, end + 1).end() - except IndexError: - pass - - return values, end - -class JSONDecoder(object): - """Simple JSON decoder - - Performs the following translations in decoding by default: - - +---------------+-------------------+ - | JSON | Python | - 
+===============+===================+ - | object | dict | - +---------------+-------------------+ - | array | list | - +---------------+-------------------+ - | string | unicode | - +---------------+-------------------+ - | number (int) | int, long | - +---------------+-------------------+ - | number (real) | float | - +---------------+-------------------+ - | true | True | - +---------------+-------------------+ - | false | False | - +---------------+-------------------+ - | null | None | - +---------------+-------------------+ - - It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as - their corresponding ``float`` values, which is outside the JSON spec. - - """ - - def __init__(self, encoding=None, object_hook=None, parse_float=None, - parse_int=None, parse_constant=None, strict=True): - """``encoding`` determines the encoding used to interpret any ``str`` - objects decoded by this instance (utf-8 by default). It has no - effect when decoding ``unicode`` objects. - - Note that currently only encodings that are a superset of ASCII work, - strings of other encodings should be passed in as ``unicode``. - - ``object_hook``, if specified, will be called with the result - of every JSON object decoded and its return value will be used in - place of the given ``dict``. This can be used to provide custom - deserializations (e.g. to support JSON-RPC class hinting). - - ``parse_float``, if specified, will be called with the string - of every JSON float to be decoded. By default this is equivalent to - float(num_str). This can be used to use another datatype or parser - for JSON floats (e.g. decimal.Decimal). - - ``parse_int``, if specified, will be called with the string - of every JSON int to be decoded. By default this is equivalent to - int(num_str). This can be used to use another datatype or parser - for JSON integers (e.g. float). - - ``parse_constant``, if specified, will be called with one of the - following strings: -Infinity, Infinity, NaN. - This can be used to raise an exception if invalid JSON numbers - are encountered. - - """ - self.encoding = encoding - self.object_hook = object_hook - self.parse_float = parse_float or float - self.parse_int = parse_int or int - self.parse_constant = parse_constant or _CONSTANTS.__getitem__ - self.strict = strict - self.parse_object = JSONObject - self.parse_array = JSONArray - self.parse_string = scanstring - self.scan_once = make_scanner(self) - - def decode(self, s, _w=WHITESPACE.match): - """Return the Python representation of ``s`` (a ``str`` or ``unicode`` - instance containing a JSON document) - - """ - obj, end = self.raw_decode(s, idx=_w(s, 0).end()) - end = _w(s, end).end() - if end != len(s): - raise ValueError(errmsg("Extra data", s, end, len(s))) - return obj - - def raw_decode(self, s, idx=0): - """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning - with a JSON document) and return a 2-tuple of the Python - representation and the index in ``s`` where the document ended. - - This can be used to decode a JSON document from a string that may - have extraneous data at the end. 
- - """ - try: - obj, end = self.scan_once(s, idx) - except StopIteration: - raise ValueError("No JSON object could be decoded") - return obj, end diff --git a/mapreduce/lib/simplejson/encoder.py b/mapreduce/lib/simplejson/encoder.py deleted file mode 100755 index cfec6e6..0000000 --- a/mapreduce/lib/simplejson/encoder.py +++ /dev/null @@ -1,434 +0,0 @@ -#!/usr/bin/env python -"""Implementation of JSONEncoder -""" -import re - -try: - from mapreduce.lib.simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii -except ImportError: - c_encode_basestring_ascii = None -try: - from mapreduce.lib.simplejson._speedups import make_encoder as c_make_encoder -except ImportError: - c_make_encoder = None - -ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]') -ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') -HAS_UTF8 = re.compile(r'[\x80-\xff]') -ESCAPE_DCT = { - '\\': '\\\\', - '"': '\\"', - '\b': '\\b', - '\f': '\\f', - '\n': '\\n', - '\r': '\\r', - '\t': '\\t', -} -for i in range(0x20): - ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) - -# Assume this produces an infinity on all machines (probably not guaranteed) -INFINITY = float('1e66666') -FLOAT_REPR = repr - -def encode_basestring(s): - """Return a JSON representation of a Python string - - """ - def replace(match): - return ESCAPE_DCT[match.group(0)] - return '"' + ESCAPE.sub(replace, s) + '"' - - -def py_encode_basestring_ascii(s): - if isinstance(s, str) and HAS_UTF8.search(s) is not None: - s = s.decode('utf-8') - def replace(match): - s = match.group(0) - try: - return ESCAPE_DCT[s] - except KeyError: - n = ord(s) - if n < 0x10000: - return '\\u%04x' % (n,) - else: - # surrogate pair - n -= 0x10000 - s1 = 0xd800 | ((n >> 10) & 0x3ff) - s2 = 0xdc00 | (n & 0x3ff) - return '\\u%04x\\u%04x' % (s1, s2) - return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' - - -encode_basestring_ascii = c_encode_basestring_ascii or py_encode_basestring_ascii - -class JSONEncoder(object): - """Extensible JSON encoder for Python data structures. - - Supports the following objects and types by default: - - +-------------------+---------------+ - | Python | JSON | - +===================+===============+ - | dict | object | - +-------------------+---------------+ - | list, tuple | array | - +-------------------+---------------+ - | str, unicode | string | - +-------------------+---------------+ - | int, long, float | number | - +-------------------+---------------+ - | True | true | - +-------------------+---------------+ - | False | false | - +-------------------+---------------+ - | None | null | - +-------------------+---------------+ - - To extend this to recognize other objects, subclass and implement a - ``.default()`` method with another method that returns a serializable - object for ``o`` if possible, otherwise it should call the superclass - implementation (to raise ``TypeError``). - - """ - item_separator = ', ' - key_separator = ': ' - def __init__(self, skipkeys=False, ensure_ascii=True, - check_circular=True, allow_nan=True, sort_keys=False, - indent=None, separators=None, encoding='utf-8', default=None): - """Constructor for JSONEncoder, with sensible defaults. - - If skipkeys is False, then it is a TypeError to attempt - encoding of keys that are not str, int, long, float or None. If - skipkeys is True, such items are simply skipped. - - If ensure_ascii is True, the output is guaranteed to be str - objects with all incoming unicode characters escaped. If - ensure_ascii is false, the output will be unicode object. 
- - If check_circular is True, then lists, dicts, and custom encoded - objects will be checked for circular references during encoding to - prevent an infinite recursion (which would cause an OverflowError). - Otherwise, no such check takes place. - - If allow_nan is True, then NaN, Infinity, and -Infinity will be - encoded as such. This behavior is not JSON specification compliant, - but is consistent with most JavaScript based encoders and decoders. - Otherwise, it will be a ValueError to encode such floats. - - If sort_keys is True, then the output of dictionaries will be - sorted by key; this is useful for regression tests to ensure - that JSON serializations can be compared on a day-to-day basis. - - If indent is a non-negative integer, then JSON array - elements and object members will be pretty-printed with that - indent level. An indent level of 0 will only insert newlines. - None is the most compact representation. - - If specified, separators should be a (item_separator, key_separator) - tuple. The default is (', ', ': '). To get the most compact JSON - representation you should specify (',', ':') to eliminate whitespace. - - If specified, default is a function that gets called for objects - that can't otherwise be serialized. It should return a JSON encodable - version of the object or raise a ``TypeError``. - - If encoding is not None, then all input strings will be - transformed into unicode using that encoding prior to JSON-encoding. - The default is UTF-8. - - """ - - self.skipkeys = skipkeys - self.ensure_ascii = ensure_ascii - self.check_circular = check_circular - self.allow_nan = allow_nan - self.sort_keys = sort_keys - self.indent = indent - if separators is not None: - self.item_separator, self.key_separator = separators - if default is not None: - self.default = default - self.encoding = encoding - - def default(self, o): - """Implement this method in a subclass such that it returns - a serializable object for ``o``, or calls the base implementation - (to raise a ``TypeError``). - - For example, to support arbitrary iterators, you could - implement default like this:: - - def default(self, o): - try: - iterable = iter(o) - except TypeError: - pass - else: - return list(iterable) - return JSONEncoder.default(self, o) - - """ - raise TypeError("%r is not JSON serializable" % (o,)) - - def encode(self, o): - """Return a JSON string representation of a Python data structure. - - >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) - '{"foo": ["bar", "baz"]}' - - """ - # This is for extremely simple cases and benchmarks. - if isinstance(o, basestring): - if isinstance(o, str): - _encoding = self.encoding - if (_encoding is not None - and not (_encoding == 'utf-8')): - o = o.decode(_encoding) - if self.ensure_ascii: - return encode_basestring_ascii(o) - else: - return encode_basestring(o) - # This doesn't pass the iterator directly to ''.join() because the - # exceptions aren't as detailed. The list call should be roughly - # equivalent to the PySequence_Fast that ''.join() would do. - chunks = self.iterencode(o, _one_shot=True) - if not isinstance(chunks, (list, tuple)): - chunks = list(chunks) - return ''.join(chunks) - - def iterencode(self, o, _one_shot=False): - """Encode the given object and yield each string - representation as available. 
- - For example:: - - for chunk in JSONEncoder().iterencode(bigobject): - mysocket.write(chunk) - - """ - if self.check_circular: - markers = {} - else: - markers = None - if self.ensure_ascii: - _encoder = encode_basestring_ascii - else: - _encoder = encode_basestring - if self.encoding != 'utf-8': - def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): - if isinstance(o, str): - o = o.decode(_encoding) - return _orig_encoder(o) - - def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY): - # Check for specials. Note that this type of test is processor- and/or - # platform-specific, so do tests which don't depend on the internals. - - if o != o: - text = 'NaN' - elif o == _inf: - text = 'Infinity' - elif o == _neginf: - text = '-Infinity' - else: - return _repr(o) - - if not allow_nan: - raise ValueError("Out of range float values are not JSON compliant: %r" - % (o,)) - - return text - - - if _one_shot and c_make_encoder is not None and not self.indent and not self.sort_keys: - _iterencode = c_make_encoder( - markers, self.default, _encoder, self.indent, - self.key_separator, self.item_separator, self.sort_keys, - self.skipkeys, self.allow_nan) - else: - _iterencode = _make_iterencode( - markers, self.default, _encoder, self.indent, floatstr, - self.key_separator, self.item_separator, self.sort_keys, - self.skipkeys, _one_shot) - return _iterencode(o, 0) - -def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, - ## HACK: hand-optimized bytecode; turn globals into locals - False=False, - True=True, - ValueError=ValueError, - basestring=basestring, - dict=dict, - float=float, - id=id, - int=int, - isinstance=isinstance, - list=list, - long=long, - str=str, - tuple=tuple, - ): - - def _iterencode_list(lst, _current_indent_level): - if not lst: - yield '[]' - return - if markers is not None: - markerid = id(lst) - if markerid in markers: - raise ValueError("Circular reference detected") - markers[markerid] = lst - buf = '[' - if _indent is not None: - _current_indent_level += 1 - newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) - separator = _item_separator + newline_indent - buf += newline_indent - else: - newline_indent = None - separator = _item_separator - first = True - for value in lst: - if first: - first = False - else: - buf = separator - if isinstance(value, basestring): - yield buf + _encoder(value) - elif value is None: - yield buf + 'null' - elif value is True: - yield buf + 'true' - elif value is False: - yield buf + 'false' - elif isinstance(value, (int, long)): - yield buf + str(value) - elif isinstance(value, float): - yield buf + _floatstr(value) - else: - yield buf - if isinstance(value, (list, tuple)): - chunks = _iterencode_list(value, _current_indent_level) - elif isinstance(value, dict): - chunks = _iterencode_dict(value, _current_indent_level) - else: - chunks = _iterencode(value, _current_indent_level) - for chunk in chunks: - yield chunk - if newline_indent is not None: - _current_indent_level -= 1 - yield '\n' + (' ' * (_indent * _current_indent_level)) - yield ']' - if markers is not None: - del markers[markerid] - - def _iterencode_dict(dct, _current_indent_level): - if not dct: - yield '{}' - return - if markers is not None: - markerid = id(dct) - if markerid in markers: - raise ValueError("Circular reference detected") - markers[markerid] = dct - yield '{' - if _indent is not None: - _current_indent_level += 1 - 
newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) - item_separator = _item_separator + newline_indent - yield newline_indent - else: - newline_indent = None - item_separator = _item_separator - first = True - if _sort_keys: - items = dct.items() - items.sort(key=lambda kv: kv[0]) - else: - items = dct.iteritems() - for key, value in items: - if isinstance(key, basestring): - pass - # JavaScript is weakly typed for these, so it makes sense to - # also allow them. Many encoders seem to do something like this. - elif isinstance(key, float): - key = _floatstr(key) - elif isinstance(key, (int, long)): - key = str(key) - elif key is True: - key = 'true' - elif key is False: - key = 'false' - elif key is None: - key = 'null' - elif _skipkeys: - continue - else: - raise TypeError("key %r is not a string" % (key,)) - if first: - first = False - else: - yield item_separator - yield _encoder(key) - yield _key_separator - if isinstance(value, basestring): - yield _encoder(value) - elif value is None: - yield 'null' - elif value is True: - yield 'true' - elif value is False: - yield 'false' - elif isinstance(value, (int, long)): - yield str(value) - elif isinstance(value, float): - yield _floatstr(value) - else: - if isinstance(value, (list, tuple)): - chunks = _iterencode_list(value, _current_indent_level) - elif isinstance(value, dict): - chunks = _iterencode_dict(value, _current_indent_level) - else: - chunks = _iterencode(value, _current_indent_level) - for chunk in chunks: - yield chunk - if newline_indent is not None: - _current_indent_level -= 1 - yield '\n' + (' ' * (_indent * _current_indent_level)) - yield '}' - if markers is not None: - del markers[markerid] - - def _iterencode(o, _current_indent_level): - if isinstance(o, basestring): - yield _encoder(o) - elif o is None: - yield 'null' - elif o is True: - yield 'true' - elif o is False: - yield 'false' - elif isinstance(o, (int, long)): - yield str(o) - elif isinstance(o, float): - yield _floatstr(o) - elif isinstance(o, (list, tuple)): - for chunk in _iterencode_list(o, _current_indent_level): - yield chunk - elif isinstance(o, dict): - for chunk in _iterencode_dict(o, _current_indent_level): - yield chunk - else: - if markers is not None: - markerid = id(o) - if markerid in markers: - raise ValueError("Circular reference detected") - markers[markerid] = o - o = _default(o) - for chunk in _iterencode(o, _current_indent_level): - yield chunk - if markers is not None: - del markers[markerid] - - return _iterencode diff --git a/mapreduce/lib/simplejson/scanner.py b/mapreduce/lib/simplejson/scanner.py deleted file mode 100755 index 201cbc5..0000000 --- a/mapreduce/lib/simplejson/scanner.py +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env python -"""JSON token scanner -""" -import re -try: - from mapreduce.lib.simplejson._speedups import make_scanner as c_make_scanner -except ImportError: - c_make_scanner = None - -__all__ = ['make_scanner'] - -NUMBER_RE = re.compile( - r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?', - (re.VERBOSE | re.MULTILINE | re.DOTALL)) - -def py_make_scanner(context): - parse_object = context.parse_object - parse_array = context.parse_array - parse_string = context.parse_string - match_number = NUMBER_RE.match - encoding = context.encoding - strict = context.strict - parse_float = context.parse_float - parse_int = context.parse_int - parse_constant = context.parse_constant - object_hook = context.object_hook - - def _scan_once(string, idx): - try: - nextchar = string[idx] - except IndexError: - raise 
StopIteration - - if nextchar == '"': - return parse_string(string, idx + 1, encoding, strict) - elif nextchar == '{': - return parse_object((string, idx + 1), encoding, strict, _scan_once, object_hook) - elif nextchar == '[': - return parse_array((string, idx + 1), _scan_once) - elif nextchar == 'n' and string[idx:idx + 4] == 'null': - return None, idx + 4 - elif nextchar == 't' and string[idx:idx + 4] == 'true': - return True, idx + 4 - elif nextchar == 'f' and string[idx:idx + 5] == 'false': - return False, idx + 5 - - m = match_number(string, idx) - if m is not None: - integer, frac, exp = m.groups() - if frac or exp: - res = parse_float(integer + (frac or '') + (exp or '')) - else: - res = parse_int(integer) - return res, m.end() - elif nextchar == 'N' and string[idx:idx + 3] == 'NaN': - return parse_constant('NaN'), idx + 3 - elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity': - return parse_constant('Infinity'), idx + 8 - elif nextchar == '-' and string[idx:idx + 9] == '-Infinity': - return parse_constant('-Infinity'), idx + 9 - else: - raise StopIteration - - return _scan_once - -make_scanner = c_make_scanner or py_make_scanner diff --git a/mapreduce/main.py b/mapreduce/main.py deleted file mode 100755 index c8b0196..0000000 --- a/mapreduce/main.py +++ /dev/null @@ -1,96 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2010 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Main module for map-reduce implementation. - -This module should be specified as a handler for mapreduce URLs in app.yaml: - - handlers: - - url: /mapreduce(/.*)? - login: admin - script: mapreduce/main.py -""" - -import wsgiref.handlers - -from google.appengine.ext import webapp -from mapreduce import handlers -from mapreduce import status -from google.appengine.ext.webapp import util - - -STATIC_RE = r".*/([^/]*\.(?:css|js)|status|detail)$" - - -class RedirectHandler(webapp.RequestHandler): - """Redirects the user back to the status page.""" - - def get(self): - new_path = self.request.path - if not new_path.endswith("/"): - new_path += "/" - new_path += "status" - self.redirect(new_path) - - -def create_handlers_map(): - """Create new handlers map. - - Returns: - list of (regexp, handler) pairs for WSGIApplication constructor. - """ - return [ - # Task queue handlers. - (r".*/worker_callback", handlers.MapperWorkerCallbackHandler), - (r".*/controller_callback", handlers.ControllerCallbackHandler), - (r".*/kickoffjob_callback", handlers.KickOffJobHandler), - - # RPC requests with JSON responses - # All JSON handlers should have /command/ prefix. 
- (r".*/command/start_job", handlers.StartJobHandler), - (r".*/command/cleanup_job", handlers.CleanUpJobHandler), - (r".*/command/abort_job", handlers.AbortJobHandler), - (r".*/command/list_configs", status.ListConfigsHandler), - (r".*/command/list_jobs", status.ListJobsHandler), - (r".*/command/get_job_detail", status.GetJobDetailHandler), - - # UI static files - (STATIC_RE, status.ResourceHandler), - - # Redirect non-file URLs that do not end in status/detail to status page. - (r".*", RedirectHandler), - ] - -def create_application(): - """Create new WSGIApplication and register all handlers. - - Returns: - an instance of webapp.WSGIApplication with all mapreduce handlers - registered. - """ - return webapp.WSGIApplication(create_handlers_map(), - debug=True) - - -APP = create_application() - - -def main(): - util.run_wsgi_app(APP) - - -if __name__ == "__main__": - main() diff --git a/mapreduce/migrate.py b/mapreduce/migrate.py deleted file mode 100755 index 24312fe..0000000 --- a/mapreduce/migrate.py +++ /dev/null @@ -1,8 +0,0 @@ -from mapreduce import operation as op -import logging -import appengine_config -from events.models import Event - -def process(entity): - yield op.db.Put(entity) - return diff --git a/mapreduce/model.py b/mapreduce/model.py deleted file mode 100755 index 3e30fa3..0000000 --- a/mapreduce/model.py +++ /dev/null @@ -1,768 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2010 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Model classes which are used to communicate between parts of implementation. - -These model classes are describing mapreduce, its current state and -communication messages. They are either stored in the datastore or -serialized to/from json and passed around with other means. -""" - -# Disable "Invalid method name" -# pylint: disable-msg=C6409 - - - -__all__ = ["JsonMixin", "JsonProperty", "MapreduceState", "MapperSpec", - "MapreduceControl", "MapreduceSpec", "ShardState", "CountersMap"] - -import copy -import datetime -import logging -import math -import random -from mapreduce.lib import simplejson -import time -import types - -from google.appengine.api import datastore_errors -from google.appengine.api import datastore_types -from google.appengine.ext import db -from mapreduce import context -from mapreduce import hooks -from mapreduce import util -from mapreduce.lib.graphy.backends import google_chart_api - - -# Default rate of processed entities per second. -_DEFAULT_PROCESSING_RATE_PER_SEC = 100 - -# Default number of shards to have. -_DEFAULT_SHARD_COUNT = 8 - - -class JsonMixin(object): - """Simple, stateless json utilities mixin. - - Requires class to implement two methods: - to_json(self): convert data to json-compatible datastructure (dict, - list, strings, numbers) - @classmethod from_json(cls, json): load data from json-compatible structure. - """ - - def to_json_str(self): - """Convert data to json string representation. - - Returns: - json representation as string. 
- """ - return simplejson.dumps(self.to_json(), sort_keys=True) - - @classmethod - def from_json_str(cls, json_str): - """Convert json string representation into class instance. - - Args: - json_str: json representation as string. - - Returns: - New instance of the class with data loaded from json string. - """ - return cls.from_json(simplejson.loads(json_str)) - - -class JsonProperty(db.UnindexedProperty): - """Property type for storing json representation of data. - - Requires data types to implement two methods: - to_json(self): convert data to json-compatible datastructure (dict, - list, strings, numbers) - @classmethod from_json(cls, json): load data from json-compatible structure. - """ - - def __init__(self, data_type, default=None, **kwargs): - """Constructor. - - Args: - data_type: underlying data type as class. - default: default value for the property. The value is deep copied - fore each model instance. - kwargs: remaining arguments. - """ - kwargs["default"] = default - super(JsonProperty, self).__init__(**kwargs) - self.data_type = data_type - - def get_value_for_datastore(self, model_instance): - """Gets value for datastore. - - Args: - model_instance: instance of the model class. - - Returns: - datastore-compatible value. - """ - value = super(JsonProperty, self).get_value_for_datastore(model_instance) - if not value: - return None - json_value = value.to_json() - if not json_value: - return None - return datastore_types.Text(simplejson.dumps( - json_value, sort_keys=True)) - - def make_value_from_datastore(self, value): - """Convert value from datastore representation. - - Args: - value: datastore value. - - Returns: - value to store in the model. - """ - - if value is None: - return None - return self.data_type.from_json(simplejson.loads(value)) - - def validate(self, value): - """Validate value. - - Args: - value: model value. - - Returns: - Whether the specified value is valid data type value. - - Raises: - BadValueError: when value is not of self.data_type type. - """ - if value is not None and not isinstance(value, self.data_type): - raise datastore_errors.BadValueError( - "Property %s must be convertible to a %s instance (%s)" % - (self.name, self.data_type, value)) - return super(JsonProperty, self).validate(value) - - def empty(self, value): - """Checks if value is empty. - - Args: - value: model value. - - Returns: - True passed value is empty. - """ - return not value - - def default_value(self): - """Create default model value. - - If default option was specified, then it will be deeply copied. - None otherwise. - - Returns: - default model value. - """ - if self.default: - return copy.deepcopy(self.default) - else: - return None - - - -# Ridiculous future UNIX epoch time, 500 years from now. -_FUTURE_TIME = 2**34 - - -def _get_descending_key(gettime=time.time, getrandint=random.randint): - """Returns a key name lexically ordered by time descending. - - This lets us have a key name for use with Datastore entities which returns - rows in time descending order when it is scanned in lexically ascending order, - allowing us to bypass index building for descending indexes. - - Args: - gettime: Used for testing. - getrandint: Used for testing. - - Returns: - A string with a time descending key. - """ - now_descending = int((_FUTURE_TIME - gettime()) * 100) - tie_breaker = getrandint(0, 100) - return "%d%d" % (now_descending, tie_breaker) - - -class CountersMap(JsonMixin): - """Maintains map from counter name to counter value. 
- - The class is used to provide basic arithmetics of counter values (buil - add/remove), increment individual values and store/load data from json. - """ - - def __init__(self, initial_map=None): - """Constructor. - - Args: - initial_map: initial counter values map from counter name (string) to - counter value (int). - """ - if initial_map: - self.counters = initial_map - else: - self.counters = {} - - def __repr__(self): - """Compute string representation.""" - return "mapreduce.model.CountersMap(%r)" % self.counters - - def get(self, counter_name): - """Get current counter value. - - Args: - counter_name: counter name as string. - - Returns: - current counter value as int. 0 if counter was not set. - """ - return self.counters.get(counter_name, 0) - - def increment(self, counter_name, delta): - """Increment counter value. - - Args: - counter_name: counter name as String. - delta: increment delta as Integer. - - Returns: - new counter value. - """ - current_value = self.counters.get(counter_name, 0) - new_value = current_value + delta - self.counters[counter_name] = new_value - return new_value - - def add_map(self, counters_map): - """Add all counters from the map. - - For each counter in the passed map, adds its value to the counter in this - map. - - Args: - counters_map: CounterMap instance to add. - """ - for counter_name in counters_map.counters: - self.increment(counter_name, counters_map.counters[counter_name]) - - def sub_map(self, counters_map): - """Subtracts all counters from the map. - - For each counter in the passed map, subtracts its value to the counter in - this map. - - Args: - counters_map: CounterMap instance to subtract. - """ - for counter_name in counters_map.counters: - self.increment(counter_name, -counters_map.counters[counter_name]) - - def clear(self): - """Clear all values.""" - self.counters = {} - - def to_json(self): - """Serializes all the data in this map into json form. - - Returns: - json-compatible data representation. - """ - return {"counters": self.counters} - - @classmethod - def from_json(cls, json): - """Create new CountersMap from the json data structure, encoded by to_json. - - Args: - json: json representation of CountersMap . - - Returns: - an instance of CountersMap with all data deserialized from json. - """ - counters_map = cls() - counters_map.counters = json["counters"] - return counters_map - - -class MapperSpec(JsonMixin): - """Contains a specification for the mapper phase of the mapreduce. - - MapperSpec instance can be changed only during mapreduce starting process, - and it remains immutable for the rest of mapreduce execution. MapperSpec is - passed as a payload to all mapreduce tasks in JSON encoding as part of - MapreduceSpec. - - Specifying mapper handlers: - * '.' - __call__ method of class instance will be - called - * '.' - function will be called. - * '..' - class will be instantiated - and method called. - """ - - def __init__(self, handler_spec, input_reader_spec, params, shard_count): - """Creates a new MapperSpec. - - Args: - handler_spec: handler specification as string (see class doc for - details). - input_reader_spec: The class name of the input reader to use. - params: Dictionary of additional parameters for the mapper. - shard_count: number of shards to process in parallel. - - Properties: - handler_spec: name of handler class/function to use. - shard_count: number of shards to process in parallel. - handler: cached instance of mapper handler as callable. 
- input_reader_spec: The class name of the input reader to use. - params: Dictionary of additional parameters for the mapper. - """ - self.handler_spec = handler_spec - self.__handler = None - self.input_reader_spec = input_reader_spec - self.shard_count = shard_count - self.params = params - - def get_handler(self): - """Get mapper handler instance. - - Returns: - cached handler instance as callable. - """ - if self.__handler is None: - resolved_spec = util.for_name(self.handler_spec) - if isinstance(resolved_spec, type): - # create new instance if this is type - self.__handler = resolved_spec() - elif isinstance(resolved_spec, types.MethodType): - # bind the method - self.__handler = getattr(resolved_spec.im_class(), - resolved_spec.__name__) - else: - self.__handler = resolved_spec - return self.__handler - - handler = property(get_handler) - - def input_reader_class(self): - """Get input reader class. - - Returns: - input reader class object. - """ - return util.for_name(self.input_reader_spec) - - def to_json(self): - """Serializes this MapperSpec into a json-izable object.""" - return { - "mapper_handler_spec": self.handler_spec, - "mapper_input_reader": self.input_reader_spec, - "mapper_params": self.params, - "mapper_shard_count": self.shard_count, - } - - def __str__(self): - return "MapperSpec(%s, %s, %s, %s)" % ( - self.handler_spec, self.input_reader_spec, self.params, - self.shard_count) - - @classmethod - def from_json(cls, json): - """Creates MapperSpec from a dict-like object.""" - return cls(json["mapper_handler_spec"], - json["mapper_input_reader"], - json["mapper_params"], - json["mapper_shard_count"]) - - -class MapreduceSpec(JsonMixin): - """Contains a specification for the whole mapreduce. - - MapreduceSpec instance can be changed only during mapreduce starting process, - and it remains immutable for the rest of mapreduce execution. MapreduceSpec is - passed as a payload to all mapreduce tasks in json encoding. - """ - - # Url to call when mapreduce finishes its execution. - PARAM_DONE_CALLBACK = "done_callback" - # Queue to use to call done callback - PARAM_DONE_CALLBACK_QUEUE = "done_callback_queue" - - def __init__(self, - name, - mapreduce_id, - mapper_spec, - params={}, - hooks_class_name=None): - """Create new MapreduceSpec. - - Args: - name: The name of this mapreduce job type. - mapreduce_id: ID of the mapreduce. - mapper_spec: JSON-encoded string containing a MapperSpec. - params: dictionary of additional mapreduce parameters. - hooks_class_name: The fully qualified name of the hooks class to use. - - Properties: - name: The name of this mapreduce job type. - mapreduce_id: unique id of this mapreduce as string. - mapper: This MapreduceSpec's instance of MapperSpec. - params: dictionary of additional mapreduce parameters. - hooks_class_name: The fully qualified name of the hooks class to use. - """ - self.name = name - self.mapreduce_id = mapreduce_id - self.mapper = MapperSpec.from_json(mapper_spec) - self.params = params - self.hooks_class_name = hooks_class_name - self.__hooks = None - self.get_hooks() # Fail fast on an invalid hook class. 
- - def get_hooks(self): - """Returns a hooks.Hooks class or None if no hooks class has been set.""" - if self.__hooks is None and self.hooks_class_name is not None: - hooks_class = util.for_name(self.hooks_class_name) - if not isinstance(hooks_class, type): - raise ValueError("hooks_class_name must refer to a class, got %s" % - type(hooks_class).__name__) - if not issubclass(hooks_class, hooks.Hooks): - raise ValueError( - "hooks_class_name must refer to a hooks.Hooks subclass") - self.__hooks = hooks_class(self.mapper) - - return self.__hooks - - def to_json(self): - """Serializes all data in this mapreduce spec into json form. - - Returns: - data in json format. - """ - mapper_spec = self.mapper.to_json() - return { - "name": self.name, - "mapreduce_id": self.mapreduce_id, - "mapper_spec": mapper_spec, - "params": self.params, - "hooks_class_name": self.hooks_class_name, - } - - @classmethod - def from_json(cls, json): - """Create new MapreduceSpec from the json, encoded by to_json. - - Args: - json: json representation of MapreduceSpec. - - Returns: - an instance of MapreduceSpec with all data deserialized from json. - """ - mapreduce_spec = cls(json["name"], - json["mapreduce_id"], - json["mapper_spec"], - json.get("params"), - json.get("hooks_class_name")) - return mapreduce_spec - - -class MapreduceState(db.Model): - """Holds accumulated state of mapreduce execution. - - MapreduceState is stored in datastore with a key name equal to the - mapreduce ID. Only controller tasks can write to MapreduceState. - - Properties: - mapreduce_spec: cached deserialized MapreduceSpec instance. read-only - active: if we have this mapreduce running right now - last_poll_time: last time controller job has polled this mapreduce. - counters_map: shard's counters map as CountersMap. Mirrors - counters_map_json. - chart_url: last computed mapreduce status chart url. This chart displays the - progress of all the shards the best way it can. - sparkline_url: last computed mapreduce status chart url in small format. - result_status: If not None, the final status of the job. - active_shards: How many shards are still processing. - start_time: When the job started. - """ - - RESULT_SUCCESS = "success" - RESULT_FAILED = "failed" - RESULT_ABORTED = "aborted" - - _RESULTS = frozenset([RESULT_SUCCESS, RESULT_FAILED, RESULT_ABORTED]) - - # Functional properties. - mapreduce_spec = JsonProperty(MapreduceSpec, indexed=False) - active = db.BooleanProperty(default=True, indexed=False) - last_poll_time = db.DateTimeProperty(required=True) - counters_map = JsonProperty(CountersMap, default=CountersMap(), indexed=False) - app_id = db.StringProperty(required=False, indexed=True) - - # For UI purposes only. - chart_url = db.TextProperty(default="") - sparkline_url = db.TextProperty(default="") - result_status = db.StringProperty(required=False, choices=_RESULTS) - active_shards = db.IntegerProperty(default=0, indexed=False) - failed_shards = db.IntegerProperty(default=0, indexed=False) - aborted_shards = db.IntegerProperty(default=0, indexed=False) - start_time = db.DateTimeProperty(auto_now_add=True) - - @classmethod - def kind(cls): - """Returns entity kind.""" - return "_AE_MR_MapreduceState" - - @classmethod - def get_key_by_job_id(cls, mapreduce_id): - """Retrieves the Key for a Job. - - Args: - mapreduce_id: The job to retrieve. - - Returns: - Datastore Key that can be used to fetch the MapreduceState. 
- """ - return db.Key.from_path(cls.kind(), mapreduce_id) - - @classmethod - def get_by_job_id(cls, mapreduce_id): - """Retrieves the instance of state for a Job. - - Args: - mapreduce_id: The mapreduce job to retrieve. - - Returns: - instance of MapreduceState for passed id. - """ - return db.get(cls.get_key_by_job_id(mapreduce_id)) - - def set_processed_counts(self, shards_processed): - """Updates a chart url to display processed count for each shard. - - Args: - shards_processed: list of integers with number of processed entities in - each shard - """ - chart = google_chart_api.BarChart(shards_processed) - if self.mapreduce_spec and shards_processed: - chart.bottom.labels = [ - str(x) for x in xrange(self.mapreduce_spec.mapper.shard_count)] - chart.left.labels = ['0', str(max(shards_processed))] - chart.left.min = 0 - self.chart_url = chart.display.Url(300, 200) - - def get_processed(self): - """Number of processed entities. - - Returns: - The total number of processed entities as int. - """ - return self.counters_map.get(context.COUNTER_MAPPER_CALLS) - - processed = property(get_processed) - - @staticmethod - def create_new(mapreduce_id=None, - gettime=datetime.datetime.now): - """Create a new MapreduceState. - - Args: - mapreduce_id: Mapreduce id as string. - gettime: Used for testing. - """ - if not mapreduce_id: - mapreduce_id = MapreduceState.new_mapreduce_id() - state = MapreduceState(key_name=mapreduce_id, - last_poll_time=gettime()) - state.set_processed_counts([]) - return state - - @staticmethod - def new_mapreduce_id(): - """Generate new mapreduce id.""" - return _get_descending_key() - - -class ShardState(db.Model): - """Single shard execution state. - - The shard state is stored in the datastore and is later aggregated by - controller task. Shard key_name is equal to shard_id. - - Properties: - active: if we have this shard still running as boolean. - counters_map: shard's counters map as CountersMap. Mirrors - counters_map_json. - mapreduce_id: unique id of the mapreduce. - shard_id: unique id of this shard as string. - shard_number: ordered number for this shard. - result_status: If not None, the final status of this shard. - update_time: The last time this shard state was updated. - shard_description: A string description of the work this shard will do. - last_work_item: A string description of the last work item processed. - """ - - RESULT_SUCCESS = "success" - RESULT_FAILED = "failed" - RESULT_ABORTED = "aborted" - - _RESULTS = frozenset([RESULT_SUCCESS, RESULT_FAILED, RESULT_ABORTED]) - - # Functional properties. - active = db.BooleanProperty(default=True, indexed=False) - counters_map = JsonProperty(CountersMap, default=CountersMap(), indexed=False) - result_status = db.StringProperty(choices=_RESULTS, indexed=False) - - # For UI purposes only. - mapreduce_id = db.StringProperty(required=True) - update_time = db.DateTimeProperty(auto_now=True, indexed=False) - shard_description = db.TextProperty(default="") - last_work_item = db.TextProperty(default="") - - def get_shard_number(self): - """Gets the shard number from the key name.""" - return int(self.key().name().split("-")[-1]) - - shard_number = property(get_shard_number) - - def get_shard_id(self): - """Returns the shard ID.""" - return self.key().name() - - shard_id = property(get_shard_id) - - @classmethod - def kind(cls): - """Returns entity kind.""" - return "_AE_MR_ShardState" - - @classmethod - def shard_id_from_number(cls, mapreduce_id, shard_number): - """Get shard id by mapreduce id and shard number. 
- - Args: - mapreduce_id: mapreduce id as string. - shard_number: shard number to compute id for as int. - - Returns: - shard id as string. - """ - return "%s-%d" % (mapreduce_id, shard_number) - - @classmethod - def get_key_by_shard_id(cls, shard_id): - """Retrieves the Key for this ShardState. - - Args: - shard_id: The shard ID to fetch. - - Returns: - The Datatore key to use to retrieve this ShardState. - """ - return db.Key.from_path(cls.kind(), shard_id) - - @classmethod - def get_by_shard_id(cls, shard_id): - """Get shard state from datastore by shard_id. - - Args: - shard_id: shard id as string. - - Returns: - ShardState for given shard id or None if it's not found. - """ - return cls.get_by_key_name(shard_id) - - @classmethod - def find_by_mapreduce_id(cls, mapreduce_id): - """Find all shard states for given mapreduce. - - Args: - mapreduce_id: mapreduce id. - - Returns: - iterable of all ShardState for given mapreduce id. - """ - return cls.all().filter("mapreduce_id =", mapreduce_id).fetch(99999) - - @classmethod - def create_new(cls, mapreduce_id, shard_number): - """Create new shard state. - - Args: - mapreduce_id: unique mapreduce id as string. - shard_number: shard number for which to create shard state. - - Returns: - new instance of ShardState ready to put into datastore. - """ - shard_id = cls.shard_id_from_number(mapreduce_id, shard_number) - state = cls(key_name=shard_id, - mapreduce_id=mapreduce_id) - return state - - -class MapreduceControl(db.Model): - """Datastore entity used to control mapreduce job execution. - - Only one command may be sent to jobs at a time. - - Properties: - command: The command to send to the job. - """ - - ABORT = "abort" - - _COMMANDS = frozenset([ABORT]) - _KEY_NAME = "command" - - command = db.TextProperty(choices=_COMMANDS, required=True) - - @classmethod - def kind(cls): - """Returns entity kind.""" - return "_AE_MR_MapreduceControl" - - @classmethod - def get_key_by_job_id(cls, mapreduce_id): - """Retrieves the Key for a mapreduce ID. - - Args: - mapreduce_id: The job to fetch. - - Returns: - Datastore Key for the command for the given job ID. - """ - return db.Key.from_path(cls.kind(), "%s:%s" % (mapreduce_id, cls._KEY_NAME)) - - @classmethod - def abort(cls, mapreduce_id): - """Causes a job to abort. - - Args: - mapreduce_id: The job to abort. Not verified as a valid job. - """ - cls(key_name="%s:%s" % (mapreduce_id, cls._KEY_NAME), - command=cls.ABORT).put() diff --git a/mapreduce/operation/__init__.py b/mapreduce/operation/__init__.py deleted file mode 100755 index f645b87..0000000 --- a/mapreduce/operation/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2010 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Operations which can be yielded from mappers. - -Operation is callable that takes context.Context as a parameter. -Operations are called during mapper execution immediately -on recieving from handler function. 
-""" - - - -import db -import counters - -__all__ = ['db', 'counters'] diff --git a/mapreduce/operation/counters.py b/mapreduce/operation/counters.py deleted file mode 100755 index 9cbfe1c..0000000 --- a/mapreduce/operation/counters.py +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2010 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Counters-related operations.""" - - - -__all__ = ['Increment'] - - -class Increment(object): - """Increment counter operation.""" - - def __init__(self, counter_name, delta=1): - """Constructor. - - Args: - counter_name: name of the counter as string - delta: increment delta as int. - """ - self.counter_name = counter_name - self.delta = delta - - def __call__(self, context): - """Execute operation. - - Args: - context: mapreduce context as context.Context. - """ - context.counters.increment(self.counter_name, self.delta) diff --git a/mapreduce/operation/db.py b/mapreduce/operation/db.py deleted file mode 100755 index fbde66f..0000000 --- a/mapreduce/operation/db.py +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2010 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""DB-related operations.""" - - - -__all__ = ['Put', 'Delete'] - - -# TODO(user): handler function annotation which requests to -# use db calls directly without batching them/doing async db calls. -class Put(object): - """Put entity into datastore via mutation_pool. - - See mapreduce.context.MutationPool. - """ - - def __init__(self, entity): - """Constructor. - - Args: - entity: an entity to put. - """ - self.entity = entity - - def __call__(self, context): - """Perform operation. - - Args: - context: mapreduce context as context.Context. - """ - context.mutation_pool.put(self.entity) - - -class Delete(object): - """Delete entity from datastore via mutation_pool. - - See mapreduce.context.MutationPool. - """ - - def __init__(self, entity): - """Constructor. - - Args: - entity: a key or model instance to delete. - """ - self.entity = entity - - def __call__(self, context): - """Perform operation. - - Args: - context: mapreduce context as context.Context. - """ - context.mutation_pool.delete(self.entity) diff --git a/mapreduce/quota.py b/mapreduce/quota.py deleted file mode 100755 index bbdc39a..0000000 --- a/mapreduce/quota.py +++ /dev/null @@ -1,185 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2010 Google Inc. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Simple quota system backed by memcache storage.""" - - - - -# Memcache namespace to use. -_QUOTA_NAMESPACE = "quota" - -# Offset all quota values by this amount since memcache incr/decr -# operate only with unsigned values. -_OFFSET = 2**32 - - -class QuotaManager(object): - """Simple quota system manager, backed by memcache storage. - - Since memcache storage is not reliable, this quota system is not reliable and - best effort only. - - Quota is managed by buckets. Each bucket contains a 32-bit int value of - available quota. Buckets should be refilled manually with 'put' method. - - It is safe to use a single bucket from multiple clients simultaneously. - """ - - def __init__(self, memcache_client): - """Initialize new instance. - - Args: - memcache_client: an instance of memcache client to use. - """ - self.memcache_client = memcache_client - - def put(self, bucket, amount): - """Put amount into quota bucket. - - Args: - bucket: quota bucket as string. - amount: amount to bit put into quota as int. - """ - self.memcache_client.incr(bucket, delta=amount, - initial_value=_OFFSET, namespace=_QUOTA_NAMESPACE) - - def consume(self, bucket, amount, consume_some=False): - """Consume amount from quota bucket. - - Args: - bucket: quota bucket as string. - amount: amount to consume. - consume_some: specifies behavior in case of not enough quota. If False, - the method will leave quota intact and return 0. If True, will try to - consume as much as possible. - - Returns: - Amount of quota consumed. - """ - new_quota = self.memcache_client.decr( - bucket, delta=amount, initial_value=_OFFSET, namespace=_QUOTA_NAMESPACE) - - if new_quota >= _OFFSET: - return amount - - if consume_some and new_quota is not None and _OFFSET - new_quota < amount: - # we still can consume some - self.put(bucket, _OFFSET - new_quota) - return amount - (_OFFSET - new_quota) - else: - self.put(bucket, amount) - return 0 - - def get(self, bucket): - """Get current bucket amount. - - Args: - bucket: quota bucket as string. - - Returns: - current bucket amount as int. - """ - amount = self.memcache_client.get(bucket, namespace=_QUOTA_NAMESPACE) - if amount: - return int(amount) - _OFFSET - else: - return 0 - - def set(self, bucket, amount): - """Set bucket amount. - - Args: - bucket: quota bucket as string. - amount: new bucket amount as int. - """ - self.memcache_client.set(bucket, amount + _OFFSET, - namespace=_QUOTA_NAMESPACE) - - -class QuotaConsumer(object): - """Quota consumer wrapper for efficient quota consuming/reclaiming. - - Quota is consumed in batches and put back in dispose() method. - - WARNING: Always call the dispose() method if you need to keep quota - consistent. - """ - - def __init__(self, quota_manager, bucket, batch_size): - """Initialize new instance. - - Args: - quota_manager: quota manager to use for quota operations as QuotaManager. - bucket: quota bucket name as string. - batch_size: batch size for quota consuming as int. 
- """ - self.quota_manager = quota_manager - self.batch_size = batch_size - self.bucket = bucket - self.quota = 0 - - def consume(self, amount=1): - """Consume quota. - - Args: - amount: amount of quota to be consumed as int. - - Returns: - True if quota was successfully consumed, False if there's not enough - quota. - """ - while self.quota < amount: - delta = self.quota_manager.consume(self.bucket, self.batch_size, - consume_some=True) - if not delta: - return False - self.quota += delta - - self.quota -= amount - return True - - def put(self, amount=1): - """Put quota back. - - Args: - amount: amount of quota as int. - """ - self.quota += amount - - def check(self, amount=1): - """Check that we have enough quota right now. - - This doesn't lock or consume the quota. Following consume might in fact - fail/succeeded. - - Args: - amount: amount of quota to check. - - Returns: - True if we have enough quota to consume specified amount right now. False - otherwise. - """ - if self.quota >= amount: - return True - return self.quota + self.quota_manager.get(self.bucket) >= amount - - def dispose(self): - """Dispose QuotaConsumer and put all actually unconsumed quota back. - - This method has to be called for quota consistency! - """ - self.quota_manager.put(self.bucket, self.quota) diff --git a/mapreduce/static/base.css b/mapreduce/static/base.css deleted file mode 100755 index 0fca75b..0000000 --- a/mapreduce/static/base.css +++ /dev/null @@ -1,113 +0,0 @@ -html { - margin: 0; - padding: 0; - font-family: Arial, sans-serif; - font-size: 13px; -} - -body { - margin: 0; - padding: 0 3px 3px 3px; -} - -#butter { - position: absolute; - left: 40%; /* todo: actually center this */ - width: 200px; - background-color: #C5D7EF; - text-align: center; - padding: 5px; - border-left: 1px solid #3366CC; - border-right: 1px solid #3366CC; - border-bottom: 1px solid #3366CC; -} - -h1 { - margin-top: 0; - margin-bottom: 0.4em; - font-size: 2em; -} -h2 { - margin-top: 1em; - margin-bottom: 0.4em; - font-size: 1.2em; -} -h3 { - margin-top: 0; - margin-bottom: 0.7em; - font-size: 1.0em; -} - -.status-text { - text-transform: capitalize; -} - -/* Overview page */ -.editable-input, -.job-static-param { - margin: 0.3em; -} - -.editable-input > label:after { - content: ': '; -} - -#launch-control { - margin-bottom: 0.5em; -} -#launch-container { - margin-left: 0.5em; -} - -/* Detail page */ -#control { - float: right; -} - -#detail-graph, -#aggregated-counters-container, -#detail-params-container { - margin-left: 1em; - float: left; -} - -/* Shared */ -.param-key:after { - content: ': '; -} -.user-param-key:after { - content: ': '; -} -.param-aux:before { - content: ' '; -} - -.status-table { - margin: 5px; - border-collapse: collapse; - border-width: 0; - empty-cells: show; - border-top: 1px solid #C5D7EF; - border-left: 1px solid #C5D7EF; - border-right: 1px solid #C5D7EF; -} - -.status-table > thead { - height: 2em; -} - -.status-table > tfoot { - height: 1em; -} - -.status-table > thead, -.status-table > tfoot { - background-color: #E5ECF9; -} - -.status-table td { - padding: 4px; - border-left: 1px solid #C5D7EF; - border-bottom: 1px solid #C5D7EF; - border-top: 1px solid #C5D7EF; -} diff --git a/mapreduce/static/detail.html b/mapreduce/static/detail.html deleted file mode 100755 index 25b5d2b..0000000 --- a/mapreduce/static/detail.html +++ /dev/null @@ -1,64 +0,0 @@ - - - - Loading Job Status... - - - - - - - - - -
- [detail.html markup not recoverable; surviving text: a "« Back to Overview" link, a "Loading Job Status..."
-  heading, "Overview" and "Counters" sections, and a "Mapper status" table with columns
-  Shard | Status | Description | Last work item | Time elapsed]
      - - - diff --git a/mapreduce/static/jquery-1.4.2.min.js b/mapreduce/static/jquery-1.4.2.min.js deleted file mode 100755 index 7c24308..0000000 --- a/mapreduce/static/jquery-1.4.2.min.js +++ /dev/null @@ -1,154 +0,0 @@ -/*! - * jQuery JavaScript Library v1.4.2 - * http://jquery.com/ - * - * Copyright 2010, John Resig - * Dual licensed under the MIT or GPL Version 2 licenses. - * http://jquery.org/license - * - * Includes Sizzle.js - * http://sizzlejs.com/ - * Copyright 2010, The Dojo Foundation - * Released under the MIT, BSD, and GPL Licenses. - * - * Date: Sat Feb 13 22:33:48 2010 -0500 - */ -(function(A,w){function ma(){if(!c.isReady){try{s.documentElement.doScroll("left")}catch(a){setTimeout(ma,1);return}c.ready()}}function Qa(a,b){b.src?c.ajax({url:b.src,async:false,dataType:"script"}):c.globalEval(b.text||b.textContent||b.innerHTML||"");b.parentNode&&b.parentNode.removeChild(b)}function X(a,b,d,f,e,j){var i=a.length;if(typeof b==="object"){for(var o in b)X(a,o,b[o],f,e,d);return a}if(d!==w){f=!j&&f&&c.isFunction(d);for(o=0;o)[^>]*$|^#([\w-]+)$/,Ua=/^.[^:#\[\.,]*$/,Va=/\S/, -Wa=/^(\s|\u00A0)+|(\s|\u00A0)+$/g,Xa=/^<(\w+)\s*\/?>(?:<\/\1>)?$/,P=navigator.userAgent,xa=false,Q=[],L,$=Object.prototype.toString,aa=Object.prototype.hasOwnProperty,ba=Array.prototype.push,R=Array.prototype.slice,ya=Array.prototype.indexOf;c.fn=c.prototype={init:function(a,b){var d,f;if(!a)return this;if(a.nodeType){this.context=this[0]=a;this.length=1;return this}if(a==="body"&&!b){this.context=s;this[0]=s.body;this.selector="body";this.length=1;return this}if(typeof a==="string")if((d=Ta.exec(a))&& -(d[1]||!b))if(d[1]){f=b?b.ownerDocument||b:s;if(a=Xa.exec(a))if(c.isPlainObject(b)){a=[s.createElement(a[1])];c.fn.attr.call(a,b,true)}else a=[f.createElement(a[1])];else{a=sa([d[1]],[f]);a=(a.cacheable?a.fragment.cloneNode(true):a.fragment).childNodes}return c.merge(this,a)}else{if(b=s.getElementById(d[2])){if(b.id!==d[2])return T.find(a);this.length=1;this[0]=b}this.context=s;this.selector=a;return this}else if(!b&&/^\w+$/.test(a)){this.selector=a;this.context=s;a=s.getElementsByTagName(a);return c.merge(this, -a)}else return!b||b.jquery?(b||T).find(a):c(b).find(a);else if(c.isFunction(a))return T.ready(a);if(a.selector!==w){this.selector=a.selector;this.context=a.context}return c.makeArray(a,this)},selector:"",jquery:"1.4.2",length:0,size:function(){return this.length},toArray:function(){return R.call(this,0)},get:function(a){return a==null?this.toArray():a<0?this.slice(a)[0]:this[a]},pushStack:function(a,b,d){var f=c();c.isArray(a)?ba.apply(f,a):c.merge(f,a);f.prevObject=this;f.context=this.context;if(b=== -"find")f.selector=this.selector+(this.selector?" ":"")+d;else if(b)f.selector=this.selector+"."+b+"("+d+")";return f},each:function(a,b){return c.each(this,a,b)},ready:function(a){c.bindReady();if(c.isReady)a.call(s,c);else Q&&Q.push(a);return this},eq:function(a){return a===-1?this.slice(a):this.slice(a,+a+1)},first:function(){return this.eq(0)},last:function(){return this.eq(-1)},slice:function(){return this.pushStack(R.apply(this,arguments),"slice",R.call(arguments).join(","))},map:function(a){return this.pushStack(c.map(this, -function(b,d){return a.call(b,d,b)}))},end:function(){return this.prevObject||c(null)},push:ba,sort:[].sort,splice:[].splice};c.fn.init.prototype=c.fn;c.extend=c.fn.extend=function(){var a=arguments[0]||{},b=1,d=arguments.length,f=false,e,j,i,o;if(typeof a==="boolean"){f=a;a=arguments[1]||{};b=2}if(typeof a!=="object"&&!c.isFunction(a))a={};if(d===b){a=this;--b}for(;b
      a"; -var e=d.getElementsByTagName("*"),j=d.getElementsByTagName("a")[0];if(!(!e||!e.length||!j)){c.support={leadingWhitespace:d.firstChild.nodeType===3,tbody:!d.getElementsByTagName("tbody").length,htmlSerialize:!!d.getElementsByTagName("link").length,style:/red/.test(j.getAttribute("style")),hrefNormalized:j.getAttribute("href")==="/a",opacity:/^0.55$/.test(j.style.opacity),cssFloat:!!j.style.cssFloat,checkOn:d.getElementsByTagName("input")[0].value==="on",optSelected:s.createElement("select").appendChild(s.createElement("option")).selected, -parentNode:d.removeChild(d.appendChild(s.createElement("div"))).parentNode===null,deleteExpando:true,checkClone:false,scriptEval:false,noCloneEvent:true,boxModel:null};b.type="text/javascript";try{b.appendChild(s.createTextNode("window."+f+"=1;"))}catch(i){}a.insertBefore(b,a.firstChild);if(A[f]){c.support.scriptEval=true;delete A[f]}try{delete b.test}catch(o){c.support.deleteExpando=false}a.removeChild(b);if(d.attachEvent&&d.fireEvent){d.attachEvent("onclick",function k(){c.support.noCloneEvent= -false;d.detachEvent("onclick",k)});d.cloneNode(true).fireEvent("onclick")}d=s.createElement("div");d.innerHTML="";a=s.createDocumentFragment();a.appendChild(d.firstChild);c.support.checkClone=a.cloneNode(true).cloneNode(true).lastChild.checked;c(function(){var k=s.createElement("div");k.style.width=k.style.paddingLeft="1px";s.body.appendChild(k);c.boxModel=c.support.boxModel=k.offsetWidth===2;s.body.removeChild(k).style.display="none"});a=function(k){var n= -s.createElement("div");k="on"+k;var r=k in n;if(!r){n.setAttribute(k,"return;");r=typeof n[k]==="function"}return r};c.support.submitBubbles=a("submit");c.support.changeBubbles=a("change");a=b=d=e=j=null}})();c.props={"for":"htmlFor","class":"className",readonly:"readOnly",maxlength:"maxLength",cellspacing:"cellSpacing",rowspan:"rowSpan",colspan:"colSpan",tabindex:"tabIndex",usemap:"useMap",frameborder:"frameBorder"};var G="jQuery"+J(),Ya=0,za={};c.extend({cache:{},expando:G,noData:{embed:true,object:true, -applet:true},data:function(a,b,d){if(!(a.nodeName&&c.noData[a.nodeName.toLowerCase()])){a=a==A?za:a;var f=a[G],e=c.cache;if(!f&&typeof b==="string"&&d===w)return null;f||(f=++Ya);if(typeof b==="object"){a[G]=f;e[f]=c.extend(true,{},b)}else if(!e[f]){a[G]=f;e[f]={}}a=e[f];if(d!==w)a[b]=d;return typeof b==="string"?a[b]:a}},removeData:function(a,b){if(!(a.nodeName&&c.noData[a.nodeName.toLowerCase()])){a=a==A?za:a;var d=a[G],f=c.cache,e=f[d];if(b){if(e){delete e[b];c.isEmptyObject(e)&&c.removeData(a)}}else{if(c.support.deleteExpando)delete a[c.expando]; -else a.removeAttribute&&a.removeAttribute(c.expando);delete f[d]}}}});c.fn.extend({data:function(a,b){if(typeof a==="undefined"&&this.length)return c.data(this[0]);else if(typeof a==="object")return this.each(function(){c.data(this,a)});var d=a.split(".");d[1]=d[1]?"."+d[1]:"";if(b===w){var f=this.triggerHandler("getData"+d[1]+"!",[d[0]]);if(f===w&&this.length)f=c.data(this[0],a);return f===w&&d[1]?this.data(d[0]):f}else return this.trigger("setData"+d[1]+"!",[d[0],b]).each(function(){c.data(this, -a,b)})},removeData:function(a){return this.each(function(){c.removeData(this,a)})}});c.extend({queue:function(a,b,d){if(a){b=(b||"fx")+"queue";var f=c.data(a,b);if(!d)return f||[];if(!f||c.isArray(d))f=c.data(a,b,c.makeArray(d));else f.push(d);return f}},dequeue:function(a,b){b=b||"fx";var 
d=c.queue(a,b),f=d.shift();if(f==="inprogress")f=d.shift();if(f){b==="fx"&&d.unshift("inprogress");f.call(a,function(){c.dequeue(a,b)})}}});c.fn.extend({queue:function(a,b){if(typeof a!=="string"){b=a;a="fx"}if(b=== -w)return c.queue(this[0],a);return this.each(function(){var d=c.queue(this,a,b);a==="fx"&&d[0]!=="inprogress"&&c.dequeue(this,a)})},dequeue:function(a){return this.each(function(){c.dequeue(this,a)})},delay:function(a,b){a=c.fx?c.fx.speeds[a]||a:a;b=b||"fx";return this.queue(b,function(){var d=this;setTimeout(function(){c.dequeue(d,b)},a)})},clearQueue:function(a){return this.queue(a||"fx",[])}});var Aa=/[\n\t]/g,ca=/\s+/,Za=/\r/g,$a=/href|src|style/,ab=/(button|input)/i,bb=/(button|input|object|select|textarea)/i, -cb=/^(a|area)$/i,Ba=/radio|checkbox/;c.fn.extend({attr:function(a,b){return X(this,a,b,true,c.attr)},removeAttr:function(a){return this.each(function(){c.attr(this,a,"");this.nodeType===1&&this.removeAttribute(a)})},addClass:function(a){if(c.isFunction(a))return this.each(function(n){var r=c(this);r.addClass(a.call(this,n,r.attr("class")))});if(a&&typeof a==="string")for(var b=(a||"").split(ca),d=0,f=this.length;d-1)return true;return false},val:function(a){if(a===w){var b=this[0];if(b){if(c.nodeName(b,"option"))return(b.attributes.value||{}).specified?b.value:b.text;if(c.nodeName(b,"select")){var d=b.selectedIndex,f=[],e=b.options;b=b.type==="select-one";if(d<0)return null;var j=b?d:0;for(d=b?d+1:e.length;j=0;else if(c.nodeName(this,"select")){var u=c.makeArray(r);c("option",this).each(function(){this.selected= -c.inArray(c(this).val(),u)>=0});if(!u.length)this.selectedIndex=-1}else this.value=r}})}});c.extend({attrFn:{val:true,css:true,html:true,text:true,data:true,width:true,height:true,offset:true},attr:function(a,b,d,f){if(!a||a.nodeType===3||a.nodeType===8)return w;if(f&&b in c.attrFn)return c(a)[b](d);f=a.nodeType!==1||!c.isXMLDoc(a);var e=d!==w;b=f&&c.props[b]||b;if(a.nodeType===1){var j=$a.test(b);if(b in a&&f&&!j){if(e){b==="type"&&ab.test(a.nodeName)&&a.parentNode&&c.error("type property can't be changed"); -a[b]=d}if(c.nodeName(a,"form")&&a.getAttributeNode(b))return a.getAttributeNode(b).nodeValue;if(b==="tabIndex")return(b=a.getAttributeNode("tabIndex"))&&b.specified?b.value:bb.test(a.nodeName)||cb.test(a.nodeName)&&a.href?0:w;return a[b]}if(!c.support.style&&f&&b==="style"){if(e)a.style.cssText=""+d;return a.style.cssText}e&&a.setAttribute(b,""+d);a=!c.support.hrefNormalized&&f&&j?a.getAttribute(b,2):a.getAttribute(b);return a===null?w:a}return c.style(a,b,d)}});var O=/\.(.*)$/,db=function(a){return a.replace(/[^\w\s\.\|`]/g, -function(b){return"\\"+b})};c.event={add:function(a,b,d,f){if(!(a.nodeType===3||a.nodeType===8)){if(a.setInterval&&a!==A&&!a.frameElement)a=A;var e,j;if(d.handler){e=d;d=e.handler}if(!d.guid)d.guid=c.guid++;if(j=c.data(a)){var i=j.events=j.events||{},o=j.handle;if(!o)j.handle=o=function(){return typeof c!=="undefined"&&!c.event.triggered?c.event.handle.apply(o.elem,arguments):w};o.elem=a;b=b.split(" ");for(var k,n=0,r;k=b[n++];){j=e?c.extend({},e):{handler:d,data:f};if(k.indexOf(".")>-1){r=k.split("."); -k=r.shift();j.namespace=r.slice(0).sort().join(".")}else{r=[];j.namespace=""}j.type=k;j.guid=d.guid;var u=i[k],z=c.event.special[k]||{};if(!u){u=i[k]=[];if(!z.setup||z.setup.call(a,f,r,o)===false)if(a.addEventListener)a.addEventListener(k,o,false);else 
a.attachEvent&&a.attachEvent("on"+k,o)}if(z.add){z.add.call(a,j);if(!j.handler.guid)j.handler.guid=d.guid}u.push(j);c.event.global[k]=true}a=null}}},global:{},remove:function(a,b,d,f){if(!(a.nodeType===3||a.nodeType===8)){var e,j=0,i,o,k,n,r,u,z=c.data(a), -C=z&&z.events;if(z&&C){if(b&&b.type){d=b.handler;b=b.type}if(!b||typeof b==="string"&&b.charAt(0)==="."){b=b||"";for(e in C)c.event.remove(a,e+b)}else{for(b=b.split(" ");e=b[j++];){n=e;i=e.indexOf(".")<0;o=[];if(!i){o=e.split(".");e=o.shift();k=new RegExp("(^|\\.)"+c.map(o.slice(0).sort(),db).join("\\.(?:.*\\.)?")+"(\\.|$)")}if(r=C[e])if(d){n=c.event.special[e]||{};for(B=f||0;B=0){a.type= -e=e.slice(0,-1);a.exclusive=true}if(!d){a.stopPropagation();c.event.global[e]&&c.each(c.cache,function(){this.events&&this.events[e]&&c.event.trigger(a,b,this.handle.elem)})}if(!d||d.nodeType===3||d.nodeType===8)return w;a.result=w;a.target=d;b=c.makeArray(b);b.unshift(a)}a.currentTarget=d;(f=c.data(d,"handle"))&&f.apply(d,b);f=d.parentNode||d.ownerDocument;try{if(!(d&&d.nodeName&&c.noData[d.nodeName.toLowerCase()]))if(d["on"+e]&&d["on"+e].apply(d,b)===false)a.result=false}catch(j){}if(!a.isPropagationStopped()&& -f)c.event.trigger(a,b,f,true);else if(!a.isDefaultPrevented()){f=a.target;var i,o=c.nodeName(f,"a")&&e==="click",k=c.event.special[e]||{};if((!k._default||k._default.call(d,a)===false)&&!o&&!(f&&f.nodeName&&c.noData[f.nodeName.toLowerCase()])){try{if(f[e]){if(i=f["on"+e])f["on"+e]=null;c.event.triggered=true;f[e]()}}catch(n){}if(i)f["on"+e]=i;c.event.triggered=false}}},handle:function(a){var b,d,f,e;a=arguments[0]=c.event.fix(a||A.event);a.currentTarget=this;b=a.type.indexOf(".")<0&&!a.exclusive; -if(!b){d=a.type.split(".");a.type=d.shift();f=new RegExp("(^|\\.)"+d.slice(0).sort().join("\\.(?:.*\\.)?")+"(\\.|$)")}e=c.data(this,"events");d=e[a.type];if(e&&d){d=d.slice(0);e=0;for(var j=d.length;e-1?c.map(a.options,function(f){return f.selected}).join("-"):"";else if(a.nodeName.toLowerCase()==="select")d=a.selectedIndex;return d},fa=function(a,b){var d=a.target,f,e;if(!(!da.test(d.nodeName)||d.readOnly)){f=c.data(d,"_change_data");e=Fa(d);if(a.type!=="focusout"||d.type!=="radio")c.data(d,"_change_data", -e);if(!(f===w||e===f))if(f!=null||e){a.type="change";return c.event.trigger(a,b,d)}}};c.event.special.change={filters:{focusout:fa,click:function(a){var b=a.target,d=b.type;if(d==="radio"||d==="checkbox"||b.nodeName.toLowerCase()==="select")return fa.call(this,a)},keydown:function(a){var b=a.target,d=b.type;if(a.keyCode===13&&b.nodeName.toLowerCase()!=="textarea"||a.keyCode===32&&(d==="checkbox"||d==="radio")||d==="select-multiple")return fa.call(this,a)},beforeactivate:function(a){a=a.target;c.data(a, -"_change_data",Fa(a))}},setup:function(){if(this.type==="file")return false;for(var a in ea)c.event.add(this,a+".specialChange",ea[a]);return da.test(this.nodeName)},teardown:function(){c.event.remove(this,".specialChange");return da.test(this.nodeName)}};ea=c.event.special.change.filters}s.addEventListener&&c.each({focus:"focusin",blur:"focusout"},function(a,b){function d(f){f=c.event.fix(f);f.type=b;return c.event.handle.call(this,f)}c.event.special[b]={setup:function(){this.addEventListener(a, -d,true)},teardown:function(){this.removeEventListener(a,d,true)}}});c.each(["bind","one"],function(a,b){c.fn[b]=function(d,f,e){if(typeof d==="object"){for(var j in d)this[b](j,f,d[j],e);return this}if(c.isFunction(f)){e=f;f=w}var i=b==="one"?c.proxy(e,function(k){c(this).unbind(k,i);return 
e.apply(this,arguments)}):e;if(d==="unload"&&b!=="one")this.one(d,f,e);else{j=0;for(var o=this.length;j0){y=t;break}}t=t[g]}m[q]=y}}}var f=/((?:\((?:\([^()]+\)|[^()]+)+\)|\[(?:\[[^[\]]*\]|['"][^'"]*['"]|[^[\]'"]+)+\]|\\.|[^ >+~,(\[\\]+)+|[>+~])(\s*,\s*)?((?:.|\r|\n)*)/g, -e=0,j=Object.prototype.toString,i=false,o=true;[0,0].sort(function(){o=false;return 0});var k=function(g,h,l,m){l=l||[];var q=h=h||s;if(h.nodeType!==1&&h.nodeType!==9)return[];if(!g||typeof g!=="string")return l;for(var p=[],v,t,y,S,H=true,M=x(h),I=g;(f.exec(""),v=f.exec(I))!==null;){I=v[3];p.push(v[1]);if(v[2]){S=v[3];break}}if(p.length>1&&r.exec(g))if(p.length===2&&n.relative[p[0]])t=ga(p[0]+p[1],h);else for(t=n.relative[p[0]]?[h]:k(p.shift(),h);p.length;){g=p.shift();if(n.relative[g])g+=p.shift(); -t=ga(g,t)}else{if(!m&&p.length>1&&h.nodeType===9&&!M&&n.match.ID.test(p[0])&&!n.match.ID.test(p[p.length-1])){v=k.find(p.shift(),h,M);h=v.expr?k.filter(v.expr,v.set)[0]:v.set[0]}if(h){v=m?{expr:p.pop(),set:z(m)}:k.find(p.pop(),p.length===1&&(p[0]==="~"||p[0]==="+")&&h.parentNode?h.parentNode:h,M);t=v.expr?k.filter(v.expr,v.set):v.set;if(p.length>0)y=z(t);else H=false;for(;p.length;){var D=p.pop();v=D;if(n.relative[D])v=p.pop();else D="";if(v==null)v=h;n.relative[D](y,v,M)}}else y=[]}y||(y=t);y||k.error(D|| -g);if(j.call(y)==="[object Array]")if(H)if(h&&h.nodeType===1)for(g=0;y[g]!=null;g++){if(y[g]&&(y[g]===true||y[g].nodeType===1&&E(h,y[g])))l.push(t[g])}else for(g=0;y[g]!=null;g++)y[g]&&y[g].nodeType===1&&l.push(t[g]);else l.push.apply(l,y);else z(y,l);if(S){k(S,q,l,m);k.uniqueSort(l)}return l};k.uniqueSort=function(g){if(B){i=o;g.sort(B);if(i)for(var h=1;h":function(g,h){var l=typeof h==="string";if(l&&!/\W/.test(h)){h=h.toLowerCase();for(var m=0,q=g.length;m=0))l||m.push(v);else if(l)h[p]=false;return false},ID:function(g){return g[1].replace(/\\/g,"")},TAG:function(g){return g[1].toLowerCase()}, -CHILD:function(g){if(g[1]==="nth"){var h=/(-?)(\d*)n((?:\+|-)?\d*)/.exec(g[2]==="even"&&"2n"||g[2]==="odd"&&"2n+1"||!/\D/.test(g[2])&&"0n+"+g[2]||g[2]);g[2]=h[1]+(h[2]||1)-0;g[3]=h[3]-0}g[0]=e++;return g},ATTR:function(g,h,l,m,q,p){h=g[1].replace(/\\/g,"");if(!p&&n.attrMap[h])g[1]=n.attrMap[h];if(g[2]==="~=")g[4]=" "+g[4]+" ";return g},PSEUDO:function(g,h,l,m,q){if(g[1]==="not")if((f.exec(g[3])||"").length>1||/^\w/.test(g[3]))g[3]=k(g[3],null,null,h);else{g=k.filter(g[3],h,l,true^q);l||m.push.apply(m, -g);return false}else if(n.match.POS.test(g[0])||n.match.CHILD.test(g[0]))return true;return g},POS:function(g){g.unshift(true);return g}},filters:{enabled:function(g){return g.disabled===false&&g.type!=="hidden"},disabled:function(g){return g.disabled===true},checked:function(g){return g.checked===true},selected:function(g){return g.selected===true},parent:function(g){return!!g.firstChild},empty:function(g){return!g.firstChild},has:function(g,h,l){return!!k(l[3],g).length},header:function(g){return/h\d/i.test(g.nodeName)}, -text:function(g){return"text"===g.type},radio:function(g){return"radio"===g.type},checkbox:function(g){return"checkbox"===g.type},file:function(g){return"file"===g.type},password:function(g){return"password"===g.type},submit:function(g){return"submit"===g.type},image:function(g){return"image"===g.type},reset:function(g){return"reset"===g.type},button:function(g){return"button"===g.type||g.nodeName.toLowerCase()==="button"},input:function(g){return/input|select|textarea|button/i.test(g.nodeName)}}, -setFilters:{first:function(g,h){return h===0},last:function(g,h,l,m){return 
h===m.length-1},even:function(g,h){return h%2===0},odd:function(g,h){return h%2===1},lt:function(g,h,l){return hl[3]-0},nth:function(g,h,l){return l[3]-0===h},eq:function(g,h,l){return l[3]-0===h}},filter:{PSEUDO:function(g,h,l,m){var q=h[1],p=n.filters[q];if(p)return p(g,l,h,m);else if(q==="contains")return(g.textContent||g.innerText||a([g])||"").indexOf(h[3])>=0;else if(q==="not"){h= -h[3];l=0;for(m=h.length;l=0}},ID:function(g,h){return g.nodeType===1&&g.getAttribute("id")===h},TAG:function(g,h){return h==="*"&&g.nodeType===1||g.nodeName.toLowerCase()===h},CLASS:function(g,h){return(" "+(g.className||g.getAttribute("class"))+" ").indexOf(h)>-1},ATTR:function(g,h){var l=h[1];g=n.attrHandle[l]?n.attrHandle[l](g):g[l]!=null?g[l]:g.getAttribute(l);l=g+"";var m=h[2];h=h[4];return g==null?m==="!=":m=== -"="?l===h:m==="*="?l.indexOf(h)>=0:m==="~="?(" "+l+" ").indexOf(h)>=0:!h?l&&g!==false:m==="!="?l!==h:m==="^="?l.indexOf(h)===0:m==="$="?l.substr(l.length-h.length)===h:m==="|="?l===h||l.substr(0,h.length+1)===h+"-":false},POS:function(g,h,l,m){var q=n.setFilters[h[2]];if(q)return q(g,l,h,m)}}},r=n.match.POS;for(var u in n.match){n.match[u]=new RegExp(n.match[u].source+/(?![^\[]*\])(?![^\(]*\))/.source);n.leftMatch[u]=new RegExp(/(^(?:.|\r|\n)*?)/.source+n.match[u].source.replace(/\\(\d+)/g,function(g, -h){return"\\"+(h-0+1)}))}var z=function(g,h){g=Array.prototype.slice.call(g,0);if(h){h.push.apply(h,g);return h}return g};try{Array.prototype.slice.call(s.documentElement.childNodes,0)}catch(C){z=function(g,h){h=h||[];if(j.call(g)==="[object Array]")Array.prototype.push.apply(h,g);else if(typeof g.length==="number")for(var l=0,m=g.length;l";var l=s.documentElement;l.insertBefore(g,l.firstChild);if(s.getElementById(h)){n.find.ID=function(m,q,p){if(typeof q.getElementById!=="undefined"&&!p)return(q=q.getElementById(m[1]))?q.id===m[1]||typeof q.getAttributeNode!=="undefined"&& -q.getAttributeNode("id").nodeValue===m[1]?[q]:w:[]};n.filter.ID=function(m,q){var p=typeof m.getAttributeNode!=="undefined"&&m.getAttributeNode("id");return m.nodeType===1&&p&&p.nodeValue===q}}l.removeChild(g);l=g=null})();(function(){var g=s.createElement("div");g.appendChild(s.createComment(""));if(g.getElementsByTagName("*").length>0)n.find.TAG=function(h,l){l=l.getElementsByTagName(h[1]);if(h[1]==="*"){h=[];for(var m=0;l[m];m++)l[m].nodeType===1&&h.push(l[m]);l=h}return l};g.innerHTML=""; -if(g.firstChild&&typeof g.firstChild.getAttribute!=="undefined"&&g.firstChild.getAttribute("href")!=="#")n.attrHandle.href=function(h){return h.getAttribute("href",2)};g=null})();s.querySelectorAll&&function(){var g=k,h=s.createElement("div");h.innerHTML="

      ";if(!(h.querySelectorAll&&h.querySelectorAll(".TEST").length===0)){k=function(m,q,p,v){q=q||s;if(!v&&q.nodeType===9&&!x(q))try{return z(q.querySelectorAll(m),p)}catch(t){}return g(m,q,p,v)};for(var l in g)k[l]=g[l];h=null}}(); -(function(){var g=s.createElement("div");g.innerHTML="
      ";if(!(!g.getElementsByClassName||g.getElementsByClassName("e").length===0)){g.lastChild.className="e";if(g.getElementsByClassName("e").length!==1){n.order.splice(1,0,"CLASS");n.find.CLASS=function(h,l,m){if(typeof l.getElementsByClassName!=="undefined"&&!m)return l.getElementsByClassName(h[1])};g=null}}})();var E=s.compareDocumentPosition?function(g,h){return!!(g.compareDocumentPosition(h)&16)}: -function(g,h){return g!==h&&(g.contains?g.contains(h):true)},x=function(g){return(g=(g?g.ownerDocument||g:0).documentElement)?g.nodeName!=="HTML":false},ga=function(g,h){var l=[],m="",q;for(h=h.nodeType?[h]:h;q=n.match.PSEUDO.exec(g);){m+=q[0];g=g.replace(n.match.PSEUDO,"")}g=n.relative[g]?g+"*":g;q=0;for(var p=h.length;q=0===d})};c.fn.extend({find:function(a){for(var b=this.pushStack("","find",a),d=0,f=0,e=this.length;f0)for(var j=d;j0},closest:function(a,b){if(c.isArray(a)){var d=[],f=this[0],e,j= -{},i;if(f&&a.length){e=0;for(var o=a.length;e-1:c(f).is(e)){d.push({selector:i,elem:f});delete j[i]}}f=f.parentNode}}return d}var k=c.expr.match.POS.test(a)?c(a,b||this.context):null;return this.map(function(n,r){for(;r&&r.ownerDocument&&r!==b;){if(k?k.index(r)>-1:c(r).is(a))return r;r=r.parentNode}return null})},index:function(a){if(!a||typeof a=== -"string")return c.inArray(this[0],a?c(a):this.parent().children());return c.inArray(a.jquery?a[0]:a,this)},add:function(a,b){a=typeof a==="string"?c(a,b||this.context):c.makeArray(a);b=c.merge(this.get(),a);return this.pushStack(qa(a[0])||qa(b[0])?b:c.unique(b))},andSelf:function(){return this.add(this.prevObject)}});c.each({parent:function(a){return(a=a.parentNode)&&a.nodeType!==11?a:null},parents:function(a){return c.dir(a,"parentNode")},parentsUntil:function(a,b,d){return c.dir(a,"parentNode", -d)},next:function(a){return c.nth(a,2,"nextSibling")},prev:function(a){return c.nth(a,2,"previousSibling")},nextAll:function(a){return c.dir(a,"nextSibling")},prevAll:function(a){return c.dir(a,"previousSibling")},nextUntil:function(a,b,d){return c.dir(a,"nextSibling",d)},prevUntil:function(a,b,d){return c.dir(a,"previousSibling",d)},siblings:function(a){return c.sibling(a.parentNode.firstChild,a)},children:function(a){return c.sibling(a.firstChild)},contents:function(a){return c.nodeName(a,"iframe")? -a.contentDocument||a.contentWindow.document:c.makeArray(a.childNodes)}},function(a,b){c.fn[a]=function(d,f){var e=c.map(this,b,d);eb.test(a)||(f=d);if(f&&typeof f==="string")e=c.filter(f,e);e=this.length>1?c.unique(e):e;if((this.length>1||gb.test(f))&&fb.test(a))e=e.reverse();return this.pushStack(e,a,R.call(arguments).join(","))}});c.extend({filter:function(a,b,d){if(d)a=":not("+a+")";return c.find.matches(a,b)},dir:function(a,b,d){var f=[];for(a=a[b];a&&a.nodeType!==9&&(d===w||a.nodeType!==1||!c(a).is(d));){a.nodeType=== -1&&f.push(a);a=a[b]}return f},nth:function(a,b,d){b=b||1;for(var f=0;a;a=a[d])if(a.nodeType===1&&++f===b)break;return a},sibling:function(a,b){for(var d=[];a;a=a.nextSibling)a.nodeType===1&&a!==b&&d.push(a);return d}});var Ja=/ jQuery\d+="(?:\d+|null)"/g,V=/^\s+/,Ka=/(<([\w:]+)[^>]*?)\/>/g,hb=/^(?:area|br|col|embed|hr|img|input|link|meta|param)$/i,La=/<([\w:]+)/,ib=/"},F={option:[1,""],legend:[1,"
      ","
      "],thead:[1,"","
      "],tr:[2,"","
      "],td:[3,"","
      "],col:[2,"","
      "],area:[1,"",""],_default:[0,"",""]};F.optgroup=F.option;F.tbody=F.tfoot=F.colgroup=F.caption=F.thead;F.th=F.td;if(!c.support.htmlSerialize)F._default=[1,"div
      ","
      "];c.fn.extend({text:function(a){if(c.isFunction(a))return this.each(function(b){var d= -c(this);d.text(a.call(this,b,d.text()))});if(typeof a!=="object"&&a!==w)return this.empty().append((this[0]&&this[0].ownerDocument||s).createTextNode(a));return c.text(this)},wrapAll:function(a){if(c.isFunction(a))return this.each(function(d){c(this).wrapAll(a.call(this,d))});if(this[0]){var b=c(a,this[0].ownerDocument).eq(0).clone(true);this[0].parentNode&&b.insertBefore(this[0]);b.map(function(){for(var d=this;d.firstChild&&d.firstChild.nodeType===1;)d=d.firstChild;return d}).append(this)}return this}, -wrapInner:function(a){if(c.isFunction(a))return this.each(function(b){c(this).wrapInner(a.call(this,b))});return this.each(function(){var b=c(this),d=b.contents();d.length?d.wrapAll(a):b.append(a)})},wrap:function(a){return this.each(function(){c(this).wrapAll(a)})},unwrap:function(){return this.parent().each(function(){c.nodeName(this,"body")||c(this).replaceWith(this.childNodes)}).end()},append:function(){return this.domManip(arguments,true,function(a){this.nodeType===1&&this.appendChild(a)})}, -prepend:function(){return this.domManip(arguments,true,function(a){this.nodeType===1&&this.insertBefore(a,this.firstChild)})},before:function(){if(this[0]&&this[0].parentNode)return this.domManip(arguments,false,function(b){this.parentNode.insertBefore(b,this)});else if(arguments.length){var a=c(arguments[0]);a.push.apply(a,this.toArray());return this.pushStack(a,"before",arguments)}},after:function(){if(this[0]&&this[0].parentNode)return this.domManip(arguments,false,function(b){this.parentNode.insertBefore(b, -this.nextSibling)});else if(arguments.length){var a=this.pushStack(this,"after",arguments);a.push.apply(a,c(arguments[0]).toArray());return a}},remove:function(a,b){for(var d=0,f;(f=this[d])!=null;d++)if(!a||c.filter(a,[f]).length){if(!b&&f.nodeType===1){c.cleanData(f.getElementsByTagName("*"));c.cleanData([f])}f.parentNode&&f.parentNode.removeChild(f)}return this},empty:function(){for(var a=0,b;(b=this[a])!=null;a++)for(b.nodeType===1&&c.cleanData(b.getElementsByTagName("*"));b.firstChild;)b.removeChild(b.firstChild); -return this},clone:function(a){var b=this.map(function(){if(!c.support.noCloneEvent&&!c.isXMLDoc(this)){var d=this.outerHTML,f=this.ownerDocument;if(!d){d=f.createElement("div");d.appendChild(this.cloneNode(true));d=d.innerHTML}return c.clean([d.replace(Ja,"").replace(/=([^="'>\s]+\/)>/g,'="$1">').replace(V,"")],f)[0]}else return this.cloneNode(true)});if(a===true){ra(this,b);ra(this.find("*"),b.find("*"))}return b},html:function(a){if(a===w)return this[0]&&this[0].nodeType===1?this[0].innerHTML.replace(Ja, -""):null;else if(typeof a==="string"&&!ta.test(a)&&(c.support.leadingWhitespace||!V.test(a))&&!F[(La.exec(a)||["",""])[1].toLowerCase()]){a=a.replace(Ka,Ma);try{for(var b=0,d=this.length;b0||e.cacheable||this.length>1?k.cloneNode(true):k)}o.length&&c.each(o,Qa)}return this}});c.fragments={};c.each({appendTo:"append",prependTo:"prepend",insertBefore:"before",insertAfter:"after",replaceAll:"replaceWith"},function(a,b){c.fn[a]=function(d){var f=[];d=c(d);var e=this.length===1&&this[0].parentNode;if(e&&e.nodeType===11&&e.childNodes.length===1&&d.length===1){d[b](this[0]); -return this}else{e=0;for(var j=d.length;e0?this.clone(true):this).get();c.fn[b].apply(c(d[e]),i);f=f.concat(i)}return this.pushStack(f,a,d.selector)}}});c.extend({clean:function(a,b,d,f){b=b||s;if(typeof b.createElement==="undefined")b=b.ownerDocument||b[0]&&b[0].ownerDocument||s;for(var 
e=[],j=0,i;(i=a[j])!=null;j++){if(typeof i==="number")i+="";if(i){if(typeof i==="string"&&!jb.test(i))i=b.createTextNode(i);else if(typeof i==="string"){i=i.replace(Ka,Ma);var o=(La.exec(i)||["", -""])[1].toLowerCase(),k=F[o]||F._default,n=k[0],r=b.createElement("div");for(r.innerHTML=k[1]+i+k[2];n--;)r=r.lastChild;if(!c.support.tbody){n=ib.test(i);o=o==="table"&&!n?r.firstChild&&r.firstChild.childNodes:k[1]===""&&!n?r.childNodes:[];for(k=o.length-1;k>=0;--k)c.nodeName(o[k],"tbody")&&!o[k].childNodes.length&&o[k].parentNode.removeChild(o[k])}!c.support.leadingWhitespace&&V.test(i)&&r.insertBefore(b.createTextNode(V.exec(i)[0]),r.firstChild);i=r.childNodes}if(i.nodeType)e.push(i);else e= -c.merge(e,i)}}if(d)for(j=0;e[j];j++)if(f&&c.nodeName(e[j],"script")&&(!e[j].type||e[j].type.toLowerCase()==="text/javascript"))f.push(e[j].parentNode?e[j].parentNode.removeChild(e[j]):e[j]);else{e[j].nodeType===1&&e.splice.apply(e,[j+1,0].concat(c.makeArray(e[j].getElementsByTagName("script"))));d.appendChild(e[j])}return e},cleanData:function(a){for(var b,d,f=c.cache,e=c.event.special,j=c.support.deleteExpando,i=0,o;(o=a[i])!=null;i++)if(d=o[c.expando]){b=f[d];if(b.events)for(var k in b.events)e[k]? -c.event.remove(o,k):Ca(o,k,b.handle);if(j)delete o[c.expando];else o.removeAttribute&&o.removeAttribute(c.expando);delete f[d]}}});var kb=/z-?index|font-?weight|opacity|zoom|line-?height/i,Na=/alpha\([^)]*\)/,Oa=/opacity=([^)]*)/,ha=/float/i,ia=/-([a-z])/ig,lb=/([A-Z])/g,mb=/^-?\d+(?:px)?$/i,nb=/^-?\d/,ob={position:"absolute",visibility:"hidden",display:"block"},pb=["Left","Right"],qb=["Top","Bottom"],rb=s.defaultView&&s.defaultView.getComputedStyle,Pa=c.support.cssFloat?"cssFloat":"styleFloat",ja= -function(a,b){return b.toUpperCase()};c.fn.css=function(a,b){return X(this,a,b,true,function(d,f,e){if(e===w)return c.curCSS(d,f);if(typeof e==="number"&&!kb.test(f))e+="px";c.style(d,f,e)})};c.extend({style:function(a,b,d){if(!a||a.nodeType===3||a.nodeType===8)return w;if((b==="width"||b==="height")&&parseFloat(d)<0)d=w;var f=a.style||a,e=d!==w;if(!c.support.opacity&&b==="opacity"){if(e){f.zoom=1;b=parseInt(d,10)+""==="NaN"?"":"alpha(opacity="+d*100+")";a=f.filter||c.curCSS(a,"filter")||"";f.filter= -Na.test(a)?a.replace(Na,b):b}return f.filter&&f.filter.indexOf("opacity=")>=0?parseFloat(Oa.exec(f.filter)[1])/100+"":""}if(ha.test(b))b=Pa;b=b.replace(ia,ja);if(e)f[b]=d;return f[b]},css:function(a,b,d,f){if(b==="width"||b==="height"){var e,j=b==="width"?pb:qb;function i(){e=b==="width"?a.offsetWidth:a.offsetHeight;f!=="border"&&c.each(j,function(){f||(e-=parseFloat(c.curCSS(a,"padding"+this,true))||0);if(f==="margin")e+=parseFloat(c.curCSS(a,"margin"+this,true))||0;else e-=parseFloat(c.curCSS(a, -"border"+this+"Width",true))||0})}a.offsetWidth!==0?i():c.swap(a,ob,i);return Math.max(0,Math.round(e))}return c.curCSS(a,b,d)},curCSS:function(a,b,d){var f,e=a.style;if(!c.support.opacity&&b==="opacity"&&a.currentStyle){f=Oa.test(a.currentStyle.filter||"")?parseFloat(RegExp.$1)/100+"":"";return f===""?"1":f}if(ha.test(b))b=Pa;if(!d&&e&&e[b])f=e[b];else if(rb){if(ha.test(b))b="float";b=b.replace(lb,"-$1").toLowerCase();e=a.ownerDocument.defaultView;if(!e)return null;if(a=e.getComputedStyle(a,null))f= -a.getPropertyValue(b);if(b==="opacity"&&f==="")f="1"}else if(a.currentStyle){d=b.replace(ia,ja);f=a.currentStyle[b]||a.currentStyle[d];if(!mb.test(f)&&nb.test(f)){b=e.left;var 
j=a.runtimeStyle.left;a.runtimeStyle.left=a.currentStyle.left;e.left=d==="fontSize"?"1em":f||0;f=e.pixelLeft+"px";e.left=b;a.runtimeStyle.left=j}}return f},swap:function(a,b,d){var f={};for(var e in b){f[e]=a.style[e];a.style[e]=b[e]}d.call(a);for(e in b)a.style[e]=f[e]}});if(c.expr&&c.expr.filters){c.expr.filters.hidden=function(a){var b= -a.offsetWidth,d=a.offsetHeight,f=a.nodeName.toLowerCase()==="tr";return b===0&&d===0&&!f?true:b>0&&d>0&&!f?false:c.curCSS(a,"display")==="none"};c.expr.filters.visible=function(a){return!c.expr.filters.hidden(a)}}var sb=J(),tb=//gi,ub=/select|textarea/i,vb=/color|date|datetime|email|hidden|month|number|password|range|search|tel|text|time|url|week/i,N=/=\?(&|$)/,ka=/\?/,wb=/(\?|&)_=.*?(&|$)/,xb=/^(\w+:)?\/\/([^\/?#]+)/,yb=/%20/g,zb=c.fn.load;c.fn.extend({load:function(a,b,d){if(typeof a!== -"string")return zb.call(this,a);else if(!this.length)return this;var f=a.indexOf(" ");if(f>=0){var e=a.slice(f,a.length);a=a.slice(0,f)}f="GET";if(b)if(c.isFunction(b)){d=b;b=null}else if(typeof b==="object"){b=c.param(b,c.ajaxSettings.traditional);f="POST"}var j=this;c.ajax({url:a,type:f,dataType:"html",data:b,complete:function(i,o){if(o==="success"||o==="notmodified")j.html(e?c("
      ").append(i.responseText.replace(tb,"")).find(e):i.responseText);d&&j.each(d,[i.responseText,o,i])}});return this}, -serialize:function(){return c.param(this.serializeArray())},serializeArray:function(){return this.map(function(){return this.elements?c.makeArray(this.elements):this}).filter(function(){return this.name&&!this.disabled&&(this.checked||ub.test(this.nodeName)||vb.test(this.type))}).map(function(a,b){a=c(this).val();return a==null?null:c.isArray(a)?c.map(a,function(d){return{name:b.name,value:d}}):{name:b.name,value:a}}).get()}});c.each("ajaxStart ajaxStop ajaxComplete ajaxError ajaxSuccess ajaxSend".split(" "), -function(a,b){c.fn[b]=function(d){return this.bind(b,d)}});c.extend({get:function(a,b,d,f){if(c.isFunction(b)){f=f||d;d=b;b=null}return c.ajax({type:"GET",url:a,data:b,success:d,dataType:f})},getScript:function(a,b){return c.get(a,null,b,"script")},getJSON:function(a,b,d){return c.get(a,b,d,"json")},post:function(a,b,d,f){if(c.isFunction(b)){f=f||d;d=b;b={}}return c.ajax({type:"POST",url:a,data:b,success:d,dataType:f})},ajaxSetup:function(a){c.extend(c.ajaxSettings,a)},ajaxSettings:{url:location.href, -global:true,type:"GET",contentType:"application/x-www-form-urlencoded",processData:true,async:true,xhr:A.XMLHttpRequest&&(A.location.protocol!=="file:"||!A.ActiveXObject)?function(){return new A.XMLHttpRequest}:function(){try{return new A.ActiveXObject("Microsoft.XMLHTTP")}catch(a){}},accepts:{xml:"application/xml, text/xml",html:"text/html",script:"text/javascript, application/javascript",json:"application/json, text/javascript",text:"text/plain",_default:"*/*"}},lastModified:{},etag:{},ajax:function(a){function b(){e.success&& -e.success.call(k,o,i,x);e.global&&f("ajaxSuccess",[x,e])}function d(){e.complete&&e.complete.call(k,x,i);e.global&&f("ajaxComplete",[x,e]);e.global&&!--c.active&&c.event.trigger("ajaxStop")}function f(q,p){(e.context?c(e.context):c.event).trigger(q,p)}var e=c.extend(true,{},c.ajaxSettings,a),j,i,o,k=a&&a.context||e,n=e.type.toUpperCase();if(e.data&&e.processData&&typeof e.data!=="string")e.data=c.param(e.data,e.traditional);if(e.dataType==="jsonp"){if(n==="GET")N.test(e.url)||(e.url+=(ka.test(e.url)? 
-"&":"?")+(e.jsonp||"callback")+"=?");else if(!e.data||!N.test(e.data))e.data=(e.data?e.data+"&":"")+(e.jsonp||"callback")+"=?";e.dataType="json"}if(e.dataType==="json"&&(e.data&&N.test(e.data)||N.test(e.url))){j=e.jsonpCallback||"jsonp"+sb++;if(e.data)e.data=(e.data+"").replace(N,"="+j+"$1");e.url=e.url.replace(N,"="+j+"$1");e.dataType="script";A[j]=A[j]||function(q){o=q;b();d();A[j]=w;try{delete A[j]}catch(p){}z&&z.removeChild(C)}}if(e.dataType==="script"&&e.cache===null)e.cache=false;if(e.cache=== -false&&n==="GET"){var r=J(),u=e.url.replace(wb,"$1_="+r+"$2");e.url=u+(u===e.url?(ka.test(e.url)?"&":"?")+"_="+r:"")}if(e.data&&n==="GET")e.url+=(ka.test(e.url)?"&":"?")+e.data;e.global&&!c.active++&&c.event.trigger("ajaxStart");r=(r=xb.exec(e.url))&&(r[1]&&r[1]!==location.protocol||r[2]!==location.host);if(e.dataType==="script"&&n==="GET"&&r){var z=s.getElementsByTagName("head")[0]||s.documentElement,C=s.createElement("script");C.src=e.url;if(e.scriptCharset)C.charset=e.scriptCharset;if(!j){var B= -false;C.onload=C.onreadystatechange=function(){if(!B&&(!this.readyState||this.readyState==="loaded"||this.readyState==="complete")){B=true;b();d();C.onload=C.onreadystatechange=null;z&&C.parentNode&&z.removeChild(C)}}}z.insertBefore(C,z.firstChild);return w}var E=false,x=e.xhr();if(x){e.username?x.open(n,e.url,e.async,e.username,e.password):x.open(n,e.url,e.async);try{if(e.data||a&&a.contentType)x.setRequestHeader("Content-Type",e.contentType);if(e.ifModified){c.lastModified[e.url]&&x.setRequestHeader("If-Modified-Since", -c.lastModified[e.url]);c.etag[e.url]&&x.setRequestHeader("If-None-Match",c.etag[e.url])}r||x.setRequestHeader("X-Requested-With","XMLHttpRequest");x.setRequestHeader("Accept",e.dataType&&e.accepts[e.dataType]?e.accepts[e.dataType]+", */*":e.accepts._default)}catch(ga){}if(e.beforeSend&&e.beforeSend.call(k,x,e)===false){e.global&&!--c.active&&c.event.trigger("ajaxStop");x.abort();return false}e.global&&f("ajaxSend",[x,e]);var g=x.onreadystatechange=function(q){if(!x||x.readyState===0||q==="abort"){E|| -d();E=true;if(x)x.onreadystatechange=c.noop}else if(!E&&x&&(x.readyState===4||q==="timeout")){E=true;x.onreadystatechange=c.noop;i=q==="timeout"?"timeout":!c.httpSuccess(x)?"error":e.ifModified&&c.httpNotModified(x,e.url)?"notmodified":"success";var p;if(i==="success")try{o=c.httpData(x,e.dataType,e)}catch(v){i="parsererror";p=v}if(i==="success"||i==="notmodified")j||b();else c.handleError(e,x,i,p);d();q==="timeout"&&x.abort();if(e.async)x=null}};try{var h=x.abort;x.abort=function(){x&&h.call(x); -g("abort")}}catch(l){}e.async&&e.timeout>0&&setTimeout(function(){x&&!E&&g("timeout")},e.timeout);try{x.send(n==="POST"||n==="PUT"||n==="DELETE"?e.data:null)}catch(m){c.handleError(e,x,null,m);d()}e.async||g();return x}},handleError:function(a,b,d,f){if(a.error)a.error.call(a.context||a,b,d,f);if(a.global)(a.context?c(a.context):c.event).trigger("ajaxError",[b,a,f])},active:0,httpSuccess:function(a){try{return!a.status&&location.protocol==="file:"||a.status>=200&&a.status<300||a.status===304||a.status=== -1223||a.status===0}catch(b){}return false},httpNotModified:function(a,b){var d=a.getResponseHeader("Last-Modified"),f=a.getResponseHeader("Etag");if(d)c.lastModified[b]=d;if(f)c.etag[b]=f;return a.status===304||a.status===0},httpData:function(a,b,d){var f=a.getResponseHeader("content-type")||"",e=b==="xml"||!b&&f.indexOf("xml")>=0;a=e?a.responseXML:a.responseText;e&&a.documentElement.nodeName==="parsererror"&&c.error("parsererror");if(d&&d.dataFilter)a=d.dataFilter(a,b);if(typeof 
a==="string")if(b=== -"json"||!b&&f.indexOf("json")>=0)a=c.parseJSON(a);else if(b==="script"||!b&&f.indexOf("javascript")>=0)c.globalEval(a);return a},param:function(a,b){function d(i,o){if(c.isArray(o))c.each(o,function(k,n){b||/\[\]$/.test(i)?f(i,n):d(i+"["+(typeof n==="object"||c.isArray(n)?k:"")+"]",n)});else!b&&o!=null&&typeof o==="object"?c.each(o,function(k,n){d(i+"["+k+"]",n)}):f(i,o)}function f(i,o){o=c.isFunction(o)?o():o;e[e.length]=encodeURIComponent(i)+"="+encodeURIComponent(o)}var e=[];if(b===w)b=c.ajaxSettings.traditional; -if(c.isArray(a)||a.jquery)c.each(a,function(){f(this.name,this.value)});else for(var j in a)d(j,a[j]);return e.join("&").replace(yb,"+")}});var la={},Ab=/toggle|show|hide/,Bb=/^([+-]=)?([\d+-.]+)(.*)$/,W,va=[["height","marginTop","marginBottom","paddingTop","paddingBottom"],["width","marginLeft","marginRight","paddingLeft","paddingRight"],["opacity"]];c.fn.extend({show:function(a,b){if(a||a===0)return this.animate(K("show",3),a,b);else{a=0;for(b=this.length;a").appendTo("body");f=e.css("display");if(f==="none")f="block";e.remove();la[d]=f}c.data(this[a],"olddisplay",f)}}a=0;for(b=this.length;a=0;f--)if(d[f].elem===this){b&&d[f](true);d.splice(f,1)}});b||this.dequeue();return this}});c.each({slideDown:K("show",1),slideUp:K("hide",1),slideToggle:K("toggle",1),fadeIn:{opacity:"show"},fadeOut:{opacity:"hide"}},function(a,b){c.fn[a]=function(d,f){return this.animate(b,d,f)}});c.extend({speed:function(a,b,d){var f=a&&typeof a==="object"?a:{complete:d||!d&&b||c.isFunction(a)&&a,duration:a,easing:d&&b||b&&!c.isFunction(b)&&b};f.duration=c.fx.off?0:typeof f.duration=== -"number"?f.duration:c.fx.speeds[f.duration]||c.fx.speeds._default;f.old=f.complete;f.complete=function(){f.queue!==false&&c(this).dequeue();c.isFunction(f.old)&&f.old.call(this)};return f},easing:{linear:function(a,b,d,f){return d+f*a},swing:function(a,b,d,f){return(-Math.cos(a*Math.PI)/2+0.5)*f+d}},timers:[],fx:function(a,b,d){this.options=b;this.elem=a;this.prop=d;if(!b.orig)b.orig={}}});c.fx.prototype={update:function(){this.options.step&&this.options.step.call(this.elem,this.now,this);(c.fx.step[this.prop]|| -c.fx.step._default)(this);if((this.prop==="height"||this.prop==="width")&&this.elem.style)this.elem.style.display="block"},cur:function(a){if(this.elem[this.prop]!=null&&(!this.elem.style||this.elem.style[this.prop]==null))return this.elem[this.prop];return(a=parseFloat(c.css(this.elem,this.prop,a)))&&a>-10000?a:parseFloat(c.curCSS(this.elem,this.prop))||0},custom:function(a,b,d){function f(j){return e.step(j)}this.startTime=J();this.start=a;this.end=b;this.unit=d||this.unit||"px";this.now=this.start; -this.pos=this.state=0;var e=this;f.elem=this.elem;if(f()&&c.timers.push(f)&&!W)W=setInterval(c.fx.tick,13)},show:function(){this.options.orig[this.prop]=c.style(this.elem,this.prop);this.options.show=true;this.custom(this.prop==="width"||this.prop==="height"?1:0,this.cur());c(this.elem).show()},hide:function(){this.options.orig[this.prop]=c.style(this.elem,this.prop);this.options.hide=true;this.custom(this.cur(),0)},step:function(a){var b=J(),d=true;if(a||b>=this.options.duration+this.startTime){this.now= -this.end;this.pos=this.state=1;this.update();this.options.curAnim[this.prop]=true;for(var f in 
this.options.curAnim)if(this.options.curAnim[f]!==true)d=false;if(d){if(this.options.display!=null){this.elem.style.overflow=this.options.overflow;a=c.data(this.elem,"olddisplay");this.elem.style.display=a?a:this.options.display;if(c.css(this.elem,"display")==="none")this.elem.style.display="block"}this.options.hide&&c(this.elem).hide();if(this.options.hide||this.options.show)for(var e in this.options.curAnim)c.style(this.elem, -e,this.options.orig[e]);this.options.complete.call(this.elem)}return false}else{e=b-this.startTime;this.state=e/this.options.duration;a=this.options.easing||(c.easing.swing?"swing":"linear");this.pos=c.easing[this.options.specialEasing&&this.options.specialEasing[this.prop]||a](this.state,e,0,1,this.options.duration);this.now=this.start+(this.end-this.start)*this.pos;this.update()}return true}};c.extend(c.fx,{tick:function(){for(var a=c.timers,b=0;b
      "; -a.insertBefore(b,a.firstChild);d=b.firstChild;f=d.firstChild;e=d.nextSibling.firstChild.firstChild;this.doesNotAddBorder=f.offsetTop!==5;this.doesAddBorderForTableAndCells=e.offsetTop===5;f.style.position="fixed";f.style.top="20px";this.supportsFixedPosition=f.offsetTop===20||f.offsetTop===15;f.style.position=f.style.top="";d.style.overflow="hidden";d.style.position="relative";this.subtractsBorderForOverflowNotVisible=f.offsetTop===-5;this.doesNotIncludeMarginInBodyOffset=a.offsetTop!==j;a.removeChild(b); -c.offset.initialize=c.noop},bodyOffset:function(a){var b=a.offsetTop,d=a.offsetLeft;c.offset.initialize();if(c.offset.doesNotIncludeMarginInBodyOffset){b+=parseFloat(c.curCSS(a,"marginTop",true))||0;d+=parseFloat(c.curCSS(a,"marginLeft",true))||0}return{top:b,left:d}},setOffset:function(a,b,d){if(/static/.test(c.curCSS(a,"position")))a.style.position="relative";var f=c(a),e=f.offset(),j=parseInt(c.curCSS(a,"top",true),10)||0,i=parseInt(c.curCSS(a,"left",true),10)||0;if(c.isFunction(b))b=b.call(a, -d,e);d={top:b.top-e.top+j,left:b.left-e.left+i};"using"in b?b.using.call(a,d):f.css(d)}};c.fn.extend({position:function(){if(!this[0])return null;var a=this[0],b=this.offsetParent(),d=this.offset(),f=/^body|html$/i.test(b[0].nodeName)?{top:0,left:0}:b.offset();d.top-=parseFloat(c.curCSS(a,"marginTop",true))||0;d.left-=parseFloat(c.curCSS(a,"marginLeft",true))||0;f.top+=parseFloat(c.curCSS(b[0],"borderTopWidth",true))||0;f.left+=parseFloat(c.curCSS(b[0],"borderLeftWidth",true))||0;return{top:d.top- -f.top,left:d.left-f.left}},offsetParent:function(){return this.map(function(){for(var a=this.offsetParent||s.body;a&&!/^body|html$/i.test(a.nodeName)&&c.css(a,"position")==="static";)a=a.offsetParent;return a})}});c.each(["Left","Top"],function(a,b){var d="scroll"+b;c.fn[d]=function(f){var e=this[0],j;if(!e)return null;if(f!==w)return this.each(function(){if(j=wa(this))j.scrollTo(!a?f:c(j).scrollLeft(),a?f:c(j).scrollTop());else this[d]=f});else return(j=wa(e))?"pageXOffset"in j?j[a?"pageYOffset": -"pageXOffset"]:c.support.boxModel&&j.document.documentElement[d]||j.document.body[d]:e[d]}});c.each(["Height","Width"],function(a,b){var d=b.toLowerCase();c.fn["inner"+b]=function(){return this[0]?c.css(this[0],d,false,"padding"):null};c.fn["outer"+b]=function(f){return this[0]?c.css(this[0],d,false,f?"margin":"border"):null};c.fn[d]=function(f){var e=this[0];if(!e)return f==null?null:this;if(c.isFunction(f))return this.each(function(j){var i=c(this);i[d](f.call(this,j,i[d]()))});return"scrollTo"in -e&&e.document?e.document.compatMode==="CSS1Compat"&&e.document.documentElement["client"+b]||e.document.body["client"+b]:e.nodeType===9?Math.max(e.documentElement["client"+b],e.body["scroll"+b],e.documentElement["scroll"+b],e.body["offset"+b],e.documentElement["offset"+b]):f===w?c.css(e,d):this.css(d,typeof f==="string"?f:f+"px")}});A.jQuery=A.$=c})(window); diff --git a/mapreduce/static/overview.html b/mapreduce/static/overview.html deleted file mode 100755 index 8063ffc..0000000 --- a/mapreduce/static/overview.html +++ /dev/null @@ -1,64 +0,0 @@ - - - - MapReduce Overview - - - - - - - - - -

      MapReduce Overview

      - -
      -

      Running jobs

      - - - - - - - - - - - - - - - - - - - - - - - - - -
Status / View / ID / Name / Activity / Start time / Time elapsed / Control
      - - -
      Loading...
      -
      - - -
      -

      Launch job

      -
      - Loading... -
      -
      -
      -
      - - - - - diff --git a/mapreduce/static/status.js b/mapreduce/static/status.js deleted file mode 100755 index b2ad852..0000000 --- a/mapreduce/static/status.js +++ /dev/null @@ -1,602 +0,0 @@ -/* - * Copyright 2010 Google Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/********* Common functions *********/ - -// Sets the status butter, optionally indicating if it's an error message. -function setButter(message, error) { - var butter = $("#butter"); - // Prevent flicker on butter update by hiding it first. - butter.css('display', 'none'); - if (error) { - butter.removeClass('info').addClass('error').text(message); - } else { - butter.removeClass('error').addClass('info').text(message); - } - butter.css('display', null) - $(document).scrollTop(0); -} - -// Given an AJAX error message (which is empty or null on success) and a -// data payload containing JSON, parses the data payload and returns the object. -// Server-side errors and AJAX errors will be brought to the user's attention -// if present in the response object -function getResponseDataJson(error, data) { - var response = null; - try { - response = $.parseJSON(data); - } catch (e) { - error = '' + e; - } - if (response && response.error_class) { - error = response.error_class + ': ' + response.error_message; - } else if (!response) { - error = 'Could not parse response JSON data.'; - } - if (error) { - setButter('Error -- ' + error, true); - return null; - } - return response; -} - -// Retrieve the list of configs. -function listConfigs(resultFunc) { - $.ajax({ - type: 'GET', - url: 'command/list_configs', - dataType: 'text', - error: function(request, textStatus) { - getResponseDataJson(textStatus); - }, - success: function(data, textStatus, request) { - var response = getResponseDataJson(null, data); - if (response) { - resultFunc(response.configs); - } - } - }); -} - -// Return the list of job records. -function listJobs(cursor, resultFunc) { - $.ajax({ - type: 'GET', - url: 'command/list_jobs', - dataType: 'text', - error: function(request, textStatus) { - getResponseDataJson(textStatus); - }, - success: function(data, textStatus, request) { - var response = getResponseDataJson(null, data); - if (response) { - resultFunc(response.jobs, response.cursor); - } - } - }); -} - -// Cleans up a job with the given name and ID, updates butter with status. -function cleanUpJob(name, mapreduce_id) { - if (!confirm('Clean up job "' + name + - '" with ID "' + mapreduce_id + '"?')) { - return; - } - - $.ajax({ - async: false, - type: 'POST', - url: 'command/cleanup_job', - data: {'mapreduce_id': mapreduce_id}, - dataType: 'text', - error: function(request, textStatus) { - getResponseDataJson(textStatus); - }, - success: function(data, textStatus, request) { - var response = getResponseDataJson(null, data); - if (response) { - setButter(response.status); - if (!response.status.error) { - $('#row-' + mapreduce_id).remove(); - } - } - } - }); -} - -// Aborts the job with the given ID, updates butter with status. 
-function abortJob(name, mapreduce_id) { - if (!confirm('Abort job "' + name + '" with ID "' + mapreduce_id + '"?')) { - return; - } - - $.ajax({ - async: false, - type: 'POST', - url: 'command/abort_job', - data: {'mapreduce_id': mapreduce_id}, - dataType: 'text', - error: function(request, textStatus) { - getResponseDataJson(textStatus); - }, - success: function(data, textStatus, request) { - var response = getResponseDataJson(null, data); - if (response) { - setButter(response.status); - } - } - }); -} - -// Retrieve the detail for a job. -function getJobDetail(jobId, resultFunc) { - $.ajax({ - type: 'GET', - url: 'command/get_job_detail', - dataType: 'text', - data: {'mapreduce_id': jobId}, - error: function(request, textStatus) { - getResponseDataJson(textStatus); - }, - success: function(data, textStatus, request) { - var response = getResponseDataJson(null, data); - if (response) { - resultFunc(jobId, response); - } - } - }); -} - -// Turns a key into a nicely scrubbed parameter name. -function getNiceParamKey(key) { - // TODO: Figure out if we want to do this at all. - return key; -} - -// Returns an array of the keys of an object in sorted order. -function getSortedKeys(obj) { - var keys = []; - $.each(obj, function(key, value) { - keys.push(key); - }); - keys.sort(); - return keys; -} - -// Gets a local datestring from a UNIX timestamp in milliseconds. -function getLocalTimestring(timestamp_ms) { - var when = new Date(); - when.setTime(timestamp_ms); - return when.toLocaleString(); -} - -function leftPadNumber(number, minSize, paddingChar) { - var stringified = '' + number; - if (stringified.length < minSize) { - for (var i = 0; i < (minSize - stringified.length); ++i) { - stringified = paddingChar + stringified; - } - } - return stringified; -} - -// Get locale time string for time portion of job runtime. Specially -// handle number of days running as a prefix. -function getElapsedTimeString(start_timestamp_ms, updated_timestamp_ms) { - var updatedDiff = updated_timestamp_ms - start_timestamp_ms; - var updatedDays = Math.floor(updatedDiff / 86400000.0); - updatedDiff -= (updatedDays * 86400000.0); - var updatedHours = Math.floor(updatedDiff / 3600000.0); - updatedDiff -= (updatedHours * 3600000.0); - var updatedMinutes = Math.floor(updatedDiff / 60000.0); - updatedDiff -= (updatedMinutes * 60000.0); - var updatedSeconds = Math.floor(updatedDiff / 1000.0); - - var updatedString = ''; - if (updatedDays == 1) { - updatedString = '1 day, '; - } else if (updatedDays > 1) { - updatedString = '' + updatedDays + ' days, '; - } - updatedString += - leftPadNumber(updatedHours, 2, '0') + ':' + - leftPadNumber(updatedMinutes, 2, '0') + ':' + - leftPadNumber(updatedSeconds, 2, '0'); - - return updatedString; -} - -// Retrieves the mapreduce_id from the query string. Assumes that it is -// the only querystring parameter. -function getJobId() { - var index = window.location.search.lastIndexOf("="); - if (index == -1) { - return ''; - } - return decodeURIComponent(window.location.search.substr(index+1)); -} - -/********* Specific to overview status page *********/ - -//////// Running jobs overview. -function initJobOverview(jobs, cursor) { - // Empty body. - var body = $('#running-list > tbody'); - body.empty(); - - if (!jobs || (jobs && jobs.length == 0)) { - $('').text("No job records found.").appendTo(body); - return; - } - - // Show header. - $('#running-list > thead').css('display', null); - - // Populate the table. 
- $.each(jobs, function(index, job) { - var row = $(''); - - // TODO: Style running colgroup for capitalization. - var status = (job.active ? 'running' : job.result_status) || 'unknown'; - row.append($('').text(status)); - - $('').append( - $('') - .attr('href', 'detail?mapreduce_id=' + job.mapreduce_id) - .text('Detail')).appendTo(row); - - row.append($('').text(job.mapreduce_id)) - .append($('').text(job.name)); - - var activity = '' + job.active_shards + ' / ' + job.shards + ' shards'; - row.append($('').text(activity)) - - row.append($('').text(getLocalTimestring(job.start_timestamp_ms))); - - row.append($('').text(getElapsedTimeString( - job.start_timestamp_ms, job.updated_timestamp_ms))); - - // Controller links for abort, cleanup, etc. - if (job.active) { - var control = $('').text('Abort') - .click(function(event) { - abortJob(job.name, job.mapreduce_id); - event.stopPropagation(); - return false; - }); - row.append($('').append(control)); - } else { - var control = $('').text('Cleanup') - .click(function(event) { - cleanUpJob(job.name, job.mapreduce_id); - event.stopPropagation(); - return false; - }); - row.append($('').append(control)); - } - row.appendTo(body); - }); - - // Set up the next/first page links. - $('#running-first-page') - .css('display', null) - .unbind('click') - .click(function() { - listJobs(null, initJobOverview); - return false; - }); - $('#running-next-page').unbind('click'); - if (cursor) { - $('#running-next-page') - .css('display', null) - .click(function() { - listJobs(cursor, initJobOverview); - return false; - }); - } else { - $('#running-next-page').css('display', 'none'); - } - $('#running-list > tfoot').css('display', null); -} - -//////// Launching jobs. - -var FIXED_JOB_PARAMS = [ - 'name', 'mapper_input_reader', 'mapper_handler', 'mapper_params_validator' -]; - -var EDITABLE_JOB_PARAMS = ['shard_count', 'processing_rate', 'queue_name']; - -function getJobForm(name) { - return $('form.run-job > input[name="name"][value="' + name + '"]').parent(); -} - -function showRunJobConfig(name) { - var matchedForm = null; - $.each($('form.run-job'), function(index, jobForm) { - if ($(jobForm).find('input[name="name"]').val() == name) { - matchedForm = jobForm; - } else { - $(jobForm).css('display', 'none'); - } - }); - $(matchedForm).css('display', null); -} - -function runJobDone(name, error, data) { - var jobForm = getJobForm(name); - var response = getResponseDataJson(error, data); - if (response) { - setButter('Successfully started job "' + response['mapreduce_id'] + '"'); - listJobs(null, initJobOverview); - } - jobForm.find('input[type="submit"]').attr('disabled', null); -} - -function runJob(name) { - var jobForm = getJobForm(name); - jobForm.find('input[type="submit"]').attr('disabled', 'disabled'); - $.ajax({ - type: 'POST', - url: 'command/start_job', - data: jobForm.serialize(), - dataType: 'text', - error: function(request, textStatus) { - runJobDone(name, textStatus); - }, - success: function(data, textStatus, request) { - runJobDone(name, null, data); - } - }); -} - -function initJobLaunching(configs) { - $('#launch-control').empty(); - if (!configs || (configs && configs.length == 0)) { - $('#launch-control').append('No job configurations found.'); - return; - } - - // Set up job config forms. - $.each(configs, function(index, config) { - var jobForm = $('
      ') - .submit(function() { - runJob(config.name); - return false; - }) - .css('display', 'none') - .appendTo("#launch-container"); - - // Fixed job config values. - $.each(FIXED_JOB_PARAMS, function(unused, key) { - var value = config[key]; - if (!value) return; - if (key != 'name') { - // Name is up in the page title so doesn't need to be shown again. - $('

      ') - .append($('').text(getNiceParamKey(key))) - .append($('').text(value)) - .appendTo(jobForm); - } - $('') - .attr('name', key) - .attr('value', value) - .appendTo(jobForm); - }); - - // Add parameter values to the job form. - function addParameters(params, prefix) { - if (!params) { - return; - } - - var sortedParams = getSortedKeys(params); - $.each(sortedParams, function(index, key) { - var value = params[key]; - var paramId = 'job-' + prefix + key + '-param'; - var paramP = $('

      '); - - // Deal with the case in which the value is an object rather than - // just the default value string. - var prettyKey = key; - if (value && value["human_name"]) { - prettyKey = value["human_name"]; - } - - if (value && value["default_value"]) { - value = value["default_value"]; - } - - $('Calendars
      - - -

        -{% for calendar in calendars %} -
• {{calendar.title}} (edit) -
        - - -
        -
      • - - -{% else %} - -
      • No calendars
      • -{% endfor %} -
      - -{% endblock %} \ No newline at end of file diff --git a/django_templates/email/manage_subscription.txt b/templates/email/manage_subscription.txt similarity index 100% rename from django_templates/email/manage_subscription.txt rename to templates/email/manage_subscription.txt diff --git a/django_templates/email/verify_subscription.txt b/templates/email/verify_subscription.txt similarity index 100% rename from django_templates/email/verify_subscription.txt rename to templates/email/verify_subscription.txt diff --git a/django_templates/events/add.html b/templates/events/add.html similarity index 100% rename from django_templates/events/add.html rename to templates/events/add.html diff --git a/django_templates/events/edit_form.html b/templates/events/edit_form.html similarity index 100% rename from django_templates/events/edit_form.html rename to templates/events/edit_form.html diff --git a/django_templates/events/events.html b/templates/events/events.html similarity index 100% rename from django_templates/events/events.html rename to templates/events/events.html diff --git a/django_templates/events/one_event.html b/templates/events/one_event.html similarity index 100% rename from django_templates/events/one_event.html rename to templates/events/one_event.html diff --git a/django_templates/events/one_event_newsletter.html b/templates/events/one_event_newsletter.html similarity index 100% rename from django_templates/events/one_event_newsletter.html rename to templates/events/one_event_newsletter.html diff --git a/django_templates/events/one_event_thisweek.html b/templates/events/one_event_thisweek.html similarity index 100% rename from django_templates/events/one_event_thisweek.html rename to templates/events/one_event_thisweek.html diff --git a/django_templates/events/queue.html b/templates/events/queue.html similarity index 100% rename from django_templates/events/queue.html rename to templates/events/queue.html diff --git a/django_templates/eventsite-safe/addevent.html b/templates/eventsite-safe/addevent.html similarity index 100% rename from django_templates/eventsite-safe/addevent.html rename to templates/eventsite-safe/addevent.html diff --git a/django_templates/eventsite-safe/admin.html b/templates/eventsite-safe/admin.html similarity index 100% rename from django_templates/eventsite-safe/admin.html rename to templates/eventsite-safe/admin.html diff --git a/django_templates/eventsite/admin.html b/templates/eventsite/admin.html similarity index 100% rename from django_templates/eventsite/admin.html rename to templates/eventsite/admin.html diff --git a/django_templates/eventsite/front-page.html b/templates/eventsite/front-page.html similarity index 100% rename from django_templates/eventsite/front-page.html rename to templates/eventsite/front-page.html diff --git a/django_templates/eventsite/front_page_this_week_header.html b/templates/eventsite/front_page_this_week_header.html similarity index 100% rename from django_templates/eventsite/front_page_this_week_header.html rename to templates/eventsite/front_page_this_week_header.html diff --git a/django_templates/eventsite/jump.html b/templates/eventsite/jump.html similarity index 100% rename from django_templates/eventsite/jump.html rename to templates/eventsite/jump.html diff --git a/django_templates/eventsite/newsletter.html b/templates/eventsite/newsletter.html similarity index 100% rename from django_templates/eventsite/newsletter.html rename to templates/eventsite/newsletter.html diff --git 
a/django_templates/eventsite/sidebar.html b/templates/eventsite/sidebar.html similarity index 100% rename from django_templates/eventsite/sidebar.html rename to templates/eventsite/sidebar.html diff --git a/django_templates/eventsite/tagpage.html b/templates/eventsite/tagpage.html similarity index 100% rename from django_templates/eventsite/tagpage.html rename to templates/eventsite/tagpage.html diff --git a/django_templates/eventsite/week.html b/templates/eventsite/week.html similarity index 100% rename from django_templates/eventsite/week.html rename to templates/eventsite/week.html diff --git a/django_templates/eventsite/week.xml b/templates/eventsite/week.xml similarity index 100% rename from django_templates/eventsite/week.xml rename to templates/eventsite/week.xml diff --git a/django_templates/feeds/latest_description.html b/templates/feeds/latest_description.html similarity index 100% rename from django_templates/feeds/latest_description.html rename to templates/feeds/latest_description.html diff --git a/templates/form_macros.html b/templates/form_macros.html deleted file mode 100755 index 17d33db..0000000 --- a/templates/form_macros.html +++ /dev/null @@ -1,79 +0,0 @@ -{%- macro form_field_label(field) -%} - -{% endmacro %} - -{%- macro form_field_description(field) -%} - {% if field.description %} - {{ field.description }} - {% endif %} -{%- endmacro -%} - -{%- macro form_field_errors(field) -%} - {% if field.errors %} -
        - {%- for error in field.errors -%} -
      • {{ error }}
      • - {%- endfor -%} -
      - {% endif %} -{%- endmacro -%} - -{%- macro form_field_boolean(field) -%} - {{ field(**kwargs) }} - {{ form_field_label(field) }} - {{ form_field_description(field) }} - {{ form_field_errors(field) }} -{%- endmacro -%} - -{%- macro form_field(field) -%} - {% if field.type == 'BooleanField' %} - {{ form_field_boolean(field, **kwargs) }} - {% else%} - {{ form_field_label(field) }} - {% if field.type == 'RadioField' %} - {{ field(class='radio-group', **kwargs) }} - {% else %} - {{ field(**kwargs) }} - {% endif %} - {{ form_field_description(field) }} - {{ form_field_errors(field) }} - {% endif %} -{%- endmacro -%} - -{%- macro form_field_td(field) -%} - {% if field.type == 'BooleanField' %} - - - {{ form_field_boolean(field, **kwargs) }} - - {% else %} - - {{ form_field_label(field) }} - - - {% if field.type == 'RadioField' %} - {{ field(class='radio-group', **kwargs) }} - {% else %} - {{ field(**kwargs) }} - {% endif %} - {{ form_field_description(field) }} - {{ form_field_errors(field) }} - - {% endif %} -{%- endmacro -%} - -{%- macro form_fields(fields) -%} - {% for field in fields %} - {% if field.type == 'HiddenField' %} - {{ field() }} - {% endif %} - {% endfor %} -
        - {% for field in fields %} - {% if field.type != 'HiddenField' %} -
      1. {{ form_field(field) }}
      2. - {% endif %} - {% endfor %} -
      -{%- endmacro -%} \ No newline at end of file diff --git a/django_templates/forms.html b/templates/forms.html similarity index 100% rename from django_templates/forms.html rename to templates/forms.html diff --git a/templates/layout.html b/templates/layout.html deleted file mode 100755 index c0d56f3..0000000 --- a/templates/layout.html +++ /dev/null @@ -1,68 +0,0 @@ -{% extends "base.html" %} - -{% block layoutextra %} - {% block title %} - {% endblock %} - - - - - - - - - - - - - - - - - - - - -{% endblock %} - -{% block headextra %} - -{% endblock %} - - -{% block body %} -
      -

      Submit a community link

      -
      - -
      - - -{% block content %} - - -{% endblock content %} - - - -
      - - -{% endblock body %} - diff --git a/templates/link/add.html b/templates/link/add.html deleted file mode 100755 index 132c9ba..0000000 --- a/templates/link/add.html +++ /dev/null @@ -1,17 +0,0 @@ -{% extends "layout.html" %} - -{% from 'form_macros.html' import form_field_td %} - - -{% block content %} - -
      - - {{ form_field_td(form.name) }} - {{ form_field_td(form.href) }} -
      - -
      - - -{% endblock %} \ No newline at end of file diff --git a/templates/link/review.html b/templates/link/review.html deleted file mode 100755 index c136bcb..0000000 --- a/templates/link/review.html +++ /dev/null @@ -1,11 +0,0 @@ -{% for link in pending_links %} - \ No newline at end of file diff --git a/django_templates/newsletter-admin/upcoming.html b/templates/newsletter-admin/upcoming.html similarity index 100% rename from django_templates/newsletter-admin/upcoming.html rename to templates/newsletter-admin/upcoming.html diff --git a/django_templates/sources/add.html b/templates/sources/add.html similarity index 100% rename from django_templates/sources/add.html rename to templates/sources/add.html diff --git a/django_templates/sources/icalendar_manage_listing.html b/templates/sources/icalendar_manage_listing.html similarity index 100% rename from django_templates/sources/icalendar_manage_listing.html rename to templates/sources/icalendar_manage_listing.html diff --git a/django_templates/sources/index.html b/templates/sources/index.html similarity index 100% rename from django_templates/sources/index.html rename to templates/sources/index.html diff --git a/django_templates/sources/index.opml b/templates/sources/index.opml similarity index 100% rename from django_templates/sources/index.opml rename to templates/sources/index.opml diff --git a/django_templates/sources/manage.html b/templates/sources/manage.html similarity index 100% rename from django_templates/sources/manage.html rename to templates/sources/manage.html diff --git a/django_templates/subscriptions/manage.html b/templates/subscriptions/manage.html similarity index 100% rename from django_templates/subscriptions/manage.html rename to templates/subscriptions/manage.html diff --git a/django_templates/subscriptions/new.html b/templates/subscriptions/new.html similarity index 100% rename from django_templates/subscriptions/new.html rename to templates/subscriptions/new.html diff --git a/django_templates/subscriptions/recover.html b/templates/subscriptions/recover.html similarity index 100% rename from django_templates/subscriptions/recover.html rename to templates/subscriptions/recover.html diff --git a/django_templates/subscriptions/thankyou.html b/templates/subscriptions/thankyou.html similarity index 100% rename from django_templates/subscriptions/thankyou.html rename to templates/subscriptions/thankyou.html diff --git a/django_templates/subscriptions/verified_thankyou.html b/templates/subscriptions/verified_thankyou.html similarity index 100% rename from django_templates/subscriptions/verified_thankyou.html rename to templates/subscriptions/verified_thankyou.html diff --git a/test.py b/test.py deleted file mode 100755 index 1628a07..0000000 --- a/test.py +++ /dev/null @@ -1,3 +0,0 @@ -from mapreduce import util -util.for_name('migrate.process') - diff --git a/todo.txt b/todo.txt deleted file mode 100755 index e0fd893..0000000 --- a/todo.txt +++ /dev/null @@ -1,10 +0,0 @@ -Priority: -- add event by URL -- Design/typography overhaul - -Annoyances: -- imported descriptions should be available to editors, even if not included -- Events should have a last_edited date -- recurrence-expansion shouldn't create a fresh event every time -- no navigation between weeks - diff --git a/urls.py b/urls.py index cc4a371..7115c44 100755 --- a/urls.py +++ b/urls.py @@ -1,35 +1,28 @@ -# -*- coding: utf-8 -*- -""" - urls - ~~~~ +# Copyright 2008 Google Inc. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

-    URL definitions.
+from django.conf.urls.defaults import *

-    :copyright: 2009 by tipfy.org.
-    :license: BSD, see LICENSE.txt for more details.
-"""
-from tipfy import Rule, import_string
-
-
-def get_rules(app):
-    """Returns a list of URL rules for the application. The list can be
-    defined entirely here or in separate ``urls.py`` files.
-
-    :param app:
-        The WSGI application instance.
-    :return:
-        A list of class:`tipfy.Rule` instances.
-    """
-    # Here we show an example of joining all rules from the
-    # ``apps_installed`` definition set in config.py.
-    rules = []
-
-    for app_module in app.get_config('tipfy', 'apps_installed'):
-        try:
-            # Load the urls module from the app and extend our rules.
-            app_rules = import_string('%s.urls' % app_module)
-            rules.extend(app_rules.get_rules(app))
-        except ImportError:
-            pass
-
-    return rules
+urlpatterns = patterns('',
+    # Example:
+    (r'^account/', include('account.urls')),
+    url(r'^_ah/login_required', 'account.views.signin', name="account-signin"),
+    (r'^events/', include('events.urls')),
+    (r'^sources/', include('sources.urls')),
+    (r'^subscriptions/', include('subscriptions.urls')),
+    (r'^admin/', include('eventsite.admin.urls')),
+    (r'^assets/', include('assets.urls')),
+    (r'^links/', include('links.urls')),
+    (r'', include('eventsite.urls')),
+)
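Each pattern in the new root urls.py hands its matched prefix off to a per-app URLconf via include(). Those app-level modules are not part of this diff, so the snippet below is only a sketch of what an included module such as links/urls.py could look like in the same Django 1.2 style; the view functions and URL names are hypothetical, not taken from the project.

    # links/urls.py -- hypothetical sketch of an app URLconf pulled in by the
    # root urls.py above via (r'^links/', include('links.urls')).
    from django.conf.urls.defaults import *

    urlpatterns = patterns('links.views',
        # Paths here are matched after the '^links/' prefix consumed by include().
        url(r'^add$', 'add_link', name='links-add'),            # assumed view: links.views.add_link
        url(r'^review$', 'review_links', name='links-review'),  # assumed view: links.views.review_links
    )

Named patterns such as these (and the "account-signin" entry above) can then be resolved with django.core.urlresolvers.reverse() or the {% url %} template tag instead of hard-coding paths.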