Merge pull request #507 from dimitri-yatsenko/dev
Fix #300, #345 -- correct handling of renamed foreign key attributes in populate and cascading deletes.
eywalker authored Oct 25, 2018
2 parents 2f22e31 + b5bd423 commit 25b2fec
Showing 11 changed files with 104 additions and 70 deletions.
45 changes: 25 additions & 20 deletions CHANGELOG.md
@@ -1,13 +1,23 @@
## Release notes
### 0.10.1

### 0.11.0 -- Oct 25, 2018
* Full support of dependencies with renamed attributes using projection syntax (#300, #345, #436, #506, #507)
* Rename internal class and module names to comply with terminology in documentation (#494, #500)
* Full support of secondary indexes (#498, #500)
* ERD no longer shows numbers in nodes corresponding to derived dependencies (#478, #500)
* Full support of unique and nullable dependencies (#254, #301, #493, #495, #500)
* Improve memory management in `populate` (#461, #486)
* Fix query errors and redundancies (#456, #463, #482)

### 0.10.1 -- Aug 28, 2018
* Fix ERD Tooltip message (#431)
* Networkx 2.0 support (#443)
* Fix insert from query with skip_duplicates=True (#451)
* Sped up queries (#458)
* Bugfix in restriction of the form (A & B) * B (#463)
* Improved error messages (#466)

### 0.10.0 -- January 10, 2018
### 0.10.0 -- Jan 10, 2018
* Deletes are more efficient (#424)
* ERD shows table definition on tooltip hover in Jupyter (#422)
* S3 external storage
@@ -17,18 +27,13 @@
* Compatibility with pymysql 0.8.0+
* More efficient loading of dependencies (#403)

### 0.9.0 -- November 17, 2017
### 0.9.0 -- Nov 17, 2017
* Made graphviz installation optional
* Implement file-based external storage
* Implement union operator +


### 0.9.0 -- November 17, 2017
* Bug fixes
* Made graphviz installation optional
* Implement file-based external storage

### 0.8.0 -- July 26, 2017
### 0.8.0 -- Jul 26, 2017
Documentation and tutorials available at https://docs.datajoint.io and https://tutorials.datajoint.io
* improved the ERD graphics and features using the graphviz libraries (#207, #333)
* improved password handling logic (#322, #321)
@@ -42,18 +47,18 @@ Documentation and tutorials available at https://docs.datajoint.io and https://t
* simplified the `fetch` and `fetch1` syntax, deprecating the `fetch[...]` syntax (#319)
* the jobs tables now store the connection ids to allow identifying abandoned jobs (#288, #317)

### 0.5.0 (#298) -- March 8, 2017
### 0.5.0 (#298) -- Mar 8, 2017
* All fetched integers are now 64-bit long and all fetched floats are double precision.
* Added `dj.create_virtual_module`

### 0.4.10 (#286) -- February 6, 2017
### 0.4.10 (#286) -- Feb 6, 2017
* Removed Vagrant and Readthedocs support
* Explicit saving of configuration (issue #284)

### 0.4.9 (#285) -- February 2, 2017
### 0.4.9 (#285) -- Feb 2, 2017
* Fixed setup.py for pip install

### 0.4.7 (#281) -- January 24, 2017
### 0.4.7 (#281) -- Jan 24, 2017
* Fixed issues related to order of attributes in projection.

### 0.4.6 (#277) -- Dec 22, 2016
@@ -62,32 +67,32 @@ Documentation and tutorials available at https://docs.datajoint.io and https://t
### 0.4.5 (#274) -- Dec 20, 2016
* Populate reports how many keys remain to be populated at the start.

### 0.4.3 (#271) -- December 6, 2016
### 0.4.3 (#271) -- Dec 6, 2016
* Fixed aggregation issues (#270)
* datajoint no longer attempts to connect to server at import time
* dropped support of view (reversed #257)
* more elegant handling of insufficient privileges (#268)

### 0.4.2 (#267) -- December 6, 2016
### 0.4.2 (#267) -- Dec 6, 2016
* improved table appearance in Jupyter

### 0.4.1 (#266) -- October 28, 2016
### 0.4.1 (#266) -- Oct 28, 2016
* bugfix for very long error messages

### 0.3.9 -- September 27, 2016
### 0.3.9 -- Sep 27, 2016
* Added support for datatype `YEAR`
* Fixed issues with `dj.U` and the `aggr` operator (#246, #247)

### 0.3.8 -- August 2, 2016
### 0.3.8 -- Aug 2, 2016
* added the `_update` method in `base_relation`. It allows updating values in existing tuples.
* bugfix in reading values of type double. Previously it was cast as float32.

### 0.3.7 -- July 31, 2016
### 0.3.7 -- Jul 31, 2016
* added parameter `ignore_extra_fields` in `insert`
* `insert(..., skip_duplicates=True)` now relies on `SELECT IGNORE`. Previously it explicitly checked if tuple already exists.
* table previews now include blob attributes displaying the string <BLOB>

### 0.3.6 -- July 30, 2016
### 0.3.6 -- Jul 30, 2016
* bugfix in `schema.spawn_missing_classes`. Previously, spawned part classes would not show in ERDs.
* dj.key now causes fetch to return as a list of dicts. Previously it was a recarray.

2 changes: 1 addition & 1 deletion datajoint/__init__.py
@@ -19,7 +19,7 @@
from .version import __version__

__author__ = "Dimitri Yatsenko, Edgar Y. Walker, and Fabian Sinz at Baylor College of Medicine"
__date__ = "October 15, 2018"
__date__ = "Oct 25, 2018"
__all__ = ['__author__', '__version__',
'config', 'conn', 'kill', 'Table',
'Connection', 'Heading', 'FreeTable', 'Not', 'schema',
24 changes: 17 additions & 7 deletions datajoint/autopopulate.py
@@ -31,15 +31,25 @@ def key_source(self):
         The default value is the join of the parent relations.
         Users may override to change the granularity or the scope of populate() calls.
         """
-        if self._key_source is None:
+        def parent_gen(self):
             if self.target.full_table_name not in self.connection.dependencies:
                 self.connection.dependencies.load()
-            parents = list(self.target.parents(primary=True))
-            if not parents:
-                raise DataJointError('A relation must have parent relations to be able to be populated')
-            self._key_source = FreeTable(self.connection, parents.pop(0)).proj()
-            while parents:
-                self._key_source *= FreeTable(self.connection, parents.pop(0)).proj()
+            for parent_name, fk_props in self.target.parents(primary=True).items():
+                if not parent_name.isdigit():  # simple foreign key
+                    yield FreeTable(self.connection, parent_name).proj()
+                else:
+                    grandparent = list(self.connection.dependencies.in_edges(parent_name))[0][0]
+                    yield FreeTable(self.connection, grandparent).proj(**{
+                        attr: ref for attr, ref in fk_props['attr_map'].items() if ref != attr})
+
+        if self._key_source is None:
+            parents = parent_gen(self)
+            try:
+                self._key_source = next(parents)
+            except StopIteration:
+                raise DataJointError('A relation must have primary dependencies for auto-populate to work') from None
+            for q in parents:
+                self._key_source *= q
         return self._key_source

def make(self, key):
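The reworked `key_source` folds the projected parents with the join operator `*`: the first parent seeds the result and each remaining parent is joined in. The fold itself is a generic pattern; below is a minimal sketch in plain Python, where the `fold_join` helper is hypothetical and stands in for the DataJoint-specific generator logic (with integers, `*` is ordinary multiplication, which makes the fold easy to check):

```python
import operator
from functools import reduce

def fold_join(queries):
    """Fold an iterable of query expressions with the join (*) operator:
    the first element seeds the result, the rest are joined in turn."""
    it = iter(queries)
    try:
        seed = next(it)
    except StopIteration:
        # mirrors the DataJointError raised when there are no primary dependencies
        raise ValueError('at least one primary dependency is required') from None
    return reduce(operator.mul, it, seed)

print(fold_join([2, 3, 4]))  # → 24
```

This is also why the commit replaces the `parents.pop(0)` / `while parents` loop: a generator plus `next()` expresses the same seed-then-fold shape while handling renamed parents lazily.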
1 change: 1 addition & 0 deletions datajoint/declare.py
@@ -163,6 +163,7 @@ def compile_foreign_key(line, context, attributes, primary_key, attr_sql, foreig
raise DataJointError('Mismatched attributes in foreign key "%s"' % line)

if ref_attrs:
# convert to projected dependency
ref = ref.proj(**dict(zip(new_attrs, ref_attrs)))

# declare new foreign key attributes
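The `ref.proj(**dict(zip(new_attrs, ref_attrs)))` call above is what the new comment labels a projected dependency: each renamed attribute maps to the attribute it references. A tiny hypothetical helper illustrating just the rename mapping (the mismatch check mirrors the DataJointError raised earlier in `compile_foreign_key`):

```python
def fk_rename_kwargs(new_attrs, ref_attrs):
    """Build the keyword arguments for .proj() that rename referenced
    attributes: each new attribute name maps to the referenced name."""
    if len(new_attrs) != len(ref_attrs):
        raise ValueError('Mismatched attributes in foreign key')
    return dict(zip(new_attrs, ref_attrs))

# e.g. `-> Experiment.proj(exp='experiment_id')` produces this mapping
print(fk_rename_kwargs(['exp'], ['experiment_id']))  # → {'exp': 'experiment_id'}
```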
12 changes: 11 additions & 1 deletion datajoint/dependencies.py
@@ -105,9 +105,19 @@ def descendants(self, full_table_name):
        :param full_table_name: In form `schema`.`table_name`
        :return: all dependent tables sorted in topological order. Self is included.
        """
        nodes = self.subgraph(
            nx.algorithms.dag.descendants(self, full_table_name))
        return [full_table_name] + list(
            nx.algorithms.dag.topological_sort(nodes))

    def ancestors(self, full_table_name):
        """
        :param full_table_name: In form `schema`.`table_name`
        :return: all dependent tables sorted in topological order. Self is included.
        """
        nodes = self.subgraph(
            nx.algorithms.dag.ancestors(self, full_table_name))
        return [full_table_name] + list(reversed(list(
            nx.algorithms.dag.topological_sort(nodes))))

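Both methods rely on the same idea: take the subgraph reachable from the table and topologically sort it, so every table precedes its dependents (`descendants`), or reverse that order (`ancestors`). A self-contained sketch of the `descendants` ordering using the standard library's `graphlib` rather than the networkx calls in the diff (the `children` dict is a stand-in for the real dependency graph):

```python
from graphlib import TopologicalSorter

def descendants_topo(children, start):
    """All nodes reachable from start (start included), in topological
    order; reversing the result gives the ancestors-style ordering."""
    reachable, stack = {start}, [start]
    while stack:
        for child in children.get(stack.pop(), ()):
            if child not in reachable:
                reachable.add(child)
                stack.append(child)
    # TopologicalSorter takes a node -> predecessors mapping
    preds = {n: {p for p in reachable if n in children.get(p, ())} for n in reachable}
    return list(TopologicalSorter(preds).static_order())

# diamond dependency: a -> b, a -> c, b -> d, c -> d
order = descendants_topo({'a': ['b', 'c'], 'b': ['d'], 'c': ['d']}, 'a')
```

The order between siblings (`b`, `c`) is unspecified, but the root always comes first and the shared descendant last — exactly the property the cascading delete needs when it deletes in reverse order.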
65 changes: 37 additions & 28 deletions datajoint/table.py
@@ -19,6 +19,11 @@
logger = logging.getLogger(__name__)


class _rename_map(tuple):
    """ for internal use """
    pass


class Table(Query):
"""
Table is an abstract class that represents a base relation, i.e. a table in the schema.
@@ -105,6 +110,12 @@ def children(self, primary=None):
"""
return self.connection.dependencies.children(self.full_table_name, primary)

    def descendants(self):
        return self.connection.dependencies.descendants(self.full_table_name)

    def ancestors(self):
        return self.connection.dependencies.ancestors(self.full_table_name)

@property
def is_declared(self):
"""
@@ -315,62 +326,60 @@ def delete(self, verbose=True):
         Deletes the contents of the table and its dependent tables, recursively.
         User is prompted for confirmation if config['safemode'] is set to True.
         """
-        already_in_transaction = self.connection.in_transaction
+        conn = self.connection
+        already_in_transaction = conn.in_transaction
         safe = config['safemode']
         if already_in_transaction and safe:
             raise DataJointError('Cannot delete within a transaction in safemode. '
                                  'Set dj.config["safemode"] = False or complete the ongoing transaction first.')
-        graph = self.connection.dependencies
+        graph = conn.dependencies
         graph.load()
-        delete_list = collections.OrderedDict()
-        for table in graph.descendants(self.full_table_name):
-            if not table.isdigit():
-                delete_list[table] = FreeTable(self.connection, table)
-            else:
-                raise DataJointError('Cascading deletes across renamed foreign keys is not supported. See issue #300.')
-                parent, edge = next(iter(graph.parents(table).items()))
-                delete_list[table] = FreeTable(self.connection, parent).proj(
-                    **{new_name: old_name
-                       for new_name, old_name in edge['attr_map'].items() if new_name != old_name})
+        delete_list = collections.OrderedDict(
+            (name, _rename_map(next(iter(graph.parents(name).items()))) if name.isdigit() else FreeTable(conn, name))
+            for name in graph.descendants(self.full_table_name))

         # construct restrictions for each relation
         restrict_by_me = set()
+        # restrictions: Or-Lists of restriction conditions for each table.
+        # Uncharacteristically of Or-Lists, an empty entry denotes "delete everything".
         restrictions = collections.defaultdict(list)
+        # restrict by self
         if self.restriction:
             restrict_by_me.add(self.full_table_name)
             restrictions[self.full_table_name].append(self.restriction)  # copy own restrictions
+        # restrict by renamed nodes
         restrict_by_me.update(table for table in delete_list if table.isdigit())  # restrict by all renamed nodes
-        # restrict by tables restricted by a non-primary semijoin
+        # restrict by secondary dependencies
         for table in delete_list:
             restrict_by_me.update(graph.children(table, primary=False))  # restrict by any non-primary dependents

         # compile restriction lists
-        for table, rel in delete_list.items():
-            for dep in graph.children(table):
-                if table in restrict_by_me:
-                    restrictions[dep].append(rel)  # if restrict by me, then restrict by the entire relation
-                else:
-                    restrictions[dep].extend(restrictions[table])  # or re-apply the same restrictions
+        for name, table in delete_list.items():
+            for dep in graph.children(name):
+                # if restrict by me, then restrict by the entire relation otherwise copy restrictions
+                restrictions[dep].extend([table] if name in restrict_by_me else restrictions[name])

         # apply restrictions
-        for name, r in delete_list.items():
-            if restrictions[name]:  # do not restrict by an empty list
-                r.restrict([r.proj() if isinstance(r, Query) else r
-                            for r in restrictions[name]])
+        for name, table in delete_list.items():
+            if not name.isdigit() and restrictions[name]:  # do not restrict by an empty list
+                table.restrict([
+                    r.proj() if isinstance(r, FreeTable) else (
+                        delete_list[r[0]].proj(**{a: b for a, b in r[1]['attr_map'].items()})
+                        if isinstance(r, _rename_map) else r)
+                    for r in restrictions[name]])
         if safe:
             print('About to delete:')

         if not already_in_transaction:
             self.connection.start_transaction()
         total = 0
         try:
-            for r in reversed(list(delete_list.values())):
-                count = r.delete_quick(get_count=True)
-                total += count
-                if (verbose or safe) and count:
-                    print('{table}: {count} items'.format(table=r.full_table_name, count=count))
+            for name, table in reversed(list(delete_list.items())):
+                if not name.isdigit():
+                    count = table.delete_quick(get_count=True)
+                    total += count
+                    if (verbose or safe) and count:
+                        print('{table}: {count} items'.format(table=name, count=count))
         except:
             # Delete failed, perhaps due to insufficient privileges. Cancel transaction.
             if not already_in_transaction:
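The compile and apply passes above maintain an or-list of restriction conditions per table and push each table's conditions to its children in topological order, with an empty list meaning the whole table is deleted. A much-simplified sketch of just the propagation step, ignoring renamed keys and the restrict-by-entire-relation case (all names here are hypothetical):

```python
import collections

def propagate_restrictions(children, topo_order, root, conditions):
    """Push the root table's restriction conditions down to every
    descendant; an empty list for a table means 'delete everything'."""
    restrictions = collections.defaultdict(list)
    restrictions[root].extend(conditions)
    for name in topo_order:  # parents are visited before children
        for child in children.get(name, ()):
            restrictions[child].extend(restrictions[name])
    return dict(restrictions)

r = propagate_restrictions({'A': ['B'], 'B': ['C']}, ['A', 'B', 'C'], 'A', ['subject_id=1'])
print(r['C'])  # → ['subject_id=1']
```

The topological order matters twice: conditions flow down it, and `delete_quick` then runs over it in reverse so that children are emptied before their parents.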
2 changes: 1 addition & 1 deletion datajoint/version.py
@@ -1 +1 @@
__version__ = "0.10.2"
__version__ = "0.11.0"
4 changes: 2 additions & 2 deletions tests/schema.py
@@ -142,7 +142,7 @@ def make(self, key):
@schema
class Trial(dj.Imported):
definition = """ # a trial within an experiment
-> Experiment
-> Experiment.proj(exp='experiment_id')
trial_id :smallint # trial number
---
start_time :double # (s)
@@ -182,7 +182,7 @@ class Ephys(dj.Imported):

class Channel(dj.Part):
definition = """ # subtable containing individual channels
-> Ephys
-> master
channel :tinyint unsigned # channel number within Ephys
----
voltage : longblob
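The `-> Experiment.proj(exp='experiment_id')` line in the test schema is the renamed-dependency syntax this release adds: the referenced primary-key attribute `experiment_id` appears in `Trial` under the name `exp`. A hypothetical definition using the same pattern (table and attribute names are illustrative only; the dependency line is the real 0.11 syntax):

```python
# hypothetical table definition sketch; only the `->` line uses
# actual DataJoint 0.11 renamed-dependency syntax
definition = """  # sessions, referring to Experiment under a renamed key
-> Experiment.proj(exp='experiment_id')
session_id      : smallint      # session number
---
session_note    : varchar(255)  # free-text note
"""

assert "proj(exp='experiment_id')" in definition
```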
16 changes: 8 additions & 8 deletions tests/test_declare.py
@@ -85,19 +85,19 @@ def test_attributes():
['subject_id', 'experiment_id'])

assert_list_equal(trial.heading.names,
['subject_id', 'experiment_id', 'trial_id', 'start_time'])
['subject_id', 'exp', 'trial_id', 'start_time'])
assert_list_equal(trial.primary_key,
['subject_id', 'experiment_id', 'trial_id'])
['subject_id', 'exp', 'trial_id'])

assert_list_equal(ephys.heading.names,
['subject_id', 'experiment_id', 'trial_id', 'sampling_frequency', 'duration'])
['subject_id', 'exp', 'trial_id', 'sampling_frequency', 'duration'])
assert_list_equal(ephys.primary_key,
['subject_id', 'experiment_id', 'trial_id'])
['subject_id', 'exp', 'trial_id'])

assert_list_equal(channel.heading.names,
['subject_id', 'experiment_id', 'trial_id', 'channel', 'voltage', 'current'])
['subject_id', 'exp', 'trial_id', 'channel', 'voltage', 'current'])
assert_list_equal(channel.primary_key,
['subject_id', 'experiment_id', 'trial_id', 'channel'])
['subject_id', 'exp', 'trial_id', 'channel'])
assert_true(channel.heading.attributes['voltage'].is_blob)

@staticmethod
@@ -108,8 +108,8 @@ def test_dependencies():
assert_equal(set(subject.children(primary=True)), {experiment.full_table_name})
assert_equal(set(experiment.parents(primary=True)), {subject.full_table_name})

assert_true(trial.full_table_name in set(experiment.children(primary=True)))
assert_equal(set(trial.parents(primary=True)), {experiment.full_table_name})
assert_true(trial.full_table_name in experiment.descendants())
assert_true(experiment.full_table_name in trial.ancestors())

assert_equal(set(trial.children(primary=True)),
{ephys.full_table_name, trial.Condition.full_table_name})
2 changes: 0 additions & 2 deletions tests/test_foreign_keys.py
@@ -5,7 +5,6 @@
from . import schema_advanced


@raises(DataJointError) # TODO: remove after fixing issue #300
def test_aliased_fk():
person = schema_advanced.Person()
parent = schema_advanced.Parent()
@@ -33,7 +32,6 @@ def test_describe():
assert_equal(c1, c2)


@raises(DataJointError) # TODO: remove after fixing issue #300
def test_delete():
person = schema_advanced.Person()
parent = schema_advanced.Parent()
1 change: 1 addition & 0 deletions tests/test_jobs.py
@@ -76,6 +76,7 @@ def test_sigint():
assert_equals(error_message, 'KeyboardInterrupt')
schema.schema.jobs.delete()


def test_key_pack_testing():
jobs = schema.schema.jobs
key = dict(a='string', b=int, c=Decimal())
