dendrograms · keflavich · Apr 23, 2019 · Apr 23, 2019 · Apr 23, 2019 · Apr 23, 2019
diff --git a/astrodendro/io/fits.py b/astrodendro/io/fits.py
@@ -71,9 +71,9 @@ def dendro_import_fits(filename):
             params = {"min_npix": hdus[0].header['MIN_NPIX'],
                       "min_value": hdus[0].header['MIN_VAL'],
                       "min_delta": hdus[0].header['MIN_DELT']}
-                      
+
         else:
-            
+
             params = {}
 
     return parse_dendrogram(newick, data, index_map, params, wcs)

diff --git a/astrodendro/io/util.py b/astrodendro/io/util.py
@@ -5,24 +5,58 @@
 from astropy import log
 
 
-def parse_newick(string):
+def newick_from_json(d):
+    """
+    If "d" is a JSON-derived dict (e.g., `json.load('my_newick.json')`), this
+    will convert all of the keys from strings to integers, resulting in
+    something with identical format to `parse_newick`'s result.
+
+    Example:
+        >>> newick = ''.join(chr(x) for x in hdus[3].data.flat)
+        >>> rslt = astrodendro.io.util.parse_newick(newick)
+        >>> jj = json.dumps(rslt)
+        >>> rdtrip = newick_from_json(json.loads(jj))
+        >>> rdtrip == rslt
+        True
+    """
+    new = {}
+    for k, v in d.items():
+        new_v = v
+        if isinstance(v, dict):
+            new_v = newick_from_json(v)
+        elif isinstance(v, list):
+            new_v = list()
+            for x in v:
+                if isinstance(x, dict):
+                    new_v.append(newick_from_json(x))
+                else:
+                    new_v.append(x)
+            new_v = tuple(new_v)
+        new[int(k)] = new_v
+    return new
 
+def parse_newick(string, verbose=True):
     items = {}
 
     # Find maximum level
     current_level = 0
     max_level = 0
-    log.debug('String loading...')
+    log.debug("String starts with {0}".format(string[:100]))
+    log.debug("String loading... newick has {0} chars, {1} ('s"
+              .format(len(string), string.count("(")))
     for i, c in enumerate(string):
         if c == '(':
             current_level += 1
         if c == ')':
             current_level -= 1
         max_level = max(max_level, current_level)
 
+    if not verbose:
+        ProgressBar = lambda x: x
+
     # Loop through levels and construct tree
-    log.debug('Tree loading...')
-    for level in range(max_level, 0, -1):
+    log.debug('Tree loading... max_level={0}'.format(max_level))
+    for level in ProgressBar(range(max_level, 0, -1)):
 
         pairs = []
 
@@ -56,8 +90,6 @@ def parse_newick(string):
             # Remove branch definition from string
             string = string[:start] + string[end + 1:]
 
-    new_items = {}
-
     def collect(d):
         for item in d:
             if item in items:
@@ -70,9 +102,44 @@ def collect(d):
     return items['trunk']
 
 
+def _construct_tree(dend, repr, indices_by_structure, flux_by_structure):
+    from ..structure import Structure
+    structures = []
+    for idx in repr:
+        idx = int(idx)
+        structure_indices = indices_by_structure[idx]
+        f = flux_by_structure[idx]
+        if type(repr[idx]) == tuple:
+            sub_structures_repr = repr[idx][0]  # Parsed representation of sub structures
+            sub_structures = _construct_tree(dend,
+                                             sub_structures_repr,
+                                             indices_by_structure,
+                                             flux_by_structure)
+            for i in sub_structures:
+                dend._structures_dict[i.idx] = i
+            b = Structure(structure_indices, f, children=sub_structures, idx=idx, dendrogram=dend)
+            # Correct merge levels - complicated because of the
+            # order in which we are building the tree.
+            # What we do is look at the heights of this branch's
+            # 1st child as stored in the newick representation, and then
+            # work backwards to compute the merge level of this branch
+            #
+            # these five lines were not used
+            #first_child_repr = six.next(six.itervalues(sub_structures_repr))
+            #if type(first_child_repr) == tuple:
+            #    height = first_child_repr[1]
+            #else:
+            #    height = first_child_repr
+            dend._structures_dict[idx] = b
+            structures.append(b)
+        else:
+            ell = Structure(structure_indices, f, idx=idx, dendrogram=dend)
+            structures.append(ell)
+            dend._structures_dict[idx] = ell
+    return structures
+
 def parse_dendrogram(newick, data, index_map, params, wcs=None):
     from ..dendrogram import Dendrogram
-    from ..structure import Structure
 
     d = Dendrogram()
     d.ndim = len(data.shape)
@@ -88,38 +155,13 @@ def parse_dendrogram(newick, data, index_map, params, wcs=None):
     except ImportError:
         flux_by_structure, indices_by_structure = _slow_reader(d.index_map, data)
 
-    def _construct_tree(repr):
-        structures = []
-        for idx in repr:
-            idx = int(idx)
-            structure_indices = indices_by_structure[idx]
-            f = flux_by_structure[idx]
-            if type(repr[idx]) == tuple:
-                sub_structures_repr = repr[idx][0]  # Parsed representation of sub structures
-                sub_structures = _construct_tree(sub_structures_repr)
-                for i in sub_structures:
-                    d._structures_dict[i.idx] = i
-                b = Structure(structure_indices, f, children=sub_structures, idx=idx, dendrogram=d)
-                # Correct merge levels - complicated because of the
-                # order in which we are building the tree.
-                # What we do is look at the heights of this branch's
-                # 1st child as stored in the newick representation, and then
-                # work backwards to compute the merge level of this branch
-                first_child_repr = six.next(six.itervalues(sub_structures_repr))
-                if type(first_child_repr) == tuple:
-                    height = first_child_repr[1]
-                else:
-                    height = first_child_repr
-                d._structures_dict[idx] = b
-                structures.append(b)
-            else:
-                l = Structure(structure_indices, f, idx=idx, dendrogram=d)
-                structures.append(l)
-                d._structures_dict[idx] = l
-        return structures
-
+    # newline to avoid overwriting progressbar
+    print()
     log.debug('Parsing newick and constructing tree...')
-    d.trunk = _construct_tree(parse_newick(newick))
+    parsed_newick = parse_newick(newick)
+
+    log.debug('Constructing tree...')
+    d.trunk = _construct_tree(d, parsed_newick, indices_by_structure, flux_by_structure)
     # To make the structure.level property fast, we ensure all the items in the
     # trunk have their level cached as "0"
     for structure in d.trunk:
@@ -157,10 +199,10 @@ def _fast_reader(index_map, data):
         match = index_map[sl] == idx
         sl2 = (slice(None),) + sl
         match_inds = index_cube[sl2][:, match]
-        coords = list(zip(*match_inds))
+        #coords = list(zip(*match_inds))
         dd = data[sl][match].tolist()
         flux_by_structure[idx] = dd
-        indices_by_structure[idx] = coords
+        indices_by_structure[idx] = match_inds.T
         p.update()
 
     return flux_by_structure, indices_by_structure

diff --git a/astrodendro/structure.py b/astrodendro/structure.py
@@ -69,7 +69,7 @@ def __init__(self, indices, values, children=[], idx=None, dendrogram=None):
             self._indices = [indices]
             self._values = [values]
             self._vmin, self._vmax = values, values
-        elif isinstance(indices, list) and isinstance(values, list):
+        elif (isinstance(indices, list) or isinstance(indices, np.ndarray)) and isinstance(values, list):
             self._indices = indices
             self._values = values
             self._vmin, self._vmax = min(values), max(values)
@@ -78,7 +78,10 @@ def __init__(self, indices, values, children=[], idx=None, dendrogram=None):
             self._values = [x for x in values]
             self._vmin, self._vmax = min(values), max(values)
 
-        self._smallest_index = min(self._indices)
+        try:
+            self._smallest_index = min(self._indices)
+        except ValueError:
+            self._smallest_index = self._indices[0]
 
         self.idx = idx
 
@@ -400,10 +403,15 @@ def get_peak(self, subtree=True):
         def key(x):
             return x[1]
 
+        def to_tuple(x):
+            if np.isscalar(x):
+                return (x,)
+            else:
+                return tuple(x)
+
         if self._peak is None:
             for s in reversed(list(prefix_visit(self))):
-                s._peak = (s._indices[s._values.index(s.vmax)],
-                          s.vmax)
+                s._peak = (to_tuple(s._indices[s._values.index(s.vmax)]), s.vmax)
                 if s.is_leaf:
                     s._peak_subtree = s._peak
                 else:

diff --git a/astrodendro/tests/test_flux.py b/astrodendro/tests/test_flux.py
@@ -25,7 +25,12 @@ def strip_parentheses(string):
 @pytest.mark.parametrize(('input_quantities', 'output_unit', 'keywords', 'output'), COMBINATIONS)
 def test_compute_flux(input_quantities, output_unit, keywords, output):
     q = compute_flux(input_quantities, output_unit, **keywords)
-    np.testing.assert_allclose(q.value, output.value)
+    # tolerances are set b/c some conversions lose precision at the ~10^-7 level
+    # (<Quantity [1., 2., 3.] K>, Unit("Jy"), {'spatial_scale': <Quantity 2. arcsec>, 'beam_major': <Quantity 1. arcsec>, 'beam_minor': <Quantity 0.5 arcsec>, 'wavelength': <Quantity 2. mm>}, <Quantity 0.38941637 Jy>)
+    # -> 
+    # Max absolute difference: 5.64106332e-08
+    # Max relative difference: 1.44859431e-07
+    np.testing.assert_allclose(q.value, output.value, atol=1e-7, rtol=2e-7)
     assert q.unit == output.unit
 
 

diff --git a/astrodendro/tests/test_structure.py b/astrodendro/tests/test_structure.py
@@ -331,21 +331,21 @@ def test_add_pixel():
     s = Structure(1, 10.)
 
     assert s.get_npix() == 1
-    assert s.get_peak() == (1, 10)
+    assert s.get_peak() == ((1,), 10.)
     assert s.vmin == 10.
     assert s.vmax == 10.
 
     s._add_pixel(2, 8.)
 
     assert s.get_npix() == 2
-    assert s.get_peak() == (1, 10)
+    assert s.get_peak() == ((1,), 10.)
     assert s.vmin == 8.
     assert s.vmax == 10.
 
     s._add_pixel(3, 12.)
 
     assert s.get_npix() == 3
-    assert s.get_peak() == (3, 12.)
+    assert s.get_peak() == ((3,), 12.)
     assert s.vmin == 8.
     assert s.vmax == 12.