From a59349815705e67de1ed7f682c21356b10e70e64 Mon Sep 17 00:00:00 2001
From: Simon Perkins <simon.perkins@gmail.com>
Date: Fri, 12 Jan 2018 15:38:23 +0200
Subject: [PATCH 01/41] Remove spi and input stokes per (src,time,channel)

Previously, spectral index was limited to (freq/ref_freq)**alpha,
which is a fairly simplistic model. Remove this and simply allow
the user to specify stokes parameters varying by source, time and
channel.
---
 montblanc/impl/rime/tensorflow/RimeSolver.py  |  11 +-
 montblanc/impl/rime/tensorflow/config.py      |  42 +------
 .../tensorflow/rime_ops/b_sqrt_op_cpu.cpp     |  31 ++---
 .../rime/tensorflow/rime_ops/b_sqrt_op_cpu.h  | 107 ++++++++----------
 .../tensorflow/rime_ops/b_sqrt_op_gpu.cuh     |  65 ++---------
 .../rime_ops/sum_coherencies_op_cpu.cpp       |   4 +-
 .../rime_ops/sum_coherencies_op_cpu.h         |   4 +-
 .../rime_ops/sum_coherencies_op_gpu.cuh       |  12 +-
 .../rime/tensorflow/rime_ops/test_b_sqrt.py   |  53 ++++-----
 montblanc/tests/test_meq_tf.py                |  36 +++---
 10 files changed, 124 insertions(+), 241 deletions(-)

diff --git a/montblanc/impl/rime/tensorflow/RimeSolver.py b/montblanc/impl/rime/tensorflow/RimeSolver.py
index 2084d7998..0dcf3d86a 100644
--- a/montblanc/impl/rime/tensorflow/RimeSolver.py
+++ b/montblanc/impl/rime/tensorflow/RimeSolver.py
@@ -951,7 +951,7 @@ def _construct_tensorflow_expression(slvr_cfg, feed_data, device, shard):
         feed_rotation = rime.feed_rotation(pa_sin, pa_cos, CT=CT,
                                            feed_type=polarisation_type)
 
-    def antenna_jones(lm, stokes, alpha, ref_freq):
+    def antenna_jones(lm, stokes):
         """
         Compute the jones terms for each antenna.
 
@@ -972,8 +972,7 @@ def antenna_jones(lm, stokes, alpha, ref_freq):
 
         # Compute the square root of the brightness matrix
         # (as well as the sign)
-        bsqrt, sgn_brightness = rime.b_sqrt(stokes, alpha,
-            D.frequency, ref_freq, CT=CT,
+        bsqrt, sgn_brightness = rime.b_sqrt(stokes, CT=CT,
             polarisation_type=polarisation_type)
 
         # Check for nans/infs in the bsqrt
@@ -1023,7 +1022,7 @@ def point_body(coherencies, npsrc, src_count):
         npsrc +=  nsrc
 
         ant_jones, sgn_brightness = antenna_jones(S.point_lm,
-            S.point_stokes, S.point_alpha, S.point_ref_freq)
+            S.point_stokes)
         shape = tf.ones(shape=[nsrc,ntime,nbl,nchan], dtype=FT)
         coherencies = rime.sum_coherencies(D.antenna1, D.antenna2,
             shape, ant_jones, sgn_brightness, coherencies)
@@ -1040,7 +1039,7 @@ def gaussian_body(coherencies, ngsrc, src_count):
         ngsrc += nsrc
 
         ant_jones, sgn_brightness = antenna_jones(S.gaussian_lm,
-            S.gaussian_stokes, S.gaussian_alpha, S.gaussian_ref_freq)
+            S.gaussian_stokes)
         gauss_shape = rime.gauss_shape(D.uvw, D.antenna1, D.antenna2,
             D.frequency, S.gaussian_shape)
         coherencies = rime.sum_coherencies(D.antenna1, D.antenna2,
@@ -1058,7 +1057,7 @@ def sersic_body(coherencies, nssrc, src_count):
         nssrc += nsrc
 
         ant_jones, sgn_brightness = antenna_jones(S.sersic_lm,
-            S.sersic_stokes, S.sersic_alpha, S.sersic_ref_freq)
+            S.sersic_stokes)
         sersic_shape = rime.sersic_shape(D.uvw, D.antenna1, D.antenna2,
             D.frequency, S.sersic_shape)
         coherencies = rime.sum_coherencies(D.antenna1, D.antenna2,
diff --git a/montblanc/impl/rime/tensorflow/config.py b/montblanc/impl/rime/tensorflow/config.py
index afd5d78ee..0b45d4bd9 100644
--- a/montblanc/impl/rime/tensorflow/config.py
+++ b/montblanc/impl/rime/tensorflow/config.py
@@ -366,24 +366,12 @@ def test_sersic_shape(self, context):
         tags    = "input, constant",
         description = LM_DESCRIPTION.format(st="point"),
         units   = RADIANS),
-    array_dict('point_stokes', ('npsrc','ntime', 4), 'ft',
+    array_dict('point_stokes', ('npsrc','ntime', 'nchan', 4), 'ft',
         default = default_stokes,
         test    = rand_stokes,
         tags    = "input, constant",
         description = STOKES_DESCRIPTION,
         units   = JANSKYS),
-    array_dict('point_alpha', ('npsrc','ntime'), 'ft',
-        default = lambda s, c: np.zeros(c.shape, c.dtype),
-        test    = lambda s, c: rf(c.shape, c.dtype)*0.1,
-        tags    = "input, constant",
-        description = ALPHA_DESCRIPTION,
-        units   = DIMENSIONLESS),
-    array_dict('point_ref_freq', ('npsrc',), 'ft',
-        default = lambda s, c: np.full(c.shape, _ref_freq, c.dtype),
-        test    = lambda s, c: np.full(c.shape, _ref_freq, c.dtype),
-        tags    = "input, constant",
-        description = REF_FREQ_DESCRIPTION.format(st="point"),
-        units   = HERTZ),
 
     # Gaussian Source Definitions
     array_dict('gaussian_lm', ('ngsrc',2), 'ft',
@@ -392,24 +380,12 @@ def test_sersic_shape(self, context):
         tags    = "input, constant",
         description = LM_DESCRIPTION.format(st="gaussian"),
         units   = RADIANS),
-    array_dict('gaussian_stokes', ('ngsrc','ntime', 4), 'ft',
+    array_dict('gaussian_stokes', ('ngsrc','ntime', 'nchan', 4), 'ft',
         default = default_stokes,
         test    = rand_stokes,
         tags    = "input, constant",
         description = STOKES_DESCRIPTION,
         units   = JANSKYS),
-    array_dict('gaussian_alpha', ('ngsrc','ntime'), 'ft',
-        default = lambda s, c: np.zeros(c.shape, c.dtype),
-        test    = lambda s, c: rf(c.shape, c.dtype)*0.1,
-        tags    = "input, constant",
-        description = ALPHA_DESCRIPTION,
-        units   = DIMENSIONLESS),
-    array_dict('gaussian_ref_freq', ('ngsrc',), 'ft',
-        default = lambda s, c: np.full(c.shape, _ref_freq, c.dtype),
-        test    = lambda s, c: np.full(c.shape, _ref_freq, c.dtype),
-        tags    = "input, constant",
-        description = REF_FREQ_DESCRIPTION.format(st="gaussian"),
-        units   = HERTZ),
     array_dict('gaussian_shape', (3, 'ngsrc'), 'ft',
         default = default_gaussian_shape,
         test    = rand_gaussian_shape,
@@ -427,24 +403,12 @@ def test_sersic_shape(self, context):
         tags    = "input, constant",
         description = LM_DESCRIPTION.format(st="sersic"),
         units   = "Radians"),
-    array_dict('sersic_stokes', ('nssrc','ntime', 4), 'ft',
+    array_dict('sersic_stokes', ('nssrc','ntime', 'nchan', 4), 'ft',
         default = default_stokes,
         test    = rand_stokes,
         tags    = "input, constant",
         description = STOKES_DESCRIPTION,
         units   = JANSKYS),
-    array_dict('sersic_alpha', ('nssrc','ntime'), 'ft',
-        default = lambda s, c: np.zeros(c.shape, c.dtype),
-        test    = lambda s, c: rf(c.shape, c.dtype)*0.1,
-        tags    = "input, constant",
-        description = ALPHA_DESCRIPTION,
-        units   = DIMENSIONLESS),
-    array_dict('sersic_ref_freq', ('nssrc',), 'ft',
-        default = lambda s, c: np.full(c.shape, _ref_freq, c.dtype),
-        test    = lambda s, c: np.full(c.shape, _ref_freq, c.dtype),
-        tags    = "input, constant",
-        description = REF_FREQ_DESCRIPTION.format(st="sersic"),
-        units   = HERTZ),
     array_dict('sersic_shape', (3, 'nssrc'), 'ft',
         default = default_sersic_shape,
         test    = test_sersic_shape,
diff --git a/montblanc/impl/rime/tensorflow/rime_ops/b_sqrt_op_cpu.cpp b/montblanc/impl/rime/tensorflow/rime_ops/b_sqrt_op_cpu.cpp
index 3906f55cf..8ce664f0e 100644
--- a/montblanc/impl/rime/tensorflow/rime_ops/b_sqrt_op_cpu.cpp
+++ b/montblanc/impl/rime/tensorflow/rime_ops/b_sqrt_op_cpu.cpp
@@ -16,35 +16,25 @@ auto bsqrt_shape_function = [](InferenceContext* c) {
     DimensionHandle d;
 
     ShapeHandle stokes = c->input(0);
-    ShapeHandle alpha = c->input(1);
-    ShapeHandle frequency = c->input(2);
-    ShapeHandle ref_freq = c->input(3);
 
-    TF_RETURN_WITH_CONTEXT_IF_ERROR(c->WithRank(stokes, 3, &input),
-        "stokes shape must be [nsrc, ntime, 4] but is " + c->DebugString(stokes));
-    TF_RETURN_WITH_CONTEXT_IF_ERROR(c->WithValue(c->Dim(stokes, 2), 4, &d),
-        "stokes shape must be [nsrc, ntime, 4] but is " + c->DebugString(stokes));
-
-    TF_RETURN_WITH_CONTEXT_IF_ERROR(c->WithRank(alpha, 2, &input),
-        "alpha shape must be [nsrc, ntime] but is " + c->DebugString(alpha));
-
-    TF_RETURN_WITH_CONTEXT_IF_ERROR(c->WithRank(frequency, 1, &input),
-        "frequency shape must be [nchan,] but is " + c->DebugString(frequency));
-
-    TF_RETURN_WITH_CONTEXT_IF_ERROR(c->WithRank(ref_freq, 1, &input),
-        "ref_freq shape must be [nsrc,] but is " + c->DebugString(ref_freq));
+    TF_RETURN_WITH_CONTEXT_IF_ERROR(c->WithRank(stokes, 4, &input),
+        "stokes shape must be [nsrc, ntime, nchan, 4] but is " + c->DebugString(stokes));
+    TF_RETURN_WITH_CONTEXT_IF_ERROR(c->WithValue(c->Dim(stokes, 3), 4, &d),
+        "stokes shape must be [nsrc, ntime, nchan, 4] but is " + c->DebugString(stokes));
 
     // bsqrt output is (nsrc, ntime, nchan, 4)
     ShapeHandle bsqrt = c->MakeShape({
         c->Dim(stokes, 0),
         c->Dim(stokes, 1),
-        c->Dim(frequency, 0),
+        c->Dim(stokes, 2),
         4});
 
-    // sgn_brightness output is (nsrc, ntime)
+    // sgn_brightness output is (nsrc, ntime, nchan)
     ShapeHandle sgn_brightness = c->MakeShape({
         c->Dim(stokes, 0),
-        c->Dim(stokes, 1)});
+        c->Dim(stokes, 1),
+        c->Dim(stokes, 2),
+    });
 
     // Set the output shape
     c->set_output(0, bsqrt);
@@ -55,9 +45,6 @@ auto bsqrt_shape_function = [](InferenceContext* c) {
 
 REGISTER_OP("BSqrt")
     .Input("stokes: FT")
-    .Input("alpha: FT")
-    .Input("frequency: FT")
-    .Input("ref_freq: FT")
     .Output("b_sqrt: CT")
     .Output("sgn_brightness: int8")
     .Attr("FT: {float, double} = DT_FLOAT")
diff --git a/montblanc/impl/rime/tensorflow/rime_ops/b_sqrt_op_cpu.h b/montblanc/impl/rime/tensorflow/rime_ops/b_sqrt_op_cpu.h
index d11a5c8ad..50c1c9978 100644
--- a/montblanc/impl/rime/tensorflow/rime_ops/b_sqrt_op_cpu.h
+++ b/montblanc/impl/rime/tensorflow/rime_ops/b_sqrt_op_cpu.h
@@ -35,14 +35,11 @@ class BSqrt<CPUDevice, FT, CT> : public tensorflow::OpKernel
 
         // Sanity check the input tensors
         const tf::Tensor & in_stokes = context->input(0);
-        const tf::Tensor & in_alpha = context->input(1);
-        const tf::Tensor & in_frequency = context->input(2);
-        const tf::Tensor & in_ref_freq = context->input(3);
 
         // Extract problem dimensions
         int nsrc = in_stokes.dim_size(0);
         int ntime = in_stokes.dim_size(1);
-        int nchan = in_frequency.dim_size(0);
+        int nchan = in_stokes.dim_size(2);
 
         // Reason about the shape of the b_sqrt tensor and
         // create a pointer to it
@@ -56,20 +53,17 @@ class BSqrt<CPUDevice, FT, CT> : public tensorflow::OpKernel
         if (b_sqrt_ptr->NumElements() == 0)
             { return; }
 
-        // Reason about shape of the invert tensor
+        // Reason about shape of the sgn_brightness tensor
         // and create a pointer to it
-        tf::TensorShape invert_shape({nsrc, ntime});
-        tf::Tensor * invert_ptr = nullptr;
+        tf::TensorShape sgn_brightness_shape({nsrc, ntime, nchan});
+        tf::Tensor * sgn_brightness_ptr = nullptr;
 
         OP_REQUIRES_OK(context, context->allocate_output(
-            1, invert_shape, &invert_ptr));
+            1, sgn_brightness_shape, &sgn_brightness_ptr));
 
-        auto stokes = in_stokes.tensor<FT, 3>();
-        auto alpha = in_alpha.tensor<FT, 2>();
-        auto frequency = in_frequency.tensor<FT, 1>();
-        auto ref_freq = in_ref_freq.tensor<FT, 1>();
+        auto stokes = in_stokes.tensor<FT, 4>();
         auto b_sqrt = b_sqrt_ptr->tensor<CT, 4>();
-        auto sgn_brightness = invert_ptr->tensor<tf::int8, 2>();
+        auto sgn_brightness = sgn_brightness_ptr->tensor<tf::int8, 3>();
 
         // Linear polarisation or circular polarisation
         bool linear = (polarisation_type == "linear");
@@ -89,58 +83,53 @@ class BSqrt<CPUDevice, FT, CT> : public tensorflow::OpKernel
         {
             for(int time=0; time < ntime; ++time)
             {
-                // Reference stokes parameters.
-                // Input order of stokes parameters differs
-                // depending on whether linear or circular polarisation
-                // is used, but the rest of the calculation is the same...
-                FT I = stokes(src, time, iI);
-                FT Q = stokes(src, time, iQ);
-                FT U = stokes(src, time, iU);
-                FT V = stokes(src, time, iV);
-
-                // sgn variable, used to indicate whether
-                // brightness matrix is negative, zero or positive
-                // and a valid Cholesky decomposition
-                FT IQ = I + Q;
-                FT sgn = (zero < IQ) - (IQ < zero);
-                // I *= sign;
-                // Q *= sign;
-                U *= sgn;
-                V *= sgn;
-                IQ *= sgn;
-
-                // Indicate negative, zero or positive brightness matrix
-                sgn_brightness(src, time) = sgn;
-
-                // Compute cholesky decomposition
-                CT L00 = std::sqrt(CT(IQ, zero));
-                // Store L00 as a divisor of L10
-                CT div = L00;
-
-                // Gracefully handle zero matrices
-                if(IQ == zero)
-                {
-                    div = CT(one, zero);
-                    IQ = one;
-                }
-
-                CT L10 = CT(U, -V) / div;
-                FT L11_real = (I*I - Q*Q - U*U - V*V)/IQ;
-                CT L11 = std::sqrt(CT(L11_real, zero));
-
                 for(int chan=0; chan < nchan; ++chan)
                 {
-                    // Compute square root of spectral index
-                    FT psqrt = std::pow(
-                        frequency(chan)/ref_freq(src),
-                        alpha(src, time)*0.5);
+                    // Reference stokes parameters.
+                    // Input order of stokes parameters differs
+                    // depending on whether linear or circular polarisation
+                    // is used, but the rest of the calculation is the same...
+                    FT I = stokes(src, time, chan, iI);
+                    FT Q = stokes(src, time, chan, iQ);
+                    FT U = stokes(src, time, chan, iU);
+                    FT V = stokes(src, time, chan, iV);
+
+                    // sgn variable, used to indicate whether
+                    // brightness matrix is negative, zero or positive
+                    // and a valid Cholesky decomposition
+                    FT IQ = I + Q;
+                    FT sgn = (zero < IQ) - (IQ < zero);
+                    // I *= sign;
+                    // Q *= sign;
+                    U *= sgn;
+                    V *= sgn;
+                    IQ *= sgn;
+
+                    // Indicate negative, zero or positive brightness matrix
+                    sgn_brightness(src, time, chan) = sgn;
+
+                    // Compute cholesky decomposition
+                    CT L00 = std::sqrt(CT(IQ, zero));
+                    // Store L00 as a divisor of L10
+                    CT div = L00;
+
+                    // Gracefully handle zero matrices
+                    if(IQ == zero)
+                    {
+                        div = CT(one, zero);
+                        IQ = one;
+                    }
+
+                    CT L10 = CT(U, -V) / div;
+                    FT L11_real = (I*I - Q*Q - U*U - V*V)/IQ;
+                    CT L11 = std::sqrt(CT(L11_real, zero));
 
                     // Assign square root of the brightness matrix,
                     // computed via cholesky decomposition
-                    b_sqrt(src, time, chan, XX) = L00*psqrt;
+                    b_sqrt(src, time, chan, XX) = L00;
                     b_sqrt(src, time, chan, XY) = 0.0;
-                    b_sqrt(src, time, chan, YX) = L10*psqrt;
-                    b_sqrt(src, time, chan, YY) = L11*psqrt;
+                    b_sqrt(src, time, chan, YX) = L10;
+                    b_sqrt(src, time, chan, YY) = L11;
                 }
             }
         }
diff --git a/montblanc/impl/rime/tensorflow/rime_ops/b_sqrt_op_gpu.cuh b/montblanc/impl/rime/tensorflow/rime_ops/b_sqrt_op_gpu.cuh
index 654956361..9339f4718 100644
--- a/montblanc/impl/rime/tensorflow/rime_ops/b_sqrt_op_gpu.cuh
+++ b/montblanc/impl/rime/tensorflow/rime_ops/b_sqrt_op_gpu.cuh
@@ -63,9 +63,6 @@ int bsqrt_pol()
 template <typename Traits>
 __global__ void rime_b_sqrt(
     const typename Traits::stokes_type * stokes,
-    const typename Traits::alpha_type * alpha,
-    const typename Traits::frequency_type * frequency,
-    const typename Traits::frequency_type * ref_freq,
     typename Traits::visibility_type * B_sqrt,
     typename Traits::sgn_brightness_type * sgn_brightness,
     bool linear,
@@ -90,26 +87,7 @@ __global__ void rime_b_sqrt(
     if(SRC >= nsrc || TIME >= ntime || CHAN >= nchan)
         { return; }
 
-    __shared__ FT src_rfreq[LTr::BLOCKDIMZ];
-    __shared__ FT freq[LTr::BLOCKDIMX];
-
-    // Varies by channel
-    if(threadIdx.z == 0 && threadIdx.y == 0)
-    {
-        freq[threadIdx.x] = frequency[CHAN];
-    }
-
-    // Varies by source
-    if(threadIdx.y == 0 && threadIdx.x == 0)
-    {
-        src_rfreq[threadIdx.z] = ref_freq[SRC];
-    }
-
-    __syncthreads();
-
-    // Calculate power term
-    int i = SRC*ntime + TIME;
-    FT power = Po::pow(freq[threadIdx.x]/src_rfreq[threadIdx.z], alpha[i]);
+    int i = (SRC*ntime + TIME)*nchan + CHAN;
 
     // Read in stokes parameters (IQUV)
     ST _stokes = stokes[i];
@@ -118,21 +96,14 @@ __global__ void rime_b_sqrt(
     FT & U = linear ? _stokes.z : _stokes.y;
     FT & V = linear ? _stokes.w : _stokes.z;
 
-    I *= power;
-    Q *= power;
-    U *= power;
-    V *= power;
-
     // sgn variable, used to indicate whether
     // brightness matrix is negative, zero or positive
     FT IQ = I + Q;
     FT sgn = (zero < IQ) - (IQ < zero);
 
-    // Indicate that we inverted the sgn of the brightness
-    // matrix to obtain the cholesky decomposition
-    i = SRC*ntime + TIME;
-    if(CHAN == 0)
-        { sgn_brightness[i] = sgn; }
+    // We inverted the sgn of the brightness matrix to obtain the
+    // cholesky decomposition. Recover it.
+    sgn_brightness[i] = sgn;
 
     // I *= sgn;
     // Q *= sgn;
@@ -178,13 +149,12 @@ __global__ void rime_b_sqrt(
     B.XY.x = zero; B.XY.y = zero;
 
     // Write out the cholesky decomposition
-    B_sqrt[i*nchan + CHAN] = B;
+    B_sqrt[i] = B;
 }
 
 template <typename FT, typename CT>
 class BSqrt<GPUDevice, FT, CT> : public tensorflow::OpKernel
 {
-public:
 private:
     std::string polarisation_type;
 
@@ -202,14 +172,11 @@ public:
 
         // Sanity check the input tensors
         const tf::Tensor & in_stokes = context->input(0);
-        const tf::Tensor & in_alpha = context->input(1);
-        const tf::Tensor & in_frequency = context->input(2);
-        const tf::Tensor & in_ref_freq = context->input(3);
 
         // Extract problem dimensions
         int nsrc = in_stokes.dim_size(0);
         int ntime = in_stokes.dim_size(1);
-        int nchan = in_frequency.dim_size(0);
+        int nchan = in_stokes.dim_size(2);
 
         bool linear = (polarisation_type == "linear");
 
@@ -225,13 +192,13 @@ public:
         if (b_sqrt_ptr->NumElements() == 0)
             { return; }
 
-        // Reason about shape of the invert tensor
+        // Reason about shape of the sgn_brightness tensor
         // and create a pointer to it
-        tf::TensorShape invert_shape({nsrc, ntime});
-        tf::Tensor * invert_ptr = nullptr;
+        tf::TensorShape sgn_brightness_shape({nsrc, ntime, nchan});
+        tf::Tensor * sgn_brightness_ptr = nullptr;
 
         OP_REQUIRES_OK(context, context->allocate_output(
-            1, invert_shape, &invert_ptr));
+            1, sgn_brightness_shape, &sgn_brightness_ptr));
 
         // Cast input into CUDA types defined within the Traits class
         typedef montblanc::kernel_traits<FT> Tr;
@@ -245,24 +212,16 @@ public:
         // Get the device pointers of our GPU memory arrays
         auto stokes = reinterpret_cast<const typename Tr::stokes_type *>(
             in_stokes.flat<FT>().data());
-        auto alpha = reinterpret_cast<const typename Tr::alpha_type *>(
-            in_alpha.flat<FT>().data());
-        auto frequency = reinterpret_cast<const typename Tr::frequency_type *>(
-            in_frequency.flat<FT>().data());
-        auto ref_freq = reinterpret_cast<const typename Tr::frequency_type *>(
-            in_ref_freq.flat<FT>().data());
         auto b_sqrt = reinterpret_cast<typename Tr::visibility_type *>(
             b_sqrt_ptr->flat<CT>().data());
         auto sgn_brightness = reinterpret_cast<typename Tr::sgn_brightness_type *>(
-            invert_ptr->flat<tf::int8>().data());
+            sgn_brightness_ptr->flat<tf::int8>().data());
 
         const auto & stream = context->eigen_device<GPUDevice>().stream();
 
         rime_b_sqrt<Tr> <<<grid, blocks, 0, stream>>>(stokes,
-            alpha, frequency, ref_freq,
             b_sqrt, sgn_brightness,
-            linear,
-            nsrc, ntime, nchan);
+            linear, nsrc, ntime, nchan);
     }
 };
 
diff --git a/montblanc/impl/rime/tensorflow/rime_ops/sum_coherencies_op_cpu.cpp b/montblanc/impl/rime/tensorflow/rime_ops/sum_coherencies_op_cpu.cpp
index 5bb77347f..7d649d37b 100644
--- a/montblanc/impl/rime/tensorflow/rime_ops/sum_coherencies_op_cpu.cpp
+++ b/montblanc/impl/rime/tensorflow/rime_ops/sum_coherencies_op_cpu.cpp
@@ -42,8 +42,8 @@ auto sum_coherencies_shape_function = [](InferenceContext* c) {
         c->DebugString(ant_jones));
 
     // sgn_brightness
-    TF_RETURN_WITH_CONTEXT_IF_ERROR(c->WithRank(sgn_brightness, 2, &input),
-        "sgn_brightness shape must be [nsrc, ntime] but is " +
+    TF_RETURN_WITH_CONTEXT_IF_ERROR(c->WithRank(sgn_brightness, 3, &input),
+        "sgn_brightness shape must be [nsrc, ntime, nchan] but is " +
         c->DebugString(sgn_brightness));
 
     // base_coherencies
diff --git a/montblanc/impl/rime/tensorflow/rime_ops/sum_coherencies_op_cpu.h b/montblanc/impl/rime/tensorflow/rime_ops/sum_coherencies_op_cpu.h
index e7b5cc95f..f5cf78f2b 100644
--- a/montblanc/impl/rime/tensorflow/rime_ops/sum_coherencies_op_cpu.h
+++ b/montblanc/impl/rime/tensorflow/rime_ops/sum_coherencies_op_cpu.h
@@ -53,7 +53,7 @@ class SumCoherencies<CPUDevice, FT, CT> : public tensorflow::OpKernel
         auto antenna2 = in_antenna2.tensor<int,2>();
         auto shape = in_shape.tensor<FT, 4>();
         auto ant_jones = in_ant_jones.tensor<CT, 5>();
-        auto sgn_brightness = in_sgn_brightness.tensor<tf::int8, 2>();
+        auto sgn_brightness = in_sgn_brightness.tensor<tf::int8, 3>();
         auto base_coherencies = in_base_coherencies.tensor<CT, 4>();
         auto coherencies = coherencies_ptr->tensor<CT, 4>();
 
@@ -91,7 +91,7 @@ class SumCoherencies<CPUDevice, FT, CT> : public tensorflow::OpKernel
                         CT b2 = std::conj(ant_jones(src, time, ant2, chan, 1)*s);
                         CT b3 = std::conj(ant_jones(src, time, ant2, chan, 3)*s);
 
-                        FT sign = sgn_brightness(src, time);
+                        FT sign = sgn_brightness(src, time, chan);
 
                         // Multiply jones matrices and accumulate them
                         // in the sum terms
diff --git a/montblanc/impl/rime/tensorflow/rime_ops/sum_coherencies_op_gpu.cuh b/montblanc/impl/rime/tensorflow/rime_ops/sum_coherencies_op_gpu.cuh
index 1272d272d..211f5b09f 100644
--- a/montblanc/impl/rime/tensorflow/rime_ops/sum_coherencies_op_gpu.cuh
+++ b/montblanc/impl/rime/tensorflow/rime_ops/sum_coherencies_op_gpu.cuh
@@ -95,15 +95,13 @@ __global__ void rime_sum_coherencies(
         montblanc::jones_multiply_4x4_hermitian_transpose_in_place<FT>(
             J1, J2);
 
-        // Load in and apply in sign inversions stemming from
-        // cholesky decompositions that must be applied.
+        // Apply sign inversions stemming from cholesky decompositions.
+        // Sum source coherency into model visibility
+        i = base*nchan + chan;
         FT sign = FT(sgn_brightness[base]);
-        J1.x *= sign;
-        J1.y *= sign;
 
-        // Sum source coherency into model visibility
-        coherency.x += J1.x;
-        coherency.y += J1.y;
+        coherency.x += sign*J1.x;
+        coherency.y += sign*J1.y;
     }
 
     i = (time*nbl + bl)*npolchan + polchan;
diff --git a/montblanc/impl/rime/tensorflow/rime_ops/test_b_sqrt.py b/montblanc/impl/rime/tensorflow/rime_ops/test_b_sqrt.py
index 35197fbb7..723ffb6a9 100644
--- a/montblanc/impl/rime/tensorflow/rime_ops/test_b_sqrt.py
+++ b/montblanc/impl/rime/tensorflow/rime_ops/test_b_sqrt.py
@@ -4,14 +4,13 @@
 import tensorflow as tf
 from tensorflow.python.client import device_lib
 
-def brightness_numpy(stokes, alpha, frequency, ref_freq, pol_type):
-    nsrc, ntime, _ = stokes.shape
-    nchan, = frequency.shape
+def brightness_numpy(stokes, pol_type):
+    nsrc, ntime, nchan, _ = stokes.shape
 
-    I = stokes[:,:,0].reshape(nsrc, ntime, 1)
-    Q = stokes[:,:,1].reshape(nsrc, ntime, 1)
-    U = stokes[:,:,2].reshape(nsrc, ntime, 1)
-    V = stokes[:,:,3].reshape(nsrc, ntime, 1)
+    I = stokes[:,:,:,0]
+    Q = stokes[:,:,:,1]
+    U = stokes[:,:,:,2]
+    V = stokes[:,:,:,3]
 
     if pol_type == "linear":
         pass
@@ -20,18 +19,14 @@ def brightness_numpy(stokes, alpha, frequency, ref_freq, pol_type):
     else:
         raise ValueError("Invalid pol_type '{}'".format(pol_type))
 
-    # Compute the spectral index
-    freq_ratio = frequency[None,None,:]/ref_freq[:,None,None]
-    power = np.power(freq_ratio, alpha[:,:,None])
-
     CT = np.complex128 if stokes.dtype == np.float64 else np.complex64
 
     # Compute the brightness matrix
     B = np.empty(shape=(nsrc, ntime, nchan, 4), dtype=CT)
-    B[:,:,:,0] = power*(I+Q)
-    B[:,:,:,1] = power*(U+V*1j)
-    B[:,:,:,2] = power*(U-V*1j)
-    B[:,:,:,3] = power*(I-Q)
+    B[:,:,:,0] = I+Q
+    B[:,:,:,1] = U+V*1j
+    B[:,:,:,2] = U-V*1j
+    B[:,:,:,3] = I-Q
 
     return B
 
@@ -70,29 +65,25 @@ def _impl_test_b_sqrt(self, FT, CT, pol_type, tols):
         # Set up our numpy input arrays
 
         # Stokes parameters, should produce a positive definite matrix
-        stokes = np.empty(shape=(nsrc, ntime, 4), dtype=FT)
-        Q = stokes[:,:,1] = rf(nsrc, ntime) - 0.5
-        U = stokes[:,:,2] = rf(nsrc, ntime) - 0.5
-        V = stokes[:,:,3] = rf(nsrc, ntime) - 0.5
-        noise = rf(nsrc, ntime)*0.1
+        stokes = np.empty(shape=(nsrc, ntime, nchan, 4), dtype=FT)
+        Q = stokes[:,:,:,1] = rf(nsrc, ntime, nchan) - 0.5
+        U = stokes[:,:,:,2] = rf(nsrc, ntime, nchan) - 0.5
+        V = stokes[:,:,:,3] = rf(nsrc, ntime, nchan) - 0.5
+        noise = rf(nsrc, ntime, nchan)*0.1
         # Need I^2 = Q^2 + U^2 + V^2 + noise^2
-        stokes[:,:,0] = np.sqrt(Q**2 + U**2 + V**2 + noise)
+        stokes[:,:,:,0] = np.sqrt(Q**2 + U**2 + V**2 + noise)
 
         # Choose random flux to invert
-        mask = np.random.randint(0, 2, size=(nsrc, ntime)) == 1
+        mask = np.random.randint(0, 2, size=(nsrc, ntime, nchan)) == 1
         stokes[mask,0] = -stokes[mask,0]
 
         # Make the last matrix zero to test the positive semi-definite case
-        stokes[-1,-1,:] = 0
-
-        alpha = rf(nsrc, ntime)*0.8
-        frequency = np.linspace(1.3e9, 1.5e9, nchan, endpoint=True, dtype=FT)
-        ref_freq = 0.2e9*rf(nsrc,) + 1.3e9
+        stokes[-1,-1,-1,:] = 0
 
         # Argument list
-        np_args = [stokes, alpha, frequency, ref_freq]
+        np_args = [stokes]
         # Argument string name list
-        arg_names = ["stokes", "alpha", "frequency", "ref_freq"]
+        arg_names = ["stokes"]
 
         # Constructor tensorflow variables
         tf_args = [tf.Variable(v, name=n) for v, n in zip(np_args, arg_names)]
@@ -119,7 +110,7 @@ def _pin_op(device, *tf_args):
             cpu_bsqrt, cpu_invert = S.run(cpu_op)
 
             # Get our actual brightness matrices
-            b = brightness_numpy(stokes, alpha, frequency, ref_freq, pol_type)
+            b = brightness_numpy(stokes, pol_type)
             b_2x2 = b.reshape(nsrc, ntime, nchan, 2, 2)
             b_sqrt_2x2 = cpu_bsqrt.reshape(nsrc, ntime, nchan, 2, 2)
 
@@ -129,7 +120,7 @@ def _pin_op(device, *tf_args):
                 b_sqrt_2x2, b_sqrt_2x2.conj())
 
             # Apply any sign inversions
-            square[:,:,:,:,:] *= cpu_invert[:,:,None,None,None]
+            square[:,:,:,:,:] *= cpu_invert[:,:,:,None,None]
 
             # And we should obtain the brightness matrix
             assert np.allclose(b_2x2, square)
diff --git a/montblanc/tests/test_meq_tf.py b/montblanc/tests/test_meq_tf.py
index 0d5fdcda1..9b102073f 100644
--- a/montblanc/tests/test_meq_tf.py
+++ b/montblanc/tests/test_meq_tf.py
@@ -143,7 +143,7 @@ def get_gaussian_sources(nsrc):
     gauss_shape[:] = rf(size=gauss_shape.shape)
     return c, s, a, r, gauss_shape
 
-npsrc, ngsrc = 10, 10
+npsrc, ngsrc = 5, 5
 
 pt_lm, pt_stokes, pt_alpha, pt_ref_freq = get_point_sources(npsrc)
 
@@ -239,32 +239,28 @@ def point_lm(self, context):
         return pt_lm[lp:up, :]
 
     def point_stokes(self, context):
-        (lp, up), (lt, ut) = context.dim_extents('npsrc', 'ntime')
-        return np.tile(pt_stokes[lp:up, np.newaxis, :], [1, ut-lt, 1])
+        (lp, up), (lt, ut), (lc, uc) = context.dim_extents('npsrc', 'ntime', 'nchan')
+        # (npsrc, ntime, nchan, 4)
+        s = pt_stokes[lp:up,None,None,:]
+        a = np.broadcast_to(pt_alpha[lp:up,None,None,None], (up-lp,ut-lt,1,1))
+        rf = pt_ref_freq[lp:up,None,None,None]
+        f = frequency[None,None,lc:uc,None]
 
-    def point_alpha(self, context):
-        (lp, up), (lt, ut) = context.dim_extents('npsrc', 'ntime')
-        return np.tile(pt_alpha[lp:up, np.newaxis], [1, ut-lt])
-
-    def point_ref_freq(self, context):
-        (lp, up) = context.dim_extents('npsrc')
-        return pt_ref_freq[lp:up]
+        return s*(f/rf)**a
 
     def gaussian_lm(self, context):
         lg, ug = context.dim_extents('ngsrc')
         return g_lm[lg:ug, :]
 
     def gaussian_stokes(self, context):
-        (lg, ug), (lt, ut) = context.dim_extents('ngsrc', 'ntime')
-        return np.tile(g_stokes[lg:ug, np.newaxis, :], [1, ut-lt, 1])
-
-    def gaussian_alpha(self, context):
-        (lg, ug), (lt, ut) = context.dim_extents('ngsrc', 'ntime')
-        return np.tile(g_alpha[lg:ug, np.newaxis], [1, ut-lt])
+        (lg, ug), (lt, ut), (lc, uc) = context.dim_extents('ngsrc', 'ntime', 'nchan')
+        # (ngsrc, ntime, nchan, 4)
+        s = g_stokes[lg:ug,None,None,:]
+        a = np.broadcast_to(pt_alpha[lg:ug,None,None,None], (ug-lg,ut-lt,1,1))
+        rf = g_ref_freq[lg:ug,None,None,None]
+        f = frequency[None,None,lc:uc,None]
 
-    def gaussian_ref_freq(self, context):
-        (lg, ug) = context.dim_extents('ngsrc')
-        return g_ref_freq[lg:ug]
+        return s*(f/rf)**a
 
     def gaussian_shape(self, context):
         (lg, ug) = context.dim_extents('ngsrc')
@@ -388,4 +384,4 @@ def updated_dimensions(self):
         for i, (p, mb, meq, d, amp) in it:
             f.write("{i} {t} Montblanc: {mb} MeqTrees: {meq} "
                 "Difference {d} Absolute Difference {ad} \n".format(
-                    i=i, t=p, mb=mb, meq=meq, d=d, ad=amp))
\ No newline at end of file
+                    i=i, t=p, mb=mb, meq=meq, d=d, ad=amp))

From 06b91c4892cdab3ff6cf362dffa5f54bac34628a Mon Sep 17 00:00:00 2001
From: Simon Perkins <simon.perkins@gmail.com>
Date: Fri, 13 Jul 2018 10:45:31 +0200
Subject: [PATCH 02/41] Install tensorflow CPU by default

tensorflow GPU requires a manual install.
---
 docs/installation.rst | 80 ++++++++++++++++-------------------
 setup.py              | 98 ++++++++++++++++++++++++-------------------
 2 files changed, 90 insertions(+), 88 deletions(-)

diff --git a/docs/installation.rst b/docs/installation.rst
index e7abc6af0..640fa36c8 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -15,12 +15,16 @@ Certain pre-requisites must be installed:
 Pre-requisites
 ~~~~~~~~~~~~~~
 
-- Montblanc depends on tensorflow_. Choose one of the following,
-  depending on whether you require CPU or GPU acceleration:
+- .. _install_tf_gpu:
+
+  Montblanc depends on tensorflow_ for CPU and GPU acceleration.
+  By default the CPU version of tensorflow is installed during
+  Montblanc's installation process.
+  If you require GPU acceleration, the GPU version of tensorflow
+  should be installed first.
 
   .. code:: bash
 
-    $ pip install tensorflow==1.8.0
     $ pip install tensorflow-gpu==1.8.0
 
 - GPU Acceleration requires `CUDA 8.0 <CUDA_>`_ and `cuDNN 6.0 for CUDA 8.0 <cudnn_>`_.
@@ -54,29 +58,10 @@ Pre-requisites
       $ export LD_LIBRARY_PATH=/usr/lib/nvidia-375:$LD_LIBRARY_PATH
 
 -  casacore_ and the `measures <ftp://ftp.astron.nl/outgoing/Measures/>`__ found in casacore-data.
-   Gijs Molenaar has kindly packaged this on Ubuntu/Debian style systems.
-
-   On Ubuntu 14.04, these packages can be added from the `radio astronomy
-   PPA <https://launchpad.net/~radio-astro/+archive/main>`__ :
-
-   .. code:: bash
-
-       $ sudo apt-get install software-properties-common
-       $ sudo add-apt-repository ppa:radio-astro/main
-       $ sudo apt-get update
-       $ sudo apt-get install libcasacore2-dev casacore-data
-
-   On Ubuntu 16.04 these packages can be added from the `kernsuite PPA
-   <https://launchpad.net/~kernsuite/+archive/ubuntu/kern-1>`__:
+   Gijs Molenaar has kindly packaged this as kernsuite_ on as Ubuntu/Debian style systems.
 
-   .. code:: bash
-
-       $ sudo apt-get install software-properties-common
-       $ sudo add-apt-repository ppa:kernsuite/kern-1
-       $ sudo apt-get update
-       $ sudo apt-get install casacore-dev casacore-data
 
-   Otherwise, casacore and the measures tables will need to be manually installed.
+   Otherwise, casacore and the measures tables should be manually installed.
 
 - .. _check_dependencies:
 
@@ -134,34 +119,38 @@ Possible Issues
 ~~~~~~~~~~~~~~~
 
 - Montblanc doesn't use your GPU or compile GPU tensorflow operators.
-  The installation process attempts to find your CUDA install location.
-  It will log information about where it thinks this is and which GPU devices
-  you have installed.
-  Check the install log generated by the ``pip`` commands given above to see
-  why this fails, searching for "**Montblanc Install**" entries.
 
-  It is possible to see if the GPU version of tensorflow is installed by running
-  the following code in a python interpreter:
+  1. Check if the `GPU version of tensorflow <install_tf_gpu_>`_ is installed.
 
-  .. code:: python
+     It is possible to see if the GPU version of tensorflow is installed by running
+     the following code in a python interpreter:
 
-      import tensorflow as tf
-      with tf.Session() as S: pass
+     .. code:: python
 
-  If tensorflow knows about your GPU it will log some information about it:
+          import tensorflow as tf
+          with tf.Session() as S: pass
 
-  .. code:: bash
+     If tensorflow knows about your GPU it will log some information about it:
+
+     .. code:: bash
+
+          2017-05-16 14:24:38.571320: I tensorflow/core/common_runtime/gpu/gpu_device.cc:887] Found device 0 with properties:
+          name: GeForce GTX 960M
+          major: 5 minor: 0 memoryClockRate (GHz) 1.176
+          pciBusID 0000:01:00.0
+          Total memory: 3.95GiB
+          Free memory: 3.92GiB
+          2017-05-16 14:24:38.571352: I tensorflow/core/common_runtime/gpu/gpu_device.cc:908] DMA: 0
+          2017-05-16 14:24:38.571372: I tensorflow/core/common_runtime/gpu/gpu_device.cc:918] 0:   Y
+          2017-05-16 14:24:38.571403: I tensorflow/core/common_runtime/gpu/gpu_device.cc:977] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GTX 960M, pci bus id: 0000:01:00.0)
+
+  2. The installation process couldn't find your CUDA install.
 
-      2017-05-16 14:24:38.571320: I tensorflow/core/common_runtime/gpu/gpu_device.cc:887] Found device 0 with properties:
-      name: GeForce GTX 960M
-      major: 5 minor: 0 memoryClockRate (GHz) 1.176
-      pciBusID 0000:01:00.0
-      Total memory: 3.95GiB
-      Free memory: 3.92GiB
-      2017-05-16 14:24:38.571352: I tensorflow/core/common_runtime/gpu/gpu_device.cc:908] DMA: 0
-      2017-05-16 14:24:38.571372: I tensorflow/core/common_runtime/gpu/gpu_device.cc:918] 0:   Y
-      2017-05-16 14:24:38.571403: I tensorflow/core/common_runtime/gpu/gpu_device.cc:977] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GTX 960M, pci bus id: 0000:01:00.0)
+     It will log information about where it thinks this is and which GPU devices
+     you have installed.
 
+     Check the install log generated by the ``pip`` commands given above to see
+     why this fails, searching for "**Montblanc Install**" entries.
 
 -  `cub 1.6.4 <cub>`_. The setup script will
    attempt to download this from github and install to the correct
@@ -183,6 +172,7 @@ Possible Issues
 .. _cudnn: https://developer.nvidia.com/cudnn
 .. _cub: https://github.com/nvlabs/cub
 .. _casacore: https://github.com/casacore/casacore
+.. _kernsuite: http://kernsuite.info/
 .. _python-casacore: https://github.com/casacore/python-casacore
 .. _venv: http://docs.python-guide.org/en/latest/dev/virtualenvs/
 .. _tensorflow: https://tensorflow.org/
diff --git a/setup.py b/setup.py
index 247ca5f4e..0daf300be 100644
--- a/setup.py
+++ b/setup.py
@@ -19,42 +19,46 @@
 # along with this program; if not, see <http://www.gnu.org/licenses/>.
 
 import json
-import logging
 import os
-import sys
 
-#==============
+# =============
 # Setup logging
-#==============
+# =============
 
 from install.install_log import log
+from install.cuda import inspect_cuda, InspectCudaException
+from install.cub import install_cub, InstallCubException
 
-mb_path = 'montblanc'
-mb_inc_path = os.path.join(mb_path, 'include')
-
-#===================
-# Detect readthedocs
-#====================
-
-on_rtd = os.environ.get('READTHEDOCS') == 'True'
 
-import versioneer
-
-#===================
+# ==================
 # setuptools imports
-#===================
+# ==================
 
+from distutils.version import LooseVersion
 from setuptools import setup, find_packages
 from setuptools.extension import Extension
 from setuptools.dist import Distribution
 
-#=======================
+import versioneer
+
+
+mb_path = 'montblanc'
+mb_inc_path = os.path.join(mb_path, 'include')
+
+# =================
+# Detect readthedocs
+# ==================
+
+on_rtd = os.environ.get('READTHEDOCS') == 'True'
+
+# =====================
 # Monkeypatch distutils
-#=======================
+# =====================
 
 # Save the original command for use within the monkey-patched version
 _DISTUTILS_REINIT = Distribution.reinitialize_command
 
+
 def reinitialize_command(self, command, reinit_subcommands):
     """
     Monkeypatch distutils.Distribution.reinitialize_command() to match behavior
@@ -72,33 +76,34 @@ def reinitialize_command(self, command, reinit_subcommands):
 
     return cmd_obj
 
+
 # Replace original command with monkey-patched version
 Distribution.reinitialize_command = reinitialize_command
 
-TF_VERSION = "1.8.0"
+REQ_TF_VERSION = LooseVersion("1.8.0")
 
+# Inspect previous tensorflow installs
 try:
     import tensorflow as tf
 except ImportError:
-    if not on_rtd:
-        raise ImportError("Please 'pip install tensorflow==%s' or "
-                          "'pip install tensorflow-gpu==%s' prior to "
-                          "installation if you require CPU or GPU "
-                          "support, respectively" % (TF_VERSION, TF_VERSION))
-
+    tf_installed = False
     use_tf_cuda = False
 else:
+    found_version = LooseVersion(tf.__version__)
+
+    if found_version < REQ_TF_VERSION:
+        raise ValueError("Installed version of tensorflow is %s "
+                         "but %s is required" %
+                         (found_version, REQ_TF_VERSION))
+
+    tf_installed = True
     use_tf_cuda = tf.test.is_built_with_cuda()
 
-#============================
+# ===========================
 # Detect CUDA and GPU Devices
-#============================
+# ===========================
 
 # See if CUDA is installed and if any NVIDIA devices are available
-# Choose the tensorflow flavour to install (CPU or GPU)
-from install.cuda import inspect_cuda, InspectCudaException
-from install.cub import install_cub, InstallCubException
-
 if use_tf_cuda:
     try:
         # Look for CUDA devices and NVCC/CUDA installation
@@ -107,14 +112,13 @@ def reinitialize_command(self, command, reinit_subcommands):
 
         cuda_version = device_info['cuda_version']
         log.info("CUDA '{}' found. "
-            "Installing tensorflow GPU".format(cuda_version))
-
+                 "Installing tensorflow GPU".format(cuda_version))
 
         log.info("CUDA installation settings:\n{}"
-                    .format(json.dumps(nvcc_settings, indent=2)))
+                 .format(json.dumps(nvcc_settings, indent=2)))
 
         log.info("CUDA code will be compiled for the following devices:\n{}"
-                    .format(json.dumps(device_info['devices'], indent=2)))
+                 .format(json.dumps(device_info['devices'], indent=2)))
 
         # Download and install cub
         install_cub(mb_inc_path)
@@ -130,13 +134,15 @@ def reinitialize_command(self, command, reinit_subcommands):
         log.exception("NVIDIA cub install failed.")
         raise
 else:
-    device_info, nvcc_settings = {}, { 'cuda_available' : False }
+    device_info, nvcc_settings = {}, {'cuda_available': False}
+
 
 def readme():
     """ Return README.rst contents """
     with open('README.rst') as f:
         return f.read()
 
+
 install_requires = [
     'attrdict >= 2.0.0',
     'attrs >= 16.3.0',
@@ -146,10 +152,10 @@ def readme():
     'hypercube == 0.3.3',
 ]
 
-#===================================
+# ==================================
 # Avoid binary packages and compiles
 # on readthedocs
-#===================================
+# ==================================
 
 if on_rtd:
     cmdclass = {}
@@ -167,18 +173,24 @@ def readme():
         'ruamel.yaml >= 0.15.22',
     ]
 
+    if not tf_installed:
+        log.info("No previous version of tensorflow discovered. "
+                 "The CPU version will be installed.")
+
+        install_requires.append("tensorflow == %s" % REQ_TF_VERSION)
+
     from install.tensorflow_ops_ext import (BuildCommand,
-        tensorflow_extension_name)
+                                            tensorflow_extension_name)
 
-    cmdclass = { 'build_ext' : BuildCommand }
+    cmdclass = {'build_ext': BuildCommand}
     # tensorflow_ops_ext.BuildCommand.run will
     # expand this dummy extension to its full portential
     ext_modules = [Extension(tensorflow_extension_name, ['rime.cu'])]
     # Pass NVCC and CUDA settings through to the build extension
     ext_options = {
-        'build_ext' : {
-            'nvcc_settings' : nvcc_settings,
-            'cuda_devices' : device_info,
+        'build_ext': {
+            'nvcc_settings': nvcc_settings,
+            'cuda_devices': device_info,
         },
     }
 

From 0e42a4057903337da4cd9061b9530c20e39e3b3d Mon Sep 17 00:00:00 2001
From: Simon Perkins <simon.perkins@gmail.com>
Date: Fri, 13 Jul 2018 10:58:29 +0200
Subject: [PATCH 03/41] Update the standalone.py example

---
 montblanc/examples/standalone.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/montblanc/examples/standalone.py b/montblanc/examples/standalone.py
index 967cfb53e..63d954b9d 100644
--- a/montblanc/examples/standalone.py
+++ b/montblanc/examples/standalone.py
@@ -59,11 +59,11 @@ def point_lm(self, context):
     def point_stokes(self, context):
         """ Supply point source stokes parameters to montblanc """
 
-        # Shape (npsrc, ntime, 4)
-        (ls, us), (lt, ut), (l, u) = context.array_extents(context.name)
+        # Shape (npsrc, ntime, nchan, 4)
+        (ls, us), (lt, ut), (lc, uc), (l, u) = context.array_extents(context.name)
 
         data = np.empty(context.shape, context.dtype)
-        data[ls:us,:,l:u] = np.asarray(lm_stokes)[ls:us,None,:]
+        data[ls:us,:,:,l:u] = np.asarray(lm_stokes)[ls:us,None,None,:]
         return data
 
     def uvw(self, context):
@@ -112,4 +112,4 @@ def model_vis(self, context):
 
     # Call solver, supplying source and sink providers
     slvr.solve(source_providers=source_provs,
-            sink_providers=sink_provs)
\ No newline at end of file
+            sink_providers=sink_provs)

From 700e78c592d453819a1b4d2df67bafc28461447f Mon Sep 17 00:00:00 2001
From: Simon Perkins <simon.perkins@gmail.com>
Date: Thu, 26 Jul 2018 17:54:49 +0200
Subject: [PATCH 04/41] RadecToLm operator

---
 .../impl/rime/tensorflow/rime_ops/Makefile    |  16 +-
 .../rime/tensorflow/rime_ops/radec_to_lm_op.h |  27 +++
 .../rime_ops/radec_to_lm_op_cpu.cpp           |  91 ++++++++++
 .../tensorflow/rime_ops/radec_to_lm_op_cpu.h  |  72 ++++++++
 .../tensorflow/rime_ops/radec_to_lm_op_gpu.cu |  32 ++++
 .../rime_ops/radec_to_lm_op_gpu.cuh           | 168 ++++++++++++++++++
 .../tensorflow/rime_ops/test_radec_to_lm.py   |  84 +++++++++
 montblanc/include/montblanc/abstraction.cuh   |   2 +
 8 files changed, 487 insertions(+), 5 deletions(-)
 create mode 100644 montblanc/impl/rime/tensorflow/rime_ops/radec_to_lm_op.h
 create mode 100644 montblanc/impl/rime/tensorflow/rime_ops/radec_to_lm_op_cpu.cpp
 create mode 100644 montblanc/impl/rime/tensorflow/rime_ops/radec_to_lm_op_cpu.h
 create mode 100644 montblanc/impl/rime/tensorflow/rime_ops/radec_to_lm_op_gpu.cu
 create mode 100644 montblanc/impl/rime/tensorflow/rime_ops/radec_to_lm_op_gpu.cuh
 create mode 100644 montblanc/impl/rime/tensorflow/rime_ops/test_radec_to_lm.py

diff --git a/montblanc/impl/rime/tensorflow/rime_ops/Makefile b/montblanc/impl/rime/tensorflow/rime_ops/Makefile
index a42598273..7c6e49d7c 100644
--- a/montblanc/impl/rime/tensorflow/rime_ops/Makefile
+++ b/montblanc/impl/rime/tensorflow/rime_ops/Makefile
@@ -1,5 +1,6 @@
 # Tensorflow includes and defines
-TF_INC=$(shell python -c 'import tensorflow as tf; print tf.sysconfig.get_include()')
+TF_CFLAGS=$(shell python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))')
+TF_LFLAGS=$(shell python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))')
 TF_CUDA=$(shell python -c 'import tensorflow as tf; print int(tf.test.is_built_with_cuda())')
 MB_INC=../../../../include
 
@@ -22,12 +23,17 @@ OBJECTS=$(addsuffix .o, $(basename $(SOURCES)))
 LIBRARY=rime.so
 
 # Compiler flags
-INCLUDES= -I $(TF_INC) -I $(MB_INC)
-CPPFLAGS=-std=c++11 $(TF_FLAGS) $(INCLUDES) -fPIC -fopenmp -O2 -march=native -mtune=native
-NVCCFLAGS=-std=c++11 -D GOOGLE_CUDA=$(TF_CUDA) $(TF_FLAGS) $(INCLUDES) \
+INCLUDES= -I $(MB_INC)
+CPPFLAGS=-std=c++11 $(TF_CFLAGS) $(INCLUDES) -fPIC -fopenmp \
+		 -O2 -march=native -mtune=native
+NVCCFLAGS=-std=c++11 -DGOOGLE_CUDA=$(TF_CUDA) $(TF_CFLAGS) $(INCLUDES) \
 	-x cu --compiler-options "-fPIC" --gpu-architecture=sm_30 -lineinfo
 
-LDFLAGS = -fPIC -fopenmp
+LDFLAGS = -fPIC -fopenmp $(TF_LFLAGS)
+
+ifeq ($(TF_CUDA), 1)
+	LDFLAGS := $(LDFLAGS) -lcuda -lcudart
+endif
 
 # Compiler directives
 COMPILE.cpp = g++ $(DEPFLAGS) $(CPPFLAGS) -c
diff --git a/montblanc/impl/rime/tensorflow/rime_ops/radec_to_lm_op.h b/montblanc/impl/rime/tensorflow/rime_ops/radec_to_lm_op.h
new file mode 100644
index 000000000..c15c664dc
--- /dev/null
+++ b/montblanc/impl/rime/tensorflow/rime_ops/radec_to_lm_op.h
@@ -0,0 +1,27 @@
+#ifndef RIME_RADEC_TO_LM_OP_H
+#define RIME_RADEC_TO_LM_OP_H
+
+// montblanc namespace start and stop defines
+#define MONTBLANC_NAMESPACE_BEGIN namespace montblanc {
+#define MONTBLANC_NAMESPACE_STOP }
+
+//  namespace start and stop defines
+#define MONTBLANC_RADEC_TO_LM_NAMESPACE_BEGIN namespace  {
+#define MONTBLANC_RADEC_TO_LM_NAMESPACE_STOP }
+
+MONTBLANC_NAMESPACE_BEGIN
+MONTBLANC_RADEC_TO_LM_NAMESPACE_BEGIN
+
+// General definition of the RadecToLm op, which will be specialised in:
+//   - radec_to_lm_op_cpu.h for CPUs
+//   - radec_to_lm_op_gpu.cuh for CUDA devices
+// Concrete template instantions of this class are provided in:
+//   - radec_to_lm_op_cpu.cpp for CPUs
+//   - radec_to_lm_op_gpu.cu for CUDA devices
+template <typename Device, typename FT>
+class RadecToLm {};
+
+MONTBLANC_RADEC_TO_LM_NAMESPACE_STOP
+MONTBLANC_NAMESPACE_STOP
+
+#endif // #ifndef RIME_RADEC_TO_LM_OP_H
diff --git a/montblanc/impl/rime/tensorflow/rime_ops/radec_to_lm_op_cpu.cpp b/montblanc/impl/rime/tensorflow/rime_ops/radec_to_lm_op_cpu.cpp
new file mode 100644
index 000000000..420b70301
--- /dev/null
+++ b/montblanc/impl/rime/tensorflow/rime_ops/radec_to_lm_op_cpu.cpp
@@ -0,0 +1,91 @@
+#include "radec_to_lm_op_cpu.h"
+
+#include "tensorflow/core/framework/shape_inference.h"
+
+MONTBLANC_NAMESPACE_BEGIN
+MONTBLANC_RADEC_TO_LM_NAMESPACE_BEGIN
+
+using tensorflow::shape_inference::InferenceContext;
+using tensorflow::shape_inference::ShapeHandle;
+using tensorflow::shape_inference::DimensionHandle;
+using tensorflow::Status;
+
+auto shape_function = [](InferenceContext* c) {
+    // Dummies for tests
+    ShapeHandle input;
+    DimensionHandle d;
+
+    // TODO. Check shape and dimension sizes for 'radec'
+    ShapeHandle in_radec = c->input(0);
+    // Assert 'radec' number of dimensions
+    TF_RETURN_WITH_CONTEXT_IF_ERROR(c->WithRank(in_radec, 2, &input),
+        "radec must have shape [None, 2] but is " +
+        c->DebugString(in_radec));
+    // Assert 'radec' dimension '1' size
+    TF_RETURN_WITH_CONTEXT_IF_ERROR(c->WithValue(c->Dim(in_radec, 1), 2, &d),
+        "radec must have shape [None, 2] but is " +
+        c->DebugString(in_radec));
+
+    // TODO. Check shape and dimension sizes for 'phase_centre'
+    ShapeHandle in_phase_centre = c->input(1);
+    // Assert 'phase_centre' number of dimensions
+    TF_RETURN_WITH_CONTEXT_IF_ERROR(c->WithRank(in_phase_centre, 1, &input),
+        "phase_centre must have shape [2] but is " +
+        c->DebugString(in_phase_centre));
+    // Assert 'phase_centre' dimension '0' size
+    TF_RETURN_WITH_CONTEXT_IF_ERROR(c->WithValue(c->Dim(in_phase_centre, 0), 2, &d),
+        "phase_centre must have shape [2] but is " +
+        c->DebugString(in_phase_centre));
+
+
+
+    // TODO: Supply a proper shapes for output variables here,
+    // usually derived from input shapes
+    // ShapeHandle output_1 = c->MakeShape({
+    //      c->Dim(input_1, 0),  // input_1 dimension 0
+    //      c->Dim(input_2, 1)}); // input_2 dimension 1""")
+
+    ShapeHandle out_lm = c->MakeShape({ c->Dim(in_radec, 0), 2 });
+
+    c->set_output(0, out_lm);
+
+
+    // printf("output shape %s\\n", c->DebugString(out).c_str());;
+
+    return Status::OK();
+};
+
+// Register the RadecToLm operator.
+REGISTER_OP("RadecToLm")
+    .Input("radec: FT")
+    .Input("phase_centre: FT")
+    .Output("lm: FT")
+    .Attr("FT: {float, double} = DT_FLOAT")
+    .Doc(R"doc(Given tensors
+  (1) of (ra, dec) sky coordinates with shape (nsrc, 2)
+  (2) phase_centre with shape (2,)
+compute the LM coordinates
+)doc")
+    .SetShapeFn(shape_function);
+
+
+// Register a CPU kernel for RadecToLm
+// handling permutation ['float']
+REGISTER_KERNEL_BUILDER(
+    Name("RadecToLm")
+    .TypeConstraint<float>("FT")
+    .Device(tensorflow::DEVICE_CPU),
+    RadecToLm<CPUDevice, float>);
+
+// Register a CPU kernel for RadecToLm
+// handling permutation ['double']
+REGISTER_KERNEL_BUILDER(
+    Name("RadecToLm")
+    .TypeConstraint<double>("FT")
+    .Device(tensorflow::DEVICE_CPU),
+    RadecToLm<CPUDevice, double>);
+
+
+
+MONTBLANC_RADEC_TO_LM_NAMESPACE_STOP
+MONTBLANC_NAMESPACE_STOP
diff --git a/montblanc/impl/rime/tensorflow/rime_ops/radec_to_lm_op_cpu.h b/montblanc/impl/rime/tensorflow/rime_ops/radec_to_lm_op_cpu.h
new file mode 100644
index 000000000..fe73526d8
--- /dev/null
+++ b/montblanc/impl/rime/tensorflow/rime_ops/radec_to_lm_op_cpu.h
@@ -0,0 +1,72 @@
+#ifndef RIME_RADEC_TO_LM_OP_CPU_H
+#define RIME_RADEC_TO_LM_OP_CPU_H
+
+// Required in order for Eigen::ThreadPoolDevice to be an actual type
+#define EIGEN_USE_THREADS
+
+#include "radec_to_lm_op.h"
+
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+
+MONTBLANC_NAMESPACE_BEGIN
+MONTBLANC_RADEC_TO_LM_NAMESPACE_BEGIN
+
+// For simpler partial specialisation
+typedef Eigen::ThreadPoolDevice CPUDevice;
+
+// Specialise the RadecToLm op for CPUs
+template <typename FT>
+class RadecToLm<CPUDevice, FT> : public tensorflow::OpKernel
+{
+public:
+    explicit RadecToLm(tensorflow::OpKernelConstruction * context) :
+        tensorflow::OpKernel(context) {}
+
+    void Compute(tensorflow::OpKernelContext * context) override
+    {
+        namespace tf = tensorflow;
+
+        // Create reference to input Tensorflow tensors
+        const auto & in_radec = context->input(0);
+        const auto & in_phase_centre = context->input(1);
+
+        int nsrc = in_radec.dim_size(0);
+
+        // Allocate output tensors
+        // Allocate space for output tensor 'lm'
+        tf::Tensor * lm_ptr = nullptr;
+        tf::TensorShape lm_shape = tf::TensorShape({ nsrc, 2 });
+        OP_REQUIRES_OK(context, context->allocate_output(
+            0, lm_shape, &lm_ptr));
+
+        // Extract Eigen tensors
+        auto radec = in_radec.tensor<FT, 2>();
+        auto phase_centre = in_phase_centre.tensor<FT, 1>();
+        auto lm = lm_ptr->tensor<FT, 2>();
+
+        // Sin and cosine of phase centre DEC
+        auto sin_d0 = sin(phase_centre(1));
+        auto cos_d0 = cos(phase_centre(1));
+
+        for(int src=0; src < nsrc; ++src)
+        {
+            // Sin and cosine of (source RA - phase centre RA)
+            auto da = radec(src, 0) - phase_centre(0);
+            auto sin_da = sin(da);
+            auto cos_da = cos(da);
+
+            // Sine and cosine of source DEC
+            auto sin_d =  sin(radec(src, 1));
+            auto cos_d =  cos(radec(src, 1));
+
+            lm(src, 0) = cos_d*sin_da;
+            lm(src, 1) = sin_d*cos_d0 - cos_d*sin_d0*cos_da;
+        }
+    }
+};
+
+MONTBLANC_RADEC_TO_LM_NAMESPACE_STOP
+MONTBLANC_NAMESPACE_STOP
+
+#endif // #ifndef RIME_RADEC_TO_LM_OP_CPU_H
diff --git a/montblanc/impl/rime/tensorflow/rime_ops/radec_to_lm_op_gpu.cu b/montblanc/impl/rime/tensorflow/rime_ops/radec_to_lm_op_gpu.cu
new file mode 100644
index 000000000..1511fb1f5
--- /dev/null
+++ b/montblanc/impl/rime/tensorflow/rime_ops/radec_to_lm_op_gpu.cu
@@ -0,0 +1,32 @@
+#if GOOGLE_CUDA
+
+#include "radec_to_lm_op_gpu.cuh"
+
+MONTBLANC_NAMESPACE_BEGIN
+MONTBLANC_RADEC_TO_LM_NAMESPACE_BEGIN
+
+
+// Register a GPU kernel for RadecToLm
+// handling permutation ['float']
+REGISTER_KERNEL_BUILDER(
+    Name("RadecToLm")
+    .TypeConstraint<float>("FT")
+    .HostMemory("phase_centre")
+    .Device(tensorflow::DEVICE_GPU),
+    RadecToLm<GPUDevice, float>);
+
+// Register a GPU kernel for RadecToLm
+// handling permutation ['double']
+REGISTER_KERNEL_BUILDER(
+    Name("RadecToLm")
+    .TypeConstraint<double>("FT")
+    .HostMemory("phase_centre")
+    .Device(tensorflow::DEVICE_GPU),
+    RadecToLm<GPUDevice, double>);
+
+
+
+MONTBLANC_RADEC_TO_LM_NAMESPACE_STOP
+MONTBLANC_NAMESPACE_STOP
+
+#endif // #if GOOGLE_CUDA
diff --git a/montblanc/impl/rime/tensorflow/rime_ops/radec_to_lm_op_gpu.cuh b/montblanc/impl/rime/tensorflow/rime_ops/radec_to_lm_op_gpu.cuh
new file mode 100644
index 000000000..08544d348
--- /dev/null
+++ b/montblanc/impl/rime/tensorflow/rime_ops/radec_to_lm_op_gpu.cuh
@@ -0,0 +1,168 @@
+#if GOOGLE_CUDA
+
+#ifndef RIME_RADEC_TO_LM_OP_GPU_CUH
+#define RIME_RADEC_TO_LM_OP_GPU_CUH
+
+// Required in order for Eigen::GpuDevice to be an actual type
+#define EIGEN_USE_GPU
+
+#include "radec_to_lm_op.h"
+#include <montblanc/abstraction.cuh>
+
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+
+MONTBLANC_NAMESPACE_BEGIN
+MONTBLANC_RADEC_TO_LM_NAMESPACE_BEGIN
+
+// For simpler partial specialisation
+typedef Eigen::GpuDevice GPUDevice;
+
+// LaunchTraits struct defining
+// kernel block sizes for type permutations
+template <typename FT> struct LaunchTraits {};
+
+// Specialise for float
+// Should really be .cu file as this is a concrete type
+// but this works because this header is included only once
+template <> struct LaunchTraits<float>
+{
+    static constexpr int BLOCKDIMX = 1024;
+    static constexpr int BLOCKDIMY = 1;
+    static constexpr int BLOCKDIMZ = 1;
+};
+
+// Specialise for double
+// Should really be .cu file as this is a concrete type
+// but this works because this header is included only once
+template <> struct LaunchTraits<double>
+{
+    static constexpr int BLOCKDIMX = 1024;
+    static constexpr int BLOCKDIMY = 1;
+    static constexpr int BLOCKDIMZ = 1;
+};
+
+
+
+// CUDA kernel outline
+template <typename Traits>
+__global__ void rime_radec_to_lm(
+    const typename Traits::radec_type * in_radec,
+    typename Traits::lm_type * out_lm,
+    typename Traits::FT phase_centre_ra,
+    typename Traits::FT sin_d0,
+    typename Traits::FT cos_d0)
+
+{
+    // Shared memory usage unnecesssary, but demonstrates use of
+    // constant Trait members to create kernel shared memory.
+    using FT = typename Traits::FT;
+    using Po = typename montblanc::kernel_policies<FT>;
+
+    using LTr = LaunchTraits<FT>;
+
+    int s = blockIdx.x*blockDim.x + threadIdx.x;
+
+    if(s >= LTr::BLOCKDIMX)
+        { return; }
+
+    auto src_radec = in_radec[s];
+
+    // Sine+Cosine of (source RA - phase centre RA)
+    FT sin_da, cos_da;
+    Po::sincos(src_radec.x - phase_centre_ra, &sin_da, &cos_da);
+
+    // Sine+Cosine of source DEC
+    FT sin_d, cos_d;
+    Po::sincos(src_radec.y, &sin_d, &cos_d);
+
+    typename Traits::lm_type lm;
+
+    lm.x = cos_d*sin_da;
+    lm.y = sin_d*cos_d0 - cos_d*sin_d0*cos_da;
+
+            // // Sin and cosine of (source RA - phase centre RA)
+            // auto da = radec(src, 0) - phase_centre(0);
+            // auto sin_da = sin(da);
+            // auto cos_da = cos(da);
+
+            // // Sine and cosine of source DEC
+            // auto sin_d =  sin(radec(src, 1));
+            // auto cos_d =  cos(radec(src, 1));
+
+            // lm(src, 0) = cos_d*sin_da;
+            // lm(src, 1) = sin_d*cos_d0 - cos_d*sin_d0*cos_da;
+
+
+    // Set shared buffer to thread index
+    out_lm[s] = lm;
+}
+
+// Specialise the RadecToLm op for GPUs
+template <typename FT>
+class RadecToLm<GPUDevice, FT> : public tensorflow::OpKernel
+{
+public:
+    explicit RadecToLm(tensorflow::OpKernelConstruction * context) :
+        tensorflow::OpKernel(context) {}
+
+    void Compute(tensorflow::OpKernelContext * context) override
+    {
+        namespace tf = tensorflow;
+
+        // Create variables for input tensors
+        const auto & in_radec = context->input(0);
+        const auto & in_phase_centre = context->input(1);
+
+        int nsrc = in_radec.dim_size(0);
+
+        // Allocate output tensors
+        // Allocate space for output tensor 'lm'
+        tf::Tensor * lm_ptr = nullptr;
+        tf::TensorShape lm_shape = tf::TensorShape({ nsrc, 2 });
+        OP_REQUIRES_OK(context, context->allocate_output(
+            0, lm_shape, &lm_ptr));
+
+
+        using LTr = LaunchTraits<FT>;
+        using Tr = montblanc::kernel_traits<FT>;
+
+        // Set up our CUDA thread block and grid
+        // Set up our kernel dimensions
+        dim3 block = montblanc::shrink_small_dims(
+            dim3(LTr::BLOCKDIMX, LTr::BLOCKDIMY, LTr::BLOCKDIMZ),
+            nsrc, 1, 1);
+        dim3 grid(montblanc::grid_from_thread_block(
+            block, nsrc, 1, 1));
+
+        // Get pointers to flattened tensor data buffers
+        const auto fin_radec = reinterpret_cast<
+            const typename Tr::radec_type *>(
+                in_radec.flat<FT>().data());
+
+        auto fout_lm = reinterpret_cast<
+            typename Tr::lm_type *>(
+                lm_ptr->flat<FT>().data());
+
+        const auto phase_centre = in_phase_centre.tensor<FT, 1>();
+
+        // Get the GPU device
+        const auto & device = context->eigen_device<GPUDevice>();
+
+        // Call the rime_radec_to_lm CUDA kernel
+        rime_radec_to_lm<Tr>
+            <<<grid, block, 0, device.stream()>>>(
+                fin_radec, fout_lm,
+                phase_centre(0),
+                sin(phase_centre(1)),
+                cos(phase_centre(1)));
+
+    }
+};
+
+MONTBLANC_RADEC_TO_LM_NAMESPACE_STOP
+MONTBLANC_NAMESPACE_STOP
+
+#endif // #ifndef RIME_RADEC_TO_LM_OP_GPU_CUH
+
+#endif // #if GOOGLE_CUDA
diff --git a/montblanc/impl/rime/tensorflow/rime_ops/test_radec_to_lm.py b/montblanc/impl/rime/tensorflow/rime_ops/test_radec_to_lm.py
new file mode 100644
index 000000000..3b6ec369b
--- /dev/null
+++ b/montblanc/impl/rime/tensorflow/rime_ops/test_radec_to_lm.py
@@ -0,0 +1,84 @@
+import unittest
+
+import numpy as np
+import tensorflow as tf
+from tensorflow.python.client import device_lib
+
+
+def radec_to_lm(radec, phase_centre):
+        pc_ra, pc_dec = phase_centre
+        sin_d0 = np.sin(pc_dec)
+        cos_d0 = np.cos(pc_dec)
+
+        da = radec[:, 0] - pc_ra
+        sin_da = np.sin(da)
+        cos_da = np.cos(da)
+
+        sin_d = np.sin(radec[:, 1])
+        cos_d = np.cos(radec[:, 1])
+
+        lm = np.empty_like(radec)
+        lm[:, 0] = cos_d*sin_da
+        lm[:, 1] = sin_d*cos_d0 - cos_d*sin_d0*cos_da
+
+        return lm
+
+
+class TestRadecToLm(unittest.TestCase):
+    """ Tests the RadecToLm operator """
+
+    def setUp(self):
+        # Load the custom operation library
+        self.rime = tf.load_op_library('./rime.so')
+        # Obtain a list of GPU device specifications ['/gpu:0', '/gpu:1', ...]
+        self.gpu_devs = [d.name for d in device_lib.list_local_devices()
+                         if d.device_type == 'GPU']
+
+    def test_radec_to_lm(self):
+        """ Test the RadecToLm operator """
+        # List of type constraint for testing this operator
+        type_permutations = [np.float32, np.float64]
+
+        # Run test with the type combinations above
+        for FT in type_permutations:
+            self._impl_test_radec_to_lm(FT)
+
+    def _impl_test_radec_to_lm(self, FT):
+        """ Implementation of the RadecToLm operator test """
+
+        # Create input variables
+        nsrc = 10
+        radec = np.random.random(size=[nsrc, 2]).astype(FT)
+        phase_centre = np.random.random(size=[2]).astype(FT)
+
+        # Argument list
+        np_args = [radec, phase_centre]
+        # Argument string name list
+        arg_names = ['radec', 'phase_centre']
+        # Constructor tensorflow variables
+        tf_args = [tf.Variable(v, name=n) for v, n in zip(np_args, arg_names)]
+
+        def _pin_op(device, *tf_args):
+            """ Pin operation to device """
+            with tf.device(device):
+                return self.rime.radec_to_lm(*tf_args)
+
+        # Pin operation to CPU
+        cpu_op = _pin_op('/cpu:0', *tf_args)
+
+        # Run the op on all GPUs
+        gpu_ops = [_pin_op(d, *tf_args) for d in self.gpu_devs]
+
+        # Initialise variables
+        init_op = tf.global_variables_initializer()
+
+        with tf.Session() as S:
+            S.run(init_op)
+            cpu_result = S.run(cpu_op)
+            assert np.allclose(cpu_result, radec_to_lm(*np_args))
+
+            for gpu_result in S.run(gpu_ops):
+                assert np.allclose(cpu_result, gpu_result)
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/montblanc/include/montblanc/abstraction.cuh b/montblanc/include/montblanc/abstraction.cuh
index 0fe5fbad0..ae0ae78b5 100644
--- a/montblanc/include/montblanc/abstraction.cuh
+++ b/montblanc/include/montblanc/abstraction.cuh
@@ -50,6 +50,7 @@ public:
 
     // Input array types
     typedef float2 lm_type;
+    typedef float2 radec_type;
     typedef float3 uvw_type;
     typedef float frequency_type;
     typedef float2 complex_phase_type;
@@ -199,6 +200,7 @@ public:
     } visibility_type;
 
     // Input array types
+    typedef double2 radec_type;
     typedef double2 lm_type;
     typedef double3 uvw_type;
     typedef double frequency_type;

From f4c9ed66e56bfe997703aebd2e64696db60dadc8 Mon Sep 17 00:00:00 2001
From: Simon Perkins <simon.perkins@gmail.com>
Date: Fri, 27 Jul 2018 10:32:57 +0200
Subject: [PATCH 05/41] Swap translation and rotation in the beam

---
 .../rime/tensorflow/rime_ops/e_beam_op_cpu.h  | 13 ++++++++-----
 .../tensorflow/rime_ops/e_beam_op_gpu.cuh     | 19 +++++++++----------
 2 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/montblanc/impl/rime/tensorflow/rime_ops/e_beam_op_cpu.h b/montblanc/impl/rime/tensorflow/rime_ops/e_beam_op_cpu.h
index f4c94eb9d..27fc64c14 100644
--- a/montblanc/impl/rime/tensorflow/rime_ops/e_beam_op_cpu.h
+++ b/montblanc/impl/rime/tensorflow/rime_ops/e_beam_op_cpu.h
@@ -176,16 +176,19 @@ class EBeam<CPUDevice, FT, CT> : public tensorflow::OpKernel
 
                 for(int src=0; src < nsrc; ++src)
                 {
-                    // Rotate lm coordinate angle
-                    FT l = lm(src,0)*cost - lm(src,1)*sint;
-                    FT m = lm(src,0)*sint + lm(src,1)*cost;
+                    FT l = lm(src, 0);
+                    FT m = lm(src, 1);
 
                     for(int chan=0; chan < nchan; chan++)
                     {
                         // Offset lm coordinates by point errors
                         // and scale by antenna scaling
-                        FT vl = l + point_errors(time, ant, chan, 0);
-                        FT vm = m + point_errors(time, ant, chan, 1);
+                        FT tl = l + point_errors(time, ant, chan, 0);
+                        FT tm = m + point_errors(time, ant, chan, 1);
+
+                        // Rotate lm coordinate angle
+                        FT vl = tl*cost - tm*sint;
+                        FT vm = tl*sint + tm*cost;
 
                         vl *= antenna_scaling(ant, chan, 0);
                         vm *= antenna_scaling(ant, chan, 1);
diff --git a/montblanc/impl/rime/tensorflow/rime_ops/e_beam_op_gpu.cuh b/montblanc/impl/rime/tensorflow/rime_ops/e_beam_op_gpu.cuh
index 014a7ac3c..e62b55373 100644
--- a/montblanc/impl/rime/tensorflow/rime_ops/e_beam_op_gpu.cuh
+++ b/montblanc/impl/rime/tensorflow/rime_ops/e_beam_op_gpu.cuh
@@ -268,12 +268,16 @@ __global__ void rime_e_beam(
     {
         lm_type rlm = lm[SRC];
 
-       // L coordinate
-        // Rotate
-        FT l = rlm.x*shared.pa_cos[threadIdx.z][threadIdx.y] -
-            rlm.y*shared.pa_sin[threadIdx.z][threadIdx.y];
         // Add the pointing errors for this antenna.
-        l += shared.pe[threadIdx.z][threadIdx.y][thread_chan()].x;
+        FT tl = rlm.x + shared.pe[threadIdx.z][threadIdx.y][thread_chan()].x;
+        FT tm = rlm.y + shared.pe[threadIdx.z][threadIdx.y][thread_chan()].y;
+
+        FT l = tl*shared.pa_cos[threadIdx.z][threadIdx.y] -
+               tm*shared.pa_sin[threadIdx.z][threadIdx.y];
+        FT m = tl*shared.pa_sin[threadIdx.z][threadIdx.y] +
+               tm*shared.pa_cos[threadIdx.z][threadIdx.y];
+
+       // L coordinate
         // Scale by antenna scaling factors
         l *= shared.as[threadIdx.y][thread_chan()].x;
         // l grid position
@@ -287,11 +291,6 @@ __global__ void rime_e_beam(
         FT ld = l - gl0;
 
         // M coordinate
-        // rotate
-        FT m = rlm.x*shared.pa_sin[threadIdx.z][threadIdx.y] +
-            rlm.y*shared.pa_cos[threadIdx.z][threadIdx.y];
-        // Add the pointing errors for this antenna.
-        m += shared.pe[threadIdx.z][threadIdx.y][thread_chan()].y;
         // Scale by antenna scaling factors
         m *= shared.as[threadIdx.y][thread_chan()].y;
         // m grid position

From dbad6d5f5792e8c95eb1ef94ab102fe566a723a1 Mon Sep 17 00:00:00 2001
From: Simon Perkins <simon.perkins@gmail.com>
Date: Fri, 27 Jul 2018 10:34:24 +0200
Subject: [PATCH 06/41] Makefile fixes for later tensorflow versions

---
 .../impl/rime/tensorflow/rime_ops/Makefile    | 28 +++++++++----------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/montblanc/impl/rime/tensorflow/rime_ops/Makefile b/montblanc/impl/rime/tensorflow/rime_ops/Makefile
index 7c6e49d7c..bb7db8607 100644
--- a/montblanc/impl/rime/tensorflow/rime_ops/Makefile
+++ b/montblanc/impl/rime/tensorflow/rime_ops/Makefile
@@ -1,32 +1,32 @@
 # Tensorflow includes and defines
-TF_CFLAGS=$(shell python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))')
-TF_LFLAGS=$(shell python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))')
-TF_CUDA=$(shell python -c 'import tensorflow as tf; print int(tf.test.is_built_with_cuda())')
-MB_INC=../../../../include
+TF_CFLAGS = $(shell python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))')
+TF_LFLAGS = $(shell python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))')
+TF_CUDA = $(shell python -c 'import tensorflow as tf; print int(tf.test.is_built_with_cuda())')
+MB_INC = ../../../../include
 
-TF_FLAGS=-D_MWAITXINTRIN_H_INCLUDED -D_FORCE_INLINES -D_GLIBCXX_USE_CXX11_ABI=0
+TF_FLAGS = -D_MWAITXINTRIN_H_INCLUDED -D_FORCE_INLINES -D_GLIBCXX_USE_CXX11_ABI=0
 
 # Dependencies
-DEPDIR:=.d
+DEPDIR := .d
 $(shell mkdir -p $(DEPDIR) >/dev/null)
-DEPFLAGS=-MT $@ -MMD -MP -MF $(DEPDIR)/$*.Td
+DEPFLAGS = -MT $@ -MMD -MP -MF $(DEPDIR)/$*.Td
 
 # Define our sources, compiling CUDA code if it's enabled
 ifeq ($(TF_CUDA), 1)
-	SOURCES=$(wildcard *.cpp *.cu)
+	SOURCES = $(wildcard *.cpp *.cu)
 else
-	SOURCES=$(wildcard *.cpp)
+	SOURCES = $(wildcard *.cpp)
 endif
 
 # Define objects and library
-OBJECTS=$(addsuffix .o, $(basename $(SOURCES)))
-LIBRARY=rime.so
+OBJECTS = $(addsuffix .o, $(basename $(SOURCES)))
+LIBRARY = rime.so
 
 # Compiler flags
-INCLUDES= -I $(MB_INC)
-CPPFLAGS=-std=c++11 $(TF_CFLAGS) $(INCLUDES) -fPIC -fopenmp \
+INCLUDES = -I $(MB_INC)
+CPPFLAGS =-std=c++11 $(TF_CFLAGS) $(INCLUDES) -fPIC -fopenmp \
 		 -O2 -march=native -mtune=native
-NVCCFLAGS=-std=c++11 -DGOOGLE_CUDA=$(TF_CUDA) $(TF_CFLAGS) $(INCLUDES) \
+NVCCFLAGS =-std=c++11 -DGOOGLE_CUDA=$(TF_CUDA) $(TF_CFLAGS) $(INCLUDES) \
 	-x cu --compiler-options "-fPIC" --gpu-architecture=sm_30 -lineinfo
 
 LDFLAGS = -fPIC -fopenmp $(TF_LFLAGS)

From 3e50b3287c40d0ccf81e0f9db32bd22bb4124bbf Mon Sep 17 00:00:00 2001
From: Simon Perkins <simon.perkins@gmail.com>
Date: Fri, 27 Jul 2018 13:36:07 +0200
Subject: [PATCH 07/41] Convert all source position inputs to radec

---
 montblanc/impl/rime/tensorflow/RimeSolver.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/montblanc/impl/rime/tensorflow/RimeSolver.py b/montblanc/impl/rime/tensorflow/RimeSolver.py
index 0dcf3d86a..dad1627a5 100644
--- a/montblanc/impl/rime/tensorflow/RimeSolver.py
+++ b/montblanc/impl/rime/tensorflow/RimeSolver.py
@@ -951,21 +951,23 @@ def _construct_tensorflow_expression(slvr_cfg, feed_data, device, shard):
         feed_rotation = rime.feed_rotation(pa_sin, pa_cos, CT=CT,
                                            feed_type=polarisation_type)
 
-    def antenna_jones(lm, stokes):
+    def antenna_jones(radec, stokes):
         """
         Compute the jones terms for each antenna.
 
         lm, stokes and alpha are the source variables.
         """
 
+        lm = rime.radec_to_lm(radec, D.phase_centre)
+
         # Compute the complex phase
         cplx_phase = rime.phase(lm, D.uvw, D.frequency, CT=CT)
 
         # Check for nans/infs in the complex phase
         phase_msg = ("Check that '1 - l**2  - m**2 >= 0' holds "
-                    "for all your lm coordinates. This is required "
-                    "for 'n = sqrt(1 - l**2 - m**2) - 1' "
-                    "to be finite.")
+                     "for all your lm coordinates. This is required "
+                     "for 'n = sqrt(1 - l**2 - m**2) - 1' "
+                     "to be finite.")
 
         phase_real = tf.check_numerics(tf.real(cplx_phase), phase_msg)
         phase_imag = tf.check_numerics(tf.imag(cplx_phase), phase_msg)
@@ -986,7 +988,7 @@ def antenna_jones(lm, stokes):
         bsqrt_imag = tf.check_numerics(tf.imag(bsqrt), bsqrt_msg)
 
         # Compute the direction dependent effects from the beam
-        ejones = rime.e_beam(lm, D.frequency,
+        ejones = rime.e_beam(radec, D.frequency,
             D.pointing_errors, D.antenna_scaling,
             pa_sin, pa_cos,
             D.beam_extents, D.beam_freq_map, D.ebeam)

From 8127ba4fb961efc1c9416daf696e64f7787eecdc Mon Sep 17 00:00:00 2001
From: Simon Perkins <simon.perkins@gmail.com>
Date: Fri, 27 Jul 2018 15:41:43 +0200
Subject: [PATCH 08/41] Offset source from phase centre in beam

---
 montblanc/impl/rime/tensorflow/RimeSolver.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/montblanc/impl/rime/tensorflow/RimeSolver.py b/montblanc/impl/rime/tensorflow/RimeSolver.py
index dad1627a5..90a8f7250 100644
--- a/montblanc/impl/rime/tensorflow/RimeSolver.py
+++ b/montblanc/impl/rime/tensorflow/RimeSolver.py
@@ -979,19 +979,19 @@ def antenna_jones(radec, stokes):
 
         # Check for nans/infs in the bsqrt
         bsqrt_msg = ("Check that your stokes parameters "
-                    "satisfy I**2 >= Q**2 + U**2 + V**2. "
-                    "Montblanc performs a cholesky decomposition "
-                    "of the brightness matrix and the above must "
-                    "hold for this to produce valid values.")
+                     "satisfy I**2 >= Q**2 + U**2 + V**2. "
+                     "Montblanc performs a cholesky decomposition "
+                     "of the brightness matrix and the above must "
+                     "hold for this to produce valid values.")
 
         bsqrt_real = tf.check_numerics(tf.real(bsqrt), bsqrt_msg)
         bsqrt_imag = tf.check_numerics(tf.imag(bsqrt), bsqrt_msg)
 
         # Compute the direction dependent effects from the beam
-        ejones = rime.e_beam(radec, D.frequency,
-            D.pointing_errors, D.antenna_scaling,
-            pa_sin, pa_cos,
-            D.beam_extents, D.beam_freq_map, D.ebeam)
+        ejones = rime.e_beam(radec - D.phase_centre, D.frequency,
+                             D.pointing_errors, D.antenna_scaling,
+                             pa_sin, pa_cos,
+                             D.beam_extents, D.beam_freq_map, D.ebeam)
 
         deps = [phase_real, phase_imag, bsqrt_real, bsqrt_imag]
         deps = [] # Do nothing for now
@@ -1000,7 +1000,8 @@ def antenna_jones(radec, stokes):
         # feed rotation and beam dde's
         with tf.control_dependencies(deps):
             antenna_jones = rime.create_antenna_jones(bsqrt, cplx_phase,
-                                                    feed_rotation, ejones, FT=FT)
+                                                      feed_rotation, ejones,
+                                                      FT=FT)
             return antenna_jones, sgn_brightness
 
     # While loop condition for each point source type

From 04f7d080df39820b6519b1a3eb1390021bb4a014 Mon Sep 17 00:00:00 2001
From: bennahugo <bennahugo@aol.com>
Date: Tue, 31 Jul 2018 16:12:14 +0200
Subject: [PATCH 09/41] Adds pointing offsets to PA computation

This adds pointing offsets to parallactic angle computation
With a MeerKAT 8 km layout even small errors of 2 arcmins can
create errors in the parallactic angle of 8+ degrees
---
 montblanc/util/parallactic_angles.py | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/montblanc/util/parallactic_angles.py b/montblanc/util/parallactic_angles.py
index e17f23e72..4b65a0a2f 100644
--- a/montblanc/util/parallactic_angles.py
+++ b/montblanc/util/parallactic_angles.py
@@ -31,7 +31,7 @@
     montblanc.log.warn("python-casacore import failed. "
                        "Parallactic Angle computation will fail.")
 
-def parallactic_angles(times, antenna_positions, field_centre):
+def parallactic_angles(times, antenna_positions, field_centre, offsets=None):
     """
     Computes parallactic angles per timestep for the given
     reference antenna position and field centre.
@@ -45,9 +45,13 @@ def parallactic_angles(times, antenna_positions, field_centre):
             column of MS ANTENNA sub-table
         field_centre : ndarray of shape (2,)
             Field centre, should be obtained from MS PHASE_DIR
+        offsets: ndarray of shape (nant, ntime, 2) or None
+            Containing time-variable offsets in ra, dec from
+            pointing centre per antenna. If None is passed offsets
+            of zero are assumed.
 
     Returns:
-        An array of parallactic angles per time-step
+        An array of parallactic angles per antenna per time-step
 
     """
     import pyrap.quanta as pq
@@ -66,9 +70,17 @@ def parallactic_angles(times, antenna_positions, field_centre):
         *(pq.quantity(x,'m') for x in pos))
         for pos in antenna_positions]
 
-    # Compute field centre in radians
-    fc_rad = pm.direction('J2000',
-        *(pq.quantity(f,'rad') for f in field_centre))
+    # Compute pointing centre in radians
+    na = antenna_positions.shape[0]
+    nt = times.shape[0]
+    if offsets is None:
+        offsets = np.zeros((na, nt, 2))
+
+    fc_rad = np.asarray([[pm.direction('J2000',
+                            pq.quantity(field_centre[0] + offsets[a, t, 0], 'rad'),
+                            pq.quantity(field_centre[1] + offsets[a, t, 1], 'rad'))
+                          for t in xrange(nt)]
+                         for a in xrange(na)])
 
     return np.asarray([
             # Set current time as the reference frame
@@ -77,7 +89,7 @@ def parallactic_angles(times, antenna_positions, field_centre):
             [   # Set antenna position as the reference frame
                 pm.do_frame(rp)
                 and
-                pm.posangle(fc_rad, zenith).get_value("rad")
-                for rp in reference_positions
+                pm.posangle(fc_rad[ai, ti], zenith).get_value("rad")
+                for ai, rp in enumerate(reference_positions)
             ]
-        for t in times])
\ No newline at end of file
+        for ti, t in enumerate(times)])

From 50bf33b3476c177cf09f52973671a0ae1aa89cf4 Mon Sep 17 00:00:00 2001
From: bennahugo <bennahugo@aol.com>
Date: Tue, 31 Jul 2018 16:19:37 +0200
Subject: [PATCH 10/41] Swap ant and time axis in PA computation

---
 montblanc/util/parallactic_angles.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/montblanc/util/parallactic_angles.py b/montblanc/util/parallactic_angles.py
index 4b65a0a2f..ca306b8d5 100644
--- a/montblanc/util/parallactic_angles.py
+++ b/montblanc/util/parallactic_angles.py
@@ -45,7 +45,7 @@ def parallactic_angles(times, antenna_positions, field_centre, offsets=None):
             column of MS ANTENNA sub-table
         field_centre : ndarray of shape (2,)
             Field centre, should be obtained from MS PHASE_DIR
-        offsets: ndarray of shape (nant, ntime, 2) or None
+        offsets: ndarray of shape (ntime, na, 2) or None
             Containing time-variable offsets in ra, dec from
             pointing centre per antenna. If None is passed offsets
             of zero are assumed.
@@ -74,13 +74,13 @@ def parallactic_angles(times, antenna_positions, field_centre, offsets=None):
     na = antenna_positions.shape[0]
     nt = times.shape[0]
     if offsets is None:
-        offsets = np.zeros((na, nt, 2))
+        offsets = np.zeros((nt, na, 2))
 
     fc_rad = np.asarray([[pm.direction('J2000',
-                            pq.quantity(field_centre[0] + offsets[a, t, 0], 'rad'),
-                            pq.quantity(field_centre[1] + offsets[a, t, 1], 'rad'))
-                          for t in xrange(nt)]
-                         for a in xrange(na)])
+                            pq.quantity(field_centre[0] + offsets[t, a, 0], 'rad'),
+                            pq.quantity(field_centre[1] + offsets[t, a, 1], 'rad'))
+                          for t in xrange(na)]
+                         for a in xrange(nt)])
 
     return np.asarray([
             # Set current time as the reference frame

From af32df3f16057f3ca9c94453c2334c534c312eff Mon Sep 17 00:00:00 2001
From: bennahugo <bennahugo@aol.com>
Date: Tue, 31 Jul 2018 16:26:36 +0200
Subject: [PATCH 11/41] Typos on previous commit

---
 montblanc/util/parallactic_angles.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/montblanc/util/parallactic_angles.py b/montblanc/util/parallactic_angles.py
index ca306b8d5..e16972219 100644
--- a/montblanc/util/parallactic_angles.py
+++ b/montblanc/util/parallactic_angles.py
@@ -79,8 +79,8 @@ def parallactic_angles(times, antenna_positions, field_centre, offsets=None):
     fc_rad = np.asarray([[pm.direction('J2000',
                             pq.quantity(field_centre[0] + offsets[t, a, 0], 'rad'),
                             pq.quantity(field_centre[1] + offsets[t, a, 1], 'rad'))
-                          for t in xrange(na)]
-                         for a in xrange(nt)])
+                          for a in xrange(na)]
+                         for t in xrange(nt)])
 
     return np.asarray([
             # Set current time as the reference frame
@@ -89,7 +89,7 @@ def parallactic_angles(times, antenna_positions, field_centre, offsets=None):
             [   # Set antenna position as the reference frame
                 pm.do_frame(rp)
                 and
-                pm.posangle(fc_rad[ai, ti], zenith).get_value("rad")
+                pm.posangle(fc_rad[ti, ai], zenith).get_value("rad")
                 for ai, rp in enumerate(reference_positions)
             ]
         for ti, t in enumerate(times)])

From 1811be82843957dc6a503f7587f2123b2be6687f Mon Sep 17 00:00:00 2001
From: bennahugo <bennahugo@aol.com>
Date: Tue, 28 Aug 2018 14:59:56 +0200
Subject: [PATCH 12/41] use lm coordinates to look up positions in the beam
 (this is may be wrong by a few arcsecs - but the beam is normally sampled
 courser than that

---
 montblanc/impl/rime/tensorflow/RimeSolver.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/montblanc/impl/rime/tensorflow/RimeSolver.py b/montblanc/impl/rime/tensorflow/RimeSolver.py
index 90a8f7250..9a2937cd4 100644
--- a/montblanc/impl/rime/tensorflow/RimeSolver.py
+++ b/montblanc/impl/rime/tensorflow/RimeSolver.py
@@ -988,7 +988,14 @@ def antenna_jones(radec, stokes):
         bsqrt_imag = tf.check_numerics(tf.imag(bsqrt), bsqrt_msg)
 
         # Compute the direction dependent effects from the beam
-        ejones = rime.e_beam(radec - D.phase_centre, D.frequency,
+        #radec_prime = radec * tf.cast(tf.stack([-1.0, 1.0]), radec.dtype)
+        #phase_centre_prime = D.phase_centre * tf.cast(tf.stack([-1.0, 1.0]), D.phase_centre.dtype)
+        #def normang(val):
+        #    """ Normalizes angle between [-pi, pi) """
+        #    return ( val + np.pi) % ( 2 * np.pi ) - np.pi
+        #cube_pos = normang(normang(radec_prime) - normang(phase_centre_prime))
+
+        ejones = rime.e_beam(lm, D.frequency,
                              D.pointing_errors, D.antenna_scaling,
                              pa_sin, pa_cos,
                              D.beam_extents, D.beam_freq_map, D.ebeam)

From cee1231e758f2bdcc0df58000234d9f6bdb7e22a Mon Sep 17 00:00:00 2001
From: Simon Perkins <simon.perkins@gmail.com>
Date: Mon, 19 Nov 2018 12:37:22 +0200
Subject: [PATCH 13/41] Fix GPU sgn brightness index in sum coherency operator
 (#263)

---
 .../rime_ops/sum_coherencies_op_gpu.cuh       |   2 +-
 .../rime/tensorflow/rime_ops/test_b_sqrt.py   |  51 +++---
 .../rime/tensorflow/rime_ops/test_phase.py    | 150 +++++++++---------
 .../rime_ops/test_sum_coherencies.py          |  38 +++--
 4 files changed, 130 insertions(+), 111 deletions(-)

diff --git a/montblanc/impl/rime/tensorflow/rime_ops/sum_coherencies_op_gpu.cuh b/montblanc/impl/rime/tensorflow/rime_ops/sum_coherencies_op_gpu.cuh
index 211f5b09f..c38c2e5d9 100644
--- a/montblanc/impl/rime/tensorflow/rime_ops/sum_coherencies_op_gpu.cuh
+++ b/montblanc/impl/rime/tensorflow/rime_ops/sum_coherencies_op_gpu.cuh
@@ -98,7 +98,7 @@ __global__ void rime_sum_coherencies(
         // Apply sign inversions stemming from cholesky decompositions.
         // Sum source coherency into model visibility
         i = base*nchan + chan;
-        FT sign = FT(sgn_brightness[base]);
+        FT sign = FT(sgn_brightness[i]);
 
         coherency.x += sign*J1.x;
         coherency.y += sign*J1.y;
diff --git a/montblanc/impl/rime/tensorflow/rime_ops/test_b_sqrt.py b/montblanc/impl/rime/tensorflow/rime_ops/test_b_sqrt.py
index 723ffb6a9..be327531c 100644
--- a/montblanc/impl/rime/tensorflow/rime_ops/test_b_sqrt.py
+++ b/montblanc/impl/rime/tensorflow/rime_ops/test_b_sqrt.py
@@ -4,13 +4,14 @@
 import tensorflow as tf
 from tensorflow.python.client import device_lib
 
+
 def brightness_numpy(stokes, pol_type):
     nsrc, ntime, nchan, _ = stokes.shape
 
-    I = stokes[:,:,:,0]
-    Q = stokes[:,:,:,1]
-    U = stokes[:,:,:,2]
-    V = stokes[:,:,:,3]
+    I = stokes[:, :, :, 0]  # noqa
+    Q = stokes[:, :, :, 1]
+    U = stokes[:, :, :, 2]
+    V = stokes[:, :, :, 3]
 
     if pol_type == "linear":
         pass
@@ -23,13 +24,14 @@ def brightness_numpy(stokes, pol_type):
 
     # Compute the brightness matrix
     B = np.empty(shape=(nsrc, ntime, nchan, 4), dtype=CT)
-    B[:,:,:,0] = I+Q
-    B[:,:,:,1] = U+V*1j
-    B[:,:,:,2] = U-V*1j
-    B[:,:,:,3] = I-Q
+    B[:, :, :, 0] = I+Q
+    B[:, :, :, 1] = U+V*1j
+    B[:, :, :, 2] = U-V*1j
+    B[:, :, :, 3] = I-Q
 
     return B
 
+
 class TestBSqrt(unittest.TestCase):
     """ Tests the BSqrt operator """
 
@@ -59,26 +61,29 @@ def _impl_test_b_sqrt(self, FT, CT, pol_type, tols):
         nsrc, ntime, na, nchan = 10, 50, 27, 32
 
         # Useful random floats functor
-        rf = lambda *s: np.random.random(size=s).astype(FT)
-        rc = lambda *s: (rf(*s) + 1j*rf(*s)).astype(CT)
+        def rf(*s):
+            return np.random.random(size=s).astype(FT)
+
+        def rc(*s):
+            return (rf(*s) + 1j*rf(*s)).astype(CT)
 
         # Set up our numpy input arrays
 
         # Stokes parameters, should produce a positive definite matrix
         stokes = np.empty(shape=(nsrc, ntime, nchan, 4), dtype=FT)
-        Q = stokes[:,:,:,1] = rf(nsrc, ntime, nchan) - 0.5
-        U = stokes[:,:,:,2] = rf(nsrc, ntime, nchan) - 0.5
-        V = stokes[:,:,:,3] = rf(nsrc, ntime, nchan) - 0.5
+        Q = stokes[:, :, :, 1] = rf(nsrc, ntime, nchan) - 0.5
+        U = stokes[:, :, :, 2] = rf(nsrc, ntime, nchan) - 0.5
+        V = stokes[:, :, :, 3] = rf(nsrc, ntime, nchan) - 0.5
         noise = rf(nsrc, ntime, nchan)*0.1
         # Need I^2 = Q^2 + U^2 + V^2 + noise^2
-        stokes[:,:,:,0] = np.sqrt(Q**2 + U**2 + V**2 + noise)
+        stokes[:, :, :, 0] = np.sqrt(Q**2 + U**2 + V**2 + noise)
 
         # Choose random flux to invert
         mask = np.random.randint(0, 2, size=(nsrc, ntime, nchan)) == 1
-        stokes[mask,0] = -stokes[mask,0]
+        stokes[mask, 0] = -stokes[mask, 0]
 
         # Make the last matrix zero to test the positive semi-definite case
-        stokes[-1,-1,-1,:] = 0
+        stokes[-1, -1, -1, :] = 0
 
         # Argument list
         np_args = [stokes]
@@ -117,10 +122,10 @@ def _pin_op(device, *tf_args):
             # Multiplying the square root matrix
             # by it's hermitian transpose
             square = np.einsum("...ij,...kj->...ik",
-                b_sqrt_2x2, b_sqrt_2x2.conj())
+                               b_sqrt_2x2, b_sqrt_2x2.conj())
 
             # Apply any sign inversions
-            square[:,:,:,:,:] *= cpu_invert[:,:,:,None,None]
+            square[:, :, :, :, :] *= cpu_invert[:, :, :, None, None]
 
             # And we should obtain the brightness matrix
             assert np.allclose(b_2x2, square)
@@ -142,13 +147,13 @@ def _pin_op(device, *tf_args):
                 it = enumerate(itertools.izip(*it))
 
                 msg = ["%s %s %s %s %s" % (i, idx, c, g, c-g)
-                            for i, (idx, c, g) in it]
+                       for i, (idx, c, g) in it]
 
                 self.fail("CPU/GPU bsqrt failed likely because the "
-                        "last polarisation for each entry differs slightly "
-                        "for FT=np.float32 and CT=np.complex64. "
-                        "FT='%s' CT='%s'."
-                        "Here are the values\n%s" % (FT, CT, '\n'.join(msg)))
+                          "last polarisation for each entry differs slightly "
+                          "for FT=np.float32 and CT=np.complex64. "
+                          "FT='%s' CT='%s'."
+                          "Here are the values\n%s" % (FT, CT, '\n'.join(msg)))
 
 
 if __name__ == "__main__":
diff --git a/montblanc/impl/rime/tensorflow/rime_ops/test_phase.py b/montblanc/impl/rime/tensorflow/rime_ops/test_phase.py
index f38794817..7f3313e76 100644
--- a/montblanc/impl/rime/tensorflow/rime_ops/test_phase.py
+++ b/montblanc/impl/rime/tensorflow/rime_ops/test_phase.py
@@ -8,6 +8,7 @@
 from montblanc.impl.rime.tensorflow import load_tf_lib
 rime = load_tf_lib()
 
+
 def complex_phase_op(lm, uvw, frequency):
     """
     This function wraps rime_phase by deducing the
@@ -24,6 +25,7 @@ def complex_phase_op(lm, uvw, frequency):
 
     return rime.phase(lm, uvw, frequency, CT=CT)
 
+
 def complex_phase(lm, uvw, frequency):
     """
     Compute the complex phase from lm, uvw and frequency expressions
@@ -45,14 +47,14 @@ def complex_phase(lm, uvw, frequency):
 
     # Reshape now so that we get broadcasting in later operations
     # Need to pack list since list contains tensors, e.g. nsrc
-    l = tf.reshape(lm[:,0], tf.pack([nsrc,1,1,1]))
-    m = tf.reshape(lm[:,1], tf.pack([nsrc,1,1,1]))
+    l = tf.reshape(lm[:, 0], tf.stack([nsrc, 1, 1, 1]))
+    m = tf.reshape(lm[:, 1], tf.stack([nsrc, 1, 1, 1]))
 
-    u = tf.reshape(uvw[:,:,0], tf.pack([1,ntime,na,1]))
-    v = tf.reshape(uvw[:,:,1], tf.pack([1,ntime,na,1]))
-    w = tf.reshape(uvw[:,:,2], tf.pack([1,ntime,na,1]))
+    u = tf.reshape(uvw[:, :, 0], tf.stack([1, ntime, na, 1]))
+    v = tf.reshape(uvw[:, :, 1], tf.stack([1, ntime, na, 1]))
+    w = tf.reshape(uvw[:, :, 2], tf.stack([1, ntime, na, 1]))
 
-    frequency = tf.reshape(frequency, tf.pack([1,1,1,nchan]))
+    frequency = tf.reshape(frequency, tf.stack([1, 1, 1, nchan]))
 
     n = tf.sqrt(one - l**2 - m**2) - one
 
@@ -66,6 +68,7 @@ def complex_phase(lm, uvw, frequency):
     #return tf.exp(tf.complex(0.0, phase), name='complex_phase')
     return tf.complex(tf.cos(phase), tf.sin(phase))
 
+
 def complex_phase_numpy(lm, uvw, frequency):
     nsrc, _ = lm.shape
     ntime, na, _ = uvw.shape
@@ -75,75 +78,76 @@ def complex_phase_numpy(lm, uvw, frequency):
     uvw = uvw.reshape(1, ntime, na, 1, 3)
     frequency = frequency.reshape(1, 1, 1, nchan)
 
-    l, m = lm[:,:,:,:,0], lm[:,:,:,:,1]
-    u, v, w = uvw[:,:,:,:,0], uvw[:,:,:,:,1], uvw[:,:,:,:,2]
+    l, m = lm[:, :, :, :, 0], lm[:, :, :, :, 1]
+    u, v, w = uvw[:, :, :, :, 0], uvw[:, :, :, :, 1], uvw[:, :, :, :, 2]
 
     n = np.sqrt(1.0 - l**2 - m**2) - 1.0
     real_phase = -2*np.pi*1j*(l*u + m*v + n*w)*frequency/lightspeed
     return np.exp(real_phase)
 
-dtype, ctype = np.float64, np.complex128
-nsrc, ntime, na, nchan = 100, 50, 64, 128
-lightspeed = 299792458.
-
-# Set up our numpy input arrays
-np_lm = np.random.random(size=(nsrc,2)).astype(dtype)*0.1
-np_uvw = np.random.random(size=(ntime,na,3)).astype(dtype)
-np_frequency = np.linspace(1.3e9, 1.5e9, nchan, endpoint=True, dtype=dtype)
-
-# Create tensorflow arrays from the numpy arrays
-lm = tf.Variable(np_lm, name='lm')
-uvw = tf.Variable(np_uvw, name='uvw')
-frequency = tf.Variable(np_frequency, name='frequency')
-#lm, uvw, frequency = map(tf.Variable, [np_lm, np_uvw, np_frequency])
-
-# Get an expression for the complex phase op on the CPU
-with tf.device('/cpu:0'):
-    cplx_phase_op_cpu = complex_phase_op(lm, uvw, frequency)
-
-# Get an expression for the complex phase op on the GPU
-with tf.device('/gpu:0'):
-    cplx_phase_op_gpu = complex_phase_op(lm, uvw, frequency)
-
-# Get an expression for the complex phase expression on the GPU
-with tf.device('/gpu:0'):
-    cplx_phase_expr_gpu = complex_phase(lm, uvw, frequency)
-
-init_op = tf.global_variables_initializer()
-
-# Now create a tensorflow Session to evaluate the above
-with tf.Session() as S:
-    S.run(init_op)
-
-    # Evaluate and time tensorflow GPU
-    start = timeit.default_timer()
-    tf_cplx_phase_op_gpu = S.run(cplx_phase_op_gpu)
-    print 'Tensorflow custom GPU time %f' % (timeit.default_timer() - start)
-
-    # Evaluate and time tensorflow GPU
-    start = timeit.default_timer()
-    tf_cplx_phase_expr_gpu = S.run(cplx_phase_expr_gpu)
-    print 'Tensorflow expression GPU time %f' % (timeit.default_timer() - start)
-
-    # Evaluate and time tensorflow CPU
-    start = timeit.default_timer()
-    tf_cplx_phase_op_cpu = S.run(cplx_phase_op_cpu)
-    print 'Tensorflow CPU time %f' % (timeit.default_timer() - start)
-
-    # Evaluate and time numpy CPU
-    start = timeit.default_timer()
-    # Now calculate the complex phase using numpy
-    # Reshapes help us to broadcast
-    np_cplx_phase = complex_phase_numpy(np_lm, np_uvw, np_frequency)
-    print 'Numpy CPU time %f' % (timeit.default_timer() - start)
-
-    # Check that our shapes and values agree with a certain tolerance
-    assert tf_cplx_phase_op_cpu.shape == (nsrc, ntime, na, nchan)
-    assert tf_cplx_phase_op_gpu.shape == (nsrc, ntime, na, nchan)
-    assert tf_cplx_phase_expr_gpu.shape == (nsrc, ntime, na, nchan)
-    assert np_cplx_phase.shape == (nsrc, ntime, na, nchan)
-    assert np.allclose(tf_cplx_phase_op_cpu, np_cplx_phase)
-    assert np.allclose(tf_cplx_phase_op_gpu, np_cplx_phase)
-    assert np.allclose(tf_cplx_phase_expr_gpu, np_cplx_phase)
-
-print 'Tests Succeeded'
\ No newline at end of file
+
+def test_complex_phase():
+    dtype, ctype = np.float64, np.complex128
+    nsrc, ntime, na, nchan = 100, 50, 64, 128
+    lightspeed = 299792458.
+
+    # Set up our numpy input arrays
+    np_lm = np.random.random(size=(nsrc, 2)).astype(dtype)*0.1
+    np_uvw = np.random.random(size=(ntime, na, 3)).astype(dtype)
+    np_frequency = np.linspace(1.3e9, 1.5e9, nchan, endpoint=True, dtype=dtype)
+
+    # Create tensorflow arrays from the numpy arrays
+    lm = tf.Variable(np_lm, name='lm')
+    uvw = tf.Variable(np_uvw, name='uvw')
+    frequency = tf.Variable(np_frequency, name='frequency')
+
+    # Get an expression for the complex phase op on the CPU
+    with tf.device('/cpu:0'):
+        cplx_phase_op_cpu = complex_phase_op(lm, uvw, frequency)
+
+    # Get an expression for the complex phase op on the GPU
+    with tf.device('/gpu:0'):
+        cplx_phase_op_gpu = complex_phase_op(lm, uvw, frequency)
+
+    # Get an expression for the complex phase expression on the GPU
+    with tf.device('/gpu:0'):
+        cplx_phase_expr_gpu = complex_phase(lm, uvw, frequency)
+
+    init_op = tf.global_variables_initializer()
+
+    # Now create a tensorflow Session to evaluate the above
+    with tf.Session() as S:
+        S.run(init_op)
+
+        # Evaluate and time tensorflow GPU
+        start = timeit.default_timer()
+        tf_cplx_phase_op_gpu = S.run(cplx_phase_op_gpu)
+        print 'Tensorflow custom GPU time %f' % (timeit.default_timer() - start)  # noqa
+
+        # Evaluate and time tensorflow GPU
+        start = timeit.default_timer()
+        tf_cplx_phase_expr_gpu = S.run(cplx_phase_expr_gpu)
+        print 'Tensorflow expression GPU time %f' % (timeit.default_timer() - start)  # noqa
+
+        # Evaluate and time tensorflow CPU
+        start = timeit.default_timer()
+        tf_cplx_phase_op_cpu = S.run(cplx_phase_op_cpu)
+        print 'Tensorflow CPU time %f' % (timeit.default_timer() - start)
+
+        # Evaluate and time numpy CPU
+        start = timeit.default_timer()
+        # Now calculate the complex phase using numpy
+        # Reshapes help us to broadcast
+        np_cplx_phase = complex_phase_numpy(np_lm, np_uvw, np_frequency)
+        print 'Numpy CPU time %f' % (timeit.default_timer() - start)
+
+        # Check that our shapes and values agree with a certain tolerance
+        assert tf_cplx_phase_op_cpu.shape == (nsrc, ntime, na, nchan)
+        assert tf_cplx_phase_op_gpu.shape == (nsrc, ntime, na, nchan)
+        assert tf_cplx_phase_expr_gpu.shape == (nsrc, ntime, na, nchan)
+        assert np_cplx_phase.shape == (nsrc, ntime, na, nchan)
+        assert np.allclose(tf_cplx_phase_op_cpu, np_cplx_phase)
+        assert np.allclose(tf_cplx_phase_op_gpu, np_cplx_phase)
+        assert np.allclose(tf_cplx_phase_expr_gpu, np_cplx_phase)
+
+    print 'Tests Succeeded'
diff --git a/montblanc/impl/rime/tensorflow/rime_ops/test_sum_coherencies.py b/montblanc/impl/rime/tensorflow/rime_ops/test_sum_coherencies.py
index 5fd085a36..aa3f03d4b 100644
--- a/montblanc/impl/rime/tensorflow/rime_ops/test_sum_coherencies.py
+++ b/montblanc/impl/rime/tensorflow/rime_ops/test_sum_coherencies.py
@@ -1,9 +1,14 @@
+from __future__ import print_function
+
 import unittest
 
 import numpy as np
 import tensorflow as tf
 from tensorflow.python.client import device_lib
 
+
+
+
 class TestSumCoherencies(unittest.TestCase):
     """ Tests the SumCoherencies operator """
 
@@ -16,43 +21,46 @@ def setUp(self):
         #self.rime = tf.load_op_library('rime.so')
         # Obtain a list of GPU device specifications ['/gpu:0', '/gpu:1', ...]
         self.gpu_devs = [d.name for d in device_lib.list_local_devices()
-                                if d.device_type == 'GPU']
+                         if d.device_type == 'GPU']
 
     def test_sum_coherencies(self):
         """ Test the SumCoherencies operator """
 
         # List of type constraints for testing this operator
         type_permutations = [
-            [np.float32, np.complex64],
-            [np.float64, np.complex128]]
+            [np.float32, np.complex64, {'rtol': 1e-4}],
+            [np.float64, np.complex128, {}]]
 
         # Run test with the type combinations above
-        for FT, CT in type_permutations:
-            self._impl_test_sum_coherencies(FT, CT)
+        for FT, CT, cmp_kwargs in type_permutations:
+            self._impl_test_sum_coherencies(FT, CT, cmp_kwargs)
 
-    def _impl_test_sum_coherencies(self, FT, CT):
+    def _impl_test_sum_coherencies(self, FT, CT, cmp_kwargs):
         """ Implementation of the SumCoherencies operator test """
 
-        rf = lambda *a, **kw: np.random.random(*a, **kw).astype(FT)
-        rc = lambda *a, **kw: rf(*a, **kw) + 1j*rf(*a, **kw).astype(CT)
+        def rf(*a, **kw):
+            return np.random.random(*a, **kw).astype(FT)
+
+        def rc(*a, **kw):
+            return rf(*a, **kw) + 1j*rf(*a, **kw).astype(CT)
 
         nsrc, ntime, na, nchan = 10, 15, 7, 16
         nbl = na*(na-1)//2
 
         np_ant1, np_ant2 = map(lambda x: np.int32(x), np.triu_indices(na, 1))
         np_ant1, np_ant2 = (np.tile(np_ant1, ntime).reshape(ntime, nbl),
-            np.tile(np_ant2, ntime).reshape(ntime,nbl))
+                            np.tile(np_ant2, ntime).reshape(ntime, nbl))
         np_shape = rf(size=(nsrc, ntime, nbl, nchan))
         np_ant_jones = rc(size=(nsrc, ntime, na, nchan, 4))
-        np_sgn_brightness = np.random.randint(0, 3, size=(nsrc, ntime), dtype=np.int8) - 1
-        np_base_coherencies =  rc(size=(ntime, nbl, nchan, 4))
+        np_sgn_brightness = np.random.randint(0, 3, size=(nsrc, ntime, nchan), dtype=np.int8) - 1
+        np_base_coherencies = rc(size=(ntime, nbl, nchan, 4))
 
         # Argument list
         np_args = [np_ant1, np_ant2, np_shape, np_ant_jones,
-            np_sgn_brightness, np_base_coherencies]
+                   np_sgn_brightness, np_base_coherencies]
         # Argument string name list
         arg_names = ['antenna1', 'antenna2', 'shape', 'ant_jones',
-            'sgn_brightness', 'base_coherencies']
+                     'sgn_brightness', 'base_coherencies']
         # Constructor tensorflow variables
         tf_args = [tf.Variable(v, name=n) for v, n in zip(np_args, arg_names)]
 
@@ -78,7 +86,9 @@ def _pin_op(device, *tf_args):
 
             # Compare against the GPU coherencies
             for gpu_coherencies in S.run(gpu_ops):
-                self.assertTrue(np.allclose(cpu_coherencies, gpu_coherencies))
+                self.assertTrue(np.allclose(cpu_coherencies, gpu_coherencies,
+                                **cmp_kwargs))
+
 
 if __name__ == "__main__":
     unittest.main()

From bfa80dd678abdad908a944dc9c4910442486b029 Mon Sep 17 00:00:00 2001
From: Simon Perkins <simon.perkins@gmail.com>
Date: Mon, 19 Nov 2018 14:30:50 +0200
Subject: [PATCH 14/41] Constrain python2 astropy deps to ">= 2, < 3" (#264)

Only astropy 2 is supported on python 2.
---
 setup.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 0daf300be..fed1bc33b 100644
--- a/setup.py
+++ b/setup.py
@@ -20,6 +20,7 @@
 
 import json
 import os
+import sys
 
 # =============
 # Setup logging
@@ -41,6 +42,7 @@
 
 import versioneer
 
+PY2 = sys.version_info[0] == 2
 
 mb_path = 'montblanc'
 mb_inc_path = os.path.join(mb_path, 'include')
@@ -164,7 +166,7 @@ def readme():
 else:
     # Add binary/C extension type packages
     install_requires += [
-        'astropy >= 1.3.0',
+        'astropy >= 2.0.0, < 3.0.0' if PY2 else 'astropy >= 3.0.0',
         'cerberus >= 1.1',
         'nose >= 1.3.7',
         'numba >= 0.36.2',

From 0720c95319151c46a407b94a99b13b9f0aa2df1b Mon Sep 17 00:00:00 2001
From: Simon Perkins <simon.perkins@gmail.com>
Date: Thu, 24 Jan 2019 15:51:27 +0200
Subject: [PATCH 15/41] Support disjoint antenna UVW decompositions in
 antenna_uvw (#268)

---
 .../tests/test_antenna_uvw_decomposition.py   |  59 ++++++--
 montblanc/util/ant_uvw.py                     | 138 ++++++++++--------
 2 files changed, 120 insertions(+), 77 deletions(-)

diff --git a/montblanc/tests/test_antenna_uvw_decomposition.py b/montblanc/tests/test_antenna_uvw_decomposition.py
index deda90f17..0ac25dc86 100644
--- a/montblanc/tests/test_antenna_uvw_decomposition.py
+++ b/montblanc/tests/test_antenna_uvw_decomposition.py
@@ -6,6 +6,7 @@
 
 from montblanc.util import antenna_uvw
 
+
 class TestAntennaUvWDecomposition(unittest.TestCase):
     def test_uvw_antenna(self):
         na = 17
@@ -18,22 +19,49 @@ def test_uvw_antenna(self):
 
             # Create random per-antenna UVW coordinates.
             # zeroing the first antenna
-            ant_uvw = np.random.random(size=(ntime,na,3)).astype(np.float64)
-            ant_uvw[0,0,:] = 0
+            ant_uvw = np.random.random(size=(ntime, na, 3)).astype(np.float64)
+            ant_uvw[0, 0, :] = 0
 
             time_chunks = np.array([ant1.size], dtype=ant1.dtype)
 
             # Compute per-baseline UVW coordinates.
-            bl_uvw =  (ant_uvw[:,ant1,:] - ant_uvw[:,ant2,:]).reshape(-1, 3)
+            bl_uvw = (ant_uvw[:, ant1, :] - ant_uvw[:, ant2, :]).reshape(-1, 3)
 
             # Now recover the per-antenna and per-baseline UVW coordinates.
             rant_uvw = antenna_uvw(bl_uvw, ant1, ant2, time_chunks,
-                                    nr_of_antenna=na, check_decomposition=True)
+                                   nr_of_antenna=na, check_decomposition=True)
+
+    def test_uvw_disjoint(self):
+
+        # Three initially disjoint baselines here, but the last baseline [2, 9]
+        # connects the first and the last
+        # Set 1: 0, 1, 2, 3
+        # Set 2: 4, 5, 6, 7, 8
+        # Set 3: 8, 10, 11, 12
+        # Connection between Set 1 and Set 3 is the last baseline [2, 9]
+        ant1 = np.array([1,  2,  3,  4,  5,  5,  7,  9, 10, 11,  2])
+        ant2 = np.array([2,  2,  0,  5,  5,  6,  8, 10, 11, 12,  9])
+
+        na = np.unique(np.concatenate([ant1, ant2])).size
+        ntime = 1
 
+        # Create random per-antenna UVW coordinates.
+        # zeroing the first antenna
+        ant_uvw = np.random.random(size=(ntime, na, 3)).astype(np.float64)
+        ant_uvw[0, 0, :] = 0
 
-    def test_uvw_antenna_missing_bl(self):
+        time_chunks = np.array([ant1.size], dtype=ant1.dtype)
+
+        # Compute per-baseline UVW coordinates.
+        bl_uvw = (ant_uvw[:, ant1, :] - ant_uvw[:, ant2, :]).reshape(-1, 3)
+
+        # Now recover the per-antenna and per-baseline UVW coordinates.
+        rant_uvw = antenna_uvw(bl_uvw, ant1, ant2, time_chunks,
+                               nr_of_antenna=na, check_decomposition=True)
+
+    def test_uvw_antenna_missing_bl_impl(self):
         na = 17
-        removed_ants_per_time = ([0, 1, 7], [2,10,15,9], [3, 6, 9, 12])
+        removed_ants_per_time = ([0, 1, 7], [2, 10, 15, 9], [3, 6, 9, 12])
 
         # For both auto correlations and without them
         for auto_cor in (0, 1):
@@ -50,9 +78,9 @@ def _create_ant_arrays():
                     ant1 = ant1[idx]
                     ant2 = ant2[idx]
 
-                    # Remove any baselines containing flagged antennae
+                    # Remove any baselines containing flagged antenna
                     reduce_tuple = tuple(a != ra for a in (ant1, ant2)
-                                                for ra in remove_ants)
+                                         for ra in remove_ants)
 
                     keep = np.logical_and.reduce(reduce_tuple)
                     ant1 = ant1[keep]
@@ -62,19 +90,20 @@ def _create_ant_arrays():
 
                     yield valid_ants, remove_ants, ant1, ant2
 
-
-            valid_ants, remove_ants, ant1, ant2 = zip(*list(_create_ant_arrays()))
+            tup = zip(*list(_create_ant_arrays()))
+            valid_ants, remove_ants, ant1, ant2 = tup
 
             bl_uvw = []
 
             # Create per-baseline UVW coordinates for each time chunk
-            for t, (va, ra, a1, a2) in enumerate(zip(valid_ants, remove_ants, ant1, ant2)):
+            it = enumerate(zip(valid_ants, remove_ants, ant1, ant2))
+            for t, (va, ra, a1, a2) in it:
                 # Create random per-antenna UVW coordinates.
                 # zeroing the first valid antenna
-                ant_uvw = np.random.random(size=(na,3)).astype(np.float64)
-                ant_uvw[va[0],:] = 0
+                ant_uvw = np.random.random(size=(na, 3)).astype(np.float64)
+                ant_uvw[va[0], :] = 0
                 # Create per-baseline UVW coordinates for this time chunk
-                bl_uvw.append(ant_uvw[a1,:] - ant_uvw[a2,:])
+                bl_uvw.append(ant_uvw[a1, :] - ant_uvw[a2, :])
 
             # Produced concatenated antenna and baseline uvw arrays
             time_chunks = np.array([a.size for a in ant1], dtype=ant1[0].dtype)
@@ -85,7 +114,7 @@ def _create_ant_arrays():
             # Now recover the per-antenna and per-baseline UVW coordinates
             # for the ntime chunks
             rant_uvw = antenna_uvw(cbl_uvw, cant1, cant2, time_chunks,
-                                nr_of_antenna=na, check_decomposition=True)
+                                   nr_of_antenna=na, check_decomposition=True)
 
 
 if __name__ == "__main__":
diff --git a/montblanc/util/ant_uvw.py b/montblanc/util/ant_uvw.py
index 23d8cc07e..c17419309 100644
--- a/montblanc/util/ant_uvw.py
+++ b/montblanc/util/ant_uvw.py
@@ -3,7 +3,7 @@
 from itertools import islice
 import math
 import numpy as np
-import numba
+from numba import jit, generated_jit
 
 # Coordinate indexing constants
 u, v, w = range(3)
@@ -11,68 +11,78 @@
 try:
     isclose = math.isclose
 except AttributeError:
-    @numba.jit(nopython=True, nogil=True, cache=True)
+    @jit(nopython=True, nogil=True, cache=True)
     def isclose(a, b, rel_tol=1e-09, abs_tol=0.0):
         return abs(a-b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol)
 
-@numba.jit(nopython=True, nogil=True, cache=True)
+
+@jit(nopython=True, nogil=True, cache=True)
 def _antenna_uvw_loop(uvw, antenna1, antenna2, ant_uvw,
-                                chunk_index, start, end):
+                      chunk_index, start, end):
 
     c = chunk_index
-    a1 = antenna1[start]
-    a2 = antenna2[start]
-
-    # Handle first row separately
-    # If a1 associated with starting row is nan
-    # initial values have not yet been assigned. Do so.
-    if np.isnan(ant_uvw[c,a1,u]):
-        ant_uvw[c,a1,u] = 0.0
-        ant_uvw[c,a1,v] = 0.0
-        ant_uvw[c,a1,w] = 0.0
-
-        # If this is not an auto-correlation
-        # assign a2 to inverse of baseline UVW,
-        if a1 != a2:
-            ant_uvw[c,a2,u] = uvw[start,u]
-            ant_uvw[c,a2,v] = uvw[start,v]
-            ant_uvw[c,a2,w] = uvw[start,w]
-
-    # Now do the rest of the rows in this chunk
-    for row in range(start+1, end):
+
+    # Cluster (first antenna) associated with each antenna
+    clusters = np.full((ant_uvw.shape[1],), -1, dtype=antenna1.dtype)
+
+    # Iterate over rows in chunk
+    for row in range(start, end):
         a1 = antenna1[row]
         a2 = antenna2[row]
 
-        # Have their coordinates been discovered yet?
-        ant1_found = not np.isnan(ant_uvw[c,a1,u])
-        ant2_found = not np.isnan(ant_uvw[c,a2,u])
-
-        if ant1_found and ant2_found:
-            # We've already computed antenna coordinates
-            # for this baseline, ignore it
-            pass
-        elif not ant1_found and not ant2_found:
-            # We can't infer one antenna's coordinate from another
-            # Hopefully this can be filled in during another run
-            # of this function
-            pass
-        elif ant1_found and not ant2_found:
-            # Infer antenna2's coordinate from antenna1
-            #    u12 = u2 - u1 => u2 = u12 + u1
-            ant_uvw[c,a2,u] = ant_uvw[c,a1,u] + uvw[row,u]
-            ant_uvw[c,a2,v] = ant_uvw[c,a1,v] + uvw[row,v]
-            ant_uvw[c,a2,w] = ant_uvw[c,a1,w] + uvw[row,w]
-        elif not ant1_found and ant2_found:
-            # Infer antenna1's coordinate from antenna2
-            #    u12 = u2 - u1 => u1 = u2 - u12
-            ant_uvw[c,a1,u] = ant_uvw[c,a2,u] - uvw[row,u]
-            ant_uvw[c,a1,v] = ant_uvw[c,a2,v] - uvw[row,v]
-            ant_uvw[c,a1,w] = ant_uvw[c,a2,w] - uvw[row,w]
+        # Have they been clustered yet?
+        cl1 = clusters[a1]
+        cl2 = clusters[a2]
+
+        # Both new -- start a new cluster relative to a1
+        if cl1 == -1 and cl2 == -1:
+            clusters[a1] = clusters[a2] = a1
+            ant_uvw[c, a1, u] = 0.0
+            ant_uvw[c, a1, v] = 0.0
+            ant_uvw[c, a1, w] = 0.0
+
+            # If this is not an auto-correlation
+            # assign a2 to inverse of baseline UVW,
+            if a1 != a2:
+                ant_uvw[c, a2, u] = uvw[row, u]
+                ant_uvw[c, a2, v] = uvw[row, v]
+                ant_uvw[c, a2, w] = uvw[row, w]
+
+        # if either antenna has not been clustered,
+        # infer its coordinate from the clustered one
+        elif cl1 != -1 and cl2 == -1:
+            clusters[a2] = cl1
+            ant_uvw[c, a2, u] = ant_uvw[c, a1, u] + uvw[row, u]
+            ant_uvw[c, a2, v] = ant_uvw[c, a1, v] + uvw[row, v]
+            ant_uvw[c, a2, w] = ant_uvw[c, a1, w] + uvw[row, w]
+        elif cl1 == -1 and cl2 != -1:
+            clusters[a1] = cl2
+            ant_uvw[c, a1, u] = ant_uvw[c, a2, u] - uvw[row, u]
+            ant_uvw[c, a1, v] = ant_uvw[c, a2, v] - uvw[row, v]
+            ant_uvw[c, a1, w] = ant_uvw[c, a2, w] - uvw[row, w]
+        # Both clustered. If clusters differ, merge them
+        elif cl1 != -1 and cl2 != -1:
+            if cl1 != cl2:
+                # how much do we need to add to the current cluster2
+                # reference position to make the baseline a2 - a1 consistent?
+                u_off = uvw[row, u] - (ant_uvw[c, a2, u] - ant_uvw[c, a1, u])
+                v_off = uvw[row, v] - (ant_uvw[c, a2, v] - ant_uvw[c, a1, v])
+                w_off = uvw[row, w] - (ant_uvw[c, a2, w] - ant_uvw[c, a1, w])
+
+                # Merge cluster2 into cluster1
+                for ant, cluster in enumerate(clusters):
+                    if cluster == cl2:
+                        clusters[ant] = cl1
+                        ant_uvw[c, ant, u] += u_off
+                        ant_uvw[c, ant, v] += v_off
+                        ant_uvw[c, ant, w] += w_off
+
+        # Shouldn't ever occur
         else:
             raise ValueError("Illegal Condition")
 
 
-@numba.jit(nopython=True, nogil=True, cache=True)
+@jit(nopython=True, nogil=True, cache=True)
 def _antenna_uvw(uvw, antenna1, antenna2, chunks, nr_of_antenna):
     """ numba implementation of antenna_uvw """
 
@@ -87,7 +97,7 @@ def _antenna_uvw(uvw, antenna1, antenna2, chunks, nr_of_antenna):
 
     if not (uvw.shape[0] == antenna1.shape[0] == antenna2.shape[0]):
         raise ValueError("First dimension of uvw, antenna1 "
-                                "and antenna2 do not match")
+                         "and antenna2 do not match")
 
     if chunks.ndim != 1:
         raise ValueError("chunks shape should be (utime,)")
@@ -103,16 +113,18 @@ def _antenna_uvw(uvw, antenna1, antenna2, chunks, nr_of_antenna):
     for ci, chunk in enumerate(chunks):
         end = start + chunk
 
-        _antenna_uvw_loop(uvw, antenna1, antenna2, antenna_uvw, ci, start, end)
+        # one pass should be enough!
         _antenna_uvw_loop(uvw, antenna1, antenna2, antenna_uvw, ci, start, end)
 
         start = end
 
     return antenna_uvw
 
+
 class AntennaUVWDecompositionError(Exception):
     pass
 
+
 def _raise_decomposition_errors(uvw, antenna1, antenna2,
                                 chunks, ant_uvw, max_err):
     """ Raises informative exception for an invalid decomposition """
@@ -128,8 +140,8 @@ def _raise_decomposition_errors(uvw, antenna1, antenna2,
         ant2 = antenna2[start:end]
         cuvw = uvw[start:end]
 
-        ant1_uvw = ant_uvw[ci,ant1,:]
-        ant2_uvw = ant_uvw[ci,ant2,:]
+        ant1_uvw = ant_uvw[ci, ant1, :]
+        ant2_uvw = ant_uvw[ci, ant2, :]
         ruvw = ant2_uvw - ant1_uvw
 
         # Identifty rows where any of the UVW components differed
@@ -137,11 +149,10 @@ def _raise_decomposition_errors(uvw, antenna1, antenna2,
         problems = np.nonzero(np.logical_or.reduce(np.invert(close), axis=1))
 
         for row in problems[0]:
-            problem_str.append("[row %d (chunk %d)]: "
-                              "original %s "
-                              "recovered %s "
-                              "ant1 %s "
-                              "ant2 %s" % (start+row, ci,
+            problem_str.append("[row %d [%d, %d] (chunk %d)]: "
+                               "original %s recovered %s "
+                               "ant1 %s ant2 %s" % (
+                                    start+row, ant1[row], ant2[row], ci,
                                     cuvw[row], ruvw[row],
                                     ant1_uvw[row], ant2_uvw[row]))
 
@@ -161,13 +172,15 @@ def _raise_decomposition_errors(uvw, antenna1, antenna2,
 
     # Add a preamble and raise exception
     problem_str = ["Antenna UVW Decomposition Failed",
-                "The following differences were found "
-                "(first 100):"] + problem_str
+                   "The following differences were found "
+                   "(first 100):"] + problem_str
     raise AntennaUVWDecompositionError('\n'.join(problem_str))
 
+
 class AntennaMissingError(Exception):
     pass
 
+
 def _raise_missing_antenna_errors(ant_uvw, max_err):
     """ Raises an informative error for missing antenna """
 
@@ -177,7 +190,7 @@ def _raise_missing_antenna_errors(ant_uvw, max_err):
     problem_str = []
 
     for c, a in zip(*problems):
-        problem_str.append("[chunk %d antenna %d]" % (c,a))
+        problem_str.append("[chunk %d antenna %d]" % (c, a))
 
         # Exit early
         if len(problem_str) >= max_err:
@@ -191,6 +204,7 @@ def _raise_missing_antenna_errors(ant_uvw, max_err):
     problem_str = ["Antenna were missing"] + problem_str
     raise AntennaMissingError('\n'.join(problem_str))
 
+
 def antenna_uvw(uvw, antenna1, antenna2, chunks,
                 nr_of_antenna, check_missing=False,
                 check_decomposition=False, max_err=100):

From 858a6c5ea0eafff0355dc970347d6e9dc4143142 Mon Sep 17 00:00:00 2001
From: Simon Perkins <simon.perkins@gmail.com>
Date: Fri, 25 Jan 2019 11:19:00 +0200
Subject: [PATCH 16/41] Add feed angle to parallactic angle in feed rotation
 matrix (#269)

---
 montblanc/impl/rime/tensorflow/RimeSolver.py | 14 +++++++++---
 montblanc/impl/rime/tensorflow/config.py     | 24 ++++++++++++++------
 2 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/montblanc/impl/rime/tensorflow/RimeSolver.py b/montblanc/impl/rime/tensorflow/RimeSolver.py
index 0dcf3d86a..918b61911 100644
--- a/montblanc/impl/rime/tensorflow/RimeSolver.py
+++ b/montblanc/impl/rime/tensorflow/RimeSolver.py
@@ -946,9 +946,17 @@ def _construct_tensorflow_expression(slvr_cfg, feed_data, device, shard):
         FT, CT = D.uvw.dtype, D.model_vis.dtype
 
         # Compute sine and cosine of parallactic angles
-        pa_sin, pa_cos = rime.parallactic_angle_sin_cos(D.parallactic_angles)
+        # for the beam
+        beam_sin, beam_cos = rime.parallactic_angle_sin_cos(
+                                        D.parallactic_angles)
+
+        # Compute sine and cosine of feed rotation angle
+        feed_sin, feed_cos = rime.parallactic_angle_sin_cos(
+                                        D.parallactic_angles[:, :] +
+                                        D.feed_angles[None, :])
+
         # Compute feed rotation
-        feed_rotation = rime.feed_rotation(pa_sin, pa_cos, CT=CT,
+        feed_rotation = rime.feed_rotation(feed_sin, feed_cos, CT=CT,
                                            feed_type=polarisation_type)
 
     def antenna_jones(lm, stokes):
@@ -988,7 +996,7 @@ def antenna_jones(lm, stokes):
         # Compute the direction dependent effects from the beam
         ejones = rime.e_beam(lm, D.frequency,
             D.pointing_errors, D.antenna_scaling,
-            pa_sin, pa_cos,
+            beam_sin, beam_cos,
             D.beam_extents, D.beam_freq_map, D.ebeam)
 
         deps = [phase_real, phase_imag, bsqrt_real, bsqrt_imag]
diff --git a/montblanc/impl/rime/tensorflow/config.py b/montblanc/impl/rime/tensorflow/config.py
index 0b45d4bd9..8e40f1cda 100644
--- a/montblanc/impl/rime/tensorflow/config.py
+++ b/montblanc/impl/rime/tensorflow/config.py
@@ -289,6 +289,23 @@ def test_sersic_shape(self, context):
             "are stacked on top of each other. ",
         units   = HERTZ),
 
+    # Feed angles for each antenna
+    array_dict('feed_angles', ('na',), 'ft',
+        default = lambda s, c: np.zeros(c.shape, c.dtype),
+        test    = lambda s, c: rf(c.shape, c.dtype)*np.pi,
+        tags    = "input, constant",
+        description = "Feed angles for each antenna.",
+        units   = RADIANS),
+
+
+    # Parallactic angles at each timestep for each antenna
+    array_dict('parallactic_angles', ('ntime', 'na'), 'ft',
+        default = lambda s, c: np.zeros(c.shape, c.dtype),
+        test    = lambda s, c: rf(c.shape, c.dtype)*np.pi,
+        tags    = "input, constant",
+        description = "Parallactic angles for each antenna.",
+        units   = RADIANS),
+
     # Holographic Beam
 
     # Pointing errors
@@ -309,13 +326,6 @@ def test_sersic_shape(self, context):
             "The components express a scale in the (l,m) plane.",
         units   = DIMENSIONLESS),
 
-    # Parallactic angles at each timestep for each antenna
-    array_dict('parallactic_angles', ('ntime', 'na'), 'ft',
-        default = lambda s, c: np.zeros(c.shape, c.dtype),
-        test    = lambda s, c: rf(c.shape, c.dtype)*np.pi,
-        tags    = "input, constant",
-        description = "Parallactic angles for each antenna.",
-        units   = RADIANS),
 
     # Extents of the beam.
     # First 3 values are lower coordinate for (l, m, frequency)

From 98928f1df8952eb35fdce1c71d379db0d7cd0b75 Mon Sep 17 00:00:00 2001
From: Benjamin Hugo <bennahugo@gmail.com>
Date: Thu, 23 May 2019 11:10:46 +0200
Subject: [PATCH 17/41] Update cuda.py

py3 fix
---
 install/cuda.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/install/cuda.py b/install/cuda.py
index 9dbe6262b..071d80076 100644
--- a/install/cuda.py
+++ b/install/cuda.py
@@ -171,7 +171,7 @@ def inspect_cuda_version_and_devices(compiler, settings):
     except Exception as e:
         msg = ("Running the CUDA device check "
             "stub failed\n{}".format(str(e)))
-        raise InspectCudaException(msg), None, sys.exc_info()[2]
+        raise (InspectCudaException(msg), None, sys.exc_info()[2])
 
     return output
 

From 073ce497839c9fb88d9782e1116affb6cfca6a04 Mon Sep 17 00:00:00 2001
From: Benjamin Hugo <bennahugo@gmail.com>
Date: Thu, 23 May 2019 11:14:29 +0200
Subject: [PATCH 18/41] Update cuda.py

py3 fix
---
 install/cuda.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/install/cuda.py b/install/cuda.py
index 071d80076..653db9bfd 100644
--- a/install/cuda.py
+++ b/install/cuda.py
@@ -30,7 +30,7 @@
 import sys
 import tempfile
 
-from install_log import log
+from .install_log import log
 
 minimum_cuda_version = 8000
 

From 23eb2a023189e293eb4ec6c4cd26d58188f13f0b Mon Sep 17 00:00:00 2001
From: Benjamin Hugo <bennahugo@gmail.com>
Date: Thu, 23 May 2019 11:20:48 +0200
Subject: [PATCH 19/41] Update cub.py

py3 fix
---
 install/cub.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/install/cub.py b/install/cub.py
index c172ce74d..3e2759262 100644
--- a/install/cub.py
+++ b/install/cub.py
@@ -22,7 +22,10 @@
 import os
 import shutil
 import sys
-import urllib2
+try:
+    import urllib2
+except ImportError:
+    import urllib.request as urllib2
 import zipfile
 
 from install_log import log
@@ -158,4 +161,4 @@ def install_cub(mb_inc_path):
     there, reason = is_cub_installed(cub_readme, cub_header, cub_version_str)
 
     if not there:
-        raise InstallCubException(reason)
\ No newline at end of file
+        raise InstallCubException(reason)

From 0e0e0125f473613ca68e63526d8c5338b863c6fa Mon Sep 17 00:00:00 2001
From: Benjamin Hugo <bennahugo@gmail.com>
Date: Thu, 23 May 2019 11:22:01 +0200
Subject: [PATCH 20/41] Update cub.py

py3 fix
---
 install/cub.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/install/cub.py b/install/cub.py
index 3e2759262..e7352b8b9 100644
--- a/install/cub.py
+++ b/install/cub.py
@@ -28,7 +28,7 @@
     import urllib.request as urllib2
 import zipfile
 
-from install_log import log
+from .install_log import log
 
 class InstallCubException(Exception):
     pass

From 12a095dc2452879edbcacb99d4287cf810c677cf Mon Sep 17 00:00:00 2001
From: Benjamin Hugo <bennahugo@gmail.com>
Date: Thu, 23 May 2019 11:22:57 +0200
Subject: [PATCH 21/41] Update tensorflow_ops_ext.py

py3 fix
---
 install/tensorflow_ops_ext.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/install/tensorflow_ops_ext.py b/install/tensorflow_ops_ext.py
index 1883d813f..3f156f8c1 100644
--- a/install/tensorflow_ops_ext.py
+++ b/install/tensorflow_ops_ext.py
@@ -25,7 +25,7 @@
 from setuptools.extension import Extension
 from setuptools.command.build_ext import build_ext
 
-from install_log import log
+from .install_log import log
 
 tensorflow_extension_name = 'montblanc.ext.rime'
 

From aaf23809c890de13e76e18bf69a174236b4eb5bb Mon Sep 17 00:00:00 2001
From: Benna Hugo <bennahugo@aol.com>
Date: Thu, 23 May 2019 13:53:44 +0200
Subject: [PATCH 22/41] py3 fixes

---
 .gitignore                                            |  4 +++-
 install/tensorflow_ops_ext.py                         |  6 +++++-
 montblanc/__init__.py                                 |  2 +-
 montblanc/impl/rime/tensorflow/rime_ops/test_phase.py | 10 +++++-----
 montblanc/tests/__init__.py                           |  2 +-
 5 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/.gitignore b/.gitignore
index 1273488f6..982114c9c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -61,5 +61,7 @@ docs/make.bat
 # Tensorflow build cruft
 montblanc/tensorflow/rime_ops/.d/
 montblanc/tensorflow/rime_ops/.swp
+.kdev4
+*.kdev4
 
-
+__pycache__
diff --git a/install/tensorflow_ops_ext.py b/install/tensorflow_ops_ext.py
index 3f156f8c1..0ae9c68c4 100644
--- a/install/tensorflow_ops_ext.py
+++ b/install/tensorflow_ops_ext.py
@@ -177,8 +177,12 @@ def run(self):
             # created on it during finalize_options() that are required by run()
             # and build_extensions(). However, tensorflow will not yet be installed
             # at this point
+            
             for n, v in inspect.getmembers(ext):
-                setattr(e, n, v)
+                if n == "__weakref__":
+                    pass
+                else:
+                    setattr(e, n, v)
 
         build_ext.run(self)
 
diff --git a/montblanc/__init__.py b/montblanc/__init__.py
index ad393d4da..811190a72 100644
--- a/montblanc/__init__.py
+++ b/montblanc/__init__.py
@@ -32,7 +32,7 @@
 # you try and set it
 def constant(f):
     def fset(self, value):
-        raise SyntaxError, 'Foolish Mortal! You would dare change a universal constant?'
+        raise SyntaxError('Foolish Mortal! You would dare change a universal constant?')
     def fget(self):
         return f()
 
diff --git a/montblanc/impl/rime/tensorflow/rime_ops/test_phase.py b/montblanc/impl/rime/tensorflow/rime_ops/test_phase.py
index 7f3313e76..dfbaead70 100644
--- a/montblanc/impl/rime/tensorflow/rime_ops/test_phase.py
+++ b/montblanc/impl/rime/tensorflow/rime_ops/test_phase.py
@@ -122,24 +122,24 @@ def test_complex_phase():
         # Evaluate and time tensorflow GPU
         start = timeit.default_timer()
         tf_cplx_phase_op_gpu = S.run(cplx_phase_op_gpu)
-        print 'Tensorflow custom GPU time %f' % (timeit.default_timer() - start)  # noqa
+        print('Tensorflow custom GPU time %f' % (timeit.default_timer() - start))  # noqa
 
         # Evaluate and time tensorflow GPU
         start = timeit.default_timer()
         tf_cplx_phase_expr_gpu = S.run(cplx_phase_expr_gpu)
-        print 'Tensorflow expression GPU time %f' % (timeit.default_timer() - start)  # noqa
+        print('Tensorflow expression GPU time %f' % (timeit.default_timer() - start))  # noqa
 
         # Evaluate and time tensorflow CPU
         start = timeit.default_timer()
         tf_cplx_phase_op_cpu = S.run(cplx_phase_op_cpu)
-        print 'Tensorflow CPU time %f' % (timeit.default_timer() - start)
+        print('Tensorflow CPU time %f' % (timeit.default_timer() - start))
 
         # Evaluate and time numpy CPU
         start = timeit.default_timer()
         # Now calculate the complex phase using numpy
         # Reshapes help us to broadcast
         np_cplx_phase = complex_phase_numpy(np_lm, np_uvw, np_frequency)
-        print 'Numpy CPU time %f' % (timeit.default_timer() - start)
+        print('Numpy CPU time %f' % (timeit.default_timer() - start))
 
         # Check that our shapes and values agree with a certain tolerance
         assert tf_cplx_phase_op_cpu.shape == (nsrc, ntime, na, nchan)
@@ -150,4 +150,4 @@ def test_complex_phase():
         assert np.allclose(tf_cplx_phase_op_gpu, np_cplx_phase)
         assert np.allclose(tf_cplx_phase_expr_gpu, np_cplx_phase)
 
-    print 'Tests Succeeded'
+    print('Tests Succeeded')
diff --git a/montblanc/tests/__init__.py b/montblanc/tests/__init__.py
index 47d7d3888..703ea6250 100644
--- a/montblanc/tests/__init__.py
+++ b/montblanc/tests/__init__.py
@@ -18,7 +18,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, see <http://www.gnu.org/licenses/>.
 
-from run_tests import test
+from .run_tests import test
 
 if __name__ == '__main__':
     test()

From 986756d569567de96735ee8e09fce5e5c8162511 Mon Sep 17 00:00:00 2001
From: Benna Hugo <bennahugo@aol.com>
Date: Fri, 24 May 2019 17:20:46 +0200
Subject: [PATCH 23/41] Fix #270

---
 install/tensorflow_ops_ext.py | 48 +++++++++++++++++------------------
 pyproject.toml                |  2 ++
 setup.py                      |  8 ++++--
 3 files changed, 32 insertions(+), 26 deletions(-)
 create mode 100644 pyproject.toml

diff --git a/install/tensorflow_ops_ext.py b/install/tensorflow_ops_ext.py
index 0ae9c68c4..2b7799154 100644
--- a/install/tensorflow_ops_ext.py
+++ b/install/tensorflow_ops_ext.py
@@ -159,32 +159,32 @@ def initialize_options(self):
         self.nvcc_settings = None
         self.cuda_devices = None
 
-    def finalize_options(self):
-        build_ext.finalize_options(self)
-
-    def run(self):
-        # Create the tensorflow extension during the run
-        # At this point, pip should have installed tensorflow
-        ext = create_tensorflow_extension(self.nvcc_settings,
-            self.cuda_devices)
-
-        for i, e in enumerate(self.extensions):
-            if not e.name == ext.name:
-                continue
-
-            # Copy extension attributes over to the dummy extension.
-            # Need to do this because the dummy extension has extra attributes
-            # created on it during finalize_options() that are required by run()
-            # and build_extensions(). However, tensorflow will not yet be installed
-            # at this point
+    # def finalize_options(self):
+    #     build_ext.finalize_options(self)
+
+    # def run(self):
+    #     # Create the tensorflow extension during the run
+    #     # At this point, pip should have installed tensorflow
+    #     ext = create_tensorflow_extension(self.nvcc_settings,
+    #         self.cuda_devices)
+
+    #     for i, e in enumerate(self.extensions):
+    #         if not e.name == ext.name:
+    #             continue
+
+    #         # Copy extension attributes over to the dummy extension.
+    #         # Need to do this because the dummy extension has extra attributes
+    #         # created on it during finalize_options() that are required by run()
+    #         # and build_extensions(). However, tensorflow will not yet be installed
+    #         # at this point
             
-            for n, v in inspect.getmembers(ext):
-                if n == "__weakref__":
-                    pass
-                else:
-                    setattr(e, n, v)
+    #         for n, v in inspect.getmembers(ext):
+    #             if n == "__weakref__":
+    #                 pass
+    #             else:
+    #                 setattr(e, n, v)
 
-        build_ext.run(self)
+    #     build_ext.run(self)
 
     def build_extensions(self):
         customize_compiler_for_nvcc(self.compiler,
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 000000000..f83205a98
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,2 @@
+[build-system]
+requires = ["tensorflow==1.8.0"]
\ No newline at end of file
diff --git a/setup.py b/setup.py
index fed1bc33b..aa316ea61 100644
--- a/setup.py
+++ b/setup.py
@@ -152,6 +152,7 @@ def readme():
     'funcsigs >= 0.4',
     'futures >= 3.0.5',
     'hypercube == 0.3.3',
+    'tensorflow == {0:s}'.format(str(REQ_TF_VERSION)),
 ]
 
 # ==================================
@@ -182,12 +183,15 @@ def readme():
         install_requires.append("tensorflow == %s" % REQ_TF_VERSION)
 
     from install.tensorflow_ops_ext import (BuildCommand,
-                                            tensorflow_extension_name)
+                                            tensorflow_extension_name,
+                                            create_tensorflow_extension)
 
     cmdclass = {'build_ext': BuildCommand}
     # tensorflow_ops_ext.BuildCommand.run will
     # expand this dummy extension to its full portential
-    ext_modules = [Extension(tensorflow_extension_name, ['rime.cu'])]
+    
+    ext_modules = [create_tensorflow_extension(nvcc_settings, device_info)]
+
     # Pass NVCC and CUDA settings through to the build extension
     ext_options = {
         'build_ext': {

From 78a976e71559ade63b6dcbc8983544a71999d5aa Mon Sep 17 00:00:00 2001
From: Benna Hugo <bennahugo@aol.com>
Date: Mon, 27 May 2019 13:32:17 +0200
Subject: [PATCH 24/41] Semi-automatic py2 to py3 conversion

---
 docs/conf.py                                  | 14 +++---
 install/cub.py                                |  6 +--
 install/cuda.py                               |  2 +-
 montblanc/__init__.py                         |  2 +-
 montblanc/_version.py                         | 22 ++++-----
 montblanc/examples/standalone.py              |  2 +-
 montblanc/impl/rime/tensorflow/RimeSolver.py  | 46 +++++++++----------
 .../impl/rime/tensorflow/context_help.py      |  2 +-
 .../rime/tensorflow/helpers/cluster_gen.py    | 16 +++----
 .../tensorflow/helpers/cluster_gen_client.py  | 10 ++--
 montblanc/impl/rime/tensorflow/ms/__init__.py |  2 +-
 .../impl/rime/tensorflow/ms/ms_manager.py     |  4 +-
 .../impl/rime/tensorflow/queue_wrapper.py     |  8 ++--
 .../tensorflow/rime_ops/create_op_outline.py  |  2 +-
 .../tensorflow/rime_ops/test_accumulate.py    |  2 +-
 .../rime/tensorflow/rime_ops/test_b_sqrt.py   |  2 +-
 .../tensorflow/rime_ops/test_gauss_shape.py   |  6 +--
 .../rime/tensorflow/rime_ops/test_phase.py    |  8 ++--
 .../tensorflow/rime_ops/test_sersic_shape.py  |  6 +--
 .../rime_ops/test_sum_coherencies.py          |  4 +-
 .../tensorflow/sinks/null_sink_provider.py    |  2 +-
 .../rime/tensorflow/sinks/sink_context.py     |  3 +-
 .../sources/cached_source_provider.py         |  8 ++--
 .../sources/fits_beam_source_provider.py      | 19 ++++----
 .../tensorflow/sources/np_source_provider.py  |  4 +-
 .../rime/tensorflow/sources/source_context.py |  5 +-
 .../rime/tensorflow/staging_area_wrapper.py   |  2 +-
 .../impl/rime/tensorflow/start_context.py     |  3 +-
 .../impl/rime/tensorflow/stop_context.py      |  3 +-
 montblanc/solvers/mb_tensorflow_solver.py     |  2 +-
 montblanc/solvers/rime_solver.py              |  2 +-
 montblanc/src_types.py                        | 24 +++++-----
 montblanc/tests/beam_factory.py               |  2 +-
 montblanc/tests/meqtrees/turbo-sim.py         | 10 ++--
 montblanc/tests/run_tests.py                  | 12 ++---
 .../tests/test_antenna_uvw_decomposition.py   |  2 +-
 montblanc/tests/test_meq_tf.py                | 18 ++++----
 montblanc/util/__init__.py                    | 14 +++---
 montblanc/util/ant_uvw.py                     |  2 +-
 montblanc/util/parallactic_angles.py          |  4 +-
 montblanc/util/parsing.py                     |  2 +-
 versioneer.py                                 | 11 ++---
 42 files changed, 157 insertions(+), 163 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index d9b707995..73026a9a4 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -52,9 +52,9 @@
 master_doc = 'index'
 
 # General information about the project.
-project = u'montblanc'
-copyright = u'2016, Simon Perkins'
-author = u'Simon Perkins'
+project = 'montblanc'
+copyright = '2016, Simon Perkins'
+author = 'Simon Perkins'
 
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
@@ -231,8 +231,8 @@
 # (source start file, target name, title,
 #  author, documentclass [howto, manual, or own class]).
 latex_documents = [
-    (master_doc, 'montblanc.tex', u'montblanc Documentation',
-     u'Simon Perkins', 'manual'),
+    (master_doc, 'montblanc.tex', 'montblanc Documentation',
+     'Simon Perkins', 'manual'),
 ]
 
 # The name of an image file (relative to this directory) to place at the top of
@@ -261,7 +261,7 @@
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
 man_pages = [
-    (master_doc, 'montblanc', u'montblanc Documentation',
+    (master_doc, 'montblanc', 'montblanc Documentation',
      [author], 1)
 ]
 
@@ -275,7 +275,7 @@
 # (source start file, target name, title, author,
 #  dir menu entry, description, category)
 texinfo_documents = [
-    (master_doc, 'montblanc', u'montblanc Documentation',
+    (master_doc, 'montblanc', 'montblanc Documentation',
      author, 'montblanc', 'One line description of project.',
      'Miscellaneous'),
 ]
diff --git a/install/cub.py b/install/cub.py
index e7352b8b9..be722d314 100644
--- a/install/cub.py
+++ b/install/cub.py
@@ -23,9 +23,9 @@
 import shutil
 import sys
 try:
-    import urllib2
+    import urllib.request, urllib.error, urllib.parse
 except ImportError:
-    import urllib.request as urllib2
+    import urllib2 as urllib
 import zipfile
 
 from .install_log import log
@@ -36,7 +36,7 @@ class InstallCubException(Exception):
 def dl_cub(cub_url, cub_archive_name):
     """ Download cub archive from cub_url and store it in cub_archive_name """
     with open(cub_archive_name, 'wb') as f:
-        remote_file = urllib2.urlopen(cub_url)
+        remote_file = urllib.request.urlopen(cub_url)
         meta = remote_file.info()
 
         # The server may provide us with the size of the file.
diff --git a/install/cuda.py b/install/cuda.py
index 653db9bfd..ab50ac65a 100644
--- a/install/cuda.py
+++ b/install/cuda.py
@@ -171,7 +171,7 @@ def inspect_cuda_version_and_devices(compiler, settings):
     except Exception as e:
         msg = ("Running the CUDA device check "
             "stub failed\n{}".format(str(e)))
-        raise (InspectCudaException(msg), None, sys.exc_info()[2])
+        raise InspectCudaException(msg)
 
     return output
 
diff --git a/montblanc/__init__.py b/montblanc/__init__.py
index 811190a72..08ceabfd0 100644
--- a/montblanc/__init__.py
+++ b/montblanc/__init__.py
@@ -62,7 +62,7 @@ def rime_solver_cfg(**kwargs):
     -------
     A SolverConfiguration object.
     """
-    from configuration import (load_config, config_validator,
+    from .configuration import (load_config, config_validator,
         raise_validator_errors)
 
     def _merge_copy(d1, d2):
diff --git a/montblanc/_version.py b/montblanc/_version.py
index fb447b991..d1af91018 100644
--- a/montblanc/_version.py
+++ b/montblanc/_version.py
@@ -86,20 +86,20 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
             if e.errno == errno.ENOENT:
                 continue
             if verbose:
-                print("unable to run %s" % dispcmd)
+                print(("unable to run %s" % dispcmd))
                 print(e)
             return None, None
     else:
         if verbose:
-            print("unable to find command, tried %s" % (commands,))
+            print(("unable to find command, tried %s" % (commands,)))
         return None, None
     stdout = p.communicate()[0].strip()
     if sys.version_info[0] >= 3:
         stdout = stdout.decode()
     if p.returncode != 0:
         if verbose:
-            print("unable to run %s (error)" % dispcmd)
-            print("stdout was %s" % stdout)
+            print(("unable to run %s (error)" % dispcmd))
+            print(("stdout was %s" % stdout))
         return None, p.returncode
     return stdout, p.returncode
 
@@ -124,8 +124,8 @@ def versions_from_parentdir(parentdir_prefix, root, verbose):
             root = os.path.dirname(root)  # up a level
 
     if verbose:
-        print("Tried directories %s but none started with prefix %s" %
-              (str(rootdirs), parentdir_prefix))
+        print(("Tried directories %s but none started with prefix %s" %
+              (str(rootdirs), parentdir_prefix)))
     raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
 
 
@@ -192,15 +192,15 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
         # "stabilization", as well as "HEAD" and "master".
         tags = set([r for r in refs if re.search(r'\d', r)])
         if verbose:
-            print("discarding '%s', no digits" % ",".join(refs - tags))
+            print(("discarding '%s', no digits" % ",".join(refs - tags)))
     if verbose:
-        print("likely tags: %s" % ",".join(sorted(tags)))
+        print(("likely tags: %s" % ",".join(sorted(tags))))
     for ref in sorted(tags):
         # sorting will prefer e.g. "2.0" over "2.0rc1"
         if ref.startswith(tag_prefix):
             r = ref[len(tag_prefix):]
             if verbose:
-                print("picking %s" % r)
+                print(("picking %s" % r))
             return {"version": r,
                     "full-revisionid": keywords["full"].strip(),
                     "dirty": False, "error": None,
@@ -229,7 +229,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
                           hide_stderr=True)
     if rc != 0:
         if verbose:
-            print("Directory %s not under git control" % root)
+            print(("Directory %s not under git control" % root))
         raise NotThisMethod("'git rev-parse --git-dir' returned error")
 
     # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
@@ -278,7 +278,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
         if not full_tag.startswith(tag_prefix):
             if verbose:
                 fmt = "tag '%s' doesn't start with prefix '%s'"
-                print(fmt % (full_tag, tag_prefix))
+                print((fmt % (full_tag, tag_prefix)))
             pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
                                % (full_tag, tag_prefix))
             return pieces
diff --git a/montblanc/examples/standalone.py b/montblanc/examples/standalone.py
index 63d954b9d..1aad0f26c 100644
--- a/montblanc/examples/standalone.py
+++ b/montblanc/examples/standalone.py
@@ -96,7 +96,7 @@ def name(self):
 
     def model_vis(self, context):
         """ Receive model visibilities from Montblanc in `context.data` """
-        print context.data
+        print((context.data))
 
 # Configure montblanc solver with a memory budget of 2GB
 # and set it to double precision floating point accuracy
diff --git a/montblanc/impl/rime/tensorflow/RimeSolver.py b/montblanc/impl/rime/tensorflow/RimeSolver.py
index ff2d388aa..262bfe56d 100644
--- a/montblanc/impl/rime/tensorflow/RimeSolver.py
+++ b/montblanc/impl/rime/tensorflow/RimeSolver.py
@@ -143,7 +143,7 @@ def pop(self, key, default=None):
         # Staging Area Data Source Configuration
         #================================
 
-        dfs = { n: a for n, a in cube.arrays().iteritems()
+        dfs = { n: a for n, a in list(cube.arrays().items())
             if not 'temporary' in a.tags }
 
         # Descriptors are not user-defined arrays
@@ -369,15 +369,15 @@ def _feed_impl(self, cube, data_sources, data_sinks, global_iter_args):
 
         # Get source strides out before the local sizes are modified during
         # the source loops below
-        src_types = LSA.sources.keys()
+        src_types = list(LSA.sources.keys())
         src_strides = [int(i) for i in cube.dim_extent_size(*src_types)]
         src_staging_areas = [[LSA.sources[t][s] for t in src_types]
             for s in range(self._nr_of_shards)]
 
         compute_feed_dict = { ph: cube.dim_global_size(n) for
-            n, ph in FD.src_ph_vars.iteritems() }
+            n, ph in list(FD.src_ph_vars.items()) }
         compute_feed_dict.update({ ph: getattr(cube, n) for
-            n, ph in FD.property_ph_vars.iteritems() })
+            n, ph in list(FD.property_ph_vars.items()) })
 
         chunks_fed = 0
 
@@ -404,7 +404,7 @@ def _feed_impl(self, cube, data_sources, data_sinks, global_iter_args):
             # the shard with the least work assigned to it
             emptiest_staging_areas = np.argsort(self._inputs_waiting.get())
             shard = emptiest_staging_areas[0]
-            shard = which_shard.next()
+            shard = next(which_shard)
 
             feed_f = self._feed_executors[shard].submit(self._feed_actual,
                 data_sources.copy(), cube.copy(),
@@ -532,7 +532,7 @@ def _consume(self, data_sinks, cube, global_iter_args):
             return self._consume_impl(data_sinks, cube, global_iter_args)
         except Exception as e:
             montblanc.log.exception("Consumer Exception")
-            raise e, None, sys.exc_info()[2]
+            raise e.with_traceback(sys.exc_info()[2])
 
     def _consume_impl(self, data_sinks, cube, global_iter_args):
         """ Consume """
@@ -560,7 +560,7 @@ def _consume_impl(self, data_sinks, cube, global_iter_args):
                     .format(descriptor))
 
         # For each array in our output, call the associated data sink
-        gen = ((n, a) for n, a in output.iteritems() if not n == 'descriptor')
+        gen = ((n, a) for n, a in list(output.items()) if not n == 'descriptor')
 
         for n, a in gen:
             sink_context = SinkContext(n, cube,
@@ -630,13 +630,13 @@ def solve(self, *args, **kwargs):
         input_sources = LSA.input_sources
         data_sources = {n: DataSource(f, cube.array(n).dtype, prov.name())
             for prov in source_providers
-            for n, f in prov.sources().iteritems()
+            for n, f in list(prov.sources().items())
             if n in input_sources}
 
         # Get data sinks from supplied providers
         data_sinks = { n: DataSink(f, prov.name())
             for prov in sink_providers
-            for n, f in prov.sinks().iteritems()
+            for n, f in list(prov.sinks().items())
             if not n == 'descriptor' }
 
         # Construct a feed dictionary from data sources
@@ -647,12 +647,12 @@ def solve(self, *args, **kwargs):
                     array_schemas[k].shape,
                     array_schemas[k].dtype))
             for k, fo
-            in LSA.feed_once.iteritems() }
+            in list(LSA.feed_once.items()) }
 
         self._run_metadata.clear()
 
         # Run the assign operations for each feed_once variable
-        assign_ops = [fo.assign_op.op for fo in LSA.feed_once.itervalues()]
+        assign_ops = [fo.assign_op.op for fo in list(LSA.feed_once.values())]
         self._tfrun(assign_ops, feed_dict=feed_dict)
 
         try:
@@ -777,7 +777,7 @@ def _create_defaults_source_provider(cube, data_source):
 
     # Create data sources on the source provider from
     # the cube array data sources
-    for n, a in cube.arrays().iteritems():
+    for n, a in list(cube.arrays().items()):
         # Unnecessary for temporary arrays
         if 'temporary' in a.tags:
             continue
@@ -830,14 +830,14 @@ def _construct_tensorflow_feed_data(dfs, cube, iter_dims,
     # Create placeholder variables for properties
     FD.property_ph_vars = AttrDict({
         n: tf.placeholder(dtype=p.dtype, shape=(), name=n)
-        for n, p in cube.properties().iteritems() })
+        for n, p in list(cube.properties().items()) })
 
     #========================================================
     # Determine which arrays need feeding once/multiple times
     #========================================================
 
     # Take all arrays flagged as input
-    input_arrays = [a for a in cube.arrays().itervalues()
+    input_arrays = [a for a in list(cube.arrays().values())
                     if 'input' in a.tags]
 
     src_data_sources, feed_many, feed_once = _partition(iter_dims,
@@ -869,7 +869,7 @@ def _construct_tensorflow_feed_data(dfs, cube, iter_dims,
             [a.name for a in src_data_sources[src_nr_var]], dfs)
             for i in range(nr_of_input_staging_areas)]
 
-        for src_type, src_nr_var in source_var_types().iteritems()
+        for src_type, src_nr_var in list(source_var_types().items())
     }
 
     #======================================
@@ -910,12 +910,12 @@ def _make_feed_once_tuple(array):
     #=======================================================
 
     # Data sources from input staging_areas
-    src_sa = [q for sq in local.sources.values() for q in sq]
+    src_sa = [q for sq in list(local.sources.values()) for q in sq]
     all_staging_areas = local.feed_many + src_sa
     input_sources = { a for q in all_staging_areas
                         for a in q.fed_arrays}
     # Data sources from feed once variables
-    input_sources.update(local.feed_once.keys())
+    input_sources.update(list(local.feed_once.keys()))
 
     local.input_sources = input_sources
 
@@ -935,7 +935,7 @@ def _construct_tensorflow_expression(slvr_cfg, feed_data, device, shard):
     # of the relevant shard, adding the feed once
     # inputs to the dictionary
     D = LSA.feed_many[shard].get_to_attrdict()
-    D.update({k: fo.var for k, fo in LSA.feed_once.iteritems()})
+    D.update({k: fo.var for k, fo in list(LSA.feed_once.items())})
 
     with tf.device(device):
         # Infer chunk dimensions
@@ -1138,7 +1138,7 @@ def _get_data(data_source, context):
             "{help}".format(ds=context.name,
                 e=str(e), help=context.help()))
 
-        raise ex, None, sys.exc_info()[2]
+        raise ex.with_traceback(sys.exc_info()[2])
 
 def _supply_data(data_sink, context):
     """ Supply data to the data sink """
@@ -1151,11 +1151,11 @@ def _supply_data(data_sink, context):
             "{help}".format(ds=context.name,
                 e=str(e), help=context.help()))
 
-        raise ex, None, sys.exc_info()[2]
+        raise ex.with_traceback(sys.exc_info()[2])
 
 def _iter_args(iter_dims, cube):
     iter_strides = cube.dim_extent_size(*iter_dims)
-    return zip(iter_dims, iter_strides)
+    return list(zip(iter_dims, iter_strides))
 
 def _uniq_log2_range(start, size, div):
     start = np.log2(start)
@@ -1223,7 +1223,7 @@ def _reduction():
         montblanc.log.info("The following dimension reductions "
             "were applied:")
 
-        for k, v in applied_reductions.iteritems():
+        for k, v in list(applied_reductions.items()):
             montblanc.log.info('{p}{d}: {id} => {rd}'.format
                 (p=' '*4, d=k, id=original_sizes[k], rd=v))
     else:
@@ -1271,7 +1271,7 @@ def _apply_source_provider_dim_updates(cube, source_providers, budget_dims):
     # when conflicts occur
     update_list = []
 
-    for name, updates in update_map.iteritems():
+    for name, updates in list(update_map.items()):
         if not all(updates[0].size == du.size for du in updates[1:]):
             raise ValueError("Received conflicting "
                 "global size updates '{u}'"
diff --git a/montblanc/impl/rime/tensorflow/context_help.py b/montblanc/impl/rime/tensorflow/context_help.py
index 61762638d..fca9c5a73 100644
--- a/montblanc/impl/rime/tensorflow/context_help.py
+++ b/montblanc/impl/rime/tensorflow/context_help.py
@@ -79,7 +79,7 @@ def context_help(context, display_cube=False):
         "{upper_extents}\n".format(upper_extents=u_extents))
     lines.append('\n')
 
-    dims, strides = zip(*context._iter_args)
+    dims, strides = list(zip(*context._iter_args))
 
     lines.append("Iteration information:")
     lines += wrap("Iterating over the {d} "
diff --git a/montblanc/impl/rime/tensorflow/helpers/cluster_gen.py b/montblanc/impl/rime/tensorflow/helpers/cluster_gen.py
index 5233c3e26..b8ca61079 100644
--- a/montblanc/impl/rime/tensorflow/helpers/cluster_gen.py
+++ b/montblanc/impl/rime/tensorflow/helpers/cluster_gen.py
@@ -58,14 +58,14 @@ def get_ip_address(ifname):
 
     logging.info('Pinging {n} connection(s)'.format(n=len(connections)))
 
-    for k, (cs, ca) in connections.iteritems():
+    for k, (cs, ca) in list(connections.items()):
         try:
             cs.send(PING)
         except socket.error as e:
             logging.warn('Lost connection to {a}'.format(a=ca))
             lost.add((cs,ca))
 
-    for k, (cs, ca) in connections.iteritems():
+    for k, (cs, ca) in list(connections.items()):
         try:
             if cs.recv(len(PONG)) != PONG:
                 raise SyncError()
@@ -75,7 +75,7 @@ def get_ip_address(ifname):
 
     logging.info('Lost {n} connection(s)'.format(n=len(lost)))
 
-    connections = { k : c for k, c in connections.iteritems()
+    connections = { k : c for k, c in list(connections.items())
         if c not in lost }
 
     logging.info('Creating cluster specification for {n} workers'.format(
@@ -83,7 +83,7 @@ def get_ip_address(ifname):
     # Create the lists of workers and master urls
     master_list = ['{ip}:{port}'.format(ip=host_address[0], port=host_address[1])]
     worker_list = ['{ip}:{port}'.format(ip=ip, port=port) for (ip, port) in
-        (s.getpeername() for s, _ in connections.itervalues())]
+        (s.getpeername() for s, _ in list(connections.values()))]
 
     logging.info('Master node(s) {n}'.format(n=master_list))
     logging.info('Worker node(s) {n}'.format(n=worker_list))
@@ -91,7 +91,7 @@ def get_ip_address(ifname):
     cluster = { 'worker' : worker_list, 'master' : master_list }
 
     # Transmit cluster specification to connected clients
-    for i, (cs, ca) in enumerate(connections.itervalues()):
+    for i, (cs, ca) in enumerate(connections.values()):
         data = { 'cluster' : cluster, 'job' : 'worker', 'task' : i }
 
         logging.info('Sending specification to {ca}'.format(ca=ca))
@@ -99,7 +99,7 @@ def get_ip_address(ifname):
 
 finally:
     # Close client sockets
-    for cs, address in connections.itervalues():
+    for cs, address in list(connections.values()):
         logging.info('Closing connection to {c}'.format(c=address))
         cs.shutdown(socket.SHUT_RDWR)
         cs.close()
@@ -163,7 +163,7 @@ def get_ip_address(ifname):
     with tf.Session(server.target, graph=g) as S:
         S.run(init_op)
         S.run(do_enq)
-        print 'Worker result', S.run(result)
-        print 'Dequeue result', S.run(do_deq)
+        print(('Worker result', S.run(result)))
+        print(('Dequeue result', S.run(do_deq)))
 
         time.sleep(2)
diff --git a/montblanc/impl/rime/tensorflow/helpers/cluster_gen_client.py b/montblanc/impl/rime/tensorflow/helpers/cluster_gen_client.py
index 9d7b5f106..89e6d84a4 100644
--- a/montblanc/impl/rime/tensorflow/helpers/cluster_gen_client.py
+++ b/montblanc/impl/rime/tensorflow/helpers/cluster_gen_client.py
@@ -100,12 +100,12 @@
 
     with tf.Session(server.target, graph=g) as S:
         S.run(tf.initialize_local_variables())
-        print S.run([do_deq])
-        print S.run([do_deq])
-        print S.run([do_deq])
-        print S.run([do_deq])
+        print((S.run([do_deq])))
+        print((S.run([do_deq])))
+        print((S.run([do_deq])))
+        print((S.run([do_deq])))
 
-        print 'Value of master_tmp={mt}.'.format(mt=S.run(tmp))
+        print(('Value of master_tmp={mt}.'.format(mt=S.run(tmp))))
 
         S.run(do_enq)
 
diff --git a/montblanc/impl/rime/tensorflow/ms/__init__.py b/montblanc/impl/rime/tensorflow/ms/__init__.py
index a2c4119e6..fd9fae879 100644
--- a/montblanc/impl/rime/tensorflow/ms/__init__.py
+++ b/montblanc/impl/rime/tensorflow/ms/__init__.py
@@ -18,4 +18,4 @@
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, see <http://www.gnu.org/licenses/>.
 
-from ms_manager import MeasurementSetManager
\ No newline at end of file
+from .ms_manager import MeasurementSetManager
\ No newline at end of file
diff --git a/montblanc/impl/rime/tensorflow/ms/ms_manager.py b/montblanc/impl/rime/tensorflow/ms/ms_manager.py
index ff48a457b..71bffbc38 100644
--- a/montblanc/impl/rime/tensorflow/ms/ms_manager.py
+++ b/montblanc/impl/rime/tensorflow/ms/ms_manager.py
@@ -255,7 +255,7 @@ def __init__(self, msname, slvr_cfg):
 
     def close(self):
         # Close all the tables
-        for table in self._tables.itervalues():
+        for table in list(self._tables.values()):
             table.close()
 
     @property
@@ -271,7 +271,7 @@ def channels_per_band(self):
         return self._nchanperband
 
     def updated_dimensions(self):
-        return [(k, v) for k, v in self._dim_sizes.iteritems()]
+        return [(k, v) for k, v in list(self._dim_sizes.items())]
 
     @property
     def auto_correlations(self):
diff --git a/montblanc/impl/rime/tensorflow/queue_wrapper.py b/montblanc/impl/rime/tensorflow/queue_wrapper.py
index a1829ae8d..80dc4a6c9 100644
--- a/montblanc/impl/rime/tensorflow/queue_wrapper.py
+++ b/montblanc/impl/rime/tensorflow/queue_wrapper.py
@@ -15,7 +15,7 @@ def _get_queue_types(fed_arrays, data_sources):
         return [data_sources[n].dtype for n in fed_arrays]
     except KeyError as e:
         raise ValueError("Array '{k}' has no data source!"
-            .format(k=e.message)), None, sys.exc_info()[2]
+            .format(k=e.message)).with_traceback(sys.exc_info()[2])
 
 class QueueWrapper(object):
     def __init__(self, name, queue_size, fed_arrays, data_sources, shared_name=None):
@@ -75,11 +75,11 @@ def dequeue(self):
         return self._queue.dequeue()
 
     def dequeue_to_dict(self):
-        return {k: v for k, v in itertools.izip(
+        return {k: v for k, v in zip(
             self._fed_arrays, self._queue.dequeue())}
 
     def dequeue_to_attrdict(self):
-        return AttrDict((k, v) for k, v in itertools.izip(
+        return AttrDict((k, v) for k, v in zip(
             self._fed_arrays, self._queue.dequeue()))
 
     @property
@@ -108,7 +108,7 @@ def __init__(self, name, queue_size, fed_arrays, data_sources,
         super(SingleInputMultiQueueWrapper, self).__init__(name, queue_size,
             fed_arrays, data_sources, shared_name)
 
-        R = range(1, count)
+        R = list(range(1, count))
 
         extra_names = ['%s_%s' % (name, i) for i in R]
         extra_shared_names = ['%s_%s' % (shared_name, i) for i in R]
diff --git a/montblanc/impl/rime/tensorflow/rime_ops/create_op_outline.py b/montblanc/impl/rime/tensorflow/rime_ops/create_op_outline.py
index 24077458f..21343bc5e 100644
--- a/montblanc/impl/rime/tensorflow/rime_ops/create_op_outline.py
+++ b/montblanc/impl/rime/tensorflow/rime_ops/create_op_outline.py
@@ -1,7 +1,7 @@
 import argparse
 import re
 
-from op_source_templates import (MAIN_HEADER_TEMPLATE,
+from .op_source_templates import (MAIN_HEADER_TEMPLATE,
     CPP_HEADER_TEMPLATE,
     CPP_SOURCE_TEMPLATE,
     CUDA_HEADER_TEMPLATE,
diff --git a/montblanc/impl/rime/tensorflow/rime_ops/test_accumulate.py b/montblanc/impl/rime/tensorflow/rime_ops/test_accumulate.py
index 2c5558c6f..c102c4b61 100644
--- a/montblanc/impl/rime/tensorflow/rime_ops/test_accumulate.py
+++ b/montblanc/impl/rime/tensorflow/rime_ops/test_accumulate.py
@@ -27,7 +27,7 @@
     run_metadata = tf.RunMetadata()
     S.run(tf.initialize_all_variables())
     
-    print S.run(c, options=run_options, run_metadata=run_metadata)[:10]
+    print((S.run(c, options=run_options, run_metadata=run_metadata)[:10]))
     #print S.run(r)
 
     tl = timeline.Timeline(run_metadata.step_stats)
diff --git a/montblanc/impl/rime/tensorflow/rime_ops/test_b_sqrt.py b/montblanc/impl/rime/tensorflow/rime_ops/test_b_sqrt.py
index be327531c..083e86dbc 100644
--- a/montblanc/impl/rime/tensorflow/rime_ops/test_b_sqrt.py
+++ b/montblanc/impl/rime/tensorflow/rime_ops/test_b_sqrt.py
@@ -144,7 +144,7 @@ def _pin_op(device, *tf_args):
 
                 import itertools
                 it = (np.asarray(p).T, cpu_bsqrt[d], gpu_bsqrt[d])
-                it = enumerate(itertools.izip(*it))
+                it = enumerate(zip(*it))
 
                 msg = ["%s %s %s %s %s" % (i, idx, c, g, c-g)
                        for i, (idx, c, g) in it]
diff --git a/montblanc/impl/rime/tensorflow/rime_ops/test_gauss_shape.py b/montblanc/impl/rime/tensorflow/rime_ops/test_gauss_shape.py
index 662752b64..a401c3d97 100644
--- a/montblanc/impl/rime/tensorflow/rime_ops/test_gauss_shape.py
+++ b/montblanc/impl/rime/tensorflow/rime_ops/test_gauss_shape.py
@@ -14,7 +14,7 @@
 rf = lambda *s: np.random.random(size=s).astype(dtype=dtype)
 
 np_uvw = rf(ntime, na, 3)
-np_ant1, np_ant2 = map(lambda x: np.int32(x), np.triu_indices(na, 1))
+np_ant1, np_ant2 = [np.int32(x) for x in np.triu_indices(na, 1)]
 np_ant1, np_ant2 = (np.tile(np_ant1, ntime).reshape(ntime, nbl),
     np.tile(np_ant2, ntime).reshape(ntime,nbl))
 np_frequency = np.linspace(1.4e9, 1.5e9, nchan).astype(dtype)
@@ -24,9 +24,9 @@
 assert np_ant2.shape == (ntime, nbl), np_ant2.shape
 assert np_frequency.shape == (nchan,)
 
-args = map(lambda v, n: tf.Variable(v, name=n),
+args = list(map(lambda v, n: tf.Variable(v, name=n),
     [np_uvw, np_ant1, np_ant2, np_frequency, np_gauss_params],
-    ["uvw", "ant1", "ant2", "frequency", "gauss_params"])
+    ["uvw", "ant1", "ant2", "frequency", "gauss_params"]))
 
 with tf.device('/cpu:0'):
     gauss_shape_cpu = rime.gauss_shape(*args)
diff --git a/montblanc/impl/rime/tensorflow/rime_ops/test_phase.py b/montblanc/impl/rime/tensorflow/rime_ops/test_phase.py
index dfbaead70..14b316a09 100644
--- a/montblanc/impl/rime/tensorflow/rime_ops/test_phase.py
+++ b/montblanc/impl/rime/tensorflow/rime_ops/test_phase.py
@@ -122,24 +122,24 @@ def test_complex_phase():
         # Evaluate and time tensorflow GPU
         start = timeit.default_timer()
         tf_cplx_phase_op_gpu = S.run(cplx_phase_op_gpu)
-        print('Tensorflow custom GPU time %f' % (timeit.default_timer() - start))  # noqa
+        print(('Tensorflow custom GPU time %f' % (timeit.default_timer() - start)))  # noqa
 
         # Evaluate and time tensorflow GPU
         start = timeit.default_timer()
         tf_cplx_phase_expr_gpu = S.run(cplx_phase_expr_gpu)
-        print('Tensorflow expression GPU time %f' % (timeit.default_timer() - start))  # noqa
+        print(('Tensorflow expression GPU time %f' % (timeit.default_timer() - start)))  # noqa
 
         # Evaluate and time tensorflow CPU
         start = timeit.default_timer()
         tf_cplx_phase_op_cpu = S.run(cplx_phase_op_cpu)
-        print('Tensorflow CPU time %f' % (timeit.default_timer() - start))
+        print(('Tensorflow CPU time %f' % (timeit.default_timer() - start)))
 
         # Evaluate and time numpy CPU
         start = timeit.default_timer()
         # Now calculate the complex phase using numpy
         # Reshapes help us to broadcast
         np_cplx_phase = complex_phase_numpy(np_lm, np_uvw, np_frequency)
-        print('Numpy CPU time %f' % (timeit.default_timer() - start))
+        print(('Numpy CPU time %f' % (timeit.default_timer() - start)))
 
         # Check that our shapes and values agree with a certain tolerance
         assert tf_cplx_phase_op_cpu.shape == (nsrc, ntime, na, nchan)
diff --git a/montblanc/impl/rime/tensorflow/rime_ops/test_sersic_shape.py b/montblanc/impl/rime/tensorflow/rime_ops/test_sersic_shape.py
index 7413aad02..ac0618547 100644
--- a/montblanc/impl/rime/tensorflow/rime_ops/test_sersic_shape.py
+++ b/montblanc/impl/rime/tensorflow/rime_ops/test_sersic_shape.py
@@ -14,7 +14,7 @@
 rf = lambda *s: np.random.random(size=s).astype(dtype=dtype)
 
 np_uvw = rf(ntime, na, 3)
-np_ant1, np_ant2 = map(lambda x: np.int32(x), np.triu_indices(na, 1))
+np_ant1, np_ant2 = [np.int32(x) for x in np.triu_indices(na, 1)]
 np_ant1, np_ant2 = (np.tile(np_ant1, ntime).reshape(ntime, nbl),
     np.tile(np_ant2, ntime).reshape(ntime,nbl))
 np_frequency = np.linspace(1.4e9, 1.5e9, nchan).astype(dtype)
@@ -24,9 +24,9 @@
 assert np_ant2.shape == (ntime, nbl), np_ant2.shape
 assert np_frequency.shape == (nchan,)
 
-args = map(lambda v, n: tf.Variable(v, name=n),
+args = list(map(lambda v, n: tf.Variable(v, name=n),
     [np_uvw, np_ant1, np_ant2, np_frequency, np_sersic_params],
-    ["uvw", "ant1", "ant2", "frequency", "sersic_params"])
+    ["uvw", "ant1", "ant2", "frequency", "sersic_params"]))
 
 with tf.device('/cpu:0'):
     sersic_shape_cpu = rime.sersic_shape(*args)
diff --git a/montblanc/impl/rime/tensorflow/rime_ops/test_sum_coherencies.py b/montblanc/impl/rime/tensorflow/rime_ops/test_sum_coherencies.py
index aa3f03d4b..59aada917 100644
--- a/montblanc/impl/rime/tensorflow/rime_ops/test_sum_coherencies.py
+++ b/montblanc/impl/rime/tensorflow/rime_ops/test_sum_coherencies.py
@@ -1,4 +1,4 @@
-from __future__ import print_function
+
 
 import unittest
 
@@ -47,7 +47,7 @@ def rc(*a, **kw):
         nsrc, ntime, na, nchan = 10, 15, 7, 16
         nbl = na*(na-1)//2
 
-        np_ant1, np_ant2 = map(lambda x: np.int32(x), np.triu_indices(na, 1))
+        np_ant1, np_ant2 = [np.int32(x) for x in np.triu_indices(na, 1)]
         np_ant1, np_ant2 = (np.tile(np_ant1, ntime).reshape(ntime, nbl),
                             np.tile(np_ant2, ntime).reshape(ntime, nbl))
         np_shape = rf(size=(nsrc, ntime, nbl, nchan))
diff --git a/montblanc/impl/rime/tensorflow/sinks/null_sink_provider.py b/montblanc/impl/rime/tensorflow/sinks/null_sink_provider.py
index 41aa0ec4c..a11a5e017 100644
--- a/montblanc/impl/rime/tensorflow/sinks/null_sink_provider.py
+++ b/montblanc/impl/rime/tensorflow/sinks/null_sink_provider.py
@@ -20,7 +20,7 @@
 
 import montblanc
 
-from sink_provider import SinkProvider
+from .sink_provider import SinkProvider
 
 class NullSinkProvider(SinkProvider):
 
diff --git a/montblanc/impl/rime/tensorflow/sinks/sink_context.py b/montblanc/impl/rime/tensorflow/sinks/sink_context.py
index b31d397e3..cf8d8ea41 100644
--- a/montblanc/impl/rime/tensorflow/sinks/sink_context.py
+++ b/montblanc/impl/rime/tensorflow/sinks/sink_context.py
@@ -30,6 +30,7 @@ def __set__(self, obj, value):
         return self.func(obj, value)
 
 class SinkContext(object):
+    __metaclass__=HypercubeProxyMetaClass
     """
     Context object passed to data sinks.
 
@@ -56,8 +57,6 @@ def model_vis(self, context):
     __slots__ = ('_cube', '_cfg', '_name', '_data', '_input_cache',
         '_cube_attributes', '_iter_args', '_array_schema')
 
-    __metaclass__ = HypercubeProxyMetaClass
-
     def __init__(self, name, cube, slvr_cfg,
             iter_args, array_schema,
             data, input_cache):
diff --git a/montblanc/impl/rime/tensorflow/sources/cached_source_provider.py b/montblanc/impl/rime/tensorflow/sources/cached_source_provider.py
index a91e33318..6340e6a9d 100644
--- a/montblanc/impl/rime/tensorflow/sources/cached_source_provider.py
+++ b/montblanc/impl/rime/tensorflow/sources/cached_source_provider.py
@@ -95,7 +95,7 @@ def __init__(self, providers, cache_data_sources=None,
 
         # Construct a list of provider data sources
         prov_data_sources = { n: ds for prov in providers
-                            for n, ds in prov.sources().iteritems() }
+                            for n, ds in list(prov.sources().items()) }
 
         # Uniquely identify data source keys
         prov_ds = set(prov_data_sources.keys())
@@ -118,7 +118,7 @@ def __init__(self, providers, cache_data_sources=None,
 
 
         # Construct data sources on this source provider
-        for n, ds in prov_data_sources.iteritems():
+        for n, ds in list(prov_data_sources.items()):
             if n in cache_data_sources:
                 setattr(self, n, types.MethodType(_cache(ds), self))
             else:
@@ -160,5 +160,5 @@ def clear_cache(self):
 
     def cache_size(self):
         with self._lock:
-            return sum(a.nbytes for k, v in self._cache.iteritems()
-                                for a in v.itervalues())
\ No newline at end of file
+            return sum(a.nbytes for k, v in list(self._cache.items())
+                                for a in list(v.values()))
\ No newline at end of file
diff --git a/montblanc/impl/rime/tensorflow/sources/fits_beam_source_provider.py b/montblanc/impl/rime/tensorflow/sources/fits_beam_source_provider.py
index 02fb0c605..40f6fcdbd 100644
--- a/montblanc/impl/rime/tensorflow/sources/fits_beam_source_provider.py
+++ b/montblanc/impl/rime/tensorflow/sources/fits_beam_source_provider.py
@@ -59,7 +59,7 @@ def __init__(self, header):
         self._ndims = ndims = header['NAXIS']
 
         # Extract header information for each dimension
-        axr = range(1, ndims+1)
+        axr = list(range(1, ndims+1))
         self._naxis = [header.get('NAXIS%d'%n)      for n in axr]
         self._ctype = [header.get('CTYPE%d'%n, n)   for n in axr]
         self._crval = [header.get('CRVAL%d'%n, 0)   for n in axr]
@@ -223,11 +223,11 @@ def _fh(fn):
 
     return collections.OrderedDict(
             (corr, tuple(_fh(fn) for fn in files))
-        for corr, files in filenames.iteritems() )
+        for corr, files in list(filenames.items()) )
 
 def _cube_extents(axes, l_ax, m_ax, f_ax, l_sign, m_sign):
     # List of (lower, upper) extent tuples for the given dimensions
-    it = zip((l_ax, m_ax, f_ax), (l_sign, m_sign, 1.0))
+    it = list(zip((l_ax, m_ax, f_ax), (l_sign, m_sign, 1.0)))
     # Get the extents, flipping the sign on either end if required
     extent_list = [tuple(s*e for e in axes.extents[i]) for i, s in it]
 
@@ -240,13 +240,12 @@ def _create_axes(filenames, file_dict):
     try:
         # Loop through the file_dictionary, finding the
         # first open FITS file.
-        f = iter(f for tup in file_dict.itervalues()
-            for f in tup if f is not None).next()
+        f = next(iter(f for tup in list(file_dict.values())
+            for f in tup if f is not None))
     except StopIteration as e:
-        raise (ValueError("No FITS files were found. "
+        raise ValueError("No FITS files were found. "
             "Searched filenames: '{f}'." .format(
-                f=filenames.values())),
-                    None, sys.exc_info()[2])
+                f=list(filenames.values())))
 
 
     # Create a FitsAxes object
@@ -378,7 +377,7 @@ def ebeam(self, context):
         # Iterate through the correlations,
         # assigning real and imaginary data, if present,
         # otherwise zeroing the correlation
-        for i, (re, im) in enumerate(self._files.itervalues()):
+        for i, (re, im) in enumerate(self._files.values()):
             ebeam[:,:,:,i].real[:] = 0 if re is None else re[0].data.T
             ebeam[:,:,:,i].imag[:] = 0 if im is None else im[0].data.T
 
@@ -410,7 +409,7 @@ def close(self):
         if not hasattr(self, "_files"):
             return
 
-        for re, im in self._files.itervalues():
+        for re, im in list(self._files.values()):
             re.close()
             im.close()
 
diff --git a/montblanc/impl/rime/tensorflow/sources/np_source_provider.py b/montblanc/impl/rime/tensorflow/sources/np_source_provider.py
index f35d00459..feee0a5ec 100644
--- a/montblanc/impl/rime/tensorflow/sources/np_source_provider.py
+++ b/montblanc/impl/rime/tensorflow/sources/np_source_provider.py
@@ -26,7 +26,7 @@
 import montblanc
 import montblanc.util as mbu
 
-from source_provider import SourceProvider
+from .source_provider import SourceProvider
 
 class NumpySourceProvider(SourceProvider):
     """
@@ -55,7 +55,7 @@ def _source(self, context):
             return _source
 
         # Create source methods for each supplied array
-        for n, a in arrays.iteritems():
+        for n, a in list(arrays.items()):
             # Create the source function, update the wrapper,
             # bind it to a method and set the attribute on the object
             f = functools.update_wrapper(
diff --git a/montblanc/impl/rime/tensorflow/sources/source_context.py b/montblanc/impl/rime/tensorflow/sources/source_context.py
index ab2b695ca..7db07ef72 100644
--- a/montblanc/impl/rime/tensorflow/sources/source_context.py
+++ b/montblanc/impl/rime/tensorflow/sources/source_context.py
@@ -30,6 +30,7 @@ def __set__(self, obj, value):
         return self.func(obj, value)
 
 class SourceContext(object):
+    __metaclass__ = HypercubeProxyMetaClass
     """
     Context object passed to data sources.
 
@@ -71,8 +72,6 @@ def uvw(self, context):
     __slots__ = ('_cube', '_cfg', '_name', '_shape', '_dtype',
         '_iter_args', '_array_schema')
 
-    __metaclass__ = HypercubeProxyMetaClass
-
     def __init__(self, name, cube, slvr_cfg, iter_args,
                 array_schema, shape, dtype):
         self._name = name
@@ -185,4 +184,4 @@ def help(self, display_cube=False):
             str
                 A help string associated with this context
         """
-        return context_help(self, display_cube)
\ No newline at end of file
+        return context_help(self, display_cube)
diff --git a/montblanc/impl/rime/tensorflow/staging_area_wrapper.py b/montblanc/impl/rime/tensorflow/staging_area_wrapper.py
index f810a176f..4cc3eec1d 100644
--- a/montblanc/impl/rime/tensorflow/staging_area_wrapper.py
+++ b/montblanc/impl/rime/tensorflow/staging_area_wrapper.py
@@ -3,7 +3,7 @@
 import tensorflow as tf
 from tensorflow.python.ops import data_flow_ops
 
-from queue_wrapper import _get_queue_types
+from .queue_wrapper import _get_queue_types
 
 class StagingAreaWrapper(object):
     def __init__(self, name, fed_arrays, data_sources, shared_name=None):
diff --git a/montblanc/impl/rime/tensorflow/start_context.py b/montblanc/impl/rime/tensorflow/start_context.py
index 796652d6b..f341bad0a 100644
--- a/montblanc/impl/rime/tensorflow/start_context.py
+++ b/montblanc/impl/rime/tensorflow/start_context.py
@@ -23,6 +23,7 @@
 from .hypercube_proxy_metaclass import HypercubeProxyMetaClass
 
 class StartContext(object):
+    __metaclass__ = HypercubeProxyMetaClass
     """
     Start Context object passed to Providers.
 
@@ -49,8 +50,6 @@ def start(self, start_context):
     """
     __slots__ = ('_cube', '_cfg', '_iter_args')
 
-    __metaclass__ = HypercubeProxyMetaClass
-
     def __init__(self, cube, slvr_cfg, iter_args):
         self._cube = cube
         self._cfg = slvr_cfg
diff --git a/montblanc/impl/rime/tensorflow/stop_context.py b/montblanc/impl/rime/tensorflow/stop_context.py
index e7135d97d..d484d8658 100644
--- a/montblanc/impl/rime/tensorflow/stop_context.py
+++ b/montblanc/impl/rime/tensorflow/stop_context.py
@@ -22,6 +22,7 @@
 from .hypercube_proxy_metaclass import HypercubeProxyMetaClass
 
 class StopContext(object):
+    __metaclass__ = HypercubeProxyMetaClass
     """
     Stop Context object passed to Providers.
 
@@ -48,8 +49,6 @@ def stop(self, stop_context):
     """
     __slots__ = ('_cube', '_cfg', '_iter_args')
 
-    __metaclass__ = HypercubeProxyMetaClass
-
     def __init__(self, cube, slvr_cfg, iter_args):
         self._cube = cube
         self._cfg = slvr_cfg
diff --git a/montblanc/solvers/mb_tensorflow_solver.py b/montblanc/solvers/mb_tensorflow_solver.py
index bbba3d7a0..912a4d29f 100644
--- a/montblanc/solvers/mb_tensorflow_solver.py
+++ b/montblanc/solvers/mb_tensorflow_solver.py
@@ -18,7 +18,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, see <http://www.gnu.org/licenses/>.
 
-from rime_solver import RIMESolver
+from .rime_solver import RIMESolver
 
 class MontblancTensorflowSolver(RIMESolver):
     def __init__(self, slvr_cfg):
diff --git a/montblanc/solvers/rime_solver.py b/montblanc/solvers/rime_solver.py
index 8d7a62a4d..68a2364c1 100644
--- a/montblanc/solvers/rime_solver.py
+++ b/montblanc/solvers/rime_solver.py
@@ -102,7 +102,7 @@ def template_dict(self):
         D.update(self.dim_local_size_dict())
 
         # Add any registered properties to the dictionary
-        for p in self._properties.itervalues():
+        for p in list(self._properties.values()):
             D[p.name] = getattr(self, p.name)
 
         return D
diff --git a/montblanc/src_types.py b/montblanc/src_types.py
index 893211ede..50fd9d5c5 100644
--- a/montblanc/src_types.py
+++ b/montblanc/src_types.py
@@ -44,15 +44,15 @@
 ])
 
 SOURCE_DIM_TYPES = OrderedDict((v, k) for (k, v)
-    in SOURCE_VAR_TYPES.iteritems())
+    in list(SOURCE_VAR_TYPES.items()))
 
 def source_types():
     """ Returns a list of registered source types """
-    return SOURCE_VAR_TYPES.keys()
+    return list(SOURCE_VAR_TYPES.keys())
 
 def source_nr_vars():
     """ Returns a list of registered source number variables """
-    return SOURCE_VAR_TYPES.values()
+    return list(SOURCE_VAR_TYPES.values())
 
 def source_var_types():
     """ Returns a mapping of source type to number variable """
@@ -75,15 +75,15 @@ def default_sources(**kwargs):
     S = OrderedDict()
     total = 0
 
-    invalid_types = [t for t in kwargs.keys() if t not in SOURCE_VAR_TYPES]
+    invalid_types = [t for t in list(kwargs.keys()) if t not in SOURCE_VAR_TYPES]
 
     for t in invalid_types:
         montblanc.log.warning('Source type %s is not yet '
             'implemented in montblanc. '
-            'Valid source types are %s' % (t, SOURCE_VAR_TYPES.keys()))
+            'Valid source types are %s' % (t, list(SOURCE_VAR_TYPES.keys())))
 
     # Zero all source types
-    for k, v in SOURCE_VAR_TYPES.iteritems():
+    for k, v in list(SOURCE_VAR_TYPES.items()):
         # Try get the number of sources for this source
         # from the kwargs
         value = kwargs.get(k, 0)
@@ -124,12 +124,12 @@ def sources_to_nr_vars(sources):
 
     try:
         return OrderedDict((SOURCE_VAR_TYPES[name], nr)
-            for name, nr in sources.iteritems())
+            for name, nr in list(sources.items()))
     except KeyError as e:
         raise KeyError((
             'No source type ''%s'' is '
             'registered. Valid source types '
-            'are %s') % (e, SOURCE_VAR_TYPES.keys()))
+            'are %s') % (e, list(SOURCE_VAR_TYPES.keys())))
 
 def source_range_tuple(start, end, nr_var_dict):
     """
@@ -139,9 +139,9 @@ def source_range_tuple(start, end, nr_var_dict):
     for each source variable type.
     """
 
-    starts = np.array([0 for nr_var in SOURCE_VAR_TYPES.itervalues()])
+    starts = np.array([0 for nr_var in list(SOURCE_VAR_TYPES.values())])
     ends = np.array([nr_var_dict[nr_var] if nr_var in nr_var_dict else 0
-        for nr_var in SOURCE_VAR_TYPES.itervalues()])
+        for nr_var in list(SOURCE_VAR_TYPES.values())])
     sum_counts = np.cumsum(ends)
     idx = np.arange(len(starts))
 
@@ -183,7 +183,7 @@ def source_range(start, end, nr_var_dict):
 
     return OrderedDict((k, e-s)
         for k, (s, e)
-        in source_range_tuple(start, end, nr_var_dict).iteritems())
+        in list(source_range_tuple(start, end, nr_var_dict).items()))
 
 def source_range_slices(start, end, nr_var_dict):
     """
@@ -194,4 +194,4 @@ def source_range_slices(start, end, nr_var_dict):
 
     return OrderedDict((k, slice(s,e,1))
         for k, (s, e)
-        in source_range_tuple(start, end, nr_var_dict).iteritems())
\ No newline at end of file
+        in list(source_range_tuple(start, end, nr_var_dict).items()))
\ No newline at end of file
diff --git a/montblanc/tests/beam_factory.py b/montblanc/tests/beam_factory.py
index 7be759184..b354e6d87 100644
--- a/montblanc/tests/beam_factory.py
+++ b/montblanc/tests/beam_factory.py
@@ -125,7 +125,7 @@ def beam_factory(polarisation_type='linear',
     # Figure out the beam filenames from the schema
     filenames = _create_filenames(schema, polarisation_type)
 
-    for filename in [f for ri_pair in filenames.values() for f in ri_pair]:
+    for filename in [f for ri_pair in list(filenames.values()) for f in ri_pair]:
         if overwrite:
             ex = np.deg2rad(1.0)
             coords = np.linspace(-ex, ex, header['NAXIS2'], endpoint=True)
diff --git a/montblanc/tests/meqtrees/turbo-sim.py b/montblanc/tests/meqtrees/turbo-sim.py
index f0b879a20..c5abd4963 100644
--- a/montblanc/tests/meqtrees/turbo-sim.py
+++ b/montblanc/tests/meqtrees/turbo-sim.py
@@ -78,8 +78,8 @@
   from Siamese.OMS.tigger_lsm import TiggerSkyModel
   models.insert(0,TiggerSkyModel());
 except:
-  print 'Failure to import TiggerSkyModel module'
-  print 'Is the location of Tigger defined in your PYTHONPATH environment variable?'
+  print('Failure to import TiggerSkyModel module')
+  print('Is the location of Tigger defined in your PYTHONPATH environment variable?')
   pass;
       
 meqmaker.add_sky_models(models);
@@ -162,9 +162,9 @@ def _recompute_noise (dum):
 def _define_forest (ns):
   random.seed(random_seed if isinstance(random_seed,int) else None);
   if not mssel.msname:
-    raise RuntimeError,"MS not set up in compile-time options";
+    raise RuntimeError("MS not set up in compile-time options");
   if run_purr:
-    print mssel.msname;
+    print((mssel.msname));
     import os.path
     purrlog = os.path.normpath(mssel.msname)+".purrlog";
     Timba.TDL.GUI.purr(purrlog,[mssel.msname,'.']);
@@ -233,7 +233,7 @@ def _tdl_job_1_simulate_MS (mqs,parent,wait=False):
   # resolves nodes
   ns.Resolve();
 
-  print len(ns.AllNodes()),'nodes defined';
+  print((len(ns.AllNodes()),'nodes defined'));
 
 
 
diff --git a/montblanc/tests/run_tests.py b/montblanc/tests/run_tests.py
index c0141fad3..5d15c30a3 100644
--- a/montblanc/tests/run_tests.py
+++ b/montblanc/tests/run_tests.py
@@ -31,16 +31,16 @@ def print_versions():
     Print the versions of software relied upon by montblanc.
     Inspired by numexpr testing suite.
     """
-    print('-=' * 38)
-    print('Python version:    %s' % sys.version)
-    print('Montblanc version: %s' % montblanc.__version__)
-    print("NumPy version:     %s" % numpy.__version__)
+    print(('-=' * 38))
+    print(('Python version:    %s' % sys.version))
+    print(('Montblanc version: %s' % montblanc.__version__))
+    print(("NumPy version:     %s" % numpy.__version__))
 
     if os.name == 'posix':
         (sysname, nodename, release, version, machine) = os.uname()
-        print('Platform:          %s-%s' % (sys.platform, machine))
+        print(('Platform:          %s-%s' % (sys.platform, machine)))
 
-    print('-=' * 38)
+    print(('-=' * 38))
 
 def suite():
     test_suite = unittest.TestSuite()
diff --git a/montblanc/tests/test_antenna_uvw_decomposition.py b/montblanc/tests/test_antenna_uvw_decomposition.py
index 0ac25dc86..434ba7440 100644
--- a/montblanc/tests/test_antenna_uvw_decomposition.py
+++ b/montblanc/tests/test_antenna_uvw_decomposition.py
@@ -90,7 +90,7 @@ def _create_ant_arrays():
 
                     yield valid_ants, remove_ants, ant1, ant2
 
-            tup = zip(*list(_create_ant_arrays()))
+            tup = list(zip(*list(_create_ant_arrays())))
             valid_ants, remove_ants, ant1, ant2 = tup
 
             bl_uvw = []
diff --git a/montblanc/tests/test_meq_tf.py b/montblanc/tests/test_meq_tf.py
index c048f8245..abe6b956e 100644
--- a/montblanc/tests/test_meq_tf.py
+++ b/montblanc/tests/test_meq_tf.py
@@ -202,7 +202,7 @@ def get_gaussian_sources(nsrc):
     # Create the tigger sky model
     with open(tigger_sky_file, 'w') as f:
         f.write('#format: ra_d dec_d i q u v spi freq0 emaj_s emin_s pa_d\n')
-        it = enumerate(itertools.izip(pt_lm, pt_stokes, pt_alpha, pt_ref_freq))
+        it = enumerate(zip(pt_lm, pt_stokes, pt_alpha, pt_ref_freq))
         for i, ((l, m), (I, Q, U, V), alpha, ref_freq) in it:
             ra, dec = lm_to_radec(l, m, ra0, dec0)
             l, m = np.rad2deg([ra,dec])
@@ -210,7 +210,7 @@ def get_gaussian_sources(nsrc):
             f.write('{l:.20f} {m:.20f} {i} {q} {u} {v} {spi} {rf:.20f}\n'.format(
                 l=l, m=m, i=I, q=Q, u=U, v=V, spi=alpha, rf=ref_freq))
 
-        it = enumerate(itertools.izip(g_lm, g_stokes, g_alpha, g_ref_freq, g_shape.T))
+        it = enumerate(zip(g_lm, g_stokes, g_alpha, g_ref_freq, g_shape.T))
         for i, ((l, m), (I, Q, U, V), alpha, ref_freq, (emaj, emin, pa)) in it:
             ra, dec = lm_to_radec(l, m, ra0, dec0)
             l, m = np.rad2deg([ra,dec])
@@ -370,20 +370,20 @@ def updated_dimensions(self):
 
         # Everything agrees, exit
         if problems[0].size == 0:
-            print 'Montblanc and MeqTree visibilities agree'
+            print('Montblanc and MeqTree visibilities agree')
             sys.exit(1)
 
         bad_vis_file = 'bad_visibilities.txt'
 
         # Some visibilities differ, do some analysis
-        print ("Montblanc differs from MeqTrees by {nc}/{t} visibilities. "
+        print((("Montblanc differs from MeqTrees by {nc}/{t} visibilities. "
             "Writing them out to '{bvf}'").format(
-            nc=problems[0].size, t=not_close.size, bvf=bad_vis_file)
+            nc=problems[0].size, t=not_close.size, bvf=bad_vis_file)))
 
         abs_diff = np.abs(meq_vis - mb_vis)
         rmsd = np.sqrt(np.sum(abs_diff**2)/abs_diff.size)
         nrmsd = rmsd / (np.max(abs_diff) - np.min(abs_diff))
-        print 'RMSD {rmsd} NRMSD {nrmsd}'.format(rmsd=rmsd, nrmsd=nrmsd)
+        print(('RMSD {rmsd} NRMSD {nrmsd}'.format(rmsd=rmsd, nrmsd=nrmsd)))
 
         # Plot a histogram of the difference
         try:
@@ -391,7 +391,7 @@ def updated_dimensions(self):
             matplotlib.use('pdf')
             import matplotlib.pyplot as plt
         except:
-            print "Exception importing matplotlib %s" % sys.exc_info()[2]
+            print(("Exception importing matplotlib %s" % sys.exc_info()[2]))
         else:
             try:
                 nr_of_bins = 100
@@ -405,7 +405,7 @@ def updated_dimensions(self):
 
                 plt.savefig('histogram.pdf')
             except:
-                print "Error plotting histogram %s" % sys.exc_info()[2]
+                print(("Error plotting histogram %s" % sys.exc_info()[2]))
 
         mb_problems = mb_vis[problems]
         meq_problems = meq_vis[problems]
@@ -414,7 +414,7 @@ def updated_dimensions(self):
 
         # Create an iterator over the first 100 problematic visibilities
         t = (np.asarray(problems).T, mb_problems, meq_problems, difference, amplitude)
-        it = enumerate(itertools.izip(*t))
+        it = enumerate(zip(*t))
         it = itertools.islice(it, 0, 1000, 1)
 
         # Write out the problematic visibilities to file
diff --git a/montblanc/util/__init__.py b/montblanc/util/__init__.py
index 90d569e73..c12f9c031 100644
--- a/montblanc/util/__init__.py
+++ b/montblanc/util/__init__.py
@@ -38,7 +38,7 @@
     source_range_slices)
 
 
-from parsing import parse_python_assigns
+from .parsing import parse_python_assigns
 
 def nr_of_baselines(na, auto_correlations=False):
     """
@@ -343,10 +343,10 @@ def shape_from_str_tuple(sshape, variables, ignore=None):
     if ignore is None: ignore = []
 
     if not isinstance(sshape, tuple) and not isinstance(sshape, list):
-        raise TypeError, 'sshape argument must be a tuple or list'
+        raise TypeError('sshape argument must be a tuple or list')
 
     if not isinstance(ignore, list):
-        raise TypeError, 'ignore argument must be a list'
+        raise TypeError('ignore argument must be a list')
 
     return tuple([int(eval_expr(v,variables)) if isinstance(v,str) else int(v)
         for v in sshape if v not in ignore])
@@ -367,10 +367,10 @@ def array_convert_function(sshape_one, sshape_two, variables):
         for d in sshape_two])
 
     if len(s_one) != len(s_two):
-        raise ValueError, ('Flattened shapes %s and %s '\
+        raise ValueError(('Flattened shapes %s and %s '\
             'do not have the same length. '
             'Original shapes were %s and %s') % \
-            (s_one, s_two, sshape_one, sshape_two)
+            (s_one, s_two, sshape_one, sshape_two))
 
     # Reason about the transpose
     t_idx = tuple([s_one.index(v) for v in s_two])
@@ -473,11 +473,11 @@ def register_default_dimensions(cube, slvr_cfg):
     src_cfg = default_sources()
     src_nr_vars = sources_to_nr_vars(src_cfg)
     # Sum to get the total number of sources
-    cube.register_dimension('nsrc', sum(src_nr_vars.itervalues()),
+    cube.register_dimension('nsrc', sum(src_nr_vars.values()),
         description="Sources (Total)")
 
     # Register the individual source types
-    for nr_var, nr_of_src in src_nr_vars.iteritems():
+    for nr_var, nr_of_src in list(src_nr_vars.items()):
         cube.register_dimension(nr_var, nr_of_src,
             description='{} sources'.format(mbs.SOURCE_DIM_TYPES[nr_var]))
 
diff --git a/montblanc/util/ant_uvw.py b/montblanc/util/ant_uvw.py
index c17419309..1ac4eeac6 100644
--- a/montblanc/util/ant_uvw.py
+++ b/montblanc/util/ant_uvw.py
@@ -6,7 +6,7 @@
 from numba import jit, generated_jit
 
 # Coordinate indexing constants
-u, v, w = range(3)
+u, v, w = list(range(3))
 
 try:
     isclose = math.isclose
diff --git a/montblanc/util/parallactic_angles.py b/montblanc/util/parallactic_angles.py
index e16972219..01369839e 100644
--- a/montblanc/util/parallactic_angles.py
+++ b/montblanc/util/parallactic_angles.py
@@ -79,8 +79,8 @@ def parallactic_angles(times, antenna_positions, field_centre, offsets=None):
     fc_rad = np.asarray([[pm.direction('J2000',
                             pq.quantity(field_centre[0] + offsets[t, a, 0], 'rad'),
                             pq.quantity(field_centre[1] + offsets[t, a, 1], 'rad'))
-                          for a in xrange(na)]
-                         for t in xrange(nt)])
+                          for a in range(na)]
+                         for t in range(nt)])
 
     return np.asarray([
             # Set current time as the reference frame
diff --git a/montblanc/util/parsing.py b/montblanc/util/parsing.py
index 35daf525e..a254fddee 100644
--- a/montblanc/util/parsing.py
+++ b/montblanc/util/parsing.py
@@ -1,6 +1,6 @@
 import ast
 
-import __builtin__
+import builtins as __builtin__
 
 # builtin function whitelist
 _BUILTIN_WHITELIST = frozenset(['slice'])
diff --git a/versioneer.py b/versioneer.py
index 2fc1a0762..8d5ef72e6 100644
--- a/versioneer.py
+++ b/versioneer.py
@@ -277,12 +277,13 @@
 [travis-url]: https://travis-ci.org/warner/python-versioneer
 
 """
-
 from __future__ import print_function
+
+
 try:
     import configparser
 except ImportError:
-    import ConfigParser as configparser
+    import configparser as configparser
 import errno
 import json
 import os
@@ -290,7 +291,6 @@
 import subprocess
 import sys
 
-
 class VersioneerConfig:
     """Container for Versioneer configuration parameters."""
 
@@ -1713,11 +1713,10 @@ def do_setup():
     except (EnvironmentError, configparser.NoSectionError,
             configparser.NoOptionError) as e:
         if isinstance(e, (EnvironmentError, configparser.NoSectionError)):
-            print("Adding sample versioneer config to setup.cfg",
-                  file=sys.stderr)
+            eprint("Adding sample versioneer config to setup.cfg")
             with open(os.path.join(root, "setup.cfg"), "a") as f:
                 f.write(SAMPLE_CONFIG)
-        print(CONFIG_ERROR, file=sys.stderr)
+        eprint(CONFIG_ERROR)
         return 1
 
     print(" creating %s" % cfg.versionfile_source)

From cc39f4eac61b06e0d0e67ee0201901a36c3ca1e5 Mon Sep 17 00:00:00 2001
From: Benna Hugo <bennahugo@aol.com>
Date: Mon, 27 May 2019 13:56:20 +0200
Subject: [PATCH 25/41] residual py3 issues

---
 montblanc/util/ant_uvw.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/montblanc/util/ant_uvw.py b/montblanc/util/ant_uvw.py
index 1ac4eeac6..78c664269 100644
--- a/montblanc/util/ant_uvw.py
+++ b/montblanc/util/ant_uvw.py
@@ -1,4 +1,7 @@
-from future_builtins import zip
+try:
+    from future_builtins import zip
+except:
+    pass
 
 from itertools import islice
 import math

From a7ab5938df0a2da6f34fc170cef47e43cc33a8be Mon Sep 17 00:00:00 2001
From: Benna Hugo <bennahugo@aol.com>
Date: Mon, 27 May 2019 14:43:12 +0200
Subject: [PATCH 26/41] Fix py2 backwards compat

---
 versioneer.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/versioneer.py b/versioneer.py
index 8d5ef72e6..427aec590 100644
--- a/versioneer.py
+++ b/versioneer.py
@@ -281,9 +281,9 @@
 
 
 try:
-    import configparser
+    import ConfigParser
 except ImportError:
-    import configparser as configparser
+    import configparser as ConfigParser
 import errno
 import json
 import os
@@ -341,7 +341,7 @@ def get_config_from_root(root):
     # configparser.NoOptionError (if it lacks "VCS="). See the docstring at
     # the top of versioneer.py for instructions on writing your setup.cfg .
     setup_cfg = os.path.join(root, "setup.cfg")
-    parser = configparser.SafeConfigParser()
+    parser = ConfigParser.SafeConfigParser()
     with open(setup_cfg, "r") as f:
         parser.readfp(f)
     VCS = parser.get("versioneer", "VCS")  # mandatory
@@ -1710,9 +1710,9 @@ def do_setup():
     root = get_root()
     try:
         cfg = get_config_from_root(root)
-    except (EnvironmentError, configparser.NoSectionError,
-            configparser.NoOptionError) as e:
-        if isinstance(e, (EnvironmentError, configparser.NoSectionError)):
+    except (EnvironmentError, ConfigParser.NoSectionError,
+            ConfigParser.NoOptionError) as e:
+        if isinstance(e, (EnvironmentError, ConfigParser.NoSectionError)):
             eprint("Adding sample versioneer config to setup.cfg")
             with open(os.path.join(root, "setup.cfg"), "a") as f:
                 f.write(SAMPLE_CONFIG)

From 3f095cef62bce44f4ad799c943a72f3d58209d8d Mon Sep 17 00:00:00 2001
From: Benna Hugo <bennahugo@aol.com>
Date: Mon, 27 May 2019 15:13:17 +0200
Subject: [PATCH 27/41] depend on hypercube py3 fixes

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index aa316ea61..f0c6e6465 100644
--- a/setup.py
+++ b/setup.py
@@ -151,7 +151,7 @@ def readme():
     'enum34 >= 1.1.6',
     'funcsigs >= 0.4',
     'futures >= 3.0.5',
-    'hypercube == 0.3.3',
+    'hypercube @ git+https://github.com/ska-sa/hypercube.git@py3',
     'tensorflow == {0:s}'.format(str(REQ_TF_VERSION)),
 ]
 

From 02da662272d6c6a92e20f099da03132e2858b4c2 Mon Sep 17 00:00:00 2001
From: Benna Hugo <bennahugo@aol.com>
Date: Mon, 27 May 2019 15:50:59 +0200
Subject: [PATCH 28/41] py3 fixes

---
 install/tensorflow_ops_ext.py                | 23 ++++++++++++++++++--
 montblanc/impl/rime/tensorflow/RimeSolver.py |  2 +-
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/install/tensorflow_ops_ext.py b/install/tensorflow_ops_ext.py
index 2b7799154..fa28a3063 100644
--- a/install/tensorflow_ops_ext.py
+++ b/install/tensorflow_ops_ext.py
@@ -21,10 +21,10 @@
 import inspect
 import itertools
 import os
-
+import six
 from setuptools.extension import Extension
 from setuptools.command.build_ext import build_ext
-
+from distutils import sysconfig
 from .install_log import log
 
 tensorflow_extension_name = 'montblanc.ext.rime'
@@ -151,9 +151,28 @@ def create_tensorflow_extension(nvcc_settings, device_info):
         extra_link_args=extra_link_args,
     )
 
+def get_ext_filename_without_platform_suffix(filename):
+    name, ext = os.path.splitext(filename)
+    ext_suffix = sysconfig.get_config_var('EXT_SUFFIX')
+
+    if ext_suffix == ext:
+        return filename
+
+    ext_suffix = ext_suffix.replace(ext, '')
+    idx = name.find(ext_suffix)
+
+    if idx == -1:
+        return filename
+    else:
+        return name[:idx] + ext
 
 class BuildCommand(build_ext):
     """ Custom build command for building the tensorflow extension """
+
+    def get_ext_filename(self, ext_name):
+        filename = super().get_ext_filename(ext_name)
+        return get_ext_filename_without_platform_suffix(filename)
+
     def initialize_options(self):
         build_ext.initialize_options(self)
         self.nvcc_settings = None
diff --git a/montblanc/impl/rime/tensorflow/RimeSolver.py b/montblanc/impl/rime/tensorflow/RimeSolver.py
index 262bfe56d..1a0032116 100644
--- a/montblanc/impl/rime/tensorflow/RimeSolver.py
+++ b/montblanc/impl/rime/tensorflow/RimeSolver.py
@@ -888,7 +888,7 @@ def _make_feed_once_tuple(array):
         dtype = dfs[array.name].dtype
 
         ph = tf.placeholder(dtype=dtype,
-            name=a.name + "_placeholder")
+            name=array.name + "_placeholder")
 
         var = tf.Variable(tf.zeros(shape=(1,), dtype=dtype),
             validate_shape=False,

From 36e05508ad425ee8b0b9494f8808acbe65988632 Mon Sep 17 00:00:00 2001
From: Benna Hugo <bennahugo@aol.com>
Date: Mon, 27 May 2019 16:20:21 +0200
Subject: [PATCH 29/41] Use six to write py2-3 compatible metaclasses

---
 montblanc/impl/rime/tensorflow/hypercube_proxy_metaclass.py | 1 +
 montblanc/impl/rime/tensorflow/sinks/sink_context.py        | 3 ++-
 montblanc/impl/rime/tensorflow/sources/source_context.py    | 3 ++-
 montblanc/impl/rime/tensorflow/start_context.py             | 3 ++-
 montblanc/impl/rime/tensorflow/stop_context.py              | 3 ++-
 setup.py                                                    | 1 +
 6 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/montblanc/impl/rime/tensorflow/hypercube_proxy_metaclass.py b/montblanc/impl/rime/tensorflow/hypercube_proxy_metaclass.py
index b2fc05e4f..8ff8f8084 100644
--- a/montblanc/impl/rime/tensorflow/hypercube_proxy_metaclass.py
+++ b/montblanc/impl/rime/tensorflow/hypercube_proxy_metaclass.py
@@ -60,6 +60,7 @@ def _proxy(self, *args, **kwargs):
             return wrap
 
         for name, method in hc_members:
+            print(name, method)
             setattr(cls, name, wrap_cube_method(name, method.__func__))
 
         super(HypercubeProxyMetaClass, cls).__init__(name, bases, dct)
diff --git a/montblanc/impl/rime/tensorflow/sinks/sink_context.py b/montblanc/impl/rime/tensorflow/sinks/sink_context.py
index cf8d8ea41..b747b2974 100644
--- a/montblanc/impl/rime/tensorflow/sinks/sink_context.py
+++ b/montblanc/impl/rime/tensorflow/sinks/sink_context.py
@@ -20,6 +20,7 @@
 
 from ..context_help import context_help
 from ..hypercube_proxy_metaclass import HypercubeProxyMetaClass
+import six
 
 class _setter_property(object):
     def __init__(self, func, doc=None):
@@ -29,8 +30,8 @@ def __init__(self, func, doc=None):
     def __set__(self, obj, value):
         return self.func(obj, value)
 
+@six.add_metaclass(HypercubeProxyMetaClass)
 class SinkContext(object):
-    __metaclass__=HypercubeProxyMetaClass
     """
     Context object passed to data sinks.
 
diff --git a/montblanc/impl/rime/tensorflow/sources/source_context.py b/montblanc/impl/rime/tensorflow/sources/source_context.py
index 7db07ef72..77eb313a5 100644
--- a/montblanc/impl/rime/tensorflow/sources/source_context.py
+++ b/montblanc/impl/rime/tensorflow/sources/source_context.py
@@ -20,6 +20,7 @@
 
 from ..context_help import context_help
 from ..hypercube_proxy_metaclass import HypercubeProxyMetaClass
+import six
 
 class _setter_property(object):
     def __init__(self, func, doc=None):
@@ -29,8 +30,8 @@ def __init__(self, func, doc=None):
     def __set__(self, obj, value):
         return self.func(obj, value)
 
+@six.add_metaclass(HypercubeProxyMetaClass)
 class SourceContext(object):
-    __metaclass__ = HypercubeProxyMetaClass
     """
     Context object passed to data sources.
 
diff --git a/montblanc/impl/rime/tensorflow/start_context.py b/montblanc/impl/rime/tensorflow/start_context.py
index f341bad0a..53fc65d35 100644
--- a/montblanc/impl/rime/tensorflow/start_context.py
+++ b/montblanc/impl/rime/tensorflow/start_context.py
@@ -21,9 +21,10 @@
 
 from .context_help import context_help
 from .hypercube_proxy_metaclass import HypercubeProxyMetaClass
+import six
 
+@six.add_metaclass(HypercubeProxyMetaClass)
 class StartContext(object):
-    __metaclass__ = HypercubeProxyMetaClass
     """
     Start Context object passed to Providers.
 
diff --git a/montblanc/impl/rime/tensorflow/stop_context.py b/montblanc/impl/rime/tensorflow/stop_context.py
index d484d8658..495751e58 100644
--- a/montblanc/impl/rime/tensorflow/stop_context.py
+++ b/montblanc/impl/rime/tensorflow/stop_context.py
@@ -20,9 +20,10 @@
 
 from .context_help import context_help
 from .hypercube_proxy_metaclass import HypercubeProxyMetaClass
+import six
 
+@six.add_metaclass(HypercubeProxyMetaClass)
 class StopContext(object):
-    __metaclass__ = HypercubeProxyMetaClass
     """
     Stop Context object passed to Providers.
 
diff --git a/setup.py b/setup.py
index f0c6e6465..39994a781 100644
--- a/setup.py
+++ b/setup.py
@@ -151,6 +151,7 @@ def readme():
     'enum34 >= 1.1.6',
     'funcsigs >= 0.4',
     'futures >= 3.0.5',
+    'six',
     'hypercube @ git+https://github.com/ska-sa/hypercube.git@py3',
     'tensorflow == {0:s}'.format(str(REQ_TF_VERSION)),
 ]

From 64567df84816603738c7dbab788814fec3bc6230 Mon Sep 17 00:00:00 2001
From: Benna Hugo <bennahugo@aol.com>
Date: Mon, 27 May 2019 16:42:09 +0200
Subject: [PATCH 30/41] py2-3 install issue

---
 install/tensorflow_ops_ext.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/install/tensorflow_ops_ext.py b/install/tensorflow_ops_ext.py
index fa28a3063..ea03cf8dd 100644
--- a/install/tensorflow_ops_ext.py
+++ b/install/tensorflow_ops_ext.py
@@ -170,8 +170,11 @@ class BuildCommand(build_ext):
     """ Custom build command for building the tensorflow extension """
 
     def get_ext_filename(self, ext_name):
-        filename = super().get_ext_filename(ext_name)
-        return get_ext_filename_without_platform_suffix(filename)
+        if six.PY3:
+            filename = super().get_ext_filename(ext_name)
+            return get_ext_filename_without_platform_suffix(filename)
+        else:
+            return build_ext.get_ext_filename(self, ext_name)
 
     def initialize_options(self):
         build_ext.initialize_options(self)

From f167e0168d122f100029f4a4ba2e283647ba283a Mon Sep 17 00:00:00 2001
From: Benna Hugo <bennahugo@aol.com>
Date: Mon, 27 May 2019 17:54:24 +0200
Subject: [PATCH 31/41] remove metadata class printing

---
 .../impl/rime/tensorflow/hypercube_proxy_metaclass.py    | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/montblanc/impl/rime/tensorflow/hypercube_proxy_metaclass.py b/montblanc/impl/rime/tensorflow/hypercube_proxy_metaclass.py
index 8ff8f8084..d6d6173cd 100644
--- a/montblanc/impl/rime/tensorflow/hypercube_proxy_metaclass.py
+++ b/montblanc/impl/rime/tensorflow/hypercube_proxy_metaclass.py
@@ -49,18 +49,9 @@ def _proxy(self, *args, **kwargs):
 
             wrap = functools.update_wrapper(_proxy, method)
             spec = inspect.getargspec(method)
-            fmt_args = inspect.formatargspec(formatvalue=lambda v: '=_default', *spec)
-            call_args = inspect.formatargspec(formatvalue=lambda v: '', *spec)
-
-            # wrap.__doc__ = (
-            #     'def {}{}:\n'
-            #     '\t""" {} """\n'
-            #     '\treturn _proxy{}').format(name, fmt_args, method.__doc__, call_args)
-
             return wrap
 
         for name, method in hc_members:
-            print(name, method)
             setattr(cls, name, wrap_cube_method(name, method.__func__))
 
         super(HypercubeProxyMetaClass, cls).__init__(name, bases, dct)

From 6c4dad5188b88b9c960bc30f1537253074dcc996 Mon Sep 17 00:00:00 2001
From: Simon Perkins <simon.perkins@gmail.com>
Date: Tue, 28 May 2019 10:33:09 +0200
Subject: [PATCH 32/41] py23 agnostic __builtin__ import

---
 montblanc/util/parsing.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/montblanc/util/parsing.py b/montblanc/util/parsing.py
index a254fddee..cc45f7e65 100644
--- a/montblanc/util/parsing.py
+++ b/montblanc/util/parsing.py
@@ -1,6 +1,9 @@
 import ast
 
-import builtins as __builtin__
+try:
+    import builtins as __builtin__
+except ImportError:
+    import __builtin__
 
 # builtin function whitelist
 _BUILTIN_WHITELIST = frozenset(['slice'])

From 878ce899a0bf9ebd234bcc45df51fe16a838b4fd Mon Sep 17 00:00:00 2001
From: Simon Perkins <simon.perkins@gmail.com>
Date: Tue, 28 May 2019 10:33:38 +0200
Subject: [PATCH 33/41] Fix method inspection within MetaClass constructor

https://stackoverflow.com/a/17019983/1611416
---
 .../impl/rime/tensorflow/hypercube_proxy_metaclass.py    | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/montblanc/impl/rime/tensorflow/hypercube_proxy_metaclass.py b/montblanc/impl/rime/tensorflow/hypercube_proxy_metaclass.py
index d6d6173cd..cbc9c50bf 100644
--- a/montblanc/impl/rime/tensorflow/hypercube_proxy_metaclass.py
+++ b/montblanc/impl/rime/tensorflow/hypercube_proxy_metaclass.py
@@ -22,14 +22,18 @@
 import inspect
 import types
 
+import six
+
 from hypercube import HyperCube
 
+
 class HypercubeProxyMetaClass(type):
     """ MetaClass for classes that proxy HyperCubes """
     def __init__(cls, name, bases, dct):
         """ Proxy public methods on the HyperCube """
         def public_member_predicate(m):
-            return inspect.ismethod(m) and not m.__name__.startswith('_')
+            return ((inspect.isfunction(m) or inspect.ismethod(m))
+                    and not m.__name__.startswith('_'))
 
         hc_members = inspect.getmembers(HyperCube, public_member_predicate)
         sc_members = inspect.getmembers(cls, public_member_predicate)
@@ -52,6 +56,7 @@ def _proxy(self, *args, **kwargs):
             return wrap
 
         for name, method in hc_members:
-            setattr(cls, name, wrap_cube_method(name, method.__func__))
+            fn = six.get_unbound_function(method)
+            setattr(cls, name, wrap_cube_method(name, fn))
 
         super(HypercubeProxyMetaClass, cls).__init__(name, bases, dct)

From 03afcc94f00026e5e8af4cd7433c71bd1b4d0403 Mon Sep 17 00:00:00 2001
From: Benna Hugo <bennahugo@aol.com>
Date: Tue, 28 May 2019 16:59:50 +0200
Subject: [PATCH 34/41] Fix residual py3 issues

---
 montblanc/impl/rime/tensorflow/RimeSolver.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/montblanc/impl/rime/tensorflow/RimeSolver.py b/montblanc/impl/rime/tensorflow/RimeSolver.py
index 1a0032116..b3890282d 100644
--- a/montblanc/impl/rime/tensorflow/RimeSolver.py
+++ b/montblanc/impl/rime/tensorflow/RimeSolver.py
@@ -107,18 +107,22 @@ def __init__(self):
                 self._lock = threading.Lock()
 
             def __getitem__(self, key):
+                key = hash(bytes(key))
                 with self._lock:
                     return self._cache[key]
 
             def __setitem__(self, key, value):
+                key = hash(bytes(key))
                 with self._lock:
                     self._cache[key]=value
 
             def __delitem__(self, key):
+                key = hash(bytes(key))
                 with self._lock:
                     del self._cache[key]
 
             def pop(self, key, default=None):
+                key = hash(bytes(key))
                 with self._lock:
                     return self._cache.pop(key, default)
 

From 5a456cd03c0f7da76954f45efa7534444bf0111a Mon Sep 17 00:00:00 2001
From: Simon Perkins <simon.perkins@gmail.com>
Date: Fri, 31 May 2019 09:43:14 +0200
Subject: [PATCH 35/41] Fix variable usage in setup.py

---
 setup.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 1af2ecfd0..ca5f9e228 100644
--- a/setup.py
+++ b/setup.py
@@ -65,7 +65,8 @@
         raise ImportError("Please 'pip install tensorflow==%s' or "
                           "'pip install tensorflow-gpu==%s' prior to "
                           "installation if you require CPU or GPU "
-                          "support, respectively" % (TF_VERSION, TF_VERSION))
+                          "support, respectively" %
+                          (REQ_TF_VERSION, REQ_TF_VERSION))
 
     tf_installed = False
     use_tf_cuda = False
@@ -174,7 +175,7 @@ def readme():
     cmdclass = {'build_ext': BuildCommand}
     # tensorflow_ops_ext.BuildCommand.run will
     # expand this dummy extension to its full portential
-    
+
     ext_modules = [create_tensorflow_extension(nvcc_settings, device_info)]
 
     # Pass NVCC and CUDA settings through to the build extension

From 85684b2f68490e55557f5688e5752d0333c2a29c Mon Sep 17 00:00:00 2001
From: Simon Perkins <simon.perkins@gmail.com>
Date: Fri, 31 May 2019 10:01:35 +0200
Subject: [PATCH 36/41] Make a proper Sersic Shape test

---
 .../tensorflow/rime_ops/test_sersic_shape.py  | 88 ++++++++++++-------
 1 file changed, 58 insertions(+), 30 deletions(-)

diff --git a/montblanc/impl/rime/tensorflow/rime_ops/test_sersic_shape.py b/montblanc/impl/rime/tensorflow/rime_ops/test_sersic_shape.py
index ac0618547..27b16bd12 100644
--- a/montblanc/impl/rime/tensorflow/rime_ops/test_sersic_shape.py
+++ b/montblanc/impl/rime/tensorflow/rime_ops/test_sersic_shape.py
@@ -1,45 +1,73 @@
-import os
+import unittest
 
 import numpy as np
 import tensorflow as tf
+from tensorflow.python.client import device_lib
 
-# Load the library containing the custom operation
-from montblanc.impl.rime.tensorflow import load_tf_lib
-rime = load_tf_lib()
 
-dtype = np.float32
-ngsrc, ntime, na, nchan = 10, 15, 7, 16
-nbl = na*(na-1)//2
+def rf(*s):
+    return np.random.random(size=s)
 
-rf = lambda *s: np.random.random(size=s).astype(dtype=dtype)
 
-np_uvw = rf(ntime, na, 3)
-np_ant1, np_ant2 = [np.int32(x) for x in np.triu_indices(na, 1)]
-np_ant1, np_ant2 = (np.tile(np_ant1, ntime).reshape(ntime, nbl),
-    np.tile(np_ant2, ntime).reshape(ntime,nbl))
-np_frequency = np.linspace(1.4e9, 1.5e9, nchan).astype(dtype)
-np_sersic_params = rf(3, ngsrc)*np.array([1.0,1.0,np.pi/648000],dtype=dtype)[:,np.newaxis]
+class TestSersicShape(unittest.TestCase):
+    """ Tests the SersicShape operator """
+    def setUp(self):
+        # Load the rime operation library
+        from montblanc.impl.rime.tensorflow import load_tf_lib
+        self.rime = load_tf_lib()
+        # Obtain a list of GPU device specifications ['/gpu:0', '/gpu:1', ...]
+        self.gpu_devs = [d.name for d in device_lib.list_local_devices()
+                         if d.device_type == 'GPU']
 
-assert np_ant1.shape == (ntime, nbl), np_ant1.shape
-assert np_ant2.shape == (ntime, nbl), np_ant2.shape
-assert np_frequency.shape == (nchan,)
+    def test_sersic_shape(self):
+        for FT in [np.float32, np.float64]:
+            self._impl_test_sersic_shape(FT)
 
-args = list(map(lambda v, n: tf.Variable(v, name=n),
-    [np_uvw, np_ant1, np_ant2, np_frequency, np_sersic_params],
-    ["uvw", "ant1", "ant2", "frequency", "sersic_params"]))
+    def _impl_test_sersic_shape(self, FT):
+        ngsrc, ntime, na, nchan = 10, 15, 7, 16
+        nbl = na*(na-1)//2
 
-with tf.device('/cpu:0'):
-    sersic_shape_cpu = rime.sersic_shape(*args)
+        np_uvw = rf(ntime, na, 3).astype(FT)
+        np_ant1, np_ant2 = [np.int32(x) for x in np.triu_indices(na, 1)]
+        np_ant1, np_ant2 = (np.tile(np_ant1, ntime).reshape(ntime, nbl),
+                            np.tile(np_ant2, ntime).reshape(ntime, nbl))
+        np_frequency = np.linspace(1.4e9, 1.5e9, nchan).astype(FT)
+        np_sersic_params = rf(3, ngsrc)*np.array([1.0, 1.0, np.pi/648000],
+                                                 dtype=FT)[:, np.newaxis]
+        np_sersic_params = np_sersic_params.astype(dtype=FT)
 
-with tf.device('/gpu:0'):
-    sersic_shape_gpu = rime.sersic_shape(*args)
+        assert np_ant1.shape == (ntime, nbl), np_ant1.shape
+        assert np_ant2.shape == (ntime, nbl), np_ant2.shape
+        assert np_frequency.shape == (nchan,)
 
-init_op = tf.global_variables_initializer()
+        np_args = [np_uvw, np_ant1, np_ant2, np_frequency, np_sersic_params]
+        arg_names = ["uvw", "ant1", "ant2", "frequency", "sersic_params"]
 
-with tf.Session() as S:
-    S.run(init_op)
-    tf_sersic_shape_gpu = S.run(sersic_shape_gpu)
-    tf_sersic_shape_cpu = S.run(sersic_shape_cpu)
-    assert np.allclose(tf_sersic_shape_cpu, tf_sersic_shape_gpu)
+        tf_args = [tf.Variable(v, name=n) for v, n in zip(np_args, arg_names)]
 
+        def _pin_op(device, *tf_args):
+            """ Pin operation to device """
+            with tf.device(device):
+                return self.rime.sersic_shape(*tf_args)
 
+        # Pin operation to CPU
+        cpu_op = _pin_op('/cpu:0', *tf_args)
+
+        # Run the op on all GPUs
+        gpu_ops = [_pin_op(d, *tf_args) for d in self.gpu_devs]
+
+        # Initialise variables
+        init_op = tf.global_variables_initializer()
+
+        with tf.Session() as S:
+            S.run(init_op)
+
+            cpu_sersic_shape = S.run(cpu_op)
+
+            for gpu_sersic_shape in S.run(gpu_ops):
+                self.assertTrue(np.allclose(cpu_sersic_shape,
+                                            gpu_sersic_shape))
+
+
+if __name__ == "__main__":
+    unittest.main()

From a99dcf7f53882f3ef44205716039a36c9fc7614a Mon Sep 17 00:00:00 2001
From: Simon Perkins <simon.perkins@gmail.com>
Date: Fri, 31 May 2019 10:06:35 +0200
Subject: [PATCH 37/41] Make a proper gaussians shape test

---
 .../tensorflow/rime_ops/test_gauss_shape.py   | 87 ++++++++++++-------
 1 file changed, 57 insertions(+), 30 deletions(-)

diff --git a/montblanc/impl/rime/tensorflow/rime_ops/test_gauss_shape.py b/montblanc/impl/rime/tensorflow/rime_ops/test_gauss_shape.py
index a401c3d97..cc149b2f9 100644
--- a/montblanc/impl/rime/tensorflow/rime_ops/test_gauss_shape.py
+++ b/montblanc/impl/rime/tensorflow/rime_ops/test_gauss_shape.py
@@ -1,45 +1,72 @@
-import os
+import unittest
 
 import numpy as np
 import tensorflow as tf
+from tensorflow.python.client import device_lib
 
-# Load the library containing the custom operation
-from montblanc.impl.rime.tensorflow import load_tf_lib
-rime = load_tf_lib()
 
-dtype = np.float32
-ngsrc, ntime, na, nchan = 10, 15, 7, 16
-nbl = na*(na-1)//2
+def rf(*s):
+    return np.random.random(size=s)
 
-rf = lambda *s: np.random.random(size=s).astype(dtype=dtype)
 
-np_uvw = rf(ntime, na, 3)
-np_ant1, np_ant2 = [np.int32(x) for x in np.triu_indices(na, 1)]
-np_ant1, np_ant2 = (np.tile(np_ant1, ntime).reshape(ntime, nbl),
-    np.tile(np_ant2, ntime).reshape(ntime,nbl))
-np_frequency = np.linspace(1.4e9, 1.5e9, nchan).astype(dtype)
-np_gauss_params = rf(3, ngsrc)*np.array([0.1,0.1,1.0],dtype=dtype)[:,np.newaxis]
+class TestGaussShape(unittest.TestCase):
+    """ Tests the GaussShape operator """
+    def setUp(self):
+        # Load the rime operation library
+        from montblanc.impl.rime.tensorflow import load_tf_lib
+        self.rime = load_tf_lib()
+        # Obtain a list of GPU device specifications ['/gpu:0', '/gpu:1', ...]
+        self.gpu_devs = [d.name for d in device_lib.list_local_devices()
+                         if d.device_type == 'GPU']
 
-assert np_ant1.shape == (ntime, nbl), np_ant1.shape
-assert np_ant2.shape == (ntime, nbl), np_ant2.shape
-assert np_frequency.shape == (nchan,)
+    def test_gauss_shape(self):
+        for FT in [np.float32, np.float64]:
+            self._impl_test_gauss_shape(FT)
 
-args = list(map(lambda v, n: tf.Variable(v, name=n),
-    [np_uvw, np_ant1, np_ant2, np_frequency, np_gauss_params],
-    ["uvw", "ant1", "ant2", "frequency", "gauss_params"]))
+    def _impl_test_gauss_shape(self, FT):
+        ngsrc, ntime, na, nchan = 10, 15, 7, 16
+        nbl = na*(na-1)//2
 
-with tf.device('/cpu:0'):
-    gauss_shape_cpu = rime.gauss_shape(*args)
+        np_uvw = rf(ntime, na, 3).astype(FT)
+        np_ant1, np_ant2 = [np.int32(x) for x in np.triu_indices(na, 1)]
+        np_ant1, np_ant2 = (np.tile(np_ant1, ntime).reshape(ntime, nbl),
+                            np.tile(np_ant2, ntime).reshape(ntime, nbl))
+        np_frequency = np.linspace(1.4e9, 1.5e9, nchan).astype(FT)
+        np_gauss_params = rf(3, ngsrc)*np.array([0.1, 0.1, 1.0])[:, np.newaxis]
+        np_gauss_params = np_gauss_params.astype(FT)
 
-with tf.device('/gpu:0'):
-    gauss_shape_gpu = rime.gauss_shape(*args)
+        assert np_ant1.shape == (ntime, nbl), np_ant1.shape
+        assert np_ant2.shape == (ntime, nbl), np_ant2.shape
+        assert np_frequency.shape == (nchan,)
 
-init_op = tf.global_variables_initializer()
+        np_args = [np_uvw, np_ant1, np_ant2, np_frequency, np_gauss_params]
+        arg_names = ["uvw", "ant1", "ant2", "frequency", "gauss_params"]
 
-with tf.Session() as S:
-    S.run(init_op)
-    tf_gauss_shape_gpu = S.run(gauss_shape_gpu)
-    tf_gauss_shape_cpu = S.run(gauss_shape_cpu)
-    assert np.allclose(tf_gauss_shape_cpu, tf_gauss_shape_gpu)
+        tf_args = [tf.Variable(v, name=n) for v, n in zip(np_args, arg_names)]
 
+        def _pin_op(device, *tf_args):
+            """ Pin operation to device """
+            with tf.device(device):
+                return self.rime.gauss_shape(*tf_args)
 
+        # Pin operation to CPU
+        cpu_op = _pin_op('/cpu:0', *tf_args)
+
+        # Run the op on all GPUs
+        gpu_ops = [_pin_op(d, *tf_args) for d in self.gpu_devs]
+
+        # Initialise variables
+        init_op = tf.global_variables_initializer()
+
+        with tf.Session() as S:
+            S.run(init_op)
+
+            cpu_gauss_shape = S.run(cpu_op)
+
+            for gpu_gauss_shape in S.run(gpu_ops):
+                self.assertTrue(np.allclose(cpu_gauss_shape,
+                                            gpu_gauss_shape))
+
+
+if __name__ == "__main__":
+    unittest.main()

From 4f0af2353265dc77b5405b7cdf57add92afeff5b Mon Sep 17 00:00:00 2001
From: Simon Perkins <simon.perkins@gmail.com>
Date: Fri, 31 May 2019 10:06:47 +0200
Subject: [PATCH 38/41] Feed rotation test spacing

---
 .../tensorflow/rime_ops/test_feed_rotation.py     | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/montblanc/impl/rime/tensorflow/rime_ops/test_feed_rotation.py b/montblanc/impl/rime/tensorflow/rime_ops/test_feed_rotation.py
index f8ae94714..c4e8f2995 100644
--- a/montblanc/impl/rime/tensorflow/rime_ops/test_feed_rotation.py
+++ b/montblanc/impl/rime/tensorflow/rime_ops/test_feed_rotation.py
@@ -4,6 +4,7 @@
 import tensorflow as tf
 from tensorflow.python.client import device_lib
 
+
 class TestFeedRotation(unittest.TestCase):
     """ Tests the FeedRotation operator """
 
@@ -13,7 +14,7 @@ def setUp(self):
         self.rime = load_tf_lib()
         # Obtain a list of GPU device specifications ['/gpu:0', '/gpu:1', ...]
         self.gpu_devs = [d.name for d in device_lib.list_local_devices()
-                                if d.device_type == 'GPU']
+                         if d.device_type == 'GPU']
 
     def test_feed_rotation(self):
         """ Test the FeedRotation operator """
@@ -35,7 +36,7 @@ def _impl_test_feed_rotation(self, FT, CT, feed_type):
         # Create input variables
         ntime, na = 10, 7
 
-        parallactic_angle = np.random.random(size=[ntime,na]).astype(FT)
+        parallactic_angle = np.random.random(size=[ntime, na]).astype(FT)
         parallactic_angle_sin = np.sin(parallactic_angle)
         parallactic_angle_cos = np.cos(parallactic_angle)
 
@@ -49,8 +50,8 @@ def _impl_test_feed_rotation(self, FT, CT, feed_type):
         def _pin_op(device, *tf_args):
             """ Pin operation to device """
             with tf.device(device):
-                return self.rime.feed_rotation(*tf_args,
-                    CT=CT, feed_type=feed_type)
+                return self.rime.feed_rotation(*tf_args, CT=CT,
+                                               feed_type=feed_type)
 
         # Pin operation to CPU
         cpu_op = _pin_op('/cpu:0', *tf_args)
@@ -67,7 +68,9 @@ def _pin_op(device, *tf_args):
             cpu_feed_rotation = S.run(cpu_op)
 
             for gpu_feed_rotation in S.run(gpu_ops):
-                self.assertTrue(np.allclose(cpu_feed_rotation, gpu_feed_rotation))
+                self.assertTrue(np.allclose(cpu_feed_rotation,
+                                            gpu_feed_rotation))
+
 
 if __name__ == "__main__":
-    unittest.main()
\ No newline at end of file
+    unittest.main()

From 2061238a6289720c3d6e39ff2b27b3c946c88050 Mon Sep 17 00:00:00 2001
From: Simon Perkins <simon.perkins@gmail.com>
Date: Fri, 31 May 2019 10:10:47 +0200
Subject: [PATCH 39/41] Fix radec_to_lm test library load

---
 montblanc/impl/rime/tensorflow/rime_ops/test_radec_to_lm.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/montblanc/impl/rime/tensorflow/rime_ops/test_radec_to_lm.py b/montblanc/impl/rime/tensorflow/rime_ops/test_radec_to_lm.py
index 3b6ec369b..5c09ee21b 100644
--- a/montblanc/impl/rime/tensorflow/rime_ops/test_radec_to_lm.py
+++ b/montblanc/impl/rime/tensorflow/rime_ops/test_radec_to_lm.py
@@ -29,7 +29,9 @@ class TestRadecToLm(unittest.TestCase):
 
     def setUp(self):
         # Load the custom operation library
-        self.rime = tf.load_op_library('./rime.so')
+        # Load the rime operation library
+        from montblanc.impl.rime.tensorflow import load_tf_lib
+        self.rime = load_tf_lib()
         # Obtain a list of GPU device specifications ['/gpu:0', '/gpu:1', ...]
         self.gpu_devs = [d.name for d in device_lib.list_local_devices()
                          if d.device_type == 'GPU']

From 65d7ec294b7b867ac66bcbe0017a059cdc0a3693 Mon Sep 17 00:00:00 2001
From: Simon Perkins <simon.perkins@gmail.com>
Date: Fri, 31 May 2019 10:32:02 +0200
Subject: [PATCH 40/41] Make a proper complex phase test case

---
 .../rime/tensorflow/rime_ops/test_phase.py    | 143 +++++++-----------
 1 file changed, 58 insertions(+), 85 deletions(-)

diff --git a/montblanc/impl/rime/tensorflow/rime_ops/test_phase.py b/montblanc/impl/rime/tensorflow/rime_ops/test_phase.py
index 14b316a09..77ed4d85f 100644
--- a/montblanc/impl/rime/tensorflow/rime_ops/test_phase.py
+++ b/montblanc/impl/rime/tensorflow/rime_ops/test_phase.py
@@ -1,29 +1,12 @@
 import os
+import unittest
 import timeit
 
 import numpy as np
 import tensorflow as tf
+from tensorflow.python.client import device_lib
 
-# Load the library containing the custom operation
-from montblanc.impl.rime.tensorflow import load_tf_lib
-rime = load_tf_lib()
-
-
-def complex_phase_op(lm, uvw, frequency):
-    """
-    This function wraps rime_phase by deducing the
-    complex output result type from the input
-    """
-    lm_dtype = lm.dtype.base_dtype
-
-    if lm_dtype == tf.float32:
-        CT = tf.complex64
-    elif lm_dtype == tf.float64:
-        CT = tf.complex128
-    else:
-        raise TypeError("Unhandled type '{t}'".format(t=lm.dtype))
-
-    return rime.phase(lm, uvw, frequency, CT=CT)
+lightspeed = 299792458.
 
 
 def complex_phase(lm, uvw, frequency):
@@ -86,68 +69,58 @@ def complex_phase_numpy(lm, uvw, frequency):
     return np.exp(real_phase)
 
 
-def test_complex_phase():
-    dtype, ctype = np.float64, np.complex128
-    nsrc, ntime, na, nchan = 100, 50, 64, 128
-    lightspeed = 299792458.
-
-    # Set up our numpy input arrays
-    np_lm = np.random.random(size=(nsrc, 2)).astype(dtype)*0.1
-    np_uvw = np.random.random(size=(ntime, na, 3)).astype(dtype)
-    np_frequency = np.linspace(1.3e9, 1.5e9, nchan, endpoint=True, dtype=dtype)
-
-    # Create tensorflow arrays from the numpy arrays
-    lm = tf.Variable(np_lm, name='lm')
-    uvw = tf.Variable(np_uvw, name='uvw')
-    frequency = tf.Variable(np_frequency, name='frequency')
-
-    # Get an expression for the complex phase op on the CPU
-    with tf.device('/cpu:0'):
-        cplx_phase_op_cpu = complex_phase_op(lm, uvw, frequency)
-
-    # Get an expression for the complex phase op on the GPU
-    with tf.device('/gpu:0'):
-        cplx_phase_op_gpu = complex_phase_op(lm, uvw, frequency)
-
-    # Get an expression for the complex phase expression on the GPU
-    with tf.device('/gpu:0'):
-        cplx_phase_expr_gpu = complex_phase(lm, uvw, frequency)
-
-    init_op = tf.global_variables_initializer()
-
-    # Now create a tensorflow Session to evaluate the above
-    with tf.Session() as S:
-        S.run(init_op)
-
-        # Evaluate and time tensorflow GPU
-        start = timeit.default_timer()
-        tf_cplx_phase_op_gpu = S.run(cplx_phase_op_gpu)
-        print(('Tensorflow custom GPU time %f' % (timeit.default_timer() - start)))  # noqa
-
-        # Evaluate and time tensorflow GPU
-        start = timeit.default_timer()
-        tf_cplx_phase_expr_gpu = S.run(cplx_phase_expr_gpu)
-        print(('Tensorflow expression GPU time %f' % (timeit.default_timer() - start)))  # noqa
-
-        # Evaluate and time tensorflow CPU
-        start = timeit.default_timer()
-        tf_cplx_phase_op_cpu = S.run(cplx_phase_op_cpu)
-        print(('Tensorflow CPU time %f' % (timeit.default_timer() - start)))
-
-        # Evaluate and time numpy CPU
-        start = timeit.default_timer()
-        # Now calculate the complex phase using numpy
-        # Reshapes help us to broadcast
-        np_cplx_phase = complex_phase_numpy(np_lm, np_uvw, np_frequency)
-        print(('Numpy CPU time %f' % (timeit.default_timer() - start)))
-
-        # Check that our shapes and values agree with a certain tolerance
-        assert tf_cplx_phase_op_cpu.shape == (nsrc, ntime, na, nchan)
-        assert tf_cplx_phase_op_gpu.shape == (nsrc, ntime, na, nchan)
-        assert tf_cplx_phase_expr_gpu.shape == (nsrc, ntime, na, nchan)
-        assert np_cplx_phase.shape == (nsrc, ntime, na, nchan)
-        assert np.allclose(tf_cplx_phase_op_cpu, np_cplx_phase)
-        assert np.allclose(tf_cplx_phase_op_gpu, np_cplx_phase)
-        assert np.allclose(tf_cplx_phase_expr_gpu, np_cplx_phase)
-
-    print('Tests Succeeded')
+class TestComplexPhase(unittest.TestCase):
+    def setUp(self):
+        # Load the rime operation library
+        from montblanc.impl.rime.tensorflow import load_tf_lib
+        self.rime = load_tf_lib()
+        # Obtain a list of GPU device specifications ['/gpu:0', '/gpu:1', ...]
+        self.gpu_devs = [d.name for d in device_lib.list_local_devices()
+                         if d.device_type == 'GPU']
+
+    def test_complex_phase(self):
+        type_permutations = [
+            [np.float32, np.complex64],
+            [np.float64, np.complex128]]
+
+        for FT, CT in type_permutations:
+            self._impl_test_complex_phase(FT, CT)
+
+    def _impl_test_complex_phase(self, FT, CT):
+        nsrc, ntime, na, nchan = 100, 50, 64, 128
+
+        # Set up our numpy input arrays
+        np_lm = np.random.random(size=(nsrc, 2)).astype(FT)*0.1
+        np_uvw = np.random.random(size=(ntime, na, 3)).astype(FT)
+        np_frequency = np.linspace(1.3e9, 1.5e9, nchan,
+                                   endpoint=True, dtype=FT)
+
+        np_args = [np_lm, np_uvw, np_frequency]
+        tf_names = ['lm', 'uvw', 'frequency']
+
+        tf_args = [tf.Variable(v, name=n) for v, n in zip(np_args, tf_names)]
+
+        def _pin_op(device, *tf_args):
+            """ Pin operation to device """
+            with tf.device(device):
+                return self.rime.phase(*tf_args, CT=CT)
+
+        # Pin operation to CPU
+        cpu_op = _pin_op('/cpu:0', *tf_args)
+
+        # Run the op on all GPUs
+        gpu_ops = [_pin_op(d, *tf_args) for d in self.gpu_devs]
+
+        # Initialise variables
+        init_op = tf.global_variables_initializer()
+
+        # Now create a tensorflow Session to evaluate the above
+        with tf.Session() as S:
+            S.run(init_op)
+
+            phase_np = complex_phase_numpy(np_lm, np_uvw, np_frequency)
+            phase_cpu = S.run(cpu_op)
+            assert np.allclose(phase_cpu, phase_np)
+
+            for phase_gpu in S.run(gpu_ops):
+                assert np.allclose(phase_cpu, phase_gpu)

From 77e501cd8af6ec24de22f4a5dc991638668e95a9 Mon Sep 17 00:00:00 2001
From: Benna Hugo <bennahugo@aol.com>
Date: Fri, 31 May 2019 16:30:43 +0200
Subject: [PATCH 41/41] Remove py3 only constructs

---
 montblanc/impl/rime/tensorflow/RimeSolver.py    | 7 ++++---
 montblanc/impl/rime/tensorflow/queue_wrapper.py | 5 ++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/montblanc/impl/rime/tensorflow/RimeSolver.py b/montblanc/impl/rime/tensorflow/RimeSolver.py
index b3890282d..eb23a8e07 100644
--- a/montblanc/impl/rime/tensorflow/RimeSolver.py
+++ b/montblanc/impl/rime/tensorflow/RimeSolver.py
@@ -24,6 +24,7 @@
 import threading
 import sys
 import types
+import six
 
 import concurrent.futures as cf
 import numpy as np
@@ -536,7 +537,7 @@ def _consume(self, data_sinks, cube, global_iter_args):
             return self._consume_impl(data_sinks, cube, global_iter_args)
         except Exception as e:
             montblanc.log.exception("Consumer Exception")
-            raise e.with_traceback(sys.exc_info()[2])
+            six.reraise(Exception, Exception(e), sys.exc_info()[2])
 
     def _consume_impl(self, data_sinks, cube, global_iter_args):
         """ Consume """
@@ -1142,7 +1143,7 @@ def _get_data(data_source, context):
             "{help}".format(ds=context.name,
                 e=str(e), help=context.help()))
 
-        raise ex.with_traceback(sys.exc_info()[2])
+        six.reraise(ValueError, ex, sys.exc_info()[2])
 
 def _supply_data(data_sink, context):
     """ Supply data to the data sink """
@@ -1155,7 +1156,7 @@ def _supply_data(data_sink, context):
             "{help}".format(ds=context.name,
                 e=str(e), help=context.help()))
 
-        raise ex.with_traceback(sys.exc_info()[2])
+        six.reraise(ValueError, ex, sys.exc_info()[2])
 
 def _iter_args(iter_dims, cube):
     iter_strides = cube.dim_extent_size(*iter_dims)
diff --git a/montblanc/impl/rime/tensorflow/queue_wrapper.py b/montblanc/impl/rime/tensorflow/queue_wrapper.py
index 80dc4a6c9..d536bfff7 100644
--- a/montblanc/impl/rime/tensorflow/queue_wrapper.py
+++ b/montblanc/impl/rime/tensorflow/queue_wrapper.py
@@ -1,7 +1,7 @@
 import collections
 import itertools
 import sys
-
+import six
 from attrdict import AttrDict
 import tensorflow as tf
 
@@ -14,8 +14,7 @@ def _get_queue_types(fed_arrays, data_sources):
     try:
         return [data_sources[n].dtype for n in fed_arrays]
     except KeyError as e:
-        raise ValueError("Array '{k}' has no data source!"
-            .format(k=e.message)).with_traceback(sys.exc_info()[2])
+        six.reraise(ValueError, ValueError("Array '{k}' has no data source!"), sys.exc_info()[2])
 
 class QueueWrapper(object):
     def __init__(self, name, queue_size, fed_arrays, data_sources, shared_name=None):