diff --git a/.gitattributes b/.gitattributes
index fe756e6d..6b4d6de6 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1 +1,4 @@
 extras/* linguist-vendored
+
+# Never modify line endings of our bash scripts
+*.sh -lf
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index fd3de313..99e96156 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,7 +1,19 @@
-
 # Change Log
 All notable changes to COMMIT will be documented in this file.
 
+## [1.7.0] - 2022-06-07
+
+### Added
+- GPU acceleration with CUDA for faster model fitting
+- 'cudaoperator' extension to handle operator in GPU memory
+
+### Fixed
+- Changed end of line from CRLF to LF in several files
+
+### Changed
+- setupy.py: Added custom cython compilation for .cu files with nvcc
+- set_threads(): 'n' parameter was renamed to 'nthreads'
+
 ## [1.6.2] - 2022-05-05
 
 ### Fixed
diff --git a/commit/core.pyx b/commit/core.pyx
index 52f7c0d6..9471fee8 100755
--- a/commit/core.pyx
+++ b/commit/core.pyx
@@ -421,8 +421,8 @@ cdef class Evaluation :
         self.DICTIONARY['IC']['n']     = self.DICTIONARY['IC']['fiber'].size
         self.DICTIONARY['IC']['nF']    = self.DICTIONARY['TRK']['norm'].size
 
-        # reorder the segments based on the "v" field
-        idx = np.argsort( self.DICTIONARY['IC']['v'], kind='mergesort' )
+        # reorder the segments based, first, on the "v" field and after based on the "o" field
+        idx = np.lexsort( [np.array(self.DICTIONARY['IC']['o']), np.array(self.DICTIONARY['IC']['v'])] )
         self.DICTIONARY['IC']['v']     = self.DICTIONARY['IC']['v'][ idx ]
         self.DICTIONARY['IC']['o']     = self.DICTIONARY['IC']['o'][ idx ]
         self.DICTIONARY['IC']['fiber'] = self.DICTIONARY['IC']['fiber'][ idx ]
@@ -452,8 +452,8 @@ cdef class Evaluation :
         self.DICTIONARY['EC']['o']  = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_EC_o.dict'), dtype=np.uint16 )
         self.DICTIONARY['EC']['nE'] = self.DICTIONARY['EC']['v'].size
 
-        # reorder the segments based on the "v" field
-        idx = np.argsort( self.DICTIONARY['EC']['v'], kind='mergesort' )
+        # reorder the segments based, first, on the "v" field and after based on the "o" field
+        idx = np.lexsort( [np.array(self.DICTIONARY['EC']['o']), np.array(self.DICTIONARY['EC']['v'])] )
         self.DICTIONARY['EC']['v'] = self.DICTIONARY['EC']['v'][ idx ]
         self.DICTIONARY['EC']['o'] = self.DICTIONARY['EC']['o'][ idx ]
         del idx
@@ -504,31 +504,60 @@ cdef class Evaluation :
         LOG( '   [ %.1f seconds ]' % ( time.time() - tic ) )
 
 
-    def set_threads( self, n=None ) :
+    def set_threads( self, n = None, nthreads = None, gpu_id = 0 ) :
         """Set the number of threads to use for the matrix-vector operations with A and A'.
 
         Parameters
         ----------
         n : integer
-            Number of threads to use (default : number of CPUs in the system)
+            Same as nthreads. This remains just for compatibility with previous versions
+
+        nthreads : integer
+            Number of threads to use (nthreads = None ---> all the CPU threads available in the system
+                                      nthreads = 0    ---> enable CUDA GPU acceleration)
+        gpu_id : integer
+            GPU ID of the Nvidia GPU where COMMIT will be executed, default=0 and it is only required if nthreads=0
+            (To show a list of Nvidia GPUs and their IDs, open a system shell and run the command 'nvidia-smi')
         """
-        if n is None :
-            # Set to the number of CPUs in the system
-            try :
-                import multiprocessing
-                n = multiprocessing.cpu_count()
-            except :
-                n = 1
-
-        if n < 1 or n > 255 :
-            ERROR( 'Number of threads must be between 1 and 255' )
+        if nthreads is None :
+            if n != None :
+                WARNING( '"n" parameter is deprecated, use "nthreads" instead' )
+                nthreads = n
+            else:
+                # Set to the number of CPUs in the system
+                try :
+                    import multiprocessing
+                    nthreads = multiprocessing.cpu_count()
+                except :
+                    nthreads = 1
+
+        if nthreads < 0 or nthreads > 255 :
+            ERROR( 'Number of threads must be between 0 and 255' )
         if self.DICTIONARY is None :
             ERROR( 'Dictionary not loaded; call "load_dictionary()" first' )
         if self.KERNELS is None :
             ERROR( 'Response functions not generated; call "generate_kernels()" and "load_kernels()" first' )
 
         self.THREADS = {}
-        self.THREADS['n'] = n
+        self.THREADS['n'] = nthreads
+        if nthreads == 0:
+            self.THREADS['gpu_id'] = gpu_id
+            LOG( '\n-> Checking CUDA GPU:' )
+
+            from commit.cudaoperator.operator import check_compatibility
+            #cdef unsigned long long required_mem = 28*self.n + 6*self.nzeppelins + 8.0*(size_t)nfibers + 16.0*(size_t)nvoxels + 4.0*((size_t)size_lutic + (size_t)size_lutec + (size_t)size_lutiso + (size_t)this->nrows + (size_t)this->ncols)
+            error_id = check_compatibility(gpu_id)
+            if error_id == 1:
+                ERROR( 'The selected GPU is not detected' )
+            elif error_id == 2:
+                ERROR( 'Impossible to set GPU with ID=%d' % gpu_id )
+            elif error_id == 3:
+                ERROR( 'Impossible to get properties from GPU with ID=%d' % gpu_id )
+            elif error_id == 4:
+                ERROR( 'Compute capability must be at least 5.0' )
+
+            if gpu_id == 0:
+                LOG( '   [ Default GPU selected. Use option "gpu_id" in "set_threads()" to change selection ]' )
 
         cdef :
             long [:] C
@@ -536,120 +565,116 @@ cdef class Evaluation :
             int i
 
         tic = time.time()
-        LOG( '\n-> Distributing workload to different threads:' )
-        print( '\t* Number of threads : %d' % n )
 
-        # Distribute load for the computation of A*x product
-        print( '\t* A operator...  ', end='' )
-        sys.stdout.flush()
+        if nthreads > 0:
+            LOG( '\n-> Distributing workload to different threads:' )
+            print( '\t* number of threads : %d' % nthreads )
 
-        if self.DICTIONARY['IC']['n'] > 0 :
-            self.THREADS['IC'] = np.zeros( n+1, dtype=np.uint32 )
-            if n > 1 :
-                N = np.floor( self.DICTIONARY['IC']['n']/n )
-                t = 1
-                tot = 0
-                C = np.bincount( self.DICTIONARY['IC']['v'] )
-                for c in C :
-                    tot += c
-                    if tot >= N and t <= n :
-                        self.THREADS['IC'][t] = self.THREADS['IC'][t-1] + tot
-                        t += 1
-                        tot = 0
-            self.THREADS['IC'][n] = self.DICTIONARY['IC']['n']
-
-            # check if some threads are not assigned any segment
-            if np.count_nonzero( np.diff( self.THREADS['IC'].astype(np.int32) ) <= 0 ) :
-                self.THREADS = None
-                ERROR( 'Too many threads for the IC compartments to evaluate; try decreasing the number', prefix='\n' )
-        else :
-            self.THREADS['IC'] = None
-
-        if self.DICTIONARY['EC']['nE'] > 0 :
-            self.THREADS['EC'] = np.zeros( n+1, dtype=np.uint32 )
-            for i in xrange(n) :
-                self.THREADS['EC'][i] = np.searchsorted( self.DICTIONARY['EC']['v'], self.DICTIONARY['IC']['v'][ self.THREADS['IC'][i] ] )
-            self.THREADS['EC'][n] = self.DICTIONARY['EC']['nE']
-
-            # check if some threads are not assigned any segment
-            if np.count_nonzero( np.diff( self.THREADS['EC'].astype(np.int32) ) <= 0 ) :
-                self.THREADS = None
-                ERROR( 'Too many threads for the EC compartments to evaluate; try decreasing the number', prefix='\n' )
-        else :
-            self.THREADS['EC'] = None
-
-        if self.DICTIONARY['nV'] > 0 :
-            self.THREADS['ISO'] = np.zeros( n+1, dtype=np.uint32 )
-            for i in xrange(n) :
-                self.THREADS['ISO'][i] = np.searchsorted( self.DICTIONARY['ISO']['v'], self.DICTIONARY['IC']['v'][ self.THREADS['IC'][i] ] )
-            self.THREADS['ISO'][n] = self.DICTIONARY['nV']
-
-            # check if some threads are not assigned any segment
-            if np.count_nonzero( np.diff( self.THREADS['ISO'].astype(np.int32) ) <= 0 ) :
-                self.THREADS = None
-                ERROR( 'Too many threads for the ISO compartments to evaluate; try decreasing the number', prefix='\n' )
-        else :
-            self.THREADS['ISO'] = None
+            # Distribute load for the computation of A*x product
+            print( '\t* A  operator... ', end='' )
+            sys.stdout.flush()
 
-        print( '[ OK ]' )
+            self.THREADS['IC']   = None
+            self.THREADS['EC']   = None
+            self.THREADS['ISO']  = None
+            self.THREADS['ICt']  = None
+            self.THREADS['ECt']  = None
+            self.THREADS['ISOt'] = None
 
-        # Distribute load for the computation of At*y product
-        print( '\t* A\' operator... ', end='' )
-        sys.stdout.flush()
+            if self.DICTIONARY['IC']['n'] > 0 :
+                self.THREADS['IC'] = np.zeros( nthreads+1, dtype=np.uint32 )
+                if nthreads > 1 :
+                    N = np.floor( self.DICTIONARY['IC']['n']/nthreads )
+                    t = 1
+                    tot = 0
+                    C = np.bincount( self.DICTIONARY['IC']['v'] )
+                    for c in C :
+                        tot += c
+                        if tot >= N :
+                            self.THREADS['IC'][t] = self.THREADS['IC'][t-1] + tot
+                            t += 1
+                            tot = 0
+                self.THREADS['IC'][nthreads] = self.DICTIONARY['IC']['n']
+
+                # check if some threads are not assigned any segment
+                if np.count_nonzero( np.diff( self.THREADS['IC'].astype(np.int32) ) <= 0 ) :
+                    self.THREADS = None
+                    ERROR( 'Too many threads for the IC compartments to evaluate; try decreasing the number.' )
+
+            if self.DICTIONARY['EC']['nE'] > 0 :
+                self.THREADS['EC'] = np.zeros( nthreads+1, dtype=np.uint32 )
+                for i in xrange(nthreads) :
+                    self.THREADS['EC'][i] = np.searchsorted( self.DICTIONARY['EC']['v'], self.DICTIONARY['IC']['v'][ self.THREADS['IC'][i] ] )
+                self.THREADS['EC'][nthreads] = self.DICTIONARY['EC']['nE']
+
+                # check if some threads are not assigned any segment
+                if np.count_nonzero( np.diff( self.THREADS['EC'].astype(np.int32) ) <= 0 ) :
+                    self.THREADS = None
+                    ERROR( 'Too many threads for the EC compartments to evaluate; try decreasing the number.' )
+
+            if self.DICTIONARY['nV'] > 0 :
+                self.THREADS['ISO'] = np.zeros( nthreads+1, dtype=np.uint32 )
+                for i in xrange(nthreads) :
+                    self.THREADS['ISO'][i] = np.searchsorted( self.DICTIONARY['ISO']['v'], self.DICTIONARY['IC']['v'][ self.THREADS['IC'][i] ] )
+                self.THREADS['ISO'][nthreads] = self.DICTIONARY['nV']
+
+                # check if some threads are not assigned any segment
+                if np.count_nonzero( np.diff( self.THREADS['ISO'].astype(np.int32) ) <= 0 ) :
+                    self.THREADS = None
+                    ERROR( 'Too many threads for the ISO compartments to evaluate; try decreasing the number.' )
 
-        if self.DICTIONARY['IC']['n'] > 0 :
-            self.THREADS['ICt'] = np.full( self.DICTIONARY['IC']['n'], n-1, dtype=np.uint8 )
-            if n > 1 :
-                idx = np.argsort( self.DICTIONARY['IC']['fiber'], kind='mergesort' )
-                C = np.bincount( self.DICTIONARY['IC']['fiber'] )
-                t = tot = i1 = i2 = 0
-                N = np.floor(self.DICTIONARY['IC']['n']/n)
-                for c in C :
-                    i2 += c
-                    tot += c
-                    if tot >= N :
-                        self.THREADS['ICt'][ i1:i2 ] = t
-                        t += 1
-                        if t==n-1 :
-                            break
-                        i1 = i2
-                        tot = c
-                self.THREADS['ICt'][idx] = self.THREADS['ICt'].copy()
+            print( '[ OK ]' )
 
-        else :
-            self.THREADS['ICt'] = None
-
-        if self.DICTIONARY['EC']['nE'] > 0 :
-            self.THREADS['ECt'] = np.zeros( n+1, dtype=np.uint32 )
-            N = np.floor( self.DICTIONARY['EC']['nE']/n )
-            for i in xrange(1,n) :
-                self.THREADS['ECt'][i] = self.THREADS['ECt'][i-1] + N
-            self.THREADS['ECt'][n] = self.DICTIONARY['EC']['nE']
-
-            # check if some threads are not assigned any segment
-            if np.count_nonzero( np.diff( self.THREADS['ECt'].astype(np.int32) ) <= 0 ) :
-                self.THREADS = None
-                ERROR( 'Too many threads for the EC compartments to evaluate; try decreasing the number', prefix='\n' )
-        else :
-            self.THREADS['ECt'] = None
-
-        if self.DICTIONARY['nV'] > 0 :
-            self.THREADS['ISOt'] = np.zeros( n+1, dtype=np.uint32 )
-            N = np.floor( self.DICTIONARY['nV']/n )
-            for i in xrange(1,n) :
-                self.THREADS['ISOt'][i] = self.THREADS['ISOt'][i-1] + N
-            self.THREADS['ISOt'][n] = self.DICTIONARY['nV']
-
-            # check if some threads are not assigned any segment
-            if np.count_nonzero( np.diff( self.THREADS['ISOt'].astype(np.int32) ) <= 0 ) :
-                self.THREADS = None
-                ERROR( 'Too many threads for the ISO compartments to evaluate; try decreasing the number', prefix='\n' )
-        else :
-            self.THREADS['ISOt'] = None
+            # Distribute load for the computation of At*y product
+            print( '\t* A\' operator... ', end="" )
+            sys.stdout.flush()
 
-        print( '[ OK ]' )
+            if self.DICTIONARY['IC']['n'] > 0 :
+                self.THREADS['ICt'] = np.full( self.DICTIONARY['IC']['n'], nthreads-1, dtype=np.uint8 )
+                if nthreads > 1 :
+                    idx = np.argsort( self.DICTIONARY['IC']['fiber'], kind='mergesort' )
+                    C = np.bincount( self.DICTIONARY['IC']['fiber'] )
+                    t = tot = i1 = i2 = 0
+                    N = np.floor(self.DICTIONARY['IC']['n']/nthreads)
+                    for c in C :
+                        i2 += c
+                        tot += c
+                        if tot >= N :
+                            self.THREADS['ICt'][ i1:i2 ] = t
+                            t += 1
+                            if t==nthreads-1 :
+                                break
+                            i1 = i2
+                            tot = c
+                    self.THREADS['ICt'][idx] = self.THREADS['ICt'].copy()
+
+            if self.DICTIONARY['EC']['nE'] > 0 :
+                self.THREADS['ECt'] = np.zeros( nthreads+1, dtype=np.uint32 )
+                N = np.floor( self.DICTIONARY['EC']['nE']/nthreads )
+                for i in xrange(1,nthreads) :
+                    self.THREADS['ECt'][i] = self.THREADS['ECt'][i-1] + N
+                self.THREADS['ECt'][nthreads] = self.DICTIONARY['EC']['nE']
+
+                # check if some threads are not assigned any segment
+                if np.count_nonzero( np.diff( self.THREADS['ECt'].astype(np.int32) ) <= 0 ) :
+                    self.THREADS = None
+                    ERROR( 'Too many threads for the EC compartments to evaluate; try decreasing the number.' )
+
+            if self.DICTIONARY['nV'] > 0 :
+                self.THREADS['ISOt'] = np.zeros( nthreads+1, dtype=np.uint32 )
+                N = np.floor( self.DICTIONARY['nV']/nthreads )
+                for i in xrange(1,nthreads) :
+                    self.THREADS['ISOt'][i] = self.THREADS['ISOt'][i-1] + N
+                self.THREADS['ISOt'][nthreads] = self.DICTIONARY['nV']
+
+                # check if some threads are not assigned any segment
+                if np.count_nonzero( np.diff( self.THREADS['ISOt'].astype(np.int32) ) <= 0 ) :
+                    self.THREADS = None
+                    ERROR( 'Too many threads for the ISO compartments to evaluate; try decreasing the number.' )
 
-        LOG( '   [ %.1f seconds ]' % ( time.time() - tic ) )
+            print( '[ OK ]' )
+
+            LOG( '   [ %.1f seconds ]' % ( time.time() - tic ) )
 
 
     def build_operator( self, build_dir=None ) :
@@ -681,49 +706,53 @@ cdef class Evaluation :
         tic = time.time()
         LOG( '\n-> Building linear operator A:' )
 
-        # need to pass these parameters at runtime for compiling the C code
-        from commit.operator import config
-
-        compilation_is_needed = False
-
-        if config.nTHREADS is None or config.nTHREADS != self.THREADS['n']:
-            compilation_is_needed = True
-        if config.nIC is None or config.nIC != self.KERNELS['wmr'].shape[0]:
-            compilation_is_needed = True
-        if config.model is None or config.model != self.model.id:
-            compilation_is_needed = True
-        if config.nEC is None or config.nEC != self.KERNELS['wmh'].shape[0]:
-            compilation_is_needed = True
-        if config.nISO is None or config.nISO != self.KERNELS['iso'].shape[0]:
-            compilation_is_needed = True
-        if config.build_dir != build_dir:
-            compilation_is_needed = True
-
-        if compilation_is_needed or not 'commit.operator.operator' in sys.modules :
-
-            if build_dir is not None:
-                if isdir(build_dir) and not len(listdir(build_dir)) == 0:
-                    ERROR( '\nbuild_dir is not empty, unsafe build option.' )
-                elif config.nTHREADS is not None:
-                    ERROR( '\nThe parameter build_dir has changed, unsafe build option.' )
-                else:
-                    WARNING( '\nUsing build_dir, always quit your python console between COMMIT Evaluation.' )
-
-            config.nTHREADS   = self.THREADS['n']
-            config.model      = self.model.id
-            config.nIC        = self.KERNELS['wmr'].shape[0]
-            config.nEC        = self.KERNELS['wmh'].shape[0]
-            config.nISO       = self.KERNELS['iso'].shape[0]
-            config.build_dir  = build_dir
-
-            pyximport.install( reload_support=True, language_level=3, build_dir=build_dir, build_in_temp=True, inplace=False )
-
-            if not 'commit.operator.operator' in sys.modules :
-                import commit.operator.operator
-            else :
-                reload( sys.modules['commit.operator.operator'] )
-
-        self.A = sys.modules['commit.operator.operator'].LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS )
+        if self.THREADS['n'] > 0:
+            # need to pass these parameters at runtime for compiling the C code
+            from commit.operator import config
+
+            compilation_is_needed = False
+            
+            if config.nTHREADS is None or config.nTHREADS != self.THREADS['n']:
+                compilation_is_needed = True
+            if config.nIC is None or config.nIC != self.KERNELS['wmr'].shape[0]:
+                compilation_is_needed = True
+            if config.model is None or config.model != self.model.id:
+                compilation_is_needed = True        
+            if config.nEC is None or config.nEC != self.KERNELS['wmh'].shape[0]:
+                compilation_is_needed = True                
+            if config.nISO is None or config.nISO != self.KERNELS['iso'].shape[0]:
+                compilation_is_needed = True        
+            if config.build_dir != build_dir:
+                compilation_is_needed = True        
+
+            if compilation_is_needed or not 'commit.operator.operator' in sys.modules :       
+
+                if build_dir is not None:
+                    if isdir(build_dir) and not len(listdir(build_dir)) == 0:
+                        ERROR( '\nbuild_dir is not empty, unsafe build option.' )
+                    elif config.nTHREADS is not None:
+                        ERROR( '\nThe parameter build_dir has changed, unsafe build option.' )
+                    else:
+                        WARNING( '\nUsing build_dir, always quit your python console between COMMIT Evaluation.' )
+
+                config.nTHREADS   = self.THREADS['n']
+                config.model      = self.model.id
+                config.nIC        = self.KERNELS['wmr'].shape[0]
+                config.nEC        = self.KERNELS['wmh'].shape[0]
+                config.nISO       = self.KERNELS['iso'].shape[0]
+                config.build_dir  = build_dir
+
+                pyximport.install( reload_support=True, language_level=3, build_dir=build_dir, build_in_temp=True, inplace=False )
+
+                if not 'commit.operator.operator' in sys.modules :
+                    import commit.operator.operator
+                else :
+                    reload( sys.modules['commit.operator.operator'] )
+                
+            self.A = sys.modules['commit.operator.operator'].LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS )        
+        else:
+            import commit.cudaoperator.operator
+            self.A = commit.cudaoperator.operator.CudaLinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS, fcall=1 )
 
         LOG( '   [ %.1f seconds ]' % ( time.time() - tic ) )
 
diff --git a/commit/cudaoperator/operator.pyx b/commit/cudaoperator/operator.pyx
new file mode 100644
index 00000000..027bf484
--- /dev/null
+++ b/commit/cudaoperator/operator.pyx
@@ -0,0 +1,224 @@
+#!python
+#cython: language_level=3, boundscheck=False, wraparound=False, profile=False
+
+import cython
+import numpy as np
+cimport numpy as np
+from amico.util import ERROR, LOG
+
+cdef extern from "operator_withCUDA.cuh":
+    int checkCompatibility(int)
+
+def check_compatibility(gpu_id):
+    return checkCompatibility(gpu_id)
+
+def check_cuda(error_id):
+    if error_id == -1:
+        ERROR( 'Impossible to allocate auxiliar memory in CPU' )
+    elif error_id == 1:
+        ERROR( 'Impossible to allocate memory in GPU' )
+    elif error_id == 2:
+        ERROR( 'Impossible to transfer memory to GPU' )
+    elif error_id == 3:
+        ERROR( 'Impossible to bind textures' )
+    elif error_id == 4:
+        ERROR( 'Impossible to transfer constant values to GPU' )
+    elif error_id == 5:
+        ERROR( 'There was a problem deleting GPU memory' )
+    elif error_id == 6:
+        ERROR( 'There was a problem unbinding texture memory' )
+    elif error_id == 7:
+        ERROR( 'There was a problem resetting GPU' )
+    elif error_id == 0:
+        print( '[ OK ]' )
+
+cdef extern from "operator_withCUDA.cuh":
+    cdef cppclass C_CudaLinearOperator "CudaLinearOperator":
+        C_CudaLinearOperator(int, int, int, int, int, int, int, int, int)
+
+        int setDictionary(np.uint32_t*, np.uint32_t*, np.uint16_t*, np.float32_t*,  np.uint32_t*, np.uint16_t*)
+        int setTransposeDictionary(np.uint32_t*, np.uint32_t*, np.uint16_t*, np.float32_t*)
+        int setKernels(np.float32_t*, np.float32_t*, np.float32_t*)
+        int setVectors()
+        int setGlobals()
+        int destroy()
+
+        void  dot(np.float64_t*, np.float64_t*)
+        void Tdot(np.float64_t*, np.float64_t*)
+
+cdef class CudaLinearOperator :
+    """This class is a wrapper to the CUDA C++ code for performing marix-vector multiplications
+    with the COMMIT linear operator A in a CUDA GPU. The multiplications are done using CUDA C++ code
+    that uses information from the DICTIONARY and KERNELS data structures.
+    """
+    cdef int nS, nF, nR, nE, nT, nV, nI, n, ndirs, gpu_id
+    cdef public int adjoint, n1, n2
+
+    cdef DICTIONARY
+    cdef KERNELS
+    cdef THREADS
+
+    cdef unsigned int*   ICf
+    cdef float*          ICl
+    cdef unsigned int*   ICv
+    cdef unsigned short* ICo
+    cdef unsigned int*   ECv
+    cdef unsigned short* ECo
+    cdef unsigned int*   ISOv
+
+    cdef float* LUT_IC
+    cdef float* LUT_EC
+    cdef float* LUT_ISO
+
+    # pointer to this operator in GPU memory
+    cdef C_CudaLinearOperator* thisptr
+
+    # these should be always None, they remain for compatibility
+    cdef unsigned int*   ICthreads
+    cdef unsigned int*   ECthreads
+    cdef unsigned int*   ISOthreads
+    cdef unsigned char*  ICthreadsT
+    cdef unsigned int*   ECthreadsT
+    cdef unsigned int*   ISOthreadsT
+
+
+    def __init__( self, DICTIONARY, KERNELS, THREADS, fcall = 0 ) :
+        """Set the pointers to the data structures used by the C code."""
+        self.DICTIONARY = DICTIONARY
+        self.KERNELS    = KERNELS
+        self.THREADS    = THREADS
+
+        self.nF         = DICTIONARY['IC']['nF']    # number of FIBERS
+        self.nR         = KERNELS['wmr'].shape[0]   # number of FIBER RADII
+        self.nE         = DICTIONARY['EC']['nE']    # number of EC segments
+        self.nT         = KERNELS['wmh'].shape[0]   # number of EC TORTUOSITY values
+        self.nV         = DICTIONARY['nV']          # number of VOXELS
+        self.nI         = KERNELS['iso'].shape[0]   # number of ISO contributions
+        self.n          = DICTIONARY['IC']['n']     # numbner of IC segments
+        self.ndirs      = KERNELS['wmr'].shape[1]   # number of directions
+        self.gpu_id     = THREADS['gpu_id']          # id of the CUDA GPU
+
+        if KERNELS['wmr'].size > 0 :
+            self.nS = KERNELS['wmr'].shape[2]       # number of SAMPLES
+        elif KERNELS['wmh'].size > 0 :
+            self.nS = KERNELS['wmh'].shape[2]
+        else :
+            self.nS = KERNELS['wmr'].shape[1]
+
+        self.adjoint = 0                            # direct of inverse product
+
+        self.n1 = self.nV*self.nS
+        self.n2 = self.nR*self.nF + self.nT*self.nE + self.nI*self.nV
+
+        # get C pointers to arrays in DICTIONARY
+        cdef unsigned int [::1]   ICf  = DICTIONARY['IC']['fiber']
+        self.ICf = &ICf[0]
+        cdef float [::1]          ICl  = DICTIONARY['IC']['len']
+        self.ICl = &ICl[0]
+        cdef unsigned int [::1]   ICv  = DICTIONARY['IC']['v']
+        self.ICv = &ICv[0]
+        cdef unsigned short [::1] ICo  = DICTIONARY['IC']['o']
+        self.ICo = &ICo[0]
+        cdef unsigned int [::1]   ECv  = DICTIONARY['EC']['v']
+        self.ECv = &ECv[0]
+        cdef unsigned short [::1] ECo  = DICTIONARY['EC']['o']
+        self.ECo = &ECo[0]
+        cdef unsigned int [::1]   ISOv = DICTIONARY['ISO']['v']
+        self.ISOv = &ISOv[0]
+
+        # get C pointers to arrays in KERNELS
+        cdef float [:, :, ::1] wmrSFP = KERNELS['wmr']
+        self.LUT_IC  = &wmrSFP[0,0,0]
+        cdef float [:, :, ::1] wmhSFP = KERNELS['wmh']
+        self.LUT_EC  = &wmhSFP[0,0,0]
+        cdef float [:, ::1] isoSFP = KERNELS['iso']
+        self.LUT_ISO = &isoSFP[0,0]
+
+        # create the operator in GPU memory
+        self.thisptr = new C_CudaLinearOperator(self.n, self.nV, self.nF, self.nE, self.ndirs, self.nS, self.nR, self.nT, self.nI)
+
+        # build operator in GPU only one time
+        if fcall == 1:
+            print( '\t* global values... ', end='' )
+            check_cuda( self.thisptr.setGlobals() )
+
+            print( '\t* lookup tables... ', end='' )
+            check_cuda( self.thisptr.setKernels(&wmrSFP[0,0,0], &wmhSFP[0,0,0], &isoSFP[0,0]) )
+
+            print( '\t* x&y vectors...   ', end='' )
+            check_cuda( self.thisptr.setVectors() )
+        
+            print( '\t* A  operator...   ', end='' )
+            check_cuda( self.thisptr.setDictionary(&ICv[0],&ICf[0],&ICo[0],&ICl[0], &ECv[0],&ECo[0]) )
+
+            idx = np.lexsort( [np.array(self.DICTIONARY['IC']['o']), np.array(self.DICTIONARY['IC']['fiber'])] )
+
+            self.DICTIONARY['IC']['v']     = self.DICTIONARY['IC']['v'][ idx ]
+            self.DICTIONARY['IC']['o']     = self.DICTIONARY['IC']['o'][ idx ]
+            self.DICTIONARY['IC']['fiber'] = self.DICTIONARY['IC']['fiber'][ idx ]
+            self.DICTIONARY['IC']['len']   = self.DICTIONARY['IC']['len'][ idx ]
+
+            ICf = self.DICTIONARY['IC']['fiber']
+            ICl = self.DICTIONARY['IC']['len']
+            ICv = self.DICTIONARY['IC']['v']
+            ICo = self.DICTIONARY['IC']['o']
+
+            self.ICf = &ICf[0]
+            self.ICl = &ICl[0]
+            self.ICv = &ICv[0]
+            self.ICo = &ICo[0]
+
+            print( '\t* A\' operator...   ', end='' )
+            check_cuda( self.thisptr.setTransposeDictionary(&self.ICv[0], &self.ICf[0], &self.ICo[0], &self.ICl[0]) )
+
+    def __del__( self ):
+        self.thisptr.destroy()
+
+    @property
+    def T( self ) :
+        """Transpose of the explicit matrix."""
+        C = CudaLinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS )
+        C.adjoint = 1 - C.adjoint
+        return C
+
+    @property
+    def shape( self ) :
+        """Size of the explicit matrix."""
+        if not self.adjoint :
+            return ( self.n1, self.n2 )
+        else :
+            return ( self.n2, self.n1 )
+
+
+    def dot( self, double [::1] v_in  ):
+        """Wrapper to C code for efficiently performing the matrix-vector multiplications.
+
+        Parameters
+        ----------
+        v_in : 1D numpy.array of double
+            Input vector for the matrix-vector multiplication
+
+        Returns
+        -------
+        v_out : 1D numpy.array of double
+            Results of the multiplication
+        """
+
+        # Permit only matrix-vector multiplications
+        if v_in.size != self.shape[1] :
+            ERROR( "A.dot(): dimensions do not match" )
+
+        # Create output array
+        cdef double [::1] v_out = np.zeros( self.shape[0], dtype=np.float64 )
+
+        # Call the cython function to read the memory pointers
+        if not self.adjoint :
+            # DIRECT PRODUCT A*x
+            self.thisptr.dot(&v_in[0], &v_out[0])
+        else :
+            # INVERSE PRODUCT A'*y
+            self.thisptr.Tdot(&v_in[0], &v_out[0])
+
+        return v_out
+
+
diff --git a/commit/cudaoperator/operator_withCUDA.cu b/commit/cudaoperator/operator_withCUDA.cu
new file mode 100644
index 00000000..0156913c
--- /dev/null
+++ b/commit/cudaoperator/operator_withCUDA.cu
@@ -0,0 +1,653 @@
+#include "operator_withCUDA.cuh"
+
+// ====================================================
+// Textures for LUT in the GPU memory
+// ====================================================
+texture<float32_t, 1, cudaReadModeElementType> tex_lutIC;
+texture<float32_t, 1, cudaReadModeElementType> tex_lutEC;
+texture<float32_t, 1, cudaReadModeElementType> tex_lutISO;
+
+
+int checkCompatibility(int gpuID) {
+    int gpuCount;
+    cudaError_t cudaStatus;
+    
+    cudaStatus = cudaGetDeviceCount(&gpuCount);
+
+    if (gpuCount <= 0 || gpuID >= gpuCount || cudaStatus != cudaSuccess) return 1;
+
+    cudaStatus = cudaSetDevice(gpuID);
+
+    if (cudaStatus != cudaSuccess) return 2;
+
+    cudaDeviceProp gpuProperties;
+    cudaStatus = cudaGetDeviceProperties(&gpuProperties, gpuID);
+
+    if (cudaStatus != cudaSuccess) return 3;
+
+    printf("\t* selected GPU...       [ %s ]\n",     gpuProperties.name);
+    printf("\t* total memory...       [ %.2fGB ]\n", gpuProperties.totalGlobalMem*1e-9);
+    printf("\t* compute capability... [ %d.%d ]\n",  gpuProperties.major, gpuProperties.minor);
+
+    if(gpuProperties.major < 5) return 4;
+
+    return 0;
+}
+
+void cudaCheckLastError()
+{
+    cudaError_t err = cudaGetLastError();
+
+    if(err != cudaSuccess){
+        printf("CUDA Error: %s\n", cudaGetErrorString(err));
+        exit(-1);
+    }
+}
+
+void preprocessDataForGPU(uint32_t* data, int NUM_COMPARTMENTS, uint32_t* compartmentsPerBlock, uint32_t* offsetPerBlock, int NUM_BLOCKS){
+
+    // fill arrays with zeros
+    memset(compartmentsPerBlock, 0, NUM_BLOCKS * sizeof(uint32_t));
+    memset(offsetPerBlock,       0, NUM_BLOCKS * sizeof(uint32_t));
+
+    // count compartments per block
+    for(int i = 0; i < NUM_COMPARTMENTS; i++)
+        compartmentsPerBlock[data[i]]++;
+
+    // calculate offset per block
+    offsetPerBlock[0] = 0;
+    for(int i = 1; i < NUM_BLOCKS; i++)
+        offsetPerBlock[i] = offsetPerBlock[i-1] + compartmentsPerBlock[i-1];
+}
+
+int CudaLinearOperator::setDictionary(uint32_t* voxelIC, uint32_t* fiberIC, uint16_t* orienIC, float32_t* lengthIC, uint32_t* voxelEC, uint16_t* orienEC){
+    
+    cudaError_t cudaStatus;
+
+    uint32_t* segmentsPerBlock = (uint32_t*) malloc(nvoxels*sizeof(uint32_t));
+    uint32_t* offsetPerBlock   = (uint32_t*) malloc(nvoxels*sizeof(uint32_t));
+
+    if (segmentsPerBlock == NULL || offsetPerBlock == NULL) return -1;
+
+    preprocessDataForGPU(voxelIC, nsegments, segmentsPerBlock, offsetPerBlock, nvoxels);
+
+    cudaStatus = cudaMalloc((void**)&gpu_segmentsPerBlockIC, nvoxels*sizeof(uint32_t));
+    if (cudaStatus != cudaSuccess) return 1;
+    cudaStatus = cudaMalloc((void**)&gpu_offsetPerBlockIC,   nvoxels*sizeof(uint32_t));
+    if (cudaStatus != cudaSuccess) return 1;
+
+    cudaStatus = cudaMemcpy(gpu_segmentsPerBlockIC, segmentsPerBlock, nvoxels*sizeof(uint32_t), cudaMemcpyHostToDevice);
+    if (cudaStatus != cudaSuccess) return 2;
+    cudaStatus = cudaMemcpy(gpu_offsetPerBlockIC,   offsetPerBlock,   nvoxels*sizeof(uint32_t), cudaMemcpyHostToDevice);
+    if (cudaStatus != cudaSuccess) return 2;
+
+    if (npeaks > 0){
+        preprocessDataForGPU(voxelEC, npeaks, segmentsPerBlock, offsetPerBlock, nvoxels);
+
+        cudaStatus = cudaMalloc((void**)&gpu_segmentsPerBlockEC, nvoxels*sizeof(uint32_t));
+        if (cudaStatus != cudaSuccess) return 1;
+        cudaStatus = cudaMalloc((void**)&gpu_offsetPerBlockEC,   nvoxels*sizeof(uint32_t));
+        if (cudaStatus != cudaSuccess) return 1;
+
+        cudaStatus = cudaMemcpy(gpu_segmentsPerBlockEC, segmentsPerBlock, nvoxels*sizeof(uint32_t), cudaMemcpyHostToDevice);
+        if (cudaStatus != cudaSuccess) return 2;
+        cudaStatus = cudaMemcpy(gpu_offsetPerBlockEC,   offsetPerBlock,   nvoxels*sizeof(uint32_t), cudaMemcpyHostToDevice);
+        if (cudaStatus != cudaSuccess) return 2;
+    }
+
+    free(segmentsPerBlock);
+    free(offsetPerBlock);
+
+    // alloc IC part of the dictionary in GPU
+    cudaStatus = cudaMalloc((void**)&gpu_voxelIC,  nsegments*sizeof(uint32_t)); 
+    if (cudaStatus != cudaSuccess) return 1;
+    cudaStatus = cudaMalloc((void**)&gpu_fiberIC,  nsegments*sizeof(uint32_t)); 
+    if (cudaStatus != cudaSuccess) return 1;
+    cudaStatus = cudaMalloc((void**)&gpu_orienIC,  nsegments*sizeof(uint16_t)); 
+    if (cudaStatus != cudaSuccess) return 1;
+    cudaStatus = cudaMalloc((void**)&gpu_lengthIC, nsegments*sizeof(float32_t));
+    if (cudaStatus != cudaSuccess) return 1;
+
+    // transfer IC part of the dictionary to GPU
+    cudaStatus = cudaMemcpy(gpu_voxelIC,  voxelIC,  nsegments*sizeof(uint32_t),  cudaMemcpyHostToDevice);
+    if (cudaStatus != cudaSuccess) return 2;
+    cudaStatus = cudaMemcpy(gpu_fiberIC,  fiberIC,  nsegments*sizeof(uint32_t),  cudaMemcpyHostToDevice);
+    if (cudaStatus != cudaSuccess) return 2;
+    cudaStatus = cudaMemcpy(gpu_orienIC,  orienIC,  nsegments*sizeof(uint16_t),  cudaMemcpyHostToDevice);
+    if (cudaStatus != cudaSuccess) return 2;
+    cudaStatus = cudaMemcpy(gpu_lengthIC, lengthIC, nsegments*sizeof(float32_t), cudaMemcpyHostToDevice);
+    if (cudaStatus != cudaSuccess) return 2;
+
+    if (npeaks > 0){
+        // alloc EC part of the dictionary in GPU
+        cudaStatus = cudaMalloc((void**)&gpu_voxelEC,  npeaks*sizeof(uint32_t));
+        if (cudaStatus != cudaSuccess) return 1;
+        cudaStatus = cudaMalloc((void**)&gpu_orienEC,  npeaks*sizeof(uint16_t));
+        if (cudaStatus != cudaSuccess) return 1;
+
+        // transfer EC part of the dictionary to GPU
+        cudaStatus = cudaMemcpy(gpu_voxelEC,  voxelEC,  npeaks*sizeof(uint32_t), cudaMemcpyHostToDevice);
+        if (cudaStatus != cudaSuccess) return 2;
+        cudaStatus = cudaMemcpy(gpu_orienEC,  orienEC,  npeaks*sizeof(uint16_t), cudaMemcpyHostToDevice);
+        if (cudaStatus != cudaSuccess) return 2;
+    }
+
+    return 0;
+}
+
+int CudaLinearOperator::setTransposeDictionary(uint32_t* TvoxelIC, uint32_t* TfiberIC, uint16_t* TorienIC, float32_t* TlengthIC){
+    
+    cudaError_t cudaStatus;
+
+    uint32_t*  fibersPerBlock = (uint32_t*) malloc(nfibers*sizeof(uint32_t));
+    uint32_t*  offsetPerBlock = (uint32_t*) malloc(nfibers*sizeof(uint32_t));
+    if(fibersPerBlock == NULL || offsetPerBlock == NULL) return -1;
+
+    preprocessDataForGPU(TfiberIC, nsegments, fibersPerBlock, offsetPerBlock, nfibers);
+
+    cudaStatus = cudaMalloc((void**)&gpu_TfibersPerBlockIC, nfibers*sizeof(uint32_t));
+    if (cudaStatus != cudaSuccess) return 1;
+    cudaStatus = cudaMalloc((void**)&gpu_ToffsetPerBlockIC, nfibers*sizeof(uint32_t));
+    if (cudaStatus != cudaSuccess) return 1;
+
+    cudaStatus = cudaMemcpy(gpu_TfibersPerBlockIC, fibersPerBlock, nfibers*sizeof(uint32_t), cudaMemcpyHostToDevice);
+    if (cudaStatus != cudaSuccess) return 2;
+    cudaStatus = cudaMemcpy(gpu_ToffsetPerBlockIC, offsetPerBlock, nfibers*sizeof(uint32_t), cudaMemcpyHostToDevice);
+    if (cudaStatus != cudaSuccess) return 2;
+
+    free(fibersPerBlock);
+    free(offsetPerBlock);
+
+    cudaStatus = cudaMalloc((void**)&gpu_TvoxelIC,  nsegments*sizeof(uint32_t)) ;
+    if (cudaStatus != cudaSuccess) return 1;
+    cudaStatus = cudaMalloc((void**)&gpu_TfiberIC,  nsegments*sizeof(uint32_t)) ;
+    if (cudaStatus != cudaSuccess) return 1;
+    cudaStatus = cudaMalloc((void**)&gpu_TorienIC,  nsegments*sizeof(uint16_t)) ;
+    if (cudaStatus != cudaSuccess) return 1;
+    cudaStatus = cudaMalloc((void**)&gpu_TlengthIC, nsegments*sizeof(float32_t));
+    if (cudaStatus != cudaSuccess) return 1;
+
+    cudaStatus = cudaMemcpy(gpu_TvoxelIC,  TvoxelIC,  nsegments*sizeof(uint32_t),  cudaMemcpyHostToDevice);
+    if (cudaStatus != cudaSuccess) return 2;
+    cudaStatus = cudaMemcpy(gpu_TfiberIC,  TfiberIC,  nsegments*sizeof(uint32_t),  cudaMemcpyHostToDevice);
+    if (cudaStatus != cudaSuccess) return 2;
+    cudaStatus = cudaMemcpy(gpu_TorienIC,  TorienIC,  nsegments*sizeof(uint16_t),  cudaMemcpyHostToDevice);
+    if (cudaStatus != cudaSuccess) return 2;
+    cudaStatus = cudaMemcpy(gpu_TlengthIC, TlengthIC, nsegments*sizeof(float32_t), cudaMemcpyHostToDevice);
+    if (cudaStatus != cudaSuccess) return 2;
+    
+    return 0;
+}
+
+int CudaLinearOperator::setKernels(float32_t* lutIC, float32_t* lutEC, float32_t* lutISO){
+
+    cudaError_t cudaStatus;
+
+    if (ndiameters > 0){
+        cudaStatus = cudaMalloc((void**)&gpu_lutIC, size_lutic*sizeof(float32_t));
+        if (cudaStatus != cudaSuccess) return 1;
+        cudaStatus = cudaMemcpy(gpu_lutIC, lutIC, size_lutic*sizeof(float32_t), cudaMemcpyHostToDevice);
+        if (cudaStatus != cudaSuccess) return 2;
+
+        tex_lutIC.addressMode[0] = cudaAddressModeBorder;
+        tex_lutIC.addressMode[1] = cudaAddressModeBorder;
+        tex_lutIC.filterMode = cudaFilterModePoint;
+        tex_lutIC.normalized = false;
+
+        cudaStatus = cudaBindTexture(NULL, tex_lutIC,  gpu_lutIC,  size_lutic*sizeof(float32_t));
+        if (cudaStatus != cudaSuccess) return 3;
+    }
+
+    if (nzeppelins > 0){
+        cudaStatus = cudaMalloc((void**)&gpu_lutEC,  size_lutec*sizeof(float32_t));
+        if (cudaStatus != cudaSuccess) return 1;
+        cudaStatus = cudaMemcpy(gpu_lutEC, lutEC, size_lutec*sizeof(float32_t), cudaMemcpyHostToDevice);
+        if (cudaStatus != cudaSuccess) return 2;
+
+        tex_lutEC.addressMode[0] = cudaAddressModeBorder;
+        tex_lutEC.addressMode[1] = cudaAddressModeBorder;
+        tex_lutEC.filterMode = cudaFilterModePoint;
+        tex_lutEC.normalized = false;
+
+        cudaStatus = cudaBindTexture(NULL, tex_lutEC,  gpu_lutEC,  size_lutec*sizeof(float32_t));
+        if (cudaStatus != cudaSuccess) return 3;
+    }
+
+    if (nballs > 0){
+        cudaStatus = cudaMalloc((void**)&gpu_lutISO, size_lutiso*sizeof(float32_t));
+        if (cudaStatus != cudaSuccess) return 1;
+        cudaStatus = cudaMemcpy(gpu_lutISO, lutISO, size_lutiso*sizeof(float32_t), cudaMemcpyHostToDevice);
+        if (cudaStatus != cudaSuccess) return 2;
+
+        tex_lutISO.addressMode[0] = cudaAddressModeBorder;
+        tex_lutISO.addressMode[1] = cudaAddressModeBorder;
+        tex_lutISO.filterMode = cudaFilterModePoint;
+        tex_lutISO.normalized = false;
+
+        cudaStatus = cudaBindTexture(NULL, tex_lutISO, gpu_lutISO, size_lutiso*sizeof(float32_t));
+        if (cudaStatus != cudaSuccess) return 3;
+    }
+
+    return 0;
+}
+
+int CudaLinearOperator::setVectors(){
+    
+    cudaError_t cudaStatus;
+
+    cudaStatus = cudaMalloc((void**)&gpu_x, ncols*sizeof(float64_t));
+    if (cudaStatus != cudaSuccess) return 1;
+    cudaStatus = cudaMalloc((void**)&gpu_y, nrows*sizeof(float64_t));
+    if (cudaStatus != cudaSuccess) return 1;
+    
+    return 0;
+}
+
+int CudaLinearOperator::setGlobals(){
+    
+    cudaError_t cudaStatus;
+
+    cudaStatus = cudaMemcpyToSymbol(NUM_VOXELS,       &nvoxels,       sizeof(int));
+    if (cudaStatus != cudaSuccess) return -1;
+    cudaStatus = cudaMemcpyToSymbol(NUM_FIBERS,       &nfibers,       sizeof(int));
+    if (cudaStatus != cudaSuccess) return -1;
+    cudaStatus = cudaMemcpyToSymbol(NUM_PEAKS,        &npeaks,        sizeof(int));
+    if (cudaStatus != cudaSuccess) return -1;
+    cudaStatus = cudaMemcpyToSymbol(NUM_ORIENTATIONS, &norientations, sizeof(int));
+    if (cudaStatus != cudaSuccess) return -1;
+    cudaStatus = cudaMemcpyToSymbol(NUM_SAMPLES,      &nsamples,      sizeof(int));
+    if (cudaStatus != cudaSuccess) return -1;
+    cudaStatus = cudaMemcpyToSymbol(NUM_DIAMETERS,    &ndiameters,    sizeof(int));
+    if (cudaStatus != cudaSuccess) return -1;
+    cudaStatus = cudaMemcpyToSymbol(NUM_ZEPPELINS,    &nzeppelins,    sizeof(int));
+    if (cudaStatus != cudaSuccess) return -1;
+    cudaStatus = cudaMemcpyToSymbol(NUM_BALLS,        &nballs,        sizeof(int));
+    if (cudaStatus != cudaSuccess) return -1;
+    cudaStatus = cudaMemcpyToSymbol(NUM_ROWS,         &nrows,         sizeof(int));
+    if (cudaStatus != cudaSuccess) return -1;
+    cudaStatus = cudaMemcpyToSymbol(NUM_COLS,         &ncols,         sizeof(int));
+    if (cudaStatus != cudaSuccess) return -1;
+    cudaStatus = cudaMemcpyToSymbol(SIZE_LUTIC,       &size_lutic,    sizeof(int));
+    if (cudaStatus != cudaSuccess) return -1;
+    cudaStatus = cudaMemcpyToSymbol(SIZE_LUTEC,       &size_lutec,    sizeof(int));
+    if (cudaStatus != cudaSuccess) return -1;
+    cudaStatus = cudaMemcpyToSymbol(SIZE_LUTISO,      &size_lutiso,   sizeof(int));
+    if (cudaStatus != cudaSuccess) return -1;
+    
+    return 0;
+}
+
+CudaLinearOperator::CudaLinearOperator(int nsegments, int nvoxels, int nfibers, int npeaks, int norientations, int nsamples, int ndiameters, int nzeppelins, int nballs){
+
+    this->nsegments = nsegments;
+    this->nvoxels = nvoxels;
+    this->nfibers = nfibers;
+    this->npeaks = npeaks;
+    this->norientations = norientations;
+    this->nsamples = nsamples;
+    this->ndiameters = ndiameters;
+    this->nzeppelins = nzeppelins;   
+    this->nballs = nballs;
+    this->size_lutic = ndiameters*norientations*nsamples;
+    this->size_lutec = nzeppelins*norientations*nsamples;
+    this->size_lutiso = nballs*nsamples;
+    this->nrows = nvoxels*nsamples;
+    this->ncols = nfibers*ndiameters + npeaks*nzeppelins + nvoxels*nballs;
+}
+
+CudaLinearOperator::~CudaLinearOperator() {}
+
+int CudaLinearOperator::destroy(){
+    cudaError_t cudaStatus;    
+
+    cudaStatus = cudaFree(gpu_voxelIC);
+    cudaStatus = cudaFree(gpu_fiberIC);
+    cudaStatus = cudaFree(gpu_orienIC);
+    cudaStatus = cudaFree(gpu_lengthIC);
+    cudaStatus = cudaFree(gpu_voxelEC);
+    cudaStatus = cudaFree(gpu_orienEC);
+    cudaStatus = cudaFree(gpu_segmentsPerBlockIC);
+    cudaStatus = cudaFree(gpu_offsetPerBlockIC);
+    cudaStatus = cudaFree(gpu_segmentsPerBlockEC);
+    cudaStatus = cudaFree(gpu_offsetPerBlockEC);
+
+    cudaStatus = cudaFree(gpu_TvoxelIC);
+    cudaStatus = cudaFree(gpu_TfiberIC);
+    cudaStatus = cudaFree(gpu_TorienIC);
+    cudaStatus = cudaFree(gpu_TlengthIC);
+    cudaStatus = cudaFree(gpu_TfibersPerBlockIC);
+    cudaStatus = cudaFree(gpu_ToffsetPerBlockIC);
+
+    cudaStatus = cudaFree(gpu_x);
+    cudaStatus = cudaFree(gpu_y);
+
+    cudaStatus = cudaFree(gpu_lutIC);
+    cudaStatus = cudaFree(gpu_lutEC);
+    cudaStatus = cudaFree(gpu_lutISO);
+    cudaStatus = cudaUnbindTexture(tex_lutIC);
+    cudaStatus = cudaUnbindTexture(tex_lutEC);
+    cudaStatus = cudaUnbindTexture(tex_lutISO);
+
+    cudaStatus = cudaDeviceReset();
+
+    return 0;
+}
+
+void cudaCheckKernel(){
+    cudaError_t cudaStatus;
+    
+    cudaStatus = cudaGetLastError();
+	if(cudaStatus != cudaSuccess)
+        fprintf(stderr, "\t* kernel launch... [ ERROR ]: %s\n\n", cudaGetErrorString(cudaStatus));
+    else
+        printf("\t* kernel launch... [ OK ]\n");
+
+    cudaStatus = cudaDeviceSynchronize();
+	if(cudaStatus != cudaSuccess)
+        fprintf(stderr, "\t* cudaDeviceSynchronize() after launching kernel... [ ERROR ]: %d\n", cudaStatus);
+    else
+        printf("\t* cudaDeviceSynchronize() after launching kernel... [ OK ]\n");
+}
+
+void CudaLinearOperator::dot(float64_t* v_in, float64_t* v_out){
+    
+    // Copy vector x to the GPU
+    cudaMemcpy(gpu_x, v_in, ncols*sizeof(float64_t), cudaMemcpyHostToDevice);
+    //cudaCheckLastError();
+
+    // Multiply IC part in the GPU
+    multiply_Ax_ICpart<<<nvoxels, 1024>>>(gpu_voxelIC, gpu_fiberIC, gpu_orienIC, gpu_lengthIC, gpu_segmentsPerBlockIC, gpu_offsetPerBlockIC, gpu_lutIC, gpu_x, gpu_y);
+    //cudaCheckLastError();
+
+    // Multiply EC part in the GPU
+    multiply_Ax_ECpart<<<nvoxels, 512>>>(gpu_voxelEC, gpu_orienEC, gpu_segmentsPerBlockEC, gpu_offsetPerBlockEC, gpu_lutEC, gpu_x, gpu_y);
+    //cudaCheckLastError();
+
+    // Multiply ISO part in the GPU
+    multiply_Ax_ISOpart<<<nvoxels, 512>>>(gpu_lutISO, gpu_x, gpu_y);
+    //cudaCheckLastError();
+
+    // Copy back result to CPU
+    cudaMemcpy(v_out, gpu_y, nrows*sizeof(float64_t), cudaMemcpyDeviceToHost);
+    //cudaCheckLastError();
+}
+
+void CudaLinearOperator::Tdot(float64_t* v_in, float64_t* v_out){
+    
+    // Copy vector y to the GPU
+    cudaMemcpy(gpu_y, v_in, nrows*sizeof(float64_t), cudaMemcpyHostToDevice);
+    //cudaCheckLastError();
+
+    // Multiply IC part in the GPU
+    multiply_Aty_ICpart<<<nfibers, 512>>>(gpu_TvoxelIC, gpu_TfiberIC, gpu_TorienIC, gpu_TlengthIC, gpu_TfibersPerBlockIC, gpu_ToffsetPerBlockIC, gpu_lutIC, gpu_x, gpu_y);
+    //cudaCheckLastError();
+
+    // Multiply EC part in the GPU
+    multiply_Aty_ECpart<<<nvoxels, 512>>>(gpu_voxelEC, gpu_orienEC, gpu_segmentsPerBlockEC, gpu_offsetPerBlockEC, gpu_lutEC, gpu_x, gpu_y);
+    //cudaCheckLastError();
+
+    // Multiply ISO part in the GPU
+    multiply_Aty_ISOpart<<<nvoxels, 512>>>(gpu_lutISO, gpu_x, gpu_y);
+    //cudaCheckLastError();
+
+    // Copy back result to CPU
+    cudaMemcpy(v_out, gpu_x, ncols*sizeof(float64_t), cudaMemcpyDeviceToHost);
+    //cudaCheckLastError();
+}
+
+// ============================================================================================================================================================
+// Function Kernels that are called from CPU and executed in GPU
+// ============================================================================================================================================================
+__global__ void multiply_Ax_ICpart(uint32_t*  voxelIDs,
+                                   uint32_t*  fiberIDs,
+                                   uint16_t*  orienIDs,
+                                   float32_t* lengths,
+                                   uint32_t*  segmentsPerBlock,
+                                   uint32_t*  offsetPerBlock,
+                                   float32_t* lut,
+                                   float64_t* x,
+                                   float64_t* y)
+{
+    __shared__ float64_t shmem[1024];
+
+    uint32_t bid = blockIdx.x;
+    uint32_t tid = threadIdx.x;
+    uint32_t gid = threadIdx.x / 512;
+    uint32_t sid = threadIdx.x - 512*gid;
+
+    shmem[tid] = 0.0;
+
+    if(sid >= NUM_SAMPLES) return;
+
+    uint32_t offset = offsetPerBlock[bid] + (segmentsPerBlock[bid]/2)*gid;
+    uint32_t nsegments = segmentsPerBlock[bid]/2 + (segmentsPerBlock[bid]%2)*gid;
+
+    uint32_t*  voxel  = voxelIDs + offset;
+    uint32_t*  fiber  = fiberIDs + offset;
+    uint16_t*  orien  = orienIDs + offset;
+    float32_t* length = lengths  + offset;
+
+    float64_t sum = 0.0;
+
+    for(int i = 0; i < nsegments; i++){
+        int offset_lut = (*orien)*NUM_SAMPLES + sid;
+
+        float64_t aux = 0.0;
+        for(int j = 0; j < NUM_DIAMETERS; j++){
+            aux += (double)(lut[offset_lut + j*NUM_ORIENTATIONS*NUM_SAMPLES])*x[(*fiber) + j*NUM_FIBERS];
+            //aux += tex1Dfetch(tex_lutIC, offset_lut + j*NUM_ORIENTATIONS*NUM_SAMPLES) * x[(*fiber) + j*NUM_FIBERS];
+        }
+
+        sum += aux * (*length);
+
+        fiber++;
+        orien++;
+        length++;
+    }
+
+    shmem[tid] = sum;
+    __syncthreads();
+
+    if(tid < NUM_SAMPLES)
+        y[(*voxel)*NUM_SAMPLES + sid] = sum + shmem[tid+512];
+}
+
+__global__ void multiply_Ax_ECpart(
+    uint32_t*  voxelIDs,
+    uint16_t*  orienIDs,
+    uint32_t*  segmentsPerBlock,
+    uint32_t*  offsetPerBlock,
+    float32_t* lut,
+    float64_t* x,
+    float64_t* y)
+{
+    uint32_t bid = blockIdx.x;
+    uint32_t tid = threadIdx.x;
+
+    if(tid >= NUM_SAMPLES) return;
+
+    uint32_t offset  = offsetPerBlock[bid];
+    uint32_t nsegments = segmentsPerBlock[bid];
+
+    uint32_t* voxel = voxelIDs + offset;
+    uint16_t* orien = orienIDs + offset;
+
+    uint32_t target = NUM_FIBERS*NUM_DIAMETERS + offset;
+
+    float64_t sum = 0.0;
+    for(int i = 0; i < nsegments; i++){
+        uint32_t offset_lut = (*orien)*NUM_SAMPLES + tid;
+
+        for(int j = 0; j < NUM_ZEPPELINS; j++)
+            sum += (double)(lut[offset_lut + j*NUM_ORIENTATIONS*NUM_SAMPLES])*x[target + j*NUM_PEAKS + i];
+            //sum += tex1Dfetch(tex_lutEC, offset_lut + j*NUM_ORIENTATIONS*NUM_SAMPLES) * x[target + j*NUM_PEAKS + i];
+
+        orien++;
+    }
+
+    y[(*voxel)*NUM_SAMPLES + tid] += sum;
+}
+
+__global__ void multiply_Ax_ISOpart(
+    float32_t* lut,
+    float64_t* x,
+    float64_t* y)
+{
+    uint32_t bid = blockIdx.x;
+    uint32_t tid = threadIdx.x;
+
+    if(tid >= NUM_SAMPLES) return;
+
+    uint32_t target = NUM_FIBERS*NUM_DIAMETERS + NUM_PEAKS*NUM_ZEPPELINS + bid;
+
+    float64_t sum = 0.0;
+    for(int j = 0; j < NUM_BALLS; j++)
+        sum += (double)(lut[j*NUM_SAMPLES + tid])*x[target + j*NUM_VOXELS];
+        //sum += (double)(tex1Dfetch(tex_lutISO, j*NUM_SAMPLES + tid))*x[target + j*NUM_VOXELS];
+        
+
+    y[bid*NUM_SAMPLES + tid] += sum;
+}
+
+__global__ void multiply_Aty_ICpart(
+    uint32_t*  voxelICt,
+    uint32_t*  fiberICt,
+    uint16_t*  orienICt,
+    float32_t* lengthICt,
+    uint32_t*  compartmentsPerBlock,
+    uint32_t*  offsetPerBlock,
+    float32_t* lut,
+    float64_t* x,
+    float64_t* y)
+{
+    __shared__ float64_t shmem[512];
+
+    uint32_t bid = blockIdx.x;
+    uint32_t tid = threadIdx.x;
+
+    shmem[tid] = 0.0;
+
+    if(tid >= NUM_SAMPLES) return;
+
+    uint32_t offset = offsetPerBlock[bid];
+    uint32_t nsegments = offset + compartmentsPerBlock[bid];
+
+    uint32_t*  voxel  = voxelICt  + offset;
+    uint32_t*  fiber  = fiberICt  + offset;
+    uint16_t*  orien  = orienICt  + offset;
+    float32_t* length = lengthICt + offset;
+
+    for(int j = 0; j < NUM_DIAMETERS; j++){
+        int offset_lut = j*NUM_ORIENTATIONS*NUM_SAMPLES + tid;
+
+        float64_t sum = 0.0;
+        voxel  = voxelICt  + offset;
+        orien  = orienICt  + offset;
+        length = lengthICt + offset;
+        for(int i = offset; i < nsegments; i++){
+            sum += ((float64_t)(*length)) *( (float64_t) lut[offset_lut + (*orien)*NUM_SAMPLES] )* y[(*voxel)*NUM_SAMPLES + tid];
+            //sum += ((float64_t)(*length)) *( (float64_t) tex1Dfetch(tex_lutIC, offset_lut + (*orien)*NUM_SAMPLES) )* y[(*voxel)*NUM_SAMPLES + tid];
+
+            voxel++;
+            orien++;
+            length++;
+        }
+
+        shmem[tid] = sum;
+        __syncthreads();
+
+        if(tid < 256) shmem[tid] += shmem[tid + 256]; __syncthreads();
+        if(tid < 128) shmem[tid] += shmem[tid + 128]; __syncthreads();
+        if(tid <  64) shmem[tid] += shmem[tid +  64]; __syncthreads();
+        if(tid <  32) shmem[tid] += shmem[tid +  32]; __syncthreads();
+        if(tid <  16) shmem[tid] += shmem[tid +  16]; __syncthreads();
+        if(tid <   8) shmem[tid] += shmem[tid +   8]; __syncthreads();
+        if(tid <   4) shmem[tid] += shmem[tid +   4]; __syncthreads();
+
+        if(tid == 0) x[j*NUM_FIBERS + (*fiber)] = shmem[0] + shmem[1] + shmem[2] + shmem[3];
+
+        __syncthreads();
+    }
+}
+
+__global__ void multiply_Aty_ECpart(
+    uint32_t*  voxelEC,
+    uint16_t*  orienEC,
+    uint32_t*  segmentsPerBlock,
+    uint32_t*  offsetPerBlock,
+    float32_t* lut,
+    float64_t* x,
+    float64_t* y)
+{
+    __shared__ float64_t shmem[512];
+
+    uint32_t bid = blockIdx.x;
+    uint32_t tid = threadIdx.x;
+
+    shmem[tid] = 0.0;
+
+    if(tid >= NUM_SAMPLES) return;
+
+    uint32_t offset  = offsetPerBlock[bid];
+    uint32_t ncompartments = segmentsPerBlock[bid] + offset;
+
+    uint32_t* voxel = voxelEC + offset;
+    uint16_t* orien = orienEC + offset;
+
+    for(int j = 0; j < NUM_ZEPPELINS; j++){        
+        uint32_t offset_lut = j*NUM_ORIENTATIONS*NUM_SAMPLES + tid;
+
+        voxel = voxelEC + offset;
+        orien = orienEC + offset;
+        for(int i = offset; i < ncompartments; i++){
+            shmem[tid] =( (float64_t)(lut[(*orien)*NUM_SAMPLES + offset_lut] ))* y[(*voxel)*NUM_SAMPLES + tid];
+            //shmem[tid] =( (float64_t)tex1Dfetch(tex_lutEC, (*orien)*NUM_SAMPLES + offset_lut) )* y[(*voxel)*NUM_SAMPLES + tid];
+            __syncthreads();
+
+            if(tid < 256) shmem[tid] += shmem[tid + 256]; __syncthreads();
+            if(tid < 128) shmem[tid] += shmem[tid + 128]; __syncthreads();
+            if(tid <  64) shmem[tid] += shmem[tid +  64]; __syncthreads();
+            if(tid <  32) shmem[tid] += shmem[tid +  32]; __syncthreads();
+            if(tid <  16) shmem[tid] += shmem[tid +  16]; __syncthreads();
+            if(tid <   8) shmem[tid] += shmem[tid +   8]; __syncthreads();
+            if(tid <   4) shmem[tid] += shmem[tid +   4]; __syncthreads();
+            if(tid <   2) shmem[tid] += shmem[tid +   2]; __syncthreads();
+
+            if(tid == 0) x[NUM_FIBERS*NUM_DIAMETERS + j*NUM_PEAKS + i] = shmem[0] + shmem[1];
+
+            voxel++;
+            orien++;
+            __syncthreads();
+        }
+    }
+}
+
+__global__ void multiply_Aty_ISOpart(float* lut, double* x, double* y){
+    __shared__ double shmem[512];
+
+    uint bid = blockIdx.x;
+    uint tid = threadIdx.x;
+    uint offset = NUM_FIBERS*NUM_DIAMETERS + NUM_PEAKS*NUM_ZEPPELINS + bid;
+
+    shmem[tid] = 0.0;
+
+    if(tid >= NUM_SAMPLES) return;
+
+    for(int j = 0; j < NUM_BALLS; j++){
+        shmem[tid] =( (float64_t) lut[j*NUM_SAMPLES + tid] )* y[bid*NUM_SAMPLES + tid];
+        //shmem[tid] =( (float64_t) tex1Dfetch(tex_lutISO, j*NUM_SAMPLES + tid) )* y[bid*NUM_SAMPLES + tid];
+        __syncthreads();
+
+        if(tid < 256) shmem[tid] += shmem[tid + 256]; __syncthreads();
+        if(tid < 128) shmem[tid] += shmem[tid + 128]; __syncthreads();
+        if(tid <  64) shmem[tid] += shmem[tid +  64]; __syncthreads();
+        if(tid <  32) shmem[tid] += shmem[tid +  32]; __syncthreads();
+        if(tid <  16) shmem[tid] += shmem[tid +  16]; __syncthreads();
+        if(tid <   8) shmem[tid] += shmem[tid +   8]; __syncthreads();
+        if(tid <   4) shmem[tid] += shmem[tid +   4]; __syncthreads(); 
+
+        if(tid == 0)
+            x[offset + j*NUM_VOXELS] = shmem[0] + shmem[1] + shmem[2] + shmem[3];
+    }
+}
+
diff --git a/commit/cudaoperator/operator_withCUDA.cuh b/commit/cudaoperator/operator_withCUDA.cuh
new file mode 100644
index 00000000..6b3d09bc
--- /dev/null
+++ b/commit/cudaoperator/operator_withCUDA.cuh
@@ -0,0 +1,176 @@
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <cuda_runtime_api.h>
+#include <device_launch_parameters.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <algorithm>
+
+using namespace std;
+
+typedef unsigned int uint32_t;
+typedef unsigned short int uint16_t;
+typedef float float32_t;
+typedef double float64_t;
+
+// ====================================================
+// Util functions to check CUDA GPU compatibility
+// ====================================================
+int checkCompatibility(int gpu_id);
+void cudaCheckLastError();
+
+// ====================================================
+// Function to preprocess data for GPU
+// ====================================================
+void preprocessDataForGPU(uint32_t* data, int NUM_COMPARTMENTS, uint32_t* compartmentsPerBlock, uint32_t* offsetPerBlock, int NUM_BLOCKS);
+
+// ====================================================
+// CUDA Kernels for Ax operation
+// ====================================================
+__global__ void multiply_Ax_ICpart(
+    uint32_t*  voxelIDs,
+    uint32_t*  fiberIDs,
+    uint16_t*  orienIDs,
+    float32_t* lengths,
+    uint32_t*  segmentsPerBlock,
+    uint32_t*  offsetPerBlock,
+    float32_t* lut,
+    float64_t* x,
+    float64_t* y);
+
+__global__ void multiply_Ax_ECpart(
+        uint32_t*  voxelIDs,
+        uint16_t*  orienIDs,
+        uint32_t*  segmentsPerBlock,
+        uint32_t*  offsetPerBlock,
+        float32_t* lut,
+        float64_t* x,
+        float64_t* y);
+
+__global__ void multiply_Ax_ISOpart(
+    float32_t* lut,
+    float64_t* x,
+    float64_t* y);
+
+// ====================================================
+// CUDA Kernels for A'y operation
+// ====================================================
+__global__ void multiply_Aty_ICpart(
+    uint32_t*  TvoxelIC,
+    uint32_t*  TfiberIC,
+    uint16_t*  TorienIC,
+    float32_t* TlengthIC,
+    uint32_t*  compartmentsPerBlock,
+    uint32_t*  offsetPerBlock,
+    float32_t* lut,
+    float64_t* x,
+    float64_t* y);
+
+__global__ void multiply_Aty_ECpart(
+    uint32_t*  voxelEC,
+    uint16_t*  orienEC,
+    uint32_t*  segmentsPerBlock,
+    uint32_t*  offsetPerBlock,
+    float32_t* lut,
+    float64_t* x,
+    float64_t* y);
+
+__global__ void multiply_Aty_ISOpart(
+    float* lut,
+    double* x,
+    double* y);
+
+// ====================================================
+// Constant global values in the GPU
+// ====================================================
+__constant__ int NUM_VOXELS;
+__constant__ int NUM_FIBERS;
+__constant__ int NUM_PEAKS;
+__constant__ int NUM_ORIENTATIONS;
+__constant__ int NUM_SAMPLES;
+__constant__ int NUM_DIAMETERS;
+__constant__ int NUM_ZEPPELINS;
+__constant__ int NUM_BALLS;
+__constant__ int NUM_ROWS;        
+__constant__ int NUM_COLS;      
+__constant__ int SIZE_LUTIC;      
+__constant__ int SIZE_LUTEC;     
+__constant__ int SIZE_LUTISO;
+
+// ====================================================
+// Pointers to A (IC part) in the GPU
+// ====================================================
+static uint32_t*  gpu_voxelIC;
+static uint32_t*  gpu_fiberIC;
+static uint16_t*  gpu_orienIC;
+static float32_t* gpu_lengthIC;
+static uint32_t*  gpu_segmentsPerBlockIC;
+static uint32_t*  gpu_offsetPerBlockIC;
+
+// ====================================================
+// Pointers to A' (IC part) in the GPU
+// ====================================================
+static uint32_t*  gpu_TvoxelIC;
+static uint32_t*  gpu_TfiberIC;
+static uint16_t*  gpu_TorienIC;
+static float32_t* gpu_TlengthIC;
+static uint32_t*  gpu_TfibersPerBlockIC;
+static uint32_t*  gpu_ToffsetPerBlockIC;
+
+// ====================================================
+// Pointers to A (EC part) in the GPU
+// ====================================================
+static uint32_t* gpu_voxelEC;
+static uint16_t* gpu_orienEC;
+static uint32_t* gpu_segmentsPerBlockEC;
+static uint32_t* gpu_offsetPerBlockEC;
+
+// ====================================================
+// Pointers to LUT in the GPU
+// ====================================================
+static float32_t* gpu_lutIC;
+static float32_t* gpu_lutEC;
+static float32_t* gpu_lutISO;
+
+// ====================================================
+// Pointers to x and y in the GPU
+// ====================================================
+static float64_t* gpu_x;
+static float64_t* gpu_y;
+
+// ============================================================================
+// This class creates an instance of the LinearOperator in GPU memory
+// ============================================================================
+class CudaLinearOperator {
+
+    // constant values in CPU
+    int nsegments;
+    int nvoxels;    
+    int nfibers;      
+    int npeaks;
+    int norientations;
+    int nsamples;
+    int ndiameters;
+    int nzeppelins;   
+    int nballs;
+    int size_lutic;
+    int size_lutec;
+    int size_lutiso;
+    int nrows;
+    int ncols;
+
+    public:
+         CudaLinearOperator(int nsegments, int nvoxels, int nfibers, int npeaks, int norientations, int nsamples, int ndiameters, int nzeppelins, int nballs);
+        ~CudaLinearOperator();
+
+        int setDictionary(uint32_t* voxelIC, uint32_t* fiberIC, uint16_t* orienIC, float32_t* lengthIC, uint32_t* voxelEC, uint16_t* orienEC);
+        int setTransposeDictionary(uint32_t* TvoxelIC, uint32_t* TfiberIC, uint16_t* TorienIC, float32_t* TlengthIC);
+        int setKernels(float32_t* lutIC, float32_t* lutEC, float32_t* lutISO);
+        int setVectors();
+        int setGlobals();
+        int destroy();
+
+        void  dot(float64_t* v_in, float64_t* v_out);
+        void Tdot(float64_t* v_in, float64_t* v_out);
+};
\ No newline at end of file
diff --git a/commit/operator/operator.pyx b/commit/operator/operator.pyx
index 6d83202a..a4187f95 100755
--- a/commit/operator/operator.pyx
+++ b/commit/operator/operator.pyx
@@ -3,6 +3,7 @@
 
 import cython
 import numpy as np
+from amico.util import ERROR
 cimport numpy as np
 
 # Interfaces to actual C code performing the multiplications
@@ -161,7 +162,7 @@ cdef class LinearOperator :
 
         # Permit only matrix-vector multiplications
         if v_in.size != self.shape[1] :
-            raise RuntimeError( "A.dot(): dimensions do not match" )
+            ERROR( "A.dot(): dimensions do not match" )
 
         # Create output array
         cdef double [::1] v_out = np.zeros( self.shape[0], dtype=np.float64 )
diff --git a/commit/operator/operator.pyxbld b/commit/operator/operator.pyxbld
old mode 100644
new mode 100755
diff --git a/commit/operator/operator_noLUT.c b/commit/operator/operator_noLUT.c
index 0046f237..061ca1d1 100644
--- a/commit/operator/operator_noLUT.c
+++ b/commit/operator/operator_noLUT.c
@@ -4,7 +4,7 @@
 // number of THREADS
 #ifdef nTHREADS
     #if (nTHREADS<1 || nTHREADS>255)
-    #error "nTHREADS" must be in the range 1..255
+    #error "nTHREADS" must be in the range 0..255
     #endif
 #else
     #error "nTHREADS" parameter must be passed to the compiler as "-DnTHREADS=<value>"
diff --git a/commit/operator/operator_withLUT.c b/commit/operator/operator_withLUT.c
index 9c959f57..2137d4a3 100644
--- a/commit/operator/operator_withLUT.c
+++ b/commit/operator/operator_withLUT.c
@@ -4,7 +4,7 @@
 // number of THREADS
 #ifdef nTHREADS
     #if (nTHREADS<1 || nTHREADS>255)
-    #error "nTHREADS" must be in the range 1..255
+    #error "nTHREADS" must be in the range 0..255
     #endif
 #else
     #error "nTHREADS" parameter must be passed to the compiler as "-DnTHREADS=<value>"
diff --git a/extras/COMMIT_debugger/OPENGL_callbacks.cxx b/extras/COMMIT_debugger/OPENGL_callbacks.cxx
index e90e9c08..fcf4bca3 100755
--- a/extras/COMMIT_debugger/OPENGL_callbacks.cxx
+++ b/extras/COMMIT_debugger/OPENGL_callbacks.cxx
@@ -1,1132 +1,1132 @@
-#define GL_GLEXT_PROTOTYPES 1
-#ifdef __APPLE__
-    #include <OpenGL/gl.h>
-    #include <OpenGL/glext.h>
-    #include <GLUT/glut.h>
-#else
-    #include <GL/gl.h>
-    #include <GL/glext.h>
-    #include <GL/glut.h>
-#endif
-
-#include "OPENGL_utils.h"
-using namespace OPENGL_utils;
-
-/* global variables */
-GLfloat			id[16], rot[16], rot1[16], rot2[16], rot3[16];
-Vec3Df			translation;
-Vec3Di			start;
-GLint			moving;
-GLfloat			zoom;
-
-float ScreenX, ScreenY;
-
-
-void drawString( const char *string )
-{
-    static int y = glutGet( GLUT_WINDOW_HEIGHT ) - 50;
-    if ( string=="" )
-        y = glutGet( GLUT_WINDOW_HEIGHT ) - 50;
-    else
-    {
-        glRasterPos2i(10, y);
-        for (const char* c=string; *c != '\0'; c++) 
-            glutBitmapCharacter(GLUT_BITMAP_9_BY_15, *c);
-        y -= 18;
-    }
-}
-
-
-void PrintConfig()
-{
-    if ( !showConfig )
-        return;
-
-    glMatrixMode(GL_PROJECTION);
-    glPushMatrix();             
-    glLoadIdentity();
-    glMatrixMode( GL_MODELVIEW ) ;
-    glPushMatrix() ;
-    glLoadIdentity() ;
-    int w = glutGet( GLUT_WINDOW_WIDTH );
-    int h = glutGet( GLUT_WINDOW_HEIGHT );
-    glOrtho( 0, w, 0, h, -1, 1 );
-    glDisable( GL_DEPTH_TEST ); 
-
-    char s[1024];
-    glColor3f(1, 1, 0);
-    drawString( "" ); // reset initial position
-
-    drawString( "MAP" );
-    sprintf( s, "   - value(%d,%d,%d) = %.2f", VOXEL.x, VOXEL.y, VOXEL.z, MAP(VOXEL.x, VOXEL.y, VOXEL.z) );
-    drawString( s );
-    sprintf( s, "   - range = [ %.1f ... %.1f ]", MAP_min_view, MAP_max_view );
-    drawString( s );
-    sprintf( s, "   - opacity = %.1f", MAP_opacity );
-    drawString( s );
-
-    drawString( "SIGNAL" );
-    sprintf( s, "   - shell = %d/%d  (b=%.1f)", GLYPHS_shell+1, SCHEME_shells_b.size(), SCHEME_shells_b[GLYPHS_shell] );
-    drawString( s );
-    sprintf( s, "   - use affine = %s", GLYPHS_use_affine?"true":"false" );
-    drawString( s );
-    sprintf( s, "   - flip = [ %d, %d, %d ]", GLYPHS_flip[0], GLYPHS_flip[1], GLYPHS_flip[2] );
-    drawString( s );
-    sprintf( s, "   - b0 thr = %.1f", GLYPHS_b0_thr );
-    drawString( s );
-
-    if ( PEAKS_n>0 )
-    {
-        drawString( "PEAKS" );
-        sprintf( s, "   - use affine = %s", PEAKS_use_affine?"true":"false" );
-        drawString( s );
-        sprintf( s, "   - flip = [ %d, %d, %d ]", PEAKS_flip[0], PEAKS_flip[1], PEAKS_flip[2] );
-        drawString( s );
-        sprintf( s, "   - thr = %.1f", PEAKS_thr );
-        drawString( s );
-        sprintf( s, "   - normalize = %s", PEAKS_doNormalize?"true":"false" );
-        drawString( s );
-    }
-
-    if ( TRK_nTractsPlotted>0 )
-    {
-        drawString( "FIBERS" );
-        sprintf( s, "   - shift = [ %.1f %.1f %.1f ]  (voxels)", TRK_offset.x, TRK_offset.y, TRK_offset.z );
-        drawString( s );
-        sprintf( s, "   - slab thickness = %.1f  (voxels)", TRK_crop );
-        drawString( s );
-    }
-
-    glEnable (GL_DEPTH_TEST);     
-    glMatrixMode(GL_PROJECTION);
-    glPopMatrix();
-    glMatrixMode(GL_MODELVIEW);
-    glPopMatrix();
-}
-
-
-// KEYBOARD callback
-// -----------------
-void GLUT__keyboard( unsigned char key, GLint x=0, GLint y=0 )
-{
-    bool doRedraw = true;
-
-    switch( key )
-    {
-        case 'l': showConfig = 1 - showConfig; break;
-
-        case '1': showPlane[0] = 1 - showPlane[0]; break;
-        case '2': showPlane[1] = 1 - showPlane[1]; break;
-        case '3': showPlane[2] = 1 - showPlane[2]; break;
-        case '4':
-            showPlane[0] = 1;
-            showPlane[1] = 0;
-            showPlane[2] = 0;
-            translation.x	= translation.y = 0;
-            OPENGL_utils::identity(rot1);
-            OPENGL_utils::rotateX(rot1, 90.0, rot2);
-            OPENGL_utils::rotateZ(rot2, 90.0, rot);
-            break;
-        case '5':
-            showPlane[0] = 0;
-            showPlane[1] = 1;
-            showPlane[2] = 0;
-            translation.x	= translation.y = 0;
-            OPENGL_utils::identity(rot1);
-            OPENGL_utils::rotateX(rot1, 90.0, rot);
-            break;
-        case '6':
-            showPlane[0] = 0;
-            showPlane[1] = 0;
-            showPlane[2] = 1;
-            translation.x	= translation.y = 0;
-            OPENGL_utils::identity( rot );
-            break;
-
-        case '0': showAxes = 1 - showAxes; break;
-        case '-': zoom += 10.0; break;
-        case '+': zoom -= 10.0; break;
-        case 'm': MAP_max_view = fmaxf(0.0,MAP_max_view-MAP_max*0.05); break;
-        case 'M': MAP_max_view = fminf(MAP_max,MAP_max_view+MAP_max*0.05); break;
-        case 'o': MAP_opacity = fmaxf(0.0,MAP_opacity-0.1); break;
-        case 'O': MAP_opacity = fminf(1.0,MAP_opacity+0.1); break;
-        case 'w': LINE_width = fmaxf( 1,LINE_width-1); break;
-        case 'W': LINE_width = fminf(10,LINE_width+1); break;
-        case 'r':
-            showPlane[0] = showPlane[1] = showPlane[2] = 1;
-            translation.x	= translation.y = 0;
-            zoom			= 0;
-            OPENGL_utils::identity( rot );
-            break;
-
-        case 's': GLYPHS_show = 1 - GLYPHS_show; break;
-        case 'S': GLYPHS_shell = (GLYPHS_shell+1) % SCHEME_shells_idx.size(); break;
-        case 'a': GLYPHS_use_affine = 1 - GLYPHS_use_affine; break;
-        case 'x': GLYPHS_flip[0] = 1 - GLYPHS_flip[0]; for(int d=0; d < SCHEME_dirs.size() ;d++) SCHEME_dirs[d].x *= -1; break;
-        case 'y': GLYPHS_flip[1] = 1 - GLYPHS_flip[1]; for(int d=0; d < SCHEME_dirs.size() ;d++) SCHEME_dirs[d].y *= -1; break;
-        case 'z': GLYPHS_flip[2] = 1 - GLYPHS_flip[2]; for(int d=0; d < SCHEME_dirs.size() ;d++) SCHEME_dirs[d].z *= -1; break;
-        case 'b': GLYPHS_b0_thr = fmaxf(0.0,GLYPHS_b0_thr-10.0); break;
-        case 'B': GLYPHS_b0_thr = fminf(MAP_max,GLYPHS_b0_thr+10.0); break;
-
-        case 'p': if ( PEAKS_n>0 ) PEAKS_show  = 1 - PEAKS_show; break;
-        case 'A': PEAKS_use_affine = 1 - PEAKS_use_affine; break;
-        case 'X': PEAKS_flip[0] = 1 - PEAKS_flip[0]; break;
-        case 'Y': PEAKS_flip[1] = 1 - PEAKS_flip[1]; break;
-        case 'Z': PEAKS_flip[2] = 1 - PEAKS_flip[2]; break;
-        case 't': PEAKS_thr = fmaxf(PEAKS_thr - 0.1, 0.0); break;
-        case 'T': PEAKS_thr = fminf(PEAKS_thr + 0.1, 1.0); break;
-        case 'n': PEAKS_doNormalize = 1 - PEAKS_doNormalize; break;
-
-        case 'f': if ( TRK_nTractsPlotted>0 ) TRK_show = 1 - TRK_show; break;
-        case 'c': TRK_crop = fmaxf( 0.0,TRK_crop-0.5); break;
-        case 'C': TRK_crop = fminf(max(dim.x,max(dim.y,dim.z)),TRK_crop+0.5); break;
-        case ' ': TRK_crop_mode = 1 - TRK_crop_mode; break;
-
-        case 'q':
-        case 27 : exit(0); break;
-
-        default: doRedraw = false;
-    }
-
-    if ( doRedraw )
-        glutPostRedisplay();
-}
-
-
-// MENU callback
-// -------------
-void GLUT__menu( int id ) 
-{
-    switch( id )
-    {
-        case   0: GLUT__keyboard('q'); break;
-
-        case 101: GLUT__keyboard('s'); break;
-        case 102: GLUT__keyboard('S'); break;
-        case 103: GLUT__keyboard('a'); break;
-        case 104: GLUT__keyboard('x'); break;
-        case 105: GLUT__keyboard('y'); break;
-        case 106: GLUT__keyboard('z'); break;
-        case 107: GLUT__keyboard('b'); break;
-        case 108: GLUT__keyboard('B'); break;
-
-        case 201: GLUT__keyboard('p'); break;
-        case 202: GLUT__keyboard('A'); break;
-        case 203: GLUT__keyboard('X'); break;
-        case 204: GLUT__keyboard('Y'); break;
-        case 205: GLUT__keyboard('Z'); break;
-        case 206: GLUT__keyboard('t'); break;
-        case 207: GLUT__keyboard('T'); break;
-        case 208: GLUT__keyboard('n'); break;
-
-        case 301: GLUT__keyboard('f'); break;
-        case 302: GLUT__keyboard('c'); break;
-        case 303: GLUT__keyboard('C'); break;
-        case 304: GLUT__keyboard(' '); break;
-
-        case 401: GLUT__keyboard('1'); break;
-        case 402: GLUT__keyboard('2'); break;
-        case 403: GLUT__keyboard('3'); break;
-        case 404: GLUT__keyboard('4'); break;
-        case 405: GLUT__keyboard('5'); break;
-        case 406: GLUT__keyboard('6'); break;
-        case 407: GLUT__keyboard('0'); break;
-        case 408: GLUT__keyboard('-'); break;
-        case 409: GLUT__keyboard('+'); break;
-        case 410: GLUT__keyboard('m'); break;
-        case 411: GLUT__keyboard('M'); break;
-        case 412: GLUT__keyboard('o'); break;
-        case 413: GLUT__keyboard('O'); break;
-        case 414: GLUT__keyboard('w'); break;
-        case 415: GLUT__keyboard('W'); break;
-        case 416: GLUT__keyboard('r'); break;
-        case 417: GLUT__keyboard('l'); break;
-    }
-}
-
-
-// Create the dropdown MENU
-// ------------------------
-void GLUT__createMenu()
-{
-    int submenu_SIGNAL_id, submenu_PEAKS_id, submenu_FIBERS_id, submenu_VIEW_id;
-
-    submenu_SIGNAL_id = glutCreateMenu( GLUT__menu );
-    glutAddMenuEntry("[s] Show/hide",         101);
-    glutAddMenuEntry("[S] Change shell",      102);
-    glutAddMenuEntry("[a] Use affine",        103);
-    glutAddMenuEntry("[x] Flip X axis",       104);
-    glutAddMenuEntry("[y] Flip Y axis",       105);
-    glutAddMenuEntry("[z] Flip Z axis",       106);
-    glutAddMenuEntry("[b] Decrease b0 thr",   107);
-    glutAddMenuEntry("[B] Increase b0 thr",   108);
-
-    if ( PEAKS_n>0 )
-    {
-        submenu_PEAKS_id = glutCreateMenu( GLUT__menu );
-        glutAddMenuEntry("[p] Show/hide",         201);
-        glutAddMenuEntry("[A] Use affine",        202);
-        glutAddMenuEntry("[X] Flip X axis",       203);
-        glutAddMenuEntry("[Y] Flip Y axis",       204);
-        glutAddMenuEntry("[Z] Flip Z axis",       205);
-        glutAddMenuEntry("[t] Decrease threshold",206);
-        glutAddMenuEntry("[T] Increase threshold",207);
-        glutAddMenuEntry("[n] Normalize length",  208);
-    }
-
-    if ( TRK_nTractsPlotted>0 )
-    {
-        submenu_FIBERS_id = glutCreateMenu( GLUT__menu );
-        glutAddMenuEntry("[f] Show/hide",         301);
-        glutAddMenuEntry("[c] Decrease crop size",302);
-        glutAddMenuEntry("[C] Increase crop size",303);
-        glutAddMenuEntry("[ ] Change crop mode",  304);
-    }
-
-    submenu_VIEW_id = glutCreateMenu( GLUT__menu );
-    glutAddMenuEntry("[1] Show/hide YZ plane", 401);
-    glutAddMenuEntry("[2] Show/hide XZ plane", 402);
-    glutAddMenuEntry("[3] Show/hide XY plane", 403);
-    glutAddMenuEntry("[4] Reset to YZ plane",  404);
-    glutAddMenuEntry("[5] Reset to XZ plane",  405);
-    glutAddMenuEntry("[6] Reset to XY plane",  406);
-    glutAddMenuEntry("[0] Show/hide axes",     407);
-    glutAddMenuEntry("[-] Decrease zoom",      408);
-    glutAddMenuEntry("[+] Increase zoom",      409);
-    glutAddMenuEntry("[m] Decrease max value", 410);
-    glutAddMenuEntry("[M] Increase max value", 411);
-    glutAddMenuEntry("[o] Decrease opacity",   412);
-    glutAddMenuEntry("[O] Increase opacity",   413);
-    glutAddMenuEntry("[t] Decrease line width",414);
-    glutAddMenuEntry("[T] Increase line width",415);
-    glutAddMenuEntry("[r] Reset view",         416);
-    glutAddMenuEntry("[l] Show/hide log",      417);
-
-    int menu_id = glutCreateMenu( GLUT__menu );
-    glutAddSubMenu("Signal", submenu_SIGNAL_id);
-    if ( PEAKS_n>0 )
-        glutAddSubMenu("Peaks", submenu_PEAKS_id);
-    if ( TRK_nTractsPlotted>0 )
-        glutAddSubMenu("Fibers", submenu_FIBERS_id);
-    glutAddSubMenu("View options", submenu_VIEW_id);
-    glutAddMenuEntry("Quit", 0);
-    glutAttachMenu(GLUT_RIGHT_BUTTON);
-}
-
-
-// RESHAPE callback
-// ----------------
-void GLUT__reshape( GLint w, GLint h )
-{
-    ScreenX = w;
-    ScreenY = h;
-
-    glViewport( 0, 0, w, h );
-
-    glMatrixMode( GL_PROJECTION );
-    glLoadIdentity();
-    gluPerspective( 45.0f, ScreenX/ScreenY, 1.0f, 5000.0f );
-
-    glMatrixMode( GL_MODELVIEW );
-    glLoadIdentity();
-    gluLookAt(
-        0.0, 0.0, 2.0 * max(pixdim.x*dim.x,pixdim.y*dim.y) * ScreenY/ScreenX, // eye point
-        0.0, 0.0, 0.0, // reference point
-        0.0, 1.0, 0.0  // up vector
-    );
-}
-
-
-// SPECIALKEY callback
-// -------------------
-void GLUT__specialkey( GLint key, GLint x, GLint y )
-{
-    bool doRedraw = true;
-    GLint modif = glutGetModifiers();
-    GLint ALT   = modif & GLUT_ACTIVE_ALT;
-    GLint CTRL  = modif & GLUT_ACTIVE_CTRL;
-
-    switch( key )
-    {
-        case GLUT_KEY_LEFT:
-            if ( ALT )
-                TRK_offset.x -= 0.5;
-            else if ( CTRL )
-                translation.x -= 2.0;
-            else
-                VOXEL.x--;
-            break;
-        case GLUT_KEY_RIGHT:
-            if ( ALT )
-                TRK_offset.x += 0.5;
-            else if ( CTRL )
-                translation.x += 2.0;
-            else
-                VOXEL.x++;
-            break;
-        case GLUT_KEY_DOWN:
-            if ( ALT )
-                TRK_offset.y -= 0.5;
-            else if ( CTRL )
-                translation.y -= 2.0;
-            else
-                VOXEL.y--;
-            break;
-        case GLUT_KEY_UP:
-            if ( ALT )
-                TRK_offset.y += 0.5;
-            else if ( CTRL )
-                translation.y += 2.0;
-            else
-                VOXEL.y++;
-            break;
-        case GLUT_KEY_PAGE_DOWN:
-            if ( ALT )
-                TRK_offset.z -= 0.5;
-            else
-                VOXEL.z--;
-            break;
-        case GLUT_KEY_PAGE_UP:
-            if ( ALT )
-                TRK_offset.z += 0.5;
-            else
-                VOXEL.z++;
-            break;
-
-        default:
-            doRedraw = false;
-    }
-
-    // check the bounds
-    VOXEL.x = max( VOXEL.x, 0 );
-    VOXEL.y = max( VOXEL.y, 0 );
-    VOXEL.z = max( VOXEL.z, 0 );
-    VOXEL.x = min( VOXEL.x, dim.x-1 );
-    VOXEL.y = min( VOXEL.y, dim.y-1 );
-    VOXEL.z = min( VOXEL.z, dim.z-1 );
-
-    if ( doRedraw )
-        glutPostRedisplay();
-}
-
-
-// MOUSE callback
-// --------------
-void GLUT__mouse( GLint button, GLint state, GLint x, GLint y )
-{
-    if (state == GLUT_DOWN)
-    {
-        if ( button == GLUT_LEFT_BUTTON && glutGetModifiers() != GLUT_ACTIVE_CTRL )
-        {
-            moving = 1;
-            start.x = x;
-            start.y = y;
-        }
-        // NOTE: does not work, issue with glutGetModifiers not getting CTRL
-        // else if ( button == GLUT_LEFT_BUTTON && glutGetModifiers() == GLUT_ACTIVE_CTRL )
-        // {
-        //     moving = 2;
-        //     start.x = x;
-        //     start.y = y;
-        // }
-        else if ( (button == GLUT_MIDDLE_BUTTON) || (button == GLUT_LEFT_BUTTON && glutGetModifiers() == GLUT_ACTIVE_ALT) )
-        {
-            moving = 3;
-            start.x = x;
-            start.y = y;
-        }
-    }
-    else if (state == GLUT_UP)
-    {
-        moving = 0;
-    }
-}
-
-
-// MOTION callback
-// ---------------
-void GLUT__motion( GLint x, GLint y )
-{
-    if (moving==1)
-    {
-        OPENGL_utils::translate(id, 0,0,0, rot1);
-
-        OPENGL_utils::rotateY(id,start.x-x,rot3);
-        OPENGL_utils::matXMat(rot,rot1,rot2);
-        OPENGL_utils::rotateX(id,start.y-y,rot1);
-        OPENGL_utils::matXMat(rot2,rot1,rot);
-        OPENGL_utils::matXMat(rot,rot3,rot2);
-
-        OPENGL_utils::translate(id, 0,0,0, rot1);
-        OPENGL_utils::matXMat(rot2,rot1,rot);
-
-        start.x = x;
-        start.y = y;
-    }
-
-    else if (moving==2)
-    {
-        zoom = zoom + (y-start.y)/2.0;
-        start.y = y;
-    }
-
-    else if (moving==3)
-    {
-        translation.x = translation.x - (start.x-x)/3.0;
-        translation.y = translation.y + (start.y-y)/3.0;
-        start.x = x;
-        start.y = y;
-    }
-
-    glutPostRedisplay();
-}
-
-
-// DISPLAY callback
-// ----------------
-void GLUT__display( void )
-{
-    glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );
-
-    glPushMatrix();
-    glTranslatef(translation.x, translation.y, -zoom); // mouse translation + zoom
-    glMultMatrixf(rot); // mouse rotation    
-    glTranslatef( -pixdim.x*dim.x/2.0, -pixdim.y*dim.y/2.0, -pixdim.z*dim.z/2.0 ); // center the FOV
-    glScalef( pixdim.x, pixdim.y, pixdim.z ); // account for voxel size
-
-    glEnable(GL_MULTISAMPLE_ARB);
-
-    /* ============= */
-    /* Draw the AXES */
-    /* ============= */
-    if ( showAxes )
-    {
-        glLineWidth(2);
-        glBegin(GL_LINES);
-            glColor4f( 1,0,0,1); glVertex3f( 0,0,0 ); glVertex3f( 10,  0,  0 );
-            glColor4f( 0,1,0,1); glVertex3f( 0,0,0 ); glVertex3f(  0, 10,  0 );
-            glColor4f( 0,0,1,1); glVertex3f( 0,0,0 ); glVertex3f(  0,  0, 10 );
-        glEnd();
-    }
-
-    /* =============== */
-    /* Draw the TRACTS */
-    /* =============== */
-    if ( TRK_show )
-    {
-        glPushMatrix();
-        glTranslatef(TRK_offset.x, TRK_offset.y, TRK_offset.z);
-
-        glLineWidth(1.0f);
-
-        float *ptr  = TRK_coords, *ptrc = TRK_colors;
-        VECTOR<float> Vc( VOXEL.x+0.5, VOXEL.y+0.5, VOXEL.z+0.5 ); // voxel center
-        float thr = 0.5*TRK_crop;
-        for(int f=0; f < TRK_nTractsPlotted; f++)
-        {
-            glBegin(GL_LINE_STRIP);
-            for(int i=0; i < TRK_nPoints[f]; i++)
-            {
-                // plot segment only if it's close to center of VOXEL
-                if (
-                      (
-                        TRK_crop_mode && (
-                        ( showPlane[0] && abs( (ptr[0]+TRK_offset.x) - Vc.x ) <= thr ) ||
-                        ( showPlane[1] && abs( (ptr[1]+TRK_offset.y) - Vc.y ) <= thr ) ||
-                        ( showPlane[2] && abs( (ptr[2]+TRK_offset.z) - Vc.z ) <= thr ) )
-                      )
-                      ||
-                      (
-                        !TRK_crop_mode && (
-                        ( abs( (ptr[0]+TRK_offset.x) - Vc.x ) <= thr ) &&
-                        ( abs( (ptr[1]+TRK_offset.y) - Vc.y ) <= thr ) &&
-                        ( abs( (ptr[2]+TRK_offset.z) - Vc.z ) <= thr ) )
-                      )
-                    )
-                {
-                    glColor3f(  ptrc[0], ptrc[1], ptrc[2] );
-                    glVertex3f( ptr[0],  ptr[1],  ptr[2]  );
-                }
-                else
-                {
-                    glEnd();
-                    glBegin(GL_LINE_STRIP);
-                }
-                ptr  += 3;
-                ptrc += 3;
-            }
-            glEnd();
-        }
-
-        glPopMatrix();
-    }
-
-    /* ============== */
-    /* Draw the PEAKS */
-    /* ============== */
-    if ( PEAKS_show || GLYPHS_show )
-    {
-        glDisable( GL_BLEND );
-        glLineWidth( LINE_width );
-        glPointSize( LINE_width );
-
-        glPushMatrix();
-        glTranslatef(.5,.5,.5);
-
-        Vec3Df dir, col;
-        int x,y,z,d,idx;
-        float norms[PEAKS_n], normMax, b0, w;
-
-        // plane YZ
-        if ( showPlane[0]  )
-        {
-            x = (int)VOXEL.x;
-            for(y=0; y<dim.y ;y++)
-            for(z=0; z<dim.z ;z++)
-            {
-                if ( PEAKS_show )
-                {
-                    normMax = 0;
-                    for(d=0; d<PEAKS_n; d++)
-                    {
-                        col.x = (*niiPEAKS->img)(x,y,z,3*d+0); // use "col" as tmp variable
-                        col.y = (*niiPEAKS->img)(x,y,z,3*d+1);
-                        col.z = (*niiPEAKS->img)(x,y,z,3*d+2);
-                        if ( PEAKS_use_affine )
-                        {
-                            dir.x = col.x * ((float*)PEAKS_affine)[0] + col.y * ((float*)PEAKS_affine)[1] + col.z * ((float*)PEAKS_affine)[2];
-                            dir.y = col.x * ((float*)PEAKS_affine)[3] + col.y * ((float*)PEAKS_affine)[4] + col.z * ((float*)PEAKS_affine)[5];
-                            dir.z = col.x * ((float*)PEAKS_affine)[6] + col.y * ((float*)PEAKS_affine)[7] + col.z * ((float*)PEAKS_affine)[8];
-                        }
-                        else
-                        {
-                            dir.x = col.x;
-                            dir.y = col.y;
-                            dir.z = col.z;
-                        }
-                        norms[d] = dir.norm();
-                        if ( norms[d] > normMax )
-                            normMax = norms[d];
-                    }
-
-                    for(d=0; d<PEAKS_n; d++)
-                    {
-                        if ( norms[d] < PEAKS_thr*normMax )
-                            continue;
-
-                        col.x = (*niiPEAKS->img)(x,y,z,3*d+0); // use "col" as tmp variable
-                        col.y = (*niiPEAKS->img)(x,y,z,3*d+1);
-                        col.z = (*niiPEAKS->img)(x,y,z,3*d+2);
-                        if ( PEAKS_use_affine )
-                        {
-                            dir.x = col.x * ((float*)PEAKS_affine)[0] + col.y * ((float*)PEAKS_affine)[1] + col.z * ((float*)PEAKS_affine)[2];
-                            dir.y = col.x * ((float*)PEAKS_affine)[3] + col.y * ((float*)PEAKS_affine)[4] + col.z * ((float*)PEAKS_affine)[5];
-                            dir.z = col.x * ((float*)PEAKS_affine)[6] + col.y * ((float*)PEAKS_affine)[7] + col.z * ((float*)PEAKS_affine)[8];
-                        }
-                        else
-                        {
-                            dir.x = col.x;
-                            dir.y = col.y;
-                            dir.z = col.z;
-                        }
-                        col.x = 0.5 * (PEAKS_flip[0]?-1:1) * dir.x / norms[d];
-                        col.y = 0.5 * (PEAKS_flip[1]?-1:1) * dir.y / norms[d];
-                        col.z = 0.5 * (PEAKS_flip[2]?-1:1) * dir.z / norms[d];
-
-                        if ( PEAKS_doNormalize )
-                        {
-                            dir.x = col.x;
-                            dir.y = col.y;
-                            dir.z = col.z;
-                        }
-                        else
-                        {
-                            dir.x = col.x * norms[d] / normMax;
-                            dir.y = col.y * norms[d] / normMax;
-                            dir.z = col.z * norms[d] / normMax;
-                        }
-
-                        glColor3f( fabs(2.0*col.x), fabs(2.0*col.y), fabs(2.0*col.z) ); 
-                        glBegin(GL_LINES);
-                            glVertex3f( x-dir.x, y-dir.y, z-dir.z );
-                            glVertex3f( x+dir.x, y+dir.y, z+dir.z );
-                        glEnd();
-                    }
-                }
-                if ( GLYPHS_show )
-                {
-                    b0 = (*niiDWI->img)(x,y,z,SCHEME_idxB0[0]);
-                    if ( b0 > GLYPHS_b0_thr )
-                    {
-                        glBegin(GL_POINTS);
-                        for(d=0; d < SCHEME_shells_idx[GLYPHS_shell].size() ;d++)
-                        {
-                            idx = SCHEME_shells_idx[GLYPHS_shell][d];
-                            w = 0.5 * (float)(*niiDWI->img)(x,y,z,idx) / b0;
-                            if ( GLYPHS_use_affine ) 
-                            {
-                                dir.x = SCHEME_dirs[idx].x * ((float*)GLYPHS_affine)[0] + SCHEME_dirs[idx].y * ((float*)GLYPHS_affine)[1] + SCHEME_dirs[idx].z * ((float*)GLYPHS_affine)[2];
-                                dir.y = SCHEME_dirs[idx].x * ((float*)GLYPHS_affine)[3] + SCHEME_dirs[idx].y * ((float*)GLYPHS_affine)[4] + SCHEME_dirs[idx].z * ((float*)GLYPHS_affine)[5];
-                                dir.z = SCHEME_dirs[idx].x * ((float*)GLYPHS_affine)[6] + SCHEME_dirs[idx].y * ((float*)GLYPHS_affine)[7] + SCHEME_dirs[idx].z * ((float*)GLYPHS_affine)[8];
-                                normMax = dir.norm();
-                                dir.x *= w / normMax;
-                                dir.y *= w / normMax;
-                                dir.z *= w / normMax;
-                            }
-                            else
-                            {
-                                dir.x = w * SCHEME_dirs[idx].x;
-                                dir.y = w * SCHEME_dirs[idx].y;
-                                dir.z = w * SCHEME_dirs[idx].z;
-                            }
-                            normMax = dir.norm();
-                            glColor3f( fabs(dir.x)/normMax, fabs(dir.y)/normMax, fabs(dir.z)/normMax );
-                            glVertex3f( x+dir.x, y+dir.y, z+dir.z );
-                            glVertex3f( x-dir.x, y-dir.y, z-dir.z );
-                        }
-                        glEnd();
-                    }
-                }
-            }
-        }
-
-        // plane XZ
-        if ( showPlane[1] )
-        {
-            y = (int)VOXEL.y;
-            for(x=0; x<dim.x ;x++)
-            for(z=0; z<dim.z ;z++)
-            {
-                if ( PEAKS_show )
-                {
-                    normMax = 0;
-                    for(d=0; d<PEAKS_n; d++)
-                    {
-                        col.x = (*niiPEAKS->img)(x,y,z,3*d+0); // use "col" as tmp variable
-                        col.y = (*niiPEAKS->img)(x,y,z,3*d+1);
-                        col.z = (*niiPEAKS->img)(x,y,z,3*d+2);
-                        if ( PEAKS_use_affine )
-                        {
-                            dir.x = col.x * ((float*)PEAKS_affine)[0] + col.y * ((float*)PEAKS_affine)[1] + col.z * ((float*)PEAKS_affine)[2];
-                            dir.y = col.x * ((float*)PEAKS_affine)[3] + col.y * ((float*)PEAKS_affine)[4] + col.z * ((float*)PEAKS_affine)[5];
-                            dir.z = col.x * ((float*)PEAKS_affine)[6] + col.y * ((float*)PEAKS_affine)[7] + col.z * ((float*)PEAKS_affine)[8];
-                        }
-                        else
-                        {
-                            dir.x = col.x;
-                            dir.y = col.y;
-                            dir.z = col.z;
-                        }
-                        norms[d] = dir.norm();
-                        if ( norms[d] > normMax )
-                            normMax = norms[d];
-                    }
-
-                    for(d=0; d<PEAKS_n; d++)
-                    {
-                        if ( norms[d] < normMax*PEAKS_thr )
-                            continue;
-
-                        col.x = (*niiPEAKS->img)(x,y,z,3*d+0); // use "col" as tmp variable
-                        col.y = (*niiPEAKS->img)(x,y,z,3*d+1);
-                        col.z = (*niiPEAKS->img)(x,y,z,3*d+2);
-                        if ( PEAKS_use_affine )
-                        {
-                            dir.x = col.x * ((float*)PEAKS_affine)[0] + col.y * ((float*)PEAKS_affine)[1] + col.z * ((float*)PEAKS_affine)[2];
-                            dir.y = col.x * ((float*)PEAKS_affine)[3] + col.y * ((float*)PEAKS_affine)[4] + col.z * ((float*)PEAKS_affine)[5];
-                            dir.z = col.x * ((float*)PEAKS_affine)[6] + col.y * ((float*)PEAKS_affine)[7] + col.z * ((float*)PEAKS_affine)[8];
-                        }
-                        else
-                        {
-                            dir.x = col.x;
-                            dir.y = col.y;
-                            dir.z = col.z;
-                        }
-                        col.x = 0.5 * (PEAKS_flip[0]?-1:1) * dir.x / norms[d];
-                        col.y = 0.5 * (PEAKS_flip[1]?-1:1) * dir.y / norms[d];
-                        col.z = 0.5 * (PEAKS_flip[2]?-1:1) * dir.z / norms[d];
-
-                        if ( PEAKS_doNormalize )
-                        {
-                            dir.x = col.x;
-                            dir.y = col.y;
-                            dir.z = col.z;
-                        }
-                        else
-                        {
-                            dir.x = col.x * norms[d] / normMax;
-                            dir.y = col.y * norms[d] / normMax;
-                            dir.z = col.z * norms[d] / normMax;
-                        }
-
-                        glColor3f( fabs(2.0*col.x), fabs(2.0*col.y), fabs(2.0*col.z) );
-                        glBegin(GL_LINES);
-                            glVertex3f( x-dir.x, y-dir.y, z-dir.z );
-                            glVertex3f( x+dir.x, y+dir.y, z+dir.z );
-                        glEnd();
-                    }
-                }
-
-                if ( GLYPHS_show )
-                {
-                    b0 = (*niiDWI->img)(x,y,z,SCHEME_idxB0[0]);
-                    if ( b0 > GLYPHS_b0_thr )
-                    {
-                        glBegin(GL_POINTS);
-                        for(d=0; d < SCHEME_shells_idx[GLYPHS_shell].size() ;d++)
-                        {
-                            idx = SCHEME_shells_idx[GLYPHS_shell][d];
-                            w = 0.5 * (float)(*niiDWI->img)(x,y,z,idx) / b0;
-                            if ( GLYPHS_use_affine ) 
-                            {
-                                dir.x = SCHEME_dirs[idx].x * ((float*)GLYPHS_affine)[0] + SCHEME_dirs[idx].y * ((float*)GLYPHS_affine)[1] + SCHEME_dirs[idx].z * ((float*)GLYPHS_affine)[2];
-                                dir.y = SCHEME_dirs[idx].x * ((float*)GLYPHS_affine)[3] + SCHEME_dirs[idx].y * ((float*)GLYPHS_affine)[4] + SCHEME_dirs[idx].z * ((float*)GLYPHS_affine)[5];
-                                dir.z = SCHEME_dirs[idx].x * ((float*)GLYPHS_affine)[6] + SCHEME_dirs[idx].y * ((float*)GLYPHS_affine)[7] + SCHEME_dirs[idx].z * ((float*)GLYPHS_affine)[8];
-                                normMax = dir.norm();
-                                dir.x *= w / normMax;
-                                dir.y *= w / normMax;
-                                dir.z *= w / normMax;
-                            }
-                            else
-                            {
-                                dir.x = w * SCHEME_dirs[idx].x;
-                                dir.y = w * SCHEME_dirs[idx].y;
-                                dir.z = w * SCHEME_dirs[idx].z;
-                            }
-                            normMax = dir.norm();
-                            glColor3f( fabs(dir.x)/normMax, fabs(dir.y)/normMax, fabs(dir.z)/normMax );
-                            glVertex3f( x+dir.x, y+dir.y, z+dir.z );
-                            glVertex3f( x-dir.x, y-dir.y, z-dir.z );
-                        }
-                        glEnd();
-                    }
-                }
-            }
-        }
-
-        // plane XY
-        if ( showPlane[2] )
-        {
-            z = (int)VOXEL.z;
-            for(y=0; y<dim.y ;y++)
-            for(x=0; x<dim.x ;x++)
-            {
-                if ( PEAKS_show )
-                {
-                    normMax = 0;
-                    for(d=0; d<PEAKS_n; d++)
-                    {
-                        col.x = (*niiPEAKS->img)(x,y,z,3*d+0); // use "col" as tmp variable
-                        col.y = (*niiPEAKS->img)(x,y,z,3*d+1);
-                        col.z = (*niiPEAKS->img)(x,y,z,3*d+2);
-                        if ( PEAKS_use_affine )
-                        {
-                            dir.x = col.x * ((float*)PEAKS_affine)[0] + col.y * ((float*)PEAKS_affine)[1] + col.z * ((float*)PEAKS_affine)[2];
-                            dir.y = col.x * ((float*)PEAKS_affine)[3] + col.y * ((float*)PEAKS_affine)[4] + col.z * ((float*)PEAKS_affine)[5];
-                            dir.z = col.x * ((float*)PEAKS_affine)[6] + col.y * ((float*)PEAKS_affine)[7] + col.z * ((float*)PEAKS_affine)[8];
-                        }
-                        else
-                        {
-                            dir.x = col.x;
-                            dir.y = col.y;
-                            dir.z = col.z;
-                        }
-                        norms[d] = dir.norm();
-                        if ( norms[d] > normMax )
-                            normMax = norms[d];
-                    }
-
-                    for(d=0; d<PEAKS_n; d++)
-                    {
-                        if ( norms[d] < normMax*PEAKS_thr )
-                            continue;
-
-                        col.x = (*niiPEAKS->img)(x,y,z,3*d+0); // use "col" as tmp variable
-                        col.y = (*niiPEAKS->img)(x,y,z,3*d+1);
-                        col.z = (*niiPEAKS->img)(x,y,z,3*d+2);
-                        if ( PEAKS_use_affine )
-                        {
-                            dir.x = col.x * ((float*)PEAKS_affine)[0] + col.y * ((float*)PEAKS_affine)[1] + col.z * ((float*)PEAKS_affine)[2];
-                            dir.y = col.x * ((float*)PEAKS_affine)[3] + col.y * ((float*)PEAKS_affine)[4] + col.z * ((float*)PEAKS_affine)[5];
-                            dir.z = col.x * ((float*)PEAKS_affine)[6] + col.y * ((float*)PEAKS_affine)[7] + col.z * ((float*)PEAKS_affine)[8];
-                        }
-                        else
-                        {
-                            dir.x = col.x;
-                            dir.y = col.y;
-                            dir.z = col.z;
-                        }
-                        col.x = 0.5 * (PEAKS_flip[0]?-1:1) * dir.x / norms[d];
-                        col.y = 0.5 * (PEAKS_flip[1]?-1:1) * dir.y / norms[d];
-                        col.z = 0.5 * (PEAKS_flip[2]?-1:1) * dir.z / norms[d];
-
-                        if ( PEAKS_doNormalize )
-                        {
-                            dir.x = col.x;
-                            dir.y = col.y;
-                            dir.z = col.z;
-                        }
-                        else
-                        {
-                            dir.x = col.x * norms[d] / normMax;
-                            dir.y = col.y * norms[d] / normMax;
-                            dir.z = col.z * norms[d] / normMax;
-                        }
-
-                        glColor3f( fabs(2.0*col.x), fabs(2.0*col.y), fabs(2.0*col.z) );
-                        glBegin(GL_LINES);
-                            glVertex3f( x-dir.x, y-dir.y, z-dir.z );
-                            glVertex3f( x+dir.x, y+dir.y, z+dir.z );
-                        glEnd();
-                    }
-                }
-
-                if( GLYPHS_show)
-                {
-                    b0 = (*niiDWI->img)(x,y,z,SCHEME_idxB0[0]);
-                    if ( b0 > GLYPHS_b0_thr )
-                    {
-                        glBegin(GL_POINTS);
-                        for(d=0; d < SCHEME_shells_idx[GLYPHS_shell].size() ;d++)
-                        {
-                            idx = SCHEME_shells_idx[GLYPHS_shell][d];
-                            w = 0.5 * (float)(*niiDWI->img)(x,y,z,idx) / b0;
-                            if ( GLYPHS_use_affine ) 
-                            {
-                                dir.x = SCHEME_dirs[idx].x * ((float*)GLYPHS_affine)[0] + SCHEME_dirs[idx].y * ((float*)GLYPHS_affine)[1] + SCHEME_dirs[idx].z * ((float*)GLYPHS_affine)[2];
-                                dir.y = SCHEME_dirs[idx].x * ((float*)GLYPHS_affine)[3] + SCHEME_dirs[idx].y * ((float*)GLYPHS_affine)[4] + SCHEME_dirs[idx].z * ((float*)GLYPHS_affine)[5];
-                                dir.z = SCHEME_dirs[idx].x * ((float*)GLYPHS_affine)[6] + SCHEME_dirs[idx].y * ((float*)GLYPHS_affine)[7] + SCHEME_dirs[idx].z * ((float*)GLYPHS_affine)[8];
-                                normMax = dir.norm();
-                                dir.x *= w / normMax;
-                                dir.y *= w / normMax;
-                                dir.z *= w / normMax;
-                            }
-                            else
-                            {
-                                dir.x = w * SCHEME_dirs[idx].x;
-                                dir.y = w * SCHEME_dirs[idx].y;
-                                dir.z = w * SCHEME_dirs[idx].z;
-                            }
-
-                            normMax = dir.norm();
-                            glColor3f( fabs(dir.x)/normMax, fabs(dir.y)/normMax, fabs(dir.z)/normMax );
-                            glVertex3f( x+dir.x, y+dir.y, z+dir.z );
-                            glVertex3f( x-dir.x, y-dir.y, z-dir.z );
-                        }
-                        glEnd();
-                    }
-                }
-            }
-        }
-
-        glPopMatrix();
-    }
-
-    /* =================== */
-    /* Draw the SCALAR MAP */
-    /* =================== */
-    if ( showPlane[0] || showPlane[1] || showPlane[2] )
-    {
-        glDisable( GL_CULL_FACE );
-        glEnable( GL_BLEND );
-        glBlendFunc( GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA );
-
-        // to avoid z-fighting
-        glPolygonOffset( 1.0, 1.0 );
-        glEnable(GL_POLYGON_OFFSET_FILL);
-        glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
-
-        glLineWidth( 3 );
-
-        int x, y, z; // voxel coordinates NB: (0,0,0) -> corner of voxel
-        float color;
-
-        // plane YZ
-        if ( showPlane[0]  )
-        {
-            glPushMatrix();
-            glTranslatef(0.5,0,0);
-
-            x = (int)VOXEL.x;
-            for(y=0; y<dim.y ;y++)
-            for(z=0; z<dim.z ;z++)
-            {
-                color = ( MAP(x,y,z) - MAP_min_view) / ( MAP_max_view - MAP_min_view );
-                glColor4f(color,color,color,MAP_opacity);
-                glBegin(GL_QUADS);
-                    glVertex3f(x, y,   z);
-                    glVertex3f(x, y,   z+1);
-                    glVertex3f(x, y+1, z+1);
-                    glVertex3f(x, y+1, z);
-                glEnd();
-            }
-            // colored frame
-            if ( showAxes )
-            {
-                glColor3f(1,0,0);
-                glBegin(GL_LINE_STRIP);
-                    glVertex3f(x,0,0);
-                    glVertex3f(x,dim.y,0);
-                    glVertex3f(x,dim.y,dim.z);
-                    glVertex3f(x,0,dim.z);
-                    glVertex3f(x,0,0);
-                glEnd();
-            }
-
-            glPopMatrix();
-        }
-
-        // plane XZ
-        if ( showPlane[1] )
-        {
-            glPushMatrix();
-            glTranslatef(0,0.5,0);
-
-            y = (int)VOXEL.y;
-            for(x=0; x<dim.x ;x++)
-            for(z=0; z<dim.z ;z++)
-            {
-                color = ( MAP(x,y,z) - MAP_min_view) / ( MAP_max_view - MAP_min_view );
-                glColor4f(color,color,color,MAP_opacity);
-                glBegin(GL_QUADS);
-                    glVertex3f(x,   y, z);
-                    glVertex3f(x,   y, z+1);
-                    glVertex3f(x+1, y, z+1);
-                    glVertex3f(x+1, y, z);
-                glEnd();
-            }
-            // colored frame
-            if ( showAxes )
-            {
-                glColor3f(0,1,0);
-                glBegin(GL_LINE_STRIP);
-                    glVertex3f(0,y,0);
-                    glVertex3f(dim.x,y,0);
-                    glVertex3f(dim.x,y,dim.z);
-                    glVertex3f(0,y,dim.z);
-                    glVertex3f(0,y,0);
-                glEnd();
-            }
-
-            glPopMatrix();
-        }
-
-        // plane XY
-        if ( showPlane[2] )
-        {
-            glPushMatrix();
-            glTranslatef(0,0,0.5);
-
-            z = (int)VOXEL.z;
-            for(y=0; y<dim.y ;y++)
-            for(x=0; x<dim.x ;x++)
-            {
-                color = ( MAP(x,y,z) - MAP_min_view) / ( MAP_max_view - MAP_min_view );
-                glColor4f(color,color,color,MAP_opacity);
-                glBegin(GL_QUADS);
-                    glVertex3f(x,   y,   z);
-                    glVertex3f(x+1, y,   z);
-                    glVertex3f(x+1, y+1, z);
-                    glVertex3f(x,   y+1, z);
-                glEnd();
-            }
-
-            // colored frame
-            if ( showAxes )
-            {
-                glColor3f(0,0,1);
-                glBegin(GL_LINE_STRIP);
-                    glVertex3f(0,0,z);
-                    glVertex3f(dim.x,0,z);
-                    glVertex3f(dim.x,dim.y,z);
-                    glVertex3f(0,dim.y,z);
-                    glVertex3f(0,0,z);
-                glEnd();
-            }
-
-            glPopMatrix();
-        }
-
-        glEnable(GL_CULL_FACE);
-        glDisable( GL_BLEND );
-        glDisable(GL_POLYGON_OFFSET_FILL);
-    }
-
-    /* ====================== */
-    /* Draw the CURRENT VOXEL */
-    /* ====================== */
-    if ( showAxes )
-    {
-        glPushMatrix();
-        glTranslatef( VOXEL.x+0.5, VOXEL.y+0.5, VOXEL.z+0.5 );
-
-        glEnable( GL_BLEND );
-        glBlendFunc( GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA );
-        glLineWidth(1);
-        glColor4f( 1,1,0,1 );
-        glutWireCube( 1 );
-        glColor4f( 1,1,0,0.25 );
-        glutSolidCube( 1 );
-        glDisable( GL_BLEND );
-
-        glPopMatrix();
-    }
-
-    glPopMatrix();
-    PrintConfig();
-    glutSwapBuffers();
-}
-
-
-// INITIALIZATION
-// --------------
-void OpenGL_init( int argc, char** argv )
-{
-    glutInit( &argc, argv );
-    glutInitDisplayMode( GLUT_DEPTH | GLUT_DOUBLE | GLUT_RGBA | GLUT_ALPHA | GLUT_MULTISAMPLE );
-    ScreenX = 0.7*glutGet(GLUT_SCREEN_WIDTH);  if (ScreenX==0) ScreenX = 800;
-    ScreenY = 0.7*glutGet(GLUT_SCREEN_HEIGHT); if (ScreenY==0) ScreenY = 600;
-    glutInitWindowSize( ScreenX, ScreenY );
-    glutInitWindowPosition( 0.15*glutGet(GLUT_SCREEN_WIDTH), 0.15*glutGet(GLUT_SCREEN_HEIGHT) );
-    glutCreateWindow( "COMMIT debugger" );
-
-    // Projection and model matrix
-    glMatrixMode(GL_PROJECTION);
-    glLoadIdentity();
-    glMatrixMode(GL_MODELVIEW);
-    glLoadIdentity();
-
-    translation.x	= translation.y = 0;
-    zoom			= 0;
-    OPENGL_utils::identity( rot );
-    OPENGL_utils::identity( id );
-
-    glEnable( GL_DEPTH_TEST );
-    glClearColor( 0.1, 0.1, 0.1, 0.0 );
-
-    // lighting
-    glShadeModel( GL_SMOOTH );
-    glEnable( GL_NORMALIZE );
-
-    GLfloat white[] = {.5f, .5f, .5f, 1.0f};
-    glMaterialfv(GL_FRONT, GL_SPECULAR, white);
-    GLfloat shininess[] = {32};
-    glMaterialfv(GL_FRONT, GL_SHININESS, shininess);
-
-    glLightModeli(GL_LIGHT_MODEL_TWO_SIDE, GL_FALSE);
-    GLfloat global_ambient[] = { 0.2f, 0.2f, 0.2f, 1.0f };
-    glLightModelfv(GL_LIGHT_MODEL_AMBIENT, global_ambient);
-    glEnable ( GL_COLOR_MATERIAL );
-
-    // register CALLBACKS and open window
-    glutKeyboardFunc( GLUT__keyboard );
-    glutSpecialFunc(  GLUT__specialkey );
-    glutDisplayFunc(  GLUT__display );
-    glutReshapeFunc(  GLUT__reshape );
-    glutMouseFunc(    GLUT__mouse );
-    glutMotionFunc(   GLUT__motion );
-
-    GLUT__createMenu();
-
-    glutMainLoop();
-}
+#define GL_GLEXT_PROTOTYPES 1
+#ifdef __APPLE__
+    #include <OpenGL/gl.h>
+    #include <OpenGL/glext.h>
+    #include <GLUT/glut.h>
+#else
+    #include <GL/gl.h>
+    #include <GL/glext.h>
+    #include <GL/glut.h>
+#endif
+
+#include "OPENGL_utils.h"
+using namespace OPENGL_utils;
+
+/* global variables */
+GLfloat			id[16], rot[16], rot1[16], rot2[16], rot3[16];
+Vec3Df			translation;
+Vec3Di			start;
+GLint			moving;
+GLfloat			zoom;
+
+float ScreenX, ScreenY;
+
+
+void drawString( const char *string )
+{
+    static int y = glutGet( GLUT_WINDOW_HEIGHT ) - 50;
+    if ( string=="" )
+        y = glutGet( GLUT_WINDOW_HEIGHT ) - 50;
+    else
+    {
+        glRasterPos2i(10, y);
+        for (const char* c=string; *c != '\0'; c++) 
+            glutBitmapCharacter(GLUT_BITMAP_9_BY_15, *c);
+        y -= 18;
+    }
+}
+
+
+void PrintConfig()
+{
+    if ( !showConfig )
+        return;
+
+    glMatrixMode(GL_PROJECTION);
+    glPushMatrix();             
+    glLoadIdentity();
+    glMatrixMode( GL_MODELVIEW ) ;
+    glPushMatrix() ;
+    glLoadIdentity() ;
+    int w = glutGet( GLUT_WINDOW_WIDTH );
+    int h = glutGet( GLUT_WINDOW_HEIGHT );
+    glOrtho( 0, w, 0, h, -1, 1 );
+    glDisable( GL_DEPTH_TEST ); 
+
+    char s[1024];
+    glColor3f(1, 1, 0);
+    drawString( "" ); // reset initial position
+
+    drawString( "MAP" );
+    sprintf( s, "   - value(%d,%d,%d) = %.2f", VOXEL.x, VOXEL.y, VOXEL.z, MAP(VOXEL.x, VOXEL.y, VOXEL.z) );
+    drawString( s );
+    sprintf( s, "   - range = [ %.1f ... %.1f ]", MAP_min_view, MAP_max_view );
+    drawString( s );
+    sprintf( s, "   - opacity = %.1f", MAP_opacity );
+    drawString( s );
+
+    drawString( "SIGNAL" );
+    sprintf( s, "   - shell = %d/%d  (b=%.1f)", GLYPHS_shell+1, SCHEME_shells_b.size(), SCHEME_shells_b[GLYPHS_shell] );
+    drawString( s );
+    sprintf( s, "   - use affine = %s", GLYPHS_use_affine?"true":"false" );
+    drawString( s );
+    sprintf( s, "   - flip = [ %d, %d, %d ]", GLYPHS_flip[0], GLYPHS_flip[1], GLYPHS_flip[2] );
+    drawString( s );
+    sprintf( s, "   - b0 thr = %.1f", GLYPHS_b0_thr );
+    drawString( s );
+
+    if ( PEAKS_n>0 )
+    {
+        drawString( "PEAKS" );
+        sprintf( s, "   - use affine = %s", PEAKS_use_affine?"true":"false" );
+        drawString( s );
+        sprintf( s, "   - flip = [ %d, %d, %d ]", PEAKS_flip[0], PEAKS_flip[1], PEAKS_flip[2] );
+        drawString( s );
+        sprintf( s, "   - thr = %.1f", PEAKS_thr );
+        drawString( s );
+        sprintf( s, "   - normalize = %s", PEAKS_doNormalize?"true":"false" );
+        drawString( s );
+    }
+
+    if ( TRK_nTractsPlotted>0 )
+    {
+        drawString( "FIBERS" );
+        sprintf( s, "   - shift = [ %.1f %.1f %.1f ]  (voxels)", TRK_offset.x, TRK_offset.y, TRK_offset.z );
+        drawString( s );
+        sprintf( s, "   - slab thickness = %.1f  (voxels)", TRK_crop );
+        drawString( s );
+    }
+
+    glEnable (GL_DEPTH_TEST);     
+    glMatrixMode(GL_PROJECTION);
+    glPopMatrix();
+    glMatrixMode(GL_MODELVIEW);
+    glPopMatrix();
+}
+
+
+// KEYBOARD callback
+// -----------------
+void GLUT__keyboard( unsigned char key, GLint x=0, GLint y=0 )
+{
+    bool doRedraw = true;
+
+    switch( key )
+    {
+        case 'l': showConfig = 1 - showConfig; break;
+
+        case '1': showPlane[0] = 1 - showPlane[0]; break;
+        case '2': showPlane[1] = 1 - showPlane[1]; break;
+        case '3': showPlane[2] = 1 - showPlane[2]; break;
+        case '4':
+            showPlane[0] = 1;
+            showPlane[1] = 0;
+            showPlane[2] = 0;
+            translation.x	= translation.y = 0;
+            OPENGL_utils::identity(rot1);
+            OPENGL_utils::rotateX(rot1, 90.0, rot2);
+            OPENGL_utils::rotateZ(rot2, 90.0, rot);
+            break;
+        case '5':
+            showPlane[0] = 0;
+            showPlane[1] = 1;
+            showPlane[2] = 0;
+            translation.x	= translation.y = 0;
+            OPENGL_utils::identity(rot1);
+            OPENGL_utils::rotateX(rot1, 90.0, rot);
+            break;
+        case '6':
+            showPlane[0] = 0;
+            showPlane[1] = 0;
+            showPlane[2] = 1;
+            translation.x	= translation.y = 0;
+            OPENGL_utils::identity( rot );
+            break;
+
+        case '0': showAxes = 1 - showAxes; break;
+        case '-': zoom += 10.0; break;
+        case '+': zoom -= 10.0; break;
+        case 'm': MAP_max_view = fmaxf(0.0,MAP_max_view-MAP_max*0.05); break;
+        case 'M': MAP_max_view = fminf(MAP_max,MAP_max_view+MAP_max*0.05); break;
+        case 'o': MAP_opacity = fmaxf(0.0,MAP_opacity-0.1); break;
+        case 'O': MAP_opacity = fminf(1.0,MAP_opacity+0.1); break;
+        case 'w': LINE_width = fmaxf( 1,LINE_width-1); break;
+        case 'W': LINE_width = fminf(10,LINE_width+1); break;
+        case 'r':
+            showPlane[0] = showPlane[1] = showPlane[2] = 1;
+            translation.x	= translation.y = 0;
+            zoom			= 0;
+            OPENGL_utils::identity( rot );
+            break;
+
+        case 's': GLYPHS_show = 1 - GLYPHS_show; break;
+        case 'S': GLYPHS_shell = (GLYPHS_shell+1) % SCHEME_shells_idx.size(); break;
+        case 'a': GLYPHS_use_affine = 1 - GLYPHS_use_affine; break;
+        case 'x': GLYPHS_flip[0] = 1 - GLYPHS_flip[0]; for(int d=0; d < SCHEME_dirs.size() ;d++) SCHEME_dirs[d].x *= -1; break;
+        case 'y': GLYPHS_flip[1] = 1 - GLYPHS_flip[1]; for(int d=0; d < SCHEME_dirs.size() ;d++) SCHEME_dirs[d].y *= -1; break;
+        case 'z': GLYPHS_flip[2] = 1 - GLYPHS_flip[2]; for(int d=0; d < SCHEME_dirs.size() ;d++) SCHEME_dirs[d].z *= -1; break;
+        case 'b': GLYPHS_b0_thr = fmaxf(0.0,GLYPHS_b0_thr-10.0); break;
+        case 'B': GLYPHS_b0_thr = fminf(MAP_max,GLYPHS_b0_thr+10.0); break;
+
+        case 'p': if ( PEAKS_n>0 ) PEAKS_show  = 1 - PEAKS_show; break;
+        case 'A': PEAKS_use_affine = 1 - PEAKS_use_affine; break;
+        case 'X': PEAKS_flip[0] = 1 - PEAKS_flip[0]; break;
+        case 'Y': PEAKS_flip[1] = 1 - PEAKS_flip[1]; break;
+        case 'Z': PEAKS_flip[2] = 1 - PEAKS_flip[2]; break;
+        case 't': PEAKS_thr = fmaxf(PEAKS_thr - 0.1, 0.0); break;
+        case 'T': PEAKS_thr = fminf(PEAKS_thr + 0.1, 1.0); break;
+        case 'n': PEAKS_doNormalize = 1 - PEAKS_doNormalize; break;
+
+        case 'f': if ( TRK_nTractsPlotted>0 ) TRK_show = 1 - TRK_show; break;
+        case 'c': TRK_crop = fmaxf( 0.0,TRK_crop-0.5); break;
+        case 'C': TRK_crop = fminf(max(dim.x,max(dim.y,dim.z)),TRK_crop+0.5); break;
+        case ' ': TRK_crop_mode = 1 - TRK_crop_mode; break;
+
+        case 'q':
+        case 27 : exit(0); break;
+
+        default: doRedraw = false;
+    }
+
+    if ( doRedraw )
+        glutPostRedisplay();
+}
+
+
+// MENU callback
+// -------------
+void GLUT__menu( int id ) 
+{
+    switch( id )
+    {
+        case   0: GLUT__keyboard('q'); break;
+
+        case 101: GLUT__keyboard('s'); break;
+        case 102: GLUT__keyboard('S'); break;
+        case 103: GLUT__keyboard('a'); break;
+        case 104: GLUT__keyboard('x'); break;
+        case 105: GLUT__keyboard('y'); break;
+        case 106: GLUT__keyboard('z'); break;
+        case 107: GLUT__keyboard('b'); break;
+        case 108: GLUT__keyboard('B'); break;
+
+        case 201: GLUT__keyboard('p'); break;
+        case 202: GLUT__keyboard('A'); break;
+        case 203: GLUT__keyboard('X'); break;
+        case 204: GLUT__keyboard('Y'); break;
+        case 205: GLUT__keyboard('Z'); break;
+        case 206: GLUT__keyboard('t'); break;
+        case 207: GLUT__keyboard('T'); break;
+        case 208: GLUT__keyboard('n'); break;
+
+        case 301: GLUT__keyboard('f'); break;
+        case 302: GLUT__keyboard('c'); break;
+        case 303: GLUT__keyboard('C'); break;
+        case 304: GLUT__keyboard(' '); break;
+
+        case 401: GLUT__keyboard('1'); break;
+        case 402: GLUT__keyboard('2'); break;
+        case 403: GLUT__keyboard('3'); break;
+        case 404: GLUT__keyboard('4'); break;
+        case 405: GLUT__keyboard('5'); break;
+        case 406: GLUT__keyboard('6'); break;
+        case 407: GLUT__keyboard('0'); break;
+        case 408: GLUT__keyboard('-'); break;
+        case 409: GLUT__keyboard('+'); break;
+        case 410: GLUT__keyboard('m'); break;
+        case 411: GLUT__keyboard('M'); break;
+        case 412: GLUT__keyboard('o'); break;
+        case 413: GLUT__keyboard('O'); break;
+        case 414: GLUT__keyboard('w'); break;
+        case 415: GLUT__keyboard('W'); break;
+        case 416: GLUT__keyboard('r'); break;
+        case 417: GLUT__keyboard('l'); break;
+    }
+}
+
+
+// Create the dropdown MENU
+// ------------------------
+void GLUT__createMenu()
+{
+    int submenu_SIGNAL_id, submenu_PEAKS_id, submenu_FIBERS_id, submenu_VIEW_id;
+
+    submenu_SIGNAL_id = glutCreateMenu( GLUT__menu );
+    glutAddMenuEntry("[s] Show/hide",         101);
+    glutAddMenuEntry("[S] Change shell",      102);
+    glutAddMenuEntry("[a] Use affine",        103);
+    glutAddMenuEntry("[x] Flip X axis",       104);
+    glutAddMenuEntry("[y] Flip Y axis",       105);
+    glutAddMenuEntry("[z] Flip Z axis",       106);
+    glutAddMenuEntry("[b] Decrease b0 thr",   107);
+    glutAddMenuEntry("[B] Increase b0 thr",   108);
+
+    if ( PEAKS_n>0 )
+    {
+        submenu_PEAKS_id = glutCreateMenu( GLUT__menu );
+        glutAddMenuEntry("[p] Show/hide",         201);
+        glutAddMenuEntry("[A] Use affine",        202);
+        glutAddMenuEntry("[X] Flip X axis",       203);
+        glutAddMenuEntry("[Y] Flip Y axis",       204);
+        glutAddMenuEntry("[Z] Flip Z axis",       205);
+        glutAddMenuEntry("[t] Decrease threshold",206);
+        glutAddMenuEntry("[T] Increase threshold",207);
+        glutAddMenuEntry("[n] Normalize length",  208);
+    }
+
+    if ( TRK_nTractsPlotted>0 )
+    {
+        submenu_FIBERS_id = glutCreateMenu( GLUT__menu );
+        glutAddMenuEntry("[f] Show/hide",         301);
+        glutAddMenuEntry("[c] Decrease crop size",302);
+        glutAddMenuEntry("[C] Increase crop size",303);
+        glutAddMenuEntry("[ ] Change crop mode",  304);
+    }
+
+    submenu_VIEW_id = glutCreateMenu( GLUT__menu );
+    glutAddMenuEntry("[1] Show/hide YZ plane", 401);
+    glutAddMenuEntry("[2] Show/hide XZ plane", 402);
+    glutAddMenuEntry("[3] Show/hide XY plane", 403);
+    glutAddMenuEntry("[4] Reset to YZ plane",  404);
+    glutAddMenuEntry("[5] Reset to XZ plane",  405);
+    glutAddMenuEntry("[6] Reset to XY plane",  406);
+    glutAddMenuEntry("[0] Show/hide axes",     407);
+    glutAddMenuEntry("[-] Decrease zoom",      408);
+    glutAddMenuEntry("[+] Increase zoom",      409);
+    glutAddMenuEntry("[m] Decrease max value", 410);
+    glutAddMenuEntry("[M] Increase max value", 411);
+    glutAddMenuEntry("[o] Decrease opacity",   412);
+    glutAddMenuEntry("[O] Increase opacity",   413);
+    glutAddMenuEntry("[t] Decrease line width",414);
+    glutAddMenuEntry("[T] Increase line width",415);
+    glutAddMenuEntry("[r] Reset view",         416);
+    glutAddMenuEntry("[l] Show/hide log",      417);
+
+    int menu_id = glutCreateMenu( GLUT__menu );
+    glutAddSubMenu("Signal", submenu_SIGNAL_id);
+    if ( PEAKS_n>0 )
+        glutAddSubMenu("Peaks", submenu_PEAKS_id);
+    if ( TRK_nTractsPlotted>0 )
+        glutAddSubMenu("Fibers", submenu_FIBERS_id);
+    glutAddSubMenu("View options", submenu_VIEW_id);
+    glutAddMenuEntry("Quit", 0);
+    glutAttachMenu(GLUT_RIGHT_BUTTON);
+}
+
+
+// RESHAPE callback
+// ----------------
+void GLUT__reshape( GLint w, GLint h )
+{
+    ScreenX = w;
+    ScreenY = h;
+
+    glViewport( 0, 0, w, h );
+
+    glMatrixMode( GL_PROJECTION );
+    glLoadIdentity();
+    gluPerspective( 45.0f, ScreenX/ScreenY, 1.0f, 5000.0f );
+
+    glMatrixMode( GL_MODELVIEW );
+    glLoadIdentity();
+    gluLookAt(
+        0.0, 0.0, 2.0 * max(pixdim.x*dim.x,pixdim.y*dim.y) * ScreenY/ScreenX, // eye point
+        0.0, 0.0, 0.0, // reference point
+        0.0, 1.0, 0.0  // up vector
+    );
+}
+
+
+// SPECIALKEY callback
+// -------------------
+void GLUT__specialkey( GLint key, GLint x, GLint y )
+{
+    bool doRedraw = true;
+    GLint modif = glutGetModifiers();
+    GLint ALT   = modif & GLUT_ACTIVE_ALT;
+    GLint CTRL  = modif & GLUT_ACTIVE_CTRL;
+
+    switch( key )
+    {
+        case GLUT_KEY_LEFT:
+            if ( ALT )
+                TRK_offset.x -= 0.5;
+            else if ( CTRL )
+                translation.x -= 2.0;
+            else
+                VOXEL.x--;
+            break;
+        case GLUT_KEY_RIGHT:
+            if ( ALT )
+                TRK_offset.x += 0.5;
+            else if ( CTRL )
+                translation.x += 2.0;
+            else
+                VOXEL.x++;
+            break;
+        case GLUT_KEY_DOWN:
+            if ( ALT )
+                TRK_offset.y -= 0.5;
+            else if ( CTRL )
+                translation.y -= 2.0;
+            else
+                VOXEL.y--;
+            break;
+        case GLUT_KEY_UP:
+            if ( ALT )
+                TRK_offset.y += 0.5;
+            else if ( CTRL )
+                translation.y += 2.0;
+            else
+                VOXEL.y++;
+            break;
+        case GLUT_KEY_PAGE_DOWN:
+            if ( ALT )
+                TRK_offset.z -= 0.5;
+            else
+                VOXEL.z--;
+            break;
+        case GLUT_KEY_PAGE_UP:
+            if ( ALT )
+                TRK_offset.z += 0.5;
+            else
+                VOXEL.z++;
+            break;
+
+        default:
+            doRedraw = false;
+    }
+
+    // check the bounds
+    VOXEL.x = max( VOXEL.x, 0 );
+    VOXEL.y = max( VOXEL.y, 0 );
+    VOXEL.z = max( VOXEL.z, 0 );
+    VOXEL.x = min( VOXEL.x, dim.x-1 );
+    VOXEL.y = min( VOXEL.y, dim.y-1 );
+    VOXEL.z = min( VOXEL.z, dim.z-1 );
+
+    if ( doRedraw )
+        glutPostRedisplay();
+}
+
+
+// MOUSE callback
+// --------------
+void GLUT__mouse( GLint button, GLint state, GLint x, GLint y )
+{
+    if (state == GLUT_DOWN)
+    {
+        if ( button == GLUT_LEFT_BUTTON && glutGetModifiers() != GLUT_ACTIVE_CTRL )
+        {
+            moving = 1;
+            start.x = x;
+            start.y = y;
+        }
+        // NOTE: does not work, issue with glutGetModifiers not getting CTRL
+        // else if ( button == GLUT_LEFT_BUTTON && glutGetModifiers() == GLUT_ACTIVE_CTRL )
+        // {
+        //     moving = 2;
+        //     start.x = x;
+        //     start.y = y;
+        // }
+        else if ( (button == GLUT_MIDDLE_BUTTON) || (button == GLUT_LEFT_BUTTON && glutGetModifiers() == GLUT_ACTIVE_ALT) )
+        {
+            moving = 3;
+            start.x = x;
+            start.y = y;
+        }
+    }
+    else if (state == GLUT_UP)
+    {
+        moving = 0;
+    }
+}
+
+
+// MOTION callback
+// ---------------
+void GLUT__motion( GLint x, GLint y )
+{
+    if (moving==1)
+    {
+        OPENGL_utils::translate(id, 0,0,0, rot1);
+
+        OPENGL_utils::rotateY(id,start.x-x,rot3);
+        OPENGL_utils::matXMat(rot,rot1,rot2);
+        OPENGL_utils::rotateX(id,start.y-y,rot1);
+        OPENGL_utils::matXMat(rot2,rot1,rot);
+        OPENGL_utils::matXMat(rot,rot3,rot2);
+
+        OPENGL_utils::translate(id, 0,0,0, rot1);
+        OPENGL_utils::matXMat(rot2,rot1,rot);
+
+        start.x = x;
+        start.y = y;
+    }
+
+    else if (moving==2)
+    {
+        zoom = zoom + (y-start.y)/2.0;
+        start.y = y;
+    }
+
+    else if (moving==3)
+    {
+        translation.x = translation.x - (start.x-x)/3.0;
+        translation.y = translation.y + (start.y-y)/3.0;
+        start.x = x;
+        start.y = y;
+    }
+
+    glutPostRedisplay();
+}
+
+
+// DISPLAY callback
+// ----------------
+void GLUT__display( void )
+{
+    glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );
+
+    glPushMatrix();
+    glTranslatef(translation.x, translation.y, -zoom); // mouse translation + zoom
+    glMultMatrixf(rot); // mouse rotation    
+    glTranslatef( -pixdim.x*dim.x/2.0, -pixdim.y*dim.y/2.0, -pixdim.z*dim.z/2.0 ); // center the FOV
+    glScalef( pixdim.x, pixdim.y, pixdim.z ); // account for voxel size
+
+    glEnable(GL_MULTISAMPLE_ARB);
+
+    /* ============= */
+    /* Draw the AXES */
+    /* ============= */
+    if ( showAxes )
+    {
+        glLineWidth(2);
+        glBegin(GL_LINES);
+            glColor4f( 1,0,0,1); glVertex3f( 0,0,0 ); glVertex3f( 10,  0,  0 );
+            glColor4f( 0,1,0,1); glVertex3f( 0,0,0 ); glVertex3f(  0, 10,  0 );
+            glColor4f( 0,0,1,1); glVertex3f( 0,0,0 ); glVertex3f(  0,  0, 10 );
+        glEnd();
+    }
+
+    /* =============== */
+    /* Draw the TRACTS */
+    /* =============== */
+    if ( TRK_show )
+    {
+        glPushMatrix();
+        glTranslatef(TRK_offset.x, TRK_offset.y, TRK_offset.z);
+
+        glLineWidth(1.0f);
+
+        float *ptr  = TRK_coords, *ptrc = TRK_colors;
+        VECTOR<float> Vc( VOXEL.x+0.5, VOXEL.y+0.5, VOXEL.z+0.5 ); // voxel center
+        float thr = 0.5*TRK_crop;
+        for(int f=0; f < TRK_nTractsPlotted; f++)
+        {
+            glBegin(GL_LINE_STRIP);
+            for(int i=0; i < TRK_nPoints[f]; i++)
+            {
+                // plot segment only if it's close to center of VOXEL
+                if (
+                      (
+                        TRK_crop_mode && (
+                        ( showPlane[0] && abs( (ptr[0]+TRK_offset.x) - Vc.x ) <= thr ) ||
+                        ( showPlane[1] && abs( (ptr[1]+TRK_offset.y) - Vc.y ) <= thr ) ||
+                        ( showPlane[2] && abs( (ptr[2]+TRK_offset.z) - Vc.z ) <= thr ) )
+                      )
+                      ||
+                      (
+                        !TRK_crop_mode && (
+                        ( abs( (ptr[0]+TRK_offset.x) - Vc.x ) <= thr ) &&
+                        ( abs( (ptr[1]+TRK_offset.y) - Vc.y ) <= thr ) &&
+                        ( abs( (ptr[2]+TRK_offset.z) - Vc.z ) <= thr ) )
+                      )
+                    )
+                {
+                    glColor3f(  ptrc[0], ptrc[1], ptrc[2] );
+                    glVertex3f( ptr[0],  ptr[1],  ptr[2]  );
+                }
+                else
+                {
+                    glEnd();
+                    glBegin(GL_LINE_STRIP);
+                }
+                ptr  += 3;
+                ptrc += 3;
+            }
+            glEnd();
+        }
+
+        glPopMatrix();
+    }
+
+    /* ============== */
+    /* Draw the PEAKS */
+    /* ============== */
+    if ( PEAKS_show || GLYPHS_show )
+    {
+        glDisable( GL_BLEND );
+        glLineWidth( LINE_width );
+        glPointSize( LINE_width );
+
+        glPushMatrix();
+        glTranslatef(.5,.5,.5);
+
+        Vec3Df dir, col;
+        int x,y,z,d,idx;
+        float norms[PEAKS_n], normMax, b0, w;
+
+        // plane YZ
+        if ( showPlane[0]  )
+        {
+            x = (int)VOXEL.x;
+            for(y=0; y<dim.y ;y++)
+            for(z=0; z<dim.z ;z++)
+            {
+                if ( PEAKS_show )
+                {
+                    normMax = 0;
+                    for(d=0; d<PEAKS_n; d++)
+                    {
+                        col.x = (*niiPEAKS->img)(x,y,z,3*d+0); // use "col" as tmp variable
+                        col.y = (*niiPEAKS->img)(x,y,z,3*d+1);
+                        col.z = (*niiPEAKS->img)(x,y,z,3*d+2);
+                        if ( PEAKS_use_affine )
+                        {
+                            dir.x = col.x * ((float*)PEAKS_affine)[0] + col.y * ((float*)PEAKS_affine)[1] + col.z * ((float*)PEAKS_affine)[2];
+                            dir.y = col.x * ((float*)PEAKS_affine)[3] + col.y * ((float*)PEAKS_affine)[4] + col.z * ((float*)PEAKS_affine)[5];
+                            dir.z = col.x * ((float*)PEAKS_affine)[6] + col.y * ((float*)PEAKS_affine)[7] + col.z * ((float*)PEAKS_affine)[8];
+                        }
+                        else
+                        {
+                            dir.x = col.x;
+                            dir.y = col.y;
+                            dir.z = col.z;
+                        }
+                        norms[d] = dir.norm();
+                        if ( norms[d] > normMax )
+                            normMax = norms[d];
+                    }
+
+                    for(d=0; d<PEAKS_n; d++)
+                    {
+                        if ( norms[d] < PEAKS_thr*normMax )
+                            continue;
+
+                        col.x = (*niiPEAKS->img)(x,y,z,3*d+0); // use "col" as tmp variable
+                        col.y = (*niiPEAKS->img)(x,y,z,3*d+1);
+                        col.z = (*niiPEAKS->img)(x,y,z,3*d+2);
+                        if ( PEAKS_use_affine )
+                        {
+                            dir.x = col.x * ((float*)PEAKS_affine)[0] + col.y * ((float*)PEAKS_affine)[1] + col.z * ((float*)PEAKS_affine)[2];
+                            dir.y = col.x * ((float*)PEAKS_affine)[3] + col.y * ((float*)PEAKS_affine)[4] + col.z * ((float*)PEAKS_affine)[5];
+                            dir.z = col.x * ((float*)PEAKS_affine)[6] + col.y * ((float*)PEAKS_affine)[7] + col.z * ((float*)PEAKS_affine)[8];
+                        }
+                        else
+                        {
+                            dir.x = col.x;
+                            dir.y = col.y;
+                            dir.z = col.z;
+                        }
+                        col.x = 0.5 * (PEAKS_flip[0]?-1:1) * dir.x / norms[d];
+                        col.y = 0.5 * (PEAKS_flip[1]?-1:1) * dir.y / norms[d];
+                        col.z = 0.5 * (PEAKS_flip[2]?-1:1) * dir.z / norms[d];
+
+                        if ( PEAKS_doNormalize )
+                        {
+                            dir.x = col.x;
+                            dir.y = col.y;
+                            dir.z = col.z;
+                        }
+                        else
+                        {
+                            dir.x = col.x * norms[d] / normMax;
+                            dir.y = col.y * norms[d] / normMax;
+                            dir.z = col.z * norms[d] / normMax;
+                        }
+
+                        glColor3f( fabs(2.0*col.x), fabs(2.0*col.y), fabs(2.0*col.z) ); 
+                        glBegin(GL_LINES);
+                            glVertex3f( x-dir.x, y-dir.y, z-dir.z );
+                            glVertex3f( x+dir.x, y+dir.y, z+dir.z );
+                        glEnd();
+                    }
+                }
+                if ( GLYPHS_show )
+                {
+                    b0 = (*niiDWI->img)(x,y,z,SCHEME_idxB0[0]);
+                    if ( b0 > GLYPHS_b0_thr )
+                    {
+                        glBegin(GL_POINTS);
+                        for(d=0; d < SCHEME_shells_idx[GLYPHS_shell].size() ;d++)
+                        {
+                            idx = SCHEME_shells_idx[GLYPHS_shell][d];
+                            w = 0.5 * (float)(*niiDWI->img)(x,y,z,idx) / b0;
+                            if ( GLYPHS_use_affine ) 
+                            {
+                                dir.x = SCHEME_dirs[idx].x * ((float*)GLYPHS_affine)[0] + SCHEME_dirs[idx].y * ((float*)GLYPHS_affine)[1] + SCHEME_dirs[idx].z * ((float*)GLYPHS_affine)[2];
+                                dir.y = SCHEME_dirs[idx].x * ((float*)GLYPHS_affine)[3] + SCHEME_dirs[idx].y * ((float*)GLYPHS_affine)[4] + SCHEME_dirs[idx].z * ((float*)GLYPHS_affine)[5];
+                                dir.z = SCHEME_dirs[idx].x * ((float*)GLYPHS_affine)[6] + SCHEME_dirs[idx].y * ((float*)GLYPHS_affine)[7] + SCHEME_dirs[idx].z * ((float*)GLYPHS_affine)[8];
+                                normMax = dir.norm();
+                                dir.x *= w / normMax;
+                                dir.y *= w / normMax;
+                                dir.z *= w / normMax;
+                            }
+                            else
+                            {
+                                dir.x = w * SCHEME_dirs[idx].x;
+                                dir.y = w * SCHEME_dirs[idx].y;
+                                dir.z = w * SCHEME_dirs[idx].z;
+                            }
+                            normMax = dir.norm();
+                            glColor3f( fabs(dir.x)/normMax, fabs(dir.y)/normMax, fabs(dir.z)/normMax );
+                            glVertex3f( x+dir.x, y+dir.y, z+dir.z );
+                            glVertex3f( x-dir.x, y-dir.y, z-dir.z );
+                        }
+                        glEnd();
+                    }
+                }
+            }
+        }
+
+        // plane XZ
+        if ( showPlane[1] )
+        {
+            y = (int)VOXEL.y;
+            for(x=0; x<dim.x ;x++)
+            for(z=0; z<dim.z ;z++)
+            {
+                if ( PEAKS_show )
+                {
+                    normMax = 0;
+                    for(d=0; d<PEAKS_n; d++)
+                    {
+                        col.x = (*niiPEAKS->img)(x,y,z,3*d+0); // use "col" as tmp variable
+                        col.y = (*niiPEAKS->img)(x,y,z,3*d+1);
+                        col.z = (*niiPEAKS->img)(x,y,z,3*d+2);
+                        if ( PEAKS_use_affine )
+                        {
+                            dir.x = col.x * ((float*)PEAKS_affine)[0] + col.y * ((float*)PEAKS_affine)[1] + col.z * ((float*)PEAKS_affine)[2];
+                            dir.y = col.x * ((float*)PEAKS_affine)[3] + col.y * ((float*)PEAKS_affine)[4] + col.z * ((float*)PEAKS_affine)[5];
+                            dir.z = col.x * ((float*)PEAKS_affine)[6] + col.y * ((float*)PEAKS_affine)[7] + col.z * ((float*)PEAKS_affine)[8];
+                        }
+                        else
+                        {
+                            dir.x = col.x;
+                            dir.y = col.y;
+                            dir.z = col.z;
+                        }
+                        norms[d] = dir.norm();
+                        if ( norms[d] > normMax )
+                            normMax = norms[d];
+                    }
+
+                    for(d=0; d<PEAKS_n; d++)
+                    {
+                        if ( norms[d] < normMax*PEAKS_thr )
+                            continue;
+
+                        col.x = (*niiPEAKS->img)(x,y,z,3*d+0); // use "col" as tmp variable
+                        col.y = (*niiPEAKS->img)(x,y,z,3*d+1);
+                        col.z = (*niiPEAKS->img)(x,y,z,3*d+2);
+                        if ( PEAKS_use_affine )
+                        {
+                            dir.x = col.x * ((float*)PEAKS_affine)[0] + col.y * ((float*)PEAKS_affine)[1] + col.z * ((float*)PEAKS_affine)[2];
+                            dir.y = col.x * ((float*)PEAKS_affine)[3] + col.y * ((float*)PEAKS_affine)[4] + col.z * ((float*)PEAKS_affine)[5];
+                            dir.z = col.x * ((float*)PEAKS_affine)[6] + col.y * ((float*)PEAKS_affine)[7] + col.z * ((float*)PEAKS_affine)[8];
+                        }
+                        else
+                        {
+                            dir.x = col.x;
+                            dir.y = col.y;
+                            dir.z = col.z;
+                        }
+                        col.x = 0.5 * (PEAKS_flip[0]?-1:1) * dir.x / norms[d];
+                        col.y = 0.5 * (PEAKS_flip[1]?-1:1) * dir.y / norms[d];
+                        col.z = 0.5 * (PEAKS_flip[2]?-1:1) * dir.z / norms[d];
+
+                        if ( PEAKS_doNormalize )
+                        {
+                            dir.x = col.x;
+                            dir.y = col.y;
+                            dir.z = col.z;
+                        }
+                        else
+                        {
+                            dir.x = col.x * norms[d] / normMax;
+                            dir.y = col.y * norms[d] / normMax;
+                            dir.z = col.z * norms[d] / normMax;
+                        }
+
+                        glColor3f( fabs(2.0*col.x), fabs(2.0*col.y), fabs(2.0*col.z) );
+                        glBegin(GL_LINES);
+                            glVertex3f( x-dir.x, y-dir.y, z-dir.z );
+                            glVertex3f( x+dir.x, y+dir.y, z+dir.z );
+                        glEnd();
+                    }
+                }
+
+                if ( GLYPHS_show )
+                {
+                    b0 = (*niiDWI->img)(x,y,z,SCHEME_idxB0[0]);
+                    if ( b0 > GLYPHS_b0_thr )
+                    {
+                        glBegin(GL_POINTS);
+                        for(d=0; d < SCHEME_shells_idx[GLYPHS_shell].size() ;d++)
+                        {
+                            idx = SCHEME_shells_idx[GLYPHS_shell][d];
+                            w = 0.5 * (float)(*niiDWI->img)(x,y,z,idx) / b0;
+                            if ( GLYPHS_use_affine ) 
+                            {
+                                dir.x = SCHEME_dirs[idx].x * ((float*)GLYPHS_affine)[0] + SCHEME_dirs[idx].y * ((float*)GLYPHS_affine)[1] + SCHEME_dirs[idx].z * ((float*)GLYPHS_affine)[2];
+                                dir.y = SCHEME_dirs[idx].x * ((float*)GLYPHS_affine)[3] + SCHEME_dirs[idx].y * ((float*)GLYPHS_affine)[4] + SCHEME_dirs[idx].z * ((float*)GLYPHS_affine)[5];
+                                dir.z = SCHEME_dirs[idx].x * ((float*)GLYPHS_affine)[6] + SCHEME_dirs[idx].y * ((float*)GLYPHS_affine)[7] + SCHEME_dirs[idx].z * ((float*)GLYPHS_affine)[8];
+                                normMax = dir.norm();
+                                dir.x *= w / normMax;
+                                dir.y *= w / normMax;
+                                dir.z *= w / normMax;
+                            }
+                            else
+                            {
+                                dir.x = w * SCHEME_dirs[idx].x;
+                                dir.y = w * SCHEME_dirs[idx].y;
+                                dir.z = w * SCHEME_dirs[idx].z;
+                            }
+                            normMax = dir.norm();
+                            glColor3f( fabs(dir.x)/normMax, fabs(dir.y)/normMax, fabs(dir.z)/normMax );
+                            glVertex3f( x+dir.x, y+dir.y, z+dir.z );
+                            glVertex3f( x-dir.x, y-dir.y, z-dir.z );
+                        }
+                        glEnd();
+                    }
+                }
+            }
+        }
+
+        // plane XY
+        if ( showPlane[2] )
+        {
+            z = (int)VOXEL.z;
+            for(y=0; y<dim.y ;y++)
+            for(x=0; x<dim.x ;x++)
+            {
+                if ( PEAKS_show )
+                {
+                    normMax = 0;
+                    for(d=0; d<PEAKS_n; d++)
+                    {
+                        col.x = (*niiPEAKS->img)(x,y,z,3*d+0); // use "col" as tmp variable
+                        col.y = (*niiPEAKS->img)(x,y,z,3*d+1);
+                        col.z = (*niiPEAKS->img)(x,y,z,3*d+2);
+                        if ( PEAKS_use_affine )
+                        {
+                            dir.x = col.x * ((float*)PEAKS_affine)[0] + col.y * ((float*)PEAKS_affine)[1] + col.z * ((float*)PEAKS_affine)[2];
+                            dir.y = col.x * ((float*)PEAKS_affine)[3] + col.y * ((float*)PEAKS_affine)[4] + col.z * ((float*)PEAKS_affine)[5];
+                            dir.z = col.x * ((float*)PEAKS_affine)[6] + col.y * ((float*)PEAKS_affine)[7] + col.z * ((float*)PEAKS_affine)[8];
+                        }
+                        else
+                        {
+                            dir.x = col.x;
+                            dir.y = col.y;
+                            dir.z = col.z;
+                        }
+                        norms[d] = dir.norm();
+                        if ( norms[d] > normMax )
+                            normMax = norms[d];
+                    }
+
+                    for(d=0; d<PEAKS_n; d++)
+                    {
+                        if ( norms[d] < normMax*PEAKS_thr )
+                            continue;
+
+                        col.x = (*niiPEAKS->img)(x,y,z,3*d+0); // use "col" as tmp variable
+                        col.y = (*niiPEAKS->img)(x,y,z,3*d+1);
+                        col.z = (*niiPEAKS->img)(x,y,z,3*d+2);
+                        if ( PEAKS_use_affine )
+                        {
+                            dir.x = col.x * ((float*)PEAKS_affine)[0] + col.y * ((float*)PEAKS_affine)[1] + col.z * ((float*)PEAKS_affine)[2];
+                            dir.y = col.x * ((float*)PEAKS_affine)[3] + col.y * ((float*)PEAKS_affine)[4] + col.z * ((float*)PEAKS_affine)[5];
+                            dir.z = col.x * ((float*)PEAKS_affine)[6] + col.y * ((float*)PEAKS_affine)[7] + col.z * ((float*)PEAKS_affine)[8];
+                        }
+                        else
+                        {
+                            dir.x = col.x;
+                            dir.y = col.y;
+                            dir.z = col.z;
+                        }
+                        col.x = 0.5 * (PEAKS_flip[0]?-1:1) * dir.x / norms[d];
+                        col.y = 0.5 * (PEAKS_flip[1]?-1:1) * dir.y / norms[d];
+                        col.z = 0.5 * (PEAKS_flip[2]?-1:1) * dir.z / norms[d];
+
+                        if ( PEAKS_doNormalize )
+                        {
+                            dir.x = col.x;
+                            dir.y = col.y;
+                            dir.z = col.z;
+                        }
+                        else
+                        {
+                            dir.x = col.x * norms[d] / normMax;
+                            dir.y = col.y * norms[d] / normMax;
+                            dir.z = col.z * norms[d] / normMax;
+                        }
+
+                        glColor3f( fabs(2.0*col.x), fabs(2.0*col.y), fabs(2.0*col.z) );
+                        glBegin(GL_LINES);
+                            glVertex3f( x-dir.x, y-dir.y, z-dir.z );
+                            glVertex3f( x+dir.x, y+dir.y, z+dir.z );
+                        glEnd();
+                    }
+                }
+
+                if( GLYPHS_show)
+                {
+                    b0 = (*niiDWI->img)(x,y,z,SCHEME_idxB0[0]);
+                    if ( b0 > GLYPHS_b0_thr )
+                    {
+                        glBegin(GL_POINTS);
+                        for(d=0; d < SCHEME_shells_idx[GLYPHS_shell].size() ;d++)
+                        {
+                            idx = SCHEME_shells_idx[GLYPHS_shell][d];
+                            w = 0.5 * (float)(*niiDWI->img)(x,y,z,idx) / b0;
+                            if ( GLYPHS_use_affine ) 
+                            {
+                                dir.x = SCHEME_dirs[idx].x * ((float*)GLYPHS_affine)[0] + SCHEME_dirs[idx].y * ((float*)GLYPHS_affine)[1] + SCHEME_dirs[idx].z * ((float*)GLYPHS_affine)[2];
+                                dir.y = SCHEME_dirs[idx].x * ((float*)GLYPHS_affine)[3] + SCHEME_dirs[idx].y * ((float*)GLYPHS_affine)[4] + SCHEME_dirs[idx].z * ((float*)GLYPHS_affine)[5];
+                                dir.z = SCHEME_dirs[idx].x * ((float*)GLYPHS_affine)[6] + SCHEME_dirs[idx].y * ((float*)GLYPHS_affine)[7] + SCHEME_dirs[idx].z * ((float*)GLYPHS_affine)[8];
+                                normMax = dir.norm();
+                                dir.x *= w / normMax;
+                                dir.y *= w / normMax;
+                                dir.z *= w / normMax;
+                            }
+                            else
+                            {
+                                dir.x = w * SCHEME_dirs[idx].x;
+                                dir.y = w * SCHEME_dirs[idx].y;
+                                dir.z = w * SCHEME_dirs[idx].z;
+                            }
+
+                            normMax = dir.norm();
+                            glColor3f( fabs(dir.x)/normMax, fabs(dir.y)/normMax, fabs(dir.z)/normMax );
+                            glVertex3f( x+dir.x, y+dir.y, z+dir.z );
+                            glVertex3f( x-dir.x, y-dir.y, z-dir.z );
+                        }
+                        glEnd();
+                    }
+                }
+            }
+        }
+
+        glPopMatrix();
+    }
+
+    /* =================== */
+    /* Draw the SCALAR MAP */
+    /* =================== */
+    if ( showPlane[0] || showPlane[1] || showPlane[2] )
+    {
+        glDisable( GL_CULL_FACE );
+        glEnable( GL_BLEND );
+        glBlendFunc( GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA );
+
+        // to avoid z-fighting
+        glPolygonOffset( 1.0, 1.0 );
+        glEnable(GL_POLYGON_OFFSET_FILL);
+        glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
+
+        glLineWidth( 3 );
+
+        int x, y, z; // voxel coordinates NB: (0,0,0) -> corner of voxel
+        float color;
+
+        // plane YZ
+        if ( showPlane[0]  )
+        {
+            glPushMatrix();
+            glTranslatef(0.5,0,0);
+
+            x = (int)VOXEL.x;
+            for(y=0; y<dim.y ;y++)
+            for(z=0; z<dim.z ;z++)
+            {
+                color = ( MAP(x,y,z) - MAP_min_view) / ( MAP_max_view - MAP_min_view );
+                glColor4f(color,color,color,MAP_opacity);
+                glBegin(GL_QUADS);
+                    glVertex3f(x, y,   z);
+                    glVertex3f(x, y,   z+1);
+                    glVertex3f(x, y+1, z+1);
+                    glVertex3f(x, y+1, z);
+                glEnd();
+            }
+            // colored frame
+            if ( showAxes )
+            {
+                glColor3f(1,0,0);
+                glBegin(GL_LINE_STRIP);
+                    glVertex3f(x,0,0);
+                    glVertex3f(x,dim.y,0);
+                    glVertex3f(x,dim.y,dim.z);
+                    glVertex3f(x,0,dim.z);
+                    glVertex3f(x,0,0);
+                glEnd();
+            }
+
+            glPopMatrix();
+        }
+
+        // plane XZ
+        if ( showPlane[1] )
+        {
+            glPushMatrix();
+            glTranslatef(0,0.5,0);
+
+            y = (int)VOXEL.y;
+            for(x=0; x<dim.x ;x++)
+            for(z=0; z<dim.z ;z++)
+            {
+                color = ( MAP(x,y,z) - MAP_min_view) / ( MAP_max_view - MAP_min_view );
+                glColor4f(color,color,color,MAP_opacity);
+                glBegin(GL_QUADS);
+                    glVertex3f(x,   y, z);
+                    glVertex3f(x,   y, z+1);
+                    glVertex3f(x+1, y, z+1);
+                    glVertex3f(x+1, y, z);
+                glEnd();
+            }
+            // colored frame
+            if ( showAxes )
+            {
+                glColor3f(0,1,0);
+                glBegin(GL_LINE_STRIP);
+                    glVertex3f(0,y,0);
+                    glVertex3f(dim.x,y,0);
+                    glVertex3f(dim.x,y,dim.z);
+                    glVertex3f(0,y,dim.z);
+                    glVertex3f(0,y,0);
+                glEnd();
+            }
+
+            glPopMatrix();
+        }
+
+        // plane XY
+        if ( showPlane[2] )
+        {
+            glPushMatrix();
+            glTranslatef(0,0,0.5);
+
+            z = (int)VOXEL.z;
+            for(y=0; y<dim.y ;y++)
+            for(x=0; x<dim.x ;x++)
+            {
+                color = ( MAP(x,y,z) - MAP_min_view) / ( MAP_max_view - MAP_min_view );
+                glColor4f(color,color,color,MAP_opacity);
+                glBegin(GL_QUADS);
+                    glVertex3f(x,   y,   z);
+                    glVertex3f(x+1, y,   z);
+                    glVertex3f(x+1, y+1, z);
+                    glVertex3f(x,   y+1, z);
+                glEnd();
+            }
+
+            // colored frame
+            if ( showAxes )
+            {
+                glColor3f(0,0,1);
+                glBegin(GL_LINE_STRIP);
+                    glVertex3f(0,0,z);
+                    glVertex3f(dim.x,0,z);
+                    glVertex3f(dim.x,dim.y,z);
+                    glVertex3f(0,dim.y,z);
+                    glVertex3f(0,0,z);
+                glEnd();
+            }
+
+            glPopMatrix();
+        }
+
+        glEnable(GL_CULL_FACE);
+        glDisable( GL_BLEND );
+        glDisable(GL_POLYGON_OFFSET_FILL);
+    }
+
+    /* ====================== */
+    /* Draw the CURRENT VOXEL */
+    /* ====================== */
+    if ( showAxes )
+    {
+        glPushMatrix();
+        glTranslatef( VOXEL.x+0.5, VOXEL.y+0.5, VOXEL.z+0.5 );
+
+        glEnable( GL_BLEND );
+        glBlendFunc( GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA );
+        glLineWidth(1);
+        glColor4f( 1,1,0,1 );
+        glutWireCube( 1 );
+        glColor4f( 1,1,0,0.25 );
+        glutSolidCube( 1 );
+        glDisable( GL_BLEND );
+
+        glPopMatrix();
+    }
+
+    glPopMatrix();
+    PrintConfig();
+    glutSwapBuffers();
+}
+
+
+// INITIALIZATION
+// --------------
+void OpenGL_init( int argc, char** argv )
+{
+    glutInit( &argc, argv );
+    glutInitDisplayMode( GLUT_DEPTH | GLUT_DOUBLE | GLUT_RGBA | GLUT_ALPHA | GLUT_MULTISAMPLE );
+    ScreenX = 0.7*glutGet(GLUT_SCREEN_WIDTH);  if (ScreenX==0) ScreenX = 800;
+    ScreenY = 0.7*glutGet(GLUT_SCREEN_HEIGHT); if (ScreenY==0) ScreenY = 600;
+    glutInitWindowSize( ScreenX, ScreenY );
+    glutInitWindowPosition( 0.15*glutGet(GLUT_SCREEN_WIDTH), 0.15*glutGet(GLUT_SCREEN_HEIGHT) );
+    glutCreateWindow( "COMMIT debugger" );
+
+    // Projection and model matrix
+    glMatrixMode(GL_PROJECTION);
+    glLoadIdentity();
+    glMatrixMode(GL_MODELVIEW);
+    glLoadIdentity();
+
+    translation.x	= translation.y = 0;
+    zoom			= 0;
+    OPENGL_utils::identity( rot );
+    OPENGL_utils::identity( id );
+
+    glEnable( GL_DEPTH_TEST );
+    glClearColor( 0.1, 0.1, 0.1, 0.0 );
+
+    // lighting
+    glShadeModel( GL_SMOOTH );
+    glEnable( GL_NORMALIZE );
+
+    GLfloat white[] = {.5f, .5f, .5f, 1.0f};
+    glMaterialfv(GL_FRONT, GL_SPECULAR, white);
+    GLfloat shininess[] = {32};
+    glMaterialfv(GL_FRONT, GL_SHININESS, shininess);
+
+    glLightModeli(GL_LIGHT_MODEL_TWO_SIDE, GL_FALSE);
+    GLfloat global_ambient[] = { 0.2f, 0.2f, 0.2f, 1.0f };
+    glLightModelfv(GL_LIGHT_MODEL_AMBIENT, global_ambient);
+    glEnable ( GL_COLOR_MATERIAL );
+
+    // register CALLBACKS and open window
+    glutKeyboardFunc( GLUT__keyboard );
+    glutSpecialFunc(  GLUT__specialkey );
+    glutDisplayFunc(  GLUT__display );
+    glutReshapeFunc(  GLUT__reshape );
+    glutMouseFunc(    GLUT__mouse );
+    glutMotionFunc(   GLUT__motion );
+
+    GLUT__createMenu();
+
+    glutMainLoop();
+}
diff --git a/extras/COMMIT_debugger/OPENGL_utils.h b/extras/COMMIT_debugger/OPENGL_utils.h
index cd3f3607..a9390517 100755
--- a/extras/COMMIT_debugger/OPENGL_utils.h
+++ b/extras/COMMIT_debugger/OPENGL_utils.h
@@ -1,95 +1,95 @@
-#ifndef __OPENGL_UTILS_H__
-#define __OPENGL_UTILS_H__
-
-#include <algorithm>
-
-#include "VECTOR.h"
-typedef VECTOR<GLint>		Vec3Di;
-typedef VECTOR<GLfloat>		Vec3Df;
-
-
-namespace OPENGL_utils
-{
-
-void identity(GLfloat* result)
-{
-    for (int i=0; i<4; i++)
-    for (int j=0; j<4; j++)
-        if (i==j) result[4*i+j]=1; else result[4*i+j]=0;
-}
-
-
-void matXMat(GLfloat* m, GLfloat* m1, GLfloat* result)
-{
-    for (int i=0; i<4; i++)
-    for (int j=0; j<4; j++)
-    {
-        result[4*i+j]=0;
-        for (int t=0; t<4; t++)
-            result[4*i+j]=result[4*i+j]+m[4*i+t]*m1[4*t+j];
-    }
-}
-
-
-void rotateZ(GLfloat* m, GLfloat ang, GLfloat* result)
-{
-    static GLfloat matrix[16];
-
-    for (int i=0; i<16 ; i++) matrix[i] = 0;
-    matrix[0]  = cos(ang/180*3.1415);
-    matrix[5]  = cos(ang/180*3.1415);
-    matrix[1]  = -sin(ang/180*3.1415);
-    matrix[4]  = sin(ang/180*3.1415);
-    matrix[10] = 1;
-    matrix[15] = 1;
-    matXMat(matrix,m,result);
-}
-
-
-void rotateY(GLfloat* m, GLfloat ang, GLfloat* result)
-{
-    static GLfloat matrix[16];
-
-    for (int i=0; i<16 ; i++) matrix[i] = 0;
-    matrix[0]  = cos(ang/180*3.1415);
-    matrix[10] = cos(ang/180*3.1415);
-    matrix[8]  = -sin(ang/180*3.1415);
-    matrix[2]  = sin(ang/180*3.1415);
-    matrix[5]  = 1;
-    matrix[15] = 1;
-    matXMat(matrix,m,result);
-}
-
-
-void rotateX(GLfloat* m, GLfloat ang, GLfloat* result)
-{
-    static GLfloat matrix[16];
-
-    for (int i=0; i<16 ; i++) matrix[i] = 0;
-    matrix[5]  = cos(ang/180*3.1415);
-    matrix[10] = cos(ang/180*3.1415);
-    matrix[6]  = -sin(ang/180*3.1415);
-    matrix[9]  = sin(ang/180*3.1415);
-    matrix[0]  = 1;
-    matrix[15] = 1;
-    matXMat(matrix,m,result);
-}
-
-
-void translate(GLfloat* m, GLfloat x,GLfloat y,GLfloat z, GLfloat* result)
-{
-    static GLfloat matrix[16];
-
-    for (int i=0; i<16 ; i++) matrix[i] = 0;
-    matrix[0]  = 1;
-    matrix[5]  = 1;
-    matrix[10] = 1;
-    matrix[15] = 1;
-    matrix[12] = x;
-    matrix[13] = y;
-    matrix[14] = z;
-    matXMat(matrix,m,result);
-}
-
-}
-#endif
+#ifndef __OPENGL_UTILS_H__
+#define __OPENGL_UTILS_H__
+
+#include <algorithm>
+
+#include "VECTOR.h"
+typedef VECTOR<GLint>		Vec3Di;
+typedef VECTOR<GLfloat>		Vec3Df;
+
+
+namespace OPENGL_utils
+{
+
+void identity(GLfloat* result)
+{
+    for (int i=0; i<4; i++)
+    for (int j=0; j<4; j++)
+        if (i==j) result[4*i+j]=1; else result[4*i+j]=0;
+}
+
+
+void matXMat(GLfloat* m, GLfloat* m1, GLfloat* result)
+{
+    for (int i=0; i<4; i++)
+    for (int j=0; j<4; j++)
+    {
+        result[4*i+j]=0;
+        for (int t=0; t<4; t++)
+            result[4*i+j]=result[4*i+j]+m[4*i+t]*m1[4*t+j];
+    }
+}
+
+
+void rotateZ(GLfloat* m, GLfloat ang, GLfloat* result)
+{
+    static GLfloat matrix[16];
+
+    for (int i=0; i<16 ; i++) matrix[i] = 0;
+    matrix[0]  = cos(ang/180*3.1415);
+    matrix[5]  = cos(ang/180*3.1415);
+    matrix[1]  = -sin(ang/180*3.1415);
+    matrix[4]  = sin(ang/180*3.1415);
+    matrix[10] = 1;
+    matrix[15] = 1;
+    matXMat(matrix,m,result);
+}
+
+
+void rotateY(GLfloat* m, GLfloat ang, GLfloat* result)
+{
+    static GLfloat matrix[16];
+
+    for (int i=0; i<16 ; i++) matrix[i] = 0;
+    matrix[0]  = cos(ang/180*3.1415);
+    matrix[10] = cos(ang/180*3.1415);
+    matrix[8]  = -sin(ang/180*3.1415);
+    matrix[2]  = sin(ang/180*3.1415);
+    matrix[5]  = 1;
+    matrix[15] = 1;
+    matXMat(matrix,m,result);
+}
+
+
+void rotateX(GLfloat* m, GLfloat ang, GLfloat* result)
+{
+    static GLfloat matrix[16];
+
+    for (int i=0; i<16 ; i++) matrix[i] = 0;
+    matrix[5]  = cos(ang/180*3.1415);
+    matrix[10] = cos(ang/180*3.1415);
+    matrix[6]  = -sin(ang/180*3.1415);
+    matrix[9]  = sin(ang/180*3.1415);
+    matrix[0]  = 1;
+    matrix[15] = 1;
+    matXMat(matrix,m,result);
+}
+
+
+void translate(GLfloat* m, GLfloat x,GLfloat y,GLfloat z, GLfloat* result)
+{
+    static GLfloat matrix[16];
+
+    for (int i=0; i<16 ; i++) matrix[i] = 0;
+    matrix[0]  = 1;
+    matrix[5]  = 1;
+    matrix[10] = 1;
+    matrix[15] = 1;
+    matrix[12] = x;
+    matrix[13] = y;
+    matrix[14] = z;
+    matXMat(matrix,m,result);
+}
+
+}
+#endif
diff --git a/extras/COMMIT_debugger/main.cxx b/extras/COMMIT_debugger/main.cxx
index 7d4eb823..2a4e2e62 100755
--- a/extras/COMMIT_debugger/main.cxx
+++ b/extras/COMMIT_debugger/main.cxx
@@ -1,652 +1,652 @@
-#include <NIFTI.h>
-#include <nifti1_io.h>
-#include <COLOR_ui.h>
-#include <TrackVis.h>
-#include <VECTOR.h>
-#include <cmath>
-#include <regex>
-#include <cstdlib>
-#include "tclap/CmdLine.h"
-#include <blitz/array.h>
-using namespace std;
-
-#include "colormaps.h"
-
-NIFTI*                   niiDWI;
-VECTOR<int>		         dim;
-VECTOR<float>	         pixdim;
-
-int                      SCHEME_version;
-vector< VECTOR<float> >	 SCHEME_dirs;
-vector<float>	         SCHEME_b;
-vector<int>              SCHEME_idxB0;
-vector<int>              SCHEME_idxDWI;
-vector<float>	         SCHEME_shells_b;
-vector< vector<int> >    SCHEME_shells_idx;
-
-blitz::Array<float,3>    MAP;
-VECTOR<int>		         VOXEL;
-float                    MAP_min, MAP_min_view, MAP_max, MAP_max_view;
-float 			         MAP_opacity = 0.5;
-bool			         showPlane[3] = { true, true, true };
-bool                     showAxes = true;
-bool			         showConfig = true;
-float				     LINE_width = 2.0;
-
-NIFTI*                   niiPEAKS;
-int				         PEAKS_n;
-bool			         PEAKS_show = false;
-float			         PEAKS_thr = 0.0;
-bool			         PEAKS_doNormalize = false;
-bool			         PEAKS_flip[3] = {false, false, false};
-bool			         PEAKS_use_affine = false;
-float                    PEAKS_affine[3][3];
-
-TrackVis 		         TRK_file;
-int				         TRK_skip;
-int				         TRK_nTractsPlotted;
-int*   			         TRK_nPoints;
-float*			         TRK_coords;
-float*			         TRK_colors;
-float 			         TRK_crop = 1.0;
-bool 			         TRK_crop_mode = true;
-bool 			         TRK_show = false;
-VECTOR<float> 	         TRK_offset;
-
-bool 			         GLYPHS_show = false;
-int                      GLYPHS_shell = 0;
-bool			         GLYPHS_flip[3] = {false, false, false};
-float	                 GLYPHS_b0_thr = 50.0;
-bool			         GLYPHS_use_affine = false;
-float                    GLYPHS_affine[3][3];
-
-#include "OPENGL_callbacks.cxx"
-
-
-/*----------------------------------------------------------------------------------------------------------------------------------*/
-int main(int argc, char** argv)
-{
-    TCLAP::CmdLine cmd("This tool allows one to display in a common 3D space all the objects (DWI data, streamlines etc...) used by COMMIT in order to spot possible incosistencies between the conventions of COMMIT and the software that generated the data, e.g. flip in some axes in the DWI data or in the peaks, spatial shift in the streamlines, whether the affine transformation was already applied to the data etc..", ' ', "1.2");
-
-    TCLAP::UnlabeledValueArg<string> argDWI(    "dwi","Filename of the DWI dataset [4D NIFTI]", true, "", "DWI", cmd );
-    TCLAP::ValueArg<string>          argMAP(    "m", "map", "Background map [3D NIFTI]", false, "", "map", cmd );
-    TCLAP::ValueArg<string>          argPEAKS(  "p", "peaks", "Main diffusion directions for the extra-axonal part in each voxel [4D NIFTI]", false, "", "peaks", cmd );
-    TCLAP::ValueArg<string>          argTRK(    "f", "fibers", "Streamlines for the intra-axonal part [.TRK format]", false, "", "fibers", cmd );
-    TCLAP::UnlabeledValueArg<string> argSCHEME( "scheme","Acquisition scheme [text]", true, "", "scheme", cmd );
-
-    try	{ cmd.parse( argc, argv ); }
-    catch (TCLAP::ArgException &e) { cerr << "error: " << e.error() << " for arg " << e.argId() << endl; }
-
-    string DWI_filename( argDWI.getValue() );
-    string SCHEME_filename( argSCHEME.getValue() );
-    string PEAKS_filename( argPEAKS.getValue() );
-    string TRK_filename( argTRK.getValue() );
-    string MAP_filename( argMAP.getValue() );
-
-
-    // ===================
-    // Reading DWI dataset
-    // ===================
-    COLOR_msg( "-> Reading 'DWI' dataset:", "\n" );
-
-    niiDWI = new NIFTI;
-    niiDWI->open( DWI_filename, true );
-    if ( !niiDWI->isValid() )
-    {
-        COLOR_error( "Unable to open file", "\t" );
-        return EXIT_FAILURE;
-    }
-    dim.x = niiDWI->hdr->dim[1];
-    dim.y = niiDWI->hdr->dim[2];
-    dim.z = niiDWI->hdr->dim[3];
-    pixdim.x = niiDWI->hdr->pixdim[1];
-    pixdim.y = niiDWI->hdr->pixdim[2];
-    pixdim.z = niiDWI->hdr->pixdim[3];
-    printf( "\tdim    : %d x %d x %d x %d\n", dim.x, dim.y, dim.z, niiDWI->hdr->dim[4] );
-    printf( "\tpixdim : %.4f x %.4f x %.4f\n", 	pixdim.x, pixdim.y, pixdim.z );
-    printf( "\tqform  : %d\n", niiDWI->hdr->qform_code );
-    mat44 DWI_qform = niiDWI->hdr->qto_xyz;
-    if ( niiDWI->hdr->qform_code > 0 )
-    {
-        for(int i=0; i<3 ;i++)
-        {
-            printf( "\t\t| " );
-            for(int j=0; j<4 ;j++)
-                printf( "%9.4f ", DWI_qform.m[i][j] );
-            printf( "|\n" );
-        }
-    }
-    else
-    {
-        COLOR_warning( "This should never happen!", "\t\t" );
-    }
-    printf( "\tsform  : %d\n", niiDWI->hdr->sform_code );
-    mat44 DWI_sform = niiDWI->hdr->sto_xyz;
-    if ( niiDWI->hdr->sform_code > 0 )
-    {
-        for(int i=0; i<3 ;i++)
-        {
-            printf( "\t\t| " );
-            for(int j=0; j<4 ;j++)
-                printf( "%9.4f ", DWI_sform.m[i][j] );
-            printf( "|\n" );
-        }
-    }
-
-    // Read the affine matrix to rotate the vectors
-    // NB: we need the inverse, but in this case inv=transpose
-    if ( niiDWI->hdr->qform_code != 0 )
-    {
-        for(int i=0; i<3 ;i++)
-        for(int j=0; j<3 ;j++)
-            GLYPHS_affine[i][j] = DWI_qform.m[j][i];
-    }
-    else if ( niiDWI->hdr->sform_code != 0 )
-    {
-        for(int i=0; i<3 ;i++)
-        for(int j=0; j<3 ;j++)
-            GLYPHS_affine[i][j] = DWI_sform.m[j][i];
-    }
-    else {
-        for(int i=0; i<3 ;i++)
-        for(int j=0; j<3 ;j++)
-            GLYPHS_affine[i][j] = 0;
-        for(int i=0; i<3 ;i++)
-            GLYPHS_affine[i][i] = 1;
-    }
-
-    mat33 tmp;
-    for(int i=0; i<3 ;i++)
-        for(int j=0; j<3 ;j++)
-            tmp.m[i][j] = GLYPHS_affine[i][j];
-    printf( "\tAffine used (%s):\n", nifti_mat33_determ(tmp)<0?"RADIOLOGICAL":"NEUROLOGICAL" );
-    for(int i=0; i<3 ;i++)
-    {
-        printf( "\t\t| " );
-        for(int j=0; j<3 ;j++)
-            printf( "%9.4f ", GLYPHS_affine[i][j] );
-        printf( "|\n" );
-    }
-
-    COLOR_msg( "   [OK]" );
-
-
-    // ===================
-    // Reading SCHEME file
-    // ===================
-    COLOR_msg( "-> Reading 'SCHEME' file:", "\n" );
-
-    char line[1000];
-    FILE* pFile = fopen( SCHEME_filename.c_str(), "rt" );
-
-    // read the version
-    // ----------------
-    try
-    {
-        while( fgets(line, 1000, pFile) )
-            if ( line[0]!='#' )
-                break;
-
-        std::regex reVersion("^VERSION: (.*)\\s*$");
-        std::smatch reMatches;
-
-        std::string str_line = string(line);
-        if ( !std::regex_match(str_line, reMatches, reVersion) )
-        {
-            // no header found, assume standards BVECTOR format
-            SCHEME_version = 0;
-            fseek(pFile, -strlen(line), SEEK_CUR);
-        }
-        else
-        {
-            if( strcmp(reMatches[1].str().c_str(),"0")==0 || strcmp(reMatches[1].str().c_str(),"BVECTOR")==0 )
-                SCHEME_version = 0;
-            else if( strcmp(reMatches[1].str().c_str(),"1")==0 || strcmp(reMatches[1].str().c_str(),"STEJSKALTANNER")==0 )
-                SCHEME_version = 1;
-            else
-                throw "Version not recognized";
-        }
-    }
-    catch( const char* msg )
-    {
-        COLOR_error( msg, "\t" );
-        return EXIT_FAILURE;
-    }
-    printf( "\tversion   : %s\n", SCHEME_version==0?"BVECTOR":"STEJSKALTANNER" );
-
-    // read the data
-    // -------------
-    try
-    {
-        string      reFLOAT( "[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?" );
-        std::regex  reVERSION0( "^\\s*("+reFLOAT+")\\s+("+reFLOAT+")\\s+("+reFLOAT+")\\s+("+reFLOAT+")\\s*$" );
-        std::regex  reVERSION1( "^\\s*("+reFLOAT+")\\s+("+reFLOAT+")\\s+("+reFLOAT+")\\s+("+reFLOAT+")\\s+("+reFLOAT+")\\s+("+reFLOAT+")\\s+("+reFLOAT+")\\s*$" );
-        std::regex  reEMPTY( "^\\s*$" );
-        std::smatch reMatches;
-        int         Ns = 0;
-        float       x, y, z, b, G, D, d;
-        while( fgets(line, 1000, pFile) )
-        {
-            std::string str_line = string(line);
-            if( std::regex_match(str_line, reMatches, reEMPTY) )
-                continue;   // skip empty lines
-
-            if( SCHEME_version == 0 )
-            {
-                if ( !std::regex_match(str_line, reMatches, reVERSION0) )
-                    throw "Wrong row format";
-                x = std::atof( reMatches[1].str().c_str() );
-                y = std::atof( reMatches[2].str().c_str() );
-                z = std::atof( reMatches[3].str().c_str() );
-                b = std::atof( reMatches[4].str().c_str() ); // in mm^2/s
-                VECTOR<float> tmp( x, y, z );
-                tmp.Normalize();
-                SCHEME_dirs.push_back( tmp );
-                SCHEME_b.push_back( b );
-            }
-            else
-            {
-                if ( !std::regex_match(str_line, reMatches, reVERSION1) )
-                    throw "Wrong row format";
-                x = std::atof( reMatches[1].str().c_str() );
-                y = std::atof( reMatches[2].str().c_str() );
-                z = std::atof( reMatches[3].str().c_str() );
-                G = std::atof( reMatches[4].str().c_str() );
-                D = std::atof( reMatches[5].str().c_str() );
-                d = std::atof( reMatches[6].str().c_str() );
-                VECTOR<float> tmp( x, y, z );
-                tmp.Normalize();
-                SCHEME_dirs.push_back( tmp );
-                b = std::pow( 267.513e6*G*d, 2 ) * (D-d/3.0) * 1e-6; // in mm^2/s
-                SCHEME_b.push_back( b );
-            }
-
-            if ( b<5.0 )
-            {
-                SCHEME_idxB0.push_back( Ns );
-            }
-            else
-            {
-                SCHEME_idxDWI.push_back( Ns );
-                if ( std::find(SCHEME_shells_b.begin(), SCHEME_shells_b.end(), b) == SCHEME_shells_b.end() )
-                {
-                    SCHEME_shells_b.push_back( b ) ;
-                    vector<int> tmp;
-                    SCHEME_shells_idx.push_back( tmp ) ;
-                }
-            }
-            Ns++;
-        }
-    }
-    catch( const char* msg )
-    {
-        COLOR_error( msg, "\t" );
-        return EXIT_FAILURE;
-    }
-    fclose(pFile);
-
-    printf( "\tgradients : %d\n", SCHEME_b.size() );
-    if ( niiDWI->hdr->dim[4] != SCHEME_b.size() )
-    {
-        COLOR_error( "The scheme does not match the DWI dataset", "\t" );
-        return EXIT_FAILURE;
-    }
-
-    // fill data structure about the SCHEME
-    // ------------------------------------
-    for(int i=0; i < SCHEME_b.size() ;i++)
-    {
-        if ( SCHEME_b[i] < 5 )
-            continue;
-        int s = std::find( SCHEME_shells_b.begin(), SCHEME_shells_b.end(), SCHEME_b[i] ) - SCHEME_shells_b.begin();
-        SCHEME_shells_idx[s].push_back( i );
-    }
-
-    printf( "\tscheme    : %d b0 and %d shells (", SCHEME_idxB0.size(), SCHEME_shells_idx.size() );
-    for(int i=0; i < SCHEME_shells_b.size() ;i++)
-        printf( " [%d @ b=%.1f]", SCHEME_shells_idx[i].size(), SCHEME_shells_b[i] );
-    printf( " )\n" );
-
-    COLOR_msg( "   [OK]" );
-
-
-
-    // =======================
-    // Creating BACKGROUND map
-    // =======================
-    COLOR_msg( "-> Preparing 'BACKGROUND' map:", "\n" );
-    MAP.resize(dim.x,dim.y,dim.z);
-    if ( !MAP_filename.empty() )
-    {
-        printf( "\tdata   : reading from file\n" );
-        NIFTI* niiMAP = new NIFTI;
-        niiMAP->open( MAP_filename, true );
-        if ( !niiMAP->isValid() )
-        {
-            COLOR_error( "Unable to open the file", "\t" );
-            return EXIT_FAILURE;
-        }
-
-        printf( "\tdim    : %d x %d x %d x %d\n" , niiMAP->hdr->dim[1],    niiMAP->hdr->dim[2],    niiMAP->hdr->dim[3], niiMAP->hdr->dim[4] );
-        printf( "\tpixdim : %.4f x %.4f x %.4f\n", niiMAP->hdr->pixdim[1], niiMAP->hdr->pixdim[2], niiMAP->hdr->pixdim[3] );
-
-        if ( niiMAP->hdr->dim[1] != dim.x || niiMAP->hdr->dim[2] != dim.y || niiMAP->hdr->dim[3] != dim.z )
-        {
-            COLOR_error( "The DIMENSIONS do not match those of DWI images", "\t" );
-            return EXIT_FAILURE;
-        }
-        if ( abs(niiMAP->hdr->pixdim[1]-pixdim.x) > 1e-4 || abs(niiMAP->hdr->pixdim[2]-pixdim.y) > 1e-4 || abs(niiMAP->hdr->pixdim[3]-pixdim.z) > 1e-4 )
-        {
-            COLOR_warning( "The VOXEL SIZE does not match that of DWI images", "\t" );
-        }
-
-        FLOAT32 MIN = 0;//(*niiMAP->img)(0,0,0);
-        FLOAT32 MAX = MIN;
-
-        for(int i=0; i<dim.x ;i++)
-        for(int j=0; j<dim.y ;j++)
-        for(int k=0; k<dim.z ;k++)
-        {
-            MAP(i,j,k) = (*niiMAP->img)(i,j,k);
-            if ( MAP(i,j,k) > MAX )
-                MAX = MAP(i,j,k);
-            if ( MAP(i,j,k) < MIN )
-                MIN = MAP(i,j,k);
-        }
-        if ( MAX - MIN <= 0 )
-        {
-            COLOR_error( "The dynamic range is zero", "\t" );
-            return EXIT_FAILURE;
-        }
-        MAP_min	= MIN;
-        MAP_min_view = 0;
-        MAP_max	= MAP_max_view = MAX;
-
-        printf( "\tvalues : [%.2e ... %.2e]\n", MAP_min, MAP_max );
-        COLOR_msg( "   [OK]" );
-    }
-    else
-    {
-        printf( "\tdata   : " );
-
-        if ( SCHEME_idxB0.size() > 0 )
-        {
-            printf( "taking first b0 image\n" );
-            FLOAT32 MIN = (*niiDWI->img)(0,0,0,SCHEME_idxB0[0]);
-            FLOAT32 MAX = MIN;
-
-            for(int i=0; i<dim.x ;i++)
-            for(int j=0; j<dim.y ;j++)
-            for(int k=0; k<dim.z ;k++)
-            {
-                MAP(i,j,k) = (*niiDWI->img)(i,j,k,SCHEME_idxB0[0]);
-                if ( MAP(i,j,k) > MAX )
-                    MAX = MAP(i,j,k);
-                if ( MAP(i,j,k) < MIN )
-                    MIN = MAP(i,j,k);
-            }
-            if ( MAX - MIN <= 0 )
-            {
-                COLOR_error( "The dynamic range is zero", "\t" );
-                return EXIT_FAILURE;
-            }
-            MAP_min	= MIN;
-            MAP_min_view = 0;
-            MAP_max	= MAP_max_view = MAX;
-        }
-        else
-        {
-            printf( "no b0 found\n" );
-            MAP = 0;
-            MAP_min	= MAP_min_view = 0;
-            MAP_max	= MAP_max_view = 1;
-        }
-        printf( "\tvalues : [%.2e ... %.2e]\n", MAP_min, MAP_max );
-        COLOR_msg( "   [OK]" );
-    }
-
-
-    // ==================
-    // Reading PEAKS file
-    // ==================
-    COLOR_msg( "-> Reading 'PEAKS' dataset:", "\n" );
-
-    if ( !PEAKS_filename.empty() )
-    {
-        niiPEAKS = new NIFTI;
-        niiPEAKS->open( PEAKS_filename, true );
-        if ( !niiPEAKS->isValid() )
-        {
-            COLOR_error( "Unable to open the file", "\t" );
-            return false;
-        }
-
-        if ( niiPEAKS->hdr->dim[0] != 4 || niiPEAKS->hdr->dim[4]%3 != 0 )
-        {
-            COLOR_error( "The size must be (*,*,*,3*k)", "\t" );
-            return EXIT_FAILURE;
-        }
-        PEAKS_n = niiPEAKS->hdr->dim[4]/3;
-
-        printf( "\tdim     : %d x %d x %d (%d peaks per voxel)\n" , niiPEAKS->hdr->dim[1], niiPEAKS->hdr->dim[2], niiPEAKS->hdr->dim[3], PEAKS_n );
-        printf( "\tpixdim  : %.4f x %.4f x %.4f\n", niiPEAKS->hdr->pixdim[1], niiPEAKS->hdr->pixdim[2], niiPEAKS->hdr->pixdim[3] );
-
-        printf( "\tqform   : %d\n", niiPEAKS->hdr->qform_code );
-        mat44 PEAKS_qform = niiPEAKS->hdr->qto_xyz;
-        if ( niiPEAKS->hdr->qform_code > 0 )
-        {
-            for(int i=0; i<3 ;i++)
-            {
-                printf( "\t\t| " );
-                for(int j=0; j<4 ;j++)
-                    printf( "%9.4f ", PEAKS_qform.m[i][j] );
-                printf( "|\n" );
-            }
-        }
-        else
-        {
-            COLOR_warning( "This should never happen!", "\t\t" );
-        }
-
-        printf( "\tsform  : %d\n", niiPEAKS->hdr->sform_code );
-        mat44 PEAKS_sform = niiPEAKS->hdr->sto_xyz;
-        if ( niiPEAKS->hdr->sform_code > 0 )
-        {
-            for(int i=0; i<3 ;i++)
-            {
-                printf( "\t\t| " );
-                for(int j=0; j<4 ;j++)
-                    printf( "%9.4f ", PEAKS_sform.m[i][j] );
-                printf( "|\n" );
-            }
-        }
-
-        if ( niiPEAKS->hdr->dim[1] != dim.x || niiPEAKS->hdr->dim[2] != dim.y || niiPEAKS->hdr->dim[3] != dim.z )
-        {
-            COLOR_error( "The DIMENSIONS do not match those of DWI images", "\t" );
-            return EXIT_FAILURE;
-        }
-        if ( abs(niiPEAKS->hdr->pixdim[1]-pixdim.x) > 1e-3 || abs(niiPEAKS->hdr->pixdim[2]-pixdim.y) > 1e-3 || abs(niiPEAKS->hdr->pixdim[3]-pixdim.z) > 1e-3 )
-        {
-            COLOR_warning( "The VOXEL SIZE does not match that of DWI images", "\t" );
-        }
-        if (
-            niiPEAKS->hdr->sform_code != niiDWI->hdr->sform_code || niiPEAKS->hdr->qform_code != niiDWI->hdr->qform_code || niiPEAKS->hdr->pixdim[0] != niiDWI->hdr->pixdim[0] ||
-            niiPEAKS->hdr->quatern_b != niiDWI->hdr->quatern_b || niiPEAKS->hdr->quatern_c != niiDWI->hdr->quatern_c || niiPEAKS->hdr->quatern_d != niiDWI->hdr->quatern_d ||
-            niiPEAKS->hdr->qoffset_x != niiDWI->hdr->qoffset_x || niiPEAKS->hdr->qoffset_y != niiDWI->hdr->qoffset_y || niiPEAKS->hdr->qoffset_z != niiDWI->hdr->qoffset_z
-        )
-        {
-            COLOR_warning( "The GEOMETRY does not match that of DWI images", "\t" );
-        }
-
-        // Read the affine matrix to rotate the vectors
-        // NB: we need the inverse, but in this case inv=transpose
-        if ( niiPEAKS->hdr->qform_code != 0 )
-        {
-            for(int i=0; i<3 ;i++)
-            for(int j=0; j<3 ;j++)
-                PEAKS_affine[i][j] = PEAKS_qform.m[j][i];
-        }
-        else if ( niiPEAKS->hdr->sform_code != 0 )
-        {
-            for(int i=0; i<3 ;i++)
-            for(int j=0; j<3 ;j++)
-                PEAKS_affine[i][j] = PEAKS_sform.m[j][i];
-        }
-        else {
-            for(int i=0; i<3 ;i++)
-            for(int j=0; j<3 ;j++)
-                PEAKS_affine[i][j] = 0;
-            for(int i=0; i<3 ;i++)
-                PEAKS_affine[i][i] = 1;
-        }
-
-        printf( "\tAffine used :\n" );
-        for(int i=0; i<3 ;i++)
-        {
-            printf( "\t\t| " );
-            for(int j=0; j<3 ;j++)
-                printf( "%9.4f ", PEAKS_affine[i][j] );
-            printf( "|\n" );
-        }
-
-        COLOR_msg( "   [OK]" );
-    }
-    else {
-        // no peaks are passed and won't be showed
-        COLOR_msg( "   [no peaks specified]" );
-        PEAKS_n = 0;
-    }
-
-
-    // ===================
-    // Reading TRACTS file
-    // ===================
-    COLOR_msg( "-> Reading 'TRACTOGRAM':", "\n" );
-
-    if ( !TRK_filename.empty() )
-    {
-        TRK_file = TrackVis();
-        if ( !TRK_file.open( TRK_filename ) )
-        {
-            COLOR_error( "Unable to open the file", "\t" );
-            return false;
-        }
-
-        printf("\tcount      : %d\n" , TRK_file.hdr.n_count );
-        printf("\tdim        : %d x %d x %d\n" , TRK_file.hdr.dim[0], TRK_file.hdr.dim[1], TRK_file.hdr.dim[2] );
-        printf("\tpixdim     : %.4f x %.4f x %.4f\n", TRK_file.hdr.voxel_size[0], TRK_file.hdr.voxel_size[1], TRK_file.hdr.voxel_size[2] );
-        printf("\tscalars    : %d\n" , TRK_file.hdr.n_scalars );
-        printf("\tproperties : %d\n" , TRK_file.hdr.n_properties );
-
-        if ( TRK_file.hdr.dim[0] != dim.x || TRK_file.hdr.dim[1] != dim.y || TRK_file.hdr.dim[2] != dim.z ||
-             abs(TRK_file.hdr.voxel_size[0]-pixdim.x) > 1e-4 || abs(TRK_file.hdr.voxel_size[1]-pixdim.y) > 1e-4 || abs(TRK_file.hdr.voxel_size[2]-pixdim.z) > 1e-4 )
-        {
-            COLOR_warning( "The GEOMETRY does not match those of DWI images", "\t" );
-        }
-
-        TRK_skip = ceil( TRK_file.hdr.n_count / 25000.0 );
-        int N, n_s = TRK_file.hdr.n_scalars, n_p = TRK_file.hdr.n_properties;
-        FILE* fp = TRK_file.getFilePtr();
-
-        // count how many points I need to store in memory
-        int TractsRead = 0, CoordsRead = 0;
-        fseek(fp, 1000, SEEK_SET);
-        for(int f=0; f < TRK_file.hdr.n_count ; f++)
-        {
-            fread( (char*)&N, 1, 4, fp );
-            fseek( fp, N*(3+n_s)*4 + n_p*4, SEEK_CUR );
-            if ( f%TRK_skip==0 )
-            {
-                TractsRead++;
-                CoordsRead += N;
-            }
-        }
-        printf("\tin memory  : %d (%d points)\n" , TractsRead, CoordsRead );
-
-        // create data structure for drawing the tracts
-        TRK_nTractsPlotted = TractsRead;
-        TRK_nPoints = new int[TRK_nTractsPlotted];
-        TRK_coords  = new float[3*CoordsRead];
-        TRK_colors  = new float[3*CoordsRead];
-
-        float* ptr  = TRK_coords;
-        float* ptrc = TRK_colors;
-        float norm;
-        VECTOR<float> dir;
-        TractsRead = 0;
-        fseek(fp, 1000, SEEK_SET);
-        for(int f=0; f < TRK_file.hdr.n_count ; f++)
-        {
-            if ( f%TRK_skip==0 )
-            {
-                fread( (char*)&N, 1, 4, fp );
-                TRK_nPoints[TractsRead] = N;
-
-                for(int i=0; i<N; i++)
-                {
-                    fread((char*)ptr, 1, 12, fp);
-                    fseek( fp, n_s*4, SEEK_CUR );
-
-                    // coordinates (later they will be scaled back to voxel size)
-                    ptr[0] /= pixdim.x;
-                    ptr[1] /= pixdim.y;
-                    ptr[2] /= pixdim.z;
-
-                    // colors
-                    if ( i > 0 )
-                    {
-                        dir.x = *(ptr  ) - *(ptr-3);
-                        dir.y = *(ptr+1) - *(ptr-2);
-                        dir.z = *(ptr+2) - *(ptr-1);
-                        norm = dir.norm();
-                        ptrc[0] = abs( dir.x / norm );
-                        ptrc[1] = abs( dir.y / norm );
-                        ptrc[2] = abs( dir.z / norm );
-                    }
-                    else
-                    {
-                        ptrc[0] = 0;
-                        ptrc[1] = 0;
-                        ptrc[2] = 0;
-                    }
-
-                    ptr  += 3;
-                    ptrc += 3;
-                }
-                fseek( fp, n_p*4, SEEK_CUR );
-                TractsRead++;
-            }
-            else
-            {
-                fread( (char*)&N, 1, 4, fp );
-                fseek( fp, N*(3+n_s)*4 + n_p*4, SEEK_CUR );
-            }
-        }
-
-        COLOR_msg( "   [OK]" );
-        printf( "\n\n" );
-    }
-    else
-    {
-        // no fibers are passed and won't be showed
-        COLOR_msg( "   [no streamlines specified]" );
-        TRK_nTractsPlotted = 0;
-    }
-
-    TRK_offset.x = 0;
-    TRK_offset.y = 0;
-    TRK_offset.z = 0;
-
-
-    // ============
-    // SETUP OpenGL
-    // ============
-    VOXEL.x = round( dim.x / 2.0 );
-    VOXEL.y = round( dim.y / 2.0 );
-    VOXEL.z = round( dim.z / 2.0 );
-    OpenGL_init( argc, argv );
-
-    return EXIT_SUCCESS;
-}
+#include <NIFTI.h>
+#include <nifti1_io.h>
+#include <COLOR_ui.h>
+#include <TrackVis.h>
+#include <VECTOR.h>
+#include <cmath>
+#include <regex>
+#include <cstdlib>
+#include "tclap/CmdLine.h"
+#include <blitz/array.h>
+using namespace std;
+
+#include "colormaps.h"
+
+NIFTI*                   niiDWI;
+VECTOR<int>		         dim;
+VECTOR<float>	         pixdim;
+
+int                      SCHEME_version;
+vector< VECTOR<float> >	 SCHEME_dirs;
+vector<float>	         SCHEME_b;
+vector<int>              SCHEME_idxB0;
+vector<int>              SCHEME_idxDWI;
+vector<float>	         SCHEME_shells_b;
+vector< vector<int> >    SCHEME_shells_idx;
+
+blitz::Array<float,3>    MAP;
+VECTOR<int>		         VOXEL;
+float                    MAP_min, MAP_min_view, MAP_max, MAP_max_view;
+float 			         MAP_opacity = 0.5;
+bool			         showPlane[3] = { true, true, true };
+bool                     showAxes = true;
+bool			         showConfig = true;
+float				     LINE_width = 2.0;
+
+NIFTI*                   niiPEAKS;
+int				         PEAKS_n;
+bool			         PEAKS_show = false;
+float			         PEAKS_thr = 0.0;
+bool			         PEAKS_doNormalize = false;
+bool			         PEAKS_flip[3] = {false, false, false};
+bool			         PEAKS_use_affine = false;
+float                    PEAKS_affine[3][3];
+
+TrackVis 		         TRK_file;
+int				         TRK_skip;
+int				         TRK_nTractsPlotted;
+int*   			         TRK_nPoints;
+float*			         TRK_coords;
+float*			         TRK_colors;
+float 			         TRK_crop = 1.0;
+bool 			         TRK_crop_mode = true;
+bool 			         TRK_show = false;
+VECTOR<float> 	         TRK_offset;
+
+bool 			         GLYPHS_show = false;
+int                      GLYPHS_shell = 0;
+bool			         GLYPHS_flip[3] = {false, false, false};
+float	                 GLYPHS_b0_thr = 50.0;
+bool			         GLYPHS_use_affine = false;
+float                    GLYPHS_affine[3][3];
+
+#include "OPENGL_callbacks.cxx"
+
+
+/*----------------------------------------------------------------------------------------------------------------------------------*/
+int main(int argc, char** argv)
+{
+    TCLAP::CmdLine cmd("This tool allows one to display in a common 3D space all the objects (DWI data, streamlines etc...) used by COMMIT in order to spot possible incosistencies between the conventions of COMMIT and the software that generated the data, e.g. flip in some axes in the DWI data or in the peaks, spatial shift in the streamlines, whether the affine transformation was already applied to the data etc..", ' ', "1.2");
+
+    TCLAP::UnlabeledValueArg<string> argDWI(    "dwi","Filename of the DWI dataset [4D NIFTI]", true, "", "DWI", cmd );
+    TCLAP::ValueArg<string>          argMAP(    "m", "map", "Background map [3D NIFTI]", false, "", "map", cmd );
+    TCLAP::ValueArg<string>          argPEAKS(  "p", "peaks", "Main diffusion directions for the extra-axonal part in each voxel [4D NIFTI]", false, "", "peaks", cmd );
+    TCLAP::ValueArg<string>          argTRK(    "f", "fibers", "Streamlines for the intra-axonal part [.TRK format]", false, "", "fibers", cmd );
+    TCLAP::UnlabeledValueArg<string> argSCHEME( "scheme","Acquisition scheme [text]", true, "", "scheme", cmd );
+
+    try	{ cmd.parse( argc, argv ); }
+    catch (TCLAP::ArgException &e) { cerr << "error: " << e.error() << " for arg " << e.argId() << endl; }
+
+    string DWI_filename( argDWI.getValue() );
+    string SCHEME_filename( argSCHEME.getValue() );
+    string PEAKS_filename( argPEAKS.getValue() );
+    string TRK_filename( argTRK.getValue() );
+    string MAP_filename( argMAP.getValue() );
+
+
+    // ===================
+    // Reading DWI dataset
+    // ===================
+    COLOR_msg( "-> Reading 'DWI' dataset:", "\n" );
+
+    niiDWI = new NIFTI;
+    niiDWI->open( DWI_filename, true );
+    if ( !niiDWI->isValid() )
+    {
+        COLOR_error( "Unable to open file", "\t" );
+        return EXIT_FAILURE;
+    }
+    dim.x = niiDWI->hdr->dim[1];
+    dim.y = niiDWI->hdr->dim[2];
+    dim.z = niiDWI->hdr->dim[3];
+    pixdim.x = niiDWI->hdr->pixdim[1];
+    pixdim.y = niiDWI->hdr->pixdim[2];
+    pixdim.z = niiDWI->hdr->pixdim[3];
+    printf( "\tdim    : %d x %d x %d x %d\n", dim.x, dim.y, dim.z, niiDWI->hdr->dim[4] );
+    printf( "\tpixdim : %.4f x %.4f x %.4f\n", 	pixdim.x, pixdim.y, pixdim.z );
+    printf( "\tqform  : %d\n", niiDWI->hdr->qform_code );
+    mat44 DWI_qform = niiDWI->hdr->qto_xyz;
+    if ( niiDWI->hdr->qform_code > 0 )
+    {
+        for(int i=0; i<3 ;i++)
+        {
+            printf( "\t\t| " );
+            for(int j=0; j<4 ;j++)
+                printf( "%9.4f ", DWI_qform.m[i][j] );
+            printf( "|\n" );
+        }
+    }
+    else
+    {
+        COLOR_warning( "This should never happen!", "\t\t" );
+    }
+    printf( "\tsform  : %d\n", niiDWI->hdr->sform_code );
+    mat44 DWI_sform = niiDWI->hdr->sto_xyz;
+    if ( niiDWI->hdr->sform_code > 0 )
+    {
+        for(int i=0; i<3 ;i++)
+        {
+            printf( "\t\t| " );
+            for(int j=0; j<4 ;j++)
+                printf( "%9.4f ", DWI_sform.m[i][j] );
+            printf( "|\n" );
+        }
+    }
+
+    // Read the affine matrix to rotate the vectors
+    // NB: we need the inverse, but in this case inv=transpose
+    if ( niiDWI->hdr->qform_code != 0 )
+    {
+        for(int i=0; i<3 ;i++)
+        for(int j=0; j<3 ;j++)
+            GLYPHS_affine[i][j] = DWI_qform.m[j][i];
+    }
+    else if ( niiDWI->hdr->sform_code != 0 )
+    {
+        for(int i=0; i<3 ;i++)
+        for(int j=0; j<3 ;j++)
+            GLYPHS_affine[i][j] = DWI_sform.m[j][i];
+    }
+    else {
+        for(int i=0; i<3 ;i++)
+        for(int j=0; j<3 ;j++)
+            GLYPHS_affine[i][j] = 0;
+        for(int i=0; i<3 ;i++)
+            GLYPHS_affine[i][i] = 1;
+    }
+
+    mat33 tmp;
+    for(int i=0; i<3 ;i++)
+        for(int j=0; j<3 ;j++)
+            tmp.m[i][j] = GLYPHS_affine[i][j];
+    printf( "\tAffine used (%s):\n", nifti_mat33_determ(tmp)<0?"RADIOLOGICAL":"NEUROLOGICAL" );
+    for(int i=0; i<3 ;i++)
+    {
+        printf( "\t\t| " );
+        for(int j=0; j<3 ;j++)
+            printf( "%9.4f ", GLYPHS_affine[i][j] );
+        printf( "|\n" );
+    }
+
+    COLOR_msg( "   [OK]" );
+
+
+    // ===================
+    // Reading SCHEME file
+    // ===================
+    COLOR_msg( "-> Reading 'SCHEME' file:", "\n" );
+
+    char line[1000];
+    FILE* pFile = fopen( SCHEME_filename.c_str(), "rt" );
+
+    // read the version
+    // ----------------
+    try
+    {
+        while( fgets(line, 1000, pFile) )
+            if ( line[0]!='#' )
+                break;
+
+        std::regex reVersion("^VERSION: (.*)\\s*$");
+        std::smatch reMatches;
+
+        std::string str_line = string(line);
+        if ( !std::regex_match(str_line, reMatches, reVersion) )
+        {
+            // no header found, assume standards BVECTOR format
+            SCHEME_version = 0;
+            fseek(pFile, -strlen(line), SEEK_CUR);
+        }
+        else
+        {
+            if( strcmp(reMatches[1].str().c_str(),"0")==0 || strcmp(reMatches[1].str().c_str(),"BVECTOR")==0 )
+                SCHEME_version = 0;
+            else if( strcmp(reMatches[1].str().c_str(),"1")==0 || strcmp(reMatches[1].str().c_str(),"STEJSKALTANNER")==0 )
+                SCHEME_version = 1;
+            else
+                throw "Version not recognized";
+        }
+    }
+    catch( const char* msg )
+    {
+        COLOR_error( msg, "\t" );
+        return EXIT_FAILURE;
+    }
+    printf( "\tversion   : %s\n", SCHEME_version==0?"BVECTOR":"STEJSKALTANNER" );
+
+    // read the data
+    // -------------
+    try
+    {
+        string      reFLOAT( "[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?" );
+        std::regex  reVERSION0( "^\\s*("+reFLOAT+")\\s+("+reFLOAT+")\\s+("+reFLOAT+")\\s+("+reFLOAT+")\\s*$" );
+        std::regex  reVERSION1( "^\\s*("+reFLOAT+")\\s+("+reFLOAT+")\\s+("+reFLOAT+")\\s+("+reFLOAT+")\\s+("+reFLOAT+")\\s+("+reFLOAT+")\\s+("+reFLOAT+")\\s*$" );
+        std::regex  reEMPTY( "^\\s*$" );
+        std::smatch reMatches;
+        int         Ns = 0;
+        float       x, y, z, b, G, D, d;
+        while( fgets(line, 1000, pFile) )
+        {
+            std::string str_line = string(line);
+            if( std::regex_match(str_line, reMatches, reEMPTY) )
+                continue;   // skip empty lines
+
+            if( SCHEME_version == 0 )
+            {
+                if ( !std::regex_match(str_line, reMatches, reVERSION0) )
+                    throw "Wrong row format";
+                x = std::atof( reMatches[1].str().c_str() );
+                y = std::atof( reMatches[2].str().c_str() );
+                z = std::atof( reMatches[3].str().c_str() );
+                b = std::atof( reMatches[4].str().c_str() ); // in mm^2/s
+                VECTOR<float> tmp( x, y, z );
+                tmp.Normalize();
+                SCHEME_dirs.push_back( tmp );
+                SCHEME_b.push_back( b );
+            }
+            else
+            {
+                if ( !std::regex_match(str_line, reMatches, reVERSION1) )
+                    throw "Wrong row format";
+                x = std::atof( reMatches[1].str().c_str() );
+                y = std::atof( reMatches[2].str().c_str() );
+                z = std::atof( reMatches[3].str().c_str() );
+                G = std::atof( reMatches[4].str().c_str() );
+                D = std::atof( reMatches[5].str().c_str() );
+                d = std::atof( reMatches[6].str().c_str() );
+                VECTOR<float> tmp( x, y, z );
+                tmp.Normalize();
+                SCHEME_dirs.push_back( tmp );
+                b = std::pow( 267.513e6*G*d, 2 ) * (D-d/3.0) * 1e-6; // in mm^2/s
+                SCHEME_b.push_back( b );
+            }
+
+            if ( b<5.0 )
+            {
+                SCHEME_idxB0.push_back( Ns );
+            }
+            else
+            {
+                SCHEME_idxDWI.push_back( Ns );
+                if ( std::find(SCHEME_shells_b.begin(), SCHEME_shells_b.end(), b) == SCHEME_shells_b.end() )
+                {
+                    SCHEME_shells_b.push_back( b ) ;
+                    vector<int> tmp;
+                    SCHEME_shells_idx.push_back( tmp ) ;
+                }
+            }
+            Ns++;
+        }
+    }
+    catch( const char* msg )
+    {
+        COLOR_error( msg, "\t" );
+        return EXIT_FAILURE;
+    }
+    fclose(pFile);
+
+    printf( "\tgradients : %d\n", SCHEME_b.size() );
+    if ( niiDWI->hdr->dim[4] != SCHEME_b.size() )
+    {
+        COLOR_error( "The scheme does not match the DWI dataset", "\t" );
+        return EXIT_FAILURE;
+    }
+
+    // fill data structure about the SCHEME
+    // ------------------------------------
+    for(int i=0; i < SCHEME_b.size() ;i++)
+    {
+        if ( SCHEME_b[i] < 5 )
+            continue;
+        int s = std::find( SCHEME_shells_b.begin(), SCHEME_shells_b.end(), SCHEME_b[i] ) - SCHEME_shells_b.begin();
+        SCHEME_shells_idx[s].push_back( i );
+    }
+
+    printf( "\tscheme    : %d b0 and %d shells (", SCHEME_idxB0.size(), SCHEME_shells_idx.size() );
+    for(int i=0; i < SCHEME_shells_b.size() ;i++)
+        printf( " [%d @ b=%.1f]", SCHEME_shells_idx[i].size(), SCHEME_shells_b[i] );
+    printf( " )\n" );
+
+    COLOR_msg( "   [OK]" );
+
+
+
+    // =======================
+    // Creating BACKGROUND map
+    // =======================
+    COLOR_msg( "-> Preparing 'BACKGROUND' map:", "\n" );
+    MAP.resize(dim.x,dim.y,dim.z);
+    if ( !MAP_filename.empty() )
+    {
+        printf( "\tdata   : reading from file\n" );
+        NIFTI* niiMAP = new NIFTI;
+        niiMAP->open( MAP_filename, true );
+        if ( !niiMAP->isValid() )
+        {
+            COLOR_error( "Unable to open the file", "\t" );
+            return EXIT_FAILURE;
+        }
+
+        printf( "\tdim    : %d x %d x %d x %d\n" , niiMAP->hdr->dim[1],    niiMAP->hdr->dim[2],    niiMAP->hdr->dim[3], niiMAP->hdr->dim[4] );
+        printf( "\tpixdim : %.4f x %.4f x %.4f\n", niiMAP->hdr->pixdim[1], niiMAP->hdr->pixdim[2], niiMAP->hdr->pixdim[3] );
+
+        if ( niiMAP->hdr->dim[1] != dim.x || niiMAP->hdr->dim[2] != dim.y || niiMAP->hdr->dim[3] != dim.z )
+        {
+            COLOR_error( "The DIMENSIONS do not match those of DWI images", "\t" );
+            return EXIT_FAILURE;
+        }
+        if ( abs(niiMAP->hdr->pixdim[1]-pixdim.x) > 1e-4 || abs(niiMAP->hdr->pixdim[2]-pixdim.y) > 1e-4 || abs(niiMAP->hdr->pixdim[3]-pixdim.z) > 1e-4 )
+        {
+            COLOR_warning( "The VOXEL SIZE does not match that of DWI images", "\t" );
+        }
+
+        FLOAT32 MIN = 0;//(*niiMAP->img)(0,0,0);
+        FLOAT32 MAX = MIN;
+
+        for(int i=0; i<dim.x ;i++)
+        for(int j=0; j<dim.y ;j++)
+        for(int k=0; k<dim.z ;k++)
+        {
+            MAP(i,j,k) = (*niiMAP->img)(i,j,k);
+            if ( MAP(i,j,k) > MAX )
+                MAX = MAP(i,j,k);
+            if ( MAP(i,j,k) < MIN )
+                MIN = MAP(i,j,k);
+        }
+        if ( MAX - MIN <= 0 )
+        {
+            COLOR_error( "The dynamic range is zero", "\t" );
+            return EXIT_FAILURE;
+        }
+        MAP_min	= MIN;
+        MAP_min_view = 0;
+        MAP_max	= MAP_max_view = MAX;
+
+        printf( "\tvalues : [%.2e ... %.2e]\n", MAP_min, MAP_max );
+        COLOR_msg( "   [OK]" );
+    }
+    else
+    {
+        printf( "\tdata   : " );
+
+        if ( SCHEME_idxB0.size() > 0 )
+        {
+            printf( "taking first b0 image\n" );
+            FLOAT32 MIN = (*niiDWI->img)(0,0,0,SCHEME_idxB0[0]);
+            FLOAT32 MAX = MIN;
+
+            for(int i=0; i<dim.x ;i++)
+            for(int j=0; j<dim.y ;j++)
+            for(int k=0; k<dim.z ;k++)
+            {
+                MAP(i,j,k) = (*niiDWI->img)(i,j,k,SCHEME_idxB0[0]);
+                if ( MAP(i,j,k) > MAX )
+                    MAX = MAP(i,j,k);
+                if ( MAP(i,j,k) < MIN )
+                    MIN = MAP(i,j,k);
+            }
+            if ( MAX - MIN <= 0 )
+            {
+                COLOR_error( "The dynamic range is zero", "\t" );
+                return EXIT_FAILURE;
+            }
+            MAP_min	= MIN;
+            MAP_min_view = 0;
+            MAP_max	= MAP_max_view = MAX;
+        }
+        else
+        {
+            printf( "no b0 found\n" );
+            MAP = 0;
+            MAP_min	= MAP_min_view = 0;
+            MAP_max	= MAP_max_view = 1;
+        }
+        printf( "\tvalues : [%.2e ... %.2e]\n", MAP_min, MAP_max );
+        COLOR_msg( "   [OK]" );
+    }
+
+
+    // ==================
+    // Reading PEAKS file
+    // ==================
+    COLOR_msg( "-> Reading 'PEAKS' dataset:", "\n" );
+
+    if ( !PEAKS_filename.empty() )
+    {
+        niiPEAKS = new NIFTI;
+        niiPEAKS->open( PEAKS_filename, true );
+        if ( !niiPEAKS->isValid() )
+        {
+            COLOR_error( "Unable to open the file", "\t" );
+            return false;
+        }
+
+        if ( niiPEAKS->hdr->dim[0] != 4 || niiPEAKS->hdr->dim[4]%3 != 0 )
+        {
+            COLOR_error( "The size must be (*,*,*,3*k)", "\t" );
+            return EXIT_FAILURE;
+        }
+        PEAKS_n = niiPEAKS->hdr->dim[4]/3;
+
+        printf( "\tdim     : %d x %d x %d (%d peaks per voxel)\n" , niiPEAKS->hdr->dim[1], niiPEAKS->hdr->dim[2], niiPEAKS->hdr->dim[3], PEAKS_n );
+        printf( "\tpixdim  : %.4f x %.4f x %.4f\n", niiPEAKS->hdr->pixdim[1], niiPEAKS->hdr->pixdim[2], niiPEAKS->hdr->pixdim[3] );
+
+        printf( "\tqform   : %d\n", niiPEAKS->hdr->qform_code );
+        mat44 PEAKS_qform = niiPEAKS->hdr->qto_xyz;
+        if ( niiPEAKS->hdr->qform_code > 0 )
+        {
+            for(int i=0; i<3 ;i++)
+            {
+                printf( "\t\t| " );
+                for(int j=0; j<4 ;j++)
+                    printf( "%9.4f ", PEAKS_qform.m[i][j] );
+                printf( "|\n" );
+            }
+        }
+        else
+        {
+            COLOR_warning( "This should never happen!", "\t\t" );
+        }
+
+        printf( "\tsform  : %d\n", niiPEAKS->hdr->sform_code );
+        mat44 PEAKS_sform = niiPEAKS->hdr->sto_xyz;
+        if ( niiPEAKS->hdr->sform_code > 0 )
+        {
+            for(int i=0; i<3 ;i++)
+            {
+                printf( "\t\t| " );
+                for(int j=0; j<4 ;j++)
+                    printf( "%9.4f ", PEAKS_sform.m[i][j] );
+                printf( "|\n" );
+            }
+        }
+
+        if ( niiPEAKS->hdr->dim[1] != dim.x || niiPEAKS->hdr->dim[2] != dim.y || niiPEAKS->hdr->dim[3] != dim.z )
+        {
+            COLOR_error( "The DIMENSIONS do not match those of DWI images", "\t" );
+            return EXIT_FAILURE;
+        }
+        if ( abs(niiPEAKS->hdr->pixdim[1]-pixdim.x) > 1e-3 || abs(niiPEAKS->hdr->pixdim[2]-pixdim.y) > 1e-3 || abs(niiPEAKS->hdr->pixdim[3]-pixdim.z) > 1e-3 )
+        {
+            COLOR_warning( "The VOXEL SIZE does not match that of DWI images", "\t" );
+        }
+        if (
+            niiPEAKS->hdr->sform_code != niiDWI->hdr->sform_code || niiPEAKS->hdr->qform_code != niiDWI->hdr->qform_code || niiPEAKS->hdr->pixdim[0] != niiDWI->hdr->pixdim[0] ||
+            niiPEAKS->hdr->quatern_b != niiDWI->hdr->quatern_b || niiPEAKS->hdr->quatern_c != niiDWI->hdr->quatern_c || niiPEAKS->hdr->quatern_d != niiDWI->hdr->quatern_d ||
+            niiPEAKS->hdr->qoffset_x != niiDWI->hdr->qoffset_x || niiPEAKS->hdr->qoffset_y != niiDWI->hdr->qoffset_y || niiPEAKS->hdr->qoffset_z != niiDWI->hdr->qoffset_z
+        )
+        {
+            COLOR_warning( "The GEOMETRY does not match that of DWI images", "\t" );
+        }
+
+        // Read the affine matrix to rotate the vectors
+        // NB: we need the inverse, but in this case inv=transpose
+        if ( niiPEAKS->hdr->qform_code != 0 )
+        {
+            for(int i=0; i<3 ;i++)
+            for(int j=0; j<3 ;j++)
+                PEAKS_affine[i][j] = PEAKS_qform.m[j][i];
+        }
+        else if ( niiPEAKS->hdr->sform_code != 0 )
+        {
+            for(int i=0; i<3 ;i++)
+            for(int j=0; j<3 ;j++)
+                PEAKS_affine[i][j] = PEAKS_sform.m[j][i];
+        }
+        else {
+            for(int i=0; i<3 ;i++)
+            for(int j=0; j<3 ;j++)
+                PEAKS_affine[i][j] = 0;
+            for(int i=0; i<3 ;i++)
+                PEAKS_affine[i][i] = 1;
+        }
+
+        printf( "\tAffine used :\n" );
+        for(int i=0; i<3 ;i++)
+        {
+            printf( "\t\t| " );
+            for(int j=0; j<3 ;j++)
+                printf( "%9.4f ", PEAKS_affine[i][j] );
+            printf( "|\n" );
+        }
+
+        COLOR_msg( "   [OK]" );
+    }
+    else {
+        // no peaks are passed and won't be showed
+        COLOR_msg( "   [no peaks specified]" );
+        PEAKS_n = 0;
+    }
+
+
+    // ===================
+    // Reading TRACTS file
+    // ===================
+    COLOR_msg( "-> Reading 'TRACTOGRAM':", "\n" );
+
+    if ( !TRK_filename.empty() )
+    {
+        TRK_file = TrackVis();
+        if ( !TRK_file.open( TRK_filename ) )
+        {
+            COLOR_error( "Unable to open the file", "\t" );
+            return false;
+        }
+
+        printf("\tcount      : %d\n" , TRK_file.hdr.n_count );
+        printf("\tdim        : %d x %d x %d\n" , TRK_file.hdr.dim[0], TRK_file.hdr.dim[1], TRK_file.hdr.dim[2] );
+        printf("\tpixdim     : %.4f x %.4f x %.4f\n", TRK_file.hdr.voxel_size[0], TRK_file.hdr.voxel_size[1], TRK_file.hdr.voxel_size[2] );
+        printf("\tscalars    : %d\n" , TRK_file.hdr.n_scalars );
+        printf("\tproperties : %d\n" , TRK_file.hdr.n_properties );
+
+        if ( TRK_file.hdr.dim[0] != dim.x || TRK_file.hdr.dim[1] != dim.y || TRK_file.hdr.dim[2] != dim.z ||
+             abs(TRK_file.hdr.voxel_size[0]-pixdim.x) > 1e-4 || abs(TRK_file.hdr.voxel_size[1]-pixdim.y) > 1e-4 || abs(TRK_file.hdr.voxel_size[2]-pixdim.z) > 1e-4 )
+        {
+            COLOR_warning( "The GEOMETRY does not match those of DWI images", "\t" );
+        }
+
+        TRK_skip = ceil( TRK_file.hdr.n_count / 25000.0 );
+        int N, n_s = TRK_file.hdr.n_scalars, n_p = TRK_file.hdr.n_properties;
+        FILE* fp = TRK_file.getFilePtr();
+
+        // count how many points I need to store in memory
+        int TractsRead = 0, CoordsRead = 0;
+        fseek(fp, 1000, SEEK_SET);
+        for(int f=0; f < TRK_file.hdr.n_count ; f++)
+        {
+            fread( (char*)&N, 1, 4, fp );
+            fseek( fp, N*(3+n_s)*4 + n_p*4, SEEK_CUR );
+            if ( f%TRK_skip==0 )
+            {
+                TractsRead++;
+                CoordsRead += N;
+            }
+        }
+        printf("\tin memory  : %d (%d points)\n" , TractsRead, CoordsRead );
+
+        // create data structure for drawing the tracts
+        TRK_nTractsPlotted = TractsRead;
+        TRK_nPoints = new int[TRK_nTractsPlotted];
+        TRK_coords  = new float[3*CoordsRead];
+        TRK_colors  = new float[3*CoordsRead];
+
+        float* ptr  = TRK_coords;
+        float* ptrc = TRK_colors;
+        float norm;
+        VECTOR<float> dir;
+        TractsRead = 0;
+        fseek(fp, 1000, SEEK_SET);
+        for(int f=0; f < TRK_file.hdr.n_count ; f++)
+        {
+            if ( f%TRK_skip==0 )
+            {
+                fread( (char*)&N, 1, 4, fp );
+                TRK_nPoints[TractsRead] = N;
+
+                for(int i=0; i<N; i++)
+                {
+                    fread((char*)ptr, 1, 12, fp);
+                    fseek( fp, n_s*4, SEEK_CUR );
+
+                    // coordinates (later they will be scaled back to voxel size)
+                    ptr[0] /= pixdim.x;
+                    ptr[1] /= pixdim.y;
+                    ptr[2] /= pixdim.z;
+
+                    // colors
+                    if ( i > 0 )
+                    {
+                        dir.x = *(ptr  ) - *(ptr-3);
+                        dir.y = *(ptr+1) - *(ptr-2);
+                        dir.z = *(ptr+2) - *(ptr-1);
+                        norm = dir.norm();
+                        ptrc[0] = abs( dir.x / norm );
+                        ptrc[1] = abs( dir.y / norm );
+                        ptrc[2] = abs( dir.z / norm );
+                    }
+                    else
+                    {
+                        ptrc[0] = 0;
+                        ptrc[1] = 0;
+                        ptrc[2] = 0;
+                    }
+
+                    ptr  += 3;
+                    ptrc += 3;
+                }
+                fseek( fp, n_p*4, SEEK_CUR );
+                TractsRead++;
+            }
+            else
+            {
+                fread( (char*)&N, 1, 4, fp );
+                fseek( fp, N*(3+n_s)*4 + n_p*4, SEEK_CUR );
+            }
+        }
+
+        COLOR_msg( "   [OK]" );
+        printf( "\n\n" );
+    }
+    else
+    {
+        // no fibers are passed and won't be showed
+        COLOR_msg( "   [no streamlines specified]" );
+        TRK_nTractsPlotted = 0;
+    }
+
+    TRK_offset.x = 0;
+    TRK_offset.y = 0;
+    TRK_offset.z = 0;
+
+
+    // ============
+    // SETUP OpenGL
+    // ============
+    VOXEL.x = round( dim.x / 2.0 );
+    VOXEL.y = round( dim.y / 2.0 );
+    VOXEL.z = round( dim.z / 2.0 );
+    OpenGL_init( argc, argv );
+
+    return EXIT_SUCCESS;
+}
  
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 0bd58107..fd52b336 100644
--- a/setup.py
+++ b/setup.py
@@ -1,5 +1,90 @@
 from setuptools import Extension, setup
 from setuptools.command.build_ext import build_ext
+import os
+from os.path import join as pjoin
+
+# taken from https://github.com/rmcgibbo/npcuda-example/blob/master/cython/setup.py
+def find_in_path(name, path):
+    """Find a file in a search path"""
+
+    # Adapted fom http://code.activestate.com/recipes/52224
+    for dir in path.split(os.pathsep):
+        binpath = pjoin(dir, name)
+        if os.path.exists(binpath):
+            return os.path.abspath(binpath)
+    return None
+
+def locate_cuda():
+    """Locate the CUDA environment on the system
+    Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64'
+    and values giving the absolute path to each directory.
+    Starts by looking for the CUDAHOME env variable. If not found,
+    everything is based on finding 'nvcc' in the PATH.
+    """
+
+    # First check if the CUDAHOME env variable is in use
+    if 'CUDAHOME' in os.environ:
+        home = os.environ['CUDAHOME']
+        nvcc = pjoin(home, 'bin', 'nvcc')
+    else:
+        # Otherwise, search the PATH for NVCC
+        nvcc = find_in_path('nvcc', os.environ['PATH'])
+        if nvcc is None:
+            return None
+        home = os.path.dirname(os.path.dirname(nvcc))
+
+    cudaconfig = {'home': home, 'nvcc': nvcc,
+                  'include': pjoin(home, 'include'),
+                  'lib64': pjoin(home, 'lib64')}
+    for k, v in iter(cudaconfig.items()):
+        if not os.path.exists(v):
+            return None
+
+    return cudaconfig
+
+def customize_compiler_for_nvcc(self):
+    """Inject deep into distutils to customize how the dispatch
+    to gcc/nvcc works.
+    If you subclass UnixCCompiler, it's not trivial to get your subclass
+    injected in, and still have the right customizations (i.e.
+    distutils.sysconfig.customize_compiler) run on it. So instead of going
+    the OO route, I have this. Note, it's kindof like a wierd functional
+    subclassing going on.
+    """
+
+    # Tell the compiler it can processes .cu
+    self.src_extensions.append('.cu')
+
+    # Save references to the default compiler_so and _comple methods
+    default_compiler_so = self.compiler_so
+    super = self._compile
+
+    # Now redefine the _compile method. This gets executed for each
+    # object but distutils doesn't have the ability to change compilers
+    # based on source extension: we add it.
+    def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
+        if os.path.splitext(src)[1] == '.cu':
+            # use the cuda for .cu files
+            self.set_executable('compiler_so', CUDA['nvcc'])
+            # use only a subset of the extra_postargs, which are 1-1
+            # translated from the extra_compile_args in the Extension class
+            print(type(extra_postargs))
+            print(extra_postargs)
+            postargs = extra_postargs['nvcc']
+        else:
+            print(type(extra_postargs))
+            print(extra_postargs)
+            postargs = extra_postargs['gcc']
+
+        super(obj, src, ext, cc_args, postargs, pp_opts)
+        # Reset the default compiler_so, which we might have changed for cuda
+        self.compiler_so = default_compiler_so
+
+    # Inject our redefined _compile method into the class
+    self._compile = _compile
+
+# Locate CUDA
+CUDA = locate_cuda()
 
 def get_extensions():
     # Cython extension to create the sparse data structure from a tractogram
@@ -16,23 +101,88 @@ def get_extensions():
                      sources=['commit/proximals.pyx'],
                      extra_compile_args=['-w'],
                      language='c++')
-    return [trk2dictionary, core, proximals]
-
-class CustomBuildExtCommand(build_ext):
-    """ build_ext command to use when numpy headers are needed. """
-    def run(self):
-        # Now that the requirements are installed, get everything from numpy
-        from Cython.Build import cythonize
-        from numpy import get_include
-
-        # Add everything requires for build
-        self.swig_opts = None
-        self.include_dirs = [get_include()]
-        self.distribution.ext_modules[:] = cythonize(self.distribution.ext_modules)
-
-        # Call original build_ext command
-        build_ext.finalize_options(self)
-        build_ext.run(self)
+
+    return [ext1, ext2, ext3]
+
+def get_extensions_with_cuda():
+    # Cython extension to create the sparse data structure from a tractogram
+    # for the computation of matrix-vector multiplications
+
+    ext1 = Extension(name='commit.trk2dictionary',
+                     sources=['commit/trk2dictionary/trk2dictionary.pyx'],
+                     extra_compile_args= {'gcc':  ['-w'],
+                                          'nvcc': ['-arch=sm_50', '--ptxas-options=-v', '-c', '--compiler-options', "'-fPIC'"]},
+                     extra_link_args=[],
+                     language='c++')
+
+    ext2 = Extension(name='commit.core',
+                     sources=['commit/core.pyx'],
+                     extra_compile_args= {'gcc':  ['-w'],
+                                          'nvcc': ['-arch=sm_50', '--ptxas-options=-v', '-c', '--compiler-options', "'-fPIC'"]},
+                     extra_link_args=[],
+                     language='c++')
+
+    ext3 = Extension(name='commit.proximals',
+                      sources=['commit/proximals.pyx'],
+                      extra_compile_args= {'gcc':  ['-w'],
+                                           'nvcc': ['-arch=sm_50', '--ptxas-options=-v', '-c', '--compiler-options', "'-fPIC'"]},
+                      extra_link_args=[],
+                      language='c++')
+
+    ext4 = Extension(name='commit.cudaoperator.operator',
+                     sources = ['commit/cudaoperator/operator_withCUDA.cu', 'commit/cudaoperator/operator.pyx'],
+                     extra_compile_args= {'gcc':  ['-w'],
+                                          'nvcc': ['-arch=sm_50', '--ptxas-options=-v', '-c', '--compiler-options', "'-fPIC'"]},
+                     language = 'c++',
+                     library_dirs = [CUDA['lib64']],
+                     libraries = ['cudart'],
+                     runtime_library_dirs = [CUDA['lib64']])
+
+    return [ext1, ext2, ext3, ext4]
+
+if CUDA == None:
+    extensions = get_extensions()
+else:
+    extensions = get_extensions_with_cuda()
+
+if CUDA == None:
+    class CustomBuildExtCommand(build_ext):
+        """ build_ext command to use when numpy headers are needed. """
+
+        def run(self):
+            # Now that the requirements are installed, get everything from numpy
+            from Cython.Build import cythonize
+            from numpy import get_include
+            
+            # Add everything requires for build
+            self.swig_opts = None
+            self.include_dirs = [get_include()]
+            self.distribution.ext_modules[:] = cythonize(self.distribution.ext_modules)
+
+            # Call original build_ext command
+            build_ext.finalize_options(self)
+            build_ext.run(self)
+else:
+    class CustomBuildExtCommand(build_ext):
+        """ build_ext command to use when numpy headers are needed. """
+
+        def build_extensions(self):
+            customize_compiler_for_nvcc(self.compiler)
+            build_ext.build_extensions(self)
+
+        def run(self):
+            # Now that the requirements are installed, get everything from numpy
+            from Cython.Build import cythonize
+            from numpy import get_include
+            
+            # Add everything requires for build
+            self.swig_opts = None
+            self.include_dirs = [get_include(), CUDA['include'], 'commit/cudaoperator']
+            self.distribution.ext_modules[:] = cythonize(self.distribution.ext_modules)
+
+            # Call original build_ext command
+            build_ext.finalize_options(self)
+            build_ext.run(self)
 
 # import details from commit/info.py
 import sys