diff --git a/.gitignore b/.gitignore index bfcab80b..f1e1b6fc 100644 --- a/.gitignore +++ b/.gitignore @@ -135,5 +135,7 @@ __pycache__/ unidec_bin/Example Data/ADH_unidecfiles/ADH_peak_areas.dat unidec_bin/Example Data/POPC_Nanodiscs_unidecfiles/POPC_Nanodiscs_1D_Mass_Defects.txt unidec_bin/Example Data/POPC_Nanodiscs_unidecfiles/POPC_Nanodiscs_2D_Mass_Defects.txt +unidec_modules/thermo_reader/ +unidec_bin/Presets/Marius/ diff --git a/GUniDec.py b/GUniDec.py index 44596bfd..064d1ea8 100644 --- a/GUniDec.py +++ b/GUniDec.py @@ -150,15 +150,14 @@ def on_open_file(self, filename, directory, skipengine=False, **kwargs): # Plot 1D if self.eng.config.batchflag == 0: - self.view.plot1.plotrefreshtop(self.eng.data.data2[:, 0], self.eng.data.data2[:, 1], "Data", "m/z", - "Intensity", "Data", self.eng.config) + self.makeplot1(imfit=False) # IM Loading and Plotting if self.eng.config.imflag == 1 and self.eng.config.batchflag != 1: self.view.controls.ctlmindt.SetValue(str(np.amin(self.eng.data.data3[:, 1]))) self.view.controls.ctlmaxdt.SetValue(str(np.amax(self.eng.data.data3[:, 1]))) - if self.eng.config.batchflag == 0: - self.view.plot1im.contourplot(self.eng.data.rawdata3, self.eng.config, xlab="m/z (Th)", - ylab="Arrival Time (ms)", title="IM-MS Data") + #if self.eng.config.batchflag == 0: + # self.view.plot1im.contourplot(self.eng.data.rawdata3, self.eng.config, xlab="m/z (Th)", + # ylab="Arrival Time (ms)", title="IM-MS Data") # tstart = time.perf_counter() # Load Config to GUI self.import_config() @@ -332,7 +331,10 @@ def on_dataprep_button(self, e=None): self.eng.process_data() self.eng.get_auto_peak_width(set=False) self.import_config() + #print("Data Prep Time1: %.2gs" % (time.perf_counter() - tstart)) self.view.clear_all_plots() + self.makeplot1(imfit=False) + ''' self.view.plot1.plotrefreshtop(self.eng.data.data2[:, 0], self.eng.data.data2[:, 1], "Data Sent to UniDec", "m/z (Th)", "Normalized Intensity", "Data", self.eng.config) if self.eng.config.intthresh != 0 and self.eng.config.imflag == 0: @@ -344,7 +346,7 @@ def on_dataprep_button(self, e=None): if self.eng.config.imflag == 1: self.view.plot1im.contourplot(self.eng.data.data3, self.eng.config, xlab="m/z (Th)", ylab="Arrival Time (ms)", title="IM-MS Data") - self.view.plot1.repaint() + self.view.plot1.repaint()''' self.view.SetStatusText("Data Length: " + str(len(self.eng.data.data2)), number=2) self.view.SetStatusText("R\u00B2 ", number=3) @@ -425,6 +427,7 @@ def on_pick_peaks(self, e=None): self.makeplot4(1) self.view.SetStatusText("Peak Pick Done", number=5) + self.on_score() pass @@ -483,7 +486,7 @@ def on_auto(self, e=None): # # ........................................... - def makeplot1(self, e=None): + def makeplot1(self, e=None, intthresh=False, imfit=True): """ Plot data and fit in self.view.plot1 and optionally in plot1fit :param e: unused event @@ -491,19 +494,60 @@ def makeplot1(self, e=None): """ if self.eng.config.batchflag == 0: tstart = time.perf_counter() + leg = False + if self.eng.config.imflag == 1: - self.view.plot1fit.contourplot(self.eng.data.fitdat2d, self.eng.config, xlab="m/z (Th)", - ylab="Arrival Time (ms)", title="IM-MS Fit") - self.view.plot1.plotrefreshtop(self.eng.data.data2[:, 0], self.eng.data.data2[:, 1], "Data and UniDec Fit", - "m/z (Th)", "Normalized Intensity", "Data", self.eng.config, nopaint=True) - try: - self.view.plot1.plotadd(self.eng.data.data2[:, 0], self.eng.data.fitdat, 'red', "Fit Data") - pass - except: - pass + try: + self.view.plot1im.contourplot(self.eng.data.data3, self.eng.config, xlab="m/z (Th)", + ylab="Arrival Time (ms)", title="IM-MS Data") + except: + pass + if imfit: + try: + self.view.plot1fit.contourplot(self.eng.data.fitdat2d, self.eng.config, xlab="m/z (Th)", + ylab="Arrival Time (ms)", title="IM-MS Fit") + except: + pass + + if self.eng.config.reductionpercent<0: + print("Making Dot Plot") + data2 = ud.dataprep(self.eng.data.rawdata, self.eng.config, peaks=False, intthresh=False) + self.view.plot1.plotrefreshtop(data2[:, 0], data2[:, 1], + "Data and UniDec Fit", + "m/z (Th)", "Normalized Intensity", "Data", self.eng.config, + nopaint=True) + self.view.plot1.plotadddot(self.eng.data.data2[:,0], self.eng.data.data2[:,1], 'blue', "o", "Peaks") + + try: + if len(self.eng.data.fitdat) > 0: + self.view.plot1.plotadddot(self.eng.data.data2[:, 0], self.eng.data.fitdat, 'red', "s", "Fit Data") + leg = True + pass + except: + pass + + else: + self.view.plot1.plotrefreshtop(self.eng.data.data2[:, 0], self.eng.data.data2[:, 1], "Data and UniDec Fit", + "m/z (Th)", "Normalized Intensity", "Data", self.eng.config, nopaint=True) + + if self.eng.config.intthresh != 0 and self.eng.config.imflag == 0 and intthresh: + self.view.plot1.plotadd(self.eng.data.data2[:, 0], + np.zeros_like(self.eng.data.data2[:, 1]) + self.eng.config.intthresh, "red", + "Noise Threshold") + leg=True + + try: + if len(self.eng.data.fitdat) > 0: + self.view.plot1.plotadd(self.eng.data.data2[:, 0], self.eng.data.fitdat, 'red', "Fit Data") + leg = True + pass + except: + pass if self.eng.config.aggressiveflag != 0 and len(self.eng.data.baseline) == len(self.eng.data.fitdat): self.view.plot1.plotadd(self.eng.data.data2[:, 0], self.eng.data.baseline, 'blue', "Baseline") - self.view.plot1.add_legend() + if leg: + self.view.plot1.add_legend() + self.view.plot1.repaint() tend = time.perf_counter() print("Plot 1: %.2gs" % (tend - tstart)) diff --git a/metaunidec/gui_elements/ud_cont_meta.py b/metaunidec/gui_elements/ud_cont_meta.py index 9b1985bb..a6e85e97 100644 --- a/metaunidec/gui_elements/ud_cont_meta.py +++ b/metaunidec/gui_elements/ud_cont_meta.py @@ -301,7 +301,7 @@ def __init__(self, parent, config, pres, panel, iconfile): i += 1 self.ctlpoolflag = wx.RadioBox(panel2b, label="m/z to Mass Transformation", - choices=["Integration", "Interpolation"]) + choices=["Integrate", "Interpolate", "Smart"]) gbox2b.Add(self.ctlpoolflag, (i, 0), span=(1, 2), flag=wx.ALIGN_CENTER_VERTICAL) i += 1 @@ -813,7 +813,9 @@ def setup_tool_tips(self): self.ctlpoolflag.SetToolTip(wx.ToolTip( "Sets type of conversion from m/z to mass.\nIntegration:\n\tEach m/z bin goes to the nearest mass bin" "\n\tBest for undersampled masses\nInterpolation:\n\tEach mass value interpolates its value in m/z space" - "\n\tBest for oversampled mass data")) + "\n\tBest for oversampled mass data" + "\nSmart:\n\tDetermines interpolation vs. integration automatically\n\tfrom the data and blends the two." + "\n\tUse Smart in most cases (default). ")) self.ctlnorm2.SetToolTip(wx.ToolTip( "Sets normalization of extraction.\nMaximum will normalize so that the maximum value of a timepoint is 100%." "\nSum will normalize so that the sum of all peaks of a timepoint is 100%." diff --git a/readme.md b/readme.md index bb8d7ef1..12cca5ea 100644 --- a/readme.md +++ b/readme.md @@ -194,6 +194,28 @@ The main GUI class is GUniDec.UniDecApp. ## Change Log +v.4.2.0 + +**Added Smart Transform**. From the beginning, UniDec has asked users to decide how it should perform the nonlinear transform from m/z to mass (Integration or Interpolation). Now, the Smart Transform decides for the user based on the density of the data. Where the data is sparse relative to the mass sampling, it will use interpolation to fill in the gaps. Where data is dense relative to the mass sampling, it will use integration to ensure that all the data in-between is accounted for. Smart Transform is located under the Additional Deconvolution Parameters tab and has now been made the default. In some preliminary testing, the Smart Transform showed more robust peak areas that were less sensitive to the mass sampling. The peak heights may change some, but the peak areas should be a lot more reliable (remember Ctrl+I to integrate). Note: you may not notice much difference, but using the Smart Transform should avoid some glitches that are hard to spot. Let me know how it goes! + +**Major improvements in 2D plotting speed** due to behind-the-scenes changes and optimzation of the plotting code. + +**Sparse data support**. Improvements to the algorithm to support sparse data, including just peak centroids. This fundamentally changes the underlying algorithm in minor ways. You may notice minor differences but likely will not. Let me know if it causes any problems. + +Better handling of click and zoom at the edges of the plot. Now, it will default to the edge if you go off the plot. + +Added drag and drop for loading the state from a zip file. + +Upgraded pymzml version for improved mzML compatibility and made mzML import more error tolerant. + +Fixed issue with discrete plot and data that was non-uniformly sampled. + +Fix to bug in calculating the error from the weighted standard deviation of charge state masses. + +Fixed bug in Load and Save State with non-text files. + +Fixed IM-MS plotting bugs. + v.4.1.2 **Added button in UniDec to turn off data normalization.** Note: the beta values will be automatically scaled to match this. diff --git a/unidec.py b/unidec.py index d41e968e..262e2ef2 100644 --- a/unidec.py +++ b/unidec.py @@ -104,7 +104,8 @@ def open_file(self, file_name, file_directory=None, *args, **kwargs): newname = os.path.join(os.getcwd(), self.config.outfname + "_imraw.txt") if not os.path.isfile(newname): try: - shutil.copy(file_directory, newname) + # shutil.copy(file_directory, newname) + np.savetxt(newname, self.data.rawdata) except Exception as e: pass @@ -113,7 +114,7 @@ def open_file(self, file_name, file_directory=None, *args, **kwargs): else: refresh = False - if os.path.isfile(self.config.infname) and not refresh: + if os.path.isfile(self.config.infname) and not refresh and self.config.imflag == 0: self.data.data2 = np.loadtxt(self.config.infname) self.config.procflag = 1 else: @@ -329,7 +330,11 @@ def unidec_imports(self, efficiency=False, everything=False): self.pks = peakstructure.Peaks() self.data.massdat = np.loadtxt(self.config.outfname + "_mass.txt") self.data.ztab = np.arange(self.config.startz, self.config.endz + 1) - self.config.massdatnormtop = np.amax(self.data.massdat[:, 1]) + try: + self.config.massdatnormtop = np.amax(self.data.massdat[:, 1]) + except: + self.data.massdat = np.array([self.data.massdat]) + self.config.massdatnormtop = np.amax(self.data.massdat[:, 1]) if not efficiency: self.data.massgrid = np.fromfile(self.config.outfname + "_massgrid.bin", dtype=float) @@ -399,8 +404,11 @@ def pick_peaks(self): self.export_config() # Detect Peaks and Normalize peaks = ud.peakdetect(self.data.massdat, self.config) - # print(peaks) - self.setup_peaks(peaks) + if len(peaks) > 0: + self.setup_peaks(peaks) + else: + print("No peaks detected", peaks, self.config.peakwindow, self.config.peakthresh) + print("Mass Data:", self.data.massdat) def setup_peaks(self, peaks): if self.config.peaknorm == 1: @@ -422,9 +430,12 @@ def setup_peaks(self, peaks): # Generate Intensities of Each Charge State for Each Peak mztab = ud.make_peaks_mztab(self.data.mzgrid, self.pks, self.config.adductmass) # Calculate errors for peaks with FWHM - ud.peaks_error_FWHM(self.pks, self.data.massdat) try: - + ud.peaks_error_FWHM(self.pks, self.data.massdat) + except Exception as e: + print("Error in FWHM calculation:", e) + print(self.data.massdat) + try: ud.peaks_error_mean(self.pks, self.data.massgrid, self.data.ztab, self.data.massdat, self.config) except Exception as e: print("Error in error calculations:", e) @@ -1344,6 +1355,7 @@ def pks_fscore(self): fscore2 = self.score_minimum(height, umin) # print("Fscore5", fscore2, umin, height) p.fscore *= fscore2 + ''' def tscore(self): try: @@ -1368,34 +1380,35 @@ def dscore(self, xfwhm=2, pow=2): :return: """ - - self.pks_mscore(xfwhm=xfwhm, pow=pow) - self.pks_uscore(pow=pow, xfwhm=xfwhm) - self.pks_csscore(xfwhm=xfwhm) - self.pks_fscore() - #self.tscore() - tscores = [] - ints = [] - for p in self.pks.peaks: - scores = np.array([p.mscore, p.uscore, p.fscore, p.cs_score]) # p.rsquared, - p.dscore = np.product(scores) - p.lscore = np.product(scores[:-1]) - tscores.append(p.dscore) - ints.append(p.height) - print("Mass:", p.mass, - "Peak Shape:", round(p.mscore * 100, 2), - "Uniqueness:", round(p.uscore * 100, 2), - # "Fitting R^2", round(p.rsquared * 100, 2), - "Charge:", round(p.cs_score * 100, 2), - "FWHM:", round(p.fscore * 100, 2), - "Combined:", round(p.dscore * 100, 2)) - # print(p.intervalFWHM) - ints = np.array(ints) - self.pks.uniscore = ud.weighted_avg(tscores, ints ** pow) * self.config.error - print("R Squared:", self.config.error) - #print("TScore:", self.data.tscore) - print("Average Peaks Score (UniScore):", self.pks.uniscore) - + if len(self.pks.peaks) > 0: + self.pks_mscore(xfwhm=xfwhm, pow=pow) + self.pks_uscore(pow=pow, xfwhm=xfwhm) + self.pks_csscore(xfwhm=xfwhm) + self.pks_fscore() + # self.tscore() + tscores = [] + ints = [] + for p in self.pks.peaks: + scores = np.array([p.mscore, p.uscore, p.fscore, p.cs_score]) # p.rsquared, + p.dscore = np.product(scores) + p.lscore = np.product(scores[:-1]) + tscores.append(p.dscore) + ints.append(p.height) + print("Mass:", p.mass, + "Peak Shape:", round(p.mscore * 100, 2), + "Uniqueness:", round(p.uscore * 100, 2), + # "Fitting R^2", round(p.rsquared * 100, 2), + "Charge:", round(p.cs_score * 100, 2), + "FWHM:", round(p.fscore * 100, 2), + "Combined:", round(p.dscore * 100, 2)) + # print(p.intervalFWHM) + ints = np.array(ints) + self.pks.uniscore = ud.weighted_avg(tscores, ints ** pow) * self.config.error + print("R Squared:", self.config.error) + # print("TScore:", self.data.tscore) + print("Average Peaks Score (UniScore):", self.pks.uniscore) + else: + print("No peaks present") def filter_peaks(self, minscore=0.4): # w = deepcopy(self.config.peakwindow) @@ -1497,9 +1510,24 @@ def open_test_spectrum(self, masslist=None, n=1, **kwargs): np.savetxt(os.path.join(testdir, fname), data) self.open_file(fname, testdir, **kwargs) + self.config.maxmz, self.config.minmz = "", "" self.data.ztab = ztab pass + def pass_data_in(self, data, n=1, **kwargs): + testdir = os.path.join(self.config.UniDecDir, "TestSpectra") + if not os.path.isdir(testdir): + os.mkdir(testdir) + + fname = "test_" + str(n) + ".txt" + path = os.path.join(testdir, fname) + print("Creating temp file:", path) + np.savetxt(path, data) + + self.open_file(fname, testdir, **kwargs) + self.config.maxmz, self.config.minmz = "", "" + self.config.default_isotopic_res() + def get_spectrum_peaks(self, threshold=0.05, window=None): if window is None: window = self.config.mzsig diff --git a/unidec_bin/Example Data/ADH_unidecfiles/ADH_conf.dat b/unidec_bin/Example Data/ADH_unidecfiles/ADH_conf.dat index 0fe0951e..68995a96 100644 --- a/unidec_bin/Example Data/ADH_unidecfiles/ADH_conf.dat +++ b/unidec_bin/Example Data/ADH_unidecfiles/ADH_conf.dat @@ -34,7 +34,7 @@ rawflag 0 adductmass 1.007276467 nativezub 100.0 nativezlb -100.0 -poolflag 1 +poolflag 2 accvol 0.0 peakshapeinflate 1.0 noiseflag 0.0 diff --git a/unidec_bin/Example Data/BSA_unidecfiles/BSA_conf.dat b/unidec_bin/Example Data/BSA_unidecfiles/BSA_conf.dat index 69d455b8..9a654949 100644 --- a/unidec_bin/Example Data/BSA_unidecfiles/BSA_conf.dat +++ b/unidec_bin/Example Data/BSA_unidecfiles/BSA_conf.dat @@ -8,17 +8,17 @@ startz 10 zzsig 1.0 psig 0.0 beta 0.0 -mzsig 0.0 +mzsig 1.50166 psfun 0 discreteplot 0 massub 150000.0 masslb 1000.0 msig 0.0 -molig 0.0 +molig 1.0 massbins 1.0 mtabsig 0.0 -minmz 2715.4349487890913 -maxmz 8616.21283935621 +minmz 3617.539730726923 +maxmz 7066.941420450001 subbuff 0.0 subtype 2 smooth 0.0 diff --git a/unidec_bin/Example Data/GroEL HCD UniDec_unidecfiles/GroEL HCD UniDec_conf.dat b/unidec_bin/Example Data/GroEL HCD UniDec_unidecfiles/GroEL HCD UniDec_conf.dat index f58bea51..09988b40 100644 --- a/unidec_bin/Example Data/GroEL HCD UniDec_unidecfiles/GroEL HCD UniDec_conf.dat +++ b/unidec_bin/Example Data/GroEL HCD UniDec_unidecfiles/GroEL HCD UniDec_conf.dat @@ -15,16 +15,16 @@ massub 1000000.0 masslb 1000.0 msig 0.0 molig 0.0 -massbins 100.0 +massbins 10.0 mtabsig 0.0 manualfile GroEL HCD UniDec_manualfile.dat -minmz 1103.817162224038 -maxmz 28276.468057027887 +minmz 989.965273 +maxmz 40458.620204 subbuff 0.0 subtype 0 smooth 0.0 mzbins 0.0 -peakwindow 200.0 +peakwindow 500.0 peakthresh 0.01 peakplotthresh 0.1 plotsep 0.025 @@ -35,7 +35,7 @@ rawflag 0 adductmass 1.007276467 nativezub 100.0 nativezlb -100.0 -poolflag 1 +poolflag 2 accvol 0.0 peakshapeinflate 1.0 noiseflag 0.0 @@ -46,6 +46,7 @@ spectracmap rainbow publicationmode 1 isotopemode 0 peaknorm 1 +datanorm 1 baselineflag 1.0 orbimode 0 integratelb -1000.0 diff --git a/unidec_bin/Example Data/GroEL UniDec_unidecfiles/GroEL UniDec_conf.dat b/unidec_bin/Example Data/GroEL UniDec_unidecfiles/GroEL UniDec_conf.dat index 0303283f..fa488ce6 100644 --- a/unidec_bin/Example Data/GroEL UniDec_unidecfiles/GroEL UniDec_conf.dat +++ b/unidec_bin/Example Data/GroEL UniDec_unidecfiles/GroEL UniDec_conf.dat @@ -34,7 +34,7 @@ rawflag 0 adductmass 1.007276467 nativezub 100.0 nativezlb -100.0 -poolflag 1 +poolflag 2 accvol 0.0 peakshapeinflate 1.0 noiseflag 0.0 diff --git a/unidec_bin/Example Data/POPC_Nanodiscs_unidecfiles/POPC_Nanodiscs_conf.dat b/unidec_bin/Example Data/POPC_Nanodiscs_unidecfiles/POPC_Nanodiscs_conf.dat index 8be4c735..ba3129e2 100644 --- a/unidec_bin/Example Data/POPC_Nanodiscs_unidecfiles/POPC_Nanodiscs_conf.dat +++ b/unidec_bin/Example Data/POPC_Nanodiscs_unidecfiles/POPC_Nanodiscs_conf.dat @@ -15,7 +15,7 @@ massub 160000.0 masslb 80000.0 msig 1.0 molig 760.0 -massbins 1.0 +massbins 10.0 mtabsig 0.0 minmz 8761.430704083608 maxmz 12864.965623887581 @@ -34,7 +34,7 @@ rawflag 0 adductmass 1.007276467 nativezub 100.0 nativezlb -100.0 -poolflag 1 +poolflag 2 accvol 0.0 peakshapeinflate 1.0 noiseflag 0.0 @@ -45,6 +45,7 @@ spectracmap rainbow publicationmode 1 isotopemode 0 peaknorm 1 +datanorm 1 baselineflag 1.0 orbimode 0 integratelb -1000.0 diff --git a/unidec_bin/Presets/James/Hemoglobin.dat b/unidec_bin/Presets/James/Hemoglobin.dat index 4b88d4f9..a6c0f9b9 100644 --- a/unidec_bin/Presets/James/Hemoglobin.dat +++ b/unidec_bin/Presets/James/Hemoglobin.dat @@ -27,7 +27,7 @@ rawflag 0 adductmass 1.007276467 nativezub 100.0 nativezlb -100.0 -poolflag 1 +poolflag 2 accvol 0.0 peakshapeinflate 1.0 noiseflag 0.0 diff --git a/unidec_bin/Presets/Nanodiscs/DMPC.dat b/unidec_bin/Presets/Nanodiscs/DMPC.dat index 7e034004..6c831e7c 100644 --- a/unidec_bin/Presets/Nanodiscs/DMPC.dat +++ b/unidec_bin/Presets/Nanodiscs/DMPC.dat @@ -28,7 +28,7 @@ rawflag 1 adductmass 1.007276467 nativezub 100.0 nativezlb -100.0 -poolflag 1 +poolflag 2 accvol 0.0 peakshapeinflate 1.0 noiseflag 0.0 diff --git a/unidec_bin/Presets/Nanodiscs/DMPG.dat b/unidec_bin/Presets/Nanodiscs/DMPG.dat index 2989c06e..8aa407b7 100644 --- a/unidec_bin/Presets/Nanodiscs/DMPG.dat +++ b/unidec_bin/Presets/Nanodiscs/DMPG.dat @@ -28,7 +28,7 @@ rawflag 1 adductmass 1.007276467 nativezub 100.0 nativezlb -100.0 -poolflag 1 +poolflag 2 accvol 0.0 peakshapeinflate 1.0 noiseflag 0.0 diff --git a/unidec_bin/Presets/Nanodiscs/POPC.dat b/unidec_bin/Presets/Nanodiscs/POPC.dat index 20c91c6b..2009081e 100644 --- a/unidec_bin/Presets/Nanodiscs/POPC.dat +++ b/unidec_bin/Presets/Nanodiscs/POPC.dat @@ -27,7 +27,7 @@ rawflag 0 adductmass 1.007276467 nativezub 100.0 nativezlb -100.0 -poolflag 1 +poolflag 2 accvol 0.0 peakshapeinflate 1.0 noiseflag 0.0 diff --git a/unidec_bin/Presets/Nanodiscs/POPG.dat b/unidec_bin/Presets/Nanodiscs/POPG.dat index 4b88d4f9..a6c0f9b9 100644 --- a/unidec_bin/Presets/Nanodiscs/POPG.dat +++ b/unidec_bin/Presets/Nanodiscs/POPG.dat @@ -27,7 +27,7 @@ rawflag 0 adductmass 1.007276467 nativezub 100.0 nativezlb -100.0 -poolflag 1 +poolflag 2 accvol 0.0 peakshapeinflate 1.0 noiseflag 0.0 diff --git a/unidec_bin/Presets/Proteins/Rhodopsin.dat b/unidec_bin/Presets/Proteins/Rhodopsin.dat index c0a31932..eea34c95 100644 --- a/unidec_bin/Presets/Proteins/Rhodopsin.dat +++ b/unidec_bin/Presets/Proteins/Rhodopsin.dat @@ -34,7 +34,7 @@ rawflag 0 adductmass 1.007276467 nativezub 100.0 nativezlb -100.0 -poolflag 1 +poolflag 2 accvol 0 peakshapeinflate 1 noiseflag 0 diff --git a/unidec_bin/Presets/Proteins/ubiquitin.dat b/unidec_bin/Presets/Proteins/ubiquitin.dat index f2c02d6c..1f2f8176 100644 --- a/unidec_bin/Presets/Proteins/ubiquitin.dat +++ b/unidec_bin/Presets/Proteins/ubiquitin.dat @@ -33,7 +33,7 @@ rawflag 0 adductmass 1.007276467 nativezub 100.0 nativezlb -100.0 -poolflag 1 +poolflag 2 accvol 0.0 peakshapeinflate 1.0 noiseflag 0.0 diff --git a/unidec_bin/Presets/SuperNanodiscs/DMPC.dat b/unidec_bin/Presets/SuperNanodiscs/DMPC.dat index e6012928..e5c81e9e 100644 --- a/unidec_bin/Presets/SuperNanodiscs/DMPC.dat +++ b/unidec_bin/Presets/SuperNanodiscs/DMPC.dat @@ -28,7 +28,7 @@ rawflag 0 adductmass 1.007276467 nativezub 100.0 nativezlb -100.0 -poolflag 1 +poolflag 2 accvol 0.0 peakshapeinflate 1.0 noiseflag 0.0 diff --git a/unidec_bin/Presets/SuperNanodiscs/POPC.dat b/unidec_bin/Presets/SuperNanodiscs/POPC.dat index adfe7680..a706ddf9 100644 --- a/unidec_bin/Presets/SuperNanodiscs/POPC.dat +++ b/unidec_bin/Presets/SuperNanodiscs/POPC.dat @@ -28,7 +28,7 @@ rawflag 0 adductmass 1.007276467 nativezub 100.0 nativezlb -100.0 -poolflag 1 +poolflag 2 accvol 0.0 peakshapeinflate 1.0 noiseflag 0.0 diff --git a/unidec_bin/UniDec.exe b/unidec_bin/UniDec.exe index 4b816a4b..e9f745aa 100644 Binary files a/unidec_bin/UniDec.exe and b/unidec_bin/UniDec.exe differ diff --git a/unidec_modules/MassDefects.py b/unidec_modules/MassDefects.py index dd0ff8b7..f1743115 100644 --- a/unidec_modules/MassDefects.py +++ b/unidec_modules/MassDefects.py @@ -113,10 +113,13 @@ def __init__(self, parent, data_list, config=None, yvals=None, pks=None, value=N "Label peaks") self.menulinreg = self.plotmenu.Append(wx.ID_ANY, "Linear Regression", "Linear Regression") + self.menucom = self.plotmenu.Append(wx.ID_ANY, "Center of Mass", "Center of Mass") + self.Bind(wx.EVT_MENU, self.on_add_line, self.menuaddline) self.Bind(wx.EVT_MENU, self.on_fit, self.menufit) self.Bind(wx.EVT_MENU, self.on_label_peaks, self.menupeaks) self.Bind(wx.EVT_MENU, self.on_linear_regression, self.menulinreg) + self.Bind(wx.EVT_MENU, self.on_com, self.menucom) menu_bar = wx.MenuBar() menu_bar.Append(filemenu, "&File") @@ -629,12 +632,13 @@ def on_fit(self, e): self.plot3.addtext("", f, y, vlines=True) def on_label_peaks(self, e=None): - peaks = ud.peakdetect(self.data1d, window=3) - print("Peaks:", peaks[:, 0]) + self.peaks = ud.peakdetect(self.data1d, window=3) + print("Peaks:", self.peaks[:, 0]) - for p in peaks: + for i, p in enumerate(self.peaks): y = p[1] - self.plot3.addtext(str(np.round(p[0], 3)), p[0] + 0.075, y * 0.95, vlines=False) + label=str(np.round(p[0], 3)) + self.plot3.addtext(label, p[0] + 0.075, y * 0.95, vlines=False) self.plot3.addtext("", p[0], y, vlines=True) def on_linear_regression(self, e=None): @@ -676,6 +680,49 @@ def on_linear_regression(self, e=None): + "\nInt./RefMass: " + str(np.round(intercept/self.m0,3)) self.plot1.addtext(sval, xl*0.7, np.amax(ylin)*0.2, vlines=False) + def on_com(self, e=None): + print("Starting COM calculations") + dims = self.data2d.shape + + x = np.unique(self.data2d[:, 0]) + y = np.unique(self.data2d[:, 1]) + xl = len(x) + yl = len(y) + z = self.data2d[:, 2].reshape((xl, yl)) + zlin = np.array([np.max(d) for d in z]) + + self.on_label_peaks(e=0) + + coms = [] + for d in z.transpose(): + data = np.transpose([x, d]) + b1 = d > np.amax(d) * 0.1 + com, std = ud.center_of_mass(data[b1]) + coms.append(com) + + for p in self.peaks: + index = ud.nearest(y, p[0]) + c = coms[index] + carray = [c] + try: + carray.append(coms[index-1]) + except: + pass + try: + carray.append(coms[index+1]) + except: + pass + + com = np.average(carray) + print(p[0], com) + + self.plot1.plotrefreshtop(y, coms, xlabel="Mass Defect", ylabel="Center of Mass (Da)") + + #print(zlin) + #self.plot1.colorplotMD(y, coms, zlin, max=0, cmap="binary", + # title="Linear Regression Plot", clabel="Intensity", + # xlabel="Mass Number", ylabel="Mass", test_kda=False) + # Main App Execution if __name__ == "__main__": dir = "C:\\Python\\UniDec\\TestSpectra\\60_unidecfiles" @@ -697,5 +744,5 @@ def on_linear_regression(self, e=None): app = wx.App(False) frame = MassDefectWindow(None, datalist) - frame.on_linear_regression(0) + frame.on_com(0) app.MainLoop() diff --git a/unidec_modules/PlottingWindow.py b/unidec_modules/PlottingWindow.py index 02bf4ec9..c736b99c 100644 --- a/unidec_modules/PlottingWindow.py +++ b/unidec_modules/PlottingWindow.py @@ -241,7 +241,7 @@ def kda_test(self, xvals): self.kdnorm = 1. self.kda = False - def plotadddot(self, x, y, colval, markval): + def plotadddot(self, x, y, colval, markval, label=""): """ Adds a scatter plot to the figure. May be one or more. :param x: x values @@ -251,7 +251,7 @@ def plotadddot(self, x, y, colval, markval): :return: None """ self.subplot1.plot(np.array(x) / self.kdnorm, y, color=colval, marker=markval, linestyle='None', clip_on=False - , markeredgecolor="k") + , markeredgecolor="k", label=label) def repaint(self, setupzoom=False): """ diff --git a/unidec_modules/gui_elements/ud_controls.py b/unidec_modules/gui_elements/ud_controls.py index ce6dbf76..d36fd126 100644 --- a/unidec_modules/gui_elements/ud_controls.py +++ b/unidec_modules/gui_elements/ud_controls.py @@ -472,7 +472,7 @@ def __init__(self, parent, config, pres, panel, iconfile): i += 1 self.ctlpoolflag = wx.RadioBox(panel2b, label="m/z to Mass Transformation", - choices=["Integration", "Interpolation"]) + choices=["Integrate", "Interpolate", "Smart"]) gbox2b.Add(self.ctlpoolflag, (i, 0), span=(1, 2), flag=wx.ALIGN_CENTER_VERTICAL) i += 1 @@ -682,8 +682,8 @@ def import_config_to_gui(self): self.ctlendz.SetValue(str(self.config.endz)) self.ctlzzsig.SetValue(str(self.config.zzsig)) self.ctlmzsig.SetValue(str(self.config.mzsig)) - self.ctlpsfun.SetSelection(self.config.psfun) - self.ctlnorm.SetSelection(self.config.peaknorm) + self.ctlpsfun.SetSelection(int(self.config.psfun)) + self.ctlnorm.SetSelection(int(self.config.peaknorm)) self.ctlmasslb.SetValue(str(self.config.masslb)) self.ctlmassub.SetValue(str(self.config.massub)) self.ctlmasslistflag.SetValue(self.config.mfileflag) @@ -709,8 +709,8 @@ def import_config_to_gui(self): self.ctldatareductionpercent.SetValue(str(self.config.reductionpercent)) self.ctlmanualassign.SetValue(self.config.manualfileflag) self.ctlisotopemode.SetSelection(self.config.isotopemode) - self.ctlorbimode.SetValue(self.config.orbimode) - self.ctldatanorm.SetValue(self.config.datanorm) + self.ctlorbimode.SetValue(int(self.config.orbimode)) + self.ctldatanorm.SetValue(int(self.config.datanorm)) self.ctlbintype.SetSelection(int(self.config.linflag)) self.ctlpsig.SetValue(str(self.config.psig)) self.ctlbeta.SetValue(str(self.config.beta)) @@ -1055,7 +1055,9 @@ def setup_tool_tips(self): self.ctlpoolflag.SetToolTip(wx.ToolTip( "Sets type of conversion from m/z to mass.\nIntegration:\n\tEach m/z bin goes to the nearest mass bin" "\n\tBest for undersampled masses\nInterpolation:\n\tEach mass value interpolates its value in m/z space" - "\n\tBest for oversampled mass data")) + "\n\tBest for oversampled mass data" + "\nSmart:\n\tDetermines interpolation vs. integration automatically\n\tfrom the data and blends the two." + "\n\tUse Smart in most cases (default). \n\tNot available for IM-MS data.")) self.ctlmsmoothcheck.SetToolTip( wx.ToolTip("Select whether to assume a smooth distribution spaced by a known mass difference")) self.ctlzsmoothcheck.SetToolTip( diff --git a/unidec_modules/isolated_packages/ZoomBox.py b/unidec_modules/isolated_packages/ZoomBox.py index 651bc55d..a9d16442 100644 --- a/unidec_modules/isolated_packages/ZoomBox.py +++ b/unidec_modules/isolated_packages/ZoomBox.py @@ -1,6 +1,6 @@ from matplotlib.patches import Rectangle -#from pubsub import setupkwargs +# from pubsub import setupkwargs from pubsub import pub import numpy as np @@ -426,8 +426,27 @@ def release(self, event): if self.spancoords == 'data': xmin, ymin = self.eventpress.xdata, self.eventpress.ydata # xmax, ymax = self.eventrelease.xdata, self.eventrelease.ydata + # fix for if drag outside axes boundaries - xmax, ymax = self.eventrelease.xdata or self.prev[0], self.eventrelease.ydata or self.prev[1] + offx = self.prev[0] + offy = self.prev[1] + try: + xlims = self.axes[0].get_xlim() + ylims = self.axes[0].get_ylim() + if np.abs(self.prev[0] - xlims[0]) < np.abs(self.prev[0] - xlims[1]): + offx = xlims[0] + else: + offx = xlims[1] + + if np.abs(self.prev[1] - ylims[0]) < np.abs(self.prev[1] - ylims[1]): + offy = ylims[0] + else: + offy = ylims[1] + except: + pass + + xmax, ymax = self.eventrelease.xdata or offx, self.eventrelease.ydata or offy + elif self.spancoords == 'pixels': xmin, ymin = self.eventpress.x, self.eventpress.y xmax, ymax = self.eventrelease.x, self.eventrelease.y @@ -634,7 +653,6 @@ def label(self): self.canvas.draw() return - def kill_labels(self): try: for t in self.texts: diff --git a/unidec_modules/isotopetools.py b/unidec_modules/isotopetools.py index 235036ce..e8fa4226 100644 --- a/unidec_modules/isotopetools.py +++ b/unidec_modules/isotopetools.py @@ -2,6 +2,7 @@ import time from scipy import fftpack import matplotlib.pyplot as plt +import unidec_modules.unidectools as ud # import pyteomics.mass as ms @@ -19,20 +20,20 @@ def makemass(testmass): intnum = [int(round(n)) for n in num] minmassint = (intnum * isotopes[:, 0, 0]).sum() formula = "" - if intnum[0] is not 0: + if intnum[0] != 0: formula = formula + "C" + str(intnum[0]) - if intnum[1] is not 0: + if intnum[1] != 0: formula = formula + "H" + str(intnum[1]) - if intnum[2] is not 0: + if intnum[2] != 0: formula = formula + "N" + str(intnum[2]) - if intnum[3] is not 0: + if intnum[3] != 0: formula = formula + "O" + str(intnum[3]) - if intnum[4] is not 0: + if intnum[4] != 0: formula = formula + "S" + str(intnum[4]) return formula, minmassint, intnum -def isojim(isolist): +def isojim(isolist, length=700): '''Thanks to Jim Prell for Sketching this Code''' numc = isolist[0] numh = isolist[1] @@ -40,7 +41,7 @@ def isojim(isolist): numo = isolist[3] nums = isolist[4] - buffer = np.zeros(700) + buffer = np.zeros(length) h = np.array([1, 0.00015, 0, 0]) c = np.array([1, 0.011, 0, 0]) n = np.array([1, 0.0037, 0, 0]) @@ -65,7 +66,7 @@ def isojim(isolist): return allift -def calc_averagine_isotope_dist(mass, mono=False): +def calc_averagine_isotope_dist(mass, mono=False, charge=None, adductmass=1.007276467, crop=False): formula, minmassint, isolist = makemass(mass) # print(isolist) intensities = isojim(isolist) @@ -76,6 +77,22 @@ def calc_averagine_isotope_dist(mass, mono=False): # print(ms.calculate_mass(formula=formula, average=True)) # print(np.average(dist[:, 0], weights=dist[:, 1])) # print(minmassint) + z = None + if charge == "Auto": + z = ud.predict_charge(mass) + elif charge is not None: + try: + z = float(charge) + except Exception as e: + print("Could not convert charge to float. Try Auto, None, or a number", e) + + if z is not None and z != 0: + dist[:, 0] = (dist[:, 0] + z * adductmass) / z + + if crop: + b1 = dist[:, 1] > np.amax(dist[:, 1]) * 0.0001 + dist = dist[b1] + return np.array(dist) @@ -85,6 +102,7 @@ def correct_avg(dist, mass): dist[:, 0] = dist[:, 0] - avg + mass return dist + if __name__ == "__main__": mval = 1000000 startime = time.perf_counter() diff --git a/unidec_modules/mainwindow.py b/unidec_modules/mainwindow.py index 2ef77e30..d4eb0081 100644 --- a/unidec_modules/mainwindow.py +++ b/unidec_modules/mainwindow.py @@ -313,9 +313,13 @@ def OnDropFiles(self, x, y, filenames): elif fname[-9:] == "_conf.dat": print("Importing Configuration File:", path) self.window.pres.import_config(path) + elif os.path.splitext(fname)[1] == ".zip": + print("Loading State:", fname) + self.window.pres.on_load_state(0, path) else: self.window.pres.on_open_file(fname, directory) # self.window.pres.on_auto() # Run the whole thing + elif len(filenames) > 1: # Batch process the files that were dropped if os.path.splitext(filenames[0])[1] == ".raw" and os.path.isdir(filenames[0]): diff --git a/unidec_modules/mzMLimporter.py b/unidec_modules/mzMLimporter.py index 1d321361..1d82d78d 100644 --- a/unidec_modules/mzMLimporter.py +++ b/unidec_modules/mzMLimporter.py @@ -105,7 +105,10 @@ def __init__(self, path, *args, **kwargs): impdat = np.transpose([spectrum.mz, spectrum.i]) impdat = impdat[impdat[:, 0] > 10] self.data.append(impdat) - self.times.append(float(spectrum['scan start time'])) + try: + self.times.append(float(spectrum['scan start time'])) + except: + self.times.append(-1) self.scans.append(i) #print(i, end=" ") self.times = np.array(self.times) diff --git a/unidec_modules/plot2d.py b/unidec_modules/plot2d.py index b8ae8699..d8467b8f 100644 --- a/unidec_modules/plot2d.py +++ b/unidec_modules/plot2d.py @@ -2,7 +2,8 @@ from matplotlib.ticker import MaxNLocator from matplotlib.ticker import FixedLocator import numpy as np - +import scipy.ndimage.filters as filt +from matplotlib.image import NonUniformImage from unidec_modules.PlottingWindow import PlottingWindow from unidec_modules import unidectools as ud @@ -94,8 +95,24 @@ def contourplot(self, dat=None, config=None, xvals=None, yvals=None, zgrid=None, # speedplot=0 if speedplot == 0: # Slow contour plot that interpolates grid - cax = self.subplot1.contourf(xvals / self.kdnorm, yvals, np.transpose(newgrid), 100, cmap=self.cmap, - norm=norm) + try: + b1 = newgrid > 0.01 * np.amax(newgrid) + b1 = b1.astype(float) + b1 = filt.uniform_filter(b1, size=3) > 0 + b1[0, 0] = True + b1[0, -1] = True + b1[-1, 0] = True + b1[-1, -1] = True + X2, Y2 = np.meshgrid(xvals, yvals, indexing="ij") + X2 = np.ravel(X2[b1]) + Y2 = np.ravel(Y2[b1]) + Z2 = np.ravel(newgrid[b1].transpose()) + cax = self.subplot1.tricontourf(X2 / self.kdnorm, Y2, Z2, 100, cmap=self.cmap, norm=norm) + except Exception as e: + print("Error with fast tricontourf plot", e) + cax = self.subplot1.contourf(xvals / self.kdnorm, yvals, np.transpose(newgrid), 100, cmap=self.cmap, + norm=norm) + datalims = [np.amin(xvals) / self.kdnorm, np.amin(yvals), np.amax(xvals) / self.kdnorm, np.amax(yvals)] else: # Fast discrete plot using imshow @@ -103,13 +120,25 @@ def contourplot(self, dat=None, config=None, xvals=None, yvals=None, zgrid=None, xdiff = (xvals[1] - xvals[0]) / self.kdnorm ydiff = yvals[1] - yvals[0] except: - xdiff=1 - ydiff=1 + xdiff = 1 + ydiff = 1 extent = (np.amin(xvals) / self.kdnorm - 0.5 * xdiff, np.amax(xvals) / self.kdnorm + 0.5 * xdiff, np.amin(yvals) - 0.5 * ydiff, np.amax(yvals) + 0.5 * ydiff) - cax = self.subplot1.imshow(np.transpose(newgrid), origin="lower", cmap=self.cmap, extent=extent, - aspect='auto', norm=norm, interpolation='nearest') + + try: + ax = self.subplot1 + im = NonUniformImage(ax, interpolation="nearest", extent=extent, cmap=self.cmap, norm=norm,) + im.set_data(xvals / self.kdnorm, yvals, np.transpose(newgrid)) + ax.images.append(im) + ax.set_xlim(extent[0], extent[1]) + ax.set_ylim(extent[2], extent[3]) + cax = im + except Exception as e: + print("Error in NonUniformImage:", e) + cax = self.subplot1.imshow(np.transpose(newgrid), origin="lower", cmap=self.cmap, extent=extent, + aspect='auto', norm=norm, interpolation='nearest') datalims = [extent[0], extent[2], extent[1], extent[3]] + print(newgrid.shape) # Set X and Y axis labels self.subplot1.set_xlabel(self.xlabel) self.subplot1.set_ylabel(self.ylabel) diff --git a/unidec_modules/unidec_enginebase.py b/unidec_modules/unidec_enginebase.py index 82623401..9da3f5a9 100644 --- a/unidec_modules/unidec_enginebase.py +++ b/unidec_modules/unidec_enginebase.py @@ -15,7 +15,7 @@ def __init__(self): :return: None """ - self.version = "4.1.2" + self.version = "4.2.0" print("\nUniDec Engine v." + self.version) self.config = None self.config_history = [] diff --git a/unidec_modules/unidecstructure.py b/unidec_modules/unidecstructure.py index 3bd9a3ce..059447cf 100644 --- a/unidec_modules/unidecstructure.py +++ b/unidec_modules/unidecstructure.py @@ -45,6 +45,8 @@ def __init__(self): "Subtract Curved"] # , "Subtract Gaussian"] # , "Subtract SavGol","Subtract Polynomial"] self.isotopechoices = ["Isotopes: Off", "Isotopes: Mono", "Isotopes: Average"] self.figsize = (6, 5) + self.mass_proton = 1.007276467 + self.mass_diff_carbon = 1.0033 self.initialize() def initialize(self): @@ -147,7 +149,7 @@ def default_decon_params(self): # Other self.mtabsig = 0 - self.poolflag = 1 + self.poolflag = 2 self.nativezub = 100 self.nativezlb = -100 self.inflate = 1 @@ -855,6 +857,7 @@ def default_high_res(self): self.numit = 50 self.zzsig = 1 self.psig = 1 + self.poolflag = 2 def default_low_res(self): """ @@ -880,6 +883,7 @@ def default_low_res(self): self.numit = 50 self.zzsig = 1 self.psig = 1 + self.poolflag = 2 def default_nanodisc(self): """ @@ -904,6 +908,7 @@ def default_nanodisc(self): self.zzsig = 1 self.psig = 1 self.isotopemode = 0 + self.poolflag = 2 def default_isotopic_res(self): """ @@ -925,6 +930,7 @@ def default_isotopic_res(self): self.psfun = 0 self.isotopemode = 1 self.psig = 0 + self.poolflag = 2 def default_zero_charge(self): """ @@ -943,6 +949,7 @@ def default_zero_charge(self): self.adductmass = 0 self.minmz = '' self.maxmz = '' + self.poolflag = 2 def initialize_system_paths(self): """ diff --git a/unidec_modules/unidectools.py b/unidec_modules/unidectools.py index b4a46ff5..40087f1d 100644 --- a/unidec_modules/unidectools.py +++ b/unidec_modules/unidectools.py @@ -202,7 +202,7 @@ def weighted_std(values, weights): :return: Weighted standard deviation. """ average = np.average(values, weights=weights) - variance = np.average((np.array(values) - average) ** 2, weights=weights) # Fast and numerically precise + variance = np.average(np.power((np.array(values) - average), 2), weights=weights) # Fast and numerically precise return np.sqrt(variance) @@ -278,6 +278,7 @@ def get_z_offset(mass, charge): def predict_charge(mass): """ Give predicted native charge state of species of defined mass + https://www.pnas.org/content/107/5/2007.long :param mass: Mass in Da :return: Float, predicted average native charge state """ @@ -764,7 +765,6 @@ def load_mz_file(path, config=None): except IOError: print("Failed to open:", path) data = None - return data @@ -1121,7 +1121,7 @@ def remove_noise(datatop, percent=None): index = round(l1 * percent / 100.) cutoff = sdat[index] datatop = intensitythresh(datatop, cutoff) - datatop = remove_middle_zeros(datatop) + #datatop = remove_middle_zeros(datatop) print(l1, len(datatop)) return datatop @@ -1327,7 +1327,7 @@ def remove_middle_zeros(data): return data[boo6] -def dataprep(datatop, config, removenoise=False): +def dataprep(datatop, config, peaks=True, intthresh=True): """ Main function to process 1D MS data. The order is: @@ -1349,14 +1349,15 @@ def dataprep(datatop, config, removenoise=False): buff = config.subbuff smooth = config.smooth binsize = config.mzbins - # thresh = config.intthresh + thresh = config.intthresh subtype = config.subtype va = config.detectoreffva linflag = config.linflag redper = config.reductionpercent # Crop Data - data2 = datachop(datatop, newmin, newmax) - + data2 = datachop(deepcopy(datatop), newmin, newmax) + if len(data2)==0: + print("Error: m/z range is too small. No data fits the range.") # correct for detector efficiency if va != 0: # data2=detectoreff(data2,9.1) @@ -1396,10 +1397,6 @@ def dataprep(datatop, config, removenoise=False): else: print("Background subtraction code unsupported", subtype, buff) - # Intensity Threshold - data2 = intensitythresh(data2, 0) # thresh - # data2=data2[data2[:,1]>0] - # Scale Adjustment print(config.intscale, config.intscale == "Square Root") if config.intscale == "Square Root": @@ -1410,18 +1407,39 @@ def dataprep(datatop, config, removenoise=False): data2[:, 1] -= np.amin(data2[:, 1]) print("Log Scale") + #Data Reduction if redper > 0: data2 = remove_noise(data2, redper) - elif linflag == 2: + + if config.datanorm == 1: + # Normalization + data2 = normalize(data2) + + # Intensity Threshold + if thresh > 0 and intthresh: + print(len(data2)) + data2 = intensitythresh(data2, thresh) # thresh + print("Intensity Threshold Applied:", thresh, len(data2), np.amax(data2[:, 1])) + # data2=data2[data2[:,1]>0] + else: + data2 = intensitythresh(data2, 0) # thresh + # data2=data2[data2[:,1]>0] + + if redper < 0 and peaks: + #widths = [1, 2, 4, 8, 16] + #peakind = signal.find_peaks_cwt(data2[:,1], widths) + #data2 = data2[peakind] + data2 = peakdetect(data2, window = -redper, threshold=thresh) + print(data2) + + if linflag == 2: try: data2 = remove_middle_zeros(data2) except: pass pass - if config.datanorm == 1: - # Normalization - data2 = normalize(data2) + return data2 @@ -1481,7 +1499,7 @@ def peakdetect(data, config=None, window=10, threshold=0): peaks = [] length = len(data) maxval = np.amax(data[:, 1]) - for i in range(1, length): + for i in range(0, length): if data[i, 1] > maxval * threshold: start = i - window end = i + window @@ -2011,7 +2029,7 @@ def make_peak_shape(xaxis, psfun, fwhm, mid, norm_area=False): return kernel -def fft(data): +def fft(data, phases=False): # X-axis massdif = data[1, 0] - data[0, 0] # fvals = np.fft.fftfreq(len(data), d=massdif) @@ -2023,8 +2041,11 @@ def fft(data): # Cleanup aftdat = np.transpose([fvals, np.abs(fft)]) aftdat[:, 1] /= np.amax(aftdat[:, 1]) - # phases = np.arctan2(fft.imag,fft.real) - return aftdat + if not phases: + return aftdat + if phases: + phases = np.arctan2(fft.imag,fft.real) + return aftdat, phases def fft_diff(data, diffrange=[500., 1000.]): @@ -2437,45 +2458,44 @@ def peaks_error_mean(pks, data, ztab, massdat, config): :param config: self.config :return: """ + # Reshape the data length = len(data) / len(ztab) + data2 = data.reshape(int(length), len(ztab)) + # Loop over each peak for pk in pks.peaks: + # Set the window as 2 x FWHM if possible + window = config.peakwindow + if pk.errorFWHM > 0: + window = 2* pk.errorFWHM + + # Grab a slice of the data around the peak mass index = nearest(massdat[:, 0], pk.mass) + startindmass = nearest(massdat[:, 0], massdat[index, 0] - window) + endindmass = nearest(massdat[:, 0], massdat[index, 0] + window) + data3 = data2[startindmass:endindmass] + + # Get the peak mass and intensity at each charge state masses = [] ints = [] - zgrid = [] - startindmass = nearest(massdat[:, 0], massdat[index, 0] - config.peakwindow) - endindmass = nearest(massdat[:, 0], massdat[index, 0] + config.peakwindow) - # plotmasses = massdat[startindmass:endindmass, 0] for z in range(0, len(ztab)): - startind = startindmass + (z * length) - endind = endindmass + (z * length) - tmparr = data[int(startind):int(endind)] - ind = np.argmax(tmparr) - ints.append(tmparr[ind]) - # print massdat[startindmass + ind, 0] - masses.append(massdat[startindmass + ind, 0]) - # zgrid.extend(tmparr) - mean = np.average(masses, weights=ints) + tmparr = data3[:, z] + ints.append(np.amax(tmparr)) + masses.append(massdat[startindmass + np.argmax(tmparr), 0]) + + # Convert to Arrays + ints = np.array(ints) + masses = np.array(masses) + + # Set a 1% Threshold + b1 = ints > 0.01 * np.amax(ints) # Calculate weighted standard deviation - sum = 0 - denom = 0 - for w, m in enumerate(masses): - sum += ints[w] * pow(m - mean, 2) - denom += ints[w] - denom *= (len(ztab) - 1) - std = sum / denom - std = std / len(ztab) - std = std ** 0.5 - # print mean - # print std - # tab = wx.Frame(None, title="Test", size=(500, 500)) - # plot = plot2d.Plot2d(tab, figsize=(500, 500)) - # plot.contourplot(xvals=np.asarray(plotmasses), yvals=ztab, zgrid=np.asarray(zgrid), - # config=config, title="Mass vs. Charge", xlab="Mass (Da)", - # normflag=1, test_kda=False, repaint=False) - # tab.Show() + std=weighted_std(masses[b1], ints[b1]) + + # Set the parameters pk.errormean = std - # pks.peaks[count].errormean = abs(sums[count] - pks.peaks[count].mass) + pk.errormasses = masses + pk.errorints = ints + pass def subtract_and_divide(pks, basemass, refguess): avgmass = [] diff --git a/unidec_src/UniDec/UniDec.h b/unidec_src/UniDec/UniDec.h index f7b37558..b09f1d34 100644 --- a/unidec_src/UniDec/UniDec.h +++ b/unidec_src/UniDec/UniDec.h @@ -502,6 +502,23 @@ void PrintHelp() //printf("\nsize of: %d",sizeof(char)); } +//Print a double array +void DoublePrint(const double* array, const int length) +{ + for (int i = 0; i < length; i++) + { + printf("%f\n", array[i]); + } +} + +//Print an int array +void IntPrint(const int* array, const int length) +{ + for (int i = 0; i < length; i++) + { + printf("%d\n", array[i]); + } +} //Calculate Average double Average(const int length, const double* xarray) @@ -761,7 +778,7 @@ double clip(double x,double cutoff) //Function for defining m/z peak shape. Other peak shape functions could be easily added her. double mzpeakshape(double x,double y,double sig,int psfun) { - if (sig == 0) { printf("Error: mzpeakshape sigma is 0"); exit(103); } + if (sig == 0) { printf("Error: mzpeakshape sigma is 0\n"); exit(103); } double result; if(psfun==0) { @@ -872,7 +889,7 @@ void blur_it(const int lengthmz, const int numz, const int numclose, const int * __restrict closeind, - const double * __restrict closeval, + const double * __restrict closearray, double * __restrict newblur, const double * __restrict blur, const char * __restrict barr) @@ -892,7 +909,7 @@ void blur_it(const int lengthmz, { if (closeind[index2D(numclose, i, k)] != -1) { - temp += closeval[k] * blur[closeind[index2D(numclose, i, k)]]; + temp += closearray[index2D(numclose, i, k)] * blur[closeind[index2D(numclose, i, k)]]; } } } @@ -909,6 +926,7 @@ void blur_it_mean(const int lengthmz, double * __restrict newblur, const double * __restrict blur, const char * __restrict barr, + const double * __restrict closearray, const double zerolog) { if (numclose == 1) @@ -927,7 +945,7 @@ void blur_it_mean(const int lengthmz, double temp2 = 0; if (closeind[index2D(numclose, i, k)] != -1) { - temp2 = blur[closeind[index2D(numclose, i, k)]]; + temp2 = blur[closeind[index2D(numclose, i, k)]] *closearray[index2D(numclose, i, k)]; } if (temp2 > 0) { temp += log(temp2); } else { temp += zerolog; } @@ -939,7 +957,7 @@ void blur_it_mean(const int lengthmz, } } - +/* //Charge state smooth using a mean filter of the log void blur_it_geometric_mean(const int lengthmz, const int numz, @@ -976,10 +994,10 @@ void blur_it_geometric_mean(const int lengthmz, newblur[i] = temp; } } -} - +}*/ +/* //Convolution of neighborhood function with gaussian filter. -void blur_it_hybrid1(const int lengthmz, +void blur_it_hybrid1alt(const int lengthmz, const int numz, const int zlength, const int mlength, @@ -991,6 +1009,7 @@ void blur_it_hybrid1(const int lengthmz, double * __restrict newblur, const double * __restrict blur, const char * __restrict barr, + const double* __restrict closearray, const double zerolog) { int i, j, k, n; @@ -1016,7 +1035,7 @@ void blur_it_hybrid1(const int lengthmz, int m = index2D(mlength, k, n); if (closeind[index3D(numz, numclose, i, j, m)] != -1) { - temp2 += blur[closeind[index3D(numz, numclose, i, j, m)]]*mdist[n]; + temp2 += blur[closeind[index3D(numz, numclose, i, j, m)]] * mdist[n] *closearray[index3D(numz, numclose, i, j, m)]; } } if (temp2 > 0) { temp += log(temp2); } @@ -1028,10 +1047,10 @@ void blur_it_hybrid1(const int lengthmz, } } } -} +}*/ //Convolution of neighborhood function with gaussian filter. -void blur_it_hybrid1alt(const int lengthmz, +void blur_it_hybrid1(const int lengthmz, const int numz, const int zlength, const int mlength, @@ -1043,6 +1062,7 @@ void blur_it_hybrid1alt(const int lengthmz, double * __restrict newblur, const double * __restrict blur, const char * __restrict barr, + const double* __restrict closearray, const double zerolog) { int i, j, k, n; @@ -1069,7 +1089,7 @@ void blur_it_hybrid1alt(const int lengthmz, double temp3 = 0; if (closeind[index3D(numz, numclose, i, j, m)] != -1) { - temp3 = blur[closeind[index3D(numz, numclose, i, j, m)]]; + temp3 = blur[closeind[index3D(numz, numclose, i, j, m)]] *closearray[index3D(numz, numclose, i, j, m)]; } if (temp3 > 0) { temp2 += log(temp3); } else { temp2 += zerolog; } @@ -1097,6 +1117,7 @@ void blur_it_hybrid2(const int lengthmz, double * __restrict newblur, const double * __restrict blur, const char * __restrict barr, + const double* __restrict closearray, const double zerolog) { int i, j, k, n; @@ -1122,7 +1143,7 @@ void blur_it_hybrid2(const int lengthmz, int m = index2D(mlength, k, n); if (closeind[index3D(numz, numclose, i, j, m)] != -1) { - temp2 += blur[closeind[index3D(numz, numclose, i, j, m)]]*zdist[k]; + temp2 += blur[closeind[index3D(numz, numclose, i, j, m)]]*zdist[k] * closearray[index3D(numz, numclose, i, j, m)]; } } if (temp2 > 0) { temp += log(temp2); }// / (double)mlength);} @@ -1294,33 +1315,32 @@ inline int fixk(int k, int lengthmz) { k = abs(k); if (k >= lengthmz){ k = 2 * lengthmz - k-2; } + //if (k < 0) { k = 0; } return k; } void convolve_simp(const int lengthmz, const int maxlength, const int*starttab, const int *endtab, const double *mzdist, const double *deltas, double *denom, const int speedyflag) { - - unsigned int i, k; if (speedyflag == 0){ - #pragma omp parallel for private (i,k), schedule(auto) - for (i = 0; i < lengthmz; i++) + #pragma omp parallel for schedule(auto) + for (int i = 0; i < lengthmz; i++) { double cv = 0; - for (k = starttab[i]; k <= endtab[i]; k++) + for (int k = starttab[i]; k <= endtab[i]; k++) { int k2 = fixk(k, lengthmz); int start = starttab[k2]; - cv += deltas[k2] * mzdist[index2D(maxlength, k2, i - start)]; + cv += deltas[k2] * mzdist[index2D(maxlength, k2, i - start)]; } denom[i] = cv; } } else{ - #pragma omp parallel for private (i,k), schedule(auto) - for (i = 0; i < lengthmz; i++) + #pragma omp parallel for schedule(auto) + for (int i = 0; i < lengthmz; i++) { double cv = 0; - for (k = starttab[i]; k <= endtab[i]; k++) + for (int k = starttab[i]; k <= endtab[i]; k++) { cv += deltas[k] * mzdist[indexmod(lengthmz,k, i)]; } @@ -1462,10 +1482,10 @@ double deconvolve_iteration_speedy(const int lengthmz, const int numz,const int //blur_baseline(baseline, lengthmz, 10); //blur_noise(noise, lengthmz); } - + //printf("1\n"); //Sum deltas sum_deltas(lengthmz, numz, blur, barr, isolength, isotopepos, isotopeval, deltas); - + //printf("2\n"); if (mzsig != 0 && psig>=0) { //Convolve with peak shape @@ -1475,7 +1495,7 @@ double deconvolve_iteration_speedy(const int lengthmz, const int numz,const int { memcpy(denom, deltas, sizeof(double)*lengthmz); } - + //printf("3\n"); if (aggressiveflag == 1) { #pragma omp parallel for private(i), schedule(auto) @@ -1483,14 +1503,14 @@ double deconvolve_iteration_speedy(const int lengthmz, const int numz,const int denom[i] += baseline[i];// +noise[i]); } } - + //printf("4\n"); //Calculate Ratio #pragma omp parallel for private(i), schedule(auto) for (i = 0; i < lengthmz; i++) { if (denom[i] != 0 && dataInt[i] >= 0) { denom[i] = dataInt[i] / denom[i]; } } - + //printf("5\n"); if ( mzsig < 0) { //Real Richardson-Lucy Second Convolution @@ -1509,11 +1529,11 @@ double deconvolve_iteration_speedy(const int lengthmz, const int numz,const int memcpy(denom, deltas, sizeof(double)*lengthmz);*/ } - + //printf("6\n"); //Multiply Ratio by prior apply_ratios(lengthmz, numz, blur, barr, isolength, isotopepos, isotopeval, denom, blur2); - + //printf("7\n"); if (aggressiveflag ==1) { //memcpy(deltas, denom, sizeof(double)*lengthmz); @@ -1527,7 +1547,7 @@ double deconvolve_iteration_speedy(const int lengthmz, const int numz,const int //noise[i] = noise[i]*(deltas[i]); } } - + //printf("8\n"); free(deltas); free(denom); return 0; @@ -1840,47 +1860,142 @@ void ManualAssign(char *manualfile, int lengthmz, int numz, double *dataMZ, char printf("Using Manual Assignments for Some Peaks\n"); } -void MakeBlur(int lengthmz,int numz,int numclose,char *barr,int *closezind,int *closemind,double *mtab,double molig,double adductmass,int *nztab,double *dataMZ,int *closeind) +/* +void MakeBlur(const int lengthmz, const int numz, const int numclose, char *barr, const int *closezind, + const int *closemind, const double *mtab, const double molig, const double adductmass, const int *nztab, + const double *dataMZ,int *closeind, const double threshold, const Config config) { - unsigned int i,j,k; - #pragma omp parallel for private (i,j,k), schedule(auto) - for(i=0;i= numz || (nztab[j] + closezind[k])==0) { closeind[index3D(numz, numclose, i, j, k)] = -1; } - else - { - double point=(double)((mtab[index2D(numz, i, j)]+closemind[k]*molig+adductmass*(double)(nztab[j]+closezind[k]))/(double)(nztab[j]+closezind[k])); - if(pointdataMZ[lengthmz-1]){ closeind[index3D(numz, numclose, i, j, k)] = -1; } - else { - ind = nearfast(dataMZ, point, lengthmz); - int newind= index2D(numz, ind, indz); - if (barr[newind] == 1) { - closeind[index3D(numz, numclose, i, j, k)] = newind; - } - else { closeind[index3D(numz, numclose, i, j, k)] = -1; } - } - } - } - } - else - { - for (k=0;k= numz || (nztab[j] + closezind[k])==0) { closeind[index3D(numz, numclose, i, j, k)] = -1; } + else + { + //Find the nearest m/z value and test if it is close enough to the predicted one and within appropriate ranges + double point=(double)((mtab[index2D(numz, i, j)]+closemind[k]*molig+adductmass*(double)(nztab[j]+closezind[k]))/(double)(nztab[j]+closezind[k])); + if(pointdataMZ[lengthmz-1]+ newthreshold){ closeind[index3D(numz, numclose, i, j, k)] = -1; } + else { + int ind = nearfast(dataMZ, point, lengthmz); + double closepoint = dataMZ[ind]; + int newind= index2D(numz, ind, indz); + if (barr[newind] == 1 && fabs(point- closepoint)< newthreshold) { + closeind[index3D(numz, numclose, i, j, k)] = newind; + num += 1; + } + else { closeind[index3D(numz, numclose, i, j, k)] = -1; } + //printf("%d %d %d %f %f %d\n", i, j, k, point, closepoint, closeind[index3D(numz, numclose, i, j, k)]); + } + + } + } + if (num < 2 && config.isotopemode == 0) { barr[index2D(numz, i, j)] = 0; }// printf("%d %d \n", i, j);} + } + else + { + for (int k=0;k= lengthmz - 1) { i2 = i; } + if (i == 0) { i1 = i; } + mzsig = 2*fabs(dataMZ[i2] - dataMZ[i1]); + if (mzsig > config.massbins || mzsig == 0) { mzsig = config.massbins*2; } + } + double newthreshold = mzsig * 2; + + for (int k = 0; k < numclose; k++) + { + //Find the z index and test if it is within the charge range + int indz = (int)(j + closezind[k]); + if (indz < 0 || indz >= numz || (nztab[j] + closezind[k]) == 0) + { + closeind[index3D(numz, numclose, i, j, k)] = -1; + closearray[index3D(numz, numclose, i, j, k)] = 0; + } + else + { + //Find the nearest m/z value and test if it is close enough to the predicted one and within appropriate ranges + double point = (double)((mtab[index2D(numz, i, j)] + closemind[k] * molig + adductmass * (double)(nztab[j] + closezind[k])) / (double)(nztab[j] + closezind[k])); + if (pointdataMZ[lengthmz - 1] + newthreshold) + { + closeind[index3D(numz, numclose, i, j, k)] = -1; + closearray[index3D(numz, numclose, i, j, k)] = 0; + } + else { + int ind = nearfast(dataMZ, point, lengthmz); + double closepoint = dataMZ[ind]; + int newind = index2D(numz, ind, indz); + if (barr[newind] == 1 && fabs(point - closepoint) < newthreshold) { + closeind[index3D(numz, numclose, i, j, k)] = newind; + closearray[index3D(numz, numclose, i, j, k)] = closeval[k] *mzpeakshape(point, closepoint, mzsig, config.psfun); + num += 1; + } + else { + closeind[index3D(numz, numclose, i, j, k)] = -1; + closearray[index3D(numz, numclose, i, j, k)] = 0; + } + //printf("%d %d %d %f %f %d\n", i, j, k, point, closepoint, closeind[index3D(numz, numclose, i, j, k)]); + } + + } + } + if (num < 2 && config.isotopemode == 0) { barr[index2D(numz, i, j)] = 0; }// printf("%d %d \n", i, j);} + } + else + { + for (int k = 0; k < numclose; k++) + { + closeind[index3D(numz, numclose, i, j, k)] = -1; + closearray[index3D(numz, numclose, i, j, k)] = 0; + } + } + } + } } +/* void MakeBlurZ(int lengthmz, int numz, int numclose, char *barr, int *closezind, double *mtab, double adductmass, int *nztab, double *dataMZ, int *closeind) { unsigned int i, j, k; @@ -1949,18 +2064,17 @@ void MakeBlurM(int lengthmz, int numz, int numclose, char *barr, int *closemind, } } } -} +}*/ void MakePeakShape2D(int lengthmz,int maxlength,int *starttab,int *endtab,double *dataMZ,double mzsig,int psfun,int speedyflag,double *mzdist, double *rmzdist, int makereverse) { - unsigned int i,j; - #pragma omp parallel for private (i,j), schedule(auto) - for(i=0;imassmin) { - indexes[index2D(numz, i, j)] = nearfast(massaxis, testmass, maaxle); - } - } - } - - for (i = 0; imassmin) { - int index = nearfast(massaxis, testmass, maaxle); - double newval = blur[index2D(numz, i, j)]; - if (massaxis[index] == testmass) { - massaxisval[index] += newval; - massgrid[index2D(numz, index, j)] += newval; - } - - if (massaxis[index] < testmass &&index testmass&&index>0) - { - int index2 = index - 1; - double interpos = LinearInterpolatePosition(massaxis[index], massaxis[index2], testmass); - massaxisval[index] += (1 - interpos)*newval; - massgrid[index2D(numz, index, j)] += (1 - interpos)*newval; - massaxisval[index2] += (interpos)*newval; - massgrid[index2D(numz, index2, j)] += (interpos)*newval; - } - } - } - } - free(indexes); -} - - void IntegrateTransform(const int lengthmz, const int numz, const double *mtab, double massmax, double massmin, const int maaxle, double *massaxis,double *massaxisval, const double*blur,double *massgrid) { @@ -2167,7 +2223,7 @@ void IntegrateTransform(const int lengthmz, const int numz, const double *mtab, } void InterpolateTransform(const int maaxle,const int numz,const int lengthmz, const int *nztab,double *massaxis, - const double adductmass,const double *dataMZ,const double *dataInt,double *massgrid, double *massaxisval, const double *blur) + const double adductmass,const double *dataMZ, double *massgrid, double *massaxisval, const double *blur) { double startmzval = dataMZ[0]; double endmzval = dataMZ[lengthmz - 1]; @@ -2175,9 +2231,10 @@ void InterpolateTransform(const int maaxle,const int numz,const int lengthmz, co for(int i=0;istartmzval&&mztest0){ + mzlower = (massaxis[i-1] + nztab[j] * adductmass) / (double)nztab[j]; + //mzlower = (mzlower + mztest) / 2; + } + else { mzlower = mztest; } + + double mzupper; + if (i < maaxle-1) { + mzupper = (massaxis[i + 1] + nztab[j] * adductmass) / (double)nztab[j]; + //mzupper = (mzupper + mztest) / 2; + } + else { mzupper = mztest; } + + double newval = 0; + if (mzupper > startmzval&& mzlower < endmzval) + { + int index = nearfast(dataMZ, mztest, lengthmz); + int index1 = nearfast(dataMZ, mzlower, lengthmz); + int index2 = nearfast(dataMZ, mzupper, lengthmz); + double imz = dataMZ[index]; + + if (index2-index1 < 5) { + + if (imz == mztest) + { + newval = clip(blur[index2D(numz, index, j)],0); + val += newval; + massgrid[index2D(numz, i, j)] = newval; + + } + else + { + int edge = 0; + index2 = index; + if (imz > mztest) + { + index = index - 1; + } + else if (imz < mztest) + { + index2 = index + 1; + } + + if (index < 1 || index2 >= lengthmz - 1) + { + edge = 1; + } + if (index < 0 || index2 >= lengthmz) + { + edge = 2; + } + + + if (index2 > index && (dataMZ[index2] - dataMZ[index]) != 0 && edge ==0) + { + double mu = (mztest - dataMZ[index]) / (dataMZ[index2] - dataMZ[index]); + double y0 = blur[index2D(numz, index - 1, j)]; + double y1 = blur[index2D(numz, index, j)]; + double y2 = blur[index2D(numz, index2, j)]; + double y3 = blur[index2D(numz, index2 + 1, j)]; + newval = clip(CubicInterpolate(y0, y1, y2, y3, mu), 0); + //newval=clip(CRSplineInterpolate(y0,y1,y2,y3,mu), 0); + //newval=clip(LinearInterpolate(y1,y2,mu),0); + val += newval; + massgrid[index2D(numz, i, j)] = newval; + //printf("0\n"); + } + else if (edge == 1 && (dataMZ[index2] - dataMZ[index]) != 0) + { + double mu = (mztest - dataMZ[index]) / (dataMZ[index2] - dataMZ[index]); + double y1 = blur[index2D(numz, index, j)]; + double y2 = blur[index2D(numz, index2, j)]; + newval=clip(LinearInterpolate(y1,y2,mu),0); + val += newval; + massgrid[index2D(numz, i, j)] = newval; + //printf("1 %d %d %f %f %f\n", index, index2, mztest, imz, newval, massaxis[i]); + } + else if (edge == 2 && (dataMZ[index2] - dataMZ[index]) != 0) + { + double factor = 1; + if (index2 == 0) { index = 0; index2 = 1; } + if (index == lengthmz - 1) { index = lengthmz - 1; index2 = lengthmz - 2; factor = -1; } + double mu = (mztest - dataMZ[index]) / (dataMZ[index] - dataMZ[index2]); + double y1 = blur[index2D(numz, index, j)]; + double y2 = 0; + newval = clip(LinearInterpolate(y1, y2, mu), 0); + val += newval; + massgrid[index2D(numz, i, j)] = newval; + //printf("2\n"); + } + } + } + + else{ + //printf("Integrating\n"); + double num = 0; + for (int k = index1; k < index2+1; k++) + { + double kmz = dataMZ[k]; + double scale = 1; + + if (mztest < kmz) + { + scale = LinearInterpolatePosition(mzupper, mztest, kmz); + } + else if (kmz < mztest) + { + scale = LinearInterpolatePosition(mzlower, mztest, kmz); + } + + newval += scale * blur[index2D(numz, k, j)]; + num += scale; + + /*int k2 = k + 1; + double k2mz = dataMZ[k2]; + double ki = blur[index2D(numz, k, j)]; + double k2i = blur[index2D(numz, k2, j)]; + newval += (k2mz - kmz) * (ki + k2i) / 2;*/ + } + if (num != 0) { newval /= num; } + newval = clip(newval, 0); + val += newval; + massgrid[index2D(numz, i, j)] = newval; + } + } + } + massaxisval[i] = val; + } +} + + + void ignorezeros(char *barr, const double * dataInt, const int lengthmz, const int numz) { @@ -2916,6 +3119,8 @@ void SetLimits(const Config config, Input *inp) free(testmasspos); } +//Sets the maxlength parameter and the start and end values for the m/z peak shape convolution +//Convolution uses a reflection for the edges, so some care needs to be taken when things are over the edge. int SetStartsEnds(const Config config, const Input * inp, int *starttab, int *endtab, const double threshold) { int maxlength = 1; for (int i = 0; i < config.lengthmz; i++) @@ -2923,22 +3128,25 @@ int SetStartsEnds(const Config config, const Input * inp, int *starttab, int *en double point = inp->dataMZ[i] - threshold; int start, end; if (point < inp->dataMZ[0] && config.speedyflag == 0) { - start = (int)((point - inp->dataMZ[0]) / (inp->dataMZ[1] - inp->dataMZ[0])); + //start = (int)((point - inp->dataMZ[0]) / (inp->dataMZ[1] - inp->dataMZ[0])); + start = 0 - nearfast(inp->dataMZ, 2 * inp->dataMZ[0] - point, config.lengthmz); } else { start = nearfast(inp->dataMZ, point, config.lengthmz); } - starttab[i] = start; + point = inp->dataMZ[i] + threshold; if (point > inp->dataMZ[config.lengthmz - 1] && config.speedyflag == 0) { - end = config.lengthmz - 1 + (int)((point - inp->dataMZ[config.lengthmz - 1]) / (inp->dataMZ[config.lengthmz - 1] - inp->dataMZ[config.lengthmz - 2])); + //end = config.lengthmz - 1 + (int)((point - inp->dataMZ[config.lengthmz - 1]) / (inp->dataMZ[config.lengthmz - 1] - inp->dataMZ[config.lengthmz - 2])); + end = config.lengthmz - 1 + nearfast(inp->dataMZ, 2 * inp->dataMZ[0] - point, config.lengthmz); } else { end = nearfast(inp->dataMZ, point, config.lengthmz); } endtab[i] = end; if (end - start > maxlength) { maxlength = end - start; } + //printf("%d %d\n", start, end); } //printf("Max Length: %d\t", maxlength); return maxlength; diff --git a/unidec_src/UniDec/UniDec_Main.h b/unidec_src/UniDec/UniDec_Main.h index dbb87819..1c6587a2 100644 --- a/unidec_src/UniDec/UniDec_Main.h +++ b/unidec_src/UniDec/UniDec_Main.h @@ -40,7 +40,8 @@ Decon MainDeconvolution(const Config config, const Input inp, const int silent) * mzdist = NULL, * rmzdist = NULL, * oldblur = NULL, - * closeval = NULL; + * closeval = NULL, + * closearray = NULL; //................................................................... // @@ -82,7 +83,8 @@ Decon MainDeconvolution(const Config config, const Input inp, const int silent) //Calculates peak shape as a 1D list centered at the first element for circular convolutions MakePeakShape1D(inp.dataMZ, threshold, config.lengthmz, config.speedyflag, fabs(config.mzsig) * config.peakshapeinflate, config.psfun, mzdist, rmzdist, makereverse); } - if (silent == 0) { printf("mzdist set: %f\t", mzdist[0]); } + if (silent == 0) { printf("mzdist set: %f\t maxlength: %d\n", mzdist[0], maxlength); } + } else { @@ -134,7 +136,8 @@ Decon MainDeconvolution(const Config config, const Input inp, const int silent) closemind = calloc(numclose, sizeof(int)); closezind = calloc(numclose, sizeof(int)); closeval = calloc(numclose, sizeof(double)); - closeind = calloc(numclose * config.lengthmz * config.numz, sizeof(double)); + closeind = calloc(numclose * config.lengthmz * config.numz, sizeof(int)); + closearray = calloc(numclose * config.lengthmz * config.numz, sizeof(double)); //Determines the indexes of things that are close as well as the values used in the neighborhood convolution for (int k = 0; k < numclose; k++) @@ -148,10 +151,19 @@ Decon MainDeconvolution(const Config config, const Input inp, const int silent) simp_norm_sum(numclose, closeval); //Set up blur - MakeBlur(config.lengthmz, config.numz, numclose, barr, closezind, closemind, inp.mtab, config.molig, config.adductmass, inp.nztab, inp.dataMZ, closeind); + //MakeBlur(config.lengthmz, config.numz, numclose, barr, closezind, closemind, inp.mtab, config.molig, config.adductmass, inp.nztab, inp.dataMZ, closeind, threshold, config); + MakeSparseBlur(numclose, barr, closezind, closemind, inp.mtab, inp.nztab, inp.dataMZ, closeind, closeval, closearray, config); - if (silent == 0) { printf("Charges blurred: %d Oligomers blurred: %d\n", zlength, mlength); } + int badness = 1; + for (int i = 0; i < config.lengthmz * config.numz; i++) + { + if (barr[i] == 1) { badness = 0;} + } + if (badness == 1) { printf("ERROR: Setup is bad. No points are allowed\n"); exit(10); } + if (silent == 0) { printf("Charges blurred: %d Masses blurred: %d\n", zlength, mlength); } + + //IntPrint(closeind, numclose * config.lengthmz * config.numz); //Determine the maximum intensity in the data double dmax = Max(inp.dataInt, config.lengthmz); @@ -266,21 +278,21 @@ Decon MainDeconvolution(const Config config, const Input inp, const int silent) point_smoothing_peak_width(config.lengthmz, config.numz, maxlength, starttab, endtab, mzdist, decon.blur, config.speedyflag, barr); } - + //Run Blurs if (config.zsig >= 0 && config.msig >= 0) { - blur_it_mean(config.lengthmz, config.numz, numclose, closeind, decon.newblur, decon.blur, barr, config.zerolog); + blur_it_mean(config.lengthmz, config.numz, numclose, closeind, decon.newblur, decon.blur, barr, closearray, config.zerolog); } else if (config.zsig > 0 && config.msig < 0) { - blur_it_hybrid1(config.lengthmz, config.numz, zlength, mlength, closeind, closemind, closezind, mdist, zdist, decon.newblur, decon.blur, barr, config.zerolog); + blur_it_hybrid1(config.lengthmz, config.numz, zlength, mlength, closeind, closemind, closezind, mdist, zdist, decon.newblur, decon.blur, barr, closearray, config.zerolog); } else if (config.zsig < 0 && config.msig > 0) { - blur_it_hybrid2(config.lengthmz, config.numz, zlength, mlength, closeind, closemind, closezind, mdist, zdist, decon.newblur, decon.blur, barr, config.zerolog); + blur_it_hybrid2(config.lengthmz, config.numz, zlength, mlength, closeind, closemind, closezind, mdist, zdist, decon.newblur, decon.blur, barr, closearray, config.zerolog); } else { - blur_it(config.lengthmz, config.numz, numclose, closeind, closeval, decon.newblur, decon.blur, barr); + blur_it(config.lengthmz, config.numz, numclose, closeind, closearray, decon.newblur, decon.blur, barr); } //Run Richardson-Lucy Deconvolution @@ -288,7 +300,7 @@ Decon MainDeconvolution(const Config config, const Input inp, const int silent) decon.newblur, decon.blur, barr, config.aggressiveflag, dataInt2, config.isolength, inp.isotopepos, inp.isotopeval, starttab, endtab, mzdist, rmzdist, config.speedyflag, config.baselineflag, decon.baseline, decon.noise, config.mzsig, inp.dataMZ, config.filterwidth, config.psig); - + //Determine the metrics for conversion. Only do this every 10% to speed up. if ((config.numit < 10 || iterations % 10 == 0 || iterations % 10 == 1 || iterations>0.9 * config.numit)) { double diff = 0; @@ -302,7 +314,7 @@ Decon MainDeconvolution(const Config config, const Input inp, const int silent) } } if (tot != 0) { decon.conv = (diff / tot); } - else { decon.conv = 12345678; } + else { decon.conv = 12345678; printf("m/z vs. charge grid is zero. Iteration: %d\n", iterations); } //printf("Iteration: %d Convergence: %f\n", iterations, decon.conv); if (decon.conv < 0.000001) { @@ -314,6 +326,7 @@ Decon MainDeconvolution(const Config config, const Input inp, const int silent) } memcpy(oldblur, decon.blur, config.lengthmz * config.numz * sizeof(double)); } + } free(dataInt2); @@ -342,7 +355,7 @@ Decon MainDeconvolution(const Config config, const Input inp, const int silent) //Determine the maximum intensity in the blur matrix blurmax = Max(decon.blur, config.lengthmz * config.numz); double cutoff = 0; - if (blurmax != 0) { cutoff = 0.0000001 / blurmax; } + if (blurmax != 0) { cutoff = 0.0000001; } //Apply The Cutoff ApplyCutoff1D(decon.blur, blurmax * cutoff, config.lengthmz * config.numz); @@ -395,29 +408,45 @@ Decon MainDeconvolution(const Config config, const Input inp, const int silent) { if (decon.newblur[index2D(config.numz, i, j)] * barr[index2D(config.numz, i, j)] > newblurmax * cutoff) { - if (inp.mtab[index2D(config.numz, i, j)] + threshold * inp.nztab[j] > massmax) - { - massmax = inp.mtab[index2D(config.numz, i, j)] + threshold * inp.nztab[j]; - } - if (inp.mtab[index2D(config.numz, i, j)] - threshold * inp.nztab[j] < massmin) - { - massmin = inp.mtab[index2D(config.numz, i, j)] - threshold * inp.nztab[j]; - } + double testmax = inp.mtab[index2D(config.numz, i, j)] + threshold * inp.nztab[j]+config.massbins; + double testmin = inp.mtab[index2D(config.numz, i, j)] - threshold * inp.nztab[j]; + + //To prevent really wierd decimals + testmin = round(testmin / config.massbins) * config.massbins; + testmax = round(testmax / config.massbins) * config.massbins; + + if (testmax > massmax){ massmax = testmax;} + if (testmin < massmin){ massmin = testmin;} } } } - printf("Massmax: %f ", massmax); printf("Massmin: %f ", massmin); + printf("Massmax: %f ", massmax); } else { massmax = config.massub; massmin = config.masslb; } - //Performs an interpolation to get the zero charge mass values starting at config.masslb and going to config.massub in steps of config.massbins + //Checks to make sure the mass axis is good and makes a dummy axis if not decon.mlen = (int)(massmax - massmin) / config.massbins; if (decon.mlen < 1) { printf("Bad mass axis length: %d\n", decon.mlen); massmax = config.massub; massmin = config.masslb; decon.mlen = (int)(massmax - massmin) / config.massbins; + + //Declare the memory + decon.massaxis = calloc(decon.mlen, sizeof(double)); + decon.massaxisval = calloc(decon.mlen, sizeof(double)); + decon.massgrid = calloc(decon.mlen * config.numz, sizeof(double)); + memset(decon.massaxisval, 0, decon.mlen * sizeof(double)); + memset(decon.massgrid, 0, decon.mlen * config.numz * sizeof(double)); + + //Create the mass axis + for (int i = 0; i < decon.mlen; i++) + { + decon.massaxis[i] = massmin + i * config.massbins; + } + decon.uniscore = 0; + printf("ERROR: No masses detected.\n"); } else { @@ -429,10 +458,7 @@ Decon MainDeconvolution(const Config config, const Input inp, const int silent) memset(decon.massgrid, 0, decon.mlen * config.numz * sizeof(double)); if (silent == 0) { printf("Mass axis length: %d\n", decon.mlen); } - //To prevent really wierd decimals - //Only do this if the axis isn't already fixed - if (config.fixedmassaxis == 0) { massmin = round(massmin / config.massbins) * config.massbins; } - + //Create the mass axis for (int i = 0; i < decon.mlen; i++) { @@ -448,31 +474,49 @@ Decon MainDeconvolution(const Config config, const Input inp, const int silent) IntegrateTransform(config.lengthmz, config.numz, inp.mtab, massmax, massmin, decon.mlen, decon.massaxis, decon.massaxisval, decon.newblur, decon.massgrid); } } - else { + else if (config.poolflag == 1) { if (config.rawflag == 1 || config.rawflag == 3) { InterpolateTransform(decon.mlen, config.numz, config.lengthmz, inp.nztab, decon.massaxis, config.adductmass, - inp.dataMZ, inp.dataInt, decon.massgrid, decon.massaxisval, decon.blur); + inp.dataMZ, decon.massgrid, decon.massaxisval, decon.blur); } if (config.rawflag == 0 || config.rawflag == 2) { InterpolateTransform(decon.mlen, config.numz, config.lengthmz, inp.nztab, decon.massaxis, config.adductmass, - inp.dataMZ, inp.dataInt, decon.massgrid, decon.massaxisval, decon.newblur); + inp.dataMZ, decon.massgrid, decon.massaxisval, decon.newblur); } } - - } + else if (config.poolflag == 2) { + if (config.rawflag == 1 || config.rawflag == 3) { + SmartTransform(decon.mlen, config.numz, config.lengthmz, inp.nztab, decon.massaxis, config.adductmass, + inp.dataMZ, decon.massgrid, decon.massaxisval, decon.blur); + } + if (config.rawflag == 0 || config.rawflag == 2) { + SmartTransform(decon.mlen, config.numz, config.lengthmz, inp.nztab, decon.massaxis, config.adductmass, + inp.dataMZ, decon.massgrid, decon.massaxisval, decon.newblur); + } + } + else { + printf("Invalid poolflag %d\n", config.poolflag); + exit(1987); + } //..................................... // Scores // ....................................... + //Note this will not execute if the mass axis is bad double scorethreshold = 0; decon.uniscore = score(config, decon, inp.dataMZ, inp.dataInt, decon.newblur, decon.massaxis, decon.massaxisval, decon.massgrid, inp.nztab, scorethreshold); + } + + + //Free Memory free(mzdist); free(rmzdist); free(closeval); + free(closearray); free(closemind); free(closezind); free(endtab); @@ -629,6 +673,7 @@ int run_unidec(int argc, char *argv[], Config config) { } } + //Allocates the memory for the boolean array of whether to test a value inp.barr = calloc(config.lengthmz * config.numz, sizeof(char)); //Tells the algorithm to ignore data that are equal to zero ignorezeros(inp.barr, inp.dataInt, config.lengthmz, config.numz); @@ -656,8 +701,10 @@ int run_unidec(int argc, char *argv[], Config config) { printf("Isotopes set up, Length: %d\n", config.isolength); } + //Setup the Deconvolution Decon decon=SetupDecon(); + //Autotuning if (autotune == 1) { printf("Starting Autotune...\n"); double start_peakwindow = config.peakwin; @@ -726,7 +773,9 @@ int run_unidec(int argc, char *argv[], Config config) { printf("Best mzsig: %f zsig: %f beta: %f psig: %f Score:%f\n", start_mzsig, start_zsig, start_beta, start_psig, bestscore); decon = MainDeconvolution(config, inp, 0); } - else{ decon = MainDeconvolution(config, inp, 0); } + else{ + //Run the main Deconvolution + decon = MainDeconvolution(config, inp, 0); } //................................................................ //