diff --git a/.gitignore b/.gitignore index 8839c612..a52205e6 100644 --- a/.gitignore +++ b/.gitignore @@ -129,6 +129,8 @@ unidec_bin/Example Data/BSA_unidecfiles/BSA_chargedata.dat unidec_bin/Example Data/BSA_unidecfiles/BSA_chargedata_areas.dat unidec_bin/Example Data/BSA_unidecfiles/BSA_peakparam.dat /__pycache__/ + __pycache__/ unidec_bin/Example Data/GroEL HCD UniDec_unidecfiles/GroEL HCD UniDec_peak_areas.dat +unidec_bin/mkl_intel_thread.dll diff --git a/GUniDec.py b/GUniDec.py index 4bc7b67a..10ee2ba8 100644 --- a/GUniDec.py +++ b/GUniDec.py @@ -159,11 +159,11 @@ def on_open_file(self, filename, directory, skipengine=False, **kwargs): if self.eng.config.batchflag == 0: self.view.plot1im.contourplot(self.eng.data.rawdata3, self.eng.config, xlab="m/z (Th)", ylab="Arrival Time (ms)", title="IM-MS Data") - #tstart = time.perf_counter() + # tstart = time.perf_counter() # Load Config to GUI self.import_config() self.view.SetStatusText("Ready", number=5) - #print("ImportConfig: %.2gs" % (time.perf_counter() - tstart)) + # print("ImportConfig: %.2gs" % (time.perf_counter() - tstart)) if False: try: self.eng.unidec_imports(everything=False) @@ -171,7 +171,7 @@ def on_open_file(self, filename, directory, skipengine=False, **kwargs): except: pass - #print("ImportData: %.2gs" % (time.perf_counter() - tstart)) + # print("ImportData: %.2gs" % (time.perf_counter() - tstart)) def on_save_state(self, e=None, filenew=None): """ @@ -425,7 +425,7 @@ def on_pick_peaks(self, e=None): self.makeplot4(1) self.view.SetStatusText("Peak Pick Done", number=5) - #self.on_score() + self.on_score() pass def on_plot_peaks(self, e=None): @@ -764,7 +764,7 @@ def on_delete(self, e=None): self.makeplot6(1) self.view.SetStatusText("Peak Change Done", number=5) - def on_charge_states(self, e=None): + def on_charge_states(self, e=None, mass=None): """ Triggered by right click "plot charge states" on self.view.peakpanel. Plots a line with text listing the charge states of a specific peak. @@ -776,7 +776,10 @@ def on_charge_states(self, e=None): else: sign = "-" charges = np.arange(self.eng.config.startz, self.eng.config.endz + 1) - peaksel = self.view.peakpanel.selection2[0] + if mass is None: + peaksel = self.view.peakpanel.selection2[0] + else: + peaksel = mass peakpos = (peaksel + charges * self.eng.config.adductmass) / charges boo1 = np.all([peakpos < self.eng.config.maxmz, peakpos > self.eng.config.minmz], axis=0) peakpos = peakpos[boo1] @@ -1654,12 +1657,29 @@ def on_plot_isotope_distribution(self, e=0): def on_score(self, e=0): self.eng.dscore() self.view.peakpanel.add_data(self.eng.pks, show="dscore") - self.view.SetStatusText("Average Peaks Score: " + str(round(self.eng.pks.aps * 100, 2)), number=3) + self.view.SetStatusText("UniScore: " + str(round(self.eng.pks.uniscore * 100, 2)), number=3) def on_score2(self, e=0): - self.eng.filter_peaks() + defaultvalue = "40" + try: + defaultvalue = str(self.eng.fdrs[0, 1] * 100) + except: + pass + dialog = miscwindows.SingleInputDialog(self.view) + dialog.initialize_interface(title="Minimum DScore", message="Set Minimum DScore Value (%): ", + defaultvalue=defaultvalue) + dialog.ShowModal() + + try: + minval = float(dialog.value)/100. + except: + print("Error with Score Input:", dialog.value) + minval = 0.4 + + print("Using DScore Cutoff (%): ", minval*100) + self.eng.filter_peaks(minscore=minval) self.view.peakpanel.add_data(self.eng.pks, show="dscore") - self.view.SetStatusText("Average Peaks Score: " + str(round(self.eng.pks.aps * 100, 2)), number=3) + self.view.SetStatusText("UniScore: " + str(round(self.eng.pks.uniscore * 100, 2)), number=3) self.makeplot2() self.makeplot4() self.makeplot6() @@ -1671,9 +1691,20 @@ def on_score_window(self, e=0): sw.populate(self.eng.pks) pass + def on_score_label(self, e=0): + self.on_score() + offset = 0.08 * np.amax(self.eng.data.massdat[:, 1]) + for p in self.eng.pks.peaks: + text = str(int(round(p.dscore * 100))) + self.view.plot2.addtext(text, p.mass, p.height + offset, vlines=False) + # def on_remove_noise_points(self, e=0): # self.on_dataprep_button(removenoise=True) + def on_score_FDR(self, e=0): + self.eng.estimate_FDR() + + def on_flip_mode(self, e=None): """ Flips between MS and IM-MS mode diff --git a/GUniDec.spec b/GUniDec.spec index b5dfad96..a5993583 100644 --- a/GUniDec.spec +++ b/GUniDec.spec @@ -12,9 +12,6 @@ import comtypes import encodings import zipimport from PyInstaller.utils.hooks import collect_data_files -from sklearn.decomposition import PCA -import sklearn.decomposition -import sklearn from multiprocessing import freeze_support freeze_support() @@ -55,11 +52,11 @@ zipdirectory = outputdir + "_" + date + ".zip" # Analysis of packages a = Analysis(['Launcher.py'], pathex=[os.getcwd()], - excludes=['pandas', 'IPython', 'Cython', 'statsmodels', 'pyopenms', + excludes=['pandas', 'IPython', 'Cython', 'statsmodels', 'pyopenms', 'sklearn', 'GdkPixbuf', 'PIL', 'pyQT4', 'pygobject', 'pygtk', 'pyside', 'PySide2', 'shiboken2', 'PyQt5'], hiddenimports=[ # 'plotly',' - 'sklearn', 'sklearn.decomposition', 'sklearn.preprocessing', 'sklearn.utils', 'pytest', 'pluggy', - 'sklearn.utils.testing', 'sklearn.utils._cython_blas', + # 'sklearn', 'sklearn.decomposition', 'sklearn.preprocessing', 'sklearn.utils', 'pytest', 'pluggy', + # 'sklearn.utils.testing', 'sklearn.utils._cython_blas', 'scipy.special._ufuncs_cxx', 'scipy.linalg.cython_blas', 'scipy.linalg.cython_lapack', 'scipy._lib.messagestream', 'FileDialog', 'Dialog', 'encodings', 'encodings.__init__', @@ -85,6 +82,7 @@ if system == "Windows": a.datas += [('readme.md', 'readme.md', 'DATA')] a.datas += [('LICENSE', 'LICENSE', 'DATA')] a.datas += [('libiomp5md.dll', 'unidec_bin\\libiomp5md.dll', 'DATA')] + a.datas += [('mkl_intel_thread.dll', 'unidec_bin\\mkl_intel_thread.dll', 'DATA')] a.datas += [('ucrtbase.dll', 'unidec_bin\\ucrtbase.dll', 'DATA')] a.datas += [('vcruntime140.dll', 'unidec_bin\\vcruntime140.dll', 'DATA')] a.datas += [('libmypfunc.dll', 'unidec_bin\\libmypfunc.dll', 'DATA')] @@ -132,6 +130,15 @@ a.datas.extend(dir_files("unidec_bin\\Example Data", 'Example Data')) # grammar=os.path.join(os.path.dirname(lib2to3.__file__),'PatternGrammar.txt') # a.datas.extend([(os.path.join('lib2to3','PatternGrammar.txt'),grammar,'DATA')]) +from os import listdir +from PyInstaller import compat + +mkldir = compat.base_prefix + "/Lib/site-packages/numpy/DLLs" +# a.datas.extend(dir_files(mkldir, '')) +#a.datas.extend( +# [(mkldir + "/" + mkl, '', 'DATA') for mkl in listdir(mkldir) if mkl.startswith('mkl_') or mkl.startswith('libio')]) +# for b in binaries: +# a.datas += b # Assemble and build pyz = PYZ(a.pure) @@ -154,6 +161,8 @@ coll = COLLECT(exe, path = "C:\\Python\\UniDec3\\dist\\UniDec_Windows\\GUI_UniDec.exe" import subprocess +print("Testing Software...", path) + out = subprocess.call(path) if out != 0: diff --git a/readme.md b/readme.md index 9d0bf4f9..0150df0b 100644 --- a/readme.md +++ b/readme.md @@ -112,7 +112,7 @@ we have provided the source code for the GUI and API. ### Python -UniDec is currently compatible only with Python 2.7. There are several Python libraries that UniDec will depend on. +UniDec is currently compatible only with Python 3. There are several Python libraries that UniDec will depend on. matplotlib numpy @@ -124,6 +124,7 @@ networkx h5py multiplierz (Windows only, for Thermo RAW imports) pypubsub +tornado All of these can be installed from the command line with (for example): @@ -193,6 +194,26 @@ The main GUI class is GUniDec.UniDecApp. ## Change Log +v.4.1.0 + +Introduced **UniScore**. This will automatically score the quality of your peaks and the overall deconvolution. Various experimental features were also added with this to visualize and filter the scores. Some updates to the scoring algorithm from Version 4.0.2 to improve it. + +A good heuristic for UniScore and DScore values is: + +80-100: A (Excellent) + +60-80: B (Good) + +40-60: C (Fair) + +20-40: D (Poor) + +0-20: F (Almost certainly noise) + +Added **Calculator for getting masses from Protein/Peptide or RNA sequences**. This can be launched stand alone from the Experimental Menu or by right clicking on the Oligomer and Mass Tools Oligomer Builder box. + +Fixed bug to now allow protein-protein oligomerization KD values to be measured with the Data Collector. Find out more on the [UniDec Wiki Page](https://github.com/michaelmarty/UniDec/wiki/Fitting-KDs-with-Data-Collector). + v.4.0.2 Switched UniDec to load processed data if it already exists. diff --git a/unidec.py b/unidec.py index 97df137b..afb251c7 100644 --- a/unidec.py +++ b/unidec.py @@ -14,7 +14,6 @@ import unidec_modules.IM_functions as IM_func import unidec_modules.MassSpecBuilder as MSBuild from unidec_modules.unidec_enginebase import UniDecEngine -from sklearn.decomposition import PCA import scipy.stats as stats __author__ = 'Michael.Marty' @@ -248,6 +247,13 @@ def process_data(self, **kwargs): if self.config.imflag == 0: self.data.data2 = ud.dataprep(self.data.rawdata, self.config) + if "scramble" in kwargs: + if kwargs["scramble"]: + # np.random.shuffle(self.data.data2[:, 1]) + self.data.data2[:, 1] = np.abs( + np.random.normal(0, 100 * np.amax(self.data.data2[:, 1]), len(self.data.data2))) + self.data.data2[:, 1] /= np.amax(self.data.data2[:, 1]) + print("Added noise to data") ud.dataexport(self.data.data2, self.config.infname) else: tstart2 = time.perf_counter() @@ -416,8 +422,9 @@ def setup_peaks(self, peaks): # Generate Intensities of Each Charge State for Each Peak mztab = ud.make_peaks_mztab(self.data.mzgrid, self.pks, self.config.adductmass) # Calculate errors for peaks with FWHM + ud.peaks_error_FWHM(self.pks, self.data.massdat) try: - ud.peaks_error_FWHM(self.pks, self.data.massdat) + ud.peaks_error_mean(self.pks, self.data.massgrid, self.data.ztab, self.data.massdat, self.config) except Exception as e: print("Error in error calculations:", e) @@ -440,6 +447,39 @@ def autorun(self): self.autointegrate() self.export_params(0) + def estimate_FDR(self): + print("Starting FDR Estimate") + dscores = [] + maxit = 50 + maxdscores = 300 + i = 0 + while i < maxit and len(dscores) < maxdscores: + self.process_data(scramble="True", silent=False) + self.run_unidec(silent=False) + self.pick_peaks() + self.dscore() + for p in self.pks.peaks: + dscores.append(p.dscore) + i += 1 + print(i) + + self.autorun() + + print("Number of False Scores:", len(dscores)) + sd = np.sort(dscores) + l = len(sd) + fdrs = [0, 0.01, 0.05] + cutoffs = [] + + for fdr in fdrs: + f = int(l * (1 - fdr)) + 1 + if f >= l: + f = l - 1 + cutoffs.append(sd[f]) + print("FDR (%):", fdr * 100, "DScore Cut Off (%):", sd[f] * 100) + + self.fdrs = np.transpose([fdrs, cutoffs]) + def autocorrelation(self, massdat=None): """ Performs autocorrelation on mass data. Result is stored as self.data.autocorr. @@ -1054,7 +1094,7 @@ def get_peaks_scores(self, window=None, x_range=None, ci=0.99, **kwargs): def get_zstack(self, xfwhm=1): zarr = np.reshape(self.data.massgrid, (len(self.data.massdat), len(self.data.ztab))) - #zarr = zarr / np.amax(np.sum(zarr, axis=1)) * np.amax(self.data.massdat[:, 1]) + # zarr = zarr / np.amax(np.sum(zarr, axis=1)) * np.amax(self.data.massdat[:, 1]) for i, p in enumerate(self.pks.peaks): # fwhm = p.errorFWHM @@ -1080,6 +1120,8 @@ def get_zstack(self, xfwhm=1): stack.append(zarr[boo3, j]) p.zstack = np.array(stack) + ''' + from sklearn.decomposition import PCA def pks_pca(self, xfwhm=3): self.get_zstack(xfwhm=xfwhm) for p in self.pks.peaks: @@ -1090,7 +1132,7 @@ def pks_pca(self, xfwhm=3): svals = pca.fit_transform(ints)[:, 0] coms = pca.components_ p.pca_score = pca.explained_variance_ratio_[0] - p.pca_mdist = np.transpose([m, coms[0] / np.amax(coms[0])]) + p.mdist = np.transpose([m, coms[0] / np.amax(coms[0])]) p.pca_zdist = np.transpose([self.data.ztab, svals / np.amax(svals)]) except Exception as e: print("Error in PCA:", e) @@ -1101,41 +1143,41 @@ def pks_pca(self, xfwhm=3): p.pca_mdist = np.transpose([m, msum / np.amax(msum)]) p.pca_zdist = np.transpose([self.data.ztab, zsum / np.amax(zsum)]) # print("Peak Mass:", p.mass, "PCA Score", p.pca_score) - self.pks_mscore() + self.pks_mscore()''' - def pks_mscore(self, xfwhm=3, pow=2): + def pks_mscore(self, xfwhm=2, pow=2): self.get_zstack(xfwhm=xfwhm) for p in self.pks.peaks: m = p.zstack[0] - ints = p.zstack[1:] #mzgrid + ints = p.zstack[1:] # mzgrid msum = np.sum(ints, axis=0) zsum = np.sum(ints, axis=1) msum /= np.amax(msum) zsum /= np.amax(zsum) - p.pca_mdist = np.transpose([m, msum]) + p.mdist = np.transpose([m, msum]) # mmax = np.argmax(msum) - # sumz = np.sum(ints, axis=1) # TODO: Switch to peak center only + # sumz = np.sum(ints, axis=1) # zsum = deepcopy(ints[:, mmax]) - p.pca_zdist = np.transpose([self.data.ztab, zsum]) + p.zdist = np.transpose([self.data.ztab, zsum]) rats = [] weights = [] for i, z in enumerate(self.data.ztab): - y = ints[i] #mzgrid - sy = np.sum(y) - mdat = msum * sy / np.sum(msum) #summed decon - sz = np.sum(mdat) - sse = np.sum(np.abs(y - mdat)) - r = 1 - ud.safedivide1(sse, sz) - rats.append(r) - weights.append(sy) + Y = ints[i] # mzgrid + sY = np.sum(Y) + X = msum * sY / np.sum(msum) # summed decon + sX = np.sum(X) + sae = np.sum(np.abs(Y - X)) + M = 1 - ud.safedivide1(sae, sX) + rats.append(M) + weights.append(sY) rats = np.array(rats) weights = np.array(weights) - avg = ud.weighted_avg(rats, weights**pow) - print("Peak Mass:", p.mass, "Peak Shape Score", avg, p.pca_score) - p.pca_score = avg + avg = ud.weighted_avg(rats, weights ** pow) + # print("Peak Mass:", p.mass, "Peak Shape Score", avg, p.mscore) + p.mscore = avg - def pks_zscore(self, xfwhm=3): + def pks_zscore(self, xfwhm=2): try: if len(self.pks.peaks[0].zstack) < 1: self.get_zstack(xfwhm=xfwhm) @@ -1145,7 +1187,7 @@ def pks_zscore(self, xfwhm=3): for p in self.pks.peaks: ints = p.zstack[1:] msum = np.sum(ints, axis=0) - mmax = np.argmax(msum) + # mmax = np.argmax(msum) sumz = np.sum(ints, axis=1) # sumz = deepcopy(ints[:, mmax]) # Switch to peak center only sumz /= np.amax(sumz) @@ -1180,9 +1222,9 @@ def pks_zscore(self, xfwhm=3): p.z_score = 1 - ud.safedivide1(badarea, zs) # print(badarea, zs, p.z_score) - def get_mzstack(self, xfwhm=1): + def get_mzstack(self, xfwhm=2): zarr = np.reshape(self.data.mzgrid[:, 2], (len(self.data.data2), len(self.data.ztab))) - #zarr = zarr / np.amax(np.sum(zarr, axis=1)) * np.amax(self.data.data2[:, 1]) + # zarr = zarr / np.amax(np.sum(zarr, axis=1)) * np.amax(self.data.data2[:, 1]) for i, p in enumerate(self.pks.peaks): # fwhm = p.errorFWHM # if fwhm == 0: @@ -1201,8 +1243,8 @@ def get_mzstack(self, xfwhm=1): bvals = [] for j, z in enumerate(self.data.ztab): interval2 = (interval + z * self.config.adductmass) / z - boo1 = self.data.data2[:, 0] < interval2[1] - boo2 = self.data.data2[:, 0] > interval2[0] + boo1 = self.data.data2[:, 0] <= interval2[1] + boo2 = self.data.data2[:, 0] >= interval2[0] boo3 = np.all([boo1, boo2], axis=0) top = self.data.data2[boo3] stack.append(np.transpose([top[:, 0], top[:, 1], zarr[boo3, j]])) @@ -1215,34 +1257,34 @@ def get_mzstack(self, xfwhm=1): p.rsquared = 1 - ud.safedivide1(sse, denom) p.mzstack = np.array(stack) - def pks_uscore(self, xfwhm=3, pow=1): + def pks_uscore(self, xfwhm=2, pow=1): self.get_mzstack(xfwhm=xfwhm) for p in self.pks.peaks: rats = [] weights = [] for i, zval in enumerate(self.data.ztab): v = p.mzstack[i] - y = v[:, 1] #spectrum - z = v[:, 2] #mzgrid - sy = np.sum(y) - sz = np.sum(z) + X = v[:, 1] # spectrum + Y = v[:, 2] # mzgrid + sx = np.sum(X) + sy = np.sum(Y) # r = 1 - ud.safedivide1(np.abs(sy - sz), sy) - sse = np.sum(np.abs(y - z)) # ** 2) - r = 1 - ud.safedivide1(sse, sy) - rats.append(r) - weights.append(sz) - #try: + sae = np.sum(np.abs(X - Y)) # ** 2) + U = 1 - ud.safedivide1(sae, sx) + rats.append(U) + weights.append(sy) + # try: # print(sse, r, sz, sy, z[0], z[-1]) - #except: - #// print(z) + # except: + # // print(z) rats = np.array(rats) weights = np.array(weights) avg = ud.weighted_avg(rats, weights ** pow) # print("Peak Mass:", p.mass, "Uniqueness Score", avg) - p.uni_score = avg + p.uscore = avg - def fscore(self, height, min): + def score_minimum(self, height, min): x2 = height x1 = height / 2. if min > x1: @@ -1251,79 +1293,104 @@ def fscore(self, height, min): y = 1 return y - def pks_fwhmscore(self, thresh=0.8): + def pks_fscore(self): for p in self.pks.peaks: # Tests if the FWHM interval is highly asymetric - fwhm = p.errorFWHM + index = ud.nearest(self.data.massdat[:, 0], p.mass) + height = self.data.massdat[index, 1] + interval = np.array(p.intervalFWHM) - diff = np.abs(interval - p.mass) - r = ud.safedivide1(diff, fwhm) - p.fwhm_score = 1 - if np.any(r > thresh): - p.fwhm_score = 1 - (np.amax(r) - thresh) / (1 - thresh) + diff = np.abs(np.array(p.intervalFWHM) - p.mass) + + p.fscore = 1 + if p.badFWHM: + if diff[0] > diff[1]: + dcut1 = ud.datachop(self.data.massdat, interval[0] - self.config.massbins, p.mass) + lmin = np.amin(dcut1[:, 1]) + fscore1 = self.score_minimum(height, lmin) + # print("Fscore2", fscore1, lmin, height) + p.fscore *= fscore1 + else: + dcut2 = ud.datachop(self.data.massdat, p.mass, interval[1] + self.config.massbins) + umin = np.amin(dcut2[:, 1]) + fscore2 = self.score_minimum(height, umin) + # print("Fscore3", fscore2, umin, height) + p.fscore *= fscore2 + + # diff = np.abs(interval - p.mass) + # r = ud.safedivide1(diff, fwhm) + # if np.any(r > thresh): + # p.fscore = 1 - (np.amax(r) - thresh) / (1 - thresh) + # Test if the FWHM dip isn't low enough masses = self.pks.masses - lower = np.all([0 < p.mass - masses, p.mass - masses < p.errorFWHM], axis=0) - upper = np.all([0 < masses - p.mass, masses - p.mass < p.errorFWHM], axis=0) + lower = np.all([interval[0] < masses, masses < p.mass], axis=0) + upper = np.all([p.mass < masses, masses < interval[1]], axis=0) if np.any(lower): - dcut1 = ud.datachop(self.data.massdat, np.amin(masses[lower]), p.mass) - lmin = np.amin(dcut1[:, 1]) - fscore = self.fscore(p.height, lmin) - #print("Fscore2", fscore, lmin, p.height) - p.fwhm_score *= fscore + badmasses = masses[lower] + for m in badmasses: + dcut1 = ud.datachop(self.data.massdat, m, p.mass) + lmin = np.amin(dcut1[:, 1]) + fscore1 = self.score_minimum(height, lmin) + # print("Fscore4", fscore1, lmin, height) + p.fscore *= fscore1 if np.any(upper): - dcut2 = ud.datachop(self.data.massdat, p.mass, np.amax(masses[upper])) - umin = np.amin(dcut2[:, 1]) - fscore2 = self.fscore(p.height, umin) - #print("Fscore3", fscore2, umin, p.height) - p.fwhm_score *= fscore2 - - def dscore(self, xfwhm=3): + badmasses = masses[upper] + for m in badmasses: + dcut2 = ud.datachop(self.data.massdat, p.mass, m) + umin = np.amin(dcut2[:, 1]) + fscore2 = self.score_minimum(height, umin) + # print("Fscore5", fscore2, umin, height) + p.fscore *= fscore2 + + def dscore(self, xfwhm=2, pow=2): """ Combined score should include: 1) Fit to data -> Uniqueness Score 2) Background -> Uniqueness Score 3) Unique evidence for each peak -> Uniqueness Score - 4) Noise -> PCA Score - 5) Consistent peak shape -> PCA Score + 4) Noise -> M Score + 5) Consistent peak shape -> M Score 6) Charge state distribution -> Z_Score 7) FWHM -> Penalties for poor FWHM :return: """ - self.pks_pca(xfwhm=xfwhm) - self.pks_uscore(pow=2, xfwhm=xfwhm) + self.pks_mscore(xfwhm=xfwhm, pow=pow) + self.pks_uscore(pow=pow, xfwhm=xfwhm) self.pks_zscore(xfwhm=xfwhm) - self.pks_fwhmscore() + self.pks_fscore() tscores = [] ints = [] for p in self.pks.peaks: - scores = np.array([p.pca_score, p.uni_score, p.z_score, p.fwhm_score]) # p.rsquared, + scores = np.array([p.mscore, p.uscore, p.z_score, p.fscore]) # p.rsquared, p.dscore = np.product(scores) tscores.append(p.dscore) ints.append(p.height) print("Mass:", p.mass, - "PCA:", round(p.pca_score * 100, 2), - "Uniqueness:", round(p.uni_score * 100, 2), + "Peak Shape:", round(p.mscore * 100, 2), + "Uniqueness:", round(p.uscore * 100, 2), # "Fitting R^2", round(p.rsquared * 100, 2), "Charge:", round(p.z_score * 100, 2), - "FWHM:", round(p.fwhm_score * 100, 2), + "FWHM:", round(p.fscore * 100, 2), "Combined:", round(p.dscore * 100, 2)) - self.pks.aps = ud.weighted_avg(tscores, ints) - print("Average Peaks Score:", self.pks.aps) - - def filter_peaks(self, minscore=0.33): - w = deepcopy(self.config.peakwindow) - t = deepcopy(self.config.peakthresh) - self.config.peakwindow = 3 * self.config.massbins - self.config.peakthresh = 0.01 + # print(p.intervalFWHM) + ints = np.array(ints) + self.pks.uniscore = ud.weighted_avg(tscores, ints ** pow) + print("Average Peaks Score (UniScore):", self.pks.uniscore) + + def filter_peaks(self, minscore=0.4): + # w = deepcopy(self.config.peakwindow) + # t = deepcopy(self.config.peakthresh) + # self.config.peakwindow = 3 * self.config.massbins + # self.config.peakthresh = 0.01 self.pick_peaks() - self.config.peakwindow = w - self.config.peakthresh = t + # self.config.peakwindow = w + # self.config.peakthresh = t self.dscore() newpeaks = [] diff --git a/unidec_bin/Example Data/POPC_Nanodiscs_unidecfiles/POPC_Nanodiscs_conf.dat b/unidec_bin/Example Data/POPC_Nanodiscs_unidecfiles/POPC_Nanodiscs_conf.dat index 16fb5824..d2fec115 100644 --- a/unidec_bin/Example Data/POPC_Nanodiscs_unidecfiles/POPC_Nanodiscs_conf.dat +++ b/unidec_bin/Example Data/POPC_Nanodiscs_unidecfiles/POPC_Nanodiscs_conf.dat @@ -41,6 +41,7 @@ noiseflag 0.0 linflag 2 cmap nipy_spectral peakcmap rainbow +spectracmap rainbow publicationmode 1 isotopemode 0 peaknorm 1 diff --git a/unidec_bin/UniDec.exe b/unidec_bin/UniDec.exe index 841209d9..7808ed90 100644 Binary files a/unidec_bin/UniDec.exe and b/unidec_bin/UniDec.exe differ diff --git a/unidec_bin/libiomp5md.dll b/unidec_bin/libiomp5md.dll index 84e12042..a4e603e5 100644 Binary files a/unidec_bin/libiomp5md.dll and b/unidec_bin/libiomp5md.dll differ diff --git a/unidec_modules/MassSpecBuilder.py b/unidec_modules/MassSpecBuilder.py index 91be43ed..6a0e9af7 100644 --- a/unidec_modules/MassSpecBuilder.py +++ b/unidec_modules/MassSpecBuilder.py @@ -1,7 +1,7 @@ import numpy as np - +from scipy.stats import halfnorm from unidec_modules import unidectools as ud __author__ = 'Michael.Marty' @@ -57,12 +57,18 @@ def make_mass_spectrum(array, zrange=(10, 50), mzrange=(2000, 10000), mz_bin_siz output = output + background if baseline < 0: output += np.abs(baseline) + output /= np.amax(output) if noise > 0: noisedat = np.random.normal(0, noise, size=len(output)) output = np.abs(output + noisedat) output /= np.amax(output) + + if "scramble" in kwargs: + if kwargs['scramble']: + np.random.shuffle(output) + return np.transpose([mzaxis, output]), ztab diff --git a/unidec_modules/UniFit.py b/unidec_modules/UniFit.py index c0fe4ac0..a937a2cb 100644 --- a/unidec_modules/UniFit.py +++ b/unidec_modules/UniFit.py @@ -173,7 +173,11 @@ def GetFree(pO, ptot, ltot, ureact, prottab, ligtab, paths, kds, nfactors): def MakeGrid(pfree, lfree, ureact, prottab, ligtab, paths, kds, nfactors): intgrid = np.zeros_like(prottab) intgrid[1, 0] = pfree - intgrid[0, 1] = lfree + try: + intgrid[0, 1] = lfree + except: + pass + h = 1 for i in range(0, len(ureact)): if len(ureact[i, 2]) > 2: diff --git a/unidec_modules/gui_elements/peaklistsort.py b/unidec_modules/gui_elements/peaklistsort.py index 7449fd5b..8fefe9f2 100644 --- a/unidec_modules/gui_elements/peaklistsort.py +++ b/unidec_modules/gui_elements/peaklistsort.py @@ -151,8 +151,8 @@ def add_data(self, pks, show="area", collab1="Mass (Da)"): self.list_ctrl.SetItem(i, 3, str(p.diff)) elif show == "avgcharge": self.list_ctrl.SetItem(i, 3, str(p.avgcharge)) - elif show == "pca_score": - self.list_ctrl.SetItem(i, 3, str(np.round(p.pca_score*100, 2))) + elif show == "mscore": + self.list_ctrl.SetItem(i, 3, str(np.round(p.mscore*100, 2))) elif show == "dscore": self.list_ctrl.SetItem(i, 3, str(np.round(p.dscore*100, 2))) else: diff --git a/unidec_modules/gui_elements/ud_menu.py b/unidec_modules/gui_elements/ud_menu.py index 80ec4b01..c9dc3133 100644 --- a/unidec_modules/gui_elements/ud_menu.py +++ b/unidec_modules/gui_elements/ud_menu.py @@ -202,6 +202,11 @@ def __init__(self, parent, config, pres, tabbed): self.advancedmenu.AppendSeparator() + self.advancedmenu.Append(4001, "Autotune", "Let UniDec find the best parameters", wx.ITEM_CHECK) + self.parent.Bind(wx.EVT_MENU, self.menu_4001, id=4001) + + self.advancedmenu.AppendSeparator() + self.scalemenu = wx.Menu() self.scalemenu.Append(501, "Linear", "Normal linear intensity scale", wx.ITEM_RADIO) @@ -247,8 +252,15 @@ def __init__(self, parent, config, pres, tabbed): self.menuscore2 = self.experimentalmenu.Append(wx.ID_ANY, "Peak Scores Window", "Launch Peak Scores Window") self.parent.Bind(wx.EVT_MENU, self.pres.on_score_window, self.menuscore2) + self.menuscore3 = self.experimentalmenu.Append(wx.ID_ANY, "Label Peak Scores", "Label Peak Scores on Plot") + self.parent.Bind(wx.EVT_MENU, self.pres.on_score_label, self.menuscore3) + + self.menuscoreFDR = self.experimentalmenu.Append(wx.ID_ANY, "Estimate FDR", "Estimate DScore Cutoff for a Fixed False Discovery Rate") + self.parent.Bind(wx.EVT_MENU, self.pres.on_score_FDR, self.menuscoreFDR) + self.menuscore = self.experimentalmenu.Append(wx.ID_ANY, "Filter Peak Scores", "Filter Peak Scores") self.parent.Bind(wx.EVT_MENU, self.pres.on_score2, self.menuscore) + self.experimentalmenu.AppendSeparator() # self.menuMinimize = self.experimentalmenu.Append(wx.ID_ANY, "Minimize", "Minimize Peak List") # self.experimentalmenu.AppendSeparator() @@ -299,9 +311,18 @@ def __init__(self, parent, config, pres, tabbed): self.menuifams = self.experimentalmenu.Append(wx.ID_ANY, "iFAMS") self.parent.Bind(wx.EVT_MENU, self.pres.on_iFAMS, self.menuifams) + self.experimentalmenu.AppendSeparator() self.menuisotopes = self.experimentalmenu.Append(wx.ID_ANY, "Plot Averagine Isotope Distributions") self.parent.Bind(wx.EVT_MENU, self.pres.on_plot_isotope_distribution, self.menuisotopes) + self.experimentalmenu.AppendSeparator() + self.menubiocalc = self.experimentalmenu.Append(wx.ID_ANY, "Protein/RNA Mass Calculator") + self.parent.Bind(wx.EVT_MENU, self.pres.on_biopolymer, self.menubiocalc) + + self.menutheomass = self.experimentalmenu.Append(wx.ID_ANY, "Plot Theoretical Mass") + self.parent.Bind(wx.EVT_MENU, self.pres.plot_theo_mass, self.menutheomass) + + # self.menucentroid = self.experimentalmenu.Append(wx.ID_ANY, "Get Centroid at FWHM") # self.parent.Bind(wx.EVT_MENU, self.pres.on_centroid, self.menucentroid) @@ -444,6 +465,9 @@ def menu_501_503(self, event): self.config.intscale = "Square Root" print(self.config.intscale) + def menu_4001(self, event): + self.config.autotune=self.advancedmenu.IsChecked(4001) + def on_custom_defaults(self, e): # print("Clicked", e) try: diff --git a/unidec_modules/isolated_packages/biopolymer_calculator.py b/unidec_modules/isolated_packages/biopolymer_calculator.py new file mode 100644 index 00000000..ec9d117c --- /dev/null +++ b/unidec_modules/isolated_packages/biopolymer_calculator.py @@ -0,0 +1,163 @@ +import numpy as np +import wx + +aa_masses = {'A': 71.0788, 'C': 103.1388, 'D': 115.0886, 'E': 129.1155, 'F': 147.1766, + 'G': 57.0519, 'H': 137.1411, 'I': 113.1594, 'K': 128.1741, 'L': 113.1594, + 'M': 131.1926, 'N': 114.1038, 'P': 97.1167, 'Q': 128.1307, 'R': 156.1875, + 'S': 87.0782, 'T': 101.1051, 'V': 99.1326, 'W': 186.2132, 'Y': 163.1760} + +rna_masses = {'A': 329.2, 'U': 306.2, 'C': 305.2, 'G': 345.2, 'T': 306.2} + +dna_masses = {'A': 313.2, 'T': 304.2, 'C': 289.2, 'G': 329.2} + +sequence = "testtest" +s2 = "MKTVVLAVAVLFLTGSQARHFWQRDDPQTPWDRVKDFATVYVDAVKDSGREYVSQFETSALGKQLNLNLLENWDTLGSTVGRLQEQLGPVTQEFWDNLEKETEWLRREMNKDLEEVKAKVQPYLDQFQTKWQEEVALYRQKMEPLGAELRDGARQKLQELQEKLTPLGEDLRDRMRHHVDALRTKMTPYSDQMRDRLAERLAQLKDSPTLAEYHTKAADHLKAFGEKAKPALEDLRQGLMPVFESFKTRIMSMVEEASKKLNAQ" + +mass_water = 18.0153 +mass_OH = 17.008 +mass_O = 15.9994 +mass_HPO4 = 95.9793 +mass_H = 1.00794 + + +def get_aa_mass(letter): + try: + return aa_masses[letter] + except: + print("Bad Amino Acid Code:", letter) + return 0 + + +def get_rna_mass(letter): + if letter == "T": + print("Assuming T means U") + + try: + return rna_masses[letter] + except: + print("Bad RNA Code:", letter) + return 0 + + +def calc_pep_mass(sequence): + seq = sequence.upper() + mass = np.sum([get_aa_mass(s) for s in seq]) + mass_water + return np.round(mass, 2) + + +def calc_rna_mass(sequence, threeend="OH", fiveend="MP"): + seq = sequence.upper() + mass = np.sum([get_rna_mass(s) for s in seq]) + if threeend == "OH": + mass += mass_OH + + if fiveend == "OH": + mass -= mass_HPO4 + mass += mass_OH + elif fiveend == "MP": + mass += mass_H + elif fiveend == "TP": + mass += mass_HPO4 + mass_HPO4 - mass_O - mass_O + mass_H + + return round(mass, 2) + + +class BiopolymerFrame(wx.Dialog): + def __init__(self, parent): + wx.Dialog.__init__(self, parent, title="Biopolymer Calculator", size=(500, 500)) # ,size=(-1,-1)) + self.parent = parent + self.mass = "" + self.type = 0 #Protein/Peptide = 0, RNA = 1, DNA = 2 + self.typerna = 1 #fivend parameter + + self.panel = wx.Panel(self) + + self.ctltype = wx.RadioBox(self.panel, label="Type", choices=["Protein/Peptide", "RNA"]) + self.ctltyperna = wx.RadioBox(self.panel, label="RNA 5' End", choices=["-OH", "MonoP", "TriP"]) + self.ctltype.SetSelection(self.type) + self.ctltyperna.SetSelection(self.typerna) + + self.ctlseq = wx.TextCtrl(self.panel, value="", size=(450, 200), style=wx.TE_MULTILINE | wx.TE_CHARWRAP) + self.ctlmass = wx.TextCtrl(self.panel, value="", ) + + self.calcbutton = wx.Button(self.panel, -1, "Calculate", size=(450, 25)) + self.Bind(wx.EVT_BUTTON, self.calculate, self.calcbutton) + + self.sizer = wx.BoxSizer(wx.VERTICAL) + + s2 = wx.BoxSizer(wx.HORIZONTAL) + s2.Add(self.ctltype, 0, flag=wx.ALIGN_CENTER_VERTICAL) + s2.Add(self.ctltyperna, 0, flag=wx.ALIGN_CENTER_VERTICAL) + self.sizer.Add(s2, 0) + + self.sizer.Add(wx.StaticText(self.panel, label=" Sequence: "), flag=wx.ALIGN_CENTER_VERTICAL) + self.sizer.Add(self.ctlseq, 0, flag=wx.ALIGN_CENTER_VERTICAL | wx.ALIGN_CENTER_HORIZONTAL) + + self.sizer.Add(self.calcbutton, 0, flag=wx.ALIGN_CENTER_VERTICAL| wx.ALIGN_CENTER_HORIZONTAL) + + self.sizer.Add(wx.StaticText(self.panel, label="\nCalculated Mass (Da): "), flag=wx.ALIGN_CENTER_VERTICAL) + self.sizer.Add(self.ctlmass, 0, flag=wx.ALIGN_CENTER_VERTICAL) + + hboxend = wx.BoxSizer(wx.HORIZONTAL) + okbutton = wx.Button(self.panel, label='Ok') + closebutton = wx.Button(self.panel, label='Cancel') + okbutton.Bind(wx.EVT_BUTTON, self.close_ok) + closebutton.Bind(wx.EVT_BUTTON, self.on_close_cancel) + + hboxend.Add(okbutton) + hboxend.Add(closebutton, flag=wx.LEFT, border=5) + + self.sizer.Add(hboxend, flag=wx.ALIGN_CENTER | wx.TOP | wx.BOTTOM, border=10) + + self.panel.SetSizer(self.sizer) + self.panel.Fit() + + + def calculate(self, e=0): + self.seq = self.ctlseq.GetValue() + self.type = self.ctltype.GetSelection() + self.typerna = self.ctltyperna.GetSelection() + + if self.typerna == 0: + fiveend = "OH" + elif self.typerna == 1: + fiveend = "MP" + elif self.typerna == 2: + fiveend = "TP" + if self.type == 0: + self.mass = calc_pep_mass(self.seq) + elif self.type == 1: + self.mass = calc_rna_mass(self.seq, fiveend=fiveend) + print("Mass:", self.mass) + self.ctlmass.SetValue(str(self.mass)) + + def close_ok(self, e=None): + self.Destroy() + try: + self.EndModal(0) + except Exception as e: + pass + + def on_close_cancel(self, e): + """ + Close the dialog but do not modify any of the values. + :param e: Unused event + :return: None + """ + self.Destroy() + try: + self.EndModal(1) + except Exception as e: + pass + + # TODO: Add DNA feature + # TODO: Cleanup buttons and positions + # TODO: Add some color + +if __name__ == "__main__": + print(mass_HPO4 + mass_HPO4 - mass_O - mass_O + mass_H + mass_H) + app = wx.App() + panel = BiopolymerFrame(None, modal=True) + panel.Show() + # panel.draw() + app.MainLoop() diff --git a/unidec_modules/isolated_packages/score_window.py b/unidec_modules/isolated_packages/score_window.py index 7714f83b..1736512d 100644 --- a/unidec_modules/isolated_packages/score_window.py +++ b/unidec_modules/isolated_packages/score_window.py @@ -17,14 +17,16 @@ def scr(score): s = int(np.round(score * 100)) return str(s) + def scr2(p): - scores = [p.dscore, p.uni_score, p.pca_score, p.z_score, p.fwhm_score] + scores = [p.dscore, p.uscore, p.mscore, p.z_score, p.fscore] strings = [scr(s) for s in scores] out = "" for s in strings: - out+=s+"," + out += s + "," return out + def score_plots(eng): import matplotlib.pyplot as plt ztab = eng.data.ztab @@ -39,15 +41,15 @@ def score_plots(eng): plt.subplot(131) plt.title( "Combined Score: " + str(round(p.dscore * 100, 2)) + "\nCharge Score: " + str(round(p.z_score * 100, 2)) - ) + ) plt.plot(ztab, sumz) - plt.plot(p.pca_zdist[:, 0], p.pca_zdist[:, 1], color="k") + plt.plot(p.zdist[:, 0], p.zdist[:, 1], color="k") plt.subplot(132) for ival in ints: mdat = ival / np.amax(ints) plt.plot(m, mdat) - plt.plot(p.pca_mdist[:, 0], p.pca_mdist[:, 1]/np.sum(p.pca_mdist[:,1])*np.sum(mdat), color="k") - plt.title("Mass:" + str(p.mass) + "\nPCA Score: " + str(round(p.pca_score * 100, 2))) + plt.plot(p.mdist[:, 0], p.mdist[:, 1] / np.sum(p.mdist[:, 1]) * np.sum(mdat), color="k") + plt.title("Mass:" + str(p.mass) + "\nPeak Shape Score: " + str(round(p.mscore * 100, 2))) plt.subplot(133) for i, z in enumerate(ztab): @@ -56,8 +58,8 @@ def score_plots(eng): y = v[:, 1] zvals = v[:, 2] plt.title( - "Uniqueness : " + str(round(p.uni_score * 100, 2)) + "\nR Squared: " + str(round(p.rsquared * 100, 2)) - ) + "Uniqueness : " + str(round(p.uscore * 100, 2)) + "\nFWHM: " + str(round(p.fscore * 100, 2)) + ) if True: try: x = x - np.amin(x) @@ -69,10 +71,75 @@ def score_plots(eng): plt.show() + +def score_plots2(eng, save=False): + import matplotlib.pyplot as plt + from matplotlib import rcParams + rcParams['ps.useafm'] = True + rcParams['ps.fonttype'] = 42 + rcParams['pdf.fonttype'] = 42 + + colormap = cm.get_cmap('RdYlGn') + norm = Normalize(vmin=0, vmax=1) + + plt.rcParams.update({'font.size': 10}) + fig = plt.figure(figsize=[4, 3]) + p1 = plt.subplot(211) + plt.plot(eng.data.data2[:, 0], eng.data.data2[:, 1] * 100, color="k") + plt.xlabel("$\it{m/z}$") + p1.get_yaxis().set_ticks([0, 50, 100]) + p1.get_yaxis().set_ticklabels(["0", '%', "100"]) + p1.spines['top'].set_visible(False) + p1.spines['right'].set_visible(False) + plt.ylim(0, 100) + plt.xlim(np.amin(eng.data.data2[:, 0]), np.amax(eng.data.data2[:, 0])) + p2 = plt.subplot(212) + plt.plot(eng.data.massdat[:, 0] / 1000., eng.data.massdat[:, 1], color="k") + plt.xlabel("Mass (kDa)") + p2.get_yaxis().set_ticks([0, 50, 100]) + p2.get_yaxis().set_ticklabels(["0", '%', "100"]) + p2.spines['top'].set_visible(False) + p2.spines['right'].set_visible(False) + plt.ylim(0, 100) + xmin = np.amin(eng.data.massdat[:, 0]) / 1000 + xmax = np.amax(eng.data.massdat[:, 0]) / 1000 + plt.xlim(xmin, xmax) + offset = 0.2 * np.amax(eng.data.massdat[:, 1]) + for k, p in enumerate(eng.pks.peaks): + text = str(int(round(p.dscore * 100))) + color = colormap(norm(p.dscore)) + lum = ud.get_luminance(color) + if lum > luminance_cutoff / 255.: + #print(p.mass, lum, color) + color = np.array(color) * 0.8 + color[3] = 1 + p2.text(p.mass / 1000., p.height + offset, text, horizontalalignment="center", + verticalalignment="top", color=color) + pass + + text = "UniScore=" + str(int(round(eng.pks.uniscore * 100))) + color = colormap(norm(eng.pks.uniscore)) + p2.text(xmax, 100, text, horizontalalignment="right", + verticalalignment="top", color=color) + plt.tight_layout() + + if save: + fig.savefig("Fig1.pdf") + fig.savefig("Fig1.png") + # plt.show() + return fig + + class ScoreListCtrl(wx.grid.Grid): - def __init__(self, parent, id_value, pos=wx.DefaultPosition, size=wx.DefaultSize, style=wx.LC_REPORT): - wx.grid.Grid.__init__(self, parent, id_value, pos, size, style=style) + def __init__(self, parent, id_value, pos=wx.DefaultPosition, size=wx.DefaultSize): # , style=wx.LC_REPORT): + wx.grid.Grid.__init__(self, parent, id_value, pos, size) # , style=style) self.parent = parent + self.Bind(wx.EVT_KEY_DOWN, self.OnKey) + + def OnKey(self, event): + # If Ctrl+c is pressed... + if event.ControlDown() and event.GetKeyCode() == 67: + self.copy() def populate(self, pks): self.ClearGrid() @@ -101,7 +168,8 @@ def populate(self, pks): self.SetCellFont(i, 0, wx.Font(10, wx.FONTFAMILY_DEFAULT, wx.FONTSTYLE_NORMAL, wx.FONTWEIGHT_BOLD)) self.SetCellValue(i, 1, str(p.height)) - scores = [p.dscore, p.uni_score, p.pca_score, p.z_score, p.fwhm_score] + scores = [p.dscore, p.uscore, p.mscore, p.z_score, p.fscore] + scores = deepcopy(scores) cindex = 2 for s in scores: self.SetCellValue(i, cindex, scr(s)) @@ -123,10 +191,52 @@ def populate(self, pks): def clear_list(self): self.ClearGrid() + def copy(self): + '''Copies the current range of select cells to clipboard. + ''' + # Get number of copy rows and cols + if self.GetSelectionBlockTopLeft() == []: + rowstart = self.GetGridCursorRow() + colstart = self.GetGridCursorCol() + rowend = rowstart + colend = colstart + else: + rowstart = self.GetSelectionBlockTopLeft()[0][0] + colstart = self.GetSelectionBlockTopLeft()[0][1] + rowend = self.GetSelectionBlockBottomRight()[0][0] + colend = self.GetSelectionBlockBottomRight()[0][1] + + self.crows = rowend - rowstart + 1 + self.ccols = colend - colstart + 1 + + # data variable contains text that must be set in the clipboard + data = '' + # For each cell in selected range append the cell value + # in the data variable Tabs '\t' for cols and '\n' for rows + for r in range(self.crows): + for c in range(self.ccols): + data += str(self.GetCellValue(rowstart + r, colstart + c)) + if c < self.ccols - 1: + data += '\t' + data += '\n' + + # Create text data object + clipboard = wx.TextDataObject() + + # Set data object value + clipboard.SetText(data) + + # Put the data in the clipboard + if wx.TheClipboard.Open(): + wx.TheClipboard.SetData(clipboard) + wx.TheClipboard.Close() + else: + wx.MessageBox("Can't open the clipboard", "Error") + class ScoreFrame(wx.Frame): def __init__(self, parent): - wx.Frame.__init__(self, parent, title="Score Window", size=(1000, 500)) # ,size=(-1,-1)) + wx.Frame.__init__(self, parent, title="Score Window", size=(1017, 500)) # ,size=(-1,-1)) self.parent = parent self.type = 1 self.elevation = None @@ -134,10 +244,10 @@ def __init__(self, parent): self.panel = wx.Panel(self) - self.listctrl = ScoreListCtrl(self, wx.ID_ANY, size=(1000, 500)) + self.listctrl = ScoreListCtrl(self.panel, wx.ID_ANY, size=(1000, 500)) self.sizer = wx.BoxSizer(wx.VERTICAL) - self.sizer.Add(self.listctrl, 1, wx.LEFT | wx.TOP | wx.GROW) + self.sizer.Add(self.listctrl, 0, wx.LEFT | wx.TOP | wx.EXPAND) self.panel.SetSizer(self.sizer) self.panel.Fit() @@ -146,6 +256,14 @@ def __init__(self, parent): def populate(self, pks): self.listctrl.populate(pks) + +def score_window(eng): + app = wx.App() + panel = ScoreFrame(None) + panel.populate(eng.pks) + app.MainLoop() + + if __name__ == "__main__": if True: @@ -154,6 +272,7 @@ def populate(self, pks): os.chdir(directory) file = "ADH.txt" + file = "0.txt" eng = unidec.UniDec() eng.open_file(file, directory) @@ -164,7 +283,7 @@ def populate(self, pks): eng.config.massub = 200000 eng.config.masslb = 50000 eng.run_unidec() - eng.config.peakthresh = 0.025 + # eng.config.peakthresh = 0.025 eng.pick_peaks() # eng.pks_pca() eng.dscore() diff --git a/unidec_modules/masstools.py b/unidec_modules/masstools.py index 613010cd..ce33fb91 100644 --- a/unidec_modules/masstools.py +++ b/unidec_modules/masstools.py @@ -6,7 +6,7 @@ from scipy.interpolate import interp1d from unidec_modules.AutocorrWindow import AutocorrWindow -from unidec_modules.isolated_packages import FileDialogs +from unidec_modules.isolated_packages import FileDialogs, biopolymer_calculator import unidec_modules.unidectools as ud ''' @@ -35,7 +35,9 @@ def __init__(self, parent, id_value, pos=wx.DefaultPosition, size=wx.DefaultSize self.SetColumnWidth(0, width=190) # , wx.LIST_AUTOSIZE) self.popupID1 = wx.NewId() + self.popupID3 = wx.NewId() self.Bind(wx.EVT_MENU, self.on_masslist_delete, id=self.popupID1) + self.Bind(wx.EVT_MENU, self.on_biopolymer, id=self.popupID3) self.Bind(wx.EVT_LIST_ITEM_RIGHT_CLICK, self.on_right_click_masslist, self) def populate(self, listctrldata): @@ -82,6 +84,7 @@ def on_right_click_masslist(self, event): """ if hasattr(self, "popupID1"): menu = wx.Menu() + menu.Append(self.popupID3, "Calculate Mass from Sequence") menu.Append(self.popupID1, "Delete") self.PopupMenu(menu) menu.Destroy() @@ -101,6 +104,21 @@ def on_masslist_delete(self, event): for i in range(0, num): self.DeleteItem(selection[num - i - 1]) + def on_biopolymer(self, e=0): + bp = biopolymer_calculator.BiopolymerFrame(self) + result = bp.ShowModal() + if result == 0: + print("Imported Mass from Sequence:", bp.mass) + calc_mass = 0 + try: + calc_mass = float(bp.mass) + if calc_mass > 0: + item = self.GetFirstSelected() + self.SetItem(item, 0, str(calc_mass)) + except: + pass + bp.Destroy() + class MassListCtrlPanel(wx.Panel): def __init__(self, parent, columntitle="Mass (Da)", size=(210, 380)): @@ -144,7 +162,9 @@ def __init__(self, parent, id_value, pos=wx.DefaultPosition, size=wx.DefaultSize self.index = 0 self.popupID2 = wx.NewId() + self.popupID3 = wx.NewId() self.Bind(wx.EVT_MENU, self.on_oligo_delete, id=self.popupID2) + self.Bind(wx.EVT_MENU, self.on_biopolymer, id=self.popupID3) self.Bind(wx.EVT_LIST_ITEM_RIGHT_CLICK, self.on_right_click_oligolist, self) def clear(self): @@ -214,6 +234,7 @@ def on_right_click_oligolist(self, event): :return: None """ menu = wx.Menu() + menu.Append(self.popupID3, "Calculate Mass from Sequence") menu.Append(self.popupID2, "Delete") self.PopupMenu(menu) menu.Destroy() @@ -233,6 +254,21 @@ def on_oligo_delete(self, event): for i in range(0, num): self.DeleteItem(selection[num - i - 1]) + def on_biopolymer(self, e=0): + bp = biopolymer_calculator.BiopolymerFrame(self) + result = bp.ShowModal() + if result == 0: + print("Imported Mass from Sequence:", bp.mass) + calc_mass = 0 + try: + calc_mass = float(bp.mass) + if calc_mass > 0: + item = self.GetFirstSelected() + self.SetItem(item, 1, str(calc_mass)) + except: + pass + bp.Destroy() + class OligomerListCrtlPanel(wx.Panel): def __init__(self, parent): diff --git a/unidec_modules/peakstructure.py b/unidec_modules/peakstructure.py index b504bcef..10f3abf9 100644 --- a/unidec_modules/peakstructure.py +++ b/unidec_modules/peakstructure.py @@ -53,19 +53,20 @@ def __init__(self): self.extracts = [] self.errorFWHM = 0 self.intervalFWHM = [0, 0] + self.badFWHM=False self.errormean = -1 self.errorreplicate = 0 self.avgcharge = 0 self.zstack = [] self.mzstack = [] - self.pca_score = 0 - self.uni_score = 0 + self.mscore = 0 + self.uscore = 0 self.z_score = 0 self.rsquared = 0 - self.fwhm_score = 0 + self.fscore = 0 self.dscore = 0 - self.pca_mdist = None - self.pca_zdist = None + self.mdist = None + self.zdist = None def line_out(self, type="Full"): if type == "Full": @@ -106,7 +107,7 @@ def __init__(self): self.marklen = 0 self.massbins = 0 self.norm = 1 - self.aps = 0 + self.uniscore = 0 def add_peaks(self, parray, massbins=0): """ diff --git a/unidec_modules/plot1d.py b/unidec_modules/plot1d.py index 846192f9..99d51750 100644 --- a/unidec_modules/plot1d.py +++ b/unidec_modules/plot1d.py @@ -205,7 +205,7 @@ def histogram(self, xarray, labels=None, xlab="", ylab="", title=""): label = "" xvals = xarray[i] - n, bins, patches = self.subplot1.hist(xvals, label=label, histtype="stepfilled", alpha=0.4, normed=1) + n, bins, patches = self.subplot1.hist(xvals, label=label, histtype="stepfilled", alpha=0.4, density=1) ytempmax = np.amax(n) xtempmax = np.amax(bins) if ytempmax > ymax: diff --git a/unidec_modules/unidec_enginebase.py b/unidec_modules/unidec_enginebase.py index fc116aad..ae7001d1 100644 --- a/unidec_modules/unidec_enginebase.py +++ b/unidec_modules/unidec_enginebase.py @@ -14,7 +14,7 @@ def __init__(self): :return: None """ - self.version = "4.0.2" + self.version = "4.1.0" print("\nUniDec Engine v." + self.version) self.config = None self.config_history = [] diff --git a/unidec_modules/unidec_presbase.py b/unidec_modules/unidec_presbase.py index 5e5bce72..7f10c6f5 100644 --- a/unidec_modules/unidec_presbase.py +++ b/unidec_modules/unidec_presbase.py @@ -1,10 +1,11 @@ import wx import unidec_modules.isolated_packages.FileDialogs as FileDialogs +import unidec_modules.isolated_packages.biopolymer_calculator as biopolymer_calculator import os import numpy as np import unidec_modules.unidectools as ud import unidec_modules.peakwidthtools as peakwidthtools -from unidec_modules import ManualSelectionWindow, AutocorrWindow +from unidec_modules import ManualSelectionWindow, AutocorrWindow, miscwindows import time @@ -237,6 +238,45 @@ def on_autocorr_window(self, e=None): dlg.initalize_dialog(self.eng.config, self.eng.data.massdat) dlg.ShowModal() + def on_biopolymer(self, e=0): + bp = biopolymer_calculator.BiopolymerFrame(self.view) + result = bp.Show() + ''' + if result == 0: + print("Calculated Mass from Sequence:", bp.mass) + calc_mass = 0 + try: + calc_mass = float(bp.mass) + if calc_mass > 0: + label = bp.mass + mval = np.amax(self.eng.data.massdat[:, 1]) + self.view.plot2.addtext(label, calc_mass, mval * 0.96, vlines=False) + pass + except: + pass + bp.Destroy()''' + + def plot_theo_mass(self, e=0): + dialog = miscwindows.SingleInputDialog(self.view) + dialog.initialize_interface(title="Theoretical Mass", message="Set Theoretical Mass to Plot (Da): ", + defaultvalue="") + dialog.ShowModal() + + try: + mass = float(dialog.value) + except: + print("Error with theoretical mass input:", dialog.value) + mass = 0 + + if self.eng.config.massbins >= 1: + label = str(int(round(mass))) + else: + label = str(mass) + mval = np.amax(self.eng.data.massdat[:, 1]) + self.view.plot2.addtext(label, mass, mval * 0.96, vlines=True) + self.on_charge_states(mass=mass) + + def register(self, e=None): from unidec_modules.data_reader import register register() diff --git a/unidec_modules/unidecstructure.py b/unidec_modules/unidecstructure.py index 3e484083..162fa4e1 100644 --- a/unidec_modules/unidecstructure.py +++ b/unidec_modules/unidecstructure.py @@ -38,6 +38,7 @@ def __init__(self): self.imflag = 0 self.metamode = -2 self.filetype = 0 + self.autotune = False self.twavedict = {1: "Logarithmic", 2: "Linear", 3: "Power Law"} self.backgroundchoices = ["Subtract Minimum", "Subtract Line", diff --git a/unidec_modules/unidectools.py b/unidec_modules/unidectools.py index 89ab3f30..80e97a54 100644 --- a/unidec_modules/unidectools.py +++ b/unidec_modules/unidectools.py @@ -96,9 +96,14 @@ def commonprefix(args): def get_luminance(color, type=2): - r = color.Red() - g = color.Green() - b = color.Blue() + try: + r = color.Red() + g = color.Green() + b = color.Blue() + except: + r = color[0] + g = color[1] + b = color[2] l1 = 0.2126 * r + 0.7152 * g + 0.0722 * b l2 = 0.299 * r + 0.587 * g + 0.114 * b l3 = np.sqrt(0.299 * r * r + 0.587 * g * g + 0.114 * b * b) @@ -664,7 +669,7 @@ def header_test(path): try: float(sin) except ValueError: - #print(sin, line) + # print(sin, line) header += 1 break if header > 0: @@ -740,9 +745,9 @@ def load_mz_file(path, config=None): else: if extension == ".txt": data = np.loadtxt(path, skiprows=header_test(path)) - #data = np.loadtxt(path, skiprows=header_test(path, delimiter=","), delimiter=",") + # data = np.loadtxt(path, skiprows=header_test(path, delimiter=","), delimiter=",") elif extension == ".csv": - data = np.loadtxt(path, delimiter=",", skiprows=1, usecols=(0,1)) + data = np.loadtxt(path, delimiter=",", skiprows=1, usecols=(0, 1)) elif extension == ".mzml": data = mzMLimporter.mzMLimporter(path).get_data() txtname = path[:-5] + ".txt" @@ -900,7 +905,7 @@ def datachop(datatop, newmin, newmax): :param newmax: Maximum value of chopped data :return: New data limited between the two bounds """ - boo1 = np.logical_and(datatop[:, 0] < newmax, datatop[:, 0] > newmin) + boo1 = np.logical_and(datatop[:, 0] <= newmax, datatop[:, 0] >= newmin) return datatop[boo1] @@ -1432,11 +1437,8 @@ def unidec_call(config, silent=False, **kwargs): call = [config.UniDecPath, str(config.confname)] - if "kill" in kwargs: - killnum = kwargs["kill"] - # print "Killing Peak:",killnum - call.append("-kill") - call.append(str(killnum)) + if config.autotune: + call.append("-autotune") if silent: out = subprocess.call(call, stdout=subprocess.PIPE) @@ -1472,9 +1474,12 @@ def peakdetect(data, config=None, window=10, threshold=0): start = 0 if end > length: end = length - testmax = np.amax(data[int(start):int(end) + 1, 1]) - if data[i, 1] == testmax and data[i, 1] != data[i - 1, 1]: + start = int(start) + end = int(end)+1 + testmax = np.amax(data[start:end, 1]) + if data[i, 1] == testmax and np.all(data[i, 1] != data[start:i, 1]): peaks.append([data[i, 0], data[i, 1]]) + return np.array(peaks) @@ -2340,27 +2345,32 @@ def peaks_error_FWHM(pks, data): except: datamax = 0 div = datamax / pmax - for pk in pks.peaks: - int = pk.height - index = nearest(data[:, 0], pk.mass) + for p in pks.peaks: + int = p.height + index = nearest(data[:, 0], p.mass) leftwidth = 0 rightwidth = 0 counter = 1 leftfound = False rightfound = False + val = (int * div) / 2. while rightfound is False or leftfound is False: - if leftfound is False: - if data[index - counter, 1] <= (int * div) / 2.: + if leftfound is False and index - counter >= 0: + if data[index - counter, 1] <= val: leftfound = True leftwidth += 1 else: leftwidth += 1 - if rightfound is False: - if data[index + counter, 1] <= (int * div) / 2.: + else: + leftfound = True + if rightfound is False and index + counter < len(data): + if data[index + counter, 1] <= val: rightfound = True rightwidth += 1 else: rightwidth += 1 + else: + rightfound = True counter += 1 indexstart = index - leftwidth @@ -2370,12 +2380,35 @@ def peaks_error_FWHM(pks, data): if indexend >= len(data): indexend = len(data) - 1 - pk.errorFWHM = data[indexend, 0] - data[indexstart, 0] - pk.intervalFWHM = [data[indexstart, 0], data[indexend, 0]] - start = pk.intervalFWHM[0] - end = pk.intervalFWHM[1] - pk.centroid = center_of_mass(data, start, end)[0] - print("Apex:", pk.mass, "Centroid:", pk.centroid, "FWHM Range:", pk.intervalFWHM) + mlow = data[indexstart, 0] + mhigh = data[indexend, 0] + + p.errorFWHM = mhigh - mlow + p.intervalFWHM = [mlow, mhigh] + + diff = np.abs(np.array(p.intervalFWHM) - p.mass) + r = safedivide1(diff, p.errorFWHM) + #Check to make sure that the FWHM is symmetric. If not, flag it and bring things back. + cutoff = 0.75 + if r[0] > cutoff: + mlow = p.mass - diff[1] * cutoff/(1-cutoff) + p.errorFWHM = mhigh - mlow + p.intervalFWHM = [mlow, mhigh] + p.badFWHM = True + pass + elif r[1] > cutoff: + mhigh = p.mass + diff[0] * cutoff/(1-cutoff) + p.errorFWHM = mhigh - mlow + p.intervalFWHM = [mlow, mhigh] + p.badFWHM = True + pass + else: + p.badFWHM = False + + start = p.intervalFWHM[0] + end = p.intervalFWHM[1] + p.centroid = center_of_mass(data, start, end)[0] + print("Apex:", p.mass, "Centroid:", p.centroid, "FWHM Range:", p.intervalFWHM) def peaks_error_mean(pks, data, ztab, massdat, config): diff --git a/unidec_src/UniDec/UD_H5_IO.h b/unidec_src/UniDec/UD_H5_IO.h index 215c91b1..76099372 100644 --- a/unidec_src/UniDec/UD_H5_IO.h +++ b/unidec_src/UniDec/UD_H5_IO.h @@ -11,6 +11,9 @@ // // +#ifndef UD_H5_IO +#define UD_H5_IO + #include #include #include @@ -265,3 +268,6 @@ int question_grids(hid_t file_id) if (status == 0) { write_attr_int(file_id, "/config", "gridsflag", 0); }//printf("Grids not found %d\n", status);} return int_attr(file_id, "/config", "gridsflag", 0); } + + +#endif \ No newline at end of file diff --git a/unidec_src/UniDec/UD_score.h b/unidec_src/UniDec/UD_score.h index 621e5374..793f2ef8 100644 --- a/unidec_src/UniDec/UD_score.h +++ b/unidec_src/UniDec/UD_score.h @@ -17,11 +17,12 @@ -void get_fwhms(Config config, const double* massaxis, const double* masssum, const double* peakx, double* fwhmlow, double* fwhmhigh) +void get_fwhms(Config config, const int mlen, const double* massaxis, const double* masssum, const double* peakx, double* fwhmlow, double* fwhmhigh, double* badfwhm) { for (int i = 0; i < config.plen; i++) { - int index = nearfast(massaxis, peakx[i], config.mlen); + double peak = peakx[i]; + int index = nearfast(massaxis, peak, mlen); double max = masssum[index]; double halfmax = max / 2.0; @@ -49,7 +50,7 @@ void get_fwhms(Config config, const double* massaxis, const double* masssum, con while (hfound == 0) { hindex += 1; - if (hindex >= config.mlen) + if (hindex >= mlen) { hfound = 1; } @@ -63,9 +64,40 @@ void get_fwhms(Config config, const double* massaxis, const double* masssum, con } - fwhmlow[i] = massaxis[lindex]; - fwhmhigh[i] = massaxis[hindex]; - //printf("%f %f %f\n", massaxis[index], massaxis[lindex], massaxis[hindex]); + double mlow = massaxis[lindex]; + double mhigh = massaxis[hindex]; + + //Catch peaks that are too assymetric. Fix and flag them. + double highdiff = mhigh - peak; + double lowdiff = peak - mlow; + double fwhm = mhigh - mlow; + + double threshold = 0.75; + double mult = threshold / (1 - threshold); + if (fwhm != 0) { + double rh = highdiff / fwhm; + double rl = lowdiff / fwhm; + if (rh > threshold) + { + mhigh = peak + lowdiff * mult; + badfwhm[i] = 1; + } + else if (rl > threshold) + { + mlow = peak - highdiff * mult; + badfwhm[i] = 1; + } + } + else + { + printf("Warning: FWHM was 0.\n"); + //mlow = peak - config.massbins * 3; + //mhigh = peak + config.massbins * 3; + } + + fwhmlow[i] = mlow; + fwhmhigh[i] = mhigh; + //printf("%f %f %f\n", massaxis[index], mlow, mhigh); } } @@ -73,7 +105,6 @@ double uscore(Config config, const double* dataMZ, const double* dataInt, const const double mlow, const double mhigh, const double peak) { double power = 2; - double uscore = 0; //double* errors = NULL; //double* weights = NULL; //errors = calloc(config.numz, sizeof(double)); @@ -99,6 +130,8 @@ double uscore(Config config, const double* dataMZ, const double* dataInt, const if (dataMZ[lindex] < lmz) { lindex += 1; } if (dataMZ[hindex] > hmz) { hindex -= 1; } + //printf("%f %f\n", dataMZ[lindex], dataMZ[hindex]); + double sumdata = 0; double sumerrors = 0; double sumdecon = 0; @@ -134,7 +167,7 @@ double uscore(Config config, const double* dataMZ, const double* dataInt, const } -double mscore(Config config, const double* massaxis, const double* masssum, const double* massgrid, const double mlow, const double mhigh, const double peak) +double mscore(Config config, const int mlen, const double* massaxis, const double* masssum, const double* massgrid, const double mlow, const double mhigh, const double peak) { double power = 2; double mscore = 0; @@ -143,12 +176,12 @@ double mscore(Config config, const double* massaxis, const double* masssum, cons double denominator = 0; double datamin = massaxis[0]; - double datamax = massaxis[config.mlen - 1]; + double datamax = massaxis[mlen - 1]; if (mhigh > datamin && mlow < datamax) { - int lindex = nearfast(massaxis, mlow, config.mlen); - int hindex = nearfast(massaxis, mhigh, config.mlen); + int lindex = nearfast(massaxis, mlow, mlen); + int hindex = nearfast(massaxis, mhigh, mlen); if (massaxis[lindex] < mlow) { lindex += 1; } if (massaxis[hindex] > mhigh) { hindex -= 1; } @@ -210,7 +243,7 @@ double mscore(Config config, const double* massaxis, const double* masssum, cons } -double zscore(Config config, const double* massaxis, const double* masssum, const double* massgrid, const double mlow, const double mhigh, const double peak) +double zscore(Config config, const int mlen, const double* massaxis, const double* masssum, const double* massgrid, const double mlow, const double mhigh, const double peak) { double zscore = 0; @@ -218,12 +251,12 @@ double zscore(Config config, const double* massaxis, const double* masssum, cons zvals = calloc(config.numz, sizeof(double)); double datamin = massaxis[0]; - double datamax = massaxis[config.mlen - 1]; + double datamax = massaxis[mlen - 1]; if (mhigh > datamin && mlow < datamax) { - int lindex = nearfast(massaxis, mlow, config.mlen); - int hindex = nearfast(massaxis, mhigh, config.mlen); + int lindex = nearfast(massaxis, mlow, mlen); + int hindex = nearfast(massaxis, mhigh, mlen); if (massaxis[lindex] < mlow) { lindex += 1; } if (massaxis[hindex] > mhigh) { hindex -= 1; } @@ -284,10 +317,10 @@ double zscore(Config config, const double* massaxis, const double* masssum, cons } -double find_minimum(Config config, const double* massaxis, const double* masssum, const double lowpt, const double highpt) +double find_minimum(Config config, const int mlen, const double* massaxis, const double* masssum, const double lowpt, const double highpt) { - int lindex = nearfast(massaxis, lowpt, config.mlen); - int hindex = nearfast(massaxis, highpt, config.mlen); + int lindex = nearfast(massaxis, lowpt, mlen); + int hindex = nearfast(massaxis, highpt, mlen); double minval = masssum[hindex]; for (int i = lindex; i < hindex; i++) @@ -307,35 +340,32 @@ double score_minimum(double height, double min) else { return 1; } } -double fscore(Config config, const double* massaxis, const double* masssum, const double *peakx, const double *peaky, const double mlow, const double mhigh, const double peak) +double fscore(Config config, const int mlen, const double* massaxis, const double* masssum, const double *peakx, const double height, + const double mlow, const double mhigh, const double peak, const int badfwhm) { double fscore = 1; - //First, test if the FWHM interval is highly asymetric - double threshold = 0.8; - double highdiff = mhigh - peak; - double lowdiff = peak - mlow; - double fwhm = mhigh - mlow; - - if (fwhm != 0) { - double rh = highdiff / fwhm; - double rl = lowdiff / fwhm; - if (rh > threshold) - { - fscore = 1 - ((rh - threshold) / (1 - threshold)); + //Score down peaks that are highly assymetic. + if (badfwhm == 1) { + //printf("Badfwhm\n"); + double highdiff = mhigh - peak; + double lowdiff = peak - mlow; + double fwhm = mhigh - mlow; + if (lowdiff > highdiff) { + double min = find_minimum(config, mlen, massaxis, masssum, mlow - config.massbins, peak); + double fsc = score_minimum(height, min); + //printf("Fscore2 %f %f %f\n", fsc, min, height); + fscore *= fsc; } - else if (rl > threshold) - { - fscore = 1 - ((rl - threshold) / (1 - threshold)); + else { + double min = find_minimum(config, mlen, massaxis, masssum, peak, mhigh + config.massbins); + double fsc = score_minimum(height, min); + //printf("Fscore3 %f %f %f\n", fsc, min, height); + fscore *= fsc; } } - else - { - printf("Warning: FWHM was 0."); - return 0; - } - - //Now test if the peaks are actually separated with at least a half max height + + //Test if the peaks are actually separated with at least a half max height for (int i = 0; i < config.plen; i++) { double peak2 = peakx[i]; @@ -343,18 +373,16 @@ double fscore(Config config, const double* massaxis, const double* masssum, cons { if (peak2 < peak && peak2 > mlow) { - int index = nearfast(massaxis, peak, config.mlen); - double height = masssum[index]; - double min = find_minimum(config, massaxis, masssum, peak2, peak); + double min = find_minimum(config, mlen, massaxis, masssum, peak2, peak); double fsc = score_minimum(height, min); + //printf("Fscore4 %f %f %f %f\n", fsc, min, height, peak2); fscore *= fsc; } if (peak2 > peak && peak2 < mhigh) { - int index = nearfast(massaxis, peak, config.mlen); - double height = masssum[index]; - double min = find_minimum(config, massaxis, masssum, peak, peak2); + double min = find_minimum(config, mlen, massaxis, masssum, peak, peak2); double fsc = score_minimum(height, min); + //printf("Fscore5 %f %f %f %f\n", fsc, min, height, peak2); fscore *= fsc; } } @@ -364,14 +392,13 @@ double fscore(Config config, const double* massaxis, const double* masssum, cons } -double score(Config config, const double * dataMZ, const double * dataInt, const double *mzgrid, const double *massaxis, const double* masssum, const double *massgrid, const int *nztab, const double threshold) +double score(Config config, const int mlen, const double * dataMZ, const double * dataInt, const double *mzgrid, const double *massaxis, const double* masssum, const double *massgrid, const int *nztab, const double threshold) { - printf("Starting Score %f %f\n", config.peakwin, config.peakthresh); - double xfwhm = 3; + //printf("Starting Score %f %f\n", config.peakwin, config.peakthresh); + double xfwhm = 2; double* peakx = NULL; double* peaky = NULL; - int mlen = config.mlen; peakx = calloc(mlen, sizeof(double)); peaky = calloc(mlen, sizeof(double)); @@ -385,14 +412,16 @@ double score(Config config, const double * dataMZ, const double * dataInt, const double* fwhmlow = NULL; double* fwhmhigh = NULL; + double* badfwhm = NULL; fwhmlow = calloc(plen, sizeof(double)); fwhmhigh = calloc(plen, sizeof(double)); + badfwhm = calloc(plen, sizeof(double)); - get_fwhms(config, massaxis, masssum, peakx, fwhmlow, fwhmhigh); + get_fwhms(config, mlen, massaxis, masssum, peakx, fwhmlow, fwhmhigh, badfwhm); double numerator = 0; double denominator = 0; - double avgscore = 0; + double uniscore = 0; for (int i = 0; i < plen; i++) { @@ -400,23 +429,25 @@ double score(Config config, const double * dataMZ, const double * dataInt, const double ival = peaky[i]; double l = m - (m-fwhmlow[i])*xfwhm; double h = m + (fwhmhigh[i]-m)*xfwhm; + int index = nearfast(massaxis, m, mlen); + double height = masssum[index]; double usc = uscore(config, dataMZ, dataInt, mzgrid, nztab, l, h, m); - double msc = mscore(config, massaxis, masssum, massgrid, l, h, m); - double zsc = zscore(config, massaxis, masssum, massgrid, l, h, m); - double fsc = fscore(config, massaxis, masssum, peakx, peaky, l, h, m); + double msc = mscore(config, mlen, massaxis, masssum, massgrid, l, h, m); + double zsc = zscore(config, mlen, massaxis, masssum, massgrid, l, h, m); + double fsc = fscore(config, mlen, massaxis, masssum, peakx, height, fwhmlow[i], fwhmhigh[i], m, badfwhm[i]); double dsc = usc * msc * zsc * fsc; if (dsc > threshold) { printf("Peak: Mass:%f Int:%f M:%f U:%f Z:%f F:%f D: %f \n", peakx[i], peaky[i], msc, usc, zsc, fsc, dsc); - numerator += ival * dsc; - denominator += ival; + numerator += ival * ival * dsc; + denominator += ival * ival; } } - if (denominator != 0) { avgscore = numerator / denominator; } + if (denominator != 0) { uniscore = numerator / denominator; } - printf("Average Peaks Score: %f\n", avgscore); - return avgscore; + printf("Average Peaks Score (UniScore): %f\n", uniscore); + return uniscore; } \ No newline at end of file diff --git a/unidec_src/UniDec/UniDec.h b/unidec_src/UniDec/UniDec.h index 4c88febb..2930c749 100644 --- a/unidec_src/UniDec/UniDec.h +++ b/unidec_src/UniDec/UniDec.h @@ -10,6 +10,9 @@ #define UNIDEC_H_ #endif*/ /* UNIDEC_H_ */ +#ifndef UNIDEC_HEADER +#define UNIDEC_HEADER + #include #include #include @@ -21,6 +24,98 @@ void mh5readfile3d(hid_t file_id, char *dataname, int lengthmz, double *dataMZ, double *dataInt, double *data3); int mh5getfilelength(hid_t file_id, char *dataname); +void strjoin(const char* s1, const char* s2, char* newstring); +void mh5writefile1d(hid_t file_id, char* dataname, int length, double* data1); +void mh5writefile2d(hid_t file_id, char* dataname, int length, double* data1, double* data2); + +typedef struct Input Input; + +struct Input { + double* dataMZ; + double* dataInt; + double* testmasses; + int* nztab; + double* mtab; + char* barr; + int* isotopepos; + float* isotopeval; +}; + +Input SetupInputs() +{ + Input inp; + inp.dataMZ = NULL; + inp.dataInt = NULL; + inp.testmasses = NULL; + inp.nztab = NULL; + inp.mtab = NULL; + inp.barr = NULL; + inp.isotopepos = NULL; + inp.isotopeval = NULL; + return inp; +} + +void FreeInputs(Input inp) +{ + free(inp.dataMZ); + free(inp.dataInt); + free(inp.nztab); + free(inp.mtab); + free(inp.testmasses); + free(inp.barr); + free(inp.isotopepos); + free(inp.isotopeval); +} + +typedef struct Decon Decon; + +struct Decon { + double* fitdat; + double* baseline; + double* noise; + double* massgrid; + double* massaxis; + double* massaxisval; + double* blur; + double* newblur; + double error; + int iterations; + double uniscore; + double conv; + double threshold; + int mlen; +}; + +Decon SetupDecon() { + Decon decon; + decon.fitdat = NULL; + decon.baseline = NULL; + decon.noise = NULL; + decon.massgrid = NULL; + decon.massaxis = NULL; + decon.massaxisval = NULL; + decon.blur = NULL; + decon.newblur = NULL; + decon.error = 0; + decon.iterations = 0; + decon.uniscore = 0; + decon.conv = 0; + decon.threshold = 0; + decon.mlen = 0; + return decon; +} + +void FreeDecon(Decon decon) +{ + free(decon.fitdat); + free(decon.baseline); + free(decon.noise); + free(decon.massgrid); + free(decon.massaxis); + free(decon.massaxisval); + free(decon.blur); + free(decon.newblur); +} typedef struct Config Config; @@ -47,7 +142,6 @@ struct Config int mflag; double massbins; int limitflag; - double cutoff; double psthresh; int speedyflag; int linflag; @@ -106,8 +200,9 @@ struct Config int filterwidth; double zerolog; int lengthmz; - int mlen; int plen; + int mfilelen; + int isolength; }; Config SetDefaultConfig() @@ -132,7 +227,6 @@ Config SetDefaultConfig() config.mflag = 0; config.massbins = 100; config.limitflag = 0; - config.cutoff = 0; config.psthresh = 6; config.speedyflag = 0; config.aggressiveflag = 0; @@ -191,8 +285,9 @@ Config SetDefaultConfig() config.filterwidth = 20; config.zerolog = -12; config.lengthmz = 0; - config.mlen = 0; config.plen = 0; + config.mfilelen = 0; + config.isolength = 0; return config; } @@ -217,8 +312,8 @@ Config PostImport(Config config) { //Print inputs for check printf("infile = %s\n", config.infile); - printf("outfile = %s\n", config.outfile); - printf("\n"); + //printf("outfile = %s\n", config.outfile); + //printf("\n"); } //Check to see if the mass axis should be fixed @@ -253,74 +348,74 @@ Config LoadConfig(Config config,const char *filename) { char x[500]; char y[500]; - printf("\nRead from file:"); + //printf("\nRead from file:"); while (fscanf(file, "%s %500[^\n]", x, y) != EOF) { //printf( "read in: %s %s \n", x,y ); - if (strstr(x, "input") != NULL){ strcpy(config.infile, y); printf(" input"); } - if (strstr(x, "output") != NULL){ strcpy(config.outfile, y); printf(" output"); } - if (strstr(x, "mfile") != NULL){ strcpy(config.mfile, y); config.mflag = 1; printf(" mfile"); } - if (strstr(x, "numit") != NULL){ config.numit = atoi(y); printf(" numit"); } + if (strstr(x, "input") != NULL) { strcpy(config.infile, y); }// printf(" input"); + if (strstr(x, "output") != NULL){ strcpy(config.outfile, y);}// printf(" output"); } + if (strstr(x, "mfile") != NULL){ strcpy(config.mfile, y); config.mflag = 1; }// printf(" mfile"); } + if (strstr(x, "numit") != NULL){ config.numit = atoi(y); }// printf(" numit"); } //if (strstr(x, "numz") != NULL){ config.numz = atoi(y); printf(" numz"); } - if (strstr(x, "startz") != NULL){ config.startz = atoi(y); printf(" startz"); } - if (strstr(x, "endz") != NULL) { config.endz = atoi(y); printf(" endz"); } - if (strstr(x, "zzsig") != NULL){ config.zsig = atof(y); printf(" zzsig"); } - if (strstr(x, "psig") != NULL) { config.psig = atof(y); printf(" psig"); } - if (strstr(x, "beta") != NULL) { config.beta = atof(y); printf(" beta"); } - if (strstr(x, "mzsig") != NULL){ config.mzsig = atof(y); printf(" mzsig"); } - if (strstr(x, "msig") != NULL){ config.msig = atof(y); printf(" msig"); } - if (strstr(x, "molig") != NULL){ config.molig = atof(y); printf(" molig"); } - if (strstr(x, "massub") != NULL){ config.massub = atof(y); printf(" massub"); } - if (strstr(x, "masslb") != NULL){ config.masslb = atof(y); printf(" masslb"); } - if (strstr(x, "psfun") != NULL){ config.psfun = atoi(y); printf(" psfun"); } - if (strstr(x, "mtabsig") != NULL){ config.mtabsig = atof(y); printf(" mtabsig"); } - if (strstr(x, "massbins") != NULL){ config.massbins = atof(y); printf(" massbins"); } - if (strstr(x, "psthresh") != NULL){ config.psthresh = atof(y); printf(" psthresh"); } - if (strstr(x, "speedy") != NULL){ config.speedyflag = atoi(y); printf(" speedy"); } - if (strstr(x, "aggressive") != NULL){ config.aggressiveflag = atoi(y); printf(" aggressive"); } - if (strstr(x, "adductmass") != NULL){ config.adductmass = atof(y); printf(" adductmass"); } - if (strstr(x, "rawflag") != NULL){ config.rawflag = atoi(y); printf(" rawflag"); } - if (strstr(x, "nativezub") != NULL){ config.nativezub = atof(y); printf(" nativezub"); } - if (strstr(x, "nativezlb") != NULL){ config.nativezlb = atof(y); printf(" nativezlb"); } - if (strstr(x, "poolflag") != NULL){ config.poolflag = atoi(y); printf(" poolflag"); } - if (strstr(x, "manualfile") != NULL){ config.manualflag = 1; strcpy(config.manualfile, y); printf(" manualfile"); } - if (strstr(x, "intthresh") != NULL){ config.intthresh = atof(y); printf(" intthresh"); } - if (strstr(x, "peakshapeinflate") != NULL){ config.peakshapeinflate = atof(y); printf(" peakshapeinflate"); } - if (strstr(x, "killmass") != NULL){ config.killmass = atof(y); printf(" killmass"); } - if (strstr(x, "isotopemode") != NULL){ config.isotopemode = atoi(y); printf(" isotopemode"); } - if (strstr(x, "orbimode") != NULL) { config.orbimode = atoi(y); printf(" orbimode"); } - if (strstr(x, "imflag") != NULL) { config.imflag = atoi(y); printf(" imflag"); } - if (strstr(x, "linflag") != NULL) { config.linflag = atoi(y); printf(" linflag"); } + if (strstr(x, "startz") != NULL){ config.startz = atoi(y); }// printf(" startz"); } + if (strstr(x, "endz") != NULL) { config.endz = atoi(y); }// printf(" endz"); } + if (strstr(x, "zzsig") != NULL){ config.zsig = atof(y); }// printf(" zzsig"); } + if (strstr(x, "psig") != NULL) { config.psig = atof(y); }// printf(" psig"); } + if (strstr(x, "beta") != NULL) { config.beta = atof(y); }// printf(" beta"); } + if (strstr(x, "mzsig") != NULL){ config.mzsig = atof(y); }// printf(" mzsig"); } + if (strstr(x, "msig") != NULL){ config.msig = atof(y); }// printf(" msig"); } + if (strstr(x, "molig") != NULL){ config.molig = atof(y); }// printf(" molig"); } + if (strstr(x, "massub") != NULL){ config.massub = atof(y); }// printf(" massub"); } + if (strstr(x, "masslb") != NULL){ config.masslb = atof(y); }// printf(" masslb"); } + if (strstr(x, "psfun") != NULL){ config.psfun = atoi(y); }// printf(" psfun"); } + if (strstr(x, "mtabsig") != NULL){ config.mtabsig = atof(y); }// printf(" mtabsig"); } + if (strstr(x, "massbins") != NULL){ config.massbins = atof(y); }// printf(" massbins"); } + if (strstr(x, "psthresh") != NULL){ config.psthresh = atof(y); }// printf(" psthresh"); } + if (strstr(x, "speedy") != NULL){ config.speedyflag = atoi(y);}// printf(" speedy"); } + if (strstr(x, "aggressive") != NULL){ config.aggressiveflag = atoi(y);}// printf(" aggressive"); } + if (strstr(x, "adductmass") != NULL){ config.adductmass = atof(y);}// printf(" adductmass"); } + if (strstr(x, "rawflag") != NULL){ config.rawflag = atoi(y); }// printf(" rawflag"); } + if (strstr(x, "nativezub") != NULL){ config.nativezub = atof(y); }// printf(" nativezub"); } + if (strstr(x, "nativezlb") != NULL){ config.nativezlb = atof(y); }// printf(" nativezlb"); } + if (strstr(x, "poolflag") != NULL){ config.poolflag = atoi(y); }// printf(" poolflag"); } + if (strstr(x, "manualfile") != NULL){ config.manualflag = 1; strcpy(config.manualfile, y);}// printf(" manualfile"); } + if (strstr(x, "intthresh") != NULL){ config.intthresh = atof(y); }// printf(" intthresh"); } + if (strstr(x, "peakshapeinflate") != NULL){ config.peakshapeinflate = atof(y); }// printf(" peakshapeinflate"); } + if (strstr(x, "killmass") != NULL){ config.killmass = atof(y); }// printf(" killmass"); } + if (strstr(x, "isotopemode") != NULL){ config.isotopemode = atoi(y); }// printf(" isotopemode"); } + if (strstr(x, "orbimode") != NULL) { config.orbimode = atoi(y); }// printf(" orbimode"); } + if (strstr(x, "imflag") != NULL) { config.imflag = atoi(y); }// printf(" imflag"); } + if (strstr(x, "linflag") != NULL) { config.linflag = atoi(y); }// printf(" linflag"); } //IM Parameters - if (strstr(x, "csig") != NULL) { config.csig = atof(y); printf(" csig"); } - if (strstr(x, "dtsig") != NULL) { config.dtsig = atof(y); printf(" dtsig"); } - if (strstr(x, "ccsub") != NULL) { config.ccsub = atof(y); printf(" ccsub"); } - if (strstr(x, "ccslb") != NULL) { config.ccslb = atof(y); printf(" ccslb"); } - if (strstr(x, "ccsbins") != NULL) { config.ccsbins = atof(y); printf(" ccsbins"); } - if (strstr(x, "temp") != NULL) { config.temp = atof(y); printf(" temp"); } - if (strstr(x, "pressure") != NULL) { config.press = atof(y); printf(" pressure"); } - if (strstr(x, "volt") != NULL) { config.volt = atof(y); printf(" volt"); } - if (strstr(x, "gasmass") != NULL) { config.hmass = atof(y); printf(" gasmass"); } - if (strstr(x, "tnaught") != NULL) { config.to = atof(y); printf(" to"); } - if (strstr(x, "tcal1") != NULL) { config.tcal1 = atof(y); printf(" tcal1"); } - if (strstr(x, "tcal2") != NULL) { config.tcal2 = atof(y); printf(" tcal2"); } - if (strstr(x, "edc") != NULL) { config.edc = atof(y); printf(" edc"); } - if (strstr(x, "zout") != NULL) { config.zout = atoi(y); printf(" zout"); } - if (strstr(x, "twaveflag") != NULL) { config.twaveflag = atoi(y); printf(" twaveflag"); } - if (strstr(x, "ubnativeccs") != NULL) { config.nativeccsub = atof(y); printf(" ubnativeccs"); } - if (strstr(x, "lbnativeccs") != NULL) { config.nativeccslb = atof(y); printf(" lbnativeccs"); } - if (strstr(x, "driftlength") != NULL) { config.len = atof(y); printf(" driftlength"); } - if (strstr(x, "baselineflag") != NULL) { config.baselineflag = atoi(y); printf(" baselineflag"); } - if (strstr(x, "noiseflag") != NULL) { config.noiseflag = atoi(y); printf(" noiseflag"); } + if (strstr(x, "csig") != NULL) { config.csig = atof(y); }// printf(" csig"); } + if (strstr(x, "dtsig") != NULL) { config.dtsig = atof(y); }// printf(" dtsig"); } + if (strstr(x, "ccsub") != NULL) { config.ccsub = atof(y); }// printf(" ccsub"); } + if (strstr(x, "ccslb") != NULL) { config.ccslb = atof(y); }// printf(" ccslb"); } + if (strstr(x, "ccsbins") != NULL) { config.ccsbins = atof(y); }// printf(" ccsbins"); } + if (strstr(x, "temp") != NULL) { config.temp = atof(y); }// printf(" temp"); } + if (strstr(x, "pressure") != NULL) { config.press = atof(y); }// printf(" pressure"); } + if (strstr(x, "volt") != NULL) { config.volt = atof(y); }// printf(" volt"); } + if (strstr(x, "gasmass") != NULL) { config.hmass = atof(y); }// printf(" gasmass"); } + if (strstr(x, "tnaught") != NULL) { config.to = atof(y); }// printf(" to"); } + if (strstr(x, "tcal1") != NULL) { config.tcal1 = atof(y); }// printf(" tcal1"); } + if (strstr(x, "tcal2") != NULL) { config.tcal2 = atof(y); }// printf(" tcal2"); } + if (strstr(x, "edc") != NULL) { config.edc = atof(y); }// printf(" edc"); } + if (strstr(x, "zout") != NULL) { config.zout = atoi(y); }// printf(" zout"); } + if (strstr(x, "twaveflag") != NULL) { config.twaveflag = atoi(y); }// printf(" twaveflag"); } + if (strstr(x, "ubnativeccs") != NULL) { config.nativeccsub = atof(y); }// printf(" ubnativeccs"); } + if (strstr(x, "lbnativeccs") != NULL) { config.nativeccslb = atof(y); }// printf(" lbnativeccs"); } + if (strstr(x, "driftlength") != NULL) { config.len = atof(y); }// printf(" driftlength"); } + if (strstr(x, "baselineflag") != NULL) { config.baselineflag = atoi(y); }// printf(" baselineflag"); } + if (strstr(x, "noiseflag") != NULL) { config.noiseflag = atoi(y); }// printf(" noiseflag"); } //Experimental - if (strstr(x, "filterwidth") != NULL) { config.filterwidth = atoi(y); printf(" filterwidth"); } - if (strstr(x, "zerolog") != NULL) { config.zerolog = atof(y); printf(" zerolog"); } + if (strstr(x, "filterwidth") != NULL) { config.filterwidth = atoi(y); }// printf(" filterwidth"); } + if (strstr(x, "zerolog") != NULL) { config.zerolog = atof(y); }// printf(" zerolog"); } //Peak Parameters - if (strstr(x, "peakwindow") != NULL) { config.peakwin = atof(y); printf(" peakwindow"); } - if (strstr(x, "peakthresh") != NULL) { config.peakthresh = atof(y); printf(" peakthresh"); } - if (strstr(x, "peaknorm") != NULL) { config.peaknorm = atoi(y); printf(" peaknorm"); } + if (strstr(x, "peakwindow") != NULL) { config.peakwin = atof(y); }// printf(" peakwindow"); } + if (strstr(x, "peakthresh") != NULL) { config.peakthresh = atof(y); }// printf(" peakthresh"); } + if (strstr(x, "peaknorm") != NULL) { config.peaknorm = atoi(y); }// printf(" peaknorm"); } } - printf("\n\n"); + //printf("\n\n"); } fclose(file); @@ -330,6 +425,7 @@ Config LoadConfig(Config config,const char *filename) } + //Gives command line help options void PrintHelp() { @@ -506,7 +602,7 @@ int getfilelength(char *infile) if (file_ptr == 0) { - printf("TESTCould not open %s file\n", infile); + printf("Could not open %s file\n", infile); exit(9); } else @@ -1493,13 +1589,20 @@ double getfitdatspeedy(double *fitdat, double *blur, int lengthmz,int numz,int m return fitmax; } -double errfunspeedy(double *dataInt,double *fitdat, double *blur,int lengthmz,int numz,int maxlength,double maxint, +double errfunspeedy(double *dataInt,double *fitdat, double *blur,int lengthmz,int numz,int maxlength, int isolength, int *isotopepos, float *isotopeval, int*starttab, int *endtab, double *mzdist, int speedyflag) { - int i; + //Get max intensity + double maxint = 0; + for (int i = 0; i < lengthmz; i++) + { + if (dataInt[i] > maxint) { maxint = dataInt[i]; } + } + getfitdatspeedy(fitdat, blur,lengthmz, numz,maxlength, maxint,isolength, isotopepos, isotopeval,starttab,endtab,mzdist,speedyflag); - double error=0; - for(i=0;itestmasses[i] + config.adductmass * inp->nztab[j]) / (double)inp->nztab[j]; + testmasspos[index2D(config.numz, i, j)] = nearfast(inp->dataMZ, mztest, config.lengthmz); + } + } + } + + //If there is a mass file read, it will only allow masses close to those masses within some config.mtabsig window. + if (config.mflag == 1 && config.limitflag == 0) + { + TestMassListWindowed(config.lengthmz, config.numz, inp->barr, inp->mtab, config.nativezub, config.nativezlb, config.massub, config.masslb, inp->nztab, inp->testmasses, config.mfilelen, config.mtabsig); + } + //If there is a mass file read and the mass table window (config.mtabsig) is 0, it will only write intensities at the m/z values closest to the m/z values read in from the mfile. + else if (config.mflag == 1 && config.limitflag == 1) + { + TestMassListLimit(config.lengthmz, config.numz, inp->barr, inp->mtab, config.nativezub, config.nativezlb, config.massub, config.masslb, inp->nztab, testmasspos, config.mfilelen); + } + //Normally, write the intensity values if the values fall within the mass upperbound and lower bound + else + { + TestMass(config.lengthmz, config.numz, inp->barr, inp->mtab, config.nativezub, config.nativezlb, config.massub, config.masslb, inp->nztab); + } + free(testmasspos); +} + +int SetStartsEnds(const Config config, const Input * inp, int *starttab, int *endtab, const double threshold) { + int maxlength = 1; + for (int i = 0; i < config.lengthmz; i++) + { + double point = inp->dataMZ[i] - threshold; + int start, end; + if (point < inp->dataMZ[0] && config.speedyflag == 0) { + start = (int)((point - inp->dataMZ[0]) / (inp->dataMZ[1] - inp->dataMZ[0])); + } + else { + start = nearfast(inp->dataMZ, point, config.lengthmz); + } + + starttab[i] = start; + point = inp->dataMZ[i] + threshold; + if (point > inp->dataMZ[config.lengthmz - 1] && config.speedyflag == 0) { + end = config.lengthmz - 1 + (int)((point - inp->dataMZ[config.lengthmz - 1]) / (inp->dataMZ[config.lengthmz - 1] - inp->dataMZ[config.lengthmz - 2])); + } + else { + end = nearfast(inp->dataMZ, point, config.lengthmz); + } + endtab[i] = end; + if (end - start > maxlength) { maxlength = end - start; } + } + //printf("Max Length: %d\t", maxlength); + return maxlength; +} + +void WriteDecon(const Config config, const Decon * decon, const Input * inp, const hid_t file_id, const char * dataset) +{ + char outdat[1024]; + FILE* out_ptr = NULL; + //Write the fit data to a file. + if (config.rawflag >= 0) { + if (config.filetype == 0) { + char outstring2[500]; + sprintf(outstring2, "%s_fitdat.bin", config.outfile); + out_ptr = fopen(outstring2, "wb"); + fwrite(decon->fitdat, sizeof(double), config.lengthmz, out_ptr); + fclose(out_ptr); + printf("Fit: %s\t", outstring2); + } + else { + strjoin(dataset, "/fit_data", outdat); + mh5writefile1d(file_id, outdat, config.lengthmz, decon->fitdat); + } + } + + //Write the baseline to a file. + if (config.rawflag >= 0 && config.baselineflag == 1) { + if (config.filetype == 0) { + char outstring2[500]; + sprintf(outstring2, "%s_baseline.bin", config.outfile); + out_ptr = fopen(outstring2, "wb"); + fwrite(decon->baseline, sizeof(double), config.lengthmz, out_ptr); + fclose(out_ptr); + printf("Background: %s\t", outstring2); + } + else { + strjoin(dataset, "/baseline", outdat); + mh5writefile1d(file_id, outdat, config.lengthmz, decon->baseline); + } + } + + //Writes the convolved m/z grid in binary format + if (config.rawflag == 0 || config.rawflag == 1) + { + if (config.filetype == 0) { + char outstring9[500]; + sprintf(outstring9, "%s_grid.bin", config.outfile); + out_ptr = fopen(outstring9, "wb"); + if (config.rawflag == 0) { fwrite(decon->newblur, sizeof(double), config.lengthmz * config.numz, out_ptr); } + if (config.rawflag == 1) { fwrite(decon->blur, sizeof(double), config.lengthmz * config.numz, out_ptr); } + fclose(out_ptr); + printf("m/z grid: %s\t", outstring9); + } + else { + strjoin(dataset, "/mz_grid", outdat); + if (config.rawflag == 0) { mh5writefile1d(file_id, outdat, config.lengthmz * config.numz, decon->newblur); } + if (config.rawflag == 1) { mh5writefile1d(file_id, outdat, config.lengthmz * config.numz, decon->blur); } + + strjoin(dataset, "/mz_grid", outdat); + double* chargedat = NULL; + chargedat = calloc(config.numz, sizeof(double)); + double* chargeaxis = NULL; + chargeaxis = calloc(config.numz, sizeof(double)); + + for (int j = 0; j < config.numz; j++) { + double val = 0; + chargeaxis[j] = (double)inp->nztab[j]; + for (int i = 0; i < config.lengthmz; i++) { + + val += decon->newblur[index2D(config.numz, i, j)]; + } + chargedat[j] = val; + } + strjoin(dataset, "/charge_data", outdat); + mh5writefile2d(file_id, outdat, config.numz, chargeaxis, chargedat); + free(chargedat); + free(chargeaxis); + } + } + + + //Writes the convolved mass grid in binary format + if (config.rawflag == 0 || config.rawflag == 1) { + if (config.filetype == 0) { + char outstring10[500]; + sprintf(outstring10, "%s_massgrid.bin", config.outfile); + out_ptr = fopen(outstring10, "wb"); + fwrite(decon->massgrid, sizeof(double), decon->mlen * config.numz, out_ptr); + fclose(out_ptr); + printf("Mass Grid: %s\t", outstring10); + } + else { + strjoin(dataset, "/mass_grid", outdat); + mh5writefile1d(file_id, outdat, decon->mlen * config.numz, decon->massgrid); + } + } + + //Writes the mass values convolved with the peak shape + if (config.rawflag == 0 || config.rawflag == 1 || config.rawflag == 2 || config.rawflag == 3) { + if (config.filetype == 0) { + char outstring4[500]; + sprintf(outstring4, "%s_mass.txt", config.outfile); + out_ptr = fopen(outstring4, "w"); + for (int i = 0; i < decon->mlen; i++) + { + fprintf(out_ptr, "%f %f\n", decon->massaxis[i], decon->massaxisval[i]); + } + fclose(out_ptr); + printf("Masses: %s\n", outstring4); + } + else { + strjoin(dataset, "/mass_data", outdat); + mh5writefile2d(file_id, outdat, decon->mlen, decon->massaxis, decon->massaxisval); + } + } + +} + + + + + +#endif \ No newline at end of file diff --git a/unidec_src/UniDec/UniDec.vcxproj b/unidec_src/UniDec/UniDec.vcxproj index 0ebb49be..fe3604fe 100644 --- a/unidec_src/UniDec/UniDec.vcxproj +++ b/unidec_src/UniDec/UniDec.vcxproj @@ -238,7 +238,6 @@ - diff --git a/unidec_src/UniDec/UniDec_Main.h b/unidec_src/UniDec/UniDec_Main.h index a0841fbe..e20f5dea 100644 --- a/unidec_src/UniDec/UniDec_Main.h +++ b/unidec_src/UniDec/UniDec_Main.h @@ -21,854 +21,715 @@ #include "UD_score.h" -int run_unidec(int argc, char *argv[], Config config) { - //Initialize - clock_t starttime; - starttime = clock(); - - FILE *out_ptr = NULL; - hid_t file_id; - - unsigned int i, j, k, iterations; - char *barr = NULL; +Decon MainDeconvolution(const Config config, const Input inp, const int silent) +{ + Decon decon = SetupDecon(); + char* barr = NULL; int mlength, zlength, numclose, - *nztab = NULL, - *mind = NULL, - *zind = NULL, - *closemind = NULL, - *closezind = NULL, - *closeind=NULL, - *starttab = NULL, - *endtab = NULL, - *isotopepos = NULL; + * mind = NULL, + * zind = NULL, + * closemind = NULL, + * closezind = NULL, + * closeind = NULL, + * starttab = NULL, + * endtab = NULL; double - *mdist = NULL, - *zdist = NULL, - *mzdist = NULL, + * mdist = NULL, + * zdist = NULL, + * mzdist = NULL, * rmzdist = NULL, - *mtab = NULL, - *blur = NULL, - *newblur = NULL, - *oldblur = NULL, - *fitdat = NULL, - *massaxis = NULL, - *massaxisval = NULL, - *massgrid = NULL, - *closeval = NULL, - *dataMZ = NULL, - *testmasses = NULL, - *dataInt = NULL, - *baseline=NULL, - *noise=NULL; - - float *isotopeval = NULL; - int lengthmz; - int mfilelen = 0; - char dataset[1024]; - char outdat[1024]; - bool autotune = 0; - + * oldblur = NULL, + * closeval = NULL; - //Set default parameters. These can be overwritten by config file. - double killmass = 0; - double isoparams[10] = { 1.00840852e+00, 1.25318718e-03, 2.37226341e+00, 8.19178000e-04, - -4.37741951e-01, 6.64992972e-04, 9.94230511e-01, 4.64975237e-01, 1.00529041e-02, 5.81240792e-01 }; - - if (argc>2) - { - if (strcmp(argv[2], "-kill") == 0) - { - killmass = atof(argv[3]); - printf("Killing Peak: %f\n", killmass); - } - if (strcmp(argv[2], "-test") == 0) + //................................................................... + // + // Sets the mzdist with the peak shape + // + //.................................................................... + barr = calloc(config.lengthmz * config.numz, sizeof(char)); + memcpy(barr, inp.barr, config.lengthmz * config.numz * sizeof(char)); + + //Sets a threshold for m/z values to check. Things that are far away in m/z space don't need to be considered in the iterations. + double threshold = config.psthresh * fabs(config.mzsig) * config.peakshapeinflate; + if (silent == 0) { printf("Threshold: %f\t", threshold); } + //Create a list of start and end values to box in arrays based on the above threshold + starttab = calloc(config.lengthmz, sizeof(int)); + endtab = calloc(config.lengthmz, sizeof(int)); + int maxlength = 1; + if (config.mzsig != 0) { + //Gets maxlength and sets start and endtab + maxlength = SetStartsEnds(config, &inp, starttab, endtab, threshold); + + //Changes dimensions of the peak shape function. 1D for speedy and 2D otherwise + int pslen = config.lengthmz; + if (config.speedyflag == 0) { pslen = config.lengthmz * maxlength; } + mzdist = calloc(pslen, sizeof(double)); + rmzdist = calloc(pslen, sizeof(double)); + memset(mzdist, 0, pslen * sizeof(double)); + memset(rmzdist, 0, pslen * sizeof(double)); + + int makereverse = 0; + if (config.mzsig < 0 || config.beta < 0) { makereverse = 1; } + + //Calculates the distance between mz values as a 2D or 3D matrix + if (config.speedyflag == 0) { - killmass = atof(argv[3]); - printf("Testing Mass: %f\n", killmass); - test_isotopes(killmass, isoparams); - exit(0); + MakePeakShape2D(config.lengthmz, maxlength, starttab, endtab, inp.dataMZ, fabs(config.mzsig) * config.peakshapeinflate, config.psfun, config.speedyflag, mzdist, rmzdist, makereverse); } - - if (strcmp(argv[2], "-autotune") == 0) + else { - autotune = 1; + //Calculates peak shape as a 1D list centered at the first element for circular convolutions + MakePeakShape1D(inp.dataMZ, threshold, config.lengthmz, config.speedyflag, fabs(config.mzsig) * config.peakshapeinflate, config.psfun, mzdist, rmzdist, makereverse); } - } - - if (config.metamode != -2) - { - strcpy(dataset, "/ms_dataset"); - char strval[1024]; - sprintf(strval, "/%d", config.metamode); - strcat(dataset, strval); - printf("HDF5 Data Set: %s\n", dataset); + if (silent == 0) { printf("mzdist set: %f\t", mzdist[0]); } } else { - strcpy(dataset, "/ms_data"); + mzdist = calloc(0, sizeof(double)); + maxlength = 0; } - //.................................. + //.................................................... // - // File Inputs + // Setting up the neighborhood blur // - //................................. - - if (argc >= 2) - { - if (config.filetype == 1) { - file_id = H5Fopen(argv[1], H5F_ACC_RDWR, H5P_DEFAULT); - strjoin(dataset, "/processed_data", outdat); - lengthmz = mh5getfilelength(file_id, outdat); - dataMZ = calloc(lengthmz, sizeof(double)); - dataInt = calloc(lengthmz, sizeof(double)); - mh5readfile2d(file_id, outdat, lengthmz, dataMZ, dataInt); - printf("Length of Data: %d \n", lengthmz); + //...................................................... - //Check the length of the mfile and then read it in. - if (config.mflag == 1) - { - mfilelen = mh5getfilelength(file_id, "/config/masslist"); - printf("Length of mfile: %d \n", mfilelen); - testmasses = malloc(sizeof(double)*mfilelen); - mh5readfile1d(file_id, "/config/masslist", testmasses); - } - else { - testmasses = malloc(sizeof(double)*mfilelen); - } - } - else { + //sets some parameters regarding the neighborhood blur function + if (config.zsig >= 0 && config.msig >= 0) { + zlength = 1 + 2 * (int)config.zsig; + mlength = 1 + 2 * (int)config.msig; + } + else { + if (config.zsig != 0) { zlength = 1 + 2 * (int)(3 * fabs(config.zsig) + 0.5); } + else { zlength = 1; } + if (config.msig != 0) { mlength = 1 + 2 * (int)(3 * fabs(config.msig) + 0.5); } + else { mlength = 1; } + } + numclose = mlength * zlength; - //Calculate the length of the data file automatically - lengthmz = getfilelength(config.infile); - dataMZ = calloc(lengthmz, sizeof(double)); - dataInt = calloc(lengthmz, sizeof(double)); + //Sets up the blur function in oligomer mass and charge + mind = calloc(mlength, sizeof(int)); + mdist = calloc(mlength, sizeof(double)); - readfile(config.infile, lengthmz, dataMZ, dataInt);//load up the data array + for (int i = 0; i < mlength; i++) + { + mind[i] = i - (mlength - 1) / 2; + if (config.msig != 0) { mdist[i] = exp(-(pow((i - (mlength - 1) / 2.), 2)) / (2.0 * config.msig * config.msig)); } + else { mdist[i] = 1; } + } - printf("Length of Data: %d \n", lengthmz); - config.lengthmz = lengthmz; + zind = calloc(zlength, sizeof(int)); + zdist = calloc(zlength, sizeof(double)); + for (int i = 0; i < zlength; i++) + { + zind[i] = i - (zlength - 1) / 2; + if (config.zsig != 0) { zdist[i] = exp(-(pow((i - (zlength - 1) / 2.), 2)) / (2.0 * config.zsig * config.zsig)); } + else { zdist[i] = 1; } + //printf("%f\n", zdist[i]); + } - //Check the length of the mfile and then read it in. + //Initializing memory + closemind = calloc(numclose, sizeof(int)); + closezind = calloc(numclose, sizeof(int)); + closeval = calloc(numclose, sizeof(double)); + closeind = calloc(numclose * config.lengthmz * config.numz, sizeof(double)); - if (config.mflag == 1) - { - mfilelen = getfilelength(config.mfile); - printf("Length of mfile: %d \n", mfilelen); - } - testmasses = malloc(sizeof(double)*mfilelen); - if (config.mflag == 1) - { - readmfile(config.mfile, mfilelen, testmasses);//read in mass tab - } - } + //Determines the indexes of things that are close as well as the values used in the neighborhood convolution + for (int k = 0; k < numclose; k++) + { + closemind[k] = mind[k % mlength]; + closezind[k] = zind[(int)k / mlength]; + closeval[k] = zdist[(int)k / mlength] * mdist[k % mlength]; } + simp_norm_sum(mlength, mdist); + simp_norm_sum(zlength, zdist); + simp_norm_sum(numclose, closeval); + //Set up blur + MakeBlur(config.lengthmz, config.numz, numclose, barr, closezind, closemind, inp.mtab, config.molig, config.adductmass, inp.nztab, inp.dataMZ, closeind); - //This for loop creates a list of charge values - nztab = calloc(config.numz, sizeof(int)); - for (i = 0; imaxint) { maxint = dataInt[i]; } - } - - double error, blurmax, conv, beta, avgscore, newblurmax; - int maaxle; - int runit = 1; - for (int runnum=0; runnum < runit;runnum++) - { - //................................................................... - // - // Sets the mzdist with the peak shape - // - //.................................................................... - - - //Experimental correction. Not sure why this is necessary. - if (config.psig < 0) { config.mzsig /= 3; } - - //Sets a threshold for m/z values to check. Things that are far away in m/z space don't need to be considered in the iterations. - double threshold = config.psthresh*fabs(config.mzsig)*config.peakshapeinflate; - printf("Threshold: %f\n", threshold); - //Create a list of start and end values to box in arrays based on the above threshold - starttab = calloc(lengthmz, sizeof(int)); - endtab = calloc(lengthmz, sizeof(int)); - int maxlength = 1; - if(config.mzsig!=0){ - for (i = 0; i dataMZ[lengthmz - 1] && config.speedyflag == 0) { - end = lengthmz - 1 + (int)((point - dataMZ[lengthmz - 1]) / (dataMZ[lengthmz - 1] - dataMZ[lengthmz - 2])); } - else { - end = nearfast(dataMZ, point, lengthmz); + for (int i = 0; i < config.lengthmz; i++) + { + if (decon.baseline[i] > 0) { + dataInt2[i] -= decon.baseline[i]; + } } - endtab[i] = end; - if (end - start>maxlength) { maxlength = end - start; } + //memcpy(decon.baseline, dataInt2, sizeof(double)*config.lengthmz); } - printf("maxlength %d\n", maxlength); + } + else + { + printf("Ignoring baseline subtraction because peak width is 0\n"); + } + } - //Changes dimensions of the peak shape function. 1D for speedy and 2D otherwise - int pslen = lengthmz; - if (config.speedyflag == 0) { pslen = lengthmz*maxlength; } - mzdist = calloc(pslen, sizeof(double)); - rmzdist = calloc(pslen, sizeof(double)); - memset(mzdist, 0, pslen * sizeof(double)); - memset(rmzdist, 0, pslen * sizeof(double)); - int makereverse = 0; - if (config.mzsig < 0 || config.beta < 0) { makereverse = 1; } + //Run the iteration + double blurmax = 0; + decon.conv = 0; + int off = 0; + if (silent == 0) { printf("Iterating..."); } + for (int iterations = 0; iterations < abs(config.numit); iterations++) + { + decon.iterations = iterations; + if (config.beta > 0 && iterations > 0) + { - //Calculates the distance between mz values as a 2D or 3D matrix - if (config.speedyflag == 0) - { - MakePeakShape2D(lengthmz, maxlength, starttab, endtab, dataMZ, fabs(config.mzsig)*config.peakshapeinflate, config.psfun, config.speedyflag, mzdist, rmzdist, makereverse); - } - else - { - //Calculates peak shape as a 1D list centered at the first element for circular convolutions - MakePeakShape1D(dataMZ, threshold, lengthmz, config.speedyflag, fabs(config.mzsig)*config.peakshapeinflate, config.psfun, mzdist, rmzdist, makereverse); - } - printf("mzdist set: %f\n", mzdist[0]); + softargmax(decon.blur, config.lengthmz, config.numz, config.beta); + //printf("Beta %f\n", beta); } - else + else if (config.beta < 0 && iterations >0) { - mzdist = calloc(0, sizeof(double)); - maxlength = 0; + softargmax_transposed(decon.blur, config.lengthmz, config.numz, fabs(config.beta), barr, maxlength, config.isolength, inp.isotopepos, inp.isotopeval, config.speedyflag, starttab, endtab, rmzdist, config.mzsig); } - //.................................................... - // - // Setting up the neighborhood blur - // - //...................................................... - - //sets some parameters regarding the neighborhood blur function - if (config.zsig >= 0 && config.msig >= 0) { - zlength = 1 + 2 * (int)config.zsig; - mlength = 1 + 2 * (int)config.msig; + if (config.psig >= 1 && iterations > 0) + { + point_smoothing(decon.blur, barr, config.lengthmz, config.numz, abs((int)config.psig)); + //printf("Point Smoothed %f\n", config.psig); } - else { - if (config.zsig != 0){zlength = 1 + 2 * (int)(3 * fabs(config.zsig) + 0.5); } - else { zlength = 1; } - if (config.msig != 0) { mlength = 1 + 2 * (int)(3 * fabs(config.msig) + 0.5); } - else { mlength = 1; } + else if (config.psig < 0 && iterations >0) + { + point_smoothing_peak_width(config.lengthmz, config.numz, maxlength, starttab, endtab, mzdist, decon.blur, config.speedyflag, barr); } - numclose = mlength*zlength; - //Sets up the blur function in oligomer mass and charge - mind = calloc(mlength, sizeof(int)); - mdist = calloc(mlength, sizeof(double)); - for (i = 0; i= 0 && config.msig >= 0) { + blur_it_mean(config.lengthmz, config.numz, numclose, closeind, decon.newblur, decon.blur, barr, config.zerolog); + } + else if (config.zsig > 0 && config.msig < 0) { - mind[i] = i - (mlength - 1) / 2; - //mind[i] = i; - if (config.msig != 0) { mdist[i] = exp(-(pow((i - (mlength - 1) / 2.), 2)) / (2.0 * config.msig*config.msig)); } - else { mdist[i] = 1; } - //mdist[i] = exp(-(double)i/fabs(config.msig)); - //mdist[i]=secderndis((mlength-1)/2.,config.msig,i); - //mdist[i] = mc[mod(i - (mlength - 1) / 2, mlength)]; - //mdist[i] = 1; + blur_it_hybrid1(config.lengthmz, config.numz, zlength, mlength, closeind, closemind, closezind, mdist, zdist, decon.newblur, decon.blur, barr, config.zerolog); } - - zind = calloc(zlength, sizeof(int)); - zdist = calloc(zlength, sizeof(double)); - for (i = 0; i < zlength; i++) + else if (config.zsig < 0 && config.msig > 0) { - zind[i] = i - (zlength - 1) / 2; - if (config.zsig != 0) { zdist[i] = exp(-(pow((i - (zlength - 1) / 2.), 2)) / (2.0 * config.zsig*config.zsig)); } - else { zdist[i] = 1; } - //zdist[i]=secderndis((zlength-1)/2.,zsig,i); - //zdist[i] = zc[mod(i - (zlength - 1) / 2, zlength)]; - //zdist[i] = 1; - printf("%f\n", zdist[i]); + blur_it_hybrid2(config.lengthmz, config.numz, zlength, mlength, closeind, closemind, closezind, mdist, zdist, decon.newblur, decon.blur, barr, config.zerolog); + } + else { + blur_it(config.lengthmz, config.numz, numclose, closeind, closeval, decon.newblur, decon.blur, barr); + } + + //Run Richardson-Lucy Deconvolution + deconvolve_iteration_speedy(config.lengthmz, config.numz, maxlength, + decon.newblur, decon.blur, barr, config.aggressiveflag, dataInt2, + config.isolength, inp.isotopepos, inp.isotopeval, starttab, endtab, mzdist, rmzdist, config.speedyflag, + config.baselineflag, decon.baseline, decon.noise, config.mzsig, inp.dataMZ, config.filterwidth, config.psig); + + //Determine the metrics for conversion. Only do this every 10% to speed up. + if ((config.numit < 10 || iterations % 10 == 0 || iterations % 10 == 1 || iterations>0.9 * config.numit)) { + double diff = 0; + double tot = 0; + for (int i = 0; i < config.lengthmz * config.numz; i++) + { + if (barr[i] == 1) + { + diff += pow((decon.blur[i] - oldblur[i]), 2); + tot += decon.blur[i]; + } + } + if (tot != 0) { decon.conv = (diff / tot); } + else { decon.conv = 12345678; } + + //printf("Iteration: %d Convergence: %f\n", iterations, decon.conv); + if (decon.conv < 0.000001) { + if (off == 1 && config.numit > 0) { + printf("Converged in %d iterations.\n\n", iterations); + break; + } + off = 1; + } + memcpy(oldblur, decon.blur, config.lengthmz * config.numz * sizeof(double)); } + } + free(dataInt2); + free(oldblur); - //Initializing memory - closemind = calloc(numclose, sizeof(int)); - closezind = calloc(numclose, sizeof(int)); - closeval = calloc(numclose, sizeof(double)); - closeind = calloc(numclose*lengthmz*config.numz, sizeof(double)); + //................................................................ + // + // Setting up the outputs + // + //............................................................... - //Determines the indexes of things that are close as well as the values used in the neighborhood convolution - for (k = 0; k 0) + else { - isolength = setup_isotopes(isoparams, isotopepos, isotopeval, mtab, nztab, barr, dataMZ, lengthmz, config.numz); - - isotopepos = calloc(isolength*lengthmz*config.numz, sizeof(int)); - isotopeval = calloc(isolength*lengthmz*config.numz, sizeof(float)); - - make_isotopes(isoparams, isotopepos, isotopeval, mtab, nztab, barr, dataMZ, lengthmz, config.numz); - - printf("Isotopes set up, Length: %d\n", isolength); + MakePeakShape1D(inp.dataMZ, threshold, config.lengthmz, config.speedyflag, fabs(config.mzsig), config.psfun, mzdist, rmzdist, 0); } + printf("mzdist reset: %f\n", config.mzsig); + } - //................................................... - // - // Setting up and running the iteration - // - //......................................................... + //Determine the maximum intensity in the blur matrix + blurmax = Max(decon.blur, config.lengthmz * config.numz); + double cutoff = 0; + if (blurmax != 0) { cutoff = 0.0000001 / blurmax; } - //Convert aggressiveflag to baselineflag - if (config.aggressiveflag == 1 || config.aggressiveflag==2) { config.baselineflag = 1; } - else { config.baselineflag = 0; } + //Apply The Cutoff + ApplyCutoff1D(decon.blur, blurmax * cutoff, config.lengthmz * config.numz); + //Calculate the fit data and error. + decon.fitdat = calloc(config.lengthmz, sizeof(double)); + decon.error = errfunspeedy(inp.dataInt, decon.fitdat, decon.blur, config.lengthmz, config.numz, maxlength, + config.isolength, inp.isotopepos, inp.isotopeval, starttab, endtab, mzdist, config.speedyflag); + if (config.baselineflag == 1) + { +#pragma omp parallel for schedule(auto) + for (int i = 0; i < config.lengthmz; i++) { + decon.fitdat[i] += decon.baseline[i];// +decon.noise[i]; + //decon.fitdat[i] = decon.noise[i]+0.1; + } + } + ApplyCutoff1D(decon.fitdat, 0, config.lengthmz); - //Creates an intial probability matrix, blur, of 1 for each element - blur = calloc(lengthmz*config.numz, sizeof(double)); - newblur = calloc(lengthmz*config.numz, sizeof(double)); - oldblur = calloc(lengthmz*config.numz, sizeof(double)); + // Charge scaling (orbimode) + if (config.orbimode == 1) + { + printf("Rescaling charge states and normalizing "); + charge_scaling(decon.blur, inp.nztab, config.lengthmz, config.numz); + simp_norm(config.lengthmz * config.numz, decon.blur); + printf("Done\n"); + } - if (config.baselineflag == 1) { - printf("Auto Baseline Mode On: %d\n",config.aggressiveflag); - baseline = calloc(lengthmz, sizeof(double)); - noise = calloc(lengthmz, sizeof(double)); - } + //Change Monoisotopic to Average if necessary + if (config.isotopemode == 2) + { + monotopic_to_average(config.lengthmz, config.numz, decon.blur, barr, config.isolength, inp.isotopepos, inp.isotopeval); + } - //#pragma omp parallel for private (i,j), schedule(auto) - for (i = 0; i newblurmax * cutoff) { - for (int i = 0; i < 10; i++) + if (inp.mtab[index2D(config.numz, i, j)] + threshold * inp.nztab[j] > massmax) { - deconvolve_baseline(lengthmz, dataMZ, dataInt, baseline, fabs(config.mzsig)); + massmax = inp.mtab[index2D(config.numz, i, j)] + threshold * inp.nztab[j]; } - for (int i = 0; i < lengthmz; i++) + if (inp.mtab[index2D(config.numz, i, j)] - threshold * inp.nztab[j] < massmin) { - if (baseline[i]>0) { - dataInt2[i] -= baseline[i]; - } + massmin = inp.mtab[index2D(config.numz, i, j)] - threshold * inp.nztab[j]; } - //memcpy(baseline, dataInt2, sizeof(double)*lengthmz); } } - else - { - printf("Ignoring baseline subtraction because peak width is 0\n"); - } - } - - - //Run the iteration - blurmax = 0; - conv = 0; - beta = 0; - int off = 0; - printf("Iterating\n\n"); - - for (iterations = 0; iterations 0 && iterations > 0) - { - - softargmax(blur, lengthmz, config.numz, config.beta); - //printf("Beta %f\n", beta); - } - else if (config.beta < 0 && iterations >0) - { - softargmax_transposed(blur, lengthmz, config.numz, fabs(config.beta), barr, maxlength, isolength, isotopepos, isotopeval, config.speedyflag, starttab, endtab, rmzdist, config.mzsig); - } - - if (config.psig >= 1 && iterations>0) - { - point_smoothing(blur, barr, lengthmz, config.numz, abs((int) config.psig)); - //printf("Point Smoothed %f\n", config.psig); - } - else if (config.psig < 0 && iterations >0) - { - point_smoothing_peak_width(lengthmz, config.numz, maxlength, starttab, endtab, mzdist, blur, config.speedyflag, barr); - } + decon.massaxis[i] = massmin + i * config.massbins; + } - - //Run Blurs - if (config.zsig >= 0 && config.msig >= 0) { - blur_it_mean(lengthmz,config.numz,numclose,closeind,newblur,blur,barr,config.zerolog); - } - else if (config.zsig > 0 && config.msig < 0) - { - blur_it_hybrid1(lengthmz, config.numz, zlength, mlength, closeind, closemind, closezind, mdist, zdist, newblur, blur, barr, config.zerolog); + //Determine the mass intensities from m/z grid + if (config.poolflag == 0) { + if (config.rawflag == 1 || config.rawflag == 3) { + IntegrateTransform(config.lengthmz, config.numz, inp.mtab, massmax, massmin, decon.mlen, decon.massaxis, decon.massaxisval, decon.blur, decon.massgrid); } - else if (config.zsig < 0 && config.msig > 0) - { - blur_it_hybrid2(lengthmz, config.numz, zlength, mlength, closeind, closemind, closezind, mdist, zdist, newblur, blur, barr, config.zerolog); + if (config.rawflag == 0 || config.rawflag == 2) { + IntegrateTransform(config.lengthmz, config.numz, inp.mtab, massmax, massmin, decon.mlen, decon.massaxis, decon.massaxisval, decon.newblur, decon.massgrid); } - else { - blur_it(lengthmz,config.numz,numclose,closeind,closeval,newblur,blur,barr); + } + else { + if (config.rawflag == 1 || config.rawflag == 3) { + InterpolateTransform(decon.mlen, config.numz, config.lengthmz, inp.nztab, decon.massaxis, config.adductmass, + inp.dataMZ, inp.dataInt, decon.massgrid, decon.massaxisval, decon.blur); } - - /* - if (config.msig > 0) { - blur_it_mean(lengthmz, config.numz, mlength, closemind, newblur, blur, barr, config.zerolog); - memcpy(blur, newblur, lengthmz*config.numz * sizeof(double));} - if (config.msig < 0) { - blur_it(lengthmz, config.numz, mlength, closemind, mdist, newblur, blur, barr); - memcpy(blur, newblur, lengthmz*config.numz * sizeof(double));} - if (config.zsig > 0) {blur_it_mean(lengthmz,config.numz,zlength,closezind,newblur,blur,barr,config.zerolog);} - if (config.zsig < 0) {blur_it(lengthmz, config.numz, zlength, closezind, zdist, newblur, blur, barr);} - if (config.zsig == 0) {memcpy(newblur, blur, lengthmz*config.numz * sizeof(double));}*/ - - //Run Richardson-Lucy Deconvolution - deconvolve_iteration_speedy(lengthmz, config.numz, maxlength, - newblur, blur, barr, config.aggressiveflag, dataInt2, - isolength, isotopepos, isotopeval, starttab, endtab, mzdist, rmzdist, config.speedyflag, - config.baselineflag, baseline,noise,config.mzsig,dataMZ, config.filterwidth, config.psig); - - //Determine the metrics for conversion. Only do this every 10% to speed up. - if ((config.numit<10 || iterations % 10 == 0 || iterations % 10 == 1 || iterations>0.9*config.numit)) { - double diff = 0; - double tot = 0; - for (i = 0; i0) { - printf("\nConverged in %d iterations.\n\n", iterations); - break; - } - off = 1; - } - memcpy(oldblur, blur, lengthmz*config.numz * sizeof(double)); + if (config.rawflag == 0 || config.rawflag == 2) { + InterpolateTransform(decon.mlen, config.numz, config.lengthmz, inp.nztab, decon.massaxis, config.adductmass, + inp.dataMZ, inp.dataInt, decon.massgrid, decon.massaxisval, decon.newblur); } } - free(dataInt2); - //................................................................ - // - // Writing and reporting the outputs - // - //............................................................... + } - + //..................................... + // Scores + // ....................................... - //Reset the peak shape if it was inflated - if (config.peakshapeinflate != 1 && config.mzsig!=0) { - if (config.speedyflag == 0) - { - MakePeakShape2D(lengthmz, maxlength, starttab, endtab, dataMZ, fabs(config.mzsig), config.psfun, config.speedyflag, mzdist, rmzdist, 0); - } + double scorethreshold = 0; + decon.uniscore = score(config, decon.mlen, inp.dataMZ, inp.dataInt, decon.newblur, decon.massaxis, decon.massaxisval, decon.massgrid, inp.nztab, scorethreshold); - else - { - MakePeakShape1D(dataMZ, threshold, lengthmz, config.speedyflag, fabs(config.mzsig), config.psfun, mzdist, rmzdist, 0); - } - printf("mzdist reset: %f\n", config.mzsig); - } + //Free Memory + free(mzdist); + free(rmzdist); + free(closeval); + free(closemind); + free(closezind); + free(endtab); + free(starttab); + free(mdist); + free(mind); + free(zind); + free(zdist); + free(barr); + free(closeind); + return decon; +} - //Determine the maximum intensity in the blur matrix - blurmax = Max(blur, lengthmz * config.numz); - if(blurmax!=0){ config.cutoff = 0.0000001 / blurmax; } - else { config.cutoff = 0; } - - //Apply The Cutoff - ApplyCutoff1D(blur, blurmax*config.cutoff, lengthmz*config.numz); +int run_unidec(int argc, char *argv[], Config config) { + //Initialize + clock_t starttime; + starttime = clock(); - //Calculate the fit data and error. - fitdat = calloc(lengthmz, sizeof(double)); - error = errfunspeedy(dataInt, fitdat,blur,lengthmz, config.numz, maxlength, maxint, - isolength, isotopepos, isotopeval, starttab, endtab, mzdist, config.speedyflag); + FILE *out_ptr = NULL; + hid_t file_id; - - if (config.baselineflag == 1) - { - #pragma omp parallel for private(i), schedule(auto) - for (int i = 0; i= 0) { - if (config.filetype == 0) { - char outstring2[500]; - sprintf(outstring2, "%s_fitdat.bin", config.outfile); - out_ptr = fopen(outstring2, "wb"); - fwrite(fitdat, sizeof(double), lengthmz, out_ptr); - fclose(out_ptr); - printf("Fit data written to: %s\n", outstring2); - } - else { - strjoin(dataset, "/fit_data", outdat); - mh5writefile1d(file_id, outdat, lengthmz, fitdat); - } - } + Input inp = SetupInputs(); - //Write the baseline to a file. - if (config.rawflag >= 0 && config.baselineflag==1) { - if (config.filetype == 0) { - char outstring2[500]; - sprintf(outstring2, "%s_baseline.bin", config.outfile); - out_ptr = fopen(outstring2, "wb"); - fwrite(baseline, sizeof(double), lengthmz, out_ptr); - fclose(out_ptr); - printf("Background written to: %s\n", outstring2); - } - else { - strjoin(dataset, "/baseline", outdat); - mh5writefile1d(file_id, outdat, lengthmz, baseline); - } - } + char dataset[1024]; + char outdat[1024]; + bool autotune = 0; + + //Convert aggressiveflag to baselineflag + if (config.aggressiveflag == 1 || config.aggressiveflag == 2) { config.baselineflag = 1; } + else { config.baselineflag = 0; } + + //Experimental correction. Not sure why this is necessary. + if (config.psig < 0) { config.mzsig /= 3; } - // Charge scaling (orbimode) - if (config.orbimode == 1) + //Set default parameters. These can be overwritten by config file. + double isoparams[10] = { 1.00840852e+00, 1.25318718e-03, 2.37226341e+00, 8.19178000e-04, + -4.37741951e-01, 6.64992972e-04, 9.94230511e-01, 4.64975237e-01, 1.00529041e-02, 5.81240792e-01 }; + + if (argc>2) + { + if (strcmp(argv[2], "-test") == 0) { - printf("Rescaling charge states and normalizing "); - charge_scaling(blur, nztab, lengthmz, config.numz); - simp_norm(lengthmz*config.numz,blur); - printf("Done\n"); + double testmass = atof(argv[3]); + printf("Testing Mass: %f\n", testmass); + test_isotopes(testmass, isoparams); + exit(0); } - if (config.isotopemode == 2) + if (strcmp(argv[2], "-autotune") == 0) { - monotopic_to_average(lengthmz, config.numz, blur, barr, isolength, isotopepos, isotopeval); + autotune = 1; } + } - //newblur is repurposed as the convolution of blur by the mz peaks shape - newblurmax = blurmax; - if ((config.rawflag == 0 || config.rawflag == 2)) { - if (config.mzsig != 0) { - newblurmax = Reconvolve(lengthmz, config.numz, maxlength, starttab, endtab, mzdist, blur, newblur, config.speedyflag, barr); - } - else - { - memcpy(newblur, blur, lengthmz*config.numz * sizeof(double)); - } - } + if (config.metamode != -2) + { + strcpy(dataset, "/ms_dataset"); + char strval[1024]; + sprintf(strval, "/%d", config.metamode); + strcat(dataset, strval); + printf("HDF5 Data Set: %s\n", dataset); + } + else + { + strcpy(dataset, "/ms_data"); + } - //Writes the convolved m/z grid in binary format - if (config.rawflag == 0 || config.rawflag == 1) - { - if (config.filetype == 0) { - char outstring9[500]; - sprintf(outstring9, "%s_grid.bin", config.outfile); - out_ptr = fopen(outstring9, "wb"); - if (config.rawflag == 0) { fwrite(newblur, sizeof(double), lengthmz*config.numz, out_ptr); } - if (config.rawflag == 1) { fwrite(blur, sizeof(double), lengthmz*config.numz, out_ptr); } - fclose(out_ptr); - printf("m/z grid written to: %s\n", outstring9); + //.................................. + // + // File Inputs + // + //................................. + + if (argc >= 2) + { + if (config.filetype == 1) { + file_id = H5Fopen(argv[1], H5F_ACC_RDWR, H5P_DEFAULT); + strjoin(dataset, "/processed_data", outdat); + config.lengthmz = mh5getfilelength(file_id, outdat); + inp.dataMZ = calloc(config.lengthmz, sizeof(double)); + inp.dataInt = calloc(config.lengthmz, sizeof(double)); + mh5readfile2d(file_id, outdat, config.lengthmz, inp.dataMZ, inp.dataInt); + printf("Length of Data: %d \n", config.lengthmz); + + //Check the length of the mfile and then read it in. + if (config.mflag == 1) + { + config.mfilelen = mh5getfilelength(file_id, "/config/masslist"); + printf("Length of mfile: %d \n", config.mfilelen); + inp.testmasses = malloc(sizeof(double)*config.mfilelen); + mh5readfile1d(file_id, "/config/masslist", inp.testmasses); } else { - strjoin(dataset, "/mz_grid", outdat); - if (config.rawflag == 0) { mh5writefile1d(file_id, outdat, lengthmz*config.numz, newblur); } - if (config.rawflag == 1) { mh5writefile1d(file_id, outdat, lengthmz*config.numz, blur); } - - strjoin(dataset, "/mz_grid", outdat); - double *chargedat = NULL; - chargedat = calloc(config.numz, sizeof(double)); - double *chargeaxis = NULL; - chargeaxis = calloc(config.numz, sizeof(double)); - - for (int j = 0; j < config.numz; j++) { - double val = 0; - chargeaxis[j] = (double)nztab[j]; - for (int i = 0; inewblurmax*config.cutoff) - { - if (mtab[index2D(config.numz, i, j)] + threshold*nztab[j]>massmax) - { - massmax = mtab[index2D(config.numz, i, j)] + threshold*nztab[j]; - } - if (mtab[index2D(config.numz, i, j)] - threshold*nztab[j] 0) + { + printf("Isotope Mode: %d\n", config.isotopemode); + config.isolength = setup_isotopes(isoparams, inp.isotopepos, inp.isotopeval, inp.mtab, inp.nztab, inp.barr, inp.dataMZ, config.lengthmz, config.numz); - //Determine the mass intensities from m/z grid - if (config.poolflag == 0) { - if (config.rawflag == 1 || config.rawflag == 3) { - IntegrateTransform(lengthmz, config.numz, mtab, massmax, massmin, maaxle, massaxis, massaxisval, blur, massgrid); - } - if (config.rawflag == 0 || config.rawflag == 2) { - IntegrateTransform(lengthmz, config.numz, mtab, massmax, massmin, maaxle, massaxis, massaxisval, newblur, massgrid); - } - } - else { - if (config.rawflag == 1 || config.rawflag == 3) { - InterpolateTransform(maaxle, config.numz, lengthmz, nztab, massaxis, config.adductmass, dataMZ, dataInt, massgrid, massaxisval, blur); - } - if (config.rawflag == 0 || config.rawflag == 2) { - InterpolateTransform(maaxle, config.numz, lengthmz, nztab, massaxis, config.adductmass, dataMZ, dataInt, massgrid, massaxisval, newblur); - } - } + inp.isotopepos = calloc(config.isolength * config.lengthmz * config.numz, sizeof(int)); + inp.isotopeval = calloc(config.isolength * config.lengthmz * config.numz, sizeof(float)); + make_isotopes(isoparams, inp.isotopepos, inp.isotopeval, inp.mtab, inp.nztab, inp.barr, inp.dataMZ, config.lengthmz, config.numz); - //Writes the convolved mass grid in binary format - if (config.rawflag == 0 || config.rawflag == 1) { - if (config.filetype == 0) { - char outstring10[500]; - sprintf(outstring10, "%s_massgrid.bin", config.outfile); - out_ptr = fopen(outstring10, "wb"); - fwrite(massgrid, sizeof(double), maaxle*config.numz, out_ptr); - fclose(out_ptr); - printf("Mass Grid Written: %s\n", outstring10); - } - else { - strjoin(dataset, "/mass_grid", outdat); - mh5writefile1d(file_id, outdat, maaxle*config.numz, massgrid); - } - } + printf("Isotopes set up, Length: %d\n", config.isolength); + } - //Writes the mass values convolved with the peak shape - if (config.rawflag == 0 || config.rawflag == 1 || config.rawflag == 2 || config.rawflag == 3) { - if (config.filetype == 0) { - char outstring4[500]; - sprintf(outstring4, "%s_mass.txt", config.outfile); - out_ptr = fopen(outstring4, "w"); - for (i = 0; i < maaxle; i++) + Decon decon=SetupDecon(); + + if (autotune == 1) { + printf("Starting Autotune...\n"); + double start_peakwindow = config.peakwin; + double start_peakthresh = config.peakthresh; + config.peakwin = 3 * config.massbins; + config.peakthresh = 0.01; + + int start_numit = config.numit; + config.numit = 10; + decon = MainDeconvolution(config, inp, 1); + double bestscore = decon.uniscore; + + double start_mzsig = config.mzsig; + double start_zsig = config.zsig; + double start_beta = config.beta; + double start_psig = config.psig; + + //double mz_sigs[5] = { 0, 0.1, 1, 10, 100}; + double mz_sigs[2] = { 0, config.mzsig }; + //double z_sigs[2] = { -1, 1 }; + double z_sigs[1] = { config.zsig }; + double betas[3] = { 0, 50, 500 }; + double psigs[3] = { 0, 1, 10 }; + + //int n_mzsigs = 5; + int n_mzsigs = 2; + //int n_zsigs = 2; + int n_zsigs = 1; + int n_betas = 3; + int n_psigs = 3; + + for (int i = 0; i < n_mzsigs; i++) + { + for (int j = 0; j < n_zsigs; j++) + { + for (int k = 0; k < n_betas; k++) + { + for (int h = 0; h < n_psigs; h++) { - fprintf(out_ptr, "%f %f\n", massaxis[i], massaxisval[i]); + config.mzsig = mz_sigs[i]; + config.zsig = z_sigs[j]; + config.beta = betas[k]; + config.psig = psigs[h]; + + decon = MainDeconvolution(config, inp, 1); + double newscore = decon.uniscore; + if (newscore > bestscore) { + start_mzsig = config.mzsig; + start_zsig = config.zsig; + start_beta = config.beta; + start_psig = config.psig; + bestscore = newscore; + } } - fclose(out_ptr); - printf("Masses written: %s\n", outstring4); - } - else { - strjoin(dataset, "/mass_data", outdat); - mh5writefile2d(file_id, outdat, maaxle, massaxis, massaxisval); } } } - //..................................... - // Scores - // ....................................... - - double scorethreshold = 0; - //avgscore = score(config, dataMZ, dataInt, newblur, massaxis, massaxisval, massgrid, nztab, scorethreshold); + config.numit = start_numit; + config.mzsig = start_mzsig; + config.zsig = start_zsig; + config.beta = start_beta; + config.psig = start_psig; + config.peakthresh = start_peakthresh; + config.peakwin = start_peakwindow; + printf("Best mzsig: %f zsig: %f beta: %f psig: %f Score:%f\n", start_mzsig, start_zsig, start_beta, start_psig, bestscore); + decon = MainDeconvolution(config, inp, 0); } + else{ decon = MainDeconvolution(config, inp, 0); } //................................................................ // @@ -876,6 +737,9 @@ int run_unidec(int argc, char *argv[], Config config) { // //................................................................... + //Write Everything + WriteDecon(config, &decon, &inp, file_id, dataset); + //Writes a file with a number of key parameters such as error and number of significant parameters. clock_t end = clock(); double totaltime = (double)(end - starttime) / CLOCKS_PER_SEC; @@ -883,65 +747,39 @@ int run_unidec(int argc, char *argv[], Config config) { char outstring3[500]; sprintf(outstring3, "%s_error.txt", config.outfile); out_ptr = fopen(outstring3, "w"); - fprintf(out_ptr, "error = %f\n", error); - fprintf(out_ptr, "blurmax = %f\n", blurmax); - fprintf(out_ptr, "newblurmax = %f\n", newblurmax); - fprintf(out_ptr, "cutoff = %f\n", config.cutoff); + fprintf(out_ptr, "error = %f\n", decon.error); fprintf(out_ptr, "time = %f\n", totaltime); - fprintf(out_ptr, "interations = %d\n", iterations); - //fprintf(out_ptr, "avgscore = %f\n", avgscore); + fprintf(out_ptr, "iterations = %d\n", decon.iterations); + fprintf(out_ptr, "uniscore = %f\n", decon.uniscore); + fprintf(out_ptr, "mzsig = %f\n", config.mzsig); + fprintf(out_ptr, "zzsig = %f\n", config.zsig); + fprintf(out_ptr, "beta = %f\n", config.beta); + fprintf(out_ptr, "psig = %f\n", config.psig); fclose(out_ptr); printf("Stats and Error written to: %s\n", outstring3); } else { - write_attr_double(file_id, dataset, "error", error); - write_attr_int(file_id, dataset, "interations", iterations); + write_attr_double(file_id, dataset, "error", decon.error); + write_attr_int(file_id, dataset, "iterations", decon.iterations); write_attr_double(file_id, dataset, "time", totaltime); - write_attr_double(file_id, dataset, "blurmax", blurmax); - write_attr_double(file_id, dataset, "newblurmax", newblurmax); - write_attr_double(file_id, dataset, "cutoff", config.cutoff); - write_attr_int(file_id, dataset, "length_mz", lengthmz); - write_attr_int(file_id, dataset, "length_mass", maaxle); - //write_attr_int(file_id, dataset, "avgscore", avgscore); + write_attr_int(file_id, dataset, "length_mz", config.lengthmz); + write_attr_int(file_id, dataset, "length_mass", decon.mlen); + write_attr_double(file_id, dataset, "uniscore", decon.uniscore); + write_attr_double(file_id, dataset, "mzsig", config.mzsig); + write_attr_double(file_id, dataset, "zsig", config.zsig); + write_attr_double(file_id, dataset, "psig", config.psig); + write_attr_double(file_id, dataset, "beta", config.beta); set_needs_grids(file_id); H5Fclose(file_id); } //Free memory - free(mtab); - free(mzdist); - free(rmzdist); - free(closeval); - free(closemind); - free(closezind); - free(fitdat); - free(blur); - free(endtab); - free(dataMZ); - free(massaxisval); - free(dataInt); - free(starttab); - free(massaxis); - free(newblur); - free(oldblur); - free(mdist); - free(mind); - free(zind); - free(zdist); - free(nztab); - free(barr); - free(massgrid); - free(isotopepos); - free(isotopeval); - free(testmasses); - free(testmasspos); - free(baseline); - free(noise); - free(closeind); + FreeInputs(inp); + FreeDecon(decon); - printf("\nError: %f\n", error); + printf("\nError: %f\n", decon.error); //Final Check that iterations worked and a reporter of the time consumed - printf("\nFinished with %d iterations and %f convergence in %f seconds!\n\n", iterations, conv, totaltime); + printf("\nFinished with %d iterations and %f convergence in %f seconds!\n\n", decon.iterations, decon.conv, totaltime); return 0; } \ No newline at end of file