diff --git a/devutils/makesdist b/devutils/makesdist index 1d81cf0..a7e2676 100644 --- a/devutils/makesdist +++ b/devutils/makesdist @@ -17,11 +17,11 @@ sys.path.insert(0, BASEDIR) from setup import versiondata timestamp = versiondata.getint('DEFAULT', 'timestamp') -print 'Run "setup.py sdist --formats=tar"', +print('Run "setup.py sdist --formats=tar"', end=" ") cmd_sdist = [sys.executable] + 'setup.py sdist --formats=tar'.split() ec = subprocess.call(cmd_sdist, cwd=BASEDIR, stdout=open(os.devnull, 'w')) if ec: sys.exit(ec) -print "[done]" +print("[done]") tarname = max(glob.glob(BASEDIR + '/dist/*.tar'), key=os.path.getmtime) @@ -36,8 +36,8 @@ def fixtarinfo(tinfo): return tinfo -print 'Filter %s --> %s.gz' % (2 * (os.path.basename(tarname),)), +print('Filter %s --> %s.gz' % (2 * (os.path.basename(tarname),)), end=" ") for ti in tfin: tfout.addfile(fixtarinfo(ti), tfin.extractfile(ti)) os.remove(tarname) -print "[done]" +print("[done]") diff --git a/devutils/prep.py b/devutils/prep.py index c63ce0f..af3b446 100644 --- a/devutils/prep.py +++ b/devutils/prep.py @@ -8,6 +8,8 @@ __basedir__ = os.getcwdu() +from numpy.compat import unicode + # Example imports @@ -18,20 +20,21 @@ def __init__(self): def test(self, call, *args, **kwds): m = sys.modules[call.__module__] - testname = m.__name__+'.'+call.__name__ + testname = m.__name__ + "." + call.__name__ path = os.path.dirname(m.__file__) os.chdir(path) try: call(*args, **kwds) - self.messages.append("%s: success" %testname) - except Exception, e: - self.messages.append("%s: error, details below.\n%s" %(testname, e)) + self.messages.append("%s: success" % testname) + except Exception as e: + self.messages.append("%s: error, details below.\n%s" % (testname, e)) finally: os.chdir(__basedir__) def report(self): - print '==== Results of Tests ====' - print '\n'.join(self.messages) + print("==== Results of Tests ====") + print("\n".join(self.messages)) + def scrubeol(directory, filerestr): """Use unix-style endlines for files in directory matched by regex string. @@ -50,11 +53,11 @@ def scrubeol(directory, filerestr): text = unicode(original.read()) original.close() - updated = io.open(f, 'w', newline='\n') + updated = io.open(f, "w", newline="\n") updated.write(text) updated.close() - print "Updated %s to unix-style endlines." %f + print("Updated %s to unix-style endlines." % f) def rm(directory, filerestr): @@ -72,30 +75,31 @@ def rm(directory, filerestr): for f in files: os.remove(f) - print "Deleted %s." %f - + print("Deleted %s." % f) if __name__ == "__main__": # Temporarily add examples to path - lib_path = os.path.abspath(os.path.join('..','doc','examples')) + lib_path = os.path.abspath(os.path.join("..", "doc", "examples")) sys.path.append(lib_path)
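Editorial note on the makesdist hunks above: in Python 2 the trailing comma in `print 'Run "setup.py sdist --formats=tar"',` suppressed the newline, so the later `[done]` printed on the same line. A mechanical rewrite to `print('...',)` silently drops that behavior in Python 3, which is why the converted calls above spell the intent with the `end` keyword. A minimal sketch of the difference:

```python
# Python 3 equivalent of the Python 2 idiom `print "working...",`
# (print without advancing to the next line).
print("working...", end=" ")  # no newline; the cursor stays on this line
print("[done]")               # completes the line: "working... [done]"
```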
rm("../doc/examples/output", r"known_dG.*\.pwa") rm("../doc/examples/output", r"unknown_dG.*\.pwa") - ### Testing examples + # Testing examples examples = Test() - test_names = ["extract_single_peak", - "parameter_summary", - "fit_initial", - "query_results", - "multimodel_known_dG1", - "multimodel_known_dG2", - "multimodel_unknown_dG1", - "multimodel_unknown_dG2"] + test_names = [ + "extract_single_peak", + "parameter_summary", + "fit_initial", + "query_results", + "multimodel_known_dG1", + "multimodel_known_dG2", + "multimodel_unknown_dG1", + "multimodel_unknown_dG2", + ] test_modules = [] for test in test_names: @@ -106,10 +110,8 @@ def rm(directory, filerestr): examples.report() - ### Convert output of example files to Unix-style endlines for sdist. - if os.linesep != '\n': - print "==== Scrubbing Endlines ====" + # Convert output of example files to Unix-style endlines for sdist. + if os.linesep != "\n": + print("==== Scrubbing Endlines ====") # All *.srmise and *.pwa files in examples directory. scrubeol("../doc/examples/output", r".*(\.srmise|\.pwa)") - - diff --git a/diffpy/srmise/__init__.py b/diffpy/srmise/__init__.py index 532c767..f746b0f 100644 --- a/diffpy/srmise/__init__.py +++ b/diffpy/srmise/__init__.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columia University in the City of New York # All rights reserved. # # File coded by: Luke Granlund @@ -13,28 +14,5 @@ """Tools for peak extraction from PDF.""" -__all__ = [ - "basefunction", - "srmiseerrors", - "srmiselog", - "dataclusters", - "modelcluster", - "modelparts", - "pdfdataset", - "pdfpeakextraction", - "peakextraction", - "peakstability", - "multimodelselection", -] - -from basefunction import BaseFunction -from dataclusters import DataClusters -from modelcluster import ModelCluster, ModelCovariance -from modelparts import ModelPart, ModelParts -from multimodelselection import MultimodelSelection -from pdfdataset import PDFDataSet -from pdfpeakextraction import PDFPeakExtraction -from peakextraction import PeakExtraction -from peakstability import PeakStability from diffpy.srmise.version import __version__ diff --git a/diffpy/srmise/applications/__init__.py b/diffpy/srmise/applications/__init__.py index e9f32f6..5540acb 100644 --- a/diffpy/srmise/applications/__init__.py +++ b/diffpy/srmise/applications/__init__.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columia University in the City of New York # All rights reserved. 
# # File coded by: Luke Granlund diff --git a/diffpy/srmise/applications/extract.py b/diffpy/srmise/applications/extract.py index 5bbc646..c070704 100755 --- a/diffpy/srmise/applications/extract.py +++ b/diffpy/srmise/applications/extract.py @@ -11,7 +11,8 @@ # ############################################################################## -from optparse import OptionGroup, OptionParser +import textwrap +from optparse import IndentedHelpFormatter, OptionGroup, OptionParser import matplotlib.pyplot as plt import numpy as np @@ -20,285 +21,424 @@ def main(): """Default SrMise entry-point.""" - usage = ("usage: %prog pdf_file [options]\n" - "pdf_file is a file containing a PDF (accepts several " - "common formats), or a .srmise file.") + usage = ( + "usage: %prog pdf_file [options]\n" + "pdf_file is a file containing a PDF (accepts several " + "common formats), or a .srmise file." + ) from diffpy.srmise import __version__ - version = "diffpy.srmise "+__version__ - - descr = ("The SrMise package is a tool to aid extracting and fitting peaks " - "that comprise a pair distribution function. This script exposes " - "basic peak extraction functionality. For many PDFs it is " - "sufficient to specify the range, baseline, and sometimes an ad " - "hoc uncertainty. See the discussion of these options below for " - "further guidance.") - - epilog = ("Options set above override those from an existing .srmise " - "file, as well as the usual defaults summarized here.\n\n" - "Defaults (when qmax > 0)\n" - "------------------------\n" - "baseline - None (identically 0).\n" - "dg - The uncertainty reported in the PDF (if any), otherwise " - "5% of maximum value of PDF.\n" - "nyquist - True\n" - "range - All the data\n" - "cres - The Nyquist rate.\n" - "supersample - 4.0\n" - "scale - (Deprecated) False\n\n" - "Defaults (when qmax = 0)\n" - "------------------------\n" - "baseline - as above\n" - "dg - as above\n" - "nyquist - False (and no effect if True)\n" - "range - as above\n" - "cres - Four times the average distance between data points\n" - "supersample - Parameter has no effect.\n" - "scale - (Deprecated) False, and no effect if True\n\n" - "Known issues\n" - "------------\n" - "1) Peak extraction works best when the data are moderately " - "oversampled first. When qmax > 0 this is handled " - "automatically, but when qmax = 0 no resampling of any kind is " - "performed.\n" - "2) Peak extraction performed on a PDF file and a .srmise file " - "derived from that data with identical extraction parameters " - "can give different results even on the same platform. This is " - "because the original data may undergo some processing before it " - "can be saved by SrMise. For consistent results, always specify " - "the original PDF, or always load the PDF from a .srmise file " - "you save before performing any peak extraction on that data.\n" - "3) Liveplotting depends on the matplotlib backend, and doesn't " - "implement an idle handler, so interaction with its window will " - "likely cause a freeze.") + + version = "diffpy.srmise " + __version__ + + descr = ( + "The SrMise package is a tool to aid extracting and fitting peaks " + "that comprise a pair distribution function. This script exposes " + "basic peak extraction functionality. For many PDFs it is " + "sufficient to specify the range, baseline, and sometimes an ad " + "hoc uncertainty. See the discussion of these options below for " + "further guidance." 
+ ) + + epilog = ( + "Options set above override those from an existing .srmise " + "file, as well as the usual defaults summarized here.\n\n" + "Defaults (when qmax > 0)\n" + "------------------------\n" + "baseline - None (identically 0).\n" + "dg - The uncertainty reported in the PDF (if any), otherwise " + "5% of maximum value of PDF.\n" + "nyquist - True\n" + "range - All the data\n" + "cres - The Nyquist rate.\n" + "supersample - 4.0\n" + "scale - (Deprecated) False\n\n" + "Defaults (when qmax = 0)\n" + "------------------------\n" + "baseline - as above\n" + "dg - as above\n" + "nyquist - False (and no effect if True)\n" + "range - as above\n" + "cres - Four times the average distance between data points\n" + "supersample - Parameter has no effect.\n" + "scale - (Deprecated) False, and no effect if True\n\n" + "Known issues\n" + "------------\n" + "1) Peak extraction works best when the data are moderately " + "oversampled first. When qmax > 0 this is handled " + "automatically, but when qmax = 0 no resampling of any kind is " + "performed.\n" + "2) Peak extraction performed on a PDF file and a .srmise file " + "derived from that data with identical extraction parameters " + "can give different results even on the same platform. This is " + "because the original data may undergo some processing before it " + "can be saved by SrMise. For consistent results, always specify " + "the original PDF, or always load the PDF from a .srmise file " + "you save before performing any peak extraction on that data.\n" + "3) Liveplotting depends on the matplotlib backend, and doesn't " + "implement an idle handler, so interaction with its window will " + "likely cause a freeze." + ) # TODO: Move to argparse (though not in 2.6 by default) to handle # variable-length options without callbacks. Longterm, the major # value is using the same option to specify a baseline that should # use estimation vs. one that should use explicitly provided pars. - parser = OptionParser(usage=usage, description=descr, epilog=epilog, - version=version, - formatter=IndentedHelpFormatterWithNL()) - - parser.set_defaults(plot=False, liveplot=False, wait=False, - performextraction=True, verbosity="warning") - dg_defaults = {'absolute':None, 'data':None, 'max-fraction':.05, - 'ptp-fraction':.05, 'dG-fraction':1.} - - parser.add_option("--extract", action="store_true", - dest="performextraction", - help="[Default] Perform extraction.") - parser.add_option("--no-extract", action="store_false", - dest="performextraction", - help="Do not perform extraction.") - parser.add_option("--range", nargs=2, dest="rng", type="float", - metavar="rmin rmax", - help="Extract over the range (rmin, rmax).") - parser.add_option("--qmax", dest="qmax", type="string", metavar="QMAX", - help="Model peaks with this maximum q value.") - parser.add_option("--nyquist", action="store_true", dest="nyquist", - help="Use Nyquist resampling if qmax > 0.") - parser.add_option("--no-nyquist", action="store_false", dest="nyquist", - help="Do not use Nyquist resampling.") - parser.add_option("--pf", dest="peakfunction", metavar="PF", - help="Fit peak function PF defined in " - "diffpy.srmise.peaks, e.g. 
" - "'GaussianOverR(maxwidth=0.7)'") - parser.add_option("--cres", dest="cres", type="float", metavar="cres", - help="Clustering resolution.") - parser.add_option("--supersample", dest="supersample", type="float", - metavar="SS", - help="Minimum initial oversampling rate as multiple of " - "Nyquist rate.") - parser.add_option("--me", "-m", dest="modelevaluator", metavar="ME", - help="ModelEvaluator defined in " - "diffpy.srmise.modelevaluators, e.g. 'AIC'") - - group = OptionGroup(parser, "Baseline Options", - "SrMise cannot determine the appropriate type of " - "baseline (e.g. crystalline vs. some nanoparticle) " - "solely from the data, so the user should specify the " - "appropriate type and/or parameters. (Default is " - "identically 0, which is unphysical.) SrMise keeps the " - "PDF baseline fixed at its initial value until the " - "final stages of peak extraction, so results are " - "frequently conditioned on that choice. (See the " - "SrMise documentation for details.) A good estimate " - "is therefore important for best results. SrMise can " - "estimate initial parameters from the data for linear " - "baselines in some situations (all peaks are positive, " - "and the degree of overlap in the region of extraction " - "is not too great), but in most cases it is best to " - "provide reasonable initial parameters. Run 'srmise " - "pdf_file.gr [baseline_option] --no-extract --plot' " - "for different values of the parameters for rapid " - "visual estimation.") - group.add_option("--baseline", dest="baseline", metavar="BL", - help="Estimate baseline from baseline function BL " - "defined in diffpy.srmise.baselines, e.g. " - "'Polynomial(degree=1)'. All parameters are free. " - "(Many POSIX shells attempt to interpret the " - "parentheses, and on these shells the option should " - "be surrounded by quotation marks.)" ) - group.add_option("--bcrystal", dest="bcrystal", type="string", - metavar="rho0[c]", - help="Use linear baseline defined by crystal number " - "density rho0. Append 'c' to make parameter " - "constant. Equivalent to " - "'--bpoly1 -4*pi*rho0[c] 0c'.") - group.add_option("--bsrmise", dest="bsrmise", type="string", metavar="file", - help="Use baseline from specified .srmise file.") - group.add_option("--bpoly0", dest="bpoly0", type="string", metavar="a0[c]", - help="Use constant baseline given by y=a0. " - "Append 'c' to make parameter constant.") - group.add_option("--bpoly1", dest="bpoly1", type="string", nargs=2, - metavar="a1[c] a0[c]", - help="Use baseline given by y=a1*x + a0. Append 'c' to " - "make parameter constant.") - group.add_option("--bpoly2", dest="bpoly2", type="string", nargs=3, - metavar="a2[c] a1[c] a0[c]", - help="Use baseline given by y=a2*x^2+a1*x + a0. Append " - "'c' to make parameter constant.") - group.add_option("--bseq", dest="bseq", type="string", metavar="FILE", - help="Use baseline interpolated from x,y values in FILE. " - "This baseline has no free parameters.") - group.add_option("--bspherical", dest="bspherical", type="string", nargs=2, - metavar="s[c] r[c]", - help="Use spherical nanoparticle baseline with scale s " - "and radius r. 
Append 'c' to make parameter " - "constant.") + parser = OptionParser( + usage=usage, + description=descr, + epilog=epilog, + version=version, + formatter=IndentedHelpFormatterWithNL(), + ) + + parser.set_defaults( + plot=False, + liveplot=False, + wait=False, + performextraction=True, + verbosity="warning", + ) + dg_defaults = { + "absolute": None, + "data": None, + "max-fraction": 0.05, + "ptp-fraction": 0.05, + "dG-fraction": 1.0, + } + + parser.add_option( + "--extract", + action="store_true", + dest="performextraction", + help="[Default] Perform extraction.", + ) + parser.add_option( + "--no-extract", + action="store_false", + dest="performextraction", + help="Do not perform extraction.", + ) + parser.add_option( + "--range", + nargs=2, + dest="rng", + type="float", + metavar="rmin rmax", + help="Extract over the range (rmin, rmax).", + ) + parser.add_option( + "--qmax", + dest="qmax", + type="string", + metavar="QMAX", + help="Model peaks with this maximum q value.", + ) + parser.add_option( + "--nyquist", + action="store_true", + dest="nyquist", + help="Use Nyquist resampling if qmax > 0.", + ) + parser.add_option( + "--no-nyquist", + action="store_false", + dest="nyquist", + help="Do not use Nyquist resampling.", + ) + parser.add_option( + "--pf", + dest="peakfunction", + metavar="PF", + help="Fit peak function PF defined in " "diffpy.srmise.peaks, e.g. " "'GaussianOverR(maxwidth=0.7)'", + ) + parser.add_option( + "--cres", + dest="cres", + type="float", + metavar="cres", + help="Clustering resolution.", + ) + parser.add_option( + "--supersample", + dest="supersample", + type="float", + metavar="SS", + help="Minimum initial oversampling rate as multiple of " "Nyquist rate.", + ) + parser.add_option( + "--me", + "-m", + dest="modelevaluator", + metavar="ME", + help="ModelEvaluator defined in " "diffpy.srmise.modelevaluators, e.g. 'AIC'", + ) + + group = OptionGroup( + parser, + "Baseline Options", + "SrMise cannot determine the appropriate type of " + "baseline (e.g. crystalline vs. some nanoparticle) " + "solely from the data, so the user should specify the " + "appropriate type and/or parameters. (Default is " + "identically 0, which is unphysical.) SrMise keeps the " + "PDF baseline fixed at its initial value until the " + "final stages of peak extraction, so results are " + "frequently conditioned on that choice. (See the " + "SrMise documentation for details.) A good estimate " + "is therefore important for best results. SrMise can " + "estimate initial parameters from the data for linear " + "baselines in some situations (all peaks are positive, " + "and the degree of overlap in the region of extraction " + "is not too great), but in most cases it is best to " + "provide reasonable initial parameters. Run 'srmise " + "pdf_file.gr [baseline_option] --no-extract --plot' " + "for different values of the parameters for rapid " + "visual estimation.", + ) + group.add_option( + "--baseline", + dest="baseline", + metavar="BL", + help="Estimate baseline from baseline function BL " + "defined in diffpy.srmise.baselines, e.g. " + "'Polynomial(degree=1)'. All parameters are free. " + "(Many POSIX shells attempt to interpret the " + "parentheses, and on these shells the option should " + "be surrounded by quotation marks.)", + ) + group.add_option( + "--bcrystal", + dest="bcrystal", + type="string", + metavar="rho0[c]", + help="Use linear baseline defined by crystal number " + "density rho0. Append 'c' to make parameter " + "constant. 
Equivalent to " + "'--bpoly1 -4*pi*rho0[c] 0c'.", + ) + group.add_option( + "--bsrmise", + dest="bsrmise", + type="string", + metavar="file", + help="Use baseline from specified .srmise file.", + ) + group.add_option( + "--bpoly0", + dest="bpoly0", + type="string", + metavar="a0[c]", + help="Use constant baseline given by y=a0. " "Append 'c' to make parameter constant.", + ) + group.add_option( + "--bpoly1", + dest="bpoly1", + type="string", + nargs=2, + metavar="a1[c] a0[c]", + help="Use baseline given by y=a1*x + a0. Append 'c' to " "make parameter constant.", + ) + group.add_option( + "--bpoly2", + dest="bpoly2", + type="string", + nargs=3, + metavar="a2[c] a1[c] a0[c]", + help="Use baseline given by y=a2*x^2+a1*x + a0. Append " "'c' to make parameter constant.", + ) + group.add_option( + "--bseq", + dest="bseq", + type="string", + metavar="FILE", + help="Use baseline interpolated from x,y values in FILE. " "This baseline has no free parameters.", + ) + group.add_option( + "--bspherical", + dest="bspherical", + type="string", + nargs=2, + metavar="s[c] r[c]", + help="Use spherical nanoparticle baseline with scale s " + "and radius r. Append 'c' to make parameter " + "constant.", + ) parser.add_option_group(group) - - group = OptionGroup(parser, "Uncertainty Options", - "Ideally a PDF reports the accurate experimentally " - "determined uncertainty. In practice, many PDFs " - "report none, while for others the reported values " - "are not necessarily reliable. (If in doubt, ask your " - "friendly neighborhood diffraction expert!) Even when " - "uncertainties are accurate, it can be " - "pragmatically useful to see how the results of " - "peak extraction change when assuming a different " - "value. Nevertheless, the primary determinant of " - "model complexity in SrMise is the uncertainty, so an " - "ad hoc uncertainty yields ad hoc model complexity. " - "See the SrMise documentation for further discussion, " - "including methods to mitigate this issue with " - "multimodel selection.") - group.add_option("--dg-mode", dest="dg_mode", type="choice", - choices=['absolute', 'data', 'max-fraction', - 'ptp-fraction', 'dG-fraction'], - help="Define how values passed to '--dg' are treated. " - "Possible values are: \n" - "'absolute' - The actual uncertainty in the PDF.\n" - "'max-fraction' - Fraction of max value in PDF.\n" - "'ptp-fraction' - Fraction of max minus min value " - "in the PDF.\n" - "'dG-fraction' - Fraction of dG reported by PDF.\n" - "If '--dg' is specified but mode is not, then mode " - "ia absolute. Otherwise, 'dG-fraction' is default " - "if the PDF reports uncertaintes, and 'max-fraction' " - "ia default if it does not.") - group.add_option("--dg", dest="dg", type="float", - help="Perform extraction assuming uncertainty dg. " - "Defaults depend on --dg-mode as follows:\n" - "'absolute'=%s\n" - "'max-fraction'=%s\n" - "'ptp-fraction'=%s\n" - "'dG-fraction'=%s" %(dg_defaults['absolute'], - dg_defaults['max-fraction'], - dg_defaults['ptp-fraction'], - dg_defaults['dG-fraction'])) -# group.add_option("--multimodel", nargs=3, dest="multimodel", type="float", -# metavar="dg_min dg_max n", -# help="Generate n models from dg_min to dg_max (given by " -# "--dg-mode) and perform multimodel analysis. " -# "This overrides any value given for --dg") + group = OptionGroup( + parser, + "Uncertainty Options", + "Ideally a PDF reports the accurate experimentally " + "determined uncertainty. 
In practice, many PDFs " + "report none, while for others the reported values " + "are not necessarily reliable. (If in doubt, ask your " + "friendly neighborhood diffraction expert!) Even when " + "uncertainties are accurate, it can be " + "pragmatically useful to see how the results of " + "peak extraction change when assuming a different " + "value. Nevertheless, the primary determinant of " + "model complexity in SrMise is the uncertainty, so an " + "ad hoc uncertainty yields ad hoc model complexity. " + "See the SrMise documentation for further discussion, " + "including methods to mitigate this issue with " + "multimodel selection.", + ) + group.add_option( + "--dg-mode", + dest="dg_mode", + type="choice", + choices=["absolute", "data", "max-fraction", "ptp-fraction", "dG-fraction"], + help="Define how values passed to '--dg' are treated. " + "Possible values are: \n" + "'absolute' - The actual uncertainty in the PDF.\n" + "'max-fraction' - Fraction of max value in PDF.\n" + "'ptp-fraction' - Fraction of max minus min value " + "in the PDF.\n" + "'dG-fraction' - Fraction of dG reported by PDF.\n" + "If '--dg' is specified but mode is not, then mode " + "is absolute. Otherwise, 'dG-fraction' is default " + "if the PDF reports uncertainties, and 'max-fraction' " + "is default if it does not.", + ) + group.add_option( + "--dg", + dest="dg", + type="float", + help="Perform extraction assuming uncertainty dg. " + "Defaults depend on --dg-mode as follows:\n" + "'absolute'=%s\n" + "'max-fraction'=%s\n" + "'ptp-fraction'=%s\n" + "'dG-fraction'=%s" + % ( + dg_defaults["absolute"], + dg_defaults["max-fraction"], + dg_defaults["ptp-fraction"], + dg_defaults["dG-fraction"], + ), + ) + # group.add_option("--multimodel", nargs=3, dest="multimodel", type="float", + # metavar="dg_min dg_max n", + # help="Generate n models from dg_min to dg_max (given by " + # "--dg-mode) and perform multimodel analysis. 
" + # "This overrides any value given for --dg") parser.add_option_group(group) - - group = OptionGroup(parser, "Saving and Plotting Options", - "") - group.add_option("--pwa", dest="pwafile", metavar="FILE", - help="Save summary of result to FILE (.pwa format).") - group.add_option("--save", dest="savefile", metavar="FILE", - help="Save result of extraction to FILE (.srmise " - "format).") - group.add_option("--plot", "-p", action="store_true", dest="plot", - help="Plot extracted peaks.") - group.add_option("--liveplot", "-l", action="store_true", dest="liveplot", - help="(Experimental) Plot extracted peaks when fitting.") - group.add_option("--wait", "-w", action="store_true", dest="wait", - help="(Experimental) When using liveplot wait for user " - "after plotting.") + group = OptionGroup(parser, "Saving and Plotting Options", "") + group.add_option( + "--pwa", + dest="pwafile", + metavar="FILE", + help="Save summary of result to FILE (.pwa format).", + ) + group.add_option( + "--save", + dest="savefile", + metavar="FILE", + help="Save result of extraction to FILE (.srmise " "format).", + ) + group.add_option("--plot", "-p", action="store_true", dest="plot", help="Plot extracted peaks.") + group.add_option( + "--liveplot", + "-l", + action="store_true", + dest="liveplot", + help="(Experimental) Plot extracted peaks when fitting.", + ) + group.add_option( + "--wait", + "-w", + action="store_true", + dest="wait", + help="(Experimental) When using liveplot wait for user " "after plotting.", + ) parser.add_option_group(group) - - group = OptionGroup(parser, "Verbosity Options", - "Control detail printed to console.") - group.add_option("--informative", "-i", action="store_const", const="info", - dest="verbosity", - help="Summary of progress.") - group.add_option("--quiet", "-q", action="store_const", const="warning", - dest="verbosity", - help="[Default] Show minimal summary.") - group.add_option("--silent", "-s", action="store_const", const="critical", - dest="verbosity", - help="No non-critical output.") - group.add_option("--verbose", "-v", action="store_const", const="debug", - dest="verbosity", - help="Show verbose output.") + group = OptionGroup(parser, "Verbosity Options", "Control detail printed to console.") + group.add_option( + "--informative", + "-i", + action="store_const", + const="info", + dest="verbosity", + help="Summary of progress.", + ) + group.add_option( + "--quiet", + "-q", + action="store_const", + const="warning", + dest="verbosity", + help="[Default] Show minimal summary.", + ) + group.add_option( + "--silent", + "-s", + action="store_const", + const="critical", + dest="verbosity", + help="No non-critical output.", + ) + group.add_option( + "--verbose", + "-v", + action="store_const", + const="debug", + dest="verbosity", + help="Show verbose output.", + ) parser.add_option_group(group) - group = OptionGroup(parser, "Deprecated Options", - "Not for general use.") - group.add_option("--scale", action="store_true", dest="scale", - help="(Deprecated) Scale supersampled uncertainties by " - "sqrt(oversampling) in intermediate steps when " - "Nyquist sampling.") - group.add_option("--no-scale", action="store_false", dest="scale", - help="(Deprecated) Never rescale uncertainties.") + group = OptionGroup(parser, "Deprecated Options", "Not for general use.") + group.add_option( + "--scale", + action="store_true", + dest="scale", + help="(Deprecated) Scale supersampled uncertainties by " + "sqrt(oversampling) in intermediate steps when " + "Nyquist sampling.", + ) + 
group.add_option( + "--no-scale", + action="store_false", + dest="scale", + help="(Deprecated) Never rescale uncertainties.", + ) parser.add_option_group(group) - (options, args) = parser.parse_args() if len(args) != 1: - parser.error("Exactly one argument required. \n"+usage) - + parser.error("Exactly one argument required. \n" + usage) from diffpy.srmise import srmiselog + srmiselog.setlevel(options.verbosity) from diffpy.srmise.pdfpeakextraction import PDFPeakExtraction from diffpy.srmise.srmiseerrors import SrMiseDataFormatError, SrMiseFileError if options.peakfunction is not None: from diffpy.srmise import peaks - try: - options.peakfunction = eval("peaks."+options.peakfunction) - except Exception, err: - print err - print "Could not create peak function '%s'. Exiting." \ - %options.peakfunction - return + try: + options.peakfunction = eval("peaks." + options.peakfunction) + except Exception as err: + print(err) + print("Could not create peak function '%s'. Exiting." % options.peakfunction) + return if options.modelevaluator is not None: from diffpy.srmise import modelevaluators - try: - options.modelevaluator = \ - eval("modelevaluators."+options.modelevaluator) - except Exception, err: - print err - print "Could not find ModelEvaluator '%s'. Exiting." \ - %options.modelevaluator - return + try: + options.modelevaluator = eval("modelevaluators." + options.modelevaluator) + except Exception as err: + print(err) + print("Could not find ModelEvaluator '%s'. Exiting." % options.modelevaluator) + return if options.bcrystal is not None: from diffpy.srmise.baselines import Polynomial + bl = Polynomial(degree=1) - options.baseline = parsepars(bl, [options.bcrystal, '0c']) - options.baseline.pars[0] = -4*np.pi*options.baseline.pars[0] + options.baseline = parsepars(bl, [options.bcrystal, "0c"]) + options.baseline.pars[0] = -4 * np.pi * options.baseline.pars[0] elif options.bsrmise is not None: # use baseline from existing file blext = PDFPeakExtraction() @@ -306,31 +446,35 @@ def main(): options.baseline = blext.extracted.baseline elif options.bpoly0 is not None: from diffpy.srmise.baselines import Polynomial + bl = Polynomial(degree=0) options.baseline = parsepars(bl, [options.bpoly0]) elif options.bpoly1 is not None: from diffpy.srmise.baselines import Polynomial + bl = Polynomial(degree=1) options.baseline = parsepars(bl, options.bpoly1) elif options.bpoly2 is not None: from diffpy.srmise.baselines import Polynomial + bl = Polynomial(degree=2) options.baseline = parsepars(bl, options.bpoly2) elif options.bseq is not None: from diffpy.srmise.baselines import FromSequence + bl = FromSequence(options.bseq) options.baseline = bl.actualize([], "internal") elif options.bspherical is not None: from diffpy.srmise.baselines import NanoSpherical + bl = NanoSpherical() options.baseline = parsepars(bl, options.bspherical) elif options.baseline is not None: from diffpy.srmise import baselines + try: - options.baseline = eval("baselines."+options.baseline) - except Exception, err: - print err - print "Could not create baseline '%s'. Exiting." %options.baseline + options.baseline = eval("baselines." + options.baseline) + except Exception as err: + print(err) + print("Could not create baseline '%s'. Exiting." 
% options.baseline) return filename = args[0] @@ -359,13 +503,13 @@ def main(): if options.dg is None: options.dg = dg_defaults[options.dg_mode] if options.dg_mode == "absolute": - pdict["effective_dy"] = options.dg*np.ones(len(ext.x)) + pdict["effective_dy"] = options.dg * np.ones(len(ext.x)) elif options.dg_mode == "max-fraction": - pdict["effective_dy"] = options.dg*ext.y.max()*np.ones(len(ext.x)) + pdict["effective_dy"] = options.dg * ext.y.max() * np.ones(len(ext.x)) elif options.dg_mode == "ptp-fraction": - pdict["effective_dy"] = options.dg*ext.y.ptp()*np.ones(len(ext.y)) + pdict["effective_dy"] = options.dg * ext.y.ptp() * np.ones(len(ext.y)) elif options.dg_mode == "dG-fraction": - pdict["effective_dy"] = options.dg*ext.dy + pdict["effective_dy"] = options.dg * ext.dy if options.rng is not None: pdict["rng"] = list(options.rng) if options.qmax is not None: @@ -381,41 +525,41 @@ def main(): if options.liveplot: from diffpy.srmise import srmiselog + srmiselog.liveplotting(True, options.wait) ext.setvars(**pdict) cov = None if options.performextraction: cov = ext.extract() - out = ext.extracted if options.savefile is not None: try: ext.write(options.savefile) - except SrMiseFileError, err: - print err - print "Could not save result to '%s'." %options.savefile - + except SrMiseFileError as err: + print(err) + print("Could not save result to '%s'." % options.savefile) if options.pwafile is not None: try: ext.writepwa(options.pwafile) - except SrMiseFileError, err: - print err - print "Could not save pwa summary to '%s'." %options.pwafile - + except SrMiseFileError as err: + print(err) + print("Could not save pwa summary to '%s'." % options.pwafile) - print ext + print(ext) if cov is not None: - print cov + print(cov) if options.plot: from diffpy.srmise.applications.plot import makeplot + makeplot(ext) plt.show() elif options.liveplot: plt.show() + def parsepars(mp, parseq): """Return actualized model from sequence of strings. @@ -430,7 +574,7 @@ def parsepars(mp, parseq): pars = [] free = [] for p in parseq: - if p[-1] == 'c': + if p[-1] == "c": pars.append(float(p[0:-1])) free.append(False) else: @@ -439,70 +583,65 @@ def parsepars(mp, parseq): return mp.actualize(pars, "internal", free=free) -### Class to preserve newlines in optparse +# Class to preserve newlines in optparse # Borrowed, with minor changes, from # http://groups.google.com/group/comp.lang.python/browse_frm/thread/6df6e6b541a15bc2/09f28e26af0699b1 -import textwrap -from optparse import IndentedHelpFormatter - class IndentedHelpFormatterWithNL(IndentedHelpFormatter): - def _format_text(self, text): - if not text: return "" - text_width = self.width - self.current_indent - indent = " "*self.current_indent -# the above is still the same - bits = text.split('\n') - formatted_bits = [ - textwrap.fill(bit, - text_width, - initial_indent=indent, - subsequent_indent=indent) - for bit in bits] - result = "\n".join(formatted_bits) + "\n" - return result - - def format_option(self, option): - # The help for each option consists of two parts: - # * the opt strings and metavars - # eg. ("-x", or "-fFILENAME, --file=FILENAME") - # * the user-supplied help string - # eg. ("turn on expert mode", "read data from FILENAME") - # - # If possible, we write both of these on the same line: - # -x turn on expert mode - # - # But if the opt string list is too long, we put the help - # string on a second line, indented to the same column it would - # start in if it fit on the first line. 
- # -fFILENAME, --file=FILENAME - # read data from FILENAME - result = [] - opts = self.option_strings[option] - opt_width = self.help_position - self.current_indent - 2 - if len(opts) > opt_width: - opts = "%*s%s\n" % (self.current_indent, "", opts) - indent_first = self.help_position - else: # start help on same line as opts - opts = "%*s%-*s " % (self.current_indent, "", opt_width, opts) - indent_first = 0 - result.append(opts) - if option.help: - help_text = self.expand_default(option) -# Everything is the same up through here - help_lines = [] - for para in help_text.split("\n"): - help_lines.extend(textwrap.wrap(para, self.help_width)) -# Everything is the same after here - result.append("%*s%s\n" % ( - indent_first, "", help_lines[0])) - result.extend(["%*s%s\n" % (self.help_position, "", line) - for line in help_lines[1:]]) - elif opts[-1] != "\n": - result.append("\n") - return "".join(result) -### End class + def _format_text(self, text): + if not text: + return "" + text_width = self.width - self.current_indent + indent = " " * self.current_indent + # the above is still the same + bits = text.split("\n") + formatted_bits = [ + textwrap.fill(bit, text_width, initial_indent=indent, subsequent_indent=indent) for bit in bits + ] + result = "\n".join(formatted_bits) + "\n" + return result + + def format_option(self, option): + # The help for each option consists of two parts: + # * the opt strings and metavars + # eg. ("-x", or "-fFILENAME, --file=FILENAME") + # * the user-supplied help string + # eg. ("turn on expert mode", "read data from FILENAME") + # + # If possible, we write both of these on the same line: + # -x turn on expert mode + # + # But if the opt string list is too long, we put the help + # string on a second line, indented to the same column it would + # start in if it fit on the first line. + # -fFILENAME, --file=FILENAME + # read data from FILENAME + result = [] + opts = self.option_strings[option] + opt_width = self.help_position - self.current_indent - 2 + if len(opts) > opt_width: + opts = "%*s%s\n" % (self.current_indent, "", opts) + indent_first = self.help_position + else: # start help on same line as opts + opts = "%*s%-*s " % (self.current_indent, "", opt_width, opts) + indent_first = 0 + result.append(opts) + if option.help: + help_text = self.expand_default(option) + # Everything is the same up through here + help_lines = [] + for para in help_text.split("\n"): + help_lines.extend(textwrap.wrap(para, self.help_width)) + # Everything is the same after here + result.append("%*s%s\n" % (indent_first, "", help_lines[0])) + result.extend(["%*s%s\n" % (self.help_position, "", line) for line in help_lines[1:]]) + elif opts[-1] != "\n": + result.append("\n") + return "".join(result) + + # End class if __name__ == "__main__": main() diff --git a/diffpy/srmise/applications/plot.py b/diffpy/srmise/applications/plot.py index b696aa8..5dc59e1 100755 --- a/diffpy/srmise/applications/plot.py +++ b/diffpy/srmise/applications/plot.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columbia University in the City of New York # All rights reserved.
# # File coded by: Luke Granlund @@ -18,7 +19,7 @@ import matplotlib.pyplot as plt import mpl_toolkits.axisartist as AA import numpy as np -from mpl_toolkits.axes_grid1 import make_axes_locatable +from matplotlib.ticker import MultipleLocator from mpl_toolkits.axes_grid1.inset_locator import inset_axes from diffpy.srmise import PDFPeakExtraction, PeakStability @@ -27,49 +28,63 @@ # For a given figure, returns a label of interest labeldict = {} -default_gobs_style = {'color' : 'b', 'linestyle' : '', - 'markeredgecolor' : 'b', 'marker' : 'o', - 'markerfacecolor' : 'none', 'markersize' : 4} - -default_gfit_style = {'color' : 'g'} -default_gind_style = {'facecolor' : 'green', 'alpha' : 0.2} +default_gobs_style = { + "color": "b", + "linestyle": "", + "markeredgecolor": "b", + "marker": "o", + "markerfacecolor": "none", + "markersize": 4, +} + +default_gfit_style = {"color": "g"} +default_gind_style = {"facecolor": "green", "alpha": 0.2} default_gres_style = {} default_ep_style = {} default_ip_style = {} -default_dg_style = {'linestyle' : 'none', 'color' : 'black', - 'marker' : 'o', 'markerfacecolor' : 'black', - 'markeredgecolor' : 'black', - 'markersize' : 1, 'antialiased': False} +default_dg_style = { + "linestyle": "none", + "color": "black", + "marker": "o", + "markerfacecolor": "black", + "markeredgecolor": "black", + "markersize": 1, + "antialiased": False, +} def setfigformat(figsize): from matplotlib import rc - rc('legend', numpoints=2) - rc('figure', figsize=figsize) - rc('axes', titlesize=12, labelsize=11) - rc('xtick', labelsize=10) - rc('ytick', labelsize=10) - rc('lines', linewidth=0.75, markeredgewidth=0.5) + + rc("legend", numpoints=2) + rc("figure", figsize=figsize) + rc("axes", titlesize=12, labelsize=11) + rc("xtick", labelsize=10) + rc("ytick", labelsize=10) + rc("lines", linewidth=0.75, markeredgewidth=0.5) return + def gr_formataxis(ax=None): - if ax is None: ax = plt.gca() + if ax is None: + ax = plt.gca() ax.xaxis.set_minor_locator(MultipleLocator(1)) - ax.yaxis.set_label_position('left') + ax.yaxis.set_label_position("left") ax.yaxis.tick_left() - ax.yaxis.set_ticks_position('both') + ax.yaxis.set_ticks_position("both") return + def comparepositions(ppe, ip=None, **kwds): ax = kwds.get("ax", plt.gca()) - base = kwds.get("base", 0.) - yideal = kwds.get("yideal", -1.) - yext = kwds.get("yext", 1.) 
+ base = kwds.get("base", 0.0) + yideal = kwds.get("yideal", -1.0) + yext = kwds.get("yext", 1.0) ep_style = kwds.get("ep_style", default_ep_style) ip_style = kwds.get("ip_style", default_ip_style) - yideal_label = kwds.get("yideal_label", r'ideal') - yext_label = kwds.get("yext_label", r'found') + yideal_label = kwds.get("yideal_label", r"ideal") + yext_label = kwds.get("yext_label", r"found") pmin = kwds.get("pmin", -np.inf) pmax = kwds.get("pmax", np.inf) @@ -77,39 +92,38 @@ def comparepositions(ppe, ip=None, **kwds): ep = [p for p in ep if p >= pmin and p <= pmax] if ip is not None: - xi = np.NaN + np.zeros(3*len(ip)) + xi = np.NaN + np.zeros(3 * len(ip)) xi[0::3] = ip xi[1::3] = ip yi = np.zeros_like(xi) + base yi[1::3] += yideal - plt.plot(xi, yi, 'b', lw=1.5, **ip_style) + plt.plot(xi, yi, "b", lw=1.5, **ip_style) - xe = np.NaN + np.zeros(3*len(ep)) + xe = np.NaN + np.zeros(3 * len(ep)) xe[0::3] = ep xe[1::3] = ep ye = np.zeros_like(xe) + base ye[1::3] += yext - plt.plot(xe, ye, 'g', lw=1.5, **ep_style) + plt.plot(xe, ye, "g", lw=1.5, **ep_style) if ip is not None: - yb = (base, base) - plt.axhline(base, linestyle=":", color="k" ) - ax.yaxis.set_ticks([base+.5*yideal, base+.5*yext]) + plt.axhline(base, linestyle=":", color="k") + ax.yaxis.set_ticks([base + 0.5 * yideal, base + 0.5 * yext]) ax.yaxis.set_ticklabels([yideal_label, yext_label]) else: - ax.yaxis.set_ticks([base+.5*yext]) + ax.yaxis.set_ticks([base + 0.5 * yext]) ax.yaxis.set_ticklabels([yext_label]) # Set ylim explicitly, for case where yext is empty. if ip is not None: - plt.ylim(base+yideal, base+yext) + plt.ylim(base + yideal, base + yext) else: - plt.ylim(base, base+yext) + plt.ylim(base, base + yext) for tick in ax.yaxis.get_major_ticks(): tick.tick1line.set_markersize(0) tick.tick2line.set_markersize(0) - tick.label1.set_verticalalignment('center') + tick.label1.set_verticalalignment("center") tick.label1.set_fontsize(8) ticks = ax.yaxis.get_major_ticks() ticks[-1].label1.set_color("green") @@ -117,37 +131,45 @@ def comparepositions(ppe, ip=None, **kwds): ticks[0].label1.set_color("blue") return + def dgseries(stability, **kwds): - ax = kwds.get("ax", plt.gca()) + kwds.get("ax", plt.gca()) dg_style = kwds.get("dg_style", default_dg_style) - scale = kwds.get("scale", 1.) + scale = kwds.get("scale", 1.0) - dgmin = kwds.get("dgmin", stability.results[0][0])*scale - dgmax = kwds.get("dgmax", stability.results[-1][0])*scale + dgmin = kwds.get("dgmin", stability.results[0][0]) * scale + dgmax = kwds.get("dgmax", stability.results[-1][0]) * scale - pmin = kwds.get("pmin", 0.) 
+ pmin = kwds.get("pmin", 0.0) pmax = kwds.get("pmax", np.inf) x = [] y = [] for dg, peaks, bl, dr in stability.results: - if dg*scale < dgmin or dg*scale > dgmax: + if dg * scale < dgmin or dg * scale > dgmax: continue peakpos = [p["position"] for p in peaks] peakpos = [p for p in peakpos if p >= pmin and p <= pmax] x.extend(peakpos) - y.extend(np.zeros_like(peakpos) + dg*scale) + y.extend(np.zeros_like(peakpos) + dg * scale) plt.plot(x, y, **dg_style) + def labelallsubplots(): rv = [] - for i, c in enumerate('abcd'): + for i, c in enumerate("abcd"): plt.subplot(221 + i) s = "(%s)" % c - ht = plt.text(0.04, 0.95, s, - horizontalalignment='left', verticalalignment='top', - transform=gca().transAxes, weight='bold') + ht = plt.text( + 0.04, + 0.95, + s, + horizontalalignment="left", + verticalalignment="top", + transform=plt.gca().transAxes, + weight="bold", + ) rv.append(ht) return rv @@ -164,14 +186,14 @@ def makeplot(ppe_or_stability, ip=None, **kwds): if ppe.extracted is None: # Makeplot requires a ModelCluster, so whip one up. from diffpy.srmise import ModelCluster - ppe.defaultvars() # Make sure everything has some setting. This - # shouldn't have harmful side effects. + + ppe.defaultvars() # Make sure everything has some setting. This + # shouldn't have harmful side effects. rangeslice = ppe.getrangeslice() x = ppe.x[rangeslice] y = ppe.y[rangeslice] dy = ppe.effective_dy[rangeslice] - mcluster = ModelCluster(ppe.initial_peaks, ppe.baseline, x, y, \ - dy, None, ppe.error_method, ppe.pf) + mcluster = ModelCluster(ppe.initial_peaks, ppe.baseline, x, y, dy, None, ppe.error_method, ppe.pf) ext = mcluster else: ext = ppe.extracted @@ -189,17 +211,17 @@ def makeplot(ppe_or_stability, ip=None, **kwds): # Define heights and interstitial offsets # All values in percent of main axis. - top_offset = kwds.get("top_offset", 0.) - dg_height = kwds.get("dg_height", 15. if stability is not None else 0.) - cmp_height = kwds.get("cmp_height", 15. if ip is not None else 7.5) - datatop_offset = kwds.get("datatop_offset", 3.) + top_offset = kwds.get("top_offset", 0.0) + dg_height = kwds.get("dg_height", 15.0 if stability is not None else 0.0) + cmp_height = kwds.get("cmp_height", 15.0 if ip is not None else 7.5) + datatop_offset = kwds.get("datatop_offset", 3.0) # <- Data appears here -> - databottom_offset = kwds.get("databottom_offset", 3.) + databottom_offset = kwds.get("databottom_offset", 3.0) # <- Residual appears here -> - bottom_offset = kwds.get("bottom_offset", 3.) 
+ bottom_offset = kwds.get("bottom_offset", 3.0) # Style options - dg_style = kwds.get("dg_style", default_dg_style) + kwds.get("dg_style", default_dg_style) gobs_style = kwds.get("gobs_style", default_gobs_style) gfit_style = kwds.get("gfit_style", default_gfit_style) gind_style = kwds.get("gind_style", default_gind_style) @@ -208,83 +230,81 @@ def makeplot(ppe_or_stability, ip=None, **kwds): ip_style = kwds.get("ip_style", default_ip_style) # Label options - userxlabel = kwds.get("xlabel", r'r ($\mathrm{\AA}$)') - userylabel = kwds.get("ylabel", r'G ($\mathrm{\AA^{-2}}$)') - datalabelx = kwds.get("datalabelx", .04) - yideal_label = kwds.get("yideal_label", r'ideal') - yext_label = kwds.get("yext_label", r'found') + userxlabel = kwds.get("xlabel", r"r ($\mathrm{\AA}$)") + userylabel = kwds.get("ylabel", r"G ($\mathrm{\AA^{-2}}$)") + datalabelx = kwds.get("datalabelx", 0.04) + yideal_label = kwds.get("yideal_label", r"ideal") + yext_label = kwds.get("yext_label", r"found") # Other options datalabel = kwds.get("datalabel", None) - dgformatstr = kwds.get("dgformatstr", r'$\delta$g=%f') - dgformatpost = kwds.get("dgformatpost", None) #->userfunction(string) + dgformatstr = kwds.get("dgformatstr", r"$\delta$g=%f") + kwds.get("dgformatpost", None) # ->userfunction(string) show_fit = kwds.get("show_fit", True) show_individual = kwds.get("show_individual", True) fill_individual = kwds.get("fill_individual", True) show_observed = kwds.get("show_observed", True) show_residual = kwds.get("show_residual", True) - mask_residual = kwds.get("mask_residual", False) #-> number + mask_residual = kwds.get("mask_residual", False) # -> number show_annotation = kwds.get("show_annotation", True) - scale = kwds.get("scale", 1.) # Apply a global scaling factor to the data - - + scale = kwds.get("scale", 1.0) # Apply a global scaling factor to the data # Define the various data which will be plotted r = ext.r_cluster - dr = (r[-1]-r[0])/len(r) - rexpand = np.concatenate((np.arange(r[0]-dr, xlo, -dr)[::-1], r, np.arange(r[-1]+dr, xhi+dr, dr))) - rfine = np.arange(r[0], r[-1], .1*dr) - gr_obs = np.array(resample(ppe.x, ppe.y, rexpand))*scale - #gr_fit = resample(r, ext.value(), rfine) - gr_fit = np.array(ext.value(rfine))*scale - gr_fit_baseline = np.array(ext.valuebl(rfine))*scale - gr_fit_ind = [gr_fit_baseline + np.array(p.value(rfine))*scale for p in ext.model] - gr_res = np.array(ext.residual())*scale + dr = (r[-1] - r[0]) / len(r) + rexpand = np.concatenate((np.arange(r[0] - dr, xlo, -dr)[::-1], r, np.arange(r[-1] + dr, xhi + dr, dr))) + rfine = np.arange(r[0], r[-1], 0.1 * dr) + gr_obs = np.array(resample(ppe.x, ppe.y, rexpand)) * scale + # gr_fit = resample(r, ext.value(), rfine) + gr_fit = np.array(ext.value(rfine)) * scale + gr_fit_baseline = np.array(ext.valuebl(rfine)) * scale + gr_fit_ind = [gr_fit_baseline + np.array(p.value(rfine)) * scale for p in ext.model] + gr_res = np.array(ext.residual()) * scale if mask_residual: gr_res = np.ma.masked_outside(gr_res, -mask_residual, mask_residual) all_gr = [] - if show_fit: all_gr.append(gr_fit) - #if show_individual: all_gr.extend([gr_fit_baseline, gr_fit_ind]) + if show_fit: + all_gr.append(gr_fit) + # if show_individual: all_gr.extend([gr_fit_baseline, gr_fit_ind]) if show_individual: all_gr.append(gr_fit_baseline) if len(gr_fit_ind) > 0: all_gr.extend(gr_fit_ind) - if show_observed: all_gr.append(gr_obs) + if show_observed: + all_gr.append(gr_obs) # gr_fit_ind is a list of lists, so use np.min/max # The funky bit with scale makes sure that a user-specified 
value # has scale applied to it, without messing up the default values, # which are calculated from already scaled quantities. - min_gr = kwds.get("min_gr", np.min([np.min(gr) for gr in all_gr])/scale)*scale - max_gr = kwds.get("max_gr", np.max([np.max(gr) for gr in all_gr])/scale)*scale - + min_gr = kwds.get("min_gr", np.min([np.min(gr) for gr in all_gr]) / scale) * scale + max_gr = kwds.get("max_gr", np.max([np.max(gr) for gr in all_gr]) / scale) * scale if show_residual: min_res = np.min(gr_res) max_res = np.max(gr_res) else: - min_res = 0. - max_res = 0. + min_res = 0.0 + max_res = 0.0 # Derive various y limits based on all the offsets - rel_height = 100. - top_offset - dg_height - cmp_height - datatop_offset - databottom_offset - bottom_offset - abs_height = 100*((max_gr - min_gr) + (max_res - min_res))/rel_height + rel_height = 100.0 - top_offset - dg_height - cmp_height - datatop_offset - databottom_offset - bottom_offset + abs_height = 100 * ((max_gr - min_gr) + (max_res - min_res)) / rel_height - yhi = max_gr + (top_offset + dg_height + cmp_height + datatop_offset)*abs_height/100 + yhi = max_gr + (top_offset + dg_height + cmp_height + datatop_offset) * abs_height / 100 ylo = yhi - abs_height yhi = kwds.get("yhi", yhi) ylo = kwds.get("ylo", ylo) - datatop = yhi - (yhi-ylo)*.01*(top_offset + dg_height + cmp_height) - datalabeltop = 1 - .01*(top_offset + dg_height + cmp_height + datatop_offset) - resbase = ylo + bottom_offset*abs_height/100 - min_res + datatop = yhi - (yhi - ylo) * 0.01 * (top_offset + dg_height + cmp_height) + datalabeltop = 1 - 0.01 * (top_offset + dg_height + cmp_height + datatop_offset) + resbase = ylo + bottom_offset * abs_height / 100 - min_res resbase = kwds.get("resbase", resbase) - fig = kwds.get("figure", plt.gcf()) fig.clf() ax_data = AA.Subplot(fig, 111) @@ -301,8 +321,8 @@ def makeplot(ppe_or_stability, ip=None, **kwds): for peak in gr_fit_ind: plt.fill_between(rfine, gr_fit_baseline, peak, **gind_style) if show_residual: - plt.plot(r, gr_res + resbase, 'r-', **gres_style) - plt.plot((xlo, xhi), 2*[resbase], 'k:') + plt.plot(r, gr_res + resbase, "r-", **gres_style) + plt.plot((xlo, xhi), 2 * [resbase], "k:") # Format ax_data plt.xlim(xlo, xhi) @@ -310,27 +330,33 @@ def makeplot(ppe_or_stability, ip=None, **kwds): plt.xlabel(userxlabel) plt.ylabel(userylabel) ax_data.xaxis.set_minor_locator(plt.MultipleLocator(1)) - #ax_data.yaxis.set_minor_locator(plt.MultipleLocator(np.max([1,int((yhi-ylo)/20)]))) - ax_data.yaxis.set_label_position('left') + # ax_data.yaxis.set_minor_locator(plt.MultipleLocator(np.max([1,int((yhi-ylo)/20)]))) + ax_data.yaxis.set_label_position("left") ax_data.yaxis.tick_left() - ax_data.yaxis.set_ticks_position('both') + ax_data.yaxis.set_ticks_position("both") # Remove labels above where insets begin - #ax_data.yaxis.set_ticklabels([str(int(loc)) for loc in ax_data.yaxis.get_majorticklocs() if loc < datatop]) + # ax_data.yaxis.set_ticklabels([str(int(loc)) for loc in ax_data.yaxis.get_majorticklocs() if loc < datatop]) ax_data.yaxis.set_ticks([loc for loc in ax_data.yaxis.get_majorticklocs() if (loc < datatop and loc >= ylo)]) - # Dataset label if datalabel is not None: - dl = plt.text(datalabelx, datalabeltop, datalabel, ha='left', va='top', - transform=ax_data.transAxes, weight='bold') + dl = plt.text( + datalabelx, + datalabeltop, + datalabel, + ha="left", + va="top", + transform=ax_data.transAxes, + weight="bold", + ) else: dl = None figdict["datalabel"] = dl # Create new x axis at bottom edge of compare inset 
ax_data.axis["top"].set_visible(False) - ax_data.axis["newtop"] = ax_data.new_floating_axis(0, datatop, axis_direction="bottom") # "top" bugged? + ax_data.axis["newtop"] = ax_data.new_floating_axis(0, datatop, axis_direction="bottom") # "top" bugged? ax_data.axis["newtop"].toggle(all=False, ticks=True) ax_data.axis["newtop"].major_ticks.set_tick_out(True) ax_data.axis["newtop"].minor_ticks.set_tick_out(True) @@ -339,37 +365,55 @@ def makeplot(ppe_or_stability, ip=None, **kwds): # The original label is invisible, but we use its (dynamic) x position # to update the new label, which we define have the correct y position. # A bit of a tradeoff for the nice insets and ability to define new axes. - newylabel = plt.text(-.1, .5*(datatop-ylo)/(yhi-ylo), userylabel, - ha='center', va='center', rotation='vertical', transform=ax_data.transAxes) - labeldict[fig] = newylabel # so we can find the correct text object - fig.canvas.mpl_connect('draw_event', on_draw) # original label invisibility and updating + newylabel = plt.text( + -0.1, + 0.5 * (datatop - ylo) / (yhi - ylo), + userylabel, + ha="center", + va="center", + rotation="vertical", + transform=ax_data.transAxes, + ) + labeldict[fig] = newylabel # so we can find the correct text object + fig.canvas.mpl_connect("draw_event", on_draw) # original label invisibility and updating # Compare extracted (and ideal, if provided) peak positions clearly. if cmp_height > 0: - ax_cmp = inset_axes(ax_data, - width="100%", - height="%s%%" %cmp_height, - loc=2, - bbox_to_anchor=(0., -.01*(top_offset+dg_height), 1, 1), - bbox_transform=ax_data.transAxes, - borderpad=0) + ax_cmp = inset_axes( + ax_data, + width="100%", + height="%s%%" % cmp_height, + loc=2, + bbox_to_anchor=(0.0, -0.01 * (top_offset + dg_height), 1, 1), + bbox_transform=ax_data.transAxes, + borderpad=0, + ) figdict["cmp"] = ax_cmp plt.axes(ax_cmp) - comparepositions(ext, ip, ep_style=ep_style, ip_style=ip_style, yideal_label=yideal_label, yext_label=yext_label) + comparepositions( + ext, + ip, + ep_style=ep_style, + ip_style=ip_style, + yideal_label=yideal_label, + yext_label=yext_label, + ) plt.xlim(xlo, xhi) ax_cmp.set_xticks([]) # Show how extracted peak positions change as dg is changed if dg_height > 0: - ax_dg = inset_axes(ax_data, - width="100%", - height="%s%%" %dg_height, - loc=2, - bbox_to_anchor=(0, -.01*top_offset, 1, 1), - bbox_transform=ax_data.transAxes, - borderpad=0) + ax_dg = inset_axes( + ax_data, + width="100%", + height="%s%%" % dg_height, + loc=2, + bbox_to_anchor=(0, -0.01 * top_offset, 1, 1), + bbox_transform=ax_data.transAxes, + borderpad=0, + ) figdict["dg"] = ax_dg plt.axes(ax_dg) @@ -384,31 +428,42 @@ def makeplot(ppe_or_stability, ip=None, **kwds): plt.xlim(xlo, xhi) ax_dg.xaxis.set_major_locator(plt.NullLocator()) ax_dg.yaxis.set_major_locator(plt.MaxNLocator(3)) - plt.ylabel(r'$\delta$g') + plt.ylabel(r"$\delta$g") # Annotate the actual dg shown if show_annotation: - dg = np.mean(ext.error_cluster)*scale - dgstr = dgformatstr %dg - if "dgformatpost" in kwds: #post-processing on dg annotation + dg = np.mean(ext.error_cluster) * scale + dgstr = dgformatstr % dg + if "dgformatpost" in kwds: # post-processing on dg annotation dgstr = kwds["dgformatpost"](dgstr) if len(ext.model) > 0: - xpos = np.mean([xlo, ext.model[0]["position"]]) # OK for now. + xpos = np.mean([xlo, ext.model[0]["position"]]) # OK for now. 
else: - xpos = xlo + .1*(xhi-xlo) + xpos = xlo + 0.1 * (xhi - xlo) if dg_height > 0 and cmp_height > 0: # Arrow, text in compare distances line ylo2, yhi2 = ax_dg.get_ylim() if ip is not None: - ypos = ylo2 - .25*cmp_height/dg_height*(yhi2-ylo2) + ypos = ylo2 - 0.25 * cmp_height / dg_height * (yhi2 - ylo2) else: - ypos = ylo2 - .5*cmp_height/dg_height*(yhi2-ylo2) - plt.annotate(dgstr, xy=(xlo, dg), xycoords='data', va='center', ha='center', - xytext=(xpos,ypos), textcoords='data', size=8, color='green', - arrowprops=dict(arrowstyle="->", - connectionstyle="angle,angleA=90,angleB=0,rad=10", - color="green")) + ypos = ylo2 - 0.5 * cmp_height / dg_height * (yhi2 - ylo2) + plt.annotate( + dgstr, + xy=(xlo, dg), + xycoords="data", + va="center", + ha="center", + xytext=(xpos, ypos), + textcoords="data", + size=8, + color="green", + arrowprops=dict( + arrowstyle="->", + connectionstyle="angle,angleA=90,angleB=0,rad=10", + color="green", + ), + ) elif dg_height > 0: # Arrow, and text located somewhere in main plot region @@ -419,8 +474,8 @@ def makeplot(ppe_or_stability, ip=None, **kwds): # Must change axes plt.axes(ax_cmp) ylo2, yhi2 = ax_cmp.get_ylim() - ypos = yhi2/2. - plt.text(xpos, ypos, dgstr, va='center', ha='center', size=8, color='green') + ypos = yhi2 / 2.0 + plt.text(xpos, ypos, dgstr, va="center", ha="center", size=8, color="green") else: # Text only in main plot region # Must change axes @@ -439,6 +494,8 @@ def makeplot(ppe_or_stability, ip=None, **kwds): # invisiblelabel must be temporarily made visible to update # its values. _lastxpos = {} + + def on_draw(event): global _lastxpos fig = event.canvas.figure @@ -458,7 +515,7 @@ def on_draw(event): # If it is kept visible the whole time this problem doesn't occur. # This problem doesn't occur onscreen (TkAgg) or printing PDFs, and # didn't occur in matplotlib 1.0.0. - if abs(xpos - _lastxpos.get(fig, 0)) > .001: + if abs(xpos - _lastxpos.get(fig, 0)) > 0.001: _lastxpos[fig] = xpos plt.draw() else: @@ -466,33 +523,34 @@ def on_draw(event): invisiblelabel.set_visible(False) xpos_old = visiblelabel.get_position()[0] - if abs(xpos - xpos_old) > .001: + if abs(xpos - xpos_old) > 0.001: labeldict[fig].set_x(xpos) plt.draw() return + def readcompare(filename): """Returns a list of distances read from filename, otherwise None.""" - from diffpy.srmise.srmiseerrors import SrMiseDataFormatError, SrMiseFileError # TODO: Make this safer try: - datastring = open(filename,'rb').read() - except Exception, err: + datastring = open(filename, "r").read() + except Exception as err: raise err import re + + res = re.search(r"^[^#]", datastring, re.M) if res: - datastring = datastring[res.end():].strip() + datastring = datastring[res.end() :].strip() distances = [] try: for line in datastring.split("\n"): distances.append(float(line)) - except (ValueError, IndexError), err: - print "Could not read distances from '%s'. Ignoring file." %filename + except (ValueError, IndexError): + print("Could not read distances from '%s'. Ignoring file." % filename) if len(distances) == 0: return None @@ -502,31 +560,37 @@ def main(): # configure options parsing - usage = ("%prog srmise_file [options]\n" "srmise_file can be an extraction file saved by SrMise, " "or a data file saved by PeakStability.") - descr = ("A very basic tool for somewhat prettier plotting than provided by " "the basic SrMise classes. 
Can be used to compare peak positions " - "with those from a list.\n" - "NOTE: At this time the utility only works with peaks extracted using diffpy.srmise.PDFPeakExtraction.") + usage = ( + "%prog srmise_file [options]\n" + "srmise_file can be an extraction file saved by SrMise, " + "or a data file saved by PeakStability." + ) + descr = ( + "A very basic tool for somewhat prettier plotting than provided by " + "the basic SrMise classes. Can be used to compare peak positions " + "with those from a list.\n" + "NOTE: At this time the utility only works with peaks extracted using diffpy.srmise.PDFPeakExtraction." + ) parser = optparse.OptionParser(usage=usage, description=descr) - parser.add_option("--compare", type="string", - help="Compare extracted distances to distances listed (1/line) in this file.") - parser.add_option("--model", type="int", - help="Plot given model from set. Ignored if srmise_file is not a PeakStability file.") - parser.add_option("--show", action="store_true", - help="execute pylab.show() blocking call") - parser.add_option("-o", "--output", type="string", - help="save plot to the specified file") - parser.add_option("--format", type="string", default="eps", - help="output format for plot saving") + parser.add_option( + "--compare", + type="string", + help="Compare extracted distances to distances listed (1/line) in this file.", + ) + parser.add_option( + "--model", + type="int", + help="Plot given model from set. Ignored if srmise_file is not a PeakStability file.", + ) + parser.add_option("--show", action="store_true", help="execute pylab.show() blocking call") + parser.add_option("-o", "--output", type="string", help="save plot to the specified file") + parser.add_option("--format", type="string", default="eps", help="output format for plot saving") parser.allow_interspersed_args = True opts, args = parser.parse_args(sys.argv[1:]) - if len(args) != 1: - parser.error("Exactly one argument required. \n"+usage) + parser.error("Exactly one argument required. \n" + usage) filename = args[0] @@ -534,27 +598,27 @@ def main(): toplot = PDFPeakExtraction() try: toplot.read(filename) - except (Exception): + except Exception: toplot = PeakStability() try: toplot.load(filename) except Exception: - print "File '%s' is not a .srmise or PeakStability data file." %filename + print("File '%s' is not a .srmise or PeakStability data file." % filename) return if opts.model is not None: try: toplot.setcurrent(opts.model) - except (Exception): - print "Ignoring model, %s is not a PeakStability file." %filename + except Exception: + print("Ignoring model, %s is not a PeakStability file." % filename) distances = None if opts.compare is not None: # use baseline from existing file distances = readcompare(opts.compare) - setfigformat(figsize=(6., 4.0)) - figdict = makeplot(toplot, distances) + setfigformat(figsize=(6.0, 4.0)) + makeplot(toplot, distances) if opts.output: plt.savefig(opts.output, format=opts.format, dpi=600) if opts.show: @@ -564,5 +628,5 @@ def main(): return -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/diffpy/srmise/basefunction.py b/diffpy/srmise/basefunction.py index 18a55e4..d5dd220 100644 --- a/diffpy/srmise/basefunction.py +++ b/diffpy/srmise/basefunction.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. 
+# (c) 2014 trustees of the Michigan State University
+# (c) 2024 trustees of Columbia University in the City of New York
# All rights reserved.
#
# File coded by: Luke Granlund
@@ -17,12 +18,13 @@
import sys

import numpy as np
+from numpy.compat import unicode

-from diffpy.srmise.modelparts import ModelPart, ModelParts
-from diffpy.srmise.srmiseerrors import *
+from diffpy.srmise.srmiseerrors import SrMiseDataFormatError

logger = logging.getLogger("diffpy.srmise")

+
class BaseFunction(object):
    """Base class for mathematical functions which model numeric sequences.
@@ -61,7 +63,15 @@ class BaseFunction(object):
    transform_parameters()
    """

-    def __init__(self, parameterdict, parformats, default_formats, metadict, base=None, Cache=None):
+    def __init__(
+        self,
+        parameterdict,
+        parformats,
+        default_formats,
+        metadict,
+        base=None,
+        Cache=None,
+    ):
        """Set parameterdict defined by subclass

        Parameters
@@ -96,23 +106,22 @@ def __init__(self, parameterdict, parformats, default_formats, metadict, base=No
-        vals = self.parameterdict.values()
-        vals.sort()
-        if vals != range(self.npars):
-            emsg = "Argument parameterdict's values must uniquely specify "+\
-                   "the index of each parameter defined by its keys."
+        # dict.values() is a view in Python 3: sort a copy and compare to a list.
+        vals = sorted(self.parameterdict.values())
+        if vals != list(range(self.npars)):
+            emsg = (
+                "Argument parameterdict's values must uniquely specify "
+                + "the index of each parameter defined by its keys."
+            )
            raise ValueError(emsg)
        self.parformats = parformats

        # Check validity of default_formats
        self.default_formats = default_formats
-        if not ("default_input" in self.default_formats and
-                "default_output" in self.default_formats):
-            emsg = "Argument default_formats must specify 'default_input' "+\
-                   "and 'default_output' as keys."
+        if not ("default_input" in self.default_formats and "default_output" in self.default_formats):
+            emsg = "Argument default_formats must specify 'default_input' " + "and 'default_output' as keys."
            raise ValueError(emsg)
        for f in self.default_formats.values():
-            if not f in self.parformats:
-                emsg = "Keys of argument default_formats must map to a "+\
-                       "value within argument parformats."
-                raise ValueError()
+            if f not in self.parformats:
+                emsg = "Keys of argument default_formats must map to a " + "value within argument parformats."
+                raise ValueError(emsg)

        # Set metadictionary
@@ -126,13 +135,12 @@
        # of PeakFunction.
        # Object to cache: (basefunctioninstance, tuple of parameters)
        if Cache is not None:
-            #self.value = Cache(self.value, "value")
-            #self.jacobian = Cache(self.jacobian, "jacobian")
+            # self.value = Cache(self.value, "value")
+            # self.jacobian = Cache(self.jacobian, "jacobian")
            pass
        return

-
-    #### "Virtual" class methods ####
+    # "Virtual" class methods ####

    def actualize(self, *args, **kwds):
        """Create ModelPart instance of self with given parameters.  ("Virtual" method)"""
@@ -164,8 +172,7 @@ def _valueraw(self, *args, **kwds):
-        emsg = "_valueraw must() be implemented in a BaseFunction subclass."
+        emsg = "_valueraw must be implemented in a BaseFunction subclass."
        raise NotImplementedError(emsg)

-
-    #### Class methods ####
+    # Class methods ####

    def jacobian(self, p, r, rng=None):
        """Calculate jacobian of p, possibly restricted by range.
@@ -179,8 +186,7 @@ def jacobian(self, p, r, rng=None):
        previously calculated values instead.
        """
        if self is not p._owner:
-            emsg = "Argument 'p' must be evaluated by the BaseFunction "+\
-                   "subclass which owns it."
+            emsg = "Argument 'p' must be evaluated by the BaseFunction " + "subclass which owns it."
raise ValueError(emsg) # normally r will be a sequence, but also allow single numeric values @@ -192,7 +198,7 @@ def jacobian(self, p, r, rng=None): output = [None for j in jac] for idx in range(len(output)): if jac[idx] is not None: - output[idx] = r * 0. + output[idx] = r * 0.0 output[idx][rng] = jac[idx] return output except TypeError: @@ -201,10 +207,10 @@ def jacobian(self, p, r, rng=None): def transform_derivatives(self, pars, in_format=None, out_format=None): """Return gradient matrix for pars converted from in_format to out_format. - Parameters - pars - Sequence of parameters - in_format - A format defined for this class - out_format - A format defined for this class + Parameters + pars - Sequence of parameters + in_format - A format defined for this class + out_format - A format defined for this class """ # Map unspecified formats to specific formats defined in default_formats if in_format is None: @@ -222,12 +228,10 @@ def transform_derivatives(self, pars, in_format=None, out_format=None): elif out_format == "default_input": out_format = self.default_formats["default_input"] - if not in_format in self.parformats: - raise ValueError("Argument 'in_format' must be one of %s." \ - % self.parformats) - if not out_format in self.parformats: - raise ValueError("Argument 'out_format' must be one of %s." \ - % self.parformats) + if in_format not in self.parformats: + raise ValueError("Argument 'in_format' must be one of %s." % self.parformats) + if out_format not in self.parformats: + raise ValueError("Argument 'out_format' must be one of %s." % self.parformats) if in_format == out_format: return np.identity(self.npars) return self._transform_derivativesraw(pars, in_format=in_format, out_format=out_format) @@ -235,13 +239,13 @@ def transform_derivatives(self, pars, in_format=None, out_format=None): def transform_parameters(self, pars, in_format=None, out_format=None): """Return new sequence with pars converted from in_format to out_format. - Also restores parameters to a preferred range if it permits multiple - values that correspond to the same physical result. + Also restores parameters to a preferred range if it permits multiple + values that correspond to the same physical result. - Parameters - pars - Sequence of parameters - in_format - A format defined for this class - out_format - A format defined for this class + Parameters + pars - Sequence of parameters + in_format - A format defined for this class + out_format - A format defined for this class """ # Map unspecified formats to specific formats defined in default_formats if in_format is None: @@ -259,17 +263,14 @@ def transform_parameters(self, pars, in_format=None, out_format=None): elif out_format == "default_input": out_format = self.default_formats["default_input"] - if not in_format in self.parformats: - raise ValueError("Argument 'in_format' must be one of %s." \ - % self.parformats) - if not out_format in self.parformats: - raise ValueError("Argument 'out_format' must be one of %s." \ - % self.parformats) - #if in_format == out_format: + if in_format not in self.parformats: + raise ValueError("Argument 'in_format' must be one of %s." % self.parformats) + if out_format not in self.parformats: + raise ValueError("Argument 'out_format' must be one of %s." % self.parformats) + # if in_format == out_format: # return pars return self._transform_parametersraw(pars, in_format=in_format, out_format=out_format) - def value(self, p, r, rng=None): """Calculate value of ModelPart over r, possibly restricted by range. 
@@ -282,8 +283,7 @@ def value(self, p, r, rng=None):
        previously calculated values instead.
        """
        if self is not p._owner:
-            emsg = "Argument 'p' must be evaluated by the BaseFunction "+\
-                   "subclass which owns it."
+            emsg = "Argument 'p' must be evaluated by the BaseFunction " + "subclass which owns it."
            raise ValueError(emsg)

        # normally r will be a sequence, but also allow single numeric values
@@ -291,7 +291,7 @@
            if rng is None:
                rng = slice(0, len(r))
            rpart = r[rng]
-            output = r * 0.
+            output = r * 0.0
            output[rng] = self._valueraw(p.pars, rpart)
            return output
        except TypeError:
@@ -335,20 +335,20 @@ def writestr(self, baselist):
        """
        if self.base is not None and self.base not in baselist:
            emsg = "baselist does not include this BaseFunction's base function."
-            raise ValueError("emsg")
+            raise ValueError(emsg)
        lines = []
        # Write function type
-        lines.append("function=%s" %repr(self.__class__.__name__))
-        lines.append("module=%s" %repr(self.getmodule()))
+        lines.append("function=%s" % repr(self.__class__.__name__))
+        lines.append("module=%s" % repr(self.getmodule()))
        # Write base
        if self.base is not None:
-            lines.append("base=%s" %repr(baselist.index(self.base)))
+            lines.append("base=%s" % repr(baselist.index(self.base)))
        else:
-            lines.append("base=%s" %repr(None))
+            lines.append("base=%s" % repr(None))
        # Write all other metadata
-        for k, (v, f) in self.metadict.iteritems():
-            lines.append("%s=%s" %(k, f(v)))
-        datastring = "\n".join(lines)+"\n"
+        # dict.iteritems() does not exist in Python 3; items() is the equivalent here.
+        for k, (v, f) in self.metadict.items():
+            lines.append("%s=%s" % (k, f(v)))
+        datastring = "\n".join(lines) + "\n"
        return datastring

    @staticmethod
@@ -367,19 +367,19 @@ def factory(functionstr, baselist):
        # populate dictionary with parameter definition
        # "key=value"->{"key":"value"}
-        data = re.split(r'(?:[\r\n]+|\A)(\S+)=', data)
+        data = re.split(r"(?:[\r\n]+|\A)(\S+)=", data)
        ddict = {}
-        for i in range(len(data)/2):
-            ddict[data[2*i+1]] = data[2*i+2]
+        # Floor division keeps the argument to range() an integer under Python 3.
+        for i in range(len(data) // 2):
+            ddict[data[2 * i + 1]] = data[2 * i + 2]

        # dictionary of parameters
        pdict = {}
-        for (k, v) in ddict.items():
+        for k, v in ddict.items():
            try:
                pdict[k] = eval(v)
-            except Exception, e:
+            except Exception as e:
                logger.exception(e)
-                emsg = ("Invalid parameter: %s=%s" %(k,v))
+                emsg = "Invalid parameter: %s=%s" % (k, v)
                raise SrMiseDataFormatError(emsg)

        function_name = pdict["function"]
@@ -438,9 +438,9 @@ def safefunction(f, fsafe):
        return

-#end of class BaseFunction
+# end of class BaseFunction

-if __name__ == '__main__':
+if __name__ == "__main__":

    from diffpy.srmise.peaks import GaussianOverR, TerminationRipples
@@ -451,7 +451,7 @@ def safefunction(f, fsafe):
    pt = TerminationRipples(p, 20)
    outstr2 = pt.writestr([p])

-    print outstr
+    print(outstr)

    pt2 = BaseFunction.factory(outstr2, [p])
-    print type(pt2)
+    print(type(pt2))
diff --git a/diffpy/srmise/baselines/__init__.py b/diffpy/srmise/baselines/__init__.py
index fc8ad98..0515732 100644
--- a/diffpy/srmise/baselines/__init__.py
+++ b/diffpy/srmise/baselines/__init__.py
@@ -2,7 +2,8 @@
##############################################################################
#
# SrMise by Luke Granlund
-# (c) 2014 trustees of the Michigan State University.
+# (c) 2014 trustees of the Michigan State University
+# (c) 2024 trustees of Columbia University in the City of New York
# All rights reserved.
#
# File coded by: Luke Granlund
diff --git a/diffpy/srmise/baselines/arbitrary.py b/diffpy/srmise/baselines/arbitrary.py
index 2588740..bda2494 100644
--- a/diffpy/srmise/baselines/arbitrary.py
+++ b/diffpy/srmise/baselines/arbitrary.py
@@ -2,7 +2,8 @@
##############################################################################
#
# SrMise by Luke Granlund
-# (c) 2014 trustees of the Michigan State University.
+# (c) 2014 trustees of the Michigan State University
+# (c) 2024 trustees of Columbia University in the City of New York
# All rights reserved.
#
# File coded by: Luke Granlund
@@ -13,16 +14,16 @@

import logging

-import matplotlib.pyplot as plt
import numpy as np

-import diffpy.srmise.srmiselog
+from diffpy.srmise.baselines import Polynomial
from diffpy.srmise.baselines.base import BaselineFunction
from diffpy.srmise.srmiseerrors import SrMiseEstimationError

logger = logging.getLogger("diffpy.srmise")

-class Arbitrary (BaselineFunction):
+
+class Arbitrary(BaselineFunction):
    """Methods for evaluating a baseline from an arbitrary function.

    Supports baseline calculations with arbitrary functions. These functions,
@@ -64,10 +65,10 @@ def __init__(self, npars, valuef, jacobianf=None, estimatef=None, Cache=None):
        # Define parameterdict
        # e.g. {"a_0":0, "a_1":1, "a_2":2, "a_3":3} if npars is 4.
        parameterdict = {}
-        for d in range(self.testnpars+1):
-            parameterdict["a_"+str(d)] = d
-        formats = ['internal']
-        default_formats = {'default_input':'internal', 'default_output':'internal'}
+        for d in range(self.testnpars + 1):
+            parameterdict["a_" + str(d)] = d
+        formats = ["internal"]
+        default_formats = {"default_input": "internal", "default_output": "internal"}

        # Check that the provided functions are at least callable
        if valuef is None or callable(valuef):
@@ -88,14 +89,15 @@ def __init__(self, npars, valuef, jacobianf=None, estimatef=None, Cache=None):
        # TODO: figure out how the metadict can be used to save the functions
        # and use them again when a file is loaded...
-        metadict = {}
-        metadict["npars"] = (npars, repr)
-        metadict["valuef"] = (valuef, repr)
-        metadict["jacobianf"] = (jacobianf, repr)
-        metadict["estimatef"] = (estimatef, repr)
+        metadict = {
+            "npars": (npars, repr),
+            "valuef": (valuef, repr),
+            "jacobianf": (jacobianf, repr),
+            "estimatef": (estimatef, repr),
+        }
        BaselineFunction.__init__(self, parameterdict, formats, default_formats, metadict, None, Cache)

-    #### Methods required by BaselineFunction ####
+    # Methods required by BaselineFunction ####

    def estimate_parameters(self, r, y):
        """Estimate parameters for data baseline.
@@ -114,9 +116,8 @@ def estimate_parameters(self, r, y):
        # TODO: check that estimatef returns something proper?
        try:
            return self.estimatef(r, y)
-        except Exception, e:
-            emsg = "Error within estimation routine provided to Arbitrary:\n"+\
-                   str(e)
+        except Exception as e:
+            emsg = "Error within estimation routine provided to Arbitrary:\n" + str(e)
            raise SrMiseEstimationError(emsg)

    def _jacobianraw(self, pars, r, free):
        """...
        needed. True for evaluation, False for no evaluation."""
        nfree = None
        if self.jacobianf is None:
-            nfree = (pars == True).sum()
+            # Number of parameters actually marked free for this evaluation.
+            nfree = np.count_nonzero(free)
            if nfree != 0:
                emsg = "No jacobian routine provided to Arbitrary."
                raise NotImplementedError(emsg)
        if len(pars) != self.npars:
-            emsg = "Argument pars must have "+str(self.npars)+" elements."
+            emsg = "Argument pars must have " + str(self.npars) + " elements."
            raise ValueError(emsg)
        if len(free) != self.npars:
-            emsg = "Argument free must have "+str(self.npars)+" elements."
+            emsg = "Argument free must have " + str(self.npars) + " elements."
            raise ValueError(emsg)

        # Allow an arbitrary function without a Jacobian provided act as
@@ -149,7 +150,7 @@ def _jacobianraw(self, pars, r, free):
        # large performance implications if all other functions used while
        # fitting a function define a Jacobian.
        if nfree == 0:
-            return [None for p in range(len(par))]
+            return [None for p in range(len(pars))]

        # TODO: check that jacobianf returns something proper?
        return self.jacobianf(pars, r, free)
@@ -170,21 +171,18 @@ def _transform_parametersraw(self, pars, in_format, out_format):
        if in_format == "internal":
            pass
        else:
-            raise ValueError("Argument 'in_format' must be one of %s." \
-                              % self.parformats)
+            raise ValueError("Argument 'in_format' must be one of %s." % self.parformats)

        # Convert to specified output format from "internal" format.
        if out_format == "internal":
            pass
        else:
-            raise ValueError("Argument 'out_format' must be one of %s." \
-                              % self.parformats)
+            raise ValueError("Argument 'out_format' must be one of %s." % self.parformats)
        return temp

    def _valueraw(self, pars, r):
-        """Return value of polynomial for the given parameters and r values.
+        """Return value of the user-supplied function for the given parameters and r values.

-        Parameters
        Parameters
        pars: Sequence of parameters
        pars[0] = a_0
@@ -192,7 +190,7 @@
        ...
        r: sequence or scalar over which pars is evaluated"""
        if len(pars) != self.npars:
-            emsg = "Argument pars must have "+str(self.npars)+" elements."
+            emsg = "Argument pars must have " + str(self.npars) + " elements."
            raise ValueError(emsg)

        # TODO: check that valuef returns something proper?
@@ -201,21 +199,22 @@
    def getmodule(self):
        return __name__

-#end of class Polynomial
+
+# end of class Arbitrary

# simple test code
-if __name__ == '__main__':
+if __name__ == "__main__":

-    f = Polynomial(degree = 3)
+    f = Polynomial(degree=3)
    r = np.arange(5)
    pars = np.array([3, 0, 1, 2])
    free = np.array([True, False, True, True])
-    print f._valueraw(pars, r)
-    print f._jacobianraw(pars, r, free)
+    print(f._valueraw(pars, r))
+    print(f._jacobianraw(pars, r, free))

-    f = Polynomial(degree = -1)
+    f = Polynomial(degree=-1)
    r = np.arange(5)
    pars = np.array([])
    free = np.array([])
-    print f._valueraw(pars, r)
-    print f._jacobianraw(pars, r, free)
+    print(f._valueraw(pars, r))
+    print(f._jacobianraw(pars, r, free))
diff --git a/diffpy/srmise/baselines/base.py b/diffpy/srmise/baselines/base.py
index d84160a..ca58155 100644
--- a/diffpy/srmise/baselines/base.py
+++ b/diffpy/srmise/baselines/base.py
@@ -2,7 +2,8 @@
##############################################################################
#
# SrMise by Luke Granlund
-# (c) 2014 trustees of the Michigan State University.
+# (c) 2014 trustees of the Michigan State University
+# (c) 2024 trustees of Columbia University in the City of New York
# All rights reserved.
#
# File coded by: Luke Granlund
@@ -15,10 +16,9 @@

import numpy as np

-import diffpy.srmise.srmiselog
from diffpy.srmise.basefunction import BaseFunction
from diffpy.srmise.modelparts import ModelPart
-from diffpy.srmise.srmiseerrors import *
+from diffpy.srmise.srmiseerrors import SrMiseDataFormatError

logger = logging.getLogger("diffpy.srmise")

@@ -83,13 +83,11 @@ def __init__(
        additional functionality.
Cache: A class (not instance) which implements caching of BaseFunction evaluations.""" - BaseFunction.__init__( - self, parameterdict, parformats, default_formats, metadict, base, Cache - ) + BaseFunction.__init__(self, parameterdict, parformats, default_formats, metadict, base, Cache) - #### "Virtual" class methods #### + # "Virtual" class methods #### - #### Methods required by BaseFunction #### + # Methods required by BaseFunction #### def actualize( self, @@ -138,17 +136,16 @@ def factory(baselinestr, ownerlist): baselinestr: string representing Baseline ownerlist: List of BaseFunctions that owner is in """ - from numpy import array data = baselinestr.strip().splitlines() # dictionary of parameters pdict = {} for d in data: - l = d.split("=", 1) - if len(l) == 2: + result = d.split("=", 1) + if len(result) == 2: try: - pdict[l[0]] = eval(l[1]) + pdict[result[0]] = eval(result[1]) except Exception: emsg = "Invalid parameter: %s" % d raise SrMiseDataFormatError(emsg) @@ -171,12 +168,10 @@ def factory(baselinestr, ownerlist): # simple test code if __name__ == "__main__": - import matplotlib.pyplot as plt from numpy.random import randn - from diffpy.srmise.modelcluster import ModelCluster from diffpy.srmise.modelevaluators import AICc - from diffpy.srmise.peaks import GaussianOverR + from diffpy.srmise.peaks import GaussianOverR, Peaks res = 0.01 r = np.arange(2, 4, res) diff --git a/diffpy/srmise/baselines/fromsequence.py b/diffpy/srmise/baselines/fromsequence.py index c06e2de..4d4c553 100644 --- a/diffpy/srmise/baselines/fromsequence.py +++ b/diffpy/srmise/baselines/fromsequence.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columbia University in the City of New York # All rights reserved. # # File coded by: Luke Granlund @@ -13,16 +14,15 @@ import logging -import matplotlib.pyplot as plt import numpy as np import scipy.interpolate as spi -import diffpy.srmise.srmiselog from diffpy.srmise.baselines.base import BaselineFunction logger = logging.getLogger("diffpy.srmise") -class FromSequence (BaselineFunction): + +class FromSequence(BaselineFunction): """Methods for evaluation of a baseline from discrete data via interpolation. FromSequence uses cubic spline interpolation (no smoothing) on discrete @@ -46,13 +46,13 @@ def __init__(self, *args, **kwds): or file: Name of file with column of x values and column of y values. """ - if len(args)==1 and len(kwds)==0: + if len(args) == 1 and len(kwds) == 0: # load from file x, y = self.readxy(args[0]) elif len(args) == 0 and ("file" in kwds and "x" not in kwds and "y" not in kwds): # load file x, y = self.readxy(kwds["file"]) - elif len(args)==2 and len(kwds)==0: + elif len(args) == 2 and len(kwds) == 0: # Load x, y directly from arguments x = args[0] y = args[1] @@ -69,8 +69,8 @@ def __init__(self, *args, **kwds): emsg = "Sequences x and y must have the same length." 
raise ValueError(emsg) parameterdict = {} - formats = ['internal'] - default_formats = {'default_input':'internal', 'default_output':'internal'} + formats = ["internal"] + default_formats = {"default_input": "internal", "default_output": "internal"} self.spline = spi.InterpolatedUnivariateSpline(x, y) self.minx = x[0] self.maxx = x[-1] @@ -79,7 +79,7 @@ def __init__(self, *args, **kwds): metadict["y"] = (y, self.xyrepr) BaselineFunction.__init__(self, parameterdict, formats, default_formats, metadict, None, Cache=None) - #### Methods required by BaselineFunction #### + # Methods required by BaselineFunction #### def estimate_parameters(self, r, y): """Return empty numpy array. @@ -102,10 +102,10 @@ def _jacobianraw(self, pars, r, free): r: sequence or scalar over which pars is evaluated free: Empty sequence.""" if len(pars) != self.npars: - emsg = "Argument pars must have "+str(self.npars)+" elements." + emsg = "Argument pars must have " + str(self.npars) + " elements." raise ValueError(emsg) if len(free) != self.npars: - emsg = "Argument free must have "+str(self.npars)+" elements." + emsg = "Argument free must have " + str(self.npars) + " elements." raise ValueError(emsg) return [] @@ -125,15 +125,13 @@ def _transform_parametersraw(self, pars, in_format, out_format): if in_format == "internal": pass else: - raise ValueError("Argument 'in_format' must be one of %s." \ - % self.parformats) + raise ValueError("Argument 'in_format' must be one of %s." % self.parformats) # Convert to specified output format from "internal" format. if out_format == "internal": pass else: - raise ValueError("Argument 'out_format' must be one of %s." \ - % self.parformats) + raise ValueError("Argument 'out_format' must be one of %s." % self.parformats) return temp def _valueraw(self, pars, r): @@ -143,18 +141,22 @@ def _valueraw(self, pars, r): pars: Empty sequence r: sequence or scalar over which pars is evaluated""" if len(pars) != self.npars: - emsg = "Argument pars must have "+str(self.npars)+" elements." + emsg = "Argument pars must have " + str(self.npars) + " elements." 
            raise ValueError(emsg)
        try:
            if r[0] < self.minx or r[-1] > self.maxx:
-                logger.warn("Warning: Evaluating interpolating function over %s, outside safe range of %s.",
-                            [r[0], r[-1]],
-                            [self.minx, self.maxx])
-        except IndexError, TypeError:
+                # logger.warn() is deprecated; the level prefix makes "Warning:" redundant.
+                logger.warning(
+                    "Evaluating interpolating function over %s, outside safe range of %s.",
+                    [r[0], r[-1]],
+                    [self.minx, self.maxx],
+                )
+        except (IndexError, TypeError):
            if r < self.minx or r > self.maxx:
-                logger.warn("Warning: Evaluating interpolating function at %s, outside safe range of %s.",
-                            r,
-                            [self.minx, self.maxx])
+                logger.warning(
+                    "Evaluating interpolating function at %s, outside safe range of %s.",
+                    r,
+                    [self.minx, self.maxx],
+                )
        return self.spline(r)

    def getmodule(self):
@@ -162,46 +164,48 @@ def getmodule(self):
        return __name__

    def xyrepr(self, var):
        """Safe string output of x and y, compatible with eval()"""
-        return "[%s]" %", ".join([repr(v) for v in var])
+        return "[%s]" % ", ".join([repr(v) for v in var])

    def readxy(self, filename):
        """ """
-        from diffpy.srmise.srmiseerrors import SrMiseDataFormatError, SrMiseFileError
+        from diffpy.srmise.srmiseerrors import SrMiseDataFormatError

        # TODO: Make this safer
        try:
-            datastring = open(filename,'rb').read()
-        except Exception, err:
+            # Open in text mode: the str regex and float() parsing below fail on bytes.
+            datastring = open(filename, "r").read()
+        except Exception as err:
            raise err

        import re
-        res = re.search(r'^[^#]', datastring, re.M)
+
+        res = re.search(r"^[^#]", datastring, re.M)
        if res:
-            datastring = datastring[res.end():].strip()
+            datastring = datastring[res.end() :].strip()

-        x=[]
-        y=[]
+        x = []
+        y = []
        try:
            for line in datastring.split("\n"):
                v = line.split()
                x.append(float(v[0]))
                y.append(float(v[1]))
-        except (ValueError, IndexError), err:
+        except (ValueError, IndexError) as err:
            raise SrMiseDataFormatError(str(err))
        return (np.array(x), np.array(y))

-#end of class FromSequence
+
+# end of class FromSequence

# simple test code
-if __name__ == '__main__':
+if __name__ == "__main__":

-    r = np.arange(0, 9.42413, .2)
-    b = -(np.tanh(.5*r) + np.sin(.5*r))
+    r = np.arange(0, 9.42413, 0.2)
+    b = -(np.tanh(0.5 * r) + np.sin(0.5 * r))
    f = FromSequence(r, b)
    pars = np.array([])
    free = np.array([])

-    r2 = np.arange(0, 9.42413, .5)
+    r2 = np.arange(0, 9.42413, 0.5)
    b2 = f._valueraw(pars, r2)
diff --git a/diffpy/srmise/baselines/nanospherical.py b/diffpy/srmise/baselines/nanospherical.py
index 04c1d43..b45113b 100644
--- a/diffpy/srmise/baselines/nanospherical.py
+++ b/diffpy/srmise/baselines/nanospherical.py
@@ -2,7 +2,8 @@
##############################################################################
#
# SrMise by Luke Granlund
-# (c) 2014 trustees of the Michigan State University.
+# (c) 2014 trustees of the Michigan State University
+# (c) 2024 trustees of Columbia University in the City of New York
# All rights reserved.
#
# File coded by: Luke Granlund
@@ -13,16 +14,14 @@

import logging

-import matplotlib.pyplot as plt
import numpy as np

-import diffpy.srmise.srmiselog
from diffpy.srmise.baselines.base import BaselineFunction
-from diffpy.srmise.srmiseerrors import SrMiseEstimationError

logger = logging.getLogger("diffpy.srmise")

-class NanoSpherical (BaselineFunction):
+
+class NanoSpherical(BaselineFunction):
    """Methods for evaluation of baseline of spherical nanoparticle of uniform density.

    Allowed formats are
@@ -36,7 +35,7 @@ class NanoSpherical(BaselineFunction):
    scale factor is 4*pi*rho_0, where rho_r is the nanoparticle density.
    gamma_0(r)
    Reference:
-    Guinier et. al. (1955). Small-angle Scattering from X-rays.
-    New York: John Wiley & Sons, Inc.
+    Guinier et al. (1955). Small-angle Scattering from X-rays. New York: John Wiley & Sons, Inc.
    """

    def __init__(self, Cache=None):
@@ -47,29 +46,29 @@ def __init__(self, Cache=None):
        evaluations.
        """
        # Define parameterdict
-        parameterdict = {'scale':0, 'radius':1}
-        formats = ['internal']
-        default_formats = {'default_input':'internal', 'default_output':'internal'}
+        parameterdict = {"scale": 0, "radius": 1}
+        formats = ["internal"]
+        default_formats = {"default_input": "internal", "default_output": "internal"}
        metadict = {}
        BaselineFunction.__init__(self, parameterdict, formats, default_formats, metadict, None, Cache)

-    #### Methods required by BaselineFunction ####
-
-#    def estimate_parameters(self, r, y):
-#        """Estimate parameters for spherical baseline. (Not implemented!)
-#
-#        Parameters
-#        r - array along r from which to estimate
-#        y - array along y from which to estimate
-#
-#        Returns Numpy array of parameters in the default internal format.
-#        Raises NotImplementedError if estimation is not implemented for this
-#        degree, or SrMiseEstimationError if parameters cannot be estimated for
-#        any other reason.
-#        """
-#        if len(r) != len(y):
-#            emsg = "Arrays r, y must have equal length."
-#            raise ValueError(emsg)
+    # Methods required by BaselineFunction ####
+
+    # def estimate_parameters(self, r, y):
+    #     """Estimate parameters for spherical baseline. (Not implemented!)
+    #
+    #     Parameters
+    #     r - array along r from which to estimate
+    #     y - array along y from which to estimate
+    #
+    #     Returns Numpy array of parameters in the default internal format.
+    #     Raises NotImplementedError if estimation is not implemented for this
+    #     degree, or SrMiseEstimationError if parameters cannot be estimated for
+    #     any other reason.
+    #     """
+    #     if len(r) != len(y):
+    #         emsg = "Arrays r, y must have equal length."
+    #         raise ValueError(emsg)

    def _jacobianraw(self, pars, r, free):
        """Return the Jacobian of the spherical baseline.
@@ -83,22 +82,26 @@ def _jacobianraw(self, pars, r, free):
        needed. True for evaluation, False for no evaluation.
        """
        if len(pars) != self.npars:
-            emsg = "Argument pars must have "+str(self.npars)+" elements."
+            emsg = "Argument pars must have " + str(self.npars) + " elements."
            raise ValueError(emsg)
        if len(free) != self.npars:
-            emsg = "Argument free must have "+str(self.npars)+" elements."
+            emsg = "Argument free must have " + str(self.npars) + " elements."
            raise ValueError(emsg)
        jacobian = [None for p in range(self.npars)]
-        if (free == False).sum() == self.npars:
+        # If every parameter is fixed there is nothing to evaluate.
+        if np.count_nonzero(free) == 0:
            return jacobian
        if np.isscalar(r):
-            if r <= 0. or r >= 2.*pars[1]:
-                if free[0]: jacobian[0] = 0.
-                if free[1]: jacobian[1] = 0.
+            if r <= 0.0 or r >= 2.0 * pars[1]:
+                if free[0]:
+                    jacobian[0] = 0.0
+                if free[1]:
+                    jacobian[1] = 0.0
            else:
-                if free[0]: jacobian[0] = self._jacobianrawscale(pars, r)
-                if free[1]: jacobian[1] = self._jacobianrawradius(pars, r)
+                if free[0]:
+                    jacobian[0] = self._jacobianrawscale(pars, r)
+                if free[1]:
+                    jacobian[1] = self._jacobianrawradius(pars, r)
        else:
            s = self._getdomain(pars, r)
            if free[0]:
@@ -118,14 +121,14 @@ def _jacobianrawscale(self, pars, r):
        pars[1] = radius
        r - sequence or scalar over which pars is evaluated.
        """
-        s = np.abs(pars[0])
        R = np.abs(pars[1])
-        rdivR = r/R
+        rdivR = r / R
        # From abs'(s) in derivative, which is equivalent to sign(s) except at 0 where it
        # is undefined. Since s=0 is equivalent to the absence of a nanoparticle, sign will
        # be fine.
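+        # In the notation of _valueraw2 below, the baseline is
+        #     B(r) = -s * r * (1 - (3/4)*(r/R) + (1/16)*(r/R)**3),  s = |pars[0]|, R = |pars[1]|,
+        # so the derivative with respect to the raw scale parameter is the same polynomial
+        # in r/R times -r, carrying the sign factor described above.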
-        sign = np.sign(pars[1])
-        return -sign*r*(1-(3./4.)*rdivR+(1./16.)*rdivR**3)
+        # Per the comment above, this is abs'(s), i.e. the sign of the scale pars[0].
+        sign = np.sign(pars[0])
+        return -sign * r * (1 - (3.0 / 4.0) * rdivR + (1.0 / 16.0) * rdivR**3)

    def _jacobianrawradius(self, pars, r):
        """Return partial Jacobian wrt radius without bounds checking.
@@ -141,7 +144,7 @@ def _jacobianrawradius(self, pars, r):
        # From abs'(R) in derivative, which is equivalent to sign(R) except at 0 where it
        # is undefined. Since R=0 is a singularity anyway, sign will be fine.
        sign = np.sign(pars[1])
-        return sign*s*(3*r**2*(r**2-4*R**2))/(16*R**4)
+        return sign * s * (3 * r**2 * (r**2 - 4 * R**2)) / (16 * R**4)

    def _transform_parametersraw(self, pars, in_format, out_format):
        """Convert parameter values from in_format to out_format.
@@ -162,15 +165,13 @@
            temp[0] = np.abs(temp[0])
            temp[1] = np.abs(temp[1])
        else:
-            raise ValueError("Argument 'in_format' must be one of %s." \
-                              % self.parformats)
+            raise ValueError("Argument 'in_format' must be one of %s." % self.parformats)

        # Convert to specified output format from "internal" format.
        if out_format == "internal":
            pass
        else:
-            raise ValueError("Argument 'out_format' must be one of %s." \
-                              % self.parformats)
+            raise ValueError("Argument 'out_format' must be one of %s." % self.parformats)
        return temp

    def _valueraw(self, pars, r):
@@ -185,11 +186,11 @@
        r - sequence or scalar over which pars is evaluated.
        """
        if len(pars) != self.npars:
-            emsg = "Argument pars must have "+str(self.npars)+" elements."
+            emsg = "Argument pars must have " + str(self.npars) + " elements."
            raise ValueError(emsg)
        if np.isscalar(r):
-            if r <= 0. or r >= 2.*pars[1]:
-                return 0.
+            if r <= 0.0 or r >= 2.0 * pars[1]:
+                return 0.0
            else:
                return self._valueraw2(pars, r)
        else:
@@ -209,38 +210,44 @@
        """
        s = np.abs(pars[0])
        R = np.abs(pars[1])
-        rdivR = r/R
-        return -s*r*(1-(3./4.)*rdivR+(1./16.)*rdivR**3)
+        rdivR = r / R
+        return -s * r * (1 - (3.0 / 4.0) * rdivR + (1.0 / 16.0) * rdivR**3)

    def _getdomain(self, pars, r):
        """Return slice object for which r > 0 and r < twice the radius"""
-        low = r.searchsorted(0., side='right')
-        high = r.searchsorted(2.*pars[1], side='left')
+        low = r.searchsorted(0.0, side="right")
+        high = r.searchsorted(2.0 * pars[1], side="left")
        return slice(low, high)

    def getmodule(self):
        return __name__

-#end of class NanoSpherical
+
+# end of class NanoSpherical

# simple test code
-if __name__ == '__main__':
+if __name__ == "__main__":

    f = NanoSpherical()
    r = np.arange(-5, 10)
-    pars = np.array([-1., 7.])
+    pars = np.array([-1.0, 7.0])
    free = np.array([False, True])
-    print "Testing nanoparticle spherical baseline"
-    print "Scale: %f, Radius: %f" %(pars[0], pars[1])
-    print "-----------------------------------------"
+    print("Testing nanoparticle spherical baseline")
+    print("Scale: %f, Radius: %f" % (pars[0], pars[1]))
+    print("-----------------------------------------")

    val = f._valueraw(pars, r)
-    jac = f._jacobianraw(pars, r, free)
-    outjac = [j if j is not None else [None]*len(r) for j in jac]
-    print "r".center(10), "value".center(10), "jac(scale)".center(10), "jac(radius)".center(10)
+    jac = f._jacobianraw(pars, r, free)
+    outjac = [j if j is not None else [None] * len(r) for j in jac]
+    print(
+        "r".center(10),
+        "value".center(10),
+        "jac(scale)".center(10),
+        "jac(radius)".center(10),
+    )
    for tup in zip(r, val, *outjac):
        for t in tup:
            if t is None:
-                print ("%s" %None).ljust(10),
+                print(("%s" % None).ljust(10), end=" ")
            else:
-                print ("% .3g" %t).ljust(10),
-        print
+                print(("% .3g" % t).ljust(10), end=" ")
+        print()
diff --git a/diffpy/srmise/baselines/polynomial.py b/diffpy/srmise/baselines/polynomial.py
index 4d95c4f..70498be 100644
--- a/diffpy/srmise/baselines/polynomial.py
+++ b/diffpy/srmise/baselines/polynomial.py
@@ -2,7 +2,8 @@
##############################################################################
#
# SrMise by Luke Granlund
-# (c) 2014 trustees of the Michigan State University.
+# (c) 2014 trustees of the Michigan State University
+# (c) 2024 trustees of Columbia University in the City of New York
# All rights reserved.
#
# File coded by: Luke Granlund
@@ -13,16 +14,15 @@

import logging

-import matplotlib.pyplot as plt
import numpy as np

-import diffpy.srmise.srmiselog
from diffpy.srmise.baselines.base import BaselineFunction
from diffpy.srmise.srmiseerrors import SrMiseEstimationError

logger = logging.getLogger("diffpy.srmise")

-class Polynomial (BaselineFunction):
+
+class Polynomial(BaselineFunction):
    """Methods for evaluation and parameter estimation of a polynomial baseline."""

    def __init__(self, degree, Cache=None):
@@ -41,19 +41,18 @@ def __init__(self, degree, Cache=None):
            emsg = "Argument degree must be an integer."
            raise ValueError(emsg)
        if self.degree < 0:
-            self.degree = -1 # interpreted as negative infinity
+            self.degree = -1  # interpreted as negative infinity

        # Define parameterdict
        # e.g. {"a_0":3, "a_1":2, "a_2":1, "a_3":0} if degree is 3.
        parameterdict = {}
-        for d in range(self.degree+1):
-            parameterdict["a_"+str(d)] = self.degree - d
-        formats = ['internal']
-        default_formats = {'default_input':'internal', 'default_output':'internal'}
-        metadict = {}
-        metadict["degree"] = (degree, repr)
+        for d in range(self.degree + 1):
+            parameterdict["a_" + str(d)] = self.degree - d
+        formats = ["internal"]
+        default_formats = {"default_input": "internal", "default_output": "internal"}
+        metadict = {"degree": (degree, repr)}
        BaselineFunction.__init__(self, parameterdict, formats, default_formats, metadict, None, Cache)

-    #### Methods required by BaselineFunction ####
+    # Methods required by BaselineFunction ####

    def estimate_parameters(self, r, y):
        """Estimate parameters for polynomial baseline.
@@ -81,7 +80,7 @@ def estimate_parameters(self, r, y):
            return np.array([])

        if self.degree == 0:
-            return np.array([0.])
+            return np.array([0.0])

        if self.degree == 1:
            # Estimate degree=1 baseline.
@@ -90,16 +89,16 @@ def estimate_parameters(self, r, y):
            # lies above the baseline.
            # TODO: Make this more sophisticated.
            try:
-                cut = np.max([len(y)/10, 1])
+                # Floor division keeps cut an integer, as required for slicing below.
+                cut = np.max([len(y) // 10, 1])
                cut_idx = y.argsort()[:cut]

                import numpy.linalg as la
-                A = np.array([r[cut_idx]]).T
-                slope = la.lstsq(A, y[cut_idx])[0][0]
-                return np.array([slope, 0.])
-            except Exception, e:
-                emsg = "Error during estimation -- "+str(e)
-                raise
+
+                a = np.array([r[cut_idx]]).T
+                slope = la.lstsq(a, y[cut_idx])[0][0]
+                return np.array([slope, 0.0])
+            except Exception as e:
+                emsg = "Error during estimation -- " + str(e)
                raise SrMiseEstimationError(emsg)

    def _jacobianraw(self, pars, r, free):
@@ -116,13 +115,13 @@ def _jacobianraw(self, pars, r, free):
        needed. True for evaluation, False for no evaluation.
        """
        if len(pars) != self.npars:
-            emsg = "Argument pars must have "+str(self.npars)+" elements."
+            emsg = "Argument pars must have " + str(self.npars) + " elements."
            raise ValueError(emsg)
        if len(free) != self.npars:
-            emsg = "Argument free must have "+str(self.npars)+" elements."
+            emsg = "Argument free must have " + str(self.npars) + " elements."
            raise ValueError(emsg)
        jacobian = [None for p in range(self.npars)]
-        if (free == False).sum() == self.npars:
+        # If every parameter is fixed there is nothing to evaluate.
+        if np.count_nonzero(free) == 0:
            return jacobian

        # The partial derivative with respect to the nth coefficient of a
@@ -148,15 +147,13 @@ def _transform_parametersraw(self, pars, in_format, out_format):
        if in_format == "internal":
            pass
        else:
-            raise ValueError("Argument 'in_format' must be one of %s." \
-                              % self.parformats)
+            raise ValueError("Argument 'in_format' must be one of %s." % self.parformats)

        # Convert to specified output format from "internal" format.
        if out_format == "internal":
            pass
        else:
-            raise ValueError("Argument 'out_format' must be one of %s." \
-                              % self.parformats)
+            raise ValueError("Argument 'out_format' must be one of %s." % self.parformats)
        return temp

    def _valueraw(self, pars, r):
@@ -171,51 +168,54 @@
        If degree is negative infinity, pars is an empty sequence.
        r: sequence or scalar over which pars is evaluated"""
        if len(pars) != self.npars:
-            emsg = "Argument pars must have "+str(self.npars)+" elements."
+            emsg = "Argument pars must have " + str(self.npars) + " elements."
            raise ValueError(emsg)
        return np.polyval(pars, r)

    def getmodule(self):
        return __name__

-#end of class Polynomial
+
+# end of class Polynomial

# simple test code
-if __name__ == '__main__':
+if __name__ == "__main__":

    # Test polynomial of degree 3
-    print "Testing degree 3 polynomial"
-    print "---------------------------"
-    f = Polynomial(degree = 3)
+    print("Testing degree 3 polynomial")
+    print("---------------------------")
+    f = Polynomial(degree=3)
    r = np.arange(5)
    pars = np.array([3, 0, 1, 2])
    free = np.array([True, False, True, True])
    val = f._valueraw(pars, r)
-    jac = f._jacobianraw(pars, r, free)
-    print "Value:\n", val
-    print "Jacobian: "
-    for j in jac: print " %s" %j
+    jac = f._jacobianraw(pars, r, free)
+    print("Value:\n", val)
+    print("Jacobian: ")
+    for j in jac:
+        print(" %s" % j)

    # Test polynomial of degree -oo
-    print "\nTesting degree -oo polynomial (== 0)"
-    print "------------------------------------"
-    f = Polynomial(degree = -1)
+    print("\nTesting degree -oo polynomial (== 0)")
+    print("------------------------------------")
+    f = Polynomial(degree=-1)
    r = np.arange(5)
    pars = np.array([])
    free = np.array([])
    val = f._valueraw(pars, r)
-    jac = f._jacobianraw(pars, r, free)
-    print "Value:\n", val
-    print "Jacobian: "
-    for j in jac: print " %s" %j
+    jac = f._jacobianraw(pars, r, free)
+    print("Value:\n", val)
+    print("Jacobian: ")
+    for j in jac:
+        print(" %s" % j)

    # Test linear estimation
-    print "\nTesting linear baseline estimation"
-    print "------------------------------------"
-    f = Polynomial(degree = 1)
+    print("\nTesting linear baseline estimation")
+    print("------------------------------------")
+    f = Polynomial(degree=1)
    pars = np.array([1, 0])
-    r = np.arange(0, 10, .1)
-    y = -r + 10*np.exp(-(r-5)**2) + np.random.rand(len(r))
+    r = np.arange(0, 10, 0.1)
+    y = -r + 10 * np.exp(-((r - 5) ** 2)) + np.random.rand(len(r))
    est = f.estimate_parameters(r, y)
-    print "Actual baseline: ", np.array([-1, 0.])
-    print "Estimated baseline: ", est
+    print("Actual baseline: ", np.array([-1, 0.0]))
+    print("Estimated baseline: ", est)
diff --git a/diffpy/srmise/dataclusters.py b/diffpy/srmise/dataclusters.py
index 077c1a8..d87a6b9 100644
--- a/diffpy/srmise/dataclusters.py
+++ b/diffpy/srmise/dataclusters.py
@@ -2,7 +2,8 @@
##############################################################################
#
# SrMise by Luke Granlund
-# (c)
2014 trustees of the Michigan State University. +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columbia University in the City of New York # All rights reserved. # # File coded by: Luke Granlund @@ -17,10 +18,9 @@ import matplotlib.pyplot as plt import numpy as np -import diffpy.srmise.srmiselog - logger = logging.getLogger("diffpy.srmise") + class DataClusters: """Find clusters corresponding to peaks in numerical x-, y-value arrays. @@ -51,7 +51,7 @@ def __init__(self, x, y, res): y - corresponding sequence of y-values res - clustering 'resolution' """ - #Track internal state of clustering. + # Track internal state of clustering. self.INIT = 0 self.READY = 1 self.CLUSTERING = 2 @@ -62,14 +62,12 @@ def __init__(self, x, y, res): return - # This iterator operates not over found clusters, but over the process of # clustering. This behavior could cause confusion and should perhaps be # altered. def __iter__(self): return self - def clear(self): """Clear all members, including user data.""" self.x = np.array([]) @@ -85,7 +83,7 @@ def clear(self): def reset_clusters(self): """Reset all progress on clustering.""" - self.clusters = np.array([[self.data_order[-1],self.data_order[-1]]]) + self.clusters = np.array([[self.data_order[-1], self.data_order[-1]]]) self.current_idx = self.data_order.size - 1 self.lastcluster_idx = 0 self.lastpoint_idx = self.data_order[-1] @@ -102,7 +100,7 @@ def setdata(self, x, y, res): y - corresponding sequence of y-values res - clustering 'resolution' """ - #Test for error conditions + # Test for error conditions # 1) Length mismatch # 2) Bound errors for res # 3) r isn't sorted? @@ -116,7 +114,7 @@ def setdata(self, x, y, res): self.y = y self.res = res - self.data_order = self.y.argsort() # Defines order of clustering + self.data_order = self.y.argsort() # Defines order of clustering self.clusters = np.array([[self.data_order[-1], self.data_order[-1]]]) self.current_idx = len(self.data_order) - 1 self.lastcluster_idx = 0 @@ -169,9 +167,8 @@ def next(self): self.lastcluster_idx = nearest_cluster[0] else: # insert right of nearest cluster - self.lastcluster_idx = nearest_cluster[0]+1 - self.clusters = np.insert(self.clusters, self.lastcluster_idx, - [test_idx, test_idx], 0) + self.lastcluster_idx = nearest_cluster[0] + 1 + self.clusters = np.insert(self.clusters, self.lastcluster_idx, [test_idx, test_idx], 0) return self def makeclusters(self): @@ -200,10 +197,10 @@ def find_nearest_cluster2(self, x): return self.find_nearest_cluster(idx) else: # Choose adjacent index nearest to x - if (self.x[idx] - x) < (x - self.x[idx-1]): + if (self.x[idx] - x) < (x - self.x[idx - 1]): return self.find_nearest_cluster(idx) else: - return self.find_nearest_cluster(idx-1) + return self.find_nearest_cluster(idx - 1) def find_nearest_cluster(self, idx): """Return [cluster index, distance] for cluster nearest to x[idx]. 
@@ -225,23 +222,27 @@ def find_nearest_cluster(self, idx):
            return None

        flat_idx = clusters_flat.searchsorted(idx)
-        near_idx = flat_idx/2
+        # Floor division: near_idx indexes self.clusters and must stay an integer.
+        near_idx = flat_idx // 2

        if flat_idx == len(clusters_flat):
-            #test_idx is right of the last cluster
-            return [near_idx-1, self.x[idx]-self.x[self.clusters[-1, 1]]]
-        if clusters_flat[flat_idx] == idx or flat_idx%2 == 1:
+            # test_idx is right of the last cluster
+            return [near_idx - 1, self.x[idx] - self.x[self.clusters[-1, 1]]]
+        if clusters_flat[flat_idx] == idx or flat_idx % 2 == 1:
            # idx is within some cluster
            return [near_idx, 0.0]
        if flat_idx == 0:
            # idx is left of the first cluster
-            return [near_idx, self.x[idx]-self.x[self.clusters[0,0]]]
+            return [near_idx, self.x[idx] - self.x[self.clusters[0, 0]]]

        # Calculate which of the two nearest clusters is closer
-        distances=np.array([self.x[idx]-self.x[self.clusters[near_idx-1, 1]],
-                            self.x[idx]-self.x[self.clusters[near_idx, 0]]])
+        distances = np.array(
+            [
+                self.x[idx] - self.x[self.clusters[near_idx - 1, 1]],
+                self.x[idx] - self.x[self.clusters[near_idx, 0]],
+            ]
+        )
        if distances[0] < np.abs(distances[1]):
-            return [near_idx-1, distances[0]]
+            return [near_idx - 1, distances[0]]
        else:
            return [near_idx, distances[1]]
@@ -255,15 +256,14 @@ def cluster_is_full(self, cluster_idx):
        cluster_idx - The index of the cluster to test
        """
        if cluster_idx > 0:
-            low = self.clusters[cluster_idx-1, 1] + 1
+            low = self.clusters[cluster_idx - 1, 1] + 1
        else:
            low = 0
        if cluster_idx < len(self.clusters) - 1:
-            high = self.clusters[cluster_idx+1, 0] - 1
+            high = self.clusters[cluster_idx + 1, 0] - 1
        else:
            high = len(self.data_order) - 1
-        return self.clusters[cluster_idx, 0] == low \
-               and self.clusters[cluster_idx, 1] == high
+        return self.clusters[cluster_idx, 0] == low and self.clusters[cluster_idx, 1] == high

    def combine_clusters(self, combine):
        """Combine clusters specified by each subarray of cluster indices.
@@ -283,15 +283,35 @@ def combine_clusters(self, combine):
            # Test that all clusters are contiguous and adjacent
            first = c[0]
            for i in range(c[0], c[-1]):
-                if c[i+1-first]-1 != c[i-first]:
-                    raise ValueError(''.join(["Clusters ", str(c[i]), " and ", str(c[i+1]), " are not contiguous and/or increasing."]))
-                if self.clusters[i+1, 0]-self.clusters[i, 1] != 1:
-                    raise ValueError(''.join(["Clusters ", str(c[i]), " and ", str(c[i+1]), " have unclustered points between them."]))
-
-            #update cluster endpoints
+                if c[i + 1 - first] - 1 != c[i - first]:
+                    raise ValueError(
+                        "".join(
+                            [
+                                "Clusters ",
+                                str(c[i]),
+                                " and ",
+                                str(c[i + 1]),
+                                " are not contiguous and/or increasing.",
+                            ]
+                        )
+                    )
+                if self.clusters[i + 1, 0] - self.clusters[i, 1] != 1:
+                    raise ValueError(
+                        "".join(
+                            [
+                                "Clusters ",
+                                str(c[i]),
+                                " and ",
+                                str(c[i + 1]),
+                                " have unclustered points between them.",
+                            ]
+                        )
+                    )
+
+            # update cluster endpoints
            self.clusters[c[0], 1] = self.clusters[c[-1], 1]
        todelete = np.array([c[1:] for c in combine]).ravel()
-        self.clusters = np.delete(self.clusters, todelete ,0)
+        self.clusters = np.delete(self.clusters, todelete, 0)

    def find_adjacent_clusters(self):
        """Return all cluster indices with no unclustered points between them.
@@ -306,20 +326,26 @@ def find_adjacent_clusters(self): adj = [] left_idx = 0 - while left_idx < len(self.clusters)-1: - while left_idx < len(self.clusters)-1 and self.clusters[left_idx+1, 0] - self.clusters[left_idx, 1] !=1: + while left_idx < len(self.clusters) - 1: + while ( + left_idx < len(self.clusters) - 1 + and self.clusters[left_idx + 1, 0] - self.clusters[left_idx, 1] != 1 + ): left_idx += 1 # Not left_idx+1 since left_idx=len(self.clusters)-2 even if no # clusters are actually adjacent. right_idx = left_idx - while right_idx < len(self.clusters)-1 and self.clusters[right_idx+1, 0] - self.clusters[right_idx, 1] == 1: + while ( + right_idx < len(self.clusters) - 1 + and self.clusters[right_idx + 1, 0] - self.clusters[right_idx, 1] == 1 + ): right_idx += 1 if right_idx > left_idx: - adj.append(range(left_idx, right_idx+1)) - left_idx = right_idx+1 # set for next possible left_idx + adj.append(range(left_idx, right_idx + 1)) + left_idx = right_idx + 1 # set for next possible left_idx return np.array(adj) def cut(self, idx): @@ -331,24 +357,23 @@ def cut(self, idx): data_ids = self.clusters[idx] if len(data_ids) == data_ids.size: # idx is a scalar, so give single slice object - return slice(data_ids[0], data_ids[1]+1) + return slice(data_ids[0], data_ids[1] + 1) else: # idx is a list/slice, so give list of slice objects - return [slice(c[0], c[1]+1) for c in data_ids] + return [slice(c[0], c[1] + 1) for c in data_ids] def cluster_boundaries(self): """Return sequence with (x,y) of all cluster boundaries.""" boundaries = [] for l in self.clusters: - xlo = np.mean(self.x[l[0]-1:l[0]+1]) - ylo = np.mean(self.y[l[0]-1:l[0]+1]) - xhi = np.mean(self.x[l[1]:l[1]+2]) - yhi = np.mean(self.y[l[1]:l[1]+2]) + xlo = np.mean(self.x[l[0] - 1 : l[0] + 1]) + ylo = np.mean(self.y[l[0] - 1 : l[0] + 1]) + xhi = np.mean(self.x[l[1] : l[1] + 2]) + yhi = np.mean(self.y[l[1] : l[1] + 2]) boundaries.append((xlo, ylo)) boundaries.append((xhi, yhi)) return np.unique(boundaries) - def plot(self, *args, **kwds): """Plot the data with vertical lines at the cluster divisions. 
@@ -362,7 +387,7 @@ def plot(self, *args, **kwds): boundaries = self.cluster_boundaries() (ymin, ymax) = ax.get_ylim() for b in boundaries: - plt.axvline(b[0], 0, (b[1]-ymin)/(ymax-ymin), color='k') + plt.axvline(b[0], 0, (b[1] - ymin) / (ymax - ymin), color="k") plt.ion() ax.figure.canvas.draw() return @@ -376,18 +401,22 @@ def animate(self): status = self.status self.reset_clusters() + fig, ax = plt.subplots() + canvas = fig.canvas + background = canvas.copy_from_bbox(ax.bbox) + ymin, ymax = ax.get_ylim() all_lines = [] for i in self: canvas.restore_region(background) boundaries = self.cluster_boundaries() for i, b in enumerate(boundaries): - height = (b[1]-ymin)/(ymax-ymin) + height = (b[1] - ymin) / (ymax - ymin) if i < len(all_lines): all_lines[i].set_xdata([b[0], b[0]]) all_lines[i].set_ydata([0, height]) ax.draw_artist(all_lines[i]) else: - l = plt.axvline(b[0], 0, height, color='k', animated=True) + l = plt.axvline(b[0], 0, height, color="k", animated=True) ax.draw_artist(l) all_lines.append(l) canvas.blit(ax.bbox) @@ -399,21 +428,40 @@ def animate(self): self.status = status return -#End of class DataClusters +# End of class DataClusters -# simple test code -if __name__ == '__main__': - x = np.array([-2., -1.5, -1., -0.5, 0., 0.5, 1., 1.5, 2., 2.5, 3., 3.5, 4., 4.5, 5.]) - y = np.array([0.0183156, 0.105399, 0.36788, 0.778806, 1.00012, 0.780731, 0.386195, 0.210798, 0.386195, 0.780731, 1.00012, 0.778806, 0.36788, 0.105399, 0.0183156]) - - testcluster = DataClusters(x, y, .1) +# simple test code +if __name__ == "__main__": + + x = np.array([-2.0, -1.5, -1.0, -0.5, 0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0]) + y = np.array( + [ + 0.0183156, + 0.105399, + 0.36788, + 0.778806, + 1.00012, + 0.780731, + 0.386195, + 0.210798, + 0.386195, + 0.780731, + 1.00012, + 0.778806, + 0.36788, + 0.105399, + 0.0183156, + ] + ) + + testcluster = DataClusters(x, y, 0.1) testcluster.makeclusters() - print testcluster.clusters + print(testcluster.clusters) adj = testcluster.find_adjacent_clusters() - print adj - if len(adj) >0: + print(adj) + if len(adj) > 0: testcluster.combine_clusters(adj) - print testcluster.clusters + print(testcluster.clusters) diff --git a/diffpy/srmise/modelcluster.py b/diffpy/srmise/modelcluster.py index 10c0677..cf952b2 100644 --- a/diffpy/srmise/modelcluster.py +++ b/diffpy/srmise/modelcluster.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columbia University in the City of New York # All rights reserved. # # File coded by: Luke Granlund @@ -22,11 +23,9 @@ import re import sys -import matplotlib.pyplot as plt import numpy as np -import scipy as sp -from scipy.optimize import leastsq +from diffpy.srmise import srmiselog from diffpy.srmise.baselines import Baseline from diffpy.srmise.modelparts import ModelParts from diffpy.srmise.peaks import Peak, Peaks @@ -39,8 +38,6 @@ logger = logging.getLogger("diffpy.srmise") -from diffpy.srmise import srmiselog - class ModelCovariance(object): """Helper class preserves uncertainty info (full covariance matrix) for a fit model. 
@@ -57,8 +54,8 @@ class ModelCovariance(object):

    def __init__(self, *args, **kwds):
-        """Intialize object."""
-        self.cov = None # The raw covariance matrix
-        self.model = None # ModelParts instance, so both peaks and baseline (if present)
+        """Initialize object."""
+        self.cov = None  # The raw covariance matrix
+        self.model = None  # ModelParts instance, so both peaks and baseline (if present)

        # Map i->[n1,n2,...] of the jth ModelPart to the n_i parameters in cov.
        self.mmap = {}
@@ -96,11 +93,12 @@ def setcovariance(self, model, cov):
            emsg = "Parameter 'cov' must be a square matrix."
            raise ValueError(emsg)

-        if tempcov.shape[0] != model.npars(True) and tempcov.shape[0] != model.npars(False):
-            emsg = ["Parameter 'cov' must be an nxn matrix, where n is equal to the number of free ",
-                    "parameters in the model, or the total number of parameters (fixed and free) of ",
-                    "the model."]
+        if tempcov.shape[0] != model.npars(True) and tempcov.shape[0] != model.npars(False):
+            emsg = [
+                "Parameter 'cov' must be an nxn matrix, where n is equal to the number of free ",
+                "parameters in the model, or the total number of parameters (fixed and free) of ",
+                "the model.",
+            ]
            raise ValueError("".join(emsg))

        self.model = model.copy()
@@ -113,8 +111,8 @@ def setcovariance(self, model, cov):
        for i, m in enumerate(model):
            self.mmap[i] = n + np.arange(m.npars(True))
            for j, p in enumerate(m):
-                self.pmap[(i,j)] = n
-                self.ipmap[n] = (i,j)
+                self.pmap[(i, j)] = n
+                self.ipmap[n] = (i, j)
                n += 1

        if n == tempcov.shape[0]:
@@ -122,21 +120,20 @@ def setcovariance(self, model, cov):
            self.cov = tempcov
        else:
            # Create new covariance matrix, making sure to account for fixed pars
-            self.cov = np.matrix(np.zeros((n,n)))
+            self.cov = np.matrix(np.zeros((n, n)))

-            i=0
-            rawi=0
+            i = 0
+            rawi = 0
            for i in range(n):
                j = 0
                rawj = 0
                if free[i]:
                    for j in range(n):
                        if free[j]:
-                            self.cov[i,j] = cov[rawi,rawj]
+                            self.cov[i, j] = cov[rawi, rawj]
                            rawj += 1
                    rawi += 1

-
    def transform(self, in_format, out_format, **kwds):
        """Transform parameters and covariance matrix under specified change of variables.
@@ -168,7 +165,7 @@ def transform(self, in_format, out_format, **kwds):
        if "parts" in kwds:
            if kwds["parts"] == "peaks":
-                parts = range(len(self.model)-1)
+                parts = range(len(self.model) - 1)
            elif kwds["parts"] == "baseline":
                parts = [-1]
            else:
@@ -189,33 +186,41 @@ def transform(self, in_format, out_format, **kwds):
        for i in parts:
            start = self.mmap[i][0]
-            stop = self.mmap[i][-1]+1
+            stop = self.mmap[i][-1] + 1
            p = self.model[i]
            try:
                subg = p.owner().transform_derivatives(p.pars, in_format, out_format)
            except NotImplementedError:
-                logger.warning("Transformation gradient not implemented for part %i: %s. Ignoring transformation." %(i, str(p)))
+                logger.warning(
+                    "Transformation gradient not implemented for part %i: %s. Ignoring transformation."
+                    % (i, str(p))
+                )
                subg = np.identity(p.npars(True))
            except Exception as e:
-                logger.warning("Transformation gradient failed for part %i: %s. Failed with message %s. Ignoring transformation." %(i, str(p), str(e)))
+                logger.warning(
+                    "Transformation gradient failed for part %i: %s. "
+                    "Failed with message %s. Ignoring transformation." % (i, str(p), str(e))
+                )
                subg = np.identity(p.npars(True))

            # Now transform the parameters to match
            try:
                p.pars = p.owner().transform_parameters(p.pars, in_format, out_format)
            except Exception as e:
-                logger.warning("Parameter transformation failed for part %i: %s. Failed with message %s. Ignoring transformation." %(i, str(p), str(e)))
+                logger.warning(
+                    "Parameter transformation failed for part %i: %s. "
+                    "Failed with message %s. Ignoring transformation."
% (i, str(p), str(e)) + ) subg = np.identity(p.npars(True)) # Update the global gradient matrix g[start:stop, start:stop] = subg g = np.matrix(g) - self.cov = np.array(g*np.matrix(self.cov).transpose()*g) + self.cov = np.array(g * np.matrix(self.cov).transpose() * g) return - def getcorrelation(self, i, j): """Return the correlation between variables i and j, Corr_ij=Cov_ij/(sigma_i sigma_j) @@ -233,10 +238,10 @@ def getcorrelation(self, i, j): i1 = self.pmap[i] if i in self.pmap else i j1 = self.pmap[j] if j in self.pmap else j - if self.cov[i1,i1] == 0. or self.cov[j1,j1] == 0.: - return 0. # Avoiding undefined quantities is sensible in this context. + if self.cov[i1, i1] == 0.0 or self.cov[j1, j1] == 0.0: + return 0.0 # Avoiding undefined quantities is sensible in this context. else: - return self.cov[i1,j1]/(np.sqrt(self.cov[i1,i1])*np.sqrt(self.cov[j1,j1])) + return self.cov[i1, j1] / (np.sqrt(self.cov[i1, i1]) * np.sqrt(self.cov[j1, j1])) def getvalue(self, i): """Return value of parameter i. @@ -254,7 +259,7 @@ def getuncertainty(self, i): which indicate the mth parameter of modelpart l. """ (l, m) = i if i in self.pmap else self.ipmap[i] - return np.sqrt(self.getcovariance(i,i)) + return np.sqrt(self.getcovariance(i, i)) def getcovariance(self, i, j): """Return the covariance between variables i and j. @@ -270,7 +275,7 @@ def getcovariance(self, i, j): i1 = self.pmap[i] if i in self.pmap else i j1 = self.pmap[j] if j in self.pmap else j - return self.cov[i1,j1] + return self.cov[i1, j1] def get(self, i): """Return (value, uncertainty) tuple for parameter i. @@ -298,9 +303,9 @@ def correlationwarning(self, threshold=0.8): correlated = [] for i in range(self.cov.shape[0]): - for j in range(i+1, self.cov.shape[0]): - c = self.getcorrelation(i,j) - if c and np.abs(c) > threshold: # filter out None values + for j in range(i + 1, self.cov.shape[0]): + c = self.getcorrelation(i, j) + if c and np.abs(c) > threshold: # filter out None values correlated.append((self.ipmap[i], self.ipmap[j], c)) return correlated @@ -310,7 +315,7 @@ def __str__(self): return "Model and/or Covariance matrix undefined." lines = [] for i, m in enumerate(self.model): - lines.append(" ".join([self.prettypar((i,j)) for j in range(len(m))])) + lines.append(" ".join([self.prettypar((i, j)) for j in range(len(m))])) return "\n".join(lines) def prettypar(self, i): @@ -322,11 +327,12 @@ def prettypar(self, i): if self.model is None or self.cov is None: return "Model and/or Covariance matrix undefined." k = i if i in self.ipmap else self.pmap[i] - return "%.5e (%.5e)" %(self.getvalue(k), np.sqrt(self.getcovariance(k,k))) + return "%.5e (%.5e)" % (self.getvalue(k), np.sqrt(self.getcovariance(k, k))) # End of class ModelCovariance + class ModelCluster(object): """Associate a contiguous cluster of data with an appropriate model. 
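ModelCovariance.transform() above pushes the covariance matrix through a change of variables using the gradient of the transformation. The textbook first-order rule is cov' = G cov G^T, with G the Jacobian of the new parameters with respect to the old; a small NumPy sketch with hypothetical 2x2 matrices:

import numpy as np

def propagate_covariance(cov, grad):
    # First-order propagation for y = f(x), where grad[i, j] = dy_i/dx_j
    # is evaluated at the fitted parameter values.
    grad = np.asarray(grad)
    return grad @ np.asarray(cov) @ grad.T

cov = np.array([[1.0e-4, 2.0e-5], [2.0e-5, 4.0e-4]])  # hypothetical fit covariance
grad = np.array([[1.0, 0.0], [0.5, 2.0]])  # hypothetical reparametrization Jacobian
print(propagate_covariance(cov, grad))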
@@ -399,7 +405,7 @@ def __init__(self, model, *args, **kwds): self.error_method = orig.error_method self.peak_funcs = list(orig.peak_funcs) return - else: # Explicit creation + else: # Explicit creation if model is None: self.model = Peaks([]) else: @@ -468,26 +474,26 @@ def writestr(self, **kwds): if self.peak_funcs is None: lines.append("peak_funcs=None") else: - lines.append("peak_funcs=%s" %repr([pfbaselist.index(p) for p in self.peak_funcs])) + lines.append("peak_funcs=%s" % repr([pfbaselist.index(p) for p in self.peak_funcs])) if self.error_method is None: - lines.append('ModelEvaluator=None') + lines.append("ModelEvaluator=None") else: - lines.append('ModelEvaluator=%s' %self.error_method.__name__) + lines.append("ModelEvaluator=%s" % self.error_method.__name__) - lines.append("slice=%s" %repr(self.slice)) + lines.append("slice=%s" % repr(self.slice)) # Indexed baseline functions (unless externally provided) if writeblf: lines.append("## BaselineFunctions") for i, bf in enumerate(blfbaselist): - lines.append('# BaselineFunction %s' %i) + lines.append("# BaselineFunction %s" % i) lines.append(bf.writestr(blfbaselist)) # Indexed peak functions (unless externally provided) if writepf: lines.append("## PeakFunctions") for i, pf in enumerate(pfbaselist): - lines.append('# PeakFunction %s' %i) + lines.append("# PeakFunction %s" % i) lines.append(pf.writestr(pfbaselist)) lines.append("# BaselineObject") @@ -501,17 +507,16 @@ def writestr(self, **kwds): lines.append("None") else: for m in self.model: - lines.append('# ModelPeak') + lines.append("# ModelPeak") lines.append(m.writestr(pfbaselist)) # Raw data in modelcluster. - lines.append('### start data') - lines.append('#L r y dy') + lines.append("### start data") + lines.append("#L r y dy") for i in range(len(self.r_data)): - lines.append('%g %g %g' % \ - (self.r_data[i], self.y_data[i], self.y_error[i]) ) + lines.append("%g %g %g" % (self.r_data[i], self.y_data[i], self.y_error[i])) - datastring = "\n".join(lines)+"\n" + datastring = "\n".join(lines) + "\n" return datastring @staticmethod @@ -545,102 +550,97 @@ def factory(mcstr, **kwds): # - StartData # find data section, and what information it contains - res = re.search(r'^#+ start data\s*(?:#.*\s+)*', mcstr, re.M) + res = re.search(r"^#+ start data\s*(?:#.*\s+)*", mcstr, re.M) if res: - start_data = mcstr[res.end():].strip() - start_data_info = mcstr[res.start():res.end()] - header = mcstr[:res.start()] - res = re.search(r'^(#+L.*)$', start_data_info, re.M) + start_data = mcstr[res.end() :].strip() + start_data_info = mcstr[res.start() : res.end()] + header = mcstr[: res.start()] + res = re.search(r"^(#+L.*)$", start_data_info, re.M) if res: - start_data_info = start_data_info[res.start():res.end()].strip() + start_data_info = start_data_info[res.start() : res.end()].strip() hasr = False hasy = False hasdy = False - res = re.search(r'\br\b', start_data_info) + res = re.search(r"\br\b", start_data_info) if res: hasr = True - res = re.search(r'\by\b', start_data_info) + res = re.search(r"\by\b", start_data_info) if res: hasy = True - res = re.search(r'\bdy\b', start_data_info) + res = re.search(r"\bdy\b", start_data_info) if res: hasdy = True # Model - res = re.search(r'^#+ ModelPeaks.*$', header, re.M) + res = re.search(r"^#+ ModelPeaks.*$", header, re.M) if res: - model_peaks = header[res.end():].strip() - header = header[:res.start()] + model_peaks = header[res.end() :].strip() + header = header[: res.start()] # Baseline Object - res = re.search(r'^#+ BaselineObject\s*(?:#.*\s+)*', 
header, re.M) + res = re.search(r"^#+ BaselineObject\s*(?:#.*\s+)*", header, re.M) if res: - baselineobject = header[res.end():].strip() - header = header[:res.start()] + baselineobject = header[res.end() :].strip() + header = header[: res.start()] # Peak functions if readpf: - res = re.search(r'^#+ PeakFunctions.*$', header, re.M) + res = re.search(r"^#+ PeakFunctions.*$", header, re.M) if res: - peakfunctions = header[res.end():].strip() - header = header[:res.start()] + peakfunctions = header[res.end() :].strip() + header = header[: res.start()] # Baseline functions if readblf: - res = re.search(r'^#+ BaselineFunctions.*$', header, re.M) + res = re.search(r"^#+ BaselineFunctions.*$", header, re.M) if res: - baselinefunctions = header[res.end():].strip() - header = header[:res.start()] + baselinefunctions = header[res.end() :].strip() + header = header[: res.start()] - ### Instantiating baseline functions + # Instantiating baseline functions if readblf: blfbaselist = [] - res = re.split(r'(?m)^#+ BaselineFunction \d+\s*(?:#.*\s+)*', baselinefunctions) + res = re.split(r"(?m)^#+ BaselineFunction \d+\s*(?:#.*\s+)*", baselinefunctions) for s in res[1:]: blfbaselist.append(BaseFunction.factory(s, blfbaselist)) - ### Instantiating peak functions + # Instantiating peak functions if readpf: pfbaselist = [] - res = re.split(r'(?m)^#+ PeakFunction \d+\s*(?:#.*\s+)*', peakfunctions) + res = re.split(r"(?m)^#+ PeakFunction \d+\s*(?:#.*\s+)*", peakfunctions) for s in res[1:]: pfbaselist.append(BaseFunction.factory(s, pfbaselist)) - - ### Instantiating header data + # Instantiating header data # peak_funcs - res = re.search(r'^peak_funcs=(.*)$', header, re.M) + res = re.search(r"^peak_funcs=(.*)$", header, re.M) peak_funcs = eval(res.groups()[0].strip()) if peak_funcs is not None: peak_funcs = [pfbaselist[i] for i in peak_funcs] # error_method - res = re.search(r'^ModelEvaluator=(.*)$', header, re.M) + res = re.search(r"^ModelEvaluator=(.*)$", header, re.M) __import__("diffpy.srmise.modelevaluators") module = sys.modules["diffpy.srmise.modelevaluators"] error_method = getattr(module, res.groups()[0].strip()) # slice - res = re.search(r'^slice=(.*)$', header, re.M) + res = re.search(r"^slice=(.*)$", header, re.M) cluster_slice = eval(res.groups()[0].strip()) - - ### Instantiating BaselineObject - if re.match(r'^None$', baselineobject): + # Instantiating BaselineObject + if re.match(r"^None$", baselineobject): baseline = None else: baseline = Baseline.factory(baselineobject, blfbaselist) - - ### Instantiating model + # Instantiating model model = Peaks() - res = re.split(r'(?m)^#+ ModelPeak\s*(?:#.*\s+)*', model_peaks) + res = re.split(r"(?m)^#+ ModelPeak\s*(?:#.*\s+)*", model_peaks) for s in res[1:]: model.append(Peak.factory(s, pfbaselist)) - - - ### Instantiating start data + # Instantiating start data # read actual data - r, y, dy arrays = [] if hasr: @@ -661,12 +661,13 @@ def factory(mcstr, **kwds): # raise SrMiseDataFormatError if something goes wrong try: for line in start_data.split("\n"): - l = line.split() - if len(arrays) != len(l): - emsg = ("Number of value fields does not match that given by '%s'" %start_data_info) + lines = line.split() + if len(arrays) != len(lines): + emsg = "Number of value fields does not match that given by '%s'" % start_data_info + raise IndexError(emsg) for a, v in zip(arrays, line.split()): a.append(float(v)) - except (ValueError, IndexError), err: + except (ValueError, IndexError) as err: raise SrMiseDataFormatError(err) if hasr: r_data = np.array(r_data) @@ 
-675,8 +676,16 @@ def factory(mcstr, **kwds): if hasdy: y_error = np.array(y_error) - return ModelCluster(model, baseline, r_data, y_data, y_error, cluster_slice, error_method, peak_funcs) - + return ModelCluster( + model, + baseline, + r_data, + y_data, + y_error, + cluster_slice, + error_method, + peak_funcs, + ) @staticmethod def join_adjacent(m1, m2): @@ -724,11 +733,11 @@ def join_adjacent(m1, m2): if not right_ids[0] == left_ids[1]: raise ValueError("Given ModelClusters are not adjacent.") - new_slice=slice(left_ids[0], right_ids[1], 1) + new_slice = slice(left_ids[0], right_ids[1], 1) # Approximately where the clusters meet. - border_x = .5*(left.r_data[left_ids[1]-1] + right.r_data[right_ids[0]]) - border_y = .5*(left.y_data[left_ids[1]-1] + right.y_data[right_ids[0]]) + border_x = 0.5 * (left.r_data[left_ids[1] - 1] + right.r_data[right_ids[0]]) + border_y = 0.5 * (left.y_data[left_ids[1] - 1] + right.y_data[right_ids[0]]) if len(m1.model) > 0 and len(m2.model) > 0: new_model = left.model.copy() @@ -741,24 +750,32 @@ def join_adjacent(m1, m2): # border_x are removed. The highly unlikely case of two peaks # exactly at the border is also handled. for i in reversed(range(len(new_model))): - if new_model[i]["position"] == border_x and \ - i > 0 and new_model[i-1]["position"] == border_x: + if new_model[i]["position"] == border_x and i > 0 and new_model[i - 1]["position"] == border_x: del new_model[i] elif new_ids[i] != i: - if (new_model[i]["position"] > border_x and new_ids[i] < len(left.model)) and \ - (new_model[i]["position"] < border_x and new_ids[i] >= len(left.model)): + if (new_model[i]["position"] > border_x and new_ids[i] < len(left.model)) and ( + new_model[i]["position"] < border_x and new_ids[i] >= len(left.model) + ): del new_model[i] # Likely to improve any future fitting new_model.match_at(border_x, border_y) elif len(m1.model) > 0: new_model = m1.model.copy() - else: # Only m2 has entries, or both are empty + else: # Only m2 has entries, or both are empty new_model = m2.model.copy() - peak_funcs = list(set(m1.peak_funcs) | set(m2.peak_funcs)) # "Union" - return ModelCluster(new_model, m1.baseline, m1.r_data, m1.y_data, - m1.y_error, new_slice, m1.error_method, peak_funcs) + peak_funcs = list(set(m1.peak_funcs) | set(m2.peak_funcs)) # "Union" + return ModelCluster( + new_model, + m1.baseline, + m1.r_data, + m1.y_data, + m1.y_error, + new_slice, + m1.error_method, + peak_funcs, + ) def change_slice(self, new_slice): """Change the slice which represents the extent of a cluster.""" @@ -808,7 +825,7 @@ def npars(self, count_baseline=True, count_fixed=True): n += self.baseline.npars(count_fixed=count_fixed) return n - def replacepeaks(self, newpeaks, delslice=slice(0,0)): + def replacepeaks(self, newpeaks, delslice=slice(0, 0)): """Replace peaks given by delslice by those in newpeaks. Parameters @@ -824,11 +841,11 @@ def replacepeaks(self, newpeaks, delslice=slice(0,0)): def deletepeak(self, idx): """Delete the peak at the given index.""" - self.replacepeaks([], slice(idx,idx+1)) + self.replacepeaks([], slice(idx, idx + 1)) def estimatepeak(self): """Attempt to add single peak to empty cluster. Return True if successful.""" - ### STUB!!! ### + # STUB!!! ### # Currently only a single peak function is supported. Dynamic # selection from multiple types may require additional support # within peak functions themselves. 
The simplest method would @@ -842,16 +859,23 @@ def estimatepeak(self): selected = self.peak_funcs[0] estimate = selected.estimate_parameters(self.r_cluster, self.y_cluster - self.valuebl()) - if estimate is not None: newpeak = selected.actualize(estimate, "internal") - logger.info("Estimate: %s" %newpeak) + logger.info("Estimate: %s" % newpeak) self.replacepeaks(Peaks([newpeak])) return True else: return False - def fit(self, justify=False, ntrials=0, fitbaseline=False, estimate=True, cov=None, cov_format="default_output"): + def fit( + self, + justify=False, + ntrials=0, + fitbaseline=False, + estimate=True, + cov=None, + cov_format="default_output", + ): """Perform a chi-square fit of the model to data in cluster. Parameters @@ -871,11 +895,11 @@ def fit(self, justify=False, ntrials=0, fitbaseline=False, estimate=True, cov=No if self.never_fit: return None if len(self.model) == 0: - #Attempt to add a first peak to the cluster + # Attempt to add a first peak to the cluster if estimate: try: self.estimatepeak() - except SrMiseEstimationError, e: + except SrMiseEstimationError: logger.info("Fit: No model to fit, estimation not possible.") return else: @@ -899,14 +923,16 @@ def fit(self, justify=False, ntrials=0, fitbaseline=False, estimate=True, cov=No fmodel = self.model try: - fmodel.fit(self.r_data, - y_datafit, - self.y_error, - self.slice, - ntrials, - cov, - cov_format) - except SrMiseFitError, e: + fmodel.fit( + self.r_data, + y_datafit, + self.y_error, + self.slice, + ntrials, + cov, + cov_format, + ) + except SrMiseFitError as e: logger.debug("Error while fitting cluster: %s\nReverting to original model.", e) self.model = orig_model self.baseline = orig_baseline @@ -925,17 +951,15 @@ def fit(self, justify=False, ntrials=0, fitbaseline=False, estimate=True, cov=No # Test for fit improvement if new_qual < orig_qual: # either fit blew up (and leastsq didn't notice) or the fit had already converged. - msg = ["ModelCluster.fit() warning: fit seems not to have improved.", - "Reverting to original model.", - "----------", - "New Quality: %s", - "Original Quality: %s" - "%s", - "----------"] - logger.debug("\n".join(msg), - new_qual.stat, - orig_qual.stat, - self.model) + msg = [ + "ModelCluster.fit() warning: fit seems not to have improved.", + "Reverting to original model.", + "----------", + "New Quality: %s", + "Original Quality: %s" "%s", + "----------", + ] + logger.debug("\n".join(msg), new_qual.stat, orig_qual.stat, self.model) self.model = orig_model self.baseline = orig_baseline @@ -953,9 +977,11 @@ def fit(self, justify=False, ntrials=0, fitbaseline=False, estimate=True, cov=No # original fit is less likely to obscure any hidden peaks. 
if justify and len(self.model) == 1 and len(orig_model) > 0: min_npars = min([p.npars for p in self.peak_funcs]) - if new_qual.growth_justified(self, min_npars): - msg = ["ModelCluster.fit(): Fit over current cluster better explained by additional peaks.", - "Reverting to original model."] + if new_qual.growth_justified(self, min_npars): + msg = [ + "ModelCluster.fit(): Fit over current cluster better explained by additional peaks.", + "Reverting to original model.", + ] logger.debug("\n".join(msg)) self.model = orig_model @@ -974,28 +1000,31 @@ def contingent_fit(self, minpoints, growth_threshold): """ if self.never_fit: return None - if (self.last_fit_size > 0 and float(self.size)/self.last_fit_size >= growth_threshold) \ - or (self.last_fit_size == 0 and self.size >= minpoints): + if (self.last_fit_size > 0 and float(self.size) / self.last_fit_size >= growth_threshold) or ( + self.last_fit_size == 0 and self.size >= minpoints + ): return self.fit(justify=True) return None def cleanfit(self): """Remove poor-quality peaks in the fit. Return number removed.""" - #Find peaks located outside the cluster + # Find peaks located outside the cluster pos = np.array([p["position"] for p in self.model]) left_idx = pos.searchsorted(self.r_cluster[0]) right_idx = pos.searchsorted(self.r_cluster[-1]) outside_idx = range(0, left_idx) outside_idx.extend(range(right_idx, len(self.model))) - #inside_idx = range(left_idx, right_idx) + # inside_idx = range(left_idx, right_idx) # Identify outside peaks that contribute < error everywhere in cluster. # Must check entire cluster and not just nearest endpoint because not # every peak function can be assumed to have its greatest contribution # there, and errors are not necessarily constant. - outside_idx = [i for i in outside_idx \ - if (self.model[i].removable \ - and max(self.model[i].value(self.r_cluster) - self.error_cluster) < 0)] + outside_idx = [ + i + for i in outside_idx + if (self.model[i].removable and max(self.model[i].value(self.r_cluster) - self.error_cluster) < 0) + ] # TODO: Check for peaks that have blown up. # Remember to check if a peak is removable. @@ -1015,11 +1044,11 @@ def cleanfit(self): msg.extend([str(self.model[i]) for i in nan_idx]) logger.debug("\n".join(msg)) -# # TODO: Uncomment when there's a point! -# if len(blown_idx) > 0: -# msg = ["Following peaks inside cluster were too large and had to be removed:"] -# msg.extend([str(self.model[i]) for i in blown_idx]) -# logger.info("\n".join(msg)) + # # TODO: Uncomment when there's a point! + # if len(blown_idx) > 0: + # msg = ["Following peaks inside cluster were too large and had to be removed:"] + # msg.extend([str(self.model[i]) for i in blown_idx]) + # logger.info("\n".join(msg)) # A peak can only be removed once. to_remove = list(set(outside_idx) | set(blown_idx) | set(nan_idx)) @@ -1048,7 +1077,7 @@ def reduce_to(self, x, y): logger.debug("reduce_to: No reduction necessary.") return None orig_model = self.model.copy() - self.model.match_at(x, y-self.valuebl(x)) + self.model.match_at(x, y - self.valuebl(x)) quality = self.fit() # Did reduction help? 
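The gate in contingent_fit() above reduces to a small predicate; restated on its own, with the argument names standing in for the corresponding ModelCluster attributes:

def should_refit(size, last_fit_size, minpoints, growth_threshold):
    # Refit once the cluster has grown by growth_threshold since the last
    # fit, or when an unfit cluster first reaches the minimum size.
    if last_fit_size > 0:
        return size / last_fit_size >= growth_threshold
    return size >= minpoints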
@@ -1067,13 +1096,13 @@ def reduce_to(self, x, y): def value(self, r=None): """Return value of baseline+model over cluster.""" - if len(self.model)==0: + if len(self.model) == 0: return self.valuebl(r) else: if r is None: - return self.valuebl(r)+(self.model.value(self.r_data, self.slice)[self.slice]) + return self.valuebl(r) + (self.model.value(self.r_data, self.slice)[self.slice]) else: - return self.valuebl(r)+(self.model.value(r)) + return self.valuebl(r) + (self.model.value(r)) def valuebl(self, r=None): """Return baseline's value over cluster. @@ -1088,10 +1117,10 @@ def valuebl(self, r=None): if r is None: return np.zeros(self.size) else: - return r*0. + return r * 0.0 else: if r is None: - return (self.baseline.value(self.r_data, self.slice)[self.slice]) + return self.baseline.value(self.r_data, self.slice)[self.slice] else: return self.baseline.value(r) @@ -1130,9 +1159,9 @@ def plottable(self, joined=False): else: toreturn = [self.r_cluster, self.y_cluster] bl = self.valuebl() - toreturn.extend([self.r_cluster for i in range(2*len(self.model))]) + toreturn.extend([self.r_cluster for i in range(2 * len(self.model))]) for i, p in enumerate(self.model): - toreturn[2*i+3] = bl + p.value(self.r_data, self.slice)[self.slice] + toreturn[2 * i + 3] = bl + p.value(self.r_data, self.slice)[self.slice] return toreturn def plottable_residual(self): @@ -1149,16 +1178,15 @@ def augment(self, source): best_qual = self.quality() source_model = source.model.copy() - msg = ["==== Augmenting model ====", - "Original fit:", - "%s", - "w/ quality: %s", - "New model fits:", - "%s"] - logger.debug("\n".join(msg), - best_model, - best_qual.stat, - source_model) + msg = [ + "==== Augmenting model ====", + "Original fit:", + "%s", + "w/ quality: %s", + "New model fits:", + "%s", + ] + logger.debug("\n".join(msg), best_model, best_qual.stat, source_model) # Each iteration improves best_model by adding the peak from # source_model to best_model that most improves its quality, breaking @@ -1181,28 +1209,27 @@ def augment(self, source): best_model = test_models[args[-1]] del source_model[args[-1]] else: - break # Best possible model has been found. + break # Best possible model has been found. self.replacepeaks(best_model, slice(len(self.model))) # TODO: Do I need this? If test_model contains peaks # by reference, the fit peaks will change as well. self.fit() - msg = ["Best model after fit is:", - "%s", - "w/ quality: %s", - "================="] - logger.debug("\n".join(msg), - self.model, - best_qual.stat) + msg = ["Best model after fit is:", "%s", "w/ quality: %s", "================="] + logger.debug("\n".join(msg), self.model, best_qual.stat) return def __str__(self): """Return string representation of the cluster.""" - return '\n'.join(["Slice: %s" %self.slice, - "Quality: %s" %self.quality().stat, - "Baseline: %s" %self.baseline, - "Peaks:\n%s" %self.model]) + return "\n".join( + [ + "Slice: %s" % self.slice, + "Quality: %s" % self.quality().stat, + "Baseline: %s" % self.baseline, + "Peaks:\n%s" % self.model, + ] + ) def prune(self): """Remove peaks until model quality no longer improves. 
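The prune() hunks that follow implement a greedy backward elimination over the peak list. A compact sketch of that control flow under simplifying assumptions, where refit() and quality() stand in for ModelCluster.fit() and ModelCluster.quality(), and quality objects compare with ">" meaning "better" (as ModelEvaluator's rich comparisons arrange):

def greedy_prune(peaks, refit, quality):
    # Try deleting each peak in turn, keep the single deletion that most
    # improves the quality statistic, and stop once no deletion helps.
    best = list(peaks)
    best_qual = quality(best)
    while best:
        trials = [refit(best[:i] + best[i + 1:]) for i in range(len(best))]
        scores = [quality(t) for t in trials]
        i = max(range(len(trials)), key=lambda k: scores[k])
        if scores[i] > best_qual:
            best, best_qual = trials[i], scores[i]
        else:
            break
    return best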
@@ -1223,10 +1250,19 @@ def prune(self): tracer.pushc() y_nobl = self.y_cluster - self.valuebl() - prune_mc = ModelCluster(None, None, self.r_cluster, y_nobl, self.error_cluster, None, self.error_method, self.peak_funcs) + prune_mc = ModelCluster( + None, + None, + self.r_cluster, + y_nobl, + self.error_cluster, + None, + self.error_method, + self.peak_funcs, + ) orig_model = self.model.copy() - peak_range = 3 # number of peaks on either side of deleted peak to fit + peak_range = 3 # number of peaks on either side of deleted peak to fit check_models = [] for m in orig_model: if m.removable: @@ -1237,16 +1273,11 @@ def prune(self): best_model = self.model.copy() best_qual = self.quality() - msg = ["====Pruning fits:====", - "Original model:", - "%s", - "w/ quality: %s"] - logger.info("\n".join(msg), - best_model, - best_qual.stat) + msg = ["====Pruning fits:====", "Original model:", "%s", "w/ quality: %s"] + logger.info("\n".join(msg), best_model, best_qual.stat) - #### Main prune loop #### - while(check_models.count(None) < len(check_models)): + # Main prune loop #### + while check_models.count(None) < len(check_models): # Cache value of individual peaks for best current model. best_modely = [] @@ -1271,23 +1302,17 @@ def prune(self): for i in range(len(check_models)): if check_models[i] is not None: # Create model with ith peak removed, and distant peaks effectively fixed - lo = max(i-peak_range, 0) - hi = min(i+peak_range+1, len(best_model)) + lo = max(i - peak_range, 0) + hi = min(i + peak_range + 1, len(best_model)) check_models[i] = best_model[lo:i].copy() - check_models[i].extend(best_model[i+1:hi].copy()) + check_models[i].extend(best_model[i + 1 : hi].copy()) prune_mc.model = check_models[i] - msg = ["len(check_models): %s", - "len(best_model): %s", - "i: %s"] - logger.debug("\n".join(msg), - len(check_models), - len(best_model), - i) + msg = ["len(check_models): %s", "len(best_model): %s", "i: %s"] + logger.debug("\n".join(msg), len(check_models), len(best_model), i) addpars = best_model.npars() - check_models[i].npars() - best_model[i].npars(count_fixed=False) - # Remove contribution of (effectively) fixed peaks y = np.array(y_nobl) if lo > 0: @@ -1298,30 +1323,23 @@ def prune(self): prune_mc.y_data = y prune_mc.y_cluster = y - msg = ["", - "--- %s ---", - "Removed peak: %s", - "Starting model:", - "%s"] - logger.debug("\n".join(msg), - i, - best_model[i], - prune_mc.model) + msg = [ + "", + "--- %s ---", + "Removed peak: %s", + "Starting model:", + "%s", + ] + logger.debug("\n".join(msg), i, best_model[i], prune_mc.model) - prune_mc.fit(ntrials=int(np.sqrt(len(y))+50), estimate=False) + prune_mc.fit(ntrials=int(np.sqrt(len(y)) + 50), estimate=False) qual = prune_mc.quality(kshift=addpars) check_qual = np.append(check_qual, qual) check_qualidx = np.append(check_qualidx, i) - msg = ["Found model:", - "%s", - "addpars: %s", - "qual: %s"] - logger.debug("\n".join(msg), - prune_mc.model, - addpars, - qual.stat) + msg = ["Found model:", "%s", "addpars: %s", "qual: %s"] + logger.debug("\n".join(msg), prune_mc.model, addpars, qual.stat) # Do not check this peak in the future if quality decreased. 
                if qual < best_qual:
@@ -1329,21 +1347,20 @@ def prune(self):

             arg = check_qual.argsort()

-            msg = [" - Finished round of pruning -",
-                   "best_qual: %s",
-                   "check_qual: %s",
-                   "sorted check_qual: %s"]
-            logger.debug("\n".join(msg),
-                         best_qual.stat,
-                         [c.stat for c in check_qual],
-                         arg)
+            msg = [
+                " - Finished round of pruning -",
+                "best_qual: %s",
+                "check_qual: %s",
+                "sorted check_qual: %s",
+            ]
+            logger.debug("\n".join(msg), best_qual.stat, [c.stat for c in check_qual], arg)

             arg = arg[-1]
             newbest_qual = check_qual[arg]
             newbest_qualidx = check_qualidx[arg]
             if newbest_qual > best_qual:
-                lo = max(newbest_qualidx-peak_range, 0)
-                hi = min(newbest_qualidx+peak_range+1, len(orig_model))
+                lo = max(newbest_qualidx - peak_range, 0)
+                hi = min(newbest_qualidx + peak_range + 1, len(orig_model))
                 bmtemp = best_model[:lo]
                 bmtemp.extend(check_models[newbest_qualidx])
                 bmtemp.extend(best_model[hi:])
@@ -1360,12 +1377,8 @@ def prune(self):
                 self.model = best_model
                 tracer.emit(self)

-                msg = ["New best model:",
-                       "%s",
-                       "best_qual: %s"]
-                logger.debug("\n".join(msg),
-                             best_model,
-                             best_qual.stat)
+                msg = ["New best model:", "%s", "best_qual: %s"]
+                logger.debug("\n".join(msg), best_model, best_qual.stat)

                 if len(best_model) > 0:
                     del check_models[newbest_qualidx]
@@ -1378,48 +1391,49 @@ def prune(self):
             else:
                 break

-        msg = ["Best model after pruning is:",
-               "%s",
-               "w/ quality: %s",
-               "================="]
-        logger.info("\n".join(msg),
-                    self.model,
-                    self.quality().stat)
+        msg = [
+            "Best model after pruning is:",
+            "%s",
+            "w/ quality: %s",
+            "=================",
+        ]
+        logger.info("\n".join(msg), self.model, self.quality().stat)

         tracer.popc()

         return

+
 # simple test code
-if __name__ == '__main__':
+if __name__ == "__main__":

     from numpy.random import randn

     from diffpy.srmise.modelevaluators import AICc
     from diffpy.srmise.peaks import GaussianOverR

-    pf = GaussianOverR(.7)
-    res = .01
+    pf = GaussianOverR(0.7)
+    res = 0.01

-    pars = [[3, .2, 10], [3.5, .2, 10]]
+    pars = [[3, 0.2, 10], [3.5, 0.2, 10]]
     ideal_peaks = Peaks([pf.actualize(p, "pwa") for p in pars])

-    r = np.arange(2,4,res)
+    r = np.arange(2, 4, res)
     y = ideal_peaks.value(r) + randn(len(r))
     err = np.ones(len(r))
     evaluator = AICc()

-    guesspars = [[2.9, .15, 5], [3.6, .3, 5]]
+    guesspars = [[2.9, 0.15, 5], [3.6, 0.3, 5]]
     guess_peaks = Peaks([pf.actualize(p, "pwa") for p in guesspars])
     cluster = ModelCluster(guess_peaks, None, r, y, err, None, AICc, [pf])

-    print "--- Actual Peak parameters ---"
-    print ideal_peaks
+    print("--- Actual Peak parameters ---")
+    print(ideal_peaks)

-    print "\n--- Before fit ---"
-    print cluster
+    print("\n--- Before fit ---")
+    print(cluster)

     cluster.fit()

-    print "\n--- After fit ---"
-    print cluster
+    print("\n--- After fit ---")
+    print(cluster)
diff --git a/diffpy/srmise/modelevaluators/__init__.py b/diffpy/srmise/modelevaluators/__init__.py
index 4cda42c..1d80760 100644
--- a/diffpy/srmise/modelevaluators/__init__.py
+++ b/diffpy/srmise/modelevaluators/__init__.py
@@ -2,7 +2,8 @@
 ##############################################################################
 #
 # SrMise by Luke Granlund
-# (c) 2014 trustees of the Michigan State University.
+# (c) 2014 trustees of the Michigan State University
+# (c) 2024 trustees of Columbia University in the City of New York
 # All rights reserved.
 #
 # File coded by: Luke Granlund
diff --git a/diffpy/srmise/modelevaluators/aic.py b/diffpy/srmise/modelevaluators/aic.py
index 915cace..e3ee9e4 100644
--- a/diffpy/srmise/modelevaluators/aic.py
+++ b/diffpy/srmise/modelevaluators/aic.py
@@ -2,7 +2,8 @@
 ##############################################################################
 #
 # SrMise by Luke Granlund
-# (c) 2014 trustees of the Michigan State University.
+# (c) 2014 trustees of the Michigan State University
+# (c) 2024 trustees of Columbia University in the City of New York
 # All rights reserved.
 #
 # File coded by: Luke Granlund
@@ -15,13 +16,13 @@

 import numpy as np

-import diffpy.srmise.srmiselog
 from diffpy.srmise.modelevaluators.base import ModelEvaluator
 from diffpy.srmise.srmiseerrors import SrMiseModelEvaluatorError

 logger = logging.getLogger("diffpy.srmise")

-class AIC (ModelEvaluator):
+
+class AIC(ModelEvaluator):
     """Evaluate and compare models with the AIC statistic.

     Akaike's Information Criterion (AIC) is a method for comparing statistical
@@ -52,11 +53,11 @@ def __init__(self):

     def evaluate(self, fit, count_fixed=False, kshift=0):
         """Return quality of fit for given ModelCluster using AIC (Akaike's Information Criterion).

-        Parameters
-        fit: A ModelCluster
-        count_fixed: Whether fixed parameters are considered.
-        kshift: (0) Treat the model has having this many additional
-                parameters. Negative values also allowed."""
+        Parameters
+        fit: A ModelCluster
+        count_fixed: Whether fixed parameters are considered.
+        kshift: (0) Treat the model as having this many additional
+                parameters. Negative values also allowed."""
         # Number of parameters. By default, fixed parameters are ignored.
         k = fit.model.npars(count_fixed=count_fixed) + kshift
         if k < 0:
@@ -70,14 +71,13 @@ def evaluate(self, fit, count_fixed=False, kshift=0):
             logger.warn("AIC.evaluate(): too few data to evaluate quality reliably.")
             n = self.minpoints(k)

-        if self.chisq == None:
+        if self.chisq is None:
             self.chisq = self.chi_squared(fit.value(), fit.y_cluster, fit.error_cluster)

         self.stat = self.chisq + self.parpenalty(k, n)
         return self.stat

-
     def minpoints(self, npars):
         """Calculates the minimum number of points required to make an estimate of a model's quality."""

@@ -86,36 +86,39 @@ def minpoints(self, npars):
     def parpenalty(self, k, n):
         """Returns the cost for adding k parameters to the current model cluster."""

-        #Weight the penalty for additional parameters.
-        #If this isn't 1 there had better be a good reason.
-        fudgefactor = 1.
+        # Weight the penalty for additional parameters.
+        # If this isn't 1 there had better be a good reason.
+        fudgefactor = 1.0

-        return (2*k)*fudgefactor
+        return (2 * k) * fudgefactor

     def growth_justified(self, fit, k_prime):
-        """Returns whether adding k_prime parameters to the given model (ModelCluster) is justified given the current quality of the fit.
-        The assumption is that adding k_prime parameters will result in "effectively 0" chiSquared cost, and so adding it is justified
-        if the cost of adding these parameters is less than the current chiSquared cost. The validity of this assumption (which
-        depends on an unknown chiSquared value) and the impact of the errors used should be examined more thoroughly in the future."""
+        """Returns whether adding k_prime parameters to the given model (ModelCluster) is justified
+        given the current quality of the fit. The assumption is that adding k_prime parameters will
+        result in "effectively 0" chiSquared cost, and so adding it is justified if the cost of adding
+        these parameters is less than the current chiSquared cost.
+        The validity of this assumption (which depends on an unknown chiSquared value)
+        and the impact of the errors used should be examined more thoroughly in the future.
+        """
         if self.chisq is None:
             self.chisq = self.chi_squared(fit.value(), fit.y_cluster, fit.error_cluster)

-        k_actual = fit.model.npars(count_fixed=False) #parameters in current fit
-        k_test = k_actual + k_prime #parameters in prospective fit
-        n = fit.size #the number of data points included in the fit
+        k_actual = fit.model.npars(count_fixed=False)  # parameters in current fit
+        k_test = k_actual + k_prime  # parameters in prospective fit
+        n = fit.size  # the number of data points included in the fit

         # If there are too few points to calculate AIC with the requested number of parameter
         # then clearly that increase in parameters is not justified.
         if n < self.minpoints(k_test):
             return False

-        #assert n >= self.minPoints(kActual) #check that AIC is defined for the actual fit
+        # assert n >= self.minPoints(kActual) #check that AIC is defined for the actual fit
         if n < self.minpoints(k_actual):
             logger.warn("AIC.growth_justified(): too few data to evaluate quality reliably.")
-            n=self.minpoints(k_actual)
+            n = self.minpoints(k_actual)

-        penalty=self.parpenalty(k_test, n) - self.parpenalty(k_actual, n)
+        penalty = self.parpenalty(k_test, n) - self.parpenalty(k_actual, n)

         return penalty < self.chisq

@@ -125,24 +128,25 @@ def akaikeweights(aics):
         aic_stats = np.array([aic.stat for aic in aics])
         aic_min = min(aic_stats)

-        return np.exp(-(aic_stats-aic_min)/2.)
+        return np.exp(-(aic_stats - aic_min) / 2.0)

     @staticmethod
     def akaikeprobs(aics):
         """Return sequence of Akaike probabilities for sequence of AICs"""
         aic_weights = AIC.akaikeweights(aics)
-        return aic_weights/np.sum(aic_weights)
+        return aic_weights / np.sum(aic_weights)
+

 # end of class AIC

 # simple test code
-if __name__ == '__main__':
+if __name__ == "__main__":

-    m1=AIC()
-    m2=AIC()
+    m1 = AIC()
+    m2 = AIC()

     m1.stat = 20
     m2.stat = 30

-    print m2 > m1
+    print(m2 > m1)
diff --git a/diffpy/srmise/modelevaluators/aicc.py b/diffpy/srmise/modelevaluators/aicc.py
index 57f0713..be1052f 100644
--- a/diffpy/srmise/modelevaluators/aicc.py
+++ b/diffpy/srmise/modelevaluators/aicc.py
@@ -2,7 +2,8 @@
 ##############################################################################
 #
 # SrMise by Luke Granlund
-# (c) 2014 trustees of the Michigan State University.
+# (c) 2014 trustees of the Michigan State University
+# (c) 2024 trustees of Columbia University in the City of New York
 # All rights reserved.
 #
 # File coded by: Luke Granlund
@@ -21,7 +22,8 @@

 logger = logging.getLogger("diffpy.srmise")

-class AICc (ModelEvaluator):
+
+class AICc(ModelEvaluator):
     """Evaluate and compare models with the AICc statistic.

     Akaike's Information Criterion w/ 2nd order correction for small sample
@@ -50,13 +52,13 @@ def __init__(self):

     def evaluate(self, fit, count_fixed=False, kshift=0):
         """Return quality of fit for given ModelCluster using AICc (Akaike's Information Criterion
-        with 2nd order correction for small sample size.
+        with 2nd order correction for small sample size).

-        Parameters
-        fit: A ModelCluster
-        count_fixed: Whether fixed parameters are considered.
-        kshift: (0) Treat the model has having this many additional
-                parameters. Negative values also allowed."""
+        Parameters
+        fit: A ModelCluster
+        count_fixed: Whether fixed parameters are considered.
+        kshift: (0) Treat the model as having this many additional
+                parameters. Negative values also allowed."""
         # Number of parameters. By default, fixed parameters are ignored.
         k = fit.model.npars(count_fixed=count_fixed) + kshift
         if k < 0:
@@ -77,7 +79,6 @@ def evaluate(self, fit, count_fixed=False, kshift=0):

         return self.stat

-
     def minpoints(self, npars):
         """Calculates the minimum number of points required to make an estimate of a model's quality."""

@@ -88,36 +89,37 @@ def minpoints(self, npars):
     def parpenalty(self, k, n):
         """Returns the cost for adding k parameters to the current model cluster."""

-        #Weight the penalty for additional parameters.
-        #If this isn't 1 there had better be a good reason.
-        fudgefactor = 1.
+        # Weight the penalty for additional parameters.
+        # If this isn't 1 there had better be a good reason.
+        fudgefactor = 1.0

-        return (2*k+float(2*k*(k+1))/(n-k-1))*fudgefactor
+        return (2 * k + float(2 * k * (k + 1)) / (n - k - 1)) * fudgefactor

     def growth_justified(self, fit, k_prime):
         """Returns whether adding k_prime parameters to the given model (ModelCluster) is justified
         given the current quality of the fit.
-        The assumption is that adding k_prime parameters will result in "effectively 0" chiSquared cost, and so adding it is justified
-        if the cost of adding these parameters is less than the current chiSquared cost. The validity of this assumption (which
-        depends on an unknown chiSquared value) and the impact of the errors used should be examined more thoroughly in the future."""
+        The assumption is that adding k_prime parameters will result in "effectively 0" chiSquared cost, and so adding it is justified
+        if the cost of adding these parameters is less than the current chiSquared cost. The validity of this assumption (which
+        depends on an unknown chiSquared value) and the impact of the errors used should be examined more thoroughly in the future.
+        """
         if self.chisq is None:
             self.chisq = self.chi_squared(fit.value(), fit.y_cluster, fit.error_cluster)

-        k_actual = fit.model.npars(count_fixed=False) #parameters in current fit
-        k_test = k_actual + k_prime #parameters in prospective fit
-        n = fit.size #the number of data points included in the fit
+        k_actual = fit.model.npars(count_fixed=False)  # parameters in current fit
+        k_test = k_actual + k_prime  # parameters in prospective fit
+        n = fit.size  # the number of data points included in the fit

         # If there are too few points to calculate AICc with the requested number of parameter
         # then clearly that increase in parameters is not justified.
         if n < self.minpoints(k_test):
             return False

-        #assert n >= self.minPoints(kActual) #check that AICc is defined for the actual fit
+        # assert n >= self.minPoints(kActual) #check that AICc is defined for the actual fit
         if n < self.minpoints(k_actual):
             logger.warn("AICc.growth_justified(): too few data to evaluate quality reliably.")
-            n=self.minpoints(k_actual)
+            n = self.minpoints(k_actual)

-        penalty=self.parpenalty(k_test, n) - self.parpenalty(k_actual, n)
+        penalty = self.parpenalty(k_test, n) - self.parpenalty(k_actual, n)

         return penalty < self.chisq

@@ -127,24 +129,25 @@ def akaikeweights(aics):
         aic_stats = np.array([aic.stat for aic in aics])
         aic_min = min(aic_stats)

-        return np.exp(-(aic_stats-aic_min)/2.)
+        return np.exp(-(aic_stats - aic_min) / 2.0)

     @staticmethod
     def akaikeprobs(aics):
         """Return sequence of Akaike probabilities for sequence of AICs"""
         aic_weights = AICc.akaikeweights(aics)
-        return aic_weights/np.sum(aic_weights)
+        return aic_weights / np.sum(aic_weights)
+

 # end of class AICc

 # simple test code
-if __name__ == '__main__':
+if __name__ == "__main__":

-    m1=AICc()
-    m2=AICc()
+    m1 = AICc()
+    m2 = AICc()

     m1.stat = 20
     m2.stat = 30

-    print m2 > m1
+    print(m2 > m1)
diff --git a/diffpy/srmise/modelevaluators/base.py b/diffpy/srmise/modelevaluators/base.py
index eb28272..2e97b3a 100644
--- a/diffpy/srmise/modelevaluators/base.py
+++ b/diffpy/srmise/modelevaluators/base.py
@@ -2,7 +2,8 @@
 ##############################################################################
 #
 # SrMise by Luke Granlund
-# (c) 2014 trustees of the Michigan State University.
+# (c) 2014 trustees of the Michigan State University
+# (c) 2024 trustees of Columbia University in the City of New York
 # All rights reserved.
 #
 # File coded by: Luke Granlund
@@ -43,9 +44,6 @@

 import numpy as np

-import diffpy.srmise.srmiselog
-from diffpy.srmise.srmiseerrors import SrMiseModelEvaluatorError
-
 logger = logging.getLogger("diffpy.srmise")

@@ -68,11 +66,9 @@ def __lt__(self, other):
         """ """
         assert self.method == other.method  # Comparison between same types required
-        assert (
-            self.stat != None and other.stat != None
-        )  # The statistic must already be calculated
+        assert self.stat is not None and other.stat is not None  # The statistic must already be calculated

         if self.higher_is_better:
             return self.stat < other.stat
         else:
             return other.stat < self.stat
@@ -81,11 +77,9 @@ def __le__(self, other):
         """ """
         assert self.method == other.method  # Comparison between same types required
-        assert (
-            self.stat != None and other.stat != None
-        )  # The statistic must already be calculated
+        assert self.stat is not None and other.stat is not None  # The statistic must already be calculated

         if self.higher_is_better:
             return self.stat <= other.stat
         else:
             return other.stat <= self.stat
@@ -94,9 +88,7 @@ def __eq__(self, other):
         """ """
         assert self.method == other.method  # Comparison between same types required
-        assert (
-            self.stat != None and other.stat != None
-        )  # The statistic must already be calculated
+        assert self.stat is not None and other.stat is not None  # The statistic must already be calculated

         return self.stat == other.stat

@@ -104,9 +96,7 @@ def __ne__(self, other):
         """ """
         assert self.method == other.method  # Comparison between same types required
-        assert (
-            self.stat != None and other.stat != None
-        )  # The statistic must already be calculated
+        assert self.stat is not None and other.stat is not None  # The statistic must already be calculated

         return self.stat != other.stat

@@ -114,11 +104,9 @@ def __gt__(self, other):
         """ """
         assert self.method == other.method  # Comparison between same types required
-        assert (
-            self.stat != None and other.stat != None
-        )  # The statistic must already be calculated
+        assert self.stat is not None and other.stat is not None  # The statistic must already be calculated

         if self.higher_is_better:
             return self.stat > other.stat
         else:
             return other.stat > self.stat
@@ -127,11 +115,9 @@ def __ge__(self, other):
         """ """
         assert self.method == other.method  # Comparison between same types required
-        assert (
-            self.stat != None and other.stat != None
-        )  # The statistic must already be calculated
+        assert self.stat is not None and other.stat is not None  # The statistic must already be calculated

         if self.higher_is_better:
             return self.stat >= other.stat
         else:
             return other.stat >= self.stat
diff --git a/diffpy/srmise/modelparts.py b/diffpy/srmise/modelparts.py
index 1cb6412..8dd0c40 100644
--- a/diffpy/srmise/modelparts.py
+++ b/diffpy/srmise/modelparts.py
@@ -2,7 +2,8 @@
 ##############################################################################
 #
 # SrMise by Luke Granlund
-# (c) 2014 trustees of the Michigan State University.
+# (c) 2014 trustees of the Michigan State University
+# (c) 2024 trustees of Columbia University in the City of New York
 # All rights reserved.
 #
 # File coded by: Luke Granlund
@@ -34,8 +35,8 @@
 # Before it returned a scalar, later it returned an array of length 1.
 import pkg_resources as pr

-__spv__ = pr.get_distribution('scipy').version
-__oldleastsqbehavior__ = (pr.parse_version(__spv__) < pr.parse_version('0.8.0'))
+__spv__ = pr.get_distribution("scipy").version
+__oldleastsqbehavior__ = pr.parse_version(__spv__) < pr.parse_version("0.8.0")

 class ModelParts(list):
@@ -57,7 +58,16 @@ class ModelParts(list):
     def __init__(self, *args, **kwds):
         list.__init__(self, *args, **kwds)

-    def fit(self, r, y, y_error, range=None, ntrials=0, cov=None, cov_format="default_output"):
+    def fit(
+        self,
+        r,
+        y,
+        y_error,
+        range=None,
+        ntrials=0,
+        cov=None,
+        cov_format="default_output",
+    ):
         """Chi-square fit of all free parameters to given data.

         There must be at least as many free parameters as data points.
@@ -75,12 +85,17 @@ def fit(self, r, y, y_error, range=None, ntrials=0, cov=None, cov_format="defaul
         """
         freepars = self.unpack_freepars()
         if len(freepars) >= len(r):
-            emsg = "Cannot fit model with " + str(len(freepars)) +\
-                   " free parametersbut only "+str(len(r)) + " data points."
+            emsg = (
+                "Cannot fit model with "
+                + str(len(freepars))
+                + " free parameters but only "
+                + str(len(r))
+                + " data points."
+            )
             raise SrMiseFitError(emsg)
         if len(freepars) == 0:
-            #emsg = "Cannot fit model with no free parameters."
-            #raise SrMiseFitError(emsg)
+            # emsg = "Cannot fit model with no free parameters."
+            # raise SrMiseFitError(emsg)
             return

         if range == None:
@@ -95,38 +110,48 @@ def fit(self, r, y, y_error, range=None, ntrials=0, cov=None, cov_format="defaul
             plt.cla()
             plt.title("Before")
             plt.plot(r, y, label="_nolabel_")
-            plt.plot(r, (y-self.value(r, range=range))-1.1*(max(y) - min(y)), label="_nolabel_")
+            plt.plot(
+                r,
+                (y - self.value(r, range=range)) - 1.1 * (max(y) - min(y)),
+                label="_nolabel_",
+            )
             for p in self:
                 plt.plot(r, p.value(r, range=range), label=str(p))
             plt.ion()

         try:
             f = leastsq(
-                self.residual, # minimize this function
-                freepars, # initial parameters
-                args=args, # arguments to residual, residual_jacobian
-                Dfun=self.residual_jacobian, # explicit Jacobian
-                col_deriv=1, # order of derivatives in Jacobian
+                self.residual,  # minimize this function
+                freepars,  # initial parameters
+                args=args,  # arguments to residual, residual_jacobian
+                Dfun=self.residual_jacobian,  # explicit Jacobian
+                col_deriv=1,  # order of derivatives in Jacobian
                 full_output=1,
-                maxfev=ntrials)
+                maxfev=ntrials,
+            )
         except NotImplementedError:
             # TODO: Figure out if is worth checking for residual_jacobian
             # before leastsq(). This exception will either occur almost never
             # or extremely frequently, and the extra evaluations will add up.
logger.info("One or more functions do not define residual_jacobian().") f = leastsq( - self.residual, # minimize this function - freepars, # initial parameters - args=args, # arguments to residual - col_deriv=1, # order of derivatives in Jacobian + self.residual, # minimize this function + freepars, # initial parameters + args=args, # arguments to residual + col_deriv=1, # order of derivatives in Jacobian full_output=1, - maxfev=ntrials) + maxfev=ntrials, + ) except Exception: # Sadly, KeyboardInterrupt, etc. is reraised as minpack.error # Not much I can do about that, though. import traceback - emsg = "Unexpected error in modelparts.fit(). Original exception:\n" +\ - traceback.format_exc() + "End original exception." + + emsg = ( + "Unexpected error in modelparts.fit(). Original exception:\n" + + traceback.format_exc() + + "End original exception." + ) raise SrMiseFitError(emsg) result = f[0] @@ -144,22 +169,28 @@ def fit(self, r, y, y_error, range=None, ntrials=0, cov=None, cov_format="defaul plt.cla() plt.title("After") plt.ion() - plt.plot(r, y, - r, (y-self.value(r, range=range))-1.1*(max(y) - min(y)), - *[i for sublist in [[r, p.value(r, range=range)] for p in self] for i in sublist]) + plt.plot( + r, + y, + r, + (y - self.value(r, range=range)) - 1.1 * (max(y) - min(y)), + *[i for sublist in [[r, p.value(r, range=range)] for p in self] for i in sublist] + ) plt.draw() if srmiselog.wait: - print "Press 'Enter' to continue...", - raw_input() + print( + "Press 'Enter' to continue...", + ) + input() - if f[4] not in (1,2,3,4): + if f[4] not in (1, 2, 3, 4): emsg = "Fit did not succeed -- " + str(f[3]) raise SrMiseFitError(emsg) # clean up parameters for p in self: - p.pars = p.owner().transform_parameters(p.pars, in_format="internal", out_format="internal") + p.pars = p.owner().transform_parameters(p.pars, in_format="internal", out_format="internal") # Supply estimated covariance matrix if requested. # The precise relationship between f[1] and estimated covariance matrix is a little unclear from @@ -169,28 +200,28 @@ def fit(self, r, y, y_error, range=None, ntrials=0, cov=None, cov_format="defaul pcov = f[1] fvec = f[2]["fvec"] dof = len(r) - len(freepars) - cov.setcovariance(self, pcov*np.sum(fvec**2)/dof) + cov.setcovariance(self, pcov * np.sum(fvec**2) / dof) try: cov.transform(in_format="internal", out_format=cov_format) except SrMiseUndefinedCovarianceError as e: logger.warn("Covariance not defined. Fit may not have converged.") - return -#### Notes on the fit f -# f[0] = solution -# f[1] = Uses the fjac and ipvt optional outputs to construct an estimate of the jacobian around the solution. -# None if a singular matrix encountered (indicates very flat curvature in some direction). -# This matrix must be multiplied by the residual variance to get the covariance of the parameter -# estimates - see curve fit. -# f[2] = dictionary{nfev: int, fvec: array(), fjac: array(), ipvt: array(), qtf: array()} -# nfev - The number of function calls made -# fvec - function (residual) evaluated at solution -# fjac - "a permutation of the R matrix of a QR factorization of the final Jacobian." -# ipvt - integer array defining a permutation matrix P such that fjac*P=QR -# qtf - transpose(q)*fvec -# f[3] = message about results of fit -# f[4] = integer flag. Fit was successful on 1,2,3, or 4. Otherwise unsuccessful. + + # # Notes on the fit f + # f[0] = solution + # f[1] = Uses the fjac and ipvt optional outputs to construct an estimate of the jacobian around the solution. 
+ # None if a singular matrix encountered (indicates very flat curvature in some direction). + # This matrix must be multiplied by the residual variance to get the covariance of the parameter + # estimates - see curve fit. + # f[2] = dictionary{nfev: int, fvec: array(), fjac: array(), ipvt: array(), qtf: array()} + # nfev - The number of function calls made + # fvec - function (residual) evaluated at solution + # fjac - "a permutation of the R matrix of a QR factorization of the final Jacobian." + # ipvt - integer array defining a permutation matrix P such that fjac*P=QR + # qtf - transpose(q)*fvec + # f[3] = message about results of fit + # f[4] = integer flag. Fit was successful on 1,2,3, or 4. Otherwise unsuccessful. def npars(self, count_fixed=True): """Return total number of parameters in all parts. @@ -201,7 +232,7 @@ def npars(self, count_fixed=True): """ n = 0 for p in self: - n+=p.npars(count_fixed=count_fixed) + n += p.npars(count_fixed=count_fixed) return n def pack_freepars(self, freepars): @@ -229,9 +260,9 @@ def residual(self, freepars, r, y_expected, y_error, range=None): try: if range is None: range = slice(0, len(r)) - return (y_expected[range]-total[range])/y_error[range] + return (y_expected[range] - total[range]) / y_error[range] except TypeError: - return (y_expected-total)/y_error + return (y_expected - total) / y_error def residual_jacobian(self, freepars, r, y_expected, y_error, range=None): """Calculate the Jacobian of freepars. @@ -245,22 +276,23 @@ def residual_jacobian(self, freepars, r, y_expected, y_error, range=None): All the data by default. """ if len(freepars) == 0: - raise ValueError("Argument freepars has length 0. The Jacobian " - "is only defined with >=1 free parameters.") + raise ValueError( + "Argument freepars has length 0. The Jacobian " "is only defined with >=1 free parameters." + ) self.pack_freepars(freepars) - tempJac=[] + tempJac = [] for p in self: - tempJac[len(tempJac):] = p.jacobian(r, range) + tempJac[len(tempJac) :] = p.jacobian(r, range) # Since the residual is (expected - calculated) the jacobian # of the residual has a minus sign. - jac=-np.array([j for j in tempJac if j is not None]) + jac = -np.array([j for j in tempJac if j is not None]) try: if range is None: range = slice(0, len(r)) - return jac[:,range]/y_error[range] + return jac[:, range] / y_error[range] except TypeError: - return jac/y_error + return jac / y_error def value(self, r, range=None): """Calculate total value of all parts over range. @@ -270,14 +302,14 @@ def value(self, r, range=None): range - Slice object specifying region of r and y over which to fit. All the data by default. """ - total = r * 0. + total = r * 0.0 for p in self: total += p.value(r, range) return total def unpack_freepars(self): """Return array of all free parameters.""" - #To check: ravel() sometimes returns a reference and othertimes a copy. + # To check: ravel() sometimes returns a reference and othertimes a copy. # Do I need to use flatten() instead? return np.concatenate([p.compress() for p in self]).ravel() @@ -302,16 +334,13 @@ def covariance(self, format="internal", **kwds): try: idx = int(k[1:]) except ValueError: - emsg = "Invalid format keyword '%s'. They must be specified as 'f0', 'f1', etc." %k + emsg = "Invalid format keyword '%s'. They must be specified as 'f0', 'f1', etc." % k raise ValueError(emsg) formats[int(k[1:])] = v - - return - def copy(self): """Return deep copy of this ModelParts. 
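The notes above record how fit() turns leastsq() output into a covariance estimate: f[1] (cov_x) must be scaled by the residual variance sum(fvec**2)/dof. A self-contained sketch of that same scaling on a toy linear model (all data here hypothetical):

import numpy as np
from scipy.optimize import leastsq

def residual(p, x, y):
    return y - (p[0] * x + p[1])

x = np.linspace(0.0, 1.0, 50)
y = 2.0 * x + 1.0 + 0.05 * np.random.default_rng(0).standard_normal(x.size)
popt, cov_x, info, mesg, ier = leastsq(residual, [1.0, 0.0], args=(x, y), full_output=1)
dof = x.size - len(popt)  # data points minus free parameters
pcov = cov_x * np.sum(info["fvec"] ** 2) / dof  # scaled parameter covariance
print(np.sqrt(np.diag(pcov)))  # one-sigma uncertainties for slope and intercept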
@@ -321,7 +350,7 @@ def copy(self): def __str__(self): """Return string representation of this ModelParts.""" - return ''.join([str(p)+"\n" for p in self]) + return "".join([str(p) + "\n" for p in self]) def __getslice__(self, i, j): """Extends list.__getslice__""" @@ -338,10 +367,15 @@ def transform(self, in_format="internal", out_format="internal"): try: p.pars = p.owner().transform_parameters(p.pars, in_format, out_format) except ValueError: - logger.info("Invalid parameter transformation: Ignoring %s->%s for function of type %s." %(in_format, out_format, p.owner().getmodule())) + logger.info( + "Invalid parameter transformation: Ignoring %s->%s for function of type %s." + % (in_format, out_format, p.owner().getmodule()) + ) + # End of class ModelParts + class ModelPart(object): """Represents a single part (instance of some function) of a model. @@ -388,18 +422,19 @@ def __init__(self, owner, pars, free=None, removable=True, static_owner=False): self._owner = owner if len(pars) != owner.npars: - emsg = "The length of pars must equal the number of parameters "+\ - "specified by the model part owner." + emsg = "The length of pars must equal the number of parameters " + "specified by the model part owner." raise ValueError(emsg) - self.pars = np.array(pars[:]) # pars[:] in case pars is a ModelPart + self.pars = np.array(pars[:]) # pars[:] in case pars is a ModelPart if free is None: self.free = np.array([True for p in pars], dtype=bool) else: self.free = np.array(free, dtype=bool) if len(self.free) != owner.npars: - emsg = "The length of free must be equal to the number of "+\ - "parameters specified by the model part owner." + emsg = ( + "The length of free must be equal to the number of " + + "parameters specified by the model part owner." + ) raise ValueError(emsg) self.removable = removable @@ -419,8 +454,7 @@ def changeowner(self, owner): emsg = "Cannot change owner if static_owner is True." raise SrMiseStaticOwnerError(emsg) if self._owner.npars != owner.npars: - emsg = "New owner specifies different number of parameters than "+\ - "original owner." + emsg = "New owner specifies different number of parameters than " + "original owner." raise SrMiseStaticOwnerError(emsg) self._owner = owner @@ -453,8 +487,8 @@ def update(self, freepars): """ numfree = self.npars(count_fixed=False) if len(freepars) < numfree: - pass # raise "freepars does not have enough elements to - # update every unheld parameter." + pass # raise "freepars does not have enough elements to + # update every unheld parameter." # TODO: Check if I need to make copies here, or if references # to parameters are safe. self.pars[self.free] = freepars[:numfree] @@ -532,17 +566,19 @@ def __str__(self): return str(self._owner.transform_parameters(self.pars, in_format="internal", out_format="default_output")) def __eq__(self, other): - """ """ + """ """ if hasattr(other, "_owner"): - return ((self._owner is other._owner) and - np.all(self.pars == other.pars) and - np.all(self.free == other.free) and - self.removable == other.removable) + return ( + (self._owner is other._owner) + and np.all(self.pars == other.pars) + and np.all(self.free == other.free) + and self.removable == other.removable + ) else: return False def __ne__(self, other): - """ """ + """ """ return not self == other def writestr(self, ownerlist): @@ -557,19 +593,20 @@ def writestr(self, ownerlist): emsg = "ownerlist does not contain this ModelPart's owner." 
             raise ValueError(emsg)

         lines = []
-        lines.append("owner=%s" %repr(ownerlist.index(self._owner)))
-
-        #Lists/numpy arrays don't give full representation of long lists
-        lines.append("pars=[%s]" %", ".join([repr(p) for p in self.pars]))
-        lines.append("free=[%s]" %", ".join([repr(f) for f in self.free]))
-        lines.append("removable=%s" %repr(self.removable))
-        lines.append("static_owner=%s" %repr(self.static_owner))
-        datastring = "\n".join(lines)+"\n"
+        lines.append("owner=%s" % repr(ownerlist.index(self._owner)))
+
+        # Lists/numpy arrays don't give full representation of long lists
+        lines.append("pars=[%s]" % ", ".join([repr(p) for p in self.pars]))
+        lines.append("free=[%s]" % ", ".join([repr(f) for f in self.free]))
+        lines.append("removable=%s" % repr(self.removable))
+        lines.append("static_owner=%s" % repr(self.static_owner))
+        datastring = "\n".join(lines) + "\n"
         return datastring

+
 # End of class ModelPart

 # simple test code
-if __name__ == '__main__':
+if __name__ == "__main__":
     pass
diff --git a/diffpy/srmise/multimodelselection.py b/diffpy/srmise/multimodelselection.py
index 35d9e08..3fc4f5b 100644
--- a/diffpy/srmise/multimodelselection.py
+++ b/diffpy/srmise/multimodelselection.py
@@ -2,7 +2,8 @@
 ##############################################################################
 #
 # SrMise by Luke Granlund
-# (c) 2014 trustees of the Michigan State University.
+# (c) 2014 trustees of the Michigan State University
+# (c) 2024 trustees of Columbia University in the City of New York
 # All rights reserved.
 #
 # File coded by: Luke Granlund
@@ -17,19 +18,20 @@
 import numpy as np
 from matplotlib import transforms

-import diffpy.srmise.srmiselog
-from diffpy.srmise import ModelCluster, PeakStability
-from diffpy.srmise.modelevaluators.base import ModelEvaluator
+from diffpy.srmise.modelcluster import ModelCluster
+from diffpy.srmise.peakstability import PeakStability

 logger = logging.getLogger("diffpy.srmise")

+
 def eatkwds(*args, **kwds):
     """Convenience function to remove all keywords in args from kwds."""
     for k in args:
         if k in kwds:
-            print "Keyword %s=%s ignored." %(k, kwds.pop(k))
+            print("Keyword %s=%s ignored." % (k, kwds.pop(k)))
     return kwds

+
 class MultimodelSelection(PeakStability):
     """Quick and dirty multimodel selection using AIC and its offspring."""

@@ -52,7 +54,7 @@ def __init__(self):
         self.classweights = {}
         self.classprobs = {}
         self.sortedclassprobs = {}
-        self.sortedclasses = {} # dg->as self.classes, but with model indices sorted by best AIC
+        self.sortedclasses = {}  # dg->as self.classes, but with model indices sorted by best AIC

         PeakStability.__init__(self)
         return
@@ -67,7 +69,7 @@ def makeaics(self, dgs, dr, filename=None):
                    nominal value.
         filename - Optional file to save pickled results
         """
-        aics_out = {} # Version of self.aics that holds only the statistic, not the AIC object.
+        aics_out = {}  # Version of self.aics that holds only the statistic, not the AIC object.
         self.dgs = np.array(dgs)
         for i, dg in enumerate(self.dgs):
             self.dgs_idx[dg] = i
@@ -77,12 +79,11 @@ def makeaics(self, dgs, dr, filename=None):
         (r, y, dr, dy) = self.ppe.resampledata(dr)

         for model_idx in range(len(self.results)):
-            print "Testing model %s of %s." %(model_idx, len(self.results))
+            print("Testing model %s of %s." % (model_idx, len(self.results)))

             result = self.results[model_idx]
             em = self.ppe.error_method

-
             # This section dependent on spaghetti code elsewhere in srmise!
             # Short cut evaluation of AICs which doesn't require calculating chi-square
             # over and over again.
This method assumes that the various uncertainties @@ -95,13 +96,13 @@ def makeaics(self, dgs, dr, filename=None): # modelevaluators subpackage are in need of a rewrite, and so it would be # best to do them all at once. dg0 = self.dgs[0] - mc = ModelCluster(result[1], result[2], r, y, dg0*np.ones(len(r)), None, em, self.ppe.pf) + mc = ModelCluster(result[1], result[2], r, y, dg0 * np.ones(len(r)), None, em, self.ppe.pf) em0 = mc.quality() for dg in self.dgs: em_instance = em() - em_instance.chisq = em0.chisq*(dg0/dg)**2 # rescale chi-square - em_instance.evaluate(mc) # evaluate AIC without recalculating chi-square + em_instance.chisq = em0.chisq * (dg0 / dg) ** 2 # rescale chi-square + em_instance.evaluate(mc) # evaluate AIC without recalculating chi-square self.aics[dg].append(em_instance) aics_out[dg].append(em_instance.stat) @@ -111,10 +112,10 @@ def makeaics(self, dgs, dr, filename=None): if filename is not None: try: - import cPickle as pickle - except: - import pickle - out_s = open(filename, 'wb') + import cPickle as pickle + except ImportError: + import pickle + out_s = open(filename, "wb") pickle.dump(aics_out, out_s) out_s.close() @@ -123,10 +124,10 @@ def makeaics(self, dgs, dr, filename=None): def loadaics(self, filename): """Load file containing results of the testall method.""" try: - import cPickle as pickle - except: - import pickle - in_s = open(filename, 'rb') + import cPickle as pickle + except ImportError: + import pickle + in_s = open(filename, "rb") aics_in = pickle.load(in_s) in_s.close() @@ -171,7 +172,7 @@ def makesortedprobs(self): for dg in self.dgs: self.sortedprobs[dg] = np.argsort(self.aicprobs[dg]).tolist() - def animate_probs(self, step=False, duration=0., **kwds): + def animate_probs(self, step=False, duration=0.0, **kwds): """Show animation of extracted peaks from first to last. Parameters: @@ -181,14 +182,15 @@ def animate_probs(self, step=False, duration=0., **kwds): Keywords passed to pyplot.plot()""" if duration > 0: import time - sleeptime = duration/len(self.dgs) + + sleeptime = duration / len(self.dgs) plt.ion() plt.subplot(211) best_idx = self.sortedprobs[self.dgs[0]][-1] - line, = plt.plot(self.dgs, self.aicprobs[self.dgs[0]]) + (line,) = plt.plot(self.dgs, self.aicprobs[self.dgs[0]]) vline = plt.axvline(self.dgs[0]) - dot, = plt.plot(self.dgs[best_idx],self.aicprobs[self.dgs[0]][best_idx],'ro') + (dot,) = plt.plot(self.dgs[best_idx], self.aicprobs[self.dgs[0]][best_idx], "ro") plt.subplot(212) self.setcurrent(best_idx) @@ -208,11 +210,11 @@ def animate_probs(self, step=False, duration=0., **kwds): plt.ion() plt.draw() if step: - raw_input() + input() if duration > 0: time.sleep(sleeptime) - def animate_classprobs(self, step=False, duration=0., **kwds): + def animate_classprobs(self, step=False, duration=0.0, **kwds): """Show animation of extracted peaks from first to last. 
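A note on the shortcut used in makeaics above: with a uniform uncertainty dg, chi-square scales exactly as (dg0/dg)**2, so a single fit at dg0 yields the statistic at every other dg without refitting. A minimal standalone sketch of that identity (rescaled_chisq and the sample residuals are illustrative, not part of the patch):

import numpy as np

def rescaled_chisq(chisq0, dg0, dg):
    """Chi-square at uniform uncertainty dg, given its value at dg0."""
    return chisq0 * (dg0 / dg) ** 2

# With residuals r_i, chisq(dg) = sum((r_i / dg)**2), so the identity is exact.
residuals = np.array([0.10, -0.20, 0.05])
dg0, dg = 0.01, 0.02
chisq0 = np.sum((residuals / dg0) ** 2)
assert np.isclose(rescaled_chisq(chisq0, dg0, dg), np.sum((residuals / dg) ** 2))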
Parameters: @@ -222,23 +224,29 @@ def animate_classprobs(self, step=False, duration=0., **kwds): Keywords passed to pyplot.plot()""" if duration > 0: import time - sleeptime = duration/len(self.dgs) + + sleeptime = duration / len(self.dgs) plt.ion() ax1 = plt.subplot(211) bestclass_idx = self.sortedclassprobs[self.dgs[0]][-1] best_idx = self.sortedclasses[self.dgs[0]][bestclass_idx][-1] - arrow_left = len(self.classes)-1 - arrow_right = arrow_left + .05*arrow_left - line, = plt.plot(range(len(self.classes)), self.classprobs[self.dgs[0]]) - dot, = plt.plot(self.dgs[best_idx],self.classprobs[self.dgs[0]][bestclass_idx],'ro') - plt.axvline(arrow_left, color='k') + arrow_left = len(self.classes) - 1 + arrow_right = arrow_left + 0.05 * arrow_left + (line,) = plt.plot(range(len(self.classes)), self.classprobs[self.dgs[0]]) + (dot,) = plt.plot(self.dgs[best_idx], self.classprobs[self.dgs[0]][bestclass_idx], "ro") + plt.axvline(arrow_left, color="k") ax2 = ax1.twinx() - ax2.set_ylim(self.dgs[0],self.dgs[-1]) + ax2.set_ylim(self.dgs[0], self.dgs[-1]) ax2.set_ylabel("dg") ax1.set_xlim(right=arrow_right) ax2.set_xlim(right=arrow_right) - dgarrow = ax2.annotate("",(arrow_right, self.dgs[0]), (arrow_left, self.dgs[0]), arrowprops=dict(arrowstyle="-|>")) + dgarrow = ax2.annotate( + "", + (arrow_right, self.dgs[0]), + (arrow_left, self.dgs[0]), + arrowprops=dict(arrowstyle="-|>"), + ) plt.subplot(212) self.setcurrent(best_idx) @@ -247,7 +255,7 @@ def animate_classprobs(self, step=False, duration=0., **kwds): minval = np.min(val[1::2]) [r, res] = tempcluster.plottable_residual() plt.plot(*val) - plt.plot(r, minval-np.max(res)+res) + plt.plot(r, minval - np.max(res) + res) for dg in self.dgs[1:]: plt.ioff() line.set_ydata(self.classprobs[dg]) @@ -263,13 +271,13 @@ def animate_classprobs(self, step=False, duration=0., **kwds): minval = np.min(val[1::2]) [r, res] = tempcluster.plottable_residual() plt.plot(*val) - plt.plot(r, minval-np.max(res)+res) + plt.plot(r, minval - np.max(res) + res) dot.set_xdata(bestclass_idx) dot.set_ydata(self.classprobs[dg][bestclass_idx]) plt.ion() plt.draw() if step: - raw_input() + input() if duration > 0: time.sleep(sleeptime) @@ -295,16 +303,16 @@ def classify(self, r, tolerance=0.05): self.classes_idx = {} self.class_tolerance = None - classes = [] # each element is a list of the models (result indices) in the class - classes_idx = {} # given an integer corresponding to a model, return its class - epsqval = {} # holds the squared value of each class' exemplar peaks - ebsqval = {} # holds the squared value of each class exemplar baselines + classes = [] # each element is a list of the models (result indices) in the class + classes_idx = {} # given an integer corresponding to a model, return its class + epsqval = {} # holds the squared value of each class' exemplar peaks + ebsqval = {} # holds the squared value of each class exemplar baselines for i in range(len(self.results)): peaks = self.results[i][1] baseline = self.results[i][2] - bsqval = baseline.value(r)**2 - psqval = [p.value(r)**2 for p in peaks] + bsqval = baseline.value(r) ** 2 + psqval = [p.value(r) ** 2 for p in peaks] added_to_class = False for c in range(len(classes)): @@ -312,17 +320,17 @@ def classify(self, r, tolerance=0.05): exemplar_baseline = self.results[classes[c][0]][2] # Check baseline type and number of parameters - if type(baseline) != type(exemplar_baseline): + if type(baseline) is not type(exemplar_baseline): continue if baseline.npars() != exemplar_baseline.npars(): continue # check peak types 
and number of parameters - badpeak=False + badpeak = False if len(peaks) != len(exemplar_peaks): continue - for p, ep in zip(peaks,exemplar_peaks): - if type(p) != type(ep): + for p, ep in zip(peaks, exemplar_peaks): + if type(p) is not type(ep): badpeak = True break if p.npars() != ep.npars(): @@ -332,20 +340,19 @@ def classify(self, r, tolerance=0.05): continue # check peak values - current_psqval = [] - for p, ep in zip(psqval,epsqval[c]): - basediff = np.abs(np.sum(p-ep)) - #if basediff > tolerance*np.sum(ep): - if basediff > tolerance*np.sum(ep) or basediff > tolerance*np.sum(p): + for p, ep in zip(psqval, epsqval[c]): + basediff = np.abs(np.sum(p - ep)) + # if basediff > tolerance*np.sum(ep): + if basediff > tolerance * np.sum(ep) or basediff > tolerance * np.sum(p): badpeak = True break if badpeak: continue # check baseline values - basediff = np.abs(np.sum(bsqval-ebsqval[c])) - #if basediff > tolerance*np.sum(ebsqval[c]): - if basediff > tolerance*np.sum(ebsqval[c]) or basediff > tolerance*np.sum(bsqval): + basediff = np.abs(np.sum(bsqval - ebsqval[c])) + # if basediff > tolerance*np.sum(ebsqval[c]): + if basediff > tolerance * np.sum(ebsqval[c]) or basediff > tolerance * np.sum(bsqval): continue # that's all the checks, add to current class @@ -357,7 +364,7 @@ def classify(self, r, tolerance=0.05): if added_to_class is False: # make a new class with the current model as exemplar classes.append([i]) - classnum = len(classes)-1 + classnum = len(classes) - 1 classes_idx[i] = classnum epsqval[classnum] = psqval ebsqval[classnum] = bsqval @@ -374,7 +381,6 @@ def classify(self, r, tolerance=0.05): def makesortedclasses(self): self.sortedclasses = {} - em = self.ppe.error_method for dg in self.dgs: bestinclass = [] @@ -411,7 +417,7 @@ def makesortedclassprobs(self): def dg_key(self, dg_in): """Return the dg value usable as a key nearest to dg_in.""" - idx = (np.abs(self.dgs-dg_in)).argmin() + idx = (np.abs(self.dgs - dg_in)).argmin() return self.dgs[idx] def bestclasses(self, dgs=None): @@ -459,13 +465,15 @@ def plot3dclassprobs(self, **kwds): dGs - Sequence of dG values to plot. Default is all values. highlight - Sequence of dG values to highlight on plot. Default is []. classes - Sequence of indices of classes to plot. Default is all classes. - probfilter - [float1, float2]. Only show classes with maximum probability in given range. Default is [0., 1.] + probfilter - [float1, float2]. Only show classes with maximum probability in given range. + Default is [0., 1.] class_size - Report the size of each class as a "number" or "fraction". Default is "number". norm - A colors normalization for displaying number/fraction of models in class. Default is "auto". If equal to "full" determined by the total number of models. If equal to "auto" determined by the number of models in displayed classes. - cmap - A colormap or registered colormap name. Default is cm.jet. If class_size is "number" and norm is either "auto" - or "full" the map is converted to an indexed colormap. + cmap - A colormap or registered colormap name. Default is cm.jet. + If class_size is "number" and norm is either "auto" + or "full" the map is converted to an indexed colormap. highlight_cmap - A colormap or registered colormap name for coloring highlights. Default is cm.gray. title - True, False, or a string. Defaults to True, which displays some basic information about the graph. p_alpha - Probability graph alpha. (Colorbar remains opaque). Default is 0.7. 
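For reference, the class-membership test that classify() applies above reduces to a symmetric tolerance check on squared peak (and baseline) values evaluated on a common grid. A small sketch under that reading (same_class and the sample curves are illustrative names, not part of the patch):

import numpy as np

def same_class(psq, epsq, tolerance=0.05):
    # Symmetric test mirroring the code above: the summed difference of the
    # squared values must be small relative to BOTH the candidate and the
    # exemplar, not just the exemplar.
    basediff = np.abs(np.sum(psq - epsq))
    return basediff <= tolerance * np.sum(epsq) and basediff <= tolerance * np.sum(psq)

r = np.linspace(1.0, 10.0, 200)
peak = np.exp(-((r - 5.0) ** 2) / 0.1)
print(same_class(peak ** 2, (1.01 * peak) ** 2))  # True: nearly identical peaks
print(same_class(peak ** 2, (2.0 * peak) ** 2))   # False: clearly different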
@@ -486,27 +494,26 @@ def plot3dclassprobs(self, **kwds): from matplotlib import cm, colorbar, colors from matplotlib.collections import PolyCollection - from mpl_toolkits.mplot3d import Axes3D fig = kwds.pop("figure", plt.gcf()) - ax = fig.add_subplot(kwds.pop("subplot",111), projection='3d') + ax = fig.add_subplot(kwds.pop("subplot", 111), projection="3d") - cbkwds = kwds.copy() # Resolve keywords (title resolved later) dGs = kwds.pop("dGs", self.dgs) highlight = kwds.pop("highlight", []) classes = kwds.pop("classes", range(len(self.classes))) - probfilter = kwds.pop("probfilter", [0.,1.]) + probfilter = kwds.pop("probfilter", [0.0, 1.0]) class_size = kwds.pop("class_size", "number") norm = kwds.pop("norm", "auto") cmap = kwds.pop("cmap", cm.jet) highlight_cmap = kwds.pop("highlight_cmap", cm.gray) title = kwds.pop("title", True) p_alpha = kwds.pop("p_alpha", 0.7) - scale = kwds.pop("scale", 1.) + scale = kwds.pop("scale", 1.0) - xs = dGs*scale + xs = dGs * scale verts = [] zs = [] zlabels = [] @@ -515,104 +522,113 @@ def plot3dclassprobs(self, **kwds): maxys = np.max(ys) if maxys >= probfilter[0] and maxys <= probfilter[1]: - p0, p1 = ((xs[0], 0), (xs[-1],0)) # points to close the vertices - verts.append(np.concatenate([[p0], zip(xs,ys), [p1], [p0]])) + p0, p1 = ((xs[0], 0), (xs[-1], 0)) # points to close the vertices + verts.append(np.concatenate([[p0], zip(xs, ys), [p1], [p0]])) zlabels.append(i) - ### Define face colors + # Define face colors fc = np.array([len(self.classes[z]) for z in zlabels]) - if class_size is "fraction": - fc = fc/float(len(self.results)) + if class_size == "fraction": + fc = fc / float(len(self.results)) # Index the colormap if necessary - if class_size is "number": - if norm is "auto": - indexedcolors = cmap(np.linspace(0., 1., np.max(fc))) + if class_size == "number": + if norm == "auto": + indexedcolors = cmap(np.linspace(0.0, 1.0, np.max(fc))) cmap = colors.ListedColormap(indexedcolors) - elif norm is "full": - indexedcolors = cmap(np.linspace(0., 1., len(self.results))) + elif norm == "full": + indexedcolors = cmap(np.linspace(0.0, 1.0, len(self.results))) cmap = colors.ListedColormap(indexedcolors) # A user-specified norm cannot be used to index a colormap. # Create proper norms for "auto" and "full" types. - if norm is "auto": - if class_size is "number": + if norm == "auto": + if class_size == "number": mic = np.min(fc) mac = np.max(fc) nc = mac - mic + 1 - norm = colors.BoundaryNorm(np.linspace(mic, mac+1, nc+1), nc) - if class_size is "fraction": + norm = colors.BoundaryNorm(np.linspace(mic, mac + 1, nc + 1), nc) + if class_size == "fraction": norm = colors.Normalize() norm.autoscale(fc) - elif norm is "full": + elif norm == "full": mcolor = len(self.results) - if class_size is "number": - norm = colors.BoundaryNorm(np.linspace(0, mcolor+1, mcolor+2), mcolor+1) - if class_size is "fraction": - norm = colors.Normalize(0., 1.)
+ if class_size == "number": + norm = colors.BoundaryNorm(np.linspace(0, mcolor + 1, mcolor + 2), mcolor + 1) + if class_size == "fraction": + norm = colors.Normalize(0.0, 1.0) zs = np.arange(len(zlabels)) poly = PolyCollection(verts, facecolors=cmap(norm(fc)), closed=False) poly.set_alpha(p_alpha) - cax = ax.add_collection3d(poly, zs=zs, zdir='y') + ax.add_collection3d(poly, zs=zs, zdir="y") # Highlight values of interest color_idx = np.linspace(0, 1, len(highlight)) for dG, ci in zip(highlight, color_idx): for z_logical, z_plot in zip(zlabels, zs): - ax.plot([dG, dG], [z_plot, z_plot], [0, self.classprobs[dG][z_logical]], color=highlight_cmap(ci), alpha=p_alpha) - - ax.set_xlabel('dG') - ax.set_xlim3d(dGs[0]*scale, dGs[-1]*scale) - ax.set_ylabel('Class') + ax.plot( + [dG, dG], + [z_plot, z_plot], + [0, self.classprobs[dG][z_logical]], + color=highlight_cmap(ci), + alpha=p_alpha, + ) + + ax.set_xlabel("dG") + ax.set_xlim3d(dGs[0] * scale, dGs[-1] * scale) + ax.set_ylabel("Class") ax.set_ylim3d(zs[0], zs[-1]) ax.set_yticks(zs) ax.set_yticklabels([str(z) for z in zlabels]) - ax.set_zlabel('Akaike probability') + ax.set_zlabel("Akaike probability") ax.set_zlim3d(0, 1) if title is True: - title = "Class probabilities\n\ + title = ( + "Class probabilities\n\ Max probabilities in %s\n\ - %i/%i classes with %i/%i models displayed"\ - %(probfilter, - len(zs), len(self.classes), - np.sum([len(self.classes[z]) for z in zlabels]), len(self.results) ) + %i/%i classes with %i/%i models displayed" + % ( + probfilter, + len(zs), + len(self.classes), + np.sum([len(self.classes[z]) for z in zlabels]), + len(self.results), + ) + ) if title is not False: - figtitle = fig.suptitle(title) + fig.suptitle(title) # Add colorbar if "cbpos" in kwds: cbpos = kwds.pop("cbpos") - aspect = cbpos[3]/cbpos[2] - plt.tight_layout() # do it before cbaxis, so colorbar is ignored. + plt.tight_layout() # do it before cbaxis, so colorbar is ignored. transAtoF = ax.transAxes + fig.transFigure.inverted() rect = transforms.Bbox.from_bounds(*cbpos).transformed(transAtoF).bounds cbaxis = fig.add_axes(rect) # Remove all colorbar.make_axes keywords except orientation - kwds = eatkwds("fraction", "pad", "shrink", "aspect", - "anchor", "panchor", **kwds) + kwds = eatkwds("fraction", "pad", "shrink", "aspect", "anchor", "panchor", **kwds) else: kwds.setdefault("shrink", 0.75) # In matplotlib 1.1.0 make_axes_gridspec ignores anchor and panchor keywords. # Eat these keywords for now. kwds = eatkwds("anchor", "panchor", **kwds) - cbaxis, kwds = colorbar.make_axes_gridspec(ax, **kwds) # gridspec allows tight_layout - plt.tight_layout() # do it after cbaxis, so colorbar isn't ignored + cbaxis, kwds = colorbar.make_axes_gridspec(ax, **kwds) # gridspec allows tight_layout + plt.tight_layout() # do it after cbaxis, so colorbar isn't ignored cb = colorbar.ColorbarBase(cbaxis, cmap=cmap, norm=norm, **kwds) - if class_size is "number": + if class_size == "number": cb.set_label("Models in class") - elif class_size is "fraction": + elif class_size == "fraction": cb.set_label("Fraction of models in class") - - return {"fig":fig, "axis":ax, "cb":cb, "cbaxis": cbaxis} + return {"fig": fig, "axis": ax, "cb": cb, "cbaxis": cbaxis} def get_model(self, dG, **kwds): """Return index of best model of best class at given dG. @@ -625,7 +641,8 @@ def get_model(self, dG, **kwds): corder - Which class to get based on AIC. Ordered from best to worst from 0 (the default). morder - Which model to get based on AIC. Ordered from best to worst from 0 (the default). 
Returns a model from a class, or from the collection of all models if classes are ignored. - cls - Override corder with a specific class index, or None to ignore classes entirely.""" + cls - Override corder with a specific class index, or None to ignore classes entirely. + """ corder = kwds.pop("corder", 0) morder = kwds.pop("morder", 0) if "cls" in kwds: @@ -634,9 +651,9 @@ def get_model(self, dG, **kwds): cls = self.get_class(dG, corder=corder) if cls is None: - return self.sortedprobs[dG][-1-morder] + return self.sortedprobs[dG][-1 - morder] else: - return self.sortedclasses[dG][cls][-1-morder] + return self.sortedclasses[dG][cls][-1 - morder] def get_class(self, dG, **kwds): """Return index of best class at given dG. @@ -646,9 +663,10 @@ def get_class(self, dG, **kwds): Keywords: - corder - Which class to get based on AIC. Ordered from best to worst from 0 (the default).""" + corder - Which class to get based on AIC. Ordered from best to worst from 0 (the default). + """ corder = kwds.pop("corder", 0) - return self.sortedclassprobs[dG][-1-corder] # index of corderth best class + return self.sortedclassprobs[dG][-1 - corder] # index of corderth best class def get_prob(self, dG, **kwds): """Return Akaike probability of best model of best class at given dG. @@ -661,7 +679,8 @@ def get_prob(self, dG, **kwds): corder - Which class to get based on AIC. Ordered from best to worst from 0 (the default). morder - Which model to get based on AIC. Ordered from best to worst from 0 (the default). Returns a model from a class, or from the collection of all models if classes are ignored. - cls - Override corder with a specific class index, or None to ignore classes entirely.""" + cls - Override corder with a specific class index, or None to ignore classes entirely. + """ idx = self.get_model(dG, **kwds) if "cls" in kwds and kwds["cls"] is None: return self.aicprobs[dG][idx] @@ -680,7 +699,8 @@ def get_nfree(self, dG, **kwds): corder - Which class to get based on AIC. Ordered from best to worst from 0 (the default). morder - Which model to get based on AIC. Ordered from best to worst from 0 (the default). Returns a model from a class, or from the collection of all models if classes are ignored. - cls - Override corder with a specific class index, or None to ignore classes entirely.""" + cls - Override corder with a specific class index, or None to ignore classes entirely. + """ idx = self.get_model(dG, **kwds) model = self.results[idx][1] baseline = self.results[idx][2] @@ -697,7 +717,8 @@ def get_aic(self, dG, **kwds): corder - Which class to get based on AIC. Ordered from best to worst from 0 (the default). morder - Which model to get based on AIC. Ordered from best to worst from 0 (the default). Returns a model from a class, or from the collection of all models if classes are ignored. - cls - Override corder with a specific class index, or None to ignore classes entirely.""" + cls - Override corder with a specific class index, or None to ignore classes entirely. + """ idx = self.get_model(dG, **kwds) return self.aics[dG][idx].stat @@ -714,13 +735,16 @@ def get(self, dG, *args, **kwds): corder - Which class to get based on AIC. Ordered from best to worst from 0 (the default). morder - Which model to get based on AIC. Ordered from best to worst from 0 (the default). Returns a model from a class, or from the collection of all models if classes are ignored. 
- cls - Override corder with a specific class index, or None to ignore classes entirely.""" - fdict = {"aic": self.get_aic, - "class": self.get_class, - "dg": lambda x: x, - "model": self.get_model, - "nfree": self.get_nfree, - "prob": self.get_prob} + cls - Override corder with a specific class index, or None to ignore classes entirely. + """ + fdict = { + "aic": self.get_aic, + "class": self.get_class, + "dg": lambda x: x, + "model": self.get_model, + "nfree": self.get_nfree, + "prob": self.get_prob, + } values = [] for a in args: values.append(fdict[a].__call__(dG, **kwds)) diff --git a/diffpy/srmise/pdfdataset.py b/diffpy/srmise/pdfdataset.py index 2bdd9bf..8671e28 100644 --- a/diffpy/srmise/pdfdataset.py +++ b/diffpy/srmise/pdfdataset.py @@ -20,7 +20,6 @@ """ -import copy import os.path import re import time @@ -31,6 +30,7 @@ class PDFComponent(object): """Common base class.""" + def __init__(self, name): """initialize @@ -38,13 +38,14 @@ def __init__(self, name): """ self.name = name - def close ( self, force = False ): + def close(self, force=False): """close myself force -- if forcibly (no wait) """ pass + class PDFDataSet(PDFComponent): """PDFDataSet is a class for experimental PDF data. @@ -75,9 +76,21 @@ class PDFDataSet(PDFComponent): refinableVars -- set (dict) of refinable variable names. """ - persistentItems = [ 'robs', 'Gobs', 'drobs', 'dGobs', 'stype', 'qmax', - 'qdamp', 'qbroad', 'dscale', 'rmin', 'rmax', 'metadata' ] - refinableVars = dict.fromkeys(('qdamp', 'qbroad', 'dscale')) + persistentItems = [ + "robs", + "Gobs", + "drobs", + "dGobs", + "stype", + "qmax", + "qdamp", + "qbroad", + "dscale", + "rmin", + "rmax", + "metadata", + ] + refinableVars = dict.fromkeys(("qdamp", "qbroad", "dscale")) def __init__(self, name): """Initialize. @@ -94,7 +107,7 @@ def clear(self): self.Gobs = [] self.drobs = [] self.dGobs = [] - self.stype = 'X' + self.stype = "X" # user must specify qmax to get termination ripples self.qmax = 0.0 self.qdamp = 0.001 @@ -149,16 +162,17 @@ def read(self, filename): returns self """ try: - self.readStr(open(filename,'rb').read()) - except PDFDataFormatError, err: + self.readStr(open(filename, "rb").read()) + except PDFDataFormatError as err: basename = os.path.basename(filename) - emsg = ("Could not open '%s' due to unsupported file format " + - "or corrupted data. [%s]") % (basename, err) + emsg = ("Could not open '%s' due to unsupported file format " + "or corrupted data. [%s]") % ( + basename, + err, + ) raise SrMiseFileError(emsg) self.filename = os.path.abspath(filename) return self - def readStr(self, datastring): """read experimental PDF data from a string @@ -168,15 +182,15 @@ def readStr(self, datastring): """ self.clear() # useful regex patterns: - rx = { 'f' : r'[-+]?(\d+(\.\d*)?|\d*\.\d+)([eE][-+]?\d+)?' 
} + rx = {"f": r"[-+]?(\d+(\.\d*)?|\d*\.\d+)([eE][-+]?\d+)?"} # find where does the data start - res = re.search(r'^#+ start data\s*(?:#.*\s+)*', datastring, re.M) + res = re.search(r"^#+ start data\s*(?:#.*\s+)*", datastring, re.M) # start_data is position where the first data line starts if res: start_data = res.end() else: # find line that starts with a floating point number - regexp = r'^\s*%(f)s' % rx + regexp = r"^\s*%(f)s" % rx res = re.search(regexp, datastring, re.M) if res: start_data = res.start() @@ -186,18 +200,18 @@ def readStr(self, datastring): databody = datastring[start_data:].strip() # find where the metadata starts - metadata = '' - res = re.search(r'^#+\ +metadata\b\n', header, re.M) + metadata = "" + res = re.search(r"^#+\ +metadata\b\n", header, re.M) if res: - metadata = header[res.end():] - header = header[:res.start()] + metadata = header[res.end() :] + header = header[: res.start()] # parse header # stype - if re.search('(x-?ray|PDFgetX)', header, re.I): - self.stype = 'X' - elif re.search('(neutron|PDFgetN)', header, re.I): - self.stype = 'N' + if re.search("(x-?ray|PDFgetX)", header, re.I): + self.stype = "X" + elif re.search("(neutron|PDFgetN)", header, re.I): + self.stype = "N" # qmax regexp = r"\bqmax *= *(%(f)s)\b" % rx res = re.search(regexp, header, re.I) @@ -227,12 +241,12 @@ def readStr(self, datastring): regexp = r"\b(?:temp|temperature|T)\ *=\ *(%(f)s)\b" % rx res = re.search(regexp, header) if res: - self.metadata['temperature'] = float(res.groups()[0]) + self.metadata["temperature"] = float(res.groups()[0]) # doping regexp = r"\b(?:x|doping)\ *=\ *(%(f)s)\b" % rx res = re.search(regexp, header) if res: - self.metadata['doping'] = float(res.groups()[0]) + self.metadata["doping"] = float(res.groups()[0]) # parsing gerneral metadata if metadata: @@ -241,12 +255,12 @@ def readStr(self, datastring): res = re.search(regexp, metadata, re.M) if res: self.metadata[res.groups()[0]] = float(res.groups()[1]) - metadata = metadata[res.end():] + metadata = metadata[res.end() :] else: break # read actual data - robs, Gobs, drobs, dGobs - inf_or_nan = re.compile('(?i)^[+-]?(NaN|Inf)\\b') + inf_or_nan = re.compile("(?i)^[+-]?(NaN|Inf)\\b") has_drobs = True has_dGobs = True # raise PDFDataFormatError if something goes wrong @@ -257,15 +271,13 @@ def readStr(self, datastring): self.robs.append(float(v[0])) self.Gobs.append(float(v[1])) # drobs is valid if all values are defined and positive - has_drobs = (has_drobs and - len(v) > 2 and not inf_or_nan.match(v[2])) + has_drobs = has_drobs and len(v) > 2 and not inf_or_nan.match(v[2]) if has_drobs: v2 = float(v[2]) has_drobs = v2 > 0.0 self.drobs.append(v2) # dGobs is valid if all values are defined and positive - has_dGobs = (has_dGobs and - len(v) > 3 and not inf_or_nan.match(v[3])) + has_dGobs = has_dGobs and len(v) > 3 and not inf_or_nan.match(v[3]) if has_dGobs: v3 = float(v[3]) has_dGobs = v3 > 0.0 @@ -274,15 +286,16 @@ def readStr(self, datastring): self.drobs = len(self.robs) * [0.0] if not has_dGobs: self.dGobs = len(self.robs) * [0.0] - except (ValueError, IndexError), err: + except (ValueError, IndexError) as err: raise PDFDataFormatError(err) self.rmin = self.robs[0] self.rmax = self.robs[-1] - if not has_drobs: self.drobs = len(self.robs) * [0.0] - if not has_dGobs: self.dGobs = len(self.robs) * [0.0] + if not has_drobs: + self.drobs = len(self.robs) * [0.0] + if not has_dGobs: + self.dGobs = len(self.robs) * [0.0] return self - def write(self, filename): """Write experimental PDF data to a file. 
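The readStr parsing above builds every numeric lookup from one reusable float pattern plus a few anchored header searches. A self-contained sketch of the same approach (the sample header text is made up for illustration):

import re

rx = {"f": r"[-+]?(\d+(\.\d*)?|\d*\.\d+)([eE][-+]?\d+)?"}
header = "stype=X x-ray scattering\nqmax=25.00\ntemp = 300.5\n"

# stype: look for x-ray/neutron hints, as readStr does.
stype = "X" if re.search("(x-?ray|PDFgetX)", header, re.I) else "N"

# qmax: reuse the float subpattern; the extra parentheses make group(1)
# the full float match.
res = re.search(r"\bqmax *= *(%(f)s)\b" % rx, header, re.I)
qmax = float(res.group(1)) if res else 0.0

res = re.search(r"\b(?:temp|temperature|T)\ *=\ *(%(f)s)\b" % rx, header)
temperature = float(res.group(1)) if res else None

print(stype, qmax, temperature)  # X 25.0 300.5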
@@ -291,7 +304,7 @@ def write(self, filename): No return value. """ bytes = self.writeStr() - f = open(filename, 'w') + f = open(filename, "w") f.write(bytes) f.close() return @@ -303,38 +316,40 @@ def writeStr(self): """ lines = [] # write metadata - lines.extend([ - 'History written: ' + time.ctime(), - 'produced by ' + getuser(), - '##### PDFgui' ]) + lines.extend( + [ + "History written: " + time.ctime(), + "produced by " + getuser(), + "##### PDFgui", + ] + ) # stype - if self.stype == 'X': - lines.append('stype=X x-ray scattering') - elif self.stype == 'N': - lines.append('stype=N neutron scattering') + if self.stype == "X": + lines.append("stype=X x-ray scattering") + elif self.stype == "N": + lines.append("stype=N neutron scattering") # qmax if self.qmax == 0: - qmax_line = 'qmax=0 correction not applied' + qmax_line = "qmax=0 correction not applied" else: - qmax_line = 'qmax=%.2f' % self.qmax + qmax_line = "qmax=%.2f" % self.qmax lines.append(qmax_line) # qdamp - lines.append('qdamp=%g' % self.qdamp) + lines.append("qdamp=%g" % self.qdamp) # qbroad - lines.append('qbroad=%g' % self.qbroad) + lines.append("qbroad=%g" % self.qbroad) # dscale - lines.append('dscale=%g' % self.dscale) + lines.append("dscale=%g" % self.dscale) # metadata if len(self.metadata) > 0: - lines.append('# metadata') + lines.append("# metadata") for k, v in self.metadata.iteritems(): - lines.append( "%s=%s" % (k,v) ) + lines.append("%s=%s" % (k, v)) # write data: - lines.append('##### start data') - lines.append('#L r(A) G(r) d_r d_Gr') + lines.append("##### start data") + lines.append("#L r(A) G(r) d_r d_Gr") for i in range(len(self.robs)): - lines.append('%g %g %g %g' % \ - (self.robs[i], self.Gobs[i], self.drobs[i], self.dGobs[i]) ) + lines.append("%g %g %g %g" % (self.robs[i], self.Gobs[i], self.drobs[i], self.dGobs[i])) # that should be it datastring = "\n".join(lines) + "\n" return datastring @@ -351,48 +366,62 @@ def copy(self, other=None): other.clear() # some attributes can be assigned, e.g., robs, Gobs, drobs, dGobs are # constant so they can be shared between copies. - assign_attributes = ( 'robs', 'Gobs', 'drobs', 'dGobs', 'stype', - 'qmax', 'qdamp', 'qbroad', 'dscale', - 'rmin', 'rmax', 'filename' ) + assign_attributes = ( + "robs", + "Gobs", + "drobs", + "dGobs", + "stype", + "qmax", + "qdamp", + "qbroad", + "dscale", + "rmin", + "rmax", + "filename", + ) # for others we will assign a copy - copy_attributes = ( 'metadata', ) + copy_attributes = ("metadata",) for a in assign_attributes: setattr(other, a, getattr(self, a)) import copy + for a in copy_attributes: setattr(other, a, copy.deepcopy(getattr(self, a))) return other + # End of class PDFDataSet class PDFDataFormatError(Exception): - """Exception class marking failure to proccess PDF data string. 
- """ + """Exception class marking failure to proccess PDF data string.""" + pass # simple test code -if __name__ == '__main__': +if __name__ == "__main__": import sys + filename = sys.argv[1] dataset = PDFDataSet("test") dataset.read(filename) - print "== metadata ==" + print("== metadata ==") for k, v in dataset.metadata.iteritems(): - print k, "=", v - print "== data members ==" + print(k, "=", v) + print("== data members ==") for k, v in dataset.__dict__.iteritems(): - if k in ('metadata', 'robs', 'Gobs', 'drobs', 'dGobs') or k[0] == "_": + if k in ("metadata", "robs", "Gobs", "drobs", "dGobs") or k[0] == "_": continue - print k, "=", v - print "== robs Gobs drobs dGobs ==" + print(k, "=", v) + print("== robs Gobs drobs dGobs ==") for i in range(len(dataset.robs)): - print dataset.robs[i], dataset.Gobs[i], dataset.drobs[i], dataset.dGobs[i] - print "== writeStr() ==" - print dataset.writeStr() - print "== datasetcopy.writeStr() ==" + print(dataset.robs[i], dataset.Gobs[i], dataset.drobs[i], dataset.dGobs[i]) + print("== writeStr() ==") + print(dataset.writeStr()) + print("== datasetcopy.writeStr() ==") datasetcopy = dataset.copy() - print datasetcopy.writeStr() + print(datasetcopy.writeStr()) # End of file diff --git a/diffpy/srmise/pdfpeakextraction.py b/diffpy/srmise/pdfpeakextraction.py index 8f9ecd6..ab5f03d 100644 --- a/diffpy/srmise/pdfpeakextraction.py +++ b/diffpy/srmise/pdfpeakextraction.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columia University in the City of New York # All rights reserved. # # File coded by: Luke Granlund @@ -15,20 +16,24 @@ import os.path import re -import matplotlib.pyplot as plt import numpy as np +from diffpy.srmise import srmiselog from diffpy.srmise.modelcluster import ModelCluster, ModelCovariance # from diffpy.pdfgui.control.pdfdataset import PDFDataSet from diffpy.srmise.pdfdataset import PDFDataSet from diffpy.srmise.peakextraction import PeakExtraction -from diffpy.srmise.srmiseerrors import * +from diffpy.srmise.srmiseerrors import ( + SrMiseDataFormatError, + SrMiseError, + SrMiseQmaxError, + SrMiseStaticOwnerError, + SrMiseUndefinedCovarianceError, +) logger = logging.getLogger("diffpy.srmise") -from diffpy.srmise import srmiselog - class PDFPeakExtraction(PeakExtraction): """Class for peak extraction of peaks from the PDF. @@ -234,8 +239,7 @@ def resampledata(self, dr, **kwds): # Not a robust epsilon test, but all physical Nyquist rates in same oom. if dr - dr_nyquist > eps: logger.warning( - "Resampling at %s, below Nyquist rate of %s. Information will be lost!" - % (dr, dr_nyquist) + "Resampling at %s, below Nyquist rate of %s. Information will be lost!" 
% (dr, dr_nyquist) ) r = np.arange(max(self.x[0], self.rng[0]), min(self.x[-1], self.rng[1]), dr) @@ -440,9 +444,7 @@ def extract(self, **kwds): break else: - ext = ModelCluster( - ext.model, bl, r1, y1, y_error1, None, self.error_method, self.pf - ) + ext = ModelCluster(ext.model, bl, r1, y1, y_error1, None, self.error_method, self.pf) ext.prune() logger.info("Model after resampling and termination ripples:\n%s", ext) @@ -456,10 +458,8 @@ def extract(self, **kwds): try: logger.info(str(cov)) # logger.info("Correlations > .8:\n%s", "\n".join(str(c) for c in cov.correlationwarning(.8))) - except SrMiseUndefinedCovarianceError as e: - logger.warn( - "Covariance not defined for final model. Fit may not have converged." - ) + except SrMiseUndefinedCovarianceError: + logger.warning("Covariance not defined for final model. Fit may not have converged.") logger.info(str(ext)) # Update calculated instance variables @@ -531,10 +531,8 @@ def fit(self, **kwds): logger.info(str(ext)) try: logger.info(str(cov)) - except SrMiseUndefinedCovarianceError as e: - logger.warn( - "Covariance not defined for final model. Fit may not have converged." - ) + except SrMiseUndefinedCovarianceError: + logger.warning("Covariance not defined for final model. Fit may not have converged.") # Update calculated instance variables self.extraction_type = "fit" @@ -703,13 +701,8 @@ def writepwastr(self, comments): lines.append("## Model Quality") # Quality of fit - lines.append( - "# Quality reported by ModelEvaluator: %s" % self.extracted.quality().stat - ) - lines.append( - "# Free parameters in extracted peaks: %s" - % self.extracted.model.npars(count_fixed=False) - ) + lines.append("# Quality reported by ModelEvaluator: %s" % self.extracted.quality().stat) + lines.append("# Free parameters in extracted peaks: %s" % self.extracted.model.npars(count_fixed=False)) if self.baseline is not None: fblpars = self.baseline.npars(count_fixed=False) else: @@ -830,15 +823,11 @@ def resample(orig_r, orig_y, new_r): dr = (orig_r[-1] - orig_r[0]) / (n - 1) if new_r[0] < orig_r[0]: - logger.warning( - "Resampling outside original grid: %s (requested) < %s (original)" - % (new_r[0], orig_r[0]) - ) + logger.warning("Resampling outside original grid: %s (requested) < %s (original)" % (new_r[0], orig_r[0])) if new_r[-1] > orig_r[-1]: logger.warning( - "Resampling outside original grid: %s (requested) > %s (original)" - % (new_r[-1], orig_r[-1]) + "Resampling outside original grid: %s (requested) > %s (original)" % (new_r[-1], orig_r[-1]) ) new_y = new_r * 0.0 @@ -913,7 +902,7 @@ def find_qmax(r, y, showgraphs=False): plt.show() plt.ioff() - raw_input() + input() return (qmax, dq) diff --git a/diffpy/srmise/peakextraction.py b/diffpy/srmise/peakextraction.py index 73da8a2..4693d66 100644 --- a/diffpy/srmise/peakextraction.py +++ b/diffpy/srmise/peakextraction.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columbia University in the City of New York # All rights reserved.
# # File coded by: Luke Granlund @@ -58,12 +59,22 @@ def __init__(self, newvars=[]): Parameters newvars: Sequence of strings that represent additional extraction parameters.""" self.clear() - self.extractvars = dict.fromkeys(('effective_dy', 'rng', 'pf', 'initial_peaks', 'baseline', 'cres', 'error_method')) + self.extractvars = dict.fromkeys( + ( + "effective_dy", + "rng", + "pf", + "initial_peaks", + "baseline", + "cres", + "error_method", + ) + ) for k in newvars: if k not in self.extractvars: self.extractvars[k] = None else: - emsg = "Extraction variable %s conflicts with existing variable" %k + emsg = "Extraction variable %s conflicts with existing variable" % k raise ValueError(emsg) return @@ -104,7 +115,7 @@ def setdata(self, x, y, dx=None, dy=None): if len(self.x) != len(self.dx) or len(self.x) != len(self.dy): emsg = "Sequences dx and dy (if present) must have the same length as x" raise ValueError(emsg) - #self.defaultvars() + # self.defaultvars() return def setvars(self, quiet=False, **kwds): @@ -120,7 +131,8 @@ def setvars(self, quiet=False, **kwds): baseline: Baseline instance or BaselineFunction instance (use built-in estimation) error_method: ErrorEvaluator subclass instance used to compare models (default AIC) initial_peaks: Peaks instance. These peaks are present at the start of extraction. - rng: Sequence specifying the least and greatest x-values over which to extract peaks.""" + rng: Sequence specifying the least and greatest x-values over which to extract peaks. + """ for k, v in kwds.iteritems(): if k in self.extractvars: if quiet: @@ -159,24 +171,24 @@ def defaultvars(self, *args): Note that the default values of very important parameters like the uncertainty and clustering resolution are crude guesses at best. """ - if self.cres is None or 'cres' in args: - self.cres = 4*(self.x[-1] - self.x[0])/len(self.x) + if self.cres is None or "cres" in args: + self.cres = 4 * (self.x[-1] - self.x[0]) / len(self.x) - if self.effective_dy is None or 'effective_dy' in args: + if self.effective_dy is None or "effective_dy" in args: if np.all(self.dy > 0): # That is, all points positive uncertainty. self.effective_dy = self.dy else: # A terribly crude guess - self.effective_dy = .05*(np.max(self.y)-np.min(self.y))*np.ones(len(self.x)) + self.effective_dy = 0.05 * (np.max(self.y) - np.min(self.y)) * np.ones(len(self.x)) elif np.isscalar(self.effective_dy) and self.effective_dy > 0: - self.effective_dy = self.effective_dy*np.ones(len(self.x)) + self.effective_dy = self.effective_dy * np.ones(len(self.x)) if self.pf is None or "pf" in args: from diffpy.srmise.peaks import GaussianOverR # TODO: Make a more useful default. 
- self.pf = [GaussianOverR(self.x[-1]-self.x[0])] + self.pf = [GaussianOverR(self.x[-1] - self.x[0])] if self.rng is None or "rng" in args: self.rng = [self.x[0], self.x[-1]] @@ -192,37 +204,38 @@ def defaultvars(self, *args): s = self.getrangeslice() epars = self.baseline.estimate_parameters(self.x[s], self.y[s]) self.baseline = self.baseline.actualize(epars, "internal") - logger.info("Estimating baseline: %s" %self.baseline) + logger.info("Estimating baseline: %s" % self.baseline) except (NotImplementedError, SrMiseEstimationError): logger.error("Could not estimate baseline from provided BaselineFunction, trying default values.") self.baseline = None if self.baseline is None or "baseline" in args: from diffpy.srmise.baselines import Polynomial - bl = Polynomial(degree = -1) + + bl = Polynomial(degree=-1) self.baseline = bl.actualize(np.array([]), "internal") if self.error_method is None or "error_method" in args: from diffpy.srmise.modelevaluators import AIC + self.error_method = AIC if self.initial_peaks is None or "initial_peaks" in args: self.initial_peaks = Peaks() - def __str__(self): """Return string summary of PeakExtraction.""" out = [] for k in self.extractvars: - out.append("%s: %s" %(k, getattr(self, k))) + out.append("%s: %s" % (k, getattr(self, k))) if self.extracted is not None: - out.append("Extraction type: %s" %self.extraction_type) + out.append("Extraction type: %s" % self.extraction_type) out.append("--- Extracted ---") out.append(str(self.extracted)) else: out.append("No extracted peaks exist.") - return '\n'.join(out)+'\n' + return "\n".join(out) + "\n" def plot(self, **kwds): """Convenience function to plot data and extracted peaks with matplotlib. @@ -242,10 +255,18 @@ def plot(self, **kwds): x = self.x[rangeslice] y = self.y[rangeslice] dy = self.dy[rangeslice] - mcluster = ModelCluster(self.initial_peaks, self.baseline, x, y, dy, None, self.error_method, self.pf) + mcluster = ModelCluster( + self.initial_peaks, + self.baseline, + x, + y, + dy, + None, + self.error_method, + self.pf, + ) plt.plot(*mcluster.plottable(kwds)) - def read(self, filename): """load PeakExtraction object from file @@ -254,12 +275,14 @@ def read(self, filename): returns self """ try: - self.readstr(open(filename,'rb').read()) - except SrMiseDataFormatError, err: + self.readstr(open(filename, "rb").read()) + except SrMiseDataFormatError as err: logger.exception("") basename = os.path.basename(filename) - emsg = ("Could not open '%s' due to unsupported file format " + - "or corrupted data. [%s]") % (basename, err) + emsg = ("Could not open '%s' due to unsupported file format " + "or corrupted data. 
[%s]") % ( + basename, + err, + ) raise SrMiseFileError(emsg) return self @@ -288,126 +311,126 @@ def readstr(self, datastring): safebf = [] # find where the results section starts - res = re.search(r'^#+ Results\s*(?:#.*\s+)*', datastring, re.M) + res = re.search(r"^#+ Results\s*(?:#.*\s+)*", datastring, re.M) if res: - results = datastring[res.end():].strip() - header = datastring[:res.start()] + results = datastring[res.end() :].strip() + header = datastring[: res.start()] # find data section, and what information it contains - res = re.search(r'^#+ start data\s*(?:#.*\s+)*', header, re.M) + res = re.search(r"^#+ start data\s*(?:#.*\s+)*", header, re.M) if res: - start_data = header[res.end():].strip() - start_data_info = header[res.start():res.end()] - header = header[:res.start()] - res = re.search(r'^(#+L.*)$', start_data_info, re.M) + start_data = header[res.end() :].strip() + start_data_info = header[res.start() : res.end()] + header = header[: res.start()] + res = re.search(r"^(#+L.*)$", start_data_info, re.M) if res: - start_data_info = start_data_info[res.start():res.end()].strip() + start_data_info = start_data_info[res.start() : res.end()].strip() hasx = False hasy = False hasdx = False hasdy = False hasedy = False - res = re.search(r'\bx\b', start_data_info) + res = re.search(r"\bx\b", start_data_info) if res: hasx = True - res = re.search(r'\by\b', start_data_info) + res = re.search(r"\by\b", start_data_info) if res: hasy = True - res = re.search(r'\bdx\b', start_data_info) + res = re.search(r"\bdx\b", start_data_info) if res: hasdx = True - res = re.search(r'\bdy\b', start_data_info) + res = re.search(r"\bdy\b", start_data_info) if res: hasdy = True - res = re.search(r'\edy\b', start_data_info) + res = re.search(r"\edy\b", start_data_info) if res: hasedy = True - res = re.search(r'^#+ Metadata\s*(?:#.*\s+)*', header, re.M) + res = re.search(r"^#+ Metadata\s*(?:#.*\s+)*", header, re.M) if res: - metadata = header[res.end():].strip() - header = header[:res.start()] + metadata = header[res.end() :].strip() + header = header[: res.start()] - res = re.search(r'^#+ SrMiseMetadata\s*(?:#.*\s+)*', header, re.M) + res = re.search(r"^#+ SrMiseMetadata\s*(?:#.*\s+)*", header, re.M) if res: - srmisemetadata = header[res.end():].strip() - header = header[:res.start()] + srmisemetadata = header[res.end() :].strip() + header = header[: res.start()] - res = re.search(r'^#+ InitialPeaks.*$', header, re.M) + res = re.search(r"^#+ InitialPeaks.*$", header, re.M) if res: - initial_peaks = header[res.end():].strip() - header = header[:res.start()] + initial_peaks = header[res.end() :].strip() + header = header[: res.start()] - res = re.search(r'^#+ BaselineObject\s*(?:#.*\s+)*', header, re.M) + res = re.search(r"^#+ BaselineObject\s*(?:#.*\s+)*", header, re.M) if res: - baselineobject = header[res.end():].strip() - header = header[:res.start()] + baselineobject = header[res.end() :].strip() + header = header[: res.start()] - res = re.search(r'^#+ PeakFunctions.*$', header, re.M) + res = re.search(r"^#+ PeakFunctions.*$", header, re.M) if res: - peakfunctions = header[res.end():].strip() - header = header[:res.start()] + peakfunctions = header[res.end() :].strip() + header = header[: res.start()] - res = re.search(r'^#+ BaselineFunctions.*$', header, re.M) + res = re.search(r"^#+ BaselineFunctions.*$", header, re.M) if res: - baselinefunctions = header[res.end():].strip() - header = header[:res.start()] + baselinefunctions = header[res.end() :].strip() + header = header[: res.start()] - ### 
Instantiating baseline functions - res = re.split(r'(?m)^#+ BaselineFunction \d+\s*(?:#.*\s+)*', baselinefunctions) + # Instantiating baseline functions + res = re.split(r"(?m)^#+ BaselineFunction \d+\s*(?:#.*\s+)*", baselinefunctions) for s in res[1:]: safebf.append(BaseFunction.factory(s, safebf)) - ### Instantiating peak functions - res = re.split(r'(?m)^#+ PeakFunction \d+\s*(?:#.*\s+)*', peakfunctions) + # Instantiating peak functions + res = re.split(r"(?m)^#+ PeakFunction \d+\s*(?:#.*\s+)*", peakfunctions) for s in res[1:]: safepf.append(BaseFunction.factory(s, safepf)) - ### Instantiating Baseline object - if re.match(r'^None$', baselineobject): + # Instantiating Baseline object + if re.match(r"^None$", baselineobject): self.baseline = None - elif re.match(r'^\d+$', baselineobject): + elif re.match(r"^\d+$", baselineobject): self.baseline = safebf[int(baselineobject)] else: self.baseline = Baseline.factory(baselineobject, safebf) - ### Instantiating initial peaks - if re.match(r'^None$', initial_peaks): + # Instantiating initial peaks + if re.match(r"^None$", initial_peaks): self.initial_peaks = None else: self.initial_peaks = Peaks() - res = re.split(r'(?m)^#+ InitialPeak\s*(?:#.*\s+)*', initial_peaks) + res = re.split(r"(?m)^#+ InitialPeak\s*(?:#.*\s+)*", initial_peaks) for s in res[1:]: self.initial_peaks.append(Peak.factory(s, safepf)) - ### Instantiating srmise metatdata + # Instantiating srmise metadata # pf - res = re.search(r'^pf=(.*)$', srmisemetadata, re.M) + res = re.search(r"^pf=(.*)$", srmisemetadata, re.M) self.pf = eval(res.groups()[0].strip()) if self.pf is not None: self.pf = [safepf[i] for i in self.pf] # cres - rx = { 'f' : r'[-+]?(\d+(\.\d*)?|\d*\.\d+)([eE][-+]?\d+)?' } + rx = {"f": r"[-+]?(\d+(\.\d*)?|\d*\.\d+)([eE][-+]?\d+)?"} regexp = r"\bcres *= *(%(f)s)\b" % rx res = re.search(regexp, srmisemetadata, re.I) self.cres = float(res.groups()[0]) # error_method - res = re.search(r'^ModelEvaluator=(.*)$', srmisemetadata, re.M) + res = re.search(r"^ModelEvaluator=(.*)$", srmisemetadata, re.M) __import__("diffpy.srmise.modelevaluators") module = sys.modules["diffpy.srmise.modelevaluators"] self.error_method = getattr(module, res.groups()[0].strip()) # range - res = re.search(r'^Range=(.*)$', srmisemetadata, re.M) + res = re.search(r"^Range=(.*)$", srmisemetadata, re.M) self.rng = eval(res.groups()[0].strip()) - ### Instantiating other metadata + # Instantiating other metadata self.readmetadata(metadata) - ### Instantiating start data + # Instantiating start data # read actual data - x, y, dx, dy, plus effective_dy arrays = [] if hasx: @@ -440,10 +463,10 @@ def readstr(self, datastring): for line in start_data.split("\n"): l = line.split() if len(arrays) != len(l): - emsg = ("Number of value fields does not match that given by '%s'" %start_data_info) + emsg = "Number of value fields does not match that given by '%s'" % start_data_info for a, v in zip(arrays, line.split()): a.append(float(v)) - except (ValueError, IndexError), err: + except (ValueError, IndexError) as err: raise SrMiseDataFormatError(str(err)) if hasx: self.x = np.array(self.x) @@ -456,15 +479,14 @@ def readstr(self, datastring): if hasedy: self.effective_dy = np.array(self.effective_dy) - - ### Instantiating results - res = re.search(r'^#+ ModelCluster\s*(?:#.*\s+)*', results, re.M) + # Instantiating results + res = re.search(r"^#+ ModelCluster\s*(?:#.*\s+)*", results, re.M) if res: - mc = results[res.end():].strip() - results = results[:res.start()] + mc = results[res.end() :].strip() + results = 
results[: res.start()] # extraction type - res = re.search(r'^extraction_type=(.*)$', results, re.M) + res = re.search(r"^extraction_type=(.*)$", results, re.M) if res: self.extraction_type = eval(res.groups()[0].strip()) else: @@ -472,7 +494,7 @@ def readstr(self, datastring): raise SrMiseDataFormatError(emsg) # extracted - if re.match(r'^None$', mc): + if re.match(r"^None$", mc): self.extracted = None else: self.extracted = ModelCluster.factory(mc, pfbaselist=safepf, blfbaselist=safebf) @@ -483,12 +505,11 @@ def write(self, filename): Parameters filename: the name of the file to write""" bytes = self.writestr() - f = open(filename, 'w') + f = open(filename, "w") f.write(bytes) f.close() return - def writestr(self): """Return string representation of PeakExtraction object.""" import time @@ -500,11 +521,14 @@ def writestr(self): lines = [] # Header - lines.extend([ - 'History written: ' + time.ctime(), - 'produced by ' + getuser(), - 'diffpy.srmise version %s' %__version__, - '##### PDF Peak Extraction' ]) + lines.extend( + [ + "History written: " + time.ctime(), + "produced by " + getuser(), + "diffpy.srmise version %s" % __version__, + "##### PDF Peak Extraction", + ] + ) # Generate list of PeakFunctions and BaselineFunctions # so I can refer to them by index when necessary. @@ -517,7 +541,7 @@ def writestr(self): if self.baseline is not None: if isinstance(self.baseline, BaseFunction): allbf.append(self.baseline) - else: # should be a ModelPart + else: # should be a ModelPart allbf.append(self.baseline.owner()) if self.extracted is not None: allpf.extend(self.extracted.peak_funcs) @@ -532,13 +556,13 @@ def writestr(self): # Indexed baseline functions lines.append("## BaselineFunctions") for i, bf in enumerate(safebf): - lines.append('# BaselineFunction %s' %i) + lines.append("# BaselineFunction %s" % i) lines.append(bf.writestr(safebf)) # Indexed peak functions lines.append("## PeakFunctions") for i, pf in enumerate(safepf): - lines.append('# PeakFunction %s' %i) + lines.append("# PeakFunction %s" % i) lines.append(pf.writestr(safepf)) # Baseline @@ -546,7 +570,7 @@ def writestr(self): if self.baseline is None: lines.append("None") elif self.baseline in safebf: - lines.append('%s' %repr(safebf.index(self.baseline))) + lines.append("%s" % repr(safebf.index(self.baseline))) else: lines.append(self.baseline.writestr(safebf)) @@ -556,34 +580,34 @@ def writestr(self): lines.append("None") else: for ip in self.initial_peaks: - lines.append('# InitialPeak') + lines.append("# InitialPeak") lines.append(ip.writestr(safepf)) - lines.append('# SrMiseMetadata') + lines.append("# SrMiseMetadata") # Extractable peak types if self.pf is None: lines.append("pf=None") else: - lines.append("pf=%s" %repr([safepf.index(p) for p in self.pf])) + lines.append("pf=%s" % repr([safepf.index(p) for p in self.pf])) # Clustering resolution - lines.append('cres=%g' %self.cres) + lines.append("cres=%g" % self.cres) # Model evaluator if self.error_method is None: - lines.append('ModelEvaluator=None') + lines.append("ModelEvaluator=None") else: - lines.append('ModelEvaluator=%s' %self.error_method.__name__) + lines.append("ModelEvaluator=%s" % self.error_method.__name__) # Extraction range - lines.append("Range=%s" %repr(self.rng)) + lines.append("Range=%s" % repr(self.rng)) # Everything not defined by PeakExtraction - lines.append('# Metadata') + lines.append("# Metadata") lines.append(self.writemetadata()) # Raw data used in extraction. 
- lines.append('##### start data') - line = ['#L'] + lines.append("##### start data") + line = ["#L"] numlines = 0 if self.x is not None: line.append("x") @@ -604,29 +628,28 @@ def writestr(self): for i in range(numlines): line = [] if self.x is not None: - line.append("%g" %self.x[i]) + line.append("%g" % self.x[i]) if self.y is not None: - line.append("%g" %self.y[i]) + line.append("%g" % self.y[i]) if self.dx is not None: - line.append("%g" %self.dx[i]) + line.append("%g" % self.dx[i]) if self.dy is not None: - line.append("%g" %self.dy[i]) + line.append("%g" % self.dy[i]) if self.effective_dy is not None: - line.append("%g" %self.effective_dy[i]) + line.append("%g" % self.effective_dy[i]) lines.append(" ".join(line)) - - ### Calculated members - lines.append('##### Results') - lines.append('extraction_type=%s' %repr(self.extraction_type)) + # Calculated members + lines.append("##### Results") + lines.append("extraction_type=%s" % repr(self.extraction_type)) lines.append("### ModelCluster") if self.extracted is None: - lines.append('None') + lines.append("None") else: lines.append(self.extracted.writestr(pfbaselist=safepf, blfbaselist=safebf)) - datastring = "\n".join(lines)+"\n" + datastring = "\n".join(lines) + "\n" return datastring def writemetadata(self): @@ -647,7 +670,7 @@ def getrangeslice(self): while self.x[low_idx] < max(self.x[0], self.rng[0]): low_idx += 1 hi_idx = len(self.x) - while self.x[hi_idx-1] > min(self.x[-1], self.rng[1]): + while self.x[hi_idx - 1] > min(self.x[-1], self.rng[1]): hi_idx -= 1 return slice(low_idx, hi_idx) @@ -679,14 +702,14 @@ def estimate_peak(self, x, add=True): dy = self.effective_dy[rangeslice] if x < x1[0] or x > x1[-1]: - emsg = "Argument x=%s outside allowed range (%s, %s)." %(x, x1[0], x1[-1]) + emsg = "Argument x=%s outside allowed range (%s, %s)." % (x, x1[0], x1[-1]) raise ValueError(emsg) # Object performing clustering on data. Note that DataClusters # provides an iterator that clusters the next point and returns # itself. Thus, dclusters and step (below) refer to the same object. - dclusters = DataClusters(x1, y1, self.cres) # Cluster with baseline removed + dclusters = DataClusters(x1, y1, self.cres) # Cluster with baseline removed dclusters.makeclusters() cidx = dclusters.find_nearest_cluster2(x)[0] cslice = dclusters.cut(cidx) @@ -700,10 +723,10 @@ def estimate_peak(self, x, add=True): if len(mcluster.model) > 0: if add: - logger.info("Adding peak: %s" %mcluster.model[0]) + logger.info("Adding peak: %s" % mcluster.model[0]) self.add_peaks(mcluster.model) else: - logger.info("Found peak: %s" %mcluster.model[0]) + logger.info("Found peak: %s" % mcluster.model[0]) return mcluster.model[0] else: logger.info("No peaks found.") @@ -725,12 +748,12 @@ def add_peaks(self, peaks): def extract_single(self, recursion_depth=1): """Find ModelCluster with peaks extracted from data. Return ModelCovariance instance at top level. - Every extracted peak is one of the peak functions supplied. All - comparisons of different peak models are performed with the class - specified by error_method. + Every extracted peak is one of the peak functions supplied. All + comparisons of different peak models are performed with the class + specified by error_method. 
- Parameters - recursion_depth: (1) Tracks recursion with extract_single.""" + Parameters + recursion_depth: (1) Tracks recursion with extract_single.""" self.clearcalc() tracer = srmiselog.tracer tracer.pushc() @@ -755,7 +778,7 @@ def extract_single(self, recursion_depth=1): # provides an iterator that clusters the next point and returns # itself. Thus, dclusters and step (below) refer to the same object. - dclusters = DataClusters(x, y, self.cres) # Cluster with baseline removed + dclusters = DataClusters(x, y, self.cres) # Cluster with baseline removed # The data for model clusters includes the baseline y = self.y[rangeslice] - ip.value(x) @@ -770,32 +793,37 @@ def extract_single(self, recursion_depth=1): stepcounter = 0 - ############################ - ### Main extraction loop ### + # ######################### + # Main extraction loop ### for step in dclusters: stepcounter += 1 msg = "\n\n------ Recursion: %s Step: %s Cluster: %s %s ------" - logger.debug(msg, - recursion_depth, - stepcounter, - step.lastcluster_idx, - step.clusters[step.lastcluster_idx] - ) + logger.debug( + msg, + recursion_depth, + stepcounter, + step.lastcluster_idx, + step.clusters[step.lastcluster_idx], + ) # Update mclusters if len(step.clusters) > len(mclusters): # Add a new cluster - mclusters.insert(step.lastcluster_idx, - ModelCluster(None, - bl, - x, - y, - dy, - step.cut(step.lastcluster_idx), - self.error_method, - self.pf)) + mclusters.insert( + step.lastcluster_idx, + ModelCluster( + None, + bl, + x, + y, + dy, + step.cut(step.lastcluster_idx), + self.error_method, + self.pf, + ), + ) else: # Update an existing cluster mclusters[step.lastcluster_idx].change_slice(step.cut(step.lastcluster_idx)) @@ -812,8 +840,8 @@ def extract_single(self, recursion_depth=1): # three clusters can become adjacent at any given step. assert len(adjacent) <= 3 - ### Update cluster fits ### - #1. Refit clusters adjacent to at least one other cluster. + # Update cluster fits ### + # 1. Refit clusters adjacent to at least one other cluster. for a in adjacent: mclusters[a].fit(justify=True) @@ -842,7 +870,7 @@ def extract_single(self, recursion_depth=1): # enlarged cluster ("new_cluster") or an intermediate cluster # ("adj_cluster"). - if step.lastpoint_idx == 0 or step.lastpoint_idx == len(step.x)-1: + if step.lastpoint_idx == 0 or step.lastpoint_idx == len(step.x) - 1: logger.debug("Boundary full: %s", step.lastpoint_idx) full_cluster = ModelCluster(mclusters[step.lastcluster_idx]) full_cluster.fit(True) @@ -853,7 +881,7 @@ def extract_single(self, recursion_depth=1): # Determine neighborhood appropriate for fitting (no larger than combined clusters) if len(full_cluster.model) > 0: - peak_pos = np.array([p['position'] for p in full_cluster.model]) + peak_pos = np.array([p["position"] for p in full_cluster.model]) pivot = peak_pos.searchsorted(border_x) else: peak_pos = np.array([]) @@ -871,104 +899,132 @@ def extract_single(self, recursion_depth=1): elif pivot == 1: # One peak left left_data = full_cluster.slice.indices(len(x))[0] - near_peaks = np.append(near_peaks, pivot-1) + near_peaks = np.append(near_peaks, pivot - 1) else: # left_data -> one more peak to the left - left_data = max(0, x.searchsorted(peak_pos[pivot-2])-1) - near_peaks = np.append(near_peaks, pivot-1) + left_data = max(0, x.searchsorted(peak_pos[pivot - 2]) - 1) + near_peaks = np.append(near_peaks, pivot - 1) if pivot == len(peak_pos): # No peaks right of border_x! 
- right_data = full_cluster.slice.indices(len(x))[1]-1 - elif pivot == len(peak_pos)-1: + right_data = full_cluster.slice.indices(len(x))[1] - 1 + elif pivot == len(peak_pos) - 1: # One peak right - right_data = full_cluster.slice.indices(len(x))[1]-1 + right_data = full_cluster.slice.indices(len(x))[1] - 1 near_peaks = np.append(near_peaks, pivot) else: # right_data -> one more peak to the right - right_data = min(len(x), x.searchsorted(peak_pos[pivot+1])+1) + right_data = min(len(x), x.searchsorted(peak_pos[pivot + 1]) + 1) near_peaks = np.append(near_peaks, pivot) - other_peaks = np.concatenate([np.arange(0, pivot-1), np.arange(pivot+1, len(peak_pos))]) + other_peaks = np.concatenate([np.arange(0, pivot - 1), np.arange(pivot + 1, len(peak_pos))]) # Go from indices to lists of peaks. near_peaks = Peaks([full_cluster.model[i] for i in near_peaks]) other_peaks = Peaks([full_cluster.model[i] for i in other_peaks]) - ### Remove contribution of peaks outside neighborhood + # Remove contribution of peaks outside neighborhood # Define range of fitting/recursion to the interpeak range # The adjusted error is passed unchanged. This may introduce # a few more peaks than is justified, but they can be pruned # with the correct statistics at the top level of recursion. - adj_slice = slice(left_data, right_data+1) + adj_slice = slice(left_data, right_data + 1) adj_x = x[adj_slice] - adj_y = y[adj_slice]-other_peaks.value(adj_x) + adj_y = y[adj_slice] - other_peaks.value(adj_x) adj_error = dy[adj_slice] - adj_cluster = ModelCluster(near_peaks, bl, adj_x, adj_y, adj_error, slice(len(adj_x)), self.error_method, self.pf) + adj_cluster = ModelCluster( + near_peaks, + bl, + adj_x, + adj_y, + adj_error, + slice(len(adj_x)), + self.error_method, + self.pf, + ) # Recursively cluster/fit the residual rec_r = adj_x - rec_y = adj_y-near_peaks.value(rec_r) + rec_y = adj_y - near_peaks.value(rec_r) rec_error = adj_error # Quick check to see if there is anything to find min_npars = min([p.npars for p in self.pf]) - checkrec = ModelCluster(None, None, rec_r, rec_y, rec_error, None, self.error_method, self.pf) + checkrec = ModelCluster( + None, + None, + rec_r, + rec_y, + rec_error, + None, + self.error_method, + self.pf, + ) recurse = len(near_peaks) > 0 and checkrec.quality().growth_justified(checkrec, min_npars) if recurse and recursion_depth < 3: - logger.info("\n*********STARTING RECURSION level %s (full boundary)************" %(recursion_depth+1)) + logger.info( + "\n*********STARTING RECURSION level %s (full boundary)************" + % (recursion_depth + 1) + ) rec_search = PeakExtraction() rec_search.setdata(rec_r, rec_y, None, rec_error) - rec_search.setvars(quiet=True, baseline=bl, cres=self.cres, pf=self.pf, error_method=self.error_method) - rec_search.extract_single(recursion_depth+1) + rec_search.setvars( + quiet=True, + baseline=bl, + cres=self.cres, + pf=self.pf, + error_method=self.error_method, + ) + rec_search.extract_single(recursion_depth + 1) rec = rec_search.extracted - logger.info("*********ENDING RECURSION level %s (full boundary) ************\n" %(recursion_depth+1)) + logger.info( + "*********ENDING RECURSION level %s (full boundary) ************\n" % (recursion_depth + 1) + ) # Incorporate best peaks from recursive search. 
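# --- illustrative sketch (editor's addition, not part of the patch) -------
# The pivot bookkeeping above in miniature: np.searchsorted splits the sorted
# peak positions at border_x, the nearest peak on each side (when present)
# becomes the "near" set that gets refit, and the rest are "other".
# split_peaks is a hypothetical name; the real code additionally converts the
# pivot into left_data/right_data bounds on the fitting neighborhood.
import numpy as np

def split_peaks(peak_pos, border_x):
    pivot = peak_pos.searchsorted(border_x)
    near = []
    if pivot > 0:                # nearest peak left of the border
        near.append(pivot - 1)
    if pivot < len(peak_pos):    # nearest peak right of the border
        near.append(pivot)
    other = [i for i in range(len(peak_pos)) if i not in near]
    return near, other

print(split_peaks(np.array([2.2, 2.9, 3.4, 3.8]), 3.1))  # ([1, 2], [0, 3])
# ---------------------------------------------------------------------------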
adj_cluster.augment(rec) - ### Select which model to use + # Select which model to use full_cluster.model = other_peaks full_cluster.replacepeaks(adj_cluster.model) full_cluster.fit(True) - msg = ["---Result of full boundary---", - "Original cluster:", - "%s", - "Final cluster:", - "%s", - "---End of combining clusters---"] - logger.debug("\n".join(msg), - mclusters[step.lastcluster_idx], - full_cluster) + msg = [ + "---Result of full boundary---", + "Original cluster:", + "%s", + "Final cluster:", + "%s", + "---End of combining clusters---", + ] + logger.debug("\n".join(msg), mclusters[step.lastcluster_idx], full_cluster) mclusters[step.lastcluster_idx] = full_cluster - ### End update cluster fits ### + # End update cluster fits ### - ### Combine adjacent clusters ### + # Combine adjacent clusters ### # Iterate in reverse order to preserve earlier indices for idx in adjacent[-1:0:-1]: msg = ["Current model"] msg.extend(["%s" for m in mclusters]) - logger.debug("\n".join(msg), - *[m.model for m in mclusters]) + logger.debug("\n".join(msg), *[m.model for m in mclusters]) - cleft = step.clusters[idx-1] + cleft = step.clusters[idx - 1] cright = step.clusters[idx] - new_cluster = ModelCluster.join_adjacent(mclusters[idx-1], mclusters[idx]) + new_cluster = ModelCluster.join_adjacent(mclusters[idx - 1], mclusters[idx]) # Estimate coordinate where clusters combine. - border_x = .5*(x[cleft[1]]+x[cright[0]]) - border_y = .5*(y[cleft[1]]+y[cright[0]]) + border_x = 0.5 * (x[cleft[1]] + x[cright[0]]) + border_y = 0.5 * (y[cleft[1]] + y[cright[0]]) # Determine neighborhood appropriate for fitting (no larger than combined clusters) if len(new_cluster.model) > 0: - peak_pos = np.array([p['position'] for p in new_cluster.model]) + peak_pos = np.array([p["position"] for p in new_cluster.model]) pivot = peak_pos.searchsorted(border_x) else: peak_pos = np.array([]) @@ -982,50 +1038,68 @@ def extract_single(self, recursion_depth=1): # interpeak range goes from peak to peak of next nearest peaks, although their contributions to the data are still removed. if pivot == 0: # No peaks left of border_x! - left_data=new_cluster.slice.indices(len(x))[0] + left_data = new_cluster.slice.indices(len(x))[0] elif pivot == 1: # One peak left left_data = new_cluster.slice.indices(len(x))[0] - near_peaks = np.append(near_peaks, pivot-1) + near_peaks = np.append(near_peaks, pivot - 1) else: # left_data -> one more peak to the left - left_data = max(0,x.searchsorted(peak_pos[pivot-2])-1) - near_peaks = np.append(near_peaks, pivot-1) + left_data = max(0, x.searchsorted(peak_pos[pivot - 2]) - 1) + near_peaks = np.append(near_peaks, pivot - 1) if pivot == len(peak_pos): # No peaks right of border_x! - right_data = new_cluster.slice.indices(len(x))[1]-1 - elif pivot == len(peak_pos)-1: + right_data = new_cluster.slice.indices(len(x))[1] - 1 + elif pivot == len(peak_pos) - 1: # One peak right - right_data = new_cluster.slice.indices(len(x))[1]-1 + right_data = new_cluster.slice.indices(len(x))[1] - 1 near_peaks = np.append(near_peaks, pivot) else: # right_data -> one more peak to the right - right_data = min(len(x), x.searchsorted(peak_pos[pivot+1])+1) + right_data = min(len(x), x.searchsorted(peak_pos[pivot + 1]) + 1) near_peaks = np.append(near_peaks, pivot) - other_peaks = np.concatenate([np.arange(0, pivot-1), np.arange(pivot+1, len(peak_pos))]) + other_peaks = np.concatenate([np.arange(0, pivot - 1), np.arange(pivot + 1, len(peak_pos))]) # Go from indices to lists of peaks. 
near_peaks = Peaks([new_cluster.model[i] for i in near_peaks]) other_peaks = Peaks([new_cluster.model[i] for i in other_peaks]) - ### Remove contribution of peaks outside neighborhood + # Remove contribution of peaks outside neighborhood # Define range of fitting/recursion to the interpeak range # The adjusted error is passed unchanged. This may introduce # a few more peaks than is justified, but they can be pruned # with the correct statistics at the top level of recursion. - adj_slice = slice(left_data, right_data+1) + adj_slice = slice(left_data, right_data + 1) adj_x = x[adj_slice] - adj_y = y[adj_slice]-other_peaks.value(adj_x) + adj_y = y[adj_slice] - other_peaks.value(adj_x) adj_error = dy[adj_slice] - #### Perform recursion on a version that is scaled at the + # # Perform recursion on a version that is scaled at the # border, as well as on that is simply fit beforehand. In # many cases these lead to nearly identical results, but # occasionally one works much better than the other. - adj_cluster1 = ModelCluster(near_peaks.copy(), bl, adj_x, adj_y, adj_error, slice(len(adj_x)), self.error_method, self.pf) - adj_cluster2 = ModelCluster(near_peaks.copy(), bl, adj_x, adj_y, adj_error, slice(len(adj_x)), self.error_method, self.pf) + adj_cluster1 = ModelCluster( + near_peaks.copy(), + bl, + adj_x, + adj_y, + adj_error, + slice(len(adj_x)), + self.error_method, + self.pf, + ) + adj_cluster2 = ModelCluster( + near_peaks.copy(), + bl, + adj_x, + adj_y, + adj_error, + slice(len(adj_x)), + self.error_method, + self.pf, + ) # Adjust cluster at border if there is at least one peak on # either side. @@ -1034,23 +1108,44 @@ def extract_single(self, recursion_depth=1): # Recursively cluster/fit the residual rec_r1 = adj_x - #rec_y1 = adj_y - near_peaks.value(rec_r1) + # rec_y1 = adj_y - near_peaks.value(rec_r1) rec_y1 = adj_y - adj_cluster1.model.value(rec_r1) rec_error1 = adj_error # Quick check to see if there is anything to find min_npars = min([p.npars for p in self.pf]) - checkrec = ModelCluster(None, None, rec_r1, rec_y1, rec_error1, None, self.error_method, self.pf) + checkrec = ModelCluster( + None, + None, + rec_r1, + rec_y1, + rec_error1, + None, + self.error_method, + self.pf, + ) recurse1 = checkrec.quality().growth_justified(checkrec, min_npars) if recurse1 and recursion_depth < 3: - logger.info("\n*********STARTING RECURSION level %s (reduce at border)************" %(recursion_depth+1)) + logger.info( + "\n*********STARTING RECURSION level %s (reduce at border)************" + % (recursion_depth + 1) + ) rec_search1 = PeakExtraction() rec_search1.setdata(rec_r1, rec_y1, None, rec_error1) - rec_search1.setvars(quiet=True, baseline=bl, cres=self.cres, pf=self.pf, error_method=self.error_method) - rec_search1.extract_single(recursion_depth+1) + rec_search1.setvars( + quiet=True, + baseline=bl, + cres=self.cres, + pf=self.pf, + error_method=self.error_method, + ) + rec_search1.extract_single(recursion_depth + 1) rec1 = rec_search1.extracted - logger.info("*********ENDING RECURSION level %s (reduce at border) ************\n" %(recursion_depth+1)) + logger.info( + "*********ENDING RECURSION level %s (reduce at border) ************\n" + % (recursion_depth + 1) + ) # Incorporate best peaks from recursive search. 
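# --- illustrative sketch (editor's addition, not part of the patch) -------
# The depth-capped residual recursion used above, reduced to a toy: fit what
# you can, subtract it, and rerun extraction on the residual at
# recursion_depth + 1, stopping at depth 3.  toy_peak/extract are hypothetical
# stand-ins for the ModelCluster/PeakExtraction machinery.
import numpy as np

MAX_DEPTH = 3

def toy_peak(r, pos, width, area):
    return area * np.exp(-((r - pos) ** 2) / (2 * width**2)) / (width * np.sqrt(2 * np.pi))

def extract(r, y, noise, depth=1):
    i = y.argmax()  # greedy single-peak estimate at the largest point
    peaks = [(r[i], 0.1, y[i] * 0.1 * np.sqrt(2 * np.pi))]
    residual = y - toy_peak(r, *peaks[0])
    if residual.max() > 3 * noise and depth < MAX_DEPTH:
        peaks += extract(r, residual, noise, depth + 1)
    return peaks

r = np.linspace(2, 4, 401)
y = toy_peak(r, 2.8, 0.1, 1.0) + toy_peak(r, 3.4, 0.1, 0.7)
print(extract(r, y, noise=0.01))  # two peaks, near r=2.8 and r=3.4
# ---------------------------------------------------------------------------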
adj_cluster1.augment(rec1) @@ -1060,28 +1155,47 @@ def extract_single(self, recursion_depth=1): # Recursively cluster/fit the residual rec_r2 = adj_x - #rec_y2 = adj_y - near_peaks.value(rec_r2) + # rec_y2 = adj_y - near_peaks.value(rec_r2) rec_y2 = adj_y - adj_cluster2.model.value(rec_r2) rec_error2 = adj_error # Quick check to see if there is anything to find min_npars = min([p.npars for p in self.pf]) - checkrec = ModelCluster(None, None, rec_r2, rec_y2, rec_error2, None, self.error_method, self.pf) + checkrec = ModelCluster( + None, + None, + rec_r2, + rec_y2, + rec_error2, + None, + self.error_method, + self.pf, + ) recurse2 = len(near_peaks) > 0 and checkrec.quality().growth_justified(checkrec, min_npars) if recurse2 and recursion_depth < 3: - logger.info("\n*********STARTING RECURSION level %s (prefit)************" %(recursion_depth+1)) + logger.info( + "\n*********STARTING RECURSION level %s (prefit)************" % (recursion_depth + 1) + ) rec_search2 = PeakExtraction() rec_search2.setdata(rec_r2, rec_y2, None, rec_error2) - rec_search2.setvars(quiet=True, baseline=bl, cres=self.cres, pf=self.pf, error_method=self.error_method) - rec_search2.extract_single(recursion_depth+1) + rec_search2.setvars( + quiet=True, + baseline=bl, + cres=self.cres, + pf=self.pf, + error_method=self.error_method, + ) + rec_search2.extract_single(recursion_depth + 1) rec2 = rec_search2.extracted - logger.info("*********ENDING RECURSION level %s (prefit) ************\n" %(recursion_depth+1)) + logger.info( + "*********ENDING RECURSION level %s (prefit) ************\n" % (recursion_depth + 1) + ) # Incorporate best peaks from recursive search. adj_cluster2.augment(rec2) - ### Select which model to use + # Select which model to use new_cluster.model = other_peaks rej_cluster = ModelCluster(new_cluster) q1 = adj_cluster1.quality(self.error_method) @@ -1095,25 +1209,23 @@ def extract_single(self, recursion_depth=1): new_cluster.fit(True) + msg = [ + "---Result of combining clusters---", + "First cluster:", + "%s", + "Second cluster:", + "%s", + "Resulting cluster:", + "%s", + "---End of combining clusters---", + ] - msg = ["---Result of combining clusters---", - "First cluster:", - "%s", - "Second cluster:", - "%s", - "Resulting cluster:", - "%s", - "---End of combining clusters---"] - - logger.debug("\n".join(msg), - mclusters[idx-1], - mclusters[idx], - new_cluster) + logger.debug("\n".join(msg), mclusters[idx - 1], mclusters[idx], new_cluster) - mclusters[idx-1] = new_cluster + mclusters[idx - 1] = new_cluster del mclusters[idx] - ### End combine adjacent clusters loop ### + # End combine adjacent clusters loop ### # Finally, combine clusters in dclusters if len(adjacent) > 0: @@ -1121,9 +1233,8 @@ def extract_single(self, recursion_depth=1): tracer.emit(*mclusters) - - ### End main extraction loop ### - ################################ + # End main extraction loop ### + # ############################# # Put initial peaks back in mclusters[0].addexternalpeaks(ip) @@ -1166,13 +1277,20 @@ def fit_single(self): dy = self.effective_dy[rngslice] # Set up ModelCluster - ext = ModelCluster(self.initial_peaks, self.baseline, x, y, dy, None, - self.error_method, self.pf) + ext = ModelCluster( + self.initial_peaks, + self.baseline, + x, + y, + dy, + None, + self.error_method, + self.pf, + ) # Fit model with baseline and calculate covariance matrix cov = ModelCovariance() - ext.fit(fitbaseline=True, estimate=False, cov=cov, - cov_format="default_output") + ext.fit(fitbaseline=True, estimate=False, cov=cov, 
cov_format="default_output") # Update calculated instance variables self.extraction_type = "fit_single" @@ -1180,11 +1298,12 @@ def fit_single(self): return cov -#end PeakExtraction class + +# end PeakExtraction class # simple test code -if __name__ == '__main__': +if __name__ == "__main__": from numpy.random import randn @@ -1195,13 +1314,13 @@ def fit_single(self): srmiselog.setlevel("info") srmiselog.liveplotting(False) - pf = GaussianOverR(.7) - res = .01 + pf = GaussianOverR(0.7) + res = 0.01 - pars = [[3, .2, 10], [3.5, .2, 10]] + pars = [[3, 0.2, 10], [3.5, 0.2, 10]] ideal_peaks = Peaks([pf.actualize(p, "pwa") for p in pars]) - r = np.arange(2,4,res) + r = np.arange(2, 4, res) y = ideal_peaks.value(r) + randn(len(r)) err = np.ones(len(r)) @@ -1209,14 +1328,14 @@ def fit_single(self): te = PeakExtraction() te.setdata(r, y, None, err) - te.setvars(rng=[1.51,10.], pf=[pf], cres=.1, effective_dy = 1.5*err) + te.setvars(rng=[1.51, 10.0], pf=[pf], cres=0.1, effective_dy=1.5 * err) te.extract_single() - print "--- Actual Peak parameters ---" - print ideal_peaks + print("--- Actual Peak parameters ---") + print(ideal_peaks) - print "\n--- After extraction ---" - print te + print("\n--- After extraction ---") + print(te) te.plot() - raw_input() + input() diff --git a/diffpy/srmise/peaks/__init__.py b/diffpy/srmise/peaks/__init__.py index d3c7ee3..ea3cdfc 100644 --- a/diffpy/srmise/peaks/__init__.py +++ b/diffpy/srmise/peaks/__init__.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columia University in the City of New York # All rights reserved. # # File coded by: Luke Granlund @@ -12,8 +13,3 @@ ############################################################################## __all__ = ["base", "gaussian", "gaussianoverr", "terminationripples"] - -from base import Peak, Peaks -from gaussian import Gaussian -from gaussianoverr import GaussianOverR -from terminationripples import TerminationRipples diff --git a/diffpy/srmise/peaks/base.py b/diffpy/srmise/peaks/base.py index d8e3469..653a0e3 100644 --- a/diffpy/srmise/peaks/base.py +++ b/diffpy/srmise/peaks/base.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columbia University in the City of New York # All rights reserved. # # File coded by: Luke Granlund @@ -15,13 +16,13 @@ import numpy as np -import diffpy.srmise.srmiselog from diffpy.srmise.basefunction import BaseFunction from diffpy.srmise.modelparts import ModelPart, ModelParts -from diffpy.srmise.srmiseerrors import * +from diffpy.srmise.srmiseerrors import SrMiseDataFormatError, SrMiseScalingError logger = logging.getLogger("diffpy.srmise") + class PeakFunction(BaseFunction): """Base class for functions which represent peaks. 
@@ -60,7 +61,15 @@ class PeakFunction(BaseFunction): transform_parameters() """ - def __init__(self, parameterdict, parformats, default_formats, metadict, base=None, Cache=None): + def __init__( + self, + parameterdict, + parformats, + default_formats, + metadict, + base=None, + Cache=None, + ): """Set parameterdict defined by subclass parameterdict: A dictionary mapping string keys to their index in a @@ -82,24 +91,31 @@ def __init__(self, parameterdict, parformats, default_formats, metadict, base=No raise ValueError(emsg) BaseFunction.__init__(self, parameterdict, parformats, default_formats, metadict, base, Cache) - - #### "Virtual" class methods #### + # # "Virtual" class methods #### def scale_at(self, peak, x, scale): emsg = "scale_at must be implemented in a PeakFunction subclass." raise NotImplementedError(emsg) + # # Methods required by BaseFunction #### - #### Methods required by BaseFunction #### - - def actualize(self, pars, in_format="default_input", free=None, removable=True, static_owner=False): + def actualize( + self, + pars, + in_format="default_input", + free=None, + removable=True, + static_owner=False, + ): converted = self.transform_parameters(pars, in_format, out_format="internal") return Peak(self, converted, free, removable, static_owner) def getmodule(self): return __name__ -#end of class PeakFunction + +# end of class PeakFunction + class Peaks(ModelParts): """A collection for Peak objects.""" @@ -110,12 +126,12 @@ def __init__(self, *args, **kwds): def argsort(self, key="position"): """Return sequence of indices which sort peaks in order specified by key.""" - keypars=np.array([p[key] for p in self]) + keypars = np.array([p[key] for p in self]) # In normal use the peaks will already be sorted, so check for it. - sorted=True - for i in range(len(keypars)-1): - if keypars[i] > keypars[i+1]: - sorted=False + sorted = True + for i in range(len(keypars) - 1): + if keypars[i] > keypars[i + 1]: + sorted = False break if not sorted: return keypars.argsort().tolist() @@ -142,14 +158,14 @@ def match_at(self, x, y): orig = self.copy() try: - scale = y/height + scale = y / height # First attempt at scaling peaks. Record which peaks, if any, # were not scaled in case a second attempt is required. scaled = [] all_scaled = True any_scaled = False - fixed_height = 0. + fixed_height = 0.0 for peak in self: scaled.append(peak.scale_at(x, scale)) all_scaled = all_scaled and scaled[-1] @@ -161,13 +177,13 @@ def match_at(self, x, y): if not all_scaled and fixed_height < y and fixed_height < height: self[:] = orig[:] any_scaled = False - scale = (y - fixed_height)/(height - fixed_height) + scale = (y - fixed_height) / (height - fixed_height) for peak, s in (self, scaled): if s: # "or" is short-circuited, so scale_at() must be first # to guarantee it is called. 
any_scaled = peak.scale_at(x, scale) or any_scaled - except Exception, e: + except Exception as e: logger.debug("An exception prevented matching -- %s", e) self[:] = orig[:] return False @@ -175,13 +191,15 @@ def match_at(self, x, y): def sort(self, key="position"): """Sort peaks in order specified by key.""" - keypars=np.array([p[key] for p in self]) + keypars = np.array([p[key] for p in self]) order = keypars.argsort() self[:] = [self[idx] for idx in order] return + # End of class Peaks + class Peak(ModelPart): """Represents a single peak associated with a PeakFunction subclass.""" @@ -225,7 +243,7 @@ def scale_at(self, x, scale): try: adj_pars = self._owner.scale_at(self.pars, x, scale) - except SrMiseScalingError, err: + except SrMiseScalingError as err: logger.debug("Cannot scale peak:", err) return False @@ -244,22 +262,21 @@ def factory(peakstr, ownerlist): peakstr: string representing peak ownerlist: List of BaseFunctions that owner is in """ - from numpy import array data = peakstr.strip().splitlines() # dictionary of parameters pdict = {} for d in data: - l = d.split("=", 1) - if len(l) == 2: + parse_value = d.split("=", 1) + if len(parse_value) == 2: try: - pdict[l[0]] = eval(l[1]) + pdict[parse_value[0]] = eval(parse_value[1]) except Exception: - emsg = ("Invalid parameter: %s" %d) + emsg = "Invalid parameter: %s" % d raise SrMiseDataFormatError(emsg) else: - emsg = ("Invalid parameter: %s" %d) + emsg = "Invalid parameter: %s" % d raise SrMiseDataFormatError(emsg) # Correctly initialize the base function, if one exists. @@ -271,10 +288,11 @@ def factory(peakstr, ownerlist): return Peak(**pdict) + # End of class Peak # simple test code -if __name__ == '__main__': +if __name__ == "__main__": import matplotlib.pyplot as plt from numpy.random import randn @@ -283,26 +301,26 @@ def factory(peakstr, ownerlist): from diffpy.srmise.modelevaluators import AICc from diffpy.srmise.peaks import GaussianOverR - res = .01 - r = np.arange(2,4,res) - err = np.ones(len(r)) #default unknown errors - pf = GaussianOverR(.7) + res = 0.01 + r = np.arange(2, 4, res) + err = np.ones(len(r)) # default unknown errors + pf = GaussianOverR(0.7) evaluator = AICc() - pars = [[3, .2, 10], [3.5, .2, 10]] + pars = [[3, 0.2, 10], [3.5, 0.2, 10]] ideal_peaks = Peaks([pf.actualize(p, "pwa") for p in pars]) - y = ideal_peaks.value(r) + .1*randn(len(r)) + y = ideal_peaks.value(r) + 0.1 * randn(len(r)) - guesspars = [[2.7, .15, 5], [3.7, .3, 5]] + guesspars = [[2.7, 0.15, 5], [3.7, 0.3, 5]] guess_peaks = Peaks([pf.actualize(p, "pwa") for p in guesspars]) cluster = ModelCluster(guess_peaks, r, y, err, None, AICc, [pf]) qual1 = cluster.quality() - print qual1.stat + print(qual1.stat) cluster.fit() yfit = cluster.calc() qual2 = cluster.quality() - print qual2.stat + print(qual2.stat) plt.figure(1) plt.plot(r, y, r, yfit) diff --git a/diffpy/srmise/peaks/gaussian.py b/diffpy/srmise/peaks/gaussian.py index 3eae227..ad2530b 100644 --- a/diffpy/srmise/peaks/gaussian.py +++ b/diffpy/srmise/peaks/gaussian.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columbia University in the City of New York # All rights reserved. 
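# --- illustrative sketch (editor's addition, not part of the patch) -------
# The "key=value" parsing done in Peak.factory() above, standalone.  The
# patched code applies eval() to each value read from a trusted .srmise file;
# ast.literal_eval, used here, is a safer substitute whenever the serialized
# values are plain Python literals (it cannot handle e.g. numpy expressions,
# which is why factory() keeps eval()).
import ast

def parse_params(text):
    pdict = {}
    for d in text.strip().splitlines():
        parts = d.split("=", 1)
        if len(parts) != 2:
            raise ValueError("Invalid parameter: %s" % d)
        pdict[parts[0]] = ast.literal_eval(parts[1])
    return pdict

print(parse_params("free=[True, True, True]\nremovable=True"))
# ---------------------------------------------------------------------------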
# # File coded by: Luke Granlund @@ -13,29 +14,28 @@ import logging -import matplotlib.pyplot as plt import numpy as np -import diffpy.srmise.srmiselog from diffpy.srmise.peaks.base import PeakFunction from diffpy.srmise.srmiseerrors import SrMiseEstimationError, SrMiseScalingError, SrMiseTransformationError logger = logging.getLogger("diffpy.srmise") -class Gaussian (PeakFunction): + +class Gaussian(PeakFunction): """Methods for evaluation and parameter estimation of width-limited Gaussian. - Allowed formats are - internal: [position, parameterized width-squared, area] - pwa: [position, full width at half maximum, area] - mu_sigma_area: [mu, sigma, area] + Allowed formats are + internal: [position, parameterized width-squared, area] + pwa: [position, full width at half maximum, area] + mu_sigma_area: [mu, sigma, area] - The internal parameterization is unconstrained, but are interpreted - so that the width is between 0 and a user-provided maximum full width - at half maximum, and the area is positive. + The internal parameterization is unconstrained, but are interpreted + so that the width is between 0 and a user-provided maximum full width + at half maximum, and the area is positive. - Note that all full width at half maximum values are for the - corresponding Gaussian. + Note that all full width at half maximum values are for the + corresponding Gaussian. """ # Possibly implement cutoff later, but low priority. @@ -46,9 +46,9 @@ class Gaussian (PeakFunction): def __init__(self, maxwidth, Cache=None): """maxwidth defined as full width at half maximum for the corresponding Gaussian, which is physically relevant.""" - parameterdict={'position':0,'width':1,'area':2} - formats=['internal','pwa','mu_sigma_area'] - default_formats={'default_input':'internal', 'default_output':'pwa'} + parameterdict = {"position": 0, "width": 1, "area": 2} + formats = ["internal", "pwa", "mu_sigma_area"] + default_formats = {"default_input": "internal", "default_output": "pwa"} metadict = {} metadict["maxwidth"] = (maxwidth, repr) PeakFunction.__init__(self, parameterdict, formats, default_formats, metadict, None, Cache) @@ -58,21 +58,21 @@ def __init__(self, maxwidth, Cache=None): raise ValueError(emsg) self.maxwidth = maxwidth - ### Useful constants ### - #c1 and c2 help with function values - self.c1 = self.maxwidth*np.sqrt(np.pi/(8*np.log(2))) - self.c2 = self.maxwidth**2/(8*np.log(2)) + # Useful constants ### + # c1 and c2 help with function values + self.c1 = self.maxwidth * np.sqrt(np.pi / (8 * np.log(2))) + self.c2 = self.maxwidth**2 / (8 * np.log(2)) - #c3 and c4 help with parameter estimation - self.c3 = .5*np.sqrt(np.pi/np.log(2)) - self.c4 = np.pi/(self.maxwidth*2) + # c3 and c4 help with parameter estimation + self.c3 = 0.5 * np.sqrt(np.pi / np.log(2)) + self.c4 = np.pi / (self.maxwidth * 2) - #convert sigma to fwhm: fwhm = 2 sqrt(2 log 2) sigma - self.sigma2fwhm = 2*np.sqrt(2*np.log(2)) + # convert sigma to fwhm: fwhm = 2 sqrt(2 log 2) sigma + self.sigma2fwhm = 2 * np.sqrt(2 * np.log(2)) return - #### Methods required by PeakFunction #### + # Methods required by PeakFunction #### def estimate_parameters(self, r, y): """Estimate parameters for single peak from data provided. @@ -101,40 +101,49 @@ def estimate_parameters(self, r, y): emsg = "Not enough data for successful estimation." 
                raise SrMiseEstimationError(emsg)

-        #### Estimation ####
-        guesspars = np.array([0., 0., 0.], dtype=float)
+        # Estimation ####
+        guesspars = np.array([0.0, 0.0, 0.0], dtype=float)
         min_y = use_y.min()
         max_y = use_y.max()
-        center = use_r[use_y.argmax()]

         if min_y != max_y:
-            weights = (use_y-min_y)**2
-            guesspars[0] = np.sum(use_r*weights)/sum(weights)
+            weights = (use_y - min_y) ** 2
+            guesspars[0] = np.sum(use_r * weights) / sum(weights)
             # guesspars[0] = center
             if use_y[0] < max_y:
-                sigma_left = np.sqrt(-.5*(use_r[0]-guesspars[0])**2/np.log(use_y[0]/max_y))
+                sigma_left = np.sqrt(-0.5 * (use_r[0] - guesspars[0]) ** 2 / np.log(use_y[0] / max_y))
             else:
-                sigma_left = np.sqrt(-.5*np.mean(np.abs(np.array([use_r[0]-guesspars[0], use_r[-1]-guesspars[0]])))**2/np.log(min_y/max_y))
-            if use_y[-1] < max_y:
-                sigma_right = np.sqrt(-.5*(use_r[-1]-guesspars[0])**2/np.log(use_y[-1]/max_y))
-            else:
-                sigma_right = np.sqrt(-.5*np.mean(np.abs(np.array([use_r[0]-guesspars[0], use_r[-1]-guesspars[0]])))**2/np.log(min_y/max_y))
-            guesspars[1] = .5*(sigma_left+sigma_right)*self.sigma2fwhm
+                sigma_left = np.sqrt(
+                    -0.5
+                    * np.mean(np.abs(np.array([use_r[0] - guesspars[0], use_r[-1] - guesspars[0]]))) ** 2
+                    / np.log(min_y / max_y)
+                )
+            if use_y[-1] < max_y:
+                sigma_right = np.sqrt(-0.5 * (use_r[-1] - guesspars[0]) ** 2 / np.log(use_y[-1] / max_y))
+            else:
+                sigma_right = np.sqrt(
+                    -0.5
+                    * np.mean(np.abs(np.array([use_r[0] - guesspars[0], use_r[-1] - guesspars[0]]))) ** 2
+                    / np.log(min_y / max_y)
+                )
+            guesspars[1] = 0.5 * (sigma_left + sigma_right) * self.sigma2fwhm
             if guesspars[1] > self.maxwidth:
-                #account for width-limit
-                guesspars[2] = self.c3*max_y*self.maxwidth
-                guesspars[1] = np.pi/2 #parameterized in terms of sin
+                # account for width-limit
+                guesspars[2] = self.c3 * max_y * self.maxwidth
+                guesspars[1] = np.pi / 2  # parameterized in terms of sin
             else:
-                guesspars[2] = self.c3*max_y*guesspars[1]
-                guesspars[1] = np.arcsin(2*guesspars[1]**2/self.maxwidth**2-1.) #parameterized in terms of sin
+                guesspars[2] = self.c3 * max_y * guesspars[1]
+                guesspars[1] = np.arcsin(
+                    2 * guesspars[1] ** 2 / self.maxwidth**2 - 1.0
+                )  # parameterized in terms of sin

         return guesspars

@@ -149,17 +158,17 @@ def scale_at(self, pars, x, scale):
         x: (float) Position of the border
         scale: (float > 0) Size of scaling at x."""
         if scale <= 0:
-            emsg = ''.join(["Cannot scale by ", str(scale), "."])
+            emsg = "".join(["Cannot scale by ", str(scale), "."])
             raise SrMiseScalingError(emsg)

         if scale == 1:
             return pars
         else:
-            ratio = 1/scale # Ugly: Equations orig. solved in terms of ratio
+            ratio = 1 / scale  # Ugly: Equations orig. solved in terms of ratio

         tpars = self.transform_parameters(pars, in_format="internal", out_format="mu_sigma_area")

-        #solves 1. f(rmax;mu1,sigma1,area1)=f(rmax;mu2,sigma2,area2)
+        # solves 1. f(rmax;mu1,sigma1,area1)=f(rmax;mu2,sigma2,area2)
         #        2. f(x;mu1,sigma1,area1)=ratio*f(x;mu1,sigma2,area2)
         #        3. mu1=mu2=rmax (the maximum of a Gaussian occurs at r=mu)
         #        for mu2, sigma2, area2 (with appropriate unit conversions to fwhm at the end).
@@ -168,51 +177,57 @@

         # the semi-nasty algebra reduces to something nice
         mu2 = mu1
-        area2 = np.sqrt(area1**2/(2*np.log(ratio)*sigma1**2/(x-mu1)**2+1))
-        sigma2 = sigma1*area2/area1
+        area2 = np.sqrt(area1**2 / (2 * np.log(ratio) * sigma1**2 / (x - mu1) ** 2 + 1))
+        sigma2 = sigma1 * area2 / area1

         tpars[0] = mu2
         tpars[1] = sigma2
         tpars[2] = area2
         try:
             tpars = self.transform_parameters(tpars, in_format="mu_sigma_area", out_format="internal")
-        except SrMiseTransformationError, err:
+        except SrMiseTransformationError as err:
             raise SrMiseScalingError(str(err))
         return tpars

     def _jacobianraw(self, pars, r, free):
         """Return Jacobian of width-limited Gaussian.

-        pars: Sequence of parameters for a single width-limited Gaussian
-                pars[0]=peak position
-                pars[1]=effective width, up to fwhm=maxwidth as par[1] -> inf.
-                =tan(pi/2*fwhm/maxwidth)
-                pars[2]=multiplicative constant a, equivalent to peak area
-        r: sequence or scalar over which pars is evaluated
-        free: sequence of booleans which determines which derivatives are
-              needed. True for evaluation, False for no evaluation.
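# --- illustrative sketch (editor's addition, not part of the patch) -------
# Two numerical checks on the estimation hunk above.  First, the sigma2fwhm
# factor: for exp(-(r-mu)^2/(2*sigma^2)) the full width at half maximum is
# 2*sqrt(2*log(2))*sigma.  Second, sigma_left comes from inverting
# y0 = ymax*exp(-(r0-mu)^2/(2*sigma^2)) at the edge of the data.
import numpy as np

sigma, mu, ymax = 0.25, 3.0, 10.0
sigma2fwhm = 2 * np.sqrt(2 * np.log(2))

r = np.linspace(mu - 2, mu + 2, 400001)
y = ymax * np.exp(-((r - mu) ** 2) / (2 * sigma**2))
above = r[y >= 0.5 * ymax]
print(above[-1] - above[0], sigma2fwhm * sigma)   # both ~0.5887

r0 = 2.4                                          # left edge of a cluster
y0 = ymax * np.exp(-((r0 - mu) ** 2) / (2 * sigma**2))
print(np.sqrt(-0.5 * (r0 - mu) ** 2 / np.log(y0 / ymax)))  # recovers sigma=0.25
# ---------------------------------------------------------------------------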
+ pars: Sequence of parameters for a single width-limited Gaussian + pars[0]=peak position + pars[1]=effective width, up to fwhm=maxwidth as par[1] -> inf. + =tan(pi/2*fwhm/maxwidth) + pars[2]=multiplicative constant a, equivalent to peak area + r: sequence or scalar over which pars is evaluated + free: sequence of booleans which determines which derivatives are + needed. True for evaluation, False for no evaluation. """ - jacobian=[None, None, None] - if (free == False).sum() == self.npars: + jacobian = [None, None, None] + if (free is False).sum() == self.npars: return jacobian - #Optimization - sin_p = np.sin(pars[1]) + 1. - p0minusr = pars[0]-r - exp_p = np.exp(-(p0minusr)**2/(self.c2*sin_p))/(self.c1*np.sqrt(sin_p)) + # Optimization + sin_p = np.sin(pars[1]) + 1.0 + p0minusr = pars[0] - r + exp_p = np.exp(-((p0minusr) ** 2) / (self.c2 * sin_p)) / (self.c1 * np.sqrt(sin_p)) if free[0]: - #derivative with respect to peak position - jacobian[0] = -2.*exp_p*p0minusr*np.abs(pars[2])/(self.c2*sin_p) + # derivative with respect to peak position + jacobian[0] = -2.0 * exp_p * p0minusr * np.abs(pars[2]) / (self.c2 * sin_p) if free[1]: - #derivative with respect to reparameterized peak width - jacobian[1] = -exp_p*np.abs(pars[2])*np.cos(pars[1])*(self.c2*sin_p-2*p0minusr**2)/(2.*self.c2*sin_p**2) + # derivative with respect to reparameterized peak width + jacobian[1] = ( + -exp_p + * np.abs(pars[2]) + * np.cos(pars[1]) + * (self.c2 * sin_p - 2 * p0minusr**2) + / (2.0 * self.c2 * sin_p**2) + ) if free[2]: - #derivative with respect to peak area - #abs'(x)=sign(x) for real x except at 0 where it is undetermined. Since any real peak necessarily has - #non-zero area and the function is paramaterized such that values of either sign represent equivalent - #curves I arbitrarily choose positive sign for pars[2]==0 in order to push the system back into a realistic - #parameter space should this improbable scenario occur. + # derivative with respect to peak area + # abs'(x)=sign(x) for real x except at 0 where it is undetermined. Since any real peak necessarily has + # non-zero area and the function is paramaterized such that values of either sign represent equivalent + # curves I arbitrarily choose positive sign for pars[2]==0 in order to + # push the system back into a realistic parameter space should this improbable scenario occur. # jacobian[2] = sign(pars[2])*exp_p if pars[2] >= 0: jacobian[2] = exp_p @@ -223,18 +238,18 @@ def _jacobianraw(self, pars, r, free): def _transform_parametersraw(self, pars, in_format, out_format): """Convert parameter values from in_format to out_format. - Also restores parameters to a preferred range if it permits multiple - values that correspond to the same physical result. - - Parameters - pars: Sequence of parameters - in_format: A format defined for this class - out_format: A format defined for this class + Also restores parameters to a preferred range if it permits multiple + values that correspond to the same physical result. 
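# --- illustrative sketch (editor's addition, not part of the patch) -------
# Two checks on the _jacobianraw hunk above.  Note the guard: for a NumPy
# boolean array, (free == False).sum() counts the disabled parameters
# elementwise, while `free is False` is an identity test that is never True
# for an array, so the elementwise form is the meaningful one.  Below, the
# jacobian[0] expression is verified by central differences, with c1/c2 built
# from maxwidth exactly as in __init__ above.
import numpy as np

free = np.array([False, False, False])
print((free == False).sum())  # noqa: E712  -> 3

maxwidth = 0.7
c1 = maxwidth * np.sqrt(np.pi / (8 * np.log(2)))
c2 = maxwidth**2 / (8 * np.log(2))

def value(pars, r):
    sin_p = np.sin(pars[1]) + 1.0
    return np.abs(pars[2]) / (c1 * np.sqrt(sin_p)) * np.exp(-((r - pars[0]) ** 2) / (c2 * sin_p))

pars, r, h = [3.0, 0.5, 10.0], 3.1, 1e-6
sin_p = np.sin(pars[1]) + 1.0
exp_p = np.exp(-((pars[0] - r) ** 2) / (c2 * sin_p)) / (c1 * np.sqrt(sin_p))
analytic = -2.0 * exp_p * (pars[0] - r) * np.abs(pars[2]) / (c2 * sin_p)
numeric = (value([pars[0] + h] + pars[1:], r) - value([pars[0] - h] + pars[1:], r)) / (2 * h)
print(analytic, numeric)      # agree to ~8 significant digits
# ---------------------------------------------------------------------------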
- Defined Formats - internal: [position, parameterized width-squared, area] - pwa: [position, full width at half maximum, area] - mu_sigma_area: [mu, sigma, area] + Parameters + pars: Sequence of parameters + in_format: A format defined for this class + out_format: A format defined for this class + + Defined Formats + internal: [position, parameterized width-squared, area] + pwa: [position, full width at half maximum, area] + mu_sigma_area: [mu, sigma, area] """ temp = np.array(pars) @@ -248,56 +263,63 @@ def _transform_parametersraw(self, pars, in_format, out_format): if in_format == "internal": # put the parameter for width in the "physical" quadrant [-pi/2,pi/2], # where .5*(sin(p)+1) covers fwhm = [0, maxwidth] - n = np.floor((temp[1]+np.pi/2)/np.pi) + n = np.floor((temp[1] + np.pi / 2) / np.pi) if np.mod(n, 2) == 0: - temp[1] = temp[1] - np.pi*n + temp[1] = temp[1] - np.pi * n else: - temp[1] = np.pi*n - temp[1] - temp[2] = np.abs(temp[2]) # map negative area to equivalent positive one + temp[1] = np.pi * n - temp[1] + temp[2] = np.abs(temp[2]) # map negative area to equivalent positive one elif in_format == "pwa": if temp[1] > self.maxwidth: - emsg = "Width %s (FWHM) greater than maximum allowed width %s" %(temp[1], self.maxwidth) + emsg = "Width %s (FWHM) greater than maximum allowed width %s" % ( + temp[1], + self.maxwidth, + ) raise SrMiseTransformationError(emsg) - temp[1] = np.arcsin(2.*temp[1]**2/self.maxwidth**2-1.) + temp[1] = np.arcsin(2.0 * temp[1] ** 2 / self.maxwidth**2 - 1.0) elif in_format == "mu_sigma_area": - fwhm = temp[1]*self.sigma2fwhm + fwhm = temp[1] * self.sigma2fwhm if fwhm > self.maxwidth: - emsg = "Width %s (FWHM) greater than maximum allowed width %s" %(fwhm, self.maxwidth) + emsg = "Width %s (FWHM) greater than maximum allowed width %s" % ( + fwhm, + self.maxwidth, + ) raise SrMiseTransformationError(emsg) - temp[1] = np.arcsin(2.*fwhm**2/self.maxwidth**2-1.) + temp[1] = np.arcsin(2.0 * fwhm**2 / self.maxwidth**2 - 1.0) else: - raise ValueError("Argument 'in_format' must be one of %s." \ - % self.parformats) + raise ValueError("Argument 'in_format' must be one of %s." % self.parformats) # Convert to specified output format from "internal" format. if out_format == "internal": pass elif out_format == "pwa": - temp[1] = np.sqrt(.5*(np.sin(temp[1])+1.)*self.maxwidth**2) + temp[1] = np.sqrt(0.5 * (np.sin(temp[1]) + 1.0) * self.maxwidth**2) elif out_format == "mu_sigma_area": - temp[1] = np.sqrt(.5*(np.sin(temp[1])+1.)*self.maxwidth**2)/self.sigma2fwhm + temp[1] = np.sqrt(0.5 * (np.sin(temp[1]) + 1.0) * self.maxwidth**2) / self.sigma2fwhm else: - raise ValueError("Argument 'out_format' must be one of %s." \ - % self.parformats) + raise ValueError("Argument 'out_format' must be one of %s." % self.parformats) return temp def _valueraw(self, pars, r): """Return value of width-limited Gaussian for the given parameters and r values. - pars: Sequence of parameters for a single width-limited Gaussian - pars[0]=peak position - pars[1]=effective width, up to fwhm=maxwidth as par[1] -> inf. - =tan(pi/2*fwhm/maxwidth) - pars[2]=multiplicative constant a, equivalent to peak area - r: sequence or scalar over which pars is evaluated + pars: Sequence of parameters for a single width-limited Gaussian + pars[0]=peak position + pars[1]=effective width, up to fwhm=maxwidth as par[1] -> inf. 
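# --- illustrative sketch (editor's addition, not part of the patch) -------
# Round-trip check of the width reparameterization in the transform hunks
# above: any real internal parameter p maps to
# fwhm = sqrt(0.5*(sin(p) + 1))*maxwidth in [0, maxwidth], and
# arcsin(2*fwhm^2/maxwidth^2 - 1) together with the folding step recovers the
# equivalent p inside the physical quadrant [-pi/2, pi/2].
import numpy as np

maxwidth = 0.7

def internal_to_pwa(p):
    return np.sqrt(0.5 * (np.sin(p) + 1.0) * maxwidth**2)

def fold(p):
    n = np.floor((p + np.pi / 2) / np.pi)
    return p - np.pi * n if np.mod(n, 2) == 0 else np.pi * n - p

for p in [0.3, 5.0, -7.1]:
    fwhm = internal_to_pwa(p)
    back = np.arcsin(2.0 * fwhm**2 / maxwidth**2 - 1.0)
    print(fwhm <= maxwidth, np.isclose(back, fold(p)))  # True True
# ---------------------------------------------------------------------------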
+ =tan(pi/2*fwhm/maxwidth) + pars[2]=multiplicative constant a, equivalent to peak area + r: sequence or scalar over which pars is evaluated """ - return np.abs(pars[2])/(self.c1*np.sqrt(np.sin(pars[1])+1.))* \ - np.exp(-(r-pars[0])**2/(self.c2*(np.sin(pars[1])+1.))) + return ( + np.abs(pars[2]) + / (self.c1 * np.sqrt(np.sin(pars[1]) + 1.0)) + * np.exp(-((r - pars[0]) ** 2) / (self.c2 * (np.sin(pars[1]) + 1.0))) + ) def getmodule(self): return __name__ - #### Other methods #### + # Other methods #### def max(self, pars): """Return position and height of the peak maximum.""" @@ -312,10 +334,11 @@ def max(self, pars): ymax = self._valueraw(pars, rmax) return np.array([rmax, ymax]) -#end of class Gaussian + +# end of class Gaussian # simple test code -if __name__ == '__main__': +if __name__ == "__main__": import matplotlib.pyplot as plt from numpy.random import randn @@ -324,26 +347,26 @@ def max(self, pars): from diffpy.srmise.modelevaluators import AICc from diffpy.srmise.peaks import Peaks - res = .01 - r = np.arange(2,4,res) - err = np.ones(len(r)) # default unknown errors - pf = Gaussian(.7) + res = 0.01 + r = np.arange(2, 4, res) + err = np.ones(len(r)) # default unknown errors + pf = Gaussian(0.7) evaluator = AICc() - pars = [[3, .2, 10], [3.5, .2, 10]] + pars = [[3, 0.2, 10], [3.5, 0.2, 10]] ideal_peaks = Peaks([pf.createpeak(p, "pwa") for p in pars]) - y = ideal_peaks.value(r) + .1*randn(len(r)) + y = ideal_peaks.value(r) + 0.1 * randn(len(r)) - guesspars = [[2.7, .15, 5], [3.7, .3, 5]] + guesspars = [[2.7, 0.15, 5], [3.7, 0.3, 5]] guess_peaks = Peaks([pf.createpeak(p, "pwa") for p in guesspars]) cluster = ModelCluster(guess_peaks, r, y, err, None, AICc, [pf]) qual1 = cluster.quality() - print qual1.stat + print(qual1.stat) cluster.fit() yfit = cluster.calc() qual2 = cluster.quality() - print qual2.stat + print(qual2.stat) plt.figure(1) plt.plot(r, y, r, yfit) diff --git a/diffpy/srmise/peaks/gaussianoverr.py b/diffpy/srmise/peaks/gaussianoverr.py index f699d9f..a5f9794 100644 --- a/diffpy/srmise/peaks/gaussianoverr.py +++ b/diffpy/srmise/peaks/gaussianoverr.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columbia University in the City of New York # All rights reserved. # # File coded by: Luke Granlund @@ -13,29 +14,28 @@ import logging -import matplotlib.pyplot as plt import numpy as np -import diffpy.srmise.srmiselog from diffpy.srmise.peaks.base import PeakFunction from diffpy.srmise.srmiseerrors import SrMiseEstimationError, SrMiseScalingError, SrMiseTransformationError logger = logging.getLogger("diffpy.srmise") -class GaussianOverR (PeakFunction): + +class GaussianOverR(PeakFunction): """Methods for evaluation and parameter estimation of width-limited Gaussian/r. - Allowed formats are - internal: [position, parameterized width-squared, area] - pwa: [position, full width at half maximum, area] - mu_sigma_area: [mu, sigma, area] + Allowed formats are + internal: [position, parameterized width-squared, area] + pwa: [position, full width at half maximum, area] + mu_sigma_area: [mu, sigma, area] - The internal parameterization is unconstrained, but are interpreted - so that the width is between 0 and a user-provided maximum full width - at half maximum, and the area is positive. 
+ The internal parameterization is unconstrained, but are interpreted + so that the width is between 0 and a user-provided maximum full width + at half maximum, and the area is positive. - Note that all full width at half maximum values are for the - corresponding Gaussian. + Note that all full width at half maximum values are for the + corresponding Gaussian. """ # Possibly implement cutoff later, but low priority. @@ -46,9 +46,9 @@ class GaussianOverR (PeakFunction): def __init__(self, maxwidth, Cache=None): """maxwidth defined as full width at half maximum for the corresponding Gaussian, which is physically relevant.""" - parameterdict={'position':0,'width':1,'area':2} - formats=['internal','pwa','mu_sigma_area'] - default_formats={'default_input':'internal', 'default_output':'pwa'} + parameterdict = {"position": 0, "width": 1, "area": 2} + formats = ["internal", "pwa", "mu_sigma_area"] + default_formats = {"default_input": "internal", "default_output": "pwa"} metadict = {} metadict["maxwidth"] = (maxwidth, repr) PeakFunction.__init__(self, parameterdict, formats, default_formats, metadict, None, Cache) @@ -58,21 +58,21 @@ def __init__(self, maxwidth, Cache=None): raise ValueError(emsg) self.maxwidth = maxwidth - ### Useful constants ### - #c1 and c2 help with function values - self.c1 = self.maxwidth*np.sqrt(np.pi/(8*np.log(2))) - self.c2 = self.maxwidth**2/(8*np.log(2)) + # Useful constants ### + # c1 and c2 help with function values + self.c1 = self.maxwidth * np.sqrt(np.pi / (8 * np.log(2))) + self.c2 = self.maxwidth**2 / (8 * np.log(2)) - #c3 and c4 help with parameter estimation - self.c3 = .5*np.sqrt(np.pi/np.log(2)) - self.c4 = np.pi/(self.maxwidth*2) + # c3 and c4 help with parameter estimation + self.c3 = 0.5 * np.sqrt(np.pi / np.log(2)) + self.c4 = np.pi / (self.maxwidth * 2) - #convert sigma to fwhm: fwhm = 2 sqrt(2 log 2) sigma - self.sigma2fwhm = 2*np.sqrt(2*np.log(2)) + # convert sigma to fwhm: fwhm = 2 sqrt(2 log 2) sigma + self.sigma2fwhm = 2 * np.sqrt(2 * np.log(2)) return - #### Methods required by PeakFunction #### + # Methods required by PeakFunction #### def estimate_parameters(self, r, y): """Estimate parameters for single peak from data provided. @@ -101,40 +101,49 @@ def estimate_parameters(self, r, y): emsg = "Not enough data for successful estimation." 
                raise SrMiseEstimationError(emsg)

-        #### Estimation ####
-        guesspars = np.array([0., 0., 0.], dtype=float)
+        # Estimation ####
+        guesspars = np.array([0.0, 0.0, 0.0], dtype=float)
         min_y = use_y.min()
         max_y = use_y.max()
-        center = use_r[use_y.argmax()]

         if min_y != max_y:
-            weights = (use_y-min_y)**2
-            guesspars[0] = np.sum(use_r*weights)/sum(weights)
+            weights = (use_y - min_y) ** 2
+            guesspars[0] = np.sum(use_r * weights) / sum(weights)
             # guesspars[0] = center
             if use_y[0] < max_y:
-                sigma_left = np.sqrt(-.5*(use_r[0]-guesspars[0])**2/np.log(use_y[0]/max_y))
+                sigma_left = np.sqrt(-0.5 * (use_r[0] - guesspars[0]) ** 2 / np.log(use_y[0] / max_y))
             else:
-                sigma_left = np.sqrt(-.5*np.mean(np.abs(np.array([use_r[0]-guesspars[0], use_r[-1]-guesspars[0]])))**2/np.log(min_y/max_y))
-            if use_y[-1] < max_y:
-                sigma_right = np.sqrt(-.5*(use_r[-1]-guesspars[0])**2/np.log(use_y[-1]/max_y))
-            else:
-                sigma_right = np.sqrt(-.5*np.mean(np.abs(np.array([use_r[0]-guesspars[0], use_r[-1]-guesspars[0]])))**2/np.log(min_y/max_y))
-            guesspars[1] = .5*(sigma_left+sigma_right)*self.sigma2fwhm
+                sigma_left = np.sqrt(
+                    -0.5
+                    * np.mean(np.abs(np.array([use_r[0] - guesspars[0], use_r[-1] - guesspars[0]]))) ** 2
+                    / np.log(min_y / max_y)
+                )
+            if use_y[-1] < max_y:
+                sigma_right = np.sqrt(-0.5 * (use_r[-1] - guesspars[0]) ** 2 / np.log(use_y[-1] / max_y))
+            else:
+                sigma_right = np.sqrt(
+                    -0.5
+                    * np.mean(np.abs(np.array([use_r[0] - guesspars[0], use_r[-1] - guesspars[0]]))) ** 2
+                    / np.log(min_y / max_y)
+                )
+            guesspars[1] = 0.5 * (sigma_left + sigma_right) * self.sigma2fwhm
             if guesspars[1] > self.maxwidth:
-                #account for width-limit
-                guesspars[2] = self.c3*max_y*guesspars[0]*self.maxwidth
-                guesspars[1] = np.pi/2 #parameterized in terms of sin
+                # account for width-limit
+                guesspars[2] = self.c3 * max_y * guesspars[0] * self.maxwidth
+                guesspars[1] = np.pi / 2  # parameterized in terms of sin
             else:
-                guesspars[2] = self.c3*max_y*guesspars[0]*guesspars[1]
-                guesspars[1] = np.arcsin(2*guesspars[1]**2/self.maxwidth**2-1.) #parameterized in terms of sin
+                guesspars[2] = self.c3 * max_y * guesspars[0] * guesspars[1]
+                guesspars[1] = np.arcsin(
+                    2 * guesspars[1] ** 2 / self.maxwidth**2 - 1.0
+                )  # parameterized in terms of sin

         return guesspars

@@ -149,17 +158,17 @@ def scale_at(self, pars, x, scale):
         x: (float) Position of the border
         scale: (float > 0) Size of scaling at x."""
         if scale <= 0:
-            emsg = ''.join(["Cannot scale by ", str(scale), "."])
+            emsg = "".join(["Cannot scale by ", str(scale), "."])
             raise SrMiseScalingError(emsg)

         if scale == 1:
             return pars
         else:
-            ratio = 1/scale # Ugly: Equations orig. solved in terms of ratio
+            ratio = 1 / scale  # Ugly: Equations orig. solved in terms of ratio

         tpars = self.transform_parameters(pars, in_format="internal", out_format="mu_sigma_area")

-        #solves 1. f(rmax;mu1,sigma1,area1)=f(rmax;mu2,sigma2,area2)
+        # solves 1. f(rmax;mu1,sigma1,area1)=f(rmax;mu2,sigma2,area2)
         #        2. f(x;mu1,sigma1,area1)=ratio*f(x;mu1,sigma2,area2)
         #        3. 1/2*(mu1+sqrt(mu1^2+sigma1^2))=1/2*(mu2+sqrt(mu2^2+sigma2^2))=rmax
         #        for mu2, sigma2, area2 (with appropriate unit conversions to fwhm at the end).
@@ -169,59 +178,70 @@ def scale_at(self, pars, x, scale):

         # position of the peak maximum
         try:
             rmax = self.max(pars)[0]
-        except ValueError, err:
+        except ValueError as err:
             raise SrMiseScalingError(str(err))

         # lhs of eqn1/eqn2 multiplied by ratio. Then take the log.
- log_ratio_prime = np.log(ratio)+(x-rmax)*(x-2*mu1+rmax)/(2*sigma1**2) + log_ratio_prime = np.log(ratio) + (x - rmax) * (x - 2 * mu1 + rmax) / (2 * sigma1**2) # the semi-nasty algebra reduces to something nice - sigma2 = np.sqrt(.5*rmax*(x-rmax)**2/(x-rmax+rmax*log_ratio_prime)) - mu2 = (sigma2**2+rmax**2)/rmax - area2 = area1*(sigma2/sigma1)*np.exp(-(rmax-mu1)**2/(2*sigma1**2))/np.exp(-(rmax-mu2)**2/(2*sigma2**2)) + sigma2 = np.sqrt(0.5 * rmax * (x - rmax) ** 2 / (x - rmax + rmax * log_ratio_prime)) + mu2 = (sigma2**2 + rmax**2) / rmax + area2 = ( + area1 + * (sigma2 / sigma1) + * np.exp(-((rmax - mu1) ** 2) / (2 * sigma1**2)) + / np.exp(-((rmax - mu2) ** 2) / (2 * sigma2**2)) + ) tpars[0] = mu2 tpars[1] = sigma2 tpars[2] = area2 try: tpars = self.transform_parameters(tpars, in_format="mu_sigma_area", out_format="internal") - except SrMiseTransformationError, err: + except SrMiseTransformationError as err: raise SrMiseScalingError(str(err)) return tpars def _jacobianraw(self, pars, r, free): """Return Jacobian of width-limited Gaussian/r. - pars: Sequence of parameters for a single width-limited Gaussian - pars[0]=peak position - pars[1]=effective width, up to fwhm=maxwidth as par[1] -> inf. - =tan(pi/2*fwhm/maxwidth) - pars[2]=multiplicative constant a, equivalent to peak area - r: sequence or scalar over which pars is evaluated - free: sequence of booleans which determines which derivatives are - needed. True for evaluation, False for no evaluation. + pars: Sequence of parameters for a single width-limited Gaussian + pars[0]=peak position + pars[1]=effective width, up to fwhm=maxwidth as par[1] -> inf. + =tan(pi/2*fwhm/maxwidth) + pars[2]=multiplicative constant a, equivalent to peak area + r: sequence or scalar over which pars is evaluated + free: sequence of booleans which determines which derivatives are + needed. True for evaluation, False for no evaluation. """ - jacobian=[None, None, None] - if (free == False).sum() == self.npars: + jacobian = [None, None, None] + if (free is False).sum() == self.npars: return jacobian - #Optimization - sin_p = np.sin(pars[1]) + 1. - p0minusr = pars[0]-r - exp_p = np.exp(-(p0minusr)**2/(self.c2*sin_p))/(np.abs(r)*self.c1*np.sqrt(sin_p)) + # Optimization + sin_p = np.sin(pars[1]) + 1.0 + p0minusr = pars[0] - r + exp_p = np.exp(-((p0minusr) ** 2) / (self.c2 * sin_p)) / (np.abs(r) * self.c1 * np.sqrt(sin_p)) if free[0]: - #derivative with respect to peak position - jacobian[0] = -2.*exp_p*p0minusr*np.abs(pars[2])/(self.c2*sin_p) + # derivative with respect to peak position + jacobian[0] = -2.0 * exp_p * p0minusr * np.abs(pars[2]) / (self.c2 * sin_p) if free[1]: - #derivative with respect to reparameterized peak width - jacobian[1] = -exp_p*np.abs(pars[2])*np.cos(pars[1])*(self.c2*sin_p-2*p0minusr**2)/(2.*self.c2*sin_p**2) + # derivative with respect to reparameterized peak width + jacobian[1] = ( + -exp_p + * np.abs(pars[2]) + * np.cos(pars[1]) + * (self.c2 * sin_p - 2 * p0minusr**2) + / (2.0 * self.c2 * sin_p**2) + ) if free[2]: - #derivative with respect to peak area - #abs'(x)=sign(x) for real x except at 0 where it is undetermined. Since any real peak necessarily has - #non-zero area and the function is paramaterized such that values of either sign represent equivalent - #curves I arbitrarily choose positive sign for pars[2]==0 in order to push the system back into a realistic - #parameter space should this improbable scenario occur. + # derivative with respect to peak area + # abs'(x)=sign(x) for real x except at 0 where it is undetermined. 
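# --- illustrative sketch (editor's addition, not part of the patch) -------
# Numerical check of the Gaussian/r scale_at() algebra above: the rescaled
# parameters keep the same maximum position rmax and the same height there,
# while the value at the border x changes by exactly `scale`.  f below is the
# mu_sigma_area form of Gaussian/r up to an overall constant, which cancels
# in the ratios; the numbers are arbitrary test values.
import numpy as np

def f(r, mu, sigma, area):
    return area / (sigma * r) * np.exp(-((r - mu) ** 2) / (2 * sigma**2))

mu1, sigma1, area1 = 3.0, 0.2, 10.0
x, scale = 3.3, 1.25
ratio = 1 / scale

rmax = 0.5 * (mu1 + np.sqrt(mu1**2 - 4 * sigma1**2))
log_ratio_prime = np.log(ratio) + (x - rmax) * (x - 2 * mu1 + rmax) / (2 * sigma1**2)
sigma2 = np.sqrt(0.5 * rmax * (x - rmax) ** 2 / (x - rmax + rmax * log_ratio_prime))
mu2 = (sigma2**2 + rmax**2) / rmax
area2 = (
    area1
    * (sigma2 / sigma1)
    * np.exp(-((rmax - mu1) ** 2) / (2 * sigma1**2))
    / np.exp(-((rmax - mu2) ** 2) / (2 * sigma2**2))
)

print(f(rmax, mu2, sigma2, area2) / f(rmax, mu1, sigma1, area1))  # -> 1.0
print(f(x, mu2, sigma2, area2) / f(x, mu1, sigma1, area1))        # -> 1.25
# ---------------------------------------------------------------------------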
Since any real peak necessarily has + # non-zero area and the function is paramaterized such that values of either sign represent equivalent + # curves I arbitrarily choose positive sign for pars[2]==0 in order to + # push the system back into a realistic parameter space should this improbable scenario occur. # jacobian[2] = sign(pars[2])*exp_p if pars[2] >= 0: jacobian[2] = exp_p @@ -232,15 +252,15 @@ def _jacobianraw(self, pars, r, free): def _transform_derivativesraw(self, pars, in_format, out_format): """Return gradient matrix for the pars converted from in_format to out_format. - Parameters - pars: Sequence of parameters - in_format: A format defined for this class - out_format: A format defined for this class - - Defined Formats - internal: [position, parameterized width-squared, area] - pwa: [position, full width at half maximum, area] - mu_sigma_area: [mu, sigma, area] + Parameters + pars: Sequence of parameters + in_format: A format defined for this class + out_format: A format defined for this class + + Defined Formats + internal: [position, parameterized width-squared, area] + pwa: [position, full width at half maximum, area] + mu_sigma_area: [mu, sigma, area] """ # With these three formats only the width-related parameter changes. # Therefore the gradient matrix is the identity matrix with the possible @@ -252,49 +272,50 @@ def _transform_derivativesraw(self, pars, in_format, out_format): if in_format == "internal": if out_format == "pwa": - g[1,1] = self.maxwidth/(2*np.sqrt(2))*np.cos(pars[1])/np.sqrt(1+np.sin(pars[1])) + g[1, 1] = self.maxwidth / (2 * np.sqrt(2)) * np.cos(pars[1]) / np.sqrt(1 + np.sin(pars[1])) elif out_format == "mu_sigma_area": - g[1,1] = self.maxwidth/(2*np.sqrt(2)*self.sigma2fwhm)*np.cos(pars[1])/np.sqrt(1+np.sin(pars[1])) + g[1, 1] = ( + self.maxwidth + / (2 * np.sqrt(2) * self.sigma2fwhm) + * np.cos(pars[1]) + / np.sqrt(1 + np.sin(pars[1])) + ) else: - raise ValueError("Argument 'out_format' must be one of %s." \ - % self.parformats) + raise ValueError("Argument 'out_format' must be one of %s." % self.parformats) elif in_format == "pwa": if out_format == "internal": - g[1,1] = 2/np.sqrt(self.maxwidth**2-pars[1]**2) + g[1, 1] = 2 / np.sqrt(self.maxwidth**2 - pars[1] ** 2) elif out_format == "mu_sigma_area": - g[1,1] = 1/self.sigma2fwhm + g[1, 1] = 1 / self.sigma2fwhm else: - raise ValueError("Argument 'out_format' must be one of %s." \ - % self.parformats) + raise ValueError("Argument 'out_format' must be one of %s." % self.parformats) elif in_format == "mu_sigma_area": if out_format == "internal": - g[1,1] = 2*self.sigma2fwhm/np.sqrt(self.maxwidth**2-(self.sigma2fwhm*pars[1])**2) + g[1, 1] = 2 * self.sigma2fwhm / np.sqrt(self.maxwidth**2 - (self.sigma2fwhm * pars[1]) ** 2) elif out_format == "pwa": - g[1,1] = self.sigma2fwhm + g[1, 1] = self.sigma2fwhm else: - raise ValueError("Argument 'out_format' must be one of %s." \ - % self.parformats) + raise ValueError("Argument 'out_format' must be one of %s." % self.parformats) else: - raise ValueError("Argument 'in_format' must be one of %s." \ - % self.parformats) + raise ValueError("Argument 'in_format' must be one of %s." % self.parformats) return g def _transform_parametersraw(self, pars, in_format, out_format): """Convert parameter values from in_format to out_format. - Also restores parameters to a preferred range if it permits multiple - values that correspond to the same physical result. 
+ Also restores parameters to a preferred range if it permits multiple + values that correspond to the same physical result. - Parameters - pars: Sequence of parameters - in_format: A format defined for this class - out_format: A format defined for this class - - Defined Formats - internal: [position, parameterized width-squared, area] - pwa: [position, full width at half maximum, area] - mu_sigma_area: [mu, sigma, area] + Parameters + pars: Sequence of parameters + in_format: A format defined for this class + out_format: A format defined for this class + + Defined Formats + internal: [position, parameterized width-squared, area] + pwa: [position, full width at half maximum, area] + mu_sigma_area: [mu, sigma, area] """ temp = np.array(pars) @@ -308,56 +329,63 @@ def _transform_parametersraw(self, pars, in_format, out_format): if in_format == "internal": # put the parameter for width in the "physical" quadrant [-pi/2,pi/2], # where .5*(sin(p)+1) covers fwhm = [0, maxwidth] - n = np.floor((temp[1]+np.pi/2)/np.pi) + n = np.floor((temp[1] + np.pi / 2) / np.pi) if np.mod(n, 2) == 0: - temp[1] = temp[1] - np.pi*n + temp[1] = temp[1] - np.pi * n else: - temp[1] = np.pi*n - temp[1] - temp[2] = np.abs(temp[2]) # map negative area to equivalent positive one + temp[1] = np.pi * n - temp[1] + temp[2] = np.abs(temp[2]) # map negative area to equivalent positive one elif in_format == "pwa": if temp[1] > self.maxwidth: - emsg = "Width %s (FWHM) greater than maximum allowed width %s" %(temp[1], self.maxwidth) + emsg = "Width %s (FWHM) greater than maximum allowed width %s" % ( + temp[1], + self.maxwidth, + ) raise SrMiseTransformationError(emsg) - temp[1] = np.arcsin(2.*temp[1]**2/self.maxwidth**2-1.) + temp[1] = np.arcsin(2.0 * temp[1] ** 2 / self.maxwidth**2 - 1.0) elif in_format == "mu_sigma_area": - fwhm = temp[1]*self.sigma2fwhm + fwhm = temp[1] * self.sigma2fwhm if fwhm > self.maxwidth: - emsg = "Width %s (FWHM) greater than maximum allowed width %s" %(fwhm, self.maxwidth) + emsg = "Width %s (FWHM) greater than maximum allowed width %s" % ( + fwhm, + self.maxwidth, + ) raise SrMiseTransformationError(emsg) - temp[1] = np.arcsin(2.*fwhm**2/self.maxwidth**2-1.) + temp[1] = np.arcsin(2.0 * fwhm**2 / self.maxwidth**2 - 1.0) else: - raise ValueError("Argument 'in_format' must be one of %s." \ - % self.parformats) + raise ValueError("Argument 'in_format' must be one of %s." % self.parformats) # Convert to specified output format from "internal" format. if out_format == "internal": pass elif out_format == "pwa": - temp[1] = np.sqrt(.5*(np.sin(temp[1])+1.)*self.maxwidth**2) + temp[1] = np.sqrt(0.5 * (np.sin(temp[1]) + 1.0) * self.maxwidth**2) elif out_format == "mu_sigma_area": - temp[1] = np.sqrt(.5*(np.sin(temp[1])+1.)*self.maxwidth**2)/self.sigma2fwhm + temp[1] = np.sqrt(0.5 * (np.sin(temp[1]) + 1.0) * self.maxwidth**2) / self.sigma2fwhm else: - raise ValueError("Argument 'out_format' must be one of %s." \ - % self.parformats) + raise ValueError("Argument 'out_format' must be one of %s." % self.parformats) return temp def _valueraw(self, pars, r): """Return value of width-limited Gaussian/r for the given parameters and r values. - pars: Sequence of parameters for a single width-limited Gaussian - pars[0]=peak position - pars[1]=effective width, up to fwhm=maxwidth as par[1] -> inf. 
- =tan(pi/2*fwhm/maxwidth) - pars[2]=multiplicative constant a, equivalent to peak area - r: sequence or scalar over which pars is evaluated + pars: Sequence of parameters for a single width-limited Gaussian + pars[0]=peak position + pars[1]=effective width, up to fwhm=maxwidth as par[1] -> inf. + =tan(pi/2*fwhm/maxwidth) + pars[2]=multiplicative constant a, equivalent to peak area + r: sequence or scalar over which pars is evaluated """ - return np.abs(pars[2])/(np.abs(r)*self.c1*np.sqrt(np.sin(pars[1])+1.))* \ - np.exp(-(r-pars[0])**2/(self.c2*(np.sin(pars[1])+1.))) + return ( + np.abs(pars[2]) + / (np.abs(r) * self.c1 * np.sqrt(np.sin(pars[1]) + 1.0)) + * np.exp(-((r - pars[0]) ** 2) / (self.c2 * (np.sin(pars[1]) + 1.0))) + ) def getmodule(self): return __name__ - #### Other methods #### + # Other methods #### def max(self, pars): """Return position and height of the peak maximum.""" @@ -371,18 +399,19 @@ def max(self, pars): # The Gaussian/r only has a local maximum under this condition. # Physically realistic peaks will always meet this condition, but # trying to fit a signal down to r=0 could conceivably lead to issues. - if tpars[0]**2 <= 4*tpars[1]**2: - emsg = ''.join(["No local maximum with parameters\n", str(pars)]) + if tpars[0] ** 2 <= 4 * tpars[1] ** 2: + emsg = "".join(["No local maximum with parameters\n", str(pars)]) raise ValueError(emsg) - rmax = .5*(tpars[0]+np.sqrt(tpars[0]**2-4*tpars[1]**2)) + rmax = 0.5 * (tpars[0] + np.sqrt(tpars[0] ** 2 - 4 * tpars[1] ** 2)) ymax = self._valueraw(pars, rmax) return np.array([rmax, ymax]) -#end of class GaussianOverR + +# end of class GaussianOverR # simple test code -if __name__ == '__main__': +if __name__ == "__main__": import matplotlib.pyplot as plt from numpy.random import randn @@ -391,26 +420,26 @@ def max(self, pars): from diffpy.srmise.modelevaluators import AICc from diffpy.srmise.peaks import Peaks - res = .01 - r = np.arange(2,4,res) - err = np.ones(len(r)) # default unknown errors - pf = GaussianOverR(.7) + res = 0.01 + r = np.arange(2, 4, res) + err = np.ones(len(r)) # default unknown errors + pf = GaussianOverR(0.7) evaluator = AICc() - pars = [[3, .2, 10], [3.5, .2, 10]] + pars = [[3, 0.2, 10], [3.5, 0.2, 10]] ideal_peaks = Peaks([pf.createpeak(p, "pwa") for p in pars]) - y = ideal_peaks.value(r) + .1*randn(len(r)) + y = ideal_peaks.value(r) + 0.1 * randn(len(r)) - guesspars = [[2.7, .15, 5], [3.7, .3, 5]] + guesspars = [[2.7, 0.15, 5], [3.7, 0.3, 5]] guess_peaks = Peaks([pf.createpeak(p, "pwa") for p in guesspars]) cluster = ModelCluster(guess_peaks, r, y, err, None, AICc, [pf]) qual1 = cluster.quality() - print qual1.stat + print(qual1.stat) cluster.fit() yfit = cluster.calc() qual2 = cluster.quality() - print qual2.stat + print(qual2.stat) plt.figure(1) plt.plot(r, y, r, yfit) diff --git a/diffpy/srmise/peaks/terminationripples.py b/diffpy/srmise/peaks/terminationripples.py index cdce773..e814820 100644 --- a/diffpy/srmise/peaks/terminationripples.py +++ b/diffpy/srmise/peaks/terminationripples.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columbia University in the City of New York # All rights reserved. 
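# --- illustrative sketch (editor's addition, not part of the patch) -------
# Check of the max() formula above: a Gaussian/r profile peaks at
# rmax = 0.5*(mu + sqrt(mu^2 - 4*sigma^2)), which exists only when
# mu^2 > 4*sigma^2.  Setting d/dr [exp(-(r-mu)^2/(2*sigma^2))/r] = 0 gives
# r^2 - mu*r + sigma^2 = 0, whose larger root is rmax.
import numpy as np

mu, sigma = 3.0, 0.2
rmax = 0.5 * (mu + np.sqrt(mu**2 - 4 * sigma**2))

r = np.linspace(2.5, 3.5, 1000001)
y = np.exp(-((r - mu) ** 2) / (2 * sigma**2)) / r
print(rmax, r[y.argmax()])   # both ~2.9866
# ---------------------------------------------------------------------------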
# # File coded by: Luke Granlund @@ -16,15 +17,15 @@ import numpy as np import scipy.fftpack as fp -import diffpy.srmise.srmiselog from diffpy.srmise.peaks.base import PeakFunction logger = logging.getLogger("diffpy.srmise") -class TerminationRipples (PeakFunction): + +class TerminationRipples(PeakFunction): """Methods for evaluation and parameter estimation of a peak function with termination ripples.""" - def __init__(self, base, qmax, extension=4., supersample=5., Cache=None): + def __init__(self, base, qmax, extension=4.0, supersample=5.0, Cache=None): """Peak function which adds termination ripples to existing function. Unlike other peak functions, TerminationRipples can only be evaluated @@ -54,7 +55,7 @@ def __init__(self, base, qmax, extension=4., supersample=5., Cache=None): PeakFunction.__init__(self, parameterdict, formats, default_formats, metadict, base, Cache) return - #### Methods required by PeakFunction #### + # Methods required by PeakFunction #### # TODO: A smart way to convert from the basefunctions estimate to an # appropriate one when ripples are considered. This may not be necessary, @@ -73,7 +74,6 @@ def estimate_parameters(self, r, y): reason.""" return self.base.estimate_parameters(r, y) - # TODO: Can this be implemented sanely for termination ripples? def scale_at(self, pars, x, scale): """Change parameters so value(x)->scale*value(x) for the base function. @@ -90,39 +90,39 @@ def scale_at(self, pars, x, scale): def _jacobianraw(self, pars, r, free): """Return Jacobian of base function with termination ripples. - Parameters - pars: Sequence of parameters for a single peak - r: sequence or scalar over which pars is evaluated - free: sequence of booleans which determines which derivatives are - needed. True for evaluation, False for no evaluation.""" + Parameters + pars: Sequence of parameters for a single peak + r: sequence or scalar over which pars is evaluated + free: sequence of booleans which determines which derivatives are + needed. True for evaluation, False for no evaluation.""" return self.base._jacobianraw(pars, r, free) def _transform_derivativesraw(self, pars, in_format, out_format): """Return gradient matrix for the pars converted from in_format to out_format. - Parameters - pars: Sequence of parameters - in_format: A format defined for base peak function - out_format: A format defined for base peak function""" + Parameters + pars: Sequence of parameters + in_format: A format defined for base peak function + out_format: A format defined for base peak function""" return self.base._transform_derivativesraw(pars, in_format, out_format) def _transform_parametersraw(self, pars, in_format, out_format): """Convert parameter values from in_format to out_format. - Parameters - pars: Sequence of parameters - in_format: A format defined for base peak function - out_format: A format defined for base peak function""" + Parameters + pars: Sequence of parameters + in_format: A format defined for base peak function + out_format: A format defined for base peak function""" return self.base._transform_parametersraw(pars, in_format, out_format) def _valueraw(self, pars, r): """Return value of base peak function for the given parameters and r values. 
- pars: Sequence of parameters for a single peak - r: sequence or scalar over which pars is evaluated""" + pars: Sequence of parameters for a single peak + r: sequence or scalar over which pars is evaluated""" return self.base._valueraw(pars, r) - #### Overridden PeakFunction functions #### + # Overridden PeakFunction functions #### # jacobian() and value() are not normally overridden by PeakFunction # subclasses, but are here to minimize the effect of edge-effects while # introducing termination ripples. @@ -137,17 +137,19 @@ def jacobian(self, peak, r, rng=None): a default value of 0. If caching is enabled these may be previously calculated values instead.""" if self is not peak._owner: - raise ValueError("Argument 'peak' must be evaluated by the " - "PeakFunction subclass instance with which " - "it is associated.") + raise ValueError( + "Argument 'peak' must be evaluated by the " + "PeakFunction subclass instance with which " + "it is associated." + ) # normally r will be a sequence, but also allow single numeric values try: if len(r) > 1: - dr = (r[-1]-r[0])/(len(r)-1) + dr = (r[-1] - r[0]) / (len(r) - 1) else: # dr is ad hoc if r is a single point - dr = 2*np.pi/(self.supersample*self.qmax) + dr = 2 * np.pi / (self.supersample * self.qmax) if rng is None: rng = slice(0, len(r)) @@ -158,12 +160,12 @@ def jacobian(self, peak, r, rng=None): for idx in range(len(output)): if jac[idx] is not None: jac[idx] = self.cut_freq(jac[idx], dr) - output[idx] = r * 0. + output[idx] = r * 0.0 output[idx][rng] = jac[idx][ext_slice] return output - except (TypeError): + except TypeError: # dr is ad hoc if r is a single point. - dr = 2*np.pi/(self.supersample*self.qmax) + dr = 2 * np.pi / (self.supersample * self.qmax) (ext_r, ext_slice) = self.extend_grid(np.array([r]), dr) jac = self._jacobianraw(peak.pars, ext_r, peak.free) for idx in range(len(output)): @@ -171,7 +173,6 @@ def jacobian(self, peak, r, rng=None): jac[idx] = self.cut_freq(jac[idx], dr)[ext_slice][0] return jac - def value(self, peak, r, rng=None): """Calculate (rippled) value of peak, possibly restricted by range. @@ -187,13 +188,15 @@ def value(self, peak, r, rng=None): previously calculated values instead. """ if self is not peak._owner: - raise ValueError("Argument 'peak' must be evaluated by the " - "PeakFunction subclass instance with which " - "it is associated.") + raise ValueError( + "Argument 'peak' must be evaluated by the " + "PeakFunction subclass instance with which " + "it is associated." + ) # normally r will be a sequence, but also allow single numeric values - dr_super = 2*np.pi/(self.supersample*self.qmax) + dr_super = 2 * np.pi / (self.supersample * self.qmax) if np.isscalar(r): # dr is ad hoc if r is a single point. (ext_r, ext_slice) = self.extend_grid(np.array([r]), dr_super) @@ -204,7 +207,7 @@ def value(self, peak, r, rng=None): if rng is None: rng = slice(0, len(r)) - output = r * 0. + output = r * 0.0 # Make sure the actual dr used for finding termination ripples # is at least as fine as dr_super, while still calculating the @@ -215,13 +218,13 @@ def value(self, peak, r, rng=None): # of sampling needed to avoid the worst of these discretization # issues is difficult to determine without detailed knowledge # of the underlying function. 
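# A minimal sketch of the segmentation arithmetic implemented just below;
# qmax, supersample, and the data spacing dr here are assumed values for
# illustration, not numbers taken from this patch:
#
#     import numpy as np
#     dr_super = 2 * np.pi / (5.0 * 25.0)  # supersample=5, qmax=25 -> ~0.0503
#     segments = np.ceil(0.08 / dr_super)  # dr=0.08 -> 2.0 segments
#     dr_segmented = 0.08 / segments       # 0.04, at least as fine as dr_super
#
# In words: each step of the data grid is subdivided until the working grid
# is no coarser than the supersampled spacing used to compute the ripples.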
- dr = (r[-1]-r[0])/(len(r)-1) - segments = np.ceil(dr/dr_super) - dr_segmented = dr/segments + dr = (r[-1] - r[0]) / (len(r) - 1) + segments = np.ceil(dr / dr_super) + dr_segmented = dr / segments rpart = r[rng] if segments > 1: - rpart = np.arange(rpart[0], rpart[-1] + dr_segmented/2, dr_segmented) + rpart = np.arange(rpart[0], rpart[-1] + dr_segmented / 2, dr_segmented) (ext_r, ext_slice) = self.extend_grid(rpart, dr_segmented) value = self._valueraw(peak.pars, ext_r) @@ -233,7 +236,7 @@ def value(self, peak, r, rng=None): def getmodule(self): return __name__ - #### Other methods #### + # Other methods #### def cut_freq(self, sequence, delta): """Remove high-frequency components from sequence. @@ -244,64 +247,64 @@ def cut_freq(self, sequence, delta): Parameters sequence: (numpy array) The sequence to alter. delta: The spacing between elements in sequence.""" - padlen = int(2**np.ceil(np.log2(len(sequence)))) + padlen = int(2 ** np.ceil(np.log2(len(sequence)))) padseq = fp.fft(sequence, padlen) - dq = 2*np.pi/((padlen-1)*delta) - lowidx = int(np.ceil(self.qmax/dq)) - hiidx = padlen+1-lowidx + dq = 2 * np.pi / ((padlen - 1) * delta) + lowidx = int(np.ceil(self.qmax / dq)) + hiidx = padlen + 1 - lowidx # Remove hi-frequency components - padseq[lowidx:hiidx]=0 + padseq[lowidx:hiidx] = 0 padseq = fp.ifft(padseq) - return np.real(padseq[0:len(sequence)]) + return np.real(padseq[0 : len(sequence)]) def extend_grid(self, r, dr): """Return (extended r, slice giving original range).""" - ext = self.extension*2*np.pi/self.qmax - left_ext = np.arange(r[0]-dr, max(0., r[0]-ext-dr), -dr)[::-1] - right_ext = np.arange(r[-1]+dr, r[-1]+ext+dr, dr) + ext = self.extension * 2 * np.pi / self.qmax + left_ext = np.arange(r[0] - dr, max(0.0, r[0] - ext - dr), -dr)[::-1] + right_ext = np.arange(r[-1] + dr, r[-1] + ext + dr, dr) ext_r = np.concatenate((left_ext, r, right_ext)) - ext_slice = slice(len(left_ext), len(ext_r)-len(right_ext)) + ext_slice = slice(len(left_ext), len(ext_r) - len(right_ext)) return (ext_r, ext_slice) -#end of class TerminationRipples + +# end of class TerminationRipples # simple test code -if __name__ == '__main__': +if __name__ == "__main__": import matplotlib.pyplot as plt from numpy.random import randn from diffpy.srmise.modelcluster import ModelCluster - from diffpy.srmise.modelevaluator import AICc - from diffpy.srmise.peakfunctions.gaussianoverr import GaussianOverR - from diffpy.srmise.peakfunctions.peaks import Peaks - from diffpy.srmise.peakfunctions.terminationripples import TerminationRipples - - res = .01 - r = np.arange(2,4,res) - err = np.ones(len(r)) #default unknown errors - pf1 = GaussianOverR(.7) - pf2 = TerminationRipples(pf1, 20.) 
+ from diffpy.srmise.modelevaluators import AICc + from diffpy.srmise.peaks import Peaks + from diffpy.srmise.peaks.gaussianoverr import GaussianOverR + + res = 0.01 + r = np.arange(2, 4, res) + err = np.ones(len(r)) # default unknown errors + pf1 = GaussianOverR(0.7) + pf2 = TerminationRipples(pf1, 20.0) evaluator = AICc() - pars = [[3, .2, 10], [3.5, .2, 10]] + pars = [[3, 0.2, 10], [3.5, 0.2, 10]] ideal_peaks = Peaks([pf1.createpeak(p, "pwa") for p in pars]) ripple_peaks = Peaks([pf2.createpeak(p, "pwa") for p in pars]) y_ideal = ideal_peaks.value(r) - y_ripple = ripple_peaks.value(r) + .1*randn(len(r)) + y_ripple = ripple_peaks.value(r) + 0.1 * randn(len(r)) - guesspars = [[2.7, .15, 5], [3.7, .3, 5]] + guesspars = [[2.7, 0.15, 5], [3.7, 0.3, 5]] guess_peaks = Peaks([pf2.createpeak(p, "pwa") for p in guesspars]) cluster = ModelCluster(guess_peaks, r, y_ripple, err, None, AICc, [pf2]) qual1 = cluster.quality() - print qual1.stat + print(qual1.stat) cluster.fit() yfit = cluster.calc() qual2 = cluster.quality() - print qual2.stat + print(qual2.stat) plt.figure(1) plt.plot(r, y_ideal, r, y_ripple, r, yfit) diff --git a/diffpy/srmise/peakstability.py b/diffpy/srmise/peakstability.py index 3897a2d..f53eed2 100644 --- a/diffpy/srmise/peakstability.py +++ b/diffpy/srmise/peakstability.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columbia University in the City of New York # All rights reserved. # # File coded by: Luke Granlund @@ -27,7 +28,7 @@ class PeakStability: """Utility to test robustness of peaks. results: [error scalar, model, bl, dr] - ppe: a PDFPeakExtraction instance """ + ppe: a PDFPeakExtraction instance""" def __init__(self): self.results = [] @@ -39,11 +40,11 @@ def setppe(self, ppe): def load(self, filename): try: - import cPickle as pickle - except: - import pickle + import cPickle as pickle + except ImportError: + import pickle - in_s = open(filename, 'rb') + in_s = open(filename, "rb") try: (self.results, ppestr) = pickle.load(in_s) self.ppe = PDFPeakExtraction() @@ -67,10 +68,10 @@ def load(self, filename): def save(self, filename): try: - import cPickle as pickle - except: - import pickle - out_s = open(filename, 'wb') + import cPickle as pickle + except ImportError: + import pickle + out_s = open(filename, "wb") try: # Write to the stream outstr = self.ppe.writestr() @@ -82,18 +83,23 @@ def save(self, filename): if r[2] is None: bldict = None else: - bldict = {"pars":r[2].pars, "free":r[2].free, "removable":r[2].removable, "static_owner":r[2].static_owner} + bldict = { + "pars": r[2].pars, + "free": r[2].free, + "removable": r[2].removable, + "static_owner": r[2].static_owner, + } results2.append([r[0], r[1], bldict, r[3]]) pickle.dump([results2, outstr], out_s) finally: out_s.close() - def plotseries(self, style='o', **kwds): + def plotseries(self, style="o", **kwds): plt.figure() plt.ioff() for e, r, bl, dr in self.results: peakpos = [p["position"] for p in r] - es = [e]*len(peakpos) + es = [e] * len(peakpos) plt.plot(peakpos, es, style, **kwds) plt.ion() plt.draw() @@ -103,19 +109,30 @@ def plot(self, **kwds): plt.clf() plt.plot(*self.ppe.extracted.plottable(), **kwds) q = self.ppe.extracted.quality() - plt.suptitle("[%i/%i]\n" - "Uncertainty: %6.3f. Peaks: %i.\n" - "Quality: %6.3f.
Chi-square: %6.3f" - %(self.current+1, len(self.results), self.ppe.effective_dy[0], len(self.ppe.extracted.model), q.stat, q.chisq)) + plt.suptitle( + "[%i/%i]\n" + "Uncertainty: %6.3f. Peaks: %i.\n" + "Quality: %6.3f. Chi-square: %6.3f" + % ( + self.current + 1, + len(self.results), + self.ppe.effective_dy[0], + len(self.ppe.extracted.model), + q.stat, + q.chisq, + ) + ) def setcurrent(self, idx): """Make the idxth model the active one.""" self.current = idx if idx is not None: result = self.results[idx] - self.ppe.setvars(quiet=True, effective_dy=result[0]*np.ones(len(self.ppe.x))) + self.ppe.setvars(quiet=True, effective_dy=result[0] * np.ones(len(self.ppe.x))) (r, y, dr, dy) = self.ppe.resampledata(result[3]) - self.ppe.extracted = ModelCluster(result[1], result[2], r, y, dy, None, self.ppe.error_method, self.ppe.pf) + self.ppe.extracted = ModelCluster( + result[1], result[2], r, y, dy, None, self.ppe.error_method, self.ppe.pf + ) else: self.ppe.clearcalc() @@ -134,7 +151,7 @@ def animate(self, results=None, step=False, **kwds): self.setcurrent(0) plt.ion() plt.plot(*self.ppe.extracted.plottable()) - a = plt.axis() + plt.axis() for i in results: self.setcurrent(i) plt.ioff() @@ -142,7 +159,7 @@ def animate(self, results=None, step=False, **kwds): plt.ion() plt.draw() if step: - raw_input() + input() self.setcurrent(oldcurrent) @@ -153,18 +170,20 @@ def run(self, err, savecovs=False): self.results = [] covs = [] for i, e in enumerate(err): - print "---- Running for uncertainty %s (%i/%i) ----" %(e, i, len(err)) + print("---- Running for uncertainty %s (%i/%i) ----" % (e, i, len(err))) self.ppe.clearcalc() self.ppe.setvars(effective_dy=e) if savecovs: covs.append(self.ppe.extract()) else: self.ppe.extract() - dr = (self.ppe.extracted.r_cluster[-1]-self.ppe.extracted.r_cluster[0])/(len(self.ppe.extracted.r_cluster)-1) + dr = (self.ppe.extracted.r_cluster[-1] - self.ppe.extracted.r_cluster[0]) / ( + len(self.ppe.extracted.r_cluster) - 1 + ) self.results.append([e, self.ppe.extracted.model, self.ppe.extracted.baseline, dr]) for e, r, bl, dr in self.results: - print "---- Results for uncertainty %s ----" %e - print r + print("---- Results for uncertainty %s ----" % e) + print(r) return covs diff --git a/diffpy/srmise/srmiseerrors.py b/diffpy/srmise/srmiseerrors.py index f7ef037..62953d5 100644 --- a/diffpy/srmise/srmiseerrors.py +++ b/diffpy/srmise/srmiseerrors.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columia University in the City of New York # All rights reserved. # # File coded by: Luke Granlund @@ -28,7 +29,7 @@ """ -### Superclass class for diffpy.srmise.mise +# Superclass class for diffpy.srmise.mise class SrMiseError(Exception): """Superclass of all diffpy.srmise exceptions.""" @@ -43,7 +44,7 @@ def __str__(self): return self.info -### SrMiseError subclasses ### +# SrMiseError subclasses ### class SrMiseDataFormatError(SrMiseError): diff --git a/diffpy/srmise/srmiselog.py b/diffpy/srmise/srmiselog.py index 3ea2c50..361ea4b 100644 --- a/diffpy/srmise/srmiselog.py +++ b/diffpy/srmise/srmiselog.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. 
+# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columbia University in the City of New York # All rights reserved. # # File coded by: Luke Granlund @@ -40,21 +41,22 @@ import logging import os.path import re -import sys from diffpy.srmise.srmiseerrors import SrMiseDataFormatError, SrMiseFileError, SrMiseLogError -### Default settings ### +# Default settings ### defaultformat = "%(message)s" defaultlevel = logging.INFO -LEVELS = {'debug': logging.DEBUG, 'info': logging.INFO, 'warning': logging.WARNING, 'error': logging.ERROR, 'critical': logging.CRITICAL} +LEVELS = { + "debug": logging.DEBUG, + "info": logging.INFO, + "warning": logging.WARNING, + "error": logging.ERROR, + "critical": logging.CRITICAL, +} -### Set up logging to stdout ### +# Set up logging to stdout ### logger = logging.getLogger("diffpy.srmise") logger.setLevel(defaultlevel) ch = logging.StreamHandler() @@ -65,20 +67,22 @@ logger.addHandler(ch) -### Optional file logger ### +# Optional file logger ### fh = None -### Make updated plots as fitting progresses. ### +# Make updated plots as fitting progresses. ### liveplots = False wait = False + def addfilelog(filename, level=defaultlevel, format=defaultformat): """Log output from diffpy.srmise in specified file. Parameters filename: Name of file to receive output level: The logging level - format: A string defining format of output messages conforming to logging package.""" + format: A string defining format of output messages conforming to logging package. + """ global fh fh = logging.FileHandler(filename) fh.setLevel(level) @@ -86,6 +90,7 @@ def addfilelog(filename, level=defaultlevel, format=defaultformat): fh.setFormatter(formatter) logger.addHandler(fh) + def setfilelevel(level): """Set level of file logger. @@ -101,6 +106,7 @@ def setfilelevel(level): emsg = "File handler does not exist, cannot set its level." raise SrMiseLogError(emsg) + def setlevel(level): """Set level of default (stdout) logger. @@ -112,6 +118,7 @@ def setlevel(level): if level < logger.getEffectiveLevel(): logger.setLevel(level) + def liveplotting(lp, w=False): """Set whether or not to use live plotting. @@ -136,7 +143,8 @@ def liveplotting(lp, w=False): raise ValueError(emsg) -### TracePeaks. Primary purpose is to enable creating movies. ### +# TracePeaks. Primary purpose is to enable creating movies.
### + class TracePeaks(object): """Output trace information during peak extraction.""" @@ -191,15 +199,20 @@ def maketrace(self, *args, **kwds): clusters.append(m.slice) for m in args[1:]: mc.replacepeaks(m.model) - return {"mc":mc, "clusters":clusters, "recursion":self.recursion, "counter":self.counter} + return { + "mc": mc, + "clusters": clusters, + "recursion": self.recursion, + "counter": self.counter, + } def writestr(self, trace): """Return string representation of current trace.""" lines = [] lines.append("### Trace") - lines.append("counter=%i" %trace["counter"]) - lines.append("recursion=%i" %trace["recursion"]) - lines.append("clusters=%s" %trace["clusters"]) + lines.append("counter=%i" % trace["counter"]) + lines.append("recursion=%i" % trace["recursion"]) + lines.append("clusters=%s" % trace["clusters"]) lines.append("### ModelCluster") lines.append(trace["mc"].writestr()) @@ -207,8 +220,8 @@ def writestr(self, trace): def write(self, trace): """Write current trace to file.""" - filename = "%s_%i" %(self.filebase, trace["counter"]) - f = open(filename, 'w') + filename = "%s_%i" % (self.filebase, trace["counter"]) + f = open(filename, "w") bytes = self.writestr(trace) f.write(bytes) f.close() @@ -225,12 +238,14 @@ def read(self, filename): "mc" - A ModelCluster instance "recursion" - The recursion level of mc""" try: - return self.readstr(open(filename,'rb').read()) - except SrMiseDataFormatError, err: + return self.readstr(open(filename, "rb").read()) + except SrMiseDataFormatError as err: logger.exception("") basename = os.path.basename(filename) - emsg = ("Could not open '%s' due to unsupported file format " + - "or corrupted data. [%s]") % (basename, err) + emsg = ("Could not open '%s' due to unsupported file format " + "or corrupted data. [%s]") % ( + basename, + err, + ) raise SrMiseFileError(emsg) return None @@ -247,43 +262,49 @@ def readstr(self, datastring): "recursion" - The recursion level of mc""" # find where the ModelCluster section starts - res = re.search(r'^#+ ModelCluster\s*(?:#.*\s+)*', datastring, re.M) + res = re.search(r"^#+ ModelCluster\s*(?:#.*\s+)*", datastring, re.M) if res: - header = datastring[:res.start()] - mc = datastring[res.end():].strip() + header = datastring[: res.start()] + mc = datastring[res.end() :].strip() else: emsg = "Required section 'ModelCluster' not found." raise SrMiseDataFormatError(emsg) # instantiate ModelCluster - if re.match(r'^None$', mc): + if re.match(r"^None$", mc): mc = None else: from diffpy.srmise.modelcluster import ModelCluster + mc = ModelCluster.factory(mc) - res = re.search(r'^clusters=(.*)$', header, re.M) + res = re.search(r"^clusters=(.*)$", header, re.M) if res: clusters = eval(res.groups()[0].strip()) else: emsg = "Required field 'clusters' not found." raise SrMiseDataFormatError(emsg) - res = re.search(r'^recursion=(.*)$', header, re.M) + res = re.search(r"^recursion=(.*)$", header, re.M) if res: - recursion = eval(res.groups()[0].strip()) + eval(res.groups()[0].strip()) else: emsg = "Required field 'recursion' not found." raise SrMiseDataFormatError(emsg) - res = re.search(r'^counter=(.*)$', header, re.M) + res = re.search(r"^counter=(.*)$", header, re.M) if res: - counter = eval(res.groups()[0].strip()) + eval(res.groups()[0].strip()) else: emsg = "Required field 'counter' not found." 
raise SrMiseDataFormatError(emsg) - return {"mc":mc, "clusters":clusters, "recursion":self.recursion, "counter":self.counter} + return { + "mc": mc, + "clusters": clusters, + "recursion": self.recursion, + "counter": self.counter, + } def pushr(self): """Enter a layer of recursion, and return new level.""" @@ -315,16 +336,22 @@ def reset_trace(self): # filter property def setfilter(self, filter): - self.__filter = compile(" and ".join(["(%s)" %f for f in filter]), '', 'eval') - def getfilter(self): return self.__filter + self.__filter = compile(" and ".join(["(%s)" % f for f in filter]), "", "eval") + + def getfilter(self): + return self.__filter + filter = property(getfilter, setfilter) -### End of class TracePeaks + +# End of class TracePeaks + def settracer(**kwds): global tracer tracer = TracePeaks(**kwds) return tracer + # Default tracer never emits tracer = settracer() diff --git a/diffpy/srmise/version.py b/diffpy/srmise/version.py index d0c6e1d..094af4a 100644 --- a/diffpy/srmise/version.py +++ b/diffpy/srmise/version.py @@ -2,7 +2,8 @@ ############################################################################## # # SrMise by Luke Granlund -# (c) 2014 trustees of the Michigan State University. +# (c) 2014 trustees of the Michigan State University +# (c) 2024 trustees of Columbia University in the City of New York # All rights reserved. # # File coded by: Luke Granlund diff --git a/doc/examples/extract_single_peak.py b/doc/examples/extract_single_peak.py index 99edd4c..b32b0be 100644 --- a/doc/examples/extract_single_peak.py +++ b/doc/examples/extract_single_peak.py @@ -27,26 +27,26 @@ import matplotlib.pyplot as plt -from diffpy.srmise import PDFPeakExtraction from diffpy.srmise.applications.plot import makeplot from diffpy.srmise.baselines import Polynomial +from diffpy.srmise.pdfpeakextraction import PDFPeakExtraction def run(plot=True): - ## Initialize peak extraction + # Initialize peak extraction # Create peak extraction object ppe = PDFPeakExtraction() # Load the PDF from a file ppe.loadpdf("data/Ag_nyquist_qmax30.gr") - ## Set up extraction parameters. - # For convenience we add all parameters to a dictionary before passing them + # Set up extraction parameters. + # For convenience, we add all parameters to a dictionary before passing them # to the extraction object. # # The "rng" (range) parameter defines the region over which peaks will be - # extracted and fit. For the well isolated nearest-neighbor silver peak, + # extracted and fit. For the well isolated nearest-neighbor silver peak, # which occurs near 2.9 angstroms, it is sufficient to perform extraction # between 2 and 3.5 angstroms. # @@ -61,24 +61,24 @@ def run(plot=True): # Apply peak extraction parameters. ppe.setvars(**kwds) - ## Perform peak extraction + # Perform peak extraction ppe.extract() - ## Save output + # Save output # The write() method saves a file which preserves all aspects of peak # extraction and its results, by convention using the .srmise extension, # and which can later be read by diffpy.srmise. # # The writepwa() method saves a file intended as a human-readable summary. # In particular, it reports the position, width (as full-width at - # half-maximum), and area of of extracted peaks. The reported values + # half-maximum), and area of extracted peaks. The reported values # are for Gaussians in the radial distribution function (RDF) corresponding # to this PDF. ppe.write("output/extract_single_peak.srmise") ppe.writepwa("output/extract_single_peak.pwa") - ## Plot results.
- # Display plot of extracted peak. It is also possible to plot an existing + # Plot results. + # Display plot of extracted peak. It is also possible to plot an existing # .srmise file from the command line using # srmise output/Ag_singlepeak.srmise --no-extract --plot # For additional plotting options, run "srmiseplot --help". diff --git a/doc/examples/fit_initial.py b/doc/examples/fit_initial.py index 33bf4e6..71979e8 100644 --- a/doc/examples/fit_initial.py +++ b/doc/examples/fit_initial.py @@ -20,7 +20,6 @@ grid.""" import matplotlib.pyplot as plt -import numpy as np from diffpy.srmise import PDFPeakExtraction from diffpy.srmise.applications.plot import makeplot @@ -30,11 +29,11 @@ def run(plot=True): - ## Initialize peak extraction + # Initialize peak extraction ppe = PDFPeakExtraction() ppe.loadpdf("data/C60_fine_qmax21.gr") - ## Set up interpolated baseline. + # Set up interpolated baseline. # The FromSequence baseline creates an interpolated baseline from provided # r and G(r) values, either two lists or a file containing (r, G(r)) pairs. # The baseline has no parameters. This particular baseline was estimated @@ -43,7 +42,7 @@ def run(plot=True): blf = FromSequence("data/C60baseline.dat") bl = blf.actualize([]) - ## Set up fitting parameters + # Set up fitting parameters # A summary of how parameters impact fitting is given below. # "rng" - Same as peak extraction # "baseline" - Same as peak extraction @@ -66,7 +65,7 @@ def run(plot=True): kwds["dg"] = 5000 # ad hoc, but gives each point equal weight in fit. ppe.setvars(**kwds) - ## Set up termination ripples + # Set up termination ripples # Peak fitting never changes the peak function, so termination ripples # are not applied automatically as they are in peak extraction. # Termination ripples require setting the underlying peak function and qmax. @@ -95,7 +94,7 @@ def run(plot=True): # Perform fit. ppe.fit() - ## Save results + # Save results ppe.write("output/fit_initial.srmise") ppe.writepwa("output/fit_initial.pwa") diff --git a/doc/examples/multimodel_known_dG1.py b/doc/examples/multimodel_known_dG1.py index aa50c41..f0ceb1b 100644 --- a/doc/examples/multimodel_known_dG1.py +++ b/doc/examples/multimodel_known_dG1.py @@ -38,46 +38,45 @@ import diffpy.srmise.srmiselog as sml from diffpy.srmise import MultimodelSelection, PDFPeakExtraction -from diffpy.srmise.applications.plot import makeplot def run(plot=True): - ## Suppress mundane output + # Suppress mundane output # When running scripts, especially involving multiple trials, it can be # useful to suppress many of the diffpy.srmise messages. Valid levels # include "debug", "info" (the default), "warning", "error", and # "critical." See diffpy.srmise.srmiselog for more information. sml.setlevel("warning") - ## Initialize peak extraction from saved trial + # Initialize peak extraction from saved trial ppe = PDFPeakExtraction() ppe.read("output/query_results.srmise") ppe.clearcalc() - ## Set up extraction parameters + # Set up extraction parameters # All parameters loaded from .srmise file. # Setting new values will override the previous values. kwds = {} kwds["rng"] = [10.9, 15] # Region of PDF with some overlap. ppe.setvars(**kwds) - ## Create multimodel selection object. + # Create multimodel selection object. # The MultimodelSelection class keeps track of the results of peak # extraction as the assumed uncertainty dg is varied. 
ms = MultimodelSelection() ms.setppe(ppe) - ## Define range of dg values + # Define range of dg values # For the purpose of illustration use 15 evenly-spaced values of dg where # 50% < dg < 120% of mean experimental dG in extraction range. dg_mean = np.mean(ppe.dy[ppe.getrangeslice()]) dgs = np.linspace(0.5 * dg_mean, 1.2 * dg_mean, 15) - ## Perform peak extraction for each of the assumed uncertainties. + # Perform peak extraction for each of the assumed uncertainties. ms.run(dgs) - ## Save results + # Save results # The file known_dG_models.dat saves the models generated above. The file # known_dG_aics.dat saves the value of the AIC of each model when evaluated # on a Nyquist-sampled grid using each of the dg values used to generate diff --git a/doc/examples/multimodel_known_dG2.py b/doc/examples/multimodel_known_dG2.py index d061981..e72db46 100644 --- a/doc/examples/multimodel_known_dG2.py +++ b/doc/examples/multimodel_known_dG2.py @@ -39,27 +39,42 @@ from diffpy.srmise.applications.plot import makeplot # distances from ideal Ag (refined to PDF) -dcif = np.array([11.2394, 11.608, 11.9652, 12.3121, 12.6495, 12.9781, 13.2986, - 13.6116, 13.9175, 14.2168, 14.51, 14.7973]) +dcif = np.array( + [ + 11.2394, + 11.608, + 11.9652, + 12.3121, + 12.6495, + 12.9781, + 13.2986, + 13.6116, + 13.9175, + 14.2168, + 14.51, + 14.7973, + ] +) + def run(plot=True): # Suppress mundane output sml.setlevel("warning") - ## Create multimodeling object and load diffpy.srmise results from file. + # Create multimodeling object and load diffpy.srmise results from file. ms = MultimodelSelection() ms.load("output/known_dG_models.dat") ms.loadaics("output/known_dG_aics.dat") - ## Use Nyquist sampling + # Use Nyquist sampling # Standard AIC analysis assumes the data have independent uncertainties. # Nyquist sampling minimizes correlations in the PDF, which is the closest # approximation to independence possible for the PDF. - dr = np.pi/ms.ppe.qmax - (r,y,dr2,dy) = ms.ppe.resampledata(dr) + dr = np.pi / ms.ppe.qmax + (r, y, dr2, dy) = ms.ppe.resampledata(dr) - ## Classify models + # Classify models # All models are placed into classes. Models in the same class # should be essentially identical (same peak parameters, etc.) # up to a small tolerance determined by comparing individual peaks. The @@ -75,16 +90,16 @@ def run(plot=True): tolerance = 0.2 ms.classify(r, tolerance) - ## Summarize various facts about the analysis. + # Summarize various facts about the analysis. num_models = len(ms.results) num_classes = len(ms.classes) - print "------- Multimodeling Summary --------" - print "Models: %i" %num_models - print "Classes: %i (tol=%s)" %(num_classes, tolerance) - print "Range of dgs: %f-%f" %(ms.dgs[0], ms.dgs[-1]) - print "Nyquist-sampled data points: %i" %len(r) + print("------- Multimodeling Summary --------") + print("Models: %i" % num_models) + print("Classes: %i (tol=%s)" % (num_classes, tolerance)) + print("Range of dgs: %f-%f" % (ms.dgs[0], ms.dgs[-1])) + print("Nyquist-sampled data points: %i" % len(r)) - ## Get dG usable as key in analysis. + # Get dG usable as key in analysis. # The Akaike probabilities were calculated for many assumed values of the # experimental uncertainty dG, and each of these assumed dG is used as a # key when obtaining the corresponding results. Numerical precision can @@ -92,7 +107,7 @@ def run(plot=True): # the key closest to its argument. dG = ms.dg_key(np.mean(ms.ppe.dy)) - ## Find "best" models. + # Find "best" models. # In short, models with greatest Akaike probability. 
Akaike probabilities # can only be validly compared if they were calculated for identical data, # namely identical PDF values *and* uncertainties, and are only reliable @@ -101,11 +116,11 @@ # # The present PDF satisfies these conditions, so the rankings below reflect # an AIC-based estimate of which of the tested models the data best support. - print "\n--------- Model Rankings for dG = %f ---------" %dG - print "Rank Model Class Free AIC Prob File" + print("\n--------- Model Rankings for dG = %f ---------" % dG) + print("Rank Model Class Free AIC Prob File") for i in range(len(ms.classes)): - ## Generate information about best model in ith best class. + # Generate information about best model in ith best class. # The get(dG, *args, **kwds) method returns a tuple of values # corresponding to string arguments for the best model in best class at # given dG. When the corder keyword is given it returns the model from # "prob" -> The AIC probability given uncertainty dG # These all have dedicated getter functions. For example, the model # index can also be obtained using get_model(dG, corder=i) - (model, cls, nfree, aic, prob) = \ - ms.get(dG, "model", "class", "nfree", "aic", "prob", corder=i) + (model, cls, nfree, aic, prob) = ms.get(dG, "model", "class", "nfree", "aic", "prob", corder=i) - filename_base = "output/known_dG_m"+str(model) + filename_base = "output/known_dG_m" + str(model) - # Print info for this model - print "%4i %5i %5i %4i %10.4e %6.3f %s" \ - %(i+1, model, cls, nfree, aic, prob, filename_base + ".pwa") + # Print info for this model + print( + "%4i %5i %5i %4i %10.4e %6.3f %s" % (i + 1, model, cls, nfree, aic, prob, filename_base + ".pwa") + ) # A message added as a comment to saved .pwa file. - msg = ["Multimodeling Summary", - "---------------------", - "Evaluated at dG: %s" %dG, - "Model: %i (of %i)" %(model, num_models), - "Class: %i (of %i, tol=%s)" %(cls, num_classes, tolerance), - "Akaike probability: %g" %prob, - "Rank: %i" %(i+1),] + msg = [ + "Multimodeling Summary", + "---------------------", + "Evaluated at dG: %s" % dG, + "Model: %i (of %i)" % (model, num_models), + "Class: %i (of %i, tol=%s)" % (cls, num_classes, tolerance), + "Akaike probability: %g" % prob, + "Rank: %i" % (i + 1), + ] msg = "\n".join(msg) # Make this the active model @@ -146,13 +163,11 @@ def run(plot=True): if plot: plt.figure() makeplot(ms.ppe, dcif) - plt.title("Model %i/Class %i (Rank %i, AIC prob=%f)" \ - %(model, cls, i+1, prob)) + plt.title("Model %i/Class %i (Rank %i, AIC prob=%f)" % (model, cls, i + 1, prob)) # Uncomment line below to save figures. # plt.savefig(filename_base + ".png", format="png") - - ## 3D plot of Akaike probabilities + # 3D plot of Akaike probabilities # This plot shows the Akaike probabilities of all classes as a function # of assumed uncertainty dG. This gives a rough sense of how the models # selected by an AIC-based analysis would vary if the experimental # are highlighted. if plot: plt.figure() - ms.plot3dclassprobs(probfilter=[0.0, 1.], highlight=[dG]) + ms.plot3dclassprobs(probfilter=[0.0, 1.0], highlight=[dG]) plt.tight_layout() # Uncomment line below to save figure.
- #plt.savefig("output/known_dG_probs.png", format="png", bbox_inches="tight") + # plt.savefig("output/known_dG_probs.png", format="png", bbox_inches="tight") if plot: plt.show() -if __name__ == '__main__': + +if __name__ == "__main__": run() diff --git a/doc/examples/multimodel_unknown_dG1.py b/doc/examples/multimodel_unknown_dG1.py index 3016fb3..7a6a2b8 100644 --- a/doc/examples/multimodel_unknown_dG1.py +++ b/doc/examples/multimodel_unknown_dG1.py @@ -42,18 +42,18 @@ def run(plot=True): - ## Suppress mundane output + # Suppress mundane output # When running scripts, especially involving multiple trials, it can be # useful to suppress many of the diffpy.srmise messages. Valid levels # include "debug", "info" (the default), "warning", "error", and # "critical." See diffpy.srmise.srmiselog for more information. sml.setlevel("warning") - ## Initialize peak extraction + # Initialize peak extraction ppe = PDFPeakExtraction() ppe.loadpdf("data/C60_fine_qmax21.gr") - ## Set up extraction parameters + # Set up extraction parameters # The FromSequence baseline interpolates (r, G(r)) values read from a # specified file. It has parameters. This particular baseline was # calculated by approximating the C60 sample as a face-centered cubic @@ -65,22 +65,22 @@ def run(plot=True): kwds["cres"] = 0.05 ppe.setvars(**kwds) - ## Create multimodel selection object. + # Create multimodel selection object. # The MultimodelSelection class keeps track of the results of peak # extraction as the assumed uncertainty dg is varied. ms = MultimodelSelection() ms.setppe(ppe) - ## Define range of dg values + # Define range of dg values # For the purpose of illustration use 20 evenly-spaced values of dg where # 1% < dg < 10% of max gr value between r=1 and 7.25. grmax = np.max(ppe.y[ppe.getrangeslice()]) dgs = np.linspace(0.01 * grmax, 0.10 * grmax, 20) - ## Perform peak extraction for each of the assumed uncertainties. + # Perform peak extraction for each of the assumed uncertainties. ms.run(dgs) - ## Save results + # Save results # The file C60_models.dat saves the models generated above. The file # C60_aics.dat saves the value of the AIC of each model when evaluated # on a Nyquist-sampled grid using each of the dg values used to generate diff --git a/doc/examples/multimodel_unknown_dG2.py b/doc/examples/multimodel_unknown_dG2.py index 1bc9f60..9a4b24a 100644 --- a/doc/examples/multimodel_unknown_dG2.py +++ b/doc/examples/multimodel_unknown_dG2.py @@ -46,30 +46,51 @@ from diffpy.srmise.applications.plot import makeplot # distances from ideal (unrefined) C60 -dcif = np.array([1.44, 2.329968944, 2.494153163, 2.88, 3.595985339, - 3.704477734, 4.132591264, 4.520339129, 4.659937888, - 4.877358006, 5.209968944, 5.405310018, 5.522583786, - 5.818426502, 6.099937888, 6.164518388, 6.529777754, - 6.686673127, 6.745638756, 6.989906831, 7.136693738]) +dcif = np.array( + [ + 1.44, + 2.329968944, + 2.494153163, + 2.88, + 3.595985339, + 3.704477734, + 4.132591264, + 4.520339129, + 4.659937888, + 4.877358006, + 5.209968944, + 5.405310018, + 5.522583786, + 5.818426502, + 6.099937888, + 6.164518388, + 6.529777754, + 6.686673127, + 6.745638756, + 6.989906831, + 7.136693738, + ] +) + def run(plot=True): # Suppress mundane output sml.setlevel("warning") - ## Create multimodeling object and load diffpy.srmise results from file. + # Create multimodeling object and load diffpy.srmise results from file. 
ms = MultimodelSelection() ms.load("output/unknown_dG_models.dat") ms.loadaics("output/unknown_dG_aics.dat") - ## Use Nyquist sampling + # Use Nyquist sampling # Standard AIC analysis assumes the data have independent uncertainties. # Nyquist sampling minimizes correlations in the PDF, which is the closest # approximation to independence possible for the PDF. - dr = np.pi/ms.ppe.qmax - (r,y,dr2,dy) = ms.ppe.resampledata(dr) + dr = np.pi / ms.ppe.qmax + (r, y, dr2, dy) = ms.ppe.resampledata(dr) - ## Classify models + # Classify models # All models are placed into classes. Models in the same class # should be essentially identical (same peak parameters, etc.) # up to a small tolerance determined by comparing individual peaks. The @@ -85,16 +106,16 @@ def run(plot=True): tolerance = 0.2 ms.classify(r, tolerance) - ## Summarize various facts about the analysis. + # Summarize various facts about the analysis. num_models = len(ms.results) num_classes = len(ms.classes) - print "------- Multimodeling Summary --------" - print "Models: %i" %num_models - print "Classes: %i (tol=%s)" %(num_classes, tolerance) - print "Range of dgs: %f-%f" %(ms.dgs[0], ms.dgs[-1]) - print "Nyquist-sampled data points: %i" %len(r) + print("------- Multimodeling Summary --------") + print("Models: %i" % num_models) + print("Classes: %i (tol=%s)" % (num_classes, tolerance)) + print("Range of dgs: %f-%f" % (ms.dgs[0], ms.dgs[-1])) + print("Nyquist-sampled data points: %i" % len(r)) - ## Find "best" models. + # Find "best" models. # In short, models with greatest Akaike probability. Akaike probabilities # can only be validly compared if they were calculated for identical data, # namely identical PDF values *and* uncertainties, and are only reliable @@ -115,16 +136,15 @@ def run(plot=True): best_classes = np.unique([ms.get_class(dG) for dG in ms.dgs]) best_dGs = [] for cls in best_classes: - cls_probs = [ms.get_prob(dG) if ms.get_class(dG) == cls else 0 \ - for dG in ms.dgs] + cls_probs = [ms.get_prob(dG) if ms.get_class(dG) == cls else 0 for dG in ms.dgs] dG = ms.dgs[np.argmax(cls_probs)] best_dGs.append(dG) - print "\n--------- Best models for at least one dG ---------" %dG - print " Best dG Model Class Free AIC Prob File" + print("\n--------- Best models for at least one dG ---------") + print(" Best dG Model Class Free AIC Prob File") for dG in best_dGs: - ## Generate information about best model. + # Generate information about best model. # The get(dG, *args, **kwds) method returns a tuple of values # corresponding to string arguments for the best model in best class at # given dG. When the corder keyword is given it returns the model from # "aic" -> The AIC for this model given uncertainty dG # "prob" -> The AIC probability given uncertainty dG # These all have dedicated getter functions. - (model, cls, nfree, aic, prob) = \ - ms.get(dG, "model", "class", "nfree", "aic", "prob") + (model, cls, nfree, aic, prob) = ms.get(dG, "model", "class", "nfree", "aic", "prob") - filename_base = "output/unknown_dG_m"+str(model) + filename_base = "output/unknown_dG_m" + str(model) - # Print info for this model - print "%10.4e %5i %5i %4i %10.4e %6.3f %s" \ - %(dG, model, cls, nfree, aic, prob, filename_base + ".pwa") + # Print info for this model + print( + "%10.4e %5i %5i %4i %10.4e %6.3f %s" % (dG, model, cls, nfree, aic, prob, filename_base + ".pwa") + ) # A message added as a comment to saved .pwa file.
best_from = [dg for dg in ms.dgs if ms.get_class(dg) == cls] - msg = ["Multimodeling Summary", - "---------------------", - "Model: %i (of %i)" %(model, num_models), - "Class: %i (of %i, tol=%s)" %(cls, num_classes, tolerance), - "Best model from dG: %s-%s" %(best_from[0], best_from[-1]), - "Evaluated at dG: %s" %dG, - "Akaike probability: %g" %prob] + msg = [ + "Multimodeling Summary", + "---------------------", + "Model: %i (of %i)" % (model, num_models), + "Class: %i (of %i, tol=%s)" % (cls, num_classes, tolerance), + "Best model from dG: %s-%s" % (best_from[0], best_from[-1]), + "Evaluated at dG: %s" % dG, + "Akaike probability: %g" % prob, + ] msg = "\n".join(msg) # Make this the active model @@ -165,13 +187,11 @@ def run(plot=True): if plot: plt.figure() makeplot(ms.ppe, dcif) - plt.title("Model %i/Class %i (Best dG=%f, AIC prob=%f)" \ - %(model, cls, dG, prob)) + plt.title("Model %i/Class %i (Best dG=%f, AIC prob=%f)" % (model, cls, dG, prob)) # Uncomment line below to save figures. # plt.savefig(filename_base + ".png", format="png") - - ## 3D plot of Akaike probabilities + # 3D plot of Akaike probabilities # This plot shows the Akaike probabilities of all classes as a function # of assumed uncertainty dG. This gives a rough sense of how the models # selected by an AIC-based analysis would vary if the experimental @@ -179,13 +199,14 @@ def run(plot=True): # are highlighted at the various dG values found above. if plot: plt.figure() - ms.plot3dclassprobs(probfilter=[0.1, 1.], highlight=best_dGs) + ms.plot3dclassprobs(probfilter=[0.1, 1.0], highlight=best_dGs) plt.tight_layout() # Uncomment line below to save figure. - #plt.savefig("output/unknown_dG_probs.png", format="png", bbox_inches="tight") + # plt.savefig("output/unknown_dG_probs.png", format="png", bbox_inches="tight") if plot: plt.show() -if __name__ == '__main__': + +if __name__ == "__main__": run() diff --git a/doc/examples/parameter_summary.py b/doc/examples/parameter_summary.py index 2ba1ac1..1b8a195 100644 --- a/doc/examples/parameter_summary.py +++ b/doc/examples/parameter_summary.py @@ -37,25 +37,25 @@ def run(plot=True): - ## Initialize peak extraction + # Initialize peak extraction # Create peak extraction object ppe = PDFPeakExtraction() # Load the PDF from a file ppe.loadpdf("data/TiO2_fine_qmax26.gr") - ###### Set up extraction parameters. + # Set up extraction parameters. # In this section we'll examine the major extraction parameters in detail. # diffpy.srmise strives to provide reasonable default values for these # parameters. For normal use setting the range, baseline, and uncertainty # should be sufficient. kwds = {} - ## Range + # Range # Range defaults to the entire PDF if not specified. kwds["rng"] = [1.5, 10.0] - ## dg + # dg # diffpy.srmise selects model complexity based primarily on the uncertainty # of the PDF. Note that very small uncertainties (<1%) can make peak # extraction excessively slow. In general, the smaller the uncertainty the @@ -80,7 +80,7 @@ def run(plot=True): # 1273-1283. doi:10.1107/S1600576714010516 kwds["dg"] = 0.35 # Play with this value! - ## baseline + # baseline # As a crystal PDF, a linear baseline crossing the origin is appropriate. # Here we define the linear baseline B(r) = -.5*r + 0, and explicitly set # the y-intercept as a fixed parameter which will not be fit. For @@ -91,7 +91,7 @@ def run(plot=True): slope = -0.65 # Play with this value! 
y_intercept = 0.0 kwds["baseline"] = blfunc.actualize([slope, y_intercept], free=[True, False]) - ## pf + # pf # The pf (peakfunction) parameter allows setting the shape of peaks to be # extracted. Termination effects are added automatically to the peak # function during extraction. In the harmonic approximation of atomic @@ -109,7 +109,7 @@ def run(plot=True): pf = GaussianOverR(0.7) kwds["pf"] = [pf] # Despite the list, only one entry is currently supported. - ## qmax + # qmax # PDFs typically report the value of qmax (i.e. the maximum momentum # transfer q in the measurement), but it can be specified explicitly also. # If the PDF does not report qmax, diffpy.srmise attempts to estimate it @@ -119,7 +119,7 @@ def run(plot=True): # diffpy.srmise does not consider Nyquist sampling or termination effects. kwds["qmax"] = 26.0 - ## nyquist + # nyquist # This parameter governs whether diffpy.srmise attempts to find a model # on a Nyquist-sampled grid with dr=pi/qmax, which is a grid where data # uncertainties are least correlated without loss of information. By @@ -132,7 +132,7 @@ def run(plot=True): # doi:10.1103/PhysRevB.84.134105 kwds["nyquist"] = True - ## supersample + # supersample # This parameter dictates the data be oversampled by at least this factor # (relative to the Nyquist rate) during the early stages of peak # extraction. If the input PDF is even more finely sampled, that level of @@ -141,7 +141,7 @@ def run(plot=True): # finding and clustering process, but reduces speed. kwds["supersample"] = 4.0 - ## cres + # cres # The cres (clustering resolution) parameter governs the sensitivity of the # clustering method used by diffpy.srmise. In short, when the data are # being clustered, data which are further than the clustering resolution @@ -156,7 +156,7 @@ def run(plot=True): # Apply peak extraction parameters. ppe.setvars(**kwds) - ## initial_peaks + # initial_peaks # Initial peaks are peaks which are kept fixed during the early stages of # peak extraction, effectively condition results upon their values. Since # initial peaks are sometimes dependent on other SrMise parameters (e.g. @@ -168,7 +168,7 @@ def run(plot=True): # diffpy.srmise estimate the peak parameters. # 2) Explicit specification of peak parameters. - ## Initial peaks from approximate positions. + # Initial peaks from approximate positions. # This routine estimates peak parameters by finding the peak-like cluster # containing the specified point. It does not search for occluded peaks, # so works best on well-separated peaks. It does, however, take any @@ -177,7 +177,7 @@ def run(plot=True): for p in positions: ppe.estimate_peak(p) # adds to initial_peaks - ## Initial peaks from explicit parameters. + # Initial peaks from explicit parameters. # Adding initial peaks explicitly is similar to defining a baseline. # Namely, choosing a peak function and then actualizing it with given # parameters. For this example peaks are created from the same GaussianOverR @@ -194,22 +194,22 @@ def run(plot=True): peaks.append(pf.actualize(p, free=[True, False, True], in_format="pwa")) ppe.add_peaks(peaks) # adds to initial_peaks - ## Initial peaks and pruning + # Initial peaks and pruning # While initial peaks condition what other peaks can be extracted, by # default they can also be pruned if a simpler model appears better. To # prevent this, they can be set as non-removable. 
for ip in ppe.initial_peaks: ip.removable = False - ## Plot initial parameters + # Plot initial parameters if plot: makeplot(ppe) plt.title("Initial Peaks") - ###### Perform peak extraction + # Perform peak extraction ppe.extract() - ## Save output + # Save output # The write() method saves a file which preserves all aspects of peak # extraction and its results, by convention using the .srmise extension, # and which can later be read by diffpy.srmise. @@ -222,7 +222,7 @@ ppe.write("output/parameter_summary.srmise") ppe.writepwa("output/parameter_summary.pwa") - ## Plot results. + # Plot results. # Display plot of extracted peak. It is also possible to plot an existing # .srmise file from the command line using # srmise output/parameter_summary.srmise --no-extract --plot diff --git a/doc/examples/query_results.py b/doc/examples/query_results.py index c861ee5..8035921 100644 --- a/doc/examples/query_results.py +++ b/doc/examples/query_results.py @@ -30,12 +30,11 @@ import numpy as np from diffpy.srmise import ModelCovariance, PDFPeakExtraction -from diffpy.srmise.applications.plot import makeplot def run(plot=True): - ## Initialize peak extraction + # Initialize peak extraction # Create peak extraction object ppe = PDFPeakExtraction() @@ -49,23 +48,22 @@ ppebl.read("output/extract_single_peak.srmise") baseline = ppebl.extracted.baseline - ## Set up extraction parameters. + # Set up extraction parameters. # Peaks are extracted between 2 and 10 angstroms, using the baseline # from the isolated peak example. kwds = {} - kwds["rng"] = [2.0, 10.] + kwds["rng"] = [2.0, 10.0] kwds["baseline"] = baseline # Apply peak extraction parameters. ppe.setvars(**kwds) - ## Perform peak extraction, and retain object containing a copy of the + # Perform peak extraction, and retain object containing a copy of the # model and the full covariance matrix. cov = ppe.extract() - - print "\n======= Accessing SrMise Results ========" - ## Accessing results of extraction + print("\n======= Accessing SrMise Results ========") + # Accessing results of extraction # # Model parameters are organized using a nested structure, with a list # of peaks each of which is a list of parameters, similar to the # peak. Thus, this parameter can be referenced as (1,2). Several examples # are presented below. - - print "\n------ Parameter values and uncertainties ------" + print("\n------ Parameter values and uncertainties ------") # ModelCovariance.get() returns a (value, uncertainty) tuple for a given # parameter. These are the results for the nearest-neighbor peak. - p0 = cov.get((0,0)) - w0 = cov.get((0,1)) - a0 = cov.get((0,2)) - print "Nearest-neighbor peak: " - print " position = %f +/- %f" %p0 - print " width = %f +/- %f" %w0 - print " area = %f +/- %f" %a0 - print " Covariance(width, area) = ", cov.getcovariance((0,1),(0,2)) + p0 = cov.get((0, 0)) + w0 = cov.get((0, 1)) + a0 = cov.get((0, 2)) + print("Nearest-neighbor peak: ") + print(" position = %f +/- %f" % p0) + print(" width = %f +/- %f" % w0) + print(" area = %f +/- %f" % a0) + print(" Covariance(width, area) = ", cov.getcovariance((0, 1), (0, 2))) # Baseline parameters. By convention, baseline is final element in cov.
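# A minimal sketch extending the (i, j) convention above to other peaks,
# assuming at least two peaks were extracted (the indices and names here
# are illustrative, not part of the original example):
#
#     p1 = cov.get((1, 0))  # (value, uncertainty) of second peak's position
#     w1 = cov.get((1, 1))  # (value, uncertainty) of second peak's width
#     a1 = cov.get((1, 2))  # (value, uncertainty) of second peak's area
#
# The baseline itself, per the convention just noted, is cov.model[-1].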
(slope, intercept) = cov.model[-1] - print "\nThe linear baseline B(r)=%f*r + %f" \ - % tuple(par for par in cov.model[-1]) + print("\nThe linear baseline B(r)=%f*r + %f" % tuple(par for par in cov.model[-1])) - - print "\n ------ Uncertainties from a Saved File --------" + print("\n ------ Uncertainties from a Saved File --------") # A .srmise file does not save the full covariance matrix, so it must be # recalculated when loading from these files. For example, here is the # nearest-neighbor peak in the file which we used to define the initial # baseline. cov2 = ModelCovariance() ppebl.extracted.fit(fitbaseline=True, cov=cov2, cov_format="default_output") - p0_saved = cov2.get((0,0)) - w0_saved = cov2.get((0,1)) - a0_saved = cov2.get((0,2)) - print "Nearest-neighbor peak:" - print " position = %f +/- %f" %p0_saved - print " width == %f +/- %f" %w0_saved - print " area = = %f +/- %f" %a0_saved - print " Covariance(width, area) = ", cov2.getcovariance((0,1),(0,2)) - - print "\n ---------- Alternate Parameterizations ---------" - ## Different Parameterizations + p0_saved = cov2.get((0, 0)) + w0_saved = cov2.get((0, 1)) + a0_saved = cov2.get((0, 2)) + print("Nearest-neighbor peak:") + print(" position = %f +/- %f" % p0_saved) + print(" width = %f +/- %f" % w0_saved) + print(" area = %f +/- %f" % a0_saved) + print(" Covariance(width, area) = ", cov2.getcovariance((0, 1), (0, 2))) + + print("\n ---------- Alternate Parameterizations ---------") + # Different Parameterizations # Peaks and baselines may have equivalent parameterizations that are useful # in different situations. For example, the types defined by the # GaussianOverR peak function are: @@ -151,26 +145,24 @@ # would transform the second, third, and fourth peaks). If the keyword # is omitted, the transformation is attempted for all parts of the fit. cov.transform(in_format="pwa", out_format="mu_sigma_area", parts="peaks") - print "Width (sigma) of nearest-neighbor peak: %f +/- %f" %cov.get((0,1)) - + print("Width (sigma) of nearest-neighbor peak: %f +/- %f" % cov.get((0, 1))) - print "\n ------------ Highly Correlated Parameters ------------" + print("\n ------------ Highly Correlated Parameters ------------") # Highly-correlated parameters can indicate difficulties constraining the # fit. This function lists all pairs of parameters with an absolute value # of correlation which exceeds a given threshold. - print "|Correlation| > 0.9:" - print "par1 par2 corr(par1, par2)" - print "\n".join(str(c) for c in cov.correlationwarning(.9)) + print("|Correlation| > 0.9:") + print("par1 par2 corr(par1, par2)") + print("\n".join(str(c) for c in cov.correlationwarning(0.9))) - - print "\n-------- Estimate coordination shell occupancy ---------" + print("\n-------- Estimate coordination shell occupancy ---------") # Estimate the scale factor and its uncertainty from first peak's intensity. # G_normalized = scale * G_observed # dscale = scale * dG_observed/G_observed - scale = 12./a0[0] - dscale = scale * a0[1]/a0[0] - print "Estimate scale factor assuming nearest-neighbor intensity = 12" - print "Scale factor is %f +/- %f" %(scale, dscale) + scale = 12.0 / a0[0] + dscale = scale * a0[1] / a0[0] + print("Estimate scale factor assuming nearest-neighbor intensity = 12") + print("Scale factor is %f +/- %f" % (scale, dscale)) # Reference for number of atoms in coordination shells for FCC.
# http://chem-faculty.lsu.edu/watkins/MERLOT/cubic_neighbors/cubic_near_neighbors.html @@ -178,36 +170,34 @@ def run(plot=True): # Calculated the scaled intensities and uncertainties. intensity = [] - for i in range(0, len(cov.model)-1): - (area, darea) = cov.get((i,2)) + for i in range(0, len(cov.model) - 1): + (area, darea) = cov.get((i, 2)) area *= scale - darea = area*np.sqrt((dscale/scale)**2 + (darea/area)**2) + darea = area * np.sqrt((dscale / scale) ** 2 + (darea / area) ** 2) intensity.append((ideal_intensity[i], area, darea)) - print "\nIntensity" - print "Ideal: Estimated" + print("\nIntensity") + print("Ideal: Estimated") for i in intensity: - print "%i: %f +/- %f" %i + print("%i: %f +/- %f" % i) - print "\nTotal intensity" + print("\nTotal intensity") # It is possible to iterate over peaks directly without using indices. # In addition, peak parameters can be accessed using string keys. For the # Gaussian over r all of "position", "width", and "area" are valid. total_observed_intensity = 0 total_ideal_intensity = 0 for peak, ii in zip(cov.model[:-1], ideal_intensity): - total_observed_intensity += scale*peak["area"] + total_observed_intensity += scale * peak["area"] total_ideal_intensity += ii - print "Ideal: Observed (using estimated scale factor)" - print "%i: %f" %(total_ideal_intensity, total_observed_intensity) - + print("Ideal: Observed (using estimated scale factor)") + print("%i: %f" % (total_ideal_intensity, total_observed_intensity)) - ## Save output + # Save output ppe.write("output/query_results.srmise") ppe.writepwa("output/query_results.pwa") - - ## Evaluating a model. + # Evaluating a model. # Although the ModelCovariance object is useful, the model used for fitting # can be directly accessed through PDFPeakExtraction as well, albeit # without uncertainties. This is particularly helpful when evaluating a @@ -217,14 +207,15 @@ def run(plot=True): # peaks are kept separate. if plot: plt.figure() - grid = np.arange(2, 10, .01) + grid = np.arange(2, 10, 0.01) bl = ppe.extracted.baseline everysecondpeak = ppe.extracted.model[::2] - plt.plot(ppe.x, ppe.y, 'o') + plt.plot(ppe.x, ppe.y, "o") for peak in everysecondpeak: plt.plot(grid, bl.value(grid) + peak.value(grid)) plt.xlim(2, 10) plt.show() -if __name__ == '__main__': + +if __name__ == "__main__": run() diff --git a/doc/manual/source/conf.py b/doc/manual/source/conf.py index 965010c..d856aef 100644 --- a/doc/manual/source/conf.py +++ b/doc/manual/source/conf.py @@ -16,6 +16,8 @@ import sys import time +from setup import versiondata + # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. @@ -59,7 +61,6 @@ # |version| and |release|, also used in various other places throughout the # built documents. sys.path.insert(0, os.path.abspath("../../..")) -from setup import versiondata fullversion = versiondata.get("DEFAULT", "version") # The short X.Y version. @@ -205,11 +206,11 @@ latex_elements = { # The paper size ('letterpaper' or 'a4paper'). - #'papersize': 'letterpaper', + # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). - #'pointsize': '10pt', + # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. - #'preamble': '', + # 'preamble': '', } # Grouping the document tree into LaTeX files. 
List of tuples diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..3239179 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,20 @@ +[tool.black] +line-length = 115 +include = '\.pyi?$' +exclude = ''' +/( + \.git + | \.hg + | \.mypy_cache + | \.tox + | \.venv + | _build + | buck-out + | build + | dist + + # The following are specific to Black, you probably don't want those. + | blib2to3 + | tests/data +)/ +''' diff --git a/requirements/run.txt b/requirements/run.txt new file mode 100644 index 0000000..1b57b14 --- /dev/null +++ b/requirements/run.txt @@ -0,0 +1,3 @@ +numpy +scipy +matplotlib-base diff --git a/requirements/test.txt b/requirements/test.txt new file mode 100644 index 0000000..7666f95 --- /dev/null +++ b/requirements/test.txt @@ -0,0 +1,4 @@ +flake8 +pytest +codecov +coverage diff --git a/setup.py b/setup.py index e0e9b14..9828c76 100755 --- a/setup.py +++ b/setup.py @@ -77,10 +77,7 @@ def getversioncfg(): }, author="Luke Granlund", author_email="luke.r.granlund@gmail.com", - description=( - "Peak extraction and peak fitting tool for atomic " - "pair distribution functions." - ), + description=("Peak extraction and peak fitting tool for atomic " "pair distribution functions."), license="BSD-style license", url="https://github.com/diffpy/diffpy.srmise/", keywords="peak extraction fitting PDF AIC multimodeling",