diff --git a/.gitignore b/.gitignore index 93e3b77..8475191 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ + # Compiled Object files *.slo *.lo @@ -31,6 +32,12 @@ *.aux *.log +# Python files +*.pyc + # Miscellaneous *.pdf +# Local, compiled programs +sinusoid + diff --git a/.ycm_extra_conf.py b/.ycm_extra_conf.py new file mode 100644 index 0000000..cddb57b --- /dev/null +++ b/.ycm_extra_conf.py @@ -0,0 +1,128 @@ + +import os +import ycm_core + +# These are the compilation flags that will be used in case there's no +# compilation database set (by default, one is not set). +# CHANGE THIS LIST OF FLAGS. YES, THIS IS THE DROID YOU HAVE BEEN LOOKING FOR. +flags = [ +'-Wall', +'-Wextra', +'-Werror', +#'-Wc++98-compat', +'-Wno-long-long', +'-Wno-variadic-macros', +'-fexceptions', +#'-DNDEBUG', +'-std=c++11', # Always specify something (c99, c++03, c++11, etc.) +'-x', 'c++', # Always specify something (c or c++) +'-isystem', '/usr/include/eigen3', +'-I', '.', +] + +# Set this to the absolute path to the folder (NOT the file!) containing the +# compile_commands.json file to use that instead of 'flags'. See here for +# more details: http://clang.llvm.org/docs/JSONCompilationDatabase.html +# +# You can get CMake to generate this file for you by adding: +# set( CMAKE_EXPORT_COMPILE_COMMANDS 1 ) +# to your CMakeLists.txt file. +# +# Most projects will NOT need to set this to anything; you can just change the +# 'flags' list of compilation flags. Notice that YCM itself uses that approach. +compilation_database_folder = '' + +if os.path.exists( compilation_database_folder ): + database = ycm_core.CompilationDatabase( compilation_database_folder ) +else: + database = None + +SOURCE_EXTENSIONS = [ '.cpp', '.cxx', '.cc', '.c', '.m', '.mm' ] + + +def DirectoryOfThisScript(): + return os.path.dirname( os.path.abspath( __file__ ) ) + + +def MakeRelativePathsInFlagsAbsolute( flags, working_directory ): + if not working_directory: + return list( flags ) + new_flags = [] + make_next_absolute = False + path_flags = [ '-isystem', '-I', '-iquote', '--sysroot=' ] + for flag in flags: + new_flag = flag + + if make_next_absolute: + make_next_absolute = False + if not flag.startswith( '/' ): + new_flag = os.path.join( working_directory, flag ) + + for path_flag in path_flags: + if flag == path_flag: + make_next_absolute = True + break + + if flag.startswith( path_flag ): + path = flag[ len( path_flag ): ] + new_flag = path_flag + os.path.join( working_directory, path ) + break + + if new_flag: + new_flags.append( new_flag ) + return new_flags + + +def IsHeaderFile( filename ): + extension = os.path.splitext( filename )[ 1 ] + return extension in [ '.h', '.hxx', '.hpp', '.hh' ] + + +def GetCompilationInfoForFile( filename ): + # The compilation_commands.json file generated by CMake does not have entries + # for header files. So we do our best by asking the db for flags for a + # corresponding source file, if any. If one exists, the flags for that file + # should be good enough. + if IsHeaderFile( filename ): + basename = os.path.splitext( filename )[ 0 ] + for extension in SOURCE_EXTENSIONS: + replacement_file = basename + extension + if os.path.exists( replacement_file ): + compilation_info = database.GetCompilationInfoForFile( + replacement_file ) + if compilation_info.compiler_flags_: + return compilation_info + return None + return database.GetCompilationInfoForFile( filename ) + + +def FlagsForFile( filename, **kwargs ): + if database: + # Bear in mind that compilation_info.compiler_flags_ does NOT return a + # python list, but a "list-like" StringVec object + compilation_info = GetCompilationInfoForFile( filename ) + if not compilation_info: + return None + + final_flags = MakeRelativePathsInFlagsAbsolute( + compilation_info.compiler_flags_, + compilation_info.compiler_working_dir_ ) + + # NOTE: This is just for YouCompleteMe; it's highly likely that your + # project does NOT need to remove the stdlib flag. + # + # DO NOT USE THIS IN YOUR ycm_extra_conf IF YOU'RE NOT 100% SURE YOU NEED + # IT. + try: + final_flags.remove( '-stdlib=libc++' ) + except ValueError: + pass + else: + relative_to = DirectoryOfThisScript() + final_flags = MakeRelativePathsInFlagsAbsolute( flags, relative_to ) + + return { + 'flags': final_flags, + 'do_cache': True + } + diff --git a/Makefile b/Makefile index 5065ea8..c323e37 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,37 @@ +# --- BEG Automatic dependencies for C and C++ files. --- +# http://make.mad-scientist.net/papers/advanced-auto-dependency-generation/ +DEPDIR := .d +$(shell mkdir -p $(DEPDIR) >/dev/null) +DEPFLAGS = -MT $@ -MMD -MP -MF $(DEPDIR)/$*.Td +COMPILE.c = $(CC) $(DEPFLAGS) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c +COMPILE.cc = $(CXX) $(DEPFLAGS) $(CXXFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c +POSTCOMPILE = mv -f $(DEPDIR)/$*.Td $(DEPDIR)/$*.d +%.o : %.c +%.o : %.c $(DEPDIR)/%.d + $(COMPILE.c) $(OUTPUT_OPTION) $< + $(POSTCOMPILE) +%.o : %.cpp +%.o : %.cpp $(DEPDIR)/%.d + $(COMPILE.cc) $(OUTPUT_OPTION) $< + $(POSTCOMPILE) +$(DEPDIR)/%.d: ; +.PRECIOUS: $(DEPDIR)/%.d +# --- END Automatic dependencies for C and C++ files. --- + +CXXFLAGS = -std=c++11 -Wall +CPPFLAGS = -I/usr/include/eigen3 +LDLIBS = -lm + +PROGRAMS = sinusoid +PROG_PDF = $(PROGRAMS:=.pdf) + +%.pdf : %.gpi %.dat + gnuplot $< + +%.dat : % + ./$< > $@ + %.pdf : %.fig fig2dev -L pdf $< > $@ @@ -10,14 +43,21 @@ PDFNAME = $(DOCNAME).pdf all : $(PDFNAME) -$(PDFNAME) : $(TEXNAME) logo.pdf fdl-1.3.tex +$(PDFNAME) : $(TEXNAME) logo.pdf fdl-1.3.tex $(PROGRAMS) $(PROG_PDF) pdflatex $(TEXNAME) pdflatex $(TEXNAME) clean : + @rm -frv .d @rm -fv *.aux @rm -fv *.log - @rm -fv *.out @rm -fv logo.pdf + @rm -fv *.o + @rm -fv *.out @rm -fv $(PDFNAME) + @rm -fv $(PROG_PDF) + @rm -fv $(PROGRAMS) +# This must be the last line. +# http://make.mad-scientist.net/papers/advanced-auto-dependency-generation/ +-include $(patsubst %,$(DEPDIR)/%.d,$(basename $(SRCS))) diff --git a/README.md b/README.md index 62ff808..1866faf 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ -# Linear Regression and the Best-Fit Curve +# Linear Regression, the Best-Fit Curve, and C++ **Linear regression can be used to find the best-fit curve through a set of points in the plane.** diff --git a/linear-regression.tex b/linear-regression.tex index 64c7c67..b7b764c 100644 --- a/linear-regression.tex +++ b/linear-regression.tex @@ -1,5 +1,5 @@ -\newcommand{\doctitle}{Framework for Linear Regression in C++} +\newcommand{\doctitle}{Linear Regression, the Best-Fit Curve, and C++} \documentclass[twocolumn]{article} @@ -86,6 +86,20 @@ % %\end{abstract} +\begin{figure} + \begin{center} + \includegraphics[width=0.95\columnwidth]{sinusoid} + \caption{% + Linear regression that finds the best-fit sinusoidal model to a set of + 100 measured points. Note that only the amplitude, phase, and + (vertical) offset of a sinusoidal model can be found by linear + regression. The wavelength must be known ahead of time. Here, the + wavelength is 360$^\circ$.% + } + \label{fig:sinusoid} + \end{center} +\end{figure} + \section{Introduction} \emph{Linear regression} allows one to find the best-fit curve through a set of @@ -93,21 +107,20 @@ \section{Introduction} shape of the best-fit curve (for it need not be a straight line) but to an aspect of the expression that represents the curve. The function corresponding to the best-fit curve must be expressed as a \emph{linear combination} of basis -functions; extremely convenient and useful, however, is that each basis -function may be a \emph{non}-linear function of its variable. So, if the curve -can be expressed as a linear combination of functions of the same variable, and -if, by ``best'', one mean that the coefficients of the linear combination are -chosen to minimize the sum of squared deviations between the points and the -curve, then one can find the best-fit curve by way of linear regression. +functions; extremely convenient and useful is that each basis function may be a +\emph{non}-linear function of its variable. So, if the curve can be expressed +as a linear combination of functions of the same variable, and if, by ``best'', +one mean that the coefficients of the linear combination are chosen to minimize +the sum of squared deviations between the points and the curve, then one can +find the best-fit curve by way of linear regression. For example, in the $x$-$y$ plane, a curve $y = f(x)$ might correspond to a -functional relation whose expression is +function whose expression is \begin{equation} f(x) = c_1 \cos\tfrac{2\pi x}{\lambda} + c_2 \sin\tfrac{2\pi x}{\lambda}. \end{equation} In this case, we call $x$ the \emph{independent variable} and $y$ the -\emph{dependent variable}. Supposing that we have a physical system, measured -in terms of spatial units, we represent the wavelenth of the sinusoidal model +\emph{dependent variable}. We represent the wavelenth of the sinusoidal model as $\lambda$. The expression of $f(x)$ is the linear combination of two functions of $x$. The coefficients of the linear combination are $c_1$ and $c_2$, each of which, like $\lambda$, has the dimension of length. The @@ -126,7 +139,8 @@ \section{Introduction} \left[y_1 - f(x_1)\right]^2 + \left[y_2 - f(x_2)\right]^2 + \left[y_3 - f(x_3)\right]^2 \end{equation} -is minimized. +is minimized. Figure~\ref{fig:sinusoid} shows a sinusoidal linear regression +for a set of 100 measured points. The main benefits of linear regression include determinism in the time required to compute the fit and certainty that the solution is in fact the best diff --git a/sinusoid.cpp b/sinusoid.cpp new file mode 100644 index 0000000..2caaaba --- /dev/null +++ b/sinusoid.cpp @@ -0,0 +1,55 @@ + +/// Copyright 2016 Thomas E. Vaughan +/// +/// \file sinusoid.cpp +/// \brief Generator for data in sinusoidal example. + +#include // for sin(), cos() +#include // for cout, endl +#include // for default_random_engine, normal_distribution<> + +#include // for VectorXd, MatrixXd, MatrixXd::jacobiSvd() + +using namespace Eigen; +using namespace std; + +typedef double (*basis_func)(double); +typedef vector func_basis; + +int main(int, char **argv) +{ + func_basis const basis{cos, sin}; + unsigned constexpr M = 100; // Number of measurements. + unsigned const N = basis.size(); // Number of coefficients. + MatrixXd B(M, N); + VectorXd c(N), x(M), y(M); + c[0] = 2.1; + c[1] = 2.4; + default_random_engine generator; + normal_distribution distribution; + double constexpr X_LO = 0.0; + double constexpr X_HI = 2.0 * M_PI; + double constexpr DX = (X_HI - X_LO) / (M - 1); + for (unsigned i = 0; i < M; ++i) { + x(i) = X_LO + i * DX; + y(i) = distribution(generator); // Start with noise. + for (unsigned j = 0; j < N; ++j) { + B(i, j) = basis[j](x(i)); + y(i) += c[j] * B(i, j); // Add in signal. + } + } + MatrixXd const c_fit = B.jacobiSvd(ComputeThinU | ComputeThinV).solve(y); + cerr << argv[0] << ": coefs:"; + for (unsigned j = 0; j < N; ++j) { + cerr << " " << c[j]; + } + cerr << endl; + for (unsigned i = 0; i < M; ++i) { + double fit_y = 0.0; + for (unsigned j = 0; j < N; ++j) { + fit_y += c[j] * basis[j](x(i)); + } + cout << x(i) << " " << y(i) << " " << fit_y << endl; + } +} + diff --git a/sinusoid.gpi b/sinusoid.gpi new file mode 100644 index 0000000..d3ac28c --- /dev/null +++ b/sinusoid.gpi @@ -0,0 +1,14 @@ + +set grid +set key below +set output "sinusoid.pdf" +set term pdfcairo +set xlabel "x (degrees)" +set xrange [0:360] +set xtics 0, 30 +set ylabel "y" + +plot\ + "sinusoid.dat" using ($1*180.0/pi):2 with points title "data",\ + "sinusoid.dat" using ($1*180.0/pi):3 with lines title "fit" +