#!/usr/bin/env python #------------------------------------------------------------ # Purpose: Fit parameters for plane in Z = A + B*X + C*Y # # Vog, 17 Nov 2011 #------------------------------------------------------------ try: import numpy from numpy import * except: raise Exception, "Cannot import module numpy" try: from matplotlib.pyplot import figure, show, cm, rc from matplotlib.mlab import griddata except: raise Exception, "Cannot import module matplotlib" try: from kapteyn import tabarray except: raise Exception, "Cannot import module kapteyn" try: from kapteyn import kmpfit except: raise Exception, "Cannot import module kmpfit (upgrade the Kapteyn Package)" try: from mpl_toolkits.mplot3d import axes3d canplot = True except: print "Cannot import module mpl_toolkits.mplot3d: will skip 3D plot" canplot = False try: from scipy.special import erf, chdtrc, gammainc use_scipy = True except: use_scipy = False import gipsy import warnings # We define a0 .. a9 as allowed fit parameters allowed_vars = ['a'+str(i) for i in range(10)] #-------------------------------------------------------------------------- # For future use we prepared a safe version of the eval function e.g. for # a web application. This version allows only a set of numpy functions, # the allowed fit parameters and the arrays x and y. It can only set the # allowed parameters after they are known to the model function, i.e. # after 'exec(tuplestr+'=p'). The tuplestr argument stores the names of # the used parameters (extracted from the user given function). The # argument 'p' has the values of the parameters in the order of the # index of the fit parameter (a0 before a1 etc.). #-------------------------------------------------------------------------- """ safe_list = ['abs', 'arccos', 'arcsin', 'arctan', 'arctan2', 'ceil', 'cos', 'cosh', 'deg2rad', 'degrees', 'e', 'exp', 'exp2', 'fabs', 'fix', 'floor', 'fmod', 'frexp', 'hypot', 'ldexp', 'log', 'log2', 'log10', 'modf', 'pi', 'power', 'radians', 'sin', 'sinc', 'sinh', 'sqrt', 'tan', 'tanh'] #use the list to filter the local namespace safe_dict = dict( [(k, locals().get(k, None)) for k in safe_list] ) def safemodel(p, x, y, fie, tuplestr): exec(tuplestr+'=p') pardict = dict( [(k, locals().get(k, None)) for k in allowed_vars] ) safe_dict.update(pardict) safe_dict.update({'x':x, 'y':y}) print safe_dict return eval(fie, {"__builtins__":None}, safe_dict) """ def model(p, x, y, fie, tuplestr, userfun): if userfun: exec(tuplestr+'=p') return eval(fie) else: a0, a1, a2 = p return a0 + a1*x + a2*y def residuals(p, data): xx, yy, Z, err,fie, tuplestr, userfun = data return (Z - model(p, xx, yy, fie, tuplestr, userfun))/err def prob(nsigma): # Probability to find a value between mean-nsigma and mean+nsigma return erf(nsigma/numpy.sqrt(2.0)) def chauvenet(x, y, z, r, zmodel): # Filter possible outliers based on Chauvenet's criterion # Calculate mean and standard deviation of the residuals. # Express residual-mean in standard deviations of the # sample: d = |res_i-mean|/std # The probability to find this value in a range # mean-d*sigma and mean+d*sigma is P = erf(d/sqrt(2)) # The probabilty to find a residual outside this # interval is Q=1-P # According to Chauvenet we can discard data points where # Q < N/2 x3 = []; y3 = []; z3 = []; r3 = [] N = len(z) mean = zres.mean(); gipsy.anyout("sum zres=%g"%(zres.sum())) stdv = zres.std() s = "\nPossible outliers based on Chauvenet's criterion" gipsy.anyout(s) gipsy.anyout('='*len(s)) numoutliers = 0 for xd, yd, zd, rd, zr in zip(x, y, z, r, zres): delta = abs(zr-mean)/stdv Np = 1.0-prob(delta) criterion = N*Np if criterion < 0.5: s = "(%g,%g,%g) has distance %f sigma to mean %f, PxN=%f" %(xd,yd,zd, delta, mean, Np) gipsy.anyout(s) numoutliers += 1 else: x3.append(xd) y3.append(yd) z3.append(zd) r3.append(rd) if numoutliers == 0: gipsy.anyout("No outliers detected") return numpy.array(x3), numpy.array(y3), numpy.array(z3), numpy.array(r3) gipsy.init() # Contact Hermes # Ask for the file with the data key = "FILENAME=" mes = "Enter name of file: " ask = True while ask: try: filename = gipsy.usertext(key, mes, default=0) fp = open(filename, "r") fp.close() ask = False # File is ok except IOError, (code, errmes): gipsy.reject(key, errmes) ask = True except: errmes = "Cannot open this file, don't know why!" gipsy.reject(key, errmes) ask = True # Which columns do we want to read? key = "COLS=" mes = "Enter columns for X, Y, Z, sigmaZ ... [1,2,3,4]:" ask = True equalweights = True while ask: cols = gipsy.userint(key, mes, default=1, defval=[1,2,3,4], nmax=4) if len(cols) < 3: gipsy.reject(key, "I need at least three columns for X,Y,Z)!") else: cols = [c-1 for c in cols] try: if len(cols) == 3: x2, y2, z2 = tabarray.readColumns(filename, cols=cols) r2 = numpy.ones(len(z2)) equalweights = True else: x2, y2, z2, r2 = tabarray.readColumns(filename, cols=cols) equalweights = False ask = False except: gipsy.reject(key, "Cannot read these columns!") key = "SCALE=" mes = "Multiply Z and sigmaZ with factor .... [1.0]:" scale = gipsy.userreal(key, mes, default=1, defval=1.0, nmax=1) if scale != 1.0: gipsy.anyout("Program scaled data as Z = Z*%g"%scale) z2 *= scale if not equalweights: r2 *= scale N = len(z2) nx = ny = 100 # Number of points in X & Y in meshgrid xmin = x2.min(); xmax = x2.max() ymin = y2.min(); ymax = y2.max() dx = abs(xmax-xmin)/10 # Add some extra space for plot dy = abs(ymax-ymin)/10 X = numpy.linspace(xmin-dx,xmax+dx,nx) Y = numpy.linspace(ymin-dy,ymax+dy,ny) xx, yy = numpy.meshgrid(X,Y) Z = griddata(x2, y2, z2, xx, yy) # Interpolate between ungridded data # Ask to enter a model function F(x,y) ask = True defval = 'a0+a1*x+a2*y' key = "FXY=" mes = "Enter F(x,y) with parameters a0,..,a9 [a0+a1*x+a2*y]: " while ask: fie = gipsy.usertext(key, mes, default=1, defval=defval) userfun = (fie != defval) used_vars = [] for v in allowed_vars: if v in fie: used_vars.append(v) numpars = len(used_vars) if numpars == 0: ask = True gipsy.reject(key, "This function has no parameters to fit!") else: ask = False # Prepare initial values parnames = ','.join(used_vars) tuplestr = parnames initvals = [0]*numpars initpars = str(initvals) mes = "Init. values for "+ parnames + ' ... ['+initpars+']:' key = "INITVALS=" p0 = gipsy.userreal(key, mes, default=1+4, defval=initvals, nmax=numpars) key = "RESTABLE=" mes = "Plot a table with the residuals ... [Y]/N:" restable = gipsy.userlog(key, mes, default=1, defval=True, nmax=1) trials = 0 # Default is No bootstrapping key = "TRIALS=" mes = "Number of trials for bootstrap ... [%g]:"%trials trials = gipsy.userint(key, mes, default=1, defval=trials, nmax=1) if canplot: key = "PLOT=" mes = "Do you want a 3d plot ... [Y]/N:" plot = gipsy.userlog(key, mes, default=1, defval=True, nmax=1) else: plot = False cont = True runs = 0 while cont: # Prepare a 'Fitter' object' fitobj = kmpfit.Fitter(residuals=residuals, data=(x2,y2,z2,r2,fie,tuplestr,userfun)) try: fitobj.fit(params0=p0) except Exception, mes: s = "Something wrong with fit: %s"%mes gipsy.error(4, s) # Fatal error, abort task header = "Results for F(x,y)=%s"%fie if runs == 1: header += " (Outliers removed)" gipsy.anyout("\n\n"+header) gipsy.anyout("="*len(header)) gipsy.anyout("%5s %15s %15s %15s"%("par", "value", "cov.error", "std error")) for i in range(numpars): s = "%5s %15g %15g %15g"%(used_vars[i], fitobj.params[i], fitobj.xerror[i], fitobj.stderr[i]) gipsy.anyout(s) gipsy.anyout(" ") #gipsy.anyout("Parameters: "+parnames) #gipsy.anyout("Param. values: "+ str(fitobj.params)) #gipsy.anyout("Errors from covariance matrix: " + str(fitobj.xerror)) #gipsy.anyout("Uncertainties assuming reduced Chi^2=1: " + str(fitobj.stderr)) gipsy.anyout("Chi^2 min: %g"% fitobj.chi2_min) s = "Reduced Chi^2: %g"%(fitobj.rchi2_min) if equalweights: s += " (Could be meaningless because fit was unweighted)" gipsy.anyout(s) gipsy.anyout("Number of data points: %g"% N) gipsy.anyout("Degrees of freedom: %g"% fitobj.dof) gipsy.anyout("Iterations: %g"% fitobj.niter) gipsy.anyout("Function ev: %d"% fitobj.nfev) gipsy.anyout("Status: %d"% fitobj.status) gipsy.anyout("Status Message: %s"% fitobj.message) fittedparams = fitobj.params # Store for later use standarderrors = fitobj.stderr # Store for later use Dz = abs(model(fittedparams, x2, y2, fie, tuplestr, userfun) - z2) gipsy.anyout("\nResiduals from Z - Zmodel:") gipsy.anyout("Residuals max: %g"% Dz.max()) gipsy.anyout("Residuals min: %g"% Dz.min()) gipsy.anyout("Residuals mean: %g"% Dz.mean()) gipsy.anyout("Residuals standard deviation: %g"% Dz.std()) zmodel = model(fittedparams, x2, y2, fie, tuplestr, userfun) zres = z2 - zmodel if runs == 0 and use_scipy: key = "CHAUVENET=" mes = "Remove Outliers with Chauvenet ... [Y]/N:" remove_out = gipsy.userlog(key, mes, default=1, defval=True, nmax=1) if remove_out: x2, y2, z2, r2 = chauvenet(x2, y2, z2, r2, zres) N = len(z2) runs += 1 cont = (runs == 1 and remove_out) if restable: # The original data is x2,y2,z2 arrays = zip(x2,y2,z2,zmodel,zres) s = "\n%10s %10s %10s %10s %14s"%('X','Y','Z','Zmodel','Zresidual') gipsy.anyout(s) gipsy.anyout('='*len(s)) for xa,ya,za,zm,zr in arrays: s = "%10g %10g %10g %10g %14g"%(xa,ya,za,zm,zr) gipsy.anyout(s) if use_scipy and not equalweights: # P = chdtrc(v,x) returns the area under the right hand tail # (from x to infinity) of the Chi square probability density function # with v degrees of freedom: # 1/(2**(v/2) * gamma(v/2)) * integral(t**(v/2-1) * exp(-t/2), t=x..inf) P = chdtrc(fitobj.dof, fitobj.chi2_min) # Integral from 0 to chi^2 # Note that this is P in the Chi^2 tables of Bevington and Q in Numerical # Recipes, Press et al. It the probability of exceeding Chi^2. # Note that the same answer can be found if you use SciPy's # incomplete gamma function: # gammainc(v,x) s = """\nIf the model function is a good approximation to the parent function, the reduced chi^2 should be near 1.0 and the probability to find the measured reduced chi^2 (=%g) should be approximately 0.5. For larger values of chi^2, the probability of obtaining such a large value of chi^2 from the correct model function is smaller, which probably implies that the model function is not appropriate. The probability that we find a measured value of chi^2 of %g from the selected model function F(x,y)=%s is %g. Small values may indicate that 1) The model is not appropriate (Truly wrong models have Q < 1e-18) 2) The errors on the data points are too small 3) The measurement errors may not normally distributed """%(fitobj.rchi2_min, fitobj.chi2_min, fie, P) gipsy.anyout(s) # Do the bootstrapping if requested if trials > 0: bootparams = [] x3 = x2.copy() y3 = y2.copy() z3 = z2.copy() r3 = r2.copy() fitobj = kmpfit.Fitter(residuals=residuals, data=(x3,y3,z3,r3,fie,tuplestr,userfun)) for i in range(trials): # Start loop over pseudo sample indx = numpy.random.randint(0, N, N) # Do the resampling using an RNG # indx is an array of random indices. Use this array to create a new one. x3[:] = x2[indx] y3[:] = y2[indx] z3[:] = z2[indx] r3[:] = r2[indx] # Only do a regression if there are at least two different # data points in the pseudo sample ok = (x3 != x3[0]).any() if (not ok): gipsy.anyout("Skipping this sample: All elements are the same!") else: fitobj.fit(params0=p0) bootparams.append(fitobj.params) gipsy.anyout("\nStandard errors with bootstrap:") bootparams = numpy.array(bootparams) for i in range(numpars): sigma = bootparams[:,i] - fittedparams[i] gipsy.anyout("sigma "+used_vars[i]+": %g"%sigma.std()) # Plot data and fit in (interactive) 3D plot if plot: # Suppress user warnings for PyFITS actions warnings.resetwarnings() warnings.filterwarnings('ignore', category=UserWarning, append=True) # Plotting rc('xtick', labelsize=8) rc('ytick', labelsize=8) #rc('ztick', labelsize=8) fig1 = figure(1) frame = fig1.add_subplot(1,1,1, projection='3d', azim=-41, elev=21) # For Matplotlib >- 1.01 one can use: #frame.plot_surface(xx, yy, Z, rstride=1, cstride=1, cmap=cm.jet, # linewidth=0, antialiased=False, alpha=0.6) #frame.plot_surface(xx,yy,Z) frame.scatter(x2, y2, z2, c='r') for x3,y3,z3,zm in zip(x2,y2,z2,zmodel): frame.plot((x3,x3),(y3,y3),(z3,zm), 'b') zz = model(fittedparams, xx, yy, fie, tuplestr, userfun) frame.plot_surface(xx, yy, zz, color='y', alpha=0.4) frame.set_xlabel('X') frame.set_ylabel('Y') frame.set_zlabel('Z') # Turn warnings on warnings.resetwarnings() warnings.filterwarnings('always', category=UserWarning, append=True) show() gipsy.finis() # Disconnect Hermes as soon user stopped interactive plot