function [paramEsts, paramEsts1Norm]=FitNorms(Data, varargin) %%% % SUMMARY: % Takes in the column vector of data, Data and fits the LOGARITHM (natural log) % of the data to two normal distributions using an EM algorithm. Returns the means and % standard deviation of the fits as well as the proportion of the % population in each distribution % % INPUTS: % Data: Data that we will fit to. We are fitting either one or two % gaussians to the logarithm of this data. % % varargin: % varargin{1}: 0 if output is to be suppressed (default), 1 if output % (ie, plot of the gate and the stuff in the gate) IS to be plotted % varargin{2}: number of bins to use when taking the histograms % varargin{3}: vector of the starting guesses for P, the means, and the % standard deviations % % % OUTPUTS: % paramEsts- Parameters for fit to two normal distributions (proportion % of population in first distribution, mean 1, mean 2, std deviation 1, % standard deviation 2) % paramEsts1Norm-Parameters for fit to 1 normal distribution (mean and % standard deviation) % % % REQUIRES: % Matlab statistics toolbox % % REFERENCES: % Mathworks documentation on inpolygon: % http://www.mathworks.com/help/techdoc/ref/inpolygon.html % % Written by Megan McClean, Ph.D. % Lewis-Sigler Institute for Integrative Genomics % Princeton University % 120 Carl Icahn % Princeton, NJ 08544 % mmcclean@princeton.edu % % Last revised on February 22, 2012 %% %Some parameters that will be used throughout + getting rid of open figures numarg=length(varargin); close all; %% %Take the logarithm of the data and the histogram logData=log(Data); %Take the logarithm of the data %Some steps to remove complex numbers (from the negative values in the FACS %data) and -Inf (from any 0's in the FACS data) logData=real(logData); f=find(logData==-Inf); logData(f)=-max(logData); f=find(logData<0); logData(f)=0; if numarg<2 [xx,bins]=hist(logData,100); else [xx,bins]=hist(logData,varargin{2}); end %% %Define the model for fitting, in this case a mixture of two gaussians that %will be fit to the logarithm of the data pdf_normmixture = @(x,p,mu1,mu2,sigma1,sigma2) p*normpdf(x,mu1,sigma1) + (1-p)*normpdf(x,mu2,sigma2); %Set the guesses for the proportion (of the population split between the %two gaussians), the means and the standard deviation if numarg<3 pStart = .5; muStart = quantile(logData,[.25 .75]); sigmaStart = sqrt(var(logData) - .25*diff(muStart).^2); else pGuesses=varargin{3}; pStart=pGuesses(1); mustart=[pGuesses(2) pGuesses(3)]; sigmaStart=[pGuesses(4) pGuesses(5)]; end start = [pStart muStart sigmaStart sigmaStart]; %upper and lower bounds for means and standard deviations lb = [0 -Inf -Inf 0 0]; ub = [1 Inf Inf Inf Inf]; %reset parameters for the mle estimation and estimate the parameters options = statset('MaxIter',300, 'MaxFunEvals',600); paramEsts = mle(logData, 'pdf',pdf_normmixture, 'start',start,'lower',lb, 'upper',ub, 'options',options); %%Plot the fits if numarg>0 & varargin{1}==1 %plot all the hard work: figure; %bins = -2.5:.5:7.5; h = bar(bins,histc(logData,bins)/(length(logData)*(bins(2)-bins(1))),'histc'); set(h,'FaceColor',[.9 .7 .9]); xgrid = linspace(1.1*min(logData),1.1*max(logData),200); pdfgrid = pdf_normmixture(xgrid,paramEsts(1),paramEsts(2),paramEsts(3),paramEsts(4),paramEsts(5)); %figure; hold on; plot(xgrid,pdfgrid,'r-'); hold off xlabel('Ln of data'); ylabel('Probability Density'); end %% %Do it all again but only fit 1 normal %set up start parameters for fitting 1 normal distribution if numarg<3 pStart = .5; muStart = mean(logData); sigmaStart = std(logData); else pGuesses=varargin{3}; pStart=pGuesses(1); mustart=(pGuesses(2)+pGuesses(3))/2; sigmaStart=pGuesses(4); end %paramEsts1Norm = mle(logData, 'distribution','Normal', 'start',start,'lower',lb, 'upper',ub, 'options',options); [paramEsts1Norm(1) paramEsts1Norm(2)]=normfit(logData); %%Plot the fits if numarg>0 & varargin{1}==1 %plot all the hard work: figure; %bins = -2.5:.5:7.5; h = bar(bins,histc(logData,bins)/(length(logData)*(bins(2)-bins(1))),'histc'); set(h,'FaceColor',[.9 .7 .9]); xgrid = linspace(1.1*min(logData),1.1*max(logData),200); pdfgrid = normpdf(xgrid,paramEsts1Norm(1),paramEsts1Norm(2)); hold on; plot(xgrid,pdfgrid,'r-'); hold off xlabel('Log_{10} of data'); ylabel('Probability Density'); end