/*------------------------------------------------------------------------------*
 * File Name: Stat_Utils.h														*
 * Creation: GJL 4/1/2003														*
 * Purpose: Header file for Stat_Utils.c.										*
 * Copyright (c) OriginLab Corp.	2003-2007									*
 * All Rights Reserved															*
 * 																				*
 * Modification Log:															*
 *------------------------------------------------------------------------------*/

#ifndef _STAT_UTILS_H
#define _STAT_UTILS_H
 
////////////////////////////////////////////////////////////////////////////////////
// Included header files
//////////////////////////////////////////////////////////////////////////////////
//
// System includes
//#include <origin.h>           // Most Origin related classes
//#include <Tree.h>             // Tree class
//#include <NAG\nag_types.h>    // NAG structures
//#include <NAG\OCN_g01.h>      // NAG Basic Statistics functions

////////////////////////////////////////////////////////////////////////////////////
// Define constants and non-localized strings
////////////////////////////////////////////////////////////////////////////////////
//
#define SU_MIN_NPTS					2
#define SU_DEFAULT_CONF_LEVEL		0.95
#define SU_DEFAULT_TOLERANCE		0.000001
#define SU_DELIM					'|'
#define SU_P1						"P1"
#define SU_PN						"P%d"
#define SU_PARAMS_NODE				"Parameters"

#define LR_COL_NAMES				"Intercept|Slope"

#define PR_P0_NAME					"A"
#define PR_PN_NAME					"B%d"
#define PR_N_OUTPUT_COLS			6
#define PR_FITX_COL					0
#define PR_FITY_COL					1
#define PR_LCB_COL					2
#define PR_UCB_COL					3
#define PR_LPB_COL					4
#define PR_UPB_COL					5					

////////////////////////////////////////////////////////////////////////////////////
// SumStats structure
////////////////////////////////////////////////////////////////////////////////////
//
typedef struct tagSumStats
{
	int N;
	double dSum;
	double dWeightSum;
	double dMean;
	double dSD;
	double dKurtosis;
	double dSkewness;	
	double dSE;
	double dVariance;
	double dCoefVar;	
	double dCIL;
	double dCIU;
	double dMin;
	double dMax;
	double dRange;
	int nMissing;
} SumStats;

////////////////////////////////////////////////////////////////////////////////////
// Enumerations
////////////////////////////////////////////////////////////////////////////////////
//
enum {FITLR_X, FITLR_Y, FITLR_LCB, FITLR_UCB, FITLR_LPB, FITLR_UPB, FITLR_NUM_COLS};

////////////////////////////////////////////////////////////////////////////////////
// Function prototypes
////////////////////////////////////////////////////////////////////////////////////
//

/** >Statistics
	Function to compute summary statistics for a vector passing results back in a tree.
*/
bool stat_descriptive(const vector& vData, TreeNode& trWhole, int nOffset = 0, const vector* pvWeight = NULL);

/** >Statistics
	Function to compute summary statistics for a vector passing results back in a SumStats structure.
*/
bool stat_summary(SumStats& ss, const vector& vData, const vector* pvWeight = NULL,
	 double dConfLevel = SU_DEFAULT_CONF_LEVEL);

/** >Statistics
	Function to compute percentiles for a vector.
*/
bool stat_percentiles(vector& vPercentiles, const vector& vData, const vector& vPercents, bool bInterpolate = false);

/** >Statistics
		Function to perform multiple linear regression using the NAG function nag_regsn_mult_linear.
	Parameters:
		nPts=Input number of rows (or data points) in the input matrix
		nTdx=Input number of columns (or independent variables) in the input matrix
		mX=Input matrix containing data points of the independent variables
		vY=Input vector containing data points of dependent variable
		vWT=Input vector containing weights of data points, NULL if weighting not used
		dRss=Output residual sum of squares for the regression 
		dDf=Output degrees of freedom associated with dRss
		vB=Output least-squares estimates of the parameters of the regression model
		vSE=Output standard errors of the parameter estimates given in vB
		vCOV=Output variance-covariance matrix of estimated parameters in vB
		vRES=Output weighted residuals
		vH=Output diagonal elements of H
		mQ=Output results of the QR decomposition
		bSvd=Output flag set TRUE if singular value decomposition was performed, otherwise FALSE
		iRank=Output rank of the independent variables
		vP=Output details of QR decomposition if SVD is used
		vCOMAR=Output information which is needed by nag_regsn_mult_linear_newyvar_if bSvd is TRUE 
		dTol=Input tolerance used to decide rank of independent variables, default is MR_DEFAULT_TOLERANCE 
		bThroughZero=Input flag forcing regression line through origin, default is FALSE
	Return:
		Returns the results of the NAG nag_regsn_mult_linear function.
*/
int stat_multiple_linear_regression( int nPts, int nTdx, matrix& mX, vector& vY, vector& vWT,
	double& dRss, double& dDf, vector& vB, vector& vSE, vector& vCOV, vector& vRES,
	vector& vH, matrix& mQ, BOOL& bSvd, int& iRank, vector& vP,	vector& vCOMAR,
	double dTol = SU_DEFAULT_TOLERANCE, BOOL bThroughZero = FALSE );

/** >Statistics
		Perform a linear regression on the input curve.
	Parameters:
		crvData=Input, the X and Y coordinates of the sample data
		mResultCurves=Output, the result curves where
			Col(0) is the X coordinates of the fit line
			Col(1) is the Y coordinates of the fit line
			Col(2) is the lower confidence band
			Col(3) is the upper confidence band
			Col(4) is the lower prediction band
			Col(5) is the upper prediction band
		trLR=Input settings & Output results of the linear regression
			trLR.GUI.Fit.ThroughZero.nVal				// Force fit to pass thru zero
			trLR.GUI.Fit.FixSlope.nVal					// force slope to be fixed
			trLR.GUI.Fit.FixSlopeAt.dVal				// fixed value of slope (Note: cannot fix slope and thru zero at same time, should return error from LLOC)
			trLR.GUI.Fit.ErrBarWeight.nVal				// Use error column for wt - use (1/err^2) as wt factor
			trLR.GUI.Fit.UseReducedChiSq.nVal			// Scale parameter errors with reduced chisqr

			trLR.GUI.ResultCurves.Points.nVal			// Number of points in fit curve
			trLR.GUI.ResultCurves.ConfBands.nVal		// Create confidence bands - if not set, then matrix will have empty columns
			trLR.GUI.ResultCurves.PredBands.nVal		// Create prediction bands - if not set, then matrix will have empty columns
			trLR.GUI.ResultCurves.Confidence.dVal		// Confidence value to be used 

			trLR.Calculation.Control.UseDataXRange.nVal	// Option = 1 to use data range, = 0 to use X1, X2
			trLR.Calculation.Control.X1.dVal			// Default X minimum for fit curve
			trLR.Calculation.Control.X2.dVal			// Default X maximum for fit curve

			trLR.Calculation.Parameters.P1.Name.strVal	// Parameter Name...A
			trLR.Calculation.Parameters.P1.Value.dVal	// Parameter Value
			trLR.Calculation.Parameters.P1.Error.dVal	// Parameter Error
			trLR.Calculation.Parameters.P1.Vary.nVal	// Parameter Vary
			trLR.Calculation.Parameters.P1.tValue.dVal	// Parameter t-Value
			trLR.Calculation.Parameters.P1.LCI.dVal		// Parameter LCI
			trLR.Calculation.Parameters.P1.UCI.dVal		// Parameter UCI
			trLR.Calculation.Parameters.P1.Prob.dVal	// Parameter Prob > |t|
	
			trLR.Calculation.Parameters.P2.Name.strVal	// Parameter Name...B
			trLR.Calculation.Parameters.P2.Value.dVal
			trLR.Calculation.Parameters.P2.Error.dVal
			trLR.Calculation.Parameters.P2.Vary.nVal
			trLR.Calculation.Parameters.P2.tValue.dVal
			trLR.Calculation.Parameters.P2.LCI.dVal
			trLR.Calculation.Parameters.P2.UCI.dVal
			trLR.Calculation.Parameters.P2.Prob.dVal
	
			trLR.Calculation.Stat.Rvalue.dVal			// R-value
			trLR.Calculation.Stat.RSqCOD.dVal			// RSq-COD value
			trLR.Calculation.Stat.AdjRSq.dVal			// Adjusted RSq
			trLR.Calculation.Stat.RMSESD.dVal			// RMSE-Standard Dev.
			trLR.Calculation.Stat.N.nVal				// No. of points in fit
	
			trLR.Calculation.ANOVA.Model.DOF.nVal		// Model DOF 
			trLR.Calculation.ANOVA.Model.SSq.dVal		// Model SS
			trLR.Calculation.ANOVA.Model.MeanSq.dVal	// Model Mean Sq
			trLR.Calculation.ANOVA.Error.DOF.nVal		// Error DOF
			trLR.Calculation.ANOVA.Error.SSq.dVal		// Error SS
			trLR.Calculation.ANOVA.Error.MeanSq.dVal	// Error Mean Sq
			trLR.Calculation.ANOVA.Total.DOF.dVal		// Total degrees of freedom
			trLR.Calculation.ANOVA.Total.SSq.dVal		// Total SS
			trLR.Calculation.ANOVA.Fvalue.dVal			// F-value
			trLR.Calculation.ANOVA.Pvalue.dVal			// P-value
	Return:
		Returns ERROR_NO_ERROR on successful exit and an error code on failure.
		NAG error codes:		Negative 1 * NAG error code
		ERROR_INVALID_CURVE:	Invalid input curve
		ERROR_INVALID_TREENODE:	Invalid TreeNode
		ERROR_TO_FEW_PTS:		Not enough points in the curve
		ERROR_X_RANGE:			All the elements of X are equal
		ERROR_SETTINGS:			'ThroughZero' and 'FixSlope' can not be both enabled
*/
int stat_linear_fit(const curvebase& crvData, matrix& mResultCurves, TreeNode& trLR); 

/** >Statistics
		Perform a multiple linear regression using the NAG function nag_regsn_mult_linear.
	Parameters:
		vDepData=Input, vector holding input dependent data
		mIndepData=Input, matrix holding input independent data
			trMR: Input settings and Output results, tree having the nodes:
			trMR.GUI.Fit.ThroughZero.nVal					// Force fit to pass thru zero
	
			trMR.Calculation.Parameters.P1.Name.strVal		// Parameter Name...Y-Intercept
			trMR.Calculation.Parameters.P1.Value.dVal		// Parameter Value
			trMR.Calculation.Parameters.P1.Error.dVal		// Parameter Error
			trMR.Calculation.Parameters.P1.tValue.dVal		// Parameter t-Value
			trMR.Calculation.Parameters.P1.Prob.dVal		// Parameter Prob > |t|
	
			trMR.Calculation.Parameters.P2.Name.strVal		// Parameter Name...<Dataset Name>
			trMR.Calculation.Parameters.P2.Value.dVal
			trMR.Calculation.Parameters.P2.Error.dVal
			trMR.Calculation.Parameters.P2.tValue.dVal
			trMR.Calculation.Parameters.P2.Prob.dVal
	
			etc. depending on number of dependent variables
	
			trMR.Calculation.Stat.Rvalue.dVal				// R-value
			trMR.Calculation.Stat.RSqCOD.dVal				// RSq-COD value
			trMR.Calculation.Stat.AdjRSq.dVal				// Adjusted RSq
			trMR.Calculation.Stat.RMSESD.dVal				// RMSE-Standard Dev.
			trMR.Calculation.Stat.N.nVal					// No. of points in fit
	
			trMR.Calculation.ANOVA.Model.DOF.nVal			// Model DOF 
			trMR.Calculation.ANOVA.Model.SSq.dVal			// Model SS
			trMR.Calculation.ANOVA.Model.MeanSq.dVal		// Model Mean Sq
			trMR.Calculation.ANOVA.Error.DOF.nVal			// Error DOF
			trMR.Calculation.ANOVA.Error.SSq.dVal			// Error SS
			trMR.Calculation.ANOVA.Error.MeanSq.dVal		// Error Mean Sq
			trMR.Calculation.ANOVA.Total.DOF.nVal			// Total degrees of freedom
			trMR.Calculation.ANOVA.Total.SSq.dVal			// Total SS
			trMR.Calculation.ANOVA.Fvalue.dVal				// F-value
			trMR.Calculation.ANOVA.Pvalue.dVal				// P-value
		vWeightData=Input, Optional vector containing input weighting data if used
		mCov=Output, the variance-covariance matrix
	Return:
		Returns ERROR_NO_ERROR on successful exit and an error code on failure.
		NAG error codes:		Negative 1 * NAG error code
		ERROR_INVALID_TREENODE:	Invalid TreeNode
		ERROR_TO_FEW_PTS:		To few data points
		ERROR_UNEQUAL_N:		Unequal N where equal N is required
*/
int stat_multiple_linear_regression(const vector& vDepData, const matrix& mIndepData, 
	TreeNode& trMR, const vector* vWeightData = NULL, matrix* mCov = NULL );

/** >Statistics
		Perform a polynomial fit on the input curve.
	Parameters:
		crvData=Input, the X and Y coordinates of the sample data
		mResultCurves=Output, the result curves where
			Col(0) is the X coordinates of the fit line
			Col(1) is the Y coordinates of the fit line
			Col(2) is the lower confidence band
			Col(3) is the upper confidence band
			Col(4) is the lower prediction band
			Col(5) is the upper prediction band
		trPR=Input settings & Output results of the linear regression
			trPR.GUI.Fit.PolynomialOrder.nVal			// The order of the polynomial
			trPR.GUI.Fit.ThroughZero.nVal				// Force fit to pass thru zero
			trPR.GUI.Fit.ErrBarWeight.nVal				// Use error column for wt - use (1/err^2) as wt factor
			trPR.GUI.Fit.UseReducedChiSq.nVal			// Scale parameter errors with reduced chisqr

			trPR.GUI.ResultCurves.Points.nVal			// Number of points in fit curve
			trPR.GUI.ResultCurves.ConfBands.nVal		// Create confidence bands - if not set, then matrix will have empty columns
			trPR.GUI.ResultCurves.PredBands.nVal		// Create prediction bands - if not set, then matrix will have empty columns
			trPR.GUI.ResultCurves.Confidence.dVal		// Confidence value to be used 

			trPR.Calculation.Control.UseDataXRange.nVal	// Option = 1 to use data range, = 0 to use X1, X2
			trPR.Calculation.Control.X1.dVal			// Default X minimum for fit curve
			trPR.Calculation.Control.X2.dVal			// Default X maximum for fit curve

			trPR.Calculation.Parameters.P1.Name.strVal	// Parameter Name...A
			trPR.Calculation.Parameters.P1.Value.dVal	// Parameter Value
			trPR.Calculation.Parameters.P1.Error.dVal	// Parameter Error
			trPR.Calculation.Parameters.P1.Vary.nVal	// Parameter Vary
			trPR.Calculation.Parameters.P1.tValue.dVal	// Parameter t-Value
			trPR.Calculation.Parameters.P1.LCI.dVal		// Parameter LCI
			trPR.Calculation.Parameters.P1.UCI.dVal		// Parameter UCI
			trPR.Calculation.Parameters.P1.Prob.dVal	// Parameter Prob > |t|

			trPR.Calculation.Parameters.P2.Name.strVal	// Parameter Name...B1
			trPR.Calculation.Parameters.P2.Value.dVal
			trPR.Calculation.Parameters.P2.Error.dVal
			trPR.Calculation.Parameters.P2.Vary.nVal
			trPR.Calculation.Parameters.P2.tValue.dVal
			trPR.Calculation.Parameters.P2.LCI.dVal
			trPR.Calculation.Parameters.P2.UCI.dVal
			trPR.Calculation.Parameters.P2.Prob.dVal

			etc. per order of Polynomial

			trPR.Calculation.Stat.Rvalue.dVal			// R-value
			trPR.Calculation.Stat.RSqCOD.dVal			// RSq-COD value
			trPR.Calculation.Stat.AdjRSq.dVal			// Adjusted RSq
			trPR.Calculation.Stat.RMSESD.dVal			// RMSE-Standard Dev.
			trPR.Calculation.Stat.N.nVal				// No. of points in fit
	
			trPR.Calculation.ANOVA.Model.DOF.nVal		// Model DOF 
			trPR.Calculation.ANOVA.Model.SSq.dVal		// Model SS
			trPR.Calculation.ANOVA.Model.MeanSq.dVal	// Model Mean Sq
			trPR.Calculation.ANOVA.Error.DOF.nVal		// Error DOF
			trPR.Calculation.ANOVA.Error.SSq.dVal		// Error SS
			trPR.Calculation.ANOVA.Error.MeanSq.dVal	// Error Mean Sq
			trPR.Calculation.ANOVA.Total.DOF.dVal		// Total degrees of freedom
			trPR.Calculation.ANOVA.Total.SSq.dVal		// Total SS
			trPR.Calculation.ANOVA.Fvalue.dVal			// F-value
			trPR.Calculation.ANOVA.Pvalue.dVal			// P-value
	Return:
		Returns ERROR_NO_ERROR on successful exit and an error code on failure.
		NAG error codes:		Negative 1 * NAG error code
		ERROR_INVALID_CURVE:	Invalid input curve
		ERROR_INVALID_TREENODE:	Invalid TreeNode
		ERROR_TO_FEW_PTS:		Not enough points in the curve
		ERROR_SETTINGS:			Polynomial order invalid
*/
int stat_polynomial_fit(const curvebase& crvData, matrix& mResultCurves, TreeNode& trPR);

#endif // _STAT_UTILS_H