/* This is an example HY-PHY Batch File.


   It reads in a '#'-format nucleotide dataset (data/molclock.seq) and
   performs a series of molecular clock tests on the data using the
   HKY85 model.

   

   Output is printed out as a Newick Style tree with branch lengths
   representing the number of expected substitutions per branch (which
   is the default setting for nucleotide models w/o rate variation).
   Also, the likelihood ratio statistic is evaluated and the P-values
   for the tests are reported.

   Sergei L. Kosakovsky Pond and Spencer V. Muse 

   December 1999. 

*/



/* 1. Load the sequence file from disk into a DataSet variable. */

DataSet nucleotideSequences = ReadDataFile ("data/molclock.seq");

/* 2. Build a filter on top of the data set. No site or sequence
      restrictions are given, so all of the data is used, and it is
      to be treated as nucleotides. */

DataSetFilter filteredData = CreateFilter (nucleotideSequences, 1);

/* 3. Tabulate the observed nucleotide frequencies of the filtered data.
      The resulting frequency vector is stored in observedFreqs. */

HarvestFrequencies (observedFreqs, filteredData, 1, 1, 1);

/* 4. The HKY85 substitution matrix. It has two rate parameters:
      trst (the transition rate) and trvs (the transversion rate).
      A '*' on the diagonal stands for -(sum of the off-diagonal
      elements in that row). */

HKY85RateMatrix = {{*,    trvs, trst, trvs}
                   {trvs, *,    trvs, trst}
                   {trst, trvs, *,    trvs}
                   {trvs, trst, trvs, *   }};

/* 5. The HKY85 model itself: the rate matrix combined with the vector
      of observed frequencies, which serve as the equilibrium
      frequencies. */

Model HKY85 = (HKY85RateMatrix, observedFreqs);

/* 6. Define the tree topology (Newick notation) for the sequences
      that were just read. */

Tree theTree = (TAAJ153,(HVRNASS,(RICRSS3,((ZMSUCS1,(OSRSS1A,(TASUCSYN1,HVSSYNMR))),(MZESUS1,ORRSS2)))));

/* 7. All the ingredients of the likelihood function (data, tree,
      equilibrium frequencies) are now in place, so it can be built
      from the filtered data and the tree. */

LikelihoodFunction theLnLik = (filteredData, theTree);

/* 8. Maximize the likelihood without any constraints. Optimize stores
      its results in the matrix paramValues; from it we keep the
      number of model parameters and the maximized ln-likelihood,
      both of which are needed later for the likelihood ratio tests. */

Optimize (paramValues, theLnLik);

paramCount         = paramValues[1][1];
unconstrainedLnLik = paramValues[1][0];

/* 9. Print the fitted likelihood function (the tree with its optimal
      branch lengths) to the console. */

fprintf (stdout, "\n 0).UNCONSTRAINED MODEL:", theLnLik);

/* 10. Impose the molecular clock constraint on the entire tree,
       enforcing the clock on the transition rate (trst) only. */

MolecularClock (theTree, trst);

/* 11. Maximize the likelihood under the clock constraint and carry out
       a likelihood ratio test against the unconstrained fit.

       After Optimize, paramValues[1][0] holds the maximized
       ln-likelihood and paramValues[1][1] the number of estimated
       parameters. The test statistic is

           lnlikDelta = 2 * (lnL unconstrained - lnL constrained)

       and the degrees of freedom are the number of parameters removed
       by the constraint. CChi2 is used as the chi-squared cumulative
       distribution, so 1 - CChi2 (...) is the P-value. */

Optimize (paramValues, theLnLik);
lnlikDelta = 2 * (unconstrainedLnLik - paramValues[1][0]);
pValue     = 1 - CChi2 (lnlikDelta, paramCount - paramValues[1][1]);
fprintf (stdout, "\n\n1). Global Molecular Clock on transition rates; the P-value is:", pValue, "\n", theLnLik);

/* 12. Enforce the clock on the transversion rate (trvs) only.

       The constraints currently attached to the tree are cleared
       first, so that only the transversion clock is in effect for
       this fit. The likelihood is then re-maximized and compared with
       the unconstrained fit by a likelihood ratio test:

           lnlikDelta = 2 * (lnL unconstrained - lnL constrained)

       with degrees of freedom equal to the difference in the number
       of estimated parameters (paramValues[1][1] after each fit).
       CChi2 is used as the chi-squared cumulative distribution, so
       1 - CChi2 (...) is the P-value. */

ClearConstraints (theTree);
MolecularClock (theTree, trvs);
Optimize (paramValues, theLnLik);
lnlikDelta = 2 * (unconstrainedLnLik - paramValues[1][0]);
pValue     = 1 - CChi2 (lnlikDelta, paramCount - paramValues[1][1]);

fprintf (stdout, "\n\n2). Global Molecular Clock on transversion rates; the P-value is:", pValue, "\n", theLnLik);

/* 13. Finally, enforce the clock on both rates.

       NOTE: there is deliberately no ClearConstraints call here. The
       transversion (trvs) clock imposed in step 12 is still attached
       to the tree, and the transition (trst) clock is added on top of
       it. This step therefore depends on step 12 having run
       immediately before it.

       The likelihood ratio test is the same as in the previous steps:

           lnlikDelta = 2 * (lnL unconstrained - lnL constrained)

       with degrees of freedom equal to the difference in the number
       of estimated parameters; 1 - CChi2 (...) is the P-value. */

MolecularClock (theTree, trst);
Optimize (paramValues, theLnLik);
lnlikDelta = 2 * (unconstrainedLnLik - paramValues[1][0]);
pValue     = 1 - CChi2 (lnlikDelta, paramCount - paramValues[1][1]);
fprintf (stdout, "\n\n3). Global Molecular Clock on both rates; the P-value is:", pValue, "\n", theLnLik);

