/*Approximation of the given dissimilarity DI by a tree metric DA*/
/*Copyright (C) 2002 by V. Makarenkov*/
/*Please see for more details*/
/*Enter the dissimilarity matrix's file name. Ther input file must be a text (ASCII) file,
containing the lower triangular (no diagonal) of the dissimilarity matrix.
Values in the matrix should be separated by blanks (spaces) or tabs, as seen
in the sample files. Select one of the five reconstruction methods proposed.
If you select the MW method you should also specify whether to use local
or global optimization, and define the matrix of weights. This latter matrix
can either be a file (same format as above) or a function of the dissimilarity
matrix: W = 1/(D^p) where W is the weight, D is the dissimilarity and
you specify p. If you enter 0.0 for p, then all weights W will be 1.
*/
#include
#include
#include
#include
#include
#include
#define N1 1000
int n;
FILE *Output1;
float method;
void construction(double**,double**,int*,double**);
void odp(double**,int*,int*,int*);
void parcour1(double**,double**,double**,int*,long int *, double *);
void parcour2(double**,double**,double**,int*,long int *, double *);
void NJ(double **,double **);
void ADDTREE(double **,double **);
void UNJ(double **,double **);
void construction2(double**,double**,int*);
int comparb(double**,double**);
int comparb1(double**,double**);
void coder(double**,int *,double*);
void decoder(double**,int *,double*);
void compute_criteres(double **,double **,double *,double **, int);
void Tree_edges(double **,long int *,double *);
int floor1(double);
void approx_arb2(double**,double**,double**,double**, int*, long int*, double*);
void Scaling(double **, double *);
void Scaling1(double **,double **,double *,double *,double *);
void PrintEdges(long int *,double *, int, int, double*, double*, int);
void RETICULATIONS (int n, double **DISS, double **D, long int *ARETE, double *LONGUEUR, int OptionFunction, double **W, int Iternumber, int *ReticulationsNumber, double *CRITERION1, double *CRITERION2);
// Variables and functions used for tree reconstrcution from incomplete matrices
double **Dist, **Da, *Long, Dmax=0., eps=0.001;
char **Et, Nom[20], car;
int N, Ns, NbInc=0, *Tree, *Ch, *Pl, *Clx, *Cly;
int miss=-99;
/* Function prototypes */
void GL_Main(double **DD, int nbObj, int methode, double **distanceArbre, double *RESULTATS, long int *ARETE, double *LONGUEUR, double **W);
void LecDist();
void EditDist(double **D, int N);
void Chemin(int *T, int t, int u);
void Arete(double *path, int *x, int *y);
int GrowTree(int s, int t, int u, int NbPl);
void MajDist(int s, int NbPl);
void IniTri(double **D, int *ps, int *pt, int *pu);
int PSM(double **D, int N);
void alea (int T, int *aa);
void Ultra1(int *sum, int *sum2, int T, int **B2, double **m, double *max, int *aa);
void Ultra2(int *sum, int *sum2, int T, int **B2, double **m, double *max);
void Additif(int *sum, int *sum2, int T, int **B2, double **m, double *max);
void compute_criteres11 (double **D,double **DA, double *R, double *LONGUEUR, int nn);
int constructionMisVal (double **D,double **DA,int *X,double **W);
int parcour211(double **DISS,double **W,double **TM,int *Iternum,long int *ARETE, double *LONGUEUR);
int parcour2MisVal(double **DISS,double **W,double **TM,int *Iternum,long int *ARETE, double *LONGUEUR);
// The main function
void main (void)
{
float Puissance;
int l,i,j,Iternum,ReticulationsNumber=0, OptionFunction=0, optionMiss=0;
double *LONGUEUR,power=0.0;
long int *ARETE;
double **DI,**DA,**DA1,**W,*R,*RAJ;
float Option,Option1;
double *CRITERION1, *CRITERION2;
double RESULTATS[4];
FILE *data;
char source[20],source1[20],symbol, PrintTreeMetric='Y';;
double c;
printf("************************************************\n");
printf("* T-Rex - Tree and Reticulogram reconstruction *\n");
printf("* Copyright (C) 2002 by V. Makarenkov *\n");
printf("************************************************\n\n\n");
printf("Enter the file name for the dissimilarity matrix D; this matrix should be given in the lower triangular form without diagonal and matrix dimension:\n\n");
scanf("%s", source);
if ((data=fopen(source,"r"))==0) { printf("\nFile %s was not found \n", source); exit(1); }
// Reading the dissimilarity data file
i=1;
fscanf(data,"%lf",&c);
while ((symbol=getc(data))!=EOF)
{
fscanf(data,"%lf",&c); i++;
}
fseek(data,0,0);
//Determining the number of objects n
if ((i==1)||(i==2)) n=2;
else
n=floor1((1.0+sqrt(1.0+8.0*i))/2.0);
if ((2*i!=n*(n-1))&&(2*(i-1)!=n*(n-1))) { printf("\n\nInput file error "); exit(0); }
//Merory allocation
DI=(double **) malloc((n+1)*sizeof(double*));
DA=(double **) malloc((n+1)*sizeof(double*));
DA1=(double **) malloc((n+1)*sizeof(double*));
W=(double **) malloc((n+1)*sizeof(double*));
R=(double *) malloc(4*sizeof(double));
RAJ=(double *) malloc(4*sizeof(double));
ARETE=(long int *) malloc((4*n-1)*sizeof(long int));
LONGUEUR=(double *)malloc((2*n-1)*sizeof(double));
for (i=0;i<=n;i++)
{
DI[i]=(double*)malloc((n+1)*sizeof(double));
DA[i]=(double*)malloc((n+1)*sizeof(double));
DA1[i]=(double*)malloc((n+1)*sizeof(double));
W[i]=(double*)malloc((n+1)*sizeof(double));
if ((DI[i]==NULL)||(DA[i]==NULL)||(DA1[i]==NULL)||(W[i]==NULL))
{
printf(" Data matrix is too large\n ");
exit(1);
}
}
// Reading the dissimilarity matrix DI
for (j=2; j<=n; j++)
{
for (i=1; i<=j-1; i++)
{
fscanf(data,"%lf",&c);
DI[i][j]=c;
DI[j][i]=c;
}
DI[j][j]=0;
}
DI[1][1]=0;
DI[0][0]=0;
fclose (data);
// Main menu
int optionTR=0;
while ((optionTR!=1)&&(optionTR!=2)&&(optionTR!=3)){
printf("\nSelect a reconstruction method: \n\n");
printf(" 1. Infer an additive (phylogenetic) tree from D\n");
printf(" 2. Infer a reticulogram (reticulated network) from D\n");
printf(" 3. Infer an additive tree from an incomplete matix D \n\n");
scanf("%d",&optionTR);
printf("\n");}
// Tree reconstruction from incomplete matrices
if (optionTR==3)
{
while ((optionMiss!=1)&&(optionMiss!=2)&&(optionMiss!=3)&&(optionMiss!=4)){
printf("\nSelect a tree reconstruction method for an incomplete dissimilarity matrix D\n(missing entries in D should be indicated by -99)\n\n");
printf(" 1. Triangles method\n");
printf(" 2. Ultrametric procedure + MW\n");
printf(" 3. Additive procedure + MW \n");
printf(" 4. MW-modified\n\n");
scanf("%d",&optionMiss);
printf("\n\n");}
printf("\n\nComputation begins ... Tree reconstruction ...\n\n");
GL_Main(DI, n, optionMiss, DA, RESULTATS, ARETE, LONGUEUR, W);
} // end of tree reconstruction from incomlete matrices
// Tree reconstruction from comlete matrices and reticulogram reconstruction
else {
method=0;
while ((method!=1)&&(method!=2)&&(method!=3)&&(method!=4)&&(method!=5)){
if (optionTR==1) printf("\nSelect a tree reconstruction method: \n\n");
if (optionTR==2) printf("\nSelect a tree reconstruction method to infer the additive tree used as a starting point for the reticulogram to be constructed:\n\n");
// Tree reconstruction menu (5 methods available)
printf(" 1. ADDTREE\n");
printf(" 2. Neighbor Joining \n");
printf(" 3. Unweighted Neighbor Joining\n");
printf(" 4. Circular order reconstruction \n");
printf(" 5. Weighted least-squares method MW\n\n");
scanf("%f",&method);
printf("\n");}
if (method==5) // options for the MW method
{
if (n>3)
{
Option=3;
while ((Option!=1)&&(Option!=2)){
printf("\n\nSelect the MW optimization option: \n\n");
printf(" 1. Global MW optimization\n");
printf(" 2. Local MW optimization\n\n");
scanf("%f",&Option);}
DI[0][0]=Option;
}
else Option=1;
Option1=3;
while ((Option1!=1)&&(Option1!=2)){
printf("\n\nWeight matrix W is: \n\n"); // Coice of the weight matrix W
printf(" 1. Of the form W=1/D^p\n");
printf(" 2. In a file \n\n");
scanf("%f",&Option1);}
if (Option1==1)
{
printf("\nEnter the power p such that W=1/D^p (no weights if p=0)\n");
scanf("%f",&Puissance); // W is given by a formula
for (j=1;j<=n;j++)
{
for (i=j+1;i<=n;i++)
{
if (DI[j][i]>0)
W[j][i]=pow(1/DI[j][i],Puissance);
else if (DI[j][i]<=0)
W[j][i]=0.000001;
W[i][j]=W[j][i];
}
W[j][j]=1;
}
}
else if (Option1==2) // reading W from a file
{
FILE *weights;
printf("\nEnter the file name for the matrix of weights W\n");
scanf("%s", source1);
if ((weights=fopen(source1,"r"))==0) { printf("\nFile %s was not found ", source1); exit(1); }
for (j=2; j<=n; j++)
{
for (i=1; i<=j-1; i++)
{
fscanf(weights,"%lf",&c);
W[i][j]=c;
W[j][i]=c;
}
W[j][j]=0;
}
fclose (weights);
}
}
printf("\nComputation begins ... Tree reconstruction ...\n");
if (method!=5) // Setting up W matrix if MW is not selected
{
for (i=1;i<=n;i++)
{
for (j=i+1;j<=n;j++)
{
W[i][j]=1;
W[j][i]=W[i][j];
}
W[i][i]=1;
}
}
W[0][0]=1;
if (n==2) // the trivial case - of two objects
{
DA[1][2]=DI[1][2];
DA[2][1]=DI[1][2];
DA[1][1]=0.0;
DA[2][2]=0.0;
}
else
{
if (n<=10) l=30;
else l=3*n;
Iternum=10;
if (optionTR==1) Scaling(DI,&power);
if (method==1) //method ADDTREE
ADDTREE(DI,DA1);
else if (method==2) //method NJ
NJ(DI,DA1);
else if (method==4) //method Circular Orders
parcour1(DI,W,DA,&Iternum,ARETE,LONGUEUR);
else if (method==3) //method UNJ
UNJ(DI,DA1);
else if (method==5) //method MW
{
if (Option==1)
DI[0][0]=1;
else
DI[0][0]=2;
parcour2(DI,W,DA,&Iternum,ARETE,LONGUEUR);
}
if (method<4) //edge lengths polishing for ADDTREE, NJ, and UNJ
approx_arb2 (DI,DA1,DA,W,&l,ARETE,LONGUEUR);
}
// Reticulogram reconstruction option
if (optionTR==2)
{
int Iternumber=(2*n-2)*(2*n-2-1)/2-2*n+3;
//Selection of the stopping rule
while ((OptionFunction!=1)&&(OptionFunction!=2)&&(OptionFunction!=3)){
printf("\nSelect a stopping rule for addition of new edges to the reticulogram:\n\n");
printf(" 1. When the criterion Q1 is minimized\n");
printf(" 2. When the criterion Q2 is minimized\n");
printf(" 3. Add a fixed number of edges K to the reticulogram\n\n");
scanf("%d",&OptionFunction);
printf("\n\n");}
if (OptionFunction==3)
{
Iternumber=-1;
while ((Iternumber<0)||(Iternumber>(2*n-2)*(2*n-2-1)/2-2*n+3)){
printf("Select the number of edges K to be placed into the reticulogram, 0 <= K < %d\n\n",(2*n-2)*(2*n-2-1)/2-2*n+3);
scanf("%d",&Iternumber);
printf("\n\n");}
}
CRITERION2 = new double [2*n-3 + Iternumber+1];
CRITERION1 = new double [2*n-3 + Iternumber+1];
free(ARETE); free(LONGUEUR);
ARETE=(long int *) malloc(((2*n-2)*(2*n-3)+1)*sizeof(long int));
LONGUEUR=(double *)malloc(((2*n-2)*(2*n-3)/2+1)*sizeof(double));
printf("\nComputation continues ... Reticulogram reconstruction ...\n");
RETICULATIONS (n, DI, DA, ARETE, LONGUEUR, OptionFunction, W, Iternumber, &ReticulationsNumber, CRITERION1, CRITERION2);
} //end of the reticulogram reconstruction option
}
// Printing results
Output1=fopen("Output.txt","w");
fprintf(Output1,"Input File for Dissimilarity matrix: %s\n",source);
if ((method==5)&&(Option1==2)) fprintf(Output1,"Input File for Weight matrix: %s\n",source1);
else if ((method==5)&&(Option1==1))
fprintf(Output1,"Weight matix is of the form W=1/D^%f \n",Puissance);
if (optionTR!=3)
{
if (method==1) fprintf(Output1,"\nTree reconstruction method - ADDTREE\n");
else if (method==2) fprintf(Output1,"\nTree reconstruction method - Neighbor Joining\n");
else if (method==3) fprintf(Output1,"\nTree reconstruction method - Unweighted Neighbor Joining\n");
else if (method==4) fprintf(Output1,"\nTree reconstruction method - Circular order reconstruction\n");
else if (method==5)
{fprintf(Output1,"\nTree reconstruction method - Weighted least-squares method MW");
if (Option==1) fprintf(Output1," (global optimization)\n");
else fprintf(Output1," (local optimization)\n");
}
}
else //if (optionTR==3)
{
fprintf(Output1,"\nTree reconstruction from incomplete dissimilarity matrix D\n\n");
if (optionMiss==1) fprintf(Output1,"\nTree reconstruction method - Triangles method\n");
else if (optionMiss==2) fprintf(Output1,"\nTree reconstruction method - Ultrametric procedure + MW\n");
else if (optionMiss==3) fprintf(Output1,"\nTree reconstruction method - Additive procedure + MW\n");
else if (optionMiss==4) fprintf(Output1,"\nTree reconstruction method - MW-modified\n");
}
if (power!=0.0) W[0][0]=2;
else if (PrintTreeMetric=='Y') W[0][0]=0.1;
else W[0][0]=0;
if (optionTR==3) { RAJ[1]=RESULTATS[0]; RAJ[2]=RESULTATS[1]; RAJ[3]=RESULTATS[2]; RAJ[0]=RESULTATS[3]; }
compute_criteres(DI,DA,RAJ,W,optionTR);
if ((power!=0.0)&&(n>2))
{
if (optionTR==1) Scaling1(DI,DA,RAJ,LONGUEUR,&power);
if (PrintTreeMetric=='Y') W[0][0]=0.1;
else W[0][0]=0;
compute_criteres(DI,DA,RAJ,W,optionTR);
}
if (n==2) { ARETE[0]=1;ARETE[1]=2;LONGUEUR[0]=DA[1][2]; }
if (optionTR!=2) ReticulationsNumber=2*n-3;
PrintEdges(ARETE,LONGUEUR,ReticulationsNumber,optionTR,CRITERION1,CRITERION2,OptionFunction);
fclose (Output1);
printf("\n\n");
printf("The results are in the file Output.txt\n\n");
}
// Method MW
void construction(double **D,double **DA,int *X,double **W)
{
int i,j,i1,p,P,xi,a,*Y,NV,NR,PP,PN,option,*ARETE;
double *A,am,c,am1,c1,k,k1,k2,k5,k6,k7,ki,**G1,S1,S2,
S3,M,MR,M1,DIS,DIS1,Su,Sv,*L,**L1,**L2,
**L3,*DIST,*DIST1;
double Dummy1;
Y=(int *) malloc((n+1)*sizeof(int));
A=(double *) malloc((8*n+8)*sizeof(double));
G1=(double **) malloc((2*n-2)*sizeof(double*));
L=(double *) malloc((n+1)*sizeof(double));
L1=(double **) malloc((2*n-2)*sizeof(double*));
L2=(double **) malloc((2*n-2)*sizeof(double*));
L3=(double **) malloc((2*n-2)*sizeof(double*));
DIST=(double *) malloc(3*sizeof(double));
DIST1=(double *) malloc(3*sizeof(double));
ARETE=(int *) malloc(3*sizeof(int));
for (i=0;i<=2*n-3;i++)
{
G1[i]=(double*)malloc((n+1)*sizeof(double));
L1[i]=(double*)malloc((n+1)*sizeof(double));
L2[i]=(double*)malloc((n+1)*sizeof(double));
L3[i]=(double*)malloc((n+1)*sizeof(double));
if ((G1[i]==NULL)||(L1[i]==NULL)||(L2[i]==NULL)||(L3[i]==NULL))
{
exit(1);
}
}
Dummy1=100;
if (D[X[1]][X[2]]<(n-1)*0.0002){ Dummy1=D[X[1]][X[2]]; D[X[1]][X[2]]=(n-1)*0.0002;}
option=X[0];
MR=0;
A[1]=X[1];
A[2]=X[2];
A[3]=0;
A[4]=D[X[1]][X[2]];
for (i=1;i<=n;i++)
Y[i]=1;
Y[X[1]]=0;
Y[X[2]]=0;
P=1;
DA[X[1]][X[1]]=0;
DA[X[2]][X[2]]=0;
DA[X[1]][X[2]]=A[4];
DA[X[2]][X[1]]=A[4];
xi=X[2];
for (j=1;j<=n;j++)
{
if ((j!=X[1])&&(j!=X[2]))
{
k1=DA[X[1]][xi]-D[xi][j];
k2=D[X[1]][j];
L[j]=W[X[1]][j]+W[X[2]][j];
L1[1][j]=2*(k1*W[X[2]][j]-k2*W[X[1]][j]);
L2[1][j]=2*(-k1*W[X[2]][j]-k2*W[X[1]][j]);
L3[1][j]=2*(W[X[1]][j]-W[X[2]][j]);
G1[1][j]=k1*k1*W[X[2]][j]+k2*k2*W[X[1]][j];
}
}
for (i=2;i<=n-1;i++)
{ a=0;
for (p=1;p<=P;p++)
if ((option!=2)||(i>3))
{
for (p=1;p<=P;p++)
{
for (j=1;j<=n;j++)
{
if (Y[j]==1)
{
xi=floor1(A[4*p-2]);
k1=DA[X[1]][xi]-D[xi][j];
k2=D[X[1]][j];
Su=A[4*p-1];
Sv=Su+A[4*p];
k6=L1[p][j];
k5=L2[p][j];
k7=L3[p][j];
k=L[j];
M=G1[p][j]+MR+1.0;
if ((2*k*Sv+k5<=0.00001)&&(k6+k7*Sv>=-0.00001))
{ M=k*Sv*Sv+k5*Sv;
am=Sv;
c=0;
}
if (((c1=-(k6+k7*Sv)/(2*k))>=-0.00001)&&(-k5-2*k*Sv-k7*c1>=-0.00001)&&((M1=k*(Sv*Sv+c1*c1)+k7*Sv*c1+k5*Sv+c1*k6)=Su-0.00001)&&(-k6-k7*am1<=0.00001)&&((M1=k*am1*am1+k5*am1)=-0.00001)&&((M1=k*(am1*am1+c1*c1)+k7*am1*c1+k5*am1+k6*c1)=Su-0.00001))
{
M=M1;
c=c1;
am=am1;
}
if (((4*k*k-k7*k7)==0)&&(k5==k6)&&((-k5/(2*k))>=Su-0.00001)&&((c1=-k5/(2*k)-Su)>=-0.00001)&&((M1=k*(Su+c1)*(Su+c1)+k5*(Su+c1))=-0.00001)&&((M1=k*Su*Su+k5*Su)=-0.00001)&&(-k6-k7*Su<=0.00001)&&((M1=k*Su*Su+k5*Su)=-0.00001)&&((k5+2*k*Su+c1*k7)>=-0.00001)&&((M1=k*Su*Su+k5*Su+c1*c1*k+c1*(Su*k7+k6))M+G1[p][j]))
{
a=1;
MR=M+G1[p][j];
DIS=am;
DIS1=c;
if (c<=(n-i+1.0)*0.00002) DIS1=(n-i+1.0)*0.00002;
if (fabs(Sv-Su)<=2*(n-i+1)*0.00002) DIS=Sv-(Sv-Su)/(n-i+1.0);
else
{
if (fabs(am-Su)<=(n-i+1.0)*0.00002) DIS=Su+(n-i+1.0)*0.00002;
if (fabs(am-Sv)<=(n-i+1.0)*0.00002) DIS=Sv-(n-i+1.0)*0.00002;
}
NV=j;
NR=p;
}
}
}
}
}
if ((option==2)&&(i<=3))
{
NV=X[i+1];
NR=ARETE[i-1];
DIS=DIST[i-1];
DIS1=DIST1[i-1];
}
/* Arrays Updating */
p=NR;
PP=P;
if (fabs(DIS-A[4*p-1])<=0.00001)
DIS=A[4*p-1];
if (fabs(DIS-A[4*p-1]-A[4*p])<=0.00001)
{
DIS=A[4*p-1]+A[4*p];
}
X[i+1]=NV;
xi=floor1(A[4*p-2]);
Y[NV]=0;
if ((DIS-A[4*p-1]>0)&&(DIS-A[4*p-1]-A[4*p]<0))
{
c=A[4*p];
A[4*p]=DIS-A[4*p-1];
P=P+1;
A[4*P-3]=P;
A[4*P-2]=A[4*p-2];
A[4*P-1]=DIS;
A[4*P]=A[4*p-1]+c-DIS;
}
if (DIS1>0)
{
P=P+1;
A[4*P-3]=P;
A[4*P-2]=X[i+1];
A[4*P-1]=DIS;
A[4*P]=DIS1;
}
DA[X[1]][X[i+1]]=DIS+DIS1;
DA[X[i+1]][X[1]]=DIS+DIS1;
/* The induction formula */
for (j=2;j<=i;j++)
{
if (((DA[X[1]][X[j]]+DA[X[1]][xi]-DA[xi][X[j]])/2)<=DIS)
DA[X[j]][X[i+1]]=DIS+DIS1+DA[X[j]][xi]-DA[X[1]][xi];
else
DA[X[j]][X[i+1]]=DIS1-DIS+DA[X[1]][X[j]];
DA[X[i+1]][X[j]]=DA[X[j]][X[i+1]];
DA[X[i+1]][X[i+1]]=0;
}
if ((P==PP+2)||((P==PP+1)&&(DIS1==0)))
PN=PP+1;
else
PN=PP;
if ((DIS-A[4*p-1]>0)&&(DIS-A[4*p-1]-c<0))
{
for(j=1;j<=n;j++)
{
if (Y[j]==1)
{
L1[PP+1][j]=L1[p][j];
L2[PP+1][j]=L2[p][j];
L3[PP+1][j]=L3[p][j];
G1[PP+1][j]=G1[p][j];
}
}
}
/* BLOCK 1 */
for (p=1;p<=PN;p++)
{
xi=floor1(A[4*p-2]);
for (j=1;j<=n;j++)
{
if (Y[j]==1)
{
if (((DA[X[1]][xi]+DA[X[1]][X[i+1]]-DA[X[i+1]][xi])/2)<=A[4*p-1]+0.00001)
{
ki=D[X[i+1]][j]+DA[X[1]][xi]-DA[X[i+1]][xi];
L1[p][j]=L1[p][j]-2*ki*W[X[i+1]][j];
L2[p][j]=L2[p][j]-2*ki*W[X[i+1]][j];
L3[p][j]=L3[p][j]+2*W[X[i+1]][j];
}
else
{
ki=D[X[i+1]][j]-DA[X[i+1]][X[1]];
L1[p][j]=L1[p][j]-2*ki*W[X[i+1]][j];
L2[p][j]=L2[p][j]+2*ki*W[X[i+1]][j];
L3[p][j]=L3[p][j]-2*W[X[i+1]][j];
}
if (p==1)
L[j]=L[j]+W[X[i+1]][j];
G1[p][j]=G1[p][j]+ki*ki*W[X[i+1]][j];
}
}
}
/* BLOCK 2 */
if (DIS1>0)
{
for (j=1;j<=n;j++)
{
if (Y[j]==1)
{
k1=DA[X[1]][X[i+1]]-D[X[i+1]][j];
k2=D[X[1]][j];
S1=W[X[1]][j]+W[X[i+1]][j];
S2=k2*W[X[1]][j];
S3=k1*k1*W[X[i+1]][j]+k2*k2*W[X[1]][j];
for (i1=2;i1<=i;i1++)
{
ki=D[X[i1]][j]+DA[X[1]][X[i+1]]-DA[X[i+1]][X[i1]];
S1=S1+W[X[i1]][j];
S2=S2+ki*W[X[i1]][j];
S3=S3+ki*ki*W[X[i1]][j];
}
L[j]=S1;
L1[P][j]=2*(k1*W[X[i+1]][j]-S2);
L2[P][j]=2*(-k1*W[X[i+1]][j]-S2);
L3[P][j]=2*(S1-2*W[X[i+1]][j]);
G1[P][j]=S3;
}
}
}
}
if (Dummy1<(n-1)*0.0002) D[X[1]][X[2]]=Dummy1;
free(L);
free(Y);
free(A);
free(DIST);
free(DIST1);
free(ARETE);
for (i=0;i<=2*n-3;i++)
{
free(G1[i]);
free(L1[i]);
free(L2[i]);
free(L3[i]);
}
free(G1);
free(L1);
free(L2);
free(L3);
}
/* Extacting a Circular order X of D*/
void odp(double **D,int *X,int *i1,int *j1)
{
double S1,S;
int i,j,k,a,*Y1;
Y1=(int *) malloc((n+1)*sizeof(int));
for(i=1;i<=n;i++)
Y1[i]=1;
X[1]=*i1;
X[n]=*j1;
if (n==2) return;
Y1[*i1]=0;
Y1[*j1]=0;
for(i=0;i<=n-3;i++)
{ a=2;
S=0;
for(j=1;j<=n;j++)
{ if (Y1[j]>0)
{
S1= D[X[n-i]][X[1]]-D[j][X[1]]+D[X[n-i]][j];
if ((a==2)||(S1<=S))
{
S=S1;
a=1;
X[n-i-1]=j;
k=j;
}
}
}
Y1[k]=0;
}
free(Y1);
}
// Neighbor Joining method
void NJ(double **D1,double **DA)
{
double **D,*T1,*S,*LP,Som,Smin,Sij,L,Lii,Ljj,l1,l2,l3;
int *T,i,j,ii,jj,n1;
D=(double **) malloc((n+1)*sizeof(double*));
T1=(double *) malloc((n+1)*sizeof(double));
S=(double *) malloc((n+1)*sizeof(double));
LP=(double *) malloc((n+1)*sizeof(double));
T=(int *) malloc((n+1)*sizeof(int));
for (i=0;i<=n;i++)
{
D[i]=(double*)malloc((n+1)*sizeof(double));
if (D[i]==NULL)
{
exit(1);
}
}
L=0;
Som=0;
for (i=1;i<=n;i++)
{
S[i]=0; LP[i]=0;
for (j=1;j<=n;j++)
{
D[i][j]=D1[i][j];
S[i]=S[i]+D[i][j];
}
Som=Som+S[i]/2;
T[i]=i;
T1[i]=0;
}
/* Main procedure */
for (n1=n;n1>3;n1--)
{
/* Research of the best pair of objects (i,j) for clustering*/
Smin=2*Som;
for (i=1;i<=n1-1;i++)
{
for (j=i+1;j<=n1;j++)
{
Sij=2*Som-S[i]-S[j]+D[i][j]*(n1-2);
if (Sij=-0.00001)&&((M1=k*(Sv*Sv+c1*c1)+B*Sv*c1+C*Sv+D*c1)=Su-0.00001)&&(a1<=Sv+0.00001)&&((M1=k*a1*a1+C*a1)=Su-0.00001)&&(a1<=Sv+0.00001)&&((c1=(B*C-2*k*D)/(4*k*k-B*B))>=-0.00001)&&((M1=k*(a1*a1+c1*c1)+B*a1*c1+C*a1+D*c1)=-0.00001)&&((M1=k*(Su*Su+c1*c1)+B*Su*c1+C*Su+D*c1)M+G1))
{
MR=M+G1;
DIS=a;
DIS1=c;
if (c<=(n-k+1.0)*0.00002) DIS1=(n-k+1.0)*0.00002;
if (fabs(Sv-Su)<=2*(n-k+1)*0.00002) DIS=Sv-(Sv-Su)/(n-k+1.0);
else
{
if (fabs(a-Su)<=(n-k+1.0)*0.00002) DIS=Su+(n-k+1.0)*0.00002;
if (fabs(a-Sv)<=(n-k+1.0)*0.00002) DIS=Sv-(n-k+1.0)*0.00002;
}
}
}
i=0;
S=0;
while (S0))
{
L[i+1]=DIS1;
P=i+1;
}
if ((S>DIS)&&(DIS1>0))
{
L[i]=L[i]+DIS-S;
L[i+1]=DIS1;
P=i+1;
}
DA[X[1]][X[k+1]]=DIS+DIS1;
DA[X[k+1]][X[1]]=DIS+DIS1;
/*Induction formula */
for (j=2;j<=k;j++)
{
if (((DA[X[1]][X[j]]+DA[X[1]][X[k]]-DA[X[k]][X[j]])/2)<=DIS+0.00001)
DA[X[j]][X[k+1]]=DIS+DIS1+DA[X[j]][X[k]]-DA[X[1]][X[k]];
else
DA[X[j]][X[k+1]]=DIS1-DIS+DA[X[1]][X[j]];
DA[X[k+1]][X[j]]=DA[X[j]][X[k+1]];
DA[X[k+1]][X[k+1]]=0;
}
}
if (Dummy1<(n-1)*0.0002) DI[X[1]][X[2]]=Dummy1;
free(A);
free(L);
}
// Compute Robinson and Foulds topological distance between two trees
// given by their distance matrices D and D1
int comparb (double **D,double **D1)
{
struct MATR { unsigned int V:1; };
struct MATR **A,**A1;
int i,j,k,p,p1,*P,*P1,*X,*X1,R;
double S,DIS,DIS1,*L,*L1;
P=(int *)malloc((2*n-2)*sizeof(int));
P1=(int *)malloc((2*n-2)*sizeof(int));
X=(int *)malloc((2*n-2)*sizeof(int));
X1=(int *)malloc((2*n-2)*sizeof(int));
L=(double *)malloc((2*n-2)*sizeof(double));
L1=(double *)malloc((2*n-2)*sizeof(double));
A=(MATR **)malloc((2*n-2)*sizeof(MATR*));
A1=(MATR **)malloc((2*n-2)*sizeof(MATR*));
for (i=0;i<=2*n-3;i++)
{
A[i]=(MATR *)malloc((n+1)*sizeof(MATR));
A1[i]=(MATR *)malloc((n+1)*sizeof(MATR));
if ((A[i]==NULL)||(A1[i]==NULL))
{
exit(1);
}
}
i=1; j=n;
odp(D,X,&i,&j);
odp(D1,X1,&i,&j);
for (i=1;i<=2*n-3;i++)
{
for (j=0;j<=n;j++)
{
A[i][j].V=0;
A1[i][j].V=0;
}
}
A[1][X[2]].V=1;
A1[1][X1[2]].V=1;
P[1]=1;
P1[1]=1;
L[1]=D[X[1]][X[2]];
L1[1]=D1[X1[1]][X1[2]];
p=1;
p1=1;
for(k=2;k<=n-1;k++)
{
S=0;
i=0;
DIS=(D[X[1]][X[k]]+D[X[1]][X[k+1]]-D[X[k]][X[k+1]])/2;
DIS1=(D[X[1]][X[k+1]]+D[X[k]][X[k+1]]-D[X[1]][X[k]])/2;
while (S=0.001)
{
p=p+1;
for (j=1;j<=k;j++)
A[p][X[j]].V=A[P[i]][X[j]].V;
L[P[i]]=L[P[i]]+DIS-S;
L[p]=S-DIS;
}
if (DIS1>=0.001)
{
p=p+1;
A[p][X[k+1]].V=1;
P[i+1]=p;
L[p]=DIS1;
}
S=0;
i=0;
DIS=(D1[X1[1]][X1[k]]+D1[X1[1]][X1[k+1]]-D1[X1[k]][X1[k+1]])/2;
DIS1=(D1[X1[1]][X1[k+1]]+D1[X1[k]][X1[k+1]]-D1[X1[1]][X1[k]])/2;
while (S=0.001)
{
p1=p1+1;
for (j=1;j<=k;j++)
A1[p1][X1[j]].V=A1[P1[i]][X1[j]].V;
L1[P1[i]]=L1[P1[i]]+DIS-S;
L1[p1]=S-DIS;
}
if (DIS1>=0.001)
{
p1=p1+1;
A1[p1][X1[k+1]].V=1;
P1[i+1]=p1;
L1[p1]=DIS1;
}
}
for (i=1;i<=p;i++)
{
for (k=1;k<=p1;k++)
{
if (A1[k][0].V==0)
{
S=0;
for (j=1;j<=n;j++)
{
if (A[i][j].V==A1[k][j].V)
S=S+1;
}
if ((S==0)||(S==n))
{
A1[k][0].V=1;
A[i][0].V=1;
k=p1;
}
}
}
}
R=0;
for (i=1;i<=p;i++)
{
if (A[i][0].V==0)
R=R+1;
}
for (i=1;i<=p1;i++)
{
if (A1[i][0].V==0)
R=R+1;
}
free(P);
free(X);
free(L);
free(P1);
free(X1);
free(L1);
for (i=0;i<=2*n-3;i++)
{
free(A[i]);
free(A1[i]);
}
free(A);
free(A1);
return R;
}
// Compare if two trees given by their distance matrices D and D1 are identical
int comparb1 (double **D,double **D1)
{
int i,j,k,p,p1,*X,*Y;
double S,S1,DIS,DIS1,DIS0,DIS10,*L,*L1;
X=(int *)malloc((n+1)*sizeof(int));
Y=(int *)malloc((n+1)*sizeof(int));
L=(double *)malloc((n+1)*sizeof(double));
L1=(double *)malloc((n+1)*sizeof(double));
i=1; j=n;
odp(D,X,&i,&j);
for (j=1;j<=n;j++)
Y[j]=0;
Y[X[1]]=1;
Y[X[n]]=1;
for (i=0;i<=n-3;i++)
{
S=D1[X[n-i]][X[n-i-1]]-D1[X[n-i-1]][X[1]];
Y[X[n-i-1]]=1;
for (j=2;j<=n-i-2;j++)
{
S1=D1[X[n-i]][X[j]]-D1[X[j]][X[1]];
if (S10.001))||((S-DIS>0.001)&&(S1-DIS0<0.001)))
{
free(Y);
free(L);
free(L1);
free(X);
return 0;
}
p=i;
if ((S-DIS)>=0.001)
{
L[i]=L[i]+DIS-S;
}
if (DIS1>=0.001)
{
p=p+1;
L[p]=DIS1;
}
p1=j;
if ((S1-DIS0)>=0.001)
{
L1[j]=L1[j]+DIS0-S1;
}
if (DIS10>=0.001)
{
p1=p1+1;
L1[p1]=DIS10;
}
}
free(Y);
free(L);
free(L1);
free(X);
return 1;
}
// Coding the tree metric DA (n,n) by the vectors C (2n-3) and X(n)
void coder(double **DA,int *X,double *C)
{
int i,j;
i=1; j=n;
odp(DA,X,&i,&j);
for (i=1;i<=n-2; i++)
{
C[2*i-1]=DA[X[i]][X[i+1]];
C[2*i]=DA[X[1]][X[i+2]];
}
C[2*n-3]=DA[X[n-1]][X[n]];
}
// Decoding the tree metric DA (n,n) from the vectors C (2n-3) and X(n)
void decoder(double **DA,int *X,double *C)
{
int i,j;
for (i=1;i<=n-2; i++)
{
DA[X[i]][X[i+1]]=C[2*i-1];
DA[X[1]][X[i+2]]=C[2*i];
DA[X[i+1]][X[i]]=C[2*i-1];
DA[X[i+2]][X[1]]=C[2*i];
}
DA[X[n-1]][X[n]]=C[2*n-3];
DA[X[n]][X[n-1]]=C[2*n-3];
DA[X[1]][X[1]]=0;
DA[X[2]][X[2]]=0;
DA[X[3]][X[3]]=0;
for (i=4;i<=n;i++)
{
DA[X[i]][X[i]]=0;
for (j=2;j<=i-2;j++)
{
if (DA[X[1]][X[j]]+DA[X[i-1]][X[i]]R[3]) R[3]=W[i1][j1]*s;
}
}
}
if ((W[0][0]<1)||(W[0][0]==2))
{
R[2]=2*R[2]/n/(n-1);
i1=1; j1=n;
odp(DA,X,&i1,&j1);
R[0]=DA[X[1]][X[n]];;
for (i1=1;i1<=n-1;i1++)
R[0]=R[0]+DA[X[i1]][X[i1+1]];
}
R[0]=R[0]/2;
}
// Print tree metric matrix if needed
if (W[0][0]==0.1)
{
if (optionTR!=2) fprintf(Output1,"\n\nTREE METRIC (ADDITIVE DISTANCE) MATRIX (AD)\n\n");
else fprintf(Output1,"\n\nRETICULOGRAM DISTANCE MATRIX (AD)\n\n");
for (i1=1; i1<=n-1; i1++)
fprintf(Output1," %d ",i1);
fprintf(Output1,"\n\n");
for (j1=2; j1<=n; j1++)
{
if (n<10) fprintf(Output1,"%d ",j1);
else if (n<100)
{if (j1<10) fprintf(Output1," %d ",j1);
else fprintf(Output1,"%d ",j1);}
else if (n<1000)
{if (j1<10) fprintf(Output1," %d ",j1);
if (j1<100) fprintf(Output1," %d ",j1);
else fprintf(Output1,"%d ",j1);}
else
{if (j1<10) fprintf(Output1," %d ",j1);
if (j1<100) fprintf(Output1," %d ",j1);
if (j1<1000) fprintf(Output1," %d ",j1);
else fprintf(Output1,"%d ",j1);}
for (i1=1; i1<=j1-1; i1++)
{
c=(float) DA[i1][j1];
fprintf(Output1,"%f ",c);
}
fprintf(Output1,"\n");
}
}
if (W[0][0]<1)
if (optionTR!=2) fprintf(Output1,"\n\nTHE FOLLOWING STATISTICS ARE AVAILABLE FOR \nA GIVEN DISSIMILARITY (D) AND AN OBTAINED TREE METRIC (AD)\n\n");
else fprintf(Output1,"\n\nTHE FOLLOWING STATISTICS ARE AVAILABLE FOR \nA GIVEN DISSIMILARITY (D) AND AN OBTAINED RETICULOGRAM DISTANCE (AD)\n\n");
if (optionTR==3) fprintf(Output1,"Only existing values of D were taken into account when computing these statistics\n\n");
if ((W[0][0]<1)&&(method==5))
{
fprintf(Output1,"\n");
if (fabs(R[1])<1) fprintf(Output1, "Weighted least-squares coefficient Sum Wij(Dij-ADij)^2 = %12.10f\n",R[1]);
else fprintf(Output1, "Weighted least-squares coefficient Sum Wij(Dij-ADij)^2 = %f\n",R[1]);
fprintf(Output1," i10) Iternumber=2*n-3;
else Iternumber=15;
X[1]=iopt;
X[2]=jopt;
construction(DISS,TM1,X,W);
approx_arb2 (DISS,TM1,TM,W,&Iternumber,ARETE,LONGUEUR);
free(X);
for (i=0;i<=n;i++)
free(TM1[i]);
free(TM1);
}
// ADDTREE method
void ADDTREE(double **D1,double **DA)
{
double Som,Smin,Sij,L,Lii,Ljj,l1,l2,l3;
int *T,i,j,ii,jj,n1,i1,j1;
double **D,*LP,*S,*T1;
D=(double **) malloc((n+1)*sizeof(double*));
T1=(double *) malloc((n+1)*sizeof(double));
S=(double *) malloc((n+1)*sizeof(double));
LP=(double *) malloc((n+1)*sizeof(double));
T=(int *) malloc((n+1)*sizeof(int));
for (i=0;i<=n;i++)
{
D[i]=(double*)malloc((n+1)*sizeof(double));
if (D[i]==NULL)
{
exit(1);
}
}
L=0;
Som=0;
for (i=1;i<=n;i++)
{
S[i]=0; LP[i]=0;
for (j=1;j<=n;j++)
{
D[i][j]=D1[i][j];
S[i]=S[i]+D[i][j];
}
Som=Som+S[i]/2.0;
T[i]=i;
T1[i]=0;
}
/* Main procedure */
for (n1=n;n1>3;n1--)
{
/* Research of the best pair of objects (i,j) for clustering*/
Smin=0.0;
for (i=1;i<=n1-1;i++)
{
for (j=i+1;j<=n1;j++)
{
Sij=0.0;
for (i1=1;i1<=n1-1;i1++)
{
if ((i1!=i)&&(i1!=j))
{
for (j1=i1+1;j1<=n1;j1++)
{
if ((j1!=i)&&(j1!=j))
{
if ((D[i][i1]+D[j][j1]-D[i][j]-D[i1][j1]>=0)&&(D[i][j1]+D[j][i1]-D[i][j]-D[i1][j1]>=0))
Sij=Sij+1.0;
}
}
}
}
if (Sij>Smin)
{
Smin=Sij;
ii=i;
jj=j;
}
}
}
/* New clustering */
Lii=(D[ii][jj]+(S[ii]-S[jj])/(n1-2.0))/2-LP[ii];
Ljj=(D[ii][jj]+(S[jj]-S[ii])/(n1-2.0))/2-LP[jj];
if (Lii<0.00001) Lii=0.00005;
if (Ljj<0.00001) Ljj=0.00005;
L=L+Lii+Ljj;
LP[ii]=0.5*D[ii][jj];
Som=Som-(S[ii]+S[jj])/2.0;
for (i=1;i<=n1;i++)
{
if ((i!=ii)&&(i!=jj))
{
S[i]=S[i]-0.5*(D[i][ii]+D[i][jj]);
D[i][ii]=(D[i][ii]+D[i][jj])/2;
D[ii][i]=D[i][ii];
}
}
D[ii][ii]=0;
S[ii]=0.5*(S[ii]+S[jj])-D[ii][jj];
if (jj!=n1)
{
for (i=1;i<=n1-1;i++)
{
D[i][jj]=D[i][n1];
D[jj][i]=D[n1][i];
}
D[jj][jj]=0;
S[jj]=S[n1];
LP[jj]=LP[n1];
}
/* Mise a jour de DA */
for (i=1;i<=n;i++)
{
if (T[i]==ii) T1[i]=T1[i]+Lii;
if (T[i]==jj) T1[i]=T1[i]+Ljj;
}
for (j=1;j<=n;j++)
{
if (T[j]==jj)
{
for (i=1;i<=n;i++)
{
if (T[i]==ii)
{
DA[i][j]=T1[i]+T1[j];
DA[j][i]=DA[i][j];
}
}
}
}
for (j=1;j<=n;j++)
if (T[j]==jj) T[j]=ii;
if (jj!=n1)
{
for (j=1;j<=n;j++)
if (T[j]==n1) T[j]=jj;
}
}
/*3 objects remaining*/
l1=(D[1][2]+D[1][3]-D[2][3])/2.0-LP[1];
l2=(D[1][2]+D[2][3]-D[1][3])/2.0-LP[2];
l3=(D[1][3]+D[2][3]-D[1][2])/2.0-LP[3];
if (l1<0.00001) l1=0.00005;
if (l2<0.00001) l2=0.00005;
if (l3<0.00001) l3=0.00005;
L=L+l1+l2+l3;
for (j=1;j<=n;j++)
{
for (i=1;i<=n;i++)
{
if ((T[j]==1)&&(T[i]==2))
{
DA[i][j]=T1[i]+T1[j]+l1+l2;
DA[j][i]=DA[i][j];
}
if ((T[j]==1)&&(T[i]==3))
{
DA[i][j]=T1[i]+T1[j]+l1+l3;
DA[j][i]=DA[i][j];
}
if ((T[j]==2)&&(T[i]==3))
{
DA[i][j]=T1[i]+T1[j]+l2+l3;
DA[j][i]=DA[i][j];
}
}
DA[j][j]=0;
}
free(T);
free(T1);
free(S);
free(LP);
for (i=0;i<=n;i++)
{
free(D[i]);
}
free(D);
}
// Circular order reconstruction, main procedure
void parcour1(double **DISS,double **W,double **TM,int *Iternum,long int *ARETE, double *LONGUEUR)
{
int i,j,i1,j1,iopt,jopt,*X,Iternumber;
double **TM1;
double EQ,EQmin;
X=(int *) malloc((n+1)*sizeof(int));
TM1=(double **) malloc((n+1)*sizeof(double*));
for (i=0;i<=n;i++)
{
TM1[i]=(double*)malloc((n+1)*sizeof(double));
if (TM1[i]==NULL)
{
exit(1);
}
}
Iternumber=*Iternum;
for (i=1;i<=n;i++)
{
for (j=1;j<=n;j++)
{ if (i!=j)
{
odp(DISS,X,&i,&j);
construction2(DISS,TM1,X);
approx_arb2 (DISS,TM1,TM,W,&Iternumber,ARETE,LONGUEUR);
EQ=0.0;
for (i1=1;i1<=n;i1++)
{
for (j1=i1+1;j1<=n;j1++)
EQ=EQ+(DISS[i1][j1]-TM[i1][j1])*(DISS[i1][j1]-TM[i1][j1]);
}
if (((i==1)&&(j==2))||(EQ10) Iternumber=2*n-3;
else Iternumber=15;
odp(DISS,X,&iopt,&jopt);
construction2(DISS,TM1,X);
approx_arb2 (DISS,TM1,TM,W,&Iternumber,ARETE,LONGUEUR);
free(X);
for (i=0;i<=n;i++)
free(TM1[i]);
free(TM1);
}
// Compute the list of tree edges of a given tree distance DI
void Tree_edges (double **DI, long int *ARETE, double *LONGUEUR)
{
struct EDGE { unsigned int U; unsigned int V; double LN;};
struct EDGE *Path,*Tree;
int i,j,k,p,P,*X;
double S,DIS,DIS1,*L,**D;
X=(int *)malloc((n+1)*sizeof(int));
L=(double *)malloc((n+1)*sizeof(double));
Tree=(EDGE *)malloc((2*n-2)*sizeof(EDGE));
Path=(EDGE *)malloc((n+2)*sizeof(EDGE));
D=(double **) malloc((n+1)*sizeof(double*));
for (i=0;i<=n;i++)
{
D[i]=(double*)malloc((n+1)*sizeof(double));
if (D[i]==NULL)
{
// MyDebugStr("\pData matrix is too large ");
exit(1);
}
}
i=1; j=n;
odp(DI,X,&i,&j);
for (i=1;i<=n;i++)
{
for (j=1;j<=n;j++)
D[i][j]=DI[i][j];
}
L[1]=D[X[1]][X[2]];
Path[1].U=X[1];
Path[1].V=X[2];
p=0;
P=1;
for(k=2;k<=n-1;k++)
{
DIS=(D[X[1]][X[k]]+D[X[1]][X[k+1]]-D[X[k]][X[k+1]])/2;
DIS1=(D[X[1]][X[k+1]]+D[X[k]][X[k+1]]-D[X[1]][X[k]])/2;
S=0.0;
i=0;
if (DIS>0.00001)
{
while (S3;n1--)
{
/* Research of the best pair of objects (i,j) to be agglomerated */
Smin=-S[1]-S[2]+D[1][2]*(n1-2);
for (i=1;i<=n1-1;i++)
{
for (j=i+1;j<=n1;j++)
{
Sij=-S[i]-S[j]+D[i][j]*(n1-2);
if (Sij<=Smin)
{
Smin=Sij;
ii=i;
jj=j;
}
}
}
/* New clustering*/
S1=0;
for (i=1;i<=n1;i++)
{
if ((i!=ii)&&(i!=jj))
S1=S1+NUMBER[i]*(D[ii][i]-D[jj][i]);
}
Lii=S1/(2.0*(n-NUMBER[ii]-NUMBER[jj]))+0.5*D[ii][jj];
Ljj=-S1/(2.0*(n-NUMBER[ii]-NUMBER[jj]))+0.5*D[ii][jj];
if (Lii<0.00001) Lii=0.00005;
if (Ljj<0.00001) Ljj=0.00005;
L=L+Lii+Ljj;
S[ii]=0;
for (i=1;i<=n1;i++)
{
if ((i!=ii)&&(i!=jj))
{
S1=D[i][ii];
D[i][ii]=(NUMBER[ii]*(D[i][ii]-Lii)+NUMBER[jj]*(D[i][jj]-Ljj))/(NUMBER[ii]+NUMBER[jj]);
S[i]=S[i]+D[i][ii]-(D[i][jj]+S1);
D[ii][i]=D[i][ii];
S[ii]=S[ii]+D[i][ii];
}
}
S1=S[ii];
D[ii][ii]=0;
NUMBER[ii]=NUMBER[ii]+NUMBER[jj];
if (jj!=n1)
{
for (i=1;i<=n1-1;i++)
{
D[i][jj]=D[i][n1];
D[jj][i]=D[n1][i];
}
D[jj][jj]=0;
S[jj]=S[n1];
NUMBER[jj]=NUMBER[n1];
}
for (i=1;i<=n;i++)
{
if (T[i]==ii) T1[i]=T1[i]+Lii;
if (T[i]==jj) T1[i]=T1[i]+Ljj;
}
for (j=1;j<=n;j++)
{
if (T[j]==jj)
{
for (i=1;i<=n;i++)
{
if (T[i]==ii)
{
DA[i][j]=T1[i]+T1[j];
DA[j][i]=DA[i][j];
}
}
}
}
for (j=1;j<=n;j++)
if (T[j]==jj) T[j]=ii;
if (jj!=n1)
{
for (j=1;j<=n;j++)
if (T[j]==n1) T[j]=jj;
}
}
/*3 objects remaining */
l1=(D[1][2]+D[1][3]-D[2][3])/2;
l2=(D[1][2]+D[2][3]-D[1][3])/2;
l3=(D[1][3]+D[2][3]-D[1][2])/2;
if (l1<0.00001) l1=0.00005;
if (l2<0.00001) l2=0.00005;
if (l3<0.00001) l3=0.00005;
L=L+l1+l2+l3;
for (j=1;j<=n;j++)
{
for (i=1;i<=n;i++)
{
if ((T[j]==1)&&(T[i]==2))
{
DA[i][j]=T1[i]+T1[j]+l1+l2;
DA[j][i]=DA[i][j];
}
if ((T[j]==1)&&(T[i]==3))
{
DA[i][j]=T1[i]+T1[j]+l1+l3;
DA[j][i]=DA[i][j];
}
if ((T[j]==2)&&(T[i]==3))
{
DA[i][j]=T1[i]+T1[j]+l2+l3;
DA[j][i]=DA[i][j];
}
}
DA[j][j]=0;
}
free(T);
free(T1);
free(S);
free(LP);
free(NUMBER);
for (i=0;i<=n;i++)
{
free(D[i]);
}
free(D);
}
// returns the closest integer value of x
int floor1(double x)
{
int i;
if (ceil(x)-floor(x)==2.0) i=(int)x;
else if (fabs(x-floor(x)) > fabs(x-ceil(x))) i=(int)ceil(x);
else i=(int)floor(x);
return i;
}
// Procedure for the polishing the tree metric TM subject to the dissimilarity DISS and the weight function W,
// Iternum is the number of iteration in the Gauss-Seidel iterative procedure applyed,
// TMnew is the matrix of the polished tree metric,
// ARETE is the vector of tree edges, and LONGUEUR is the vector of their lengths.
void approx_arb2(double **DISS,double **TM,double **TMnew,double **W, int *Iternum, long int *ARETE, double *LONGUEUR)
{
long int *Level, *Score, *EX1, *EX2, *Succ1, *Succ2, *Succ11, *Succ22, *Neighbour1, *Neighbour2;
int i,j,k,k1,j1,i1,i2,*Flag, **Part, **Vertices, *VertexNumber;
double *L, **B, *C, Sum, Sum1, *Path, EQ, l;
// Variable declaration
Level=(long int *)malloc((2*n-1)*sizeof(long int));
Score=(long int *)malloc((2*n-1)*sizeof(long int));
EX1=(long int *)malloc((2*n-1)*sizeof(long int));
EX2=(long int *)malloc((2*n-1)*sizeof(long int));
Succ1=(long int *)malloc((2*n-1)*sizeof(long int));
Succ2=(long int *)malloc((2*n-1)*sizeof(long int));
Succ11=(long int *)malloc((2*n-1)*sizeof(long int));
Succ22=(long int *)malloc((2*n-1)*sizeof(long int));
C=(double *)malloc((2*n-1)*sizeof(double));
B=(double **)malloc((2*n-1)*sizeof(double*));
VertexNumber=(int *)malloc((2*n-1)*sizeof(int));
Vertices=(int **)malloc((2*n-1)*sizeof(int*));
Part=(int **)malloc((2*n-1)*sizeof(int*));
Neighbour1=(long int *)malloc((2*n-1)*sizeof(long int));
Neighbour2=(long int *)malloc((2*n-1)*sizeof(long int));
Flag=(int *)malloc((2*n-1)*sizeof(int));
L=(double *)malloc((2*n-1)*sizeof(double));
Path=(double *)malloc((n+1)*sizeof(double));
for (i=0;i<=2*n-2;i++)
{
Vertices[i]=(int*)malloc((n+1)*sizeof(int));
B[i]=(double*)malloc((2*n-1)*sizeof(double));
Part[i]=(int*)malloc((2*n-1)*sizeof(int));
if ((B[i]==NULL)||(Part[i]==NULL)||(Vertices[i]==NULL))
{
printf ("Data matrix is too large "); exit(1);
}
}
// Variable initialisation
Tree_edges (TM,ARETE,LONGUEUR);
for (i=1;i<=2*n-3;i++)
{
EX1[i]=ARETE[2*i-2];
EX2[i]=ARETE[2*i-1];
L[i]=LONGUEUR[i-1];
Flag[i]=0;
if (EX1[i]<=n)
{ Score[EX1[i]]=3; Succ1[EX1[i]]=0; Succ2[EX1[i]]=0; Score[EX2[i]]=0; }
else if (EX2[i]<=n)
{ Score[EX2[i]]=3; Succ1[EX2[i]]=0; Succ2[EX2[i]]=0; Score[EX1[i]]=0; }
else { Score[EX1[i]]=0; Score[EX2[i]]=0; }
}
for (i=1;i<=2*n-3;i++)
{
if (i<=n) { Path[i]=0.0; TMnew[i][i]=0; }
if ((EX1[i]<=n)||(EX2[i]<=n))
{
VertexNumber[i]=1;
if (EX1[i]<=n) Vertices[i][1]=EX1[i];
else Vertices[i][1]=EX2[i];
}
else VertexNumber[i]=0;
for (j=1;j<=2*n-3;j++)
{
Part[i][j]=0;
Part[j][i]=0;
}
}
// Filling in the vectors Level(2n-3), VertexNumber(2n-3),
// and the matrices Part(2n-3, 2n-3), Vertices(2n-3,n)
j=1;
while (j<2*n-2)
{
for (i=1; i<=2*n-3; i++)
{
if (Score[EX1[i]]==5) Score[EX1[i]]=3;
if (Score[EX2[i]]==5) Score[EX2[i]]=3;
if (((Score[EX1[i]]==3)&&(Score[EX2[i]]!=4))||((Score[EX2[i]]==3)&&(Score[EX1[i]]!=4)))
{
if (Score[EX1[i]]==3) { k=EX2[i]; k1=EX1[i]; }
else { k=EX1[i]; k1=EX2[i]; }
if (Score[k]<2)
{
Score[k]=Score[k]+1;
if (Score[k]==1) { Succ1[k]=i; Neighbour1[k]=k1; Neighbour2[k]=0;}
else { Succ2[k]=i; Neighbour2[k]=k1; }
}
}
}
for (i=1; i<=2*n-3; i++)
{
if (((Score[EX1[i]]==1)&&(Score[EX2[i]]==3))||((Score[EX1[i]]==2)&&(Score[EX2[i]]==3))||
((Score[EX1[i]]==3)&&(Score[EX2[i]]==1))||((Score[EX1[i]]==3)&&(Score[EX2[i]]==2)))
{
if ((Score[EX1[i]]==1)||(Score[EX1[i]]==2)) { k=EX1[i]; k1=EX2[i]; }
else { k=EX2[i]; k1=EX1[i]; }
if(Flag[i]==0)
{
Succ11[i]=Succ1[k1];
Succ22[i]=Succ2[k1];
Level[j]=i; j=j+1; Score[k1]=4;
Flag[i]=1;
if ((Score[Neighbour1[k]]==4)&&(Score[Neighbour2[k]]==4)) Score[k]=5;
if (j>n+1)
{
VertexNumber[i]=VertexNumber[Succ11[i]]+VertexNumber[Succ22[i]];
for (i1=1; i1<=VertexNumber[Succ11[i]]; i1++)
Vertices[i][i1]=Vertices[Succ11[i]][i1];
for (i1=VertexNumber[Succ11[i]]+1; i1<=VertexNumber[i]; i1++)
Vertices[i][i1]=Vertices[Succ22[i]][i1-VertexNumber[Succ11[i]]];
Part[Succ11[i]][i]=1; Part[Succ22[i]][i]=1;
Part[i][Succ11[i]]=1; Part[i][Succ22[i]]=1;
for (i1=1; i1<=2*n-3; i1++)
{
if ((Part[Succ11[i]][i1]==1)||(Part[Succ22[i]][i1]==1))
{
Part[i][i1]=1; Part[i1][i]=1;
}
}
}
}
if(Score[k]==2)
{
Score[k]=5;
Succ11[Succ2[k]]=Succ1[Neighbour2[k]];
Succ22[Succ2[k]]=Succ2[Neighbour2[k]];
Level[j]=Succ2[k]; j=j+1; Score[Neighbour2[k]]=4;
Flag[Succ2[k]]=1;
j1=i;
i=Succ2[k];
if (j>n+1)
{
VertexNumber[i]=VertexNumber[Succ11[i]]+VertexNumber[Succ22[i]];
for (i1=1; i1<=VertexNumber[Succ11[i]]; i1++)
Vertices[i][i1]=Vertices[Succ11[i]][i1];
for (i1=VertexNumber[Succ11[i]]+1; i1<=VertexNumber[i]; i1++)
Vertices[i][i1]=Vertices[Succ22[i]][i1-VertexNumber[Succ11[i]]];
Part[Succ11[i]][i]=1; Part[Succ22[i]][i]=1;
Part[i][Succ11[i]]=1; Part[i][Succ22[i]]=1;
for (i1=1; i1<=2*n-3; i1++)
{
if ((Part[Succ11[i]][i1]==1)||(Part[Succ22[i]][i1]==1))
{
Part[i][i1]=1; Part[i1][i]=1;
}
}
}
i=j1;
}
}
}
for (i=1; i<=2*n-3; i++)
{
if (((Score[EX1[i]]==5)&&(Score[EX2[i]]==3))||((Score[EX1[i]]==3)&&(Score[EX2[i]]==5))||
((Score[EX1[i]]==5)&&(Score[EX2[i]]==5)))
{
if ((Score[EX1[i]]==3)||((Score[EX1[i]]==5)&&(Score[EX2[i]]==5)))
{ k=EX1[i]; k1=EX2[i]; }
else { k=EX2[i]; k1=EX1[i]; }
Level[j]=i; j=j+1;
Succ11[i]=Succ1[k];
Succ22[i]=Succ2[k];
Score[k]=4;
Score[k1]=4;
if (j>n+1)
{
VertexNumber[i]=VertexNumber[Succ11[i]]+VertexNumber[Succ22[i]];
for (i1=1; i1<=VertexNumber[Succ11[i]]; i1++)
Vertices[i][i1]=Vertices[Succ11[i]][i1];
for (i1=VertexNumber[Succ11[i]]+1; i1<=VertexNumber[i]; i1++)
Vertices[i][i1]=Vertices[Succ22[i]][i1-VertexNumber[Succ11[i]]];
Part[Succ11[i]][i]=1; Part[Succ22[i]][i]=1;
Part[i][Succ11[i]]=1; Part[i][Succ22[i]]=1;
for (i1=1; i1<=2*n-3; i1++)
{
if ((Part[Succ11[i]][i1]==1)||(Part[Succ22[i]][i1]==1))
{
Part[i][i1]=1; Part[i1][i]=1;
}
}
}
}
}
}
// Filling in the matrix B(2n-3,2n-3) and the vector C(2n-3)
for (i=1; i<=2*n-3; i++)
{
B[i][i]=0;
C[i]=0;
for (j=1; j<=2*n-3; j++)
{
if ((EX1[i]<=n)||(EX2[i]<=n))
{
if (EX1[i]<=n) k=EX1[i];
else k=EX2[i];
if (((EX1[j]<=n)||(EX2[j]<=n))&&(i!=j))
{
if (EX1[j]<=n) k1=EX1[j];
else k1=EX2[j];
B[i][j]=W[k][k1];
B[j][i]=W[k][k1];
B[i][i]=B[i][i]+B[i][j];
C[i]=C[i]+DISS[k][k1]*W[k][k1];
}
}
}
}
for (k=n+1; k<=2*n-3; k++)
{
i=Level[k];
Sum=0.0;
Sum1=0.0;
for (j=1; j<=VertexNumber[Succ11[i]]; j++)
{
for (j1=1; j1<=VertexNumber[Succ22[i]]; j1++)
{
Sum=Sum+2*W[Vertices[Succ11[i]][j]][Vertices[Succ22[i]][j1]];
Sum1=Sum1+2*DISS[Vertices[Succ11[i]][j]][Vertices[Succ22[i]][j1]]*
W[Vertices[Succ11[i]][j]][Vertices[Succ22[i]][j1]];
}
}
B[i][i]=B[Succ11[i]][Succ11[i]]+B[Succ22[i]][Succ22[i]]-Sum;
C[i]=C[Succ11[i]]+C[Succ22[i]]-Sum1;
}
for (j1=n+1; j1<=2*n-3; j1++)
{
i=Level[j1];
for (i1=1; i1=i+1) Sum=Sum+B[i][j]*L[j];
if (j<=i-1) Sum1=Sum1+B[i][j]*L[j];
}
l=L[i];
L[i]=(-Sum-Sum1+C[i])/B[i][i];
if (L[i]<0) L[i]=0.0;
EQ=EQ+sqrt((L[i]-l)*(L[i]-l));
}
}
for (i=1;i<=2*n-3;i++)
{
LONGUEUR[i-1]=L[i];
}
// Computation of the new tree distance matrix TMnew(n,n) from the list of edges
for (j=n+1; j<=2*n-3; j++)
{
i=Level[j];
for (i1=1; i1<=VertexNumber[Succ11[i]]; i1++)
Path[Vertices[Succ11[i]][i1]]=Path[Vertices[Succ11[i]][i1]]+L[Succ11[i]];
for (i1=1; i1<=VertexNumber[Succ22[i]]; i1++)
Path[Vertices[Succ22[i]][i1]]=Path[Vertices[Succ22[i]][i1]]+L[Succ22[i]];
for (i1=1; i1<=VertexNumber[Succ11[i]]; i1++)
{
k=Vertices[Succ11[i]][i1];
for (j1=1; j1<=VertexNumber[Succ22[i]]; j1++)
{
k1=Vertices[Succ22[i]][j1];
TMnew[k][k1]=Path[k]+Path[k1];
TMnew[k1][k]=TMnew[k][k1];
}
}
}
i=Level[2*n-3];
if ((EX1[i]==EX1[Succ11[i]])||(EX1[i]==EX2[Succ11[i]])) k=EX2[i];
else k=EX1[i];
j1=1;
for (i1=1; i1<=2*n-4; i1++)
{
k1=Level[i1];
if ((EX1[k1]==k)||(EX2[k1]==k))
{
if (j1==1) { i=k1; j1=2; }
else { j=k1; break; }
}
}
for (i1=1; i1<=VertexNumber[i]; i1++)
{
for (j1=1; j1<=VertexNumber[j]; j1++)
{
k=Vertices[i][i1];
k1=Vertices[j][j1];
TMnew[k][k1]=Path[k]+Path[k1]+L[i]+L[j];
TMnew[k1][k]=TMnew[k][k1];
}
}
i2=Level[2*n-3];
for (i1=1; i1<=VertexNumber[i2]; i1++)
{
k=Vertices[i2][i1];
for (j1=1; j1<=VertexNumber[i]; j1++)
{
k1=Vertices[i][j1];
TMnew[k][k1]=Path[k]+Path[k1]+L[i2]+L[i];
TMnew[k1][k]=TMnew[k][k1];
}
for (j1=1; j1<=VertexNumber[j]; j1++)
{
k1=Vertices[j][j1];
TMnew[k][k1]=Path[k]+Path[k1]+L[i2]+L[j];
TMnew[k1][k]=TMnew[k][k1];
}
}
free(Level);
free(Score);
free(EX1);
free(EX2);
free(Succ1);
free(Succ2);
free(Succ11);
free(Succ22);
free(C);
free(VertexNumber);
free(Neighbour1);
free(Neighbour2);
free(Flag);
free(L);
free(Path);
for (i=0;i<=2*n-2;i++)
{
free(B[i]);
free(Part[i]);
free(Vertices[i]);
}
free(B);
free(Part);
free(Vertices);
}
// Scaling of the dissimilarity entries to avoid too small or too large values
void Scaling(double **DI, double *power)
{
int i,j;
double MAX, MIN, POWER;
MAX=fabs(DI[1][2]);
MIN=fabs(DI[1][2]);
for (i=1;i<=n;i++)
{
for (j=i+1;j<=n;j++)
{
if (fabs(DI[i][j])>MAX) MAX=fabs(DI[i][j]);
if (fabs(DI[i][j])100000.0))
{
POWER=-2;
while (MAX>pow(10,-POWER))
POWER=POWER-1;
if (POWER!=0)
{
for (i=1;i<=n;i++)
{
for (j=i+1;j<=n;j++)
{
DI[i][j]=DI[i][j]*pow(10,POWER);
DI[j][i]=DI[i][j];
}
}
}
}
*power=POWER;
}
// The procedure inverse to Scaling to restore true dissimilarity and tree metric values
void Scaling1(double **DI,double **DA,double *RAJ,double *LONGUEUR, double *power)
{
int i,j;
double POWER;
if (*power!=0)
{
POWER=*power;
RAJ[1]=RAJ[1]*pow(10,-2*POWER);
RAJ[2]=RAJ[2]*pow(10,-POWER);
RAJ[3]=RAJ[3]*pow(10,-POWER);
RAJ[0]=RAJ[0]*pow(10,-POWER);
for (i=1;i<=n;i++)
{
for (j=i+1;j<=n;j++)
{
DI[i][j]=DI[i][j]*pow(10,-POWER);
DI[j][i]=DI[i][j];
DA[i][j]=DA[i][j]*pow(10,-POWER);
DA[j][i]=DA[i][j];
}
}
for (i=0;i<=2*n-4;i++)
LONGUEUR[i]=LONGUEUR[i]*pow(10,-POWER);
}
}
// Print tree edges wth their lengths into the Output file
void PrintEdges (long int *ARETE, double *LONGUEUR, int EdgesNumber, int optionTR, double *CRITERION1, double *CRITERION2, int OptionFunction)
{
int i,j,NombreBlanc;
if (optionTR!=2) fprintf(Output1,"\n\n TREE EDGES WITH THEIR LENGTHS \n\n");
else if (OptionFunction!=2) fprintf(Output1,"\n\n RETICULOGRAM EDGES WITH THEIR LENGTHS \tLSC \tQ1\n\n");
else fprintf(Output1,"\n\n RETICULOGRAM EDGES WITH THEIR LENGTHS \tLSC \tQ2\n\n");
for (i=1;i<=EdgesNumber;i++)
{
NombreBlanc=0;
if (ARETE[2*i-2]<10) NombreBlanc=3;
else if (ARETE[2*i-2]<100) NombreBlanc=2;
else if (ARETE[2*i-2]<1000) NombreBlanc=1;
for (j=1;j<=NombreBlanc;j++)
fprintf(Output1," ");
fprintf(Output1," %d--%d",ARETE[2*i-2], ARETE[2*i-1]);
NombreBlanc=0;
if (ARETE[2*i-1]<10) NombreBlanc=3;
else if (ARETE[2*i-1]<100) NombreBlanc=2;
else if (ARETE[2*i-1]<1000) NombreBlanc=1;
for (j=1;j<=NombreBlanc;j++)
fprintf(Output1," ");
if ((optionTR!=2)||(i<2*n-3)) fprintf(Output1," %f \n", LONGUEUR[i-1]);
if ((optionTR==2)&&(i>=2*n-3)) fprintf(Output1," %f \t%f \t%f\n", LONGUEUR[i-1],CRITERION1[i-2*n+3],CRITERION2[i-2*n+3]);
}
if ((optionTR==2)&&(EdgesNumber==2*n-3))
fprintf(Output1,"\n\nNo extra edges have been added to the tree !");
if (optionTR==2)
fprintf(Output1,"\n\nLeast-squares coefficient (LSC) and Q1 or Q2 values are given: first, for the starting additive tree, then, for each of extra edges added to it (if any)");
}
/* Reticulogram Reconstruction */
/*
Input:
DISS - given dissimilarity matrix
D - additive distance matrix representing the starting tree
W - matrix of weights associated with dissimilarity entries (if no weights are considered, all Wij = 1, 00.00001)
{
while (S(DIST[i][j]-DIST[a][j]-DIST[b][i]))
{ MIN = DIST[i][j]-DIST[a][i]-DIST[b][j]; i2=i; j2=j; }
else
{ MIN = DIST[i][j]-DIST[a][j]-DIST[b][i]; i2=j; j2=i; }
if (MIN>0.000001)
{
P=P+1; A[2*P-1]=i2; A[2*P]=j2; A1[P]=MIN; j1=P-1;
while (A1[j1]0) {
NUM=1; NUM1[1]=1; NUMw[1]=W[A[1]][A[2]]; LIMIT[1]=A1[1];
for (j1=2;j1<=P;j1++)
{
if (A1[j1]=0.0) { j1=i2; i2=j2; j2=j1; }
Sum=Sum+W[i2][j2]*(DISS[i2][j2]-DIST[i2][a]-DIST[j2][b]);
//Sum1=Sum1+(DIST[i2][a]+DIST[j2][b]-DISS[i2][j2])*(DIST[i2][a]+DIST[j2][b]-DISS[i2][j2]);
}
{
Sum2=0.0; Sum1=0.0; double Sum0=0.0;
for (j=1; j<=NUMnumber; j++)
{
i2=A[2*j-1]; j2=A[2*j];
if (DIST[i2][a]+DIST[j2][b]-DIST[i2][b]-DIST[j2][a]>=0.0) { j1=i2; i2=j2; j2=j1; }
Sum2=Sum2+(DIST[i2][a]+DIST[j2][b]-DISS[i2][j2]+LIMIT[i])*(DIST[i2][a]+DIST[j2][b]-DISS[i2][j2]+LIMIT[i]);
Sum1=Sum1+(DIST[i2][a]+DIST[j2][b]-DISS[i2][j2]+LIMIT[i+1]+0.000001)*(DIST[i2][a]+DIST[j2][b]-DISS[i2][j2]+LIMIT[i+1]+0.000001);
Sum0=Sum0+(DIST[i2][a]+DIST[j2][b]-DISS[i2][j2]+Sum/NUMnumber)*(DIST[i2][a]+DIST[j2][b]-DISS[i2][j2]+Sum/NUMnumber);
}
}
if ((NUMnumberW!=0.0)&&((Sum/NUMnumberW>=LIMIT[i+1]+0.000001)&&(Sum/NUMnumberW<=LIMIT[i]))) { Lopt1=Sum/NUMnumberW; }
else {if (Sum1EQ))
{ Lopt=Lopt1; EQmin=EQ; }
}
if (((a10==1)&&(P>0))||(EQminGL>EQmin))
{
EQminGL=EQmin;
LoptGL=Lopt;
Aopt=a;
Bopt=b;
a10=0;
}
}
}
}
}
if (a10==1) { /*printf("\n No more edges can be added \n");*/ break; }
else
{
FUNCTION_old=FUNCTION;
if ((OptionFunction!=2)&&(iteration=(2*n-2)*(2*n-2-1)/2-2*n+3)&&(iteration==n*(n-1)/2-2*n+3)) break;
if ((Iternumber>=(2*n-2)*(2*n-2-1)/2-2*n+3)&&(FUNCTION_old<=FUNCTION)) break;
if ((Iternumber==(2*n-2)*(2*n-2-1)/2-2*n+3)&&(fabs(EQminGL-EQminGLold)<=0.00000001)) break;
EQminGLold=EQminGL;
CRITERION1[iteration]=EQminGL;
CRITERION2[iteration]=FUNCTION;
ARETE[4*n-6+2*iteration-2]=Aopt;
ARETE[4*n-6+2*iteration-1]=Bopt;
LONGUEUR[2*n-4+iteration]=LoptGL;
EDGES[Aopt][Bopt]=EDGES[Bopt][Aopt]=1;
// Updating the matrix DIST after addition of a new edge
for (i=1;i<=2*n-3;i++)
{
for (j=i+1;j<=2*n-2;j++)
{
{
if (DIST[i][Aopt]+DIST[j][Bopt]+LoptGLDmax) Dmax=Dist[i-1][j-1];
}
if (methode==1) { DI[i][i]=Dist[i-1][i-1]=0.0; }
else DI[i][i]=0.0;
}
// Test for avoiding entire rows and columns with missing entries
int FLAG1;
for (i=1;i<=n;i++)
{
FLAG1=1;
for (j=1;j<=n;j++)
{
if ((i!=j)&&(DI[i][j]!=-99)) { FLAG1=0; break; }
}
if (FLAG1==1) { printf("Too many missing values. You have to have at least one value by row and by colomn.");
exit(1); }
}
//Selection of a tree reconstruction method
if (methode==1) // Triangles method
{
N=nbObj;
int error=PSM(Dist,N);
if (error==-1) {printf("Too many missing entrees for the Triangles method"); return;}
for (i=1;i<=2*N-3;i++)
{
ARETE[2*i-2]=i;
ARETE[2*i-1]=Tree[i-1]+1;
LONGUEUR[i-1]=Long[i-1];
}
for (i=1;i<=n;i++)
{
for (j=i+1;j<=n;j++)
{
k=1+floor1((n-0.5*i)*(i-1)+j-i-1);
distanceArbre[i][j]=distanceArbre[j][i]=Dist[i-1][j-1];
}
distanceArbre[i][i]=0.0;
}
RESULTATS[0]=RESULTATS[1]=RESULTATS[2]=RESULTATS[3]=0.0;
for (i=1;i<=n-1;i++)
{
for (j=i+1;j<=n;j++)
{
double s=fabs(DI[i][j]-Dist[i-1][j-1]);
if (DI[i][j]!=-99) RESULTATS[0]=RESULTATS[0]+s*s;
if (DI[i][j]!=-99) RESULTATS[1]=RESULTATS[1]+s;
if ((DI[i][j]!=-99)&&(s>RESULTATS[2])) RESULTATS[2]=s;
}
}
RESULTATS[1]=RESULTATS[1]/(n*(n-1)/2.0);
for (i=1;i<=2*n-3;i++)
RESULTATS[3]=RESULTATS[3]+LONGUEUR[i-1];
}
else if ((methode==2)||(methode==3)) //Ultrametric and Additive procedures
{
T=n;
max = (double*)malloc((T+1) * sizeof(double));
m = (double**)malloc((T+1) * sizeof(double *));
B2 = (int**)malloc((T+1) * sizeof(int *));
if ((max== NULL) ||(m == NULL) || (B2 == NULL))
{ printf("Data matrix is too large"); exit(1);}
for (i=0;i0) complete=false;
aa=(int*)malloc((T+1) * sizeof(int));
if (methode==2) add=false;
if (methode==3) add=true;
if (nbmiss > 0)
{
nbultra=0;
nbcycles=1;
if (add) Additif(&sum, &sum2, T, B2, m, max);
else Ultra2(&sum, &sum2, T, B2, m, max);
}
while (sum2>0)
{
nbcycles=nbcycles+1;
if (!add) Ultra2(&sum, &sum2, T, B2, m, max);
if (add)
{
if (sum2==sum)
{
nbultra=nbultra+1;
alea(T, aa);
Ultra1(&sum, &sum2, T, B2, m, max, aa);
}
Additif(&sum, &sum2, T, B2, m, max);
}
}
//method MW to complete Additive and Ultrametric
for (i=1; i<=n; i++)
{
for (j=i+1; j<=n; j++)
{
W[i][j]=W[j][i]=1.0;
}
W[i][i]=1.0;
}
W[0][0]=1.0;
error=parcour211(m,W,Dist,&Iternum,ARETE,LONGUEUR);
if (error==-1) {exit(1);}
for (i=1;i<=n;i++)
{
for (j=i+1;j<=n;j++)
{
k=1+floor1((n-0.5*i)*(i-1)+j-i-1);
distanceArbre[i][j]=distanceArbre[j][i]=Dist[i][j];
}
distanceArbre[i][i]=0.0;
}
compute_criteres11(DI, Dist, RESULTATS, LONGUEUR, n);
}
else if (methode==4) // MW-modified method
{
int FLAG=0;
for (i=1; i<=n; i++)
{
for (j=i+1; j<=n; j++)
{
if ((DI[i][j]==-99.00)&&(DI[j][i]==-99.00)) {W[i][j]=W[j][i]=0.0;FLAG=1;}
else if ((DI[i][j]==-99.00)&&(DI[j][i]!=-99.00)) {DI[i][j]=DI[j][i]; W[i][j]=W[j][i]=1.0;}
else if ((DI[i][j]!=-99.00)&&(DI[j][i]==-99.00)) {DI[j][i]=DI[i][j]; W[i][j]=W[j][i]=1.0;}
else {W[i][j]=W[j][i]=1.0;}
}
DI[i][i]=0.0;
W[i][i]=1.0;
}
DI[0][0]=0; W[0][0]=0;
if (n==2)
{
Dist[1][2]=DI[1][2];
Dist[2][1]=DI[1][2];
Dist[1][1]=0.0;
Dist[2][2]=0.0;
}
else
{
Iternum=50;
DI[0][0]=1;
if (FLAG==0) error=parcour211(DI,W,Dist,&Iternum,ARETE,LONGUEUR);
else error=parcour2MisVal(DI,W,Dist,&Iternum,ARETE,LONGUEUR);
if (error==-1) { exit(1);}
}
for (i=1;i<=n;i++)
{
for (j=i+1;j<=n;j++)
{
k=1+floor1((n-0.5*i)*(i-1)+j-i-1);
distanceArbre[i][j]=distanceArbre[j][i]=Dist[i][j];
}
}
compute_criteres11(DI, Dist, RESULTATS, LONGUEUR, n);
}
// memory liberation
for (i=0;i<=n;i++)
{
free(DI[i]);
free(Dist[i]);
if ((methode==2)||(methode==3)) free(m[i]);
if ((methode==2)||(methode==3)) free(B2[i]);
}
free(DI);
free(Dist);
if ((methode==2)||(methode==3)) free(m);
if ((methode==2)||(methode==3)) free(B2);
if ((methode==2)||(methode==3)) free(max);
return;
}
//Triangles method functions
void EditDist(double **D, int N)
{ int i,j;
printf("Distance %s\n",Nom);
printf("N = %3d\n",N);
for (i=0;iD[i][k]+D[j][k] || D[i][j]0) *complete=false;
} // end input data
void alea (int T, int *aa)
{
int i, j, k;
double x, *P;
double XY=0.98674532;
P = (double*)malloc((T+1) * sizeof(double));
assert(P!=NULL);
for (i=1;i<=T;i++)
{
double D2P31M=2147483647.0, D2P31=2147483648.0;
double a;
a=16807 * XY;
XY=a - floor1(a/D2P31M) * D2P31M;
P[i]=XY/D2P31;
}
x=1;
for (i=1;i<=T;i++)
{
for (j=1;j<=T;j++)
{
if (P[j] < x)
{ x = P[j]; aa[i] = j; }
}
for (k=1;k<=T;k++)
{
if (P[k] == x) P[k] = 1;
}
x=1;
}
}// end alea
void Ultra1(int *sum, int *sum2, int T, int **B2, double **m, double *max, int *aa)
{
int i, j, k, a;
double min, val1, val2;
bool done;
done = false;
*sum = 0;
*sum2 = 0;
min = 1000;
for (i=1;i<=T;i++)
{
for (j=1;j<=T;j++)
{
if (B2[aa[i]][aa[j]]==0) *sum=*sum+1;
}
}
for (i=1;i<=T;i++)
{
for (j=1;j<=T;j++)
{
if (B2[aa[i]][aa[j]]==1) goto label2;
for (k=1;k<=T;k++)
{
if ((k==i)||(k==j)) goto label1;
if ((B2[aa[i]][aa[k]]==0)||(B2[aa[j]][aa[k]]==0)) goto label1;
val1=(m[aa[i]][aa[k]] + m[aa[k]][aa[i]])/2.0;
val2=(m[aa[j]][aa[k]] + m[aa[k]][aa[j]])/2.0;
if (val1>val2) max[aa[k]]=val1;
else max[aa[k]]=val2;
if (max[aa[k]] < min)
{
min=max[aa[k]];
m[aa[i]][aa[j]]=min;
m[aa[j]][aa[i]]=min;
done=true;
}
label1: a=1;
}
min=1000;
if (done) goto label3;
label2: a=1;
}
}
label3: a=1;
for (i=1;i<=T;i++)
{
for (j=1;j<=T;j++)
{
if (m[aa[i]][aa[j]]!=miss) B2[aa[i]][aa[j]]=1;
}
}
for (i=1;i<=T;i++)
{
for (j=1;j<=T;j++)
{
if (B2[aa[i]][aa[j]]==0) *sum2=*sum2+1;
}
}
} // end Ultra1
void Ultra2(int *sum, int *sum2, int T, int **B2, double **m, double *max)
{
int i, j, k, a;
double min, val1, val2;
*sum = 0;
*sum2 = 0;
min = 1000;
for (i=1;i<=T;i++)
{
for (j=1;j<=T;j++)
{
if (B2[i][j]==0) *sum=*sum+1;
}
}
for (i=1;i<=T;i++)
{
for (j=1;j<=T;j++)
{
if (B2[i][j]==1) goto label2;
for (k=1;k<=T;k++)
{
if ((k==i)||(k==j)) goto label1;
if ((B2[i][k]==0)||(B2[j][k]==0)) goto label1;
val1=(m[i][k] + m[k][i])/2.0;
val2=(m[j][k] + m[k][j])/2.0;
if (val1>val2) max[k]=val1;
else max[k]=val2;
if (max[k] < min)
{
min=max[k];
m[i][j]=min;
}
label1: a=1;
}
min=1000;
label2: a=1;
}
}
for (i=1;i<=T;i++)
{
for (j=1;j<=T;j++)
{
if (m[i][j]!=miss) B2[i][j]=1;
}
}
for (i=1;i<=T;i++)
{
for (j=1;j<=T;j++)
{
if (B2[i][j]==0) *sum2=*sum2+1;
}
}
} // end Ultra2
void Additif(int *sum, int *sum2, int T, int **B2, double **m, double *max)
{
int i, j, k, l, a;
double min, val1, val2, val3, newval;
*sum=0;
*sum2=0;
min=1000;
for (i=1;i<=T;i++)
{
for (j=1;j<=T;j++)
{
if (B2[i][j]==0) *sum=*sum+1;
}
}
for (i=1;i<=T;i++)
{
for (j=1;j<=T;j++)
{
if (B2[i][j]==1) goto label3;
for (k=1;k<=T;k++)
{
if ((k==i)||(k==j)) goto label2;
if ((B2[i][k]==0)||(B2[j][k]==0)) goto label2;
for (l=1;l<=T;l++)
{
if ((l==i)||(l==j)||(l==k)) goto label1;
if ((B2[i][l]==0)||(B2[j][l]==0)||(B2[k,l]==0)) goto label1;
val1=(m[k][l]+m[l][k])/2.0;
val2=(m[i][l]+m[l][i])/2.0 + (m[j][k]+m[k][j])/2.0;
val3=(m[i][k]+m[k][i])/2.0 + (m[j][l]+m[l][j])/2.0;
if (val2>val3) max[l]=val2;
else max[l]=val3;
newval=max[l]-val1;
if (newvalR[2]) R[2]=s; }
}
}
for (i1=1;i1<=2*nn-3;i1++)
R[3]=R[3]+LONGUEUR[i1-1];
RESULTATS[0]=R[0];
RESULTATS[1]=R[1]/(n*(n-1)/2.0);
RESULTATS[2]=R[2];
RESULTATS[3]=R[3];
}
//MW-modified method
int parcour2MisVal(double **DISS,double **W,double **TM,int *Iternum,long int *ARETE, double *LONGUEUR)
{
int a,i,j,i1,j1,iopt,jopt,*X,Iternumber,Option,k,k1,*Flag;
double **TM1, **TMopt;
double EQ,EQmin,MAX,*SumW,*Sum,MAX1;
int error1;
X=(int *) malloc((n+1)*sizeof(int));
Flag=(int *) malloc((n+1)*sizeof(int));
TM1=(double **) malloc((n+1)*sizeof(double*));
TMopt=(double **) malloc((n+1)*sizeof(double*));
SumW=(double *) malloc((n+1)*sizeof(double));
Sum=(double *) malloc((n+1)*sizeof(double));
for (i=0;i<=n;i++)
{
TM1[i]=(double*)malloc((n+1)*sizeof(double));
TMopt[i]=(double*)malloc((n+1)*sizeof(double));
if ((TM1[i]==NULL)||(TMopt[i]==NULL))
{
printf("Data matrix is too large"); return -1;
}
}
Iternumber=*Iternum;
X[0]=1;
a=0;
for (i=1;i<=n;i++)
{
Sum[i]=0.0;
for (j=1;j<=n;j++)
Sum[i]=Sum[i]+W[i][j];
}
Option=1;
for (i=1;i<=n;i++)
{
for (j=i+1;j<=n;j++)
{
if (W[i][j]!=0.0)
{
X[1]=i;
X[2]=j;
for (k=1;k<=n;k++)
Flag[k]=0;
Flag[i]=Flag[j]=1;
for (k=1;k<=n;k++)
{
if (Flag[k]!=1)
{
SumW[k]=W[k][i];
}
}
for (k1=2;k1<=n-1;k1++)
{
MAX=MAX1=0.0;
for (k=1;k<=n;k++)
{
if (Flag[k]!=1)
{
SumW[k]=SumW[k]+W[k][X[k1]];
if ((MAX10) Iternumber=2*n-3;
else Iternumber=15;
X[1]=iopt;
X[2]=jopt;
construction(DISS,TM1,X,W);
if (error1==-1) {free(X);
for (i=0;i<=n;i++)
free(TM1[i]);
free(TM1); return -1;}
approx_arb2 (DISS,TM1,TM,W,&Iternumber,ARETE,LONGUEUR);
if (error1==-1) {free(X);
for (i=0;i<=n;i++)
free(TM1[i]);
free(TM1); return -1;}
free(X);
for (i=0;i<=n;i++)
free(TM1[i]);
free(TM1);
return 1;
}
/* Tree reconstruction from incomplete matrix by MW-modified (part3) */
int constructionMisVal(double **D,double **DA,int *X,double **W)
{
int i,j,i1,p,P,xi,a,*Y,NV,NR,PP,PN,option,*ARETE;
double *A,am,c,am1,c1,k,k1,k2,k5,k6,k7,ki,**G1,S1,S2,
S3,M,MR,M1,DIS,DIS1,Su,Sv,*L,**L1,**L2,
**L3,*DIST,*DIST1;
double Dummy1;
Y=(int *) malloc((n+1)*sizeof(int));
A=(double *) malloc((8*n+8)*sizeof(double));
G1=(double **) malloc((2*n-2)*sizeof(double*));
L=(double *) malloc((n+1)*sizeof(double));
L1=(double **) malloc((2*n-2)*sizeof(double*));
L2=(double **) malloc((2*n-2)*sizeof(double*));
L3=(double **) malloc((2*n-2)*sizeof(double*));
DIST=(double *) malloc(3*sizeof(double));
DIST1=(double *) malloc(3*sizeof(double));
ARETE=(int *) malloc(3*sizeof(int));
for (i=0;i<=2*n-3;i++)
{
G1[i]=(double*)malloc((n+1)*sizeof(double));
L1[i]=(double*)malloc((n+1)*sizeof(double));
L2[i]=(double*)malloc((n+1)*sizeof(double));
L3[i]=(double*)malloc((n+1)*sizeof(double));
if ((G1[i]==NULL)||(L1[i]==NULL)||(L2[i]==NULL)||(L3[i]==NULL))
{
printf("Data matrix is too large"); return -1;
}
}
Dummy1=100;
if (D[X[1]][X[2]]<(n-1)*0.0002){ Dummy1=D[X[1]][X[2]]; D[X[1]][X[2]]=(n-1)*0.0002;}
option=X[0];
MR=0;
A[1]=X[1];
A[2]=X[2];
A[3]=0;
A[4]=D[X[1]][X[2]];
for (i=1;i<=n;i++)
Y[i]=1;
Y[X[1]]=0;
Y[X[2]]=0;
P=1;
DA[X[1]][X[1]]=0;
DA[X[2]][X[2]]=0;
DA[X[1]][X[2]]=A[4];
DA[X[2]][X[1]]=A[4];
xi=X[2];
for (j=1;j<=n;j++)
{
if ((j!=X[1])&&(j!=X[2]))
{
k1=DA[X[1]][xi]-D[xi][j];
k2=D[X[1]][j];
L[j]=W[X[1]][j]+W[X[2]][j];
L1[1][j]=2*(k1*W[X[2]][j]-k2*W[X[1]][j]);
L2[1][j]=2*(-k1*W[X[2]][j]-k2*W[X[1]][j]);
L3[1][j]=2*(W[X[1]][j]-W[X[2]][j]);
G1[1][j]=k1*k1*W[X[2]][j]+k2*k2*W[X[1]][j];
}
}
for (i=2;i<=n-1;i++)
{
a=0;
if ((option!=2)||(i>3))
{
for (p=1;p<=P;p++)
{
for (j=1;j<=n;j++)
{
if ((Y[j]==1)&&(X[i+1]==j))
{
xi=floor1(A[4*p-2]);
k1=DA[X[1]][xi]-D[xi][j];
k2=D[X[1]][j];
Su=A[4*p-1];
Sv=Su+A[4*p];
k6=L1[p][j];
k5=L2[p][j];
k7=L3[p][j];
k=L[j];
M=G1[p][j]+MR+1.0;
if ((2*k*Sv+k5<=0.00001)&&(k6+k7*Sv>=-0.00001))
{ M=k*Sv*Sv+k5*Sv;
am=Sv;
c=0;
}
if (((c1=-(k6+k7*Sv)/(2*k))>=-0.00001)&&(-k5-2*k*Sv-k7*c1>=-0.00001)&&((M1=k*(Sv*Sv+c1*c1)+k7*Sv*c1+k5*Sv+c1*k6)=Su-0.00001)&&(-k6-k7*am1<=0.00001)&&((M1=k*am1*am1+k5*am1)=-0.00001)&&((M1=k*(am1*am1+c1*c1)+k7*am1*c1+k5*am1+k6*c1)=Su-0.00001))
{
M=M1;
c=c1;
am=am1;
}
if (((4*k*k-k7*k7)==0)&&(k5==k6)&&((-k5/(2*k))>=Su-0.00001)&&((c1=-k5/(2*k)-Su)>=-0.00001)&&((M1=k*(Su+c1)*(Su+c1)+k5*(Su+c1))=-0.00001)&&((M1=k*Su*Su+k5*Su)=-0.00001)&&(-k6-k7*Su<=0.00001)&&((M1=k*Su*Su+k5*Su)=-0.00001)&&((k5+2*k*Su+c1*k7)>=-0.00001)&&((M1=k*Su*Su+k5*Su+c1*c1*k+c1*(Su*k7+k6))M+G1[p][j]))
{
a=1;
MR=M+G1[p][j];
DIS=am;
DIS1=c;
if (c<=(n-i+1.0)*0.00002) DIS1=(n-i+1.0)*0.00002;
if (fabs(Sv-Su)<=2*(n-i+1)*0.00002) DIS=Sv-(Sv-Su)/(n-i+1.0);
else
{
if (fabs(am-Su)<=(n-i+1.0)*0.00002) DIS=Su+(n-i+1.0)*0.00002;
if (fabs(am-Sv)<=(n-i+1.0)*0.00002) DIS=Sv-(n-i+1.0)*0.00002;
}
NV=j;
NR=p;
}
}
}
}
}
if ((option==2)&&(i<=3))
{
NV=X[i+1];
NR=ARETE[i-1];
DIS=DIST[i-1];
DIS1=DIST1[i-1];
}
/* Updating data in the tables */
p=NR;
PP=P;
if (fabs(DIS-A[4*p-1])<=0.00001)
DIS=A[4*p-1];
if (fabs(DIS-A[4*p-1]-A[4*p])<=0.00001)
{
DIS=A[4*p-1]+A[4*p];
}
X[i+1]=NV;
xi=floor1(A[4*p-2]);
Y[NV]=0;
if ((DIS-A[4*p-1]>0)&&(DIS-A[4*p-1]-A[4*p]<0))
{
c=A[4*p];
A[4*p]=DIS-A[4*p-1];
P=P+1;
A[4*P-3]=P;
A[4*P-2]=A[4*p-2];
A[4*P-1]=DIS;
A[4*P]=A[4*p-1]+c-DIS;
}
if (DIS1>0)
{
P=P+1;
A[4*P-3]=P;
A[4*P-2]=X[i+1];
A[4*P-1]=DIS;
A[4*P]=DIS1;
}
DA[X[1]][X[i+1]]=DIS+DIS1;
DA[X[i+1]][X[1]]=DIS+DIS1;
/* The induction formula */
for (j=2;j<=i;j++)
{
if (((DA[X[1]][X[j]]+DA[X[1]][xi]-DA[xi][X[j]])/2)<=DIS)
DA[X[j]][X[i+1]]=DIS+DIS1+DA[X[j]][xi]-DA[X[1]][xi];
else
DA[X[j]][X[i+1]]=DIS1-DIS+DA[X[1]][X[j]];
DA[X[i+1]][X[j]]=DA[X[j]][X[i+1]];
DA[X[i+1]][X[i+1]]=0;
}
if ((P==PP+2)||((P==PP+1)&&(DIS1==0)))
PN=PP+1;
else
PN=PP;
if ((DIS-A[4*p-1]>0)&&(DIS-A[4*p-1]-c<0))
{
for(j=1;j<=n;j++)
{
if (Y[j]==1)
{
L1[PP+1][j]=L1[p][j];
L2[PP+1][j]=L2[p][j];
L3[PP+1][j]=L3[p][j];
G1[PP+1][j]=G1[p][j];
}
}
}
/* BLOCK 1 */
for (p=1;p<=PN;p++)
{
xi=floor1(A[4*p-2]);
for (j=1;j<=n;j++)
{
if (Y[j]==1)
{
if (((DA[X[1]][xi]+DA[X[1]][X[i+1]]-DA[X[i+1]][xi])/2)<=A[4*p-1]+0.00001)
{
ki=D[X[i+1]][j]+DA[X[1]][xi]-DA[X[i+1]][xi];
L1[p][j]=L1[p][j]-2*ki*W[X[i+1]][j];
L2[p][j]=L2[p][j]-2*ki*W[X[i+1]][j];
L3[p][j]=L3[p][j]+2*W[X[i+1]][j];
}
else
{
ki=D[X[i+1]][j]-DA[X[i+1]][X[1]];
L1[p][j]=L1[p][j]-2*ki*W[X[i+1]][j];
L2[p][j]=L2[p][j]+2*ki*W[X[i+1]][j];
L3[p][j]=L3[p][j]-2*W[X[i+1]][j];
}
if (p==1)
L[j]=L[j]+W[X[i+1]][j];
G1[p][j]=G1[p][j]+ki*ki*W[X[i+1]][j];
}
}
}
/* BLOCK 2 */
if (DIS1>0)
{
for (j=1;j<=n;j++)
{
if (Y[j]==1)
{
k1=DA[X[1]][X[i+1]]-D[X[i+1]][j];
k2=D[X[1]][j];
S1=W[X[1]][j]+W[X[i+1]][j];
S2=k2*W[X[1]][j];
S3=k1*k1*W[X[i+1]][j]+k2*k2*W[X[1]][j];
for (i1=2;i1<=i;i1++)
{
ki=D[X[i1]][j]+DA[X[1]][X[i+1]]-DA[X[i+1]][X[i1]];
S1=S1+W[X[i1]][j];
S2=S2+ki*W[X[i1]][j];
S3=S3+ki*ki*W[X[i1]][j];
}
L[j]=S1;
L1[P][j]=2*(k1*W[X[i+1]][j]-S2);
L2[P][j]=2*(-k1*W[X[i+1]][j]-S2);
L3[P][j]=2*(S1-2*W[X[i+1]][j]);
G1[P][j]=S3;
}
}
}
}
if (Dummy1<(n-1)*0.0002) D[X[1]][X[2]]=Dummy1;
free(L);
free(Y);
free(A);
free(DIST);
free(DIST1);
free(ARETE);
for (i=0;i<=2*n-3;i++)
{
free(G1[i]);
free(L1[i]);
free(L2[i]);
free(L3[i]);
}
free(G1);
free(L1);
free(L2);
free(L3);
return 1;
}
// the end !!!