#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <lapacke.h>

// Sum of the absolute values:
double dasum_(const int *n, const double *X, const int *incx);

// DNRM2 returns the euclidean norm of a vector
double dnrm2_(const int *n, const double *X, const int *incx);

/*
   LAPACKE functions prototypes:
		! DGETRF computes an LU factorization of a general M-by-N matrix A
		! using partial pivoting with row interchanges.
		! The factorization has the form
		!    A = P * L * U
    lapack_int LAPACKE_dgetrf 	( 	int  	matrix_order,
		lapack_int  	m,
		lapack_int  	n,
		double *a,
		lapack_int  	lda,
		lapack_int *ipiv 
	) 	

		! DGECON estimates the reciprocal of the condition number of a general
		! real matrix A, in either the 1-norm or the infinity-norm, using
		! the LU factorization computed by DGETRF.
	lapack_int LAPACKE_dgecon 	( 	int  	matrix_order,
		char  	norm,
		lapack_int  	n,
		const double *a,
		lapack_int  	lda,
		double  	anorm,
		double *rcond 
	) 	
	
        ! DGETRS solves a system of linear equations
        ! A * X = B  or  A' * X = B
        ! with a general N-by-N matrix A using the LU factorization computed
        ! by DGETRF.
	lapack_int LAPACKE_dgetrs 	( 	int  	matrix_order,
		char  	trans,
		lapack_int  	n,
		lapack_int  	nrhs,
		const double *a,
		lapack_int  	lda,
		const lapack_int *ipiv,
		double * b,
		lapack_int ldb) 
				
		! DGBTRF computes an LU factorization of a real m-by-n band matrix A
		! using partial pivoting with row interchanges.
	lapack_int 	LAPACKE_dgbtrf (int matrix_order, lapack_int m, lapack_int n, 
		lapack_int kl, lapack_int ku, 
		double *ab, lapack_int ldab, 
		lapack_int *ipiv)
		
		! DGBTRS solves a system of linear equations
		! 	A * X = B  or  A' * X = B
		! with a general band matrix A using the LU factorization computed
		! by DGBTRF. 
	lapack_int 	LAPACKE_dgbtrs (int matrix_order, char trans, 
		lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, 
		const double *ab, lapack_int ldab, 
		const lapack_int *ipiv, 
		double *b, lapack_int ldb) 
*/

#define index2D(i,j,LD1) i + (j*LD1)
#define index3D(i,j,k,LD1,LD2) i + (j*LD1) + (k*LD1*LD2)


int MAXVAL(int s, double *a)
{
   double v;
   int e;

   v = a[0];

   for ( e = 0; e < s; e++ )
   {
       if ( v < a[e] ) v = a[e];
   
   }
   
   return(v);
}

double MatrixNorm(const char *Norm, const int *N, const double *A, 
	const int *LDA, int *INFO) 
{
   double *TV, anorm;
   int j, rc, len, one;
   
   *INFO = 0;
   anorm = 0.0;
   if ( Norm[0] == '1' ) {
	   TV = (double*) malloc(sizeof((double)1.0)*(*N));
       if ( TV == NULL ) return(-9);
	   one = 1;
	   for ( j = 0; j < *N; j++ ) {
	   	   TV[j] = dasum_(LDA, &A[index2D(1,j,(*LDA))],&one);
	   } 
	   anorm = MAXVAL((*N),TV);
	   free(TV);
	} else if ( Norm[0] == '2' ) {
	   len = (*LDA)*(*N);
	   one = 1;
	   anorm = dnrm2_(&len,A,&one);
	} else if ( Norm[0] == 'i' || Norm[0] == 'I' ) {
	  // Row wise
	   TV = (double*) malloc(sizeof((double)1.0)*(*LDA));
       if ( TV == NULL ) return(-9);
	   for ( j = 0; j < (*LDA); j++ ) {
		   TV[j] = dasum_(N, &A[index2D(j,1,(*LDA))], LDA);
	   } 
	   anorm = MAXVAL((*LDA),TV);
	   free(TV);
	} else {
    	*INFO = -1;
    }
	return(anorm);
}

double Solution(double x, double y)
{
      double f;
      
      f = (x*x*x+y*y*y)/(double)6.0;
      
      return(f);
}
   	   
int GridDef(double x0, double x1, double y0, double y1, int N, double *Pts)
{
   		
   		// Pts[(N-1)*(N-1),2] = inner grid point Coordinates
   		// Info < 0 if errors occur 
//
		double x, y, dx, dy;
		int np, Nm1, i, j;
		
		Nm1 = N-1;
   		dx = (x1-x0)/(double)N; dy = (y1-y0)/(double)N;
   		np = 0;
   		for ( i = 0; i < Nm1; i++ ) {
   		   for ( j = 0; j < Nm1; j++ ) {
        		 if ( np > (Nm1*Nm1-1) ) {
        		 	 fprintf(stderr," Error: NP = %d > N*N = %d\n",np,(Nm1*Nm1));
        		 	 return(-1);
        		 }
        		 x = x0 + dx * (double)(i);
        		 y = y0 + dy * (double)(j);
        		 Pts[index2D(np,0,(Nm1*Nm1))] = x;
        		 Pts[index2D(np,1,(Nm1*Nm1))] = y;
				 np++;
        	} 
        }
        return(0);
}
       
      
int EqsDef(double x0, double x1, double y0, double y1, int N, int LA,
	double *A, double *Rhs, double *Pts)
{
	/*
   		REAL(8), INTENT(IN) :: x0,x1,y0,y1  // Define grid
   		INTEGER, INTENT(IN) :: N, LA   // Extent of grid side and number of grid points
   		REAL(8), DIMENSION(LA,2), INTENT(IN) :: Pts   // inner grid point Coordinates
   		REAL(8), DIMENSION(LA), INTENT(OUT) :: Rhs   // Linear equation RHS
   		REAL(8), DIMENSION(LA,LA), INTENT(OUT) :: A   // Linear equation matrix
   		INTEGER, INTENT(OUT) :: Info	// Info < 0 if errors occur 
   		*/
//
		double x, y, dx, dy, Eps;
		int np, Nm1, pos;
//
//  Define A matrix and RHS
//
   Nm1 = N-1;
   dx = (x1-x0)/(double)N; dy = (y1-y0)/(double)N;

   for ( np = 0; np < LA; np ++ ) {
      x = Pts[index2D(np,0,LA)];
      y = Pts[index2D(np,1,LA)];
      if ( LA < 6 ) printf("x, y = %lf %lf\n",x,y);
      A[index2D(np,np,LA)] = -4.0;
      Rhs[np] = (x + y)*dx*dy;
      // define Eps function of grid dimensions 
      Eps = (dx+dy)/(double)20.0;
      // where is P(x-dx,y) ?
      if ( abs( (x-dx)-x0 ) < Eps ) {
      	   Rhs[np] = Rhs[np] - Solution(x0,y);
      } else {
      	   // Find pos = position of P(x-dx,y)
      	   pos = np - Nm1;
      	   if ( abs( Pts[index2D(pos,0,LA)] - (x-dx) ) > Eps ) {
      	   	   fprintf(stderr," Error x-dx: %lf",abs(Pts[index2D(pos,0,LA)]-(x-dx)));
      	   	   return(-1);
      	   }
      	   A[index2D(np,pos,LA)] = (double)1.0;
      }
      // where is P(x+dx,y) ? 
      if ( abs((x+dx)-x1) < Eps ) {
      	   Rhs[np] = Rhs[np] - Solution(x1,y);
      } else {
   	      // Find pos = position of P(x+dx,y)
      	   pos = np + Nm1;
      	   if ( abs(Pts[index2D(pos,0,LA)]-(x+dx)) > Eps ) {
      	   	   fprintf(stderr," Error x+dx: %lf\n",abs(Pts[index2D(pos,0,LA)]-(x+dx)));
      	   	   return(-1);
      	   }
      	   A[index2D(np,pos,LA)] = (double)1.0;
      }
      // where is P(x,y-dy) ? 
      if ( abs((y-dy)-y0) < Eps ) {
      	   Rhs[np] = Rhs[np] - Solution(x,y0);
      } else {
      	   // Find pos = position of P(x,y-dy)
           pos = np - 1;
      	   if ( abs(Pts[index2D(pos,1,LA)]-(y-dy)) > Eps ) {
      	   	   fprintf(stderr," Error y-dy: %lf\n",abs(Pts[index2D(pos,1,LA)]-(y-dy)));
      	   	   return(-1);
      	   }
      	   A[index2D(np,pos,LA)] = (double)1.0;
      }
      // where is P(x,y+dy) ? 
      if ( abs((y+dy)-y1) < Eps ) {
      	   Rhs[np] = Rhs[np] - Solution(x,y1);
      } else {
      	   // Find pos = position of P(x,y+dy)
           pos = np + 1;
      	   if ( abs(Pts[index2D(pos,1,LA)]-(y+dy)) > Eps ) {
      	   	   fprintf(stderr," Error y+dy: %lf\n",abs(Pts[index2D(pos,1,LA)]-(y+dy)));
      	   	   return(-1);
      	   }
      	   A[index2D(np,pos,LA)] = (double)1.0;
      }
      if ( LA <= 5 ) fprintf(stderr," Rhs(%d) = %lf\n",np,Rhs[np]);
   }
   return(0);
}
   
void CopyVector(int LA, double *BB, double *B)
{
	// Copies Vector B[LA] in vector BB[LA]
	int i;
	for ( i = 0; i < LA; i++ ) BB[i] = B[i];
	return;
}

void PrintMatrix(int LA, int N, double *A, char *msg)
{

      int i, j;

      if ( LA*N <= 25 ) {
         printf("%s\n",msg);
         for ( i = 0; i < LA; i++ ) {
            for ( j = 0; j < N; j++ ) {
               printf("%lf ",A[index2D(i,j,LA)]);
            }
            printf("\n");
         }
         printf("\n");
      }
      return;      
}

void Gen2Band(int LBM, double *BM, int LGM, double *GM, int N, int KL, int KU)
{
//
//  Copies data from a general dense format matrix to
//  a band format matrix with KL lower bands and KU upper bands
//
//   double GM[LGM,N] = input general matrix
//   double BM[LBM,N] = generated banded format
//
   int i, j, k;

   for ( i = 0; i < LBM*N; i++ ) BM[i] = (double)0.0;
//	Store values in band format:
   for ( i = 0; i < N; i++ ) {
   	   for ( j = 0; j < N; j++ ) {
		   	k = KL + KU - (i-j);
			if ( 0 <= k && k < LBM ) {
				BM[index2D(k,i,LBM)] = GM[index2D(j,i,LGM)];
			}
		}
	}	
 
   return;
}

  
void CheckSolution(int LA, double *B, double *Points, char *msg)
{
   // Check if B vector solution meets function definition
   double rmin = (double)9999.0, rmax = (double)-1.0;
   double x, y, vs, v;
   int np;
   
   for ( np = 0; np < LA; np++ ) {
      x = Points[index2D(np,0,LA)];
      y = Points[index2D(np,1,LA)];
      vs = Solution(x,y);
      v = abs(B[np] - vs);
      if ( rmin > v ) rmin = v;
      if ( rmax < v ) rmax = v;
   }
   printf(" %s: rmin = %lf, rmax = %lf\n",msg, rmin, rmax);
   return;
}


int main(  int argc, char *argv[] )
{ 
   //
   // Let's consider the 2D-square 1 < X < 2, 1 < Y < 2
   // Suppose we divide the square in N-1 x N-1 portions 
   // Given a function F(x,y) suppose we know that in every point:
   //   d2F(x,x)/dx2 + d2F(x,y)/dy2 = x+y
   // Let's define a system of equation so that we can compute the value
   // of F(x,y) in every point of the 2D-square
   //
   double *A, *BA, *Points;
   double *S, *B, *BB, *work;
   int *Ipiv, *iwork;
   double x, y, xl, xr, yl, yu, dx, dy, Eps;
   double anorm, rcond, rmin, rmax, v, vs;
//  Define 2D-square:
   const double x0 = 1.0, y0 = 1.0, x1 = 2.0, y1 = 2.0;
   char WhichNorm[2], tr[2];
   int N, Np1, Nm1, LA, np, pos;
   int KL, KU, LBA;
   int i, j, rc, info, nr;
   
   printf(" How many intervals?\n");
   fscanf(stdin,"%d",&N);
   if ( N < 0 || N > 1000 ) N = 1000;
   printf(" Compute with %d intervals\n",N);
   
   Np1 = N+1;  // Grid points per side
   Nm1 = N-1;  // Grid points minus boundary
   LA = Nm1*Nm1; // unknown points
        
   A = malloc( sizeof((double)1.0)*LA*LA);
   if ( A == NULL ) {
   	  fprintf(stderr,"Error allocating A matrix\n");
      return(-1);
   }
   
   Points = malloc( sizeof((double)1.0)*LA*2);
   if ( Points == NULL ) {
   	  fprintf(stderr,"Error allocating Points matrix\n");
      return(-1);
   }

   Ipiv = malloc( sizeof((int)1)*LA);
   if ( Ipiv == NULL ) {
   	  fprintf(stderr,"Error allocating Ipiv workspace\n");
      return(-1);
   }

   B = malloc( sizeof((double)1.0)*LA);
   if ( B == NULL ) {
   	  fprintf(stderr,"Error allocating B RHS vector\n");
      return(-1);
   }

   BB = malloc( sizeof((double)1.0)*LA);
   if ( BB == NULL ) {
   	  fprintf(stderr,"Error allocating BB RHS vector\n");
      return(-1);
   }

   rc = GridDef(x0,x1,y0,y1,N,Points);

   rc = EqsDef(x0,x1,y0,y1,N,LA,A,B,Points);

   PrintMatrix(LA,LA,A," A = ");
   
   // =============   Band matrix  ===================  
   KL = Nm1; KU = Nm1; LBA = 2*KL+KU+1; 
        
   BA = malloc( sizeof((double)1.0)*LBA*LA);
   if ( BA == NULL ) {
   	  fprintf(stderr,"Error allocating BA matrix\n");
      return(-1);
   }
   
   Gen2Band(LBA,BA,LA,A,LA,KL,KU);

   PrintMatrix(LBA,LA,BA," BA = ");
      
   // =============   Band matrix  ===================  
   CopyVector(LA,BB,B);   // RHS for band solution
   printf("Computing matrix norm...\n");
   strcpy(WhichNorm,"1");
   anorm = MatrixNorm(WhichNorm,&LA,A,&LA,&rc);
   if ( rc != 0 ) {
   	  fprintf(stderr," After MatrixNorm('1') INFO = %d\n",rc);
   }

   printf("Computing matrix factorization...\n");   
   for ( i = 0; i < LA; i++ ) Ipiv[i] = 0;   
   // call LAPACKE_dgetrf to compute matrix factorization
		.	.	.
   if ( info != 0 ) {
   	  fprintf(stderr," After DGETRF INFO = %d\n",info);
   }
   PrintMatrix(LA,LA,A," FactorizedA = ");

   printf("Computing Condition number:\n");  
   strcpy(WhichNorm,"1");
   // call LAPACKE_dgecon to compute condition number rcond using anorm and
   // 	matrix order LAPACK_COL_MAJOR
		.	.	.
   if ( info != 0 ) {
   	  fprintf(stderr," After DGECON INFO = ",info);
   }  
   printf(" anorm, rcond = %lf %lf\n",anorm,rcond);

   
   printf("Computing the solution...\n"); 
    	
    strcpy(tr,"N");
    nr = 1;
    // call LAPACKE_dgetrs to compute solution with matrix order LAPACK_COL_MAJOR
		.	.	.
   if ( info != 0 ) {
   	  fprintf(stderr," After DGETRS INFO = \n",rc);
   }
   
   PrintMatrix(1,LA,B," Solution = ");
   
   CheckSolution(LA,B,Points,"Solution checking for general matrix");
 
   
   // =============   Band matrix  ===================  
   printf("Computing band matrix factorization...\n");   
   for ( i = 0; i < LA; i++ ) Ipiv[i] = 0;   
   // call LAPACKE_dgbtrf to compute band matrix factorization with 
   // matrix order LAPACK_COL_MAJOR
		.	.	.
   if ( info != 0 ) {
   	  fprintf(stderr," After DGBTRF INFO = %d\n",info);
   }
   PrintMatrix(LA,LA,BA," Factorized BA = ");

   printf("Computing band solution...\n"); 
    	
    strcpy(tr,"N");
    nr = 1;
    // call LAPACKE_dgbtrs to compute band matrix solution with 
    // 	matrix order LAPACK_COL_MAJOR
		.	.	.
   if ( info != 0 ) {
   	  fprintf(stderr," After DGBTRS INFO = \n",rc);
   } else {
      PrintMatrix(1,LA,BB," Solution = ");
   }    
   
   CheckSolution(LA,BB,Points,"Solution checking for band matrix");
   // =============   Band matrix  ===================
   
   return;
}