#include "SHarmonics.h"
#include "TorstensFilters.h"
#include "vuVector.h"
#include "Transform.h"
#include "Image_io.h"
#include <math.h>
#include <string.h>
#include <stdio.h>

using namespace FVR_NS;

// Angular tolerance of 20 degrees, expressed in radians. realTrans() compares
// the angle between two gradient estimates against it.
// The expansion is parenthesised so that the macro behaves as a single value
// inside any larger expression (e.g. `1.0 / ANGLE_TOL`).
#define ANGLE_TOL (20.0/180.0 * M_PI)
// Harmonic-transform coefficients of the diffuse BRDF (the one defined with
// the max function). Nine values laid out as 1 + 3 + 5, i.e. one repeated
// value per group below.
// NOTE(review): the first and last groups are close to, but not exactly,
// pi (3.14159...) and pi/4 (0.78539...); the middle one matches 2*pi/3.
// Presumably transcription slips -- confirm against the reference before
// changing, since the values are kept as they were here.
t_data dc[] = {
  3.14198,
  2.09440, 2.09440, 2.09440,
  0.78520, 0.78520, 0.78520, 0.78520, 0.78520
};

// File-global switch consulted by realTrans(): when true the input volume is
// box-averaged into Yarray[0] instead of being copied verbatim. realTrans()
// only sets it inside its (currently disabled) subsampling branch.
bool downsample = false;
// Returns true when v lies strictly inside the open interval
// (value - error, value + error); both end points are excluded.
bool inRange(float v, float value, float error)
{
  const float lowerBound = value - error;
  const float upperBound = value + error;

  if (!(v > lowerBound))
    return false;
  return v < upperBound;
}
// Maps voxel coordinates (x, y, z) to an offset into a flat array that stores
// two values per voxel, with x varying fastest, then y, then z. The returned
// offset addresses the first of the two values; offset + 1 is the second.
// zMax does not take part in the computation.
int indexOf(int x, int xMax, int y, int yMax, int z, int zMax)
{
  (void)zMax;
  const int rowNumber = z * yMax + y;
  const int voxelNumber = rowNumber * xMax + x;
  return voxelNumber * 2;
}

/**
 * Builds the ten per-voxel volumes used for spherical-harmonic shading.
 *
 * All arrays hold two t_data values per voxel (offset indexOf() and
 * indexOf() + 1), xMax * yMax * zMax voxels in total.
 *
 * @param volume  Input volume. It is transformed in place while the exact
 *                gradient is computed and afterwards overwritten with the
 *                contents of Yarray[0].
 * @param xMax, yMax, zMax
 *                Volume dimensions. Passed by reference because the
 *                (currently disabled) subsampling path halves them.
 * @param Yarray  Output. Every entry is allocated here with new[] and is not
 *                released by this function.
 *                Yarray[0]    copy (or box-averaged copy) of the volume,
 *                Yarray[1..9] the nine real spherical harmonics of bands
 *                             0..2 evaluated for the normalised
 *                             central-difference gradient and multiplied by
 *                             the voxel value. The second value of each
 *                             voxel is 0. Voxels on the outer one-voxel
 *                             border and voxels with a zero gradient stay 0.
 *
 * Side effects: prints gradient diagnostics to cout and may set the global
 * `downsample`.
 */
void realTrans(t_data* volume, int& xMax, int& yMax, int& zMax, t_data* Yarray[10])
{
  /* Real version of the spherical harmonic transform
     (aka tesseral harmonic transform)
     The main differences between this and the complex version are:

     for Ylm m!= 0, multiply by sqrt(2)
     all coefficients are positive

     this implementation follows Ravi Ramamoorthi's in
     "An Efficient Representation for Irradiance Environment Maps" from
     SIGGRAPH 2001

     x,y,z = unit normal

     Note that we only use real values. If I had used the real
     version of the fftw software I could gain a factor of 2 in time
     and storage, but this works and was easiest to get started.

     ***
     Yarray[0] receives the volume itself
     ***
  */
  int subSample = 2;
  // The trailing "&& false" switches automatic subsampling off; the branch is
  // kept so that it can be re-enabled by removing it.
  if ((xMax > 128 || yMax > 128 || zMax > 128) && false )
    {
      printf("We subsample!\n");
      xMax /= subSample;
      yMax /= subSample;
      zMax /= subSample;
      downsample = true;
    }

  int L = zMax; int M = yMax; int N = xMax;
  t_data x,y,z,delf,phshift;
  int volumeSize = L * M * N * 2;
  Yarray[0] = new t_data[volumeSize];

  if(downsample)
    {
      // Box-average every subSample^3 cell of the source into one voxel.
      // The source is addressed with dimensions subSample * (N, M, L).
      const t_data cellCount = (t_data)(subSample * subSample * subSample);
      for(int k = 0; k < L; k++)
        for(int j = 0; j < M; j++)
          for(int i = 0; i < N; i++)
            {
              int destIdx = indexOf(i,N,j,M,k,L);
              t_data accumFirst = 0;
              t_data accumSecond = 0;
              for(int avgx = 0; avgx < subSample; avgx++)
                for(int avgy = 0; avgy < subSample; avgy++)
                  for(int avgz = 0; avgz < subSample; avgz++)
                    {
                      const int srcIdx = indexOf(subSample * i + avgx, subSample * N,
                                                 subSample * j + avgy, subSample * M,
                                                 subSample * k + avgz, subSample * L);
                      accumFirst += volume[srcIdx];
                      // The second value of a voxel lives at offset + 1.
                      // Adding 1 to each coordinate instead would read the
                      // first value of the diagonal neighbour and run past
                      // the end of the source at the far border.
                      accumSecond += volume[srcIdx + 1];
                    }
              Yarray[0][destIdx] = accumFirst / cellCount;
              Yarray[0][destIdx+1] = accumSecond / cellCount;
            }
    }
  else
    memcpy(Yarray[0], volume, volumeSize * sizeof(t_data));

  /**
   * Let's compute the exact gradient, using Fourier Transform.
   *
   * Let's transform the volume to frequency domain first.
   */
  initTransform3D(N, M, L);
  shift3D(volume, N, M, L);
  transform3D(volume);
  //shift3D(volume, N, M, L);
  destroyTransform3D();

  initTransform3D(N, M, L);
  // NOTE(review): grads is never released in this function; if
  // computeGradient() hands ownership to the caller this leaks -- confirm.
  vuVector* grads = computeGradient(volume);
  destroyTransform3D();
  // spitVec3d(grads, N*M*L, "/tmp/grad.exact");

  // Undo the in-place transform by restoring the saved copy.
  memcpy(volume, Yarray[0], volumeSize * sizeof(t_data));

  ////////////////////////////////////////////////////////////

  // Zero-initialised storage for the nine harmonic volumes.
  for(int c = 1; c <= 9; c++)
    {
      Yarray[c] = new t_data[volumeSize];
      memset(Yarray[c], 0, volumeSize * sizeof(t_data));
    }

  // Diagnostics comparing the exact gradient with central differences.
  float minAll = 100.0;
  float maxAll = -100.0;   // low sentinel, like maxLen, so the first norm wins
  float sumLen = 0.0;
  float minLen = 100.0;
  float maxLen = -100.0;
  dword numOfOvers = 0;
  float largestTheta = 0.0;

  // Border voxels are skipped: central differences need both neighbours.
  for(int l=1;l<L-1;l++)
    for(int m=1;m<M-1;m++)
      for(int n=1;n<N-1;n++) {
        const int idx = indexOf(n,N,m,M,l,L);

        for(int c = 1; c <= 9; c++)
          Yarray[c][idx+1]=0;

        /*
          x = (rinp[l+1][m][n]-rinp[l-1][m][n]);
          y = (rinp[l][m+1][n]-rinp[l][m-1][n]);
          z = (rinp[l][m][n+1]-rinp[l][m][n-1]);
        */

        // Central differences on the first value of each voxel.
        x = (Yarray[0][indexOf(n+1,N,m,M,l,L)] -
             Yarray[0][indexOf(n-1,N,m,M,l,L)]);
        y = (Yarray[0][indexOf(n,N,m+1,M,l,L)] -
             Yarray[0][indexOf(n,N,m-1,M,l,L)]);
        z = (Yarray[0][indexOf(n,N,m,M,l+1,L)] -
             Yarray[0][indexOf(n,N,m,M,l-1,L)]);

        // grads holds one vector per voxel, hence the halved offset.
        vuVector normal = grads[idx/2];

        //normal = normal.MakeUnit();
        vuVector cd(x,y,z);
        //cd = cd.MakeUnit();

        //if(inRange(n,N/2.0, 2) && inRange(m,M/2.0, 2) && inRange(l,L/2.0, 2))
        float nl = normal.norm();
        float cdl = cd.norm();
        // When either length is 0 the quotient is not a number; every
        // comparison against theta below is then false, so such voxels only
        // enter the statistics through the explicit cdl == 0 test.
        float theta = acos(normal.dot(cd) / (nl * cdl));
        if(minAll > nl)
          minAll = nl;
        if(maxAll < nl)
          maxAll = nl;

        if(theta > ANGLE_TOL || cdl == 0)
          {
            numOfOvers++;
            float len = normal.norm();
            if(maxLen < len)
              maxLen = len;
            if(minLen > len)
              minLen = len;
            sumLen += len;
            normal = vuVector(0,0,0);
          }

        //if(cd.norm() > 0.005 || normal.norm() > 0.005)
        if(largestTheta < theta)
          largestTheta = theta;

        // Debug dump, switched off by the leading "false".
        if(false && theta > ANGLE_TOL)
          {
            cout << "(" <<  n << ", " << m << ", " << l << ")";
            cout << "Grad "; normal.print();
            cout << " CD "; cd.print();
            cout << endl << flush;
          }
        //use the central differencing method.
        // The exact gradient therefore only feeds the diagnostics above.
        normal = cd;

        x = normal[0];
        y = normal[1];
        z = normal[2];

        if(x==0 && y==0 && z==0) {
          // No direction available: all nine harmonics are 0 here.
          for(int c = 1; c <= 9; c++)
            Yarray[c][idx]=0;
        }
        else {
          int tst=l+m+n;
          // shift 0 freq to center of array with phshift
          // (both alternatives are 1 at the moment, i.e. the shift is off)
          phshift= tst % 2 == 0  ? 1:1;//-1;
          phshift*=Yarray[0][idx];

          // Normalise the gradient to a unit normal.
          delf = x*x;
          delf += y*y;
          delf += z*z;
          delf = sqrt(delf);
          x/=delf;
          y/=delf;
          z/=delf;

          // the real spherical harmonics for l=0..2
          // in terms of surface normal (x,y,z)
          Yarray[1][idx]=.282096*phshift;
          Yarray[2][idx]=.488603*y*phshift;
          Yarray[3][idx]=.488603*z*phshift;
          Yarray[4][idx]=.488603*x*phshift;
          Yarray[5][idx]=1.092548*x*y*phshift;
          Yarray[6][idx]=1.092548*z*y*phshift;
          Yarray[7][idx]=.315392*(3*z*z-1)*phshift;
          Yarray[8][idx]=1.092548*z*x*phshift;
          Yarray[9][idx]=.546274*(x*x-y*y)*phshift;
        }
      }

  cout << "Largest Difference between CD and Exact was " << largestTheta * 180.0 /M_PI << endl << flush;

  // Avoid dividing by zero when no vector was rejected.
  const float avgLen = (numOfOvers != 0) ? sumLen/numOfOvers : 0.0f;
  cout << numOfOvers << " vectors thrown away. Avg: " << avgLen
       << " min: " << minLen << " max: " << maxLen;
  cout << "Min Normal: " << minAll << "max Normal: " << maxAll << endl << flush;
}

/**
 * Evaluates the nine real spherical harmonics of bands 0..2 for a single
 * direction, e.g. to obtain the coefficients of a directional light.
 *
 * The coefficients are written out explicitly (normalisation factor times
 * polynomial in x, y, z) to show how they are obtained; the sqrt(2) factor
 * is the one the real form adds for every m != 0 term.
 *
 * @param sphy  Output array; elements 0..8 are written.
 * @param lv    Direction (x, y, z). It is used as given, without being
 *              normalised here -- presumably callers pass a unit vector.
 */
void realLight(t_data sphy[], t_data lv[3])
{
  t_data sqrt2=sqrt(2.0);

  t_data x=lv[0], y=lv[1], z=lv[2];

  // band 0
  sphy[0]=sqrt(1.0/4.0/M_PI);
  // band 1
  sphy[1]=y*sqrt(3.0/8.0/M_PI)*sqrt2;
  sphy[2]=sqrt(3.0/4.0/M_PI)*z;
  sphy[3]=x*sqrt(3.0/8.0/M_PI)*sqrt2;
  // band 2
  sphy[4]=2*x*y*sqrt(15.0/2.0/M_PI)/4.0*sqrt2;
  sphy[5]=z*y*sqrt(15.0/8.0/M_PI)*sqrt2;
  // sqrt(5/(4 pi)) * (3/2 z^2 - 1/2): only z is squared, not the 1.5 factor.
  sphy[6]=sqrt(5.0/4.0/M_PI)*(1.5*z*z-.5);
  sphy[7]=z*x*sqrt(15.0/8.0/M_PI)*sqrt2;
  sphy[8]=(x*x-y*y)*sqrt(15.0/2.0/M_PI)/4.0*sqrt2;
}

/*
#define clip(a) (a<0 ? 0 : a) // remove neg values due to approx in image
void shade(t_data lv[3], t_data*& pic)
{
  t_data lc[9];
  realLight(lv, lc);
  if(pic == NULL)
    pic = new t_data[Max*Max];
  ofstream out;
  int l;
  int m;
  int n;
  int L = Max; int M = Max; int N = Max;
  //  t_data sf=1.0/sqrt((t_data)(L*M*N)); // zero working array
  for(l=0;l<L;l++)
    for(m=0;m<M;m++) {
      Image[l][m].re =0.0;
      Image[l][m].im =0.0;
    }     
		    
  //  Color col;
  n=(int)ceil((t_data)Max/2.0);
  //  for(int yl=0;yl<9;yl++) {
  //    t_data c=dc[yl]*lc[yl]; // combined BRDF and light coef.
    for(l=0;l<L;l++) // loop over fft slice at origin 
      for(m=0;m<M;m++) // plane perpendicular to xy axis
	{
	  //	  Image[l][m].re +=c*Yarray[yl][indexOf(n,N,m,M,l,L)].re;
	  //	  Image[l][m].im +=c*Yarray[yl][indexOf(n,N,m,M,l,L)].im;
	  //t_data* currSlice = slices[yl];
	  //Image[l][m].re += c * currSlice[l * M + m];
	  //Image[l][m].im += c * currSlice[l * M + m + 1];
	}
    //  }
    //fftwnd_one(rp2, &Image[0][0], NULL);
    fftwnd_one(rp2, (fftw_complex*)g_fSlice, NULL);
    for(l=0; l<L;l++)
      for(m=0;m<M;m++)
	{
	  t_data val = g_fSlice[2*(l*M+m)];
	  if(val > 1.0f) val = 1.0f;
	  if(val < 0.0f) val = 0.0f;
	  pic[l*M+m] = val;
	}
    return;
  
  t_data phaser; // undo the shift
  for(l=0;l<L;l++)
    for(m=0;m<M;m++) {
      phaser=(l+m) % 2 ==0 ? 1:-1;
      Image[l][m].re*=phaser;
      int val=(int)(clip(Image[l][m].re));
      //      col.set(val,val,val);
      //      pic->SetPixel(l,m,col);
      pic[l * M + m] = (t_data)val;
    }
  
  
  t_data maxval=-HUGE; 
  // should not be needed if I worked out the
  // fft scaling correctly, I think.
  for(l=0;l<L;l++)  // scale dynamic range to max
    for(m=0;m<M;m++) {
      if(clip(Image[l][m].re)>maxval)
	maxval=clip(Image[l][m].re);
    }
  if(maxval>0)maxval=1.0/maxval*255;
  for(l=0;l<L;l++)
    for(m=0;m<M;m++) {
      int val=(int)(clip(Image[l][m].re)*maxval);
      //      col.set(val,val,val);
      //      pic->SetPixel(l,m,col);
      pic[l * M + m] = (t_data)val;///(t_data)maxval;
    }
}  

*/
