/*
	This program is free software; you can redistribute it and/or modify
	it under the terms of the GNU General Public License version 2 
	as published by the Free Software Foundation.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program; if not, write to the Free Software
	Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA


	Copyright (C) 2006  Thierry Berger-Perrin <tbptbp@gmail.com>
*/
#define _SECURE_SCL_THROWS		0
#define _SECURE_SCL				0
//#define _HAS_ITERATOR_DEBUGGING	0

#include "kdlib.h"
#include "kdlib_internal.h"

#include "sys_clock.h"

//#include <vector>
#include <algorithm>	// sort
#include <functional>	// std::binary_function
#include <vector>

namespace kdlib {
	/*
		~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
												Sorting.
		~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	*/

	// a little helper to deal with the initial sorting.
	/*
	template<uint_t axis>  struct sorter_t: public std::binary_function<int,int,bool> {
		const clip_box_t * __restrict const boxen;
		//const clip_box_t &boxen;
		sorter_t(const clip_box_t * __restrict const p): boxen(p) {}

		FINLINE
		//NOINLINE
		int operator() (const uint_t a, const uint_t b) const  {
			const clip_box_t &cba(boxen[a/2]), &cbb(boxen[b/2]);
			#if defined(__MSVC__)
				const ptrdiff_t d(&cba.box.max[axis]-&cba.box.min[axis]);
				const float *pa = &cba.box.min[axis], *pb = &cbb.box.min[axis];
				pa += (a & 1) ? d : 0;
				pb += (b & 1) ? d : 0;

				return _mm_comilt_ss(_mm_load_ss(pa),_mm_load_ss(pb));
			#else
				const float 
					a_min = cba.box.min[axis],
					a_max = cba.box.max[axis],
					b_min = cbb.box.min[axis],
					b_max = cbb.box.max[axis];

				const float
					a_val = (a & 1) ? a_max : a_min,
					b_val = (b & 1) ? b_max : b_min;
				return (a_val < b_val);
			#endif
		}
	};
	*/
	template<uint_t axis>  struct sorter_t: public std::binary_function<int,int,bool> {
		const lbox_t * __restrict const lboxen;
		//const clip_box_t &boxen;
		sorter_t(const lbox_t * __restrict const p): lboxen(p) {}

		
		FINLINE
		//NOINLINE
		int operator() (const uint_t a, const uint_t b) const  {
			const uint_t
				idx_a = a/2, idx_b = b/2,
				side_a = a&1, side_b = b&1;
			const lbox_t
				&b1(lboxen[idx_a]), 
				&b2(lboxen[idx_b]);
			const float 
				&f1 = b1.dim[axis][side_a].pos,
				&f2 = b2.dim[axis][side_b].pos;

			// the right side must come before the left one.
			/*
			if (f1 == f2)
				return a < b;
			else
				return f1 < f2;
			*/

			//BREAKPOINT();
			const bool_t
				eq = f1 == f2,
				lt = f1 < f2,
				sw = a < b;

			return eq ? sw : lt;
		}
	};

	/*
		Takes a triangle soup and prepares the compiler's initial state:
			1. resets 'state' and allocates one lbox_t per triangle (kept in pools.initial_lboxen),
			2. computes each triangle's bounding box and the bounding box of the whole scene,
			3. for each of the 3 axes, sorts the 2*tcount box boundaries and threads them into a
			   linked list whose head is stored in state.l3.heads[axis].

		soup	triangles to process; the pointer is stored in state.tris.
		tcount	number of triangles in 'soup'.

		Always returns -1 (the meaning of that value is not visible from this function).
	*/
	int compiler_t::injection(const kdlib::triangle_t * const __restrict soup, const int_t tcount) {
		sys::log("kdlib::compiler_t::injection: ");

		memset(&state, 0, sizeof(state));
		// NOTE(review): the result of the allocation is used unchecked — confirm that
		// sys::mem::allocate cannot return null.
		lbox_t *lboxen = (lbox_t*)sys::mem::allocate(sizeof(lbox_t)*tcount);
		pools.initial_lboxen = lboxen;

		state.tris = soup;
		state.tcount = tcount;

		sys::log("\t%d triangles\n",tcount);
		//
		// compute bbox for all triangles (and scene).
		//
		{
			// start from an inverted (+inf/-inf) box so that the first compose() fixes it up.
			aabb_t scene_bbox(vec_t(cst::inf_plus, cst::inf_plus, cst::inf_plus), vec_t(cst::inf_minus, cst::inf_minus, cst::inf_minus));
			for (int_t i=0; i<tcount; ++i) {
				const aabb_t bbox(soup[i].get_aabb());
				scene_bbox.compose(bbox);
				lboxen[i] = lbox_t(bbox, i);
			}
			state.scene_bbox = scene_bbox;
			// third component is z (the y component used to be printed twice).
			sys::log("\tscene_bbox{ (%.1f,%.1f,%.1f), (%.1f,%.1f,%.1f) }\n",
				scene_bbox.pmin.x,scene_bbox.pmin.y,scene_bbox.pmin.z,
				scene_bbox.pmax.x,scene_bbox.pmax.y,scene_bbox.pmax.z);
		}

		//
		// now we need to sort it on each axis to get started.
		//
		const sys::laps_t laps;
		{
			// two boundaries per box; a boundary is encoded as box_index*2 + side.
			const int num_indices = tcount*2;
			list3d_t l3;
			std::vector<int> indices;
			indices.resize(num_indices);
			for (int_t axis=0; axis<3; ++axis) {
				if (indices.empty()) {
					// nothing to sort or link: reading indices[0] below would be undefined
					// behaviour, so terminate the list right at its head instead.
					l3.heads[axis].set(0, side_left);
					continue;
				}

				// reseed the indices for every axis (original author's note: doing so is faster).
				for (int_t i=0; i<num_indices; ++i) indices[i] = i;

				// the axis is a template parameter of the sorter, hence the dispatch.
				switch (axis) {
					case 0: std::sort(indices.begin(), indices.end(), sorter_t<0>(pools.initial_lboxen)); break;
					case 1: std::sort(indices.begin(), indices.end(), sorter_t<1>(pools.initial_lboxen)); break;
					case 2: std::sort(indices.begin(), indices.end(), sorter_t<2>(pools.initial_lboxen)); break;
				}

				// build the list for current axis: the head is the first sorted boundary...
				const int i0 = indices[0]/2;
				sideness_t side(sideness_t(indices[0] & 1));
				lbox_t * __restrict p = &pools.initial_lboxen[i0];
				l3.heads[axis].set(p, side);
				// ...then each boundary links to the one that follows it in sorted order.
				for (int_t i=1; i<num_indices; ++i) {
					const int ref = indices[i], idx = ref/2;
					const sideness_t q_side(sideness_t(ref & 1));

					lbox_t * __restrict const q = &pools.initial_lboxen[idx];
					p->dim[axis][side].next.set(q, q_side);
					p = q;
					side = q_side;
				}
				// null link marks the end of the list.
				p->dim[axis][side].next.set(0, side_left);
			}
			state.l3 = l3;
		}
		const double dt(sys::laps_t::to_time(laps.elapsed()));
		sys::log("\t%.0f ms to sort.\n", dt);


		// we have bounding boxes, sorted boundaries... let's rock.
		return -1;
	}


	/*
		Purges the compiler's pools (pools.purge()).
		Always returns -1.
	*/
	int compiler_t::dispose() {
		const int status = -1;

		pools.purge();
		return status;
	}

}
