/*
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License version 2
 *   as published by the Free Software Foundation.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program; if not, write to the Free Software
 *   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 *   Copyright (C) 2007  Benjamin Segovia <bsegovia@liris.cnrs.fr>
 */

#include "specifics.h"

#include "rt_bvh.h"
#include "bvhlib_internal.h"

#include <functional>
#include <algorithm>
#include <float.h> // FLT_MAX

namespace bvhlib {
    /* One pending unit of work for the builder:
     *  - first, last: inclusive bounds of the node's slice in the index arrays
     *  - id: slot of the output node array where the node must be written
     *  - aabb: bounding box of the node
     */
    struct stack_data_t {
        int first, last;
        uint32_t id;
        aabb_t aabb;

        /* Does not give any explicit value to the fields */
        stack_data_t() {}

        /* Fully specified work item */
        stack_data_t(int first_index, int last_index,
                     uint32_t node_id, const aabb_t &node_aabb)
            : first(first_index), last(last_index),
              id(node_id), aabb(node_aabb) {}

        /* A slice reduced to a single index is a leaf */
        FINLINE bool_t is_leaf() {
            return last == first;
        }
    };

    /* Margin by which every node bounding box is enlarged on each side (it is
     * subtracted from pmin and added to pmax in do_grow_aabbs) to avoid
     * precision problems
     */
    static const float aabb_eps = 1e-6f;

    /* Fixed-capacity LIFO of the nodes which remain to be processed.
     * Neither push nor pop checks the bounds: the user must never hold more
     * than max_size entries and must never pop an empty stack.
     */
    struct stack_t {
        enum { max_size = 64 };

        /* Starts empty */
        stack_t() : n(0) {}

        /* Store a new work item on top of the stack */
        FINLINE void push(int a, int b, uint32_t id, const aabb_t &aabb) {
            data[n] = stack_data_t(a, b, id, aabb);
            ++n;
        }

        /* Remove the top work item and return a copy of it */
        FINLINE stack_data_t pop() {
            --n;
            return data[n];
        }

        /* True while at least one work item is stored */
        FINLINE bool_t is_not_empty() {
            return n != 0;
        }

        private:
            /* Number of stored items (data[n] is the next free slot) */
            int n;
            stack_data_t data[max_size];
    };

    /* A partition of the current given sub-array */
    struct partition_t {
        aabb_t aabbs[2];
        float cost;
        uint32_t axis;
        int first[2], last[2];
        partition_t() {}
        partition_t(int f, int l, uint32_t d) :
            cost(FLT_MAX), axis(d) {
            aabbs[on_left] = aabbs[on_right] = aabb_t(FLT_MAX, -FLT_MAX);
            first[on_right] = first[on_left] = f;
            last[on_right] = last[on_left] = l;
        }
    };

    /* Sweep the slice [first, last] of the index array of the given axis and
     * return the cheapest cut. Every position between two consecutive entries
     * is evaluated with
     *     cost = half_area(left) * |left| + half_area(right) * |right|
     * When two cuts have the same cost, the rightmost one is kept. If the
     * slice holds one entry only, or if no cost is accepted, the returned
     * partition is still in the state set by its constructor (cost == FLT_MAX,
     * both sides covering [first, last]).
     * Side effect: c.rl_aabbs is overwritten for every primitive of the slice.
     */
    template <uint32_t axis>
    static FINLINE
    partition_t do_sweep(compiler_t &c, const int first, const int last) {

        /* The best cut found so far */
        partition_t best(first, last, axis);

        /* Right-to-left pass: the entry of rl_aabbs attached to position j
         * becomes the box enclosing the primitives at positions j..last
         */
        c.rl_aabbs[c.ids[axis][last]] = c.aabbs[c.ids[axis][last]];
        for(int next = last; next > first; --next) {
            const int curr = next - 1;
            c.rl_aabbs[c.ids[axis][curr]] = c.aabbs[c.ids[axis][curr]];
            c.rl_aabbs[c.ids[axis][curr]].compose(c.rl_aabbs[c.ids[axis][next]]);
        }

        /* Left-to-right pass: grow the left box one primitive at a time and
         * evaluate the cut located just after it
         */
        aabb_t left_aabb(FLT_MAX, -FLT_MAX);
        const float total_n = (float) (last - first) + 1.f;
        float left_n = 1.f;
        best.cost = FLT_MAX;
        for(int cut = first; cut < last; ++cut) {
            left_aabb.compose(c.aabbs[c.ids[axis][cut]]);
            const float cost = left_aabb.half_surface_area() * left_n
                + c.rl_aabbs[c.ids[axis][cut + 1]].half_surface_area() * (total_n - left_n);
            left_n += 1.f;

            /* Only strictly more expensive cuts are rejected */
            if(!(cost > best.cost)) {
                best.cost = cost;
                best.last[on_left] = cut;
                best.first[on_right] = cut + 1;
                best.aabbs[on_left] = left_aabb;
                best.aabbs[on_right] = c.rl_aabbs[c.ids[axis][cut + 1]];
            }
        }
        return best;
    }

    /* Write an inner node into the slot data.id of compiler.root: its split
     * axis, its bounding box and, in offset_flag, the value curr_id + 1
     * (compile() gives the ids curr_id + on_left + 1 and curr_id + on_right + 1
     * to the two children).
     */
    static FINLINE void do_make_node(
        compiler_t &compiler,
        const stack_data_t &data,
        const uint32_t axis) {
        const uint32_t slot = data.id;
        compiler.root[slot].d = axis;
        compiler.root[slot].aabb = data.aabb;
        compiler.root[slot].offset_flag = (compiler.curr_id + 1);
    }

    /* Write a leaf into the slot data.id of compiler.root: its bounding box, a
     * zero offset_flag (presumably what tells a leaf from an inner node -- to
     * be confirmed against the traversal code) and the triangle found at
     * position data.first of the index array of axis 0.
     */
    static FINLINE void do_make_leaf(
        compiler_t &compiler,
        const stack_data_t &data) {
        const uint32_t slot = data.id;
        compiler.root[slot].aabb = data.aabb;
        compiler.root[slot].offset_flag = 0;
        compiler.root[slot].tri_id = compiler.ids[0][data.first];
    }

    /* Enlarge the bounding box of the first 2 * n - 1 entries of compiler.root
     * by aabb_eps along every axis: pmin is pulled down, pmax is pushed up.
     */
    static FINLINE void do_grow_aabbs(compiler_t &compiler) {
        const vec_t margin(aabb_eps, aabb_eps, aabb_eps);
        const int node_n = 2 * compiler.n - 1;
        int i = 0;
        while(i < node_n) {
            compiler.root[i].aabb.pmin = compiler.root[i].aabb.pmin - margin;
            compiler.root[i].aabb.pmax = compiler.root[i].aabb.pmax + margin;
            ++i;
        }
    }

    /* Directly compiles and *writes* the BVH into root. Nothing is allocated
     * here: root (and the ids, pos, tmp_ids, aabbs and rl_aabbs arrays) must
     * have been set up beforehand, we only have to write the output. We use
     * the same SAH structure as kd-lib but most of the fields are right now
     * unnecessary (since we put one triangle per leaf only).
     * Strategy:
     *  1/ loop invariant: the sorted centroids which are in the current node.
     *  2/ For *each* axis: we compute the inclusive bounding boxen from right
     *  to left.
     *  3/ We find the best partition with the greedy SAH heuristics and the two
     *  bounding box arrays, create the two children and compute their
     *  sorted centroid arrays (in O(n) complexity since we do not have to sort
     *  the array anymore)
     *  4/ We stop as soon as there is only one triangle in the node (arbitrary,
     *  may be improved to make smaller structures --> see Reshetov article for
     *  example)
     * Returns 0 (no other value is ever returned).
     */
    //int compiler_t::compile(bvh::descriptor_t &descriptor) {
    int compiler_t::compile() {

        /* Empty scene: there is nothing to build, and going on would make the
         * sweep read ids[axis][-1]. NOTE(review): 0 is returned since it is
         * the only return code of this function -- confirm with the callers.
         */
        if(n < 1)
            return 0;

        /* The stack to store where we have to fetch the indices */
        stack_t stack;
        /* The node to process */
        stack_data_t node;

        /* The root covers all the primitives and goes into slot 0 */
        stack.push(0, n - 1, 0, scene_aabb);
        while(stack.is_not_empty()) {
            node = stack.pop();
            while(!node.is_leaf()) {

                /* Find the best partition for this node: the *same* range
                 * [node.first, node.last] has to be swept along the three axes
                 */
                partition_t best = do_sweep<0>(*this, node.first, node.last);
                partition_t part = do_sweep<1>(*this, node.first, node.last);
                if(part.cost < best.cost) best = part;
                part = do_sweep<2>(*this, node.first, node.last);
                if(part.cost < best.cost) best = part;

                /* Register this node */
                do_make_node(*this, node, best.axis);

                /* First, store the positions of the primitives */
                const int d = best.axis;
                for(int j = best.first[on_left]; j <= best.last[on_left]; ++j)
                    pos[ids[d][j]] = on_left;
                for(int j = best.first[on_right]; j <= best.last[on_right]; ++j)
                    pos[ids[d][j]] = on_right;

                /* Then, for each other axis, reorder the indices for the next
                 * step: left primitives are compacted in place at the front of
                 * the range, right ones go through tmp_ids and are copied back
                 * behind them. The relative order is kept on both sides, so no
                 * new sort is needed.
                 */
                int left_n = 0, right_n = 0;
                for(int i = 0; i < other_axis_n; ++i) {
                    const int d0 = remap_other_axis[best.axis + i];
                    left_n = 0, right_n = 0;
                    for(int j = node.first; j <= node.last; ++j)
                        if(pos[ids[d0][j]] == on_left)
                            ids[d0][node.first + left_n++] = ids[d0][j];
                        else
                            tmp_ids[right_n++] = ids[d0][j];
                    for(int j = node.first + left_n; j <= node.last; ++j)
                        ids[d0][j] = tmp_ids[j - left_n - node.first];
                }

                /* Now, prepare the stack data for the next step: the side
                 * which holds the most primitives (p1) is pushed and we go on
                 * with the other one (p0)
                 */
                const int p0 = right_n > left_n ? on_left : on_right;
                const int p1 = right_n > left_n ? on_right : on_left;
                stack.push(best.first[p1], best.last[p1],
                    curr_id + p1 + 1, best.aabbs[p1]);
                node.first = best.first[p0];
                node.last = best.last[p0];
                node.aabb = best.aabbs[p0];
                node.id = curr_id + p0 + 1;

                /* Two slots have been given to the children */
                curr_id += 2;
            }

            /* Register this leaf */
            do_make_leaf(*this, node);
        }

        /* Finally, enlarge every box to avoid precision problems */
        do_grow_aabbs(*this);
        return 0;
    }
}
