/*
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License version 2 
    as published by the Free Software Foundation.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA


    Copyright (C) 2006  Thierry Berger-Perrin <tbptbp@gmail.com>
*/
#include "specifics.h"

#include "sys_cpu.h"
#include "sys_mem.h"
#include "sys_map.h"
#include "sys_clock.h"
#include "sys_log.h"

#include "math_constants.h"
#include "math_triangle.h"

#include "ressource.h"

#include "ogl.h"
#include "driver.h"

#include "kdlib.h"
#include "bvhlib.h"

#include "horde.h"

#include "rt.h"
#include "rt_render.h"

#include "cuda.h"
#include "cuda_runtime_api.h"

#include <cutil.h>
#include <cutil_gl_error.h>

// !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~

#if defined __ICC__
    // This is a kludge for Intel shaddy monopolistic practices.
    extern "C" int __intel_cpu_indicator;
    static struct fuck_off_t { fuck_off_t() { __intel_cpu_indicator = -1; } } fuck_off;
#endif

enum {
    do_anim = 0
};


// a blob
struct application_t {
    ui::window_t    *window;
    ogl::context_t    context;

    enum render_mode_t { mode_mono = 0, mode_packet = 1, mode_last };
    struct state_t {
        float base_speed;
        render_mode_t mode;
        int pbo_mode;
    } state;

    void process_input(rt::renderer_t &renderer) {
        if (window) {
            rt::camera_t &cam(renderer.rt.camera);
            ui::inputs_t &inputs(window->inputs);
            {
                sys::lock_t lock(inputs.mutex);

                // immediates
                const float
                    damp_fov = 1/2.f,
                    damp_initial = state.base_speed *1/128.f, damp_factor = 8.f,
                    damp = damp_initial * (inputs.modifiers.shift ? 1/damp_factor : 1.f) * (inputs.modifiers.control ? damp_factor : 1.f),
                    dx = damp*inputs.axis[0], dy = damp*inputs.axis[1], dz = damp*inputs.axis[2];

                const vec_t eye(cam.get_eye() + cam.get_dir()*dx + cam.get_right()*dy + cam.get_up()*dz);
                cam.set_eye(eye);

                cam.set_fovx(cam.get_fovx() + inputs.axis[3]*damp_fov);

                // buffered
                const int num_events = int(inputs.queue.size());
                for (int i=0; i<num_events; ++i) {
                    const ui::inputs_t::event_t &e(inputs.queue[i]);

                    if (e.click.type == ui::inputs_t::type_click) {    // mouse
                        const point_t pt(e.click.x, e.click.y);
                        cam.look_at(eye + cam.sampler.map(pt));
                    }
                    else {                                            // kbd
                        // sys::log("queued key: '%c'\n", e.key.key);
                        switch (e.key.key) {
                            case 'd':
                                {
                                    const vec_t &v(renderer.rt.camera.get_eye());
                                    sys::log("\ncamera: eye(%f,%f,%f) fov %.2f\n\n", v.x, v.y, v.z, renderer.rt.camera.get_fovx());
                                }
                                break;

                            case 'a':
                                {
                                    // switches to the next up axis, not really useful as implemented.
                                    const int idx = (1+renderer.rt.camera.get_world_up_index()) % 3;
                                    renderer.rt.camera.look_at(renderer.rt.camera.get_eye() + renderer.rt.camera.get_dir(), idx);
                                }
                                break;

                            case 'c':
                                {
                                    const int mode = (1+int(state.mode)) & 1;
                                    (render_mode_t&) state.mode = render_mode_t(mode);
                                    sys::log("rendering mode switched to %d.\n", mode);
                                }
                                break;

                            case 'p':
                                {
                                    if (state.pbo_mode != 0) renderer.framebuffer.allocate();
                                    state.pbo_mode = (state.pbo_mode+1) & 1;
                                    sys::log("pbo transfer mode is now '%s'\n", state.pbo_mode ? "map" : "memcpy");
                                }
                                break;

                            case 'q': bail("keyboard request", 0); break;

                            default:
                                sys::log("nothing binded to '%c'.\n", e.key.key);
                                break;
                        }
                    }
                }
                inputs.queue.clear();
            }
        }
    }


    void bootstrap(const char *display_name, const point_t &res) {
        window = new ui::window_t(display_name, res);
        if (window->valid) {
            ogl::bootstrap(*window, context);
        }
        else
            //FIXME: shouldn't be fatal
            fatal("couldn't open a window.");
    }
};



static bool_t compute_tri_acc(const triangle_t &t, rt::wald_tri_t &w, const uint_t id, const uint_t matid) {
    // let's go.
    const vec_t
        &A(t.verts[0]), &B(t.verts[1]), &C(t.verts[2]),
        b(C - A), c(B - A), N(b.cross(c));

    // greatest normal dimension.
    uint_t k = 0;
    for (uint_t i=1; i<3; ++i)
        k = fabsf(N[i]) > fabsf(N[k]) ? i : k;

    const uint_t u = (k+1)%3, v = (k+2)%3;
    const float
        denom = (b[u]*c[v] - b[v]*c[u]),
        krec = N[k];

    // first potential NaN
    //if ((krec == 0.) | (denom == 0.)) fatal("compute_wald_triangle_acceleration: degenerated triangle, null N[k] or denom, NaNs; not going to happen.\n");

    const float
        nu = N[u] / krec, nv = N[v] / krec, nd = N.dot(A) / krec,
        // first line eq.
        bnu =  b[u] / denom, bnv = -b[v] / denom,
        // second line eq.
        cnu =  c[v] / denom, cnv = -c[u] / denom;

    w.k            = k;
    w.n_u        = float(nu);
    w.n_v        = float(nv);
    w.n_d        = float(nd);

    w.vert_ku    = float(A[u]);
    w.vert_kv    = float(A[v]);
    w.b_nu        = float(bnu);
    w.b_nv        = float(bnv);

    w.c_nu        = float(cnu);
    w.c_nv        = float(cnv);

    w.id        = id;
    w.matid        = matid;

    return (krec == 0.) | (denom == 0.);
}

static NOINLINE void bake_intersection(const triangle_t * __restrict const tri, const uint_t num_tri, rt::wald_tri_t * __restrict acc) {
    int degenerated = 0;
    for (uint_t tid=0; tid < num_tri; ++tid)
        degenerated += compute_tri_acc(tri[tid], acc[tid], tid, 0);

    sys::log("bake_intersection: %d triangles, %d degenerated.\n", num_tri, degenerated);
}

static NOINLINE int scene_compile(rt::raytracer_t &raytracer, const char *filename) {
    sys::log("loading %s...\n", filename);
    sys::map_t m;
    m.open(filename);

    if (!m.is_mapped())
        fatal("failed to mmap scene data.");

    const triangle_t * __restrict const soup = (const triangle_t * __restrict const)m.begin();
    const uint_t num_tri = m.get_size<triangle_t>();
    kdlib_compile(soup, num_tri, raytracer.scene.kdtree, raytracer.scene.bounding_box);
    raytracer.scene.kdtree.acc = (rt::wald_tri_t *)sys::mem::allocate(num_tri*sizeof(rt::wald_tri_t));
    bake_intersection(soup, num_tri, (rt::wald_tri_t *)raytracer.scene.kdtree.acc);

    bvhlib_compile(soup, num_tri, raytracer.scene.bvhtree);
    raytracer.scene.bounding_box = raytracer.scene.bvhtree.root->aabb;

    return num_tri;
}

static NOINLINE int scene_compile2(const char *filename) {
    sys::log("loading %s...\n", filename);
    sys::map_t m;
    m.open(filename);

    if (!m.is_mapped())
        fatal("failed to mmap scene data.");

    const triangle_t * __restrict const soup = (const triangle_t * __restrict const)m.begin();
    const uint_t num_tri = m.get_size<triangle_t>();
    kdtree::descriptor_t kd;
    aabb_t box;
    kdlib_compile(soup, num_tri, kd, box);
    rt::wald_tri_t * acc = (rt::wald_tri_t *)sys::mem::allocate(num_tri*sizeof(rt::wald_tri_t));
    bake_intersection(soup, num_tri, (rt::wald_tri_t *)acc);

    return num_tri;
}

static NOINLINE int scene_load(const bool_t old_tree, rt::raytracer_t &rt, const char *basename) {
    // first load the original triangle data to compute a bounding box. meh
    uint_t num_tri = 0;
    //{
        sys::map_t m;
        m.open(sys::format_t("%s.tri", basename).buf);
        const triangle_t * __restrict const soup = (const triangle_t * __restrict const)m.begin();
        num_tri = m.get_size<triangle_t>();

        aabb_t box(
            vec_t(cst::section.plus_inf.f0, cst::section.plus_inf.f0, cst::section.plus_inf.f0),
            vec_t(cst::section.minus_inf.f0, cst::section.minus_inf.f0, cst::section.minus_inf.f0));
        for (uint_t i=0; i<num_tri; ++i) {
                const aabb_t bbox(soup[i].get_aabb());
                box.compose(bbox);
        }
        rt.scene.bounding_box = box;
    //}

    {
        // now mmap kdtree, ids & wald
        sys::map_t
            *kd = new sys::map_t(),
            *ids = new sys::map_t(),
            *wald = new sys::map_t();

        kd->open(sys::format_t(old_tree ? "%s.old.kd" : "%s.kdlib.kd", basename).buf);
        ids->open(sys::format_t(old_tree ? "%s.old.ids" : "%s.kdlib.ids", basename).buf);
        wald->open(sys::format_t("%s.wald", basename).buf);

        if (kd->is_mapped() & ids->is_mapped()) {
            rt.scene.kdtree.root = (const kdtree::node_t * __restrict) kd->begin();
            rt.scene.kdtree.ids = (const kdtree::tri_id_t * __restrict) ids->begin();
            if (wald->is_mapped())
                rt.scene.kdtree.acc = (const rt::wald_tri_t * __restrict) wald->begin();
            else {
                sys::log("baking missing intersection data.\n");
                rt.scene.kdtree.acc = (rt::wald_tri_t *)sys::mem::allocate(num_tri*sizeof(rt::wald_tri_t));
                bake_intersection(soup, num_tri, const_cast<rt::wald_tri_t * __restrict>(rt.scene.kdtree.acc));
            }
        }
        else
            fatal("no mmapage for you.");
    }


    return num_tri;
}

static void camera_reset(application_t &app, rt::raytracer_t &rt, rt::framebuffer_t &framebuffer) {
    const float scale = 0.4f;
    const vec_t
        extent(rt.scene.bounding_box.get_extent()),
        center(rt.scene.bounding_box.pmin + extent*0.5f),

        eye(center + vec_t(0,0,extent.z*scale));

    rt.camera.set_eye(eye);
    rt.camera.look_at(center, 0);
    rt.camera.set_fovx(90.f);
    rt.camera.update(framebuffer);

    app.state.base_speed = extent.get_max() / 2;
}

static void checkboard(rt::framebuffer_t &buffer) {
    const point_t res(buffer.get_resolution());
    for (int y=0; y<res.y; ++y) {
        for (int x=0; x<res.x; ++x) {
            const float c = ((x*y)%8) == 0 ? 1.f : 0.f;
            buffer.plot(x,y, rt::pixel_t(c,c,c,0));
        }
    }
}

static void checkboard2(rt::framebuffer_t &buffer, const float f) {
    const point_t res(buffer.get_resolution());
    for (int y=0; y<res.y; ++y) {
        for (int x=0; x<res.x; ++x) {
            const float c = ((x*y)%8) == 0 ? 1.f : 0.f;
            buffer.plot(x,y, rt::pixel_t(f,f,c,0));
        }
    }
}


// http://www.nvidia.com/dev_content/nvopenglspecs/GL_ARB_pixel_buffer_object.txt
#if 1
int main(int argc, char *argv[]) {
    //point_t res(1024,1024);
    point_t res(1024,512);
    const char
        *file_prefix = 0,
        *file_scene = 0,
        *display_name = 0;
    scene_compile2("D:\\Data\\scene6.ra2");
    int mode_load = 0;
    {
        for (int i=1; i<argc; ++i) {
            if (argv[i][0] == '-') {
                switch (argv[i][1]) {
                    case '-':
                        break;

                    case 'd':
                        if (std::strcmp(argv[i], "-display") == 0) {
                            if (++i >= argc) sys::log("*** display name expected for -display.\n");
                            else display_name = argv[i];
                        }
                        break;
                    case 'p':
                        if (++i >= argc) sys::log("*** path expected for -p.\n");
                        else file_prefix = argv[i];
                        break;
                    case 'l':
                        if (++i >= argc) sys::log("*** integer expected for -l.\n");
                        else mode_load = std::atoi(argv[i]);
                        break;
                    case 'h':
                        if (++i >= argc) sys::log("*** integer expected for -h.\n");
                        else res.y = std::atoi(argv[i]);
                        break;
                    case 'w':
                        if (++i >= argc) sys::log("*** integer expected for -w.\n");
                        else res.x = std::atoi(argv[i]);
                        break;
                }
            }
            else {
                if (file_scene) sys::log("warning: scene file overloading.\n");
                file_scene = argv[i];
            }
                //sys::log("*** unexpected argument starting with '%s'\n", argv[i]);
        }
    }

    #ifdef WINDOWS
    //file_prefix = file_prefix ? file_prefix : "./";
        file_prefix = file_prefix ? file_prefix : "d:/data/";
    #else
        file_prefix = file_prefix ? file_prefix : "./";
    #endif
    file_scene = file_scene ? file_scene : "scene6.ra2";


    // bootstrap
    sys::log("VERSION: %s\n", COMPILER_PRETTY_STAMP);
    sys::cpu::bootstrap();
    sys::clock::bootstrap();

    application_t app;
    app.bootstrap(display_name, res);
    app.state.mode = application_t::mode_mono;

    horde::bootstrap();


    //
    // fire in the hole!
    //
    rt::renderer_t renderer;
    renderer.open(res);

    checkboard(renderer.framebuffer);


    ogl::unit_quad_t uq;
    uq.generate();


    /*
        ogl::texture_t tex("framebuffer", GL_RGBA32F_ARB, res);
        tex.generate(GL_RGBA, GL_FLOAT, GL_NEAREST, GL_NEAREST, renderer.framebuffer.get());
    */
    app.state.pbo_mode = 0;
    ogl::stream_texture_t tex("framebuffer", GL_RGBA32F_ARB, res);
    tex.generate(GL_RGBA, GL_FLOAT, GL_NEAREST, GL_NEAREST);

    glClearColor(0,0,0, 0);

    const int num_tri =
        mode_load ?
            scene_load(mode_load == 1 ? true :  false, renderer.rt, sys::format_t("%s%s", file_prefix, file_scene).buf) :
            scene_compile(renderer.rt, sys::format_t("%s%s", file_prefix, file_scene).buf);

    camera_reset(app, renderer.rt, renderer.framebuffer);


    app.window->set_title(sys::format_t("[%s] radius @%dx%d | %.3f Ktri", COMPILER_PRETTY_STAMP, res.x, res.y, double(num_tri)/1000.).buf);
    sys::log("hopla!\n");

    // "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~";
    uint_t frame = 0;
    double dt[3] = { 0,0,0 };
    while (true) {    // we'll get killed.
        const sys::laps_t
            laps_total,
            laps_trace;

        //rt::mono::render(raytracer);
        //rt::mono::render_tiled(raytracer);
        //rt::render_horde(renderer, true);
        if (app.state.pbo_mode == 0) {
            rt::render_horde(renderer, app.state.mode == application_t::mode_mono ? true : false);
            dt[0] = sys::laps_t::to_time(laps_trace.elapsed());

            const sys::laps_t laps_upload;
            tex.upload(renderer.framebuffer.get());
            dt[1] = sys::laps_t::to_time(laps_upload.elapsed());
        }
        else {
            renderer.framebuffer.set(tex.map());
            rt::render_horde(renderer, app.state.mode == application_t::mode_mono ? true : false);
            dt[0] = sys::laps_t::to_time(laps_trace.elapsed());

            tex.transfer();
            dt[1] = 0; // not measurable this way
        }

        if (1)
        {
            glViewport(0, 0, res.x, res.y);
            glMatrixMode(GL_PROJECTION);
            glLoadIdentity();
            gluOrtho2D(-1, 1, -1, 1);
            glMatrixMode(GL_MODELVIEW);
            glLoadIdentity();

            uq.draw();
        }

        if (1)
        {
            ogl::hud::prologue();
            const double
                dt_raytrace = dt[0],
                area = double(res.x*res.y),
                mray = (area/(1000.*1000))/(dt_raytrace/1000.),
                fps = 1000. / dt_raytrace,

                dt_upload = dt[1],
                uploaded = sizeof(float)*4*(area/(1024.*1024.*1024.)) / (dt_upload/1000.),

                dt_total = dt[2],
                fps_total = 1000. / dt_total;

            //ogl::hud::set_status(sys::format_t(" [%c] r: %5.1f ms %6.3f Mray/s %5.1f fps, u: %4.1f ms %5.3f G/s :: %5.1f fps",
            sys::format_t s(" [%c] r: %5.1f ms %7.3f Mray/s %5.1f fps, u: %4.1f ms %5.3f G/s :: %5.1f fps",
                app.state.mode == application_t::mode_mono ? 'M' : 'P',
                dt_raytrace, mray, fps, 
                dt_upload, uploaded,
                fps_total );
            ogl::hud::set_status(s.buf);
            //sys::log("%s\r", s.buf);

            ogl::hud::epilogue();
        }

        app.context.flip(*app.window);

        app.process_input(renderer);
        renderer.rt.camera.update(renderer.framebuffer);
        ++frame;

        dt[2] = sys::laps_t::to_time(laps_total.elapsed());
    }

    // we don't synchronize the exit, we just nuke it all.
    // and it's not done here. move along.
    return 0;
}

#endif
