/*
 * render2x2ntsc.c - 2x2 NTSC renderers
 *
 * Written by
 *  groepaz <groepaz@gmx.net> based on the pal renderers written by
 *  John Selck <graham@cruise.de>
 *
 * This file is part of VICE, the Versatile Commodore Emulator.
 * See README for copyright notice.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 *  02111-1307  USA.
 *
 */

#include "vice.h"

#include <stdio.h>
#include <string.h>

#include "render2x2.h"
#include "render2x2ntsc.h"
#include "types.h"
#include "video-color.h"

/*
    right now this is basically the PAL renderer without delay line emulation
*/

/*
    YIQ->RGB (Sony CXA2025AS US decoder matrix)

    R = Y + (1.630 * I + 0.317 * Q)
    G = Y - (0.378 * I + 0.466 * Q)
    B = Y - (1.089 * I - 1.677 * Q)

    The matrix is applied with integer coefficients scaled by 128 (hence the
    ">> 7"), with u taking the place of I and v the place of Q. The sums are
    then scaled down by ">> 15" and stored as 16 bit values; no clamping is
    done here.

    NOTE(review): the green Q coefficient used below is 69/128 (~0.539), which
    does not match the 0.466 (~60/128) quoted above -- confirm which one is
    intended.
*/
static inline
void yuv_to_rgb(int32_t y, int32_t u, int32_t v, int16_t *red, int16_t *grn, int16_t *blu)
{
    /* chroma contribution per channel, still in the scale of y */
    const int32_t chroma_red = (209 * u + 41 * v) >> 7;
    const int32_t chroma_grn = (48 * u + 69 * v) >> 7;
    const int32_t chroma_blu = (139 * u - 215 * v) >> 7;

    /* explicit narrowing casts: the results are stored as 16 bit values */
    *red = (int16_t)((y + chroma_red) >> 15);
    *grn = (int16_t)((y - chroma_grn) >> 15);
    *blu = (int16_t)((y - chroma_blu) >> 15);
}

/* Often required function that stores gamma-corrected pixel to current line,
 * averages the current rgb with the contents of previous non-scanline-line,
 * stores the gamma-corrected scanline, and updates the prevline rgb buffer.
 * The variants 4, 3, 2 refer to pixel width of output. */

/* Store one pixel for 2 byte wide (16 bit) RGB output.
 *
 * line     - receives the gamma corrected pixel of the full line
 * scanline - receives the pixel looked up in the *_fac tables from the sum of
 *            the current and the previous line's rgb
 * prevline - rgb triple of the previous full line at this position; updated
 *            with the rgb of the current pixel
 * shade    - unused by the RGB variants
 * y, u, v  - color of the current pixel, converted by yuv_to_rgb()
 */
static inline
void store_line_and_scanline_2(
    uint8_t *const line, uint8_t *const scanline,
    int16_t *const prevline, const int shade, /* ignored by RGB modes */
    const int32_t y, const int32_t u, const int32_t v)
{
    int16_t red, grn, blu;
    uint16_t scanline_pixel, line_pixel;

    (void)shade;
    yuv_to_rgb(y, u, v, &red, &grn, &blu);

    scanline_pixel = (uint16_t) (gamma_red_fac[512 + red + prevline[0]]
                                 | gamma_grn_fac[512 + grn + prevline[1]]
                                 | gamma_blu_fac[512 + blu + prevline[2]]);

    line_pixel = (uint16_t) (gamma_red[256 + red] | gamma_grn[256 + grn] | gamma_blu[256 + blu]);

    /* copy the pixels instead of storing through a casted uint16_t pointer:
     * the byte buffers carry no alignment or effective-type guarantee */
    memcpy(scanline, &scanline_pixel, sizeof scanline_pixel);
    memcpy(line, &line_pixel, sizeof line_pixel);

    prevline[0] = red;
    prevline[1] = grn;
    prevline[2] = blu;
}

/* Store one pixel for 3 byte wide (24 bit) RGB output.
 *
 * The combined pixel values are written byte by byte, lowest byte first:
 * first the scanline pixel (looked up in the *_fac tables from the sum of
 * the current and the previous line's rgb), then the pixel of the full line.
 * Finally prevline is updated with the rgb of the current pixel. */
static inline
void store_line_and_scanline_3(
    uint8_t *const line, uint8_t *const scanline,
    int16_t *const prevline, const int shade, /* ignored by RGB modes */
    const int32_t y, const int32_t u, const int32_t v)
{
    int16_t red, grn, blu;
    uint32_t scanline_pixel, line_pixel;
    unsigned int i;

    yuv_to_rgb(y, u, v, &red, &grn, &blu);

    scanline_pixel = gamma_red_fac[512 + red + prevline[0]]
                     | gamma_grn_fac[512 + grn + prevline[1]]
                     | gamma_blu_fac[512 + blu + prevline[2]];
    line_pixel = gamma_red[256 + red] | gamma_grn[256 + grn] | gamma_blu[256 + blu];

    /* emit the three low bytes of each pixel value, least significant first */
    for (i = 0; i < 3; i++) {
        scanline[i] = (uint8_t) scanline_pixel;
        scanline_pixel >>= 8;
    }
    for (i = 0; i < 3; i++) {
        line[i] = (uint8_t) line_pixel;
        line_pixel >>= 8;
    }

    prevline[0] = red;
    prevline[1] = grn;
    prevline[2] = blu;
}

/* Store one pixel for 4 byte wide (32 bit) RGB output.
 *
 * line     - receives the gamma corrected pixel of the full line
 * scanline - receives the pixel looked up in the *_fac tables from the sum of
 *            the current and the previous line's rgb
 * prevline - rgb triple of the previous full line at this position; updated
 *            with the rgb of the current pixel
 * shade    - unused by the RGB variants
 * y, u, v  - color of the current pixel, converted by yuv_to_rgb()
 *
 * Both pixels are combined with the global alpha value. */
static inline
void store_line_and_scanline_4(
    uint8_t *const line, uint8_t *const scanline,
    int16_t *const prevline, const int shade, /* ignored by RGB modes */
    const int32_t y, const int32_t u, const int32_t v)
{
    int16_t red, grn, blu;
    uint32_t scanline_pixel, line_pixel;

    (void)shade;
    yuv_to_rgb(y, u, v, &red, &grn, &blu);

    scanline_pixel = gamma_red_fac[512 + red + prevline[0]]
                     | gamma_grn_fac[512 + grn + prevline[1]]
                     | gamma_blu_fac[512 + blu + prevline[2]]
                     | alpha;
    line_pixel = gamma_red[256 + red] | gamma_grn[256 + grn] | gamma_blu[256 + blu]
                 | alpha;

    /* copy the pixels instead of storing through a casted uint32_t pointer:
     * the byte buffers carry no alignment or effective-type guarantee */
    memcpy(scanline, &scanline_pixel, sizeof scanline_pixel);
    memcpy(line, &line_pixel, sizeof line_pixel);

    prevline[0] = red;
    prevline[1] = grn;
    prevline[2] = blu;
}

/* Store one 4 byte U-Y-V-Y group to the full line and the scanline.
 *
 * y, u and v are reduced by ">> 16" first. The full line gets them unshaded,
 * with 128 added to both chroma values. For the scanline the values are
 * multiplied by shade / 256 and averaged with the values kept in prevline,
 * which is then overwritten with the shaded values of this pixel
 * (order in prevline: y, u, v). */
static inline
void store_line_and_scanline_UYVY(
    uint8_t *const line, uint8_t *const scanline,
    int16_t *const prevline, const int shade,
    int32_t y, int32_t u, int32_t v)
{
    const int32_t luma = y >> 16;
    const int32_t cb = u >> 16;
    const int32_t cr = v >> 16;
    const int32_t shaded_luma = (luma * shade) >> 8;
    const int32_t shaded_cb = 128 + ((cb * shade) >> 8);
    const int32_t shaded_cr = 128 + ((cr * shade) >> 8);
    uint8_t mixed_luma;

    /* full line: U Y V Y */
    line[0] = (uint8_t)(cb + 128);
    line[1] = (uint8_t)luma;
    line[2] = (uint8_t)(cr + 128);
    line[3] = (uint8_t)luma;

    /* scanline: mean of the shaded values and the previous line */
    mixed_luma = (uint8_t)((shaded_luma + prevline[0]) >> 1);
    scanline[0] = (uint8_t)((shaded_cb + prevline[1]) >> 1);
    scanline[1] = mixed_luma;
    scanline[2] = (uint8_t)((shaded_cr + prevline[2]) >> 1);
    scanline[3] = mixed_luma;

    prevline[0] = (int16_t)shaded_luma;
    prevline[1] = (int16_t)shaded_cb;
    prevline[2] = (int16_t)shaded_cr;
}

/* Store one 4 byte Y-U-Y-V group to the full line and the scanline.
 *
 * y, u and v are reduced by ">> 16" first. The full line gets them unshaded,
 * with 128 added to both chroma values. For the scanline the values are
 * multiplied by shade / 256 and averaged with the values kept in prevline,
 * which is then overwritten with the shaded values of this pixel
 * (order in prevline: y, u, v). */
static inline
void store_line_and_scanline_YUY2(
    uint8_t *const line, uint8_t *const scanline,
    int16_t *const prevline, const int shade,
    int32_t y, int32_t u, int32_t v)
{
    const int32_t luma = y >> 16;
    const int32_t cb = u >> 16;
    const int32_t cr = v >> 16;
    const int32_t shaded_luma = (luma * shade) >> 8;
    const int32_t shaded_cb = 128 + ((cb * shade) >> 8);
    const int32_t shaded_cr = 128 + ((cr * shade) >> 8);
    uint8_t mixed_luma;

    /* full line: Y U Y V */
    line[0] = (uint8_t)luma;
    line[1] = (uint8_t)(cb + 128);
    line[2] = (uint8_t)luma;
    line[3] = (uint8_t)(cr + 128);

    /* scanline: mean of the shaded values and the previous line */
    mixed_luma = (uint8_t)((shaded_luma + prevline[0]) >> 1);
    scanline[0] = mixed_luma;
    scanline[1] = (uint8_t)((shaded_cb + prevline[1]) >> 1);
    scanline[2] = mixed_luma;
    scanline[3] = (uint8_t)((shaded_cr + prevline[2]) >> 1);

    prevline[0] = (int16_t)shaded_luma;
    prevline[1] = (int16_t)shaded_cb;
    prevline[2] = (int16_t)shaded_cr;
}

/* Store one 4 byte Y-V-Y-U group to the full line and the scanline.
 *
 * y, u and v are reduced by ">> 16" first. The full line gets them unshaded,
 * with 128 added to both chroma values. For the scanline the values are
 * multiplied by shade / 256 and averaged with the values kept in prevline,
 * which is then overwritten with the shaded values of this pixel
 * (order in prevline: y, u, v). */
static inline
void store_line_and_scanline_YVYU(
    uint8_t *const line, uint8_t *const scanline,
    int16_t *const prevline, const int shade,
    int32_t y, int32_t u, int32_t v)
{
    const int32_t luma = y >> 16;
    const int32_t cb = u >> 16;
    const int32_t cr = v >> 16;
    const int32_t shaded_luma = (luma * shade) >> 8;
    const int32_t shaded_cb = 128 + ((cb * shade) >> 8);
    const int32_t shaded_cr = 128 + ((cr * shade) >> 8);
    uint8_t mixed_luma;

    /* full line: Y V Y U */
    line[0] = (uint8_t)luma;
    line[1] = (uint8_t)(cr + 128);
    line[2] = (uint8_t)luma;
    line[3] = (uint8_t)(cb + 128);

    /* scanline: mean of the shaded values and the previous line */
    mixed_luma = (uint8_t)((shaded_luma + prevline[0]) >> 1);
    scanline[0] = mixed_luma;
    scanline[1] = (uint8_t)((shaded_cr + prevline[2]) >> 1);
    scanline[2] = mixed_luma;
    scanline[3] = (uint8_t)((shaded_cb + prevline[1]) >> 1);

    prevline[0] = (int16_t)shaded_luma;
    prevline[1] = (int16_t)shaded_cb;
    prevline[2] = (int16_t)shaded_cr;
}


/* Scale the accumulated chroma sums unew/vnew by off_flip and hand the
 * results back through u and v. */
static inline
void get_yuv_from_video(
    const int32_t unew, const int32_t vnew,
    const int off_flip,
    int32_t *const u, int32_t *const v)
{
    const int32_t scaled_u = unew * off_flip;
    const int32_t scaled_v = vnew * off_flip;

    *u = scaled_u;
    *v = scaled_v;
}

/* Generic 2x2 NTSC renderer shared by the pixel format specific entry points.
 *
 * Every source pixel is turned into filtered luma/chroma values and handed to
 * store_func, which writes one sample to the current target row and one
 * scanline sample to the row above it (trg - pitcht). The target pointer
 * advances by two rows for every source row.
 *
 * color_tab   - lookup tables (ytablel/ytableh and cbtable/crtable or
 *               cutable/cvtable) plus the prevrgbline and rgbscratchbuffer
 *               work buffers
 * src, trg    - source and target buffers
 * width       - width of the area, in target pixels
 * height      - height of the area, in target rows
 * xs, ys      - start position in the source
 * xt, yt      - start position in the target
 * pitchs, pitcht - bytes per row of source and target
 * viewport    - first_line/last_line limit the rows that receive scanline data
 * pixelstride - bytes the target pointers advance after each store_func call
 * store_func  - writes one line/scanline sample and updates its prevline entry
 * write_interpolated_pixels - when non-zero, an extra sample averaged from two
 *               neighbouring source pixels is stored after each regular one
 *               and the cbtable/crtable chroma tables are used; when zero only
 *               one sample per source pixel is stored and cutable/cvtable are
 *               used
 * config      - supplies video_resources.pal_scanlineshade
 */
static inline
void render_generic_2x2_ntsc(video_render_color_tables_t *color_tab,
                             const uint8_t *src, uint8_t *trg,
                             unsigned int width, const unsigned int height,
                             unsigned int xs, const unsigned int ys,
                             unsigned int xt, const unsigned int yt,
                             const unsigned int pitchs, const unsigned int pitcht,
                             viewport_t *viewport, unsigned int pixelstride,
                             void (*store_func)(
                                 uint8_t *const line, uint8_t *const scanline,
                                 int16_t *const prevline, const int shade,
                                 int32_t l, int32_t u, int32_t v),
                             const int write_interpolated_pixels, video_render_config_t *config)
{
    int16_t *prevrgblineptr;
    const int32_t *ytablel = color_tab->ytablel;
    const int32_t *ytableh = color_tab->ytableh;
    const uint8_t *tmpsrc;
    uint8_t *tmptrg, *tmptrgscanline;
    int32_t *cbtable, *crtable;
    uint32_t x, y, wfirst, wlast, yys;
    int32_t l, l2, u, u2, unew, v, v2, vnew, off_flip, shade;
    /* viewport limits doubled, to be compared against the doubled line
     * counter y below */
    int first_line = viewport->first_line * 2;
    int last_line = (viewport->last_line * 2) + 1;

    /* start two source pixels left of xs: the filter below reads
     * tmpsrc[0] .. tmpsrc[3] */
    src = src + pitchs * ys + xs - 2;
    trg = trg + pitcht * yt + xt * pixelstride;
    /* doubled source line number, the low bit is taken from the target row */
    yys = (ys << 1) | (yt & 1);
    /* wfirst: the area starts on an odd target column, wlast: one target
     * pixel is left over after the pairs; width becomes the number of pairs */
    wfirst = xt & 1;
    width -= wfirst;
    wlast = width & 1;
    width >>= 1;

    /* That's all initialization we need for full lines. Unfortunately, for
     * scanlines we also need to calculate the RGB color of the previous
     * full line, and that requires initialization from 2 full lines above our
     * rendering target. We just won't render the scanline above the target row,
     * so you need to call us with 1 line before the desired rectangle, and
     * for one full line after it! */

    /* Calculate odd line shading (a pal_scanlineshade of 1000 maps to 256) */
    shade = (int) ((float) config->video_resources.pal_scanlineshade / 1000.0f * 256.f);
    /* constant factor applied to the chroma sums in get_yuv_from_video() */
    off_flip = 1 << 6;

    /* height & 1 == 0. */
    /* one iteration per source line, plus a final one (y == yys + height)
     * that only exists to produce the last scanline */
    for (y = yys; y < yys + height + 1; y += 2) {
        /* when we are dealing with the last line, the rules change:
         * we no longer write the main output to screen, we just put it into
         * the scanline. */
        if (y == yys + height) {
            /* no place to put scanline in: we are outside viewport or still
             * doing the first iteration (y == yys), height == 0 */
            if (y == yys || y <= (unsigned int)first_line || y > (unsigned int)(last_line + 1)) {
                break;
            }
            /* the full line goes to the scratch buffer, only the scanline
             * above it reaches the target */
            tmptrg = &color_tab->rgbscratchbuffer[0];
            tmptrgscanline = trg - pitcht;
            if (y == (unsigned int)(last_line + 1)) {
                /* src would point after the source area, so rewind one line */
                src -= pitchs;
            }
        } else {
            /* pixel data to surface */
            tmptrg = trg;
            /* write scanline data to previous line if possible,
             * otherwise we dump it to the scratch region... We must never
             * render the scanline for the first row, because prevlinergb is not
             * yet initialized and scanline data would be bogus! */
            tmptrgscanline = y != yys && y > (unsigned int)first_line && y <= (unsigned int)last_line
                             ? trg - pitcht
                             : &color_tab->rgbscratchbuffer[0];
        }

        /* current source image for YUV xform */
        tmpsrc = src;

        /* chroma tables depend on whether interpolated samples are written */
        cbtable = write_interpolated_pixels ? color_tab->cbtable : color_tab->cutable;
        crtable = write_interpolated_pixels ? color_tab->crtable : color_tab->cvtable;

        /* prime the filters: luma is a 3 tap sum (low, high, low table),
         * chroma a running sum over 4 source pixels */
        l = ytablel[tmpsrc[1]] + ytableh[tmpsrc[2]] + ytablel[tmpsrc[3]];
        unew = cbtable[tmpsrc[0]] + cbtable[tmpsrc[1]] + cbtable[tmpsrc[2]] + cbtable[tmpsrc[3]];
        vnew = crtable[tmpsrc[0]] + crtable[tmpsrc[1]] + crtable[tmpsrc[2]] + crtable[tmpsrc[3]];
        get_yuv_from_video(unew, vnew, off_flip, &u, &v);
        /* drop the leftmost tap; the next pixel adds its new rightmost tap */
        unew -= cbtable[tmpsrc[0]];
        vnew -= crtable[tmpsrc[0]];
        tmpsrc += 1;

        /* actual line */
        prevrgblineptr = &color_tab->prevrgbline[0];
        if (wfirst) {
            /* odd start column: only the interpolated sample between the
             * first two source pixels is stored (and only if interpolated
             * samples are written at all) */
            l2 = ytablel[tmpsrc[1]] + ytableh[tmpsrc[2]] + ytablel[tmpsrc[3]];
            unew += cbtable[tmpsrc[3]];
            vnew += crtable[tmpsrc[3]];
            get_yuv_from_video(unew, vnew, off_flip, &u2, &v2);
            unew -= cbtable[tmpsrc[0]];
            vnew -= crtable[tmpsrc[0]];
            tmpsrc += 1;

            if (write_interpolated_pixels) {
                store_func(tmptrg, tmptrgscanline, prevrgblineptr, shade, (l + l2) >> 1, (u + u2) >> 1, (v + v2) >> 1);
                tmptrgscanline += pixelstride;
                tmptrg += pixelstride;
                prevrgblineptr += 3;
            }

            l = l2;
            u = u2;
            v = v2;
        }
        for (x = 0; x < width; x++) {
            /* sample of the current source pixel */
            store_func(tmptrg, tmptrgscanline, prevrgblineptr, shade, l, u, v);
            tmptrgscanline += pixelstride;
            tmptrg += pixelstride;
            prevrgblineptr += 3;

            /* filter values of the next source pixel */
            l2 = ytablel[tmpsrc[1]] + ytableh[tmpsrc[2]] + ytablel[tmpsrc[3]];
            unew += cbtable[tmpsrc[3]];
            vnew += crtable[tmpsrc[3]];
            get_yuv_from_video(unew, vnew, off_flip, &u2, &v2);
            unew -= cbtable[tmpsrc[0]];
            vnew -= crtable[tmpsrc[0]];
            tmpsrc += 1;

            /* sample halfway between the current and the next source pixel */
            if (write_interpolated_pixels) {
                store_func(tmptrg, tmptrgscanline, prevrgblineptr, shade, (l + l2) >> 1, (u + u2) >> 1, (v + v2) >> 1);
                tmptrgscanline += pixelstride;
                tmptrg += pixelstride;
                prevrgblineptr += 3;
            }

            l = l2;
            u = u2;
            v = v2;
        }
        if (wlast) {
            /* left over target pixel at the right edge */
            store_func(tmptrg, tmptrgscanline, prevrgblineptr, shade, l, u, v);
        }

        /* next source line, two target rows further down */
        src += pitchs;
        trg += pitcht * 2;
    }
}

/* 2x2 NTSC renderer for UYVY output: runs the generic renderer with the UYVY
 * store function, 4 bytes per store and no interpolated samples. */
void render_UYVY_2x2_ntsc(video_render_color_tables_t *color_tab,
                          const uint8_t *src, uint8_t *trg,
                          unsigned int width, const unsigned int height,
                          const unsigned int xs, const unsigned int ys,
                          const unsigned int xt, const unsigned int yt,
                          const unsigned int pitchs, const unsigned int pitcht,
                          viewport_t *viewport, video_render_config_t *config)
{
    const unsigned int bytes_per_store = 4;
    const int interpolate = 0;

    render_generic_2x2_ntsc(color_tab, src, trg, width, height,
                            xs, ys, xt, yt, pitchs, pitcht,
                            viewport, bytes_per_store,
                            store_line_and_scanline_UYVY, interpolate, config);
}

/* 2x2 NTSC renderer for YUY2 output: runs the generic renderer with the YUY2
 * store function, 4 bytes per store and no interpolated samples. */
void render_YUY2_2x2_ntsc(video_render_color_tables_t *color_tab,
                          const uint8_t *src, uint8_t *trg,
                          unsigned int width, const unsigned int height,
                          const unsigned int xs, const unsigned int ys,
                          const unsigned int xt, const unsigned int yt,
                          const unsigned int pitchs, const unsigned int pitcht,
                          viewport_t *viewport, video_render_config_t *config)
{
    const unsigned int bytes_per_store = 4;
    const int interpolate = 0;

    render_generic_2x2_ntsc(color_tab, src, trg, width, height,
                            xs, ys, xt, yt, pitchs, pitcht,
                            viewport, bytes_per_store,
                            store_line_and_scanline_YUY2, interpolate, config);
}

/* 2x2 NTSC renderer for YVYU output: runs the generic renderer with the YVYU
 * store function, 4 bytes per store and no interpolated samples. */
void render_YVYU_2x2_ntsc(video_render_color_tables_t *color_tab,
                          const uint8_t *src, uint8_t *trg,
                          unsigned int width, const unsigned int height,
                          const unsigned int xs, const unsigned int ys,
                          const unsigned int xt, const unsigned int yt,
                          const unsigned int pitchs, const unsigned int pitcht,
                          viewport_t *viewport, video_render_config_t *config)
{
    const unsigned int bytes_per_store = 4;
    const int interpolate = 0;

    render_generic_2x2_ntsc(color_tab, src, trg, width, height,
                            xs, ys, xt, yt, pitchs, pitcht,
                            viewport, bytes_per_store,
                            store_line_and_scanline_YVYU, interpolate, config);
}

/* 2x2 NTSC renderer for 16 bit RGB output: runs the generic renderer with the
 * 2 byte store function and interpolated samples enabled. */
void render_16_2x2_ntsc(video_render_color_tables_t *color_tab,
                        const uint8_t *src, uint8_t *trg,
                        unsigned int width, const unsigned int height,
                        const unsigned int xs, const unsigned int ys,
                        const unsigned int xt, const unsigned int yt,
                        const unsigned int pitchs, const unsigned int pitcht,
                        viewport_t *viewport, video_render_config_t *config)
{
    const unsigned int bytes_per_pixel = 2;
    const int interpolate = 1;

    render_generic_2x2_ntsc(color_tab, src, trg, width, height,
                            xs, ys, xt, yt, pitchs, pitcht,
                            viewport, bytes_per_pixel,
                            store_line_and_scanline_2, interpolate, config);
}

/* 2x2 NTSC renderer for 24 bit RGB output: runs the generic renderer with the
 * 3 byte store function and interpolated samples enabled. */
void render_24_2x2_ntsc(video_render_color_tables_t *color_tab,
                        const uint8_t *src, uint8_t *trg,
                        unsigned int width, const unsigned int height,
                        const unsigned int xs, const unsigned int ys,
                        const unsigned int xt, const unsigned int yt,
                        const unsigned int pitchs, const unsigned int pitcht,
                        viewport_t *viewport, video_render_config_t *config)
{
    const unsigned int bytes_per_pixel = 3;
    const int interpolate = 1;

    render_generic_2x2_ntsc(color_tab, src, trg, width, height,
                            xs, ys, xt, yt, pitchs, pitcht,
                            viewport, bytes_per_pixel,
                            store_line_and_scanline_3, interpolate, config);
}

/* 2x2 NTSC renderer for 32 bit RGB output: runs the generic renderer with the
 * 4 byte store function and interpolated samples enabled. */
void render_32_2x2_ntsc(video_render_color_tables_t *color_tab,
                        const uint8_t *src, uint8_t *trg,
                        unsigned int width, const unsigned int height,
                        const unsigned int xs, const unsigned int ys,
                        const unsigned int xt, const unsigned int yt,
                        const unsigned int pitchs, const unsigned int pitcht,
                        viewport_t *viewport, video_render_config_t *config)
{
    const unsigned int bytes_per_pixel = 4;
    const int interpolate = 1;

    render_generic_2x2_ntsc(color_tab, src, trg, width, height,
                            xs, ys, xt, yt, pitchs, pitcht,
                            viewport, bytes_per_pixel,
                            store_line_and_scanline_4, interpolate, config);
}
