/*	$NetBSD: tdvfb.c,v 1.11 2023/08/01 20:50:11 andvar Exp $	*/

/*
 * Copyright (c) 2012 The NetBSD Foundation, Inc.   
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Radoslaw Kujawa.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * A console driver for 3Dfx Voodoo2 (CVG) and 3Dfx Voodoo Graphics (SST-1).
 *
 * 3Dfx Glide 2.x source code, Linux driver by Ghozlane Toumi, and 
 * "Voodoo2 Graphics Engine for 3D Game Acceleration" document were used as 
 * reference. wscons attachment code based mostly on genfb by Michael
 * Lorenz.
 *
 * This driver currently only support boards with ICS GENDAC (which seems to
 * be most popular, however at least two different DACs were used with CVG).
 *
 * TODO (in no particular order):
 * - Finally fix 16-bit depth handling on big-endian machines.
 * - Expose card to userspace through /dev/3dfx compatible device file
 *   (for Glide).
 * - Allow mmap'ing of registers through wscons access op.
 * - Complete wscons emul ops acceleration support.
 * - Add support for others DACs (need hardware).
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tdvfb.c,v 1.11 2023/08/01 20:50:11 andvar Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/device.h>
#include <sys/endian.h>

#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcidevs.h>
#include <dev/pci/pciio.h>

#include <dev/pci/tdvfbreg.h>
#include <dev/pci/tdvfbvar.h>

#include <dev/videomode/videomode.h>
#include <dev/pci/wsdisplay_pci.h>

#include "opt_wsemul.h"
#include "opt_tdvfb.h"

#define MAXLOOP 4096 
/* #define TDVFB_DEBUG 1 */

static int	tdvfb_match(device_t, cfdata_t, void *);
static void	tdvfb_attach(device_t, device_t, void *);

static uint32_t	tdvfb_cvg_read(struct tdvfb_softc *sc, uint32_t reg);
static void	tdvfb_cvg_write(struct tdvfb_softc *sc, uint32_t reg,
		    uint32_t val);
static void	tdvfb_cvg_set(struct tdvfb_softc *sc, uint32_t reg, 
		    uint32_t bits);
static void	tdvfb_cvg_unset(struct tdvfb_softc *sc, uint32_t reg, 
		    uint32_t bits);
static uint8_t	tdvfb_cvg_dac_read(struct tdvfb_softc *sc, uint32_t reg);
static void	tdvfb_cvg_dac_write(struct tdvfb_softc *sc, uint32_t reg, 
		    uint32_t val);
static void	tdvfb_wait(struct tdvfb_softc *sc);

static bool	tdvfb_init(struct tdvfb_softc *sc);
static void	tdvfb_fbiinit_defaults(struct tdvfb_softc *sc);
static size_t	tdvfb_mem_size(struct tdvfb_softc *sc);

static bool	tdvfb_videomode_set(struct tdvfb_softc *sc);
static void	tdvfb_videomode_dac(struct tdvfb_softc *sc);

static bool	tdvfb_gendac_detect(struct tdvfb_softc *sc);
static struct tdvfb_dac_timing	tdvfb_gendac_calc_pll(int freq);
static void	tdvfb_gendac_set_cvg_timing(struct tdvfb_softc *sc, 
		    struct tdvfb_dac_timing *timing);
static void	tdvfb_gendac_set_vid_timing(struct tdvfb_softc *sc, 
		    struct tdvfb_dac_timing *timing);

static paddr_t	tdvfb_mmap(void *v, void *vs, off_t offset, int prot);
static int	tdvfb_ioctl(void *v, void *vs, u_long cmd, void *data, int flag,
		    struct lwp *l);
static void	tdvfb_init_screen(void *cookie, struct vcons_screen *scr, 
		    int existing, long *defattr);
static void	tdvfb_init_palette(struct tdvfb_softc *sc);
/* blitter support */
static void	tdvfb_rectfill(struct tdvfb_softc *sc, int x, int y, int wi, 
		    int he, uint32_t color);
static void	tdvfb_bitblt(struct tdvfb_softc *sc, int xs, int ys, int xd, 
		    int yd, int wi, int he);
/* accelerated raster ops */
static void	tdvfb_eraserows(void *cookie, int row, int nrows, 
		    long fillattr);
static void	tdvfb_copyrows(void *cookie, int srcrow, int dstrow, int nrows);

CFATTACH_DECL_NEW(tdvfb, sizeof(struct tdvfb_softc),
    tdvfb_match, tdvfb_attach, NULL, NULL);

struct wsdisplay_accessops tdvfb_accessops = {
	tdvfb_ioctl,
	tdvfb_mmap,
	NULL,	/* alloc_screen */
	NULL,	/* free_screen */
	NULL,	/* show_screen */
	NULL, 	/* load_font */
	NULL,	/* pollc */
	NULL	/* scroll */
};

static int
tdvfb_match(device_t parent, cfdata_t match, void *aux)
{
	const struct pci_attach_args *pa = (const struct pci_attach_args *)aux;

	if ((PCI_VENDOR(pa->pa_id) == PCI_VENDOR_3DFX) &&
	    (PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_3DFX_VOODOO2))
		return 100;
	if ((PCI_VENDOR(pa->pa_id) == PCI_VENDOR_3DFX) &&
	    (PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_3DFX_VOODOO))
		return 100;

	return 0;
}

static void
tdvfb_attach(device_t parent, device_t self, void *aux)
{
	struct tdvfb_softc *sc = device_private(self);
	struct wsemuldisplaydev_attach_args ws_aa;
	struct rasops_info *ri;
	const struct pci_attach_args *pa = aux;
	pcireg_t screg;
	bool console;
	long defattr;

#ifdef TDVFB_CONSOLE
	console = true; 
#else
	console = false;
#endif

	sc->sc_pc = pa->pa_pc;
	sc->sc_pcitag = pa->pa_tag;
	sc->sc_dev = self;

	if (PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_3DFX_VOODOO2)
		sc->sc_voodootype = TDV_VOODOO_2;
	else
		sc->sc_voodootype = TDV_VOODOO_1;

	screg = pci_conf_read(sc->sc_pc, sc->sc_pcitag,
	    PCI_COMMAND_STATUS_REG);
	screg |= PCI_COMMAND_MEM_ENABLE;
	pci_conf_write(sc->sc_pc, sc->sc_pcitag, PCI_COMMAND_STATUS_REG, 
	    screg);

	pci_aprint_devinfo(pa, NULL);

	/* map the BAR */
	if (pci_mapreg_map(pa, TDV_MM_BAR, PCI_MAPREG_TYPE_MEM, 
	    BUS_SPACE_MAP_LINEAR, &sc->sc_cvgt, &sc->sc_cvgh, 
	    &sc->sc_cvg_pa, 0) != 0 ) {
		aprint_error_dev(sc->sc_dev, "unable to map CVG BAR");
		return;
	}

	/* Map the framebuffer. */
	if (bus_space_subregion(sc->sc_cvgt, sc->sc_cvgh, TDV_OFF_FB, 
	    TDV_FB_SIZE, &sc->sc_fbh)) {
		aprint_error_dev(sc->sc_dev, "unable to map the framebuffer");	
	}

	aprint_normal_dev(sc->sc_dev, "registers at 0x%08x, fb at 0x%08x\n", 
	    (uint32_t) sc->sc_cvg_pa, (uint32_t) sc->sc_cvg_pa + TDV_OFF_FB);

	/* Do the low level setup. */
	if (!tdvfb_init(sc)) {
		aprint_error_dev(sc->sc_dev, "could not initialize CVG\n");
		return;
	}

	/* 
	 * The card is alive now, let's check how much framebuffer memory 
	 * do we have.
	 */
	sc->sc_memsize = tdvfb_mem_size(sc);

	aprint_normal_dev(sc->sc_dev, "%zu MB framebuffer memory present\n", 
	    sc->sc_memsize / 1024 / 1024);

	/* Select video mode, 800x600 32bpp 60Hz by default... */
	sc->sc_width = 800;
	sc->sc_height = 600;
#if BYTE_ORDER == BIG_ENDIAN
	sc->sc_bpp = 32;	/* XXX: 16 would allow blitter use. */
#else
	sc->sc_bpp = 16;
#endif 
	sc->sc_linebytes = 1024 * (sc->sc_bpp / 8);
	sc->sc_videomode = pick_mode_by_ref(sc->sc_width, sc->sc_height, 60);

	aprint_normal_dev(sc->sc_dev, "setting %dx%d %d bpp resolution\n",
	    sc->sc_width, sc->sc_height, sc->sc_bpp);

	tdvfb_videomode_set(sc);

	sc->sc_defaultscreen_descr = (struct wsscreen_descr){
		"default",
		0, 0,
		NULL,
		8, 16,
		WSSCREEN_WSCOLORS | WSSCREEN_HILIT,
		NULL
	};
	sc->sc_screens[0] = &sc->sc_defaultscreen_descr;
	sc->sc_screenlist = (struct wsscreen_list){1, sc->sc_screens};
	sc->sc_mode = WSDISPLAYIO_MODE_EMUL;
	
	vcons_init(&sc->vd, sc, &sc->sc_defaultscreen_descr,
	    &tdvfb_accessops);
	sc->vd.init_screen = tdvfb_init_screen;

	ri = &sc->sc_console_screen.scr_ri;

	tdvfb_init_palette(sc);

	if (console) {
		vcons_init_screen(&sc->vd, &sc->sc_console_screen, 1,
		    &defattr);

		sc->sc_console_screen.scr_flags |= VCONS_SCREEN_IS_STATIC |
		    VCONS_DONT_READ; 
		vcons_redraw_screen(&sc->sc_console_screen);

		sc->sc_defaultscreen_descr.textops = &ri->ri_ops;
		sc->sc_defaultscreen_descr.capabilities = ri->ri_caps;
		sc->sc_defaultscreen_descr.nrows = ri->ri_rows;
		sc->sc_defaultscreen_descr.ncols = ri->ri_cols;

		wsdisplay_cnattach(&sc->sc_defaultscreen_descr, ri, 0, 0,
		    defattr);
		vcons_replay_msgbuf(&sc->sc_console_screen);
	} else {
		if (sc->sc_console_screen.scr_ri.ri_rows == 0) {
			vcons_init_screen(&sc->vd, &sc->sc_console_screen, 1,
			    &defattr);
		} else
			(*ri->ri_ops.allocattr)(ri, 0, 0, 0, &defattr);
	}

	ws_aa.console = console;
	ws_aa.scrdata = &sc->sc_screenlist;
	ws_aa.accessops = &tdvfb_accessops;
	ws_aa.accesscookie = &sc->vd;
	
	config_found(sc->sc_dev, &ws_aa, wsemuldisplaydevprint, CFARGS_NONE);
}

static void
tdvfb_init_palette(struct tdvfb_softc *sc)
{
	int i, j;

	j = 0;
	for (i = 0; i < 256; i++) {
		sc->sc_cmap_red[i] = rasops_cmap[j];
		sc->sc_cmap_green[i] = rasops_cmap[j + 1];
		sc->sc_cmap_blue[i] = rasops_cmap[j + 2];
		j += 3;
	}
}

static void
tdvfb_init_screen(void *cookie, struct vcons_screen *scr, int existing,
    long *defattr)
{
	struct tdvfb_softc *sc = cookie;
	struct rasops_info *ri = &scr->scr_ri;

	wsfont_init();

	ri->ri_depth = sc->sc_bpp;
	ri->ri_width = sc->sc_width;
	ri->ri_height = sc->sc_height;
	ri->ri_stride = sc->sc_linebytes;
	ri->ri_flg = RI_CENTER;

#if BYTE_ORDER == BIG_ENDIAN
#if 0 /* XXX: not yet :( */
	if (sc->sc_bpp == 16)
		ri->ri_flg |= RI_BITSWAP;
#endif 
#endif

	ri->ri_bits = (char *) bus_space_vaddr(sc->sc_cvgt, sc->sc_fbh);
#ifdef TDVFB_DEBUG
	aprint_normal_dev(sc->sc_dev, "fb handle: %lx, ri_bits: %p\n", sc->sc_fbh, ri->ri_bits);
#endif /* TDVFB_DEBUG */

	scr->scr_flags |= VCONS_DONT_READ;

	rasops_init(ri, 0, 0);
	ri->ri_caps = WSSCREEN_WSCOLORS;
	rasops_reconfig(ri, sc->sc_height / ri->ri_font->fontheight,
	    sc->sc_width / ri->ri_font->fontwidth);

	ri->ri_hw = scr;

	/* If we are a Voodoo2 and running in 16 bits try to use blitter. */
	if ((sc->sc_voodootype == TDV_VOODOO_2) && (sc->sc_bpp == 16)) {
		aprint_normal_dev(sc->sc_dev, "using CVG blitter\n");
		ri->ri_ops.eraserows = tdvfb_eraserows;
		ri->ri_ops.copyrows = tdvfb_copyrows;
	}
}

static bool
tdvfb_videomode_set(struct tdvfb_softc *sc) 
{
	uint32_t fbiinit1, fbiinit5, fbiinit6, lfbmode;
	uint16_t vbackporch, vsyncon, vsyncoff;
	uint16_t hbackporch, hsyncon, hsyncoff; 
	uint16_t yheight, xwidth; 

	fbiinit5 = fbiinit6 = 0; /* XXX gcc */

	yheight = sc->sc_videomode->vdisplay;
	xwidth = sc->sc_videomode->hdisplay;

	vbackporch = sc->sc_videomode->vtotal - sc->sc_videomode->vsync_end;
	hbackporch = sc->sc_videomode->htotal - sc->sc_videomode->hsync_end;

	vsyncon = sc->sc_videomode->vsync_end - sc->sc_videomode->vsync_start;
	hsyncon = sc->sc_videomode->hsync_end - sc->sc_videomode->hsync_start;

	vsyncoff = sc->sc_videomode->vtotal - vsyncon;
	hsyncoff = sc->sc_videomode->htotal - hsyncon;
#ifdef TDVFB_DEBUG 
	aprint_normal_dev(sc->sc_dev, 
	    "xy %d %d hbp %d vbp %d, hson %d, hsoff %d, vson %d, vsoff %d\n",
	    xwidth, yheight, hbackporch, vbackporch, hsyncon, hsyncoff, 
	    vsyncon, vsyncoff);
#endif /* TDVFB_DEBUG */

	sc->vid_timing = tdvfb_gendac_calc_pll(sc->sc_videomode->dot_clock);

	if(sc->sc_voodootype == TDV_VOODOO_2)
		sc->sc_x_tiles = (sc->sc_videomode->hdisplay + 63 ) / 64 * 2;
	else
		sc->sc_x_tiles = (sc->sc_videomode->hdisplay + 63 ) / 64;

	tdvfb_cvg_write(sc, TDV_OFF_NOPCMD, 0);
	tdvfb_wait(sc);

	/* enable writing to fbiinit regs, reset, disable DRAM refresh */
	pci_conf_write(sc->sc_pc, sc->sc_pcitag, TDV_INITENABLE_REG, 
	    TDV_INITENABLE_EN_INIT);
	tdvfb_cvg_set(sc, TDV_OFF_FBIINIT1, TDV_FBIINIT1_VIDEO_RST);
	tdvfb_cvg_set(sc, TDV_OFF_FBIINIT0, TDV_FBIINIT0_FBI_RST | 
	    TDV_FBIINIT0_FIFO_RST);
	tdvfb_cvg_unset(sc, TDV_OFF_FBIINIT2, TDV_FBIINIT2_DRAM_REFR);
	tdvfb_wait(sc);

	/* program video timings into CVG/SST-1*/
	tdvfb_cvg_write(sc, TDV_OFF_VDIMENSIONS, yheight << 16 | (xwidth - 1));
	tdvfb_cvg_write(sc, TDV_OFF_BACKPORCH, vbackporch << 16 | 
	    (hbackporch - 2));
	tdvfb_cvg_write(sc, TDV_OFF_HSYNC, hsyncoff << 16 | (hsyncon - 1));
	tdvfb_cvg_write(sc, TDV_OFF_VSYNC, vsyncoff << 16 | vsyncon);

	tdvfb_videomode_dac(sc);

	fbiinit1 = ((tdvfb_cvg_read(sc, TDV_OFF_FBIINIT1) & 
	    TDV_FBIINIT1_VIDMASK) | 
	    TDV_FBIINIT1_DR_DATA |
	    TDV_FBIINIT1_DR_BLANKING |
	    TDV_FBIINIT1_DR_HVSYNC |
	    TDV_FBIINIT1_DR_DCLK |
	    TDV_FBIINIT1_IN_VCLK_2X );

	if (sc->sc_voodootype == TDV_VOODOO_2) {
		fbiinit1 |= ((sc->sc_x_tiles & 0x20) >> 5) 
		    << TDV_FBIINIT1_TILES_X_MSB | ((sc->sc_x_tiles & 0x1e) >> 1)
		    << TDV_FBIINIT1_TILES_X;
		fbiinit6 = (sc->sc_x_tiles & 0x1) << TDV_FBIINIT6_TILES_X_LSB;
	} else
		fbiinit1 |= sc->sc_x_tiles  << TDV_FBIINIT1_TILES_X;

	fbiinit1 |= TDV_FBIINIT1_VCLK_2X << TDV_FBIINIT1_VCLK_SRC;

	if (sc->sc_voodootype == TDV_VOODOO_2) {
		fbiinit5 = tdvfb_cvg_read(sc, TDV_OFF_FBIINIT5) 
		    & TDV_FBIINIT5_VIDMASK;
		if (sc->sc_videomode->flags & VID_PHSYNC)
			fbiinit5 |= TDV_FBIINIT5_PHSYNC;
		if (sc->sc_videomode->flags & VID_PVSYNC)
			fbiinit5 |= TDV_FBIINIT5_PVSYNC;
	}
	tdvfb_cvg_write(sc, TDV_OFF_FBIINIT1, fbiinit1);
	if (sc->sc_voodootype == TDV_VOODOO_2) {
		tdvfb_cvg_write(sc, TDV_OFF_FBIINIT6, fbiinit6);
		tdvfb_cvg_write(sc, TDV_OFF_FBIINIT5, fbiinit5); 
	}
	tdvfb_wait(sc);	

	/* unreset, enable DRAM refresh */
	tdvfb_cvg_unset(sc, TDV_OFF_FBIINIT1, TDV_FBIINIT1_VIDEO_RST);
	tdvfb_cvg_unset(sc, TDV_OFF_FBIINIT0, TDV_FBIINIT0_FBI_RST |
	    TDV_FBIINIT0_FIFO_RST);
	tdvfb_cvg_set(sc, TDV_OFF_FBIINIT2, TDV_FBIINIT2_DRAM_REFR);
	/* disable access to FBIINIT regs */
	pci_conf_write(sc->sc_pc, sc->sc_pcitag, TDV_INITENABLE_REG, 
	    TDV_INITENABLE_EN_FIFO);
	tdvfb_wait(sc);	

	if (sc->sc_bpp == 16)
		lfbmode = TDV_LFBMODE_565; 
	else if (sc->sc_bpp == 32)
		lfbmode = TDV_LFBMODE_8888; 
	else
		return false;

#if BYTE_ORDER == BIG_ENDIAN
	lfbmode |= TDV_LFBMODE_BSW_WR | TDV_LFBMODE_BSW_RD;
#endif

	tdvfb_cvg_write(sc, TDV_OFF_LFBMODE, lfbmode);

	return true;
}

/*
 * Update DAC parameters for selected video mode. 
 */
static void
tdvfb_videomode_dac(struct tdvfb_softc *sc)
{
	uint32_t fbiinit2, fbiinit3; 

	/* remember current FBIINIT settings */
	fbiinit2 = tdvfb_cvg_read(sc, TDV_OFF_FBIINIT2);
	fbiinit3 = tdvfb_cvg_read(sc, TDV_OFF_FBIINIT3);

	/* remap DAC */	
	pci_conf_write(sc->sc_pc, sc->sc_pcitag, TDV_INITENABLE_REG, 
	    TDV_INITENABLE_EN_INIT | TDV_INITENABLE_REMAPDAC);

	tdvfb_cvg_dac_write(sc, TDV_GENDAC_CMD, TDV_GENDAC_CMD_16BITS);

	tdvfb_gendac_set_vid_timing(sc, &(sc->vid_timing));

	/* disable remapping */
	pci_conf_write(sc->sc_pc, sc->sc_pcitag, TDV_INITENABLE_REG, 
	    TDV_INITENABLE_EN_INIT);
	/* restore */
	tdvfb_cvg_write(sc, TDV_OFF_FBIINIT2, fbiinit2);
	tdvfb_cvg_write(sc, TDV_OFF_FBIINIT2, fbiinit3);
}

/* 
 * Check how much memory do we have. Actually, Voodoo1/2 has separate 
 * framebuffer and texture memory. This function only checks for framebuffer 
 * memory. Texture memory ramains unused.
 */
static size_t
tdvfb_mem_size(struct tdvfb_softc *sc)
{
	size_t mem_size;
	uint32_t vram_test4, vram_test2;

	bus_space_write_4(sc->sc_cvgt, sc->sc_fbh, 0, 0x11aabbaa);
	bus_space_write_4(sc->sc_cvgt, sc->sc_fbh, 0x100000, 0x22aabbaa);
	bus_space_write_4(sc->sc_cvgt, sc->sc_fbh, 0x200000, 0x44aabbaa);

	vram_test4 = bus_space_read_4(sc->sc_cvgt, sc->sc_fbh, 0x400000);
	vram_test2 = bus_space_read_4(sc->sc_cvgt, sc->sc_fbh, 0x200000);

	if (vram_test4 == 0x44aabbaa)
		mem_size = 4*1024*1024;
	else if (vram_test2 == 0x22aabbaa) {
		mem_size = 2*1024*1024;
	} else
		mem_size = 1*1024*1024;

	return mem_size;
}

/* do the low level init of Voodoo board */
static bool
tdvfb_init(struct tdvfb_softc *sc)
{
	/* undocumented - found in glide code */
	pci_conf_write(sc->sc_pc, sc->sc_pcitag, TDV_VCLK_DISABLE_REG, 0);
	/* allow write to hardware initialization registers */
	pci_conf_write(sc->sc_pc, sc->sc_pcitag, TDV_INITENABLE_REG, 
	    TDV_INITENABLE_EN_INIT);

	/* reset the board */
	tdvfb_cvg_set(sc, TDV_OFF_FBIINIT1, TDV_FBIINIT1_VIDEO_RST);
	tdvfb_wait(sc);
	tdvfb_cvg_set(sc, TDV_OFF_FBIINIT0, TDV_FBIINIT0_FBI_RST | 
	    TDV_FBIINIT0_FIFO_RST);
	tdvfb_wait(sc);

	/* disable video RAM refresh */
	tdvfb_cvg_unset(sc, TDV_OFF_FBIINIT2, TDV_FBIINIT2_DRAM_REFR);
	tdvfb_wait(sc);

	/* on voodoo1 I had to read FBIINIT2 before remapping, 
	 * otherwise weird things were happening, on v2 it works just fine */
	/* tdvfb_cvg_read(sc, TDV_OFF_FBIINIT2); */

	/* remap DAC */
	pci_conf_write(sc->sc_pc, sc->sc_pcitag, TDV_INITENABLE_REG, 
	    TDV_INITENABLE_EN_INIT | TDV_INITENABLE_REMAPDAC);

	/* detect supported DAC, TODO: we really should support other DACs */
	if(!tdvfb_gendac_detect(sc)) {
		aprint_error_dev(sc->sc_dev, "could not detect ICS GENDAC\n");
		return false;
	}

	/* calculate PLL used to drive the chips (graphics clock) */
	if(sc->sc_voodootype == TDV_VOODOO_2)
		sc->cvg_timing = tdvfb_gendac_calc_pll(TDV_CVG_CLK);
	else
		sc->cvg_timing = tdvfb_gendac_calc_pll(TDV_SST_CLK);

	/* set PLL for gfx clock */
	tdvfb_gendac_set_cvg_timing(sc, &(sc->cvg_timing));

	/* don't remap the DAC anymore */
	pci_conf_write(sc->sc_pc, sc->sc_pcitag, TDV_INITENABLE_REG, 
	    TDV_INITENABLE_EN_INIT | TDV_INITENABLE_EN_FIFO);

	/* set FBIINIT registers to some default values that make sense */
	tdvfb_fbiinit_defaults(sc);

	pci_conf_write(sc->sc_pc, sc->sc_pcitag, TDV_INITENABLE_REG,
	    TDV_INITENABLE_EN_FIFO);
	pci_conf_write(sc->sc_pc, sc->sc_pcitag, TDV_VCLK_ENABLE_REG, 0); 

	return true;
}

static void
tdvfb_fbiinit_defaults(struct tdvfb_softc *sc)
{
	uint32_t fbiinit0, fbiinit1, fbiinit2, fbiinit3, fbiinit4, fbiinit6;

	fbiinit0 = TDV_FBIINIT0_VGA_PASS; /* disable VGA passthrough */
	fbiinit1 = /*TDV_FBIINIT1_PCIWAIT |*/ /* one wait state for PCI write */
	    TDV_FBIINIT1_LFB_EN |	  /* enable lfb reads */
	    TDV_FBIINIT1_VIDEO_RST | 	  /* video timing reset */
	    10 << TDV_FBIINIT1_TILES_X |   /* tiles x/horizontal */
	    TDV_FBIINIT1_VCLK_2X << TDV_FBIINIT1_VCLK_SRC ;

	fbiinit2 = TDV_FBIINIT2_SWB_ALG |/* swap buffer use DAC sync */
	    TDV_FBIINIT2_FAST_RAS |	  /* fast RAS read */
	    TDV_FBIINIT2_DRAM_OE |	  /* enable DRAM OE */
	    TDV_FBIINIT2_DRAM_REFR |	  /* enable DRAM refresh */
	    TDV_FBIINIT2_FIFO_RDA |	  /* FIFO read ahead */
	    TDV_FBIINIT2_DRAM_REF16 << TDV_FBIINIT2_DRAM_REFLD; /* 16 ms */
	    
	fbiinit3 = TDV_FBIINIT3_TREX_DIS; /* disable texture mapping */
	
	fbiinit4 = /*TDV_FBIINIT4_PCIWAIT|*/ /* one wait state for PCI write */
	    TDV_FBIINIT4_LFB_RDA;	  /* lfb read ahead */

	fbiinit6 = 0;
#ifdef TDVFB_DEBUG 	  
	aprint_normal("fbiinit: 0 %x, 1 %x, 2 %x, 3 %x, 4 %x, 6 %x\n",
	    fbiinit0, fbiinit1, fbiinit2, fbiinit3, fbiinit4, fbiinit6);
#endif /* TDVFB_DEBUG */
	tdvfb_cvg_write(sc, TDV_OFF_FBIINIT0, fbiinit0);
	tdvfb_wait(sc);
	tdvfb_cvg_write(sc, TDV_OFF_FBIINIT1, fbiinit1);
	tdvfb_wait(sc);
	tdvfb_cvg_write(sc, TDV_OFF_FBIINIT2, fbiinit2);
	tdvfb_wait(sc);
	tdvfb_cvg_write(sc, TDV_OFF_FBIINIT3, fbiinit3);
	tdvfb_wait(sc);
	tdvfb_cvg_write(sc, TDV_OFF_FBIINIT4, fbiinit4);
	tdvfb_wait(sc);
	if (sc->sc_voodootype == TDV_VOODOO_2) {
		tdvfb_cvg_write(sc, TDV_OFF_FBIINIT6, fbiinit6);
		tdvfb_wait(sc);
	}
}

static void
tdvfb_gendac_set_vid_timing(struct tdvfb_softc *sc, 
    struct tdvfb_dac_timing *timing)
{
	uint8_t pllreg;

	tdvfb_cvg_dac_write(sc, TDV_GENDAC_PLLRD, TDV_GENDAC_PLL_CTRL);
	pllreg = tdvfb_cvg_dac_read(sc, TDV_GENDAC_PLLDATA);

	/* write the timing for gfx clock into "slot" 0 */
	tdvfb_cvg_dac_write(sc, TDV_GENDAC_PLLWR, TDV_GENDAC_PLL_0);
	tdvfb_cvg_dac_write(sc, TDV_GENDAC_PLLDATA, timing->m);
	tdvfb_cvg_dac_write(sc, TDV_GENDAC_PLLDATA, timing->n);
	/* select "slot" 0 for output */
	tdvfb_cvg_dac_write(sc, TDV_GENDAC_PLLWR, TDV_GENDAC_PLL_CTRL);
	tdvfb_cvg_dac_write(sc, TDV_GENDAC_PLLDATA, 
	    (pllreg & TDV_GENDAC_VIDPLLMASK) | TDV_GENDAC_PLL_VIDCLK |
	    TDV_GENDAC_PLL_VIDCLK0);
	tdvfb_wait(sc);
	tdvfb_wait(sc);
#ifdef TDVFB_DEBUG
	tdvfb_cvg_dac_write(sc, TDV_GENDAC_PLLRD, TDV_GENDAC_PLL_0);
	pllreg = tdvfb_cvg_dac_read(sc, TDV_GENDAC_PLLDATA);
	aprint_normal("vid read again: %d\n", pllreg);
	pllreg = tdvfb_cvg_dac_read(sc, TDV_GENDAC_PLLDATA);
	aprint_normal("vid read again: %d\n", pllreg);
#endif /* TDVFB_DEBUG */
}

static void
tdvfb_gendac_set_cvg_timing(struct tdvfb_softc *sc, 
    struct tdvfb_dac_timing *timing)
{
	uint8_t pllreg;

	tdvfb_cvg_dac_write(sc, TDV_GENDAC_PLLRD, TDV_GENDAC_PLL_CTRL);
	pllreg = tdvfb_cvg_dac_read(sc, TDV_GENDAC_PLLDATA);

	/* write the timing for gfx clock into "slot" A */
	tdvfb_cvg_dac_write(sc, TDV_GENDAC_PLLWR, TDV_GENDAC_PLL_A);
	tdvfb_cvg_dac_write(sc, TDV_GENDAC_PLLDATA, timing->m);
	tdvfb_cvg_dac_write(sc, TDV_GENDAC_PLLDATA, timing->n);
	/* select "slot" A for output */
	tdvfb_cvg_dac_write(sc, TDV_GENDAC_PLLWR, TDV_GENDAC_PLL_CTRL);
	tdvfb_cvg_dac_write(sc, TDV_GENDAC_PLLDATA, 
	    (pllreg & TDV_GENDAC_CVGPLLMASK) | TDV_GENDAC_PLL_CVGCLKA);
#ifdef TDVFB_DEBUG
	tdvfb_cvg_dac_write(sc, TDV_GENDAC_PLLRD, TDV_GENDAC_PLL_A);
	pllreg = tdvfb_cvg_dac_read(sc, TDV_GENDAC_PLLDATA);
	aprint_normal("read again: %d\n", pllreg);
	pllreg = tdvfb_cvg_dac_read(sc, TDV_GENDAC_PLLDATA);
	aprint_normal("read again: %d\n", pllreg);
#endif /* TDVFB_DEBUG */
	tdvfb_wait(sc);
}

static struct tdvfb_dac_timing 
tdvfb_gendac_calc_pll(int freq) 
{
	int n1, n2;
	int m, mdbl;
	int best_m, best_n1, best_error;
	int fout;
	struct tdvfb_dac_timing timing;

	best_m = -1; best_n1 = -1;

	/* select highest possible n2, check n2 * fCLK < TDV_GENDAC_MAXVCO */
	for (n2 = TDV_GENDAC_MAX_N2; n2 >= TDV_GENDAC_MIN_N2; n2--) {
		if ((freq * (1 << n2)) < TDV_GENDAC_MAXVCO) 
			break;
	}

	best_error = freq;

	/* 
	 * m+2	    2^n2 * fOUT
	 * ----  =  -----------
	 * n1+2        fREF
	 */
	for (n1 = TDV_GENDAC_MIN_N1; n1 <= TDV_GENDAC_MAX_N1; n1++) {
		/* loop mostly inspired by Linux driver */
		mdbl = (2 * freq * (1 << n2)*(n1 + 2)) / TDV_GENDAC_REFFREQ - 4;
		if (mdbl % 2)
			m = mdbl/2+1;
		else
			m = mdbl/2;
			
		if(m > TDV_GENDAC_MAX_M)
			break;

		fout = (TDV_GENDAC_REFFREQ * (m + 2)) / ((1 << n2) * (n1 + 2));
		if ((abs(fout - freq) < best_error) && (m > 0)) {
			best_n1 = n1;
			best_m = m;
			best_error = abs(fout - freq);
			if (200*best_error < freq) break;
		}

	}

	fout = (TDV_GENDAC_REFFREQ * (best_m + 2)) / ((1 << n2) * (best_n1 + 2));
	timing.m = best_m;
	timing.n = (n2 << 5) | best_n1;
	timing.fout = fout;

#ifdef TDVFB_DEBUG
	aprint_normal("tdvfb_gendac_calc_pll ret: m %d, n %d, fout %d kHz\n", 
	    timing.m, timing.n, timing.fout);
#endif /* TDVFB_DEBUG */

	return timing; 
}

static bool
tdvfb_gendac_detect(struct tdvfb_softc *sc)
{
	uint8_t m_f1, m_f7, m_fb;
	uint8_t n_f1, n_f7, n_fb; 

	tdvfb_cvg_dac_write(sc, TDV_GENDAC_PLLRD, 0x1);
	m_f1 = tdvfb_cvg_dac_read(sc, TDV_GENDAC_PLLDATA);
	n_f1 = tdvfb_cvg_dac_read(sc, TDV_GENDAC_PLLDATA);
	tdvfb_cvg_dac_write(sc, TDV_GENDAC_PLLRD, 0x7);
	m_f7 = tdvfb_cvg_dac_read(sc, TDV_GENDAC_PLLDATA);
	n_f7 = tdvfb_cvg_dac_read(sc, TDV_GENDAC_PLLDATA);
	tdvfb_cvg_dac_write(sc, TDV_GENDAC_PLLRD, 0xB);
	m_fb = tdvfb_cvg_dac_read(sc, TDV_GENDAC_PLLDATA);
	n_fb = tdvfb_cvg_dac_read(sc, TDV_GENDAC_PLLDATA);

	if( (m_f1 == TDV_GENDAC_DFLT_F1_M) &&
	    (n_f1 == TDV_GENDAC_DFLT_F1_N) &&
	    (m_f7 == TDV_GENDAC_DFLT_F7_M) &&
	    (n_f7 == TDV_GENDAC_DFLT_F7_N) &&
	    (m_fb == TDV_GENDAC_DFLT_FB_M) &&
	    (n_fb == TDV_GENDAC_DFLT_FB_N) ) {
		aprint_normal_dev(sc->sc_dev, "ICS 5342 GENDAC\n");
		return true;
	}

	return false;
}

static void
tdvfb_wait(struct tdvfb_softc *sc) 
{
	uint32_t x, cnt;
	cnt = 0;
	for (x = 0; x < MAXLOOP; x++) {
		if (tdvfb_cvg_read(sc, TDV_OFF_STATUS) & TDV_STATUS_FBI_BUSY)
			cnt = 0;
		else
			cnt++;

		if (cnt >= 5)	/* Voodoo2 specs suggest at least 3 */
			break;
	}

	if (x == MAXLOOP)
		/* 
		 * The console probably isn't working now anyway, so maybe 
		 * let's panic... At least it will drop into ddb if some other 
		 * device a console.
		 */
		panic("tdvfb is stuck!\n");
}

static uint32_t
tdvfb_cvg_read(struct tdvfb_softc *sc, uint32_t reg) 
{
	uint32_t rv;
	rv = bus_space_read_4(sc->sc_cvgt, sc->sc_cvgh, reg);
#ifdef TDVFB_DEBUG_REGS
	aprint_normal("cvg_read val %x from reg %x\n", rv, reg);
#endif /* TDVFB_DEBUG_REGS */
	return rv;
}

static void
tdvfb_cvg_write(struct tdvfb_softc *sc, uint32_t reg, uint32_t val)
{
#ifdef TDVFB_DEBUG_REGS
	aprint_normal("cvg_write val %x to reg %x\n", val, reg);
#endif /* TDVFB_DEBUG_REGS */
	bus_space_write_4(sc->sc_cvgt, sc->sc_cvgh, reg, val);
}

static void
tdvfb_cvg_set(struct tdvfb_softc *sc, uint32_t reg, uint32_t bits)
{
	uint32_t v;
	v = tdvfb_cvg_read(sc, reg) | bits;
	tdvfb_cvg_write(sc, reg, v);
}

static void
tdvfb_cvg_unset(struct tdvfb_softc *sc, uint32_t reg, uint32_t bits)
{
	uint32_t v;
	v = tdvfb_cvg_read(sc, reg) & ~bits;
	tdvfb_cvg_write(sc, reg, v);
}

static uint8_t
tdvfb_cvg_dac_read(struct tdvfb_softc *sc, uint32_t reg)
{
	uint32_t rv;

	tdvfb_cvg_dac_write(sc, reg, TDV_DAC_DATA_READ);

	rv = tdvfb_cvg_read(sc, TDV_OFF_DAC_READ);
#ifdef TDVFB_DEBUG_REGS
	aprint_normal("cvg_dac_read val %x from reg %x\n", rv, reg);
#endif /* TDVFB_DEBUG_REGS */
	return rv & 0xFF;
}

static void
tdvfb_cvg_dac_write(struct tdvfb_softc *sc, uint32_t reg, uint32_t val)
{
	uint32_t wreg;

	wreg = ((reg & TDV_GENDAC_ADDRMASK) << 8) | val;

#ifdef TDVFB_DEBUG_REGS
	aprint_normal("cvg_dac_write val %x to reg %x (%x)\n", val, reg,
	    wreg);
#endif /* TDVFB_DEBUG_REGS */

	tdvfb_cvg_write(sc, TDV_OFF_DAC_DATA, wreg);
	tdvfb_wait(sc);
}

static void
tdvfb_rectfill(struct tdvfb_softc *sc, int x, int y, int wi, int he,
    uint32_t color)
{
	tdvfb_cvg_write(sc, TDV_OFF_BLTSRC, 0);
	tdvfb_cvg_write(sc, TDV_OFF_BLTDST, 0);
	tdvfb_cvg_write(sc, TDV_OFF_BLTROP, TDV_BLTROP_COPY);
	tdvfb_cvg_write(sc, TDV_OFF_BLTXYSTRIDE, 
	    sc->sc_linebytes | (sc->sc_linebytes << 16));
	tdvfb_cvg_write(sc, TDV_OFF_BLTDSTXY, x | (y << 16));
	tdvfb_cvg_write(sc, TDV_OFF_BLTSIZE, wi | (he << 16));
	tdvfb_cvg_write(sc, TDV_OFF_BLTCMD, TDV_BLTCMD_RECTFILL | 
	    TDV_BLTCMD_LAUNCH | TDV_BLTCMD_FMT_565 << 3 | TDV_BLTCMD_DSTTILED |
	    TDV_BLTCMD_CLIPRECT );
	tdvfb_wait(sc);
}

static void
tdvfb_bitblt(struct tdvfb_softc *sc, int xs, int ys, int xd, int yd, int wi,
    int he) 
{
	tdvfb_cvg_write(sc, TDV_OFF_BLTSRC, 0);
	tdvfb_cvg_write(sc, TDV_OFF_BLTDST, 0);
	tdvfb_cvg_write(sc, TDV_OFF_BLTROP, TDV_BLTROP_COPY);
	tdvfb_cvg_write(sc, TDV_OFF_BLTXYSTRIDE, 
	    sc->sc_linebytes | (sc->sc_linebytes << 16));
	tdvfb_cvg_write(sc, TDV_OFF_BLTSRCXY, xs | (ys << 16));
	tdvfb_cvg_write(sc, TDV_OFF_BLTDSTXY, xd | (yd << 16));
	tdvfb_cvg_write(sc, TDV_OFF_BLTSIZE, wi | (he << 16));
	tdvfb_cvg_write(sc, TDV_OFF_BLTCMD, TDV_BLTCMD_SCR2SCR | 
	    TDV_BLTCMD_LAUNCH | TDV_BLTCMD_FMT_565 << 3);
	
	tdvfb_wait(sc);
}

static void
tdvfb_copyrows(void *cookie, int srcrow, int dstrow, int nrows)
{
	struct tdvfb_softc *sc;
	struct rasops_info *ri;
	struct vcons_screen *scr;
	int x, ys, yd, wi, he;

	ri = cookie;
	scr = ri->ri_hw;
	sc = scr->scr_cookie;

	if (sc->sc_mode == WSDISPLAYIO_MODE_EMUL) {
		x = ri->ri_xorigin;
		ys = ri->ri_yorigin + ri->ri_font->fontheight * srcrow;
		yd = ri->ri_yorigin + ri->ri_font->fontheight * dstrow;
		wi = ri->ri_emuwidth;
		he = ri->ri_font->fontheight * nrows;
		tdvfb_bitblt(sc, x, ys, x, yd, wi, he);
	}
}

static void
tdvfb_eraserows(void *cookie, int row, int nrows, long fillattr)
{

	struct tdvfb_softc *sc;
	struct rasops_info *ri;
	struct vcons_screen *scr;
	int x, y, wi, he, fg, bg, ul;

	ri = cookie;
	scr = ri->ri_hw;
	sc = scr->scr_cookie;

	if (sc->sc_mode == WSDISPLAYIO_MODE_EMUL) {
		rasops_unpack_attr(fillattr, &fg, &bg, &ul);
		if ((row == 0) && (nrows == ri->ri_rows)) 
			tdvfb_rectfill(sc, 0, 0, ri->ri_width,
			    ri->ri_height, ri->ri_devcmap[bg]);
		else {
			x = ri->ri_xorigin;
			y = ri->ri_yorigin + ri->ri_font->fontheight * row;
			wi = ri->ri_emuwidth;
			he = ri->ri_font->fontheight * nrows;
			tdvfb_rectfill(sc, x, y, wi, he, ri->ri_devcmap[bg]);
		}
	}
}

static int
tdvfb_ioctl(void *v, void *vs, u_long cmd, void *data, int flag, struct lwp *l)
{
	struct vcons_data *vd;
	struct tdvfb_softc *sc;
	struct wsdisplay_fbinfo *wsfbi;
	struct vcons_screen *ms;

	vd = v;
	sc = vd->cookie;
	ms = vd->active;

	switch (cmd) {
	case WSDISPLAYIO_GTYPE:
		*(u_int *)data = WSDISPLAY_TYPE_PCIMISC;
		return 0;

	case PCI_IOC_CFGREAD:
	case PCI_IOC_CFGWRITE:
		return pci_devioctl(sc->sc_pc, sc->sc_pcitag,
		    cmd, data, flag, l);

	case WSDISPLAYIO_GET_BUSID:
		return wsdisplayio_busid_pci(sc->sc_dev, sc->sc_pc,
		    sc->sc_pcitag, data);

	case WSDISPLAYIO_GINFO:
		if (ms == NULL)
			return ENODEV;

		wsfbi = (void*) data;
		wsfbi->height = ms->scr_ri.ri_height;
		wsfbi->width = ms->scr_ri.ri_width;
		wsfbi->depth = ms->scr_ri.ri_depth;	
		wsfbi->cmsize = 256;
		return 0;

	case WSDISPLAYIO_LINEBYTES:
		*(u_int*)data = sc->sc_linebytes;
		return 0;
	
	case WSDISPLAYIO_SMODE: 
		{
			int new_mode = *(int*)data;
			if (new_mode != sc->sc_mode) {
				sc->sc_mode = new_mode;
				if(new_mode == WSDISPLAYIO_MODE_EMUL) 
					vcons_redraw_screen(ms);
			}		
			return 0;
		}
	case WSDISPLAYIO_GET_FBINFO:
		{
			struct wsdisplayio_fbinfo *fbi = data;
			struct rasops_info *ri;
			int ret;

			ri = &sc->vd.active->scr_ri;
			ret = wsdisplayio_get_fbinfo(ri, fbi);
			return ret;
		}
	}	
	return EPASSTHROUGH;	
}

static paddr_t
tdvfb_mmap(void *v, void *vs, off_t offset, int prot)
{
	struct vcons_data *vd;
	struct tdvfb_softc *sc;
	paddr_t pa;

	vd = v;
	sc = vd->cookie;

	if (offset < sc->sc_memsize) {
		pa = bus_space_mmap(sc->sc_cvgt, sc->sc_fbh + offset, 0, prot,
		    BUS_SPACE_MAP_LINEAR);
		return pa;
	}

	return -1;
}

