/*
 *   (C) Copyright IBM Corp. 2001, 2004
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Module: mdregmgr
 * File: md_super.h
 */

#ifndef _MD_SUPERBLOCK_H_
#define _MD_SUPERBLOCK_H_

// Datatypes and Defs used in mdregmgr.c
// log2 of the sector size: 1 << 9 = 512 bytes
#define SECTOR_SIZE_BITS            9
#define PAGE_SIZE_SECTORS           16      // for 8k pages (16 * 512 bytes)
#define PE_SIZE_ALIGNMENT_SECTORS   128     // for 64k aligned PEs (128 * 512 bytes)
// Built by string-literal concatenation; EVMS_DEV_NODE_PATH is not defined in
// this header and must be visible wherever MD_NAME_SPACE is expanded.
#define MD_NAME_SPACE               EVMS_DEV_NODE_PATH "md"
#define MD_DEV_DIRECTORY            "md/"
// NOTE(review): presumably the upper bound on MD minor numbers -- confirm against users
#define MAX_MD_MINORS               255
#define MD_MIN_CHUNK_SIZE           4       // in kilobytes
#define MD_MAX_CHUNK_SIZE           4096    // in kilobytes = 4MB
// MD_MAX_CHUNK_SIZE converted from kilobytes to bytes (4096 * 1024)
#define MD_MAX_CHUNK_SIZE_BYTES     (MD_MAX_CHUNK_SIZE*1024)
#define MD_DEFAULT_CHUNK_SIZE       32      // in kilobytes
// Block size in bytes; divisor used by MD_RESERVED_BLOCKS and MD_SB_BLOCKS below
#define BLOCK_SIZE                  1024    // needed by md size macros


/*
 * RAID superblock.
 *
 * The RAID superblock maintains some statistics on each RAID configuration.
 * Each real device in the RAID set contains it near the end of the device.
 * Some of the ideas are copied from the ext2fs implementation.
 *
 * We currently use 4096 bytes as follows:
 *
 *	word offset	function
 *
 *	   0  -    31	Constant generic RAID device information.
 *        32  -    63   Generic state information.
 *	  64  -   127	Personality specific information.
 *	 128  -   991	27 32-words descriptors of the disks in the raid set.
 *	   (none)	Reserved (MD_SB_RESERVED_WORDS is 0 with 27 descriptors).
 *	 992  -  1023	Disk specific descriptor.
 */

/*
 * If x is the real device size in bytes, we return an apparent size of:
 *
 *	y = (x & ~(MD_RESERVED_BYTES - 1)) - MD_RESERVED_BYTES
 *
 * and place the 4kB superblock at offset y.
 *
 * In other words: round the device size down to a multiple of the reserved
 * area (64kB) and then give up one more reserved area at the end.
 */
/* Size of the reserved area, expressed in bytes, 512-byte sectors and
 * BLOCK_SIZE-byte blocks. */
#define MD_RESERVED_BYTES		(64 * 1024)
#define MD_RESERVED_SECTORS		(MD_RESERVED_BYTES / 512)
#define MD_RESERVED_BLOCKS		(MD_RESERVED_BYTES / BLOCK_SIZE)

/*
 * Apply the formula above to a size given in sectors / in blocks.
 * The masking relies on the reserved size being a power of two.
 * x is evaluated exactly once.  For x smaller than the reserved size the
 * subtraction goes below zero (and wraps if x is unsigned), so callers have
 * to reject such devices themselves.
 */
#define MD_NEW_SIZE_SECTORS(x)		(((x) & ~(MD_RESERVED_SECTORS - 1)) - MD_RESERVED_SECTORS)
#define MD_NEW_SIZE_BLOCKS(x)		(((x) & ~(MD_RESERVED_BLOCKS - 1)) - MD_RESERVED_BLOCKS)

/*
 * Size of the superblock described by mdp_super_t, in bytes, 32-bit words
 * (1024), BLOCK_SIZE-byte blocks and 512-byte sectors (8).
 */
#define MD_SB_BYTES			4096
#define MD_SB_WORDS			(MD_SB_BYTES / 4)
#define MD_SB_BLOCKS			(MD_SB_BYTES / BLOCK_SIZE)
#define MD_SB_SECTORS			(MD_SB_BYTES / 512)

/*
 * The following are counted in 32-bit words
 *
 * Offsets of the superblock sections from the start of the superblock.
 * MD_SB_DESCRIPTOR_OFFSET (992) equals MD_SB_DISKS_OFFSET + MD_SB_DISKS_WORDS
 * (128 + 27 * 32), i.e. the per-disk table is immediately followed by the
 * descriptor of the disk that holds this copy of the superblock.
 */
#define	MD_SB_GENERIC_OFFSET		0
#define MD_SB_PERSONALITY_OFFSET	64
#define MD_SB_DISKS_OFFSET		128
#define MD_SB_DESCRIPTOR_OFFSET		992

/*
 * Section sizes, also in 32-bit words.
 */
#define MD_SB_GENERIC_CONSTANT_WORDS	32
#define MD_SB_GENERIC_STATE_WORDS	32
#define MD_SB_GENERIC_WORDS		(MD_SB_GENERIC_CONSTANT_WORDS + MD_SB_GENERIC_STATE_WORDS)
#define MD_SB_PERSONALITY_WORDS		64
/* Size of one mdp_disk_t descriptor */
#define MD_SB_DESCRIPTOR_WORDS		32
/* Number of entries in mdp_super_t.disks[] */
#define MD_SB_DISKS			27
#define MD_SB_DISKS_WORDS		(MD_SB_DISKS*MD_SB_DESCRIPTOR_WORDS)
/* Whatever is left of the 1024-word superblock; evaluates to 0 with the
 * values above (1024 - 64 - 64 - 864 - 32). */
#define MD_SB_RESERVED_WORDS		(1024 - MD_SB_GENERIC_WORDS - MD_SB_PERSONALITY_WORDS - MD_SB_DISKS_WORDS - MD_SB_DESCRIPTOR_WORDS)
/* Everything except the reserved words and the trailing this-disk descriptor.
 * NOTE(review): presumably the part expected to be equal on all members of
 * a set -- confirm against users. */
#define MD_SB_EQUAL_WORDS		(MD_SB_GENERIC_WORDS + MD_SB_PERSONALITY_WORDS + MD_SB_DISKS_WORDS)


/*
 * Device "operational" state bits
 *
 * These are bit positions, not masks: the helpers below test and set
 * mdp_disk_t.state with (1 << MD_DISK_xxx).
 */
#define MD_DISK_FAULTY		0 /* disk is faulty / operational */
#define MD_DISK_ACTIVE		1 /* disk is running or spare disk */
#define MD_DISK_SYNC		2 /* disk is in sync with the raid set */
#define MD_DISK_REMOVED		3 /* disk has kind of been removed, but not really or it would not be here */

/*
 * Descriptor of one disk, as embedded in mdp_super_t (disks[] and this_disk).
 * The layout is fixed: five used 32-bit words followed by padding, for a
 * total of MD_SB_DESCRIPTOR_WORDS (32) words.  Do not reorder or resize
 * fields.
 */
typedef struct mdp_device_descriptor_s {
	u_int32_t number;	/* 0 Device number in the entire set	      */
	u_int32_t major;	/* 1 Device major number		      */
	u_int32_t minor;	/* 2 Device minor number		      */
	u_int32_t raid_disk;	/* 3 The role of the device in the raid set   */
	u_int32_t state;	/* 4 Operational state (MD_DISK_* bits)	      */
	u_int32_t reserved[MD_SB_DESCRIPTOR_WORDS - 5];	/* pads the descriptor to 32 words */
} mdp_disk_t;

/*
 * Test the MD_DISK_FAULTY bit of a descriptor's state word.
 * Returns the masked bit (non-zero) when it is set, 0 otherwise.
 */
static inline int disk_faulty(mdp_disk_t *d)
{
	const int faulty_mask = 1 << MD_DISK_FAULTY;

	return d->state & faulty_mask;
}

/*
 * Test the MD_DISK_ACTIVE bit of a descriptor's state word.
 * Returns the masked bit (non-zero) when it is set, 0 otherwise.
 */
static inline int disk_active(mdp_disk_t *d)
{
	const int active_mask = 1 << MD_DISK_ACTIVE;

	return d->state & active_mask;
}

/*
 * Test the MD_DISK_SYNC bit of a descriptor's state word.
 * Returns the masked bit (non-zero) when it is set, 0 otherwise.
 */
static inline int disk_sync(mdp_disk_t *d)
{
	const int sync_mask = 1 << MD_DISK_SYNC;

	return d->state & sync_mask;
}

/*
 * A descriptor counts as a spare when none of the SYNC, ACTIVE and FAULTY
 * bits is set.  The REMOVED bit is not consulted.
 * Returns 1 for a spare, 0 otherwise.
 */
static inline int disk_spare(mdp_disk_t *d)
{
	if (disk_sync(d))
		return 0;
	if (disk_active(d))
		return 0;

	return !disk_faulty(d);
}

/*
 * Test the MD_DISK_REMOVED bit of a descriptor's state word.
 * Returns the masked bit (non-zero) when it is set, 0 otherwise.
 */
static inline int disk_removed(mdp_disk_t *d)
{
	const int removed_mask = 1 << MD_DISK_REMOVED;

	return d->state & removed_mask;
}

/*
 * Set the MD_DISK_FAULTY bit; all other state bits are left untouched.
 */
static inline void mark_disk_faulty(mdp_disk_t *d)
{
	const int faulty_mask = 1 << MD_DISK_FAULTY;

	d->state = d->state | faulty_mask;
}

/*
 * Set the MD_DISK_ACTIVE bit; all other state bits are left untouched.
 */
static inline void mark_disk_active(mdp_disk_t *d)
{
	const int active_mask = 1 << MD_DISK_ACTIVE;

	d->state = d->state | active_mask;
}

/*
 * Set the MD_DISK_SYNC bit; all other state bits are left untouched.
 */
static inline void mark_disk_sync(mdp_disk_t *d)
{
	const int sync_mask = 1 << MD_DISK_SYNC;

	d->state = d->state | sync_mask;
}

/*
 * Turn the descriptor into a spare by clearing the whole state word, so
 * that none of the FAULTY, ACTIVE, SYNC or REMOVED bits remains set.
 */
static inline void mark_disk_spare(mdp_disk_t *d)
{
	d->state = 0;
}

/*
 * Overwrite the state word with exactly FAULTY | REMOVED.
 * Unlike the other mark_disk_* setters this discards any bits that were
 * set before (ACTIVE, SYNC).
 */
static inline void mark_disk_removed(mdp_disk_t *d)
{
	const int faulty_mask = 1 << MD_DISK_FAULTY;
	const int removed_mask = 1 << MD_DISK_REMOVED;

	d->state = faulty_mask | removed_mask;
}

/*
 * Clear the MD_DISK_ACTIVE bit; all other state bits are left untouched.
 */
static inline void mark_disk_inactive(mdp_disk_t *d)
{
	const int active_mask = 1 << MD_DISK_ACTIVE;

	d->state = d->state & ~active_mask;
}

/*
 * Clear the MD_DISK_SYNC bit; all other state bits are left untouched.
 */
static inline void mark_disk_nonsync(mdp_disk_t *d)
{
	const int sync_mask = 1 << MD_DISK_SYNC;

	d->state = d->state & ~sync_mask;
}

/*
 * Report whether a descriptor slot has been vacated: major and minor are
 * both zero and the MD_DISK_REMOVED bit is set (the state that
 * remove_descriptor() produces).
 * Returns 1 if so, 0 otherwise.
 */
static inline int descriptor_removed(mdp_disk_t *disk)
{
	/* A descriptor that still names a device has not been removed. */
	if (disk->major != 0 || disk->minor != 0)
		return 0;

	return (disk->state & (1 << MD_DISK_REMOVED)) ? 1 : 0;
}

/*
 * Report whether a descriptor slot is unused: number, major, minor and
 * raid_disk are all zero and neither the FAULTY nor the ACTIVE bit is set.
 * The SYNC and REMOVED bits are not consulted.
 * Returns 1 if the slot is empty, 0 otherwise.
 */
static inline int descriptor_empty(mdp_disk_t *d)
{
	if (d->number != 0 || d->major != 0 ||
	    d->minor != 0 || d->raid_disk != 0)
		return 0;

	if (disk_faulty(d))
		return 0;

	return !disk_active(d);
}

/*
 * Vacate a descriptor slot: zero the device major/minor and overwrite the
 * state word with only the MD_DISK_REMOVED bit.  The number and raid_disk
 * fields are left as they are.
 */
static inline void remove_descriptor(mdp_disk_t *d)
{
	d->minor = 0;
	d->major = 0;
	d->state = 1 << MD_DISK_REMOVED;
}

/* Value identifying an MD superblock (stored in the md_magic / magic fields
 * of the superblock structures below). */
#define MD_SB_MAGIC		0xa92b4efc

/*
 * Superblock state bits
 *
 * NOTE(review): presumably bit positions within mdp_super_t.state, used as
 * (1 << MD_SB_xxx) like the MD_DISK_* bits -- confirm against users.
 */
#define MD_SB_CLEAN		0
#define MD_SB_ERRORS		1

/*
 * The 4096-byte (MD_SB_BYTES) RAID superblock.
 *
 * The structure is a sequence of fixed-size sections measured in 32-bit
 * words: 32 constant + 32 state + 64 personality + 27 * 32 disk descriptors
 * + reserved + 32 for this disk's own descriptor = 1024 words.  Field order
 * and sizes define the layout and must not be changed.
 */
typedef struct mdp_superblock_s {
	/*
	 * Constant generic information
	 */
	u_int32_t md_magic;		/*  0 MD identifier 			      */
	u_int32_t major_version;	/*  1 major version to which the set conforms */
	u_int32_t minor_version;	/*  2 minor version ...			      */
	u_int32_t patch_version;	/*  3 patchlevel version ...		      */
	u_int32_t gvalid_words;		/*  4 Number of used words in this section    */
	u_int32_t set_uuid0;		/*  5 Raid set identifier		      */
	u_int32_t ctime;		/*  6 Creation time			      */
	u_int32_t level;		/*  7 Raid personality			      */
	u_int32_t size;			/*  8 Apparent size of each individual disk   */
	u_int32_t nr_disks;		/*  9 total disks in the raid set	      */
	u_int32_t raid_disks;		/* 10 disks in a fully functional raid set    */
	u_int32_t md_minor;		/* 11 preferred MD minor device number	      */
	u_int32_t not_persistent;	/* 12 does it have a persistent superblock    */
	u_int32_t set_uuid1;		/* 13 Raid set identifier #2		      */
	u_int32_t set_uuid2;		/* 14 Raid set identifier #3		      */
	u_int32_t set_uuid3;		/* 15 Raid set identifier #4		      */
	/* pads the constant section to MD_SB_GENERIC_CONSTANT_WORDS words */
	u_int32_t gstate_creserved[MD_SB_GENERIC_CONSTANT_WORDS - 16];

	/*
	 * Generic state information
	 * (word numbers below restart at 0 for this section)
	 */
	u_int32_t utime;		/*  0 Superblock update time		      */
	u_int32_t state;		/*  1 State bits (clean, ...)		      */
	u_int32_t active_disks;		/*  2 Number of currently active disks	      */
	u_int32_t working_disks;	/*  3 Number of working disks		      */
	u_int32_t failed_disks;		/*  4 Number of failed disks		      */
	u_int32_t spare_disks;		/*  5 Number of spare disks		      */
	u_int32_t sb_csum;		/*  6 checksum of the whole superblock        */
	/*
	 * The two halves of the update counter swap places depending on host
	 * byte order.
	 * NOTE(review): if neither __BYTE_ORDER nor __BIG_ENDIAN is defined
	 * the preprocessor treats both as 0 and the big-endian branch is
	 * taken, so this relies on <endian.h> (or an equivalent) having been
	 * included before this header -- confirm.
	 */
#if __BYTE_ORDER == __BIG_ENDIAN
	u_int32_t events_hi;		/*  7 high-order of superblock update count   */
	u_int32_t events_lo;		/*  8 low-order of superblock update count    */
#else
	u_int32_t events_lo;		/*  7 low-order of superblock update count    */
	u_int32_t events_hi;		/*  8 high-order of superblock update count   */
#endif
	/* pads the state section to MD_SB_GENERIC_STATE_WORDS words */
	u_int32_t gstate_sreserved[MD_SB_GENERIC_STATE_WORDS - 9];

	/*
	 * Personality information
	 */
	u_int32_t layout;		/*  0 the array's physical layout	      */
	u_int32_t chunk_size;		/*  1 chunk size in bytes		      */
	u_int32_t root_pv;		/*  2 LV root PV */
	u_int32_t root_block;		/*  3 LV root block */
	/* pads the personality section to MD_SB_PERSONALITY_WORDS words */
	u_int32_t pstate_reserved[MD_SB_PERSONALITY_WORDS - 4];

	/*
	 * Disks information
	 */
	mdp_disk_t disks[MD_SB_DISKS];

	/*
	 * Reserved
	 *
	 * MD_SB_RESERVED_WORDS evaluates to 0 with the current constants, so
	 * this is a zero-length array that occupies no space.
	 */
	u_int32_t reserved[MD_SB_RESERVED_WORDS];

	/*
	 * Active descriptor
	 * (starts at word MD_SB_DESCRIPTOR_OFFSET)
	 */
	mdp_disk_t this_disk;

}mdp_super_t;

/*
 * Sector offset of the "saved info" area used with the mdp_super_t
 * superblock: the reserved-area size minus MD_SAVED_INFO_SECTS, both in
 * sectors.  MD_SAVED_INFO_SECTS is not defined in this header and must be
 * visible wherever this macro is expanded.
 * NOTE(review): the origin this offset is added to is not visible from
 * here -- confirm against the users.
 */
#define MD_SB0_SAVED_INFO_SECTOR_OFFSET	(MD_RESERVED_SECTORS - MD_SAVED_INFO_SECTS)


/*--------- MD superblock 1 format ------------*/
/* Size reserved for a version-1 superblock, in bytes. */
#define MD_SB_1_BYTES 1024
/*
 * The same size in 512-byte sectors.  The expansion is parenthesized so the
 * division is not re-associated when the macro appears inside a larger
 * expression (e.g. "n / MD_SB_1_SECTORS" or "n % MD_SB_1_SECTORS").
 */
#define MD_SB_1_SECTORS (MD_SB_1_BYTES / 512)
/* Number of 2-byte dev_roles[] entries that fit after the 256-byte fixed
 * part of a 1K superblock: (1024 - 256) / 2. */
#define MD_SB_1_DISKS 384

/* Sector offset of the "saved info" area used with version-1 superblocks:
 * one superblock's worth of sectors. */
#define MD_SB1_SAVED_INFO_SECTOR_OFFSET      (MD_SB_1_SECTORS)

/*
 * The version-1 superblock :
 * All numeric fields are little-endian.
 *
 * total size: 256 bytes plus 2 per device.
 *  1K allows 384 devices.
 *
 * The fixed part consists of three sections of 128, 64 and 64 bytes; each
 * padN[] array fills its section up to that size.  Field order and sizes
 * define the layout and must not be changed.
 */
typedef struct mdp_superblock_1 {
	/* constant array information - 128 bytes */
	u_int32_t	magic;		/* MD_SB_MAGIC: 0xa92b4efc - little endian */
	u_int32_t	major_version;	/* 1 */
	u_int32_t	feature_map;	/* 0 for now */
	u_int32_t	pad0;		/* always set to 0 when writing */

	u_int8_t	set_uuid[16];	/* user-space generated. */
	char	set_name[32];		/* set and interpreted by user-space */

	u_int64_t	ctime;		/* lo 40 bits are seconds, top 24 are microseconds or 0*/
	/* NOTE(review): the field is unsigned although the listed levels are
	 * negative; presumably stored as two's complement -- confirm. */
	u_int32_t	level;		/* -4 (multipath), -1 (linear), 0,1,4,5 */
	u_int32_t	layout;		/* only for raid5 currently */
	u_int64_t	size;		/* used size of component devices, in 512byte sectors */

	u_int32_t	chunksize;	/* in 512byte sectors */
	u_int32_t	raid_disks;
	u_int8_t	pad1[128-96];	/* set to 0 when written; 96 bytes are used above */

	/* constant this-device information - 64 bytes */
	u_int64_t	data_offset;	/* sector start of data, often 0 */
	u_int64_t	data_size;	/* sectors in this device that can be used for data */
	u_int64_t	super_offset;	/* sector start of this superblock */
	u_int64_t	recovery_offset;/* sectors before this offset (from data_offset) have been recovered */
	u_int32_t	dev_number;	/* permanent identifier of this device - not role in raid */
	u_int32_t	cnt_corrected_read; /* number of read errors that were corrected by re-writing */
	u_int8_t	device_uuid[16]; /* user-space settable, ignored by kernel */
	u_int8_t	pad2[64-56];	/* set to 0 when writing; 56 bytes are used above */

	/* array state information - 64 bytes */
	u_int64_t	utime;		/* 40 bits seconds, 24 bits microseconds */
	u_int64_t	events;		/* incremented when superblock updated */
	u_int64_t	resync_offset;	/* data before this offset (from data_offset) known to be in sync */
	u_int32_t	sb_csum;	/* checksum up to devs[max_dev] */
	u_int32_t	max_dev;	/* size of devs[] array to consider */
	u_int8_t	pad3[64-32];	/* set to 0 when writing; 32 bytes are used above */

	/* device state information. Indexed by dev_number.
	 * 2 bytes per device
	 * Note there are no per-device state flags. State information is rolled
	 * into the 'roles' value.  If a device is spare or faulty, then it doesn't
	 * have a meaningful role.
	 *
	 * Declared as a zero-length array (GNU extension): it adds nothing to
	 * sizeof(mdp_sb_1_t); the entries live in whatever storage follows
	 * the fixed 256-byte part.
	 */
	u_int16_t	dev_roles[0];	/* role in array, or 0xffff for a spare, or 0xfffe for faulty */
}mdp_sb_1_t;

/*
 * Table of superblock operations.
 *
 * A volume reaches its table through md_volume_t's sb_func pointer (see
 * MAX_DISKS() and the md_* inline wrappers in this header), so callers can
 * work on a volume without naming a particular on-disk superblock layout.
 * Superblocks are passed around as void *.
 * NOTE(review): presumably there is one table per superblock format
 * (mdp_super_t and mdp_sb_1_t); the implementations, the meaning of the int
 * return values and the units of size/chunk_size are not visible from this
 * header -- confirm against the .c files.
 */
struct super_func {
	int (*activate_spare)(md_member_t *member);
	int (*add_new_disk)(md_member_t *member);
	void *(*allocate_sb)(void);
	int (*analyze_sb)(md_volume_t *vol);
	u_int64_t (*calc_volume_size)(md_volume_t *vol);	/* called by md_volume_calc_size() */
	int (*duplicate_sb)(void **target, void *src);
	int (*find_empty_slot)(void *super, u_int32_t *available_slot);	/* called by md_volume_find_empty_slot() */
	u_int64_t (*get_event)(void *sb);	/* called by md_get_event() */
	int (*get_dev_number)(void *sb);
	int (*get_level)(void *sb);
	void (*get_name)(char *name, void *sb);
	void (*get_sb_disk_info)(md_member_t *member, mdu_disk_info_t *info);
	void (*get_sb_disk_info_for_index)(void *sb, mdu_disk_info_t *info);
	void (*get_sb_info)(void *sb, md_super_info_t *info);	/* called by md_{volume,member}_get_super_info() */
	md_array_info_t * (*get_disk_array_info)(void *sb);
	void (*increment_events)(void *sb);
	int (*init_sb)(void *sb, int md_minor, int level, u_int32_t layout,
		       u_int64_t size, u_int32_t chunk_size);
	void (*load_this_device_info)(md_member_t *member);
	int (*mark_disk_faulty)(md_member_t *member, boolean mark_removed);
	int (*mark_disk_missing)(md_volume_t *vol, int dev_number);
	int (*max_disks) (void);	/* called by MAX_DISKS() */
	int (*read_saved_info)(md_member_t *member);
	int (*remove_disk)(md_member_t *member, boolean resize);
	int (*replace_disk)(md_member_t *member, storage_object_t *obj);
	boolean (*same_uuid)(void *sb1, void *sb2);	/* called by md_same_uuid() */
	int (*set_raid_disks)(void *sb, int raid_disks);
	void (*set_sb_info)(void *sb, md_super_info_t *info);
	void (*set_this_device_info)(md_member_t *member);
	void (*set_this_device_state)(md_member_t *member);
	void (*set_utime)(void *sb);
	int (*write_sb)(md_member_t *member);
	int (*write_saved_info)(md_member_t *member);	/* called by md_write_saved_info() */
	int (*zero_superblock)(md_member_t *member, boolean now);	/* called by md_zero_superblock() */
	int (*zero_saved_info)(md_member_t *member, boolean now);	/* called by md_zero_saved_info() */
};

/*
 * Ask a volume's superblock operations table how many disks it supports.
 * vol may be any pointer expression; it is cast to md_volume_t * and is
 * evaluated exactly once.  The argument is parenthesized before the cast so
 * that the cast applies to the whole expression and not just to its first
 * operand (e.g. MAX_DISKS(base + off) or MAX_DISKS(p = q)).
 * Returns the int produced by sb_func->max_disks().
 */
#define MAX_DISKS(vol) (((md_volume_t *)(vol))->sb_func->max_disks())

/*
 * Superblock readers; the implementations live outside this header.
 * NOTE(review): the names suggest one reader per on-disk format
 * (mdp_super_t for sb0, mdp_sb_1_t for sb1).  What is stored through
 * 'super' / 'sb_ver', who owns it, and the meaning of the int return value
 * are not visible from here -- confirm against the implementation.
 */
int md_read_sb0(storage_object_t *obj, void **super);
int md_read_sb1(storage_object_t *obj, void **super, md_sb_ver_t *sb_ver);

/* Functions for creating new MD regions */
/*
 * The level/layout/size/chunksize parameters of md_init_sb() have the same
 * types as those of super_func.init_sb.
 * NOTE(review): units of size and chunksize and the meaning of the int
 * return values are not stated in this header -- confirm against the
 * implementation.
 */
int md_init_sb(md_volume_t *vol,
	       md_sb_ver_t *sb_ver,
	       int level,
	       u_int32_t layout,
	       u_int64_t size,
	       u_int32_t chunksize );
int md_volume_add_new_member(md_volume_t *vol, md_member_t *member);
int md_volume_add_new_missing(md_volume_t *vol);

//void md_print_sb(char *buf, u_int32_t buf_size, md_volume_t *vol);

/* Functions for writing MD superblocks */
/* NOTE(review): implemented outside this header; the meaning of the int
 * return value is not visible here -- confirm. */
int md_write_sbs_to_disk(md_volume_t * vol);

/* Functions for deleting MD superblocks */
/*
 * Forward to the zero_superblock operation of the superblock table that
 * belongs to the member's volume.  member and member->vol must be valid;
 * no NULL checks are made.
 * Returns whatever the operation returns.
 */
static inline int md_zero_superblock(md_member_t *member, boolean now)
{
	int rc = member->vol->sb_func->zero_superblock(member, now);

	return rc;
}

/*
 * Forward to the zero_saved_info operation of the superblock table that
 * belongs to the member's volume.  member and member->vol must be valid;
 * no NULL checks are made.
 * Returns whatever the operation returns.
 */
static inline int md_zero_saved_info(md_member_t *member, boolean now)
{
	int rc = member->vol->sb_func->zero_saved_info(member, now);

	return rc;
}

/*
 * Compare two superblocks through the same_uuid operation of the given
 * table.  sb_func must be valid; sb1 and sb2 are passed through unchanged.
 * Returns whatever the operation returns.
 */
static inline boolean md_same_uuid(struct super_func *sb_func, void *sb1, void *sb2)
{
	boolean same = sb_func->same_uuid(sb1, sb2);

	return same;
}

/*
 * Fetch the event value of a superblock through the get_event operation of
 * the given table.  sb_func must be valid; sb is passed through unchanged.
 * Returns whatever the operation returns.
 */
static inline u_int64_t md_get_event(struct super_func *sb_func, void *sb)
{
	u_int64_t event = sb_func->get_event(sb);

	return event;
}

/*
 * Size of a volume as computed by its superblock table's calc_volume_size
 * operation.  A volume whose flags contain MD_CORRUPT is reported as size 0
 * without calling the operation.
 */
static inline u_int64_t md_volume_calc_size(md_volume_t *vol)
{
	if (!(vol->flags & MD_CORRUPT))
		return vol->sb_func->calc_volume_size(vol);

	return 0;
}

/*
 * Accessors and setters implemented outside this header.
 * NOTE(review): ownership of the sb pointers handed to the *_set_* functions
 * and the meaning of the int return values are not visible from here --
 * confirm against the implementation.
 */
int md_volume_set_master_sb(md_volume_t *vol, md_sb_ver_t *sb_ver, void *sb);

int md_member_set_sb(md_member_t *member, void *sb);
int md_member_get_disk_major(md_member_t *member);
int md_member_get_disk_minor(md_member_t *member);
int md_member_get_raid_disk(md_member_t *member);

void md_volume_set_name(md_volume_t *vol, const char *name);

/* Counterpart of the inline md_write_saved_info() below; implemented
 * outside this header. */
int md_read_saved_info(md_member_t *member);
/*
 * Forward to the write_saved_info operation of the superblock table that
 * belongs to the member's volume.  member and member->vol must be valid;
 * no NULL checks are made.
 * Returns whatever the operation returns.
 */
static inline int md_write_saved_info(md_member_t *member)
{
	int rc = member->vol->sb_func->write_saved_info(member);

	return rc;
}

/*
 * Fill *super_info from the volume's own superblock (vol->sb) using the
 * get_sb_info operation of the volume's superblock table.
 */
static inline void md_volume_get_super_info(md_volume_t *vol, md_super_info_t *super_info)
{
	(*vol->sb_func->get_sb_info)(vol->sb, super_info);
}

/*
 * Fill *super_info from the member's superblock (member->sb) using the
 * get_sb_info operation of the table that belongs to the member's volume.
 */
static inline void md_member_get_super_info(md_member_t *member, md_super_info_t *super_info)
{
	(*member->vol->sb_func->get_sb_info)(member->sb, super_info);
}

/* NOTE(review): implemented outside this header; units of the returned size
 * and of chunksize are not stated here -- confirm against the implementation. */
u_int64_t md_object_usable_size(storage_object_t *obj, md_sb_ver_t *sb_ver, u_int32_t chunksize);

/*
 * Run the find_empty_slot operation of the volume's superblock table on the
 * volume's own superblock (vol->sb).  available_slot is handed to the
 * operation unchanged.
 * Returns whatever the operation returns.
 */
static inline int md_volume_find_empty_slot(md_volume_t *vol, u_int32_t *available_slot)
{
	int rc = vol->sb_func->find_empty_slot(vol->sb, available_slot);

	return rc;
}


#endif

