/*
 * pfmon_symbols.c  - management of symbol tables
 *
 * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P.
 * Contributed by Stephane Eranian <eranian@hpl.hp.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 * 02111-1307 USA
 */
#include "pfmon.h"

#include <ctype.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <libelf.h>

#ifdef CONFIG_PFMON_DEMANGLE

#include <libiberty.h>
/*
 * symbol name demangling is implemented by libiberty but it is not
 * exposed in the header file
 */
#ifndef DMGL_ANSI

#define DMGL_NO_OPTS     0              /* For readability... */
#define DMGL_PARAMS      (1 << 0)       /* Include function args */
#define DMGL_ANSI        (1 << 1)       /* Include const, volatile, etc */
#define DMGL_JAVA        (1 << 2)       /* Demangle as Java rather than C++. */
#define DMGL_VERBOSE     (1 << 3)       /* Include implementation details.  */
#define DMGL_TYPES       (1 << 4)       /* Also try to demangle type encodings.  */
#define DMGL_RET_POSTFIX (1 << 5)       /* Print function return types (when present) after function signature */

#define DMGL_AUTO        (1 << 8)
#define DMGL_GNU         (1 << 9)
#define DMGL_LUCID       (1 << 10)
#define DMGL_ARM         (1 << 11)
#define DMGL_HP          (1 << 12)       /* For the HP aCC compiler;
                                            same as ARM except for
                                       template arguments, etc. */
#define DMGL_EDG         (1 << 13)
#define DMGL_GNU_V3      (1 << 14)
#define DMGL_GNAT        (1 << 15)

extern char *cplus_demangle(char *, int);

#endif
#endif /* CONFIG_PFMON_DEMANGLE */

#define PFMON_KALLSYMS		"/proc/kallsyms"

#ifndef ELF_ST_TYPE
#define ELF_ST_TYPE(val)         ((val) & 0xF)
#endif

#define for_each_sym_module(m)	
struct sym_hash_table_t;

/* maximum number of symbol table sections (SHT_SYMTAB/SHT_DYNSYM) read per ELF file */
#define PFMON_MAX_SECTIONS	2

/*
 * payload stored in the address->symbol hash table
 * (see pfmon_syms_hash_find())
 */
typedef struct _sym_hash_data {
	unsigned long		addr;		/* start address of the symbol */
	unsigned long		eaddr;		/* end address of the symbol */
	char			*name;		/* symbol name */
	char			*module;	/* name of the module holding the symbol */
} sym_hash_data_t;

#define PFMON_SYMS_PERSISTENT	0x1		/* symbol list persistent across all binaries */


/*
 * description of one memory mapping of a file, filled from /proc/<pid>/maps
 * (see load_pid_syms())
 */
typedef struct {
	unsigned long	map_start;	/* address at which the mapping starts */
	unsigned long	map_offset;	/* offset of the mapping in the file */
	size_t		map_size;	/* size of the mapping in bytes */
} map_desc_t;

/* kernel symbol lists, filled by load_kernel_syms() */
static pfmon_syms_list_t kernel_syms;

/*
 * symcmp - qsort() comparator ordering symbol_t entries by ascending value
 *
 * Returns a negative, zero or positive result as qsort() requires. The
 * previous version returned only 0 or 1, so "a < b" was reported as
 * "a == b" and the resulting order was not guaranteed to be sorted.
 * Two comparisons are used instead of a subtraction so the result does
 * not depend on the width of the value field.
 */
static int
symcmp(const void *a, const void *b)
{
	const symbol_t *ap = a;
	const symbol_t *bp = b;

	return (ap->value > bp->value) - (ap->value < bp->value);
}

/*
 * read_sym64 - collect text or data symbols from a 64-bit ELF object
 *
 * elf:      libelf descriptor of the object
 * filename: name of the object, used in messages only
 * mod_sym:  on success receives symbol_tab, min_addr and nsyms
 * sym_type: PFMON_TEXT_SYMBOL keeps STT_FUNC symbols, PFMON_DATA_SYMBOL
 *           keeps STT_OBJECT and STT_COMMON symbols
 * map:      optional mapping description. When non-NULL, only symbols whose
 *           file position falls inside the mapping are kept and their value
 *           is rebased to the mapping address.
 *
 * Symbol names are duplicated (or demangled into fresh storage), so the
 * resulting table does not reference libelf data.
 *
 * Returns 0 on success (nsyms may be 0), -1 on error. On error nothing
 * allocated here is left behind.
 */
static int
read_sym64(Elf *elf, const char *filename, module_symbols_t *mod_sym, pfmon_sym_type_t sym_type, map_desc_t *map)
{
	Elf_Data *data;
	Elf64_Shdr *hdr;
	Elf64_Phdr *phdrs, *p;
	Elf64_Ehdr *ehdr;
	Elf64_Addr start = 0, end = 0;
	Elf_Scn *sections[PFMON_MAX_SECTIONS], *section;
	Elf_Scn *strsym_section;
	Elf64_Sym *symtab_data;
	symbol_t *symbol_tab;
	char *strtab, *str, *s_tmp;
	Elf64_Half phnums, k;
	unsigned long table_size, fpos, value;
	size_t strtab_size;
	unsigned int num_sections, use_text = 0, use_data = 0;
	unsigned int s, i, j;
	int type;

	ehdr = elf64_getehdr(elf);
	if (ehdr == NULL) {
		warning("no elf header\n");
		return -1;
	}
	phdrs  = elf64_getphdr(elf);
	/* without a program header table there is nothing to scan */
	phnums = phdrs ? ehdr->e_phnum : 0;

	if (sym_type == PFMON_TEXT_SYMBOL)
		use_text = 1;
	else if (sym_type == PFMON_DATA_SYMBOL)
		use_data = 1;
	else {
		warning("invalid sym_type %d\n", sym_type);
		return -1;
	}

	/* first find the symbol table and the dynamic symbol table */
	sections[0] = sections[1] = section = NULL;
	num_sections = 0;

	while ((section = elf_nextscn(elf, section)) != NULL && num_sections < PFMON_MAX_SECTIONS) {

		hdr = elf64_getshdr(section);
		if (hdr == NULL) {
			warning("cannot get section header\n");
			return -1;
		}

		if (hdr->sh_type == SHT_SYMTAB || hdr->sh_type == SHT_DYNSYM)
			sections[num_sections++] = section;
	}

	if (num_sections == 0) {
		vbprintf("no symbol table found for %s\n", filename);
		return -1;
	}

	/*
	 * phase 1: figure out max size
	 */
	table_size = 0;
	for (s = 0; s < num_sections; s++) {

		section = sections[s];
		hdr     = elf64_getshdr(section);

		/* a zero entry size would make the divisions below trap */
		if (hdr == NULL || hdr->sh_entsize == 0) {
			vbprintf("invalid symbol table entry size in %s\n", filename);
			return -1;
		}

		/*
		 * use elf_rawdata since there is no memory image of this data
		 */
		data = elf_rawdata(section, NULL);
		if (data == NULL) {
			warning("cannot extract raw elf data for symbol table\n");
			return -1;
		}
		table_size += hdr->sh_size/hdr->sh_entsize;
	}

	/*
	 * phase 2: allocate symbol_tab
	 *
	 * XXX: we allocate more than we need here because of the split text/data
	 *
	 * calloc() checks the size multiplication for overflow; at least one
	 * entry is requested so an empty table is not mistaken for a failure.
	 */
	symbol_tab = calloc(table_size ? table_size : 1, sizeof(symbol_t));
	if (symbol_tab == NULL) {
		vbprintf("cannot allocate space for symbol table\n");
		return -1;
	}

	/*
	 * phase 3: actual parsing of the tables
	 */
	j = 0;
	for (s = 0; s < num_sections; s++) {

		section = sections[s];
		hdr     = elf64_getshdr(section);

		data = elf_rawdata(section, NULL);
		if (data == NULL) {
			warning("cannot extract raw elf data for symbol table\n");
			goto error;
		}
		symtab_data = (Elf64_Sym *)data->d_buf;
		if (symtab_data == NULL)
			continue;

		/*
		 * only this table size now!
		 */
		table_size = hdr->sh_size/hdr->sh_entsize;

		/* get the string table */
		strsym_section = elf_getscn(elf, hdr->sh_link);
		data = strsym_section ? elf_rawdata(strsym_section, NULL) : NULL;
		if (data == NULL) {
			vbprintf("cannot extract raw elf data for string section\n");
			goto error;
		}
		strtab      = data->d_buf;
		strtab_size = data->d_size;

		/* no program header cached yet for this table */
		p = NULL;

		for (i = 0; i < table_size; i++) {

			type  = ELF_ST_TYPE(symtab_data[i].st_info);
			value = symtab_data[i].st_value;

			/*
			 * discard if wrong type
			 */
			if ((type != STT_FUNC && use_text) || (type != STT_OBJECT && type != STT_COMMON && use_data))
				continue;
			/*
			 * discard undefined symbols
			 */
			if (symtab_data[i].st_shndx == SHN_UNDEF)
				continue;
			/*
			 * discard no name symbols and names outside the string table
			 */
			if (symtab_data[i].st_name == 0 || symtab_data[i].st_name >= strtab_size)
				continue;
			str = strtab + symtab_data[i].st_name;
			if (*str == '\0')
				continue;

			if (map) {
				/*
				 * shortcut: reuse the previous segment when it still
				 * covers the value, otherwise rescan the program headers
				 */
				if (p == NULL || value < start || value >= end) {
					for (k = 0, p = phdrs; k < phnums; k++, p++) {
						/* p_type is an enumerated value, not a bit mask */
						if (p->p_type != PT_LOAD) continue;
						start = p->p_vaddr;
						end   = p->p_vaddr + p->p_memsz;
						if (value >= start && value < end) break;
					}
					/*
					 * that happens for dynamic symbols
					 */
					if (k == phnums) {
						/* required for shortcut */
						p = NULL;
						continue;
					}
				}
				fpos = p->p_offset + value - start;

				if (fpos < map->map_offset || (fpos-map->map_offset) >= map->map_size)
					continue;

				value = map->map_start + (fpos-map->map_offset);
			}
			s_tmp = NULL;
#ifdef CONFIG_PFMON_DEMANGLE
			if (options.opt_dem_type) {
				int flg;
				flg = options.opt_dem_type == 2 ?  DMGL_JAVA : 0;
				s_tmp = cplus_demangle(str, DMGL_TYPES|DMGL_ANSI|flg);
			}
#endif
			if (s_tmp == NULL)
				s_tmp = strdup(str);
			if (s_tmp == NULL) {
				vbprintf("cannot allocate space for symbol table\n");
				goto error;
			}
			symbol_tab[j].name  = s_tmp;
			symbol_tab[j].value = value;
			symbol_tab[j].size  = symtab_data[i].st_size;
			symbol_tab[j].type  = sym_type;
			j++;
		}
	}

	if (j) {
		/*
		 * sort symbols by address, necessary to speed up search
		 */
		qsort(symbol_tab, j, sizeof(symbol_t), symcmp);

		mod_sym->symbol_tab = symbol_tab;
		mod_sym->min_addr   = symbol_tab[0].value;
		mod_sym->nsyms      = j;
	} else {
		free(symbol_tab);
		mod_sym->nsyms = 0;
	}
	/* min_addr is only meaningful when at least one symbol was kept */
	vbprintf("table_size=%lu effective=%lu min_addr=0x%lx\n",
		 table_size,
		 (unsigned long)j,
		 j ? (unsigned long)mod_sym->min_addr : 0UL);
	return 0;
error:
	/* release the names copied so far, then the table itself */
	while (j > 0)
		free(symbol_tab[--j].name);
	free(symbol_tab);
	return -1;
}

/*
 * read_sym32 - collect text or data symbols from a 32-bit ELF object
 *
 * elf:      libelf descriptor of the object
 * filename: name of the object, used in messages only
 * mod_sym:  on success receives symbol_tab, min_addr and nsyms
 * sym_type: PFMON_TEXT_SYMBOL keeps STT_FUNC symbols, PFMON_DATA_SYMBOL
 *           keeps STT_OBJECT and STT_COMMON symbols
 * map:      optional mapping description. When non-NULL, only symbols whose
 *           file position falls inside the mapping are kept and their value
 *           is rebased to the mapping address.
 *
 * Symbol names are duplicated (or demangled into fresh storage), so the
 * resulting table does not reference libelf data.
 *
 * Returns 0 on success (nsyms may be 0), -1 on error. On error nothing
 * allocated here is left behind.
 */
static int
read_sym32(Elf *elf, const char *filename, module_symbols_t *mod_sym, pfmon_sym_type_t sym_type, map_desc_t *map)
{
	Elf_Data *data;
	Elf32_Shdr *hdr;
	Elf32_Phdr *phdrs, *p;
	Elf32_Ehdr *ehdr;
	Elf32_Addr start = 0, end = 0;
	Elf_Scn *sections[PFMON_MAX_SECTIONS], *section;
	Elf_Scn *strsym_section;
	Elf32_Sym *symtab_data;
	symbol_t *symbol_tab;
	char *strtab, *str, *s_tmp;
	Elf32_Half phnums, k;
	unsigned long table_size, fpos, value;
	size_t strtab_size;
	unsigned int num_sections, use_text = 0, use_data = 0;
	unsigned int s, i, j;
	int type;

	ehdr = elf32_getehdr(elf);
	if (ehdr == NULL) {
		warning("no elf header\n");
		return -1;
	}
	phdrs  = elf32_getphdr(elf);
	/* without a program header table there is nothing to scan */
	phnums = phdrs ? ehdr->e_phnum : 0;

	if (sym_type == PFMON_TEXT_SYMBOL)
		use_text = 1;
	else if (sym_type == PFMON_DATA_SYMBOL)
		use_data = 1;
	else {
		warning("invalid sym_type %d\n", sym_type);
		return -1;
	}

	/* first find the symbol table and the dynamic symbol table */
	sections[0] = sections[1] = section = NULL;
	num_sections = 0;

	while ((section = elf_nextscn(elf, section)) != NULL && num_sections < PFMON_MAX_SECTIONS) {

		hdr = elf32_getshdr(section);
		if (hdr == NULL) {
			warning("cannot get section header\n");
			return -1;
		}

		if (hdr->sh_type == SHT_SYMTAB || hdr->sh_type == SHT_DYNSYM)
			sections[num_sections++] = section;
	}

	if (num_sections == 0) {
		vbprintf("no symbol table found for %s\n", filename);
		return -1;
	}

	/*
	 * phase 1: figure out max size
	 */
	table_size = 0;
	for (s = 0; s < num_sections; s++) {

		section = sections[s];
		hdr     = elf32_getshdr(section);

		/* a zero entry size would make the divisions below trap */
		if (hdr == NULL || hdr->sh_entsize == 0) {
			vbprintf("invalid symbol table entry size in %s\n", filename);
			return -1;
		}

		/*
		 * use elf_rawdata since there is no memory image of this data
		 */
		data = elf_rawdata(section, NULL);
		if (data == NULL) {
			warning("cannot extract raw elf data for symbol table\n");
			return -1;
		}
		table_size += hdr->sh_size/hdr->sh_entsize;
	}

	/*
	 * phase 2: allocate symbol_tab
	 *
	 * XXX: we allocate more than we need here because of the split text/data
	 *
	 * calloc() checks the size multiplication for overflow; at least one
	 * entry is requested so an empty table is not mistaken for a failure.
	 */
	symbol_tab = calloc(table_size ? table_size : 1, sizeof(symbol_t));
	if (symbol_tab == NULL) {
		vbprintf("cannot allocate space for symbol table\n");
		return -1;
	}

	/*
	 * phase 3: actual parsing of the tables
	 */
	j = 0;
	for (s = 0; s < num_sections; s++) {

		section = sections[s];
		hdr     = elf32_getshdr(section);

		data = elf_rawdata(section, NULL);
		if (data == NULL) {
			warning("cannot extract raw elf data for symbol table\n");
			goto error;
		}
		symtab_data = (Elf32_Sym *)data->d_buf;
		if (symtab_data == NULL)
			continue;

		/*
		 * only this table size now!
		 */
		table_size = hdr->sh_size/hdr->sh_entsize;

		/* get the string table */
		strsym_section = elf_getscn(elf, hdr->sh_link);
		data = strsym_section ? elf_rawdata(strsym_section, NULL) : NULL;
		if (data == NULL) {
			vbprintf("cannot extract raw elf data for string section\n");
			goto error;
		}
		strtab      = data->d_buf;
		strtab_size = data->d_size;

		/* no program header cached yet for this table */
		p = NULL;

		for (i = 0; i < table_size; i++) {

			type  = ELF_ST_TYPE(symtab_data[i].st_info);
			value = symtab_data[i].st_value;

			/*
			 * discard if wrong type
			 */
			if ((type != STT_FUNC && use_text) || (type != STT_OBJECT && type != STT_COMMON && use_data))
				continue;
			/*
			 * discard undefined symbols
			 */
			if (symtab_data[i].st_shndx == SHN_UNDEF)
				continue;
			/*
			 * discard no name symbols and names outside the string table
			 */
			if (symtab_data[i].st_name == 0 || symtab_data[i].st_name >= strtab_size)
				continue;
			str = strtab + symtab_data[i].st_name;
			if (*str == '\0')
				continue;

			if (map) {
				/*
				 * shortcut: reuse the previous segment when it still
				 * covers the value, otherwise rescan the program headers
				 */
				if (p == NULL || value < start || value >= end) {
					for (k = 0, p = phdrs; k < phnums; k++, p++) {
						/* p_type is an enumerated value, not a bit mask */
						if (p->p_type != PT_LOAD) continue;
						start = p->p_vaddr;
						end   = p->p_vaddr + p->p_memsz;
						if (value >= start && value < end) break;
					}
					/*
					 * that happens for dynamic symbols
					 */
					if (k == phnums) {
						/* required for shortcut */
						p = NULL;
						continue;
					}
				}
				fpos = p->p_offset + value - start;

				if (fpos < map->map_offset || (fpos-map->map_offset) >= map->map_size)
					continue;

				value = map->map_start + (fpos-map->map_offset);
			}
			s_tmp = NULL;
#ifdef CONFIG_PFMON_DEMANGLE
			if (options.opt_dem_type) {
				int flg;
				flg = options.opt_dem_type == 2 ?  DMGL_JAVA : 0;
				s_tmp = cplus_demangle(str, DMGL_TYPES|DMGL_ANSI|flg);
			}
#endif
			if (s_tmp == NULL)
				s_tmp = strdup(str);
			if (s_tmp == NULL) {
				vbprintf("cannot allocate space for symbol table\n");
				goto error;
			}
			symbol_tab[j].name  = s_tmp;
			symbol_tab[j].value = value;
			symbol_tab[j].size  = symtab_data[i].st_size;
			symbol_tab[j].type  = sym_type;
			j++;
		}
	}

	if (j) {
		/*
		 * sort symbols by address, necessary to speed up search
		 */
		qsort(symbol_tab, j, sizeof(symbol_t), symcmp);

		mod_sym->symbol_tab = symbol_tab;
		mod_sym->min_addr   = symbol_tab[0].value;
		mod_sym->nsyms      = j;
	} else {
		free(symbol_tab);
		mod_sym->nsyms = 0;
	}
	/* min_addr is only meaningful when at least one symbol was kept */
	vbprintf("table_size=%lu effective=%lu min_addr=0x%lx\n",
		 table_size,
		 (unsigned long)j,
		 j ? (unsigned long)mod_sym->min_addr : 0UL);
	return 0;
error:
	/* release the names copied so far, then the table itself */
	while (j > 0)
		free(symbol_tab[--j].name);
	free(symbol_tab);
	return -1;
}

/*
 * add_syms_module - link a module into the code or data list of a symbol list
 *
 * The list is chosen from p->sym_type. Modules are kept ordered by
 * min_addr; the new module is placed in front of the first module whose
 * min_addr is strictly greater. Address ranges are assumed not to overlap,
 * which is why comparing min_addr alone is enough.
 *
 * XXX: not thread-safe
 */
static void
add_syms_module(module_symbols_t *p, pfmon_syms_list_t *list)
{
	module_symbols_t **head, *cur, *before;
	unsigned long key;

	key = p->min_addr;

	if (p->sym_type == PFMON_TEXT_SYMBOL)
		head = (module_symbols_t **)&list->code_syms;
	else
		head = (module_symbols_t **)&list->data_syms;

	/* walk until the first module located above the new one */
	before = NULL;
	cur    = *head;
	while (cur != NULL && !(cur->min_addr > key)) {
		before = cur;
		cur    = cur->next;
	}

	/* when nothing precedes the new module, cur is still the old head */
	p->next = cur;
	if (before == NULL)
		*head = p;
	else
		before->next = p;
}

/*
 * load_elf_symbols - open an ELF file and extract its symbols into mod_sym
 *
 * filename: path of the ELF file
 * mod_sym:  module descriptor filled by read_sym32()/read_sym64()
 * sym_type: kind of symbols to extract
 * map:      optional mapping restriction, passed through to the readers
 *
 * Returns 0 on success, -1 on any error. The libelf descriptor and the
 * file descriptor are released on every path.
 */
static int
load_elf_symbols(const char *filename, module_symbols_t *mod_sym, pfmon_sym_type_t sym_type, map_desc_t *map)
{
	Elf *elf = NULL;
	char *eident;
	int fd, ret = -1;

	if (filename == NULL) return -1;

	DPRINT(("filename=%s\n", filename));

	fd = open(filename, O_RDONLY);
	if (fd == -1) {
		vbprintf("symbol file for %s not found\n", filename);
		return -1;
	}

	/* initial call to set internal version value */
	if (elf_version(EV_CURRENT) == EV_NONE) {
		DPRINT(("ELF library out of date"));
		goto out;
	}

	/* prepare to read the entire file */
	elf = elf_begin(fd, ELF_C_READ, NULL);
	if (elf == NULL) {
		DPRINT(("cannot read %s\n", filename));
		goto out;
	}

	/* error checking */
	if (elf_kind(elf) != ELF_K_ELF) {
		DPRINT(("%s is not an ELF file\n", filename));
		goto out;
	}

	eident = elf_getident(elf, NULL);
	if (eident == NULL
	    || eident[EI_MAG0] != ELFMAG0
	    || eident[EI_MAG1] != ELFMAG1
	    || eident[EI_MAG2] != ELFMAG2
	    || eident[EI_MAG3] != ELFMAG3) {
		DPRINT(("invalid ELF magic in %s\n", filename));
		goto out;
	}

	switch (eident[EI_CLASS]) {
		case ELFCLASS32:
			ret = read_sym32(elf, filename, mod_sym, sym_type, map);
			break;
		case ELFCLASS64:
			ret = read_sym64(elf, filename, mod_sym, sym_type, map);
			break;
		default:
			DPRINT(("unsupported ELF class for %s\n", filename));
			goto out;
	}
	if (ret) {
		DPRINT(("cannot extract symbols from %s\n", filename));
		ret = -1;
	}
out:
	/*
	 * the readers copy every name they keep, so the descriptor can be
	 * released before returning
	 */
	if (elf) elf_end(elf);
	close(fd);
	return ret;
}

/*
 * place_str - reserve length bytes in a chunked string pool
 *
 * Space is handed out sequentially from chunks of STR_CHUNK_SIZE bytes.
 * A request of STR_CHUNK_SIZE bytes or more is reported through
 * fatal_error(). Returns the reserved area, or NULL when a new chunk
 * cannot be allocated.
 *
 * XXX: previously allocated chunks are not tracked, so the pool can
 * never be freed.
 */
static char *
place_str(unsigned long length)
{
	static char *pool_next, *pool_limit;
	char *slot;
#define STR_CHUNK_SIZE	options.page_size
	if (length >= STR_CHUNK_SIZE)
		fatal_error("sysmap load string is too long\n");

	/* start a new chunk unless the current one can hold the request */
	if (!(pool_next != NULL && !((pool_limit-pool_next) < length))) {
		pool_next = malloc(STR_CHUNK_SIZE);
		if (pool_next == NULL)
			return NULL;
		pool_limit = pool_next + STR_CHUNK_SIZE;
	}

	slot       = pool_next;
	pool_next += length;

	return slot;
}

/*
 * load kernel symbols using /proc/kallsyms.
 * This file does not contain kernel data symbols but includes code/data
 * symbols from modules. Code symbol size is not provided.
 *
 * Lines are expected as "<hex address> <type> <name>[ \t[module]]". Types
 * 't' go to the code table, types 's' and 'd' to the data table (case
 * insensitive); every other type is skipped.
 *
 * On success the tables are handed over to list->code_syms and
 * list->data_syms (symbol_tab, nsyms, min_addr) and 0 is returned. On
 * error -1 is returned, the two module descriptors are left untouched and
 * everything allocated here is released, except for the names already
 * placed in the string pool.
 */
static int
load_kallsyms_symbols(pfmon_syms_list_t *list)
{
#define PFMON_KALLSYMS_SYMBOLS	20000
#define PFMON_KALLSYMS_MAXLEN	256

	FILE *fp;
	symbol_t *tmp;
	char *s, *str_addr, *sym_start, *mod_start, *endptr;
	module_symbols_t *mod_sym_code, *mod_sym_data;
	unsigned long line = 1UL;
	unsigned long symtab_old_size, new_count, sym_len, mod_len, value;
	int need_sorting = 0;
	size_t sz = 0, bufsize;
	char *line_str = NULL;
	char *bigbuf = NULL;
	char addr_str[24]; /* cannot be more than 16+2 (for 0x) */
	int type, c, ret = -1;
	struct {
		symbol_t *symbol_tab;
		unsigned long idx;
		unsigned long sym_count;
	} stab[2], *st;

	mod_sym_code = list->code_syms;
	mod_sym_data = list->data_syms;

	fp = fopen(PFMON_KALLSYMS, "r");
	if (fp == NULL) {
		DPRINT(("file %s not found\n", PFMON_KALLSYMS));
		return -1;
	}

	/*
	 * allocate default-sized symbol tables: [0] is code, [1] is data.
	 * Both pointers are cleared first so the common exit can free them
	 * whatever the point of failure.
	 */
	stab[0].symbol_tab = stab[1].symbol_tab = NULL;
	stab[0].idx        = stab[1].idx        = 0;
	stab[0].sym_count  = stab[1].sym_count  = PFMON_KALLSYMS_SYMBOLS;

	stab[0].symbol_tab = malloc(PFMON_KALLSYMS_SYMBOLS*sizeof(symbol_t));
	if (stab[0].symbol_tab == NULL) {
		DPRINT(("cannot allocate sysmap table for %lu symbols\n", (unsigned long)PFMON_KALLSYMS_SYMBOLS));
		goto out;
	}
	stab[1].symbol_tab = malloc(PFMON_KALLSYMS_SYMBOLS*sizeof(symbol_t));
	if (stab[1].symbol_tab == NULL) {
		DPRINT(("cannot allocate sysmap table for %lu symbols\n", (unsigned long)PFMON_KALLSYMS_SYMBOLS));
		goto out;
	}

	/* optional larger stream buffer; it must outlive the stream */
	bufsize = 2*getpagesize();
	bigbuf = malloc(bufsize);
	if (bigbuf) {
		setvbuf(fp, bigbuf, _IOFBF, bufsize);
	}

	while (getline(&line_str, &sz, fp) > 0) {

		s = line_str;

		while (*s != ' ' && *s != '\0') s++;

		if (*s == '\0') break;

		if (s-line_str > 16+2) goto invalid_address;

		memcpy(addr_str, line_str, s-line_str);
		addr_str[s-line_str] = '\0';

		/* point to object type */
		s++;
		c = tolower((unsigned char)*s);

		/*
		 * keep only text and data symbols, static data symbols
		 * count as data; skip everything else
		 */
		if (c == 's' || c == 'd') {
			type = 1;
		} else if (c == 't') {
			type = 0;
		} else {
			line++;
			continue;
		}

		/* look for space separator */
		s++;
		if (*s != ' ') goto invalid_line;

		st = stab+type;

		if (st->idx == st->sym_count) {
			symtab_old_size = sizeof(symbol_t)*st->sym_count;
			new_count       = st->sym_count << 1;
			DPRINT(("extending kallsyms symbol table to %lu entries old_size=%lu\n", new_count, symtab_old_size));
			tmp = realloc(st->symbol_tab, new_count*sizeof(symbol_t));
			if (tmp == NULL) {
				DPRINT(("cannot extend kallsyms symbol table to %lu entries\n", new_count));
				goto out;
			}
			st->symbol_tab = tmp;
			st->sym_count  = new_count;
		}

		/* compute address */
		endptr = NULL;
		value  = strtoul(addr_str, &endptr, 16);

		if (*endptr != '\0') goto invalid_address;

		/*
		 * check that the file is sorted: each table is compared
		 * against its own previous entry
		 */
		if (st->idx > 0 && value < st->symbol_tab[st->idx-1].value)
			need_sorting = 1;

		/* advance to symbol name */
		sym_start = ++s;

		/* look for end-of-string */
		while (*s != '\n' && *s != '\0' && *s != ' ' && *s != '\t') s++;

		if (*s == '\0') goto invalid_line;

		sym_len = s - sym_start;

		/* check for module */
		while (*s != '\n' && *s != '\0' && *s != '[') s++;

		/* symbol belongs to a kernel module */
		if (*s == '[') {
			mod_start = s++;
			while (*s != '\n' && *s != '\0' && *s != ']') s++;
			if (*s != ']') goto invalid_line;
			mod_len = s - mod_start + 1;
		} else {
			mod_len   = 0;
			mod_start = NULL;
		}

		line++;

		/*
		 * place string in our memory pool
		 * +1 for '\0'
		 */
		str_addr = place_str(mod_len + sym_len + 1);
		if (str_addr == NULL) goto error2;

		/* stored name is "<symbol>[module]" when a module is present */
		memcpy(str_addr, sym_start, sym_len);
		if (mod_len) memcpy(str_addr+sym_len, mod_start, mod_len);
		str_addr[sym_len+mod_len] = '\0';

		st->symbol_tab[st->idx].value = value;
		st->symbol_tab[st->idx].name  = str_addr;
		st->symbol_tab[st->idx].size  = 0; /* use approximation */
		st->symbol_tab[st->idx].type  = type == 0 ? PFMON_TEXT_SYMBOL : PFMON_DATA_SYMBOL;

		st->idx++;
	}

	/*
	 * normally a kallsyms is already sorted
	 * so we should not have to do this
	 */
	if (need_sorting) {
		if (stab[0].idx) qsort(stab[0].symbol_tab, stab[0].idx, sizeof(symbol_t), symcmp);
		if (stab[1].idx) qsort(stab[1].symbol_tab, stab[1].idx, sizeof(symbol_t), symcmp);
	}

	/*
	 * record final number of symbols. A table handed over to its module
	 * is detached from stab[] so the common exit does not free it.
	 */
	if (stab[0].idx) {
		mod_sym_code->symbol_tab = stab[0].symbol_tab;
		mod_sym_code->nsyms      = stab[0].idx;
		mod_sym_code->min_addr   = stab[0].symbol_tab[0].value;
		stab[0].symbol_tab       = NULL;
	}

	if (stab[1].idx) {
		mod_sym_data->symbol_tab = stab[1].symbol_tab;
		mod_sym_data->nsyms      = stab[1].idx;
		mod_sym_data->min_addr   = stab[1].symbol_tab[0].value;
		stab[1].symbol_tab       = NULL;
	}
	ret = 0;
	goto out;

invalid_line:
	warning("sysmap file %s has invalid format, line %lu\n", PFMON_KALLSYMS, line);
	goto out;
invalid_address:
	warning("file %s has an invalid address, line %lu\n", PFMON_KALLSYMS, line);
	goto out;
error2:
	DPRINT(("sysmap load file cannot place new string\n"));
out:
	free(line_str);
	/* close first: bigbuf is the stream buffer until then */
	fclose(fp);
	free(bigbuf);
	free(stab[0].symbol_tab);
	free(stab[1].symbol_tab);
	return ret;
}

/*
 * load_sysmap_symbols - load kernel symbols from the System.map style file
 * named by options.symbol_file
 *
 * Lines are expected as "<hex address> <type> <name>\n". Only types 't'
 * and 'd' (case insensitive) are kept.
 *
 * XXX: text and data symbols both go to list->code_syms
 *
 * On success the module receives symbol_tab, nsyms and min_addr and 0 is
 * returned. On error -1 is returned, the module is left untouched and the
 * file descriptor, the mapping and the table are released.
 */
static int
load_sysmap_symbols(pfmon_syms_list_t *list)
{
	int fd;
	struct stat st;
	unsigned long nsyms = 0, idx = 0;
	unsigned long line = 1UL;
	unsigned long value;
	module_symbols_t *mod_sym;
	char *filename = options.symbol_file;
	char *p, *s, *end, *str_addr, *base;
	char *endptr;
	void *map = MAP_FAILED;
	int type;
	symbol_t *symbol_tab = NULL;
	char b[24]; /* cannot be more than 16+2 (for 0x) */
	int need_sorting = 0;
	int ret = -1;

	fd = open(filename, O_RDONLY);
	if (fd == -1) {
		DPRINT(("sysmap  file %s not found\n", filename));
		return -1;
	}

	if (fstat(fd, &st) == -1) {
		DPRINT(("cannot access sysmap file %s\n", filename));
		goto out;
	}

	map = mmap(0, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
	if (map == MAP_FAILED) {
		DPRINT(("cannot map sysmap file %s\n", filename));
		goto out;
	}
	p = base = map;
	end = base + st.st_size;

	/* one symbol per line at most: count lines to size the table */
	while (p < end) {
		if (*p == '\n') nsyms++;
		p++;
	}
	/*
	 * XXX: fix this for data syms
	 */
	mod_sym = list->code_syms;

	symbol_tab = malloc(nsyms * sizeof(symbol_t));
	if (symbol_tab == NULL) {
		DPRINT(("cannot allocate sysmap table for %lu symbols\n", nsyms));
		goto out;
	}

	/* now parse symbols */
	p = base;

	while (p < end) {

		/* the address field runs up to the first blank */
		s = p;
		while (s < end && *s != ' ') s++;
		if (s == end) break;
		if (s-p > 16+2) {
			DPRINT(("invalid address at line %lu in %s\n", line, filename));
			goto out;
		}

		memcpy(b, p, s-p);
		b[s-p] = '\0';

		/* point to object type, never reading past the mapping */
		s++;
		if (s == end) goto error;
		type = tolower((unsigned char)*s);

		/*
		 * keep only text and data symbols
		 * XXX: oversimplification here!
		 */
		if (type != 't' && type != 'd') {
			while (s < end && *s != '\n') s++;
			if (s == end) goto error;
			line++;
			p = s + 1;
			continue;
		}

		/* look for space separator */
		s++;
		if (s == end || *s != ' ') goto error;

		/* compute address */
		value = strtoul(b, &endptr, 16);
		if (*endptr != '\0') {
			DPRINT(("invalid address at line %lu in %s\n", line, filename));
			goto out;
		}

		/* advance to symbol name */
		s++;
		p = s;

		/* look for end-of-line */
		while (s < end && *s != '\n') s++;
		if (s == end) goto error;
		if (s == p) goto error;
		line++;

		/* sanity: checked before anything is stored in the table */
		if (idx == nsyms) fatal_error("too many symbol for sysmap files\n");

		/*
		 * place string in our memory pool
		 */
		str_addr = place_str(s-p+1);
		if (str_addr == NULL) goto error2;

		memcpy(str_addr, p, s-p);
		str_addr[s-p] = '\0';
		p = s + 1;

		/*
		 * check that file is sorted correctly
		 */
		if (idx > 0 && value < symbol_tab[idx-1].value)
			need_sorting = 1;

		symbol_tab[idx].type  = type == 't' ? PFMON_TEXT_SYMBOL : PFMON_DATA_SYMBOL;
		symbol_tab[idx].value = value;
		symbol_tab[idx].name  = str_addr;
		symbol_tab[idx].size  = 0; /* use approximation */

		idx++;
	}

	if (idx) {
		/*
		 * normally a System.map file is already sorted
		 * so we should not have to do this
		 */
		if (need_sorting) qsort(symbol_tab, idx, sizeof(symbol_t), symcmp);

		mod_sym->symbol_tab = symbol_tab;
		mod_sym->min_addr   = symbol_tab[0].value;
		/* table now belongs to the module */
		symbol_tab = NULL;
	}
	/* record final number of symbols */
	mod_sym->nsyms = idx;
	ret = 0;
	goto out;

error:
	warning("sysmap file %s has invalid format, line %lu\n", filename, line);
	goto out;
error2:
	DPRINT(("sysmap load file cannot place new string\n"));
out:
	/*
	 * cleanup mappings
	 */
	free(symbol_tab);
	if (map != MAP_FAILED) munmap(map, st.st_size);
	close(fd);
	return ret;
}

/*
 * load_kernel_syms - load the kernel code and data symbols once
 *
 * Symbols come from options.symbol_file when options.opt_sysmap_syms is
 * set, from /proc/kallsyms otherwise. The result is stored in the
 * file-scope kernel_syms list; a module that ends up without symbols is
 * freed and its list pointer set to NULL.
 *
 * Always returns 0, as before: a loader failure simply leaves the
 * kernel lists empty. After a successful load, later calls return
 * immediately.
 */
int
load_kernel_syms(void)
{
	static int kernel_syms_loaded;
	module_symbols_t *p1, *p2;
	char *from;
	int ret;

	if (kernel_syms_loaded) return 0;

	/* calloc() leaves symbol_tab, name_space, nsyms and next cleared */
	p1 = calloc(1, sizeof(module_symbols_t));
	if (p1 == NULL)
		fatal_error("cannot allocate symbol table for kernel\n");

	p1->sym_type = PFMON_TEXT_SYMBOL;
	p1->name     = "kernel";
	p1->flags    = PFMON_SYMS_PERSISTENT;

	p2 = calloc(1, sizeof(module_symbols_t));
	if (p2 == NULL)
		fatal_error("cannot allocate symbol table for kernel\n");

	p2->sym_type = PFMON_DATA_SYMBOL;
	p2->name     = "kernel";
	p2->flags    = PFMON_SYMS_PERSISTENT;

	kernel_syms.refcnt    = 1;
	kernel_syms.code_syms = p1;
	kernel_syms.data_syms = p2;

	/*
	 * Despite /proc/kallsyms, System.map is still useful because it includes data symbols
	 * We use System.map if specified, otherwise we default to /proc/kallsyms
	 */
	if (options.opt_sysmap_syms) {
		ret  = load_sysmap_symbols(&kernel_syms);
		from = options.symbol_file;
	} else {
		ret  = load_kallsyms_symbols(&kernel_syms);
		from = PFMON_KALLSYMS;
	}
	if (p1->nsyms) {
		vbprintf("loaded %lu text symbols from %s\n", p1->nsyms, from);
	} else {
		/* drop a table a failed loader may have left attached */
		free(p1->symbol_tab);
		free(p1);
		kernel_syms.code_syms = NULL;
	}

	if (p2->nsyms) {
		vbprintf("loaded %lu data symbols from %s\n", p2->nsyms, from);
	} else {
		free(p2->symbol_tab);
		free(p2);
		kernel_syms.data_syms = NULL;
	}

	/*
	 * remember a successful load; the flag was never set before, so
	 * every call reloaded the symbols and leaked the previous tables
	 */
	if (ret == 0)
		kernel_syms_loaded = 1;

	return 0;
}

/*
 * load_elf_syms - load the text then the data symbols of an ELF file and
 * add them to the symbol list l
 *
 * One module descriptor is created per symbol kind. A descriptor is added
 * to the list only when it holds at least one symbol; otherwise it is
 * released together with its name.
 *
 * Returns 0 on success, -1 when an argument is NULL or when the file
 * cannot be processed. Text symbols already added stay in the list when
 * the data pass fails.
 */
int
load_elf_syms(const char *filename, pfmon_syms_list_t *l)
{
	static const char *kind_name[2] = { "text", "data" };
	pfmon_sym_type_t kind[2];
	module_symbols_t *p;
	unsigned int t;

	if (filename == NULL || l == NULL) return -1;

	kind[0] = PFMON_TEXT_SYMBOL;
	kind[1] = PFMON_DATA_SYMBOL;

	for (t = 0; t < 2; t++) {
		/* zeroed so that fields not set below do not hold garbage */
		p = calloc(1, sizeof(module_symbols_t));
		if (p == NULL)
			fatal_error("cannot allocate symbol table module for %s\n", filename);

		p->sym_type = kind[t];
		p->name     = strdup(basename(filename));

		if (load_elf_symbols(filename, p, kind[t], NULL)) {
			free(p->name);
			free(p);
			return -1;
		}
		vbprintf("loaded %lu %s symbols from ELF file %s\n", p->nsyms, kind_name[t], filename);

		if (p->nsyms) {
			add_syms_module(p, l);
		} else {
			free(p->name);
			free(p);
		}
	}
	return 0;
}

/*
 * load_pid_syms - load symbols for every file-backed private mapping of a
 * process, as listed in /proc/<pid>/maps
 *
 * Executable mappings are loaded as text symbols, readable and writable
 * mappings as data symbols; other mappings are ignored. Files that cannot
 * be processed are skipped silently. Modules with at least one symbol are
 * added to l.
 *
 * The path is taken directly from the line buffer, up to the first white
 * space, so its length is not limited by a fixed-size array.
 *
 * Returns 0. A maps file that cannot be opened is reported through
 * fatal_error().
 */
int
load_pid_syms(pid_t pid, pfmon_syms_list_t *l)
{
	module_symbols_t *p;
	pfmon_sym_type_t type;
	unsigned long end;
	map_desc_t map;
	char *line = NULL, *path;
	size_t sz = 0;
	FILE *fp;
	char filename[32];
	char perm[5] = "";
	int nfields, path_off;

	snprintf(filename, sizeof(filename), "/proc/%d/maps", (int)pid);
	fp = fopen(filename, "r");
	if (fp == NULL)
		fatal_error("cannot access %d /proc maps file\n", (int)pid);

	while (getline(&line, &sz, fp) > 0) {

		/*
		 * %n records where the path starts; it stays at 0 when the
		 * device or inode field could not be matched
		 */
		path_off = 0;
		nfields = sscanf(line, "%lx-%lx %4s %lx %*x:%*x %*u %n",
				&map.map_start, &end, perm, &map.map_offset, &path_off);

		if (nfields != 4 || path_off <= 0)
			continue;

		/* keep the first white-space delimited token, as %s would */
		path = line + path_off;
		path[strcspn(path, " \t\n")] = '\0';

		/* anonymous mappings have no path; only private mappings are used */
		if (*path == '\0' || perm[3] != 'p')
			continue;

		map.map_size = end - map.map_start;

		if (perm[2] == 'x')
			type = PFMON_TEXT_SYMBOL;
		else if (perm[0] == 'r' && perm[1] == 'w')
			type = PFMON_DATA_SYMBOL;
		else
			continue;

		/* zeroed so that fields not set below do not hold garbage */
		p = calloc(1, sizeof(module_symbols_t));
		if (p == NULL)
			fatal_error("cannot allocate symbol table module for %s\n", path);

		p->sym_type = type;
		p->name     = strdup(basename(path));

		if (load_elf_symbols(path, p, type, &map)) {
			free(p->name);
			free(p);
			continue;
		}

		vbprintf("loaded %lu %s symbols from ELF file %s\n",
				p->nsyms,
				type == PFMON_TEXT_SYMBOL ? "text" : "data",
				path);

		if (p->nsyms) {
			add_syms_module(p, l);
		} else {
			free(p->name);
			free(p);
		}
	}
	free(line);
	fclose(fp);
	vbprintf("done loading symbols from %s\n", filename);
	return 0;
}


/*
 * find_sym_addr - look up a symbol by name
 *
 * name:  symbol name, optionally prefixed by "<module>:" to restrict the
 *        search to the module of that name
 * list:  symbol list to search
 * type:  PFMON_TEXT_SYMBOL searches the code list, anything else the data list
 * start: receives the symbol address
 * end:   optional, receives the end address. When the symbol has no size,
 *        the address of the next symbol in the table is used as an
 *        approximation.
 *
 * The module prefix is compared in place, so its length is not limited.
 *
 * Returns 0 when found, -1 on bad arguments, when the symbol is not found,
 * or when no following symbol exists to approximate the size.
 */
int
find_sym_addr(char *name, pfmon_syms_list_t *list, pfmon_sym_type_t type, unsigned long *start, unsigned long *end)
{
	module_symbols_t *mod;
	symbol_t *symbol_tab = NULL;
	const char *mod_name = NULL;
	char *p;
	size_t mod_len = 0;
	unsigned long i = 0, nsyms = 0;

	if (name == NULL || *name == '\0' || list == NULL || start == NULL) return -1;

	if (type == PFMON_TEXT_SYMBOL)
		mod = (module_symbols_t *)list->code_syms;
	else
		mod = (module_symbols_t *)list->data_syms;
	/*
	 * check for module name
	 */
	p = strchr(name, ':');
	if (p) {
		mod_name = name;
		mod_len  = p - name;
		name     = p + 1;
	}

	for (; mod; mod = mod->next) {
		/* the module name must match the prefix exactly */
		if (mod_name
		    && (mod->name == NULL
			|| strlen(mod->name) != mod_len
			|| strncmp(mod_name, mod->name, mod_len)))
			continue;

		nsyms      = mod->nsyms;
		symbol_tab = mod->symbol_tab;
		for (i = 0; i < nsyms; i++) {
			if (!strcmp(name, symbol_tab[i].name) && symbol_tab[i].type == type)
				goto found;
		}
	}

	return -1;
found:
	*start = symbol_tab[i].value;
	if (end) {
		if (symbol_tab[i].size != 0) {
			*end = *start + symbol_tab[i].size;
		} else {
			vbprintf("using approximation for size of symbol %s\n", name);

			if (i == (nsyms-1)) {
				warning("cannot find another symbol to approximate size of %s\n", name);
				return -1;
			}

			/*
			 * XXX: Very approximative and maybe false at times
			 * Use carefully
			 */
			*end = symbol_tab[i+1].value;
		}
		vbprintf("symbol %s (%s): [%p-%p)=%ld bytes\n",
				name,
				type == PFMON_TEXT_SYMBOL ? "code" : "data",
				(void *)*start,
				(void *)*end,
				(long)(*end-*start));
	}
	return 0;
}

/*
 * find_sym_byaddr - find the symbol covering an address
 *
 * addr:   address to resolve
 * list:   symbol list to search
 * type:   PFMON_TEXT_SYMBOL searches the code list, anything else the data list
 * name, module, start, end: optional outputs
 *
 * Modules and the symbols inside each module are assumed sorted by
 * address. The candidate is the last symbol whose value is not above addr.
 * A symbol with a size covers [value, value+size). A symbol without size
 * is assumed to extend up to the next symbol of the same table, unless
 * the two names carry different "[module]" suffixes.
 *
 * An address at or above the last symbol of a module resolves only when
 * that symbol has a size covering it; previously such an address could
 * never be resolved.
 *
 * Returns 0 when found, -1 otherwise.
 */
int
find_sym_byaddr(unsigned long addr, pfmon_syms_list_t *list, pfmon_sym_type_t type, char **name, char **module, unsigned long *start, unsigned long *end)
{
	module_symbols_t *mod;
	symbol_t *symbol_tab;
	symbol_t *lo = NULL, *hi = NULL;
	size_t size;
	char *p, *q;
	unsigned long u, nsyms;

	if (list == NULL) return -1;

	if (type == PFMON_TEXT_SYMBOL) {
		mod = list->code_syms;
	} else {
		mod = list->data_syms;
	}

	/* table is assumed sorted by address */
	for (; mod; mod = mod->next) {
		nsyms      = mod->nsyms;
		symbol_tab = mod->symbol_tab;

		if (nsyms == 0) continue;

		/* look for upper bound */
		for (u = 0; u < nsyms; u++) {
			if (symbol_tab[u].value > addr) break;
		}

		/*
		 * below the first symbol of this module: modules are
		 * sorted, so no later module can match either
		 */
		if (u == 0) break;

		if (u < nsyms) {
			lo = &symbol_tab[u-1];
			hi = &symbol_tab[u];
			break;
		}

		/*
		 * no upper bound in this module: only the size of the last
		 * symbol can tell whether it covers the address
		 */
		if (symbol_tab[nsyms-1].size
		    && addr - symbol_tab[nsyms-1].value < symbol_tab[nsyms-1].size) {
			lo = &symbol_tab[nsyms-1];
			break;
		}
	}

	/*
	 * basic not found condition
	 */
	if (lo == NULL) {
		DPRINT(("type=%d addr=0x%lx not found\n", type, addr));
		return -1;
	}

	size = lo->size;

	/*
	 * if the symbol has a size, ensure we do not
	 * have a hole: value+size is the first address past the symbol
	 *
	 * size = 0 when we could not figure it out
	 */
	if (size && (lo->value+size) <= addr) {
		DPRINT(("hole for 0x%lx\n", addr));
		return -1;
	}
	/*
	 * for kernel, check not crossing module boundary
	 * (hi is always set here: a symbol without size is only selected
	 * together with its upper bound)
	 */
	if (size == 0) {
		p = strchr(lo->name, '[');
		q = strchr(hi->name, '[');

		/*
		 * crossing from/to kernel modules module
		 */
		if ((p && q == NULL) || (p == NULL && q)) {
			return -1;
		}
		/*
		 * crossing between kernel modules
		 */
		if (p && q && strcmp(q+1, p+1)) {
			return -1;
		}
		/*
		 * we may still be getting the wrong answer if there is
		 * a hole with size = 0
		 */
	}
	if (name)   *name   = lo->name;
	if (start)  *start  = lo->value;
	if (end)    *end    = size ? lo->value+size : hi->value;
	if (module) *module = mod->name; /* symbol module */
	return 0;
}

/*
 * pfmon_is_exact_sym - tell whether addr is exactly the value of a symbol
 *
 * The code list is searched for PFMON_TEXT_SYMBOL, the data list for any
 * other type. Returns 1 when a symbol of the requested type has its value
 * equal to addr, 0 otherwise.
 */
int
pfmon_is_exact_sym(unsigned long addr, pfmon_syms_list_t *list, pfmon_sym_type_t type)
{
	module_symbols_t *m;
	symbol_t *tab;
	unsigned long idx, count;

	m = (type == PFMON_TEXT_SYMBOL) ? list->code_syms : list->data_syms;

	while (m != NULL) {
		tab   = m->symbol_tab;
		count = m->nsyms;

		idx = 0;
		while (idx < count) {
			if (tab[idx].value == addr && tab[idx].type == type)
				return 1;
			idx++;
		}
		m = m->next;
	}
	return 0;
}

/*
 * pfmon_syms_hash_find - resolve an address to a symbol through a hash cache
 *
 * The hash is probed first with the address as key. On a miss, the symbol
 * lists are searched with find_sym_byaddr() and the result is added to
 * the hash. name, module and start_addr receive the cached values.
 *
 * Returns 0 on success, -1 when the address cannot be resolved.
 */
int
pfmon_syms_hash_find(void *hash_desc, pfmon_syms_list_t *list, pfmon_sym_type_t type, unsigned long addr, char **name, char **module, unsigned long *start_addr)
{
	pfmon_hash_key_t key = (pfmon_hash_key_t)addr;
	sym_hash_data_t *cached, looked_up;
	void *data;

	if (pfmon_hash_find(hash_desc, key, &data) != 0) {
		/*
		 * miss: look for the symbol in the symbol table lists
		 */
		if (find_sym_byaddr(addr, list, type,
				    &looked_up.name, &looked_up.module,
				    &looked_up.addr, &looked_up.eaddr) == -1)
			return -1;

		/*
		 * then remember it in a new hash element
		 */
		pfmon_hash_add(hash_desc, key, &data);
		cached = (sym_hash_data_t *)data;
		cached->addr   = looked_up.addr;
		cached->eaddr  = looked_up.eaddr;
		cached->name   = looked_up.name;
		cached->module = looked_up.module;
	}

	cached      = (sym_hash_data_t *)data;
	*start_addr = cached->addr;
	*module     = cached->module;
	*name       = cached->name;

	return 0;
}

/*
 * pfmon_syms_hash_alloc - create a hash table caching address lookups
 *
 * hash_log_size and max_entries are passed through to the hash layer;
 * each entry holds one sym_hash_data_t. The descriptor is returned in
 * *hash_desc.
 *
 * Returns 0. An allocation failure is reported through fatal_error().
 */
int
pfmon_syms_hash_alloc(unsigned long hash_log_size, unsigned long max_entries, void **hash_desc)
{
	pfmon_hash_param_t hp;

	hp.flags         = PFMON_HASH_ACCESS_REORDER;
	hp.shifter       = 8;
	hp.entry_size    = sizeof(sym_hash_data_t);
	hp.max_entries   = max_entries;
	hp.hash_log_size = hash_log_size;

	if (pfmon_hash_alloc(&hp, hash_desc))
		fatal_error("cannot allocate hash table\n");

	return 0;
}

/*
 * pfmon_syms_hash_free - release a hash table obtained from
 * pfmon_syms_hash_alloc(); the descriptor is handed to pfmon_hash_free()
 */
void 
pfmon_syms_hash_free(void *hash_desc)
{
	pfmon_hash_free(hash_desc);
}

/*
 * print_syms - dump every symbol of a list on stdout (mostly for debug)
 *
 * Code symbols are printed first, tagged 'T', then data symbols, tagged
 * 'D'. Each line shows the value, the size, the name and the module name.
 */
void
print_syms(pfmon_syms_list_t *l)
{
	module_symbols_t *m;
	symbol_t *tab;
	char *owner;
	unsigned long n, count;

	m = l->code_syms;
	while (m) {
		count = m->nsyms;
		tab   = m->symbol_tab;
		owner = m->name;

		for (n = 0; n < count; n++)
			printf("%p T %8lu %s<%s>\n",
				(void *)tab[n].value,
				tab[n].size,
				tab[n].name, owner);

		m = m->next;
	}

	m = l->data_syms;
	while (m) {
		count = m->nsyms;
		tab   = m->symbol_tab;
		owner = m->name;

		for (n = 0; n < count; n++)
			printf("%p D %8lu %s<%s>\n",
				(void *)tab[n].value,
				tab[n].size,
				tab[n].name, owner);

		m = m->next;
	}
}

/*
 * setup_syms_list - initialize a symbol list
 *
 * The reference count is set to 1, the lock is initialized and the code
 * and data lists start with the kernel modules loaded by
 * load_kernel_syms(). Returns 0.
 */
int
setup_syms_list(pfmon_syms_list_t *l)
{

	l->refcnt = 1;
	/*
	 * pthread_mutex_init() takes a pointer to an attribute object, not a
	 * mutex type. The enumerator passed before only worked because its
	 * value is 0 with glibc; NULL requests the default attributes
	 * explicitly.
	 */
	pthread_mutex_init(&l->lock, NULL);

	/*
	 * kernel is shared by every task. Kernel ALWAYS has higher addresses, therefore
	 * it will ALWAYS be at the end of the list, i.e., next will remain NULL across
	 * all lists.
	 */
	l->code_syms  = kernel_syms.code_syms;
	l->data_syms  = kernel_syms.data_syms;

	return 0;
}

