/* ----------------------------------------------------------------------- *
 *   
 *   Copyright 2001 H. Peter Anvin - All Rights Reserved
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
 *   USA; either version 2 of the License, or (at your option) any later
 *   version; incorporated herein by reference.
 *
 * ----------------------------------------------------------------------- */

/*
 * mkzffile.c
 *
 *	- Generate block-compression of files for use with
 *	  the "ZF" extension to the iso9660/RockRidge filesystem.
 *
 *	  The file compression technique used is the "deflate"
 *	  algorithm used by the zlib library; each block must have a
 *	  valid (12-byte) zlib header.  In addition, the file itself
 *	  has the following structure:
 *
 *	  Byte offset	iso9660 type	Contents
 *	    0		(8 bytes)	Magic number (37 E4 53 96 C9 DB D6 07)
 *	    8		7.3.1		Uncompressed file size
 *	   12		7.1.1		header_size >> 2 (currently 4)
 *	   13		7.1.1		log2(block_size)
 *	   14		(2 bytes)	Reserved, must be zero
 *
 * The header may get expanded in the future, at which point the
 * header size field will be used to increase the space for the
 * header.
 *
 * All implementations are required to support a block_size of 32K
 * (byte 13 == 15).
 *
 * Note that bytes 12 and 13 and the uncompressed length are also
 * present in the ZF record; THE TWO MUST BOTH BE CONSISTENT AND
 * CORRECT.
 *
 * Given the uncompressed size, block_size, and header_size:
 *
 *     nblocks := ceil(size/block_size)
 *
 * After the header follow (nblocks+1) 32-bit pointers, recorded as
 * iso9660 7.3.1 (little-endian); each indicates the byte offset (from
 * the start of the file) of one block, which is also the first byte
 * beyond the end of the previous block; the first pointer thus points
 * to the start of the data area and the last pointer to the first byte
 * beyond it:
 *
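 *     block_no := floor(byte_offset/block_size)
 *
 *     block_start := read_pointer_731( (header_size+block_no)*4 )
 *     block_end   := read_pointer_731( (header_size+block_no+1)*4 )
 *
 * (In these formulas header_size is the value stored in byte 12, i.e.
 * the header length counted in 4-byte units.)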
 *
 * The block data is compressed according to "zlib".
 */

#include <dirent.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>
#include <limits.h>
#include <utime.h>
#include <sys/stat.h>
#include <sys/types.h>

#include <zlib.h>

/*
 * Defined when lchown() is available.  compress_tree() then sets the
 * ownership of every entry it creates with lchown(); without it, chown()
 * is used instead and is only applied to entries that are not symlinks.
 */
#define HAVE_LCHOWN 1		/* Should be obtained by autoconf or so */

/* Command line options */
int force = 0;			/* Always compress (set by -f) */
int level = 9;			/* Compression level passed to compress2(); -z accepts 0..9 */
int verbosity = 0;		/* Incremented once per -v; not read elsewhere in this file */

/* Program name: argv[0], used as the prefix of the error messages */
const char *program;

/* Convenience functions */
/*
 * xmalloc - malloc() wrapper that never hands back NULL.
 *
 *   size  number of bytes to allocate
 *
 * Returns the new allocation.  When malloc() fails, the reason is
 * reported with perror(program) and the process exits with status 1.
 */
void *xmalloc(size_t size)
{
  void *mem = malloc(size);

  if ( mem != NULL )
    return mem;

  /* Out of memory: nothing sensible left to do */
  perror(program);
  exit(1);
}

/*
 * xstrdup - strdup() wrapper that never hands back NULL.
 *
 *   str  NUL-terminated string to duplicate
 *
 * Returns a heap copy of the string.  When strdup() fails, the reason is
 * reported with perror(program) and the process exits with status 1.
 */
char *xstrdup(const char *str)
{
  char *copy = strdup(str);

  if ( copy != NULL )
    return copy;

  /* Out of memory: nothing sensible left to do */
  perror(program);
  exit(1);
}

/*
 * set_721 - store the low 16 bits of a value, least significant byte first.
 *
 *   pnt  destination, at least 2 bytes
 *   i    value to store; bits above 15 are ignored
 *
 * (The name presumably refers to ISO 9660 section 7.2.1.)
 *
 * Written with a prototype rather than a K&R identifier list so that calls
 * are type-checked and arguments are converted to the parameter types.
 */
static void
set_721(char *pnt, unsigned int i)
{
        /* Store through unsigned char so values above 127 are stored exactly */
        unsigned char *p = (unsigned char *)pnt;

        p[0] = i & 0xff;
        p[1] = (i >> 8) & 0xff;
}

/*
 * set_722 - store the low 16 bits of a value, most significant byte first.
 *
 *   pnt  destination, at least 2 bytes
 *   i    value to store; bits above 15 are ignored
 *
 * (The name presumably refers to ISO 9660 section 7.2.2.)
 *
 * Written with a prototype rather than a K&R identifier list so that calls
 * are type-checked and arguments are converted to the parameter types.
 */
static void
set_722(char *pnt, unsigned int i)
{
        /* Store through unsigned char so values above 127 are stored exactly */
        unsigned char *p = (unsigned char *)pnt;

        p[0] = (i >> 8) & 0xff;
        p[1] = i & 0xff;
}

/*
 * set_723 - store the low 16 bits of a value in both byte orders.
 *
 *   pnt  destination, at least 4 bytes
 *   i    value to store; bits above 15 are ignored
 *
 * Bytes 0-1 receive the little-endian form and bytes 2-3 the big-endian
 * form.  (The name presumably refers to ISO 9660 section 7.2.3.)
 *
 * Written with a prototype rather than a K&R identifier list so that calls
 * are type-checked and arguments are converted to the parameter types.
 */
static void
set_723(char *pnt, unsigned int i)
{
        /* Store through unsigned char so values above 127 are stored exactly */
        unsigned char *p = (unsigned char *)pnt;
        unsigned char lo = i & 0xff;
        unsigned char hi = (i >> 8) & 0xff;

        p[0] = lo;              /* little-endian copy */
        p[1] = hi;
        p[2] = hi;              /* big-endian copy */
        p[3] = lo;
}

/*
 * set_731 - store a 32-bit value, least significant byte first.
 *
 *   pnt  destination, at least 4 bytes
 *   i    value to store
 *
 * This is the "iso9660 7.3.1 (little-endian)" encoding used for the
 * uncompressed length and for the block pointer table.
 *
 * Written with a prototype rather than a K&R identifier list: callers in
 * this file pass both a char pointer and the address of a char[4] member,
 * and pass unsigned long values.  Without a prototype those arguments are
 * neither checked nor converted.  Taking "void *" accepts either pointer
 * form, and the value is converted to unsigned int at the call.
 */
static void
set_731(void *pnt, unsigned int i)
{
        /* Store through unsigned char so values above 127 are stored exactly */
        unsigned char *p = pnt;

        p[0] = i & 0xff;
        p[1] = (i >> 8) & 0xff;
        p[2] = (i >> 16) & 0xff;
        p[3] = (i >> 24) & 0xff;
}

/*
 * set_732 - store a 32-bit value, most significant byte first.
 *
 *   pnt  destination, at least 4 bytes
 *   i    value to store
 *
 * (The name presumably refers to ISO 9660 section 7.3.2.)
 *
 * Written with a prototype rather than a K&R identifier list so that calls
 * are type-checked and arguments are converted to the parameter types.
 */
static void
set_732(char *pnt, unsigned int i)
{
        /* Store through unsigned char so values above 127 are stored exactly */
        unsigned char *p = (unsigned char *)pnt;

        p[0] = (i >> 24) & 0xff;
        p[1] = (i >> 16) & 0xff;
        p[2] = (i >> 8) & 0xff;
        p[3] = i & 0xff;
}

/*
 * set_733 - store a 32-bit value in both byte orders.
 *
 *   pnt  destination, at least 8 bytes
 *   i    value to store
 *
 * Bytes 0-3 receive the little-endian form and bytes 4-7 the big-endian
 * form.  (The name presumably refers to ISO 9660 section 7.3.3.)
 *
 * Written with a prototype rather than a K&R identifier list so that calls
 * are type-checked and arguments are converted to the parameter types.
 */
static void
set_733(char *pnt, unsigned int i)
{
        /* Store through unsigned char so values above 127 are stored exactly */
        unsigned char *p = (unsigned char *)pnt;
        int k;

        for (k = 0; k < 4; k++) {
                unsigned char byte = (i >> (8 * k)) & 0xff;

                p[k] = byte;            /* little-endian half */
                p[7 - k] = byte;        /* mirrored big-endian half */
        }
}

/*
 * Block geometry.  CBLOCK_SIZE_LG2 is the base-2 logarithm of the block
 * size and may be overridden at build time; it is the value stored in the
 * block_size byte of the file header.  CBLOCK_SIZE is the block size in
 * bytes, i.e. how much input block_compress_file() reads and compresses
 * at a time.
 */
#ifndef CBLOCK_SIZE_LG2
#define CBLOCK_SIZE_LG2	15	/* log2 of the block size: 15 => 32K blocks */
#endif
#define CBLOCK_SIZE	(1U << CBLOCK_SIZE_LG2)

/* Compressed file magic: the first 8 bytes of every compressed file */
const unsigned char zisofs_magic[8] =
  { 0x37, 0xE4, 0x53, 0x96, 0xC9, 0xDB, 0xD6, 0x07 };

/* NOTE(review): MAGIC is not referenced anywhere else in this file;
   its purpose cannot be determined from here. */
#define MAGIC		0xAD62A87F

/*
 * Header at the very start of a compressed file.  Every member is a char
 * or char array, and block_compress_file() fwrite()s the structure as-is.
 *
 * VERY VERY VERY IMPORTANT: Must be a multiple of 4 bytes
 * (block_compress_file() aborts at run time if it is not).
 */
struct compressed_file_header {
  char magic[8];		/* Copy of zisofs_magic */
  char uncompressed_len[4];	/* Input size, stored with set_731() */
  char header_size;		/* sizeof(this structure) >> 2 */
  char block_size;		/* CBLOCK_SIZE_LG2, i.e. log2 of the block size */
  char reserved[2];		/* Reserved for future use, MBZ */
};

int block_compress_file(FILE *input, FILE *output, unsigned long size)
{
  struct compressed_file_header hdr;
  char inbuf[CBLOCK_SIZE], outbuf[2*CBLOCK_SIZE];
  int bytes, pointer_bytes, nblocks, block;
  uLong cbytes;			/* uLong is a zlib datatype */
  char *pointer_block, *curptr;
  unsigned long position;
  int i;
  int header_size;
  int force_compress = force;

  if ( (sizeof hdr) & 3 ) {
    fputs("INTERNAL ERROR: header is not a multiple of 4\n", stderr);
    abort();
  }

  memset(&hdr, 0, sizeof hdr);
  memcpy(&hdr.magic, zisofs_magic, sizeof zisofs_magic);
  hdr.header_size = (sizeof hdr) >> 2;
  hdr.block_size = CBLOCK_SIZE_LG2;
  set_731(&hdr.uncompressed_len, size);

  if ( fwrite(&hdr, sizeof hdr, 1, output) != 1 )
    return -1;

  nblocks = (size+CBLOCK_SIZE-1) >> CBLOCK_SIZE_LG2;
  pointer_bytes = 4*(nblocks+1);
  pointer_block = calloc(pointer_bytes, 1);
  if ( !pointer_block )
    return -1;

  if ( fseek(output, pointer_bytes, SEEK_CUR) == -1 )
    return -1;

  curptr = pointer_block;
  position = sizeof hdr + pointer_bytes;
  
  block = 0;
  while ( (bytes = fread(inbuf, 1, CBLOCK_SIZE, input)) > 0 ) {
    if ( bytes < CBLOCK_SIZE && block < nblocks-1 ) {
      errno = EINVAL;		/* Someone changed the file on us */
      return -1;		/* Short read */
    }

    /* HACK: If the file has our magic number, always compress */
    if ( block == 0 && bytes >= sizeof zisofs_magic ) {
      if ( !memcmp(inbuf, zisofs_magic, sizeof zisofs_magic) )
	force_compress = 1;
    }

    set_731(curptr, position); curptr += 4;
    
    /* We have two special cases: a zero-length block is defined as all zero,
       and a block the length of which is equal to the block size is unencoded. */

    for ( i = 0 ; i < CBLOCK_SIZE ; i++ ) {
      if ( inbuf[i] ) break;
    }

    if ( i == CBLOCK_SIZE ) {
      /* All-zero block.  No output */
    } else {
      cbytes = 2*CBLOCK_SIZE;
      if ( compress2(outbuf, &cbytes, inbuf, bytes, level) != Z_OK )
	return -1;		/* Compression failure */
      
      if ( fwrite(outbuf, 1, cbytes, output) != cbytes )
	return -1;
      
      position += cbytes;
    }
    block++;
  }

  /* Set pointer to the end of the final block */
  set_731(curptr, position);

  /* Now write the pointer table */
  if ( fseek(output, sizeof hdr, SEEK_SET) == -1 )
    return -1;
    
  if ( fwrite(pointer_block, 1, pointer_bytes, output) != pointer_bytes )
    return -1;

  /* Now make sure that this was actually the right thing to do */
  if ( !force_compress && position >= size ) {
    /* Incompressible file, just copy it */
    rewind(input);
    rewind(output);

    position = 0;
    while ( (bytes = fread(inbuf, 1, CBLOCK_SIZE, input)) > 0 ) {
      if ( fwrite(inbuf, 1, bytes, output) != bytes )
	return -1;
      position += bytes;
    }

    /* Truncate the file to the correct size */
    fflush(output);
    ftruncate(fileno(output), position);
  }

  /* If we get here, we're done! */
  return 0;
}

/*
 * block_compress_path - compress the file "inpath" into the new file
 * "outpath" by way of block_compress_file().
 *
 *   inpath   file to read
 *   outpath  file to create (or truncate) and write
 *   size     expected length of the input, passed through unchanged
 *
 * Returns 0 on success and -1 on failure with errno describing the first
 * failure.  A failing fclose() on the output counts as a failure, since
 * that is where buffered write errors are reported.
 */
int block_compress_path(const char *inpath, const char *outpath, unsigned long size)
{
  FILE *in, *out;
  int err, rv;

  in = fopen(inpath, "rb");
  if ( !in )
    return -1;
  out = fopen(outpath, "wb");
  if ( !out ) {
    err = errno;
    fclose(in);
    errno = err;
    return -1;
  }
  rv = block_compress_file(in, out, size);

  err = errno;			/* Just in case */
  fclose(in);
  if ( fclose(out) == EOF && rv == 0 ) {
    /* The final flush failed: the output file is incomplete */
    err = errno;
    rv = -1;
  }
  errno = err;
  return rv;
}

/*
 * Hash table used to find hard-linked files.  Entries are added by
 * hash_insert_file() and looked up by hash_find_file(); nothing in this
 * file ever removes one.
 */
#define HASH_BUCKETS 	  2683	/* Number of chains in hashp[] */

/* One input file with several links that has already been written out */
struct file_hash {
  struct file_hash *next;	/* Next entry in the same bucket */
  struct stat st;		/* Stat data of the input file */
  const char *outfile_name;	/* Output path that later names are linked to */
};

/* Bucket heads, indexed by (st_ino + st_dev) % HASH_BUCKETS */
static struct file_hash *hashp[HASH_BUCKETS];

const char *hash_find_file(struct stat *st)
{
  int bucket = (st->st_ino + st->st_dev) % HASH_BUCKETS;
  struct file_hash *hp;

  for ( hp = hashp[bucket] ; hp ; hp = hp->next ) {
    if ( hp->st.st_ino   == st->st_ino &&
	 hp->st.st_dev   == st->st_dev &&
	 hp->st.st_mode  == st->st_mode &&
	 hp->st.st_nlink == st->st_nlink &&
	 hp->st.st_uid   == st->st_uid &&
	 hp->st.st_gid   == st->st_gid &&
	 hp->st.st_size  == st->st_size &&
	 hp->st.st_mtime == st->st_mtime ) {
      /* Good enough, it's the same file */
      return hp->outfile_name;
    }
  }
  return NULL;			/* No match */
}

/*
 * hash_insert_file - record an input file so that further names of the
 * same file can be hard-linked instead of being written again.
 *
 *   st       stat data of the INPUT file
 *   outfile  path of the OUTPUT file to link to; the string is copied
 *
 * The new entry is placed at the head of its bucket.  Allocation goes
 * through xmalloc()/xstrdup(), which exit the program on failure.
 */
void hash_insert_file(struct stat *st, const char *outfile)
{
  struct file_hash *node = xmalloc(sizeof *node);
  int bucket = (st->st_ino + st->st_dev) % HASH_BUCKETS;

  node->st           = *st;
  node->outfile_name = xstrdup(outfile);

  /* Push onto the front of the chain */
  node->next    = hashp[bucket];
  hashp[bucket] = node;
}


/*
 * compress_tree - recreate the directory "intree" as the new directory
 * "outtree", compressing every regular file on the way.
 *
 *   intree   existing directory to read
 *   outtree  directory to create; mkdir() must succeed, so it must not
 *            exist yet
 *
 * Per directory entry (as classified by lstat()):
 *   - regular files are written with block_compress_path();
 *   - directories are handled by recursion;
 *   - symbolic links are recreated with the same target text;
 *   - anything else is recreated with mknod().
 * Regular files and special nodes with a link count above one are tracked
 * in the hard-link hash so that later names become hard links to the first
 * output file.
 *
 * Ownership, mode and timestamps are then copied to the new entry; the
 * results of those calls are deliberately ignored (best effort).
 *
 * Returns 0 on success and non-zero after the first failure, which has
 * already been reported on stderr.  Processing stops at that failure.
 */
int compress_tree(const char *intree, const char *outtree)
{
  char buffer[BUFSIZ];
  char *in_path, *out_path, *in_file, *out_file;
  DIR *thisdir = NULL;
  struct dirent *dirent;
  struct stat st;
  struct utimbuf ut;
  int err = 0;

  /* Construct buffers with the common filename prefix, and point to the end */

  in_path = xmalloc(strlen(intree) + NAME_MAX + 2);
  out_path = xmalloc(strlen(outtree) + NAME_MAX + 2);

  strcpy(in_path, intree);
  strcpy(out_path, outtree);

  in_file = strchr(in_path, '\0');
  out_file = strchr(out_path, '\0');

  *in_file++ = '/';
  *out_file++ = '/';

  /* Open the directory */
  thisdir = opendir(intree);
  if ( !thisdir ) {
    fprintf(stderr, "%s: Failed to open directory %s: %s\n",
	    program, intree, strerror(errno));
    err = 1;
    goto out;
  }

  /* Create output directory */
  if ( mkdir(outtree, 0700) ) {
    fprintf(stderr, "%s: Cannot create output directory %s: %s\n",
	    program, outtree, strerror(errno));
    err = 1;
    goto out;
  }

  while ( (dirent = readdir(thisdir)) != NULL ) {
    if ( !strcmp(dirent->d_name, ".") ||
	 !strcmp(dirent->d_name, "..") )
      continue;			/* Ignore . and .. */

    strcpy(in_file, dirent->d_name);
    strcpy(out_file, dirent->d_name);

    if ( lstat(in_path, &st) ) {
      fprintf(stderr, "%s: Failed to stat file %s: %s\n",
	      program, in_path, strerror(errno));
      err = 1;
      break;
    }

    if ( S_ISDIR(st.st_mode) ) {
      /* Recursion: see recursion */
      err = compress_tree(in_path, out_path);
      if ( err )
	break;
    } else if ( S_ISLNK(st.st_mode) ) {
      int chars;

      /* Keep one byte free for the terminator; readlink() adds none */
      if ( (chars = readlink(in_path, buffer, sizeof buffer - 1)) < 0 ) {
	fprintf(stderr, "%s: readlink failed for %s: %s\n",
		program, in_path, strerror(errno));
	err = 1;
	break;
      }
      buffer[chars] = '\0';
      if ( symlink(buffer, out_path) ) {
	fprintf(stderr, "%s: symlink %s -> %s failed: %s\n",
		program, out_path, buffer, strerror(errno));
	err = 1;
	break;
      }
    } else {
      /* Regular file or special node.  With more than one link it may be
	 another name of something that has already been written. */
      const char *linkname = NULL;

      if ( st.st_nlink > 1 )
	linkname = hash_find_file(&st);

      if ( linkname != NULL ) {
	/* We've seen it before, hard link it */
	if ( link(linkname, out_path) ) {
	  fprintf(stderr, "%s: hard link %s -> %s failed: %s\n",
		  program, out_path, linkname, strerror(errno));
	  err = 1;
	  break;
	}
      } else {
	/* First (or only) encounter: create it */
	if ( S_ISREG(st.st_mode) ) {
	  if ( block_compress_path(in_path, out_path, st.st_size) ) {
	    fprintf(stderr, "%s: %s: %s\n", program, in_path, strerror(errno));
	    err = 1;
	    break;
	  }
	} else {
	  if ( mknod(out_path, st.st_mode, st.st_rdev) ) {
	    fprintf(stderr, "%s: mknod failed for %s: %s\n",
		    program, out_path, strerror(errno));
	    err = 1;
	    break;
	  }
	}

	/* Remember it so that its other names can be linked to it */
	if ( st.st_nlink > 1 )
	  hash_insert_file(&st, out_path);
      }
    }

    /* Copy the metadata, best effort */
#ifdef HAVE_LCHOWN
    lchown(out_path, st.st_uid, st.st_gid);
#endif
    if ( !S_ISLNK(st.st_mode) ) {
#ifndef HAVE_LCHOWN
      chown(out_path, st.st_uid, st.st_gid);
#endif
      chmod(out_path, st.st_mode);
      ut.actime  = st.st_atime;
      ut.modtime = st.st_mtime;
      utime(out_path, &ut);
    }
  }

 out:
  /* Single exit point: release everything on success and failure alike */
  if ( thisdir )
    closedir(thisdir);

  free(in_path);
  free(out_path);

  return err;
}

/*
 * usage - print the one-line command synopsis on stderr and terminate.
 *
 *   err  exit status for the process (main() passes 0 for -h and 1 for
 *        a bad command line)
 *
 * Does not return.
 */
static void usage(int err)
{
  fprintf(stderr, "Usage: %s [-vfh] [-z level] intree outtree\n", program);

  exit(err);
}

/*
 * main - parse the command line and compress one directory tree.
 *
 * Options:
 *   -f        always keep the compressed form (sets "force")
 *   -z level  compression level, a single digit 0..9
 *   -v        increase verbosity
 *   -h        print the usage line and exit successfully
 *
 * The two remaining arguments are the input tree and the output tree.
 * The input root is examined with stat(), so it may be a symbolic link
 * to a directory.  After a successful run the owner, mode and timestamps
 * of the input root are copied to the output root (best effort).
 *
 * Exit status: 0 on success, non-zero on any error.
 */
int main(int argc, char *argv[])
{
  const char *in, *out;
  struct stat st;
  struct utimbuf ut;
  int opt, err;

  program = argv[0];

  /* getopt() signals the end of the options with -1 */
  while ( (opt = getopt(argc, argv, "vfz:h")) != -1 ) {
    switch(opt) {
    case 'f':
      force = 1;		/* Always compress */
      break;
    case 'z':
      /* Exactly one decimal digit is accepted */
      if ( optarg[0] < '0' || optarg[0] > '9' || optarg[1] ) {
	fprintf(stderr, "%s: invalid compression level: %s\n",
		program, optarg);
	exit(1);
      }
      level = optarg[0] - '0';
      break;
    case 'h':
      usage(0);
      break;
    case 'v':
      verbosity++;
      break;
    default:
      usage(1);
      break;
    }
  }

  if ( (argc-optind) != 2 )
    usage(1);

  in  = argv[optind];		/* Input tree */
  out = argv[optind+1];		/* Output tree */

  umask(077);

  /* Special case: we use stat() for the root, not lstat() */
  if ( stat(in, &st) ) {
    fprintf(stderr, "%s: %s: %s\n", program, in, strerror(errno));
    exit(1);
  }
  if ( !S_ISDIR(st.st_mode) ) {
    fprintf(stderr, "%s: %s: Not a directory\n", program, in);
    exit(1);			/* Do not go on to process a non-directory */
  }

  err = compress_tree(in, out);

  if ( err )
    exit(err);

  /* Give the output root the metadata of the input root, best effort */
  chown(out, st.st_uid, st.st_gid);
  chmod(out, st.st_mode);
  ut.actime  = st.st_atime;
  ut.modtime = st.st_mtime;
  utime(out, &ut);

  return 0;
}
