#!/usr/bin/env python
"""A routine to clean up 454 sequencing data."""

# The module docstring has to be the first statement of the file (a
# __future__ import may legally follow it); placed after the import it
# is a discarded string expression and the module's __doc__ stays None.
from __future__ import division

# Module metadata.
__author__ = "Jens Reeder"
__copyright__ = "Copyright 2010, Jens Reeder, Rob Knight"
__credits__ = ["Jens Reeder", "Rob Knight"]
__license__ = "GPL"
__version__ = "0.91"
__maintainer__ = "Jens Reeder"
__email__ = "jens.reeder@gmail.com"
__status__ = "Release"

from os import makedirs, remove
from os.path import exists
from optparse import OptionParser

from cogent.app.util import get_tmp_filename

from Denoiser.preprocess import STANDARD_BACTERIAL_PRIMER
from Denoiser.flowgram_clustering import denoise_seqs, denoise_per_sample
from Denoiser.utils import create_dir, files_exist
from Denoiser.settings import PROJECT_HOME

def parse_command_line_parameters(commandline_args=None):
    """Parse and validate the denoiser command line.

    commandline_args: list of argument strings handed to
        OptionParser.parse_args(); with None optparse falls back to
        sys.argv[1:].

    Returns the (opts, args) pair produced by optparse. When --titanium
    is given, opts.error_profile, opts.low_cutoff and opts.high_cutoff
    are overwritten with the Titanium presets before returning.

    Every validation failure (missing/non-existent flowgram file,
    --split combined with --preprocess_fp, incomplete preprocess
    directory, missing error profile) is reported through
    parser.error(), which prints the message with the usage text and
    terminates the process.
    """
    # Function-scope import so the block does not depend on the
    # module-level import list.
    from os.path import join

    # join() gives a usable path whether or not PROJECT_HOME ends with a
    # path separator. The default used to be built without a separator
    # and the Titanium path with one, so one of the two was malformed
    # for any given PROJECT_HOME.
    flx_error_profile = join(PROJECT_HOME, 'Data', 'FLX_error_profile.dat')
    titanium_error_profile = join(PROJECT_HOME, 'Data',
                                  'Titanium_error_profile.dat')

    version = 'Version: %prog '+__version__
    example_usage = """

Example:
Run denoiser on flowgrams in 454Reads.sff.txt with read-to-barcode mapping in seqs.fna,
put results into Outdir, log progress in Outdir/random_dir/denoiser.log :

%prog -i 454Reads.sff.txt -f seqs.fna -v -o Outdir

Example:
Run denoiser on two flowgram files in 454Reads_1.sff.txt and 454Reads_2.sff.txt
with read-to-barcode mapping in seqs.fna, put results into Outdir,
log progress in Outdir/random_dir/denoiser.log :

%prog -i 454Reads_1.sff.txt,454Reads_2.sff.txt -f seqs.fna -v -o Outdir

Example:
Run denoiser on flowgrams in 454Reads.sff.txt with read-to-barcode mapping in seqs.fna,
split input files into libraries and process each library separately,
put results into Outdir, log progress in Outdir/random_dir/denoiser.log :

%prog -S -i 454Reads.sff.txt -f seqs.fna -v -o Outdir
"""
    usage = 'usage: %prog [options] -i data.sff.txt' + example_usage
    parser = OptionParser(usage=usage, version=version)

    parser.add_option('-v', '--verbose', action='store_true',
                      dest='verbose',
                      help=('Print information during execution '
                            'into log file [default: %default]'))

    parser.add_option('-i', '--input_file', action='store',
                      type='string', dest='sff_fp',
                      help=('path to flowgram file. '
                            'Separate several files by commas '
                            '[REQUIRED]'))

    parser.add_option('-f', '--fasta_fp', action='store',
                      type='string', dest='fasta_fp',
                      help=('path to fasta input file '
                            '[default: %default]'))

    parser.add_option('-o', '--output_dir', action='store',
                      type='string', dest='output_dir',
                      help=('path to output'
                            ' directory [default: %default]'))

    parser.add_option('-c', '--cluster', action='store_true',
                      dest='cluster',
                      help=('Use cluster/multiple CPUs for '
                            'flowgram alignments [default: %default]'))

    parser.add_option('-p', '--preprocess_fp', action='store',
                      type='string', dest='preprocess_fp',
                      help=('Do not do preprocessing (phase I), '
                            'instead use already preprocessed '
                            'data in PREPROCESS_FP'))

    parser.add_option('-s', '--squeeze', action='store_true',
                      dest='squeeze',
                      help=('Use run-length encoding for prefix '
                            'filtering [default: %default]'))

    parser.add_option('-S', '--split', action='store_true',
                      dest='split',
                      help=('Split input into per library sets '
                            'and denoise separately [default: %default]'))

    parser.add_option('--force', action='store_true',
                      dest='force',
                      help=('Force overwrite of existing '
                            'directory [default: %default]'))

    parser.add_option('-l', '--log_file', action='store',
                      type='string', dest='log_fp',
                      help=('path to log file '
                            '[default: %default]'))

    parser.add_option('--primer', action='store',
                      type='string', dest='primer',
                      help=('primer sequence '
                            '[default: %default]'))

    parser.add_option('-n', '--num_cpus', action='store',
                      type='int', dest='num_cpus',
                      help=('number of cpus, requires -c '
                            '[default: %default]'))

    parser.add_option('-m', '--max_num_iterations', action='store',
                      type='int', dest='max_num_iter',
                      help=('maximal number of iterations in phase II '
                            '[default: %default]'))

    parser.add_option('-b', '--bail_out', action='store',
                      type='int', dest='bail',
                      help=('stop clustering in phase II with '
                            'clusters smaller or equal than BAIL '
                            'after first cluster phase '
                            '[default: %default]'))

    parser.add_option('--percent_id', action='store',
                      type='float', dest='percent_id',
                      help=('sequence similarity clustering '
                            'threshold [default: %default]'))

    parser.add_option('--low_cut-off', action='store',
                      type='float', dest='low_cutoff',
                      help=('low clustering threshold for phase II '
                            '[default: %default]'))

    parser.add_option('--high_cut-off', action='store',
                      type='float', dest='high_cutoff',
                      help=('high clustering threshold for phase III '
                            '[default: %default]'))

    parser.add_option('--low_memory', action='store_true',
                      dest='low_memory',
                      help=('Use slower, low '
                            'memory method [default: %default]'))

    parser.add_option('-e', '--error_profile', action='store',
                      dest='error_profile',
                      help=('path to error profile '
                            '[default= %default]'))

    # A matching --flx shortcut (FLX error profile, --low_cut-off=3.75,
    # --high_cut-off=4.5) might be needed once Titanium becomes the
    # default.

    # The help text names the real option --high_cut-off; it used to
    # advertise a non-existent --high_cut_off.
    parser.add_option('--titanium', action='store_true',
                      dest='titanium',
                      help=('shortcut for '
                            '-e ' + titanium_error_profile +
                            ' --low_cut-off=4 --high_cut-off=5 . '
                            'Warning: overwrites all previous values '
                            '[DEFAULT: %default]'))

    # Define defaults. split and titanium are registered explicitly so
    # that their help text shows "False" instead of "none"; both values
    # are falsy, so option handling is unaffected. input_fp has no
    # matching option and is kept only so opts.input_fp keeps existing.
    parser.set_defaults(verbose=False, cluster=False,
                        log_fp="denoiser.log", preprocess_fp=None,
                        primer=STANDARD_BACTERIAL_PRIMER,
                        sff_fp=None, input_fp=None, fasta_fp=None,
                        squeeze=False, split=False, titanium=False,
                        num_cpus=1, output_dir=None, percent_id=0.97,
                        bail=1, max_num_iter=None,
                        low_cutoff=3.75, high_cutoff=4.5, force=False,
                        low_memory=False,
                        error_profile=flx_error_profile)

    opts, args = parser.parse_args(commandline_args)

    # files_exist is handed the raw -i value, which may be a
    # comma-separated list; presumably it checks every entry -- confirm
    # in Denoiser.utils.
    if not opts.sff_fp:
        parser.error('Required option flowgram file path (-i) not specified')
    elif not files_exist(opts.sff_fp):
        parser.error('Flowgram file path does not exist:\n %s \n Pass a valid one via -i.'
                     % opts.sff_fp)

    if opts.split and opts.preprocess_fp:
        parser.error('Options --split and --preprocess_fp are exclusive')

    if opts.preprocess_fp:
        pp_fp = opts.preprocess_fp
        if not exists(pp_fp):
            parser.error('Specified preprocess directory does not exist: %s'
                         % pp_fp)
        # A reusable preprocess directory has to hold both phase I
        # result files.
        if not files_exist('%s/prefix_mapping.txt,%s/prefix_dereplicated.fasta'
                           % (pp_fp, pp_fp)):
            parser.error('Specified preprocess directory does not contain expected files: '
                         'prefix_mapping.txt and prefix_dereplicated.fasta')

    if opts.titanium:
        # The shortcut wins over explicitly passed -e / cut-off values.
        opts.error_profile = titanium_error_profile
        opts.low_cutoff = 4
        opts.high_cutoff = 5

    if not exists(opts.error_profile):
        parser.error('Specified error profile %s does not exist'
                     % opts.error_profile)

    return opts, args

def main(commandline_args=None):
    """Run the denoiser as configured on the command line.

    commandline_args: optional list of argument strings, passed straight
        to parse_command_line_parameters.

    Returns the path of the output directory that was handed to
    create_dir and to the denoising routine (the test suite relies on
    this).
    """
    opts, args = parse_command_line_parameters(commandline_args)

    # Either the user-supplied directory, forced to end on "/", or a
    # freshly named "denoiser_*" directory below the current one.
    out_dir = (opts.output_dir + "/" if opts.output_dir
               else get_tmp_filename(tmp_dir="", prefix="denoiser_",
                                     suffix="/"))

    # Second argument is the negated --force flag; presumably it makes
    # create_dir refuse an already existing directory -- confirm in
    # Denoiser.utils.
    create_dir(out_dir, not opts.force)

    # Both entry points take the same trailing positional arguments;
    # denoise_seqs additionally expects the preprocess path in front of
    # them.
    shared_tail = (opts.cluster, opts.num_cpus, opts.squeeze,
                   opts.percent_id, opts.bail, opts.primer,
                   opts.low_cutoff, opts.high_cutoff, opts.log_fp,
                   opts.low_memory, opts.verbose, opts.error_profile,
                   opts.max_num_iter)

    if not opts.split:
        denoise_seqs(opts.sff_fp, opts.fasta_fp, out_dir,
                     opts.preprocess_fp, *shared_tail)
    else:
        denoise_per_sample(opts.sff_fp, opts.fasta_fp, out_dir,
                           *shared_tail)

    # return outdir for tests/test_denoiser
    return out_dir

# Script entry point: run the denoiser only when this file is executed
# directly, so importing the module does not start a run.
if __name__ == "__main__":
    main()
