/*
 * Written by Bastien Chevreux (BaCh)
 *
 * Copyright (C) 2003 and later by Bastien Chevreux
 *
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the 
 * Free Software Foundation, Inc., 
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 * 
 */

// 	$Id$	

#ifndef lint
// Version-control identification string for this file. "$Id$" looks like an
//  RCS/CVS-style keyword placeholder (presumably expanded on checkout).
// The array only exists when 'lint' is NOT defined, so any code that
//  references vcid is only valid under the same condition.
static char vcid[] = "$Id$";
#endif /* lint */

#include "assembly.H"



/*************************************************************************
 * Works a bit like assemble(): runs assemble() and saveResults(), then
 *  sorts the reads of the read pool into 'good' reads and the rest.
 *
 * A read is thought of as 'good' if
 *  - it is part of a contig that contains at least two reads (i.e., it
 *    is not a singlet), OR
 *  - it carries at least one of the tags checked below
 *    (Read::REA_tagentry_idSROr, ..idSIOr, ..idSAOr, ..idSRMr, ..idWRMr)
 *
 * Then, all reads marked as good are saved into CAF files by strain,
 *  i.e., each strain gets its own CAF, with repeat tags (SRMx, WRMx)
 *  and allelic variances tags (PALV) and possibly SNP tags (PSNP etc.)
 * The strains are collected from all reads of the pool (not only the good
 *  ones), so a strain without any good read still gets an (empty) file.
 *  Reads with an empty strain name are filed under the name "default".
 * All reads NOT marked as good are saved together into one additional
 *  CAF file which is reported under the pseudo strain name "remain".
 *
 * File names are built from the project output name (as_projectname_out):
 *    <projectname>_snpsinSTRAIN_<strainname>.caf
 *    <projectname>_nosnps_remain.caf
 *
 * Side effect: removeGapsFromRead() is called on every read of the pool
 *  right before it is written.
 *
 * If an output file cannot be opened, a warning is printed to cout and
 *  processing continues; the file name is still part of the result.
 *
 * Function returns a vector of strings where elements are paired by two:
 *    filename1    strainname1
 *    filename2    strainname2 etc...
 *  the last pair always being the "remain" file.
 *
 *************************************************************************/

//#define CEBUG(bla)   {cout << bla; cout.flush(); }
vector<string> Assembly::assembleESTs()
{
#ifndef lint
  // vcid is only declared when lint is not defined; touch it here so that
  //  the compiler does not complain about an unused static
  (void) vcid;
#endif /* lint */

  FUNCSTART("Assembly::assembleESTs()");

  //AS_miraparams[0].setPathfinderuseGenomicAlgorithms(true);

  assemble();
  saveResults();

  // goodreads[i] is true if read i of the read pool is a 'good' read
  vector<bool> goodreads(AS_readpool.size(),false);
  {
    CEBUG(AS_contigs.size() << " contigs in list.\n");

    // step 1: every read sitting in a contig with >= 2 reads is good
    // iterate up to end() instead of counting against list::size(),
    //  which may be linear in older standard libraries
    for(list<Contig>::const_iterator cI=AS_contigs.begin();
        cI!=AS_contigs.end();
        ++cI) {
      if(cI->getNumReadsInContig()<2) {
        continue;
      }
      const vector<Contig::contigread_t> & cr= cI->getContigReads();
      for(uint32 j=0; j<cr.size(); j++){
        // entries with a negative orpid have no read pool index to mark
        if(cr[j].orpid>=0) goodreads[cr[j].orpid]=true;
      }
    }

    // step 2: reads carrying one of the tags below are good, too
    for(uint32 i=0; i<AS_readpool.size(); i++) {
      try {
        Read & actread=AS_readpool.getRead(i);
        if(actread.hasTag(Read::REA_tagentry_idSROr)
           || actread.hasTag(Read::REA_tagentry_idSIOr)
           || actread.hasTag(Read::REA_tagentry_idSAOr)
           || actread.hasTag(Read::REA_tagentry_idSRMr)
           || actread.hasTag(Read::REA_tagentry_idWRMr)) {
          goodreads[i]=true;
        }
      }
      catch(...){
        // best effort on purpose: a read for which the tag check throws
        //  simply keeps the state it got from the contig scan above
        CEBUG("Caught: " << AS_readpool.getRead(i).getName() << '\n');
      }
      CEBUG("GR[" << AS_readpool.getRead(i).getName() << "]: " << goodreads[i] << endl);
    }
  }

  // collect the strains present in the read pool in order of first
  //  appearance; goodstrainids[k] and goodstrainnames[k] belong together
  vector<int32> goodstrainids;
  vector<string> goodstrainnames;
  for(uint32 i=0; i<AS_readpool.size();i++){
    Read & actread=AS_readpool.getRead(i);
    const int32 strainid=actread.getStrainID();
    if(find(goodstrainids.begin(), goodstrainids.end(), strainid)
       == goodstrainids.end()) {
      goodstrainids.push_back(strainid);
      const string & strainname=actread.getStrain();
      if(strainname.empty()){
        goodstrainnames.push_back("default");
      }else{
        goodstrainnames.push_back(strainname);
      }
    }
  }

  vector<string> returns;
  {
    const string outbasename=AS_miraparams[0].getAssemblyParams().as_projectname_out;
    Read::setCoutType(Read::AS_CAF);

    // one CAF per strain, containing the good reads of that strain
    for(uint32 i=0; i<goodstrainids.size(); i++){
      cout << "Writing CAF for reads for strain " << goodstrainnames[i] << endl;
      const string filename=outbasename+"_snpsinSTRAIN_"+goodstrainnames[i]+".caf";
      returns.push_back(filename);
      returns.push_back(goodstrainnames[i]);
      ofstream cafout(filename.c_str(), ios::out | ios::trunc);
      if(!cafout){
        // do not fail silently; the loop below still runs so that the
        //  reads are degapped exactly as in the successful case
        cout << "WARNING: could not open " << filename
             << " for writing, reads of this strain are not saved.\n";
      }
      for(uint32 j=0; j<AS_readpool.size(); j++){
        Read & actread=AS_readpool.getRead(j);
        if(goodreads[j] && actread.getStrainID()==goodstrainids[i]){
          actread.removeGapsFromRead();
          cafout << actread;
        }
      }
      cafout.close();
    }

    // also put reads with no potential SNPs detected into an own "STRAIN"
    {
      const string filename=outbasename+"_nosnps_remain.caf";
      returns.push_back(filename);
      returns.push_back("remain");
      ofstream cafout(filename.c_str(), ios::out | ios::trunc);
      if(!cafout){
        cout << "WARNING: could not open " << filename
             << " for writing, remaining reads are not saved.\n";
      }
      for(uint32 j=0; j<AS_readpool.size(); j++){
        if(!goodreads[j]) {
          Read & actread=AS_readpool.getRead(j);
          actread.removeGapsFromRead();
          cafout << actread;
        }
      }
      cafout.close();
    }

  }

  FUNCEND();

  return returns;
}
//#define CEBUG(bla)   {cout << bla; cout.flush(); }

