/*===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 */

#include <sra/extern.h>

#include <sra/sradb.h>
#include <sra/srapath.h>
#include <sra/types.h>
#include <sra/sraschema.h>
#include <vdb/schema.h>
#include <vdb/table.h>
#include <vdb/cursor.h>
#include <vdb/vdb-priv.h>
#include <kdb/meta.h>
#include <kdb/table.h>
#include <kdb/kdb-priv.h>
#include <klib/refcount.h>
#include <klib/log.h>
#include <klib/debug.h>
#include <klib/rc.h>
#include <klib/text.h>
#include <kfs/toc.h>
#include <kfs/file.h>
#include <sysalloc.h>

#include "sra-priv.h"

#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>

/* Destroy
 */
/* column_release
 *  VectorWhack callback: hands one stored item to SRAColumnRelease
 *  the second argument is unused
 */
static
void CC column_release ( void *item, void *ignore )
{
    const void *col = item;

    SRAColumnRelease ( col );
}

/* SRATableDestroy
 *  tears down a table object: whacks the "wcol" vector (releasing each
 *  entry through column_release), releases the cursor, metadata and
 *  VTable, severs the manager attachment and finally frees "self".
 *
 *  "self" [ IN ] - table to destroy; dereferenced without a NULL check
 */
void SRATableDestroy ( SRATable *self )
{
    VectorWhack ( & self -> wcol, column_release, NULL );
    VCursorRelease(self->curs);
    KMetadataRelease ( self -> meta );
    VTableRelease ( self -> vtbl );
    /* the manager is dropped with Sever (dependent reference), not Release */
    SRAMgrSever ( self -> mgr );

    free ( self );
}

/* AddRef
 *  adds one reference to the table
 *
 *  a NULL "self" is accepted and reported as success
 *  returns an rcRange/rcExcessive error when the refcount reports krefLimit
 */
LIB_EXPORT rc_t CC SRATableAddRef( const SRATable *self )
{
    if ( self == NULL )
        return 0;

    if ( KRefcountAdd ( & self -> refcount, "SRATable" ) == krefLimit )
        return RC ( rcSRA, rcTable, rcAttaching, rcRange, rcExcessive );

    return 0;
}

/* Release
 *  drops one reference from the table
 *
 *  a NULL "self" is accepted and reported as success
 *  when the refcount reports krefWhack the table is handed to SRATableWhack
 *  when it reports krefNegative an error is logged and returned
 */
LIB_EXPORT rc_t CC SRATableRelease( const SRATable *self )
{
    int action;

    if ( self == NULL )
        return 0;

    action = KRefcountDrop(&self->refcount, "SRATable");

    if ( action == krefWhack )
        return SRATableWhack ( ( SRATable* ) self );

    if ( action == krefNegative )
    {
        rc_t rc = RC (rcSRA, rcTable, rcDestroying, rcSelf, rcDestroyed);
        PLOGERR (klogInt,(klogInt, rc, "Released an SRATable $(B) with no more references",
                  PLOG_P(self)));
        return rc;
    }

    return 0;
}

/* Attach
 *  adds a dependent reference to the table
 *
 *  returns "self" stripped of const, or NULL when "self" is NULL
 *  or the refcount reports krefLimit
 */
SRATable *SRATableAttach ( const SRATable *self )
{
    if ( self != NULL &&
         KRefcountAddDep ( & self -> refcount, "SRATable" ) == krefLimit )
    {
        return NULL;
    }

    return ( SRATable* ) self;
}

/* Sever
 *  drops a dependent reference from the table
 *
 *  a NULL "self" is accepted and reported as success
 *  krefWhack hands the table to SRATableWhack
 *  krefLimit is reported as an rcRange/rcExcessive error
 */
rc_t SRATableSever ( const SRATable *self )
{
    int action;

    if ( self == NULL )
        return 0;

    action = KRefcountDropDep ( & self -> refcount, "SRATable" );

    if ( action == krefWhack )
        return SRATableWhack ( ( SRATable* ) self );

    if ( action == krefLimit )
        return RC ( rcSRA, rcTable, rcReleasing, rcRange, rcExcessive );

    return 0;
}


/* ReadSpotSequence_v1
 *  reads the spot count from the ".seq" metadata node
 *  into self->stats.spot_count
 *
 *  returns the rc of opening the node, or of reading it as U64
 */
static rc_t ReadSpotSequence_v1(SRATable *self)
{
    const KMDataNode *seq_node;
    rc_t rc;

    rc = KMetadataOpenNodeRead(self->meta, &seq_node, ".seq");
    if (rc != 0)
        return rc;

    rc = KMDataNodeReadAsU64(seq_node, &self->stats.spot_count);
    KMDataNodeRelease(seq_node);

    return rc;
}

/* LoadMetadata
 *  fills self->stats ( spot_count, base_count, min_spot_id, max_spot_id )
 *
 *  requires an open cursor ( asserted )
 *
 *  for metadata version <= 1 the spot count comes from the ".seq"
 *  metadata node; otherwise it is read from row 1 of the "SPOT_COUNT"
 *  column and a failure there is returned to the caller.
 *
 *  the remaining three values are optional: when the column cannot be
 *  added or read, the value falls back to a default ( 0, 1 and
 *  spot_count respectively ) and the error is discarded.
 */
rc_t SRATableLoadMetadata( SRATable *self )
{
    rc_t rc = 0;
    uint32_t idx, len;

    assert(self->curs != NULL);
    assert(self->curs_open == true);

    /* RD_META reads ONE element of sizeof(var)*8 bits from row 1 of column
       "nm" into "var". The buffer capacity handed to VCursorReadDirect is
       1 element - "var" is a single scalar, and the same file passes 1 for
       single-element reads in SRATableGetSpotId. Passing sizeof(var) here
       would overstate the buffer.
       NOTE(review): assumes "blen" is counted in elements - confirm against
       the VCursorReadDirect declaration. */
#define RD_META(nm, var, fail, dflt) \
    do { \
        if( (rc = VCursorAddColumn(self->curs, &idx, nm)) != 0 || \
            (rc = VCursorReadDirect(self->curs, 1, idx, sizeof(var) * 8, \
                                    &var, 1, &len)) != 0 ) { \
            if( fail ) return rc; \
            rc = 0; \
            var = dflt; \
        } \
    } while ( 0 )

    if(self->metavers <= 1 ) {
        rc = ReadSpotSequence_v1 ( self );
        if ( rc != 0 ) return rc;
    } else {
        RD_META("SPOT_COUNT", self->stats.spot_count, true, 0);
    }
    RD_META("BASE_COUNT", self->stats.base_count, false, 0);
    RD_META("MIN_SPOT_ID", self->stats.min_spot_id, false, 1);
    RD_META("MAX_SPOT_ID", self->stats.max_spot_id, false, self->stats.spot_count);

    /* keep the helper macro local to this function */
#undef RD_META

    return rc;
}

/* FillOut
 *  permits post-open column adds on the cursor, opens it,
 *  marks it open and then loads the table statistics
 *
 *  the first failing step's rc is returned
 */
LIB_EXPORT rc_t CC SRATableFillOut ( SRATable *self )
{
    /* both cursor operations are required to succeed */
    rc_t rc = VCursorPermitPostOpenAdd(self->curs);
    if ( rc == 0 )
        rc = VCursorOpen(self->curs);
    if ( rc != 0 )
        return rc;

    self -> curs_open = true;

    return SRATableLoadMetadata(self);
}


/* ResolveTablePath
 *  expands "spec" with "args" and produces a table path in "path"
 *
 *  "mgr" [ IN ] - manager whose "pmgr" member, when not NULL,
 *  is used to test and resolve the expanded name
 *
 *  "path" [ OUT ] and "psize" [ IN ] - output buffer and its capacity
 *
 *  "spec" [ IN ] and "args" [ IN ] - printf-style format and arguments
 *
 *  returns rcPath/rcExcessive when the expansion does not fit,
 *  rcBuffer/rcInsufficient when the fallback copy does not fit
 */
rc_t ResolveTablePath ( const SRAMgr *mgr,
        char *path, size_t psize, const char *spec, va_list args )
{
    char expanded [ 4096 ];
    const SRAPath *resolver = mgr -> pmgr;
    int n;

    /* without a path manager, or with a '/' in the spec, this cannot
       be an accession: format straight into the caller's buffer */
    if ( resolver == NULL || strchr( spec, '/' ) != NULL )
    {
        n = vsnprintf ( path, psize, spec, args );
        if ( n >= 0 && ( size_t ) n < psize )
            return 0;
        return RC ( rcSRA, rcTable, rcOpening, rcPath, rcExcessive );
    }

    /* expand into a local buffer first */
    n = vsnprintf ( expanded, sizeof expanded, spec, args );
    if ( n < 0 || ( size_t ) n >= sizeof expanded )
        return RC ( rcSRA, rcTable, rcOpening, rcPath, rcExcessive );

    /* only when the name does not test positive as given is the
       path manager asked to find it; success there is final */
    if ( ! SRAPathTest ( resolver, expanded ) &&
         SRAPathFind ( resolver, expanded, path, psize ) == 0 )
    {
        return 0;
    }

    /* fall back to the expanded name itself, terminator included */
    if ( ( size_t ) n >= psize )
        return RC ( rcSRA, rcTable, rcOpening, rcBuffer, rcInsufficient );
    memcpy ( path, expanded, ( size_t ) n + 1 );

    return 0;
}

/* VOpenTableRead
 *  open an existing table for reading
 *
 *  "rslt" [ OUT ] - return parameter for table; set to NULL on failure
 *
 *  "spec" [ IN ] and "args" [ IN ] - NUL terminated printf-style
 *  string and arguments giving path to table; resolved through
 *  ResolveTablePath
 *
 *  on any failure after allocation the partially built table
 *  is passed to SRATableWhack
 */
LIB_EXPORT rc_t CC SRAMgrVOpenTableRead ( const SRAMgr *self,
        const SRATable **rslt, const char *spec, va_list args )
{
    rc_t rc;

    if ( rslt == NULL )
        return RC ( rcSRA, rcTable, rcOpening, rcParam, rcNull );

    if ( self == NULL )
        rc = RC ( rcSRA, rcMgr, rcAccessing, rcSelf, rcNull );
    else if ( spec == NULL )
        rc = RC ( rcSRA, rcTable, rcOpening, rcName, rcNull );
    else if ( spec [ 0 ] == 0 )
        rc = RC ( rcSRA, rcTable, rcOpening, rcName, rcEmpty );
    else
    {
        char path [ 4096 ];
        rc = ResolveTablePath ( self, path, sizeof path, spec, args );
        if ( rc == 0 )
        {
            SRATable *tbl = calloc ( 1, sizeof *tbl );
            if ( tbl == NULL )
                rc = RC ( rcSRA, rcTable, rcConstructing, rcMemory, rcExhausted );
            else
            {
                VSchema *schema;

                /* each step runs only if everything before it succeeded */
                rc = VDBManagerMakeSRASchema ( self -> vmgr, & schema );
                if ( rc == 0 )
                {
                    rc = VDBManagerOpenTableRead ( self -> vmgr, & tbl -> vtbl, schema, path );
                    /* the schema is released whether or not the open worked */
                    VSchemaRelease ( schema );
                }
                if ( rc == 0 )
                    rc = VTableOpenMetadataRead ( tbl -> vtbl, & tbl -> meta );
                if ( rc == 0 )
                    rc = KMetadataVersion ( tbl -> meta, & tbl -> metavers );
                if ( rc == 0 )
                    rc = VTableCreateCursorRead ( tbl -> vtbl, & tbl -> curs );
                if ( rc == 0 )
                {
                    tbl -> mgr = SRAMgrAttach ( self );
                    tbl -> mode = self -> mode;
                    tbl -> read_only = true;
                    KRefcountInit ( & tbl -> refcount, 1, "SRATable", "OpenTableRead", path );

                    rc = SRATableFillOut ( tbl );
                    if ( rc == 0 )
                    {
                        * rslt = tbl;
                        return 0;
                    }
                }

                /* something failed: discard the partial table */
                SRATableWhack ( tbl );
            }
        }
    }

    * rslt = NULL;
    return rc;
}

/* OpenTableRead
 *  variadic front end: packages the arguments following "spec"
 *  into a va_list and forwards to SRAMgrVOpenTableRead
 */
LIB_EXPORT rc_t CC SRAMgrOpenTableRead ( const SRAMgr *self,
        const SRATable **tbl, const char *spec, ... )
{
    va_list ap;
    rc_t status;

    va_start ( ap, spec );
    status = SRAMgrVOpenTableRead ( self, tbl, spec, ap );
    va_end ( ap );

    return status;
}


/* Read - PRIVATE
 *  column message sent via table
 *
 *  opens the cursor on first use, then fetches the cell for row "id"
 *  of column "idx" via VCursorCellDataDirect
 *
 *  "base" [ OUT ] - pointer to the cell data
 *
 *  "offset" [ OUT ] - bit offset of the data from "base"
 *
 *  "size" [ OUT ] - size of the data in bits
 *
 *  all three return parameters are required; on any failure each
 *  non-NULL one is cleared ( NULL / 0 / 0 ) before returning the rc
 */
rc_t SRATableRead ( const SRATable *self, spotid_t id, uint32_t idx,
    const void **base, bitsz_t *offset, bitsz_t *size )
{
    rc_t rc;

    if ( base == NULL || offset == NULL || size == NULL )
        rc = RC ( rcSRA, rcColumn, rcReading, rcParam, rcNull );
    else if ( self == NULL )
        rc = RC ( rcSRA, rcTable, rcAccessing, rcSelf, rcNull );
    else
    {
        rc = 0;

        /* open cursor lazily; the flag is cached on the (logically const) table */
        if ( ! self -> curs_open )
        {
            rc = VCursorOpen(self->curs);
            if ( rc == 0 )
                ((SRATable *)self)->curs_open = true;
        }

        if ( rc == 0 )
        {
            uint32_t elem_bits, elem_off, elem_cnt;
            rc = VCursorCellDataDirect ( self -> curs, id, idx,
                & elem_bits, base, & elem_off, & elem_cnt );
            if ( rc == 0 )
            {
                /* widen BEFORE multiplying: the product of two 32-bit
                   values would otherwise wrap in 32 bits and only then
                   be stored in the bitsz_t result */
                * offset = ( bitsz_t ) elem_off * elem_bits;
                * size   = ( bitsz_t ) elem_cnt * elem_bits;
                return 0;
            }
        }
    }

    if ( base != NULL )
        * base = NULL;
    if ( offset != NULL )
        * offset = 0;
    if ( size != NULL )
        * size = 0;

    return rc;
}


/* BaseCount
 *  get the number of stored bases, as cached in self->stats
 *
 *  "rslt" [ OUT ] - return parameter for base count;
 *  set to 0 when "self" is NULL
 */
LIB_EXPORT rc_t CC SRATableBaseCount ( const SRATable *self, uint64_t *rslt )
{
    if ( rslt == NULL )
        return RC ( rcSRA, rcTable, rcAccessing, rcParam, rcNull );

    if ( self == NULL )
    {
        *rslt = 0;
        return RC ( rcSRA, rcTable, rcAccessing, rcSelf, rcNull );
    }

    *rslt = self->stats.base_count;
    return 0;
}


/* SpotCount
 *  get the number of stored spots, as cached in self->stats
 *
 *  "rslt" [ OUT ] - return parameter for spot count;
 *  set to 0 when "self" is NULL
 */
LIB_EXPORT rc_t CC SRATableSpotCount ( const SRATable *self, uint64_t *rslt )
{
    if ( rslt == NULL )
        return RC ( rcSRA, rcTable, rcAccessing, rcParam, rcNull );

    if ( self == NULL )
    {
        *rslt = 0;
        return RC ( rcSRA, rcTable, rcAccessing, rcSelf, rcNull );
    }

    *rslt = self->stats.spot_count;
    return 0;
}

/* MinSpotId
 *  returns the minimum spot id, as cached in self->stats
 *
 *  "rslt" [ OUT ] - return parameter for the first ( lowest ) spot id;
 *  set to 0 when "self" is NULL
 */
LIB_EXPORT rc_t CC SRATableMinSpotId ( const SRATable *self, spotid_t *rslt )
{
    if ( rslt == NULL )
        return RC ( rcSRA, rcTable, rcAccessing, rcParam, rcNull );

    if ( self == NULL )
    {
        *rslt = 0;
        return RC ( rcSRA, rcTable, rcAccessing, rcSelf, rcNull );
    }

    *rslt = self->stats.min_spot_id;
    return 0;
}

/* MaxSpotId
 *  returns the maximum spot id, as cached in self->stats
 *
 *  "rslt" [ OUT ] - return parameter for the last ( highest ) spot id;
 *  set to 0 when "self" is NULL
 */
LIB_EXPORT rc_t CC SRATableMaxSpotId ( const SRATable *self, spotid_t *rslt )
{
    if ( rslt == NULL )
        return RC ( rcSRA, rcTable, rcAccessing, rcParam, rcNull );

    if ( self == NULL )
    {
        *rslt = 0;
        return RC ( rcSRA, rcTable, rcAccessing, rcSelf, rcNull );
    }

    *rslt = self->stats.max_spot_id;
    return 0;
}


/* GetSpotId
 *  convert spot name to spot id
 *
 *  "rslt" [ OUT, NULL OKAY ] - return parameter for the matching
 *  spot id; only written on success
 *
 *  "spot_name" [ IN ] - external spot name string
 *  in platform canonical format.
 *
 *  sets the cursor parameter "QUERY_BY_NAME" to "spot_name", then reads
 *  row 1 of column "SPOT_IDS_FOUND" into a { start_id, id_count, x, y }
 *  record. a count of 1 yields start_id directly; a larger count scans
 *  rows [ start_id, start_id + id_count ) for one whose "X" and "Y"
 *  cells equal the record's x and y.
 *
 *  returns rcNotFound when no row qualifies. NOTE(review): once the
 *  "SPOT_IDS_FOUND" read has succeeded, any later error ( adding "X"/"Y"
 *  or reading them ) is also replaced by rcNotFound.
 */
LIB_EXPORT rc_t CC SRATableGetSpotId ( const SRATable *self,
        spotid_t *rslt, const char *spot_name )
{
    rc_t rc;
    if( self == NULL || spot_name == NULL){
        rc=RC(rcSRA, rcTable, rcListing, rcSelf, rcName);
    } else {
        /* hand the name to the cursor as a query parameter */
        rc=VCursorParamsSet((struct VCursorParams*)self->curs,"QUERY_BY_NAME" ,spot_name);
        if( rc == 0) {
            /* layout of the single element read from "SPOT_IDS_FOUND" */
            struct {
                uint64_t start_id;
                uint64_t id_count;
                int64_t x;
                int64_t y;
            } out;
            uint32_t idx,len;
            rc = VCursorAddColumn(self->curs, &idx, "SPOT_IDS_FOUND");
            /* a column that was already added is not an error here */
            if( rc == 0 || GetRCState(rc) == rcExists){
                /* one element of sizeof(out)*8 bits, buffer capacity 1 element */
                rc = VCursorReadDirect(self->curs,1,idx,sizeof(out) * 8,&out, 1 , &len);
                if ( rc == 0 ) {
                    if(out.id_count==1) {
                        /* unique hit */
                        if(rslt) *rslt=out.start_id;
                        return 0;
                    } else if(out.id_count > 1) { /*** doing table range scan in Name space - not relying on X and Y***/
                        uint32_t x_idx;
                        rc = VCursorAddColumn(self->curs, &x_idx, "X");
                        if( rc == 0 || GetRCState(rc) == rcExists){
                            uint32_t y_idx;
                            rc = VCursorAddColumn(self->curs, &y_idx, "Y");
                            if(rc == 0 || GetRCState(rc) == rcExists){
                                spotid_t rowid;
                                /* first row whose X and Y both match wins;
                                   rows that fail to read are skipped */
                                for(rowid = out.start_id; rowid < out.start_id + out.id_count; rowid ++){
                                    int32_t x,y;
                                    rc = VCursorReadDirect(self->curs,rowid,x_idx,32,&x,1, &len);
                                    if(rc == 0){
                                        rc = VCursorReadDirect(self->curs,rowid,y_idx,32,&y,1, &len);
                                        if(rc == 0 && x==out.x && y==out.y){
                                            if(rslt) *rslt=rowid;
                                            return 0;
                                        }
                                    }
                                }
                            }
                        }
                    }
                    /* reached for id_count == 0, an unsuccessful scan,
                       or a failure inside the scan set-up */
                    rc = RC ( rcSRA, rcIndex, rcSearching, rcColumn, rcNotFound );
                }
            }
        }
    }
    return rc;
}


/* ListCol
 *  returns a list of simple column names
 *  by forwarding to VTableListCol on the underlying VTable
 *
 *  "rslt" [ OUT ] - return parameter for names list
 */
LIB_EXPORT rc_t CC SRATableListCol( const SRATable *self, SRANamelist **rslt ) {
    if ( self == NULL )
        return RC(rcSRA, rcTable, rcListing, rcSelf, rcName);

    return VTableListCol ( self -> vtbl, (KNamelist **)rslt );
}


/* ColDatatypes
 *  returns list of typedecls for named column
 *  by forwarding to VTableColumnDatatypes on the underlying VTable
 *
 *  "col" [ IN ] - column name
 *
 *  "dflt_idx" [ OUT ] - passed through unchanged to VTableColumnDatatypes
 *
 *  "rslt" [ OUT ] - list of datatypes available for named column
 */
LIB_EXPORT rc_t CC SRATableColDatatypes( const SRATable *self,
        const char *col, uint32_t *dflt_idx, SRANamelist **rslt ) {
    if ( self == NULL )
        return RC(rcSRA, rcTable, rcListing, rcSelf, rcName);

    return VTableColumnDatatypes ( self -> vtbl, col, dflt_idx, (KNamelist **)rslt );
}

/* ColDatatype
 *  reports the typedecl and/or typedef of cursor column "idx"
 *
 *  "type" [ OUT, NULL OKAY ] and "def" [ OUT, NULL OKAY ] - at least
 *  one must be supplied; a missing one is redirected to scratch storage
 *
 *  on failure both outputs are zeroed
 */
rc_t SRATableColDatatype ( const SRATable *self,
        uint32_t idx, VTypedecl *type, VTypedef *def )
{
    rc_t rc;
    union { VTypedecl td; VTypedef def; } scratch;

    if ( type == NULL && def == NULL )
        return RC ( rcSRA, rcColumn, rcAccessing, rcParam, rcNull );

    /* at most one of the two can be NULL here */
    if ( type == NULL )
        type = & scratch . td;
    else if ( def == NULL )
        def = & scratch . def;

    if ( idx == 0 )
        rc = RC ( rcSRA, rcColumn, rcAccessing, rcSelf, rcNull );
    else if ( self == NULL )
        rc = RC ( rcSRA, rcTable, rcAccessing, rcSelf, rcNull );
    else
    {
        VTypedesc desc;

        rc = VCursorDatatype ( self -> curs, idx, type, & desc );
        if ( rc == 0 )
            rc = VSchemaDescribeTypedef ( VCursorGetSchema(self -> curs), def, type -> type_id );
        if ( rc == 0 )
            return 0;
    }

    memset ( type, 0, sizeof * type );
    memset ( def, 0, sizeof * def );

    return rc;
}

/* MetaRevision
 *  returns current metadata revision number
 *  as reported by KMetadataRevision
 *
 *  "rslt" [ OUT ] - return parameter, passed through to KMetadataRevision
 */
LIB_EXPORT rc_t CC SRATableMetaRevision( const SRATable *self, uint32_t *rslt )
{
    if ( self != NULL )
        return KMetadataRevision (self->meta,rslt);

    return RC(rcSRA, rcTable, rcAccessing, rcSelf, rcNull);
}


/* MaxMetaRevision
 *  returns the maximum metadata revision available
 *  as reported by KMetadataMaxRevision
 *
 *  "rslt" [ OUT ] - return parameter, passed through to KMetadataMaxRevision
 */
LIB_EXPORT rc_t CC SRATableMaxMetaRevision( const SRATable *self, uint32_t *rslt )
{
    if ( self != NULL )
        return KMetadataMaxRevision(self->meta,rslt);

    return RC(rcSRA, rcTable, rcAccessing, rcSelf, rcNull);
}

/* UseMetaRevision
 *  switches the table to the indicated revision of its metadata
 *
 *  refused with rcBusy unless the table is flagged read_only
 *  nothing is done when "revision" is already the current one
 *  otherwise the requested revision is opened and, on success,
 *  replaces the previously held metadata object
 */
LIB_EXPORT rc_t CC SRATableUseMetaRevision ( const SRATable *cself, uint32_t revision )
{
    rc_t rc;
    uint32_t current;
    SRATable *self;
    const KMetadata *replacement;

    if ( cself == NULL )
        return RC ( rcSRA, rcTable, rcUpdating, rcSelf, rcNull );
    if ( ! cself -> read_only )
        return RC ( rcSRA, rcTable, rcUpdating, rcMetadata, rcBusy );

    rc = KMetadataRevision ( cself -> meta, & current );
    if ( rc != 0 || current == revision )
        return rc;

    /* the swap mutates the table even though it was passed as const */
    self = ( SRATable* ) cself;
    rc = KMetadataOpenRevision ( self -> meta, & replacement, revision );
    if ( rc == 0 )
    {
        KMetadataRelease ( self -> meta );
        self -> meta = replacement;
    }

    return rc;
}

/* OpenMDataNodeRead
 *  open a metadata node
 *
 *  "node" [ OUT ] - return parameter for metadata node
 *
 *  "path" [ IN ] - simple or hierarchical NUL terminated
 *  path to node, followed by its format arguments
 *
 *  variadic front end for SRATableVOpenMDataNodeRead
 */
LIB_EXPORT rc_t CC SRATableOpenMDataNodeRead( const SRATable *self, 
        struct KMDataNode const **node, const char *path, ... ) {
    rc_t status;
    va_list ap;

    va_start(ap, path);
    status = SRATableVOpenMDataNodeRead(self, node, path, ap);
    va_end(ap);

    return status;
}

/* VOpenMDataNodeRead
 *  va_list form: forwards to KMetadataVOpenNodeRead
 *  on the table's metadata object
 */
LIB_EXPORT rc_t CC SRATableVOpenMDataNodeRead( const SRATable *self,
        struct KMDataNode const **rslt, const char *path, va_list args ) {
    if (self != NULL)
        return KMetadataVOpenNodeRead(self->meta, rslt, path, args);

    return RC(rcSRA, rcTable, rcAccessing, rcSelf, rcNull);
}

/* GetIdRange - private
 *  asks the cursor for the page id range of column "idx"
 *  that contains row "id"
 *
 *  "pfirst" [ OUT, NULL OKAY ] and "plast" [ OUT, NULL OKAY ] - at
 *  least one must be supplied; a missing one is redirected to a dummy
 *
 *  fails with rcRange/rcExcessive when either 64-bit bound does not
 *  survive conversion to spotid_t; on failure both outputs are zeroed
 */
rc_t SRATableGetIdRange ( const SRATable *self,
        uint32_t idx, spotid_t id, spotid_t *pfirst, spotid_t *plast )
{
    rc_t rc;
    spotid_t unused;

    if ( pfirst == NULL && plast == NULL )
        return RC ( rcSRA, rcColumn, rcAccessing, rcParam, rcNull );

    /* at most one of the two can be NULL here */
    if ( pfirst == NULL )
        pfirst = & unused;
    else if ( plast == NULL )
        plast = & unused;

    if ( idx == 0 )
        rc = RC ( rcSRA, rcColumn, rcAccessing, rcSelf, rcNull );
    else if ( self == NULL )
        rc = RC ( rcSRA, rcTable, rcAccessing, rcSelf, rcNull );
    else
    {
        int64_t lo, hi;

        rc = VCursorPageIdRange ( self -> curs, idx, id, & lo, & hi );
        if ( rc == 0 )
        {
            * pfirst = ( spotid_t ) lo;
            * plast = ( spotid_t ) hi;

            /* round-trip check detects values that were truncated */
            if ( ( int64_t ) * pfirst == lo && ( int64_t ) * plast == hi )
                return 0;

            rc = RC ( rcSRA, rcColumn, rcAccessing, rcRange, rcExcessive );
        }
    }

    * pfirst = * plast = 0;

    return rc;
}

/* GetVTableRead - semi-private for sra-dbcc
 *  hands out the underlying VTable with a new reference attached
 *
 *  "rslt" [ OUT ] - return parameter for the VTable;
 *  set to NULL when "self" is NULL
 */
LIB_EXPORT rc_t CC SRATableGetVTableRead( const SRATable *self, const VTable **rslt )
{
    if (rslt == NULL)
        return RC(rcSRA, rcTable, rcAccessing, rcParam, rcNull);

    if (self != NULL)
    {
        *rslt = self->vtbl;
        return VTableAddRef(*rslt);
    }

    * rslt = NULL;
    return RC(rcSRA, rcTable, rcAccessing, rcSelf, rcNull);
}

/* GetKTableRead
 *  obtains the KTable behind the table's VTable
 *  by forwarding to VTableGetKTableRead
 *
 *  "ktbl" [ OUT ] - return parameter for the KTable;
 *  set to NULL when "self" is NULL
 */
LIB_EXPORT rc_t CC SRATableGetKTableRead ( const SRATable *self, struct KTable const **ktbl )
{
    if ( ktbl == NULL )
        return RC ( rcSRA, rcTable, rcAccessing, rcParam, rcNull );

    if ( self != NULL )
        return VTableGetKTableRead ( self -> vtbl, ktbl );

    * ktbl = NULL;
    return RC ( rcSRA, rcTable, rcAccessing, rcSelf, rcNull );
}

/* Locked
 *  returns the result of VTableLocked on the underlying VTable,
 *  or false when "self" is NULL
 */
LIB_EXPORT bool CC SRATableLocked( const SRATable *self )
{
    if ( self == NULL )
        return false;

    return VTableLocked(self->vtbl);
}

/* GetSchema
 *  returns the schema reported by the table's cursor,
 *  or NULL when "self" is NULL
 */
LIB_EXPORT struct VSchema const* CC SRATableGetSchema ( struct SRATable const *self )
{
    if ( self == NULL )
        return NULL;

    return VCursorGetSchema( self->curs );
}

/* DEBUG_SORT
 *  debug trace helper used by the single-file-archive sort code below
 */
#define DEBUG_SORT(msg) DBGMSG (DBG_SRA, DBG_FLAG(DBG_SRA_SORT), msg)

/* sfa_filter
 *  archive member filter
 *
 *  returns false only for names beginning with "idx/fuse-";
 *  every other name yields true
 *
 *  "dir" and "ignore" are unused
 */
static
bool CC sfa_filter(const KDirectory *dir, const char *leaf, void *ignore)
{
    /* anything outside "idx/" passes */
    if( strncmp(leaf, "idx/", 4) != 0 )
        return true;

    /* inside "idx/", reject members whose name starts with "fuse-" */
    return strncmp(leaf + 4, "fuse-", 5) != 0;
}

/* sfa_filter_light
 *  archive member filter for "lightweight" archives
 *
 *  returns false for exactly "col/SIGNAL", "col/POSITION",
 *  "col/INTENSITY", "col/NOISE" and "col/QUALITY2"; every other
 *  name is passed on to sfa_filter for the final decision
 *
 *  "dir" and "ignore" are only forwarded to sfa_filter
 */
static
bool CC sfa_filter_light(const KDirectory *dir, const char *leaf, void *ignore)
{
    const char * path = leaf;
    bool ret = true;

    /* NOTE(review): assuming string_rchr returns the LAST '/', the second
       pass sees a slash-free component and can never match "col/", so in
       effect only a leading "col/" is recognized - confirm intent. */
    for (;;)
    {
        size_t sz = string_size ( path );

        /* strncmp stops at the terminator, so a name shorter than
           "col/" is never read past its end ( memcmp would be ) */
        if ( strncmp ( path, "col/", 4 ) == 0 )
        {
            /* index through "path" - the pointer that was just matched */
            switch( path [ 4 ] )
            {
            case 'S':
                ret = strcmp(&path[5], "IGNAL") != 0;
                break;
            case 'P':
                ret = strcmp(&path[5], "OSITION") != 0;
                break;
            case 'I':
                ret = strcmp(&path[5], "NTENSITY") != 0;
                break;
            case 'N':
                ret = strcmp(&path[5], "OISE") != 0;
                break;
            case 'Q':
                ret = strcmp(&path[5], "UALITY2") != 0;
                break;
            }
            break;
        }
        else
        {
            const char * slash = string_rchr ( path, sz, '/' );

            if ( slash == NULL )
                break;

            /* continue with whatever follows that slash */
            path = slash + 1;
        }
    }

    return ret ? sfa_filter(dir, leaf, ignore) : false;
}

/* sfa_path_type_id
 *  ordering classes used when sorting archive members;
 *  a lower value sorts earlier ( see sfa_path_cmp )
 */
enum sfa_path_type_id
{
    sfa_not_set = -1,
    sfa_non_column,
    sfa_required,
    sfa_preferred,
    sfa_optional
};

/* reorder_t
 *  one sortable archive member: its path, file size and ordering class
 */
typedef struct reorder_t
{
    const char * path;
    uint64_t     size;
    int          type_id;
}reorder_t;

/* sfa_path_type
 *  classifies a member path into one of the sfa_path_type_id classes
 *
 *  "path" [ IN ] - NUL terminated member path
 *
 *  under "col/": INTENSITY, NOISE, POSITION, QUALITY2 and SIGNAL are
 *  optional; NAME_FMT, NAME, SPOT_NAME, X and Y are preferred; any
 *  other column is required.
 *  "idx/skey" and "skey", alone or with a ".md5" suffix, are preferred.
 *  everything else is non-column.
 *
 *  NOTE(review): any path that starts with 'c' but is not under "col/"
 *  is also classified as sfa_required - kept as is, confirm intent.
 */
static
int sfa_path_type ( const char *path )
{
    /* match-and-advance: on a match "ptr" is moved past "str".
       strncmp stops at the terminator, so a path shorter than the
       literal is never read past its end ( memcmp would be ) */
#define MATCH( ptr, str ) \
    ( ( strncmp ( ptr, str, sizeof ( str ) - 1 ) == 0 ) ? \
      ( ( ptr ) += sizeof ( str )  - 1 ) : ( const char* ) 0 )

    /* use first character as distinguisher for match */
    switch ( path [ 0 ] )
    {
    case 'c':
        /* perhaps it's a column */
        if ( MATCH ( path, "col/" ) )
        {
            /* "path" now points at the column name */
            switch ( path [ 0 ] )
            {
            case 'I':
                if ( MATCH ( path, "INTENSITY/" ) )
                    return sfa_optional;
                break;
            case 'N':
                if ( MATCH ( path, "NAME_FMT/" ) )
                    return sfa_preferred;
                if ( MATCH ( path, "NAME/" ) )
                    return sfa_preferred;
                if ( MATCH ( path, "NOISE/" ) )
                    return sfa_optional;
                break;
            case 'P':
                if ( MATCH ( path, "POSITION/" ) )
                    return sfa_optional;
                break;
            case 'Q':
                if ( MATCH ( path, "QUALITY2/" ) )
                    return sfa_optional;
                break;
            case 'S':
                if ( MATCH ( path, "SIGNAL/" ) )
                    return sfa_optional;
                if ( MATCH ( path, "SPOT_NAME/" ) )
                    return sfa_preferred;
                break;
            case 'X':
            case 'Y':
                /* single-letter column names only */
                if ( path [ 1 ] == '/' )
                    return sfa_preferred;
                break;
            }
        }
        return sfa_required;

    case 'i':
        /* look for skey index */
        if ( MATCH ( path, "idx/skey" ) )
        {
            if ( path [ 0 ] == 0 || strcmp ( path, ".md5" ) == 0 )
                return sfa_preferred;
        }
        break;

    case 's':
        /* look for old skey index */
        if ( MATCH ( path, "skey" ) )
        {
            if ( path [ 0 ] == 0 || strcmp ( path, ".md5" ) == 0 )
                return sfa_preferred;
        }
        break;
    }

#undef MATCH

    /* anything not recognized is non-column required */
    return sfa_non_column;
}

/* sfa_path_cmp
 *  ordering callback over pointers to reorder_t
 *
 *  orders by type_id first, then by ascending size,
 *  and finally by strcmp of the paths
 *
 *  the third argument is unused
 */
static
int CC sfa_path_cmp ( const void **_a, const void **_b, void * ignored )
{
    const reorder_t * a = *_a;
    const reorder_t * b = *_b;
    int ret;

    DEBUG_SORT(("%s enter\t%s %u %lu \t%s %u %lu", __func__, 
                a->path, a->type_id, a->size, b->path, b->type_id, b->size));

    ret = a->type_id - b->type_id;
    if (ret == 0)
    {
        /* same class: smaller file first, path as the tie breaker */
        if (a->size != b->size)
            ret = (a->size > b->size) ? 1 : -1;
        else
            ret = strcmp (a->path, b->path);
    }

    DEBUG_SORT(("\t%d\n", ret));
    return ret;
}

typedef
struct to_nv_struct
{
    const KDirectory * d;
    Vector * v;
    rc_t rc;
} to_nv_struct;

/* to_nv
 *  VectorForEach callback: wraps one path in a freshly allocated
 *  reorder_t ( path, file size, ordering class ) and appends it
 *  to the vector named in the context
 *
 *  "_item" [ IN ] - the path string
 *
 *  "_data" [ IN, OUT ] - to_nv_struct context; once its rc is
 *  non-zero all further items are skipped
 */
static
void CC  to_nv (void * _item, void * _data)
{
    to_nv_struct * ctx = _data;
    const char * member = _item;

    DEBUG_SORT(("%s enter\n", __func__));

    if (ctx->rc == 0)
    {
        reorder_t * entry = malloc (sizeof (*entry));

        if (entry == NULL)
        {
            ctx->rc = RC (rcSRA, rcVector, rcConstructing, rcMemory, rcExhausted);
        }
        else
        {
            rc_t rc = KDirectoryFileSize (ctx->d, &entry->size, member);

            if (rc == 0)
            {
                entry->path = member;
                entry->type_id = sfa_path_type (member);
                rc = VectorAppend (ctx->v, NULL, entry);
            }

            /* the entry is owned by the vector only after a successful append */
            if (rc != 0)
            {
                free (entry);
                ctx->rc = rc;
            }
        }
    }

    DEBUG_SORT(("%s exit\n", __func__));
}

/* item_whack
 *  VectorWhack callback: frees one heap-allocated item
 *  the second argument is unused
 */
static
void CC item_whack (void * item, void * ignored)
{
    void * entry = item;

    free (entry);
}

/* sfa_sort
 *  reorders the path vector "v" in place
 *
 *  "dir" [ IN ] - directory the paths are assumed to be relative to;
 *  used to look up each file's size
 *
 *  "v" [ IN, OUT ] - vector of path strings
 *
 *  builds a parallel vector of reorder_t items, orders it with
 *  sfa_path_cmp, then writes the paths back into "v" in that order.
 *  the temporary items are always freed.
 */
static
rc_t CC sfa_sort ( const KDirectory *dir, Vector *v )
{
    /* assume "v" is a vector of paths - hopefully relative to "dir" */
    Vector nv;
    to_nv_struct to_nv_data;
    uint32_t base;

    DEBUG_SORT(("%s enter\n", __func__));

    /* the temporary vector uses the same start index as "v" */
    base = VectorStart (v);
    VectorInit (&nv, base, VectorLength (v));
    to_nv_data.d = dir;
    to_nv_data.v = &nv;
    to_nv_data.rc = 0;

    VectorForEach (v, false, to_nv, &to_nv_data);

    if (to_nv_data.rc == 0)
    {
        uint32_t idx;
        uint32_t limit = VectorLength (v) + base;

        VectorReorder (&nv, sfa_path_cmp, NULL);

        /* "idx" already runs over [ base, base + length ), so it addresses
           both vectors directly; adding "base" again would address the
           wrong slot whenever the start index is not zero */
        for (idx = base; idx < limit; ++idx)
        {
            const reorder_t * tmp;
            void * ignore;

            tmp = VectorGet (&nv, idx);
            to_nv_data.rc = VectorSwap (v, idx, tmp->path, &ignore);
            if (to_nv_data.rc)
                break;

        }
    }
    VectorWhack (&nv, item_whack, NULL);
    DEBUG_SORT(("%s exit %d %R\n", __func__, to_nv_data.rc, to_nv_data.rc));
    return to_nv_data.rc;
}




/* MakeSingleFileArchive
 *  makes a single-file-archive file from an SRA table
 *
 *  member order is decided by sfa_sort
 *
 *  "sfa" [ OUT ] - return parameter for the archive file;
 *  cleared to NULL before anything else is attempted
 *
 *  "lightweight" [ IN ] - when true, members are selected with
 *  sfa_filter_light instead of sfa_filter
 */
LIB_EXPORT rc_t CC SRATableMakeSingleFileArchive ( const SRATable *self, const KFile **sfa, bool lightweight )
{
    rc_t rc;
    const KTable *ktbl;
    const KDirectory *tbl_dir;

    if ( sfa == NULL )
        return RC ( rcSRA, rcFile, rcConstructing, rcParam, rcNull );

    *sfa = NULL;

    if ( self == NULL )
        return RC ( rcSRA, rcTable, rcAccessing, rcSelf, rcNull );

    rc = SRATableGetKTableRead ( self, & ktbl );
    if ( rc != 0 )
        return rc;

    rc = KTableGetDirectoryRead ( ktbl, & tbl_dir );
    if ( rc == 0 )
    {
        rc = KDirectoryOpenTocFileRead ( tbl_dir, sfa, sraAlign4Byte,
            lightweight ? sfa_filter_light : sfa_filter, NULL, sfa_sort );

        KDirectoryRelease ( tbl_dir );
    }

    /* the KTable is released whether or not the directory step worked */
    KTableRelease ( ktbl );

    return rc;
}
