
// stringtokenizer.cpp
// 
// Copyright (c) 1998-2002 by The VoxBo Development Team

// This file is part of VoxBo
// 
// VoxBo is free software: you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// 
// VoxBo is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// General Public License for more details.
// 
// You should have received a copy of the GNU General Public License
// along with VoxBo.  If not, see <http://www.gnu.org/licenses/>.
// 
// For general information on VoxBo, including the latest complete
// source code and binary distributions, manual, and associated files,
// see the VoxBo home page at: http://www.voxbo.org/
// 
// original version written by Kosh Banerjee

/*********************************************************************
* This class is used to retrieve tokens from a string. The first     *
* token in the string is the zeroth token.                           *
*********************************************************************/
#include "stringtokenizer.h"

/*********************************************************************
* Default constructor: tokenizes the empty string with an empty set  *
* of delimiters, so the resulting object holds no tokens.            *
*********************************************************************/
StringTokenizer::StringTokenizer()
{
  const string nothing;
  this->init(nothing, nothing);
} // StringTokenizer::StringTokenizer()


/*********************************************************************
* Constructs a tokenizer from a C-style string, using a single space *
* as the only field delimiter.                                       *
*                                                                    *
* myString - the NUL-terminated text to tokenize. A null pointer is  *
*            treated as the empty string, since building a string    *
*            object from a null pointer is undefined behavior.       *
*********************************************************************/
StringTokenizer::StringTokenizer(const char *myString)
{
  this->init(string(myString ? myString : ""), string(" "));
} // StringTokenizer::StringTokenizer(const char *myString)

/*********************************************************************
* Builds a tokenizer for the given string object. Tokens are split   *
* on the default delimiter, a single space character.                *
*********************************************************************/
StringTokenizer::StringTokenizer(const string& myString)
{
  const string spaceOnly(" ");
  this->init(myString, spaceOnly);
} // StringTokenizer::StringTokenizer(const string& myString)

/*********************************************************************
* Constructs a tokenizer from a C-style string and a C-style string  *
* of delimiter characters.                                           *
*                                                                    *
* myString - the NUL-terminated text to tokenize.                    *
* myDelims - the NUL-terminated set of delimiter characters.         *
*                                                                    *
* A null pointer for either argument is treated as the empty string, *
* because building a string object from a null pointer is undefined  *
* behavior.                                                          *
*********************************************************************/
StringTokenizer::StringTokenizer(const char *myString, const char *myDelims)
{
  const string text(myString ? myString : "");
  const string delims(myDelims ? myDelims : "");
  this->init(text, delims);
} // StringTokenizer::StringTokenizer(const char *myString, const char *myDelims)

/*********************************************************************
* Builds a tokenizer from two string objects: myString is the text   *
* to split and myDelims is the set of delimiter characters.          *
*********************************************************************/
StringTokenizer::StringTokenizer(const string& myString, const string& myDelims)
{
  this->init(myString, myDelims);
} // StringTokenizer::StringTokenizer(const string& myString, const string& myDelims)

/*********************************************************************
* Constructs a tokenizer from a string object and a C-style string   *
* of delimiter characters.                                           *
*                                                                    *
* myString - the text to tokenize.                                   *
* myDelims - the NUL-terminated set of delimiter characters. A null  *
*            pointer is treated as an empty delimiter set, since     *
*            building a string object from a null pointer is         *
*            undefined behavior.                                     *
*********************************************************************/
StringTokenizer::StringTokenizer(const string& myString, const char *myDelims)
{
  this->init(myString, string(myDelims ? myDelims : ""));
} // StringTokenizer::StringTokenizer(const string& myString, const char *myDelims)

/*********************************************************************
* Constructs a tokenizer from a C-style string and a string object   *
* of delimiter characters.                                           *
*                                                                    *
* myString - the NUL-terminated text to tokenize. A null pointer is  *
*            treated as the empty string, since building a string    *
*            object from a null pointer is undefined behavior.       *
* myDelims - the set of delimiter characters.                        *
*********************************************************************/
StringTokenizer::StringTokenizer(const char *myString, const string& myDelims)
{
  this->init(string(myString ? myString : ""), myDelims);
} // StringTokenizer::StringTokenizer(const char *myString, const string& myDelims)

/*********************************************************************
* Copy constructor: duplicates every data member of theStringTokens, *
* including the current position and the status flags.               *
*********************************************************************/
StringTokenizer::StringTokenizer(const StringTokenizer& theStringTokens)
{
  const StringTokenizer& other = theStringTokens;

/*********************************************************************
* The text, its delimiters, and the tokens extracted from them.      *
*********************************************************************/
  this->theString = other.theString;
  this->theDelimiters = other.theDelimiters;
  this->theTokens = other.theTokens;
  this->theTokenStarts = other.theTokenStarts;

/*********************************************************************
* The read position and the two status flags.                        *
*********************************************************************/
  this->currentToken = other.currentToken;
  this->empty = other.empty;
  this->success = other.success;
} // StringTokenizer::StringTokenizer(const StringTokenizer& theStringTokens)

/*********************************************************************
* Stores the text and the delimiter set, rewinds the read position   *
* to the first token, and splits the text into tokens.               *
*                                                                    *
* myString - the text to tokenize.                                   *
* myDelims - the characters that separate tokens.                    *
*********************************************************************/
void StringTokenizer::init(const string& myString, const string& myDelims)
{
  this->theString = myString;
  this->theDelimiters = myDelims;
  this->currentToken = 0;
  this->tokenize();

/*********************************************************************
* The text yields no tokens when it is empty or made up entirely of  *
* delimiter characters. In that case this->empty becomes true and    *
* this->success becomes false; otherwise the flags are reversed.     *
*********************************************************************/
  const bool gotTokens = (this->theTokens.size() > 0);
  this->empty = !gotTokens;
  this->success = gotTokens;
} // void StringTokenizer::init(const string& myString, const string& myDelims)

/*********************************************************************
* Destructor. There is nothing to release explicitly.                *
*********************************************************************/
StringTokenizer::~StringTokenizer()
{
} // StringTokenizer::~StringTokenizer()

/*********************************************************************
* Rewinds the tokenizer: this->currentToken goes back to 0 and       *
* this->success is set to true.                                      *
*********************************************************************/
void StringTokenizer::reset()
{
  this->success = true;
  this->currentToken = 0;
} // void StringTokenizer::reset()

/*********************************************************************
* Returns the token at this->currentToken and then advances          *
* this->currentToken to the following token. Once all the tokens     *
* have been handed out, the empty string is returned and             *
* this->success is set to false. Typical use:                        *
*                                                                    *
*   while (myTokenizer.getCurrentToken().size())                     *
*   {                                                                *
*     // do something                                                *
*   } // while                                                       *
*                                                                    *
*********************************************************************/
string StringTokenizer::getCurrentToken()
{

/*********************************************************************
* No tokens are left: flag the failure and return the empty string.  *
*********************************************************************/
  if (this->currentToken >= this->theTokens.size())
  {
    this->success = false;
    return string("");
  } // if

/*********************************************************************
* Hand out the current token and step to the next one.               *
*********************************************************************/
  this->success = true;
  return this->theTokens[this->currentToken++];

} // string StringTokenizer::getCurrentToken()

/*********************************************************************
* Returns the length of the token at this->currentToken without      *
* advancing it. this->success is set to true when such a token       *
* exists; otherwise it is set to false and 0 is returned.            *
*********************************************************************/
unsigned long StringTokenizer::getCurrentTokenLength()
{
  const bool haveToken = (this->currentToken < this->theTokens.size());
  this->success = haveToken;

  if (!haveToken)
  {
    return 0;
  } // if

  return this->theTokens[this->currentToken].size();
} // unsigned long StringTokenizer::getCurrentTokenLength()

/*********************************************************************
* Returns the token at this->currentToken without advancing          *
* this->currentToken. this->success is set to true when such a       *
* token exists; otherwise it is set to false and the empty string    *
* is returned.                                                       *
*********************************************************************/
string StringTokenizer::getSameToken()
{
  if (this->currentToken >= this->theTokens.size())
  {
    this->success = false;
    return string("");
  } // if

  this->success = true;
  return this->theTokens[this->currentToken];
} // string StringTokenizer::getSameToken()

/*********************************************************************
* Returns token number i, counting from zero. this->success is set   *
* to true when i is a valid index; otherwise it is set to false      *
* and the empty string is returned.                                  *
*********************************************************************/
string StringTokenizer::getToken(const unsigned int i)
{
  const bool inRange = (i < this->theTokens.size());
  this->success = inRange;

  if (!inRange)
  {
    return string("");
  } // if

  return this->theTokens[i];
} // string StringTokenizer::getToken(const unsigned int i)

/*********************************************************************
* Returns the length of token number i, counting from zero.          *
* this->success is set to true when i is a valid index; otherwise    *
* it is set to false and 0 is returned.                              *
*********************************************************************/
unsigned long StringTokenizer::getTokenLength(const unsigned long i)
{
  if (i >= this->theTokens.size())
  {
    this->success = false;
    return 0;
  } // if

  this->success = true;
  return this->theTokens[i].size();
} // unsigned long StringTokenizer::getTokenLength(const unsigned long i)

/*********************************************************************
* Reports whether the character at index i of this->theString is     *
* one of the characters in this->theDelimiters.                      *
*                                                                    *
* The character is fetched with at(), so an out-of-range i raises    *
* an exception unless the delimiter set is empty, in which case      *
* false is returned without looking at the string.                   *
*********************************************************************/
bool StringTokenizer::isDelimiter(const unsigned long i) const
{
  if (this->theDelimiters.empty())
  {
    return false;
  } // if

  const char candidate = this->theString.at(i);
  return this->theDelimiters.find(candidate) != string::npos;
} // bool StringTokenizer::isDelimiter(const unsigned long i) const

/*********************************************************************
* Splits this->theString into tokens. A token is a maximal run of    *
* characters that are not in this->theDelimiters. Each token is      *
* appended to this->theTokens and the index of its first character   *
* is appended to this->theTokenStarts.                               *
*********************************************************************/
void StringTokenizer::tokenize()
{

/*********************************************************************
* Discard the results of any earlier tokenization.                   *
*********************************************************************/
  if (!this->theTokens.empty())
  {
    this->theTokens.clear();
    this->theTokenStarts.clear();
  } // if

/*********************************************************************
* tokenBegin is the index of the first character of the next token,  *
* or string::npos once only delimiters (or nothing) remain.          *
*********************************************************************/
  string::size_type tokenBegin =
    this->theString.find_first_not_of(this->theDelimiters);

  while (tokenBegin != string::npos)
  {

/*********************************************************************
* The token stops at the next delimiter or at the end of the text.   *
*********************************************************************/
    string::size_type tokenEnd =
      this->theString.find_first_of(this->theDelimiters, tokenBegin);
    if (tokenEnd == string::npos)
    {
      tokenEnd = this->theString.size();
    } // if

    this->theTokens.push_back(this->theString.substr(tokenBegin, tokenEnd - tokenBegin));
    this->theTokenStarts.push_back(static_cast<unsigned long>(tokenBegin));

/*********************************************************************
* Skip the delimiters that follow the token just stored.             *
*********************************************************************/
    tokenBegin = this->theString.find_first_not_of(this->theDelimiters, tokenEnd);

  } // while

} // void StringTokenizer::tokenize()

/*********************************************************************
* Debugging aid: writes the current values of the data members to    *
* standard output, followed by every token and every token start     *
* index, one per line.                                               *
*********************************************************************/
void StringTokenizer::toString() const
{

  cout << "this->theString =    [" << this->theString << "]" << endl
       << "this->theString.size() =    [" << this->theString.size() << "]" << endl;
  cout << "this->currentToken = [" << this->currentToken << "]" << endl
       << "this->empty = [" << this->empty << "]" << endl
       << "this->success = [" << this->success << "]" << endl;
  cout << "this->theDelimiters =   [" << this->theDelimiters << "]" << endl
       << "this->theTokens.size() =    [" << this->theTokens.size() << "]" << endl;

  cout << "THE TOKENS BEGIN:" << endl;
  for (unsigned long k = 0; k < this->theTokens.size(); k++)
  {
    const string& token = this->theTokens[k];
    cout << token << "\n";
  } // for k
  cout << "THE TOKENS END:" << endl;

  cout << "THE TOKEN INDICES BEGIN:" << endl;
  for (unsigned long k = 0; k < this->theTokenStarts.size(); k++)
  {
    const unsigned long start = this->theTokenStarts[k];
    cout << start << "\n";
  } // for k
  cout << "THE TOKEN INDICES END:" << endl;

} // void StringTokenizer::toString() const

/*********************************************************************
* Replaces the text being tokenized with s and tokenizes it again,   *
* keeping the delimiters already stored in this->theDelimiters.      *
*********************************************************************/
void StringTokenizer::setString(const string& s)
{
  const string& sameDelims = this->theDelimiters;
  this->init(s, sameDelims);
} // void StringTokenizer::setString(const string& s)

/*********************************************************************
* Replaces the text being tokenized with the C-style string s and    *
* tokenizes it again, keeping the delimiters already stored in       *
* this->theDelimiters.                                               *
*                                                                    *
* s - the NUL-terminated text to tokenize. A null pointer is treated *
*     as the empty string, since building a string object from a     *
*     null pointer is undefined behavior.                            *
*********************************************************************/
void StringTokenizer::setString(const char *s)
{
  this->init(string(s ? s : ""), this->theDelimiters);
} // void StringTokenizer::setString(const char *s)

/*********************************************************************
* Const overload: returns the length of the token at                 *
* this->currentToken, or 0 when no such token exists. Neither        *
* this->currentToken nor this->success is modified.                  *
*********************************************************************/
unsigned long StringTokenizer::getCurrentTokenLength() const
{
  if (this->currentToken >= this->theTokens.size())
  {
    return 0;
  } // if

  return this->theTokens[this->currentToken].size();
} // unsigned long StringTokenizer::getCurrentTokenLength() const

/*********************************************************************
* Returns the index in this->theString at which the token at         *
* this->currentToken begins. When there are no tokens, or            *
* this->currentToken is past the last token, -1 is returned.         *
*********************************************************************/
int StringTokenizer::getCurrentTokenStart() const
{
  const bool noSuchToken =
    this->empty || (this->currentToken >= this->theTokens.size());

  if (noSuchToken)
  {
    return -1;
  } // if

  return this->theTokenStarts[this->currentToken];
} // int StringTokenizer::getCurrentTokenStart() const

/*********************************************************************
* Returns the index in this->theString at which token number i       *
* (counting from zero) begins. When there are no tokens, or i is     *
* not a valid token index, -1 is returned.                           *
*********************************************************************/
int StringTokenizer::getTokenStart(const unsigned int i) const
{
  const bool noSuchToken = this->empty || (i >= this->theTokens.size());

  if (noSuchToken)
  {
    return -1;
  } // if

  return this->theTokenStarts[i];
} // int StringTokenizer::getTokenStart(const unsigned int i) const

/*********************************************************************
* Assembles the tokens in the inclusive range [begin, end] into a    *
* single string, inserting delims between consecutive tokens.        *
*                                                                    *
* INPUT VARIABLES:   TYPE:           DESCRIPTION:                    *
* ----------------   -----           ------------                    *
* begin       const unsigned long    The index of the first token.   *
*                                    Tokens are indexed from zero.   *
* end                unsigned long   The index of the last token.    *
*                                    Values past the final token     *
*                                    are clamped to the final        *
*                                    token.                          *
* delims             const string&   The text inserted between       *
*                                    consecutive tokens.             *
*                                                                    *
* OUTPUT VARIABLES:   TYPE:          DESCRIPTION:                    *
* -----------------   -----          ------------                    *
* N/A                 string         The assembled tokens, or the    *
*                                    empty string when the range     *
*                                    selects no tokens.              *
*                                                                    *
* this->success is set to true when at least one token is returned   *
* and to false when there are no tokens, begin is not a valid token  *
* index, or end is less than begin.                                  *
*********************************************************************/
string StringTokenizer::getTokenRange(const unsigned long begin,
unsigned long end, const string& delims)
{

  const unsigned long tokenCount = this->theTokens.size();

/*********************************************************************
* With no tokens at all, or with begin past the last token, there is *
* nothing to assemble. Checking the token count first also keeps     *
* the unsigned subtraction below from wrapping around.               *
*********************************************************************/
  if ( (tokenCount == 0) || (begin >= tokenCount) )
  {
    this->success = false;
    return "";
  } // if

/*********************************************************************
* An end index past the last token is clamped to the last token.     *
*********************************************************************/
  if (end >= tokenCount)
  {
    end = tokenCount - 1;
  } // if

/*********************************************************************
* An inverted range selects no tokens.                               *
*********************************************************************/
  if (end < begin)
  {
    this->success = false;
    return "";
  } // if

  this->success = true;

/*********************************************************************
* Join tokens begin through end, placing delims after every token    *
* except the last one.                                               *
*********************************************************************/
  string tokens;
  for (unsigned long i = begin; i <= end; i++)
  {
    tokens += this->theTokens[i];
    if (i < end)
    {
      tokens += delims;
    } // if
  } // for i

  return tokens;

} // string StringTokenizer::getTokenRange(const unsigned long begin,
  // unsigned long end, const string& delims)

/*********************************************************************
* Assembles the tokens in the inclusive range [begin, end] into a    *
* single string, inserting the whole of this->theDelimiters between  *
* consecutive tokens. The work is delegated to the three-argument    *
* overload of getTokenRange().                                       *
*                                                                    *
* INPUT VARIABLES:   TYPE:           DESCRIPTION:                    *
* ----------------   -----           ------------                    *
* begin       const unsigned long    The index of the first token.   *
*                                    Tokens are indexed from zero.   *
* end                unsigned long   The index of the last token.    *
*                                                                    *
* OUTPUT VARIABLES:   TYPE:          DESCRIPTION:                    *
* -----------------   -----          ------------                    *
* N/A                 string         The string of assembled tokens. *
*********************************************************************/
string StringTokenizer::getTokenRange(const unsigned long begin,
unsigned long end)
{
  const string& separator = this->theDelimiters;
  return this->getTokenRange(begin, end, separator);
} // string StringTokenizer::getTokenRange(const unsigned long begin,
  // unsigned long end)
