/*****************************************************************/
/*  University of Nebraska-Lincoln                               */
/*  Department of Electrical Engineering                         */
/*  Bioinformatics Group                                         */
/*  Sam Way                                                      */
/*  2/12/10                                                      */
/*****************************************************************/

/*********************************************************************************/
/* Included Header Files                                                         */
/*********************************************************************************/
#include "classifierdatabase.h"
#include <QFile>
#include <QStringList>
#include <QTextStream>
#include <QVector>

/*********************************************************************************/
/* Private Module Constants                                                      */
/*********************************************************************************/

/*********************************************************************************/
/* Constructors / Destructors                                                    */
/*********************************************************************************/
/*
 * Builds an empty database object: no items, flagged as not valid, word
 * length set to -1 and the update counter set to 0.
 */
ClassifierDatabase::ClassifierDatabase()
{
    m_items.clear();
    m_isValid = FALSE;
    m_wordLength = -1;
    // m_updateCount is otherwise only assigned by CreateDatabase() and
    // SaveDatabase(); give it a defined value so it can never be read while
    // indeterminate (e.g. after construction followed by ReadDatabase()).
    m_updateCount = 0;
}

/*********************************************************************************/
/* Private / Public Functions                                                    */
/*********************************************************************************/
/* Reports the current value of the validity flag (m_isValid). */
BOOLEAN ClassifierDatabase::IsValid()
{
    return this->m_isValid;
}

/*********************************************************************************/

/* Reports the word length currently stored in m_wordLength. */
INT ClassifierDatabase::WordLength()
{
    return this->m_wordLength;
}

/*********************************************************************************/

/*
 * Finds the first database item whose sequenceName contains modelName.
 * The comparison is a containment test, not an equality test.
 *
 * Returns the zero-based position of that item, or -1 when no item matches.
 */
INT ClassifierDatabase::GetIndex(QString modelName)
{
    const INT itemCount = m_items.length();
    INT position = 0;

    while (position < itemCount)
    {
        if (m_items.at(position).sequenceName.contains(modelName))
        {
            return position;
        }
        ++position;
    }

    return -1;
}

/*********************************************************************************/

/*
 * Loads a database from the text file `filename`, replacing the items
 * currently held in m_items.
 *
 * Layout that is parsed (the same layout CreateDatabase()/SaveDatabase() write):
 *   line 1   : HEADER_WORD_LENGTH SEPARATOR <word length>
 *   line 2.. : <sequence name> SEPARATOR <value> SEPARATOR <value> ...
 *
 * Progress is reported through AlertStatus and the outcome through AlertUser.
 * m_isValid is TRUE afterwards only when the whole file was read; it is FALSE
 * if the file cannot be opened, the header is missing, or the word length is
 * not a positive integer.  Empty lines in the item section are skipped.
 */
void ClassifierDatabase::ReadDatabase(QString filename)
{
    QFile inFile(filename);
    QTextStream inStream(&inFile);
    QStringList tempStringParts;
    QString tempString, statusString;
    Sequence tempSequence;
    ULONG i, j;
    INT parsedWordLength;
    bool wordLengthOk = false;

    if (inFile.open(QIODevice::ReadOnly|QIODevice::Text))
    {
        try
        {
            m_items.clear();
            m_isValid = FALSE;
            j = 0;

            // First line must be the word length header.
            tempString = inStream.readLine();
            tempStringParts = tempString.split(SEPARATOR);
            if (tempStringParts.length() != 2 || tempStringParts.at(0) != HEADER_WORD_LENGTH)
            {
                m_isValid = FALSE;
                emit(AlertUser("ERROR: Problem reading database. Database missing word length field."));
                return;
            }

            // toInt() yields 0 for unparsable text; reject that (and any
            // non-positive value) instead of silently accepting it.
            parsedWordLength = tempStringParts.at(1).toInt(&wordLengthOk);
            if (!wordLengthOk || parsedWordLength <= 0)
            {
                m_isValid = FALSE;
                emit(AlertUser("ERROR: Problem reading database. Invalid word length field."));
                return;
            }
            m_wordLength = parsedWordLength;
            m_filename = filename;

            tempString = inStream.readLine();

            // readLine() returns a null string once the stream is exhausted.
            while(!tempString.isNull())
            {
                // An empty line carries no item; skipping it avoids adding an
                // entry with an empty name and an empty vector.
                if (!tempString.isEmpty())
                {
                    statusString.sprintf("Loading item %lu of database...", j);
                    emit(AlertStatus(statusString));
                    tempStringParts = tempString.split(SEPARATOR);
                    tempSequence.sequenceName = tempStringParts.at(0);
                    tempSequence.sequenceVector.clear();
                    for (i=1; i < (ULONG)tempStringParts.length(); i++)
                        tempSequence.sequenceVector.append(tempStringParts.at(i).toDouble());
                    m_items.append(tempSequence);
                    j++;
                }
                tempString = inStream.readLine();
            }
            m_isValid = TRUE;
            emit(AlertStatus(""));
            tempString.sprintf("Database loaded successfully!  (%d items).", m_items.count());
            emit(AlertUser(tempString));
        }
        catch (INT e)
        {
            m_isValid = FALSE;
            emit(AlertStatus(""));
            emit(AlertUser("ERROR: Problem reading database."));
        }
    }
    else
    {
        m_isValid = FALSE;
        emit(AlertUser("ERROR: Could not open database file for reading."));
    }
}

/*********************************************************************************/

/*
 * Builds a new database from the files named in *inFilenames and writes it
 * to *outFilename.
 *
 * Each input file is handed to m_profiler.ProfileFastaFile() together with
 * wordLength and &m_items (presumably the profiler appends its results to
 * m_items -- confirm against the profiler).  The items are then written as:
 *   line 1   : HEADER_WORD_LENGTH SEPARATOR <wordLength>
 *   line 2.. : <sequence name> SEPARATOR <value> SEPARATOR <value> ...
 *
 * On success m_isValid, m_wordLength, m_filename and m_updateCount are
 * updated.  If an INT exception is caught, m_isValid becomes FALSE and the
 * partially written output file is deleted.  Progress goes to AlertStatus,
 * results and errors to AlertUser.  Null pointer arguments are reported as
 * a failure to open the output file.
 */
void ClassifierDatabase::CreateDatabase(QStringList* inFilenames, QString* outFilename, INT wordLength)
{
    // Both pointers are dereferenced below; refuse null instead of crashing.
    if (!inFilenames || !outFilename)
    {
        emit(AlertUser("ERROR: Could not open file to create database."));
        return;
    }

    ULONG i, j;
    QFile outFile(*outFilename);
    QTextStream outStream(&outFile);
    QString statusString;

    if (outFile.open(QIODevice::WriteOnly|QIODevice::Text))
    {
        m_items.clear();

        try
        {
            outStream << HEADER_WORD_LENGTH << SEPARATOR << wordLength << endl;

            for (i=0; i < (ULONG)inFilenames->count(); i++)
            {
                statusString.sprintf("Profiling sequences from file %lu of %d...", i+1, inFilenames->count());
                emit(AlertStatus(statusString));
                m_profiler.ProfileFastaFile(inFilenames->at(i), wordLength, &m_items, TRUE);
            }

            for (i=0; i < (ULONG)m_items.count(); i++)
            {
                statusString.sprintf("Writing database vector %lu of %d...", i+1, m_items.count());
                emit(AlertStatus(statusString));
                outStream << m_items.at(i).sequenceName;
                for (j=0; j < (ULONG)m_items.at(i).sequenceVector.count(); j++)
                    outStream << SEPARATOR << m_items.at(i).sequenceVector.at(j);
                outStream << endl;
            }

            m_isValid = TRUE;
            m_wordLength = wordLength;
            m_filename = *outFilename;
            m_updateCount = 0;
            statusString.sprintf("Database successfully created (%d items).", m_items.count());
            emit(AlertUser(statusString));
        }
        catch (INT e)
        {
            m_isValid = FALSE;
            // Close before deleting: a file that is still open cannot be
            // removed on every platform, which would leave the partial file.
            outFile.close();
            QFile::remove(*outFilename);
            emit(AlertUser("ERROR: Problem creating database file."));
        }
        emit(AlertStatus(""));
        outFile.close();    // No-op if the failure path already closed it.
    }
    else
    {
        emit(AlertUser("ERROR: Could not open file to create database."));
    }
}

/*********************************************************************************/

/*
 * Rewrites the file named by m_filename with the current contents of
 * m_items, using the same layout ReadDatabase() parses:
 *   line 1   : HEADER_WORD_LENGTH SEPARATOR <m_wordLength>
 *   line 2.. : <sequence name> SEPARATOR <value> SEPARATOR <value> ...
 *
 * Resets m_updateCount to 0 after a successful write.
 *
 * Returns TRUE when the file was written, FALSE when the file could not be
 * opened or an INT exception was caught while writing.  The outcome is also
 * announced through AlertUser.
 */
BOOLEAN ClassifierDatabase::SaveDatabase()
{
    BOOLEAN updateStatus = TRUE;
    ULONG i, j;
    QFile outFile(m_filename);
    QTextStream outStream(&outFile);

    emit(AlertUser("Saving updated database..."));

    if (outFile.open(QIODevice::WriteOnly|QIODevice::Text))
    {
        try
        {
            outStream << HEADER_WORD_LENGTH << SEPARATOR << m_wordLength << endl;

            for (i=0; i < (ULONG)m_items.count(); i++)
            {
                const Sequence &item = m_items.at(i);

                outStream << item.sequenceName;
                for (j=0; j < (ULONG)item.sequenceVector.count(); j++)
                    outStream << SEPARATOR << item.sequenceVector.at(j);
                outStream << endl;
            }

            m_updateCount = 0;
            emit(AlertUser("Database successfully updated."));
        }
        catch (INT e)
        {
            emit(AlertUser("ERROR: Problem updating database file."));
            // The caller must see the failure too, not only the user.
            updateStatus = FALSE;
        }
        outFile.close();
    }
    else
    {
        emit(AlertUser("ERROR: Could not open database file for updating."));
        updateStatus = FALSE;
    }
    return updateStatus;
}

/*********************************************************************************/

/*
 * Updates database vectors from a classifier output file.
 *
 * The file must start with a HEADER_WORD_LENGTH line whose value equals
 * m_wordLength.  After that, each record is located by its '>' marker and is
 * read as three lines: the header line, the label of the matching database
 * vector, and the fragment length.  Fragment lengths are summed per database
 * item (looked up with GetIndex()); every item whose sum reaches
 * UPDATE_THRESHOLD is rebuilt through UpdateVector(), which re-reads the file.
 *
 * Returns TRUE when the file was processed and every required vector update
 * succeeded; FALSE if the file cannot be opened, its header is missing or
 * disagrees on word length, an INT exception is caught, or a vector update
 * fails.  Results and errors are announced through AlertUser (an unopenable
 * file returns FALSE without a message).
 */
BOOLEAN ClassifierDatabase::UpdateDatabase(QString outputFilename)
{
    QFile outputFile(outputFilename);
    QTextStream inStream(&outputFile);
    QString tempString;
    QStringList tempStringParts;
    const INT itemCount = m_items.length();
    // Number of entries in one profile vector; computed once instead of per use.
    const INT vectorLength = (INT)pow(NUM_BASES,m_wordLength);
    // Heap-backed storage: the sizes are only known at run time, and
    // variable-length stack arrays are non-standard and can overflow the stack.
    QVector<ULONG> runningSums(itemCount, (ULONG)0);
    INT i, tempIndex, updates = 0;
    BOOLEAN updateSuccess;

    if (outputFile.open(QIODevice::ReadOnly|QIODevice::Text))
    {
        try
        {
            tempString = inStream.readLine();
            tempStringParts = tempString.split(SEPARATOR);
            if (tempStringParts.length() != 2 || tempStringParts.at(0) != HEADER_WORD_LENGTH)
            {
                emit(AlertUser("ERROR: Problem updating database. Output file missing word length field."));
                return FALSE;
            }
            if (tempStringParts.at(1).toInt() != this->m_wordLength)
            {
                emit(AlertUser("ERROR: Could not update database.  Output file differs in word length."));
                return FALSE;
            }

            tempString = inStream.read(1);
            while(!inStream.atEnd())
            {
                // Advance one character at a time to the next record marker.
                while (!inStream.atEnd() && tempString != ">") tempString = inStream.read(1);
                if (!inStream.atEnd())
                {
                    tempString = inStream.readLine();       // Get the header line (">Example_Label")
                    tempString = inStream.readLine();       // Get the matching db vector ("Vector_Label")
                    tempIndex = GetIndex(tempString);       // Get index corresponding to the db vector
                    tempString = inStream.readLine();       // Get the fragment length

                    if (tempIndex >= 0)
                    {
                        runningSums[tempIndex] += tempString.toULong();
                    }
                    tempString = inStream.read(1);
                }
            }
        }
        catch (INT e)
        {
            emit(AlertUser("ERROR: Could not update database.  An unexpected error has occurred."));
            return FALSE;
        }
        outputFile.close();

        // Scratch buffer handed to UpdateVector(); allocated only once the
        // file has been parsed successfully.
        QVector<DOUBLE> tempVector(vectorLength > 0 ? vectorLength : 0);

        for (i=0; i<itemCount; i++)
        {
            if (runningSums.at(i) >= UPDATE_THRESHOLD)
            {
                updateSuccess = UpdateVector(i, tempVector.data(), vectorLength, outputFilename);
                if (!updateSuccess)
                {
                    emit(AlertUser("ERROR: Could not update database.  Failed to update vector."));
                    return FALSE;
                }
                updates++;
            }
        }

        tempString.sprintf("Updated %d item(s) in the database.", updates);
        emit(AlertUser(tempString));
        return TRUE;
    }
    return FALSE;
}

/*********************************************************************************/

/*
 * Replaces the stored vector of item `vectorIndex` with the contents of
 * `tempVector`.
 *
 * tempVector must hold pow(NUM_BASES, m_wordLength) values.  It is first
 * passed to m_profiler.PrepareVector() (which presumably normalises it in
 * place -- confirm against the profiler) and is then copied element by
 * element into the item's sequenceVector.
 *
 * Nothing is modified when tempVector is null, vectorIndex is outside
 * m_items, or the stored vector is shorter than the expected length.
 */
void ClassifierDatabase::UpdateVector(INT vectorIndex, DOUBLE *tempVector)
{
    // Loop-invariant: evaluate the power once rather than on every iteration.
    const INT vectorLength = (INT)pow(NUM_BASES,m_wordLength);
    INT i;

    if (!tempVector || vectorIndex < 0 || vectorIndex >= m_items.length())
        return;

    // The copy below writes by index, so the destination must be long enough.
    if (m_items.at(vectorIndex).sequenceVector.count() < vectorLength)
        return;

    this->m_profiler.PrepareVector(tempVector, vectorLength, m_wordLength);

    for (i = 0; i < vectorLength; i++)
        this->m_items[vectorIndex].sequenceVector[i] = tempVector[i];
}

/*********************************************************************************/

/*
 * Rebuilds the stored vector of item `vectorIndex` from the records in
 * `outputFilename` that refer to it.
 *
 * tempVector is caller-supplied scratch space of `vectorLength` entries; it
 * is zeroed here and then used as an accumulator.  Each record is located by
 * its '>' marker.  A record is used when the item's sequenceName contains the
 * record's (trimmed) database vector label; its fourth line is split on
 * SEPARATOR and read as alternating <index> <count> fields, and each count is
 * added to tempVector[index].  The accumulated vector is finally passed to
 * m_profiler.PrepareVector() and copied into the item.
 *
 * Returns TRUE when the item was updated.  Returns FALSE, leaving the item
 * untouched, when an argument is invalid, the stored vector is shorter than
 * vectorLength, the file cannot be opened, an INT exception is caught, or the
 * file contains an index outside [0, vectorLength).
 */
BOOLEAN ClassifierDatabase::UpdateVector(INT vectorIndex, DOUBLE *tempVector, INT vectorLength, QString outputFilename)
{
    QFile outputFile(outputFilename);
    QTextStream inStream(&outputFile);
    QString tempString;
    QStringList tempStringParts;

    INT i, j, tempInt;

    if (!tempVector || vectorLength < 0 || vectorIndex < 0 || vectorIndex >= m_items.length())
        return FALSE;

    // The final copy writes by index, so the destination must be long enough.
    if (m_items.at(vectorIndex).sequenceVector.count() < vectorLength)
        return FALSE;

    for (i=0; i<vectorLength; i++) tempVector[i] = 0;

    if (outputFile.open(QIODevice::ReadOnly|QIODevice::Text))
    {
        try
        {
            while(!inStream.atEnd())
            {
                // Advance one character at a time to the next record marker.
                while (!inStream.atEnd() && tempString != ">") tempString = inStream.read(1);
                if (!inStream.atEnd())
                {
                    tempString = inStream.readLine();               // Get rest of the header line
                    tempString = inStream.readLine().trimmed();     // Get the db vector name

                    if (m_items[vectorIndex].sequenceName.contains(tempString))
                    {
                        tempString = inStream.readLine();           // Get fragment length
                        tempString = inStream.readLine();           // Get index:count fields
                        tempStringParts = tempString.split(SEPARATOR);

                        for (j=0; j<(tempStringParts.length()/2); j++)
                        {
                            tempInt = tempStringParts.at(j*2).toInt();

                            // The index comes from the file; never let it
                            // address memory outside the scratch buffer.
                            if (tempInt < 0 || tempInt >= vectorLength)
                                return FALSE;

                            tempVector[tempInt] += tempStringParts.at((j*2)+1).toInt();
                        }
                    }
                }
            }

            m_profiler.PrepareVector(tempVector, vectorLength, m_wordLength);
            for (i=0; i<vectorLength; i++) m_items[vectorIndex].sequenceVector[i] = tempVector[i];
            return TRUE;
        }
        catch (INT e) {  }      // Reported to the caller through the FALSE return below
        outputFile.close();
    }
    return FALSE;
}

/*********************************************************************************/

/*
 * Profiles every file named in inFileNames with the database's current word
 * length, appends the resulting sequences to m_items, and then calls
 * SaveDatabase().  Progress is sent through AlertStatus, and the start and
 * summary messages through AlertUser.
 */
void ClassifierDatabase::AddItems(QStringList inFileNames)
{
    QList<Sequence> profiledItems;
    QString message;
    INT fileNumber;

    emit(AlertUser("Adding new items to the database..."));

    // Collect the new sequences in a separate list first so they can be counted.
    for (fileNumber = 1; fileNumber <= inFileNames.count(); ++fileNumber)
    {
        message.sprintf("Adding sequences from file %d of %d...", fileNumber, inFileNames.count());
        emit(AlertStatus(message));
        m_profiler.ProfileFastaFile(inFileNames.at(fileNumber - 1), m_wordLength, &profiledItems, TRUE);
    }

    m_items += profiledItems;

    emit(AlertStatus(""));
    message.sprintf("Added %d items to the database.", profiledItems.count());
    emit(AlertUser(message));

    SaveDatabase();
}

/********************************* END OF FILE ***********************************/
