/*****************************************************************/
/*  University of Nebraska-Lincoln                               */
/*  Department of Electrical Engineering                         */
/*  Bioinformatics Group                                         */
/*  Sam Way                                                      */
/*  2/12/10                                                      */
/*****************************************************************/

/*********************************************************************************/
/* Included Header Files                                                         */
/*********************************************************************************/
#include "fragmentclassifier.h"
#include <QTextStream>
#include <QFile>
#include <QString>
#include <QTime>

/*********************************************************************************/
/* Private Module Constants                                                      */
/*********************************************************************************/
#define NOT_CLASSIFIED "(unknown)"

/*********************************************************************************/
/* Constructors / Destructors                                                    */
/*********************************************************************************/
/* Default constructor: the body is intentionally empty. */
FragmentClassifier::FragmentClassifier()
{
}

/*********************************************************************************/
/* Private / Public Functions                                                    */
/*********************************************************************************/

/*
 * Scores a profiled sequence against every item in the database and returns
 * the index of the highest-scoring item.
 *
 * The score of item i is the sum over k of
 *     m_items[i].sequenceVector[nonzeroIndex[k]] * nonzeroCount[k]
 * where nonzeroIndex / nonzeroCount are the parallel lists held in tempCounts.
 *
 * database   - database whose m_items are scored.
 * tempCounts - sparse counts of the sequence being classified.
 * matchScore - output: absolute difference between the best score and the
 *              runner-up score. When the database holds fewer than two items
 *              there is no runner-up and 0 is used in its place.
 *
 * Returns the index of the best item in database->m_items. Ties keep the
 * lowest index. Returns 0 (with *matchScore = 0) if the database is empty.
 */
INT FragmentClassifier::ClassifySequence(ClassifierDatabase *database, SequenceCounts *tempCounts, DOUBLE *matchScore)
{
    INT i, j;
    INT bestIndex = 0;
    DOUBLE bestScore = 0;
    DOUBLE secondBestScore = 0;
    DOUBLE tempScore = 0;
    DOUBLE margin = 0;
    bool haveSecondBest = false;

    for (i=0; i < database->m_items.length(); i++)
    {
        tempScore = 0;

        for (j=0; j < tempCounts->nonzeroCount.length(); j++)
        {
            tempScore += (database->m_items.at(i).sequenceVector.at(tempCounts->nonzeroIndex.at(j)) * tempCounts->nonzeroCount.at(j));
        }

        if (i == 0)
        {
            // The first item seeds the best score; there is no runner-up yet.
            bestScore = tempScore;
        }
        else if (tempScore > bestScore)
        {
            // New leader: the previous leader becomes the runner-up.
            secondBestScore = bestScore;
            haveSecondBest = true;
            bestScore = tempScore;
            bestIndex = i;
        }
        else if (!haveSecondBest || tempScore > secondBestScore)
        {
            // Not a new leader, but still better than the current runner-up.
            // Without this branch the runner-up would be stale (or remain at
            // its initial 0) and the reported margin would be wrong.
            secondBestScore = tempScore;
            haveSecondBest = true;
        }
    }

    // Computed by hand rather than with unqualified abs(), which may bind to
    // the integer overload and truncate a floating-point margin.
    margin = bestScore - secondBestScore;
    if (margin < 0) margin = -margin;
    *matchScore = margin;

    return bestIndex;
}

/*********************************************************************************/

/*
 * Classifies every sequence found in the given input files and writes the
 * results to outFilename.
 *
 * database                - classifier database used for scoring.
 * inFilenames             - input files; each is scanned for the first '>'
 *                           before sequences are profiled.
 * outFilename             - destination results file.
 * classificationThreshold - when < 100, results (with match index and score)
 *                           are first written to outFilename + ".temp" and then
 *                           filtered into outFilename by CleanOutput(); when
 *                           >= 100, results are written directly.
 *
 * Returns TRUE on success. Returns FALSE if the output file cannot be opened,
 * or if an opened input file contains no '>' before its end. Input files that
 * cannot be opened are skipped.
 *
 * Emits AlertUser / AlertStatus to report progress and errors.
 */
BOOLEAN FragmentClassifier::ClassifyFiles( ClassifierDatabase *database,
                                           QStringList inFilenames,
                                           QString outFilename,
                                           INT classificationThreshold )
{
    INT i, j, matchID;
    DOUBLE matchScore;
    DOUBLE *bestScores = NULL;
    Sequence tempSequence;
    SequenceCounts tempCounts;
    QFile outFile(outFilename);
    QString tempString, statusString;
    QTextStream outStream(&outFile);
    QFile inFile;

    emit(AlertUser("Binning DNA fragments..."));

    if (classificationThreshold < 100)
    {
        outFile.setFileName(outFilename+".temp");
    }

    if (!outFile.open(QIODevice::WriteOnly|QIODevice::Text))
    {
        emit(AlertUser("ERROR: Could not create results/output file!"));
        return FALSE;
    }

    outStream << HEADER_WORD_LENGTH << SEPARATOR << database->WordLength() << endl;

    if (classificationThreshold < 100)
    {
        // Allocated only once the output file is open, so a failed open
        // cannot leak the array.
        bestScores = new DOUBLE[database->m_items.length()];
        for (i=0; i < database->m_items.length(); i++) bestScores[i] = 0;

        // Scores written to the temp file are parsed back by CleanOutput() and
        // compared against the in-memory maxima. Write them with enough digits
        // to round-trip (assuming DOUBLE is an IEEE double); otherwise the
        // default precision can make a bin's own best score compare as lower
        // than itself.
        outStream.setRealNumberPrecision(17);
    }

    for (i=0; i < inFilenames.count(); i++)
    {
        j = 0;
        inFile.setFileName(inFilenames[i]);

        if (inFile.open(QIODevice::ReadOnly|QIODevice::Text))
        {
            // Stack-allocated so the stream is released for every file and on
            // every exit path.
            QTextStream inStream(&inFile);

            // Skip ahead to the first '>' in the file.
            do { tempString = inStream.read(1); } while (!inStream.atEnd() && tempString != ">");
            if (inStream.atEnd())
            {
                emit(AlertStatus(""));
                emit(AlertUser("ERROR: No input sequences found in file! Program requires FASTA format."));
                outFile.close();
                inFile.close();
                delete [] bestScores;   // safe when NULL
                return FALSE;
            }
            while (!inStream.atEnd())
            {
                statusString.sprintf("Processing sequence %d of file %d...", j+1, i+1);
                emit(AlertStatus(statusString));

                m_profiler.ProfileNextSequence(&inStream, &tempSequence, database->WordLength(), &tempCounts);
                matchID = ClassifySequence(database, &tempCounts, &matchScore);

                if (classificationThreshold < 100)
                {
                    // Temp-file record: name, match index, match score, match name.
                    outStream << ">" << tempSequence.sequenceName << endl
                              << matchID << endl
                              << matchScore << endl
                              << database->m_items.at(matchID).sequenceName << endl;
                    if (matchScore > bestScores[matchID]) bestScores[matchID] = matchScore;
                }
                else
                {
                    outStream << ">" << tempSequence.sequenceName << endl
                              << database->m_items.at(matchID).sequenceName << endl;
                }
                j++;
            }
            inFile.close();
        }
    }

    emit(AlertStatus(""));
    emit(AlertUser("Processing complete!"));
    outFile.close();

    if (classificationThreshold < 100)
    {
        CleanOutput(outFilename, bestScores, database, classificationThreshold);
        delete [] bestScores;
    }

    return TRUE;
}

/*********************************************************************************/

/*
 * Rewrites the intermediate results file (outFilename + ".temp") into the
 * final results file (outFilename), replacing the match name with
 * NOT_CLASSIFIED for every record whose score falls below a per-item cutoff.
 *
 * outFilename             - final results file; the temp file name is derived
 *                           from it by appending ".temp".
 * bestScores              - per-item maximum scores, one entry per database
 *                           item. NOTE: scaled IN PLACE by
 *                           (100 - classificationThreshold) / 100 to form the
 *                           cutoffs.
 * database                - supplies the number of items (m_items.length()).
 * classificationThreshold - percentage used to derive the cutoff factor.
 *
 * The temp file is expected to hold one header line followed by four-line
 * records: sequence name, match index, match score, match name. The temp file
 * is removed after a successful rewrite. Emits AlertUser on failure.
 */
void FragmentClassifier::CleanOutput(QString outFilename, DOUBLE* bestScores, ClassifierDatabase* database, INT classificationThreshold)
{
    INT i, tempID;
    DOUBLE tempDouble;
    QFile outFile(outFilename);
    QTextStream outStream(&outFile);
    QFile inFile(outFilename+".temp");
    QTextStream inStream(&inFile);
    QString tempString, headerLine;
    bool idOk = false;
    bool scoreOk = false;

    if (!outFile.open(QIODevice::WriteOnly|QIODevice::Text))
    {
        // Previously this failure was silent.
        emit(AlertStatus(""));
        emit(AlertUser("ERROR: Could not reformat output."));
        return;
    }

    if (!inFile.open(QIODevice::ReadOnly|QIODevice::Text))
    {
        // Previously this failure was silent.
        emit(AlertStatus(""));
        emit(AlertUser("ERROR: Could not reformat output."));
        outFile.close();
        return;
    }

    // Turn each item's best score into its acceptance cutoff.
    tempDouble = (100 - (DOUBLE)classificationThreshold) / 100;
    for (i=0; i < database->m_items.length(); i++) bestScores[i] *= tempDouble;

    // First line is the file header; a file with nothing after it is an error.
    tempString = inStream.readLine();
    if (inStream.atEnd())
    {
        emit(AlertStatus(""));
        emit(AlertUser("ERROR: Could not reformat output."));
        outFile.close();
        inFile.close();
        return;
    }

    outStream << tempString << endl;

    while (!inStream.atEnd())
    {
        headerLine = inStream.readLine();   // Sequence name
        tempString = inStream.readLine();   // Match Index
        tempID = tempString.toInt(&idOk);
        tempString = inStream.readLine();   // Match Score
        tempDouble = tempString.toDouble(&scoreOk);
        tempString = inStream.readLine();   // Match Name

        outStream << headerLine << endl;

        // Only index bestScores with a successfully parsed, in-range ID; a
        // malformed or truncated record is reported as unclassified instead
        // of reading outside the array.
        if (idOk && scoreOk &&
            tempID >= 0 && tempID < database->m_items.length() &&
            tempDouble >= bestScores[tempID])
        {
            outStream << tempString << endl;
        }
        else
        {
            outStream << NOT_CLASSIFIED << endl;
        }
    }

    outFile.close();
    inFile.close();
    inFile.remove();
}

/*********************************************************************************/

/*
 * Classifies every sequence found in the given input files, writes the results
 * to outFilename, accumulates the classified sequences into the caller's
 * update buffers, and pushes sufficiently-supported items back into the
 * database.
 *
 * database        - classifier database used for scoring and updated via
 *                   UpdateVector().
 * inFilenames     - input files; each is scanned for the first '>' before
 *                   sequences are profiled.
 * outFilename     - destination results file.
 * updatedCounts   - per-item totals; incremented by the length of each
 *                   sequence assigned to that item.
 * updatedDatabase - per-item vectors; incremented by the nonzero counts of
 *                   each sequence assigned to that item.
 *
 * After each input file, every item whose updatedCounts entry is at least
 * UPDATE_THRESHOLD is passed to database->UpdateVector().
 * NOTE(review): updatedCounts / updatedDatabase are not reset between files,
 * so later updates use cumulative totals - confirm this is intended.
 *
 * Returns TRUE on success. Returns FALSE if the output file cannot be opened,
 * or if an opened input file contains no '>' before its end. Input files that
 * cannot be opened are skipped for classification.
 */
BOOLEAN FragmentClassifier::ClassifyFilesAndUpdateDatabase ( ClassifierDatabase *database,
                                                             QStringList inFilenames,
                                                             QString outFilename,
                                                             INT* updatedCounts,
                                                             DOUBLE** updatedDatabase )
{
    INT i, j, k;
    INT item;
    INT matchID;
    DOUBLE matchScore;
    Sequence tempSequence;
    SequenceCounts tempCounts;
    QFile outFile(outFilename);
    QString tempString, statusString;
    QTextStream outStream(&outFile);
    QFile inFile;

    emit(AlertUser("Binning DNA fragments..."));

    if (!outFile.open(QIODevice::WriteOnly|QIODevice::Text))
    {
        emit(AlertUser("ERROR: Could not create results/output file!"));
        return FALSE;
    }

    outStream << HEADER_WORD_LENGTH << SEPARATOR << database->WordLength() << endl;
    for (i=0; i < inFilenames.count(); i++)
    {
        j = 0;
        inFile.setFileName(inFilenames[i]);

        if (inFile.open(QIODevice::ReadOnly|QIODevice::Text))
        {
            // Stack-allocated so the stream is released for every file and on
            // every exit path.
            QTextStream inStream(&inFile);

            // Skip ahead to the first '>' in the file.
            do { tempString = inStream.read(1); } while (!inStream.atEnd() && tempString != ">");
            if (inStream.atEnd())
            {
                emit(AlertStatus(""));
                emit(AlertUser("ERROR: No input sequences found in file! Program requires FASTA format."));
                outFile.close();
                inFile.close();
                return FALSE;
            }
            while (!inStream.atEnd())
            {
                statusString.sprintf("Processing sequence %d of file %d...", j+1, i+1);
                emit(AlertStatus(statusString));

                m_profiler.ProfileNextSequence(&inStream, &tempSequence, database->WordLength(), &tempCounts);
                matchID = ClassifySequence(database, &tempCounts, &matchScore);
                outStream << ">" << tempSequence.sequenceName << endl << database->m_items.at(matchID).sequenceName << endl;

                // Fold this sequence into the running totals of its matched item.
                updatedCounts[matchID] += tempSequence.sequenceLength;
                for (k=0; k < tempCounts.nonzeroCount.length(); k++)
                {
                    updatedDatabase[matchID][tempCounts.nonzeroIndex.at(k)] += tempCounts.nonzeroCount.at(k);
                }
                j++;
            }
            inFile.close();
        }

        j = 0;   // now counts the number of database items updated
        emit(AlertUser("Updating the database..."));

        // Uses its own index: reusing the file index 'i' here would clobber
        // the enclosing loop and cause remaining input files to be skipped.
        for ( item = 0; item < database->m_items.length(); item++ )
        {
            if (updatedCounts[item] >= UPDATE_THRESHOLD)
            {
                j++;
                statusString.sprintf("Updating database item %d...", item+1);
                emit(AlertStatus(statusString));
                database->UpdateVector(item, updatedDatabase[item]);
            }
        }

        tempString.sprintf("Updated %d item(s) in the database.", j);
        emit(AlertUser(tempString));
    }

    emit(AlertStatus(""));
    emit(AlertUser("Processing complete!"));
    outFile.close();
    return TRUE;
}

/********************************* END OF FILE ***********************************/
