/*****************************************************************/
/*  University of Nebraska-Lincoln                               */
/*  Department of Electrical Engineering                         */
/*  David J. Russell                                             */
/*****************************************************************/

#define _FILEIO

/*********************************************************************************/
/* Included Header Files */
/*********************************************************************************/
#include "Globals.h"
#include "StringADT.h"
#include "StringListADT.h"
#include "SparseMatrixFloatADT.h"
#include "FileIO.h"
#include "Parameters.h"

/*********************************************************************************/
/* Private Module Constants */
/*********************************************************************************/
/* Capacity, in characters, of the staging buffers m_SequencePart and
   m_MergedAlphabetSequencePart; a full buffer is flushed into the current
   sequence strings by FILEIO_StoreSequencePart. */
#define FILEIO_SEQUENCE_PART_LENGTH                   512

/*********************************************************************************/
/* Private Type Definitions */
/*********************************************************************************/
/* Signature shared by every state handler: it receives one input character. */
typedef void (*FILEIO_POINTER_TO_STATE_FUNCTION) (BYTE);

/* Parser states.  The enumerator values are used directly as indices into
   m_StateTable, so their order must match the order of the table entries. */
typedef enum
{
   FILEIO_STATE_IDLE,         /* ignoring input until a '>' is seen */
   FILEIO_STATE_HEADER_LINE,  /* collecting the characters of a header line */
   FILEIO_STATE_SEQUENCE,     /* collecting the characters of a sequence */
} FILEIO_STATE;

/* One row of the state table: a state together with its handler function. */
typedef struct
{
   FILEIO_STATE State;
   FILEIO_POINTER_TO_STATE_FUNCTION StateFunction;
} FILEIO_STATE_ENTRY;

/*********************************************************************************/
/* Private Module Function Prototypes */
/*********************************************************************************/

/* State handlers: one per FILEIO_STATE value, each called with the next
   input character while the parser is in that state. */
static void FILEIO_StateIdle (BYTE Ch);
static void FILEIO_StateHeaderLine(BYTE Ch);
static void FILEIO_StateSequence(BYTE Ch);

/* Helpers that move buffered characters into the current strings and the
   current strings into the caller-supplied string lists. */
static void FILEIO_StoreSequencePart(void);
static void FILEIO_CreateNewSequence(void);
static void FILEIO_StoreLastSequencePart(void);
static void FILEIO_CreateNewHeaderLine(void);
static void FILEIO_StoreHeaderLine(void);

/*********************************************************************************/
/* Private Module Variables */
/*********************************************************************************/
/* Caller-supplied output lists; set at the start of FILEIO_ReadFASTASequences. */
static STRINGLISTADT_STRINGLIST_TYPE *m_Sequences;
static STRINGLISTADT_STRINGLIST_TYPE *m_MergedAlphabetSequences;
/* Staging buffers for the sequence being read: the upper-cased characters and
   their PARMS_MergedAlphabetLookup translations, filled in parallel. */
static BYTE m_SequencePart[FILEIO_SEQUENCE_PART_LENGTH];
static BYTE m_MergedAlphabetSequencePart[FILEIO_SEQUENCE_PART_LENGTH];
/* Number of characters currently held in each staging buffer. */
static USHORT m_CurrentSequenceIndex;
/* Strings that accumulate the flushed staging buffers for the current record. */
static STRINGADT_STRING_TYPE m_CurrentSequence;
static STRINGADT_STRING_TYPE m_CurrentMergedAlphabetSequence;
/* Caller-supplied output list for header lines and the header being built. */
static STRINGLISTADT_STRINGLIST_TYPE *m_HeaderLines;
static STRINGADT_STRING_TYPE m_CurrentHeaderLine;
/* Sequence type for this read: initialised from the input file format and
   switched to amino acid by FILEIO_StateSequence when auto-detecting. */
static PARMS_SEQUENCE_TYPE m_SequenceType;

/* Current parser state; also the index used to select a row of m_StateTable. */
static FILEIO_STATE m_State;

/* Dispatch table for the parser.  It is indexed directly by m_State (see
   FILEIO_ReadFASTASequences), so the rows must stay in FILEIO_STATE order.
   The storage-class specifier is written first: placing it after a type
   qualifier is an obsolescent form in ISO C. */
static const FILEIO_STATE_ENTRY m_StateTable[] =
{
   {FILEIO_STATE_IDLE, FILEIO_StateIdle},
   {FILEIO_STATE_HEADER_LINE, FILEIO_StateHeaderLine},
   {FILEIO_STATE_SEQUENCE, FILEIO_StateSequence},
};

/*********************************************************************************/
/* Public and Private Module Functions */
/*********************************************************************************/

/*
 * Reads the FASTA file named by PARMS_GetInputFileName() and feeds it, one
 * character at a time, through the module's state machine.
 *
 * Sequences               - receives each sequence, upper-cased.
 * MergedAlphabetSequences - receives each sequence translated character by
 *                           character through PARMS_MergedAlphabetLookup.
 * HeaderLines             - receives each header line without its leading
 *                           '>' and with every ':' replaced by '_'.
 *
 * If the file cannot be opened a message is printed and the program exits
 * with status 1.  Before returning, the sequence type settled on during the
 * read is reported through PARMS_SetSequenceType().
 */
void FILEIO_ReadFASTASequences(STRINGLISTADT_STRINGLIST_TYPE *Sequences,
                                 STRINGLISTADT_STRINGLIST_TYPE *MergedAlphabetSequences,
                                 STRINGLISTADT_STRINGLIST_TYPE *HeaderLines)
{
   FILE *infile;
   int Next;   /* getc() result; must be an int so EOF stays distinct from data */
   BYTE Ch;

   m_State = FILEIO_STATE_IDLE;
   m_CurrentSequenceIndex = 0;
   m_Sequences = Sequences;
   m_MergedAlphabetSequences = MergedAlphabetSequences;
   m_HeaderLines = HeaderLines;

   if (PARMS_GetInputFileFormat() == PARMS_INPUT_SEQUENCE_AMINO_ACID)
   {
      m_SequenceType = PARMS_SEQUENCE_AMINO_ACID;
   }
   else
   {
      m_SequenceType = PARMS_SEQUENCE_DNA;
   }

   infile = fopen(PARMS_GetInputFileName(), "r");
   if (infile == NULL)
   {
      printf("Cannot open %s.\n", PARMS_GetInputFileName());
      exit(1);
   }

   while ((Next = getc(infile)) != EOF)
   {
      Ch = (BYTE) Next;

      /* The state handlers take a BYTE and recognise end of input by the
         narrowed EOF value.  A data byte that narrows to the same value
         would be mistaken for end of input, so it is dropped here. */
      if (Ch == (BYTE) EOF)
      {
         continue;
      }

      m_StateTable[m_State].StateFunction(Ch);
   }

   /* Real end of input: hand the sentinel to the current state exactly once
      so that a sequence still being collected gets stored. */
   m_StateTable[m_State].StateFunction((BYTE) EOF);

   fclose(infile);

   PARMS_SetSequenceType(m_SequenceType);
}

/*********************************************************************************/

/*
 * Writes a square distance matrix to the file named by
 * PARMS_GetOutputFileName().
 *
 * Output layout: the first line holds the dimension N; each following line
 * holds the label "Seq<row>" and the N elements of that row formatted with
 * "%3.3f", all separated by tabs.
 *
 * DistanceMatrix - matrix to write; NumRows must equal NumCols.
 *
 * Prints a message and exits with status 1 if the matrix is not square, if
 * the file cannot be created, or if closing the file reports a write error.
 */
void FILEIO_WriteDistanceMatrix(SMF_ADT_MATRIX_TYPE *DistanceMatrix)
{
   FILE *outfile;
   ULONG i;
   ULONG j;
   ULONG N;

   if (DistanceMatrix->NumRows != DistanceMatrix->NumCols)
   {
      printf("Matrix must be square.\n");
      exit(1);
   }

   N = DistanceMatrix->NumRows;

   outfile = fopen(PARMS_GetOutputFileName(), "w");
   if (outfile == NULL)
   {
      printf("Cannot create %s.\n", PARMS_GetOutputFileName());
      exit(1);
   }

   /* Counts are cast to unsigned long so the argument type always matches
      the %lu conversion, whatever ULONG is defined as. */
   fprintf(outfile, "%lu\n", (unsigned long) N);
   for (i = 0; i < N; i++)
   {
      fprintf(outfile, "Seq%lu\t", (unsigned long) i);
      for (j = 0; j < N; j++)
      {
         fprintf(outfile, "%3.3f", SMF_ADT_GetElement(DistanceMatrix, i, j));

         /* Tab between elements, newline after the last element of a row. */
         if ((j + 1) >= N)
         {
            fprintf(outfile, "\n");
         }
         else
         {
            fprintf(outfile, "\t");
         }
      }
   }

   /* fclose() flushes buffered output, so a failure here means the file on
      disk may be incomplete. */
   if (fclose(outfile) != 0)
   {
      printf("Cannot write %s.\n", PARMS_GetOutputFileName());
      exit(1);
   }
}

/*********************************************************************************/

/*
 * Idle state handler: every character is ignored until a '>' arrives, which
 * starts a new header line and moves the parser to the header-line state.
 *
 * Ch - next input character.
 */
void FILEIO_StateIdle(BYTE Ch)
{
   if (Ch != '>')
   {
      return;
   }

   FILEIO_CreateNewHeaderLine();
   m_State = FILEIO_STATE_HEADER_LINE;
}

/*********************************************************************************/

/*
 * Header-line state handler.
 *
 * A line terminator ('\n' or '\r') ends the header: the header is stored, a
 * new sequence is started and the parser moves to the sequence state.  Any
 * other character is appended to the current header line, with ':' replaced
 * by '_'.
 *
 * Ch - next input character.
 */
void FILEIO_StateHeaderLine(BYTE Ch)
{
   switch (Ch)
   {
      case '\n':
      case '\r':
         FILEIO_StoreHeaderLine();
         FILEIO_CreateNewSequence();
         m_State = FILEIO_STATE_SEQUENCE;
         break;

      case ':':
         STRINGADT_AppendStringFromChar(&m_CurrentHeaderLine, '_', "FILEIO_StateHeaderLine");
         break;

      default:
         STRINGADT_AppendStringFromChar(&m_CurrentHeaderLine, Ch, "FILEIO_StateHeaderLine");
         break;
   }
}

/*********************************************************************************/

/*
 * Appends one accepted, upper-cased sequence character to the staging
 * buffers (the character itself and its PARMS_MergedAlphabetLookup
 * translation) and flushes the buffers once they are full.
 *
 * Residue - upper-case letter to append.
 */
static void FILEIO_AppendResidue(BYTE Residue)
{
   m_MergedAlphabetSequencePart[m_CurrentSequenceIndex] = PARMS_MergedAlphabetLookup[ALPHA_INDEX(Residue)];
   m_SequencePart[m_CurrentSequenceIndex++] = Residue;
   if (m_CurrentSequenceIndex >= FILEIO_SEQUENCE_PART_LENGTH)
   {
      FILEIO_StoreSequencePart();
   }
}

/*
 * Sequence state handler.
 *
 *  - The end-of-input sentinel (EOF narrowed to BYTE) stores the sequence
 *    collected so far.
 *  - '>' stores the sequence collected so far, starts a new header line and
 *    moves the parser to the header-line state.
 *  - A, C, G, T, U and X are appended and leave the sequence type unchanged.
 *  - B, D, E, F, H, I, K, L, M, N, P, Q, R, S, V, W, Y and Z are appended
 *    and, when the input format is auto-detect, set the sequence type to
 *    amino acid.
 *  - Every other character (including J and O) is ignored.
 *
 * Letters are accepted in either case and stored in upper case.
 *
 * Ch - next input character.
 */
void FILEIO_StateSequence(BYTE Ch)
{
   BYTE CH;

   /* Test for the sentinel before any character classification so that the
      check does not depend on the signedness of BYTE. */
   if (Ch == (BYTE) EOF)
   {
      FILEIO_StoreLastSequencePart();
      return;
   }

   /* toupper() is only defined for values representable as unsigned char
      (or EOF), so a possibly negative BYTE is converted first. */
   CH = (BYTE) toupper((unsigned char) Ch);

   switch (CH)
   {
      case '>':
         FILEIO_StoreLastSequencePart();
         FILEIO_CreateNewHeaderLine();
         m_State = FILEIO_STATE_HEADER_LINE;
         break;

      case 'A':
      case 'C':
      case 'G':
      case 'T':
      case 'U':
      case 'X':
         FILEIO_AppendResidue(CH);
         break;

      case 'B':
      case 'D':
      case 'E':
      case 'F':
      case 'H':
      case 'I':
      case 'K':
      case 'L':
      case 'M':
      case 'N':
      case 'P':
      case 'Q':
      case 'R':
      case 'S':
      case 'V':
      case 'W':
      case 'Y':
      case 'Z':
         FILEIO_AppendResidue(CH);

         if (PARMS_GetInputFileFormat() == PARMS_INPUT_SEQUENCE_AUTO_DETECT)
         {
            m_SequenceType = PARMS_SEQUENCE_AMINO_ACID;
         }
         break;

      default:
         break;
   }
}

/*********************************************************************************/

void FILEIO_StoreSequencePart(void)
{
   if (m_CurrentSequenceIndex > 0)
   {
      STRINGADT_AppendStringFromChars(&m_CurrentSequence, 
                                       (UBYTE *) m_SequencePart,
                                       m_CurrentSequenceIndex, 
                                       "FILEIO_StoreSequencePart");
      STRINGADT_AppendStringFromChars(&m_CurrentMergedAlphabetSequence, 
                                       (UBYTE *) m_MergedAlphabetSequencePart,
                                       m_CurrentSequenceIndex, 
                                       "FILEIO_StoreSequencePart");
      m_CurrentSequenceIndex = 0;
   }
}

/*********************************************************************************/

/*
 * Prepares for a new sequence: initialises both current sequence strings
 * and empties the staging buffers by resetting their shared index.
 */
void FILEIO_CreateNewSequence(void)
{
   STRINGADT_InitializeString(&m_CurrentSequence);
   STRINGADT_InitializeString(&m_CurrentMergedAlphabetSequence);

   m_CurrentSequenceIndex = 0;
}

/*********************************************************************************/

/*
 * Completes the current record: flushes whatever is left in the staging
 * buffers, appends the current sequence and the current merged-alphabet
 * sequence to their respective output lists, and cleans up both strings.
 */
void FILEIO_StoreLastSequencePart(void)
{
   /* Move any buffered characters into the current strings first. */
   FILEIO_StoreSequencePart();

   /* Plain sequence. */
   STRINGLISTADT_AppendStringToStringList(m_Sequences, &m_CurrentSequence, "FILEIO_StoreLastSequencePart");
   STRINGADT_CleanUpString(&m_CurrentSequence);

   /* Merged-alphabet sequence. */
   STRINGLISTADT_AppendStringToStringList(m_MergedAlphabetSequences, &m_CurrentMergedAlphabetSequence, "FILEIO_StoreLastSequencePart");
   STRINGADT_CleanUpString(&m_CurrentMergedAlphabetSequence);
}

/*********************************************************************************/

/*
 * Prepares for a new header line by initialising the current header-line
 * string.  Called when a '>' is seen in the idle or sequence state.
 */
void FILEIO_CreateNewHeaderLine(void)
{
   STRINGADT_InitializeString(&m_CurrentHeaderLine);
}

/*********************************************************************************/

/*
 * Appends the current header line to the caller-supplied header-line list
 * and then cleans up the current header-line string.
 */
void FILEIO_StoreHeaderLine(void)
{
   STRINGLISTADT_AppendStringToStringList(m_HeaderLines, 
                                          &m_CurrentHeaderLine, 
                                          "FILEIO_StoreHeaderLine");
   /* Done only after the list append above has been given the string. */
   STRINGADT_CleanUpString(&m_CurrentHeaderLine);
}

