
/*	Lexical analysis implementation file
	Pretty much just a file tokenizer
	by Jason Plumb */

#include <stdio.h>
#include <memory.h>
#include <malloc.h>
#include <io.h>
#include "defines.h"
#include "helper_func.h"
#include "lex.h"


//File-scope state shared by the lexer routines in this file
char input_filename[200];		//Copy of the filename handed to initLex (at most 199 chars)
bool bInComments;				//Flag to indicate that we're in comment block (not referenced by the routines in this file section)
char *pFileBuffer;				//Buffer to hold entire file in memory; allocated in initLex, freed in uninitLex
int iCurrOffset;				//Offset into file buffer of the next character getNT will examine
int iCurrLine;					//Indicates the current line; incremented by getNT for each '\n' it skips
int iLastNLOffset;				//Buffer offset of the most recent '\n' skipped by getNT
int iLineOffset;				//Set by getNT: distance from iLastNLOffset to the token start (or to the current offset on error)

TOKEN SymTable[SYM_TABLE_SIZE];	//The symbol table
int iTokenCt;					//The number of tokens currently stored in SymTable
int iDefinedTokCt;				//The number of predefined tokens (assigned in initSymTable)
int iNT;						//Result of the most recent getNT call: a SymTable index or an ERR_* code

//Lexemes preloaded into the symbol table by initSymTable.  Each entry's position in this
//array is also used as its token type.  The empty string marks the end of the list.
//NOTE(review): the "*//" entry looks like it may have been meant to be "*/" - confirm.
char *InitTokens[] = {	"+", "-", "*", "/", "<", ">", "=", ">=", "<=", "<>", ",",
							";", "**", ":=", "(", ")", "[", "]", "/*", "*//", ":", "..",
							"and", "begin", "else", "elsif", "end", "for", "if", "in",
							"integer", "is", "loop", "null", "or", "procedure", "read",
							"readln", "then", "while", "write", "writeln", "xor", "abs",
							"sqrt", "" };

//SymTable indexes returned by addToken for the constants "1" (true) and "0" (false)
int iTokenIndexTrue, iTokenIndexFalse;

//Init - Used to open files, prepare memory, etc
//Returns <= 0  on error
int initLex(char *pFilename){

	FILE *in = NULL;
	pFileBuffer = NULL;

	if(pFilename == NULL)
		return ERR_BAD_PARAMETER;			//Bad filename

	memset(input_filename, 0, 200);
	ccstrncpy(input_filename, pFilename, 199);

	struct _finddata_t fd;
	long hFile = _findfirst(input_filename, &fd);
	if(hFile == -1)
		return ERR_FILE_IO;				
	
	pFileBuffer = (char*)malloc(fd.size);			//Allocate memory for file in memory
	memset(pFileBuffer, 0, fd.size);
	_findclose(hFile);
	
	in = fopen(pFilename, "rt");
	if(in == NULL)
		return ERR_FILE_IO;

	fread(pFileBuffer, sizeof(char), fd.size, in);
	fclose(in);
	
	ccRemoveComments(pFileBuffer);					//Strip out all comments
	iCurrOffset = 0;								//Offset into buffer = 0
	iCurrLine = 0;
	iLineOffset = 0;

	initSymTable();

	return 1;
}//initLex function

//uninitLex - Releases the in-memory copy of the source file and clears the
//pointer so that a later getNT call reports that no file is loaded.
void uninitLex(){
	free(pFileBuffer);				//free(NULL) is a no-op, so no guard is required
	pFileBuffer = NULL;
}//uninitLex function

//initSymTable - Clears the symbol table and preloads it with every lexeme in
//InitTokens (each one's array position doubles as its token type), followed by
//the constants "1" and "0", whose indexes are remembered in iTokenIndexTrue and
//iTokenIndexFalse.
void initSymTable(){
	int iPredefined;

	memset(&SymTable, 0, sizeof(SymTable));
	iTokenCt = 0;

	//The list is terminated by an empty string
	for(iPredefined = 0; ccstrlen(InitTokens[iPredefined]) != 0; iPredefined++)
		addToken(InitTokens[iPredefined], iPredefined);

	iTokenIndexTrue = addToken("1", TOKEN_TYPE_CONST);				//Token for 1/true
	iTokenIndexFalse = addToken("0", TOKEN_TYPE_CONST);				//Token for 0/false

	iDefinedTokCt = iPredefined + 1;
}//initSymTable function

//Copies the lexeme occupying buffer offsets [iStart, iEnd) out of pFileBuffer and
//returns its symbol-table index, adding it with type iTokenType when it is not yet
//present.  Returns ERR_INVALID_LEXEME when the lexeme does not fit the local buffer.
static int lookupOrAddLexeme(int iStart, int iEnd, int iTokenType){
	char lexeme[81];
	int iLen = iEnd - iStart;
	int iIndex;

	if((iLen <= 0) || (iLen >= (int)sizeof(lexeme)))
		return ERR_INVALID_LEXEME;			//Would overflow the lexeme buffer

	memset(lexeme, 0, sizeof(lexeme));
	ccstrncpy(lexeme, &pFileBuffer[iStart], iLen);
	lexeme[iLen] = '\0';					//Terminate regardless of how ccstrncpy behaves

	iIndex = findToken(lexeme);
	if(iIndex == ERR_NOT_FOUND)
		iIndex = addToken(lexeme, iTokenType);

	return iIndex;
}//lookupOrAddLexeme function

//Returns nonzero when the character at iOffset may legally end a number or an
//identifier: end of input, whitespace, or the start of an operator.
static int isTokenBoundary(int iOffset){
	char peek[3];
	char c = pFileBuffer[iOffset];

	if((c == '\0') || isWS(c))
		return 1;

	//Look at the next (up to) two characters without disturbing the lexeme itself
	memset(peek, 0, sizeof(peek));
	ccstrncpy(peek, &pFileBuffer[iOffset], 2);
	return (isOp(peek, 2) || isOp(peek, 1)) ? 1 : 0;
}//isTokenBoundary function

//getNT - Scans the next token out of the file buffer.
//Skips leading whitespace (counting lines as it goes), then recognises a numeric
//constant, an alphabetic identifier/keyword, a single-quoted string literal, or a
//one/two character operator.
//Returns the token's symbol-table index (a value >= 0) on success, otherwise one
//of ERR_FILE_IO (no file loaded), ERR_EOF, ERR_INVALID_LEXEME, ERR_STRING_NOT_TERM,
//ERR_INVALID_STRING_CHAR, or whatever error addToken reports.  Apart from the
//ERR_FILE_IO case the result is also stored in iNT; iLineOffset is updated for
//every result except ERR_FILE_IO and ERR_EOF.
int getNT(){

	int iTokStart;
	int i;
	char c;

	if(pFileBuffer == NULL)
		return ERR_FILE_IO;			//File not read into memory yet??

	// Skip over leading whitespace
	while(pFileBuffer[iCurrOffset] != '\0'){
		if(pFileBuffer[iCurrOffset] == '\n'){
			iLastNLOffset = iCurrOffset;
			iCurrLine++;
			iLineOffset = 0;
		}//if

		if(isWS(pFileBuffer[iCurrOffset]))
			iCurrOffset++;
		else
			break;
	}//while
	if(pFileBuffer[iCurrOffset] == '\0'){
		iNT = ERR_EOF;
		return ERR_EOF;
	}

	//The token begins here, after the whitespace, for every token class
	iTokStart = iCurrOffset;
	c = pFileBuffer[iCurrOffset];

	if((c >= '0') && (c <= '9')){						//Numeric
		while((pFileBuffer[iCurrOffset] >= '0') && (pFileBuffer[iCurrOffset] <= '9'))
			iCurrOffset++;

		if(isTokenBoundary(iCurrOffset))
			i = lookupOrAddLexeme(iTokStart, iCurrOffset, TOKEN_TYPE_CONST);
		else
			i = ERR_INVALID_LEXEME;			//Number runs straight into something else
	}//if
	else if(isAlpha(c)){								//Identifier or keyword
		while(isAlpha(pFileBuffer[iCurrOffset]))
			iCurrOffset++;

		//TODO:  Check to make sure the ID is not a reserved word

		if(isTokenBoundary(iCurrOffset))
			i = lookupOrAddLexeme(iTokStart, iCurrOffset, TOKEN_TYPE_ID);
		else
			i = ERR_INVALID_LEXEME;			//Identifier runs straight into something else
	}//else if
	else if(c == '\''){									//String literal
		iCurrOffset++;
		while(((isAlpha(pFileBuffer[iCurrOffset])) || (pFileBuffer[iCurrOffset]==' ') ||
			(pFileBuffer[iCurrOffset] == '\t')) && (pFileBuffer[iCurrOffset]!= '\'')){
			iCurrOffset++;
		}//while

		if((pFileBuffer[iCurrOffset] == '\r') || (pFileBuffer[iCurrOffset] == '\n')){
			//If here, we hit end of line without terminating a string
			i = ERR_STRING_NOT_TERM;
		}//if
		else if(pFileBuffer[iCurrOffset] == '\''){		//We found the end of the string
			iCurrOffset++;								//Include the closing quote in the lexeme
			i = lookupOrAddLexeme(iTokStart, iCurrOffset, TOKEN_TYPE_LITERAL);
		}//else if
		else{
			//If we made it here, then it looks like we have an invalid char in our string
			i = ERR_INVALID_STRING_CHAR;
			//Now skip over remainder of string looking for string terminating char
			while((pFileBuffer[iCurrOffset] != '\'') && (pFileBuffer[iCurrOffset] != '\0')){
				iCurrOffset++;
			}//while
			if(pFileBuffer[iCurrOffset] != '\0')
				iCurrOffset++;
		}//else
	}//else if
	else{
		//If we've made it here, we're hoping that we have an op
		char op[3];
		memset(op, 0, sizeof(op));
		ccstrncpy(op, &pFileBuffer[iCurrOffset], 2);	//Pull out next 2 chars

		i = findToken(op);
		if(i == ERR_NOT_FOUND){
			op[1] = '\0';								//Retry with just the first char
			i = findToken(op);
			if(i == ERR_NOT_FOUND){
				//TODO:  ERROR REPORTING: INVALID LEXICAL ELEMENT
				i = ERR_INVALID_LEXEME;
			}//if

			iCurrOffset++;
		}//if
		else{
			iCurrOffset++;
			//Only step over a second char if there is one; never walk past the terminator
			if(pFileBuffer[iCurrOffset] != '\0')
				iCurrOffset++;
		}//else
	}//else

	if(i >= 0)
		iLineOffset = iTokStart - iLastNLOffset;		//Position of the token on its line
	else
		iLineOffset = iCurrOffset - iLastNLOffset;		//Position where scanning stopped

	iNT = i;
	return i;
}//getNT function

int addToken(char *pStr, int iTokenType){
	if(iTokenCt >= SYM_TABLE_SIZE)
		return ERR_SYM_TABLE_FULL;

	if(findToken(pStr) != ERR_NOT_FOUND){					//If it's already in the table
		return ERR_EXISTS;
	}

	SymTable[iTokenCt].iTokenType = iTokenType;
	ccstrncpy(SymTable[iTokenCt].lexemeID, pStr, 79);
	if(iTokenType == TOKEN_TYPE_CONST){
		SymTable[iTokenCt].lexemeInt = ccatoi(pStr);		//Assign const int if necessary
	}//if

	iTokenCt++;				//Increment token count

	return iTokenCt-1;
}//addToken function

//findToken - Looks a lexeme up in the symbol table with a linear scan.
//Returns the index of the first entry whose lexemeID compares equal to pStr,
//or ERR_NOT_FOUND when there is none.
//Note: a hash table or balanced tree would be much faster; the table is simply
//searched front to back.
int findToken(char *pStr){
	int iEntry = 0;

	while(iEntry < iTokenCt){
		if(ccstrcmp(SymTable[iEntry].lexemeID, pStr) == 0)
			return iEntry;
		iEntry++;
	}//while

	return ERR_NOT_FOUND;
}//findToken

//findTokenType - Looks a lexeme up in the symbol table with a linear scan and
//reports the token type stored for it.
//Returns the iTokenType of the first entry whose lexemeID compares equal to pStr,
//or ERR_NOT_FOUND when there is none.
//Note: a hash table or balanced tree would be much faster; the table is simply
//searched front to back.
int findTokenType(char *pStr){
	int iEntry = 0;

	while(iEntry < iTokenCt){
		if(ccstrcmp(SymTable[iEntry].lexemeID, pStr) == 0)
			return SymTable[iEntry].iTokenType;
		iEntry++;
	}//while

	return ERR_NOT_FOUND;
}//findTokenType


//debugPrintSymTable - Placeholder for dumping the symbol table.  It currently
//only walks the stored entries and produces no output.
void debugPrintSymTable(){
	int iEntry = 0;

	while(iEntry < iTokenCt){
		//Nothing is printed for the entry yet
		iEntry++;
	}//while
}//debugPrintSymTable