package io;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.StringTokenizer;

import model.Citation;
import model.Stemmer;
import model.StopWords;


/**
 * Loads a citation collection from a text file and accumulates the frequency
 * of every term seen while loading.
 *
 * <p>Each non-blank line of the input file describes one citation. Fields are
 * separated by the characters {@code <} and {@code >} and appear in this order:
 * citation id, {@code idCluster_idAuthor}, coauthors (separated by {@code :},
 * or the literal {@code nulo} when there are none), title, venue, author name
 * and year.
 */
public class Reader {

	/**
	 * Term frequencies: maps a term to the number of times it has been counted.
	 * Initialised eagerly so the counting methods can be used right after
	 * construction.
	 */
	public Hashtable<String, Integer> tf = new Hashtable<String, Integer>();

	/**
	 * Returns the term-frequency table itself (not a copy).
	 *
	 * @return the table mapping each term to its count
	 */
	public Hashtable<String, Integer> getTf() {
		return tf;
	}

	/**
	 * Stores {@code freq} as the frequency of {@code term}, replacing any
	 * previous value.
	 *
	 * @param term the term to update
	 * @param freq the frequency to store
	 */
	public void setTf(String term, int freq) {
		this.tf.put(term, freq);
	}

	/**
	 * Reads every citation of the given base and updates the term frequencies.
	 *
	 * <p>Despite sharing the class name, this is a regular method, not a
	 * constructor. Blank lines in the file are skipped.
	 *
	 * @param base name of the directory (under the fixed workspace path) that
	 *             contains {@code Base_Completa.txt}
	 * @return the citations in file order
	 * @throws IOException if the file cannot be opened or read
	 * @throws NumberFormatException if a numeric field is not a valid integer
	 * @throws java.util.NoSuchElementException if a line has fewer fields than
	 *             expected
	 */
	public ArrayList<Citation> Reader(String base) throws IOException {

		String fileName = "/home/herculano/workspace/MonografiaFinal/" + base
				+ "/Base_Completa.txt";

		StopWords stpWords = new StopWords();
		ArrayList<String> listStpWords = stpWords.listStpWords();

		ArrayList<Citation> collection = new ArrayList<Citation>();

		BufferedReader buf = new BufferedReader(new FileReader(fileName));
		try {
			String line;
			while ((line = buf.readLine()) != null) {
				// A blank line (e.g. a trailing newline) carries no citation.
				if (line.trim().length() == 0) {
					continue;
				}
				collection.add(this.parseCitation(line, stpWords, listStpWords));
			}
		} finally {
			// Closing the buffered reader also closes the underlying file
			// reader, and this runs even when parsing fails half-way.
			buf.close();
		}

		return collection;
	}

	/**
	 * Parses one line of the input file into a citation, counting its terms.
	 */
	private Citation parseCitation(String line, StopWords stpWords,
			ArrayList<String> listStpWords) throws IOException {

		StringTokenizer token = new StringTokenizer(line, "<>");
		Citation cit = new Citation();
		String aux;

		// Citation id.
		cit.setId(Integer.parseInt(token.nextToken()));

		// Second field has the form "idCluster_idAuthor".
		StringTokenizer idAuthorField = new StringTokenizer(token.nextToken(), "_");
		cit.setIdCluster(Integer.parseInt(idAuthorField.nextToken()));
		cit.setIdAuthor(Integer.parseInt(idAuthorField.nextToken()));

		// Coauthor list; the literal "nulo" marks a citation without coauthors.
		aux = token.nextToken();
		if (aux.equals("nulo")) {
			cit.setCoauthors(null);
		} else {
			ArrayList<String> coauthors = new ArrayList<String>();
			StringTokenizer coau = new StringTokenizer(aux, ":");
			while (coau.hasMoreTokens()) {
				coauthors.add(coau.nextToken());
			}
			this.addTermFreq(coauthors);
			cit.setCoauthors(coauthors);
		}

		// Publication title: stop words removed, then stemmed.
		Stemmer stem = new Stemmer();
		aux = stpWords.removeStpWords(token.nextToken(), listStpWords);
		ArrayList<String> title = stem.runStem(new StringReader(aux));
		cit.setTitle(title);
		this.addTermFreq(title);

		// Publication venue: same treatment as the title.
		aux = stpWords.removeStpWords(token.nextToken(), listStpWords);
		ArrayList<String> venue = stem.runStem(new StringReader(aux));
		cit.setVenue(venue);
		this.addTermFreq(venue);

		// Author name; each space-separated part is counted as a term.
		String author = token.nextToken();
		cit.setAuthor(author);
		this.addTermFreq(author, " ");

		// Publication year (not added to the term frequencies).
		cit.setYear(Integer.parseInt(token.nextToken()));

		return cit;
	}

	/**
	 * Counts one occurrence of every term in the list.
	 *
	 * @param listTerms terms to count; a {@code null} or empty list is ignored
	 */
	public void addTermFreq(ArrayList<String> listTerms) {
		if (listTerms == null) {
			return;
		}
		for (String term : listTerms) {
			this.incrementTerm(term);
		}
	}

	/**
	 * Splits {@code cit} on the delimiter characters in {@code srtToken} and
	 * counts one occurrence of every resulting term. A term seen for the first
	 * time is stored with frequency 1, matching
	 * {@link #addTermFreq(ArrayList)}.
	 *
	 * @param cit      text to split into terms
	 * @param srtToken delimiter characters, as understood by
	 *                 {@link StringTokenizer}
	 */
	public void addTermFreq(String cit, String srtToken) {
		StringTokenizer token = new StringTokenizer(cit, srtToken);
		while (token.hasMoreTokens()) {
			this.incrementTerm(token.nextToken());
		}
	}

	/**
	 * Adds one to the stored frequency of {@code term}, starting from 1 when
	 * the term has not been seen before.
	 */
	private void incrementTerm(String term) {
		// Hashtable never stores null values, so null here means "absent".
		Integer current = this.tf.get(term);
		this.tf.put(term, current == null ? 1 : current + 1);
	}
}
