-

/**
*
* Copyright: Copyright (c) 2004 Carnegie Mellon University
*
* This program is part of an implementation for the PARKR project, which is
* about developing a search engine using efficient data structures.
*
* Modified by Mahender on 12-10-2009
*/
package searchengine.indexer;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.net.URL;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.Map.Entry;
import java.util.Vector;
//import searchengine.dictionary.AVLDictionary;
//import searchengine.dictionary.BSTDictionary;
import searchengine.dictionary.DictionaryInterface;
import searchengine.dictionary.HashDictionary;
//import searchengine.dictionary.ListDictionary;
//import searchengine.dictionary.MyHashDictionary;
import searchengine.dictionary.ObjectIterator;
import searchengine.element.PageElementInterface;
import searchengine.element.PageWord;
/**
* Web-indexing objects. This class implements the Indexer interface
* using a list-based index structure.
*
*A Hash Map based implementation of Indexing
*
*/
public class Indexer implements IndexerInterface
{
/** The constructor for ListWebIndex.
*/
// Index Structure
DictionaryInterface index;
// This is for calculating the term frequency
HashMap<String,DictionaryInterface> wordFrequency = new HashMap<String,D
ictionaryInterface>();
public Indexer(String mode)
{
if (mode.equals("hash"))
index = new HashDictionary();
}
public void addPage(URL url, ObjectIterator<?> keywords)
{
String str1 = url.toString();
int count;
Vector<?> keys = keywords.returnVec();
int n = keys.size();
int i = 0;
if(wordFrequency.isEmpty()){
while(keywords.hasNext()){
count = 0;
String str = (String) keywords.next();
while(i < n){
if(str.equals(keys.get(i))){
count++;
}
i++;
}
index.insert(str1,count);
wordFrequency.put(str, index);
}
} else {
while(keywords.hasNext()) {
count = 0;
String s = (String) keywords.next();
DictionaryInterface index1;
if(wordFrequency.containsKey(s)) {
index1 = wordFrequency.get(s);
while(i < n) {
if(s.equals(keys.get(i))){
count++;
}
i++;
}
index1.insert(str1, count);
wordFrequency.put(s,index1);
}
}
}
}
public void print1() {
//index.print();
for(Entry<String, DictionaryInterface> m:wordFrequency.entrySet
()) {
System.out.println(m.getKey()+" "+m.getV
alue());
}
}
/** Produce a printable representation of the index.
*
* @return a String representation of the index structure
*/
//public String toString()
//{
////////////////////////////////////////////////////////////////
////
// Write your Code here as part of Integrating and Running Mini Goo
gle assignment
//
///////////////////////////////////////////////////////////////////
//return "You dont need to implement it\n";
//}
/** Retrieve all of the web pages that contain the given keyword.
*
* @param keyword The keyword to search on
* @return An iterator of the web pages that match.
*/
public ObjectIterator<?> retrievePages(PageWord keyword)
{
// String str1 = keyword.toString();
// Vector vec = new Vector();
// Vector<String> vec1 = new Vector<String>();
// ObjectIterator al = new ObjectIterator(vec);
// String[] keys = index.getKeys();
// for (int i = 0;i < keys.length ;i++ ) {
//
String str = (String)keys[i];
//
vec1 = index.getValue(str);
//
if (vec1.contains(str1)) {
//
vec.add(keys[i]);
//
}
// }
return new ObjectIterator<PageElementInterface>(new Vector<PageE
lementInterface>());
}
/** Retrieve all of the web pages that contain any of the given keywords
.
*
* @param keywords The keywords to search on
* @return An iterator of the web pages that match.
*
* Calculating the Intersection of the pages here itself
**/
public ObjectIterator<?> retrievePages(ObjectIterator<?> keywords)
{
////////////////////////////////////////////////////////////////
////
// Write your Code here as part of Integrating and Running Mini Goo
gle assignment
//
///////////////////////////////////////////////////////////////////
return new ObjectIterator<PageElementInterface>(new Vector<PageE
lementInterface>());
}
/** Save the index to a file.
*
* @param stream The stream to write the index
*/
@SuppressWarnings("resource")
public void save(FileOutputStream stream) throws IOException
{
stream = new FileOutputStream("kumar.txt");
ObjectOutputStream oos = new ObjectOutputStream(stream);
oos.writeObject(wordFrequency);
oos.close();
}
/** Restore the index from a file.
*
* @param stream The stream to read the index
*/
public void restore(FileInputStream stream) throws IOException

{
}
/* Remove Page method not implemented right now
* @see searchengine.indexer#removePage(java.net.URL)
*/
public void removePage(URL url) {
}
};

// NOTE: the text that followed the class at this point was navigation residue
// from the document-hosting page this file was copied from (uploader and
// copyright information, available formats, sharing and rating prompts, a
// "you might also like" teaser, and a stray preview of the restore() signature).
// It is not part of the Java source.