eTextReader.search
Class Indexer

java.lang.Object
  extended by eTextReader.search.Indexer
All Implemented Interfaces:
Index, java.io.Serializable

public class Indexer
extends java.lang.Object
implements java.io.Serializable, Index

See Also:
Serialized Form

Field Summary
private  java.lang.StringBuffer addresses
           
private  boolean caseSensitive
           
private  boolean lastWasSearchTerm
           
private  org.apache.log4j.Logger logger
           
private  java.util.ArrayList lowerCaseDocument
           
private  java.lang.String pageTitle
          Note that because the pageTitle is stored here, we won't notice if it changes.
private  java.util.ArrayList theDocument
           
private  java.net.URL website
           
private  java.util.List<java.lang.String> wordList
           
private  java.util.Map<java.lang.String,java.lang.Integer> wordOccurrences
           
 
Constructor Summary
Indexer(java.net.URL website, boolean caseSensitive)
           
 
Method Summary
private  java.lang.StringBuffer addPageTitle(java.lang.StringBuffer allContextsString)
           
private  java.lang.StringBuffer appendWord(java.lang.StringBuffer context, WordWithAddress wwa, java.util.List<java.lang.String> terms)
           
 java.util.ArrayList getAppropriateDocument()
           
 java.util.ArrayList getDocument()
           
 java.lang.String getPageTitle()
           
 int getWordFrequency(java.lang.String word)
           
 java.util.List<java.lang.String> getWordList()
           
 Indexer indexWebpage()
           
 java.lang.String makeContextString(java.util.List<java.lang.String> terms, int textlength, boolean phrase)
           
 void manipulateArray()
           
 void parseTheDocument()
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

website

private java.net.URL website

caseSensitive

private boolean caseSensitive

theDocument

private java.util.ArrayList theDocument

lowerCaseDocument

private java.util.ArrayList lowerCaseDocument

wordOccurrences

private java.util.Map<java.lang.String,java.lang.Integer> wordOccurrences

wordList

private java.util.List<java.lang.String> wordList

pageTitle

private java.lang.String pageTitle
Note that because the pageTitle is stored here, we won't notice if it changes. Then again, we won't notice changes to the content either :-(


addresses

private transient java.lang.StringBuffer addresses

lastWasSearchTerm

private transient boolean lastWasSearchTerm

logger

private transient org.apache.log4j.Logger logger
Constructor Detail

Indexer

public Indexer(java.net.URL website,
               boolean caseSensitive)
Method Detail

indexWebpage

public Indexer indexWebpage()

getAppropriateDocument

public java.util.ArrayList getAppropriateDocument()
Specified by:
getAppropriateDocument in interface Index

getDocument

public java.util.ArrayList getDocument()

parseTheDocument

public void parseTheDocument()
Specified by:
parseTheDocument in interface Index

manipulateArray

public void manipulateArray()
Specified by:
manipulateArray in interface Index

getWordFrequency

public int getWordFrequency(java.lang.String word)
Specified by:
getWordFrequency in interface Index

makeContextString

public java.lang.String makeContextString(java.util.List<java.lang.String> terms,
                                          int textlength,
                                          boolean phrase)
Specified by:
makeContextString in interface Index

appendWord

private java.lang.StringBuffer appendWord(java.lang.StringBuffer context,
                                          WordWithAddress wwa,
                                          java.util.List<java.lang.String> terms)

addPageTitle

private java.lang.StringBuffer addPageTitle(java.lang.StringBuffer allContextsString)

getWordList

public java.util.List<java.lang.String> getWordList()
Returns:
the word list, as a list of strings

getPageTitle

public java.lang.String getPageTitle()