edu.harvard.hul.ois.jhove.module.html
Class HtmlCharStream

java.lang.Object
  extended by edu.harvard.hul.ois.jhove.module.html.HtmlCharStream
All Implemented Interfaces:
CharStream

public class HtmlCharStream
extends java.lang.Object
implements CharStream

An implementation of interface CharStream, where the stream is assumed to contain only ASCII characters (without Unicode processing).


Field Summary
protected  boolean _lineEndCR
           
protected  boolean _lineEndCRLF
           
protected  boolean _lineEndLF
           
protected  int[] bufcolumn
           
protected  char[] buffer
           
protected  int[] bufline
           
 int bufpos
           
protected  int column
           
protected  int inBuf
           
protected  java.io.Reader inputStream
           
protected  int line
           
protected  int maxNextCharInd
           
protected  boolean prevCharIsCR
           
protected  boolean prevCharIsLF
           
static boolean staticFlag
           
 
Constructor Summary
HtmlCharStream(java.io.InputStream dstream, int startline, int startcolumn, int buffersize, java.lang.String charset)
           
HtmlCharStream(java.io.InputStream dstream, int startline, int startcolumn, java.lang.String charset)
           
HtmlCharStream(java.io.InputStream dstream, java.lang.String charset)
           
HtmlCharStream(java.io.Reader dstream)
           
HtmlCharStream(java.io.Reader dstream, int startline, int startcolumn)
           
HtmlCharStream(java.io.Reader dstream, int startline, int startcolumn, int buffersize)
           
 
Method Summary
 void adjustBeginLineColumn(int newLine, int newCol)
          Method to adjust line and column numbers for the start of a token.
 void backup(int amount)
          Backs up the input stream by amount steps.
 char BeginToken()
          Returns the next character that marks the beginning of the next token.
 void Done()
          The lexer calls this function to indicate that it is done with the stream and hence implementations can free any resources held by this class.
protected  void ExpandBuff(boolean wrapAround)
           
protected  void FillBuff()
           
 int getBeginColumn()
          Returns the column number of the first character for current token (being matched after the last call to BeginToken).
 int getBeginLine()
          Returns the line number of the first character for current token (being matched after the last call to BeginToken).
 int getColumn()
          Deprecated. See getEndColumn().
 int getEndColumn()
          Returns the column number of the last character for current token (being matched after the last call to BeginToken).
 int getEndLine()
          Returns the line number of the last character for current token (being matched after the last call to BeginToken).
 java.lang.String GetImage()
          Returns a string made up of characters from the marked token beginning to the current buffer position.
 java.lang.String getKindOfLineEnd()
          Retrieve the kind of end of line.
 int getLine()
          Deprecated. See getEndLine().
 char[] GetSuffix(int len)
          Returns an array of characters that make up the suffix of length 'len' for the currently matched token.
 char readChar()
          Returns the next character from the selected input.
 void ReInit(java.io.InputStream dstream)
           
 void ReInit(java.io.InputStream dstream, int startline, int startcolumn)
           
 void ReInit(java.io.InputStream dstream, int startline, int startcolumn, int buffersize)
           
 void ReInit(java.io.Reader dstream)
           
 void ReInit(java.io.Reader dstream, int startline, int startcolumn)
           
 void ReInit(java.io.Reader dstream, int startline, int startcolumn, int buffersize)
           
protected  void UpdateLineColumn(char c)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

staticFlag

public static final boolean staticFlag
See Also:
Constant Field Values

bufpos

public int bufpos

bufline

protected int[] bufline

bufcolumn

protected int[] bufcolumn

column

protected int column

line

protected int line

prevCharIsCR

protected boolean prevCharIsCR

prevCharIsLF

protected boolean prevCharIsLF

_lineEndCR

protected boolean _lineEndCR

_lineEndLF

protected boolean _lineEndLF

_lineEndCRLF

protected boolean _lineEndCRLF

inputStream

protected java.io.Reader inputStream

buffer

protected char[] buffer

maxNextCharInd

protected int maxNextCharInd

inBuf

protected int inBuf
Constructor Detail

HtmlCharStream

public HtmlCharStream(java.io.Reader dstream,
                      int startline,
                      int startcolumn,
                      int buffersize)

HtmlCharStream

public HtmlCharStream(java.io.Reader dstream,
                      int startline,
                      int startcolumn)

HtmlCharStream

public HtmlCharStream(java.io.Reader dstream)

HtmlCharStream

public HtmlCharStream(java.io.InputStream dstream,
                      int startline,
                      int startcolumn,
                      int buffersize,
                      java.lang.String charset)
               throws java.io.UnsupportedEncodingException
Throws:
java.io.UnsupportedEncodingException

HtmlCharStream

public HtmlCharStream(java.io.InputStream dstream,
                      int startline,
                      int startcolumn,
                      java.lang.String charset)
               throws java.io.UnsupportedEncodingException
Throws:
java.io.UnsupportedEncodingException

HtmlCharStream

public HtmlCharStream(java.io.InputStream dstream,
                      java.lang.String charset)
               throws java.io.UnsupportedEncodingException
Throws:
java.io.UnsupportedEncodingException
Method Detail

ExpandBuff

protected void ExpandBuff(boolean wrapAround)

FillBuff

protected void FillBuff()
                 throws java.io.IOException
Throws:
java.io.IOException

BeginToken

public char BeginToken()
                throws java.io.IOException
Description copied from interface: CharStream
Returns the next character that marks the beginning of the next token. All characters must remain in the buffer between two successive calls to this method to implement backup correctly.

Specified by:
BeginToken in interface CharStream
Throws:
java.io.IOException

UpdateLineColumn

protected void UpdateLineColumn(char c)

readChar

public char readChar()
              throws java.io.IOException
Description copied from interface: CharStream
Returns the next character from the selected input. The method of selecting the input is the responsibility of the class implementing this interface. Can throw any java.io.IOException.

Specified by:
readChar in interface CharStream
Throws:
java.io.IOException

getColumn

public int getColumn()
Deprecated. 

Description copied from interface: CharStream
Returns the column position of the character last read.

Specified by:
getColumn in interface CharStream
See Also:
getEndColumn()

getLine

public int getLine()
Deprecated. 

Description copied from interface: CharStream
Returns the line number of the character last read.

Specified by:
getLine in interface CharStream
See Also:
getEndLine()

getEndColumn

public int getEndColumn()
Description copied from interface: CharStream
Returns the column number of the last character for current token (being matched after the last call to BeginToken).

Specified by:
getEndColumn in interface CharStream

getEndLine

public int getEndLine()
Description copied from interface: CharStream
Returns the line number of the last character for current token (being matched after the last call to BeginToken).

Specified by:
getEndLine in interface CharStream

getBeginColumn

public int getBeginColumn()
Description copied from interface: CharStream
Returns the column number of the first character for current token (being matched after the last call to BeginToken).

Specified by:
getBeginColumn in interface CharStream

getBeginLine

public int getBeginLine()
Description copied from interface: CharStream
Returns the line number of the first character for current token (being matched after the last call to BeginToken).

Specified by:
getBeginLine in interface CharStream

backup

public void backup(int amount)
Description copied from interface: CharStream
Backs up the input stream by amount steps. Lexer calls this method if it had already read some characters, but could not use them to match a (longer) token. So, they will be used again as the prefix of the next token and it is the implementation's responsibility to do this right.

Specified by:
backup in interface CharStream

ReInit

public void ReInit(java.io.Reader dstream,
                   int startline,
                   int startcolumn,
                   int buffersize)

ReInit

public void ReInit(java.io.Reader dstream,
                   int startline,
                   int startcolumn)

ReInit

public void ReInit(java.io.Reader dstream)

ReInit

public void ReInit(java.io.InputStream dstream,
                   int startline,
                   int startcolumn,
                   int buffersize)

ReInit

public void ReInit(java.io.InputStream dstream)

ReInit

public void ReInit(java.io.InputStream dstream,
                   int startline,
                   int startcolumn)

GetImage

public java.lang.String GetImage()
Description copied from interface: CharStream
Returns a string made up of characters from the marked token beginning to the current buffer position. Implementations have the choice of returning anything that they want to. For example, for efficiency, one might decide to just return null, which is a valid implementation.

Specified by:
GetImage in interface CharStream

GetSuffix

public char[] GetSuffix(int len)
Description copied from interface: CharStream
Returns an array of characters that make up the suffix of length 'len' for the currently matched token. This is used to build up the matched string for use in actions in the case of MORE. A simple and inefficient implementation of this is as follows : { String t = GetImage(); return t.substring(t.length() - len, t.length()).toCharArray(); }

Specified by:
GetSuffix in interface CharStream

Done

public void Done()
Description copied from interface: CharStream
The lexer calls this function to indicate that it is done with the stream and hence implementations can free any resources held by this class. Again, the body of this function can be just empty and it will not affect the lexer's operation.

Specified by:
Done in interface CharStream

adjustBeginLineColumn

public void adjustBeginLineColumn(int newLine,
                                  int newCol)
Method to adjust line and column numbers for the start of a token.


getKindOfLineEnd

public java.lang.String getKindOfLineEnd()
Retrieve the kind of end of line.

Returns: