edu.harvard.hul.ois.jhove
Class TextMDMetadata

java.lang.Object
  extended by edu.harvard.hul.ois.jhove.TextMDMetadata

public class TextMDMetadata
extends java.lang.Object

Encapsulation of the textMD metadata for text files. See http://www.loc.gov/standards/textMD for more information.

Author:
Thomas Ledoux

Field Summary
static java.lang.String[] BYTE_ORDER
          Uses enumerated values of 'big', 'little', and 'middle' endian.
static int BYTE_ORDER_BIG
           
static int BYTE_ORDER_LITTLE
           
static int BYTE_ORDER_MIDDLE
           
static java.lang.String CHARSET_ASCII
           
static java.lang.String CHARSET_ISO8859_1
           
static java.lang.String CHARSET_UTF8
           
static java.lang.String DEFAULT_LOCATION
           
protected static java.util.Map<java.lang.String,java.lang.String> fromISO_639_2_T2B
          Map from ISO 639-2/T (terminology) codes to ISO 639-2/B (bibliographic) codes
static java.lang.String[] LINEBREAK
          Uses enumerated values of 'CR', 'LF' and 'CR/LF' for the identification of the linebreak.
static int LINEBREAK_CR
           
static int LINEBREAK_CRLF
           
static int LINEBREAK_LF
           
static java.lang.String NAMESPACE
          textMD namespace and version
static int NILL
          To represent the unknown
protected static java.util.Set setOfUnknownJavaCharset
          Set of unknown charsets in Java
protected static java.lang.String[] UNKNOWN_JAVA_CHARSET
          Array of textMD charsets unknown by Java's java.nio.charset.Charset
static java.lang.String VERSION
           
 
Constructor Summary
TextMDMetadata()
           
 
Method Summary
 int getByte_order()
           
 java.lang.String getByte_orderString()
           
 java.lang.String getByte_size()
           
 java.lang.String getCharacter_size()
           
 java.lang.String getCharset()
           
 java.lang.String getLanguage()
           
 int getLinebreak()
           
 java.lang.String getLinebreakString()
           
 java.lang.String getMarkup_basis_version()
           
 java.lang.String getMarkup_basis()
           
 java.lang.String getMarkup_language_version()
           
 java.lang.String getMarkup_language()
           
 void setByte_order(int byte_order)
           
 void setByte_size(java.lang.String byte_size)
           
 void setCharacter_size(java.lang.String character_size)
           
 void setCharset(java.lang.String charset)
           
 void setLanguage(java.lang.String language)
           
 void setLinebreak(int linebreak)
           
 void setMarkup_basis_version(java.lang.String markup_basis_version)
           
 void setMarkup_basis(java.lang.String markup_basis)
           
 void setMarkup_language_version(java.lang.String markup_language_version)
           
 void setMarkup_language(java.lang.String markup_language)
           
static java.lang.String toISO_639_2(java.lang.String srcLang)
          Transform a language to the ISO_639-2 language (only enumeration allowed in textMD schema).
static java.lang.String toTextMDCharset(java.lang.String srcCharset)
          Transform a given charset into one from the "authorized" list given in the textMD schema enumeration.
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

NAMESPACE

public static final java.lang.String NAMESPACE
textMD namespace and version

See Also:
Constant Field Values

DEFAULT_LOCATION

public static final java.lang.String DEFAULT_LOCATION
See Also:
Constant Field Values

VERSION

public static final java.lang.String VERSION
See Also:
Constant Field Values

BYTE_ORDER

public static final java.lang.String[] BYTE_ORDER
Uses enumerated values of 'big', 'little', and 'middle' endian.


BYTE_ORDER_BIG

public static final int BYTE_ORDER_BIG
See Also:
Constant Field Values

BYTE_ORDER_LITTLE

public static final int BYTE_ORDER_LITTLE
See Also:
Constant Field Values

BYTE_ORDER_MIDDLE

public static final int BYTE_ORDER_MIDDLE
See Also:
Constant Field Values

LINEBREAK

public static final java.lang.String[] LINEBREAK
Uses enumerated values of 'CR', 'LF' and 'CR/LF' for the identification of the linebreak.


LINEBREAK_CR

public static final int LINEBREAK_CR
See Also:
Constant Field Values

LINEBREAK_LF

public static final int LINEBREAK_LF
See Also:
Constant Field Values

LINEBREAK_CRLF

public static final int LINEBREAK_CRLF
See Also:
Constant Field Values

UNKNOWN_JAVA_CHARSET

protected static final java.lang.String[] UNKNOWN_JAVA_CHARSET
Array of textMD charsets unknown by Java's java.nio.charset.Charset


setOfUnknownJavaCharset

protected static java.util.Set setOfUnknownJavaCharset
Set of unknown charsets in Java


fromISO_639_2_T2B

protected static java.util.Map<java.lang.String,java.lang.String> fromISO_639_2_T2B
Map from ISO 639-2/T (terminology) codes to ISO 639-2/B (bibliographic) codes


CHARSET_ASCII

public static final java.lang.String CHARSET_ASCII
See Also:
Constant Field Values

CHARSET_UTF8

public static final java.lang.String CHARSET_UTF8
See Also:
Constant Field Values

CHARSET_ISO8859_1

public static final java.lang.String CHARSET_ISO8859_1
See Also:
Constant Field Values

NILL

public static final int NILL
To represent the unknown

See Also:
Constant Field Values
Constructor Detail

TextMDMetadata

public TextMDMetadata()
Method Detail

getCharset

public java.lang.String getCharset()
Returns:
the charset

setCharset

public void setCharset(java.lang.String charset)
Parameters:
charset - the charset to set

getByte_order

public int getByte_order()
Returns:
the byte_order

getByte_orderString

public java.lang.String getByte_orderString()

setByte_order

public void setByte_order(int byte_order)
Parameters:
byte_order - the byte_order to set

getByte_size

public java.lang.String getByte_size()
Returns:
the byte_size

setByte_size

public void setByte_size(java.lang.String byte_size)
Parameters:
byte_size - the byte_size to set

getCharacter_size

public java.lang.String getCharacter_size()
Returns:
the character_size

setCharacter_size

public void setCharacter_size(java.lang.String character_size)
Parameters:
character_size - the character_size to set

getLinebreak

public int getLinebreak()
Returns:
the linebreak

getLinebreakString

public java.lang.String getLinebreakString()
Returns:
the linebreak in String form

setLinebreak

public void setLinebreak(int linebreak)
Parameters:
linebreak - the linebreak to set

getLanguage

public java.lang.String getLanguage()
Returns:
the language

setLanguage

public void setLanguage(java.lang.String language)
Parameters:
language - the language to set

getMarkup_basis

public java.lang.String getMarkup_basis()
Returns:
the markup_basis

setMarkup_basis

public void setMarkup_basis(java.lang.String markup_basis)
Parameters:
markup_basis - the markup_basis to set

getMarkup_basis_version

public java.lang.String getMarkup_basis_version()
Returns:
the markup_basis_version

setMarkup_basis_version

public void setMarkup_basis_version(java.lang.String markup_basis_version)
Parameters:
markup_basis_version - the markup_basis_version to set

getMarkup_language

public java.lang.String getMarkup_language()
Returns:
the markup_language

setMarkup_language

public void setMarkup_language(java.lang.String markup_language)
Parameters:
markup_language - the markup_language to set

getMarkup_language_version

public java.lang.String getMarkup_language_version()
Returns:
the markup_language_version

setMarkup_language_version

public void setMarkup_language_version(java.lang.String markup_language_version)
Parameters:
markup_language_version - the markup_language_version to set

toTextMDCharset

public static java.lang.String toTextMDCharset(java.lang.String srcCharset)
Transform a given charset into one from the "authorized" list given in the textMD schema enumeration. From the schema documentation on charset (http://www.loc.gov/standards/textMD/elementSet/index.html#element_charset): "The character set employed by the text. Controlled vocab using IANA names for character sets: http://www.iana.org/assignments/character-sets." The problem arises because the Java Charset uses the preferred MIME name whereas textMD uses the Name ...

Parameters:
srcCharset - charset from the file
Returns:
normalized charset

toISO_639_2

public static java.lang.String toISO_639_2(java.lang.String srcLang)
Transform a language to the ISO_639-2 language (only enumeration allowed in textMD schema).

Parameters:
srcLang - language in the file
Returns:
normalized language in 3 letters (except qaa-qtz)