|
|||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
edu.harvard.hul.ois.jhove.ModuleBase
edu.harvard.hul.ois.jhove.module.PdfModule
public class PdfModule
Module for identification and validation of PDF files.
Field Summary | |
---|---|
protected boolean |
_actionsExist
|
protected ObjectStream |
_cachedObjectStream
Cached object stream. |
protected int |
_cachedStreamIndex
Object number of cached object stream. |
protected java.util.Map<java.lang.Integer,PdfObject> |
_cid0FontsMap
|
protected java.util.Map<java.lang.Integer,PdfObject> |
_cid2FontsMap
|
protected NameTreeNode |
_destNames
|
protected PdfDictionary |
_destsDict
|
protected java.util.List<Property> |
_docCatalogList
|
protected PdfDictionary |
_docCatDict
|
protected PdfIndirectObj |
_docCatDictRef
|
protected PdfDictionary |
_docInfoDict
|
protected PdfIndirectObj |
_docInfoDictRef
|
protected java.util.List<Property> |
_docInfoList
|
protected PageTreeNode |
_docTreeRoot
|
protected NameTreeNode |
_embeddedFiles
|
protected PdfDictionary |
_encryptDict
|
protected PdfIndirectObj |
_encryptDictRef
|
protected boolean |
_encrypted
|
protected java.util.List<Property> |
_encryptList
|
protected long |
_eof
|
protected java.util.List<Property> |
_extStreamsList
|
protected java.util.List<Property> |
_filtersList
|
protected Property |
_idProperty
|
protected java.util.List<Property> |
_imagesList
|
protected Property |
_metadata
|
protected java.util.Map<java.lang.Integer,PdfObject> |
_mmFontsMap
|
protected int |
_nFonts
Number of fonts reported so far. |
protected int |
_numFreeObjects
|
protected int |
_numObjects
|
protected int |
_numTrailers
|
protected int |
_objCount
|
protected java.util.Map |
_objects
|
protected PdfDictionary |
_outlineDict
|
protected PdfDictionary |
_pageLabelDict
|
protected PageLabelNode |
_pageLabelRoot
|
protected PdfIndirectObj |
_pagesDictRef
|
protected java.util.Map<java.lang.Integer,java.lang.Integer> |
_pageSeqMap
|
protected java.util.List<Property> |
_pagesList
|
protected Parser |
_parser
|
protected boolean |
_pdfACompliant
|
protected long |
_prevxref
|
protected java.util.List<PdfProfile> |
_profile
List of profile checkers |
protected java.io.RandomAccessFile |
_raf
|
protected boolean |
_recursionWarned
|
protected boolean |
_showAnnotations
|
protected boolean |
_showFonts
|
protected boolean |
_showOutlines
|
protected boolean |
_showPages
|
protected boolean |
_skippedAnnotationsReported
|
protected boolean |
_skippedFontsReported
|
protected boolean |
_skippedOutlinesReported
|
protected boolean |
_skippedPagesReported
|
protected long |
_startxref
|
protected PdfDictionary |
_trailerDict
|
protected java.util.Map<java.lang.Integer,PdfObject> |
_trueTypeFontsMap
|
protected java.util.Map<java.lang.Integer,PdfObject> |
_type0FontsMap
|
protected java.util.Map<java.lang.Integer,PdfObject> |
_type1FontsMap
|
protected java.util.Map<java.lang.Integer,PdfObject> |
_type3FontsMap
|
protected java.lang.String |
_version
|
protected PdfDictionary |
_viewPrefDict
|
protected java.util.Set<java.lang.Integer> |
_visitedOutlineNodes
Set of visited nodes when walking through an outline. |
protected Property |
_xmpProp
|
protected long[] |
_xref
|
protected int[][] |
_xref2
|
protected boolean |
_xrefIsStream
|
protected int |
DEFAULT_MAX_FONTS
PRIVATE INSTANCE FIELDS. |
static int |
F_CID0
Font type selectors. |
static int |
F_CID2
Font type selectors. |
static int |
F_MM1
Font type selectors. |
static int |
F_TT
Font type selectors. |
static int |
F_TYPE0
Font type selectors. |
static int |
F_TYPE1
Font type selectors. |
static int |
F_TYPE3
Font type selectors. |
protected int |
maxFonts
Maximum number of fonts to report full information on. |
protected static java.lang.String |
outlinesRecursiveString
|
Fields inherited from class edu.harvard.hul.ois.jhove.ModuleBase |
---|
_app, _bigEndian, _checksumFinished, _countStream, _coverage, _crc32, _date, _defaultParams, _features, _format, _init, _isRandomAccess, _je, _logger, _md5, _mimeType, _name, _nByte, _note, _param, _release, _repInfoNote, _rights, _sha1, _signature, _specification, _validityNote, _vendor, _verbosity, _wellFormedNote |
Fields inherited from interface edu.harvard.hul.ois.jhove.Module |
---|
MAXIMUM_VERBOSITY, MINIMUM_VERBOSITY |
Constructor Summary | |
---|---|
PdfModule()
Creates an instance of the module and initializes identifying information. |
Method Summary | |
---|---|
protected void |
addDateProperty(PdfDictionary dict,
java.util.List<Property> propList,
java.lang.String key,
java.lang.String propName)
Add a date property, based on a dictionary entry with a string value, to a specified List. |
protected void |
addDestination(PdfObject itemObj,
java.lang.String propName,
java.util.List<Property> propList,
RepInfo info)
|
protected void |
addFontsProperty(java.util.List<Property> metadataList)
Add the various font lists as a fonts property. |
protected java.lang.String |
addFontToMap(PdfDictionary font)
Add the font to the appropriate map, and return its subtype. |
protected void |
addPagesProperty(java.util.List<Property> metadataList,
RepInfo info)
|
protected void |
addStringProperty(PdfDictionary dict,
java.util.List<Property> propList,
java.lang.String key,
java.lang.String propName)
Add a string property, based on a dictionary entry with a string value, to a specified List. |
protected Property |
buildAnnotProperty(PdfDictionary annot,
RepInfo info)
|
protected Property |
buildBitmaskProperty(int val,
java.lang.String name,
java.lang.String[] valueNames,
java.lang.String defaultStr)
|
protected Property |
buildCIDInfoProperty(PdfDictionary dict)
|
protected Property |
buildCMapDictProperty(PdfStream encoding)
|
protected Property |
buildEncodingDictProperty(PdfDictionary encodingDict)
|
protected Property |
buildFontDescriptorProperty(PdfDictionary encodingDict)
|
protected Property |
buildFontProperty(java.lang.String name,
java.util.Map map,
int fontType)
|
protected Property |
buildMeasureProperty(PdfDictionary meas)
|
protected Property |
buildOutlineItemProperty(PdfDictionary dict,
RepInfo info)
|
protected Property |
buildOutlinesProperty(PdfDictionary dict,
RepInfo info)
|
protected Property |
buildPageLabelProperty(PageObject page,
int pageIndex,
int[] nomNumRef)
|
protected Property |
buildPageProperty(PageObject page,
int idx,
RepInfo info)
|
protected Property |
buildUserPermProperty(int flags,
java.lang.String[] flagStrs)
|
protected Property |
buildViewPrefProperty(PdfDictionary prefDict)
|
protected boolean |
doOutlineStuff(RepInfo info)
|
protected java.lang.String |
extractFilters(Filter[] filters,
PdfStream stream)
Finds the filters in a stream or array object which is the value of a stream's Filter key, and puts them in _filtersList if a duplicate isn't there already. |
protected void |
findExternalStreams(RepInfo info)
|
protected boolean |
findFilters(RepInfo info)
Locates the filters in the content stream dictionaries and generates a list of unique pipelines. |
protected void |
findFonts(RepInfo info)
|
protected void |
findImages(RepInfo info)
|
protected boolean |
findLastTrailer(RepInfo info)
Locates the last trailer of the file. |
boolean |
getActionsExist()
Return true if Actions have been detected in the file. |
PdfDictionary |
getCatalogDict()
Returns the catalog dictionary object. |
PdfDictionary |
getDocInfo()
Returns the document information dictionary. |
PageTreeNode |
getDocumentTree()
Returns the document tree root. |
NameTreeNode |
getEmbeddedFiles()
Returns a NameTreeNode for the EmbeddedFiles entry of the Names dictionary. |
PdfDictionary |
getEncryptionDict()
Returns the encryption dictionary. |
java.io.RandomAccessFile |
getFile()
Return the RandomAccessFile being read. |
java.util.Map<java.lang.Integer,PdfObject> |
getFontMap(int selector)
Get a font map. |
java.util.List<java.util.Map<java.lang.Integer,PdfObject>> |
getFontMaps()
Return a List of all the font maps. |
protected PdfObject |
getObject(int objIndex,
int recGuard)
Returns an object of a given number. |
PdfDictionary |
getOutlineDict()
Returns the outlines dictionary object. |
PdfDictionary |
getTrailerDict()
Returns the trailer dictionary object. |
PdfDictionary |
getViewPrefDict()
Returns the viewer preferences dictionary object. |
protected void |
initParse()
Initialize the module. |
protected boolean |
isFontSubset(java.lang.String baseStr)
|
protected Property |
makeRectProperty(PdfArray arrObj,
java.lang.String name)
|
boolean |
mayBePDFACompliant()
Returns true if the module hasn't detected any violations of PDF/A compliance. |
protected int |
nameToNiso(java.lang.String name,
java.lang.String[] nameArray,
int[] valArray)
|
protected java.util.List<Property> |
oneFontPropList(PdfDictionary dict,
int fontType)
|
void |
param(java.lang.String param)
Per-action initialization. |
void |
parse(java.io.RandomAccessFile raf,
RepInfo info)
Parses a file and stores descriptive information. |
protected boolean |
parseHeader(RepInfo info)
|
protected boolean |
parseTrailer(RepInfo info,
boolean prevOnly)
|
protected boolean |
readDocCatalogDict(RepInfo info)
|
protected boolean |
readDocInfoDict(RepInfo info)
|
protected boolean |
readDocumentTree(RepInfo info)
|
protected boolean |
readEncryptDict(RepInfo info)
|
protected boolean |
readPageLabelTree(RepInfo info)
|
protected boolean |
readXMPData(RepInfo info)
|
protected boolean |
readXRefInfo(RepInfo info)
|
protected boolean |
readXRefStreams(RepInfo info)
|
protected boolean |
readXRefTables(RepInfo info)
|
void |
resetParams()
Reset parameter settings. |
protected int |
resolveIndirectDest(PdfSimpleObject key)
|
PdfObject |
resolveIndirectObject(PdfObject indObj)
If the argument is an indirect object reference, returns the object it resolves to, otherwise returns the object itself. |
protected static java.lang.String |
toHex(java.lang.String s)
PRIVATE CLASS METHODS. |
protected static java.lang.String |
toHex(java.util.Vector<java.lang.Integer> v)
|
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
public static final int F_TYPE0
public static final int F_TYPE1
public static final int F_TT
public static final int F_TYPE3
public static final int F_MM1
public static final int F_CID0
public static final int F_CID2
protected int DEFAULT_MAX_FONTS
protected java.io.RandomAccessFile _raf
protected Parser _parser
protected java.lang.String _version
protected Property _metadata
protected Property _xmpProp
protected long _eof
protected long _startxref
protected long _prevxref
protected int _numFreeObjects
protected Property _idProperty
protected int _objCount
protected int _numObjects
protected int _numTrailers
protected java.util.Map _objects
protected long[] _xref
protected int[][] _xref2
protected boolean _xrefIsStream
protected boolean _encrypted
protected java.util.List<Property> _docCatalogList
protected java.util.List<Property> _encryptList
protected java.util.List<Property> _docInfoList
protected java.util.List<Property> _extStreamsList
protected java.util.List<Property> _imagesList
protected java.util.List<Property> _filtersList
protected java.util.List<Property> _pagesList
protected java.util.Map<java.lang.Integer,PdfObject> _type0FontsMap
protected java.util.Map<java.lang.Integer,PdfObject> _type1FontsMap
protected java.util.Map<java.lang.Integer,PdfObject> _mmFontsMap
protected java.util.Map<java.lang.Integer,PdfObject> _type3FontsMap
protected java.util.Map<java.lang.Integer,PdfObject> _trueTypeFontsMap
protected java.util.Map<java.lang.Integer,PdfObject> _cid0FontsMap
protected java.util.Map<java.lang.Integer,PdfObject> _cid2FontsMap
protected java.util.Map<java.lang.Integer,java.lang.Integer> _pageSeqMap
protected PdfIndirectObj _docCatDictRef
protected PdfIndirectObj _encryptDictRef
protected PdfIndirectObj _docInfoDictRef
protected PdfIndirectObj _pagesDictRef
protected PdfDictionary _docCatDict
protected PdfDictionary _docInfoDict
protected PageTreeNode _docTreeRoot
protected PdfDictionary _pageLabelDict
protected PageLabelNode _pageLabelRoot
protected NameTreeNode _embeddedFiles
protected NameTreeNode _destNames
protected PdfDictionary _encryptDict
protected PdfDictionary _trailerDict
protected PdfDictionary _viewPrefDict
protected PdfDictionary _outlineDict
protected PdfDictionary _destsDict
protected boolean _showFonts
protected boolean _showOutlines
protected boolean _showAnnotations
protected boolean _showPages
protected boolean _actionsExist
protected boolean _pdfACompliant
protected boolean _recursionWarned
protected boolean _skippedFontsReported
protected boolean _skippedOutlinesReported
protected boolean _skippedAnnotationsReported
protected boolean _skippedPagesReported
protected java.util.List<PdfProfile> _profile
protected ObjectStream _cachedObjectStream
protected int _cachedStreamIndex
protected java.util.Set<java.lang.Integer> _visitedOutlineNodes
protected int maxFonts
protected int _nFonts
protected static final java.lang.String outlinesRecursiveString
Constructor Detail |
---|
public PdfModule()
Method Detail |
---|
public void resetParams() throws java.lang.Exception
resetParams
in interface Module
resetParams
in class ModuleBase
java.lang.Exception
public void param(java.lang.String param)
param
in interface Module
param
in class ModuleBase
param
- The module parameter; under command-line Jhove, the -p parameter.
If the parameter contains the indicated characters, then the
specified information is omitted; otherwise, it is included.
(This is the reverse of the behavior prior to beta 3.)
These characters may be provided as separate parameters,
or all in a single parameter.
public final void parse(java.io.RandomAccessFile raf, RepInfo info) throws java.io.IOException
parse
in interface Module
parse
in class ModuleBase
raf
- A PDF file
info
- A clean RepInfo object, which will be modified to hold
the descriptive information
java.io.IOException
public boolean mayBePDFACompliant()
The AProfile profiler makes the final determination.
public PageTreeNode getDocumentTree()
public PdfDictionary getDocInfo()
public PdfDictionary getEncryptionDict()
public boolean getActionsExist()
protected final void initParse()
initParse
in class ModuleBase
protected boolean parseHeader(RepInfo info) throws java.io.IOException
java.io.IOException
protected boolean findLastTrailer(RepInfo info) throws java.io.IOException
java.io.IOException
protected boolean parseTrailer(RepInfo info, boolean prevOnly) throws java.io.IOException
java.io.IOException
protected boolean readXRefInfo(RepInfo info) throws java.io.IOException
java.io.IOException
protected boolean readXRefStreams(RepInfo info) throws java.io.IOException
java.io.IOException
protected boolean readXRefTables(RepInfo info) throws java.io.IOException
java.io.IOException
protected boolean readDocCatalogDict(RepInfo info) throws java.io.IOException
java.io.IOException
protected boolean readEncryptDict(RepInfo info) throws java.io.IOException
java.io.IOException
protected boolean readDocInfoDict(RepInfo info) throws java.io.IOException
java.io.IOException
protected boolean readDocumentTree(RepInfo info) throws java.io.IOException
java.io.IOException
protected boolean readPageLabelTree(RepInfo info)
protected boolean readXMPData(RepInfo info)
protected void findExternalStreams(RepInfo info) throws java.io.IOException
java.io.IOException
protected boolean findFilters(RepInfo info) throws java.io.IOException
false
if the filter structure is
defective.
java.io.IOException
protected java.lang.String extractFilters(Filter[] filters, PdfStream stream)
protected void findImages(RepInfo info) throws java.io.IOException
java.io.IOException
protected int nameToNiso(java.lang.String name, java.lang.String[] nameArray, int[] valArray)
protected void findFonts(RepInfo info) throws java.io.IOException
java.io.IOException
protected java.lang.String addFontToMap(PdfDictionary font)
protected static java.lang.String toHex(java.lang.String s)
protected static java.lang.String toHex(java.util.Vector<java.lang.Integer> v)
public PdfObject resolveIndirectObject(PdfObject indObj) throws PdfException, java.io.IOException
PdfException
java.io.IOException
protected PdfObject getObject(int objIndex, int recGuard) throws PdfException, java.io.IOException
objIndex
- The object number to look up
recGuard
- The maximum permitted number of recursion levels;
no particular value is required, but 30 or more
should avoid false exceptions.
PdfException
java.io.IOException
public java.io.RandomAccessFile getFile()
public PdfDictionary getCatalogDict()
public PdfDictionary getTrailerDict()
public PdfDictionary getViewPrefDict()
public PdfDictionary getOutlineDict()
public java.util.Map<java.lang.Integer,PdfObject> getFontMap(int selector)
public java.util.List<java.util.Map<java.lang.Integer,PdfObject>> getFontMaps()
public NameTreeNode getEmbeddedFiles()
protected void addFontsProperty(java.util.List<Property> metadataList)
protected void addPagesProperty(java.util.List<Property> metadataList, RepInfo info)
protected Property buildPageProperty(PageObject page, int idx, RepInfo info) throws PdfException
PdfException
protected Property buildPageLabelProperty(PageObject page, int pageIndex, int[] nomNumRef) throws PdfException
PdfException
protected Property buildMeasureProperty(PdfDictionary meas)
protected Property buildAnnotProperty(PdfDictionary annot, RepInfo info) throws PdfException
PdfException
protected void addDestination(PdfObject itemObj, java.lang.String propName, java.util.List<Property> propList, RepInfo info) throws PdfException
PdfException
protected Property buildFontProperty(java.lang.String name, java.util.Map map, int fontType)
protected java.util.List<Property> oneFontPropList(PdfDictionary dict, int fontType)
protected Property buildCMapDictProperty(PdfStream encoding)
protected Property buildCIDInfoProperty(PdfDictionary dict)
protected Property buildEncodingDictProperty(PdfDictionary encodingDict)
protected Property buildFontDescriptorProperty(PdfDictionary encodingDict)
protected Property buildViewPrefProperty(PdfDictionary prefDict)
protected boolean isFontSubset(java.lang.String baseStr)
protected Property buildOutlinesProperty(PdfDictionary dict, RepInfo info) throws PdfException
PdfException
protected Property buildOutlineItemProperty(PdfDictionary dict, RepInfo info) throws PdfException
PdfException
protected boolean doOutlineStuff(RepInfo info)
protected int resolveIndirectDest(PdfSimpleObject key) throws PdfException
PdfException
protected Property buildUserPermProperty(int flags, java.lang.String[] flagStrs)
protected void addStringProperty(PdfDictionary dict, java.util.List<Property> propList, java.lang.String key, java.lang.String propName)
protected void addDateProperty(PdfDictionary dict, java.util.List<Property> propList, java.lang.String key, java.lang.String propName) throws PdfException
PdfException
protected Property buildBitmaskProperty(int val, java.lang.String name, java.lang.String[] valueNames, java.lang.String defaultStr)
protected Property makeRectProperty(PdfArray arrObj, java.lang.String name) throws PdfException
PdfException
|
|||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |