Class Document (2.52.0)

public final class Document extends GeneratedMessageV3 implements DocumentOrBuilder

Document represents the canonical document resource in Document AI. It is an interchange format that provides insights into documents and allows for collaboration between users and Document AI to iterate and optimize for quality.

Protobuf type google.cloud.documentai.v1beta2.Document

Implements

DocumentOrBuilder

Static Fields

CONTENT_FIELD_NUMBER

public static final int CONTENT_FIELD_NUMBER
Field Value
Type Description
int

ENTITIES_FIELD_NUMBER

public static final int ENTITIES_FIELD_NUMBER
Field Value
Type Description
int

ENTITY_RELATIONS_FIELD_NUMBER

public static final int ENTITY_RELATIONS_FIELD_NUMBER
Field Value
Type Description
int

ERROR_FIELD_NUMBER

public static final int ERROR_FIELD_NUMBER
Field Value
Type Description
int

LABELS_FIELD_NUMBER

public static final int LABELS_FIELD_NUMBER
Field Value
Type Description
int

MIME_TYPE_FIELD_NUMBER

public static final int MIME_TYPE_FIELD_NUMBER
Field Value
Type Description
int

PAGES_FIELD_NUMBER

public static final int PAGES_FIELD_NUMBER
Field Value
Type Description
int

REVISIONS_FIELD_NUMBER

public static final int REVISIONS_FIELD_NUMBER
Field Value
Type Description
int

SHARD_INFO_FIELD_NUMBER

public static final int SHARD_INFO_FIELD_NUMBER
Field Value
Type Description
int

TEXT_CHANGES_FIELD_NUMBER

public static final int TEXT_CHANGES_FIELD_NUMBER
Field Value
Type Description
int

TEXT_FIELD_NUMBER

public static final int TEXT_FIELD_NUMBER
Field Value
Type Description
int

TEXT_STYLES_FIELD_NUMBER

public static final int TEXT_STYLES_FIELD_NUMBER
Field Value
Type Description
int

URI_FIELD_NUMBER

public static final int URI_FIELD_NUMBER
Field Value
Type Description
int

Static Methods

getDefaultInstance()

public static Document getDefaultInstance()
Returns
Type Description
Document

getDescriptor()

public static final Descriptors.Descriptor getDescriptor()
Returns
Type Description
Descriptor

newBuilder()

public static Document.Builder newBuilder()
Returns
Type Description
Document.Builder

newBuilder(Document prototype)

public static Document.Builder newBuilder(Document prototype)
Parameter
Name Description
prototype Document
Returns
Type Description
Document.Builder

parseDelimitedFrom(InputStream input)

public static Document parseDelimitedFrom(InputStream input)
Parameter
Name Description
input InputStream
Returns
Type Description
Document
Exceptions
Type Description
IOException

parseDelimitedFrom(InputStream input, ExtensionRegistryLite extensionRegistry)

public static Document parseDelimitedFrom(InputStream input, ExtensionRegistryLite extensionRegistry)
Parameters
Name Description
input InputStream
extensionRegistry ExtensionRegistryLite
Returns
Type Description
Document
Exceptions
Type Description
IOException

parseFrom(byte[] data)

public static Document parseFrom(byte[] data)
Parameter
Name Description
data byte[]
Returns
Type Description
Document
Exceptions
Type Description
InvalidProtocolBufferException

parseFrom(byte[] data, ExtensionRegistryLite extensionRegistry)

public static Document parseFrom(byte[] data, ExtensionRegistryLite extensionRegistry)
Parameters
Name Description
data byte[]
extensionRegistry ExtensionRegistryLite
Returns
Type Description
Document
Exceptions
Type Description
InvalidProtocolBufferException

parseFrom(ByteString data)

public static Document parseFrom(ByteString data)
Parameter
Name Description
data ByteString
Returns
Type Description
Document
Exceptions
Type Description
InvalidProtocolBufferException

parseFrom(ByteString data, ExtensionRegistryLite extensionRegistry)

public static Document parseFrom(ByteString data, ExtensionRegistryLite extensionRegistry)
Parameters
Name Description
data ByteString
extensionRegistry ExtensionRegistryLite
Returns
Type Description
Document
Exceptions
Type Description
InvalidProtocolBufferException

parseFrom(CodedInputStream input)

public static Document parseFrom(CodedInputStream input)
Parameter
Name Description
input CodedInputStream
Returns
Type Description
Document
Exceptions
Type Description
IOException

parseFrom(CodedInputStream input, ExtensionRegistryLite extensionRegistry)

public static Document parseFrom(CodedInputStream input, ExtensionRegistryLite extensionRegistry)
Parameters
Name Description
input CodedInputStream
extensionRegistry ExtensionRegistryLite
Returns
Type Description
Document
Exceptions
Type Description
IOException

parseFrom(InputStream input)

public static Document parseFrom(InputStream input)
Parameter
Name Description
input InputStream
Returns
Type Description
Document
Exceptions
Type Description
IOException

parseFrom(InputStream input, ExtensionRegistryLite extensionRegistry)

public static Document parseFrom(InputStream input, ExtensionRegistryLite extensionRegistry)
Parameters
Name Description
input InputStream
extensionRegistry ExtensionRegistryLite
Returns
Type Description
Document
Exceptions
Type Description
IOException

parseFrom(ByteBuffer data)

public static Document parseFrom(ByteBuffer data)
Parameter
Name Description
data ByteBuffer
Returns
Type Description
Document
Exceptions
Type Description
InvalidProtocolBufferException

parseFrom(ByteBuffer data, ExtensionRegistryLite extensionRegistry)

public static Document parseFrom(ByteBuffer data, ExtensionRegistryLite extensionRegistry)
Parameters
Name Description
data ByteBuffer
extensionRegistry ExtensionRegistryLite
Returns
Type Description
Document
Exceptions
Type Description
InvalidProtocolBufferException

parser()

public static Parser<Document> parser()
Returns
Type Description
Parser<Document>

Methods

equals(Object obj)

public boolean equals(Object obj)
Parameter
Name Description
obj Object
Returns
Type Description
boolean
Overrides

getContent()

public ByteString getContent()

Optional. Inline document content, represented as a stream of bytes. Note: As with all bytes fields, Protocol Buffers use a pure binary representation, whereas JSON representations use base64.

bytes content = 2 [(.google.api.field_behavior) = OPTIONAL];

Returns
Type Description
ByteString

The content.

getDefaultInstanceForType()

public Document getDefaultInstanceForType()
Returns
Type Description
Document

getEntities(int index)

public Document.Entity getEntities(int index)

A list of entities detected on Document.text. For document shards, entities in this list may cross shard boundaries.

repeated .google.cloud.documentai.v1beta2.Document.Entity entities = 7;

Parameter
Name Description
index int
Returns
Type Description
Document.Entity

getEntitiesCount()

public int getEntitiesCount()

A list of entities detected on Document.text. For document shards, entities in this list may cross shard boundaries.

repeated .google.cloud.documentai.v1beta2.Document.Entity entities = 7;

Returns
Type Description
int

getEntitiesList()

public List<Document.Entity> getEntitiesList()

A list of entities detected on Document.text. For document shards, entities in this list may cross shard boundaries.

repeated .google.cloud.documentai.v1beta2.Document.Entity entities = 7;

Returns
Type Description
List<Entity>

getEntitiesOrBuilder(int index)

public Document.EntityOrBuilder getEntitiesOrBuilder(int index)

A list of entities detected on Document.text. For document shards, entities in this list may cross shard boundaries.

repeated .google.cloud.documentai.v1beta2.Document.Entity entities = 7;

Parameter
Name Description
index int
Returns
Type Description
Document.EntityOrBuilder

getEntitiesOrBuilderList()

public List<? extends Document.EntityOrBuilder> getEntitiesOrBuilderList()

A list of entities detected on Document.text. For document shards, entities in this list may cross shard boundaries.

repeated .google.cloud.documentai.v1beta2.Document.Entity entities = 7;

Returns
Type Description
List<? extends com.google.cloud.documentai.v1beta2.Document.EntityOrBuilder>

getEntityRelations(int index)

public Document.EntityRelation getEntityRelations(int index)

Placeholder. Relationship among Document.entities.

repeated .google.cloud.documentai.v1beta2.Document.EntityRelation entity_relations = 8;

Parameter
Name Description
index int
Returns
Type Description
Document.EntityRelation

getEntityRelationsCount()

public int getEntityRelationsCount()

Placeholder. Relationship among Document.entities.

repeated .google.cloud.documentai.v1beta2.Document.EntityRelation entity_relations = 8;

Returns
Type Description
int

getEntityRelationsList()

public List<Document.EntityRelation> getEntityRelationsList()

Placeholder. Relationship among Document.entities.

repeated .google.cloud.documentai.v1beta2.Document.EntityRelation entity_relations = 8;

Returns
Type Description
List<EntityRelation>

getEntityRelationsOrBuilder(int index)

public Document.EntityRelationOrBuilder getEntityRelationsOrBuilder(int index)

Placeholder. Relationship among Document.entities.

repeated .google.cloud.documentai.v1beta2.Document.EntityRelation entity_relations = 8;

Parameter
Name Description
index int
Returns
Type Description
Document.EntityRelationOrBuilder

getEntityRelationsOrBuilderList()

public List<? extends Document.EntityRelationOrBuilder> getEntityRelationsOrBuilderList()

Placeholder. Relationship among Document.entities.

repeated .google.cloud.documentai.v1beta2.Document.EntityRelation entity_relations = 8;

Returns
Type Description
List<? extends com.google.cloud.documentai.v1beta2.Document.EntityRelationOrBuilder>

getError()

public Status getError()

Any error that occurred while processing this document.

.google.rpc.Status error = 10;

Returns
Type Description
com.google.rpc.Status

The error.

getErrorOrBuilder()

public StatusOrBuilder getErrorOrBuilder()

Any error that occurred while processing this document.

.google.rpc.Status error = 10;

Returns
Type Description
com.google.rpc.StatusOrBuilder

getLabels(int index)

public Document.Label getLabels(int index)

Labels for this document.

repeated .google.cloud.documentai.v1beta2.Document.Label labels = 11;

Parameter
Name Description
index int
Returns
Type Description
Document.Label

getLabelsCount()

public int getLabelsCount()

Labels for this document.

repeated .google.cloud.documentai.v1beta2.Document.Label labels = 11;

Returns
Type Description
int

getLabelsList()

public List<Document.Label> getLabelsList()

Labels for this document.

repeated .google.cloud.documentai.v1beta2.Document.Label labels = 11;

Returns
Type Description
List<Label>

getLabelsOrBuilder(int index)

public Document.LabelOrBuilder getLabelsOrBuilder(int index)

Labels for this document.

repeated .google.cloud.documentai.v1beta2.Document.Label labels = 11;

Parameter
Name Description
index int
Returns
Type Description
Document.LabelOrBuilder

getLabelsOrBuilderList()

public List<? extends Document.LabelOrBuilder> getLabelsOrBuilderList()

Labels for this document.

repeated .google.cloud.documentai.v1beta2.Document.Label labels = 11;

Returns
Type Description
List<? extends com.google.cloud.documentai.v1beta2.Document.LabelOrBuilder>

getMimeType()

public String getMimeType()

An IANA published media type (MIME type).

string mime_type = 3;

Returns
Type Description
String

The mimeType.

getMimeTypeBytes()

public ByteString getMimeTypeBytes()

An IANA published media type (MIME type).

string mime_type = 3;

Returns
Type Description
ByteString

The bytes for mimeType.

getPages(int index)

public Document.Page getPages(int index)

Visual page layout for the Document.

repeated .google.cloud.documentai.v1beta2.Document.Page pages = 6;

Parameter
Name Description
index int
Returns
Type Description
Document.Page

getPagesCount()

public int getPagesCount()

Visual page layout for the Document.

repeated .google.cloud.documentai.v1beta2.Document.Page pages = 6;

Returns
Type Description
int

getPagesList()

public List<Document.Page> getPagesList()

Visual page layout for the Document.

repeated .google.cloud.documentai.v1beta2.Document.Page pages = 6;

Returns
Type Description
List<Page>

getPagesOrBuilder(int index)

public Document.PageOrBuilder getPagesOrBuilder(int index)

Visual page layout for the Document.

repeated .google.cloud.documentai.v1beta2.Document.Page pages = 6;

Parameter
Name Description
index int
Returns
Type Description
Document.PageOrBuilder

getPagesOrBuilderList()

public List<? extends Document.PageOrBuilder> getPagesOrBuilderList()

Visual page layout for the Document.

repeated .google.cloud.documentai.v1beta2.Document.Page pages = 6;

Returns
Type Description
List<? extends com.google.cloud.documentai.v1beta2.Document.PageOrBuilder>

getParserForType()

public Parser<Document> getParserForType()
Returns
Type Description
Parser<Document>
Overrides

getRevisions(int index)

public Document.Revision getRevisions(int index)

Placeholder. Revision history of this document.

repeated .google.cloud.documentai.v1beta2.Document.Revision revisions = 13;

Parameter
Name Description
index int
Returns
Type Description
Document.Revision

getRevisionsCount()

public int getRevisionsCount()

Placeholder. Revision history of this document.

repeated .google.cloud.documentai.v1beta2.Document.Revision revisions = 13;

Returns
Type Description
int

getRevisionsList()

public List<Document.Revision> getRevisionsList()

Placeholder. Revision history of this document.

repeated .google.cloud.documentai.v1beta2.Document.Revision revisions = 13;

Returns
Type Description
List<Revision>

getRevisionsOrBuilder(int index)

public Document.RevisionOrBuilder getRevisionsOrBuilder(int index)

Placeholder. Revision history of this document.

repeated .google.cloud.documentai.v1beta2.Document.Revision revisions = 13;

Parameter
Name Description
index int
Returns
Type Description
Document.RevisionOrBuilder

getRevisionsOrBuilderList()

public List<? extends Document.RevisionOrBuilder> getRevisionsOrBuilderList()

Placeholder. Revision history of this document.

repeated .google.cloud.documentai.v1beta2.Document.Revision revisions = 13;

Returns
Type Description
List<? extends com.google.cloud.documentai.v1beta2.Document.RevisionOrBuilder>

getSerializedSize()

public int getSerializedSize()
Returns
Type Description
int
Overrides

getShardInfo()

public Document.ShardInfo getShardInfo()

Information about the sharding if this document is a sharded part of a larger document. If the document is not sharded, this message is not specified.

.google.cloud.documentai.v1beta2.Document.ShardInfo shard_info = 9;

Returns
Type Description
Document.ShardInfo

The shardInfo.

getShardInfoOrBuilder()

public Document.ShardInfoOrBuilder getShardInfoOrBuilder()

Information about the sharding if this document is a sharded part of a larger document. If the document is not sharded, this message is not specified.

.google.cloud.documentai.v1beta2.Document.ShardInfo shard_info = 9;

Returns
Type Description
Document.ShardInfoOrBuilder

getSourceCase()

public Document.SourceCase getSourceCase()
Returns
Type Description
Document.SourceCase

getText()

public String getText()

Optional. UTF-8 encoded text in reading order from the document.

string text = 4 [(.google.api.field_behavior) = OPTIONAL];

Returns
Type Description
String

The text.

getTextBytes()

public ByteString getTextBytes()

Optional. UTF-8 encoded text in reading order from the document.

string text = 4 [(.google.api.field_behavior) = OPTIONAL];

Returns
Type Description
ByteString

The bytes for text.

getTextChanges(int index)

public Document.TextChange getTextChanges(int index)

Placeholder. A list of text corrections made to Document.text. This is usually used for annotating corrections to OCR mistakes. Text changes for a given revision may not overlap with each other.

repeated .google.cloud.documentai.v1beta2.Document.TextChange text_changes = 14;

Parameter
Name Description
index int
Returns
Type Description
Document.TextChange

getTextChangesCount()

public int getTextChangesCount()

Placeholder. A list of text corrections made to Document.text. This is usually used for annotating corrections to OCR mistakes. Text changes for a given revision may not overlap with each other.

repeated .google.cloud.documentai.v1beta2.Document.TextChange text_changes = 14;

Returns
Type Description
int

getTextChangesList()

public List<Document.TextChange> getTextChangesList()

Placeholder. A list of text corrections made to Document.text. This is usually used for annotating corrections to OCR mistakes. Text changes for a given revision may not overlap with each other.

repeated .google.cloud.documentai.v1beta2.Document.TextChange text_changes = 14;

Returns
Type Description
List<TextChange>

getTextChangesOrBuilder(int index)

public Document.TextChangeOrBuilder getTextChangesOrBuilder(int index)

Placeholder. A list of text corrections made to Document.text. This is usually used for annotating corrections to OCR mistakes. Text changes for a given revision may not overlap with each other.

repeated .google.cloud.documentai.v1beta2.Document.TextChange text_changes = 14;

Parameter
Name Description
index int
Returns
Type Description
Document.TextChangeOrBuilder

getTextChangesOrBuilderList()

public List<? extends Document.TextChangeOrBuilder> getTextChangesOrBuilderList()

Placeholder. A list of text corrections made to Document.text. This is usually used for annotating corrections to OCR mistakes. Text changes for a given revision may not overlap with each other.

repeated .google.cloud.documentai.v1beta2.Document.TextChange text_changes = 14;

Returns
Type Description
List<? extends com.google.cloud.documentai.v1beta2.Document.TextChangeOrBuilder>

getTextStyles(int index) (deprecated)

public Document.Style getTextStyles(int index)

Styles for the Document.text.

repeated .google.cloud.documentai.v1beta2.Document.Style text_styles = 5 [deprecated = true];

Parameter
Name Description
index int
Returns
Type Description
Document.Style

getTextStylesCount() (deprecated)

public int getTextStylesCount()

Styles for the Document.text.

repeated .google.cloud.documentai.v1beta2.Document.Style text_styles = 5 [deprecated = true];

Returns
Type Description
int

getTextStylesList() (deprecated)

public List<Document.Style> getTextStylesList()

Styles for the Document.text.

repeated .google.cloud.documentai.v1beta2.Document.Style text_styles = 5 [deprecated = true];

Returns
Type Description
List<Style>

getTextStylesOrBuilder(int index) (deprecated)

public Document.StyleOrBuilder getTextStylesOrBuilder(int index)

Styles for the Document.text.

repeated .google.cloud.documentai.v1beta2.Document.Style text_styles = 5 [deprecated = true];

Parameter
Name Description
index int
Returns
Type Description
Document.StyleOrBuilder

getTextStylesOrBuilderList() (deprecated)

public List<? extends Document.StyleOrBuilder> getTextStylesOrBuilderList()

Styles for the Document.text.

repeated .google.cloud.documentai.v1beta2.Document.Style text_styles = 5 [deprecated = true];

Returns
Type Description
List<? extends com.google.cloud.documentai.v1beta2.Document.StyleOrBuilder>

getUri()

public String getUri()

Optional. Currently supports Google Cloud Storage URIs of the form gs://bucket_name/object_name. Object versioning is not supported. For more information, refer to Google Cloud Storage Request URIs.

string uri = 1 [(.google.api.field_behavior) = OPTIONAL];

Returns
Type Description
String

The uri.

getUriBytes()

public ByteString getUriBytes()

Optional. Currently supports Google Cloud Storage URIs of the form gs://bucket_name/object_name. Object versioning is not supported. For more information, refer to Google Cloud Storage Request URIs.

string uri = 1 [(.google.api.field_behavior) = OPTIONAL];

Returns
Type Description
ByteString

The bytes for uri.

hasContent()

public boolean hasContent()

Optional. Inline document content, represented as a stream of bytes. Note: As with all bytes fields, Protocol Buffers use a pure binary representation, whereas JSON representations use base64.

bytes content = 2 [(.google.api.field_behavior) = OPTIONAL];

Returns
Type Description
boolean

Whether the content field is set.

hasError()

public boolean hasError()

Any error that occurred while processing this document.

.google.rpc.Status error = 10;

Returns
Type Description
boolean

Whether the error field is set.

hasShardInfo()

public boolean hasShardInfo()

Information about the sharding if this document is a sharded part of a larger document. If the document is not sharded, this message is not specified.

.google.cloud.documentai.v1beta2.Document.ShardInfo shard_info = 9;

Returns
Type Description
boolean

Whether the shardInfo field is set.

hasUri()

public boolean hasUri()

Optional. Currently supports Google Cloud Storage URIs of the form gs://bucket_name/object_name. Object versioning is not supported. For more information, refer to Google Cloud Storage Request URIs.

string uri = 1 [(.google.api.field_behavior) = OPTIONAL];

Returns
Type Description
boolean

Whether the uri field is set.

hashCode()

public int hashCode()
Returns
Type Description
int
Overrides

internalGetFieldAccessorTable()

protected GeneratedMessageV3.FieldAccessorTable internalGetFieldAccessorTable()
Returns
Type Description
FieldAccessorTable
Overrides

isInitialized()

public final boolean isInitialized()
Returns
Type Description
boolean
Overrides

newBuilderForType()

public Document.Builder newBuilderForType()
Returns
Type Description
Document.Builder

newBuilderForType(GeneratedMessageV3.BuilderParent parent)

protected Document.Builder newBuilderForType(GeneratedMessageV3.BuilderParent parent)
Parameter
Name Description
parent BuilderParent
Returns
Type Description
Document.Builder
Overrides

newInstance(GeneratedMessageV3.UnusedPrivateParameter unused)

protected Object newInstance(GeneratedMessageV3.UnusedPrivateParameter unused)
Parameter
Name Description
unused UnusedPrivateParameter
Returns
Type Description
Object
Overrides

toBuilder()

public Document.Builder toBuilder()
Returns
Type Description
Document.Builder

writeTo(CodedOutputStream output)

public void writeTo(CodedOutputStream output)
Parameter
Name Description
output CodedOutputStream
Overrides
Exceptions
Type Description
IOException