1 // Copyright 2018 Google LLC.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
18 package google.cloud.vision.v1;
20 import "google/api/annotations.proto";
21 import "google/cloud/vision/v1/geometry.proto";
23 option cc_enable_arenas = true;
24 option go_package = "google.golang.org/genproto/googleapis/cloud/vision/v1;vision";
25 option java_multiple_files = true;
26 option java_outer_classname = "TextAnnotationProto";
27 option java_package = "com.google.cloud.vision.v1";
28 option objc_class_prefix = "GCVN";
30 // TextAnnotation contains a structured representation of OCR extracted text.
31 // The hierarchy of an OCR extracted text structure is like this:
32 // TextAnnotation -> Page -> Block -> Paragraph -> Word -> Symbol
33 // Each structural component, starting from Page, may further have their own
34 // properties. Properties describe detected languages, breaks etc. Please refer
36 // [TextAnnotation.TextProperty][google.cloud.vision.v1.TextAnnotation.TextProperty]
37 // message definition below for more detail.
38 message TextAnnotation {
39 // Detected language for a structural component.
40 message DetectedLanguage {
41 // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
43 // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
44 string language_code = 1;
46 // Confidence of detected language. Range [0, 1].
50 // Detected start or end of a structural component.
51 message DetectedBreak {
52 // Enum to denote the type of break found. New line, space etc.
54 // Unknown break label type.
60 // Sure space (very wide).
63 // Line-wrapping break.
66 // End-line hyphen that is not present in text; does not co-occur with
67 // `SPACE`, `LEADER_SPACE`, or `LINE_BREAK`.
70 // Line break that ends a paragraph.
74 // Detected break type.
77 // True if break prepends the element.
81 // Additional information detected on the structural component.
82 message TextProperty {
83 // A list of detected languages together with confidence.
84 repeated DetectedLanguage detected_languages = 1;
86 // Detected start or end of a text segment.
87 DetectedBreak detected_break = 2;
90 // List of pages detected by OCR.
91 repeated Page pages = 1;
93 // UTF-8 text detected on the pages.
97 // Detected page from OCR.
99 // Additional information detected on the page.
100 TextAnnotation.TextProperty property = 1;
102 // Page width. For PDFs the unit is points. For images (including
103 // TIFFs) the unit is pixels.
106 // Page height. For PDFs the unit is points. For images (including
107 // TIFFs) the unit is pixels.
110 // List of blocks of text, images etc on this page.
111 repeated Block blocks = 4;
113 // Confidence of the OCR results on the page. Range [0, 1].
114 float confidence = 5;
117 // Logical element on the page.
119 // Type of a block (text, image etc) as identified by OCR.
121 // Unknown block type.
124 // Regular text block.
133 // Horizontal/vertical line box.
140 // Additional information detected for the block.
141 TextAnnotation.TextProperty property = 1;
143 // The bounding box for the block.
144 // The vertices are in the order of top-left, top-right, bottom-right,
145 // bottom-left. When a rotation of the bounding box is detected the rotation
146 // is represented as around the top-left corner as defined when the text is
147 // read in the 'natural' orientation.
150 // * when the text is horizontal it might look like:
156 // * when it's rotated 180 degrees around the top-left corner it becomes:
162 // and the vertex order will still be (0, 1, 2, 3).
163 BoundingPoly bounding_box = 2;
165 // List of paragraphs in this block (if this block is of type text).
166 repeated Paragraph paragraphs = 3;
168 // Detected block type (text, image etc) for this block.
169 BlockType block_type = 4;
171 // Confidence of the OCR results on the block. Range [0, 1].
172 float confidence = 5;
175 // Structural unit of text representing a number of words in a certain order.
177 // Additional information detected for the paragraph.
178 TextAnnotation.TextProperty property = 1;
180 // The bounding box for the paragraph.
181 // The vertices are in the order of top-left, top-right, bottom-right,
182 // bottom-left. When a rotation of the bounding box is detected the rotation
183 // is represented as around the top-left corner as defined when the text is
184 // read in the 'natural' orientation.
186 // * when the text is horizontal it might look like:
190 // * when it's rotated 180 degrees around the top-left corner it becomes:
194 // and the vertex order will still be (0, 1, 2, 3).
195 BoundingPoly bounding_box = 2;
197 // List of words in this paragraph.
198 repeated Word words = 3;
200 // Confidence of the OCR results for the paragraph. Range [0, 1].
201 float confidence = 4;
204 // A word representation.
206 // Additional information detected for the word.
207 TextAnnotation.TextProperty property = 1;
209 // The bounding box for the word.
210 // The vertices are in the order of top-left, top-right, bottom-right,
211 // bottom-left. When a rotation of the bounding box is detected the rotation
212 // is represented as around the top-left corner as defined when the text is
213 // read in the 'natural' orientation.
215 // * when the text is horizontal it might look like:
219 // * when it's rotated 180 degrees around the top-left corner it becomes:
223 // and the vertex order will still be (0, 1, 2, 3).
224 BoundingPoly bounding_box = 2;
226 // List of symbols in the word.
227 // The order of the symbols follows the natural reading order.
228 repeated Symbol symbols = 3;
230 // Confidence of the OCR results for the word. Range [0, 1].
231 float confidence = 4;
234 // A single symbol representation.
236 // Additional information detected for the symbol.
237 TextAnnotation.TextProperty property = 1;
239 // The bounding box for the symbol.
240 // The vertices are in the order of top-left, top-right, bottom-right,
241 // bottom-left. When a rotation of the bounding box is detected the rotation
242 // is represented as around the top-left corner as defined when the text is
243 // read in the 'natural' orientation.
245 // * when the text is horizontal it might look like:
249 // * when it's rotated 180 degrees around the top-left corner it becomes:
253 // and the vertex order will still be (0, 1, 2, 3).
254 BoundingPoly bounding_box = 2;
256 // The actual UTF-8 representation of the symbol.
259 // Confidence of the OCR results for the symbol. Range [0, 1].
260 float confidence = 4;