push all website files

This commit is contained in:
Jacob Levine
2019-01-06 13:14:45 -06:00
parent d7301e26c3
commit d2d5d4c04e
15662 changed files with 2166516 additions and 0 deletions

View File

@@ -0,0 +1,68 @@
// Copyright 2018 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.vision.v1;
option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/cloud/vision/v1;vision";
option java_multiple_files = true;
option java_outer_classname = "GeometryProto";
option java_package = "com.google.cloud.vision.v1";
// A vertex represents a 2D point in the image.
// NOTE: the vertex coordinates are in the same scale as the original image.
message Vertex {
// X coordinate.
int32 x = 1;
// Y coordinate.
int32 y = 2;
}
// A vertex represents a 2D point in the image.
// NOTE: the normalized vertex coordinates are relative to the original image
// and range from 0 to 1.
message NormalizedVertex {
// X coordinate.
float x = 1;
// Y coordinate.
float y = 2;
};
// A bounding polygon for the detected image annotation.
message BoundingPoly {
// The bounding polygon vertices.
repeated Vertex vertices = 1;
// The bounding polygon normalized vertices.
repeated NormalizedVertex normalized_vertices = 2;
}
// A 3D position in the image, used primarily for Face detection landmarks.
// A valid Position must have both x and y coordinates.
// The position coordinates are in the same scale as the original image.
message Position {
// X coordinate.
float x = 1;
// Y coordinate.
float y = 2;
// Z coordinate (or depth).
float z = 3;
}

View File

@@ -0,0 +1,766 @@
// Copyright 2018 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.vision.v1;
import "google/api/annotations.proto";
import "google/cloud/vision/v1/geometry.proto";
import "google/cloud/vision/v1/text_annotation.proto";
import "google/cloud/vision/v1/web_detection.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
import "google/type/color.proto";
import "google/type/latlng.proto";
option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/cloud/vision/v1;vision";
option java_multiple_files = true;
option java_outer_classname = "ImageAnnotatorProto";
option java_package = "com.google.cloud.vision.v1";
// Service that performs Google Cloud Vision API detection tasks over client
// images, such as face, landmark, logo, label, and text detection. The
// ImageAnnotator service returns detected entities from the images.
service ImageAnnotator {
// Run image detection and annotation for a batch of images.
rpc BatchAnnotateImages(BatchAnnotateImagesRequest) returns (BatchAnnotateImagesResponse) {
option (google.api.http) = {
post: "/v1/images:annotate"
body: "*"
};
}
// Run asynchronous image detection and annotation for a list of generic
// files, such as PDF files, which may contain multiple pages and multiple
// images per page. Progress and results can be retrieved through the
// `google.longrunning.Operations` interface.
// `Operation.metadata` contains `OperationMetadata` (metadata).
// `Operation.response` contains `AsyncBatchAnnotateFilesResponse` (results).
rpc AsyncBatchAnnotateFiles(AsyncBatchAnnotateFilesRequest)
returns (google.longrunning.Operation) {
option (google.api.http) = {
post: "/v1/files:asyncBatchAnnotate"
body: "*"
};
}
}
// The type of Google Cloud Vision API detection to perform, and the maximum
// number of results to return for that type. Multiple `Feature` objects can
// be specified in the `features` list.
message Feature {
// Type of Google Cloud Vision API feature to be extracted.
enum Type {
// Unspecified feature type.
TYPE_UNSPECIFIED = 0;
// Run face detection.
FACE_DETECTION = 1;
// Run landmark detection.
LANDMARK_DETECTION = 2;
// Run logo detection.
LOGO_DETECTION = 3;
// Run label detection.
LABEL_DETECTION = 4;
// Run text detection / optical character recognition (OCR). Text detection
// is optimized for areas of text within a larger image; if the image is
// a document, use `DOCUMENT_TEXT_DETECTION` instead.
TEXT_DETECTION = 5;
// Run dense text document OCR. Takes precedence when both
// `DOCUMENT_TEXT_DETECTION` and `TEXT_DETECTION` are present.
DOCUMENT_TEXT_DETECTION = 11;
// Run Safe Search to detect potentially unsafe
// or undesirable content.
SAFE_SEARCH_DETECTION = 6;
// Compute a set of image properties, such as the
// image's dominant colors.
IMAGE_PROPERTIES = 7;
// Run crop hints.
CROP_HINTS = 9;
// Run web detection.
WEB_DETECTION = 10;
}
// The feature type.
Type type = 1;
// Maximum number of results of this type. Does not apply to
// `TEXT_DETECTION`, `DOCUMENT_TEXT_DETECTION`, or `CROP_HINTS`.
int32 max_results = 2;
// Model to use for the feature.
// Supported values: "builtin/stable" (the default if unset) and
// "builtin/latest".
string model = 3;
}
// External image source (Google Cloud Storage or web URL image location).
message ImageSource {
// **Use `image_uri` instead.**
//
// The Google Cloud Storage URI of the form
// `gs://bucket_name/object_name`. Object versioning is not supported. See
// [Google Cloud Storage Request
// URIs](https://cloud.google.com/storage/docs/reference-uris) for more info.
string gcs_image_uri = 1;
// The URI of the source image. Can be either:
//
// 1. A Google Cloud Storage URI of the form
// `gs://bucket_name/object_name`. Object versioning is not supported. See
// [Google Cloud Storage Request
// URIs](https://cloud.google.com/storage/docs/reference-uris) for more
// info.
//
// 2. A publicly-accessible image HTTP/HTTPS URL. When fetching images from
// HTTP/HTTPS URLs, Google cannot guarantee that the request will be
// completed. Your request may fail if the specified host denies the
// request (e.g. due to request throttling or DOS prevention), or if Google
// throttles requests to the site for abuse prevention. You should not
// depend on externally-hosted images for production applications.
//
// When both `gcs_image_uri` and `image_uri` are specified, `image_uri` takes
// precedence.
string image_uri = 2;
}
// Client image to perform Google Cloud Vision API tasks over.
message Image {
// Image content, represented as a stream of bytes.
// Note: As with all `bytes` fields, protobuffers use a pure binary
// representation, whereas JSON representations use base64.
bytes content = 1;
// Google Cloud Storage image location, or publicly-accessible image
// URL. If both `content` and `source` are provided for an image, `content`
// takes precedence and is used to perform the image annotation request.
ImageSource source = 2;
}
// A face annotation object contains the results of face detection.
message FaceAnnotation {
// A face-specific landmark (for example, a face feature).
message Landmark {
// Face landmark (feature) type.
// Left and right are defined from the vantage of the viewer of the image
// without considering mirror projections typical of photos. So, `LEFT_EYE`,
// typically, is the person's right eye.
enum Type {
// Unknown face landmark detected. Should not be filled.
UNKNOWN_LANDMARK = 0;
// Left eye.
LEFT_EYE = 1;
// Right eye.
RIGHT_EYE = 2;
// Left of left eyebrow.
LEFT_OF_LEFT_EYEBROW = 3;
// Right of left eyebrow.
RIGHT_OF_LEFT_EYEBROW = 4;
// Left of right eyebrow.
LEFT_OF_RIGHT_EYEBROW = 5;
// Right of right eyebrow.
RIGHT_OF_RIGHT_EYEBROW = 6;
// Midpoint between eyes.
MIDPOINT_BETWEEN_EYES = 7;
// Nose tip.
NOSE_TIP = 8;
// Upper lip.
UPPER_LIP = 9;
// Lower lip.
LOWER_LIP = 10;
// Mouth left.
MOUTH_LEFT = 11;
// Mouth right.
MOUTH_RIGHT = 12;
// Mouth center.
MOUTH_CENTER = 13;
// Nose, bottom right.
NOSE_BOTTOM_RIGHT = 14;
// Nose, bottom left.
NOSE_BOTTOM_LEFT = 15;
// Nose, bottom center.
NOSE_BOTTOM_CENTER = 16;
// Left eye, top boundary.
LEFT_EYE_TOP_BOUNDARY = 17;
// Left eye, right corner.
LEFT_EYE_RIGHT_CORNER = 18;
// Left eye, bottom boundary.
LEFT_EYE_BOTTOM_BOUNDARY = 19;
// Left eye, left corner.
LEFT_EYE_LEFT_CORNER = 20;
// Right eye, top boundary.
RIGHT_EYE_TOP_BOUNDARY = 21;
// Right eye, right corner.
RIGHT_EYE_RIGHT_CORNER = 22;
// Right eye, bottom boundary.
RIGHT_EYE_BOTTOM_BOUNDARY = 23;
// Right eye, left corner.
RIGHT_EYE_LEFT_CORNER = 24;
// Left eyebrow, upper midpoint.
LEFT_EYEBROW_UPPER_MIDPOINT = 25;
// Right eyebrow, upper midpoint.
RIGHT_EYEBROW_UPPER_MIDPOINT = 26;
// Left ear tragion.
LEFT_EAR_TRAGION = 27;
// Right ear tragion.
RIGHT_EAR_TRAGION = 28;
// Left eye pupil.
LEFT_EYE_PUPIL = 29;
// Right eye pupil.
RIGHT_EYE_PUPIL = 30;
// Forehead glabella.
FOREHEAD_GLABELLA = 31;
// Chin gnathion.
CHIN_GNATHION = 32;
// Chin left gonion.
CHIN_LEFT_GONION = 33;
// Chin right gonion.
CHIN_RIGHT_GONION = 34;
}
// Face landmark type.
Type type = 3;
// Face landmark position.
Position position = 4;
}
// The bounding polygon around the face. The coordinates of the bounding box
// are in the original image's scale, as returned in `ImageParams`.
// The bounding box is computed to "frame" the face in accordance with human
// expectations. It is based on the landmarker results.
// Note that one or more x and/or y coordinates may not be generated in the
// `BoundingPoly` (the polygon will be unbounded) if only a partial face
// appears in the image to be annotated.
BoundingPoly bounding_poly = 1;
// The `fd_bounding_poly` bounding polygon is tighter than the
// `boundingPoly`, and encloses only the skin part of the face. Typically, it
// is used to eliminate the face from any image analysis that detects the
// "amount of skin" visible in an image. It is not based on the
// landmarker results, only on the initial face detection, hence
// the <code>fd</code> (face detection) prefix.
BoundingPoly fd_bounding_poly = 2;
// Detected face landmarks.
repeated Landmark landmarks = 3;
// Roll angle, which indicates the amount of clockwise/anti-clockwise rotation
// of the face relative to the image vertical about the axis perpendicular to
// the face. Range [-180,180].
float roll_angle = 4;
// Yaw angle, which indicates the leftward/rightward angle that the face is
// pointing relative to the vertical plane perpendicular to the image. Range
// [-180,180].
float pan_angle = 5;
// Pitch angle, which indicates the upwards/downwards angle that the face is
// pointing relative to the image's horizontal plane. Range [-180,180].
float tilt_angle = 6;
// Detection confidence. Range [0, 1].
float detection_confidence = 7;
// Face landmarking confidence. Range [0, 1].
float landmarking_confidence = 8;
// Joy likelihood.
Likelihood joy_likelihood = 9;
// Sorrow likelihood.
Likelihood sorrow_likelihood = 10;
// Anger likelihood.
Likelihood anger_likelihood = 11;
// Surprise likelihood.
Likelihood surprise_likelihood = 12;
// Under-exposed likelihood.
Likelihood under_exposed_likelihood = 13;
// Blurred likelihood.
Likelihood blurred_likelihood = 14;
// Headwear likelihood.
Likelihood headwear_likelihood = 15;
}
// Detected entity location information.
message LocationInfo {
// lat/long location coordinates.
google.type.LatLng lat_lng = 1;
}
// A `Property` consists of a user-supplied name/value pair.
message Property {
// Name of the property.
string name = 1;
// Value of the property.
string value = 2;
// Value of numeric properties.
uint64 uint64_value = 3;
}
// Set of detected entity features.
message EntityAnnotation {
// Opaque entity ID. Some IDs may be available in
// [Google Knowledge Graph Search
// API](https://developers.google.com/knowledge-graph/).
string mid = 1;
// The language code for the locale in which the entity textual
// `description` is expressed.
string locale = 2;
// Entity textual description, expressed in its `locale` language.
string description = 3;
// Overall score of the result. Range [0, 1].
float score = 4;
// **Deprecated. Use `score` instead.**
// The accuracy of the entity detection in an image.
// For example, for an image in which the "Eiffel Tower" entity is detected,
// this field represents the confidence that there is a tower in the query
// image. Range [0, 1].
float confidence = 5;
// The relevancy of the ICA (Image Content Annotation) label to the
// image. For example, the relevancy of "tower" is likely higher to an image
// containing the detected "Eiffel Tower" than to an image containing a
// detected distant towering building, even though the confidence that
// there is a tower in each image may be the same. Range [0, 1].
float topicality = 6;
// Image region to which this entity belongs. Not produced
// for `LABEL_DETECTION` features.
BoundingPoly bounding_poly = 7;
// The location information for the detected entity. Multiple
// `LocationInfo` elements can be present because one location may
// indicate the location of the scene in the image, and another location
// may indicate the location of the place where the image was taken.
// Location information is usually present for landmarks.
repeated LocationInfo locations = 8;
// Some entities may have optional user-supplied `Property` (name/value)
// fields, such a score or string that qualifies the entity.
repeated Property properties = 9;
}
// Set of features pertaining to the image, computed by computer vision
// methods over safe-search verticals (for example, adult, spoof, medical,
// violence).
message SafeSearchAnnotation {
// Represents the adult content likelihood for the image. Adult content may
// contain elements such as nudity, pornographic images or cartoons, or
// sexual activities.
Likelihood adult = 1;
// Spoof likelihood. The likelihood that an modification
// was made to the image's canonical version to make it appear
// funny or offensive.
Likelihood spoof = 2;
// Likelihood that this is a medical image.
Likelihood medical = 3;
// Likelihood that this image contains violent content.
Likelihood violence = 4;
// Likelihood that the request image contains racy content. Racy content may
// include (but is not limited to) skimpy or sheer clothing, strategically
// covered nudity, lewd or provocative poses, or close-ups of sensitive
// body areas.
Likelihood racy = 9;
}
// Rectangle determined by min and max `LatLng` pairs.
message LatLongRect {
// Min lat/long pair.
google.type.LatLng min_lat_lng = 1;
// Max lat/long pair.
google.type.LatLng max_lat_lng = 2;
}
// Color information consists of RGB channels, score, and the fraction of
// the image that the color occupies in the image.
message ColorInfo {
// RGB components of the color.
google.type.Color color = 1;
// Image-specific score for this color. Value in range [0, 1].
float score = 2;
// The fraction of pixels the color occupies in the image.
// Value in range [0, 1].
float pixel_fraction = 3;
}
// Set of dominant colors and their corresponding scores.
message DominantColorsAnnotation {
// RGB color values with their score and pixel fraction.
repeated ColorInfo colors = 1;
}
// Stores image properties, such as dominant colors.
message ImageProperties {
// If present, dominant colors completed successfully.
DominantColorsAnnotation dominant_colors = 1;
}
// Single crop hint that is used to generate a new crop when serving an image.
message CropHint {
// The bounding polygon for the crop region. The coordinates of the bounding
// box are in the original image's scale, as returned in `ImageParams`.
BoundingPoly bounding_poly = 1;
// Confidence of this being a salient region. Range [0, 1].
float confidence = 2;
// Fraction of importance of this salient region with respect to the original
// image.
float importance_fraction = 3;
}
// Set of crop hints that are used to generate new crops when serving images.
message CropHintsAnnotation {
// Crop hint results.
repeated CropHint crop_hints = 1;
}
// Parameters for crop hints annotation request.
message CropHintsParams {
// Aspect ratios in floats, representing the ratio of the width to the height
// of the image. For example, if the desired aspect ratio is 4/3, the
// corresponding float value should be 1.33333. If not specified, the
// best possible crop is returned. The number of provided aspect ratios is
// limited to a maximum of 16; any aspect ratios provided after the 16th are
// ignored.
repeated float aspect_ratios = 1;
}
// Parameters for web detection request.
message WebDetectionParams {
// Whether to include results derived from the geo information in the image.
bool include_geo_results = 2;
}
// Image context and/or feature-specific parameters.
message ImageContext {
// Not used.
LatLongRect lat_long_rect = 1;
// List of languages to use for TEXT_DETECTION. In most cases, an empty value
// yields the best results since it enables automatic language detection. For
// languages based on the Latin alphabet, setting `language_hints` is not
// needed. In rare cases, when the language of the text in the image is known,
// setting a hint will help get better results (although it will be a
// significant hindrance if the hint is wrong). Text detection returns an
// error if one or more of the specified languages is not one of the
// [supported languages](/vision/docs/languages).
repeated string language_hints = 2;
// Parameters for crop hints annotation request.
CropHintsParams crop_hints_params = 4;
// Parameters for web detection.
WebDetectionParams web_detection_params = 6;
}
// Request for performing Google Cloud Vision API tasks over a user-provided
// image, with user-requested features.
message AnnotateImageRequest {
// The image to be processed.
Image image = 1;
// Requested features.
repeated Feature features = 2;
// Additional context that may accompany the image.
ImageContext image_context = 3;
}
// If an image was produced from a file (e.g. a PDF), this message gives
// information about the source of that image.
message ImageAnnotationContext {
// The URI of the file used to produce the image.
string uri = 1;
// If the file was a PDF or TIFF, this field gives the page number within
// the file used to produce the image.
int32 page_number = 2;
}
// Response to an image annotation request.
message AnnotateImageResponse {
// If present, face detection has completed successfully.
repeated FaceAnnotation face_annotations = 1;
// If present, landmark detection has completed successfully.
repeated EntityAnnotation landmark_annotations = 2;
// If present, logo detection has completed successfully.
repeated EntityAnnotation logo_annotations = 3;
// If present, label detection has completed successfully.
repeated EntityAnnotation label_annotations = 4;
// If present, text (OCR) detection has completed successfully.
repeated EntityAnnotation text_annotations = 5;
// If present, text (OCR) detection or document (OCR) text detection has
// completed successfully.
// This annotation provides the structural hierarchy for the OCR detected
// text.
TextAnnotation full_text_annotation = 12;
// If present, safe-search annotation has completed successfully.
SafeSearchAnnotation safe_search_annotation = 6;
// If present, image properties were extracted successfully.
ImageProperties image_properties_annotation = 8;
// If present, crop hints have completed successfully.
CropHintsAnnotation crop_hints_annotation = 11;
// If present, web detection has completed successfully.
WebDetection web_detection = 13;
// If set, represents the error message for the operation.
// Note that filled-in image annotations are guaranteed to be
// correct, even when `error` is set.
google.rpc.Status error = 9;
// If present, contextual information is needed to understand where this image
// comes from.
ImageAnnotationContext context = 21;
}
// Response to a single file annotation request. A file may contain one or more
// images, which individually have their own responses.
message AnnotateFileResponse {
// Information about the file for which this response is generated.
InputConfig input_config = 1;
// Individual responses to images found within the file.
repeated AnnotateImageResponse responses = 2;
}
// Multiple image annotation requests are batched into a single service call.
message BatchAnnotateImagesRequest {
// Individual image annotation requests for this batch.
repeated AnnotateImageRequest requests = 1;
}
// Response to a batch image annotation request.
message BatchAnnotateImagesResponse {
// Individual responses to image annotation requests within the batch.
repeated AnnotateImageResponse responses = 1;
}
// An offline file annotation request.
message AsyncAnnotateFileRequest {
// Required. Information about the input file.
InputConfig input_config = 1;
// Required. Requested features.
repeated Feature features = 2;
// Additional context that may accompany the image(s) in the file.
ImageContext image_context = 3;
// Required. The desired output location and metadata (e.g. format).
OutputConfig output_config = 4;
}
// The response for a single offline file annotation request.
message AsyncAnnotateFileResponse {
// The output location and metadata from AsyncAnnotateFileRequest.
OutputConfig output_config = 1;
}
// Multiple async file annotation requests are batched into a single service
// call.
message AsyncBatchAnnotateFilesRequest {
// Individual async file annotation requests for this batch.
repeated AsyncAnnotateFileRequest requests = 1;
}
// Response to an async batch file annotation request.
message AsyncBatchAnnotateFilesResponse {
// The list of file annotation responses, one for each request in
// AsyncBatchAnnotateFilesRequest.
repeated AsyncAnnotateFileResponse responses = 1;
}
// The desired input location and metadata.
message InputConfig {
// The Google Cloud Storage location to read the input from.
GcsSource gcs_source = 1;
// The type of the file. Currently only "application/pdf" and "image/tiff"
// are supported. Wildcards are not supported.
string mime_type = 2;
}
// The desired output location and metadata.
message OutputConfig {
// The Google Cloud Storage location to write the output(s) to.
GcsDestination gcs_destination = 1;
// The max number of response protos to put into each output JSON file on
// Google Cloud Storage.
// The valid range is [1, 100]. If not specified, the default value is 20.
//
// For example, for one pdf file with 100 pages, 100 response protos will
// be generated. If `batch_size` = 20, then 5 json files each
// containing 20 response protos will be written under the prefix
// `gcs_destination`.`uri`.
//
// Currently, batch_size only applies to GcsDestination, with potential future
// support for other output configurations.
int32 batch_size = 2;
}
// The Google Cloud Storage location where the input will be read from.
message GcsSource {
// Google Cloud Storage URI for the input file. This must only be a
// Google Cloud Storage object. Wildcards are not currently supported.
string uri = 1;
}
// The Google Cloud Storage location where the output will be written to.
message GcsDestination {
// Google Cloud Storage URI where the results will be stored. Results will
// be in JSON format and preceded by its corresponding input URI. This field
// can either represent a single file, or a prefix for multiple outputs.
// Prefixes must end in a `/`.
//
// Examples:
//
// * File: gs://bucket-name/filename.json
// * Prefix: gs://bucket-name/prefix/here/
// * File: gs://bucket-name/prefix/here
//
// If multiple outputs, each response is still AnnotateFileResponse, each of
// which contains some subset of the full list of AnnotateImageResponse.
// Multiple outputs can happen if, for example, the output JSON is too large
// and overflows into multiple sharded files.
string uri = 1;
}
// Contains metadata for the BatchAnnotateImages operation.
message OperationMetadata {
// Batch operation states.
enum State {
// Invalid.
STATE_UNSPECIFIED = 0;
// Request is received.
CREATED = 1;
// Request is actively being processed.
RUNNING = 2;
// The batch processing is done.
DONE = 3;
// The batch processing was cancelled.
CANCELLED = 4;
}
// Current state of the batch operation.
State state = 1;
// The time when the batch request was received.
google.protobuf.Timestamp create_time = 5;
// The time when the operation result was last updated.
google.protobuf.Timestamp update_time = 6;
}
// A bucketized representation of likelihood, which is intended to give clients
// highly stable results across model upgrades.
enum Likelihood {
// Unknown likelihood.
UNKNOWN = 0;
// It is very unlikely that the image belongs to the specified vertical.
VERY_UNLIKELY = 1;
// It is unlikely that the image belongs to the specified vertical.
UNLIKELY = 2;
// It is possible that the image belongs to the specified vertical.
POSSIBLE = 3;
// It is likely that the image belongs to the specified vertical.
LIKELY = 4;
// It is very likely that the image belongs to the specified vertical.
VERY_LIKELY = 5;
}

View File

@@ -0,0 +1,259 @@
// Copyright 2018 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.vision.v1;
import "google/api/annotations.proto";
import "google/cloud/vision/v1/geometry.proto";
option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/cloud/vision/v1;vision";
option java_multiple_files = true;
option java_outer_classname = "TextAnnotationProto";
option java_package = "com.google.cloud.vision.v1";
// TextAnnotation contains a structured representation of OCR extracted text.
// The hierarchy of an OCR extracted text structure is like this:
// TextAnnotation -> Page -> Block -> Paragraph -> Word -> Symbol
// Each structural component, starting from Page, may further have their own
// properties. Properties describe detected languages, breaks etc.. Please refer
// to the [TextAnnotation.TextProperty][google.cloud.vision.v1.TextAnnotation.TextProperty] message definition below for more
// detail.
message TextAnnotation {
// Detected language for a structural component.
message DetectedLanguage {
// The BCP-47 language code, such as "en-US" or "sr-Latn". For more
// information, see
// http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
string language_code = 1;
// Confidence of detected language. Range [0, 1].
float confidence = 2;
}
// Detected start or end of a structural component.
message DetectedBreak {
// Enum to denote the type of break found. New line, space etc.
enum BreakType {
// Unknown break label type.
UNKNOWN = 0;
// Regular space.
SPACE = 1;
// Sure space (very wide).
SURE_SPACE = 2;
// Line-wrapping break.
EOL_SURE_SPACE = 3;
// End-line hyphen that is not present in text; does not co-occur with
// `SPACE`, `LEADER_SPACE`, or `LINE_BREAK`.
HYPHEN = 4;
// Line break that ends a paragraph.
LINE_BREAK = 5;
}
// Detected break type.
BreakType type = 1;
// True if break prepends the element.
bool is_prefix = 2;
}
// Additional information detected on the structural component.
message TextProperty {
// A list of detected languages together with confidence.
repeated DetectedLanguage detected_languages = 1;
// Detected start or end of a text segment.
DetectedBreak detected_break = 2;
}
// List of pages detected by OCR.
repeated Page pages = 1;
// UTF-8 text detected on the pages.
string text = 2;
}
// Detected page from OCR.
message Page {
// Additional information detected on the page.
TextAnnotation.TextProperty property = 1;
// Page width. For PDFs the unit is points. For images (including
// TIFFs) the unit is pixels.
int32 width = 2;
// Page height. For PDFs the unit is points. For images (including
// TIFFs) the unit is pixels.
int32 height = 3;
// List of blocks of text, images etc on this page.
repeated Block blocks = 4;
// Confidence of the OCR results on the page. Range [0, 1].
float confidence = 5;
}
// Logical element on the page.
message Block {
// Type of a block (text, image etc) as identified by OCR.
enum BlockType {
// Unknown block type.
UNKNOWN = 0;
// Regular text block.
TEXT = 1;
// Table block.
TABLE = 2;
// Image block.
PICTURE = 3;
// Horizontal/vertical line box.
RULER = 4;
// Barcode block.
BARCODE = 5;
}
// Additional information detected for the block.
TextAnnotation.TextProperty property = 1;
// The bounding box for the block.
// The vertices are in the order of top-left, top-right, bottom-right,
// bottom-left. When a rotation of the bounding box is detected the rotation
// is represented as around the top-left corner as defined when the text is
// read in the 'natural' orientation.
// For example:
//
// * when the text is horizontal it might look like:
//
// 0----1
// | |
// 3----2
//
// * when it's rotated 180 degrees around the top-left corner it becomes:
//
// 2----3
// | |
// 1----0
//
// and the vertice order will still be (0, 1, 2, 3).
BoundingPoly bounding_box = 2;
// List of paragraphs in this block (if this blocks is of type text).
repeated Paragraph paragraphs = 3;
// Detected block type (text, image etc) for this block.
BlockType block_type = 4;
// Confidence of the OCR results on the block. Range [0, 1].
float confidence = 5;
}
// Structural unit of text representing a number of words in certain order.
message Paragraph {
// Additional information detected for the paragraph.
TextAnnotation.TextProperty property = 1;
// The bounding box for the paragraph.
// The vertices are in the order of top-left, top-right, bottom-right,
// bottom-left. When a rotation of the bounding box is detected the rotation
// is represented as around the top-left corner as defined when the text is
// read in the 'natural' orientation.
// For example:
// * when the text is horizontal it might look like:
// 0----1
// | |
// 3----2
// * when it's rotated 180 degrees around the top-left corner it becomes:
// 2----3
// | |
// 1----0
// and the vertice order will still be (0, 1, 2, 3).
BoundingPoly bounding_box = 2;
// List of words in this paragraph.
repeated Word words = 3;
// Confidence of the OCR results for the paragraph. Range [0, 1].
float confidence = 4;
}
// A word representation.
message Word {
// Additional information detected for the word.
TextAnnotation.TextProperty property = 1;
// The bounding box for the word.
// The vertices are in the order of top-left, top-right, bottom-right,
// bottom-left. When a rotation of the bounding box is detected the rotation
// is represented as around the top-left corner as defined when the text is
// read in the 'natural' orientation.
// For example:
// * when the text is horizontal it might look like:
// 0----1
// | |
// 3----2
// * when it's rotated 180 degrees around the top-left corner it becomes:
// 2----3
// | |
// 1----0
// and the vertice order will still be (0, 1, 2, 3).
BoundingPoly bounding_box = 2;
// List of symbols in the word.
// The order of the symbols follows the natural reading order.
repeated Symbol symbols = 3;
// Confidence of the OCR results for the word. Range [0, 1].
float confidence = 4;
}
// A single symbol representation.
message Symbol {
// Additional information detected for the symbol.
TextAnnotation.TextProperty property = 1;
// The bounding box for the symbol.
// The vertices are in the order of top-left, top-right, bottom-right,
// bottom-left. When a rotation of the bounding box is detected the rotation
// is represented as around the top-left corner as defined when the text is
// read in the 'natural' orientation.
// For example:
// * when the text is horizontal it might look like:
// 0----1
// | |
// 3----2
// * when it's rotated 180 degrees around the top-left corner it becomes:
// 2----3
// | |
// 1----0
// and the vertice order will still be (0, 1, 2, 3).
BoundingPoly bounding_box = 2;
// The actual UTF-8 representation of the symbol.
string text = 3;
// Confidence of the OCR results for the symbol. Range [0, 1].
float confidence = 4;
}

View File

@@ -0,0 +1,105 @@
// Copyright 2018 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.vision.v1;
import "google/api/annotations.proto";
option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/cloud/vision/v1;vision";
option java_multiple_files = true;
option java_outer_classname = "WebDetectionProto";
option java_package = "com.google.cloud.vision.v1";
// Relevant information for the image from the Internet.
message WebDetection {
// Entity deduced from similar images on the Internet.
message WebEntity {
// Opaque entity ID.
string entity_id = 1;
// Overall relevancy score for the entity.
// Not normalized and not comparable across different image queries.
float score = 2;
// Canonical description of the entity, in English.
string description = 3;
}
// Metadata for online images.
message WebImage {
// The result image URL.
string url = 1;
// (Deprecated) Overall relevancy score for the image.
float score = 2;
}
// Metadata for web pages.
message WebPage {
// The result web page URL.
string url = 1;
// (Deprecated) Overall relevancy score for the web page.
float score = 2;
// Title for the web page, may contain HTML markups.
string page_title = 3;
// Fully matching images on the page.
// Can include resized copies of the query image.
repeated WebImage full_matching_images = 4;
// Partial matching images on the page.
// Those images are similar enough to share some key-point features. For
// example an original image will likely have partial matching for its
// crops.
repeated WebImage partial_matching_images = 5;
}
// Label to provide extra metadata for the web detection.
message WebLabel {
// Label for extra metadata.
string label = 1;
// The BCP-47 language code for `label`, such as "en-US" or "sr-Latn".
// For more information, see
// http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
string language_code = 2;
}
// Deduced entities from similar images on the Internet.
repeated WebEntity web_entities = 1;
// Fully matching images from the Internet.
// Can include resized copies of the query image.
repeated WebImage full_matching_images = 2;
// Partial matching images from the Internet.
// Those images are similar enough to share some key-point features. For
// example an original image will likely have partial matching for its crops.
repeated WebImage partial_matching_images = 3;
// Web pages containing the matching images from the Internet.
repeated WebPage pages_with_matching_images = 4;
// The visually similar image results.
repeated WebImage visually_similar_images = 6;
// Best guess text labels for the request image.
repeated WebLabel best_guess_labels = 8;
}

View File

@@ -0,0 +1,53 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.vision.v1p1beta1;
option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/cloud/vision/v1p1beta1;vision";
option java_multiple_files = true;
option java_outer_classname = "GeometryProto";
option java_package = "com.google.cloud.vision.v1p1beta1";
// A vertex represents a 2D point in the image.
// NOTE: the vertex coordinates are in the same scale as the original image.
message Vertex {
// X coordinate.
int32 x = 1;
// Y coordinate.
int32 y = 2;
}
// A bounding polygon for the detected image annotation.
message BoundingPoly {
// The bounding polygon vertices.
repeated Vertex vertices = 1;
}
// A 3D position in the image, used primarily for Face detection landmarks.
// A valid Position must have both x and y coordinates.
// The position coordinates are in the same scale as the original image.
message Position {
// X coordinate.
float x = 1;
// Y coordinate.
float y = 2;
// Z coordinate (or depth).
float z = 3;
}

View File

@@ -0,0 +1,591 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.vision.v1p1beta1;
import "google/api/annotations.proto";
import "google/cloud/vision/v1p1beta1/geometry.proto";
import "google/cloud/vision/v1p1beta1/text_annotation.proto";
import "google/cloud/vision/v1p1beta1/web_detection.proto";
import "google/rpc/status.proto";
import "google/type/color.proto";
import "google/type/latlng.proto";
option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/cloud/vision/v1p1beta1;vision";
option java_multiple_files = true;
option java_outer_classname = "ImageAnnotatorProto";
option java_package = "com.google.cloud.vision.v1p1beta1";
// Service that performs Google Cloud Vision API detection tasks over client
// images, such as face, landmark, logo, label, and text detection. The
// ImageAnnotator service returns detected entities from the images.
service ImageAnnotator {
// Run image detection and annotation for a batch of images.
rpc BatchAnnotateImages(BatchAnnotateImagesRequest)
returns (BatchAnnotateImagesResponse) {
option (google.api.http) = {
post: "/v1p1beta1/images:annotate"
body: "*"
};
}
}
// Users describe the type of Google Cloud Vision API tasks to perform over
// images by using *Feature*s. Each Feature indicates a type of image
// detection task to perform. Features encode the Cloud Vision API
// vertical to operate on and the number of top-scoring results to return.
message Feature {
// Type of image feature.
enum Type {
// Unspecified feature type.
TYPE_UNSPECIFIED = 0;
// Run face detection.
FACE_DETECTION = 1;
// Run landmark detection.
LANDMARK_DETECTION = 2;
// Run logo detection.
LOGO_DETECTION = 3;
// Run label detection.
LABEL_DETECTION = 4;
// Run OCR.
TEXT_DETECTION = 5;
// Run dense text document OCR. Takes precedence when both
// DOCUMENT_TEXT_DETECTION and TEXT_DETECTION are present.
DOCUMENT_TEXT_DETECTION = 11;
// Run computer vision models to compute image safe-search properties.
SAFE_SEARCH_DETECTION = 6;
// Compute a set of image properties, such as the image's dominant colors.
IMAGE_PROPERTIES = 7;
// Run crop hints.
CROP_HINTS = 9;
// Run web detection.
WEB_DETECTION = 10;
}
// The feature type.
Type type = 1;
// Maximum number of results of this type.
int32 max_results = 2;
// Model to use for the feature.
// Supported values: "builtin/stable" (the default if unset) and
// "builtin/latest".
string model = 3;
}
// External image source (Google Cloud Storage image location).
message ImageSource {
// NOTE: For new code `image_uri` below is preferred.
// Google Cloud Storage image URI, which must be in the following form:
// `gs://bucket_name/object_name` (for details, see
// [Google Cloud Storage Request
// URIs](https://cloud.google.com/storage/docs/reference-uris)).
// NOTE: Cloud Storage object versioning is not supported.
string gcs_image_uri = 1;
// Image URI which supports:
// 1) Google Cloud Storage image URI, which must be in the following form:
// `gs://bucket_name/object_name` (for details, see
// [Google Cloud Storage Request
// URIs](https://cloud.google.com/storage/docs/reference-uris)).
// NOTE: Cloud Storage object versioning is not supported.
// 2) Publicly accessible image HTTP/HTTPS URL.
// This is preferred over the legacy `gcs_image_uri` above. When both
// `gcs_image_uri` and `image_uri` are specified, `image_uri` takes
// precedence.
string image_uri = 2;
}
// Client image to perform Google Cloud Vision API tasks over.
message Image {
// Image content, represented as a stream of bytes.
// Note: as with all `bytes` fields, protobuffers use a pure binary
// representation, whereas JSON representations use base64.
bytes content = 1;
// Google Cloud Storage image location. If both `content` and `source`
// are provided for an image, `content` takes precedence and is
// used to perform the image annotation request.
ImageSource source = 2;
}
// A face annotation object contains the results of face detection.
message FaceAnnotation {
// A face-specific landmark (for example, a face feature).
message Landmark {
// Face landmark (feature) type.
// Left and right are defined from the vantage of the viewer of the image
// without considering mirror projections typical of photos. So, `LEFT_EYE`,
// typically, is the person's right eye.
enum Type {
// Unknown face landmark detected. Should not be filled.
UNKNOWN_LANDMARK = 0;
// Left eye.
LEFT_EYE = 1;
// Right eye.
RIGHT_EYE = 2;
// Left of left eyebrow.
LEFT_OF_LEFT_EYEBROW = 3;
// Right of left eyebrow.
RIGHT_OF_LEFT_EYEBROW = 4;
// Left of right eyebrow.
LEFT_OF_RIGHT_EYEBROW = 5;
// Right of right eyebrow.
RIGHT_OF_RIGHT_EYEBROW = 6;
// Midpoint between eyes.
MIDPOINT_BETWEEN_EYES = 7;
// Nose tip.
NOSE_TIP = 8;
// Upper lip.
UPPER_LIP = 9;
// Lower lip.
LOWER_LIP = 10;
// Mouth left.
MOUTH_LEFT = 11;
// Mouth right.
MOUTH_RIGHT = 12;
// Mouth center.
MOUTH_CENTER = 13;
// Nose, bottom right.
NOSE_BOTTOM_RIGHT = 14;
// Nose, bottom left.
NOSE_BOTTOM_LEFT = 15;
// Nose, bottom center.
NOSE_BOTTOM_CENTER = 16;
// Left eye, top boundary.
LEFT_EYE_TOP_BOUNDARY = 17;
// Left eye, right corner.
LEFT_EYE_RIGHT_CORNER = 18;
// Left eye, bottom boundary.
LEFT_EYE_BOTTOM_BOUNDARY = 19;
// Left eye, left corner.
LEFT_EYE_LEFT_CORNER = 20;
// Right eye, top boundary.
RIGHT_EYE_TOP_BOUNDARY = 21;
// Right eye, right corner.
RIGHT_EYE_RIGHT_CORNER = 22;
// Right eye, bottom boundary.
RIGHT_EYE_BOTTOM_BOUNDARY = 23;
// Right eye, left corner.
RIGHT_EYE_LEFT_CORNER = 24;
// Left eyebrow, upper midpoint.
LEFT_EYEBROW_UPPER_MIDPOINT = 25;
// Right eyebrow, upper midpoint.
RIGHT_EYEBROW_UPPER_MIDPOINT = 26;
// Left ear tragion.
LEFT_EAR_TRAGION = 27;
// Right ear tragion.
RIGHT_EAR_TRAGION = 28;
// Left eye pupil.
LEFT_EYE_PUPIL = 29;
// Right eye pupil.
RIGHT_EYE_PUPIL = 30;
// Forehead glabella.
FOREHEAD_GLABELLA = 31;
// Chin gnathion.
CHIN_GNATHION = 32;
// Chin left gonion.
CHIN_LEFT_GONION = 33;
// Chin right gonion.
CHIN_RIGHT_GONION = 34;
}
// Face landmark type.
Type type = 3;
// Face landmark position.
Position position = 4;
}
// The bounding polygon around the face. The coordinates of the bounding box
// are in the original image's scale, as returned in `ImageParams`.
// The bounding box is computed to "frame" the face in accordance with human
// expectations. It is based on the landmarker results.
// Note that one or more x and/or y coordinates may not be generated in the
// `BoundingPoly` (the polygon will be unbounded) if only a partial face
// appears in the image to be annotated.
BoundingPoly bounding_poly = 1;
// The `fd_bounding_poly` bounding polygon is tighter than the
// `boundingPoly`, and encloses only the skin part of the face. Typically, it
// is used to eliminate the face from any image analysis that detects the
// "amount of skin" visible in an image. It is not based on the
// landmarker results, only on the initial face detection, hence
// the <code>fd</code> (face detection) prefix.
BoundingPoly fd_bounding_poly = 2;
// Detected face landmarks.
repeated Landmark landmarks = 3;
// Roll angle, which indicates the amount of clockwise/anti-clockwise rotation
// of the face relative to the image vertical about the axis perpendicular to
// the face. Range [-180,180].
float roll_angle = 4;
// Yaw angle, which indicates the leftward/rightward angle that the face is
// pointing relative to the vertical plane perpendicular to the image. Range
// [-180,180].
float pan_angle = 5;
// Pitch angle, which indicates the upwards/downwards angle that the face is
// pointing relative to the image's horizontal plane. Range [-180,180].
float tilt_angle = 6;
// Detection confidence. Range [0, 1].
float detection_confidence = 7;
// Face landmarking confidence. Range [0, 1].
float landmarking_confidence = 8;
// Joy likelihood.
Likelihood joy_likelihood = 9;
// Sorrow likelihood.
Likelihood sorrow_likelihood = 10;
// Anger likelihood.
Likelihood anger_likelihood = 11;
// Surprise likelihood.
Likelihood surprise_likelihood = 12;
// Under-exposed likelihood.
Likelihood under_exposed_likelihood = 13;
// Blurred likelihood.
Likelihood blurred_likelihood = 14;
// Headwear likelihood.
Likelihood headwear_likelihood = 15;
}
// Detected entity location information.
message LocationInfo {
// lat/long location coordinates.
google.type.LatLng lat_lng = 1;
}
// A `Property` consists of a user-supplied name/value pair.
message Property {
// Name of the property.
string name = 1;
// Value of the property.
string value = 2;
// Value of numeric properties.
uint64 uint64_value = 3;
}
// Set of detected entity features.
message EntityAnnotation {
// Opaque entity ID. Some IDs may be available in
// [Google Knowledge Graph Search API](https://developers.google.com/knowledge-graph/).
string mid = 1;
// The language code for the locale in which the entity textual
// `description` is expressed.
string locale = 2;
// Entity textual description, expressed in its `locale` language.
string description = 3;
// Overall score of the result. Range [0, 1].
float score = 4;
// The accuracy of the entity detection in an image.
// For example, for an image in which the "Eiffel Tower" entity is detected,
// this field represents the confidence that there is a tower in the query
// image. Range [0, 1].
float confidence = 5;
// The relevancy of the ICA (Image Content Annotation) label to the
// image. For example, the relevancy of "tower" is likely higher to an image
// containing the detected "Eiffel Tower" than to an image containing a
// detected distant towering building, even though the confidence that
// there is a tower in each image may be the same. Range [0, 1].
float topicality = 6;
// Image region to which this entity belongs. Not produced
// for `LABEL_DETECTION` features.
BoundingPoly bounding_poly = 7;
// The location information for the detected entity. Multiple
// `LocationInfo` elements can be present because one location may
// indicate the location of the scene in the image, and another location
// may indicate the location of the place where the image was taken.
// Location information is usually present for landmarks.
repeated LocationInfo locations = 8;
// Some entities may have optional user-supplied `Property` (name/value)
// fields, such a score or string that qualifies the entity.
repeated Property properties = 9;
}
// Set of features pertaining to the image, computed by computer vision
// methods over safe-search verticals (for example, adult, spoof, medical,
// violence).
message SafeSearchAnnotation {
// Represents the adult content likelihood for the image. Adult content may
// contain elements such as nudity, pornographic images or cartoons, or
// sexual activities.
Likelihood adult = 1;
// Spoof likelihood. The likelihood that an modification
// was made to the image's canonical version to make it appear
// funny or offensive.
Likelihood spoof = 2;
// Likelihood that this is a medical image.
Likelihood medical = 3;
// Likelihood that this image contains violent content.
Likelihood violence = 4;
// Likelihood that the request image contains racy content. Racy content may
// include (but is not limited to) skimpy or sheer clothing, strategically
// covered nudity, lewd or provocative poses, or close-ups of sensitive
// body areas.
Likelihood racy = 9;
}
// Rectangle determined by min and max `LatLng` pairs.
message LatLongRect {
// Min lat/long pair.
google.type.LatLng min_lat_lng = 1;
// Max lat/long pair.
google.type.LatLng max_lat_lng = 2;
}
// Color information consists of RGB channels, score, and the fraction of
// the image that the color occupies in the image.
message ColorInfo {
// RGB components of the color.
google.type.Color color = 1;
// Image-specific score for this color. Value in range [0, 1].
float score = 2;
// The fraction of pixels the color occupies in the image.
// Value in range [0, 1].
float pixel_fraction = 3;
}
// Set of dominant colors and their corresponding scores.
message DominantColorsAnnotation {
// RGB color values with their score and pixel fraction.
repeated ColorInfo colors = 1;
}
// Stores image properties, such as dominant colors.
message ImageProperties {
// If present, dominant colors completed successfully.
DominantColorsAnnotation dominant_colors = 1;
}
// Single crop hint that is used to generate a new crop when serving an image.
message CropHint {
// The bounding polygon for the crop region. The coordinates of the bounding
// box are in the original image's scale, as returned in `ImageParams`.
BoundingPoly bounding_poly = 1;
// Confidence of this being a salient region. Range [0, 1].
float confidence = 2;
// Fraction of importance of this salient region with respect to the original
// image.
float importance_fraction = 3;
}
// Set of crop hints that are used to generate new crops when serving images.
message CropHintsAnnotation {
// Crop hint results.
repeated CropHint crop_hints = 1;
}
// Parameters for crop hints annotation request.
message CropHintsParams {
// Aspect ratios in floats, representing the ratio of the width to the height
// of the image. For example, if the desired aspect ratio is 4/3, the
// corresponding float value should be 1.33333. If not specified, the
// best possible crop is returned. The number of provided aspect ratios is
// limited to a maximum of 16; any aspect ratios provided after the 16th are
// ignored.
repeated float aspect_ratios = 1;
}
// Parameters for web detection request.
message WebDetectionParams {
// Whether to include results derived from the geo information in the image.
bool include_geo_results = 2;
}
// Image context and/or feature-specific parameters.
message ImageContext {
// lat/long rectangle that specifies the location of the image.
LatLongRect lat_long_rect = 1;
// List of languages to use for TEXT_DETECTION. In most cases, an empty value
// yields the best results since it enables automatic language detection. For
// languages based on the Latin alphabet, setting `language_hints` is not
// needed. In rare cases, when the language of the text in the image is known,
// setting a hint will help get better results (although it will be a
// significant hindrance if the hint is wrong). Text detection returns an
// error if one or more of the specified languages is not one of the
// [supported languages](/vision/docs/languages).
repeated string language_hints = 2;
// Parameters for crop hints annotation request.
CropHintsParams crop_hints_params = 4;
// Parameters for web detection.
WebDetectionParams web_detection_params = 6;
}
// Request for performing Google Cloud Vision API tasks over a user-provided
// image, with user-requested features.
message AnnotateImageRequest {
// The image to be processed.
Image image = 1;
// Requested features.
repeated Feature features = 2;
// Additional context that may accompany the image.
ImageContext image_context = 3;
}
// Response to an image annotation request.
message AnnotateImageResponse {
// If present, face detection has completed successfully.
repeated FaceAnnotation face_annotations = 1;
// If present, landmark detection has completed successfully.
repeated EntityAnnotation landmark_annotations = 2;
// If present, logo detection has completed successfully.
repeated EntityAnnotation logo_annotations = 3;
// If present, label detection has completed successfully.
repeated EntityAnnotation label_annotations = 4;
// If present, text (OCR) detection has completed successfully.
repeated EntityAnnotation text_annotations = 5;
// If present, text (OCR) detection or document (OCR) text detection has
// completed successfully.
// This annotation provides the structural hierarchy for the OCR detected
// text.
TextAnnotation full_text_annotation = 12;
// If present, safe-search annotation has completed successfully.
SafeSearchAnnotation safe_search_annotation = 6;
// If present, image properties were extracted successfully.
ImageProperties image_properties_annotation = 8;
// If present, crop hints have completed successfully.
CropHintsAnnotation crop_hints_annotation = 11;
// If present, web detection has completed successfully.
WebDetection web_detection = 13;
// If set, represents the error message for the operation.
// Note that filled-in image annotations are guaranteed to be
// correct, even when `error` is set.
google.rpc.Status error = 9;
}
// Multiple image annotation requests are batched into a single service call.
message BatchAnnotateImagesRequest {
// Individual image annotation requests for this batch.
repeated AnnotateImageRequest requests = 1;
}
// Response to a batch image annotation request.
message BatchAnnotateImagesResponse {
// Individual responses to image annotation requests within the batch.
repeated AnnotateImageResponse responses = 1;
}
// A bucketized representation of likelihood, which is intended to give clients
// highly stable results across model upgrades.
enum Likelihood {
// Unknown likelihood.
UNKNOWN = 0;
// It is very unlikely that the image belongs to the specified vertical.
VERY_UNLIKELY = 1;
// It is unlikely that the image belongs to the specified vertical.
UNLIKELY = 2;
// It is possible that the image belongs to the specified vertical.
POSSIBLE = 3;
// It is likely that the image belongs to the specified vertical.
LIKELY = 4;
// It is very likely that the image belongs to the specified vertical.
VERY_LIKELY = 5;
}

View File

@@ -0,0 +1,252 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.vision.v1p1beta1;
import "google/api/annotations.proto";
import "google/cloud/vision/v1p1beta1/geometry.proto";
option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/cloud/vision/v1p1beta1;vision";
option java_multiple_files = true;
option java_outer_classname = "TextAnnotationProto";
option java_package = "com.google.cloud.vision.v1p1beta1";
// TextAnnotation contains a structured representation of OCR extracted text.
// The hierarchy of an OCR extracted text structure is like this:
// TextAnnotation -> Page -> Block -> Paragraph -> Word -> Symbol
// Each structural component, starting from Page, may further have their own
// properties. Properties describe detected languages, breaks etc.. Please refer
// to the
// [TextAnnotation.TextProperty][google.cloud.vision.v1p1beta1.TextAnnotation.TextProperty]
// message definition below for more detail.
message TextAnnotation {
// Detected language for a structural component.
message DetectedLanguage {
// The BCP-47 language code, such as "en-US" or "sr-Latn". For more
// information, see
// http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
string language_code = 1;
// Confidence of detected language. Range [0, 1].
float confidence = 2;
}
// Detected start or end of a structural component.
message DetectedBreak {
// Enum to denote the type of break found. New line, space etc.
enum BreakType {
// Unknown break label type.
UNKNOWN = 0;
// Regular space.
SPACE = 1;
// Sure space (very wide).
SURE_SPACE = 2;
// Line-wrapping break.
EOL_SURE_SPACE = 3;
// End-line hyphen that is not present in text; does not co-occur with
// `SPACE`, `LEADER_SPACE`, or `LINE_BREAK`.
HYPHEN = 4;
// Line break that ends a paragraph.
LINE_BREAK = 5;
}
// Detected break type.
BreakType type = 1;
// True if break prepends the element.
bool is_prefix = 2;
}
// Additional information detected on the structural component.
message TextProperty {
// A list of detected languages together with confidence.
repeated DetectedLanguage detected_languages = 1;
// Detected start or end of a text segment.
DetectedBreak detected_break = 2;
}
// List of pages detected by OCR.
repeated Page pages = 1;
// UTF-8 text detected on the pages.
string text = 2;
}
// Detected page from OCR.
message Page {
// Additional information detected on the page.
TextAnnotation.TextProperty property = 1;
// Page width in pixels.
int32 width = 2;
// Page height in pixels.
int32 height = 3;
// List of blocks of text, images etc on this page.
repeated Block blocks = 4;
// Confidence of the OCR results on the page. Range [0, 1].
float confidence = 5;
}
// Logical element on the page.
message Block {
// Type of a block (text, image etc) as identified by OCR.
enum BlockType {
// Unknown block type.
UNKNOWN = 0;
// Regular text block.
TEXT = 1;
// Table block.
TABLE = 2;
// Image block.
PICTURE = 3;
// Horizontal/vertical line box.
RULER = 4;
// Barcode block.
BARCODE = 5;
}
// Additional information detected for the block.
TextAnnotation.TextProperty property = 1;
// The bounding box for the block.
// The vertices are in the order of top-left, top-right, bottom-right,
// bottom-left. When a rotation of the bounding box is detected the rotation
// is represented as around the top-left corner as defined when the text is
// read in the 'natural' orientation.
// For example:
// * when the text is horizontal it might look like:
// 0----1
// | |
// 3----2
// * when it's rotated 180 degrees around the top-left corner it becomes:
// 2----3
// | |
// 1----0
// and the vertice order will still be (0, 1, 2, 3).
BoundingPoly bounding_box = 2;
// List of paragraphs in this block (if this blocks is of type text).
repeated Paragraph paragraphs = 3;
// Detected block type (text, image etc) for this block.
BlockType block_type = 4;
// Confidence of the OCR results on the block. Range [0, 1].
float confidence = 5;
}
// Structural unit of text representing a number of words in certain order.
message Paragraph {
// Additional information detected for the paragraph.
TextAnnotation.TextProperty property = 1;
// The bounding box for the paragraph.
// The vertices are in the order of top-left, top-right, bottom-right,
// bottom-left. When a rotation of the bounding box is detected the rotation
// is represented as around the top-left corner as defined when the text is
// read in the 'natural' orientation.
// For example:
// * when the text is horizontal it might look like:
// 0----1
// | |
// 3----2
// * when it's rotated 180 degrees around the top-left corner it becomes:
// 2----3
// | |
// 1----0
// and the vertice order will still be (0, 1, 2, 3).
BoundingPoly bounding_box = 2;
// List of words in this paragraph.
repeated Word words = 3;
// Confidence of the OCR results for the paragraph. Range [0, 1].
float confidence = 4;
}
// A word representation.
message Word {
// Additional information detected for the word.
TextAnnotation.TextProperty property = 1;
// The bounding box for the word.
// The vertices are in the order of top-left, top-right, bottom-right,
// bottom-left. When a rotation of the bounding box is detected the rotation
// is represented as around the top-left corner as defined when the text is
// read in the 'natural' orientation.
// For example:
// * when the text is horizontal it might look like:
// 0----1
// | |
// 3----2
// * when it's rotated 180 degrees around the top-left corner it becomes:
// 2----3
// | |
// 1----0
// and the vertice order will still be (0, 1, 2, 3).
BoundingPoly bounding_box = 2;
// List of symbols in the word.
// The order of the symbols follows the natural reading order.
repeated Symbol symbols = 3;
// Confidence of the OCR results for the word. Range [0, 1].
float confidence = 4;
}
// A single symbol representation.
message Symbol {
// Additional information detected for the symbol.
TextAnnotation.TextProperty property = 1;
// The bounding box for the symbol.
// The vertices are in the order of top-left, top-right, bottom-right,
// bottom-left. When a rotation of the bounding box is detected the rotation
// is represented as around the top-left corner as defined when the text is
// read in the 'natural' orientation.
// For example:
// * when the text is horizontal it might look like:
// 0----1
// | |
// 3----2
// * when it's rotated 180 degrees around the top-left corner it becomes:
// 2----3
// | |
// 1----0
// and the vertice order will still be (0, 1, 2, 3).
BoundingPoly bounding_box = 2;
// The actual UTF-8 representation of the symbol.
string text = 3;
// Confidence of the OCR results for the symbol. Range [0, 1].
float confidence = 4;
}

View File

@@ -0,0 +1,104 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.vision.v1p1beta1;
import "google/api/annotations.proto";
option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/cloud/vision/v1p1beta1;vision";
option java_multiple_files = true;
option java_outer_classname = "WebDetectionProto";
option java_package = "com.google.cloud.vision.v1p1beta1";
// Relevant information for the image from the Internet.
message WebDetection {
// Entity deduced from similar images on the Internet.
message WebEntity {
// Opaque entity ID.
string entity_id = 1;
// Overall relevancy score for the entity.
// Not normalized and not comparable across different image queries.
float score = 2;
// Canonical description of the entity, in English.
string description = 3;
}
// Metadata for online images.
message WebImage {
// The result image URL.
string url = 1;
// (Deprecated) Overall relevancy score for the image.
float score = 2;
}
// Metadata for web pages.
message WebPage {
// The result web page URL.
string url = 1;
// (Deprecated) Overall relevancy score for the web page.
float score = 2;
// Title for the web page, may contain HTML markups.
string page_title = 3;
// Fully matching images on the page.
// Can include resized copies of the query image.
repeated WebImage full_matching_images = 4;
// Partial matching images on the page.
// Those images are similar enough to share some key-point features. For
// example an original image will likely have partial matching for its
// crops.
repeated WebImage partial_matching_images = 5;
}
// Label to provide extra metadata for the web detection.
message WebLabel {
// Label for extra metadata.
string label = 1;
// The BCP-47 language code for `label`, such as "en-US" or "sr-Latn".
// For more information, see
// http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
string language_code = 2;
}
// Deduced entities from similar images on the Internet.
repeated WebEntity web_entities = 1;
// Fully matching images from the Internet.
// Can include resized copies of the query image.
repeated WebImage full_matching_images = 2;
// Partial matching images from the Internet.
// Those images are similar enough to share some key-point features. For
// example an original image will likely have partial matching for its crops.
repeated WebImage partial_matching_images = 3;
// Web pages containing the matching images from the Internet.
repeated WebPage pages_with_matching_images = 4;
// The visually similar image results.
repeated WebImage visually_similar_images = 6;
// Best guess text labels for the request image.
repeated WebLabel best_guess_labels = 8;
}

View File

@@ -0,0 +1,68 @@
// Copyright 2018 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.vision.v1p2beta1;
option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/cloud/vision/v1p2beta1;vision";
option java_multiple_files = true;
option java_outer_classname = "GeometryProto";
option java_package = "com.google.cloud.vision.v1p2beta1";
// A vertex represents a 2D point in the image.
// NOTE: the vertex coordinates are in the same scale as the original image.
message Vertex {
// X coordinate.
int32 x = 1;
// Y coordinate.
int32 y = 2;
}
// A vertex represents a 2D point in the image.
// NOTE: the normalized vertex coordinates are relative to the original image
// and range from 0 to 1.
message NormalizedVertex {
// X coordinate.
float x = 1;
// Y coordinate.
float y = 2;
}
// A bounding polygon for the detected image annotation.
message BoundingPoly {
// The bounding polygon vertices.
repeated Vertex vertices = 1;
// The bounding polygon normalized vertices.
repeated NormalizedVertex normalized_vertices = 2;
}
// A 3D position in the image, used primarily for Face detection landmarks.
// A valid Position must have both x and y coordinates.
// The position coordinates are in the same scale as the original image.
message Position {
// X coordinate.
float x = 1;
// Y coordinate.
float y = 2;
// Z coordinate (or depth).
float z = 3;
}

View File

@@ -0,0 +1,764 @@
// Copyright 2018 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.vision.v1p2beta1;
import "google/api/annotations.proto";
import "google/cloud/vision/v1p2beta1/geometry.proto";
import "google/cloud/vision/v1p2beta1/text_annotation.proto";
import "google/cloud/vision/v1p2beta1/web_detection.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
import "google/type/color.proto";
import "google/type/latlng.proto";
option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/cloud/vision/v1p2beta1;vision";
option java_multiple_files = true;
option java_outer_classname = "ImageAnnotatorProto";
option java_package = "com.google.cloud.vision.v1p2beta1";
// Service that performs Google Cloud Vision API detection tasks over client
// images, such as face, landmark, logo, label, and text detection. The
// ImageAnnotator service returns detected entities from the images.
service ImageAnnotator {
// Run image detection and annotation for a batch of images.
rpc BatchAnnotateImages(BatchAnnotateImagesRequest) returns (BatchAnnotateImagesResponse) {
option (google.api.http) = {
post: "/v1p2beta1/images:annotate"
body: "*"
};
}
// Run async image detection and annotation for a list of generic files (e.g.
// PDF) which may contain multiple pages and multiple images per page.
// Progress and results can be retrieved through the
// `google.longrunning.Operations` interface.
// `Operation.metadata` contains `OperationMetadata` (metadata).
// `Operation.response` contains `AsyncBatchAnnotateFilesResponse` (results).
rpc AsyncBatchAnnotateFiles(AsyncBatchAnnotateFilesRequest) returns (google.longrunning.Operation) {
option (google.api.http) = {
post: "/v1p2beta1/files:asyncBatchAnnotate"
body: "*"
};
}
}
// The type of Google Cloud Vision API detection to perform, and the maximum
// number of results to return for that type. Multiple `Feature` objects can
// be specified in the `features` list.
message Feature {
// Type of Google Cloud Vision API feature to be extracted.
enum Type {
// Unspecified feature type.
TYPE_UNSPECIFIED = 0;
// Run face detection.
FACE_DETECTION = 1;
// Run landmark detection.
LANDMARK_DETECTION = 2;
// Run logo detection.
LOGO_DETECTION = 3;
// Run label detection.
LABEL_DETECTION = 4;
// Run text detection / optical character recognition (OCR). Text detection
// is optimized for areas of text within a larger image; if the image is
// a document, use `DOCUMENT_TEXT_DETECTION` instead.
TEXT_DETECTION = 5;
// Run dense text document OCR. Takes precedence when both
// `DOCUMENT_TEXT_DETECTION` and `TEXT_DETECTION` are present.
DOCUMENT_TEXT_DETECTION = 11;
// Run Safe Search to detect potentially unsafe
// or undesirable content.
SAFE_SEARCH_DETECTION = 6;
// Compute a set of image properties, such as the
// image's dominant colors.
IMAGE_PROPERTIES = 7;
// Run crop hints.
CROP_HINTS = 9;
// Run web detection.
WEB_DETECTION = 10;
}
// The feature type.
Type type = 1;
// Maximum number of results of this type. Does not apply to
// `TEXT_DETECTION`, `DOCUMENT_TEXT_DETECTION`, or `CROP_HINTS`.
int32 max_results = 2;
// Model to use for the feature.
// Supported values: "builtin/stable" (the default if unset) and
// "builtin/latest".
string model = 3;
}
// External image source (Google Cloud Storage or web URL image location).
message ImageSource {
// **Use `image_uri` instead.**
//
// The Google Cloud Storage URI of the form
// `gs://bucket_name/object_name`. Object versioning is not supported. See
// [Google Cloud Storage Request
// URIs](https://cloud.google.com/storage/docs/reference-uris) for more info.
string gcs_image_uri = 1;
// The URI of the source image. Can be either:
//
// 1. A Google Cloud Storage URI of the form
// `gs://bucket_name/object_name`. Object versioning is not supported. See
// [Google Cloud Storage Request
// URIs](https://cloud.google.com/storage/docs/reference-uris) for more
// info.
//
// 2. A publicly-accessible image HTTP/HTTPS URL. When fetching images from
// HTTP/HTTPS URLs, Google cannot guarantee that the request will be
// completed. Your request may fail if the specified host denies the
// request (e.g. due to request throttling or DOS prevention), or if Google
// throttles requests to the site for abuse prevention. You should not
// depend on externally-hosted images for production applications.
//
// When both `gcs_image_uri` and `image_uri` are specified, `image_uri` takes
// precedence.
string image_uri = 2;
}
// Client image to perform Google Cloud Vision API tasks over.
message Image {
// Image content, represented as a stream of bytes.
// Note: As with all `bytes` fields, protobuffers use a pure binary
// representation, whereas JSON representations use base64.
bytes content = 1;
// Google Cloud Storage image location, or publicly-accessible image
// URL. If both `content` and `source` are provided for an image, `content`
// takes precedence and is used to perform the image annotation request.
ImageSource source = 2;
}
// A face annotation object contains the results of face detection.
message FaceAnnotation {
// A face-specific landmark (for example, a face feature).
message Landmark {
// Face landmark (feature) type.
// Left and right are defined from the vantage of the viewer of the image
// without considering mirror projections typical of photos. So, `LEFT_EYE`,
// typically, is the person's right eye.
enum Type {
// Unknown face landmark detected. Should not be filled.
UNKNOWN_LANDMARK = 0;
// Left eye.
LEFT_EYE = 1;
// Right eye.
RIGHT_EYE = 2;
// Left of left eyebrow.
LEFT_OF_LEFT_EYEBROW = 3;
// Right of left eyebrow.
RIGHT_OF_LEFT_EYEBROW = 4;
// Left of right eyebrow.
LEFT_OF_RIGHT_EYEBROW = 5;
// Right of right eyebrow.
RIGHT_OF_RIGHT_EYEBROW = 6;
// Midpoint between eyes.
MIDPOINT_BETWEEN_EYES = 7;
// Nose tip.
NOSE_TIP = 8;
// Upper lip.
UPPER_LIP = 9;
// Lower lip.
LOWER_LIP = 10;
// Mouth left.
MOUTH_LEFT = 11;
// Mouth right.
MOUTH_RIGHT = 12;
// Mouth center.
MOUTH_CENTER = 13;
// Nose, bottom right.
NOSE_BOTTOM_RIGHT = 14;
// Nose, bottom left.
NOSE_BOTTOM_LEFT = 15;
// Nose, bottom center.
NOSE_BOTTOM_CENTER = 16;
// Left eye, top boundary.
LEFT_EYE_TOP_BOUNDARY = 17;
// Left eye, right corner.
LEFT_EYE_RIGHT_CORNER = 18;
// Left eye, bottom boundary.
LEFT_EYE_BOTTOM_BOUNDARY = 19;
// Left eye, left corner.
LEFT_EYE_LEFT_CORNER = 20;
// Right eye, top boundary.
RIGHT_EYE_TOP_BOUNDARY = 21;
// Right eye, right corner.
RIGHT_EYE_RIGHT_CORNER = 22;
// Right eye, bottom boundary.
RIGHT_EYE_BOTTOM_BOUNDARY = 23;
// Right eye, left corner.
RIGHT_EYE_LEFT_CORNER = 24;
// Left eyebrow, upper midpoint.
LEFT_EYEBROW_UPPER_MIDPOINT = 25;
// Right eyebrow, upper midpoint.
RIGHT_EYEBROW_UPPER_MIDPOINT = 26;
// Left ear tragion.
LEFT_EAR_TRAGION = 27;
// Right ear tragion.
RIGHT_EAR_TRAGION = 28;
// Left eye pupil.
LEFT_EYE_PUPIL = 29;
// Right eye pupil.
RIGHT_EYE_PUPIL = 30;
// Forehead glabella.
FOREHEAD_GLABELLA = 31;
// Chin gnathion.
CHIN_GNATHION = 32;
// Chin left gonion.
CHIN_LEFT_GONION = 33;
// Chin right gonion.
CHIN_RIGHT_GONION = 34;
}
// Face landmark type.
Type type = 3;
// Face landmark position.
Position position = 4;
}
// The bounding polygon around the face. The coordinates of the bounding box
// are in the original image's scale, as returned in `ImageParams`.
// The bounding box is computed to "frame" the face in accordance with human
// expectations. It is based on the landmarker results.
// Note that one or more x and/or y coordinates may not be generated in the
// `BoundingPoly` (the polygon will be unbounded) if only a partial face
// appears in the image to be annotated.
BoundingPoly bounding_poly = 1;
// The `fd_bounding_poly` bounding polygon is tighter than the
// `boundingPoly`, and encloses only the skin part of the face. Typically, it
// is used to eliminate the face from any image analysis that detects the
// "amount of skin" visible in an image. It is not based on the
// landmarker results, only on the initial face detection, hence
// the <code>fd</code> (face detection) prefix.
BoundingPoly fd_bounding_poly = 2;
// Detected face landmarks.
repeated Landmark landmarks = 3;
// Roll angle, which indicates the amount of clockwise/anti-clockwise rotation
// of the face relative to the image vertical about the axis perpendicular to
// the face. Range [-180,180].
float roll_angle = 4;
// Yaw angle, which indicates the leftward/rightward angle that the face is
// pointing relative to the vertical plane perpendicular to the image. Range
// [-180,180].
float pan_angle = 5;
// Pitch angle, which indicates the upwards/downwards angle that the face is
// pointing relative to the image's horizontal plane. Range [-180,180].
float tilt_angle = 6;
// Detection confidence. Range [0, 1].
float detection_confidence = 7;
// Face landmarking confidence. Range [0, 1].
float landmarking_confidence = 8;
// Joy likelihood.
Likelihood joy_likelihood = 9;
// Sorrow likelihood.
Likelihood sorrow_likelihood = 10;
// Anger likelihood.
Likelihood anger_likelihood = 11;
// Surprise likelihood.
Likelihood surprise_likelihood = 12;
// Under-exposed likelihood.
Likelihood under_exposed_likelihood = 13;
// Blurred likelihood.
Likelihood blurred_likelihood = 14;
// Headwear likelihood.
Likelihood headwear_likelihood = 15;
}
// Detected entity location information.
message LocationInfo {
// lat/long location coordinates.
google.type.LatLng lat_lng = 1;
}
// A `Property` consists of a user-supplied name/value pair.
message Property {
// Name of the property.
string name = 1;
// Value of the property.
string value = 2;
// Value of numeric properties.
uint64 uint64_value = 3;
}
// Set of detected entity features.
message EntityAnnotation {
// Opaque entity ID. Some IDs may be available in
// [Google Knowledge Graph Search
// API](https://developers.google.com/knowledge-graph/).
string mid = 1;
// The language code for the locale in which the entity textual
// `description` is expressed.
string locale = 2;
// Entity textual description, expressed in its `locale` language.
string description = 3;
// Overall score of the result. Range [0, 1].
float score = 4;
// **Deprecated. Use `score` instead.**
// The accuracy of the entity detection in an image.
// For example, for an image in which the "Eiffel Tower" entity is detected,
// this field represents the confidence that there is a tower in the query
// image. Range [0, 1].
float confidence = 5;
// The relevancy of the ICA (Image Content Annotation) label to the
// image. For example, the relevancy of "tower" is likely higher to an image
// containing the detected "Eiffel Tower" than to an image containing a
// detected distant towering building, even though the confidence that
// there is a tower in each image may be the same. Range [0, 1].
float topicality = 6;
// Image region to which this entity belongs. Not produced
// for `LABEL_DETECTION` features.
BoundingPoly bounding_poly = 7;
// The location information for the detected entity. Multiple
// `LocationInfo` elements can be present because one location may
// indicate the location of the scene in the image, and another location
// may indicate the location of the place where the image was taken.
// Location information is usually present for landmarks.
repeated LocationInfo locations = 8;
// Some entities may have optional user-supplied `Property` (name/value)
// fields, such a score or string that qualifies the entity.
repeated Property properties = 9;
}
// Set of features pertaining to the image, computed by computer vision
// methods over safe-search verticals (for example, adult, spoof, medical,
// violence).
message SafeSearchAnnotation {
// Represents the adult content likelihood for the image. Adult content may
// contain elements such as nudity, pornographic images or cartoons, or
// sexual activities.
Likelihood adult = 1;
// Spoof likelihood. The likelihood that an modification
// was made to the image's canonical version to make it appear
// funny or offensive.
Likelihood spoof = 2;
// Likelihood that this is a medical image.
Likelihood medical = 3;
// Likelihood that this image contains violent content.
Likelihood violence = 4;
// Likelihood that the request image contains racy content. Racy content may
// include (but is not limited to) skimpy or sheer clothing, strategically
// covered nudity, lewd or provocative poses, or close-ups of sensitive
// body areas.
Likelihood racy = 9;
}
// Rectangle determined by min and max `LatLng` pairs.
message LatLongRect {
// Min lat/long pair.
google.type.LatLng min_lat_lng = 1;
// Max lat/long pair.
google.type.LatLng max_lat_lng = 2;
}
// Color information consists of RGB channels, score, and the fraction of
// the image that the color occupies in the image.
message ColorInfo {
// RGB components of the color.
google.type.Color color = 1;
// Image-specific score for this color. Value in range [0, 1].
float score = 2;
// The fraction of pixels the color occupies in the image.
// Value in range [0, 1].
float pixel_fraction = 3;
}
// Set of dominant colors and their corresponding scores.
message DominantColorsAnnotation {
// RGB color values with their score and pixel fraction.
repeated ColorInfo colors = 1;
}
// Stores image properties, such as dominant colors.
message ImageProperties {
// If present, dominant colors completed successfully.
DominantColorsAnnotation dominant_colors = 1;
}
// Single crop hint that is used to generate a new crop when serving an image.
message CropHint {
// The bounding polygon for the crop region. The coordinates of the bounding
// box are in the original image's scale, as returned in `ImageParams`.
BoundingPoly bounding_poly = 1;
// Confidence of this being a salient region. Range [0, 1].
float confidence = 2;
// Fraction of importance of this salient region with respect to the original
// image.
float importance_fraction = 3;
}
// Set of crop hints that are used to generate new crops when serving images.
message CropHintsAnnotation {
// Crop hint results.
repeated CropHint crop_hints = 1;
}
// Parameters for crop hints annotation request.
message CropHintsParams {
// Aspect ratios in floats, representing the ratio of the width to the height
// of the image. For example, if the desired aspect ratio is 4/3, the
// corresponding float value should be 1.33333. If not specified, the
// best possible crop is returned. The number of provided aspect ratios is
// limited to a maximum of 16; any aspect ratios provided after the 16th are
// ignored.
repeated float aspect_ratios = 1;
}
// Parameters for web detection request.
message WebDetectionParams {
// Whether to include results derived from the geo information in the image.
bool include_geo_results = 2;
}
// Image context and/or feature-specific parameters.
message ImageContext {
// Not used.
LatLongRect lat_long_rect = 1;
// List of languages to use for TEXT_DETECTION. In most cases, an empty value
// yields the best results since it enables automatic language detection. For
// languages based on the Latin alphabet, setting `language_hints` is not
// needed. In rare cases, when the language of the text in the image is known,
// setting a hint will help get better results (although it will be a
// significant hindrance if the hint is wrong). Text detection returns an
// error if one or more of the specified languages is not one of the
// [supported languages](/vision/docs/languages).
repeated string language_hints = 2;
// Parameters for crop hints annotation request.
CropHintsParams crop_hints_params = 4;
// Parameters for web detection.
WebDetectionParams web_detection_params = 6;
}
// Request for performing Google Cloud Vision API tasks over a user-provided
// image, with user-requested features.
message AnnotateImageRequest {
// The image to be processed.
Image image = 1;
// Requested features.
repeated Feature features = 2;
// Additional context that may accompany the image.
ImageContext image_context = 3;
}
// If an image was produced from a file (e.g. a PDF), this message gives
// information about the source of that image.
message ImageAnnotationContext {
// The URI of the file used to produce the image.
string uri = 1;
// If the file was a PDF or TIFF, this field gives the page number within
// the file used to produce the image.
int32 page_number = 2;
}
// Response to an image annotation request.
message AnnotateImageResponse {
// If present, face detection has completed successfully.
repeated FaceAnnotation face_annotations = 1;
// If present, landmark detection has completed successfully.
repeated EntityAnnotation landmark_annotations = 2;
// If present, logo detection has completed successfully.
repeated EntityAnnotation logo_annotations = 3;
// If present, label detection has completed successfully.
repeated EntityAnnotation label_annotations = 4;
// If present, text (OCR) detection has completed successfully.
repeated EntityAnnotation text_annotations = 5;
// If present, text (OCR) detection or document (OCR) text detection has
// completed successfully.
// This annotation provides the structural hierarchy for the OCR detected
// text.
TextAnnotation full_text_annotation = 12;
// If present, safe-search annotation has completed successfully.
SafeSearchAnnotation safe_search_annotation = 6;
// If present, image properties were extracted successfully.
ImageProperties image_properties_annotation = 8;
// If present, crop hints have completed successfully.
CropHintsAnnotation crop_hints_annotation = 11;
// If present, web detection has completed successfully.
WebDetection web_detection = 13;
// If set, represents the error message for the operation.
// Note that filled-in image annotations are guaranteed to be
// correct, even when `error` is set.
google.rpc.Status error = 9;
// If present, contextual information is needed to understand where this image
// comes from.
ImageAnnotationContext context = 21;
}
// Response to a single file annotation request. A file may contain one or more
// images, which individually have their own responses.
message AnnotateFileResponse {
// Information about the file for which this response is generated.
InputConfig input_config = 1;
// Individual responses to images found within the file.
repeated AnnotateImageResponse responses = 2;
}
// Multiple image annotation requests are batched into a single service call.
message BatchAnnotateImagesRequest {
// Individual image annotation requests for this batch.
repeated AnnotateImageRequest requests = 1;
}
// Response to a batch image annotation request.
message BatchAnnotateImagesResponse {
// Individual responses to image annotation requests within the batch.
repeated AnnotateImageResponse responses = 1;
}
// An offline file annotation request.
message AsyncAnnotateFileRequest {
// Required. Information about the input file.
InputConfig input_config = 1;
// Required. Requested features.
repeated Feature features = 2;
// Additional context that may accompany the image(s) in the file.
ImageContext image_context = 3;
// Required. The desired output location and metadata (e.g. format).
OutputConfig output_config = 4;
}
// The response for a single offline file annotation request.
message AsyncAnnotateFileResponse {
// The output location and metadata from AsyncAnnotateFileRequest.
OutputConfig output_config = 1;
}
// Multiple async file annotation requests are batched into a single service
// call.
message AsyncBatchAnnotateFilesRequest {
// Individual async file annotation requests for this batch.
repeated AsyncAnnotateFileRequest requests = 1;
}
// Response to an async batch file annotation request.
message AsyncBatchAnnotateFilesResponse {
// The list of file annotation responses, one for each request in
// AsyncBatchAnnotateFilesRequest.
repeated AsyncAnnotateFileResponse responses = 1;
}
// The desired input location and metadata.
message InputConfig {
// The Google Cloud Storage location to read the input from.
GcsSource gcs_source = 1;
// The type of the file. Currently only "application/pdf" and "image/tiff"
// are supported. Wildcards are not supported.
string mime_type = 2;
}
// The desired output location and metadata.
message OutputConfig {
// The Google Cloud Storage location to write the output(s) to.
GcsDestination gcs_destination = 1;
// The max number of response protos to put into each output JSON file on GCS.
// The valid range is [1, 100]. If not specified, the default value is 20.
//
// For example, for one pdf file with 100 pages, 100 response protos will
// be generated. If `batch_size` = 20, then 5 json files each
// containing 20 response protos will be written under the prefix
// `gcs_destination`.`uri`.
//
// Currently, batch_size only applies to GcsDestination, with potential future
// support for other output configurations.
int32 batch_size = 2;
}
// The Google Cloud Storage location where the input will be read from.
message GcsSource {
// Google Cloud Storage URI for the input file. This must only be a GCS
// object. Wildcards are not currently supported.
string uri = 1;
}
// The Google Cloud Storage location where the output will be written to.
message GcsDestination {
// Google Cloud Storage URI where the results will be stored. Results will
// be in JSON format and preceded by its corresponding input URI. This field
// can either represent a single file, or a prefix for multiple outputs.
// Prefixes must end in a `/`.
//
// Examples:
//
// * File: gs://bucket-name/filename.json
// * Prefix: gs://bucket-name/prefix/here/
// * File: gs://bucket-name/prefix/here
//
// If multiple outputs, each response is still AnnotateFileResponse, each of
// which contains some subset of the full list of AnnotateImageResponse.
// Multiple outputs can happen if, for example, the output JSON is too large
// and overflows into multiple sharded files.
string uri = 1;
}
// Contains metadata for the BatchAnnotateImages operation.
message OperationMetadata {
// Batch operation states.
enum State {
// Invalid.
STATE_UNSPECIFIED = 0;
// Request is received.
CREATED = 1;
// Request is actively being processed.
RUNNING = 2;
// The batch processing is done.
DONE = 3;
// The batch processing was cancelled.
CANCELLED = 4;
}
// Current state of the batch operation.
State state = 1;
// The time when the batch request was received.
google.protobuf.Timestamp create_time = 5;
// The time when the operation result was last updated.
google.protobuf.Timestamp update_time = 6;
}
// A bucketized representation of likelihood, which is intended to give clients
// highly stable results across model upgrades.
enum Likelihood {
// Unknown likelihood.
UNKNOWN = 0;
// It is very unlikely that the image belongs to the specified vertical.
VERY_UNLIKELY = 1;
// It is unlikely that the image belongs to the specified vertical.
UNLIKELY = 2;
// It is possible that the image belongs to the specified vertical.
POSSIBLE = 3;
// It is likely that the image belongs to the specified vertical.
LIKELY = 4;
// It is very likely that the image belongs to the specified vertical.
VERY_LIKELY = 5;
}

View File

@@ -0,0 +1,259 @@
// Copyright 2018 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.vision.v1p2beta1;
import "google/api/annotations.proto";
import "google/cloud/vision/v1p2beta1/geometry.proto";
option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/cloud/vision/v1p2beta1;vision";
option java_multiple_files = true;
option java_outer_classname = "TextAnnotationProto";
option java_package = "com.google.cloud.vision.v1p2beta1";
// TextAnnotation contains a structured representation of OCR extracted text.
// The hierarchy of an OCR extracted text structure is like this:
// TextAnnotation -> Page -> Block -> Paragraph -> Word -> Symbol
// Each structural component, starting from Page, may further have their own
// properties. Properties describe detected languages, breaks etc.. Please refer
// to the [TextAnnotation.TextProperty][google.cloud.vision.v1p2beta1.TextAnnotation.TextProperty] message definition below for more
// detail.
message TextAnnotation {
// Detected language for a structural component.
message DetectedLanguage {
// The BCP-47 language code, such as "en-US" or "sr-Latn". For more
// information, see
// http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
string language_code = 1;
// Confidence of detected language. Range [0, 1].
float confidence = 2;
}
// Detected start or end of a structural component.
message DetectedBreak {
// Enum to denote the type of break found. New line, space etc.
enum BreakType {
// Unknown break label type.
UNKNOWN = 0;
// Regular space.
SPACE = 1;
// Sure space (very wide).
SURE_SPACE = 2;
// Line-wrapping break.
EOL_SURE_SPACE = 3;
// End-line hyphen that is not present in text; does not co-occur with
// `SPACE`, `LEADER_SPACE`, or `LINE_BREAK`.
HYPHEN = 4;
// Line break that ends a paragraph.
LINE_BREAK = 5;
}
// Detected break type.
BreakType type = 1;
// True if break prepends the element.
bool is_prefix = 2;
}
// Additional information detected on the structural component.
message TextProperty {
// A list of detected languages together with confidence.
repeated DetectedLanguage detected_languages = 1;
// Detected start or end of a text segment.
DetectedBreak detected_break = 2;
}
// List of pages detected by OCR.
repeated Page pages = 1;
// UTF-8 text detected on the pages.
string text = 2;
}
// Detected page from OCR.
message Page {
// Additional information detected on the page.
TextAnnotation.TextProperty property = 1;
// Page width. For PDFs the unit is points. For images (including
// TIFFs) the unit is pixels.
int32 width = 2;
// Page height. For PDFs the unit is points. For images (including
// TIFFs) the unit is pixels.
int32 height = 3;
// List of blocks of text, images etc on this page.
repeated Block blocks = 4;
// Confidence of the OCR results on the page. Range [0, 1].
float confidence = 5;
}
// Logical element on the page.
message Block {
// Type of a block (text, image etc) as identified by OCR.
enum BlockType {
// Unknown block type.
UNKNOWN = 0;
// Regular text block.
TEXT = 1;
// Table block.
TABLE = 2;
// Image block.
PICTURE = 3;
// Horizontal/vertical line box.
RULER = 4;
// Barcode block.
BARCODE = 5;
}
// Additional information detected for the block.
TextAnnotation.TextProperty property = 1;
// The bounding box for the block.
// The vertices are in the order of top-left, top-right, bottom-right,
// bottom-left. When a rotation of the bounding box is detected the rotation
// is represented as around the top-left corner as defined when the text is
// read in the 'natural' orientation.
// For example:
//
// * when the text is horizontal it might look like:
//
// 0----1
// | |
// 3----2
//
// * when it's rotated 180 degrees around the top-left corner it becomes:
//
// 2----3
// | |
// 1----0
//
// and the vertice order will still be (0, 1, 2, 3).
BoundingPoly bounding_box = 2;
// List of paragraphs in this block (if this blocks is of type text).
repeated Paragraph paragraphs = 3;
// Detected block type (text, image etc) for this block.
BlockType block_type = 4;
// Confidence of the OCR results on the block. Range [0, 1].
float confidence = 5;
}
// Structural unit of text representing a number of words in certain order.
message Paragraph {
// Additional information detected for the paragraph.
TextAnnotation.TextProperty property = 1;
// The bounding box for the paragraph.
// The vertices are in the order of top-left, top-right, bottom-right,
// bottom-left. When a rotation of the bounding box is detected the rotation
// is represented as around the top-left corner as defined when the text is
// read in the 'natural' orientation.
// For example:
// * when the text is horizontal it might look like:
// 0----1
// | |
// 3----2
// * when it's rotated 180 degrees around the top-left corner it becomes:
// 2----3
// | |
// 1----0
// and the vertice order will still be (0, 1, 2, 3).
BoundingPoly bounding_box = 2;
// List of words in this paragraph.
repeated Word words = 3;
// Confidence of the OCR results for the paragraph. Range [0, 1].
float confidence = 4;
}
// A word representation.
message Word {
// Additional information detected for the word.
TextAnnotation.TextProperty property = 1;
// The bounding box for the word.
// The vertices are in the order of top-left, top-right, bottom-right,
// bottom-left. When a rotation of the bounding box is detected the rotation
// is represented as around the top-left corner as defined when the text is
// read in the 'natural' orientation.
// For example:
// * when the text is horizontal it might look like:
// 0----1
// | |
// 3----2
// * when it's rotated 180 degrees around the top-left corner it becomes:
// 2----3
// | |
// 1----0
// and the vertice order will still be (0, 1, 2, 3).
BoundingPoly bounding_box = 2;
// List of symbols in the word.
// The order of the symbols follows the natural reading order.
repeated Symbol symbols = 3;
// Confidence of the OCR results for the word. Range [0, 1].
float confidence = 4;
}
// A single symbol representation.
message Symbol {
// Additional information detected for the symbol.
TextAnnotation.TextProperty property = 1;
// The bounding box for the symbol.
// The vertices are in the order of top-left, top-right, bottom-right,
// bottom-left. When a rotation of the bounding box is detected the rotation
// is represented as around the top-left corner as defined when the text is
// read in the 'natural' orientation.
// For example:
// * when the text is horizontal it might look like:
// 0----1
// | |
// 3----2
// * when it's rotated 180 degrees around the top-left corner it becomes:
// 2----3
// | |
// 1----0
// and the vertice order will still be (0, 1, 2, 3).
BoundingPoly bounding_box = 2;
// The actual UTF-8 representation of the symbol.
string text = 3;
// Confidence of the OCR results for the symbol. Range [0, 1].
float confidence = 4;
}

View File

@@ -0,0 +1,105 @@
// Copyright 2018 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.vision.v1p2beta1;
import "google/api/annotations.proto";
option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/cloud/vision/v1p2beta1;vision";
option java_multiple_files = true;
option java_outer_classname = "WebDetectionProto";
option java_package = "com.google.cloud.vision.v1p2beta1";
// Relevant information for the image from the Internet.
message WebDetection {
// Entity deduced from similar images on the Internet.
message WebEntity {
// Opaque entity ID.
string entity_id = 1;
// Overall relevancy score for the entity.
// Not normalized and not comparable across different image queries.
float score = 2;
// Canonical description of the entity, in English.
string description = 3;
}
// Metadata for online images.
message WebImage {
// The result image URL.
string url = 1;
// (Deprecated) Overall relevancy score for the image.
float score = 2;
}
// Metadata for web pages.
message WebPage {
// The result web page URL.
string url = 1;
// (Deprecated) Overall relevancy score for the web page.
float score = 2;
// Title for the web page, may contain HTML markups.
string page_title = 3;
// Fully matching images on the page.
// Can include resized copies of the query image.
repeated WebImage full_matching_images = 4;
// Partial matching images on the page.
// Those images are similar enough to share some key-point features. For
// example an original image will likely have partial matching for its
// crops.
repeated WebImage partial_matching_images = 5;
}
// Label to provide extra metadata for the web detection.
message WebLabel {
// Label for extra metadata.
string label = 1;
// The BCP-47 language code for `label`, such as "en-US" or "sr-Latn".
// For more information, see
// http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
string language_code = 2;
}
// Deduced entities from similar images on the Internet.
repeated WebEntity web_entities = 1;
// Fully matching images from the Internet.
// Can include resized copies of the query image.
repeated WebImage full_matching_images = 2;
// Partial matching images from the Internet.
// Those images are similar enough to share some key-point features. For
// example an original image will likely have partial matching for its crops.
repeated WebImage partial_matching_images = 3;
// Web pages containing the matching images from the Internet.
repeated WebPage pages_with_matching_images = 4;
// The visually similar image results.
repeated WebImage visually_similar_images = 6;
// Best guess text labels for the request image.
repeated WebLabel best_guess_labels = 8;
}