Skip to content

Commit d69fa07

Browse files
authored
Update OCR sample from beta to ga (#1123)
1 parent 8763e3e commit d69fa07

File tree

4 files changed

+187
-2
lines changed

4 files changed

+187
-2
lines changed

vision/cloud-client/README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,3 +93,9 @@ mvn exec:java -DDetect -Dexec.args="web-entities-include-geo ./resources/landmar
9393
```
9494
mvn exec:java -DDetect -Dexec.args="crop ./resources/landmark.jpg"
9595
```
96+
97+
#### OCR
98+
```
99+
mvn exec:java -DDetect -Dexec.args="ocr gs://java-docs-samples-testing/vision/HodgeConj.pdf \
100+
gs://<BUCKET_ID>/<OUTPUT_PREFIX>/"
101+
```

vision/cloud-client/pom.xml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,11 @@
4242
<artifactId>google-cloud-vision</artifactId>
4343
<version>1.32.0</version>
4444
</dependency>
45+
<dependency>
46+
<groupId>com.google.cloud</groupId>
47+
<artifactId>google-cloud-storage</artifactId>
48+
<version>1.32.0</version>
49+
</dependency>
4550
<!-- [END dependencies] -->
4651

4752
<!-- Test dependencies -->

vision/cloud-client/src/main/java/com/example/vision/Detect.java

Lines changed: 149 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,19 @@
1616

1717
package com.example.vision;
1818

19+
import com.google.api.gax.longrunning.OperationFuture;
20+
import com.google.cloud.storage.Blob;
21+
import com.google.cloud.storage.Bucket;
22+
import com.google.cloud.storage.Storage;
23+
import com.google.cloud.storage.Storage.BlobListOption;
24+
import com.google.cloud.storage.StorageOptions;
25+
import com.google.cloud.vision.v1.AnnotateFileResponse;
26+
import com.google.cloud.vision.v1.AnnotateFileResponse.Builder;
1927
import com.google.cloud.vision.v1.AnnotateImageRequest;
2028
import com.google.cloud.vision.v1.AnnotateImageResponse;
29+
import com.google.cloud.vision.v1.AsyncAnnotateFileRequest;
30+
import com.google.cloud.vision.v1.AsyncAnnotateFileResponse;
31+
import com.google.cloud.vision.v1.AsyncBatchAnnotateFilesResponse;
2132
import com.google.cloud.vision.v1.BatchAnnotateImagesResponse;
2233
import com.google.cloud.vision.v1.Block;
2334
import com.google.cloud.vision.v1.ColorInfo;
@@ -28,11 +39,16 @@
2839
import com.google.cloud.vision.v1.FaceAnnotation;
2940
import com.google.cloud.vision.v1.Feature;
3041
import com.google.cloud.vision.v1.Feature.Type;
42+
import com.google.cloud.vision.v1.GcsDestination;
43+
import com.google.cloud.vision.v1.GcsSource;
3144
import com.google.cloud.vision.v1.Image;
3245
import com.google.cloud.vision.v1.ImageAnnotatorClient;
3346
import com.google.cloud.vision.v1.ImageContext;
3447
import com.google.cloud.vision.v1.ImageSource;
48+
import com.google.cloud.vision.v1.InputConfig;
3549
import com.google.cloud.vision.v1.LocationInfo;
50+
import com.google.cloud.vision.v1.OperationMetadata;
51+
import com.google.cloud.vision.v1.OutputConfig;
3652
import com.google.cloud.vision.v1.Page;
3753
import com.google.cloud.vision.v1.Paragraph;
3854
import com.google.cloud.vision.v1.SafeSearchAnnotation;
@@ -48,12 +64,16 @@
4864

4965
import com.google.protobuf.ByteString;
5066

67+
import com.google.protobuf.util.JsonFormat;
5168
import java.io.FileInputStream;
5269
import java.io.IOException;
5370
import java.io.PrintStream;
5471
import java.util.ArrayList;
5572
import java.util.Arrays;
5673
import java.util.List;
74+
import java.util.concurrent.TimeUnit;
75+
import java.util.regex.Matcher;
76+
import java.util.regex.Pattern;
5777

5878
public class Detect {
5979

@@ -78,11 +98,16 @@ public static void argsHelper(String[] args, PrintStream out) throws Exception,
7898
out.println("Usage:");
7999
out.printf(
80100
"\tmvn exec:java -DDetect -Dexec.args=\"<command> <path-to-image>\"\n"
101+
+ "\tmvn exec:java -DDetect -Dexec.args=\"ocr <path-to-file> <path-to-destination>\""
102+
+ "\n"
81103
+ "Commands:\n"
82104
+ "\tfaces | labels | landmarks | logos | text | safe-search | properties"
83-
+ "| web | web-entities | web-entities-include-geo | crop \n"
105+
+ "| web | web-entities | web-entities-include-geo | crop | ocr \n"
84106
+ "Path:\n\tA file path (ex: ./resources/wakeupcat.jpg) or a URI for a Cloud Storage "
85-
+ "resource (gs://...)\n");
107+
+ "resource (gs://...)\n"
108+
+ "Path to File:\n\tA path to the remote file on Cloud Storage (gs://...)\n"
109+
+ "Path to Destination\n\tA path to the remote destination on Cloud Storage for the"
110+
+ " file to be saved. (gs://BUCKET_NAME/PREFIX/)\n");
86111
return;
87112
}
88113
String command = args[0];
@@ -162,6 +187,9 @@ public static void argsHelper(String[] args, PrintStream out) throws Exception,
162187
} else {
163188
detectDocumentText(path, out);
164189
}
190+
} else if (command.equals("ocr")) {
191+
String destPath = args.length > 2 ? args[2] : "";
192+
detectDocumentsGcs(path, destPath);
165193
}
166194
}
167195

@@ -1277,4 +1305,123 @@ public static void detectDocumentTextGcs(String gcsPath, PrintStream out) throws
12771305
}
12781306
}
12791307
// [END vision_detect_document_uri]
1308+
1309+
// [START vision_async_detect_document_ocr]
1310+
/**
1311+
* Performs document text OCR with PDF/TIFF as source files on Google Cloud Storage.
1312+
*
1313+
* @param gcsSourcePath The path to the remote file on Google Cloud Storage to detect document
1314+
* text on.
1315+
* @param gcsDestinationPath The path to the remote file on Google Cloud Storage to store the
1316+
* results on.
1317+
* @throws Exception on errors while closing the client.
1318+
*/
1319+
public static void detectDocumentsGcs(String gcsSourcePath, String gcsDestinationPath) throws
1320+
Exception {
1321+
try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) {
1322+
List<AsyncAnnotateFileRequest> requests = new ArrayList<>();
1323+
1324+
// Set the GCS source path for the remote file.
1325+
GcsSource gcsSource = GcsSource.newBuilder()
1326+
.setUri(gcsSourcePath)
1327+
.build();
1328+
1329+
// Create the configuration with the specified MIME (Multipurpose Internet Mail Extensions)
1330+
// types
1331+
InputConfig inputConfig = InputConfig.newBuilder()
1332+
.setMimeType("application/pdf") // Supported MimeTypes: "application/pdf", "image/tiff"
1333+
.setGcsSource(gcsSource)
1334+
.build();
1335+
1336+
// Set the GCS destination path for where to save the results.
1337+
GcsDestination gcsDestination = GcsDestination.newBuilder()
1338+
.setUri(gcsDestinationPath)
1339+
.build();
1340+
1341+
// Create the configuration for the output with the batch size.
1342+
// The batch size sets how many pages should be grouped into each json output file.
1343+
OutputConfig outputConfig = OutputConfig.newBuilder()
1344+
.setBatchSize(2)
1345+
.setGcsDestination(gcsDestination)
1346+
.build();
1347+
1348+
// Select the Feature required by the vision API
1349+
Feature feature = Feature.newBuilder().setType(Feature.Type.DOCUMENT_TEXT_DETECTION).build();
1350+
1351+
// Build the OCR request
1352+
AsyncAnnotateFileRequest request = AsyncAnnotateFileRequest.newBuilder()
1353+
.addFeatures(feature)
1354+
.setInputConfig(inputConfig)
1355+
.setOutputConfig(outputConfig)
1356+
.build();
1357+
1358+
requests.add(request);
1359+
1360+
// Perform the OCR request
1361+
OperationFuture<AsyncBatchAnnotateFilesResponse, OperationMetadata> response =
1362+
client.asyncBatchAnnotateFilesAsync(requests);
1363+
1364+
System.out.println("Waiting for the operation to finish.");
1365+
1366+
// Wait for the request to finish. (The result is not used, since the API saves the result to
1367+
// the specified location on GCS.)
1368+
List<AsyncAnnotateFileResponse> result = response.get(180, TimeUnit.SECONDS)
1369+
.getResponsesList();
1370+
1371+
// Once the request has completed and the output has been
1372+
// written to GCS, we can list all the output files.
1373+
Storage storage = StorageOptions.getDefaultInstance().getService();
1374+
1375+
// Get the destination location from the gcsDestinationPath
1376+
Pattern pattern = Pattern.compile("gs://([^/]+)/(.+)");
1377+
Matcher matcher = pattern.matcher(gcsDestinationPath);
1378+
1379+
if (matcher.find()) {
1380+
String bucketName = matcher.group(1);
1381+
String prefix = matcher.group(2);
1382+
1383+
// Get the list of objects with the given prefix from the GCS bucket
1384+
Bucket bucket = storage.get(bucketName);
1385+
com.google.api.gax.paging.Page<Blob> pageList = bucket.list(BlobListOption.prefix(prefix));
1386+
1387+
Blob firstOutputFile = null;
1388+
1389+
// List objects with the given prefix.
1390+
System.out.println("Output files:");
1391+
for (Blob blob : pageList.iterateAll()) {
1392+
System.out.println(blob.getName());
1393+
1394+
// Process the first output file from GCS.
1395+
// Since we specified batch size = 2, the first response contains
1396+
// the first two pages of the input file.
1397+
if (firstOutputFile == null) {
1398+
firstOutputFile = blob;
1399+
}
1400+
}
1401+
1402+
// Get the contents of the file and convert the JSON contents to an AnnotateFileResponse
1403+
// object. If the Blob is small read all its content in one request
1404+
// (Note: the file is a .json file)
1405+
// Storage guide: https://cloud.google.com/storage/docs/downloading-objects
1406+
String jsonContents = new String(firstOutputFile.getContent());
1407+
Builder builder = AnnotateFileResponse.newBuilder();
1408+
JsonFormat.parser().merge(jsonContents, builder);
1409+
1410+
// Build the AnnotateFileResponse object
1411+
AnnotateFileResponse annotateFileResponse = builder.build();
1412+
1413+
// Parse through the object to get the actual response for the first page of the input file.
1414+
AnnotateImageResponse annotateImageResponse = annotateFileResponse.getResponses(0);
1415+
1416+
// Here we print the full text from the first page.
1417+
// The response contains more information:
1418+
// annotation/pages/blocks/paragraphs/words/symbols
1419+
// including confidence score and bounding boxes
1420+
System.out.format("\nText: %s\n", annotateImageResponse.getFullTextAnnotation().getText());
1421+
} else {
1422+
System.out.println("No MATCH");
1423+
}
1424+
}
1425+
}
1426+
// [END vision_async_detect_document_ocr]
12801427
}

vision/cloud-client/src/test/java/com/example/vision/DetectIT.java

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@
1818

1919
import static com.google.common.truth.Truth.assertThat;
2020

21+
import com.google.api.gax.paging.Page;
22+
import com.google.cloud.storage.Blob;
23+
import com.google.cloud.storage.Storage;
24+
import com.google.cloud.storage.Storage.BlobListOption;
25+
import com.google.cloud.storage.StorageOptions;
2126
import java.io.ByteArrayOutputStream;
2227
import java.io.IOException;
2328
import java.io.PrintStream;
@@ -36,6 +41,7 @@ public class DetectIT {
3641
private PrintStream out;
3742
private static final String PROJECT_ID = System.getenv("GOOGLE_CLOUD_PROJECT");
3843
private static final String BUCKET = PROJECT_ID;
44+
private static final String OUTPUT_PREFIX = "OCR_PDF_TEST_OUTPUT";
3945

4046
@Before
4147
public void setUp() throws IOException {
@@ -346,4 +352,25 @@ public void testDocumentTextGcs() throws Exception {
346352
assertThat(got).contains("37%");
347353
assertThat(got).contains("Word text: class (confidence:");
348354
}
355+
356+
@Test
357+
public void testDetectDocumentsGcs() throws Exception {
358+
// Act
359+
String[] args = {"ocr", "gs://" + BUCKET + "/vision/HodgeConj.pdf",
360+
"gs://" + BUCKET + "/" + OUTPUT_PREFIX + "/"};
361+
Detect.argsHelper(args, out);
362+
363+
// Assert
364+
String got = bout.toString();
365+
assertThat(got).contains("HODGE'S GENERAL CONJECTURE");
366+
367+
Storage storage = StorageOptions.getDefaultInstance().getService();
368+
369+
Page<Blob> blobs = storage.list(BUCKET, BlobListOption.currentDirectory(),
370+
BlobListOption.prefix(OUTPUT_PREFIX + "/"));
371+
372+
for (Blob blob : blobs.iterateAll()) {
373+
blob.delete();
374+
}
375+
}
349376
}

0 commit comments

Comments
 (0)