Skip to content

added gcs read for audio file #249

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Jun 2, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions speech/grpc/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,12 @@ limitations under the License.

<!-- // [START dependency] -->
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/*
* Copyright 2016 Google Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/


package com.google.cloud.speech.grpc.demos;

import com.google.cloud.speech.v1.AudioRequest;
import com.google.protobuf.ByteString;

import java.io.IOException;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

/**
 * Builds {@link AudioRequest} messages from a URI. The URI can point to a local file or to a
 * file on Google Cloud Storage.
 */
public class AudioRequestFactory {

  private static final String FILE_SCHEME = "file";
  private static final String GS_SCHEME = "gs";

  /**
   * Takes an input URI and converts it to an audio request.
   *
   * <p>A URI without a scheme, or with the {@code file} scheme, is read from the local file
   * system and its bytes are placed in the request content. A URI with the {@code gs} scheme is
   * not read here; the URI string itself is placed in the request.
   *
   * @param uri input uri
   * @return AudioRequest audio request
   * @throws IOException if the local file cannot be read
   * @throws IllegalArgumentException if the URI scheme is neither absent, {@code file} nor
   *     {@code gs}
   */
  public static AudioRequest createRequest(URI uri) throws IOException {
    String scheme = uri.getScheme();
    if (scheme == null) {
      // Paths.get(URI) rejects a URI that has no scheme ("Missing scheme"), so a scheme-less
      // URI is resolved through its (decoded) path component instead.
      Path path = Paths.get(uri.getPath());
      return audioFromBytes(Files.readAllBytes(path));
    }
    if (scheme.equals(FILE_SCHEME)) {
      Path path = Paths.get(uri);
      return audioFromBytes(Files.readAllBytes(path));
    }
    if (scheme.equals(GS_SCHEME)) {
      return AudioRequest.newBuilder().setUri(uri.toString()).build();
    }
    // IllegalArgumentException is a RuntimeException, so callers that caught the previous
    // RuntimeException keep working.
    throw new IllegalArgumentException("scheme not supported " + scheme);
  }

  /**
   * Convert bytes to AudioRequest.
   *
   * @param bytes input bytes
   * @return AudioRequest audio request whose content is a copy of {@code bytes}
   */
  private static AudioRequest audioFromBytes(byte[] bytes) {
    return AudioRequest.newBuilder()
        .setContent(ByteString.copyFrom(bytes))
        .build();
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
import com.google.cloud.speech.v1.NonStreamingRecognizeResponse;
import com.google.cloud.speech.v1.RecognizeRequest;
import com.google.cloud.speech.v1.SpeechGrpc;
import com.google.protobuf.ByteString;
import com.google.protobuf.TextFormat;

import io.grpc.ManagedChannel;
Expand All @@ -49,9 +48,7 @@
import org.apache.commons.cli.ParseException;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.net.URI;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.Executors;
Expand All @@ -72,7 +69,7 @@ public class NonStreamingRecognizeClient {

private final String host;
private final int port;
private final String file;
private final URI input;
private final int samplingRate;

private final ManagedChannel channel;
Expand All @@ -81,11 +78,11 @@ public class NonStreamingRecognizeClient {
/**
* Construct client connecting to Cloud Speech server at {@code host:port}.
*/
public NonStreamingRecognizeClient(String host, int port, String file, int samplingRate)
public NonStreamingRecognizeClient(String host, int port, URI input, int samplingRate)
throws IOException {
this.host = host;
this.port = port;
this.file = file;
this.input = input;
this.samplingRate = samplingRate;

GoogleCredentials creds = GoogleCredentials.getApplicationDefault();
Expand All @@ -99,10 +96,7 @@ public NonStreamingRecognizeClient(String host, int port, String file, int sampl
}

private AudioRequest createAudioRequest() throws IOException {
Path path = Paths.get(file);
return AudioRequest.newBuilder()
.setContent(ByteString.copyFrom(Files.readAllBytes(path)))
.build();
return AudioRequestFactory.createRequest(this.input);
}

public void shutdown() throws InterruptedException {
Expand All @@ -115,10 +109,10 @@ public void recognize() {
try {
audio = createAudioRequest();
} catch (IOException e) {
logger.log(Level.WARNING, "Failed to read audio file: " + file);
logger.log(Level.WARNING, "Failed to read audio uri input: " + input);
return;
}
logger.info("Sending " + audio.getContent().size() + " bytes from audio file: " + file);
logger.info("Sending " + audio.getContent().size() + " bytes from audio uri input: " + input);
InitialRecognizeRequest initial = InitialRecognizeRequest.newBuilder()
.setEncoding(AudioEncoding.LINEAR16)
.setSampleRate(samplingRate)
Expand Down Expand Up @@ -147,8 +141,8 @@ public static void main(String[] args) throws Exception {
CommandLineParser parser = new DefaultParser();

Options options = new Options();
options.addOption(OptionBuilder.withLongOpt("file")
.withDescription("path to audio file")
options.addOption(OptionBuilder.withLongOpt("uri")
.withDescription("path to audio uri")
.hasArg()
.withArgName("FILE_PATH")
.create());
Expand All @@ -170,10 +164,10 @@ public static void main(String[] args) throws Exception {

try {
CommandLine line = parser.parse(options, args);
if (line.hasOption("file")) {
audioFile = line.getOptionValue("file");
if (line.hasOption("uri")) {
audioFile = line.getOptionValue("uri");
} else {
System.err.println("An Audio file path must be specified (e.g. /foo/baz.raw).");
System.err.println("An Audio uri must be specified (e.g. file:///foo/baz.raw).");
System.exit(1);
}

Expand Down Expand Up @@ -203,7 +197,7 @@ public static void main(String[] args) throws Exception {
}

NonStreamingRecognizeClient client =
new NonStreamingRecognizeClient(host, port, audioFile, sampling);
new NonStreamingRecognizeClient(host, port, URI.create(audioFile), sampling);
try {
client.recognize();
} finally {
Expand Down
53 changes: 42 additions & 11 deletions speech/grpc/src/main/proto/google/speech/v1/cloud-speech.proto
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,15 @@ option java_multiple_files = true;
option java_outer_classname = "SpeechProto";
option java_package = "com.google.cloud.speech.v1";


// Service that implements Google Cloud Speech API.
service Speech {
// Perform bidirectional streaming speech recognition on audio using gRPC.
rpc Recognize(stream RecognizeRequest) returns (stream RecognizeResponse);

// Perform non-streaming speech recognition on audio using HTTPS.
rpc NonStreamingRecognize(RecognizeRequest) returns (NonStreamingRecognizeResponse) {
option (.google.api.http) = { post: "/v1/speech:recognize" body: "*" };
option (google.api.http) = { post: "/v1/speech:recognize" body: "*" };
}
}

Expand All @@ -54,7 +55,7 @@ message RecognizeRequest {

// The audio data to be recognized. For `NonStreamingRecognize`, all the
// audio data must be contained in the first (and only) `RecognizeRequest`
// message. For streaming `Recognize`, sequential chunks of audio data are
// message. For streaming `Recognize`, sequential chunks of audio data are
// sent in sequential `RecognizeRequest` messages.
AudioRequest audio_request = 2;
}
Expand All @@ -64,7 +65,7 @@ message RecognizeRequest {
message InitialRecognizeRequest {
// Audio encoding of the data sent in the audio message.
enum AudioEncoding {
// Not specified. Will return result `INVALID_ARGUMENT`.
// Not specified. Will return result [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
ENCODING_UNSPECIFIED = 0;

// Uncompressed 16-bit signed little-endian samples.
Expand Down Expand Up @@ -118,8 +119,6 @@ message InitialRecognizeRequest {
// profanities, replacing all but the initial character in each filtered word
// with asterisks, e.g. "f***". If set to `false` or omitted, profanities
// won't be filtered out.
// Note that profanity filtering is not implemented for all languages.
// If the language is not supported, this setting has no effect.
bool profanity_filter = 5;

// [Optional] If `false` or omitted, the recognizer will detect a single
Expand All @@ -146,13 +145,38 @@ message InitialRecognizeRequest {
// as they become available.
// If `false` or omitted, no `EndpointerEvents` are returned.
bool enable_endpointer_events = 8;

// [Optional] URI that points to a file where the recognition result should
// be stored in JSON format. If omitted or empty string, the recognition
// result is returned in the response. Should be specified only for
// `NonStreamingRecognize`. If specified in a `Recognize` request,
// `Recognize` returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
// If specified in a `NonStreamingRecognize` request,
// `NonStreamingRecognize` returns immediately, and the output file
// is created asynchronously once the audio processing completes.
// Currently, only Google Cloud Storage URIs are supported, which must be
// specified in the following format: `gs://bucket_name/object_name`
// (other URI formats return [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For
// more information, see [Request URIs](/storage/docs/reference-uris).
string output_uri = 9;
}

// Contains audio data in the format specified in the `InitialRecognizeRequest`.
// Either `content` or `uri` must be supplied. Supplying both or neither
// returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
message AudioRequest {
// [Required] The audio data bytes encoded as specified in
// `InitialRecognizeRequest`.
// The audio data bytes encoded as specified in
// `InitialRecognizeRequest`. Note: as with all bytes fields, protocol buffers
// use a pure binary representation, whereas JSON representations use base64.
bytes content = 1;

// URI that points to a file that contains audio data bytes as specified in
// `InitialRecognizeRequest`. Currently, only Google Cloud Storage URIs are
// supported, which must be specified in the following format:
// `gs://bucket_name/object_name` (other URI formats return
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
// [Request URIs](/storage/docs/reference-uris).
string uri = 2;
}

// `NonStreamingRecognizeResponse` is the only message returned to the client by
Expand Down Expand Up @@ -191,10 +215,14 @@ message RecognizeResponse {

// [Output-only] If set, returns a [google.rpc.Status][] message that
// specifies the error for the operation.
.google.rpc.Status error = 1;

// [Output-only] May contain zero or one `is_final=true` result (the newly
// settled portion). May also contain zero or more `is_final=false` results.
google.rpc.Status error = 1;

// [Output-only] For `continuous=false`, this repeated list contains zero or
// one result that corresponds to all of the audio processed so far. For
// `continuous=true`, this repeated list contains zero or more results that
// correspond to consecutive portions of the audio being processed.
// In both cases, contains zero or one `is_final=true` result (the newly
// settled portion), followed by zero or more `is_final=false` results.
repeated SpeechRecognitionResult results = 2;

// [Output-only] Indicates the lowest index in the `results` array that has
Expand All @@ -206,7 +234,10 @@ message RecognizeResponse {
EndpointerEvent endpoint = 4;
}

// A speech recognition result corresponding to a portion of the audio.
message SpeechRecognitionResult {
// [Output-only] May contain one or more recognition hypotheses (up to the
// maximum specified in `max_alternatives`).
repeated SpeechRecognitionAlternative alternatives = 1;

// [Output-only] Set `true` if this is the final time the speech service will
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
* Copyright 2016 Google Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.google.cloud.speech.grpc.demos;

import static org.junit.Assert.assertEquals;

import com.google.cloud.speech.v1.AudioRequest;

import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

import java.io.File;
import java.io.IOException;
import java.net.URI;

/**
 * Unit tests for {@link AudioRequestFactory}.
 */
@RunWith(JUnit4.class)
public class AudioRequestFactoryTest {

  @Test
  public void verifyBytesInSizeFromLocalFile() throws IOException {
    File localAudio = new File("resources/audio.raw");
    AudioRequest request = AudioRequestFactory.createRequest(localAudio.toURI());

    // The request built from the local file is expected to carry 57958 content bytes.
    assertEquals(57958, request.getContent().size());
  }

  @Test
  public void verifyBytesInSizeFromGoogleStorageFile() throws IOException {
    String audioUri = "gs://cloud-samples-tests/speech/audio.raw";
    AudioRequest request = AudioRequestFactory.createRequest(URI.create(audioUri));

    // A Cloud Storage input is expected to carry no content bytes in the request...
    assertEquals(0, request.getContent().size());

    // ...and to carry the original URI string instead.
    assertEquals(audioUri, request.getUri());
  }
}