Skip to content
This repository was archived by the owner on Aug 1, 2024. It is now read-only.

Commit 19ae2f1

Browse files
shicks authored and copybara-github committed
Add goog.crypt.base64.{en,de}codeStringUtf8
The non-`Utf8` version has lossy behavior when passed strings with characters outside the Latin-1 range (i.e. charCode > 255). This cannot be fixed in-place because the correct behavior depends on whether the input string is intended as text or binary. If it's binary, then the correct behavior is to throw (and a future change will start throwing asynchronously, so that this case should start showing up in logs). If it's text, then it's appropriate to first encode non-ASCII characters (i.e. charCode > 127) with UTF-8, but note that this is wholly inappropriate for binary input, since it changes the encoding of bytes in the [128..255] range.

RELNOTES[NEW]: Added `goog.crypt.base64.{en,de}codeStringUtf8`

PiperOrigin-RevId: 469817557
Change-Id: I8accaa98859aa05ec337cd0aaa2f1e2f7d185fa8
1 parent 50a2ae0 commit 19ae2f1

File tree

2 files changed

+96
-23
lines changed

2 files changed

+96
-23
lines changed

closure/goog/crypt/base64.js

Lines changed: 44 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -211,9 +211,10 @@ goog.crypt.base64.encodeByteArray = function(input, alphabet) {
211211

212212

213213
/**
214-
* Base64-encode a string.
214+
* Base64-encode a binary string.
215215
*
216-
* @param {string} input A string to encode.
216+
* @param {string} input A string to encode. Must not contain characters
217+
* outside of the Latin-1 range (i.e. charCode > 255).
217218
* @param {!goog.crypt.base64.Alphabet=} alphabet Base 64 alphabet to
218219
* use in encoding. Alphabet.DEFAULT is used by default.
219220
* @return {string} The base64 encoded string.
@@ -231,7 +232,28 @@ goog.crypt.base64.encodeString = function(input, alphabet) {
231232

232233

233234
/**
234-
* Base64-decode a string.
235+
* Base64-encode a text string. Non-ASCII characters (charCode > 127) will be
236+
* encoded as UTF-8.
237+
*
238+
* @param {string} input A string to encode.
239+
* @param {!goog.crypt.base64.Alphabet=} alphabet Base 64 alphabet to
240+
* use in encoding. Alphabet.DEFAULT is used by default.
241+
* @return {string} The base64 encoded string.
242+
*/
243+
goog.crypt.base64.encodeStringUtf8 = function(input, alphabet) {
244+
'use strict';
245+
// Shortcut for browsers that implement
246+
// a native base64 encoder in the form of "btoa/atob"
247+
if (goog.crypt.base64.HAS_NATIVE_ENCODE_ && !alphabet) {
248+
return goog.global.btoa(unescape(encodeURIComponent(input)));
249+
}
250+
return goog.crypt.base64.encodeByteArray(
251+
goog.crypt.stringToUtf8ByteArray(input), alphabet);
252+
};
253+
254+
255+
/**
256+
* Base64-decode a string into a binary bytestring.
235257
*
236258
* @param {string} input Input to decode. Any whitespace is ignored, and the
237259
* input may be encoded with either supported alphabet (or a mix thereof).
@@ -258,6 +280,25 @@ goog.crypt.base64.decodeString = function(input, useCustomDecoder) {
258280
};
259281

260282

283+
/**
284+
* Base64-decode a string. The input should be the result of double-encoding
285+
* a unicode string: first the unicode characters (>127) are encoded as UTF-8
286+
* bytes, and then the resulting bytes are base64-encoded.
287+
*
288+
* @param {string} input Input to decode. Any whitespace is ignored, and the
289+
* input may be encoded with either supported alphabet (or a mix thereof).
290+
* @param {boolean=} useCustomDecoder True indicates the custom decoder is used,
291+
* which supports alternative alphabets. Note that passing false may still
292+
* use the custom decoder on browsers without native support.
293+
* @return {string} string representing the decoded value.
294+
*/
295+
goog.crypt.base64.decodeStringUtf8 = function(input, useCustomDecoder) {
296+
'use strict';
297+
return decodeURIComponent(
298+
escape(goog.crypt.base64.decodeString(input, useCustomDecoder)));
299+
};
300+
301+
261302
/**
262303
* Base64-decode a string to an Array of numbers.
263304
*

closure/goog/crypt/base64_test.js

Lines changed: 52 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,11 @@ const tests = [
2626

2727
// Testing non-ascii characters (1-10 in chinese)
2828
[
29-
'\xe4\xb8\x80\xe4\xba\x8c\xe4\xb8\x89\xe5\x9b\x9b\xe4\xba\x94\xe5' +
30-
'\x85\xad\xe4\xb8\x83\xe5\x85\xab\xe4\xb9\x9d\xe5\x8d\x81',
29+
{
30+
binary: '\xe4\xb8\x80\xe4\xba\x8c\xe4\xb8\x89\xe5\x9b\x9b\xe4\xba\x94' +
31+
'\xe5\x85\xad\xe4\xb8\x83\xe5\x85\xab\xe4\xb9\x9d\xe5\x8d\x81',
32+
text: '一二三四五六七八九十',
33+
},
3134
[
3235
'5LiA5LqM5LiJ5Zub5LqU5YWt5LiD5YWr5Lmd5Y2B',
3336
'5LiA5LqM5LiJ5Zub5LqU5YWt5LiD5YWr5Lmd5Y2B',
@@ -40,73 +43,100 @@ const tests = [
4043
// Testing for web-safe alphabets
4144
[
4245
'>>>???>>>???=/',
43-
['Pj4+Pz8/Pj4+Pz8/PS8=', 'Pj4+Pz8/Pj4+Pz8/PS8', 'Pj4-Pz8_Pj4-Pz8_PS8=', 'Pj4-Pz8_Pj4-Pz8_PS8.', 'Pj4-Pz8_Pj4-Pz8_PS8'],
46+
[
47+
'Pj4+Pz8/Pj4+Pz8/PS8=',
48+
'Pj4+Pz8/Pj4+Pz8/PS8',
49+
'Pj4-Pz8_Pj4-Pz8_PS8=',
50+
'Pj4-Pz8_Pj4-Pz8_PS8.',
51+
'Pj4-Pz8_Pj4-Pz8_PS8',
52+
],
4453
],
4554
];
4655
// clang-format on
4756

4857
/**
4958
* Asserts encodings
50-
* @param {string} input an input string.
59+
* @param {string|{binary: string, text: string}} input an input string.
5160
* @param {!Array<string>} expectedOutputs expected outputs in the order of
5261
* base64.Alphabet enum.
5362
*/
5463
function assertEncodings(input, expectedOutputs) {
55-
const arr = crypt.stringToByteArray(input);
64+
const {text, binary} =
65+
typeof input === 'string' ? {text: input, binary: input} : input;
66+
const arr = crypt.stringToByteArray(binary);
67+
68+
// quick validity test
69+
assertArrayEquals(arr, crypt.stringToUtf8ByteArray(text));
5670

5771
// encodeString
5872
for (const name in base64.Alphabet) {
5973
const alphabet = base64.Alphabet[name];
6074
assertEquals(
61-
base64.encodeString(input, alphabet), expectedOutputs[alphabet]);
75+
expectedOutputs[alphabet], base64.encodeStringUtf8(text, alphabet));
76+
assertEquals(
77+
expectedOutputs[alphabet], base64.encodeString(binary, alphabet));
6278
}
79+
// default case
80+
assertEquals(
81+
expectedOutputs[base64.Alphabet.DEFAULT], base64.encodeStringUtf8(text));
6382
assertEquals(
64-
base64.encodeString(input), // default case
65-
expectedOutputs[base64.Alphabet.DEFAULT]);
83+
expectedOutputs[base64.Alphabet.DEFAULT], base64.encodeString(binary));
6684

6785
// encodeByteArray with Array<number>
6886
for (const name in base64.Alphabet) {
6987
const alphabet = base64.Alphabet[name];
7088
assertEquals(
71-
base64.encodeByteArray(arr, alphabet), expectedOutputs[alphabet]);
89+
expectedOutputs[alphabet], base64.encodeByteArray(arr, alphabet));
7290
}
91+
// default case
7392
assertEquals(
74-
base64.encodeByteArray(arr), // default case
75-
expectedOutputs[base64.Alphabet.DEFAULT]);
93+
expectedOutputs[base64.Alphabet.DEFAULT], base64.encodeByteArray(arr));
7694

7795
// encodeByteArray with Uint8Array
7896
if (SUPPORT_TYPED_ARRAY) {
7997
const uint8Arr = new Uint8Array(arr);
8098
for (const name in base64.Alphabet) {
8199
const alphabet = base64.Alphabet[name];
82100
assertEquals(
83-
base64.encodeByteArray(uint8Arr, alphabet),
84-
expectedOutputs[alphabet]);
101+
expectedOutputs[alphabet],
102+
base64.encodeByteArray(uint8Arr, alphabet));
85103
}
104+
// default case
86105
assertEquals(
87-
base64.encodeByteArray(uint8Arr), // default case
88-
expectedOutputs[base64.Alphabet.DEFAULT]);
106+
expectedOutputs[base64.Alphabet.DEFAULT],
107+
base64.encodeByteArray(uint8Arr));
89108
}
90109
}
91110

92111
/**
93112
* Assert decodings
94113
* @param {!Array<string>} inputs input strings in various encodings.
95-
* @param {string} stringOutput expected output in string.
114+
* @param {string|{text: string, binary: string}} expectedOutput expected output
115+
* in string (optionally split out for text/binary).
96116
*/
97-
function assertDecodings(inputs, stringOutput) {
98-
const arrOutput = crypt.stringToByteArray(stringOutput);
117+
function assertDecodings(inputs, expectedOutput) {
118+
const textOutput =
119+
typeof expectedOutput === 'string' ? expectedOutput : expectedOutput.text;
120+
const binaryOutput = typeof expectedOutput === 'string' ?
121+
expectedOutput :
122+
expectedOutput.binary;
123+
const arrOutput = crypt.stringToByteArray(binaryOutput);
99124
const uint8ArrOutput = SUPPORT_TYPED_ARRAY ? new Uint8Array(arrOutput) : null;
100125

126+
// Quick validity check that decoding the text version is equivalent.
127+
assertArrayEquals(arrOutput, crypt.stringToUtf8ByteArray(textOutput));
128+
101129
for (let i = 0; i < inputs.length; i++) {
102130
const input = inputs[i];
103131

104132
// decodeString
105-
assertEquals(base64.decodeString(input, true), stringOutput);
133+
assertEquals(textOutput, base64.decodeStringUtf8(input, true));
134+
assertEquals(binaryOutput, base64.decodeString(input, true));
106135

107136
if (i === 0) {
108137
// For Alphabet.DEFAULT, test with native decoder too
109-
assertEquals(base64.decodeString(input), stringOutput);
138+
assertEquals(textOutput, base64.decodeStringUtf8(input));
139+
assertEquals(binaryOutput, base64.decodeString(input));
110140
}
111141

112142
// decodeStringToByteArray
@@ -163,7 +193,9 @@ testSuite({
163193
const decodedArr = crypt.stringToByteArray(decoded);
164194

165195
assertEquals(base64.decodeString(encoded), decoded); // native
196+
assertEquals(base64.decodeStringUtf8(encoded), decoded);
166197
assertEquals(base64.decodeString(encoded, true), decoded); // custom
198+
assertEquals(base64.decodeStringUtf8(encoded, true), decoded);
167199
assertArrayEquals(base64.decodeStringToByteArray(encoded), decodedArr);
168200

169201
if (SUPPORT_TYPED_ARRAY) {

0 commit comments

Comments
 (0)