Skip to content

Commit 79f767a

Browse files
gh-144001: Support ignoring the invalid pad character in Base64 decoding
1 parent 9b154ab commit 79f767a

File tree

5 files changed

+113
-61
lines changed

5 files changed

+113
-61
lines changed

Doc/library/base64.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,9 @@ POST request.
8787

8888
If *ignorechars* is specified, it should be a :term:`bytes-like object`
8989
containing characters to ignore from the input when *validate* is true.
90+
If *ignorechars* contains the pad character ``'='``, the pad character
91+
appearing before the end of the encoded data and any excess pad characters
92+
will be ignored.
9093
The default value of *validate* is ``True`` if *ignorechars* is specified,
9194
``False`` otherwise.
9295

Doc/library/binascii.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ The :mod:`binascii` module defines the following functions:
5656

5757
If *ignorechars* is specified, it should be a :term:`bytes-like object`
5858
containing characters to ignore from the input when *strict_mode* is true.
59+
If *ignorechars* contains the pad character ``'='``, the pad character
60+
appearing before the end of the encoded data and any excess pad characters
61+
will be ignored.
5962
The default value of *strict_mode* is ``True`` if *ignorechars* is specified,
6063
``False`` otherwise.
6164

Lib/test/test_base64.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -305,14 +305,20 @@ def test_b64decode_invalid_chars(self):
305305
# issue 1466065: Test some invalid characters.
306306
tests = ((b'%3d==', b'\xdd', b'%$'),
307307
(b'$3d==', b'\xdd', b'%$'),
308-
(b'[==', b'', None),
308+
(b'[==', b'', b'[='),
309309
(b'YW]3=', b'am', b']'),
310310
(b'3{d==', b'\xdd', b'{}'),
311311
(b'3d}==', b'\xdd', b'{}'),
312312
(b'@@', b'', b'@!'),
313313
(b'!', b'', b'@!'),
314314
(b"YWJj\n", b"abc", b'\n'),
315315
(b'YWJj\nYWI=', b'abcab', b'\n'),
316+
(b'=YWJj', b'abc', b'='),
317+
(b'Y=WJj', b'abc', b'='),
318+
(b'Y==WJj', b'abc', b'='),
319+
(b'Y===WJj', b'abc', b'='),
320+
(b'YW=Jj', b'abc', b'='),
321+
(b'YWJj=', b'abc', b'='),
316322
(b'YW\nJj', b'abc', b'\n'),
317323
(b'YW\nJj', b'abc', bytearray(b'\n')),
318324
(b'YW\nJj', b'abc', memoryview(b'\n')),
@@ -334,9 +340,8 @@ def test_b64decode_invalid_chars(self):
334340
with self.assertRaises(binascii.Error):
335341
# Even empty ignorechars enables the strict mode.
336342
base64.b64decode(bstr, ignorechars=b'')
337-
if ignorechars is not None:
338-
r = base64.b64decode(bstr, ignorechars=ignorechars)
339-
self.assertEqual(r, res)
343+
r = base64.b64decode(bstr, ignorechars=ignorechars)
344+
self.assertEqual(r, res)
340345

341346
with self.assertRaises(TypeError):
342347
base64.b64decode(b'', ignorechars='')

Lib/test/test_binascii.py

Lines changed: 83 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -118,66 +118,77 @@ def addnoise(line):
118118
# empty strings. TBD: shouldn't it raise an exception instead ?
119119
self.assertEqual(binascii.a2b_base64(self.type2test(fillers)), b'')
120120

121-
def test_base64_strict_mode(self):
122-
# Test base64 with strict mode on
123-
def _assertRegexTemplate(assert_regex: str, data: bytes, non_strict_mode_expected_result: bytes):
121+
def test_base64_bad_padding(self):
122+
# Test malformed padding
123+
def _assertRegexTemplate(assert_regex, data, non_strict_mode_expected_result):
124+
data = self.type2test(data)
124125
with self.assertRaisesRegex(binascii.Error, assert_regex):
125-
binascii.a2b_base64(self.type2test(data), strict_mode=True)
126-
self.assertEqual(binascii.a2b_base64(self.type2test(data), strict_mode=False),
126+
binascii.a2b_base64(data, strict_mode=True)
127+
self.assertEqual(binascii.a2b_base64(data, strict_mode=False),
127128
non_strict_mode_expected_result)
128-
self.assertEqual(binascii.a2b_base64(self.type2test(data)),
129+
self.assertEqual(binascii.a2b_base64(data, strict_mode=True,
130+
ignorechars=b'='),
131+
non_strict_mode_expected_result)
132+
self.assertEqual(binascii.a2b_base64(data),
129133
non_strict_mode_expected_result)
130134

131-
def assertExcessData(data, non_strict_mode_expected_result: bytes):
132-
_assertRegexTemplate(r'(?i)Excess data', data, non_strict_mode_expected_result)
133-
134-
def assertNonBase64Data(data, non_strict_mode_expected_result: bytes):
135-
_assertRegexTemplate(r'(?i)Only base64 data', data, non_strict_mode_expected_result)
135+
def assertLeadingPadding(*args):
136+
_assertRegexTemplate(r'(?i)Leading padding', *args)
136137

137-
def assertLeadingPadding(data, non_strict_mode_expected_result: bytes):
138-
_assertRegexTemplate(r'(?i)Leading padding', data, non_strict_mode_expected_result)
138+
def assertDiscontinuousPadding(*args):
139+
_assertRegexTemplate(r'(?i)Discontinuous padding', *args)
139140

140-
def assertDiscontinuousPadding(data, non_strict_mode_expected_result: bytes):
141-
_assertRegexTemplate(r'(?i)Discontinuous padding', data, non_strict_mode_expected_result)
141+
def assertExcessPadding(*args):
142+
_assertRegexTemplate(r'(?i)Excess padding', *args)
142143

143-
def assertExcessPadding(data, non_strict_mode_expected_result: bytes):
144-
_assertRegexTemplate(r'(?i)Excess padding', data, non_strict_mode_expected_result)
144+
def assertInvalidLength(*args):
145+
_assertRegexTemplate(r'(?i)Invalid.+number of data characters', *args)
145146

146-
# Test excess data exceptions
147-
assertExcessData(b'ab==a', b'i')
148147
assertExcessPadding(b'ab===', b'i')
149148
assertExcessPadding(b'ab====', b'i')
150-
assertNonBase64Data(b'ab==:', b'i')
151-
assertExcessData(b'abc=a', b'i\xb7')
152-
assertNonBase64Data(b'abc=:', b'i\xb7')
153-
assertNonBase64Data(b'ab==\n', b'i')
154149
assertExcessPadding(b'abc==', b'i\xb7')
155150
assertExcessPadding(b'abc===', b'i\xb7')
156151
assertExcessPadding(b'abc====', b'i\xb7')
157152
assertExcessPadding(b'abc=====', b'i\xb7')
158153

159-
# Test non-base64 data exceptions
160-
assertNonBase64Data(b'\nab==', b'i')
161-
assertNonBase64Data(b'ab:(){:|:&};:==', b'i')
162-
assertNonBase64Data(b'a\nb==', b'i')
163-
assertNonBase64Data(b'a\x00b==', b'i')
164-
165-
# Test malformed padding
166154
assertLeadingPadding(b'=', b'')
167155
assertLeadingPadding(b'==', b'')
168156
assertLeadingPadding(b'===', b'')
169157
assertLeadingPadding(b'====', b'')
170158
assertLeadingPadding(b'=====', b'')
159+
assertLeadingPadding(b'=abcd', b'i\xb7\x1d')
160+
assertLeadingPadding(b'==abcd', b'i\xb7\x1d')
161+
assertLeadingPadding(b'===abcd', b'i\xb7\x1d')
162+
assertLeadingPadding(b'====abcd', b'i\xb7\x1d')
163+
assertLeadingPadding(b'=====abcd', b'i\xb7\x1d')
164+
165+
assertInvalidLength(b'a=b==', b'i')
166+
assertInvalidLength(b'a=bc=', b'i\xb7')
167+
assertInvalidLength(b'a=bc==', b'i\xb7')
168+
assertInvalidLength(b'a=bcd', b'i\xb7\x1d')
169+
assertInvalidLength(b'a=bcd=', b'i\xb7\x1d')
170+
171171
assertDiscontinuousPadding(b'ab=c=', b'i\xb7')
172-
assertDiscontinuousPadding(b'ab=ab==', b'i\xb6\x9b')
173-
assertNonBase64Data(b'ab=:=', b'i')
172+
assertDiscontinuousPadding(b'ab=cd', b'i\xb7\x1d')
173+
assertDiscontinuousPadding(b'ab=cd==', b'i\xb7\x1d')
174+
174175
assertExcessPadding(b'abcd=', b'i\xb7\x1d')
175176
assertExcessPadding(b'abcd==', b'i\xb7\x1d')
176177
assertExcessPadding(b'abcd===', b'i\xb7\x1d')
177178
assertExcessPadding(b'abcd====', b'i\xb7\x1d')
178179
assertExcessPadding(b'abcd=====', b'i\xb7\x1d')
180+
assertExcessPadding(b'abcd==', b'i\xb7\x1d')
181+
assertExcessPadding(b'abcd===', b'i\xb7\x1d')
182+
assertExcessPadding(b'abcd====', b'i\xb7\x1d')
183+
assertExcessPadding(b'abcd=====', b'i\xb7\x1d')
184+
assertExcessPadding(b'abcd=efgh', b'i\xb7\x1dy\xf8!')
185+
assertExcessPadding(b'abcd==efgh', b'i\xb7\x1dy\xf8!')
186+
assertExcessPadding(b'abcd===efgh', b'i\xb7\x1dy\xf8!')
187+
assertExcessPadding(b'abcd====efgh', b'i\xb7\x1dy\xf8!')
188+
assertExcessPadding(b'abcd=====efgh', b'i\xb7\x1dy\xf8!')
179189

180190
def test_base64_invalidchars(self):
191+
# Test non-base64 data exceptions
181192
def assertNonBase64Data(data, expected, ignorechars):
182193
data = self.type2test(data)
183194
assert_regex = r'(?i)Only base64 data'
@@ -195,10 +206,11 @@ def assertNonBase64Data(data, expected, ignorechars):
195206
assertNonBase64Data(b'ab:(){:|:&};:==', b'i', ignorechars=b':;(){}|&')
196207
assertNonBase64Data(b'a\nb==', b'i', ignorechars=b'\n')
197208
assertNonBase64Data(b'a\x00b==', b'i', ignorechars=b'\x00')
209+
assertNonBase64Data(b'ab:==', b'i', ignorechars=b':')
210+
assertNonBase64Data(b'ab=:=', b'i', ignorechars=b':')
198211
assertNonBase64Data(b'ab==:', b'i', ignorechars=b':')
199212
assertNonBase64Data(b'abc=:', b'i\xb7', ignorechars=b':')
200213
assertNonBase64Data(b'ab==\n', b'i', ignorechars=b'\n')
201-
assertNonBase64Data(b'ab=:=', b'i', ignorechars=b':')
202214
assertNonBase64Data(b'a\nb==', b'i', ignorechars=bytearray(b'\n'))
203215
assertNonBase64Data(b'a\nb==', b'i', ignorechars=memoryview(b'\n'))
204216

@@ -210,36 +222,67 @@ def assertNonBase64Data(data, expected, ignorechars):
210222
with self.assertRaises(TypeError):
211223
binascii.a2b_base64(data, ignorechars=None)
212224

225+
def test_base64_excess_data(self):
226+
# Test excess data exceptions
227+
def assertExcessData(data, non_strict_expected, ignore_padchar_expected=None):
228+
assert_regex = r'(?i)Excess data'
229+
data = self.type2test(data)
230+
with self.assertRaisesRegex(binascii.Error, assert_regex):
231+
binascii.a2b_base64(data, strict_mode=True)
232+
self.assertEqual(binascii.a2b_base64(data, strict_mode=False),
233+
non_strict_expected)
234+
if ignore_padchar_expected is not None:
235+
self.assertEqual(binascii.a2b_base64(data, strict_mode=True,
236+
ignorechars=b'='),
237+
ignore_padchar_expected)
238+
self.assertEqual(binascii.a2b_base64(data), non_strict_expected)
239+
240+
assertExcessData(b'ab==c', b'i')
241+
assertExcessData(b'ab==cd', b'i', b'i\xb7\x1d')
242+
assertExcessData(b'abc=d', b'i\xb7', b'i\xb7\x1d')
243+
213244
def test_base64errors(self):
214245
# Test base64 with invalid padding
215-
def assertIncorrectPadding(data):
246+
def assertIncorrectPadding(data, strict_mode=True):
247+
data = self.type2test(data)
216248
with self.assertRaisesRegex(binascii.Error, r'(?i)Incorrect padding'):
217-
binascii.a2b_base64(self.type2test(data))
249+
binascii.a2b_base64(data)
250+
with self.assertRaisesRegex(binascii.Error, r'(?i)Incorrect padding'):
251+
binascii.a2b_base64(data, strict_mode=False)
252+
if strict_mode:
253+
with self.assertRaisesRegex(binascii.Error, r'(?i)Incorrect padding'):
254+
binascii.a2b_base64(data, strict_mode=True)
218255

219256
assertIncorrectPadding(b'ab')
220257
assertIncorrectPadding(b'ab=')
221258
assertIncorrectPadding(b'abc')
222259
assertIncorrectPadding(b'abcdef')
223260
assertIncorrectPadding(b'abcdef=')
224261
assertIncorrectPadding(b'abcdefg')
225-
assertIncorrectPadding(b'a=b=')
226-
assertIncorrectPadding(b'a\nb=')
262+
assertIncorrectPadding(b'a=b=', strict_mode=False)
263+
assertIncorrectPadding(b'a\nb=', strict_mode=False)
227264

228265
# Test base64 with invalid number of valid characters (1 mod 4)
229-
def assertInvalidLength(data):
266+
def assertInvalidLength(data, strict_mode=True):
230267
n_data_chars = len(re.sub(br'[^A-Za-z0-9/+]', br'', data))
268+
data = self.type2test(data)
231269
expected_errmsg_re = \
232270
r'(?i)Invalid.+number of data characters.+' + str(n_data_chars)
233271
with self.assertRaisesRegex(binascii.Error, expected_errmsg_re):
234-
binascii.a2b_base64(self.type2test(data))
272+
binascii.a2b_base64(data)
273+
with self.assertRaisesRegex(binascii.Error, expected_errmsg_re):
274+
binascii.a2b_base64(data, strict_mode=False)
275+
if strict_mode:
276+
with self.assertRaisesRegex(binascii.Error, expected_errmsg_re):
277+
binascii.a2b_base64(data, strict_mode=True)
235278

236279
assertInvalidLength(b'a')
237280
assertInvalidLength(b'a=')
238281
assertInvalidLength(b'a==')
239282
assertInvalidLength(b'a===')
240283
assertInvalidLength(b'a' * 5)
241284
assertInvalidLength(b'a' * (4 * 87 + 1))
242-
assertInvalidLength(b'A\tB\nC ??DE') # only 5 valid characters
285+
assertInvalidLength(b'A\tB\nC ??DE', strict_mode=False) # only 5 valid characters
243286

244287
def test_uu(self):
245288
MAX_UU = 45

Modules/binascii.c

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -546,26 +546,24 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
546546
pads++;
547547

548548
if (strict_mode) {
549-
if (quad_pos == 0) {
550-
state = get_binascii_state(module);
551-
if (state) {
552-
PyErr_SetString(state->Error, (i == 0)
553-
? "Leading padding not allowed"
554-
: "Excess padding not allowed");
555-
}
556-
goto error_end;
549+
if (quad_pos >= 2 && quad_pos + pads <= 4) {
550+
continue;
551+
}
552+
if (ignorechar(BASE64_PAD, ignorechars)) {
553+
continue;
557554
}
558555
if (quad_pos == 1) {
559556
/* Set an error below. */
560557
break;
561558
}
562-
if (quad_pos + pads > 4) {
563-
state = get_binascii_state(module);
564-
if (state) {
565-
PyErr_SetString(state->Error, "Excess padding not allowed");
566-
}
567-
goto error_end;
559+
state = get_binascii_state(module);
560+
if (state) {
561+
PyErr_SetString(state->Error,
562+
(quad_pos == 0 && i == 0)
563+
? "Leading padding not allowed"
564+
: "Excess padding not allowed");
568565
}
566+
goto error_end;
569567
}
570568
else {
571569
if (quad_pos >= 2 && quad_pos + pads >= 4) {
@@ -574,8 +572,8 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
574572
*/
575573
goto done;
576574
}
575+
continue;
577576
}
578-
continue;
579577
}
580578

581579
unsigned char v = table_a2b_base64[this_ch];
@@ -591,7 +589,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
591589
}
592590

593591
// Characters that are not '=', in the middle of the padding, are not allowed
594-
if (strict_mode && pads) {
592+
if (pads && strict_mode && !ignorechar(BASE64_PAD, ignorechars)) {
595593
state = get_binascii_state(module);
596594
if (state) {
597595
PyErr_SetString(state->Error, (quad_pos + pads == 4)
@@ -642,7 +640,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
642640
goto error_end;
643641
}
644642

645-
if (quad_pos != 0 && quad_pos + pads != 4) {
643+
if (quad_pos != 0 && quad_pos + pads < 4) {
646644
state = get_binascii_state(module);
647645
if (state) {
648646
PyErr_SetString(state->Error, "Incorrect padding");

0 commit comments

Comments
 (0)