From 157454d7c29721c643ebe05a78708157d94b656e Mon Sep 17 00:00:00 2001 From: Lil-Ran Date: Thu, 29 Jan 2026 19:13:33 +0800 Subject: [PATCH 1/7] Improve Unicode handling --- Objects/stringlib/unicode_format.h | 34 +++++++++------- Objects/unicode_formatter.c | 64 +++++++++++++++++++++--------- 2 files changed, 64 insertions(+), 34 deletions(-) diff --git a/Objects/stringlib/unicode_format.h b/Objects/stringlib/unicode_format.h index ff32db65b11a0b..f72937c72641ea 100644 --- a/Objects/stringlib/unicode_format.h +++ b/Objects/stringlib/unicode_format.h @@ -759,13 +759,9 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal, return 2; } - -/* do the !r or !s conversion on obj */ static PyObject * do_conversion(PyObject *obj, Py_UCS4 conversion) { - /* XXX in pre-3.0, do we need to convert this to unicode, since it - might have returned a string? */ switch (conversion) { case 'r': return PyObject_Repr(obj); @@ -774,17 +770,25 @@ do_conversion(PyObject *obj, Py_UCS4 conversion) case 'a': return PyObject_ASCII(obj); default: - if (conversion > 32 && conversion < 127) { - /* It's the ASCII subrange; casting to char is safe - (assuming the execution character set is an ASCII - superset). 
*/ - PyErr_Format(PyExc_ValueError, - "Unknown conversion specifier %c", - (char)conversion); - } else - PyErr_Format(PyExc_ValueError, - "Unknown conversion specifier \\x%x", - (unsigned int)conversion); + if (conversion == '\'') { + PyErr_SetString(PyExc_ValueError, + "Unknown conversion specifier \"'\""); + } + else if (conversion >= 32 && conversion < 127) { + PyErr_Format(PyExc_ValueError, + "Unknown conversion specifier '%c'", + (int)conversion); + } + else if (Py_UNICODE_ISPRINTABLE(conversion)) { + PyErr_Format(PyExc_ValueError, + "Unknown conversion specifier '%c' (U+%04X)", + (int)conversion, (int)conversion); + } + else { + PyErr_Format(PyExc_ValueError, + "Unknown conversion specifier U+%04X", + (int)conversion); + } return NULL; } } diff --git a/Objects/unicode_formatter.c b/Objects/unicode_formatter.c index b8604d1355940a..f083c49b93e117 100644 --- a/Objects/unicode_formatter.c +++ b/Objects/unicode_formatter.c @@ -243,46 +243,72 @@ _PyUnicode_InsertThousandsGrouping( } -/* Raises an exception about an unknown presentation type for this - * type. */ - static void unknown_presentation_type(Py_UCS4 presentation_type, const char* type_name) { - /* %c might be out-of-range, hence the two cases. 
*/ - if (presentation_type > 32 && presentation_type < 128) + if (presentation_type == '\'') { + PyErr_Format(PyExc_ValueError, + "Unknown format code \"'\" " + "for object of type '%.200s'", + type_name); + } + else if (presentation_type >= 32 && presentation_type < 127) { PyErr_Format(PyExc_ValueError, "Unknown format code '%c' " "for object of type '%.200s'", - (char)presentation_type, + (int)presentation_type, type_name); - else + } + else if (Py_UNICODE_ISPRINTABLE(presentation_type)) { PyErr_Format(PyExc_ValueError, - "Unknown format code '\\x%x' " + "Unknown format code '%c' (U+%04X) " "for object of type '%.200s'", - (unsigned int)presentation_type, + (int)presentation_type, (int)presentation_type, type_name); + } + else { + PyErr_Format(PyExc_ValueError, + "Unknown format code U+%04X " + "for object of type '%.200s'", + (int)presentation_type, + type_name); + } } static void -invalid_thousands_separator_type(char specifier, Py_UCS4 presentation_type) +invalid_thousands_separator_type(char separator, Py_UCS4 presentation_type) { - assert(specifier == ',' || specifier == '_'); - if (presentation_type > 32 && presentation_type < 128) + assert(separator == ',' || separator == '_'); + if (presentation_type == '\'') { PyErr_Format(PyExc_ValueError, - "Cannot specify '%c' with '%c'.", - specifier, (char)presentation_type); - else + "Cannot specify '%c' with type code \"'\"", + separator); + } + else if (presentation_type >= 32 && presentation_type < 127) { + PyErr_Format(PyExc_ValueError, + "Cannot specify '%c' with type code '%c'", + separator, + (int)presentation_type); + } + else if (Py_UNICODE_ISPRINTABLE(presentation_type)) { PyErr_Format(PyExc_ValueError, - "Cannot specify '%c' with '\\x%x'.", - specifier, (unsigned int)presentation_type); + "Cannot specify '%c' with type code '%c' (U+%04X)", + separator, + (int)presentation_type, (int)presentation_type); + } + else { + PyErr_Format(PyExc_ValueError, + "Cannot specify '%c' with type code U+%04X", + 
separator, + (int)presentation_type); + } } static void invalid_comma_and_underscore(void) { - PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'."); + PyErr_SetString(PyExc_ValueError, "Cannot specify both ',' and '_'"); } /* @@ -547,7 +573,7 @@ parse_internal_render_format_spec(PyObject *obj, end-start); if (actual_format_spec != NULL) { PyErr_Format(PyExc_ValueError, - "Invalid format specifier '%U' for object of type '%.200s'", + "Invalid format specifier %R for object of type '%.200s'", actual_format_spec, Py_TYPE(obj)->tp_name); Py_DECREF(actual_format_spec); } From af8045fd214ed62dbd02f8c9c524c09493f8f33a Mon Sep 17 00:00:00 2001 From: Lil-Ran Date: Thu, 29 Jan 2026 19:50:32 +0800 Subject: [PATCH 2/7] Check type code before thousands separator; limit fractional part grouping separator --- Objects/unicode_formatter.c | 79 ++++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 31 deletions(-) diff --git a/Objects/unicode_formatter.c b/Objects/unicode_formatter.c index f083c49b93e117..a8abe71eca5263 100644 --- a/Objects/unicode_formatter.c +++ b/Objects/unicode_formatter.c @@ -280,29 +280,11 @@ static void invalid_thousands_separator_type(char separator, Py_UCS4 presentation_type) { assert(separator == ',' || separator == '_'); - if (presentation_type == '\'') { - PyErr_Format(PyExc_ValueError, - "Cannot specify '%c' with type code \"'\"", - separator); - } - else if (presentation_type >= 32 && presentation_type < 127) { - PyErr_Format(PyExc_ValueError, - "Cannot specify '%c' with type code '%c'", - separator, - (int)presentation_type); - } - else if (Py_UNICODE_ISPRINTABLE(presentation_type)) { - PyErr_Format(PyExc_ValueError, - "Cannot specify '%c' with type code '%c' (U+%04X)", - separator, - (int)presentation_type, (int)presentation_type); - } - else { - PyErr_Format(PyExc_ValueError, - "Cannot specify '%c' with type code U+%04X", - separator, - (int)presentation_type); - } + /* presentation_type has been checked before 
thousands separator. */ + assert(presentation_type >= 32 && presentation_type < 127); + PyErr_Format(PyExc_ValueError, + "Cannot specify '%c' with type code '%c'", + separator, (int)presentation_type); } static void @@ -589,7 +571,31 @@ parse_internal_render_format_spec(PyObject *obj, specifier. Do not take into account what type of formatting we're doing (int, float, string). */ - if (format->thousands_separators) { + switch (format->type) { + case 'b': + case 'c': + case 'd': + case 'e': + case 'E': + case 'f': + case 'F': + case 'g': + case 'G': + case 'n': + case 'o': + case 's': + case 'x': + case 'X': + case '%': + case '\0': + /* These are all valid types. */ + break; + default: + unknown_presentation_type(format->type, Py_TYPE(obj)->tp_name); + return 0; + } + + if (format->thousands_separators != LT_NO_LOCALE) { switch (format->type) { case 'd': case 'e': @@ -614,17 +620,28 @@ parse_internal_render_format_spec(PyObject *obj, } _Py_FALLTHROUGH; default: - invalid_thousands_separator_type(format->thousands_separators, format->type); + invalid_thousands_separator_type(format->thousands_separators, + format->type); return 0; } } - if (format->type == 'n' - && format->frac_thousands_separator != LT_NO_LOCALE) - { - invalid_thousands_separator_type(format->frac_thousands_separator, - format->type); - return 0; + if (format->frac_thousands_separator != LT_NO_LOCALE) { + switch (format->type) { + case 'e': + case 'f': + case 'g': + case 'E': + case 'G': + case '%': + case 'F': + case '\0': + break; + default: + invalid_thousands_separator_type(format->frac_thousands_separator, + format->type); + return 0; + } } assert (format->align <= 127); From b6b0e29167059a496c8113e5cf90b26dbda8a35b Mon Sep 17 00:00:00 2001 From: Lil-Ran Date: Thu, 29 Jan 2026 20:13:31 +0800 Subject: [PATCH 3/7] Clarify which part is wrong --- Objects/unicode_formatter.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/Objects/unicode_formatter.c 
b/Objects/unicode_formatter.c index a8abe71eca5263..8a550a709af011 100644 --- a/Objects/unicode_formatter.c +++ b/Objects/unicode_formatter.c @@ -287,6 +287,17 @@ invalid_thousands_separator_type(char separator, Py_UCS4 presentation_type) separator, (int)presentation_type); } +static void +invalid_fraction_separator_type(char separator, Py_UCS4 presentation_type) +{ + assert(separator == ',' || separator == '_'); + /* presentation_type has been checked before thousands separator. */ + assert(presentation_type >= 32 && presentation_type < 127); + PyErr_Format(PyExc_ValueError, + "Cannot specify '%c' in fractional part with type code '%c'", + separator, (int)presentation_type); +} + static void invalid_comma_and_underscore(void) { @@ -638,8 +649,8 @@ parse_internal_render_format_spec(PyObject *obj, case '\0': break; default: - invalid_thousands_separator_type(format->frac_thousands_separator, - format->type); + invalid_fraction_separator_type(format->frac_thousands_separator, + format->type); return 0; } } From 80673fb8d48d9e285115f361c338cfbee06c95ff Mon Sep 17 00:00:00 2001 From: Lil-Ran Date: Thu, 29 Jan 2026 20:22:11 +0800 Subject: [PATCH 4/7] Unify the type code switch --- Objects/unicode_formatter.c | 87 +++++++++++++++---------------------- 1 file changed, 35 insertions(+), 52 deletions(-) diff --git a/Objects/unicode_formatter.c b/Objects/unicode_formatter.c index 8a550a709af011..d1baa6e7ce2106 100644 --- a/Objects/unicode_formatter.c +++ b/Objects/unicode_formatter.c @@ -583,76 +583,59 @@ parse_internal_render_format_spec(PyObject *obj, we're doing (int, float, string). 
*/ switch (format->type) { - case 'b': - case 'c': - case 'd': case 'e': case 'E': case 'f': case 'F': case 'g': case 'G': + case '%': + case '\0': + break; + case 'd': + if (format->frac_thousands_separator != LT_NO_LOCALE) { + invalid_fraction_separator_type(format->frac_thousands_separator, + format->type); + return 0; + } + break; + case 'c': + case 's': case 'n': + if (format->thousands_separators != LT_NO_LOCALE) { + invalid_thousands_separator_type(format->thousands_separators, + format->type); + return 0; + } + if (format->frac_thousands_separator != LT_NO_LOCALE) { + invalid_fraction_separator_type(format->frac_thousands_separator, + format->type); + return 0; + } + break; + case 'b': case 'o': - case 's': case 'x': case 'X': - case '%': - case '\0': - /* These are all valid types. */ - break; - default: - unknown_presentation_type(format->type, Py_TYPE(obj)->tp_name); - return 0; - } - - if (format->thousands_separators != LT_NO_LOCALE) { - switch (format->type) { - case 'd': - case 'e': - case 'f': - case 'g': - case 'E': - case 'G': - case '%': - case 'F': - case '\0': - /* These are allowed. See PEP 378.*/ - break; - case 'b': - case 'o': - case 'x': - case 'X': - /* Underscores are allowed in bin/oct/hex. See PEP 515. */ - if (format->thousands_separators == LT_UNDERSCORE_LOCALE) { - /* Every four digits, not every three, in bin/oct/hex. */ - format->thousands_separators = LT_UNDER_FOUR_LOCALE; - break; - } - _Py_FALLTHROUGH; - default: + /* Underscores are allowed in bin/oct/hex. See PEP 515. */ + if (format->thousands_separators == LT_UNDERSCORE_LOCALE) { + /* Every four digits, not every three, in bin/oct/hex. 
*/ + format->thousands_separators = LT_UNDER_FOUR_LOCALE; + } + else if (format->thousands_separators != LT_NO_LOCALE) { invalid_thousands_separator_type(format->thousands_separators, format->type); return 0; } - } - - if (format->frac_thousands_separator != LT_NO_LOCALE) { - switch (format->type) { - case 'e': - case 'f': - case 'g': - case 'E': - case 'G': - case '%': - case 'F': - case '\0': - break; - default: + if (format->frac_thousands_separator != LT_NO_LOCALE) { invalid_fraction_separator_type(format->frac_thousands_separator, format->type); return 0; } + break; + default: + unknown_presentation_type(format->type, Py_TYPE(obj)->tp_name); + return 0; } assert (format->align <= 127); From e4d8bc99a4f46c5c05c1fe9c29ee587d62575468 Mon Sep 17 00:00:00 2001 From: Lil-Ran Date: Thu, 29 Jan 2026 21:49:24 +0800 Subject: [PATCH 5/7] Pass current tests --- Lib/test/test_format.py | 22 +++++++++++++++++----- Lib/test/test_fstring.py | 18 +++++++++++++----- Objects/unicode_formatter.c | 16 +++++++++++++--- 3 files changed, 43 insertions(+), 13 deletions(-) diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py index 00f1ab44b0a8fa..d459bfa33922b0 100644 --- a/Lib/test/test_format.py +++ b/Lib/test/test_format.py @@ -661,26 +661,38 @@ def test_g_format_has_no_trailing_zeros(self): self.assertEqual(format(12300050.0, "#.6g"), "1.23000e+07") def test_with_two_commas_in_format_specifier(self): - error_msg = re.escape("Cannot specify ',' with ','.") + error_msg = re.escape("Cannot specify grouping ',' more than once") with self.assertRaisesRegex(ValueError, error_msg): '{:,,}'.format(1) + with self.assertRaisesRegex(ValueError, error_msg): + '{:.,,}'.format(1.1) + with self.assertRaisesRegex(ValueError, error_msg): + '{:.,,f}'.format(1.1) def test_with_two_underscore_in_format_specifier(self): - error_msg = re.escape("Cannot specify '_' with '_'.") + error_msg = re.escape("Cannot specify grouping '_' more than once") with self.assertRaisesRegex(ValueError, 
error_msg): '{:__}'.format(1) + with self.assertRaisesRegex(ValueError, error_msg): + '{:.__}'.format(1.1) + with self.assertRaisesRegex(ValueError, error_msg): + '{:.__f}'.format(1.1) - def test_with_a_commas_and_an_underscore_in_format_specifier(self): - error_msg = re.escape("Cannot specify both ',' and '_'.") + def test_with_a_comma_and_an_underscore_in_format_specifier(self): + error_msg = re.escape("Cannot specify both ',' and '_'") with self.assertRaisesRegex(ValueError, error_msg): '{:,_}'.format(1) + with self.assertRaisesRegex(ValueError, error_msg): + '{:.,_}'.format(1.1) with self.assertRaisesRegex(ValueError, error_msg): '{:.,_f}'.format(1.1) def test_with_an_underscore_and_a_comma_in_format_specifier(self): - error_msg = re.escape("Cannot specify both ',' and '_'.") + error_msg = re.escape("Cannot specify both ',' and '_'") with self.assertRaisesRegex(ValueError, error_msg): '{:_,}'.format(1) + with self.assertRaisesRegex(ValueError, error_msg): + '{:._,}'.format(1.1) with self.assertRaisesRegex(ValueError, error_msg): '{:._,f}'.format(1.1) diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index 05d0cbd2445c4c..a86cdf66b94ef8 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -1697,24 +1697,32 @@ def test_invalid_syntax_error_message(self): compile("f'{a $ b}'", "?", "exec") def test_with_two_commas_in_format_specifier(self): - error_msg = re.escape("Cannot specify ',' with ','.") + error_msg = re.escape("Cannot specify grouping ',' more than once") with self.assertRaisesRegex(ValueError, error_msg): f'{1:,,}' + with self.assertRaisesRegex(ValueError, error_msg): + f'{1.1:.,,}' def test_with_two_underscore_in_format_specifier(self): - error_msg = re.escape("Cannot specify '_' with '_'.") + error_msg = re.escape("Cannot specify grouping '_' more than once") with self.assertRaisesRegex(ValueError, error_msg): f'{1:__}' + with self.assertRaisesRegex(ValueError, error_msg): + f'{1.1:.__}' - def 
test_with_a_commas_and_an_underscore_in_format_specifier(self): - error_msg = re.escape("Cannot specify both ',' and '_'.") + def test_with_a_comma_and_an_underscore_in_format_specifier(self): + error_msg = re.escape("Cannot specify both ',' and '_'") with self.assertRaisesRegex(ValueError, error_msg): f'{1:,_}' + with self.assertRaisesRegex(ValueError, error_msg): + f'{1.1:.,_}' def test_with_an_underscore_and_a_comma_in_format_specifier(self): - error_msg = re.escape("Cannot specify both ',' and '_'.") + error_msg = re.escape("Cannot specify both ',' and '_'") with self.assertRaisesRegex(ValueError, error_msg): f'{1:_,}' + with self.assertRaisesRegex(ValueError, error_msg): + f'{1.1:._,}' def test_syntax_error_for_starred_expressions(self): with self.assertRaisesRegex(SyntaxError, "can't use starred expression here"): diff --git a/Objects/unicode_formatter.c b/Objects/unicode_formatter.c index d1baa6e7ce2106..dbb92377901743 100644 --- a/Objects/unicode_formatter.c +++ b/Objects/unicode_formatter.c @@ -283,7 +283,7 @@ invalid_thousands_separator_type(char separator, Py_UCS4 presentation_type) /* presentation_type has been checked before thousands separator. */ assert(presentation_type >= 32 && presentation_type < 127); PyErr_Format(PyExc_ValueError, - "Cannot specify '%c' with type code '%c'", + "Cannot specify '%c' with '%c'", separator, (int)presentation_type); } @@ -291,10 +291,10 @@ static void invalid_fraction_separator_type(char separator, Py_UCS4 presentation_type) { assert(separator == ',' || separator == '_'); - /* presentation_type has been checked before thousands separator. */ + /* presentation_type has been checked before fraction separator. 
*/ assert(presentation_type >= 32 && presentation_type < 127); PyErr_Format(PyExc_ValueError, - "Cannot specify '%c' in fractional part with type code '%c'", + "Cannot specify '%c' in fractional part with '%c'", separator, (int)presentation_type); } @@ -551,7 +551,17 @@ parse_internal_render_format_spec(PyObject *obj, "Format specifier missing precision"); return 0; } + } + if (end-pos) { + Py_UCS4 next = READ_spec(pos); + if (next == ',' || next == '_') { + /* Expect type, got another grouping character */ + PyErr_Format(PyExc_ValueError, + "Cannot specify grouping '%c' more than once", + next); + return 0; + } } /* Finally, parse the type field. */ From 943d1296bd5c5c3d7cc1fd4fd324116899236a3f Mon Sep 17 00:00:00 2001 From: Lil-Ran Date: Fri, 30 Jan 2026 01:40:13 +0800 Subject: [PATCH 6/7] Add new tests --- Lib/test/test_format.py | 173 ++++++++++++++++++++++++++++++------ Lib/test/test_fstring.py | 6 +- Lib/test/test_str.py | 25 +++++- Objects/unicode_formatter.c | 3 +- 4 files changed, 178 insertions(+), 29 deletions(-) diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py index d459bfa33922b0..85fa8ca374fa6d 100644 --- a/Lib/test/test_format.py +++ b/Lib/test/test_format.py @@ -661,7 +661,8 @@ def test_g_format_has_no_trailing_zeros(self): self.assertEqual(format(12300050.0, "#.6g"), "1.23000e+07") def test_with_two_commas_in_format_specifier(self): - error_msg = re.escape("Cannot specify grouping ',' more than once") + error_msg = re.escape( + "Cannot specify grouping character ',' more than once") with self.assertRaisesRegex(ValueError, error_msg): '{:,,}'.format(1) with self.assertRaisesRegex(ValueError, error_msg): @@ -670,7 +671,8 @@ def test_with_two_commas_in_format_specifier(self): '{:.,,f}'.format(1.1) def test_with_two_underscore_in_format_specifier(self): - error_msg = re.escape("Cannot specify grouping '_' more than once") + error_msg = re.escape( + "Cannot specify grouping character '_' more than once") with 
self.assertRaisesRegex(ValueError, error_msg): '{:__}'.format(1) with self.assertRaisesRegex(ValueError, error_msg): @@ -696,32 +698,153 @@ def test_with_an_underscore_and_a_comma_in_format_specifier(self): with self.assertRaisesRegex(ValueError, error_msg): '{:._,f}'.format(1.1) - def test_better_error_message_format(self): + def test_invalid_format_specifier_error_message(self): # https://bugs.python.org/issue20524 for value in [12j, 12, 12.0, "12"]: - with self.subTest(value=value): + for bad_spec in ["%M", "ЫйXЯЧ", "\n'\\"]: # The format spec must be invalid for all types we're testing. - # '%M' will suffice. - bad_format_spec = '%M' - err = re.escape("Invalid format specifier " - f"'{bad_format_spec}' for object of type " - f"'{type(value).__name__}'") - with self.assertRaisesRegex(ValueError, err): - f"xx{{value:{bad_format_spec}}}yy".format(value=value) - - # Also test the builtin format() function. - with self.assertRaisesRegex(ValueError, err): - format(value, bad_format_spec) - - # Also test f-strings. - with self.assertRaisesRegex(ValueError, err): - eval("f'xx{value:{bad_format_spec}}yy'") - - def test_unicode_in_error_message(self): - str_err = re.escape( - "Invalid format specifier '%ЫйЯЧ' for object of type 'str'") - with self.assertRaisesRegex(ValueError, str_err): - "{a:%ЫйЯЧ}".format(a='a') + with self.subTest(value=value, bad_spec=bad_spec): + err = re.escape("Invalid format specifier " + f"{bad_spec!r} for object of type " + f"'{type(value).__name__}'") + with self.assertRaisesRegex(ValueError, err): + f"xx{{value:{bad_spec}}}yy".format(value=value) + + # Also test the builtin format() function. + with self.assertRaisesRegex(ValueError, err): + format(value, bad_spec) + + # Also test f-strings. 
+ with self.assertRaisesRegex(ValueError, err): + eval("f'xx{value:{bad_spec}}yy'") + + def test_invalid_specifier_type_error_message(self): + for value in [12j, 12, 12.0, "12"]: + for bad_spec, repr in [ + ("M", "'M'"), + ("10$", "'$'"), + ("\t", "U+0009"), + (",\x7f", "U+007F"), + ("о", "'о' (U+043E)"), + ("+#020,🐍", "'🐍' (U+1F40D)") + ]: + with self.subTest(value=value, bad_spec=bad_spec): + err = re.escape("Unknown format code " + f"{repr} for object of type " + f"'{type(value).__name__}'") + with self.assertRaisesRegex(ValueError, err): + f"xx{{value:{bad_spec}}}yy".format(value=value) + + # Also test the builtin format() function. + with self.assertRaisesRegex(ValueError, err): + format(value, bad_spec) + + # Also test f-strings. + with self.assertRaisesRegex(ValueError, err): + eval("f'xx{value:{bad_spec}}yy'") + + def test_specifier_grouping_with_types(self): + def assertEqualGroup(spec, value, expected): + with self.subTest(spec=spec, value=value): + self.assertEqual(("{:%s}" % spec).format(value), expected) + self.assertEqual(format(value, spec), expected) + self.assertEqual(f"{value:{spec}}", expected) + + def assertRaisesGroup(spec, value, error_msg): + with self.subTest(spec=spec, value=value): + error_msg = re.escape(error_msg) + with self.assertRaisesRegex(ValueError, error_msg): + ("{:%s}" % spec).format(value) + with self.assertRaisesRegex(ValueError, error_msg): + format(value, spec) + with self.assertRaisesRegex(ValueError, error_msg): + f"{value:{spec}}" + + value = 1234567 + assertEqualGroup(",", value, "1,234,567") + assertRaisesGroup("._", value, + "Cannot specify '_' in fractional part with 'd'") + assertEqualGroup(",d", value, "1,234,567") + assertRaisesGroup("._d", value, + "Cannot specify '_' in fractional part with 'd'") + assertEqualGroup(",e", value, "1.234567e+06") + assertEqualGroup("._e", value, "1.234_567e+06") + assertRaisesGroup(",b", value, "Cannot specify ',' with 'b'") + assertEqualGroup("_b", 1234, "100_1101_0010") + 
assertRaisesGroup("._b", value, + "Cannot specify '_' in fractional part with 'b'") + assertRaisesGroup(",s", value, "Cannot specify ',' with 's'") + assertRaisesGroup("._s", value, + "Cannot specify '_' in fractional part with 's'") + assertRaisesGroup(",n", value, "Cannot specify ',' with 'n'") + assertRaisesGroup("._n", value, + "Cannot specify '_' in fractional part with 'n'") + + value = 1234567.1234567 + assertEqualGroup(",", value, "1,234,567.1234567") + assertEqualGroup("._", value, "1234567.123_456_7") + assertRaisesGroup(",d", value, + "Unknown format code 'd' for object of type 'float'") + assertRaisesGroup("._d", value, + "Cannot specify '_' in fractional part with 'd'") + assertEqualGroup(",e", value, "1.234567e+06") + assertEqualGroup("._e", value, "1.234_567e+06") + assertRaisesGroup(",b", value, "Cannot specify ',' with 'b'") + assertRaisesGroup("_b", value, + "Unknown format code 'b' for object of type 'float'") + assertRaisesGroup("._b", value, + "Cannot specify '_' in fractional part with 'b'") + assertRaisesGroup(",s", value, "Cannot specify ',' with 's'") + assertRaisesGroup("._s", value, + "Cannot specify '_' in fractional part with 's'") + assertRaisesGroup(",n", value, "Cannot specify ',' with 'n'") + assertRaisesGroup("._n", value, + "Cannot specify '_' in fractional part with 'n'") + + value = 1234567.1234567+1234567.1234567j + assertEqualGroup(",", value, "(1,234,567.1234567+1,234,567.1234567j)") + assertEqualGroup("._", value, "(1234567.123_456_7+1234567.123_456_7j)") + assertRaisesGroup(",d", value, + "Unknown format code 'd' for object of type 'complex'") + assertRaisesGroup("._d", value, + "Cannot specify '_' in fractional part with 'd'") + assertEqualGroup(",e", value, "1.234567e+06+1.234567e+06j") + assertEqualGroup("._e", value, "1.234_567e+06+1.234_567e+06j") + assertRaisesGroup(",b", value, "Cannot specify ',' with 'b'") + assertRaisesGroup("_b", value, + "Unknown format code 'b' for object of type 'complex'") + 
assertRaisesGroup("._b", value, + "Cannot specify '_' in fractional part with 'b'") + assertRaisesGroup(",s", value, "Cannot specify ',' with 's'") + assertRaisesGroup("._s", value, + "Cannot specify '_' in fractional part with 's'") + assertRaisesGroup(",n", value, "Cannot specify ',' with 'n'") + assertRaisesGroup("._n", value, + "Cannot specify '_' in fractional part with 'n'") + + value = "1234567" + assertRaisesGroup(",", value, "Cannot specify ',' with 's'") + assertRaisesGroup("._", value, + "Cannot specify '_' in fractional part with 's'") + assertRaisesGroup(",d", value, + "Unknown format code 'd' for object of type 'str'") + assertRaisesGroup("._d", value, + "Cannot specify '_' in fractional part with 'd'") + assertRaisesGroup(",e", value, + "Unknown format code 'e' for object of type 'str'") + assertRaisesGroup("._e", value, + "Unknown format code 'e' for object of type 'str'") + assertRaisesGroup(",b", value, "Cannot specify ',' with 'b'") + assertRaisesGroup("_b", value, + "Unknown format code 'b' for object of type 'str'") + assertRaisesGroup("._b", value, + "Cannot specify '_' in fractional part with 'b'") + assertRaisesGroup(",s", value, "Cannot specify ',' with 's'") + assertRaisesGroup("._s", value, + "Cannot specify '_' in fractional part with 's'") + assertRaisesGroup(",n", value, "Cannot specify ',' with 'n'") + assertRaisesGroup("._n", value, + "Cannot specify '_' in fractional part with 'n'") def test_negative_zero(self): ## default behavior diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index a86cdf66b94ef8..90f79869f6bf08 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -1697,14 +1697,16 @@ def test_invalid_syntax_error_message(self): compile("f'{a $ b}'", "?", "exec") def test_with_two_commas_in_format_specifier(self): - error_msg = re.escape("Cannot specify grouping ',' more than once") + error_msg = re.escape( + "Cannot specify grouping character ',' more than once") with 
self.assertRaisesRegex(ValueError, error_msg): f'{1:,,}' with self.assertRaisesRegex(ValueError, error_msg): f'{1.1:.,,}' def test_with_two_underscore_in_format_specifier(self): - error_msg = re.escape("Cannot specify grouping '_' more than once") + error_msg = re.escape( + "Cannot specify grouping character '_' more than once") with self.assertRaisesRegex(ValueError, error_msg): f'{1:__}' with self.assertRaisesRegex(ValueError, error_msg): diff --git a/Lib/test/test_str.py b/Lib/test/test_str.py index 0a8dddb026f6c8..a821667ee25da8 100644 --- a/Lib/test/test_str.py +++ b/Lib/test/test_str.py @@ -1262,6 +1262,14 @@ def __repr__(self): self.assertEqual('{0!a}'.format(F('Hello')), 'F(Hello)') self.assertEqual('{0!a}'.format(F('\u0374')), 'F(\\u0374)') + self.assertEqual('{0:10.10}'.format(1.25), ' 1.25') + self.assertEqual('{0!s:10.10}'.format(1.25), '1.25 ') + self.assertEqual('{0!r:10.10}'.format(1.25), '1.25 ') + self.assertEqual('{0!a:10.10}'.format(1.25), '1.25 ') + + # Not a conversion, but show that ! is allowed in a format spec. + self.assertEqual('{0:!<10.10}'.format(3.14), '3.14!!!!!!') + # test fallback to object.__format__ self.assertEqual('{0}'.format({}), '{}') self.assertEqual('{0}'.format([]), '[]') @@ -1320,9 +1328,24 @@ def __repr__(self): self.assertRaises(ValueError, "{0}}".format, 0) self.assertRaises(KeyError, "{foo}".format, bar=3) self.assertRaises(ValueError, "{0!x}".format, 3) + self.assertRaises(ValueError, '{0!A}'.format, 3) + self.assertRaises(ValueError, '{0!G}'.format, 3) + self.assertRaises(ValueError, '{0!ä}'.format, 3) + self.assertRaises(ValueError, '{0!ɐ}'.format, 3) + self.assertRaises(ValueError, '{0!3}'.format, 3) + self.assertRaises(ValueError, '{0!!}'.format, 3) self.assertRaises(ValueError, "{0!}".format, 0) + self.assertRaises(ValueError, "{0!s }".format, 0) + self.assertRaises(ValueError, "{0!s :10}".format, 0) + self.assertRaises(ValueError, '{0! s}'.format, 0) + self.assertRaises(ValueError, '{0! 
s }'.format, 0) + self.assertRaises(ValueError, '{0!ss}'.format, 0) self.assertRaises(ValueError, "{0!rs}".format, 0) - self.assertRaises(ValueError, "{!}".format) + self.assertRaises(ValueError, '{0!rs:}'.format, 0) + self.assertRaises(ValueError, '{0!rs:s}'.format, 0) + self.assertRaises(ValueError, "{!}".format, 0) + self.assertRaises(ValueError, "{!:}".format, 0) + self.assertRaises(ValueError, "{!:8}".format, 0) self.assertRaises(IndexError, "{:}".format) self.assertRaises(IndexError, "{:s}".format) self.assertRaises(IndexError, "{}".format) diff --git a/Objects/unicode_formatter.c b/Objects/unicode_formatter.c index dbb92377901743..f33893abedfb51 100644 --- a/Objects/unicode_formatter.c +++ b/Objects/unicode_formatter.c @@ -558,7 +558,8 @@ parse_internal_render_format_spec(PyObject *obj, if (next == ',' || next == '_') { /* Expect type, got another grouping character */ PyErr_Format(PyExc_ValueError, - "Cannot specify grouping '%c' more than once", + "Cannot specify grouping character '%c' " + "more than once", next); return 0; } From 78a767793d8ca811156d5582bc182525742a8953 Mon Sep 17 00:00:00 2001 From: Lil-Ran Date: Fri, 30 Jan 2026 02:09:00 +0800 Subject: [PATCH 7/7] blurb --- .../2026-01-30-02-08-34.gh-issue-144325.O3GZ80.rst | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-01-30-02-08-34.gh-issue-144325.O3GZ80.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-01-30-02-08-34.gh-issue-144325.O3GZ80.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-01-30-02-08-34.gh-issue-144325.O3GZ80.rst new file mode 100644 index 00000000000000..edaec6619a05ca --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-01-30-02-08-34.gh-issue-144325.O3GZ80.rst @@ -0,0 +1,5 @@ +Improves error messages for ``{}``-style formatters for ``str``, ``float``, +``int``, and ``complex``. Make error messages handle Unicode characters +properly. 
Grouping characters ``,`` and ``_`` in the fractional part are now +allowed only for floating-point presentation types (``e``, ``f``, ``g``, ``E``, +``G``, ``%``, and ``F``).