Skip to content

Commit 9526211

Browse files
authored
Serialize raw errors in cache metas (#20372)
Fixes #20353. This makes us respect e.g. `--output json` for cached files without re-checking the files (which is the desired behavior for users; see the issue). This is also a first step towards resolving the "foo defined here" conundrum for parallel checking. The fix is straightforward. The only question was whether to continue using `ErrorTuple`s or switch to a proper class. I decided to keep the tuples for now to minimize the scope of the change. Note that I am also adjusting the generic "JSON" fixed-format helpers to natively support tuples (unlike real JSON). We already use tuples in a few other places, so it makes sense to just make it "official" (this format is still internal to mypy, obviously).
1 parent fefc070 commit 9526211

File tree

4 files changed

+129
-32
lines changed

4 files changed

+129
-32
lines changed

mypy/build.py

Lines changed: 47 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,14 @@
4343
from librt.internal import cache_version
4444

4545
import mypy.semanal_main
46-
from mypy.cache import CACHE_VERSION, CacheMeta, ReadBuffer, WriteBuffer, write_json
46+
from mypy.cache import (
47+
CACHE_VERSION,
48+
CacheMeta,
49+
ReadBuffer,
50+
SerializedError,
51+
WriteBuffer,
52+
write_json,
53+
)
4754
from mypy.checker import TypeChecker
4855
from mypy.defaults import (
4956
WORKER_CONNECTION_TIMEOUT,
@@ -52,7 +59,7 @@
5259
WORKER_START_TIMEOUT,
5360
)
5461
from mypy.error_formatter import OUTPUT_CHOICES, ErrorFormatter
55-
from mypy.errors import CompileError, ErrorInfo, Errors, report_internal_error
62+
from mypy.errors import CompileError, ErrorInfo, Errors, ErrorTuple, report_internal_error
5663
from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort
5764
from mypy.indirection import TypeIndirectionVisitor
5865
from mypy.ipc import BadStatus, IPCClient, read_status, ready_to_read, receive, send
@@ -2046,7 +2053,7 @@ class State:
20462053
dep_hashes: dict[str, bytes] = {}
20472054

20482055
# List of errors reported for this file last time.
2049-
error_lines: list[str] = []
2056+
error_lines: list[SerializedError] = []
20502057

20512058
# Parent package, its parent, etc.
20522059
ancestors: list[str] | None = None
@@ -3511,9 +3518,13 @@ def find_stale_sccs(
35113518
scc = order_ascc_ex(graph, ascc)
35123519
for id in scc:
35133520
if graph[id].error_lines:
3514-
manager.flush_errors(
3515-
manager.errors.simplify_path(graph[id].xpath), graph[id].error_lines, False
3521+
path = manager.errors.simplify_path(graph[id].xpath)
3522+
formatted = manager.errors.format_messages(
3523+
path,
3524+
deserialize_codes(graph[id].error_lines),
3525+
formatter=manager.error_formatter,
35163526
)
3527+
manager.flush_errors(path, formatted, False)
35173528
fresh_sccs.append(ascc)
35183529
else:
35193530
size = len(ascc.mod_ids)
@@ -3759,21 +3770,24 @@ def process_stale_scc(
37593770
# Flush errors, and write cache in two phases: first data files, then meta files.
37603771
meta_tuples = {}
37613772
errors_by_id = {}
3773+
formatted_by_id = {}
37623774
for id in stale:
37633775
if graph[id].xpath not in manager.errors.ignored_files:
3764-
errors = manager.errors.file_messages(
3765-
graph[id].xpath, formatter=manager.error_formatter
3776+
errors = manager.errors.file_messages(graph[id].xpath)
3777+
formatted = manager.errors.format_messages(
3778+
graph[id].xpath, errors, formatter=manager.error_formatter
37663779
)
3767-
manager.flush_errors(manager.errors.simplify_path(graph[id].xpath), errors, False)
3780+
manager.flush_errors(manager.errors.simplify_path(graph[id].xpath), formatted, False)
37683781
errors_by_id[id] = errors
3782+
formatted_by_id[id] = formatted
37693783
meta_tuples[id] = graph[id].write_cache()
37703784
for id in stale:
37713785
meta_tuple = meta_tuples[id]
37723786
if meta_tuple is None:
37733787
continue
37743788
meta, meta_file = meta_tuple
37753789
meta.dep_hashes = [graph[dep].interface_hash for dep in graph[id].dependencies]
3776-
meta.error_lines = errors_by_id.get(id, [])
3790+
meta.error_lines = serialize_codes(errors_by_id.get(id, []))
37773791
write_cache_meta(meta, manager, meta_file)
37783792
manager.done_sccs.add(ascc.id)
37793793
manager.add_stats(
@@ -3785,7 +3799,7 @@ def process_stale_scc(
37853799
)
37863800
scc_result = {}
37873801
for id in scc:
3788-
scc_result[id] = graph[id].interface_hash.hex(), errors_by_id.get(id, [])
3802+
scc_result[id] = graph[id].interface_hash.hex(), formatted_by_id.get(id, [])
37893803
return scc_result
37903804

37913805

@@ -3932,3 +3946,26 @@ def sccs_to_bytes(sccs: list[SCC]) -> bytes:
39323946
buf = WriteBuffer()
39333947
write_json(buf, {"sccs": scc_tuples})
39343948
return buf.getvalue()
3949+
3950+
3951+
def serialize_codes(errs: list[ErrorTuple]) -> list[SerializedError]:
    """Convert error tuples into their cache-serializable form.

    Every field is copied through unchanged except the trailing error code,
    which is replaced by its ``code`` attribute (or None when there is no
    error code).
    """
    serialized: list[SerializedError] = []
    for path, line, column, end_line, end_column, severity, message, code in errs:
        code_name = code.code if code else None
        serialized.append(
            (path, line, column, end_line, end_column, severity, message, code_name)
        )
    return serialized
3956+
3957+
3958+
def deserialize_codes(errs: list[SerializedError]) -> list[ErrorTuple]:
    """Rebuild error tuples from their cached, serialized form.

    Every field is copied through unchanged except the trailing error code:
    a non-empty code string is resolved with ``codes.error_codes.get(code)``,
    while an empty or missing code becomes None.
    """
    restored: list[ErrorTuple] = []
    for path, line, column, end_line, end_column, severity, message, code in errs:
        error_code = codes.error_codes.get(code) if code else None
        restored.append(
            (path, line, column, end_line, end_column, severity, message, error_code)
        )
    return restored

mypy/cache.py

Lines changed: 57 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,9 @@
6969
from mypy_extensions import u8
7070

7171
# High-level cache layout format
72-
CACHE_VERSION: Final = 0
72+
CACHE_VERSION: Final = 1
73+
74+
SerializedError: _TypeAlias = tuple[str | None, int, int, int, int, str, str, str | None]
7375

7476

7577
class CacheMeta:
@@ -92,7 +94,7 @@ def __init__(
9294
dep_lines: list[int],
9395
dep_hashes: list[bytes],
9496
interface_hash: bytes,
95-
error_lines: list[str],
97+
error_lines: list[SerializedError],
9698
version_id: str,
9799
ignore_all: bool,
98100
plugin_data: Any,
@@ -157,7 +159,7 @@ def deserialize(cls, meta: dict[str, Any], data_file: str) -> CacheMeta | None:
157159
dep_lines=meta["dep_lines"],
158160
dep_hashes=[bytes.fromhex(dep) for dep in meta["dep_hashes"]],
159161
interface_hash=bytes.fromhex(meta["interface_hash"]),
160-
error_lines=meta["error_lines"],
162+
error_lines=[tuple(err) for err in meta["error_lines"]],
161163
version_id=meta["version_id"],
162164
ignore_all=meta["ignore_all"],
163165
plugin_data=meta["plugin_data"],
@@ -179,7 +181,7 @@ def write(self, data: WriteBuffer) -> None:
179181
write_int_list(data, self.dep_lines)
180182
write_bytes_list(data, self.dep_hashes)
181183
write_bytes(data, self.interface_hash)
182-
write_str_list(data, self.error_lines)
184+
write_errors(data, self.error_lines)
183185
write_str(data, self.version_id)
184186
write_bool(data, self.ignore_all)
185187
# Plugin data may be not a dictionary, so we use
@@ -204,7 +206,7 @@ def read(cls, data: ReadBuffer, data_file: str) -> CacheMeta | None:
204206
dep_lines=read_int_list(data),
205207
dep_hashes=read_bytes_list(data),
206208
interface_hash=read_bytes(data),
207-
error_lines=read_str_list(data),
209+
error_lines=read_errors(data),
208210
version_id=read_str(data),
209211
ignore_all=read_bool(data),
210212
plugin_data=read_json_value(data),
@@ -231,6 +233,7 @@ def read(cls, data: ReadBuffer, data_file: str) -> CacheMeta | None:
231233
LIST_INT: Final[Tag] = 21
232234
LIST_STR: Final[Tag] = 22
233235
LIST_BYTES: Final[Tag] = 23
236+
TUPLE_GEN: Final[Tag] = 24
234237
DICT_STR_GEN: Final[Tag] = 30
235238

236239
# Misc classes.
@@ -391,12 +394,11 @@ def write_str_opt_list(data: WriteBuffer, value: list[str | None]) -> None:
391394

392395

393396
Value: _TypeAlias = None | int | str | bool
394-
JsonValue: _TypeAlias = Value | list["JsonValue"] | dict[str, "JsonValue"]
395397

396-
# Currently tuples are used by mypyc plugin. They will be normalized to
397-
# JSON lists after a roundtrip.
398-
JsonValueEx: _TypeAlias = (
399-
Value | list["JsonValueEx"] | dict[str, "JsonValueEx"] | tuple["JsonValueEx", ...]
398+
# Our JSON format is somewhat non-standard as we distinguish lists and tuples.
399+
# This is convenient for some internal things, like mypyc plugin and error serialization.
400+
JsonValue: _TypeAlias = (
401+
Value | list["JsonValue"] | dict[str, "JsonValue"] | tuple["JsonValue", ...]
400402
)
401403

402404

@@ -415,13 +417,16 @@ def read_json_value(data: ReadBuffer) -> JsonValue:
415417
if tag == LIST_GEN:
416418
size = read_int_bare(data)
417419
return [read_json_value(data) for _ in range(size)]
420+
if tag == TUPLE_GEN:
421+
size = read_int_bare(data)
422+
return tuple(read_json_value(data) for _ in range(size))
418423
if tag == DICT_STR_GEN:
419424
size = read_int_bare(data)
420425
return {read_str_bare(data): read_json_value(data) for _ in range(size)}
421426
assert False, f"Invalid JSON tag: {tag}"
422427

423428

424-
def write_json_value(data: WriteBuffer, value: JsonValueEx) -> None:
429+
def write_json_value(data: WriteBuffer, value: JsonValue) -> None:
425430
if value is None:
426431
write_tag(data, LITERAL_NONE)
427432
elif isinstance(value, bool):
@@ -432,11 +437,16 @@ def write_json_value(data: WriteBuffer, value: JsonValueEx) -> None:
432437
elif isinstance(value, str):
433438
write_tag(data, LITERAL_STR)
434439
write_str_bare(data, value)
435-
elif isinstance(value, (list, tuple)):
440+
elif isinstance(value, list):
436441
write_tag(data, LIST_GEN)
437442
write_int_bare(data, len(value))
438443
for val in value:
439444
write_json_value(data, val)
445+
elif isinstance(value, tuple):
446+
write_tag(data, TUPLE_GEN)
447+
write_int_bare(data, len(value))
448+
for val in value:
449+
write_json_value(data, val)
440450
elif isinstance(value, dict):
441451
write_tag(data, DICT_STR_GEN)
442452
write_int_bare(data, len(value))
@@ -461,3 +471,38 @@ def write_json(data: WriteBuffer, value: dict[str, Any]) -> None:
461471
for key in sorted(value):
462472
write_str_bare(data, key)
463473
write_json_value(data, value[key])
474+
475+
476+
def write_errors(data: WriteBuffer, errs: list[SerializedError]) -> None:
    """Write a list of serialized errors to the buffer using a fixed layout.

    Layout: a LIST_GEN tag, the number of errors, and then for each error a
    TUPLE_GEN tag followed by its eight fields in order. Unlike tuples written
    by write_json_value(), no per-tuple length is emitted here.
    """
    write_tag(data, LIST_GEN)
    write_int_bare(data, len(errs))
    for err in errs:
        path, line, column, end_line, end_column, severity, message, code = err
        write_tag(data, TUPLE_GEN)
        write_str_opt(data, path)
        # The four position fields are all plain ints.
        for position in (line, column, end_line, end_column):
            write_int(data, position)
        # Severity and message are always present; the error code may be None.
        for text in (severity, message):
            write_str(data, text)
        write_str_opt(data, code)
489+
490+
491+
def read_errors(data: ReadBuffer) -> list[SerializedError]:
    """Read a list of serialized errors in the layout produced by write_errors().

    Expects a LIST_GEN tag, the number of errors, and then for each error a
    TUPLE_GEN tag followed by eight fields: optional path, line, column,
    end line, end column, severity, message, and optional error code.

    Raises AssertionError if an unexpected tag is encountered.
    """
    # Read each tag outside of the assert expression: asserts are stripped
    # under `python -O`, and the tag must be consumed from the buffer either
    # way, otherwise every subsequent field would be read from a wrong offset.
    tag = read_tag(data)
    assert tag == LIST_GEN, f"Expected list tag, got: {tag}"
    result: list[SerializedError] = []
    for _ in range(read_int_bare(data)):
        tag = read_tag(data)
        assert tag == TUPLE_GEN, f"Expected tuple tag, got: {tag}"
        # Tuple elements are evaluated left to right, which matches the order
        # in which the fields were written.
        result.append(
            (
                read_str_opt(data),
                read_int(data),
                read_int(data),
                read_int(data),
                read_int(data),
                read_str(data),
                read_str(data),
                read_str_opt(data),
            )
        )
    return result

mypy/errors.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -951,7 +951,7 @@ def raise_error(self, use_stdout: bool = True) -> NoReturn:
951951
self.new_messages(), use_stdout=use_stdout, module_with_blocker=self.blocker_module()
952952
)
953953

954-
def format_messages(
954+
def format_messages_default(
955955
self, error_tuples: list[ErrorTuple], source_lines: list[str] | None
956956
) -> list[str]:
957957
"""Return a string list that represents the error messages.
@@ -1009,24 +1009,28 @@ def format_messages(
10091009
a.append(" " * (DEFAULT_SOURCE_OFFSET + column) + marker)
10101010
return a
10111011

1012-
def file_messages(self, path: str, formatter: ErrorFormatter | None = None) -> list[str]:
1013-
"""Return a string list of new error messages from a given file.
1014-
1015-
Use a form suitable for displaying to the user.
1016-
"""
1012+
def file_messages(self, path: str) -> list[ErrorTuple]:
1013+
"""Return an error tuple list of new error messages from a given file."""
10171014
if path not in self.error_info_map:
10181015
return []
10191016

10201017
error_info = self.error_info_map[path]
10211018
error_info = [info for info in error_info if not info.hidden]
10221019
error_info = self.remove_duplicates(self.sort_messages(error_info))
1023-
error_tuples = self.render_messages(error_info)
1020+
return self.render_messages(error_info)
10241021

1022+
def format_messages(
1023+
self, path: str, error_tuples: list[ErrorTuple], formatter: ErrorFormatter | None = None
1024+
) -> list[str]:
1025+
"""Return a string list of new error messages from a given file.
1026+
1027+
Use a form suitable for displaying to the user.
1028+
"""
1029+
self.flushed_files.add(path)
10251030
if formatter is not None:
10261031
errors = create_errors(error_tuples)
10271032
return [formatter.report_error(err) for err in errors]
10281033

1029-
self.flushed_files.add(path)
10301034
source_lines = None
10311035
if self.options.pretty and self.read_source:
10321036
# Find shadow file mapping and read source lines if a shadow file exists for the given path.
@@ -1036,7 +1040,7 @@ def file_messages(self, path: str, formatter: ErrorFormatter | None = None) -> l
10361040
source_lines = self.read_source(mapped_path)
10371041
else:
10381042
source_lines = self.read_source(path)
1039-
return self.format_messages(error_tuples, source_lines)
1043+
return self.format_messages_default(error_tuples, source_lines)
10401044

10411045
def find_shadow_file_mapping(self, path: str) -> str | None:
10421046
"""Return the shadow file path for a given source file path or None."""
@@ -1058,7 +1062,8 @@ def new_messages(self) -> list[str]:
10581062
msgs = []
10591063
for path in self.error_info_map.keys():
10601064
if path not in self.flushed_files:
1061-
msgs.extend(self.file_messages(path))
1065+
error_tuples = self.file_messages(path)
1066+
msgs.extend(self.format_messages(path, error_tuples))
10621067
return msgs
10631068

10641069
def targets(self) -> set[str]:

test-data/unit/check-incremental.test

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7626,3 +7626,13 @@ y = 1
76267626
class C: ...
76277627
[out2]
76287628
tmp/m.py:2: note: Revealed type is "def () -> other.C"
7629+
7630+
[case testOutputFormatterIncremental]
7631+
# flags2: --output json
7632+
def wrong() -> int:
7633+
if wrong():
7634+
return 0
7635+
[out]
7636+
main:2: error: Missing return statement
7637+
[out2]
7638+
{"file": "main", "line": 2, "column": 0, "message": "Missing return statement", "hint": null, "code": "return", "severity": "error"}

0 commit comments

Comments
 (0)