
Commit 2b23b50

ilevkivskyi authored and hauntsaninja committed
Serialize raw errors in cache metas (#20372)
Fixes #20353

This makes us respect e.g. `--output json` for cached files without re-checking them (which is the desired behavior for users, see the issue). It is also a first step towards resolving the "foo defined here" conundrum for parallel checking. The fix is straightforward. The only real question was whether to keep using `ErrorTuple`s or switch to a proper class; I decided to keep the tuples for now to minimize the scope of the change. Note that I am also adjusting the generic "JSON" fixed-format helpers to natively support tuples (unlike real JSON). We already use tuples in a few other places, so it makes sense to make this "official" (the format is, of course, still internal to mypy).
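For orientation, this is roughly the shape of one serialized error as it is now stored in a cache meta (field order following the SerializedError alias added in mypy/cache.py below); the concrete values are invented for illustration:

serialized_error = (
    "main",                      # path (or None)
    2, 0,                        # line, column
    2, 0,                        # end_line, end_column
    "error",                     # severity
    "Missing return statement",  # message
    "return",                    # error code name as a string (or None)
)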
1 parent f60f90f commit 2b23b50

File tree

4 files changed (+146, -30 lines)


mypy/build.py

Lines changed: 60 additions & 9 deletions
@@ -31,10 +31,17 @@
 from librt.internal import cache_version
 
 import mypy.semanal_main
-from mypy.cache import CACHE_VERSION, CacheMeta, ReadBuffer, WriteBuffer
+from mypy.cache import (
+    CACHE_VERSION,
+    CacheMeta,
+    ReadBuffer,
+    SerializedError,
+    WriteBuffer,
+    write_json,
+)
 from mypy.checker import TypeChecker
 from mypy.error_formatter import OUTPUT_CHOICES, ErrorFormatter
-from mypy.errors import CompileError, ErrorInfo, Errors, report_internal_error
+from mypy.errors import CompileError, ErrorInfo, Errors, ErrorTuple, report_internal_error
 from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort
 from mypy.indirection import TypeIndirectionVisitor
 from mypy.messages import MessageBuilder
@@ -1869,7 +1876,7 @@ class State:
     dep_hashes: dict[str, bytes] = {}
 
     # List of errors reported for this file last time.
-    error_lines: list[str] = []
+    error_lines: list[SerializedError] = []
 
     # Parent package, its parent, etc.
     ancestors: list[str] | None = None
@@ -3286,9 +3293,13 @@ def find_stale_sccs(
         scc = order_ascc_ex(graph, ascc)
         for id in scc:
             if graph[id].error_lines:
-                manager.flush_errors(
-                    manager.errors.simplify_path(graph[id].xpath), graph[id].error_lines, False
+                path = manager.errors.simplify_path(graph[id].xpath)
+                formatted = manager.errors.format_messages(
+                    path,
+                    deserialize_codes(graph[id].error_lines),
+                    formatter=manager.error_formatter,
                 )
+                manager.flush_errors(path, formatted, False)
         fresh_sccs.append(ascc)
     else:
         size = len(ascc.mod_ids)
@@ -3492,13 +3503,16 @@ def process_stale_scc(graph: Graph, ascc: SCC, manager: BuildManager) -> None:
     # Flush errors, and write cache in two phases: first data files, then meta files.
     meta_tuples = {}
     errors_by_id = {}
+    formatted_by_id = {}
     for id in stale:
         if graph[id].xpath not in manager.errors.ignored_files:
-            errors = manager.errors.file_messages(
-                graph[id].xpath, formatter=manager.error_formatter
+            errors = manager.errors.file_messages(graph[id].xpath)
+            formatted = manager.errors.format_messages(
+                graph[id].xpath, errors, formatter=manager.error_formatter
             )
-            manager.flush_errors(manager.errors.simplify_path(graph[id].xpath), errors, False)
+            manager.flush_errors(manager.errors.simplify_path(graph[id].xpath), formatted, False)
             errors_by_id[id] = errors
+            formatted_by_id[id] = formatted
         meta_tuples[id] = graph[id].write_cache()
         graph[id].mark_as_rechecked()
     for id in stale:
@@ -3507,7 +3521,7 @@ def process_stale_scc(graph: Graph, ascc: SCC, manager: BuildManager) -> None:
             continue
         meta, meta_file = meta_tuple
         meta.dep_hashes = [graph[dep].interface_hash for dep in graph[id].dependencies]
-        meta.error_lines = errors_by_id.get(id, [])
+        meta.error_lines = serialize_codes(errors_by_id.get(id, []))
         write_cache_meta(meta, manager, meta_file)
     manager.done_sccs.add(ascc.id)
@@ -3640,3 +3654,40 @@ def write_undocumented_ref_info(
 
     deps_json = get_undocumented_ref_info_json(state.tree, type_map)
     metastore.write(ref_info_file, json_dumps(deps_json))
+
+
+def sources_to_bytes(sources: list[BuildSource]) -> bytes:
+    source_tuples = [(s.path, s.module, s.text, s.base_dir, s.followed) for s in sources]
+    buf = WriteBuffer()
+    write_json(buf, {"sources": source_tuples})
+    return buf.getvalue()
+
+
+def sccs_to_bytes(sccs: list[SCC]) -> bytes:
+    scc_tuples = [(list(scc.mod_ids), scc.id, list(scc.deps)) for scc in sccs]
+    buf = WriteBuffer()
+    write_json(buf, {"sccs": scc_tuples})
+    return buf.getvalue()
+
+
+def serialize_codes(errs: list[ErrorTuple]) -> list[SerializedError]:
+    return [
+        (path, line, column, end_line, end_column, severity, message, code.code if code else None)
+        for path, line, column, end_line, end_column, severity, message, code in errs
+    ]
+
+
+def deserialize_codes(errs: list[SerializedError]) -> list[ErrorTuple]:
+    return [
+        (
+            path,
+            line,
+            column,
+            end_line,
+            end_column,
+            severity,
+            message,
+            codes.error_codes.get(code) if code else None,
+        )
+        for path, line, column, end_line, end_column, severity, message, code in errs
+    ]
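As a sanity check of the serialize_codes/deserialize_codes pair above, here is a self-contained sketch (stand-in classes, not mypy imports) of how an ErrorTuple whose last element is an ErrorCode object round-trips through the plain-string form that ends up in the cache meta:

from dataclasses import dataclass

@dataclass(frozen=True)
class ErrorCode:  # stand-in for mypy.errorcodes.ErrorCode
    code: str

error_codes = {"return": ErrorCode("return")}  # stand-in for codes.error_codes

# ErrorTuple layout: (path, line, column, end_line, end_column, severity, message, code)
err = ("main", 2, 0, 2, 0, "error", "Missing return statement", ErrorCode("return"))

# serialize_codes: replace the ErrorCode object with its string name (or None)
serialized = err[:7] + (err[7].code if err[7] else None,)

# deserialize_codes: look the name back up in the registry
restored = serialized[:7] + (error_codes.get(serialized[7]) if serialized[7] else None,)
assert restored == err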

mypy/cache.py

Lines changed: 61 additions & 11 deletions
@@ -48,7 +48,7 @@
 from __future__ import annotations
 
 from collections.abc import Sequence
-from typing import Any, Final, Union
+from typing import Any, Final, Optional, Union
 from typing_extensions import TypeAlias as _TypeAlias
 
 from librt.internal import (
@@ -70,7 +70,9 @@
 from mypy_extensions import u8
 
 # High-level cache layout format
-CACHE_VERSION: Final = 0
+CACHE_VERSION: Final = 1
+
+SerializedError: _TypeAlias = tuple[Optional[str], int, int, int, int, str, str, Optional[str]]
 
 
 class CacheMeta:
@@ -93,7 +95,7 @@ def __init__(
         dep_lines: list[int],
         dep_hashes: list[bytes],
         interface_hash: bytes,
-        error_lines: list[str],
+        error_lines: list[SerializedError],
         version_id: str,
         ignore_all: bool,
         plugin_data: Any,
@@ -158,7 +160,7 @@ def deserialize(cls, meta: dict[str, Any], data_file: str) -> CacheMeta | None:
             dep_lines=meta["dep_lines"],
             dep_hashes=[bytes.fromhex(dep) for dep in meta["dep_hashes"]],
             interface_hash=bytes.fromhex(meta["interface_hash"]),
-            error_lines=meta["error_lines"],
+            error_lines=[tuple(err) for err in meta["error_lines"]],
             version_id=meta["version_id"],
             ignore_all=meta["ignore_all"],
             plugin_data=meta["plugin_data"],
@@ -180,7 +182,7 @@ def write(self, data: WriteBuffer) -> None:
         write_int_list(data, self.dep_lines)
         write_bytes_list(data, self.dep_hashes)
         write_bytes(data, self.interface_hash)
-        write_str_list(data, self.error_lines)
+        write_errors(data, self.error_lines)
         write_str(data, self.version_id)
         write_bool(data, self.ignore_all)
         # Plugin data may be not a dictionary, so we use
@@ -205,7 +207,7 @@ def read(cls, data: ReadBuffer, data_file: str) -> CacheMeta | None:
             dep_lines=read_int_list(data),
             dep_hashes=read_bytes_list(data),
             interface_hash=read_bytes(data),
-            error_lines=read_str_list(data),
+            error_lines=read_errors(data),
             version_id=read_str(data),
             ignore_all=read_bool(data),
             plugin_data=read_json_value(data),
@@ -232,6 +234,7 @@ def read(cls, data: ReadBuffer, data_file: str) -> CacheMeta | None:
 LIST_INT: Final[Tag] = 21
 LIST_STR: Final[Tag] = 22
 LIST_BYTES: Final[Tag] = 23
+TUPLE_GEN: Final[Tag] = 24
 DICT_STR_GEN: Final[Tag] = 30
 
 # Misc classes.
@@ -391,7 +394,13 @@ def write_str_opt_list(data: WriteBuffer, value: list[str | None]) -> None:
         write_str_opt(data, item)
 
 
-JsonValue: _TypeAlias = Union[None, int, str, bool, list["JsonValue"], dict[str, "JsonValue"]]
+Value: _TypeAlias = Union[None, int, str, bool]
+
+# Our JSON format is somewhat non-standard as we distinguish lists and tuples.
+# This is convenient for some internal things, like mypyc plugin and error serialization.
+JsonValue: _TypeAlias = Union[
+    Value, list["JsonValue"], dict[str, "JsonValue"], tuple["JsonValue", ...]
+]
 
 
 def read_json_value(data: ReadBuffer) -> JsonValue:
@@ -409,15 +418,16 @@ def read_json_value(data: ReadBuffer) -> JsonValue:
     if tag == LIST_GEN:
         size = read_int_bare(data)
         return [read_json_value(data) for _ in range(size)]
+    if tag == TUPLE_GEN:
+        size = read_int_bare(data)
+        return tuple(read_json_value(data) for _ in range(size))
     if tag == DICT_STR_GEN:
         size = read_int_bare(data)
         return {read_str_bare(data): read_json_value(data) for _ in range(size)}
     assert False, f"Invalid JSON tag: {tag}"
 
 
-# Currently tuples are used by mypyc plugin. They will be normalized to
-# JSON lists after a roundtrip.
-def write_json_value(data: WriteBuffer, value: JsonValue | tuple[JsonValue, ...]) -> None:
+def write_json_value(data: WriteBuffer, value: JsonValue) -> None:
     if value is None:
         write_tag(data, LITERAL_NONE)
     elif isinstance(value, bool):
@@ -428,11 +438,16 @@ def write_json_value(data: WriteBuffer, value: JsonValue | tuple[JsonValue, ...]
     elif isinstance(value, str):
         write_tag(data, LITERAL_STR)
         write_str_bare(data, value)
-    elif isinstance(value, (list, tuple)):
+    elif isinstance(value, list):
         write_tag(data, LIST_GEN)
         write_int_bare(data, len(value))
         for val in value:
             write_json_value(data, val)
+    elif isinstance(value, tuple):
+        write_tag(data, TUPLE_GEN)
+        write_int_bare(data, len(value))
+        for val in value:
+            write_json_value(data, val)
     elif isinstance(value, dict):
         write_tag(data, DICT_STR_GEN)
         write_int_bare(data, len(value))
@@ -457,3 +472,38 @@ def write_json(data: WriteBuffer, value: dict[str, Any]) -> None:
     for key in sorted(value):
         write_str_bare(data, key)
         write_json_value(data, value[key])
+
+
+def write_errors(data: WriteBuffer, errs: list[SerializedError]) -> None:
+    write_tag(data, LIST_GEN)
+    write_int_bare(data, len(errs))
+    for path, line, column, end_line, end_column, severity, message, code in errs:
+        write_tag(data, TUPLE_GEN)
+        write_str_opt(data, path)
+        write_int(data, line)
+        write_int(data, column)
+        write_int(data, end_line)
+        write_int(data, end_column)
+        write_str(data, severity)
+        write_str(data, message)
+        write_str_opt(data, code)
+
+
+def read_errors(data: ReadBuffer) -> list[SerializedError]:
+    assert read_tag(data) == LIST_GEN
+    result = []
+    for _ in range(read_int_bare(data)):
+        assert read_tag(data) == TUPLE_GEN
+        result.append(
+            (
+                read_str_opt(data),
+                read_int(data),
+                read_int(data),
+                read_int(data),
+                read_int(data),
+                read_str(data),
+                read_str(data),
+                read_str_opt(data),
+            )
+        )
+    return result
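The core of the format change above is the new TUPLE_GEN tag, which records the container kind next to the length so tuples no longer collapse into lists on a round trip. Below is a standalone sketch of that idea with toy framing and mostly made-up tag values (in the real module only TUPLE_GEN = 24 is confirmed by the diff); it is not the actual mypy/cache.py wire format:

import io
import struct

LITERAL_STR, LIST_GEN, TUPLE_GEN = 3, 20, 24  # illustrative tag values

def write_value(buf: io.BytesIO, value) -> None:
    if isinstance(value, str):
        encoded = value.encode("utf-8")
        buf.write(bytes([LITERAL_STR]) + struct.pack("<I", len(encoded)) + encoded)
    elif isinstance(value, (list, tuple)):
        # Record which container kind this is, then its length, then the items.
        tag = LIST_GEN if isinstance(value, list) else TUPLE_GEN
        buf.write(bytes([tag]) + struct.pack("<I", len(value)))
        for item in value:
            write_value(buf, item)
    else:
        raise TypeError(f"unsupported value: {value!r}")

def read_value(buf: io.BytesIO):
    tag = buf.read(1)[0]
    (size,) = struct.unpack("<I", buf.read(4))
    if tag == LITERAL_STR:
        return buf.read(size).decode("utf-8")
    items = [read_value(buf) for _ in range(size)]
    return items if tag == LIST_GEN else tuple(items)

buf = io.BytesIO()
write_value(buf, [("a", "b"), ["c"]])
buf.seek(0)
assert read_value(buf) == [("a", "b"), ["c"]]  # tuples survive the round trip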

mypy/errors.py

Lines changed: 15 additions & 10 deletions
@@ -951,7 +951,7 @@ def raise_error(self, use_stdout: bool = True) -> NoReturn:
             self.new_messages(), use_stdout=use_stdout, module_with_blocker=self.blocker_module()
         )
 
-    def format_messages(
+    def format_messages_default(
         self, error_tuples: list[ErrorTuple], source_lines: list[str] | None
     ) -> list[str]:
         """Return a string list that represents the error messages.
@@ -1009,24 +1009,28 @@ def format_messages(
             a.append(" " * (DEFAULT_SOURCE_OFFSET + column) + marker)
         return a
 
-    def file_messages(self, path: str, formatter: ErrorFormatter | None = None) -> list[str]:
-        """Return a string list of new error messages from a given file.
-
-        Use a form suitable for displaying to the user.
-        """
+    def file_messages(self, path: str) -> list[ErrorTuple]:
+        """Return an error tuple list of new error messages from a given file."""
         if path not in self.error_info_map:
             return []
 
         error_info = self.error_info_map[path]
         error_info = [info for info in error_info if not info.hidden]
         error_info = self.remove_duplicates(self.sort_messages(error_info))
-        error_tuples = self.render_messages(error_info)
+        return self.render_messages(error_info)
 
+    def format_messages(
+        self, path: str, error_tuples: list[ErrorTuple], formatter: ErrorFormatter | None = None
+    ) -> list[str]:
+        """Return a string list of new error messages from a given file.
+
+        Use a form suitable for displaying to the user.
+        """
+        self.flushed_files.add(path)
         if formatter is not None:
             errors = create_errors(error_tuples)
             return [formatter.report_error(err) for err in errors]
 
-        self.flushed_files.add(path)
         source_lines = None
         if self.options.pretty and self.read_source:
             # Find shadow file mapping and read source lines if a shadow file exists for the given path.
@@ -1036,7 +1040,7 @@ def file_messages(self, path: str, formatter: ErrorFormatter | None = None) -> l
                 source_lines = self.read_source(mapped_path)
             else:
                 source_lines = self.read_source(path)
-        return self.format_messages(error_tuples, source_lines)
+        return self.format_messages_default(error_tuples, source_lines)
 
     def find_shadow_file_mapping(self, path: str) -> str | None:
         """Return the shadow file path for a given source file path or None."""
@@ -1058,7 +1062,8 @@ def new_messages(self) -> list[str]:
         msgs = []
         for path in self.error_info_map.keys():
            if path not in self.flushed_files:
-                msgs.extend(self.file_messages(path))
+                error_tuples = self.file_messages(path)
+                msgs.extend(self.format_messages(path, error_tuples))
         return msgs
 
     def targets(self) -> set[str]:
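The net effect of the errors.py split is that producing ErrorTuples (file_messages) is now decoupled from rendering them for display (format_messages), so tuples pulled from the cache can still honor the configured output format. A toy model of that flow, not mypy's Errors class, with illustrative names and data:

from __future__ import annotations

import json

# (path, line, column, end_line, end_column, severity, message, code)
ErrorTuple = tuple

def file_messages() -> list[ErrorTuple]:
    # In mypy this renders ErrorInfo objects; here we just hard-code one tuple.
    return [("main", 2, 0, 2, 0, "error", "Missing return statement", "return")]

def format_messages(tuples: list[ErrorTuple], output: str | None) -> list[str]:
    if output == "json":
        return [
            json.dumps({"file": t[0], "line": t[1], "message": t[6], "code": t[7]})
            for t in tuples
        ]
    return [f"{t[0]}:{t[1]}: {t[5]}: {t[6]}" for t in tuples]

tuples = file_messages()
print(format_messages(tuples, None)[0])    # main:2: error: Missing return statement
print(format_messages(tuples, "json")[0])  # {"file": "main", "line": 2, ...}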

test-data/unit/check-incremental.test

Lines changed: 10 additions & 0 deletions
@@ -7626,3 +7626,13 @@ y = 1
 class C: ...
 [out2]
 tmp/m.py:2: note: Revealed type is "def () -> other.C"
+
+[case testOutputFormatterIncremental]
+# flags2: --output json
+def wrong() -> int:
+    if wrong():
+        return 0
+[out]
+main:2: error: Missing return statement
+[out2]
+{"file": "main", "line": 2, "column": 0, "message": "Missing return statement", "hint": null, "code": "return", "severity": "error"}
