From e62db824707c9ddec24936aec6f9ebc203f2db01 Mon Sep 17 00:00:00 2001 From: Michael Welborn Date: Tue, 9 Sep 2025 16:32:33 -0500 Subject: [PATCH 1/6] Add sample ETL output with row spans and column spans in the table --- .../4725/112731/112257/etl_output_rs_cs.json | 1 + .../4725/112731/112257/page_0_text.txt | 7 + .../4725/112731/112257/page_0_tokens.json | 1 + .../4725/112731/112257/tables_0.json | 1 + tests/etloutput/test_rowspan_colspan.py | 126 ++++++++++++++++++ 5 files changed, 136 insertions(+) create mode 100644 tests/data/etloutput/4725/112731/112257/etl_output_rs_cs.json create mode 100644 tests/data/etloutput/4725/112731/112257/page_0_text.txt create mode 100644 tests/data/etloutput/4725/112731/112257/page_0_tokens.json create mode 100644 tests/data/etloutput/4725/112731/112257/tables_0.json create mode 100644 tests/etloutput/test_rowspan_colspan.py diff --git a/tests/data/etloutput/4725/112731/112257/etl_output_rs_cs.json b/tests/data/etloutput/4725/112731/112257/etl_output_rs_cs.json new file mode 100644 index 0000000..7f58aef --- /dev/null +++ b/tests/data/etloutput/4725/112731/112257/etl_output_rs_cs.json @@ -0,0 +1 @@ +{"email_metadata":{},"full_text":"indico-file:///storage/submission/4725/112731/112257/full_text.txt","num_pages":1,"pages":[{"blocks":"indico-file:///storage/submission/4725/112731/112257/page_0_blocks.json","characters":"indico-file:///storage/submission/4725/112731/112257/page_0_chars.json","doc_offset":{"end":151,"start":0},"dpi":{"dpix":300,"dpiy":300},"filename":"Rowspan Colspan Sample.png","image":"indico-file:///storage/submission/4725/112731/112257/original_page_0.png","page_info":"indico-file:///storage/submission/4725/112731/112257/page_info_0.json","page_num":0,"size":{"height":3883,"width":2556},"tables":"indico-file:///storage/submission/4725/112731/112257/tables_0.json","text":"indico-file:///storage/submission/4725/112731/112257/page_0_text.txt","thumbnail":"indico-file:///storage/submission/4725/112731/112257/original_thumbnail_0.png","tokens":"indico-file:///storage/submission/4725/112731/112257/page_0_tokens.json"}]} diff --git a/tests/data/etloutput/4725/112731/112257/page_0_text.txt b/tests/data/etloutput/4725/112731/112257/page_0_text.txt new file mode 100644 index 0000000..649da50 --- /dev/null +++ b/tests/data/etloutput/4725/112731/112257/page_0_text.txt @@ -0,0 +1,7 @@ +Rowspan / Colspan Sample +Alfa Bravo Charlie Delta +Echo Foxtrot Golf + Hotel India Juliett +Kilo Mike +November Lima Oscar +formatted by Markdeep 1.18_d \ No newline at end of file diff --git a/tests/data/etloutput/4725/112731/112257/page_0_tokens.json b/tests/data/etloutput/4725/112731/112257/page_0_tokens.json new file mode 100644 index 0000000..f9b5f00 --- /dev/null +++ b/tests/data/etloutput/4725/112731/112257/page_0_tokens.json @@ -0,0 +1 @@ +[{"block_offset":{"end":7,"start":0},"doc_offset":{"end":7,"start":0},"page_num":0,"page_offset":{"end":7,"start":0},"position":{"bbBot":287,"bbLeft":561,"bbRight":1015,"bbTop":192,"bottom":287,"left":561,"right":1015,"top":192},"style":{"background_color":null,"bold":null,"font_face":null,"font_size":null,"handwriting":false,"italic":null,"text_color":null,"underlined":null},"text":"Rowspan"},{"block_offset":{"end":9,"start":8},"doc_offset":{"end":9,"start":8},"page_num":0,"page_offset":{"end":9,"start":8},"position":{"bbBot":290,"bbLeft":1070,"bbRight":1123,"bbTop":187,"bottom":290,"left":1070,"right":1123,"top":187},"style":{"background_color":null,"bold":null,"font_face":null,"font_size":null,"handwriting":false,"italic":null,"text_color":null,"underlined":null},"text":"/"},{"block_offset":{"end":17,"start":10},"doc_offset":{"end":17,"start":10},"page_num":0,"page_offset":{"end":17,"start":10},"position":{"bbBot":291,"bbLeft":1158,"bbRight":1547,"bbTop":185,"bottom":291,"left":1158,"right":1547,"top":185},"style":{"background_color":null,"bold":null,"font_face":null,"font_size":null,"handwriting":false,"italic":null,"text_color":null,"underlined":null},"text":"Colspan"},{"block_offset":{"end":24,"start":18},"doc_offset":{"end":24,"start":18},"page_num":0,"page_offset":{"end":24,"start":18},"position":{"bbBot":288,"bbLeft":1601,"bbRight":1978,"bbTop":184,"bottom":288,"left":1601,"right":1978,"top":184},"style":{"background_color":null,"bold":null,"font_face":null,"font_size":null,"handwriting":false,"italic":null,"text_color":null,"underlined":null},"text":"Sample"},{"block_offset":{"end":4,"start":0},"doc_offset":{"end":29,"start":25},"page_num":0,"page_offset":{"end":29,"start":25},"position":{"bbBot":520,"bbLeft":772,"bbRight":858,"bbTop":477,"bottom":520,"left":772,"right":858,"top":477},"style":{"background_color":null,"bold":null,"font_face":null,"font_size":null,"handwriting":false,"italic":null,"text_color":null,"underlined":null},"text":"Alfa"},{"block_offset":{"end":10,"start":5},"doc_offset":{"end":35,"start":30},"page_num":0,"page_offset":{"end":35,"start":30},"position":{"bbBot":520,"bbLeft":1109,"bbRight":1242,"bbTop":476,"bottom":520,"left":1109,"right":1242,"top":476},"style":{"background_color":null,"bold":null,"font_face":null,"font_size":null,"handwriting":false,"italic":null,"text_color":null,"underlined":null},"text":"Bravo"},{"block_offset":{"end":18,"start":11},"doc_offset":{"end":43,"start":36},"page_num":0,"page_offset":{"end":43,"start":36},"position":{"bbBot":521,"bbLeft":1358,"bbRight":1523,"bbTop":475,"bottom":521,"left":1358,"right":1523,"top":475},"style":{"background_color":null,"bold":null,"font_face":null,"font_size":null,"handwriting":false,"italic":null,"text_color":null,"underlined":null},"text":"Charlie"},{"block_offset":{"end":24,"start":19},"doc_offset":{"end":49,"start":44},"page_num":0,"page_offset":{"end":49,"start":44},"position":{"bbBot":518,"bbLeft":1635,"bbRight":1757,"bbTop":473,"bottom":518,"left":1635,"right":1757,"top":473},"style":{"background_color":null,"bold":null,"font_face":null,"font_size":null,"handwriting":false,"italic":null,"text_color":null,"underlined":null},"text":"Delta"},{"block_offset":{"end":29,"start":25},"doc_offset":{"end":54,"start":50},"page_num":0,"page_offset":{"end":54,"start":50},"position":{"bbBot":697,"bbLeft":767,"bbRight":875,"bbTop":652,"bottom":697,"left":767,"right":875,"top":652},"style":{"background_color":null,"bold":null,"font_face":null,"font_size":null,"handwriting":false,"italic":null,"text_color":null,"underlined":null},"text":"Echo"},{"block_offset":{"end":37,"start":30},"doc_offset":{"end":62,"start":55},"page_num":0,"page_offset":{"end":62,"start":55},"position":{"bbBot":648,"bbLeft":1106,"bbRight":1269,"bbTop":592,"bottom":648,"left":1106,"right":1269,"top":592},"style":{"background_color":null,"bold":null,"font_face":null,"font_size":null,"handwriting":false,"italic":null,"text_color":null,"underlined":null},"text":"Foxtrot"},{"block_offset":{"end":43,"start":39},"doc_offset":{"end":68,"start":64},"page_num":0,"page_offset":{"end":68,"start":64},"position":{"bbBot":640,"bbLeft":1636,"bbRight":1727,"bbTop":597,"bottom":640,"left":1636,"right":1727,"top":597},"style":{"background_color":null,"bold":null,"font_face":null,"font_size":null,"handwriting":false,"italic":null,"text_color":null,"underlined":null},"text":"Golf"},{"block_offset":{"end":50,"start":45},"doc_offset":{"end":75,"start":70},"page_num":0,"page_offset":{"end":75,"start":70},"position":{"bbBot":754,"bbLeft":1106,"bbRight":1231,"bbTop":698,"bottom":754,"left":1106,"right":1231,"top":698},"style":{"background_color":null,"bold":null,"font_face":null,"font_size":null,"handwriting":false,"italic":null,"text_color":null,"underlined":null},"text":"Hotel"},{"block_offset":{"end":56,"start":51},"doc_offset":{"end":81,"start":76},"page_num":0,"page_offset":{"end":81,"start":76},"position":{"bbBot":748,"bbLeft":1355,"bbRight":1465,"bbTop":706,"bottom":748,"left":1355,"right":1465,"top":706},"style":{"background_color":null,"bold":null,"font_face":null,"font_size":null,"handwriting":false,"italic":null,"text_color":null,"underlined":null},"text":"India"},{"block_offset":{"end":64,"start":57},"doc_offset":{"end":89,"start":82},"page_num":0,"page_offset":{"end":89,"start":82},"position":{"bbBot":749,"bbLeft":1636,"bbRight":1771,"bbTop":703,"bottom":749,"left":1636,"right":1771,"top":703},"style":{"background_color":null,"bold":null,"font_face":null,"font_size":null,"handwriting":false,"italic":null,"text_color":null,"underlined":null},"text":"Juliett"},{"block_offset":{"end":69,"start":65},"doc_offset":{"end":94,"start":90},"page_num":0,"page_offset":{"end":94,"start":90},"position":{"bbBot":867,"bbLeft":765,"bbRight":852,"bbTop":811,"bottom":867,"left":765,"right":852,"top":811},"style":{"background_color":null,"bold":null,"font_face":null,"font_size":null,"handwriting":false,"italic":null,"text_color":null,"underlined":null},"text":"Kilo"},{"block_offset":{"end":76,"start":72},"doc_offset":{"end":101,"start":97},"page_num":0,"page_offset":{"end":101,"start":97},"position":{"bbBot":857,"bbLeft":1633,"bbRight":1738,"bbTop":813,"bottom":857,"left":1633,"right":1738,"top":813},"style":{"background_color":null,"bold":null,"font_face":null,"font_size":null,"handwriting":false,"italic":null,"text_color":null,"underlined":null},"text":"Mike"},{"block_offset":{"end":85,"start":77},"doc_offset":{"end":110,"start":102},"page_num":0,"page_offset":{"end":110,"start":102},"position":{"bbBot":968,"bbLeft":767,"bbRight":1001,"bbTop":923,"bottom":968,"left":767,"right":1001,"top":923},"style":{"background_color":null,"bold":null,"font_face":null,"font_size":null,"handwriting":false,"italic":null,"text_color":null,"underlined":null},"text":"November"},{"block_offset":{"end":90,"start":86},"doc_offset":{"end":115,"start":111},"page_num":0,"page_offset":{"end":115,"start":111},"position":{"bbBot":914,"bbLeft":1108,"bbRight":1216,"bbTop":867,"bottom":914,"left":1108,"right":1216,"top":867},"style":{"background_color":null,"bold":null,"font_face":null,"font_size":null,"handwriting":false,"italic":null,"text_color":null,"underlined":null},"text":"Lima"},{"block_offset":{"end":97,"start":92},"doc_offset":{"end":122,"start":117},"page_num":0,"page_offset":{"end":122,"start":117},"position":{"bbBot":963,"bbLeft":1637,"bbRight":1772,"bbTop":922,"bottom":963,"left":1637,"right":1772,"top":922},"style":{"background_color":null,"bold":null,"font_face":null,"font_size":null,"handwriting":false,"italic":null,"text_color":null,"underlined":null},"text":"Oscar"},{"block_offset":{"end":9,"start":0},"doc_offset":{"end":132,"start":123},"page_num":0,"page_offset":{"end":132,"start":123},"position":{"bbBot":1393,"bbLeft":2007,"bbRight":2145,"bbTop":1352,"bottom":1393,"left":2007,"right":2145,"top":1352},"style":{"background_color":null,"bold":null,"font_face":null,"font_size":null,"handwriting":false,"italic":null,"text_color":null,"underlined":null},"text":"formatted"},{"block_offset":{"end":12,"start":10},"doc_offset":{"end":135,"start":133},"page_num":0,"page_offset":{"end":135,"start":133},"position":{"bbBot":1390,"bbLeft":2154,"bbRight":2187,"bbTop":1352,"bottom":1390,"left":2154,"right":2187,"top":1352},"style":{"background_color":null,"bold":null,"font_face":null,"font_size":null,"handwriting":false,"italic":null,"text_color":null,"underlined":null},"text":"by"},{"block_offset":{"end":21,"start":13},"doc_offset":{"end":144,"start":136},"page_num":0,"page_offset":{"end":144,"start":136},"position":{"bbBot":1390,"bbLeft":2196,"bbRight":2342,"bbTop":1351,"bottom":1390,"left":2196,"right":2342,"top":1351},"style":{"background_color":null,"bold":null,"font_face":null,"font_size":null,"handwriting":false,"italic":null,"text_color":null,"underlined":null},"text":"Markdeep"},{"block_offset":{"end":28,"start":22},"doc_offset":{"end":151,"start":145},"page_num":0,"page_offset":{"end":151,"start":145},"position":{"bbBot":1390,"bbLeft":2354,"bbRight":2448,"bbTop":1351,"bottom":1390,"left":2354,"right":2448,"top":1351},"style":{"background_color":null,"bold":null,"font_face":null,"font_size":null,"handwriting":false,"italic":null,"text_color":null,"underlined":null},"text":"1.18_d"}] diff --git a/tests/data/etloutput/4725/112731/112257/tables_0.json b/tests/data/etloutput/4725/112731/112257/tables_0.json new file mode 100644 index 0000000..8800fc7 --- /dev/null +++ b/tests/data/etloutput/4725/112731/112257/tables_0.json @@ -0,0 +1 @@ +[{"cells":[{"cell_type":"header","columns":[0],"doc_offsets":[{"end":29,"start":25}],"page_offsets":[{"end":29,"start":25}],"position":{"bottom":562,"left":713,"right":1052,"top":435},"rows":[0],"text":"Alfa"},{"cell_type":"header","columns":[1],"doc_offsets":[{"end":35,"start":30}],"page_offsets":[{"end":35,"start":30}],"position":{"bottom":561,"left":1051,"right":1301,"top":434},"rows":[0],"text":"Bravo"},{"cell_type":"header","columns":[2],"doc_offsets":[{"end":43,"start":36}],"page_offsets":[{"end":43,"start":36}],"position":{"bottom":560,"left":1300,"right":1578,"top":433},"rows":[0],"text":"Charlie"},{"cell_type":"header","columns":[3],"doc_offsets":[{"end":49,"start":44}],"page_offsets":[{"end":49,"start":44}],"position":{"bottom":561,"left":1580,"right":1821,"top":430},"rows":[0],"text":"Delta"},{"cell_type":"content","columns":[0],"doc_offsets":[{"end":54,"start":50}],"page_offsets":[{"end":54,"start":50}],"position":{"bottom":778,"left":712,"right":1052,"top":561},"rows":[1,2],"text":"Echo"},{"cell_type":"content","columns":[1,2],"doc_offsets":[{"end":62,"start":55}],"page_offsets":[{"end":62,"start":55}],"position":{"bottom":670,"left":1052,"right":1580,"top":561},"rows":[1],"text":"Foxtrot"},{"cell_type":"content","columns":[3],"doc_offsets":[{"end":68,"start":64}],"page_offsets":[{"end":68,"start":64}],"position":{"bottom":669,"left":1580,"right":1821,"top":560},"rows":[1],"text":"Golf"},{"cell_type":"content","columns":[1],"doc_offsets":[{"end":75,"start":70}],"page_offsets":[{"end":75,"start":70}],"position":{"bottom":778,"left":1052,"right":1301,"top":670},"rows":[2],"text":"Hotel"},{"cell_type":"content","columns":[2],"doc_offsets":[{"end":81,"start":76}],"page_offsets":[{"end":81,"start":76}],"position":{"bottom":777,"left":1301,"right":1581,"top":669},"rows":[2],"text":"India"},{"cell_type":"content","columns":[3],"doc_offsets":[{"end":89,"start":82}],"page_offsets":[{"end":89,"start":82}],"position":{"bottom":776,"left":1580,"right":1822,"top":668},"rows":[2],"text":"Juliett"},{"cell_type":"content","columns":[0],"doc_offsets":[{"end":94,"start":90}],"page_offsets":[{"end":94,"start":90}],"position":{"bottom":891,"left":712,"right":1053,"top":778},"rows":[3],"text":"Kilo"},{"cell_type":"content","columns":[1,2],"doc_offsets":[],"page_offsets":[],"position":{"bottom":997,"left":1052,"right":1582,"top":776},"rows":[3,4],"text":"Lima"},{"cell_type":"content","columns":[3],"doc_offsets":[{"end":101,"start":97}],"page_offsets":[{"end":101,"start":97}],"position":{"bottom":889,"left":1581,"right":1822,"top":775},"rows":[3],"text":"Mike"},{"cell_type":"content","columns":[0],"doc_offsets":[{"end":110,"start":102}],"page_offsets":[{"end":110,"start":102}],"position":{"bottom":998,"left":713,"right":1053,"top":889},"rows":[4],"text":"November"},{"cell_type":"content","columns":[3],"doc_offsets":[{"end":122,"start":117}],"page_offsets":[{"end":122,"start":117}],"position":{"bottom":995,"left":1581,"right":1824,"top":888},"rows":[4],"text":"Oscar"}],"doc_offsets":[{"end":94,"start":25},{"end":122,"start":97}],"num_columns":4,"num_rows":5,"page_num":0,"page_offsets":[{"end":94,"start":25},{"end":122,"start":97}],"position":{"bottom":998,"left":711,"right":1824,"top":430},"table_id":0,"table_offset":{"column":0,"row":0}}] diff --git a/tests/etloutput/test_rowspan_colspan.py b/tests/etloutput/test_rowspan_colspan.py new file mode 100644 index 0000000..b21409c --- /dev/null +++ b/tests/etloutput/test_rowspan_colspan.py @@ -0,0 +1,126 @@ +from pathlib import Path + +import pytest + +from indico_toolkit import etloutput +from indico_toolkit.etloutput import EtlOutput, Table +from indico_toolkit.results import Span + +data_folder = Path(__file__).parent.parent / "data" / "etloutput" +etl_output_file = data_folder / "4725" / "112731" / "112257" / "etl_output_rs_cs.json" + + +def read_uri(uri: str | Path) -> bytes: + uri = str(uri) + storage_folder_path = uri.split("/storage/submission/")[-1] + file_path = data_folder / storage_folder_path + return file_path.read_bytes() + + +@pytest.fixture(scope="module") +def etl_output() -> EtlOutput: + return etloutput.load(etl_output_file, reader=read_uri) + + +@pytest.fixture(scope="module") +def table(etl_output: EtlOutput) -> Table: + """ + Return the table from the rowspan / colspan sample: + + | Alfa | Bravo | Charlie | Delta | + |----------|-------------------|---------| + | | Foxtrot | Golf | + | Echo |-------------------|---------| + | | Hotel | India | Juliett | + |----------|-------------------|---------| + | Kilo | | Mike | + |----------| Lima |---------| + | November | | Oscar | + ---------------------------------------- + """ + return etl_output.tables[0] + + +def test_cells(table: Table) -> None: + parsed_cells = [cell.text for cell in table.cells] + expected_cells = [ + "Alfa", "Bravo", "Charlie", "Delta", + "Echo", "Foxtrot", "Golf", + "Hotel", "India", "Juliett", + "Kilo", "Lima", "Mike", + "November", "Oscar", + ] # fmt: skip + assert parsed_cells == expected_cells + + +def test_rows(table: Table) -> None: + parsed_rows = [[cell.text for cell in row] for row in table.rows] + expected_rows = [ + ["Alfa", "Bravo", "Charlie", "Delta"], + ["Echo", "Foxtrot", "Foxtrot", "Golf"], + ["Echo", "Hotel", "India", "Juliett"], + ["Kilo", "Lima", "Lima", "Mike"], + ["November", "Lima", "Lima", "Oscar"], + ] + assert parsed_rows == expected_rows + + +def test_columns(table: Table) -> None: + parsed_columns = [[cell.text for cell in column] for column in table.columns] + expected_columns = [ + [ + "Alfa", + "Echo", + "Echo", + "Kilo", + "November", + ], + [ + "Bravo", + "Foxtrot", + "Hotel", + "Lima", + "Lima", + ], + [ + "Charlie", + "Foxtrot", + "India", + "Lima", + "Lima", + ], + [ + "Delta", + "Golf", + "Juliett", + "Mike", + "Oscar", + ], + ] + assert parsed_columns == expected_columns + + +@pytest.mark.parametrize( + "span, expected_text", + [ + (Span(page=0, start=25, end=29), "Alfa"), + (Span(page=0, start=30, end=35), "Bravo"), + (Span(page=0, start=36, end=43), "Charlie"), + (Span(page=0, start=44, end=49), "Delta"), + (Span(page=0, start=50, end=54), "Echo"), + (Span(page=0, start=55, end=62), "Foxtrot"), + (Span(page=0, start=64, end=68), "Golf"), + (Span(page=0, start=70, end=75), "Hotel"), + (Span(page=0, start=76, end=81), "India"), + (Span(page=0, start=82, end=89), "Juliett"), + (Span(page=0, start=90, end=94), "Kilo"), + (Span(page=0, start=111, end=115), "Lima"), + (Span(page=0, start=97, end=101), "Mike"), + (Span(page=0, start=102, end=110), "November"), + (Span(page=0, start=117, end=122), "Oscar"), + ], +) +def test_table_cell_for(etl_output: EtlOutput, span: Span, expected_text: str) -> None: + token = etl_output.token_for(span) + table, cell = etl_output.table_cell_for(token) + assert cell.text == expected_text From ada146a9aa1cce7c70a71e64e34be369f0c2086b Mon Sep 17 00:00:00 2001 From: Michael Welborn Date: Tue, 9 Sep 2025 16:34:13 -0500 Subject: [PATCH 2/6] Account for rowspans and colspans in `Table.from_dict()` --- indico_toolkit/etloutput/table.py | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/indico_toolkit/etloutput/table.py b/indico_toolkit/etloutput/table.py index 6e03610..807f7ea 100644 --- a/indico_toolkit/etloutput/table.py +++ b/indico_toolkit/etloutput/table.py @@ -20,6 +20,8 @@ def from_dict(table: object) -> "Table": """ page = get(table, int, "page_num") get(table, dict, "position")["page_num"] = page + row_count = get(table, int, "num_rows") + column_count = get(table, int, "num_columns") cells = tuple( sorted( @@ -27,14 +29,26 @@ def from_dict(table: object) -> "Table": key=attrgetter("range"), ) ) + cells_by_row_col = { + (row, column): cell + for cell in cells + for row in cell.range.rows + for column in cell.range.columns + } rows = tuple( - tuple(cell for cell in cells if row in cell.range.rows) - for row in range(get(table, int, "num_rows")) - ) + tuple( + cells_by_row_col[row, column] + for column in range(column_count) + ) + for row in range(row_count) + ) # fmt: skip columns = tuple( - tuple(cell for cell in cells if column in cell.range.columns) - for column in range(get(table, int, "num_columns")) - ) + tuple( + cells_by_row_col[row, column] + for row in range(row_count) + ) + for column in range(column_count) + ) # fmt: skip return Table( box=Box.from_dict(get(table, dict, "position")), From 2782446950b4cc506939c4fdd199c3b3002337b1 Mon Sep 17 00:00:00 2001 From: Michael Welborn Date: Tue, 9 Sep 2025 16:34:56 -0500 Subject: [PATCH 3/6] Ensure `Range.from_dict()` uses the min row and column --- indico_toolkit/etloutput/range.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/indico_toolkit/etloutput/range.py b/indico_toolkit/etloutput/range.py index efe890a..7d26431 100644 --- a/indico_toolkit/etloutput/range.py +++ b/indico_toolkit/etloutput/range.py @@ -21,8 +21,8 @@ def from_dict(cell: object) -> "Range": columns = get(cell, list, "columns") return Range( - row=rows[0], - column=columns[0], + row=min(rows), + column=min(columns), rowspan=len(rows), columnspan=len(columns), rows=tuple(rows), From f920beb806d58e4e5a8e429a61820a0738a87c69 Mon Sep 17 00:00:00 2001 From: Michael Welborn Date: Tue, 9 Sep 2025 16:47:22 -0500 Subject: [PATCH 4/6] Account for rowspans and colspans in `EtlOutput.table_cell_for()` --- indico_toolkit/etloutput/etloutput.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/indico_toolkit/etloutput/etloutput.py b/indico_toolkit/etloutput/etloutput.py index 2c4f79b..899dade 100644 --- a/indico_toolkit/etloutput/etloutput.py +++ b/indico_toolkit/etloutput/etloutput.py @@ -96,15 +96,11 @@ def table_cell_for(self, token: Token) -> "tuple[Table, Cell]": else: raise TableCellNotFoundError(f"no table contains {token!r}") - try: - row_index = bisect_left( - table.rows, token_vmid, key=lambda row: row[0].box.bottom - ) - row = table.rows[row_index] - - cell_index = bisect_left(row, token_hmid, key=attrgetter("box.right")) - cell = row[cell_index] - except (IndexError, ValueError) as error: - raise TableCellNotFoundError(f"no cell contains {token!r}") from error - - return table, cell + for cell in table.cells: + if ( + (cell.box.top <= token_vmid <= cell.box.bottom) and + (cell.box.left <= token_hmid <= cell.box.right) + ): # fmt: skip + return table, cell + else: + raise TableCellNotFoundError(f"no cell contains {token!r}") From 3247aa801fa95b000c47569428d8fedc8870f5da Mon Sep 17 00:00:00 2001 From: Michael Welborn Date: Tue, 9 Sep 2025 16:51:11 -0500 Subject: [PATCH 5/6] Narrow `AutoReviewed.changes` type to v3 result file changes As v1 is no longer supported by `results` or `polling`. --- indico_toolkit/polling/autoreview.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/indico_toolkit/polling/autoreview.py b/indico_toolkit/polling/autoreview.py index 3939cac..7f20533 100644 --- a/indico_toolkit/polling/autoreview.py +++ b/indico_toolkit/polling/autoreview.py @@ -30,7 +30,7 @@ @dataclass class AutoReviewed: - changes: "dict[str, Any] | list[dict[str, Any]]" + changes: "list[dict[str, Any]]" reject: bool = False stp: bool = False From f83c2ef6af2fd138e172fdb4bdd8703e57265fdc Mon Sep 17 00:00:00 2001 From: Michael Welborn Date: Tue, 9 Sep 2025 16:58:44 -0500 Subject: [PATCH 6/6] Bump version and update changelog --- CHANGELOG.md | 11 +++++++++++ indico_toolkit/__init__.py | 2 +- pyproject.toml | 2 +- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f109eab..5e5cbbd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,16 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and versions match the minimum IPA version required to use functionality. +## [v7.2.1] - 2025-09-09 + +### Fixed + +- Account for row spans and column spans in ETL output tables. + Affects `Table.from_dict()`, `Range.from_dict()`, and `EtlOutput.table_cell_for()`. +- Narrow `AutoReviewed.changes` type to v3 result file changes, + as that's the only version supported by `results` and `polling` modules. + + ## [v7.2.0] - 2025-06-17 ### Added @@ -255,6 +265,7 @@ This is the first major version release tested to work on Indico 6.X. - Row Association now also sorting on 'bbtop'. +[v7.2.1]: https://github.com/IndicoDataSolutions/indico-toolkit-python/compare/v7.2.0...v7.2.1 [v7.2.0]: https://github.com/IndicoDataSolutions/indico-toolkit-python/compare/v6.14.2...v7.2.0 [v6.14.2]: https://github.com/IndicoDataSolutions/indico-toolkit-python/compare/v6.14.1...v6.14.2 [v6.14.1]: https://github.com/IndicoDataSolutions/indico-toolkit-python/compare/v6.14.0...v6.14.1 diff --git a/indico_toolkit/__init__.py b/indico_toolkit/__init__.py index bf75f9a..57b848f 100644 --- a/indico_toolkit/__init__.py +++ b/indico_toolkit/__init__.py @@ -21,4 +21,4 @@ "ToolkitStaggeredLoopError", "ToolkitStatusError", ) -__version__ = "7.2.0" +__version__ = "7.2.1" diff --git a/pyproject.toml b/pyproject.toml index 8ca6bfc..7b1bfe5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ authors = [ readme = "README.md" urls = { source = "https://github.com/IndicoDataSolutions/Indico-Solutions-Toolkit" } requires-python = ">=3.10" -version = "7.2.0" +version = "7.2.1" dependencies = ["indico-client (>=6.14.0,<7.0.0)"] [project.optional-dependencies]