From 83e38846245db7d774e84efc875b6509b7d1b173 Mon Sep 17 00:00:00 2001 From: ollietulloch Date: Wed, 28 May 2025 17:28:25 +0100 Subject: [PATCH 1/3] Fixes bugs in VCF table meta data and mutated columns --- lib/ndr_import/table.rb | 16 ++++++++++++++-- lib/ndr_import/vcf/table.rb | 6 +++++- test/resources/regex_column_names.zip | Bin 0 -> 370 bytes test/universal_importer_helper_test.rb | 24 ++++++++++++++++++++++++ 4 files changed, 43 insertions(+), 3 deletions(-) create mode 100644 test/resources/regex_column_names.zip diff --git a/lib/ndr_import/table.rb b/lib/ndr_import/table.rb index 27711d3..f70f310 100644 --- a/lib/ndr_import/table.rb +++ b/lib/ndr_import/table.rb @@ -51,12 +51,21 @@ def transform(lines, &block) @header_valid = false @header_best_guess = nil @notifier.try(:started) - + # Keep a copy of the original column mappings for use later if columns are mutated + original_columns = @columns.deep_dup + @columns_mutated = false last_col = last_column_to_transform skip_footer_lines(lines, footer_lines).each do |line| line.is_a?(Array) ? process_line(line[0..last_col], &block) : process_line(line, &block) end + # @columns may have been mutated where column is a regular expression. + # We want to restore `@columns` back to its original state, so the column regexp + # will work on the next file + @columns = original_columns if @columns_mutated + # Also ensure that @masked_mappings are recalculated if @columns has been mutated + @masked_mappings = nil if @columns_mutated + @notifier.try(:finished) end @@ -85,7 +94,10 @@ def mutate_regexp_columns(line) @columns.each_with_index do |column, index| next unless column['column'].is_a? Regexp - column['column'] = line[index] if line[index].match? column['column'] + if line[index].match? column['column'] + column['column'] = line[index] + @columns_mutated = true + end end end diff --git a/lib/ndr_import/vcf/table.rb b/lib/ndr_import/vcf/table.rb index 4359bf7..ef8c6ba 100644 --- a/lib/ndr_import/vcf/table.rb +++ b/lib/ndr_import/vcf/table.rb @@ -5,10 +5,14 @@ module Vcf # Syntatic sugar to ensure `header_lines` and `footer_lines` are 1 and 0 respectively. # All other Table logic is inherited from `NdrImport::Table` class Table < ::NdrImport::Table + VCF_OPTIONS = %w[vcf_file_metadata].freeze + def self.all_valid_options - super - %w[delimiter header_lines footer_lines] + %w[vcf_file_metadata] + super - %w[delimiter header_lines footer_lines] + VCF_OPTIONS end + attr_reader(*VCF_OPTIONS) + def header_lines 1 end diff --git a/test/resources/regex_column_names.zip b/test/resources/regex_column_names.zip new file mode 100644 index 0000000000000000000000000000000000000000..c65461517441022f2624b7caa35c6ee3a99c9138 GIT binary patch literal 370 zcmWIWW@Zs#U|`^2@X6m3b 'one' }, + { 'column' => 'two' }, + { 'column' => /\A[AB]\d{3}\z/i }]) + ] + source_file = @permanent_test_files.join('regex_column_names.zip') + @test_importer.stubs(:get_table_mapping).returns(table_mappings.first) + mapped_rows = [] + @test_importer.extract(source_file) { |table, rows| mapped_rows << table.transform(rows).to_a } + + expected_mapped_data = [ + [['SomeTestClass', { rawtext: { 'one' => '2', 'two' => '2', 'b456' => '2' } }, 1]], + [['SomeTestClass', { rawtext: { 'one' => '1', 'two' => '1', 'a123' => '1' } }, 1]] + ] + + assert_equal expected_mapped_data, mapped_rows + end + test 'get_notifier' do class TestImporterWithoutNotifier include NdrImport::UniversalImporterHelper From 82e029b1269c0c2cae100bb8481a22ff704e2545 Mon Sep 17 00:00:00 2001 From: ollietulloch Date: Wed, 28 May 2025 17:30:44 +0100 Subject: [PATCH 2/3] Changelog --- CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 373f1e4..9e0aaaa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ ## [Unreleased] -* no unreleased changes + +### Fixed +* Fix bug in VCF metadata +* Fix bug where table columns are mutated by a regexp column ## 11.3.0/ 2025-02-11 ### Fixed From e72a86ae3c2f63adaf6b1d1a348967e94b20b742 Mon Sep 17 00:00:00 2001 From: ollietulloch Date: Mon, 2 Jun 2025 11:46:54 +0100 Subject: [PATCH 3/3] Changes following review --- lib/ndr_import/table.rb | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/lib/ndr_import/table.rb b/lib/ndr_import/table.rb index f70f310..8f6274d 100644 --- a/lib/ndr_import/table.rb +++ b/lib/ndr_import/table.rb @@ -34,6 +34,8 @@ def initialize(options = {}) end @row_index = 0 + # Keep a copy of the original column mappings for use later if columns are mutated + @original_columns = @columns.deep_dup end def match(filename, tablename) @@ -51,9 +53,6 @@ def transform(lines, &block) @header_valid = false @header_best_guess = nil @notifier.try(:started) - # Keep a copy of the original column mappings for use later if columns are mutated - original_columns = @columns.deep_dup - @columns_mutated = false last_col = last_column_to_transform skip_footer_lines(lines, footer_lines).each do |line| line.is_a?(Array) ? process_line(line[0..last_col], &block) : process_line(line, &block) @@ -62,9 +61,9 @@ def transform(lines, &block) # @columns may have been mutated where column is a regular expression. # We want to restore `@columns` back to its original state, so the column regexp # will work on the next file - @columns = original_columns if @columns_mutated - # Also ensure that @masked_mappings are recalculated if @columns has been mutated - @masked_mappings = nil if @columns_mutated + @columns = @original_columns + # Also ensure that @masked_mappings are recalculated + @masked_mappings = nil @notifier.try(:finished) end @@ -94,10 +93,7 @@ def mutate_regexp_columns(line) @columns.each_with_index do |column, index| next unless column['column'].is_a? Regexp - if line[index].match? column['column'] - column['column'] = line[index] - @columns_mutated = true - end + column['column'] = line[index] if line[index].match? column['column'] end end