diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e09537..65208ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ ## [Unreleased] ======= -* no unreleased changes * + +### Added +* Handling excel files with incorrect file extension ## 11.2.0 / 2024-04-10 ### Added diff --git a/lib/ndr_import/file/excel.rb b/lib/ndr_import/file/excel.rb index cd76555..94a9ca6 100644 --- a/lib/ndr_import/file/excel.rb +++ b/lib/ndr_import/file/excel.rb @@ -20,7 +20,10 @@ class Excel < Base def tables return enum_for(:tables) unless block_given? - workbook = load_workbook(@filename) + # Create a new file using the given format as the extension, if a format is provided + path = @format.present? ? create_file_with_correct_extension : @filename + + workbook = load_workbook(path) workbook.sheets.each do |sheet_name| yield sheet_name, excel_rows(workbook, sheet_name) end @@ -91,11 +94,7 @@ def load_workbook(path) when '.xls' Roo::Excel.new(SafeFile.safepath_to_string(path)) when '.xlsm', '.xlsx' - if @options['file_password'] - Roo::Excelx.new(StringIO.new(decrypted_file_string(path, @options['file_password']))) - else - Roo::Excelx.new(SafeFile.safepath_to_string(path)) - end + load_xlsm_xlsx(path) else raise "Received file path with unexpected extension #{SafeFile.extname(path)}" end @@ -107,13 +106,42 @@ def load_workbook(path) # so we create a duplicate file in xlsx extension raise e.message unless /(.*)\.xls$/.match(path) + load_workbook create_amended_xlsx_from(path) + rescue RuntimeError, ::Zip::Error => e + raise ["Unable to read the file '#{path}'", e.message].join('; ') + ensure + SafeFile.delete(path) if @remove_updated_extension_file + end + + def load_xlsm_xlsx(path) + if @options['file_password'] + Roo::Excelx.new(StringIO.new(decrypted_file_string(path, @options['file_password']))) + else + Roo::Excelx.new(SafeFile.safepath_to_string(path)) + end + end + + def create_amended_xlsx_from(path) new_file_name = SafeFile.basename(path).gsub(/(.*)\.xls$/, '\1_amend.xlsx') new_file_path = SafeFile.dirname(path).join(new_file_name) copy_file(path, new_file_path) - load_workbook(new_file_path) - rescue RuntimeError, ::Zip::Error => e - raise ["Unable to read the file '#{path}'", e.message].join('; ') + new_file_path + end + + def create_file_with_correct_extension + # We shouldn't attempt to change one excel format to another + return @filename if %w[.xls .xlsx .xlsm].include? SafeFile.extname(@filename).downcase + + new_file_name = SafeFile.basename(@filename).sub(/\..+\z/, ".#{@format}") + new_file_path = SafeFile.dirname(@filename).join(new_file_name) + return @filename if ::File.exist?(new_file_path) + + copy_file(@filename, new_file_path) + # Flag the newly made file for deletion once we've finished with it + @remove_updated_extension_file = true + + new_file_path end # Note that this method can produce insecure calls. All callers must protect diff --git a/test/file/excel_test.rb b/test/file/excel_test.rb index 92836b6..3c49642 100644 --- a/test/file/excel_test.rb +++ b/test/file/excel_test.rb @@ -129,6 +129,37 @@ def setup SafeFile.delete @permanent_test_files.join('txt_file_xls_extension_amend.xlsx') end + + test 'should use format provided to read excel file with incorrect file extension' do + file_path = @permanent_test_files.join('sample_xlsx_with_dat_extension.dat') + handler = NdrImport::File::Excel.new(file_path, 'xlsx') + handler.tables.each do |tablename, sheet| + assert_equal 'Sheet1', tablename + assert_instance_of Enumerator, sheet + assert_equal %w[1A 1B], sheet.first + end + end + + test 'should read xlsx file when an xls format provided incorrectly' do + file_path = @permanent_test_files.join('sample_xlsx_with_dat_extension.dat') + handler = NdrImport::File::Excel.new(file_path, 'xls') + handler.tables.each do |tablename, sheet| + assert_equal 'Sheet1', tablename + assert_instance_of Enumerator, sheet + assert_equal %w[1A 1B], sheet.first + end + + # Existing logic doesn't remove the _amend.xlsx file + SafeFile.delete @permanent_test_files.join('sample_xlsx_with_dat_extension_amend.xlsx') + end + + test 'should fail to load if format provided is not an excel extension' do + file_path = @permanent_test_files.join('sample_xlsx_with_dat_extension.dat') + handler = NdrImport::File::Excel.new(file_path, 'cabbage') + exception = assert_raises(RuntimeError) { handler.tables.to_a } + + assert_match(/Received file path with unexpected extension \.cabbage/, exception.message) + end end end end diff --git a/test/resources/sample_xlsx_with_dat_extension.dat b/test/resources/sample_xlsx_with_dat_extension.dat new file mode 100644 index 0000000..d046e72 Binary files /dev/null and b/test/resources/sample_xlsx_with_dat_extension.dat differ