Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
## [Unreleased]
=======
* no unreleased changes *

### Added
* Handling excel files with incorrect file extension

## 11.2.0 / 2024-04-10
### Added
Expand Down
46 changes: 37 additions & 9 deletions lib/ndr_import/file/excel.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@ class Excel < Base
def tables
return enum_for(:tables) unless block_given?

workbook = load_workbook(@filename)
# Create a new file using the given format as the extension, if a format is provided
path = @format.present? ? create_file_with_correct_extension : @filename

workbook = load_workbook(path)
workbook.sheets.each do |sheet_name|
yield sheet_name, excel_rows(workbook, sheet_name)
end
Expand Down Expand Up @@ -91,11 +94,7 @@ def load_workbook(path)
when '.xls'
Roo::Excel.new(SafeFile.safepath_to_string(path))
when '.xlsm', '.xlsx'
if @options['file_password']
Roo::Excelx.new(StringIO.new(decrypted_file_string(path, @options['file_password'])))
else
Roo::Excelx.new(SafeFile.safepath_to_string(path))
end
load_xlsm_xlsx(path)
else
raise "Received file path with unexpected extension #{SafeFile.extname(path)}"
end
Expand All @@ -107,13 +106,42 @@ def load_workbook(path)
# so we create a duplicate file in xlsx extension
raise e.message unless /(.*)\.xls$/.match(path)

load_workbook create_amended_xlsx_from(path)
rescue RuntimeError, ::Zip::Error => e
raise ["Unable to read the file '#{path}'", e.message].join('; ')
ensure
SafeFile.delete(path) if @remove_updated_extension_file
end

def load_xlsm_xlsx(path)
if @options['file_password']
Roo::Excelx.new(StringIO.new(decrypted_file_string(path, @options['file_password'])))
else
Roo::Excelx.new(SafeFile.safepath_to_string(path))
end
end

def create_amended_xlsx_from(path)
new_file_name = SafeFile.basename(path).gsub(/(.*)\.xls$/, '\1_amend.xlsx')
new_file_path = SafeFile.dirname(path).join(new_file_name)
copy_file(path, new_file_path)

load_workbook(new_file_path)
rescue RuntimeError, ::Zip::Error => e
raise ["Unable to read the file '#{path}'", e.message].join('; ')
new_file_path
end

def create_file_with_correct_extension
# We shouldn't attempt to change one excel format to another
return @filename if %w[.xls .xlsx .xlsm].include? SafeFile.extname(@filename).downcase

new_file_name = SafeFile.basename(@filename).sub(/\..+\z/, ".#{@format}")
new_file_path = SafeFile.dirname(@filename).join(new_file_name)
return @filename if ::File.exist?(new_file_path)

copy_file(@filename, new_file_path)
# Flag the newly made file for deletion once we've finished with it
@remove_updated_extension_file = true

new_file_path
end

# Note that this method can produce insecure calls. All callers must protect
Expand Down
31 changes: 31 additions & 0 deletions test/file/excel_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,37 @@ def setup

SafeFile.delete @permanent_test_files.join('txt_file_xls_extension_amend.xlsx')
end

test 'should use format provided to read excel file with incorrect file extension' do
file_path = @permanent_test_files.join('sample_xlsx_with_dat_extension.dat')
handler = NdrImport::File::Excel.new(file_path, 'xlsx')
handler.tables.each do |tablename, sheet|
assert_equal 'Sheet1', tablename
assert_instance_of Enumerator, sheet
assert_equal %w[1A 1B], sheet.first
end
end

test 'should read xlsx file when an xls format provided incorrectly' do
file_path = @permanent_test_files.join('sample_xlsx_with_dat_extension.dat')
handler = NdrImport::File::Excel.new(file_path, 'xls')
handler.tables.each do |tablename, sheet|
assert_equal 'Sheet1', tablename
assert_instance_of Enumerator, sheet
assert_equal %w[1A 1B], sheet.first
end

# Existing logic doesn't remove the _amend.xlsx file
SafeFile.delete @permanent_test_files.join('sample_xlsx_with_dat_extension_amend.xlsx')
end

test 'should fail to load if format provided is not an excel extension' do
file_path = @permanent_test_files.join('sample_xlsx_with_dat_extension.dat')
handler = NdrImport::File::Excel.new(file_path, 'cabbage')
exception = assert_raises(RuntimeError) { handler.tables.to_a }

assert_match(/Received file path with unexpected extension \.cabbage/, exception.message)
end
end
end
end
Binary file added test/resources/sample_xlsx_with_dat_extension.dat
Binary file not shown.