diff --git a/Examples/Import-Excel/ImportMultipleSheetsAsArray.ps1 b/Examples/Import-Excel/ImportMultipleSheetsAsArray.ps1 new file mode 100644 index 0000000..0f46c09 --- /dev/null +++ b/Examples/Import-Excel/ImportMultipleSheetsAsArray.ps1 @@ -0,0 +1,7 @@ +Import-Module $PSScriptRoot\..\..\ImportExcel.psd1 -Force + +$xlfile = "$PSScriptRoot\yearlySales.xlsx" + +$result = Import-Excel $xlfile * -NotAsDictionary + +$result | Measure-Object \ No newline at end of file diff --git a/Examples/Import-Excel/ImportMultipleSheetsAsHashtable.ps1 b/Examples/Import-Excel/ImportMultipleSheetsAsHashtable.ps1 new file mode 100644 index 0000000..d99c61c --- /dev/null +++ b/Examples/Import-Excel/ImportMultipleSheetsAsHashtable.ps1 @@ -0,0 +1,9 @@ +Import-Module $PSScriptRoot\..\..\ImportExcel.psd1 -Force + +$xlfile = "$PSScriptRoot\yearlySales.xlsx" + +$result = Import-Excel $xlfile * + +foreach ($sheet in $result.Values) { + $sheet +} \ No newline at end of file diff --git a/Examples/Import-Excel/yearlySales.xlsx b/Examples/Import-Excel/yearlySales.xlsx new file mode 100644 index 0000000..d30993a Binary files /dev/null and b/Examples/Import-Excel/yearlySales.xlsx differ diff --git a/Public/Import-Excel.ps1 b/Public/Import-Excel.ps1 index 49982cb..b9574e7 100644 --- a/Public/Import-Excel.ps1 +++ b/Public/Import-Excel.ps1 @@ -15,7 +15,7 @@ [Alias('Sheet')] [Parameter(Position = 1)] [ValidateNotNullOrEmpty()] - [String]$WorksheetName, + [String[]]$WorksheetName, [Parameter(ParameterSetName = 'PathB' , Mandatory)] [Parameter(ParameterSetName = 'PackageB', Mandatory)] [String[]]$HeaderName , @@ -36,7 +36,8 @@ [string[]]$AsDate, [ValidateNotNullOrEmpty()] [String]$Password, - [Int[]]$ImportColumns + [Int[]]$ImportColumns, + [Switch]$NotAsDictionary ) end { $sw = [System.Diagnostics.Stopwatch]::StartNew() @@ -64,7 +65,7 @@ try { if ($ImportColumns) { - $end = $Worksheet.Dimension.End.Column + $end = $sheet.Dimension.End.Column # Check $ImportColumns if ($ImportColumns[0] -le 0) { throw 
"The first entry in ImportColumns must be equal or greater 1" ; return } # Check $StartColumn and $EndColumn @@ -95,7 +96,7 @@ foreach ($C in $Columns) { #allow "False" or "0" to be column headings - $Worksheet.Cells[$StartRow, $C] | Where-Object { -not [string]::IsNullOrEmpty($_.Value) } | Select-Object @{N = 'Column'; E = { $C } }, Value + $sheet.Cells[$StartRow, $C] | Where-Object { -not [string]::IsNullOrEmpty($_.Value) } | Select-Object @{N = 'Column'; E = { $C } }, Value } } } @@ -125,103 +126,109 @@ } try { #Select worksheet - if (-not $WorksheetName) { $Worksheet = $ExcelPackage.Workbook.Worksheets[1] } + if ($WorksheetName -eq '*') { $Worksheet = $ExcelPackage.Workbook.Worksheets } + elseif (-not $WorksheetName) { $Worksheet = $ExcelPackage.Workbook.Worksheets[1] } elseif (-not ($Worksheet = $ExcelPackage.Workbook.Worksheets[$WorksheetName])) { throw "Worksheet '$WorksheetName' not found, the workbook only contains the worksheets '$($ExcelPackage.Workbook.Worksheets)'. If you only wish to select the first worksheet, please remove the '-WorksheetName' parameter." ; return } - #region Get rows and columns - #If we are doing dataonly it is quicker to work out which rows to ignore before processing the cells. - if (-not $EndRow ) { $EndRow = $Worksheet.Dimension.End.Row } - if (-not $EndColumn) { $EndColumn = $Worksheet.Dimension.End.Column } - $endAddress = [OfficeOpenXml.ExcelAddress]::TranslateFromR1C1("R[$EndRow]C[$EndColumn]", 0, 0) - if ($DataOnly) { - #If we are using headers startrow will be the header-row so examine data from startRow + 1, - if ($NoHeader) { $range = "A" + ($StartRow ) + ":" + $endAddress } - else { $range = "A" + ($StartRow + 1 ) + ":" + $endAddress } - #We're going to look at every cell and build 2 hash tables holding rows & columns which contain data. - #Want to Avoid 'select unique' operations & large Sorts, becuse time time taken increases with square - #of number of items (PS uses heapsort at large size). 
Instead keep a list of what we have seen, - #using Hash tables: "we've seen it" is all we need, no need to worry about "seen it before" / "Seen it many times". - $colHash = @{ } - $rowHash = @{ } - foreach ($cell in $Worksheet.Cells[$range]) { - if ($null -ne $cell.Value ) { $colHash[$cell.Start.Column] = 1; $rowHash[$cell.Start.row] = 1 } - } - $rows = ( $StartRow..$EndRow ).Where( { $rowHash[$_] }) - $columns = ($StartColumn..$EndColumn).Where( { $colHash[$_] }) - } - else { - $Columns = $StartColumn .. $EndColumn ; if ($StartColumn -gt $EndColumn) { Write-Warning -Message "Selecting columns $StartColumn to $EndColumn might give odd results." } - if ($NoHeader) { $rows = $StartRow..$EndRow ; if ($StartRow -gt $EndRow) { Write-Warning -Message "Selecting rows $StartRow to $EndRow might give odd results." } } - elseif ($HeaderName) { $rows = $StartRow..$EndRow } - else { - $rows = (1 + $StartRow)..$EndRow - if ($StartRow -eq 1 -and $EndRow -eq 1) { - $rows = 0 + $xlBook = [Ordered]@{} + foreach ($sheet in $Worksheet) { + $targetSheetname = $sheet.Name + $xlBook["$targetSheetname"] = @() + #region Get rows and columns + #If we are doing dataonly it is quicker to work out which rows to ignore before processing the cells. + if (-not $EndRow ) { $EndRow = $sheet.Dimension.End.Row } + if (-not $EndColumn) { $EndColumn = $sheet.Dimension.End.Column } + $endAddress = [OfficeOpenXml.ExcelAddress]::TranslateFromR1C1("R[$EndRow]C[$EndColumn]", 0, 0) + if ($DataOnly) { + #If we are using headers startrow will be the header-row so examine data from startRow + 1, + if ($NoHeader) { $range = "A" + ($StartRow ) + ":" + $endAddress } + else { $range = "A" + ($StartRow + 1 ) + ":" + $endAddress } + #We're going to look at every cell and build 2 hash tables holding rows & columns which contain data. + #Want to Avoid 'select unique' operations & large Sorts, because the time taken increases with the square + #of the number of items (PS uses heapsort at large size). 
Instead keep a list of what we have seen, + #using Hash tables: "we've seen it" is all we need, no need to worry about "seen it before" / "Seen it many times". + $colHash = @{ } + $rowHash = @{ } + foreach ($cell in $sheet.Cells[$range]) { + if ($null -ne $cell.Value ) { $colHash[$cell.Start.Column] = 1; $rowHash[$cell.Start.row] = 1 } } + $rows = ( $StartRow..$EndRow ).Where( { $rowHash[$_] }) + $columns = ($StartColumn..$EndColumn).Where( { $colHash[$_] }) } + else { + $Columns = $StartColumn .. $EndColumn ; if ($StartColumn -gt $EndColumn) { Write-Warning -Message "Selecting columns $StartColumn to $EndColumn might give odd results." } + if ($NoHeader) { $rows = $StartRow..$EndRow ; if ($StartRow -gt $EndRow) { Write-Warning -Message "Selecting rows $StartRow to $EndRow might give odd results." } } + elseif ($HeaderName) { $rows = $StartRow..$EndRow } + else { + $rows = (1 + $StartRow)..$EndRow + if ($StartRow -eq 1 -and $EndRow -eq 1) { + $rows = 0 + } + } - # ; if ($StartRow -ge $EndRow) { Write-Warning -Message "Selecting $StartRow as the header with data in $(1+$StartRow) to $EndRow might give odd results." } } - } - #endregion - #region Create property names - if ((-not $Columns) -or (-not ($PropertyNames = Get-PropertyNames -Columns $Columns -StartRow $StartRow))) { - throw "No column headers found on top row '$StartRow'. If column headers in the worksheet are not a requirement then please use the '-NoHeader' or '-HeaderName' parameter."; return - } - if ($Duplicates = $PropertyNames | Group-Object Value | Where-Object Count -GE 2) { - throw "Duplicate column headers found on row '$StartRow' in columns '$($Duplicates.Group.Column)'. 
Column headers must be unique, if this is not a requirement please use the '-NoHeader' or '-HeaderName' parameter."; return - } - #endregion - if (-not $rows) { - Write-Warning "Worksheet '$WorksheetName' in workbook '$Path' contains no data in the rows after top row '$StartRow'" - } - else { - #region Create one object per row - if ($AsText -or $AsDate) { - <#join items in AsText together with ~~~ . Escape any regex special characters... + # ; if ($StartRow -ge $EndRow) { Write-Warning -Message "Selecting $StartRow as the header with data in $(1+$StartRow) to $EndRow might give odd results." } } + } + #endregion + #region Create property names + if ((-not $Columns) -or (-not ($PropertyNames = Get-PropertyNames -Columns $Columns -StartRow $StartRow))) { + throw "No column headers found on top row '$StartRow'. If column headers in the worksheet are not a requirement then please use the '-NoHeader' or '-HeaderName' parameter."; return + } + if ($Duplicates = $PropertyNames | Group-Object Value | Where-Object Count -GE 2) { + throw "Duplicate column headers found on row '$StartRow' in columns '$($Duplicates.Group.Column)'. Column headers must be unique, if this is not a requirement please use the '-NoHeader' or '-HeaderName' parameter."; return + } + #endregion + if (-not $rows) { + Write-Warning "Worksheet '$WorksheetName' in workbook '$Path' contains no data in the rows after top row '$StartRow'" + } + else { + #region Create one object per row + if ($AsText -or $AsDate) { + <#join items in AsText together with ~~~ . Escape any regex special characters... # which turns "*" into "\*" make it ".*". 
Convert ~~~ to $|^ and top and tail with ^%; So if we get "Week", "[Time]" and "*date*" ; make the expression ^week$|^\[Time\]$|^.*Date.*$ $make a regex for this which is case insensitive (option 1) and compiled (option 8) #> - $TextColExpression = '' - if ($AsText) { - $TextColExpression += '(?^' + [regex]::Escape($AsText -join '~~~').replace('\*', '.*').replace('~~~', '$|^') + '$)' - } - if ($AsText -and $AsDate) { - $TextColExpression += "|" - } - if ($AsDate) { - $TextColExpression += '(?^' + [regex]::Escape($AsDate -join '~~~').replace('\*', '.*').replace('~~~', '$|^') + '$)' - } - $TextColRegEx = New-Object -TypeName regex -ArgumentList $TextColExpression , 9 - } - else { $TextColRegEx = $null } - foreach ($R in $rows) { - #Disabled write-verbose for speed - # Write-Verbose "Import row '$R'" - $NewRow = [Ordered]@{ } - if ($TextColRegEx) { - foreach ($P in $PropertyNames) { - $MatchTest = $TextColRegEx.Match($P.value) - if ($MatchTest.groups.name -eq "astext") { - $NewRow[$P.Value] = $Worksheet.Cells[$R, $P.Column].Text - } - elseif ($MatchTest.groups.name -eq "asdate" -and $Worksheet.Cells[$R, $P.Column].Value -is [System.ValueType]) { - $NewRow[$P.Value] = [datetime]::FromOADate(($Worksheet.Cells[$R, $P.Column].Value)) - } - else { $NewRow[$P.Value] = $Worksheet.Cells[$R, $P.Column].Value } + $TextColExpression = '' + if ($AsText) { + $TextColExpression += '(?^' + [regex]::Escape($AsText -join '~~~').replace('\*', '.*').replace('~~~', '$|^') + '$)' } - } - else { - foreach ($P in $PropertyNames) { - $NewRow[$P.Value] = $Worksheet.Cells[$R, $P.Column].Value - # Write-Verbose "Import cell '$($Worksheet.Cells[$R, $P.Column].Address)' with property name '$($p.Value)' and value '$($Worksheet.Cells[$R, $P.Column].Value)'." 
+ if ($AsText -and $AsDate) { + $TextColExpression += "|" } + if ($AsDate) { + $TextColExpression += '(?^' + [regex]::Escape($AsDate -join '~~~').replace('\*', '.*').replace('~~~', '$|^') + '$)' + } + $TextColRegEx = New-Object -TypeName regex -ArgumentList $TextColExpression , 9 } - [PSCustomObject]$NewRow + else { $TextColRegEx = $null } + foreach ($R in $rows) { + #Disabled write-verbose for speed + # Write-Verbose "Import row '$R'" + $NewRow = [Ordered]@{ } + if ($TextColRegEx) { + foreach ($P in $PropertyNames) { + $MatchTest = $TextColRegEx.Match($P.value) + if ($MatchTest.groups.name -eq "astext") { + $NewRow[$P.Value] = $sheet.Cells[$R, $P.Column].Text + } + elseif ($MatchTest.groups.name -eq "asdate" -and $sheet.Cells[$R, $P.Column].Value -is [System.ValueType]) { + $NewRow[$P.Value] = [datetime]::FromOADate(($sheet.Cells[$R, $P.Column].Value)) + } + else { $NewRow[$P.Value] = $sheet.Cells[$R, $P.Column].Value } + } + } + else { + foreach ($P in $PropertyNames) { + $NewRow[$P.Value] = $sheet.Cells[$R, $P.Column].Value + # Write-Verbose "Import cell '$($Worksheet.Cells[$R, $P.Column].Address)' with property name '$($p.Value)' and value '$($Worksheet.Cells[$R, $P.Column].Value)'." 
+ } + } + $xlBook["$targetSheetname"] += [PSCustomObject]$NewRow + } + #endregion } - #endregion } } catch { throw "Failed importing the Excel workbook '$Path' with worksheet '$WorksheetName': $_"; return } @@ -229,6 +236,18 @@ $EndRow = 0 $EndColumn = 0 if ($Path) { $stream.close(); $ExcelPackage.Dispose() } + + if ($NotAsDictionary) { + foreach ($entry in $xlbook.GetEnumerator()) { + $entry.Value + } + } + elseif ($Worksheet.Count -eq 1) { + $xlBook["$targetSheetname"] + } + else { + $xlBook + } } } } diff --git a/__tests__/ImportExcelHeaderName.tests.ps1 b/__tests__/ImportExcelHeaderName.tests.ps1 index 90ef05e..48d8572 100644 --- a/__tests__/ImportExcelHeaderName.tests.ps1 +++ b/__tests__/ImportExcelHeaderName.tests.ps1 @@ -63,6 +63,10 @@ Describe "Import-Excel on a sheet with no headings" { } } + AfterAll { + Remove-Item $PSScriptRoot\testImportExcelSparse.xlsx -ErrorAction SilentlyContinue + } + It "Import-Excel should have this shape" { $actual = @(Import-Excel $xlfile) diff --git a/__tests__/ImportExcelTests/ImportExcelReadSheets.tests.ps1 b/__tests__/ImportExcelTests/ImportExcelReadSheets.tests.ps1 new file mode 100644 index 0000000..92d1589 --- /dev/null +++ b/__tests__/ImportExcelTests/ImportExcelReadSheets.tests.ps1 @@ -0,0 +1,63 @@ +#Requires -Modules Pester +[Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseDeclaredVarsMoreThanAssignments', '', Justification = 'False Positives')] +param() + +Import-Module $PSScriptRoot\..\..\ImportExcel.psd1 -Force + +Describe 'Different ways to import sheets' -Tag ImportExcelReadSheets { + BeforeAll { + $xlFilename = "$PSScriptRoot\yearlySales.xlsx" + } + + Context 'Test reading sheets' { + It 'Should read one sheet' { + $actual = Import-Excel $xlFilename + + $actual.Count | Should -Be 100 + $actual[0].Month | Should -BeExactly "April" + $actual[99].Month | Should -BeExactly "April" + } + + It 'Should read two sheets' { + $actual = Import-Excel $xlFilename march, june + + $actual.keys.Count | Should -Be 2 + 
$actual["March"].Count | Should -Be 100 + $actual["June"].Count | Should -Be 100 + } + + It 'Should read all the sheets' { + $actual = Import-Excel $xlFilename * + + $actual.keys.Count | Should -Be 12 + + $actual["January"].Count | Should -Be 100 + $actual["February"].Count | Should -Be 100 + $actual["March"].Count | Should -Be 100 + $actual["April"].Count | Should -Be 100 + $actual["May"].Count | Should -Be 100 + $actual["June"].Count | Should -Be 100 + $actual["July"].Count | Should -Be 100 + $actual["August"].Count | Should -Be 100 + $actual["September"].Count | Should -Be 100 + $actual["October"].Count | Should -Be 100 + $actual["November"].Count | Should -Be 100 + $actual["December"].Count | Should -Be 100 + } + + It 'Should throw if it cannot find the sheet' { + { Import-Excel $xlFilename april, june, notthere } | Should -Throw + } + + It 'Should return an array not a dictionary' { + $actual = Import-Excel $xlFilename april, june -NotAsDictionary + + $actual.Count | Should -Be 200 + $group = $actual | Group-Object month -NoElement + + $group.Count | Should -Be 2 + $group[0].Name | Should -BeExactly 'April' + $group[1].Name | Should -BeExactly 'June' + } + } +} \ No newline at end of file diff --git a/__tests__/ImportExcelTests/yearlySales.xlsx b/__tests__/ImportExcelTests/yearlySales.xlsx new file mode 100644 index 0000000..d30993a Binary files /dev/null and b/__tests__/ImportExcelTests/yearlySales.xlsx differ diff --git a/changelog.md b/changelog.md index 0381954..8959cff 100644 --- a/changelog.md +++ b/changelog.md @@ -4,6 +4,12 @@ - Importing multiple files with Import-Excel by pipeline uses only the first file for the row count https://github.com/dfinke/ImportExcel/issues/1172 +## New Features + +- Import-Excel now supports importing multiple sheets. It can return either a dictionary of all sheets or a single array of all sheets combined. 
+ - `Import-Excel $xlfile *` # reads all sheets, returns all data in a dictionary + - `Import-Excel $xlfile * -NotAsDictionary` # reads all sheets, returns all data in a single array + # v7.4.2 - Thank you [James Mueller](https://github.com/jamesmmueller) Updated `ConvertFrom-ExcelToSQLInsert` to handle single quotes in the SQL statement.