Merge pull request #1161 from dfinke/adding-to-import-excel

Update and improve Import-Excel for reading multiple sheets
2026-02-22 20:33:24 +00:00 · 2022-04-30 08:04:39 -04:00
parent 9273261b34 b2119f08f5
commit 8d56a351ff
8 changed files with 194 additions and 86 deletions
--- a/Examples/Import-Excel/ImportMultipleSheetsAsArray.ps1
+++ b/Examples/Import-Excel/ImportMultipleSheetsAsArray.ps1
@@ -0,0 +1,7 @@
+Import-Module $PSScriptRoot\..\..\ImportExcel.psd1 -Force
+
+$xlfile = "$PSScriptRoot\yearlySales.xlsx"
+
+$result = Import-Excel $xlfile * -NotAsDictionary
+
+$result | Measure-Object
--- a/Examples/Import-Excel/ImportMultipleSheetsAsHashtable.ps1
+++ b/Examples/Import-Excel/ImportMultipleSheetsAsHashtable.ps1
@@ -0,0 +1,9 @@
+Import-Module $PSScriptRoot\..\..\ImportExcel.psd1 -Force
+
+$xlfile = "$PSScriptRoot\yearlySales.xlsx"
+
+$result = Import-Excel $xlfile *
+
+foreach ($sheet in $result.Values) {
+    $sheet
+}
--- a/Examples/Import-Excel/yearlySales.xlsx
+++ b/Examples/Import-Excel/yearlySales.xlsx
--- a/Public/Import-Excel.ps1
+++ b/Public/Import-Excel.ps1
@@ -15,7 +15,7 @@
        [Alias('Sheet')]
        [Parameter(Position = 1)]
        [ValidateNotNullOrEmpty()]
-        [String]$WorksheetName,
+        [String[]]$WorksheetName,
        [Parameter(ParameterSetName = 'PathB'   , Mandatory)]
        [Parameter(ParameterSetName = 'PackageB', Mandatory)]
        [String[]]$HeaderName ,
@@ -36,7 +36,8 @@
        [string[]]$AsDate,
        [ValidateNotNullOrEmpty()]
        [String]$Password,
-        [Int[]]$ImportColumns
+        [Int[]]$ImportColumns,
+        [Switch]$NotAsDictionary
    )
    end {
        $sw = [System.Diagnostics.Stopwatch]::StartNew()
@@ -64,7 +65,7 @@

            try {
                if ($ImportColumns) {
-                    $end = $Worksheet.Dimension.End.Column
+                    $end = $sheet.Dimension.End.Column
                    # Check $ImportColumns
                    if ($ImportColumns[0] -le 0) { throw "The first entry in ImportColumns must be equal or greater 1" ; return }
                    # Check $StartColumn and $EndColumn
@@ -95,7 +96,7 @@

                    foreach ($C in $Columns) {
                        #allow "False" or "0" to be column headings
-                        $Worksheet.Cells[$StartRow, $C] | Where-Object { -not [string]::IsNullOrEmpty($_.Value) } | Select-Object @{N = 'Column'; E = { $C } }, Value
+                        $sheet.Cells[$StartRow, $C] | Where-Object { -not [string]::IsNullOrEmpty($_.Value) } | Select-Object @{N = 'Column'; E = { $C } }, Value
                    }
                }
            }
@@ -125,103 +126,109 @@
            }
            try {
                #Select worksheet
-                if (-not  $WorksheetName) { $Worksheet = $ExcelPackage.Workbook.Worksheets[1] }
+                if ($WorksheetName -eq '*') { $Worksheet = $ExcelPackage.Workbook.Worksheets }
+                elseif (-not  $WorksheetName) { $Worksheet = $ExcelPackage.Workbook.Worksheets[1] }
                elseif (-not ($Worksheet = $ExcelPackage.Workbook.Worksheets[$WorksheetName])) {
                    throw "Worksheet '$WorksheetName' not found, the workbook only contains the worksheets '$($ExcelPackage.Workbook.Worksheets)'. If you only wish to select the first worksheet, please remove the '-WorksheetName' parameter." ; return
                }

-                #region Get rows and columns
-                #If we are doing dataonly it is quicker to work out which rows to ignore before processing the cells.
-                if (-not $EndRow   ) { $EndRow = $Worksheet.Dimension.End.Row }
-                if (-not $EndColumn) { $EndColumn = $Worksheet.Dimension.End.Column }
-                $endAddress = [OfficeOpenXml.ExcelAddress]::TranslateFromR1C1("R[$EndRow]C[$EndColumn]", 0, 0)
-                if ($DataOnly) {
-                    #If we are using headers startrow will be the header-row so examine data from startRow + 1,
-                    if ($NoHeader) { $range = "A" + ($StartRow     ) + ":" + $endAddress }
-                    else { $range = "A" + ($StartRow + 1 ) + ":" + $endAddress }
-                    #We're going to look at every cell and build 2 hash tables holding rows & columns which contain data.
-                    #Want to Avoid 'select unique' operations & large Sorts, becuse time time taken increases with square
-                    #of number of items (PS uses heapsort at large size). Instead keep a list of what we have seen,
-                    #using Hash tables: "we've seen it" is all we need, no need to worry about "seen it before" / "Seen it many times".
-                    $colHash = @{ }
-                    $rowHash = @{ }
-                    foreach ($cell in $Worksheet.Cells[$range]) {
-                        if ($null -ne $cell.Value ) { $colHash[$cell.Start.Column] = 1; $rowHash[$cell.Start.row] = 1 }
-                    }
-                    $rows = (   $StartRow..$EndRow   ).Where( { $rowHash[$_] })
-                    $columns = ($StartColumn..$EndColumn).Where( { $colHash[$_] })
-                }
-                else {
-                    $Columns = $StartColumn .. $EndColumn  ; if ($StartColumn -gt $EndColumn) { Write-Warning -Message "Selecting columns $StartColumn to $EndColumn might give odd results." }
-                    if ($NoHeader) { $rows = $StartRow..$EndRow ; if ($StartRow -gt $EndRow) { Write-Warning -Message "Selecting rows $StartRow to $EndRow might give odd results." } }
-                    elseif ($HeaderName) { $rows = $StartRow..$EndRow }
-                    else {
-                        $rows = (1 + $StartRow)..$EndRow
-                        if ($StartRow -eq 1 -and $EndRow -eq 1) {
-                            $rows = 0
+                $xlBook = [Ordered]@{}
+                foreach ($sheet in $Worksheet) {
+                    $targetSheetname = $sheet.Name
+                    $xlBook["$targetSheetname"] = @()
+                    #region Get rows and columns
+                    #If we are doing dataonly it is quicker to work out which rows to ignore before processing the cells.
+                    if (-not $EndRow   ) { $EndRow = $sheet.Dimension.End.Row }
+                    if (-not $EndColumn) { $EndColumn = $sheet.Dimension.End.Column }
+                    $endAddress = [OfficeOpenXml.ExcelAddress]::TranslateFromR1C1("R[$EndRow]C[$EndColumn]", 0, 0)
+                    if ($DataOnly) {
+                        #If we are using headers startrow will be the header-row so examine data from startRow + 1,
+                        if ($NoHeader) { $range = "A" + ($StartRow     ) + ":" + $endAddress }
+                        else { $range = "A" + ($StartRow + 1 ) + ":" + $endAddress }
+                        #We're going to look at every cell and build 2 hash tables holding rows & columns which contain data.
+                        #Want to Avoid 'select unique' operations & large Sorts, because the time taken increases with the square
+                        #of number of items (PS uses heapsort at large size). Instead keep a list of what we have seen,
+                        #using Hash tables: "we've seen it" is all we need, no need to worry about "seen it before" / "Seen it many times".
+                        $colHash = @{ }
+                        $rowHash = @{ }
+                        foreach ($cell in $sheet.Cells[$range]) {
+                            if ($null -ne $cell.Value ) { $colHash[$cell.Start.Column] = 1; $rowHash[$cell.Start.row] = 1 }
                        }
+                        $rows = (   $StartRow..$EndRow   ).Where( { $rowHash[$_] })
+                        $columns = ($StartColumn..$EndColumn).Where( { $colHash[$_] })
                    }
+                    else {
+                        $Columns = $StartColumn .. $EndColumn  ; if ($StartColumn -gt $EndColumn) { Write-Warning -Message "Selecting columns $StartColumn to $EndColumn might give odd results." }
+                        if ($NoHeader) { $rows = $StartRow..$EndRow ; if ($StartRow -gt $EndRow) { Write-Warning -Message "Selecting rows $StartRow to $EndRow might give odd results." } }
+                        elseif ($HeaderName) { $rows = $StartRow..$EndRow }
+                        else {
+                            $rows = (1 + $StartRow)..$EndRow
+                            if ($StartRow -eq 1 -and $EndRow -eq 1) {
+                                $rows = 0
+                            }
+                        }

-                    # ; if ($StartRow -ge $EndRow) { Write-Warning -Message "Selecting $StartRow as the header with data in $(1+$StartRow) to $EndRow might give odd results." } }
-                }
-                #endregion
-                #region Create property names
-                if ((-not $Columns) -or (-not ($PropertyNames = Get-PropertyNames -Columns $Columns -StartRow $StartRow))) {
-                    throw "No column headers found on top row '$StartRow'. If column headers in the worksheet are not a requirement then please use the '-NoHeader' or '-HeaderName' parameter."; return
-                }
-                if ($Duplicates = $PropertyNames | Group-Object Value | Where-Object Count -GE 2) {
-                    throw "Duplicate column headers found on row '$StartRow' in columns '$($Duplicates.Group.Column)'. Column headers must be unique, if this is not a requirement please use the '-NoHeader' or '-HeaderName' parameter."; return
-                }
-                #endregion
-                if (-not $rows) {
-                    Write-Warning "Worksheet '$WorksheetName' in workbook '$Path' contains no data in the rows after top row '$StartRow'"
-                }
-                else {
-                    #region Create one object per row
-                    if ($AsText -or $AsDate) {
-                        <#join items in AsText together with ~~~ . Escape any regex special characters...
+                        # ; if ($StartRow -ge $EndRow) { Write-Warning -Message "Selecting $StartRow as the header with data in $(1+$StartRow) to $EndRow might give odd results." } }
+                    }
+                    #endregion
+                    #region Create property names
+                    if ((-not $Columns) -or (-not ($PropertyNames = Get-PropertyNames -Columns $Columns -StartRow $StartRow))) {
+                        throw "No column headers found on top row '$StartRow'. If column headers in the worksheet are not a requirement then please use the '-NoHeader' or '-HeaderName' parameter."; return
+                    }
+                    if ($Duplicates = $PropertyNames | Group-Object Value | Where-Object Count -GE 2) {
+                        throw "Duplicate column headers found on row '$StartRow' in columns '$($Duplicates.Group.Column)'. Column headers must be unique, if this is not a requirement please use the '-NoHeader' or '-HeaderName' parameter."; return
+                    }
+                    #endregion
+                    if (-not $rows) {
+                        Write-Warning "Worksheet '$WorksheetName' in workbook '$Path' contains no data in the rows after top row '$StartRow'"
+                    }
+                    else {
+                        #region Create one object per row
+                        if ($AsText -or $AsDate) {
+                            <#join items in AsText together with ~~~ . Escape any regex special characters...
                        # which turns "*" into "\*" make it ".*". Convert ~~~ to $|^ and top and tail with ^%;
                        So if we get "Week", "[Time]" and "*date*" ; make the expression ^week$|^\[Time\]$|^.*Date.*$
                        $make a regex for this which is case insensitive (option 1) and compiled (option 8)
                        #>
-                        $TextColExpression = ''
-                        if ($AsText) {
-                            $TextColExpression += '(?<astext>^' + [regex]::Escape($AsText -join '~~~').replace('\*', '.*').replace('~~~', '$|^') + '$)'
-                        }
-                        if ($AsText -and $AsDate) {
-                            $TextColExpression += "|"
-                        }
-                        if ($AsDate) {
-                            $TextColExpression += '(?<asDate>^' + [regex]::Escape($AsDate -join '~~~').replace('\*', '.*').replace('~~~', '$|^') + '$)'
-                        }
-                        $TextColRegEx = New-Object -TypeName regex -ArgumentList $TextColExpression , 9
-                    }
-                    else { $TextColRegEx = $null }
-                    foreach ($R in $rows) {
-                        #Disabled write-verbose for speed
-                        #  Write-Verbose "Import row '$R'"
-                        $NewRow = [Ordered]@{ }
-                        if ($TextColRegEx) {
-                            foreach ($P in $PropertyNames) {
-                                $MatchTest = $TextColRegEx.Match($P.value)
-                                if ($MatchTest.groups.name -eq "astext") {
-                                    $NewRow[$P.Value] = $Worksheet.Cells[$R, $P.Column].Text
-                                }
-                                elseif ($MatchTest.groups.name -eq "asdate" -and $Worksheet.Cells[$R, $P.Column].Value -is [System.ValueType]) {
-                                    $NewRow[$P.Value] = [datetime]::FromOADate(($Worksheet.Cells[$R, $P.Column].Value))
-                                }
-                                else { $NewRow[$P.Value] = $Worksheet.Cells[$R, $P.Column].Value }
+                            $TextColExpression = ''
+                            if ($AsText) {
+                                $TextColExpression += '(?<astext>^' + [regex]::Escape($AsText -join '~~~').replace('\*', '.*').replace('~~~', '$|^') + '$)'
                            }
-                        }
-                        else {
-                            foreach ($P in $PropertyNames) {
-                                $NewRow[$P.Value] = $Worksheet.Cells[$R, $P.Column].Value
-                                #    Write-Verbose "Import cell '$($Worksheet.Cells[$R, $P.Column].Address)' with property name '$($p.Value)' and value '$($Worksheet.Cells[$R, $P.Column].Value)'."
+                            if ($AsText -and $AsDate) {
+                                $TextColExpression += "|"
                            }
+                            if ($AsDate) {
+                                $TextColExpression += '(?<asDate>^' + [regex]::Escape($AsDate -join '~~~').replace('\*', '.*').replace('~~~', '$|^') + '$)'
+                            }
+                            $TextColRegEx = New-Object -TypeName regex -ArgumentList $TextColExpression , 9
                        }
-                        [PSCustomObject]$NewRow
+                        else { $TextColRegEx = $null }
+                        foreach ($R in $rows) {
+                            #Disabled write-verbose for speed
+                            #  Write-Verbose "Import row '$R'"
+                            $NewRow = [Ordered]@{ }
+                            if ($TextColRegEx) {
+                                foreach ($P in $PropertyNames) {
+                                    $MatchTest = $TextColRegEx.Match($P.value)
+                                    if ($MatchTest.groups.name -eq "astext") {
+                                        $NewRow[$P.Value] = $sheet.Cells[$R, $P.Column].Text
+                                    }
+                                    elseif ($MatchTest.groups.name -eq "asdate" -and $sheet.Cells[$R, $P.Column].Value -is [System.ValueType]) {
+                                        $NewRow[$P.Value] = [datetime]::FromOADate(($sheet.Cells[$R, $P.Column].Value))
+                                    }
+                                    else { $NewRow[$P.Value] = $sheet.Cells[$R, $P.Column].Value }
+                                }
+                            }
+                            else {
+                                foreach ($P in $PropertyNames) {
+                                    $NewRow[$P.Value] = $sheet.Cells[$R, $P.Column].Value
+                                    #    Write-Verbose "Import cell '$($Worksheet.Cells[$R, $P.Column].Address)' with property name '$($p.Value)' and value '$($Worksheet.Cells[$R, $P.Column].Value)'."
+                                }
+                            }
+                            $xlBook["$targetSheetname"] += [PSCustomObject]$NewRow
+                        }
+                        #endregion
                    }
-                    #endregion
                }
            }
            catch { throw "Failed importing the Excel workbook '$Path' with worksheet '$WorksheetName': $_"; return }
@@ -229,6 +236,18 @@
                $EndRow = 0
                $EndColumn = 0
                if ($Path) { $stream.close(); $ExcelPackage.Dispose() }
+
+                if ($NotAsDictionary) {
+                    foreach ($entry in $xlbook.GetEnumerator()) {
+                        $entry.Value
+                    }
+                }
+                elseif ($Worksheet.Count -eq 1) {
+                    $xlBook["$targetSheetname"]
+                }
+                else {
+                    $xlBook
+                }
            }
        }
    }
--- a/tests/ImportExcelHeaderName.tests.ps1
+++ b/tests/ImportExcelHeaderName.tests.ps1
@@ -63,6 +63,10 @@ Describe "Import-Excel on a sheet with no headings" {
        }
    }

+    AfterAll {
+        Remove-Item $PSScriptRoot\testImportExcelSparse.xlsx -ErrorAction SilentlyContinue
+    }
+
    It "Import-Excel should have this shape" {
        $actual = @(Import-Excel $xlfile)

--- a/tests/ImportExcelTests/ImportExcelReadSheets.tests.ps1
+++ b/tests/ImportExcelTests/ImportExcelReadSheets.tests.ps1
@@ -0,0 +1,63 @@
+#Requires -Modules Pester
+[Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseDeclaredVarsMoreThanAssignments', '', Justification = 'False Positives')]
+param()
+
+Import-Module $PSScriptRoot\..\..\ImportExcel.psd1 -Force
+
+Describe 'Different ways to import sheets' -Tag ImportExcelReadSheets {
+    BeforeAll {
+        $xlFilename = "$PSScriptRoot\yearlySales.xlsx"
+    }
+
+    Context 'Test reading sheets' {
+        It 'Should read one sheet' {
+            $actual = Import-Excel $xlFilename
+
+            $actual.Count | Should -Be 100
+            $actual[0].Month | Should -BeExactly "April"
+            $actual[99].Month | Should -BeExactly "April"
+        }
+
+        It 'Should read two sheets' {
+            $actual = Import-Excel $xlFilename march, june
+
+            $actual.keys.Count | Should -Be 2
+            $actual["March"].Count | Should -Be 100
+            $actual["June"].Count | Should -Be 100
+        }
+
+        It 'Should read all the sheets' {
+            $actual = Import-Excel $xlFilename *
+
+            $actual.keys.Count | Should -Be 12
+
+            $actual["January"].Count | Should -Be 100
+            $actual["February"].Count | Should -Be 100
+            $actual["March"].Count | Should -Be 100
+            $actual["April"].Count | Should -Be 100
+            $actual["May"].Count | Should -Be 100
+            $actual["June"].Count | Should -Be 100
+            $actual["July"].Count | Should -Be 100
+            $actual["August"].Count | Should -Be 100
+            $actual["September"].Count | Should -Be 100
+            $actual["October"].Count | Should -Be 100
+            $actual["November"].Count | Should -Be 100
+            $actual["December"].Count | Should -Be 100
+        }
+
+        It 'Should throw if it cannot find the sheet' {
+            { Import-Excel $xlFilename april, june, notthere } | Should -Throw
+        }
+
+        It 'Should return an array not a dictionary' {
+            $actual = Import-Excel $xlFilename april, june -NotAsDictionary
+            
+            $actual.Count | Should -Be 200
+            $group = $actual | Group-Object month -NoElement
+
+            $group.Count | Should -Be 2
+            $group[0].Name | Should -BeExactly 'April'
+            $group[1].Name | Should -BeExactly 'June'
+        }
+    }
+}
--- a/tests/ImportExcelTests/yearlySales.xlsx
+++ b/tests/ImportExcelTests/yearlySales.xlsx
--- a/changelog.md
+++ b/changelog.md
@@ -4,6 +4,12 @@

 - Importing multiple files with Import-Excel by pipeline uses only the first file for the row count https://github.com/dfinke/ImportExcel/issues/1172

+## New Features
+
+- Import-Excel now supports importing multiple sheets. It can return either a dictionary of all sheets or a single array of all sheets combined.
+    - `Import-Excel $xlfile *`                  # reads all sheets, returns all data in a dictionary
+    - `Import-Excel $xlfile * -NotAsDictionary` # reads all sheets, returns all data in a single array
+
 # v7.4.2

 - Thank you [James Mueller](https://github.com/jamesmmueller) Updated `ConvertFrom-ExcelToSQLInsert` to handle single quotes in the SQL statement.