mirror of
https://github.com/dfinke/ImportExcel.git
synced 2025-12-06 00:23:20 +00:00
Merge pull request #1161 from dfinke/adding-to-import-excel
Update and improve Import-Excel for reading multiple sheets
This commit is contained in:
7
Examples/Import-Excel/ImportMultipleSheetsAsArray.ps1
Normal file
7
Examples/Import-Excel/ImportMultipleSheetsAsArray.ps1
Normal file
@@ -0,0 +1,7 @@
|
||||
Import-Module $PSScriptRoot\..\..\ImportExcel.psd1 -Force
|
||||
|
||||
$xlfile = "$PSScriptRoot\yearlySales.xlsx"
|
||||
|
||||
$result = Import-Excel $xlfile * -NotAsDictionary
|
||||
|
||||
$result | Measure-Object
|
||||
@@ -0,0 +1,9 @@
|
||||
Import-Module $PSScriptRoot\..\..\ImportExcel.psd1 -Force
|
||||
|
||||
$xlfile = "$PSScriptRoot\yearlySales.xlsx"
|
||||
|
||||
$result = Import-Excel $xlfile *
|
||||
|
||||
foreach ($sheet in $result.Values) {
|
||||
$sheet
|
||||
}
|
||||
BIN
Examples/Import-Excel/yearlySales.xlsx
Normal file
BIN
Examples/Import-Excel/yearlySales.xlsx
Normal file
Binary file not shown.
@@ -15,7 +15,7 @@
|
||||
[Alias('Sheet')]
|
||||
[Parameter(Position = 1)]
|
||||
[ValidateNotNullOrEmpty()]
|
||||
[String]$WorksheetName,
|
||||
[String[]]$WorksheetName,
|
||||
[Parameter(ParameterSetName = 'PathB' , Mandatory)]
|
||||
[Parameter(ParameterSetName = 'PackageB', Mandatory)]
|
||||
[String[]]$HeaderName ,
|
||||
@@ -36,7 +36,8 @@
|
||||
[string[]]$AsDate,
|
||||
[ValidateNotNullOrEmpty()]
|
||||
[String]$Password,
|
||||
[Int[]]$ImportColumns
|
||||
[Int[]]$ImportColumns,
|
||||
[Switch]$NotAsDictionary
|
||||
)
|
||||
end {
|
||||
$sw = [System.Diagnostics.Stopwatch]::StartNew()
|
||||
@@ -64,7 +65,7 @@
|
||||
|
||||
try {
|
||||
if ($ImportColumns) {
|
||||
$end = $Worksheet.Dimension.End.Column
|
||||
$end = $sheet.Dimension.End.Column
|
||||
# Check $ImportColumns
|
||||
if ($ImportColumns[0] -le 0) { throw "The first entry in ImportColumns must be equal or greater 1" ; return }
|
||||
# Check $StartColumn and $EndColumn
|
||||
@@ -95,7 +96,7 @@
|
||||
|
||||
foreach ($C in $Columns) {
|
||||
#allow "False" or "0" to be column headings
|
||||
$Worksheet.Cells[$StartRow, $C] | Where-Object { -not [string]::IsNullOrEmpty($_.Value) } | Select-Object @{N = 'Column'; E = { $C } }, Value
|
||||
$sheet.Cells[$StartRow, $C] | Where-Object { -not [string]::IsNullOrEmpty($_.Value) } | Select-Object @{N = 'Column'; E = { $C } }, Value
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -125,103 +126,109 @@
|
||||
}
|
||||
try {
|
||||
#Select worksheet
|
||||
if (-not $WorksheetName) { $Worksheet = $ExcelPackage.Workbook.Worksheets[1] }
|
||||
if ($WorksheetName -eq '*') { $Worksheet = $ExcelPackage.Workbook.Worksheets }
|
||||
elseif (-not $WorksheetName) { $Worksheet = $ExcelPackage.Workbook.Worksheets[1] }
|
||||
elseif (-not ($Worksheet = $ExcelPackage.Workbook.Worksheets[$WorksheetName])) {
|
||||
throw "Worksheet '$WorksheetName' not found, the workbook only contains the worksheets '$($ExcelPackage.Workbook.Worksheets)'. If you only wish to select the first worksheet, please remove the '-WorksheetName' parameter." ; return
|
||||
}
|
||||
|
||||
#region Get rows and columns
|
||||
#If we are doing dataonly it is quicker to work out which rows to ignore before processing the cells.
|
||||
if (-not $EndRow ) { $EndRow = $Worksheet.Dimension.End.Row }
|
||||
if (-not $EndColumn) { $EndColumn = $Worksheet.Dimension.End.Column }
|
||||
$endAddress = [OfficeOpenXml.ExcelAddress]::TranslateFromR1C1("R[$EndRow]C[$EndColumn]", 0, 0)
|
||||
if ($DataOnly) {
|
||||
#If we are using headers startrow will be the header-row so examine data from startRow + 1,
|
||||
if ($NoHeader) { $range = "A" + ($StartRow ) + ":" + $endAddress }
|
||||
else { $range = "A" + ($StartRow + 1 ) + ":" + $endAddress }
|
||||
#We're going to look at every cell and build 2 hash tables holding rows & columns which contain data.
|
||||
#Want to Avoid 'select unique' operations & large Sorts, becuse time time taken increases with square
|
||||
#of number of items (PS uses heapsort at large size). Instead keep a list of what we have seen,
|
||||
#using Hash tables: "we've seen it" is all we need, no need to worry about "seen it before" / "Seen it many times".
|
||||
$colHash = @{ }
|
||||
$rowHash = @{ }
|
||||
foreach ($cell in $Worksheet.Cells[$range]) {
|
||||
if ($null -ne $cell.Value ) { $colHash[$cell.Start.Column] = 1; $rowHash[$cell.Start.row] = 1 }
|
||||
}
|
||||
$rows = ( $StartRow..$EndRow ).Where( { $rowHash[$_] })
|
||||
$columns = ($StartColumn..$EndColumn).Where( { $colHash[$_] })
|
||||
}
|
||||
else {
|
||||
$Columns = $StartColumn .. $EndColumn ; if ($StartColumn -gt $EndColumn) { Write-Warning -Message "Selecting columns $StartColumn to $EndColumn might give odd results." }
|
||||
if ($NoHeader) { $rows = $StartRow..$EndRow ; if ($StartRow -gt $EndRow) { Write-Warning -Message "Selecting rows $StartRow to $EndRow might give odd results." } }
|
||||
elseif ($HeaderName) { $rows = $StartRow..$EndRow }
|
||||
else {
|
||||
$rows = (1 + $StartRow)..$EndRow
|
||||
if ($StartRow -eq 1 -and $EndRow -eq 1) {
|
||||
$rows = 0
|
||||
$xlBook = [Ordered]@{}
|
||||
foreach ($sheet in $Worksheet) {
|
||||
$targetSheetname = $sheet.Name
|
||||
$xlBook["$targetSheetname"] = @()
|
||||
#region Get rows and columns
|
||||
#If we are doing dataonly it is quicker to work out which rows to ignore before processing the cells.
|
||||
if (-not $EndRow ) { $EndRow = $sheet.Dimension.End.Row }
|
||||
if (-not $EndColumn) { $EndColumn = $sheet.Dimension.End.Column }
|
||||
$endAddress = [OfficeOpenXml.ExcelAddress]::TranslateFromR1C1("R[$EndRow]C[$EndColumn]", 0, 0)
|
||||
if ($DataOnly) {
|
||||
#If we are using headers startrow will be the header-row so examine data from startRow + 1,
|
||||
if ($NoHeader) { $range = "A" + ($StartRow ) + ":" + $endAddress }
|
||||
else { $range = "A" + ($StartRow + 1 ) + ":" + $endAddress }
|
||||
#We're going to look at every cell and build 2 hash tables holding rows & columns which contain data.
|
||||
#Want to Avoid 'select unique' operations & large Sorts, becuse time time taken increases with square
|
||||
#of number of items (PS uses heapsort at large size). Instead keep a list of what we have seen,
|
||||
#using Hash tables: "we've seen it" is all we need, no need to worry about "seen it before" / "Seen it many times".
|
||||
$colHash = @{ }
|
||||
$rowHash = @{ }
|
||||
foreach ($cell in $sheet.Cells[$range]) {
|
||||
if ($null -ne $cell.Value ) { $colHash[$cell.Start.Column] = 1; $rowHash[$cell.Start.row] = 1 }
|
||||
}
|
||||
$rows = ( $StartRow..$EndRow ).Where( { $rowHash[$_] })
|
||||
$columns = ($StartColumn..$EndColumn).Where( { $colHash[$_] })
|
||||
}
|
||||
else {
|
||||
$Columns = $StartColumn .. $EndColumn ; if ($StartColumn -gt $EndColumn) { Write-Warning -Message "Selecting columns $StartColumn to $EndColumn might give odd results." }
|
||||
if ($NoHeader) { $rows = $StartRow..$EndRow ; if ($StartRow -gt $EndRow) { Write-Warning -Message "Selecting rows $StartRow to $EndRow might give odd results." } }
|
||||
elseif ($HeaderName) { $rows = $StartRow..$EndRow }
|
||||
else {
|
||||
$rows = (1 + $StartRow)..$EndRow
|
||||
if ($StartRow -eq 1 -and $EndRow -eq 1) {
|
||||
$rows = 0
|
||||
}
|
||||
}
|
||||
|
||||
# ; if ($StartRow -ge $EndRow) { Write-Warning -Message "Selecting $StartRow as the header with data in $(1+$StartRow) to $EndRow might give odd results." } }
|
||||
}
|
||||
#endregion
|
||||
#region Create property names
|
||||
if ((-not $Columns) -or (-not ($PropertyNames = Get-PropertyNames -Columns $Columns -StartRow $StartRow))) {
|
||||
throw "No column headers found on top row '$StartRow'. If column headers in the worksheet are not a requirement then please use the '-NoHeader' or '-HeaderName' parameter."; return
|
||||
}
|
||||
if ($Duplicates = $PropertyNames | Group-Object Value | Where-Object Count -GE 2) {
|
||||
throw "Duplicate column headers found on row '$StartRow' in columns '$($Duplicates.Group.Column)'. Column headers must be unique, if this is not a requirement please use the '-NoHeader' or '-HeaderName' parameter."; return
|
||||
}
|
||||
#endregion
|
||||
if (-not $rows) {
|
||||
Write-Warning "Worksheet '$WorksheetName' in workbook '$Path' contains no data in the rows after top row '$StartRow'"
|
||||
}
|
||||
else {
|
||||
#region Create one object per row
|
||||
if ($AsText -or $AsDate) {
|
||||
<#join items in AsText together with ~~~ . Escape any regex special characters...
|
||||
# ; if ($StartRow -ge $EndRow) { Write-Warning -Message "Selecting $StartRow as the header with data in $(1+$StartRow) to $EndRow might give odd results." } }
|
||||
}
|
||||
#endregion
|
||||
#region Create property names
|
||||
if ((-not $Columns) -or (-not ($PropertyNames = Get-PropertyNames -Columns $Columns -StartRow $StartRow))) {
|
||||
throw "No column headers found on top row '$StartRow'. If column headers in the worksheet are not a requirement then please use the '-NoHeader' or '-HeaderName' parameter."; return
|
||||
}
|
||||
if ($Duplicates = $PropertyNames | Group-Object Value | Where-Object Count -GE 2) {
|
||||
throw "Duplicate column headers found on row '$StartRow' in columns '$($Duplicates.Group.Column)'. Column headers must be unique, if this is not a requirement please use the '-NoHeader' or '-HeaderName' parameter."; return
|
||||
}
|
||||
#endregion
|
||||
if (-not $rows) {
|
||||
Write-Warning "Worksheet '$WorksheetName' in workbook '$Path' contains no data in the rows after top row '$StartRow'"
|
||||
}
|
||||
else {
|
||||
#region Create one object per row
|
||||
if ($AsText -or $AsDate) {
|
||||
<#join items in AsText together with ~~~ . Escape any regex special characters...
|
||||
# which turns "*" into "\*" make it ".*". Convert ~~~ to $|^ and top and tail with ^%;
|
||||
So if we get "Week", "[Time]" and "*date*" ; make the expression ^week$|^\[Time\]$|^.*Date.*$
|
||||
$make a regex for this which is case insensitive (option 1) and compiled (option 8)
|
||||
#>
|
||||
$TextColExpression = ''
|
||||
if ($AsText) {
|
||||
$TextColExpression += '(?<astext>^' + [regex]::Escape($AsText -join '~~~').replace('\*', '.*').replace('~~~', '$|^') + '$)'
|
||||
}
|
||||
if ($AsText -and $AsDate) {
|
||||
$TextColExpression += "|"
|
||||
}
|
||||
if ($AsDate) {
|
||||
$TextColExpression += '(?<asDate>^' + [regex]::Escape($AsDate -join '~~~').replace('\*', '.*').replace('~~~', '$|^') + '$)'
|
||||
}
|
||||
$TextColRegEx = New-Object -TypeName regex -ArgumentList $TextColExpression , 9
|
||||
}
|
||||
else { $TextColRegEx = $null }
|
||||
foreach ($R in $rows) {
|
||||
#Disabled write-verbose for speed
|
||||
# Write-Verbose "Import row '$R'"
|
||||
$NewRow = [Ordered]@{ }
|
||||
if ($TextColRegEx) {
|
||||
foreach ($P in $PropertyNames) {
|
||||
$MatchTest = $TextColRegEx.Match($P.value)
|
||||
if ($MatchTest.groups.name -eq "astext") {
|
||||
$NewRow[$P.Value] = $Worksheet.Cells[$R, $P.Column].Text
|
||||
}
|
||||
elseif ($MatchTest.groups.name -eq "asdate" -and $Worksheet.Cells[$R, $P.Column].Value -is [System.ValueType]) {
|
||||
$NewRow[$P.Value] = [datetime]::FromOADate(($Worksheet.Cells[$R, $P.Column].Value))
|
||||
}
|
||||
else { $NewRow[$P.Value] = $Worksheet.Cells[$R, $P.Column].Value }
|
||||
$TextColExpression = ''
|
||||
if ($AsText) {
|
||||
$TextColExpression += '(?<astext>^' + [regex]::Escape($AsText -join '~~~').replace('\*', '.*').replace('~~~', '$|^') + '$)'
|
||||
}
|
||||
}
|
||||
else {
|
||||
foreach ($P in $PropertyNames) {
|
||||
$NewRow[$P.Value] = $Worksheet.Cells[$R, $P.Column].Value
|
||||
# Write-Verbose "Import cell '$($Worksheet.Cells[$R, $P.Column].Address)' with property name '$($p.Value)' and value '$($Worksheet.Cells[$R, $P.Column].Value)'."
|
||||
if ($AsText -and $AsDate) {
|
||||
$TextColExpression += "|"
|
||||
}
|
||||
if ($AsDate) {
|
||||
$TextColExpression += '(?<asDate>^' + [regex]::Escape($AsDate -join '~~~').replace('\*', '.*').replace('~~~', '$|^') + '$)'
|
||||
}
|
||||
$TextColRegEx = New-Object -TypeName regex -ArgumentList $TextColExpression , 9
|
||||
}
|
||||
[PSCustomObject]$NewRow
|
||||
else { $TextColRegEx = $null }
|
||||
foreach ($R in $rows) {
|
||||
#Disabled write-verbose for speed
|
||||
# Write-Verbose "Import row '$R'"
|
||||
$NewRow = [Ordered]@{ }
|
||||
if ($TextColRegEx) {
|
||||
foreach ($P in $PropertyNames) {
|
||||
$MatchTest = $TextColRegEx.Match($P.value)
|
||||
if ($MatchTest.groups.name -eq "astext") {
|
||||
$NewRow[$P.Value] = $sheet.Cells[$R, $P.Column].Text
|
||||
}
|
||||
elseif ($MatchTest.groups.name -eq "asdate" -and $sheet.Cells[$R, $P.Column].Value -is [System.ValueType]) {
|
||||
$NewRow[$P.Value] = [datetime]::FromOADate(($sheet.Cells[$R, $P.Column].Value))
|
||||
}
|
||||
else { $NewRow[$P.Value] = $sheet.Cells[$R, $P.Column].Value }
|
||||
}
|
||||
}
|
||||
else {
|
||||
foreach ($P in $PropertyNames) {
|
||||
$NewRow[$P.Value] = $sheet.Cells[$R, $P.Column].Value
|
||||
# Write-Verbose "Import cell '$($Worksheet.Cells[$R, $P.Column].Address)' with property name '$($p.Value)' and value '$($Worksheet.Cells[$R, $P.Column].Value)'."
|
||||
}
|
||||
}
|
||||
$xlBook["$targetSheetname"] += [PSCustomObject]$NewRow
|
||||
}
|
||||
#endregion
|
||||
}
|
||||
#endregion
|
||||
}
|
||||
}
|
||||
catch { throw "Failed importing the Excel workbook '$Path' with worksheet '$WorksheetName': $_"; return }
|
||||
@@ -229,6 +236,18 @@
|
||||
$EndRow = 0
|
||||
$EndColumn = 0
|
||||
if ($Path) { $stream.close(); $ExcelPackage.Dispose() }
|
||||
|
||||
if ($NotAsDictionary) {
|
||||
foreach ($entry in $xlbook.GetEnumerator()) {
|
||||
$entry.Value
|
||||
}
|
||||
}
|
||||
elseif ($Worksheet.Count -eq 1) {
|
||||
$xlBook["$targetSheetname"]
|
||||
}
|
||||
else {
|
||||
$xlBook
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -63,6 +63,10 @@ Describe "Import-Excel on a sheet with no headings" {
|
||||
}
|
||||
}
|
||||
|
||||
AfterAll {
|
||||
Remove-Item $PSScriptRoot\testImportExcelSparse.xlsx -ErrorAction SilentlyContinue
|
||||
}
|
||||
|
||||
It "Import-Excel should have this shape" {
|
||||
$actual = @(Import-Excel $xlfile)
|
||||
|
||||
|
||||
63
__tests__/ImportExcelTests/ImportExcelReadSheets.tests.ps1
Normal file
63
__tests__/ImportExcelTests/ImportExcelReadSheets.tests.ps1
Normal file
@@ -0,0 +1,63 @@
|
||||
#Requires -Modules Pester
|
||||
[Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseDeclaredVarsMoreThanAssignments', '', Justification = 'False Positives')]
|
||||
param()
|
||||
|
||||
Import-Module $PSScriptRoot\..\..\ImportExcel.psd1 -Force
|
||||
|
||||
Describe 'Different ways to import sheets' -Tag ImportExcelReadSheets {
|
||||
BeforeAll {
|
||||
$xlFilename = "$PSScriptRoot\yearlySales.xlsx"
|
||||
}
|
||||
|
||||
Context 'Test reading sheets' {
|
||||
It 'Should read one sheet' {
|
||||
$actual = Import-Excel $xlFilename
|
||||
|
||||
$actual.Count | Should -Be 100
|
||||
$actual[0].Month | Should -BeExactly "April"
|
||||
$actual[99].Month | Should -BeExactly "April"
|
||||
}
|
||||
|
||||
It 'Should read two sheets' {
|
||||
$actual = Import-Excel $xlFilename march, june
|
||||
|
||||
$actual.keys.Count | Should -Be 2
|
||||
$actual["March"].Count | Should -Be 100
|
||||
$actual["June"].Count | Should -Be 100
|
||||
}
|
||||
|
||||
It 'Should read all the sheets' {
|
||||
$actual = Import-Excel $xlFilename *
|
||||
|
||||
$actual.keys.Count | Should -Be 12
|
||||
|
||||
$actual["January"].Count | Should -Be 100
|
||||
$actual["February"].Count | Should -Be 100
|
||||
$actual["March"].Count | Should -Be 100
|
||||
$actual["April"].Count | Should -Be 100
|
||||
$actual["May"].Count | Should -Be 100
|
||||
$actual["June"].Count | Should -Be 100
|
||||
$actual["July"].Count | Should -Be 100
|
||||
$actual["August"].Count | Should -Be 100
|
||||
$actual["September"].Count | Should -Be 100
|
||||
$actual["October"].Count | Should -Be 100
|
||||
$actual["November"].Count | Should -Be 100
|
||||
$actual["December"].Count | Should -Be 100
|
||||
}
|
||||
|
||||
It 'Should throw if it cannot find the sheet' {
|
||||
{ Import-Excel $xlFilename april, june, notthere } | Should -Throw
|
||||
}
|
||||
|
||||
It 'Should return an array not a dictionary' {
|
||||
$actual = Import-Excel $xlFilename april, june -NotAsDictionary
|
||||
|
||||
$actual.Count | Should -Be 200
|
||||
$group = $actual | Group-Object month -NoElement
|
||||
|
||||
$group.Count | Should -Be 2
|
||||
$group[0].Name | Should -BeExactly 'April'
|
||||
$group[1].Name | Should -BeExactly 'June'
|
||||
}
|
||||
}
|
||||
}
|
||||
BIN
__tests__/ImportExcelTests/yearlySales.xlsx
Normal file
BIN
__tests__/ImportExcelTests/yearlySales.xlsx
Normal file
Binary file not shown.
@@ -4,6 +4,12 @@
|
||||
|
||||
- Importing multiple files with Import-Excel by pipeline uses only the first file for the row count https://github.com/dfinke/ImportExcel/issues/1172
|
||||
|
||||
## New Features
|
||||
|
||||
- Import-Excel now supports importing multiple sheets. It can return either a dictionary of all sheets or a single array of all sheets combined.
|
||||
- `Import-Excel $xlfile *` # reads all sheets, returns all data in a dictionary
|
||||
- `Import-Excel $xlfile * -NotAsDictionary` # reads all sheets, returns all data in a single array
|
||||
|
||||
# v7.4.2
|
||||
|
||||
- Thank you [James Mueller](https://github.com/jamesmmueller) Updated `ConvertFrom-ExcelToSQLInsert` to handle single quotes in the SQL statement.
|
||||
|
||||
Reference in New Issue
Block a user