Update how Import-Excel reads sheets, and remain backward compatible

This commit is contained in:
dfinke
2022-04-11 16:58:55 -04:00
parent 6a956dbd7e
commit f83f654c4a
3 changed files with 135 additions and 85 deletions

View File

@@ -15,7 +15,7 @@
[Alias('Sheet')] [Alias('Sheet')]
[Parameter(Position = 1)] [Parameter(Position = 1)]
[ValidateNotNullOrEmpty()] [ValidateNotNullOrEmpty()]
[String]$WorksheetName, [String[]]$WorksheetName,
[Parameter(ParameterSetName = 'PathB' , Mandatory)] [Parameter(ParameterSetName = 'PathB' , Mandatory)]
[Parameter(ParameterSetName = 'PackageB', Mandatory)] [Parameter(ParameterSetName = 'PackageB', Mandatory)]
[String[]]$HeaderName , [String[]]$HeaderName ,
@@ -64,7 +64,7 @@
try { try {
if ($ImportColumns) { if ($ImportColumns) {
$end = $Worksheet.Dimension.End.Column $end = $sheet.Dimension.End.Column
# Check $ImportColumns # Check $ImportColumns
if ($ImportColumns[0] -le 0) { throw "The first entry in ImportColumns must be equal or greater 1" ; return } if ($ImportColumns[0] -le 0) { throw "The first entry in ImportColumns must be equal or greater 1" ; return }
# Check $StartColumn and $EndColumn # Check $StartColumn and $EndColumn
@@ -95,7 +95,7 @@
foreach ($C in $Columns) { foreach ($C in $Columns) {
#allow "False" or "0" to be column headings #allow "False" or "0" to be column headings
$Worksheet.Cells[$StartRow, $C] | Where-Object { -not [string]::IsNullOrEmpty($_.Value) } | Select-Object @{N = 'Column'; E = { $C } }, Value $sheet.Cells[$StartRow, $C] | Where-Object { -not [string]::IsNullOrEmpty($_.Value) } | Select-Object @{N = 'Column'; E = { $C } }, Value
} }
} }
} }
@@ -125,103 +125,106 @@
} }
try { try {
#Select worksheet #Select worksheet
if (-not $WorksheetName) { $Worksheet = $ExcelPackage.Workbook.Worksheets[1] } if ($WorksheetName -eq '*') { $Worksheet = $ExcelPackage.Workbook.Worksheets }
elseif (-not $WorksheetName) { $Worksheet = $ExcelPackage.Workbook.Worksheets[1] }
elseif (-not ($Worksheet = $ExcelPackage.Workbook.Worksheets[$WorksheetName])) { elseif (-not ($Worksheet = $ExcelPackage.Workbook.Worksheets[$WorksheetName])) {
throw "Worksheet '$WorksheetName' not found, the workbook only contains the worksheets '$($ExcelPackage.Workbook.Worksheets)'. If you only wish to select the first worksheet, please remove the '-WorksheetName' parameter." ; return throw "Worksheet '$WorksheetName' not found, the workbook only contains the worksheets '$($ExcelPackage.Workbook.Worksheets)'. If you only wish to select the first worksheet, please remove the '-WorksheetName' parameter." ; return
} }
#region Get rows and columns foreach ($sheet in $Worksheet) {
#If we are doing dataonly it is quicker to work out which rows to ignore before processing the cells. #region Get rows and columns
if (-not $EndRow ) { $EndRow = $Worksheet.Dimension.End.Row } #If we are doing dataonly it is quicker to work out which rows to ignore before processing the cells.
if (-not $EndColumn) { $EndColumn = $Worksheet.Dimension.End.Column } if (-not $EndRow ) { $EndRow = $sheet.Dimension.End.Row }
$endAddress = [OfficeOpenXml.ExcelAddress]::TranslateFromR1C1("R[$EndRow]C[$EndColumn]", 0, 0) if (-not $EndColumn) { $EndColumn = $sheet.Dimension.End.Column }
if ($DataOnly) { $endAddress = [OfficeOpenXml.ExcelAddress]::TranslateFromR1C1("R[$EndRow]C[$EndColumn]", 0, 0)
#If we are using headers startrow will be the header-row so examine data from startRow + 1, if ($DataOnly) {
if ($NoHeader) { $range = "A" + ($StartRow ) + ":" + $endAddress } #If we are using headers startrow will be the header-row so examine data from startRow + 1,
else { $range = "A" + ($StartRow + 1 ) + ":" + $endAddress } if ($NoHeader) { $range = "A" + ($StartRow ) + ":" + $endAddress }
#We're going to look at every cell and build 2 hash tables holding rows & columns which contain data. else { $range = "A" + ($StartRow + 1 ) + ":" + $endAddress }
#Want to Avoid 'select unique' operations & large Sorts, becuse time time taken increases with square #We're going to look at every cell and build 2 hash tables holding rows & columns which contain data.
#of number of items (PS uses heapsort at large size). Instead keep a list of what we have seen, #Want to Avoid 'select unique' operations & large Sorts, becuse time time taken increases with square
#using Hash tables: "we've seen it" is all we need, no need to worry about "seen it before" / "Seen it many times". #of number of items (PS uses heapsort at large size). Instead keep a list of what we have seen,
$colHash = @{ } #using Hash tables: "we've seen it" is all we need, no need to worry about "seen it before" / "Seen it many times".
$rowHash = @{ } $colHash = @{ }
foreach ($cell in $Worksheet.Cells[$range]) { $rowHash = @{ }
if ($null -ne $cell.Value ) { $colHash[$cell.Start.Column] = 1; $rowHash[$cell.Start.row] = 1 } foreach ($cell in $sheet.Cells[$range]) {
} if ($null -ne $cell.Value ) { $colHash[$cell.Start.Column] = 1; $rowHash[$cell.Start.row] = 1 }
$rows = ( $StartRow..$EndRow ).Where( { $rowHash[$_] })
$columns = ($StartColumn..$EndColumn).Where( { $colHash[$_] })
}
else {
$Columns = $StartColumn .. $EndColumn ; if ($StartColumn -gt $EndColumn) { Write-Warning -Message "Selecting columns $StartColumn to $EndColumn might give odd results." }
if ($NoHeader) { $rows = $StartRow..$EndRow ; if ($StartRow -gt $EndRow) { Write-Warning -Message "Selecting rows $StartRow to $EndRow might give odd results." } }
elseif ($HeaderName) { $rows = $StartRow..$EndRow }
else {
$rows = (1 + $StartRow)..$EndRow
if ($StartRow -eq 1 -and $EndRow -eq 1) {
$rows = 0
} }
$rows = ( $StartRow..$EndRow ).Where( { $rowHash[$_] })
$columns = ($StartColumn..$EndColumn).Where( { $colHash[$_] })
} }
else {
$Columns = $StartColumn .. $EndColumn ; if ($StartColumn -gt $EndColumn) { Write-Warning -Message "Selecting columns $StartColumn to $EndColumn might give odd results." }
if ($NoHeader) { $rows = $StartRow..$EndRow ; if ($StartRow -gt $EndRow) { Write-Warning -Message "Selecting rows $StartRow to $EndRow might give odd results." } }
elseif ($HeaderName) { $rows = $StartRow..$EndRow }
else {
$rows = (1 + $StartRow)..$EndRow
if ($StartRow -eq 1 -and $EndRow -eq 1) {
$rows = 0
}
}
# ; if ($StartRow -ge $EndRow) { Write-Warning -Message "Selecting $StartRow as the header with data in $(1+$StartRow) to $EndRow might give odd results." } } # ; if ($StartRow -ge $EndRow) { Write-Warning -Message "Selecting $StartRow as the header with data in $(1+$StartRow) to $EndRow might give odd results." } }
} }
#endregion #endregion
#region Create property names #region Create property names
if ((-not $Columns) -or (-not ($PropertyNames = Get-PropertyNames -Columns $Columns -StartRow $StartRow))) { if ((-not $Columns) -or (-not ($PropertyNames = Get-PropertyNames -Columns $Columns -StartRow $StartRow))) {
throw "No column headers found on top row '$StartRow'. If column headers in the worksheet are not a requirement then please use the '-NoHeader' or '-HeaderName' parameter."; return throw "No column headers found on top row '$StartRow'. If column headers in the worksheet are not a requirement then please use the '-NoHeader' or '-HeaderName' parameter."; return
} }
if ($Duplicates = $PropertyNames | Group-Object Value | Where-Object Count -GE 2) { if ($Duplicates = $PropertyNames | Group-Object Value | Where-Object Count -GE 2) {
throw "Duplicate column headers found on row '$StartRow' in columns '$($Duplicates.Group.Column)'. Column headers must be unique, if this is not a requirement please use the '-NoHeader' or '-HeaderName' parameter."; return throw "Duplicate column headers found on row '$StartRow' in columns '$($Duplicates.Group.Column)'. Column headers must be unique, if this is not a requirement please use the '-NoHeader' or '-HeaderName' parameter."; return
} }
#endregion #endregion
if (-not $rows) { if (-not $rows) {
Write-Warning "Worksheet '$WorksheetName' in workbook '$Path' contains no data in the rows after top row '$StartRow'" Write-Warning "Worksheet '$WorksheetName' in workbook '$Path' contains no data in the rows after top row '$StartRow'"
} }
else { else {
#region Create one object per row #region Create one object per row
if ($AsText -or $AsDate) { if ($AsText -or $AsDate) {
<#join items in AsText together with ~~~ . Escape any regex special characters... <#join items in AsText together with ~~~ . Escape any regex special characters...
# which turns "*" into "\*" make it ".*". Convert ~~~ to $|^ and top and tail with ^%; # which turns "*" into "\*" make it ".*". Convert ~~~ to $|^ and top and tail with ^%;
So if we get "Week", "[Time]" and "*date*" ; make the expression ^week$|^\[Time\]$|^.*Date.*$ So if we get "Week", "[Time]" and "*date*" ; make the expression ^week$|^\[Time\]$|^.*Date.*$
$make a regex for this which is case insensitive (option 1) and compiled (option 8) $make a regex for this which is case insensitive (option 1) and compiled (option 8)
#> #>
$TextColExpression = '' $TextColExpression = ''
if ($AsText) { if ($AsText) {
$TextColExpression += '(?<astext>^' + [regex]::Escape($AsText -join '~~~').replace('\*', '.*').replace('~~~', '$|^') + '$)' $TextColExpression += '(?<astext>^' + [regex]::Escape($AsText -join '~~~').replace('\*', '.*').replace('~~~', '$|^') + '$)'
}
if ($AsText -and $AsDate) {
$TextColExpression += "|"
}
if ($AsDate) {
$TextColExpression += '(?<asDate>^' + [regex]::Escape($AsDate -join '~~~').replace('\*', '.*').replace('~~~', '$|^') + '$)'
}
$TextColRegEx = New-Object -TypeName regex -ArgumentList $TextColExpression , 9
}
else { $TextColRegEx = $null }
foreach ($R in $rows) {
#Disabled write-verbose for speed
# Write-Verbose "Import row '$R'"
$NewRow = [Ordered]@{ }
if ($TextColRegEx) {
foreach ($P in $PropertyNames) {
$MatchTest = $TextColRegEx.Match($P.value)
if ($MatchTest.groups.name -eq "astext") {
$NewRow[$P.Value] = $Worksheet.Cells[$R, $P.Column].Text
}
elseif ($MatchTest.groups.name -eq "asdate" -and $Worksheet.Cells[$R, $P.Column].Value -is [System.ValueType]) {
$NewRow[$P.Value] = [datetime]::FromOADate(($Worksheet.Cells[$R, $P.Column].Value))
}
else { $NewRow[$P.Value] = $Worksheet.Cells[$R, $P.Column].Value }
} }
} if ($AsText -and $AsDate) {
else { $TextColExpression += "|"
foreach ($P in $PropertyNames) {
$NewRow[$P.Value] = $Worksheet.Cells[$R, $P.Column].Value
# Write-Verbose "Import cell '$($Worksheet.Cells[$R, $P.Column].Address)' with property name '$($p.Value)' and value '$($Worksheet.Cells[$R, $P.Column].Value)'."
} }
if ($AsDate) {
$TextColExpression += '(?<asDate>^' + [regex]::Escape($AsDate -join '~~~').replace('\*', '.*').replace('~~~', '$|^') + '$)'
}
$TextColRegEx = New-Object -TypeName regex -ArgumentList $TextColExpression , 9
} }
[PSCustomObject]$NewRow else { $TextColRegEx = $null }
foreach ($R in $rows) {
#Disabled write-verbose for speed
# Write-Verbose "Import row '$R'"
$NewRow = [Ordered]@{ }
if ($TextColRegEx) {
foreach ($P in $PropertyNames) {
$MatchTest = $TextColRegEx.Match($P.value)
if ($MatchTest.groups.name -eq "astext") {
$NewRow[$P.Value] = $sheet.Cells[$R, $P.Column].Text
}
elseif ($MatchTest.groups.name -eq "asdate" -and $sheet.Cells[$R, $P.Column].Value -is [System.ValueType]) {
$NewRow[$P.Value] = [datetime]::FromOADate(($sheet.Cells[$R, $P.Column].Value))
}
else { $NewRow[$P.Value] = $sheet.Cells[$R, $P.Column].Value }
}
}
else {
foreach ($P in $PropertyNames) {
$NewRow[$P.Value] = $sheet.Cells[$R, $P.Column].Value
# Write-Verbose "Import cell '$($Worksheet.Cells[$R, $P.Column].Address)' with property name '$($p.Value)' and value '$($Worksheet.Cells[$R, $P.Column].Value)'."
}
}
[PSCustomObject]$NewRow
}
#endregion
} }
#endregion
} }
} }
catch { throw "Failed importing the Excel workbook '$Path' with worksheet '$WorksheetName': $_"; return } catch { throw "Failed importing the Excel workbook '$Path' with worksheet '$WorksheetName': $_"; return }

View File

@@ -0,0 +1,47 @@
Import-Module $PSScriptRoot\..\..\ImportExcel.psd1 -Force
# Exercises the ways Import-Excel can select worksheets: the default (no
# worksheet argument), an explicit list of names, the '*' wildcard, and a
# list containing a name that does not exist in the workbook.
Describe 'Different ways to import sheets' -Tag ImportExcelReadSheets {

    BeforeAll {
        # Fixture workbook located next to this test script.
        $xlFilename = "$PSScriptRoot\yearlySales.xlsx"
    }

    Context 'Test reading sheets' {

        It 'Should read one sheet' {
            # No worksheet argument is passed; the test expects 100 rows
            # whose first row belongs to April.
            $rows = Import-Excel $xlFilename

            $rows.Count | Should -Be 100
            $rows[0].Month | Should -BeExactly "April"
        }

        It 'Should read two sheets' {
            $rows = Import-Excel $xlFilename march, june

            $rows.Count | Should -Be 200

            # Rows are expected in the order the sheets were requested,
            # one month starting at every 100th index.
            $expectedMonths = "March", "June"
            for ($i = 0; $i -lt $expectedMonths.Count; $i++) {
                $rows[$i * 100].Month | Should -BeExactly $expectedMonths[$i]
            }
        }

        It 'Should read all the sheets' {
            $rows = Import-Excel $xlFilename *

            $rows.Count | Should -Be 1200

            # The first row of each 100-row slice must carry the month
            # listed at the matching position below.
            $expectedMonths = @(
                "April"
                "August"
                "December"
                "February"
                "January"
                "July"
                "June"
                "March"
                "May"
                "November"
                "October"
                "September"
            )
            for ($i = 0; $i -lt $expectedMonths.Count; $i++) {
                $rows[$i * 100].Month | Should -BeExactly $expectedMonths[$i]
            }
        }

        It 'Should throw if it cannot find the sheet' {
            # 'notthere' is the name this test expects to be missing.
            $importWithMissingSheet = { Import-Excel $xlFilename april, june, notthere }

            $importWithMissingSheet | Should -Throw
        }
    }
}

Binary file not shown.