Spike to fix #792

This commit is contained in:
dfinke
2020-02-29 15:55:49 -05:00
parent 96b61d38f1
commit 13454cce8e
2 changed files with 180 additions and 142 deletions

View File

@@ -1,8 +1,8 @@
function Import-Excel { function Import-Excel {
[CmdLetBinding()] [CmdLetBinding()]
[Diagnostics.CodeAnalysis.SuppressMessageAttribute("PSAvoidUsingPlainTextForPassword", "")] [Diagnostics.CodeAnalysis.SuppressMessageAttribute("PSAvoidUsingPlainTextForPassword", "")]
[Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSPossibleIncorrectUsageOfAssignmentOperator', '', Justification = 'Intentional')] [Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSPossibleIncorrectUsageOfAssignmentOperator', '', Justification = 'Intentional')]
param ( param (
[Alias('FullName')] [Alias('FullName')]
[Parameter(ParameterSetName = "PathA", Mandatory, ValueFromPipelineByPropertyName, ValueFromPipeline, Position = 0 )] [Parameter(ParameterSetName = "PathA", Mandatory, ValueFromPipelineByPropertyName, ValueFromPipeline, Position = 0 )]
[Parameter(ParameterSetName = "PathB", Mandatory, ValueFromPipelineByPropertyName, ValueFromPipeline, Position = 0 )] [Parameter(ParameterSetName = "PathB", Mandatory, ValueFromPipelineByPropertyName, ValueFromPipeline, Position = 0 )]
@@ -36,164 +36,171 @@
[ValidateNotNullOrEmpty()] [ValidateNotNullOrEmpty()]
[String]$Password [String]$Password
) )
end { end {
$sw = [System.Diagnostics.Stopwatch]::StartNew() $sw = [System.Diagnostics.Stopwatch]::StartNew()
if ($input) { if ($input) {
$Paths = $input $Paths = $input
} }
elseif ($Path) { elseif ($Path) {
$Paths = $Path $Paths = $Path
} }
else { else {
$Paths = '' $Paths = ''
} }
function Get-PropertyNames { function Get-PropertyNames {
<# <#
.SYNOPSIS .SYNOPSIS
Create objects containing the column number and the column name for each of the different header types. Create objects containing the column number and the column name for each of the different header types.
#> #>
[Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseSingularNouns', '', Justification = "Name would be incorrect, and command is not exported")] [Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseSingularNouns', '', Justification = "Name would be incorrect, and command is not exported")]
param( param(
[Parameter(Mandatory)] [Parameter(Mandatory)]
[Int[]]$Columns, [Int[]]$Columns,
[Parameter(Mandatory)] [Parameter(Mandatory)]
[Int]$StartRow [Int]$StartRow
) )
try { try {
if ($HeaderName) { if ($HeaderName) {
$i = 0 $i = 0
foreach ($H in $HeaderName) { foreach ($H in $HeaderName) {
$H | Select-Object @{N = 'Column'; E = { $Columns[$i] } }, @{N = 'Value'; E = { $H } } $H | Select-Object @{N = 'Column'; E = { $Columns[$i] } }, @{N = 'Value'; E = { $H } }
$i++ $i++
}
}
elseif ($NoHeader) {
$i = 0
foreach ($C in $Columns) {
$i++
$C | Select-Object @{N = 'Column'; E = { $_ } }, @{N = 'Value'; E = { 'P' + $i } }
}
}
else {
if ($StartRow -lt 1) {
throw 'The top row can never be less than 1 when we need to retrieve headers from the worksheet.' ; return
}
foreach ($C in $Columns) {
$Worksheet.Cells[$StartRow, $C] | Where-Object { $_.Value } | Select-Object @{N = 'Column'; E = { $C } }, Value
}
} }
} }
elseif ($NoHeader) { catch {
$i = 0 throw "Failed creating property names: $_" ; return
foreach ($C in $Columns) {
$i++
$C | Select-Object @{N = 'Column'; E = { $_ } }, @{N = 'Value'; E = { 'P' + $i } }
}
}
else {
if ($StartRow -lt 1) {
throw 'The top row can never be less than 1 when we need to retrieve headers from the worksheet.' ; return
}
foreach ($C in $Columns) {
$Worksheet.Cells[$StartRow, $C] | Where-Object { $_.Value } | Select-Object @{N = 'Column'; E = { $C } }, Value
}
} }
} }
catch { foreach ($Path in $Paths) {
throw "Failed creating property names: $_" ; return if ($path) {
} $extension = [System.IO.Path]::GetExtension($Path)
} if ($extension -notmatch '.xlsx$|.xlsm$') {
foreach ($Path in $Paths) { throw "Import-Excel does not support reading this extension type $($extension)"
if ($path) {
$extension = [System.IO.Path]::GetExtension($Path)
if ($extension -notmatch '.xlsx$|.xlsm$') {
throw "Import-Excel does not support reading this extension type $($extension)"
}
$resolvedPath = (Resolve-Path $Path -ErrorAction SilentlyContinue)
if ($resolvedPath) {
$Path = $resolvedPath.ProviderPath
}
else {
throw "'$($Path)' file not found"
}
$stream = New-Object -TypeName System.IO.FileStream -ArgumentList $Path, 'Open', 'Read', 'ReadWrite'
$ExcelPackage = New-Object -TypeName OfficeOpenXml.ExcelPackage
if ($Password) { $ExcelPackage.Load($stream, $Password) }
else { $ExcelPackage.Load($stream) }
}
try {
#Select worksheet
if (-not $WorksheetName) { $Worksheet = $ExcelPackage.Workbook.Worksheets[1] }
elseif (-not ($Worksheet = $ExcelPackage.Workbook.Worksheets[$WorkSheetName])) {
throw "Worksheet '$WorksheetName' not found, the workbook only contains the worksheets '$($ExcelPackage.Workbook.Worksheets)'. If you only wish to select the first worksheet, please remove the '-WorksheetName' parameter." ; return
}
#region Get rows and columns
#If we are doing dataonly it is quicker to work out which rows to ignore before processing the cells.
if (-not $EndRow ) { $EndRow = $Worksheet.Dimension.End.Row }
if (-not $EndColumn) { $EndColumn = $Worksheet.Dimension.End.Column }
$endAddress = [OfficeOpenXml.ExcelAddress]::TranslateFromR1C1("R[$EndRow]C[$EndColumn]", 0, 0)
if ($DataOnly) {
#If we are using headers startrow will be the header-row so examine data from startRow + 1,
if ($NoHeader) { $range = "A" + ($StartRow ) + ":" + $endAddress }
else { $range = "A" + ($StartRow + 1 ) + ":" + $endAddress }
#We're going to look at every cell and build 2 hash tables holding rows & columns which contain data.
#Want to Avoid 'select unique' operations & large Sorts, becuse time time taken increases with square
#of number of items (PS uses heapsort at large size). Instead keep a list of what we have seen,
#using Hash tables: "we've seen it" is all we need, no need to worry about "seen it before" / "Seen it many times".
$colHash = @{ }
$rowHash = @{ }
foreach ($cell in $Worksheet.Cells[$range]) {
if ($null -ne $cell.Value ) { $colHash[$cell.Start.Column] = 1; $rowHash[$cell.Start.row] = 1 }
} }
$rows = ( $StartRow..$EndRow ).Where( { $rowHash[$_] })
$columns = ($StartColumn..$EndColumn).Where( { $colHash[$_] }) $resolvedPath = (Resolve-Path $Path -ErrorAction SilentlyContinue)
if ($resolvedPath) {
$Path = $resolvedPath.ProviderPath
}
else {
throw "'$($Path)' file not found"
}
$stream = New-Object -TypeName System.IO.FileStream -ArgumentList $Path, 'Open', 'Read', 'ReadWrite'
$ExcelPackage = New-Object -TypeName OfficeOpenXml.ExcelPackage
if ($Password) { $ExcelPackage.Load($stream, $Password) }
else { $ExcelPackage.Load($stream) }
} }
else { try {
$Columns = $StartColumn .. $EndColumn ; if ($StartColumn -gt $EndColumn) { Write-Warning -Message "Selecting columns $StartColumn to $EndColumn might give odd results." } #Select worksheet
if ($NoHeader) { $Rows = $StartRow..$EndRow ; if ($StartRow -gt $EndRow) { Write-Warning -Message "Selecting rows $StartRow to $EndRow might give odd results." } } if (-not $WorksheetName) { $Worksheet = $ExcelPackage.Workbook.Worksheets[1] }
elseif ($HeaderName) { $Rows = $StartRow..$EndRow } elseif (-not ($Worksheet = $ExcelPackage.Workbook.Worksheets[$WorkSheetName])) {
else { $Rows = (1 + $StartRow)..$EndRow } # ; if ($StartRow -ge $EndRow) { Write-Warning -Message "Selecting $StartRow as the header with data in $(1+$StartRow) to $EndRow might give odd results." } } throw "Worksheet '$WorksheetName' not found, the workbook only contains the worksheets '$($ExcelPackage.Workbook.Worksheets)'. If you only wish to select the first worksheet, please remove the '-WorksheetName' parameter." ; return
} }
#endregion
#region Create property names #region Get rows and columns
if ((-not $Columns) -or (-not ($PropertyNames = Get-PropertyNames -Columns $Columns -StartRow $StartRow))) { #If we are doing dataonly it is quicker to work out which rows to ignore before processing the cells.
throw "No column headers found on top row '$StartRow'. If column headers in the worksheet are not a requirement then please use the '-NoHeader' or '-HeaderName' parameter."; return if (-not $EndRow ) { $EndRow = $Worksheet.Dimension.End.Row }
} if (-not $EndColumn) { $EndColumn = $Worksheet.Dimension.End.Column }
if ($Duplicates = $PropertyNames | Group-Object Value | Where-Object Count -GE 2) { $endAddress = [OfficeOpenXml.ExcelAddress]::TranslateFromR1C1("R[$EndRow]C[$EndColumn]", 0, 0)
throw "Duplicate column headers found on row '$StartRow' in columns '$($Duplicates.Group.Column)'. Column headers must be unique, if this is not a requirement please use the '-NoHeader' or '-HeaderName' parameter."; return if ($DataOnly) {
} #If we are using headers startrow will be the header-row so examine data from startRow + 1,
#endregion if ($NoHeader) { $range = "A" + ($StartRow ) + ":" + $endAddress }
if (-not $Rows) { else { $range = "A" + ($StartRow + 1 ) + ":" + $endAddress }
Write-Warning "Worksheet '$WorksheetName' in workbook '$Path' contains no data in the rows after top row '$StartRow'" #We're going to look at every cell and build 2 hash tables holding rows & columns which contain data.
} #Want to Avoid 'select unique' operations & large Sorts, becuse time time taken increases with square
else { #of number of items (PS uses heapsort at large size). Instead keep a list of what we have seen,
#region Create one object per row #using Hash tables: "we've seen it" is all we need, no need to worry about "seen it before" / "Seen it many times".
if ($AsText) { $colHash = @{ }
<#join items in AsText together with ~~~ . Escape any regex special characters... $rowHash = @{ }
foreach ($cell in $Worksheet.Cells[$range]) {
if ($null -ne $cell.Value ) { $colHash[$cell.Start.Column] = 1; $rowHash[$cell.Start.row] = 1 }
}
$rows = ( $StartRow..$EndRow ).Where( { $rowHash[$_] })
$columns = ($StartColumn..$EndColumn).Where( { $colHash[$_] })
}
else {
$Columns = $StartColumn .. $EndColumn ; if ($StartColumn -gt $EndColumn) { Write-Warning -Message "Selecting columns $StartColumn to $EndColumn might give odd results." }
if ($NoHeader) { $Rows = $StartRow..$EndRow ; if ($StartRow -gt $EndRow) { Write-Warning -Message "Selecting rows $StartRow to $EndRow might give odd results." } }
elseif ($HeaderName) { $Rows = $StartRow..$EndRow }
else {
$Rows = (1 + $StartRow)..$EndRow
if ($StartRow -eq 1 -and $EndRow -eq 1) {
$Rows = 0
}
}
# ; if ($StartRow -ge $EndRow) { Write-Warning -Message "Selecting $StartRow as the header with data in $(1+$StartRow) to $EndRow might give odd results." } }
}
#endregion
#region Create property names
if ((-not $Columns) -or (-not ($PropertyNames = Get-PropertyNames -Columns $Columns -StartRow $StartRow))) {
throw "No column headers found on top row '$StartRow'. If column headers in the worksheet are not a requirement then please use the '-NoHeader' or '-HeaderName' parameter."; return
}
if ($Duplicates = $PropertyNames | Group-Object Value | Where-Object Count -GE 2) {
throw "Duplicate column headers found on row '$StartRow' in columns '$($Duplicates.Group.Column)'. Column headers must be unique, if this is not a requirement please use the '-NoHeader' or '-HeaderName' parameter."; return
}
#endregion
if (-not $Rows) {
Write-Warning "Worksheet '$WorksheetName' in workbook '$Path' contains no data in the rows after top row '$StartRow'"
}
else {
#region Create one object per row
if ($AsText) {
<#join items in AsText together with ~~~ . Escape any regex special characters...
# which turns "*" into "\*" make it ".*". Convert ~~~ to $|^ and top and tail with ^%; # which turns "*" into "\*" make it ".*". Convert ~~~ to $|^ and top and tail with ^%;
So if we get "Week", "[Time]" and "*date*" ; make the expression ^week$|^\[Time\]$|^.*Date.*$ So if we get "Week", "[Time]" and "*date*" ; make the expression ^week$|^\[Time\]$|^.*Date.*$
$make a regex for this which is case insensitive (option 1) and compiled (option 8) $make a regex for this which is case insensitive (option 1) and compiled (option 8)
#> #>
$TextColExpression = "^" + [regex]::Escape($AsText -join "~~~").replace("\*", ".*").replace("~~~", "$|^") + "$" $TextColExpression = "^" + [regex]::Escape($AsText -join "~~~").replace("\*", ".*").replace("~~~", "$|^") + "$"
$TextColRegEx = New-Object -TypeName regex -ArgumentList $TextColExpression , 9 $TextColRegEx = New-Object -TypeName regex -ArgumentList $TextColExpression , 9
} }
foreach ($R in $Rows) { foreach ($R in $Rows) {
#Disabled write-verbose for speed #Disabled write-verbose for speed
# Write-Verbose "Import row '$R'" # Write-Verbose "Import row '$R'"
$NewRow = [Ordered]@{ } $NewRow = [Ordered]@{ }
if ($TextColRegEx) { if ($TextColRegEx) {
foreach ($P in $PropertyNames) { foreach ($P in $PropertyNames) {
if ($TextColRegEx.IsMatch($P.Value)) { if ($TextColRegEx.IsMatch($P.Value)) {
$NewRow[$P.Value] = $Worksheet.Cells[$R, $P.Column].Text $NewRow[$P.Value] = $Worksheet.Cells[$R, $P.Column].Text
}
else { $NewRow[$P.Value] = $Worksheet.Cells[$R, $P.Column].Value }
} }
else { $NewRow[$P.Value] = $Worksheet.Cells[$R, $P.Column].Value }
} }
} else {
else { foreach ($P in $PropertyNames) {
foreach ($P in $PropertyNames) { $NewRow[$P.Value] = $Worksheet.Cells[$R, $P.Column].Value
$NewRow[$P.Value] = $Worksheet.Cells[$R, $P.Column].Value # Write-Verbose "Import cell '$($Worksheet.Cells[$R, $P.Column].Address)' with property name '$($p.Value)' and value '$($Worksheet.Cells[$R, $P.Column].Value)'."
# Write-Verbose "Import cell '$($Worksheet.Cells[$R, $P.Column].Address)' with property name '$($p.Value)' and value '$($Worksheet.Cells[$R, $P.Column].Value)'." }
} }
[PSCustomObject]$NewRow
} }
[PSCustomObject]$NewRow #endregion
} }
#endregion }
catch { throw "Failed importing the Excel workbook '$Path' with worksheet '$Worksheetname': $_"; return }
finally {
if ($Path) { $stream.close(); $ExcelPackage.Dispose() }
} }
} }
catch { throw "Failed importing the Excel workbook '$Path' with worksheet '$Worksheetname': $_"; return }
finally {
if ($Path) { $stream.close(); $ExcelPackage.Dispose() }
}
} }
}
} }

View File

@@ -1,4 +1,5 @@
$xlfile = "TestDrive:\testImportExcel.xlsx" $xlfile = "TestDrive:\testImportExcel.xlsx"
$xlfileHeaderOnly = "TestDrive:\testImportExcelHeaderOnly.xlsx"
Describe "Import-Excel on a sheet with no headings" { Describe "Import-Excel on a sheet with no headings" {
BeforeAll { BeforeAll {
@@ -18,6 +19,15 @@ Describe "Import-Excel on a sheet with no headings" {
Set-ExcelRange -Worksheet $xl.Sheet1 -Range C3 -Value 'I' Set-ExcelRange -Worksheet $xl.Sheet1 -Range C3 -Value 'I'
Close-ExcelPackage $xl Close-ExcelPackage $xl
# crate $xlfileHeaderOnly
$xl = "" | Export-excel $xlfileHeaderOnly -PassThru
Set-ExcelRange -Worksheet $xl.Sheet1 -Range A1 -Value 'A'
Set-ExcelRange -Worksheet $xl.Sheet1 -Range B1 -Value 'B'
Set-ExcelRange -Worksheet $xl.Sheet1 -Range C1 -Value 'C'
Close-ExcelPackage $xl
} }
It "Import-Excel should have this shape" { It "Import-Excel should have this shape" {
@@ -193,4 +203,25 @@ Describe "Import-Excel on a sheet with no headings" {
# $actual[0].City | Should BeExactly 'Brussels' # $actual[0].City | Should BeExactly 'Brussels'
} }
It "Should handle data correctly if there is only a single row" {
$actual = Import-Excel $xlfileHeaderOnly
$names = $actual.psobject.properties.Name
$names | should be $null
$actual.Count | should be 0
}
It "Should handle data correctly if there is only a single row and using -NoHeader " {
$actual = @(Import-Excel $xlfileHeaderOnly -WorksheetName Sheet1 -NoHeader)
$names = $actual[0].psobject.properties.Name
$names.count | should be 3
$names[0] | should be 'P1'
$names[1] | should be 'P2'
$names[2] | should be 'P3'
$actual.Count | should be 1
$actual[0].P1 | should be 'A'
$actual[0].P2 | should be 'B'
$actual[0].P3 | should be 'C'
}
} }