mirror of
https://github.com/dfinke/ImportExcel.git
synced 2025-12-15 07:43:23 +00:00
Spike to fix #792
This commit is contained in:
@@ -1,8 +1,8 @@
|
|||||||
function Import-Excel {
|
function Import-Excel {
|
||||||
[CmdLetBinding()]
|
[CmdLetBinding()]
|
||||||
[Diagnostics.CodeAnalysis.SuppressMessageAttribute("PSAvoidUsingPlainTextForPassword", "")]
|
[Diagnostics.CodeAnalysis.SuppressMessageAttribute("PSAvoidUsingPlainTextForPassword", "")]
|
||||||
[Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSPossibleIncorrectUsageOfAssignmentOperator', '', Justification = 'Intentional')]
|
[Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSPossibleIncorrectUsageOfAssignmentOperator', '', Justification = 'Intentional')]
|
||||||
param (
|
param (
|
||||||
[Alias('FullName')]
|
[Alias('FullName')]
|
||||||
[Parameter(ParameterSetName = "PathA", Mandatory, ValueFromPipelineByPropertyName, ValueFromPipeline, Position = 0 )]
|
[Parameter(ParameterSetName = "PathA", Mandatory, ValueFromPipelineByPropertyName, ValueFromPipeline, Position = 0 )]
|
||||||
[Parameter(ParameterSetName = "PathB", Mandatory, ValueFromPipelineByPropertyName, ValueFromPipeline, Position = 0 )]
|
[Parameter(ParameterSetName = "PathB", Mandatory, ValueFromPipelineByPropertyName, ValueFromPipeline, Position = 0 )]
|
||||||
@@ -36,164 +36,171 @@
|
|||||||
[ValidateNotNullOrEmpty()]
|
[ValidateNotNullOrEmpty()]
|
||||||
[String]$Password
|
[String]$Password
|
||||||
)
|
)
|
||||||
end {
|
end {
|
||||||
$sw = [System.Diagnostics.Stopwatch]::StartNew()
|
$sw = [System.Diagnostics.Stopwatch]::StartNew()
|
||||||
if ($input) {
|
if ($input) {
|
||||||
$Paths = $input
|
$Paths = $input
|
||||||
}
|
}
|
||||||
elseif ($Path) {
|
elseif ($Path) {
|
||||||
$Paths = $Path
|
$Paths = $Path
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
$Paths = ''
|
$Paths = ''
|
||||||
}
|
}
|
||||||
function Get-PropertyNames {
|
function Get-PropertyNames {
|
||||||
<#
|
<#
|
||||||
.SYNOPSIS
|
.SYNOPSIS
|
||||||
Create objects containing the column number and the column name for each of the different header types.
|
Create objects containing the column number and the column name for each of the different header types.
|
||||||
#>
|
#>
|
||||||
[Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseSingularNouns', '', Justification = "Name would be incorrect, and command is not exported")]
|
[Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseSingularNouns', '', Justification = "Name would be incorrect, and command is not exported")]
|
||||||
param(
|
param(
|
||||||
[Parameter(Mandatory)]
|
[Parameter(Mandatory)]
|
||||||
[Int[]]$Columns,
|
[Int[]]$Columns,
|
||||||
[Parameter(Mandatory)]
|
[Parameter(Mandatory)]
|
||||||
[Int]$StartRow
|
[Int]$StartRow
|
||||||
)
|
)
|
||||||
|
|
||||||
try {
|
try {
|
||||||
if ($HeaderName) {
|
if ($HeaderName) {
|
||||||
$i = 0
|
$i = 0
|
||||||
foreach ($H in $HeaderName) {
|
foreach ($H in $HeaderName) {
|
||||||
$H | Select-Object @{N = 'Column'; E = { $Columns[$i] } }, @{N = 'Value'; E = { $H } }
|
$H | Select-Object @{N = 'Column'; E = { $Columns[$i] } }, @{N = 'Value'; E = { $H } }
|
||||||
$i++
|
$i++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
elseif ($NoHeader) {
|
||||||
|
$i = 0
|
||||||
|
foreach ($C in $Columns) {
|
||||||
|
$i++
|
||||||
|
$C | Select-Object @{N = 'Column'; E = { $_ } }, @{N = 'Value'; E = { 'P' + $i } }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
else {
|
||||||
|
if ($StartRow -lt 1) {
|
||||||
|
throw 'The top row can never be less than 1 when we need to retrieve headers from the worksheet.' ; return
|
||||||
|
}
|
||||||
|
|
||||||
|
foreach ($C in $Columns) {
|
||||||
|
$Worksheet.Cells[$StartRow, $C] | Where-Object { $_.Value } | Select-Object @{N = 'Column'; E = { $C } }, Value
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
elseif ($NoHeader) {
|
catch {
|
||||||
$i = 0
|
throw "Failed creating property names: $_" ; return
|
||||||
foreach ($C in $Columns) {
|
|
||||||
$i++
|
|
||||||
$C | Select-Object @{N = 'Column'; E = { $_ } }, @{N = 'Value'; E = { 'P' + $i } }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
else {
|
|
||||||
if ($StartRow -lt 1) {
|
|
||||||
throw 'The top row can never be less than 1 when we need to retrieve headers from the worksheet.' ; return
|
|
||||||
}
|
|
||||||
|
|
||||||
foreach ($C in $Columns) {
|
|
||||||
$Worksheet.Cells[$StartRow, $C] | Where-Object { $_.Value } | Select-Object @{N = 'Column'; E = { $C } }, Value
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch {
|
foreach ($Path in $Paths) {
|
||||||
throw "Failed creating property names: $_" ; return
|
if ($path) {
|
||||||
}
|
$extension = [System.IO.Path]::GetExtension($Path)
|
||||||
}
|
if ($extension -notmatch '.xlsx$|.xlsm$') {
|
||||||
foreach ($Path in $Paths) {
|
throw "Import-Excel does not support reading this extension type $($extension)"
|
||||||
if ($path) {
|
|
||||||
$extension = [System.IO.Path]::GetExtension($Path)
|
|
||||||
if ($extension -notmatch '.xlsx$|.xlsm$') {
|
|
||||||
throw "Import-Excel does not support reading this extension type $($extension)"
|
|
||||||
}
|
|
||||||
|
|
||||||
$resolvedPath = (Resolve-Path $Path -ErrorAction SilentlyContinue)
|
|
||||||
if ($resolvedPath) {
|
|
||||||
$Path = $resolvedPath.ProviderPath
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
throw "'$($Path)' file not found"
|
|
||||||
}
|
|
||||||
|
|
||||||
$stream = New-Object -TypeName System.IO.FileStream -ArgumentList $Path, 'Open', 'Read', 'ReadWrite'
|
|
||||||
$ExcelPackage = New-Object -TypeName OfficeOpenXml.ExcelPackage
|
|
||||||
if ($Password) { $ExcelPackage.Load($stream, $Password) }
|
|
||||||
else { $ExcelPackage.Load($stream) }
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
#Select worksheet
|
|
||||||
if (-not $WorksheetName) { $Worksheet = $ExcelPackage.Workbook.Worksheets[1] }
|
|
||||||
elseif (-not ($Worksheet = $ExcelPackage.Workbook.Worksheets[$WorkSheetName])) {
|
|
||||||
throw "Worksheet '$WorksheetName' not found, the workbook only contains the worksheets '$($ExcelPackage.Workbook.Worksheets)'. If you only wish to select the first worksheet, please remove the '-WorksheetName' parameter." ; return
|
|
||||||
}
|
|
||||||
|
|
||||||
#region Get rows and columns
|
|
||||||
#If we are doing dataonly it is quicker to work out which rows to ignore before processing the cells.
|
|
||||||
if (-not $EndRow ) { $EndRow = $Worksheet.Dimension.End.Row }
|
|
||||||
if (-not $EndColumn) { $EndColumn = $Worksheet.Dimension.End.Column }
|
|
||||||
$endAddress = [OfficeOpenXml.ExcelAddress]::TranslateFromR1C1("R[$EndRow]C[$EndColumn]", 0, 0)
|
|
||||||
if ($DataOnly) {
|
|
||||||
#If we are using headers startrow will be the header-row so examine data from startRow + 1,
|
|
||||||
if ($NoHeader) { $range = "A" + ($StartRow ) + ":" + $endAddress }
|
|
||||||
else { $range = "A" + ($StartRow + 1 ) + ":" + $endAddress }
|
|
||||||
#We're going to look at every cell and build 2 hash tables holding rows & columns which contain data.
|
|
||||||
#Want to Avoid 'select unique' operations & large Sorts, because the time taken increases with the square
|
|
||||||
#of number of items (PS uses heapsort at large size). Instead keep a list of what we have seen,
|
|
||||||
#using Hash tables: "we've seen it" is all we need, no need to worry about "seen it before" / "Seen it many times".
|
|
||||||
$colHash = @{ }
|
|
||||||
$rowHash = @{ }
|
|
||||||
foreach ($cell in $Worksheet.Cells[$range]) {
|
|
||||||
if ($null -ne $cell.Value ) { $colHash[$cell.Start.Column] = 1; $rowHash[$cell.Start.row] = 1 }
|
|
||||||
}
|
}
|
||||||
$rows = ( $StartRow..$EndRow ).Where( { $rowHash[$_] })
|
|
||||||
$columns = ($StartColumn..$EndColumn).Where( { $colHash[$_] })
|
$resolvedPath = (Resolve-Path $Path -ErrorAction SilentlyContinue)
|
||||||
|
if ($resolvedPath) {
|
||||||
|
$Path = $resolvedPath.ProviderPath
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
throw "'$($Path)' file not found"
|
||||||
|
}
|
||||||
|
|
||||||
|
$stream = New-Object -TypeName System.IO.FileStream -ArgumentList $Path, 'Open', 'Read', 'ReadWrite'
|
||||||
|
$ExcelPackage = New-Object -TypeName OfficeOpenXml.ExcelPackage
|
||||||
|
if ($Password) { $ExcelPackage.Load($stream, $Password) }
|
||||||
|
else { $ExcelPackage.Load($stream) }
|
||||||
}
|
}
|
||||||
else {
|
try {
|
||||||
$Columns = $StartColumn .. $EndColumn ; if ($StartColumn -gt $EndColumn) { Write-Warning -Message "Selecting columns $StartColumn to $EndColumn might give odd results." }
|
#Select worksheet
|
||||||
if ($NoHeader) { $Rows = $StartRow..$EndRow ; if ($StartRow -gt $EndRow) { Write-Warning -Message "Selecting rows $StartRow to $EndRow might give odd results." } }
|
if (-not $WorksheetName) { $Worksheet = $ExcelPackage.Workbook.Worksheets[1] }
|
||||||
elseif ($HeaderName) { $Rows = $StartRow..$EndRow }
|
elseif (-not ($Worksheet = $ExcelPackage.Workbook.Worksheets[$WorkSheetName])) {
|
||||||
else { $Rows = (1 + $StartRow)..$EndRow } # ; if ($StartRow -ge $EndRow) { Write-Warning -Message "Selecting $StartRow as the header with data in $(1+$StartRow) to $EndRow might give odd results." } }
|
throw "Worksheet '$WorksheetName' not found, the workbook only contains the worksheets '$($ExcelPackage.Workbook.Worksheets)'. If you only wish to select the first worksheet, please remove the '-WorksheetName' parameter." ; return
|
||||||
}
|
}
|
||||||
#endregion
|
|
||||||
#region Create property names
|
#region Get rows and columns
|
||||||
if ((-not $Columns) -or (-not ($PropertyNames = Get-PropertyNames -Columns $Columns -StartRow $StartRow))) {
|
#If we are doing dataonly it is quicker to work out which rows to ignore before processing the cells.
|
||||||
throw "No column headers found on top row '$StartRow'. If column headers in the worksheet are not a requirement then please use the '-NoHeader' or '-HeaderName' parameter."; return
|
if (-not $EndRow ) { $EndRow = $Worksheet.Dimension.End.Row }
|
||||||
}
|
if (-not $EndColumn) { $EndColumn = $Worksheet.Dimension.End.Column }
|
||||||
if ($Duplicates = $PropertyNames | Group-Object Value | Where-Object Count -GE 2) {
|
$endAddress = [OfficeOpenXml.ExcelAddress]::TranslateFromR1C1("R[$EndRow]C[$EndColumn]", 0, 0)
|
||||||
throw "Duplicate column headers found on row '$StartRow' in columns '$($Duplicates.Group.Column)'. Column headers must be unique, if this is not a requirement please use the '-NoHeader' or '-HeaderName' parameter."; return
|
if ($DataOnly) {
|
||||||
}
|
#If we are using headers startrow will be the header-row so examine data from startRow + 1,
|
||||||
#endregion
|
if ($NoHeader) { $range = "A" + ($StartRow ) + ":" + $endAddress }
|
||||||
if (-not $Rows) {
|
else { $range = "A" + ($StartRow + 1 ) + ":" + $endAddress }
|
||||||
Write-Warning "Worksheet '$WorksheetName' in workbook '$Path' contains no data in the rows after top row '$StartRow'"
|
#We're going to look at every cell and build 2 hash tables holding rows & columns which contain data.
|
||||||
}
|
#Want to Avoid 'select unique' operations & large Sorts, because the time taken increases with the square
|
||||||
else {
|
#of number of items (PS uses heapsort at large size). Instead keep a list of what we have seen,
|
||||||
#region Create one object per row
|
#using Hash tables: "we've seen it" is all we need, no need to worry about "seen it before" / "Seen it many times".
|
||||||
if ($AsText) {
|
$colHash = @{ }
|
||||||
<#join items in AsText together with ~~~ . Escape any regex special characters...
|
$rowHash = @{ }
|
||||||
|
foreach ($cell in $Worksheet.Cells[$range]) {
|
||||||
|
if ($null -ne $cell.Value ) { $colHash[$cell.Start.Column] = 1; $rowHash[$cell.Start.row] = 1 }
|
||||||
|
}
|
||||||
|
$rows = ( $StartRow..$EndRow ).Where( { $rowHash[$_] })
|
||||||
|
$columns = ($StartColumn..$EndColumn).Where( { $colHash[$_] })
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
$Columns = $StartColumn .. $EndColumn ; if ($StartColumn -gt $EndColumn) { Write-Warning -Message "Selecting columns $StartColumn to $EndColumn might give odd results." }
|
||||||
|
if ($NoHeader) { $Rows = $StartRow..$EndRow ; if ($StartRow -gt $EndRow) { Write-Warning -Message "Selecting rows $StartRow to $EndRow might give odd results." } }
|
||||||
|
elseif ($HeaderName) { $Rows = $StartRow..$EndRow }
|
||||||
|
else {
|
||||||
|
$Rows = (1 + $StartRow)..$EndRow
|
||||||
|
if ($StartRow -eq 1 -and $EndRow -eq 1) {
|
||||||
|
$Rows = 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# ; if ($StartRow -ge $EndRow) { Write-Warning -Message "Selecting $StartRow as the header with data in $(1+$StartRow) to $EndRow might give odd results." } }
|
||||||
|
}
|
||||||
|
#endregion
|
||||||
|
#region Create property names
|
||||||
|
if ((-not $Columns) -or (-not ($PropertyNames = Get-PropertyNames -Columns $Columns -StartRow $StartRow))) {
|
||||||
|
throw "No column headers found on top row '$StartRow'. If column headers in the worksheet are not a requirement then please use the '-NoHeader' or '-HeaderName' parameter."; return
|
||||||
|
}
|
||||||
|
if ($Duplicates = $PropertyNames | Group-Object Value | Where-Object Count -GE 2) {
|
||||||
|
throw "Duplicate column headers found on row '$StartRow' in columns '$($Duplicates.Group.Column)'. Column headers must be unique, if this is not a requirement please use the '-NoHeader' or '-HeaderName' parameter."; return
|
||||||
|
}
|
||||||
|
#endregion
|
||||||
|
if (-not $Rows) {
|
||||||
|
Write-Warning "Worksheet '$WorksheetName' in workbook '$Path' contains no data in the rows after top row '$StartRow'"
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
#region Create one object per row
|
||||||
|
if ($AsText) {
|
||||||
|
<#join items in AsText together with ~~~ . Escape any regex special characters...
|
||||||
# which turns "*" into "\*" make it ".*". Convert ~~~ to $|^ and top and tail with ^%;
|
# which turns "*" into "\*" make it ".*". Convert ~~~ to $|^ and top and tail with ^%;
|
||||||
So if we get "Week", "[Time]" and "*date*" ; make the expression ^week$|^\[Time\]$|^.*Date.*$
|
So if we get "Week", "[Time]" and "*date*" ; make the expression ^week$|^\[Time\]$|^.*Date.*$
|
||||||
Make a regex for this which is case insensitive (option 1) and compiled (option 8)
|
Make a regex for this which is case insensitive (option 1) and compiled (option 8)
|
||||||
#>
|
#>
|
||||||
$TextColExpression = "^" + [regex]::Escape($AsText -join "~~~").replace("\*", ".*").replace("~~~", "$|^") + "$"
|
$TextColExpression = "^" + [regex]::Escape($AsText -join "~~~").replace("\*", ".*").replace("~~~", "$|^") + "$"
|
||||||
$TextColRegEx = New-Object -TypeName regex -ArgumentList $TextColExpression , 9
|
$TextColRegEx = New-Object -TypeName regex -ArgumentList $TextColExpression , 9
|
||||||
}
|
}
|
||||||
foreach ($R in $Rows) {
|
foreach ($R in $Rows) {
|
||||||
#Disabled write-verbose for speed
|
#Disabled write-verbose for speed
|
||||||
# Write-Verbose "Import row '$R'"
|
# Write-Verbose "Import row '$R'"
|
||||||
$NewRow = [Ordered]@{ }
|
$NewRow = [Ordered]@{ }
|
||||||
if ($TextColRegEx) {
|
if ($TextColRegEx) {
|
||||||
foreach ($P in $PropertyNames) {
|
foreach ($P in $PropertyNames) {
|
||||||
if ($TextColRegEx.IsMatch($P.Value)) {
|
if ($TextColRegEx.IsMatch($P.Value)) {
|
||||||
$NewRow[$P.Value] = $Worksheet.Cells[$R, $P.Column].Text
|
$NewRow[$P.Value] = $Worksheet.Cells[$R, $P.Column].Text
|
||||||
|
}
|
||||||
|
else { $NewRow[$P.Value] = $Worksheet.Cells[$R, $P.Column].Value }
|
||||||
}
|
}
|
||||||
else { $NewRow[$P.Value] = $Worksheet.Cells[$R, $P.Column].Value }
|
|
||||||
}
|
}
|
||||||
}
|
else {
|
||||||
else {
|
foreach ($P in $PropertyNames) {
|
||||||
foreach ($P in $PropertyNames) {
|
$NewRow[$P.Value] = $Worksheet.Cells[$R, $P.Column].Value
|
||||||
$NewRow[$P.Value] = $Worksheet.Cells[$R, $P.Column].Value
|
# Write-Verbose "Import cell '$($Worksheet.Cells[$R, $P.Column].Address)' with property name '$($p.Value)' and value '$($Worksheet.Cells[$R, $P.Column].Value)'."
|
||||||
# Write-Verbose "Import cell '$($Worksheet.Cells[$R, $P.Column].Address)' with property name '$($p.Value)' and value '$($Worksheet.Cells[$R, $P.Column].Value)'."
|
}
|
||||||
}
|
}
|
||||||
|
[PSCustomObject]$NewRow
|
||||||
}
|
}
|
||||||
[PSCustomObject]$NewRow
|
#endregion
|
||||||
}
|
}
|
||||||
#endregion
|
}
|
||||||
|
catch { throw "Failed importing the Excel workbook '$Path' with worksheet '$Worksheetname': $_"; return }
|
||||||
|
finally {
|
||||||
|
if ($Path) { $stream.close(); $ExcelPackage.Dispose() }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch { throw "Failed importing the Excel workbook '$Path' with worksheet '$Worksheetname': $_"; return }
|
|
||||||
finally {
|
|
||||||
if ($Path) { $stream.close(); $ExcelPackage.Dispose() }
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
$xlfile = "TestDrive:\testImportExcel.xlsx"
|
$xlfile = "TestDrive:\testImportExcel.xlsx"
|
||||||
|
$xlfileHeaderOnly = "TestDrive:\testImportExcelHeaderOnly.xlsx"
|
||||||
|
|
||||||
Describe "Import-Excel on a sheet with no headings" {
|
Describe "Import-Excel on a sheet with no headings" {
|
||||||
BeforeAll {
|
BeforeAll {
|
||||||
@@ -18,6 +19,15 @@ Describe "Import-Excel on a sheet with no headings" {
|
|||||||
Set-ExcelRange -Worksheet $xl.Sheet1 -Range C3 -Value 'I'
|
Set-ExcelRange -Worksheet $xl.Sheet1 -Range C3 -Value 'I'
|
||||||
|
|
||||||
Close-ExcelPackage $xl
|
Close-ExcelPackage $xl
|
||||||
|
|
||||||
|
# create $xlfileHeaderOnly
|
||||||
|
$xl = "" | Export-excel $xlfileHeaderOnly -PassThru
|
||||||
|
|
||||||
|
Set-ExcelRange -Worksheet $xl.Sheet1 -Range A1 -Value 'A'
|
||||||
|
Set-ExcelRange -Worksheet $xl.Sheet1 -Range B1 -Value 'B'
|
||||||
|
Set-ExcelRange -Worksheet $xl.Sheet1 -Range C1 -Value 'C'
|
||||||
|
|
||||||
|
Close-ExcelPackage $xl
|
||||||
}
|
}
|
||||||
|
|
||||||
It "Import-Excel should have this shape" {
|
It "Import-Excel should have this shape" {
|
||||||
@@ -193,4 +203,25 @@ Describe "Import-Excel on a sheet with no headings" {
|
|||||||
# $actual[0].City | Should BeExactly 'Brussels'
|
# $actual[0].City | Should BeExactly 'Brussels'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
It "Should handle data correctly if there is only a single row" {
|
||||||
|
$actual = Import-Excel $xlfileHeaderOnly
|
||||||
|
$names = $actual.psobject.properties.Name
|
||||||
|
$names | should be $null
|
||||||
|
$actual.Count | should be 0
|
||||||
|
}
|
||||||
|
|
||||||
|
It "Should handle data correctly if there is only a single row and using -NoHeader " {
|
||||||
|
$actual = @(Import-Excel $xlfileHeaderOnly -WorksheetName Sheet1 -NoHeader)
|
||||||
|
|
||||||
|
$names = $actual[0].psobject.properties.Name
|
||||||
|
$names.count | should be 3
|
||||||
|
$names[0] | should be 'P1'
|
||||||
|
$names[1] | should be 'P2'
|
||||||
|
$names[2] | should be 'P3'
|
||||||
|
|
||||||
|
$actual.Count | should be 1
|
||||||
|
$actual[0].P1 | should be 'A'
|
||||||
|
$actual[0].P2 | should be 'B'
|
||||||
|
$actual[0].P3 | should be 'C'
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Reference in New Issue
Block a user