mirror of
https://github.com/dfinke/ImportExcel.git
synced 2025-12-20 10:13:16 +00:00
Merge pull request #1200 from jhoneill/master
Update to Get-HTMLTable, added Import-ByColumns function and example
This commit is contained in:
@@ -15,6 +15,6 @@ param(
|
|||||||
$galleryUrl = "https://www.powershellgallery.com/packages/$moduleName"
|
$galleryUrl = "https://www.powershellgallery.com/packages/$moduleName"
|
||||||
$nolegend = '-nolegend'
|
$nolegend = '-nolegend'
|
||||||
if($chartType -eq 'pie') {$nolegend = $null}
|
if($chartType -eq 'pie') {$nolegend = $null}
|
||||||
$code = "$($chartType)Chart (Get-HtmlTable $galleryUrl 0 | sort lastupdated -desc) -title 'Download stats for $moduleName' $nolegend"
|
$code = "$($chartType)Chart (Get-HtmlTable $galleryUrl -FirstDataRow 1 | sort lastupdated -desc) -title 'Download stats for $moduleName' $nolegend"
|
||||||
|
|
||||||
$code | Invoke-Expression
|
$code | Invoke-Expression
|
||||||
BIN
Examples/ImportByColumns/FruitCity.xlsx
Normal file
BIN
Examples/ImportByColumns/FruitCity.xlsx
Normal file
Binary file not shown.
BIN
Examples/ImportByColumns/VM_Build_Example.xlsx
Normal file
BIN
Examples/ImportByColumns/VM_Build_Example.xlsx
Normal file
Binary file not shown.
146
Examples/ImportByColumns/import-by-columns.ps1
Normal file
146
Examples/ImportByColumns/import-by-columns.ps1
Normal file
@@ -0,0 +1,146 @@
|
|||||||
|
function Import-ByColumns {
    <#
      .Synopsis
        Works like Import-Excel but with data in columns instead of the conventional rows.
      .Description
        Import-Excel will read the sample file in this folder like this
        > Import-Excel FruitCity.xlsx | ft *
        GroupAs Apple Orange Banana
        ------- ----- ------ ------
        London  1     4      9
        Paris   2     4      10
        NewYork 6     5      11
        Munich  7     8      12

        Import-ByColumns transposes it
        > Import-ByColumns FruitCity.xlsx | ft *
        GroupAs London Paris NewYork Munich
        ------- ------ ----- ------- ------
        Apple   1      2     6       7
        Orange  4      4     5       8
        Banana  9      10    11      12

        The column given by -StartColumn supplies the property names (unless -HeaderName or
        -NoHeader is used); every column to its right, up to -EndColumn, becomes one output object.
        When the workbook is opened from -Path it is closed again (without saving) before the
        function returns; a package passed with -ExcelPackage is left open for the caller.
      .Parameter Path
        Path of the workbook to read. It is opened with Open-ExcelPackage.
      .Parameter ExcelPackage
        An already opened ExcelPackage to read from instead of a path.
      .Parameter WorksheetName
        Name of the worksheet to read. When omitted, Worksheets[1] of the workbook is used.
      .Parameter HeaderName
        Property names to use instead of the labels in the start column. They are paired, in order,
        with the rows starting at -StartRow.
      .Parameter NoHeader
        Ignore the labels in the start column and name the properties P1, P2, P3 ...
      .Parameter StartRow
        First row to read (default 1).
      .Parameter EndRow
        Last row to read. Defaults to the last row of the worksheet's dimension.
      .Parameter StartColumn
        Column that holds the labels (default 1). Data is read from the column after it.
      .Parameter EndColumn
        Last column to read. Defaults to the last column of the worksheet's dimension.
      .Parameter DataOnly
        Skip columns in which every selected cell is empty. Not applied when -AsHash is used.
      .Parameter AsHash
        Output each column as an ordered dictionary instead of a PSCustomObject.
      .Parameter Password
        Password passed to Open-ExcelPackage when opening the workbook from -Path.
      .Example
        C:\> Import-ByColumns -path .\VM_Build_Example.xlsx -StartRow 7 -EndRow 21 -EndColumn 7 -HeaderName Desc,size,type,
              cpu,ram,NetAcc,OS,OSDiskSize,DataDiskSize,LogDiskSize,TempDbDiskSize,BackupDiskSize,ImageDiskDize,AzureBackup,AzureReplication | ft -a *

        This reads a spreadsheet which has a block from row 7 to 21 containing 15 properties of virtual machines.
        The property names are in column A and the 6 VMs are in columns B-G.
        Because the property names are written for easy reading by the person completing the spreadsheet, they are replaced with new names.
        All the parameters work as they would for Import-Excel.
    #>
    [Diagnostics.CodeAnalysis.SuppressMessageAttribute("PSAvoidUsingPlainTextForPassword", "")]
    param(
        [Alias('FullName')]
        [Parameter(ParameterSetName = "PathA", Mandatory, ValueFromPipelineByPropertyName, ValueFromPipeline, Position = 0 )]
        [Parameter(ParameterSetName = "PathB", Mandatory, ValueFromPipelineByPropertyName, ValueFromPipeline, Position = 0 )]
        [Parameter(ParameterSetName = "PathC", Mandatory, ValueFromPipelineByPropertyName, ValueFromPipeline, Position = 0 )]
        [String]$Path,

        [Parameter(ParameterSetName = "PackageA", Mandatory)]
        [Parameter(ParameterSetName = "PackageB", Mandatory)]
        [Parameter(ParameterSetName = "PackageC", Mandatory)]
        [OfficeOpenXml.ExcelPackage]$ExcelPackage,

        [Alias('Sheet')]
        [Parameter(Position = 1)]
        [ValidateNotNullOrEmpty()]
        [String]$WorksheetName,

        [Parameter(ParameterSetName = 'PathB'   , Mandatory)]
        [Parameter(ParameterSetName = 'PackageB', Mandatory)]
        [String[]]$HeaderName ,
        [Parameter(ParameterSetName = 'PathC'   , Mandatory)]
        [Parameter(ParameterSetName = 'PackageC', Mandatory)]
        [Switch]$NoHeader,

        [Alias('TopRow')]
        [ValidateRange(1, 9999)]
        [Int]$StartRow = 1,

        [Alias('StopRow', 'BottomRow')]
        [Int]$EndRow ,

        [Alias('LeftColumn','LabelColumn')]
        [Int]$StartColumn = 1,

        [Int]$EndColumn,
        [switch]$DataOnly,
        [switch]$AsHash,

        [ValidateNotNullOrEmpty()]
        [String]$Password
    )
    # NOTE(review): there is no process block, so when several paths are piped in only the
    # last one bound to -Path is read.

    function Get-PropertyNames {
        <#
          .SYNOPSIS
            Create objects containing the row number and the row name for each of the different header types.
          .DESCRIPTION
            Emits one object per property with two members: Row (worksheet row to read) and
            Value (property name). $HeaderName, $NoHeader and $Worksheet are not parameters;
            they are read from the enclosing Import-ByColumns scope.
        #>
        [Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseSingularNouns', '', Justification = "Name would be incorrect, and command is not exported")]
        param(
            [Parameter(Mandatory)]
            [Int[]]$Rows,
            [Parameter(Mandatory)]
            [Int]$StartColumn
        )
        if ($HeaderName) {
            # Caller-supplied names: pair the n-th name with the n-th selected row.
            $i = 0
            foreach ($h in $HeaderName) {
                $h | Select-Object @{n='Row'; e={$rows[$i]}}, @{n='Value'; e={$h} }
                $i++
            }
        }
        elseif ($NoHeader) {
            # Generated names P1, P2 ... one for every selected row.
            $i = 0
            foreach ($r in $rows) {
                $i++
                $r | Select-Object @{n='Row'; e={$_}}, @{n='Value'; e={"P$i"} }
            }
        }
        else {
            # Names come from the label column; rows whose label cell is empty are dropped.
            foreach ($r in $Rows) {
                #allow "False" or "0" to be headings
                $Worksheet.Cells[$r, $StartColumn] | Where-Object {-not [string]::IsNullOrEmpty($_.Value) } | Select-Object @{n='Row'; e={$r} }, Value
            }
        }
    }

    # Only a package opened by this function is closed by it.
    $openedHere = $false
    try {
        #region open file if necessary, find worksheet and ensure we have start/end row/columns
        if ($Path -and -not $ExcelPackage) {
            if ($Password) { $ExcelPackage = Open-ExcelPackage -Path $Path -Password $Password }
            else           { $ExcelPackage = Open-ExcelPackage -Path $Path }
            $openedHere = [bool]$ExcelPackage
        }
        if (-not $ExcelPackage) {
            throw 'Could not get an Excel workbook to work on'
        }

        if (-not $WorksheetName) { $Worksheet = $ExcelPackage.Workbook.Worksheets[1] }
        elseif (-not ($Worksheet = $ExcelPackage.Workbook.Worksheets[$WorkSheetName])) {
            throw "Worksheet '$WorksheetName' not found, the workbook only contains the worksheets '$($ExcelPackage.Workbook.Worksheets)'. If you only wish to select the first worksheet, please remove the '-WorksheetName' parameter."
        }

        if (-not $EndRow   ) { $EndRow    = $Worksheet.Dimension.End.Row }
        if (-not $EndColumn) { $EndColumn = $Worksheet.Dimension.End.Column }
        #endregion

        # The .. operator counts downwards when the end is below the start, so build the
        # ranges only when they run forwards. An empty sheet (EndRow 0) or a sheet with
        # nothing to the right of the label column then yields an empty list instead of
        # row 0 or the label column being read as data.
        $Rows    = @(if ($EndRow    -ge $StartRow   ) { $StartRow..$EndRow })
        $Columns = @(if ($EndColumn -gt $StartColumn) { (1 + $StartColumn)..$EndColumn })

        if ((-not $Rows) -or (-not ($PropertyNames = Get-PropertyNames -Rows $Rows -StartColumn $StartColumn))) {
            throw "No headers found in left coulmn '$Startcolumn'. "
        }
        if (-not $Columns) {
            Write-Warning "Worksheet '$WorksheetName' in workbook contains no data in the rows after left column '$StartColumn'"
        }
        else {
            foreach ($c in $Columns) {
                # One ordered dictionary per worksheet column, keyed by property name,
                # holding each cell's displayed text.
                $NewColumn = [Ordered]@{ }
                foreach ($p in $PropertyNames) {
                    $NewColumn[$p.Value] = $Worksheet.Cells[$p.row,$c].text
                }
                if     ($AsHash) {$NewColumn}
                # With -DataOnly a column is emitted only if at least one value is non-empty.
                elseif (($NewColumn.Values -ne "") -or -not $dataonly) {[PSCustomObject]$NewColumn}
            }
        }
    }
    finally {
        # Release the workbook we opened ourselves; never save, this function only reads.
        if ($openedHere) { Close-ExcelPackage -ExcelPackage $ExcelPackage -NoSave }
    }
}
|
||||||
@@ -1,52 +0,0 @@
|
|||||||
|
|
||||||
Function Import-Bycolumns {
|
|
||||||
Param(
|
|
||||||
[Parameter(Mandatory=$true)]
|
|
||||||
[OfficeOpenXml.ExcelPackage]$ExcelPackage,
|
|
||||||
[Int]$StartRow = 1,
|
|
||||||
[String]$WorksheetName,
|
|
||||||
[Int]$EndRow ,
|
|
||||||
[Int]$StartColumn = 1,
|
|
||||||
[Int]$EndColumn
|
|
||||||
)
|
|
||||||
Function Get-RowNames {
|
|
||||||
[Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseSingularNouns', '', Justification = "Name would be incorrect, and command is not exported")]
|
|
||||||
param(
|
|
||||||
[Parameter(Mandatory)]
|
|
||||||
[Int[]]$Rows,
|
|
||||||
[Parameter(Mandatory)]
|
|
||||||
[Int]$StartColumn
|
|
||||||
)
|
|
||||||
foreach ($R in $Rows) {
|
|
||||||
#allow "False" or "0" to be headings
|
|
||||||
$Worksheet.Cells[$R, $StartColumn] | Where-Object {-not [string]::IsNullOrEmpty($_.Value) } | Select-Object @{N = 'Row'; E = { $R } }, Value
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (-not $WorksheetName) { $Worksheet = $ExcelPackage.Workbook.Worksheets[1] }
|
|
||||||
elseif (-not ($Worksheet = $ExcelPackage.Workbook.Worksheets[$WorkSheetName])) {
|
|
||||||
throw "Worksheet '$WorksheetName' not found, the workbook only contains the worksheets '$($ExcelPackage.Workbook.Worksheets)'. If you only wish to select the first worksheet, please remove the '-WorksheetName' parameter." ; return
|
|
||||||
}
|
|
||||||
|
|
||||||
if (-not $EndRow ) { $EndRow = $Worksheet.Dimension.End.Row }
|
|
||||||
if (-not $EndColumn) { $EndColumn = $Worksheet.Dimension.End.Column }
|
|
||||||
|
|
||||||
$Rows = $Startrow .. $EndRow ;
|
|
||||||
$Columns = (1 + $StartColumn)..$EndColumn
|
|
||||||
|
|
||||||
if ((-not $rows) -or (-not ($PropertyNames = Get-RowNames -Rows $Rows -StartColumn $StartColumn))) {
|
|
||||||
throw "No headers found in left coulmn '$Startcolumn'. "; return
|
|
||||||
}
|
|
||||||
if (-not $Columns) {
|
|
||||||
Write-Warning "Worksheet '$WorksheetName' in workbook contains no data in the rows after left column '$StartColumn'"
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
foreach ($c in $Columns) {
|
|
||||||
$NewColumn = [Ordered]@{ }
|
|
||||||
foreach ($p in $PropertyNames) {
|
|
||||||
$NewColumn[$p.Value] = $Worksheet.Cells[$p.row,$c].text
|
|
||||||
}
|
|
||||||
[PSCustomObject]$NewColumn
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -9,36 +9,75 @@ function Get-HtmlTable {
|
|||||||
[int]$FirstDataRow=0,
|
[int]$FirstDataRow=0,
|
||||||
[Switch]$UseDefaultCredentials
|
[Switch]$UseDefaultCredentials
|
||||||
)
|
)
|
||||||
|
if ($PSVersionTable.PSVersion.Major -gt 5 -and -not (Get-Command ConvertFrom-Html -ErrorAction SilentlyContinue)) {
|
||||||
|
# Invoke-WebRequest on .NET core doesn't have ParsedHtml so we need HtmlAgilityPack or similiar Justin Grote's PowerHTML wraps that nicely
|
||||||
|
throw "This version of PowerShell needs the PowerHTML module to process HTML Tables."
|
||||||
|
}
|
||||||
|
|
||||||
$r = Invoke-WebRequest $Url -UseDefaultCredentials: $UseDefaultCredentials
|
$r = Invoke-WebRequest $Url -UseDefaultCredentials: $UseDefaultCredentials
|
||||||
|
$propertyNames = $Header
|
||||||
|
|
||||||
$table = $r.ParsedHtml.getElementsByTagName("table")[$TableIndex]
|
if ($PSVersionTable.PSVersion.Major -le 5) {
|
||||||
$propertyNames=$Header
|
$table = $r.ParsedHtml.getElementsByTagName("table")[$TableIndex]
|
||||||
$totalRows=@($table.rows).count
|
$totalRows=@($table.rows).count
|
||||||
|
|
||||||
for ($idx = $FirstDataRow; $idx -lt $totalRows; $idx++) {
|
for ($idx = $FirstDataRow; $idx -lt $totalRows; $idx++) {
|
||||||
|
|
||||||
$row = $table.rows[$idx]
|
$row = $table.rows[$idx]
|
||||||
$cells = @($row.cells)
|
$cells = @($row.cells)
|
||||||
|
|
||||||
if(!$propertyNames) {
|
if(!$propertyNames) {
|
||||||
if($cells[0].tagName -eq 'th') {
|
if($cells[0].tagName -eq 'th') {
|
||||||
$propertyNames = @($cells | ForEach-Object {$_.innertext -replace ' ',''})
|
$propertyNames = @($cells | ForEach-Object {$_.innertext -replace ' ',''})
|
||||||
} else {
|
} else {
|
||||||
$propertyNames = @(1..($cells.Count + 2) | Foreach-Object { "P$_" })
|
$propertyNames = @(1..($cells.Count + 2) | Foreach-Object { "P$_" })
|
||||||
|
}
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
continue
|
|
||||||
|
$result = [ordered]@{}
|
||||||
|
|
||||||
|
for($counter = 0; $counter -lt $cells.Count; $counter++) {
|
||||||
|
$propertyName = $propertyNames[$counter]
|
||||||
|
|
||||||
|
if(!$propertyName) { $propertyName= '[missing]'}
|
||||||
|
$result.$propertyName= $cells[$counter].InnerText
|
||||||
|
}
|
||||||
|
|
||||||
|
[PSCustomObject]$result
|
||||||
}
|
}
|
||||||
|
}
|
||||||
$result = [ordered]@{}
|
else {
|
||||||
|
$h = ConvertFrom-Html -Content $r.Content
|
||||||
for($counter = 0; $counter -lt $cells.Count; $counter++) {
|
if ($TableIndex -is [valuetype]) { $TableIndex += 1}
|
||||||
$propertyName = $propertyNames[$counter]
|
$rows = $h.SelectNodes("//table[$TableIndex]//tr")
|
||||||
|
if (-not $rows) {Write-Warning "Could not find rows for `"//table[$TableIndex]`" in $Url ."}
|
||||||
if(!$propertyName) { $propertyName= '[missing]'}
|
if ( -not $propertyNames) {
|
||||||
$result.$propertyName= $cells[$counter].InnerText
|
if ( $tableHeaders = $rows[$FirstDataRow].SelectNodes("th")) {
|
||||||
|
$propertyNames = $tableHeaders.foreach({[System.Web.HttpUtility]::HtmlDecode( $_.innerText ) -replace '\W+','_' -replace '(\w)_+$','$1' })
|
||||||
|
$FirstDataRow += 1
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
$c = 0
|
||||||
|
$propertyNames = $rows[$FirstDataRow].SelectNodes("td") | Foreach-Object { "P$c" ; $c ++ }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Write-Verbose ("Property names: " + ($propertyNames -join ","))
|
||||||
|
foreach ($n in $FirstDataRow..($rows.Count-1)) {
|
||||||
|
$r = $rows[$n].SelectNodes("td|th")
|
||||||
|
if ($r -and $r.innerText -ne "" -and $r.count -gt $rows[$n].SelectNodes("th").count ) {
|
||||||
|
$c = 0
|
||||||
|
$newObj = [ordered]@{}
|
||||||
|
foreach ($p in $propertyNames) {
|
||||||
|
$n = $null
|
||||||
|
#Join descentandts for cases where the text in the cell is split (e.g with a <BR> ). We also want to remove HTML codes, trim and convert unicode minus sign to "-"
|
||||||
|
$cellText = $r[$c].Descendants().where({$_.NodeType -eq "Text"}).foreach({[System.Web.HttpUtility]::HtmlDecode( $_.innerText ).Trim()}) -Join " " -replace "\u2212","-"
|
||||||
|
if ([double]::TryParse($cellText, [ref]$n)) {$newObj[$p] = $n }
|
||||||
|
else {$newObj[$p] = $cellText }
|
||||||
|
$c ++
|
||||||
|
}
|
||||||
|
[pscustomObject]$newObj
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
[PSCustomObject]$result
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,13 +3,13 @@ function Import-Html {
|
|||||||
[CmdletBinding()]
|
[CmdletBinding()]
|
||||||
param(
|
param(
|
||||||
$Url,
|
$Url,
|
||||||
$Index,
|
[int]$Index = 0,
|
||||||
$Header,
|
$Header,
|
||||||
[int]$FirstDataRow=0,
|
[int]$FirstDataRow = 0,
|
||||||
[Switch]$UseDefaultCredentials
|
[Switch]$UseDefaultCredentials
|
||||||
)
|
)
|
||||||
|
|
||||||
$xlFile = [System.IO.Path]::GetTempFileName() -replace "tmp","xlsx"
|
$xlFile = [System.IO.Path]::GetTempFileName() -replace "tmp", "xlsx"
|
||||||
Remove-Item $xlFile -ErrorAction Ignore
|
Remove-Item $xlFile -ErrorAction Ignore
|
||||||
|
|
||||||
Write-Verbose "Exporting to Excel file $($xlFile)"
|
Write-Verbose "Exporting to Excel file $($xlFile)"
|
||||||
|
|||||||
@@ -1,5 +1,12 @@
|
|||||||
# 7.6.0
|
# 7.6.0
|
||||||
- Fix -StartRow and -StartColumn being ignored.
|
|
||||||
|
- **_[Under investigation]_** Fix -StartRow and -StartColumn being ignored.
|
||||||
|
- James O'Neill:
|
||||||
|
- Update Get-HtmlTable to support using PowerHTML (maintained by [Justin Grote](https://twitter.com/JustinWGrote)).
|
||||||
|
- Added an example that includes a new function, Import-ByColumns. It works like Import-Excel but with data in columns instead of the conventional rows.
|
||||||
|
- Update Import-HTML with better defaults
|
||||||
|
- Fixed example `Get-ModuleStats.ps1` which reads the PowerShell Gallery page and extracts the stats table
|
||||||
|
|
||||||
|
|
||||||
# v7.5.2
|
# v7.5.2
|
||||||
- Changed the switch `-NotAsDictionary` to `-Raw`. Works with `-Worksheetname *` reads all the sheets in the xlsx file and returns an array.
|
- Changed the switch `-NotAsDictionary` to `-Raw`. Works with `-Worksheetname *` reads all the sheets in the xlsx file and returns an array.
|
||||||
|
|||||||
Reference in New Issue
Block a user