mirror of
https://github.com/dfinke/ImportExcel.git
synced 2025-12-15 15:53:32 +00:00
Merge pull request #1200 from jhoneill/master
Update to Get-HTMLTable, added Import-ByColuumns function and example
This commit is contained in:
@@ -15,6 +15,6 @@ param(
|
||||
$galleryUrl = "https://www.powershellgallery.com/packages/$moduleName"
|
||||
$nolegend = '-nolegend'
|
||||
if($chartType -eq 'pie') {$nolegend = $null}
|
||||
$code = "$($chartType)Chart (Get-HtmlTable $galleryUrl 0 | sort lastupdated -desc) -title 'Download stats for $moduleName' $nolegend"
|
||||
$code = "$($chartType)Chart (Get-HtmlTable $galleryUrl -FirstDataRow 1 | sort lastupdated -desc) -title 'Download stats for $moduleName' $nolegend"
|
||||
|
||||
$code | Invoke-Expression
|
||||
BIN
Examples/ImportByColumns/FruitCity.xlsx
Normal file
BIN
Examples/ImportByColumns/FruitCity.xlsx
Normal file
Binary file not shown.
BIN
Examples/ImportByColumns/VM_Build_Example.xlsx
Normal file
BIN
Examples/ImportByColumns/VM_Build_Example.xlsx
Normal file
Binary file not shown.
146
Examples/ImportByColumns/import-by-columns.ps1
Normal file
146
Examples/ImportByColumns/import-by-columns.ps1
Normal file
@@ -0,0 +1,146 @@
|
||||
function Import-ByColumns {
|
||||
<#
|
||||
.synopsis
|
||||
Works like Import-Excel but with data in columns instead of the conventional rows.
|
||||
.Description.
|
||||
Import-excel will read the sample file in this folder like this
|
||||
> Import-excel FruitCity.xlsx | ft *
|
||||
GroupAs Apple Orange Banana
|
||||
------- ----- ------ ------
|
||||
London 1 4 9
|
||||
Paris 2 4 10
|
||||
NewYork 6 5 11
|
||||
Munich 7 8 12
|
||||
Import-ByColumns transposes it
|
||||
> Import-Bycolumns FruitCity.xlsx | ft *
|
||||
GroupAs London Paris NewYork Munich
|
||||
------- ------ ----- ------- ------
|
||||
Apple 1 2 6 7
|
||||
Orange 4 4 5 8
|
||||
Banana 9 10 11 12
|
||||
.Example
|
||||
C:\> Import-Bycolumns -path .\VM_Build_Example.xlsx -StartRow 7 -EndRow 21 -EndColumn 7 -HeaderName Desc,size,type,
|
||||
cpu,ram,NetAcc,OS,OSDiskSize,DataDiskSize,LogDiskSize,TempDbDiskSize,BackupDiskSize,ImageDiskDize,AzureBackup,AzureReplication | ft -a *
|
||||
|
||||
This reads a spreadsheet which has a block from row 7 to 21 containing 14 properties of virtual machines.
|
||||
The properties names are in column A and the 6 VMS are in columns B-G
|
||||
Because the property names are written for easy reading by the person completing the spreadsheet, they are replaced with new names.
|
||||
All the parameters work as they would for Import-Excel
|
||||
#>
|
||||
|
||||
[Diagnostics.CodeAnalysis.SuppressMessageAttribute("PSAvoidUsingPlainTextForPassword", "")]
|
||||
param(
|
||||
[Alias('FullName')]
|
||||
[Parameter(ParameterSetName = "PathA", Mandatory, ValueFromPipelineByPropertyName, ValueFromPipeline, Position = 0 )]
|
||||
[Parameter(ParameterSetName = "PathB", Mandatory, ValueFromPipelineByPropertyName, ValueFromPipeline, Position = 0 )]
|
||||
[Parameter(ParameterSetName = "PathC", Mandatory, ValueFromPipelineByPropertyName, ValueFromPipeline, Position = 0 )]
|
||||
[String]$Path,
|
||||
|
||||
[Parameter(ParameterSetName = "PackageA", Mandatory)]
|
||||
[Parameter(ParameterSetName = "PackageB", Mandatory)]
|
||||
[Parameter(ParameterSetName = "PackageC", Mandatory)]
|
||||
[OfficeOpenXml.ExcelPackage]$ExcelPackage,
|
||||
|
||||
[Alias('Sheet')]
|
||||
[Parameter(Position = 1)]
|
||||
[ValidateNotNullOrEmpty()]
|
||||
[String]$WorksheetName,
|
||||
|
||||
[Parameter(ParameterSetName = 'PathB' , Mandatory)]
|
||||
[Parameter(ParameterSetName = 'PackageB', Mandatory)]
|
||||
[String[]]$HeaderName ,
|
||||
[Parameter(ParameterSetName = 'PathC' , Mandatory)]
|
||||
[Parameter(ParameterSetName = 'PackageC', Mandatory)]
|
||||
[Switch]$NoHeader,
|
||||
|
||||
[Alias('TopRow')]
|
||||
[ValidateRange(1, 9999)]
|
||||
[Int]$StartRow = 1,
|
||||
|
||||
[Alias('StopRow', 'BottomRow')]
|
||||
[Int]$EndRow ,
|
||||
|
||||
[Alias('LeftColumn','LabelColumn')]
|
||||
[Int]$StartColumn = 1,
|
||||
|
||||
[Int]$EndColumn,
|
||||
[switch]$DataOnly,
|
||||
[switch]$AsHash,
|
||||
|
||||
[ValidateNotNullOrEmpty()]
|
||||
[String]$Password
|
||||
)
|
||||
function Get-PropertyNames {
|
||||
<#
|
||||
.SYNOPSIS
|
||||
Create objects containing the row number and the row name for each of the different header types.
|
||||
#>
|
||||
[Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseSingularNouns', '', Justification = "Name would be incorrect, and command is not exported")]
|
||||
param(
|
||||
[Parameter(Mandatory)]
|
||||
[Int[]]$Rows,
|
||||
[Parameter(Mandatory)]
|
||||
[Int]$StartColumn
|
||||
)
|
||||
if ($HeaderName) {
|
||||
$i = 0
|
||||
foreach ($h in $HeaderName) {
|
||||
$h | Select-Object @{n='Row'; e={$rows[$i]}}, @{n='Value'; e={$h} }
|
||||
$i++
|
||||
}
|
||||
}
|
||||
elseif ($NoHeader) {
|
||||
$i = 0
|
||||
foreach ($r in $rows) {
|
||||
$i++
|
||||
$r | Select-Object @{n='Row'; e={$_}}, @{n='Value'; e={"P$i"} }
|
||||
}
|
||||
}
|
||||
else {
|
||||
foreach ($r in $Rows) {
|
||||
#allow "False" or "0" to be headings
|
||||
$Worksheet.Cells[$r, $StartColumn] | Where-Object {-not [string]::IsNullOrEmpty($_.Value) } | Select-Object @{n='Row'; e={$r} }, Value
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#region open file if necessary, find worksheet and ensure we have start/end row/columns
|
||||
if ($Path -and -not $ExcelPackage -and $Password) {
|
||||
$ExcelPackage = Open-ExcelPackage -Path $Path -Password $Password
|
||||
}
|
||||
elseif ($Path -and -not $ExcelPackage ) {
|
||||
$ExcelPackage = Open-ExcelPackage -Path $Path
|
||||
}
|
||||
if (-not $ExcelPackage) {
|
||||
throw 'Could not get an Excel workbook to work on' ; return
|
||||
}
|
||||
|
||||
if (-not $WorksheetName) { $Worksheet = $ExcelPackage.Workbook.Worksheets[1] }
|
||||
elseif (-not ($Worksheet = $ExcelPackage.Workbook.Worksheets[$WorkSheetName])) {
|
||||
throw "Worksheet '$WorksheetName' not found, the workbook only contains the worksheets '$($ExcelPackage.Workbook.Worksheets)'. If you only wish to select the first worksheet, please remove the '-WorksheetName' parameter." ; return
|
||||
}
|
||||
|
||||
if (-not $EndRow ) { $EndRow = $Worksheet.Dimension.End.Row }
|
||||
if (-not $EndColumn) { $EndColumn = $Worksheet.Dimension.End.Column }
|
||||
#endregion
|
||||
|
||||
$Rows = $Startrow .. $EndRow ;
|
||||
$Columns = (1 + $StartColumn)..$EndColumn
|
||||
|
||||
if ((-not $rows) -or (-not ($PropertyNames = Get-PropertyNames -Rows $Rows -StartColumn $StartColumn))) {
|
||||
throw "No headers found in left coulmn '$Startcolumn'. "; return
|
||||
}
|
||||
if (-not $Columns) {
|
||||
Write-Warning "Worksheet '$WorksheetName' in workbook contains no data in the rows after left column '$StartColumn'"
|
||||
}
|
||||
else {
|
||||
foreach ($c in $Columns) {
|
||||
$NewColumn = [Ordered]@{ }
|
||||
foreach ($p in $PropertyNames) {
|
||||
$NewColumn[$p.Value] = $Worksheet.Cells[$p.row,$c].text
|
||||
}
|
||||
if ($AsHash) {$NewColumn}
|
||||
elseif (($NewColumn.Values -ne "") -or -not $dataonly) {[PSCustomObject]$NewColumn}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,52 +0,0 @@
|
||||
|
||||
Function Import-Bycolumns {
|
||||
Param(
|
||||
[Parameter(Mandatory=$true)]
|
||||
[OfficeOpenXml.ExcelPackage]$ExcelPackage,
|
||||
[Int]$StartRow = 1,
|
||||
[String]$WorksheetName,
|
||||
[Int]$EndRow ,
|
||||
[Int]$StartColumn = 1,
|
||||
[Int]$EndColumn
|
||||
)
|
||||
Function Get-RowNames {
|
||||
[Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseSingularNouns', '', Justification = "Name would be incorrect, and command is not exported")]
|
||||
param(
|
||||
[Parameter(Mandatory)]
|
||||
[Int[]]$Rows,
|
||||
[Parameter(Mandatory)]
|
||||
[Int]$StartColumn
|
||||
)
|
||||
foreach ($R in $Rows) {
|
||||
#allow "False" or "0" to be headings
|
||||
$Worksheet.Cells[$R, $StartColumn] | Where-Object {-not [string]::IsNullOrEmpty($_.Value) } | Select-Object @{N = 'Row'; E = { $R } }, Value
|
||||
}
|
||||
}
|
||||
|
||||
if (-not $WorksheetName) { $Worksheet = $ExcelPackage.Workbook.Worksheets[1] }
|
||||
elseif (-not ($Worksheet = $ExcelPackage.Workbook.Worksheets[$WorkSheetName])) {
|
||||
throw "Worksheet '$WorksheetName' not found, the workbook only contains the worksheets '$($ExcelPackage.Workbook.Worksheets)'. If you only wish to select the first worksheet, please remove the '-WorksheetName' parameter." ; return
|
||||
}
|
||||
|
||||
if (-not $EndRow ) { $EndRow = $Worksheet.Dimension.End.Row }
|
||||
if (-not $EndColumn) { $EndColumn = $Worksheet.Dimension.End.Column }
|
||||
|
||||
$Rows = $Startrow .. $EndRow ;
|
||||
$Columns = (1 + $StartColumn)..$EndColumn
|
||||
|
||||
if ((-not $rows) -or (-not ($PropertyNames = Get-RowNames -Rows $Rows -StartColumn $StartColumn))) {
|
||||
throw "No headers found in left coulmn '$Startcolumn'. "; return
|
||||
}
|
||||
if (-not $Columns) {
|
||||
Write-Warning "Worksheet '$WorksheetName' in workbook contains no data in the rows after left column '$StartColumn'"
|
||||
}
|
||||
else {
|
||||
foreach ($c in $Columns) {
|
||||
$NewColumn = [Ordered]@{ }
|
||||
foreach ($p in $PropertyNames) {
|
||||
$NewColumn[$p.Value] = $Worksheet.Cells[$p.row,$c].text
|
||||
}
|
||||
[PSCustomObject]$NewColumn
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -9,36 +9,75 @@ function Get-HtmlTable {
|
||||
[int]$FirstDataRow=0,
|
||||
[Switch]$UseDefaultCredentials
|
||||
)
|
||||
if ($PSVersionTable.PSVersion.Major -gt 5 -and -not (Get-Command ConvertFrom-Html -ErrorAction SilentlyContinue)) {
|
||||
# Invoke-WebRequest on .NET core doesn't have ParsedHtml so we need HtmlAgilityPack or similiar Justin Grote's PowerHTML wraps that nicely
|
||||
throw "This version of PowerShell needs the PowerHTML module to process HTML Tables."
|
||||
}
|
||||
|
||||
$r = Invoke-WebRequest $Url -UseDefaultCredentials: $UseDefaultCredentials
|
||||
$propertyNames = $Header
|
||||
|
||||
$table = $r.ParsedHtml.getElementsByTagName("table")[$TableIndex]
|
||||
$propertyNames=$Header
|
||||
$totalRows=@($table.rows).count
|
||||
if ($PSVersionTable.PSVersion.Major -le 5) {
|
||||
$table = $r.ParsedHtml.getElementsByTagName("table")[$TableIndex]
|
||||
$totalRows=@($table.rows).count
|
||||
|
||||
for ($idx = $FirstDataRow; $idx -lt $totalRows; $idx++) {
|
||||
for ($idx = $FirstDataRow; $idx -lt $totalRows; $idx++) {
|
||||
|
||||
$row = $table.rows[$idx]
|
||||
$cells = @($row.cells)
|
||||
$row = $table.rows[$idx]
|
||||
$cells = @($row.cells)
|
||||
|
||||
if(!$propertyNames) {
|
||||
if($cells[0].tagName -eq 'th') {
|
||||
$propertyNames = @($cells | ForEach-Object {$_.innertext -replace ' ',''})
|
||||
} else {
|
||||
$propertyNames = @(1..($cells.Count + 2) | Foreach-Object { "P$_" })
|
||||
if(!$propertyNames) {
|
||||
if($cells[0].tagName -eq 'th') {
|
||||
$propertyNames = @($cells | ForEach-Object {$_.innertext -replace ' ',''})
|
||||
} else {
|
||||
$propertyNames = @(1..($cells.Count + 2) | Foreach-Object { "P$_" })
|
||||
}
|
||||
continue
|
||||
}
|
||||
continue
|
||||
|
||||
$result = [ordered]@{}
|
||||
|
||||
for($counter = 0; $counter -lt $cells.Count; $counter++) {
|
||||
$propertyName = $propertyNames[$counter]
|
||||
|
||||
if(!$propertyName) { $propertyName= '[missing]'}
|
||||
$result.$propertyName= $cells[$counter].InnerText
|
||||
}
|
||||
|
||||
[PSCustomObject]$result
|
||||
}
|
||||
|
||||
$result = [ordered]@{}
|
||||
|
||||
for($counter = 0; $counter -lt $cells.Count; $counter++) {
|
||||
$propertyName = $propertyNames[$counter]
|
||||
|
||||
if(!$propertyName) { $propertyName= '[missing]'}
|
||||
$result.$propertyName= $cells[$counter].InnerText
|
||||
}
|
||||
else {
|
||||
$h = ConvertFrom-Html -Content $r.Content
|
||||
if ($TableIndex -is [valuetype]) { $TableIndex += 1}
|
||||
$rows = $h.SelectNodes("//table[$TableIndex]//tr")
|
||||
if (-not $rows) {Write-Warning "Could not find rows for `"//table[$TableIndex]`" in $Url ."}
|
||||
if ( -not $propertyNames) {
|
||||
if ( $tableHeaders = $rows[$FirstDataRow].SelectNodes("th")) {
|
||||
$propertyNames = $tableHeaders.foreach({[System.Web.HttpUtility]::HtmlDecode( $_.innerText ) -replace '\W+','_' -replace '(\w)_+$','$1' })
|
||||
$FirstDataRow += 1
|
||||
}
|
||||
else {
|
||||
$c = 0
|
||||
$propertyNames = $rows[$FirstDataRow].SelectNodes("td") | Foreach-Object { "P$c" ; $c ++ }
|
||||
}
|
||||
}
|
||||
Write-Verbose ("Property names: " + ($propertyNames -join ","))
|
||||
foreach ($n in $FirstDataRow..($rows.Count-1)) {
|
||||
$r = $rows[$n].SelectNodes("td|th")
|
||||
if ($r -and $r.innerText -ne "" -and $r.count -gt $rows[$n].SelectNodes("th").count ) {
|
||||
$c = 0
|
||||
$newObj = [ordered]@{}
|
||||
foreach ($p in $propertyNames) {
|
||||
$n = $null
|
||||
#Join descentandts for cases where the text in the cell is split (e.g with a <BR> ). We also want to remove HTML codes, trim and convert unicode minus sign to "-"
|
||||
$cellText = $r[$c].Descendants().where({$_.NodeType -eq "Text"}).foreach({[System.Web.HttpUtility]::HtmlDecode( $_.innerText ).Trim()}) -Join " " -replace "\u2212","-"
|
||||
if ([double]::TryParse($cellText, [ref]$n)) {$newObj[$p] = $n }
|
||||
else {$newObj[$p] = $cellText }
|
||||
$c ++
|
||||
}
|
||||
[pscustomObject]$newObj
|
||||
}
|
||||
}
|
||||
|
||||
[PSCustomObject]$result
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,13 +3,13 @@ function Import-Html {
|
||||
[CmdletBinding()]
|
||||
param(
|
||||
$Url,
|
||||
$Index,
|
||||
[int]$Index = 0,
|
||||
$Header,
|
||||
[int]$FirstDataRow=0,
|
||||
[int]$FirstDataRow = 0,
|
||||
[Switch]$UseDefaultCredentials
|
||||
)
|
||||
|
||||
$xlFile = [System.IO.Path]::GetTempFileName() -replace "tmp","xlsx"
|
||||
$xlFile = [System.IO.Path]::GetTempFileName() -replace "tmp", "xlsx"
|
||||
Remove-Item $xlFile -ErrorAction Ignore
|
||||
|
||||
Write-Verbose "Exporting to Excel file $($xlFile)"
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
# 7.6.0
|
||||
- Fix -StartRow and -StartColumn being ignored.
|
||||
|
||||
- **_[Under investigation]_** Fix -StartRow and -StartColumn being ignored.
|
||||
- James O'Neill:
|
||||
- Update Get-HtmlTable to support to use PowerHTML (maintained by [Justin Grote](https://twitter.com/**JustinWGrote**)).
|
||||
- Added example to including a new function Import-ByColumn. Works like Import-Excel but with data in columns instead of the conventional rows.
|
||||
- Update Import-HTML with better defaults
|
||||
- Fixed example `Get-ModuleStats.ps1` which reads the PowerShell Gallery page and extracts the stats table
|
||||
|
||||
|
||||
# v7.5.2
|
||||
- Changed the switch `-NotAsDictionary` to `-Raw`. Works with `-Worksheetname *` reads all the sheets in the xlsx file and returns an array.
|
||||
|
||||
Reference in New Issue
Block a user