From 45ed6a06dcb42419a9c0a7646ae04c359e79dd40 Mon Sep 17 00:00:00 2001 From: Matthew Gray Date: Wed, 23 Apr 2025 13:36:32 +0300 Subject: [PATCH] [Get-HtmlTable] XPath optimization ```powershell $rows = $h.SelectNodes("//table[$TableIndex]//tr") ``` The XPath selector on line 53 uses a compound expression that can lead to unexpected results. The problem is that HtmlAgilityPack may have specific issues with such expressions. In particular, on pages containing multiple tables, this selector can match more than one table. This is aggravated by the fact that tables can have different structures. To avoid the ambiguity, this PR splits the query into two steps: first select a single table, then select its rows. The one-liner also simplifies error checking: ```powershell $rows = try { $h.SelectSingleNode("//table[$TableIndex]").SelectNodes(".//tr") } catch {} if (-not $rows) {Write-Warning "Could not find rows for `"//table[$TableIndex]`" in $Url ."} ``` This expression doesn't even need testing; it just works. --- Public/Get-HtmlTable.ps1 | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Public/Get-HtmlTable.ps1 b/Public/Get-HtmlTable.ps1 index 4851d4f..bc16842 100644 --- a/Public/Get-HtmlTable.ps1 +++ b/Public/Get-HtmlTable.ps1 @@ -50,7 +50,9 @@ function Get-HtmlTable { else { $h = ConvertFrom-Html -Content $r.Content if ($TableIndex -is [valuetype]) { $TableIndex += 1} - $rows = $h.SelectNodes("//table[$TableIndex]//tr") + $rows = try { + $h.SelectSingleNode("//table[$TableIndex]").SelectNodes(".//tr") + } catch {} if (-not $rows) {Write-Warning "Could not find rows for `"//table[$TableIndex]`" in $Url ."} if ( -not $propertyNames) { if ( $tableHeaders = $rows[$FirstDataRow].SelectNodes("th")) {