1
0
mirror of https://github.com/bitwarden/server synced 2026-02-20 19:33:32 +00:00

Rewrite Icon fetching (#3023)

* Rewrite Icon fetching

* Move validation to IconUri, Uri, or UriBuilder

* `dotnet format` 🤖

* PR suggestions

* Add not null compiler hint

* Add twitter to test case

* Move Uri manipulation to UriService

* Implement MockedHttpClient

Presents better, fluent handling of message matching and response
building.

* Add redirect handling tests

* Add testing to models

* More aggressively dispose content in icon link

* Format 🤖

* Update icon lockfile

* Convert to cloned stream for HttpResponseBuilder

Content was being disposed when HttpResponseMessage was being disposed.
This avoids losing our reference to our content and allows multiple
usages of the same `MockedHttpMessageResponse`

* Move services to extension

Extension is shared by testing and allows access to services from
our service tests

* Remove unused `using`

* Prefer awaiting asyncs for better exception handling

* `dotnet format` 🤖

* Await async

* Update tests to use test TLD and ip ranges

* Remove unused interfaces

* Make assignments static when possible

* Prefer invariant comparer to downcasing

* Prefer injecting interface services to implementations

* Prefer comparer set in HashSet initialization

* Allow SVG icons

* Filter out icons with unknown formats

* Seek to beginning of MemoryStream after writing it

* More appropriate to not return icon if it's invalid

* Add svg icon test
This commit is contained in:
Matt Gibson
2023-08-08 15:29:40 -04:00
committed by GitHub
parent ca368466ce
commit 4377c7a897
31 changed files with 1685 additions and 522 deletions

View File

@@ -0,0 +1,100 @@
#nullable enable
using System.Collections;
using AngleSharp.Html.Parser;
using Bit.Icons.Extensions;
using Bit.Icons.Services;
namespace Bit.Icons.Models;
/// <summary>
/// The icons discovered for a single domain. Instances are created through
/// <see cref="FetchAsync"/>, which probes a short list of candidate URLs and keeps the
/// icons from the first one that answers with a success status code.
/// </summary>
public class DomainIcons : IEnumerable<Icon>
{
    private readonly ILogger<IIconFetchingService> _logger;
    private readonly IHttpClientFactory _httpClientFactory;
    private readonly IUriService _uriService;
    private readonly List<Icon> _icons = new();

    /// <summary>The domain string this collection was built for, exactly as supplied by the caller.</summary>
    public string Domain { get; }

    /// <summary>Gets the icon at position <paramref name="i"/> of the retrieved list.</summary>
    public Icon this[int i]
    {
        get
        {
            return _icons[i];
        }
    }

    public IEnumerator<Icon> GetEnumerator() => ((IEnumerable<Icon>)_icons).GetEnumerator();

    IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable)_icons).GetEnumerator();

    private DomainIcons(string domain, ILogger<IIconFetchingService> logger, IHttpClientFactory httpClientFactory, IUriService uriService)
    {
        _logger = logger;
        _httpClientFactory = httpClientFactory;
        _uriService = uriService;
        Domain = domain;
    }

    /// <summary>
    /// Builds a <see cref="DomainIcons"/> for <paramref name="domain"/> and populates it.
    /// </summary>
    /// <param name="domain">Host name to look up icons for.</param>
    /// <param name="logger">Logger used for warnings raised while fetching.</param>
    /// <param name="httpClientFactory">Factory handed on to every outgoing request.</param>
    /// <param name="parser">HTML parser used to read the fetched page.</param>
    /// <param name="uriService">Service handed on to every outgoing request for URI validation.</param>
    /// <returns>The populated collection; it is empty when no candidate URL produced icons.</returns>
    public static async Task<DomainIcons> FetchAsync(string domain, ILogger<IIconFetchingService> logger, IHttpClientFactory httpClientFactory, IHtmlParser parser, IUriService uriService)
    {
        var pageIcons = new DomainIcons(domain, logger, httpClientFactory, uriService);
        await pageIcons.FetchIconsAsync(parser);
        return pageIcons;
    }

    /// <summary>
    /// Tries, in order: https on the domain, http on the domain, https on the base domain
    /// (only when the domain has more than one dot), and https on the www-prefixed host
    /// (only when the domain has fewer than two dots). Stops at the first success response.
    /// </summary>
    private async Task FetchIconsAsync(IHtmlParser parser)
    {
        if (!Uri.TryCreate($"https://{Domain}", UriKind.Absolute, out var uri))
        {
            _logger.LogWarning("Bad domain: {domain}.", Domain);
            return;
        }

        // Captured now because `uri` is reassigned by the later attempts.
        var host = uri.Host;

        // first try https
        if (await TryAddIconsFromAsync(uri, parser))
        {
            return;
        }

        // then try http
        uri = uri.ChangeScheme("http");
        if (await TryAddIconsFromAsync(uri, parser))
        {
            return;
        }

        var dotCount = Domain.Count(c => c == '.');

        // Then try base domain
        if (dotCount > 1 && DomainName.TryParseBaseDomain(Domain, out var baseDomain) &&
            Uri.TryCreate($"https://{baseDomain}", UriKind.Absolute, out uri))
        {
            if (await TryAddIconsFromAsync(uri, parser))
            {
                return;
            }
        }

        // Then try www
        if (dotCount < 2 && Uri.TryCreate($"https://www.{host}", UriKind.Absolute, out uri))
        {
            await TryAddIconsFromAsync(uri, parser);
        }
    }

    /// <summary>
    /// Requests <paramref name="uri"/> and, on a success status code, appends every icon
    /// retrieved from the response to this collection.
    /// </summary>
    /// <returns><c>true</c> when the response was successful (even if it yielded no icons); otherwise <c>false</c>.</returns>
    private async Task<bool> TryAddIconsFromAsync(Uri uri, IHtmlParser parser)
    {
        using var response = await IconHttpRequest.FetchAsync(uri, _logger, _httpClientFactory, _uriService);
        if (!response.IsSuccessStatusCode)
        {
            return false;
        }

        _icons.AddRange(await response.RetrieveIconsAsync(uri, Domain, parser));
        return true;
    }
}

View File

@@ -0,0 +1,110 @@
#nullable enable
using System.Net;
using Bit.Icons.Extensions;
using Bit.Icons.Services;
namespace Bit.Icons.Models;
/// <summary>
/// One GET request for a page or icon. Redirects are followed by hand: each hop is a new
/// <see cref="IconHttpRequest"/>, so every target goes through the same
/// <see cref="IUriService"/> lookup and <see cref="IconUri.IsValid"/> check as the first request.
/// </summary>
public class IconHttpRequest
{
    // Number of redirect hops followed after the initial request.
    private const int _maxRedirects = 2;

    // Status codes treated as redirects: 302, 301, 307, 303 and 308.
    private static readonly HttpStatusCode[] _redirectStatusCodes = new HttpStatusCode[]
    {
        HttpStatusCode.Redirect,
        HttpStatusCode.MovedPermanently,
        HttpStatusCode.RedirectKeepVerb,
        HttpStatusCode.SeeOther,
        HttpStatusCode.PermanentRedirect,
    };

    private readonly ILogger<IIconFetchingService> _logger;
    private readonly HttpClient _httpClient;
    private readonly IHttpClientFactory _httpClientFactory;
    private readonly IUriService _uriService;
    private readonly int _redirectsCount;
    private readonly Uri _uri;

    // A fresh 404 message on every access, so each caller can dispose its own instance.
    private static HttpResponseMessage NotFound => new(HttpStatusCode.NotFound);

    private IconHttpRequest(Uri uri, ILogger<IIconFetchingService> logger, IHttpClientFactory httpClientFactory, IUriService uriService, int redirectsCount)
    {
        _logger = logger;
        _httpClientFactory = httpClientFactory;
        _httpClient = _httpClientFactory.CreateClient("Icons");
        _uriService = uriService;
        _redirectsCount = redirectsCount;
        _uri = uri;
    }

    /// <summary>
    /// Requests <paramref name="uri"/>, following up to two redirects.
    /// </summary>
    /// <param name="uri">Absolute URI to request.</param>
    /// <param name="logger">Logger passed on to the returned response wrapper.</param>
    /// <param name="httpClientFactory">Factory used to create the "Icons" named client.</param>
    /// <param name="uriService">Service that turns a <see cref="Uri"/> into an <see cref="IconUri"/>.</param>
    /// <returns>
    /// A wrapper around the final response. Validation failures, transport errors and
    /// unusable redirects all surface as a 404 response rather than as an exception.
    /// </returns>
    public static async Task<IconHttpResponse> FetchAsync(Uri uri, ILogger<IIconFetchingService> logger, IHttpClientFactory httpClientFactory, IUriService uriService)
    {
        var request = new IconHttpRequest(uri, logger, httpClientFactory, uriService, 0);
        var httpResponse = await request.FetchAsync();
        return new IconHttpResponse(httpResponse, logger, httpClientFactory, uriService);
    }

    private async Task<HttpResponseMessage> FetchAsync()
    {
        if (!_uriService.TryGetUri(_uri, out var iconUri) || !iconUri!.IsValid)
        {
            return NotFound;
        }

        var response = await GetAsync(iconUri);
        if (response.IsSuccessStatusCode)
        {
            // Ownership passes to the caller.
            return response;
        }

        // A non-success response is never returned as-is, so it is released here once the
        // redirect (if any) has been resolved.
        using (response)
        {
            return await FollowRedirectsAsync(response, iconUri);
        }
    }

    private async Task<HttpResponseMessage> GetAsync(IconUri iconUri)
    {
        using var message = new HttpRequestMessage();
        // The request targets IconUri.InnerUri while the Host header carries IconUri.Host.
        message.RequestUri = iconUri.InnerUri;
        message.Headers.Host = iconUri.Host;
        message.Method = HttpMethod.Get;

        try
        {
            return await _httpClient.SendAsync(message);
        }
        catch
        {
            // Best effort: any failure to send is reported to callers as "not found".
            return NotFound;
        }
    }

    /// <summary>
    /// Issues the follow-up request for a redirect response. The caller keeps ownership of
    /// <paramref name="response"/> and is responsible for disposing it.
    /// </summary>
    private async Task<HttpResponseMessage> FollowRedirectsAsync(HttpResponseMessage response, IconUri originalIconUri)
    {
        if (_redirectsCount >= _maxRedirects || response.Headers.Location == null ||
            !_redirectStatusCodes.Contains(response.StatusCode))
        {
            return NotFound;
        }

        var redirectUri = DetermineRedirectUri(response.Headers.Location, originalIconUri);
        return await new IconHttpRequest(redirectUri, _logger, _httpClientFactory, _uriService, _redirectsCount + 1).FetchAsync();
    }

    /// <summary>
    /// Turns a <c>Location</c> header value into the absolute URI to request next.
    /// </summary>
    private static Uri DetermineRedirectUri(Uri responseUri, IconUri originalIconUri)
    {
        if (responseUri.IsAbsoluteUri)
        {
            if (!responseUri.IsHypertext())
            {
                return responseUri.ChangeScheme("https");
            }

            return responseUri;
        }

        // Relative target: resolve it against the root of the original host. Assigning the
        // raw value to UriBuilder.Path would percent-encode '?' and '#', turning a redirect
        // such as "/login?next=/" into the path "/login%3Fnext=/".
        var siteRoot = new UriBuilder
        {
            Scheme = originalIconUri.Scheme,
            Host = originalIconUri.Host,
        }.Uri;

        return new Uri(siteRoot, responseUri);
    }
}

View File

@@ -0,0 +1,72 @@
#nullable enable
using System.Net;
using AngleSharp.Html.Parser;
using Bit.Icons.Services;
namespace Bit.Icons.Models;
/// <summary>
/// Wraps the <see cref="HttpResponseMessage"/> of a page request and extracts the icons
/// that the page's head section links to. Disposing this object disposes the wrapped response.
/// </summary>
public class IconHttpResponse : IDisposable
{
    // At most this many <link href> elements are inspected per page.
    private const int _maxIconLinksProcessed = 200;

    // At most this many usable links are actually downloaded.
    private const int _maxRetrievedIcons = 10;

    private readonly HttpResponseMessage _response;
    private readonly ILogger<IIconFetchingService> _logger;
    private readonly IHttpClientFactory _httpClientFactory;
    private readonly IUriService _uriService;

    public HttpStatusCode StatusCode => _response.StatusCode;

    public bool IsSuccessStatusCode => _response.IsSuccessStatusCode;

    public string? ContentType => _response.Content.Headers.ContentType?.MediaType;

    public HttpContent Content => _response.Content;

    public IconHttpResponse(HttpResponseMessage response, ILogger<IIconFetchingService> logger, IHttpClientFactory httpClientFactory, IUriService uriService)
    {
        _response = response;
        _logger = logger;
        _httpClientFactory = httpClientFactory;
        _uriService = uriService;
    }

    /// <summary>
    /// Parses the response body as HTML and downloads the icons its head links to.
    /// </summary>
    /// <param name="requestUri">URI used as the page address when the response's own request URI is missing or has an IP host.</param>
    /// <param name="domain">Domain name, used only in the warning logged when no head can be parsed.</param>
    /// <param name="parser">HTML parser used to read the head.</param>
    /// <returns>The icons that were fetched successfully; empty when the head is missing or no link is usable.</returns>
    public async Task<IEnumerable<Icon>> RetrieveIconsAsync(Uri requestUri, string domain, IHtmlParser parser)
    {
        using var htmlStream = await _response.Content.ReadAsStreamAsync();
        var head = await parser.ParseHeadAsync(htmlStream);
        if (head == null)
        {
            _logger.LogWarning("No DocumentElement for {domain}.", domain);
            return Array.Empty<Icon>();
        }

        // Make sure uri uses domain name, not ip
        var pageUri = _response.RequestMessage?.RequestUri;
        if (pageUri == null || IPAddress.TryParse(pageUri.Host, out _))
        {
            pageUri = requestUri;
        }

        // A <base href> in the head overrides the default "/" base path.
        var declaredBase = head.QuerySelector("base[href]")?.Attributes["href"]?.Value;
        var baseUrl = string.IsNullOrWhiteSpace(declaredBase) ? "/" : declaredBase;

        IconLink[] candidates;
        var linkElements = head.QuerySelectorAll("link[href]");
        if (linkElements == null)
        {
            candidates = Array.Empty<IconLink>();
        }
        else
        {
            candidates = linkElements
                .Take(_maxIconLinksProcessed)
                .Select(element => new IconLink(element, pageUri, baseUrl))
                .Where(link => link.IsUsable())
                .OrderBy(link => link.Priority)
                .Take(_maxRetrievedIcons)
                .ToArray();
        }

        // Download all candidates concurrently; failed downloads come back as null.
        var downloads = candidates.Select(link => link.FetchAsync(_logger, _httpClientFactory, _uriService));
        var fetched = await Task.WhenAll(downloads);
        return fetched.OfType<Icon>();
    }

    public void Dispose() => _response.Dispose();
}

View File

@@ -0,0 +1,220 @@
#nullable enable
using System.Text;
using AngleSharp.Dom;
using Bit.Icons.Extensions;
using Bit.Icons.Services;
namespace Bit.Icons.Models;
/// <summary>
/// A candidate icon location: either a <c>link</c> element taken from a page head, or a
/// URI that is to be fetched directly.
/// </summary>
public class IconLink
{
    // rel values that mark a link as an icon outright.
    private static readonly HashSet<string> _iconRels = new(StringComparer.InvariantCultureIgnoreCase) { "icon", "apple-touch-icon", "shortcut icon" };

    // rel values for which the file-extension check is skipped.
    private static readonly HashSet<string> _blocklistedRels = new(StringComparer.InvariantCultureIgnoreCase) { "preload", "image_src", "preconnect", "canonical", "alternate", "stylesheet" };

    // href extensions that mark a link as an icon when its rel is absent or not blocklisted.
    private static readonly HashSet<string> _iconExtensions = new(StringComparer.InvariantCultureIgnoreCase) { ".ico", ".png", ".jpg", ".jpeg" };

    private const string _pngMediaType = "image/png";
    private static readonly byte[] _pngHeader = new byte[] { 137, 80, 78, 71 };
    private static readonly byte[] _webpHeader = Encoding.UTF8.GetBytes("RIFF");

    private const string _icoMediaType = "image/x-icon";
    private const string _icoAltMediaType = "image/vnd.microsoft.icon";
    private static readonly byte[] _icoHeader = new byte[] { 00, 00, 01, 00 };

    private const string _jpegMediaType = "image/jpeg";
    private static readonly byte[] _jpegHeader = new byte[] { 255, 216, 255 };

    private const string _svgXmlMediaType = "image/svg+xml";

    // Only icons whose final media type is in this set are returned by FetchAsync.
    private static readonly HashSet<string> _allowedMediaTypes = new(StringComparer.InvariantCultureIgnoreCase)
    {
        _pngMediaType,
        _icoMediaType,
        _icoAltMediaType,
        _jpegMediaType,
        _svgXmlMediaType,
    };

    private bool _useUriDirectly = false;
    private bool _validated = false;
    private int? _width;
    private int? _height;

    public IAttr? Href { get; }
    public IAttr? Rel { get; }
    public IAttr? Type { get; }
    public IAttr? Sizes { get; }
    public Uri ParentUri { get; }
    public string BaseUrlPath { get; }

    /// <summary>
    /// Ranking derived from the declared <c>sizes</c> attribute: 32x32 is 1, 64x64 is 2,
    /// any other square from 24 to 128 is 3, 16x16 is 4, any other square is 100, and a
    /// missing or non-square size is 200.
    /// </summary>
    public int Priority
    {
        get
        {
            if (_width == null || _width != _height)
            {
                return 200;
            }

            return _width switch
            {
                32 => 1,
                64 => 2,
                >= 24 and <= 128 => 3,
                16 => 4,
                _ => 100,
            };
        }
    }

    /// <summary>
    /// Creates a link that fetches <paramref name="parentPage"/> itself. Such a link is
    /// considered validated from the start.
    /// </summary>
    public IconLink(Uri parentPage)
    {
        _useUriDirectly = true;
        _validated = true;
        ParentUri = parentPage;
        BaseUrlPath = parentPage.PathAndQuery;
    }

    /// <summary>
    /// Creates a link from a page element.
    /// </summary>
    /// <param name="element">Element whose <c>href</c>, <c>rel</c>, <c>type</c> and <c>sizes</c> attributes are read.</param>
    /// <param name="parentPage">URI of the page that contained the element.</param>
    /// <param name="baseUrlPath">Base path that relative hrefs are appended to.</param>
    public IconLink(IElement element, Uri parentPage, string baseUrlPath)
    {
        Href = element.Attributes["href"];
        ParentUri = parentPage;
        BaseUrlPath = baseUrlPath;
        Rel = element.Attributes["rel"];
        Type = element.Attributes["type"];
        Sizes = element.Attributes["sizes"];

        // Only a single "<width>x<height>" value is understood; anything else leaves the
        // dimensions unset.
        if (!string.IsNullOrWhiteSpace(Sizes?.Value))
        {
            var sizeParts = Sizes.Value.Split('x');
            if (sizeParts.Length == 2 && int.TryParse(sizeParts[0].Trim(), out var width) &&
                int.TryParse(sizeParts[1].Trim(), out var height))
            {
                _width = width;
                _height = height;
            }
        }
    }

    /// <summary>
    /// Decides whether this link looks like an icon and records the outcome so that
    /// <see cref="FetchAsync"/> may proceed.
    /// </summary>
    /// <returns>
    /// <c>false</c> when the href is missing or blank; otherwise <c>true</c> when the rel is a
    /// known icon rel, or the rel is absent or not blocklisted and the href ends in a known
    /// image extension.
    /// </returns>
    public bool IsUsable()
    {
        if (string.IsNullOrWhiteSpace(Href?.Value))
        {
            return false;
        }

        if (Rel != null && _iconRels.Contains(Rel.Value))
        {
            _validated = true;
        }

        if (Rel == null || !_blocklistedRels.Contains(Rel.Value))
        {
            try
            {
                var extension = Path.GetExtension(Href.Value);
                if (_iconExtensions.Contains(extension))
                {
                    _validated = true;
                }
            }
            catch (ArgumentException) { }
        }

        return _validated;
    }

    /// <summary>
    /// Downloads the icon this link points to. Returns <c>null</c> unless the link has been
    /// validated, either by a successful <see cref="IsUsable"/> call or by construction
    /// through <see cref="IconLink(Uri)"/>.
    /// </summary>
    /// <returns>
    /// The icon bytes and media type, or <c>null</c> when the link is not validated, no URI
    /// can be built, the request fails, or the content is not one of the allowed image types.
    /// </returns>
    public async Task<Icon?> FetchAsync(ILogger<IIconFetchingService> logger, IHttpClientFactory httpClientFactory, IUriService uriService)
    {
        if (!_validated)
        {
            return null;
        }

        var uri = BuildUri();
        if (uri == null)
        {
            return null;
        }

        using var response = await IconHttpRequest.FetchAsync(uri, logger, httpClientFactory, uriService);
        if (!response.IsSuccessStatusCode)
        {
            return null;
        }

        var format = response.Content.Headers.ContentType?.MediaType;
        var bytes = await response.Content.ReadAsByteArrayAsync();
        // The body is fully buffered in `bytes`; release the content right away.
        response.Content.Dispose();

        // Fall back to sniffing the file header when the declared type is missing or not allowed.
        if (format == null || !_allowedMediaTypes.Contains(format))
        {
            format = DetermineImageFormatFromFile(bytes);
        }

        if (format == null || !_allowedMediaTypes.Contains(format))
        {
            return null;
        }

        return new Icon { Image = bytes, Format = format };
    }

    /// <summary>
    /// Produces the absolute URI to download, or <c>null</c> when the href cannot be interpreted.
    /// </summary>
    private Uri? BuildUri()
    {
        if (_useUriDirectly)
        {
            return ParentUri;
        }

        if (Href == null)
        {
            return null;
        }

        // Scheme-relative href ("//host/path"): borrow the parent page's scheme. The prefix
        // test is ordinal so that it always agrees with the fixed two-character slice below;
        // a culture-sensitive test may match when the first two chars are not "//".
        if (Href.Value.StartsWith("//", StringComparison.Ordinal) &&
            Uri.TryCreate($"{ParentUri.Scheme}://{Href.Value[2..]}", UriKind.Absolute, out var uri))
        {
            return uri;
        }

        // Relative href: append it to the base path on the parent page's scheme and host.
        if (Uri.TryCreate(Href.Value, UriKind.Relative, out uri))
        {
            return new UriBuilder()
            {
                Scheme = ParentUri.Scheme,
                Host = ParentUri.Host,
            }.Uri.ConcatPath(BaseUrlPath, uri.OriginalString);
        }

        if (Uri.TryCreate(Href.Value, UriKind.Absolute, out uri))
        {
            return uri;
        }

        return null;
    }

    // True when imageBytes begins with the given header bytes.
    private static bool HeaderMatch(byte[] imageBytes, byte[] header)
    {
        return imageBytes.Length >= header.Length && header.SequenceEqual(imageBytes.Take(header.Length));
    }

    /// <summary>
    /// Guesses a media type from the leading bytes of the file. Returns an empty string when
    /// no known header matches.
    /// </summary>
    private static string DetermineImageFormatFromFile(byte[] imageBytes)
    {
        if (HeaderMatch(imageBytes, _icoHeader))
        {
            return _icoMediaType;
        }

        // NOTE(review): a "RIFF" header is reported as image/png here. That keeps such files
        // inside the allow-list but labels them with the wrong type - confirm this is intended.
        if (HeaderMatch(imageBytes, _pngHeader) || HeaderMatch(imageBytes, _webpHeader))
        {
            return _pngMediaType;
        }

        if (HeaderMatch(imageBytes, _jpegHeader))
        {
            return _jpegMediaType;
        }

        return string.Empty;
    }
}

View File

@@ -1,65 +0,0 @@
namespace Bit.Icons.Models;
/// <summary>
/// Describes one icon candidate: where it was found, the size it declared, the downloaded
/// icon (when available) and a priority value.
/// </summary>
public class IconResult
{
    public string Path { get; set; }
    public int? DefinedWidth { get; set; }
    public int? DefinedHeight { get; set; }
    public Icon Icon { get; set; }
    public int Priority { get; set; }

    /// <summary>
    /// Creates a result from a link's href and its <c>sizes</c> attribute value.
    /// </summary>
    /// <param name="href">Stored as <see cref="Path"/>.</param>
    /// <param name="sizes">
    /// A "<c>width</c>x<c>height</c>" string. When it does not parse, or the two numbers
    /// differ, the priority is 200.
    /// </param>
    public IconResult(string href, string sizes)
    {
        Path = href;
        Priority = 200;

        if (string.IsNullOrWhiteSpace(sizes))
        {
            return;
        }

        var dimensions = sizes.Split('x');
        if (dimensions.Length != 2 ||
            !int.TryParse(dimensions[0].Trim(), out var width) ||
            !int.TryParse(dimensions[1].Trim(), out var height))
        {
            return;
        }

        DefinedWidth = width;
        DefinedHeight = height;

        if (width == height)
        {
            Priority = RankSquare(width);
        }
    }

    /// <summary>
    /// Creates a result for an icon that has already been downloaded; its priority is 10.
    /// </summary>
    public IconResult(Uri uri, byte[] bytes, string format)
    {
        Path = uri.ToString();
        Icon = new Icon
        {
            Image = bytes,
            Format = format
        };
        Priority = 10;
    }

    // Priority for a square icon with the given edge length.
    private static int RankSquare(int edge)
    {
        return edge switch
        {
            32 => 1,
            64 => 2,
            >= 24 and <= 128 => 3,
            16 => 4,
            _ => 100,
        };
    }
}

View File

@@ -0,0 +1,52 @@
#nullable enable
using System.Net;
using Bit.Icons.Extensions;
namespace Bit.Icons.Models;
/// <summary>
/// Pairs a URI with the IP address it is to be requested at. <see cref="InnerUri"/> is the
/// original URI with its host swapped for that address; <see cref="Host"/> keeps the
/// original host name.
/// </summary>
public class IconUri
{
    private readonly IPAddress _ip;

    /// <summary>Host of the URI this instance was created from.</summary>
    public string Host { get; }

    /// <summary>The original URI with its host replaced by the IP address.</summary>
    public Uri InnerUri { get; }

    public string Scheme => InnerUri.Scheme;

    /// <summary>
    /// <c>true</c> only when all of the following hold: the original host is not an IP
    /// literal, the scheme is http or https, the port is the scheme's default, the host
    /// contains a dot, and the IP address is not reported as internal.
    /// </summary>
    public bool IsValid
    {
        get
        {
            // Prevent direct access to any ip
            var hostIsIpLiteral = IPAddress.TryParse(Host, out _);

            return !hostIsIpLiteral
                // Prevent non-http(s) and non-default ports
                && (InnerUri.Scheme == "http" || InnerUri.Scheme == "https")
                && InnerUri.IsDefaultPort
                // Prevent local hosts (localhost, bobs-pc, etc) and IP addresses
                && Host.Contains('.')
                && !_ip.IsInternal();
        }
    }

    /// <summary>
    /// Creates an ip-validated Uri for use in grabbing an icon.
    /// </summary>
    /// <param name="uri">The URI as originally requested; its host is kept in <see cref="Host"/>.</param>
    /// <param name="ip">The address substituted for the host in <see cref="InnerUri"/>.</param>
    public IconUri(Uri uri, IPAddress ip)
    {
        _ip = ip;
        Host = uri.Host;
        InnerUri = uri.ChangeHost(_ip.ToString());
    }
}