2
* Copyright (C) 2023 KeePassXC Team <team@keepassxc.org>
4
* This program is free software: you can redistribute it and/or modify
5
* it under the terms of the GNU General Public License as published by
6
* the Free Software Foundation, either version 2 or (at your option)
7
* version 3 of the License.
9
* This program is distributed in the hope that it will be useful,
10
* but WITHOUT ANY WARRANTY; without even the implied warranty of
11
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
* GNU General Public License for more details.
14
* You should have received a copy of the GNU General Public License
15
* along with this program. If not, see <http://www.gnu.org/licenses/>.
19
#if defined(WITH_XC_NETWORKING) || defined(WITH_XC_BROWSER)
20
#include <QHostAddress>
21
#include <QNetworkCookie>
22
#include <QNetworkCookieJar>
24
#include <QRegularExpression>
27
// Storage for the shared UrlTools object, declared through Qt's Q_GLOBAL_STATIC macro.
Q_GLOBAL_STATIC(UrlTools, s_urlTools)
29
// Accessor for the shared UrlTools object.
// NOTE(review): the body is not visible in this excerpt; presumably it returns s_urlTools - confirm.
UrlTools* UrlTools::instance()
34
// Produces a QUrl from a QVariant.
// NOTE(review): only the canConvert<QUrl>() guard is visible in this excerpt; the conversion
// itself and the result for non-convertible variants are not shown - confirm before relying on them.
QUrl UrlTools::convertVariantToUrl(const QVariant& var) const
37
// Only variants that Qt reports as convertible to QUrl enter this branch.
if (var.canConvert<QUrl>()) {
43
#if defined(WITH_XC_NETWORKING) || defined(WITH_XC_BROWSER)
44
// Looks up the redirect target of a network reply.
// The reply's QNetworkRequest::RedirectionTargetAttribute is read and passed through
// convertVariantToUrl(). `reply` is dereferenced unconditionally, so it must not be null.
// NOTE(review): the remainder of the body (any post-processing and the return) is not visible here.
QUrl UrlTools::getRedirectTarget(QNetworkReply* reply) const
46
// The attribute is delivered as a QVariant.
QVariant var = reply->attribute(QNetworkRequest::RedirectionTargetAttribute);
47
QUrl url = convertVariantToUrl(var);
52
* Gets the base domain of a URL or hostname.
54
* Returns the base domain, e.g. https://another.example.co.uk -> example.co.uk
55
* An up-to-date public suffix list can be found at: https://publicsuffix.org/list/public_suffix_list.dat
57
// Derives the base domain (last label before the TLD, plus the TLD) from a URL or hostname string.
// NOTE(review): the bodies of both early-exit branches and the final return are not visible in
// this excerpt - confirm what is returned in each case.
QString UrlTools::getBaseDomainFromUrl(const QString& url) const
59
// Parse via fromUserInput() so the host can be extracted from the parsed QUrl.
auto qUrl = QUrl::fromUserInput(url);
61
auto host = qUrl.host();
62
// IP addresses are handled separately from domain names.
if (isIpAddress(host)) {
66
const auto tld = getTopLevelDomainFromUrl(qUrl.toString());
67
// No TLD found, or the host is no longer than the TLD plus its leading dot,
// i.e. there is no label left in front of the TLD to extract.
if (tld.isEmpty() || tld.length() + 1 >= host.length()) {
71
// Remove the TLD and its leading dot from the hostname, e.g. another.example.co.uk -> another.example
host.chop(tld.length() + 1);
73
// Split the remaining hostname on '.' and keep the last label, e.g. another.example -> example
QString baseDomain = host.split('.').last();
75
// Append the TLD back to the label, e.g. example -> example.co.uk
baseDomain.append(QString(".%1").arg(tld));
82
* Gets the top-level domain from a URL.
84
* Returns the TLD, e.g. https://another.example.co.uk -> co.uk
86
// Determines the top-level domain (public suffix) of a URL by stripping leading host labels
// one at a time and probing each remainder with a throwaway cookie.
// NOTE(review): the branch bodies and return statements are not visible in this excerpt.
QString UrlTools::getTopLevelDomainFromUrl(const QString& url) const
88
auto host = QUrl::fromUserInput(url).host();
89
// IP addresses are handled separately from domain names.
if (isIpAddress(host)) {
93
const auto numberOfDomainParts = host.split('.').length();
94
// Empty byte array used as both the name and the value of the probe cookie;
// only the cookie's domain is set to something meaningful below.
static const auto dummy = QByteArrayLiteral("");
96
// Iterate at most once per dot-separated label of the host
for (auto i = 0; i < numberOfDomainParts; ++i) {
98
// Cut the first label from host. If no '.' is left, indexOf() yields -1 and mid(0) keeps host unchanged.
host = host.mid(host.indexOf('.') + 1);
101
QNetworkCookie cookie(dummy, dummy);
102
cookie.setDomain(host);
104
// Check if dummy cookie's domain/TLD matches with public suffix list.
// A fresh QNetworkCookieJar is used for every probe; a false return means no cookie was set.
if (!QNetworkCookieJar{}.setCookiesFromUrl(QList{cookie}, QUrl::fromUserInput(url))) {
113
// Returns true when `host` is recognised by QHostAddress as an IPv4 or IPv6 address.
bool UrlTools::isIpAddress(const QString& host) const
115
// Handle IPv6 host with brackets, e.g. [::1]: a surrounding [ ] pair is stripped before parsing.
const auto hostAddress = host.startsWith('[') && host.endsWith(']') ? host.mid(1, host.length() - 2) : host;
117
QHostAddress address(hostAddress);
118
// Any protocol other than IPv4/IPv6 yields false.
return address.protocol() == QAbstractSocket::IPv4Protocol || address.protocol() == QAbstractSocket::IPv6Protocol;
122
// Returns true if the two URLs are identical. A trailing "/" is removed from both before comparison.
123
// URLs without a scheme revert to https.
// NOTE(review): no scheme defaulting is visible in this excerpt - confirm this statement still holds.
124
// Special handling is needed because QUrl::matches() with QUrl::StripTrailingSlash does not strip "/" paths.
125
bool UrlTools::isUrlIdentical(const QString& first, const QString& second) const
127
// Local helper working on its own copy of the string (parameter is taken by value).
auto trimUrl = [](QString url) {
129
// Drop exactly one trailing slash, if present.
if (url.endsWith("/")) {
130
url.remove(url.length() - 1, 1);
136
// Empty input on either side is handled before any comparison (outcome not visible in this excerpt).
if (first.isEmpty() || second.isEmpty()) {
140
const auto firstUrl = trimUrl(first);
141
const auto secondUrl = trimUrl(second);
142
// Plain string equality of the trimmed inputs is checked before falling back to QUrl comparison.
if (firstUrl == secondUrl) {
146
// Otherwise compare as parsed URLs, ignoring a remaining trailing slash.
return QUrl(firstUrl).matches(QUrl(secondUrl), QUrl::StripTrailingSlash);
149
// Validates the contents of a URL field.
// NOTE(review): the declaration of `url`, the branch bodies and the return values are not
// visible in this excerpt - confirm the outcome of each check.
bool UrlTools::isUrlValid(const QString& urlField) const
151
// Special cases checked first: empty field, cmd:// and kdbx:// prefixes, and {REF:A placeholders.
// All prefix checks are case-insensitive.
if (urlField.isEmpty() || urlField.startsWith("cmd://", Qt::CaseInsensitive)
152
|| urlField.startsWith("kdbx://", Qt::CaseInsensitive) || urlField.startsWith("{REF:A", Qt::CaseInsensitive)) {
157
// Branch for input that contains a scheme separator.
if (urlField.contains("://")) {
160
url = QUrl::fromUserInput(urlField);
163
// Only file URLs may have an empty host.
if (url.scheme() != "file" && url.host().isEmpty()) {
167
// Check for illegal characters: < > ^ ` { | } and, in addition, the wildcard *
QRegularExpression re("[<>\\^`{|}\\*]");
169
auto match = re.match(urlField);
170
if (match.hasMatch()) {
177
// Returns true if `domain` contains at least one character from the rejected set:
// whitespace, ^ # | / : < > ? @ [ ] or a backslash.
bool UrlTools::domainHasIllegalCharacters(const QString& domain) const
179
// Raw string literal keeps the character class readable without double escaping.
QRegularExpression re(R"([\s\^#|/:<>\?@\[\]\\])");
180
return re.match(domain).hasMatch();