'use strict';

/**
 * Check if `vhost` is a valid suffix of `hostname` (top-domain).
 *
 * It means that `vhost` needs to be a suffix of `hostname` and we then need to
 * make sure that: either they are equal, or the character preceding `vhost` in
 * `hostname` is a '.' (it should not be a partial label).
 *
 * * hostname = 'not.evil.com' and vhost = 'vil.com'      => not ok
 * * hostname = 'not.evil.com' and vhost = 'evil.com'     => ok
 * * hostname = 'not.evil.com' and vhost = 'not.evil.com' => ok
 *
 * @param {string} hostname - Hostname being inspected.
 * @param {string} vhost - Candidate suffix.
 * @returns {boolean} True if `vhost` matches `hostname` on a label boundary.
 */
function shareSameDomainSuffix(hostname, vhost) {
    if (hostname.endsWith(vhost)) {
        return (hostname.length === vhost.length ||
            hostname[hostname.length - vhost.length - 1] === '.');
    }
    return false;
}

/**
 * Given a hostname and its public suffix, extract the general domain.
 *
 * @param {string} hostname - Full hostname.
 * @param {string} publicSuffix - Public suffix of `hostname`.
 * @returns {string} The public suffix plus one extra label, or `hostname`
 *   itself when there is no '.' before that label.
 */
function extractDomainWithSuffix(hostname, publicSuffix) {
    // Locate the index of the last '.' in the part of the `hostname` preceding
    // the public suffix.
    //
    // examples:
    //   1. not.evil.co.uk  => evil.co.uk
    //         ^     ^
    //         |     | start of public suffix
    //         | index of the last dot
    //
    //   2. example.co.uk   => example.co.uk
    //     ^        ^
    //     |        | start of public suffix
    //     |
    //     | (-1) no dot found before the public suffix
    const publicSuffixIndex = hostname.length - publicSuffix.length - 2;
    const lastDotBeforeSuffixIndex = hostname.lastIndexOf('.', publicSuffixIndex);

    // No '.' found, then `hostname` is the general domain (no sub-domain)
    if (lastDotBeforeSuffixIndex === -1) {
        return hostname;
    }

    // Extract the part after the last '.' preceding the public suffix
    return hostname.slice(lastDotBeforeSuffixIndex + 1);
}

/**
 * Detect the domain of `hostname` given its public suffix.
 *
 * @param {string} suffix - Public suffix found for `hostname`.
 * @param {string} hostname - Hostname to extract the domain from.
 * @param {{validHosts: (string[]|null)}} options - Only `validHosts` is read here.
 * @returns {string|null} The matching valid host, the general domain, or
 *   `null` when `hostname` (ignoring leading dots) is itself the suffix.
 */
function getDomain(suffix, hostname, options) {
    // Check if `hostname` ends with a member of `validHosts`.
    if (options.validHosts !== null) {
        const validHosts = options.validHosts;
        for (const vhost of validHosts) {
            if (/*@__INLINE__*/ shareSameDomainSuffix(hostname, vhost)) {
                return vhost;
            }
        }
    }

    let numberOfLeadingDots = 0;
    if (hostname.startsWith('.')) {
        while (numberOfLeadingDots < hostname.length &&
            hostname[numberOfLeadingDots] === '.') {
            numberOfLeadingDots += 1;
        }
    }

    // If `hostname` is a valid public suffix, then there is no domain to return.
    // Since we already know that `getPublicSuffix` returns a suffix of `hostname`
    // there is no need to perform a string comparison and we only compare the
    // size.
    if (suffix.length === hostname.length - numberOfLeadingDots) {
        return null;
    }

    // To extract the general domain, we start by identifying the public suffix
    // (if any), then consider the domain to be the public suffix with one added
    // level of depth. (e.g.: if hostname is `not.evil.co.uk` and public suffix:
    // `co.uk`, then we take one more level: `evil`, giving the final result:
    // `evil.co.uk`).
    return /*@__INLINE__*/ extractDomainWithSuffix(hostname, suffix);
}

/**
 * Return the part of domain without suffix.
 *
 * Example: for domain 'foo.com', the result would be 'foo'.
 *
 * @param {string} domain - General domain (e.g. 'foo.com').
 * @param {string} suffix - Public suffix of `domain` (e.g. 'com').
 * @returns {string} `domain` with the trailing '.' + `suffix` removed.
 */
function getDomainWithoutSuffix(domain, suffix) {
    // Note: here `domain` and `suffix` cannot have the same length because in
    // this case we set `domain` to `null` instead. It is thus safe to assume
    // that `suffix` is shorter than `domain`.
    return domain.slice(0, -suffix.length - 1);
}

/**
 * Extract the hostname part of `url`.
 *
 * @param {string} url - URL we want to extract a hostname from.
 * @param {boolean} urlIsValidHostname - hint from caller; true if `url` is
 *   already a valid hostname (only trailing dots are then trimmed).
 * @returns {string|null} The hostname (lower-cased when an upper-case ASCII
 *   letter was seen while scanning), or `null` for `data:` URLs, schemes
 *   containing invalid characters, and a '[' without a closing ']'.
 */
function extractHostname(url, urlIsValidHostname) {
    let start = 0;
    let end = url.length;
    let hasUpper = false;

    // If url is not already a valid hostname, then try to extract hostname.
    if (!urlIsValidHostname) {
        // Special handling of data URLs
        if (url.startsWith('data:')) {
            return null;
        }

        // Trim leading spaces
        while (start < url.length && url.charCodeAt(start) <= 32) {
            start += 1;
        }

        // Trim trailing spaces
        while (end > start + 1 && url.charCodeAt(end - 1) <= 32) {
            end -= 1;
        }

        // Skip scheme.
        if (url.charCodeAt(start) === 47 /* '/' */ &&
            url.charCodeAt(start + 1) === 47 /* '/' */) {
            start += 2;
        } else {
            const indexOfProtocol = url.indexOf(':/', start);
            if (indexOfProtocol !== -1) {
                // Implement fast-path for common protocols. We expect most protocols
                // should be one of these 4 and thus we will not need to perform the
                // more expensive validity check most of the time.
                const protocolSize = indexOfProtocol - start;
                const c0 = url.charCodeAt(start);
                const c1 = url.charCodeAt(start + 1);
                const c2 = url.charCodeAt(start + 2);
                const c3 = url.charCodeAt(start + 3);
                const c4 = url.charCodeAt(start + 4);

                const isCommonProtocol =
                    // 'https'
                    (protocolSize === 5 &&
                        c0 === 104 /* 'h' */ &&
                        c1 === 116 /* 't' */ &&
                        c2 === 116 /* 't' */ &&
                        c3 === 112 /* 'p' */ &&
                        c4 === 115 /* 's' */) ||
                    // 'http'
                    (protocolSize === 4 &&
                        c0 === 104 /* 'h' */ &&
                        c1 === 116 /* 't' */ &&
                        c2 === 116 /* 't' */ &&
                        c3 === 112 /* 'p' */) ||
                    // 'wss'
                    (protocolSize === 3 &&
                        c0 === 119 /* 'w' */ &&
                        c1 === 115 /* 's' */ &&
                        c2 === 115 /* 's' */) ||
                    // 'ws'
                    (protocolSize === 2 &&
                        c0 === 119 /* 'w' */ &&
                        c1 === 115 /* 's' */);

                if (!isCommonProtocol) {
                    // Check that scheme is valid
                    for (let i = start; i < indexOfProtocol; i += 1) {
                        const lowerCaseCode = url.charCodeAt(i) | 32;
                        if (!((lowerCaseCode >= 97 && lowerCaseCode <= 122) || // [a, z]
                            (lowerCaseCode >= 48 && lowerCaseCode <= 57) || // [0, 9]
                            lowerCaseCode === 46 || // '.'
                            lowerCaseCode === 45 || // '-'
                            lowerCaseCode === 43 // '+'
                        )) {
                            return null;
                        }
                    }
                }

                // Skip 0, 1 or more '/' after ':/'
                start = indexOfProtocol + 2;
                while (url.charCodeAt(start) === 47 /* '/' */) {
                    start += 1;
                }
            }
        }

        // Detect first occurrence of '/', '?' or '#'. We also keep track of the
        // last occurrence of '@', ']' or ':' to speed-up subsequent parsing of
        // (respectively), identifier, ipv6 or port.
        let indexOfIdentifier = -1;
        let indexOfClosingBracket = -1;
        let indexOfPort = -1;
        for (let i = start; i < end; i += 1) {
            const code = url.charCodeAt(i);
            if (code === 35 || // '#'
                code === 47 || // '/'
                code === 63 // '?'
            ) {
                end = i;
                break;
            } else if (code === 64) { // '@'
                indexOfIdentifier = i;
            } else if (code === 93) { // ']'
                indexOfClosingBracket = i;
            } else if (code === 58) { // ':'
                indexOfPort = i;
            } else if (code >= 65 && code <= 90) { // [A, Z]
                hasUpper = true;
            }
        }

        // Detect identifier: '@'
        if (indexOfIdentifier !== -1 &&
            indexOfIdentifier > start &&
            indexOfIdentifier < end) {
            start = indexOfIdentifier + 1;
        }

        // Handle ipv6 addresses
        if (url.charCodeAt(start) === 91 /* '[' */) {
            if (indexOfClosingBracket !== -1) {
                return url.slice(start + 1, indexOfClosingBracket).toLowerCase();
            }
            return null;
        } else if (indexOfPort !== -1 &&
            indexOfPort > start &&
            indexOfPort < end) {
            // Detect port: ':'
            end = indexOfPort;
        }
    }

    // Trim trailing dots
    while (end > start + 1 && url.charCodeAt(end - 1) === 46 /* '.' */) {
        end -= 1;
    }

    // Avoid a `slice` when the hostname is the whole input.
    const hostname = start !== 0 || end !== url.length ? url.slice(start, end) : url;
    if (hasUpper) {
        return hostname.toLowerCase();
    }
    return hostname;
}

/**
 * Check if a hostname is an IP. You should be aware that this only works
 * because `hostname` is already guaranteed to be a valid hostname!
 *
 * @param {string} hostname - Candidate hostname.
 * @returns {boolean} True if `hostname` is 7 to 15 characters long, made only
 *   of digits and exactly three dots, and neither starts nor ends with a dot.
 */
function isProbablyIpv4(hostname) {
    // Cannot be shorter than 1.1.1.1
    if (hostname.length < 7) {
        return false;
    }

    // Cannot be longer than: 255.255.255.255
    if (hostname.length > 15) {
        return false;
    }

    let numberOfDots = 0;
    for (let i = 0; i < hostname.length; i += 1) {
        const code = hostname.charCodeAt(i);
        if (code === 46 /* '.' */) {
            numberOfDots += 1;
        } else if (code < 48 /* '0' */ || code > 57 /* '9' */) {
            return false;
        }
    }

    return (numberOfDots === 3 &&
        hostname.charCodeAt(0) !== 46 /* '.' */ &&
        hostname.charCodeAt(hostname.length - 1) !== 46 /* '.' */);
}

/**
 * Similar to isProbablyIpv4.
 *
 * @param {string} hostname - Candidate hostname, optionally wrapped in '[' / ']'.
 * @returns {boolean} True if the (unwrapped) value is at most 39 characters,
 *   contains only hexadecimal digits and ':' and has at least one ':'.
 */
function isProbablyIpv6(hostname) {
    if (hostname.length < 3) {
        return false;
    }

    let start = hostname.startsWith('[') ? 1 : 0;
    let end = hostname.length;
    if (hostname[end - 1] === ']') {
        end -= 1;
    }

    // We only consider the maximum size of a normal IPV6. Note that this will
    // fail on so-called "IPv4 mapped IPv6 addresses" but this is a corner-case
    // and a proper validation library should be used for these.
    if (end - start > 39) {
        return false;
    }

    let hasColon = false;
    for (; start < end; start += 1) {
        const code = hostname.charCodeAt(start);
        if (code === 58 /* ':' */) {
            hasColon = true;
        } else if (!((code >= 48 && code <= 57) || // 0-9
            (code >= 97 && code <= 102) || // a-f
            // Only hexadecimal letters are allowed: the upper bound is 'F' (70),
            // not 'Z' (90), so that e.g. 'ZZ:ZZ' is not mistaken for an address.
            (code >= 65 && code <= 70) // A-F
        )) {
            return false;
        }
    }

    return hasColon;
}

/**
 * Check if `hostname` is *probably* a valid ip addr (either ipv6 or ipv4).
 * This *will not* work on any string. We need `hostname` to be a valid
 * hostname.
 *
 * @param {string} hostname - Candidate hostname.
 * @returns {boolean} True if either the IPv6 or the IPv4 heuristic matches.
 */
function isIp(hostname) {
    return isProbablyIpv6(hostname) || isProbablyIpv4(hostname);
}

// ---------------------------------------------------------------------------
// Implements fast shallow verification of hostnames. This does not perform a
// strict check on the content of labels (classes of Unicode characters, etc.)
// but instead checks that the structure is valid (number of labels, length of
// labels, etc.).
//
// If you need stricter validation, consider using an external library.
// ---------------------------------------------------------------------------

/**
 * Check if a character code is accepted inside a hostname label: a lower-case
 * ASCII letter, an ASCII digit, or any code above 127.
 *
 * @param {number} code - UTF-16 code unit.
 * @returns {boolean} True if `code` is accepted.
 */
function isValidAscii(code) {
    return ((code >= 97 && code <= 122) || (code >= 48 && code <= 57) || code > 127);
}

/**
 * Check if a hostname string is valid. It's usually a preliminary check before
 * trying to use getDomain or anything else.
 *
 * Beware: it does not check if the TLD exists.
 *
 * @param {string} hostname - Candidate hostname.
 * @returns {boolean} True if the structure of `hostname` is valid.
 */
function isValidHostname(hostname) {
    if (hostname.length > 255) {
        return false;
    }

    if (hostname.length === 0) {
        return false;
    }

    if (/*@__INLINE__*/ !isValidAscii(hostname.charCodeAt(0)) &&
        hostname.charCodeAt(0) !== 46 && // '.' (dot)
        hostname.charCodeAt(0) !== 95 // '_' (underscore)
    ) {
        return false;
    }

    // Validate hostname according to RFC
    let lastDotIndex = -1;
    let lastCharCode = -1;
    const len = hostname.length;
    for (let i = 0; i < len; i += 1) {
        const code = hostname.charCodeAt(i);
        if (code === 46 /* '.' */) {
            if (
                // Check that previous label is < 63 bytes long (64 = 63 + '.')
                i - lastDotIndex > 64 ||
                // Check that previous character was not already a '.'
                lastCharCode === 46 ||
                // Check that the previous label does not end with a '-' (dash)
                lastCharCode === 45 ||
                // Check that the previous label does not end with a '_' (underscore)
                lastCharCode === 95) {
                return false;
            }
            lastDotIndex = i;
        } else if (!(/*@__INLINE__*/ (isValidAscii(code) || code === 45 || code === 95))) {
            // Check if there is a forbidden character in the label
            return false;
        }
        lastCharCode = code;
    }

    return (
        // Check that last label is shorter than 63 chars
        len - lastDotIndex - 1 <= 63 &&
        // Check that the last character is an allowed trailing label character.
        // Since we already checked that the char is a valid hostname character,
        // we only need to check that it's different from '-'.
        lastCharCode !== 45);
}

/**
 * Build a complete options object, filling in the default of every option
 * missing from the argument.
 *
 * @param {object} options - Partial options.
 * @returns {object} A new object holding all seven options.
 */
function setDefaultsImpl({
    allowIcannDomains = true,
    allowPrivateDomains = false,
    detectIp = true,
    extractHostname = true,
    mixedInputs = true,
    validHosts = null,
    validateHostname = true,
}) {
    return {
        allowIcannDomains,
        allowPrivateDomains,
        detectIp,
        extractHostname,
        mixedInputs,
        validHosts,
        validateHostname,
    };
}

// Shared instance returned whenever no options are given (avoids an allocation).
const DEFAULT_OPTIONS = /*@__INLINE__*/ setDefaultsImpl({});

/**
 * Resolve user-provided options against the defaults.
 *
 * @param {object} [options] - Partial options.
 * @returns {object} The shared `DEFAULT_OPTIONS` object when `options` is
 *   `undefined`, a freshly built options object otherwise.
 */
function setDefaults(options) {
    if (options === undefined) {
        return DEFAULT_OPTIONS;
    }
    return /*@__INLINE__*/ setDefaultsImpl(options);
}

/**
 * Returns the subdomain of a hostname string.
 *
 * @param {string} hostname - Full hostname.
 * @param {string} domain - General domain of `hostname`.
 * @returns {string} The part of `hostname` preceding '.' + `domain`, or ''
 *   when both have the same length.
 */
function getSubdomain(hostname, domain) {
    // If `hostname` and `domain` are the same, then there is no sub-domain
    if (domain.length === hostname.length) {
        return '';
    }
    return hostname.slice(0, -domain.length - 1);
}

// ---------------------------------------------------------------------------
// Implement a factory allowing to plug different implementations of suffix
// lookup (e.g.: using a trie or the packed hashes datastructures). This is used
// and exposed in `tldts.ts` and `tldts-experimental.ts` bundle entrypoints.
// ---------------------------------------------------------------------------

/**
 * Create a result object with every field set to `null`.
 *
 * @returns {object} A fresh, empty result.
 */
function getEmptyResult() {
    return {
        domain: null,
        domainWithoutSuffix: null,
        hostname: null,
        isIcann: null,
        isIp: null,
        isPrivate: null,
        publicSuffix: null,
        subdomain: null,
    };
}

/**
 * Set every field of an existing result object back to `null`, in place.
 *
 * @param {object} result - Result object to reset.
 * @returns {void}
 */
function resetResult(result) {
    result.domain = null;
    result.domainWithoutSuffix = null;
    result.hostname = null;
    result.isIcann = null;
    result.isIp = null;
    result.isPrivate = null;
    result.publicSuffix = null;
    result.subdomain = null;
}

/**
 * Parse `url` step by step, filling `result` and stopping as soon as the
 * requested `step` is reached or a step yields no value.
 *
 * @param {*} url - Input to parse; anything which is not a string leaves
 *   `result` untouched.
 * @param {number} step - Last step to perform: 0 (hostname), 2 (public
 *   suffix), 3 (domain), 4 (subdomain); any other value runs every step.
 * @param {function(string, object, object): *} suffixLookup - Called with
 *   `(hostname, options, result)`; expected to set `result.publicSuffix`.
 * @param {object} [partialOptions] - Options, completed with the defaults.
 * @param {object} result - Result object mutated in place.
 * @returns {object} The `result` argument.
 */
function parseImpl(url, step, suffixLookup, partialOptions, result) {
    const options = /*@__INLINE__*/ setDefaults(partialOptions);

    // Very fast approximate check to make sure `url` is a string. This is needed
    // because the library will not necessarily be used in a typed setup and
    // values of arbitrary types might be given as argument.
    if (typeof url !== 'string') {
        return result;
    }

    // Extract hostname from `url` only if needed. This can be made optional
    // using `options.extractHostname`. This option will typically be used
    // whenever we are sure the inputs to `parse` are already hostnames and not
    // arbitrary URLs.
    //
    // `mixedInputs` allows to specify if we expect a mix of URLs and hostnames
    // as input. If only hostnames are expected then `extractHostname` can be
    // set to `false` to speed-up parsing. If only URLs are expected then
    // `mixedInputs` can be set to `false`. The `mixedInputs` is only a hint
    // and will not change the behavior of the library.
    if (!options.extractHostname) {
        result.hostname = url;
    } else if (options.mixedInputs) {
        result.hostname = extractHostname(url, isValidHostname(url));
    } else {
        result.hostname = extractHostname(url, false);
    }

    if (step === 0 /* FLAG.HOSTNAME */ || result.hostname === null) {
        return result;
    }

    // Check if `hostname` is a valid ip address
    if (options.detectIp) {
        result.isIp = isIp(result.hostname);
        if (result.isIp) {
            return result;
        }
    }

    // Perform optional hostname validation. If hostname is not valid, no need to
    // go further as there will be no valid domain or sub-domain.
    if (options.validateHostname &&
        options.extractHostname &&
        !isValidHostname(result.hostname)) {
        result.hostname = null;
        return result;
    }

    // Extract public suffix
    suffixLookup(result.hostname, options, result);
    if (step === 2 /* FLAG.PUBLIC_SUFFIX */ || result.publicSuffix === null) {
        return result;
    }

    // Extract domain
    result.domain = getDomain(result.publicSuffix, result.hostname, options);
    if (step === 3 /* FLAG.DOMAIN */ || result.domain === null) {
        return result;
    }

    // Extract subdomain
    result.subdomain = getSubdomain(result.hostname, result.domain);
    if (step === 4 /* FLAG.SUB_DOMAIN */) {
        return result;
    }

    // Extract domain without suffix
    result.domainWithoutSuffix = getDomainWithoutSuffix(result.domain, result.publicSuffix);
    return result;
}

/**
 * Fast path for very popular suffixes ('com', 'org', 'edu', 'gov', 'net' and
 * 'de'); this allows to by-pass lookup completely as well as any extra
 * allocation or string manipulation.
 *
 * @param {string} hostname - Hostname to inspect.
 * @param {{allowPrivateDomains: boolean}} options - The fast path is skipped
 *   when `allowPrivateDomains` is set.
 * @param {object} out - Receives `isIcann`, `isPrivate` and `publicSuffix` on
 *   a match.
 * @returns {boolean} True if a suffix was recognized and `out` was filled.
 */
function fastPath(hostname, options, out) {
    if (!options.allowPrivateDomains && hostname.length > 3) {
        const last = hostname.length - 1;
        const c3 = hostname.charCodeAt(last);
        const c2 = hostname.charCodeAt(last - 1);
        const c1 = hostname.charCodeAt(last - 2);
        const c0 = hostname.charCodeAt(last - 3);

        if (c3 === 109 /* 'm' */ &&
            c2 === 111 /* 'o' */ &&
            c1 === 99 /* 'c' */ &&
            c0 === 46 /* '.' */) {
            out.isIcann = true;
            out.isPrivate = false;
            out.publicSuffix = 'com';
            return true;
        } else if (c3 === 103 /* 'g' */ &&
            c2 === 114 /* 'r' */ &&
            c1 === 111 /* 'o' */ &&
            c0 === 46 /* '.' */) {
            out.isIcann = true;
            out.isPrivate = false;
            out.publicSuffix = 'org';
            return true;
        } else if (c3 === 117 /* 'u' */ &&
            c2 === 100 /* 'd' */ &&
            c1 === 101 /* 'e' */ &&
            c0 === 46 /* '.' */) {
            out.isIcann = true;
            out.isPrivate = false;
            out.publicSuffix = 'edu';
            return true;
        } else if (c3 === 118 /* 'v' */ &&
            c2 === 111 /* 'o' */ &&
            c1 === 103 /* 'g' */ &&
            c0 === 46 /* '.' */) {
            out.isIcann = true;
            out.isPrivate = false;
            out.publicSuffix = 'gov';
            return true;
        } else if (c3 === 116 /* 't' */ &&
            c2 === 101 /* 'e' */ &&
            c1 === 110 /* 'n' */ &&
            c0 === 46 /* '.' */) {
            out.isIcann = true;
            out.isPrivate = false;
            out.publicSuffix = 'net';
            return true;
        } else if (c3 === 101 /* 'e' */ &&
            c2 === 100 /* 'd' */ &&
            c1 === 46 /* '.' */) {
            out.isIcann = true;
            out.isPrivate = false;
            out.publicSuffix = 'de';
            return true;
        }
    }
    return false;
}

exports.fastPathLookup = fastPath;
exports.getEmptyResult = getEmptyResult;
exports.parseImpl = parseImpl;
exports.resetResult = resetResult;
exports.setDefaults = setDefaults;
//# sourceMappingURL=index.js.map