Skip to content

Commit

Permalink
Adding new methods to UriString
Browse files Browse the repository at this point in the history
  • Loading branch information
nyamsprod committed Dec 27, 2024
1 parent 97d13ab commit 3e15f5e
Show file tree
Hide file tree
Showing 10 changed files with 303 additions and 86 deletions.
2 changes: 1 addition & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
"benchmark": "phpbench run --report=default",
"phpcs": "PHP_CS_FIXER_IGNORE_ENV=1 php-cs-fixer fix -v --diff --dry-run --allow-risky=yes --ansi",
"phpcs:fix": "php-cs-fixer fix -vvv --allow-risky=yes --ansi",
"phpstan": "phpstan analyse -l max -c phpstan.neon --ansi --memory-limit=256M",
"phpstan": "phpstan analyse -l max -c phpstan.neon --ansi --memory-limit=512M",
"phpunit": "XDEBUG_MODE=coverage phpunit --coverage-text",
"phpunit:min": "phpunit --no-coverage",
"test": [
Expand Down
56 changes: 56 additions & 0 deletions docs/interfaces/7.0/uri-parser-builder.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ The class act as a drop-in replacement for PHP's `parse_url` feature.
## URI parsing

~~~php
UriString::resolve(string $uri, ?string $baseUri = null): array
UriString::parse(string $uri): array
UriString::parseAuthority(string $authority): array
~~~
Expand Down Expand Up @@ -67,6 +68,25 @@ var_export(UriString::parse('http:www.example.com'));
<p class="message-warning">This invalid HTTP URI is successfully parsed.</p>
<p class="message-notice">The class also exposes a <code>UriString::parseAuthority</code> you can use to parse an authority string.</p>

If you need to resolve your URI in the context of a base URI, the `resolve` public static method will let you
do just that. The method expects either a full URI as its single parameter, or a relative URI followed by
a base URI which must be absolute; the URI will then be resolved using the base URI.

```php
$components = UriString::resolve("/foo", "https://example.com");
//returns the following array
//array(
// 'scheme' => 'https',
// 'user' => null,
// 'pass' => null,
// 'host' => 'example.com',
// 'port' => null,
// 'path' => '/foo',
// 'query' => null,
// 'fragment' => null,
//);
```

## URI Building

~~~php
Expand Down Expand Up @@ -99,3 +119,39 @@ echo UriString::build($components); //displays http://hello:[email protected][email protected]
The `build` method provides similar functionality to the `http_build_url()` function from v1.x of the [`pecl_http`](https://pecl.php.net/package/pecl_http) PECL extension.

<p class="message-notice">The class also exposes a <code>UriString::buildAuthority</code> you can use to build an authority from its hash representation.</p>

## URI Normalization

It is possible to normalize a URI against the RFC3986 rules using the `UriString::normalize` method.
The method expects a string and will return the same array as `UriString::parse` but each component will
have been normalized.

```php
use League\Uri\UriString;

$parsed = UriString::parse("https://EXAMPLE.COM/foo/../bar");
//returns the following array
//array(
// 'scheme' => 'https',
// 'user' => null,
// 'pass' => null,
// 'host' => 'EXAMPLE.COM',
// 'port' => null,
// 'path' => '/foo/../bar',
// 'query' => null,
// 'fragment' => null,
//);

$normalized = UriString::normalize("https://EXAMPLE.COM/foo/../bar");
//returns the following array
//array(
// 'scheme' => 'https',
// 'user' => null,
// 'pass' => null,
// 'host' => 'example.com',
// 'port' => null,
// 'path' => '/bar',
// 'query' => null,
// 'fragment' => null,
//);
```
3 changes: 0 additions & 3 deletions docs/uri/7.0/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,6 @@ install the `fileinfo` extension otherwise an exception will be thrown.
To convert a URI into an HTML anchor tag you need to have the `ext-dom` extension
installed in your system.

To enable URI normalization, the `ext-mbstring` extension or a polyfill
like `symfony/polyfill-mbstring` must be present in your system.

Installation
--------

Expand Down
3 changes: 3 additions & 0 deletions interfaces/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ All Notable changes to `League\Uri\Interfaces` will be documented in this file
- `UriInterface::toNormalizedString`
- `UriInterface::getUser`
- `League\Uri\IPv6\Converter::isIpv6`
- `UriString::resolve`
- `UriString::removeDotSegments`
- `UriString::normalize`

### Fixed

Expand Down
2 changes: 1 addition & 1 deletion interfaces/Idna/Converter.php
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ public static function toUnicode(Stringable|string $domain, Option|int|null $opt
$domain = rawurldecode((string) $domain);

if (false === stripos($domain, 'xn--')) {
return Result::fromIntl(['result' => $domain, 'isTransitionalDifferent' => false, 'errors' => Error::NONE->value]);
return Result::fromIntl(['result' => strtolower($domain), 'isTransitionalDifferent' => false, 'errors' => Error::NONE->value]);
}

FeatureDetection::supportsIdn();
Expand Down
183 changes: 183 additions & 0 deletions interfaces/UriString.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,20 @@
use League\Uri\Exceptions\MissingFeature;
use League\Uri\Exceptions\SyntaxError;
use League\Uri\Idna\Converter;
use League\Uri\Idna\Converter as IdnaConverter;
use League\Uri\IPv6\Converter as IPv6Converter;
use Stringable;

use function array_merge;
use function array_pop;
use function array_reduce;
use function end;
use function explode;
use function filter_var;
use function implode;
use function inet_pton;
use function preg_match;
use function preg_replace_callback;
use function rawurldecode;
use function sprintf;
use function strpos;
Expand Down Expand Up @@ -159,6 +166,16 @@ final class UriString
*/
private const REGEXP_IDN_PATTERN = '/[^\x20-\x7f]/';

/**
 * Percent-encoded octets that represent unreserved characters.
 *
 * Matches only the encodings of ALPHA (%41-%5A, %61-%7A), DIGIT (%30-%39),
 * "-" (%2D), "." (%2E), "_" (%5F) and "~" (%7E), with hexadecimal digits in
 * either case. Encodings of reserved characters such as "/" (%2F), "?" (%3F)
 * or "%" (%25) are excluded on purpose: decoding them would change the
 * meaning of the URI.
 *
 * @see https://www.rfc-editor.org/rfc/rfc3986.html#section-2.3
 */
private const REGEXP_UNRESERVED_CHARACTERS = ',%(2[DdEe]|3[0-9]|4[1-9A-Fa-f]|5[0-9AaFf]|6[1-9A-Fa-f]|7[0-9AaEe]),';

/**
 * Path segments treated as dot segments.
 *
 * The segments are stored as array keys so that membership can be
 * tested with isset() while walking the segments of a path.
 *
 * @var array<string,int>
 */
private const DOT_SEGMENTS = ['.' => 1, '..' => 1];

/**
* Only the address block fe80::/10 can have a Zone ID attach to
* let's detect the link local significant 10 bits.
Expand Down Expand Up @@ -262,6 +279,172 @@ public static function buildAuthority(array $components): ?string
return $components['user'].':'.$components['pass'].$authority;
}

/**
 * Parses and normalizes the URI following RFC3986 destructive and non-destructive constraints.
 *
 * The following steps are applied to the parsed components:
 *  - the scheme is lowercased;
 *  - the host is passed through IPv6Converter::compress() and IdnaConverter::toUnicode();
 *  - dot segments are removed from the path when the path is absolute or
 *    when the URI has a scheme or an authority;
 *  - percent-encoded sequences matched by REGEXP_UNRESERVED_CHARACTERS are
 *    decoded in the path, query, fragment, user and password;
 *  - an empty path becomes "/" when an authority is present.
 *
 * An absent component (null) stays null: an undefined query or fragment is
 * not the same as an empty one and must not be turned into an empty string.
 *
 * @throws SyntaxError if the URI is not parsable
 *
 * @return ComponentMap
 */
public static function normalize(Stringable|string $uri): array
{
    $components = self::parse($uri);
    if (null !== $components['scheme']) {
        $components['scheme'] = strtolower($components['scheme']);
    }

    if (null !== $components['host']) {
        $components['host'] = IdnaConverter::toUnicode((string)IPv6Converter::compress($components['host']))->domain();
    }

    // The authority is computed once, after the host has been normalized.
    $authority = self::buildAuthority($components);

    $path = $components['path'];
    if ('/' === ($path[0] ?? '') || '' !== $components['scheme'].$authority) {
        $path = self::removeDotSegments($path);
    }

    $path = (string) self::decodeUnreservedCharacters($path);
    if (null !== $authority && '' === $path) {
        $path = '/';
    }

    $components['path'] = $path;
    // No string cast here: a null (undefined) component must remain null.
    $components['query'] = self::decodeUnreservedCharacters($components['query']);
    $components['fragment'] = self::decodeUnreservedCharacters($components['fragment']);
    $components['user'] = self::decodeUnreservedCharacters($components['user']);
    $components['pass'] = self::decodeUnreservedCharacters($components['pass']);

    return $components;
}

/**
 * Decodes the percent-encoded sequences matched by REGEXP_UNRESERVED_CHARACTERS.
 *
 * A null or empty input is returned untouched. If the regular expression
 * engine reports a failure, an empty string is returned.
 */
private static function decodeUnreservedCharacters(?string $str): ?string
{
    if (null === $str || '' === $str) {
        return $str;
    }

    $decoded = preg_replace_callback(
        self::REGEXP_UNRESERVED_CHARACTERS,
        static fn (array $found): string => rawurldecode($found[0]),
        $str
    );

    return $decoded ?? '';
}

/**
 * Resolves a URI reference against a base URI using RFC3986 rules.
 *
 * Both arguments are parsed into their components. When no base URI is
 * given, the URI itself acts as the base, which means it must carry a scheme.
 * Exceptions raised while parsing are not caught and reach the caller as is.
 *
 * Resolution outcome:
 *  - a reference with its own scheme is returned with its dot segments removed;
 *  - a reference with an authority but no scheme inherits the base scheme;
 *  - any other reference is merged with the base path and query, and takes
 *    the scheme and authority of the base together with its own fragment.
 *
 * @see https://www.rfc-editor.org/rfc/rfc3986.html#section-5
 *
 * @throws SyntaxError if the BaseUri is not absolute or in absence of a BaseUri if the uri is not absolute
 *
 * @return ComponentMap
 */
public static function resolve(Stringable|string $uri, Stringable|string|null $baseUri = null): array
{
    $reference = self::parse($uri);
    $base = null === $baseUri ? $reference : self::parse($baseUri);
    if (null === $base['scheme']) {
        throw new SyntaxError('The base URI must be an absolute URI or null; If the base URI is null the URI must be an absolute URI.');
    }

    $inheritsScheme = null === $reference['scheme'] || '' === $reference['scheme'];
    if (!$inheritsScheme || null !== self::buildAuthority($reference)) {
        // The reference keeps its own authority and path; only a missing scheme is borrowed.
        if ($inheritsScheme) {
            $reference['scheme'] = $base['scheme'];
        }

        $reference['path'] = self::removeDotSegments($reference['path']);

        return $reference;
    }

    [$mergedPath, $query] = self::resolvePathAndQuery($reference, $base);
    $mergedPath = self::removeDotSegments($mergedPath);

    // A non-empty path placed after an authority has to start with a slash.
    $needsLeadingSlash = '' !== $mergedPath
        && '/' !== $mergedPath[0]
        && null !== self::buildAuthority($base);

    $base['path'] = $needsLeadingSlash ? '/'.$mergedPath : $mergedPath;
    $base['query'] = $query;
    $base['fragment'] = $reference['fragment'];

    return $base;
}

/**
 * Remove dot segments from the URI path as per RFC specification.
 *
 * The path is split on "/": every "." segment is dropped and every ".."
 * segment removes the segment collected before it. A path that ends with a
 * dot segment gets a trailing slash. A path without any "." character is
 * returned untouched.
 *
 * An absolute path always stays absolute: without the final guard, a path
 * such as "/../g" would collapse to "g" because the ".." segment consumes
 * the empty leading segment that stands for the root slash.
 *
 * @see https://www.rfc-editor.org/rfc/rfc3986.html#section-5.2.4
 */
public static function removeDotSegments(Stringable|string $path): string
{
    $path = (string) $path;
    if (!str_contains($path, '.')) {
        return $path;
    }

    $reducer = static function (array $carry, string $segment): array {
        if ('..' === $segment) {
            array_pop($carry);

            return $carry;
        }

        if (!isset(self::DOT_SEGMENTS[$segment])) {
            $carry[] = $segment;
        }

        return $carry;
    };

    $oldSegments = explode('/', $path);
    $newPath = implode('/', array_reduce($oldSegments, $reducer, []));
    if (isset(self::DOT_SEGMENTS[end($oldSegments)])) {
        $newPath .= '/';
    }

    // Restore the root slash when a ".." segment consumed the empty leading segment.
    if (str_starts_with($path, '/') && !str_starts_with($newPath, '/')) {
        return '/'.$newPath;
    }

    return $newPath;
}

/**
 * Computes the path and query of a reference merged with a base URI.
 *
 * - An absolute reference path is kept together with the reference query.
 * - An empty reference path yields the base path; the reference query is
 *   used when it is defined, the base query otherwise.
 * - Any other reference path is appended to the base path stripped from its
 *   last segment, or prefixed with a slash when the base URI has an
 *   authority and an empty path.
 *
 * Dot segments are not removed by this method.
 *
 * @param ComponentMap $uri
 * @param ComponentMap $baseUri
 *
 * @return array{0:string, 1:string|null}
 */
private static function resolvePathAndQuery(array $uri, array $baseUri): array
{
    $relativePath = $uri['path'];
    $basePath = $baseUri['path'];

    if (str_starts_with($relativePath, '/')) {
        return [$relativePath, $uri['query']];
    }

    if ('' === $relativePath) {
        return [$basePath, $uri['query'] ?? $baseUri['query']];
    }

    $baseHasAuthority = null !== self::buildAuthority($baseUri);
    if ('' === $basePath) {
        return [$baseHasAuthority ? '/'.$relativePath : $relativePath, $uri['query']];
    }

    // Drop the last segment of the base path and append the reference path to what is left.
    $parentSegments = explode('/', $basePath);
    array_pop($parentSegments);
    if ([] === $parentSegments) {
        return [$relativePath, $uri['query']];
    }

    return [implode('/', $parentSegments).'/'.$relativePath, $uri['query']];
}

/**
* Parse a URI string into its components.
*
Expand Down
53 changes: 53 additions & 0 deletions interfaces/UriStringTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

final class UriStringTest extends TestCase
{
private const BASE_URI = 'http://a/b/c/d;p?q';

#[DataProvider('validUriProvider')]
public function testParseSucced(Stringable|string|int $uri, array $expected): void
{
Expand Down Expand Up @@ -974,4 +976,55 @@ public static function buildUriProvider(): array
],
];
}

/**
 * Resolving a reference against a base URI and rebuilding the result
 * must produce the expected URI string.
 */
#[DataProvider('resolveProvider')]
public function testCreateResolve(string $baseUri, string $uri, string $expected): void
{
    $resolved = UriString::resolve($uri, $baseUri);

    self::assertSame($expected, UriString::build($resolved));
}

/**
 * Reference resolution datasets: base URI, reference, expected result.
 *
 * @return array<string, array{0: string, 1: string, 2: string}>
 */
public static function resolveProvider(): array
{
    $base = self::BASE_URI;

    return [
        'base uri' => [$base, '', $base],
        'scheme' => [$base, 'http://d/e/f', 'http://d/e/f'],
        'path 1' => [$base, 'g', 'http://a/b/c/g'],
        'path 2' => [$base, './g', 'http://a/b/c/g'],
        'path 3' => [$base, 'g/', 'http://a/b/c/g/'],
        'path 4' => [$base, '/g', 'http://a/g'],
        'authority' => [$base, '//g', 'http://g'],
        'query' => [$base, '?y', 'http://a/b/c/d;p?y'],
        'path + query' => [$base, 'g?y', 'http://a/b/c/g?y'],
        'fragment' => [$base, '#s', 'http://a/b/c/d;p?q#s'],
        'path + fragment' => [$base, 'g#s', 'http://a/b/c/g#s'],
        'path + query + fragment' => [$base, 'g?y#s', 'http://a/b/c/g?y#s'],
        'single dot 1' => [$base, '.', 'http://a/b/c/'],
        'single dot 2' => [$base, './', 'http://a/b/c/'],
        'single dot 3' => [$base, './g/.', 'http://a/b/c/g/'],
        'single dot 4' => [$base, 'g/./h', 'http://a/b/c/g/h'],
        'double dot 1' => [$base, '..', 'http://a/b/'],
        'double dot 2' => [$base, '../', 'http://a/b/'],
        'double dot 3' => [$base, '../g', 'http://a/b/g'],
        'double dot 4' => [$base, '../..', 'http://a/'],
        'double dot 5' => [$base, '../../', 'http://a/'],
        'double dot 6' => [$base, '../../g', 'http://a/g'],
        'double dot 7' => [$base, '../../../g', 'http://a/g'],
        'double dot 8' => [$base, '../../../../g', 'http://a/g'],
        'double dot 9' => [$base, 'g/../h', 'http://a/b/c/h'],
        'mulitple slashes' => [$base, 'foo////g', 'http://a/b/c/foo////g'],
        'complex path 1' => [$base, ';x', 'http://a/b/c/;x'],
        'complex path 2' => [$base, 'g;x', 'http://a/b/c/g;x'],
        'complex path 3' => [$base, 'g;x?y#s', 'http://a/b/c/g;x?y#s'],
        'complex path 4' => [$base, 'g;x=1/./y', 'http://a/b/c/g;x=1/y'],
        'complex path 5' => [$base, 'g;x=1/../y', 'http://a/b/c/y'],
        'dot segments presence 1' => [$base, '/./g', 'http://a/g'],
        'dot segments presence 2' => [$base, '/../g', 'http://a/g'],
        'dot segments presence 3' => [$base, 'g.', 'http://a/b/c/g.'],
        'dot segments presence 4' => [$base, '.g', 'http://a/b/c/.g'],
        'dot segments presence 5' => [$base, 'g..', 'http://a/b/c/g..'],
        'dot segments presence 6' => [$base, '..g', 'http://a/b/c/..g'],
        'origin uri without path' => ['http://h:b@a', 'b/../y', 'http://h:b@a/y'],
        'not same origin' => [$base, 'ftp://a/b/c/d', 'ftp://a/b/c/d'],
    ];
}
}
Loading

0 comments on commit 3e15f5e

Please sign in to comment.