Skip to content

Commit 0b3cfd8

Browse files
committed
uriparser improvements
1 parent cc46d1f commit 0b3cfd8

File tree

11 files changed

+451
-303
lines changed

11 files changed

+451
-303
lines changed

ext/uri/config.m4

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ AC_DEFINE([URI_ENABLE_ANSI], [1], [Define to 1 for enabling ANSI support of urip
1212
AC_DEFINE([URI_NO_UNICODE], [1], [Define to 1 for disabling unicode support of uriparser.])
1313

1414
URIPARSER_DIR="uriparser"
15-
URIPARSER_SOURCES="$URIPARSER_DIR/src/UriCommon.c $URIPARSER_DIR/src/UriCompare.c $URIPARSER_DIR/src/UriEscape.c \
16-
$URIPARSER_DIR/src/UriFile.c $URIPARSER_DIR/src/UriIp4.c $URIPARSER_DIR/src/UriIp4Base.c \
15+
URIPARSER_SOURCES="$URIPARSER_DIR/src/UriCommon.c $URIPARSER_DIR/src/UriCompare.c $URIPARSER_DIR/src/UriCopy.c \
16+
$URIPARSER_DIR/src/UriEscape.c $URIPARSER_DIR/src/UriFile.c $URIPARSER_DIR/src/UriIp4.c $URIPARSER_DIR/src/UriIp4Base.c \
1717
$URIPARSER_DIR/src/UriMemory.c $URIPARSER_DIR/src/UriNormalize.c $URIPARSER_DIR/src/UriNormalizeBase.c \
1818
$URIPARSER_DIR/src/UriParse.c $URIPARSER_DIR/src/UriParseBase.c $URIPARSER_DIR/src/UriQuery.c \
1919
$URIPARSER_DIR/src/UriRecompose.c $URIPARSER_DIR/src/UriResolve.c $URIPARSER_DIR/src/UriShorten.c"

ext/uri/config.w32

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,5 @@ AC_DEFINE("URI_NO_UNICODE", 1, "Define to 1 for disabling unicode support of uri
55
ADD_FLAG("CFLAGS_URI", "/D URI_STATIC_BUILD");
66

77
ADD_EXTENSION_DEP('uri', 'lexbor');
8-
ADD_SOURCES("ext/uri/uriparser/src", "UriCommon.c UriCompare.c UriEscape.c UriFile.c UriIp4.c UriIp4Base.c UriMemory.c UriNormalize.c UriNormalizeBase.c UriParse.c UriParseBase.c UriQuery.c UriRecompose.c UriResolve.c UriShorten.c", "uri");
8+
ADD_SOURCES("ext/uri/uriparser/src", "UriCommon.c UriCompare.c UriCopy.c UriEscape.c UriFile.c UriIp4.c UriIp4Base.c UriMemory.c UriNormalize.c UriNormalizeBase.c UriParse.c UriParseBase.c UriQuery.c UriRecompose.c UriResolve.c UriShorten.c", "uri");
99
PHP_INSTALL_HEADERS("ext/uri", "php_lexbor.h php_uri.h php_uri_common.h php_uriparser.h uriparser/src uriparser/include");

ext/uri/php_uri_common.c

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -90,14 +90,11 @@ static void uri_write_component_ex(INTERNAL_FUNCTION_PARAMETERS, uri_property_na
9090
ZEND_ASSERT(property_handler != NULL);
9191

9292
zend_object *new_object = uri_clone_obj_handler(Z_OBJ_P(ZEND_THIS));
93-
if (UNEXPECTED(EG(exception) != NULL)) {
94-
zend_object_release(new_object);
95-
RETURN_THROWS();
96-
}
93+
ZEND_ASSERT(new_object != NULL);
9794

9895
uri_internal_t *new_internal_uri = uri_internal_from_obj(new_object);
9996
URI_ASSERT_INITIALIZATION(new_internal_uri);
100-
if (property_handler->write_func == NULL) {
97+
if (UNEXPECTED(property_handler->write_func == NULL)) {
10198
zend_readonly_property_modification_error_ex(ZSTR_VAL(Z_OBJ_P(ZEND_THIS)->ce->name),
10299
ZSTR_VAL(get_known_string_by_property_name(property_name)));
103100
zend_object_release(new_object);
@@ -106,7 +103,7 @@ static void uri_write_component_ex(INTERNAL_FUNCTION_PARAMETERS, uri_property_na
106103

107104
zval errors;
108105
ZVAL_UNDEF(&errors);
109-
if (property_handler->write_func(new_internal_uri, property_zv, &errors) == FAILURE) {
106+
if (UNEXPECTED(property_handler->write_func(new_internal_uri, property_zv, &errors) == FAILURE)) {
110107
zval_ptr_dtor(&errors);
111108
zend_object_release(new_object);
112109
RETURN_THROWS();

ext/uri/php_uriparser.c

Lines changed: 31 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -28,25 +28,21 @@ static inline size_t get_text_range_length(const UriTextRangeA *range)
2828
return range->afterLast - range->first;
2929
}
3030

31-
static UriUriA *uriparser_copy_uri(UriUriA *uriparser_uri) // TODO add to uriparser
31+
static UriUriA *uriparser_copy_uri(UriUriA *uriparser_uri)
3232
{
33+
ZEND_ASSERT(uriparser_uri != NULL);
34+
3335
UriUriA *new_uriparser_uri = emalloc(sizeof(UriUriA));
3436

35-
if (uriCopyUriA(new_uriparser_uri, uriparser_uri) != URI_SUCCESS) {
36-
efree(new_uriparser_uri);
37-
return NULL; /* TODO check for null on call sites */
38-
}
37+
int result = uriCopyUriA(new_uriparser_uri, uriparser_uri);
38+
ZEND_ASSERT(result == URI_SUCCESS && new_uriparser_uri != NULL);
3939

4040
return new_uriparser_uri;
4141
}
4242

43-
static zend_result uriparser_normalize_uri(UriUriA *uriparser_uri)
43+
static void uriparser_normalize_uri(UriUriA *uriparser_uri)
4444
{
45-
if (uriNormalizeSyntaxExA(uriparser_uri, (unsigned int)-1) != URI_SUCCESS) {
46-
return FAILURE;
47-
}
48-
49-
return SUCCESS;
45+
ZEND_ASSERT(uriNormalizeSyntaxExA(uriparser_uri, (unsigned int)-1) == URI_SUCCESS);
5046
}
5147

5248
static UriUriA *uriparser_read_uri(uriparser_uris_t *uriparser_uris, uri_component_read_mode_t read_mode)
@@ -59,13 +55,7 @@ static UriUriA *uriparser_read_uri(uriparser_uris_t *uriparser_uris, uri_compone
5955
case URI_COMPONENT_READ_NORMALIZED_UNICODE:
6056
if (uriparser_uris->normalized_uri == NULL) {
6157
uriparser_uris->normalized_uri = uriparser_copy_uri(uriparser_uris->uri);
62-
if (uriparser_normalize_uri(uriparser_uris->normalized_uri) == FAILURE) {
63-
uriFreeUriMembersA(uriparser_uris->normalized_uri);
64-
efree(uriparser_uris->normalized_uri);
65-
uriparser_uris->normalized_uri = NULL;
66-
67-
return NULL;
68-
}
58+
uriparser_normalize_uri(uriparser_uris->normalized_uri);
6959
}
7060

7161
return uriparser_uris->normalized_uri;
@@ -76,9 +66,7 @@ static UriUriA *uriparser_read_uri(uriparser_uris_t *uriparser_uris, uri_compone
7666
static zend_result uriparser_read_scheme(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval)
7767
{
7868
UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode);
79-
if (UNEXPECTED(uriparser_uri == NULL)) {
80-
return FAILURE;
81-
}
69+
ZEND_ASSERT(uriparser_uri != NULL);
8270

8371
if (uriparser_uri->scheme.first != NULL && uriparser_uri->scheme.afterLast != NULL) {
8472
zend_string *str = zend_string_init(uriparser_uri->scheme.first, get_text_range_length(&uriparser_uri->scheme), false);
@@ -93,9 +81,7 @@ static zend_result uriparser_read_scheme(const uri_internal_t *internal_uri, uri
9381
zend_result uriparser_read_userinfo(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval)
9482
{
9583
UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode);
96-
if (UNEXPECTED(uriparser_uri == NULL)) {
97-
return FAILURE;
98-
}
84+
ZEND_ASSERT(uriparser_uri != NULL);
9985

10086
if (uriparser_uri->userInfo.first != NULL && uriparser_uri->userInfo.afterLast != NULL) {
10187
ZVAL_STRINGL(retval, uriparser_uri->userInfo.first, get_text_range_length(&uriparser_uri->userInfo));
@@ -109,9 +95,7 @@ zend_result uriparser_read_userinfo(const uri_internal_t *internal_uri, uri_comp
10995
static zend_result uriparser_read_username(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval)
11096
{
11197
UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode);
112-
if (UNEXPECTED(uriparser_uri == NULL)) {
113-
return FAILURE;
114-
}
98+
ZEND_ASSERT(uriparser_uri != NULL);
11599

116100
if (uriparser_uri->userInfo.first != NULL && uriparser_uri->userInfo.afterLast != NULL) {
117101
size_t length = get_text_range_length(&uriparser_uri->userInfo);
@@ -134,9 +118,7 @@ static zend_result uriparser_read_username(const uri_internal_t *internal_uri, u
134118
static zend_result uriparser_read_password(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval)
135119
{
136120
UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode);
137-
if (UNEXPECTED(uriparser_uri == NULL)) {
138-
return FAILURE;
139-
}
121+
ZEND_ASSERT(uriparser_uri != NULL);
140122

141123
if (uriparser_uri->userInfo.first != NULL && uriparser_uri->userInfo.afterLast != NULL) {
142124
const char *c = memchr(uriparser_uri->userInfo.first, ':', get_text_range_length(&uriparser_uri->userInfo));
@@ -156,9 +138,7 @@ static zend_result uriparser_read_password(const uri_internal_t *internal_uri, u
156138
static zend_result uriparser_read_host(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval)
157139
{
158140
UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode);
159-
if (UNEXPECTED(uriparser_uri == NULL)) {
160-
return FAILURE;
161-
}
141+
ZEND_ASSERT(uriparser_uri != NULL);
162142

163143
if (uriparser_uri->hostText.first != NULL && uriparser_uri->hostText.afterLast != NULL && get_text_range_length(&uriparser_uri->hostText) > 0) {
164144
if (uriparser_uri->hostData.ip6 != NULL) {
@@ -193,9 +173,7 @@ static int str_to_int(const char *str, int len)
193173
static zend_result uriparser_read_port(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval)
194174
{
195175
UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode);
196-
if (UNEXPECTED(uriparser_uri == NULL)) {
197-
return FAILURE;
198-
}
176+
ZEND_ASSERT(uriparser_uri != NULL);
199177

200178
if (uriparser_uri->portText.first != NULL && uriparser_uri->portText.afterLast != NULL) {
201179
ZVAL_LONG(retval, str_to_int(uriparser_uri->portText.first, get_text_range_length(&uriparser_uri->portText)));
@@ -209,9 +187,7 @@ static zend_result uriparser_read_port(const uri_internal_t *internal_uri, uri_c
209187
static zend_result uriparser_read_path(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval)
210188
{
211189
UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode);
212-
if (UNEXPECTED(uriparser_uri == NULL)) {
213-
return FAILURE;
214-
}
190+
ZEND_ASSERT(uriparser_uri != NULL);
215191

216192
if (uriparser_uri->pathHead != NULL) {
217193
const UriPathSegmentA *p;
@@ -241,9 +217,7 @@ static zend_result uriparser_read_path(const uri_internal_t *internal_uri, uri_c
241217
static zend_result uriparser_read_query(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval)
242218
{
243219
UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode);
244-
if (UNEXPECTED(uriparser_uri == NULL)) {
245-
return FAILURE;
246-
}
220+
ZEND_ASSERT(uriparser_uri != NULL);
247221

248222
if (uriparser_uri->query.first != NULL && uriparser_uri->query.afterLast != NULL) {
249223
ZVAL_STRINGL(retval, uriparser_uri->query.first, get_text_range_length(&uriparser_uri->query));
@@ -257,9 +231,7 @@ static zend_result uriparser_read_query(const uri_internal_t *internal_uri, uri_
257231
static zend_result uriparser_read_fragment(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval)
258232
{
259233
UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode);
260-
if (UNEXPECTED(uriparser_uri == NULL)) {
261-
return FAILURE;
262-
}
234+
ZEND_ASSERT(uriparser_uri != NULL);
263235

264236
if (uriparser_uri->fragment.first != NULL && uriparser_uri->fragment.afterLast != NULL) {
265237
ZVAL_STRINGL(retval, uriparser_uri->fragment.first, get_text_range_length(&uriparser_uri->fragment));
@@ -373,6 +345,10 @@ void *uriparser_parse_uri(const zend_string *uri_str, const void *base_url, zval
373345
return uriparser_parse_uri_ex(uri_str, base_url, silent);
374346
}
375347

348+
/* TODO make the clone handler accept a flag to distinguish between clone() calls and withers.
349+
* When calling a wither successfully, the normalized URI is surely invalidated, therefore
350+
* it doesn't make sense to copy it. In case of failure, an exception is thrown, and the URI object
351+
* is discarded altogether. */
376352
static void *uriparser_clone_uri(void *uri)
377353
{
378354
uriparser_uris_t *uriparser_uris = (uriparser_uris_t *) uri;
@@ -386,30 +362,25 @@ static void *uriparser_clone_uri(void *uri)
386362
static zend_string *uriparser_uri_to_string(void *uri, uri_recomposition_mode_t recomposition_mode, bool exclude_fragment)
387363
{
388364
uriparser_uris_t *uriparser_uris = (uriparser_uris_t *) uri;
389-
UriUriA *uriparser_uri = uriparser_uris->uri;
365+
UriUriA *uriparser_uri;
390366

391-
if ((recomposition_mode == URI_RECOMPOSITION_NORMALIZED_UNICODE || recomposition_mode == URI_RECOMPOSITION_NORMALIZED_ASCII) &&
392-
uriparser_uris->normalized_uri == NULL
393-
) {
394-
uriparser_uris->normalized_uri = uriparser_copy_uri(uriparser_uris->uri);
395-
if (uriparser_normalize_uri(uriparser_uris->normalized_uri) == FAILURE) {
396-
return NULL;
367+
if (recomposition_mode == URI_RECOMPOSITION_RAW_ASCII || recomposition_mode == URI_RECOMPOSITION_RAW_UNICODE) {
368+
uriparser_uri = uriparser_uris->uri;
369+
} else {
370+
if (uriparser_uris->normalized_uri == NULL) {
371+
uriparser_uris->normalized_uri = uriparser_copy_uri(uriparser_uris->uri);
372+
uriparser_normalize_uri(uriparser_uris->normalized_uri);
397373
}
398374
uriparser_uri = uriparser_uris->normalized_uri;
399375
}
400376

401-
int charsRequired;
402-
if (uriToStringCharsRequiredA(uriparser_uri, &charsRequired) != URI_SUCCESS) {
403-
return NULL;
404-
}
377+
int charsRequired = 0;
378+
ZEND_ASSERT(uriToStringCharsRequiredA(uriparser_uri, &charsRequired) == URI_SUCCESS);
405379

406380
charsRequired++;
407381

408382
zend_string *uri_string = zend_string_alloc(charsRequired - 1, false);
409-
if (uriToStringA(ZSTR_VAL(uri_string), uriparser_uri, charsRequired, NULL) != URI_SUCCESS) {
410-
zend_string_efree(uri_string);
411-
return NULL;
412-
}
383+
ZEND_ASSERT(uriToStringA(ZSTR_VAL(uri_string), uriparser_uri, charsRequired, NULL) == URI_SUCCESS);
413384

414385
if (exclude_fragment) {
415386
const char *pos = zend_memrchr(ZSTR_VAL(uri_string), '#', ZSTR_LEN(uri_string));

ext/uri/uriparser/include/uriparser/Uri.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ typedef struct URI_TYPE(QueryListStruct) {
201201
} URI_TYPE(QueryList); /**< @copydoc UriQueryListStructA */
202202

203203

204+
204205
URI_PUBLIC UriBool URI_FUNC(IsHostSet)(const URI_TYPE(Uri) * uri);
205206

206207

@@ -646,6 +647,12 @@ URI_PUBLIC int URI_FUNC(ToString)(URI_CHAR * dest, const URI_TYPE(Uri) * uri,
646647
int maxChars, int * charsWritten);
647648

648649

650+
651+
void URI_FUNC(PreventLeakage)(URI_TYPE(Uri) * uri,
652+
unsigned int revertMask, UriMemoryManager * memory);
653+
654+
655+
649656
/**
650657
* Copies a %URI structure.
651658
*

ext/uri/uriparser/include/uriparser/UriBase.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -256,9 +256,10 @@ typedef enum UriNormalizationMaskEnum {
256256
URI_NORMALIZE_SCHEME = 1 << 0, /**< Normalize scheme (fix uppercase letters) */
257257
URI_NORMALIZE_USER_INFO = 1 << 1, /**< Normalize user info (fix uppercase percent-encodings) */
258258
URI_NORMALIZE_HOST = 1 << 2, /**< Normalize host (fix uppercase letters) */
259-
URI_NORMALIZE_PATH = 1 << 3, /**< Normalize path (fix uppercase percent-encodings and redundant dot segments) */
260-
URI_NORMALIZE_QUERY = 1 << 4, /**< Normalize query (fix uppercase percent-encodings) */
261-
URI_NORMALIZE_FRAGMENT = 1 << 5 /**< Normalize fragment (fix uppercase percent-encodings) */
259+
URI_NORMALIZE_PORT = 1 << 3, /**< Normalize port (unused) */
260+
URI_NORMALIZE_PATH = 1 << 4, /**< Normalize path (fix uppercase percent-encodings and redundant dot segments) */
261+
URI_NORMALIZE_QUERY = 1 << 5, /**< Normalize query (fix uppercase percent-encodings) */
262+
URI_NORMALIZE_FRAGMENT = 1 << 6 /**< Normalize fragment (fix uppercase percent-encodings) */
262263
} UriNormalizationMask; /**< @copydoc UriNormalizationMaskEnum */
263264

264265

0 commit comments

Comments
 (0)