Dillo v3.1.1-46-g8a360e32
Loading...
Searching...
No Matches
url.c
Go to the documentation of this file.
1/*
2 * File: url.c
3 *
4 * Copyright (C) 2001-2009 Jorge Arellano Cid <jcid@dillo.org>
5 * Copyright (C) 2024 Rodrigo Arias Mallo <rodarima@gmail.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3 of the License, or
10 * (at your option) any later version.
11 */
12
20/*
21 * Regular Expression as given in RFC3986 for URL parsing.
22 *
23 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
24 * 12 3 4 5 6 7 8 9
25 *
26 * scheme = $2
27 * authority = $4
28 * path = $5
29 * query = $7
30 * fragment = $9
31 *
32 *
33 * RFC-2396 BNF:
34 *
35 * absoluteURI = scheme ":" (hier_part | opaque_part)
36 * hier_part = (net_path | abs_path) ["?" query]
37 * net_path = "//" authority[abs_path]
38 * abs_path = "/" path_segments
39 *
40 * Notes:
41 * - "undefined" means "preceding separator does not appear".
42 * - path is never "undefined" though it may be "empty".
43 */
44
45#include <stdlib.h>
46#include <string.h>
47#include <ctype.h>
48
49#include "url.h"
50#include "hsts.h"
51#include "misc.h"
52#include "msg.h"
53
54static const char *HEX = "0123456789ABCDEF";
55
/*
 * URL-field compare methods.
 *
 * Both macros yield an int with strcmp()-like sign:
 *  - two non-NULL fields are compared as strings (the _I_ variant ignores
 *    ASCII case),
 *  - two NULL fields compare equal (0),
 *  - a non-NULL field sorts after a NULL one (1 / -1).
 *
 * The whole expansion is parenthesized so that the macro can be embedded in
 * a larger expression (e.g. "MACRO(a,b) == 0") without the trailing operator
 * binding to the last arm of the conditional.
 * Note: the arguments are evaluated more than once.
 */
#define URL_STR_FIELD_CMP(s1,s2) \
   ((s1) && (s2) ? strcmp(s1,s2) : !(s1) && !(s2) ? 0 : (s1) ? 1 : -1)
#define URL_STR_FIELD_I_CMP(s1,s2) \
   ((s1) && (s2) ? dStrAsciiCasecmp(s1,s2) : !(s1) && !(s2) ? 0 : (s1) ? 1 : -1)
61
66char *a_Url_str(const DilloUrl *u)
67{
68 /* Internal url handling IS transparent to the caller */
69 DilloUrl *url = (DilloUrl *) u;
70
71 dReturn_val_if_fail (url != NULL, NULL);
72
73 if (!url->url_string) {
74 url->url_string = dStr_sized_new(60);
76 url->url_string, "%s%s%s%s%s%s%s%s%s%s",
77 url->scheme ? url->scheme : "",
78 url->scheme ? ":" : "",
79 url->authority ? "//" : "",
80 url->authority ? url->authority : "",
81 // (url->path && url->path[0] != '/' && url->authority) ? "/" : "",
82 (url->authority && (!url->path || *url->path != '/')) ? "/" : "",
83 url->path ? url->path : "",
84 url->query ? "?" : "",
85 url->query ? url->query : "",
86 url->fragment ? "#" : "",
87 url->fragment ? url->fragment : "");
88 }
89
90 return url->url_string->str;
91}
92
98const char *a_Url_hostname(const DilloUrl *u)
99{
100 char *p;
101 /* Internal url handling IS transparent to the caller */
102 DilloUrl *url = (DilloUrl *) u;
103
104 if (!url->hostname && url->authority) {
105 if (url->authority[0] == '[' && (p = strchr(url->authority, ']'))) {
106 /* numeric ipv6 address, strip the brackets */
107 url->hostname = dStrndup(url->authority + 1,
108 (uint_t)(p - url->authority - 1));
109 if ((p = strchr(p, ':'))) {
110 url->port = strtol(p + 1, NULL, 10);
111 }
112 } else {
113 /* numeric ipv4 or hostname */
114 if ((p = strchr(url->authority, ':'))) {
115 url->port = strtol(p + 1, NULL, 10);
116 url->hostname = dStrndup(url->authority,
117 (uint_t)(p - url->authority));
118 } else {
119 url->hostname = url->authority;
120 }
121 }
122 }
123
124 if (!url->port) {
125 if (!dStrAsciiCasecmp(url->scheme, "http"))
126 url->port = URL_HTTP_PORT;
127 else if (!dStrAsciiCasecmp(url->scheme, "https"))
128 url->port = URL_HTTPS_PORT;
129 }
130 return url->hostname;
131}
132
137static DilloUrl *Url_object_new(const char *uri_str)
138{
139 DilloUrl *url;
140 char *s, *p;
141
142 dReturn_val_if_fail (uri_str != NULL, NULL);
143
144 url = dNew0(DilloUrl, 1);
145
146 /* url->buffer is given a little extra room in case HSTS needs to transform
147 * a URL string ending in ":80" to ":443".
148 */
149 int len = strlen(uri_str)+2;
150 s = dNew(char, len);
151 memcpy(s, uri_str, len-1);
152 s = dStrstrip(s);
153
154 /* remove leading & trailing space from buffer */
155 url->buffer = s;
156
157 p = strpbrk(s, ":/?#");
158 if (p && p[0] == ':' && p > s) { /* scheme */
159 *p = 0;
160 url->scheme = s;
161 s = ++p;
162 }
163 /* p = strpbrk(s, "/"); */
164 if (p == s && p[0] == '/' && p[1] == '/') { /* authority */
165 s = p + 2;
166 p = strpbrk(s, "/?#");
167 if (p) {
168 memmove(s - 2, s, (size_t)MAX(p - s, 1));
169 url->authority = s - 2;
170 p[-2] = 0;
171 s = p;
172 } else if (*s) {
173 url->authority = s;
174 return url;
175 }
176 }
177
178 p = strpbrk(s, "?#");
179 if (p) { /* path */
180 url->path = (p > s) ? s : NULL;
181 s = p;
182 } else if (*s) {
183 url->path = s;
184 return url;
185 }
186
187 p = strpbrk(s, "?#");
188 if (p && p[0] == '?') { /* query */
189 *p = 0;
190 s = p + 1;
191 url->query = s;
192 p = strpbrk(s, "#");
193 url->flags |= URL_Get;
194 }
195 if (p && p[0] == '#') { /* fragment */
196 *p = 0;
197 s = p + 1;
198 url->fragment = s;
199 }
200
201 return url;
202}
203
209{
210 if (url) {
211 if (url->url_string)
213 if (url->hostname != url->authority)
214 dFree((char *)url->hostname);
215 dFree((char *)url->buffer);
216 dStr_free(url->data, 1);
217 dFree(url);
218 }
219}
220
224static Dstr *Url_resolve_relative(const char *RelStr,
225 const char *BaseStr)
226{
227 char *p, *s, *e;
228 int i;
229 Dstr *SolvedUrl, *Path;
230 DilloUrl *RelUrl, *BaseUrl = NULL;
231
232 /* parse relative URL */
233 RelUrl = Url_object_new(RelStr);
234
235 if (RelUrl->scheme == NULL) {
236 /* only required when there's no <scheme> in RelStr */
237 BaseUrl = Url_object_new(BaseStr);
238 }
239
240 SolvedUrl = dStr_sized_new(64);
241 Path = dStr_sized_new(64);
242
243 /* path empty && scheme and authority undefined */
244 if (!RelUrl->path && !RelUrl->scheme && !RelUrl->authority) {
245 dStr_append(SolvedUrl, BaseStr);
246 if ((p = strchr(SolvedUrl->str, '#')))
247 dStr_truncate(SolvedUrl, p - SolvedUrl->str);
248 if (!BaseUrl->path)
249 dStr_append_c(SolvedUrl, '/');
250
251 if (RelUrl->query) { /* query */
252 if (BaseUrl->query)
253 dStr_truncate(SolvedUrl, BaseUrl->query - BaseUrl->buffer - 1);
254 dStr_append_c(SolvedUrl, '?');
255 dStr_append(SolvedUrl, RelUrl->query);
256 }
257 if (RelUrl->fragment) { /* fragment */
258 dStr_append_c(SolvedUrl, '#');
259 dStr_append(SolvedUrl, RelUrl->fragment);
260 }
261 goto done;
262
263 } else if (RelUrl->scheme) { /* scheme */
264 dStr_append(SolvedUrl, RelStr);
265 goto done;
266
267 } else if (RelUrl->authority) { /* authority */
268 // Set the Path buffer and goto "STEP 7";
269 if (RelUrl->path)
270 dStr_append(Path, RelUrl->path);
271
272 } else {
273 if (RelUrl->path && RelUrl->path[0] == '/') { /* absolute path */
274 ; /* Ignore BaseUrl path */
275 } else if (BaseUrl->path) { /* relative path */
276 dStr_append(Path, BaseUrl->path);
277 for (i = Path->len; --i >= 0 && Path->str[i] != '/'; ) ;
278 if (i >= 0 && Path->str[i] == '/')
279 dStr_truncate(Path, ++i);
280 }
281 if (RelUrl->path)
282 dStr_append(Path, RelUrl->path);
283
284 // erase "./"
285 while ((p=strstr(Path->str, "./")) &&
286 (p == Path->str || p[-1] == '/'))
287 dStr_erase(Path, p - Path->str, 2);
288 // erase last "."
289 if (Path->len && Path->str[Path->len - 1] == '.' &&
290 (Path->len == 1 || Path->str[Path->len - 2] == '/'))
291 dStr_truncate(Path, Path->len - 1);
292
293 // erase "<segment>/../" and "<segment>/.."
294 s = p = Path->str;
295 while ( (p = strstr(p, "/..")) != NULL ) {
296 if (p[3] == '/' || !p[3]) { // "/../" | "/.."
297 for (e = p + 3 ; p > s && p[-1] != '/'; --p) ;
298 dStr_erase(Path, p - Path->str, e - p + (p > s && *e != 0));
299 p -= (p > Path->str);
300 } else
301 p += 3;
302 }
303 }
304
305 /* STEP 7
306 */
307
308 /* scheme */
309 if (BaseUrl->scheme) {
310 dStr_append(SolvedUrl, BaseUrl->scheme);
311 dStr_append_c(SolvedUrl, ':');
312 }
313
314 /* authority */
315 if (RelUrl->authority) {
316 dStr_append(SolvedUrl, "//");
317 dStr_append(SolvedUrl, RelUrl->authority);
318 } else if (BaseUrl->authority) {
319 dStr_append(SolvedUrl, "//");
320 dStr_append(SolvedUrl, BaseUrl->authority);
321 }
322
323 /* path */
324 if ((RelUrl->authority || BaseUrl->authority) &&
325 ((Path->len == 0 && (RelUrl->query || RelUrl->fragment)) ||
326 (Path->len && Path->str[0] != '/')))
327 dStr_append_c(SolvedUrl, '/'); /* hack? */
328 dStr_append(SolvedUrl, Path->str);
329
330 /* query */
331 if (RelUrl->query) {
332 dStr_append_c(SolvedUrl, '?');
333 dStr_append(SolvedUrl, RelUrl->query);
334 }
335
336 /* fragment */
337 if (RelUrl->fragment) {
338 dStr_append_c(SolvedUrl, '#');
339 dStr_append(SolvedUrl, RelUrl->fragment);
340 }
341
342done:
343 dStr_free(Path, TRUE);
344 a_Url_free(RelUrl);
345 a_Url_free(BaseUrl);
346 return SolvedUrl;
347}
348
371DilloUrl* a_Url_new(const char *url_str, const char *base_url)
372{
373 DilloUrl *url;
374 char *urlstr = (char *)url_str; /* auxiliar variable, don't free */
375 char *p, *str1 = NULL, *str2 = NULL;
376 Dstr *SolvedUrl;
377 int i, n_ic, n_ic_spc;
378
379 if (!url_str)
380 return NULL;
381
382 /* Empty URL without base_url is not valid.
383 * They are used for action="" in forms with base_url set. */
384 if (url_str[0] == '\0' && base_url == NULL)
385 return NULL;
386
387 /* Count illegal characters (0x00-0x1F, 0x7F-0xFF and space) */
388 n_ic = n_ic_spc = 0;
389 for (p = (char*)url_str; *p; p++) {
390 n_ic_spc += (*p == ' ') ? 1 : 0;
391 n_ic += (*p != ' ' && *p > 0x1F && *p < 0x7F) ? 0 : 1;
392 }
393 if (n_ic) {
394 /* Encode illegal characters (they could also be stripped).
395 * There's no standard for illegal chars; we chose to encode. */
396 p = str1 = dNew(char, strlen(url_str) + 2*n_ic + 1);
397 for (i = 0; url_str[i]; ++i)
398 if (url_str[i] > 0x1F && url_str[i] < 0x7F && url_str[i] != ' ')
399 *p++ = url_str[i];
400 else {
401 *p++ = '%';
402 *p++ = HEX[(url_str[i] >> 4) & 15];
403 *p++ = HEX[url_str[i] & 15];
404 }
405 *p = 0;
406 urlstr = str1;
407 }
408
409 /* let's use a heuristic to set http: as default */
410 if (!base_url) {
411 base_url = "http:";
412 if (urlstr[0] != '/') {
413 p = strpbrk(urlstr, "/#?:");
414 if (!p || *p != ':')
415 urlstr = str2 = dStrconcat("//", urlstr, NULL);
416 } else if (urlstr[1] != '/')
417 urlstr = str2 = dStrconcat("/", urlstr, NULL);
418 }
419
420 /* Resolve the URL */
421 SolvedUrl = Url_resolve_relative(urlstr, base_url);
422 _MSG("SolvedUrl = %s\n", SolvedUrl->str);
423
424 /* Fill url data */
425 url = Url_object_new(SolvedUrl->str);
426 url->data = dStr_new("");
427 url->url_string = SolvedUrl;
428 url->illegal_chars = n_ic;
429 url->illegal_chars_spc = n_ic_spc;
430
431 dFree(str1);
432 dFree(str2);
433
434 bool_t switch_to_https = FALSE;
435
436 if (url->scheme && !dStrAsciiCasecmp(url->scheme, "http")) {
437 /*
438 * A site's HTTP Strict Transport Security policy may direct us to transform
439 * URLs like "http://en.wikipedia.org:80" to "https://en.wikipedia.org:443".
440 */
443 _MSG("url: HSTS transformation for %s.\n", url->url_string->str);
444 switch_to_https = TRUE;
445 } else if (prefs.http_force_https) {
446 _MSG("url: Force HTTPS transformation for %s.\n", url->url_string->str);
447 switch_to_https = TRUE;
448 }
449 }
450
451 if (switch_to_https) {
452 const char *const scheme = "https";
453
454 url->scheme = scheme;
455 if (url->port == URL_HTTP_PORT)
456 url->port = URL_HTTPS_PORT;
457
458 if (url->authority) {
459 int len = strlen(url->authority);
460
461 if (len >= 3 && !strcmp(url->authority + len-3, ":80")) {
462 strcpy((char *)url->authority + len-2, "443");
463 }
464 }
465
467 url->url_string = NULL;
468 }
469
470 return url;
471}
472
473
478{
479 DilloUrl *url;
480
481 url = Url_object_new(URL_STR_(ori));
482 dReturn_val_if_fail (url != NULL, NULL);
483
484 url->url_string = dStr_new(URL_STR(ori));
485 url->port = ori->port;
486 url->flags = ori->flags;
487 url->ismap_url_len = ori->ismap_url_len;
488 url->illegal_chars = ori->illegal_chars;
490 url->data = dStr_sized_new(URL_DATA(ori)->len);
491 dStr_append_l(url->data, URL_DATA(ori)->str, URL_DATA(ori)->len);
492 return url;
493}
494
506int a_Url_cmp(const DilloUrl *A, const DilloUrl *B)
507{
508 int st;
509
510 dReturn_val_if_fail(A && B, 1);
511
512 if (A == B ||
513 ((st = URL_STR_FIELD_I_CMP(A->authority, B->authority)) == 0 &&
514 (st = strcmp(A->path ? A->path + (*A->path == '/') : "",
515 B->path ? B->path + (*B->path == '/') : "")) == 0 &&
516 //(st = URL_STR_FIELD_CMP(A->path, B->path)) == 0 &&
517 (st = URL_STR_FIELD_CMP(A->query, B->query)) == 0 &&
518 (st = dStr_cmp(A->data, B->data)) == 0 &&
519 (st = URL_STR_FIELD_I_CMP(A->scheme, B->scheme)) == 0))
520 return 0;
521 return st;
522}
523
527void a_Url_set_flags(DilloUrl *u, int flags)
528{
529 if (u)
530 u->flags = flags;
531}
532
537{
538 if (u) {
539 dStr_free(u->data, 1);
540 u->data = *data;
541 *data = NULL;
542 }
543}
544
549void a_Url_set_ismap_coords(DilloUrl *u, char *coord_str)
550{
551 dReturn_if_fail (u && coord_str);
552
553 if (!u->ismap_url_len) {
554 /* Save base-url length (without coords) */
555 u->ismap_url_len = URL_STR_(u) ? u->url_string->len : 0;
556 }
557 if (u->url_string) {
559 dStr_append(u->url_string, coord_str);
560 u->query = u->url_string->str + u->ismap_url_len + 1;
561 }
562}
563
/**
 * Given an hex octet (e.g., e3, 2F, 20), return the corresponding
 * character if the octet is valid, and -1 otherwise.
 *
 * Only the first two characters of 's' are examined, and both must be
 * hexadecimal digits. They are checked explicitly because strtol() alone
 * would also accept leading whitespace or a sign (e.g. "+1" or " 7").
 *
 * @param s  Pointer to the two characters following a '%'; may be NULL.
 * @return   The octet value (0-255), or -1 when it's not a valid hex octet.
 */
static int Url_decode_hex_octet(const char *s)
{
   char hex[3];

   /* isxdigit('\0') is false, so s[1] is never read past the terminator */
   if (!s || !isxdigit((unsigned char)s[0]) || !isxdigit((unsigned char)s[1]))
      return -1;

   hex[0] = s[0];
   hex[1] = s[1];
   hex[2] = 0;
   return (int) strtol(hex, NULL, 16);
}
581
586char *a_Url_decode_hex_str(const char *str)
587{
588 char *new_str, *dest;
589 int i, val;
590
591 if (!str)
592 return NULL;
593
594 /* most cases won't have hex octets */
595 if (!strchr(str, '%'))
596 return dStrdup(str);
597
598 dest = new_str = dNew(char, strlen(str) + 1);
599
600 for (i = 0; str[i]; i++) {
601 *dest++ = (str[i] == '%' && (val = Url_decode_hex_octet(str+i+1)) >= 0) ?
602 i+=2, val : str[i];
603 }
604 *dest++ = 0;
605
606 new_str = dRealloc(new_str, sizeof(char) * (dest - new_str));
607 return new_str;
608}
609
620char *a_Url_encode_hex_str(const char *str)
621{
622 static const char *const verbatim = "-_.*";
623 char *newstr, *c;
624
625 if (!str)
626 return NULL;
627
628 newstr = dNew(char, 6*strlen(str)+1);
629
630 for (c = newstr; *str; str++)
631 if ((dIsalnum(*str) && d_isascii(*str)) || strchr(verbatim, *str))
632 *c++ = *str;
633 else if (*str == ' ')
634 *c++ = '+';
635 else if (*str == '\n') {
636 *c++ = '%';
637 *c++ = '0';
638 *c++ = 'D';
639 *c++ = '%';
640 *c++ = '0';
641 *c++ = 'A';
642 } else {
643 *c++ = '%';
644 *c++ = HEX[(*str >> 4) & 15];
645 *c++ = HEX[*str & 15];
646 }
647 *c = 0;
648
649 return newstr;
650}
651
652
/**
 * RFC-3986 suggests this stripping when "importing" URLs from other media.
 *
 * Works on a copy of 'str': a leading "URL:" and then a leading '<' are
 * skipped, every byte outside 0x20-0x7E as well as every space is dropped,
 * and one trailing '>' is removed.
 *
 * @param str  String to clean.
 * @return     A newly allocated string, or NULL when the copy can't be made.
 */
char *a_Url_string_strip_delimiters(const char *str)
{
   char *result, *src, *dst;

   result = dStrdup(str);
   if (!result)
      return result;

   src = result;
   if (strncmp(result, "URL:", 4) == 0)
      src += 4;
   if (*src == '<')
      src++;

   /* compact in place: dst never gets ahead of src */
   dst = result;
   while (*src) {
      if (*src > 0x1F && *src < 0x7F && *src != ' ')
         *dst++ = *src;
      src++;
   }
   if (dst > result && dst[-1] == '>')
      --dst;
   *dst = 0;

   return result;
}
679
683int a_Url_host_type(const char *host)
684{
685 uint_t len;
686
687 if (!host || !*host)
688 return URL_HOST_ERROR;
689
690 len = strlen(host);
691
692 if (len == strspn(host, "0123456789.")) {
693 return URL_HOST_IPV4;
694 }
695 if (strchr(host, ':') &&
696 (len == strspn(host, "0123456789abcdefABCDEF:."))) {
697 /* The precise format is shown in section 3.2.2 of rfc 3986 */
698 return URL_HOST_IPV6;
699 }
700 return URL_HOST_NAME;
701}
702
711static uint_t Url_host_public_internal_dots(const char *host)
712{
713 uint_t ret = 1;
714
715 if (host) {
716 int start, after, tld_len;
717
718 /* We may be able to trust the format of the host string more than
719 * I am here. Trailing dots and no dots are real possibilities, though.
720 */
721 after = strlen(host);
722 if (after > 0 && host[after - 1] == '.')
723 after--;
724 start = after;
725 while (start > 0 && host[start - 1] != '.')
726 start--;
727 tld_len = after - start;
728
729 if (tld_len > 0) {
730 /* These TLDs were chosen by examining the current publicsuffix list
731 * in July 2016 and picking out those where it was simplest for
732 * them to describe the situation by beginning with a "*.[tld]" rule
733 * or every rule was "[something].[tld]".
734 *
735 * TODO: Consider the old publicsuffix code again. This TLD list has
736 * shrunk and shrunk over the years, and has become a poorer and
737 * poorer approximation of administrative boundaries.
738 */
739 const char *const tlds[] = {"bd","bn","ck","cy","er","fj","fk",
740 "gu","jm","ke","kh","kw","mm","mz",
741 "ni","np","pg","ye","za","zw"};
742 uint_t i, tld_num = sizeof(tlds) / sizeof(tlds[0]);
743
744 for (i = 0; i < tld_num; i++) {
745 if (strlen(tlds[i]) == (uint_t) tld_len &&
746 !dStrnAsciiCasecmp(tlds[i], host + start, tld_len)) {
747 _MSG("TLD code matched %s\n", tlds[i]);
748 ret++;
749 break;
750 }
751 }
752 }
753 }
754 return ret;
755}
756
762static const char *Url_host_find_public_suffix(const char *host)
763{
764 const char *s;
765 uint_t dots;
766
767 if (a_Url_host_type(host) != URL_HOST_NAME)
768 return host;
769
770 s = host;
771
772 while (s[1])
773 s++;
774
775 if (s > host && *s == '.') {
776 /* don't want to deal with trailing dot */
777 s--;
778 }
779
781
782 /* With a proper host string, we should not be pointing to a dot now. */
783
784 while (s > host) {
785 if (s[-1] == '.') {
786 if (dots == 0)
787 break;
788 else
789 dots--;
790 }
791 s--;
792 }
793
794 _MSG("public suffix of %s is %s\n", host, s);
795 return s;
796}
797
799{
800 if (!u1 || !u2)
801 return FALSE;
802
805 ? FALSE : TRUE;
806}
#define _MSG(...)
Definition bookmarks.c:45
unsigned int uint_t
Definition d_size.h:20
unsigned char bool_t
Definition d_size.h:21
char * dStrconcat(const char *s1,...)
Concatenate a NULL-terminated list of strings.
Definition dlib.c:102
int dStr_cmp(Dstr *ds1, Dstr *ds2)
Compare two dStrs.
Definition dlib.c:478
void dFree(void *mem)
Definition dlib.c:68
int dStrAsciiCasecmp(const char *s1, const char *s2)
Definition dlib.c:203
char * dStrstrip(char *s)
Remove leading and trailing whitespace.
Definition dlib.c:122
void dStr_append(Dstr *ds, const char *s)
Append a C string to a Dstr.
Definition dlib.c:316
char * dStrdup(const char *s)
Definition dlib.c:77
Dstr * dStr_sized_new(int sz)
Create a new string with a given size.
Definition dlib.c:254
int dStrnAsciiCasecmp(const char *s1, const char *s2, size_t n)
Definition dlib.c:215
void dStr_erase(Dstr *ds, int pos_0, int len)
Erase a substring.
Definition dlib.c:388
void dStr_free(Dstr *ds, int all)
Free a dillo string.
Definition dlib.c:337
void dStr_append_l(Dstr *ds, const char *s, int l)
Append a C string to a Dstr (providing length).
Definition dlib.c:308
void dStr_append_c(Dstr *ds, int c)
Append one character.
Definition dlib.c:349
char * dStrndup(const char *s, size_t sz)
Definition dlib.c:88
void dStr_sprintf(Dstr *ds, const char *format,...)
Printf-like function.
Definition dlib.c:450
Dstr * dStr_new(const char *s)
Create a new string.
Definition dlib.c:325
void * dRealloc(void *mem, size_t size)
Definition dlib.c:53
void dStr_truncate(Dstr *ds, int len)
Truncate a Dstr to be 'len' bytes long.
Definition dlib.c:368
#define dReturn_if_fail(expr)
Definition dlib.h:72
#define dNew0(type, count)
Definition dlib.h:51
#define dReturn_val_if_fail(expr, val)
Definition dlib.h:76
#define dIsalnum(c)
Definition dlib.h:34
#define TRUE
Definition dlib.h:23
#define FALSE
Definition dlib.h:19
#define MAX(a, b)
Definition dlib.h:27
#define dNew(type, count)
Definition dlib.h:49
bool_t a_Hsts_require_https(const char *host)
Definition hsts.c:271
#define d_isascii(c)
Definition misc.h:11
DilloPrefs prefs
Global Data.
Definition prefs.c:33
bool_t http_strict_transport_security
Definition prefs.h:101
bool_t http_force_https
Definition prefs.h:102
Definition url.h:88
int illegal_chars_spc
number of illegal space chars
Definition url.h:102
const char * buffer
Definition url.h:90
Dstr * data
POST.
Definition url.h:99
int ismap_url_len
Used by server side image maps.
Definition url.h:100
const char * fragment
Definition url.h:95
const char * query
Definition url.h:94
int illegal_chars
number of illegal chars
Definition url.h:101
const char * path
Definition url.h:93
Dstr * url_string
Definition url.h:89
const char * hostname
Definition url.h:96
int port
Definition url.h:97
const char * scheme
Definition url.h:91
const char * authority
Definition url.h:92
int flags
Definition url.h:98
Definition dlib.h:102
Dstr_char_t * str
Definition dlib.h:105
int len
Definition dlib.h:104
char * a_Url_string_strip_delimiters(const char *str)
RFC-3986 suggests this stripping when "importing" URLs from other media.
Definition url.c:658
static uint_t Url_host_public_internal_dots(const char *host)
How many internal dots are in the public portion of this hostname?.
Definition url.c:711
static const char * Url_host_find_public_suffix(const char *host)
Given a URL host string, return the portion that is public.
Definition url.c:762
void a_Url_set_flags(DilloUrl *u, int flags)
Set DilloUrl flags.
Definition url.c:527
char * a_Url_encode_hex_str(const char *str)
Urlencode 'str'.
Definition url.c:620
int a_Url_cmp(const DilloUrl *A, const DilloUrl *B)
Compare two Url's to check if they're the same, or which one is bigger.
Definition url.c:506
static int Url_decode_hex_octet(const char *s)
Given an hex octet (e.g., e3, 2F, 20), return the corresponding character if the octet is valid,...
Definition url.c:568
void a_Url_free(DilloUrl *url)
Free a DilloUrl.
Definition url.c:208
bool_t a_Url_same_organization(const DilloUrl *u1, const DilloUrl *u2)
Definition url.c:798
void a_Url_set_ismap_coords(DilloUrl *u, char *coord_str)
Set DilloUrl ismap coordinates.
Definition url.c:549
static const char * HEX
Definition url.c:54
void a_Url_set_data(DilloUrl *u, Dstr **data)
Set DilloUrl data (like POST info, etc.)
Definition url.c:536
static Dstr * Url_resolve_relative(const char *RelStr, const char *BaseStr)
Resolve the URL as RFC3986 suggests.
Definition url.c:224
const char * a_Url_hostname(const DilloUrl *u)
Return the hostname as a string.
Definition url.c:98
#define URL_STR_FIELD_I_CMP(s1, s2)
Definition url.c:59
char * a_Url_decode_hex_str(const char *str)
Parse possible hexadecimal octets in the URI path.
Definition url.c:586
DilloUrl * a_Url_new(const char *url_str, const char *base_url)
Transform (and resolve) an URL string into the respective DilloURL.
Definition url.c:371
char * a_Url_str(const DilloUrl *u)
Return the url as a string.
Definition url.c:66
static DilloUrl * Url_object_new(const char *uri_str)
Create a DilloUrl object and initialize it.
Definition url.c:137
#define URL_STR_FIELD_CMP(s1, s2)
Definition url.c:57
int a_Url_host_type(const char *host)
What type of host is this?
Definition url.c:683
DilloUrl * a_Url_dup(const DilloUrl *ori)
Duplicate a Url structure.
Definition url.c:477
Parse and normalize all URL's inside Dillo.
#define URL_HOST_ERROR
Definition url.h:23
#define URL_HOST_NAME
Definition url.h:24
#define URL_DATA(u)
Definition url.h:77
#define URL_HOST_IPV4
Definition url.h:25
#define URL_STR(u)
Definition url.h:76
#define URL_HTTP_PORT
Definition url.h:19
#define URL_STR_(u)
Definition url.h:55
#define URL_HOST_IPV6
Definition url.h:26
#define URL_Get
Definition url.h:32
#define URL_HTTPS_PORT
Definition url.h:20
#define URL_HOST(u)
Definition url.h:75