/*
 * Uppercase hexadecimal digits, indexed by nibble value (0..15).
 * Used when percent-encoding bytes: HEX[(c >> 4) & 15] and HEX[c & 15].
 * Both the characters and the pointer itself are read-only.
 */
static const char *const HEX = "0123456789ABCDEF";
/*
 * Compare two URL string fields, either of which may be NULL
 * (case-sensitive).
 *
 *  - both non-NULL: the result of strcmp(s1, s2)
 *  - both NULL:     0
 *  - only s1 set:   1
 *  - only s2 set:  -1
 *
 * The whole expansion is parenthesized so the macro can be used safely
 * inside larger expressions (e.g. `URL_STR_FIELD_CMP(a, b) == 0`).
 * Note: the arguments are evaluated more than once; do not pass
 * expressions with side effects.
 */
#define URL_STR_FIELD_CMP(s1,s2) \
   (((s1) && (s2)) ? strcmp((s1), (s2)) \
                   : ((!(s1) && !(s2)) ? 0 : ((s1) ? 1 : -1)))
/*
 * Compare two URL string fields, either of which may be NULL, using
 * dStrAsciiCasecmp() for the comparison when both are present.
 *
 *  - both non-NULL: the result of dStrAsciiCasecmp(s1, s2)
 *  - both NULL:     0
 *  - only s1 set:   1
 *  - only s2 set:  -1
 *
 * The whole expansion is parenthesized so the macro can be used safely
 * inside larger expressions (e.g. `URL_STR_FIELD_I_CMP(a, b) == 0`).
 * Note: the arguments are evaluated more than once; do not pass
 * expressions with side effects.
 */
#define URL_STR_FIELD_I_CMP(s1,s2) \
   (((s1) && (s2)) ? dStrAsciiCasecmp((s1), (s2)) \
                   : ((!(s1) && !(s2)) ? 0 : ((s1) ? 1 : -1)))
84 url->
query ?
"?" :
"",
109 if ((p = strchr(p,
':'))) {
110 url->
port = strtol(p + 1, NULL, 10);
115 url->
port = strtol(p + 1, NULL, 10);
149 int len = strlen(uri_str)+2;
151 memcpy(s, uri_str, len-1);
157 p = strpbrk(s,
":/?#");
158 if (p && p[0] ==
':' && p > s) {
164 if (p == s && p[0] ==
'/' && p[1] ==
'/') {
166 p = strpbrk(s,
"/?#");
168 memmove(s - 2, s, (
size_t)
MAX(p - s, 1));
178 p = strpbrk(s,
"?#");
180 url->
path = (p > s) ? s : NULL;
187 p = strpbrk(s,
"?#");
188 if (p && p[0] ==
'?') {
195 if (p && p[0] ==
'#') {
229 Dstr *SolvedUrl, *Path;
235 if (RelUrl->
scheme == NULL) {
246 if ((p = strchr(SolvedUrl->
str,
'#')))
263 }
else if (RelUrl->
scheme) {
273 if (RelUrl->
path && RelUrl->
path[0] ==
'/') {
275 }
else if (BaseUrl->
path) {
277 for (i = Path->
len; --i >= 0 && Path->
str[i] !=
'/'; ) ;
278 if (i >= 0 && Path->
str[i] ==
'/')
285 while ((p=strstr(Path->
str,
"./")) &&
286 (p == Path->
str || p[-1] ==
'/'))
289 if (Path->
len && Path->
str[Path->
len - 1] ==
'.' &&
290 (Path->
len == 1 || Path->
str[Path->
len - 2] ==
'/'))
295 while ( (p = strstr(p,
"/..")) != NULL ) {
296 if (p[3] ==
'/' || !p[3]) {
297 for (e = p + 3 ; p > s && p[-1] !=
'/'; --p) ;
299 p -= (p > Path->
str);
326 (Path->
len && Path->
str[0] !=
'/')))
374 char *urlstr = (
char *)url_str;
375 char *p, *str1 = NULL, *str2 = NULL;
377 int i, n_ic, n_ic_spc;
384 if (url_str[0] ==
'\0' && base_url == NULL)
389 for (p = (
char*)url_str; *p; p++) {
390 n_ic_spc += (*p ==
' ') ? 1 : 0;
391 n_ic += (*p !=
' ' && *p > 0x1F && *p < 0x7F) ? 0 : 1;
396 p = str1 =
dNew(
char, strlen(url_str) + 2*n_ic + 1);
397 for (i = 0; url_str[i]; ++i)
398 if (url_str[i] > 0x1F && url_str[i] < 0x7F && url_str[i] !=
' ')
402 *p++ =
HEX[(url_str[i] >> 4) & 15];
403 *p++ =
HEX[url_str[i] & 15];
412 if (urlstr[0] !=
'/') {
413 p = strpbrk(urlstr,
"/#?:");
415 urlstr = str2 =
dStrconcat(
"//", urlstr, NULL);
416 }
else if (urlstr[1] !=
'/')
417 urlstr = str2 =
dStrconcat(
"/", urlstr, NULL);
422 _MSG(
"SolvedUrl = %s\n", SolvedUrl->
str);
444 switch_to_https =
TRUE;
447 switch_to_https =
TRUE;
451 if (switch_to_https) {
452 const char *
const scheme =
"https";
461 if (len >= 3 && !strcmp(url->
authority + len-3,
":80")) {
462 strcpy((
char *)url->
authority + len-2,
"443");
514 (st = strcmp(A->
path ? A->
path + (*A->
path ==
'/') :
"",
573 if (s && (hex[0] = s[0]) && (hex[1] = s[1])) {
575 hex_value = strtol(hex, &tail, 16);
588 char *new_str, *dest;
595 if (!strchr(str,
'%'))
598 dest = new_str =
dNew(
char, strlen(str) + 1);
600 for (i = 0; str[i]; i++) {
606 new_str =
dRealloc(new_str,
sizeof(
char) * (dest - new_str));
622 static const char *
const verbatim =
"-_.*";
628 newstr =
dNew(
char, 6*strlen(str)+1);
630 for (c = newstr; *str; str++)
633 else if (*str ==
' ')
635 else if (*str ==
'\n') {
644 *c++ =
HEX[(*str >> 4) & 15];
645 *c++ =
HEX[*str & 15];
660 char *p, *new_str, *text;
665 if (strncmp(new_str,
"URL:", 4) == 0)
670 for (p = new_str; *text; text++)
671 if (*text > 0x1F && *text < 0x7F && *text !=
' ')
673 if (p > new_str && p[-1] ==
'>')
692 if (len == strspn(host,
"0123456789.")) {
695 if (strchr(host,
':') &&
696 (len == strspn(host,
"0123456789abcdefABCDEF:."))) {
716 int start, after, tld_len;
721 after = strlen(host);
722 if (after > 0 && host[after - 1] ==
'.')
725 while (start > 0 && host[start - 1] !=
'.')
727 tld_len = after - start;
739 const char *
const tlds[] = {
"bd",
"bn",
"ck",
"cy",
"er",
"fj",
"fk",
740 "gu",
"jm",
"ke",
"kh",
"kw",
"mm",
"mz",
741 "ni",
"np",
"pg",
"ye",
"za",
"zw"};
742 uint_t i, tld_num =
sizeof(tlds) /
sizeof(tlds[0]);
744 for (i = 0; i < tld_num; i++) {
745 if (strlen(tlds[i]) == (
uint_t) tld_len &&
747 _MSG(
"TLD code matched %s\n", tlds[i]);
775 if (s > host && *s ==
'.') {
794 _MSG(
"public suffix of %s is %s\n", host, s);
char * dStrconcat(const char *s1,...)
Concatenate a NULL-terminated list of strings.
int dStr_cmp(Dstr *ds1, Dstr *ds2)
Compare two dStrs.
int dStrAsciiCasecmp(const char *s1, const char *s2)
char * dStrstrip(char *s)
Remove leading and trailing whitespace.
void dStr_append(Dstr *ds, const char *s)
Append a C string to a Dstr.
char * dStrdup(const char *s)
Dstr * dStr_sized_new(int sz)
Create a new string with a given size.
int dStrnAsciiCasecmp(const char *s1, const char *s2, size_t n)
void dStr_erase(Dstr *ds, int pos_0, int len)
Erase a substring.
void dStr_free(Dstr *ds, int all)
Free a dillo string.
void dStr_append_l(Dstr *ds, const char *s, int l)
Append a C string to a Dstr (providing length).
void dStr_append_c(Dstr *ds, int c)
Append one character.
char * dStrndup(const char *s, size_t sz)
void dStr_sprintf(Dstr *ds, const char *format,...)
Printf-like function.
Dstr * dStr_new(const char *s)
Create a new string.
void * dRealloc(void *mem, size_t size)
void dStr_truncate(Dstr *ds, int len)
Truncate a Dstr to be 'len' bytes long.
#define dReturn_if_fail(expr)
#define dNew0(type, count)
#define dReturn_val_if_fail(expr, val)
#define dNew(type, count)
bool_t a_Hsts_require_https(const char *host)
DilloPrefs prefs
Global Data.
bool_t http_strict_transport_security
int illegal_chars_spc
number of illegal space chars
int ismap_url_len
Used by server side image maps.
int illegal_chars
number of illegal chars
char * a_Url_string_strip_delimiters(const char *str)
RFC-3986 suggests this stripping when "importing" URLs from other media.
static uint_t Url_host_public_internal_dots(const char *host)
How many internal dots are in the public portion of this hostname?
static const char * Url_host_find_public_suffix(const char *host)
Given a URL host string, return the portion that is public.
void a_Url_set_flags(DilloUrl *u, int flags)
Set DilloUrl flags.
char * a_Url_encode_hex_str(const char *str)
Urlencode 'str'.
int a_Url_cmp(const DilloUrl *A, const DilloUrl *B)
Compare two URLs to check whether they are the same, or which one is bigger.
static int Url_decode_hex_octet(const char *s)
Given a hex octet (e.g., e3, 2F, 20), return the corresponding character if the octet is valid,...
void a_Url_free(DilloUrl *url)
Free a DilloUrl.
bool_t a_Url_same_organization(const DilloUrl *u1, const DilloUrl *u2)
void a_Url_set_ismap_coords(DilloUrl *u, char *coord_str)
Set DilloUrl ismap coordinates.
void a_Url_set_data(DilloUrl *u, Dstr **data)
Set DilloUrl data (like POST info, etc.)
static Dstr * Url_resolve_relative(const char *RelStr, const char *BaseStr)
Resolve the URL as RFC3986 suggests.
const char * a_Url_hostname(const DilloUrl *u)
Return the hostname as a string.
#define URL_STR_FIELD_I_CMP(s1, s2)
char * a_Url_decode_hex_str(const char *str)
Parse possible hexadecimal octets in the URI path.
DilloUrl * a_Url_new(const char *url_str, const char *base_url)
Transform (and resolve) a URL string into the corresponding DilloUrl.
char * a_Url_str(const DilloUrl *u)
Return the url as a string.
static DilloUrl * Url_object_new(const char *uri_str)
Create a DilloUrl object and initialize it.
#define URL_STR_FIELD_CMP(s1, s2)
int a_Url_host_type(const char *host)
What type of host is this?
DilloUrl * a_Url_dup(const DilloUrl *ori)
Duplicate a Url structure.
Parse and normalize all URLs inside Dillo.