Dillo v3.1.1-46-g8a360e32
Loading...
Searching...
No Matches
html.cc
Go to the documentation of this file.
1/*
2 * File: html.cc
3 *
4 * Copyright (C) 2005-2007 Jorge Arellano Cid <jcid@dillo.org>
5 * Copyright (C) 2024 Rodrigo Arias Mallo <rodarima@gmail.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3 of the License, or
10 * (at your option) any later version.
11 */
12
17/*-----------------------------------------------------------------------------
18 * Includes
19 *---------------------------------------------------------------------------*/
20#include <ctype.h> /* for isspace */
21#include <string.h> /* for memcpy and memmove */
22#include <stdlib.h>
23#include <stdio.h> /* for sprintf */
24#include <errno.h>
25
26#include "bw.h" /* for BrowserWindow */
27#include "msg.h"
28#include "binaryconst.h"
29#include "colors.h"
30#include "html_charrefs.h"
31#include "utf8.hh"
32
33#include "misc.h"
34#include "uicmd.hh"
35#include "history.h"
36#include "menu.hh"
37#include "prefs.h"
38#include "capi.h"
39#include "html.hh"
40#include "html_common.hh"
41#include "form.hh"
42#include "table.hh"
43
44#include "dw/textblock.hh"
45#include "dw/bullet.hh"
46#include "dw/listitem.hh"
47#include "dw/image.hh"
48#include "dw/ruler.hh"
49
50/*-----------------------------------------------------------------------------
51 * Defines
52 *---------------------------------------------------------------------------*/
53
54/* Define to 1 to ignore white space immediately after an open tag,
55 * and immediately before a close tag. */
56#define SGML_SPCDEL 0
57
58#define TAB_SIZE 8
59
60/*-----------------------------------------------------------------------------
61 * Name spaces
62 *---------------------------------------------------------------------------*/
63using namespace lout;
64using namespace dw;
65using namespace dw::core;
66using namespace dw::core::ui;
67using namespace dw::core::style;
68
69/*-----------------------------------------------------------------------------
70 * Typedefs
71 *---------------------------------------------------------------------------*/
72class DilloHtml;
73typedef void (*TagOpenFunct) (DilloHtml *html, const char *tag, int tagsize);
74typedef void (*TagCloseFunct) (DilloHtml *html);
75
85
91
92
93/*
94 * Exported function with C linkage.
95 */
96extern "C" {
97void *a_Html_text(const char *type, void *P, CA_Callback_t *Call,void **Data);
98}
99
100/*-----------------------------------------------------------------------------
101 * Forward declarations
102 *---------------------------------------------------------------------------*/
103static int Html_write_raw(DilloHtml *html, char *buf, int bufsize, int Eof);
104static bool Html_load_image(BrowserWindow *bw, DilloUrl *url,
105 const DilloUrl *requester, DilloImage *image);
106static void Html_callback(int Op, CacheClient_t *Client);
107static void Html_tag_cleanup_at_close(DilloHtml *html, int TagIdx);
108int a_Html_tag_index(const char *tag);
109
110/*-----------------------------------------------------------------------------
111 * Local Data
112 *---------------------------------------------------------------------------*/
114typedef struct {
115 const char *name; /* element name */
116 unsigned char Flags; /* flags (explained near the table data) */
117 char EndTag; /* Is it Required, Optional or Forbidden */
118 TagOpenFunct open; /* Open function */
119 TagOpenFunct content; /* Content function */
120 TagCloseFunct close; /* Close function */
121} TagInfo;
122
123/* Some element indexes required in scattered places */
124static int
141
142
143/*-----------------------------------------------------------------------------
144 *-----------------------------------------------------------------------------
145 * Main Code
146 *-----------------------------------------------------------------------------
147 *---------------------------------------------------------------------------*/
148
152void DilloHtml::bugMessage(const char *format, ... )
153{
154 va_list argp;
155
156 if (bw->num_page_bugs)
157 dStr_append_c(bw->page_bugs, '\n');
159 "HTML warning: line %d, ",
161 va_start(argp, format);
162 dStr_vsprintfa(bw->page_bugs, format, argp);
163 va_end(argp);
165}
166
172 const char *url_str, const char *base_url,
173 int use_base_url)
174{
175 if (!url_str) {
176 MSG("a_Html_url_new: URL is NULL\n");
177 return NULL;
178 }
179
180 DilloUrl *url = a_Url_new(url_str,
181 (use_base_url) ? base_url : URL_STR_(html->base_url));
182
183 if (!url) {
184 BUG_MSG("URL is not valid '%s'.", url_str);
185 return NULL;
186 }
187
188 int n_ic, n_ic_spc;
189 if ((n_ic = URL_ILLEGAL_CHARS(url)) != 0) {
190 const char *suffix = (n_ic) > 1 ? "s" : "";
191 n_ic_spc = URL_ILLEGAL_CHARS_SPC(url);
192 if (n_ic == n_ic_spc) {
193 BUG_MSG("URL has %d illegal space%s ('%s').", n_ic, suffix, url_str);
194 } else if (n_ic_spc == 0) {
195 BUG_MSG("URL has %d illegal byte%s in {00-1F, 7F-FF} range ('%s').",
196 n_ic, suffix, url_str);
197 } else {
198 BUG_MSG("URL has %d illegal byte%s: "
199 "%d space%s and %d in {00-1F, 7F-FF} range ('%s').",
200 n_ic, suffix,
201 n_ic_spc, n_ic_spc > 1 ? "s" : "", n_ic-n_ic_spc, url_str);
202 }
203 }
204 return url;
205}
206
210void *a_Html_text(const char *Type, void *P, CA_Callback_t *Call, void **Data)
211{
212 DilloWeb *web = (DilloWeb*)P;
213 DilloHtml *html = new DilloHtml(web->bw, web->url, Type);
214
215 *Data = (void*)html;
217
218 return (void*)html->dw;
219}
220
221static void Html_free(void *data)
222{
223 delete ((DilloHtml*)data);
224}
225
229void a_Html_load_images(void *v_html, DilloUrl *pattern)
230{
231 DilloHtml *html = (DilloHtml*)v_html;
232
233 html->loadImages(pattern);
234}
235
239static bool Html_contains_form(DilloHtml *html, void *v_form)
240{
241 for (int i = 0; i < html->forms->size(); i++) {
242 if (html->forms->get(i) == v_form) {
243 return true;
244 }
245 }
246 return false;
247}
248
252void a_Html_form_submit(void *v_html, void *v_form)
253{
254 DilloHtml *html = (DilloHtml*)v_html;
255
256 if (Html_contains_form(html, v_form)) {
257 /* it's still valid */
258 a_Html_form_submit2(v_form);
259 }
260}
261
265void a_Html_form_reset(void *v_html, void *v_form)
266{
267 DilloHtml *html = (DilloHtml*)v_html;
268
269 if (Html_contains_form(html, v_form)) {
270 /* it's still valid */
271 a_Html_form_reset2(v_form);
272 }
273}
274
278void a_Html_form_display_hiddens(void *v_html, void *v_form, bool_t display)
279{
280 DilloHtml *html = (DilloHtml*)v_html;
281
282 if (Html_contains_form(html, v_form)) {
283 /* it's still valid */
284 a_Html_form_display_hiddens2(v_form, (display != 0));
285 }
286}
287
291static void Html_set_link_coordinates(DilloHtml *html, int link, int x, int y)
292{
293 char data[64];
294
295 if (x != -1) {
296 snprintf(data, 64, "?%d,%d", x, y);
297 a_Url_set_ismap_coords(html->links->get(link), data);
298 }
299}
300
305static int Html_set_new_link(DilloHtml *html, DilloUrl **url)
306{
307 int nl = html->links->size();
308 html->links->increase();
309 html->links->set(nl, (*url) ? *url : NULL);
310 return nl;
311}
312
317void a_Html_tag_set_align_attr(DilloHtml *html, const char *tag, int tagsize)
318{
319 const char *align;
320
321 if ((align = a_Html_get_attr(html, tag, tagsize, "align"))) {
322 TextAlignType textAlignType = TEXT_ALIGN_LEFT;
323
324 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
325 BUG_MSG("The align attribute is obsolete in HTML5.");
326
327 if (dStrAsciiCasecmp (align, "left") == 0)
328 textAlignType = TEXT_ALIGN_LEFT;
329 else if (dStrAsciiCasecmp (align, "right") == 0)
330 textAlignType = TEXT_ALIGN_RIGHT;
331 else if (dStrAsciiCasecmp (align, "center") == 0)
332 textAlignType = TEXT_ALIGN_CENTER;
333 else if (dStrAsciiCasecmp (align, "justify") == 0)
334 textAlignType = TEXT_ALIGN_JUSTIFY;
335#if 0
336 else if (dStrAsciiCasecmp (align, "char") == 0) {
337 /* TODO: Actually not supported for <p> etc. */
338 v.textAlign = TEXT_ALIGN_STRING;
339 if ((charattr = a_Html_get_attr(html, tag, tagsize, "char"))) {
340 if (charattr[0] == 0)
341 /* TODO: ALIGN=" ", and even ALIGN="&32;" will result in
342 * an empty string (don't know whether the latter is
343 * correct, has to be clarified with the specs), so
344 * that for empty strings, " " is assumed. */
345 style_attrs.textAlignChar = ' ';
346 else
347 style_attrs.textAlignChar = charattr[0];
348 } else
349 /* TODO: Examine LANG attr of <html>. */
350 style_attrs.textAlignChar = '.';
351 }
352#endif
354 textAlignType);
355 }
356}
357
362bool a_Html_tag_set_valign_attr(DilloHtml *html, const char *tag, int tagsize)
363{
364 const char *attr;
365 VAlignType valign;
366
367 if ((attr = a_Html_get_attr(html, tag, tagsize, "valign"))) {
368 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
369 BUG_MSG("The valign attribute is obsolete in HTML5.");
370
371 if (dStrAsciiCasecmp (attr, "top") == 0)
372 valign = VALIGN_TOP;
373 else if (dStrAsciiCasecmp (attr, "bottom") == 0)
374 valign = VALIGN_BOTTOM;
375 else if (dStrAsciiCasecmp (attr, "baseline") == 0)
376 valign = VALIGN_BASELINE;
377 else
378 valign = VALIGN_MIDDLE;
379
381 CSS_TYPE_ENUM, valign);
382 return true;
383 } else
384 return false;
385}
386
387
392static void Html_add_textblock(DilloHtml *html, bool addBreaks, int breakSpace,
393 bool addBreakOpt)
394{
395 Textblock *textblock = new Textblock (prefs.limit_text_width);
396 Style *style;
397
398 if (addBreaks) {
399 StyleAttrs attrs = *(html->style ());
400 attrs.display = DISPLAY_BLOCK;
401 style = Style::create (&attrs);
402 } else {
403 style = html->style ();
404 style->ref ();
405 }
406
407 if (addBreaks)
408 HT2TB(html)->addParbreak (breakSpace, html->wordStyle ());
409
410 HT2TB(html)->addWidget (textblock, style); /* Works also for floats etc. */
411 if (addBreakOpt)
412 HT2TB(html)->addBreakOption (html->style (), false);
413
414 if (addBreaks)
415 HT2TB(html)->addParbreak (breakSpace, html->wordStyle ());
416 S_TOP(html)->textblock = html->dw = textblock;
417 if (addBreaks)
418 S_TOP(html)->hand_over_break = true;
419
420 style->unref ();
421}
422
424{
425 return HT2TB(html)->mustAddBreaks (html->style ());
426}
427
432 const char *content_type)
433{
434 /* Init main variables */
435 bw = p_bw;
436 page_url = a_Url_dup(url);
437 base_url = a_Url_dup(url);
438 dw = NULL;
439
440 /* Init event receiver */
441 linkReceiver.html = this;
442 HT2LT(this)->connectLink (&linkReceiver);
443
444 a_Bw_add_doc(p_bw, this);
445
446 /* Init for-parsing variables */
447 Start_Buf = NULL;
448 Start_Ofs = 0;
449
450 _MSG("DilloHtml(): content type: %s\n", content_type);
451 this->content_type = dStrdup(content_type);
452
453 /* get charset */
455
456 stop_parser = false;
457
458 CurrOfs = OldOfs = 0;
459 OldLine = 1;
460
461 DocType = DT_NONE; /* assume Tag Soup 0.0! :-) */
462 DocTypeVersion = 0.0f;
463
465
466 cssUrls = new misc::SimpleVector <DilloUrl*> (1);
467
468 stack = new misc::SimpleVector <DilloHtmlState> (16);
469 stack->increase();
473 stack->getRef(0)->cell_text_align_set = false;
474 stack->getRef(0)->display_none = false;
476 stack->getRef(0)->list_number = 0;
477 stack->getRef(0)->tag_idx = -1; /* MUST not be used */
478 stack->getRef(0)->textblock = NULL;
479 stack->getRef(0)->table = NULL;
480 stack->getRef(0)->ref_list_item = NULL;
481 stack->getRef(0)->hand_over_break = false;
482
484
485 Stash = dStr_new("");
486 StashSpace = false;
487
488 pre_column = 0;
489 PreFirstChar = false;
490 PrevWasCR = false;
491 InVisitedLink = false;
492 ReqTagClose = false;
493 TagSoup = true;
494 loadCssFromStash = false;
495 PrevWasBodyClose = false;
496 PrevWasHtmlClose = false;
497
499
501
504 visited_color = -1;
505
506 /* Init page-handling variables */
507 forms = new misc::SimpleVector <DilloHtmlForm*> (1);
508 inputs_outside_form = new misc::SimpleVector <DilloHtmlInput*> (1);
509 links = new misc::SimpleVector <DilloUrl*> (64);
510 images = new misc::SimpleVector <DilloHtmlImage*> (16);
511
512 /* Initialize the main widget */
513 initDw();
514 /* Hook destructor to the dw delete call */
515 dw->setDeleteCallback(Html_free, this);
516}
517
522{
523 dReturn_if_fail (dw == NULL);
524
525 /* Create the main widget */
527
528 bw->num_page_bugs = 0;
530}
531
536{
537 _MSG("::~DilloHtml(this=%p)\n", this);
538
540
541 a_Bw_remove_doc(bw, this);
542
545
546 for (int i = 0; i < cssUrls->size(); i++)
548 delete (cssUrls);
549
550 for (int i = 0; i < forms->size(); i++)
552 delete(forms);
553
554 for (int i = 0; i < inputs_outside_form->size(); i++)
556 delete(inputs_outside_form);
557
558 for (int i = 0; i < links->size(); i++)
559 a_Url_free(links->get(i));
560 delete (links);
561
562 for (int i = 0; i < images->size(); i++) {
563 DilloHtmlImage *img = images->get(i);
564 a_Url_free(img->url);
565 a_Image_unref(img->image);
566 dFree(img);
567 }
568 delete (images);
569
570 delete styleEngine;
571}
572
577void DilloHtml::write(char *Buf, int BufSize, int Eof)
578{
579 int token_start;
580 char *buf = Buf + Start_Ofs;
581 int bufsize = BufSize - Start_Ofs;
582
583 _MSG("DilloHtml::write BufSize=%d Start_Ofs=%d\n", BufSize, Start_Ofs);
584#if 0
585 char *aux = dStrndup(Buf, BufSize);
586 MSG(" {%s}\n", aux);
587 dFree(aux);
588#endif
589
590 /* Update Start_Buf. It may be used after the parser is stopped */
591 Start_Buf = Buf;
592
593 dReturn_if (dw == NULL);
594 dReturn_if (stop_parser == true);
595
596 token_start = Html_write_raw(this, buf, bufsize, Eof);
597 Start_Ofs += token_start;
598}
599
605{
606 int i, ofs, line;
607 const char *p = Start_Buf;
608
609 dReturn_val_if_fail(p != NULL, -1);
610 /* Disable line counting for META hack. Buffers differ. */
612
613 ofs = CurrOfs;
614 line = OldLine;
615 for (i = OldOfs; i < ofs; ++i)
616 if (p[i] == '\n' || (p[i] == '\r' && p[i+1] != '\n'))
617 ++line;
618 OldOfs = CurrOfs;
619 OldLine = line;
620 return line;
621}
622
627{
628 delete(stack);
629
633 dFree(charset);
634}
635
640void DilloHtml::finishParsing(int ClientKey)
641{
642 int si;
643
644 dReturn_if (stop_parser == true);
645
646 /* flag we've already parsed up to the last byte */
647 InFlags |= IN_EOF;
648
649 /* force the close of elements left open (TODO: not for XHTML) */
650 while ((si = stack->size() - 1)) {
651 if (stack->getRef(si)->tag_idx != -1) {
653 }
654 }
655
656 /* Nothing left to do with the parser. Clear all flags, except EOF. */
657 InFlags = IN_EOF;
658
659 /* Remove this client from our active list */
660 a_Bw_close_client(bw, ClientKey);
661}
662
667 DilloHtmlEnc enc, const char *charset)
668{
669 // avoid data loss on repush after CSS stylesheets have been loaded
670 bool enabled = bw->NumPendingStyleSheets == 0;
671 DilloHtmlForm *form = a_Html_form_new (this, method, action,
672 enc, charset, enabled);
673 int nf = forms->size ();
674 forms->increase ();
675 forms->set (nf, form);
676 _MSG("Html formNew: action=%s nform=%d\n", action, nf);
677 return forms->size();
678}
679
684{
685 return forms->get (forms->size() - 1);
686}
687
689{
690 for (int i = 0; i < images->size(); i++) {
691 if (images->get(i)->image != NULL) {
692 return TRUE;
693 }
694 }
695 return FALSE;
696}
697
701void DilloHtml::loadImages (const DilloUrl *pattern)
702{
704
705 /* If the user asked for a specific image, the user (NULL) is the requester,
706 * and the domain mechanism will always permit the request. But if the user
707 * just asked for all images (clicking "Load images"), use the page URL as
708 * the requester so that the domain mechanism can act as a filter.
709 * If the possible patterns become more complex, it might be good to have
710 * the caller supply the requester instead.
711 */
712 const DilloUrl *requester = pattern ? NULL : this->page_url;
713
714 for (int i = 0; i < images->size(); i++) {
715 DilloHtmlImage *hi = images->get(i);
716
717 if (hi->image) {
718 assert(hi->url);
719 if ((!pattern) || (!a_Url_cmp(hi->url, pattern))) {
720 if (Html_load_image(bw, hi->url, requester, hi->image)) {
721 a_Image_unref (hi->image);
722 hi->image = NULL; // web owns it now
723 }
724 }
725 }
726 }
727}
728
733{
734 int nu = cssUrls->size();
735 cssUrls->increase();
736 cssUrls->set(nu, a_Url_dup(url));
737}
738
739bool DilloHtml::HtmlLinkReceiver::enter (Widget *widget, int link, int img,
740 int x, int y)
741{
743
744 _MSG(" ** ");
745 if (link == -1) {
746 _MSG(" Link LEAVE notify...\n");
747 a_UIcmd_set_msg(bw, "");
748 } else {
749 _MSG(" Link ENTER notify...\n");
750 Html_set_link_coordinates(html, link, x, y);
751 a_UIcmd_set_msg(bw, "%s", URL_STR(html->links->get(link)));
752 }
753 return true;
754}
755
759bool DilloHtml::HtmlLinkReceiver::press (Widget *widget, int link, int img,
760 int x, int y, EventButton *event)
761{
762 BrowserWindow *bw = html->bw;
763 int ret = false;
764 DilloUrl *linkurl = NULL;
765
766 _MSG("pressed button %d\n", event->button);
767 if (event->button == 3) {
768 // popup menus
769 if (img != -1) {
770 // image menu
771 if (link != -1)
772 linkurl = html->links->get(link);
773 const bool_t loaded_img = (html->images->get(img)->image == NULL);
774 a_UIcmd_image_popup(bw, html->images->get(img)->url, loaded_img,
775 html->page_url, linkurl);
776 ret = true;
777 } else {
778 if (link == -1) {
779 a_UIcmd_page_popup(bw, bw->num_page_bugs != 0, html->cssUrls);
780 ret = true;
781 } else {
782 a_UIcmd_link_popup(bw, html->links->get(link));
783 ret = true;
784 }
785 }
786 }
787 return ret;
788}
789
793bool DilloHtml::HtmlLinkReceiver::click (Widget *widget, int link, int img,
794 int x, int y, EventButton *event)
795{
796 BrowserWindow *bw = html->bw;
797
798 if ((img != -1) && (html->images->get(img)->image)) {
799 // clicked an image that has not already been loaded
800 if (event->button == 1){
801 // load all instances of this image
802 DilloUrl *pattern = html->images->get(img)->url;
803 html->loadImages(pattern);
804 return true;
805 }
806 }
807
808 if (link != -1) {
809 DilloUrl *url = html->links->get(link);
810 _MSG("clicked on URL %d: %s\n", link, a_Url_str (url));
811
812 Html_set_link_coordinates(html, link, x, y);
813
814 if (event->button == 1) {
815 a_UIcmd_open_url(bw, url);
816 } else if (event->button == 2) {
818 int focus = prefs.focus_new_tab ? 1 : 0;
819 if (event->state == SHIFT_MASK) focus = !focus;
820 a_UIcmd_open_url_nt(bw, url, focus);
821 } else
823 } else {
824 return false;
825 }
826
827 /* Change the link color to "visited" as visual feedback */
828 for (Widget *w = widget; w; w = w->getParent()) {
829 _MSG(" ->%s\n", w->getClassName());
830 if (w->instanceOf(dw::Textblock::CLASS_ID)) {
831 ((Textblock*)w)->changeLinkColor (link, html->visited_color);
832 break;
833 }
834 }
835 }
836 return true;
837}
838
843{
844 S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_STASH;
845 html->StashSpace = false;
846 dStr_truncate(html->Stash, 0);
847}
848
/*
 * Replace the code points 145..151 (typographic quotes, bullet and dashes
 * in the MS code page, which are not valid numeric references) with a
 * plain substitute:
 *   145, 146 -> '\''      147, 148 -> '"'
 *   149      -> 176       150, 151 -> '-'
 * Any other code point is returned unchanged.
 */
static int Html_ms_stupid_quotes_2ucs(int codepoint)
{
   if (codepoint == 145 || codepoint == 146)
      return '\'';
   if (codepoint == 147 || codepoint == 148)
      return '"';
   if (codepoint == 149)
      return 176;
   if (codepoint == 150 || codepoint == 151)
      return '-';

   return codepoint;
}
870
875static const char *Html_parse_numeric_charref(DilloHtml *html, char *tok,
876 bool_t is_attr, int *entsize)
877{
878 static char buf[5];
879 char *s = tok;
880 int n, codepoint = -1;
881
882 errno = 0;
883
884 if (*s == 'x' || *s == 'X') {
885 if (isxdigit(*++s)) {
886 /* strtol with base 16 accepts leading "0x" - we don't */
887 if (*s == '0' && s[1] == 'x') {
888 s++;
889 codepoint = 0;
890 } else {
891 codepoint = strtol(s, &s, 16);
892 }
893 }
894 } else if (isdigit(*s)) {
895 codepoint = strtol(s, &s, 10);
896 }
897 if (errno)
898 codepoint = -1;
899
900 if (*s == ';')
901 s++;
902 else {
903 if (prefs.show_extra_warnings && (html->DocType == DT_XHTML ||
904 (html->DocType == DT_HTML && html->DocTypeVersion <= 4.01f))) {
905 char c = *s;
906 *s = '\0';
907 BUG_MSG("Character reference '&#%s' lacks ';'.", tok);
908 *s = c;
909 }
910 /* Don't require ';' for old HTML, except that our current heuristic
911 * is to require it in attributes to avoid cases like "&copy=1" found
912 * in URLs.
913 */
914 if (is_attr || html->DocType == DT_XHTML ||
915 (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) {
916 return NULL;
917 }
918
919 }
920 if ((codepoint < 0x20 && codepoint != '\t' && codepoint != '\n' &&
921 codepoint != '\f') ||
922 (codepoint >= 0x7f && codepoint <= 0x9f) ||
923 (codepoint >= 0xd800 && codepoint <= 0xdfff) || codepoint > 0x10ffff ||
924 ((codepoint & 0xfffe) == 0xfffe) ||
925 (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) &&
926 codepoint > 0xffff)) {
927 /* this catches null bytes, errors, codes out of range, disallowed
928 * control chars, permanently undefined chars, and surrogates.
929 */
930 char c = *s;
931 *s = '\0';
932 BUG_MSG("Numeric character reference '&#%s' is not valid.", tok);
933 *s = c;
934
935 codepoint = (codepoint >= 145 && codepoint <= 151) ?
936 Html_ms_stupid_quotes_2ucs(codepoint) : -1;
937 }
938 if (codepoint != -1) {
939 if (codepoint >= 128) {
940 n = a_Utf8_encode(codepoint, buf);
941 } else {
942 n = 1;
943 buf[0] = (char) codepoint;
944 }
945 assert(n < 5);
946 buf[n] = '\0';
947 *entsize = s-tok+2;
948 return buf;
949 } else {
950 return NULL;
951 }
952}
953
957static int Html_charref_comp(const void *a, const void *b)
958{
959 return strcmp(((Charref_t *)a)->ref, ((Charref_t *)b)->ref);
960}
961
966{
967 Charref_t RefKey;
968
969 RefKey.ref = key;
970 return (Charref_t*) bsearch(&RefKey, Charrefs, NumRef,
972}
973
978static const char *Html_parse_named_charref(DilloHtml *html, char *tok,
979 bool_t is_attr, int *entsize)
980{
981 Charref_t *p;
982 char c;
983 char *s = tok;
984 const char *ret = NULL;
985
986 while (*++s && (isalnum(*s) || strchr(":_.-", *s))) ;
987 c = *s;
988 *s = '\0';
989 if (c != ';') {
990 if (prefs.show_extra_warnings && (html->DocType == DT_XHTML ||
991 (html->DocType == DT_HTML && html->DocTypeVersion <= 4.01f)))
992 BUG_MSG("Character reference '&%s' lacks ';'.", tok);
993
994 /* Don't require ';' for old HTML, except that our current heuristic
995 * is to require it in attributes to avoid cases like "&copy=1" found
996 * in URLs.
997 */
998 if (is_attr || html->DocType == DT_XHTML ||
999 (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) {
1000 return ret;
1001 }
1002 }
1003
1004 if ((p = Html_charref_search(tok))) {
1005 ret = (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) ?
1006 p->html5_str : p->html4_str;
1007 }
1008
1009 if (!ret && html->DocType == DT_XHTML && !strcmp(tok, "apos"))
1010 ret = "'";
1011
1012 *s = c;
1013 if (c == ';')
1014 s++;
1015
1016 if (!ret) {
1017 c = *s;
1018 *s = '\0';
1019 BUG_MSG("Undefined character reference '&%s'.", tok);
1020 *s = c;
1021 }
1022 *entsize = s-tok+1;
1023 return ret;
1024}
1025
1034static const char *Html_parse_entity(DilloHtml *html, const char *token,
1035 int toksize, int *entsize, bool_t is_attr)
1036{
1037 const char *ret = NULL;
1038 char *tok;
1039
1040 if (toksize > 50) {
1041 /* In pathological cases, attributes can be megabytes long and filled
1042 * with character references. As of HTML5, the longest defined character
1043 * reference is about 32 bytes long.
1044 */
1045 toksize = 50;
1046 }
1047
1048 token++;
1049 tok = dStrndup(token, (uint_t)toksize);
1050
1051 if (*tok == '#') {
1052 ret = Html_parse_numeric_charref(html, tok+1, is_attr, entsize);
1053 } else if (isalpha(*tok)) {
1054 ret = Html_parse_named_charref(html, tok, is_attr, entsize);
1055 } else if (prefs.show_extra_warnings &&
1056 (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f))) {
1057 // HTML5 doesn't mind literal '&'s.
1058 BUG_MSG("Literal '&'.");
1059 }
1060 dFree(tok);
1061
1062 return ret;
1063}
1064
1069char *a_Html_parse_entities(DilloHtml *html, const char *token, int toksize)
1070{
1071 const char *esc_set = "&";
1072 int i, s, entsize;
1073 char *str;
1074
1075 s = strcspn(token, esc_set);
1076 if (s >= toksize) {
1077 /* no ampersands */
1078 str = dStrndup(token, toksize);
1079 } else {
1080 Dstr *ds = dStr_sized_new(toksize);
1081
1082 dStr_append_l(ds, token, s);
1083
1084 for (i = s; i < toksize; i++) {
1085 const char *entstr;
1086 const bool_t is_attr = FALSE;
1087
1088 if (token[i] == '&' &&
1089 (entstr = Html_parse_entity(html, token+i, toksize-i, &entsize,
1090 is_attr))) {
1091 dStr_append(ds, entstr);
1092 i += entsize-1;
1093 } else {
1094 dStr_append_c(ds, token[i]);
1095 }
1096 }
1097 str = ds->str;
1098 dStr_free(ds, 0);
1099 }
1100 return str;
1101}
1102
1107static void Html_process_space_pre_line(DilloHtml *html, const char *space,
1108 int spacesize)
1109{
1110 int i, breakCnt = 0;
1111
1112 for (i = 0; i < spacesize; i++) {
1113 /* Support for "\r", "\n" and "\r\n" line breaks */
1114 if (space[i] == '\r' || (space[i] == '\n' && !html->PrevWasCR)) {
1115 breakCnt++;
1116 html->PrevWasCR = (space[i] == '\r');
1117
1118 HT2TB(html)->addLinebreak (html->wordStyle ());
1119 }
1120 }
1121 if (breakCnt == 0) {
1122 HT2TB(html)->addSpace(html->wordStyle ());
1123 }
1124}
1125
1129static void Html_process_space(DilloHtml *html, const char *space,
1130 int spacesize)
1131{
1132 char *spc;
1133 int i, offset;
1134 DilloHtmlParseMode parse_mode = S_TOP(html)->parse_mode;
1135
1136 if (S_TOP(html)->display_none) {
1137 /* do nothing */
1138 } else if (parse_mode == DILLO_HTML_PARSE_MODE_STASH) {
1139 html->StashSpace = (html->Stash->len > 0);
1140
1141 } else if (parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
1142 dStr_append_l(html->Stash, space, spacesize);
1143
1144 } else if (parse_mode == DILLO_HTML_PARSE_MODE_PRE) {
1145 int spaceCnt = 0;
1146
1147 /* re-scan the string for characters that cause line breaks */
1148 for (i = 0; i < spacesize; i++) {
1149 /* Support for "\r", "\n" and "\r\n" line breaks (skips the first) */
1150 if (!html->PreFirstChar &&
1151 (space[i] == '\r' || (space[i] == '\n' && !html->PrevWasCR))) {
1152
1153 if (spaceCnt) {
1154 spc = dStrnfill(spaceCnt, ' ');
1155 HT2TB(html)->addText (spc, spaceCnt, html->wordStyle ());
1156 dFree(spc);
1157 spaceCnt = 0;
1158 }
1159 HT2TB(html)->addLinebreak (html->wordStyle ());
1160 html->pre_column = 0;
1161 }
1162 html->PreFirstChar = false;
1163
1164 /* cr and lf should not be rendered -- they appear as a break */
1165 switch (space[i]) {
1166 case '\r':
1167 case '\n':
1168 break;
1169 case '\t':
1171 BUG_MSG("TAB character inside <pre>.");
1172 offset = TAB_SIZE - html->pre_column % TAB_SIZE;
1173 spaceCnt += offset;
1174 html->pre_column += offset;
1175 break;
1176 default:
1177 spaceCnt++;
1178 html->pre_column++;
1179 break;
1180 }
1181
1182 html->PrevWasCR = (space[i] == '\r');
1183 }
1184
1185 if (spaceCnt) {
1186 // add break possibility for the white-space:pre-wrap case
1187 HT2TB(html)->addBreakOption (html->wordStyle (), false);
1188 spc = dStrnfill(spaceCnt, ' ');
1189 HT2TB(html)->addText (spc, spaceCnt, html->wordStyle ());
1190 dFree(spc);
1191 }
1192
1193 } else {
1194 if (SGML_SPCDEL) {
1195 /* SGML_SPCDEL ignores white space immediately after an open tag */
1196 } else if (html->wordStyle ()->whiteSpace == WHITE_SPACE_PRE_LINE) {
1197 Html_process_space_pre_line(html, space, spacesize);
1198 } else {
1199 HT2TB(html)->addSpace(html->wordStyle ());
1200 }
1201
1202 if (parse_mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY)
1203 html->StashSpace = (html->Stash->len > 0);
1204 }
1205}
1206
1215static void Html_process_word(DilloHtml *html, const char *word, int size)
1216{
1217 int i, j, start;
1218 char *Pword;
1219 DilloHtmlParseMode parse_mode = S_TOP(html)->parse_mode;
1220
1221 if (S_TOP(html)->display_none)
1222 return;
1223 if ((i = html->PrevWasHtmlClose ? 1 : html->PrevWasBodyClose ? 2 : 0)) {
1224 BUG_MSG("Content after </%s> tag.", i == 1 ? "html" : "body");
1225 html->PrevWasHtmlClose = html->PrevWasBodyClose = false;
1226 }
1227
1228 if (parse_mode == DILLO_HTML_PARSE_MODE_STASH ||
1230 if (html->StashSpace) {
1231 dStr_append_c(html->Stash, ' ');
1232 html->StashSpace = false;
1233 }
1234 Pword = a_Html_parse_entities(html, word, size);
1235 dStr_append(html->Stash, Pword);
1236 dFree(Pword);
1237
1238 } else if (parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
1239 /* word goes in untouched, it is not processed here. */
1240 dStr_append_l(html->Stash, word, size);
1241 }
1242
1243 if (parse_mode == DILLO_HTML_PARSE_MODE_STASH ||
1244 parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
1245 /* skip until the closing instructions */
1246
1247 } else if (parse_mode == DILLO_HTML_PARSE_MODE_PRE) {
1248 /* all this overhead is to catch white-space entities */
1249 Pword = a_Html_parse_entities(html, word, size);
1250 for (start = i = 0; Pword[i]; start = i)
1251 if (isspace(Pword[i])) {
1252 while (Pword[++i] && isspace(Pword[i])) ;
1253 Html_process_space(html, Pword + start, i - start);
1254 } else {
1255 while (Pword[++i] && !isspace(Pword[i])) ;
1256 HT2TB(html)->addText(Pword + start, i - start, html->wordStyle ());
1257 html->pre_column += i - start;
1258 html->PreFirstChar = false;
1259 }
1260 dFree(Pword);
1261
1262 } else {
1263 const char *word2, *beyond_word2;
1264
1265 Pword = NULL;
1266 if (!memchr(word,'&', size)) {
1267 /* No entities */
1268 word2 = word;
1269 beyond_word2 = word + size;
1270 } else {
1271 /* Collapse white-space entities inside the word (except &nbsp;) */
1272 Pword = a_Html_parse_entities(html, word, size);
1273 /* Collapse adjacent " \t\f\n\r" characters into a single space */
1274 for (i = j = 0; (Pword[i] = Pword[j]); ++i, ++j) {
1275 if (strchr(" \t\f\n\r", Pword[i])) {
1276 if (i == 0 || (i > 0 && Pword[i-1] != ' '))
1277 Pword[i] = ' ';
1278 else
1279 for (--i; Pword[j+1] && strchr(" \t\f\n\r", Pword[j+1]); ++j)
1280 ;
1281 }
1282 }
1283 word2 = Pword;
1284 beyond_word2 = word2 + strlen(word2);
1285 }
1286 for (start = i = 0; word2[i]; start = i) {
1287 int len;
1288
1289 if (isspace(word2[i])) {
1290 while (word2[++i] && isspace(word2[i])) ;
1291 Html_process_space(html, word2 + start, i - start);
1292 } else if (!strncmp(word2+i, utf8_zero_width_space, 3)) {
1293 i += 3;
1294 HT2TB(html)->addBreakOption(html->wordStyle (), false);
1295 } else if (a_Utf8_ideographic(word2+i, beyond_word2, &len)) {
1296 i += len;
1297 HT2TB(html)->addText(word2 + start, i - start, html->wordStyle ());
1298 HT2TB(html)->addBreakOption(html->wordStyle (), false);
1299 } else {
1300 do {
1301 i += len;
1302 } while (word2[i] && !isspace(word2[i]) &&
1303 strncmp(word2+i, utf8_zero_width_space, 3) &&
1304 (!a_Utf8_ideographic(word2+i, beyond_word2, &len)));
1305 HT2TB(html)->addText(word2 + start, i - start, html->wordStyle ());
1306 }
1307 }
1308 if (Pword == word2)
1309 dFree(Pword);
1310 }
1311}
1312
1317static bool Html_match_tag(const char *tagstr, char *tag, int tagsize)
1318{
1319 int i;
1320
1321 for (i = 0; i < tagsize && tagstr[i] != '\0'; i++) {
1322 if (D_ASCII_TOLOWER(tagstr[i]) != D_ASCII_TOLOWER(tag[i]))
1323 return false;
1324 }
1325 /* The test for '/' is for xml compatibility: "empty/>" will be matched. */
1326 if (i < tagsize && (isspace(tag[i]) || tag[i] == '>' || tag[i] == '/'))
1327 return true;
1328 return false;
1329}
1330
1335static void Html_eventually_pop_dw(DilloHtml *html, bool hand_over_break)
1336{
1337 if (html->dw != S_TOP(html)->textblock) {
1338 if (hand_over_break)
1339 HT2TB(html)->handOverBreak (html->style ());
1340 HT2TB(html)->flush ();
1341 html->dw = S_TOP(html)->textblock;
1342 }
1343}
1344
1348static void Html_push_tag(DilloHtml *html, int tag_idx)
1349{
1350 int n_items;
1351
1352 n_items = html->stack->size ();
1353 html->stack->increase ();
1354 /* We'll copy the former stack item and just change the tag and its index
1355 * instead of copying all fields except for tag. --Jcid */
1356 *html->stack->getRef(n_items) = *html->stack->getRef(n_items - 1);
1357 html->stack->getRef(n_items)->tag_idx = tag_idx;
1358 html->dw = S_TOP(html)->textblock;
1359}
1360
1365static void Html_force_push_tag(DilloHtml *html, int tag_idx)
1366{
1367 html->startElement (tag_idx);
1368 Html_push_tag(html, tag_idx);
1369}
1370
1375{
1376 bool hand_over_break;
1377
1378 html->styleEngine->endElement (S_TOP(html)->tag_idx);
1379 hand_over_break = S_TOP(html)->hand_over_break;
1380 html->stack->setSize (html->stack->size() - 1);
1381 Html_eventually_pop_dw(html, hand_over_break);
1382}
1383
1384
1385
1386/*
1387 * Some parsing routines.
1388 */
1389
1390/*
1391 * Used by a_Html_parse_length
1392 */
1394 char **endptr)
1395 {
/* Parse a leading number from 'attr' with strtod() and look at the character
 * that follows it: a '%' or '*' suffix is consumed, anything else is left
 * in place. When 'endptr' is non-NULL it receives the position right after
 * the consumed text. Returns the CssLength 'l'.
 * NOTE(review): the statements that assign 'l' in each case are not visible
 * in this listing. */
1396 CssLength l;
1397 double v;
1398 char *end;
1399
1400 v = strtod (attr, &end);
1401 switch (*end) {
1402 case '%':
1403 end++;
1405 break;
1406
1407 case '*':
1408 end++;
1410 break;
1411/*
1412 The "px" suffix seems not allowed by HTML4.01 SPEC.
1413 case 'p':
1414 if (end[1] == 'x')
1415 end += 2;
1416*/
1417 default:
1419 break;
1420 }
1421
/* report where parsing stopped, if the caller asked for it */
1422 if (endptr)
1423 *endptr = end;
1424 return l;
1425 }
1426
1427
/* Parse the length attribute value 'attr' through
 * Html_parse_length_or_multi_length() and report trailing non-whitespace
 * characters with BUG_MSG. Returns the resulting CssLength.
 * NOTE(review): several lines of this body are not visible in this listing,
 * so the exact handling of each case cannot be described from here. */
1432 CssLength a_Html_parse_length (DilloHtml *html, const char *attr)
1433 {
1434 CssLength l;
1435 char *end;
1436
1437 l = Html_parse_length_or_multi_length (attr, &end);
1439 /* not allowed as &Length; */
1441 else {
1442 /* allow only whitespaces */
1443 if (*end && !isspace (*end)) {
1444 BUG_MSG("Garbage after length: '%s'.", attr);
1446 }
1447 }
1448
1449 _MSG("a_Html_parse_length: \"%s\" %d\n", attr, CSS_LENGTH_VALUE(l));
1450 return l;
1451 }
1452
1457int32_t a_Html_color_parse(DilloHtml *html, const char *str,
1458 int32_t default_color)
1459{
1460 int err = 1;
1461 int32_t color = a_Color_parse(str, default_color, &err);
1462
1463 if (err) {
1464 BUG_MSG("Color \"%s\" is not in \"#RRGGBB\" format.", str);
1465 }
1466 return color;
1467}
1468
1474static int
1475 Html_check_name_val(DilloHtml *html, const char *val, const char *attrname)
1476{
1477 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) {
1478 bool valid = *val && !strchr(val, ' ');
1479
1480 if (!valid) {
1481 BUG_MSG("'%s' value \"%s\" must not be empty and must not contain "
1482 "spaces.", attrname, val);
1483 }
1484 return valid ? 1 : 0;
1485 } else {
1486 int i;
1487
1488 for (i = 0; val[i]; ++i)
1489 if (!d_isascii(val[i]) || !(isalnum(val[i]) || strchr(":_.-", val[i])))
1490 break;
1491
1492 if (val[i] || !(d_isascii(val[0]) && isalpha(val[0])))
1493 BUG_MSG("%s attribute value \"%s\" is not of the form "
1494 "'[A-Za-z][A-Za-z0-9:_.-]*'.", attrname, val);
1495
1496 return !(val[i]);
1497 }
1498}
1499
/* Inspect a DOCTYPE declaration ('tag', 'tagsize' bytes) and set
 * html->DocType and html->DocTypeVersion accordingly. A declaration that
 * matches none of the known forms sets DT_UNRECOGNIZED (only when no
 * earlier declaration had already set a type) and emits a bug message. */
1519 static void Html_parse_doctype(DilloHtml *html, const char *tag, int tagsize)
1520 {
1521 static const char HTML_SGML_sig [] = "<!DOCTYPE HTML PUBLIC ";
1522 static const char HTML20 [] = "-//IETF//DTD HTML";
1523 static const char HTML32 [] = "-//W3C//DTD HTML 3.2";
1524 static const char HTML40 [] = "-//W3C//DTD HTML 4.0";
1525 static const char HTML401 [] = "-//W3C//DTD HTML 4.01";
1526 static const char HTML401_url[] = "http://www.w3.org/TR/html4/";
1527 static const char XHTML1 [] = "-//W3C//DTD XHTML 1.0";
1528 static const char XHTML1_url [] = "http://www.w3.org/TR/xhtml1/DTD/";
1529 static const char XHTML11 [] = "-//W3C//DTD XHTML 1.1";
1530 static const char XHTML11_url[] = "http://www.w3.org/TR/xhtml11/DTD/";
1531
1532 size_t i;
1533 int quote;
/* work on a private copy; it is rewritten in place below and freed at exit */
1534 char *p, *ntag = dStrndup(tag, tagsize);
1535
1536 /* Tag sanitization: Collapse whitespace between tokens
1537 * and replace '\n' and '\r' with ' ' inside quoted strings. */
/* 'p' is the read position and 'i' the write index into the same buffer */
1538 for (i = 0, p = ntag; *p; ++p) {
1539 if (isspace(*p)) {
1540 for (ntag[i++] = ' '; isspace(p[1]); ++p) ;
1541 } else if ((quote = *p) == '"' || *p == '\'') {
1542 for (ntag[i++] = *p++; (ntag[i] = *p) && ntag[i++] != quote; ++p) {
1543 if (*p == '\n' || *p == '\r')
1544 ntag[i - 1] = ' ';
/* a "\r\n" pair is consumed as one line break */
1545 p += (p[0] == '\r' && p[1] == '\n') ? 1 : 0;
1546 }
1547 } else {
1548 ntag[i++] = *p;
1549 }
/* the quoted-string loop may have stopped on the terminating NUL */
1550 if (!*p)
1551 break;
1552 }
1553 ntag[i] = 0;
1554
1555 _MSG("New: {%s}\n", ntag);
1556
1557 if (html->DocType != DT_NONE)
1558 BUG_MSG("Multiple DOCTYPE declarations.");
1559
1560 /* The default DT_NONE type is TagSoup */
1561 if (i > strlen(HTML_SGML_sig) && // avoid out of bounds reads!
1562 !dStrnAsciiCasecmp(ntag, HTML_SGML_sig, strlen(HTML_SGML_sig))) {
/* skip the signature plus one more character (presumably the opening quote
 * of the public identifier -- TODO confirm) */
1563 p = ntag + strlen(HTML_SGML_sig) + 1;
/* HTML401 is tested before HTML40 because the HTML40 string is a prefix
 * of the HTML401 one */
1564 if (!strncmp(p, HTML401, strlen(HTML401)) &&
1565 dStriAsciiStr(p + strlen(HTML401), HTML401_url)) {
1566 html->DocType = DT_HTML;
1567 html->DocTypeVersion = 4.01f;
1568 } else if (!strncmp(p, XHTML1, strlen(XHTML1)) &&
1569 dStriAsciiStr(p + strlen(XHTML1), XHTML1_url)) {
1570 html->DocType = DT_XHTML;
1571 html->DocTypeVersion = 1.0f;
1572 } else if (!strncmp(p, XHTML11, strlen(XHTML11)) &&
1573 dStriAsciiStr(p + strlen(XHTML11), XHTML11_url)) {
1574 html->DocType = DT_XHTML;
1575 html->DocTypeVersion = 1.1f;
1576 } else if (!strncmp(p, HTML40, strlen(HTML40))) {
1577 html->DocType = DT_HTML;
1578 html->DocTypeVersion = 4.0f;
1579 } else if (!strncmp(p, HTML32, strlen(HTML32))) {
1580 html->DocType = DT_HTML;
1581 html->DocTypeVersion = 3.2f;
1582 } else if (!strncmp(p, HTML20, strlen(HTML20))) {
1583 html->DocType = DT_HTML;
1584 html->DocTypeVersion = 2.0f;
1585 }
/* the declarations below are compared as whole strings, ignoring ASCII
 * case, and are treated as HTML 5.0 */
1586 } else if (!dStrAsciiCasecmp(ntag, "<!DOCTYPE html>") ||
1587 !dStrAsciiCasecmp(ntag, "<!DOCTYPE html >") ||
1588 !dStrAsciiCasecmp(ntag,
1589 "<!DOCTYPE html SYSTEM \"about:legacy-compat\">") ||
1590 !dStrAsciiCasecmp(ntag,
1591 "<!DOCTYPE html SYSTEM 'about:legacy-compat'>")) {
1592 html->DocType = DT_HTML;
1593 html->DocTypeVersion = 5.0f;
1594 }
1595 if (html->DocType == DT_NONE) {
1596 html->DocType = DT_UNRECOGNIZED;
1597 BUG_MSG("DOCTYPE not recognized: ('%s').", ntag);
1598 }
1599 dFree(ntag);
1600 }
1601
1605static void Html_tag_open_html(DilloHtml *html, const char *tag, int tagsize)
1606{
1607 /* The IN_HTML flag will be kept set until at IN_EOF condition.
1608 * This allows to handle pages with multiple or uneven HTML tags */
1609
1610 if (!(html->InFlags & IN_HTML))
1611 html->InFlags |= IN_HTML;
1612 if (html->Num_HTML < UCHAR_MAX)
1613 ++html->Num_HTML;
1614
1615 if (html->Num_HTML > 1) {
1616 BUG_MSG("<html> was already open.");
1617 html->ReqTagClose = true;
1618 }
1619}
1620
1625{
1626 _MSG("Html_tag_close_html: Num_HTML=%d\n", html->Num_HTML);
1627
1628 /* As some Tag soup pages use multiple HTML tags, this function
1629 * gets called only on EOF and upon and extra HTML open.
1630 * Also, we defer clearing the IN_HTML flag until IN_EOF */
1631}
1632
1636static void Html_tag_open_head(DilloHtml *html, const char *tag, int tagsize)
1637{
1638 if (html->InFlags & IN_BODY) {
1639 BUG_MSG("<head> must go before the BODY section.");
1640 html->ReqTagClose = true;
1641 return;
1642 }
1643
1644 if (html->Num_HEAD < UCHAR_MAX)
1645 ++html->Num_HEAD;
1646 if (html->InFlags & IN_HEAD) {
1647 BUG_MSG("<head> was already open.");
1648 html->ReqTagClose = true;
1649 } else if (html->Num_HEAD > 1) {
1650 BUG_MSG("<head> already finished -- ignoring.");
1651 html->ReqTagClose = true;
1652 } else {
1653 html->InFlags |= IN_HEAD;
1654 }
1655}
1656
1662{
1663 if (html->InFlags & IN_HEAD) {
1664 if (html->Num_HEAD == 1) {
1665 /* match for the well formed start of HEAD section */
1666 if (html->Num_TITLE == 0)
1667 BUG_MSG("<head> lacks <title>.");
1668
1669 html->InFlags &= ~IN_HEAD;
1670
1671 /* charset is already set, load remote stylesheets now */
1672 for (int i = 0; i < html->cssUrls->size(); i++) {
1673 a_Html_load_stylesheet(html, html->cssUrls->get(i));
1674 }
1675 } else if (html->Num_HEAD > 1) {
1676 --html->Num_HEAD;
1677 }
1678 } else {
1679 /* not reached, see Html_tag_cleanup_at_close() */
1680 }
1681}
1682
1687static void Html_tag_open_title(DilloHtml *html, const char *tag, int tagsize)
1688{
1689 /* fill the stash buffer so TITLE content can be ignored
1690 * when not valid, redundant or outside HEAD section */
1691 a_Html_stash_init(html);
1692
1693 if (html->InFlags & IN_HEAD) {
1694 if (html->Num_TITLE < UCHAR_MAX)
1695 ++html->Num_TITLE;
1696 if (html->Num_TITLE > 1)
1697 BUG_MSG("Redundant <title>.");
1698 } else {
1699 BUG_MSG("<title> must be inside <head> -- ignoring.");
1700 }
1701}
1702
1708{
1709 /* title is only valid inside HEAD */
1710 if (html->InFlags & IN_HEAD && html->Num_TITLE == 1) {
1711 /* Ignore empty titles: <title></title> */
1712 char *title = html->Stash->str;
1713 if (!title || title[0] == '\0')
1714 return;
1715 a_UIcmd_set_page_title(html->bw, title);
1717 }
1718}
1719
1725static void Html_tag_open_script(DilloHtml *html, const char *tag, int tagsize)
1726{
1727 a_Html_stash_init(html);
1728 S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_VERBATIM;
1729}
1730
1735{
1736 /* eventually the stash will be sent to an interpreter for parsing */
1737}
1738
1743static void Html_tag_open_style(DilloHtml *html, const char *tag, int tagsize)
1744{
1745 const char *attrbuf;
1746
1747 html->loadCssFromStash = true;
1748
1749 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "type"))) {
1750 if (html->DocType != DT_HTML || html->DocTypeVersion <= 4.01f)
1751 BUG_MSG("<style> requires type attribute.");
1752 } else if (dStrAsciiCasecmp(attrbuf, "text/css")) {
1753 html->loadCssFromStash = false;
1754 }
1755 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "media")) &&
1756 dStrAsciiCasecmp(attrbuf, "all") && !dStriAsciiStr(attrbuf, "screen")) {
1757 /* HTML 4.01 sec. 6.13 says that media descriptors are case-sensitive,
1758 * but sec. 14.2.3 says that the attribute is case-insensitive.
1759 * TODO can be a comma-separated list.
1760 * TODO handheld.
1761 */
1762 html->loadCssFromStash = false;
1763 }
1764
1765 a_Html_stash_init(html);
1766 S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_VERBATIM;
1767}
1768
1773{
1775 html->styleEngine->parse(html, html->base_url, html->Stash->str,
1776 html->Stash->len, CSS_ORIGIN_AUTHOR);
1777}
1778
1779/*
1780 * <BODY>
1781 */
/* Handle an opening <body> tag: set IN_BODY, count the tag, read the legacy
 * bgcolor/text/link/vlink attributes, hand the background colour and image
 * to the layout, compute html->visited_color and switch the parser to body
 * mode. A second or later <body> is only reported and flagged for closing.
 * NOTE(review): several lines of this body are not visible in this listing
 * (the left-hand parts of some calls and one condition). */
1782 static void Html_tag_open_body(DilloHtml *html, const char *tag, int tagsize)
1783 {
1784 const char *attrbuf;
1785 int32_t color;
1786 style::Color *bgColor;
1787 style::StyleImage *bgImage;
1788 style::BackgroundRepeat bgRepeat;
1789 style::BackgroundAttachment bgAttachment;
1790 style::Length bgPositionX, bgPositionY;
1791
1792 _MSG("Html_tag_open_body Num_BODY=%d\n", html->Num_BODY);
1793 if (!(html->InFlags & IN_BODY))
1794 html->InFlags |= IN_BODY;
/* saturating counter of <body> tags */
1795 if (html->Num_BODY < UCHAR_MAX)
1796 ++html->Num_BODY;
1797
1798 if (html->Num_BODY > 1) {
1799 BUG_MSG("<body> was already open.");
1800 html->ReqTagClose = true;
1801 return;
1802 }
1803
1804 if (html->InFlags & IN_HEAD) {
1805 /* if we're here, it's bad XHTML, no need to recover */
1806 BUG_MSG("Unclosed <head>.");
1807 }
1808
/* -1 is passed as the default colour and means "no usable colour" below */
1809 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "bgcolor"))) {
1810 color = a_Html_color_parse(html, attrbuf, -1);
1811
1812 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
1813 BUG_MSG("<body> bgcolor attribute is obsolete.");
1814
1815 if (color != -1)
1817 CSS_TYPE_COLOR, color);
1818 }
1819
1820 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "text"))) {
1821 color = a_Html_color_parse(html, attrbuf, -1);
1822
1823 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
1824 BUG_MSG("<body> text attribute is obsolete.");
1825
1826 if (color != -1)
1828 CSS_TYPE_COLOR, color);
1829 }
1830
1831 html->restyle ();
1832
1833 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "link"))) {
1834 html->non_css_link_color = a_Html_color_parse(html, attrbuf, -1);
1835 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
1836 BUG_MSG("<body> link attribute is obsolete.");
1837 }
1838
1839 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "vlink"))) {
1840 html->non_css_visited_color = a_Html_color_parse(html, attrbuf, -1);
1841 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
1842 BUG_MSG("<body> vlink attribute is obsolete.");
1843 }
1844
1845 html->dw->setStyle (html->style ());
1846
1847 bgColor = html->styleEngine->backgroundColor ();
1848 if (bgColor)
1849 HT2LT(html)->setBgColor(bgColor);
1850
1851 bgImage = html->styleEngine->backgroundImage (&bgRepeat, &bgAttachment,
1852 &bgPositionX, &bgPositionY);
1853 if (bgImage)
1854 HT2LT(html)->setBgImage(bgImage, bgRepeat, bgAttachment, bgPositionX,
1855 bgPositionY);
1856
1857 /* Determine a color for visited links.
1858 * This color is computed once per page and used for immediate feedback
1859 * when clicking a link.
1860 * On reload style including color for visited links is computed properly
1861 * according to CSS.
1862 */
/* a temporary <a> element in the visited state is started, its colour is
 * read, and the element is ended again */
1863 html->startElement (i_A);
1864 html->styleEngine->setPseudoVisited ();
1865 if (html->non_css_visited_color != -1) {
1867 html->non_css_visited_color);
1868 }
1869 html->visited_color = html->style ()->color->getColor ();
1870 html->styleEngine->endElement (i_A);
1871
1873 /* get a color that has a "safe distance" from text, link and bg */
1874 html->visited_color =
1876 html->style ()->color->getColor(),
1877 html->non_css_link_color,
1879 }
1880
1881
1882 S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_BODY;
1883 }
1884
1885/*
1886 * BODY
1887 */
1889{
1890 _MSG("Html_tag_close_body: Num_BODY=%d\n", html->Num_BODY);
1891
1892 /* As some Tag soup pages use multiple BODY tags, this function
1893 * gets called only on EOF and upon and extra BODY open.
1894 * Also, we defer clearing the IN_BODY flag until IN_EOF */
1895}
1896
1897/*
1898 * <P>
1899 * TODO: what's the point between adding the parbreak before and
1900 * after the push?
1901 */
1902static void Html_tag_open_p(DilloHtml *html, const char *tag, int tagsize)
1903{
1904 CssPropertyList props;
1905
1906 a_Html_tag_set_align_attr (html, tag, tagsize);
1907}
1908
1909/*
1910 * <FRAME>, <IFRAME>
1911 * TODO: This is just a temporary fix while real frame support
1912 * isn't finished. Imitates lynx/w3m's frames.
1913 */
/* Open handler for <frame>/<iframe>: build a URL from the 'src' attribute,
 * select the visited or unvisited link pseudo-class and register the URL as
 * a new link. Does nothing when 'src' is missing or the URL cannot be made.
 * NOTE(review): the condition choosing between the two pseudo-classes and
 * the start of the final statement are not visible in this listing.
 * 'props' is not referenced by any visible line. */
1914 static void Html_tag_open_frame (DilloHtml *html, const char *tag, int tagsize)
1915 {
1916 const char *attrbuf;
1917 DilloUrl *url;
1918 CssPropertyList props;
1919
1920 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "src")))
1921 return;
1922
1923 if (!(url = a_Html_url_new(html, attrbuf, NULL, 0)))
1924 return;
1925
1927 /* visited frame */
1928 html->styleEngine->setPseudoVisited ();
1929 } else {
1930 /* unvisited frame */
1931 html->styleEngine->setPseudoLink ();
1932 }
1933
1935 Html_set_new_link(html,&url));
1936 }
1937
1938static void
1939 Html_tag_content_frame (DilloHtml *html, const char *tag, int tagsize)
1940{
1941 const char *attrbuf;
1942 char *src;
1943 Textblock *textblock;
1944 Widget *bullet;
1945
1946 textblock = HT2TB(html);
1947
1948 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "src")))
1949 return;
1950
1951 src = dStrdup(attrbuf);
1952
1953 textblock->addParbreak (5, html->wordStyle ());
1954
1955 bullet = new Bullet();
1956 textblock->addWidget(bullet, html->wordStyle ());
1957 textblock->addSpace(html->wordStyle ());
1958
1959 if (D_ASCII_TOLOWER(tag[1]) == 'i') {
1960 /* IFRAME usually comes with very long advertising/spying URLS,
1961 * to not break rendering we will force name="IFRAME" */
1962 textblock->addText ("IFRAME", html->wordStyle ());
1963
1964 } else {
1965 /* FRAME:
1966 * If 'name' tag is present use it, if not use 'src' value */
1967 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "name"))) {
1968 textblock->addText (src, html->wordStyle ());
1969 } else {
1970 textblock->addText (attrbuf, html->wordStyle ());
1971 }
1972 }
1973
1974 textblock->addParbreak (5, html->wordStyle ());
1975
1976 dFree(src);
1977}
1978
1979/*
1980 * <FRAMESET>
1981 * TODO: This is just a temporary fix while real frame support
1982 * isn't finished. Imitates lynx/w3m's frames.
1983 */
1985 const char *tag, int tagsize)
1986 {
/* Render a "--FRAME--" marker in its own paragraph and then open a new
 * textblock via Html_add_textblock().
 * NOTE(review): the first line of the signature is not visible here. */
1987 HT2TB(html)->addParbreak (9, html->wordStyle ());
1988 HT2TB(html)->addText("--FRAME--", html->wordStyle ());
1989 Html_add_textblock(html, true, 5, false);
1990 }
1991
1992/*
1993 * <H1> | <H2> | <H3> | <H4> | <H5> | <H6>
1994 */
/* Open handler shared by <h1>..<h6>: apply the alignment attribute, start
 * stashing the heading text and set the parse mode for the element.
 * NOTE(review): the right-hand side of the parse_mode assignment is not
 * visible in this listing. */
1995 static void Html_tag_open_h(DilloHtml *html, const char *tag, int tagsize)
1996 {
1997 a_Html_tag_set_align_attr (html, tag, tagsize);
1998
1999 a_Html_stash_init(html);
2000 S_TOP(html)->parse_mode =
2002 }
2003
2004/*
2005 * <BR>
2006 */
2007static void Html_tag_content_br(DilloHtml *html, const char *tag, int tagsize)
2008{
2009 HT2TB(html)->addLinebreak (html->wordStyle ());
2010}
2011
2012/*
2013 * <FONT>
2014 */
/* Open handler for <font>: read the 'color' and 'face' attributes.
 * NOTE(review): the condition selecting html->visited_color and the calls
 * that consume 'color' and 'fontFamily' are not visible in this listing. */
2015 static void Html_tag_open_font(DilloHtml *html, const char *tag, int tagsize)
2016 {
2017 const char *attrbuf;
2018 char *fontFamily = NULL;
2019 int32_t color;
2020
2021 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "color"))) {
2023 color = html->visited_color;
2024 } else {
2025 /* use the tag-specified color */
2026 color = a_Html_color_parse(html, attrbuf, -1);
2027 }
/* -1 was passed as the default colour, so it marks an unusable value */
2028 if (color != -1)
2030 CSS_TYPE_COLOR, color);
2031 }
2032
2033 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "face"))) {
2034 fontFamily = dStrdup(attrbuf);
2036 CSS_TYPE_SYMBOL, fontFamily);
2037 }
2038
/* fontFamily is NULL here when no 'face' attribute was given */
2039 dFree(fontFamily);
2040 }
2041
2042/*
2043 * <ABBR>
2044 */
/* Open handler for <abbr>: when tooltips are enabled in the preferences,
 * read the 'title' attribute.
 * NOTE(review): the call that consumes the title, and one earlier line,
 * are not visible in this listing. */
2045 static void Html_tag_open_abbr(DilloHtml *html, const char *tag, int tagsize)
2046 {
2047 const char *attrbuf;
2048
2050
2051 if (prefs.show_tooltip &&
2052 (attrbuf = a_Html_get_attr(html, tag, tagsize, "title"))) {
2053
2055 attrbuf);
2056 }
2057 }
2058
/* Read the attributes shared by image-like tags: 'title' (only when
 * tooltips are enabled), 'width' and 'height'. Pixel sizes that are
 * negative, or whose values or product exceed IMAGE_MAX_AREA, are discarded
 * with a message.
 * NOTE(review): several lines are not visible in this listing, among them
 * the declarations of l_w/l_h, the branch that applies accepted sizes and
 * the start of the statement using html->images->size(). */
2062 void a_Html_common_image_attrs(DilloHtml *html, const char *tag, int tagsize)
2063 {
2064 char *width_ptr, *height_ptr;
2065 const char *attrbuf;
2068 int w = 0, h = 0;
2069
2070 if (prefs.show_tooltip &&
2071 (attrbuf = a_Html_get_attr(html, tag, tagsize, "title"))) {
2073 attrbuf);
2074 }
/* both strings are released with dFree() before this function returns */
2075 width_ptr = a_Html_get_attr_wdef(html, tag, tagsize, "width", NULL);
2076 height_ptr = a_Html_get_attr_wdef(html, tag, tagsize, "height", NULL);
2077 // Check for malicious values
2078 // TODO: the same for percentage and relative lengths.
/* only pixel lengths are checked; any other length type leaves w/h at 0 */
2079 if (width_ptr) {
2080 l_w = a_Html_parse_length (html, width_ptr);
2081 w = (int) (CSS_LENGTH_TYPE(l_w) == CSS_LENGTH_TYPE_PX ?
2082 CSS_LENGTH_VALUE(l_w) : 0);
2083 }
2084 if (height_ptr) {
2085 l_h = a_Html_parse_length (html, height_ptr);
2086 h = (int) (CSS_LENGTH_TYPE(l_h) == CSS_LENGTH_TYPE_PX ?
2087 CSS_LENGTH_VALUE(l_h) : 0);
2088 }
2089 /* Check for suspicious image size request that would cause
2090 * an excessive amount of memory to be allocated for the
2091 * image buffer.
2092 * Be careful to avoid integer overflows during the checks.
2093 * There is an additional check in dw/image.cc to catch cases
2094 * where only one dimension is given and the image is scaled
2095 * preserving its original aspect ratio.
2096 * Size requests passed via CSS are also checked there.
2097 */
/* the area test divides instead of multiplying, so w * h is never formed */
2098 if (w < 0 || h < 0 ||
2099 w > IMAGE_MAX_AREA || h > IMAGE_MAX_AREA ||
2100 (h > 0 && w > IMAGE_MAX_AREA / h)) {
2101 dFree(width_ptr);
2102 dFree(height_ptr);
/* reset to NULL so the dFree() calls at the end act on NULL pointers */
2103 width_ptr = height_ptr = NULL;
2104 MSG("a_Html_common_image_attrs: suspicious image size request %d x %d\n",
2105 w, h);
2106 } else {
2113 }
2114
2115 /* TODO: we should scale the image respecting its ratio.
2116 * As the image size is not known at this time, maybe a flag
2117 * can be set to scale it later.
2118 if ((width_ptr && !height_ptr) || (height_ptr && !width_ptr))
2119 [...]
2120 */
2121
2122 /* x_img is an index to a list of {url,image} pairs.
2123 * We know a_Html_image_new() will use size() as its next index */
2125 html->images->size());
2126
2127
2128 dFree(width_ptr);
2129 dFree(height_ptr);
2130 }
2131
/* Create an image for the tag: requires a 'src' attribute that yields a
 * URL (returns NULL otherwise), uses the 'alt' text or "[IMG]" when it is
 * missing or empty, creates the dw::Image and its DilloImage, records a
 * {url, image} entry in html->images and either starts loading at once or
 * keeps the image referenced in that entry. Returns the DilloImage.
 * NOTE(review): several lines are not visible in this listing (declarations
 * of 'image' and 'hi', part of the load_now expression, and others). */
2132 DilloImage *a_Html_image_new(DilloHtml *html, const char *tag, int tagsize)
2133 {
2134 bool load_now;
2135 char *alt_ptr;
2136 const char *attrbuf;
2137 DilloUrl *url;
2139
2140 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "src")) ||
2141 !(url = a_Html_url_new(html, attrbuf, NULL, 0)))
2142 return NULL;
2143
2144 alt_ptr = a_Html_get_attr_wdef(html, tag, tagsize, "alt", NULL);
/* fall back to a fixed placeholder when alt is absent or empty */
2145 if (!alt_ptr || !*alt_ptr) {
2146 dFree(alt_ptr);
2147 alt_ptr = dStrdup("[IMG]");
2148 }
2149
2150 dw::Image *dw = new dw::Image(alt_ptr);
2151 image =
2152 a_Image_new(html->dw->getLayout(), (void*)(dw::core::ImgRenderer*)dw, 0, 0);
2153
2155
2156 if (HT2TB(html)->getBgColor())
2157 image->bg_color = HT2TB(html)->getBgColor()->getColor();
2158
2159 if (HT2TB(html)->getFgColor())
2160 image->fg_color = HT2TB(html)->getFgColor()->getColor();
2161
/* append the new entry at the end of html->images */
2163 hi->url = url;
2164 html->images->increase();
2165 html->images->set(html->images->size() - 1, hi);
2166
2167 load_now = prefs.load_images ||
2168 !dStrAsciiCasecmp(URL_SCHEME(url), "data") ||
2170
2171 if (load_now && Html_load_image(html->bw, url, html->page_url, image)) {
2172 // hi->image is NULL if dillo tries to load the image immediately
2173 hi->image = NULL;
2175 } else {
2176 // otherwise a reference is kept in html->images
2177 hi->image = image;
2178 }
2179
2180 dFree(alt_ptr);
2181 return image;
2182 }
2183
2188 const DilloUrl *requester, DilloImage *Image)
2189 {
/* Request 'url' as an image: build a DilloWeb for it, open it through
 * a_Capi_open_url() and, on a non-zero client key, register the client and
 * the URL with the browser window. Returns whether the key was non-zero.
 * NOTE(review): the first line of the signature and one line of the body
 * are not visible in this listing. */
2190 DilloWeb *Web;
2191 int ClientKey;
2192 /* Fill a Web structure for the cache query */
2193 Web = a_Web_new(bw, url, requester);
2194 Web->Image = Image;
2196 Web->flags |= WEB_Image;
2197 /* Request image data from the cache */
2198 if ((ClientKey = a_Capi_open_url(Web, NULL, NULL)) != 0) {
2199 a_Bw_add_client(bw, ClientKey, 0);
2200 a_Bw_add_url(bw, url);
2201 }
2202 return ClientKey != 0;
2203 }
2204
/* Open handler for <img>: process the common image attributes, then read
 * the legacy 'hspace', 'vspace' and 'border' attributes as decimal integers
 * and convert accepted values to pixel lengths.
 * NOTE(review): the statements that apply the converted lengths are not
 * visible in this listing. */
2205 static void Html_tag_open_img(DilloHtml *html, const char *tag, int tagsize)
2206 {
2207 int space, border;
2208 const char *attrbuf;
2209
2210 a_Html_common_image_attrs(html, tag, tagsize);
2211
2212 /* Spacing to the left and right */
2213 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "hspace"))) {
2214 space = strtol(attrbuf, NULL, 10);
2215 if (space > 0) {
2216 space = CSS_CREATE_LENGTH(space, CSS_LENGTH_TYPE_PX);
2221 }
2222 }
2223
2224 /* Spacing at the top and bottom */
2225 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "vspace"))) {
2226 space = strtol(attrbuf, NULL, 10);
2227 if (space > 0) {
2228 space = CSS_CREATE_LENGTH(space, CSS_LENGTH_TYPE_PX);
2233 }
2234 }
2235
2236 /* Border */
2237 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "border"))) {
2238 border = strtol(attrbuf, NULL, 10);
/* unlike the spacing attributes, a border of 0 is accepted */
2239 if (border >= 0) {
2240 border = CSS_CREATE_LENGTH(border, CSS_LENGTH_TYPE_PX);
2249
2258 }
2259 }
2260
2261 }
2262
/* Content handler for <img>: unless the base URL carries URL_SpamSafe,
 * create the image, add its widget plus a break option to the current
 * textblock, and set up the server-side (ismap) or client-side (usemap)
 * image map.
 * NOTE(review): the declaration of 'Image' is not visible in this listing. */
2268 static void Html_tag_content_img(DilloHtml *html, const char *tag, int tagsize)
2269 {
2271 DilloUrl *usemap_url;
2272 const char *attrbuf;
2273
2274 /* This avoids loading images. Useful for viewing suspicious HTML email. */
2275 if (URL_FLAGS(html->base_url) & URL_SpamSafe)
2276 return;
2277
2278 Image = a_Html_image_new(html, tag, tagsize);
2279 if (!Image)
2280 return;
2281
2282 usemap_url = NULL;
2283 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "usemap")))
2284 /* TODO: usemap URLs outside of the document are not used. */
2285 usemap_url = a_Html_url_new(html, attrbuf, NULL, 0);
2286
2287 // At this point, we know that Image->ir represents an image
2288 // widget. Notice that the order of the casts matters, because of
2289 // multiple inheritance.
2290 dw::Image *dwi = (dw::Image*)(dw::core::ImgRenderer*)Image->img_rndr;
2291 HT2TB(html)->addWidget(dwi, html->style());
2292 HT2TB(html)->addBreakOption (html->style (), false);
2293
2294 /* Image maps */
2295 if (a_Html_get_attr(html, tag, tagsize, "ismap")) {
2296 dwi->setIsMap();
2297 _MSG(" Html_tag_open_img: server-side map (ISMAP)\n");
2298 } else if (html->style ()->x_link != -1 &&
2299 usemap_url == NULL) {
2300 /* For simple links, we have to suppress the "image_pressed" signal.
2301 * This is overridden for USEMAP images. */
2302// a_Dw_widget_set_button_sensitive (IM2DW(Image->dw), FALSE);
2303 }
2304
/* the map name is passed as a new String; the URL itself is freed here */
2305 if (usemap_url) {
2306 dwi->setUseMap(&html->maps, new ::object::String(URL_STR(usemap_url)));
2307 a_Url_free (usemap_url);
2308 }
2309 }
2310
2311/*
2312 * <map>
2313 */
2314static void Html_tag_content_map(DilloHtml *html, const char *tag, int tagsize)
2315{
2316 char *hash_name;
2317 const char *attrbuf;
2318 DilloUrl *url;
2319
2320 if (html->InFlags & IN_MAP) {
2321 BUG_MSG("Nested <map>.");
2322 } else {
2323 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "name"))) {
2324 html->InFlags |= IN_MAP;
2325 hash_name = dStrconcat("#", attrbuf, NULL);
2326 url = a_Html_url_new(html, hash_name, NULL, 0);
2327 html->maps.startNewMap(new ::object::String(URL_STR(url)));
2328 a_Url_free (url);
2329 dFree(hash_name);
2330 } else {
2331 BUG_MSG("<map> requires name attribute.");
2332 }
2333 }
2334}
2335
2340{
2341 /* This is a hack for the perhaps frivolous feature of drawing image map
2342 * shapes when there is no image to display. If this map is defined after
2343 * an image that has not been loaded (img != NULL), tell the image to
2344 * redraw. (It will only do so if it uses a map.)
2345 */
2346 for (int i = 0; i < html->images->size(); i++) {
2347 DilloImage *img = html->images->get(i)->image;
2348
2349 if (img) {
2350 // At this point, we know that img->ir represents an image
2351 // widget. (Really? Is this assumtion safe?) Notice that the
2352 // order of the casts matters, because of multiple
2353 // inheritance.
2355 dwi->forceMapRedraw();
2356 }
2357 }
2358 html->InFlags &= ~IN_MAP;
2359}
2360
/* Parse a list of decimal integers from 'str' into 'coords' and return it.
 * Parsing stops at the end of the string or where no number can be read.
 * NOTE(review): the signature line and the line that creates 'coords' are
 * not visible in this listing. */
2364 static
2366 {
2367 int coord;
2368 const char *tail = str;
2369 char *newtail = NULL;
2371
2372 while (1) {
2373 coord = strtol(tail, &newtail, 10);
/* strtol() consumed nothing: there is no further number */
2374 if (coord == 0 && newtail == tail)
2375 break;
2376 coords->increase();
2377 coords->set(coords->size() - 1, coord);
2378 while (isspace(*newtail))
2379 newtail++;
2380 if (!*newtail)
2381 break;
2382 if (*newtail != ',') {
2383 BUG_MSG("<area> coords must be integers separated by commas.");
2384 }
/* the separator is skipped even when it is not a comma */
2385 tail = newtail + 1;
2386 }
2387
2388 return coords;
2389 }
2390
2391/*
2392 * <AREA>
2393 */
2394static void
2395 Html_tag_content_area(DilloHtml *html, const char *tag, int tagsize)
2396{
2397 enum types {UNKNOWN, RECTANGLE, CIRCLE, POLYGON, BACKGROUND};
2398 types type;
2399 misc::SimpleVector<int> *coords = NULL;
2400 DilloUrl* url;
2401 const char *attrbuf;
2402 int link = -1;
2403 Shape *shape = NULL;
2404
2405 if (!(html->InFlags & IN_MAP)) {
2406 BUG_MSG("<area> not inside <map>.");
2407 return;
2408 }
2409 attrbuf = a_Html_get_attr(html, tag, tagsize, "shape");
2410
2411 if (!attrbuf || !*attrbuf || !dStrAsciiCasecmp(attrbuf, "rect")) {
2412 /* the default shape is a rectangle */
2413 type = RECTANGLE;
2414 } else if (dStrAsciiCasecmp(attrbuf, "default") == 0) {
2415 /* "default" is the background */
2416 type = BACKGROUND;
2417 } else if (dStrAsciiCasecmp(attrbuf, "circle") == 0) {
2418 type = CIRCLE;
2419 } else if (dStrnAsciiCasecmp(attrbuf, "poly", 4) == 0) {
2420 type = POLYGON;
2421 } else {
2422 BUG_MSG("<area> unknown shape: '%s'.", attrbuf);
2423 type = UNKNOWN;
2424 }
2425 if (type == RECTANGLE || type == CIRCLE || type == POLYGON) {
2426 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "coords"))) {
2427 coords = Html_read_coords(html, attrbuf);
2428
2429 if (type == RECTANGLE) {
2430 if (coords->size() != 4)
2431 BUG_MSG("<area> rectangle must have four coordinate values.");
2432 if (coords->size() >= 4)
2433 shape = new Rectangle(coords->get(0),
2434 coords->get(1),
2435 coords->get(2) - coords->get(0),
2436 coords->get(3) - coords->get(1));
2437 } else if (type == CIRCLE) {
2438 if (coords->size() != 3)
2439 BUG_MSG("<area> circle must have three coordinate values.");
2440 if (coords->size() >= 3)
2441 shape = new Circle(coords->get(0), coords->get(1),
2442 coords->get(2));
2443 } else if (type == POLYGON) {
2444 Polygon *poly;
2445 int i;
2446 if (coords->size() % 2)
2447 BUG_MSG("<area> polygon with odd number of coordinates.");
2448 shape = poly = new Polygon();
2449 for (i = 0; i < (coords->size() / 2); i++)
2450 poly->addPoint(coords->get(2*i), coords->get(2*i + 1));
2451 }
2452 delete(coords);
2453 }
2454 }
2455 if (shape != NULL || type == BACKGROUND) {
2456 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "href"))) {
2457 url = a_Html_url_new(html, attrbuf, NULL, 0);
2458 dReturn_if_fail ( url != NULL );
2459
2460 link = Html_set_new_link(html, &url);
2461 }
2462 if (type == BACKGROUND)
2463 html->maps.setCurrentMapDefaultLink(link);
2464 else
2465 html->maps.addShapeToCurrentMap(shape, link);
2466 }
2467}
2468
2469/*
2470 * <OBJECT>
2471 * Simply provide a link if the object is something downloadable.
2472 */
/* Open handler for <object>: when a 'data' attribute is present, build its
 * URL (relative to the optional 'codebase' URL), select the visited or
 * unvisited link pseudo-class and register the URL as a new link.
 * NOTE(review): the condition choosing the pseudo-class, the start of the
 * statement using Html_set_new_link() and a line just before the closing
 * brace are not visible in this listing. */
2473 static void Html_tag_open_object(DilloHtml *html, const char *tag, int tagsize)
2474 {
2475 DilloUrl *url, *base_url = NULL;
2476 const char *attrbuf;
2477
2478 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "codebase"))) {
2479 base_url = a_Html_url_new(html, attrbuf, NULL, 0);
2480 }
2481
2482 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "data"))) {
/* the last argument tells whether a codebase URL was supplied */
2483 url = a_Html_url_new(html, attrbuf,
2484 URL_STR(base_url), (base_url != NULL));
2485 dReturn_if_fail ( url != NULL );
2486
2488 html->styleEngine->setPseudoVisited ();
2489 } else {
2490 html->styleEngine->setPseudoLink ();
2491 }
2492
2494 Html_set_new_link(html, &url));
2495 }
2497 }
2498
2499static void Html_tag_content_object(DilloHtml *html, const char *tag,
2500 int tagsize)
2501{
2502 if (a_Html_get_attr(html, tag, tagsize, "data"))
2503 HT2TB(html)->addText("[OBJECT]", html->wordStyle ());
2504}
2505
2506/*
2507 * <VIDEO>
2508 * Provide a link to the video.
2509 */
/* Open handler for <video>: ignored when already inside a media element;
 * otherwise, if 'src' is present, register its URL as a link and show the
 * text "[VIDEO]". IN_MEDIA is set afterwards in either case.
 * NOTE(review): the condition choosing the pseudo-class and the start of
 * the statement using Html_set_new_link() are not visible in this listing. */
2510 static void Html_tag_open_video(DilloHtml *html, const char *tag, int tagsize)
2511 {
2512 DilloUrl *url;
2513 const char *attrbuf;
2514
2515 if (html->InFlags & IN_MEDIA) {
2516 MSG("<video> not handled when already inside a media element.\n");
2517 return;
2518 }
2519 /* TODO: poster attr */
2520
2521 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "src"))) {
2522 url = a_Html_url_new(html, attrbuf, NULL, 0);
/* returns before IN_MEDIA is set when no URL could be created */
2523 dReturn_if_fail ( url != NULL );
2524
2526 html->styleEngine->setPseudoVisited ();
2527 } else {
2528 html->styleEngine->setPseudoLink ();
2529 }
2530
2532 Html_set_new_link(html, &url));
2533
2534 HT2TB(html)->addText("[VIDEO]", html->wordStyle ());
2535 }
2536 html->InFlags |= IN_MEDIA;
2537 }
2538
2539/*
2540 * <AUDIO>
2541 * Provide a link to the audio.
2542 */
/* Open handler for <audio>: ignored when already inside a media element;
 * otherwise, if 'src' is present, register its URL as a link and show the
 * text "[AUDIO]". IN_MEDIA is set afterwards in either case.
 * NOTE(review): the condition choosing the pseudo-class and the start of
 * the statement using Html_set_new_link() are not visible in this listing. */
2543 static void Html_tag_open_audio(DilloHtml *html, const char *tag, int tagsize)
2544 {
2545 DilloUrl *url;
2546 const char *attrbuf;
2547
2548 if (html->InFlags & IN_MEDIA) {
2549 MSG("<audio> not handled when already inside a media element.\n");
2550 return;
2551 }
2552
2553 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "src"))) {
2554 url = a_Html_url_new(html, attrbuf, NULL, 0);
/* returns before IN_MEDIA is set when no URL could be created */
2555 dReturn_if_fail ( url != NULL );
2556
2558 html->styleEngine->setPseudoVisited ();
2559 } else {
2560 html->styleEngine->setPseudoLink ();
2561 }
2562
2564 Html_set_new_link(html, &url));
2565
2566 HT2TB(html)->addText("[AUDIO]", html->wordStyle ());
2567 }
2568 html->InFlags |= IN_MEDIA;
2569 }
2570
2571/*
2572 * <SOURCE>
2573 * Media resource; provide a link to its address.
2574 */
/* Open handler for <source>: only acts inside a media element. A missing
 * 'src' attribute is reported; otherwise its URL is registered as a link.
 * NOTE(review): the condition choosing the pseudo-class and the start of
 * the statement using Html_set_new_link() are not visible in this listing. */
2575 static void Html_tag_open_source(DilloHtml *html, const char *tag,
2576 int tagsize)
2577 {
2578 const char *attrbuf;
2579
2580 if (!(html->InFlags & IN_MEDIA)) {
2581 // Can also be inside a picture element.
2582 // BUG_MSG("<source> not inside a media element.");
2583 return;
2584 }
2585 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "src"))) {
2586 BUG_MSG("<source> requires src attribute.");
2587 return;
2588 } else {
2589 DilloUrl *url = a_Html_url_new(html, attrbuf, NULL, 0);
2590
2591 dReturn_if_fail ( url != NULL );
2592
2594 html->styleEngine->setPseudoVisited ();
2595 } else {
2596 html->styleEngine->setPseudoLink ();
2597 }
2599 Html_set_new_link(html, &url));
2600 }
2601 }
2602
2603static void Html_tag_content_source(DilloHtml *html, const char *tag,
2604 int tagsize)
2605{
2606 if ((html->InFlags & IN_MEDIA) && a_Html_get_attr(html, tag, tagsize,"src"))
2607 HT2TB(html)->addText("[MEDIA SOURCE]", html->wordStyle ());
2608}
2609
2614{
2615 html->InFlags &= ~IN_MEDIA;
2616}
2617
2618/*
2619 * <EMBED>
2620 * Provide a link to embedded content.
2621 */
/* Open handler for <embed>: when 'src' is present, build its URL, select
 * the visited or unvisited link pseudo-class and register it as a link.
 * NOTE(review): the condition choosing the pseudo-class and the start of
 * the statement using Html_set_new_link() are not visible in this listing. */
2622 static void Html_tag_open_embed(DilloHtml *html, const char *tag, int tagsize)
2623 {
2624 const char *attrbuf;
2625
2626 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "src"))) {
2627 DilloUrl *url = a_Html_url_new(html, attrbuf, NULL, 0);
2628
2629 dReturn_if_fail ( url != NULL );
2630
2632 html->styleEngine->setPseudoVisited ();
2633 } else {
2634 html->styleEngine->setPseudoLink ();
2635 }
2636
2638 Html_set_new_link(html, &url));
2639 }
2640 }
2641
2642static void Html_tag_content_embed(DilloHtml *html,const char *tag,int tagsize)
2643{
2644 if (a_Html_get_attr(html, tag, tagsize, "src"))
2645 HT2TB(html)->addText("[EMBED]", html->wordStyle ());
2646}
2647
2651static const char* Html_get_javascript_link(DilloHtml *html)
2652{
2653 size_t i;
2654 char ch, *p1, *p2;
2655 Dstr *Buf = html->attr_data;
2656
2657 if (dStrnAsciiCasecmp("javascript", Buf->str, 10) == 0) {
2658 i = strcspn(Buf->str, "'\"");
2659 ch = Buf->str[i];
2660 if ((ch == '"' || ch == '\'') &&
2661 (p2 = strchr(Buf->str + i + 1 , ch))) {
2662 p1 = Buf->str + i;
2663 BUG_MSG("Link depends on javascript().");
2664 dStr_truncate(Buf, p2 - Buf->str);
2665 dStr_erase(Buf, 0, p1 - Buf->str + 1);
2666 }
2667 }
2668 return Buf->str;
2669}
2670
2674static void Html_add_anchor(DilloHtml *html, const char *name)
2675{
2676 _MSG("Registering ANCHOR: %s\n", name);
2677 if (!HT2TB(html)->addAnchor (name, html->style ()))
2678 BUG_MSG("Anchor names must be unique within the document (\"%s\").",
2679 name);
2680 /*
2681 * According to Sec. 12.2.1 of the HTML 4.01 spec, "anchor names that
2682 * differ only in case may not appear in the same document", but
2683 * "comparisons between fragment identifiers and anchor names must be
2684 * done by exact (case-sensitive) match." We ignore the case issue and
2685 * always test for exact matches. Moreover, what does uppercase mean
2686 * for Unicode characters outside the ASCII range?
2687 */
2688}
2689
2690/*
2691 * <A>
2692 */
2693static void Html_tag_open_a(DilloHtml *html, const char *tag, int tagsize)
2694{
2695 DilloUrl *url;
2696 const char *attrbuf;
2697
2698 /* TODO: add support for MAP with A HREF */
2699 html->InFlags |= IN_A;
2700 if (html->InFlags & IN_MAP)
2701 Html_tag_content_area(html, tag, tagsize);
2702
2703 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "href"))) {
2704 /* if it's a javascript link, extract the reference. */
2705 if (D_ASCII_TOLOWER(attrbuf[0]) == 'j')
2706 attrbuf = Html_get_javascript_link(html);
2707
2708 url = a_Html_url_new(html, attrbuf, NULL, 0);
2709 dReturn_if_fail ( url != NULL );
2710
2712 html->InVisitedLink = true;
2713 html->styleEngine->setPseudoVisited ();
2714 if (html->non_css_visited_color != -1)
2717 html->non_css_visited_color);
2718 } else {
2719 html->styleEngine->setPseudoLink ();
2720 if (html->non_css_link_color != -1)
2723 html->non_css_link_color);
2724 }
2725
2727 Html_set_new_link(html, &url));
2728 }
2729 if (prefs.show_tooltip &&
2730 (attrbuf = a_Html_get_attr(html, tag, tagsize, "title"))) {
2732 attrbuf);
2733 }
2734
2736
2737 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "name"))) {
2738 char *nameVal;
2739 const char *id = html->styleEngine->getId ();
2740
2742 Html_check_name_val(html, attrbuf, "name");
2743
2744 nameVal = a_Url_decode_hex_str(attrbuf);
2745
2746 if (nameVal) {
2747 /* We compare the "id" value with the url-decoded "name" value */
2748 if (!id || strcmp(nameVal, id)) {
2749 if (id)
2750 BUG_MSG("In <a>, id ('%s') and name ('%s') attributes differ.",
2751 id, nameVal);
2752 Html_add_anchor(html, nameVal);
2753 }
2754
2755 dFree(nameVal);
2756 }
2757 }
2758}
2759
2760/*
2761 * <A> close function
2762 */
2763static void Html_tag_close_a(DilloHtml *html)
2764{
2765 html->InFlags &= ~IN_A;
2766 html->InVisitedLink = false;
2767}
2768
2769/*
2770 * <BLOCKQUOTE>
2771 */
2773 const char *tag, int tagsize)
2774{
2775 Html_add_textblock(html, true, 9, false);
2776}
2777
2778/*
2779 * <Q>
2780 */
2781static void Html_tag_open_q(DilloHtml *html, const char *tag, int tagsize)
2782{
2783 /*
2784 * Left Double Quotation Mark, which is wrong in many cases, but
2785 * should at least be widely recognized.
2786 */
2787 const char *U201C = "\xe2\x80\x9c";
2788
2790 HT2TB(html)->addText (U201C, html->wordStyle ());
2791}
2792
2793/*
2794 * </Q>
2795 */
2796static void Html_tag_close_q(DilloHtml *html)
2797{
2798 /* Right Double Quotation Mark */
2799 const char *U201D = "\xe2\x80\x9d";
2800
2801 HT2TB(html)->addText (U201D, html->wordStyle ());
2802}
2803
2804/*
2805 * Handle the <UL> tag.
2806 */
2807static void Html_tag_open_ul(DilloHtml *html, const char *tag, int tagsize)
2808{
2809 const char *attrbuf;
2810 ListStyleType list_style_type;
2811
2812 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "type"))) {
2813
2814 /* list_style_type explicitly defined */
2815 if (dStrAsciiCasecmp(attrbuf, "disc") == 0)
2816 list_style_type = LIST_STYLE_TYPE_DISC;
2817 else if (dStrAsciiCasecmp(attrbuf, "circle") == 0)
2818 list_style_type = LIST_STYLE_TYPE_CIRCLE;
2819 else if (dStrAsciiCasecmp(attrbuf, "square") == 0)
2820 list_style_type = LIST_STYLE_TYPE_SQUARE;
2821 else
2822 /* invalid value */
2823 list_style_type = LIST_STYLE_TYPE_DISC;
2824
2826 CSS_TYPE_ENUM, list_style_type);
2827 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
2828 BUG_MSG("<ul> type attribute is obsolete.");
2829 }
2830
2831 S_TOP(html)->list_type = HTML_LIST_UNORDERED;
2832 S_TOP(html)->list_number = 0;
2833 S_TOP(html)->ref_list_item = NULL;
2834}
2835
2836/*
2837 * Handle the <DIR> or <MENU> tag.
2838 * (Deprecated and almost the same as <UL>)
2839 */
2840static void Html_tag_open_dir(DilloHtml *html, const char *tag, int tagsize)
2841{
2843 HT2TB(html)->addParbreak (9, html->wordStyle ());
2844
2845 S_TOP(html)->list_type = HTML_LIST_UNORDERED;
2846 S_TOP(html)->list_number = 0;
2847 S_TOP(html)->ref_list_item = NULL;
2848
2850 BUG_MSG("Obsolete list type; use <ul> instead.");
2851}
2852
2853/*
2854 * Handle the <MENU> tag.
2855 */
2856static void Html_tag_open_menu(DilloHtml *html, const char *tag, int tagsize)
2857{
2858 /* In another bit of ridiculous mess from the HTML5 world, the menu
2859 * element, which was deprecated in HTML4:
2860 * - does not appear at all in W3C's HTML5 spec
2861 * - appears in WHATWG's HTML5 doc and the W3C's 5.1 draft, where it
2862 * means something totally different than it did in the old days
2863 * (now it's for popup menus and toolbar menus rather than being a
2864 * sort of list).
2865 */
2866 if (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f))
2867 Html_tag_open_dir(html, tag, tagsize);
2868}
2869
2870/*
2871 * Handle the <OL> tag.
2872 */
2873static void Html_tag_open_ol(DilloHtml *html, const char *tag, int tagsize)
2874{
2875 const char *attrbuf;
2876 int n = 1;
2877
2878 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "type"))) {
2879 ListStyleType listStyleType = LIST_STYLE_TYPE_DECIMAL;
2880
2881 if (*attrbuf == '1')
2882 listStyleType = LIST_STYLE_TYPE_DECIMAL;
2883 else if (*attrbuf == 'a')
2884 listStyleType = LIST_STYLE_TYPE_LOWER_ALPHA;
2885 else if (*attrbuf == 'A')
2886 listStyleType = LIST_STYLE_TYPE_UPPER_ALPHA;
2887 else if (*attrbuf == 'i')
2888 listStyleType = LIST_STYLE_TYPE_LOWER_ROMAN;
2889 else if (*attrbuf == 'I')
2890 listStyleType = LIST_STYLE_TYPE_UPPER_ROMAN;
2891
2893 CSS_TYPE_ENUM, listStyleType);
2894 }
2895
2896 S_TOP(html)->list_type = HTML_LIST_ORDERED;
2897
2898 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "start")) &&
2899 (n = (int) strtol(attrbuf, NULL, 10)) < 0) {
2900 BUG_MSG("Illegal '-' character in START attribute; Starting from 0.");
2901 n = 0;
2902 }
2903 S_TOP(html)->list_number = n;
2904 S_TOP(html)->ref_list_item = NULL;
2905}
2906
2907/*
2908 * Handle the <LI> tag.
2909 */
2910static void Html_tag_open_li(DilloHtml *html, const char *tag, int tagsize)
2911{
2912 Style *style = html->style ();
2913 int *list_number;
2914 const char *attrbuf;
2915
2916 if (S_TOP(html)->list_type == HTML_LIST_NONE &&
2917 !(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) {
2918 /* In WHATWG's HTML5 and W3C's HTML 5.1, LI can appear within MENUs
2919 * of the toolbar type.
2920 */
2921 BUG_MSG("<li> outside <ul> or <ol>.");
2922 }
2923
2924 html->InFlags |= IN_LI;
2925
2926 /* Get our parent tag's variables (used as state storage) */
2927 list_number = &html->stack->getRef(html->stack->size()-2)->list_number;
2928
2930 // ordered
2931 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "value")) &&
2932 (*list_number = strtol(attrbuf, NULL, 10)) < 0) {
2933 BUG_MSG("Illegal negative list value attribute; Starting from 0.");
2934 *list_number = 0;
2935 }
2936 }
2937}
2938
2939/*
2940 * Close <LI>.
2941 */
2943{
2944 html->InFlags &= ~IN_LI;
2945 ((ListItem *)html->dw)->flush ();
2946}
2947
2948/*
2949 * <HR>
2950 */
2951static void Html_tag_open_hr(DilloHtml *html, const char *tag, int tagsize)
2952{
2953 char *width_ptr;
2954 const char *attrbuf;
2955 int32_t size = 0;
2956
2957 width_ptr = a_Html_get_attr_wdef(html, tag, tagsize, "width", NULL);
2958 if (width_ptr) {
2959 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
2960 BUG_MSG("<hr> width attribute is obsolete.");
2963 a_Html_parse_length (html, width_ptr));
2964 dFree(width_ptr);
2965 }
2966
2967 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "size"))) {
2968 size = strtol(attrbuf, NULL, 10);
2969 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
2970 BUG_MSG("<hr> size attribute is obsolete.");
2971 }
2972
2973 a_Html_tag_set_align_attr(html, tag, tagsize);
2974
2975 /* TODO: evaluate attribute */
2976 if (a_Html_get_attr(html, tag, tagsize, "noshade")) {
2977 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
2978 BUG_MSG("<hr> noshade attribute is obsolete.");
2987
2988 if (size <= 0)
2989 size = 1;
2990 }
2991
2992 if (size > 0) {
2993 CssLength size_top = CSS_CREATE_LENGTH ((size+1)/2, CSS_LENGTH_TYPE_PX);
2994 CssLength size_bottom = CSS_CREATE_LENGTH (size / 2, CSS_LENGTH_TYPE_PX);
2996 CSS_TYPE_LENGTH_PERCENTAGE, size_top);
2998 CSS_TYPE_LENGTH_PERCENTAGE, size_top);
3001 size_bottom);
3004 size_bottom);
3005 }
3006
3007}
3008
3009static void Html_tag_content_hr(DilloHtml *html, const char *tag, int tagsize)
3010{
3011 Widget *hruler;
3012 HT2TB(html)->addParbreak (5, html->wordStyle ());
3013
3014 hruler = new Ruler();
3015 hruler->setStyle (html->style ());
3016 HT2TB(html)->addWidget (hruler, html->style ());
3017 HT2TB(html)->addParbreak (5, html->wordStyle ());
3018}
3019
3020/*
3021 * <DL>
3022 */
3023static void Html_tag_open_dl(DilloHtml *html, const char *tag, int tagsize)
3024{
3025 /* may want to actually do some stuff here. */
3027 HT2TB(html)->addParbreak (9, html->wordStyle ());
3028}
3029
3030/*
3031 * <DT>
3032 */
3033static void Html_tag_open_dt(DilloHtml *html, const char *tag, int tagsize)
3034{
3036 HT2TB(html)->addParbreak (9, html->wordStyle ());
3037}
3038
3039/*
3040 * <DD>
3041 */
3042static void Html_tag_open_dd(DilloHtml *html, const char *tag, int tagsize)
3043{
3044 Html_add_textblock(html, true, 9, false);
3045}
3046
3047/*
3048 * <PRE>
3049 */
3050static void Html_tag_open_pre(DilloHtml *html, const char *tag, int tagsize)
3051{
3053 HT2TB(html)->addParbreak (9, html->wordStyle ());
3054
3055 html->InFlags |= IN_PRE;
3056}
3057
3058/*
3059 * Custom close for <PRE>
3060 */
3062{
3063 html->InFlags &= ~IN_PRE;
3064}
3065
3070static int Html_tag_pre_excludes(DilloHtml *html, int tag_idx)
3071{
3072 if (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) {
3073 /* HTML5 doesn't say anything about excluding elements */
3074 const char *es_set[] = {"img", "object", "applet", "big", "small", "sub",
3075 "sup", "font", "basefont", NULL};
3076 static int ei_set[10], i;
3077
3078 /* initialize array */
3079 if (!ei_set[0])
3080 for (i = 0; es_set[i]; ++i)
3081 ei_set[i] = a_Html_tag_index(es_set[i]);
3082
3083 for (i = 0; ei_set[i]; ++i)
3084 if (tag_idx == ei_set[i])
3085 return 1;
3086 }
3087 return 0;
3088}
3089
3093static void Html_update_content_type(DilloHtml *html, const char *content)
3094{
3095 const char *new_content = a_Capi_set_content_type(html->page_url, content,
3096 "meta");
3097 /* Cannot ask cache whether the content type was changed, as
3098 * this code in another bw might have already changed it for us.
3099 */
3100 if (a_Misc_content_type_cmp(html->content_type, new_content)) {
3101 html->stop_parser = true; /* The cache buffer is no longer valid */
3102 a_UIcmd_repush(html->bw);
3103 }
3104}
3105
3119static void Html_tag_open_meta(DilloHtml *html, const char *tag, int tagsize)
3120{
3121 const char meta_template[] =
3122"<table width='100%%'><tr><td bgcolor='#ee0000'>Warning:</td>\n"
3123" <td bgcolor='#8899aa' width='100%%'>\n"
3124" This page uses the NON-STANDARD meta refresh tag.<br> The HTML 4.01 SPEC\n"
3125" (sec 7.4.4) recommends explicitly to avoid it.</td></tr>\n"
3126" <tr><td bgcolor='#a0a0a0' colspan='2'>The author wanted you to go\n"
3127" <a href='%s'>here</a>%s</td></tr></table><br>\n";
3128
3129 const char *p, *equiv, *charset, *content;
3130 char delay_str[64], *mr_url;
3131 DilloUrl *new_url;
3132 int delay;
3133
3134 /* only valid inside HEAD */
3135 if (!(html->InFlags & IN_HEAD)) {
3136 if (!((html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) &&
3137 a_Html_get_attr(html, tag, tagsize, "itemprop"))) {
3138 /* With the HTML 5.1 draft spec, meta with itemprop may appear
3139 * in the body.
3140 */
3141 BUG_MSG("This <meta> element must be inside the HEAD section.");
3142 }
3143 return;
3144 }
3145
3146 if ((equiv = a_Html_get_attr(html, tag, tagsize, "http-equiv"))) {
3147 if (!dStrAsciiCasecmp(equiv, "refresh") &&
3148 (content = a_Html_get_attr(html, tag, tagsize, "content"))) {
3149
3150 /* Get delay, if present, and make a message with it */
3151 if ((delay = strtol(content, NULL, 0))) {
3152 snprintf(delay_str, 64, " after %d second%s.",
3153 delay, (delay > 1) ? "s" : "");
3154 } else {
3155 sprintf(delay_str, ".");
3156 }
3157 /* Skip to anything after "URL=" or ";" if "URL=" is not found */
3158 int has_url = 1;
3159 if ((p = dStriAsciiStr(content, "url=")))
3160 content = p + strlen("url=");
3161 else if ((p = strstr(content, ";")))
3162 content = p + strlen(";");
3163 else
3164 has_url = 0;
3165
3166 if (has_url) {
3167 /* Handle the case of a quoted URL */
3168 if (*content == '"' || *content == '\'') {
3169 if ((p = strchr(content + 1, *content)))
3170 mr_url = dStrndup(content + 1, p - content - 1);
3171 else
3172 mr_url = dStrdup(content + 1);
3173 } else {
3174 mr_url = dStrdup(content);
3175 }
3176 } else {
3177 mr_url = dStrdup("");
3178 }
3179
3180 new_url = a_Html_url_new(html, mr_url, NULL, 0);
3181
3182 if (a_Url_cmp(html->base_url, new_url) == 0) {
3183 /* redirection loop, or empty url string: ignore */
3184 BUG_MSG("<meta> refresh: %s.",
3185 *mr_url ? "redirection loop" : "no target URL");
3186 } else if (delay == 0) {
3187 /* zero-delay redirection */
3188 html->stop_parser = true;
3189 if (URL_FLAGS(html->base_url) & URL_SpamSafe) {
3190 a_UIcmd_set_msg(html->bw,
3191 "WARNING: local URL with META refresh. Aborting.");
3192 } else if (a_Capi_dpi_verify_request(html->bw, new_url)) {
3193 a_UIcmd_redirection0((void*)html->bw, new_url);
3194 }
3195 } else {
3196 /* Send a custom HTML message.
3197 * TODO: This is a hairy hack,
3198 * It'd be much better to build a widget. */
3199 Dstr *ds_msg = dStr_sized_new(256);
3200 dStr_sprintf(ds_msg, meta_template, URL_STR(new_url), delay_str);
3201 {
3202 int o_InFlags = html->InFlags;
3203 int o_TagSoup = html->TagSoup;
3204 html->InFlags = IN_BODY + IN_META_HACK;
3205 html->TagSoup = false;
3206 Html_write_raw(html, ds_msg->str, ds_msg->len, 0);
3207 html->TagSoup = o_TagSoup;
3208 html->InFlags = o_InFlags;
3209 }
3210 dStr_free(ds_msg, 1);
3211 }
3212 a_Url_free(new_url);
3213 dFree(mr_url);
3214
3215 } else if (!dStrAsciiCasecmp(equiv, "content-type") &&
3216 (content = a_Html_get_attr(html, tag, tagsize, "content"))) {
3217 _MSG("Html_tag_open_meta: content={%s}\n", content);
3218 Html_update_content_type(html, content);
3219 }
3220 } else if (html->DocType == DT_HTML && html->DocTypeVersion == 5.0f &&
3221 (charset = a_Html_get_attr(html, tag, tagsize, "charset"))) {
3222 char *content = dStrconcat("text/html; charset=", charset, NULL);
3223
3224 Html_update_content_type(html, content);
3225 dFree(content);
3226 }
3227}
3228
3232static void Html_css_load_callback(int Op, CacheClient_t *Client)
3233{
3234 _MSG("Html_css_load_callback: Op=%d\n", Op);
3235 if (Op) { /* EOF */
3236 BrowserWindow *bw = ((DilloWeb *)Client->Web)->bw;
3237 /* Repush when we've got them all */
3238 if (--bw->NumPendingStyleSheets == 0)
3240 }
3241}
3242
3247{
3248 char *data;
3249 int len;
3250
3251 dReturn_if (url == NULL || ! prefs.load_stylesheets);
3252
3253 _MSG("Html_load_stylesheet: ");
3255 a_Capi_get_buf(url, &data, &len)) {
3256 _MSG("cached URL=%s len=%d", URL_STR(url), len);
3257 if (strncmp("@charset \"", data, 10) == 0) {
3258 char *endq = strchr(data+10, '"');
3259
3260 if (endq && (endq - data <= 51)) {
3261 /* IANA limits charset names to 40 characters */
3262 char *content_type;
3263
3264 *endq = '\0';
3265 content_type = dStrconcat("text/css; charset=", data+10, NULL);
3266 *endq = '"';
3267 a_Capi_unref_buf(url);
3270 a_Capi_get_buf(url, &data, &len);
3271 }
3272 }
3273 html->styleEngine->parse(html, url, data, len, CSS_ORIGIN_AUTHOR);
3274 a_Capi_unref_buf(url);
3275 } else {
3276 /* Fill a Web structure for the cache query */
3277 int ClientKey;
3278 DilloWeb *Web = a_Web_new(html->bw, url, html->page_url);
3279 Web->flags |= WEB_Stylesheet;
3280 if ((ClientKey = a_Capi_open_url(Web, Html_css_load_callback, NULL))) {
3281 ++html->bw->NumPendingStyleSheets;
3282 a_Bw_add_client(html->bw, ClientKey, 0);
3283 a_Bw_add_url(html->bw, url);
3284 MSG("NumPendingStyleSheets=%d\n", html->bw->NumPendingStyleSheets);
3285 }
3286 }
3287 _MSG("\n");
3288}
3289
3298static void Html_tag_open_link(DilloHtml *html, const char *tag, int tagsize)
3299{
3300 DilloUrl *url;
3301 const char *attrbuf;
3302
3303 //char *tag_str = dStrndup(tag, tagsize);
3304 //MSG("Html_tag_open_link(): %s\n", tag_str);
3305 //dFree(tag_str);
3306
3307 /* When viewing suspicious HTML email, don't load LINK */
3309
3310 /* Ignore LINK outside HEAD */
3311 if (!(html->InFlags & IN_HEAD)) {
3312 if (!((html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) &&
3313 a_Html_get_attr(html, tag, tagsize, "itemprop"))) {
3314 /* With the HTML 5.1 draft spec, link with itemprop may appear
3315 * in the body.
3316 */
3317 BUG_MSG("This <link> element must be inside the HEAD section.");
3318 }
3319 return;
3320 }
3321 /* Remote stylesheets enabled? */
3323 /* CSS stylesheet link */
3324 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "rel")) ||
3325 dStrAsciiCasecmp(attrbuf, "stylesheet"))
3326 return;
3327
3328 /* IMPLIED attributes? */
3329 if (((attrbuf = a_Html_get_attr(html, tag, tagsize, "type")) &&
3330 dStrAsciiCasecmp(attrbuf, "text/css")) ||
3331 ((attrbuf = a_Html_get_attr(html, tag, tagsize, "media")) &&
3332 !dStriAsciiStr(attrbuf, "screen") && dStrAsciiCasecmp(attrbuf, "all")))
3333 return;
3334
3335 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "href")) ||
3336 !(url = a_Html_url_new(html, attrbuf, NULL, 0)))
3337 return;
3338
3339 _MSG(" Html_tag_open_link(): addCssUrl %s\n", URL_STR(url));
3340
3341 html->addCssUrl(url);
3342 a_Url_free(url);
3343}
3344
3348static void Html_tag_open_base(DilloHtml *html, const char *tag, int tagsize)
3349{
3350 const char *attrbuf;
3351 DilloUrl *BaseUrl;
3352
3353 if (html->InFlags & IN_HEAD) {
3354 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "href"))) {
3355 bool_t html5 = html->DocType == DT_HTML &&
3356 html->DocTypeVersion >= 5.0f;
3357
3358 BaseUrl = html5 ? a_Html_url_new(html, attrbuf, NULL, 0) :
3359 a_Html_url_new(html, attrbuf, "", 1);
3360
3361 if (html5 || URL_SCHEME_(BaseUrl)) {
3362 /* Pass the URL_SpamSafe flag to the new base url */
3364 BaseUrl, URL_FLAGS(html->base_url) & URL_SpamSafe);
3365 a_Url_free(html->base_url);
3366 html->base_url = BaseUrl;
3367 } else {
3368 BUG_MSG("<base> URI is relative (it MUST be absolute).");
3369 a_Url_free(BaseUrl);
3370 }
3371 }
3372 } else {
3373 BUG_MSG("<base> not inside HEAD section.");
3374 }
3375}
3376
3377static void Html_tag_open_default(DilloHtml *html,const char *tag,int tagsize)
3378{
3380}
3381
3382/*
3383 * <SPAN>
3384 */
3385static void Html_tag_open_span(DilloHtml *html, const char *tag, int tagsize)
3386{
3387 const char *attrbuf;
3388
3390
3391 if (prefs.show_tooltip &&
3392 (attrbuf = a_Html_get_attr(html, tag, tagsize, "title"))) {
3393
3395 attrbuf);
3396 }
3397}
3398
3399/*
3400 * html5 sectioning stuff: article aside nav section header footer
3401 */
3402static void Html_tag_open_sectioning(DilloHtml *html, const char *tag,
3403 int tagsize)
3404{
3405 const char *attrbuf;
3406
3407 if (prefs.show_tooltip &&
3408 (attrbuf = a_Html_get_attr(html, tag, tagsize, "title"))) {
3409
3411 attrbuf);
3412 }
3413}
3414
3415/*
3416 * <DIV> (TODO: make a complete implementation)
3417 */
3418static void Html_tag_open_div(DilloHtml *html, const char *tag, int tagsize)
3419{
3420 a_Html_tag_set_align_attr (html, tag, tagsize);
3421 Html_tag_open_sectioning(html, tag, tagsize);
3422}
3423
3428{
3429 HT2TB(html)->addParbreak (9, html->wordStyle ());
3430}
3431
3432/*
3433 * <WBR> "The wbr element represents a line break opportunity."
3434 */
3435static void Html_tag_content_wbr(DilloHtml *html, const char *tag, int tagsize)
3436{
3437 HT2TB(html)->addBreakOption(html->wordStyle (), true);
3438}
3439
3440
3468static const TagInfo Tags[] = {
3469 {"a", B8(01011),'R', Html_tag_open_a, NULL, Html_tag_close_a},
3470 {"abbr", B8(01011),'R', Html_tag_open_abbr, NULL, NULL},
3471 /* acronym 010101 -- obsolete in HTML5 */
3472 {"address", B8(01110),'R', Html_tag_open_default, NULL, Html_tag_close_par},
3473 {"area", B8(01001),'F', Html_tag_open_default, Html_tag_content_area, NULL},
3474 {"article", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3475 {"aside", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3476 {"audio", B8(01111),'R', Html_tag_open_audio, NULL, Html_tag_close_media},
3477 {"b", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3478 {"base", B8(10001),'F', Html_tag_open_base, NULL, NULL},
3479 /* basefont 010001 -- obsolete in HTML5 */
3480 /* bdo 010101 */
3481 {"big", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3482 {"blockquote", B8(01110),'R', Html_tag_open_blockquote, NULL, NULL},
3483 {"body", B8(01110),'O', Html_tag_open_body, NULL, Html_tag_close_body},
3484 {"br", B8(01001),'F', Html_tag_open_default, Html_tag_content_br, NULL},
3485 {"button", B8(01111),'R', Html_tag_open_button,NULL,Html_tag_close_button},
3486 /* caption */
3487 {"center", B8(01110),'R', Html_tag_open_default, NULL, NULL},
3488 {"cite", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3489 {"code", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3490 /* col 010010 'F' */
3491 /* colgroup */
3492 {"dd", B8(01110),'O', Html_tag_open_dd, NULL, NULL},
3493 {"del", B8(01111),'R', Html_tag_open_default, NULL, NULL},
3494 {"dfn", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3495 {"dir", B8(01100),'R', Html_tag_open_dir, NULL, Html_tag_close_par},
3496 /* TODO: complete <div> support! */
3497 {"div", B8(01110),'R', Html_tag_open_div, NULL, NULL},
3498 {"dl", B8(01100),'R', Html_tag_open_dl, NULL, Html_tag_close_par},
3499 {"dt", B8(01010),'O', Html_tag_open_dt, NULL, Html_tag_close_par},
3500 {"em", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3501 {"embed", B8(01001),'F', Html_tag_open_embed, Html_tag_content_embed, NULL},
3502 /* fieldset */
3503 {"figcaption", B8(01110),'R', Html_tag_open_default, NULL, NULL},
3504 {"figure", B8(01110),'R', Html_tag_open_default, NULL, NULL},
3505 {"font", B8(01011),'R', Html_tag_open_font, NULL, NULL},
3506 {"footer", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3507 {"form", B8(01110),'R', Html_tag_open_form, NULL, Html_tag_close_form},
3508 {"frame", B8(01000),'F', Html_tag_open_frame, Html_tag_content_frame, NULL},
3509 {"frameset", B8(01110),'R', Html_tag_open_default, Html_tag_content_frameset,
3510 NULL},
3511 {"h1", B8(01010),'R', Html_tag_open_h, NULL, NULL},
3512 {"h2", B8(01010),'R', Html_tag_open_h, NULL, NULL},
3513 {"h3", B8(01010),'R', Html_tag_open_h, NULL, NULL},
3514 {"h4", B8(01010),'R', Html_tag_open_h, NULL, NULL},
3515 {"h5", B8(01010),'R', Html_tag_open_h, NULL, NULL},
3516 {"h6", B8(01010),'R', Html_tag_open_h, NULL, NULL},
3517 {"head", B8(10111),'O', Html_tag_open_head, NULL, Html_tag_close_head},
3518 {"header", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3519 {"hr", B8(01000),'F', Html_tag_open_hr, Html_tag_content_hr, NULL},
3520 {"html", B8(00110),'O', Html_tag_open_html, NULL, Html_tag_close_html},
3521 {"i", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3522 {"iframe", B8(01111),'R', Html_tag_open_frame, Html_tag_content_frame, NULL},
3523 {"img", B8(01001),'F', Html_tag_open_img, Html_tag_content_img, NULL},
3524 {"input", B8(01001),'F', Html_tag_open_input, NULL, NULL},
3525 {"ins", B8(01111),'R', Html_tag_open_default, NULL, NULL},
3526 {"isindex", B8(11001),'F', Html_tag_open_isindex, NULL, NULL},
3527 {"kbd", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3528 /* label 010101 */
3529 /* legend 01?? */
3530 {"li", B8(01110),'O', Html_tag_open_li, NULL, Html_tag_close_li},
3531 {"link", B8(10001),'F', Html_tag_open_link, NULL, NULL},
3532 {"main", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3533 {"map", B8(01101),'R', Html_tag_open_default, Html_tag_content_map,
3535 {"mark", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3536 /* menu 1010 -- TODO: not exactly 1010, it can contain LI and inline */
3537 {"menu", B8(01100),'R', Html_tag_open_menu, NULL, Html_tag_close_par},
3538 {"meta", B8(11001),'F', Html_tag_open_meta, NULL, NULL},
3539 {"nav", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3540 /* noframes 1011 -- obsolete in HTML5 */
3541 /* noscript 1011 */
3542 {"object", B8(11111),'R', Html_tag_open_object, Html_tag_content_object,NULL},
3543 {"ol", B8(01100),'R', Html_tag_open_ol, NULL, NULL},
3544 {"optgroup", B8(01011),'O', Html_tag_open_optgroup, NULL,
3546 {"option", B8(01001),'O', Html_tag_open_option, NULL, Html_tag_close_option},
3547 {"p", B8(01010),'O', Html_tag_open_p, NULL, NULL},
3548 /* param 010001 'F' */
3549 {"pre", B8(01010),'R', Html_tag_open_pre, NULL, Html_tag_close_pre},
3550 {"q", B8(01011),'R', Html_tag_open_q, NULL, Html_tag_close_q},
3551 {"s", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3552 {"samp", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3553 {"script", B8(11101),'R', Html_tag_open_script,NULL,Html_tag_close_script},
3554 {"section", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3555 {"select", B8(01011),'R', Html_tag_open_select,NULL,Html_tag_close_select},
3556 {"small", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3557 {"source", B8(01001),'F', Html_tag_open_source, Html_tag_content_source,NULL},
3558 {"span", B8(01011),'R', Html_tag_open_span, NULL, NULL},
3559 {"strike", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3560 {"strong", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3561 {"style", B8(10011),'R', Html_tag_open_style, NULL, Html_tag_close_style},
3562 {"sub", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3563 {"sup", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3564 {"table", B8(01100),'R', Html_tag_open_table, Html_tag_content_table, NULL},
3565 {"tbody", B8(01010),'O', Html_tag_open_tbody, Html_tag_content_tbody, NULL},
3566 {"td", B8(01110),'O', Html_tag_open_td, Html_tag_content_td, NULL},
3567 {"textarea", B8(01011),'R', Html_tag_open_textarea, Html_tag_content_textarea,
3569 {"tfoot", B8(01010),'O', Html_tag_open_tfoot, NULL, NULL},
3570 {"th", B8(01110),'O', Html_tag_open_th, Html_tag_content_th, NULL},
3571 {"thead", B8(01010),'O', Html_tag_open_thead, NULL, NULL},
3572 {"title", B8(10011),'R', Html_tag_open_title, NULL, Html_tag_close_title},
3573 {"tr", B8(01100),'O', Html_tag_open_tr, Html_tag_content_tr, NULL},
3574 {"tt", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3575 {"u", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3576 {"ul", B8(01100),'R', Html_tag_open_ul, NULL, NULL},
3577 {"var", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3578 {"video", B8(01111),'R', Html_tag_open_video, NULL, Html_tag_close_media},
3579 {"wbr", B8(01011),'F', Html_tag_open_default, Html_tag_content_wbr, NULL}
3580};
3581#define NTAGS (sizeof(Tags)/sizeof(Tags[0]))
3582
3583/* Only available in C++11 and up */
3584#if __cpp_static_assert
3585static_assert(NTAGS == HTML_NTAGS,
3586 "Mismatch between number of tags in Tags and HTML_NTAGS");
3587#endif
3588
3589/*
3590 * Compares tag from buffer ('/' or '>' or space-ended string) [p1]
3591 * with tag from taglist (lowercase, zero ended string) [p2]
3592 * Return value: as strcmp()
3593 */
3594static int Html_tag_compare(const char *p1, const char *p2)
3595{
3596 while ( *p2 ) {
3597 if (D_ASCII_TOLOWER(*p1) != *p2)
3598 return(D_ASCII_TOLOWER(*p1) - *p2);
3599 ++p1;
3600 ++p2;
3601 }
3602 return !strchr(" >/\n\r\t", *p1);
3603}
3604
3609int a_Html_tag_index(const char *tag)
3610{
3611 int low, high, mid, cond;
3612
3613 /* Binary search */
3614 low = 0;
3615 high = NTAGS - 1; /* Last tag index */
3616 while (low <= high) {
3617 mid = (low + high) / 2;
3618 if ((cond = Html_tag_compare(tag, Tags[mid].name)) < 0 )
3619 high = mid - 1;
3620 else if (cond > 0)
3621 low = mid + 1;
3622 else
3623 return mid;
3624 }
3625 return -1;
3626}
3627
3636static int Html_triggers_optional_close(int old_idx, int cur_idx)
3637{
3638 int Flags = Tags[cur_idx].Flags;
3639 if (old_idx == i_P || old_idx == i_DT) {
3640 /* P and DT are closed by block elements (i.e. non inline)*/
3641 return (!(Flags & 1));
3642 } else if (old_idx == i_LI) {
3643 /* LI closes LI
3644 * Note: non-flow should also close it, but FF does not. */
3645 return (cur_idx == i_LI);
3646 } else if (old_idx == i_TD || old_idx == i_TH) {
3647 /* TD and TH are closed by: TD, TH and TR.
3648 * Note: non-flow should also close it, but FF does not. */
3649 return (cur_idx == i_TD || cur_idx == i_TH || cur_idx == i_TR);
3650 } else if (old_idx == i_TR) {
3651 /* TR closes TR */
3652 return (cur_idx == i_TR);
3653 } else if (old_idx == i_DD) {
3654 /* DD is closed by DD and DT */
3655 return (cur_idx == i_DD || cur_idx == i_DT);
3656 } else if (old_idx == i_OPTGROUP) {
3657 /* i_OPTGROUP can only contain OPTION */
3658 return (cur_idx != i_OPTION);
3659 } else if (old_idx == i_OPTION) {
3660 return 1; // OPTION always needs close
3661 }
3662
3663 /* Don't close HTML, HEAD and BODY. They're handled by Html_test_section().
3664 * TODO: TBODY is pending */
3665 return 0;
3666}
3667
3675static inline int Html_forbids_cross_nesting(const int InFlags,
3676 const int new_idx)
3677{
3678 int f = InFlags, ni = new_idx, oi = -1;
3679 if (f & (IN_A | IN_BUTTON | IN_SELECT | IN_TEXTAREA) &&
3680 (ni == i_A || ni == i_BUTTON || ni == i_SELECT || ni == i_TEXTAREA))
3681 oi = (f & IN_A ? i_A : f & IN_BUTTON ? i_BUTTON : f & IN_SELECT ?
3682 i_SELECT : f & IN_TEXTAREA ? i_TEXTAREA : 0);
3683 return oi;
3684}
3685
3694static void Html_tag_cleanup_to_idx(DilloHtml *html, int s_idx,
3695 int new_idx, int fi, char op)
3696{
3697 int s_top, ni = new_idx;
3698 while ((s_top = html->stack->size() - 1) >= s_idx) {
3699 int toptag_idx = S_TOP(html)->tag_idx;
3700 TagInfo toptag = Tags[toptag_idx];
3701
3702 if (fi >= 0) {
3703 // forbidden nesting
3704 if (toptag_idx != fi)
3705 BUG_MSG(" Nesting cleanup - forcing close of open tag: <%s>.",
3706 toptag.name);
3707 } else if (s_top == s_idx && op == 'c') {
3708 // target tag, no bug when closing.
3709 } else if (toptag.EndTag == 'O') {
3710 // optional close, that's OK
3711 } else if ((!(toptag.Flags & 4) &&
3712 (Tags[ni].Flags & 4 || !(Tags[ni].Flags & 1))) ||
3713 (Tags[ni].Flags & 1 && !(toptag.Flags & 2))) {
3714 // block {element, container} in non block container or
3715 // inline element in non inline container
3716 BUG_MSG((op == 'o') ?
3717 "Bad nesting: <%s> can't contain <%s>. -- closing <%s>." :
3718 "<%s> needs to be closed before </%s>. -- closing <%s>.",
3719 toptag.name, Tags[ni].name, toptag.name);
3720 } else {
3721 BUG_MSG(
3722 "<%s> should have been closed before </%s>. -- closing <%s>.",
3723 toptag.name, Tags[ni].name, toptag.name);
3724 }
3725 _MSG("op(%c): %s s_top=%d s_idx=%d\n", op, toptag.name, s_top, s_idx);
3726 if (toptag_idx == i_BODY &&
3727 !((html->InFlags & IN_EOF) || html->ReqTagClose)) {
3728 (s_idx == 1 ? html->PrevWasHtmlClose : html->PrevWasBodyClose) = true;
3729 break; // only pop {BODY,HTML} upon EOF or redundancy
3730 }
3731 if (toptag.close)
3732 toptag.close(html);
3733 Html_real_pop_tag(html);
3734 }
3735}
3736
3746static void Html_stack_cleanup_at_open(DilloHtml *html, int ni)
3747{
3748 if (!html->TagSoup)
3749 return;
3750
3751 int s_top = html->stack->size() - 1, s_idx;
3752 int fi = Html_forbids_cross_nesting(html->InFlags, ni);
3753 for (s_idx = s_top; s_idx > 0; --s_idx) {
3754 int ti = html->stack->getRef(s_idx)->tag_idx;
3755
3756 if (fi >= 0) {
3757 // forbidden cross nesting found
3758 if (ti != fi)
3759 continue; // don't allow, close
3760 --s_idx;
3761 BUG_MSG("Forbidden nesting: <%s> can't contain <%s>. -- closing "
3762 "<%s>.", Tags[fi].name, Tags[ni].name, Tags[fi].name);
3763
3764 } else if ((html->InFlags & IN_PRE) && ni == i_HR) {
3765 break; // allow Apache's bad HTML directory listings...
3766
3767 } else if (Tags[ti].EndTag == 'O') { // Element with optional close
3768 if (Html_triggers_optional_close(ti, ni))
3769 continue; // close
3770 } else if (!(Tags[ni].Flags & 1) && !(Tags[ti].Flags & 4)) {
3771 // Block element over a NON block container
3772 if (ti == i_A && html->DocTypeVersion >= 5.0f)
3773 break;
3774 continue; // close
3775 }
3776
3777 break;
3778 }
3779
3780 if (s_idx < s_top)
3781 Html_tag_cleanup_to_idx(html, s_idx + 1, ni, fi, 'o');
3782}
3783
3796static void Html_tag_cleanup_at_close(DilloHtml *html, int new_idx)
3797{
3798 int stack_idx, tag_idx, matched = 0, expected = 0;
3799 TagInfo new_tag = Tags[new_idx];
3800
3801 /* Look for the candidate tag to close */
3802 stack_idx = html->stack->size();
3803 while (--stack_idx) {
3804 tag_idx = html->stack->getRef(stack_idx)->tag_idx;
3805 if (tag_idx == new_idx) {
3806 /* matching tag found */
3807 matched = 1;
3808 break;
3809 } else if (Tags[tag_idx].EndTag == 'O') {
3810 /* close elements with optional close */
3811 continue;
3812 } else if ((new_idx == i_A && html->InFlags & IN_A) ||
3813 (new_idx == i_BUTTON && html->InFlags & IN_BUTTON) ||
3814 (new_idx == i_SELECT && html->InFlags & IN_SELECT) ||
3815 (new_idx == i_TEXTAREA && html->InFlags & IN_TEXTAREA)) {
3816 /* Let these elements close anything left open inside them */
3817 continue;
3818 } else if (Tags[new_idx].Flags & 4 && // Block container
3819 Tags[stack_idx].Flags & 3) { // Inline element or container
3820 /* Let a block container close inline elements left open inside it. */
3821 continue;
3822 } else {
3823 /* this is the tag that should have been closed */
3824 expected = 1;
3825 break;
3826 }
3827 }
3828
3829 if (matched) {
3830 Html_tag_cleanup_to_idx(html, stack_idx, new_idx, -1, 'c');
3831 } else if (expected) {
3832 BUG_MSG("Unexpected closing tag: </%s> -- expected </%s>.",
3833 new_tag.name, Tags[tag_idx].name);
3834 } else {
3835 BUG_MSG("Unexpected closing tag: </%s>.", new_tag.name);
3836 }
3837}
3838
3843static void Html_test_section(DilloHtml *html, int new_idx, int IsCloseTag)
3844{
3845 const char *tag;
3846 int tag_idx;
3847
3848 if (!(html->InFlags & IN_HTML) && html->DocType == DT_NONE)
3849 BUG_MSG("The required DOCTYPE declaration is missing. "
3850 "Handling as HTML4.");
3851
3852 if (!(html->InFlags & IN_HTML)) {
3853 tag = "<html>";
3854 tag_idx = a_Html_tag_index(tag + 1);
3855 if (tag_idx != new_idx || IsCloseTag) {
3856 /* implicit open */
3857 Html_force_push_tag(html, tag_idx);
3858 _MSG("Open : %*s%s\n", html->stack->size()," ",Tags[tag_idx].name);
3859 Tags[tag_idx].open (html, tag, strlen(tag));
3860 }
3861 }
3862
3863 if (Tags[new_idx].Flags & 16) {
3864 /* head element */
3865 if (!(html->InFlags & IN_HEAD) && html->Num_HEAD == 0) {
3866 tag = "<head>";
3867 tag_idx = a_Html_tag_index(tag + 1);
3868 if (tag_idx != new_idx || IsCloseTag) {
3869 /* implicit open of the head element */
3870 Html_force_push_tag(html, tag_idx);
3871 _MSG("Open : %*s%s\n", html->stack->size()," ",Tags[tag_idx].name);
3872 Tags[tag_idx].open (html, tag, strlen(tag));
3873 }
3874 }
3875
3876 } else if (Tags[new_idx].Flags & 8) {
3877 /* body element */
3878 if (html->InFlags & IN_HEAD) {
3879 tag = "</head>";
3880 tag_idx = a_Html_tag_index(tag + 2);
3881 Html_tag_cleanup_at_close(html, tag_idx);
3882 }
3883 tag = "<body>";
3884 tag_idx = a_Html_tag_index(tag + 1);
3885 if (tag_idx != new_idx || IsCloseTag) {
3886 /* implicit open */
3887 Html_force_push_tag(html, tag_idx);
3888 _MSG("Open : %*s%s\n", html->stack->size()," ",Tags[tag_idx].name);
3889 Tags[tag_idx].open (html, tag, strlen(tag));
3890 }
3891 }
3892}
3893
/**
 * Parse the attributes that can appear on any tag.
 *
 * "id" is validated with Html_check_name_val() and passed to the style
 * engine; "class" and "style" are passed to the style engine only when
 * CSS is enabled by the preferences (parse_embedded_css or
 * load_stylesheets); the language is taken from "xml:lang" or, failing
 * that, from "lang" (first two characters only).
 *
 * html:    parser state.
 * tag:     text of the tag, starting at '<'.
 * tagsize: length of 'tag'. It is also used as a cheap filter: an
 *          attribute is only looked for when the tag is long enough to
 *          hold it.
 */
3897static void Html_parse_common_attrs(DilloHtml *html, char *tag, int tagsize)
3898{
3899 const char *attrbuf;
3900 char lang[3];
3901
3902 if (tagsize >= 8 && /* length of "<t id=i>" */
3903 (attrbuf = a_Html_get_attr(html, tag, tagsize, "id"))) {
3904 /* According to the SGML declaration of HTML 4, all NAME values
3905 * occurring outside entities must be converted to uppercase
3906 * (this is what "NAMECASE GENERAL YES" says). But the HTML 4
3907 * spec states in Sec. 7.5.2 that anchor ids are case-sensitive.
3908 * So we don't do it and hope for better specs in the future ...
3909 */
3910 Html_check_name_val(html, attrbuf, "id");
3911
3912 html->styleEngine->setId(attrbuf);
3913 }
3914
3915 if (tagsize >= 11 && (prefs.parse_embedded_css || prefs.load_stylesheets)) {
3916 /* length of "<t class=i>" or "<t style=i>" */
3917 attrbuf = a_Html_get_attr(html, tag, tagsize, "class");
3918 if (attrbuf)
3919 html->styleEngine->setClass (attrbuf);
3920
3921 attrbuf = a_Html_get_attr(html, tag, tagsize, "style");
3922 if (attrbuf)
3923 html->styleEngine->setStyle (attrbuf);
3924 }
3925
3926 /* handle "xml:lang" and "lang" attributes
3927 * We use only the first two chars of the value to deal with
3928 * extended language tags (see http://www.rfc-editor.org/rfc/bcp/bcp47.txt)
3929 */
/* 'lang' is zeroed first, so the two-char strncpy() calls below always
 * leave it NUL-terminated. */
3930 memset(lang, 0, sizeof(lang));
3931 if (tagsize >= 14) {
3932 /* length of "<t xml:lang=i>" */
3933 attrbuf = a_Html_get_attr(html, tag, tagsize, "xml:lang");
3934 if (attrbuf)
3935 strncpy(lang, attrbuf, 2);
3936 }
3937 if (!lang[0] && tagsize >= 10) { /* 'xml:lang' prevails over 'lang' */
3938 /* length of "<t lang=i>" */
3939 attrbuf = a_Html_get_attr(html, tag, tagsize, "lang");
3940 if (attrbuf)
3941 strncpy(lang, attrbuf, 2);
3942 }
3943 if (lang[0])
/* NOTE(review): the statement controlled by the 'if' above (original line
 * 3944) is missing from this listing. Presumably it hands 'lang' over to
 * the style engine -- confirm against the real source. */
3945}
3946
3951static void Html_check_html5_obsolete(DilloHtml *html, int ni)
3952{
3953 static int indexes[9] = {-1};
3954
3955 if (indexes[0] == -1) {
3956 indexes[0] = a_Html_tag_index("dir");
3957 indexes[1] = a_Html_tag_index("frame");
3958 indexes[2] = a_Html_tag_index("frameset");
3959 indexes[3] = a_Html_tag_index("isindex");
3960 indexes[4] = a_Html_tag_index("strike");
3961 indexes[5] = a_Html_tag_index("big");
3962 indexes[6] = a_Html_tag_index("center");
3963 indexes[7] = a_Html_tag_index("font");
3964 indexes[8] = a_Html_tag_index("tt");
3965 }
3966 for (int i = 0; i < 9; i++) {
3967 if (indexes[i] == ni) {
3968 BUG_MSG("<%s> is obsolete in HTML5.", Tags[ni].name);
3969 break;
3970 }
3971 }
3972}
3973
/* NOTE(review): this function body is incomplete in this listing: its
 * signature (original line 3974) and the first part of the call it makes
 * (line 3976) are missing. Presumably this is Html_display_block(), which
 * Html_process_tag() calls for DISPLAY_BLOCK -- confirm against the real
 * source. */
3975{
3977 false /* Perhaps true for widgets oof? */);
3978}
3979
/* NOTE(review): the signature of this function (original line 3980) is
 * missing from this listing. Presumably it is the handler for the
 * inline-block display type -- confirm against the real source.
 * It adds a textblock by calling Html_add_textblock(). */
3981{
3982 Html_add_textblock(html, false, 0, true);
3983}
3984
/* NOTE(review): the signature of this function (original line 3985) is
 * missing from this listing. Presumably it is Html_display_listitem(),
 * the handler for DISPLAY_LIST_ITEM -- confirm against the real source.
 *
 * Creates a ListItem widget, adds it to the current textblock between two
 * paragraph breaks, makes it the current textblock, and finally sets its
 * marker (a number text or a Bullet). The list counter and the reference
 * to the previous list item are kept in the parent's stack entry. */
3986{
3987 Style *style = html->style ();
3988 Style *wordStyle = html->wordStyle ();
3989 Widget **ref_list_item;
3990 ListItem *list_item;
3991 int *list_number;
3992 char buf[16];
3993
3994 /* Get our parent tag's variables (used as state storage) */
3995 list_number = &html->stack->getRef(html->stack->size()-2)->list_number;
3996 ref_list_item = &html->stack->getRef(html->stack->size()-2)->ref_list_item;
3997
3998 HT2TB(html)->addParbreak (0, wordStyle);
3999
/* The new item gets the previous one (if any) passed to its constructor,
 * and then becomes the reference for the next item. */
4000 list_item = new ListItem ((ListItem*)*ref_list_item,prefs.limit_text_width);
4001 HT2TB(html)->addWidget (list_item, style);
4002 HT2TB(html)->addParbreak (0, wordStyle);
4003 *ref_list_item = list_item;
4004 S_TOP(html)->textblock = html->dw = list_item;
4005
/* NOTE(review): the conditions opening the "none" and "ordered" branches
 * (original lines 4006 and 4008) are missing from this listing; presumably
 * they test style->listStyleType -- confirm against the real source. */
4007 // none
4009 // ordered
/* the parent's counter is post-incremented for the next item */
4010 numtostr((*list_number)++, buf, 16, style->listStyleType);
4011 list_item->initWithText (buf, wordStyle);
4012 } else {
4013 // unordered
4014 list_item->initWithWidget (new Bullet(), wordStyle);
4015 }
4016}
4017
/**
 * Process a tag, given as 'tag' and 'tagsize'.
 *
 * Unknown tags are ignored (except for the doctype declaration, which is
 * parsed while still outside HTML). For a known tag:
 *  - open tags are pushed into the stack, their common attributes are
 *    parsed, the tag's open function is called, the CSS 'display' value
 *    is acted upon and the tag's content function (if any) is called;
 *  - close tags, elements with forbidden close tag, and XML-style
 *    self-closing tags end up in Html_tag_cleanup_at_close().
 *
 * html:    parser state.
 * tag:     text of the tag, starting at '<'.
 * tagsize: length of 'tag'.
 */
4023static void Html_process_tag(DilloHtml *html, char *tag, int tagsize)
4024{
4025 int ti, ni; /* stack tag index and new tag index */
4026 char *start = tag + 1; /* discard the '<' */
4027 int IsCloseTag = (*start == '/');
4028
4029 dReturn_if (html->stop_parser == true);
4030
/* IsCloseTag is 0 or 1, so adding it skips the '/' of a close tag */
4031 ni = a_Html_tag_index(start + IsCloseTag);
4032 if (ni == -1) {
4033 /* TODO: doctype parsing is a bit fuzzy, but enough for the time being */
4034 if (!(html->InFlags & IN_HTML)) {
4035 if (tagsize > 9 && !dStrnAsciiCasecmp(tag, "<!doctype", 9))
4036 Html_parse_doctype(html, tag, tagsize);
4037 }
4038 /* Ignore unknown tags */
4039 return;
4040 }
4041 _MSG("Html_process_tag: %s%s\n", IsCloseTag ? "/" : "", Tags[ni].name);
4042
4043 if (!IsCloseTag && html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
4044 Html_check_html5_obsolete(html, ni);
4045
/* i: 1 when the previous tag was </html>, 2 when it was </body>, else 0.
 * An <html> or </html> tag right after </body> is not reported. */
4046 int i = html->PrevWasHtmlClose ? 1 : html->PrevWasBodyClose ? 2 : 0;
4047 if (i == 1 || (i == 2 && ni != i_HTML))
4048 BUG_MSG("Content after </%s> tag.", i == 1 ? "html" : "body");
4049 html->PrevWasHtmlClose = html->PrevWasBodyClose = false;
4050
4051 /* Handle HTML, HEAD and BODY. Elements with optional open and close */
4052 if (!(html->InFlags & IN_BODY) /* && parsing HTML */)
4053 Html_test_section(html, ni, IsCloseTag);
4054
4055 /* Tag processing */
4056 ti = S_TOP(html)->tag_idx;
4057 switch (IsCloseTag) {
4058 case 0:
4059 /* Open function */
4060
4061 /* Cleanup before opening a new tag */
4062 if (ti != -1)
/* NOTE(review): the statement controlled by the 'if' above (original line
 * 4063) is missing from this listing; presumably it is the call to
 * Html_stack_cleanup_at_open() -- confirm against the real source. */
4064
4065 /* TODO: this is only raising a warning, but allows the element.
4066 * Note: Apache uses IMG inside PRE. */
4067 if ((html->InFlags & IN_PRE) && Html_tag_pre_excludes(html, ni))
4068 BUG_MSG("<pre> is not allowed to contain <%s>.", Tags[ni].name);
4069
4070 /* Push the tag into the stack */
4071 Html_push_tag(html, ni);
4072
4073 html->startElement (ni);
4074 _MSG("Open : %*s%s\n", html->stack->size(), " ", Tags[ni].name);
4075
4076 /* Parse attributes that can appear on any tag */
4077 Html_parse_common_attrs(html, tag, tagsize);
4078
4079 /* Call the open function for this tag */
4080 _MSG("Html_process_tag Open : %s\n", Tags[ni].name);
4081 Tags[ni].open (html, tag, tagsize);
4082
4083 if (! S_TOP(html)->display_none) {
4084 switch (html->style ()->display) {
4085 case DISPLAY_BLOCK:
4086 Html_display_block(html);
4087 break;
/* NOTE(review): original lines 4088-4089 (another 'case' label and its
 * statement) and line 4092 (the statement for DISPLAY_LIST_ITEM) are
 * missing from this listing -- confirm against the real source. */
4090 break;
4091 case DISPLAY_LIST_ITEM:
4093 break;
4094 case DISPLAY_NONE:
4095 S_TOP(html)->display_none = true;
4096 break;
4097 case DISPLAY_INLINE:
/* a floating inline element is handled as a block */
4098 if (html->style()->vloat != FLOAT_NONE)
4099 Html_display_block(html);
4100 break;
4101 default:
4102 break;
4103 }
4104
4105 if (Tags[ni].content && ! S_TOP(html)->display_none) {
4106 Tags[ni].content (html, tag, tagsize);
4107 }
4108 }
4109
4110 if (html->stop_parser)
4111 break;
4112
/* Switch to PRE parse mode when the style asks for preformatted white
 * space, unless the element is parsed verbatim or is already in PRE mode. */
4113 if (S_TOP(html)->parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
4114 /* don't change anything */
4115 } else if (S_TOP(html)->parse_mode != DILLO_HTML_PARSE_MODE_PRE &&
4116 (html->style ()->whiteSpace == WHITE_SPACE_PRE ||
4117 html->style ()->whiteSpace == WHITE_SPACE_PRE_WRAP)) {
4118 S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_PRE;
4119 html->pre_column = 0;
4120 html->PreFirstChar = true;
4121 }
4122
4123 if (html->styleEngine->getId ())
4124 Html_add_anchor(html, html->styleEngine->getId ());
4125
4126 /* Request immediate close for elements with forbidden close tag. */
4127 /* TODO: XHTML always requires close tags. A simple implementation
4128 * of the commented clause below will make it work. */
4129 if (/* parsing HTML && */ Tags[ni].EndTag == 'F')
4130 html->ReqTagClose = true;
4131
4132 /* Don't break! Open tags may also close themselves */
4133 /* fallthrough */
4134
4135 default:
4136 /* Close function */
4137
4138 /* Test for </x>, ReqTagClose, <x /> and <x/> */
4139 if (*start == '/' || /* </x> */
4140 html->ReqTagClose || /* request */
4141 (tag[tagsize-2] == '/' && /* XML: */
4142 (strchr(" \"'", tag[tagsize-3]) || /* [ "']/> */
4143 (size_t)tagsize == strlen(Tags[ni].name) + 3))) { /* <x/> */
4144
4145 _MSG("Html_process_tag Close: %s\n", Tags[ni].name);
4146 Html_tag_cleanup_at_close(html, ni);
4147 /* This was a close tag */
4148 html->ReqTagClose = false;
4149 }
4150 }
4151}
4152
/**
 * Get the value of the attribute 'attrname' inside 'tag'.
 *
 * The tag is scanned with a small state machine; attribute names are
 * matched ASCII case-insensitively. The value is built in html->attr_data,
 * which is emptied at the start of every call.
 *
 * html:              parser state (owner of the 'attr_data' buffer).
 * tag, tagsize:      the tag text and its length.
 * attrname:          attribute to look for; must not be empty.
 * tag_parsing_flags: bitmask of HTML_LeftTrim, HTML_RightTrim and
 *                    HTML_ParseEntities.
 *
 * Return: pointer to the value (the string held by html->attr_data, so it
 * is overwritten by the next call), or NULL when the attribute isn't found.
 * An attribute present without a value yields an empty string.
 */
4163static const char *Html_get_attr2(DilloHtml *html,
4164 const char *tag,
4165 int tagsize,
4166 const char *attrname,
4167 int tag_parsing_flags)
4168{
4169 int i, entsize, Found = 0, delimiter = 0, attr_pos = 0;
4170 Dstr *Buf = html->attr_data;
/* NOTE(review): the declaration of 'state' (original line 4171) is missing
 * from this listing; presumably it starts as SEEK_ATTR_START -- confirm
 * against the real source. */
4172
4173 dReturn_val_if_fail(*attrname, NULL);
4174
4175 dStr_truncate(Buf, 0);
4176
/* start at 1: tag[0] is skipped */
4177 for (i = 1; i < tagsize; ++i) {
4178 switch (state) {
4179 case SEEK_ATTR_START:
4180 if (isspace(tag[i]))
4181 state = SEEK_TOKEN_START;
4182 else if (tag[i] == '=')
4183 state = SEEK_VALUE_START;
4184 break;
4185
4186 case MATCH_ATTR_NAME:
/* Full match: the whole name was consumed and a delimiter follows.
 * The delimiter is re-read (--i) in the SEEK_TOKEN_START state. */
4187 if (!attrname[attr_pos] &&
4188 (tag[i] == '=' || isspace(tag[i]) || tag[i] == '>')) {
4189 Found = 1;
4190 state = SEEK_TOKEN_START;
4191 --i;
4192 } else if (!tag[i]) {
4193 state = SEEK_ATTR_START; // NULL byte is not allowed
4194 } else {
4195 if (D_ASCII_TOLOWER(tag[i]) != D_ASCII_TOLOWER(attrname[attr_pos]))
4196 state = SEEK_ATTR_START;
4197 attr_pos++;
4198 }
4199 break;
4200
4201 case SEEK_TOKEN_START:
4202 if (tag[i] == '=') {
4203 state = SEEK_VALUE_START;
4204 } else if (!isspace(tag[i])) {
/* A new token starts here. If the attribute was already found it has no
 * value, so we're done; otherwise try to match this token's name. */
4205 attr_pos = 0;
4206 state = (Found) ? FINISHED : MATCH_ATTR_NAME;
4207 --i;
4208 }
4209 break;
4210 case SEEK_VALUE_START:
4211 if (!isspace(tag[i])) {
/* An unquoted value uses ' ' as its delimiter marker; in that case the
 * current char belongs to the value, so it's re-read (i is stepped back). */
4212 delimiter = (tag[i] == '"' || tag[i] == '\'') ? tag[i] : ' ';
4213 i -= (delimiter == ' ');
4214 state = (Found) ? GET_VALUE : SKIP_VALUE;
4215 }
4216 break;
4217
4218 case SKIP_VALUE:
4219 if ((delimiter == ' ' && isspace(tag[i])) || tag[i] == delimiter)
4220 state = SEEK_TOKEN_START;
4221 break;
4222 case GET_VALUE:
4223 if ((delimiter == ' ' && (isspace(tag[i]) || tag[i] == '>')) ||
4224 tag[i] == delimiter) {
4225 state = FINISHED;
4226 } else if (tag[i] == '&' &&
4227 (tag_parsing_flags & HTML_ParseEntities)) {
4228 const char *entstr;
4229 const bool_t is_attr = TRUE;
4230
/* A parsed entity is appended in place of its source text, which is
 * skipped; an unparsable one is copied literally, starting with its '&'. */
4231 if ((entstr = Html_parse_entity(html, tag+i, tagsize-i, &entsize,
4232 is_attr))) {
4233 dStr_append(Buf, entstr);
4234 i += entsize-1;
4235 } else {
4236 dStr_append_c(Buf, tag[i]);
4237 }
4238 } else if (tag[i] == '\r' || tag[i] == '\t') {
4239 dStr_append_c(Buf, ' ');
4240 } else if (tag[i] == '\n') {
4241 /* ignore */
4242 } else {
4243 dStr_append_c(Buf, tag[i]);
4244 }
4245 break;
4246
4247 case FINISHED:
/* force the loop to end */
4248 i = tagsize;
4249 break;
4250 }
4251 }
4252
4253 if (tag_parsing_flags & HTML_LeftTrim)
4254 while (isspace(Buf->str[0]))
4255 dStr_erase(Buf, 0, 1);
4256 if (tag_parsing_flags & HTML_RightTrim)
4257 while (Buf->len && isspace(Buf->str[Buf->len - 1]))
4258 dStr_truncate(Buf, Buf->len - 1);
4259
4260 return (Found) ? Buf->str : NULL;
4261}
4262
/**
 * Get the value of the attribute 'attrname' inside 'tag', by calling
 * Html_get_attr2() with a fixed set of parsing flags.
 *
 * Return: what Html_get_attr2() returns: a pointer to the value, or NULL
 * when the attribute isn't found.
 */
4266const char *a_Html_get_attr(DilloHtml *html,
4267 const char *tag,
4268 int tagsize,
4269 const char *attrname)
4270{
4271 return Html_get_attr2(html, tag, tagsize, attrname,
/* NOTE(review): the last argument of the call above -- the parsing flags
 * (original line 4272) -- is missing from this listing; confirm against
 * the real source. */
4273}
4274
/* NOTE(review): the first line of this definition (original line 4280:
 * return type, function name and first parameter) is missing from this
 * listing. Presumably it is a_Html_get_attr_wdef(), taking 'html' as its
 * first parameter -- confirm against the real source.
 *
 * Get the value of the attribute 'attrname', falling back to 'def' when
 * the attribute isn't found. In both cases the result is a copy made with
 * dStrdup() (presumably to be released by the caller -- verify). */
4281 const char *tag,
4282 int tagsize,
4283 const char *attrname,
4284 const char *def)
4285{
4286 const char *attrbuf = a_Html_get_attr(html, tag, tagsize, attrname);
4287
4288 return attrbuf ? dStrdup(attrbuf) : dStrdup(def);
4289}
4290
4299static void Html_callback(int Op, CacheClient_t *Client)
4300{
4301 DilloHtml *html = (DilloHtml*)Client->CbData;
4302
4303 if (Op) { /* EOF */
4304 html->write((char*)Client->Buf, Client->BufSize, 1);
4305 html->finishParsing(Client->Key);
4306 } else {
4307 html->write((char*)Client->Buf, Client->BufSize, 0);
4308 }
4309}
4310
/**
 * Tokenize 'buf' and dispatch each complete token to its handler:
 * white space goes to Html_process_space(), tags to Html_process_tag(),
 * words to Html_process_word(), and comments are thrown away. While the
 * element at the top of the stack is in the non-HTML parse mode tested at
 * the top of the loop, everything up to its closing tag is copied into
 * html->Stash instead.
 *
 * html:    parser state.
 * buf:     data to parse, starting at a token boundary. It is modified
 *          temporarily (and restored) while a word is being processed.
 * bufsize: number of bytes in 'buf'.
 * Eof:     non-zero when no more data will follow; then a word reaching
 *          the end of the buffer is processed too.
 *
 * Return: the offset in 'buf' of the first byte not processed, i.e. the
 * start of a token that is still incomplete (bufsize when none is).
 *
 * NOTE(review): the strcspn() calls and the buf[buf_index + 1] look-aheads
 * assume that 'buf' is NUL-terminated -- confirm with the caller.
 */
4315static int Html_write_raw(DilloHtml *html, char *buf, int bufsize, int Eof)
4316{
4317 char ch = 0, *p, *text;
4318 int token_start, buf_index;
4319
4320 /* Now, 'buf' and 'bufsize' define a buffer aligned to start at a token
4321 * boundary. Iterate through tokens until end of buffer is reached. */
4322 buf_index = 0;
4323 token_start = buf_index;
4324 while ((buf_index < bufsize) && !html->stop_parser) {
4325 /* invariant: buf_index == bufsize || token_start == buf_index */
4326
4327 if (S_TOP(html)->parse_mode ==
/* NOTE(review): the rest of this condition (original line 4328) is missing
 * from this listing; judging by the comments below it presumably compares
 * against the VERBATIM parse mode -- confirm against the real source. */
4329 /* Non HTML code here, let's skip until closing tag */
4330 do {
4331 const char *tag = Tags[S_TOP(html)->tag_idx].name;
4332 buf_index += strcspn(buf + buf_index, "<");
/* not enough data left to hold "</" + tag name + one more char: wait */
4333 if (buf_index + (int)strlen(tag) + 3 > bufsize) {
4334 buf_index = bufsize;
4335 } else if (strncmp(buf + buf_index, "</", 2) == 0 &&
4336 Html_match_tag(tag, buf+buf_index+2, strlen(tag)+1)) {
4337 /* copy VERBATIM text into the stash buffer */
4338 text = dStrndup(buf + token_start, buf_index - token_start);
4339 dStr_append(html->Stash, text);
4340 dFree(text);
4341 token_start = buf_index;
4342 break;
4343 } else
4344 ++buf_index;
4345 } while (buf_index < bufsize);
4346
4347 if (buf_index == bufsize)
4348 break;
4349 }
4350
4351 if (isspace(buf[buf_index])) {
4352 /* whitespace: group all available whitespace */
4353 while (++buf_index < bufsize && isspace(buf[buf_index])) ;
4354 Html_process_space(html, buf + token_start, buf_index - token_start);
4355 token_start = buf_index;
4356
4357 } else if (buf[buf_index] == '<' && (ch = buf[buf_index + 1]) &&
4358 (isalpha(ch) || strchr("/!?", ch)) ) {
4359 /* Tag */
4360 if (buf_index + 3 < bufsize && !strncmp(buf + buf_index, "<!--", 4)) {
4361 /* Comment: search for close of comment, skipping over
4362 * everything except a matching "-->" tag. */
4363 while ( (p = (char*) memchr(buf + buf_index, '>',
4364 bufsize - buf_index)) ){
4365 buf_index = p - buf + 1;
4366 if (p[-1] == '-' && p[-2] == '-') break;
4367 }
4368 if (p) {
4369 /* Got the whole comment. Let's throw it away! :) */
4370 token_start = buf_index;
4371 } else
/* incomplete comment: token_start is left at its beginning */
4372 buf_index = bufsize;
4373 } else {
4374 /* Tag: search end of tag (skipping over quoted strings) */
4375 html->CurrOfs = html->Start_Ofs + token_start;
4376
4377 while ( buf_index < bufsize ) {
4378 buf_index++;
4379 buf_index += strcspn(buf + buf_index, ">\"'<");
4380 if ((ch = buf[buf_index]) == '>') {
4381 break;
4382 } else if (ch == '"' || ch == '\'') {
4383 /* Skip over quoted string */
4384 buf_index++;
4385 buf_index += strcspn(buf + buf_index,
4386 (ch == '"') ? "\">" : "'>");
4387 if (buf[buf_index] == '>') {
4388 /* Unterminated string value? Let's look ahead and test:
4389 * (<: unterminated, closing-quote: terminated) */
4390 int offset = buf_index + 1;
4391 offset += strcspn(buf + offset,
4392 (ch == '"') ? "\"<" : "'<");
4393 if (buf[offset] == ch || !buf[offset]) {
4394 buf_index = offset;
4395 } else {
4396 BUG_MSG("Attribute lacks closing quote.");
4397 break;
4398 }
4399 }
4400 } else if (ch == '<') {
4401 /* unterminated tag detected */
4402 p = dStrndup(buf+token_start+1,
4403 strcspn(buf+token_start+1, " <\n\r\t"));
4404 BUG_MSG("<%s> lacks its closing '>'.", p);
4405 dFree(p);
/* step back so that the tag ends right before the new '<' */
4406 --buf_index;
4407 break;
4408 }
4409 }
4410 if (buf_index < bufsize) {
4411 buf_index++;
4412 Html_process_tag(html, buf + token_start,
4413 buf_index - token_start);
4414 token_start = buf_index;
4415 }
4416 }
4417 } else {
4418 /* A Word: search for whitespace or tag open */
4419 html->CurrOfs = html->Start_Ofs + token_start;
4420
4421 while (++buf_index < bufsize) {
4422 buf_index += strcspn(buf + buf_index, " <\n\r\t\f\v");
/* a '<' that can't start a tag is part of the word */
4423 if (buf[buf_index] == '<' && (ch = buf[buf_index + 1]) &&
4424 !isalpha(ch) && !strchr("/!?", ch))
4425 continue;
4426 break;
4427 }
4428 if (buf_index < bufsize || Eof) {
4429 /* successfully found end of token */
/* NUL-terminate the word in place for the handler, then restore the byte */
4430 ch = buf[buf_index];
4431 buf[buf_index] = 0;
4432 Html_process_word(html, buf + token_start,
4433 buf_index - token_start);
4434 buf[buf_index] = ch;
4435 token_start = buf_index;
4436 }
4437 }
4438 }/*while*/
4439
4440 HT2TB(html)->flush ();
4441
4442 return token_start;
4443}
4444
4445
#define B8(d)
Definition binaryconst.h:32
#define _MSG(...)
Definition bookmarks.c:45
#define MSG(...)
Definition bookmarks.c:46
bool_t a_Bw_expecting(BrowserWindow *bw)
Definition bw.c:334
void a_Bw_close_client(BrowserWindow *bw, int ClientKey)
Close a cache-client upon successful retrieval.
Definition bw.c:167
void a_Bw_add_url(BrowserWindow *bw, const DilloUrl *Url)
Add an URL to the browser window's list.
Definition bw.c:209
void a_Bw_add_client(BrowserWindow *bw, int Key, int Root)
Add a reference to a cache-client.
Definition bw.c:128
void a_Bw_remove_doc(BrowserWindow *bw, void *vdoc)
Remove a document from the bw's list.
Definition bw.c:263
void a_Bw_add_doc(BrowserWindow *bw, void *vdoc)
Add a document to the browser window's list.
Definition bw.c:221
void(* CA_Callback_t)(int Op, CacheClient_t *Client)
Callback type for cache clients.
Definition cache.h:43
int a_Capi_get_buf(const DilloUrl *Url, char **PBuf, int *BufSize)
Get the cache's buffer for the URL, and its size.
Definition capi.c:541
const char * a_Capi_set_content_type(const DilloUrl *url, const char *ctype, const char *from)
Set the Content-Type for the URL.
Definition capi.c:565
int a_Capi_dpi_verify_request(BrowserWindow *bw, DilloUrl *url)
Safety test: only allow GET|POST dpi-urls from dpi-generated pages.
Definition capi.c:232
void a_Capi_unref_buf(const DilloUrl *Url)
Unref the cache's buffer when no longer using it.
Definition capi.c:549
int a_Capi_open_url(DilloWeb *web, CA_Callback_t Call, void *CbData)
Most used function for requesting a URL.
Definition capi.c:392
int a_Capi_get_flags_with_redirection(const DilloUrl *Url)
Same as a_Capi_get_flags() but following redirections.
Definition capi.c:530
#define CAPI_IsCached
Definition capi.h:15
#define CAPI_Completed
Definition capi.h:19
A list of CssProperty objects.
Definition css.hh:328
bool enter(dw::core::Widget *widget, int link, int img, int x, int y)
Called, when a link is entered, left, or the position has changed.
Definition html.cc:739
bool press(dw::core::Widget *widget, int link, int img, int x, int y, dw::core::EventButton *event)
Handle the "press" signal.
Definition html.cc:759
bool click(dw::core::Widget *widget, int link, int img, int x, int y, dw::core::EventButton *event)
Handle the "click" signal.
Definition html.cc:793
bool ReqTagClose
Flag to close the stack's top tag.
void freeParseData()
Free parsing data.
Definition html.cc:626
int InFlags
tracks which elements we are in
DilloHtmlDocumentType DocType
DilloUrl * base_url
int getCurrLineNumber()
Return the line number of the tag/word being processed by the parser.
Definition html.cc:604
bool stop_parser
void bugMessage(const char *format,...)
Collect HTML error strings.
Definition html.cc:152
lout::misc::SimpleVector< DilloHtmlState > * stack
void write(char *Buf, int BufSize, int Eof)
Process the newly arrived html and put it into the page structure.
Definition html.cc:577
dw::core::style::Style * backgroundStyle()
char * Start_Buf
int formNew(DilloHtmlMethod method, const DilloUrl *action, DilloHtmlEnc enc, const char *charset)
Allocate and insert form information.
Definition html.cc:666
void finishParsing(int ClientKey)
Finish parsing a HTML page.
Definition html.cc:640
bool PreFirstChar
used to skip the first CR or CRLF in PRE tags
void startElement(int tag)
size_t CurrOfs
void addCssUrl(const DilloUrl *url)
Save URL in a vector (may be loaded later).
Definition html.cc:732
void initDw()
Miscellaneous initializations for Dw.
Definition html.cc:521
bool PrevWasCR
Flag to help parsing of "\r\n" in PRE tags.
float DocTypeVersion
Dstr * Stash
bool TagSoup
Flag to enable the parser's cleanup functions.
bool_t unloadedImages()
Definition html.cc:688
int pre_column
current column, used in PRE tags with tabs
DilloUrl * page_url
lout::misc::SimpleVector< DilloHtmlInput * > * inputs_outside_form
bool PrevWasHtmlClose
set when </html> is found
void restyle()
uchar_t Num_BODY
dw::core::style::Style * wordStyle()
bool StashSpace
BrowserWindow * bw
int32_t non_css_visited_color
as provided by vlink attribute in BODY
size_t OldLine
bool loadCssFromStash
current stash content should be loaded as CSS
lout::misc::SimpleVector< DilloHtmlForm * > * forms
dw::ImageMapsList maps
DilloHtml(BrowserWindow *bw, const DilloUrl *url, const char *content_type)
Create and initialize a new DilloHtml class.
Definition html.cc:431
~DilloHtml()
Free memory used by the DilloHtml class.
Definition html.cc:535
uchar_t Num_TITLE
dw::core::style::Style * style()
size_t OldOfs
uchar_t Num_HTML
element counters: used for validation purposes.
lout::misc::SimpleVector< DilloUrl * > * links
dw::core::Widget * dw
int32_t non_css_link_color
as provided by link attribute in BODY
char * content_type
Dstr * attr_data
Buffer for attribute value.
bool PrevWasBodyClose
set when </body> is found
lout::misc::SimpleVector< DilloUrl * > * cssUrls
int32_t visited_color
as computed according to CSS
uchar_t Num_HEAD
char * charset
bool InVisitedLink
used to 'contrast_visited_colors'
StyleEngine * styleEngine
lout::misc::SimpleVector< DilloHtmlImage * > * images
HtmlLinkReceiver linkReceiver
DilloHtmlForm * getCurrentForm()
Get the current form.
Definition html.cc:683
void loadImages(const DilloUrl *pattern)
Load images if they were disabled.
Definition html.cc:701
This class provides the glue between HTML parser and CSS subsystem.
void parse(DilloHtml *html, DilloUrl *url, const char *buf, int buflen, CssOrigin origin)
void setPseudoVisited()
set the CSS pseudo class :visited.
void setNonCssHint(CssPropertyName name, CssValueType type, CssPropertyValue value)
void setId(const char *id)
const char * getId()
dw::core::style::Color * backgroundColor()
void endElement(int tag)
tell the styleEngine that a html element has ended.
void setPseudoLink()
set the CSS pseudo class :link.
void setStyle(const char *style)
void setClass(const char *klass)
void inheritBackgroundColor()
Use of the background color of the parent style as default.
dw::core::style::StyleImage * backgroundImage(dw::core::style::BackgroundRepeat *bgRepeat, dw::core::style::BackgroundAttachment *bgAttachment, dw::core::style::Length *bgPositionX, dw::core::style::Length *bgPositionY)
Displays different kind of bullets.
Definition bullet.hh:15
void addShapeToCurrentMap(core::Shape *shape, int link)
Add a shape to the current map-.
Definition image.cc:109
void startNewMap(lout::object::Object *key)
Start a new map and make it the current one.
Definition image.cc:97
void setCurrentMapDefaultLink(int link)
Set default link for current map-.
Definition image.cc:117
Displays an instance of dw::core::Imgbuf.
Definition image.hh:120
void setIsMap()
Sets image as server side image map.
Definition image.cc:540
void setUseMap(ImageMapsList *list, Object *key)
Sets image as client side image map.
Definition image.cc:553
void forceMapRedraw()
Definition image.hh:179
void initWithText(const char *text, core::style::Style *style)
Definition listitem.cc:58
void initWithWidget(core::Widget *widget, core::style::Style *style)
Definition listitem.cc:48
Widget for drawing (horizontal) rules.
Definition ruler.hh:21
A Widget for rendering text blocks, i.e.
Definition textblock.hh:206
void addWidget(core::Widget *widget, core::style::Style *style)
Add a widget (word type) to the page.
void addSpace(core::style::Style *style)
?
void addText(const char *text, size_t len, core::style::Style *style)
Add a word to the page structure.
void addParbreak(int space, core::style::Style *style)
Cause a paragraph break.
static int CLASS_ID
Definition textblock.hh:867
dw::core::Shape implementation for simple circles.
Definition types.hh:91
Represents a button press or release event.
Definition events.hh:58
ButtonState state
Definition events.hh:42
dw::core::Shape implementation for polygons.
Definition types.hh:105
void addPoint(int x, int y)
Definition types.cc:156
dw::core::Shape implementation for simple rectangles.
Definition types.hh:70
Abstract interface for different shapes.
Definition types.hh:59
The base class of all dillo widgets.
Definition widget.hh:44
virtual void setStyle(style::Style *style)
Change the style of a widget.
Definition widget.cc:1233
style::Color * getFgColor()
Get the actual foreground color of a widget.
Definition widget.cc:1367
Widget * getParent()
Definition widget.hh:573
Layout * getLayout()
Definition widget.hh:588
style::Color * getBgColor()
Get the actual background of a widget.
Definition widget.cc:1348
ListStyleType listStyleType
Definition style.hh:567
static Style * create(StyleAttrs *attrs)
Definition style.hh:628
Simple (simpler than container::untyped::Vector and container::typed::Vector) template based vector.
Definition misc.hh:95
void setSize(int newSize)
Set the size explicitly.
Definition misc.hh:168
void increase()
Increase the vector size by one.
Definition misc.hh:161
void set(int i, T t)
Store an object in the vector.
Definition misc.hh:247
T get(int i) const
Return the one element, explicitly.
Definition misc.hh:202
int size() const
Return the number of elements put into this vector.
Definition misc.hh:142
T * getRef(int i) const
Return the reference of one element.
Definition misc.hh:191
int32_t a_Color_parse(const char *str, int32_t default_color, int *err)
Parse a color string.
Definition colors.c:258
int32_t a_Color_vc(int32_t candidate, int32_t C_txt, int32_t C_lnk, int32_t C_bg)
Return a suitable "visited link" color.
Definition colors.c:344
@ CSS_PROPERTY_LIST_STYLE_TYPE
Definition css.hh:208
@ CSS_PROPERTY_MARGIN_BOTTOM
Definition css.hh:209
@ CSS_PROPERTY_BORDER_RIGHT_STYLE
Definition css.hh:176
@ CSS_PROPERTY_MARGIN_RIGHT
Definition css.hh:211
@ PROPERTY_X_LANG
Definition css.hh:247
@ CSS_PROPERTY_BORDER_RIGHT_WIDTH
Definition css.hh:177
@ CSS_PROPERTY_BORDER_LEFT_WIDTH
Definition css.hh:174
@ PROPERTY_X_LINK
Definition css.hh:246
@ PROPERTY_X_TOOLTIP
Definition css.hh:249
@ CSS_PROPERTY_MARGIN_LEFT
Definition css.hh:210
@ CSS_PROPERTY_HEIGHT
Definition css.hh:202
@ CSS_PROPERTY_WIDTH
Definition css.hh:240
@ PROPERTY_X_IMG
Definition css.hh:248
@ CSS_PROPERTY_BORDER_TOP_WIDTH
Definition css.hh:181
@ CSS_PROPERTY_MARGIN_TOP
Definition css.hh:212
@ CSS_PROPERTY_VERTICAL_ALIGN
Definition css.hh:237
@ CSS_PROPERTY_BORDER_LEFT_STYLE
Definition css.hh:173
@ CSS_PROPERTY_TEXT_ALIGN
Definition css.hh:230
@ CSS_PROPERTY_BORDER_TOP_STYLE
Definition css.hh:180
@ CSS_PROPERTY_BACKGROUND_COLOR
Definition css.hh:164
@ CSS_PROPERTY_COLOR
Definition css.hh:186
@ CSS_PROPERTY_BORDER_BOTTOM_STYLE
Definition css.hh:169
@ CSS_PROPERTY_BORDER_BOTTOM_WIDTH
Definition css.hh:170
@ CSS_PROPERTY_FONT_FAMILY
Definition css.hh:195
@ CSS_TYPE_ENUM
Value is i, if represented by enum_symbols[i].
Definition css.hh:39
@ CSS_TYPE_INTEGER
This type is only used internally, for x-* properties.
Definition css.hh:37
@ CSS_TYPE_LENGTH_PERCENTAGE
<length> or <percentage>.
Definition css.hh:43
@ CSS_TYPE_STRING
<string>
Definition css.hh:58
@ CSS_TYPE_SYMBOL
Symbols, which are directly copied (as opposed to CSS_TYPE_ENUM and CSS_TYPE_MULTI_ENUM).
Definition css.hh:59
@ CSS_TYPE_COLOR
Represented as integer.
Definition css.hh:55
int CssLength
Lengths are represented as int in the following way:
Definition css.hh:91
CssLengthType CSS_LENGTH_TYPE(CssLength l)
Definition css.hh:138
CssLength CSS_CREATE_LENGTH(float v, CssLengthType t)
Definition css.hh:106
@ CSS_LENGTH_TYPE_PX
Definition css.hh:95
@ CSS_LENGTH_TYPE_RELATIVE
This does not exist in CSS but is used in HTML.
Definition css.hh:101
@ CSS_LENGTH_TYPE_PERCENTAGE
Definition css.hh:100
@ CSS_LENGTH_TYPE_AUTO
This can be used as a simple value.
Definition css.hh:103
float CSS_LENGTH_VALUE(CssLength l)
Definition css.hh:142
@ CSS_ORIGIN_AUTHOR
Definition css.hh:33
unsigned int uint_t
Definition d_size.h:20
unsigned char bool_t
Definition d_size.h:21
char * a_Url_decode_hex_str(const char *str, size_t *p_sz)
Definition datauri.c:137
static const int bufsize
Definition decode.c:21
char * dStrconcat(const char *s1,...)
Concatenate a NULL-terminated list of strings.
Definition dlib.c:102
void dFree(void *mem)
Definition dlib.c:68
int dStrAsciiCasecmp(const char *s1, const char *s2)
Definition dlib.c:203
void dStr_sprintfa(Dstr *ds, const char *format,...)
Printf-like function that appends.
Definition dlib.c:464
void dStr_append(Dstr *ds, const char *s)
Append a C string to a Dstr.
Definition dlib.c:316
char * dStrdup(const char *s)
Definition dlib.c:77
Dstr * dStr_sized_new(int sz)
Create a new string with a given size.
Definition dlib.c:254
int dStrnAsciiCasecmp(const char *s1, const char *s2, size_t n)
Definition dlib.c:215
void dStr_erase(Dstr *ds, int pos_0, int len)
Erase a substring.
Definition dlib.c:388
void dStr_free(Dstr *ds, int all)
Free a dillo string.
Definition dlib.c:337
char * dStriAsciiStr(const char *haystack, const char *needle)
Case insensitive strstr.
Definition dlib.c:184
void dStr_append_l(Dstr *ds, const char *s, int l)
Append a C string to a Dstr (providing length).
Definition dlib.c:308
void dStr_append_c(Dstr *ds, int c)
Append one character.
Definition dlib.c:349
char * dStrndup(const char *s, size_t sz)
Definition dlib.c:88
void dStr_sprintf(Dstr *ds, const char *format,...)
Printf-like function.
Definition dlib.c:450
void dStr_vsprintfa(Dstr *ds, const char *format, va_list argp)
vsprintf-like function that appends.
Definition dlib.c:401
Dstr * dStr_new(const char *s)
Create a new string.
Definition dlib.c:325
void dStr_truncate(Dstr *ds, int len)
Truncate a Dstr to be 'len' bytes long.
Definition dlib.c:368
char * dStrnfill(size_t len, char c)
Return a new string of length 'len' filled with 'c' characters.
Definition dlib.c:149
#define dReturn_if_fail(expr)
Definition dlib.h:72
#define dReturn_val_if_fail(expr, val)
Definition dlib.h:76
#define dReturn_if(expr)
Definition dlib.h:64
#define dReturn_val_if(expr, val)
Definition dlib.h:68
#define D_ASCII_TOLOWER(c)
Definition dlib.h:37
#define TRUE
Definition dlib.h:23
#define FALSE
Definition dlib.h:19
#define dNew(type, count)
Definition dlib.h:49
@ SKIP_VALUE
Definition dpip.c:70
static Image * image
#define IMAGE_MAX_AREA
Definition fltkimgbuf.cc:27
void a_History_set_title_by_url(const DilloUrl *url, const char *title)
Set the page-title for a given URL.
Definition history.c:130
static void Html_tag_open_video(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2510
static void Html_tag_open_title(DilloHtml *html, const char *tag, int tagsize)
Handle open TITLE.
Definition html.cc:1687
const char * a_Html_get_attr(DilloHtml *html, const char *tag, int tagsize, const char *attrname)
Call Html_get_attr2 telling it to parse entities and strip the result.
Definition html.cc:4266
static misc::SimpleVector< int > * Html_read_coords(DilloHtml *html, const char *str)
Read coords in a string, returning a vector of ints.
Definition html.cc:2365
DilloHtmlTagParsingFlags
Definition html.cc:86
@ HTML_RightTrim
Definition html.cc:88
@ HTML_LeftTrim
Definition html.cc:87
@ HTML_ParseEntities
Definition html.cc:89
static int i_HR
Definition html.cc:131
static void Html_tag_open_a(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2693
static void Html_css_load_callback(int Op, CacheClient_t *Client)
Called by the network engine when a stylesheet has new data.
Definition html.cc:3232
static void Html_real_pop_tag(DilloHtml *html)
Pop the top tag in the stack.
Definition html.cc:1374
static void Html_tag_open_blockquote(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2772
static int i_OPTGROUP
Definition html.cc:133
static void Html_push_tag(DilloHtml *html, int tag_idx)
Push the tag (copying attributes from the top of the stack)
Definition html.cc:1348
static void Html_tag_open_dir(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2840
static void Html_tag_close_a(DilloHtml *html)
Definition html.cc:2763
bool a_Html_tag_set_valign_attr(DilloHtml *html, const char *tag, int tagsize)
Evaluates the VALIGN attribute (top|bottom|middle|baseline) and sets the style in style_attrs.
Definition html.cc:362
static const TagInfo Tags[]
Function index for the open, content, and close functions for each tag.
Definition html.cc:3468
static int Html_triggers_optional_close(int old_idx, int cur_idx)
For elements with optional close, check whether is time to close, by also following Firefox's de fact...
Definition html.cc:3636
static void Html_tag_open_html(DilloHtml *html, const char *tag, int tagsize)
Handle open HTML element.
Definition html.cc:1605
static bool Html_must_add_breaks(DilloHtml *html)
Definition html.cc:423
static void Html_tag_content_map(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2314
static void Html_add_anchor(DilloHtml *html, const char *name)
Register an anchor for this page.
Definition html.cc:2674
static void Html_tag_open_span(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3385
static void Html_free(void *data)
Definition html.cc:221
static void Html_tag_open_pre(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3050
static void Html_tag_open_body(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:1782
static void Html_test_section(DilloHtml *html, int new_idx, int IsCloseTag)
HTML, HEAD and BODY elements have optional open and close tags.
Definition html.cc:3843
static int Html_set_new_link(DilloHtml *html, DilloUrl **url)
Create a new link, set it as the url's parent and return the index.
Definition html.cc:305
void a_Html_form_display_hiddens(void *v_html, void *v_form, bool_t display)
Used by the "Show/Hide hiddens" form menuitem.
Definition html.cc:278
static bool Html_contains_form(DilloHtml *html, void *v_form)
Search for form.
Definition html.cc:239
static void Html_parse_doctype(DilloHtml *html, const char *tag, int tagsize)
Handle DOCTYPE declaration.
Definition html.cc:1519
static void Html_tag_open_q(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2781
DilloUrl * a_Html_url_new(DilloHtml *html, const char *url_str, const char *base_url, int use_base_url)
Wrapper for a_Url_new that adds an error detection message.
Definition html.cc:171
static void Html_tag_content_frame(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:1939
static int i_TH
Definition html.cc:140
static void Html_display_listitem(DilloHtml *html)
Definition html.cc:3985
static int Html_charref_comp(const void *a, const void *b)
Comparison function for binary search.
Definition html.cc:957
static void Html_tag_open_frame(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:1914
static const char * Html_parse_entity(DilloHtml *html, const char *token, int toksize, int *entsize, bool_t is_attr)
Given an entity, return the corresponding string.
Definition html.cc:1034
static void Html_tag_open_ul(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2807
DilloHtmlTagParsingState
Definition html.cc:76
@ SKIP_VALUE
Definition html.cc:81
@ SEEK_TOKEN_START
Definition html.cc:79
@ MATCH_ATTR_NAME
Definition html.cc:78
@ SEEK_ATTR_START
Definition html.cc:77
@ SEEK_VALUE_START
Definition html.cc:80
@ FINISHED
Definition html.cc:83
@ GET_VALUE
Definition html.cc:82
static void Html_add_textblock(DilloHtml *html, bool addBreaks, int breakSpace, bool addBreakOpt)
Create and add a new Textblock to the current Textblock.
Definition html.cc:392
#define NTAGS
Definition html.cc:3581
static void Html_tag_open_h(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:1995
static void Html_process_word(DilloHtml *html, const char *word, int size)
Handles putting the word into its proper place.
Definition html.cc:1215
static void Html_tag_open_object(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2473
static int i_BUTTON
Definition html.cc:127
void a_Html_tag_set_align_attr(DilloHtml *html, const char *tag, int tagsize)
Evaluates the ALIGN attribute (left|center|right|justify) and sets the style at the top of the stack.
Definition html.cc:317
static int i_DT
Definition html.cc:129
static void Html_tag_open_menu(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2856
static int i_SELECT
Definition html.cc:136
static void Html_display_block(DilloHtml *html)
Definition html.cc:3974
static void Html_tag_close_li(DilloHtml *html)
Definition html.cc:2942
static void Html_tag_close_pre(DilloHtml *html)
Definition html.cc:3061
static void Html_tag_content_frameset(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:1984
static Charref_t * Html_charref_search(char *key)
Binary search of 'key' in charref list.
Definition html.cc:965
static void Html_tag_open_link(DilloHtml *html, const char *tag, int tagsize)
Parse the LINK element (Only CSS stylesheets by now).
Definition html.cc:3298
static void Html_tag_content_br(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2007
static void Html_tag_close_media(DilloHtml *html)
Media (AUDIO/VIDEO) close function.
Definition html.cc:2613
static void Html_parse_common_attrs(DilloHtml *html, char *tag, int tagsize)
Parse attributes that can appear on any tag.
Definition html.cc:3897
static const char * Html_parse_numeric_charref(DilloHtml *html, char *tok, bool_t is_attr, int *entsize)
Parse a numeric character reference (e.g., "&#47;" or "&#x2F;").
Definition html.cc:875
static void Html_tag_content_object(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2499
static void Html_tag_open_source(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2575
static int i_A
Definition html.cc:125
static void Html_tag_close_q(DilloHtml *html)
Definition html.cc:2796
static void Html_tag_close_map(DilloHtml *html)
Handle close <MAP>.
Definition html.cc:2339
static void Html_eventually_pop_dw(DilloHtml *html, bool hand_over_break)
This function is called after popping the stack, to handle nested Textblock widgets.
Definition html.cc:1335
static void Html_tag_open_hr(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2951
static CssLength Html_parse_length_or_multi_length(const char *attr, char **endptr)
Definition html.cc:1393
static void Html_process_space_pre_line(DilloHtml *html, const char *space, int spacesize)
For white-space: pre-line, we must break the line if encountering a newline.
Definition html.cc:1107
static void Html_tag_cleanup_at_close(DilloHtml *html, int TagIdx)
Conditional cleanup of the stack, called before closing any tag.
Definition html.cc:3796
static void Html_process_space(DilloHtml *html, const char *space, int spacesize)
Parse spaces.
Definition html.cc:1129
void a_Html_form_submit(void *v_html, void *v_form)
Used by the "Submit form" form menuitem.
Definition html.cc:252
static void Html_tag_open_dd(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3042
static void Html_tag_content_hr(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3009
static const char * Html_get_attr2(DilloHtml *html, const char *tag, int tagsize, const char *attrname, int tag_parsing_flags)
Get attribute value for 'attrname' and return it.
Definition html.cc:4163
static int i_TD
Definition html.cc:138
static int i_TR
Definition html.cc:139
static void Html_callback(int Op, CacheClient_t *Client)
Dispatch the apropriate function for 'Op'.
Definition html.cc:4299
static void Html_process_tag(DilloHtml *html, char *tag, int tagsize)
Process a tag, given as 'tag' and 'tagsize'.
Definition html.cc:4023
static void Html_tag_close_par(DilloHtml *html)
Default close for paragraph tags - pop the stack and break.
Definition html.cc:3427
void(* TagCloseFunct)(DilloHtml *html)
Definition html.cc:74
static void Html_set_link_coordinates(DilloHtml *html, int link, int x, int y)
Set the URL data for image maps.
Definition html.cc:291
static void Html_tag_content_img(DilloHtml *html, const char *tag, int tagsize)
Create a new Image struct and request the image-url to the cache.
Definition html.cc:2268
static int i_DD
Definition html.cc:128
static void Html_update_content_type(DilloHtml *html, const char *content)
Update the document's content type information based on meta tag data.
Definition html.cc:3093
void(* TagOpenFunct)(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:73
static void Html_tag_open_default(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3377
static void Html_tag_open_embed(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2622
static int Html_ms_stupid_quotes_2ucs(int codepoint)
This is M$ non-standard "smart quotes" (w1252).
Definition html.cc:855
#define TAB_SIZE
Definition html.cc:58
static void Html_tag_open_img(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2205
void a_Html_load_images(void *v_html, DilloUrl *pattern)
Used by the "Load images" page menuitem.
Definition html.cc:229
static const char * Html_get_javascript_link(DilloHtml *html)
Test and extract the link from a javascript instruction.
Definition html.cc:2651
static void Html_tag_content_source(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2603
static int Html_write_raw(DilloHtml *html, char *buf, int bufsize, int Eof)
Here's where we parse the html and put it into the Textblock structure.
Definition html.cc:4315
static int i_OPTION
Definition html.cc:134
static int Html_forbids_cross_nesting(const int InFlags, const int new_idx)
Check nesting and cross-nesting between BUTTON, SELECT, TEXTAREA and A.
Definition html.cc:3675
static void Html_tag_close_html(DilloHtml *html)
Handle close HTML element.
Definition html.cc:1624
static void Html_tag_close_title(DilloHtml *html)
Handle close TITLE.
Definition html.cc:1707
static void Html_tag_open_meta(DilloHtml *html, const char *tag, int tagsize)
Handle META. We do not support http-equiv=refresh with delay>0 because it's non standard,...
Definition html.cc:3119
static int i_LI
Definition html.cc:132
static void Html_tag_open_script(DilloHtml *html, const char *tag, int tagsize)
Handle open SCRIPT.
Definition html.cc:1725
static void Html_tag_open_abbr(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2045
static void Html_tag_content_wbr(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3435
static void Html_tag_open_sectioning(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3402
static void Html_tag_open_dt(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3033
static int i_BODY
Definition html.cc:126
void * a_Html_text(const char *type, void *P, CA_Callback_t *Call, void **Data)
Set callback function and callback data for the "html/text" MIME type.
Definition html.cc:210
static bool Html_load_image(BrowserWindow *bw, DilloUrl *url, const DilloUrl *requester, DilloImage *image)
Tell cache to retrieve image.
Definition html.cc:2187
static void Html_force_push_tag(DilloHtml *html, int tag_idx)
Push the tag (used to force an element with optional open into the stack).
Definition html.cc:1365
#define SGML_SPCDEL
Definition html.cc:56
static void Html_tag_open_style(DilloHtml *html, const char *tag, int tagsize)
Handle open STYLE.
Definition html.cc:1743
static const char * Html_parse_named_charref(DilloHtml *html, char *tok, bool_t is_attr, int *entsize)
Parse a named character reference (e.g., "&amp;" or "&hellip;").
Definition html.cc:978
static int Html_check_name_val(DilloHtml *html, const char *val, const char *attrname)
Check that 'val' is composed of characters inside [A-Za-z0-9:_.-].
Definition html.cc:1475
int a_Html_tag_index(const char *tag)
Get 'tag' index.
Definition html.cc:3609
static void Html_tag_open_base(DilloHtml *html, const char *tag, int tagsize)
Set the Document Base URI.
Definition html.cc:3348
static void Html_tag_content_area(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2395
static bool Html_match_tag(const char *tagstr, char *tag, int tagsize)
Does the tag in tagstr (e.g. "p") match the tag given by 'tag' and 'tagsize' (e.g. "<P align=center>")?
Definition html.cc:1317
static void Html_tag_close_body(DilloHtml *html)
Definition html.cc:1888
static void Html_tag_open_audio(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2543
static void Html_tag_open_font(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2015
static void Html_tag_close_head(DilloHtml *html)
Handle close HEAD element.
Definition html.cc:1661
static int i_TEXTAREA
Definition html.cc:137
static void Html_tag_cleanup_to_idx(DilloHtml *html, int s_idx, int new_idx, int fi, char op)
Cleanup the stack to a given index.
Definition html.cc:3694
static void Html_tag_content_embed(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2642
static void Html_tag_close_script(DilloHtml *html)
Handle close SCRIPT.
Definition html.cc:1734
static void Html_display_inline_block(DilloHtml *html)
Definition html.cc:3980
static void Html_stack_cleanup_at_open(DilloHtml *html, int ni)
Conditional cleanup of the stack (at open time).
Definition html.cc:3746
static int i_HTML
Definition html.cc:130
static int Html_tag_pre_excludes(DilloHtml *html, int tag_idx)
Check whether a tag is in the "excluding" element set for PRE.
Definition html.cc:3070
static void Html_tag_open_head(DilloHtml *html, const char *tag, int tagsize)
Handle open HEAD element.
Definition html.cc:1636
static void Html_tag_open_li(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2910
static int i_P
Definition html.cc:135
static int Html_tag_compare(const char *p1, const char *p2)
Definition html.cc:3594
static void Html_check_html5_obsolete(DilloHtml *html, int ni)
Warn when encountering elements that are obsolete in HTML5.
Definition html.cc:3951
static void Html_tag_open_p(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:1902
static void Html_tag_close_style(DilloHtml *html)
Handle close STYLE.
Definition html.cc:1772
static void Html_tag_open_ol(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2873
void a_Html_form_reset(void *v_html, void *v_form)
Used by the "Reset form" form menuitem.
Definition html.cc:265
static void Html_tag_open_dl(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3023
static void Html_tag_open_div(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3418
#define HTML_NTAGS
Definition html.hh:24
#define NumRef
static const Charref_t Charrefs[NumRef]
const char * a_Html_get_attr(DilloHtml *html, const char *tag, int tagsize, const char *attrname)
Call Html_get_attr2 telling it to parse entities and strip the result.
Definition html.cc:4266
void a_Html_stash_init(DilloHtml *html)
Initialize the stash buffer.
Definition html.cc:842
@ DILLO_HTML_TABLE_MODE_NONE
no table at all
DilloUrl * a_Html_url_new(DilloHtml *html, const char *url_str, const char *base_url, int use_base_url)
Wrapper for a_Url_new that adds an error detection message.
Definition html.cc:171
#define HT2TB(html)
"html struct" to Textblock
DilloImage * a_Html_image_new(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2132
int32_t a_Html_color_parse(DilloHtml *html, const char *str, int32_t default_color)
Parse a color attribute.
Definition html.cc:1457
void a_Html_tag_set_align_attr(DilloHtml *html, const char *tag, int tagsize)
Evaluates the ALIGN attribute (left|center|right|justify) and sets the style at the top of the stack.
Definition html.cc:317
DilloHtmlParseMode
@ DILLO_HTML_PARSE_MODE_PRE
@ DILLO_HTML_PARSE_MODE_VERBATIM
@ DILLO_HTML_PARSE_MODE_BODY
@ DILLO_HTML_PARSE_MODE_INIT
@ DILLO_HTML_PARSE_MODE_STASH_AND_BODY
@ DILLO_HTML_PARSE_MODE_STASH
#define BUG_MSG(...)
Add a bug-meter message.
void a_Html_common_image_attrs(DilloHtml *html, const char *tag, int tagsize)
Read image-associated tag attributes and create new image.
Definition html.cc:2062
#define S_TOP(html)
Top of the parsing stack.
@ IN_HEAD
@ IN_BUTTON
@ IN_LI
@ IN_MEDIA
@ IN_PRE
@ IN_BODY
@ IN_MAP
@ IN_TEXTAREA
@ IN_NONE
@ IN_META_HACK
@ IN_SELECT
@ IN_A
@ IN_HTML
@ IN_EOF
@ HTML_LIST_ORDERED
@ HTML_LIST_UNORDERED
@ HTML_LIST_NONE
dw::core::style::Length a_Html_parse_length(DilloHtml *html, const char *attr)
Returns a length or a percentage, or UNDEF_LENGTH in case of an error, or if attr is NULL.
Definition html.cc:1432
#define HT2LT(html)
"html struct" to "Layout"
@ DILLO_HTML_TABLE_BORDER_SEPARATE
char * a_Html_get_attr_wdef(DilloHtml *html, const char *tag, int tagsize, const char *attrname, const char *def)
"a_Html_get_attr with default": call a_Html_get_attr() and dStrdup() the returned string.
Definition html.cc:4280
int a_Html_tag_index(const char *tag)
Get 'tag' index.
Definition html.cc:3609
@ DT_UNRECOGNIZED
@ DT_NONE
@ DT_XHTML
@ DT_HTML
char * a_Html_parse_entities(DilloHtml *html, const char *token, int toksize)
Parse all the entities in a token.
Definition html.cc:1069
void a_Html_load_stylesheet(DilloHtml *html, DilloUrl *url)
Tell cache to retrieve a stylesheet.
Definition html.cc:3246
int a_Misc_content_type_cmp(const char *ct1, const char *ct2)
Compare two Content-Type strings.
Definition misc.c:274
void a_Misc_parse_content_type(const char *type, char **major, char **minor, char **charset)
Parse Content-Type string, e.g., "text/html; charset=utf-8".
Definition misc.c:210
#define d_isascii(c)
Definition misc.h:11
Anything related to Dillo Widget styles is defined here.
Definition style.cc:34
@ LIST_STYLE_TYPE_DISC
Definition style.hh:302
@ LIST_STYLE_TYPE_UPPER_ALPHA
Definition style.hh:312
@ LIST_STYLE_TYPE_LOWER_ALPHA
Definition style.hh:310
@ LIST_STYLE_TYPE_CIRCLE
Definition style.hh:303
@ LIST_STYLE_TYPE_UPPER_ROMAN
Definition style.hh:308
@ LIST_STYLE_TYPE_DECIMAL
Definition style.hh:305
@ LIST_STYLE_TYPE_SQUARE
Definition style.hh:304
@ LIST_STYLE_TYPE_NONE
Definition style.hh:322
@ LIST_STYLE_TYPE_LOWER_ROMAN
Definition style.hh:307
@ DISPLAY_INLINE_BLOCK
Definition style.hh:280
int Length
Type for representing all lengths within dw::core::style.
Definition style.hh:428
void numtostr(int num, char *buf, int buflen, ListStyleType listStyleType)
Convert a number into a string, in a given list style.
Definition style.cc:1422
@ WHITE_SPACE_PRE_LINE
Definition style.hh:363
@ WHITE_SPACE_PRE_WRAP
Definition style.hh:362
Anything related to embedded UI widgets is defined here.
Definition core.hh:36
The core of Dw is defined in this namespace.
Definition core.hh:23
@ SHIFT_MASK
Definition events.hh:17
Dw is in this namespace, or sub namespaces of this one.
Definition form.cc:25
DilloPrefs prefs
Global Data.
Definition prefs.c:33
void Html_tag_close_form(DilloHtml *html)
Definition form.cc:401
void a_Html_form_display_hiddens2(void *vform, bool display)
Definition form.cc:263
void Html_tag_open_textarea(DilloHtml *html, const char *tag, int tagsize)
Definition form.cc:625
void Html_tag_content_textarea(DilloHtml *html, const char *tag, int tagsize)
The textarea tag.
Definition form.cc:635
void Html_tag_close_button(DilloHtml *html)
Handle close <BUTTON>
Definition form.cc:973
void Html_tag_open_option(DilloHtml *html, const char *tag, int tagsize)
Definition form.cc:867
DilloHtmlForm * a_Html_form_new(DilloHtml *html, DilloHtmlMethod method, const DilloUrl *action, DilloHtmlEnc content_type, const char *charset, bool enabled)
Definition form.cc:234
void Html_tag_open_optgroup(DilloHtml *html, const char *tag, int tagsize)
Definition form.cc:807
void a_Html_form_reset2(void *vform)
Definition form.cc:258
void a_Html_form_delete(DilloHtmlForm *form)
Definition form.cc:243
void a_Html_input_delete(DilloHtmlInput *input)
Definition form.cc:248
void Html_tag_close_select(DilloHtml *html)
Definition form.cc:787
void Html_tag_open_isindex(DilloHtml *html, const char *tag, int tagsize)
The ISINDEX tag is just a deprecated form of <INPUT type=text> with implied FORM,...
Definition form.cc:588
void Html_tag_close_optgroup(DilloHtml *html)
Definition form.cc:843
void Html_tag_close_textarea(DilloHtml *html)
Close textarea.
Definition form.cc:694
void Html_tag_open_select(DilloHtml *html, const char *tag, int tagsize)
Definition form.cc:734
void Html_tag_open_input(DilloHtml *html, const char *tag, int tagsize)
Add a new input to current form.
Definition form.cc:431
void Html_tag_open_button(DilloHtml *html, const char *tag, int tagsize)
Definition form.cc:906
void Html_tag_open_form(DilloHtml *html, const char *tag, int tagsize)
Handle <FORM> tag.
Definition form.cc:337
void a_Html_form_submit2(void *vform)
Definition form.cc:253
void Html_tag_close_option(DilloHtml *html)
Definition form.cc:895
DilloHtmlMethod
Definition form.hh:10
DilloHtmlEnc
Definition form.hh:16
void a_Image_ref(DilloImage *Image)
Add a reference to an Image struct. Do nothing if the argument is NULL.
Definition image.cc:100
void a_Image_unref(DilloImage *Image)
Unref and free if necessary. Do nothing if the argument is NULL.
Definition image.cc:89
DilloImage * a_Image_new(void *layout, void *img_rndr, int32_t bg_color, int32_t fg_color)
Create and initialize a new image structure.
Definition image.cc:34
void Html_tag_open_td(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:242
void Html_tag_content_tbody(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:260
void Html_tag_open_tbody(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:256
void Html_tag_open_thead(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:274
void Html_tag_content_th(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:287
void Html_tag_open_table(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:47
void Html_tag_open_tfoot(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:267
void Html_tag_content_td(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:248
void Html_tag_content_tr(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:223
void Html_tag_open_th(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:281
void Html_tag_content_table(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:161
void Html_tag_open_tr(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:180
Contains the specific data for a single window.
Definition bw.h:27
Dstr * page_bugs
Definition bw.h:71
float zoom
Definition bw.h:74
int num_page_bugs
HTML-bugs detected at parse time.
Definition bw.h:70
int NumPendingStyleSheets
Number of not yet arrived style sheets.
Definition bw.h:48
Data structure for cache clients.
Definition cache.h:48
int Key
Primary Key for this client.
Definition cache.h:49
void * CbData
Client function data.
Definition cache.h:55
uint_t BufSize
Valid size of cache-data.
Definition cache.h:53
void * Buf
Pointer to cache-data.
Definition cache.h:52
void * Web
Pointer to the Web structure of our client.
Definition cache.h:56
const char * html4_str
const char * ref
const char * html5_str
DilloImage * image
DilloUrl * url
DilloHtmlTableMode table_mode
bool hand_over_break
This is used for list items etc.
DilloHtmlTableBorderMode table_border_mode
dw::core::Widget * table
dw::core::Widget * textblock
DilloHtmlListMode list_type
dw::core::Widget * ref_list_item
This is used to align list items (especially in enumerated lists)
DilloHtmlParseMode parse_mode
int tag_idx
TagInfo index for the tag that's being processed.
bool_t parse_embedded_css
Definition prefs.h:99
bool_t load_images
Definition prefs.h:96
bool_t show_tooltip
Definition prefs.h:66
bool_t limit_text_width
Definition prefs.h:71
bool_t focus_new_tab
Definition prefs.h:74
bool_t show_extra_warnings
Definition prefs.h:118
bool_t load_stylesheets
Definition prefs.h:98
bool_t contrast_visited_color
Definition prefs.h:65
bool_t middle_click_opens_new_tab
Definition prefs.h:110
Definition url.h:88
Definition dlib.h:102
Dstr_char_t * str
Definition dlib.h:105
int len
Definition dlib.h:104
void * img_rndr
Definition image.hh:61
int flags
Additional info.
Definition web.hh:29
DilloUrl * url
Requested URL.
Definition web.hh:25
BrowserWindow * bw
The requesting browser window [reference].
Definition web.hh:28
DilloImage * Image
For image urls [reference].
Definition web.hh:31
void a_UIcmd_link_popup(void *vbw, const DilloUrl *url)
Definition uicmd.cc:1222
void a_UIcmd_set_bug_prog(BrowserWindow *bw, int n_bug)
Definition uicmd.cc:1481
void a_UIcmd_page_popup(void *vbw, bool_t has_bugs, void *v_cssUrls)
Definition uicmd.cc:1212
void a_UIcmd_set_msg(BrowserWindow *bw, const char *format,...)
Definition uicmd.cc:1513
void a_UIcmd_image_popup(void *vbw, const DilloUrl *url, bool_t loaded_img, DilloUrl *page_url, DilloUrl *link_url)
Definition uicmd.cc:1230
void a_UIcmd_set_page_title(BrowserWindow *bw, const char *label)
Definition uicmd.cc:1490
void a_UIcmd_open_url_nw(BrowserWindow *bw, const DilloUrl *url)
Definition uicmd.cc:799
void a_UIcmd_repush(void *vbw)
Definition uicmd.cc:876
void a_UIcmd_open_url(BrowserWindow *bw, const DilloUrl *url)
Definition uicmd.cc:764
void a_UIcmd_redirection0(void *vbw, const DilloUrl *url)
Definition uicmd.cc:884
void a_UIcmd_open_url_nt(void *vbw, const DilloUrl *url, int focus)
Definition uicmd.cc:815
void a_Url_set_flags(DilloUrl *u, int flags)
Set DilloUrl flags.
Definition url.c:527
int a_Url_cmp(const DilloUrl *A, const DilloUrl *B)
Compare two Url's to check if they're the same, or which one is bigger.
Definition url.c:506
void a_Url_free(DilloUrl *url)
Free a DilloUrl.
Definition url.c:208
void a_Url_set_ismap_coords(DilloUrl *u, char *coord_str)
Set DilloUrl ismap coordinates.
Definition url.c:549
DilloUrl * a_Url_new(const char *url_str, const char *base_url)
Transform (and resolve) a URL string into the respective DilloUrl.
Definition url.c:371
char * a_Url_str(const DilloUrl *u)
Return the url as a string.
Definition url.c:66
DilloUrl * a_Url_dup(const DilloUrl *ori)
Duplicate a Url structure.
Definition url.c:477
#define URL_ILLEGAL_CHARS_SPC(u)
Definition url.h:81
#define URL_SCHEME_(u)
Definition url.h:49
#define URL_ILLEGAL_CHARS(u)
Definition url.h:80
#define URL_SpamSafe
Definition url.h:40
#define URL_FLAGS(u)
Definition url.h:79
#define URL_STR(u)
Definition url.h:76
#define URL_STR_(u)
Definition url.h:55
#define URL_SCHEME(u)
Definition url.h:70
bool_t a_Utf8_ideographic(const char *s, const char *end, int *len)
Does s point to a UTF-8-encoded ideographic character?
Definition utf8.cc:76
int a_Utf8_encode(unsigned int ucs, char *buf)
Write UTF-8 encoding of ucs into buf and return number of bytes written.
Definition utf8.cc:54
static const char utf8_zero_width_space[]
Unicode zero width space U+200B.
Definition utf8.hh:19
DilloWeb * a_Web_new(BrowserWindow *bw, const DilloUrl *url, const DilloUrl *requester)
Allocate and set safe values for a DilloWeb structure.
Definition web.cc:121
#define WEB_Stylesheet
Definition web.hh:18
#define WEB_Image
Definition web.hh:17