Dillo v3.2.0-88-g47ab7c70
Loading...
Searching...
No Matches
html.cc
Go to the documentation of this file.
1/*
2 * File: html.cc
3 *
4 * Copyright (C) 2005-2007 Jorge Arellano Cid <jcid@dillo.org>
5 * Copyright (C) 2024-2025 Rodrigo Arias Mallo <rodarima@gmail.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3 of the License, or
10 * (at your option) any later version.
11 */
12
17/*-----------------------------------------------------------------------------
18 * Includes
19 *---------------------------------------------------------------------------*/
20#include <ctype.h> /* for isspace */
21#include <string.h> /* for memcpy and memmove */
22#include <stdlib.h>
23#include <stdio.h> /* for sprintf */
24#include <errno.h>
25
26#include "bw.h" /* for BrowserWindow */
27#include "msg.h"
28#include "binaryconst.h"
29#include "colors.h"
30#include "html_charrefs.h"
31#include "utf8.hh"
32
33#include "misc.h"
34#include "uicmd.hh"
35#include "history.h"
36#include "menu.hh"
37#include "prefs.h"
38#include "capi.h"
39#include "html.hh"
40#include "html_common.hh"
41#include "form.hh"
42#include "table.hh"
43
44#include "dw/textblock.hh"
45#include "dw/bullet.hh"
46#include "dw/listitem.hh"
47#include "dw/image.hh"
48#include "dw/ruler.hh"
49
50/*-----------------------------------------------------------------------------
51 * Defines
52 *---------------------------------------------------------------------------*/
53
54/* Define to 1 to ignore white space immediately after an open tag,
55 * and immediately before a close tag. */
56#define SGML_SPCDEL 0
57
58#define TAB_SIZE 8
59
60/*-----------------------------------------------------------------------------
61 * Name spaces
62 *---------------------------------------------------------------------------*/
63using namespace lout;
64using namespace dw;
65using namespace dw::core;
66using namespace dw::core::ui;
67using namespace dw::core::style;
68
69/*-----------------------------------------------------------------------------
70 * Typedefs
71 *---------------------------------------------------------------------------*/
72class DilloHtml;
73typedef void (*TagOpenFunct) (DilloHtml *html, const char *tag, int tagsize);
74typedef void (*TagCloseFunct) (DilloHtml *html);
75
85
91
92
93/*
94 * Exported function with C linkage.
95 */
96extern "C" {
97void *a_Html_text(const char *type, void *P, CA_Callback_t *Call,void **Data);
98}
99
100/*-----------------------------------------------------------------------------
101 * Forward declarations
102 *---------------------------------------------------------------------------*/
103static int Html_write_raw(DilloHtml *html, char *buf, int bufsize, int Eof);
104static bool Html_load_image(BrowserWindow *bw, DilloUrl *url,
105 const DilloUrl *requester, DilloImage *image);
106static void Html_callback(int Op, CacheClient_t *Client);
107static void Html_tag_cleanup_at_close(DilloHtml *html, int TagIdx);
108int a_Html_tag_index(const char *tag);
109
110/*-----------------------------------------------------------------------------
111 * Local Data
112 *---------------------------------------------------------------------------*/
114typedef struct {
115 const char *name; /* element name */
116 unsigned char Flags; /* flags (explained near the table data) */
117 char EndTag; /* Is it Required, Optional or Forbidden */
118 TagOpenFunct open; /* Open function */
119 TagOpenFunct content; /* Content function */
120 TagCloseFunct close; /* Close function */
121} TagInfo;
122
123/* Some element indexes required in scattered places */
124static int
142
143
144/*-----------------------------------------------------------------------------
145 *-----------------------------------------------------------------------------
146 * Main Code
147 *-----------------------------------------------------------------------------
148 *---------------------------------------------------------------------------*/
149
153void DilloHtml::bugMessage(const char *format, ... )
154{
155 va_list argp;
156
157 if (bw->num_page_bugs)
158 dStr_append_c(bw->page_bugs, '\n');
160 "HTML warning: line %d, ",
162 va_start(argp, format);
163 dStr_vsprintfa(bw->page_bugs, format, argp);
164 va_end(argp);
166}
167
173 const char *url_str, const char *base_url,
174 int use_base_url)
175{
176 if (!url_str) {
177 MSG("a_Html_url_new: URL is NULL\n");
178 return NULL;
179 }
180
181 DilloUrl *url = a_Url_new(url_str,
182 (use_base_url) ? base_url : URL_STR_(html->base_url));
183
184 if (!url) {
185 BUG_MSG("URL is not valid '%s'.", url_str);
186 return NULL;
187 }
188
189 int n_ic, n_ic_spc;
190 if ((n_ic = URL_ILLEGAL_CHARS(url)) != 0) {
191 const char *suffix = (n_ic) > 1 ? "s" : "";
192 n_ic_spc = URL_ILLEGAL_CHARS_SPC(url);
193 if (n_ic == n_ic_spc) {
194 BUG_MSG("URL has %d illegal space%s ('%s').", n_ic, suffix, url_str);
195 } else if (n_ic_spc == 0) {
196 BUG_MSG("URL has %d illegal byte%s in {00-1F, 7F-FF} range ('%s').",
197 n_ic, suffix, url_str);
198 } else {
199 BUG_MSG("URL has %d illegal byte%s: "
200 "%d space%s and %d in {00-1F, 7F-FF} range ('%s').",
201 n_ic, suffix,
202 n_ic_spc, n_ic_spc > 1 ? "s" : "", n_ic-n_ic_spc, url_str);
203 }
204 }
205 return url;
206}
207
211void *a_Html_text(const char *Type, void *P, CA_Callback_t *Call, void **Data)
212{
213 DilloWeb *web = (DilloWeb*)P;
214 DilloHtml *html = new DilloHtml(web->bw, web->url, Type);
215
216 *Data = (void*)html;
218
219 return (void*)html->dw;
220}
221
222static void Html_free(void *data)
223{
224 delete ((DilloHtml*)data);
225}
226
230void a_Html_load_images(void *v_html, DilloUrl *pattern)
231{
232 DilloHtml *html = (DilloHtml*)v_html;
233
234 html->loadImages(pattern);
235}
236
240static bool Html_contains_form(DilloHtml *html, void *v_form)
241{
242 for (int i = 0; i < html->forms->size(); i++) {
243 if (html->forms->get(i) == v_form) {
244 return true;
245 }
246 }
247 return false;
248}
249
253void a_Html_form_submit(void *v_html, void *v_form)
254{
255 DilloHtml *html = (DilloHtml*)v_html;
256
257 if (Html_contains_form(html, v_form)) {
258 /* it's still valid */
259 a_Html_form_submit2(v_form);
260 }
261}
262
266void a_Html_form_reset(void *v_html, void *v_form)
267{
268 DilloHtml *html = (DilloHtml*)v_html;
269
270 if (Html_contains_form(html, v_form)) {
271 /* it's still valid */
272 a_Html_form_reset2(v_form);
273 }
274}
275
279void a_Html_form_display_hiddens(void *v_html, void *v_form, bool_t display)
280{
281 DilloHtml *html = (DilloHtml*)v_html;
282
283 if (Html_contains_form(html, v_form)) {
284 /* it's still valid */
285 a_Html_form_display_hiddens2(v_form, (display != 0));
286 }
287}
288
292static void Html_set_link_coordinates(DilloHtml *html, int link, int x, int y)
293{
294 char data[64];
295
296 if (x != -1) {
297 snprintf(data, 64, "?%d,%d", x, y);
298 a_Url_set_ismap_coords(html->links->get(link), data);
299 }
300}
301
306static int Html_set_new_link(DilloHtml *html, DilloUrl **url)
307{
308 int nl = html->links->size();
309 html->links->increase();
310 html->links->set(nl, (*url) ? *url : NULL);
311 return nl;
312}
313
318void a_Html_tag_set_align_attr(DilloHtml *html, const char *tag, int tagsize)
319{
320 const char *align;
321
322 if ((align = a_Html_get_attr(html, tag, tagsize, "align"))) {
323 TextAlignType textAlignType = TEXT_ALIGN_LEFT;
324
325 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
326 BUG_MSG("The align attribute is obsolete in HTML5.");
327
328 if (dStrAsciiCasecmp (align, "left") == 0)
329 textAlignType = TEXT_ALIGN_LEFT;
330 else if (dStrAsciiCasecmp (align, "right") == 0)
331 textAlignType = TEXT_ALIGN_RIGHT;
332 else if (dStrAsciiCasecmp (align, "center") == 0)
333 textAlignType = TEXT_ALIGN_CENTER;
334 else if (dStrAsciiCasecmp (align, "justify") == 0)
335 textAlignType = TEXT_ALIGN_JUSTIFY;
336#if 0
337 else if (dStrAsciiCasecmp (align, "char") == 0) {
338 /* TODO: Actually not supported for <p> etc. */
339 v.textAlign = TEXT_ALIGN_STRING;
340 if ((charattr = a_Html_get_attr(html, tag, tagsize, "char"))) {
341 if (charattr[0] == 0)
342 /* TODO: ALIGN=" ", and even ALIGN="&32;" will result in
343 * an empty string (don't know whether the latter is
344 * correct, has to be clarified with the specs), so
345 * that for empty strings, " " is assumed. */
346 style_attrs.textAlignChar = ' ';
347 else
348 style_attrs.textAlignChar = charattr[0];
349 } else
350 /* TODO: Examine LANG attr of <html>. */
351 style_attrs.textAlignChar = '.';
352 }
353#endif
355 textAlignType);
356 }
357}
358
363bool a_Html_tag_set_valign_attr(DilloHtml *html, const char *tag, int tagsize)
364{
365 const char *attr;
366 VAlignType valign;
367
368 if ((attr = a_Html_get_attr(html, tag, tagsize, "valign"))) {
369 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
370 BUG_MSG("The valign attribute is obsolete in HTML5.");
371
372 if (dStrAsciiCasecmp (attr, "top") == 0)
373 valign = VALIGN_TOP;
374 else if (dStrAsciiCasecmp (attr, "bottom") == 0)
375 valign = VALIGN_BOTTOM;
376 else if (dStrAsciiCasecmp (attr, "baseline") == 0)
377 valign = VALIGN_BASELINE;
378 else
379 valign = VALIGN_MIDDLE;
380
382 CSS_TYPE_ENUM, valign);
383 return true;
384 } else
385 return false;
386}
387
388
393static void Html_add_textblock(DilloHtml *html, bool addBreaks, int breakSpace,
394 bool addBreakOpt)
395{
396 Textblock *textblock = new Textblock (prefs.limit_text_width);
397 Style *style;
398
399 if (addBreaks) {
400 StyleAttrs attrs = *(html->style ());
401 attrs.display = DISPLAY_BLOCK;
402 style = Style::create (&attrs);
403 } else {
404 style = html->style ();
405 style->ref ();
406 }
407
408 if (addBreaks)
409 HT2TB(html)->addParbreak (breakSpace, html->wordStyle ());
410
411 HT2TB(html)->addWidget (textblock, style); /* Works also for floats etc. */
412 if (addBreakOpt)
413 HT2TB(html)->addBreakOption (html->style (), false);
414
415 if (addBreaks)
416 HT2TB(html)->addParbreak (breakSpace, html->wordStyle ());
417 S_TOP(html)->textblock = html->dw = textblock;
418 if (addBreaks)
419 S_TOP(html)->hand_over_break = true;
420
421 style->unref ();
422}
423
425{
426 return HT2TB(html)->mustAddBreaks (html->style ());
427}
428
433 const char *content_type)
434{
435 /* Init main variables */
436 bw = p_bw;
437 page_url = a_Url_dup(url);
438 base_url = a_Url_dup(url);
439 dw = NULL;
440
441 /* Init event receiver */
442 linkReceiver.html = this;
443 HT2LT(this)->connectLink (&linkReceiver);
444
445 a_Bw_add_doc(p_bw, this);
446
447 /* Init for-parsing variables */
448 Start_Buf = NULL;
449 Start_Ofs = 0;
450
451 _MSG("DilloHtml(): content type: %s\n", content_type);
452 this->content_type = dStrdup(content_type);
453
454 /* get charset */
456
457 stop_parser = false;
458
459 CurrOfs = OldOfs = 0;
460 OldLine = 1;
461
462 DocType = DT_NONE; /* assume Tag Soup 0.0! :-) */
463 DocTypeVersion = 0.0f;
464
466
467 cssUrls = new misc::SimpleVector <DilloUrl*> (1);
468
469 stack = new misc::SimpleVector <DilloHtmlState> (16);
470 stack->increase();
474 stack->getRef(0)->cell_text_align_set = false;
475 stack->getRef(0)->display_none = false;
477 stack->getRef(0)->list_number = 0;
478 stack->getRef(0)->tag_idx = -1; /* MUST not be used */
479 stack->getRef(0)->textblock = NULL;
480 stack->getRef(0)->table = NULL;
481 stack->getRef(0)->ref_list_item = NULL;
482 stack->getRef(0)->hand_over_break = false;
483
485
486 Stash = dStr_new("");
487 StashSpace = false;
488
489 pre_column = 0;
490 PreFirstChar = false;
491 PrevWasCR = false;
492 InVisitedLink = false;
493 ReqTagClose = false;
494 TagSoup = true;
495 loadCssFromStash = false;
496 PrevWasBodyClose = false;
497 PrevWasHtmlClose = false;
498
500
502
505 visited_color = -1;
506
507 /* Init page-handling variables */
508 forms = new misc::SimpleVector <DilloHtmlForm*> (1);
509 inputs_outside_form = new misc::SimpleVector <DilloHtmlInput*> (1);
510 links = new misc::SimpleVector <DilloUrl*> (64);
511 images = new misc::SimpleVector <DilloHtmlImage*> (16);
512
513 /* Initialize the main widget */
514 initDw();
515 /* Hook destructor to the dw delete call */
516 dw->setDeleteCallback(Html_free, this);
517}
518
523{
524 dReturn_if_fail (dw == NULL);
525
526 /* Create the main widget */
528
529 bw->num_page_bugs = 0;
531}
532
537{
538 _MSG("::~DilloHtml(this=%p)\n", this);
539
541
542 a_Bw_remove_doc(bw, this);
543
546
547 for (int i = 0; i < cssUrls->size(); i++)
549 delete (cssUrls);
550
551 for (int i = 0; i < forms->size(); i++)
553 delete(forms);
554
555 for (int i = 0; i < inputs_outside_form->size(); i++)
557 delete(inputs_outside_form);
558
559 for (int i = 0; i < links->size(); i++)
560 a_Url_free(links->get(i));
561 delete (links);
562
563 for (int i = 0; i < images->size(); i++) {
564 DilloHtmlImage *img = images->get(i);
565 a_Url_free(img->url);
566 a_Image_unref(img->image);
567 dFree(img);
568 }
569 delete (images);
570
571 delete styleEngine;
572}
573
578void DilloHtml::write(char *Buf, int BufSize, int Eof)
579{
580 int token_start;
581 char *buf = Buf + Start_Ofs;
582 int bufsize = BufSize - Start_Ofs;
583
584 _MSG("DilloHtml::write BufSize=%d Start_Ofs=%d\n", BufSize, Start_Ofs);
585#if 0
586 char *aux = dStrndup(Buf, BufSize);
587 MSG(" {%s}\n", aux);
588 dFree(aux);
589#endif
590
591 /* Update Start_Buf. It may be used after the parser is stopped */
592 Start_Buf = Buf;
593
594 dReturn_if (dw == NULL);
595 dReturn_if (stop_parser == true);
596
597 token_start = Html_write_raw(this, buf, bufsize, Eof);
598 Start_Ofs += token_start;
599}
600
606{
607 int i, ofs, line;
608 const char *p = Start_Buf;
609
610 dReturn_val_if_fail(p != NULL, -1);
611 /* Disable line counting for META hack. Buffers differ. */
613
614 ofs = CurrOfs;
615 line = OldLine;
616 for (i = OldOfs; i < ofs; ++i)
617 if (p[i] == '\n' || (p[i] == '\r' && p[i+1] != '\n'))
618 ++line;
619 OldOfs = CurrOfs;
620 OldLine = line;
621 return line;
622}
623
628{
629 delete(stack);
630
634 dFree(charset);
635}
636
641void DilloHtml::finishParsing(int ClientKey)
642{
643 int si;
644
645 dReturn_if (stop_parser == true);
646
647 /* flag we've already parsed up to the last byte */
648 InFlags |= IN_EOF;
649
650 /* force the close of elements left open (TODO: not for XHTML) */
651 while ((si = stack->size() - 1)) {
652 if (stack->getRef(si)->tag_idx != -1) {
654 }
655 }
656
657 /* Nothing left to do with the parser. Clear all flags, except EOF. */
658 InFlags = IN_EOF;
659
660 /* Remove this client from our active list */
661 a_Bw_close_client(bw, ClientKey);
662}
663
668 DilloHtmlEnc enc, const char *charset)
669{
670 // avoid data loss on repush after CSS stylesheets have been loaded
671 bool enabled = bw->NumPendingStyleSheets == 0;
672 DilloHtmlForm *form = a_Html_form_new (this, method, action,
673 enc, charset, enabled);
674 int nf = forms->size ();
675 forms->increase ();
676 forms->set (nf, form);
677 _MSG("Html formNew: action=%s nform=%d\n", action, nf);
678 return forms->size();
679}
680
685{
686 return forms->get (forms->size() - 1);
687}
688
690{
691 for (int i = 0; i < images->size(); i++) {
692 if (images->get(i)->image != NULL) {
693 return TRUE;
694 }
695 }
696 return FALSE;
697}
698
702void DilloHtml::loadImages (const DilloUrl *pattern)
703{
705
706 /* If the user asked for a specific image, the user (NULL) is the requester,
707 * and the domain mechanism will always permit the request. But if the user
708 * just asked for all images (clicking "Load images"), use the page URL as
709 * the requester so that the domain mechanism can act as a filter.
710 * If the possible patterns become more complex, it might be good to have
711 * the caller supply the requester instead.
712 */
713 const DilloUrl *requester = pattern ? NULL : this->page_url;
714
715 for (int i = 0; i < images->size(); i++) {
716 DilloHtmlImage *hi = images->get(i);
717
718 if (hi->image) {
719 assert(hi->url);
720 if ((!pattern) || (!a_Url_cmp(hi->url, pattern))) {
721 if (Html_load_image(bw, hi->url, requester, hi->image)) {
722 a_Image_unref (hi->image);
723 hi->image = NULL; // web owns it now
724 }
725 }
726 }
727 }
728}
729
734{
735 int nu = cssUrls->size();
736 cssUrls->increase();
737 cssUrls->set(nu, a_Url_dup(url));
738}
739
740bool DilloHtml::HtmlLinkReceiver::enter (Widget *widget, int link, int img,
741 int x, int y)
742{
744
745 _MSG(" ** ");
746 if (link == -1) {
747 _MSG(" Link LEAVE notify...\n");
748 a_UIcmd_set_msg(bw, "");
749 } else {
750 _MSG(" Link ENTER notify...\n");
751 Html_set_link_coordinates(html, link, x, y);
752 a_UIcmd_set_msg(bw, "%s", URL_STR(html->links->get(link)));
753 }
754 return true;
755}
756
760bool DilloHtml::HtmlLinkReceiver::press (Widget *widget, int link, int img,
761 int x, int y, EventButton *event)
762{
763 BrowserWindow *bw = html->bw;
764 int ret = false;
765 DilloUrl *linkurl = NULL;
766
767 _MSG("pressed button %d\n", event->button);
768 if (event->button == 3) {
769 // popup menus
770 if (img != -1) {
771 // image menu
772 if (link != -1)
773 linkurl = html->links->get(link);
774 const bool_t loaded_img = (html->images->get(img)->image == NULL);
775 a_UIcmd_image_popup(bw, html->images->get(img)->url, loaded_img,
776 html->page_url, linkurl);
777 ret = true;
778 } else {
779 if (link == -1) {
780 a_UIcmd_page_popup(bw, bw->num_page_bugs != 0, html->cssUrls);
781 ret = true;
782 } else {
783 a_UIcmd_link_popup(bw, html->links->get(link), html->page_url);
784 ret = true;
785 }
786 }
787 }
788 return ret;
789}
790
794bool DilloHtml::HtmlLinkReceiver::click (Widget *widget, int link, int img,
795 int x, int y, EventButton *event)
796{
797 BrowserWindow *bw = html->bw;
798
799 if ((img != -1) && (html->images->get(img)->image)) {
800 // clicked an image that has not already been loaded
801 if (event->button == 1){
802 // load all instances of this image
803 DilloUrl *pattern = html->images->get(img)->url;
804 html->loadImages(pattern);
805 return true;
806 }
807 }
808
809 if (link != -1) {
810 DilloUrl *url = html->links->get(link);
811 _MSG("clicked on URL %d: %s\n", link, a_Url_str (url));
812
813 Html_set_link_coordinates(html, link, x, y);
814
815 if (event->button == 1) {
816 if (event->state & CONTROL_MASK) {
818 int focus = prefs.focus_new_tab ? 1 : 0;
819 if (event->state & SHIFT_MASK) focus = !focus;
820 a_UIcmd_open_url_nt(bw, url, focus);
821 } else {
823 }
824 } else {
825 a_UIcmd_open_url(bw, url);
826 }
827 } else if (event->button == 2) {
829 int focus = prefs.focus_new_tab ? 1 : 0;
830 if (event->state & SHIFT_MASK) focus = !focus;
831 a_UIcmd_open_url_nt(bw, url, focus);
832 } else
834 } else {
835 return false;
836 }
837
838 /* Change the link color to "visited" as visual feedback */
839 for (Widget *w = widget; w; w = w->getParent()) {
840 _MSG(" ->%s\n", w->getClassName());
841 if (w->instanceOf(dw::Textblock::CLASS_ID)) {
842 ((Textblock*)w)->changeLinkColor (link, html->visited_color);
843 break;
844 }
845 }
846 }
847 return true;
848}
849
854{
855 S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_STASH;
856 html->StashSpace = false;
857 dStr_truncate(html->Stash, 0);
858}
859
/*
 * Map the codes 145..151 to replacement codepoints:
 * 145/146 -> apostrophe, 147/148 -> double quote, 149 -> 176,
 * 150/151 -> hyphen. Any other value is returned unchanged.
 */
static int Html_ms_stupid_quotes_2ucs(int codepoint)
{
   if (codepoint == 145 || codepoint == 146)
      return '\'';
   if (codepoint == 147 || codepoint == 148)
      return '"';
   if (codepoint == 149)
      return 176;
   if (codepoint == 150 || codepoint == 151)
      return '-';
   return codepoint;
}
881
886static const char *Html_parse_numeric_charref(DilloHtml *html, char *tok,
887 bool_t is_attr, int *entsize)
888{
889 static char buf[5];
890 char *s = tok;
891 int n, codepoint = -1;
892
893 errno = 0;
894
895 if (*s == 'x' || *s == 'X') {
896 if (isxdigit(*++s)) {
897 /* strtol with base 16 accepts leading "0x" - we don't */
898 if (*s == '0' && s[1] == 'x') {
899 s++;
900 codepoint = 0;
901 } else {
902 codepoint = strtol(s, &s, 16);
903 }
904 }
905 } else if (isdigit(*s)) {
906 codepoint = strtol(s, &s, 10);
907 }
908 if (errno)
909 codepoint = -1;
910
911 if (*s == ';')
912 s++;
913 else {
914 if (prefs.show_extra_warnings && (html->DocType == DT_XHTML ||
915 (html->DocType == DT_HTML && html->DocTypeVersion <= 4.01f))) {
916 char c = *s;
917 *s = '\0';
918 BUG_MSG("Character reference '&#%s' lacks ';'.", tok);
919 *s = c;
920 }
921 /* Don't require ';' for old HTML, except that our current heuristic
922 * is to require it in attributes to avoid cases like "&copy=1" found
923 * in URLs.
924 */
925 if (is_attr || html->DocType == DT_XHTML ||
926 (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) {
927 return NULL;
928 }
929
930 }
931 if ((codepoint < 0x20 && codepoint != '\t' && codepoint != '\n' &&
932 codepoint != '\f') ||
933 (codepoint >= 0x7f && codepoint <= 0x9f) ||
934 (codepoint >= 0xd800 && codepoint <= 0xdfff) || codepoint > 0x10ffff ||
935 ((codepoint & 0xfffe) == 0xfffe) ||
936 (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) &&
937 codepoint > 0xffff)) {
938 /* this catches null bytes, errors, codes out of range, disallowed
939 * control chars, permanently undefined chars, and surrogates.
940 */
941 char c = *s;
942 *s = '\0';
943 BUG_MSG("Numeric character reference '&#%s' is not valid.", tok);
944 *s = c;
945
946 codepoint = (codepoint >= 145 && codepoint <= 151) ?
947 Html_ms_stupid_quotes_2ucs(codepoint) : -1;
948 }
949 if (codepoint != -1) {
950 if (codepoint >= 128) {
951 n = a_Utf8_encode(codepoint, buf);
952 } else {
953 n = 1;
954 buf[0] = (char) codepoint;
955 }
956 assert(n < 5);
957 buf[n] = '\0';
958 *entsize = s-tok+2;
959 return buf;
960 } else {
961 return NULL;
962 }
963}
964
968static int Html_charref_comp(const void *a, const void *b)
969{
970 return strcmp(((Charref_t *)a)->ref, ((Charref_t *)b)->ref);
971}
972
977{
978 Charref_t RefKey;
979
980 RefKey.ref = key;
981 return (Charref_t*) bsearch(&RefKey, Charrefs, NumRef,
983}
984
989static const char *Html_parse_named_charref(DilloHtml *html, char *tok,
990 bool_t is_attr, int *entsize)
991{
992 Charref_t *p;
993 char c;
994 char *s = tok;
995 const char *ret = NULL;
996
997 while (*++s && (isalnum(*s) || strchr(":_.-", *s))) ;
998 c = *s;
999 *s = '\0';
1000 if (c != ';') {
1001 if (prefs.show_extra_warnings && (html->DocType == DT_XHTML ||
1002 (html->DocType == DT_HTML && html->DocTypeVersion <= 4.01f)))
1003 BUG_MSG("Character reference '&%s' lacks ';'.", tok);
1004
1005 /* Don't require ';' for old HTML, except that our current heuristic
1006 * is to require it in attributes to avoid cases like "&copy=1" found
1007 * in URLs.
1008 */
1009 if (is_attr || html->DocType == DT_XHTML ||
1010 (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) {
1011 return ret;
1012 }
1013 }
1014
1015 if ((p = Html_charref_search(tok))) {
1016 ret = (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) ?
1017 p->html5_str : p->html4_str;
1018 }
1019
1020 if (!ret && html->DocType == DT_XHTML && !strcmp(tok, "apos"))
1021 ret = "'";
1022
1023 *s = c;
1024 if (c == ';')
1025 s++;
1026
1027 if (!ret) {
1028 c = *s;
1029 *s = '\0';
1030 BUG_MSG("Undefined character reference '&%s'.", tok);
1031 *s = c;
1032 }
1033 *entsize = s-tok+1;
1034 return ret;
1035}
1036
1045static const char *Html_parse_entity(DilloHtml *html, const char *token,
1046 int toksize, int *entsize, bool_t is_attr)
1047{
1048 const char *ret = NULL;
1049 char *tok;
1050
1051 if (toksize > 50) {
1052 /* In pathological cases, attributes can be megabytes long and filled
1053 * with character references. As of HTML5, the longest defined character
1054 * reference is about 32 bytes long.
1055 */
1056 toksize = 50;
1057 }
1058
1059 token++;
1060 tok = dStrndup(token, (uint_t)toksize);
1061
1062 if (*tok == '#') {
1063 ret = Html_parse_numeric_charref(html, tok+1, is_attr, entsize);
1064 } else if (isalpha(*tok)) {
1065 ret = Html_parse_named_charref(html, tok, is_attr, entsize);
1066 } else if (prefs.show_extra_warnings &&
1067 (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f))) {
1068 // HTML5 doesn't mind literal '&'s.
1069 BUG_MSG("Literal '&'.");
1070 }
1071 dFree(tok);
1072
1073 return ret;
1074}
1075
1080char *a_Html_parse_entities(DilloHtml *html, const char *token, int toksize)
1081{
1082 const char *esc_set = "&";
1083 int i, s, entsize;
1084 char *str;
1085
1086 s = strcspn(token, esc_set);
1087 if (s >= toksize) {
1088 /* no ampersands */
1089 str = dStrndup(token, toksize);
1090 } else {
1091 Dstr *ds = dStr_sized_new(toksize);
1092
1093 dStr_append_l(ds, token, s);
1094
1095 for (i = s; i < toksize; i++) {
1096 const char *entstr;
1097 const bool_t is_attr = FALSE;
1098
1099 if (token[i] == '&' &&
1100 (entstr = Html_parse_entity(html, token+i, toksize-i, &entsize,
1101 is_attr))) {
1102 dStr_append(ds, entstr);
1103 i += entsize-1;
1104 } else {
1105 dStr_append_c(ds, token[i]);
1106 }
1107 }
1108 str = ds->str;
1109 dStr_free(ds, 0);
1110 }
1111 return str;
1112}
1113
1118static void Html_process_space_pre_line(DilloHtml *html, const char *space,
1119 int spacesize)
1120{
1121 int i, breakCnt = 0;
1122
1123 for (i = 0; i < spacesize; i++) {
1124 /* Support for "\r", "\n" and "\r\n" line breaks */
1125 if (space[i] == '\r' || (space[i] == '\n' && !html->PrevWasCR)) {
1126 breakCnt++;
1127 html->PrevWasCR = (space[i] == '\r');
1128
1129 HT2TB(html)->addLinebreak (html->wordStyle ());
1130 }
1131 }
1132 if (breakCnt == 0) {
1133 HT2TB(html)->addSpace(html->wordStyle ());
1134 }
1135}
1136
1140static void Html_process_space(DilloHtml *html, const char *space,
1141 int spacesize)
1142{
1143 char *spc;
1144 int i, offset;
1145 DilloHtmlParseMode parse_mode = S_TOP(html)->parse_mode;
1146
1147 if (S_TOP(html)->display_none) {
1148 /* do nothing */
1149 } else if (parse_mode == DILLO_HTML_PARSE_MODE_STASH) {
1150 html->StashSpace = (html->Stash->len > 0);
1151
1152 } else if (parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
1153 dStr_append_l(html->Stash, space, spacesize);
1154
1155 } else if (parse_mode == DILLO_HTML_PARSE_MODE_PRE) {
1156 int spaceCnt = 0;
1157
1158 /* re-scan the string for characters that cause line breaks */
1159 for (i = 0; i < spacesize; i++) {
1160 /* Support for "\r", "\n" and "\r\n" line breaks (skips the first) */
1161 if (!html->PreFirstChar &&
1162 (space[i] == '\r' || (space[i] == '\n' && !html->PrevWasCR))) {
1163
1164 if (spaceCnt) {
1165 spc = dStrnfill(spaceCnt, ' ');
1166 HT2TB(html)->addText (spc, spaceCnt, html->wordStyle ());
1167 dFree(spc);
1168 spaceCnt = 0;
1169 }
1170 HT2TB(html)->addLinebreak (html->wordStyle ());
1171 html->pre_column = 0;
1172 }
1173 html->PreFirstChar = false;
1174
1175 /* cr and lf should not be rendered -- they appear as a break */
1176 switch (space[i]) {
1177 case '\r':
1178 case '\n':
1179 break;
1180 case '\t':
1182 BUG_MSG("TAB character inside <pre>.");
1183 offset = TAB_SIZE - html->pre_column % TAB_SIZE;
1184 spaceCnt += offset;
1185 html->pre_column += offset;
1186 break;
1187 default:
1188 spaceCnt++;
1189 html->pre_column++;
1190 break;
1191 }
1192
1193 html->PrevWasCR = (space[i] == '\r');
1194 }
1195
1196 if (spaceCnt) {
1197 // add break possibility for the white-space:pre-wrap case
1198 HT2TB(html)->addBreakOption (html->wordStyle (), false);
1199 spc = dStrnfill(spaceCnt, ' ');
1200 HT2TB(html)->addText (spc, spaceCnt, html->wordStyle ());
1201 dFree(spc);
1202 }
1203
1204 } else {
1205 if (SGML_SPCDEL) {
1206 /* SGML_SPCDEL ignores white space immediately after an open tag */
1207 } else if (html->wordStyle ()->whiteSpace == WHITE_SPACE_PRE_LINE) {
1208 Html_process_space_pre_line(html, space, spacesize);
1209 } else {
1210 HT2TB(html)->addSpace(html->wordStyle ());
1211 }
1212
1213 if (parse_mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY)
1214 html->StashSpace = (html->Stash->len > 0);
1215 }
1216}
1217
1226static void Html_process_word(DilloHtml *html, const char *word, int size)
1227{
1228 int i, j, start;
1229 char *Pword;
1230 DilloHtmlParseMode parse_mode = S_TOP(html)->parse_mode;
1231
1232 if (S_TOP(html)->display_none)
1233 return;
1234 if ((i = html->PrevWasHtmlClose ? 1 : html->PrevWasBodyClose ? 2 : 0)) {
1235 BUG_MSG("Content after </%s> tag.", i == 1 ? "html" : "body");
1236 html->PrevWasHtmlClose = html->PrevWasBodyClose = false;
1237 }
1238
1239 if (parse_mode == DILLO_HTML_PARSE_MODE_STASH ||
1241 if (html->StashSpace) {
1242 dStr_append_c(html->Stash, ' ');
1243 html->StashSpace = false;
1244 }
1245 Pword = a_Html_parse_entities(html, word, size);
1246 dStr_append(html->Stash, Pword);
1247 dFree(Pword);
1248
1249 } else if (parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
1250 /* word goes in untouched, it is not processed here. */
1251 dStr_append_l(html->Stash, word, size);
1252 }
1253
1254 if (parse_mode == DILLO_HTML_PARSE_MODE_STASH ||
1255 parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
1256 /* skip until the closing instructions */
1257
1258 } else if (parse_mode == DILLO_HTML_PARSE_MODE_PRE) {
1259 /* all this overhead is to catch white-space entities */
1260 Pword = a_Html_parse_entities(html, word, size);
1261 for (start = i = 0; Pword[i]; start = i)
1262 if (isspace(Pword[i])) {
1263 while (Pword[++i] && isspace(Pword[i])) ;
1264 Html_process_space(html, Pword + start, i - start);
1265 } else {
1266 while (Pword[++i] && !isspace(Pword[i])) ;
1267 HT2TB(html)->addText(Pword + start, i - start, html->wordStyle ());
1268 html->pre_column += i - start;
1269 html->PreFirstChar = false;
1270 }
1271 dFree(Pword);
1272
1273 } else {
1274 const char *word2, *beyond_word2;
1275
1276 Pword = NULL;
1277 if (!memchr(word,'&', size)) {
1278 /* No entities */
1279 word2 = word;
1280 beyond_word2 = word + size;
1281 } else {
1282 /* Collapse white-space entities inside the word (except &nbsp;) */
1283 Pword = a_Html_parse_entities(html, word, size);
1284 /* Collapse adjacent " \t\f\n\r" characters into a single space */
1285 for (i = j = 0; (Pword[i] = Pword[j]); ++i, ++j) {
1286 if (strchr(" \t\f\n\r", Pword[i])) {
1287 if (i == 0 || (i > 0 && Pword[i-1] != ' '))
1288 Pword[i] = ' ';
1289 else
1290 for (--i; Pword[j+1] && strchr(" \t\f\n\r", Pword[j+1]); ++j)
1291 ;
1292 }
1293 }
1294 word2 = Pword;
1295 beyond_word2 = word2 + strlen(word2);
1296 }
1297 for (start = i = 0; word2[i]; start = i) {
1298 int len;
1299
1300 if (isspace(word2[i])) {
1301 while (word2[++i] && isspace(word2[i])) ;
1302 Html_process_space(html, word2 + start, i - start);
1303 } else if (!strncmp(word2+i, utf8_zero_width_space, 3)) {
1304 i += 3;
1305 HT2TB(html)->addBreakOption(html->wordStyle (), false);
1306 } else if (a_Utf8_ideographic(word2+i, beyond_word2, &len)) {
1307 i += len;
1308 HT2TB(html)->addText(word2 + start, i - start, html->wordStyle ());
1309 HT2TB(html)->addBreakOption(html->wordStyle (), false);
1310 } else {
1311 do {
1312 i += len;
1313 } while (word2[i] && !isspace(word2[i]) &&
1314 strncmp(word2+i, utf8_zero_width_space, 3) &&
1315 (!a_Utf8_ideographic(word2+i, beyond_word2, &len)));
1316 HT2TB(html)->addText(word2 + start, i - start, html->wordStyle ());
1317 }
1318 }
1319 if (Pword == word2)
1320 dFree(Pword);
1321 }
1322}
1323
1328static bool Html_match_tag(const char *tagstr, char *tag, int tagsize)
1329{
1330 int i;
1331
1332 for (i = 0; i < tagsize && tagstr[i] != '\0'; i++) {
1333 if (D_ASCII_TOLOWER(tagstr[i]) != D_ASCII_TOLOWER(tag[i]))
1334 return false;
1335 }
1336 /* The test for '/' is for xml compatibility: "empty/>" will be matched. */
1337 if (i < tagsize && (isspace(tag[i]) || tag[i] == '>' || tag[i] == '/'))
1338 return true;
1339 return false;
1340}
1341
1346static void Html_eventually_pop_dw(DilloHtml *html, bool hand_over_break)
1347{
1348 if (html->dw != S_TOP(html)->textblock) {
1349 if (hand_over_break)
1350 HT2TB(html)->handOverBreak (html->style ());
1351 HT2TB(html)->flush ();
1352 html->dw = S_TOP(html)->textblock;
1353 }
1354}
1355
1359static void Html_push_tag(DilloHtml *html, int tag_idx)
1360{
1361 int n_items;
1362
1363 n_items = html->stack->size ();
1364 html->stack->increase ();
1365 /* We'll copy the former stack item and just change the tag and its index
1366 * instead of copying all fields except for tag. --Jcid */
1367 *html->stack->getRef(n_items) = *html->stack->getRef(n_items - 1);
1368 html->stack->getRef(n_items)->tag_idx = tag_idx;
1369 html->dw = S_TOP(html)->textblock;
1370}
1371
1376static void Html_force_push_tag(DilloHtml *html, int tag_idx)
1377{
1378 html->startElement (tag_idx);
1379 Html_push_tag(html, tag_idx);
1380}
1381
1386{
1387 bool hand_over_break;
1388
1389 html->styleEngine->endElement (S_TOP(html)->tag_idx);
1390 hand_over_break = S_TOP(html)->hand_over_break;
1391 html->stack->setSize (html->stack->size() - 1);
1392 Html_eventually_pop_dw(html, hand_over_break);
1393}
1394
1395
1396
1397/*
1398 * Some parsing routines.
1399 */
1400
1401/*
1402 * Used by a_Html_parse_length
1403 */
1405 char **endptr)
1406{
1407 CssLength l;
1408 double v;
1409 char *end;
1410
1411 v = strtod (attr, &end);
1412 switch (*end) {
1413 case '%':
1414 end++;
1416 break;
1417
1418 case '*':
1419 end++;
1421 break;
1422/*
1423 The "px" suffix seems not allowed by HTML4.01 SPEC.
1424 case 'p':
1425 if (end[1] == 'x')
1426 end += 2;
1427*/
1428 default:
1430 break;
1431 }
1432
1433 if (endptr)
1434 *endptr = end;
1435 return l;
1436}
1437
1438
1443CssLength a_Html_parse_length (DilloHtml *html, const char *attr)
1444{
1445 CssLength l;
1446 char *end;
1447
1448 l = Html_parse_length_or_multi_length (attr, &end);
1450 /* not allowed as &Length; */
1452 else {
1453 /* allow only whitespaces */
1454 if (*end && !isspace (*end)) {
1455 BUG_MSG("Garbage after length: '%s'.", attr);
1457 }
1458 }
1459
1460 _MSG("a_Html_parse_length: \"%s\" %d\n", attr, CSS_LENGTH_VALUE(l));
1461 return l;
1462}
1463
1468int32_t a_Html_color_parse(DilloHtml *html, const char *str,
1469 int32_t default_color)
1470{
1471 int err = 1;
1472 int32_t color = a_Color_parse(str, default_color, &err);
1473
1474 if (err) {
1475 BUG_MSG("Color \"%s\" is not in \"#RRGGBB\" format.", str);
1476 }
1477 return color;
1478}
1479
1485static int
1486 Html_check_name_val(DilloHtml *html, const char *val, const char *attrname)
1487{
1488 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) {
1489 bool valid = *val && !strchr(val, ' ');
1490
1491 if (!valid) {
1492 BUG_MSG("'%s' value \"%s\" must not be empty and must not contain "
1493 "spaces.", attrname, val);
1494 }
1495 return valid ? 1 : 0;
1496 } else {
1497 int i;
1498
1499 for (i = 0; val[i]; ++i)
1500 if (!d_isascii(val[i]) || !(isalnum(val[i]) || strchr(":_.-", val[i])))
1501 break;
1502
1503 if (val[i] || !(d_isascii(val[0]) && isalpha(val[0])))
1504 BUG_MSG("%s attribute value \"%s\" is not of the form "
1505 "'[A-Za-z][A-Za-z0-9:_.-]*'.", attrname, val);
1506
1507 return !(val[i]);
1508 }
1509}
1510
1530static void Html_parse_doctype(DilloHtml *html, const char *tag, int tagsize)
1531{
1532 static const char HTML_SGML_sig [] = "<!DOCTYPE HTML PUBLIC ";
1533 static const char HTML20 [] = "-//IETF//DTD HTML";
1534 static const char HTML32 [] = "-//W3C//DTD HTML 3.2";
1535 static const char HTML40 [] = "-//W3C//DTD HTML 4.0";
1536 static const char HTML401 [] = "-//W3C//DTD HTML 4.01";
1537 static const char HTML401_url[] = "http://www.w3.org/TR/html4/";
1538 static const char XHTML1 [] = "-//W3C//DTD XHTML 1.0";
1539 static const char XHTML1_url [] = "http://www.w3.org/TR/xhtml1/DTD/";
1540 static const char XHTML11 [] = "-//W3C//DTD XHTML 1.1";
1541 static const char XHTML11_url[] = "http://www.w3.org/TR/xhtml11/DTD/";
1542
1543 size_t i;
1544 int quote;
1545 char *p, *ntag = dStrndup(tag, tagsize);
1546
1547 /* Tag sanitization: Collapse whitespace between tokens
1548 * and replace '\n' and '\r' with ' ' inside quoted strings. */
1549 for (i = 0, p = ntag; *p; ++p) {
1550 if (isspace(*p)) {
1551 for (ntag[i++] = ' '; isspace(p[1]); ++p) ;
1552 } else if ((quote = *p) == '"' || *p == '\'') {
1553 for (ntag[i++] = *p++; (ntag[i] = *p) && ntag[i++] != quote; ++p) {
1554 if (*p == '\n' || *p == '\r')
1555 ntag[i - 1] = ' ';
1556 p += (p[0] == '\r' && p[1] == '\n') ? 1 : 0;
1557 }
1558 } else {
1559 ntag[i++] = *p;
1560 }
1561 if (!*p)
1562 break;
1563 }
1564 ntag[i] = 0;
1565
1566 _MSG("New: {%s}\n", ntag);
1567
1568 if (html->DocType != DT_NONE)
1569 BUG_MSG("Multiple DOCTYPE declarations.");
1570
1571 /* The default DT_NONE type is TagSoup */
1572 if (i > strlen(HTML_SGML_sig) && // avoid out of bounds reads!
1573 !dStrnAsciiCasecmp(ntag, HTML_SGML_sig, strlen(HTML_SGML_sig))) {
1574 p = ntag + strlen(HTML_SGML_sig) + 1;
1575 if (!strncmp(p, HTML401, strlen(HTML401)) &&
1576 dStriAsciiStr(p + strlen(HTML401), HTML401_url)) {
1577 html->DocType = DT_HTML;
1578 html->DocTypeVersion = 4.01f;
1579 } else if (!strncmp(p, XHTML1, strlen(XHTML1)) &&
1580 dStriAsciiStr(p + strlen(XHTML1), XHTML1_url)) {
1581 html->DocType = DT_XHTML;
1582 html->DocTypeVersion = 1.0f;
1583 } else if (!strncmp(p, XHTML11, strlen(XHTML11)) &&
1584 dStriAsciiStr(p + strlen(XHTML11), XHTML11_url)) {
1585 html->DocType = DT_XHTML;
1586 html->DocTypeVersion = 1.1f;
1587 } else if (!strncmp(p, HTML40, strlen(HTML40))) {
1588 html->DocType = DT_HTML;
1589 html->DocTypeVersion = 4.0f;
1590 } else if (!strncmp(p, HTML32, strlen(HTML32))) {
1591 html->DocType = DT_HTML;
1592 html->DocTypeVersion = 3.2f;
1593 } else if (!strncmp(p, HTML20, strlen(HTML20))) {
1594 html->DocType = DT_HTML;
1595 html->DocTypeVersion = 2.0f;
1596 }
1597 } else if (!dStrAsciiCasecmp(ntag, "<!DOCTYPE html>") ||
1598 !dStrAsciiCasecmp(ntag, "<!DOCTYPE html >") ||
1599 !dStrAsciiCasecmp(ntag,
1600 "<!DOCTYPE html SYSTEM \"about:legacy-compat\">") ||
1601 !dStrAsciiCasecmp(ntag,
1602 "<!DOCTYPE html SYSTEM 'about:legacy-compat'>")) {
1603 html->DocType = DT_HTML;
1604 html->DocTypeVersion = 5.0f;
1605 }
1606 if (html->DocType == DT_NONE) {
1607 html->DocType = DT_UNRECOGNIZED;
1608 BUG_MSG("DOCTYPE not recognized: ('%s').", ntag);
1609 }
1610 dFree(ntag);
1611}
1612
1616static void Html_tag_open_html(DilloHtml *html, const char *tag, int tagsize)
1617{
1618 /* The IN_HTML flag will be kept set until at IN_EOF condition.
1619 * This allows to handle pages with multiple or uneven HTML tags */
1620
1621 if (!(html->InFlags & IN_HTML))
1622 html->InFlags |= IN_HTML;
1623 if (html->Num_HTML < UCHAR_MAX)
1624 ++html->Num_HTML;
1625
1626 if (html->Num_HTML > 1) {
1627 BUG_MSG("<html> was already open.");
1628 html->ReqTagClose = true;
1629 }
1630}
1631
1636{
1637 _MSG("Html_tag_close_html: Num_HTML=%d\n", html->Num_HTML);
1638
1639 /* As some Tag soup pages use multiple HTML tags, this function
1640 * gets called only on EOF and upon an extra HTML open.
1641 * Also, we defer clearing the IN_HTML flag until IN_EOF */
1642}
1643
1647static void Html_tag_open_head(DilloHtml *html, const char *tag, int tagsize)
1648{
1649 if (html->InFlags & IN_BODY) {
1650 BUG_MSG("<head> must go before the BODY section.");
1651 html->ReqTagClose = true;
1652 return;
1653 }
1654
1655 if (html->Num_HEAD < UCHAR_MAX)
1656 ++html->Num_HEAD;
1657 if (html->InFlags & IN_HEAD) {
1658 BUG_MSG("<head> was already open.");
1659 html->ReqTagClose = true;
1660 } else if (html->Num_HEAD > 1) {
1661 BUG_MSG("<head> already finished -- ignoring.");
1662 html->ReqTagClose = true;
1663 } else {
1664 html->InFlags |= IN_HEAD;
1665 }
1666}
1667
1673{
1674 if (html->InFlags & IN_HEAD) {
1675 if (html->Num_HEAD == 1) {
1676 /* match for the well formed start of HEAD section */
1677 if (html->Num_TITLE == 0)
1678 BUG_MSG("<head> lacks <title>.");
1679
1680 html->InFlags &= ~IN_HEAD;
1681
1682 /* charset is already set, load remote stylesheets now */
1683 for (int i = 0; i < html->cssUrls->size(); i++) {
1684 a_Html_load_stylesheet(html, html->cssUrls->get(i));
1685 }
1686 } else if (html->Num_HEAD > 1) {
1687 --html->Num_HEAD;
1688 }
1689 } else {
1690 /* not reached, see Html_tag_cleanup_at_close() */
1691 }
1692}
1693
1698static void Html_tag_open_title(DilloHtml *html, const char *tag, int tagsize)
1699{
1700 /* fill the stash buffer so TITLE content can be ignored
1701 * when not valid, redundant or outside HEAD section */
1702 a_Html_stash_init(html);
1703
1704 if (html->InFlags & IN_HEAD) {
1705 if (html->Num_TITLE < UCHAR_MAX)
1706 ++html->Num_TITLE;
1707 if (html->Num_TITLE > 1)
1708 BUG_MSG("Redundant <title>.");
1709 } else {
1710 BUG_MSG("<title> must be inside <head> -- ignoring.");
1711 }
1712}
1713
1719{
1720 /* title is only valid inside HEAD */
1721 if (html->InFlags & IN_HEAD && html->Num_TITLE == 1) {
1722 /* Ignore empty titles: <title></title> */
1723 char *title = html->Stash->str;
1724 if (!title || title[0] == '\0')
1725 return;
1726 a_UIcmd_set_page_title(html->bw, title);
1728 }
1729}
1730
1736static void Html_tag_open_script(DilloHtml *html, const char *tag, int tagsize)
1737{
1738 a_Html_stash_init(html);
1739 S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_VERBATIM;
1740}
1741
1746{
1747 /* eventually the stash will be sent to an interpreter for parsing */
1748}
1749
1754static void Html_tag_open_style(DilloHtml *html, const char *tag, int tagsize)
1755{
1756 const char *attrbuf;
1757
1758 html->loadCssFromStash = true;
1759
1760 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "type"))) {
1761 if (html->DocType != DT_HTML || html->DocTypeVersion <= 4.01f)
1762 BUG_MSG("<style> requires type attribute.");
1763 } else if (dStrAsciiCasecmp(attrbuf, "text/css")) {
1764 html->loadCssFromStash = false;
1765 }
1766 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "media")) &&
1767 dStrAsciiCasecmp(attrbuf, "all") && !dStriAsciiStr(attrbuf, "screen")) {
1768 /* HTML 4.01 sec. 6.13 says that media descriptors are case-sensitive,
1769 * but sec. 14.2.3 says that the attribute is case-insensitive.
1770 * TODO can be a comma-separated list.
1771 * TODO handheld.
1772 */
1773 html->loadCssFromStash = false;
1774 }
1775
1776 a_Html_stash_init(html);
1777 S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_VERBATIM;
1778}
1779
1784{
1786 html->styleEngine->parse(html, html->base_url, html->Stash->str,
1787 html->Stash->len, CSS_ORIGIN_AUTHOR);
1788}
1789
1790/*
1791 * <BODY>
1792 */
1793static void Html_tag_open_body(DilloHtml *html, const char *tag, int tagsize)
1794{
1795 const char *attrbuf;
1796 int32_t color;
1797 style::Color *bgColor;
1798 style::StyleImage *bgImage;
1799 style::BackgroundRepeat bgRepeat;
1800 style::BackgroundAttachment bgAttachment;
1801 style::Length bgPositionX, bgPositionY;
1802
1803 _MSG("Html_tag_open_body Num_BODY=%d\n", html->Num_BODY);
1804 if (!(html->InFlags & IN_BODY))
1805 html->InFlags |= IN_BODY;
1806 if (html->Num_BODY < UCHAR_MAX)
1807 ++html->Num_BODY;
1808
1809 if (html->Num_BODY > 1) {
1810 BUG_MSG("<body> was already open.");
1811 html->ReqTagClose = true;
1812 return;
1813 }
1814
1815 if (html->InFlags & IN_HEAD) {
1816 /* if we're here, it's bad XHTML, no need to recover */
1817 BUG_MSG("Unclosed <head>.");
1818 }
1819
1820 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "bgcolor"))) {
1821 color = a_Html_color_parse(html, attrbuf, -1);
1822
1823 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
1824 BUG_MSG("<body> bgcolor attribute is obsolete.");
1825
1826 if (color != -1)
1828 CSS_TYPE_COLOR, color);
1829 }
1830
1831 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "text"))) {
1832 color = a_Html_color_parse(html, attrbuf, -1);
1833
1834 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
1835 BUG_MSG("<body> text attribute is obsolete.");
1836
1837 if (color != -1)
1839 CSS_TYPE_COLOR, color);
1840 }
1841
1842 html->restyle ();
1843
1844 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "link"))) {
1845 html->non_css_link_color = a_Html_color_parse(html, attrbuf, -1);
1846 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
1847 BUG_MSG("<body> link attribute is obsolete.");
1848 }
1849
1850 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "vlink"))) {
1851 html->non_css_visited_color = a_Html_color_parse(html, attrbuf, -1);
1852 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
1853 BUG_MSG("<body> vlink attribute is obsolete.");
1854 }
1855
1856 html->dw->setStyle (html->style ());
1857
1858 bgColor = html->styleEngine->backgroundColor ();
1859 if (bgColor)
1860 HT2LT(html)->setBgColor(bgColor);
1861
1862 bgImage = html->styleEngine->backgroundImage (&bgRepeat, &bgAttachment,
1863 &bgPositionX, &bgPositionY);
1864 if (bgImage)
1865 HT2LT(html)->setBgImage(bgImage, bgRepeat, bgAttachment, bgPositionX,
1866 bgPositionY);
1867
1868 /* Determine a color for visited links.
1869 * This color is computed once per page and used for immediate feedback
1870 * when clicking a link.
1871 * On reload style including color for visited links is computed properly
1872 * according to CSS.
1873 */
1874 html->startElement (i_A);
1875 html->styleEngine->setPseudoVisited ();
1876 if (html->non_css_visited_color != -1) {
1878 html->non_css_visited_color);
1879 }
1880 html->visited_color = html->style ()->color->getColor ();
1881 html->styleEngine->endElement (i_A);
1882
1884 /* get a color that has a "safe distance" from text, link and bg */
1885 html->visited_color =
1887 html->style ()->color->getColor(),
1888 html->non_css_link_color,
1890 }
1891
1892
1893 S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_BODY;
1894}
1895
1896/*
1897 * BODY
1898 */
1900{
1901 _MSG("Html_tag_close_body: Num_BODY=%d\n", html->Num_BODY);
1902
1903 /* As some Tag soup pages use multiple BODY tags, this function
1904 * gets called only on EOF and upon an extra BODY open.
1905 * Also, we defer clearing the IN_BODY flag until IN_EOF */
1906}
1907
1908/*
1909 * <P>
1910 * TODO: what's the point between adding the parbreak before and
1911 * after the push?
1912 */
1913static void Html_tag_open_p(DilloHtml *html, const char *tag, int tagsize)
1914{
1915 CssPropertyList props;
1916
1917 a_Html_tag_set_align_attr (html, tag, tagsize);
1918}
1919
1920/*
1921 * <FRAME>, <IFRAME>
1922 * TODO: This is just a temporary fix while real frame support
1923 * isn't finished. Imitates lynx/w3m's frames.
1924 */
1925static void Html_tag_open_frame (DilloHtml *html, const char *tag, int tagsize)
1926{
1927 const char *attrbuf;
1928 DilloUrl *url;
1929 CssPropertyList props;
1930
1931 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "src")))
1932 return;
1933
1934 if (!(url = a_Html_url_new(html, attrbuf, NULL, 0)))
1935 return;
1936
1938 /* visited frame */
1939 html->styleEngine->setPseudoVisited ();
1940 } else {
1941 /* unvisited frame */
1942 html->styleEngine->setPseudoLink ();
1943 }
1944
1946 Html_set_new_link(html,&url));
1947}
1948
1949static void
1950 Html_tag_content_frame (DilloHtml *html, const char *tag, int tagsize)
1951{
1952 const char *attrbuf;
1953 char *src;
1954 Textblock *textblock;
1955 Widget *bullet;
1956
1957 textblock = HT2TB(html);
1958
1959 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "src")))
1960 return;
1961
1962 src = dStrdup(attrbuf);
1963
1964 textblock->addParbreak (5, html->wordStyle ());
1965
1966 bullet = new Bullet();
1967 textblock->addWidget(bullet, html->wordStyle ());
1968 textblock->addSpace(html->wordStyle ());
1969
1970 if (D_ASCII_TOLOWER(tag[1]) == 'i') {
1971 /* IFRAME usually comes with very long advertising/spying URLS,
1972 * to not break rendering we will force name="IFRAME" */
1973 textblock->addText ("IFRAME", html->wordStyle ());
1974
1975 } else {
1976 /* FRAME:
1977 * If 'name' tag is present use it, if not use 'src' value */
1978 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "name"))) {
1979 textblock->addText (src, html->wordStyle ());
1980 } else {
1981 textblock->addText (attrbuf, html->wordStyle ());
1982 }
1983 }
1984
1985 textblock->addParbreak (5, html->wordStyle ());
1986
1987 dFree(src);
1988}
1989
1990/*
1991 * <FRAMESET>
1992 * TODO: This is just a temporary fix while real frame support
1993 * isn't finished. Imitates lynx/w3m's frames.
1994 */
1996 const char *tag, int tagsize)
1997{
1998 HT2TB(html)->addParbreak (9, html->wordStyle ());
1999 HT2TB(html)->addText("--FRAME--", html->wordStyle ());
2000 Html_add_textblock(html, true, 5, false);
2001}
2002
2003/*
2004 * <H1> | <H2> | <H3> | <H4> | <H5> | <H6>
2005 */
2006static void Html_tag_open_h(DilloHtml *html, const char *tag, int tagsize)
2007{
2008 a_Html_tag_set_align_attr (html, tag, tagsize);
2009
2010 a_Html_stash_init(html);
2011 S_TOP(html)->parse_mode =
2013}
2014
2015/*
2016 * <BR>
2017 */
2018static void Html_tag_content_br(DilloHtml *html, const char *tag, int tagsize)
2019{
2020 HT2TB(html)->addLinebreak (html->wordStyle ());
2021}
2022
2023/*
2024 * <FONT>
2025 */
2026static void Html_tag_open_font(DilloHtml *html, const char *tag, int tagsize)
2027{
2028 const char *attrbuf;
2029 char *fontFamily = NULL;
2030 int32_t color;
2031
2032 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "color"))) {
2034 color = html->visited_color;
2035 } else {
2036 /* use the tag-specified color */
2037 color = a_Html_color_parse(html, attrbuf, -1);
2038 }
2039 if (color != -1)
2041 CSS_TYPE_COLOR, color);
2042 }
2043
2044 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "face"))) {
2045 fontFamily = dStrdup(attrbuf);
2047 CSS_TYPE_SYMBOL, fontFamily);
2048 }
2049
2050 dFree(fontFamily);
2051}
2052
2053/*
2054 * <ABBR>
2055 */
2056static void Html_tag_open_abbr(DilloHtml *html, const char *tag, int tagsize)
2057{
2058 const char *attrbuf;
2059
2061
2062 if (prefs.show_tooltip &&
2063 (attrbuf = a_Html_get_attr(html, tag, tagsize, "title"))) {
2064
2066 attrbuf);
2067 }
2068}
2069
2073void a_Html_common_image_attrs(DilloHtml *html, const char *tag, int tagsize)
2074{
2075 char *width_ptr, *height_ptr;
2076 const char *attrbuf;
2079 int w = 0, h = 0;
2080
2081 if (prefs.show_tooltip &&
2082 (attrbuf = a_Html_get_attr(html, tag, tagsize, "title"))) {
2084 attrbuf);
2085 }
2086 width_ptr = a_Html_get_attr_wdef(html, tag, tagsize, "width", NULL);
2087 height_ptr = a_Html_get_attr_wdef(html, tag, tagsize, "height", NULL);
2088 // Check for malicious values
2089 // TODO: the same for percentage and relative lengths.
2090 if (width_ptr) {
2091 l_w = a_Html_parse_length (html, width_ptr);
2092 w = (int) (CSS_LENGTH_TYPE(l_w) == CSS_LENGTH_TYPE_PX ?
2093 CSS_LENGTH_VALUE(l_w) : 0);
2094 }
2095 if (height_ptr) {
2096 l_h = a_Html_parse_length (html, height_ptr);
2097 h = (int) (CSS_LENGTH_TYPE(l_h) == CSS_LENGTH_TYPE_PX ?
2098 CSS_LENGTH_VALUE(l_h) : 0);
2099 }
2100 /* Check for suspicious image size request that would cause
2101 * an excessive amount of memory to be allocated for the
2102 * image buffer.
2103 * Be careful to avoid integer overflows during the checks.
2104 * There is an additional check in dw/image.cc to catch cases
2105 * where only one dimension is given and the image is scaled
2106 * preserving its original aspect ratio.
2107 * Size requests passed via CSS are also checked there.
2108 */
2109 if (w < 0 || h < 0 ||
2110 w > IMAGE_MAX_AREA || h > IMAGE_MAX_AREA ||
2111 (h > 0 && w > IMAGE_MAX_AREA / h)) {
2112 dFree(width_ptr);
2113 dFree(height_ptr);
2114 width_ptr = height_ptr = NULL;
2115 MSG("a_Html_common_image_attrs: suspicious image size request %d x %d\n",
2116 w, h);
2117 } else {
2124 }
2125
2126 /* TODO: we should scale the image respecting its ratio.
2127 * As the image size is not known at this time, maybe a flag
2128 * can be set to scale it later.
2129 if ((width_ptr && !height_ptr) || (height_ptr && !width_ptr))
2130 [...]
2131 */
2132
2133 /* x_img is an index to a list of {url,image} pairs.
2134 * We know a_Html_image_new() will use size() as its next index */
2136 html->images->size());
2137
2138
2139 dFree(width_ptr);
2140 dFree(height_ptr);
2141}
2142
2143DilloImage *a_Html_image_new(DilloHtml *html, const char *tag, int tagsize)
2144{
2145 bool load_now;
2146 char *alt_ptr;
2147 const char *attrbuf;
2148 DilloUrl *url;
2150
2151 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "src")) ||
2152 !(url = a_Html_url_new(html, attrbuf, NULL, 0)))
2153 return NULL;
2154
2155 alt_ptr = a_Html_get_attr_wdef(html, tag, tagsize, "alt", NULL);
2156 if (!alt_ptr || !*alt_ptr) {
2157 dFree(alt_ptr);
2158 alt_ptr = dStrdup("[IMG]");
2159 }
2160
2161 dw::Image *dw = new dw::Image(alt_ptr);
2162 image =
2163 a_Image_new(html->dw->getLayout(), (void*)(dw::core::ImgRenderer*)dw, 0, 0);
2164
2166
2167 if (HT2TB(html)->getBgColor())
2168 image->bg_color = HT2TB(html)->getBgColor()->getColor();
2169
2170 if (HT2TB(html)->getFgColor())
2171 image->fg_color = HT2TB(html)->getFgColor()->getColor();
2172
2174 hi->url = url;
2175 html->images->increase();
2176 html->images->set(html->images->size() - 1, hi);
2177
2178 load_now = prefs.load_images ||
2179 !dStrAsciiCasecmp(URL_SCHEME(url), "data") ||
2181
2182 if (load_now && Html_load_image(html->bw, url, html->page_url, image)) {
2183 // hi->image is NULL if dillo tries to load the image immediately
2184 hi->image = NULL;
2186 } else {
2187 // otherwise a reference is kept in html->images
2188 hi->image = image;
2189 }
2190
2191 dFree(alt_ptr);
2192 return image;
2193}
2194
2199 const DilloUrl *requester, DilloImage *Image)
2200{
2201 DilloWeb *Web;
2202 int ClientKey;
2203 /* Fill a Web structure for the cache query */
2204 Web = a_Web_new(bw, url, requester);
2205 Web->Image = Image;
2207 Web->flags |= WEB_Image;
2208 /* Request image data from the cache */
2209 if ((ClientKey = a_Capi_open_url(Web, NULL, NULL)) != 0) {
2210 a_Bw_add_client(bw, ClientKey, 0);
2211 a_Bw_add_url(bw, url);
2212 }
2213 return ClientKey != 0;
2214}
2215
2216static void Html_tag_open_img(DilloHtml *html, const char *tag, int tagsize)
2217{
2218 int border;
2219 const char *attrbuf;
2220
2221 a_Html_common_image_attrs(html, tag, tagsize);
2222
2223 /* Spacing to the left and right */
2224 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "hspace"))) {
2225 int space = strtol(attrbuf, NULL, 10);
2226 if (space > 0) {
2232 }
2233 }
2234
2235 /* Spacing at the top and bottom */
2236 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "vspace"))) {
2237 int space = strtol(attrbuf, NULL, 10);
2238 if (space > 0) {
2244 }
2245 }
2246
2247 /* Border */
2248 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "border"))) {
2249 border = strtol(attrbuf, NULL, 10);
2250 if (border >= 0) {
2260
2269 }
2270 }
2271
2272}
2273
2279static void Html_tag_content_img(DilloHtml *html, const char *tag, int tagsize)
2280{
2282 DilloUrl *usemap_url;
2283 const char *attrbuf;
2284
2285 /* This avoids loading images. Useful for viewing suspicious HTML email. */
2286 if (URL_FLAGS(html->base_url) & URL_SpamSafe)
2287 return;
2288
2289 Image = a_Html_image_new(html, tag, tagsize);
2290 if (!Image)
2291 return;
2292
2293 usemap_url = NULL;
2294 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "usemap")))
2295 /* TODO: usemap URLs outside of the document are not used. */
2296 usemap_url = a_Html_url_new(html, attrbuf, NULL, 0);
2297
2298 // At this point, we know that Image->ir represents an image
2299 // widget. Notice that the order of the casts matters, because of
2300 // multiple inheritance.
2301 dw::Image *dwi = (dw::Image*)(dw::core::ImgRenderer*)Image->img_rndr;
2302 HT2TB(html)->addWidget(dwi, html->style());
2303 HT2TB(html)->addBreakOption (html->style (), false);
2304
2305 /* Image maps */
2306 if (a_Html_get_attr(html, tag, tagsize, "ismap")) {
2307 dwi->setIsMap();
2308 _MSG(" Html_tag_open_img: server-side map (ISMAP)\n");
2309 } else if (html->style ()->x_link != -1 &&
2310 usemap_url == NULL) {
2311 /* For simple links, we have to suppress the "image_pressed" signal.
2312 * This is overridden for USEMAP images. */
2313// a_Dw_widget_set_button_sensitive (IM2DW(Image->dw), FALSE);
2314 }
2315
2316 if (usemap_url) {
2317 dwi->setUseMap(&html->maps, new ::object::String(URL_STR(usemap_url)));
2318 a_Url_free (usemap_url);
2319 }
2320}
2321
2322/*
2323 * <map>
2324 */
2325static void Html_tag_content_map(DilloHtml *html, const char *tag, int tagsize)
2326{
2327 char *hash_name;
2328 const char *attrbuf;
2329 DilloUrl *url;
2330
2331 if (html->InFlags & IN_MAP) {
2332 BUG_MSG("Nested <map>.");
2333 } else {
2334 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "name"))) {
2335 html->InFlags |= IN_MAP;
2336 hash_name = dStrconcat("#", attrbuf, NULL);
2337 url = a_Html_url_new(html, hash_name, NULL, 0);
2338 html->maps.startNewMap(new ::object::String(URL_STR(url)));
2339 a_Url_free (url);
2340 dFree(hash_name);
2341 } else {
2342 BUG_MSG("<map> requires name attribute.");
2343 }
2344 }
2345}
2346
2351{
2352 /* This is a hack for the perhaps frivolous feature of drawing image map
2353 * shapes when there is no image to display. If this map is defined after
2354 * an image that has not been loaded (img != NULL), tell the image to
2355 * redraw. (It will only do so if it uses a map.)
2356 */
2357 for (int i = 0; i < html->images->size(); i++) {
2358 DilloImage *img = html->images->get(i)->image;
2359
2360 if (img) {
2361 // At this point, we know that img->ir represents an image
2362 // widget. (Really? Is this assumption safe?) Notice that the
2363 // order of the casts matters, because of multiple
2364 // inheritance.
2366 dwi->forceMapRedraw();
2367 }
2368 }
2369 html->InFlags &= ~IN_MAP;
2370}
2371
2375static
2377{
2378 int coord;
2379 const char *tail = str;
2380 char *newtail = NULL;
2382
2383 while (1) {
2384 coord = strtol(tail, &newtail, 10);
2385 if (coord == 0 && newtail == tail)
2386 break;
2387 coords->increase();
2388 coords->set(coords->size() - 1, coord);
2389 while (isspace(*newtail))
2390 newtail++;
2391 if (!*newtail)
2392 break;
2393 if (*newtail != ',') {
2394 BUG_MSG("<area> coords must be integers separated by commas.");
2395 }
2396 tail = newtail + 1;
2397 }
2398
2399 return coords;
2400}
2401
2402/*
2403 * <AREA>
2404 */
2405static void
2406 Html_tag_content_area(DilloHtml *html, const char *tag, int tagsize)
2407{
2408 enum types {UNKNOWN, RECTANGLE, CIRCLE, POLYGON, BACKGROUND};
2409 types type;
2410 misc::SimpleVector<int> *coords = NULL;
2411 DilloUrl* url;
2412 const char *attrbuf;
2413 int link = -1;
2414 Shape *shape = NULL;
2415
2416 if (!(html->InFlags & IN_MAP)) {
2417 BUG_MSG("<area> not inside <map>.");
2418 return;
2419 }
2420 attrbuf = a_Html_get_attr(html, tag, tagsize, "shape");
2421
2422 if (!attrbuf || !*attrbuf || !dStrAsciiCasecmp(attrbuf, "rect")) {
2423 /* the default shape is a rectangle */
2424 type = RECTANGLE;
2425 } else if (dStrAsciiCasecmp(attrbuf, "default") == 0) {
2426 /* "default" is the background */
2427 type = BACKGROUND;
2428 } else if (dStrAsciiCasecmp(attrbuf, "circle") == 0) {
2429 type = CIRCLE;
2430 } else if (dStrnAsciiCasecmp(attrbuf, "poly", 4) == 0) {
2431 type = POLYGON;
2432 } else {
2433 BUG_MSG("<area> unknown shape: '%s'.", attrbuf);
2434 type = UNKNOWN;
2435 }
2436 if (type == RECTANGLE || type == CIRCLE || type == POLYGON) {
2437 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "coords"))) {
2438 coords = Html_read_coords(html, attrbuf);
2439
2440 if (type == RECTANGLE) {
2441 if (coords->size() != 4)
2442 BUG_MSG("<area> rectangle must have four coordinate values.");
2443 if (coords->size() >= 4)
2444 shape = new Rectangle(coords->get(0),
2445 coords->get(1),
2446 coords->get(2) - coords->get(0),
2447 coords->get(3) - coords->get(1));
2448 } else if (type == CIRCLE) {
2449 if (coords->size() != 3)
2450 BUG_MSG("<area> circle must have three coordinate values.");
2451 if (coords->size() >= 3)
2452 shape = new Circle(coords->get(0), coords->get(1),
2453 coords->get(2));
2454 } else if (type == POLYGON) {
2455 Polygon *poly;
2456 int i;
2457 if (coords->size() % 2)
2458 BUG_MSG("<area> polygon with odd number of coordinates.");
2459 shape = poly = new Polygon();
2460 for (i = 0; i < (coords->size() / 2); i++)
2461 poly->addPoint(coords->get(2*i), coords->get(2*i + 1));
2462 }
2463 delete(coords);
2464 }
2465 }
2466 if (shape != NULL || type == BACKGROUND) {
2467 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "href"))) {
2468 url = a_Html_url_new(html, attrbuf, NULL, 0);
2469 dReturn_if_fail ( url != NULL );
2470
2471 link = Html_set_new_link(html, &url);
2472 }
2473 if (type == BACKGROUND)
2474 html->maps.setCurrentMapDefaultLink(link);
2475 else
2476 html->maps.addShapeToCurrentMap(shape, link);
2477 }
2478}
2479
2480/*
2481 * <OBJECT>
2482 * Simply provide a link if the object is something downloadable.
2483 */
2484static void Html_tag_open_object(DilloHtml *html, const char *tag, int tagsize)
2485{
2486 DilloUrl *url, *base_url = NULL;
2487 const char *attrbuf;
2488
2489 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "codebase"))) {
2490 base_url = a_Html_url_new(html, attrbuf, NULL, 0);
2491 }
2492
2493 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "data"))) {
2494 url = a_Html_url_new(html, attrbuf,
2495 URL_STR(base_url), (base_url != NULL));
2496 dReturn_if_fail ( url != NULL );
2497
2499 html->styleEngine->setPseudoVisited ();
2500 } else {
2501 html->styleEngine->setPseudoLink ();
2502 }
2503
2505 Html_set_new_link(html, &url));
2506 }
2508}
2509
2510static void Html_tag_content_object(DilloHtml *html, const char *tag,
2511 int tagsize)
2512{
2513 if (a_Html_get_attr(html, tag, tagsize, "data"))
2514 HT2TB(html)->addText("[OBJECT]", html->wordStyle ());
2515}
2516
2517/*
2518 * <VIDEO>
2519 * Provide a link to the video.
2520 */
2521static void Html_tag_open_video(DilloHtml *html, const char *tag, int tagsize)
2522{
2523 DilloUrl *url;
2524 const char *attrbuf;
2525
2526 if (html->InFlags & IN_MEDIA) {
2527 MSG("<video> not handled when already inside a media element.\n");
2528 return;
2529 }
2530 /* TODO: poster attr */
2531
2532 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "src"))) {
2533 url = a_Html_url_new(html, attrbuf, NULL, 0);
2534 dReturn_if_fail ( url != NULL );
2535
2537 html->styleEngine->setPseudoVisited ();
2538 } else {
2539 html->styleEngine->setPseudoLink ();
2540 }
2541
2543 Html_set_new_link(html, &url));
2544
2545 HT2TB(html)->addText("[VIDEO]", html->wordStyle ());
2546 }
2547 html->InFlags |= IN_MEDIA;
2548}
2549
2550/*
2551 * <AUDIO>
2552 * Provide a link to the audio.
2553 */
2554static void Html_tag_open_audio(DilloHtml *html, const char *tag, int tagsize)
2555{
2556 DilloUrl *url;
2557 const char *attrbuf;
2558
2559 if (html->InFlags & IN_MEDIA) {
2560 MSG("<audio> not handled when already inside a media element.\n");
2561 return;
2562 }
2563
2564 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "src"))) {
2565 url = a_Html_url_new(html, attrbuf, NULL, 0);
2566 dReturn_if_fail ( url != NULL );
2567
2569 html->styleEngine->setPseudoVisited ();
2570 } else {
2571 html->styleEngine->setPseudoLink ();
2572 }
2573
2575 Html_set_new_link(html, &url));
2576
2577 HT2TB(html)->addText("[AUDIO]", html->wordStyle ());
2578 }
2579 html->InFlags |= IN_MEDIA;
2580}
2581
2582/*
2583 * <SOURCE>
2584 * Media resource; provide a link to its address.
2585 */
2586static void Html_tag_open_source(DilloHtml *html, const char *tag,
2587 int tagsize)
2588{
2589 const char *attrbuf;
2590
2591 if (!(html->InFlags & IN_MEDIA)) {
2592 // Can also be inside a picture element.
2593 // BUG_MSG("<source> not inside a media element.");
2594 return;
2595 }
2596 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "src"))) {
2597 BUG_MSG("<source> requires src attribute.");
2598 return;
2599 } else {
2600 DilloUrl *url = a_Html_url_new(html, attrbuf, NULL, 0);
2601
2602 dReturn_if_fail ( url != NULL );
2603
2605 html->styleEngine->setPseudoVisited ();
2606 } else {
2607 html->styleEngine->setPseudoLink ();
2608 }
2610 Html_set_new_link(html, &url));
2611 }
2612}
2613
2614static void Html_tag_content_source(DilloHtml *html, const char *tag,
2615 int tagsize)
2616{
2617 if ((html->InFlags & IN_MEDIA) && a_Html_get_attr(html, tag, tagsize,"src"))
2618 HT2TB(html)->addText("[MEDIA SOURCE]", html->wordStyle ());
2619}
2620
2625{
2626 html->InFlags &= ~IN_MEDIA;
2627}
2628
2629/*
2630 * <EMBED>
2631 * Provide a link to embedded content.
2632 */
2633static void Html_tag_open_embed(DilloHtml *html, const char *tag, int tagsize)
2634{
2635 const char *attrbuf;
2636
2637 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "src"))) {
2638 DilloUrl *url = a_Html_url_new(html, attrbuf, NULL, 0);
2639
2640 dReturn_if_fail ( url != NULL );
2641
2643 html->styleEngine->setPseudoVisited ();
2644 } else {
2645 html->styleEngine->setPseudoLink ();
2646 }
2647
2649 Html_set_new_link(html, &url));
2650 }
2651}
2652
2653static void Html_tag_content_embed(DilloHtml *html,const char *tag,int tagsize)
2654{
2655 if (a_Html_get_attr(html, tag, tagsize, "src"))
2656 HT2TB(html)->addText("[EMBED]", html->wordStyle ());
2657}
2658
2662static const char* Html_get_javascript_link(DilloHtml *html)
2663{
2664 size_t i;
2665 char ch, *p1, *p2;
2666 Dstr *Buf = html->attr_data;
2667
2668 if (dStrnAsciiCasecmp("javascript", Buf->str, 10) == 0) {
2669 i = strcspn(Buf->str, "'\"");
2670 ch = Buf->str[i];
2671 if ((ch == '"' || ch == '\'') &&
2672 (p2 = strchr(Buf->str + i + 1 , ch))) {
2673 p1 = Buf->str + i;
2674 BUG_MSG("Link depends on javascript().");
2675 dStr_truncate(Buf, p2 - Buf->str);
2676 dStr_erase(Buf, 0, p1 - Buf->str + 1);
2677 }
2678 }
2679 return Buf->str;
2680}
2681
2685static void Html_add_anchor(DilloHtml *html, const char *name)
2686{
2687 _MSG("Registering ANCHOR: %s\n", name);
2688 if (!HT2TB(html)->addAnchor (name, html->style ()))
2689 BUG_MSG("Anchor names must be unique within the document (\"%s\").",
2690 name);
2691 /*
2692 * According to Sec. 12.2.1 of the HTML 4.01 spec, "anchor names that
2693 * differ only in case may not appear in the same document", but
2694 * "comparisons between fragment identifiers and anchor names must be
2695 * done by exact (case-sensitive) match." We ignore the case issue and
2696 * always test for exact matches. Moreover, what does uppercase mean
2697 * for Unicode characters outside the ASCII range?
2698 */
2699}
2700
2701/*
2702 * <A>
2703 */
2704static void Html_tag_open_a(DilloHtml *html, const char *tag, int tagsize)
2705{
2706 DilloUrl *url;
2707 const char *attrbuf;
2708
2709 /* TODO: add support for MAP with A HREF */
2710 html->InFlags |= IN_A;
2711 if (html->InFlags & IN_MAP)
2712 Html_tag_content_area(html, tag, tagsize);
2713
2714 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "href"))) {
2715 /* if it's a javascript link, extract the reference. */
2716 if (D_ASCII_TOLOWER(attrbuf[0]) == 'j')
2717 attrbuf = Html_get_javascript_link(html);
2718
2719 url = a_Html_url_new(html, attrbuf, NULL, 0);
2720 dReturn_if_fail ( url != NULL );
2721
2723 html->InVisitedLink = true;
2724 html->styleEngine->setPseudoVisited ();
2725 if (html->non_css_visited_color != -1)
2728 html->non_css_visited_color);
2729 } else {
2730 html->styleEngine->setPseudoLink ();
2731 if (html->non_css_link_color != -1)
2734 html->non_css_link_color);
2735 }
2736
2738 Html_set_new_link(html, &url));
2739 }
2740 if (prefs.show_tooltip &&
2741 (attrbuf = a_Html_get_attr(html, tag, tagsize, "title"))) {
2743 attrbuf);
2744 }
2745
2747
2748 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "name"))) {
2749 char *nameVal;
2750 const char *id = html->styleEngine->getId ();
2751
2753 Html_check_name_val(html, attrbuf, "name");
2754
2755 nameVal = a_Url_decode_hex_str(attrbuf);
2756
2757 if (nameVal) {
2758 /* We compare the "id" value with the url-decoded "name" value */
2759 if (!id || strcmp(nameVal, id)) {
2760 if (id)
2761 BUG_MSG("In <a>, id ('%s') and name ('%s') attributes differ.",
2762 id, nameVal);
2763 Html_add_anchor(html, nameVal);
2764 }
2765
2766 dFree(nameVal);
2767 }
2768 }
2769}
2770
2771/*
2772 * <A> close function
2773 */
2774static void Html_tag_close_a(DilloHtml *html)
2775{
2776 html->InFlags &= ~IN_A;
2777 html->InVisitedLink = false;
2778}
2779
2780/*
2781 * <BLOCKQUOTE>
2782 */
2784 const char *tag, int tagsize)
2785{
2786 Html_add_textblock(html, true, 9, false);
2787}
2788
2789/*
2790 * <Q>
2791 */
2792static void Html_tag_open_q(DilloHtml *html, const char *tag, int tagsize)
2793{
2794 /*
2795 * Left Double Quotation Mark, which is wrong in many cases, but
2796 * should at least be widely recognized.
2797 */
2798 const char *U201C = "\xe2\x80\x9c";
2799
2801 HT2TB(html)->addText (U201C, html->wordStyle ());
2802}
2803
2804/*
2805 * </Q>
2806 */
2807static void Html_tag_close_q(DilloHtml *html)
2808{
2809 /* Right Double Quotation Mark */
2810 const char *U201D = "\xe2\x80\x9d";
2811
2812 HT2TB(html)->addText (U201D, html->wordStyle ());
2813}
2814
2815/*
2816 * Handle the <UL> tag.
2817 */
2818static void Html_tag_open_ul(DilloHtml *html, const char *tag, int tagsize)
2819{
2820 const char *attrbuf;
2821 ListStyleType list_style_type;
2822
2823 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "type"))) {
2824
2825 /* list_style_type explicitly defined */
2826 if (dStrAsciiCasecmp(attrbuf, "disc") == 0)
2827 list_style_type = LIST_STYLE_TYPE_DISC;
2828 else if (dStrAsciiCasecmp(attrbuf, "circle") == 0)
2829 list_style_type = LIST_STYLE_TYPE_CIRCLE;
2830 else if (dStrAsciiCasecmp(attrbuf, "square") == 0)
2831 list_style_type = LIST_STYLE_TYPE_SQUARE;
2832 else
2833 /* invalid value */
2834 list_style_type = LIST_STYLE_TYPE_DISC;
2835
2837 CSS_TYPE_ENUM, list_style_type);
2838 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
2839 BUG_MSG("<ul> type attribute is obsolete.");
2840 }
2841
2842 S_TOP(html)->list_type = HTML_LIST_UNORDERED;
2843 S_TOP(html)->list_number = 0;
2844 S_TOP(html)->ref_list_item = NULL;
2845}
2846
2847/*
2848 * Handle the <DIR> or <MENU> tag.
2849 * (Deprecated and almost the same as <UL>)
2850 */
2851static void Html_tag_open_dir(DilloHtml *html, const char *tag, int tagsize)
2852{
2854 HT2TB(html)->addParbreak (9, html->wordStyle ());
2855
2856 S_TOP(html)->list_type = HTML_LIST_UNORDERED;
2857 S_TOP(html)->list_number = 0;
2858 S_TOP(html)->ref_list_item = NULL;
2859
2861 BUG_MSG("Obsolete list type; use <ul> instead.");
2862}
2863
2864/*
2865 * Handle the <MENU> tag.
2866 */
2867static void Html_tag_open_menu(DilloHtml *html, const char *tag, int tagsize)
2868{
2869 /* In another bit of ridiculous mess from the HTML5 world, the menu
2870 * element, which was deprecated in HTML4:
2871 * - does not appear at all in W3C's HTML5 spec
2872 * - appears in WHATWG's HTML5 doc and the W3C's 5.1 draft, where it
2873 * means something totally different than it did in the old days
2874 * (now it's for popup menus and toolbar menus rather than being a
2875 * sort of list).
2876 */
2877 if (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f))
2878 Html_tag_open_dir(html, tag, tagsize);
2879}
2880
2881/*
2882 * Handle the <OL> tag.
2883 */
2884static void Html_tag_open_ol(DilloHtml *html, const char *tag, int tagsize)
2885{
2886 const char *attrbuf;
2887 int n = 1;
2888
2889 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "type"))) {
2890 ListStyleType listStyleType = LIST_STYLE_TYPE_DECIMAL;
2891
2892 if (*attrbuf == '1')
2893 listStyleType = LIST_STYLE_TYPE_DECIMAL;
2894 else if (*attrbuf == 'a')
2895 listStyleType = LIST_STYLE_TYPE_LOWER_ALPHA;
2896 else if (*attrbuf == 'A')
2897 listStyleType = LIST_STYLE_TYPE_UPPER_ALPHA;
2898 else if (*attrbuf == 'i')
2899 listStyleType = LIST_STYLE_TYPE_LOWER_ROMAN;
2900 else if (*attrbuf == 'I')
2901 listStyleType = LIST_STYLE_TYPE_UPPER_ROMAN;
2902
2904 CSS_TYPE_ENUM, listStyleType);
2905 }
2906
2907 S_TOP(html)->list_type = HTML_LIST_ORDERED;
2908
2909 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "start")) &&
2910 (n = (int) strtol(attrbuf, NULL, 10)) < 0) {
2911 BUG_MSG("Illegal '-' character in START attribute; Starting from 0.");
2912 n = 0;
2913 }
2914 S_TOP(html)->list_number = n;
2915 S_TOP(html)->ref_list_item = NULL;
2916}
2917
2918/*
2919 * Handle the <LI> tag.
2920 */
2921static void Html_tag_open_li(DilloHtml *html, const char *tag, int tagsize)
2922{
2923 Style *style = html->style ();
2924 int *list_number;
2925 const char *attrbuf;
2926
2927 if (S_TOP(html)->list_type == HTML_LIST_NONE &&
2928 !(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) {
2929 /* In WHATWG's HTML5 and W3C's HTML 5.1, LI can appear within MENUs
2930 * of the toolbar type.
2931 */
2932 BUG_MSG("<li> outside <ul> or <ol>.");
2933 }
2934
2935 html->InFlags |= IN_LI;
2936
2937 /* Get our parent tag's variables (used as state storage) */
2938 list_number = &html->stack->getRef(html->stack->size()-2)->list_number;
2939
2941 // ordered
2942 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "value")) &&
2943 (*list_number = strtol(attrbuf, NULL, 10)) < 0) {
2944 BUG_MSG("Illegal negative list value attribute; Starting from 0.");
2945 *list_number = 0;
2946 }
2947 }
2948}
2949
2950/*
2951 * Close <LI>.
2952 */
2954{
2955 html->InFlags &= ~IN_LI;
2956 ((ListItem *)html->dw)->flush ();
2957}
2958
2959/*
2960 * <HR>
2961 */
2962static void Html_tag_open_hr(DilloHtml *html, const char *tag, int tagsize)
2963{
2964 char *width_ptr;
2965 const char *attrbuf;
2966 int32_t size = 0;
2967
2968 width_ptr = a_Html_get_attr_wdef(html, tag, tagsize, "width", NULL);
2969 if (width_ptr) {
2970 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
2971 BUG_MSG("<hr> width attribute is obsolete.");
2974 a_Html_parse_length (html, width_ptr));
2975 dFree(width_ptr);
2976 }
2977
2978 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "size"))) {
2979 size = strtol(attrbuf, NULL, 10);
2980 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
2981 BUG_MSG("<hr> size attribute is obsolete.");
2982 }
2983
2984 a_Html_tag_set_align_attr(html, tag, tagsize);
2985
2986 /* TODO: evaluate attribute */
2987 if (a_Html_get_attr(html, tag, tagsize, "noshade")) {
2988 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
2989 BUG_MSG("<hr> noshade attribute is obsolete.");
2998
2999 if (size <= 0)
3000 size = 1;
3001 }
3002
3003 if (size > 0) {
3004 CssLength size_top = CSS_CREATE_LENGTH ((size+1)/2, CSS_LENGTH_TYPE_PX);
3005 CssLength size_bottom = CSS_CREATE_LENGTH (size / 2, CSS_LENGTH_TYPE_PX);
3007 CSS_TYPE_LENGTH_PERCENTAGE, size_top);
3009 CSS_TYPE_LENGTH_PERCENTAGE, size_top);
3012 size_bottom);
3015 size_bottom);
3016 }
3017
3018}
3019
3020static void Html_tag_content_hr(DilloHtml *html, const char *tag, int tagsize)
3021{
3022 Widget *hruler;
3023 HT2TB(html)->addParbreak (5, html->wordStyle ());
3024
3025 hruler = new Ruler();
3026 hruler->setStyle (html->style ());
3027 HT2TB(html)->addWidget (hruler, html->style ());
3028 HT2TB(html)->addParbreak (5, html->wordStyle ());
3029}
3030
3031/*
3032 * <DL>
3033 */
3034static void Html_tag_open_dl(DilloHtml *html, const char *tag, int tagsize)
3035{
3036 /* may want to actually do some stuff here. */
3038 HT2TB(html)->addParbreak (9, html->wordStyle ());
3039}
3040
3041/*
3042 * <DT>
3043 */
3044static void Html_tag_open_dt(DilloHtml *html, const char *tag, int tagsize)
3045{
3047 HT2TB(html)->addParbreak (9, html->wordStyle ());
3048}
3049
3050/*
3051 * <DD>
3052 */
3053static void Html_tag_open_dd(DilloHtml *html, const char *tag, int tagsize)
3054{
3055 Html_add_textblock(html, true, 9, false);
3056}
3057
3058/*
3059 * <PRE>
3060 */
3061static void Html_tag_open_pre(DilloHtml *html, const char *tag, int tagsize)
3062{
3064 HT2TB(html)->addParbreak (9, html->wordStyle ());
3065
3066 html->InFlags |= IN_PRE;
3067}
3068
3069/*
3070 * Custom close for <PRE>
3071 */
3073{
3074 html->InFlags &= ~IN_PRE;
3075}
3076
3081static int Html_tag_pre_excludes(DilloHtml *html, int tag_idx)
3082{
3083 if (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) {
3084 /* HTML5 doesn't say anything about excluding elements */
3085 const char *es_set[] = {"img", "object", "applet", "big", "small", "sub",
3086 "sup", "font", "basefont", NULL};
3087 static int ei_set[10], i;
3088
3089 /* initialize array */
3090 if (!ei_set[0])
3091 for (i = 0; es_set[i]; ++i)
3092 ei_set[i] = a_Html_tag_index(es_set[i]);
3093
3094 for (i = 0; ei_set[i]; ++i)
3095 if (tag_idx == ei_set[i])
3096 return 1;
3097 }
3098 return 0;
3099}
3100
3104static void Html_update_content_type(DilloHtml *html, const char *content)
3105{
3106 const char *new_content = a_Capi_set_content_type(html->page_url, content,
3107 "meta");
3108 /* Cannot ask cache whether the content type was changed, as
3109 * this code in another bw might have already changed it for us.
3110 */
3111 if (a_Misc_content_type_cmp(html->content_type, new_content)) {
3112 html->stop_parser = true; /* The cache buffer is no longer valid */
3113 a_UIcmd_repush(html->bw);
3114 }
3115}
3116
3130static void Html_tag_open_meta(DilloHtml *html, const char *tag, int tagsize)
3131{
3132 const char meta_template[] =
3133"<table width='100%%'><tr><td bgcolor='#ee0000'>Warning:</td>\n"
3134" <td bgcolor='#8899aa' width='100%%'>\n"
3135" This page uses the NON-STANDARD meta refresh tag.<br> The HTML 4.01 SPEC\n"
3136" (sec 7.4.4) recommends explicitly to avoid it.</td></tr>\n"
3137" <tr><td bgcolor='#a0a0a0' colspan='2'>The author wanted you to go\n"
3138" <a href='%s'>here</a>%s</td></tr></table><br>\n";
3139
3140 const char *p, *equiv, *charset, *content;
3141 char delay_str[64], *mr_url;
3142 DilloUrl *new_url;
3143 int delay;
3144
3145 /* only valid inside HEAD */
3146 if (!(html->InFlags & IN_HEAD)) {
3147 if (!((html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) &&
3148 a_Html_get_attr(html, tag, tagsize, "itemprop"))) {
3149 /* With the HTML 5.1 draft spec, meta with itemprop may appear
3150 * in the body.
3151 */
3152 BUG_MSG("This <meta> element must be inside the HEAD section.");
3153 }
3154 return;
3155 }
3156
3157 if ((equiv = a_Html_get_attr(html, tag, tagsize, "http-equiv"))) {
3158 if (!dStrAsciiCasecmp(equiv, "refresh") &&
3159 (content = a_Html_get_attr(html, tag, tagsize, "content"))) {
3160
3161 /* Get delay, if present, and make a message with it */
3162 if ((delay = strtol(content, NULL, 0))) {
3163 snprintf(delay_str, 64, " after %d second%s.",
3164 delay, (delay > 1) ? "s" : "");
3165 } else {
3166 sprintf(delay_str, ".");
3167 }
3168 /* Skip to anything after "URL=" or ";" if "URL=" is not found */
3169 int has_url = 1;
3170 if ((p = dStriAsciiStr(content, "url=")))
3171 content = p + strlen("url=");
3172 else if ((p = strstr(content, ";")))
3173 content = p + strlen(";");
3174 else
3175 has_url = 0;
3176
3177 if (has_url) {
3178 /* Handle the case of a quoted URL */
3179 if (*content == '"' || *content == '\'') {
3180 if ((p = strchr(content + 1, *content)))
3181 mr_url = dStrndup(content + 1, p - content - 1);
3182 else
3183 mr_url = dStrdup(content + 1);
3184 } else {
3185 mr_url = dStrdup(content);
3186 }
3187 } else {
3188 mr_url = dStrdup("");
3189 }
3190
3191 new_url = a_Html_url_new(html, mr_url, NULL, 0);
3192
3193 if (a_Url_cmp(html->base_url, new_url) == 0) {
3194 /* redirection loop, or empty url string: ignore */
3195 BUG_MSG("<meta> refresh: %s.",
3196 *mr_url ? "redirection loop" : "no target URL");
3197 } else if (delay == 0) {
3198 /* zero-delay redirection */
3199 html->stop_parser = true;
3200 if (URL_FLAGS(html->base_url) & URL_SpamSafe) {
3201 a_UIcmd_set_msg(html->bw,
3202 "WARNING: local URL with META refresh. Aborting.");
3203 } else if (a_Capi_dpi_verify_request(html->bw, new_url)) {
3204 a_UIcmd_redirection0((void*)html->bw, new_url);
3205 }
3206 } else {
3207 /* Send a custom HTML message.
3208 * TODO: This is a hairy hack,
3209 * It'd be much better to build a widget. */
3210 Dstr *ds_msg = dStr_sized_new(256);
3211 dStr_sprintf(ds_msg, meta_template, URL_STR(new_url), delay_str);
3212 {
3213 int o_InFlags = html->InFlags;
3214 int o_TagSoup = html->TagSoup;
3215 html->InFlags = IN_BODY + IN_META_HACK;
3216 html->TagSoup = false;
3217 Html_write_raw(html, ds_msg->str, ds_msg->len, 0);
3218 html->TagSoup = o_TagSoup;
3219 html->InFlags = o_InFlags;
3220 }
3221 dStr_free(ds_msg, 1);
3222 }
3223 a_Url_free(new_url);
3224 dFree(mr_url);
3225
3226 } else if (!dStrAsciiCasecmp(equiv, "content-type") &&
3227 (content = a_Html_get_attr(html, tag, tagsize, "content"))) {
3228 _MSG("Html_tag_open_meta: content={%s}\n", content);
3229 Html_update_content_type(html, content);
3230 }
3231 } else if (html->DocType == DT_HTML && html->DocTypeVersion == 5.0f &&
3232 (charset = a_Html_get_attr(html, tag, tagsize, "charset"))) {
3233 char *content = dStrconcat("text/html; charset=", charset, NULL);
3234
3235 Html_update_content_type(html, content);
3236 dFree(content);
3237 }
3238}
3239
3243static void Html_css_load_callback(int Op, CacheClient_t *Client)
3244{
3245 _MSG("Html_css_load_callback: Op=%d\n", Op);
3246 if (Op) { /* EOF */
3247 BrowserWindow *bw = ((DilloWeb *)Client->Web)->bw;
3248 /* Repush when we've got them all */
3249 if (--bw->NumPendingStyleSheets == 0)
3251 }
3252}
3253
3258{
3259 char *data;
3260 int len;
3261
3262 dReturn_if (url == NULL || ! prefs.load_stylesheets);
3263
3264 _MSG("Html_load_stylesheet: ");
3266 a_Capi_get_buf(url, &data, &len)) {
3267 _MSG("cached URL=%s len=%d", URL_STR(url), len);
3268 if (strncmp("@charset \"", data, 10) == 0) {
3269 char *endq = strchr(data+10, '"');
3270
3271 if (endq && (endq - data <= 51)) {
3272 /* IANA limits charset names to 40 characters */
3273 char *content_type;
3274
3275 *endq = '\0';
3276 content_type = dStrconcat("text/css; charset=", data+10, NULL);
3277 *endq = '"';
3278 a_Capi_unref_buf(url);
3281 a_Capi_get_buf(url, &data, &len);
3282 }
3283 }
3284 html->styleEngine->parse(html, url, data, len, CSS_ORIGIN_AUTHOR);
3285 a_Capi_unref_buf(url);
3286 } else {
3287 /* Fill a Web structure for the cache query */
3288 int ClientKey;
3289 DilloWeb *Web = a_Web_new(html->bw, url, html->page_url);
3290 Web->flags |= WEB_Stylesheet;
3291 if ((ClientKey = a_Capi_open_url(Web, Html_css_load_callback, NULL))) {
3292 ++html->bw->NumPendingStyleSheets;
3293 a_Bw_add_client(html->bw, ClientKey, 0);
3294 a_Bw_add_url(html->bw, url);
3295 MSG("NumPendingStyleSheets=%d\n", html->bw->NumPendingStyleSheets);
3296 }
3297 }
3298 _MSG("\n");
3299}
3300
3309static void Html_tag_open_link(DilloHtml *html, const char *tag, int tagsize)
3310{
3311 DilloUrl *url;
3312 const char *attrbuf;
3313
3314 //char *tag_str = dStrndup(tag, tagsize);
3315 //MSG("Html_tag_open_link(): %s\n", tag_str);
3316 //dFree(tag_str);
3317
3318 /* When viewing suspicious HTML email, don't load LINK */
3320
3321 /* Ignore LINK outside HEAD */
3322 if (!(html->InFlags & IN_HEAD)) {
3323 if (!((html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) &&
3324 a_Html_get_attr(html, tag, tagsize, "itemprop"))) {
3325 /* With the HTML 5.1 draft spec, link with itemprop may appear
3326 * in the body.
3327 */
3328 BUG_MSG("This <link> element must be inside the HEAD section.");
3329 }
3330 return;
3331 }
3332 /* Remote stylesheets enabled? */
3334 /* CSS stylesheet link */
3335 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "rel")) ||
3336 dStrAsciiCasecmp(attrbuf, "stylesheet"))
3337 return;
3338
3339 /* IMPLIED attributes? */
3340 if (((attrbuf = a_Html_get_attr(html, tag, tagsize, "type")) &&
3341 dStrAsciiCasecmp(attrbuf, "text/css")) ||
3342 ((attrbuf = a_Html_get_attr(html, tag, tagsize, "media")) &&
3343 !dStriAsciiStr(attrbuf, "screen") && dStrAsciiCasecmp(attrbuf, "all")))
3344 return;
3345
3346 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "href")) ||
3347 !(url = a_Html_url_new(html, attrbuf, NULL, 0)))
3348 return;
3349
3350 _MSG(" Html_tag_open_link(): addCssUrl %s\n", URL_STR(url));
3351
3352 html->addCssUrl(url);
3353 a_Url_free(url);
3354}
3355
3359static void Html_tag_open_base(DilloHtml *html, const char *tag, int tagsize)
3360{
3361 const char *attrbuf;
3362 DilloUrl *BaseUrl;
3363
3364 if (html->InFlags & IN_HEAD) {
3365 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "href"))) {
3366 bool_t html5 = html->DocType == DT_HTML &&
3367 html->DocTypeVersion >= 5.0f;
3368
3369 BaseUrl = html5 ? a_Html_url_new(html, attrbuf, NULL, 0) :
3370 a_Html_url_new(html, attrbuf, "", 1);
3371
3372 if (html5 || URL_SCHEME_(BaseUrl)) {
3373 /* Pass the URL_SpamSafe flag to the new base url */
3375 BaseUrl, URL_FLAGS(html->base_url) & URL_SpamSafe);
3376 a_Url_free(html->base_url);
3377 html->base_url = BaseUrl;
3378 } else {
3379 BUG_MSG("<base> URI is relative (it MUST be absolute).");
3380 a_Url_free(BaseUrl);
3381 }
3382 }
3383 } else {
3384 BUG_MSG("<base> not inside HEAD section.");
3385 }
3386}
3387
3388static void Html_tag_open_default(DilloHtml *html,const char *tag,int tagsize)
3389{
3391}
3392
3393/*
3394 * <SPAN>
3395 */
3396static void Html_tag_open_span(DilloHtml *html, const char *tag, int tagsize)
3397{
3398 const char *attrbuf;
3399
3401
3402 if (prefs.show_tooltip &&
3403 (attrbuf = a_Html_get_attr(html, tag, tagsize, "title"))) {
3404
3406 attrbuf);
3407 }
3408}
3409
3410/*
3411 * html5 sectioning stuff: article aside nav section header footer
3412 */
3413static void Html_tag_open_sectioning(DilloHtml *html, const char *tag,
3414 int tagsize)
3415{
3416 const char *attrbuf;
3417
3418 if (prefs.show_tooltip &&
3419 (attrbuf = a_Html_get_attr(html, tag, tagsize, "title"))) {
3420
3422 attrbuf);
3423 }
3424}
3425
3426/*
3427 * <DIV> (TODO: make a complete implementation)
3428 */
3429static void Html_tag_open_div(DilloHtml *html, const char *tag, int tagsize)
3430{
3431 a_Html_tag_set_align_attr (html, tag, tagsize);
3432 Html_tag_open_sectioning(html, tag, tagsize);
3433}
3434
3439{
3440 HT2TB(html)->addParbreak (9, html->wordStyle ());
3441}
3442
3443/*
3444 * <WBR> "The wbr element represents a line break opportunity."
3445 */
3446static void Html_tag_content_wbr(DilloHtml *html, const char *tag, int tagsize)
3447{
3448 HT2TB(html)->addBreakOption(html->wordStyle (), true);
3449}
3450
3451
3479static const TagInfo Tags[] = {
3480 {"a", B8(01011),'R', Html_tag_open_a, NULL, Html_tag_close_a},
3481 {"abbr", B8(01011),'R', Html_tag_open_abbr, NULL, NULL},
3482 /* acronym 010101 -- obsolete in HTML5 */
3483 {"address", B8(01110),'R', Html_tag_open_default, NULL, Html_tag_close_par},
3484 {"area", B8(01001),'F', Html_tag_open_default, Html_tag_content_area, NULL},
3485 {"article", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3486 {"aside", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3487 {"audio", B8(01111),'R', Html_tag_open_audio, NULL, Html_tag_close_media},
3488 {"b", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3489 {"base", B8(10001),'F', Html_tag_open_base, NULL, NULL},
3490 /* basefont 010001 -- obsolete in HTML5 */
3491 /* bdo 010101 */
3492 {"big", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3493 {"blockquote", B8(01110),'R', Html_tag_open_blockquote, NULL, NULL},
3494 {"body", B8(01110),'O', Html_tag_open_body, NULL, Html_tag_close_body},
3495 {"br", B8(01001),'F', Html_tag_open_default, Html_tag_content_br, NULL},
3496 {"button", B8(01111),'R', Html_tag_open_button,NULL,Html_tag_close_button},
3497 /* caption */
3498 {"center", B8(01110),'R', Html_tag_open_default, NULL, NULL},
3499 {"cite", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3500 {"code", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3501 /* col 010010 'F' */
3502 /* colgroup */
3503 {"dd", B8(01110),'O', Html_tag_open_dd, NULL, NULL},
3504 {"del", B8(01111),'R', Html_tag_open_default, NULL, NULL},
3505 {"dfn", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3506 {"dir", B8(01100),'R', Html_tag_open_dir, NULL, Html_tag_close_par},
3507 /* TODO: complete <div> support! */
3508 {"div", B8(01110),'R', Html_tag_open_div, NULL, NULL},
3509 {"dl", B8(01100),'R', Html_tag_open_dl, NULL, Html_tag_close_par},
3510 {"dt", B8(01010),'O', Html_tag_open_dt, NULL, Html_tag_close_par},
3511 {"em", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3512 {"embed", B8(01001),'F', Html_tag_open_embed, Html_tag_content_embed, NULL},
3513 /* fieldset */
3514 {"figcaption", B8(01110),'R', Html_tag_open_default, NULL, NULL},
3515 {"figure", B8(01110),'R', Html_tag_open_default, NULL, NULL},
3516 {"font", B8(01011),'R', Html_tag_open_font, NULL, NULL},
3517 {"footer", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3518 {"form", B8(01110),'R', Html_tag_open_form, NULL, Html_tag_close_form},
3519 {"frame", B8(01000),'F', Html_tag_open_frame, Html_tag_content_frame, NULL},
3520 {"frameset", B8(01110),'R', Html_tag_open_default, Html_tag_content_frameset,
3521 NULL},
3522 {"h1", B8(01010),'R', Html_tag_open_h, NULL, NULL},
3523 {"h2", B8(01010),'R', Html_tag_open_h, NULL, NULL},
3524 {"h3", B8(01010),'R', Html_tag_open_h, NULL, NULL},
3525 {"h4", B8(01010),'R', Html_tag_open_h, NULL, NULL},
3526 {"h5", B8(01010),'R', Html_tag_open_h, NULL, NULL},
3527 {"h6", B8(01010),'R', Html_tag_open_h, NULL, NULL},
3528 {"head", B8(10111),'O', Html_tag_open_head, NULL, Html_tag_close_head},
3529 {"header", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3530 {"hr", B8(01000),'F', Html_tag_open_hr, Html_tag_content_hr, NULL},
3531 {"html", B8(00110),'O', Html_tag_open_html, NULL, Html_tag_close_html},
3532 {"i", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3533 {"iframe", B8(01111),'R', Html_tag_open_frame, Html_tag_content_frame, NULL},
3534 {"img", B8(01001),'F', Html_tag_open_img, Html_tag_content_img, NULL},
3535 {"input", B8(01001),'F', Html_tag_open_input, NULL, NULL},
3536 {"ins", B8(01111),'R', Html_tag_open_default, NULL, NULL},
3537 {"isindex", B8(11001),'F', Html_tag_open_isindex, NULL, NULL},
3538 {"kbd", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3539 /* label 010101 */
3540 /* legend 01?? */
3541 {"li", B8(01110),'O', Html_tag_open_li, NULL, Html_tag_close_li},
3542 {"link", B8(10001),'F', Html_tag_open_link, NULL, NULL},
3543 {"main", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3544 {"map", B8(01101),'R', Html_tag_open_default, Html_tag_content_map,
3546 {"mark", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3547 /* menu 1010 -- TODO: not exactly 1010, it can contain LI and inline */
3548 {"menu", B8(01100),'R', Html_tag_open_menu, NULL, Html_tag_close_par},
3549 {"meta", B8(11001),'F', Html_tag_open_meta, NULL, NULL},
3550 {"nav", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3551 /* noframes 1011 -- obsolete in HTML5 */
3552 /* noscript 1011 */
3553 {"object", B8(11111),'R', Html_tag_open_object, Html_tag_content_object,NULL},
3554 {"ol", B8(01100),'R', Html_tag_open_ol, NULL, NULL},
3555 {"optgroup", B8(01011),'O', Html_tag_open_optgroup, NULL,
3557 {"option", B8(01001),'O', Html_tag_open_option, NULL, Html_tag_close_option},
3558 {"p", B8(01010),'O', Html_tag_open_p, NULL, NULL},
3559 /* param 010001 'F' */
3560 {"pre", B8(01010),'R', Html_tag_open_pre, NULL, Html_tag_close_pre},
3561 {"q", B8(01011),'R', Html_tag_open_q, NULL, Html_tag_close_q},
3562 {"s", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3563 {"samp", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3564 {"script", B8(11101),'R', Html_tag_open_script,NULL,Html_tag_close_script},
3565 {"section", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3566 {"select", B8(01011),'R', Html_tag_open_select,NULL,Html_tag_close_select},
3567 {"small", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3568 {"source", B8(01001),'F', Html_tag_open_source, Html_tag_content_source,NULL},
3569 {"span", B8(01011),'R', Html_tag_open_span, NULL, NULL},
3570 {"strike", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3571 {"strong", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3572 {"style", B8(10011),'R', Html_tag_open_style, NULL, Html_tag_close_style},
3573 {"sub", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3574 {"sup", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3575 {"table", B8(01100),'R', Html_tag_open_table, Html_tag_content_table, NULL},
3576 {"tbody", B8(01010),'O', Html_tag_open_tbody, Html_tag_content_tbody, NULL},
3577 {"td", B8(01110),'O', Html_tag_open_td, Html_tag_content_td, NULL},
3578 {"textarea", B8(01011),'R', Html_tag_open_textarea, Html_tag_content_textarea,
3580 {"tfoot", B8(01010),'O', Html_tag_open_tfoot, NULL, NULL},
3581 {"th", B8(01110),'O', Html_tag_open_th, Html_tag_content_th, NULL},
3582 {"thead", B8(01010),'O', Html_tag_open_thead, NULL, NULL},
3583 {"title", B8(10011),'R', Html_tag_open_title, NULL, Html_tag_close_title},
3584 {"tr", B8(01100),'O', Html_tag_open_tr, Html_tag_content_tr, NULL},
3585 {"tt", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3586 {"u", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3587 {"ul", B8(01100),'R', Html_tag_open_ul, NULL, NULL},
3588 {"var", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3589 {"video", B8(01111),'R', Html_tag_open_video, NULL, Html_tag_close_media},
3590 {"wbr", B8(01011),'F', Html_tag_open_default, Html_tag_content_wbr, NULL}
3591};
3592#define NTAGS (sizeof(Tags)/sizeof(Tags[0]))
3593
3594/* Only available in C++11 and up */
3595#if __cpp_static_assert
3596static_assert(NTAGS == HTML_NTAGS,
3597 "Mismatch between number of tags in Tags and HTML_NTAGS");
3598#endif
3599
3600/*
3601 * Compares tag from buffer ('/' or '>' or space-ended string) [p1]
3602 * with tag from taglist (lowercase, zero ended string) [p2]
3603 * Return value: as strcmp()
3604 */
3605static int Html_tag_compare(const char *p1, const char *p2)
3606{
3607 while ( *p2 ) {
3608 if (D_ASCII_TOLOWER(*p1) != *p2)
3609 return(D_ASCII_TOLOWER(*p1) - *p2);
3610 ++p1;
3611 ++p2;
3612 }
3613 return !strchr(" >/\n\r\t", *p1);
3614}
3615
3620int a_Html_tag_index(const char *tag)
3621{
3622 int low, high, mid, cond;
3623
3624 /* Binary search */
3625 low = 0;
3626 high = NTAGS - 1; /* Last tag index */
3627 while (low <= high) {
3628 mid = (low + high) / 2;
3629 if ((cond = Html_tag_compare(tag, Tags[mid].name)) < 0 )
3630 high = mid - 1;
3631 else if (cond > 0)
3632 low = mid + 1;
3633 else
3634 return mid;
3635 }
3636 return -1;
3637}
3638
3647static int Html_triggers_optional_close(int old_idx, int cur_idx)
3648{
3649 int Flags = Tags[cur_idx].Flags;
3650 if (old_idx == i_P || old_idx == i_DT) {
3651 /* P and DT are closed by block elements (i.e. non inline)*/
3652 return (!(Flags & 1));
3653 } else if (old_idx == i_LI) {
3654 /* LI closes LI
3655 * Note: non-flow should also close it, but FF does not. */
3656 return (cur_idx == i_LI);
3657 } else if (old_idx == i_TD || old_idx == i_TH) {
3658 /* TD and TH are closed by: TD, TH and TR.
3659 * Note: non-flow should also close it, but FF does not. */
3660 return (cur_idx == i_TD || cur_idx == i_TH || cur_idx == i_TR || cur_idx == i_TBODY);
3661 } else if (old_idx == i_TR) {
3662 /* TR closes TR */
3663 return (cur_idx == i_TR || cur_idx == i_TBODY);
3664 } else if (old_idx == i_DD) {
3665 /* DD is closed by DD and DT */
3666 return (cur_idx == i_DD || cur_idx == i_DT);
3667 } else if (old_idx == i_OPTGROUP) {
3668 /* i_OPTGROUP can only contain OPTION */
3669 return (cur_idx != i_OPTION);
3670 } else if (old_idx == i_OPTION) {
3671 return 1; // OPTION always needs close
3672 }
3673
3674 /* Don't close HTML, HEAD and BODY. They're handled by Html_test_section().
3675 * TODO: TBODY is pending */
3676 return 0;
3677}
3678
3686static inline int Html_forbids_cross_nesting(const int InFlags,
3687 const int new_idx)
3688{
3689 int f = InFlags, ni = new_idx, oi = -1;
3690 if (f & (IN_A | IN_BUTTON | IN_SELECT | IN_TEXTAREA) &&
3691 (ni == i_A || ni == i_BUTTON || ni == i_SELECT || ni == i_TEXTAREA))
3692 oi = (f & IN_A ? i_A : f & IN_BUTTON ? i_BUTTON : f & IN_SELECT ?
3693 i_SELECT : f & IN_TEXTAREA ? i_TEXTAREA : 0);
3694 return oi;
3695}
3696
3705static void Html_tag_cleanup_to_idx(DilloHtml *html, int s_idx,
3706 int new_idx, int fi, char op)
3707{
3708 int s_top, ni = new_idx;
3709 while ((s_top = html->stack->size() - 1) >= s_idx) {
3710 int toptag_idx = S_TOP(html)->tag_idx;
3711 TagInfo toptag = Tags[toptag_idx];
3712
3713 if (fi >= 0) {
3714 // forbidden nesting
3715 if (toptag_idx != fi)
3716 BUG_MSG(" Nesting cleanup - forcing close of open tag: <%s>.",
3717 toptag.name);
3718 } else if (s_top == s_idx && op == 'c') {
3719 // target tag, no bug when closing.
3720 } else if (toptag.EndTag == 'O') {
3721 // optional close, that's OK
3722 } else if ((!(toptag.Flags & 4) &&
3723 (Tags[ni].Flags & 4 || !(Tags[ni].Flags & 1))) ||
3724 (Tags[ni].Flags & 1 && !(toptag.Flags & 2))) {
3725 // block {element, container} in non block container or
3726 // inline element in non inline container
3727 BUG_MSG((op == 'o') ?
3728 "Bad nesting: <%s> can't contain <%s>. -- closing <%s>." :
3729 "<%s> needs to be closed before </%s>. -- closing <%s>.",
3730 toptag.name, Tags[ni].name, toptag.name);
3731 } else {
3732 BUG_MSG(
3733 "<%s> should have been closed before </%s>. -- closing <%s>.",
3734 toptag.name, Tags[ni].name, toptag.name);
3735 }
3736 _MSG("op(%c): %s s_top=%d s_idx=%d\n", op, toptag.name, s_top, s_idx);
3737 if (toptag_idx == i_BODY &&
3738 !((html->InFlags & IN_EOF) || html->ReqTagClose)) {
3739 (s_idx == 1 ? html->PrevWasHtmlClose : html->PrevWasBodyClose) = true;
3740 break; // only pop {BODY,HTML} upon EOF or redundancy
3741 }
3742 if (toptag.close)
3743 toptag.close(html);
3744 Html_real_pop_tag(html);
3745 }
3746}
3747
3757static void Html_stack_cleanup_at_open(DilloHtml *html, int ni)
3758{
3759 if (!html->TagSoup)
3760 return;
3761
3762 int s_top = html->stack->size() - 1, s_idx;
3763 int fi = Html_forbids_cross_nesting(html->InFlags, ni);
3764 for (s_idx = s_top; s_idx > 0; --s_idx) {
3765 int ti = html->stack->getRef(s_idx)->tag_idx;
3766
3767 if (fi >= 0) {
3768 // forbidden cross nesting found
3769 if (ti != fi)
3770 continue; // don't allow, close
3771 --s_idx;
3772 BUG_MSG("Forbidden nesting: <%s> can't contain <%s>. -- closing "
3773 "<%s>.", Tags[fi].name, Tags[ni].name, Tags[fi].name);
3774
3775 } else if ((html->InFlags & IN_PRE) && ni == i_HR) {
3776 break; // allow Apache's bad HTML directory listings...
3777
3778 } else if (Tags[ti].EndTag == 'O') { // Element with optional close
3779 if (Html_triggers_optional_close(ti, ni))
3780 continue; // close
3781 } else if (!(Tags[ni].Flags & 1) && !(Tags[ti].Flags & 4)) {
3782 // Block element over a NON block container
3783 if (ti == i_A && html->DocTypeVersion >= 5.0f)
3784 break;
3785 continue; // close
3786 }
3787
3788 break;
3789 }
3790
3791 if (s_idx < s_top)
3792 Html_tag_cleanup_to_idx(html, s_idx + 1, ni, fi, 'o');
3793}
3794
3807static void Html_tag_cleanup_at_close(DilloHtml *html, int new_idx)
3808{
3809 int stack_idx, tag_idx, matched = 0, expected = 0;
3810 TagInfo new_tag = Tags[new_idx];
3811
3812 /* Look for the candidate tag to close */
3813 stack_idx = html->stack->size();
3814 while (--stack_idx) {
3815 tag_idx = html->stack->getRef(stack_idx)->tag_idx;
3816 if (tag_idx == new_idx) {
3817 /* matching tag found */
3818 matched = 1;
3819 break;
3820 } else if (Tags[tag_idx].EndTag == 'O') {
3821 /* close elements with optional close */
3822 continue;
3823 } else if ((new_idx == i_A && html->InFlags & IN_A) ||
3824 (new_idx == i_BUTTON && html->InFlags & IN_BUTTON) ||
3825 (new_idx == i_SELECT && html->InFlags & IN_SELECT) ||
3826 (new_idx == i_TEXTAREA && html->InFlags & IN_TEXTAREA)) {
3827 /* Let these elements close anything left open inside them */
3828 continue;
3829 } else if (Tags[new_idx].Flags & 4 && // Block container
3830 Tags[stack_idx].Flags & 3) { // Inline element or container
3831 /* Let a block container close inline elements left open inside it. */
3832 continue;
3833 } else {
3834 /* this is the tag that should have been closed */
3835 expected = 1;
3836 break;
3837 }
3838 }
3839
3840 if (matched) {
3841 Html_tag_cleanup_to_idx(html, stack_idx, new_idx, -1, 'c');
3842 } else if (expected) {
3843 BUG_MSG("Unexpected closing tag: </%s> -- expected </%s>.",
3844 new_tag.name, Tags[tag_idx].name);
3845 } else {
3846 BUG_MSG("Unexpected closing tag: </%s>.", new_tag.name);
3847 }
3848}
3849
3854static void Html_test_section(DilloHtml *html, int new_idx, int IsCloseTag)
3855{
3856 const char *tag;
3857 int tag_idx;
3858
3859 if (!(html->InFlags & IN_HTML) && html->DocType == DT_NONE)
3860 BUG_MSG("The required DOCTYPE declaration is missing. "
3861 "Handling as HTML4.");
3862
3863 if (!(html->InFlags & IN_HTML)) {
3864 tag = "<html>";
3865 tag_idx = a_Html_tag_index(tag + 1);
3866 if (tag_idx != new_idx || IsCloseTag) {
3867 /* implicit open */
3868 Html_force_push_tag(html, tag_idx);
3869 _MSG("Open : %*s%s\n", html->stack->size()," ",Tags[tag_idx].name);
3870 Tags[tag_idx].open (html, tag, strlen(tag));
3871 }
3872 }
3873
3874 if (Tags[new_idx].Flags & 16) {
3875 /* head element */
3876 if (!(html->InFlags & IN_HEAD) && html->Num_HEAD == 0) {
3877 tag = "<head>";
3878 tag_idx = a_Html_tag_index(tag + 1);
3879 if (tag_idx != new_idx || IsCloseTag) {
3880 /* implicit open of the head element */
3881 Html_force_push_tag(html, tag_idx);
3882 _MSG("Open : %*s%s\n", html->stack->size()," ",Tags[tag_idx].name);
3883 Tags[tag_idx].open (html, tag, strlen(tag));
3884 }
3885 }
3886
3887 } else if (Tags[new_idx].Flags & 8) {
3888 /* body element */
3889 if (html->InFlags & IN_HEAD) {
3890 tag = "</head>";
3891 tag_idx = a_Html_tag_index(tag + 2);
3892 Html_tag_cleanup_at_close(html, tag_idx);
3893 }
3894 tag = "<body>";
3895 tag_idx = a_Html_tag_index(tag + 1);
3896 if (tag_idx != new_idx || IsCloseTag) {
3897 /* implicit open */
3898 Html_force_push_tag(html, tag_idx);
3899 _MSG("Open : %*s%s\n", html->stack->size()," ",Tags[tag_idx].name);
3900 Tags[tag_idx].open (html, tag, strlen(tag));
3901 }
3902 }
3903}
3904
/*
 * Parse the attributes handled here for every tag: "id", "class",
 * "style", "xml:lang" and "lang".
 *
 *  html    - parser state; attribute values go to html->styleEngine
 *  tag     - start of the tag text
 *  tagsize - size of the tag text
 *
 * Each lookup is skipped when tagsize is below the length of the shortest
 * tag that could carry the attribute. "class" and "style" are only looked
 * up when prefs.parse_embedded_css or prefs.load_stylesheets is set.
 */
3908static void Html_parse_common_attrs(DilloHtml *html, char *tag, int tagsize)
3909{
3910 const char *attrbuf;
3911 char lang[3];
3912
3913 if (tagsize >= 8 && /* length of "<t id=i>" */
3914 (attrbuf = a_Html_get_attr(html, tag, tagsize, "id"))) {
3915 /* According to the SGML declaration of HTML 4, all NAME values
3916 * occurring outside entities must be converted to uppercase
3917 * (this is what "NAMECASE GENERAL YES" says). But the HTML 4
3918 * spec states in Sec. 7.5.2 that anchor ids are case-sensitive.
3919 * So we don't do it and hope for better specs in the future ...
3920 */
3921 Html_check_name_val(html, attrbuf, "id");
3922
3923 html->styleEngine->setId(attrbuf);
3924 }
3925
3926 if (tagsize >= 11 && (prefs.parse_embedded_css || prefs.load_stylesheets)) {
3927 /* length of "<t class=i>" or "<t style=i>" */
3928 attrbuf = a_Html_get_attr(html, tag, tagsize, "class");
3929 if (attrbuf)
3930 html->styleEngine->setClass (attrbuf);
3931
3932 attrbuf = a_Html_get_attr(html, tag, tagsize, "style");
3933 if (attrbuf)
3934 html->styleEngine->setStyle (attrbuf);
3935 }
3936
3937 /* handle "xml:lang" and "lang" attributes
3938 * We use only the first two chars of the value to deal with
3939 * extended language tags (see http://www.rfc-editor.org/rfc/bcp/bcp47.txt)
3940 */
/* lang is zero-filled first, so the two chars copied below always end up
 * NUL-terminated in the 3-byte buffer. */
3941 memset(lang, 0, sizeof(lang));
3942 if (tagsize >= 14) {
3943 /* length of "<t xml:lang=i>" */
3944 attrbuf = a_Html_get_attr(html, tag, tagsize, "xml:lang");
3945 if (attrbuf)
3946 strncpy(lang, attrbuf, 2);
3947 }
3948 if (!lang[0] && tagsize >= 10) { /* 'xml:lang' prevails over 'lang' */
3949 /* length of "<t lang=i>" */
3950 attrbuf = a_Html_get_attr(html, tag, tagsize, "lang");
3951 if (attrbuf)
3952 strncpy(lang, attrbuf, 2);
3953 }
3954 if (lang[0])
/* NOTE(review): the statement guarded by this `if` (source line 3955) is
 * not present in this listing; presumably it passes `lang` on to the style
 * engine -- confirm against the original file. */
3956}
3957
3962static void Html_check_html5_obsolete(DilloHtml *html, int ni)
3963{
3964 static int indexes[9] = {-1};
3965
3966 if (indexes[0] == -1) {
3967 indexes[0] = a_Html_tag_index("dir");
3968 indexes[1] = a_Html_tag_index("frame");
3969 indexes[2] = a_Html_tag_index("frameset");
3970 indexes[3] = a_Html_tag_index("isindex");
3971 indexes[4] = a_Html_tag_index("strike");
3972 indexes[5] = a_Html_tag_index("big");
3973 indexes[6] = a_Html_tag_index("center");
3974 indexes[7] = a_Html_tag_index("font");
3975 indexes[8] = a_Html_tag_index("tt");
3976 }
3977 for (int i = 0; i < 9; i++) {
3978 if (indexes[i] == ni) {
3979 BUG_MSG("<%s> is obsolete in HTML5.", Tags[ni].name);
3980 break;
3981 }
3982 }
3983}
3984
3986{
3988 false /* Perhaps true for widgets oof? */);
3989}
3990
3992{
3993 Html_add_textblock(html, false, 0, true);
3994}
3995
3997{
3998 Style *style = html->style ();
3999 Style *wordStyle = html->wordStyle ();
4000 Widget **ref_list_item;
4001 ListItem *list_item;
4002 int *list_number;
4003 char buf[16];
4004
4005 /* Get our parent tag's variables (used as state storage) */
4006 list_number = &html->stack->getRef(html->stack->size()-2)->list_number;
4007 ref_list_item = &html->stack->getRef(html->stack->size()-2)->ref_list_item;
4008
4009 HT2TB(html)->addParbreak (0, wordStyle);
4010
4011 list_item = new ListItem ((ListItem*)*ref_list_item,prefs.limit_text_width);
4012 HT2TB(html)->addWidget (list_item, style);
4013 HT2TB(html)->addParbreak (0, wordStyle);
4014 *ref_list_item = list_item;
4015 S_TOP(html)->textblock = html->dw = list_item;
4016
4018 // none
4020 // ordered
4021 numtostr((*list_number)++, buf, 16, style->listStyleType);
4022 list_item->initWithText (buf, wordStyle);
4023 } else {
4024 // unordered
4025 list_item->initWithWidget (new Bullet(), wordStyle);
4026 }
4027}
4028
4030{
4031 if (S_TOP(html)->display_none)
4032 return false;
4033
4034 if (html->style()->display == DISPLAY_NONE)
4035 return false;
4036
4037 return true;
4038}
4039
/*
 * Process one tag (open, close, or self-closing) of `tagsize` bytes whose
 * text starts at `tag` (pointing at the '<').
 *
 * In order:
 *  - return at once when the parser has been stopped;
 *  - look up the tag name; unknown tags are ignored, except that a
 *    "<!doctype" found before HTML is open goes to Html_parse_doctype();
 *  - report HTML5-obsolete elements and content found after </html> or
 *    </body>;
 *  - while not inside BODY, let Html_test_section() deal with HTML, HEAD
 *    and BODY;
 *  - open tag: push it on the stack, parse the common attributes, call
 *    the tag's open (and content) function, then adjust the parse mode
 *    and add an anchor for the element's id;
 *  - close tag, requested close, or XML-style "<x/>": call
 *    Html_tag_cleanup_at_close().
 *
 * NOTE(review): source lines 4085, 4110-4111 and 4114 are absent from this
 * listing, so a few statements are not visible below.
 */
4045static void Html_process_tag(DilloHtml *html, char *tag, int tagsize)
4046{
4047 int ti, ni; /* stack tag index and new tag index */
4048 char *start = tag + 1; /* discard the '<' */
4049 int IsCloseTag = (*start == '/');
4050
4051 dReturn_if (html->stop_parser == true);
4052
/* IsCloseTag is 0 or 1, so this skips the '/' of a close tag */
4053 ni = a_Html_tag_index(start + IsCloseTag);
4054 if (ni == -1) {
4055 /* TODO: doctype parsing is a bit fuzzy, but enough for the time being */
4056 if (!(html->InFlags & IN_HTML)) {
4057 if (tagsize > 9 && !dStrnAsciiCasecmp(tag, "<!doctype", 9))
4058 Html_parse_doctype(html, tag, tagsize);
4059 }
4060 /* Ignore unknown tags */
4061 return;
4062 }
4063 _MSG("Html_process_tag: %s%s\n", IsCloseTag ? "/" : "", Tags[ni].name);
4064
4065 if (!IsCloseTag && html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
4066 Html_check_html5_obsolete(html, ni);
4067
/* i: 1 = previous tag was </html>, 2 = previous tag was </body>, 0 = none.
 * A <html> or </html> tag right after </body> is not reported. */
4068 int i = html->PrevWasHtmlClose ? 1 : html->PrevWasBodyClose ? 2 : 0;
4069 if (i == 1 || (i == 2 && ni != i_HTML))
4070 BUG_MSG("Content after </%s> tag.", i == 1 ? "html" : "body");
4071 html->PrevWasHtmlClose = html->PrevWasBodyClose = false;
4072
4073 /* Handle HTML, HEAD and BODY. Elements with optional open and close */
4074 if (!(html->InFlags & IN_BODY) /* && parsing HTML */)
4075 Html_test_section(html, ni, IsCloseTag);
4076
4077 /* Tag processing */
4078 ti = S_TOP(html)->tag_idx;
4079 switch (IsCloseTag) {
4080 case 0:
4081 /* Open function */
4082
4083 /* Cleanup before opening a new tag */
4084 if (ti != -1)
/* NOTE(review): the statement guarded by this `if` (source line 4085) is
 * missing from this listing; presumably the call that cleans up the stack
 * before the open -- confirm against the original file. */
4086
4087 /* TODO: this is only raising a warning, but allows the element.
4088 * Note: Apache uses IMG inside PRE. */
4089 if ((html->InFlags & IN_PRE) && Html_tag_pre_excludes(html, ni))
4090 BUG_MSG("<pre> is not allowed to contain <%s>.", Tags[ni].name);
4091
4092 /* Push the tag into the stack */
4093 Html_push_tag(html, ni);
4094
4095 html->startElement (ni);
4096 _MSG("Open : %*s%s\n", html->stack->size(), " ", Tags[ni].name);
4097
4098 /* Parse attributes that can appear on any tag */
4099 Html_parse_common_attrs(html, tag, tagsize);
4100
4101 /* Call the open function for this tag */
4102 _MSG("Html_process_tag Open : %s\n", Tags[ni].name);
4103 Tags[ni].open (html, tag, tagsize);
4104
/* Act on the computed CSS 'display' value, unless the stack top is
 * already marked as not displayed. */
4105 if (! S_TOP(html)->display_none) {
4106 switch (html->style ()->display) {
4107 case DISPLAY_BLOCK:
4108 Html_display_block(html);
4109 break;
/* NOTE(review): source lines 4110-4111 (a case label and its statement)
 * are missing from this listing. */
4112 break;
4113 case DISPLAY_LIST_ITEM:
/* NOTE(review): source line 4114 (the statement of this case) is missing
 * from this listing. */
4115 break;
4116 case DISPLAY_NONE:
4117 S_TOP(html)->display_none = true;
4118 break;
4119 case DISPLAY_INLINE:
4120 if (html->style()->vloat != FLOAT_NONE)
4121 Html_display_block(html);
4122 break;
4123 default:
4124 break;
4125 }
4126
4127 if (Tags[ni].content && ! S_TOP(html)->display_none) {
4128 Tags[ni].content (html, tag, tagsize);
4129 }
4130 }
4131
4132 if (html->stop_parser)
4133 break;
4134
/* Switch to PRE parsing when the style asks for preformatted white space;
 * VERBATIM mode is never overridden. */
4135 if (S_TOP(html)->parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
4136 /* don't change anything */
4137 } else if (S_TOP(html)->parse_mode != DILLO_HTML_PARSE_MODE_PRE &&
4138 (html->style ()->whiteSpace == WHITE_SPACE_PRE ||
4139 html->style ()->whiteSpace == WHITE_SPACE_PRE_WRAP)) {
4140 S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_PRE;
4141 html->pre_column = 0;
4142 html->PreFirstChar = true;
4143 }
4144
4145 if (html->styleEngine->getId ())
4146 Html_add_anchor(html, html->styleEngine->getId ());
4147
4148 /* Request immediate close for elements with forbidden close tag. */
4149 /* TODO: XHTML always requires close tags. A simple implementation
4150 * of the commented clause below will make it work. */
4151 if (/* parsing HTML && */ Tags[ni].EndTag == 'F')
4152 html->ReqTagClose = true;
4153
4154 /* Don't break! Open tags may also close themselves */
4155 /* fallthrough */
4156
4157 default:
4158 /* Close function */
4159
4160 /* Test for </x>, ReqTagClose, <x /> and <x/> */
4161 if (*start == '/' || /* </x> */
4162 html->ReqTagClose || /* request */
4163 (tag[tagsize-2] == '/' && /* XML: */
4164 (strchr(" \"'", tag[tagsize-3]) || /* [ "']/> */
4165 (size_t)tagsize == strlen(Tags[ni].name) + 3))) { /* <x/> */
4166
4167 _MSG("Html_process_tag Close: %s\n", Tags[ni].name);
4168 Html_tag_cleanup_at_close(html, ni);
4169 /* This was a close tag */
4170 html->ReqTagClose = false;
4171 }
4172 }
4173}
4174
/*
 * Search the tag text for the attribute `attrname` and return its value.
 *
 *  html              - parser state; the value is built in html->attr_data
 *  tag, tagsize      - tag text and its size; scanning starts at index 1
 *  attrname          - attribute name; compared through D_ASCII_TOLOWER on
 *                      both sides. Must not be empty (NULL is returned).
 *  tag_parsing_flags - HTML_ParseEntities: pass '&' sequences in the value
 *                      through Html_parse_entity();
 *                      HTML_LeftTrim / HTML_RightTrim: strip white space
 *                      from the start / end of the value.
 *
 * Return: html->attr_data->str when the attribute was found, NULL
 * otherwise. The buffer is truncated at the start of every call, so the
 * returned text is overwritten by the next lookup.
 *
 * The scan is a state machine: SEEK_ATTR_START and SEEK_TOKEN_START look
 * for the next attribute name, MATCH_ATTR_NAME compares it with attrname,
 * SEEK_VALUE_START finds the value and its delimiter, SKIP_VALUE passes
 * over the value of another attribute, GET_VALUE copies the wanted value,
 * and FINISHED ends the loop.
 */
4185static const char *Html_get_attr2(DilloHtml *html,
4186 const char *tag,
4187 int tagsize,
4188 const char *attrname,
4189 int tag_parsing_flags)
4190{
4191 int i, entsize, Found = 0, delimiter = 0, attr_pos = 0;
4192 Dstr *Buf = html->attr_data;
/* NOTE(review): the declaration of `state` (source line 4193) is missing
 * from this listing; the loop below needs it to start in a state that
 * skips the tag name -- presumably SEEK_ATTR_START; confirm against the
 * original file. */
4194
4195 dReturn_val_if_fail(*attrname, NULL);
4196
4197 dStr_truncate(Buf, 0);
4198
4199 for (i = 1; i < tagsize; ++i) {
4200 switch (state) {
4201 case SEEK_ATTR_START:
4202 if (isspace(tag[i]))
4203 state = SEEK_TOKEN_START;
4204 else if (tag[i] == '=')
4205 state = SEEK_VALUE_START;
4206 break;
4207
4208 case MATCH_ATTR_NAME:
/* The name matches only if attrname is exhausted exactly where the name
 * in the tag ends. `--i` makes the next state see the same character. */
4209 if (!attrname[attr_pos] &&
4210 (tag[i] == '=' || isspace(tag[i]) || tag[i] == '>')) {
4211 Found = 1;
4212 state = SEEK_TOKEN_START;
4213 --i;
4214 } else if (!tag[i]) {
4215 state = SEEK_ATTR_START; // NULL byte is not allowed
4216 } else {
4217 if (D_ASCII_TOLOWER(tag[i]) != D_ASCII_TOLOWER(attrname[attr_pos]))
4218 state = SEEK_ATTR_START;
4219 attr_pos++;
4220 }
4221 break;
4222
4223 case SEEK_TOKEN_START:
/* After a match (Found), any token other than '=' means the attribute
 * has no value: finish with the empty buffer. */
4224 if (tag[i] == '=') {
4225 state = SEEK_VALUE_START;
4226 } else if (!isspace(tag[i])) {
4227 attr_pos = 0;
4228 state = (Found) ? FINISHED : MATCH_ATTR_NAME;
4229 --i;
4230 }
4231 break;
4232 case SEEK_VALUE_START:
/* A quoted value is delimited by its quote character; an unquoted one
 * uses ' ' as a marker and steps back so that its first character is
 * read again as part of the value. */
4233 if (!isspace(tag[i])) {
4234 delimiter = (tag[i] == '"' || tag[i] == '\'') ? tag[i] : ' ';
4235 i -= (delimiter == ' ');
4236 state = (Found) ? GET_VALUE : SKIP_VALUE;
4237 }
4238 break;
4239
4240 case SKIP_VALUE:
4241 if ((delimiter == ' ' && isspace(tag[i])) || tag[i] == delimiter)
4242 state = SEEK_TOKEN_START;
4243 break;
4244 case GET_VALUE:
4245 if ((delimiter == ' ' && (isspace(tag[i]) || tag[i] == '>')) ||
4246 tag[i] == delimiter) {
4247 state = FINISHED;
4248 } else if (tag[i] == '&' &&
4249 (tag_parsing_flags & HTML_ParseEntities)) {
4250 const char *entstr;
4251 const bool_t is_attr = TRUE;
4252
/* A recognized entity is replaced by its expansion and skipped in the
 * input; otherwise the '&' is copied literally. */
4253 if ((entstr = Html_parse_entity(html, tag+i, tagsize-i, &entsize,
4254 is_attr))) {
4255 dStr_append(Buf, entstr);
4256 i += entsize-1;
4257 } else {
4258 dStr_append_c(Buf, tag[i]);
4259 }
4260 } else if (tag[i] == '\r' || tag[i] == '\t') {
4261 dStr_append_c(Buf, ' ');
4262 } else if (tag[i] == '\n') {
4263 /* ignore */
4264 } else {
4265 dStr_append_c(Buf, tag[i]);
4266 }
4267 break;
4268
4269 case FINISHED:
/* forces the loop condition to fail */
4270 i = tagsize;
4271 break;
4272 }
4273 }
4274
/* NOTE(review): the left-trim loop has no length check; it presumably
 * relies on Buf->str being NUL-terminated to stop -- confirm. */
4275 if (tag_parsing_flags & HTML_LeftTrim)
4276 while (isspace(Buf->str[0]))
4277 dStr_erase(Buf, 0, 1);
4278 if (tag_parsing_flags & HTML_RightTrim)
4279 while (Buf->len && isspace(Buf->str[Buf->len - 1]))
4280 dStr_truncate(Buf, Buf->len - 1);
4281
4282 return (Found) ? Buf->str : NULL;
4283}
4284
/*
 * Return the value of attribute `attrname` in the given tag, or whatever
 * Html_get_attr2() returns for it (NULL when the attribute is absent).
 *
 * This is a thin wrapper around Html_get_attr2(); the returned string is
 * held in html->attr_data and is overwritten by the next attribute lookup.
 *
 * NOTE(review): the line carrying the last argument of the call (source
 * line 4294, the tag parsing flags) is missing from this listing.
 */
4288const char *a_Html_get_attr(DilloHtml *html,
4289 const char *tag,
4290 int tagsize,
4291 const char *attrname)
4292{
4293 return Html_get_attr2(html, tag, tagsize, attrname,
4295}
4296
4303 const char *tag,
4304 int tagsize,
4305 const char *attrname,
4306 const char *def)
4307{
4308 const char *attrbuf = a_Html_get_attr(html, tag, tagsize, attrname);
4309
4310 return attrbuf ? dStrdup(attrbuf) : dStrdup(def);
4311}
4312
4321static void Html_callback(int Op, CacheClient_t *Client)
4322{
4323 DilloHtml *html = (DilloHtml*)Client->CbData;
4324
4325 if (Op) { /* EOF */
4326 html->write((char*)Client->Buf, Client->BufSize, 1);
4327 html->finishParsing(Client->Key);
4328 } else {
4329 html->write((char*)Client->Buf, Client->BufSize, 0);
4330 }
4331}
4332
/*
 * Split `buf` into tokens and feed them to the parser.
 *
 *  html    - parser state
 *  buf     - data to parse, starting at a token boundary
 *  bufsize - number of bytes in buf
 *  Eof     - non-zero when no more data will follow; a trailing word is
 *            then processed even though no delimiter comes after it
 *
 * Runs of white space go to Html_process_space(), tags to
 * Html_process_tag(), words to Html_process_word(); "<!-- ... -->"
 * comments are discarded. A token that is not complete within buf is left
 * unprocessed. The loop also ends as soon as html->stop_parser is set.
 *
 * Return: token_start, i.e. the offset in buf of the first byte that was
 * not consumed.
 *
 * NOTE(review): the strcspn() calls and the buf[buf_index + 1] look-aheads
 * are not bounded by bufsize; presumably the caller guarantees a NUL byte
 * at buf[bufsize] -- confirm.
 */
4337static int Html_write_raw(DilloHtml *html, char *buf, int bufsize, int Eof)
4338{
4339 char ch = 0, *p, *text;
4340 int token_start, buf_index;
4341
4342 /* Now, 'buf' and 'bufsize' define a buffer aligned to start at a token
4343 * boundary. Iterate through tokens until end of buffer is reached. */
4344 buf_index = 0;
4345 token_start = buf_index;
4346 while ((buf_index < bufsize) && !html->stop_parser) {
4347 /* invariant: buf_index == bufsize || token_start == buf_index */
4348
4349 if (S_TOP(html)->parse_mode ==
/* NOTE(review): the right-hand side of this comparison (source line 4350)
 * is missing from this listing; judging by the comments below it is the
 * verbatim parse mode -- confirm against the original file. */
4351 /* Non HTML code here, let's skip until closing tag */
4352 do {
4353 const char *tag = Tags[S_TOP(html)->tag_idx].name;
4354 buf_index += strcspn(buf + buf_index, "<");
/* not enough bytes left to hold "</" + tag name + one more character:
 * wait for more data */
4355 if (buf_index + (int)strlen(tag) + 3 > bufsize) {
4356 buf_index = bufsize;
4357 } else if (strncmp(buf + buf_index, "</", 2) == 0 &&
4358 Html_match_tag(tag, buf+buf_index+2, strlen(tag)+1)) {
4359 /* copy VERBATIM text into the stash buffer */
4360 text = dStrndup(buf + token_start, buf_index - token_start);
4361 dStr_append(html->Stash, text);
4362 dFree(text);
4363 token_start = buf_index;
4364 break;
4365 } else
4366 ++buf_index;
4367 } while (buf_index < bufsize);
4368
4369 if (buf_index == bufsize)
4370 break;
4371 }
4372
4373 if (isspace(buf[buf_index])) {
4374 /* whitespace: group all available whitespace */
4375 while (++buf_index < bufsize && isspace(buf[buf_index])) ;
4376 Html_process_space(html, buf + token_start, buf_index - token_start);
4377 token_start = buf_index;
4378
4379 } else if (buf[buf_index] == '<' && (ch = buf[buf_index + 1]) &&
4380 (isalpha(ch) || strchr("/!?", ch)) ) {
4381 /* Tag */
4382 if (buf_index + 3 < bufsize && !strncmp(buf + buf_index, "<!--", 4)) {
4383 /* Comment: search for close of comment, skipping over
4384 * everything except a matching "-->" tag. */
4385 while ( (p = (char*) memchr(buf + buf_index, '>',
4386 bufsize - buf_index)) ){
4387 buf_index = p - buf + 1;
4388 if (p[-1] == '-' && p[-2] == '-') break;
4389 }
4390 if (p) {
4391 /* Got the whole comment. Let's throw it away! :) */
4392 token_start = buf_index;
4393 } else
/* comment not closed yet: token_start stays at its start */
4394 buf_index = bufsize;
4395 } else {
4396 /* Tag: search end of tag (skipping over quoted strings) */
4397 html->CurrOfs = html->Start_Ofs + token_start;
4398
4399 while ( buf_index < bufsize ) {
4400 buf_index++;
4401 buf_index += strcspn(buf + buf_index, ">\"'<");
4402 if ((ch = buf[buf_index]) == '>') {
4403 break;
4404 } else if (ch == '"' || ch == '\'') {
4405 /* Skip over quoted string */
4406 buf_index++;
4407 buf_index += strcspn(buf + buf_index,
4408 (ch == '"') ? "\">" : "'>");
4409 if (buf[buf_index] == '>') {
4410 /* Unterminated string value? Let's look ahead and test:
4411 * (<: unterminated, closing-quote: terminated) */
4412 int offset = buf_index + 1;
4413 offset += strcspn(buf + offset,
4414 (ch == '"') ? "\"<" : "'<");
4415 if (buf[offset] == ch || !buf[offset]) {
4416 buf_index = offset;
4417 } else {
4418 BUG_MSG("Attribute lacks closing quote.");
4419 break;
4420 }
4421 }
4422 } else if (ch == '<') {
4423 /* unterminated tag detected */
4424 p = dStrndup(buf+token_start+1,
4425 strcspn(buf+token_start+1, " <\n\r\t"));
4426 BUG_MSG("<%s> lacks its closing '>'.", p);
4427 dFree(p);
/* step back so that the new '<' is left for the next token */
4428 --buf_index;
4429 break;
4430 }
4431 }
4432 if (buf_index < bufsize) {
4433 buf_index++;
4434 Html_process_tag(html, buf + token_start,
4435 buf_index - token_start);
4436 token_start = buf_index;
4437 }
4438 }
4439 } else {
4440 /* A Word: search for whitespace or tag open */
4441 html->CurrOfs = html->Start_Ofs + token_start;
4442
4443 while (++buf_index < bufsize) {
4444 buf_index += strcspn(buf + buf_index, " <\n\r\t\f\v");
/* a '<' that cannot start a tag is part of the word */
4445 if (buf[buf_index] == '<' && (ch = buf[buf_index + 1]) &&
4446 !isalpha(ch) && !strchr("/!?", ch))
4447 continue;
4448 break;
4449 }
4450 if (buf_index < bufsize || Eof) {
4451 /* successfully found end of token */
/* the word is NUL-terminated in place for the call, then the
 * overwritten byte is restored */
4452 ch = buf[buf_index];
4453 buf[buf_index] = 0;
4454 Html_process_word(html, buf + token_start,
4455 buf_index - token_start);
4456 buf[buf_index] = ch;
4457 token_start = buf_index;
4458 }
4459 }
4460 }/*while*/
4461
4462 HT2TB(html)->flush ();
4463
4464 return token_start;
4465}
4466
4467
#define B8(d)
Definition binaryconst.h:32
#define _MSG(...)
Definition bookmarks.c:45
#define MSG(...)
Definition bookmarks.c:46
bool_t a_Bw_expecting(BrowserWindow *bw)
Definition bw.c:334
void a_Bw_close_client(BrowserWindow *bw, int ClientKey)
Close a cache-client upon successful retrieval.
Definition bw.c:167
void a_Bw_add_url(BrowserWindow *bw, const DilloUrl *Url)
Add an URL to the browser window's list.
Definition bw.c:209
void a_Bw_add_client(BrowserWindow *bw, int Key, int Root)
Add a reference to a cache-client.
Definition bw.c:128
void a_Bw_remove_doc(BrowserWindow *bw, void *vdoc)
Remove a document from the bw's list.
Definition bw.c:263
void a_Bw_add_doc(BrowserWindow *bw, void *vdoc)
Add a document to the browser window's list.
Definition bw.c:221
void(* CA_Callback_t)(int Op, CacheClient_t *Client)
Callback type for cache clients.
Definition cache.h:55
int a_Capi_get_buf(const DilloUrl *Url, char **PBuf, int *BufSize)
Get the cache's buffer for the URL, and its size.
Definition capi.c:543
const char * a_Capi_set_content_type(const DilloUrl *url, const char *ctype, const char *from)
Set the Content-Type for the URL.
Definition capi.c:567
int a_Capi_dpi_verify_request(BrowserWindow *bw, DilloUrl *url)
Safety test: only allow GET|POST dpi-urls from dpi-generated pages.
Definition capi.c:233
void a_Capi_unref_buf(const DilloUrl *Url)
Unref the cache's buffer when no longer using it.
Definition capi.c:551
int a_Capi_open_url(DilloWeb *web, CA_Callback_t Call, void *CbData)
Most used function for requesting a URL.
Definition capi.c:394
int a_Capi_get_flags_with_redirection(const DilloUrl *Url)
Same as a_Capi_get_flags() but following redirections.
Definition capi.c:532
#define CAPI_IsCached
Definition capi.h:15
#define CAPI_Completed
Definition capi.h:19
A list of CssProperty objects.
Definition css.hh:331
bool enter(dw::core::Widget *widget, int link, int img, int x, int y)
Called, when a link is entered, left, or the position has changed.
Definition html.cc:740
bool press(dw::core::Widget *widget, int link, int img, int x, int y, dw::core::EventButton *event)
Handle the "press" signal.
Definition html.cc:760
bool click(dw::core::Widget *widget, int link, int img, int x, int y, dw::core::EventButton *event)
Handle the "click" signal.
Definition html.cc:794
bool ReqTagClose
Flag to close the stack's top tag.
void freeParseData()
Free parsing data.
Definition html.cc:627
int InFlags
tracks which elements we are in
DilloHtmlDocumentType DocType
DilloUrl * base_url
int getCurrLineNumber()
Return the line number of the tag/word being processed by the parser.
Definition html.cc:605
bool stop_parser
void bugMessage(const char *format,...)
Collect HTML error strings.
Definition html.cc:153
lout::misc::SimpleVector< DilloHtmlState > * stack
void write(char *Buf, int BufSize, int Eof)
Process the newly arrived html and put it into the page structure.
Definition html.cc:578
dw::core::style::Style * backgroundStyle()
char * Start_Buf
int formNew(DilloHtmlMethod method, const DilloUrl *action, DilloHtmlEnc enc, const char *charset)
Allocate and insert form information.
Definition html.cc:667
void finishParsing(int ClientKey)
Finish parsing a HTML page.
Definition html.cc:641
bool PreFirstChar
used to skip the first CR or CRLF in PRE tags
void startElement(int tag)
size_t CurrOfs
void addCssUrl(const DilloUrl *url)
Save URL in a vector (may be loaded later).
Definition html.cc:733
void initDw()
Miscellaneous initializations for Dw.
Definition html.cc:522
bool PrevWasCR
Flag to help parsing of "\r\n" in PRE tags.
float DocTypeVersion
Dstr * Stash
bool TagSoup
Flag to enable the parser's cleanup functions.
bool_t unloadedImages()
Definition html.cc:689
int pre_column
current column, used in PRE tags with tabs
DilloUrl * page_url
lout::misc::SimpleVector< DilloHtmlInput * > * inputs_outside_form
bool PrevWasHtmlClose
set when </html> is found
void restyle()
uchar_t Num_BODY
dw::core::style::Style * wordStyle()
bool StashSpace
BrowserWindow * bw
int32_t non_css_visited_color
as provided by vlink attribute in BODY
size_t OldLine
bool loadCssFromStash
current stash content should be loaded as CSS
lout::misc::SimpleVector< DilloHtmlForm * > * forms
dw::ImageMapsList maps
DilloHtml(BrowserWindow *bw, const DilloUrl *url, const char *content_type)
Create and initialize a new DilloHtml class.
Definition html.cc:432
~DilloHtml()
Free memory used by the DilloHtml class.
Definition html.cc:536
uchar_t Num_TITLE
dw::core::style::Style * style()
size_t OldOfs
uchar_t Num_HTML
element counters: used for validation purposes.
lout::misc::SimpleVector< DilloUrl * > * links
dw::core::Widget * dw
int32_t non_css_link_color
as provided by link attribute in BODY
char * content_type
Dstr * attr_data
Buffer for attribute value.
bool PrevWasBodyClose
set when </body> is found
lout::misc::SimpleVector< DilloUrl * > * cssUrls
int32_t visited_color
as computed according to CSS
uchar_t Num_HEAD
char * charset
bool InVisitedLink
used to 'contrast_visited_colors'
StyleEngine * styleEngine
lout::misc::SimpleVector< DilloHtmlImage * > * images
HtmlLinkReceiver linkReceiver
DilloHtmlForm * getCurrentForm()
Get the current form.
Definition html.cc:684
void loadImages(const DilloUrl *pattern)
Load images if they were disabled.
Definition html.cc:702
This class provides the glue between HTML parser and CSS subsystem.
void parse(DilloHtml *html, DilloUrl *url, const char *buf, int buflen, CssOrigin origin)
void setPseudoVisited()
set the CSS pseudo class :visited.
void setNonCssHint(CssPropertyName name, CssValueType type, CssPropertyValue value)
void setId(const char *id)
const char * getId()
dw::core::style::Color * backgroundColor()
void endElement(int tag)
tell the styleEngine that a html element has ended.
void setPseudoLink()
set the CSS pseudo class :link.
void setStyle(const char *style)
void setClass(const char *klass)
void inheritBackgroundColor()
Use of the background color of the parent style as default.
dw::core::style::StyleImage * backgroundImage(dw::core::style::BackgroundRepeat *bgRepeat, dw::core::style::BackgroundAttachment *bgAttachment, dw::core::style::Length *bgPositionX, dw::core::style::Length *bgPositionY)
Displays different kind of bullets.
Definition bullet.hh:15
void addShapeToCurrentMap(core::Shape *shape, int link)
Add a shape to the current map.
Definition image.cc:111
void startNewMap(lout::object::Object *key)
Start a new map and make it the current one.
Definition image.cc:99
void setCurrentMapDefaultLink(int link)
Set default link for current map.
Definition image.cc:119
Displays an instance of dw::core::Imgbuf.
Definition image.hh:120
void setIsMap()
Sets image as server side image map.
Definition image.cc:546
void setUseMap(ImageMapsList *list, Object *key)
Sets image as client side image map.
Definition image.cc:559
void forceMapRedraw()
Definition image.hh:179
void initWithText(const char *text, core::style::Style *style)
Definition listitem.cc:58
void initWithWidget(core::Widget *widget, core::style::Style *style)
Definition listitem.cc:48
Widget for drawing (horizontal) rules.
Definition ruler.hh:21
A Widget for rendering text blocks, i.e.
Definition textblock.hh:206
void addWidget(core::Widget *widget, core::style::Style *style)
Add a widget (word type) to the page.
void addSpace(core::style::Style *style)
?
void addText(const char *text, size_t len, core::style::Style *style)
Add a word to the page structure.
void addParbreak(int space, core::style::Style *style)
Cause a paragraph break.
static int CLASS_ID
Definition textblock.hh:867
dw::core::Shape implementation for simple circles.
Definition types.hh:91
Represents a button press or release event.
Definition events.hh:58
ButtonState state
Definition events.hh:42
dw::core::Shape implementation for polygons.
Definition types.hh:105
void addPoint(int x, int y)
Definition types.cc:156
dw::core::Shape implementation for simple rectangles.
Definition types.hh:70
Abstract interface for different shapes.
Definition types.hh:59
The base class of all dillo widgets.
Definition widget.hh:44
virtual void setStyle(style::Style *style)
Change the style of a widget.
Definition widget.cc:1316
style::Color * getFgColor()
Get the actual foreground color of a widget.
Definition widget.cc:1450
Widget * getParent()
Definition widget.hh:587
Layout * getLayout()
Definition widget.hh:602
style::Color * getBgColor()
Get the actual background of a widget.
Definition widget.cc:1431
ListStyleType listStyleType
Definition style.hh:568
static Style * create(StyleAttrs *attrs)
Definition style.hh:629
Simple (simpler than container::untyped::Vector and container::typed::Vector) template based vector.
Definition misc.hh:115
void setSize(int newSize)
Set the size explicitly.
Definition misc.hh:188
void increase()
Increase the vector size by one.
Definition misc.hh:181
void set(int i, T t)
Store an object in the vector.
Definition misc.hh:267
T get(int i) const
Return the one element, explicitly.
Definition misc.hh:222
int size() const
Return the number of elements put into this vector.
Definition misc.hh:162
T * getRef(int i) const
Return the reference of one element.
Definition misc.hh:211
int32_t a_Color_parse(const char *str, int32_t default_color, int *err)
Parse a color string.
Definition colors.c:258
int32_t a_Color_vc(int32_t candidate, int32_t C_txt, int32_t C_lnk, int32_t C_bg)
Return a suitable "visited link" color.
Definition colors.c:344
@ CSS_PROPERTY_LIST_STYLE_TYPE
Definition css.hh:210
@ CSS_PROPERTY_MARGIN_BOTTOM
Definition css.hh:211
@ CSS_PROPERTY_BORDER_RIGHT_STYLE
Definition css.hh:178
@ CSS_PROPERTY_MARGIN_RIGHT
Definition css.hh:213
@ PROPERTY_X_LANG
Definition css.hh:249
@ CSS_PROPERTY_BORDER_RIGHT_WIDTH
Definition css.hh:179
@ CSS_PROPERTY_BORDER_LEFT_WIDTH
Definition css.hh:176
@ PROPERTY_X_LINK
Definition css.hh:248
@ PROPERTY_X_TOOLTIP
Definition css.hh:251
@ CSS_PROPERTY_MARGIN_LEFT
Definition css.hh:212
@ CSS_PROPERTY_HEIGHT
Definition css.hh:204
@ CSS_PROPERTY_WIDTH
Definition css.hh:242
@ PROPERTY_X_IMG
Definition css.hh:250
@ CSS_PROPERTY_BORDER_TOP_WIDTH
Definition css.hh:183
@ CSS_PROPERTY_MARGIN_TOP
Definition css.hh:214
@ CSS_PROPERTY_VERTICAL_ALIGN
Definition css.hh:239
@ CSS_PROPERTY_BORDER_LEFT_STYLE
Definition css.hh:175
@ CSS_PROPERTY_TEXT_ALIGN
Definition css.hh:232
@ CSS_PROPERTY_BORDER_TOP_STYLE
Definition css.hh:182
@ CSS_PROPERTY_BACKGROUND_COLOR
Definition css.hh:166
@ CSS_PROPERTY_COLOR
Definition css.hh:188
@ CSS_PROPERTY_BORDER_BOTTOM_STYLE
Definition css.hh:171
@ CSS_PROPERTY_BORDER_BOTTOM_WIDTH
Definition css.hh:172
@ CSS_PROPERTY_FONT_FAMILY
Definition css.hh:197
@ CSS_TYPE_ENUM
Value is i, if represented by enum_symbols[i].
Definition css.hh:39
@ CSS_TYPE_INTEGER
This type is only used internally, for x-* properties.
Definition css.hh:37
@ CSS_TYPE_LENGTH_PERCENTAGE
<length> or <percentage>.
Definition css.hh:43
@ CSS_TYPE_STRING
<string>
Definition css.hh:58
@ CSS_TYPE_SYMBOL
Symbols, which are directly copied (as opposed to CSS_TYPE_ENUM and CSS_TYPE_MULTI_ENUM).
Definition css.hh:59
@ CSS_TYPE_COLOR
Represented as integer.
Definition css.hh:55
CssLengthType CSS_LENGTH_TYPE(CssLength l)
Definition css.hh:134
CssLength CSS_CREATE_LENGTH(float v, CssLengthType t)
Definition css.hh:102
@ CSS_LENGTH_TYPE_PX
Definition css.hh:76
@ CSS_LENGTH_TYPE_RELATIVE
This does not exist in CSS but is used in HTML.
Definition css.hh:88
@ CSS_LENGTH_TYPE_PERCENTAGE
Definition css.hh:87
@ CSS_LENGTH_TYPE_AUTO
This can be used as a simple value.
Definition css.hh:90
float CSS_LENGTH_VALUE(CssLength l)
Definition css.hh:138
@ CSS_ORIGIN_AUTHOR
Definition css.hh:33
unsigned int uint_t
Definition d_size.h:20
unsigned char bool_t
Definition d_size.h:21
char * a_Url_decode_hex_str(const char *str, size_t *p_sz)
Definition datauri.c:137
static const int bufsize
Definition decode.c:28
char * dStrconcat(const char *s1,...)
Concatenate a NULL-terminated list of strings.
Definition dlib.c:102
void dFree(void *mem)
Definition dlib.c:68
int dStrAsciiCasecmp(const char *s1, const char *s2)
Definition dlib.c:203
void dStr_sprintfa(Dstr *ds, const char *format,...)
Printf-like function that appends.
Definition dlib.c:464
void dStr_append(Dstr *ds, const char *s)
Append a C string to a Dstr.
Definition dlib.c:316
char * dStrdup(const char *s)
Definition dlib.c:77
Dstr * dStr_sized_new(int sz)
Create a new string with a given size.
Definition dlib.c:254
int dStrnAsciiCasecmp(const char *s1, const char *s2, size_t n)
Definition dlib.c:215
void dStr_erase(Dstr *ds, int pos_0, int len)
Erase a substring.
Definition dlib.c:388
void dStr_free(Dstr *ds, int all)
Free a dillo string.
Definition dlib.c:337
char * dStriAsciiStr(const char *haystack, const char *needle)
Case insensitive strstr.
Definition dlib.c:184
void dStr_append_l(Dstr *ds, const char *s, int l)
Append a C string to a Dstr (providing length).
Definition dlib.c:308
void dStr_append_c(Dstr *ds, int c)
Append one character.
Definition dlib.c:349
char * dStrndup(const char *s, size_t sz)
Definition dlib.c:88
void dStr_sprintf(Dstr *ds, const char *format,...)
Printf-like function.
Definition dlib.c:450
void dStr_vsprintfa(Dstr *ds, const char *format, va_list argp)
vsprintf-like function that appends.
Definition dlib.c:401
Dstr * dStr_new(const char *s)
Create a new string.
Definition dlib.c:325
void dStr_truncate(Dstr *ds, int len)
Truncate a Dstr to be 'len' bytes long.
Definition dlib.c:368
char * dStrnfill(size_t len, char c)
Return a new string of length 'len' filled with 'c' characters.
Definition dlib.c:149
#define dReturn_if_fail(expr)
Definition dlib.h:84
#define dReturn_val_if_fail(expr, val)
Definition dlib.h:88
#define dReturn_if(expr)
Definition dlib.h:76
#define dReturn_val_if(expr, val)
Definition dlib.h:80
#define D_ASCII_TOLOWER(c)
Definition dlib.h:49
#define TRUE
Definition dlib.h:35
#define FALSE
Definition dlib.h:31
#define dNew(type, count)
Definition dlib.h:61
@ SKIP_VALUE
Definition dpip.c:70
static Image * image
#define IMAGE_MAX_AREA
Definition fltkimgbuf.cc:27
void a_History_set_title_by_url(const DilloUrl *url, const char *title)
Set the page-title for a given URL.
Definition history.c:130
static void Html_tag_open_video(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2521
static void Html_tag_open_title(DilloHtml *html, const char *tag, int tagsize)
Handle open TITLE.
Definition html.cc:1698
const char * a_Html_get_attr(DilloHtml *html, const char *tag, int tagsize, const char *attrname)
Call Html_get_attr2 telling it to parse entities and strip the result.
Definition html.cc:4288
static misc::SimpleVector< int > * Html_read_coords(DilloHtml *html, const char *str)
Read coords in a string, returning a vector of ints.
Definition html.cc:2376
DilloHtmlTagParsingFlags
Definition html.cc:86
@ HTML_RightTrim
Definition html.cc:88
@ HTML_LeftTrim
Definition html.cc:87
@ HTML_ParseEntities
Definition html.cc:89
static int i_HR
Definition html.cc:131
static void Html_tag_open_a(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2704
static void Html_css_load_callback(int Op, CacheClient_t *Client)
Called by the network engine when a stylesheet has new data.
Definition html.cc:3243
static void Html_real_pop_tag(DilloHtml *html)
Pop the top tag in the stack.
Definition html.cc:1385
static void Html_tag_open_blockquote(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2783
static int i_OPTGROUP
Definition html.cc:133
static void Html_push_tag(DilloHtml *html, int tag_idx)
Push the tag (copying attributes from the top of the stack)
Definition html.cc:1359
static void Html_tag_open_dir(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2851
static void Html_tag_close_a(DilloHtml *html)
Definition html.cc:2774
bool a_Html_tag_set_valign_attr(DilloHtml *html, const char *tag, int tagsize)
Evaluates the VALIGN attribute (top|bottom|middle|baseline) and sets the style in style_attrs.
Definition html.cc:363
static const TagInfo Tags[]
Function index for the open, content, and close functions for each tag.
Definition html.cc:3479
static int Html_triggers_optional_close(int old_idx, int cur_idx)
For elements with optional close, check whether it is time to close, by also following Firefox's de facto...
Definition html.cc:3647
static void Html_tag_open_html(DilloHtml *html, const char *tag, int tagsize)
Handle open HTML element.
Definition html.cc:1616
static bool Html_must_add_breaks(DilloHtml *html)
Definition html.cc:424
static int i_TBODY
Definition html.cc:140
static void Html_tag_content_map(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2325
static void Html_add_anchor(DilloHtml *html, const char *name)
Register an anchor for this page.
Definition html.cc:2685
static void Html_tag_open_span(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3396
static void Html_free(void *data)
Definition html.cc:222
static void Html_tag_open_pre(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3061
static void Html_tag_open_body(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:1793
static void Html_test_section(DilloHtml *html, int new_idx, int IsCloseTag)
HTML, HEAD and BODY elements have optional open and close tags.
Definition html.cc:3854
static int Html_set_new_link(DilloHtml *html, DilloUrl **url)
Create a new link, set it as the url's parent and return the index.
Definition html.cc:306
void a_Html_form_display_hiddens(void *v_html, void *v_form, bool_t display)
Used by the "Show/Hide hiddens" form menuitem.
Definition html.cc:279
static bool Html_contains_form(DilloHtml *html, void *v_form)
Search for form.
Definition html.cc:240
static void Html_parse_doctype(DilloHtml *html, const char *tag, int tagsize)
Handle DOCTYPE declaration.
Definition html.cc:1530
static void Html_tag_open_q(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2792
DilloUrl * a_Html_url_new(DilloHtml *html, const char *url_str, const char *base_url, int use_base_url)
Wrapper for a_Url_new that adds an error detection message.
Definition html.cc:172
static void Html_tag_content_frame(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:1950
static int i_TH
Definition html.cc:141
static void Html_display_listitem(DilloHtml *html)
Definition html.cc:3996
static int Html_charref_comp(const void *a, const void *b)
Comparison function for binary search.
Definition html.cc:968
static void Html_tag_open_frame(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:1925
static const char * Html_parse_entity(DilloHtml *html, const char *token, int toksize, int *entsize, bool_t is_attr)
Given an entity, return the corresponding string.
Definition html.cc:1045
static void Html_tag_open_ul(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2818
DilloHtmlTagParsingState
Definition html.cc:76
@ SKIP_VALUE
Definition html.cc:81
@ SEEK_TOKEN_START
Definition html.cc:79
@ MATCH_ATTR_NAME
Definition html.cc:78
@ SEEK_ATTR_START
Definition html.cc:77
@ SEEK_VALUE_START
Definition html.cc:80
@ FINISHED
Definition html.cc:83
@ GET_VALUE
Definition html.cc:82
static void Html_add_textblock(DilloHtml *html, bool addBreaks, int breakSpace, bool addBreakOpt)
Create and add a new Textblock to the current Textblock.
Definition html.cc:393
#define NTAGS
Definition html.cc:3592
static void Html_tag_open_h(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2006
static void Html_process_word(DilloHtml *html, const char *word, int size)
Handles putting the word into its proper place.
Definition html.cc:1226
static void Html_tag_open_object(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2484
static int i_BUTTON
Definition html.cc:127
void a_Html_tag_set_align_attr(DilloHtml *html, const char *tag, int tagsize)
Evaluates the ALIGN attribute (left|center|right|justify) and sets the style at the top of the stack.
Definition html.cc:318
static int i_DT
Definition html.cc:129
static void Html_tag_open_menu(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2867
static int i_SELECT
Definition html.cc:136
static void Html_display_block(DilloHtml *html)
Definition html.cc:3985
static void Html_tag_close_li(DilloHtml *html)
Definition html.cc:2953
static void Html_tag_close_pre(DilloHtml *html)
Definition html.cc:3072
static void Html_tag_content_frameset(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:1995
static Charref_t * Html_charref_search(char *key)
Binary search of 'key' in charref list.
Definition html.cc:976
static void Html_tag_open_link(DilloHtml *html, const char *tag, int tagsize)
Parse the LINK element (Only CSS stylesheets by now).
Definition html.cc:3309
static void Html_tag_content_br(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2018
static void Html_tag_close_media(DilloHtml *html)
Media (AUDIO/VIDEO) close function.
Definition html.cc:2624
static void Html_parse_common_attrs(DilloHtml *html, char *tag, int tagsize)
Parse attributes that can appear on any tag.
Definition html.cc:3908
static const char * Html_parse_numeric_charref(DilloHtml *html, char *tok, bool_t is_attr, int *entsize)
Parse a numeric character reference (e.g., "&#47;" or "&#x2F;").
Definition html.cc:886
static void Html_tag_content_object(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2510
static void Html_tag_open_source(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2586
static int i_A
Definition html.cc:125
static void Html_tag_close_q(DilloHtml *html)
Definition html.cc:2807
static void Html_tag_close_map(DilloHtml *html)
Handle close <MAP>.
Definition html.cc:2350
static void Html_eventually_pop_dw(DilloHtml *html, bool hand_over_break)
This function is called after popping the stack, to handle nested Textblock widgets.
Definition html.cc:1346
static void Html_tag_open_hr(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2962
static CssLength Html_parse_length_or_multi_length(const char *attr, char **endptr)
Definition html.cc:1404
static void Html_process_space_pre_line(DilloHtml *html, const char *space, int spacesize)
For white-space: pre-line, we must break the line if encountering a newline.
Definition html.cc:1118
static void Html_tag_cleanup_at_close(DilloHtml *html, int TagIdx)
Conditional cleanup of the stack, called before closing any tag.
Definition html.cc:3807
static void Html_process_space(DilloHtml *html, const char *space, int spacesize)
Parse spaces.
Definition html.cc:1140
void a_Html_form_submit(void *v_html, void *v_form)
Used by the "Submit form" form menuitem.
Definition html.cc:253
static void Html_tag_open_dd(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3053
static void Html_tag_content_hr(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3020
static const char * Html_get_attr2(DilloHtml *html, const char *tag, int tagsize, const char *attrname, int tag_parsing_flags)
Get attribute value for 'attrname' and return it.
Definition html.cc:4185
static int i_TD
Definition html.cc:138
static int i_TR
Definition html.cc:139
static void Html_callback(int Op, CacheClient_t *Client)
Dispatch the appropriate function for 'Op'.
Definition html.cc:4321
static void Html_process_tag(DilloHtml *html, char *tag, int tagsize)
Process a tag, given as 'tag' and 'tagsize'.
Definition html.cc:4045
static void Html_tag_close_par(DilloHtml *html)
Default close for paragraph tags - pop the stack and break.
Definition html.cc:3438
void(* TagCloseFunct)(DilloHtml *html)
Definition html.cc:74
static void Html_set_link_coordinates(DilloHtml *html, int link, int x, int y)
Set the URL data for image maps.
Definition html.cc:292
static void Html_tag_content_img(DilloHtml *html, const char *tag, int tagsize)
Create a new Image struct and request the image-url to the cache.
Definition html.cc:2279
static int i_DD
Definition html.cc:128
static void Html_update_content_type(DilloHtml *html, const char *content)
Update the document's content type information based on meta tag data.
Definition html.cc:3104
void(* TagOpenFunct)(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:73
static void Html_tag_open_default(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3388
static void Html_tag_open_embed(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2633
static int Html_ms_stupid_quotes_2ucs(int codepoint)
This is M$ non-standard "smart quotes" (w1252).
Definition html.cc:866
#define TAB_SIZE
Definition html.cc:58
static void Html_tag_open_img(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2216
void a_Html_load_images(void *v_html, DilloUrl *pattern)
Used by the "Load images" page menuitem.
Definition html.cc:230
static const char * Html_get_javascript_link(DilloHtml *html)
Test and extract the link from a javascript instruction.
Definition html.cc:2662
static void Html_tag_content_source(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2614
static int Html_write_raw(DilloHtml *html, char *buf, int bufsize, int Eof)
Here's where we parse the html and put it into the Textblock structure.
Definition html.cc:4337
static int i_OPTION
Definition html.cc:134
static int Html_forbids_cross_nesting(const int InFlags, const int new_idx)
Check nesting and cross-nesting between BUTTON, SELECT, TEXTAREA and A.
Definition html.cc:3686
static void Html_tag_close_html(DilloHtml *html)
Handle close HTML element.
Definition html.cc:1635
static void Html_tag_close_title(DilloHtml *html)
Handle close TITLE.
Definition html.cc:1718
static void Html_tag_open_meta(DilloHtml *html, const char *tag, int tagsize)
Handle META. We do not support http-equiv=refresh with delay>0 because it's non standard,...
Definition html.cc:3130
static int i_LI
Definition html.cc:132
static void Html_tag_open_script(DilloHtml *html, const char *tag, int tagsize)
Handle open SCRIPT.
Definition html.cc:1736
static void Html_tag_open_abbr(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2056
static void Html_tag_content_wbr(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3446
static void Html_tag_open_sectioning(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3413
static void Html_tag_open_dt(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3044
static int i_BODY
Definition html.cc:126
void * a_Html_text(const char *type, void *P, CA_Callback_t *Call, void **Data)
Set callback function and callback data for the "html/text" MIME type.
Definition html.cc:211
static bool Html_load_image(BrowserWindow *bw, DilloUrl *url, const DilloUrl *requester, DilloImage *image)
Tell cache to retrieve image.
Definition html.cc:2198
static void Html_force_push_tag(DilloHtml *html, int tag_idx)
Push the tag (used to force an element with optional open into the stack).
Definition html.cc:1376
#define SGML_SPCDEL
Definition html.cc:56
static void Html_tag_open_style(DilloHtml *html, const char *tag, int tagsize)
Handle open STYLE.
Definition html.cc:1754
static const char * Html_parse_named_charref(DilloHtml *html, char *tok, bool_t is_attr, int *entsize)
Parse a named character reference (e.g., "&amp;" or "&hellip;").
Definition html.cc:989
static int Html_check_name_val(DilloHtml *html, const char *val, const char *attrname)
Check that 'val' is composed of characters inside [A-Za-z0-9:_.-].
Definition html.cc:1486
int a_Html_tag_index(const char *tag)
Get 'tag' index.
Definition html.cc:3620
static void Html_tag_open_base(DilloHtml *html, const char *tag, int tagsize)
Set the Document Base URI.
Definition html.cc:3359
static void Html_tag_content_area(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2406
static bool Html_match_tag(const char *tagstr, char *tag, int tagsize)
Does the tag in tagstr (e.g.
Definition html.cc:1328
static void Html_tag_close_body(DilloHtml *html)
Definition html.cc:1899
static void Html_tag_open_audio(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2554
static void Html_tag_open_font(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2026
static void Html_tag_close_head(DilloHtml *html)
Handle close HEAD element.
Definition html.cc:1672
static int i_TEXTAREA
Definition html.cc:137
static void Html_tag_cleanup_to_idx(DilloHtml *html, int s_idx, int new_idx, int fi, char op)
Cleanup the stack to a given index.
Definition html.cc:3705
static void Html_tag_content_embed(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2653
static void Html_tag_close_script(DilloHtml *html)
Handle close SCRIPT.
Definition html.cc:1745
static void Html_display_inline_block(DilloHtml *html)
Definition html.cc:3991
static void Html_stack_cleanup_at_open(DilloHtml *html, int ni)
Conditional cleanup of the stack (at open time).
Definition html.cc:3757
static int i_HTML
Definition html.cc:130
static int Html_tag_pre_excludes(DilloHtml *html, int tag_idx)
Check whether a tag is in the "excluding" element set for PRE.
Definition html.cc:3081
static void Html_tag_open_head(DilloHtml *html, const char *tag, int tagsize)
Handle open HEAD element.
Definition html.cc:1647
static void Html_tag_open_li(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2921
static int i_P
Definition html.cc:135
static int Html_tag_compare(const char *p1, const char *p2)
Definition html.cc:3605
static void Html_check_html5_obsolete(DilloHtml *html, int ni)
Warn when encountering elements that are obsolete in HTML5.
Definition html.cc:3962
static void Html_tag_open_p(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:1913
static void Html_tag_close_style(DilloHtml *html)
Handle close STYLE.
Definition html.cc:1783
static void Html_tag_open_ol(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2884
void a_Html_form_reset(void *v_html, void *v_form)
Used by the "Reset form" form menuitem.
Definition html.cc:266
static void Html_tag_open_dl(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3034
static void Html_tag_open_div(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3429
#define HTML_NTAGS
Definition html.hh:24
#define NumRef
static const Charref_t Charrefs[NumRef]
const char * a_Html_get_attr(DilloHtml *html, const char *tag, int tagsize, const char *attrname)
Call Html_get_attr2 telling it to parse entities and strip the result.
Definition html.cc:4288
void a_Html_stash_init(DilloHtml *html)
Initialize the stash buffer.
Definition html.cc:853
@ DILLO_HTML_TABLE_MODE_NONE
no table at all
bool a_Html_should_display(DilloHtml *html)
Definition html.cc:4029
DilloUrl * a_Html_url_new(DilloHtml *html, const char *url_str, const char *base_url, int use_base_url)
Wrapper for a_Url_new that adds an error detection message.
Definition html.cc:172
#define HT2TB(html)
"html struct" to Textblock
DilloImage * a_Html_image_new(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2143
CssLength a_Html_parse_length(DilloHtml *html, const char *attr)
Returns a length or a percentage, or UNDEF_LENGTH in case of an error, or if attr is NULL.
Definition html.cc:1443
int32_t a_Html_color_parse(DilloHtml *html, const char *str, int32_t default_color)
Parse a color attribute.
Definition html.cc:1468
void a_Html_tag_set_align_attr(DilloHtml *html, const char *tag, int tagsize)
Evaluates the ALIGN attribute (left|center|right|justify) and sets the style at the top of the stack.
Definition html.cc:318
DilloHtmlParseMode
@ DILLO_HTML_PARSE_MODE_PRE
@ DILLO_HTML_PARSE_MODE_VERBATIM
@ DILLO_HTML_PARSE_MODE_BODY
@ DILLO_HTML_PARSE_MODE_INIT
@ DILLO_HTML_PARSE_MODE_STASH_AND_BODY
@ DILLO_HTML_PARSE_MODE_STASH
#define BUG_MSG(...)
Add a bug-meter message.
void a_Html_common_image_attrs(DilloHtml *html, const char *tag, int tagsize)
Read image-associated tag attributes and create new image.
Definition html.cc:2073
#define S_TOP(html)
Top of the parsing stack.
@ IN_HEAD
@ IN_BUTTON
@ IN_LI
@ IN_MEDIA
@ IN_PRE
@ IN_BODY
@ IN_MAP
@ IN_TEXTAREA
@ IN_NONE
@ IN_META_HACK
@ IN_SELECT
@ IN_A
@ IN_HTML
@ IN_EOF
@ HTML_LIST_ORDERED
@ HTML_LIST_UNORDERED
@ HTML_LIST_NONE
#define HT2LT(html)
"html struct" to "Layout"
@ DILLO_HTML_TABLE_BORDER_SEPARATE
char * a_Html_get_attr_wdef(DilloHtml *html, const char *tag, int tagsize, const char *attrname, const char *def)
"a_Html_get_attr with default": call a_Html_get_attr() and dStrdup() the returned string.
Definition html.cc:4302
int a_Html_tag_index(const char *tag)
Get 'tag' index.
Definition html.cc:3620
@ DT_UNRECOGNIZED
@ DT_NONE
@ DT_XHTML
@ DT_HTML
char * a_Html_parse_entities(DilloHtml *html, const char *token, int toksize)
Parse all the entities in a token.
Definition html.cc:1080
void a_Html_load_stylesheet(DilloHtml *html, DilloUrl *url)
Tell cache to retrieve a stylesheet.
Definition html.cc:3257
int a_Misc_content_type_cmp(const char *ct1, const char *ct2)
Compare two Content-Type strings.
Definition misc.c:274
void a_Misc_parse_content_type(const char *type, char **major, char **minor, char **charset)
Parse Content-Type string, e.g., "text/html; charset=utf-8".
Definition misc.c:210
#define d_isascii(c)
Definition misc.h:12
Anything related to Dillo Widget styles is defined here.
Definition style.cc:34
@ LIST_STYLE_TYPE_DISC
Definition style.hh:303
@ LIST_STYLE_TYPE_UPPER_ALPHA
Definition style.hh:313
@ LIST_STYLE_TYPE_LOWER_ALPHA
Definition style.hh:311
@ LIST_STYLE_TYPE_CIRCLE
Definition style.hh:304
@ LIST_STYLE_TYPE_UPPER_ROMAN
Definition style.hh:309
@ LIST_STYLE_TYPE_DECIMAL
Definition style.hh:306
@ LIST_STYLE_TYPE_SQUARE
Definition style.hh:305
@ LIST_STYLE_TYPE_NONE
Definition style.hh:323
@ LIST_STYLE_TYPE_LOWER_ROMAN
Definition style.hh:308
@ DISPLAY_INLINE_BLOCK
Definition style.hh:281
int Length
Type for representing all lengths within dw::core::style.
Definition style.hh:429
void numtostr(int num, char *buf, int buflen, ListStyleType listStyleType)
Convert a number into a string, in a given list style.
Definition style.cc:1422
@ WHITE_SPACE_PRE_LINE
Definition style.hh:364
@ WHITE_SPACE_PRE_WRAP
Definition style.hh:363
Anything related to embedded UI widgets is defined here.
Definition core.hh:36
The core of Dw is defined in this namespace.
Definition core.hh:23
@ SHIFT_MASK
Definition events.hh:17
@ CONTROL_MASK
Definition events.hh:18
Dw is in this namespace, or sub namespaces of this one.
Definition form.cc:25
DilloPrefs prefs
Global Data.
Definition prefs.c:33
void Html_tag_close_form(DilloHtml *html)
Definition form.cc:407
void a_Html_form_display_hiddens2(void *vform, bool display)
Definition form.cc:267
void Html_tag_open_textarea(DilloHtml *html, const char *tag, int tagsize)
Definition form.cc:655
void Html_tag_content_textarea(DilloHtml *html, const char *tag, int tagsize)
The textarea tag.
Definition form.cc:665
void Html_tag_close_button(DilloHtml *html)
Handle close <BUTTON>
Definition form.cc:1023
void Html_tag_open_option(DilloHtml *html, const char *tag, int tagsize)
Definition form.cc:916
DilloHtmlForm * a_Html_form_new(DilloHtml *html, DilloHtmlMethod method, const DilloUrl *action, DilloHtmlEnc content_type, const char *charset, bool enabled)
Definition form.cc:238
void Html_tag_open_optgroup(DilloHtml *html, const char *tag, int tagsize)
Definition form.cc:856
void a_Html_form_reset2(void *vform)
Definition form.cc:262
void a_Html_form_delete(DilloHtmlForm *form)
Definition form.cc:247
void a_Html_input_delete(DilloHtmlInput *input)
Definition form.cc:252
void Html_tag_close_select(DilloHtml *html)
Definition form.cc:834
void Html_tag_open_isindex(DilloHtml *html, const char *tag, int tagsize)
The ISINDEX tag is just a deprecated form of <INPUT type=text> with implied FORM,...
Definition form.cc:614
void Html_tag_close_optgroup(DilloHtml *html)
Definition form.cc:892
void Html_tag_close_textarea(DilloHtml *html)
Close textarea.
Definition form.cc:729
void Html_tag_open_select(DilloHtml *html, const char *tag, int tagsize)
Definition form.cc:770
void Html_tag_open_input(DilloHtml *html, const char *tag, int tagsize)
Add a new input to current form.
Definition form.cc:437
void Html_tag_open_button(DilloHtml *html, const char *tag, int tagsize)
Definition form.cc:955
void Html_tag_open_form(DilloHtml *html, const char *tag, int tagsize)
Handle <FORM> tag.
Definition form.cc:341
void a_Html_form_submit2(void *vform)
Definition form.cc:257
void Html_tag_close_option(DilloHtml *html)
Definition form.cc:944
DilloHtmlMethod
Definition form.hh:10
DilloHtmlEnc
Definition form.hh:16
void a_Image_ref(DilloImage *Image)
Add a reference to an Image struct. Do nothing if the argument is NULL.
Definition image.cc:100
void a_Image_unref(DilloImage *Image)
Unref and free if necessary. Do nothing if the argument is NULL.
Definition image.cc:89
DilloImage * a_Image_new(void *layout, void *img_rndr, int32_t bg_color, int32_t fg_color)
Create and initialize a new image structure.
Definition image.cc:34
void Html_tag_open_td(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:242
void Html_tag_content_tbody(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:260
void Html_tag_open_tbody(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:256
void Html_tag_open_thead(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:274
void Html_tag_content_th(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:287
void Html_tag_open_table(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:47
void Html_tag_open_tfoot(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:267
void Html_tag_content_td(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:248
void Html_tag_content_tr(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:223
void Html_tag_open_th(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:281
void Html_tag_content_table(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:161
void Html_tag_open_tr(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:180
Contains the specific data for a single window.
Definition bw.h:27
Dstr * page_bugs
Definition bw.h:71
float zoom
Definition bw.h:74
int num_page_bugs
HTML-bugs detected at parse time.
Definition bw.h:70
int NumPendingStyleSheets
Number of not yet arrived style sheets.
Definition bw.h:48
Data structure for cache clients.
Definition cache.h:60
int Key
Primary Key for this client.
Definition cache.h:61
void * CbData
Client function data.
Definition cache.h:67
uint_t BufSize
Valid size of cache-data.
Definition cache.h:65
void * Buf
Pointer to cache-data.
Definition cache.h:64
void * Web
Pointer to the Web structure of our client.
Definition cache.h:68
const char * html4_str
const char * ref
const char * html5_str
DilloImage * image
DilloUrl * url
DilloHtmlTableMode table_mode
bool hand_over_break
This is used for list items etc.
DilloHtmlTableBorderMode table_border_mode
dw::core::Widget * table
dw::core::Widget * textblock
DilloHtmlListMode list_type
dw::core::Widget * ref_list_item
This is used to align list items (especially in enumerated lists)
DilloHtmlParseMode parse_mode
int tag_idx
TagInfo index for the tag that's being processed.
bool_t parse_embedded_css
Definition prefs.h:103
bool_t load_images
Definition prefs.h:99
bool_t show_tooltip
Definition prefs.h:66
bool_t limit_text_width
Definition prefs.h:71
bool_t focus_new_tab
Definition prefs.h:74
bool_t show_extra_warnings
Definition prefs.h:122
bool_t load_stylesheets
Definition prefs.h:102
bool_t contrast_visited_color
Definition prefs.h:65
bool_t middle_click_opens_new_tab
Definition prefs.h:114
Definition url.h:88
Definition dlib.h:114
Dstr_char_t * str
Definition dlib.h:117
int len
Definition dlib.h:116
void * img_rndr
Definition image.hh:61
int flags
Additional info.
Definition web.hh:29
DilloUrl * url
Requested URL.
Definition web.hh:25
BrowserWindow * bw
The requesting browser window [reference].
Definition web.hh:28
DilloImage * Image
For image urls [reference].
Definition web.hh:31
void a_UIcmd_link_popup(void *vbw, const DilloUrl *url, const DilloUrl *page_url)
Definition uicmd.cc:1316
void a_UIcmd_set_bug_prog(BrowserWindow *bw, int n_bug)
Definition uicmd.cc:1577
void a_UIcmd_page_popup(void *vbw, bool_t has_bugs, void *v_cssUrls)
Definition uicmd.cc:1306
void a_UIcmd_set_msg(BrowserWindow *bw, const char *format,...)
Definition uicmd.cc:1609
void a_UIcmd_image_popup(void *vbw, const DilloUrl *url, bool_t loaded_img, DilloUrl *page_url, DilloUrl *link_url)
Definition uicmd.cc:1324
void a_UIcmd_set_page_title(BrowserWindow *bw, const char *label)
Definition uicmd.cc:1586
void a_UIcmd_open_url_nw(BrowserWindow *bw, const DilloUrl *url)
Definition uicmd.cc:846
void a_UIcmd_repush(void *vbw)
Definition uicmd.cc:955
void a_UIcmd_open_url(BrowserWindow *bw, const DilloUrl *url)
Definition uicmd.cc:811
void a_UIcmd_redirection0(void *vbw, const DilloUrl *url)
Definition uicmd.cc:963
void a_UIcmd_open_url_nt(void *vbw, const DilloUrl *url, int focus)
Definition uicmd.cc:862
void a_Url_set_flags(DilloUrl *u, int flags)
Set DilloUrl flags.
Definition url.c:527
int a_Url_cmp(const DilloUrl *A, const DilloUrl *B)
Compare two Url's to check if they're the same, or which one is bigger.
Definition url.c:506
void a_Url_free(DilloUrl *url)
Free a DilloUrl.
Definition url.c:208
void a_Url_set_ismap_coords(DilloUrl *u, char *coord_str)
Set DilloUrl ismap coordinates.
Definition url.c:549
DilloUrl * a_Url_new(const char *url_str, const char *base_url)
Transform (and resolve) an URL string into the respective DilloURL.
Definition url.c:371
char * a_Url_str(const DilloUrl *u)
Return the url as a string.
Definition url.c:66
DilloUrl * a_Url_dup(const DilloUrl *ori)
Duplicate a Url structure.
Definition url.c:477
#define URL_ILLEGAL_CHARS_SPC(u)
Definition url.h:81
#define URL_SCHEME_(u)
Definition url.h:49
#define URL_ILLEGAL_CHARS(u)
Definition url.h:80
#define URL_SpamSafe
Definition url.h:40
#define URL_FLAGS(u)
Definition url.h:79
#define URL_STR(u)
Definition url.h:76
#define URL_STR_(u)
Definition url.h:55
#define URL_SCHEME(u)
Definition url.h:70
bool_t a_Utf8_ideographic(const char *s, const char *end, int *len)
Does s point to a UTF-8-encoded ideographic character?
Definition utf8.cc:76
int a_Utf8_encode(unsigned int ucs, char *buf)
Write UTF-8 encoding of ucs into buf and return number of bytes written.
Definition utf8.cc:54
static const char utf8_zero_width_space[]
Unicode zero width space U+200B.
Definition utf8.hh:19
DilloWeb * a_Web_new(BrowserWindow *bw, const DilloUrl *url, const DilloUrl *requester)
Allocate and set safe values for a DilloWeb structure.
Definition web.cc:121
#define WEB_Stylesheet
Definition web.hh:18
#define WEB_Image
Definition web.hh:17