Dillo v3.2.0-143-gabad1053
Loading...
Searching...
No Matches
html.cc
Go to the documentation of this file.
1/*
2 * File: html.cc
3 *
4 * Copyright (C) 2005-2007 Jorge Arellano Cid <jcid@dillo.org>
5 * Copyright (C) 2024-2025 Rodrigo Arias Mallo <rodarima@gmail.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3 of the License, or
10 * (at your option) any later version.
11 */
12
17/*-----------------------------------------------------------------------------
18 * Includes
19 *---------------------------------------------------------------------------*/
20#include <ctype.h> /* for isspace */
21#include <string.h> /* for memcpy and memmove */
22#include <stdlib.h>
23#include <stdio.h> /* for sprintf */
24#include <errno.h>
25
26#include "bw.h" /* for BrowserWindow */
27#include "msg.h"
28#include "binaryconst.h"
29#include "colors.h"
30#include "html_charrefs.h"
31#include "utf8.hh"
32
33#include "misc.h"
34#include "uicmd.hh"
35#include "history.h"
36#include "menu.hh"
37#include "prefs.h"
38#include "capi.h"
39#include "html.hh"
40#include "html_common.hh"
41#include "form.hh"
42#include "table.hh"
43
44#include "dw/textblock.hh"
45#include "dw/bullet.hh"
46#include "dw/listitem.hh"
47#include "dw/image.hh"
48#include "dw/ruler.hh"
49
50/*-----------------------------------------------------------------------------
51 * Defines
52 *---------------------------------------------------------------------------*/
53
54/* Define to 1 to ignore white space immediately after an open tag,
55 * and immediately before a close tag. */
56#define SGML_SPCDEL 0
57
58#define TAB_SIZE 8
59
60/*-----------------------------------------------------------------------------
61 * Name spaces
62 *---------------------------------------------------------------------------*/
63using namespace lout;
64using namespace dw;
65using namespace dw::core;
66using namespace dw::core::ui;
67using namespace dw::core::style;
68
69/*-----------------------------------------------------------------------------
70 * Typedefs
71 *---------------------------------------------------------------------------*/
72class DilloHtml;
73typedef void (*TagOpenFunct) (DilloHtml *html, const char *tag, int tagsize);
74typedef void (*TagCloseFunct) (DilloHtml *html);
75
85
91
92
93/*
94 * Exported function with C linkage.
95 */
96extern "C" {
97void *a_Html_text(const char *type, void *P, CA_Callback_t *Call,void **Data);
98}
99
100/*-----------------------------------------------------------------------------
101 * Forward declarations
102 *---------------------------------------------------------------------------*/
103static int Html_write_raw(DilloHtml *html, char *buf, int bufsize, int Eof);
104static bool Html_load_image(BrowserWindow *bw, DilloUrl *url,
105 const DilloUrl *requester, DilloImage *image);
106static void Html_callback(int Op, CacheClient_t *Client);
107static void Html_tag_cleanup_at_close(DilloHtml *html, int TagIdx);
108int a_Html_tag_index(const char *tag);
109
110/*-----------------------------------------------------------------------------
111 * Local Data
112 *---------------------------------------------------------------------------*/
/* Description of one HTML element: its name, parsing flags, end-tag rule
 * and the handler functions associated with it. */
114typedef struct {
115 const char *name; /* element name */
116 unsigned char Flags; /* flags (explained near the table data) */
117 char EndTag; /* Is the end tag Required, Optional or Forbidden */
118 TagOpenFunct open; /* Open function (html, tag, tagsize) */
119 TagOpenFunct content; /* Content function; same signature as open */
120 TagCloseFunct close; /* Close function (html) */
121} TagInfo;
122
123/* Some element indexes required in scattered places */
124static int
142
143
144/*-----------------------------------------------------------------------------
145 *-----------------------------------------------------------------------------
146 * Main Code
147 *-----------------------------------------------------------------------------
148 *---------------------------------------------------------------------------*/
149
153void DilloHtml::bugMessage(const char *format, ... )
154{
155 va_list argp;
156
157 if (bw->num_page_bugs)
158 dStr_append_c(bw->page_bugs, '\n');
160 "HTML warning: line %d, ",
162 va_start(argp, format);
163 dStr_vsprintfa(bw->page_bugs, format, argp);
164 va_end(argp);
166}
167
173 const char *url_str, const char *base_url,
174 int use_base_url)
175{
176 if (!url_str) {
177 MSG("a_Html_url_new: URL is NULL\n");
178 return NULL;
179 }
180
181 DilloUrl *url = a_Url_new(url_str,
182 (use_base_url) ? base_url : URL_STR_(html->base_url));
183
184 if (!url) {
185 BUG_MSG("URL is not valid '%s'.", url_str);
186 return NULL;
187 }
188
189 int n_ic, n_ic_spc;
190 if ((n_ic = URL_ILLEGAL_CHARS(url)) != 0) {
191 const char *suffix = (n_ic) > 1 ? "s" : "";
192 n_ic_spc = URL_ILLEGAL_CHARS_SPC(url);
193 if (n_ic == n_ic_spc) {
194 BUG_MSG("URL has %d illegal space%s ('%s').", n_ic, suffix, url_str);
195 } else if (n_ic_spc == 0) {
196 BUG_MSG("URL has %d illegal byte%s in {00-1F, 7F-FF} range ('%s').",
197 n_ic, suffix, url_str);
198 } else {
199 BUG_MSG("URL has %d illegal byte%s: "
200 "%d space%s and %d in {00-1F, 7F-FF} range ('%s').",
201 n_ic, suffix,
202 n_ic_spc, n_ic_spc > 1 ? "s" : "", n_ic-n_ic_spc, url_str);
203 }
204 }
205 return url;
206}
207
211void *a_Html_text(const char *Type, void *P, CA_Callback_t *Call, void **Data)
212{
213 DilloWeb *web = (DilloWeb*)P;
214 DilloHtml *html = new DilloHtml(web->bw, web->url, Type);
215
216 *Data = (void*)html;
218
219 return (void*)html->dw;
220}
221
222static void Html_free(void *data)
223{
224 delete ((DilloHtml*)data);
225}
226
230void a_Html_load_images(void *v_html, DilloUrl *pattern)
231{
232 DilloHtml *html = (DilloHtml*)v_html;
233
234 html->loadImages(pattern);
235}
236
240static bool Html_contains_form(DilloHtml *html, void *v_form)
241{
242 for (int i = 0; i < html->forms->size(); i++) {
243 if (html->forms->get(i) == v_form) {
244 return true;
245 }
246 }
247 return false;
248}
249
253void a_Html_form_submit(void *v_html, void *v_form)
254{
255 DilloHtml *html = (DilloHtml*)v_html;
256
257 if (Html_contains_form(html, v_form)) {
258 /* it's still valid */
259 a_Html_form_submit2(v_form);
260 }
261}
262
266void a_Html_form_reset(void *v_html, void *v_form)
267{
268 DilloHtml *html = (DilloHtml*)v_html;
269
270 if (Html_contains_form(html, v_form)) {
271 /* it's still valid */
272 a_Html_form_reset2(v_form);
273 }
274}
275
279void a_Html_form_display_hiddens(void *v_html, void *v_form, bool_t display)
280{
281 DilloHtml *html = (DilloHtml*)v_html;
282
283 if (Html_contains_form(html, v_form)) {
284 /* it's still valid */
285 a_Html_form_display_hiddens2(v_form, (display != 0));
286 }
287}
288
292static void Html_set_link_coordinates(DilloHtml *html, int link, int x, int y)
293{
294 char data[64];
295
296 if (x != -1) {
297 snprintf(data, 64, "?%d,%d", x, y);
298 a_Url_set_ismap_coords(html->links->get(link), data);
299 }
300}
301
306static int Html_set_new_link(DilloHtml *html, DilloUrl **url)
307{
308 int nl = html->links->size();
309 html->links->increase();
310 html->links->set(nl, (*url) ? *url : NULL);
311 return nl;
312}
313
318void a_Html_tag_set_align_attr(DilloHtml *html, const char *tag, int tagsize)
319{
320 const char *align;
321
322 if ((align = a_Html_get_attr(html, tag, tagsize, "align"))) {
323 TextAlignType textAlignType = TEXT_ALIGN_LEFT;
324
325 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
326 BUG_MSG("The align attribute is obsolete in HTML5.");
327
328 if (dStrAsciiCasecmp (align, "left") == 0)
329 textAlignType = TEXT_ALIGN_LEFT;
330 else if (dStrAsciiCasecmp (align, "right") == 0)
331 textAlignType = TEXT_ALIGN_RIGHT;
332 else if (dStrAsciiCasecmp (align, "center") == 0)
333 textAlignType = TEXT_ALIGN_CENTER;
334 else if (dStrAsciiCasecmp (align, "justify") == 0)
335 textAlignType = TEXT_ALIGN_JUSTIFY;
336#if 0
337 else if (dStrAsciiCasecmp (align, "char") == 0) {
338 /* TODO: Actually not supported for <p> etc. */
339 v.textAlign = TEXT_ALIGN_STRING;
340 if ((charattr = a_Html_get_attr(html, tag, tagsize, "char"))) {
341 if (charattr[0] == 0)
342 /* TODO: ALIGN=" ", and even ALIGN="&32;" will result in
343 * an empty string (don't know whether the latter is
344 * correct, has to be clarified with the specs), so
345 * that for empty strings, " " is assumed. */
346 style_attrs.textAlignChar = ' ';
347 else
348 style_attrs.textAlignChar = charattr[0];
349 } else
350 /* TODO: Examine LANG attr of <html>. */
351 style_attrs.textAlignChar = '.';
352 }
353#endif
355 textAlignType);
356 }
357}
358
363bool a_Html_tag_set_valign_attr(DilloHtml *html, const char *tag, int tagsize)
364{
365 const char *attr;
366 VAlignType valign;
367
368 if ((attr = a_Html_get_attr(html, tag, tagsize, "valign"))) {
369 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
370 BUG_MSG("The valign attribute is obsolete in HTML5.");
371
372 if (dStrAsciiCasecmp (attr, "top") == 0)
373 valign = VALIGN_TOP;
374 else if (dStrAsciiCasecmp (attr, "bottom") == 0)
375 valign = VALIGN_BOTTOM;
376 else if (dStrAsciiCasecmp (attr, "baseline") == 0)
377 valign = VALIGN_BASELINE;
378 else
379 valign = VALIGN_MIDDLE;
380
382 CSS_TYPE_ENUM, valign);
383 return true;
384 } else
385 return false;
386}
387
388
393static void Html_add_textblock(DilloHtml *html, bool addBreaks, int breakSpace,
394 bool addBreakOpt)
395{
396 Textblock *textblock = new Textblock (prefs.limit_text_width);
397 Style *style;
398
399 if (addBreaks) {
400 StyleAttrs attrs = *(html->style ());
401 attrs.display = DISPLAY_BLOCK;
402 style = Style::create (&attrs);
403 } else {
404 style = html->style ();
405 style->ref ();
406 }
407
408 if (addBreaks)
409 HT2TB(html)->addParbreak (breakSpace, html->wordStyle ());
410
411 HT2TB(html)->addWidget (textblock, style); /* Works also for floats etc. */
412 if (addBreakOpt)
413 HT2TB(html)->addBreakOption (html->style (), false);
414
415 if (addBreaks)
416 HT2TB(html)->addParbreak (breakSpace, html->wordStyle ());
417 S_TOP(html)->textblock = html->dw = textblock;
418 if (addBreaks)
419 S_TOP(html)->hand_over_break = true;
420
421 style->unref ();
422}
423
425{
426 return HT2TB(html)->mustAddBreaks (html->style ());
427}
428
433 const char *content_type)
434{
435 /* Init main variables */
436 bw = p_bw;
437 page_url = a_Url_dup(url);
438 base_url = a_Url_dup(url);
439 dw = NULL;
440
441 /* Init event receiver */
442 linkReceiver.html = this;
443 HT2LT(this)->connectLink (&linkReceiver);
444
445 a_Bw_add_doc(p_bw, this);
446
447 /* Init for-parsing variables */
448 Start_Buf = NULL;
449 Start_Ofs = 0;
450
451 _MSG("DilloHtml(): content type: %s\n", content_type);
452 this->content_type = dStrdup(content_type);
453
454 /* get charset */
456
457 stop_parser = false;
458
459 CurrOfs = OldOfs = 0;
460 OldLine = 1;
461
462 DocType = DT_NONE; /* assume Tag Soup 0.0! :-) */
463 DocTypeVersion = 0.0f;
464
466
467 cssUrls = new misc::SimpleVector <DilloUrl*> (1);
468
469 stack = new misc::SimpleVector <DilloHtmlState> (16);
470 stack->increase();
474 stack->getRef(0)->cell_text_align_set = false;
475 stack->getRef(0)->display_none = false;
477 stack->getRef(0)->list_number = 0;
478 stack->getRef(0)->tag_idx = -1; /* MUST not be used */
479 stack->getRef(0)->textblock = NULL;
480 stack->getRef(0)->table = NULL;
481 stack->getRef(0)->ref_list_item = NULL;
482 stack->getRef(0)->hand_over_break = false;
483
485
486 Stash = dStr_new("");
487 StashSpace = false;
488
489 pre_column = 0;
490 PreFirstChar = false;
491 PrevWasCR = false;
492 InVisitedLink = false;
493 ReqTagClose = false;
494 TagSoup = true;
495 loadCssFromStash = false;
496 PrevWasBodyClose = false;
497 PrevWasHtmlClose = false;
498
500
502
505 visited_color = -1;
506
507 /* Init page-handling variables */
508 forms = new misc::SimpleVector <DilloHtmlForm*> (1);
509 inputs_outside_form = new misc::SimpleVector <DilloHtmlInput*> (1);
510 links = new misc::SimpleVector <DilloUrl*> (64);
511 images = new misc::SimpleVector <DilloHtmlImage*> (16);
512
513 /* Initialize the main widget */
514 initDw();
515 /* Hook destructor to the dw delete call */
516 dw->setDeleteCallback(Html_free, this);
517}
518
523{
524 dReturn_if_fail (dw == NULL);
525
526 /* Create the main widget */
528
529 bw->num_page_bugs = 0;
531}
532
537{
538 _MSG("::~DilloHtml(this=%p)\n", this);
539
541
542 a_Bw_remove_doc(bw, this);
543
546
547 for (int i = 0; i < cssUrls->size(); i++)
549 delete (cssUrls);
550
551 for (int i = 0; i < forms->size(); i++)
553 delete(forms);
554
555 for (int i = 0; i < inputs_outside_form->size(); i++)
557 delete(inputs_outside_form);
558
559 for (int i = 0; i < links->size(); i++)
560 a_Url_free(links->get(i));
561 delete (links);
562
563 for (int i = 0; i < images->size(); i++) {
564 DilloHtmlImage *img = images->get(i);
565 a_Url_free(img->url);
566 a_Image_unref(img->image);
567 dFree(img);
568 }
569 delete (images);
570
571 delete styleEngine;
572}
573
578void DilloHtml::write(char *Buf, int BufSize, int Eof)
579{
580 int token_start;
581 char *buf = Buf + Start_Ofs;
582 int bufsize = BufSize - Start_Ofs;
583
584 _MSG("DilloHtml::write BufSize=%d Start_Ofs=%d\n", BufSize, Start_Ofs);
585#if 0
586 char *aux = dStrndup(Buf, BufSize);
587 MSG(" {%s}\n", aux);
588 dFree(aux);
589#endif
590
591 /* Update Start_Buf. It may be used after the parser is stopped */
592 Start_Buf = Buf;
593
594 dReturn_if (dw == NULL);
595 dReturn_if (stop_parser == true);
596
597 token_start = Html_write_raw(this, buf, bufsize, Eof);
598 Start_Ofs += token_start;
599}
600
606{
607 int i, ofs, line;
608 const char *p = Start_Buf;
609
610 dReturn_val_if_fail(p != NULL, -1);
611 /* Disable line counting for META hack. Buffers differ. */
613
614 ofs = CurrOfs;
615 line = OldLine;
616 for (i = OldOfs; i < ofs; ++i)
617 if (p[i] == '\n' || (p[i] == '\r' && p[i+1] != '\n'))
618 ++line;
619 OldOfs = CurrOfs;
620 OldLine = line;
621 return line;
622}
623
628{
629 delete(stack);
630
634 dFree(charset);
635}
636
641void DilloHtml::finishParsing(int ClientKey)
642{
643 int si;
644
645 dReturn_if (stop_parser == true);
646
647 /* flag we've already parsed up to the last byte */
648 InFlags |= IN_EOF;
649
650 /* force the close of elements left open (TODO: not for XHTML) */
651 while ((si = stack->size() - 1)) {
652 if (stack->getRef(si)->tag_idx != -1) {
654 }
655 }
656
657 /* Nothing left to do with the parser. Clear all flags, except EOF. */
658 InFlags = IN_EOF;
659
660 /* Remove this client from our active list */
661 a_Bw_close_client(bw, ClientKey);
662}
663
668 DilloHtmlEnc enc, const char *charset)
669{
670 // avoid data loss on repush after CSS stylesheets have been loaded
671 bool enabled = bw->NumPendingStyleSheets == 0;
672 DilloHtmlForm *form = a_Html_form_new (this, method, action,
673 enc, charset, enabled);
674 int nf = forms->size ();
675 forms->increase ();
676 forms->set (nf, form);
677 _MSG("Html formNew: action=%s nform=%d\n", action, nf);
678 return forms->size();
679}
680
685{
686 return forms->get (forms->size() - 1);
687}
688
690{
691 for (int i = 0; i < images->size(); i++) {
692 if (images->get(i)->image != NULL) {
693 return TRUE;
694 }
695 }
696 return FALSE;
697}
698
702void DilloHtml::loadImages (const DilloUrl *pattern)
703{
705
706 /* If the user asked for a specific image, the user (NULL) is the requester,
707 * and the domain mechanism will always permit the request. But if the user
708 * just asked for all images (clicking "Load images"), use the page URL as
709 * the requester so that the domain mechanism can act as a filter.
710 * If the possible patterns become more complex, it might be good to have
711 * the caller supply the requester instead.
712 */
713 const DilloUrl *requester = pattern ? NULL : this->page_url;
714
715 for (int i = 0; i < images->size(); i++) {
716 DilloHtmlImage *hi = images->get(i);
717
718 if (hi->image) {
719 assert(hi->url);
720 if ((!pattern) || (!a_Url_cmp(hi->url, pattern))) {
721 if (Html_load_image(bw, hi->url, requester, hi->image)) {
722 a_Image_unref (hi->image);
723 hi->image = NULL; // web owns it now
724 }
725 }
726 }
727 }
728}
729
734{
735 int nu = cssUrls->size();
736 cssUrls->increase();
737 cssUrls->set(nu, a_Url_dup(url));
738}
739
740bool DilloHtml::HtmlLinkReceiver::enter (Widget *widget, int link, int img,
741 int x, int y)
742{
744
745 _MSG(" ** ");
746 if (link == -1) {
747 _MSG(" Link LEAVE notify...\n");
748 a_UIcmd_set_msg(bw, "");
749 } else {
750 _MSG(" Link ENTER notify...\n");
751 Html_set_link_coordinates(html, link, x, y);
752 a_UIcmd_set_msg(bw, "%s", URL_STR(html->links->get(link)));
753 }
754 return true;
755}
756
760bool DilloHtml::HtmlLinkReceiver::press (Widget *widget, int link, int img,
761 int x, int y, EventButton *event)
762{
763 BrowserWindow *bw = html->bw;
764 int ret = false;
765 DilloUrl *linkurl = NULL;
766
767 _MSG("pressed button %d\n", event->button);
768 if (event->button == 3) {
769 // popup menus
770 if (img != -1) {
771 // image menu
772 if (link != -1)
773 linkurl = html->links->get(link);
774 const bool_t loaded_img = (html->images->get(img)->image == NULL);
775 a_UIcmd_image_popup(bw, html->images->get(img)->url, loaded_img,
776 html->page_url, linkurl);
777 ret = true;
778 } else {
779 if (link == -1) {
780 a_UIcmd_page_popup(bw, bw->num_page_bugs != 0, html->cssUrls);
781 ret = true;
782 } else {
783 a_UIcmd_link_popup(bw, html->links->get(link), html->page_url);
784 ret = true;
785 }
786 }
787 }
788 return ret;
789}
790
794bool DilloHtml::HtmlLinkReceiver::click (Widget *widget, int link, int img,
795 int x, int y, EventButton *event)
796{
797 BrowserWindow *bw = html->bw;
798
799 if ((img != -1) && (html->images->get(img)->image)) {
800 // clicked an image that has not already been loaded
801 if (event->button == 1){
802 // load all instances of this image
803 DilloUrl *pattern = html->images->get(img)->url;
804 html->loadImages(pattern);
805 return true;
806 }
807 }
808
809 if (link != -1) {
810 DilloUrl *url = html->links->get(link);
811 _MSG("clicked on URL %d: %s\n", link, a_Url_str (url));
812
813 Html_set_link_coordinates(html, link, x, y);
814
815 if (event->button == 1) {
816 if (event->state & CONTROL_MASK) {
818 int focus = prefs.focus_new_tab ? 1 : 0;
819 if (event->state & SHIFT_MASK) focus = !focus;
820 a_UIcmd_open_url_nt(bw, url, focus);
821 } else {
823 }
824 } else {
825 a_UIcmd_open_url(bw, url);
826 }
827 } else if (event->button == 2) {
829 int focus = prefs.focus_new_tab ? 1 : 0;
830 if (event->state & SHIFT_MASK) focus = !focus;
831 a_UIcmd_open_url_nt(bw, url, focus);
832 } else
834 } else {
835 return false;
836 }
837
838 /* Change the link color to "visited" as visual feedback */
839 for (Widget *w = widget; w; w = w->getParent()) {
840 _MSG(" ->%s\n", w->getClassName());
841 if (w->instanceOf(dw::Textblock::CLASS_ID)) {
842 ((Textblock*)w)->changeLinkColor (link, html->visited_color);
843 break;
844 }
845 }
846 }
847 return true;
848}
849
854{
855 S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_STASH;
856 html->StashSpace = false;
857 dStr_truncate(html->Stash, 0);
858}
859
/*
 * Map the code points 145..151 to a plain substitute: quotes for
 * 145-148, code 176 for 149 and '-' for 150/151.
 * Any other code point is returned unchanged.
 */
static int Html_ms_stupid_quotes_2ucs(int codepoint)
{
   enum { FIRST = 145, LAST = 151 };
   /* Substitutes, indexed by (codepoint - FIRST) */
   static const int subst[LAST - FIRST + 1] = {
      '\'', '\'', '"', '"', 176, '-', '-'
   };

   if (codepoint < FIRST || codepoint > LAST)
      return codepoint;
   return subst[codepoint - FIRST];
}
881
886static const char *Html_parse_numeric_charref(DilloHtml *html, char *tok,
887 bool_t is_attr, int *entsize)
888{
889 static char buf[5];
890 char *s = tok;
891 int n, codepoint = -1;
892
893 errno = 0;
894
895 if (*s == 'x' || *s == 'X') {
896 if (isxdigit(*++s)) {
897 /* strtol with base 16 accepts leading "0x" - we don't */
898 if (*s == '0' && s[1] == 'x') {
899 s++;
900 codepoint = 0;
901 } else {
902 codepoint = strtol(s, &s, 16);
903 }
904 }
905 } else if (isdigit(*s)) {
906 codepoint = strtol(s, &s, 10);
907 }
908 if (errno)
909 codepoint = -1;
910
911 if (*s == ';')
912 s++;
913 else {
914 if (prefs.show_extra_warnings && (html->DocType == DT_XHTML ||
915 (html->DocType == DT_HTML && html->DocTypeVersion <= 4.01f))) {
916 char c = *s;
917 *s = '\0';
918 BUG_MSG("Character reference '&#%s' lacks ';'.", tok);
919 *s = c;
920 }
921 /* Don't require ';' for old HTML, except that our current heuristic
922 * is to require it in attributes to avoid cases like "&copy=1" found
923 * in URLs.
924 */
925 if (is_attr || html->DocType == DT_XHTML ||
926 (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) {
927 return NULL;
928 }
929
930 }
931 if ((codepoint < 0x20 && codepoint != '\t' && codepoint != '\n' &&
932 codepoint != '\f') ||
933 (codepoint >= 0x7f && codepoint <= 0x9f) ||
934 (codepoint >= 0xd800 && codepoint <= 0xdfff) || codepoint > 0x10ffff ||
935 ((codepoint & 0xfffe) == 0xfffe) ||
936 (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) &&
937 codepoint > 0xffff)) {
938 /* this catches null bytes, errors, codes out of range, disallowed
939 * control chars, permanently undefined chars, and surrogates.
940 */
941 char c = *s;
942 *s = '\0';
943 BUG_MSG("Numeric character reference '&#%s' is not valid.", tok);
944 *s = c;
945
946 codepoint = (codepoint >= 145 && codepoint <= 151) ?
947 Html_ms_stupid_quotes_2ucs(codepoint) : -1;
948 }
949 if (codepoint != -1) {
950 if (codepoint >= 128) {
951 n = a_Utf8_encode(codepoint, buf);
952 } else {
953 n = 1;
954 buf[0] = (char) codepoint;
955 }
956 assert(n < 5);
957 buf[n] = '\0';
958 *entsize = s-tok+2;
959 return buf;
960 } else {
961 return NULL;
962 }
963}
964
968static int Html_charref_comp(const void *a, const void *b)
969{
970 return strcmp(((Charref_t *)a)->ref, ((Charref_t *)b)->ref);
971}
972
977{
978 Charref_t RefKey;
979
980 RefKey.ref = key;
981 return (Charref_t*) bsearch(&RefKey, Charrefs, NumRef,
983}
984
989static const char *Html_parse_named_charref(DilloHtml *html, char *tok,
990 bool_t is_attr, int *entsize)
991{
992 Charref_t *p;
993 char c;
994 char *s = tok;
995 const char *ret = NULL;
996
997 while (*++s && (isalnum(*s) || strchr(":_.-", *s))) ;
998 c = *s;
999 *s = '\0';
1000 if (c != ';') {
1001 if (prefs.show_extra_warnings && (html->DocType == DT_XHTML ||
1002 (html->DocType == DT_HTML && html->DocTypeVersion <= 4.01f)))
1003 BUG_MSG("Character reference '&%s' lacks ';'.", tok);
1004
1005 /* Don't require ';' for old HTML, except that our current heuristic
1006 * is to require it in attributes to avoid cases like "&copy=1" found
1007 * in URLs.
1008 */
1009 if (is_attr || html->DocType == DT_XHTML ||
1010 (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) {
1011 return ret;
1012 }
1013 }
1014
1015 if ((p = Html_charref_search(tok))) {
1016 ret = (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) ?
1017 p->html5_str : p->html4_str;
1018 }
1019
1020 if (!ret && html->DocType == DT_XHTML && !strcmp(tok, "apos"))
1021 ret = "'";
1022
1023 *s = c;
1024 if (c == ';')
1025 s++;
1026
1027 if (!ret) {
1028 c = *s;
1029 *s = '\0';
1030 BUG_MSG("Undefined character reference '&%s'.", tok);
1031 *s = c;
1032 }
1033 *entsize = s-tok+1;
1034 return ret;
1035}
1036
1045static const char *Html_parse_entity(DilloHtml *html, const char *token,
1046 int toksize, int *entsize, bool_t is_attr)
1047{
1048 const char *ret = NULL;
1049 char *tok;
1050
1051 if (toksize > 50) {
1052 /* In pathological cases, attributes can be megabytes long and filled
1053 * with character references. As of HTML5, the longest defined character
1054 * reference is about 32 bytes long.
1055 */
1056 toksize = 50;
1057 }
1058
1059 token++;
1060 tok = dStrndup(token, (uint_t)toksize);
1061
1062 if (*tok == '#') {
1063 ret = Html_parse_numeric_charref(html, tok+1, is_attr, entsize);
1064 } else if (isalpha(*tok)) {
1065 ret = Html_parse_named_charref(html, tok, is_attr, entsize);
1066 } else if (prefs.show_extra_warnings &&
1067 (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f))) {
1068 // HTML5 doesn't mind literal '&'s.
1069 BUG_MSG("Literal '&'.");
1070 }
1071 dFree(tok);
1072
1073 return ret;
1074}
1075
1080char *a_Html_parse_entities(DilloHtml *html, const char *token, int toksize)
1081{
1082 const char *esc_set = "&";
1083 int i, s, entsize;
1084 char *str;
1085
1086 s = strcspn(token, esc_set);
1087 if (s >= toksize) {
1088 /* no ampersands */
1089 str = dStrndup(token, toksize);
1090 } else {
1091 Dstr *ds = dStr_sized_new(toksize);
1092
1093 dStr_append_l(ds, token, s);
1094
1095 for (i = s; i < toksize; i++) {
1096 const char *entstr;
1097 const bool_t is_attr = FALSE;
1098
1099 if (token[i] == '&' &&
1100 (entstr = Html_parse_entity(html, token+i, toksize-i, &entsize,
1101 is_attr))) {
1102 dStr_append(ds, entstr);
1103 i += entsize-1;
1104 } else {
1105 dStr_append_c(ds, token[i]);
1106 }
1107 }
1108 str = ds->str;
1109 dStr_free(ds, 0);
1110 }
1111 return str;
1112}
1113
1118static void Html_process_space_pre_line(DilloHtml *html, const char *space,
1119 int spacesize)
1120{
1121 int i, breakCnt = 0;
1122
1123 for (i = 0; i < spacesize; i++) {
1124 /* Support for "\r", "\n" and "\r\n" line breaks */
1125 if (space[i] == '\r' || (space[i] == '\n' && !html->PrevWasCR)) {
1126 breakCnt++;
1127 html->PrevWasCR = (space[i] == '\r');
1128
1129 HT2TB(html)->addLinebreak (html->wordStyle ());
1130 }
1131 }
1132 if (breakCnt == 0) {
1133 HT2TB(html)->addSpace(html->wordStyle ());
1134 }
1135}
1136
1140static void Html_process_space(DilloHtml *html, const char *space,
1141 int spacesize)
1142{
1143 char *spc;
1144 int i, offset;
1145 DilloHtmlParseMode parse_mode = S_TOP(html)->parse_mode;
1146
1147 if (S_TOP(html)->display_none) {
1148 /* do nothing */
1149 } else if (parse_mode == DILLO_HTML_PARSE_MODE_STASH) {
1150 html->StashSpace = (html->Stash->len > 0);
1151
1152 } else if (parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
1153 dStr_append_l(html->Stash, space, spacesize);
1154
1155 } else if (parse_mode == DILLO_HTML_PARSE_MODE_PRE) {
1156 int spaceCnt = 0;
1157
1158 /* re-scan the string for characters that cause line breaks */
1159 for (i = 0; i < spacesize; i++) {
1160 /* Support for "\r", "\n" and "\r\n" line breaks (skips the first) */
1161 if (!html->PreFirstChar &&
1162 (space[i] == '\r' || (space[i] == '\n' && !html->PrevWasCR))) {
1163
1164 if (spaceCnt) {
1165 spc = dStrnfill(spaceCnt, ' ');
1166 HT2TB(html)->addText (spc, spaceCnt, html->wordStyle ());
1167 dFree(spc);
1168 spaceCnt = 0;
1169 }
1170 HT2TB(html)->addLinebreak (html->wordStyle ());
1171 html->pre_column = 0;
1172 }
1173 html->PreFirstChar = false;
1174
1175 /* cr and lf should not be rendered -- they appear as a break */
1176 switch (space[i]) {
1177 case '\r':
1178 case '\n':
1179 break;
1180 case '\t':
1182 BUG_MSG("TAB character inside <pre>.");
1183 offset = TAB_SIZE - html->pre_column % TAB_SIZE;
1184 spaceCnt += offset;
1185 html->pre_column += offset;
1186 break;
1187 default:
1188 spaceCnt++;
1189 html->pre_column++;
1190 break;
1191 }
1192
1193 html->PrevWasCR = (space[i] == '\r');
1194 }
1195
1196 if (spaceCnt) {
1197 // add break possibility for the white-space:pre-wrap case
1198 HT2TB(html)->addBreakOption (html->wordStyle (), false);
1199 spc = dStrnfill(spaceCnt, ' ');
1200 HT2TB(html)->addText (spc, spaceCnt, html->wordStyle ());
1201 dFree(spc);
1202 }
1203
1204 } else {
1205 if (SGML_SPCDEL) {
1206 /* SGML_SPCDEL ignores white space immediately after an open tag */
1207 } else if (html->wordStyle ()->whiteSpace == WHITE_SPACE_PRE_LINE) {
1208 Html_process_space_pre_line(html, space, spacesize);
1209 } else {
1210 HT2TB(html)->addSpace(html->wordStyle ());
1211 }
1212
1213 if (parse_mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY)
1214 html->StashSpace = (html->Stash->len > 0);
1215 }
1216}
1217
1226static void Html_process_word(DilloHtml *html, const char *word, int size)
1227{
1228 int i, j, start;
1229 char *Pword;
1230 DilloHtmlParseMode parse_mode = S_TOP(html)->parse_mode;
1231
1232 if (S_TOP(html)->display_none)
1233 return;
1234 if ((i = html->PrevWasHtmlClose ? 1 : html->PrevWasBodyClose ? 2 : 0)) {
1235 BUG_MSG("Content after </%s> tag.", i == 1 ? "html" : "body");
1236 html->PrevWasHtmlClose = html->PrevWasBodyClose = false;
1237 }
1238
1239 if (parse_mode == DILLO_HTML_PARSE_MODE_STASH ||
1241 if (html->StashSpace) {
1242 dStr_append_c(html->Stash, ' ');
1243 html->StashSpace = false;
1244 }
1245 Pword = a_Html_parse_entities(html, word, size);
1246 dStr_append(html->Stash, Pword);
1247 dFree(Pword);
1248
1249 } else if (parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
1250 /* word goes in untouched, it is not processed here. */
1251 dStr_append_l(html->Stash, word, size);
1252 }
1253
1254 if (parse_mode == DILLO_HTML_PARSE_MODE_STASH ||
1255 parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
1256 /* skip until the closing instructions */
1257
1258 } else if (parse_mode == DILLO_HTML_PARSE_MODE_PRE) {
1259 /* all this overhead is to catch white-space entities */
1260 Pword = a_Html_parse_entities(html, word, size);
1261 for (start = i = 0; Pword[i]; start = i)
1262 if (isspace(Pword[i])) {
1263 while (Pword[++i] && isspace(Pword[i])) ;
1264 Html_process_space(html, Pword + start, i - start);
1265 } else {
1266 while (Pword[++i] && !isspace(Pword[i])) ;
1267 HT2TB(html)->addText(Pword + start, i - start, html->wordStyle ());
1268 html->pre_column += i - start;
1269 html->PreFirstChar = false;
1270 }
1271 dFree(Pword);
1272
1273 } else {
1274 const char *word2, *beyond_word2;
1275
1276 Pword = NULL;
1277 if (!memchr(word,'&', size)) {
1278 /* No entities */
1279 word2 = word;
1280 beyond_word2 = word + size;
1281 } else {
1282 /* Collapse white-space entities inside the word (except &nbsp;) */
1283 Pword = a_Html_parse_entities(html, word, size);
1284 /* Collapse adjacent " \t\f\n\r" characters into a single space */
1285 for (i = j = 0; (Pword[i] = Pword[j]); ++i, ++j) {
1286 if (strchr(" \t\f\n\r", Pword[i])) {
1287 if (i == 0 || (i > 0 && Pword[i-1] != ' '))
1288 Pword[i] = ' ';
1289 else
1290 for (--i; Pword[j+1] && strchr(" \t\f\n\r", Pword[j+1]); ++j)
1291 ;
1292 }
1293 }
1294 word2 = Pword;
1295 beyond_word2 = word2 + strlen(word2);
1296 }
1297 for (start = i = 0; word2[i]; start = i) {
1298 int len;
1299
1300 if (isspace(word2[i])) {
1301 while (word2[++i] && isspace(word2[i])) ;
1302 Html_process_space(html, word2 + start, i - start);
1303 } else if (!strncmp(word2+i, utf8_zero_width_space, 3)) {
1304 i += 3;
1305 HT2TB(html)->addBreakOption(html->wordStyle (), false);
1306 } else if (a_Utf8_ideographic(word2+i, beyond_word2, &len)) {
1307 i += len;
1308 HT2TB(html)->addText(word2 + start, i - start, html->wordStyle ());
1309 HT2TB(html)->addBreakOption(html->wordStyle (), false);
1310 } else {
1311 do {
1312 i += len;
1313 } while (word2[i] && !isspace(word2[i]) &&
1314 strncmp(word2+i, utf8_zero_width_space, 3) &&
1315 (!a_Utf8_ideographic(word2+i, beyond_word2, &len)));
1316 HT2TB(html)->addText(word2 + start, i - start, html->wordStyle ());
1317 }
1318 }
1319 if (Pword == word2)
1320 dFree(Pword);
1321 }
1322}
1323
1328static bool Html_match_tag(const char *tagstr, char *tag, int tagsize)
1329{
1330 int i;
1331
1332 for (i = 0; i < tagsize && tagstr[i] != '\0'; i++) {
1333 if (D_ASCII_TOLOWER(tagstr[i]) != D_ASCII_TOLOWER(tag[i]))
1334 return false;
1335 }
1336 /* The test for '/' is for xml compatibility: "empty/>" will be matched. */
1337 if (i < tagsize && (isspace(tag[i]) || tag[i] == '>' || tag[i] == '/'))
1338 return true;
1339 return false;
1340}
1341
1346static void Html_eventually_pop_dw(DilloHtml *html, bool hand_over_break)
1347{
1348 if (html->dw != S_TOP(html)->textblock) {
1349 if (hand_over_break)
1350 HT2TB(html)->handOverBreak (html->style ());
1351 HT2TB(html)->flush ();
1352 html->dw = S_TOP(html)->textblock;
1353 }
1354}
1355
1359static void Html_push_tag(DilloHtml *html, int tag_idx)
1360{
1361 int n_items;
1362
1363 n_items = html->stack->size ();
1364 html->stack->increase ();
1365 /* We'll copy the former stack item and just change the tag and its index
1366 * instead of copying all fields except for tag. --Jcid */
1367 *html->stack->getRef(n_items) = *html->stack->getRef(n_items - 1);
1368 html->stack->getRef(n_items)->tag_idx = tag_idx;
1369 html->dw = S_TOP(html)->textblock;
1370}
1371
1376static void Html_force_push_tag(DilloHtml *html, int tag_idx)
1377{
1378 html->startElement (tag_idx);
1379 Html_push_tag(html, tag_idx);
1380}
1381
1386{
1387 bool hand_over_break;
1388
1389 html->styleEngine->endElement (S_TOP(html)->tag_idx);
1390 hand_over_break = S_TOP(html)->hand_over_break;
1391 html->stack->setSize (html->stack->size() - 1);
1392 Html_eventually_pop_dw(html, hand_over_break);
1393}
1394
1395
1396
1397/*
1398 * Some parsing routines.
1399 */
1400
1401/*
1402 * Used by a_Html_parse_length
1403 */
1405 char **endptr)
1406{
1407 CssLength l;
1408 double v;
1409 char *end;
1410
1411 v = strtod (attr, &end);
1412 switch (*end) {
1413 case '%':
1414 end++;
1416 break;
1417
1418 case '*':
1419 end++;
1421 break;
1422/*
1423 The "px" suffix seems not allowed by HTML4.01 SPEC.
1424 case 'p':
1425 if (end[1] == 'x')
1426 end += 2;
1427*/
1428 default:
1430 break;
1431 }
1432
1433 if (endptr)
1434 *endptr = end;
1435 return l;
1436}
1437
1438
1443CssLength a_Html_parse_length (DilloHtml *html, const char *attr)
1444{
1445 CssLength l;
1446 char *end;
1447
1448 l = Html_parse_length_or_multi_length (attr, &end);
1450 /* not allowed as &Length; */
1452 else {
1453 /* allow only whitespaces */
1454 if (*end && !isspace (*end)) {
1455 BUG_MSG("Garbage after length: '%s'.", attr);
1457 }
1458 }
1459
1460 _MSG("a_Html_parse_length: \"%s\" %d\n", attr, CSS_LENGTH_VALUE(l));
1461 return l;
1462}
1463
1468int32_t a_Html_color_parse(DilloHtml *html, const char *str,
1469 int32_t default_color)
1470{
1471 int err = 1;
1472 int32_t color = a_Color_parse(str, default_color, &err);
1473
1474 if (err) {
1475 BUG_MSG("Color \"%s\" is not in \"#RRGGBB\" format.", str);
1476 }
1477 return color;
1478}
1479
1485static int
1486 Html_check_name_val(DilloHtml *html, const char *val, const char *attrname)
1487{
1488 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) {
1489 bool valid = *val && !strchr(val, ' ');
1490
1491 if (!valid) {
1492 BUG_MSG("'%s' value \"%s\" must not be empty and must not contain "
1493 "spaces.", attrname, val);
1494 }
1495 return valid ? 1 : 0;
1496 } else {
1497 int i;
1498
1499 for (i = 0; val[i]; ++i)
1500 if (!d_isascii(val[i]) || !(isalnum(val[i]) || strchr(":_.-", val[i])))
1501 break;
1502
1503 if (val[i] || !(d_isascii(val[0]) && isalpha(val[0])))
1504 BUG_MSG("%s attribute value \"%s\" is not of the form "
1505 "'[A-Za-z][A-Za-z0-9:_.-]*'.", attrname, val);
1506
1507 return !(val[i]);
1508 }
1509}
1510
1530static void Html_parse_doctype(DilloHtml *html, const char *tag, int tagsize)
1531{
1532 static const char HTML_SGML_sig [] = "<!DOCTYPE HTML PUBLIC ";
1533 static const char HTML20 [] = "-//IETF//DTD HTML";
1534 static const char HTML32 [] = "-//W3C//DTD HTML 3.2";
1535 static const char HTML40 [] = "-//W3C//DTD HTML 4.0";
1536 static const char HTML401 [] = "-//W3C//DTD HTML 4.01";
1537 static const char HTML401_url[] = "http://www.w3.org/TR/html4/";
1538 static const char XHTML1 [] = "-//W3C//DTD XHTML 1.0";
1539 static const char XHTML1_url [] = "http://www.w3.org/TR/xhtml1/DTD/";
1540 static const char XHTML11 [] = "-//W3C//DTD XHTML 1.1";
1541 static const char XHTML11_url[] = "http://www.w3.org/TR/xhtml11/DTD/";
1542
1543 size_t i;
1544 int quote;
1545 char *p, *ntag = dStrndup(tag, tagsize);
1546
1547 /* Tag sanitization: Collapse whitespace between tokens
1548 * and replace '\n' and '\r' with ' ' inside quoted strings. */
1549 for (i = 0, p = ntag; *p; ++p) {
1550 if (isspace(*p)) {
1551 for (ntag[i++] = ' '; isspace(p[1]); ++p) ;
1552 } else if ((quote = *p) == '"' || *p == '\'') {
1553 for (ntag[i++] = *p++; (ntag[i] = *p) && ntag[i++] != quote; ++p) {
1554 if (*p == '\n' || *p == '\r')
1555 ntag[i - 1] = ' ';
1556 p += (p[0] == '\r' && p[1] == '\n') ? 1 : 0;
1557 }
1558 } else {
1559 ntag[i++] = *p;
1560 }
1561 if (!*p)
1562 break;
1563 }
1564 ntag[i] = 0;
1565
1566 _MSG("New: {%s}\n", ntag);
1567
1568 if (html->DocType != DT_NONE)
1569 BUG_MSG("Multiple DOCTYPE declarations.");
1570
1571 /* The default DT_NONE type is TagSoup */
1572 if (i > strlen(HTML_SGML_sig) && // avoid out of bounds reads!
1573 !dStrnAsciiCasecmp(ntag, HTML_SGML_sig, strlen(HTML_SGML_sig))) {
1574 p = ntag + strlen(HTML_SGML_sig) + 1;
1575 if (!strncmp(p, HTML401, strlen(HTML401)) &&
1576 dStriAsciiStr(p + strlen(HTML401), HTML401_url)) {
1577 html->DocType = DT_HTML;
1578 html->DocTypeVersion = 4.01f;
1579 } else if (!strncmp(p, XHTML1, strlen(XHTML1)) &&
1580 dStriAsciiStr(p + strlen(XHTML1), XHTML1_url)) {
1581 html->DocType = DT_XHTML;
1582 html->DocTypeVersion = 1.0f;
1583 } else if (!strncmp(p, XHTML11, strlen(XHTML11)) &&
1584 dStriAsciiStr(p + strlen(XHTML11), XHTML11_url)) {
1585 html->DocType = DT_XHTML;
1586 html->DocTypeVersion = 1.1f;
1587 } else if (!strncmp(p, HTML40, strlen(HTML40))) {
1588 html->DocType = DT_HTML;
1589 html->DocTypeVersion = 4.0f;
1590 } else if (!strncmp(p, HTML32, strlen(HTML32))) {
1591 html->DocType = DT_HTML;
1592 html->DocTypeVersion = 3.2f;
1593 } else if (!strncmp(p, HTML20, strlen(HTML20))) {
1594 html->DocType = DT_HTML;
1595 html->DocTypeVersion = 2.0f;
1596 }
1597 } else if (!dStrAsciiCasecmp(ntag, "<!DOCTYPE html>") ||
1598 !dStrAsciiCasecmp(ntag, "<!DOCTYPE html >") ||
1599 !dStrAsciiCasecmp(ntag,
1600 "<!DOCTYPE html SYSTEM \"about:legacy-compat\">") ||
1601 !dStrAsciiCasecmp(ntag,
1602 "<!DOCTYPE html SYSTEM 'about:legacy-compat'>")) {
1603 html->DocType = DT_HTML;
1604 html->DocTypeVersion = 5.0f;
1605 }
1606 if (html->DocType == DT_NONE) {
1607 html->DocType = DT_UNRECOGNIZED;
1608 BUG_MSG("DOCTYPE not recognized: ('%s').", ntag);
1609 }
1610 dFree(ntag);
1611}
1612
1616static void Html_tag_open_html(DilloHtml *html, const char *tag, int tagsize)
1617{
1618 /* The IN_HTML flag will be kept set until at IN_EOF condition.
1619 * This allows to handle pages with multiple or uneven HTML tags */
1620
1621 if (!(html->InFlags & IN_HTML))
1622 html->InFlags |= IN_HTML;
1623 if (html->Num_HTML < UCHAR_MAX)
1624 ++html->Num_HTML;
1625
1626 if (html->Num_HTML > 1) {
1627 BUG_MSG("<html> was already open.");
1628 html->ReqTagClose = true;
1629 }
1630}
1631
1636{
1637 _MSG("Html_tag_close_html: Num_HTML=%d\n", html->Num_HTML);
1638
1639 /* As some Tag soup pages use multiple HTML tags, this function
1640 * gets called only on EOF and upon an extra HTML open.
1641 * Also, we defer clearing the IN_HTML flag until IN_EOF */
1642}
1643
1647static void Html_tag_open_head(DilloHtml *html, const char *tag, int tagsize)
1648{
1649 if (html->InFlags & IN_BODY) {
1650 BUG_MSG("<head> must go before the BODY section.");
1651 html->ReqTagClose = true;
1652 return;
1653 }
1654
1655 if (html->Num_HEAD < UCHAR_MAX)
1656 ++html->Num_HEAD;
1657 if (html->InFlags & IN_HEAD) {
1658 BUG_MSG("<head> was already open.");
1659 html->ReqTagClose = true;
1660 } else if (html->Num_HEAD > 1) {
1661 BUG_MSG("<head> already finished -- ignoring.");
1662 html->ReqTagClose = true;
1663 } else {
1664 html->InFlags |= IN_HEAD;
1665 }
1666}
1667
1673{
1674 if (html->InFlags & IN_HEAD) {
1675 if (html->Num_HEAD == 1) {
1676 /* match for the well formed start of HEAD section */
1677 if (html->Num_TITLE == 0)
1678 BUG_MSG("<head> lacks <title>.");
1679
1680 html->InFlags &= ~IN_HEAD;
1681
1682 /* charset is already set, load remote stylesheets now */
1683 for (int i = 0; i < html->cssUrls->size(); i++) {
1684 a_Html_load_stylesheet(html, html->cssUrls->get(i));
1685 }
1686 } else if (html->Num_HEAD > 1) {
1687 --html->Num_HEAD;
1688 }
1689 } else {
1690 /* not reached, see Html_tag_cleanup_at_close() */
1691 }
1692}
1693
1698static void Html_tag_open_title(DilloHtml *html, const char *tag, int tagsize)
1699{
1700 /* fill the stash buffer so TITLE content can be ignored
1701 * when not valid, redundant or outside HEAD section */
1702 a_Html_stash_init(html);
1703
1704 if (html->InFlags & IN_HEAD) {
1705 if (html->Num_TITLE < UCHAR_MAX)
1706 ++html->Num_TITLE;
1707 if (html->Num_TITLE > 1)
1708 BUG_MSG("Redundant <title>.");
1709 } else {
1710 BUG_MSG("<title> must be inside <head> -- ignoring.");
1711 }
1712}
1713
1719{
1720 /* title is only valid inside HEAD */
1721 if (html->InFlags & IN_HEAD && html->Num_TITLE == 1) {
1722 /* Ignore empty titles: <title></title> */
1723 char *title = html->Stash->str;
1724 if (!title || title[0] == '\0')
1725 return;
1726 a_UIcmd_set_page_title(html->bw, title);
1728 }
1729}
1730
1736static void Html_tag_open_script(DilloHtml *html, const char *tag, int tagsize)
1737{
1738 a_Html_stash_init(html);
1739 S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_VERBATIM;
1740}
1741
1746{
1747 /* eventually the stash will be sent to an interpreter for parsing */
1748}
1749
1754static void Html_tag_open_style(DilloHtml *html, const char *tag, int tagsize)
1755{
1756 const char *attrbuf;
1757
1758 html->loadCssFromStash = true;
1759
1760 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "type"))) {
1761 if (html->DocType != DT_HTML || html->DocTypeVersion <= 4.01f)
1762 BUG_MSG("<style> requires type attribute.");
1763 } else if (dStrAsciiCasecmp(attrbuf, "text/css")) {
1764 html->loadCssFromStash = false;
1765 }
1766 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "media")) &&
1767 dStrAsciiCasecmp(attrbuf, "all") && !dStriAsciiStr(attrbuf, "screen")) {
1768 /* HTML 4.01 sec. 6.13 says that media descriptors are case-sensitive,
1769 * but sec. 14.2.3 says that the attribute is case-insensitive.
1770 * TODO can be a comma-separated list.
1771 * TODO handheld.
1772 */
1773 html->loadCssFromStash = false;
1774 }
1775
1776 a_Html_stash_init(html);
1777 S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_VERBATIM;
1778}
1779
1784{
1786 html->styleEngine->parse(html, html->base_url, html->Stash->str,
1787 html->Stash->len, CSS_ORIGIN_AUTHOR);
1788}
1789
1790/*
1791 * <BODY>
1792 */
1793static void Html_tag_open_body(DilloHtml *html, const char *tag, int tagsize)
1794{
1795 const char *attrbuf;
1796 int32_t color;
1797 style::Color *bgColor;
1798 style::StyleImage *bgImage;
1799 style::BackgroundRepeat bgRepeat;
1800 style::BackgroundAttachment bgAttachment;
1801 style::Length bgPositionX, bgPositionY;
1802
1803 _MSG("Html_tag_open_body Num_BODY=%d\n", html->Num_BODY);
1804 if (!(html->InFlags & IN_BODY))
1805 html->InFlags |= IN_BODY;
1806 if (html->Num_BODY < UCHAR_MAX)
1807 ++html->Num_BODY;
1808
1809 if (html->Num_BODY > 1) {
1810 BUG_MSG("<body> was already open.");
1811 html->ReqTagClose = true;
1812 return;
1813 }
1814
1815 if (html->InFlags & IN_HEAD) {
1816 /* if we're here, it's bad XHTML, no need to recover */
1817 BUG_MSG("Unclosed <head>.");
1818 }
1819
1820 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "bgcolor"))) {
1821 color = a_Html_color_parse(html, attrbuf, -1);
1822
1823 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
1824 BUG_MSG("<body> bgcolor attribute is obsolete.");
1825
1826 if (color != -1)
1828 CSS_TYPE_COLOR, color);
1829 }
1830
1831 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "text"))) {
1832 color = a_Html_color_parse(html, attrbuf, -1);
1833
1834 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
1835 BUG_MSG("<body> text attribute is obsolete.");
1836
1837 if (color != -1)
1839 CSS_TYPE_COLOR, color);
1840 }
1841
1842 html->restyle ();
1843
1844 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "link"))) {
1845 html->non_css_link_color = a_Html_color_parse(html, attrbuf, -1);
1846 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
1847 BUG_MSG("<body> link attribute is obsolete.");
1848 }
1849
1850 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "vlink"))) {
1851 html->non_css_visited_color = a_Html_color_parse(html, attrbuf, -1);
1852 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
1853 BUG_MSG("<body> vlink attribute is obsolete.");
1854 }
1855
1856 html->dw->setStyle (html->style ());
1857
1858 bgColor = html->styleEngine->backgroundColor ();
1859 if (bgColor)
1860 HT2LT(html)->setBgColor(bgColor);
1861
1862 bgImage = html->styleEngine->backgroundImage (&bgRepeat, &bgAttachment,
1863 &bgPositionX, &bgPositionY);
1864 if (bgImage)
1865 HT2LT(html)->setBgImage(bgImage, bgRepeat, bgAttachment, bgPositionX,
1866 bgPositionY);
1867
1868 /* Determine a color for visited links.
1869 * This color is computed once per page and used for immediate feedback
1870 * when clicking a link.
1871 * On reload style including color for visited links is computed properly
1872 * according to CSS.
1873 */
1874 html->startElement (i_A);
1875 html->styleEngine->setPseudoVisited ();
1876 if (html->non_css_visited_color != -1) {
1878 html->non_css_visited_color);
1879 }
1880 html->visited_color = html->style ()->color->getColor ();
1881 html->styleEngine->endElement (i_A);
1882
1884 /* get a color that has a "safe distance" from text, link and bg */
1885 html->visited_color =
1887 html->style ()->color->getColor(),
1888 html->non_css_link_color,
1890 }
1891
1892
1893 S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_BODY;
1894}
1895
1896/*
1897 * BODY
1898 */
1900{
1901 _MSG("Html_tag_close_body: Num_BODY=%d\n", html->Num_BODY);
1902
1903 /* As some Tag soup pages use multiple BODY tags, this function
1904 * gets called only on EOF and upon an extra BODY open.
1905 * Also, we defer clearing the IN_BODY flag until IN_EOF */
1906}
1907
1908/*
1909 * <P>
1910 * TODO: what's the point between adding the parbreak before and
1911 * after the push?
1912 */
1913static void Html_tag_open_p(DilloHtml *html, const char *tag, int tagsize)
1914{
1915 CssPropertyList props;
1916
1917 a_Html_tag_set_align_attr (html, tag, tagsize);
1918}
1919
1920/*
1921 * <FRAME>, <IFRAME>
1922 * TODO: This is just a temporary fix while real frame support
1923 * isn't finished. Imitates lynx/w3m's frames.
1924 */
1925static void Html_tag_open_frame (DilloHtml *html, const char *tag, int tagsize)
1926{
1927 const char *attrbuf;
1928 DilloUrl *url;
1929 CssPropertyList props;
1930
1931 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "src")))
1932 return;
1933
1934 if (!(url = a_Html_url_new(html, attrbuf, NULL, 0)))
1935 return;
1936
1938 /* visited frame */
1939 html->styleEngine->setPseudoVisited ();
1940 } else {
1941 /* unvisited frame */
1942 html->styleEngine->setPseudoLink ();
1943 }
1944
1946 Html_set_new_link(html,&url));
1947}
1948
1949static void
1950 Html_tag_content_frame (DilloHtml *html, const char *tag, int tagsize)
1951{
1952 const char *attrbuf;
1953 char *src;
1954 Textblock *textblock;
1955 Widget *bullet;
1956
1957 textblock = HT2TB(html);
1958
1959 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "src")))
1960 return;
1961
1962 src = dStrdup(attrbuf);
1963
1964 textblock->addParbreak (5, html->wordStyle ());
1965
1966 bullet = new Bullet();
1967 textblock->addWidget(bullet, html->wordStyle ());
1968 textblock->addSpace(html->wordStyle ());
1969
1970 if (D_ASCII_TOLOWER(tag[1]) == 'i') {
1971 /* IFRAME usually comes with very long advertising/spying URLS,
1972 * to not break rendering we will force name="IFRAME" */
1973 textblock->addText ("IFRAME", html->wordStyle ());
1974
1975 } else {
1976 /* FRAME:
1977 * If 'name' tag is present use it, if not use 'src' value */
1978 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "name"))) {
1979 textblock->addText (src, html->wordStyle ());
1980 } else {
1981 textblock->addText (attrbuf, html->wordStyle ());
1982 }
1983 }
1984
1985 textblock->addParbreak (5, html->wordStyle ());
1986
1987 dFree(src);
1988}
1989
1990/*
1991 * <FRAMESET>
1992 * TODO: This is just a temporary fix while real frame support
1993 * isn't finished. Imitates lynx/w3m's frames.
1994 */
1996 const char *tag, int tagsize)
1997{
1998 HT2TB(html)->addParbreak (9, html->wordStyle ());
1999 HT2TB(html)->addText("--FRAME--", html->wordStyle ());
2000 Html_add_textblock(html, true, 5, false);
2001}
2002
2003/*
2004 * <H1> | <H2> | <H3> | <H4> | <H5> | <H6>
2005 */
2006static void Html_tag_open_h(DilloHtml *html, const char *tag, int tagsize)
2007{
2008 a_Html_tag_set_align_attr (html, tag, tagsize);
2009
2010 a_Html_stash_init(html);
2011 S_TOP(html)->parse_mode =
2013}
2014
2015/*
2016 * <BR>
2017 */
2018static void Html_tag_content_br(DilloHtml *html, const char *tag, int tagsize)
2019{
2020 HT2TB(html)->addLinebreak (html->wordStyle ());
2021}
2022
2023/*
2024 * <FONT>
2025 */
2026static void Html_tag_open_font(DilloHtml *html, const char *tag, int tagsize)
2027{
2028 const char *attrbuf;
2029 char *fontFamily = NULL;
2030 int32_t color;
2031
2032 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "color"))) {
2034 color = html->visited_color;
2035 } else {
2036 /* use the tag-specified color */
2037 color = a_Html_color_parse(html, attrbuf, -1);
2038 }
2039 if (color != -1)
2041 CSS_TYPE_COLOR, color);
2042 }
2043
2044 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "face"))) {
2045 fontFamily = dStrdup(attrbuf);
2047 CSS_TYPE_SYMBOL, fontFamily);
2048 }
2049
2050 dFree(fontFamily);
2051}
2052
2053/*
2054 * <ABBR>
2055 */
2056static void Html_tag_open_abbr(DilloHtml *html, const char *tag, int tagsize)
2057{
2058 const char *attrbuf;
2059
2061
2062 if (prefs.show_tooltip &&
2063 (attrbuf = a_Html_get_attr(html, tag, tagsize, "title"))) {
2064
2066 attrbuf);
2067 }
2068}
2069
2073void a_Html_common_image_attrs(DilloHtml *html, const char *tag, int tagsize)
2074{
2075 char *width_ptr, *height_ptr;
2076 const char *attrbuf;
2079 int w = 0, h = 0;
2080
2081 if (prefs.show_tooltip &&
2082 (attrbuf = a_Html_get_attr(html, tag, tagsize, "title"))) {
2084 attrbuf);
2085 }
2086 width_ptr = a_Html_get_attr_wdef(html, tag, tagsize, "width", NULL);
2087 height_ptr = a_Html_get_attr_wdef(html, tag, tagsize, "height", NULL);
2088 // Check for malicious values
2089 // TODO: the same for percentage and relative lengths.
2090 if (width_ptr) {
2091 l_w = a_Html_parse_length (html, width_ptr);
2092 w = (int) (CSS_LENGTH_TYPE(l_w) == CSS_LENGTH_TYPE_PX ?
2093 CSS_LENGTH_VALUE(l_w) : 0);
2094 }
2095 if (height_ptr) {
2096 l_h = a_Html_parse_length (html, height_ptr);
2097 h = (int) (CSS_LENGTH_TYPE(l_h) == CSS_LENGTH_TYPE_PX ?
2098 CSS_LENGTH_VALUE(l_h) : 0);
2099 }
2100 /* Check for suspicious image size request that would cause
2101 * an excessive amount of memory to be allocated for the
2102 * image buffer.
2103 * Be careful to avoid integer overflows during the checks.
2104 * There is an additional check in dw/image.cc to catch cases
2105 * where only one dimension is given and the image is scaled
2106 * preserving its original aspect ratio.
2107 * Size requests passed via CSS are also checked there.
2108 */
2109 if (w < 0 || h < 0 ||
2110 w > IMAGE_MAX_AREA || h > IMAGE_MAX_AREA ||
2111 (h > 0 && w > IMAGE_MAX_AREA / h)) {
2112 dFree(width_ptr);
2113 dFree(height_ptr);
2114 width_ptr = height_ptr = NULL;
2115 MSG("a_Html_common_image_attrs: suspicious image size request %d x %d\n",
2116 w, h);
2117 } else {
2124 }
2125
2126 /* TODO: we should scale the image respecting its ratio.
2127 * As the image size is not known at this time, maybe a flag
2128 * can be set to scale it later.
2129 if ((width_ptr && !height_ptr) || (height_ptr && !width_ptr))
2130 [...]
2131 */
2132
2133 /* x_img is an index to a list of {url,image} pairs.
2134 * We know a_Html_image_new() will use size() as its next index */
2136 html->images->size());
2137
2138
2139 dFree(width_ptr);
2140 dFree(height_ptr);
2141}
2142
2143DilloImage *a_Html_image_new(DilloHtml *html, const char *tag, int tagsize)
2144{
2145 bool load_now;
2146 char *alt_ptr;
2147 const char *attrbuf;
2148 DilloUrl *url;
2150
2151 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "src")) ||
2152 !(url = a_Html_url_new(html, attrbuf, NULL, 0)))
2153 return NULL;
2154
2155 alt_ptr = a_Html_get_attr_wdef(html, tag, tagsize, "alt", NULL);
2156 if (!alt_ptr || !*alt_ptr) {
2157 dFree(alt_ptr);
2158 alt_ptr = dStrdup("[IMG]");
2159 } else if (alt_ptr) {
2160 char *new_alt = dStrconcat("[IMG] ", alt_ptr, NULL);
2161 dFree(alt_ptr);
2162 alt_ptr = new_alt;
2163 }
2164
2166 image =
2167 a_Image_new(html->dw->getLayout(), (void*)(dw::core::ImgRenderer*)dw, 0, 0);
2168
2170
2171 if (HT2TB(html)->getBgColor())
2172 image->bg_color = HT2TB(html)->getBgColor()->getColor();
2173
2174 if (HT2TB(html)->getFgColor())
2175 image->fg_color = HT2TB(html)->getFgColor()->getColor();
2176
2178 hi->url = url;
2179 html->images->increase();
2180 html->images->set(html->images->size() - 1, hi);
2181
2182 load_now = prefs.load_images ||
2183 !dStrAsciiCasecmp(URL_SCHEME(url), "data") ||
2185
2186 if (load_now && Html_load_image(html->bw, url, html->page_url, image)) {
2187 // hi->image is NULL if dillo tries to load the image immediately
2188 hi->image = NULL;
2190 } else {
2191 // otherwise a reference is kept in html->images
2192 hi->image = image;
2193 }
2194
2195 dFree(alt_ptr);
2196 return image;
2197}
2198
2203 const DilloUrl *requester, DilloImage *Image)
2204{
2205 DilloWeb *Web;
2206 int ClientKey;
2207 /* Fill a Web structure for the cache query */
2208 Web = a_Web_new(bw, url, requester);
2209 Web->Image = Image;
2211 Web->flags |= WEB_Image;
2212 /* Request image data from the cache */
2213 if ((ClientKey = a_Capi_open_url(Web, NULL, NULL)) != 0) {
2214 a_Bw_add_client(bw, ClientKey, 0);
2215 a_Bw_add_url(bw, url);
2216 }
2217 return ClientKey != 0;
2218}
2219
2220static void Html_tag_open_img(DilloHtml *html, const char *tag, int tagsize)
2221{
2222 int border;
2223 const char *attrbuf;
2224
2225 a_Html_common_image_attrs(html, tag, tagsize);
2226
2227 /* Spacing to the left and right */
2228 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "hspace"))) {
2229 int space = strtol(attrbuf, NULL, 10);
2230 if (space > 0) {
2236 }
2237 }
2238
2239 /* Spacing at the top and bottom */
2240 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "vspace"))) {
2241 int space = strtol(attrbuf, NULL, 10);
2242 if (space > 0) {
2248 }
2249 }
2250
2251 /* Border */
2252 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "border"))) {
2253 border = strtol(attrbuf, NULL, 10);
2254 if (border >= 0) {
2264
2273 }
2274 }
2275
2276}
2277
2283static void Html_tag_content_img(DilloHtml *html, const char *tag, int tagsize)
2284{
2286 DilloUrl *usemap_url;
2287 const char *attrbuf;
2288
2289 /* This avoids loading images. Useful for viewing suspicious HTML email. */
2290 if (URL_FLAGS(html->base_url) & URL_SpamSafe)
2291 return;
2292
2293 Image = a_Html_image_new(html, tag, tagsize);
2294 if (!Image)
2295 return;
2296
2297 usemap_url = NULL;
2298 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "usemap")))
2299 /* TODO: usemap URLs outside of the document are not used. */
2300 usemap_url = a_Html_url_new(html, attrbuf, NULL, 0);
2301
2302 // At this point, we know that Image->ir represents an image
2303 // widget. Notice that the order of the casts matters, because of
2304 // multiple inheritance.
2305 dw::Image *dwi = (dw::Image*)(dw::core::ImgRenderer*)Image->img_rndr;
2306 HT2TB(html)->addWidget(dwi, html->style());
2307 HT2TB(html)->addBreakOption (html->style (), false);
2308
2309 /* Image maps */
2310 if (a_Html_get_attr(html, tag, tagsize, "ismap")) {
2311 dwi->setIsMap();
2312 _MSG(" Html_tag_open_img: server-side map (ISMAP)\n");
2313 } else if (html->style ()->x_link != -1 &&
2314 usemap_url == NULL) {
2315 /* For simple links, we have to suppress the "image_pressed" signal.
2316 * This is overridden for USEMAP images. */
2317// a_Dw_widget_set_button_sensitive (IM2DW(Image->dw), FALSE);
2318 }
2319
2320 if (usemap_url) {
2321 dwi->setUseMap(&html->maps, new ::object::String(URL_STR(usemap_url)));
2322 a_Url_free (usemap_url);
2323 }
2324}
2325
2326/*
2327 * <map>
2328 */
2329static void Html_tag_content_map(DilloHtml *html, const char *tag, int tagsize)
2330{
2331 char *hash_name;
2332 const char *attrbuf;
2333 DilloUrl *url;
2334
2335 if (html->InFlags & IN_MAP) {
2336 BUG_MSG("Nested <map>.");
2337 } else {
2338 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "name"))) {
2339 html->InFlags |= IN_MAP;
2340 hash_name = dStrconcat("#", attrbuf, NULL);
2341 url = a_Html_url_new(html, hash_name, NULL, 0);
2342 html->maps.startNewMap(new ::object::String(URL_STR(url)));
2343 a_Url_free (url);
2344 dFree(hash_name);
2345 } else {
2346 BUG_MSG("<map> requires name attribute.");
2347 }
2348 }
2349}
2350
2355{
2356 /* This is a hack for the perhaps frivolous feature of drawing image map
2357 * shapes when there is no image to display. If this map is defined after
2358 * an image that has not been loaded (img != NULL), tell the image to
2359 * redraw. (It will only do so if it uses a map.)
2360 */
2361 for (int i = 0; i < html->images->size(); i++) {
2362 DilloImage *img = html->images->get(i)->image;
2363
2364 if (img) {
2365 // At this point, we know that img->ir represents an image
2366 // widget. (Really? Is this assumption safe?) Notice that the
2367 // order of the casts matters, because of multiple
2368 // inheritance.
2370 dwi->forceMapRedraw();
2371 }
2372 }
2373 html->InFlags &= ~IN_MAP;
2374}
2375
2379static
2381{
2382 int coord;
2383 const char *tail = str;
2384 char *newtail = NULL;
2386
2387 while (1) {
2388 coord = strtol(tail, &newtail, 10);
2389 if (coord == 0 && newtail == tail)
2390 break;
2391 coords->increase();
2392 coords->set(coords->size() - 1, coord);
2393 while (isspace(*newtail))
2394 newtail++;
2395 if (!*newtail)
2396 break;
2397 if (*newtail != ',') {
2398 BUG_MSG("<area> coords must be integers separated by commas.");
2399 }
2400 tail = newtail + 1;
2401 }
2402
2403 return coords;
2404}
2405
2406/*
2407 * <AREA>
2408 */
2409static void
2410 Html_tag_content_area(DilloHtml *html, const char *tag, int tagsize)
2411{
2412 enum types {UNKNOWN, RECTANGLE, CIRCLE, POLYGON, BACKGROUND};
2413 types type;
2414 misc::SimpleVector<int> *coords = NULL;
2415 DilloUrl* url;
2416 const char *attrbuf;
2417 int link = -1;
2418 Shape *shape = NULL;
2419
2420 if (!(html->InFlags & IN_MAP)) {
2421 BUG_MSG("<area> not inside <map>.");
2422 return;
2423 }
2424 attrbuf = a_Html_get_attr(html, tag, tagsize, "shape");
2425
2426 if (!attrbuf || !*attrbuf || !dStrAsciiCasecmp(attrbuf, "rect")) {
2427 /* the default shape is a rectangle */
2428 type = RECTANGLE;
2429 } else if (dStrAsciiCasecmp(attrbuf, "default") == 0) {
2430 /* "default" is the background */
2431 type = BACKGROUND;
2432 } else if (dStrAsciiCasecmp(attrbuf, "circle") == 0) {
2433 type = CIRCLE;
2434 } else if (dStrnAsciiCasecmp(attrbuf, "poly", 4) == 0) {
2435 type = POLYGON;
2436 } else {
2437 BUG_MSG("<area> unknown shape: '%s'.", attrbuf);
2438 type = UNKNOWN;
2439 }
2440 if (type == RECTANGLE || type == CIRCLE || type == POLYGON) {
2441 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "coords"))) {
2442 coords = Html_read_coords(html, attrbuf);
2443
2444 if (type == RECTANGLE) {
2445 if (coords->size() != 4)
2446 BUG_MSG("<area> rectangle must have four coordinate values.");
2447 if (coords->size() >= 4)
2448 shape = new Rectangle(coords->get(0),
2449 coords->get(1),
2450 coords->get(2) - coords->get(0),
2451 coords->get(3) - coords->get(1));
2452 } else if (type == CIRCLE) {
2453 if (coords->size() != 3)
2454 BUG_MSG("<area> circle must have three coordinate values.");
2455 if (coords->size() >= 3)
2456 shape = new Circle(coords->get(0), coords->get(1),
2457 coords->get(2));
2458 } else if (type == POLYGON) {
2459 Polygon *poly;
2460 int i;
2461 if (coords->size() % 2)
2462 BUG_MSG("<area> polygon with odd number of coordinates.");
2463 shape = poly = new Polygon();
2464 for (i = 0; i < (coords->size() / 2); i++)
2465 poly->addPoint(coords->get(2*i), coords->get(2*i + 1));
2466 }
2467 delete(coords);
2468 }
2469 }
2470 if (shape != NULL || type == BACKGROUND) {
2471 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "href"))) {
2472 url = a_Html_url_new(html, attrbuf, NULL, 0);
2473 dReturn_if_fail ( url != NULL );
2474
2475 link = Html_set_new_link(html, &url);
2476 }
2477 if (type == BACKGROUND)
2478 html->maps.setCurrentMapDefaultLink(link);
2479 else
2480 html->maps.addShapeToCurrentMap(shape, link);
2481 }
2482}
2483
2484/*
2485 * <OBJECT>
2486 * Simply provide a link if the object is something downloadable.
2487 */
2488static void Html_tag_open_object(DilloHtml *html, const char *tag, int tagsize)
2489{
2490 DilloUrl *url, *base_url = NULL;
2491 const char *attrbuf;
2492
2493 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "codebase"))) {
2494 base_url = a_Html_url_new(html, attrbuf, NULL, 0);
2495 }
2496
2497 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "data"))) {
2498 url = a_Html_url_new(html, attrbuf,
2499 URL_STR(base_url), (base_url != NULL));
2500 dReturn_if_fail ( url != NULL );
2501
2503 html->styleEngine->setPseudoVisited ();
2504 } else {
2505 html->styleEngine->setPseudoLink ();
2506 }
2507
2509 Html_set_new_link(html, &url));
2510 }
2512}
2513
2514static void Html_tag_content_object(DilloHtml *html, const char *tag,
2515 int tagsize)
2516{
2517 if (a_Html_get_attr(html, tag, tagsize, "data"))
2518 HT2TB(html)->addText("[OBJECT]", html->wordStyle ());
2519}
2520
2521/*
2522 * <VIDEO>
2523 * Provide a link to the video.
2524 */
2525static void Html_tag_open_video(DilloHtml *html, const char *tag, int tagsize)
2526{
2527 DilloUrl *url;
2528 const char *attrbuf;
2529
2530 if (html->InFlags & IN_MEDIA) {
2531 MSG("<video> not handled when already inside a media element.\n");
2532 return;
2533 }
2534 /* TODO: poster attr */
2535
2536 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "src"))) {
2537 url = a_Html_url_new(html, attrbuf, NULL, 0);
2538 dReturn_if_fail ( url != NULL );
2539
2541 html->styleEngine->setPseudoVisited ();
2542 } else {
2543 html->styleEngine->setPseudoLink ();
2544 }
2545
2547 Html_set_new_link(html, &url));
2548
2549 HT2TB(html)->addText("[VIDEO]", html->wordStyle ());
2550 }
2551 html->InFlags |= IN_MEDIA;
2552}
2553
2554/*
2555 * <AUDIO>
2556 * Provide a link to the audio.
2557 */
2558static void Html_tag_open_audio(DilloHtml *html, const char *tag, int tagsize)
2559{
2560 DilloUrl *url;
2561 const char *attrbuf;
2562
2563 if (html->InFlags & IN_MEDIA) {
2564 MSG("<audio> not handled when already inside a media element.\n");
2565 return;
2566 }
2567
2568 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "src"))) {
2569 url = a_Html_url_new(html, attrbuf, NULL, 0);
2570 dReturn_if_fail ( url != NULL );
2571
2573 html->styleEngine->setPseudoVisited ();
2574 } else {
2575 html->styleEngine->setPseudoLink ();
2576 }
2577
2579 Html_set_new_link(html, &url));
2580
2581 HT2TB(html)->addText("[AUDIO]", html->wordStyle ());
2582 }
2583 html->InFlags |= IN_MEDIA;
2584}
2585
2586/*
2587 * <SOURCE>
2588 * Media resource; provide a link to its address.
2589 */
2590static void Html_tag_open_source(DilloHtml *html, const char *tag,
2591 int tagsize)
2592{
2593 const char *attrbuf;
2594
2595 if (!(html->InFlags & IN_MEDIA)) {
2596 // Can also be inside a picture element.
2597 // BUG_MSG("<source> not inside a media element.");
2598 return;
2599 }
2600 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "src"))) {
2601 BUG_MSG("<source> requires src attribute.");
2602 return;
2603 } else {
2604 DilloUrl *url = a_Html_url_new(html, attrbuf, NULL, 0);
2605
2606 dReturn_if_fail ( url != NULL );
2607
2609 html->styleEngine->setPseudoVisited ();
2610 } else {
2611 html->styleEngine->setPseudoLink ();
2612 }
2614 Html_set_new_link(html, &url));
2615 }
2616}
2617
2618static void Html_tag_content_source(DilloHtml *html, const char *tag,
2619 int tagsize)
2620{
2621 if ((html->InFlags & IN_MEDIA) && a_Html_get_attr(html, tag, tagsize,"src"))
2622 HT2TB(html)->addText("[MEDIA SOURCE]", html->wordStyle ());
2623}
2624
/* NOTE(review): the signature of this function is not visible in this
 * listing. The tag table names Html_tag_close_media as the close handler
 * of <audio> and <video>; presumably this is its body -- confirm. */
2629{
 /* Clear the IN_MEDIA flag. */
2630 html->InFlags &= ~IN_MEDIA;
2631}
2632
2633/*
2634 * <EMBED>
2635 * Provide a link to embedded content.
 *
 * Only acts when a "src" attribute is present: a URL is built from it and
 * either setPseudoVisited() or setPseudoLink() is applied to the style
 * engine. The visible placeholder text is added by the content handler.
 *
 * NOTE(review): the line holding the if-condition of the visited/link
 * branch and the line starting the statement that ends in
 * Html_set_new_link(...) are not visible in this listing.
2636 */
2637static void Html_tag_open_embed(DilloHtml *html, const char *tag, int tagsize)
2638{
2639 const char *attrbuf;
2640
2641 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "src"))) {
2642 DilloUrl *url = a_Html_url_new(html, attrbuf, NULL, 0);
2643
2644 dReturn_if_fail ( url != NULL );
2645
2647 html->styleEngine->setPseudoVisited ();
2648 } else {
2649 html->styleEngine->setPseudoLink ();
2650 }
2651
2653 Html_set_new_link(html, &url));
2654 }
2655}
2656
2657static void Html_tag_content_embed(DilloHtml *html,const char *tag,int tagsize)
2658{
2659 if (a_Html_get_attr(html, tag, tagsize, "src"))
2660 HT2TB(html)->addText("[EMBED]", html->wordStyle ());
2661}
2662
2666static const char* Html_get_javascript_link(DilloHtml *html)
2667{
2668 size_t i;
2669 char ch, *p1, *p2;
2670 Dstr *Buf = html->attr_data;
2671
2672 if (dStrnAsciiCasecmp("javascript", Buf->str, 10) == 0) {
2673 i = strcspn(Buf->str, "'\"");
2674 ch = Buf->str[i];
2675 if ((ch == '"' || ch == '\'') &&
2676 (p2 = strchr(Buf->str + i + 1 , ch))) {
2677 p1 = Buf->str + i;
2678 BUG_MSG("Link depends on javascript().");
2679 dStr_truncate(Buf, p2 - Buf->str);
2680 dStr_erase(Buf, 0, p1 - Buf->str + 1);
2681 }
2682 }
2683 return Buf->str;
2684}
2685
2689static void Html_add_anchor(DilloHtml *html, const char *name)
2690{
2691 _MSG("Registering ANCHOR: %s\n", name);
2692 if (!HT2TB(html)->addAnchor (name, html->style ()))
2693 BUG_MSG("Anchor names must be unique within the document (\"%s\").",
2694 name);
2695 /*
2696 * According to Sec. 12.2.1 of the HTML 4.01 spec, "anchor names that
2697 * differ only in case may not appear in the same document", but
2698 * "comparisons between fragment identifiers and anchor names must be
2699 * done by exact (case-sensitive) match." We ignore the case issue and
2700 * always test for exact matches. Moreover, what does uppercase mean
2701 * for Unicode characters outside the ASCII range?
2702 */
2703}
2704
2705/*
2706 * <A>
 *
 * Sets IN_A and, when inside a MAP, also runs the <area> content handler
 * on this tag. With an "href" attribute: a leading 'j'/'J' sends the value
 * through Html_get_javascript_link(), a URL is built, and the visited or
 * unvisited link state is applied (html->InVisitedLink is set in the
 * visited branch). With a "name" attribute: the value is checked,
 * url-decoded and registered as an anchor unless it equals the element's
 * id.
 *
 * NOTE(review): several lines of this function are not visible in this
 * listing (the condition of the visited/unvisited if-else, the start of
 * several multi-line calls, and the statements around the tooltip and id
 * handling). The description above covers the visible code only.
2707 */
2708static void Html_tag_open_a(DilloHtml *html, const char *tag, int tagsize)
2709{
2710 DilloUrl *url;
2711 const char *attrbuf;
2712
2713 /* TODO: add support for MAP with A HREF */
2714 html->InFlags |= IN_A;
2715 if (html->InFlags & IN_MAP)
2716 Html_tag_content_area(html, tag, tagsize);
2717
2718 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "href"))) {
2719 /* if it's a javascript link, extract the reference. */
2720 if (D_ASCII_TOLOWER(attrbuf[0]) == 'j')
2721 attrbuf = Html_get_javascript_link(html);
2722
2723 url = a_Html_url_new(html, attrbuf, NULL, 0);
 /* An early return here leaves IN_A set. */
2724 dReturn_if_fail ( url != NULL );
2725
2727 html->InVisitedLink = true;
2728 html->styleEngine->setPseudoVisited ();
2729 if (html->non_css_visited_color != -1)
2732 html->non_css_visited_color);
2733 } else {
2734 html->styleEngine->setPseudoLink ();
2735 if (html->non_css_link_color != -1)
2738 html->non_css_link_color);
2739 }
2740
2742 Html_set_new_link(html, &url));
2743 }
2744 if (prefs.show_tooltip &&
2745 (attrbuf = a_Html_get_attr(html, tag, tagsize, "title"))) {
2747 attrbuf);
2748 }
2749
2751
2752 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "name"))) {
2753 char *nameVal;
2754 const char *id = html->styleEngine->getId ();
2755
2757 Html_check_name_val(html, attrbuf, "name");
2758
2759 nameVal = a_Url_decode_hex_str(attrbuf);
2760
2761 if (nameVal) {
2762 /* We compare the "id" value with the url-decoded "name" value */
2763 if (!id || strcmp(nameVal, id)) {
2764 if (id)
2765 BUG_MSG("In <a>, id ('%s') and name ('%s') attributes differ.",
2766 id, nameVal);
2767 Html_add_anchor(html, nameVal);
2768 }
2769
 /* nameVal is released here after use. */
2770 dFree(nameVal);
2771 }
2772 }
2773}
2774
2775/*
2776 * <A> close function
2777 */
2778static void Html_tag_close_a(DilloHtml *html)
2779{
2780 html->InFlags &= ~IN_A;
2781 html->InVisitedLink = false;
2782}
2783
2784/*
2785 * <BLOCKQUOTE>
 * Delegates to Html_add_textblock(html, true, 9, false).
 *
 * NOTE(review): the first line of the signature is not visible in this
 * listing; the tag table names Html_tag_open_blockquote as the handler.
2786 */
2788 const char *tag, int tagsize)
2789{
2790 Html_add_textblock(html, true, 9, false);
2791}
2792
2793/*
2794 * <Q>
 * Adds an opening quotation mark to the current text block.
 *
 * NOTE(review): one line between the declaration and the addText() call
 * is not visible in this listing.
2795 */
2796static void Html_tag_open_q(DilloHtml *html, const char *tag, int tagsize)
2797{
2798 /*
2799 * Left Double Quotation Mark, which is wrong in many cases, but
2800 * should at least be widely recognized.
2801 */
 /* The three bytes are the UTF-8 encoding of U+201C. */
2802 const char *U201C = "\xe2\x80\x9c";
2803
2805 HT2TB(html)->addText (U201C, html->wordStyle ());
2806}
2807
2808/*
2809 * </Q>
2810 */
2811static void Html_tag_close_q(DilloHtml *html)
2812{
2813 /* Right Double Quotation Mark */
2814 const char *U201D = "\xe2\x80\x9d";
2815
2816 HT2TB(html)->addText (U201D, html->wordStyle ());
2817}
2818
2819/*
2820 * Handle the <UL> tag.
 *
 * An optional "type" attribute ("disc", "circle" or "square", ASCII
 * case-insensitive) selects the list style type; any other value falls
 * back to disc. In HTML 5 and later documents the attribute is reported
 * as obsolete. The top stack entry is then initialised as an unordered
 * list with list_number 0 and no reference list item.
 *
 * NOTE(review): the line starting the call that ends in
 * "CSS_TYPE_ENUM, list_style_type);" is not visible in this listing.
2821 */
2822static void Html_tag_open_ul(DilloHtml *html, const char *tag, int tagsize)
2823{
2824 const char *attrbuf;
2825 ListStyleType list_style_type;
2826
2827 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "type"))) {
2828
2829 /* list_style_type explicitly defined */
2830 if (dStrAsciiCasecmp(attrbuf, "disc") == 0)
2831 list_style_type = LIST_STYLE_TYPE_DISC;
2832 else if (dStrAsciiCasecmp(attrbuf, "circle") == 0)
2833 list_style_type = LIST_STYLE_TYPE_CIRCLE;
2834 else if (dStrAsciiCasecmp(attrbuf, "square") == 0)
2835 list_style_type = LIST_STYLE_TYPE_SQUARE;
2836 else
2837 /* invalid value */
2838 list_style_type = LIST_STYLE_TYPE_DISC;
2839
2841 CSS_TYPE_ENUM, list_style_type);
2842 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
2843 BUG_MSG("<ul> type attribute is obsolete.");
2844 }
2845
2846 S_TOP(html)->list_type = HTML_LIST_UNORDERED;
2847 S_TOP(html)->list_number = 0;
2848 S_TOP(html)->ref_list_item = NULL;
2849}
2850
2851/*
2852 * Handle the <DIR> or <MENU> tag.
2853 * (Deprecated and almost the same as <UL>)
 *
 * Adds a paragraph break, initialises the top stack entry as an unordered
 * list (list_number 0, no reference list item) and reports the element as
 * obsolete.
 *
 * NOTE(review): the line before addParbreak() and the line before the
 * BUG_MSG are not visible in this listing; the BUG_MSG may be guarded by
 * a condition that is not shown here.
2854 */
2855static void Html_tag_open_dir(DilloHtml *html, const char *tag, int tagsize)
2856{
2858 HT2TB(html)->addParbreak (9, html->wordStyle ());
2859
2860 S_TOP(html)->list_type = HTML_LIST_UNORDERED;
2861 S_TOP(html)->list_number = 0;
2862 S_TOP(html)->ref_list_item = NULL;
2863
2865 BUG_MSG("Obsolete list type; use <ul> instead.");
2866}
2867
2868/*
2869 * Handle the <MENU> tag.
2870 */
2871static void Html_tag_open_menu(DilloHtml *html, const char *tag, int tagsize)
2872{
2873 /* In another bit of ridiculous mess from the HTML5 world, the menu
2874 * element, which was deprecated in HTML4:
2875 * - does not appear at all in W3C's HTML5 spec
2876 * - appears in WHATWG's HTML5 doc and the W3C's 5.1 draft, where it
2877 * means something totally different than it did in the old days
2878 * (now it's for popup menus and toolbar menus rather than being a
2879 * sort of list).
2880 */
2881 if (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f))
2882 Html_tag_open_dir(html, tag, tagsize);
2883}
2884
2885/*
2886 * Handle the <OL> tag.
 *
 * The first character of an optional "type" attribute selects the list
 * style: '1' decimal, 'a'/'A' lower/upper alpha, 'i'/'I' lower/upper
 * roman; anything else keeps decimal. An optional "start" attribute is
 * parsed as a base-10 integer; negative values are reported and replaced
 * by 0. Without "start" the list number begins at 1.
 *
 * NOTE(review): the line starting the call that ends in
 * "CSS_TYPE_ENUM, listStyleType);" is not visible in this listing.
2887 */
2888static void Html_tag_open_ol(DilloHtml *html, const char *tag, int tagsize)
2889{
2890 const char *attrbuf;
2891 int n = 1;
2892
2893 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "type"))) {
2894 ListStyleType listStyleType = LIST_STYLE_TYPE_DECIMAL;
2895
2896 if (*attrbuf == '1')
2897 listStyleType = LIST_STYLE_TYPE_DECIMAL;
2898 else if (*attrbuf == 'a')
2899 listStyleType = LIST_STYLE_TYPE_LOWER_ALPHA;
2900 else if (*attrbuf == 'A')
2901 listStyleType = LIST_STYLE_TYPE_UPPER_ALPHA;
2902 else if (*attrbuf == 'i')
2903 listStyleType = LIST_STYLE_TYPE_LOWER_ROMAN;
2904 else if (*attrbuf == 'I')
2905 listStyleType = LIST_STYLE_TYPE_UPPER_ROMAN;
2906
2908 CSS_TYPE_ENUM, listStyleType);
2909 }
2910
2911 S_TOP(html)->list_type = HTML_LIST_ORDERED;
2912
 /* n is overwritten only when "start" is present; strtol() yields 0 for
  * text that does not begin with a number. */
2913 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "start")) &&
2914 (n = (int) strtol(attrbuf, NULL, 10)) < 0) {
2915 BUG_MSG("Illegal '-' character in START attribute; Starting from 0.");
2916 n = 0;
2917 }
2918 S_TOP(html)->list_number = n;
2919 S_TOP(html)->ref_list_item = NULL;
2920}
2921
2922/*
2923 * Handle the <LI> tag.
 *
 * Reports an <li> found outside a list (except in HTML 5 and later
 * documents), sets IN_LI, and lets a "value" attribute overwrite the list
 * counter kept in the stack entry just below the top; negative values are
 * reported and replaced by 0.
 *
 * NOTE(review): the line opening the block that the "// ordered" comment
 * belongs to is not visible in this listing, and the local "style" is not
 * used by any visible line.
2924 */
2925static void Html_tag_open_li(DilloHtml *html, const char *tag, int tagsize)
2926{
2927 Style *style = html->style ();
2928 int *list_number;
2929 const char *attrbuf;
2930
2931 if (S_TOP(html)->list_type == HTML_LIST_NONE &&
2932 !(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) {
2933 /* In WHATWG's HTML5 and W3C's HTML 5.1, LI can appear within MENUs
2934 * of the toolbar type.
2935 */
2936 BUG_MSG("<li> outside <ul> or <ol>.");
2937 }
2938
2939 html->InFlags |= IN_LI;
2940
2941 /* Get our parent tag's variables (used as state storage) */
 /* size()-2 is the entry directly below the top of the stack. */
2942 list_number = &html->stack->getRef(html->stack->size()-2)->list_number;
2943
2945 // ordered
2946 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "value")) &&
2947 (*list_number = strtol(attrbuf, NULL, 10)) < 0) {
2948 BUG_MSG("Illegal negative list value attribute; Starting from 0.");
2949 *list_number = 0;
2950 }
2951 }
2952}
2953
2954/*
2955 * Close <LI>.
 * Clears IN_LI and flushes html->dw, which is cast to ListItem here.
 *
 * NOTE(review): the signature line is not visible in this listing; the
 * tag table names Html_tag_close_li as the close handler of <li>.
2956 */
2958{
2959 html->InFlags &= ~IN_LI;
2960 ((ListItem *)html->dw)->flush ();
2961}
2962
2963/*
2964 * <HR>
 *
 * Reads the "width", "size", "align" and "noshade" attributes. In HTML 5
 * and later documents "width", "size" and "noshade" are each reported as
 * obsolete. "size" is parsed as a base-10 integer; when "noshade" is
 * present a non-positive size becomes 1. For a positive size the top and
 * bottom halves are computed as (size+1)/2 and size/2 pixels.
 *
 * NOTE(review): many lines of this function are not visible in this
 * listing (the start of the width hint call, most of the "noshade"
 * branch, and the start of the four calls that consume size_top and
 * size_bottom). The description above covers the visible code only.
2965 */
2966static void Html_tag_open_hr(DilloHtml *html, const char *tag, int tagsize)
2967{
2968 char *width_ptr;
2969 const char *attrbuf;
2970 int32_t size = 0;
2971
2972 width_ptr = a_Html_get_attr_wdef(html, tag, tagsize, "width", NULL);
2973 if (width_ptr) {
2974 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
2975 BUG_MSG("<hr> width attribute is obsolete.");
2978 a_Html_parse_length (html, width_ptr));
 /* width_ptr is released here. */
2979 dFree(width_ptr);
2980 }
2981
2982 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "size"))) {
2983 size = strtol(attrbuf, NULL, 10);
2984 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
2985 BUG_MSG("<hr> size attribute is obsolete.");
2986 }
2987
2988 a_Html_tag_set_align_attr(html, tag, tagsize);
2989
2990 /* TODO: evaluate attribute */
2991 if (a_Html_get_attr(html, tag, tagsize, "noshade")) {
2992 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
2993 BUG_MSG("<hr> noshade attribute is obsolete.");
3002
3003 if (size <= 0)
3004 size = 1;
3005 }
3006
3007 if (size > 0) {
 /* For odd sizes the extra pixel goes to the top half. */
3008 CssLength size_top = CSS_CREATE_LENGTH ((size+1)/2, CSS_LENGTH_TYPE_PX);
3009 CssLength size_bottom = CSS_CREATE_LENGTH (size / 2, CSS_LENGTH_TYPE_PX);
3011 CSS_TYPE_LENGTH_PERCENTAGE, size_top);
3013 CSS_TYPE_LENGTH_PERCENTAGE, size_top);
3016 size_bottom);
3019 size_bottom);
3020 }
3021
3022}
3023
3024static void Html_tag_content_hr(DilloHtml *html, const char *tag, int tagsize)
3025{
3026 Widget *hruler;
3027 HT2TB(html)->addParbreak (5, html->wordStyle ());
3028
3029 hruler = new Ruler();
3030 hruler->setStyle (html->style ());
3031 HT2TB(html)->addWidget (hruler, html->style ());
3032 HT2TB(html)->addParbreak (5, html->wordStyle ());
3033}
3034
3035/*
3036 * <DL>
 * Adds a paragraph break to the current text block.
 *
 * NOTE(review): one line before the addParbreak() call is not visible in
 * this listing.
3037 */
3038static void Html_tag_open_dl(DilloHtml *html, const char *tag, int tagsize)
3039{
3040 /* may want to actually do some stuff here. */
3042 HT2TB(html)->addParbreak (9, html->wordStyle ());
3043}
3044
3045/*
3046 * <DT>
 * Adds a paragraph break to the current text block.
 *
 * NOTE(review): one line before the addParbreak() call is not visible in
 * this listing.
3047 */
3048static void Html_tag_open_dt(DilloHtml *html, const char *tag, int tagsize)
3049{
3051 HT2TB(html)->addParbreak (9, html->wordStyle ());
3052}
3053
3054/*
3055 * <DD>
3056 */
3057static void Html_tag_open_dd(DilloHtml *html, const char *tag, int tagsize)
3058{
3059 Html_add_textblock(html, true, 9, false);
3060}
3061
3062/*
3063 * <PRE>
 * Adds a paragraph break and sets the IN_PRE flag.
 *
 * NOTE(review): one line before the addParbreak() call is not visible in
 * this listing.
3064 */
3065static void Html_tag_open_pre(DilloHtml *html, const char *tag, int tagsize)
3066{
3068 HT2TB(html)->addParbreak (9, html->wordStyle ());
3069
3070 html->InFlags |= IN_PRE;
3071}
3072
3073/*
3074 * Custom close for <PRE>
 * Clears the IN_PRE flag.
 *
 * NOTE(review): the signature line is not visible in this listing; the
 * tag table names Html_tag_close_pre as the close handler of <pre>.
3075 */
3077{
3078 html->InFlags &= ~IN_PRE;
3079}
3080
3085static int Html_tag_pre_excludes(DilloHtml *html, int tag_idx)
3086{
3087 if (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) {
3088 /* HTML5 doesn't say anything about excluding elements */
3089 const char *es_set[] = {"img", "object", "applet", "big", "small", "sub",
3090 "sup", "font", "basefont", NULL};
3091 static int ei_set[10], i;
3092
3093 /* initialize array */
3094 if (!ei_set[0])
3095 for (i = 0; es_set[i]; ++i)
3096 ei_set[i] = a_Html_tag_index(es_set[i]);
3097
3098 for (i = 0; ei_set[i]; ++i)
3099 if (tag_idx == ei_set[i])
3100 return 1;
3101 }
3102 return 0;
3103}
3104
3108static void Html_update_content_type(DilloHtml *html, const char *content)
3109{
3110 const char *new_content = a_Capi_set_content_type(html->page_url, content,
3111 "meta");
3112 /* Cannot ask cache whether the content type was changed, as
3113 * this code in another bw might have already changed it for us.
3114 */
3115 if (a_Misc_content_type_cmp(html->content_type, new_content)) {
3116 html->stop_parser = true; /* The cache buffer is no longer valid */
3117 a_UIcmd_repush(html->bw);
3118 }
3119}
3120
3134static void Html_tag_open_meta(DilloHtml *html, const char *tag, int tagsize)
3135{
3136 const char meta_template[] =
3137"<table width='100%%'><tr><td bgcolor='#ee0000'>Warning:</td>\n"
3138" <td bgcolor='#8899aa' width='100%%'>\n"
3139" This page uses the NON-STANDARD meta refresh tag.<br> The HTML 4.01 SPEC\n"
3140" (sec 7.4.4) recommends explicitly to avoid it.</td></tr>\n"
3141" <tr><td bgcolor='#a0a0a0' colspan='2'>The author wanted you to go\n"
3142" <a href='%s'>here</a>%s</td></tr></table><br>\n";
3143
3144 const char *p, *equiv, *charset, *content;
3145 char delay_str[64], *mr_url;
3146 DilloUrl *new_url;
3147 int delay;
3148
3149 /* only valid inside HEAD */
3150 if (!(html->InFlags & IN_HEAD)) {
3151 if (!((html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) &&
3152 a_Html_get_attr(html, tag, tagsize, "itemprop"))) {
3153 /* With the HTML 5.1 draft spec, meta with itemprop may appear
3154 * in the body.
3155 */
3156 BUG_MSG("This <meta> element must be inside the HEAD section.");
3157 }
3158 return;
3159 }
3160
3161 if ((equiv = a_Html_get_attr(html, tag, tagsize, "http-equiv"))) {
3162 if (!dStrAsciiCasecmp(equiv, "refresh") &&
3163 (content = a_Html_get_attr(html, tag, tagsize, "content"))) {
3164
3165 /* Get delay, if present, and make a message with it */
3166 if ((delay = strtol(content, NULL, 0))) {
3167 snprintf(delay_str, 64, " after %d second%s.",
3168 delay, (delay > 1) ? "s" : "");
3169 } else {
3170 sprintf(delay_str, ".");
3171 }
3172 /* Skip to anything after "URL=" or ";" if "URL=" is not found */
3173 int has_url = 1;
3174 if ((p = dStriAsciiStr(content, "url=")))
3175 content = p + strlen("url=");
3176 else if ((p = strstr(content, ";")))
3177 content = p + strlen(";");
3178 else
3179 has_url = 0;
3180
3181 if (has_url) {
3182 /* Handle the case of a quoted URL */
3183 if (*content == '"' || *content == '\'') {
3184 if ((p = strchr(content + 1, *content)))
3185 mr_url = dStrndup(content + 1, p - content - 1);
3186 else
3187 mr_url = dStrdup(content + 1);
3188 } else {
3189 mr_url = dStrdup(content);
3190 }
3191 } else {
3192 mr_url = dStrdup("");
3193 }
3194
3195 new_url = a_Html_url_new(html, mr_url, NULL, 0);
3196
3197 if (a_Url_cmp(html->base_url, new_url) == 0) {
3198 /* redirection loop, or empty url string: ignore */
3199 BUG_MSG("<meta> refresh: %s.",
3200 *mr_url ? "redirection loop" : "no target URL");
3201 } else if (delay == 0) {
3202 /* zero-delay redirection */
3203 html->stop_parser = true;
3204 if (URL_FLAGS(html->base_url) & URL_SpamSafe) {
3205 a_UIcmd_set_msg(html->bw,
3206 "WARNING: local URL with META refresh. Aborting.");
3207 } else if (a_Capi_dpi_verify_request(html->bw, new_url)) {
3208 a_UIcmd_redirection0((void*)html->bw, new_url);
3209 }
3210 } else {
3211 /* Send a custom HTML message.
3212 * TODO: This is a hairy hack,
3213 * It'd be much better to build a widget. */
3214 Dstr *ds_msg = dStr_sized_new(256);
3215 dStr_sprintf(ds_msg, meta_template, URL_STR(new_url), delay_str);
3216 {
3217 int o_InFlags = html->InFlags;
3218 int o_TagSoup = html->TagSoup;
3219 html->InFlags = IN_BODY + IN_META_HACK;
3220 html->TagSoup = false;
3221 Html_write_raw(html, ds_msg->str, ds_msg->len, 0);
3222 html->TagSoup = o_TagSoup;
3223 html->InFlags = o_InFlags;
3224 }
3225 dStr_free(ds_msg, 1);
3226 }
3227 a_Url_free(new_url);
3228 dFree(mr_url);
3229
3230 } else if (!dStrAsciiCasecmp(equiv, "content-type") &&
3231 (content = a_Html_get_attr(html, tag, tagsize, "content"))) {
3232 _MSG("Html_tag_open_meta: content={%s}\n", content);
3233 Html_update_content_type(html, content);
3234 }
3235 } else if (html->DocType == DT_HTML && html->DocTypeVersion == 5.0f &&
3236 (charset = a_Html_get_attr(html, tag, tagsize, "charset"))) {
3237 char *content = dStrconcat("text/html; charset=", charset, NULL);
3238
3239 Html_update_content_type(html, content);
3240 dFree(content);
3241 }
3242}
3243
/*
 * Cache callback used for stylesheet loads (it is passed to
 * a_Capi_open_url() by the stylesheet loader in this file).
 * A non-zero Op is treated as EOF: the pending-stylesheet counter of the
 * browser window owning the request is decremented.
 *
 * NOTE(review): the statement executed when the counter reaches zero is
 * not visible in this listing; going by the comment it is a repush.
 */
3247static void Html_css_load_callback(int Op, CacheClient_t *Client)
3248{
3249 _MSG("Html_css_load_callback: Op=%d\n", Op);
3250 if (Op) { /* EOF */
3251 BrowserWindow *bw = ((DilloWeb *)Client->Web)->bw;
3252 /* Repush when we've got them all */
3253 if (--bw->NumPendingStyleSheets == 0)
3255 }
3256}
3257
/*
 * Stylesheet loader body.
 * Does nothing for a NULL url or when prefs.load_stylesheets is off.
 * If the data is available through a_Capi_get_buf() it is parsed at once
 * as an author stylesheet; a leading @charset "..." rule is first turned
 * into a "text/css; charset=..." content type and the buffer is fetched
 * again. Otherwise a cache request flagged WEB_Stylesheet is opened and
 * the window's pending-stylesheet counter is incremented.
 *
 * NOTE(review): the signature, the first part of the condition on the
 * cached branch, and the statements between a_Capi_unref_buf() and the
 * second a_Capi_get_buf() are not visible in this listing. The visible
 * code never frees content_type; presumably a hidden line does -- confirm.
 */
3262{
3263 char *data;
3264 int len;
3265
3266 dReturn_if (url == NULL || ! prefs.load_stylesheets);
3267
3268 _MSG("Html_load_stylesheet: ");
3270 a_Capi_get_buf(url, &data, &len)) {
3271 _MSG("cached URL=%s len=%d", URL_STR(url), len);
3272 if (strncmp("@charset \"", data, 10) == 0) {
3273 char *endq = strchr(data+10, '"');
3274
 /* 51 = 10 characters of prefix + 41, which accepts names of up to
  * 41 characters before the closing quote. */
3275 if (endq && (endq - data <= 51)) {
3276 /* IANA limits charset names to 40 characters */
3277 char *content_type;
3278
 /* Temporarily terminate the buffer at the closing quote so the
  * charset name can be copied, then restore the quote. */
3279 *endq = '\0';
3280 content_type = dStrconcat("text/css; charset=", data+10, NULL);
3281 *endq = '"';
3282 a_Capi_unref_buf(url);
3285 a_Capi_get_buf(url, &data, &len);
3286 }
3287 }
3288 html->styleEngine->parse(html, url, data, len, CSS_ORIGIN_AUTHOR);
3289 a_Capi_unref_buf(url);
3290 } else {
3291 /* Fill a Web structure for the cache query */
3292 int ClientKey;
3293 DilloWeb *Web = a_Web_new(html->bw, url, html->page_url);
3294 Web->flags |= WEB_Stylesheet;
3295 if ((ClientKey = a_Capi_open_url(Web, Html_css_load_callback, NULL))) {
3296 ++html->bw->NumPendingStyleSheets;
3297 a_Bw_add_client(html->bw, ClientKey, 0);
3298 a_Bw_add_url(html->bw, url);
3299 MSG("NumPendingStyleSheets=%d\n", html->bw->NumPendingStyleSheets);
3300 }
3301 }
3302 _MSG("\n");
3303}
3304
/*
 * <LINK>
 * Registers the URL of a linked CSS stylesheet through html->addCssUrl().
 * The tag is ignored outside HEAD (and reported, unless the document is
 * HTML 5 or later and the tag has "itemprop"), when "rel" is not
 * "stylesheet", when "type" is given and is not "text/css", when "media"
 * is given and neither contains "screen" nor equals "all", or when there
 * is no usable "href".
 *
 * NOTE(review): the statements following the "suspicious HTML email" and
 * "Remote stylesheets enabled?" comments are not visible in this listing.
 */
3313static void Html_tag_open_link(DilloHtml *html, const char *tag, int tagsize)
3314{
3315 DilloUrl *url;
3316 const char *attrbuf;
3317
3318 //char *tag_str = dStrndup(tag, tagsize);
3319 //MSG("Html_tag_open_link(): %s\n", tag_str);
3320 //dFree(tag_str);
3321
3322 /* When viewing suspicious HTML email, don't load LINK */
3324
3325 /* Ignore LINK outside HEAD */
3326 if (!(html->InFlags & IN_HEAD)) {
3327 if (!((html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) &&
3328 a_Html_get_attr(html, tag, tagsize, "itemprop"))) {
3329 /* With the HTML 5.1 draft spec, link with itemprop may appear
3330 * in the body.
3331 */
3332 BUG_MSG("This <link> element must be inside the HEAD section.");
3333 }
3334 return;
3335 }
3336 /* Remote stylesheets enabled? */
3338 /* CSS stylesheet link */
3339 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "rel")) ||
3340 dStrAsciiCasecmp(attrbuf, "stylesheet"))
3341 return;
3342
3343 /* IMPLIED attributes? */
3344 if (((attrbuf = a_Html_get_attr(html, tag, tagsize, "type")) &&
3345 dStrAsciiCasecmp(attrbuf, "text/css")) ||
3346 ((attrbuf = a_Html_get_attr(html, tag, tagsize, "media")) &&
3347 !dStriAsciiStr(attrbuf, "screen") && dStrAsciiCasecmp(attrbuf, "all")))
3348 return;
3349
3350 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "href")) ||
3351 !(url = a_Html_url_new(html, attrbuf, NULL, 0)))
3352 return;
3353
3354 _MSG(" Html_tag_open_link(): addCssUrl %s\n", URL_STR(url));
3355
 /* The local URL is freed right after being handed to addCssUrl(). */
3356 html->addCssUrl(url);
3357 a_Url_free(url);
3358}
3359
/*
 * <BASE>
 * Inside HEAD, an "href" attribute replaces html->base_url. In HTML 5 and
 * later documents the new URL is built with a_Html_url_new(html, attrbuf,
 * NULL, 0); otherwise it is built with ("", 1) as the last two arguments
 * and must have a scheme, or the tag is reported and the URL discarded.
 * Outside HEAD the tag is only reported.
 *
 * NOTE(review): the line starting the call that passes the URL_SpamSafe
 * flag on to the new base URL is not visible in this listing.
 */
3363static void Html_tag_open_base(DilloHtml *html, const char *tag, int tagsize)
3364{
3365 const char *attrbuf;
3366 DilloUrl *BaseUrl;
3367
3368 if (html->InFlags & IN_HEAD) {
3369 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "href"))) {
3370 bool_t html5 = html->DocType == DT_HTML &&
3371 html->DocTypeVersion >= 5.0f;
3372
3373 BaseUrl = html5 ? a_Html_url_new(html, attrbuf, NULL, 0) :
3374 a_Html_url_new(html, attrbuf, "", 1);
3375
3376 if (html5 || URL_SCHEME_(BaseUrl)) {
3377 /* Pass the URL_SpamSafe flag to the new base url */
3379 BaseUrl, URL_FLAGS(html->base_url) & URL_SpamSafe);
 /* The old base URL is freed and replaced by the new one. */
3380 a_Url_free(html->base_url);
3381 html->base_url = BaseUrl;
3382 } else {
3383 BUG_MSG("<base> URI is relative (it MUST be absolute).");
3384 a_Url_free(BaseUrl);
3385 }
3386 }
3387 } else {
3388 BUG_MSG("<base> not inside HEAD section.");
3389 }
3390}
3391
/*
 * Default open handler, used by the tag table for elements that need no
 * element-specific work when opened.
 *
 * NOTE(review): the single body line of this function is not visible in
 * this listing.
 */
3392static void Html_tag_open_default(DilloHtml *html,const char *tag,int tagsize)
3393{
3395}
3396
3397/*
3398 * <SPAN>
 * When prefs.show_tooltip is on and the tag has a "title" attribute, the
 * title text is passed on in a call that ends with "attrbuf);".
 *
 * NOTE(review): the first statement of the body and the line starting the
 * call that consumes the title are not visible in this listing.
3399 */
3400static void Html_tag_open_span(DilloHtml *html, const char *tag, int tagsize)
3401{
3402 const char *attrbuf;
3403
3405
3406 if (prefs.show_tooltip &&
3407 (attrbuf = a_Html_get_attr(html, tag, tagsize, "title"))) {
3408
3410 attrbuf);
3411 }
3412}
3413
3414/*
3415 * html5 sectioning stuff: article aside nav section header footer
 * (the tag table also uses this handler for <main>, and
 * Html_tag_open_div() calls it for <div>).
 * When prefs.show_tooltip is on and the tag has a "title" attribute, the
 * title text is passed on in a call that ends with "attrbuf);".
 *
 * NOTE(review): the line starting the call that consumes the title is not
 * visible in this listing.
3416 */
3417static void Html_tag_open_sectioning(DilloHtml *html, const char *tag,
3418 int tagsize)
3419{
3420 const char *attrbuf;
3421
3422 if (prefs.show_tooltip &&
3423 (attrbuf = a_Html_get_attr(html, tag, tagsize, "title"))) {
3424
3426 attrbuf);
3427 }
3428}
3429
3430/*
3431 * <DIV> (TODO: make a complete implementation)
3432 */
3433static void Html_tag_open_div(DilloHtml *html, const char *tag, int tagsize)
3434{
3435 a_Html_tag_set_align_attr (html, tag, tagsize);
3436 Html_tag_open_sectioning(html, tag, tagsize);
3437}
3438
/* NOTE(review): the signature of this function is not visible in this
 * listing. The tag table uses Html_tag_close_par as the close handler of
 * <address>, <dir>, <dl>, <dt> and <menu>; presumably this is its body --
 * confirm. */
3443{
 /* Adds a paragraph break to the current text block. */
3444 HT2TB(html)->addParbreak (9, html->wordStyle ());
3445}
3446
3447/*
3448 * <WBR> "The wbr element represents a line break opportunity."
3449 */
3450static void Html_tag_content_wbr(DilloHtml *html, const char *tag, int tagsize)
3451{
3452 HT2TB(html)->addBreakOption(html->wordStyle (), true);
3453}
3454
3455
/*
 * Table of the HTML elements known to the parser.
 *
 * Each entry holds, in order: the lowercase element name, a flag
 * bit-pattern written with B8(), the EndTag kind, and the open, content
 * and close handlers (NULL when there is none).
 *
 * As used by the code in this file:
 *  - Flags bit 1 marks an inline element, bit 2 an inline container and
 *    bit 4 a block container (see Html_triggers_optional_close() and
 *    Html_tag_cleanup_to_idx()). The remaining bits are not interpreted
 *    by the code visible here.
 *  - EndTag 'O' marks an element whose close tag is optional. 'R' and 'F'
 *    presumably mean "required" and "forbidden" -- confirm.
 *
 * a_Html_tag_index() runs a binary search over this table, so the entries
 * must stay sorted by name.
 *
 * NOTE(review): the continuation lines of the "map", "optgroup" and
 * "textarea" entries are not visible in this listing.
 */
3483static const TagInfo Tags[] = {
3484 {"a", B8(01011),'R', Html_tag_open_a, NULL, Html_tag_close_a},
3485 {"abbr", B8(01011),'R', Html_tag_open_abbr, NULL, NULL},
3486 /* acronym 010101 -- obsolete in HTML5 */
3487 {"address", B8(01110),'R', Html_tag_open_default, NULL, Html_tag_close_par},
3488 {"area", B8(01001),'F', Html_tag_open_default, Html_tag_content_area, NULL},
3489 {"article", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3490 {"aside", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3491 {"audio", B8(01111),'R', Html_tag_open_audio, NULL, Html_tag_close_media},
3492 {"b", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3493 {"base", B8(10001),'F', Html_tag_open_base, NULL, NULL},
3494 /* basefont 010001 -- obsolete in HTML5 */
3495 /* bdo 010101 */
3496 {"big", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3497 {"blockquote", B8(01110),'R', Html_tag_open_blockquote, NULL, NULL},
3498 {"body", B8(01110),'O', Html_tag_open_body, NULL, Html_tag_close_body},
3499 {"br", B8(01001),'F', Html_tag_open_default, Html_tag_content_br, NULL},
3500 {"button", B8(01111),'R', Html_tag_open_button,NULL,Html_tag_close_button},
3501 /* caption */
3502 {"center", B8(01110),'R', Html_tag_open_default, NULL, NULL},
3503 {"cite", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3504 {"code", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3505 /* col 010010 'F' */
3506 /* colgroup */
3507 {"dd", B8(01110),'O', Html_tag_open_dd, NULL, NULL},
3508 {"del", B8(01111),'R', Html_tag_open_default, NULL, NULL},
3509 {"dfn", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3510 {"dir", B8(01100),'R', Html_tag_open_dir, NULL, Html_tag_close_par},
3511 /* TODO: complete <div> support! */
3512 {"div", B8(01110),'R', Html_tag_open_div, NULL, NULL},
3513 {"dl", B8(01100),'R', Html_tag_open_dl, NULL, Html_tag_close_par},
3514 {"dt", B8(01010),'O', Html_tag_open_dt, NULL, Html_tag_close_par},
3515 {"em", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3516 {"embed", B8(01001),'F', Html_tag_open_embed, Html_tag_content_embed, NULL},
3517 /* fieldset */
3518 {"figcaption", B8(01110),'R', Html_tag_open_default, NULL, NULL},
3519 {"figure", B8(01110),'R', Html_tag_open_default, NULL, NULL},
3520 {"font", B8(01011),'R', Html_tag_open_font, NULL, NULL},
3521 {"footer", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3522 {"form", B8(01110),'R', Html_tag_open_form, NULL, Html_tag_close_form},
3523 {"frame", B8(01000),'F', Html_tag_open_frame, Html_tag_content_frame, NULL},
3524 {"frameset", B8(01110),'R', Html_tag_open_default, Html_tag_content_frameset,
3525 NULL},
3526 {"h1", B8(01010),'R', Html_tag_open_h, NULL, NULL},
3527 {"h2", B8(01010),'R', Html_tag_open_h, NULL, NULL},
3528 {"h3", B8(01010),'R', Html_tag_open_h, NULL, NULL},
3529 {"h4", B8(01010),'R', Html_tag_open_h, NULL, NULL},
3530 {"h5", B8(01010),'R', Html_tag_open_h, NULL, NULL},
3531 {"h6", B8(01010),'R', Html_tag_open_h, NULL, NULL},
3532 {"head", B8(10111),'O', Html_tag_open_head, NULL, Html_tag_close_head},
3533 {"header", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3534 {"hr", B8(01000),'F', Html_tag_open_hr, Html_tag_content_hr, NULL},
3535 {"html", B8(00110),'O', Html_tag_open_html, NULL, Html_tag_close_html},
3536 {"i", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3537 {"iframe", B8(01111),'R', Html_tag_open_frame, Html_tag_content_frame, NULL},
3538 {"img", B8(01001),'F', Html_tag_open_img, Html_tag_content_img, NULL},
3539 {"input", B8(01001),'F', Html_tag_open_input, NULL, NULL},
3540 {"ins", B8(01111),'R', Html_tag_open_default, NULL, NULL},
3541 {"isindex", B8(11001),'F', Html_tag_open_isindex, NULL, NULL},
3542 {"kbd", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3543 /* label 010101 */
3544 /* legend 01?? */
3545 {"li", B8(01110),'O', Html_tag_open_li, NULL, Html_tag_close_li},
3546 {"link", B8(10001),'F', Html_tag_open_link, NULL, NULL},
3547 {"main", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3548 {"map", B8(01101),'R', Html_tag_open_default, Html_tag_content_map,
3550 {"mark", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3551 /* menu 1010 -- TODO: not exactly 1010, it can contain LI and inline */
3552 {"menu", B8(01100),'R', Html_tag_open_menu, NULL, Html_tag_close_par},
3553 {"meta", B8(11001),'F', Html_tag_open_meta, NULL, NULL},
3554 {"nav", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3555 /* noframes 1011 -- obsolete in HTML5 */
3556 /* noscript 1011 */
3557 {"object", B8(11111),'R', Html_tag_open_object, Html_tag_content_object,NULL},
3558 {"ol", B8(01100),'R', Html_tag_open_ol, NULL, NULL},
3559 {"optgroup", B8(01011),'O', Html_tag_open_optgroup, NULL,
3561 {"option", B8(01001),'O', Html_tag_open_option, NULL, Html_tag_close_option},
3562 {"p", B8(01010),'O', Html_tag_open_p, NULL, NULL},
3563 /* param 010001 'F' */
3564 {"pre", B8(01010),'R', Html_tag_open_pre, NULL, Html_tag_close_pre},
3565 {"q", B8(01011),'R', Html_tag_open_q, NULL, Html_tag_close_q},
3566 {"s", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3567 {"samp", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3568 {"script", B8(11101),'R', Html_tag_open_script,NULL,Html_tag_close_script},
3569 {"section", B8(01110),'R', Html_tag_open_sectioning, NULL, NULL},
3570 {"select", B8(01011),'R', Html_tag_open_select,NULL,Html_tag_close_select},
3571 {"small", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3572 {"source", B8(01001),'F', Html_tag_open_source, Html_tag_content_source,NULL},
3573 {"span", B8(01011),'R', Html_tag_open_span, NULL, NULL},
3574 {"strike", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3575 {"strong", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3576 {"style", B8(10011),'R', Html_tag_open_style, NULL, Html_tag_close_style},
3577 {"sub", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3578 {"sup", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3579 {"table", B8(01100),'R', Html_tag_open_table, Html_tag_content_table, NULL},
3580 {"tbody", B8(01010),'O', Html_tag_open_tbody, Html_tag_content_tbody, NULL},
3581 {"td", B8(01110),'O', Html_tag_open_td, Html_tag_content_td, NULL},
3582 {"textarea", B8(01011),'R', Html_tag_open_textarea, Html_tag_content_textarea,
3584 {"tfoot", B8(01010),'O', Html_tag_open_tfoot, NULL, NULL},
3585 {"th", B8(01110),'O', Html_tag_open_th, Html_tag_content_th, NULL},
3586 {"thead", B8(01010),'O', Html_tag_open_thead, NULL, NULL},
3587 {"title", B8(10011),'R', Html_tag_open_title, NULL, Html_tag_close_title},
3588 {"tr", B8(01100),'O', Html_tag_open_tr, Html_tag_content_tr, NULL},
3589 {"tt", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3590 {"u", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3591 {"ul", B8(01100),'R', Html_tag_open_ul, NULL, NULL},
3592 {"var", B8(01011),'R', Html_tag_open_default, NULL, NULL},
3593 {"video", B8(01111),'R', Html_tag_open_video, NULL, Html_tag_close_media},
3594 {"wbr", B8(01011),'F', Html_tag_open_default, Html_tag_content_wbr, NULL}
3595};
/* Number of entries in Tags[]. */
3596#define NTAGS (sizeof(Tags)/sizeof(Tags[0]))
3597
3598/* Only available in C++11 and up */
/* Compile-time check that the table size matches HTML_NTAGS. */
3599#if __cpp_static_assert
3600static_assert(NTAGS == HTML_NTAGS,
3601 "Mismatch between number of tags in Tags and HTML_NTAGS");
3602#endif
3603
3604/*
3605 * Compares tag from buffer ('/' or '>' or space-ended string) [p1]
3606 * with tag from taglist (lowercase, zero ended string) [p2]
3607 * Return value: as strcmp()
3608 */
3609static int Html_tag_compare(const char *p1, const char *p2)
3610{
3611 while ( *p2 ) {
3612 if (D_ASCII_TOLOWER(*p1) != *p2)
3613 return(D_ASCII_TOLOWER(*p1) - *p2);
3614 ++p1;
3615 ++p2;
3616 }
3617 return !strchr(" >/\n\r\t", *p1);
3618}
3619
3624int a_Html_tag_index(const char *tag)
3625{
3626 int low, high, mid, cond;
3627
3628 /* Binary search */
3629 low = 0;
3630 high = NTAGS - 1; /* Last tag index */
3631 while (low <= high) {
3632 mid = (low + high) / 2;
3633 if ((cond = Html_tag_compare(tag, Tags[mid].name)) < 0 )
3634 high = mid - 1;
3635 else if (cond > 0)
3636 low = mid + 1;
3637 else
3638 return mid;
3639 }
3640 return -1;
3641}
3642
3651static int Html_triggers_optional_close(int old_idx, int cur_idx)
3652{
3653 int Flags = Tags[cur_idx].Flags;
3654 if (old_idx == i_P || old_idx == i_DT) {
3655 /* P and DT are closed by block elements (i.e. non inline)*/
3656 return (!(Flags & 1));
3657 } else if (old_idx == i_LI) {
3658 /* LI closes LI
3659 * Note: non-flow should also close it, but FF does not. */
3660 return (cur_idx == i_LI);
3661 } else if (old_idx == i_TD || old_idx == i_TH) {
3662 /* TD and TH are closed by: TD, TH and TR.
3663 * Note: non-flow should also close it, but FF does not. */
3664 return (cur_idx == i_TD || cur_idx == i_TH || cur_idx == i_TR || cur_idx == i_TBODY);
3665 } else if (old_idx == i_TR) {
3666 /* TR closes TR */
3667 return (cur_idx == i_TR || cur_idx == i_TBODY);
3668 } else if (old_idx == i_DD) {
3669 /* DD is closed by DD and DT */
3670 return (cur_idx == i_DD || cur_idx == i_DT);
3671 } else if (old_idx == i_OPTGROUP) {
3672 /* i_OPTGROUP can only contain OPTION */
3673 return (cur_idx != i_OPTION);
3674 } else if (old_idx == i_OPTION) {
3675 return 1; // OPTION always needs close
3676 }
3677
3678 /* Don't close HTML, HEAD and BODY. They're handled by Html_test_section().
3679 * TODO: TBODY is pending */
3680 return 0;
3681}
3682
3690static inline int Html_forbids_cross_nesting(const int InFlags,
3691 const int new_idx)
3692{
3693 int f = InFlags, ni = new_idx, oi = -1;
3694 if (f & (IN_A | IN_BUTTON | IN_SELECT | IN_TEXTAREA) &&
3695 (ni == i_A || ni == i_BUTTON || ni == i_SELECT || ni == i_TEXTAREA))
3696 oi = (f & IN_A ? i_A : f & IN_BUTTON ? i_BUTTON : f & IN_SELECT ?
3697 i_SELECT : f & IN_TEXTAREA ? i_TEXTAREA : 0);
3698 return oi;
3699}
3700
/*
 * Pop the parser stack from its top down to (and including) s_idx,
 * running each popped element's close handler and emitting a bug message
 * that explains why the element is being closed.
 *
 * s_idx:   stack index of the lowest entry to pop.
 * new_idx: tag index of the tag that triggered the cleanup; used for the
 *          nesting tests and in the messages.
 * fi:      tag index of a forbidden-nesting element, or a negative value
 *          when the cleanup is not caused by forbidden nesting.
 * op:      'o' when the cleanup happens at a tag open, 'c' at a close
 *          (these are the two values the code compares against).
 *
 * A BODY entry is popped only when IN_EOF is set or html->ReqTagClose is
 * set; otherwise the loop stops there after recording the event.
 */
3709static void Html_tag_cleanup_to_idx(DilloHtml *html, int s_idx,
3710 int new_idx, int fi, char op)
3711{
3712 int s_top, ni = new_idx;
3713 while ((s_top = html->stack->size() - 1) >= s_idx) {
3714 int toptag_idx = S_TOP(html)->tag_idx;
3715 TagInfo toptag = Tags[toptag_idx];
3716
3717 if (fi >= 0) {
3718 // forbidden nesting
3719 if (toptag_idx != fi)
3720 BUG_MSG(" Nesting cleanup - forcing close of open tag: <%s>.",
3721 toptag.name);
3722 } else if (s_top == s_idx && op == 'c') {
3723 // target tag, no bug when closing.
3724 } else if (toptag.EndTag == 'O') {
3725 // optional close, that's OK
3726 } else if ((!(toptag.Flags & 4) &&
3727 (Tags[ni].Flags & 4 || !(Tags[ni].Flags & 1))) ||
3728 (Tags[ni].Flags & 1 && !(toptag.Flags & 2))) {
3729 // block {element, container} in non block container or
3730 // inline element in non inline container
3731 BUG_MSG((op == 'o') ?
3732 "Bad nesting: <%s> can't contain <%s>. -- closing <%s>." :
3733 "<%s> needs to be closed before </%s>. -- closing <%s>.",
3734 toptag.name, Tags[ni].name, toptag.name);
3735 } else {
3736 BUG_MSG(
3737 "<%s> should have been closed before </%s>. -- closing <%s>.",
3738 toptag.name, Tags[ni].name, toptag.name);
3739 }
3740 _MSG("op(%c): %s s_top=%d s_idx=%d\n", op, toptag.name, s_top, s_idx);
3741 if (toptag_idx == i_BODY &&
3742 !((html->InFlags & IN_EOF) || html->ReqTagClose)) {
 /* Record which close was seen: the HTML one when the target is
  * stack index 1, the BODY one otherwise. */
3743 (s_idx == 1 ? html->PrevWasHtmlClose : html->PrevWasBodyClose) = true;
3744 break; // only pop {BODY,HTML} upon EOF or redundancy
3745 }
 /* Run the element's close handler, if any, before popping it. */
3746 if (toptag.close)
3747 toptag.close(html);
3748 Html_real_pop_tag(html);
3749 }
3750}
3751
3761static void Html_stack_cleanup_at_open(DilloHtml *html, int ni)
3762{
3763 if (!html->TagSoup)
3764 return;
3765
3766 int s_top = html->stack->size() - 1, s_idx;
3767 int fi = Html_forbids_cross_nesting(html->InFlags, ni);
3768 for (s_idx = s_top; s_idx > 0; --s_idx) {
3769 int ti = html->stack->getRef(s_idx)->tag_idx;
3770
3771 if (fi >= 0) {
3772 // forbidden cross nesting found
3773 if (ti != fi)
3774 continue; // don't allow, close
3775 --s_idx;
3776 BUG_MSG("Forbidden nesting: <%s> can't contain <%s>. -- closing "
3777 "<%s>.", Tags[fi].name, Tags[ni].name, Tags[fi].name);
3778
3779 } else if ((html->InFlags & IN_PRE) && ni == i_HR) {
3780 break; // allow Apache's bad HTML directory listings...
3781
3782 } else if (Tags[ti].EndTag == 'O') { // Element with optional close
3783 if (Html_triggers_optional_close(ti, ni))
3784 continue; // close
3785 } else if (!(Tags[ni].Flags & 1) && !(Tags[ti].Flags & 4)) {
3786 // Block element over a NON block container
3787 if (ti == i_A && html->DocTypeVersion >= 5.0f)
3788 break;
3789 continue; // close
3790 }
3791
3792 break;
3793 }
3794
3795 if (s_idx < s_top)
3796 Html_tag_cleanup_to_idx(html, s_idx + 1, ni, fi, 'o');
3797}
3798
3811static void Html_tag_cleanup_at_close(DilloHtml *html, int new_idx)
3812{
3813 int stack_idx, tag_idx, matched = 0, expected = 0;
3814 TagInfo new_tag = Tags[new_idx];
3815
3816 /* Look for the candidate tag to close */
3817 stack_idx = html->stack->size();
3818 while (--stack_idx) {
3819 tag_idx = html->stack->getRef(stack_idx)->tag_idx;
3820 if (tag_idx == new_idx) {
3821 /* matching tag found */
3822 matched = 1;
3823 break;
3824 } else if (Tags[tag_idx].EndTag == 'O') {
3825 /* close elements with optional close */
3826 continue;
3827 } else if ((new_idx == i_A && html->InFlags & IN_A) ||
3828 (new_idx == i_BUTTON && html->InFlags & IN_BUTTON) ||
3829 (new_idx == i_SELECT && html->InFlags & IN_SELECT) ||
3830 (new_idx == i_TEXTAREA && html->InFlags & IN_TEXTAREA)) {
3831 /* Let these elements close anything left open inside them */
3832 continue;
3833 } else if (Tags[new_idx].Flags & 4 && // Block container
3834 Tags[stack_idx].Flags & 3) { // Inline element or container
3835 /* Let a block container close inline elements left open inside it. */
3836 continue;
3837 } else {
3838 /* this is the tag that should have been closed */
3839 expected = 1;
3840 break;
3841 }
3842 }
3843
3844 if (matched) {
3845 Html_tag_cleanup_to_idx(html, stack_idx, new_idx, -1, 'c');
3846 } else if (expected) {
3847 BUG_MSG("Unexpected closing tag: </%s> -- expected </%s>.",
3848 new_tag.name, Tags[tag_idx].name);
3849 } else {
3850 BUG_MSG("Unexpected closing tag: </%s>.", new_tag.name);
3851 }
3852}
3853
3858static void Html_test_section(DilloHtml *html, int new_idx, int IsCloseTag)
3859{
3860 const char *tag;
3861 int tag_idx;
3862
3863 if (!(html->InFlags & IN_HTML) && html->DocType == DT_NONE)
3864 BUG_MSG("The required DOCTYPE declaration is missing. "
3865 "Handling as HTML4.");
3866
3867 if (!(html->InFlags & IN_HTML)) {
3868 tag = "<html>";
3869 tag_idx = a_Html_tag_index(tag + 1);
3870 if (tag_idx != new_idx || IsCloseTag) {
3871 /* implicit open */
3872 Html_force_push_tag(html, tag_idx);
3873 _MSG("Open : %*s%s\n", html->stack->size()," ",Tags[tag_idx].name);
3874 Tags[tag_idx].open (html, tag, strlen(tag));
3875 }
3876 }
3877
3878 if (Tags[new_idx].Flags & 16) {
3879 /* head element */
3880 if (!(html->InFlags & IN_HEAD) && html->Num_HEAD == 0) {
3881 tag = "<head>";
3882 tag_idx = a_Html_tag_index(tag + 1);
3883 if (tag_idx != new_idx || IsCloseTag) {
3884 /* implicit open of the head element */
3885 Html_force_push_tag(html, tag_idx);
3886 _MSG("Open : %*s%s\n", html->stack->size()," ",Tags[tag_idx].name);
3887 Tags[tag_idx].open (html, tag, strlen(tag));
3888 }
3889 }
3890
3891 } else if (Tags[new_idx].Flags & 8) {
3892 /* body element */
3893 if (html->InFlags & IN_HEAD) {
3894 tag = "</head>";
3895 tag_idx = a_Html_tag_index(tag + 2);
3896 Html_tag_cleanup_at_close(html, tag_idx);
3897 }
3898 tag = "<body>";
3899 tag_idx = a_Html_tag_index(tag + 1);
3900 if (tag_idx != new_idx || IsCloseTag) {
3901 /* implicit open */
3902 Html_force_push_tag(html, tag_idx);
3903 _MSG("Open : %*s%s\n", html->stack->size()," ",Tags[tag_idx].name);
3904 Tags[tag_idx].open (html, tag, strlen(tag));
3905 }
3906 }
3907}
3908
/*
 * Parse the attributes that may appear on any tag: "id", "class", "style",
 * "xml:lang" and "lang".
 *
 * Every lookup is guarded by the minimal tag length that could hold the
 * attribute, so shorter tags are not scanned at all.
 * The values returned by a_Html_get_attr() are consumed right away, before
 * the next lookup is made.
 */
3912static void Html_parse_common_attrs(DilloHtml *html, char *tag, int tagsize)
3913{
3914 const char *attrbuf;
3915 char lang[3];
3916
3917 if (tagsize >= 8 && /* length of "<t id=i>" */
3918 (attrbuf = a_Html_get_attr(html, tag, tagsize, "id"))) {
3919 /* According to the SGML declaration of HTML 4, all NAME values
3920 * occurring outside entities must be converted to uppercase
3921 * (this is what "NAMECASE GENERAL YES" says). But the HTML 4
3922 * spec states in Sec. 7.5.2 that anchor ids are case-sensitive.
3923 * So we don't do it and hope for better specs in the future ...
3924 */
3925 Html_check_name_val(html, attrbuf, "id");
3926
3927 html->styleEngine->setId(attrbuf);
3928 }
3929
/* "class" and "style" are only looked up when one of the two CSS related
 * preferences is enabled. */
3930 if (tagsize >= 11 && (prefs.parse_embedded_css || prefs.load_stylesheets)) {
3931 /* length of "<t class=i>" or "<t style=i>" */
3932 attrbuf = a_Html_get_attr(html, tag, tagsize, "class");
3933 if (attrbuf)
3934 html->styleEngine->setClass (attrbuf);
3935
3936 attrbuf = a_Html_get_attr(html, tag, tagsize, "style");
3937 if (attrbuf)
3938 html->styleEngine->setStyle (attrbuf);
3939 }
3940
3941 /* handle "xml:lang" and "lang" attributes
3942 * We use only the first two chars of the value to deal with
3943 * extended language tags (see http://www.rfc-editor.org/rfc/bcp/bcp47.txt)
3944 */
/* lang is zero-filled and at most two chars are copied into its three
 * bytes, so it always stays NUL-terminated. */
3945 memset(lang, 0, sizeof(lang));
3946 if (tagsize >= 14) {
3947 /* length of "<t xml:lang=i>" */
3948 attrbuf = a_Html_get_attr(html, tag, tagsize, "xml:lang");
3949 if (attrbuf)
3950 strncpy(lang, attrbuf, 2);
3951 }
3952 if (!lang[0] && tagsize >= 10) { /* 'xml:lang' prevails over 'lang' */
3953 /* length of "<t lang=i>" */
3954 attrbuf = a_Html_get_attr(html, tag, tagsize, "lang");
3955 if (attrbuf)
3956 strncpy(lang, attrbuf, 2);
3957 }
/* NOTE(review): the statement guarded by the following `if` (original
 * line 3959) is missing from this listing; presumably it hands `lang`
 * over to the style engine -- confirm against the repository. */
3958 if (lang[0])
3960}
3961
3966static void Html_check_html5_obsolete(DilloHtml *html, int ni)
3967{
3968 static int indexes[9] = {-1};
3969
3970 if (indexes[0] == -1) {
3971 indexes[0] = a_Html_tag_index("dir");
3972 indexes[1] = a_Html_tag_index("frame");
3973 indexes[2] = a_Html_tag_index("frameset");
3974 indexes[3] = a_Html_tag_index("isindex");
3975 indexes[4] = a_Html_tag_index("strike");
3976 indexes[5] = a_Html_tag_index("big");
3977 indexes[6] = a_Html_tag_index("center");
3978 indexes[7] = a_Html_tag_index("font");
3979 indexes[8] = a_Html_tag_index("tt");
3980 }
3981 for (int i = 0; i < 9; i++) {
3982 if (indexes[i] == ni) {
3983 BUG_MSG("<%s> is obsolete in HTML5.", Tags[ni].name);
3984 break;
3985 }
3986 }
3987}
3988
3990{
3992 false /* Perhaps true for widgets oof? */);
3993}
3994
3996{
3997 Html_add_textblock(html, false, 0, true);
3998}
3999
4001{
4002 Style *style = html->style ();
4003 Style *wordStyle = html->wordStyle ();
4004 Widget **ref_list_item;
4005 ListItem *list_item;
4006 int *list_number;
4007 char buf[16];
4008
4009 /* Get our parent tag's variables (used as state storage) */
4010 list_number = &html->stack->getRef(html->stack->size()-2)->list_number;
4011 ref_list_item = &html->stack->getRef(html->stack->size()-2)->ref_list_item;
4012
4013 HT2TB(html)->addParbreak (0, wordStyle);
4014
4015 list_item = new ListItem ((ListItem*)*ref_list_item,prefs.limit_text_width);
4016 HT2TB(html)->addWidget (list_item, style);
4017 HT2TB(html)->addParbreak (0, wordStyle);
4018 *ref_list_item = list_item;
4019 S_TOP(html)->textblock = html->dw = list_item;
4020
4022 // none
4024 // ordered
4025 numtostr((*list_number)++, buf, 16, style->listStyleType);
4026 list_item->initWithText (buf, wordStyle);
4027 } else {
4028 // unordered
4029 list_item->initWithWidget (new Bullet(), wordStyle);
4030 }
4031}
4032
4034{
4035 if (S_TOP(html)->display_none)
4036 return false;
4037
4038 if (html->style()->display == DISPLAY_NONE)
4039 return false;
4040
4041 return true;
4042}
4043
/*
 * Process one complete tag found by the tokenizer.
 *
 * tag points at the '<' and tagsize is the length of the tag text.
 * Unknown tags are ignored, except that "<!doctype" is passed to
 * Html_parse_doctype() as long as the HTML element has not been opened.
 * An open tag runs the "case 0" part and then falls through into the
 * close part, so that elements with ReqTagClose set, or written in the
 * XML self-closing form, are closed right after being opened.
 * Nothing is done once html->stop_parser is set.
 */
4049static void Html_process_tag(DilloHtml *html, char *tag, int tagsize)
4050{
4051 int ti, ni; /* stack tag index and new tag index */
4052 char *start = tag + 1; /* discard the '<' */
4053 int IsCloseTag = (*start == '/');
4054
4055 dReturn_if (html->stop_parser == true);
4056
/* IsCloseTag is 0 or 1, so this skips the '/' of a close tag */
4057 ni = a_Html_tag_index(start + IsCloseTag);
4058 if (ni == -1) {
4059 /* TODO: doctype parsing is a bit fuzzy, but enough for the time being */
4060 if (!(html->InFlags & IN_HTML)) {
4061 if (tagsize > 9 && !dStrnAsciiCasecmp(tag, "<!doctype", 9))
4062 Html_parse_doctype(html, tag, tagsize);
4063 }
4064 /* Ignore unknown tags */
4065 return;
4066 }
4067 _MSG("Html_process_tag: %s%s\n", IsCloseTag ? "/" : "", Tags[ni].name);
4068
4069 if (!IsCloseTag && html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
4070 Html_check_html5_obsolete(html, ni);
4071
/* i: 1 = previous tag was </html>, 2 = previous tag was </body>, 0 = none.
 * A tag following </body> is only tolerated when it is HTML. */
4072 int i = html->PrevWasHtmlClose ? 1 : html->PrevWasBodyClose ? 2 : 0;
4073 if (i == 1 || (i == 2 && ni != i_HTML))
4074 BUG_MSG("Content after </%s> tag.", i == 1 ? "html" : "body");
4075 html->PrevWasHtmlClose = html->PrevWasBodyClose = false;
4076
4077 /* Handle HTML, HEAD and BODY. Elements with optional open and close */
4078 if (!(html->InFlags & IN_BODY) /* && parsing HTML */)
4079 Html_test_section(html, ni, IsCloseTag);
4080
4081 /* Tag processing */
4082 ti = S_TOP(html)->tag_idx;
4083 switch (IsCloseTag) {
4084 case 0:
4085 /* Open function */
4086
4087 /* Cleanup before opening a new tag */
/* NOTE(review): the statement guarded by the following `if` (original
 * line 4089) is missing from this listing; presumably it is the call to
 * Html_stack_cleanup_at_open() -- confirm against the repository. */
4088 if (ti != -1)
4090
4091 /* TODO: this is only raising a warning, but allows the element.
4092 * Note: Apache uses IMG inside PRE. */
4093 if ((html->InFlags & IN_PRE) && Html_tag_pre_excludes(html, ni))
4094 BUG_MSG("<pre> is not allowed to contain <%s>.", Tags[ni].name);
4095
4096 /* Push the tag into the stack */
4097 Html_push_tag(html, ni);
4098
4099 html->startElement (ni);
4100 _MSG("Open : %*s%s\n", html->stack->size(), " ", Tags[ni].name);
4101
4102 /* Parse attributes that can appear on any tag */
4103 Html_parse_common_attrs(html, tag, tagsize);
4104
4105 /* Call the open function for this tag */
4106 _MSG("Html_process_tag Open : %s\n", Tags[ni].name);
4107 Tags[ni].open (html, tag, tagsize);
4108
/* Act on the computed CSS 'display' value, unless the element is already
 * hidden. */
4109 if (! S_TOP(html)->display_none) {
4110 switch (html->style ()->display) {
4111 case DISPLAY_BLOCK:
4112 Html_display_block(html);
4113 break;
/* NOTE(review): original lines 4114-4115 (a case label and its statement)
 * and line 4118 (the statement of DISPLAY_LIST_ITEM) are missing from this
 * listing. */
4116 break;
4117 case DISPLAY_LIST_ITEM:
4119 break;
4120 case DISPLAY_NONE:
4121 S_TOP(html)->display_none = true;
4122 break;
4123 case DISPLAY_INLINE:
/* a floating inline element is handled like a block */
4124 if (html->style()->vloat != FLOAT_NONE)
4125 Html_display_block(html);
4126 break;
4127 default:
4128 break;
4129 }
4130
/* display_none is tested again: the DISPLAY_NONE case above may just have
 * set it */
4131 if (Tags[ni].content && ! S_TOP(html)->display_none) {
4132 Tags[ni].content (html, tag, tagsize);
4133 }
4134 }
4135
4136 if (html->stop_parser)
4137 break;
4138
/* Switch to PRE parsing when the style asks for preformatted white space,
 * unless the element is parsed verbatim or is already in PRE mode. */
4139 if (S_TOP(html)->parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
4140 /* don't change anything */
4141 } else if (S_TOP(html)->parse_mode != DILLO_HTML_PARSE_MODE_PRE &&
4142 (html->style ()->whiteSpace == WHITE_SPACE_PRE ||
4143 html->style ()->whiteSpace == WHITE_SPACE_PRE_WRAP)) {
4144 S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_PRE;
4145 html->pre_column = 0;
4146 html->PreFirstChar = true;
4147 }
4148
4149 if (html->styleEngine->getId ())
4150 Html_add_anchor(html, html->styleEngine->getId ());
4151
4152 /* Request immediate close for elements with forbidden close tag. */
4153 /* TODO: XHTML always requires close tags. A simple implementation
4154 * of the commented clause below will make it work. */
4155 if (/* parsing HTML && */ Tags[ni].EndTag == 'F')
4156 html->ReqTagClose = true;
4157
4158 /* Don't break! Open tags may also close themselves */
4159 /* fallthrough */
4160
4161 default:
4162 /* Close function */
4163
4164 /* Test for </x>, ReqTagClose, <x /> and <x/> */
/* tag[tagsize-2] is the char right before the final '>'; the "<x/>" form
 * is recognized by its length: name plus '<', '/' and '>' */
4165 if (*start == '/' || /* </x> */
4166 html->ReqTagClose || /* request */
4167 (tag[tagsize-2] == '/' && /* XML: */
4168 (strchr(" \"'", tag[tagsize-3]) || /* [ "']/> */
4169 (size_t)tagsize == strlen(Tags[ni].name) + 3))) { /* <x/> */
4170
4171 _MSG("Html_process_tag Close: %s\n", Tags[ni].name);
4172 Html_tag_cleanup_at_close(html, ni);
4173 /* This was a close tag */
4174 html->ReqTagClose = false;
4175 }
4176 }
4177}
4178
/*
 * Look for attribute 'attrname' inside 'tag' and return its value.
 *
 * The tag is scanned with a small state machine. Attribute names are
 * compared ignoring ASCII case. The value is built in html->attr_data,
 * which is emptied at the start of every call, so the returned pointer is
 * only meaningful until the next lookup.
 *
 * tag_parsing_flags is a bit mask: HTML_ParseEntities (replace entities in
 * the value), HTML_LeftTrim and HTML_RightTrim (strip white space).
 *
 * Return value: the buffer's string when the attribute was found (also
 * when it carries no value), NULL otherwise.
 */
4189static const char *Html_get_attr2(DilloHtml *html,
4190 const char *tag,
4191 int tagsize,
4192 const char *attrname,
4193 int tag_parsing_flags)
4194{
4195 int i, entsize, Found = 0, delimiter = 0, attr_pos = 0;
4196 Dstr *Buf = html->attr_data;
/* NOTE(review): the declaration of `state` (original line 4197) is missing
 * from this listing; the loop below assumes it starts as one of the
 * SEEK_* / MATCH_* values -- confirm against the repository. */
4198
4199 dReturn_val_if_fail(*attrname, NULL);
4200
4201 dStr_truncate(Buf, 0);
4202
/* i starts at 1 to skip the leading '<' */
4203 for (i = 1; i < tagsize; ++i) {
4204 switch (state) {
/* skip the rest of the current (non matching) token */
4205 case SEEK_ATTR_START:
4206 if (isspace(tag[i]))
4207 state = SEEK_TOKEN_START;
4208 else if (tag[i] == '=')
4209 state = SEEK_VALUE_START;
4210 break;
4211
/* compare the token char by char with attrname; it matches when attrname
 * is exhausted exactly where the token ends */
4212 case MATCH_ATTR_NAME:
4213 if (!attrname[attr_pos] &&
4214 (tag[i] == '=' || isspace(tag[i]) || tag[i] == '>')) {
4215 Found = 1;
4216 state = SEEK_TOKEN_START;
/* re-examine this char in the new state */
4217 --i;
4218 } else if (!tag[i]) {
4219 state = SEEK_ATTR_START; // NULL byte is not allowed
4220 } else {
4221 if (D_ASCII_TOLOWER(tag[i]) != D_ASCII_TOLOWER(attrname[attr_pos]))
4222 state = SEEK_ATTR_START;
4223 attr_pos++;
4224 }
4225 break;
4226
/* between tokens: either a '=' follows, or a new attribute name starts.
 * If the attribute was already found, a new token means it has no value. */
4227 case SEEK_TOKEN_START:
4228 if (tag[i] == '=') {
4229 state = SEEK_VALUE_START;
4230 } else if (!isspace(tag[i])) {
4231 attr_pos = 0;
4232 state = (Found) ? FINISHED : MATCH_ATTR_NAME;
4233 --i;
4234 }
4235 break;
/* after '=': a quote char becomes the delimiter; an unquoted value is
 * delimited by white space and its first char must not be skipped */
4236 case SEEK_VALUE_START:
4237 if (!isspace(tag[i])) {
4238 delimiter = (tag[i] == '"' || tag[i] == '\'') ? tag[i] : ' ';
4239 i -= (delimiter == ' ');
4240 state = (Found) ? GET_VALUE : SKIP_VALUE;
4241 }
4242 break;
4243
/* value of an attribute we are not interested in */
4244 case SKIP_VALUE:
4245 if ((delimiter == ' ' && isspace(tag[i])) || tag[i] == delimiter)
4246 state = SEEK_TOKEN_START;
4247 break;
/* copy the value of the requested attribute into Buf */
4248 case GET_VALUE:
4249 if ((delimiter == ' ' && (isspace(tag[i]) || tag[i] == '>')) ||
4250 tag[i] == delimiter) {
4251 state = FINISHED;
4252 } else if (tag[i] == '&' &&
4253 (tag_parsing_flags & HTML_ParseEntities)) {
4254 const char *entstr;
4255 const bool_t is_attr = TRUE;
4256
4257 if ((entstr = Html_parse_entity(html, tag+i, tagsize-i, &entsize,
4258 is_attr))) {
4259 dStr_append(Buf, entstr);
/* -1 because the for loop increments i as well */
4260 i += entsize-1;
4261 } else {
4262 dStr_append_c(Buf, tag[i]);
4263 }
4264 } else if (tag[i] == '\r' || tag[i] == '\t') {
4265 dStr_append_c(Buf, ' ');
4266 } else if (tag[i] == '\n') {
4267 /* ignore */
4268 } else {
4269 dStr_append_c(Buf, tag[i]);
4270 }
4271 break;
4272
/* leave the for loop */
4273 case FINISHED:
4274 i = tagsize;
4275 break;
4276 }
4277 }
4278
4279 if (tag_parsing_flags & HTML_LeftTrim)
4280 while (isspace(Buf->str[0]))
4281 dStr_erase(Buf, 0, 1);
4282 if (tag_parsing_flags & HTML_RightTrim)
4283 while (Buf->len && isspace(Buf->str[Buf->len - 1]))
4284 dStr_truncate(Buf, Buf->len - 1);
4285
4286 return (Found) ? Buf->str : NULL;
4287}
4288
/*
 * Public wrapper around Html_get_attr2(): return the value of attribute
 * 'attrname' in 'tag', or whatever Html_get_attr2() returns when it is not
 * present.
 *
 * NOTE(review): the last argument of the call (original line 4298, the
 * tag_parsing_flags value) is missing from this listing.
 */
4292const char *a_Html_get_attr(DilloHtml *html,
4293 const char *tag,
4294 int tagsize,
4295 const char *attrname)
4296{
4297 return Html_get_attr2(html, tag, tagsize, attrname,
4299}
4300
4307 const char *tag,
4308 int tagsize,
4309 const char *attrname,
4310 const char *def)
4311{
4312 const char *attrbuf = a_Html_get_attr(html, tag, tagsize, attrname);
4313
4314 return attrbuf ? dStrdup(attrbuf) : dStrdup(def);
4315}
4316
4325static void Html_callback(int Op, CacheClient_t *Client)
4326{
4327 DilloHtml *html = (DilloHtml*)Client->CbData;
4328
4329 if (Op) { /* EOF */
4330 html->write((char*)Client->Buf, Client->BufSize, 1);
4331 html->finishParsing(Client->Key);
4332 } else {
4333 html->write((char*)Client->Buf, Client->BufSize, 0);
4334 }
4335}
4336
/*
 * Split 'buf' into tokens (white space, comments, tags and words) and pass
 * every complete token to its handler.
 *
 * bufsize: number of bytes to look at.
 * Eof:     nonzero when no more data will follow; only then a word that
 *          reaches the end of the buffer is processed.
 *
 * Return value: the offset of the first byte that was not consumed, i.e.
 * the start of the token left incomplete (or bufsize-like offsets when
 * everything was consumed).
 *
 * NOTE(review): the code looks one byte ahead (buf[buf_index + 1]) and uses
 * strcspn() on buf, so it presumably relies on buf being NUL-terminated --
 * confirm with the caller.
 */
4341static int Html_write_raw(DilloHtml *html, char *buf, int bufsize, int Eof)
4342{
4343 char ch = 0, *p, *text;
4344 int token_start, buf_index;
4345
4346 /* Now, 'buf' and 'bufsize' define a buffer aligned to start at a token
4347 * boundary. Iterate through tokens until end of buffer is reached. */
4348 buf_index = 0;
4349 token_start = buf_index;
4350 while ((buf_index < bufsize) && !html->stop_parser) {
4351 /* invariant: buf_index == bufsize || token_start == buf_index */
4352
/* NOTE(review): the right-hand side of this comparison (original line
 * 4354) is missing from this listing; judging by the comments below it is
 * the verbatim parse mode -- confirm against the repository. */
4353 if (S_TOP(html)->parse_mode ==
4355 /* Non HTML code here, let's skip until closing tag */
4356 do {
4357 const char *tag = Tags[S_TOP(html)->tag_idx].name;
4358 buf_index += strcspn(buf + buf_index, "<");
/* not enough bytes left to hold "</" + name + one more char: wait for
 * more data */
4359 if (buf_index + (int)strlen(tag) + 3 > bufsize) {
4360 buf_index = bufsize;
4361 } else if (strncmp(buf + buf_index, "</", 2) == 0 &&
4362 Html_match_tag(tag, buf+buf_index+2, strlen(tag)+1)) {
4363 /* copy VERBATIM text into the stash buffer */
4364 text = dStrndup(buf + token_start, buf_index - token_start);
4365 dStr_append(html->Stash, text);
4366 dFree(text);
4367 token_start = buf_index;
4368 break;
4369 } else
4370 ++buf_index;
4371 } while (buf_index < bufsize);
4372
4373 if (buf_index == bufsize)
4374 break;
4375 }
4376
4377 if (isspace(buf[buf_index])) {
4378 /* whitespace: group all available whitespace */
4379 while (++buf_index < bufsize && isspace(buf[buf_index])) ;
4380 Html_process_space(html, buf + token_start, buf_index - token_start);
4381 token_start = buf_index;
4382
/* a '<' only starts a tag when followed by a letter, '/', '!' or '?' */
4383 } else if (buf[buf_index] == '<' && (ch = buf[buf_index + 1]) &&
4384 (isalpha(ch) || strchr("/!?", ch)) ) {
4385 /* Tag */
4386 if (buf_index + 3 < bufsize && !strncmp(buf + buf_index, "<!--", 4)) {
4387 /* Comment: search for close of comment, skipping over
4388 * everything except a matching "-->" tag. */
4389 while ( (p = (char*) memchr(buf + buf_index, '>',
4390 bufsize - buf_index)) ){
4391 buf_index = p - buf + 1;
4392 if (p[-1] == '-' && p[-2] == '-') break;
4393 }
4394 if (p) {
4395 /* Got the whole comment. Let's throw it away! :) */
4396 token_start = buf_index;
4397 } else
/* comment not closed yet: token_start keeps pointing at its start */
4398 buf_index = bufsize;
4399 } else {
4400 /* Tag: search end of tag (skipping over quoted strings) */
4401 html->CurrOfs = html->Start_Ofs + token_start;
4402
4403 while ( buf_index < bufsize ) {
4404 buf_index++;
4405 buf_index += strcspn(buf + buf_index, ">\"'<");
4406 if ((ch = buf[buf_index]) == '>') {
4407 break;
4408 } else if (ch == '"' || ch == '\'') {
4409 /* Skip over quoted string */
4410 buf_index++;
4411 buf_index += strcspn(buf + buf_index,
4412 (ch == '"') ? "\">" : "'>");
4413 if (buf[buf_index] == '>') {
4414 /* Unterminated string value? Let's look ahead and test:
4415 * (<: unterminated, closing-quote: terminated) */
4416 int offset = buf_index + 1;
4417 offset += strcspn(buf + offset,
4418 (ch == '"') ? "\"<" : "'<");
4419 if (buf[offset] == ch || !buf[offset]) {
4420 buf_index = offset;
4421 } else {
4422 BUG_MSG("Attribute lacks closing quote.");
4423 break;
4424 }
4425 }
4426 } else if (ch == '<') {
4427 /* unterminated tag detected */
4428 p = dStrndup(buf+token_start+1,
4429 strcspn(buf+token_start+1, " <\n\r\t"));
4430 BUG_MSG("<%s> lacks its closing '>'.", p);
4431 dFree(p);
/* step back so that the new '<' is not swallowed by the increment below */
4432 --buf_index;
4433 break;
4434 }
4435 }
/* the tag is only processed when its end was found inside the buffer */
4436 if (buf_index < bufsize) {
4437 buf_index++;
4438 Html_process_tag(html, buf + token_start,
4439 buf_index - token_start);
4440 token_start = buf_index;
4441 }
4442 }
4443 } else {
4444 /* A Word: search for whitespace or tag open */
4445 html->CurrOfs = html->Start_Ofs + token_start;
4446
4447 while (++buf_index < bufsize) {
4448 buf_index += strcspn(buf + buf_index, " <\n\r\t\f\v");
/* a '<' that does not start a tag belongs to the word */
4449 if (buf[buf_index] == '<' && (ch = buf[buf_index + 1]) &&
4450 !isalpha(ch) && !strchr("/!?", ch))
4451 continue;
4452 break;
4453 }
4454 if (buf_index < bufsize || Eof) {
4455 /* successfully found end of token */
/* NUL-terminate the word in place while it is processed, then restore the
 * overwritten char */
4456 ch = buf[buf_index];
4457 buf[buf_index] = 0;
4458 Html_process_word(html, buf + token_start,
4459 buf_index - token_start);
4460 buf[buf_index] = ch;
4461 token_start = buf_index;
4462 }
4463 }
4464 }/*while*/
4465
4466 HT2TB(html)->flush ();
4467
4468 return token_start;
4469}
4470
4471
#define B8(d)
Definition binaryconst.h:32
#define _MSG(...)
Definition bookmarks.c:45
#define MSG(...)
Definition bookmarks.c:46
bool_t a_Bw_expecting(BrowserWindow *bw)
Definition bw.c:336
void a_Bw_close_client(BrowserWindow *bw, int ClientKey)
Close a cache-client upon successful retrieval.
Definition bw.c:167
void a_Bw_add_url(BrowserWindow *bw, const DilloUrl *Url)
Add an URL to the browser window's list.
Definition bw.c:211
void a_Bw_add_client(BrowserWindow *bw, int Key, int Root)
Add a reference to a cache-client.
Definition bw.c:128
void a_Bw_remove_doc(BrowserWindow *bw, void *vdoc)
Remove a document from the bw's list.
Definition bw.c:265
void a_Bw_add_doc(BrowserWindow *bw, void *vdoc)
Add a document to the browser window's list.
Definition bw.c:223
void(* CA_Callback_t)(int Op, CacheClient_t *Client)
Callback type for cache clients.
Definition cache.h:55
int a_Capi_get_buf(const DilloUrl *Url, char **PBuf, int *BufSize)
Get the cache's buffer for the URL, and its size.
Definition capi.c:543
const char * a_Capi_set_content_type(const DilloUrl *url, const char *ctype, const char *from)
Set the Content-Type for the URL.
Definition capi.c:567
int a_Capi_dpi_verify_request(BrowserWindow *bw, DilloUrl *url)
Safety test: only allow GET|POST dpi-urls from dpi-generated pages.
Definition capi.c:233
void a_Capi_unref_buf(const DilloUrl *Url)
Unref the cache's buffer when no longer using it.
Definition capi.c:551
int a_Capi_open_url(DilloWeb *web, CA_Callback_t Call, void *CbData)
Most used function for requesting a URL.
Definition capi.c:394
int a_Capi_get_flags_with_redirection(const DilloUrl *Url)
Same as a_Capi_get_flags() but following redirections.
Definition capi.c:532
#define CAPI_IsCached
Definition capi.h:15
#define CAPI_Completed
Definition capi.h:19
A list of CssProperty objects.
Definition css.hh:331
bool enter(dw::core::Widget *widget, int link, int img, int x, int y)
Called, when a link is entered, left, or the position has changed.
Definition html.cc:740
bool press(dw::core::Widget *widget, int link, int img, int x, int y, dw::core::EventButton *event)
Handle the "press" signal.
Definition html.cc:760
bool click(dw::core::Widget *widget, int link, int img, int x, int y, dw::core::EventButton *event)
Handle the "click" signal.
Definition html.cc:794
bool ReqTagClose
Flag to close the stack's top tag.
void freeParseData()
Free parsing data.
Definition html.cc:627
int InFlags
tracks which elements we are in
DilloHtmlDocumentType DocType
DilloUrl * base_url
int getCurrLineNumber()
Return the line number of the tag/word being processed by the parser.
Definition html.cc:605
bool stop_parser
void bugMessage(const char *format,...)
Collect HTML error strings.
Definition html.cc:153
lout::misc::SimpleVector< DilloHtmlState > * stack
void write(char *Buf, int BufSize, int Eof)
Process the newly arrived html and put it into the page structure.
Definition html.cc:578
dw::core::style::Style * backgroundStyle()
char * Start_Buf
int formNew(DilloHtmlMethod method, const DilloUrl *action, DilloHtmlEnc enc, const char *charset)
Allocate and insert form information.
Definition html.cc:667
void finishParsing(int ClientKey)
Finish parsing a HTML page.
Definition html.cc:641
bool PreFirstChar
used to skip the first CR or CRLF in PRE tags
void startElement(int tag)
size_t CurrOfs
void addCssUrl(const DilloUrl *url)
Save URL in a vector (may be loaded later).
Definition html.cc:733
void initDw()
Miscellaneous initializations for Dw.
Definition html.cc:522
bool PrevWasCR
Flag to help parsing of "\r\n" in PRE tags.
float DocTypeVersion
Dstr * Stash
bool TagSoup
Flag to enable the parser's cleanup functions.
bool_t unloadedImages()
Definition html.cc:689
int pre_column
current column, used in PRE tags with tabs
DilloUrl * page_url
lout::misc::SimpleVector< DilloHtmlInput * > * inputs_outside_form
bool PrevWasHtmlClose
set when </html> is found
void restyle()
uchar_t Num_BODY
dw::core::style::Style * wordStyle()
bool StashSpace
BrowserWindow * bw
int32_t non_css_visited_color
as provided by vlink attribute in BODY
size_t OldLine
bool loadCssFromStash
current stash content should be loaded as CSS
lout::misc::SimpleVector< DilloHtmlForm * > * forms
dw::ImageMapsList maps
DilloHtml(BrowserWindow *bw, const DilloUrl *url, const char *content_type)
Create and initialize a new DilloHtml class.
Definition html.cc:432
~DilloHtml()
Free memory used by the DilloHtml class.
Definition html.cc:536
uchar_t Num_TITLE
dw::core::style::Style * style()
size_t OldOfs
uchar_t Num_HTML
element counters: used for validation purposes.
lout::misc::SimpleVector< DilloUrl * > * links
dw::core::Widget * dw
int32_t non_css_link_color
as provided by link attribute in BODY
char * content_type
Dstr * attr_data
Buffer for attribute value.
bool PrevWasBodyClose
set when </body> is found
lout::misc::SimpleVector< DilloUrl * > * cssUrls
int32_t visited_color
as computed according to CSS
uchar_t Num_HEAD
char * charset
bool InVisitedLink
used to 'contrast_visited_colors'
StyleEngine * styleEngine
lout::misc::SimpleVector< DilloHtmlImage * > * images
HtmlLinkReceiver linkReceiver
DilloHtmlForm * getCurrentForm()
Get the current form.
Definition html.cc:684
void loadImages(const DilloUrl *pattern)
Load images if they were disabled.
Definition html.cc:702
This class provides the glue between HTML parser and CSS subsystem.
void parse(DilloHtml *html, DilloUrl *url, const char *buf, int buflen, CssOrigin origin)
void setPseudoVisited()
set the CSS pseudo class :visited.
void setNonCssHint(CssPropertyName name, CssValueType type, CssPropertyValue value)
void setId(const char *id)
const char * getId()
dw::core::style::Color * backgroundColor()
void endElement(int tag)
tell the styleEngine that a html element has ended.
void setPseudoLink()
set the CSS pseudo class :link.
void setStyle(const char *style)
void setClass(const char *klass)
void inheritBackgroundColor()
Use of the background color of the parent style as default.
dw::core::style::StyleImage * backgroundImage(dw::core::style::BackgroundRepeat *bgRepeat, dw::core::style::BackgroundAttachment *bgAttachment, dw::core::style::Length *bgPositionX, dw::core::style::Length *bgPositionY)
Displays different kind of bullets.
Definition bullet.hh:15
void addShapeToCurrentMap(core::Shape *shape, int link)
Add a shape to the current map-.
Definition image.cc:111
void startNewMap(lout::object::Object *key)
Start a new map and make it the current one.
Definition image.cc:99
void setCurrentMapDefaultLink(int link)
Set default link for current map-.
Definition image.cc:119
Displays an instance of dw::core::Imgbuf.
Definition image.hh:140
void setIsMap()
Sets image as server side image map.
Definition image.cc:571
void setUseMap(ImageMapsList *list, Object *key)
Sets image as client side image map.
Definition image.cc:584
void forceMapRedraw()
Definition image.hh:204
void initWithText(const char *text, core::style::Style *style)
Definition listitem.cc:58
void initWithWidget(core::Widget *widget, core::style::Style *style)
Definition listitem.cc:48
Widget for drawing (horizontal) rules.
Definition ruler.hh:21
A Widget for rendering text blocks, i.e.
Definition textblock.hh:206
void addWidget(core::Widget *widget, core::style::Style *style)
Add a widget (word type) to the page.
void addSpace(core::style::Style *style)
?
void addText(const char *text, size_t len, core::style::Style *style)
Add a word to the page structure.
void addParbreak(int space, core::style::Style *style)
Cause a paragraph break.
static int CLASS_ID
Definition textblock.hh:867
dw::core::Shape implementation for simple circles.
Definition types.hh:91
Represents a button press or release event.
Definition events.hh:58
ButtonState state
Definition events.hh:42
dw::core::Shape implementation for polygons.
Definition types.hh:105
void addPoint(int x, int y)
Definition types.cc:156
dw::core::Shape implementation for simple rectangles.
Definition types.hh:70
Abstract interface for different shapes.
Definition types.hh:59
The base class of all dillo widgets.
Definition widget.hh:44
virtual void setStyle(style::Style *style)
Change the style of a widget.
Definition widget.cc:1316
style::Color * getFgColor()
Get the actual foreground color of a widget.
Definition widget.cc:1450
Widget * getParent()
Definition widget.hh:587
Layout * getLayout()
Definition widget.hh:602
style::Color * getBgColor()
Get the actual background of a widget.
Definition widget.cc:1431
ListStyleType listStyleType
Definition style.hh:568
static Style * create(StyleAttrs *attrs)
Definition style.hh:629
Simple (simpler than container::untyped::Vector and container::typed::Vector) template based vector.
Definition misc.hh:115
void setSize(int newSize)
Set the size explicitly.
Definition misc.hh:188
void increase()
Increase the vector size by one.
Definition misc.hh:181
void set(int i, T t)
Store an object in the vector.
Definition misc.hh:267
T get(int i) const
Return the one element, explicitly.
Definition misc.hh:222
int size() const
Return the number of elements put into this vector.
Definition misc.hh:162
T * getRef(int i) const
Return the reference of one element.
Definition misc.hh:211
int32_t a_Color_parse(const char *str, int32_t default_color, int *err)
Parse a color string.
Definition colors.c:258
int32_t a_Color_vc(int32_t candidate, int32_t C_txt, int32_t C_lnk, int32_t C_bg)
Return a suitable "visited link" color.
Definition colors.c:344
@ CSS_PROPERTY_LIST_STYLE_TYPE
Definition css.hh:210
@ CSS_PROPERTY_MARGIN_BOTTOM
Definition css.hh:211
@ CSS_PROPERTY_BORDER_RIGHT_STYLE
Definition css.hh:178
@ CSS_PROPERTY_MARGIN_RIGHT
Definition css.hh:213
@ PROPERTY_X_LANG
Definition css.hh:249
@ CSS_PROPERTY_BORDER_RIGHT_WIDTH
Definition css.hh:179
@ CSS_PROPERTY_BORDER_LEFT_WIDTH
Definition css.hh:176
@ PROPERTY_X_LINK
Definition css.hh:248
@ PROPERTY_X_TOOLTIP
Definition css.hh:251
@ CSS_PROPERTY_MARGIN_LEFT
Definition css.hh:212
@ CSS_PROPERTY_HEIGHT
Definition css.hh:204
@ CSS_PROPERTY_WIDTH
Definition css.hh:242
@ PROPERTY_X_IMG
Definition css.hh:250
@ CSS_PROPERTY_BORDER_TOP_WIDTH
Definition css.hh:183
@ CSS_PROPERTY_MARGIN_TOP
Definition css.hh:214
@ CSS_PROPERTY_VERTICAL_ALIGN
Definition css.hh:239
@ CSS_PROPERTY_BORDER_LEFT_STYLE
Definition css.hh:175
@ CSS_PROPERTY_TEXT_ALIGN
Definition css.hh:232
@ CSS_PROPERTY_BORDER_TOP_STYLE
Definition css.hh:182
@ CSS_PROPERTY_BACKGROUND_COLOR
Definition css.hh:166
@ CSS_PROPERTY_COLOR
Definition css.hh:188
@ CSS_PROPERTY_BORDER_BOTTOM_STYLE
Definition css.hh:171
@ CSS_PROPERTY_BORDER_BOTTOM_WIDTH
Definition css.hh:172
@ CSS_PROPERTY_FONT_FAMILY
Definition css.hh:197
@ CSS_TYPE_ENUM
Value is i, if represented by enum_symbols[i].
Definition css.hh:39
@ CSS_TYPE_INTEGER
This type is only used internally, for x-* properties.
Definition css.hh:37
@ CSS_TYPE_LENGTH_PERCENTAGE
<length> or <percentage>.
Definition css.hh:43
@ CSS_TYPE_STRING
<string>
Definition css.hh:58
@ CSS_TYPE_SYMBOL
Symbols, which are directly copied (as opposed to CSS_TYPE_ENUM and CSS_TYPE_MULTI_ENUM).
Definition css.hh:59
@ CSS_TYPE_COLOR
Represented as integer.
Definition css.hh:55
CssLengthType CSS_LENGTH_TYPE(CssLength l)
Definition css.hh:134
CssLength CSS_CREATE_LENGTH(float v, CssLengthType t)
Definition css.hh:102
@ CSS_LENGTH_TYPE_PX
Definition css.hh:76
@ CSS_LENGTH_TYPE_RELATIVE
This does not exist in CSS but is used in HTML.
Definition css.hh:88
@ CSS_LENGTH_TYPE_PERCENTAGE
Definition css.hh:87
@ CSS_LENGTH_TYPE_AUTO
This can be used as a simple value.
Definition css.hh:90
float CSS_LENGTH_VALUE(CssLength l)
Definition css.hh:138
@ CSS_ORIGIN_AUTHOR
Definition css.hh:33
unsigned int uint_t
Definition d_size.h:20
unsigned char bool_t
Definition d_size.h:21
char * a_Url_decode_hex_str(const char *str, size_t *p_sz)
Definition datauri.c:137
static const int bufsize
Definition decode.c:28
char * dStrconcat(const char *s1,...)
Concatenate a NULL-terminated list of strings.
Definition dlib.c:102
void dFree(void *mem)
Definition dlib.c:68
int dStrAsciiCasecmp(const char *s1, const char *s2)
Definition dlib.c:203
void dStr_sprintfa(Dstr *ds, const char *format,...)
Printf-like function that appends.
Definition dlib.c:464
void dStr_append(Dstr *ds, const char *s)
Append a C string to a Dstr.
Definition dlib.c:316
char * dStrdup(const char *s)
Definition dlib.c:77
Dstr * dStr_sized_new(int sz)
Create a new string with a given size.
Definition dlib.c:254
int dStrnAsciiCasecmp(const char *s1, const char *s2, size_t n)
Definition dlib.c:215
void dStr_erase(Dstr *ds, int pos_0, int len)
Erase a substring.
Definition dlib.c:388
void dStr_free(Dstr *ds, int all)
Free a dillo string.
Definition dlib.c:337
char * dStriAsciiStr(const char *haystack, const char *needle)
Case insensitive strstr.
Definition dlib.c:184
void dStr_append_l(Dstr *ds, const char *s, int l)
Append a C string to a Dstr (providing length).
Definition dlib.c:308
void dStr_append_c(Dstr *ds, int c)
Append one character.
Definition dlib.c:349
char * dStrndup(const char *s, size_t sz)
Definition dlib.c:88
void dStr_sprintf(Dstr *ds, const char *format,...)
Printf-like function.
Definition dlib.c:450
void dStr_vsprintfa(Dstr *ds, const char *format, va_list argp)
vsprintf-like function that appends.
Definition dlib.c:401
Dstr * dStr_new(const char *s)
Create a new string.
Definition dlib.c:325
void dStr_truncate(Dstr *ds, int len)
Truncate a Dstr to be 'len' bytes long.
Definition dlib.c:368
char * dStrnfill(size_t len, char c)
Return a new string of length 'len' filled with 'c' characters.
Definition dlib.c:149
#define dReturn_if_fail(expr)
Definition dlib.h:90
#define dReturn_val_if_fail(expr, val)
Definition dlib.h:94
#define dReturn_if(expr)
Definition dlib.h:82
#define dReturn_val_if(expr, val)
Definition dlib.h:86
#define D_ASCII_TOLOWER(c)
Definition dlib.h:49
#define TRUE
Definition dlib.h:35
#define FALSE
Definition dlib.h:31
#define dNew(type, count)
Definition dlib.h:67
@ SKIP_VALUE
Definition dpip.c:70
static Image * image
#define IMAGE_MAX_AREA
Definition fltkimgbuf.cc:27
void a_History_set_title_by_url(const DilloUrl *url, const char *title)
Set the page-title for a given URL.
Definition history.c:130
static void Html_tag_open_video(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2525
static void Html_tag_open_title(DilloHtml *html, const char *tag, int tagsize)
Handle open TITLE.
Definition html.cc:1698
const char * a_Html_get_attr(DilloHtml *html, const char *tag, int tagsize, const char *attrname)
Call Html_get_attr2 telling it to parse entities and strip the result.
Definition html.cc:4292
static misc::SimpleVector< int > * Html_read_coords(DilloHtml *html, const char *str)
Read coords in a string, returning a vector of ints.
Definition html.cc:2380
DilloHtmlTagParsingFlags
Definition html.cc:86
@ HTML_RightTrim
Definition html.cc:88
@ HTML_LeftTrim
Definition html.cc:87
@ HTML_ParseEntities
Definition html.cc:89
static int i_HR
Definition html.cc:131
static void Html_tag_open_a(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2708
static void Html_css_load_callback(int Op, CacheClient_t *Client)
Called by the network engine when a stylesheet has new data.
Definition html.cc:3247
static void Html_real_pop_tag(DilloHtml *html)
Pop the top tag in the stack.
Definition html.cc:1385
static void Html_tag_open_blockquote(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2787
static int i_OPTGROUP
Definition html.cc:133
static void Html_push_tag(DilloHtml *html, int tag_idx)
Push the tag (copying attributes from the top of the stack)
Definition html.cc:1359
static void Html_tag_open_dir(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2855
static void Html_tag_close_a(DilloHtml *html)
Definition html.cc:2778
bool a_Html_tag_set_valign_attr(DilloHtml *html, const char *tag, int tagsize)
Evaluates the VALIGN attribute (top|bottom|middle|baseline) and sets the style in style_attrs.
Definition html.cc:363
static const TagInfo Tags[]
Function index for the open, content, and close functions for each tag.
Definition html.cc:3483
static int Html_triggers_optional_close(int old_idx, int cur_idx)
For elements with optional close, check whether it is time to close, by also following Firefox's de facto...
Definition html.cc:3651
static void Html_tag_open_html(DilloHtml *html, const char *tag, int tagsize)
Handle open HTML element.
Definition html.cc:1616
static bool Html_must_add_breaks(DilloHtml *html)
Definition html.cc:424
static int i_TBODY
Definition html.cc:140
static void Html_tag_content_map(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2329
static void Html_add_anchor(DilloHtml *html, const char *name)
Register an anchor for this page.
Definition html.cc:2689
static void Html_tag_open_span(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3400
static void Html_free(void *data)
Definition html.cc:222
static void Html_tag_open_pre(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3065
static void Html_tag_open_body(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:1793
static void Html_test_section(DilloHtml *html, int new_idx, int IsCloseTag)
HTML, HEAD and BODY elements have optional open and close tags.
Definition html.cc:3858
static int Html_set_new_link(DilloHtml *html, DilloUrl **url)
Create a new link, set it as the url's parent and return the index.
Definition html.cc:306
void a_Html_form_display_hiddens(void *v_html, void *v_form, bool_t display)
Used by the "Show/Hide hiddens" form menuitem.
Definition html.cc:279
static bool Html_contains_form(DilloHtml *html, void *v_form)
Search for form.
Definition html.cc:240
static void Html_parse_doctype(DilloHtml *html, const char *tag, int tagsize)
Handle DOCTYPE declaration.
Definition html.cc:1530
static void Html_tag_open_q(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2796
DilloUrl * a_Html_url_new(DilloHtml *html, const char *url_str, const char *base_url, int use_base_url)
Wrapper for a_Url_new that adds an error detection message.
Definition html.cc:172
static void Html_tag_content_frame(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:1950
static int i_TH
Definition html.cc:141
static void Html_display_listitem(DilloHtml *html)
Definition html.cc:4000
static int Html_charref_comp(const void *a, const void *b)
Comparison function for binary search.
Definition html.cc:968
static void Html_tag_open_frame(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:1925
static const char * Html_parse_entity(DilloHtml *html, const char *token, int toksize, int *entsize, bool_t is_attr)
Given an entity, return the corresponding string.
Definition html.cc:1045
static void Html_tag_open_ul(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2822
DilloHtmlTagParsingState
Definition html.cc:76
@ SKIP_VALUE
Definition html.cc:81
@ SEEK_TOKEN_START
Definition html.cc:79
@ MATCH_ATTR_NAME
Definition html.cc:78
@ SEEK_ATTR_START
Definition html.cc:77
@ SEEK_VALUE_START
Definition html.cc:80
@ FINISHED
Definition html.cc:83
@ GET_VALUE
Definition html.cc:82
static void Html_add_textblock(DilloHtml *html, bool addBreaks, int breakSpace, bool addBreakOpt)
Create and add a new Textblock to the current Textblock.
Definition html.cc:393
#define NTAGS
Definition html.cc:3596
static void Html_tag_open_h(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2006
static void Html_process_word(DilloHtml *html, const char *word, int size)
Handles putting the word into its proper place.
Definition html.cc:1226
static void Html_tag_open_object(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2488
static int i_BUTTON
Definition html.cc:127
void a_Html_tag_set_align_attr(DilloHtml *html, const char *tag, int tagsize)
Evaluates the ALIGN attribute (left|center|right|justify) and sets the style at the top of the stack.
Definition html.cc:318
static int i_DT
Definition html.cc:129
static void Html_tag_open_menu(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2871
static int i_SELECT
Definition html.cc:136
static void Html_display_block(DilloHtml *html)
Definition html.cc:3989
static void Html_tag_close_li(DilloHtml *html)
Definition html.cc:2957
static void Html_tag_close_pre(DilloHtml *html)
Definition html.cc:3076
static void Html_tag_content_frameset(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:1995
static Charref_t * Html_charref_search(char *key)
Binary search of 'key' in charref list.
Definition html.cc:976
static void Html_tag_open_link(DilloHtml *html, const char *tag, int tagsize)
Parse the LINK element (Only CSS stylesheets by now).
Definition html.cc:3313
static void Html_tag_content_br(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2018
static void Html_tag_close_media(DilloHtml *html)
Media (AUDIO/VIDEO) close function.
Definition html.cc:2628
static void Html_parse_common_attrs(DilloHtml *html, char *tag, int tagsize)
Parse attributes that can appear on any tag.
Definition html.cc:3912
static const char * Html_parse_numeric_charref(DilloHtml *html, char *tok, bool_t is_attr, int *entsize)
Parse a numeric character reference (e.g., "&#47;" or "&#x2F;").
Definition html.cc:886
static void Html_tag_content_object(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2514
static void Html_tag_open_source(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2590
static int i_A
Definition html.cc:125
static void Html_tag_close_q(DilloHtml *html)
Definition html.cc:2811
static void Html_tag_close_map(DilloHtml *html)
Handle close <MAP>.
Definition html.cc:2354
static void Html_eventually_pop_dw(DilloHtml *html, bool hand_over_break)
This function is called after popping the stack, to handle nested Textblock widgets.
Definition html.cc:1346
static void Html_tag_open_hr(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2966
static CssLength Html_parse_length_or_multi_length(const char *attr, char **endptr)
Definition html.cc:1404
static void Html_process_space_pre_line(DilloHtml *html, const char *space, int spacesize)
For white-space: pre-line, we must break the line if encountering a newline.
Definition html.cc:1118
static void Html_tag_cleanup_at_close(DilloHtml *html, int TagIdx)
Conditional cleanup of the stack, called before closing any tag.
Definition html.cc:3811
static void Html_process_space(DilloHtml *html, const char *space, int spacesize)
Parse spaces.
Definition html.cc:1140
void a_Html_form_submit(void *v_html, void *v_form)
Used by the "Submit form" form menuitem.
Definition html.cc:253
static void Html_tag_open_dd(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3057
static void Html_tag_content_hr(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3024
static const char * Html_get_attr2(DilloHtml *html, const char *tag, int tagsize, const char *attrname, int tag_parsing_flags)
Get attribute value for 'attrname' and return it.
Definition html.cc:4189
static int i_TD
Definition html.cc:138
static int i_TR
Definition html.cc:139
static void Html_callback(int Op, CacheClient_t *Client)
Dispatch the appropriate function for 'Op'.
Definition html.cc:4325
static void Html_process_tag(DilloHtml *html, char *tag, int tagsize)
Process a tag, given as 'tag' and 'tagsize'.
Definition html.cc:4049
static void Html_tag_close_par(DilloHtml *html)
Default close for paragraph tags - pop the stack and break.
Definition html.cc:3442
void(* TagCloseFunct)(DilloHtml *html)
Definition html.cc:74
static void Html_set_link_coordinates(DilloHtml *html, int link, int x, int y)
Set the URL data for image maps.
Definition html.cc:292
static void Html_tag_content_img(DilloHtml *html, const char *tag, int tagsize)
Create a new Image struct and request the image-url to the cache.
Definition html.cc:2283
static int i_DD
Definition html.cc:128
static void Html_update_content_type(DilloHtml *html, const char *content)
Update the document's content type information based on meta tag data.
Definition html.cc:3108
void(* TagOpenFunct)(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:73
static void Html_tag_open_default(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3392
static void Html_tag_open_embed(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2637
static int Html_ms_stupid_quotes_2ucs(int codepoint)
This is M$ non-standard "smart quotes" (w1252).
Definition html.cc:866
#define TAB_SIZE
Definition html.cc:58
static void Html_tag_open_img(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2220
void a_Html_load_images(void *v_html, DilloUrl *pattern)
Used by the "Load images" page menuitem.
Definition html.cc:230
static const char * Html_get_javascript_link(DilloHtml *html)
Test and extract the link from a javascript instruction.
Definition html.cc:2666
static void Html_tag_content_source(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2618
static int Html_write_raw(DilloHtml *html, char *buf, int bufsize, int Eof)
Here's where we parse the html and put it into the Textblock structure.
Definition html.cc:4341
static int i_OPTION
Definition html.cc:134
static int Html_forbids_cross_nesting(const int InFlags, const int new_idx)
Check nesting and cross-nesting between BUTTON, SELECT, TEXTAREA and A.
Definition html.cc:3690
static void Html_tag_close_html(DilloHtml *html)
Handle close HTML element.
Definition html.cc:1635
static void Html_tag_close_title(DilloHtml *html)
Handle close TITLE.
Definition html.cc:1718
static void Html_tag_open_meta(DilloHtml *html, const char *tag, int tagsize)
Handle META. We do not support http-equiv=refresh with delay>0 because it's non-standard,...
Definition html.cc:3134
static int i_LI
Definition html.cc:132
static void Html_tag_open_script(DilloHtml *html, const char *tag, int tagsize)
Handle open SCRIPT.
Definition html.cc:1736
static void Html_tag_open_abbr(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2056
static void Html_tag_content_wbr(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3450
static void Html_tag_open_sectioning(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3417
static void Html_tag_open_dt(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3048
static int i_BODY
Definition html.cc:126
void * a_Html_text(const char *type, void *P, CA_Callback_t *Call, void **Data)
Set callback function and callback data for the "html/text" MIME type.
Definition html.cc:211
static bool Html_load_image(BrowserWindow *bw, DilloUrl *url, const DilloUrl *requester, DilloImage *image)
Tell cache to retrieve image.
Definition html.cc:2202
static void Html_force_push_tag(DilloHtml *html, int tag_idx)
Push the tag (used to force an element with optional open into the stack).
Definition html.cc:1376
#define SGML_SPCDEL
Definition html.cc:56
static void Html_tag_open_style(DilloHtml *html, const char *tag, int tagsize)
Handle open STYLE.
Definition html.cc:1754
static const char * Html_parse_named_charref(DilloHtml *html, char *tok, bool_t is_attr, int *entsize)
Parse a named character reference (e.g., "&amp;" or "&hellip;").
Definition html.cc:989
static int Html_check_name_val(DilloHtml *html, const char *val, const char *attrname)
Check that 'val' is composed of characters inside [A-Za-z0-9:_.-].
Definition html.cc:1486
int a_Html_tag_index(const char *tag)
Get 'tag' index.
Definition html.cc:3624
static void Html_tag_open_base(DilloHtml *html, const char *tag, int tagsize)
Set the Document Base URI.
Definition html.cc:3363
static void Html_tag_content_area(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2410
static bool Html_match_tag(const char *tagstr, char *tag, int tagsize)
Does the tag in tagstr (e.g. "p") match the tag in the tag, tagsize structure (e.g. "<P align=center>")?
Definition html.cc:1328
static void Html_tag_close_body(DilloHtml *html)
Definition html.cc:1899
static void Html_tag_open_audio(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2558
static void Html_tag_open_font(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2026
static void Html_tag_close_head(DilloHtml *html)
Handle close HEAD element.
Definition html.cc:1672
static int i_TEXTAREA
Definition html.cc:137
static void Html_tag_cleanup_to_idx(DilloHtml *html, int s_idx, int new_idx, int fi, char op)
Cleanup the stack to a given index.
Definition html.cc:3709
static void Html_tag_content_embed(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2657
static void Html_tag_close_script(DilloHtml *html)
Handle close SCRIPT.
Definition html.cc:1745
static void Html_display_inline_block(DilloHtml *html)
Definition html.cc:3995
static void Html_stack_cleanup_at_open(DilloHtml *html, int ni)
Conditional cleanup of the stack (at open time).
Definition html.cc:3761
static int i_HTML
Definition html.cc:130
static int Html_tag_pre_excludes(DilloHtml *html, int tag_idx)
Check whether a tag is in the "excluding" element set for PRE.
Definition html.cc:3085
static void Html_tag_open_head(DilloHtml *html, const char *tag, int tagsize)
Handle open HEAD element.
Definition html.cc:1647
static void Html_tag_open_li(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2925
static int i_P
Definition html.cc:135
static int Html_tag_compare(const char *p1, const char *p2)
Definition html.cc:3609
static void Html_check_html5_obsolete(DilloHtml *html, int ni)
Warn when encountering elements that are obsolete in HTML5.
Definition html.cc:3966
static void Html_tag_open_p(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:1913
static void Html_tag_close_style(DilloHtml *html)
Handle close STYLE.
Definition html.cc:1783
static void Html_tag_open_ol(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2888
void a_Html_form_reset(void *v_html, void *v_form)
Used by the "Reset form" form menuitem.
Definition html.cc:266
static void Html_tag_open_dl(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3038
static void Html_tag_open_div(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:3433
#define HTML_NTAGS
Definition html.hh:24
#define NumRef
static const Charref_t Charrefs[NumRef]
const char * a_Html_get_attr(DilloHtml *html, const char *tag, int tagsize, const char *attrname)
Call Html_get_attr2 telling it to parse entities and strip the result.
Definition html.cc:4292
void a_Html_stash_init(DilloHtml *html)
Initialize the stash buffer.
Definition html.cc:853
@ DILLO_HTML_TABLE_MODE_NONE
no table at all
bool a_Html_should_display(DilloHtml *html)
Definition html.cc:4033
DilloUrl * a_Html_url_new(DilloHtml *html, const char *url_str, const char *base_url, int use_base_url)
Wrapper for a_Url_new that adds an error detection message.
Definition html.cc:172
#define HT2TB(html)
"html struct" to Textblock
DilloImage * a_Html_image_new(DilloHtml *html, const char *tag, int tagsize)
Definition html.cc:2143
CssLength a_Html_parse_length(DilloHtml *html, const char *attr)
Returns a length or a percentage, or UNDEF_LENGTH in case of an error, or if attr is NULL.
Definition html.cc:1443
int32_t a_Html_color_parse(DilloHtml *html, const char *str, int32_t default_color)
Parse a color attribute.
Definition html.cc:1468
void a_Html_tag_set_align_attr(DilloHtml *html, const char *tag, int tagsize)
Evaluates the ALIGN attribute (left|center|right|justify) and sets the style at the top of the stack.
Definition html.cc:318
DilloHtmlParseMode
@ DILLO_HTML_PARSE_MODE_PRE
@ DILLO_HTML_PARSE_MODE_VERBATIM
@ DILLO_HTML_PARSE_MODE_BODY
@ DILLO_HTML_PARSE_MODE_INIT
@ DILLO_HTML_PARSE_MODE_STASH_AND_BODY
@ DILLO_HTML_PARSE_MODE_STASH
#define BUG_MSG(...)
Add a bug-meter message.
void a_Html_common_image_attrs(DilloHtml *html, const char *tag, int tagsize)
Read image-associated tag attributes and create new image.
Definition html.cc:2073
#define S_TOP(html)
Top of the parsing stack.
@ IN_HEAD
@ IN_BUTTON
@ IN_LI
@ IN_MEDIA
@ IN_PRE
@ IN_BODY
@ IN_MAP
@ IN_TEXTAREA
@ IN_NONE
@ IN_META_HACK
@ IN_SELECT
@ IN_A
@ IN_HTML
@ IN_EOF
@ HTML_LIST_ORDERED
@ HTML_LIST_UNORDERED
@ HTML_LIST_NONE
#define HT2LT(html)
"html struct" to "Layout"
@ DILLO_HTML_TABLE_BORDER_SEPARATE
char * a_Html_get_attr_wdef(DilloHtml *html, const char *tag, int tagsize, const char *attrname, const char *def)
"a_Html_get_attr with default" Call a_Html_get_attr() and dStrdup() the returned string.
Definition html.cc:4306
int a_Html_tag_index(const char *tag)
Get 'tag' index.
Definition html.cc:3624
@ DT_UNRECOGNIZED
@ DT_NONE
@ DT_XHTML
@ DT_HTML
char * a_Html_parse_entities(DilloHtml *html, const char *token, int toksize)
Parse all the entities in a token.
Definition html.cc:1080
void a_Html_load_stylesheet(DilloHtml *html, DilloUrl *url)
Tell cache to retrieve a stylesheet.
Definition html.cc:3261
int a_Misc_content_type_cmp(const char *ct1, const char *ct2)
Compare two Content-Type strings.
Definition misc.c:274
void a_Misc_parse_content_type(const char *type, char **major, char **minor, char **charset)
Parse Content-Type string, e.g., "text/html; charset=utf-8".
Definition misc.c:210
#define d_isascii(c)
Definition misc.h:12
Anything related to Dillo Widget styles is defined here.
Definition style.cc:34
@ LIST_STYLE_TYPE_DISC
Definition style.hh:303
@ LIST_STYLE_TYPE_UPPER_ALPHA
Definition style.hh:313
@ LIST_STYLE_TYPE_LOWER_ALPHA
Definition style.hh:311
@ LIST_STYLE_TYPE_CIRCLE
Definition style.hh:304
@ LIST_STYLE_TYPE_UPPER_ROMAN
Definition style.hh:309
@ LIST_STYLE_TYPE_DECIMAL
Definition style.hh:306
@ LIST_STYLE_TYPE_SQUARE
Definition style.hh:305
@ LIST_STYLE_TYPE_NONE
Definition style.hh:323
@ LIST_STYLE_TYPE_LOWER_ROMAN
Definition style.hh:308
@ DISPLAY_INLINE_BLOCK
Definition style.hh:281
int Length
Type for representing all lengths within dw::core::style.
Definition style.hh:429
void numtostr(int num, char *buf, int buflen, ListStyleType listStyleType)
Convert a number into a string, in a given list style.
Definition style.cc:1422
@ WHITE_SPACE_PRE_LINE
Definition style.hh:364
@ WHITE_SPACE_PRE_WRAP
Definition style.hh:363
Anything related to embedded UI widgets is defined here.
Definition core.hh:36
The core of Dw is defined in this namespace.
Definition core.hh:23
@ SHIFT_MASK
Definition events.hh:17
@ CONTROL_MASK
Definition events.hh:18
Dw is in this namespace, or sub namespaces of this one.
Definition form.cc:25
DilloPrefs prefs
Global Data.
Definition prefs.c:33
void Html_tag_close_form(DilloHtml *html)
Definition form.cc:407
void a_Html_form_display_hiddens2(void *vform, bool display)
Definition form.cc:267
void Html_tag_open_textarea(DilloHtml *html, const char *tag, int tagsize)
Definition form.cc:655
void Html_tag_content_textarea(DilloHtml *html, const char *tag, int tagsize)
The textarea tag.
Definition form.cc:665
void Html_tag_close_button(DilloHtml *html)
Handle close <BUTTON>
Definition form.cc:1023
void Html_tag_open_option(DilloHtml *html, const char *tag, int tagsize)
Definition form.cc:916
DilloHtmlForm * a_Html_form_new(DilloHtml *html, DilloHtmlMethod method, const DilloUrl *action, DilloHtmlEnc content_type, const char *charset, bool enabled)
Definition form.cc:238
void Html_tag_open_optgroup(DilloHtml *html, const char *tag, int tagsize)
Definition form.cc:856
void a_Html_form_reset2(void *vform)
Definition form.cc:262
void a_Html_form_delete(DilloHtmlForm *form)
Definition form.cc:247
void a_Html_input_delete(DilloHtmlInput *input)
Definition form.cc:252
void Html_tag_close_select(DilloHtml *html)
Definition form.cc:834
void Html_tag_open_isindex(DilloHtml *html, const char *tag, int tagsize)
The ISINDEX tag is just a deprecated form of <INPUT type=text> with implied FORM,...
Definition form.cc:614
void Html_tag_close_optgroup(DilloHtml *html)
Definition form.cc:892
void Html_tag_close_textarea(DilloHtml *html)
Close textarea.
Definition form.cc:729
void Html_tag_open_select(DilloHtml *html, const char *tag, int tagsize)
Definition form.cc:770
void Html_tag_open_input(DilloHtml *html, const char *tag, int tagsize)
Add a new input to current form.
Definition form.cc:437
void Html_tag_open_button(DilloHtml *html, const char *tag, int tagsize)
Definition form.cc:955
void Html_tag_open_form(DilloHtml *html, const char *tag, int tagsize)
Handle <FORM> tag.
Definition form.cc:341
void a_Html_form_submit2(void *vform)
Definition form.cc:257
void Html_tag_close_option(DilloHtml *html)
Definition form.cc:944
DilloHtmlMethod
Definition form.hh:10
DilloHtmlEnc
Definition form.hh:16
void a_Image_ref(DilloImage *Image)
Add a reference to an Image struct. Do nothing if the argument is NULL.
Definition image.cc:101
void a_Image_unref(DilloImage *Image)
Unref and free if necessary. Do nothing if the argument is NULL.
Definition image.cc:90
DilloImage * a_Image_new(void *layout, void *img_rndr, int32_t bg_color, int32_t fg_color)
Create and initialize a new image structure.
Definition image.cc:35
void Html_tag_open_td(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:242
void Html_tag_content_tbody(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:260
void Html_tag_open_tbody(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:256
void Html_tag_open_thead(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:274
void Html_tag_content_th(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:287
void Html_tag_open_table(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:47
void Html_tag_open_tfoot(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:267
void Html_tag_content_td(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:248
void Html_tag_content_tr(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:223
void Html_tag_open_th(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:281
void Html_tag_content_table(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:161
void Html_tag_open_tr(DilloHtml *html, const char *tag, int tagsize)
Definition table.cc:180
Contains the specific data for a single window.
Definition bw.h:27
Dstr * page_bugs
Definition bw.h:71
float zoom
Definition bw.h:74
int num_page_bugs
HTML-bugs detected at parse time.
Definition bw.h:70
int NumPendingStyleSheets
Number of not yet arrived style sheets.
Definition bw.h:48
Data structure for cache clients.
Definition cache.h:60
int Key
Primary Key for this client.
Definition cache.h:61
void * CbData
Client function data.
Definition cache.h:67
uint_t BufSize
Valid size of cache-data.
Definition cache.h:65
void * Buf
Pointer to cache-data.
Definition cache.h:64
void * Web
Pointer to the Web structure of our client.
Definition cache.h:68
const char * html4_str
const char * ref
const char * html5_str
DilloImage * image
DilloUrl * url
DilloHtmlTableMode table_mode
bool hand_over_break
This is used for list items etc.
DilloHtmlTableBorderMode table_border_mode
dw::core::Widget * table
dw::core::Widget * textblock
DilloHtmlListMode list_type
dw::core::Widget * ref_list_item
This is used to align list items (especially in enumerated lists)
DilloHtmlParseMode parse_mode
int tag_idx
TagInfo index for the tag that's being processed.
bool_t parse_embedded_css
Definition prefs.h:104
bool_t load_images
Definition prefs.h:99
bool_t show_tooltip
Definition prefs.h:66
bool_t limit_text_width
Definition prefs.h:71
bool_t focus_new_tab
Definition prefs.h:74
bool_t show_extra_warnings
Definition prefs.h:123
bool_t load_stylesheets
Definition prefs.h:103
bool_t contrast_visited_color
Definition prefs.h:65
bool_t mark_unloaded_images
Definition prefs.h:101
bool_t middle_click_opens_new_tab
Definition prefs.h:115
Definition url.h:88
Definition dlib.h:120
Dstr_char_t * str
Definition dlib.h:123
int len
Definition dlib.h:122
void * img_rndr
Definition image.hh:61
int flags
Additional info.
Definition web.hh:29
DilloUrl * url
Requested URL.
Definition web.hh:25
BrowserWindow * bw
The requesting browser window [reference].
Definition web.hh:28
DilloImage * Image
For image urls [reference].
Definition web.hh:31
void a_UIcmd_link_popup(void *vbw, const DilloUrl *url, const DilloUrl *page_url)
Definition uicmd.cc:1329
void a_UIcmd_set_bug_prog(BrowserWindow *bw, int n_bug)
Definition uicmd.cc:1590
void a_UIcmd_page_popup(void *vbw, bool_t has_bugs, void *v_cssUrls)
Definition uicmd.cc:1319
void a_UIcmd_set_msg(BrowserWindow *bw, const char *format,...)
Definition uicmd.cc:1622
void a_UIcmd_image_popup(void *vbw, const DilloUrl *url, bool_t loaded_img, DilloUrl *page_url, DilloUrl *link_url)
Definition uicmd.cc:1337
void a_UIcmd_set_page_title(BrowserWindow *bw, const char *label)
Definition uicmd.cc:1599
void a_UIcmd_open_url_nw(BrowserWindow *bw, const DilloUrl *url)
Definition uicmd.cc:847
void a_UIcmd_repush(void *vbw)
Definition uicmd.cc:968
void a_UIcmd_open_url(BrowserWindow *bw, const DilloUrl *url)
Definition uicmd.cc:812
void a_UIcmd_redirection0(void *vbw, const DilloUrl *url)
Definition uicmd.cc:976
void a_UIcmd_open_url_nt(void *vbw, const DilloUrl *url, int focus)
Definition uicmd.cc:863
void a_Url_set_flags(DilloUrl *u, int flags)
Set DilloUrl flags.
Definition url.c:527
int a_Url_cmp(const DilloUrl *A, const DilloUrl *B)
Compare two URLs to check if they're the same, or which one is bigger.
Definition url.c:506
void a_Url_free(DilloUrl *url)
Free a DilloUrl.
Definition url.c:208
void a_Url_set_ismap_coords(DilloUrl *u, char *coord_str)
Set DilloUrl ismap coordinates.
Definition url.c:549
DilloUrl * a_Url_new(const char *url_str, const char *base_url)
Transform (and resolve) a URL string into the respective DilloUrl.
Definition url.c:371
char * a_Url_str(const DilloUrl *u)
Return the url as a string.
Definition url.c:66
DilloUrl * a_Url_dup(const DilloUrl *ori)
Duplicate a Url structure.
Definition url.c:477
#define URL_ILLEGAL_CHARS_SPC(u)
Definition url.h:81
#define URL_SCHEME_(u)
Definition url.h:49
#define URL_ILLEGAL_CHARS(u)
Definition url.h:80
#define URL_SpamSafe
Definition url.h:40
#define URL_FLAGS(u)
Definition url.h:79
#define URL_STR(u)
Definition url.h:76
#define URL_STR_(u)
Definition url.h:55
#define URL_SCHEME(u)
Definition url.h:70
bool_t a_Utf8_ideographic(const char *s, const char *end, int *len)
Does s point to a UTF-8-encoded ideographic character?.
Definition utf8.cc:76
int a_Utf8_encode(unsigned int ucs, char *buf)
Write UTF-8 encoding of ucs into buf and return number of bytes written.
Definition utf8.cc:54
static const char utf8_zero_width_space[]
Unicode zero width space U+200B.
Definition utf8.hh:19
DilloWeb * a_Web_new(BrowserWindow *bw, const DilloUrl *url, const DilloUrl *requester)
Allocate and set safe values for a DilloWeb structure.
Definition web.cc:121
#define WEB_Stylesheet
Definition web.hh:18
#define WEB_Image
Definition web.hh:17